// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_powerclamp.c - package c-state idle injection
 *
 * Copyright (c) 2012-2023, Intel Corporation.
 *
 * Authors:
 *     Arjan van de Ven <arjan@linux.intel.com>
 *     Jacob Pan <jacob.jun.pan@linux.intel.com>
 *
 *	TODO:
 *           1. Better handle wakeups from external interrupts. Currently a
 *              fixed compensation is added to the clamping duration when an
 *              excessive number of wakeups is observed during idle time. The
 *              reason is that, for external interrupts that need no ack,
 *              clamping down a CPU in non-IRQ context does not reduce the IRQ
 *              rate. For the majority of cases clamping down a CPU does help
 *              reduce IRQs as well; we should be able to differentiate the
 *              two cases and give a quantitative solution for the IRQs that
 *              we can control, perhaps based on get_cpu_iowait_time_us().
 *
 *	     2. Synchronization with other HW blocks.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/thermal.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/idle_inject.h>

#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/cpu_device_id.h>

#define MAX_TARGET_RATIO (100U)
/*
 * For each undisturbed clamping period (no extra wakeups during idle time),
 * we increment the confidence counter for the given target ratio.
 * CONFIDENCE_OK defines the level at which runtime calibration results are
 * considered valid.
 */
#define CONFIDENCE_OK (3)
/*
 * Default idle injection duration; the driver adjusts the sleep time to meet
 * the target idle ratio, similar to frequency modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)

static struct dentry *debug_dir;
static bool poll_pkg_cstate_enable;

/* Idle ratio observed using package C-state counters */
static unsigned int current_ratio;

/* When true, skip idle injection for the current cycle */
static bool should_skip;

struct powerclamp_data {
	unsigned int cpu;
	unsigned int count;
	unsigned int guard;
	unsigned int window_size_now;
	unsigned int target_ratio;
	bool clamping;
};

static struct powerclamp_data powerclamp_data;

static struct thermal_cooling_device *cooling_dev;

static DEFINE_MUTEX(powerclamp_lock);

/* This duration is in microseconds */
static unsigned int duration;
static unsigned int pkg_cstate_ratio_cur;
static unsigned int window_size;

static int duration_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_duration;

	ret = kstrtoul(arg, 10, &new_duration);
	if (ret)
		goto exit;
	if (new_duration > 25 || new_duration < 6) {
		pr_err("Out of recommended range %lu ms, should be between 6 and 25 ms\n",
			new_duration);
		ret = -EINVAL;
		goto exit;
	}

	mutex_lock(&powerclamp_lock);
	duration = clamp(new_duration, 6ul, 25ul) * 1000;
	mutex_unlock(&powerclamp_lock);
exit:

	return ret;
}

static int duration_get(char *buf, const struct kernel_param *kp)
{
	int ret;

	mutex_lock(&powerclamp_lock);
	ret = sysfs_emit(buf, "%d\n", duration / 1000);
	mutex_unlock(&powerclamp_lock);

	return ret;
}

static const struct kernel_param_ops duration_ops = {
	.set = duration_set,
	.get = duration_get,
};

module_param_cb(duration, &duration_ops, NULL, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
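
/*
 * Example (illustrative): request 10 ms forced-idle periods at runtime via
 * the module parameter created above:
 *
 *   # echo 10 > /sys/module/intel_powerclamp/parameters/duration
 *
 * Values outside the recommended 6-25 ms range are rejected by duration_set().
 */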

#define DEFAULT_MAX_IDLE	50
#define MAX_ALL_CPU_IDLE	75

static u8 max_idle = DEFAULT_MAX_IDLE;

static cpumask_var_t idle_injection_cpu_mask;

static int allocate_copy_idle_injection_mask(const struct cpumask *copy_mask)
{
	if (cpumask_available(idle_injection_cpu_mask))
		goto copy_mask;

	/* This mask is allocated only one time and freed during module exit */
	if (!alloc_cpumask_var(&idle_injection_cpu_mask, GFP_KERNEL))
		return -ENOMEM;

copy_mask:
	cpumask_copy(idle_injection_cpu_mask, copy_mask);

	return 0;
}

/* Return true if the cpumask and idle percent combination is invalid */
static bool check_invalid(cpumask_var_t mask, u8 idle)
{
	if (cpumask_equal(cpu_present_mask, mask) && idle > MAX_ALL_CPU_IDLE)
		return true;

	return false;
}

static int cpumask_set(const char *arg, const struct kernel_param *kp)
{
	cpumask_var_t new_mask;
	int ret;

	mutex_lock(&powerclamp_lock);

	/* Can't set mask when cooling device is in use */
	if (powerclamp_data.clamping) {
		ret = -EAGAIN;
		goto skip_cpumask_set;
	}

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto skip_cpumask_set;
	}

	ret = bitmap_parse(arg, strlen(arg), cpumask_bits(new_mask),
			   nr_cpumask_bits);
	if (ret)
		goto free_cpumask_set;

	if (cpumask_empty(new_mask) || check_invalid(new_mask, max_idle)) {
		ret = -EINVAL;
		goto free_cpumask_set;
	}

	/*
	 * When the cpumask module parameter is passed via the kernel command
	 * line or at insmod time, this callback is invoked before
	 * powerclamp_init(), so we can't assume that a cpumask has already
	 * been allocated and copied before this point. In that case this
	 * cpumask is also used as the default mask.
	 */
	ret = allocate_copy_idle_injection_mask(new_mask);

free_cpumask_set:
	free_cpumask_var(new_mask);
skip_cpumask_set:
	mutex_unlock(&powerclamp_lock);

	return ret;
}

static int cpumask_get(char *buf, const struct kernel_param *kp)
{
	if (!cpumask_available(idle_injection_cpu_mask))
		return -ENODEV;

	return bitmap_print_to_pagebuf(false, buf, cpumask_bits(idle_injection_cpu_mask),
				       nr_cpumask_bits);
}

static const struct kernel_param_ops cpumask_ops = {
	.set = cpumask_set,
	.get = cpumask_get,
};

module_param_cb(cpumask, &cpumask_ops, NULL, 0644);
MODULE_PARM_DESC(cpumask, "Mask of CPUs to use for idle injection.");
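
/*
 * Example (illustrative): restrict idle injection to CPUs 0-3 before raising
 * the cooling device state:
 *
 *   # echo 0f > /sys/module/intel_powerclamp/parameters/cpumask
 *
 * The value is a hex CPU bitmap parsed by bitmap_parse(); it cannot be
 * changed while the cooling device is actively clamping.
 */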

static int max_idle_set(const char *arg, const struct kernel_param *kp)
{
	u8 new_max_idle;
	int ret = 0;

	mutex_lock(&powerclamp_lock);

	/* Can't change the idle limit when the cooling device is in use */
	if (powerclamp_data.clamping) {
		ret = -EAGAIN;
		goto skip_limit_set;
	}

	ret = kstrtou8(arg, 10, &new_max_idle);
	if (ret)
		goto skip_limit_set;

	if (new_max_idle > MAX_TARGET_RATIO) {
		ret = -EINVAL;
		goto skip_limit_set;
	}

	if (!cpumask_available(idle_injection_cpu_mask)) {
		ret = allocate_copy_idle_injection_mask(cpu_present_mask);
		if (ret)
			goto skip_limit_set;
	}

	if (check_invalid(idle_injection_cpu_mask, new_max_idle)) {
		ret = -EINVAL;
		goto skip_limit_set;
	}

	max_idle = new_max_idle;

skip_limit_set:
	mutex_unlock(&powerclamp_lock);

	return ret;
}

static const struct kernel_param_ops max_idle_ops = {
	.set = max_idle_set,
	.get = param_get_byte,
};

module_param_cb(max_idle, &max_idle_ops, &max_idle, 0644);
MODULE_PARM_DESC(max_idle, "maximum ratio of injected idle time to total CPU time, in percent (range: 1-100)");
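
/*
 * Example (illustrative): allow up to 60% injected idle when clamping only a
 * subset of CPUs:
 *
 *   # echo 60 > /sys/module/intel_powerclamp/parameters/max_idle
 *
 * When the cpumask covers all present CPUs, values above MAX_ALL_CPU_IDLE
 * (75) are rejected by check_invalid().
 */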

struct powerclamp_calibration_data {
	unsigned long confidence;  /* used for calibration: a counter that is
				    * incremented each time a clamping period
				    * completes without extra wakeups; once the
				    * counter reaches a given level, the
				    * compensation is deemed usable.
				    */
	unsigned long steady_comp; /* steady state compensation used when
				    * no extra wakeups occurred.
				    */
	unsigned long dynamic_comp; /* compensation for excessive wakeups from
				     * idle, mostly from external interrupts.
				     */
};

static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];

static int window_size_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_window_size;

	ret = kstrtoul(arg, 10, &new_window_size);
	if (ret)
		goto exit_win;
	if (new_window_size > 10 || new_window_size < 2) {
		pr_err("Out of recommended window size %lu, should be between 2 and 10\n",
			new_window_size);
		ret = -EINVAL;
		goto exit_win;
	}

	window_size = clamp(new_window_size, 2ul, 10ul);
	smp_mb();

exit_win:

	return ret;
}

static const struct kernel_param_ops window_size_ops = {
	.set = window_size_set,
	.get = param_get_int,
};

module_param_cb(window_size, &window_size_ops, &window_size, 0644);
MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
	"\tpowerclamp controls idle ratio within this window. larger\n"
	"\twindow size results in slower response time but smoother\n"
	"\tclamping results. default to 2.");
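
/*
 * Example (illustrative): use a 4-cycle window for smoother but slower
 * convergence of the injected idle ratio:
 *
 *   # echo 4 > /sys/module/intel_powerclamp/parameters/window_size
 */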

struct pkg_cstate_info {
	bool skip;
	int msr_index;
	int cstate_id;
};

#define PKG_CSTATE_INIT(id) {				\
		.msr_index = MSR_PKG_C##id##_RESIDENCY, \
		.cstate_id = id				\
			}

static struct pkg_cstate_info pkg_cstates[] = {
	PKG_CSTATE_INIT(2),
	PKG_CSTATE_INIT(3),
	PKG_CSTATE_INIT(6),
	PKG_CSTATE_INIT(7),
	PKG_CSTATE_INIT(8),
	PKG_CSTATE_INIT(9),
	PKG_CSTATE_INIT(10),
	{},
};
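
/*
 * For reference, PKG_CSTATE_INIT(6) expands to
 * { .msr_index = MSR_PKG_C6_RESIDENCY, .cstate_id = 6 }; each entry pairs a
 * residency MSR with its package C-state number, and .skip defaults to false.
 */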

static bool has_pkg_state_counter(void)
{
	u64 val;
	struct pkg_cstate_info *info = pkg_cstates;

	/* check if any one of the counter MSRs exists */
	while (info->msr_index) {
		if (!rdmsrl_safe(info->msr_index, &val))
			return true;
		info++;
	}

	return false;
}

static u64 pkg_state_counter(void)
{
	u64 val;
	u64 count = 0;
	struct pkg_cstate_info *info = pkg_cstates;

	while (info->msr_index) {
		if (!info->skip) {
			if (!rdmsrl_safe(info->msr_index, &val))
				count += val;
			else
				info->skip = true;
		}
		info++;
	}

	return count;
}

static unsigned int get_compensation(int ratio)
{
	unsigned int comp = 0;

	if (!poll_pkg_cstate_enable)
		return 0;

	/* we only use compensation if all adjacent ones are good */
	if (ratio == 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio + 1].steady_comp +
			cal_data[ratio + 2].steady_comp) / 3;
	} else if (ratio == MAX_TARGET_RATIO - 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio - 2].steady_comp) / 3;
	} else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio + 1].steady_comp) / 3;
	}

	/* do not exceed limit */
	if (comp + ratio >= MAX_TARGET_RATIO)
		comp = MAX_TARGET_RATIO - ratio - 1;

	return comp;
}
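
/*
 * Illustrative example with hypothetical calibration values: for ratio = 50,
 * if cal_data[49..51] all have confidence >= CONFIDENCE_OK and steady_comp
 * values of 3, 4 and 5, the last branch gives comp = (4 + 3 + 5) / 3 = 4, so
 * get_run_time() will target 54% injected idle in order to observe 50% in
 * package C-state residency.
 */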

static void adjust_compensation(int target_ratio, unsigned int win)
{
	int delta;
	struct powerclamp_calibration_data *d = &cal_data[target_ratio];

	/*
	 * Adjust the compensation only if the confidence level has not been
	 * reached yet.
	 */
	if (d->confidence >= CONFIDENCE_OK)
		return;

	delta = powerclamp_data.target_ratio - current_ratio;
	/* filter out bad data */
	if (delta >= 0 && delta <= (1 + target_ratio / 10)) {
		if (d->steady_comp)
			d->steady_comp =
				roundup(delta + d->steady_comp, 2) / 2;
		else
			d->steady_comp = delta;
		d->confidence++;
	}
}

static bool powerclamp_adjust_controls(unsigned int target_ratio,
				unsigned int guard, unsigned int win)
{
	static u64 msr_last, tsc_last;
	u64 msr_now, tsc_now;
	u64 val64;

	/* check result for the last window */
	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		current_ratio = 1;
	else if (tsc_now - tsc_last) {
		val64 = 100 * (msr_now - msr_last);
		do_div(val64, (tsc_now - tsc_last));
		current_ratio = val64;
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	adjust_compensation(target_ratio, win);

	/* if we are above target+guard, skip */
	return powerclamp_data.target_ratio + guard <= current_ratio;
}
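
/*
 * Illustrative numbers: if the summed package C-state residency counters
 * advanced by 30 million counts while the TSC advanced by 100 million cycles
 * over the last window, current_ratio = 100 * 30M / 100M = 30, i.e. roughly
 * 30% package idle. The boolean result tells the caller to skip injection
 * when the observed ratio already meets or exceeds target_ratio + guard.
 */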

/*
 * This function calculates runtime from the current target ratio.
 * This function gets called under powerclamp_lock.
 */
static unsigned int get_run_time(void)
{
	unsigned int compensated_ratio;
	unsigned int runtime;

	/*
	 * make sure user selected ratio does not take effect until
	 * the next round. adjust target_ratio if user has changed
	 * target such that we can converge quickly.
	 */
	powerclamp_data.guard = 1 + powerclamp_data.target_ratio / 20;
	powerclamp_data.window_size_now = window_size;

	/*
	 * systems may have different ability to enter package level
	 * c-states, thus we need to compensate the injected idle ratio
	 * to achieve the actual target reported by the HW.
	 */
	compensated_ratio = powerclamp_data.target_ratio +
		get_compensation(powerclamp_data.target_ratio);
	if (compensated_ratio <= 0)
		compensated_ratio = 1;

	runtime = duration * 100 / compensated_ratio - duration;

	return runtime;
}
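
/*
 * Worked example (illustrative numbers): with duration = 10000 us of injected
 * idle per cycle and a compensated ratio of 40%, runtime = 10000 * 100 / 40 -
 * 10000 = 15000 us, i.e. 15 ms of run time paired with 10 ms of idle yields
 * 10 / (10 + 15) = 40% injected idle.
 */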

/*
 * 1 Hz polling while clamping is active, useful for userspace
 * to monitor actual idle ratio.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
static void poll_pkg_cstate(struct work_struct *dummy)
{
	static u64 msr_last;
	static u64 tsc_last;

	u64 msr_now;
	u64 tsc_now;
	u64 val64;

	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		pkg_cstate_ratio_cur = 1;
	else if (tsc_now - tsc_last) {
		val64 = 100 * (msr_now - msr_last);
		do_div(val64, (tsc_now - tsc_last));
		pkg_cstate_ratio_cur = val64;
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	mutex_lock(&powerclamp_lock);
	if (powerclamp_data.clamping)
		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
	mutex_unlock(&powerclamp_lock);
}

static struct idle_inject_device *ii_dev;

/*
 * This function is called by the idle injection core when the run-duration
 * timer expires, which lets powerclamp readjust the injection parameters or
 * skip injecting idle for this cycle.
 */
static bool idle_inject_update(void)
{
	bool update = false;

	/* We can't sleep in this callback */
	if (!mutex_trylock(&powerclamp_lock))
		return true;

	if (!(powerclamp_data.count % powerclamp_data.window_size_now)) {
		should_skip = powerclamp_adjust_controls(powerclamp_data.target_ratio,
							 powerclamp_data.guard,
							 powerclamp_data.window_size_now);
		update = true;
	}

	if (update) {
		unsigned int runtime = get_run_time();

		idle_inject_set_duration(ii_dev, runtime, duration);
	}

	powerclamp_data.count++;

	mutex_unlock(&powerclamp_lock);

	if (should_skip)
		return false;

	return true;
}

/* This function starts idle injection by calling idle_inject_start() */
static void trigger_idle_injection(void)
{
	unsigned int runtime = get_run_time();

	idle_inject_set_duration(ii_dev, runtime, duration);
	idle_inject_start(ii_dev);
	powerclamp_data.clamping = true;
}

/*
 * This function is called from start_power_clamp() to register
 * CPUs with the powercap idle injection core and to set the default
 * idle duration and latency.
 */
static int powerclamp_idle_injection_register(void)
{
	poll_pkg_cstate_enable = false;
	if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) {
		ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update);
		if (topology_max_packages() == 1 && topology_max_dies_per_package() == 1)
			poll_pkg_cstate_enable = true;
	} else {
		ii_dev = idle_inject_register(idle_injection_cpu_mask);
	}

	if (!ii_dev) {
		pr_err("idle_inject_register failed\n");
		return -EAGAIN;
	}

	idle_inject_set_duration(ii_dev, TICK_USEC, duration);
	idle_inject_set_latency(ii_dev, UINT_MAX);

	return 0;
}
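
/*
 * Note: idle_inject_set_duration() takes the run duration first and the idle
 * duration second, both in microseconds. The TICK_USEC run time set above is
 * only a starting point; when the update callback is in use (all present CPUs
 * clamped), idle_inject_update() recomputes the run time via get_run_time()
 * at every window boundary.
 */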

/*
 * This function is called from end_power_clamp() to stop idle injection
 * and unregister CPUs from the powercap idle injection core.
 */
static void remove_idle_injection(void)
{
	if (!powerclamp_data.clamping)
		return;

	powerclamp_data.clamping = false;
	idle_inject_stop(ii_dev);
}

/*
 * This function is called when the user changes the cooling device
 * state from zero to a non-zero value.
 */
static int start_power_clamp(void)
{
	int ret;

	ret = powerclamp_idle_injection_register();
	if (!ret) {
		trigger_idle_injection();
		if (poll_pkg_cstate_enable)
			schedule_delayed_work(&poll_pkg_cstate_work, 0);
	}

	return ret;
}

/*
 * This function is called when the user changes the cooling device
 * state from a non-zero value to zero.
 */
static void end_power_clamp(void)
{
	if (powerclamp_data.clamping) {
		remove_idle_injection();
		idle_inject_unregister(ii_dev);
	}
}

static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	*state = MAX_TARGET_RATIO;

	return 0;
}

static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	mutex_lock(&powerclamp_lock);
	*state = powerclamp_data.target_ratio;
	mutex_unlock(&powerclamp_lock);

	return 0;
}

static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long new_target_ratio)
{
	int ret = 0;

	mutex_lock(&powerclamp_lock);

	new_target_ratio = clamp(new_target_ratio, 0UL,
				(unsigned long) (max_idle - 1));

	if (powerclamp_data.target_ratio == new_target_ratio)
		goto exit_set;

	if (!powerclamp_data.target_ratio && new_target_ratio > 0) {
		pr_info("Start idle injection to reduce power\n");
		powerclamp_data.target_ratio = new_target_ratio;
		ret = start_power_clamp();
		if (ret)
			powerclamp_data.target_ratio = 0;
		goto exit_set;
	} else if (powerclamp_data.target_ratio > 0 && new_target_ratio == 0) {
		pr_info("Stop forced idle injection\n");
		end_power_clamp();
		powerclamp_data.target_ratio = 0;
	} else /* adjust currently running */ {
		unsigned int runtime;

		powerclamp_data.target_ratio = new_target_ratio;
		runtime = get_run_time();
		idle_inject_set_duration(ii_dev, runtime, duration);
	}

exit_set:
	mutex_unlock(&powerclamp_lock);

	return ret;
}

/* Bind to the generic thermal layer as a cooling device */
static const struct thermal_cooling_device_ops powerclamp_cooling_ops = {
	.get_max_state = powerclamp_get_max_state,
	.get_cur_state = powerclamp_get_cur_state,
	.set_cur_state = powerclamp_set_cur_state,
};
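
/*
 * Example (illustrative): the cooling device is registered with type
 * "intel_powerclamp", so userspace can locate it and request roughly 30%
 * injected idle with:
 *
 *   # grep -l intel_powerclamp /sys/class/thermal/cooling_device*/type
 *   # echo 30 > /sys/class/thermal/cooling_deviceX/cur_state
 *
 * where X is the index assigned at registration; writing 0 stops injection.
 */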

static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);

static int __init powerclamp_probe(void)
{
	if (!x86_match_cpu(intel_powerclamp_ids)) {
		pr_err("CPU does not support MWAIT\n");
		return -ENODEV;
	}

	/*
	 * Idle time alignment across CPUs aims at reaching package C-states,
	 * so the package C-state residency counters must be present.
	 */
	if (!has_pkg_state_counter()) {
		pr_info("No package C-state available\n");
		return -ENODEV;
	}

	return 0;
}

static int powerclamp_debug_show(struct seq_file *m, void *unused)
{
	int i = 0;

	seq_puts(m, "pct confidence steady dynamic (compensation)\n");
	for (i = 0; i < MAX_TARGET_RATIO; i++) {
		seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
			i,
			cal_data[i].confidence,
			cal_data[i].steady_comp,
			cal_data[i].dynamic_comp);
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(powerclamp_debug);

static inline void powerclamp_create_debug_files(void)
{
	debug_dir = debugfs_create_dir("intel_powerclamp", NULL);

	debugfs_create_file("powerclamp_calib", 0444, debug_dir, cal_data,
			    &powerclamp_debug_fops);
}

static int __init powerclamp_init(void)
{
	int retval;

	/* probe cpu features and ids here */
	retval = powerclamp_probe();
	if (retval)
		return retval;

	mutex_lock(&powerclamp_lock);
	if (!cpumask_available(idle_injection_cpu_mask))
		retval = allocate_copy_idle_injection_mask(cpu_present_mask);
	mutex_unlock(&powerclamp_lock);

	if (retval)
		return retval;

	/* set default limit, maybe adjusted during runtime based on feedback */
	window_size = 2;

	cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
						      &powerclamp_cooling_ops);
	if (IS_ERR(cooling_dev))
		return -ENODEV;

	if (!duration)
		duration = jiffies_to_usecs(DEFAULT_DURATION_JIFFIES);

	powerclamp_create_debug_files();

	return 0;
}
module_init(powerclamp_init);

static void __exit powerclamp_exit(void)
{
	mutex_lock(&powerclamp_lock);
	end_power_clamp();
	mutex_unlock(&powerclamp_lock);

	thermal_cooling_device_unregister(cooling_dev);

	cancel_delayed_work_sync(&poll_pkg_cstate_work);
	debugfs_remove_recursive(debug_dir);

	if (cpumask_available(idle_injection_cpu_mask))
		free_cpumask_var(idle_injection_cpu_mask);
}
module_exit(powerclamp_exit);

MODULE_IMPORT_NS(IDLE_INJECT);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");
