1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * amd-pstate.c - AMD Processor P-state Frequency Driver
4 *
5 * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
6 *
7 * Author: Huang Rui <ray.huang@amd.com>
8 *
9 * AMD P-State introduces a new CPU performance scaling design for AMD
10 * processors using the ACPI Collaborative Performance and Power Control (CPPC)
11 * feature which works with the AMD SMU firmware providing a finer grained
12 * frequency control range. It is to replace the legacy ACPI P-States control,
13 * allows a flexible, low-latency interface for the Linux kernel to directly
14 * communicate the performance hints to hardware.
15 *
 * AMD P-State is supported on recent AMD Zen-based CPU series, including some
 * Zen2 and Zen3 processors. _CPC needs to be present in the ACPI tables of an
 * AMD P-State supported system. There are two types of hardware implementations
 * for AMD P-State: 1) Full MSR Solution and 2) Shared Memory Solution.
 * The X86_FEATURE_CPPC CPU feature flag is used to distinguish between them.
21 */
22
23#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
24
25#include <linux/kernel.h>
26#include <linux/module.h>
27#include <linux/init.h>
28#include <linux/smp.h>
29#include <linux/sched.h>
30#include <linux/cpufreq.h>
31#include <linux/compiler.h>
32#include <linux/dmi.h>
33#include <linux/slab.h>
34#include <linux/acpi.h>
35#include <linux/io.h>
36#include <linux/delay.h>
37#include <linux/uaccess.h>
38#include <linux/static_call.h>
39#include <linux/amd-pstate.h>
40#include <linux/topology.h>
41
42#include <acpi/processor.h>
43#include <acpi/cppc_acpi.h>
44
45#include <asm/msr.h>
46#include <asm/processor.h>
47#include <asm/cpufeature.h>
48#include <asm/cpu_device_id.h>
49#include "amd-pstate-trace.h"
50
/* Reported to cpufreq as policy->cpuinfo.transition_latency at CPU init. */
#define AMD_PSTATE_TRANSITION_LATENCY	20000
/* Reported to cpufreq as policy->transition_delay_us at CPU init. */
#define AMD_PSTATE_TRANSITION_DELAY	1000
/* highest_perf value used for CPUs that report hardware preferred-core support. */
#define AMD_PSTATE_PREFCORE_THRESHOLD	166
54
/*
 * TODO: We need more time to fine tune processors with the shared memory
 * solution together with the community.
 *
 * There are some performance drops on the CPU benchmarks which were reported
 * by SUSE. We are working with them to fine tune the shared memory solution,
 * so we disable it by default to fall back to acpi-cpufreq on these processors
 * and add a module parameter to be able to enable it manually for debugging.
 */
/* Driver currently selected for registration; NULL after cleanup. */
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
/* Current operating mode (one of the AMD_PSTATE_* values). */
static int cppc_state = AMD_PSTATE_UNDEFINED;
/* Last state successfully applied by pstate_enable()/cppc_enable(). */
static bool cppc_enabled;
/* When false, preferred-core priorities are not programmed by this driver. */
static bool amd_pstate_prefcore = true;
70
71/*
72 * AMD Energy Preference Performance (EPP)
73 * The EPP is used in the CCLK DPM controller to drive
74 * the frequency that a core is going to operate during
75 * short periods of activity. EPP values will be utilized for
76 * different OS profiles (balanced, performance, power savings)
77 * display strings corresponding to EPP index in the
78 * energy_perf_strings[]
79 *	index		String
80 *-------------------------------------
81 *	0		default
82 *	1		performance
83 *	2		balance_performance
84 *	3		balance_power
85 *	4		power
86 */
/* Indices into energy_perf_strings[] and epp_values[]. */
enum energy_perf_value_index {
	EPP_INDEX_DEFAULT		= 0,
	EPP_INDEX_PERFORMANCE		= 1,
	EPP_INDEX_BALANCE_PERFORMANCE	= 2,
	EPP_INDEX_BALANCE_POWERSAVE	= 3,
	EPP_INDEX_POWERSAVE		= 4,
};
94
95static const char * const energy_perf_strings[] = {
96	[EPP_INDEX_DEFAULT] = "default",
97	[EPP_INDEX_PERFORMANCE] = "performance",
98	[EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
99	[EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
100	[EPP_INDEX_POWERSAVE] = "power",
101	NULL
102};
103
104static unsigned int epp_values[] = {
105	[EPP_INDEX_DEFAULT] = 0,
106	[EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
107	[EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
108	[EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
109	[EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
110 };
111
/* Handler for one mode transition; receives the target mode, returns 0 or -errno. */
typedef int (*cppc_mode_transition_fn)(int);
113
114static inline int get_mode_idx_from_str(const char *str, size_t size)
115{
116	int i;
117
118	for (i=0; i < AMD_PSTATE_MAX; i++) {
119		if (!strncmp(str, amd_pstate_mode_string[i], size))
120			return i;
121	}
122	return -EINVAL;
123}
124
/* Held while an EPP preference is being stored through sysfs. */
static DEFINE_MUTEX(amd_pstate_limits_lock);
/* Held while amd_pstate_update_limits() refreshes the preferred-core ranking. */
static DEFINE_MUTEX(amd_pstate_driver_lock);
127
128static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
129{
130	u64 epp;
131	int ret;
132
133	if (boot_cpu_has(X86_FEATURE_CPPC)) {
134		if (!cppc_req_cached) {
135			epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
136					&cppc_req_cached);
137			if (epp)
138				return epp;
139		}
140		epp = (cppc_req_cached >> 24) & 0xFF;
141	} else {
142		ret = cppc_get_epp_perf(cpudata->cpu, &epp);
143		if (ret < 0) {
144			pr_debug("Could not retrieve energy perf value (%d)\n", ret);
145			return -EIO;
146		}
147	}
148
149	return (s16)(epp & 0xff);
150}
151
152static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
153{
154	s16 epp;
155	int index = -EINVAL;
156
157	epp = amd_pstate_get_epp(cpudata, 0);
158	if (epp < 0)
159		return epp;
160
161	switch (epp) {
162	case AMD_CPPC_EPP_PERFORMANCE:
163		index = EPP_INDEX_PERFORMANCE;
164		break;
165	case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
166		index = EPP_INDEX_BALANCE_PERFORMANCE;
167		break;
168	case AMD_CPPC_EPP_BALANCE_POWERSAVE:
169		index = EPP_INDEX_BALANCE_POWERSAVE;
170		break;
171	case AMD_CPPC_EPP_POWERSAVE:
172		index = EPP_INDEX_POWERSAVE;
173		break;
174	default:
175		break;
176	}
177
178	return index;
179}
180
181static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
182{
183	int ret;
184	struct cppc_perf_ctrls perf_ctrls;
185
186	if (boot_cpu_has(X86_FEATURE_CPPC)) {
187		u64 value = READ_ONCE(cpudata->cppc_req_cached);
188
189		value &= ~GENMASK_ULL(31, 24);
190		value |= (u64)epp << 24;
191		WRITE_ONCE(cpudata->cppc_req_cached, value);
192
193		ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
194		if (!ret)
195			cpudata->epp_cached = epp;
196	} else {
197		perf_ctrls.energy_perf = epp;
198		ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
199		if (ret) {
200			pr_debug("failed to set energy perf value (%d)\n", ret);
201			return ret;
202		}
203		cpudata->epp_cached = epp;
204	}
205
206	return ret;
207}
208
209static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
210		int pref_index)
211{
212	int epp = -EINVAL;
213	int ret;
214
215	if (!pref_index) {
216		pr_debug("EPP pref_index is invalid\n");
217		return -EINVAL;
218	}
219
220	if (epp == -EINVAL)
221		epp = epp_values[pref_index];
222
223	if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
224		pr_debug("EPP cannot be set under performance policy\n");
225		return -EBUSY;
226	}
227
228	ret = amd_pstate_set_epp(cpudata, epp);
229
230	return ret;
231}
232
233static inline int pstate_enable(bool enable)
234{
235	int ret, cpu;
236	unsigned long logical_proc_id_mask = 0;
237
238	if (enable == cppc_enabled)
239		return 0;
240
241	for_each_present_cpu(cpu) {
242		unsigned long logical_id = topology_logical_die_id(cpu);
243
244		if (test_bit(logical_id, &logical_proc_id_mask))
245			continue;
246
247		set_bit(logical_id, &logical_proc_id_mask);
248
249		ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
250				enable);
251		if (ret)
252			return ret;
253	}
254
255	cppc_enabled = enable;
256	return 0;
257}
258
259static int cppc_enable(bool enable)
260{
261	int cpu, ret = 0;
262	struct cppc_perf_ctrls perf_ctrls;
263
264	if (enable == cppc_enabled)
265		return 0;
266
267	for_each_present_cpu(cpu) {
268		ret = cppc_set_enable(cpu, enable);
269		if (ret)
270			return ret;
271
272		/* Enable autonomous mode for EPP */
273		if (cppc_state == AMD_PSTATE_ACTIVE) {
274			/* Set desired perf as zero to allow EPP firmware control */
275			perf_ctrls.desired_perf = 0;
276			ret = cppc_set_perf(cpu, &perf_ctrls);
277			if (ret)
278				return ret;
279		}
280	}
281
282	cppc_enabled = enable;
283	return ret;
284}
285
286DEFINE_STATIC_CALL(amd_pstate_enable, pstate_enable);
287
288static inline int amd_pstate_enable(bool enable)
289{
290	return static_call(amd_pstate_enable)(enable);
291}
292
293static int pstate_init_perf(struct amd_cpudata *cpudata)
294{
295	u64 cap1;
296	u32 highest_perf;
297
298	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
299				     &cap1);
300	if (ret)
301		return ret;
302
303	/* For platforms that do not support the preferred core feature, the
304	 * highest_pef may be configured with 166 or 255, to avoid max frequency
305	 * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
306	 * the default max perf.
307	 */
308	if (cpudata->hw_prefcore)
309		highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
310	else
311		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
312
313	WRITE_ONCE(cpudata->highest_perf, highest_perf);
314	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
315	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
316	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
317	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
318	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
319	WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
320	return 0;
321}
322
323static int cppc_init_perf(struct amd_cpudata *cpudata)
324{
325	struct cppc_perf_caps cppc_perf;
326	u32 highest_perf;
327
328	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
329	if (ret)
330		return ret;
331
332	if (cpudata->hw_prefcore)
333		highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
334	else
335		highest_perf = cppc_perf.highest_perf;
336
337	WRITE_ONCE(cpudata->highest_perf, highest_perf);
338	WRITE_ONCE(cpudata->max_limit_perf, highest_perf);
339	WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf);
340	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
341		   cppc_perf.lowest_nonlinear_perf);
342	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
343	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
344	WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
345
346	if (cppc_state == AMD_PSTATE_ACTIVE)
347		return 0;
348
349	ret = cppc_get_auto_sel_caps(cpudata->cpu, &cppc_perf);
350	if (ret) {
351		pr_warn("failed to get auto_sel, ret: %d\n", ret);
352		return 0;
353	}
354
355	ret = cppc_set_auto_sel(cpudata->cpu,
356			(cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
357
358	if (ret)
359		pr_warn("failed to set auto_sel, ret: %d\n", ret);
360
361	return ret;
362}
363
364DEFINE_STATIC_CALL(amd_pstate_init_perf, pstate_init_perf);
365
366static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
367{
368	return static_call(amd_pstate_init_perf)(cpudata);
369}
370
371static void pstate_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
372			       u32 des_perf, u32 max_perf, bool fast_switch)
373{
374	if (fast_switch)
375		wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
376	else
377		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
378			      READ_ONCE(cpudata->cppc_req_cached));
379}
380
381static void cppc_update_perf(struct amd_cpudata *cpudata,
382			     u32 min_perf, u32 des_perf,
383			     u32 max_perf, bool fast_switch)
384{
385	struct cppc_perf_ctrls perf_ctrls;
386
387	perf_ctrls.max_perf = max_perf;
388	perf_ctrls.min_perf = min_perf;
389	perf_ctrls.desired_perf = des_perf;
390
391	cppc_set_perf(cpudata->cpu, &perf_ctrls);
392}
393
394DEFINE_STATIC_CALL(amd_pstate_update_perf, pstate_update_perf);
395
396static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
397					  u32 min_perf, u32 des_perf,
398					  u32 max_perf, bool fast_switch)
399{
400	static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
401					    max_perf, fast_switch);
402}
403
404static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
405{
406	u64 aperf, mperf, tsc;
407	unsigned long flags;
408
409	local_irq_save(flags);
410	rdmsrl(MSR_IA32_APERF, aperf);
411	rdmsrl(MSR_IA32_MPERF, mperf);
412	tsc = rdtsc();
413
414	if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) {
415		local_irq_restore(flags);
416		return false;
417	}
418
419	local_irq_restore(flags);
420
421	cpudata->cur.aperf = aperf;
422	cpudata->cur.mperf = mperf;
423	cpudata->cur.tsc =  tsc;
424	cpudata->cur.aperf -= cpudata->prev.aperf;
425	cpudata->cur.mperf -= cpudata->prev.mperf;
426	cpudata->cur.tsc -= cpudata->prev.tsc;
427
428	cpudata->prev.aperf = aperf;
429	cpudata->prev.mperf = mperf;
430	cpudata->prev.tsc = tsc;
431
432	cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf);
433
434	return true;
435}
436
437static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
438			      u32 des_perf, u32 max_perf, bool fast_switch, int gov_flags)
439{
440	u64 prev = READ_ONCE(cpudata->cppc_req_cached);
441	u64 value = prev;
442
443	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
444			cpudata->max_limit_perf);
445	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
446			cpudata->max_limit_perf);
447	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
448
449	if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) {
450		min_perf = des_perf;
451		des_perf = 0;
452	}
453
454	value &= ~AMD_CPPC_MIN_PERF(~0L);
455	value |= AMD_CPPC_MIN_PERF(min_perf);
456
457	value &= ~AMD_CPPC_DES_PERF(~0L);
458	value |= AMD_CPPC_DES_PERF(des_perf);
459
460	value &= ~AMD_CPPC_MAX_PERF(~0L);
461	value |= AMD_CPPC_MAX_PERF(max_perf);
462
463	if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
464		trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
465			cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
466				cpudata->cpu, (value != prev), fast_switch);
467	}
468
469	if (value == prev)
470		return;
471
472	WRITE_ONCE(cpudata->cppc_req_cached, value);
473
474	amd_pstate_update_perf(cpudata, min_perf, des_perf,
475			       max_perf, fast_switch);
476}
477
/* Clamp the requested policy limits to the CPU's limits; always returns 0. */
static int amd_pstate_verify(struct cpufreq_policy_data *policy)
{
	cpufreq_verify_within_cpu_limits(policy);
	return 0;
}
484
485static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
486{
487	u32 max_limit_perf, min_limit_perf, lowest_perf;
488	struct amd_cpudata *cpudata = policy->driver_data;
489
490	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
491	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
492
493	lowest_perf = READ_ONCE(cpudata->lowest_perf);
494	if (min_limit_perf < lowest_perf)
495		min_limit_perf = lowest_perf;
496
497	if (max_limit_perf < min_limit_perf)
498		max_limit_perf = min_limit_perf;
499
500	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
501	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
502	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
503	WRITE_ONCE(cpudata->min_limit_freq, policy->min);
504
505	return 0;
506}
507
508static int amd_pstate_update_freq(struct cpufreq_policy *policy,
509				  unsigned int target_freq, bool fast_switch)
510{
511	struct cpufreq_freqs freqs;
512	struct amd_cpudata *cpudata = policy->driver_data;
513	unsigned long max_perf, min_perf, des_perf, cap_perf;
514
515	if (!cpudata->max_freq)
516		return -ENODEV;
517
518	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
519		amd_pstate_update_min_max_limit(policy);
520
521	cap_perf = READ_ONCE(cpudata->highest_perf);
522	min_perf = READ_ONCE(cpudata->lowest_perf);
523	max_perf = cap_perf;
524
525	freqs.old = policy->cur;
526	freqs.new = target_freq;
527
528	des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
529				     cpudata->max_freq);
530
531	WARN_ON(fast_switch && !policy->fast_switch_enabled);
532	/*
533	 * If fast_switch is desired, then there aren't any registered
534	 * transition notifiers. See comment for
535	 * cpufreq_enable_fast_switch().
536	 */
537	if (!fast_switch)
538		cpufreq_freq_transition_begin(policy, &freqs);
539
540	amd_pstate_update(cpudata, min_perf, des_perf,
541			max_perf, fast_switch, policy->governor->flags);
542
543	if (!fast_switch)
544		cpufreq_freq_transition_end(policy, &freqs, false);
545
546	return 0;
547}
548
549static int amd_pstate_target(struct cpufreq_policy *policy,
550			     unsigned int target_freq,
551			     unsigned int relation)
552{
553	return amd_pstate_update_freq(policy, target_freq, false);
554}
555
556static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
557				  unsigned int target_freq)
558{
559	if (!amd_pstate_update_freq(policy, target_freq, true))
560		return target_freq;
561	return policy->cur;
562}
563
564static void amd_pstate_adjust_perf(unsigned int cpu,
565				   unsigned long _min_perf,
566				   unsigned long target_perf,
567				   unsigned long capacity)
568{
569	unsigned long max_perf, min_perf, des_perf,
570		      cap_perf, lowest_nonlinear_perf, max_freq;
571	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
572	struct amd_cpudata *cpudata = policy->driver_data;
573	unsigned int target_freq;
574
575	if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
576		amd_pstate_update_min_max_limit(policy);
577
578
579	cap_perf = READ_ONCE(cpudata->highest_perf);
580	lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
581	max_freq = READ_ONCE(cpudata->max_freq);
582
583	des_perf = cap_perf;
584	if (target_perf < capacity)
585		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
586
587	min_perf = READ_ONCE(cpudata->lowest_perf);
588	if (_min_perf < capacity)
589		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
590
591	if (min_perf < lowest_nonlinear_perf)
592		min_perf = lowest_nonlinear_perf;
593
594	max_perf = cap_perf;
595	if (max_perf < min_perf)
596		max_perf = min_perf;
597
598	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
599	target_freq = div_u64(des_perf * max_freq, max_perf);
600	policy->cur = target_freq;
601
602	amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
603			policy->governor->flags);
604	cpufreq_cpu_put(policy);
605}
606
607static int amd_get_min_freq(struct amd_cpudata *cpudata)
608{
609	struct cppc_perf_caps cppc_perf;
610
611	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
612	if (ret)
613		return ret;
614
615	/* Switch to khz */
616	return cppc_perf.lowest_freq * 1000;
617}
618
619static int amd_get_max_freq(struct amd_cpudata *cpudata)
620{
621	struct cppc_perf_caps cppc_perf;
622	u32 max_perf, max_freq, nominal_freq, nominal_perf;
623	u64 boost_ratio;
624
625	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
626	if (ret)
627		return ret;
628
629	nominal_freq = cppc_perf.nominal_freq;
630	nominal_perf = READ_ONCE(cpudata->nominal_perf);
631	max_perf = READ_ONCE(cpudata->highest_perf);
632
633	boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
634			      nominal_perf);
635
636	max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT;
637
638	/* Switch to khz */
639	return max_freq * 1000;
640}
641
642static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
643{
644	struct cppc_perf_caps cppc_perf;
645
646	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
647	if (ret)
648		return ret;
649
650	/* Switch to khz */
651	return cppc_perf.nominal_freq * 1000;
652}
653
654static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
655{
656	struct cppc_perf_caps cppc_perf;
657	u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
658	    nominal_freq, nominal_perf;
659	u64 lowest_nonlinear_ratio;
660
661	int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
662	if (ret)
663		return ret;
664
665	nominal_freq = cppc_perf.nominal_freq;
666	nominal_perf = READ_ONCE(cpudata->nominal_perf);
667
668	lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
669
670	lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
671					 nominal_perf);
672
673	lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;
674
675	/* Switch to khz */
676	return lowest_nonlinear_freq * 1000;
677}
678
679static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
680{
681	struct amd_cpudata *cpudata = policy->driver_data;
682	int ret;
683
684	if (!cpudata->boost_supported) {
685		pr_err("Boost mode is not supported by this processor or SBIOS\n");
686		return -EINVAL;
687	}
688
689	if (state)
690		policy->cpuinfo.max_freq = cpudata->max_freq;
691	else
692		policy->cpuinfo.max_freq = cpudata->nominal_freq;
693
694	policy->max = policy->cpuinfo.max_freq;
695
696	ret = freq_qos_update_request(&cpudata->req[1],
697				      policy->cpuinfo.max_freq);
698	if (ret < 0)
699		return ret;
700
701	return 0;
702}
703
704static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
705{
706	u32 highest_perf, nominal_perf;
707
708	highest_perf = READ_ONCE(cpudata->highest_perf);
709	nominal_perf = READ_ONCE(cpudata->nominal_perf);
710
711	if (highest_perf <= nominal_perf)
712		return;
713
714	cpudata->boost_supported = true;
715	current_pstate_driver->boost_enabled = true;
716}
717
718static void amd_perf_ctl_reset(unsigned int cpu)
719{
720	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
721}
722
/*
 * Enabling scheduler preferred-core (ITMT) support can't be done directly
 * from cpufreq callbacks due to locking, so it is deferred to a work item.
 */
static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
{
	sched_set_itmt_support();
}

static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
732
733/*
734 * Get the highest performance register value.
735 * @cpu: CPU from which to get highest performance.
736 * @highest_perf: Return address.
737 *
738 * Return: 0 for success, -EIO otherwise.
739 */
740static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
741{
742	int ret;
743
744	if (boot_cpu_has(X86_FEATURE_CPPC)) {
745		u64 cap1;
746
747		ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
748		if (ret)
749			return ret;
750		WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
751	} else {
752		u64 cppc_highest_perf;
753
754		ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
755		if (ret)
756			return ret;
757		WRITE_ONCE(*highest_perf, cppc_highest_perf);
758	}
759
760	return (ret);
761}
762
/* A highest_perf at or above this value is treated as "no preferred core". */
#define CPPC_MAX_PERF	U8_MAX
764
765static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
766{
767	int ret, prio;
768	u32 highest_perf;
769
770	ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
771	if (ret)
772		return;
773
774	cpudata->hw_prefcore = true;
775	/* check if CPPC preferred core feature is enabled*/
776	if (highest_perf < CPPC_MAX_PERF)
777		prio = (int)highest_perf;
778	else {
779		pr_debug("AMD CPPC preferred core is unsupported!\n");
780		cpudata->hw_prefcore = false;
781		return;
782	}
783
784	if (!amd_pstate_prefcore)
785		return;
786
787	/*
788	 * The priorities can be set regardless of whether or not
789	 * sched_set_itmt_support(true) has been called and it is valid to
790	 * update them at any time after it has been called.
791	 */
792	sched_set_itmt_core_prio(prio, cpudata->cpu);
793
794	schedule_work(&sched_prefcore_work);
795}
796
797static void amd_pstate_update_limits(unsigned int cpu)
798{
799	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
800	struct amd_cpudata *cpudata = policy->driver_data;
801	u32 prev_high = 0, cur_high = 0;
802	int ret;
803	bool highest_perf_changed = false;
804
805	mutex_lock(&amd_pstate_driver_lock);
806	if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
807		goto free_cpufreq_put;
808
809	ret = amd_pstate_get_highest_perf(cpu, &cur_high);
810	if (ret)
811		goto free_cpufreq_put;
812
813	prev_high = READ_ONCE(cpudata->prefcore_ranking);
814	if (prev_high != cur_high) {
815		highest_perf_changed = true;
816		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
817
818		if (cur_high < CPPC_MAX_PERF)
819			sched_set_itmt_core_prio((int)cur_high, cpu);
820	}
821
822free_cpufreq_put:
823	cpufreq_cpu_put(policy);
824
825	if (!highest_perf_changed)
826		cpufreq_update_policy(cpu);
827
828	mutex_unlock(&amd_pstate_driver_lock);
829}
830
831static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
832{
833	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
834	struct device *dev;
835	struct amd_cpudata *cpudata;
836
837	/*
838	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
839	 * which is ideal for initialization process.
840	 */
841	amd_perf_ctl_reset(policy->cpu);
842	dev = get_cpu_device(policy->cpu);
843	if (!dev)
844		return -ENODEV;
845
846	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
847	if (!cpudata)
848		return -ENOMEM;
849
850	cpudata->cpu = policy->cpu;
851
852	amd_pstate_init_prefcore(cpudata);
853
854	ret = amd_pstate_init_perf(cpudata);
855	if (ret)
856		goto free_cpudata1;
857
858	min_freq = amd_get_min_freq(cpudata);
859	max_freq = amd_get_max_freq(cpudata);
860	nominal_freq = amd_get_nominal_freq(cpudata);
861	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
862
863	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
864		dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
865			min_freq, max_freq);
866		ret = -EINVAL;
867		goto free_cpudata1;
868	}
869
870	policy->cpuinfo.transition_latency = AMD_PSTATE_TRANSITION_LATENCY;
871	policy->transition_delay_us = AMD_PSTATE_TRANSITION_DELAY;
872
873	policy->min = min_freq;
874	policy->max = max_freq;
875
876	policy->cpuinfo.min_freq = min_freq;
877	policy->cpuinfo.max_freq = max_freq;
878
879	/* It will be updated by governor */
880	policy->cur = policy->cpuinfo.min_freq;
881
882	if (boot_cpu_has(X86_FEATURE_CPPC))
883		policy->fast_switch_possible = true;
884
885	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[0],
886				   FREQ_QOS_MIN, policy->cpuinfo.min_freq);
887	if (ret < 0) {
888		dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
889		goto free_cpudata1;
890	}
891
892	ret = freq_qos_add_request(&policy->constraints, &cpudata->req[1],
893				   FREQ_QOS_MAX, policy->cpuinfo.max_freq);
894	if (ret < 0) {
895		dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
896		goto free_cpudata2;
897	}
898
899	/* Initial processor data capability frequencies */
900	cpudata->max_freq = max_freq;
901	cpudata->min_freq = min_freq;
902	cpudata->max_limit_freq = max_freq;
903	cpudata->min_limit_freq = min_freq;
904	cpudata->nominal_freq = nominal_freq;
905	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
906
907	policy->driver_data = cpudata;
908
909	amd_pstate_boost_init(cpudata);
910	if (!current_pstate_driver->adjust_perf)
911		current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
912
913	return 0;
914
915free_cpudata2:
916	freq_qos_remove_request(&cpudata->req[0]);
917free_cpudata1:
918	kfree(cpudata);
919	return ret;
920}
921
922static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
923{
924	struct amd_cpudata *cpudata = policy->driver_data;
925
926	freq_qos_remove_request(&cpudata->req[1]);
927	freq_qos_remove_request(&cpudata->req[0]);
928	policy->fast_switch_possible = false;
929	kfree(cpudata);
930
931	return 0;
932}
933
934static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
935{
936	int ret;
937
938	ret = amd_pstate_enable(true);
939	if (ret)
940		pr_err("failed to enable amd-pstate during resume, return %d\n", ret);
941
942	return ret;
943}
944
945static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
946{
947	int ret;
948
949	ret = amd_pstate_enable(false);
950	if (ret)
951		pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);
952
953	return ret;
954}
955
956/* Sysfs attributes */
957
958/*
959 * This frequency is to indicate the maximum hardware frequency.
960 * If boost is not active but supported, the frequency will be larger than the
961 * one in cpuinfo.
962 */
963static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
964					char *buf)
965{
966	int max_freq;
967	struct amd_cpudata *cpudata = policy->driver_data;
968
969	max_freq = amd_get_max_freq(cpudata);
970	if (max_freq < 0)
971		return max_freq;
972
973	return sysfs_emit(buf, "%u\n", max_freq);
974}
975
976static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
977						     char *buf)
978{
979	int freq;
980	struct amd_cpudata *cpudata = policy->driver_data;
981
982	freq = amd_get_lowest_nonlinear_freq(cpudata);
983	if (freq < 0)
984		return freq;
985
986	return sysfs_emit(buf, "%u\n", freq);
987}
988
989/*
990 * In some of ASICs, the highest_perf is not the one in the _CPC table, so we
991 * need to expose it to sysfs.
992 */
993static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
994					    char *buf)
995{
996	u32 perf;
997	struct amd_cpudata *cpudata = policy->driver_data;
998
999	perf = READ_ONCE(cpudata->highest_perf);
1000
1001	return sysfs_emit(buf, "%u\n", perf);
1002}
1003
1004static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
1005						char *buf)
1006{
1007	u32 perf;
1008	struct amd_cpudata *cpudata = policy->driver_data;
1009
1010	perf = READ_ONCE(cpudata->prefcore_ranking);
1011
1012	return sysfs_emit(buf, "%u\n", perf);
1013}
1014
1015static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
1016					   char *buf)
1017{
1018	bool hw_prefcore;
1019	struct amd_cpudata *cpudata = policy->driver_data;
1020
1021	hw_prefcore = READ_ONCE(cpudata->hw_prefcore);
1022
1023	return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
1024}
1025
1026static ssize_t show_energy_performance_available_preferences(
1027				struct cpufreq_policy *policy, char *buf)
1028{
1029	int i = 0;
1030	int offset = 0;
1031	struct amd_cpudata *cpudata = policy->driver_data;
1032
1033	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1034		return sysfs_emit_at(buf, offset, "%s\n",
1035				energy_perf_strings[EPP_INDEX_PERFORMANCE]);
1036
1037	while (energy_perf_strings[i] != NULL)
1038		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
1039
1040	offset += sysfs_emit_at(buf, offset, "\n");
1041
1042	return offset;
1043}
1044
1045static ssize_t store_energy_performance_preference(
1046		struct cpufreq_policy *policy, const char *buf, size_t count)
1047{
1048	struct amd_cpudata *cpudata = policy->driver_data;
1049	char str_preference[21];
1050	ssize_t ret;
1051
1052	ret = sscanf(buf, "%20s", str_preference);
1053	if (ret != 1)
1054		return -EINVAL;
1055
1056	ret = match_string(energy_perf_strings, -1, str_preference);
1057	if (ret < 0)
1058		return -EINVAL;
1059
1060	mutex_lock(&amd_pstate_limits_lock);
1061	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
1062	mutex_unlock(&amd_pstate_limits_lock);
1063
1064	return ret ?: count;
1065}
1066
1067static ssize_t show_energy_performance_preference(
1068				struct cpufreq_policy *policy, char *buf)
1069{
1070	struct amd_cpudata *cpudata = policy->driver_data;
1071	int preference;
1072
1073	preference = amd_pstate_get_energy_pref_index(cpudata);
1074	if (preference < 0)
1075		return preference;
1076
1077	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
1078}
1079
1080static void amd_pstate_driver_cleanup(void)
1081{
1082	amd_pstate_enable(false);
1083	cppc_state = AMD_PSTATE_DISABLE;
1084	current_pstate_driver = NULL;
1085}
1086
1087static int amd_pstate_register_driver(int mode)
1088{
1089	int ret;
1090
1091	if (mode == AMD_PSTATE_PASSIVE || mode == AMD_PSTATE_GUIDED)
1092		current_pstate_driver = &amd_pstate_driver;
1093	else if (mode == AMD_PSTATE_ACTIVE)
1094		current_pstate_driver = &amd_pstate_epp_driver;
1095	else
1096		return -EINVAL;
1097
1098	cppc_state = mode;
1099	ret = cpufreq_register_driver(current_pstate_driver);
1100	if (ret) {
1101		amd_pstate_driver_cleanup();
1102		return ret;
1103	}
1104	return 0;
1105}
1106
1107static int amd_pstate_unregister_driver(int dummy)
1108{
1109	cpufreq_unregister_driver(current_pstate_driver);
1110	amd_pstate_driver_cleanup();
1111	return 0;
1112}
1113
1114static int amd_pstate_change_mode_without_dvr_change(int mode)
1115{
1116	int cpu = 0;
1117
1118	cppc_state = mode;
1119
1120	if (boot_cpu_has(X86_FEATURE_CPPC) || cppc_state == AMD_PSTATE_ACTIVE)
1121		return 0;
1122
1123	for_each_present_cpu(cpu) {
1124		cppc_set_auto_sel(cpu, (cppc_state == AMD_PSTATE_PASSIVE) ? 0 : 1);
1125	}
1126
1127	return 0;
1128}
1129
/*
 * Switch to @mode by unregistering the current driver and registering the
 * one that implements @mode.
 *
 * Return: 0 on success or the first error reported by either step.
 */
static int amd_pstate_change_driver_mode(int mode)
{
	int err = amd_pstate_unregister_driver(0);

	if (err)
		return err;

	return amd_pstate_register_driver(mode);
}
1144
1145static cppc_mode_transition_fn mode_state_machine[AMD_PSTATE_MAX][AMD_PSTATE_MAX] = {
1146	[AMD_PSTATE_DISABLE]         = {
1147		[AMD_PSTATE_DISABLE]     = NULL,
1148		[AMD_PSTATE_PASSIVE]     = amd_pstate_register_driver,
1149		[AMD_PSTATE_ACTIVE]      = amd_pstate_register_driver,
1150		[AMD_PSTATE_GUIDED]      = amd_pstate_register_driver,
1151	},
1152	[AMD_PSTATE_PASSIVE]         = {
1153		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
1154		[AMD_PSTATE_PASSIVE]     = NULL,
1155		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
1156		[AMD_PSTATE_GUIDED]      = amd_pstate_change_mode_without_dvr_change,
1157	},
1158	[AMD_PSTATE_ACTIVE]          = {
1159		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
1160		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_driver_mode,
1161		[AMD_PSTATE_ACTIVE]      = NULL,
1162		[AMD_PSTATE_GUIDED]      = amd_pstate_change_driver_mode,
1163	},
1164	[AMD_PSTATE_GUIDED]          = {
1165		[AMD_PSTATE_DISABLE]     = amd_pstate_unregister_driver,
1166		[AMD_PSTATE_PASSIVE]     = amd_pstate_change_mode_without_dvr_change,
1167		[AMD_PSTATE_ACTIVE]      = amd_pstate_change_driver_mode,
1168		[AMD_PSTATE_GUIDED]      = NULL,
1169	},
1170};
1171
1172static ssize_t amd_pstate_show_status(char *buf)
1173{
1174	if (!current_pstate_driver)
1175		return sysfs_emit(buf, "disable\n");
1176
1177	return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
1178}
1179
1180static int amd_pstate_update_status(const char *buf, size_t size)
1181{
1182	int mode_idx;
1183
1184	if (size > strlen("passive") || size < strlen("active"))
1185		return -EINVAL;
1186
1187	mode_idx = get_mode_idx_from_str(buf, size);
1188
1189	if (mode_idx < 0 || mode_idx >= AMD_PSTATE_MAX)
1190		return -EINVAL;
1191
1192	if (mode_state_machine[cppc_state][mode_idx])
1193		return mode_state_machine[cppc_state][mode_idx](mode_idx);
1194
1195	return 0;
1196}
1197
1198static ssize_t status_show(struct device *dev,
1199			   struct device_attribute *attr, char *buf)
1200{
1201	ssize_t ret;
1202
1203	mutex_lock(&amd_pstate_driver_lock);
1204	ret = amd_pstate_show_status(buf);
1205	mutex_unlock(&amd_pstate_driver_lock);
1206
1207	return ret;
1208}
1209
1210static ssize_t status_store(struct device *a, struct device_attribute *b,
1211			    const char *buf, size_t count)
1212{
1213	char *p = memchr(buf, '\n', count);
1214	int ret;
1215
1216	mutex_lock(&amd_pstate_driver_lock);
1217	ret = amd_pstate_update_status(buf, p ? p - buf : count);
1218	mutex_unlock(&amd_pstate_driver_lock);
1219
1220	return ret < 0 ? ret : count;
1221}
1222
1223static ssize_t prefcore_show(struct device *dev,
1224			     struct device_attribute *attr, char *buf)
1225{
1226	return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
1227}
1228
/* Per-policy attributes listed by both driver flavours. */
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);

/* Per-policy attributes listed only in amd_pstate_epp_attr[]. */
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);

/* Driver-wide attributes collected in pstate_global_attributes[]. */
static DEVICE_ATTR_RW(status);
static DEVICE_ATTR_RO(prefcore);
1239
1240static struct freq_attr *amd_pstate_attr[] = {
1241	&amd_pstate_max_freq,
1242	&amd_pstate_lowest_nonlinear_freq,
1243	&amd_pstate_highest_perf,
1244	&amd_pstate_prefcore_ranking,
1245	&amd_pstate_hw_prefcore,
1246	NULL,
1247};
1248
1249static struct freq_attr *amd_pstate_epp_attr[] = {
1250	&amd_pstate_max_freq,
1251	&amd_pstate_lowest_nonlinear_freq,
1252	&amd_pstate_highest_perf,
1253	&amd_pstate_prefcore_ranking,
1254	&amd_pstate_hw_prefcore,
1255	&energy_performance_preference,
1256	&energy_performance_available_preferences,
1257	NULL,
1258};
1259
1260static struct attribute *pstate_global_attributes[] = {
1261	&dev_attr_status.attr,
1262	&dev_attr_prefcore.attr,
1263	NULL
1264};
1265
1266static const struct attribute_group amd_pstate_global_attr_group = {
1267	.name = "amd_pstate",
1268	.attrs = pstate_global_attributes,
1269};
1270
1271static bool amd_pstate_acpi_pm_profile_server(void)
1272{
1273	switch (acpi_gbl_FADT.preferred_profile) {
1274	case PM_ENTERPRISE_SERVER:
1275	case PM_SOHO_SERVER:
1276	case PM_PERFORMANCE_SERVER:
1277		return true;
1278	}
1279	return false;
1280}
1281
1282static bool amd_pstate_acpi_pm_profile_undefined(void)
1283{
1284	if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
1285		return true;
1286	if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
1287		return true;
1288	return false;
1289}
1290
1291static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
1292{
1293	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
1294	struct amd_cpudata *cpudata;
1295	struct device *dev;
1296	u64 value;
1297
1298	/*
1299	 * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
1300	 * which is ideal for initialization process.
1301	 */
1302	amd_perf_ctl_reset(policy->cpu);
1303	dev = get_cpu_device(policy->cpu);
1304	if (!dev)
1305		return -ENODEV;
1306
1307	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
1308	if (!cpudata)
1309		return -ENOMEM;
1310
1311	cpudata->cpu = policy->cpu;
1312	cpudata->epp_policy = 0;
1313
1314	amd_pstate_init_prefcore(cpudata);
1315
1316	ret = amd_pstate_init_perf(cpudata);
1317	if (ret)
1318		goto free_cpudata1;
1319
1320	min_freq = amd_get_min_freq(cpudata);
1321	max_freq = amd_get_max_freq(cpudata);
1322	nominal_freq = amd_get_nominal_freq(cpudata);
1323	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
1324	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
1325		dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
1326				min_freq, max_freq);
1327		ret = -EINVAL;
1328		goto free_cpudata1;
1329	}
1330
1331	policy->cpuinfo.min_freq = min_freq;
1332	policy->cpuinfo.max_freq = max_freq;
1333	/* It will be updated by governor */
1334	policy->cur = policy->cpuinfo.min_freq;
1335
1336	/* Initial processor data capability frequencies */
1337	cpudata->max_freq = max_freq;
1338	cpudata->min_freq = min_freq;
1339	cpudata->nominal_freq = nominal_freq;
1340	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
1341
1342	policy->driver_data = cpudata;
1343
1344	cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
1345
1346	policy->min = policy->cpuinfo.min_freq;
1347	policy->max = policy->cpuinfo.max_freq;
1348
1349	/*
1350	 * Set the policy to provide a valid fallback value in case
1351	 * the default cpufreq governor is neither powersave nor performance.
1352	 */
1353	if (amd_pstate_acpi_pm_profile_server() ||
1354	    amd_pstate_acpi_pm_profile_undefined())
1355		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
1356	else
1357		policy->policy = CPUFREQ_POLICY_POWERSAVE;
1358
1359	if (boot_cpu_has(X86_FEATURE_CPPC)) {
1360		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
1361		if (ret)
1362			return ret;
1363		WRITE_ONCE(cpudata->cppc_req_cached, value);
1364
1365		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
1366		if (ret)
1367			return ret;
1368		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
1369	}
1370	amd_pstate_boost_init(cpudata);
1371
1372	return 0;
1373
1374free_cpudata1:
1375	kfree(cpudata);
1376	return ret;
1377}
1378
1379static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
1380{
1381	pr_debug("CPU %d exiting\n", policy->cpu);
1382	return 0;
1383}
1384
1385static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
1386{
1387	struct amd_cpudata *cpudata = policy->driver_data;
1388	u32 max_perf, min_perf, min_limit_perf, max_limit_perf;
1389	u64 value;
1390	s16 epp;
1391
1392	max_perf = READ_ONCE(cpudata->highest_perf);
1393	min_perf = READ_ONCE(cpudata->lowest_perf);
1394	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
1395	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
1396
1397	if (min_limit_perf < min_perf)
1398		min_limit_perf = min_perf;
1399
1400	if (max_limit_perf < min_limit_perf)
1401		max_limit_perf = min_limit_perf;
1402
1403	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
1404	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
1405
1406	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
1407			cpudata->max_limit_perf);
1408	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
1409			cpudata->max_limit_perf);
1410	value = READ_ONCE(cpudata->cppc_req_cached);
1411
1412	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1413		min_perf = max_perf;
1414
1415	/* Initial min/max values for CPPC Performance Controls Register */
1416	value &= ~AMD_CPPC_MIN_PERF(~0L);
1417	value |= AMD_CPPC_MIN_PERF(min_perf);
1418
1419	value &= ~AMD_CPPC_MAX_PERF(~0L);
1420	value |= AMD_CPPC_MAX_PERF(max_perf);
1421
1422	/* CPPC EPP feature require to set zero to the desire perf bit */
1423	value &= ~AMD_CPPC_DES_PERF(~0L);
1424	value |= AMD_CPPC_DES_PERF(0);
1425
1426	cpudata->epp_policy = cpudata->policy;
1427
1428	/* Get BIOS pre-defined epp value */
1429	epp = amd_pstate_get_epp(cpudata, value);
1430	if (epp < 0) {
1431		/**
1432		 * This return value can only be negative for shared_memory
1433		 * systems where EPP register read/write not supported.
1434		 */
1435		return;
1436	}
1437
1438	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
1439		epp = 0;
1440
1441	/* Set initial EPP value */
1442	if (boot_cpu_has(X86_FEATURE_CPPC)) {
1443		value &= ~GENMASK_ULL(31, 24);
1444		value |= (u64)epp << 24;
1445	}
1446
1447	WRITE_ONCE(cpudata->cppc_req_cached, value);
1448	amd_pstate_set_epp(cpudata, epp);
1449}
1450
1451static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
1452{
1453	struct amd_cpudata *cpudata = policy->driver_data;
1454
1455	if (!policy->cpuinfo.max_freq)
1456		return -ENODEV;
1457
1458	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
1459				policy->cpuinfo.max_freq, policy->max);
1460
1461	cpudata->policy = policy->policy;
1462
1463	amd_pstate_epp_update_limit(policy);
1464
1465	return 0;
1466}
1467
1468static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
1469{
1470	struct cppc_perf_ctrls perf_ctrls;
1471	u64 value, max_perf;
1472	int ret;
1473
1474	ret = amd_pstate_enable(true);
1475	if (ret)
1476		pr_err("failed to enable amd pstate during resume, return %d\n", ret);
1477
1478	value = READ_ONCE(cpudata->cppc_req_cached);
1479	max_perf = READ_ONCE(cpudata->highest_perf);
1480
1481	if (boot_cpu_has(X86_FEATURE_CPPC)) {
1482		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
1483	} else {
1484		perf_ctrls.max_perf = max_perf;
1485		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
1486		cppc_set_perf(cpudata->cpu, &perf_ctrls);
1487	}
1488}
1489
1490static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
1491{
1492	struct amd_cpudata *cpudata = policy->driver_data;
1493
1494	pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
1495
1496	if (cppc_state == AMD_PSTATE_ACTIVE) {
1497		amd_pstate_epp_reenable(cpudata);
1498		cpudata->suspended = false;
1499	}
1500
1501	return 0;
1502}
1503
1504static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
1505{
1506	struct amd_cpudata *cpudata = policy->driver_data;
1507	struct cppc_perf_ctrls perf_ctrls;
1508	int min_perf;
1509	u64 value;
1510
1511	min_perf = READ_ONCE(cpudata->lowest_perf);
1512	value = READ_ONCE(cpudata->cppc_req_cached);
1513
1514	mutex_lock(&amd_pstate_limits_lock);
1515	if (boot_cpu_has(X86_FEATURE_CPPC)) {
1516		cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;
1517
1518		/* Set max perf same as min perf */
1519		value &= ~AMD_CPPC_MAX_PERF(~0L);
1520		value |= AMD_CPPC_MAX_PERF(min_perf);
1521		value &= ~AMD_CPPC_MIN_PERF(~0L);
1522		value |= AMD_CPPC_MIN_PERF(min_perf);
1523		wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
1524	} else {
1525		perf_ctrls.desired_perf = 0;
1526		perf_ctrls.max_perf = min_perf;
1527		perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
1528		cppc_set_perf(cpudata->cpu, &perf_ctrls);
1529	}
1530	mutex_unlock(&amd_pstate_limits_lock);
1531}
1532
1533static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
1534{
1535	struct amd_cpudata *cpudata = policy->driver_data;
1536
1537	pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
1538
1539	if (cpudata->suspended)
1540		return 0;
1541
1542	if (cppc_state == AMD_PSTATE_ACTIVE)
1543		amd_pstate_epp_offline(policy);
1544
1545	return 0;
1546}
1547
1548static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
1549{
1550	cpufreq_verify_within_cpu_limits(policy);
1551	pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
1552	return 0;
1553}
1554
1555static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
1556{
1557	struct amd_cpudata *cpudata = policy->driver_data;
1558	int ret;
1559
1560	/* avoid suspending when EPP is not enabled */
1561	if (cppc_state != AMD_PSTATE_ACTIVE)
1562		return 0;
1563
1564	/* set this flag to avoid setting core offline*/
1565	cpudata->suspended = true;
1566
1567	/* disable CPPC in lowlevel firmware */
1568	ret = amd_pstate_enable(false);
1569	if (ret)
1570		pr_err("failed to suspend, return %d\n", ret);
1571
1572	return 0;
1573}
1574
1575static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
1576{
1577	struct amd_cpudata *cpudata = policy->driver_data;
1578
1579	if (cpudata->suspended) {
1580		mutex_lock(&amd_pstate_limits_lock);
1581
1582		/* enable amd pstate from suspend state*/
1583		amd_pstate_epp_reenable(cpudata);
1584
1585		mutex_unlock(&amd_pstate_limits_lock);
1586
1587		cpudata->suspended = false;
1588	}
1589
1590	return 0;
1591}
1592
1593static struct cpufreq_driver amd_pstate_driver = {
1594	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
1595	.verify		= amd_pstate_verify,
1596	.target		= amd_pstate_target,
1597	.fast_switch    = amd_pstate_fast_switch,
1598	.init		= amd_pstate_cpu_init,
1599	.exit		= amd_pstate_cpu_exit,
1600	.suspend	= amd_pstate_cpu_suspend,
1601	.resume		= amd_pstate_cpu_resume,
1602	.set_boost	= amd_pstate_set_boost,
1603	.update_limits	= amd_pstate_update_limits,
1604	.name		= "amd-pstate",
1605	.attr		= amd_pstate_attr,
1606};
1607
1608static struct cpufreq_driver amd_pstate_epp_driver = {
1609	.flags		= CPUFREQ_CONST_LOOPS,
1610	.verify		= amd_pstate_epp_verify_policy,
1611	.setpolicy	= amd_pstate_epp_set_policy,
1612	.init		= amd_pstate_epp_cpu_init,
1613	.exit		= amd_pstate_epp_cpu_exit,
1614	.offline	= amd_pstate_epp_cpu_offline,
1615	.online		= amd_pstate_epp_cpu_online,
1616	.suspend	= amd_pstate_epp_suspend,
1617	.resume		= amd_pstate_epp_resume,
1618	.update_limits	= amd_pstate_update_limits,
1619	.name		= "amd-pstate-epp",
1620	.attr		= amd_pstate_epp_attr,
1621};
1622
1623static int __init amd_pstate_set_driver(int mode_idx)
1624{
1625	if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
1626		cppc_state = mode_idx;
1627		if (cppc_state == AMD_PSTATE_DISABLE)
1628			pr_info("driver is explicitly disabled\n");
1629
1630		if (cppc_state == AMD_PSTATE_ACTIVE)
1631			current_pstate_driver = &amd_pstate_epp_driver;
1632
1633		if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
1634			current_pstate_driver = &amd_pstate_driver;
1635
1636		return 0;
1637	}
1638
1639	return -EINVAL;
1640}
1641
1642static int __init amd_pstate_init(void)
1643{
1644	struct device *dev_root;
1645	int ret;
1646
1647	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
1648		return -ENODEV;
1649
1650	if (!acpi_cpc_valid()) {
1651		pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
1652		return -ENODEV;
1653	}
1654
1655	/* don't keep reloading if cpufreq_driver exists */
1656	if (cpufreq_get_current_driver())
1657		return -EEXIST;
1658
1659	switch (cppc_state) {
1660	case AMD_PSTATE_UNDEFINED:
1661		/* Disable on the following configs by default:
1662		 * 1. Undefined platforms
1663		 * 2. Server platforms
1664		 * 3. Shared memory designs
1665		 */
1666		if (amd_pstate_acpi_pm_profile_undefined() ||
1667		    amd_pstate_acpi_pm_profile_server() ||
1668		    !boot_cpu_has(X86_FEATURE_CPPC)) {
1669			pr_info("driver load is disabled, boot with specific mode to enable this\n");
1670			return -ENODEV;
1671		}
1672		ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE);
1673		if (ret)
1674			return ret;
1675		break;
1676	case AMD_PSTATE_DISABLE:
1677		return -ENODEV;
1678	case AMD_PSTATE_PASSIVE:
1679	case AMD_PSTATE_ACTIVE:
1680	case AMD_PSTATE_GUIDED:
1681		break;
1682	default:
1683		return -EINVAL;
1684	}
1685
1686	/* capability check */
1687	if (boot_cpu_has(X86_FEATURE_CPPC)) {
1688		pr_debug("AMD CPPC MSR based functionality is supported\n");
1689		if (cppc_state != AMD_PSTATE_ACTIVE)
1690			current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
1691	} else {
1692		pr_debug("AMD CPPC shared memory based functionality is supported\n");
1693		static_call_update(amd_pstate_enable, cppc_enable);
1694		static_call_update(amd_pstate_init_perf, cppc_init_perf);
1695		static_call_update(amd_pstate_update_perf, cppc_update_perf);
1696	}
1697
1698	/* enable amd pstate feature */
1699	ret = amd_pstate_enable(true);
1700	if (ret) {
1701		pr_err("failed to enable with return %d\n", ret);
1702		return ret;
1703	}
1704
1705	ret = cpufreq_register_driver(current_pstate_driver);
1706	if (ret)
1707		pr_err("failed to register with return %d\n", ret);
1708
1709	dev_root = bus_get_dev_root(&cpu_subsys);
1710	if (dev_root) {
1711		ret = sysfs_create_group(&dev_root->kobj, &amd_pstate_global_attr_group);
1712		put_device(dev_root);
1713		if (ret) {
1714			pr_err("sysfs attribute export failed with error %d.\n", ret);
1715			goto global_attr_free;
1716		}
1717	}
1718
1719	return ret;
1720
1721global_attr_free:
1722	cpufreq_unregister_driver(current_pstate_driver);
1723	return ret;
1724}
1725device_initcall(amd_pstate_init);
1726
1727static int __init amd_pstate_param(char *str)
1728{
1729	size_t size;
1730	int mode_idx;
1731
1732	if (!str)
1733		return -EINVAL;
1734
1735	size = strlen(str);
1736	mode_idx = get_mode_idx_from_str(str, size);
1737
1738	return amd_pstate_set_driver(mode_idx);
1739}
1740
1741static int __init amd_prefcore_param(char *str)
1742{
1743	if (!strcmp(str, "disable"))
1744		amd_pstate_prefcore = false;
1745
1746	return 0;
1747}
1748
1749early_param("amd_pstate", amd_pstate_param);
1750early_param("amd_prefcore", amd_prefcore_param);
1751
1752MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
1753MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
1754