• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/arch/x86/kernel/cpu/cpufreq/
1/*
2 *   (c) 2003-2010 Advanced Micro Devices, Inc.
3 *  Your use of this code is subject to the terms and conditions of the
4 *  GNU general public license version 2. See "COPYING" or
5 *  http://www.gnu.org/licenses/gpl.html
6 *
7 *  Support : mark.langsdorf@amd.com
8 *
9 *  Based on the powernow-k7.c module written by Dave Jones.
10 *  (C) 2003 Dave Jones on behalf of SuSE Labs
11 *  (C) 2004 Dominik Brodowski <linux@brodo.de>
12 *  (C) 2004 Pavel Machek <pavel@ucw.cz>
13 *  Licensed under the terms of the GNU GPL License version 2.
14 *  Based upon datasheets & sample CPUs kindly provided by AMD.
15 *
16 *  Valuable input gratefully received from Dave Jones, Pavel Machek,
17 *  Dominik Brodowski, Jacob Shin, and others.
18 *  Originally developed by Paul Devriendt.
19 *  Processor information obtained from Chapter 9 (Power and Thermal Management)
20 *  of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
21 *  Opteron Processors" available for download from www.amd.com
22 *
23 *  Tables for specific CPUs can be inferred from
24 *     http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
25 */
26
27#include <linux/kernel.h>
28#include <linux/smp.h>
29#include <linux/module.h>
30#include <linux/init.h>
31#include <linux/cpufreq.h>
32#include <linux/slab.h>
33#include <linux/string.h>
34#include <linux/cpumask.h>
35#include <linux/sched.h>	/* for current / set_cpus_allowed() */
36#include <linux/io.h>
37#include <linux/delay.h>
38
39#include <asm/msr.h>
40
41#include <linux/acpi.h>
42#include <linux/mutex.h>
43#include <acpi/processor.h>
44
45#define PFX "powernow-k8: "
46#define VERSION "version 2.20.00"
47#include "powernow-k8.h"
48#include "mperf.h"
49
/* serialize freq changes  */
static DEFINE_MUTEX(fidvid_mutex);

/* per-CPU driver state; looked up via per_cpu(powernow_data, cpu) */
static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);

/* which transition interface this CPU uses: fid/vid (K8) or HW pstate */
static int cpu_family = CPU_OPTERON;

/* core performance boost */
static bool cpb_capable, cpb_enabled;
static struct msr __percpu *msrs;

static struct cpufreq_driver cpufreq_amd64_driver;
62
#ifndef CONFIG_SMP
/* UP build: the only CPU is cpu 0, so its core-sibling mask is just {0} */
static inline const struct cpumask *cpu_core_mask(int cpu)
{
	return cpumask_of(0);
}
#endif
69
/* Translate a fid to the corresponding core frequency in MHz. */
static u32 find_freq_from_fid(u32 fid)
{
	return (fid * 100) + 800;
}

/* Translate a fid to the corresponding core frequency in KHz. */
static u32 find_khz_freq_from_fid(u32 fid)
{
	return find_freq_from_fid(fid) * 1000;
}
81
82static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
83		u32 pstate)
84{
85	return data[pstate].frequency;
86}
87
/* Return the vco fid for an input fid
 *
 * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
 * only from corresponding high fids. This returns "high" fid corresponding to
 * "low" one.
 */
static u32 convert_fid_to_vco_fid(u32 fid)
{
	/* fids below the "high" table bottom map onto 8 + 2*fid */
	return (fid < HI_FID_TABLE_BOTTOM) ? (2 * fid) + 8 : fid;
}
101
/*
 * Return 1 if the pending bit is set. Unless we just instructed the processor
 * to transition to a new state, seeing this bit set is really bad news.
 */
static int pending_bit_stuck(void)
{
	u32 lo, hi;

	/* the pending-bit protocol only exists on the fid/vid interface */
	if (cpu_family == CPU_HW_PSTATE)
		return 0;

	rdmsr(MSR_FIDVID_STATUS, lo, hi);
	return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
}
116
/*
 * Update the global current fid / vid values from the status msr.
 * Returns 1 on error.
 */
static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
{
	u32 lo, hi;
	u32 i = 0;

	if (cpu_family == CPU_HW_PSTATE) {
		rdmsr(MSR_PSTATE_STATUS, lo, hi);
		i = lo & HW_PSTATE_MASK;
		data->currpstate = i;

		/* fam 11h can report an out-of-range pstate index;
		 * clamp it to P0 rather than trusting it */
		if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
			data->currpstate = HW_PSTATE_0;

		return 0;
	}
	/* poll until the change-pending bit clears; bail after 10000 reads */
	do {
		if (i++ > 10000) {
			dprintk("detected change pending stuck\n");
			return 1;
		}
		rdmsr(MSR_FIDVID_STATUS, lo, hi);
	} while (lo & MSR_S_LO_CHANGE_PENDING);

	/* cache the settled fid/vid in the per-cpu data */
	data->currvid = hi & MSR_S_HI_CURRENT_VID;
	data->currfid = lo & MSR_S_LO_CURRENT_FID;

	return 0;
}
149
150/* the isochronous relief time */
151static void count_off_irt(struct powernow_k8_data *data)
152{
153	udelay((1 << data->irt) * 10);
154	return;
155}
156
157/* the voltage stabilization time */
158static void count_off_vst(struct powernow_k8_data *data)
159{
160	udelay(data->vstable * VST_UNITS_20US);
161	return;
162}
163
/* need to init the control msr to a safe value (for each cpu) */
static void fidvid_msr_init(void)
{
	u32 lo, hi;
	u8 fid, vid;

	/* read the fid/vid the CPU is currently running at ... */
	rdmsr(MSR_FIDVID_STATUS, lo, hi);
	vid = hi & MSR_S_HI_CURRENT_VID;
	fid = lo & MSR_S_LO_CURRENT_FID;
	/* ... and write them back to the control msr with a benign
	 * stop-grant timeout, so later transitions start from sane state */
	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
	hi = MSR_C_HI_STP_GNT_BENIGN;
	dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
	wrmsr(MSR_FIDVID_CTL, lo, hi);
}
178
/* write the new fid value along with the other control fields to the msr */
static int write_new_fid(struct powernow_k8_data *data, u32 fid)
{
	u32 lo;
	u32 savevid = data->currvid;	/* the vid must survive a fid change */
	u32 i = 0;

	if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
		printk(KERN_ERR PFX "internal error - overflow on fid write\n");
		return 1;
	}

	/* low word: new fid, current vid, plus the "initiate change" bit */
	lo = fid;
	lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
	lo |= MSR_C_LO_INIT_FID_VID;

	dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
		fid, lo, data->plllock * PLL_LOCK_CONVERSION);

	/* high word carries the PLL lock time; rewrite while the pending
	 * bit stays stuck, giving up after 100 attempts */
	do {
		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
		if (i++ > 100) {
			printk(KERN_ERR PFX
				"Hardware error - pending bit very stuck - "
				"no further pstate changes possible\n");
			return 1;
		}
	} while (query_current_values_with_pending_wait(data));

	/* wait out the isochronous relief time */
	count_off_irt(data);

	/* a fid transition must not have disturbed the vid */
	if (savevid != data->currvid) {
		printk(KERN_ERR PFX
			"vid change on fid trans, old 0x%x, new 0x%x\n",
			savevid, data->currvid);
		return 1;
	}

	/* verify the hardware actually took the requested fid */
	if (fid != data->currfid) {
		printk(KERN_ERR PFX
			"fid trans failed, fid 0x%x, curr 0x%x\n", fid,
			data->currfid);
		return 1;
	}

	return 0;
}
226
/* Write a new vid to the hardware */
static int write_new_vid(struct powernow_k8_data *data, u32 vid)
{
	u32 lo;
	u32 savefid = data->currfid;	/* the fid must survive a vid change */
	int i = 0;

	if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
		printk(KERN_ERR PFX "internal error - overflow on vid write\n");
		return 1;
	}

	/* low word: current fid, new vid, plus the "initiate change" bit */
	lo = data->currfid;
	lo |= (vid << MSR_C_LO_VID_SHIFT);
	lo |= MSR_C_LO_INIT_FID_VID;

	dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
		vid, lo, STOP_GRANT_5NS);

	/* rewrite while the pending bit stays stuck; give up after 100 */
	do {
		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
		if (i++ > 100) {
			printk(KERN_ERR PFX "internal error - pending bit "
					"very stuck - no further pstate "
					"changes possible\n");
			return 1;
		}
	} while (query_current_values_with_pending_wait(data));

	/* a vid transition must not have disturbed the fid */
	if (savefid != data->currfid) {
		printk(KERN_ERR PFX "fid changed on vid trans, old "
			"0x%x new 0x%x\n",
		       savefid, data->currfid);
		return 1;
	}

	/* verify the hardware actually took the requested vid */
	if (vid != data->currvid) {
		printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
				"curr 0x%x\n",
				vid, data->currvid);
		return 1;
	}

	return 0;
}
272
/*
 * Step the vid code down toward reqvid, moving at most `step` per call.
 * Decreasing vid codes represent increasing voltages:
 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
 */
static int decrease_vid_code_by_step(struct powernow_k8_data *data,
		u32 reqvid, u32 step)
{
	/* if the target is more than one step away, only go one step */
	if ((data->currvid - reqvid) > step)
		reqvid = data->currvid - step;

	if (write_new_vid(data, reqvid))
		return 1;

	/* let the voltage stabilize before the caller proceeds */
	count_off_vst(data);

	return 0;
}
291
/* Change hardware pstate by single MSR write */
static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
{
	/* writing the pstate index to the control msr triggers the change */
	wrmsr(MSR_PSTATE_CTRL, pstate, 0);
	data->currpstate = pstate;
	return 0;
}
299
/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
static int transition_fid_vid(struct powernow_k8_data *data,
		u32 reqfid, u32 reqvid)
{
	/* phase 1: move the voltage to a level that supports the new fid */
	if (core_voltage_pre_transition(data, reqvid, reqfid))
		return 1;

	/* phase 2: step the frequency to the requested fid */
	if (core_frequency_transition(data, reqfid))
		return 1;

	/* phase 3: settle the voltage at the requested vid */
	if (core_voltage_post_transition(data, reqvid))
		return 1;

	/* re-read the hardware and confirm both values took effect */
	if (query_current_values_with_pending_wait(data))
		return 1;

	if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
		printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
				"curr 0x%x 0x%x\n",
				smp_processor_id(),
				reqfid, reqvid, data->currfid, data->currvid);
		return 1;
	}

	dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
		smp_processor_id(), data->currfid, data->currvid);

	return 0;
}
329
/* Phase 1 - core voltage transition ... setup voltage */
static int core_voltage_pre_transition(struct powernow_k8_data *data,
		u32 reqvid, u32 reqfid)
{
	u32 rvosteps = data->rvo;	/* ramp voltage offset step budget */
	u32 savefid = data->currfid;	/* the fid must not change in ph1 */
	u32 maxvid, lo, rvomult = 1;

	dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
		"reqvid 0x%x, rvo 0x%x\n",
		smp_processor_id(),
		data->currfid, data->currvid, reqvid, data->rvo);

	/* the rvo is doubled when both fids are in the "low" table */
	if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
		rvomult = 2;
	rvosteps *= rvomult;
	/* maxvid lives in bits 20:16 of the status msr high word */
	rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
	maxvid = 0x1f & (maxvid >> 16);
	dprintk("ph1 maxvid=0x%x\n", maxvid);
	if (reqvid < maxvid) /* lower numbers are higher voltages */
		reqvid = maxvid;

	/* raise the voltage (decrease the vid code) toward reqvid */
	while (data->currvid > reqvid) {
		dprintk("ph1: curr 0x%x, req vid 0x%x\n",
			data->currvid, reqvid);
		if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
			return 1;
	}

	/* apply the additional ramp voltage offset, one vid at a time,
	 * never exceeding the hardware maxvid */
	while ((rvosteps > 0) &&
			((rvomult * data->rvo + data->currvid) > reqvid)) {
		if (data->currvid == maxvid) {
			rvosteps = 0;
		} else {
			dprintk("ph1: changing vid for rvo, req 0x%x\n",
				data->currvid - 1);
			if (decrease_vid_code_by_step(data, data->currvid-1, 1))
				return 1;
			rvosteps--;
		}
	}

	if (query_current_values_with_pending_wait(data))
		return 1;

	/* phase 1 must leave the fid untouched */
	if (savefid != data->currfid) {
		printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
				data->currfid);
		return 1;
	}

	dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n",
		data->currfid, data->currvid);

	return 0;
}
386
/* Phase 2 - core frequency transition */
static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
{
	u32 vcoreqfid, vcocurrfid, vcofiddiff;
	u32 fid_interval, savevid = data->currvid;	/* vid must not move */

	if (data->currfid == reqfid) {
		printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
				data->currfid);
		return 0;
	}

	dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
		"reqfid 0x%x\n",
		smp_processor_id(),
		data->currfid, data->currvid, reqfid);

	/* distances are measured in VCO fid space, not raw fid space */
	vcoreqfid = convert_fid_to_vco_fid(reqfid);
	vcocurrfid = convert_fid_to_vco_fid(data->currfid);
	vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
	    : vcoreqfid - vcocurrfid;

	/* transitions inside the low table can be done in one jump */
	if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
		vcofiddiff = 0;

	/* walk toward the target in steps of at most 2 VCO fids */
	while (vcofiddiff > 2) {
		(data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);

		if (reqfid > data->currfid) {
			if (data->currfid > LO_FID_TABLE_TOP) {
				if (write_new_fid(data,
						data->currfid + fid_interval))
					return 1;
			} else {
				/* leaving the low table: jump to the
				 * corresponding high-table fid plus 2 */
				if (write_new_fid
				    (data,
				     2 + convert_fid_to_vco_fid(data->currfid)))
					return 1;
			}
		} else {
			if (write_new_fid(data, data->currfid - fid_interval))
				return 1;
		}

		/* recompute the remaining distance after each step */
		vcocurrfid = convert_fid_to_vco_fid(data->currfid);
		vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
		    : vcoreqfid - vcocurrfid;
	}

	/* final step lands exactly on the requested fid */
	if (write_new_fid(data, reqfid))
		return 1;

	if (query_current_values_with_pending_wait(data))
		return 1;

	if (data->currfid != reqfid) {
		printk(KERN_ERR PFX
			"ph2: mismatch, failed fid transition, "
			"curr 0x%x, req 0x%x\n",
			data->currfid, reqfid);
		return 1;
	}

	/* phase 2 must leave the vid untouched */
	if (savevid != data->currvid) {
		printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
			savevid, data->currvid);
		return 1;
	}

	dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n",
		data->currfid, data->currvid);

	return 0;
}
461
462/* Phase 3 - core voltage transition flow ... jump to the final vid. */
463static int core_voltage_post_transition(struct powernow_k8_data *data,
464		u32 reqvid)
465{
466	u32 savefid = data->currfid;
467	u32 savereqvid = reqvid;
468
469	dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
470		smp_processor_id(),
471		data->currfid, data->currvid);
472
473	if (reqvid != data->currvid) {
474		if (write_new_vid(data, reqvid))
475			return 1;
476
477		if (savefid != data->currfid) {
478			printk(KERN_ERR PFX
479			       "ph3: bad fid change, save 0x%x, curr 0x%x\n",
480			       savefid, data->currfid);
481			return 1;
482		}
483
484		if (data->currvid != reqvid) {
485			printk(KERN_ERR PFX
486			       "ph3: failed vid transition\n, "
487			       "req 0x%x, curr 0x%x",
488			       reqvid, data->currvid);
489			return 1;
490		}
491	}
492
493	if (query_current_values_with_pending_wait(data))
494		return 1;
495
496	if (savereqvid != data->currvid) {
497		dprintk("ph3 failed, currvid 0x%x\n", data->currvid);
498		return 1;
499	}
500
501	if (savefid != data->currfid) {
502		dprintk("ph3 failed, currfid changed 0x%x\n",
503			data->currfid);
504		return 1;
505	}
506
507	dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n",
508		data->currfid, data->currvid);
509
510	return 0;
511}
512
/*
 * Probe (on the CPU it runs on, via smp_call_function_single) whether this
 * processor supports powernow-k8.  Writes 0 into *_rc on success, -ENODEV
 * otherwise, and may switch cpu_family to CPU_HW_PSTATE for fam 10h+ parts.
 */
static void check_supported_cpu(void *_rc)
{
	u32 eax, ebx, ecx, edx;
	int *rc = _rc;

	*rc = -ENODEV;

	if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return;

	/* extended family/model bits decide K8 vs. fam 10h+ handling */
	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
	if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
	    ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
		return;

	if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
		/* K8: require a supported revision ... */
		if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
			printk(KERN_INFO PFX
				"Processor cpuid %x not supported\n", eax);
			return;
		}

		/* ... and the frequency/voltage capability cpuid leaf */
		eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
		if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
			printk(KERN_INFO PFX
			       "No frequency change capabilities detected\n");
			return;
		}

		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
		if ((edx & P_STATE_TRANSITION_CAPABLE)
			!= P_STATE_TRANSITION_CAPABLE) {
			printk(KERN_INFO PFX
				"Power state transitions not supported\n");
			return;
		}
	} else { /* must be a HW Pstate capable processor */
		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
		if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
			cpu_family = CPU_HW_PSTATE;
		else
			return;
	}

	*rc = 0;
}
560
/*
 * Sanity-check the BIOS-supplied PST table against the rvo/maxvid limits.
 * Returns 0 if the table is usable, -EINVAL/-ENODEV otherwise.
 */
static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
		u8 maxvid)
{
	unsigned int j;
	u8 lastfid = 0xff;

	for (j = 0; j < data->numps; j++) {
		if (pst[j].vid > LEAST_VID) {
			printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n",
			       j, pst[j].vid);
			return -EINVAL;
		}
		if (pst[j].vid < data->rvo) {
			/* vid + rvo >= 0 */
			printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
			       " %d\n", j);
			return -ENODEV;
		}
		if (pst[j].vid < maxvid + data->rvo) {
			/* vid + rvo >= maxvid */
			printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
			       " %d\n", j);
			return -ENODEV;
		}
		if (pst[j].fid > MAX_FID) {
			printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate"
			       " %d\n", j);
			return -ENODEV;
		}
		if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
			/* Only first fid is allowed to be in "low" range */
			printk(KERN_ERR FW_BUG PFX "two low fids - %d : "
			       "0x%x\n", j, pst[j].fid);
			return -EINVAL;
		}
		/* track the smallest fid seen (the lowest pstate) */
		if (pst[j].fid < lastfid)
			lastfid = pst[j].fid;
	}
	/* the lowest fid must be even */
	if (lastfid & 1) {
		printk(KERN_ERR FW_BUG PFX "lastfid invalid\n");
		return -EINVAL;
	}
	if (lastfid > LO_FID_TABLE_TOP)
		printk(KERN_INFO FW_BUG PFX
			"first fid not from lo freq table\n");

	return 0;
}
609
/* Mark one cpufreq table entry unusable without shifting later entries. */
static void invalidate_entry(struct cpufreq_frequency_table *powernow_table,
		unsigned int entry)
{
	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
}
615
/* Log the valid entries of the frequency table (pstate or fid/vid form). */
static void print_basics(struct powernow_k8_data *data)
{
	int j;
	for (j = 0; j < data->numps; j++) {
		if (data->powernow_table[j].frequency !=
				CPUFREQ_ENTRY_INVALID) {
			if (cpu_family == CPU_HW_PSTATE) {
				printk(KERN_INFO PFX
					"   %d : pstate %d (%d MHz)\n", j,
					data->powernow_table[j].index,
					data->powernow_table[j].frequency/1000);
			} else {
				/* index packs fid in bits 7:0, vid in 15:8 */
				printk(KERN_INFO PFX
					"   %d : fid 0x%x (%d MHz), vid 0x%x\n",
					j,
					data->powernow_table[j].index & 0xff,
					data->powernow_table[j].frequency/1000,
					data->powernow_table[j].index >> 8);
			}
		}
	}
	if (data->batps)
		printk(KERN_INFO PFX "Only %d pstates on battery\n",
				data->batps);
}
641
/*
 * Compute the core frequency from fam 10h/11h fid/did encodings.
 * Note: despite the local name "mhz", the return value is in kHz
 * (MHz * 1000), matching the cpufreq table units.
 */
static u32 freq_from_fid_did(u32 fid, u32 did)
{
	u32 mhz = 0;

	if (boot_cpu_data.x86 == 0x10)
		mhz = (100 * (fid + 0x10)) >> did;
	else if (boot_cpu_data.x86 == 0x11)
		mhz = (100 * (fid + 8)) >> did;
	else
		BUG();	/* only called for fam 10h/11h */

	return mhz * 1000;
}
655
656static int fill_powernow_table(struct powernow_k8_data *data,
657		struct pst_s *pst, u8 maxvid)
658{
659	struct cpufreq_frequency_table *powernow_table;
660	unsigned int j;
661
662	if (data->batps) {
663		/* use ACPI support to get full speed on mains power */
664		printk(KERN_WARNING PFX
665			"Only %d pstates usable (use ACPI driver for full "
666			"range\n", data->batps);
667		data->numps = data->batps;
668	}
669
670	for (j = 1; j < data->numps; j++) {
671		if (pst[j-1].fid >= pst[j].fid) {
672			printk(KERN_ERR PFX "PST out of sequence\n");
673			return -EINVAL;
674		}
675	}
676
677	if (data->numps < 2) {
678		printk(KERN_ERR PFX "no p states to transition\n");
679		return -ENODEV;
680	}
681
682	if (check_pst_table(data, pst, maxvid))
683		return -EINVAL;
684
685	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
686		* (data->numps + 1)), GFP_KERNEL);
687	if (!powernow_table) {
688		printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
689		return -ENOMEM;
690	}
691
692	for (j = 0; j < data->numps; j++) {
693		int freq;
694		powernow_table[j].index = pst[j].fid; /* lower 8 bits */
695		powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
696		freq = find_khz_freq_from_fid(pst[j].fid);
697		powernow_table[j].frequency = freq;
698	}
699	powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
700	powernow_table[data->numps].index = 0;
701
702	if (query_current_values_with_pending_wait(data)) {
703		kfree(powernow_table);
704		return -EIO;
705	}
706
707	dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
708	data->powernow_table = powernow_table;
709	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
710		print_basics(data);
711
712	for (j = 0; j < data->numps; j++)
713		if ((pst[j].fid == data->currfid) &&
714		    (pst[j].vid == data->currvid))
715			return 0;
716
717	dprintk("currfid/vid do not match PST, ignoring\n");
718	return 0;
719}
720
/* Find and validate the PSB/PST table in BIOS. */
static int find_psb_table(struct powernow_k8_data *data)
{
	struct psb_s *psb;
	unsigned int i;
	u32 mvs;
	u8 maxvid;
	u32 cpst = 0;
	u32 thiscpuid;

	/* scan the legacy BIOS region (0xc0000-0xfffef) in 16-byte steps */
	for (i = 0xc0000; i < 0xffff0; i += 0x10) {
		/* Scan BIOS looking for the signature. */
		/* It can not be at ffff0 - it is too big. */

		psb = phys_to_virt(i);
		if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
			continue;

		dprintk("found PSB header at 0x%p\n", psb);

		dprintk("table vers: 0x%x\n", psb->tableversion);
		if (psb->tableversion != PSB_VERSION_1_4) {
			printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
			return -ENODEV;
		}

		dprintk("flags: 0x%x\n", psb->flags1);
		if (psb->flags1) {
			printk(KERN_ERR FW_BUG PFX "unknown flags\n");
			return -ENODEV;
		}

		data->vstable = psb->vstable;
		dprintk("voltage stabilization time: %d(*20us)\n",
				data->vstable);

		/* flags2 packs rvo, irt, mvs and batps in 2-bit fields */
		dprintk("flags2: 0x%x\n", psb->flags2);
		data->rvo = psb->flags2 & 3;
		data->irt = ((psb->flags2) >> 2) & 3;
		mvs = ((psb->flags2) >> 4) & 3;
		data->vidmvs = 1 << mvs;
		data->batps = ((psb->flags2) >> 6) & 3;

		dprintk("ramp voltage offset: %d\n", data->rvo);
		dprintk("isochronous relief time: %d\n", data->irt);
		dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);

		dprintk("numpst: 0x%x\n", psb->num_tables);
		cpst = psb->num_tables;
		/* special-case two cpuids that share a PST */
		if ((psb->cpuid == 0x00000fc0) ||
		    (psb->cpuid == 0x00000fe0)) {
			thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
			if ((thiscpuid == 0x00000fc0) ||
			    (thiscpuid == 0x00000fe0))
				cpst = 1;
		}
		if (cpst != 1) {
			printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
			return -ENODEV;
		}

		data->plllock = psb->plllocktime;
		dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
		dprintk("maxfid: 0x%x\n", psb->maxfid);
		dprintk("maxvid: 0x%x\n", psb->maxvid);
		maxvid = psb->maxvid;

		data->numps = psb->numps;
		dprintk("numpstates: 0x%x\n", data->numps);
		/* the PST entries immediately follow the PSB header */
		return fill_powernow_table(data,
				(struct pst_s *)(psb+1), maxvid);
	}
	/*
	 * If you see this message, complain to BIOS manufacturer. If
	 * he tells you "we do not support Linux" or some similar
	 * nonsense, remember that Windows 2000 uses the same legacy
	 * mechanism that the old Linux PSB driver uses. Tell them it
	 * is broken with Windows 2000.
	 *
	 * The reference to the AMD documentation is chapter 9 in the
	 * BIOS and Kernel Developer's Guide, which is available on
	 * www.amd.com
	 */
	printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
	printk(KERN_ERR PFX "Make sure that your BIOS is up to date"
		" and Cool'N'Quiet support is enabled in BIOS setup\n");
	return -ENODEV;
}
809
/*
 * Decode the fid/vid transition parameters packed into the ACPI _PSS
 * control value for state `index`.  No-op on the HW-pstate interface.
 */
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
		unsigned int index)
{
	u64 control;

	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
		return;

	control = data->acpi_data.states[index].control;
	data->irt = (control >> IRT_SHIFT) & IRT_MASK;
	data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
	data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
	data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
	data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
	data->vstable = (control >> VST_SHIFT) & VST_MASK;
}
826
827static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
828{
829	struct cpufreq_frequency_table *powernow_table;
830	int ret_val = -ENODEV;
831	u64 control, status;
832
833	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
834		dprintk("register performance failed: bad ACPI data\n");
835		return -EIO;
836	}
837
838	/* verify the data contained in the ACPI structures */
839	if (data->acpi_data.state_count <= 1) {
840		dprintk("No ACPI P-States\n");
841		goto err_out;
842	}
843
844	control = data->acpi_data.control_register.space_id;
845	status = data->acpi_data.status_register.space_id;
846
847	if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
848	    (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
849		dprintk("Invalid control/status registers (%x - %x)\n",
850			control, status);
851		goto err_out;
852	}
853
854	/* fill in data->powernow_table */
855	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
856		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
857	if (!powernow_table) {
858		dprintk("powernow_table memory alloc failure\n");
859		goto err_out;
860	}
861
862	/* fill in data */
863	data->numps = data->acpi_data.state_count;
864	powernow_k8_acpi_pst_values(data, 0);
865
866	if (cpu_family == CPU_HW_PSTATE)
867		ret_val = fill_powernow_table_pstate(data, powernow_table);
868	else
869		ret_val = fill_powernow_table_fidvid(data, powernow_table);
870	if (ret_val)
871		goto err_out_mem;
872
873	powernow_table[data->acpi_data.state_count].frequency =
874		CPUFREQ_TABLE_END;
875	powernow_table[data->acpi_data.state_count].index = 0;
876	data->powernow_table = powernow_table;
877
878	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
879		print_basics(data);
880
881	/* notify BIOS that we exist */
882	acpi_processor_notify_smm(THIS_MODULE);
883
884	if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
885		printk(KERN_ERR PFX
886				"unable to alloc powernow_k8_data cpumask\n");
887		ret_val = -ENOMEM;
888		goto err_out_mem;
889	}
890
891	return 0;
892
893err_out_mem:
894	kfree(powernow_table);
895
896err_out:
897	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
898
899	/* data->acpi_data.state_count informs us at ->exit()
900	 * whether ACPI was used */
901	data->acpi_data.state_count = 0;
902
903	return ret_val;
904}
905
/*
 * Build the cpufreq table for the HW-pstate (fam 10h+) interface from
 * the ACPI _PSS states, validating each pstate index against the MSRs.
 */
static int fill_powernow_table_pstate(struct powernow_k8_data *data,
		struct cpufreq_frequency_table *powernow_table)
{
	int i;
	u32 hi = 0, lo = 0;
	/* the highest usable pstate index comes from the limit msr */
	rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
	data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;

	for (i = 0; i < data->acpi_data.state_count; i++) {
		u32 index;

		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
		if (index > data->max_hw_pstate) {
			printk(KERN_ERR PFX "invalid pstate %d - "
					"bad value %d.\n", i, index);
			printk(KERN_ERR PFX "Please report to BIOS "
					"manufacturer\n");
			invalidate_entry(powernow_table, i);
			continue;
		}
		/* the pstate definition msr must flag the state valid */
		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
		if (!(hi & HW_PSTATE_VALID_MASK)) {
			dprintk("invalid pstate %d, ignoring\n", index);
			invalidate_entry(powernow_table, i);
			continue;
		}

		powernow_table[i].index = index;

		/* Frequency may be rounded for these */
		if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
				 || boot_cpu_data.x86 == 0x11) {
			/* compute exact frequency from the fid/did fields */
			powernow_table[i].frequency =
				freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
		} else
			powernow_table[i].frequency =
				data->acpi_data.states[i].core_frequency * 1000;
	}
	return 0;
}
946
/*
 * Build the cpufreq table for the fid/vid (K8) interface from the ACPI
 * _PSS states, invalidating entries with bad frequency or vid values.
 */
static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
		struct cpufreq_frequency_table *powernow_table)
{
	int i;

	for (i = 0; i < data->acpi_data.state_count; i++) {
		u32 fid;
		u32 vid;
		u32 freq, index;
		u64 status, control;

		/* fid/vid come from either the status or control value,
		 * depending on the extended-encoding flag */
		if (data->exttype) {
			status =  data->acpi_data.states[i].status;
			fid = status & EXT_FID_MASK;
			vid = (status >> VID_SHIFT) & EXT_VID_MASK;
		} else {
			control =  data->acpi_data.states[i].control;
			fid = control & FID_MASK;
			vid = (control >> VID_SHIFT) & VID_MASK;
		}

		dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);

		/* pack fid in bits 7:0 and vid in bits 15:8 of index */
		index = fid | (vid<<8);
		powernow_table[i].index = index;

		freq = find_khz_freq_from_fid(fid);
		powernow_table[i].frequency = freq;

		/* verify frequency is OK */
		if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
			dprintk("invalid freq %u kHz, ignoring\n", freq);
			invalidate_entry(powernow_table, i);
			continue;
		}

		/* verify voltage is OK -
		 * BIOSs are using "off" to indicate invalid */
		if (vid == VID_OFF) {
			dprintk("invalid vid %u, ignoring\n", vid);
			invalidate_entry(powernow_table, i);
			continue;
		}

		/* the fid-derived frequency must agree with ACPI's value */
		if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
			printk(KERN_INFO PFX "invalid freq entries "
				"%u kHz vs. %u kHz\n", freq,
				(unsigned int)
				(data->acpi_data.states[i].core_frequency
				 * 1000));
			invalidate_entry(powernow_table, i);
			continue;
		}
	}
	return 0;
}
1003
/* Release the ACPI performance object and cpumask acquired at init time.
 * state_count == 0 means ACPI was never successfully registered. */
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
{
	if (data->acpi_data.state_count)
		acpi_processor_unregister_performance(&data->acpi_data,
				data->cpu);
	free_cpumask_var(data->acpi_data.shared_cpu_map);
}
1011
1012static int get_transition_latency(struct powernow_k8_data *data)
1013{
1014	int max_latency = 0;
1015	int i;
1016	for (i = 0; i < data->acpi_data.state_count; i++) {
1017		int cur_latency = data->acpi_data.states[i].transition_latency
1018			+ data->acpi_data.states[i].bus_master_latency;
1019		if (cur_latency > max_latency)
1020			max_latency = cur_latency;
1021	}
1022	if (max_latency == 0) {
1023		/*
1024		 * Fam 11h and later may return 0 as transition latency. This
1025		 * is intended and means "very fast". While cpufreq core and
1026		 * governors currently can handle that gracefully, better set it
1027		 * to 1 to avoid problems in the future.
1028		 */
1029		if (boot_cpu_data.x86 < 0x11)
1030			printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
1031				"latency\n");
1032		max_latency = 1;
1033	}
1034	/* value in usecs, needs to be in nanoseconds */
1035	return 1000 * max_latency;
1036}
1037
/* Take a frequency, and issue the fid/vid transition command */
static int transition_frequency_fidvid(struct powernow_k8_data *data,
		unsigned int index)
{
	u32 fid = 0;
	u32 vid = 0;
	int res, i;
	struct cpufreq_freqs freqs;

	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);

	/* fid/vid correctness check for k8 */
	/* fid are the lower 8 bits of the index we stored into
	 * the cpufreq frequency table in find_psb_table, vid
	 * are the upper 8 bits.
	 */
	fid = data->powernow_table[index].index & 0xFF;
	vid = (data->powernow_table[index].index & 0xFF00) >> 8;

	dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);

	if (query_current_values_with_pending_wait(data))
		return 1;

	/* already at the target - nothing to do */
	if ((data->currvid == vid) && (data->currfid == fid)) {
		dprintk("target matches current values (fid 0x%x, vid 0x%x)\n",
			fid, vid);
		return 0;
	}

	dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
		smp_processor_id(), fid, vid);
	freqs.old = find_khz_freq_from_fid(data->currfid);
	freqs.new = find_khz_freq_from_fid(fid);

	/* notify every core that shares this frequency domain */
	for_each_cpu(i, data->available_cores) {
		freqs.cpu = i;
		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
	}

	res = transition_fid_vid(data, fid, vid);
	/* report the fid actually reached, in case the transition failed */
	freqs.new = find_khz_freq_from_fid(data->currfid);

	for_each_cpu(i, data->available_cores) {
		freqs.cpu = i;
		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
	}
	return res;
}
1087
/* Take a frequency, and issue the hardware pstate transition command */
static int transition_frequency_pstate(struct powernow_k8_data *data,
		unsigned int index)
{
	u32 pstate = 0;
	int res, i;
	struct cpufreq_freqs freqs;

	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);

	/* get MSR index for hardware pstate transition */
	pstate = index & HW_PSTATE_MASK;
	/* an out-of-range pstate is silently treated as a no-op success */
	if (pstate > data->max_hw_pstate)
		return 0;
	freqs.old = find_khz_freq_from_pstate(data->powernow_table,
			data->currpstate);
	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);

	/* notify every core that shares this frequency domain */
	for_each_cpu(i, data->available_cores) {
		freqs.cpu = i;
		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
	}

	res = transition_pstate(data, pstate);
	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);

	for_each_cpu(i, data->available_cores) {
		freqs.cpu = i;
		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
	}
	return res;
}
1120
1121/* Driver entry point to switch to the target frequency */
1122static int powernowk8_target(struct cpufreq_policy *pol,
1123		unsigned targfreq, unsigned relation)
1124{
1125	cpumask_var_t oldmask;
1126	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1127	u32 checkfid;
1128	u32 checkvid;
1129	unsigned int newstate;
1130	int ret = -EIO;
1131
1132	if (!data)
1133		return -EINVAL;
1134
1135	checkfid = data->currfid;
1136	checkvid = data->currvid;
1137
1138	/* only run on specific CPU from here on. */
1139	/* This is poor form: use a workqueue or smp_call_function_single */
1140	if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
1141		return -ENOMEM;
1142
1143	cpumask_copy(oldmask, tsk_cpus_allowed(current));
1144	set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
1145
1146	if (smp_processor_id() != pol->cpu) {
1147		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
1148		goto err_out;
1149	}
1150
1151	if (pending_bit_stuck()) {
1152		printk(KERN_ERR PFX "failing targ, change pending bit set\n");
1153		goto err_out;
1154	}
1155
1156	dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
1157		pol->cpu, targfreq, pol->min, pol->max, relation);
1158
1159	if (query_current_values_with_pending_wait(data))
1160		goto err_out;
1161
1162	if (cpu_family != CPU_HW_PSTATE) {
1163		dprintk("targ: curr fid 0x%x, vid 0x%x\n",
1164		data->currfid, data->currvid);
1165
1166		if ((checkvid != data->currvid) ||
1167		    (checkfid != data->currfid)) {
1168			printk(KERN_INFO PFX
1169				"error - out of sync, fix 0x%x 0x%x, "
1170				"vid 0x%x 0x%x\n",
1171				checkfid, data->currfid,
1172				checkvid, data->currvid);
1173		}
1174	}
1175
1176	if (cpufreq_frequency_table_target(pol, data->powernow_table,
1177				targfreq, relation, &newstate))
1178		goto err_out;
1179
1180	mutex_lock(&fidvid_mutex);
1181
1182	powernow_k8_acpi_pst_values(data, newstate);
1183
1184	if (cpu_family == CPU_HW_PSTATE)
1185		ret = transition_frequency_pstate(data, newstate);
1186	else
1187		ret = transition_frequency_fidvid(data, newstate);
1188	if (ret) {
1189		printk(KERN_ERR PFX "transition frequency failed\n");
1190		ret = 1;
1191		mutex_unlock(&fidvid_mutex);
1192		goto err_out;
1193	}
1194	mutex_unlock(&fidvid_mutex);
1195
1196	if (cpu_family == CPU_HW_PSTATE)
1197		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1198				newstate);
1199	else
1200		pol->cur = find_khz_freq_from_fid(data->currfid);
1201	ret = 0;
1202
1203err_out:
1204	set_cpus_allowed_ptr(current, oldmask);
1205	free_cpumask_var(oldmask);
1206	return ret;
1207}
1208
1209/* Driver entry point to verify the policy and range of frequencies */
1210static int powernowk8_verify(struct cpufreq_policy *pol)
1211{
1212	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1213
1214	if (!data)
1215		return -EINVAL;
1216
1217	return cpufreq_frequency_table_verify(pol, data->powernow_table);
1218}
1219
/*
 * Argument/result bundle for powernowk8_cpu_init_on_cpu():
 * smp_call_function_single() passes only one void * argument, so the
 * input data and the result code travel together in this struct.
 */
struct init_on_cpu {
	struct powernow_k8_data *data;	/* per-cpu driver state to set up */
	int rc;				/* 0 on success, -ENODEV on failure */
};
1224
1225static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
1226{
1227	struct init_on_cpu *init_on_cpu = _init_on_cpu;
1228
1229	if (pending_bit_stuck()) {
1230		printk(KERN_ERR PFX "failing init, change pending bit set\n");
1231		init_on_cpu->rc = -ENODEV;
1232		return;
1233	}
1234
1235	if (query_current_values_with_pending_wait(init_on_cpu->data)) {
1236		init_on_cpu->rc = -ENODEV;
1237		return;
1238	}
1239
1240	if (cpu_family == CPU_OPTERON)
1241		fidvid_msr_init();
1242
1243	init_on_cpu->rc = 0;
1244}
1245
/* per CPU init entry point to the driver */
static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
{
	static const char ACPI_PSS_BIOS_BUG_MSG[] =
		KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
		FW_BUG PFX "Try again with latest BIOS.\n";
	struct powernow_k8_data *data;
	struct init_on_cpu init_on_cpu;
	int rc;
	struct cpuinfo_x86 *c = &cpu_data(pol->cpu);

	if (!cpu_online(pol->cpu))
		return -ENODEV;

	/* check_supported_cpu reads cpu-local MSRs/CPUID, so run it there. */
	smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
	if (rc)
		return -ENODEV;

	data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
	if (!data) {
		printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
		return -ENOMEM;
	}

	data->cpu = pol->cpu;
	/* Mark the pstate unknown until the first successful query. */
	data->currpstate = HW_PSTATE_INVALID;

	if (powernow_k8_cpu_init_acpi(data)) {
		/*
		 * No usable ACPI _PSS data: fall back to the PSB BIOS
		 * structure. This is only available on an UP version,
		 * and is deprecated by AMD.
		 */
		if (num_online_cpus() != 1) {
			printk_once(ACPI_PSS_BIOS_BUG_MSG);
			goto err_out;
		}
		if (pol->cpu != 0) {
			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
			       "CPU other than CPU0. Complain to your BIOS "
			       "vendor.\n");
			goto err_out;
		}
		rc = find_psb_table(data);
		if (rc)
			goto err_out;

		/* Take a crude guess here.
		 * That guess was in microseconds, so multiply with 1000 */
		pol->cpuinfo.transition_latency = (
			 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
			 ((1 << data->irt) * 30)) * 1000;
	} else /* ACPI _PSS objects available */
		pol->cpuinfo.transition_latency = get_transition_latency(data);

	/* only run on specific CPU from here on */
	init_on_cpu.data = data;
	smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
				 &init_on_cpu, 1);
	rc = init_on_cpu.rc;
	if (rc != 0)
		goto err_out_exit_acpi;

	/* HW-pstate parts switch per cpu; fid/vid transitions apply to the
	 * whole set of core siblings, so the policy spans cpu_core_mask. */
	if (cpu_family == CPU_HW_PSTATE)
		cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
	else
		cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
	data->available_cores = pol->cpus;

	if (cpu_family == CPU_HW_PSTATE)
		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
				data->currpstate);
	else
		pol->cur = find_khz_freq_from_fid(data->currfid);
	dprintk("policy current frequency %d kHz\n", pol->cur);

	/* min/max the cpu is capable of */
	if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
		printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n");
		powernow_k8_cpu_exit_acpi(data);
		kfree(data->powernow_table);
		kfree(data);
		return -EINVAL;
	}

	/* Check for APERF/MPERF support in hardware */
	if (cpu_has(c, X86_FEATURE_APERFMPERF))
		cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;

	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);

	if (cpu_family == CPU_HW_PSTATE)
		dprintk("cpu_init done, current pstate 0x%x\n",
				data->currpstate);
	else
		dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
			data->currfid, data->currvid);

	/* Publish the per-cpu state last; _get()/_target() key off it. */
	per_cpu(powernow_data, pol->cpu) = data;

	return 0;

err_out_exit_acpi:
	powernow_k8_cpu_exit_acpi(data);

err_out:
	kfree(data);
	return -ENODEV;
}
1354
1355static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1356{
1357	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1358
1359	if (!data)
1360		return -EINVAL;
1361
1362	powernow_k8_cpu_exit_acpi(data);
1363
1364	cpufreq_frequency_table_put_attr(pol->cpu);
1365
1366	kfree(data->powernow_table);
1367	kfree(data);
1368	per_cpu(powernow_data, pol->cpu) = NULL;
1369
1370	return 0;
1371}
1372
1373static void query_values_on_cpu(void *_err)
1374{
1375	int *err = _err;
1376	struct powernow_k8_data *data = __get_cpu_var(powernow_data);
1377
1378	*err = query_current_values_with_pending_wait(data);
1379}
1380
1381static unsigned int powernowk8_get(unsigned int cpu)
1382{
1383	struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
1384	unsigned int khz = 0;
1385	int err;
1386
1387	if (!data)
1388		return 0;
1389
1390	smp_call_function_single(cpu, query_values_on_cpu, &err, true);
1391	if (err)
1392		goto out;
1393
1394	if (cpu_family == CPU_HW_PSTATE)
1395		khz = find_khz_freq_from_pstate(data->powernow_table,
1396						data->currpstate);
1397	else
1398		khz = find_khz_freq_from_fid(data->currfid);
1399
1400
1401out:
1402	return khz;
1403}
1404
1405static void _cpb_toggle_msrs(bool t)
1406{
1407	int cpu;
1408
1409	get_online_cpus();
1410
1411	rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1412
1413	for_each_cpu(cpu, cpu_online_mask) {
1414		struct msr *reg = per_cpu_ptr(msrs, cpu);
1415		if (t)
1416			reg->l &= ~BIT(25);
1417		else
1418			reg->l |= BIT(25);
1419	}
1420	wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1421
1422	put_online_cpus();
1423}
1424
1425/*
1426 * Switch on/off core performance boosting.
1427 *
1428 * 0=disable
1429 * 1=enable.
1430 */
1431static void cpb_toggle(bool t)
1432{
1433	if (!cpb_capable)
1434		return;
1435
1436	if (t && !cpb_enabled) {
1437		cpb_enabled = true;
1438		_cpb_toggle_msrs(t);
1439		printk(KERN_INFO PFX "Core Boosting enabled.\n");
1440	} else if (!t && cpb_enabled) {
1441		cpb_enabled = false;
1442		_cpb_toggle_msrs(t);
1443		printk(KERN_INFO PFX "Core Boosting disabled.\n");
1444	}
1445}
1446
1447static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
1448				 size_t count)
1449{
1450	int ret = -EINVAL;
1451	unsigned long val = 0;
1452
1453	ret = strict_strtoul(buf, 10, &val);
1454	if (!ret && (val == 0 || val == 1) && cpb_capable)
1455		cpb_toggle(val);
1456	else
1457		return -EINVAL;
1458
1459	return count;
1460}
1461
1462static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
1463{
1464	return sprintf(buf, "%u\n", cpb_enabled);
1465}
1466
/* Declare a read/write (0644) cpufreq sysfs attribute named _name and
 * wire it to show_<_name>()/store_<_name>(). */
#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_rw(cpb);
1472
/* sysfs attributes exported per policy: the standard list of available
 * frequencies plus the "cpb" (core performance boost) switch. */
static struct freq_attr *powernow_k8_attr[] = {
	&cpufreq_freq_attr_scaling_available_freqs,
	&cpb,
	NULL,	/* sentinel */
};
1478
/* cpufreq driver operations table.  Note: .getavg is filled in later by
 * powernowk8_cpu_init() when the cpu has APERF/MPERF support. */
static struct cpufreq_driver cpufreq_amd64_driver = {
	.verify		= powernowk8_verify,
	.target		= powernowk8_target,
	.bios_limit	= acpi_processor_get_bios_limit,
	.init		= powernowk8_cpu_init,
	.exit		= __devexit_p(powernowk8_cpu_exit),
	.get		= powernowk8_get,
	.name		= "powernow-k8",
	.owner		= THIS_MODULE,
	.attr		= powernow_k8_attr,
};
1490
1491/*
1492 * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
1493 * cannot block the remaining ones from boosting. On the CPU_UP path we
1494 * simply keep the boost-disable flag in sync with the current global
1495 * state.
1496 */
1497static int cpb_notify(struct notifier_block *nb, unsigned long action,
1498		      void *hcpu)
1499{
1500	unsigned cpu = (long)hcpu;
1501	u32 lo, hi;
1502
1503	switch (action) {
1504	case CPU_UP_PREPARE:
1505	case CPU_UP_PREPARE_FROZEN:
1506
1507		if (!cpb_enabled) {
1508			rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
1509			lo |= BIT(25);
1510			wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
1511		}
1512		break;
1513
1514	case CPU_DOWN_PREPARE:
1515	case CPU_DOWN_PREPARE_FROZEN:
1516		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
1517		lo &= ~BIT(25);
1518		wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
1519		break;
1520
1521	default:
1522		break;
1523	}
1524
1525	return NOTIFY_OK;
1526}
1527
/* Hotplug notifier keeping each cpu's boost-disable bit in sync; see
 * cpb_notify() above. */
static struct notifier_block cpb_nb = {
	.notifier_call		= cpb_notify,
};
1531
1532/* driver entry point for init */
1533static int __cpuinit powernowk8_init(void)
1534{
1535	unsigned int i, supported_cpus = 0, cpu;
1536
1537	for_each_online_cpu(i) {
1538		int rc;
1539		smp_call_function_single(i, check_supported_cpu, &rc, 1);
1540		if (rc == 0)
1541			supported_cpus++;
1542	}
1543
1544	if (supported_cpus != num_online_cpus())
1545		return -ENODEV;
1546
1547	printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
1548		num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
1549
1550	if (boot_cpu_has(X86_FEATURE_CPB)) {
1551
1552		cpb_capable = true;
1553
1554		register_cpu_notifier(&cpb_nb);
1555
1556		msrs = msrs_alloc();
1557		if (!msrs) {
1558			printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
1559			return -ENOMEM;
1560		}
1561
1562		rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1563
1564		for_each_cpu(cpu, cpu_online_mask) {
1565			struct msr *reg = per_cpu_ptr(msrs, cpu);
1566			cpb_enabled |= !(!!(reg->l & BIT(25)));
1567		}
1568
1569		printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
1570			(cpb_enabled ? "on" : "off"));
1571	}
1572
1573	return cpufreq_register_driver(&cpufreq_amd64_driver);
1574}
1575
/* driver entry point for term */
static void __exit powernowk8_exit(void)
{
	dprintk("exit\n");

	/* Undo the boost (CPB) bookkeeping set up in powernowk8_init(). */
	if (boot_cpu_has(X86_FEATURE_CPB)) {
		msrs_free(msrs);
		msrs = NULL;

		unregister_cpu_notifier(&cpb_nb);
	}

	cpufreq_unregister_driver(&cpufreq_amd64_driver);
}
1590
1591MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
1592		"Mark Langsdorf <mark.langsdorf@amd.com>");
1593MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
1594MODULE_LICENSE("GPL");
1595
1596late_initcall(powernowk8_init);
1597module_exit(powernowk8_exit);
1598