/*
 *	Routines to identify caches on Intel CPUs.
 *
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/pci.h>

#include <asm/processor.h>
#include <linux/smp.h>
#include <asm/k8.h>
#include <asm/smp.h>

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

#define MB(x)	((x) * 1024)
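/*
 * Note: the sizes in cache_table[] below are in KB, so MB(x) simply
 * expresses x megabytes in KB.
 */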

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table __cpuinitconst cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};


enum _cache_type {
	CACHE_TYPE_NULL	= 0,
	CACHE_TYPE_DATA = 1,
	CACHE_TYPE_INST = 2,
	CACHE_TYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};
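
/*
 * The three unions above mirror the EAX, EBX and ECX register layouts
 * returned by CPUID leaf 4 (deterministic cache parameters).  The count
 * fields (threads/cores sharing, ways, partitions, line size, sets) are
 * reported by the hardware as "value minus one".
 */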

struct amd_l3_cache {
	struct	 pci_dev *dev;
	bool	 can_disable;
	unsigned indices;
	u8	 subcaches[4];
};

struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	struct amd_l3_cache *l3;
	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
};

/* subset of above _cpuid4_info w/o shared_cpu_map */
struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	struct amd_l3_cache *l3;
};

unsigned short			num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user.  This makes some assumptions about the machine:
   L2 not shared, no SMT, etc., which is currently true on AMD CPUs.

   In theory the TLBs could be reported as a fake cache type (they end up
   in "dummy"); maybe later. */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

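/*
 * Translate the associativity encoding used by AMD's extended CPUID
 * cache leaves into an actual way count.
 */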
static const unsigned short __cpuinitconst assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };

static void __cpuinit
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

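	/*
	 * CPUID 0x80000005 returns the L1 data cache info in ECX and the
	 * L1 instruction cache info in EDX; 0x80000006 returns L2 in ECX
	 * and L3 in EDX.
	 */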
	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
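		/* fall through: the L1 instruction cache is decoded like L1 data */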
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = current_cpu_data.x86_cache_size;
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;


	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
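	/*
	 * Derive the set count from size / (line size * ways); like the
	 * fields above, it is encoded as "value minus one".
	 */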
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

struct _cache_attr {
	struct attribute attr;
	ssize_t (*show)(struct _cpuid4_info *, char *);
	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
};

#ifdef CONFIG_CPU_SUP_AMD

/*
 * L3 cache descriptors
 */
static struct amd_l3_cache **__cpuinitdata l3_caches;

static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
{
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(l3->dev, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
	l3->subcaches[1] = sc1 = !(val & BIT(4));
	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));

	l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
}

static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
{
	struct amd_l3_cache *l3;
	struct pci_dev *dev = node_to_k8_nb_misc(node);

	l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
	if (!l3) {
		printk(KERN_WARNING "Error allocating L3 struct\n");
		return NULL;
	}

	l3->dev = dev;

	amd_calc_l3_indices(l3);

	return l3;
}

static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
					   int index)
{
	int node;

	if (boot_cpu_data.x86 != 0x10)
		return;

	if (index < 3)
		return;

	/* see errata #382 and #388 */
	if (boot_cpu_data.x86_model < 0x8)
		return;

	if ((boot_cpu_data.x86_model == 0x8 ||
	     boot_cpu_data.x86_model == 0x9)
		&&
	     boot_cpu_data.x86_mask < 0x1)
			return;

	/* not in virtualized environments */
	if (num_k8_northbridges == 0)
		return;

	/*
	 * Strictly speaking, the amount in @size below is leaked since it is
	 * never freed but this is done only on shutdown so it doesn't matter.
	 */
	if (!l3_caches) {
		int size = num_k8_northbridges * sizeof(struct amd_l3_cache *);

		l3_caches = kzalloc(size, GFP_ATOMIC);
		if (!l3_caches)
			return;
	}

	node = amd_get_nb_id(smp_processor_id());

	if (!l3_caches[node]) {
		l3_caches[node] = amd_init_l3_cache(node);
		l3_caches[node]->can_disable = true;
	}

	WARN_ON(!l3_caches[node]);

	this_leaf->l3 = l3_caches[node];
}
/*
 * check whether a slot used for disabling an L3 index is occupied.
 * @l3: L3 cache descriptor
 * @slot: slot number (0..1)
 *
 * @returns: the disabled index if the slot is in use, or a negative value
 *           if the slot is free.
 */
int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
		return reg & 0xfff;

	return -1;
}

static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
				  unsigned int slot)
{
	int index;

	if (!this_leaf->l3 || !this_leaf->l3->can_disable)
		return -EINVAL;

	index = amd_get_l3_disable_slot(this_leaf->l3, slot);
	if (index >= 0)
		return sprintf(buf, "%d\n", index);

	return sprintf(buf, "FREE\n");
}

#define SHOW_CACHE_DISABLE(slot)					\
static ssize_t								\
show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf)	\
{									\
	return show_cache_disable(this_leaf, buf, slot);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
				 unsigned slot, unsigned long idx)
{
	int i;

	idx |= BIT(30);

	/*
	 *  disable index in all 4 subcaches
	 */
	for (i = 0; i < 4; i++) {
		u32 reg = idx | (i << 20);

		if (!l3->subcaches[i])
			continue;

		pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
		 * cache whose indices we disable, therefore a simple wbinvd()
		 * is not sufficient.
		 */
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
	}
}

/*
 * disable a L3 cache index by using a disable-slot
 *
 * @l3:    L3 cache descriptor
 * @cpu:   A CPU on the node containing the L3 cache
 * @slot:  slot number (0..1)
 * @index: index to disable
 *
 * @return: 0 on success, error status on failure
 */
int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
			    unsigned long index)
{
	int ret = 0;

#define SUBCACHE_MASK	(3UL << 20)
#define SUBCACHE_INDEX	0xfff

	/*
	 * check whether this slot is already used or
	 * the index is already disabled
	 */
	ret = amd_get_l3_disable_slot(l3, slot);
	if (ret >= 0)
		return -EINVAL;

	/*
	 * check whether the other slot has disabled the
	 * same index already
	 */
	if (index == amd_get_l3_disable_slot(l3, !slot))
		return -EINVAL;

	/* do not allow writes outside of allowed bits */
	if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
	    ((index & SUBCACHE_INDEX) > l3->indices))
		return -EINVAL;

	amd_l3_disable_index(l3, cpu, slot, index);

	return 0;
}

static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
				  const char *buf, size_t count,
				  unsigned int slot)
{
	unsigned long val = 0;
	int cpu, err = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!this_leaf->l3 || !this_leaf->l3->can_disable)
		return -EINVAL;

	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));

	if (strict_strtoul(buf, 10, &val) < 0)
		return -EINVAL;

	err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
	if (err) {
		if (err == -EEXIST)
			printk(KERN_WARNING "L3 disable slot %d in use!\n",
					    slot);
		return err;
	}
	return count;
}

#define STORE_CACHE_DISABLE(slot)					\
static ssize_t								\
store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
			    const char *buf, size_t count)		\
{									\
	return store_cache_disable(this_leaf, buf, count, slot);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
		show_cache_disable_0, store_cache_disable_0);
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
		show_cache_disable_1, store_cache_disable_1);

#else	/* CONFIG_CPU_SUP_AMD */
static void __cpuinit
amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
{
};
#endif /* CONFIG_CPU_SUP_AMD */

static int
__cpuinit cpuid4_cache_lookup_regs(int index,
				   struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax 	eax;
	union _cpuid4_leaf_ebx 	ebx;
	union _cpuid4_leaf_ecx 	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_check_l3_disable(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CACHE_TYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
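	/*
	 * Total cache size = sets * line size * physical line partitions *
	 * ways; each field is reported minus one, hence the "+ 1" terms.
	 */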
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}

static int __cpuinit find_num_cache_leaves(void)
{
	unsigned int		eax, ebx, ecx, edx;
	union _cpuid4_leaf_eax	cache_eax;
	int 			i = -1;

	do {
		++i;
		/* Do cpuid(4) loop to find out num_cache_leaves */
		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CACHE_TYPE_NULL);
	return i;
}

unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_X86_HT
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves();
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), the deterministic cache
		 * parameters leaf, to find the cache details.
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf;
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval >= 0) {
				switch (this_leaf.eax.split.level) {
				case 1:
					if (this_leaf.eax.split.type ==
							CACHE_TYPE_DATA)
						new_l1d = this_leaf.size/1024;
					else if (this_leaf.eax.split.type ==
							CACHE_TYPE_INST)
						new_l1i = this_leaf.size/1024;
					break;
				case 2:
					new_l2 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(num_threads_sharing);
					l2_id = c->apicid >> index_msb;
					break;
				case 3:
					new_l3 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(
							num_threads_sharing);
					l3_id = c->apicid >> index_msb;
					break;
				default:
					break;
				}
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	return l2;
}

#ifdef CONFIG_SYSFS

/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))

#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
	struct _cpuid4_info	*this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i, sibling;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
		for_each_cpu(i, c->llc_shared_map) {
			if (!per_cpu(ici_cpuid4_info, i))
				continue;
			this_leaf = CPUID4_INFO_IDX(i, index);
			for_each_cpu(sibling, c->llc_shared_map) {
				if (!cpu_online(sibling))
					continue;
				set_bit(sibling, this_leaf->shared_cpu_map);
			}
		}
		return;
	}
	this_leaf = CPUID4_INFO_IDX(cpu, index);
	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;

	if (num_threads_sharing == 1)
		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
	else {
		index_msb = get_count_order(num_threads_sharing);

		for_each_online_cpu(i) {
			if (cpu_data(i).apicid >> index_msb ==
			    c->apicid >> index_msb) {
				cpumask_set_cpu(i,
					to_cpumask(this_leaf->shared_cpu_map));
				if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
					sibling_leaf =
						CPUID4_INFO_IDX(i, index);
					cpumask_set_cpu(cpu, to_cpumask(
						sibling_leaf->shared_cpu_map));
				}
			}
		}
	}
}
static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
	struct _cpuid4_info	*this_leaf, *sibling_leaf;
	int sibling;

	this_leaf = CPUID4_INFO_IDX(cpu, index);
	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
		cpumask_clear_cpu(cpu,
				  to_cpumask(sibling_leaf->shared_cpu_map));
	}
}
#else
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
}

static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
}
#endif

static void __cpuinit free_cache_attributes(unsigned int cpu)
{
	int i;

	for (i = 0; i < num_cache_leaves; i++)
		cache_remove_shared_cpu_map(cpu, i);

	kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
	kfree(per_cpu(ici_cpuid4_info, cpu));
	per_cpu(ici_cpuid4_info, cpu) = NULL;
}

static int
__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
	struct _cpuid4_info_regs *leaf_regs =
		(struct _cpuid4_info_regs *)this_leaf;

	return cpuid4_cache_lookup_regs(index, leaf_regs);
}

static void __cpuinit get_cpu_leaves(void *_retval)
{
	int j, *retval = _retval, cpu = smp_processor_id();

	/* Do cpuid and store the results */
	for (j = 0; j < num_cache_leaves; j++) {
		struct _cpuid4_info *this_leaf;
		this_leaf = CPUID4_INFO_IDX(cpu, j);
		*retval = cpuid4_cache_lookup(j, this_leaf);
		if (unlikely(*retval < 0)) {
			int i;

			for (i = 0; i < j; i++)
				cache_remove_shared_cpu_map(cpu, i);
			break;
		}
		cache_shared_cpu_map_setup(cpu, j);
	}
}

static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
	int			retval;

	if (num_cache_leaves == 0)
		return -ENOENT;

	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return -ENOMEM;

	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
	if (retval) {
		kfree(per_cpu(ici_cpuid4_info, cpu));
		per_cpu(ici_cpuid4_info, cpu) = NULL;
	}

	return retval;
}

#include <linux/kobject.h>
#include <linux/sysfs.h>

extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */

/* pointer to kobject for cpuX/cache */
static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);

struct _index_kobject {
	struct kobject kobj;
	unsigned int cpu;
	unsigned short index;
};

/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))

#define show_one_plus(file_name, object, val)				\
static ssize_t show_##file_name						\
			(struct _cpuid4_info *this_leaf, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}
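
/*
 * The "+ 1" offsets passed below undo CPUID leaf 4's "value minus one"
 * encoding; the level field is reported as-is, so 0 is passed for it.
 */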

show_one_plus(level, eax.split.level, 0);
show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);

static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
{
	return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}

static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
					int type, char *buf)
{
	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
	int n = 0;

	if (len > 1) {
		const struct cpumask *mask;

		mask = to_cpumask(this_leaf->shared_cpu_map);
		n = type ?
			cpulist_scnprintf(buf, len-2, mask) :
			cpumask_scnprintf(buf, len-2, mask);
		buf[n++] = '\n';
		buf[n] = '\0';
	}
	return n;
}

static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
{
	return show_shared_cpu_map_func(leaf, 0, buf);
}

static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
{
	return show_shared_cpu_map_func(leaf, 1, buf);
}

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
{
	switch (this_leaf->eax.split.type) {
	case CACHE_TYPE_DATA:
		return sprintf(buf, "Data\n");
	case CACHE_TYPE_INST:
		return sprintf(buf, "Instruction\n");
	case CACHE_TYPE_UNIFIED:
		return sprintf(buf, "Unified\n");
	default:
		return sprintf(buf, "Unknown\n");
	}
}

#define to_object(k)	container_of(k, struct _index_kobject, kobj)
#define to_attr(a)	container_of(a, struct _cache_attr, attr)

#define define_one_ro(_name) \
static struct _cache_attr _name = \
	__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(level);
define_one_ro(type);
define_one_ro(coherency_line_size);
define_one_ro(physical_line_partition);
define_one_ro(ways_of_associativity);
define_one_ro(number_of_sets);
define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

#define DEFAULT_SYSFS_CACHE_ATTRS	\
	&type.attr,			\
	&level.attr,			\
	&coherency_line_size.attr,	\
	&physical_line_partition.attr,	\
	&ways_of_associativity.attr,	\
	&number_of_sets.attr,		\
	&size.attr,			\
	&shared_cpu_map.attr,		\
	&shared_cpu_list.attr

static struct attribute *default_attrs[] = {
	DEFAULT_SYSFS_CACHE_ATTRS,
	NULL
};

static struct attribute *default_l3_attrs[] = {
	DEFAULT_SYSFS_CACHE_ATTRS,
#ifdef CONFIG_CPU_SUP_AMD
	&cache_disable_0.attr,
	&cache_disable_1.attr,
#endif
	NULL
};

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->show ?
		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf) :
		0;
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->store ?
		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf, count) :
		0;
	return ret;
}

static const struct sysfs_ops sysfs_ops = {
	.show   = show,
	.store  = store,
};

static struct kobj_type ktype_cache = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
};

static struct kobj_type ktype_percpu_entry = {
	.sysfs_ops	= &sysfs_ops,
};

static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
	kfree(per_cpu(ici_cache_kobject, cpu));
	kfree(per_cpu(ici_index_kobject, cpu));
	per_cpu(ici_cache_kobject, cpu) = NULL;
	per_cpu(ici_index_kobject, cpu) = NULL;
	free_cache_attributes(cpu);
}

static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
	int err;

	if (num_cache_leaves == 0)
		return -ENOENT;

	err = detect_cache_attributes(cpu);
	if (err)
		return err;

	/* Allocate all required memory */
	per_cpu(ici_cache_kobject, cpu) =
		kzalloc(sizeof(struct kobject), GFP_KERNEL);
	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
		goto err_out;

	per_cpu(ici_index_kobject, cpu) = kzalloc(
	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
		goto err_out;

	return 0;

err_out:
	cpuid4_cache_sysfs_exit(cpu);
	return -ENOMEM;
}

static DECLARE_BITMAP(cache_dev_map, NR_CPUS);

/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i, j;
	struct _index_kobject *this_object;
	struct _cpuid4_info   *this_leaf;
	int retval;

	retval = cpuid4_cache_sysfs_init(cpu);
	if (unlikely(retval < 0))
		return retval;

	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
				      &ktype_percpu_entry,
				      &sys_dev->kobj, "%s", "cache");
	if (retval < 0) {
		cpuid4_cache_sysfs_exit(cpu);
		return retval;
	}

	for (i = 0; i < num_cache_leaves; i++) {
		this_object = INDEX_KOBJECT_PTR(cpu, i);
		this_object->cpu = cpu;
		this_object->index = i;

		this_leaf = CPUID4_INFO_IDX(cpu, i);

		if (this_leaf->l3 && this_leaf->l3->can_disable)
			ktype_cache.default_attrs = default_l3_attrs;
		else
			ktype_cache.default_attrs = default_attrs;

		retval = kobject_init_and_add(&(this_object->kobj),
					      &ktype_cache,
					      per_cpu(ici_cache_kobject, cpu),
					      "index%1lu", i);
		if (unlikely(retval)) {
			for (j = 0; j < i; j++)
				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
			kobject_put(per_cpu(ici_cache_kobject, cpu));
			cpuid4_cache_sysfs_exit(cpu);
			return retval;
		}
		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
	}
	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));

	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
	return 0;
}

static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i;

	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return;
	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
		return;
	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));

	for (i = 0; i < num_cache_leaves; i++)
		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
	kobject_put(per_cpu(ici_cache_kobject, cpu));
	cpuid4_cache_sysfs_exit(cpu);
}

static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;

	sys_dev = get_cpu_sysdev(cpu);
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		cache_add_dev(sys_dev);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		cache_remove_dev(sys_dev);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
	.notifier_call = cacheinfo_cpu_callback,
};

static int __cpuinit cache_sysfs_init(void)
{
	int i;

	if (num_cache_leaves == 0)
		return 0;

	for_each_online_cpu(i) {
		int err;
		struct sys_device *sys_dev = get_cpu_sysdev(i);

		err = cache_add_dev(sys_dev);
		if (err)
			return err;
	}
	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
	return 0;
}

device_initcall(cache_sysfs_init);

#endif
