/*
 *  linux/arch/x86-64/kernel/setup.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Nov 2001 Dave Jones <davej@suse.de>
 *  Forked from i386 setup code.
 */

/*
 * This file handles the architecture-dependent parts of initialization.
 */

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/screen_info.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/highmem.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <asm/processor.h>
#include <linux/console.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/root_dev.h>
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/kallsyms.h>
#include <linux/edd.h>
#include <linux/mmzone.h>
#include <linux/kexec.h>
#include <linux/cpufreq.h>
#include <linux/dmi.h>
#include <linux/dma-mapping.h>
#include <linux/ctype.h>

#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <video/edid.h>
#include <asm/e820.h>
#include <asm/dma.h>
#include <asm/mpspec.h>
#include <asm/mmu_context.h>
#include <asm/bootsetup.h>
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/mach_apic.h>
#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/dmi.h>

/*
 * Machine setup..
 */

struct cpuinfo_x86 boot_cpu_data __read_mostly;
EXPORT_SYMBOL(boot_cpu_data);

unsigned long mmu_cr4_features;

/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;

unsigned long saved_video_mode;

int force_mwait __cpuinitdata;

/*
 * Early DMI memory
 */
int dmi_alloc_index;
char dmi_alloc_data[DMI_MAX_DATA];

/*
 * Setup options
 */
struct screen_info screen_info;
EXPORT_SYMBOL(screen_info);
struct sys_desc_table_struct {
	unsigned short length;
	unsigned char table[0];
};

struct edid_info edid_info;
EXPORT_SYMBOL_GPL(edid_info);

extern int root_mountflags;

char __initdata command_line[COMMAND_LINE_SIZE];

struct resource standard_io_resources[] = {
	{ .name = "dma1", .start = 0x00, .end = 0x1f,
		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
	{ .name = "pic1", .start = 0x20, .end = 0x21,
		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
	{ .name = "timer0", .start = 0x40, .end = 0x43,
		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
	{ .name = "timer1", .start = 0x50, .end = 0x53,
		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
	{ .name = "keyboard", .start = 0x60, .end = 0x6f,
		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
	{ .name = "dma page reg", .start = 0x80, .end = 0x8f,
		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
	{ .name = "pic2", .start = 0xa0, .end = 0xa1,
		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
	{ .name = "dma2", .start = 0xc0, .end = 0xdf,
		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
	{ .name = "fpu", .start = 0xf0, .end = 0xff,
		.flags = IORESOURCE_BUSY | IORESOURCE_IO }
};

#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)

struct resource data_resource = {
	.name = "Kernel data",
	.start = 0,
	.end = 0,
	.flags = IORESOURCE_RAM,
};
struct resource code_resource = {
	.name = "Kernel code",
	.start = 0,
	.end = 0,
	.flags = IORESOURCE_RAM,
};

#ifdef CONFIG_PROC_VMCORE
/*
 * elfcorehdr= specifies the location of the ELF core header
 * stored by the crashed kernel. This option is passed by the
 * kexec loader to the capture kernel.
 */
static int __init setup_elfcorehdr(char *arg)
{
	char *end;
	if (!arg)
		return -EINVAL;
	elfcorehdr_addr = memparse(arg, &end);
	return end > arg ? 0 : -EINVAL;
}
early_param("elfcorehdr", setup_elfcorehdr);
#endif

#ifndef CONFIG_NUMA
static void __init
contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long bootmap_size, bootmap;

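	/*
	 * The bootmem allocator tracks pages with one bit each;
	 * bootmem_bootmap_pages() returns the number of whole pages
	 * needed for a bitmap covering end_pfn page frames.
	 */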
	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
	bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
	if (bootmap == -1L)
		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
	e820_register_active_regions(0, start_pfn, end_pfn);
	free_bootmem_with_active_regions(0, end_pfn);
	reserve_bootmem(bootmap, bootmap_size);
}
#endif

#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 */
static inline void copy_edd(void)
{
	memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
	memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
	edd.mbr_signature_nr = EDD_MBR_SIG_NR;
	edd.edd_info_nr = EDD_NR;
}
#else
static inline void copy_edd(void)
{
}
#endif

#define EBDA_ADDR_POINTER 0x40E

unsigned __initdata ebda_addr;
unsigned __initdata ebda_size;

static void discover_ebda(void)
{
	/*
	 * there is a real-mode segmented pointer pointing to the
	 * 4K EBDA area at 0x40E
	 */
	ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
	ebda_addr <<= 4;

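	/*
	 * By convention, the first bytes of the EBDA give its size
	 * in KiB; read it here as a 16-bit value.
	 */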
	ebda_size = *(unsigned short *)__va(ebda_addr);

	/* Round EBDA up to pages */
	if (ebda_size == 0)
		ebda_size = 1;
	ebda_size <<= 10;
	ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
	if (ebda_size > 64*1024)
		ebda_size = 64*1024;
}

void __init setup_arch(char **cmdline_p)
{
	printk(KERN_INFO "Command line: %s\n", boot_command_line);

	ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
	screen_info = SCREEN_INFO;
	edid_info = EDID_INFO;
	saved_video_mode = SAVED_VIDEO_MODE;
	bootloader_type = LOADER_TYPE;

#ifdef CONFIG_BLK_DEV_RAM
	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
	setup_memory_region();
	copy_edd();

	if (!MOUNT_ROOT_RDONLY)
		root_mountflags &= ~MS_RDONLY;
	init_mm.start_code = (unsigned long) &_text;
	init_mm.end_code = (unsigned long) &_etext;
	init_mm.end_data = (unsigned long) &_edata;
	init_mm.brk = (unsigned long) &_end;

	code_resource.start = virt_to_phys(&_text);
	code_resource.end = virt_to_phys(&_etext)-1;
	data_resource.start = virt_to_phys(&_etext);
	data_resource.end = virt_to_phys(&_edata)-1;

	early_identify_cpu(&boot_cpu_data);

	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
	*cmdline_p = command_line;

	parse_early_param();

	finish_e820_parsing();

	e820_register_active_regions(0, 0, -1UL);
	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	end_pfn = e820_end_of_ram();
	num_physpages = end_pfn;

	check_efer();

	discover_ebda();

	init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));

	dmi_scan_machine();

#ifdef CONFIG_ACPI
	/*
	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
	 * Call this early for SRAT node setup.
	 */
	acpi_boot_table_init();
#endif

	/* How many end-of-memory variables you have, grandma! */
	max_low_pfn = end_pfn;
	max_pfn = end_pfn;
	high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;

	/* Remove active ranges so rediscovery with NUMA-awareness happens */
	remove_all_active_ranges();

#ifdef CONFIG_ACPI_NUMA
	/*
	 * Parse SRAT to discover nodes.
	 */
	acpi_numa_init();
#endif

#ifdef CONFIG_NUMA
	numa_initmem_init(0, end_pfn);
#else
	contig_initmem_init(0, end_pfn);
#endif

	/* Reserve direct mapping */
	reserve_bootmem_generic(table_start << PAGE_SHIFT,
				(table_end - table_start) << PAGE_SHIFT);

	/* reserve kernel */
	reserve_bootmem_generic(__pa_symbol(&_text),
				__pa_symbol(&_end) - __pa_symbol(&_text));

	/*
	 * reserve physical page 0 - it's a special BIOS page on many boxes,
	 * enabling clean reboots, SMP operation, laptop functions.
	 */
	reserve_bootmem_generic(0, PAGE_SIZE);

	/* reserve EBDA region */
	if (ebda_addr)
		reserve_bootmem_generic(ebda_addr, ebda_size);
#ifdef CONFIG_NUMA
	/* reserve nodemap region */
	if (nodemap_addr)
		reserve_bootmem_generic(nodemap_addr, nodemap_size);
#endif

#ifdef CONFIG_SMP
	/* Reserve SMP trampoline */
	reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
#endif

#ifdef CONFIG_ACPI_SLEEP
	/*
	 * Reserve low memory region for sleep support.
	 */
	acpi_reserve_bootmem();
#endif
	/*
	 * Find and reserve possible boot-time SMP configuration:
	 */
	find_smp_config();
#ifdef CONFIG_BLK_DEV_INITRD
	if (LOADER_TYPE && INITRD_START) {
		if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
			reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
			initrd_start = INITRD_START + PAGE_OFFSET;
			initrd_end = initrd_start+INITRD_SIZE;
		} else {
			printk(KERN_ERR "initrd extends beyond end of memory "
			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
			    (unsigned long)(INITRD_START + INITRD_SIZE),
			    (unsigned long)(end_pfn << PAGE_SHIFT));
			initrd_start = 0;
		}
	}
#endif
#ifdef CONFIG_KEXEC
	if (crashk_res.start != crashk_res.end) {
		reserve_bootmem_generic(crashk_res.start,
			crashk_res.end - crashk_res.start + 1);
	}
#endif

	paging_init();

#ifdef CONFIG_PCI
	early_quirks();
#endif

	/*
	 * Set this early, so we don't allocate cpu0 if the MADT
	 * doesn't list the BSP first.
	 * mpparse.c/MP_processor_info() allocates logical cpu numbers.
	 */
	cpu_set(0, cpu_present_map);
#ifdef CONFIG_ACPI
	/*
	 * Read APIC and some other early information from ACPI tables.
	 */
	acpi_boot_init();
#endif

	init_cpu_to_node();

	/*
	 * get boot-time SMP configuration:
	 */
	if (smp_found_config)
		get_smp_config();
	init_apic_mappings();

	/*
	 * We trust e820 completely. No explicit ROM probing in memory.
	 */
	e820_reserve_resources();
	e820_mark_nosave_regions();

	{
		unsigned i;
		/* request I/O space for devices used on all i[345]86 PCs */
		for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
			request_resource(&ioport_resource, &standard_io_resources[i]);
	}

	e820_setup_gap();

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
	conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
	conswitchp = &dummy_con;
#endif
#endif
}

static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
	unsigned int *v;

	if (c->extended_cpuid_level < 0x80000004)
		return 0;

	v = (unsigned int *) c->x86_model_id;
	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
	c->x86_model_id[48] = 0;
	return 1;
}

static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
{
	unsigned int n, dummy, eax, ebx, ecx, edx;

	n = c->extended_cpuid_level;

	if (n >= 0x80000005) {
		cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
		       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
		c->x86_cache_size = (ecx>>24)+(edx>>24);
		/* On K8 L1 TLB is inclusive, so don't count it */
		c->x86_tlbsize = 0;
	}

	if (n >= 0x80000006) {
		/* ecx already holds the leaf's ECX output; no need to
		   re-read it with cpuid_ecx() */
		cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
		c->x86_cache_size = ecx >> 16;
		c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);

		printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
		       c->x86_cache_size, ecx & 0xFF);
	}

	if (n >= 0x80000007)
		cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
	if (n >= 0x80000008) {
		cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
		c->x86_virt_bits = (eax >> 8) & 0xff;
		c->x86_phys_bits = eax & 0xff;
	}
}

#ifdef CONFIG_NUMA
static int nearby_node(int apicid)
{
	int i;
	for (i = apicid - 1; i >= 0; i--) {
		int node = apicid_to_node[i];
		if (node != NUMA_NO_NODE && node_online(node))
			return node;
	}
	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
		int node = apicid_to_node[i];
		if (node != NUMA_NO_NODE && node_online(node))
			return node;
	}
	return first_node(node_online_map); /* Shouldn't happen */
}
#endif

/*
 * On an AMD multi-core setup the lower bits of the APIC ID
 * distinguish the cores.  Assumes the number of cores is a
 * power of two.
 */
static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	unsigned bits;
#ifdef CONFIG_NUMA
	int cpu = smp_processor_id();
	int node = 0;
	unsigned apicid = hard_smp_processor_id();
#endif
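	/*
	 * CPUID 0x80000008 ECX[7:0] is the number of cores minus one;
	 * ECX[15:12] (ApicIdCoreIdSize) gives the width in bits of the
	 * core-id field within the APIC ID, or 0 on older parts.
	 */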
	unsigned ecx = cpuid_ecx(0x80000008);

	c->x86_max_cores = (ecx & 0xff) + 1;

	/* Is the CPU telling us the core id bit shift? */
	bits = (ecx >> 12) & 0xF;

	/* Otherwise recompute */
	if (bits == 0) {
		while ((1 << bits) < c->x86_max_cores)
			bits++;
	}

	/* Low order bits define the core id (index of core in socket) */
	c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
	/* Convert the APIC ID into the socket ID */
	c->phys_proc_id = phys_pkg_id(bits);

#ifdef CONFIG_NUMA
	node = c->phys_proc_id;
	if (apicid_to_node[apicid] != NUMA_NO_NODE)
		node = apicid_to_node[apicid];
	if (!node_online(node)) {
		/* Two possibilities here:
		   - The CPU is missing memory and no node was created.
		     In that case try picking one from a nearby CPU.
		   - The APIC IDs differ from the HyperTransport node IDs
		     which the K8 northbridge parsing fills in.
		     Assume they are all increased by a constant offset,
		     but in the same order as the HT nodeids.
		     If that doesn't result in a usable node fall back to
		     the path for the previous case. */
		int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
		if (ht_nodeid >= 0 &&
		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
			node = apicid_to_node[ht_nodeid];
		/* Pick a nearby node */
		if (!node_online(node))
			node = nearby_node(apicid);
	}
	numa_set_node(cpu, node);

	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
#endif
#endif
}

static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
	unsigned level;

#ifdef CONFIG_SMP
	unsigned long value;

	/*
	 * Disable TLB flush filter by setting HWCR.FFDIS on K8
	 * bit 6 of msr C001_0015
	 *
	 * Errata 63 for SH-B3 steppings
	 * Errata 122 for all steppings (F+ have it disabled by default)
	 */
	if (c->x86 == 15) {
		rdmsrl(MSR_K8_HWCR, value);
		value |= 1 << 6;
		wrmsrl(MSR_K8_HWCR, value);
	}
#endif

	/* Bit 31 in normal CPUID is used for a nonstandard 3DNow! ID;
	   3DNow! is identified by bit 31 in extended CPUID (1*32+31) anyway */
	clear_bit(0*32+31, &c->x86_capability);

	/* On C+ stepping K8 rep microcode works well for copy/memset */
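	/* cpuid leaf 1 EAX holds the raw family/model/stepping signature */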
	level = cpuid_eax(1);
	if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);

	if (c->x86 >= 6)
		set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);

	level = get_model_name(c);
	if (!level) {
		switch (c->x86) {
		case 15:
			/* Should distinguish models here, but this is only
			   a fallback anyway. */
			strcpy(c->x86_model_id, "Hammer");
			break;
		}
	}
	display_cacheinfo(c);

	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
	if (c->x86_power & (1<<8))
		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);

	/* Multi core CPU? */
	if (c->extended_cpuid_level >= 0x80000008)
		amd_detect_cmp(c);

	/* cpuid leaf 4 is emulated for AMD; expose three cache leaves */
	num_cache_leaves = 3;

	/* RDTSC can be speculated around */
	clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);

	/* Family 0x10 doesn't support C-states in MWAIT, so don't use it */
	if (c->x86 == 0x10 && !force_mwait)
		clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
}

static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	u32 eax, ebx, ecx, edx;
	int index_msb, core_bits;

	cpuid(1, &eax, &ebx, &ecx, &edx);

	if (!cpu_has(c, X86_FEATURE_HT))
		return;
	if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
		goto out;

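	/* CPUID leaf 1 EBX[23:16]: logical processor count per physical package */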
	smp_num_siblings = (ebx & 0xff0000) >> 16;

	if (smp_num_siblings == 1) {
		printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
	} else if (smp_num_siblings > 1) {

		if (smp_num_siblings > NR_CPUS) {
			printk(KERN_WARNING "CPU: Unsupported number of siblings %d\n",
			       smp_num_siblings);
			smp_num_siblings = 1;
			return;
		}

		index_msb = get_count_order(smp_num_siblings);
		c->phys_proc_id = phys_pkg_id(index_msb);

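		/*
		 * smp_num_siblings from CPUID counts all logical CPUs in
		 * the package; divide by the core count to get the number
		 * of hyper-threads per core.
		 */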
		smp_num_siblings = smp_num_siblings / c->x86_max_cores;

		index_msb = get_count_order(smp_num_siblings);

		core_bits = get_count_order(c->x86_max_cores);

		c->cpu_core_id = phys_pkg_id(index_msb) &
					       ((1 << core_bits) - 1);
	}
out:
	if ((c->x86_max_cores * smp_num_siblings) > 1) {
		printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
		printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id);
	}

#endif
}

/*
 * find out the number of processor cores on the die
 */
static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
	unsigned int eax, t;

	if (c->cpuid_level < 4)
		return 1;

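	/*
	 * CPUID leaf 4, index 0: EAX[4:0] is the cache type (0 means the
	 * leaf is invalid); EAX[31:26] is the maximum number of cores per
	 * package, minus one.
	 */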
	cpuid_count(4, 0, &eax, &t, &t, &t);

	if (eax & 0x1f)
		return ((eax >> 26) + 1);
	else
		return 1;
}

static void srat_detect_node(void)
{
#ifdef CONFIG_NUMA
	unsigned node;
	int cpu = smp_processor_id();
	int apicid = hard_smp_processor_id();

	/* For now, don't do the funky fallback heuristics that the
	   AMD version employs. */
	node = apicid_to_node[apicid];
	if (node == NUMA_NO_NODE)
		node = first_node(node_online_map);
	numa_set_node(cpu, node);

	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
#endif
}

static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned n;

	init_intel_cacheinfo(c);
	if (c->cpuid_level > 9) {
		unsigned eax = cpuid_eax(10);
		/* Check for version and the number of counters */
		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
			set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
	}

	if (cpu_has_ds) {
		unsigned int l1, l2;
		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
		if (!(l1 & (1<<11)))
			set_bit(X86_FEATURE_BTS, c->x86_capability);
		if (!(l1 & (1<<12)))
			set_bit(X86_FEATURE_PEBS, c->x86_capability);
	}

	n = c->extended_cpuid_level;
	if (n >= 0x80000008) {
		unsigned eax = cpuid_eax(0x80000008);
		c->x86_virt_bits = (eax >> 8) & 0xff;
		c->x86_phys_bits = eax & 0xff;
		if (c->x86_vendor == X86_VENDOR_INTEL &&
		    c->x86 == 0xF && c->x86_model == 0x3 &&
		    c->x86_mask == 0x4)
			c->x86_phys_bits = 36;
	}

	if (c->x86 == 15)
		c->x86_cache_alignment = c->x86_clflush_size * 2;
	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
	    (c->x86 == 0x6 && c->x86_model >= 0x0e))
		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
	if (c->x86 == 6)
		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
	if (c->x86 == 15)
		set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
	else
		clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
	c->x86_max_cores = intel_num_cpu_cores(c);

	srat_detect_node();
}

static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
{
	char *v = c->x86_vendor_id;

	if (!strcmp(v, "AuthenticAMD"))
		c->x86_vendor = X86_VENDOR_AMD;
	else if (!strcmp(v, "GenuineIntel"))
		c->x86_vendor = X86_VENDOR_INTEL;
	else
		c->x86_vendor = X86_VENDOR_UNKNOWN;
}

struct cpu_model_info {
	int vendor;
	int family;
	char *model_names[16];
};

/* Do some early cpuid on the boot CPU to get some parameters that are
   needed before check_bugs. Everything advanced is in identify_cpu
   below. */
void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
{
	u32 tfms;

	c->loops_per_jiffy = loops_per_jiffy;
	c->x86_cache_size = -1;
	c->x86_vendor = X86_VENDOR_UNKNOWN;
	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
	c->x86_vendor_id[0] = '\0'; /* Unset */
	c->x86_model_id[0] = '\0';  /* Unset */
	c->x86_clflush_size = 64;
	c->x86_cache_alignment = c->x86_clflush_size;
	c->x86_max_cores = 1;
	c->extended_cpuid_level = 0;
	memset(&c->x86_capability, 0, sizeof c->x86_capability);

	/* Get vendor name */
	cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
	      (unsigned int *)&c->x86_vendor_id[0],
	      (unsigned int *)&c->x86_vendor_id[8],
	      (unsigned int *)&c->x86_vendor_id[4]);

	get_cpu_vendor(c);

	/* Initialize the standard set of capabilities */
	/* Note that the vendor-specific code below might override */

	/* Intel-defined flags: level 0x00000001 */
	if (c->cpuid_level >= 0x00000001) {
		__u32 misc;
		cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
		      &c->x86_capability[0]);
		c->x86 = (tfms >> 8) & 0xf;
		c->x86_model = (tfms >> 4) & 0xf;
		c->x86_mask = tfms & 0xf;
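		/*
		 * Family 0xf adds the extended family field (EAX bits
		 * 27:20); for families 6 and up the extended model field
		 * (bits 19:16) widens the model number.
		 */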
		if (c->x86 == 0xf)
			c->x86 += (tfms >> 20) & 0xff;
		if (c->x86 >= 0x6)
			c->x86_model += ((tfms >> 16) & 0xF) << 4;
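		/* CPUID.1 EDX bit 19 is CLFSH; EBX[15:8] gives the CLFLUSH
		   line size in 8-byte units */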
		if (c->x86_capability[0] & (1<<19))
			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
	} else {
		/* Have CPUID level 0 only - unheard of */
		c->x86 = 4;
	}

#ifdef CONFIG_SMP
	c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
#endif
}

/*
 * This does the hard work of actually picking apart the CPU stuff...
 */
void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
{
	int i;
	u32 xlvl;

	early_identify_cpu(c);

	/* AMD-defined flags: level 0x80000001 */
	xlvl = cpuid_eax(0x80000000);
	c->extended_cpuid_level = xlvl;
	if ((xlvl & 0xffff0000) == 0x80000000) {
		if (xlvl >= 0x80000001) {
			c->x86_capability[1] = cpuid_edx(0x80000001);
			c->x86_capability[6] = cpuid_ecx(0x80000001);
		}
		if (xlvl >= 0x80000004)
			get_model_name(c); /* Default name */
	}

	/* Transmeta-defined flags: level 0x80860001 */
	xlvl = cpuid_eax(0x80860000);
	if ((xlvl & 0xffff0000) == 0x80860000) {
		/* Don't set extended_cpuid_level here for now, to avoid confusion. */
		if (xlvl >= 0x80860001)
			c->x86_capability[2] = cpuid_edx(0x80860001);
	}

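	/* with a shift of 0, phys_pkg_id() yields the full APIC ID */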
	c->apicid = phys_pkg_id(0);

	/*
	 * Vendor-specific initialization.  In this section we
	 * canonicalize the feature flags, meaning if there are
	 * features a certain CPU supports which CPUID doesn't
	 * tell us, CPUID claiming incorrect flags, or other bugs,
	 * we handle them here.
	 *
	 * At the end of this section, c->x86_capability better
	 * indicate the features this CPU genuinely supports!
	 */
	switch (c->x86_vendor) {
	case X86_VENDOR_AMD:
		init_amd(c);
		break;

	case X86_VENDOR_INTEL:
		init_intel(c);
		break;

	case X86_VENDOR_UNKNOWN:
	default:
		display_cacheinfo(c);
		break;
	}

	select_idle_routine(c);
	detect_ht(c);

	/*
	 * On SMP, boot_cpu_data holds the common feature set between
	 * all CPUs; so make sure that we indicate which features are
	 * common between the CPUs.  The first time this routine gets
	 * executed, c == &boot_cpu_data.
	 */
	if (c != &boot_cpu_data) {
		/* AND the already accumulated flags with these */
		for (i = 0 ; i < NCAPINTS ; i++)
			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
	}

#ifdef CONFIG_X86_MCE
	mcheck_init(c);
#endif
	if (c != &boot_cpu_data)
		mtrr_ap_init();
#ifdef CONFIG_NUMA
	numa_add_cpu(smp_processor_id());
#endif
}

void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
	if (c->x86_model_id[0])
		printk("%s", c->x86_model_id);

	if (c->x86_mask || c->cpuid_level >= 0)
		printk(" stepping %02x\n", c->x86_mask);
	else
		printk("\n");
}

/*
 *	Get CPU information for use by the procfs.
 */

static int show_cpuinfo(struct seq_file *m, void *v)
{
	struct cpuinfo_x86 *c = v;

	/*
	 * These flag bits must match the definitions in <asm/cpufeature.h>.
	 * NULL means this bit is undefined or reserved; either way it doesn't
	 * have meaning as far as Linux is concerned.  Note that it's important
	 * to realize there is a difference between this table and CPUID -- if
	 * applications want to get the raw CPUID data, they should access
	 * /dev/cpu/<cpu_nr>/cpuid instead.
	 */
	static char *x86_cap_flags[] = {
		/* Intel-defined */
		"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
		"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
		"pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
		"fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,

		/* AMD-defined */
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
		NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
		"3dnowext", "3dnow",

		/* Transmeta-defined */
		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

		/* Other (Linux-defined) */
		"cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
		"constant_tsc", NULL, NULL,
		"up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

		/* Intel-defined (#2) */
		"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
		"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
		NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

		/* VIA/Cyrix/Centaur-defined */
		NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

		/* AMD-defined (#2) */
		"lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
		"altmovcr8", "abm", "sse4a",
		"misalignsse", "3dnowprefetch",
		"osvw", "ibs", NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	};
	static char *x86_power_flags[] = {
		"ts",	/* temperature sensor */
		"fid",  /* frequency id control */
		"vid",  /* voltage id control */
		"ttp",  /* thermal trip */
		"tm",
		"stc",
		"100mhzsteps",
		"hwpstate",
		"",	/* tsc invariant mapped to constant_tsc */
		/* nothing */
	};

#ifdef CONFIG_SMP
	if (!cpu_online(c-cpu_data))
		return 0;
#endif

	seq_printf(m, "processor\t: %u\n"
		   "vendor_id\t: %s\n"
		   "cpu family\t: %d\n"
		   "model\t\t: %d\n"
		   "model name\t: %s\n",
		   (unsigned)(c-cpu_data),
		   c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
		   c->x86,
		   (int)c->x86_model,
		   c->x86_model_id[0] ? c->x86_model_id : "unknown");

	if (c->x86_mask || c->cpuid_level >= 0)
		seq_printf(m, "stepping\t: %d\n", c->x86_mask);
	else
		seq_printf(m, "stepping\t: unknown\n");

	if (cpu_has(c, X86_FEATURE_TSC)) {
		unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
		if (!freq)
			freq = cpu_khz;
		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
			   freq / 1000, (freq % 1000));
	}

	/* Cache size */
	if (c->x86_cache_size >= 0)
		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);

#ifdef CONFIG_SMP
	if (smp_num_siblings * c->x86_max_cores > 1) {
		int cpu = c - cpu_data;
		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
	}
#endif

	seq_printf(m,
		   "fpu\t\t: yes\n"
		   "fpu_exception\t: yes\n"
		   "cpuid level\t: %d\n"
		   "wp\t\t: yes\n"
		   "flags\t\t:",
		   c->cpuid_level);

	{
		int i;
		for (i = 0; i < 32*NCAPINTS; i++)
			if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
				seq_printf(m, " %s", x86_cap_flags[i]);
	}

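	/*
	 * BogoMIPS is loops_per_jiffy scaled to a MHz-like figure:
	 * loops_per_jiffy * HZ / 500000, printed to two decimal places.
	 */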
	seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
		   c->loops_per_jiffy/(500000/HZ),
		   (c->loops_per_jiffy/(5000/HZ)) % 100);

	if (c->x86_tlbsize > 0)
		seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
	seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
	seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);

	seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
		   c->x86_phys_bits, c->x86_virt_bits);

	seq_printf(m, "power management:");
	{
		unsigned i;
		for (i = 0; i < 32; i++)
			if (c->x86_power & (1 << i)) {
				if (i < ARRAY_SIZE(x86_power_flags) &&
				    x86_power_flags[i])
					seq_printf(m, "%s%s",
						   x86_power_flags[i][0] ? " " : "",
						   x86_power_flags[i]);
				else
					seq_printf(m, " [%d]", i);
			}
	}

	seq_printf(m, "\n\n");

	return 0;
}

static void *c_start(struct seq_file *m, loff_t *pos)
{
	return *pos < NR_CPUS ? cpu_data + *pos : NULL;
}

static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
	++*pos;
	return c_start(m, pos);
}

static void c_stop(struct seq_file *m, void *v)
{
}

struct seq_operations cpuinfo_op = {
	.start = c_start,
	.next = c_next,
	.stop = c_stop,
	.show = show_cpuinfo,
};