/*
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <cpu.h>

#include <string.h>
#include <stdlib.h>
#include <stdio.h>

#include <ACPI.h>

#include <boot_device.h>
#include <commpage.h>
#include <debug.h>
#include <elf.h>
#include <smp.h>
#include <vm/vm.h>
#include <vm/vm_types.h>
#include <vm/VMAddressSpace.h>

#include <arch_system_info.h>
#include <arch/x86/apic.h>
#include <boot/kernel_args.h>

#include "paging/X86PagingStructures.h"
#include "paging/X86VMTranslationMap.h"


#define DUMP_FEATURE_STRING 1


/* cpu vendor info */
struct cpu_vendor_info {
	const char *vendor;
	const char *ident_string[2];
};

static const struct cpu_vendor_info vendor_info[VENDOR_NUM] = {
	{ "Intel", { "GenuineIntel" } },
	{ "AMD", { "AuthenticAMD" } },
	{ "Cyrix", { "CyrixInstead" } },
	{ "UMC", { "UMC UMC UMC" } },
	{ "NexGen", { "NexGenDriven" } },
	{ "Centaur", { "CentaurHauls" } },
	{ "Rise", { "RiseRiseRise" } },
	{ "Transmeta", { "GenuineTMx86", "TransmetaCPU" } },
	{ "NSC", { "Geode by NSC" } },
};

#define CR0_CACHE_DISABLE		(1UL << 30)
#define CR0_NOT_WRITE_THROUGH	(1UL << 29)
#define CR0_FPU_EMULATION		(1UL << 2)
#define CR0_MONITOR_FPU			(1UL << 1)

#define CR4_OS_FXSR				(1UL << 9)
#define CR4_OS_XMM_EXCEPTION	(1UL << 10)

#define K8_SMIONCMPHALT			(1ULL << 27)
#define K8_C1EONCMPHALT			(1ULL << 28)

#define K8_CMPHALT				(K8_SMIONCMPHALT | K8_C1EONCMPHALT)

/*
 * 0 favors highest performance while 15 corresponds to the maximum energy
 * savings. 7 means balance between performance and energy savings.
 * Refer to Section 14.3.4 in <Intel 64 and IA-32 Architectures Software
 * Developer's Manual Volume 3> for details.
 */
#define ENERGY_PERF_BIAS_PERFORMANCE	0
#define ENERGY_PERF_BIAS_BALANCE		7
#define ENERGY_PERF_BIAS_POWERSAVE		15

struct set_mtrr_parameter {
	int32	index;
	uint64	base;
	uint64	length;
	uint8	type;
};

struct set_mtrrs_parameter {
	const x86_mtrr_info*	infos;
	uint32					count;
	uint8					defaultType;
};


extern "C" void x86_reboot(void);
	// from arch.S

void (*gCpuIdleFunc)(void);
void (*gX86SwapFPUFunc)(void* oldState, const void* newState) = x86_noop_swap;
bool gHasSSE = false;

static uint32 sCpuRendezvous;
static uint32 sCpuRendezvous2;
static uint32 sCpuRendezvous3;
static vint32 sTSCSyncRendezvous;

/* Some specials for the double fault handler */
static uint8* sDoubleFaultStacks;
static const size_t kDoubleFaultStackSize = 4096;	// size per CPU

static x86_cpu_module_info* sCpuModule;


extern "C" void memcpy_generic(void* dest, const void* source, size_t count);
extern int memcpy_generic_end;
extern "C" void memset_generic(void* dest, int value, size_t count);
extern int memset_generic_end;

x86_optimized_functions gOptimizedFunctions = {
	memcpy_generic,
	&memcpy_generic_end,
	memset_generic,
	&memset_generic_end
};


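/*!	Tries to shut down or reboot the system via the ACPI module. Returns an
	error when the debugger is running, interrupts are disabled, or no ACPI
	module is available, so that the caller can fall back to other methods.
*/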
static status_t
acpi_shutdown(bool rebootSystem)
{
	if (debug_debugger_running() || !are_interrupts_enabled())
		return B_ERROR;

	acpi_module_info* acpi;
	if (get_module(B_ACPI_MODULE_NAME, (module_info**)&acpi) != B_OK)
		return B_NOT_SUPPORTED;

	status_t status;
	if (rebootSystem) {
		status = acpi->reboot();
	} else {
		// Make sure we run on the boot CPU (apparently needed for some ACPI
		// implementations)
		_user_set_cpu_enabled(0, true);
		for (int32 cpu = 1; cpu < smp_get_num_cpus(); cpu++) {
			_user_set_cpu_enabled(cpu, false);
		}
		// TODO: must not be called from the idle thread!
		thread_yield(true);

		status = acpi->prepare_sleep_state(ACPI_POWER_STATE_OFF, NULL, 0);
		if (status == B_OK) {
			//cpu_status state = disable_interrupts();
			status = acpi->enter_sleep_state(ACPI_POWER_STATE_OFF);
			//restore_interrupts(state);
		}
	}

	put_module(B_ACPI_MODULE_NAME);
	return status;
}


/*!	Disable CPU caches, and invalidate them. */
static void
disable_caches()
{
	x86_write_cr0((x86_read_cr0() | CR0_CACHE_DISABLE)
		& ~CR0_NOT_WRITE_THROUGH);
	wbinvd();
	arch_cpu_global_TLB_invalidate();
}


/*!	Invalidate CPU caches, and enable them. */
static void
enable_caches()
{
	wbinvd();
	arch_cpu_global_TLB_invalidate();
	x86_write_cr0(x86_read_cr0()
		& ~(CR0_CACHE_DISABLE | CR0_NOT_WRITE_THROUGH));
}


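/*!	call_all_cpus() worker that sets a single MTRR on this CPU: all CPUs
	rendezvous, disable and flush their caches, program the register through
	the CPU module, re-enable the caches, and rendezvous again, so the MTRRs
	stay consistent across the system.
*/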
static void
set_mtrr(void* _parameter, int cpu)
{
	struct set_mtrr_parameter* parameter
		= (struct set_mtrr_parameter*)_parameter;

	// wait until all CPUs have arrived here
	smp_cpu_rendezvous(&sCpuRendezvous, cpu);

	// One CPU has to reset sCpuRendezvous3 -- it is needed to prevent the CPU
	// that initiated the call_all_cpus() from doing that again and clearing
	// sCpuRendezvous2 before the last CPU has actually left the loop in
	// smp_cpu_rendezvous();
	if (cpu == 0)
		atomic_set((vint32*)&sCpuRendezvous3, 0);

	disable_caches();

	sCpuModule->set_mtrr(parameter->index, parameter->base, parameter->length,
		parameter->type);

	enable_caches();

	// wait until all CPUs have arrived here
	smp_cpu_rendezvous(&sCpuRendezvous2, cpu);
	smp_cpu_rendezvous(&sCpuRendezvous3, cpu);
}


static void
set_mtrrs(void* _parameter, int cpu)
{
	set_mtrrs_parameter* parameter = (set_mtrrs_parameter*)_parameter;

	// wait until all CPUs have arrived here
	smp_cpu_rendezvous(&sCpuRendezvous, cpu);

	// One CPU has to reset sCpuRendezvous3 -- it is needed to prevent the CPU
	// that initiated the call_all_cpus() from doing that again and clearing
	// sCpuRendezvous2 before the last CPU has actually left the loop in
	// smp_cpu_rendezvous();
	if (cpu == 0)
		atomic_set((vint32*)&sCpuRendezvous3, 0);

	disable_caches();

	sCpuModule->set_mtrrs(parameter->defaultType, parameter->infos,
		parameter->count);

	enable_caches();

	// wait until all CPUs have arrived here
	smp_cpu_rendezvous(&sCpuRendezvous2, cpu);
	smp_cpu_rendezvous(&sCpuRendezvous3, cpu);
}


static void
init_mtrrs(void* _unused, int cpu)
{
	// wait until all CPUs have arrived here
	smp_cpu_rendezvous(&sCpuRendezvous, cpu);

	// One CPU has to reset sCpuRendezvous3 -- it is needed to prevent the CPU
	// that initiated the call_all_cpus() from doing that again and clearing
	// sCpuRendezvous2 before the last CPU has actually left the loop in
	// smp_cpu_rendezvous();
	if (cpu == 0)
		atomic_set((vint32*)&sCpuRendezvous3, 0);

	disable_caches();

	sCpuModule->init_mtrrs();

	enable_caches();

	// wait until all CPUs have arrived here
	smp_cpu_rendezvous(&sCpuRendezvous2, cpu);
	smp_cpu_rendezvous(&sCpuRendezvous3, cpu);
}


uint32
x86_count_mtrrs(void)
{
	if (sCpuModule == NULL)
		return 0;

	return sCpuModule->count_mtrrs();
}


void
x86_set_mtrr(uint32 index, uint64 base, uint64 length, uint8 type)
{
	struct set_mtrr_parameter parameter;
	parameter.index = index;
	parameter.base = base;
	parameter.length = length;
	parameter.type = type;

	sCpuRendezvous = sCpuRendezvous2 = 0;
	call_all_cpus(&set_mtrr, &parameter);
}


status_t
x86_get_mtrr(uint32 index, uint64* _base, uint64* _length, uint8* _type)
{
	// the MTRRs are identical on all CPUs, so it doesn't matter
	// on which CPU this runs
	return sCpuModule->get_mtrr(index, _base, _length, _type);
}


void
x86_set_mtrrs(uint8 defaultType, const x86_mtrr_info* infos, uint32 count)
{
	if (sCpuModule == NULL)
		return;

	struct set_mtrrs_parameter parameter;
	parameter.defaultType = defaultType;
	parameter.infos = infos;
	parameter.count = count;

	sCpuRendezvous = sCpuRendezvous2 = 0;
	call_all_cpus(&set_mtrrs, &parameter);
}


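/*!	Detects FPU/SSE support and configures CR0/CR4 accordingly, selecting the
	matching FPU state swap function (no-op, fnsave based, or fxsave based).
*/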
void
x86_init_fpu(void)
{
	// All x86_64 CPUs support SSE, so there is no need to check for it.
#ifndef __x86_64__
	if (!x86_check_feature(IA32_FEATURE_FPU, FEATURE_COMMON)) {
		// No FPU... time to install one in your 386?
		dprintf("%s: Warning: CPU has no reported FPU.\n", __func__);
		gX86SwapFPUFunc = x86_noop_swap;
		return;
	}

	if (!x86_check_feature(IA32_FEATURE_SSE, FEATURE_COMMON)
		|| !x86_check_feature(IA32_FEATURE_FXSR, FEATURE_COMMON)) {
		dprintf("%s: CPU has no SSE... just enabling FPU.\n", __func__);
		// we don't have proper SSE support, just enable FPU
		x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));
		gX86SwapFPUFunc = x86_fnsave_swap;
		return;
	}
#endif

	dprintf("%s: CPU has SSE... enabling FXSR and XMM.\n", __func__);

	// enable OS support for SSE
	x86_write_cr4(x86_read_cr4() | CR4_OS_FXSR | CR4_OS_XMM_EXCEPTION);
	x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));

	gX86SwapFPUFunc = x86_fxsave_swap;
	gHasSSE = true;
}


#if DUMP_FEATURE_STRING
static void
dump_feature_string(int currentCPU, cpu_ent* cpu)
{
	char features[384];
	features[0] = 0;

	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_FPU)
		strlcat(features, "fpu ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_VME)
		strlcat(features, "vme ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_DE)
		strlcat(features, "de ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSE)
		strlcat(features, "pse ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_TSC)
		strlcat(features, "tsc ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MSR)
		strlcat(features, "msr ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PAE)
		strlcat(features, "pae ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MCE)
		strlcat(features, "mce ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CX8)
		strlcat(features, "cx8 ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_APIC)
		strlcat(features, "apic ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SEP)
		strlcat(features, "sep ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MTRR)
		strlcat(features, "mtrr ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PGE)
		strlcat(features, "pge ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MCA)
		strlcat(features, "mca ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CMOV)
		strlcat(features, "cmov ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PAT)
		strlcat(features, "pat ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSE36)
		strlcat(features, "pse36 ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSN)
		strlcat(features, "psn ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CLFSH)
		strlcat(features, "clfsh ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_DS)
		strlcat(features, "ds ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_ACPI)
		strlcat(features, "acpi ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MMX)
		strlcat(features, "mmx ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_FXSR)
		strlcat(features, "fxsr ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SSE)
		strlcat(features, "sse ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SSE2)
		strlcat(features, "sse2 ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SS)
		strlcat(features, "ss ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_HTT)
		strlcat(features, "htt ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_TM)
		strlcat(features, "tm ", sizeof(features));
	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PBE)
		strlcat(features, "pbe ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE3)
		strlcat(features, "sse3 ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PCLMULQDQ)
		strlcat(features, "pclmulqdq ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DTES64)
		strlcat(features, "dtes64 ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_MONITOR)
		strlcat(features, "monitor ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DSCPL)
		strlcat(features, "dscpl ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_VMX)
		strlcat(features, "vmx ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SMX)
		strlcat(features, "smx ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_EST)
		strlcat(features, "est ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_TM2)
		strlcat(features, "tm2 ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSSE3)
		strlcat(features, "ssse3 ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_CNXTID)
		strlcat(features, "cnxtid ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_FMA)
		strlcat(features, "fma ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_CX16)
		strlcat(features, "cx16 ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_XTPR)
		strlcat(features, "xtpr ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PDCM)
		strlcat(features, "pdcm ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PCID)
		strlcat(features, "pcid ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DCA)
		strlcat(features, "dca ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE4_1)
		strlcat(features, "sse4_1 ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE4_2)
		strlcat(features, "sse4_2 ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_X2APIC)
		strlcat(features, "x2apic ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_MOVBE)
		strlcat(features, "movbe ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_POPCNT)
		strlcat(features, "popcnt ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_TSCDEADLINE)
		strlcat(features, "tscdeadline ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_AES)
		strlcat(features, "aes ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_XSAVE)
		strlcat(features, "xsave ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_OSXSAVE)
		strlcat(features, "osxsave ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_AVX)
		strlcat(features, "avx ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_F16C)
		strlcat(features, "f16c ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_RDRND)
		strlcat(features, "rdrnd ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_HYPERVISOR)
		strlcat(features, "hypervisor ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_SYSCALL)
		strlcat(features, "syscall ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_NX)
		strlcat(features, "nx ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_MMXEXT)
		strlcat(features, "mmxext ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_FFXSR)
		strlcat(features, "ffxsr ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_LONG)
		strlcat(features, "long ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_3DNOWEXT)
		strlcat(features, "3dnowext ", sizeof(features));
	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_3DNOW)
		strlcat(features, "3dnow ", sizeof(features));
	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_DTS)
		strlcat(features, "dts ", sizeof(features));
	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ITB)
		strlcat(features, "itb ", sizeof(features));
	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ARAT)
		strlcat(features, "arat ", sizeof(features));
	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_PLN)
		strlcat(features, "pln ", sizeof(features));
	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ECMD)
		strlcat(features, "ecmd ", sizeof(features));
	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_PTM)
		strlcat(features, "ptm ", sizeof(features));
	if (cpu->arch.feature[FEATURE_6_ECX] & IA32_FEATURE_APERFMPERF)
		strlcat(features, "aperfmperf ", sizeof(features));
	if (cpu->arch.feature[FEATURE_6_ECX] & IA32_FEATURE_EPB)
		strlcat(features, "epb ", sizeof(features));

	dprintf("CPU %d: features: %s\n", currentCPU, features);
}
#endif	// DUMP_FEATURE_STRING


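/*!	Fills in the architecture specific part of the current CPU's cpu_ent:
	vendor, family/model/stepping, model name, and the CPUID feature flags.
*/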
static void
detect_cpu(int currentCPU)
{
	cpu_ent* cpu = get_cpu_struct();
	char vendorString[17];
	cpuid_info cpuid;

	// clear out the cpu info data
	cpu->arch.vendor = VENDOR_UNKNOWN;
	cpu->arch.vendor_name = "UNKNOWN VENDOR";
	cpu->arch.feature[FEATURE_COMMON] = 0;
	cpu->arch.feature[FEATURE_EXT] = 0;
	cpu->arch.feature[FEATURE_EXT_AMD] = 0;
	cpu->arch.model_name[0] = 0;

	// print some fun data
	get_current_cpuid(&cpuid, 0);

	// build the vendor string
	memset(vendorString, 0, sizeof(vendorString));
	memcpy(vendorString, cpuid.eax_0.vendor_id, sizeof(cpuid.eax_0.vendor_id));

	// get the family, model, stepping
	get_current_cpuid(&cpuid, 1);
	cpu->arch.type = cpuid.eax_1.type;
	cpu->arch.family = cpuid.eax_1.family;
	cpu->arch.extended_family = cpuid.eax_1.extended_family;
	cpu->arch.model = cpuid.eax_1.model;
	cpu->arch.extended_model = cpuid.eax_1.extended_model;
	cpu->arch.stepping = cpuid.eax_1.stepping;
	dprintf("CPU %d: type %d family %d extended_family %d model %d "
		"extended_model %d stepping %d, string '%s'\n",
		currentCPU, cpu->arch.type, cpu->arch.family,
		cpu->arch.extended_family, cpu->arch.model,
		cpu->arch.extended_model, cpu->arch.stepping, vendorString);

	// figure out what vendor we have here

	for (int32 i = 0; i < VENDOR_NUM; i++) {
		if (vendor_info[i].ident_string[0]
			&& !strcmp(vendorString, vendor_info[i].ident_string[0])) {
			cpu->arch.vendor = (x86_vendors)i;
			cpu->arch.vendor_name = vendor_info[i].vendor;
			break;
		}
		if (vendor_info[i].ident_string[1]
			&& !strcmp(vendorString, vendor_info[i].ident_string[1])) {
			cpu->arch.vendor = (x86_vendors)i;
			cpu->arch.vendor_name = vendor_info[i].vendor;
			break;
		}
	}

	// see if we can get the model name
	get_current_cpuid(&cpuid, 0x80000000);
	if (cpuid.eax_0.max_eax >= 0x80000004) {
		// build the model string (need to swap ecx/edx data before copying)
		unsigned int temp;
		memset(cpu->arch.model_name, 0, sizeof(cpu->arch.model_name));

		get_current_cpuid(&cpuid, 0x80000002);
		temp = cpuid.regs.edx;
		cpuid.regs.edx = cpuid.regs.ecx;
		cpuid.regs.ecx = temp;
		memcpy(cpu->arch.model_name, cpuid.as_chars, sizeof(cpuid.as_chars));

		get_current_cpuid(&cpuid, 0x80000003);
		temp = cpuid.regs.edx;
		cpuid.regs.edx = cpuid.regs.ecx;
		cpuid.regs.ecx = temp;
		memcpy(cpu->arch.model_name + 16, cpuid.as_chars,
			sizeof(cpuid.as_chars));

		get_current_cpuid(&cpuid, 0x80000004);
		temp = cpuid.regs.edx;
		cpuid.regs.edx = cpuid.regs.ecx;
		cpuid.regs.ecx = temp;
		memcpy(cpu->arch.model_name + 32, cpuid.as_chars,
			sizeof(cpuid.as_chars));

		// some cpus return a right-justified string
		int32 i = 0;
		while (cpu->arch.model_name[i] == ' ')
			i++;
		if (i > 0) {
			memmove(cpu->arch.model_name, &cpu->arch.model_name[i],
				strlen(&cpu->arch.model_name[i]) + 1);
		}

		dprintf("CPU %d: vendor '%s' model name '%s'\n",
			currentCPU, cpu->arch.vendor_name, cpu->arch.model_name);
	} else {
		strlcpy(cpu->arch.model_name, "unknown", sizeof(cpu->arch.model_name));
	}

	// load feature bits
	get_current_cpuid(&cpuid, 1);
	cpu->arch.feature[FEATURE_COMMON] = cpuid.eax_1.features; // edx
	cpu->arch.feature[FEATURE_EXT] = cpuid.eax_1.extended_features; // ecx
	if (cpu->arch.vendor == VENDOR_AMD) {
		get_current_cpuid(&cpuid, 0x80000001);
		cpu->arch.feature[FEATURE_EXT_AMD] = cpuid.regs.edx; // edx
	}
	get_current_cpuid(&cpuid, 6);
	cpu->arch.feature[FEATURE_6_EAX] = cpuid.regs.eax;
	cpu->arch.feature[FEATURE_6_ECX] = cpuid.regs.ecx;

#if DUMP_FEATURE_STRING
	dump_feature_string(currentCPU, cpu);
#endif
}


bool
x86_check_feature(uint32 feature, enum x86_feature_type type)
{
	cpu_ent* cpu = get_cpu_struct();

#if 0
	int i;
	dprintf("x86_check_feature: feature 0x%x, type %d\n", feature, type);
	for (i = 0; i < FEATURE_NUM; i++) {
		dprintf("features %d: 0x%x\n", i, cpu->arch.feature[i]);
	}
#endif

	return (cpu->arch.feature[type] & feature) != 0;
}


void*
x86_get_double_fault_stack(int32 cpu, size_t* _size)
{
	*_size = kDoubleFaultStackSize;
	return sDoubleFaultStacks + kDoubleFaultStackSize * cpu;
}


/*!	Returns the index of the current CPU. Can only be called from the double
	fault handler.
*/
int32
x86_double_fault_get_cpu(void)
{
	addr_t stack = x86_get_stack_frame();
	return (stack - (addr_t)sDoubleFaultStacks) / kDoubleFaultStackSize;
}


//	#pragma mark -


status_t
arch_cpu_preboot_init_percpu(kernel_args* args, int cpu)
{
	// On SMP systems we want to synchronize the CPUs' TSCs, so system_time()
	// will return consistent values.
	if (smp_get_num_cpus() > 1) {
		// let the first CPU prepare the rendezvous point
		if (cpu == 0)
			sTSCSyncRendezvous = smp_get_num_cpus() - 1;

		// One CPU after the other will drop out of this loop and be caught by
		// the loop below, until the last CPU (0) gets there. Save for +/- a few
		// cycles the CPUs should pass the second loop at the same time.
		while (sTSCSyncRendezvous != cpu) {
		}

		sTSCSyncRendezvous = cpu - 1;

		while (sTSCSyncRendezvous != -1) {
		}

		// reset TSC to 0
		x86_write_msr(IA32_MSR_TSC, 0);
	}

	return B_OK;
}


static void
halt_idle(void)
{
	asm("hlt");
}


static void
amdc1e_noarat_idle(void)
{
	uint64 msr = x86_read_msr(K8_MSR_IPM);
	if (msr & K8_CMPHALT)
		x86_write_msr(K8_MSR_IPM, msr & ~K8_CMPHALT);
	halt_idle();
}


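/*!	Returns whether this is an AMD CPU that supports C1E but lacks ARAT, in
	which case amdc1e_noarat_idle() has to clear the C1E bits in the interrupt
	pending MSR before halting.
*/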
static bool
detect_amdc1e_noarat()
{
	cpu_ent* cpu = get_cpu_struct();

	if (cpu->arch.vendor != VENDOR_AMD)
		return false;

	// Family 0x12 and higher processors support ARAT.
	// Families lower than 0xf don't support C1E.
	// Family 0xf processors with model <= 0x40 don't support C1E either.
	uint32 family = cpu->arch.family + cpu->arch.extended_family;
	uint32 model = (cpu->arch.extended_model << 4) | cpu->arch.model;
	return (family < 0x12 && family > 0xf) || (family == 0xf && model > 0x40);
}


status_t
arch_cpu_init_percpu(kernel_args* args, int cpu)
{
	// Load descriptor tables for this CPU.
	x86_descriptors_init_percpu(args, cpu);

	detect_cpu(cpu);

	if (!gCpuIdleFunc) {
		if (detect_amdc1e_noarat())
			gCpuIdleFunc = amdc1e_noarat_idle;
		else
			gCpuIdleFunc = halt_idle;
	}

	if (x86_check_feature(IA32_FEATURE_EPB, FEATURE_6_ECX)) {
		uint64 msr = x86_read_msr(IA32_MSR_ENERGY_PERF_BIAS);
		if ((msr & 0xf) == ENERGY_PERF_BIAS_PERFORMANCE) {
			msr &= ~0xf;
			msr |= ENERGY_PERF_BIAS_BALANCE;
			x86_write_msr(IA32_MSR_ENERGY_PERF_BIAS, msr);
		}
	}

	return B_OK;
}


status_t
arch_cpu_init(kernel_args* args)
{
	// init the TSC -> system_time() conversion factors

	uint32 conversionFactor = args->arch_args.system_time_cv_factor;
	uint64 conversionFactorNsecs = (uint64)conversionFactor * 1000;

#ifdef __x86_64__
	// The x86_64 system_time() implementation uses 64-bit multiplication and
	// therefore shifting is not necessary for low frequencies (it's also not
	// too likely that there'll be any x86_64 CPUs clocked under 1GHz).
	__x86_setup_system_time((uint64)conversionFactor << 32,
		conversionFactorNsecs);
#else
	if (conversionFactorNsecs >> 32 != 0) {
		// the TSC frequency is < 1 GHz, which forces us to shift the factor
		__x86_setup_system_time(conversionFactor, conversionFactorNsecs >> 16,
			true);
	} else {
		// the TSC frequency is >= 1 GHz
		__x86_setup_system_time(conversionFactor, conversionFactorNsecs, false);
	}
#endif

	// Initialize descriptor tables.
	x86_descriptors_init(args);

	return B_OK;
}


status_t
arch_cpu_init_post_vm(kernel_args* args)
{
	uint32 i;

	// allocate an area for the double fault stacks
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};
	create_area_etc(B_SYSTEM_TEAM, "double fault stacks",
		kDoubleFaultStackSize * smp_get_num_cpus(), B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, CREATE_AREA_DONT_WAIT, 0,
		&virtualRestrictions, &physicalRestrictions,
		(void**)&sDoubleFaultStacks);

	// More descriptor table setup.
	x86_descriptors_init_post_vm(args);

	X86PagingStructures* kernelPagingStructures
		= static_cast<X86VMTranslationMap*>(
			VMAddressSpace::Kernel()->TranslationMap())->PagingStructures();

	// Set active translation map on each CPU.
	for (i = 0; i < args->num_cpus; i++) {
		gCPU[i].arch.active_paging_structures = kernelPagingStructures;
		kernelPagingStructures->AddReference();
	}

	if (!apic_available())
		x86_init_fpu();
	// else fpu gets set up in smp code

	return B_OK;
}


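/*!	Loads the CPU module (if one matches), initializes the MTRRs on all CPUs,
	and publishes the optimized memcpy/memset implementations in the commpage,
	including their symbols in the commpage image.
*/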
status_t
arch_cpu_init_post_modules(kernel_args* args)
{
	// initialize CPU module

	void* cookie = open_module_list("cpu");

	while (true) {
		char name[B_FILE_NAME_LENGTH];
		size_t nameLength = sizeof(name);

		if (read_next_module_name(cookie, name, &nameLength) != B_OK
			|| get_module(name, (module_info**)&sCpuModule) == B_OK)
			break;
	}

	close_module_list(cookie);

	// initialize MTRRs if available
	if (x86_count_mtrrs() > 0) {
		sCpuRendezvous = sCpuRendezvous2 = 0;
		call_all_cpus(&init_mtrrs, NULL);
	}

	// get optimized functions from the CPU module
	if (sCpuModule != NULL && sCpuModule->get_optimized_functions != NULL) {
		x86_optimized_functions functions;
		memset(&functions, 0, sizeof(functions));

		sCpuModule->get_optimized_functions(&functions);

		if (functions.memcpy != NULL) {
			gOptimizedFunctions.memcpy = functions.memcpy;
			gOptimizedFunctions.memcpy_end = functions.memcpy_end;
		}

		if (functions.memset != NULL) {
			gOptimizedFunctions.memset = functions.memset;
			gOptimizedFunctions.memset_end = functions.memset_end;
		}
	}

	// put the optimized functions into the commpage
	size_t memcpyLen = (addr_t)gOptimizedFunctions.memcpy_end
		- (addr_t)gOptimizedFunctions.memcpy;
	fill_commpage_entry(COMMPAGE_ENTRY_X86_MEMCPY,
		(const void*)gOptimizedFunctions.memcpy, memcpyLen);
	size_t memsetLen = (addr_t)gOptimizedFunctions.memset_end
		- (addr_t)gOptimizedFunctions.memset;
	fill_commpage_entry(COMMPAGE_ENTRY_X86_MEMSET,
		(const void*)gOptimizedFunctions.memset, memsetLen);

	// add the functions to the commpage image
	image_id image = get_commpage_image();
	elf_add_memory_image_symbol(image, "commpage_memcpy",
		((addr_t*)USER_COMMPAGE_ADDR)[COMMPAGE_ENTRY_X86_MEMCPY], memcpyLen,
		B_SYMBOL_TYPE_TEXT);
	elf_add_memory_image_symbol(image, "commpage_memset",
		((addr_t*)USER_COMMPAGE_ADDR)[COMMPAGE_ENTRY_X86_MEMSET], memsetLen,
		B_SYMBOL_TYPE_TEXT);

	return B_OK;
}


void
arch_cpu_user_TLB_invalidate(void)
{
	x86_write_cr3(x86_read_cr3());
}


void
arch_cpu_global_TLB_invalidate(void)
{
	uint32 flags = x86_read_cr4();

	if (flags & IA32_CR4_GLOBAL_PAGES) {
		// disable and reenable the global pages to flush all TLBs regardless
		// of the global page bit
		x86_write_cr4(flags & ~IA32_CR4_GLOBAL_PAGES);
		x86_write_cr4(flags | IA32_CR4_GLOBAL_PAGES);
	} else {
		cpu_status state = disable_interrupts();
		arch_cpu_user_TLB_invalidate();
		restore_interrupts(state);
	}
}


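/*!	Invalidates the TLB entries for the given address range, one page at a
	time, from the page containing \a start up to and including the page
	containing \a end.
*/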
void
arch_cpu_invalidate_TLB_range(addr_t start, addr_t end)
{
	int32 num_pages = end / B_PAGE_SIZE - start / B_PAGE_SIZE;
	while (num_pages-- >= 0) {
		invalidate_TLB(start);
		start += B_PAGE_SIZE;
	}
}


void
arch_cpu_invalidate_TLB_list(addr_t pages[], int num_pages)
{
	int i;
	for (i = 0; i < num_pages; i++) {
		invalidate_TLB(pages[i]);
	}
}


status_t
arch_cpu_shutdown(bool rebootSystem)
{
	if (acpi_shutdown(rebootSystem) == B_OK)
		return B_OK;

	if (!rebootSystem) {
#ifndef __x86_64__
		return apm_shutdown();
#else
		return B_NOT_SUPPORTED;
#endif
	}

	cpu_status state = disable_interrupts();

	// try to reset the system using the keyboard controller
	out8(0xfe, 0x64);

	// give the controller some time to do its job (0.5s)
	snooze(500000);

	// if that didn't help, try it this way
	x86_reboot();

	restore_interrupts(state);
	return B_ERROR;
}


void
arch_cpu_idle(void)
{
	gCpuIdleFunc();
}


void
arch_cpu_sync_icache(void* address, size_t length)
{
	// instruction cache is always consistent on x86
}


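/*!	Read memory barrier. On x86_64 this uses lfence; the 32 bit fallback
	issues a locked add to the top of the stack, which acts as a full memory
	barrier even on CPUs without SSE2.
*/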
void
arch_cpu_memory_read_barrier(void)
{
#ifdef __x86_64__
	asm volatile("lfence" : : : "memory");
#else
	asm volatile ("lock;" : : : "memory");
	asm volatile ("addl $0, 0(%%esp);" : : : "memory");
#endif
}


void
arch_cpu_memory_write_barrier(void)
{
#ifdef __x86_64__
	asm volatile("sfence" : : : "memory");
#else
	asm volatile ("lock;" : : : "memory");
	asm volatile ("addl $0, 0(%%esp);" : : : "memory");
#endif
}