1/* 2 * Copyright (c) 2003-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56 57 58#include <mach/i386/vm_param.h> 59 60#include <string.h> 61#include <mach/vm_param.h> 62#include <mach/vm_prot.h> 63#include <mach/machine.h> 64#include <mach/time_value.h> 65#include <kern/spl.h> 66#include <kern/assert.h> 67#include <kern/debug.h> 68#include <kern/misc_protos.h> 69#include <kern/startup.h> 70#include <kern/clock.h> 71#include <kern/pms.h> 72#include <kern/xpr.h> 73#include <kern/cpu_data.h> 74#include <kern/processor.h> 75#include <sys/kdebug.h> 76#include <console/serial_protos.h> 77#include <vm/vm_page.h> 78#include <vm/pmap.h> 79#include <vm/vm_kern.h> 80#include <machine/pal_routines.h> 81#include <i386/fpu.h> 82#include <i386/pmap.h> 83#include <i386/misc_protos.h> 84#include <i386/cpu_threads.h> 85#include <i386/cpuid.h> 86#include <i386/lapic.h> 87#include <i386/mp.h> 88#include <i386/mp_desc.h> 89#if CONFIG_MTRR 90#include <i386/mtrr.h> 91#endif 92#include <i386/machine_routines.h> 93#if CONFIG_MCA 94#include <i386/machine_check.h> 95#endif 96#include <i386/ucode.h> 97#include <i386/postcode.h> 98#include <i386/Diagnostics.h> 99#include <i386/pmCPU.h> 100#include <i386/tsc.h> 101#include <i386/locks.h> /* LcksOpts */ 102#if DEBUG 103#include <machine/pal_routines.h> 104#endif 105#if DEBUG 106#define DBG(x...) kprintf(x) 107#else 108#define DBG(x...) 
#endif

int debug_task;

static boot_args *kernelBootArgs;

extern int disableConsoleOutput;
extern const char version[];
extern const char version_variant[];
extern int nx_enabled;

/* Bounds of the physical-memory map window; set in physmap_init() */
uint64_t physmap_base, physmap_max;

/* The bootstrap ("idle") page-table hierarchy, built in Idle_PTs_init() */
pd_entry_t *KPTphys;	/* level 1 (page tables) */
pd_entry_t *IdlePTD;	/* level 2 (page directories) */
pdpt_entry_t *IdlePDPT;	/* level 3 */
pml4_entry_t *IdlePML4;	/* level 4 */

/* Bump pointer for early physical-page allocation; see ALLOCPAGES() */
char *physfree;

/*
 * Note: ALLOCPAGES() can only be used safely within Idle_PTs_init()
 * due to the mutation of physfree.
 *
 * Carves npages zeroed, page-aligned pages off the physfree bump
 * allocator.  The returned pointer is the high kernel virtual alias
 * of the allocation (physfree plus the fixed virtual offset), not
 * the physical address itself.
 */
static void *
ALLOCPAGES(int npages)
{
	uintptr_t tmp = (uintptr_t)physfree;
	bzero(physfree, npages * PAGE_SIZE);
	physfree += npages * PAGE_SIZE;
	/* Convert to the high kernel virtual alias of the pages */
	tmp += VM_MIN_KERNEL_ADDRESS & ~LOW_4GB_MASK;
	return (void *)tmp;
}

/*
 * Populate `count' consecutive entries of the page-table page `base',
 * starting at entry `index', mapping successive page-sized steps from
 * `src' with the given protection bits (INTEL_PTE_VALID is OR'd in
 * unconditionally).
 */
static void
fillkpt(pt_entry_t *base, int prot, uintptr_t src, int index, int count)
{
	int i;
	for (i = 0; i < count; i++) {
		base[index] = src | prot | INTEL_PTE_VALID;
		src += PAGE_SIZE;
		index++;
	}
}

extern pmap_paddr_t first_avail;

int break_kprintf = 0;

/*
 * Prepare the bootstrap page tables for sleep: alias PML4 slot 0 to
 * the kernel slot so low addresses stay mapped across the transition,
 * then point CR3 at the physical address of IdlePML4.  Returns the
 * previous CR3 so x86_64_post_sleep() can restore it.
 */
uint64_t
x86_64_pre_sleep(void)
{
	IdlePML4[0] = IdlePML4[KERNEL_PML4_INDEX];
	uint64_t oldcr3 = get_cr3_raw();
	/* NOTE(review): the new CR3 value is truncated to 32 bits here --
	 * presumably IdlePML4 lives below 4GB at this point; confirm. */
	set_cr3_raw((uint32_t) (uintptr_t)ID_MAP_VTOP(IdlePML4));
	return oldcr3;
}

/*
 * Undo x86_64_pre_sleep(): drop the low-memory PML4 alias and restore
 * the CR3 value captured before sleep.
 */
void
x86_64_post_sleep(uint64_t new_cr3)
{
	IdlePML4[0] = 0;
	set_cr3_raw((uint32_t) new_cr3);
}




// Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address
// NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account
// for the PCI hole (which is less than 4GB but not more).

/* Compile-time guard: NPHYSMAP is capped to 256GiB, accounting for
 * randomisation
 */
extern int maxphymapsupported[NPHYSMAP <= (PTE_PER_PAGE/2) ? 1 : -1];

/*
 * Build the "physmap": a window of NPHYSMAP GB mapping physical memory
 * linearly with 2MB (PS) pages, writable and NX at every level.  The
 * L3 (PDPT) entries are slid by a random 0..255 offset derived from
 * early_random(); the compile-time guard above keeps NPHYSMAP small
 * enough that the slid index cannot overflow the 512-entry L3 page.
 */
static void
physmap_init(void)
{
	pt_entry_t *physmapL3 = ALLOCPAGES(1);
	struct {
		pt_entry_t entries[PTE_PER_PAGE];
	} * physmapL2 = ALLOCPAGES(NPHYSMAP);

	uint64_t i;
	uint8_t phys_random_L3 = early_random() & 0xFF;

	/* We assume NX support. Mark all levels of the PHYSMAP NX
	 * to avoid granting executability via a single bit flip.
	 */
#if DEVELOPMENT || DEBUG
	uint32_t reg[4];
	do_cpuid(0x80000000, reg);
	if (reg[eax] >= 0x80000001) {
		do_cpuid(0x80000001, reg);
		assert(reg[edx] & CPUID_EXTFEATURE_XD);
	}
#endif /* DEVELOPMENT || DEBUG */

	for (i = 0; i < NPHYSMAP; i++) {
		/* One slid L3 entry per GB, pointing at an L2 page... */
		physmapL3[i + phys_random_L3] =
		    ((uintptr_t)ID_MAP_VTOP(&physmapL2[i]))
		    | INTEL_PTE_VALID
		    | INTEL_PTE_NX
		    | INTEL_PTE_WRITE;

		/* ...whose PTE_PER_PAGE 2MB entries cover that GB of
		 * physical memory linearly. */
		uint64_t j;
		for (j = 0; j < PTE_PER_PAGE; j++) {
			physmapL2[i].entries[j] =
			    ((i * PTE_PER_PAGE + j) << PDSHIFT)
			    | INTEL_PTE_PS
			    | INTEL_PTE_VALID
			    | INTEL_PTE_NX
			    | INTEL_PTE_WRITE;
		}
	}

	/* Hook the physmap L3 page into its dedicated PML4 slot */
	IdlePML4[KERNEL_PHYSMAP_PML4_INDEX] =
	    ((uintptr_t)ID_MAP_VTOP(physmapL3))
	    | INTEL_PTE_VALID
	    | INTEL_PTE_NX
	    | INTEL_PTE_WRITE;

	/* Publish the (randomized) bounds of the window */
	physmap_base = KVADDR(KERNEL_PHYSMAP_PML4_INDEX, phys_random_L3, 0, 0);
	physmap_max = physmap_base + NPHYSMAP * GB;
	DBG("Physical address map base: 0x%qx\n", physmap_base);
	DBG("Physical map idlepml4[%d]: 0x%llx\n",
	    KERNEL_PHYSMAP_PML4_INDEX, IdlePML4[KERNEL_PHYSMAP_PML4_INDEX]);
}

/*
 * Point the fixed GDT/IDT alias pages at the physical pages backing
 * the master GDT and IDT64, by editing KPTphys entries directly.
 */
static void
descriptor_alias_init()
{
	vm_offset_t master_gdt_phys;
	vm_offset_t master_gdt_alias_phys;
	vm_offset_t master_idt_phys;
	vm_offset_t master_idt_alias_phys;

	/* Both descriptor tables must be page-aligned to be aliased */
	assert(((vm_offset_t)master_gdt & PAGE_MASK) == 0);
	assert(((vm_offset_t)master_idt64 & PAGE_MASK) == 0);

	master_gdt_phys = (vm_offset_t) ID_MAP_VTOP(master_gdt);
	master_idt_phys = (vm_offset_t) ID_MAP_VTOP(master_idt64);
	master_gdt_alias_phys = (vm_offset_t)
	    ID_MAP_VTOP(MASTER_GDT_ALIAS);
	master_idt_alias_phys = (vm_offset_t) ID_MAP_VTOP(MASTER_IDT_ALIAS);

	DBG("master_gdt_phys: %p\n", (void *) master_gdt_phys);
	DBG("master_idt_phys: %p\n", (void *) master_idt_phys);
	DBG("master_gdt_alias_phys: %p\n", (void *) master_gdt_alias_phys);
	DBG("master_idt_alias_phys: %p\n", (void *) master_idt_alias_phys);

	/* Rewrite the kernel page-table entries for the alias addresses so
	 * they point at the real descriptor-table pages.  The GDT alias is
	 * writable; the IDT alias is mapped without write permission. */
	KPTphys[atop_kernel(master_gdt_alias_phys)] = master_gdt_phys |
	    INTEL_PTE_VALID | INTEL_PTE_NX | INTEL_PTE_WRITE;
	KPTphys[atop_kernel(master_idt_alias_phys)] = master_idt_phys |
	    INTEL_PTE_VALID | INTEL_PTE_NX;	/* read-only */
}

/*
 * Allocate and populate the kernel's own 4K-page ("idle") bootstrap
 * page tables, build the physmap and the descriptor aliases, then
 * switch CR3 onto the new hierarchy.  A postcode is posted before
 * each phase so a hang can be localized from the POST display.
 */
static void
Idle_PTs_init(void)
{
	/* Allocate the "idle" kernel page tables: */
	KPTphys = ALLOCPAGES(NKPT);	/* level 1 */
	IdlePTD = ALLOCPAGES(NPGPTD);	/* level 2 */
	IdlePDPT = ALLOCPAGES(1);	/* level 3 */
	IdlePML4 = ALLOCPAGES(1);	/* level 4 */

	// Fill the lowest level with everything up to physfree
	fillkpt(KPTphys,
	    INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT));

	/* IdlePTD */
	fillkpt(IdlePTD,
	    INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT);

	// IdlePDPT entries
	fillkpt(IdlePDPT,
	    INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePTD), 0, NPGPTD);

	// IdlePML4 single entry for kernel space.
	fillkpt(IdlePML4 + KERNEL_PML4_INDEX,
	    INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePDPT), 0, 1);

	postcode(VSTART_PHYSMAP_INIT);

	physmap_init();

	postcode(VSTART_DESC_ALIAS_INIT);

	descriptor_alias_init();

	postcode(VSTART_SET_CR3);

	// Switch to the page tables..
	set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));

}


/*
 * vstart() is called in the natural mode (64bit for K64, 32 for K32)
 * on a set of bootstrap pagetables which use large, 2MB pages to map
 * all of physical memory in both. See idle_pt.c for details.
 *
 * In K64 this identity mapping is mirrored in the top and bottom 512GB
 * slots of PML4.
 *
 * The bootstrap processor is called with argument boot_args_start pointing to
 * the boot-args block. The kernel's (4K page) page tables are allocated and
 * initialized before switching to these.
 *
 * Non-bootstrap processors are called with argument boot_args_start NULL.
 * These processors switch immediately to the existing kernel page tables.
 */
void
vstart(vm_offset_t boot_args_start)
{
	boolean_t is_boot_cpu = !(boot_args_start == 0);
	int cpu;
	uint32_t lphysfree;

	postcode(VSTART_ENTRY);

	if (is_boot_cpu) {
		/*
		 * Get startup parameters.
		 */
		kernelBootArgs = (boot_args *)boot_args_start;
		lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize;
		/* First free physical page: end of the loaded kernel,
		 * rounded up to a page boundary. */
		physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) &~ (PAGE_SIZE - 1));
#if DEBUG
		pal_serial_init();
#endif
		DBG("revision 0x%x\n", kernelBootArgs->Revision);
		DBG("version 0x%x\n", kernelBootArgs->Version);
		DBG("command line %s\n", kernelBootArgs->CommandLine);
		DBG("memory map 0x%x\n", kernelBootArgs->MemoryMap);
		DBG("memory map sz 0x%x\n", kernelBootArgs->MemoryMapSize);
		DBG("kaddr 0x%x\n", kernelBootArgs->kaddr);
		DBG("ksize 0x%x\n", kernelBootArgs->ksize);
		DBG("physfree %p\n", physfree);
		DBG("bootargs: %p, &ksize: %p &kaddr: %p\n",
		    kernelBootArgs,
		    &kernelBootArgs->ksize,
		    &kernelBootArgs->kaddr);
		/*
		 * Setup boot args given the physical start address.
		 * Note: PE_init_platform needs to be called before Idle_PTs_init
		 * because access to the DeviceTree is required to read the
		 * random seed before generating a random physical map slide.
		 */
		kernelBootArgs = (boot_args *)
		    ml_static_ptovirt(boot_args_start);
		DBG("i386_init(0x%lx) kernelBootArgs=%p\n",
		    (unsigned long)boot_args_start, kernelBootArgs);
		PE_init_platform(FALSE, kernelBootArgs);
		postcode(PE_INIT_PLATFORM_D);

		Idle_PTs_init();
		postcode(VSTART_IDLE_PTS_INIT);

		/* Everything below physfree is now mapped/used; hand the
		 * rest to the pmap layer as the first available page. */
		first_avail = (vm_offset_t)ID_MAP_VTOP(physfree);

		cpu = 0;
		cpu_data_alloc(TRUE);
	} else {
		/* Switch to kernel's page tables (from the Boot PTs) */
		set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
		/* Find our logical cpu number */
		cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
		DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE));
	}

	postcode(VSTART_CPU_DESC_INIT);
	if(is_boot_cpu)
		cpu_desc_init64(cpu_datap(cpu));
	cpu_desc_load64(cpu_datap(cpu));
	postcode(VSTART_CPU_MODE_INIT);
	if (is_boot_cpu)
		cpu_mode_init(current_cpu_datap()); /* cpu_mode_init() will be
						     * invoked on the APs
						     * via i386_init_slave()
						     */
	postcode(VSTART_EXIT);
	/* Hand off to i386_init() (boot CPU) or i386_init_slave() (APs),
	 * passing this CPU's interrupt stack top. */
	x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init
	    : (uintptr_t) i386_init_slave,
	    cpu_datap(cpu)->cpu_int_stack_top);
}

/* No-op hook; kept as an instrumentation point for p-state tracing. */
void
pstate_trace(void)
{
}

/*
 * Cpu initialization. Running virtual, but without MACH VM
 * set up.
 */
void
i386_init(void)
{
	unsigned int maxmem;
	uint64_t maxmemtouse;
	unsigned int cpus = 0;
	boolean_t fidn;
	boolean_t IA32e = TRUE;

	postcode(I386_INIT_ENTRY);

	pal_i386_init();
	tsc_init();
	rtclock_early_init();	/* mach_absolute_time() now functional */

	kernel_debug_string("i386_init");
	pstate_trace();

#if CONFIG_MCA
	/* Initialize machine-check handling */
	mca_cpu_init();
#endif

	master_cpu = 0;
	cpu_init();

	postcode(CPU_INIT_D);

	printf_init();	/* Init this in case we need debugger */
	panic_init();	/* Init this in case we need debugger */

	/* setup debugging output if one has been chosen */
	kernel_debug_string("PE_init_kprintf");
	PE_init_kprintf(FALSE);

	kernel_debug_string("kernel_early_bootstrap");
	kernel_early_bootstrap();

	/* "diag" boot-arg enables optional diagnostics; default off */
	if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags)))
		dgWork.dgFlags = 0;

	serialmode = 0;
	if(PE_parse_boot_argn("serial", &serialmode, sizeof (serialmode))) {
		/* We want a serial keyboard and/or console */
		kprintf("Serial mode specified: %08X\n", serialmode);
	}
	if(serialmode & 1) {
		(void)switch_to_serial_console();
		disableConsoleOutput = FALSE;	/* Allow printfs to happen */
	}

	/* setup console output */
	kernel_debug_string("PE_init_printf");
	PE_init_printf(FALSE);

	kprintf("version_variant = %s\n", version_variant);
	kprintf("version = %s\n", version);

	/* "maxmem" boot-arg (in MB) caps usable physical memory; 0 = no cap */
	if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof (maxmem)))
		maxmemtouse = 0;
	else
		maxmemtouse = ((uint64_t)maxmem) * MB;

	/* "cpus" boot-arg lowers the number of CPUs to bring up */
	if (PE_parse_boot_argn("cpus", &cpus, sizeof (cpus))) {
		if ((0 < cpus) && (cpus < max_ncpus))
			max_ncpus = cpus;
	}

	/*
	 * debug support for > 4G systems
	 */
	PE_parse_boot_argn("himemory_mode", &vm_himemory_mode, sizeof (vm_himemory_mode));
	if (vm_himemory_mode != 0)
		kprintf("himemory_mode: %d\n", vm_himemory_mode);

	if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof (fidn)))
		force_immediate_debugger_NMI = FALSE;
	else
		force_immediate_debugger_NMI = fidn;

#if DEBUG
	nanoseconds_to_absolutetime(URGENCY_NOTIFICATION_ASSERT_NS, &urgency_notification_assert_abstime_threshold);
#endif
	PE_parse_boot_argn("urgency_notification_abstime",
	    &urgency_notification_assert_abstime_threshold,
	    sizeof(urgency_notification_assert_abstime_threshold));

	/* Disable the NX policy if the CPU lacks the XD/NX feature */
	if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD))
		nx_enabled = 0;

	/*
	 * VM initialization, after this we're using page tables...
	 * The maximum number of cpus must be set beforehand.
	 */
	kernel_debug_string("i386_vm_init");
	i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);

	/* create the console for verbose or pretty mode */
	/* Note: doing this prior to tsc_init() allows for graceful panic! */
	PE_init_platform(TRUE, kernelBootArgs);
	PE_create_console();

	kernel_debug_string("power_management_init");
	power_management_init();
	processor_bootstrap();
	thread_bootstrap();

	pstate_trace();
	kernel_debug_string("machine_startup");
	machine_startup();
	pstate_trace();
}

/*
 * Common slave-CPU startup path, shared by a cold AP start
 * (i386_init_slave) and a fast restart after halt
 * (i386_init_slave_fast).  On a full start, caching, LAPIC, FPU,
 * MTRR and microcode state are (re)initialized first.  Ends in
 * slave_main() and is not expected to return.
 */
static void
do_init_slave(boolean_t fast_restart)
{
	void *init_param = FULL_SLAVE_INIT;

	postcode(I386_INIT_SLAVE);

	if (!fast_restart) {
		/* Ensure that caching and write-through are enabled */
		set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));

		DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
		    get_cpu_number(), get_cpu_phys_number());

		assert(!ml_get_interrupts_enabled());

		cpu_mode_init(current_cpu_datap());
		pmap_cpu_init();

#if CONFIG_MCA
		mca_cpu_init();
#endif

		LAPIC_INIT();
		lapic_configure();
		LAPIC_DUMP();
		LAPIC_CPU_MAP_DUMP();

		init_fpu();

#if CONFIG_MTRR
		mtrr_update_cpu();
#endif
		/*
		 update CPU microcode */
		ucode_update_wake();
	} else
		init_param = FAST_SLAVE_INIT;	/* hardware state assumed preserved */

#if CONFIG_VMX
	/* resume VT operation */
	vmx_resume();
#endif

#if CONFIG_MTRR
	if (!fast_restart)
		pat_init();
#endif

	cpu_thread_init();	/* not strictly necessary */

	cpu_init();	/* Sets cpu_running which starter cpu waits for */
	slave_main(init_param);

	panic("do_init_slave() returned from slave_main()");
}

/*
 * i386_init_slave() is called from pstart.
 * We're in the cpu's interrupt stack with interrupts disabled.
 * At this point we are in legacy mode. We need to switch on IA32e
 * if the mode is set to 64-bits.
 */
void
i386_init_slave(void)
{
	do_init_slave(FALSE);
}

/*
 * i386_init_slave_fast() is called from pmCPUHalt.
 * We're running on the idle thread and need to fix up
 * some accounting and get it so that the scheduler sees this
 * CPU again.
 */
void
i386_init_slave_fast(void)
{
	do_init_slave(TRUE);
}