/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

#include <kern/cpu_number.h>
#include <kern/kalloc.h>
#include <kern/cpu_data.h>
#include <mach/mach_types.h>
#include <mach/machine.h>
#include <mach/vm_map.h>
#include <mach/machine/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>

#include <i386/bit_routines.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/mp.h>
#include <i386/pmap.h>
#if defined(__i386__) || defined(__x86_64__)
#include <i386/pmap_internal.h>
#endif /* i386 */
#if CONFIG_MCA
#include <i386/machine_check.h>
#endif

#include <kern/misc_protos.h>

#define K_INTR_GATE	(ACC_P|ACC_PL_K|ACC_INTR_GATE)
#define U_INTR_GATE	(ACC_P|ACC_PL_U|ACC_INTR_GATE)

// Declare macros that will declare the externs
#define TRAP(n, name)		extern void *name;
#define TRAP_ERR(n, name)	extern void *name;
#define TRAP_SPC(n, name)	extern void *name;
#define TRAP_IST1(n, name)	extern void *name;
#define TRAP_IST2(n, name)	extern void *name;
#define INTERRUPT(n)		extern void *_intr_ ## n;
#define USER_TRAP(n, name)	extern void *name;
#define USER_TRAP_SPC(n, name)	extern void *name;

// Include the table to declare the externs
#include "../x86_64/idt_table.h"

// Undef the macros, then redefine them so we can declare the table
#undef TRAP
#undef TRAP_ERR
#undef TRAP_SPC
#undef TRAP_IST1
#undef TRAP_IST2
#undef INTERRUPT
#undef USER_TRAP
#undef USER_TRAP_SPC

#define TRAP(n, name)			\
	[n] = {				\
		(uintptr_t)&name,	\
		KERNEL64_CS,		\
		0,			\
		K_INTR_GATE,		\
		0			\
	},

#define TRAP_ERR	TRAP
#define TRAP_SPC	TRAP

#define TRAP_IST1(n, name)		\
	[n] = {				\
		(uintptr_t)&name,	\
		KERNEL64_CS,		\
		1,			\
		K_INTR_GATE,		\
		0			\
	},

#define TRAP_IST2(n, name)		\
	[n] = {				\
		(uintptr_t)&name,	\
		KERNEL64_CS,		\
		2,			\
		K_INTR_GATE,		\
		0			\
	},

#define INTERRUPT(n)			\
	[n] = {				\
		(uintptr_t)&_intr_ ## n,\
		KERNEL64_CS,		\
		0,			\
		K_INTR_GATE,		\
		0			\
	},

#define USER_TRAP(n, name)		\
	[n] = {				\
		(uintptr_t)&name,	\
		KERNEL64_CS,		\
		0,			\
		U_INTR_GATE,		\
		0			\
	},

#define USER_TRAP_SPC	USER_TRAP

// Declare the table using the macros we just set up
struct fake_descriptor64 master_idt64[IDTSZ]
	__attribute__ ((section("__HIB,__desc")))
	__attribute__ ((aligned(PAGE_SIZE))) = {
#include "../x86_64/idt_table.h"
};
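
/*
 * The double inclusion of idt_table.h above is the classic "X-macro"
 * pattern: the same table header expands first into extern declarations
 * and then into designated array initializers, so the two views can
 * never drift apart.  A minimal, self-contained sketch of the technique
 * (hypothetical names, not part of this file):
 *
 *	// vec_table.h -- one line per vector, deliberately no include guard:
 *	//	VEC(0, handler_a)
 *	//	VEC(1, handler_b)
 *
 *	#define VEC(n, name)	extern void *name;
 *	#include "vec_table.h"		// pass 1: extern declarations
 *	#undef VEC
 *	#define VEC(n, name)	[n] = { (uintptr_t)&name },
 *	struct vec_entry { uintptr_t fn; } vec_table[] = {
 *	#include "vec_table.h"		// pass 2: designated initializers
 *	};
 *	#undef VEC
 */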

/*
 * First CPU's interrupt stack.
 */
extern uint32_t		low_intstack[];		/* bottom */
extern uint32_t		low_eintstack[];	/* top */

/*
 * Per-cpu data area pointers.
 * The master cpu (cpu 0) has its data area statically allocated;
 * others are allocated dynamically and this array is updated at runtime.
 */
static cpu_data_t	cpu_data_master = {
	.cpu_this = &cpu_data_master,
	.cpu_nanotime = &pal_rtc_nanotime_info,
	.cpu_int_stack_top = (vm_offset_t) low_eintstack,
};
cpu_data_t	*cpu_data_ptr[MAX_CPUS] = { [0] = &cpu_data_master };

decl_simple_lock_data(,ncpus_lock);	/* protects real_ncpus */
unsigned int	real_ncpus = 1;
unsigned int	max_ncpus = MAX_CPUS;

extern void hi64_sysenter(void);
extern void hi64_syscall(void);

/*
 * Multiprocessor i386/i486 systems use a separate copy of the
 * GDT, IDT, LDT, and kernel TSS per processor.  The first three
 * are separate to avoid lock contention: the i386 uses locked
 * memory cycles to access the descriptor tables.  The TSS is
 * separate since each processor needs its own kernel stack,
 * and since using a TSS marks it busy.
 */

/*
 * Allocate and initialize the per-processor descriptor tables.
 */

struct fake_descriptor ldt_desc_pattern = {
	(unsigned int) 0,
	LDTSZ_MIN * sizeof(struct fake_descriptor) - 1,
	0,
	ACC_P|ACC_PL_K|ACC_LDT
};

struct fake_descriptor tss_desc_pattern = {
	(unsigned int) 0,
	sizeof(struct i386_tss) - 1,
	0,
	ACC_P|ACC_PL_K|ACC_TSS
};

struct fake_descriptor cpudata_desc_pattern = {
	(unsigned int) 0,
	sizeof(cpu_data_t)-1,
	SZ_32,
	ACC_P|ACC_PL_K|ACC_DATA_W
};

#if NCOPY_WINDOWS > 0
struct fake_descriptor userwindow_desc_pattern = {
	(unsigned int) 0,
	((NBPDE * NCOPY_WINDOWS) / PAGE_SIZE) - 1,
	SZ_32 | SZ_G,
	ACC_P|ACC_PL_U|ACC_DATA_W
};
#endif

struct fake_descriptor physwindow_desc_pattern = {
	(unsigned int) 0,
	PAGE_SIZE - 1,
	SZ_32,
	ACC_P|ACC_PL_K|ACC_DATA_W
};

/*
 * This is the expanded, 64-bit variant of the kernel LDT descriptor.
 * When switching to 64-bit mode this replaces the KERNEL_LDT entry
 * and the following empty slot. This enables the LDT to be referenced
 * in the uber-space remapping window on the kernel.
 */
struct fake_descriptor64 kernel_ldt_desc64 = {
	0,
	LDTSZ_MIN*sizeof(struct fake_descriptor)-1,
	0,
	ACC_P|ACC_PL_K|ACC_LDT,
	0
};

/*
 * This is the expanded, 64-bit variant of the kernel TSS descriptor.
 * It follows the pattern of the KERNEL_LDT.
 */
struct fake_descriptor64 kernel_tss_desc64 = {
	0,
	sizeof(struct x86_64_tss)-1,
	0,
	ACC_P|ACC_PL_K|ACC_TSS,
	0
};
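
/*
 * These patterns carry every field except the base address; a live
 * descriptor is produced by copying a pattern, stamping in the base,
 * and converting with fix_desc() below.  A hedged sketch of that usage
 * (slot and per-CPU names hypothetical; it mirrors the cast idiom this
 * file uses for the 64-bit descriptors):
 *
 *	struct fake_descriptor ldt_slot = ldt_desc_pattern;
 *	ldt_slot.offset = (vm_offset_t) this_cpu_ldt;		// stamp base
 *	*(struct fake_descriptor *) &gdt[sel_idx(KERNEL_LDT)] = ldt_slot;
 *	fix_desc(&gdt[sel_idx(KERNEL_LDT)], 1);		// swizzle to HW layout
 */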

/*
 * Convert a descriptor from fake to real format.
 *
 * Fake descriptor format:
 *	bytes 0..3		base 31..0
 *	bytes 4..5		limit 15..0
 *	byte  6			access byte 2 | limit 19..16
 *	byte  7			access byte 1
 *
 * Real descriptor format:
 *	bytes 0..1		limit 15..0
 *	bytes 2..3		base 15..0
 *	byte  4			base 23..16
 *	byte  5			access byte 1
 *	byte  6			access byte 2 | limit 19..16
 *	byte  7			base 31..24
 *
 * Fake gate format:
 *	bytes 0..3		offset
 *	bytes 4..5		selector
 *	byte  6			word count << 4 (to match fake descriptor)
 *	byte  7			access byte 1
 *
 * Real gate format:
 *	bytes 0..1		offset 15..0
 *	bytes 2..3		selector
 *	byte  4			word count
 *	byte  5			access byte 1
 *	bytes 6..7		offset 31..16
 */
void
fix_desc(void *d, int num_desc) {
	//early_kprintf("fix_desc(%x, %x)\n", d, num_desc);
	uint8_t *desc = (uint8_t*) d;

	do {
		if ((desc[7] & 0x14) == 0x04) { /* gate */
			uint32_t offset;
			uint16_t selector;
			uint8_t wordcount;
			uint8_t acc;

			offset = *((uint32_t*)(desc));
			selector = *((uint16_t*)(desc+4));
			wordcount = desc[6] >> 4;
			acc = desc[7];

			*((uint16_t*)desc) = offset & 0xFFFF;
			*((uint16_t*)(desc+2)) = selector;
			desc[4] = wordcount;
			desc[5] = acc;
			*((uint16_t*)(desc+6)) = offset >> 16;

		} else { /* descriptor */
			uint32_t base;
			uint16_t limit;
			uint8_t acc1, acc2;

			base = *((uint32_t*)(desc));
			limit = *((uint16_t*)(desc+4));
			acc2 = desc[6];
			acc1 = desc[7];

			*((uint16_t*)(desc)) = limit;
			*((uint16_t*)(desc+2)) = base & 0xFFFF;
			desc[4] = (base >> 16) & 0xFF;
			desc[5] = acc1;
			desc[6] = acc2;
			desc[7] = base >> 24;
		}
		desc += 8;
	} while (--num_desc);
}
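
/*
 * Worked example of the in-place swizzle above (values illustrative):
 * a fake descriptor with base 0x00C0FFEE, limit 0xABCD and access
 * bytes a1/a2 is stored little-endian as
 *
 *	fake:	EE FF C0 00  CD AB  a2  a1	(base, limit, acc2, acc1)
 *
 * and fix_desc() rearranges the same eight bytes into the hardware
 * layout:
 *
 *	real:	CD AB  EE FF  C0  a1  a2  00	(limit, base 15..0,
 *						 base 23..16, acc1, acc2,
 *						 base 31..24)
 */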

void
fix_desc64(void *descp, int count)
{
	struct fake_descriptor64	*fakep;
	union {
		struct real_gate64		gate;
		struct real_descriptor64	desc;
	}				real;
	int				i;

	fakep = (struct fake_descriptor64 *) descp;

	for (i = 0; i < count; i++, fakep++) {
		/*
		 * Construct the real descriptor locally.
		 */

		bzero((void *) &real, sizeof(real));

		switch (fakep->access & ACC_TYPE) {
		case 0:
			break;
		case ACC_CALL_GATE:
		case ACC_INTR_GATE:
		case ACC_TRAP_GATE:
			real.gate.offset_low16 = (uint16_t)(fakep->offset64 & 0xFFFF);
			real.gate.selector16 = fakep->lim_or_seg & 0xFFFF;
			real.gate.IST = fakep->size_or_IST & 0x7;
			real.gate.access8 = fakep->access;
			real.gate.offset_high16 = (uint16_t)((fakep->offset64>>16) & 0xFFFF);
			real.gate.offset_top32 = (uint32_t)(fakep->offset64>>32);
			break;
		default:	/* Otherwise */
			real.desc.limit_low16 = fakep->lim_or_seg & 0xFFFF;
			real.desc.base_low16 = (uint16_t)(fakep->offset64 & 0xFFFF);
			real.desc.base_med8 = (uint8_t)((fakep->offset64 >> 16) & 0xFF);
			real.desc.access8 = fakep->access;
			real.desc.limit_high4 = (fakep->lim_or_seg >> 16) & 0xFF;
			real.desc.granularity4 = fakep->size_or_IST;
			real.desc.base_high8 = (uint8_t)((fakep->offset64 >> 24) & 0xFF);
			real.desc.base_top32 = (uint32_t)(fakep->offset64>>32);
		}

		/*
		 * Now copy back over the fake structure.
		 */
		bcopy((void *) &real, (void *) fakep, sizeof(real));
	}
}

static void
cpu_gdt_alias(vm_map_offset_t gdt, vm_map_offset_t alias)
{
	pt_entry_t *pte = NULL;

	/* Require page alignment */
	assert(page_aligned(gdt));
	assert(page_aligned(alias));

	pte = pmap_pte(kernel_pmap, alias);
	pmap_store_pte(pte, kvtophys(gdt) | INTEL_PTE_REF
					  | INTEL_PTE_MOD
					  | INTEL_PTE_WIRED
					  | INTEL_PTE_VALID
					  | INTEL_PTE_WRITE
					  | INTEL_PTE_NX);

	/* TLB flush unnecessary because target processor isn't running yet */
}
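
/*
 * A note on the aliasing above: cpu_gdt_alias() points a second virtual
 * page (the fixed per-CPU alias address) at the physical page backing
 * the heap-allocated GDT, so the descriptor-table register never holds
 * a heap address.  Illustrative sketch of the effect (addresses
 * hypothetical):
 *
 *	heap copy:  &cdt->gdt           -> physical frame P
 *	alias:      CPU_GDT_ALIAS(cpu)  -> same physical frame P
 *
 * Writes through either mapping are visible through the other, and
 * sgdt executed from user space can learn only the fixed alias, not
 * the kernel heap layout.
 */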

void
cpu_desc_init64(cpu_data_t *cdp)
{
	cpu_desc_index_t	*cdi = &cdp->cpu_desc_index;

	if (cdp == &cpu_data_master) {
		/*
		 * Master CPU uses the tables built at boot time.
		 * Just set the index pointers to the low memory space.
		 */
		cdi->cdi_ktss = (void *)&master_ktss64;
		cdi->cdi_sstk = (vm_offset_t) &master_sstk.top;
		cdi->cdi_gdt.ptr = (void *)MASTER_GDT_ALIAS;
		cdi->cdi_idt.ptr = (void *)MASTER_IDT_ALIAS;
		cdi->cdi_ldt = (struct fake_descriptor *) master_ldt;

		/* Replace the expanded LDTs and TSS slots in the GDT */
		kernel_ldt_desc64.offset64 = (uintptr_t) &master_ldt;
		*(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_LDT)] =
			kernel_ldt_desc64;
		*(struct fake_descriptor64 *) &master_gdt[sel_idx(USER_LDT)] =
			kernel_ldt_desc64;
		kernel_tss_desc64.offset64 = (uintptr_t) &master_ktss64;
		*(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_TSS)] =
			kernel_tss_desc64;

		/* Fix up the expanded descriptors for 64-bit. */
		fix_desc64((void *) &master_idt64, IDTSZ);
		fix_desc64((void *) &master_gdt[sel_idx(KERNEL_LDT)], 1);
		fix_desc64((void *) &master_gdt[sel_idx(USER_LDT)], 1);
		fix_desc64((void *) &master_gdt[sel_idx(KERNEL_TSS)], 1);

		/*
		 * Set the NMI/fault stacks as IST2/IST1 in the 64-bit TSS.
		 * Note: this will be dynamically re-allocated in VM later.
		 */
		master_ktss64.ist2 = (uintptr_t) low_eintstack;
		master_ktss64.ist1 = (uintptr_t) low_eintstack
					- sizeof(x86_64_intr_stack_frame_t);

	} else if (cdi->cdi_ktss == NULL) {	/* Skipping re-init on wake */
		cpu_desc_table64_t	*cdt = (cpu_desc_table64_t *) cdp->cpu_desc_tablep;

		/*
		 * Per-cpu GDT, IDT, KTSS descriptors are allocated in kernel
		 * heap (cpu_desc_table).
		 * LDT descriptors are mapped into a separate area.
		 * GDT descriptors are addressed by alias to avoid sgdt leaks to user-space.
		 */
		cdi->cdi_idt.ptr = (void *)MASTER_IDT_ALIAS;
		cdi->cdi_gdt.ptr = (void *)CPU_GDT_ALIAS(cdp->cpu_number);
		cdi->cdi_ktss = (void *)&cdt->ktss;
		cdi->cdi_sstk = (vm_offset_t)&cdt->sstk.top;
		cdi->cdi_ldt = cdp->cpu_ldtp;

		/* Make the virtual alias address for the GDT */
		cpu_gdt_alias((vm_map_offset_t) &cdt->gdt,
			      (vm_map_offset_t) cdi->cdi_gdt.ptr);

		/*
		 * Copy the tables
		 */
		bcopy((char *)master_gdt, (char *)cdt->gdt, sizeof(master_gdt));
		bcopy((char *)master_ldt, (char *)cdp->cpu_ldtp, sizeof(master_ldt));
		bcopy((char *)&master_ktss64, (char *)&cdt->ktss, sizeof(struct x86_64_tss));

		/*
		 * Fix up the entries in the GDT to point to
		 * this LDT and this TSS.
		 */
		kernel_ldt_desc64.offset64 = (uintptr_t) cdi->cdi_ldt;
		*(struct fake_descriptor64 *) &cdt->gdt[sel_idx(KERNEL_LDT)] =
			kernel_ldt_desc64;
		fix_desc64(&cdt->gdt[sel_idx(KERNEL_LDT)], 1);

		kernel_ldt_desc64.offset64 = (uintptr_t) cdi->cdi_ldt;
		*(struct fake_descriptor64 *) &cdt->gdt[sel_idx(USER_LDT)] =
			kernel_ldt_desc64;
		fix_desc64(&cdt->gdt[sel_idx(USER_LDT)], 1);

		kernel_tss_desc64.offset64 = (uintptr_t) cdi->cdi_ktss;
		*(struct fake_descriptor64 *) &cdt->gdt[sel_idx(KERNEL_TSS)] =
			kernel_tss_desc64;
		fix_desc64(&cdt->gdt[sel_idx(KERNEL_TSS)], 1);

		/* Set (zeroed) fault stack as IST1, NMI intr stack IST2 */
		bzero((void *) cdt->fstk, sizeof(cdt->fstk));
		cdt->ktss.ist2 = (unsigned long)cdt->fstk + sizeof(cdt->fstk);
		cdt->ktss.ist1 = cdt->ktss.ist2
				 - sizeof(x86_64_intr_stack_frame_t);
	}

	/* Require that the top of the sysenter stack is 16-byte aligned */
	if ((cdi->cdi_sstk % 16) != 0)
		panic("cpu_desc_init64() sysenter stack not 16-byte aligned");
}


void
cpu_desc_load64(cpu_data_t *cdp)
{
	cpu_desc_index_t	*cdi = &cdp->cpu_desc_index;

	/* Stuff the kernel per-cpu data area address into the MSRs */
	wrmsr64(MSR_IA32_GS_BASE, (uintptr_t) cdp);
	wrmsr64(MSR_IA32_KERNEL_GS_BASE, (uintptr_t) cdp);

	/*
	 * Ensure the TSS segment's busy bit is clear. This is required
	 * for the case of reloading descriptors at wake to avoid
	 * their complete re-initialization.
	 */
	gdt_desc_p(KERNEL_TSS)->access &= ~ACC_TSS_BUSY;

	/* Load the GDT, LDT, IDT and TSS */
	cdi->cdi_gdt.size = sizeof(struct real_descriptor)*GDTSZ - 1;
	cdi->cdi_idt.size = 0x1000 + cdp->cpu_number;
	lgdt((uintptr_t *) &cdi->cdi_gdt);
	lidt((uintptr_t *) &cdi->cdi_idt);
	lldt(KERNEL_LDT);
	set_tr(KERNEL_TSS);

#if GPROF // Hack to enable mcount to work on K64
	__asm__ volatile("mov %0, %%gs" : : "rm" ((unsigned short)(KERNEL_DS)));
#endif
}
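
/*
 * lgdt/lidt above consume a 10-byte "pseudo-descriptor": a 16-bit limit
 * followed by a 64-bit linear base, which is the layout the cdi_gdt and
 * cdi_idt size/ptr pairs must match when passed by address.  A hedged,
 * self-contained equivalent (the type name is illustrative):
 *
 *	typedef struct __attribute__((packed)) {
 *		uint16_t	size;	// table limit: size in bytes - 1
 *		uint64_t	ptr;	// linear base address of the table
 *	} pseudo_descriptor_t;
 *
 * The IDT limit programmed above is deliberately over-sized
 * (0x1000 + cpu_number); a plausible reading, not stated in the code,
 * is that the CPU number can then be recovered from sidt output when
 * debugging.
 */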

/*
 * Set MSRs for sysenter/sysexit and syscall/sysret for 64-bit.
 */
static void
fast_syscall_init64(__unused cpu_data_t *cdp)
{
	wrmsr64(MSR_IA32_SYSENTER_CS, SYSENTER_CS);
	wrmsr64(MSR_IA32_SYSENTER_EIP, (uintptr_t) hi64_sysenter);
	wrmsr64(MSR_IA32_SYSENTER_ESP, current_sstk());
	/* Enable syscall/sysret */
	wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_SCE);

	/*
	 * MSRs for 64-bit syscall/sysret
	 * Note USER_CS because sysret uses this + 16 when returning to
	 * 64-bit code.
	 */
	wrmsr64(MSR_IA32_LSTAR, (uintptr_t) hi64_syscall);
	wrmsr64(MSR_IA32_STAR, (((uint64_t)USER_CS) << 48) |
			       (((uint64_t)KERNEL64_CS) << 32));
	/*
	 * Emulate eflags cleared by sysenter but note that
	 * we also clear the trace trap to avoid the complications
	 * of single-stepping into a syscall. The nested task bit
	 * is also cleared to avoid a spurious "task switch"
	 * should we choose to return via an IRET.
	 */
	wrmsr64(MSR_IA32_FMASK, EFL_DF|EFL_IF|EFL_TF|EFL_NT);

}


cpu_data_t *
cpu_data_alloc(boolean_t is_boot_cpu)
{
	int		ret;
	cpu_data_t	*cdp;

	if (is_boot_cpu) {
		assert(real_ncpus == 1);
		cdp = cpu_datap(0);
		if (cdp->cpu_processor == NULL) {
			simple_lock_init(&ncpus_lock, 0);
			cdp->cpu_processor = cpu_processor_alloc(TRUE);
#if NCOPY_WINDOWS > 0
			cdp->cpu_pmap = pmap_cpu_alloc(TRUE);
#endif
		}
		return cdp;
	}

	/*
	 * Allocate per-cpu data:
	 */
	ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp, sizeof(cpu_data_t));
	if (ret != KERN_SUCCESS) {
		printf("cpu_data_alloc() failed, ret=%d\n", ret);
		goto abort;
	}
	bzero((void*) cdp, sizeof(cpu_data_t));
	cdp->cpu_this = cdp;

	/*
	 * Allocate interrupt stack:
	 */
	ret = kmem_alloc(kernel_map,
			 (vm_offset_t *) &cdp->cpu_int_stack_top,
			 INTSTACK_SIZE);
	if (ret != KERN_SUCCESS) {
		printf("cpu_data_alloc() int stack failed, ret=%d\n", ret);
		goto abort;
	}
	bzero((void*) cdp->cpu_int_stack_top, INTSTACK_SIZE);
	cdp->cpu_int_stack_top += INTSTACK_SIZE;

	/*
	 * Allocate descriptor table:
	 */
	ret = kmem_alloc(kernel_map,
			 (vm_offset_t *) &cdp->cpu_desc_tablep,
			 sizeof(cpu_desc_table64_t));
	if (ret != KERN_SUCCESS) {
		printf("cpu_data_alloc() desc_table failed, ret=%d\n", ret);
		goto abort;
	}

	/*
	 * Allocate LDT
	 */
	ret = kmem_alloc(kernel_map,
			 (vm_offset_t *) &cdp->cpu_ldtp,
			 sizeof(struct real_descriptor) * LDTSZ);
	if (ret != KERN_SUCCESS) {
		printf("cpu_data_alloc() ldt failed, ret=%d\n", ret);
		goto abort;
	}

#if CONFIG_MCA
	/* Machine-check shadow register allocation. */
	mca_cpu_alloc(cdp);
#endif

	simple_lock(&ncpus_lock);

	cpu_data_ptr[real_ncpus] = cdp;
	cdp->cpu_number = real_ncpus;
	real_ncpus++;
	simple_unlock(&ncpus_lock);

	/*
	 * Before this cpu has been assigned a real thread context,
	 * we give it a fake, unique, non-zero thread id which the locking
	 * primitives use as their lock value.
	 * Note that this does not apply to the boot processor, cpu 0, which
	 * transitions to a thread context well before other processors are
	 * started.
	 */
	cdp->cpu_active_thread = (thread_t) (uintptr_t) cdp->cpu_number;

	cdp->cpu_nanotime = &pal_rtc_nanotime_info;

	kprintf("cpu_data_alloc(%d) %p desc_table: %p "
		"ldt: %p "
		"int_stack: 0x%lx-0x%lx\n",
		cdp->cpu_number, cdp, cdp->cpu_desc_tablep, cdp->cpu_ldtp,
		(long)(cdp->cpu_int_stack_top - INTSTACK_SIZE), (long)(cdp->cpu_int_stack_top));

	return cdp;

abort:
	if (cdp) {
		if (cdp->cpu_desc_tablep)
			kfree((void *) cdp->cpu_desc_tablep,
				sizeof(cpu_desc_table64_t));
		if (cdp->cpu_int_stack_top)
			kfree((void *) (cdp->cpu_int_stack_top - INTSTACK_SIZE),
				INTSTACK_SIZE);
		kfree((void *) cdp, sizeof(*cdp));
	}
	return NULL;
}
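
/*
 * The validators below decode a 16-bit selector via selector_to_sel()
 * into the standard x86 fields:
 *
 *	bits 15..3	index	descriptor slot within the GDT or LDT
 *	bit  2		ti	table indicator: 0 = GDT, 1 = LDT
 *	bits 1..0	rpl	requested privilege level (3 = user)
 *
 * Worked example: selector 0x2B has index 5, ti = 0 (GDT) and rpl = 3,
 * i.e. it names GDT slot 5 requested at user privilege.
 */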

boolean_t
valid_user_data_selector(uint16_t selector)
{
	sel_t	sel = selector_to_sel(selector);

	if (selector == 0)
		return (TRUE);

	if (sel.ti == SEL_LDT)
		return (TRUE);
	else if (sel.index < GDTSZ) {
		if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U)
			return (TRUE);
	}

	return (FALSE);
}

boolean_t
valid_user_code_selector(uint16_t selector)
{
	sel_t	sel = selector_to_sel(selector);

	if (selector == 0)
		return (FALSE);

	if (sel.ti == SEL_LDT) {
		if (sel.rpl == USER_PRIV)
			return (TRUE);
	}
	else if (sel.index < GDTSZ && sel.rpl == USER_PRIV) {
		if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U)
			return (TRUE);
	}

	return (FALSE);
}

boolean_t
valid_user_stack_selector(uint16_t selector)
{
	sel_t	sel = selector_to_sel(selector);

	if (selector == 0)
		return (FALSE);

	if (sel.ti == SEL_LDT) {
		if (sel.rpl == USER_PRIV)
			return (TRUE);
	}
	else if (sel.index < GDTSZ && sel.rpl == USER_PRIV) {
		if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U)
			return (TRUE);
	}

	return (FALSE);
}

boolean_t
valid_user_segment_selectors(uint16_t cs,
			     uint16_t ss,
			     uint16_t ds,
			     uint16_t es,
			     uint16_t fs,
			     uint16_t gs)
{
	return valid_user_code_selector(cs)  &&
	       valid_user_stack_selector(ss) &&
	       valid_user_data_selector(ds)  &&
	       valid_user_data_selector(es)  &&
	       valid_user_data_selector(fs)  &&
	       valid_user_data_selector(gs);
}
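
/*
 * valid_user_segment_selectors() is the aggregate gate for segment
 * state arriving from user space.  A hedged usage sketch (ts is a
 * hypothetical saved-state structure, not a caller in this file):
 *
 *	if (!valid_user_segment_selectors(ts->cs, ts->ss, ts->ds,
 *					  ts->es, ts->fs, ts->gs))
 *		return KERN_INVALID_ARGUMENT;	// reject kernel selectors
 */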

#if NCOPY_WINDOWS > 0

static vm_offset_t user_window_base = 0;

void
cpu_userwindow_init(int cpu)
{
	cpu_data_t	*cdp = cpu_data_ptr[cpu];
	vm_offset_t	user_window;
	vm_offset_t	vaddr;
	int		num_cpus;

	num_cpus = ml_get_max_cpus();

	if (cpu >= num_cpus)
		panic("cpu_userwindow_init: cpu >= num_cpus");

	if (user_window_base == 0) {

		if (vm_allocate(kernel_map, &vaddr,
					(NBPDE * NCOPY_WINDOWS * num_cpus) + NBPDE,
					VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
			panic("cpu_userwindow_init: "
					"couldn't allocate user map window");

		/*
		 * window must start on a page table boundary
		 * in the virtual address space
		 */
		user_window_base = (vaddr + (NBPDE - 1)) & ~(NBPDE - 1);

		/*
		 * get rid of any allocation leading up to our
		 * starting boundary
		 */
		vm_deallocate(kernel_map, vaddr, user_window_base - vaddr);

		/*
		 * get rid of tail that we don't need
		 */
		user_window = user_window_base +
					(NBPDE * NCOPY_WINDOWS * num_cpus);

		vm_deallocate(kernel_map, user_window,
				(vaddr +
				 ((NBPDE * NCOPY_WINDOWS * num_cpus) + NBPDE)) -
				 user_window);
	}

	user_window = user_window_base + (cpu * NCOPY_WINDOWS * NBPDE);

	cdp->cpu_copywindow_base = user_window;
	/*
	 * Abuse this pdp entry: it now actually points to
	 * an array of copy window addresses.
	 */
	cdp->cpu_copywindow_pdp = pmap_pde(kernel_pmap, user_window);

}

void
cpu_physwindow_init(int cpu)
{
	cpu_data_t	*cdp = cpu_data_ptr[cpu];
	vm_offset_t	phys_window = cdp->cpu_physwindow_base;

	if (phys_window == 0) {
		if (vm_allocate(kernel_map, &phys_window,
				PAGE_SIZE, VM_FLAGS_ANYWHERE)
				!= KERN_SUCCESS)
			panic("cpu_physwindow_init: "
				"couldn't allocate phys map window");

		/*
		 * make sure the page that encompasses the
		 * pte pointer we're interested in actually
		 * exists in the page table
		 */
		pmap_expand(kernel_pmap, phys_window, PMAP_EXPAND_OPTIONS_NONE);

		cdp->cpu_physwindow_base = phys_window;
		cdp->cpu_physwindow_ptep = vtopte(phys_window);
	}
}
#endif /* NCOPY_WINDOWS > 0 */

/*
 * Load the segment descriptor tables for the current processor.
 */
void
cpu_mode_init(cpu_data_t *cdp)
{
	fast_syscall_init64(cdp);
}

/*
 * Allocate a new interrupt stack for the boot processor from the
 * heap rather than continue to use the statically allocated space.
 * Also switch to a dynamically allocated cpu data area.
 */
void
cpu_data_realloc(void)
{
	int		ret;
	vm_offset_t	istk;
	vm_offset_t	fstk;
	cpu_data_t	*cdp;
	boolean_t	istate;

	ret = kmem_alloc(kernel_map, &istk, INTSTACK_SIZE);
	if (ret != KERN_SUCCESS) {
		panic("cpu_data_realloc() stack alloc, ret=%d\n", ret);
	}
	bzero((void*) istk, INTSTACK_SIZE);
	istk += INTSTACK_SIZE;

	ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp, sizeof(cpu_data_t));
	if (ret != KERN_SUCCESS) {
		panic("cpu_data_realloc() cpu data alloc, ret=%d\n", ret);
	}

	/* Copy old contents into new area and make fix-ups */
	assert(cpu_number() == 0);
	bcopy((void *) cpu_data_ptr[0], (void*) cdp, sizeof(cpu_data_t));
	cdp->cpu_this = cdp;
	cdp->cpu_int_stack_top = istk;
	timer_call_queue_init(&cdp->rtclock_timer.queue);

	/* Allocate the separate fault stack */
	ret = kmem_alloc(kernel_map, &fstk, PAGE_SIZE);
	if (ret != KERN_SUCCESS) {
		panic("cpu_data_realloc() fault stack alloc, ret=%d\n", ret);
	}
	bzero((void*) fstk, PAGE_SIZE);
	fstk += PAGE_SIZE;

	/*
	 * With interrupts disabled commit the new areas.
	 */
	istate = ml_set_interrupts_enabled(FALSE);
	cpu_data_ptr[0] = cdp;
	master_ktss64.ist2 = (uintptr_t) fstk;
	master_ktss64.ist1 = (uintptr_t) fstk
				- sizeof(x86_64_intr_stack_frame_t);
	wrmsr64(MSR_IA32_GS_BASE, (uintptr_t) cdp);
	wrmsr64(MSR_IA32_KERNEL_GS_BASE, (uintptr_t) cdp);
	(void) ml_set_interrupts_enabled(istate);

	kprintf("Reallocated master cpu data: %p,"
		" interrupt stack: %p, fault stack: %p\n",
		(void *) cdp, (void *) istk, (void *) fstk);
}
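
/*
 * Worked example of the IST arithmetic used above (values illustrative):
 * with a 4KB fault stack page ending at 0x10000, ist2 is set to the page
 * top, 0x10000, and ist1 to 0x10000 - sizeof(x86_64_intr_stack_frame_t).
 * One reading of this layout (an inference, not stated by the code): an
 * NMI delivered through IST2 pushes its hardware frame into exactly the
 * reserved slot [ist1, ist2), so it cannot clobber a fault frame already
 * pushed at ist1 and below.
 */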