/*
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */

#include <mach_rt.h>
#include <mach_kdp.h>
#include <mach_ldebug.h>
#include <gprof.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>

#include <kern/kern_types.h>
#include <kern/startup.h>
#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/machine.h>
#include <kern/pms.h>
#include <kern/misc_protos.h>
#include <kern/etimer.h>
#include <kern/kalloc.h>
#include <kern/queue.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <profiling/profile-mk.h>

#include <i386/proc_reg.h>
#include <i386/cpu_threads.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/trap.h>
#include <i386/postcode.h>
#include <i386/machine_routines.h>
#include <i386/mp.h>
#include <i386/mp_events.h>
#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <i386/machine_cpu.h>
#include <i386/pmCPU.h>
#if CONFIG_MCA
#include <i386/machine_check.h>
#endif
#include <i386/acpi.h>

#include <chud/chud_xnu.h>
#include <chud/chud_xnu_private.h>

#include <sys/kdebug.h>

#if	MP_DEBUG
#define PAUSE		delay(1000000)
#define DBG(x...)	kprintf(x)
#else
#define DBG(x...)
#define PAUSE
#endif	/* MP_DEBUG */

/* Debugging/test trace events: */
#define	TRACE_MP_TLB_FLUSH		MACHDBG_CODE(DBG_MACH_MP, 0)
#define	TRACE_MP_CPUS_CALL		MACHDBG_CODE(DBG_MACH_MP, 1)
#define	TRACE_MP_CPUS_CALL_LOCAL	MACHDBG_CODE(DBG_MACH_MP, 2)
#define	TRACE_MP_CPUS_CALL_ACTION	MACHDBG_CODE(DBG_MACH_MP, 3)
#define	TRACE_MP_CPUS_CALL_NOBUF	MACHDBG_CODE(DBG_MACH_MP, 4)
#define	TRACE_MP_CPU_FAST_START		MACHDBG_CODE(DBG_MACH_MP, 5)
#define	TRACE_MP_CPU_START		MACHDBG_CODE(DBG_MACH_MP, 6)
#define	TRACE_MP_CPU_DEACTIVATE	MACHDBG_CODE(DBG_MACH_MP, 7)

#define ABS(v)		(((v) > 0)?(v):-(v))

void		slave_boot_init(void);
void		i386_cpu_IPI(int cpu);

static void	mp_kdp_wait(boolean_t flush, boolean_t isNMI);
static void	mp_rendezvous_action(void);
static void	mp_broadcast_action(void);

static boolean_t	cpu_signal_pending(int cpu, mp_event_t event);
static int		NMIInterruptHandler(x86_saved_state_t *regs);

boolean_t		smp_initialized = FALSE;
uint32_t		TSC_sync_margin = 0xFFF;
volatile boolean_t	force_immediate_debugger_NMI = FALSE;
volatile boolean_t	pmap_tlb_flush_timeout = FALSE;
decl_simple_lock_data(,mp_kdp_lock);

decl_lck_mtx_data(static, mp_cpu_boot_lock);
lck_mtx_ext_t	mp_cpu_boot_lock_ext;

/* Variables needed for MP rendezvous. */
decl_simple_lock_data(,mp_rv_lock);
static void	(*mp_rv_setup_func)(void *arg);
static void	(*mp_rv_action_func)(void *arg);
static void	(*mp_rv_teardown_func)(void *arg);
static void	*mp_rv_func_arg;
static volatile int	mp_rv_ncpus;
			/* Cache-aligned barriers: */
static volatile long	mp_rv_entry    __attribute__((aligned(64)));
static volatile long	mp_rv_exit     __attribute__((aligned(64)));
static volatile long	mp_rv_complete __attribute__((aligned(64)));

volatile uint64_t	debugger_entry_time;
volatile uint64_t	debugger_exit_time;
#if MACH_KDP
#include <kdp/kdp.h>
extern int kdp_snapshot;
static struct _kdp_xcpu_call_func {
	kdp_x86_xcpu_func_t func;
	void *arg0, *arg1;
	volatile long ret;
	volatile uint16_t cpu;
} kdp_xcpu_call_func = {
	.cpu = KDP_XCPU_NONE
};

#endif

/* Variables needed for MP broadcast. */
static void	(*mp_bc_action_func)(void *arg);
static void	*mp_bc_func_arg;
static int	mp_bc_ncpus;
static volatile long	mp_bc_count;
decl_lck_mtx_data(static, mp_bc_lock);
lck_mtx_ext_t	mp_bc_lock_ext;
static volatile int	debugger_cpu = -1;
volatile long	NMIPI_acks = 0;

static void	mp_cpus_call_init(void);
static void	mp_cpus_call_cpu_init(void);
static void	mp_cpus_call_action(void);
static void	mp_call_PM(void);

char		mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init

/* PAL-related routines */
boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
		int ipi_vector, i386_intr_func_t ipi_handler);
void i386_start_cpu(int lapic_id, int cpu_num);
void i386_send_NMI(int cpu);

#if GPROF
/*
 * Initialize dummy structs for profiling. These aren't used but
 * allow hertz_tick() to be built with GPROF defined.
 */
struct profile_vars _profile_vars;
struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
#define GPROF_INIT()							\
{									\
	int	i;							\
									\
	/* Hack to initialize pointers to unused profiling structs */	\
	for (i = 1; i < MAX_CPUS; i++)					\
		_profile_vars_cpus[i] = &_profile_vars;			\
}
#else
#define GPROF_INIT()
#endif /* GPROF */

static lck_grp_t	smp_lck_grp;
static lck_grp_attr_t	smp_lck_grp_attr;

#define NUM_CPU_WARM_CALLS	20
struct timer_call	cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
queue_head_t		cpu_warm_call_list;
decl_simple_lock_data(static, cpu_warm_lock);

typedef struct cpu_warm_data {
	timer_call_t	cwd_call;
	uint64_t	cwd_deadline;
	int		cwd_result;
} *cpu_warm_data_t;

static void		cpu_prewarm_init(void);
static void		cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
static void		_cpu_warm_setup(void *arg);
static timer_call_t	grab_warm_timer_call(void);
static void		free_warm_timer_call(timer_call_t call);

void
smp_init(void)
{
	simple_lock_init(&mp_kdp_lock, 0);
	simple_lock_init(&mp_rv_lock, 0);
	lck_grp_attr_setdefault(&smp_lck_grp_attr);
	lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
	lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
	lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
	console_init();

	if (!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
			LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
		return;

	cpu_thread_init();

	GPROF_INIT();
	DBGLOG_CPU_INIT(master_cpu);

	mp_cpus_call_init();
	mp_cpus_call_cpu_init();

	if (PE_parse_boot_argn("TSC_sync_margin",
				&TSC_sync_margin, sizeof(TSC_sync_margin))) {
		kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
	} else if (cpuid_vmm_present()) {
		kprintf("TSC sync margin disabled\n");
		TSC_sync_margin = 0;
	}
	smp_initialized = TRUE;

	cpu_prewarm_init();

	return;
}

typedef struct {
	int		target_cpu;
	int		target_lapic;
	int		starter_cpu;
} processor_start_info_t;
static processor_start_info_t	start_info	__attribute__((aligned(64)));

/*
 * Cache-alignment is to avoid cross-cpu false-sharing interference.
 */
static volatile long		tsc_entry_barrier __attribute__((aligned(64)));
static volatile long		tsc_exit_barrier  __attribute__((aligned(64)));
static volatile uint64_t	tsc_target	  __attribute__((aligned(64)));

/*
 * Poll a CPU to see when it has marked itself as running.
 */
static void
mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
{
	while (iters-- > 0) {
		if (cpu_datap(slot_num)->cpu_running)
			break;
		delay(usecdelay);
	}
}

/*
 * Quickly bring a CPU back online which has been halted.
 */
kern_return_t
intel_startCPU_fast(int slot_num)
{
	kern_return_t	rc;

	/*
	 * Try to perform a fast restart
	 */
	rc = pmCPUExitHalt(slot_num);
	if (rc != KERN_SUCCESS)
		/*
		 * The CPU was not eligible for a fast restart.
		 */
		return(rc);

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
		slot_num, 0, 0, 0, 0);

	/*
	 * Wait until the CPU is back online.
	 */
	mp_disable_preemption();

	/*
	 * We use short pauses (1us) for low latency.
	 * 30,000 iterations is longer than a full restart would require
	 * so it should be more than long enough.
	 */

	mp_wait_for_cpu_up(slot_num, 30000, 1);
	mp_enable_preemption();

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
		slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);

	/*
	 * Check to make sure that the CPU is really running. If not,
	 * go through the slow path.
	 */
	if (cpu_datap(slot_num)->cpu_running)
		return(KERN_SUCCESS);
	else
		return(KERN_FAILURE);
}

static void
started_cpu(void)
{
	/* Here on the started cpu with cpu_running set TRUE */

	if (TSC_sync_margin &&
	    start_info.target_cpu == cpu_number()) {
		/*
		 * I've just started up, synchronize again with the starter cpu
		 * and then snap my TSC.
		 */
		tsc_target = 0;
		atomic_decl(&tsc_entry_barrier, 1);
		while (tsc_entry_barrier != 0)
			;	/* spin for starter and target at barrier */
		tsc_target = rdtsc64();
		atomic_decl(&tsc_exit_barrier, 1);
	}
}

static void
start_cpu(void *arg)
{
	int			i = 1000;
	processor_start_info_t	*psip = (processor_start_info_t *) arg;

	/* Ignore this if the current processor is not the starter */
	if (cpu_number() != psip->starter_cpu)
		return;

	DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
		arg, psip->target_cpu, psip->target_lapic);

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_START | DBG_FUNC_START,
		psip->target_cpu,
		psip->target_lapic, 0, 0, 0);

	i386_start_cpu(psip->target_lapic, psip->target_cpu);

#ifdef	POSTCODE_DELAY
	/* Wait much longer if postcodes are displayed for a delay period. */
	i *= 10000;
#endif
	DBG("start_cpu(%p) about to wait for cpu %d\n",
		arg, psip->target_cpu);

	mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_START | DBG_FUNC_END,
		psip->target_cpu,
		cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);

	if (TSC_sync_margin &&
	    cpu_datap(psip->target_cpu)->cpu_running) {
		/*
		 * Compare the TSC from the started processor with ours.
		 * Report and log/panic if it diverges by more than
		 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
		 * can be overridden by boot-arg (with 0 meaning no checking).
		 */
		uint64_t	tsc_starter;
		int64_t		tsc_delta;
		atomic_decl(&tsc_entry_barrier, 1);
		while (tsc_entry_barrier != 0)
			;	/* spin for both processors at barrier */
		tsc_starter = rdtsc64();
		atomic_decl(&tsc_exit_barrier, 1);
		while (tsc_exit_barrier != 0)
			;	/* spin for target to store its TSC */
		tsc_delta = tsc_target - tsc_starter;
		kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
			psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
		if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
#if DEBUG
			panic(
#else
			printf(
#endif
				"Unsynchronized TSC for cpu %d: "
					"0x%016llx, delta 0x%llx\n",
				psip->target_cpu, tsc_target, tsc_delta);
		}
	}
}

kern_return_t
intel_startCPU(
	int	slot_num)
{
	int		lapic = cpu_to_lapic[slot_num];
	boolean_t	istate;

	assert(lapic != -1);

	DBGLOG_CPU_INIT(slot_num);

	DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
	DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);

	/*
	 * Initialize (or re-initialize) the descriptor tables for this cpu.
	 * Propagate processor mode to slave.
	 */
	if (cpu_mode_is64bit())
		cpu_desc_init64(cpu_datap(slot_num));
	else
		cpu_desc_init(cpu_datap(slot_num));

	/* Serialize use of the slave boot stack, etc. */
	lck_mtx_lock(&mp_cpu_boot_lock);

	istate = ml_set_interrupts_enabled(FALSE);
	if (slot_num == get_cpu_number()) {
		ml_set_interrupts_enabled(istate);
		lck_mtx_unlock(&mp_cpu_boot_lock);
		return KERN_SUCCESS;
	}

	start_info.starter_cpu  = cpu_number();
	start_info.target_cpu   = slot_num;
	start_info.target_lapic = lapic;
	tsc_entry_barrier = 2;
	tsc_exit_barrier = 2;

	/*
	 * Perform the processor startup sequence with all running
	 * processors rendezvous'ed. This is required during periods when
	 * the cache-disable bit is set for MTRR/PAT initialization.
	 */
	mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);

	start_info.target_cpu = 0;

	ml_set_interrupts_enabled(istate);
	lck_mtx_unlock(&mp_cpu_boot_lock);

	if (!cpu_datap(slot_num)->cpu_running) {
		kprintf("Failed to start CPU %02d\n", slot_num);
		printf("Failed to start CPU %02d, rebooting...\n", slot_num);
		delay(1000000);
		halt_cpu();
		return KERN_SUCCESS;
	} else {
		kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
		return KERN_SUCCESS;
	}
}

#if	MP_DEBUG
cpu_signal_event_log_t	*cpu_signal[MAX_CPUS];
cpu_signal_event_log_t	*cpu_handle[MAX_CPUS];

MP_EVENT_NAME_DECL();

#endif	/* MP_DEBUG */

int
cpu_signal_handler(x86_saved_state_t *regs)
{
	int		my_cpu;
	volatile int	*my_word;

	SCHED_STATS_IPI(current_processor());

	my_cpu = cpu_number();
	my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
	/* Store the initial set of signals for diagnostics. New
	 * signals could arrive while these are being processed
	 * so it's no more than a hint.
	 */

	cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;

	do {
#if	MACH_KDP
		if (i_bit(MP_KDP, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_KDP);
			i_bit_clear(MP_KDP, my_word);
/* Ensure that the i386_kernel_state at the base of the
 * current thread's stack (if any) is synchronized with the
 * context at the moment of the interrupt, to facilitate
 * access through the debugger.
 */
			sync_iss_to_iks(regs);
			if (pmsafe_debug && !kdp_snapshot)
				pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
			mp_kdp_wait(TRUE, FALSE);
			if (pmsafe_debug && !kdp_snapshot)
				pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
		} else
#endif	/* MACH_KDP */
		if (i_bit(MP_TLB_FLUSH, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
			i_bit_clear(MP_TLB_FLUSH, my_word);
			pmap_update_interrupt();
		} else if (i_bit(MP_AST, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_AST);
			i_bit_clear(MP_AST, my_word);
			ast_check(cpu_to_processor(my_cpu));
		} else if (i_bit(MP_RENDEZVOUS, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
			i_bit_clear(MP_RENDEZVOUS, my_word);
			mp_rendezvous_action();
		} else if (i_bit(MP_BROADCAST, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
			i_bit_clear(MP_BROADCAST, my_word);
			mp_broadcast_action();
		} else if (i_bit(MP_CHUD, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CHUD);
			i_bit_clear(MP_CHUD, my_word);
			chudxnu_cpu_signal_handler();
		} else if (i_bit(MP_CALL, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CALL);
			i_bit_clear(MP_CALL, my_word);
			mp_cpus_call_action();
		} else if (i_bit(MP_CALL_PM, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
			i_bit_clear(MP_CALL_PM, my_word);
			mp_call_PM();
		}
	} while (*my_word);

	return 0;
}

static int
NMIInterruptHandler(x86_saved_state_t *regs)
{
	void	*stackptr;

	if (panic_active() && !panicDebugging) {
		if (pmsafe_debug)
			pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
		for(;;)
			cpu_pause();
	}

	atomic_incl(&NMIPI_acks, 1);
	sync_iss_to_iks_unconditionally(regs);
#if defined (__i386__)
	__asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
#elif defined (__x86_64__)
	__asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
#endif

	if (cpu_number() == debugger_cpu)
		goto NMExit;

	if (spinlock_timed_out) {
		char pstr[192];
		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
	} else if (pmap_tlb_flush_timeout == TRUE) {
		char pstr[128];
		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
		panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
	}

#if MACH_KDP
	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
	current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
	mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active());
	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
#endif
NMExit:
	return 1;
}


/*
 * cpu_interrupt is really just to be used by the scheduler to
 * get a CPU's attention; it may not always issue an IPI. If an
 * IPI is always needed then use i386_cpu_IPI.
 */
void
cpu_interrupt(int cpu)
{
	boolean_t did_IPI = FALSE;

	if (smp_initialized
	    && pmCPUExitIdle(cpu_datap(cpu))) {
		i386_cpu_IPI(cpu);
		did_IPI = TRUE;
	}

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
}

/*
 * Send a true NMI via the local APIC to the specified CPU.
 */
void
cpu_NMI_interrupt(int cpu)
{
	if (smp_initialized) {
		i386_send_NMI(cpu);
	}
}

static void	(* volatile mp_PM_func)(void) = NULL;

static void
mp_call_PM(void)
{
	assert(!ml_get_interrupts_enabled());

	if (mp_PM_func != NULL)
		mp_PM_func();
}

void
cpu_PM_interrupt(int cpu)
{
	assert(!ml_get_interrupts_enabled());

	if (mp_PM_func != NULL) {
		if (cpu == cpu_number())
			mp_PM_func();
		else
			i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
	}
}

void
PM_interrupt_register(void (*fn)(void))
{
	mp_PM_func = fn;
}

void
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
{
	volatile int	*signals = &cpu_datap(cpu)->cpu_signals;
	uint64_t	tsc_timeout;

	if (!cpu_datap(cpu)->cpu_running)
		return;

	if (event == MP_TLB_FLUSH)
		KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);

	DBGLOG(cpu_signal, cpu, event);

	i_bit_set(event, signals);
	i386_cpu_IPI(cpu);
	if (mode == SYNC) {
	again:
		tsc_timeout = rdtsc64() + (1000*1000*1000);
		while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
			cpu_pause();
		}
		if (i_bit(event, signals)) {
			DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
				cpu, event);
			goto again;
		}
	}
	if (event == MP_TLB_FLUSH)
		KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
}

/*
 * Send event to all running cpus.
 * Called with the topology locked.
 */
void
i386_signal_cpus(mp_event_t event, mp_sync_t mode)
{
	unsigned int	cpu;
	unsigned int	my_cpu = cpu_number();

	assert(hw_lock_held((hw_lock_t)&x86_topo_lock));

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
			continue;
		i386_signal_cpu(cpu, event, mode);
	}
}

/*
 * Return the number of running cpus.
 * Called with the topology locked.
 */
int
i386_active_cpus(void)
{
	unsigned int	cpu;
	unsigned int	ncpus = 0;

	assert(hw_lock_held((hw_lock_t)&x86_topo_lock));

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu_datap(cpu)->cpu_running)
			ncpus++;
	}
	return(ncpus);
}

/*
 * All-CPU rendezvous:
 *	- CPUs are signalled,
 *	- all execute the setup function (if specified),
 *	- rendezvous (i.e. all cpus reach a barrier),
 *	- all execute the action function (if specified),
 *	- rendezvous again,
 *	- execute the teardown function (if specified), and then
 *	- resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */
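
/*
 * Illustrative sketch (hypothetical caller, not part of this interface):
 * code that must update a per-cpu resource on every active cpu at once,
 * such as an MTRR/PAT change, would typically go through the no-interrupts
 * wrapper mp_rendezvous_no_intrs() defined below rather than call
 * mp_rendezvous() directly:
 *
 *	static void
 *	update_my_percpu_state(void *arg)	// hypothetical action function
 *	{
 *		// runs on every active cpu with interrupts disabled
 *	}
 *	...
 *	mp_rendezvous_no_intrs(update_my_percpu_state, NULL);
 *
 * The action function must follow the reentrancy rules stated above.
 */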

static void
mp_rendezvous_action(void)
{
	boolean_t intrs_enabled;

	/* setup function */
	if (mp_rv_setup_func != NULL)
		mp_rv_setup_func(mp_rv_func_arg);

	intrs_enabled = ml_get_interrupts_enabled();

	/* spin on entry rendezvous */
	atomic_incl(&mp_rv_entry, 1);
	while (mp_rv_entry < mp_rv_ncpus) {
		/* poll for pesky tlb flushes if interrupts disabled */
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
		cpu_pause();
	}

	/* action function */
	if (mp_rv_action_func != NULL)
		mp_rv_action_func(mp_rv_func_arg);

	/* spin on exit rendezvous */
	atomic_incl(&mp_rv_exit, 1);
	while (mp_rv_exit < mp_rv_ncpus) {
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
		cpu_pause();
	}

	/* teardown function */
	if (mp_rv_teardown_func != NULL)
		mp_rv_teardown_func(mp_rv_func_arg);

	/* Bump completion count */
	atomic_incl(&mp_rv_complete, 1);
}

void
mp_rendezvous(void (*setup_func)(void *),
	      void (*action_func)(void *),
	      void (*teardown_func)(void *),
	      void *arg)
{

	if (!smp_initialized) {
		if (setup_func != NULL)
			setup_func(arg);
		if (action_func != NULL)
			action_func(arg);
		if (teardown_func != NULL)
			teardown_func(arg);
		return;
	}

	/* obtain rendezvous lock */
	simple_lock(&mp_rv_lock);

	/* set static function pointers */
	mp_rv_setup_func = setup_func;
	mp_rv_action_func = action_func;
	mp_rv_teardown_func = teardown_func;
	mp_rv_func_arg = arg;

	mp_rv_entry    = 0;
	mp_rv_exit     = 0;
	mp_rv_complete = 0;

	/*
	 * signal other processors, which will call mp_rendezvous_action()
	 * with interrupts disabled
	 */
	simple_lock(&x86_topo_lock);
	mp_rv_ncpus = i386_active_cpus();
	i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
	simple_unlock(&x86_topo_lock);

	/* call executor function on this cpu */
	mp_rendezvous_action();

	/*
	 * Spin for everyone to complete.
	 * This is necessary to ensure that all processors have proceeded
	 * from the exit barrier before we release the rendezvous structure.
	 */
	while (mp_rv_complete < mp_rv_ncpus) {
		cpu_pause();
	}

	/* Tidy up */
	mp_rv_setup_func    = NULL;
	mp_rv_action_func   = NULL;
	mp_rv_teardown_func = NULL;
	mp_rv_func_arg      = NULL;

	/* release lock */
	simple_unlock(&mp_rv_lock);
}

void
mp_rendezvous_break_lock(void)
{
	simple_lock_init(&mp_rv_lock, 0);
}

static void
setup_disable_intrs(__unused void * param_not_used)
{
	/* disable interrupts before the first barrier */
	boolean_t intr = ml_set_interrupts_enabled(FALSE);

	current_cpu_datap()->cpu_iflag = intr;
	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

static void
teardown_restore_intrs(__unused void * param_not_used)
{
	/* restore interrupt flag following MTRR changes */
	ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

/*
 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
 * This is exported for use by kexts.
 */
void
mp_rendezvous_no_intrs(
	      void (*action_func)(void *),
	      void *arg)
{
	mp_rendezvous(setup_disable_intrs,
		      action_func,
		      teardown_restore_intrs,
		      arg);
}


typedef struct {
	queue_chain_t	link;			/* queue linkage */
	void		(*func)(void *,void *);	/* routine to call */
	void		*arg0;			/* routine's 1st arg */
	void		*arg1;			/* routine's 2nd arg */
	volatile long	*countp;		/* completion counter */
} mp_call_t;


typedef struct {
	queue_head_t		queue;
	decl_simple_lock_data(, lock);
} mp_call_queue_t;
#define MP_CPUS_CALL_BUFS_PER_CPU	MAX_CPUS
static mp_call_queue_t	mp_cpus_call_freelist;
static mp_call_queue_t	mp_cpus_call_head[MAX_CPUS];

static inline boolean_t
mp_call_head_lock(mp_call_queue_t *cqp)
{
	boolean_t	intrs_enabled;

	intrs_enabled = ml_set_interrupts_enabled(FALSE);
	simple_lock(&cqp->lock);

	return intrs_enabled;
}

static inline boolean_t
mp_call_head_is_locked(mp_call_queue_t *cqp)
{
	return !ml_get_interrupts_enabled() &&
		hw_lock_held((hw_lock_t)&cqp->lock);
}

static inline void
mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
{
	simple_unlock(&cqp->lock);
	ml_set_interrupts_enabled(intrs_enabled);
}

static inline mp_call_t *
mp_call_alloc(void)
{
	mp_call_t	*callp = NULL;
	boolean_t	intrs_enabled;
	mp_call_queue_t	*cqp = &mp_cpus_call_freelist;

	intrs_enabled = mp_call_head_lock(cqp);
	if (!queue_empty(&cqp->queue))
		queue_remove_first(&cqp->queue, callp, typeof(callp), link);
	mp_call_head_unlock(cqp, intrs_enabled);

	return callp;
}

static inline void
mp_call_free(mp_call_t *callp)
{
	boolean_t	intrs_enabled;
	mp_call_queue_t	*cqp = &mp_cpus_call_freelist;

	intrs_enabled = mp_call_head_lock(cqp);
	queue_enter_first(&cqp->queue, callp, typeof(callp), link);
	mp_call_head_unlock(cqp, intrs_enabled);
}

static inline mp_call_t *
mp_call_dequeue_locked(mp_call_queue_t *cqp)
{
	mp_call_t	*callp = NULL;

	assert(mp_call_head_is_locked(cqp));
	if (!queue_empty(&cqp->queue))
		queue_remove_first(&cqp->queue, callp, typeof(callp), link);
	return callp;
}

static inline void
mp_call_enqueue_locked(
	mp_call_queue_t	*cqp,
	mp_call_t	*callp)
{
	queue_enter(&cqp->queue, callp, typeof(callp), link);
}

/* Called on the boot processor to initialize global structures */
static void
mp_cpus_call_init(void)
{
	mp_call_queue_t	*cqp = &mp_cpus_call_freelist;

	DBG("mp_cpus_call_init()\n");
	simple_lock_init(&cqp->lock, 0);
	queue_init(&cqp->queue);
}

/*
 * Called by each processor to add call buffers to the free list
 * and to initialize the per-cpu call queue.
 * Also called but ignored on slave processors on re-start/wake.
 */
static void
mp_cpus_call_cpu_init(void)
{
	int		i;
	mp_call_queue_t	*cqp = &mp_cpus_call_head[cpu_number()];
	mp_call_t	*callp;

	if (cqp->queue.next != NULL)
		return; /* restart/wake case: called already */

	simple_lock_init(&cqp->lock, 0);
	queue_init(&cqp->queue);
	for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
		callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
		mp_call_free(callp);
	}

	DBG("mp_cpus_call_cpu_init() done on cpu %d\n", cpu_number());
}

/*
 * This is called from cpu_signal_handler() to process an MP_CALL signal.
 * And also from i386_deactivate_cpu() when a cpu is being taken offline.
 */
static void
mp_cpus_call_action(void)
{
	mp_call_queue_t	*cqp;
	boolean_t	intrs_enabled;
	mp_call_t	*callp;
	mp_call_t	call;

	assert(!ml_get_interrupts_enabled());
	cqp = &mp_cpus_call_head[cpu_number()];
	intrs_enabled = mp_call_head_lock(cqp);
	while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
		/* Copy call request to the stack to free buffer */
		call = *callp;
		mp_call_free(callp);
		if (call.func != NULL) {
			mp_call_head_unlock(cqp, intrs_enabled);
			KERNEL_DEBUG_CONSTANT(
				TRACE_MP_CPUS_CALL_ACTION,
				call.func, call.arg0, call.arg1, call.countp, 0);
			call.func(call.arg0, call.arg1);
			(void) mp_call_head_lock(cqp);
		}
		if (call.countp != NULL)
			atomic_incl(call.countp, 1);
	}
	mp_call_head_unlock(cqp, intrs_enabled);
}

/*
 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
 * Possible modes are:
 *  SYNC:   function is called serially on target cpus in logical cpu order
 *	    waiting for each call to be acknowledged before proceeding
 *  ASYNC:  function call is queued to the specified cpus
 *	    waiting for all calls to complete in parallel before returning
 *  NOSYNC: function calls are queued
 *	    but we return before confirmation of calls completing.
 * The action function may be NULL.
 * The cpu mask may include the local cpu. Offline cpus are ignored.
 * The return value is the number of cpus on which the call was made or queued.
 */
cpu_t
mp_cpus_call(
	cpumask_t	cpus,
	mp_sync_t	mode,
	void		(*action_func)(void *),
	void		*arg)
{
	return mp_cpus_call1(
			cpus,
			mode,
			(void (*)(void *,void *))action_func,
			arg,
			NULL,
			NULL,
			NULL);
}
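
/*
 * Illustrative sketch (hypothetical caller, not part of this interface):
 * to ask one specific cpu -- say the master -- to run a function and wait
 * for it to finish, SYNC mode with a single-cpu mask suffices:
 *
 *	extern void refresh_my_counters(void *arg);	// hypothetical action
 *	...
 *	cpu_t n = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC,
 *			       refresh_my_counters, NULL);
 *	if (n == 0)
 *		;	// the target cpu was offline; handle the fallback
 *
 * With ASYNC the calls are queued to all cpus in the mask first and the
 * caller then waits for completion; with NOSYNC the caller does not wait.
 */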

static void
mp_cpus_call_wait(boolean_t intrs_enabled,
		  long mp_cpus_signals,
		  volatile long *mp_cpus_calls)
{
	mp_call_queue_t	*cqp;

	cqp = &mp_cpus_call_head[cpu_number()];

	while (*mp_cpus_calls < mp_cpus_signals) {
		if (!intrs_enabled) {
			/* Sniffing w/o locking */
			if (!queue_empty(&cqp->queue))
				mp_cpus_call_action();
			handle_pending_TLB_flushes();
		}
		cpu_pause();
	}
}

cpu_t
mp_cpus_call1(
	cpumask_t	cpus,
	mp_sync_t	mode,
	void		(*action_func)(void *, void *),
	void		*arg0,
	void		*arg1,
	cpumask_t	*cpus_calledp,
	cpumask_t	*cpus_notcalledp)
{
	cpu_t		cpu;
	boolean_t	intrs_enabled = FALSE;
	boolean_t	call_self = FALSE;
	cpumask_t	cpus_called = 0;
	cpumask_t	cpus_notcalled = 0;
	long		mp_cpus_signals = 0;
	volatile long	mp_cpus_calls = 0;

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPUS_CALL | DBG_FUNC_START,
		cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1);

	if (!smp_initialized) {
		if ((cpus & CPUMASK_SELF) == 0)
			goto out;
		if (action_func != NULL) {
			intrs_enabled = ml_set_interrupts_enabled(FALSE);
			action_func(arg0, arg1);
			ml_set_interrupts_enabled(intrs_enabled);
		}
		call_self = TRUE;
		goto out;
	}

	/*
	 * Queue the call for each non-local requested cpu.
	 * The topo lock is not taken. Instead we sniff the cpu_running state
	 * and then re-check it after taking the call lock. A cpu being taken
	 * offline runs the action function after clearing the cpu_running.
	 */
	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
		if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
		    !cpu_datap(cpu)->cpu_running)
			continue;
		if (cpu == (cpu_t) cpu_number()) {
			/*
			 * We don't IPI ourself and if calling asynchronously,
			 * we defer our call until we have signalled all others.
			 */
			call_self = TRUE;
			cpus_called |= cpu_to_cpumask(cpu);
			if (mode == SYNC && action_func != NULL) {
				KERNEL_DEBUG_CONSTANT(
					TRACE_MP_CPUS_CALL_LOCAL,
					VM_KERNEL_UNSLIDE(action_func),
					arg0, arg1, 0, 0);
				action_func(arg0, arg1);
			}
		} else {
			/*
			 * Here to queue a call to cpu and IPI.
			 * Spinning for request buffer unless NOSYNC.
			 */
			mp_call_t	*callp = NULL;
			mp_call_queue_t	*cqp = &mp_cpus_call_head[cpu];

		queue_call:
			if (callp == NULL)
				callp = mp_call_alloc();
			intrs_enabled = mp_call_head_lock(cqp);
			if (!cpu_datap(cpu)->cpu_running) {
				mp_call_head_unlock(cqp, intrs_enabled);
				continue;
			}
			if (mode == NOSYNC) {
				if (callp == NULL) {
					cpus_notcalled |= cpu_to_cpumask(cpu);
					mp_call_head_unlock(cqp, intrs_enabled);
					KERNEL_DEBUG_CONSTANT(
						TRACE_MP_CPUS_CALL_NOBUF,
						cpu, 0, 0, 0, 0);
					continue;
				}
				callp->countp = NULL;
			} else {
				if (callp == NULL) {
					mp_call_head_unlock(cqp, intrs_enabled);
					KERNEL_DEBUG_CONSTANT(
						TRACE_MP_CPUS_CALL_NOBUF,
						cpu, 0, 0, 0, 0);
					if (!intrs_enabled) {
						/* Sniffing w/o locking */
						if (!queue_empty(&cqp->queue))
							mp_cpus_call_action();
						handle_pending_TLB_flushes();
					}
					cpu_pause();
					goto queue_call;
				}
				callp->countp = &mp_cpus_calls;
			}
			callp->func = action_func;
			callp->arg0 = arg0;
			callp->arg1 = arg1;
			mp_call_enqueue_locked(cqp, callp);
			mp_cpus_signals++;
			cpus_called |= cpu_to_cpumask(cpu);
			i386_signal_cpu(cpu, MP_CALL, ASYNC);
			mp_call_head_unlock(cqp, intrs_enabled);
			if (mode == SYNC) {
				mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
			}
		}
	}

	/* Call locally if mode not SYNC */
	if (mode != SYNC && call_self) {
		KERNEL_DEBUG_CONSTANT(
			TRACE_MP_CPUS_CALL_LOCAL,
			VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0);
		if (action_func != NULL) {
			ml_set_interrupts_enabled(FALSE);
			action_func(arg0, arg1);
			ml_set_interrupts_enabled(intrs_enabled);
		}
	}

	/* For ASYNC, now wait for all signaled cpus to complete their calls */
	if (mode == ASYNC) {
		mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
	}

out:
	cpu = (cpu_t) mp_cpus_signals + (call_self ? 1 : 0);

	if (cpus_calledp)
		*cpus_calledp = cpus_called;
	if (cpus_notcalledp)
		*cpus_notcalledp = cpus_notcalled;

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPUS_CALL | DBG_FUNC_END,
		cpu, cpus_called, cpus_notcalled, 0, 0);

	return cpu;
}


static void
mp_broadcast_action(void)
{
	/* call action function */
	if (mp_bc_action_func != NULL)
		mp_bc_action_func(mp_bc_func_arg);

	/* if we're the last one through, wake up the instigator */
	if (atomic_decl_and_test(&mp_bc_count, 1))
		thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
}

/*
 * mp_broadcast() runs a given function on all active cpus.
 * The caller blocks until the function has run on all cpus.
 * The caller will also block if there is another pending broadcast.
 */
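
/*
 * Illustrative sketch (hypothetical caller): because the instigating thread
 * sleeps rather than spins, mp_broadcast() suits thread-context housekeeping
 * such as draining per-cpu caches before a system-wide state change:
 *
 *	extern void drain_my_percpu_cache(void *arg);	// hypothetical action
 *	...
 *	mp_broadcast(drain_my_percpu_cache, NULL);
 *
 * It must not be used from interrupt context or with interrupts disabled,
 * since the caller may block in thread_block().
 */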
void
mp_broadcast(
	 void (*action_func)(void *),
	 void *arg)
{
	if (!smp_initialized) {
		if (action_func != NULL)
			action_func(arg);
		return;
	}

	/* obtain broadcast lock */
	lck_mtx_lock(&mp_bc_lock);

	/* set static function pointers */
	mp_bc_action_func = action_func;
	mp_bc_func_arg = arg;

	assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);

	/*
	 * signal other processors, which will call mp_broadcast_action()
	 */
	simple_lock(&x86_topo_lock);
	mp_bc_ncpus = i386_active_cpus();	/* total including this cpu */
	mp_bc_count = mp_bc_ncpus;
	i386_signal_cpus(MP_BROADCAST, ASYNC);

	/* call executor function on this cpu */
	mp_broadcast_action();
	simple_unlock(&x86_topo_lock);

	/* block for all cpus to have run action_func */
	if (mp_bc_ncpus > 1)
		thread_block(THREAD_CONTINUE_NULL);
	else
		clear_wait(current_thread(), THREAD_AWAKENED);

	/* release lock */
	lck_mtx_unlock(&mp_bc_lock);
}

void
i386_activate_cpu(void)
{
	cpu_data_t	*cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	if (!smp_initialized) {
		cdp->cpu_running = TRUE;
		return;
	}

	simple_lock(&x86_topo_lock);
	cdp->cpu_running = TRUE;
	started_cpu();
	simple_unlock(&x86_topo_lock);
	flush_tlb_raw();
}

extern void etimer_timer_expire(void *arg);

void
i386_deactivate_cpu(void)
{
	cpu_data_t	*cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
		0, 0, 0, 0, 0);

	simple_lock(&x86_topo_lock);
	cdp->cpu_running = FALSE;
	simple_unlock(&x86_topo_lock);

	/*
	 * Move all of this cpu's timers to the master/boot cpu,
	 * and poke it in case there's a sooner deadline for it to schedule.
	 */
	timer_queue_shutdown(&cdp->rtclock_timer.queue);
	mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL);

	/*
	 * Open an interrupt window
	 * and ensure any pending IPI or timer is serviced
	 */
	mp_disable_preemption();
	ml_set_interrupts_enabled(TRUE);

	while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
		cpu_pause();
	/*
	 * Ensure there's no remaining timer deadline set
	 * - AICPM may have left one active.
	 */
	setPop(0);

	ml_set_interrupts_enabled(FALSE);
	mp_enable_preemption();

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
		0, 0, 0, 0, 0);
}

int	pmsafe_debug	= 1;

#if	MACH_KDP
volatile boolean_t	mp_kdp_trap = FALSE;
volatile unsigned long	mp_kdp_ncpus;
boolean_t		mp_kdp_state;


void
mp_kdp_enter(void)
{
	unsigned int	cpu;
	unsigned int	ncpus = 0;
	unsigned int	my_cpu;
	uint64_t	tsc_timeout;

	DBG("mp_kdp_enter()\n");

	/*
	 * Here to enter the debugger.
	 * In case of races, only one cpu is allowed to enter kdp after
	 * stopping others.
	 */
	mp_kdp_state = ml_set_interrupts_enabled(FALSE);
	my_cpu = cpu_number();

	if (my_cpu == (unsigned) debugger_cpu) {
		kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
		kdp_reset();
		return;
	}

	cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
	simple_lock(&mp_kdp_lock);

	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);

	while (mp_kdp_trap) {
		simple_unlock(&mp_kdp_lock);
		DBG("mp_kdp_enter() race lost\n");
#if MACH_KDP
		mp_kdp_wait(TRUE, FALSE);
#endif
		simple_lock(&mp_kdp_lock);
	}
	debugger_cpu = my_cpu;
	ncpus = 1;
	mp_kdp_ncpus = 1;	/* self */
	mp_kdp_trap = TRUE;
	debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
	simple_unlock(&mp_kdp_lock);

	/*
	 * Deliver a nudge to other cpus, counting how many
	 */
	DBG("mp_kdp_enter() signaling other processors\n");
	if (force_immediate_debugger_NMI == FALSE) {
		for (cpu = 0; cpu < real_ncpus; cpu++) {
			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
				continue;
			ncpus++;
			i386_signal_cpu(cpu, MP_KDP, ASYNC);
		}
		/*
		 * Wait for other processors to synchronize.
		 */
		DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);

		/*
		 * This timeout is rather arbitrary; we don't want to NMI
		 * processors that are executing at potentially
		 * "unsafe-to-interrupt" points such as the trampolines,
		 * but neither do we want to lose state by waiting too long.
		 */
		tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000 * 10ULL);

		if (virtualized)
			tsc_timeout = ~0ULL;

		while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
			/*
			 * A TLB shootdown request may be pending--this would
			 * result in the requesting processor waiting in
			 * PMAP_UPDATE_TLBS() until this processor deals with it.
			 * Process it, so it can now enter mp_kdp_wait()
			 */
			handle_pending_TLB_flushes();
			cpu_pause();
		}
		/* If we've timed out, and some processor(s) are still unresponsive,
		 * interrupt them with an NMI via the local APIC.
		 */
		if (mp_kdp_ncpus != ncpus) {
			for (cpu = 0; cpu < real_ncpus; cpu++) {
				if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
					continue;
				if (cpu_signal_pending(cpu, MP_KDP))
					cpu_NMI_interrupt(cpu);
			}
		}
	}
	else
		for (cpu = 0; cpu < real_ncpus; cpu++) {
			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
				continue;
			cpu_NMI_interrupt(cpu);
		}

	DBG("mp_kdp_enter() %d processors done %s\n",
		(int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ?
"OK" : "timed out"); 1474 1475 postcode(MP_KDP_ENTER); 1476} 1477 1478static boolean_t 1479cpu_signal_pending(int cpu, mp_event_t event) 1480{ 1481 volatile int *signals = &cpu_datap(cpu)->cpu_signals; 1482 boolean_t retval = FALSE; 1483 1484 if (i_bit(event, signals)) 1485 retval = TRUE; 1486 return retval; 1487} 1488 1489long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func, 1490 void *arg0, void *arg1) 1491{ 1492 if (lcpu > (real_ncpus - 1)) 1493 return -1; 1494 1495 if (func == NULL) 1496 return -1; 1497 1498 kdp_xcpu_call_func.func = func; 1499 kdp_xcpu_call_func.ret = -1; 1500 kdp_xcpu_call_func.arg0 = arg0; 1501 kdp_xcpu_call_func.arg1 = arg1; 1502 kdp_xcpu_call_func.cpu = lcpu; 1503 DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu); 1504 while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE) 1505 cpu_pause(); 1506 return kdp_xcpu_call_func.ret; 1507} 1508 1509static void 1510kdp_x86_xcpu_poll(void) 1511{ 1512 if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) { 1513 kdp_xcpu_call_func.ret = 1514 kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0, 1515 kdp_xcpu_call_func.arg1, 1516 cpu_number()); 1517 kdp_xcpu_call_func.cpu = KDP_XCPU_NONE; 1518 } 1519} 1520 1521static void 1522mp_kdp_wait(boolean_t flush, boolean_t isNMI) 1523{ 1524 DBG("mp_kdp_wait()\n"); 1525 /* If an I/O port has been specified as a debugging aid, issue a read */ 1526 panic_io_port_read(); 1527 current_cpu_datap()->debugger_ipi_time = mach_absolute_time(); 1528#if CONFIG_MCA 1529 /* If we've trapped due to a machine-check, save MCA registers */ 1530 mca_check_save(); 1531#endif 1532 1533 atomic_incl((volatile long *)&mp_kdp_ncpus, 1); 1534 while (mp_kdp_trap || (isNMI == TRUE)) { 1535 /* 1536 * A TLB shootdown request may be pending--this would result 1537 * in the requesting processor waiting in PMAP_UPDATE_TLBS() 1538 * until this processor handles it. 1539 * Process it, so it can now enter mp_kdp_wait() 1540 */ 1541 if (flush) 1542 handle_pending_TLB_flushes(); 1543 1544 kdp_x86_xcpu_poll(); 1545 cpu_pause(); 1546 } 1547 1548 atomic_decl((volatile long *)&mp_kdp_ncpus, 1); 1549 DBG("mp_kdp_wait() done\n"); 1550} 1551 1552void 1553mp_kdp_exit(void) 1554{ 1555 DBG("mp_kdp_exit()\n"); 1556 debugger_cpu = -1; 1557 atomic_decl((volatile long *)&mp_kdp_ncpus, 1); 1558 1559 debugger_exit_time = mach_absolute_time(); 1560 1561 mp_kdp_trap = FALSE; 1562 __asm__ volatile("mfence"); 1563 1564 /* Wait other processors to stop spinning. XXX needs timeout */ 1565 DBG("mp_kdp_exit() waiting for processors to resume\n"); 1566 while (mp_kdp_ncpus > 0) { 1567 /* 1568 * a TLB shootdown request may be pending... this would result in the requesting 1569 * processor waiting in PMAP_UPDATE_TLBS() until this processor deals with it. 
		 * Process it, so it can now enter mp_kdp_wait()
		 */
		handle_pending_TLB_flushes();

		cpu_pause();
	}

	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);

	debugger_exit_time = mach_absolute_time();

	DBG("mp_kdp_exit() done\n");
	(void) ml_set_interrupts_enabled(mp_kdp_state);
	postcode(0);
}
#endif	/* MACH_KDP */

boolean_t
mp_recent_debugger_activity() {
	uint64_t abstime = mach_absolute_time();
	return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
		((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
}

/*ARGSUSED*/
void
init_ast_check(
	__unused processor_t	processor)
{
}

void
cause_ast_check(
	processor_t	processor)
{
	int	cpu = processor->cpu_id;

	if (cpu != cpu_number()) {
		i386_signal_cpu(cpu, MP_AST, ASYNC);
		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
	}
}

void
slave_machine_init(void *param)
{
	/*
	 * Here in process context, but with interrupts disabled.
	 */
	DBG("slave_machine_init() CPU%d\n", get_cpu_number());

	if (param == FULL_SLAVE_INIT) {
		/*
		 * Cold start
		 */
		clock_init();
		cpu_machine_init();	/* Interrupts enabled hereafter */
		mp_cpus_call_cpu_init();
	} else {
		cpu_machine_init();	/* Interrupts enabled hereafter */
	}
}

#undef cpu_number
int cpu_number(void)
{
	return get_cpu_number();
}

static void
cpu_prewarm_init()
{
	int i;

	simple_lock_init(&cpu_warm_lock, 0);
	queue_init(&cpu_warm_call_list);
	for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
		enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
	}
}

static timer_call_t
grab_warm_timer_call()
{
	spl_t x;
	timer_call_t call = NULL;

	x = splsched();
	simple_lock(&cpu_warm_lock);
	if (!queue_empty(&cpu_warm_call_list)) {
		call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
	}
	simple_unlock(&cpu_warm_lock);
	splx(x);

	return call;
}

static void
free_warm_timer_call(timer_call_t call)
{
	spl_t x;

	x = splsched();
	simple_lock(&cpu_warm_lock);
	enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
	simple_unlock(&cpu_warm_lock);
	splx(x);
}

/*
 * Runs in timer call context (interrupts disabled).
 */
static void
cpu_warm_timer_call_func(
		call_entry_param_t p0,
		__unused call_entry_param_t p1)
{
	free_warm_timer_call((timer_call_t)p0);
	return;
}

/*
 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
 */
static void
_cpu_warm_setup(
		void *arg)
{
	cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;

	timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL);
	cwdp->cwd_result = 0;

	return;
}

/*
 * Not safe to call with interrupts disabled.
 */
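
/*
 * Illustrative sketch (hypothetical caller): power-management code that knows
 * when the next wakeup interrupt is due could pre-warm the master cpu so the
 * interrupt is taken promptly; the deadline is in absolute time units:
 *
 *	uint64_t deadline;
 *	clock_interval_to_deadline(10, NSEC_PER_MSEC, &deadline);
 *	(void) ml_interrupt_prewarm(deadline);
 *
 * The call must be made with interrupts enabled (enforced by the panic below)
 * and simply reports success if the platform does not need pre-warming.
 */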
kern_return_t
ml_interrupt_prewarm(
	uint64_t	deadline)
{
	struct cpu_warm_data cwd;
	timer_call_t call;
	cpu_t ct;

	if (ml_get_interrupts_enabled() == FALSE) {
		panic("%s: Interrupts disabled?\n", __FUNCTION__);
	}

	/*
	 * If the platform doesn't need our help, say that we succeeded.
	 */
	if (!ml_get_interrupt_prewake_applicable()) {
		return KERN_SUCCESS;
	}

	/*
	 * Grab a timer call to use.
	 */
	call = grab_warm_timer_call();
	if (call == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	timer_call_setup(call, cpu_warm_timer_call_func, call);
	cwd.cwd_call = call;
	cwd.cwd_deadline = deadline;
	cwd.cwd_result = 0;

	/*
	 * For now, non-local interrupts happen on the master processor.
	 */
	ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
	if (ct == 0) {
		free_warm_timer_call(call);
		return KERN_FAILURE;
	} else {
		return cwd.cwd_result;
	}
}