/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */

#include <mach_rt.h>
#include <mach_kdp.h>
#include <mach_ldebug.h>
#include <gprof.h>

#include <mach/mach_types.h>
#include <mach/kern_return.h>

#include <kern/kern_types.h>
#include <kern/startup.h>
#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/machine.h>
#include <kern/pms.h>
#include <kern/misc_protos.h>
#include <kern/timer_call.h>
#include <kern/kalloc.h>
#include <kern/queue.h>
#include <prng/random.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <profiling/profile-mk.h>

#include <i386/bit_routines.h>
#include <i386/proc_reg.h>
#include <i386/cpu_threads.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/trap.h>
#include <i386/postcode.h>
#include <i386/machine_routines.h>
#include <i386/mp.h>
#include <i386/mp_events.h>
#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <i386/machine_cpu.h>
#include <i386/pmCPU.h>
#if CONFIG_MCA
#include <i386/machine_check.h>
#endif
#include <i386/acpi.h>

#include <chud/chud_xnu.h>
#include <chud/chud_xnu_private.h>

#include <sys/kdebug.h>

#include <console/serial_protos.h>

#if MP_DEBUG
#define PAUSE		delay(1000000)
#define DBG(x...)	kprintf(x)
#else
#define DBG(x...)
#define PAUSE
#endif	/* MP_DEBUG */

/* Debugging/test trace events: */
#define TRACE_MP_TLB_FLUSH		MACHDBG_CODE(DBG_MACH_MP, 0)
#define TRACE_MP_CPUS_CALL		MACHDBG_CODE(DBG_MACH_MP, 1)
#define TRACE_MP_CPUS_CALL_LOCAL	MACHDBG_CODE(DBG_MACH_MP, 2)
#define TRACE_MP_CPUS_CALL_ACTION	MACHDBG_CODE(DBG_MACH_MP, 3)
#define TRACE_MP_CPUS_CALL_NOBUF	MACHDBG_CODE(DBG_MACH_MP, 4)
#define TRACE_MP_CPU_FAST_START		MACHDBG_CODE(DBG_MACH_MP, 5)
#define TRACE_MP_CPU_START		MACHDBG_CODE(DBG_MACH_MP, 6)
#define TRACE_MP_CPU_DEACTIVATE		MACHDBG_CODE(DBG_MACH_MP, 7)

#define ABS(v)		(((v) > 0)?(v):-(v))

void		slave_boot_init(void);
void		i386_cpu_IPI(int cpu);

#if MACH_KDP
static void	mp_kdp_wait(boolean_t flush, boolean_t isNMI);
#endif /* MACH_KDP */
static void	mp_rendezvous_action(void);
static void	mp_broadcast_action(void);

#if MACH_KDP
static boolean_t	cpu_signal_pending(int cpu, mp_event_t event);
#endif /* MACH_KDP */
static int		NMIInterruptHandler(x86_saved_state_t *regs);

boolean_t		smp_initialized = FALSE;
uint32_t		TSC_sync_margin = 0xFFF;
volatile boolean_t	force_immediate_debugger_NMI = FALSE;
volatile boolean_t	pmap_tlb_flush_timeout = FALSE;
decl_simple_lock_data(,mp_kdp_lock);

decl_lck_mtx_data(static, mp_cpu_boot_lock);
lck_mtx_ext_t	mp_cpu_boot_lock_ext;

/* Variables needed for MP rendezvous. */
decl_simple_lock_data(,mp_rv_lock);
static void	(*mp_rv_setup_func)(void *arg);
static void	(*mp_rv_action_func)(void *arg);
static void	(*mp_rv_teardown_func)(void *arg);
static void	*mp_rv_func_arg;
static volatile int	mp_rv_ncpus;
		/* Cache-aligned barriers: */
static volatile long	mp_rv_entry    __attribute__((aligned(64)));
static volatile long	mp_rv_exit     __attribute__((aligned(64)));
static volatile long	mp_rv_complete __attribute__((aligned(64)));

volatile uint64_t	debugger_entry_time;
volatile uint64_t	debugger_exit_time;
#if MACH_KDP
#include <kdp/kdp.h>
extern int kdp_snapshot;
static struct _kdp_xcpu_call_func {
	kdp_x86_xcpu_func_t func;
	void		*arg0, *arg1;
	volatile long	ret;
	volatile uint16_t cpu;
} kdp_xcpu_call_func = {
	.cpu = KDP_XCPU_NONE
};

#endif

/* Variables needed for MP broadcast. */
static void	(*mp_bc_action_func)(void *arg);
static void	*mp_bc_func_arg;
static int	mp_bc_ncpus;
static volatile long	mp_bc_count;
decl_lck_mtx_data(static, mp_bc_lock);
lck_mtx_ext_t	mp_bc_lock_ext;
static volatile int	debugger_cpu = -1;
volatile long	NMIPI_acks = 0;
volatile long	NMI_count = 0;

extern void	NMI_cpus(void);

static void	mp_cpus_call_init(void);
static void	mp_cpus_call_action(void);
static void	mp_call_PM(void);

static boolean_t	mp_cpus_call_wait_timeout = FALSE;

char		mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));	// Temp stack for slave init

/* PAL-related routines */
boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
		int ipi_vector, i386_intr_func_t ipi_handler);
void i386_start_cpu(int lapic_id, int cpu_num);
void i386_send_NMI(int cpu);

#if GPROF
/*
 * Initialize dummy structs for profiling. These aren't used but
 * allow hertz_tick() to be built with GPROF defined.
 */
struct profile_vars _profile_vars;
struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
#define GPROF_INIT()							\
{									\
	int	i;							\
									\
	/* Hack to initialize pointers to unused profiling structs */	\
	for (i = 1; i < MAX_CPUS; i++)					\
		_profile_vars_cpus[i] = &_profile_vars;			\
}
#else
#define GPROF_INIT()
#endif /* GPROF */

static lck_grp_t	smp_lck_grp;
static lck_grp_attr_t	smp_lck_grp_attr;

#define NUM_CPU_WARM_CALLS	20
struct timer_call	cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
queue_head_t		cpu_warm_call_list;
decl_simple_lock_data(static, cpu_warm_lock);

typedef struct cpu_warm_data {
	timer_call_t	cwd_call;
	uint64_t	cwd_deadline;
	int		cwd_result;
} *cpu_warm_data_t;

static void		cpu_prewarm_init(void);
static void		cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
static void		_cpu_warm_setup(void *arg);
static timer_call_t	grab_warm_timer_call(void);
static void		free_warm_timer_call(timer_call_t call);

void
smp_init(void)
{
	simple_lock_init(&mp_kdp_lock, 0);
	simple_lock_init(&mp_rv_lock, 0);
	lck_grp_attr_setdefault(&smp_lck_grp_attr);
	lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
	lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
	lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
	console_init();

	if (!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
			   LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
		return;

	cpu_thread_init();

	GPROF_INIT();
	DBGLOG_CPU_INIT(master_cpu);

	mp_cpus_call_init();
	mp_cpus_call_cpu_init(master_cpu);

	if (PE_parse_boot_argn("TSC_sync_margin",
			       &TSC_sync_margin, sizeof(TSC_sync_margin))) {
		kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
	} else if (cpuid_vmm_present()) {
		kprintf("TSC sync margin disabled\n");
		TSC_sync_margin = 0;
	}
	smp_initialized = TRUE;

	cpu_prewarm_init();

	return;
}

typedef struct {
	int		target_cpu;
	int		target_lapic;
	int		starter_cpu;
} processor_start_info_t;
static processor_start_info_t	start_info	__attribute__((aligned(64)));

/*
 * Cache-alignment is to avoid cross-cpu false-sharing interference.
 */
static volatile long		tsc_entry_barrier __attribute__((aligned(64)));
static volatile long		tsc_exit_barrier  __attribute__((aligned(64)));
static volatile uint64_t	tsc_target	  __attribute__((aligned(64)));

/*
 * Poll a CPU to see when it has marked itself as running.
 */
static void
mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
{
	while (iters-- > 0) {
		if (cpu_datap(slot_num)->cpu_running)
			break;
		delay(usecdelay);
	}
}

/*
 * Quickly bring a CPU back online which has been halted.
 */
kern_return_t
intel_startCPU_fast(int slot_num)
{
	kern_return_t	rc;

	/*
	 * Try to perform a fast restart
	 */
	rc = pmCPUExitHalt(slot_num);
	if (rc != KERN_SUCCESS)
		/*
		 * The CPU was not eligible for a fast restart.
		 */
		return(rc);

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
		slot_num, 0, 0, 0, 0);

	/*
	 * Wait until the CPU is back online.
	 */
	mp_disable_preemption();

	/*
	 * We use short pauses (1us) for low latency.
	 * 30,000 iterations is longer than a full restart would require,
	 * so it should be more than long enough.
	 */

	mp_wait_for_cpu_up(slot_num, 30000, 1);
	mp_enable_preemption();

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
		slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);

	/*
	 * Check to make sure that the CPU is really running.  If not,
	 * go through the slow path.
	 */
	if (cpu_datap(slot_num)->cpu_running)
		return(KERN_SUCCESS);
	else
		return(KERN_FAILURE);
}

static void
started_cpu(void)
{
	/* Here on the started cpu with cpu_running set TRUE */

	if (TSC_sync_margin &&
	    start_info.target_cpu == cpu_number()) {
		/*
		 * I've just started-up, synchronize again with the starter cpu
		 * and then snap my TSC.
		 */
		tsc_target = 0;
		atomic_decl(&tsc_entry_barrier, 1);
		while (tsc_entry_barrier != 0)
			;	/* spin for starter and target at barrier */
		tsc_target = rdtsc64();
		atomic_decl(&tsc_exit_barrier, 1);
	}
}

static void
start_cpu(void *arg)
{
	int			i = 1000;
	processor_start_info_t	*psip = (processor_start_info_t *) arg;

	/* Ignore this if the current processor is not the starter */
	if (cpu_number() != psip->starter_cpu)
		return;

	DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
		arg, psip->target_cpu, psip->target_lapic);

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_START | DBG_FUNC_START,
		psip->target_cpu,
		psip->target_lapic, 0, 0, 0);

	i386_start_cpu(psip->target_lapic, psip->target_cpu);

#ifdef	POSTCODE_DELAY
	/* Wait much longer if postcodes are displayed for a delay period. */
	i *= 10000;
#endif
	DBG("start_cpu(%p) about to wait for cpu %d\n",
		arg, psip->target_cpu);

	mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_START | DBG_FUNC_END,
		psip->target_cpu,
		cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);

	if (TSC_sync_margin &&
	    cpu_datap(psip->target_cpu)->cpu_running) {
		/*
		 * Compare the TSC from the started processor with ours.
		 * Report and log/panic if it diverges by more than
		 * TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
		 * can be overridden by boot-arg (with 0 meaning no checking).
		 */
		uint64_t	tsc_starter;
		int64_t		tsc_delta;
		atomic_decl(&tsc_entry_barrier, 1);
		while (tsc_entry_barrier != 0)
			;	/* spin for both processors at barrier */
		tsc_starter = rdtsc64();
		atomic_decl(&tsc_exit_barrier, 1);
		while (tsc_exit_barrier != 0)
			;	/* spin for target to store its TSC */
		tsc_delta = tsc_target - tsc_starter;
		kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
			psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
		if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
#if DEBUG
			panic(
#else
			printf(
#endif
				"Unsynchronized TSC for cpu %d: "
					"0x%016llx, delta 0x%llx\n",
				psip->target_cpu, tsc_target, tsc_delta);
		}
	}
}

kern_return_t
intel_startCPU(
	int	slot_num)
{
	int		lapic = cpu_to_lapic[slot_num];
	boolean_t	istate;

	assert(lapic != -1);

	DBGLOG_CPU_INIT(slot_num);

	DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
	DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);

	/*
	 * Initialize (or re-initialize) the descriptor tables for this cpu.
	 * Propagate processor mode to slave.
	 */
	cpu_desc_init64(cpu_datap(slot_num));

	/* Serialize use of the slave boot stack, etc. */
	lck_mtx_lock(&mp_cpu_boot_lock);

	istate = ml_set_interrupts_enabled(FALSE);
	if (slot_num == get_cpu_number()) {
		ml_set_interrupts_enabled(istate);
		lck_mtx_unlock(&mp_cpu_boot_lock);
		return KERN_SUCCESS;
	}

	start_info.starter_cpu  = cpu_number();
	start_info.target_cpu   = slot_num;
	start_info.target_lapic = lapic;
	tsc_entry_barrier = 2;
	tsc_exit_barrier = 2;

	/*
	 * Perform the processor startup sequence with all running
	 * processors rendezvous'ed. This is required during periods when
	 * the cache-disable bit is set for MTRR/PAT initialization.
	 */
	mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);

	start_info.target_cpu = 0;

	ml_set_interrupts_enabled(istate);
	lck_mtx_unlock(&mp_cpu_boot_lock);

	if (!cpu_datap(slot_num)->cpu_running) {
		kprintf("Failed to start CPU %02d\n", slot_num);
		printf("Failed to start CPU %02d, rebooting...\n", slot_num);
		delay(1000000);
		halt_cpu();
		return KERN_SUCCESS;
	} else {
		kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
		return KERN_SUCCESS;
	}
}

#if	MP_DEBUG
cpu_signal_event_log_t	*cpu_signal[MAX_CPUS];
cpu_signal_event_log_t	*cpu_handle[MAX_CPUS];

MP_EVENT_NAME_DECL();

#endif	/* MP_DEBUG */

/*
 * Note: called with NULL state when polling for TLB flush and cross-calls.
 */
int
cpu_signal_handler(x86_saved_state_t *regs)
{
#if	!MACH_KDP
#pragma unused (regs)
#endif /* !MACH_KDP */
	int		my_cpu;
	volatile int	*my_word;

	SCHED_STATS_IPI(current_processor());

	my_cpu = cpu_number();
	my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
	/* Store the initial set of signals for diagnostics. New
	 * signals could arrive while these are being processed
	 * so it's no more than a hint.
	 */

	cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;

	do {
#if	MACH_KDP
		if (i_bit(MP_KDP, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_KDP);
			i_bit_clear(MP_KDP, my_word);
/* Ensure that the i386_kernel_state at the base of the
 * current thread's stack (if any) is synchronized with the
 * context at the moment of the interrupt, to facilitate
 * access through the debugger.
 */
			sync_iss_to_iks(regs);
			if (pmsafe_debug && !kdp_snapshot)
				pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
			mp_kdp_wait(TRUE, FALSE);
			if (pmsafe_debug && !kdp_snapshot)
				pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
		} else
#endif	/* MACH_KDP */
		if (i_bit(MP_TLB_FLUSH, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
			i_bit_clear(MP_TLB_FLUSH, my_word);
			pmap_update_interrupt();
		} else if (i_bit(MP_RENDEZVOUS, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
			i_bit_clear(MP_RENDEZVOUS, my_word);
			mp_rendezvous_action();
		} else if (i_bit(MP_BROADCAST, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
			i_bit_clear(MP_BROADCAST, my_word);
			mp_broadcast_action();
		} else if (i_bit(MP_CHUD, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CHUD);
			i_bit_clear(MP_CHUD, my_word);
			chudxnu_cpu_signal_handler();
		} else if (i_bit(MP_CALL, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CALL);
			i_bit_clear(MP_CALL, my_word);
			mp_cpus_call_action();
		} else if (i_bit(MP_CALL_PM, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
			i_bit_clear(MP_CALL_PM, my_word);
			mp_call_PM();
		}
		if (regs == NULL) {
			/* Called to poll only for cross-calls and TLB flush */
			break;
		} else if (i_bit(MP_AST, my_word)) {
			DBGLOG(cpu_handle,my_cpu,MP_AST);
			i_bit_clear(MP_AST, my_word);
			ast_check(cpu_to_processor(my_cpu));
		}
	} while (*my_word);

	return 0;
}

extern void kprintf_break_lock(void);
static int
NMIInterruptHandler(x86_saved_state_t *regs)
{
	void	*stackptr;

	if (panic_active() && !panicDebugging) {
		if (pmsafe_debug)
			pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
		for(;;)
			cpu_pause();
	}

	atomic_incl(&NMIPI_acks, 1);
	atomic_incl(&NMI_count, 1);
	sync_iss_to_iks_unconditionally(regs);
	__asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));

	if (cpu_number() == debugger_cpu)
		goto NMExit;

	if (spinlock_timed_out) {
		char pstr[192];
		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
	} else if (mp_cpus_call_wait_timeout) {
		char pstr[192];
		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor, this CPU timed-out during cross-call\n", cpu_number());
		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
	} else if (pmap_tlb_flush_timeout == TRUE) {
		char pstr[128];
		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
		panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
	}

#if MACH_KDP
	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
	current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
	i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals);
	if (pmap_tlb_flush_timeout ||
	    spinlock_timed_out ||
	    mp_cpus_call_wait_timeout ||
	    panic_active()) {
		mp_kdp_wait(FALSE, TRUE);
	} else if (virtualized && (debug_boot_arg & DB_NMI)) {
		/*
		 * Under a VMM with the debug boot-arg set, drop into kdp.
		 * Since an NMI is involved, there's a risk of contending with
		 * a panic. And side-effects of NMIs may result in entry into,
		 * and continuing from, the debugger being unreliable.
		 */
		kprintf_break_lock();
		kprintf("Debugger entry requested by NMI\n");
		kdp_i386_trap(T_DEBUG, saved_state64(regs), 0, 0);
		printf("Debugger entry requested by NMI\n");
	} else {
		mp_kdp_wait(FALSE, FALSE);
	}
	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
#endif
NMExit:
	return 1;
}


/*
 * cpu_interrupt is really just to be used by the scheduler to
 * get a CPU's attention; it may not always issue an IPI. If an
 * IPI is always needed then use i386_cpu_IPI.
 */
void
cpu_interrupt(int cpu)
{
	boolean_t did_IPI = FALSE;

	if (smp_initialized
	    && pmCPUExitIdle(cpu_datap(cpu))) {
		i386_cpu_IPI(cpu);
		did_IPI = TRUE;
	}

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
}

/*
 * Send a true NMI via the local APIC to the specified CPU.
 */
void
cpu_NMI_interrupt(int cpu)
{
	if (smp_initialized) {
		i386_send_NMI(cpu);
	}
}

void
NMI_cpus(void)
{
	unsigned int	cpu;
	boolean_t	intrs_enabled;
	uint64_t	tsc_timeout;

	intrs_enabled = ml_set_interrupts_enabled(FALSE);

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (!cpu_datap(cpu)->cpu_running)
			continue;
		cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
		cpu_NMI_interrupt(cpu);
		tsc_timeout = !machine_timeout_suspended() ?
				rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
				~0ULL;
		while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
			handle_pending_TLB_flushes();
			cpu_pause();
			if (rdtsc64() > tsc_timeout)
				panic("NMI_cpus() timeout cpu %d", cpu);
		}
		cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
	}

	ml_set_interrupts_enabled(intrs_enabled);
}

static void	(* volatile mp_PM_func)(void) = NULL;

static void
mp_call_PM(void)
{
	assert(!ml_get_interrupts_enabled());

	if (mp_PM_func != NULL)
		mp_PM_func();
}

void
cpu_PM_interrupt(int cpu)
{
	assert(!ml_get_interrupts_enabled());

	if (mp_PM_func != NULL) {
		if (cpu == cpu_number())
			mp_PM_func();
		else
			i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
	}
}

void
PM_interrupt_register(void (*fn)(void))
{
	mp_PM_func = fn;
}

void
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
{
	volatile int	*signals = &cpu_datap(cpu)->cpu_signals;
	uint64_t	tsc_timeout;


	if (!cpu_datap(cpu)->cpu_running)
		return;

	if (event == MP_TLB_FLUSH)
		KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);

	DBGLOG(cpu_signal, cpu, event);

	i_bit_set(event, signals);
	i386_cpu_IPI(cpu);
	if (mode == SYNC) {
	again:
		tsc_timeout = !machine_timeout_suspended() ?
				rdtsc64() + (1000*1000*1000) :
				~0ULL;
		while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
			cpu_pause();
		}
		if (i_bit(event, signals)) {
			DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
				cpu, event);
			goto again;
		}
	}
	if (event == MP_TLB_FLUSH)
		KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
}

/*
 * Send event to all running cpus.
 * Called with the topology locked.
 */
void
i386_signal_cpus(mp_event_t event, mp_sync_t mode)
{
	unsigned int	cpu;
	unsigned int	my_cpu = cpu_number();

	assert(hw_lock_held((hw_lock_t)&x86_topo_lock));

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
			continue;
		i386_signal_cpu(cpu, event, mode);
	}
}

/*
 * Return the number of running cpus.
 * Called with the topology locked.
 */
int
i386_active_cpus(void)
{
	unsigned int	cpu;
	unsigned int	ncpus = 0;

	assert(hw_lock_held((hw_lock_t)&x86_topo_lock));

	for (cpu = 0; cpu < real_ncpus; cpu++) {
		if (cpu_datap(cpu)->cpu_running)
			ncpus++;
	}
	return(ncpus);
}

/*
 * Helper function called when busy-waiting: panic if too long
 * a TSC-based time has elapsed since the start of the spin.
 */
static boolean_t
mp_spin_timeout(uint64_t tsc_start)
{
	uint64_t	tsc_timeout;

	cpu_pause();
	if (machine_timeout_suspended())
		return FALSE;

	/*
	 * The timeout is 4 * the spinlock timeout period
	 * unless we have serial console printing (kprintf) enabled
	 * in which case we allow an even greater margin.
	 */
	tsc_timeout = disable_serial_output ? (uint64_t) LockTimeOutTSC << 2
					    : (uint64_t) LockTimeOutTSC << 4;
	return (rdtsc64() > tsc_start + tsc_timeout);
}

/*
 * Helper function to take a spinlock while ensuring that incoming IPIs
 * are still serviced if interrupts are masked while we spin.
 */
static boolean_t
mp_safe_spin_lock(usimple_lock_t lock)
{
	if (ml_get_interrupts_enabled()) {
		simple_lock(lock);
		return TRUE;
	} else {
		uint64_t tsc_spin_start = rdtsc64();
		while (!simple_lock_try(lock)) {
			cpu_signal_handler(NULL);
			if (mp_spin_timeout(tsc_spin_start)) {
				uint32_t lock_cpu;
				uintptr_t lowner = (uintptr_t)
						   lock->interlock.lock_data;
				spinlock_timed_out = lock;
				lock_cpu = spinlock_timeout_NMI(lowner);
				panic("mp_safe_spin_lock() timed out,"
				      " lock: %p, owner thread: 0x%lx,"
				      " current_thread: %p, owner on CPU 0x%x",
				      lock, lowner,
				      current_thread(), lock_cpu);
			}
		}
		return FALSE;
	}
}

/*
 * All-CPU rendezvous:
 *	- CPUs are signalled,
 *	- all execute the setup function (if specified),
 *	- rendezvous (i.e. all cpus reach a barrier),
 *	- all execute the action function (if specified),
 *	- rendezvous again,
 *	- execute the teardown function (if specified), and then
 *	- resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */
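
/*
 * A minimal usage sketch (illustrative only; the action routine, MSR number
 * and value below are hypothetical, not part of this file). A caller that
 * must program identical state on every cpu, with interrupts disabled for
 * the duration, can use the mp_rendezvous_no_intrs() wrapper defined below:
 *
 *	static void
 *	program_my_msr(void *arg)
 *	{
 *		wrmsr64(MY_MSR, *(uint64_t *) arg);
 *	}
 *
 *	uint64_t value = 1;
 *	mp_rendezvous_no_intrs(program_my_msr, &value);
 *
 * Every active cpu runs program_my_msr() between the entry and exit
 * barriers, so no cpu observes a half-updated configuration.
 */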

static void
mp_rendezvous_action(void)
{
	boolean_t	intrs_enabled;
	uint64_t	tsc_spin_start;

	/* setup function */
	if (mp_rv_setup_func != NULL)
		mp_rv_setup_func(mp_rv_func_arg);

	intrs_enabled = ml_get_interrupts_enabled();

	/* spin on entry rendezvous */
	atomic_incl(&mp_rv_entry, 1);
	tsc_spin_start = rdtsc64();
	while (mp_rv_entry < mp_rv_ncpus) {
		/* poll for pesky tlb flushes if interrupts disabled */
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
		if (mp_spin_timeout(tsc_spin_start))
			panic("mp_rendezvous_action() entry");
	}

	/* action function */
	if (mp_rv_action_func != NULL)
		mp_rv_action_func(mp_rv_func_arg);

	/* spin on exit rendezvous */
	atomic_incl(&mp_rv_exit, 1);
	tsc_spin_start = rdtsc64();
	while (mp_rv_exit < mp_rv_ncpus) {
		if (!intrs_enabled)
			handle_pending_TLB_flushes();
		if (mp_spin_timeout(tsc_spin_start))
			panic("mp_rendezvous_action() exit");
	}

	/* teardown function */
	if (mp_rv_teardown_func != NULL)
		mp_rv_teardown_func(mp_rv_func_arg);

	/* Bump completion count */
	atomic_incl(&mp_rv_complete, 1);
}

void
mp_rendezvous(void (*setup_func)(void *),
	      void (*action_func)(void *),
	      void (*teardown_func)(void *),
	      void *arg)
{
	uint64_t	tsc_spin_start;

	if (!smp_initialized) {
		if (setup_func != NULL)
			setup_func(arg);
		if (action_func != NULL)
			action_func(arg);
		if (teardown_func != NULL)
			teardown_func(arg);
		return;
	}

	/* obtain rendezvous lock */
	(void) mp_safe_spin_lock(&mp_rv_lock);

	/* set static function pointers */
	mp_rv_setup_func = setup_func;
	mp_rv_action_func = action_func;
	mp_rv_teardown_func = teardown_func;
	mp_rv_func_arg = arg;

	mp_rv_entry    = 0;
	mp_rv_exit     = 0;
	mp_rv_complete = 0;

	/*
	 * signal other processors, which will call mp_rendezvous_action()
	 * with interrupts disabled
	 */
	(void) mp_safe_spin_lock(&x86_topo_lock);
	mp_rv_ncpus = i386_active_cpus();
	i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
	simple_unlock(&x86_topo_lock);

	/* call executor function on this cpu */
	mp_rendezvous_action();

	/*
	 * Spin for everyone to complete.
	 * This is necessary to ensure that all processors have proceeded
	 * from the exit barrier before we release the rendezvous structure.
	 */
	tsc_spin_start = rdtsc64();
	while (mp_rv_complete < mp_rv_ncpus) {
		if (mp_spin_timeout(tsc_spin_start))
			panic("mp_rendezvous() timeout");
	}

	/* Tidy up */
	mp_rv_setup_func = NULL;
	mp_rv_action_func = NULL;
	mp_rv_teardown_func = NULL;
	mp_rv_func_arg = NULL;

	/* release lock */
	simple_unlock(&mp_rv_lock);
}

void
mp_rendezvous_break_lock(void)
{
	simple_lock_init(&mp_rv_lock, 0);
}

static void
setup_disable_intrs(__unused void * param_not_used)
{
	/* disable interrupts before the first barrier */
	boolean_t intr = ml_set_interrupts_enabled(FALSE);

	current_cpu_datap()->cpu_iflag = intr;
	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

static void
teardown_restore_intrs(__unused void * param_not_used)
{
	/* restore interrupt flag following MTRR changes */
	ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
	DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}

/*
 * A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
 * This is exported for use by kexts.
 */
void
mp_rendezvous_no_intrs(
	      void (*action_func)(void *),
	      void *arg)
{
	mp_rendezvous(setup_disable_intrs,
		      action_func,
		      teardown_restore_intrs,
		      arg);
}


typedef struct {
	queue_chain_t	link;			/* queue linkage */
	void		(*func)(void *,void *);	/* routine to call */
	void		*arg0;			/* routine's 1st arg */
	void		*arg1;			/* routine's 2nd arg */
	cpumask_t	*maskp;			/* completion response mask */
} mp_call_t;


typedef struct {
	queue_head_t		queue;
	decl_simple_lock_data(,	lock);
} mp_call_queue_t;
#define MP_CPUS_CALL_BUFS_PER_CPU	MAX_CPUS
static mp_call_queue_t	mp_cpus_call_freelist;
static mp_call_queue_t	mp_cpus_call_head[MAX_CPUS];

static inline boolean_t
mp_call_head_lock(mp_call_queue_t *cqp)
{
	boolean_t	intrs_enabled;

	intrs_enabled = ml_set_interrupts_enabled(FALSE);
	simple_lock(&cqp->lock);

	return intrs_enabled;
}

void
mp_cpus_NMIPI(cpumask_t cpu_mask) {
	unsigned int cpu, cpu_bit;
	uint64_t deadline;

	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
		if (cpu_mask & cpu_bit)
			cpu_NMI_interrupt(cpu);
	}
	deadline = mach_absolute_time() + (LockTimeOut);
	while (mach_absolute_time() < deadline)
		cpu_pause();
}

#if MACH_ASSERT
static inline boolean_t
mp_call_head_is_locked(mp_call_queue_t *cqp)
{
	return !ml_get_interrupts_enabled() &&
		hw_lock_held((hw_lock_t)&cqp->lock);
}
#endif

static inline void
mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
{
	simple_unlock(&cqp->lock);
	ml_set_interrupts_enabled(intrs_enabled);
}

static inline mp_call_t *
mp_call_alloc(void)
{
	mp_call_t	*callp = NULL;
	boolean_t	intrs_enabled;
	mp_call_queue_t	*cqp = &mp_cpus_call_freelist;

	intrs_enabled = mp_call_head_lock(cqp);
	if (!queue_empty(&cqp->queue))
		queue_remove_first(&cqp->queue, callp, typeof(callp), link);
	mp_call_head_unlock(cqp, intrs_enabled);

	return callp;
}

static inline void
mp_call_free(mp_call_t *callp)
{
	boolean_t	intrs_enabled;
	mp_call_queue_t	*cqp = &mp_cpus_call_freelist;

	intrs_enabled = mp_call_head_lock(cqp);
	queue_enter_first(&cqp->queue, callp, typeof(callp), link);
	mp_call_head_unlock(cqp, intrs_enabled);
}

static inline mp_call_t *
mp_call_dequeue_locked(mp_call_queue_t *cqp)
{
	mp_call_t	*callp = NULL;

	assert(mp_call_head_is_locked(cqp));
	if (!queue_empty(&cqp->queue))
		queue_remove_first(&cqp->queue, callp, typeof(callp), link);
	return callp;
}

static inline void
mp_call_enqueue_locked(
	mp_call_queue_t	*cqp,
	mp_call_t	*callp)
{
	queue_enter(&cqp->queue, callp, typeof(callp), link);
}

/* Called on the boot processor to initialize global structures */
static void
mp_cpus_call_init(void)
{
	mp_call_queue_t	*cqp = &mp_cpus_call_freelist;

	DBG("mp_cpus_call_init()\n");
	simple_lock_init(&cqp->lock, 0);
	queue_init(&cqp->queue);
}

/*
 * Called at processor registration to add call buffers to the free list
 * and to initialize the per-cpu call queue.
 */
void
mp_cpus_call_cpu_init(int cpu)
{
	int		i;
	mp_call_queue_t	*cqp = &mp_cpus_call_head[cpu];
	mp_call_t	*callp;

	simple_lock_init(&cqp->lock, 0);
	queue_init(&cqp->queue);
	for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
		callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
		mp_call_free(callp);
	}

	DBG("mp_cpus_call_init(%d) done\n", cpu);
}

/*
 * This is called from cpu_signal_handler() to process an MP_CALL signal.
 * It is also called from i386_deactivate_cpu() when a cpu is being taken
 * offline.
 */
static void
mp_cpus_call_action(void)
{
	mp_call_queue_t	*cqp;
	boolean_t	intrs_enabled;
	mp_call_t	*callp;
	mp_call_t	call;

	assert(!ml_get_interrupts_enabled());
	cqp = &mp_cpus_call_head[cpu_number()];
	intrs_enabled = mp_call_head_lock(cqp);
	while ((callp = mp_call_dequeue_locked(cqp)) != NULL) {
		/* Copy call request to the stack to free buffer */
		call = *callp;
		mp_call_free(callp);
		if (call.func != NULL) {
			mp_call_head_unlock(cqp, intrs_enabled);
			KERNEL_DEBUG_CONSTANT(
				TRACE_MP_CPUS_CALL_ACTION,
				call.func, call.arg0, call.arg1, call.maskp, 0);
			call.func(call.arg0, call.arg1);
			(void) mp_call_head_lock(cqp);
		}
		if (call.maskp != NULL)
			i_bit_set(cpu_number(), call.maskp);
	}
	mp_call_head_unlock(cqp, intrs_enabled);
}

/*
 * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
 * Possible modes are:
 *  SYNC:   function is called serially on target cpus in logical cpu order
 *	    waiting for each call to be acknowledged before proceeding
 *  ASYNC:  function call is queued to the specified cpus
 *	    waiting for all calls to complete in parallel before returning
 *  NOSYNC: function calls are queued
 *	    but we return before confirmation of calls completing.
 * The action function may be NULL.
 * The cpu mask may include the local cpu. Offline cpus are ignored.
 * The return value is the number of cpus on which the call was made or queued.
 */
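
/*
 * A minimal usage sketch (illustrative only; the action routine and the
 * counter variable are hypothetical, not part of this file). Here the
 * master cpu is asked, synchronously, to bump a counter on the caller's
 * behalf:
 *
 *	static void
 *	bump_counter(void *arg)
 *	{
 *		atomic_incl((volatile long *) arg, 1);
 *	}
 *
 *	long	counter = 0;
 *	cpu_t	n;
 *
 *	n = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, bump_counter, &counter);
 *
 * On return, n is the number of cpus on which the call ran or was queued
 * (here, 1), and counter has been incremented exactly once.
 */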
cpu_t
mp_cpus_call(
	cpumask_t	cpus,
	mp_sync_t	mode,
	void		(*action_func)(void *),
	void		*arg)
{
	return mp_cpus_call1(
			cpus,
			mode,
			(void (*)(void *,void *))action_func,
			arg,
			NULL,
			NULL,
			NULL);
}

static void
mp_cpus_call_wait(boolean_t	intrs_enabled,
		  cpumask_t	cpus_called,
		  cpumask_t	*cpus_responded)
{
	mp_call_queue_t	*cqp;
	uint64_t	tsc_spin_start;

	cqp = &mp_cpus_call_head[cpu_number()];

	tsc_spin_start = rdtsc64();
	while (*cpus_responded != cpus_called) {
		if (!intrs_enabled) {
			/* Sniffing w/o locking */
			if (!queue_empty(&cqp->queue))
				mp_cpus_call_action();
			cpu_signal_handler(NULL);
		}
		if (mp_spin_timeout(tsc_spin_start)) {
			cpumask_t	cpus_unresponsive;

			mp_cpus_call_wait_timeout = TRUE;
			cpus_unresponsive = cpus_called & ~(*cpus_responded);
			mp_cpus_NMIPI(cpus_unresponsive);
			panic("mp_cpus_call_wait() timeout, cpus: 0x%lx",
				cpus_unresponsive);
		}
	}
}

cpu_t
mp_cpus_call1(
	cpumask_t	cpus,
	mp_sync_t	mode,
	void		(*action_func)(void *, void *),
	void		*arg0,
	void		*arg1,
	cpumask_t	*cpus_calledp,
	cpumask_t	*cpus_notcalledp)
{
	cpu_t		cpu;
	boolean_t	intrs_enabled = FALSE;
	boolean_t	call_self = FALSE;
	cpumask_t	cpus_called = 0;
	cpumask_t	cpus_notcalled = 0;
	cpumask_t	cpus_responded = 0;
	long		cpus_call_count = 0;
	uint64_t	tsc_spin_start;
	boolean_t	topo_lock;

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPUS_CALL | DBG_FUNC_START,
		cpus, mode, VM_KERNEL_UNSLIDE(action_func), arg0, arg1);

	if (!smp_initialized) {
		if ((cpus & CPUMASK_SELF) == 0)
			goto out;
		if (action_func != NULL) {
			intrs_enabled = ml_set_interrupts_enabled(FALSE);
			action_func(arg0, arg1);
			ml_set_interrupts_enabled(intrs_enabled);
		}
		call_self = TRUE;
		goto out;
	}

	/*
	 * Queue the call for each non-local requested cpu.
	 * This is performed under the topo lock to prevent changes to
	 * cpus online state and to prevent concurrent rendezvouses --
	 * although an exception is made if we're calling only the master
	 * processor since that always remains active. Note: this exception
	 * is expected for longterm timer nosync cross-calls to the master cpu.
	 */
	mp_disable_preemption();
	intrs_enabled = ml_get_interrupts_enabled();
	topo_lock = (cpus != cpu_to_cpumask(master_cpu));
	if (topo_lock) {
		ml_set_interrupts_enabled(FALSE);
		(void) mp_safe_spin_lock(&x86_topo_lock);
	}
	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
		if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
		    !cpu_datap(cpu)->cpu_running)
			continue;
		tsc_spin_start = rdtsc64();
		if (cpu == (cpu_t) cpu_number()) {
			/*
			 * We don't IPI ourself and if calling asynchronously,
			 * we defer our call until we have signalled all others.
			 */
			call_self = TRUE;
			if (mode == SYNC && action_func != NULL) {
				KERNEL_DEBUG_CONSTANT(
					TRACE_MP_CPUS_CALL_LOCAL,
					VM_KERNEL_UNSLIDE(action_func),
					arg0, arg1, 0, 0);
				action_func(arg0, arg1);
			}
		} else {
			/*
			 * Here to queue a call to cpu and IPI.
			 * Spinning for request buffer unless NOSYNC.
			 */
			mp_call_t	*callp = NULL;
			mp_call_queue_t	*cqp = &mp_cpus_call_head[cpu];
			boolean_t	intrs_inner;

		queue_call:
			if (callp == NULL)
				callp = mp_call_alloc();
			intrs_inner = mp_call_head_lock(cqp);
			if (mode == NOSYNC) {
				if (callp == NULL) {
					cpus_notcalled |= cpu_to_cpumask(cpu);
					mp_call_head_unlock(cqp, intrs_inner);
					KERNEL_DEBUG_CONSTANT(
						TRACE_MP_CPUS_CALL_NOBUF,
						cpu, 0, 0, 0, 0);
					continue;
				}
				callp->maskp = NULL;
			} else {
				if (callp == NULL) {
					mp_call_head_unlock(cqp, intrs_inner);
					KERNEL_DEBUG_CONSTANT(
						TRACE_MP_CPUS_CALL_NOBUF,
						cpu, 0, 0, 0, 0);
					if (!intrs_inner) {
						/* Sniffing w/o locking */
						if (!queue_empty(&cqp->queue))
							mp_cpus_call_action();
						handle_pending_TLB_flushes();
					}
					if (mp_spin_timeout(tsc_spin_start))
						panic("mp_cpus_call1() timeout");
					goto queue_call;
				}
				callp->maskp = &cpus_responded;
			}
			callp->func = action_func;
			callp->arg0 = arg0;
			callp->arg1 = arg1;
			mp_call_enqueue_locked(cqp, callp);
			cpus_call_count++;
			cpus_called |= cpu_to_cpumask(cpu);
			i386_signal_cpu(cpu, MP_CALL, ASYNC);
			mp_call_head_unlock(cqp, intrs_inner);
			if (mode == SYNC) {
				mp_cpus_call_wait(intrs_inner, cpus_called, &cpus_responded);
			}
		}
	}
	if (topo_lock) {
		simple_unlock(&x86_topo_lock);
		ml_set_interrupts_enabled(intrs_enabled);
	}

	/* Call locally if mode not SYNC */
	if (mode != SYNC && call_self) {
		KERNEL_DEBUG_CONSTANT(
			TRACE_MP_CPUS_CALL_LOCAL,
			VM_KERNEL_UNSLIDE(action_func), arg0, arg1, 0, 0);
		if (action_func != NULL) {
			ml_set_interrupts_enabled(FALSE);
			action_func(arg0, arg1);
			ml_set_interrupts_enabled(intrs_enabled);
		}
	}

	/* Safe to allow pre-emption now */
	mp_enable_preemption();

	/* For ASYNC, now wait for all signaled cpus to complete their calls */
	if (mode == ASYNC)
		mp_cpus_call_wait(intrs_enabled, cpus_called, &cpus_responded);

out:
	if (call_self){
		cpus_called |= cpu_to_cpumask(cpu);
		cpus_call_count++;
	}

	if (cpus_calledp)
		*cpus_calledp = cpus_called;
	if (cpus_notcalledp)
		*cpus_notcalledp = cpus_notcalled;

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPUS_CALL | DBG_FUNC_END,
		cpus_call_count, cpus_called, cpus_notcalled, 0, 0);

	return (cpu_t) cpus_call_count;
}


static void
mp_broadcast_action(void)
{
	/* call action function */
	if (mp_bc_action_func != NULL)
		mp_bc_action_func(mp_bc_func_arg);

	/* if we're the last one through, wake up the instigator */
	if (atomic_decl_and_test(&mp_bc_count, 1))
		thread_wakeup(((event_t)(uintptr_t) &mp_bc_count));
}

/*
 * mp_broadcast() runs a given function on all active cpus.
 * The caller blocks until the function has run on all cpus.
 * The caller will also block if there is another pending broadcast.
 */
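
/*
 * A minimal usage sketch (illustrative only; the action routine is
 * hypothetical, not part of this file). Because the caller may block,
 * mp_broadcast() must be used from thread context, not interrupt context:
 *
 *	static void
 *	say_hello(__unused void *arg)
 *	{
 *		kprintf("cpu %d checking in\n", cpu_number());
 *	}
 *
 *	mp_broadcast(say_hello, NULL);
 *
 * The call returns only after every active cpu, including the caller,
 * has executed say_hello().
 */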
void
mp_broadcast(
	void (*action_func)(void *),
	void *arg)
{
	if (!smp_initialized) {
		if (action_func != NULL)
			action_func(arg);
		return;
	}

	/* obtain broadcast lock */
	lck_mtx_lock(&mp_bc_lock);

	/* set static function pointers */
	mp_bc_action_func = action_func;
	mp_bc_func_arg = arg;

	assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT);

	/*
	 * signal other processors, which will call mp_broadcast_action()
	 */
	simple_lock(&x86_topo_lock);
	mp_bc_ncpus = i386_active_cpus();	/* total including this cpu */
	mp_bc_count = mp_bc_ncpus;
	i386_signal_cpus(MP_BROADCAST, ASYNC);

	/* call executor function on this cpu */
	mp_broadcast_action();
	simple_unlock(&x86_topo_lock);

	/* block for all cpus to have run action_func */
	if (mp_bc_ncpus > 1)
		thread_block(THREAD_CONTINUE_NULL);
	else
		clear_wait(current_thread(), THREAD_AWAKENED);

	/* release lock */
	lck_mtx_unlock(&mp_bc_lock);
}

void
mp_cpus_kick(cpumask_t cpus)
{
	cpu_t		cpu;
	boolean_t	intrs_enabled = FALSE;

	intrs_enabled = ml_set_interrupts_enabled(FALSE);
	mp_safe_spin_lock(&x86_topo_lock);

	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
		if ((cpu == (cpu_t) cpu_number())
			|| ((cpu_to_cpumask(cpu) & cpus) == 0)
			|| (!cpu_datap(cpu)->cpu_running))
		{
			continue;
		}

		lapic_send_ipi(cpu, LAPIC_VECTOR(KICK));
	}

	simple_unlock(&x86_topo_lock);
	ml_set_interrupts_enabled(intrs_enabled);
}

void
i386_activate_cpu(void)
{
	cpu_data_t	*cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	if (!smp_initialized) {
		cdp->cpu_running = TRUE;
		return;
	}

	simple_lock(&x86_topo_lock);
	cdp->cpu_running = TRUE;
	started_cpu();
	simple_unlock(&x86_topo_lock);
	flush_tlb_raw();
}

void
i386_deactivate_cpu(void)
{
	cpu_data_t	*cdp = current_cpu_datap();

	assert(!ml_get_interrupts_enabled());

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_START,
		0, 0, 0, 0, 0);

	simple_lock(&x86_topo_lock);
	cdp->cpu_running = FALSE;
	simple_unlock(&x86_topo_lock);

	/*
	 * Move all of this cpu's timers to the master/boot cpu,
	 * and poke it in case there's a sooner deadline for it to schedule.
	 */
	timer_queue_shutdown(&cdp->rtclock_timer.queue);
	mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL);

	/*
	 * Open an interrupt window
	 * and ensure any pending IPI or timer is serviced
	 */
	mp_disable_preemption();
	ml_set_interrupts_enabled(TRUE);

	while (cdp->cpu_signals && x86_lcpu()->rtcDeadline != EndOfAllTime)
		cpu_pause();
	/*
	 * Ensure there's no remaining timer deadline set
	 * - AICPM may have left one active.
	 */
	setPop(0);

	ml_set_interrupts_enabled(FALSE);
	mp_enable_preemption();

	KERNEL_DEBUG_CONSTANT(
		TRACE_MP_CPU_DEACTIVATE | DBG_FUNC_END,
		0, 0, 0, 0, 0);
}

int	pmsafe_debug	= 1;

#if	MACH_KDP
volatile boolean_t	mp_kdp_trap = FALSE;
volatile unsigned long	mp_kdp_ncpus;
boolean_t		mp_kdp_state;


void
mp_kdp_enter(void)
{
	unsigned int	cpu;
	unsigned int	ncpus = 0;
	unsigned int	my_cpu;
	uint64_t	tsc_timeout;

	DBG("mp_kdp_enter()\n");

#if DEBUG
	if (!smp_initialized)
		simple_lock_init(&mp_kdp_lock, 0);
#endif

	/*
	 * Here to enter the debugger.
	 * In case of races, only one cpu is allowed to enter kdp after
	 * stopping others.
	 */
	mp_kdp_state = ml_set_interrupts_enabled(FALSE);
	my_cpu = cpu_number();

	if (my_cpu == (unsigned) debugger_cpu) {
		kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n");
		kdp_reset();
		return;
	}

	cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
	simple_lock(&mp_kdp_lock);

	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);

	while (mp_kdp_trap) {
		simple_unlock(&mp_kdp_lock);
		DBG("mp_kdp_enter() race lost\n");
#if MACH_KDP
		mp_kdp_wait(TRUE, FALSE);
#endif
		simple_lock(&mp_kdp_lock);
	}
	debugger_cpu = my_cpu;
	ncpus = 1;
	mp_kdp_ncpus = 1;	/* self */
	mp_kdp_trap = TRUE;
	debugger_entry_time = cpu_datap(my_cpu)->debugger_entry_time;
	simple_unlock(&mp_kdp_lock);

	/*
	 * Deliver a nudge to other cpus, counting how many
	 */
	DBG("mp_kdp_enter() signaling other processors\n");
	if (force_immediate_debugger_NMI == FALSE) {
		for (cpu = 0; cpu < real_ncpus; cpu++) {
			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
				continue;
			ncpus++;
			i386_signal_cpu(cpu, MP_KDP, ASYNC);
		}
		/*
		 * Wait for other processors to synchronize.
		 */
		DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus);

		/*
		 * This timeout is rather arbitrary; we don't want to NMI
		 * processors that are executing at potentially
		 * "unsafe-to-interrupt" points such as the trampolines,
		 * but neither do we want to lose state by waiting too long.
		 */
		tsc_timeout = rdtsc64() + (ncpus * 1000 * 1000 * 10ULL);

		if (virtualized)
			tsc_timeout = ~0ULL;

		while (mp_kdp_ncpus != ncpus && rdtsc64() < tsc_timeout) {
			/*
			 * A TLB shootdown request may be pending--this would
			 * result in the requesting processor waiting in
			 * PMAP_UPDATE_TLBS() until this processor deals with it.
			 * Process it, so it can now enter mp_kdp_wait()
			 */
			handle_pending_TLB_flushes();
			cpu_pause();
		}
		/* If we've timed out, and some processor(s) are still unresponsive,
		 * interrupt them with an NMI via the local APIC.
		 */
		if (mp_kdp_ncpus != ncpus) {
			for (cpu = 0; cpu < real_ncpus; cpu++) {
				if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
					continue;
				if (cpu_signal_pending(cpu, MP_KDP))
					cpu_NMI_interrupt(cpu);
			}
		}
	}
	else
		for (cpu = 0; cpu < real_ncpus; cpu++) {
			if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
				continue;
			cpu_NMI_interrupt(cpu);
		}

	DBG("mp_kdp_enter() %d processors done %s\n",
		(int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");

	postcode(MP_KDP_ENTER);
}

static boolean_t
cpu_signal_pending(int cpu, mp_event_t event)
{
	volatile int	*signals = &cpu_datap(cpu)->cpu_signals;
	boolean_t	retval = FALSE;

	if (i_bit(event, signals))
		retval = TRUE;
	return retval;
}

long kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func,
			 void *arg0, void *arg1)
{
	if (lcpu > (real_ncpus - 1))
		return -1;

	if (func == NULL)
		return -1;

	kdp_xcpu_call_func.func = func;
	kdp_xcpu_call_func.ret  = -1;
	kdp_xcpu_call_func.arg0 = arg0;
	kdp_xcpu_call_func.arg1 = arg1;
	kdp_xcpu_call_func.cpu  = lcpu;
	DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu);
	while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE)
		cpu_pause();
	return kdp_xcpu_call_func.ret;
}

static void
kdp_x86_xcpu_poll(void)
{
	if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) {
		kdp_xcpu_call_func.ret =
			kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0,
						kdp_xcpu_call_func.arg1,
						cpu_number());
		kdp_xcpu_call_func.cpu = KDP_XCPU_NONE;
	}
}

static void
mp_kdp_wait(boolean_t flush, boolean_t isNMI)
{
	DBG("mp_kdp_wait()\n");
	/* If an I/O port has been specified as a debugging aid, issue a read */
	panic_io_port_read();
	current_cpu_datap()->debugger_ipi_time = mach_absolute_time();
#if CONFIG_MCA
	/* If we've trapped due to a machine-check, save MCA registers */
	mca_check_save();
#endif

	atomic_incl((volatile long *)&mp_kdp_ncpus, 1);
	while (mp_kdp_trap || (isNMI == TRUE)) {
		/*
		 * A TLB shootdown request may be pending--this would result
		 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
		 * until this processor handles it.
		 * Process it, so it can now enter mp_kdp_wait()
		 */
		if (flush)
			handle_pending_TLB_flushes();

		kdp_x86_xcpu_poll();
		cpu_pause();
	}

	atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
	DBG("mp_kdp_wait() done\n");
}

void
mp_kdp_exit(void)
{
	DBG("mp_kdp_exit()\n");
	debugger_cpu = -1;
	atomic_decl((volatile long *)&mp_kdp_ncpus, 1);

	debugger_exit_time = mach_absolute_time();

	mp_kdp_trap = FALSE;
	mfence();

	/* Wait for other processors to stop spinning. XXX needs timeout */
	DBG("mp_kdp_exit() waiting for processors to resume\n");
	while (mp_kdp_ncpus > 0) {
		/*
		 * A TLB shootdown request may be pending--this would result
		 * in the requesting processor waiting in PMAP_UPDATE_TLBS()
		 * until this processor deals with it.
		 * Process it, so it can now enter mp_kdp_wait()
		 */
		handle_pending_TLB_flushes();

		cpu_pause();
	}

	if (pmsafe_debug && !kdp_snapshot)
		pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);

	debugger_exit_time = mach_absolute_time();

	DBG("mp_kdp_exit() done\n");
	(void) ml_set_interrupts_enabled(mp_kdp_state);
	postcode(0);
}
#endif	/* MACH_KDP */

boolean_t
mp_recent_debugger_activity() {
	uint64_t abstime = mach_absolute_time();
	return (((abstime - debugger_entry_time) < LastDebuggerEntryAllowance) ||
		((abstime - debugger_exit_time) < LastDebuggerEntryAllowance));
}

/*ARGSUSED*/
void
init_ast_check(
	__unused processor_t	processor)
{
}

void
cause_ast_check(
	processor_t	processor)
{
	int	cpu = processor->cpu_id;

	if (cpu != cpu_number()) {
		i386_signal_cpu(cpu, MP_AST, ASYNC);
		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
	}
}

void
slave_machine_init(void *param)
{
	/*
	 * Here in process context, but with interrupts disabled.
	 */
	DBG("slave_machine_init() CPU%d\n", get_cpu_number());

	if (param == FULL_SLAVE_INIT) {
		/*
		 * Cold start
		 */
		clock_init();
	}
	cpu_machine_init();	/* Interrupts enabled hereafter */
}

#undef cpu_number
int cpu_number(void)
{
	return get_cpu_number();
}

static void
cpu_prewarm_init()
{
	int i;

	simple_lock_init(&cpu_warm_lock, 0);
	queue_init(&cpu_warm_call_list);
	for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
		enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
	}
}

static timer_call_t
grab_warm_timer_call()
{
	spl_t x;
	timer_call_t call = NULL;

	x = splsched();
	simple_lock(&cpu_warm_lock);
	if (!queue_empty(&cpu_warm_call_list)) {
		call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
	}
	simple_unlock(&cpu_warm_lock);
	splx(x);

	return call;
}

static void
free_warm_timer_call(timer_call_t call)
{
	spl_t x;

	x = splsched();
	simple_lock(&cpu_warm_lock);
	enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
	simple_unlock(&cpu_warm_lock);
	splx(x);
}

/*
 * Runs in timer call context (interrupts disabled).
 */
static void
cpu_warm_timer_call_func(
		call_entry_param_t p0,
		__unused call_entry_param_t p1)
{
	free_warm_timer_call((timer_call_t)p0);
	return;
}

/*
 * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
 */
static void
_cpu_warm_setup(
		void *arg)
{
	cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;

	timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL);
	cwdp->cwd_result = 0;

	return;
}

/*
 * Not safe to call with interrupts disabled.
 */
kern_return_t
ml_interrupt_prewarm(
	uint64_t	deadline)
{
	struct cpu_warm_data cwd;
	timer_call_t call;
	cpu_t ct;

	if (ml_get_interrupts_enabled() == FALSE) {
		panic("%s: Interrupts disabled?\n", __FUNCTION__);
	}

	/*
	 * If the platform doesn't need our help, say that we succeeded.
	 */
	if (!ml_get_interrupt_prewake_applicable()) {
		return KERN_SUCCESS;
	}

	/*
	 * Grab a timer call to use.
	 */
	call = grab_warm_timer_call();
	if (call == NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	timer_call_setup(call, cpu_warm_timer_call_func, call);
	cwd.cwd_call = call;
	cwd.cwd_deadline = deadline;
	cwd.cwd_result = 0;

	/*
	 * For now, non-local interrupts happen on the master processor.
	 */
	ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
	if (ct == 0) {
		free_warm_timer_call(call);
		return KERN_FAILURE;
	} else {
		return cwd.cwd_result;
	}
}
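
/*
 * A minimal usage sketch (illustrative only; the one-millisecond offset is
 * an arbitrary example value). Callers warm the master cpu's interrupt path
 * shortly before a deadline they expect to service, and must do so with
 * interrupts enabled:
 *
 *	uint64_t	interval;
 *	kern_return_t	kr;
 *
 *	nanoseconds_to_absolutetime(NSEC_PER_MSEC, &interval);
 *	kr = ml_interrupt_prewarm(mach_absolute_time() + interval);
 *
 * KERN_SUCCESS means a warm-up timer was queued (or none was needed);
 * KERN_RESOURCE_SHORTAGE means no warm timer call buffer was available.
 */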