mp_x86.c revision 85793
1/* 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: head/sys/i386/i386/mp_machdep.c 85793 2001-10-31 23:54:27Z mjacob $ 26 */ 27 28#include "opt_cpu.h" 29#include "opt_kstack_pages.h" 30 31#ifdef SMP 32#include <machine/smptests.h> 33#else 34#error 35#endif 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/bus.h> 40#include <sys/cons.h> /* cngetc() */ 41#include <sys/dkstat.h> 42#ifdef GPROF 43#include <sys/gmon.h> 44#endif 45#include <sys/kernel.h> 46#include <sys/ktr.h> 47#include <sys/lock.h> 48#include <sys/malloc.h> 49#include <sys/memrange.h> 50#include <sys/mutex.h> 51#include <sys/pcpu.h> 52#include <sys/proc.h> 53#include <sys/smp.h> 54#include <sys/sysctl.h> 55#include <sys/user.h> 56 57#include <vm/vm.h> 58#include <vm/vm_param.h> 59#include <vm/pmap.h> 60#include <vm/vm_kern.h> 61#include <vm/vm_extern.h> 62#include <vm/vm_map.h> 63 64#include <machine/apic.h> 65#include <machine/atomic.h> 66#include <machine/cpu.h> 67#include <machine/cpufunc.h> 68#include <machine/mpapic.h> 69#include <machine/psl.h> 70#include <machine/segments.h> 71#include <machine/smptests.h> /** TEST_DEFAULT_CONFIG, TEST_TEST1 */ 72#include <machine/tss.h> 73#include <machine/specialreg.h> 74#include <machine/globaldata.h> 75#include <machine/privatespace.h> 76 77#if defined(APIC_IO) 78#include <machine/md_var.h> /* setidt() */ 79#include <i386/isa/icu.h> /* IPIs */ 80#include <i386/isa/intr_machdep.h> /* IPIs */ 81#endif /* APIC_IO */ 82 83#if defined(TEST_DEFAULT_CONFIG) 84#define MPFPS_MPFB1 TEST_DEFAULT_CONFIG 85#else 86#define MPFPS_MPFB1 mpfps->mpfb1 87#endif /* TEST_DEFAULT_CONFIG */ 88 89#define WARMBOOT_TARGET 0 90#define WARMBOOT_OFF (KERNBASE + 0x0467) 91#define WARMBOOT_SEG (KERNBASE + 0x0469) 92 93#ifdef PC98 94#define BIOS_BASE (0xe8000) 95#define BIOS_SIZE (0x18000) 96#else 97#define BIOS_BASE (0xf0000) 98#define BIOS_SIZE (0x10000) 99#endif 100#define BIOS_COUNT (BIOS_SIZE/4) 101 102#define CMOS_REG (0x70) 103#define CMOS_DATA (0x71) 104#define BIOS_RESET (0x0f) 105#define BIOS_WARM (0x0a) 106 107#define PROCENTRY_FLAG_EN 0x01 108#define PROCENTRY_FLAG_BP 0x02 109#define IOAPICENTRY_FLAG_EN 0x01 110 111 112/* MP Floating Pointer Structure */ 113typedef struct MPFPS { 114 char signature[4]; 115 void *pap; 116 u_char length; 117 u_char spec_rev; 118 u_char checksum; 119 u_char mpfb1; 120 u_char mpfb2; 121 u_char mpfb3; 122 u_char mpfb4; 123 u_char mpfb5; 124} *mpfps_t; 125 126/* MP Configuration Table Header */ 127typedef struct MPCTH { 128 char signature[4]; 129 u_short base_table_length; 130 u_char spec_rev; 131 u_char checksum; 132 u_char oem_id[8]; 133 u_char product_id[12]; 134 void *oem_table_pointer; 135 u_short oem_table_size; 136 u_short entry_count; 137 void *apic_address; 138 u_short extended_table_length; 139 u_char extended_table_checksum; 140 u_char reserved; 141} *mpcth_t; 142 143 144typedef struct PROCENTRY { 145 u_char type; 146 u_char apic_id; 147 u_char apic_version; 148 u_char cpu_flags; 149 u_long cpu_signature; 150 u_long feature_flags; 151 u_long reserved1; 152 u_long reserved2; 153} *proc_entry_ptr; 154 155typedef struct BUSENTRY { 156 u_char type; 157 u_char bus_id; 158 char bus_type[6]; 159} *bus_entry_ptr; 160 161typedef struct IOAPICENTRY { 162 u_char type; 163 u_char apic_id; 164 u_char apic_version; 165 u_char apic_flags; 166 void *apic_address; 167} *io_apic_entry_ptr; 168 169typedef struct INTENTRY { 170 u_char type; 171 u_char int_type; 172 u_short int_flags; 173 u_char src_bus_id; 174 u_char src_bus_irq; 175 u_char dst_apic_id; 176 u_char dst_apic_int; 177} *int_entry_ptr; 178 179/* descriptions of MP basetable entries */ 180typedef struct BASETABLE_ENTRY { 181 u_char type; 182 u_char length; 183 char name[16]; 184} basetable_entry; 185 186/* 187 * this code MUST be enabled here and in mpboot.s. 188 * it follows the very early stages of AP boot by placing values in CMOS ram. 189 * it NORMALLY will never be needed and thus the primitive method for enabling. 190 * 191#define CHECK_POINTS 192 */ 193 194#if defined(CHECK_POINTS) && !defined(PC98) 195#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 196#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 197 198#define CHECK_INIT(D); \ 199 CHECK_WRITE(0x34, (D)); \ 200 CHECK_WRITE(0x35, (D)); \ 201 CHECK_WRITE(0x36, (D)); \ 202 CHECK_WRITE(0x37, (D)); \ 203 CHECK_WRITE(0x38, (D)); \ 204 CHECK_WRITE(0x39, (D)); 205 206#define CHECK_PRINT(S); \ 207 printf("%s: %d, %d, %d, %d, %d, %d\n", \ 208 (S), \ 209 CHECK_READ(0x34), \ 210 CHECK_READ(0x35), \ 211 CHECK_READ(0x36), \ 212 CHECK_READ(0x37), \ 213 CHECK_READ(0x38), \ 214 CHECK_READ(0x39)); 215 216#else /* CHECK_POINTS */ 217 218#define CHECK_INIT(D) 219#define CHECK_PRINT(S) 220 221#endif /* CHECK_POINTS */ 222 223/* 224 * Values to send to the POST hardware. 225 */ 226#define MP_BOOTADDRESS_POST 0x10 227#define MP_PROBE_POST 0x11 228#define MPTABLE_PASS1_POST 0x12 229 230#define MP_START_POST 0x13 231#define MP_ENABLE_POST 0x14 232#define MPTABLE_PASS2_POST 0x15 233 234#define START_ALL_APS_POST 0x16 235#define INSTALL_AP_TRAMP_POST 0x17 236#define START_AP_POST 0x18 237 238#define MP_ANNOUNCE_POST 0x19 239 240/* used to hold the AP's until we are ready to release them */ 241static struct mtx ap_boot_mtx; 242 243/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ 244int current_postcode; 245 246/** XXX FIXME: what system files declare these??? */ 247extern struct region_descriptor r_gdt, r_idt; 248 249int bsp_apic_ready = 0; /* flags useability of BSP apic */ 250int mp_naps; /* # of Applications processors */ 251int mp_nbusses; /* # of busses */ 252int mp_napics; /* # of IO APICs */ 253int boot_cpu_id; /* designated BSP */ 254vm_offset_t cpu_apic_address; 255vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */ 256extern int nkpt; 257 258u_int32_t cpu_apic_versions[MAXCPU]; 259u_int32_t *io_apic_versions; 260 261#ifdef APIC_INTR_REORDER 262struct { 263 volatile int *location; 264 int bit; 265} apic_isrbit_location[32]; 266#endif 267 268struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE]; 269 270/* 271 * APIC ID logical/physical mapping structures. 272 * We oversize these to simplify boot-time config. 273 */ 274int cpu_num_to_apic_id[NAPICID]; 275int io_num_to_apic_id[NAPICID]; 276int apic_id_to_logical[NAPICID]; 277 278 279/* AP uses this during bootstrap. Do not staticize. */ 280char *bootSTK; 281static int bootAP; 282 283/* Hotwire a 0->4MB V==P mapping */ 284extern pt_entry_t *KPTphys; 285 286/* SMP page table page */ 287extern pt_entry_t *SMPpt; 288 289struct pcb stoppcbs[MAXCPU]; 290 291int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ 292SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); 293 294/* 295 * Local data and functions. 296 */ 297 298/* Set to 1 once we're ready to let the APs out of the pen. */ 299static volatile int aps_ready = 0; 300 301static int mp_capable; 302static u_int boot_address; 303static u_int base_memory; 304 305static int picmode; /* 0: virtual wire mode, 1: PIC mode */ 306static mpfps_t mpfps; 307static int search_for_sig(u_int32_t target, int count); 308static void mp_enable(u_int boot_addr); 309 310static void mptable_pass1(void); 311static int mptable_pass2(void); 312static void default_mp_table(int type); 313static void fix_mp_table(void); 314static void setup_apic_irq_mapping(void); 315static void init_locks(void); 316static int start_all_aps(u_int boot_addr); 317static void install_ap_tramp(u_int boot_addr); 318static int start_ap(int logicalCpu, u_int boot_addr); 319void ap_init(void); 320static int apic_int_is_bus_type(int intr, int bus_type); 321static void release_aps(void *dummy); 322 323/* 324 * initialize all the SMP locks 325 */ 326 327/* critical region around IO APIC, apic_imen */ 328struct mtx imen_mtx; 329 330/* lock region used by kernel profiling */ 331int mcount_lock; 332 333#ifdef USE_COMLOCK 334/* locks com (tty) data/hardware accesses: a FASTINTR() */ 335struct mtx com_mtx; 336#endif /* USE_COMLOCK */ 337 338static void 339init_locks(void) 340{ 341 342#ifdef USE_COMLOCK 343 mtx_init(&com_mtx, "com", MTX_SPIN); 344#endif /* USE_COMLOCK */ 345} 346 347/* 348 * Calculate usable address in base memory for AP trampoline code. 349 */ 350u_int 351mp_bootaddress(u_int basemem) 352{ 353 POSTCODE(MP_BOOTADDRESS_POST); 354 355 base_memory = basemem * 1024; /* convert to bytes */ 356 357 boot_address = base_memory & ~0xfff; /* round down to 4k boundary */ 358 if ((base_memory - boot_address) < bootMP_size) 359 boot_address -= 4096; /* not enough, lower by 4k */ 360 361 return boot_address; 362} 363 364 365/* 366 * Look for an Intel MP spec table (ie, SMP capable hardware). 367 */ 368void 369i386_mp_probe(void) 370{ 371 int x; 372 u_long segment; 373 u_int32_t target; 374 375 POSTCODE(MP_PROBE_POST); 376 377 /* see if EBDA exists */ 378 if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) { 379 /* search first 1K of EBDA */ 380 target = (u_int32_t) (segment << 4); 381 if ((x = search_for_sig(target, 1024 / 4)) >= 0) 382 goto found; 383 } else { 384 /* last 1K of base memory, effective 'top of base' passed in */ 385 target = (u_int32_t) (base_memory - 0x400); 386 if ((x = search_for_sig(target, 1024 / 4)) >= 0) 387 goto found; 388 } 389 390 /* search the BIOS */ 391 target = (u_int32_t) BIOS_BASE; 392 if ((x = search_for_sig(target, BIOS_COUNT)) >= 0) 393 goto found; 394 395 /* nothing found */ 396 mpfps = (mpfps_t)0; 397 mp_capable = 0; 398 return; 399 400found: 401 /* calculate needed resources */ 402 mpfps = (mpfps_t)x; 403 mptable_pass1(); 404 405 /* flag fact that we are running multiple processors */ 406 mp_capable = 1; 407} 408 409int 410cpu_mp_probe(void) 411{ 412 /* 413 * Record BSP in CPU map 414 * This is done here so that MBUF init code works correctly. 415 */ 416 all_cpus = 1; 417 418 return (mp_capable); 419} 420 421/* 422 * Initialize the SMP hardware and the APIC and start up the AP's. 423 */ 424void 425cpu_mp_start(void) 426{ 427 POSTCODE(MP_START_POST); 428 429 /* look for MP capable motherboard */ 430 if (mp_capable) 431 mp_enable(boot_address); 432 else 433 panic("MP hardware not found!"); 434 435 cpu_setregs(); 436} 437 438 439/* 440 * Print various information about the SMP system hardware and setup. 441 */ 442void 443cpu_mp_announce(void) 444{ 445 int x; 446 447 POSTCODE(MP_ANNOUNCE_POST); 448 449 printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0)); 450 printf(", version: 0x%08x", cpu_apic_versions[0]); 451 printf(", at 0x%08x\n", cpu_apic_address); 452 for (x = 1; x <= mp_naps; ++x) { 453 printf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x)); 454 printf(", version: 0x%08x", cpu_apic_versions[x]); 455 printf(", at 0x%08x\n", cpu_apic_address); 456 } 457 458#if defined(APIC_IO) 459 for (x = 0; x < mp_napics; ++x) { 460 printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x)); 461 printf(", version: 0x%08x", io_apic_versions[x]); 462 printf(", at 0x%08x\n", io_apic_address[x]); 463 } 464#else 465 printf(" Warning: APIC I/O disabled\n"); 466#endif /* APIC_IO */ 467} 468 469/* 470 * AP cpu's call this to sync up protected mode. 471 */ 472void 473init_secondary(void) 474{ 475 int gsel_tss; 476 int x, myid = bootAP; 477 478 gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid]; 479 gdt_segs[GPROC0_SEL].ssd_base = 480 (int) &SMP_prvspace[myid].globaldata.gd_common_tss; 481 SMP_prvspace[myid].globaldata.gd_prvspace = 482 &SMP_prvspace[myid].globaldata; 483 484 for (x = 0; x < NGDT; x++) { 485 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); 486 } 487 488 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 489 r_gdt.rd_base = (int) &gdt[myid * NGDT]; 490 lgdt(&r_gdt); /* does magic intra-segment return */ 491 492 lidt(&r_idt); 493 494 lldt(_default_ldt); 495 PCPU_SET(currentldt, _default_ldt); 496 497 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 498 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 499 PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 500 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 501 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 502 PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); 503 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 504 ltr(gsel_tss); 505 506 pmap_set_opt(); 507} 508 509 510#if defined(APIC_IO) 511/* 512 * Final configuration of the BSP's local APIC: 513 * - disable 'pic mode'. 514 * - disable 'virtual wire mode'. 515 * - enable NMI. 516 */ 517void 518bsp_apic_configure(void) 519{ 520 u_char byte; 521 u_int32_t temp; 522 523 /* leave 'pic mode' if necessary */ 524 if (picmode) { 525 outb(0x22, 0x70); /* select IMCR */ 526 byte = inb(0x23); /* current contents */ 527 byte |= 0x01; /* mask external INTR */ 528 outb(0x23, byte); /* disconnect 8259s/NMI */ 529 } 530 531 /* mask lint0 (the 8259 'virtual wire' connection) */ 532 temp = lapic.lvt_lint0; 533 temp |= APIC_LVT_M; /* set the mask */ 534 lapic.lvt_lint0 = temp; 535 536 /* setup lint1 to handle NMI */ 537 temp = lapic.lvt_lint1; 538 temp &= ~APIC_LVT_M; /* clear the mask */ 539 lapic.lvt_lint1 = temp; 540 541 if (bootverbose) 542 apic_dump("bsp_apic_configure()"); 543} 544#endif /* APIC_IO */ 545 546 547/******************************************************************* 548 * local functions and data 549 */ 550 551/* 552 * start the SMP system 553 */ 554static void 555mp_enable(u_int boot_addr) 556{ 557 int x; 558#if defined(APIC_IO) 559 int apic; 560 u_int ux; 561#endif /* APIC_IO */ 562 563 POSTCODE(MP_ENABLE_POST); 564 565 /* turn on 4MB of V == P addressing so we can get to MP table */ 566 *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME); 567 invltlb(); 568 569 /* examine the MP table for needed info, uses physical addresses */ 570 x = mptable_pass2(); 571 572 *(int *)PTD = 0; 573 invltlb(); 574 575 /* can't process default configs till the CPU APIC is pmapped */ 576 if (x) 577 default_mp_table(x); 578 579 /* post scan cleanup */ 580 fix_mp_table(); 581 setup_apic_irq_mapping(); 582 583#if defined(APIC_IO) 584 585 /* fill the LOGICAL io_apic_versions table */ 586 for (apic = 0; apic < mp_napics; ++apic) { 587 ux = io_apic_read(apic, IOAPIC_VER); 588 io_apic_versions[apic] = ux; 589 io_apic_set_id(apic, IO_TO_ID(apic)); 590 } 591 592 /* program each IO APIC in the system */ 593 for (apic = 0; apic < mp_napics; ++apic) 594 if (io_apic_setup(apic) < 0) 595 panic("IO APIC setup failure"); 596 597 /* install a 'Spurious INTerrupt' vector */ 598 setidt(XSPURIOUSINT_OFFSET, Xspuriousint, 599 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 600 601 /* install an inter-CPU IPI for TLB invalidation */ 602 setidt(XINVLTLB_OFFSET, Xinvltlb, 603 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 604 605 /* install an inter-CPU IPI for forwarding hardclock() */ 606 setidt(XHARDCLOCK_OFFSET, Xhardclock, 607 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 608 609 /* install an inter-CPU IPI for forwarding statclock() */ 610 setidt(XSTATCLOCK_OFFSET, Xstatclock, 611 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 612 613 /* install an inter-CPU IPI for all-CPU rendezvous */ 614 setidt(XRENDEZVOUS_OFFSET, Xrendezvous, 615 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 616 617 /* install an inter-CPU IPI for forcing an additional software trap */ 618 setidt(XCPUAST_OFFSET, Xcpuast, 619 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 620 621 /* install an inter-CPU IPI for CPU stop/restart */ 622 setidt(XCPUSTOP_OFFSET, Xcpustop, 623 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 624 625#if defined(TEST_TEST1) 626 /* install a "fake hardware INTerrupt" vector */ 627 setidt(XTEST1_OFFSET, Xtest1, 628 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 629#endif /** TEST_TEST1 */ 630 631#endif /* APIC_IO */ 632 633 /* initialize all SMP locks */ 634 init_locks(); 635 636 /* start each Application Processor */ 637 start_all_aps(boot_addr); 638} 639 640 641/* 642 * look for the MP spec signature 643 */ 644 645/* string defined by the Intel MP Spec as identifying the MP table */ 646#define MP_SIG 0x5f504d5f /* _MP_ */ 647#define NEXT(X) ((X) += 4) 648static int 649search_for_sig(u_int32_t target, int count) 650{ 651 int x; 652 u_int32_t *addr = (u_int32_t *) (KERNBASE + target); 653 654 for (x = 0; x < count; NEXT(x)) 655 if (addr[x] == MP_SIG) 656 /* make array index a byte index */ 657 return (target + (x * sizeof(u_int32_t))); 658 659 return -1; 660} 661 662 663static basetable_entry basetable_entry_types[] = 664{ 665 {0, 20, "Processor"}, 666 {1, 8, "Bus"}, 667 {2, 8, "I/O APIC"}, 668 {3, 8, "I/O INT"}, 669 {4, 8, "Local INT"} 670}; 671 672typedef struct BUSDATA { 673 u_char bus_id; 674 enum busTypes bus_type; 675} bus_datum; 676 677typedef struct INTDATA { 678 u_char int_type; 679 u_short int_flags; 680 u_char src_bus_id; 681 u_char src_bus_irq; 682 u_char dst_apic_id; 683 u_char dst_apic_int; 684 u_char int_vector; 685} io_int, local_int; 686 687typedef struct BUSTYPENAME { 688 u_char type; 689 char name[7]; 690} bus_type_name; 691 692static bus_type_name bus_type_table[] = 693{ 694 {CBUS, "CBUS"}, 695 {CBUSII, "CBUSII"}, 696 {EISA, "EISA"}, 697 {MCA, "MCA"}, 698 {UNKNOWN_BUSTYPE, "---"}, 699 {ISA, "ISA"}, 700 {MCA, "MCA"}, 701 {UNKNOWN_BUSTYPE, "---"}, 702 {UNKNOWN_BUSTYPE, "---"}, 703 {UNKNOWN_BUSTYPE, "---"}, 704 {UNKNOWN_BUSTYPE, "---"}, 705 {UNKNOWN_BUSTYPE, "---"}, 706 {PCI, "PCI"}, 707 {UNKNOWN_BUSTYPE, "---"}, 708 {UNKNOWN_BUSTYPE, "---"}, 709 {UNKNOWN_BUSTYPE, "---"}, 710 {UNKNOWN_BUSTYPE, "---"}, 711 {XPRESS, "XPRESS"}, 712 {UNKNOWN_BUSTYPE, "---"} 713}; 714/* from MP spec v1.4, table 5-1 */ 715static int default_data[7][5] = 716{ 717/* nbus, id0, type0, id1, type1 */ 718 {1, 0, ISA, 255, 255}, 719 {1, 0, EISA, 255, 255}, 720 {1, 0, EISA, 255, 255}, 721 {1, 0, MCA, 255, 255}, 722 {2, 0, ISA, 1, PCI}, 723 {2, 0, EISA, 1, PCI}, 724 {2, 0, MCA, 1, PCI} 725}; 726 727 728/* the bus data */ 729static bus_datum *bus_data; 730 731/* the IO INT data, one entry per possible APIC INTerrupt */ 732static io_int *io_apic_ints; 733 734static int nintrs; 735 736static int processor_entry __P((proc_entry_ptr entry, int cpu)); 737static int bus_entry __P((bus_entry_ptr entry, int bus)); 738static int io_apic_entry __P((io_apic_entry_ptr entry, int apic)); 739static int int_entry __P((int_entry_ptr entry, int intr)); 740static int lookup_bus_type __P((char *name)); 741 742 743/* 744 * 1st pass on motherboard's Intel MP specification table. 745 * 746 * initializes: 747 * mp_ncpus = 1 748 * 749 * determines: 750 * cpu_apic_address (common to all CPUs) 751 * io_apic_address[N] 752 * mp_naps 753 * mp_nbusses 754 * mp_napics 755 * nintrs 756 */ 757static void 758mptable_pass1(void) 759{ 760 int x; 761 mpcth_t cth; 762 int totalSize; 763 void* position; 764 int count; 765 int type; 766 767 POSTCODE(MPTABLE_PASS1_POST); 768 769 /* clear various tables */ 770 for (x = 0; x < NAPICID; ++x) { 771 io_apic_address[x] = ~0; /* IO APIC address table */ 772 } 773 774 /* init everything to empty */ 775 mp_naps = 0; 776 mp_nbusses = 0; 777 mp_napics = 0; 778 nintrs = 0; 779 780 /* check for use of 'default' configuration */ 781 if (MPFPS_MPFB1 != 0) { 782 /* use default addresses */ 783 cpu_apic_address = DEFAULT_APIC_BASE; 784 io_apic_address[0] = DEFAULT_IO_APIC_BASE; 785 786 /* fill in with defaults */ 787 mp_naps = 2; /* includes BSP */ 788 mp_nbusses = default_data[MPFPS_MPFB1 - 1][0]; 789#if defined(APIC_IO) 790 mp_napics = 1; 791 nintrs = 16; 792#endif /* APIC_IO */ 793 } 794 else { 795 if ((cth = mpfps->pap) == 0) 796 panic("MP Configuration Table Header MISSING!"); 797 798 cpu_apic_address = (vm_offset_t) cth->apic_address; 799 800 /* walk the table, recording info of interest */ 801 totalSize = cth->base_table_length - sizeof(struct MPCTH); 802 position = (u_char *) cth + sizeof(struct MPCTH); 803 count = cth->entry_count; 804 805 while (count--) { 806 switch (type = *(u_char *) position) { 807 case 0: /* processor_entry */ 808 if (((proc_entry_ptr)position)->cpu_flags 809 & PROCENTRY_FLAG_EN) 810 ++mp_naps; 811 break; 812 case 1: /* bus_entry */ 813 ++mp_nbusses; 814 break; 815 case 2: /* io_apic_entry */ 816 if (((io_apic_entry_ptr)position)->apic_flags 817 & IOAPICENTRY_FLAG_EN) 818 io_apic_address[mp_napics++] = 819 (vm_offset_t)((io_apic_entry_ptr) 820 position)->apic_address; 821 break; 822 case 3: /* int_entry */ 823 ++nintrs; 824 break; 825 case 4: /* int_entry */ 826 break; 827 default: 828 panic("mpfps Base Table HOSED!"); 829 /* NOTREACHED */ 830 } 831 832 totalSize -= basetable_entry_types[type].length; 833 (u_char*)position += basetable_entry_types[type].length; 834 } 835 } 836 837 /* qualify the numbers */ 838 if (mp_naps > MAXCPU) { 839 printf("Warning: only using %d of %d available CPUs!\n", 840 MAXCPU, mp_naps); 841 mp_naps = MAXCPU; 842 } 843 844 /* 845 * Count the BSP. 846 * This is also used as a counter while starting the APs. 847 */ 848 mp_ncpus = 1; 849 850 --mp_naps; /* subtract the BSP */ 851} 852 853 854/* 855 * 2nd pass on motherboard's Intel MP specification table. 856 * 857 * sets: 858 * boot_cpu_id 859 * ID_TO_IO(N), phy APIC ID to log CPU/IO table 860 * CPU_TO_ID(N), logical CPU to APIC ID table 861 * IO_TO_ID(N), logical IO to APIC ID table 862 * bus_data[N] 863 * io_apic_ints[N] 864 */ 865static int 866mptable_pass2(void) 867{ 868 int x; 869 mpcth_t cth; 870 int totalSize; 871 void* position; 872 int count; 873 int type; 874 int apic, bus, cpu, intr; 875 int i, j; 876 int pgeflag; 877 878 POSTCODE(MPTABLE_PASS2_POST); 879 880 pgeflag = 0; /* XXX - Not used under SMP yet. */ 881 882 MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics, 883 M_DEVBUF, M_WAITOK); 884 MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics, 885 M_DEVBUF, M_WAITOK); 886 MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1), 887 M_DEVBUF, M_WAITOK); 888 MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses, 889 M_DEVBUF, M_WAITOK); 890 891 bzero(ioapic, sizeof(ioapic_t *) * mp_napics); 892 893 for (i = 0; i < mp_napics; i++) { 894 for (j = 0; j < mp_napics; j++) { 895 /* same page frame as a previous IO apic? */ 896 if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 897 (io_apic_address[i] & PG_FRAME)) { 898 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace 899 + (NPTEPG-2-j) * PAGE_SIZE 900 + (io_apic_address[i] & PAGE_MASK)); 901 break; 902 } 903 /* use this slot if available */ 904 if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) { 905 SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW | 906 pgeflag | (io_apic_address[i] & PG_FRAME)); 907 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace 908 + (NPTEPG-2-j) * PAGE_SIZE 909 + (io_apic_address[i] & PAGE_MASK)); 910 break; 911 } 912 } 913 } 914 915 /* clear various tables */ 916 for (x = 0; x < NAPICID; ++x) { 917 ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */ 918 CPU_TO_ID(x) = -1; /* logical CPU to APIC ID table */ 919 IO_TO_ID(x) = -1; /* logical IO to APIC ID table */ 920 } 921 922 /* clear bus data table */ 923 for (x = 0; x < mp_nbusses; ++x) 924 bus_data[x].bus_id = 0xff; 925 926 /* clear IO APIC INT table */ 927 for (x = 0; x < (nintrs + 1); ++x) { 928 io_apic_ints[x].int_type = 0xff; 929 io_apic_ints[x].int_vector = 0xff; 930 } 931 932 /* setup the cpu/apic mapping arrays */ 933 boot_cpu_id = -1; 934 935 /* record whether PIC or virtual-wire mode */ 936 picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0; 937 938 /* check for use of 'default' configuration */ 939 if (MPFPS_MPFB1 != 0) 940 return MPFPS_MPFB1; /* return default configuration type */ 941 942 if ((cth = mpfps->pap) == 0) 943 panic("MP Configuration Table Header MISSING!"); 944 945 /* walk the table, recording info of interest */ 946 totalSize = cth->base_table_length - sizeof(struct MPCTH); 947 position = (u_char *) cth + sizeof(struct MPCTH); 948 count = cth->entry_count; 949 apic = bus = intr = 0; 950 cpu = 1; /* pre-count the BSP */ 951 952 while (count--) { 953 switch (type = *(u_char *) position) { 954 case 0: 955 if (processor_entry(position, cpu)) 956 ++cpu; 957 break; 958 case 1: 959 if (bus_entry(position, bus)) 960 ++bus; 961 break; 962 case 2: 963 if (io_apic_entry(position, apic)) 964 ++apic; 965 break; 966 case 3: 967 if (int_entry(position, intr)) 968 ++intr; 969 break; 970 case 4: 971 /* int_entry(position); */ 972 break; 973 default: 974 panic("mpfps Base Table HOSED!"); 975 /* NOTREACHED */ 976 } 977 978 totalSize -= basetable_entry_types[type].length; 979 (u_char *) position += basetable_entry_types[type].length; 980 } 981 982 if (boot_cpu_id == -1) 983 panic("NO BSP found!"); 984 985 /* report fact that its NOT a default configuration */ 986 return 0; 987} 988 989 990void 991assign_apic_irq(int apic, int intpin, int irq) 992{ 993 int x; 994 995 if (int_to_apicintpin[irq].ioapic != -1) 996 panic("assign_apic_irq: inconsistent table"); 997 998 int_to_apicintpin[irq].ioapic = apic; 999 int_to_apicintpin[irq].int_pin = intpin; 1000 int_to_apicintpin[irq].apic_address = ioapic[apic]; 1001 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin; 1002 1003 for (x = 0; x < nintrs; x++) { 1004 if ((io_apic_ints[x].int_type == 0 || 1005 io_apic_ints[x].int_type == 3) && 1006 io_apic_ints[x].int_vector == 0xff && 1007 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) && 1008 io_apic_ints[x].dst_apic_int == intpin) 1009 io_apic_ints[x].int_vector = irq; 1010 } 1011} 1012 1013void 1014revoke_apic_irq(int irq) 1015{ 1016 int x; 1017 int oldapic; 1018 int oldintpin; 1019 1020 if (int_to_apicintpin[irq].ioapic == -1) 1021 panic("assign_apic_irq: inconsistent table"); 1022 1023 oldapic = int_to_apicintpin[irq].ioapic; 1024 oldintpin = int_to_apicintpin[irq].int_pin; 1025 1026 int_to_apicintpin[irq].ioapic = -1; 1027 int_to_apicintpin[irq].int_pin = 0; 1028 int_to_apicintpin[irq].apic_address = NULL; 1029 int_to_apicintpin[irq].redirindex = 0; 1030 1031 for (x = 0; x < nintrs; x++) { 1032 if ((io_apic_ints[x].int_type == 0 || 1033 io_apic_ints[x].int_type == 3) && 1034 io_apic_ints[x].int_vector == 0xff && 1035 io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) && 1036 io_apic_ints[x].dst_apic_int == oldintpin) 1037 io_apic_ints[x].int_vector = 0xff; 1038 } 1039} 1040 1041 1042static void 1043allocate_apic_irq(int intr) 1044{ 1045 int apic; 1046 int intpin; 1047 int irq; 1048 1049 if (io_apic_ints[intr].int_vector != 0xff) 1050 return; /* Interrupt handler already assigned */ 1051 1052 if (io_apic_ints[intr].int_type != 0 && 1053 (io_apic_ints[intr].int_type != 3 || 1054 (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) && 1055 io_apic_ints[intr].dst_apic_int == 0))) 1056 return; /* Not INT or ExtInt on != (0, 0) */ 1057 1058 irq = 0; 1059 while (irq < APIC_INTMAPSIZE && 1060 int_to_apicintpin[irq].ioapic != -1) 1061 irq++; 1062 1063 if (irq >= APIC_INTMAPSIZE) 1064 return; /* No free interrupt handlers */ 1065 1066 apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id); 1067 intpin = io_apic_ints[intr].dst_apic_int; 1068 1069 assign_apic_irq(apic, intpin, irq); 1070 io_apic_setup_intpin(apic, intpin); 1071} 1072 1073 1074static void 1075swap_apic_id(int apic, int oldid, int newid) 1076{ 1077 int x; 1078 int oapic; 1079 1080 1081 if (oldid == newid) 1082 return; /* Nothing to do */ 1083 1084 printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n", 1085 apic, oldid, newid); 1086 1087 /* Swap physical APIC IDs in interrupt entries */ 1088 for (x = 0; x < nintrs; x++) { 1089 if (io_apic_ints[x].dst_apic_id == oldid) 1090 io_apic_ints[x].dst_apic_id = newid; 1091 else if (io_apic_ints[x].dst_apic_id == newid) 1092 io_apic_ints[x].dst_apic_id = oldid; 1093 } 1094 1095 /* Swap physical APIC IDs in IO_TO_ID mappings */ 1096 for (oapic = 0; oapic < mp_napics; oapic++) 1097 if (IO_TO_ID(oapic) == newid) 1098 break; 1099 1100 if (oapic < mp_napics) { 1101 printf("Changing APIC ID for IO APIC #%d from " 1102 "%d to %d in MP table\n", 1103 oapic, newid, oldid); 1104 IO_TO_ID(oapic) = oldid; 1105 } 1106 IO_TO_ID(apic) = newid; 1107} 1108 1109 1110static void 1111fix_id_to_io_mapping(void) 1112{ 1113 int x; 1114 1115 for (x = 0; x < NAPICID; x++) 1116 ID_TO_IO(x) = -1; 1117 1118 for (x = 0; x <= mp_naps; x++) 1119 if (CPU_TO_ID(x) < NAPICID) 1120 ID_TO_IO(CPU_TO_ID(x)) = x; 1121 1122 for (x = 0; x < mp_napics; x++) 1123 if (IO_TO_ID(x) < NAPICID) 1124 ID_TO_IO(IO_TO_ID(x)) = x; 1125} 1126 1127 1128static int 1129first_free_apic_id(void) 1130{ 1131 int freeid, x; 1132 1133 for (freeid = 0; freeid < NAPICID; freeid++) { 1134 for (x = 0; x <= mp_naps; x++) 1135 if (CPU_TO_ID(x) == freeid) 1136 break; 1137 if (x <= mp_naps) 1138 continue; 1139 for (x = 0; x < mp_napics; x++) 1140 if (IO_TO_ID(x) == freeid) 1141 break; 1142 if (x < mp_napics) 1143 continue; 1144 return freeid; 1145 } 1146 return freeid; 1147} 1148 1149 1150static int 1151io_apic_id_acceptable(int apic, int id) 1152{ 1153 int cpu; /* Logical CPU number */ 1154 int oapic; /* Logical IO APIC number for other IO APIC */ 1155 1156 if (id >= NAPICID) 1157 return 0; /* Out of range */ 1158 1159 for (cpu = 0; cpu <= mp_naps; cpu++) 1160 if (CPU_TO_ID(cpu) == id) 1161 return 0; /* Conflict with CPU */ 1162 1163 for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++) 1164 if (IO_TO_ID(oapic) == id) 1165 return 0; /* Conflict with other APIC */ 1166 1167 return 1; /* ID is acceptable for IO APIC */ 1168} 1169 1170 1171/* 1172 * parse an Intel MP specification table 1173 */ 1174static void 1175fix_mp_table(void) 1176{ 1177 int x; 1178 int id; 1179 int bus_0 = 0; /* Stop GCC warning */ 1180 int bus_pci = 0; /* Stop GCC warning */ 1181 int num_pci_bus; 1182 int apic; /* IO APIC unit number */ 1183 int freeid; /* Free physical APIC ID */ 1184 int physid; /* Current physical IO APIC ID */ 1185 1186 /* 1187 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS 1188 * did it wrong. The MP spec says that when more than 1 PCI bus 1189 * exists the BIOS must begin with bus entries for the PCI bus and use 1190 * actual PCI bus numbering. This implies that when only 1 PCI bus 1191 * exists the BIOS can choose to ignore this ordering, and indeed many 1192 * MP motherboards do ignore it. This causes a problem when the PCI 1193 * sub-system makes requests of the MP sub-system based on PCI bus 1194 * numbers. So here we look for the situation and renumber the 1195 * busses and associated INTs in an effort to "make it right". 1196 */ 1197 1198 /* find bus 0, PCI bus, count the number of PCI busses */ 1199 for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) { 1200 if (bus_data[x].bus_id == 0) { 1201 bus_0 = x; 1202 } 1203 if (bus_data[x].bus_type == PCI) { 1204 ++num_pci_bus; 1205 bus_pci = x; 1206 } 1207 } 1208 /* 1209 * bus_0 == slot of bus with ID of 0 1210 * bus_pci == slot of last PCI bus encountered 1211 */ 1212 1213 /* check the 1 PCI bus case for sanity */ 1214 /* if it is number 0 all is well */ 1215 if (num_pci_bus == 1 && 1216 bus_data[bus_pci].bus_id != 0) { 1217 1218 /* mis-numbered, swap with whichever bus uses slot 0 */ 1219 1220 /* swap the bus entry types */ 1221 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type; 1222 bus_data[bus_0].bus_type = PCI; 1223 1224 /* swap each relavant INTerrupt entry */ 1225 id = bus_data[bus_pci].bus_id; 1226 for (x = 0; x < nintrs; ++x) { 1227 if (io_apic_ints[x].src_bus_id == id) { 1228 io_apic_ints[x].src_bus_id = 0; 1229 } 1230 else if (io_apic_ints[x].src_bus_id == 0) { 1231 io_apic_ints[x].src_bus_id = id; 1232 } 1233 } 1234 } 1235 1236 /* Assign IO APIC IDs. 1237 * 1238 * First try the existing ID. If a conflict is detected, try 1239 * the ID in the MP table. If a conflict is still detected, find 1240 * a free id. 1241 * 1242 * We cannot use the ID_TO_IO table before all conflicts has been 1243 * resolved and the table has been corrected. 1244 */ 1245 for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */ 1246 1247 /* First try to use the value set by the BIOS */ 1248 physid = io_apic_get_id(apic); 1249 if (io_apic_id_acceptable(apic, physid)) { 1250 if (IO_TO_ID(apic) != physid) 1251 swap_apic_id(apic, IO_TO_ID(apic), physid); 1252 continue; 1253 } 1254 1255 /* Then check if the value in the MP table is acceptable */ 1256 if (io_apic_id_acceptable(apic, IO_TO_ID(apic))) 1257 continue; 1258 1259 /* Last resort, find a free APIC ID and use it */ 1260 freeid = first_free_apic_id(); 1261 if (freeid >= NAPICID) 1262 panic("No free physical APIC IDs found"); 1263 1264 if (io_apic_id_acceptable(apic, freeid)) { 1265 swap_apic_id(apic, IO_TO_ID(apic), freeid); 1266 continue; 1267 } 1268 panic("Free physical APIC ID not usable"); 1269 } 1270 fix_id_to_io_mapping(); 1271 1272 /* detect and fix broken Compaq MP table */ 1273 if (apic_int_type(0, 0) == -1) { 1274 printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n"); 1275 io_apic_ints[nintrs].int_type = 3; /* ExtInt */ 1276 io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */ 1277 /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */ 1278 io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0); 1279 io_apic_ints[nintrs].dst_apic_int = 0; /* Pin 0 */ 1280 nintrs++; 1281 } 1282} 1283 1284 1285/* Assign low level interrupt handlers */ 1286static void 1287setup_apic_irq_mapping(void) 1288{ 1289 int x; 1290 int int_vector; 1291 1292 /* Clear array */ 1293 for (x = 0; x < APIC_INTMAPSIZE; x++) { 1294 int_to_apicintpin[x].ioapic = -1; 1295 int_to_apicintpin[x].int_pin = 0; 1296 int_to_apicintpin[x].apic_address = NULL; 1297 int_to_apicintpin[x].redirindex = 0; 1298 } 1299 1300 /* First assign ISA/EISA interrupts */ 1301 for (x = 0; x < nintrs; x++) { 1302 int_vector = io_apic_ints[x].src_bus_irq; 1303 if (int_vector < APIC_INTMAPSIZE && 1304 io_apic_ints[x].int_vector == 0xff && 1305 int_to_apicintpin[int_vector].ioapic == -1 && 1306 (apic_int_is_bus_type(x, ISA) || 1307 apic_int_is_bus_type(x, EISA)) && 1308 io_apic_ints[x].int_type == 0) { 1309 assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id), 1310 io_apic_ints[x].dst_apic_int, 1311 int_vector); 1312 } 1313 } 1314 1315 /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */ 1316 for (x = 0; x < nintrs; x++) { 1317 if (io_apic_ints[x].dst_apic_int == 0 && 1318 io_apic_ints[x].dst_apic_id == IO_TO_ID(0) && 1319 io_apic_ints[x].int_vector == 0xff && 1320 int_to_apicintpin[0].ioapic == -1 && 1321 io_apic_ints[x].int_type == 3) { 1322 assign_apic_irq(0, 0, 0); 1323 break; 1324 } 1325 } 1326 /* PCI interrupt assignment is deferred */ 1327} 1328 1329 1330static int 1331processor_entry(proc_entry_ptr entry, int cpu) 1332{ 1333 /* check for usability */ 1334 if (!(entry->cpu_flags & PROCENTRY_FLAG_EN)) 1335 return 0; 1336 1337 if(entry->apic_id >= NAPICID) 1338 panic("CPU APIC ID out of range (0..%d)", NAPICID - 1); 1339 /* check for BSP flag */ 1340 if (entry->cpu_flags & PROCENTRY_FLAG_BP) { 1341 boot_cpu_id = entry->apic_id; 1342 CPU_TO_ID(0) = entry->apic_id; 1343 ID_TO_CPU(entry->apic_id) = 0; 1344 return 0; /* its already been counted */ 1345 } 1346 1347 /* add another AP to list, if less than max number of CPUs */ 1348 else if (cpu < MAXCPU) { 1349 CPU_TO_ID(cpu) = entry->apic_id; 1350 ID_TO_CPU(entry->apic_id) = cpu; 1351 return 1; 1352 } 1353 1354 return 0; 1355} 1356 1357 1358static int 1359bus_entry(bus_entry_ptr entry, int bus) 1360{ 1361 int x; 1362 char c, name[8]; 1363 1364 /* encode the name into an index */ 1365 for (x = 0; x < 6; ++x) { 1366 if ((c = entry->bus_type[x]) == ' ') 1367 break; 1368 name[x] = c; 1369 } 1370 name[x] = '\0'; 1371 1372 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE) 1373 panic("unknown bus type: '%s'", name); 1374 1375 bus_data[bus].bus_id = entry->bus_id; 1376 bus_data[bus].bus_type = x; 1377 1378 return 1; 1379} 1380 1381 1382static int 1383io_apic_entry(io_apic_entry_ptr entry, int apic) 1384{ 1385 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN)) 1386 return 0; 1387 1388 IO_TO_ID(apic) = entry->apic_id; 1389 if (entry->apic_id < NAPICID) 1390 ID_TO_IO(entry->apic_id) = apic; 1391 1392 return 1; 1393} 1394 1395 1396static int 1397lookup_bus_type(char *name) 1398{ 1399 int x; 1400 1401 for (x = 0; x < MAX_BUSTYPE; ++x) 1402 if (strcmp(bus_type_table[x].name, name) == 0) 1403 return bus_type_table[x].type; 1404 1405 return UNKNOWN_BUSTYPE; 1406} 1407 1408 1409static int 1410int_entry(int_entry_ptr entry, int intr) 1411{ 1412 int apic; 1413 1414 io_apic_ints[intr].int_type = entry->int_type; 1415 io_apic_ints[intr].int_flags = entry->int_flags; 1416 io_apic_ints[intr].src_bus_id = entry->src_bus_id; 1417 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq; 1418 if (entry->dst_apic_id == 255) { 1419 /* This signal goes to all IO APICS. Select an IO APIC 1420 with sufficient number of interrupt pins */ 1421 for (apic = 0; apic < mp_napics; apic++) 1422 if (((io_apic_read(apic, IOAPIC_VER) & 1423 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >= 1424 entry->dst_apic_int) 1425 break; 1426 if (apic < mp_napics) 1427 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic); 1428 else 1429 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id; 1430 } else 1431 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id; 1432 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int; 1433 1434 return 1; 1435} 1436 1437 1438static int 1439apic_int_is_bus_type(int intr, int bus_type) 1440{ 1441 int bus; 1442 1443 for (bus = 0; bus < mp_nbusses; ++bus) 1444 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id) 1445 && ((int) bus_data[bus].bus_type == bus_type)) 1446 return 1; 1447 1448 return 0; 1449} 1450 1451 1452/* 1453 * Given a traditional ISA INT mask, return an APIC mask. 1454 */ 1455u_int 1456isa_apic_mask(u_int isa_mask) 1457{ 1458 int isa_irq; 1459 int apic_pin; 1460 1461#if defined(SKIP_IRQ15_REDIRECT) 1462 if (isa_mask == (1 << 15)) { 1463 printf("skipping ISA IRQ15 redirect\n"); 1464 return isa_mask; 1465 } 1466#endif /* SKIP_IRQ15_REDIRECT */ 1467 1468 isa_irq = ffs(isa_mask); /* find its bit position */ 1469 if (isa_irq == 0) /* doesn't exist */ 1470 return 0; 1471 --isa_irq; /* make it zero based */ 1472 1473 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */ 1474 if (apic_pin == -1) 1475 return 0; 1476 1477 return (1 << apic_pin); /* convert pin# to a mask */ 1478} 1479 1480 1481/* 1482 * Determine which APIC pin an ISA/EISA INT is attached to. 1483 */ 1484#define INTTYPE(I) (io_apic_ints[(I)].int_type) 1485#define INTPIN(I) (io_apic_ints[(I)].dst_apic_int) 1486#define INTIRQ(I) (io_apic_ints[(I)].int_vector) 1487#define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id)) 1488 1489#define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq) 1490int 1491isa_apic_irq(int isa_irq) 1492{ 1493 int intr; 1494 1495 for (intr = 0; intr < nintrs; ++intr) { /* check each record */ 1496 if (INTTYPE(intr) == 0) { /* standard INT */ 1497 if (SRCBUSIRQ(intr) == isa_irq) { 1498 if (apic_int_is_bus_type(intr, ISA) || 1499 apic_int_is_bus_type(intr, EISA)) { 1500 if (INTIRQ(intr) == 0xff) 1501 return -1; /* unassigned */ 1502 return INTIRQ(intr); /* found */ 1503 } 1504 } 1505 } 1506 } 1507 return -1; /* NOT found */ 1508} 1509 1510 1511/* 1512 * Determine which APIC pin a PCI INT is attached to. 1513 */ 1514#define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id) 1515#define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f) 1516#define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03) 1517int 1518pci_apic_irq(int pciBus, int pciDevice, int pciInt) 1519{ 1520 int intr; 1521 1522 --pciInt; /* zero based */ 1523 1524 for (intr = 0; intr < nintrs; ++intr) /* check each record */ 1525 if ((INTTYPE(intr) == 0) /* standard INT */ 1526 && (SRCBUSID(intr) == pciBus) 1527 && (SRCBUSDEVICE(intr) == pciDevice) 1528 && (SRCBUSLINE(intr) == pciInt)) /* a candidate IRQ */ 1529 if (apic_int_is_bus_type(intr, PCI)) { 1530 if (INTIRQ(intr) == 0xff) 1531 allocate_apic_irq(intr); 1532 if (INTIRQ(intr) == 0xff) 1533 return -1; /* unassigned */ 1534 return INTIRQ(intr); /* exact match */ 1535 } 1536 1537 return -1; /* NOT found */ 1538} 1539 1540int 1541next_apic_irq(int irq) 1542{ 1543 int intr, ointr; 1544 int bus, bustype; 1545 1546 bus = 0; 1547 bustype = 0; 1548 for (intr = 0; intr < nintrs; intr++) { 1549 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0) 1550 continue; 1551 bus = SRCBUSID(intr); 1552 bustype = apic_bus_type(bus); 1553 if (bustype != ISA && 1554 bustype != EISA && 1555 bustype != PCI) 1556 continue; 1557 break; 1558 } 1559 if (intr >= nintrs) { 1560 return -1; 1561 } 1562 for (ointr = intr + 1; ointr < nintrs; ointr++) { 1563 if (INTTYPE(ointr) != 0) 1564 continue; 1565 if (bus != SRCBUSID(ointr)) 1566 continue; 1567 if (bustype == PCI) { 1568 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr)) 1569 continue; 1570 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr)) 1571 continue; 1572 } 1573 if (bustype == ISA || bustype == EISA) { 1574 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr)) 1575 continue; 1576 } 1577 if (INTPIN(intr) == INTPIN(ointr)) 1578 continue; 1579 break; 1580 } 1581 if (ointr >= nintrs) { 1582 return -1; 1583 } 1584 return INTIRQ(ointr); 1585} 1586#undef SRCBUSLINE 1587#undef SRCBUSDEVICE 1588#undef SRCBUSID 1589#undef SRCBUSIRQ 1590 1591#undef INTPIN 1592#undef INTIRQ 1593#undef INTAPIC 1594#undef INTTYPE 1595 1596 1597/* 1598 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt. 1599 * 1600 * XXX FIXME: 1601 * Exactly what this means is unclear at this point. It is a solution 1602 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard 1603 * could route any of the ISA INTs to upper (>15) IRQ values. But most would 1604 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an 1605 * option. 1606 */ 1607int 1608undirect_isa_irq(int rirq) 1609{ 1610#if defined(READY) 1611 if (bootverbose) 1612 printf("Freeing redirected ISA irq %d.\n", rirq); 1613 /** FIXME: tickle the MB redirector chip */ 1614 return -1; 1615#else 1616 if (bootverbose) 1617 printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq); 1618 return 0; 1619#endif /* READY */ 1620} 1621 1622 1623/* 1624 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt 1625 */ 1626int 1627undirect_pci_irq(int rirq) 1628{ 1629#if defined(READY) 1630 if (bootverbose) 1631 printf("Freeing redirected PCI irq %d.\n", rirq); 1632 1633 /** FIXME: tickle the MB redirector chip */ 1634 return -1; 1635#else 1636 if (bootverbose) 1637 printf("Freeing (NOT implemented) redirected PCI irq %d.\n", 1638 rirq); 1639 return 0; 1640#endif /* READY */ 1641} 1642 1643 1644/* 1645 * given a bus ID, return: 1646 * the bus type if found 1647 * -1 if NOT found 1648 */ 1649int 1650apic_bus_type(int id) 1651{ 1652 int x; 1653 1654 for (x = 0; x < mp_nbusses; ++x) 1655 if (bus_data[x].bus_id == id) 1656 return bus_data[x].bus_type; 1657 1658 return -1; 1659} 1660 1661 1662/* 1663 * given a LOGICAL APIC# and pin#, return: 1664 * the associated src bus ID if found 1665 * -1 if NOT found 1666 */ 1667int 1668apic_src_bus_id(int apic, int pin) 1669{ 1670 int x; 1671 1672 /* search each of the possible INTerrupt sources */ 1673 for (x = 0; x < nintrs; ++x) 1674 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1675 (pin == io_apic_ints[x].dst_apic_int)) 1676 return (io_apic_ints[x].src_bus_id); 1677 1678 return -1; /* NOT found */ 1679} 1680 1681 1682/* 1683 * given a LOGICAL APIC# and pin#, return: 1684 * the associated src bus IRQ if found 1685 * -1 if NOT found 1686 */ 1687int 1688apic_src_bus_irq(int apic, int pin) 1689{ 1690 int x; 1691 1692 for (x = 0; x < nintrs; x++) 1693 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1694 (pin == io_apic_ints[x].dst_apic_int)) 1695 return (io_apic_ints[x].src_bus_irq); 1696 1697 return -1; /* NOT found */ 1698} 1699 1700 1701/* 1702 * given a LOGICAL APIC# and pin#, return: 1703 * the associated INTerrupt type if found 1704 * -1 if NOT found 1705 */ 1706int 1707apic_int_type(int apic, int pin) 1708{ 1709 int x; 1710 1711 /* search each of the possible INTerrupt sources */ 1712 for (x = 0; x < nintrs; ++x) 1713 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1714 (pin == io_apic_ints[x].dst_apic_int)) 1715 return (io_apic_ints[x].int_type); 1716 1717 return -1; /* NOT found */ 1718} 1719 1720int 1721apic_irq(int apic, int pin) 1722{ 1723 int x; 1724 int res; 1725 1726 for (x = 0; x < nintrs; ++x) 1727 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1728 (pin == io_apic_ints[x].dst_apic_int)) { 1729 res = io_apic_ints[x].int_vector; 1730 if (res == 0xff) 1731 return -1; 1732 if (apic != int_to_apicintpin[res].ioapic) 1733 panic("apic_irq: inconsistent table"); 1734 if (pin != int_to_apicintpin[res].int_pin) 1735 panic("apic_irq inconsistent table (2)"); 1736 return res; 1737 } 1738 return -1; 1739} 1740 1741 1742/* 1743 * given a LOGICAL APIC# and pin#, return: 1744 * the associated trigger mode if found 1745 * -1 if NOT found 1746 */ 1747int 1748apic_trigger(int apic, int pin) 1749{ 1750 int x; 1751 1752 /* search each of the possible INTerrupt sources */ 1753 for (x = 0; x < nintrs; ++x) 1754 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1755 (pin == io_apic_ints[x].dst_apic_int)) 1756 return ((io_apic_ints[x].int_flags >> 2) & 0x03); 1757 1758 return -1; /* NOT found */ 1759} 1760 1761 1762/* 1763 * given a LOGICAL APIC# and pin#, return: 1764 * the associated 'active' level if found 1765 * -1 if NOT found 1766 */ 1767int 1768apic_polarity(int apic, int pin) 1769{ 1770 int x; 1771 1772 /* search each of the possible INTerrupt sources */ 1773 for (x = 0; x < nintrs; ++x) 1774 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1775 (pin == io_apic_ints[x].dst_apic_int)) 1776 return (io_apic_ints[x].int_flags & 0x03); 1777 1778 return -1; /* NOT found */ 1779} 1780 1781 1782/* 1783 * set data according to MP defaults 1784 * FIXME: probably not complete yet... 1785 */ 1786static void 1787default_mp_table(int type) 1788{ 1789 int ap_cpu_id; 1790#if defined(APIC_IO) 1791 int io_apic_id; 1792 int pin; 1793#endif /* APIC_IO */ 1794 1795#if 0 1796 printf(" MP default config type: %d\n", type); 1797 switch (type) { 1798 case 1: 1799 printf(" bus: ISA, APIC: 82489DX\n"); 1800 break; 1801 case 2: 1802 printf(" bus: EISA, APIC: 82489DX\n"); 1803 break; 1804 case 3: 1805 printf(" bus: EISA, APIC: 82489DX\n"); 1806 break; 1807 case 4: 1808 printf(" bus: MCA, APIC: 82489DX\n"); 1809 break; 1810 case 5: 1811 printf(" bus: ISA+PCI, APIC: Integrated\n"); 1812 break; 1813 case 6: 1814 printf(" bus: EISA+PCI, APIC: Integrated\n"); 1815 break; 1816 case 7: 1817 printf(" bus: MCA+PCI, APIC: Integrated\n"); 1818 break; 1819 default: 1820 printf(" future type\n"); 1821 break; 1822 /* NOTREACHED */ 1823 } 1824#endif /* 0 */ 1825 1826 boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24; 1827 ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0; 1828 1829 /* BSP */ 1830 CPU_TO_ID(0) = boot_cpu_id; 1831 ID_TO_CPU(boot_cpu_id) = 0; 1832 1833 /* one and only AP */ 1834 CPU_TO_ID(1) = ap_cpu_id; 1835 ID_TO_CPU(ap_cpu_id) = 1; 1836 1837#if defined(APIC_IO) 1838 /* one and only IO APIC */ 1839 io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24; 1840 1841 /* 1842 * sanity check, refer to MP spec section 3.6.6, last paragraph 1843 * necessary as some hardware isn't properly setting up the IO APIC 1844 */ 1845#if defined(REALLY_ANAL_IOAPICID_VALUE) 1846 if (io_apic_id != 2) { 1847#else 1848 if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) { 1849#endif /* REALLY_ANAL_IOAPICID_VALUE */ 1850 io_apic_set_id(0, 2); 1851 io_apic_id = 2; 1852 } 1853 IO_TO_ID(0) = io_apic_id; 1854 ID_TO_IO(io_apic_id) = 0; 1855#endif /* APIC_IO */ 1856 1857 /* fill out bus entries */ 1858 switch (type) { 1859 case 1: 1860 case 2: 1861 case 3: 1862 case 4: 1863 case 5: 1864 case 6: 1865 case 7: 1866 bus_data[0].bus_id = default_data[type - 1][1]; 1867 bus_data[0].bus_type = default_data[type - 1][2]; 1868 bus_data[1].bus_id = default_data[type - 1][3]; 1869 bus_data[1].bus_type = default_data[type - 1][4]; 1870 break; 1871 1872 /* case 4: case 7: MCA NOT supported */ 1873 default: /* illegal/reserved */ 1874 panic("BAD default MP config: %d", type); 1875 /* NOTREACHED */ 1876 } 1877 1878#if defined(APIC_IO) 1879 /* general cases from MP v1.4, table 5-2 */ 1880 for (pin = 0; pin < 16; ++pin) { 1881 io_apic_ints[pin].int_type = 0; 1882 io_apic_ints[pin].int_flags = 0x05; /* edge/active-hi */ 1883 io_apic_ints[pin].src_bus_id = 0; 1884 io_apic_ints[pin].src_bus_irq = pin; /* IRQ2 caught below */ 1885 io_apic_ints[pin].dst_apic_id = io_apic_id; 1886 io_apic_ints[pin].dst_apic_int = pin; /* 1-to-1 */ 1887 } 1888 1889 /* special cases from MP v1.4, table 5-2 */ 1890 if (type == 2) { 1891 io_apic_ints[2].int_type = 0xff; /* N/C */ 1892 io_apic_ints[13].int_type = 0xff; /* N/C */ 1893#if !defined(APIC_MIXED_MODE) 1894 /** FIXME: ??? */ 1895 panic("sorry, can't support type 2 default yet"); 1896#endif /* APIC_MIXED_MODE */ 1897 } 1898 else 1899 io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */ 1900 1901 if (type == 7) 1902 io_apic_ints[0].int_type = 0xff; /* N/C */ 1903 else 1904 io_apic_ints[0].int_type = 3; /* vectored 8259 */ 1905#endif /* APIC_IO */ 1906} 1907 1908 1909/* 1910 * start each AP in our list 1911 */ 1912static int 1913start_all_aps(u_int boot_addr) 1914{ 1915 int x, i, pg; 1916 u_char mpbiosreason; 1917 u_long mpbioswarmvec; 1918 struct globaldata *gd; 1919 char *stack; 1920 uintptr_t kptbase; 1921 1922 POSTCODE(START_ALL_APS_POST); 1923 1924 mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN); 1925 1926 /* initialize BSP's local APIC */ 1927 apic_initialize(); 1928 bsp_apic_ready = 1; 1929 1930 /* install the AP 1st level boot code */ 1931 install_ap_tramp(boot_addr); 1932 1933 1934 /* save the current value of the warm-start vector */ 1935 mpbioswarmvec = *((u_long *) WARMBOOT_OFF); 1936#ifndef PC98 1937 outb(CMOS_REG, BIOS_RESET); 1938 mpbiosreason = inb(CMOS_DATA); 1939#endif 1940 1941 /* set up temporary P==V mapping for AP boot */ 1942 /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 1943 kptbase = (uintptr_t)(void *)KPTphys; 1944 for (x = 0; x < NKPT; x++) 1945 PTD[x] = (pd_entry_t)(PG_V | PG_RW | 1946 ((kptbase + x * PAGE_SIZE) & PG_FRAME)); 1947 invltlb(); 1948 1949 /* start each AP */ 1950 for (x = 1; x <= mp_naps; ++x) { 1951 1952 /* This is a bit verbose, it will go away soon. */ 1953 1954 /* first page of AP's private space */ 1955 pg = x * i386_btop(sizeof(struct privatespace)); 1956 1957 /* allocate a new private data page */ 1958 gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE); 1959 1960 /* wire it into the private page table page */ 1961 SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd)); 1962 1963 /* allocate and set up an idle stack data page */ 1964 stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */ 1965 for (i = 0; i < KSTACK_PAGES; i++) 1966 SMPpt[pg + 1 + i] = (pt_entry_t) 1967 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); 1968 1969 /* prime data page for it to use */ 1970 gd->gd_cpuid = x; 1971 globaldata_register(gd); 1972 1973 /* setup a vector to our boot code */ 1974 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 1975 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4); 1976#ifndef PC98 1977 outb(CMOS_REG, BIOS_RESET); 1978 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 1979#endif 1980 1981 bootSTK = &SMP_prvspace[x].idlekstack[KSTACK_PAGES * PAGE_SIZE]; 1982 bootAP = x; 1983 1984 /* attempt to start the Application Processor */ 1985 CHECK_INIT(99); /* setup checkpoints */ 1986 if (!start_ap(x, boot_addr)) { 1987 printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x)); 1988 CHECK_PRINT("trace"); /* show checkpoints */ 1989 /* better panic as the AP may be running loose */ 1990 printf("panic y/n? [y] "); 1991 if (cngetc() != 'n') 1992 panic("bye-bye"); 1993 } 1994 CHECK_PRINT("trace"); /* show checkpoints */ 1995 1996 /* record its version info */ 1997 cpu_apic_versions[x] = cpu_apic_versions[0]; 1998 1999 all_cpus |= (1 << x); /* record AP in CPU map */ 2000 } 2001 2002 /* build our map of 'other' CPUs */ 2003 PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid))); 2004 2005 /* fill in our (BSP) APIC version */ 2006 cpu_apic_versions[0] = lapic.version; 2007 2008 /* restore the warmstart vector */ 2009 *(u_long *) WARMBOOT_OFF = mpbioswarmvec; 2010#ifndef PC98 2011 outb(CMOS_REG, BIOS_RESET); 2012 outb(CMOS_DATA, mpbiosreason); 2013#endif 2014 2015 /* 2016 * Set up the idle context for the BSP. Similar to above except 2017 * that some was done by locore, some by pmap.c and some is implicit 2018 * because the BSP is cpu#0 and the page is initially zero, and also 2019 * because we can refer to variables by name on the BSP.. 2020 */ 2021 2022 /* Allocate and setup BSP idle stack */ 2023 stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); 2024 for (i = 0; i < KSTACK_PAGES; i++) 2025 SMPpt[1 + i] = (pt_entry_t) 2026 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); 2027 2028 for (x = 0; x < NKPT; x++) 2029 PTD[x] = 0; 2030 pmap_set_opt(); 2031 2032 /* number of APs actually started */ 2033 return mp_ncpus - 1; 2034} 2035 2036 2037/* 2038 * load the 1st level AP boot code into base memory. 2039 */ 2040 2041/* targets for relocation */ 2042extern void bigJump(void); 2043extern void bootCodeSeg(void); 2044extern void bootDataSeg(void); 2045extern void MPentry(void); 2046extern u_int MP_GDT; 2047extern u_int mp_gdtbase; 2048 2049static void 2050install_ap_tramp(u_int boot_addr) 2051{ 2052 int x; 2053 int size = *(int *) ((u_long) & bootMP_size); 2054 u_char *src = (u_char *) ((u_long) bootMP); 2055 u_char *dst = (u_char *) boot_addr + KERNBASE; 2056 u_int boot_base = (u_int) bootMP; 2057 u_int8_t *dst8; 2058 u_int16_t *dst16; 2059 u_int32_t *dst32; 2060 2061 POSTCODE(INSTALL_AP_TRAMP_POST); 2062 2063 for (x = 0; x < size; ++x) 2064 *dst++ = *src++; 2065 2066 /* 2067 * modify addresses in code we just moved to basemem. unfortunately we 2068 * need fairly detailed info about mpboot.s for this to work. changes 2069 * to mpboot.s might require changes here. 2070 */ 2071 2072 /* boot code is located in KERNEL space */ 2073 dst = (u_char *) boot_addr + KERNBASE; 2074 2075 /* modify the lgdt arg */ 2076 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 2077 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base); 2078 2079 /* modify the ljmp target for MPentry() */ 2080 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 2081 *dst32 = ((u_int) MPentry - KERNBASE); 2082 2083 /* modify the target for boot code segment */ 2084 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 2085 dst8 = (u_int8_t *) (dst16 + 1); 2086 *dst16 = (u_int) boot_addr & 0xffff; 2087 *dst8 = ((u_int) boot_addr >> 16) & 0xff; 2088 2089 /* modify the target for boot data segment */ 2090 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 2091 dst8 = (u_int8_t *) (dst16 + 1); 2092 *dst16 = (u_int) boot_addr & 0xffff; 2093 *dst8 = ((u_int) boot_addr >> 16) & 0xff; 2094} 2095 2096 2097/* 2098 * this function starts the AP (application processor) identified 2099 * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 2100 * to accomplish this. This is necessary because of the nuances 2101 * of the different hardware we might encounter. It ain't pretty, 2102 * but it seems to work. 2103 */ 2104static int 2105start_ap(int logical_cpu, u_int boot_addr) 2106{ 2107 int physical_cpu; 2108 int vector; 2109 int cpus; 2110 u_long icr_lo, icr_hi; 2111 2112 POSTCODE(START_AP_POST); 2113 2114 /* get the PHYSICAL APIC ID# */ 2115 physical_cpu = CPU_TO_ID(logical_cpu); 2116 2117 /* calculate the vector */ 2118 vector = (boot_addr >> 12) & 0xff; 2119 2120 /* used as a watchpoint to signal AP startup */ 2121 cpus = mp_ncpus; 2122 2123 /* 2124 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting 2125 * and running the target CPU. OR this INIT IPI might be latched (P5 2126 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be 2127 * ignored. 2128 */ 2129 2130 /* setup the address for the target AP */ 2131 icr_hi = lapic.icr_hi & ~APIC_ID_MASK; 2132 icr_hi |= (physical_cpu << 24); 2133 lapic.icr_hi = icr_hi; 2134 2135 /* do an INIT IPI: assert RESET */ 2136 icr_lo = lapic.icr_lo & 0xfff00000; 2137 lapic.icr_lo = icr_lo | 0x0000c500; 2138 2139 /* wait for pending status end */ 2140 while (lapic.icr_lo & APIC_DELSTAT_MASK) 2141 /* spin */ ; 2142 2143 /* do an INIT IPI: deassert RESET */ 2144 lapic.icr_lo = icr_lo | 0x00008500; 2145 2146 /* wait for pending status end */ 2147 u_sleep(10000); /* wait ~10mS */ 2148 while (lapic.icr_lo & APIC_DELSTAT_MASK) 2149 /* spin */ ; 2150 2151 /* 2152 * next we do a STARTUP IPI: the previous INIT IPI might still be 2153 * latched, (P5 bug) this 1st STARTUP would then terminate 2154 * immediately, and the previously started INIT IPI would continue. OR 2155 * the previous INIT IPI has already run. and this STARTUP IPI will 2156 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI 2157 * will run. 2158 */ 2159 2160 /* do a STARTUP IPI */ 2161 lapic.icr_lo = icr_lo | 0x00000600 | vector; 2162 while (lapic.icr_lo & APIC_DELSTAT_MASK) 2163 /* spin */ ; 2164 u_sleep(200); /* wait ~200uS */ 2165 2166 /* 2167 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF 2168 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR 2169 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is 2170 * recognized after hardware RESET or INIT IPI. 2171 */ 2172 2173 lapic.icr_lo = icr_lo | 0x00000600 | vector; 2174 while (lapic.icr_lo & APIC_DELSTAT_MASK) 2175 /* spin */ ; 2176 u_sleep(200); /* wait ~200uS */ 2177 2178 /* wait for it to start */ 2179 set_apic_timer(5000000);/* == 5 seconds */ 2180 while (read_apic_timer()) 2181 if (mp_ncpus > cpus) 2182 return 1; /* return SUCCESS */ 2183 2184 return 0; /* return FAILURE */ 2185} 2186 2187/* 2188 * Flush the TLB on all other CPU's 2189 * 2190 * XXX: Needs to handshake and wait for completion before proceding. 2191 */ 2192void 2193smp_invltlb(void) 2194{ 2195#if defined(APIC_IO) 2196 if (smp_started && invltlb_ok) 2197 ipi_all_but_self(IPI_INVLTLB); 2198#endif /* APIC_IO */ 2199} 2200 2201void 2202invlpg(u_int addr) 2203{ 2204 __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); 2205 2206 /* send a message to the other CPUs */ 2207 smp_invltlb(); 2208} 2209 2210void 2211invltlb(void) 2212{ 2213 u_long temp; 2214 2215 /* 2216 * This should be implemented as load_cr3(rcr3()) when load_cr3() is 2217 * inlined. 2218 */ 2219 __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); 2220 2221 /* send a message to the other CPUs */ 2222 smp_invltlb(); 2223} 2224 2225 2226/* 2227 * This is called once the rest of the system is up and running and we're 2228 * ready to let the AP's out of the pen. 2229 */ 2230extern void enable_sse(void); 2231 2232void 2233ap_init(void) 2234{ 2235 u_int apic_id; 2236 2237 /* spin until all the AP's are ready */ 2238 while (!aps_ready) 2239 /* spin */ ; 2240 2241 /* 2242 * Set curproc to our per-cpu idleproc so that mutexes have 2243 * something unique to lock with. 2244 */ 2245 PCPU_SET(curthread, PCPU_GET(idlethread)); 2246 PCPU_SET(spinlocks, NULL); 2247 2248 /* lock against other AP's that are waking up */ 2249 mtx_lock_spin(&ap_boot_mtx); 2250 2251 /* BSP may have changed PTD while we're waiting for the lock */ 2252 cpu_invltlb(); 2253 2254 smp_cpus++; 2255 2256#if defined(I586_CPU) && !defined(NO_F00F_HACK) 2257 lidt(&r_idt); 2258#endif 2259 2260 /* Build our map of 'other' CPUs. */ 2261 PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid))); 2262 2263 printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); 2264 2265 /* set up CPU registers and state */ 2266 cpu_setregs(); 2267 2268 /* set up FPU state on the AP */ 2269 npxinit(__INITIAL_NPXCW__); 2270 2271 /* set up SSE registers */ 2272 enable_sse(); 2273 2274 /* A quick check from sanity claus */ 2275 apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); 2276 if (PCPU_GET(cpuid) != apic_id) { 2277 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 2278 printf("SMP: apic_id = %d\n", apic_id); 2279 printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]); 2280 panic("cpuid mismatch! boom!!"); 2281 } 2282 2283 /* Init local apic for irq's */ 2284 apic_initialize(); 2285 2286 /* Set memory range attributes for this CPU to match the BSP */ 2287 mem_range_AP_init(); 2288 2289 /* 2290 * Activate smp_invltlb, although strictly speaking, this isn't 2291 * quite correct yet. We should have a bitfield for cpus willing 2292 * to accept TLB flush IPI's or something and sync them. 2293 */ 2294 if (smp_cpus == mp_ncpus) { 2295 invltlb_ok = 1; 2296 smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ 2297 smp_active = 1; /* historic */ 2298 } 2299 2300 /* let other AP's wake up now */ 2301 mtx_unlock_spin(&ap_boot_mtx); 2302 2303 /* wait until all the AP's are up */ 2304 while (smp_started == 0) 2305 ; /* nothing */ 2306 2307 microuptime(PCPU_PTR(switchtime)); 2308 PCPU_SET(switchticks, ticks); 2309 2310 /* ok, now grab sched_lock and enter the scheduler */ 2311 enable_intr(); 2312 mtx_lock_spin(&sched_lock); 2313 cpu_throw(); /* doesn't return */ 2314 2315 panic("scheduler returned us to ap_init"); 2316} 2317 2318/* 2319 * For statclock, we send an IPI to all CPU's to have them call this 2320 * function. 2321 */ 2322void 2323forwarded_statclock(struct trapframe frame) 2324{ 2325 2326 mtx_lock_spin(&sched_lock); 2327 statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); 2328 mtx_unlock_spin(&sched_lock); 2329} 2330 2331void 2332forward_statclock(void) 2333{ 2334 int map; 2335 2336 CTR0(KTR_SMP, "forward_statclock"); 2337 2338 if (!smp_started || !invltlb_ok || cold || panicstr) 2339 return; 2340 2341 map = PCPU_GET(other_cpus) & ~stopped_cpus ; 2342 if (map != 0) 2343 ipi_selected(map, IPI_STATCLOCK); 2344} 2345 2346/* 2347 * For each hardclock(), we send an IPI to all other CPU's to have them 2348 * execute this function. It would be nice to reduce contention on 2349 * sched_lock if we could simply peek at the CPU to determine the user/kernel 2350 * state and call hardclock_process() on the CPU receiving the clock interrupt 2351 * and then just use a simple IPI to handle any ast's if needed. 2352 */ 2353void 2354forwarded_hardclock(struct trapframe frame) 2355{ 2356 2357 mtx_lock_spin(&sched_lock); 2358 hardclock_process(curthread, TRAPF_USERMODE(&frame)); 2359 mtx_unlock_spin(&sched_lock); 2360} 2361 2362void 2363forward_hardclock(void) 2364{ 2365 u_int map; 2366 2367 CTR0(KTR_SMP, "forward_hardclock"); 2368 2369 if (!smp_started || !invltlb_ok || cold || panicstr) 2370 return; 2371 2372 map = PCPU_GET(other_cpus) & ~stopped_cpus ; 2373 if (map != 0) 2374 ipi_selected(map, IPI_HARDCLOCK); 2375} 2376 2377#ifdef APIC_INTR_REORDER 2378/* 2379 * Maintain mapping from softintr vector to isr bit in local apic. 2380 */ 2381void 2382set_lapic_isrloc(int intr, int vector) 2383{ 2384 if (intr < 0 || intr > 32) 2385 panic("set_apic_isrloc: bad intr argument: %d",intr); 2386 if (vector < ICU_OFFSET || vector > 255) 2387 panic("set_apic_isrloc: bad vector argument: %d",vector); 2388 apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2); 2389 apic_isrbit_location[intr].bit = (1<<(vector & 31)); 2390} 2391#endif 2392 2393/* 2394 * send an IPI to a set of cpus. 2395 */ 2396void 2397ipi_selected(u_int32_t cpus, u_int ipi) 2398{ 2399 2400 CTR2(KTR_SMP, __func__ ": cpus: %x ipi: %x", cpus, ipi); 2401 selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED); 2402} 2403 2404/* 2405 * send an IPI INTerrupt containing 'vector' to all CPUs, including myself 2406 */ 2407void 2408ipi_all(u_int ipi) 2409{ 2410 2411 CTR1(KTR_SMP, __func__ ": ipi: %x", ipi); 2412 apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED); 2413} 2414 2415/* 2416 * send an IPI to all CPUs EXCEPT myself 2417 */ 2418void 2419ipi_all_but_self(u_int ipi) 2420{ 2421 2422 CTR1(KTR_SMP, __func__ ": ipi: %x", ipi); 2423 apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED); 2424} 2425 2426/* 2427 * send an IPI to myself 2428 */ 2429void 2430ipi_self(u_int ipi) 2431{ 2432 2433 CTR1(KTR_SMP, __func__ ": ipi: %x", ipi); 2434 apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED); 2435} 2436 2437void 2438release_aps(void *dummy __unused) 2439{ 2440 atomic_store_rel_int(&aps_ready, 1); 2441} 2442 2443SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); 2444