mptable.c revision 29213
1/* 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $Id: mp_machdep.c,v 1.38 1997/09/05 20:23:34 smp Exp smp $ 26 */ 27 28#include "opt_smp.h" 29 30#include <sys/param.h> 31#include <sys/systm.h> 32#include <sys/kernel.h> 33#include <sys/proc.h> 34#include <sys/sysctl.h> 35 36#include <vm/vm.h> 37#include <vm/vm_param.h> 38#include <vm/pmap.h> 39#include <vm/vm_kern.h> 40#include <vm/vm_extern.h> 41 42#include <machine/smp.h> 43#include <machine/apic.h> 44#include <machine/mpapic.h> 45#include <machine/segments.h> 46#include <machine/smptests.h> /** TEST_DEFAULT_CONFIG, TEST_TEST1 */ 47#include <machine/tss.h> 48#include <machine/specialreg.h> 49 50#include <i386/i386/cons.h> /* cngetc() */ 51 52#if defined(APIC_IO) 53#include <machine/md_var.h> /* setidt() */ 54#include <i386/isa/icu.h> /* IPIs */ 55#include <i386/isa/intr_machdep.h> /* IPIs */ 56#endif /* APIC_IO */ 57 58#if defined(TEST_DEFAULT_CONFIG) 59#define MPFPS_MPFB1 TEST_DEFAULT_CONFIG 60#else 61#define MPFPS_MPFB1 mpfps->mpfb1 62#endif /* TEST_DEFAULT_CONFIG */ 63 64#define WARMBOOT_TARGET 0 65#define WARMBOOT_OFF (KERNBASE + 0x0467) 66#define WARMBOOT_SEG (KERNBASE + 0x0469) 67 68#define BIOS_BASE (0xf0000) 69#define BIOS_SIZE (0x10000) 70#define BIOS_COUNT (BIOS_SIZE/4) 71 72#define CMOS_REG (0x70) 73#define CMOS_DATA (0x71) 74#define BIOS_RESET (0x0f) 75#define BIOS_WARM (0x0a) 76 77#define PROCENTRY_FLAG_EN 0x01 78#define PROCENTRY_FLAG_BP 0x02 79#define IOAPICENTRY_FLAG_EN 0x01 80 81 82/* MP Floating Pointer Structure */ 83typedef struct MPFPS { 84 char signature[4]; 85 void *pap; 86 u_char length; 87 u_char spec_rev; 88 u_char checksum; 89 u_char mpfb1; 90 u_char mpfb2; 91 u_char mpfb3; 92 u_char mpfb4; 93 u_char mpfb5; 94} *mpfps_t; 95 96/* MP Configuration Table Header */ 97typedef struct MPCTH { 98 char signature[4]; 99 u_short base_table_length; 100 u_char spec_rev; 101 u_char checksum; 102 u_char oem_id[8]; 103 u_char product_id[12]; 104 void *oem_table_pointer; 105 u_short oem_table_size; 106 u_short entry_count; 107 void *apic_address; 108 u_short extended_table_length; 109 u_char extended_table_checksum; 110 u_char reserved; 111} *mpcth_t; 112 113 114typedef struct PROCENTRY { 115 u_char type; 116 u_char apic_id; 117 u_char apic_version; 118 u_char cpu_flags; 119 u_long cpu_signature; 120 u_long feature_flags; 121 u_long reserved1; 122 u_long reserved2; 123} *proc_entry_ptr; 124 125typedef struct BUSENTRY { 126 u_char type; 127 u_char bus_id; 128 char bus_type[6]; 129} *bus_entry_ptr; 130 131typedef struct IOAPICENTRY { 132 u_char type; 133 u_char apic_id; 134 u_char apic_version; 135 u_char apic_flags; 136 void *apic_address; 137} *io_apic_entry_ptr; 138 139typedef struct INTENTRY { 140 u_char type; 141 u_char int_type; 142 u_short int_flags; 143 u_char src_bus_id; 144 u_char src_bus_irq; 145 u_char dst_apic_id; 146 u_char dst_apic_int; 147} *int_entry_ptr; 148 149/* descriptions of MP basetable entries */ 150typedef struct BASETABLE_ENTRY { 151 u_char type; 152 u_char length; 153 char name[16]; 154} basetable_entry; 155 156/* 157 * this code MUST be enabled here and in mpboot.s. 158 * it follows the very early stages of AP boot by placing values in CMOS ram. 159 * it NORMALLY will never be needed and thus the primitive method for enabling. 160 * 161#define CHECK_POINTS 162 */ 163 164#if defined(CHECK_POINTS) 165#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 166#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 167 168#define CHECK_INIT(D); \ 169 CHECK_WRITE(0x34, (D)); \ 170 CHECK_WRITE(0x35, (D)); \ 171 CHECK_WRITE(0x36, (D)); \ 172 CHECK_WRITE(0x37, (D)); \ 173 CHECK_WRITE(0x38, (D)); \ 174 CHECK_WRITE(0x39, (D)); 175 176#define CHECK_PRINT(S); \ 177 printf("%s: %d, %d, %d, %d, %d, %d\n", \ 178 (S), \ 179 CHECK_READ(0x34), \ 180 CHECK_READ(0x35), \ 181 CHECK_READ(0x36), \ 182 CHECK_READ(0x37), \ 183 CHECK_READ(0x38), \ 184 CHECK_READ(0x39)); 185 186#else /* CHECK_POINTS */ 187 188#define CHECK_INIT(D) 189#define CHECK_PRINT(S) 190 191#endif /* CHECK_POINTS */ 192 193/* 194 * Values to send to the POST hardware. 195 */ 196#define MP_BOOTADDRESS_POST 0x10 197#define MP_PROBE_POST 0x11 198#define MPTABLE_PASS1_POST 0x12 199 200#define MP_START_POST 0x13 201#define MP_ENABLE_POST 0x14 202#define MPTABLE_PASS2_POST 0x15 203 204#define START_ALL_APS_POST 0x16 205#define INSTALL_AP_TRAMP_POST 0x17 206#define START_AP_POST 0x18 207 208#define MP_ANNOUNCE_POST 0x19 209 210 211/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ 212int current_postcode; 213 214/** XXX FIXME: what system files declare these??? */ 215extern struct region_descriptor r_gdt, r_idt; 216 217int bsp_apic_ready = 0; /* flags useability of BSP apic */ 218int mp_ncpus; /* # of CPUs, including BSP */ 219int mp_naps; /* # of Applications processors */ 220int mp_nbusses; /* # of busses */ 221int mp_napics; /* # of IO APICs */ 222int boot_cpu_id; /* designated BSP */ 223vm_offset_t cpu_apic_address; 224vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */ 225 226u_int32_t cpu_apic_versions[NCPU]; 227u_int32_t io_apic_versions[NAPIC]; 228 229/* 230 * APIC ID logical/physical mapping structures. 231 * We oversize these to simplify boot-time config. 232 */ 233int cpu_num_to_apic_id[NAPICID]; 234int io_num_to_apic_id[NAPICID]; 235int apic_id_to_logical[NAPICID]; 236 237/* Bitmap of all available CPUs */ 238u_int all_cpus; 239 240/* AP uses this PTD during bootstrap */ 241pd_entry_t *bootPTD; 242 243/* Hotwire a 0->4MB V==P mapping */ 244extern pt_entry_t *KPTphys; 245 246/* Virtual address of per-cpu common_tss */ 247extern struct i386tss common_tss; 248#ifdef VM86 249extern u_int private_tss; /* flag indicating private tss */ 250extern struct segment_descriptor common_tssd; 251#endif /* VM86 */ 252 253/* IdlePTD per cpu */ 254pd_entry_t *IdlePTDS[NCPU]; 255 256/* "my" private page table page, for BSP init */ 257extern pt_entry_t SMP_prvpt[]; 258 259/* Private page pointer to curcpu's PTD, used during BSP init */ 260extern pd_entry_t *my_idlePTD; 261 262static int smp_started; /* has the system started? */ 263 264/* 265 * Local data and functions. 266 */ 267 268static int mp_capable; 269static u_int boot_address; 270static u_int base_memory; 271 272static int picmode; /* 0: virtual wire mode, 1: PIC mode */ 273static mpfps_t mpfps; 274static int search_for_sig(u_int32_t target, int count); 275static void mp_enable(u_int boot_addr); 276 277static int mptable_pass1(void); 278static int mptable_pass2(void); 279static void default_mp_table(int type); 280static void fix_mp_table(void); 281static void init_locks(void); 282static int start_all_aps(u_int boot_addr); 283static void install_ap_tramp(u_int boot_addr); 284static int start_ap(int logicalCpu, u_int boot_addr); 285 286 287/* 288 * Calculate usable address in base memory for AP trampoline code. 289 */ 290u_int 291mp_bootaddress(u_int basemem) 292{ 293 POSTCODE(MP_BOOTADDRESS_POST); 294 295 base_memory = basemem * 1024; /* convert to bytes */ 296 297 boot_address = base_memory & ~0xfff; /* round down to 4k boundary */ 298 if ((base_memory - boot_address) < bootMP_size) 299 boot_address -= 4096; /* not enough, lower by 4k */ 300 301 return boot_address; 302} 303 304 305/* 306 * Look for an Intel MP spec table (ie, SMP capable hardware). 307 */ 308int 309mp_probe(void) 310{ 311 int x; 312 u_long segment; 313 u_int32_t target; 314 315 POSTCODE(MP_PROBE_POST); 316 317 /* see if EBDA exists */ 318 if (segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) { 319 /* search first 1K of EBDA */ 320 target = (u_int32_t) (segment << 4); 321 if ((x = search_for_sig(target, 1024 / 4)) >= 0) 322 goto found; 323 } else { 324 /* last 1K of base memory, effective 'top of base' passed in */ 325 target = (u_int32_t) (base_memory - 0x400); 326 if ((x = search_for_sig(target, 1024 / 4)) >= 0) 327 goto found; 328 } 329 330 /* search the BIOS */ 331 target = (u_int32_t) BIOS_BASE; 332 if ((x = search_for_sig(target, BIOS_COUNT)) >= 0) 333 goto found; 334 335 /* nothing found */ 336 mpfps = (mpfps_t)0; 337 mp_capable = 0; 338 return 0; 339 340found: 341 /* calculate needed resources */ 342 mpfps = (mpfps_t)x; 343 if (mptable_pass1()) 344 panic("you must reconfigure your kernel"); 345 346 /* flag fact that we are running multiple processors */ 347 mp_capable = 1; 348 return 1; 349} 350 351 352/* 353 * Startup the SMP processors. 354 */ 355void 356mp_start(void) 357{ 358 POSTCODE(MP_START_POST); 359 360 /* look for MP capable motherboard */ 361 if (mp_capable) 362 mp_enable(boot_address); 363 else 364 panic("MP hardware not found!"); 365} 366 367 368/* 369 * Print various information about the SMP system hardware and setup. 370 */ 371void 372mp_announce(void) 373{ 374 int x; 375 376 POSTCODE(MP_ANNOUNCE_POST); 377 378 printf("FreeBSD/SMP: Multiprocessor motherboard\n"); 379 printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0)); 380 printf(", version: 0x%08x", cpu_apic_versions[0]); 381 printf(", at 0x%08x\n", cpu_apic_address); 382 for (x = 1; x <= mp_naps; ++x) { 383 printf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x)); 384 printf(", version: 0x%08x", cpu_apic_versions[x]); 385 printf(", at 0x%08x\n", cpu_apic_address); 386 } 387 388#if defined(APIC_IO) 389 for (x = 0; x < mp_napics; ++x) { 390 printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x)); 391 printf(", version: 0x%08x", io_apic_versions[x]); 392 printf(", at 0x%08x\n", io_apic_address[x]); 393 } 394#else 395 printf(" Warning: APIC I/O disabled\n"); 396#endif /* APIC_IO */ 397} 398 399/* 400 * AP cpu's call this to sync up protected mode. 401 */ 402void 403init_secondary(void) 404{ 405 int gsel_tss, slot; 406 407 r_gdt.rd_limit = sizeof(gdt[0]) * (NGDT + NCPU) - 1; 408 r_gdt.rd_base = (int) gdt; 409 lgdt(&r_gdt); /* does magic intra-segment return */ 410 lidt(&r_idt); 411 lldt(_default_ldt); 412 413 slot = NGDT + cpuid; 414 gsel_tss = GSEL(slot, SEL_KPL); 415 gdt[slot].sd.sd_type = SDT_SYS386TSS; 416 common_tss.tss_esp0 = 0; /* not used until after switch */ 417 common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); 418 common_tss.tss_ioopt = (sizeof common_tss) << 16; 419#ifdef VM86 420 common_tssd = gdt[slot].sd; 421 private_tss = 0; 422#endif /* VM86 */ 423 ltr(gsel_tss); 424 425 load_cr0(0x8005003b); /* XXX! */ 426 427 PTD[0] = 0; 428 pmap_set_opt((unsigned *)PTD); 429 430 invltlb(); 431} 432 433 434#if defined(APIC_IO) 435/* 436 * Final configuration of the BSP's local APIC: 437 * - disable 'pic mode'. 438 * - disable 'virtual wire mode'. 439 * - enable NMI. 440 */ 441void 442bsp_apic_configure(void) 443{ 444 u_char byte; 445 u_int32_t temp; 446 447 /* leave 'pic mode' if necessary */ 448 if (picmode) { 449 outb(0x22, 0x70); /* select IMCR */ 450 byte = inb(0x23); /* current contents */ 451 byte |= 0x01; /* mask external INTR */ 452 outb(0x23, byte); /* disconnect 8259s/NMI */ 453 } 454 455 /* mask lint0 (the 8259 'virtual wire' connection) */ 456 temp = lapic.lvt_lint0; 457 temp |= APIC_LVT_M; /* set the mask */ 458 lapic.lvt_lint0 = temp; 459 460 /* setup lint1 to handle NMI */ 461 temp = lapic.lvt_lint1; 462 temp &= ~APIC_LVT_M; /* clear the mask */ 463 lapic.lvt_lint1 = temp; 464 465 if (bootverbose) 466 apic_dump("bsp_apic_configure()"); 467} 468#endif /* APIC_IO */ 469 470 471/******************************************************************* 472 * local functions and data 473 */ 474 475/* 476 * start the SMP system 477 */ 478static void 479mp_enable(u_int boot_addr) 480{ 481 int x; 482#if defined(APIC_IO) 483 int apic; 484 u_int ux; 485#endif /* APIC_IO */ 486 487 POSTCODE(MP_ENABLE_POST); 488 489 /* turn on 4MB of V == P addressing so we can get to MP table */ 490 *(int *)PTD = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME); 491 invltlb(); 492 493 /* examine the MP table for needed info, uses physical addresses */ 494 x = mptable_pass2(); 495 496 *(int *)PTD = 0; 497 invltlb(); 498 499 /* can't process default configs till the CPU APIC is pmapped */ 500 if (x) 501 default_mp_table(x); 502 503 /* post scan cleanup */ 504 fix_mp_table(); 505 506#if defined(APIC_IO) 507 508 /* fill the LOGICAL io_apic_versions table */ 509 for (apic = 0; apic < mp_napics; ++apic) { 510 ux = io_apic_read(apic, IOAPIC_VER); 511 io_apic_versions[apic] = ux; 512 } 513 514 /* program each IO APIC in the system */ 515 for (apic = 0; apic < mp_napics; ++apic) 516 if (io_apic_setup(apic) < 0) 517 panic("IO APIC setup failure"); 518 519 /* install a 'Spurious INTerrupt' vector */ 520 setidt(XSPURIOUSINT_OFFSET, Xspuriousint, 521 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 522 523 /* install an inter-CPU IPI for TLB invalidation */ 524 setidt(XINVLTLB_OFFSET, Xinvltlb, 525 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 526 527 /* install an inter-CPU IPI for CPU stop/restart */ 528 setidt(XCPUSTOP_OFFSET, Xcpustop, 529 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 530 531#if defined(TEST_TEST1) 532 /* install a "fake hardware INTerrupt" vector */ 533 setidt(XTEST1_OFFSET, Xtest1, 534 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 535#endif /** TEST_TEST1 */ 536 537#endif /* APIC_IO */ 538 539 /* initialize all SMP locks */ 540 init_locks(); 541 542 /* start each Application Processor */ 543 start_all_aps(boot_addr); 544 545 /* 546 * The init process might be started on a different CPU now, 547 * and the boot CPU might not call prepare_usermode to get 548 * cr0 correctly configured. Thus we initialize cr0 here. 549 */ 550 load_cr0(rcr0() | CR0_WP | CR0_AM); 551} 552 553 554/* 555 * look for the MP spec signature 556 */ 557 558/* string defined by the Intel MP Spec as identifying the MP table */ 559#define MP_SIG 0x5f504d5f /* _MP_ */ 560#define NEXT(X) ((X) += 4) 561static int 562search_for_sig(u_int32_t target, int count) 563{ 564 int x; 565 u_int32_t *addr = (u_int32_t *) (KERNBASE + target); 566 567 for (x = 0; x < count; NEXT(x)) 568 if (addr[x] == MP_SIG) 569 /* make array index a byte index */ 570 return (target + (x * sizeof(u_int32_t))); 571 572 return -1; 573} 574 575 576static basetable_entry basetable_entry_types[] = 577{ 578 {0, 20, "Processor"}, 579 {1, 8, "Bus"}, 580 {2, 8, "I/O APIC"}, 581 {3, 8, "I/O INT"}, 582 {4, 8, "Local INT"} 583}; 584 585typedef struct BUSDATA { 586 u_char bus_id; 587 enum busTypes bus_type; 588} bus_datum; 589 590typedef struct INTDATA { 591 u_char int_type; 592 u_short int_flags; 593 u_char src_bus_id; 594 u_char src_bus_irq; 595 u_char dst_apic_id; 596 u_char dst_apic_int; 597} io_int, local_int; 598 599typedef struct BUSTYPENAME { 600 u_char type; 601 char name[7]; 602} bus_type_name; 603 604static bus_type_name bus_type_table[] = 605{ 606 {CBUS, "CBUS"}, 607 {CBUSII, "CBUSII"}, 608 {EISA, "EISA"}, 609 {UNKNOWN_BUSTYPE, "---"}, 610 {UNKNOWN_BUSTYPE, "---"}, 611 {ISA, "ISA"}, 612 {UNKNOWN_BUSTYPE, "---"}, 613 {UNKNOWN_BUSTYPE, "---"}, 614 {UNKNOWN_BUSTYPE, "---"}, 615 {UNKNOWN_BUSTYPE, "---"}, 616 {UNKNOWN_BUSTYPE, "---"}, 617 {UNKNOWN_BUSTYPE, "---"}, 618 {PCI, "PCI"}, 619 {UNKNOWN_BUSTYPE, "---"}, 620 {UNKNOWN_BUSTYPE, "---"}, 621 {UNKNOWN_BUSTYPE, "---"}, 622 {UNKNOWN_BUSTYPE, "---"}, 623 {XPRESS, "XPRESS"}, 624 {UNKNOWN_BUSTYPE, "---"} 625}; 626/* from MP spec v1.4, table 5-1 */ 627static int default_data[7][5] = 628{ 629/* nbus, id0, type0, id1, type1 */ 630 {1, 0, ISA, 255, 255}, 631 {1, 0, EISA, 255, 255}, 632 {1, 0, EISA, 255, 255}, 633 {0, 255, 255, 255, 255},/* MCA not supported */ 634 {2, 0, ISA, 1, PCI}, 635 {2, 0, EISA, 1, PCI}, 636 {0, 255, 255, 255, 255} /* MCA not supported */ 637}; 638 639 640/* the bus data */ 641bus_datum bus_data[NBUS]; 642 643/* the IO INT data, one entry per possible APIC INTerrupt */ 644io_int io_apic_ints[NINTR]; 645 646static int nintrs; 647 648static int processor_entry __P((proc_entry_ptr entry, int cpu)); 649static int bus_entry __P((bus_entry_ptr entry, int bus)); 650static int io_apic_entry __P((io_apic_entry_ptr entry, int apic)); 651static int int_entry __P((int_entry_ptr entry, int intr)); 652static int lookup_bus_type __P((char *name)); 653 654 655/* 656 * 1st pass on motherboard's Intel MP specification table. 657 * 658 * initializes: 659 * mp_ncpus = 1 660 * 661 * determines: 662 * cpu_apic_address (common to all CPUs) 663 * io_apic_address[N] 664 * mp_naps 665 * mp_nbusses 666 * mp_napics 667 * nintrs 668 */ 669static int 670mptable_pass1(void) 671{ 672 int x; 673 mpcth_t cth; 674 int totalSize; 675 void* position; 676 int count; 677 int type; 678 int mustpanic; 679 680 POSTCODE(MPTABLE_PASS1_POST); 681 682 mustpanic = 0; 683 684 /* clear various tables */ 685 for (x = 0; x < NAPICID; ++x) { 686 io_apic_address[x] = ~0; /* IO APIC address table */ 687 } 688 689 /* init everything to empty */ 690 mp_naps = 0; 691 mp_nbusses = 0; 692 mp_napics = 0; 693 nintrs = 0; 694 695 /* check for use of 'default' configuration */ 696 if (MPFPS_MPFB1 != 0) { 697 /* use default addresses */ 698 cpu_apic_address = DEFAULT_APIC_BASE; 699 io_apic_address[0] = DEFAULT_IO_APIC_BASE; 700 701 /* fill in with defaults */ 702 mp_naps = 2; /* includes BSP */ 703 mp_nbusses = default_data[MPFPS_MPFB1 - 1][0]; 704#if defined(APIC_IO) 705 mp_napics = 1; 706 nintrs = 16; 707#endif /* APIC_IO */ 708 } 709 else { 710 if ((cth = mpfps->pap) == 0) 711 panic("MP Configuration Table Header MISSING!"); 712 713 cpu_apic_address = (vm_offset_t) cth->apic_address; 714 715 /* walk the table, recording info of interest */ 716 totalSize = cth->base_table_length - sizeof(struct MPCTH); 717 position = (u_char *) cth + sizeof(struct MPCTH); 718 count = cth->entry_count; 719 720 while (count--) { 721 switch (type = *(u_char *) position) { 722 case 0: /* processor_entry */ 723 if (((proc_entry_ptr)position)->cpu_flags 724 & PROCENTRY_FLAG_EN) 725 ++mp_naps; 726 break; 727 case 1: /* bus_entry */ 728 ++mp_nbusses; 729 break; 730 case 2: /* io_apic_entry */ 731 if (((io_apic_entry_ptr)position)->apic_flags 732 & IOAPICENTRY_FLAG_EN) 733 io_apic_address[mp_napics++] = 734 (vm_offset_t)((io_apic_entry_ptr) 735 position)->apic_address; 736 break; 737 case 3: /* int_entry */ 738 ++nintrs; 739 break; 740 case 4: /* int_entry */ 741 break; 742 default: 743 panic("mpfps Base Table HOSED!"); 744 /* NOTREACHED */ 745 } 746 747 totalSize -= basetable_entry_types[type].length; 748 (u_char*)position += basetable_entry_types[type].length; 749 } 750 } 751 752 /* qualify the numbers */ 753 if (mp_naps > NCPU) 754#if 0 /* XXX FIXME: kern/4255 */ 755 printf("Warning: only using %d of %d available CPUs!\n", 756 NCPU, mp_naps); 757#else 758 { 759 printf("NCPU cannot be different than actual CPU count.\n"); 760 printf(" add 'options NCPU=%d' to your kernel config file,\n", 761 mp_naps); 762 printf(" then rerun config & rebuild your SMP kernel\n"); 763 mustpanic = 1; 764 } 765#endif /* XXX FIXME: kern/4255 */ 766 if (mp_nbusses > NBUS) { 767 printf("found %d busses, increase NBUS\n", mp_nbusses); 768 mustpanic = 1; 769 } 770 if (mp_napics > NAPIC) { 771 printf("found %d apics, increase NAPIC\n", mp_napics); 772 mustpanic = 1; 773 } 774 if (nintrs > NINTR) { 775 printf("found %d intrs, increase NINTR\n", nintrs); 776 mustpanic = 1; 777 } 778 779 /* 780 * Count the BSP. 781 * This is also used as a counter while starting the APs. 782 */ 783 mp_ncpus = 1; 784 785 --mp_naps; /* subtract the BSP */ 786 787 return mustpanic; 788} 789 790 791/* 792 * 2nd pass on motherboard's Intel MP specification table. 793 * 794 * sets: 795 * boot_cpu_id 796 * ID_TO_IO(N), phy APIC ID to log CPU/IO table 797 * CPU_TO_ID(N), logical CPU to APIC ID table 798 * IO_TO_ID(N), logical IO to APIC ID table 799 * bus_data[N] 800 * io_apic_ints[N] 801 */ 802static int 803mptable_pass2(void) 804{ 805 int x; 806 mpcth_t cth; 807 int totalSize; 808 void* position; 809 int count; 810 int type; 811 int apic, bus, cpu, intr; 812 813 POSTCODE(MPTABLE_PASS2_POST); 814 815 /* clear various tables */ 816 for (x = 0; x < NAPICID; ++x) { 817 ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */ 818 CPU_TO_ID(x) = -1; /* logical CPU to APIC ID table */ 819 IO_TO_ID(x) = -1; /* logical IO to APIC ID table */ 820 } 821 822 /* clear bus data table */ 823 for (x = 0; x < NBUS; ++x) 824 bus_data[x].bus_id = 0xff; 825 826 /* clear IO APIC INT table */ 827 for (x = 0; x < NINTR; ++x) 828 io_apic_ints[x].int_type = 0xff; 829 830 /* setup the cpu/apic mapping arrays */ 831 boot_cpu_id = -1; 832 833 /* record whether PIC or virtual-wire mode */ 834 picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0; 835 836 /* check for use of 'default' configuration */ 837 if (MPFPS_MPFB1 != 0) 838 return MPFPS_MPFB1; /* return default configuration type */ 839 840 if ((cth = mpfps->pap) == 0) 841 panic("MP Configuration Table Header MISSING!"); 842 843 /* walk the table, recording info of interest */ 844 totalSize = cth->base_table_length - sizeof(struct MPCTH); 845 position = (u_char *) cth + sizeof(struct MPCTH); 846 count = cth->entry_count; 847 apic = bus = intr = 0; 848 cpu = 1; /* pre-count the BSP */ 849 850 while (count--) { 851 switch (type = *(u_char *) position) { 852 case 0: 853 if (processor_entry(position, cpu)) 854 ++cpu; 855 break; 856 case 1: 857 if (bus_entry(position, bus)) 858 ++bus; 859 break; 860 case 2: 861 if (io_apic_entry(position, apic)) 862 ++apic; 863 break; 864 case 3: 865 if (int_entry(position, intr)) 866 ++intr; 867 break; 868 case 4: 869 /* int_entry(position); */ 870 break; 871 default: 872 panic("mpfps Base Table HOSED!"); 873 /* NOTREACHED */ 874 } 875 876 totalSize -= basetable_entry_types[type].length; 877 (u_char *) position += basetable_entry_types[type].length; 878 } 879 880 if (boot_cpu_id == -1) 881 panic("NO BSP found!"); 882 883 /* report fact that its NOT a default configuration */ 884 return 0; 885} 886 887 888/* 889 * parse an Intel MP specification table 890 */ 891static void 892fix_mp_table(void) 893{ 894 int x; 895 int id; 896 int bus_0; 897 int bus_pci; 898 int num_pci_bus; 899 900 /* 901 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS 902 * did it wrong. The MP spec says that when more than 1 PCI bus 903 * exists the BIOS must begin with bus entries for the PCI bus and use 904 * actual PCI bus numbering. This implies that when only 1 PCI bus 905 * exists the BIOS can choose to ignore this ordering, and indeed many 906 * MP motherboards do ignore it. This causes a problem when the PCI 907 * sub-system makes requests of the MP sub-system based on PCI bus 908 * numbers. So here we look for the situation and renumber the 909 * busses and associated INTs in an effort to "make it right". 910 */ 911 912 /* find bus 0, PCI bus, count the number of PCI busses */ 913 for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) { 914 if (bus_data[x].bus_id == 0) { 915 bus_0 = x; 916 } 917 if (bus_data[x].bus_type == PCI) { 918 ++num_pci_bus; 919 bus_pci = x; 920 } 921 } 922 /* 923 * bus_0 == slot of bus with ID of 0 924 * bus_pci == slot of last PCI bus encountered 925 */ 926 927 /* check the 1 PCI bus case for sanity */ 928 if (num_pci_bus == 1) { 929 930 /* if it is number 0 all is well */ 931 if (bus_data[bus_pci].bus_id == 0) 932 return; 933 934 /* mis-numbered, swap with whichever bus uses slot 0 */ 935 936 /* swap the bus entry types */ 937 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type; 938 bus_data[bus_0].bus_type = PCI; 939 940 /* swap each relavant INTerrupt entry */ 941 id = bus_data[bus_pci].bus_id; 942 for (x = 0; x < nintrs; ++x) { 943 if (io_apic_ints[x].src_bus_id == id) { 944 io_apic_ints[x].src_bus_id = 0; 945 } 946 else if (io_apic_ints[x].src_bus_id == 0) { 947 io_apic_ints[x].src_bus_id = id; 948 } 949 } 950 } 951 /* sanity check if more than 1 PCI bus */ 952 else if (num_pci_bus > 1) { 953 for (x = 0; x < mp_nbusses; ++x) { 954 if (bus_data[x].bus_type != PCI) 955 continue; 956 if (bus_data[x].bus_id >= num_pci_bus) 957 panic("bad PCI bus numbering"); 958 } 959 } 960} 961 962 963static int 964processor_entry(proc_entry_ptr entry, int cpu) 965{ 966 /* check for usability */ 967 if ((cpu >= NCPU) || !(entry->cpu_flags & PROCENTRY_FLAG_EN)) 968 return 0; 969 970 /* check for BSP flag */ 971 if (entry->cpu_flags & PROCENTRY_FLAG_BP) { 972 boot_cpu_id = entry->apic_id; 973 CPU_TO_ID(0) = entry->apic_id; 974 ID_TO_CPU(entry->apic_id) = 0; 975 return 0; /* its already been counted */ 976 } 977 978 /* add another AP to list, if less than max number of CPUs */ 979 else { 980 CPU_TO_ID(cpu) = entry->apic_id; 981 ID_TO_CPU(entry->apic_id) = cpu; 982 return 1; 983 } 984} 985 986 987static int 988bus_entry(bus_entry_ptr entry, int bus) 989{ 990 int x; 991 char c, name[8]; 992 993 /* encode the name into an index */ 994 for (x = 0; x < 6; ++x) { 995 if ((c = entry->bus_type[x]) == ' ') 996 break; 997 name[x] = c; 998 } 999 name[x] = '\0'; 1000 1001 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE) 1002 panic("unknown bus type: '%s'", name); 1003 1004 bus_data[bus].bus_id = entry->bus_id; 1005 bus_data[bus].bus_type = x; 1006 1007 return 1; 1008} 1009 1010 1011static int 1012io_apic_entry(io_apic_entry_ptr entry, int apic) 1013{ 1014 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN)) 1015 return 0; 1016 1017 IO_TO_ID(apic) = entry->apic_id; 1018 ID_TO_IO(entry->apic_id) = apic; 1019 1020 return 1; 1021} 1022 1023 1024static int 1025lookup_bus_type(char *name) 1026{ 1027 int x; 1028 1029 for (x = 0; x < MAX_BUSTYPE; ++x) 1030 if (strcmp(bus_type_table[x].name, name) == 0) 1031 return bus_type_table[x].type; 1032 1033 return UNKNOWN_BUSTYPE; 1034} 1035 1036 1037static int 1038int_entry(int_entry_ptr entry, int intr) 1039{ 1040 io_apic_ints[intr].int_type = entry->int_type; 1041 io_apic_ints[intr].int_flags = entry->int_flags; 1042 io_apic_ints[intr].src_bus_id = entry->src_bus_id; 1043 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq; 1044 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id; 1045 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int; 1046 1047 return 1; 1048} 1049 1050 1051static int 1052apic_int_is_bus_type(int intr, int bus_type) 1053{ 1054 int bus; 1055 1056 for (bus = 0; bus < mp_nbusses; ++bus) 1057 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id) 1058 && ((int) bus_data[bus].bus_type == bus_type)) 1059 return 1; 1060 1061 return 0; 1062} 1063 1064 1065/* 1066 * Given a traditional ISA INT mask, return an APIC mask. 1067 */ 1068u_int 1069isa_apic_mask(u_int isa_mask) 1070{ 1071 int isa_irq; 1072 int apic_pin; 1073 1074#if defined(SKIP_IRQ15_REDIRECT) 1075 if (isa_mask == (1 << 15)) { 1076 printf("skipping ISA IRQ15 redirect\n"); 1077 return isa_mask; 1078 } 1079#endif /* SKIP_IRQ15_REDIRECT */ 1080 1081 isa_irq = ffs(isa_mask); /* find its bit position */ 1082 if (isa_irq == 0) /* doesn't exist */ 1083 return 0; 1084 --isa_irq; /* make it zero based */ 1085 1086 apic_pin = isa_apic_pin(isa_irq); /* look for APIC connection */ 1087 if (apic_pin == -1) 1088 return 0; 1089 1090 return (1 << apic_pin); /* convert pin# to a mask */ 1091} 1092 1093 1094/* 1095 * Determine which APIC pin an ISA/EISA INT is attached to. 1096 */ 1097#define INTTYPE(I) (io_apic_ints[(I)].int_type) 1098#define INTPIN(I) (io_apic_ints[(I)].dst_apic_int) 1099 1100#define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq) 1101int 1102isa_apic_pin(int isa_irq) 1103{ 1104 int intr; 1105 1106 for (intr = 0; intr < nintrs; ++intr) { /* check each record */ 1107 if (INTTYPE(intr) == 0) { /* standard INT */ 1108 if (SRCBUSIRQ(intr) == isa_irq) { 1109 if (apic_int_is_bus_type(intr, ISA) || 1110 apic_int_is_bus_type(intr, EISA)) 1111 return INTPIN(intr); /* found */ 1112 } 1113 } 1114 } 1115 return -1; /* NOT found */ 1116} 1117#undef SRCBUSIRQ 1118 1119 1120/* 1121 * Determine which APIC pin a PCI INT is attached to. 1122 */ 1123#define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id) 1124#define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f) 1125#define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03) 1126int 1127pci_apic_pin(int pciBus, int pciDevice, int pciInt) 1128{ 1129 int intr; 1130 1131 --pciInt; /* zero based */ 1132 1133 for (intr = 0; intr < nintrs; ++intr) /* check each record */ 1134 if ((INTTYPE(intr) == 0) /* standard INT */ 1135 && (SRCBUSID(intr) == pciBus) 1136 && (SRCBUSDEVICE(intr) == pciDevice) 1137 && (SRCBUSLINE(intr) == pciInt)) /* a candidate IRQ */ 1138 if (apic_int_is_bus_type(intr, PCI)) 1139 return INTPIN(intr); /* exact match */ 1140 1141 return -1; /* NOT found */ 1142} 1143#undef SRCBUSLINE 1144#undef SRCBUSDEVICE 1145#undef SRCBUSID 1146 1147#undef INTPIN 1148#undef INTTYPE 1149 1150 1151/* 1152 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt. 1153 * 1154 * XXX FIXME: 1155 * Exactly what this means is unclear at this point. It is a solution 1156 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard 1157 * could route any of the ISA INTs to upper (>15) IRQ values. But most would 1158 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an 1159 * option. 1160 */ 1161int 1162undirect_isa_irq(int rirq) 1163{ 1164#if defined(READY) 1165 printf("Freeing redirected ISA irq %d.\n", rirq); 1166 /** FIXME: tickle the MB redirector chip */ 1167 return ???; 1168#else 1169 printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq); 1170 return 0; 1171#endif /* READY */ 1172} 1173 1174 1175/* 1176 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt 1177 */ 1178int 1179undirect_pci_irq(int rirq) 1180{ 1181#if defined(READY) 1182 if (bootverbose) 1183 printf("Freeing redirected PCI irq %d.\n", rirq); 1184 1185 /** FIXME: tickle the MB redirector chip */ 1186 return ???; 1187#else 1188 if (bootverbose) 1189 printf("Freeing (NOT implemented) redirected PCI irq %d.\n", 1190 rirq); 1191 return 0; 1192#endif /* READY */ 1193} 1194 1195 1196/* 1197 * given a bus ID, return: 1198 * the bus type if found 1199 * -1 if NOT found 1200 */ 1201int 1202apic_bus_type(int id) 1203{ 1204 int x; 1205 1206 for (x = 0; x < mp_nbusses; ++x) 1207 if (bus_data[x].bus_id == id) 1208 return bus_data[x].bus_type; 1209 1210 return -1; 1211} 1212 1213 1214/* 1215 * given a LOGICAL APIC# and pin#, return: 1216 * the associated src bus ID if found 1217 * -1 if NOT found 1218 */ 1219int 1220apic_src_bus_id(int apic, int pin) 1221{ 1222 int x; 1223 1224 /* search each of the possible INTerrupt sources */ 1225 for (x = 0; x < nintrs; ++x) 1226 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1227 (pin == io_apic_ints[x].dst_apic_int)) 1228 return (io_apic_ints[x].src_bus_id); 1229 1230 return -1; /* NOT found */ 1231} 1232 1233 1234/* 1235 * given a LOGICAL APIC# and pin#, return: 1236 * the associated src bus IRQ if found 1237 * -1 if NOT found 1238 */ 1239int 1240apic_src_bus_irq(int apic, int pin) 1241{ 1242 int x; 1243 1244 for (x = 0; x < nintrs; x++) 1245 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1246 (pin == io_apic_ints[x].dst_apic_int)) 1247 return (io_apic_ints[x].src_bus_irq); 1248 1249 return -1; /* NOT found */ 1250} 1251 1252 1253/* 1254 * given a LOGICAL APIC# and pin#, return: 1255 * the associated INTerrupt type if found 1256 * -1 if NOT found 1257 */ 1258int 1259apic_int_type(int apic, int pin) 1260{ 1261 int x; 1262 1263 /* search each of the possible INTerrupt sources */ 1264 for (x = 0; x < nintrs; ++x) 1265 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1266 (pin == io_apic_ints[x].dst_apic_int)) 1267 return (io_apic_ints[x].int_type); 1268 1269 return -1; /* NOT found */ 1270} 1271 1272 1273/* 1274 * given a LOGICAL APIC# and pin#, return: 1275 * the associated trigger mode if found 1276 * -1 if NOT found 1277 */ 1278int 1279apic_trigger(int apic, int pin) 1280{ 1281 int x; 1282 1283 /* search each of the possible INTerrupt sources */ 1284 for (x = 0; x < nintrs; ++x) 1285 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1286 (pin == io_apic_ints[x].dst_apic_int)) 1287 return ((io_apic_ints[x].int_flags >> 2) & 0x03); 1288 1289 return -1; /* NOT found */ 1290} 1291 1292 1293/* 1294 * given a LOGICAL APIC# and pin#, return: 1295 * the associated 'active' level if found 1296 * -1 if NOT found 1297 */ 1298int 1299apic_polarity(int apic, int pin) 1300{ 1301 int x; 1302 1303 /* search each of the possible INTerrupt sources */ 1304 for (x = 0; x < nintrs; ++x) 1305 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1306 (pin == io_apic_ints[x].dst_apic_int)) 1307 return (io_apic_ints[x].int_flags & 0x03); 1308 1309 return -1; /* NOT found */ 1310} 1311 1312 1313/* 1314 * set data according to MP defaults 1315 * FIXME: probably not complete yet... 1316 */ 1317static void 1318default_mp_table(int type) 1319{ 1320 int ap_cpu_id; 1321#if defined(APIC_IO) 1322 u_int32_t ux; 1323 int io_apic_id; 1324 int pin; 1325#endif /* APIC_IO */ 1326 1327#if 0 1328 printf(" MP default config type: %d\n", type); 1329 switch (type) { 1330 case 1: 1331 printf(" bus: ISA, APIC: 82489DX\n"); 1332 break; 1333 case 2: 1334 printf(" bus: EISA, APIC: 82489DX\n"); 1335 break; 1336 case 3: 1337 printf(" bus: EISA, APIC: 82489DX\n"); 1338 break; 1339 case 4: 1340 printf(" bus: MCA, APIC: 82489DX\n"); 1341 break; 1342 case 5: 1343 printf(" bus: ISA+PCI, APIC: Integrated\n"); 1344 break; 1345 case 6: 1346 printf(" bus: EISA+PCI, APIC: Integrated\n"); 1347 break; 1348 case 7: 1349 printf(" bus: MCA+PCI, APIC: Integrated\n"); 1350 break; 1351 default: 1352 printf(" future type\n"); 1353 break; 1354 /* NOTREACHED */ 1355 } 1356#endif /* 0 */ 1357 1358 boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24; 1359 ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0; 1360 1361 /* BSP */ 1362 CPU_TO_ID(0) = boot_cpu_id; 1363 ID_TO_CPU(boot_cpu_id) = 0; 1364 1365 /* one and only AP */ 1366 CPU_TO_ID(1) = ap_cpu_id; 1367 ID_TO_CPU(ap_cpu_id) = 1; 1368 1369#if defined(APIC_IO) 1370 /* one and only IO APIC */ 1371 io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24; 1372 1373 /* 1374 * sanity check, refer to MP spec section 3.6.6, last paragraph 1375 * necessary as some hardware isn't properly setting up the IO APIC 1376 */ 1377#if defined(REALLY_ANAL_IOAPICID_VALUE) 1378 if (io_apic_id != 2) { 1379#else 1380 if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) { 1381#endif /* REALLY_ANAL_IOAPICID_VALUE */ 1382 ux = io_apic_read(0, IOAPIC_ID); /* get current contents */ 1383 ux &= ~APIC_ID_MASK; /* clear the ID field */ 1384 ux |= 0x02000000; /* set it to '2' */ 1385 io_apic_write(0, IOAPIC_ID, ux); /* write new value */ 1386 ux = io_apic_read(0, IOAPIC_ID); /* re-read && test */ 1387 if ((ux & APIC_ID_MASK) != 0x02000000) 1388 panic("can't control IO APIC ID, reg: 0x%08x", ux); 1389 io_apic_id = 2; 1390 } 1391 IO_TO_ID(0) = io_apic_id; 1392 ID_TO_IO(io_apic_id) = 0; 1393#endif /* APIC_IO */ 1394 1395 /* fill out bus entries */ 1396 switch (type) { 1397 case 1: 1398 case 2: 1399 case 3: 1400 case 5: 1401 case 6: 1402 bus_data[0].bus_id = default_data[type - 1][1]; 1403 bus_data[0].bus_type = default_data[type - 1][2]; 1404 bus_data[1].bus_id = default_data[type - 1][3]; 1405 bus_data[1].bus_type = default_data[type - 1][4]; 1406 break; 1407 1408 /* case 4: case 7: MCA NOT supported */ 1409 default: /* illegal/reserved */ 1410 panic("BAD default MP config: %d", type); 1411 /* NOTREACHED */ 1412 } 1413 1414#if defined(APIC_IO) 1415 /* general cases from MP v1.4, table 5-2 */ 1416 for (pin = 0; pin < 16; ++pin) { 1417 io_apic_ints[pin].int_type = 0; 1418 io_apic_ints[pin].int_flags = 0x05; /* edge/active-hi */ 1419 io_apic_ints[pin].src_bus_id = 0; 1420 io_apic_ints[pin].src_bus_irq = pin; /* IRQ2 caught below */ 1421 io_apic_ints[pin].dst_apic_id = io_apic_id; 1422 io_apic_ints[pin].dst_apic_int = pin; /* 1-to-1 */ 1423 } 1424 1425 /* special cases from MP v1.4, table 5-2 */ 1426 if (type == 2) { 1427 io_apic_ints[2].int_type = 0xff; /* N/C */ 1428 io_apic_ints[13].int_type = 0xff; /* N/C */ 1429#if !defined(APIC_MIXED_MODE) 1430 /** FIXME: ??? */ 1431 panic("sorry, can't support type 2 default yet"); 1432#endif /* APIC_MIXED_MODE */ 1433 } 1434 else 1435 io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */ 1436 1437 if (type == 7) 1438 io_apic_ints[0].int_type = 0xff; /* N/C */ 1439 else 1440 io_apic_ints[0].int_type = 3; /* vectored 8259 */ 1441#endif /* APIC_IO */ 1442} 1443 1444 1445/* 1446 * initialize all the SMP locks 1447 */ 1448 1449/* critical region around IO APIC, apic_imen */ 1450struct simplelock imen_lock; 1451 1452/* critical region around splxx(), cpl, cml, cil, ipending */ 1453struct simplelock cpl_lock; 1454 1455/* Make FAST_INTR() routines sequential */ 1456struct simplelock fast_intr_lock; 1457 1458/* critical region around INTR() routines */ 1459struct simplelock intr_lock; 1460 1461/* lock regions protected in UP kernel via cli/sti */ 1462struct simplelock mpintr_lock; 1463 1464#ifdef USE_COMLOCK 1465/* locks com (tty) data/hardware accesses: a FASTINTR() */ 1466struct simplelock com_lock; 1467#endif /* USE_COMLOCK */ 1468 1469#ifdef USE_CLOCKLOCK 1470/* lock regions around the clock hardware */ 1471struct simplelock clock_lock; 1472#endif /* USE_CLOCKLOCK */ 1473 1474static void 1475init_locks(void) 1476{ 1477 /* 1478 * Get the initial mp_lock with a count of 1 for the BSP. 1479 * This uses a LOGICAL cpu ID, ie BSP == 0. 1480 */ 1481 mp_lock = 0x00000001; 1482 1483 /* ISR uses its own "giant lock" */ 1484 isr_lock = FREE_LOCK; 1485 1486 s_lock_init((struct simplelock*)&mpintr_lock); 1487 1488 s_lock_init((struct simplelock*)&fast_intr_lock); 1489 s_lock_init((struct simplelock*)&intr_lock); 1490 s_lock_init((struct simplelock*)&imen_lock); 1491 s_lock_init((struct simplelock*)&cpl_lock); 1492 1493#ifdef USE_COMLOCK 1494 s_lock_init((struct simplelock*)&com_lock); 1495#endif /* USE_COMLOCK */ 1496#ifdef USE_CLOCKLOCK 1497 s_lock_init((struct simplelock*)&clock_lock); 1498#endif /* USE_CLOCKLOCK */ 1499} 1500 1501 1502/* 1503 * start each AP in our list 1504 */ 1505static int 1506start_all_aps(u_int boot_addr) 1507{ 1508 int x, i; 1509 u_char mpbiosreason; 1510 u_long mpbioswarmvec; 1511 pd_entry_t *newptd; 1512 pt_entry_t *newpt; 1513 int *newpp, *stack; 1514 1515 POSTCODE(START_ALL_APS_POST); 1516 1517 /* initialize BSP's local APIC */ 1518 apic_initialize(); 1519 bsp_apic_ready = 1; 1520 1521 /* install the AP 1st level boot code */ 1522 install_ap_tramp(boot_addr); 1523 1524 1525 /* save the current value of the warm-start vector */ 1526 mpbioswarmvec = *((u_long *) WARMBOOT_OFF); 1527 outb(CMOS_REG, BIOS_RESET); 1528 mpbiosreason = inb(CMOS_DATA); 1529 1530 /* record BSP in CPU map */ 1531 all_cpus = 1; 1532 1533 /* start each AP */ 1534 for (x = 1; x <= mp_naps; ++x) { 1535 1536 /* This is a bit verbose, it will go away soon. */ 1537 1538 /* alloc new page table directory */ 1539 newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); 1540 1541 /* Store the virtual PTD address for this CPU */ 1542 IdlePTDS[x] = newptd; 1543 1544 /* clone currently active one (ie: IdlePTD) */ 1545 bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ 1546 1547 /* set up 0 -> 4MB P==V mapping for AP boot */ 1548 newptd[0] = (pd_entry_t) (PG_V | PG_RW | 1549 ((u_long)KPTphys & PG_FRAME)); 1550 1551 /* store PTD for this AP's boot sequence */ 1552 bootPTD = (pd_entry_t *)vtophys(newptd); 1553 1554 /* alloc new page table page */ 1555 newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); 1556 1557 /* set the new PTD's private page to point there */ 1558 newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); 1559 1560 /* install self referential entry */ 1561 newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); 1562 1563 /* allocate a new private data page */ 1564 newpp = (int *)kmem_alloc(kernel_map, PAGE_SIZE); 1565 1566 /* wire it into the private page table page */ 1567 newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpp)); 1568 1569 /* wire the ptp into itself for access */ 1570 newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt)); 1571 1572 /* copy in the pointer to the local apic */ 1573 newpt[2] = SMP_prvpt[2]; 1574 1575 /* and the IO apic mapping[s] */ 1576 for (i = 16; i < 32; i++) 1577 newpt[i] = SMP_prvpt[i]; 1578 1579 /* allocate and set up an idle stack data page */ 1580 stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); 1581 newpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); 1582 1583 newpt[4] = 0; /* *prv_CMAP1 */ 1584 newpt[5] = 0; /* *prv_CMAP2 */ 1585 newpt[6] = 0; /* *prv_CMAP3 */ 1586 1587 /* prime data page for it to use */ 1588 newpp[0] = x; /* cpuid */ 1589 newpp[1] = 0; /* curproc */ 1590 newpp[2] = 0; /* curpcb */ 1591 newpp[3] = 0; /* npxproc */ 1592 newpp[4] = 0; /* runtime.tv_sec */ 1593 newpp[5] = 0; /* runtime.tv_usec */ 1594 newpp[6] = x << 24; /* cpu_lockid */ 1595 newpp[7] = 0; /* other_cpus */ 1596 newpp[8] = (int)bootPTD; /* my_idlePTD */ 1597 newpp[9] = 0; /* ss_tpr */ 1598 newpp[10] = (int)&newpt[4]; /* prv_CMAP1 */ 1599 newpp[11] = (int)&newpt[5]; /* prv_CMAP2 */ 1600 newpp[12] = (int)&newpt[6]; /* prv_CMAP3 */ 1601 1602 /* setup a vector to our boot code */ 1603 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 1604 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4); 1605 outb(CMOS_REG, BIOS_RESET); 1606 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 1607 1608 /* attempt to start the Application Processor */ 1609 CHECK_INIT(99); /* setup checkpoints */ 1610 if (!start_ap(x, boot_addr)) { 1611 printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x)); 1612 CHECK_PRINT("trace"); /* show checkpoints */ 1613 /* better panic as the AP may be running loose */ 1614 printf("panic y/n? [y] "); 1615 if (cngetc() != 'n') 1616 panic("bye-bye"); 1617 } 1618 CHECK_PRINT("trace"); /* show checkpoints */ 1619 1620 /* record its version info */ 1621 cpu_apic_versions[x] = cpu_apic_versions[0]; 1622 1623 all_cpus |= (1 << x); /* record AP in CPU map */ 1624 } 1625 1626 /* build our map of 'other' CPUs */ 1627 other_cpus = all_cpus & ~(1 << cpuid); 1628 1629 /* fill in our (BSP) APIC version */ 1630 cpu_apic_versions[0] = lapic.version; 1631 1632 /* restore the warmstart vector */ 1633 *(u_long *) WARMBOOT_OFF = mpbioswarmvec; 1634 outb(CMOS_REG, BIOS_RESET); 1635 outb(CMOS_DATA, mpbiosreason); 1636 1637 /* 1638 * Set up the idle context for the BSP. Similar to above except 1639 * that some was done by locore, some by pmap.c and some is implicit 1640 * because the BSP is cpu#0 and the page is initially zero, and also 1641 * because we can refer to variables by name on the BSP.. 1642 */ 1643 newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE)); 1644 1645 bcopy(PTD, newptd, PAGE_SIZE); /* inc prv page pde */ 1646 IdlePTDS[0] = newptd; 1647 1648 /* Point PTD[] to this page instead of IdlePTD's physical page */ 1649 newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd)); 1650 1651 my_idlePTD = (pd_entry_t *)vtophys(newptd); 1652 1653 /* Allocate and setup BSP idle stack */ 1654 stack = (int *)kmem_alloc(kernel_map, PAGE_SIZE); 1655 SMP_prvpt[3] = (pt_entry_t)(PG_V | PG_RW | vtophys(stack)); 1656 1657 pmap_set_opt_bsp(); 1658 1659 /* number of APs actually started */ 1660 return mp_ncpus - 1; 1661} 1662 1663 1664/* 1665 * load the 1st level AP boot code into base memory. 1666 */ 1667 1668/* targets for relocation */ 1669extern void bigJump(void); 1670extern void bootCodeSeg(void); 1671extern void bootDataSeg(void); 1672extern void MPentry(void); 1673extern u_int MP_GDT; 1674extern u_int mp_gdtbase; 1675 1676static void 1677install_ap_tramp(u_int boot_addr) 1678{ 1679 int x; 1680 int size = *(int *) ((u_long) & bootMP_size); 1681 u_char *src = (u_char *) ((u_long) bootMP); 1682 u_char *dst = (u_char *) boot_addr + KERNBASE; 1683 u_int boot_base = (u_int) bootMP; 1684 u_int8_t *dst8; 1685 u_int16_t *dst16; 1686 u_int32_t *dst32; 1687 1688 POSTCODE(INSTALL_AP_TRAMP_POST); 1689 1690 for (x = 0; x < size; ++x) 1691 *dst++ = *src++; 1692 1693 /* 1694 * modify addresses in code we just moved to basemem. unfortunately we 1695 * need fairly detailed info about mpboot.s for this to work. changes 1696 * to mpboot.s might require changes here. 1697 */ 1698 1699 /* boot code is located in KERNEL space */ 1700 dst = (u_char *) boot_addr + KERNBASE; 1701 1702 /* modify the lgdt arg */ 1703 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 1704 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base); 1705 1706 /* modify the ljmp target for MPentry() */ 1707 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 1708 *dst32 = ((u_int) MPentry - KERNBASE); 1709 1710 /* modify the target for boot code segment */ 1711 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 1712 dst8 = (u_int8_t *) (dst16 + 1); 1713 *dst16 = (u_int) boot_addr & 0xffff; 1714 *dst8 = ((u_int) boot_addr >> 16) & 0xff; 1715 1716 /* modify the target for boot data segment */ 1717 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 1718 dst8 = (u_int8_t *) (dst16 + 1); 1719 *dst16 = (u_int) boot_addr & 0xffff; 1720 *dst8 = ((u_int) boot_addr >> 16) & 0xff; 1721} 1722 1723 1724/* 1725 * this function starts the AP (application processor) identified 1726 * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 1727 * to accomplish this. This is necessary because of the nuances 1728 * of the different hardware we might encounter. It ain't pretty, 1729 * but it seems to work. 1730 */ 1731static int 1732start_ap(int logical_cpu, u_int boot_addr) 1733{ 1734 int physical_cpu; 1735 int vector; 1736 int cpus; 1737 u_long icr_lo, icr_hi; 1738 1739 POSTCODE(START_AP_POST); 1740 1741 /* get the PHYSICAL APIC ID# */ 1742 physical_cpu = CPU_TO_ID(logical_cpu); 1743 1744 /* calculate the vector */ 1745 vector = (boot_addr >> 12) & 0xff; 1746 1747 /* used as a watchpoint to signal AP startup */ 1748 cpus = mp_ncpus; 1749 1750 /* 1751 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting 1752 * and running the target CPU. OR this INIT IPI might be latched (P5 1753 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be 1754 * ignored. 1755 */ 1756 1757 /* setup the address for the target AP */ 1758 icr_hi = lapic.icr_hi & ~APIC_ID_MASK; 1759 icr_hi |= (physical_cpu << 24); 1760 lapic.icr_hi = icr_hi; 1761 1762 /* do an INIT IPI: assert RESET */ 1763 icr_lo = lapic.icr_lo & 0xfff00000; 1764 lapic.icr_lo = icr_lo | 0x0000c500; 1765 1766 /* wait for pending status end */ 1767 while (lapic.icr_lo & APIC_DELSTAT_MASK) 1768 /* spin */ ; 1769 1770 /* do an INIT IPI: deassert RESET */ 1771 lapic.icr_lo = icr_lo | 0x00008500; 1772 1773 /* wait for pending status end */ 1774 u_sleep(10000); /* wait ~10mS */ 1775 while (lapic.icr_lo & APIC_DELSTAT_MASK) 1776 /* spin */ ; 1777 1778 /* 1779 * next we do a STARTUP IPI: the previous INIT IPI might still be 1780 * latched, (P5 bug) this 1st STARTUP would then terminate 1781 * immediately, and the previously started INIT IPI would continue. OR 1782 * the previous INIT IPI has already run. and this STARTUP IPI will 1783 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI 1784 * will run. 1785 */ 1786 1787 /* do a STARTUP IPI */ 1788 lapic.icr_lo = icr_lo | 0x00000600 | vector; 1789 while (lapic.icr_lo & APIC_DELSTAT_MASK) 1790 /* spin */ ; 1791 u_sleep(200); /* wait ~200uS */ 1792 1793 /* 1794 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF 1795 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR 1796 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is 1797 * recognized after hardware RESET or INIT IPI. 1798 */ 1799 1800 lapic.icr_lo = icr_lo | 0x00000600 | vector; 1801 while (lapic.icr_lo & APIC_DELSTAT_MASK) 1802 /* spin */ ; 1803 u_sleep(200); /* wait ~200uS */ 1804 1805 /* wait for it to start */ 1806 set_apic_timer(5000000);/* == 5 seconds */ 1807 while (read_apic_timer()) 1808 if (mp_ncpus > cpus) 1809 return 1; /* return SUCCESS */ 1810 1811 return 0; /* return FAILURE */ 1812} 1813 1814 1815/* 1816 * Flush the TLB on all other CPU's 1817 * 1818 * XXX: Needs to handshake and wait for completion before proceding. 1819 */ 1820void 1821smp_invltlb(void) 1822{ 1823#if defined(APIC_IO) 1824 if (smp_started && invltlb_ok) 1825 all_but_self_ipi(XINVLTLB_OFFSET); 1826#endif /* APIC_IO */ 1827} 1828 1829void 1830invlpg(u_int addr) 1831{ 1832 __asm __volatile("invlpg (%0)"::"r"(addr):"memory"); 1833 1834 /* send a message to the other CPUs */ 1835 smp_invltlb(); 1836} 1837 1838void 1839invltlb(void) 1840{ 1841 u_long temp; 1842 1843 /* 1844 * This should be implemented as load_cr3(rcr3()) when load_cr3() is 1845 * inlined. 1846 */ 1847 __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); 1848 1849 /* send a message to the other CPUs */ 1850 smp_invltlb(); 1851} 1852 1853 1854/* 1855 * When called the executing CPU will send an IPI to all other CPUs 1856 * requesting that they halt execution. 1857 * 1858 * Usually (but not necessarily) called with 'other_cpus' as its arg. 1859 * 1860 * - Signals all CPUs in map to stop. 1861 * - Waits for each to stop. 1862 * 1863 * Returns: 1864 * -1: error 1865 * 0: NA 1866 * 1: ok 1867 * 1868 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs 1869 * from executing at same time. 1870 */ 1871int 1872stop_cpus(u_int map) 1873{ 1874 if (!smp_started) 1875 return 0; 1876 1877 /* send IPI to all CPUs in map */ 1878 stopped_cpus = 0; 1879 1880 /* send the Xcpustop IPI to all CPUs in map */ 1881 selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED); 1882 1883 while (stopped_cpus != map) 1884 /* spin */ ; 1885 1886 return 1; 1887} 1888 1889 1890/* 1891 * Called by a CPU to restart stopped CPUs. 1892 * 1893 * Usually (but not necessarily) called with 'stopped_cpus' as its arg. 1894 * 1895 * - Signals all CPUs in map to restart. 1896 * - Waits for each to restart. 1897 * 1898 * Returns: 1899 * -1: error 1900 * 0: NA 1901 * 1: ok 1902 */ 1903int 1904restart_cpus(u_int map) 1905{ 1906 if (!smp_started) 1907 return 0; 1908 1909 started_cpus = map; /* signal other cpus to restart */ 1910 1911 while (started_cpus) /* wait for each to clear its bit */ 1912 /* spin */ ; 1913 1914 return 1; 1915} 1916 1917int smp_active = 0; /* are the APs allowed to run? */ 1918SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, ""); 1919 1920/* XXX maybe should be hw.ncpu */ 1921int smp_cpus = 1; /* how many cpu's running */ 1922SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, ""); 1923 1924int invltlb_ok = 0; /* throttle smp_invltlb() till safe */ 1925SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, ""); 1926 1927int do_page_zero_idle = 0; /* bzero pages for fun and profit in idleloop */ 1928SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW, 1929 &do_page_zero_idle, 0, ""); 1930 1931 1932/* 1933 * This is called once the rest of the system is up and running and we're 1934 * ready to let the AP's out of the pen. 1935 */ 1936void ap_init(void); 1937 1938void 1939ap_init() 1940{ 1941 u_int temp; 1942 u_int apic_id; 1943 1944 smp_cpus++; 1945 1946 /* Build our map of 'other' CPUs. */ 1947 other_cpus = all_cpus & ~(1 << cpuid); 1948 1949 printf("SMP: AP CPU #%d Launched!\n", cpuid); 1950 1951 /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */ 1952 load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); 1953 1954 /* A quick check from sanity claus */ 1955 apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); 1956 if (cpuid != apic_id) { 1957 printf("SMP: cpuid = %d\n", cpuid); 1958 printf("SMP: apic_id = %d\n", apic_id); 1959 printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]); 1960 panic("cpuid mismatch! boom!!"); 1961 } 1962 1963 /* Init local apic for irq's */ 1964 apic_initialize(); 1965 1966 /* 1967 * Activate smp_invltlb, although strictly speaking, this isn't 1968 * quite correct yet. We should have a bitfield for cpus willing 1969 * to accept TLB flush IPI's or something and sync them. 1970 */ 1971 invltlb_ok = 1; 1972 smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ 1973 smp_active = 1; /* historic */ 1974 1975 curproc = NULL; /* make sure */ 1976} 1977