mp_x86.c revision 115016
166200Simp/* 252506Simp * Copyright (c) 1996, by Steve Passe 3139749Simp * All rights reserved. 452506Simp * 552506Simp * Redistribution and use in source and binary forms, with or without 652506Simp * modification, are permitted provided that the following conditions 752506Simp * are met: 852506Simp * 1. Redistributions of source code must retain the above copyright 952506Simp * notice, this list of conditions and the following disclaimer. 1052506Simp * 2. The name of the developer may NOT be used to endorse or promote products 1152506Simp * derived from this software without specific prior written permission. 1252506Simp * 1352506Simp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1452506Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1552506Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1652506Simp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1752506Simp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1852506Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 1952506Simp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2052506Simp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2152506Simp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2252506Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2352506Simp * SUCH DAMAGE. 
2452506Simp * 2552506Simp * $FreeBSD: head/sys/i386/i386/mp_machdep.c 115016 2003-05-15 05:12:24Z alc $ 2652506Simp */ 2752506Simp 2852506Simp#include "opt_cpu.h" 2952506Simp#include "opt_kstack_pages.h" 3052506Simp#include "opt_swtch.h" 3152506Simp 32119418Sobrien#ifdef SMP 33119418Sobrien#include <machine/smptests.h> 34119418Sobrien#else 3552506Simp#error 3652506Simp#endif 3752506Simp 3852506Simp#include <sys/param.h> 3952506Simp#include <sys/systm.h> 4052506Simp#include <sys/bus.h> 4191786Simp#include <sys/cons.h> /* cngetc() */ 4252506Simp#ifdef GPROF 4352506Simp#include <sys/gmon.h> 4452506Simp#endif 4552506Simp#include <sys/kernel.h> 4652506Simp#include <sys/ktr.h> 4752506Simp#include <sys/lock.h> 4852506Simp#include <sys/malloc.h> 4982781Sshiba#include <sys/memrange.h> 5082781Sshiba#include <sys/mutex.h> 5152506Simp#include <sys/pcpu.h> 5252506Simp#include <sys/proc.h> 53150362Simp#include <sys/smp.h> 54144930Simp#include <sys/sysctl.h> 5552506Simp#include <sys/user.h> 5655500Simp 5759193Simp#include <vm/vm.h> 5855500Simp#include <vm/vm_param.h> 5955500Simp#include <vm/pmap.h> 6055500Simp#include <vm/vm_kern.h> 6191786Simp#include <vm/vm_extern.h> 62227309Sed#include <vm/vm_map.h> 6391786Simp 6491786Simp#include <machine/apic.h> 65267992Shselasky#include <machine/atomic.h> 6691786Simp#include <machine/clock.h> 6791786Simp#include <machine/cpu.h> 6891786Simp#include <machine/cpufunc.h> 6991786Simp#include <machine/mpapic.h> 70267992Shselasky#include <machine/psl.h> 7191786Simp#include <machine/segments.h> 7291786Simp#include <machine/smp.h> 7352506Simp#include <machine/smptests.h> /** TEST_DEFAULT_CONFIG, TEST_TEST1 */ 7452506Simp#include <machine/tss.h> 7555500Simp#include <machine/specialreg.h> 7667333Simp#include <machine/privatespace.h> 7767333Simp 7852506Simp#if defined(APIC_IO) 7952506Simp#include <machine/md_var.h> /* setidt() */ 8055500Simp#include <i386/isa/icu.h> /* IPIs */ 8167333Simp#include <i386/isa/intr_machdep.h> /* IPIs */ 8267333Simp#endif 
/* APIC_IO */

/*
 * MP feature byte 1.  A non-zero value selects one of the MP spec
 * 'default' configurations (see mptable_pass1()); TEST_DEFAULT_CONFIG
 * overrides the value found in the floating pointer structure.
 */
#if defined(TEST_DEFAULT_CONFIG)
#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
#else
#define MPFPS_MPFB1	mpfps->mpfb1
#endif  /* TEST_DEFAULT_CONFIG */

/*
 * NOTE(review): presumably the BIOS warm-boot vector (offset/segment)
 * used when starting APs -- the users are outside this view, confirm.
 */
#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

/* physical range of the BIOS area searched for the MP signature */
#ifdef PC98
#define BIOS_BASE		(0xe8000)
#define BIOS_SIZE		(0x18000)
#else
#define BIOS_BASE		(0xf0000)
#define BIOS_SIZE		(0x10000)
#endif
/* size of the BIOS area in 32-bit words, the unit search_for_sig() counts in */
#define BIOS_COUNT		(BIOS_SIZE/4)

/* CMOS index/data ports, used by the CHECK_READ/CHECK_WRITE debug macros */
#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)

/* PROCENTRY.cpu_flags bits; mptable_pass1() only counts CPUs with _EN set */
#define PROCENTRY_FLAG_EN	0x01
#define PROCENTRY_FLAG_BP	0x02
/* IOAPICENTRY.apic_flags bit; only IO APICs with _EN set are recorded */
#define IOAPICENTRY_FLAG_EN	0x01


/* MP Floating Pointer Structure */
typedef struct MPFPS {
	char    signature[4];	/* compared against MP_SIG by search_for_sig() */
	void   *pap;		/* MP configuration table header, 0 if absent */
	u_char  length;
	u_char  spec_rev;
	u_char  checksum;
	u_char  mpfb1;		/* non-zero: default configuration type */
	u_char  mpfb2;		/* bit 0x80 set: PIC mode, else virtual wire */
	u_char  mpfb3;
	u_char  mpfb4;
	u_char  mpfb5;
}      *mpfps_t;

/* MP Configuration Table Header */
typedef struct MPCTH {
	char    signature[4];
	u_short base_table_length;	/* header plus base table entries */
	u_char  spec_rev;
	u_char  checksum;
	u_char  oem_id[8];
	u_char  product_id[12];
	void   *oem_table_pointer;
	u_short oem_table_size;
	u_short entry_count;		/* number of base table entries to walk */
	void   *apic_address;		/* local APIC address, common to all CPUs */
	u_short extended_table_length;
	u_char  extended_table_checksum;
	u_char  reserved;
}      *mpcth_t;


/*
 * MP base table entries.  The first byte of every entry is its type,
 * which mptable_pass1()/mptable_pass2() switch on while walking the table.
 */

/* type 0: processor entry */
typedef struct PROCENTRY {
	u_char  type;
	u_char  apic_id;
	u_char  apic_version;
	u_char  cpu_flags;	/* PROCENTRY_FLAG_* */
	u_long  cpu_signature;
	u_long  feature_flags;
	u_long  reserved1;
	u_long  reserved2;
}      *proc_entry_ptr;

/* type 1: bus entry */
typedef struct BUSENTRY {
	u_char  type;
	u_char  bus_id;
	char    bus_type[6];
}      *bus_entry_ptr;

/* type 2: IO APIC entry */
typedef struct IOAPICENTRY {
	u_char  type;
	u_char  apic_id;
	u_char  apic_version;
	u_char  apic_flags;	/* IOAPICENTRY_FLAG_* */
	void   *apic_address;
}      *io_apic_entry_ptr;

/* type 3 (and, by layout, type 4): interrupt assignment entry */
typedef struct INTENTRY {
	u_char  type;
	u_char  int_type;
	u_short int_flags;
	u_char  src_bus_id;
	u_char  src_bus_irq;
	u_char  dst_apic_id;
	u_char  dst_apic_int;
}      *int_entry_ptr;

/* descriptions of MP basetable entries */
typedef struct BASETABLE_ENTRY {
	u_char  type;
	u_char  length;		/* entry size in bytes, used to step the walk */
	char    name[16];
}       basetable_entry;

/*
 * this code MUST be enabled here and in mpboot.s.
 * it follows the very early stages of AP boot by placing values in CMOS ram.
 * it NORMALLY will never be needed and thus the primitive method for enabling.
191153773Simp * 192153773Simp#define CHECK_POINTS 193153773Simp */ 19452506Simp 19555500Simp#if defined(CHECK_POINTS) && !defined(PC98) 19655500Simp#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 19752506Simp#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 19855500Simp 19952506Simp#define CHECK_INIT(D); \ 20052506Simp CHECK_WRITE(0x34, (D)); \ 20155500Simp CHECK_WRITE(0x35, (D)); \ 20252506Simp CHECK_WRITE(0x36, (D)); \ 20352506Simp CHECK_WRITE(0x37, (D)); \ 20452506Simp CHECK_WRITE(0x38, (D)); \ 20552506Simp CHECK_WRITE(0x39, (D)); 20652506Simp 20752506Simp#define CHECK_PRINT(S); \ 20852506Simp printf("%s: %d, %d, %d, %d, %d, %d\n", \ 20970715Sjon (S), \ 210102713Simp CHECK_READ(0x34), \ 21152506Simp CHECK_READ(0x35), \ 21270715Sjon CHECK_READ(0x36), \ 21370715Sjon CHECK_READ(0x37), \ 214102713Simp CHECK_READ(0x38), \ 21552506Simp CHECK_READ(0x39)); 21670715Sjon 21752506Simp#else /* CHECK_POINTS */ 21890436Simp 21952506Simp#define CHECK_INIT(D) 22052506Simp#define CHECK_PRINT(S) 22155500Simp 222102713Simp#endif /* CHECK_POINTS */ 22352506Simp 224102713Simp/* 225102713Simp * Values to send to the POST hardware. 
 */
/* one code per entry point; each function passes its own code to POSTCODE() */
#define MP_BOOTADDRESS_POST	0x10
#define MP_PROBE_POST		0x11
#define MPTABLE_PASS1_POST	0x12

#define MP_START_POST		0x13
#define MP_ENABLE_POST		0x14
#define MPTABLE_PASS2_POST	0x15

#define START_ALL_APS_POST	0x16
#define INSTALL_AP_TRAMP_POST	0x17
#define START_AP_POST		0x18

#define MP_ANNOUNCE_POST	0x19

/*
 * Hyperthreading bookkeeping consulted by mptable_pass2() while it
 * enumerates processor entries.
 */
static int need_hyperthreading_fixup;
static u_int logical_cpus;
static u_int logical_cpus_mask;

/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;

/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int	current_postcode;

/** XXX FIXME: what system files declare these??? */
extern struct region_descriptor r_gdt, r_idt;

int	bsp_apic_ready = 0;	/* flags usability of BSP apic */
int	mp_naps;		/* # of Application Processors (BSP excluded) */
int	mp_nbusses;		/* # of busses */
int	mp_napics;		/* # of IO APICs */
int	boot_cpu_id;		/* designated BSP */
vm_offset_t cpu_apic_address;	/* local APIC address, set by mptable_pass1() */
vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
extern	int nkpt;

u_int32_t cpu_apic_versions[MAXCPU];	/* printed by cpu_mp_announce() */
u_int32_t *io_apic_versions;	/* allocated in mptable_pass2(), filled by mp_enable() */

#ifdef APIC_INTR_REORDER
struct {
	volatile int *location;
	int bit;
} apic_isrbit_location[32];
#endif

struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];

/*
 * APIC ID logical/physical mapping structures.
 * We oversize these to simplify boot-time config.
 * NOTE(review): presumably reached through the CPU_TO_ID(), IO_TO_ID()
 * and ID_TO_IO() macros used below -- their definitions are not in this
 * file, confirm.
 */
int	cpu_num_to_apic_id[NAPICID];
int	io_num_to_apic_id[NAPICID];
int	apic_id_to_logical[NAPICID];


/* AP uses this during bootstrap.  Do not staticize.  */
char *bootSTK;
/* logical id of the AP being started; init_secondary() reads it as 'myid' */
static int bootAP;

/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;

/* SMP page table page */
extern pt_entry_t *SMPpt;

struct pcb stoppcbs[MAXCPU];

#ifdef APIC_IO
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
static struct mtx smp_tlb_mtx;	/* spin mutex, initialised by init_locks() */
#endif

/*
 * Local data and functions.
 */

/* Set to 1 once we're ready to let the APs out of the pen.
 */
static volatile int aps_ready = 0;

static int	mp_capable;	/* set by i386_mp_probe(): 1 if an MP table was found */
static u_int	boot_address;	/* AP trampoline address, set by mp_bootaddress() */
static u_int	base_memory;	/* base memory size in bytes, set by mp_bootaddress() */

static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
static mpfps_t	mpfps;		/* MP floating pointer structure, 0 if none found */
static int	search_for_sig(u_int32_t target, int count);
static void	mp_enable(u_int boot_addr);

static void	mptable_hyperthread_fixup(u_int id_mask);
static void	mptable_pass1(void);
static int	mptable_pass2(void);
static void	default_mp_table(int type);
static void	fix_mp_table(void);
static void	setup_apic_irq_mapping(void);
static void	init_locks(void);
static int	start_all_aps(u_int boot_addr);
static void	install_ap_tramp(u_int boot_addr);
static int	start_ap(int logicalCpu, u_int boot_addr);
void		ap_init(void);
static int	apic_int_is_bus_type(int intr, int bus_type);
static void	release_aps(void *dummy);

/*
 * SMP locks, and init_locks() which initializes all of them.
 */

/* lock region used by kernel profiling */
int	mcount_lock;

#ifdef USE_COMLOCK
/* locks com (tty) data/hardware accesses: a FASTINTR() */
struct mtx		com_mtx;
#endif /* USE_COMLOCK */

/*
 * Initialize the spin mutexes owned by this file.  Which ones exist
 * depends on USE_COMLOCK and APIC_IO; with neither defined this is a
 * no-op.  Called once from mp_enable() before the APs are started.
 */
static void
init_locks(void)
{

#ifdef USE_COMLOCK
	mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
#endif /* USE_COMLOCK */
#ifdef APIC_IO
	mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
#endif
}

/*
 * Calculate usable address in base memory for AP trampoline code.
36074632Simp */ 36152506Simpu_int 36252506Simpmp_bootaddress(u_int basemem) 36397613Stakawata{ 36497613Stakawata POSTCODE(MP_BOOTADDRESS_POST); 365112359Simp 366112359Simp base_memory = basemem * 1024; /* convert to bytes */ 36766200Simp 36866200Simp boot_address = base_memory & ~0xfff; /* round down to 4k boundary */ 36966200Simp if ((base_memory - boot_address) < bootMP_size) 370140692Simp boot_address -= 4096; /* not enough, lower by 4k */ 371140692Simp 372121521Simp return boot_address; 373121521Simp} 374140366Simp 375140366Simp 37666200Simp/* 37766200Simp * Look for an Intel MP spec table (ie, SMP capable hardware). 37866200Simp */ 379112359Simpvoid 380112359Simpi386_mp_probe(void) 38166200Simp{ 38266200Simp int x; 38366200Simp u_long segment; 38466200Simp u_int32_t target; 38566200Simp 38666200Simp POSTCODE(MP_PROBE_POST); 38766200Simp 38866200Simp /* see if EBDA exists */ 38966200Simp if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) { 390140366Simp /* search first 1K of EBDA */ 391140366Simp target = (u_int32_t) (segment << 4); 392140366Simp if ((x = search_for_sig(target, 1024 / 4)) >= 0) 393140366Simp goto found; 394113313Simp } else { 39582378Sjon /* last 1K of base memory, effective 'top of base' passed in */ 39666200Simp target = (u_int32_t) (base_memory - 0x400); 39786642Simp if ((x = search_for_sig(target, 1024 / 4)) >= 0) 398113078Ssanpei goto found; 39986642Simp } 40086642Simp 401113300Simp /* search the BIOS */ 402113300Simp target = (u_int32_t) BIOS_BASE; 403113300Simp if ((x = search_for_sig(target, BIOS_COUNT)) >= 0) 404113300Simp goto found; 40586642Simp 40686642Simp /* nothing found */ 40766200Simp mpfps = (mpfps_t)0; 40866200Simp mp_capable = 0; 40966200Simp return; 41066200Simp 41166200Simpfound: 41266200Simp /* calculate needed resources */ 41371322Simp mpfps = (mpfps_t)x; 414133865Simp mptable_pass1(); 415133865Simp 41666200Simp /* flag fact that we are running multiple processors */ 41771322Simp mp_capable = 1; 
418133865Simp} 419133865Simp 42066200Simpint 421140366Simpcpu_mp_probe(void) 422140366Simp{ 423140366Simp /* 424140366Simp * Record BSP in CPU map 425140366Simp * This is done here so that MBUF init code works correctly. 426140366Simp */ 427140366Simp all_cpus = 1; 428140366Simp 42966200Simp return (mp_capable); 43066200Simp} 43166200Simp 43266200Simp/* 43366200Simp * Initialize the SMP hardware and the APIC and start up the AP's. 43466200Simp */ 43566200Simpvoid 43666200Simpcpu_mp_start(void) 437181342Simp{ 438181342Simp POSTCODE(MP_START_POST); 439181342Simp 440181342Simp /* look for MP capable motherboard */ 441181342Simp if (mp_capable) 442181342Simp mp_enable(boot_address); 443181342Simp else 444181342Simp panic("MP hardware not found!"); 445181342Simp 446181342Simp cpu_setregs(); 447181342Simp} 448181342Simp 449181342Simp 450181342Simp/* 451181342Simp * Print various information about the SMP system hardware and setup. 452181342Simp */ 453181342Simpvoid 454181342Simpcpu_mp_announce(void) 455181342Simp{ 45652506Simp int x; 45752506Simp 45852506Simp POSTCODE(MP_ANNOUNCE_POST); 45982382Simp 46082382Simp printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0)); 46182382Simp printf(", version: 0x%08x", cpu_apic_versions[0]); 46282382Simp printf(", at 0x%08x\n", cpu_apic_address); 46382382Simp for (x = 1; x <= mp_naps; ++x) { 46482382Simp printf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x)); 46552506Simp printf(", version: 0x%08x", cpu_apic_versions[x]); 46682378Sjon printf(", at 0x%08x\n", cpu_apic_address); 467181342Simp } 46852506Simp 46965917Simp#if defined(APIC_IO) 47067242Simp for (x = 0; x < mp_napics; ++x) { 47167242Simp printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x)); 47270762Simp printf(", version: 0x%08x", io_apic_versions[x]); 47367242Simp printf(", at 0x%08x\n", io_apic_address[x]); 474237692Simp } 475237692Simp#else 47667242Simp printf(" Warning: APIC I/O disabled\n"); 477144955Simp#endif /* APIC_IO */ 478144955Simp} 47965917Simp 48070715Sjon/* 
 * AP cpu's call this to sync up protected mode.
 *
 * Builds this CPU's private copy of the GDT, loads the GDT, IDT and LDT,
 * sets up and loads the per-CPU TSS, puts CR0 into a known state and
 * finally calls pmap_set_opt().  The CPU is identified by bootAP.
 */
void
init_secondary(void)
{
	int	gsel_tss;
	int	x, myid = bootAP;
	u_int	cr0;

	/* point the private-space and TSS descriptors at this CPU's area */
	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
	gdt_segs[GPROC0_SEL].ssd_base =
		(int) &SMP_prvspace[myid].pcpu.pc_common_tss;
	SMP_prvspace[myid].pcpu.pc_prvspace =
		&SMP_prvspace[myid].pcpu;

	/* each CPU owns NGDT consecutive slots of gdt[], starting at myid * NGDT */
	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	/* fill in this CPU's TSS and load the task register with it */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);	/* only these three bits are cleared here */
	load_cr0(cr0);

	pmap_set_opt();
}


#if defined(APIC_IO)
/*
 * Final configuration of the
 * BSP's local APIC:
 *  - disable 'pic mode'.
 *  - disable 'virtual wire mode'.
 *  - enable NMI.
 */
void
bsp_apic_configure(void)
{
	u_char		byte;
	u_int32_t	temp;

	/* leave 'pic mode' if necessary */
	if (picmode) {
		outb(0x22, 0x70);	/* select IMCR */
		byte = inb(0x23);	/* current contents */
		byte |= 0x01;		/* mask external INTR */
		outb(0x23, byte);	/* disconnect 8259s/NMI */
	}

	/* mask lint0 (the 8259 'virtual wire' connection) */
	temp = lapic.lvt_lint0;
	temp |= APIC_LVT_M;		/* set the mask */
	lapic.lvt_lint0 = temp;

	/* setup lint1 to handle NMI */
	temp = lapic.lvt_lint1;
	temp &= ~APIC_LVT_M;		/* clear the mask */
	lapic.lvt_lint1 = temp;

	if (bootverbose)
		apic_dump("bsp_apic_configure()");
}
#endif  /* APIC_IO */


/*******************************************************************
 * local functions and data
 */

/*
 * start the SMP system
 *
 * Runs the second MP table pass under a temporary V == P mapping,
 * applies the post-scan fixups, programs the IO APICs and installs the
 * IPI vectors (APIC_IO only), initializes the SMP locks and finally
 * starts the Application Processors at boot_addr.
 */
static void
mp_enable(u_int boot_addr)
{
	int     x;
#if defined(APIC_IO)
	int     apic;
	u_int   ux;
#endif	/* APIC_IO */

	POSTCODE(MP_ENABLE_POST);

	/* turn on 4MB of V == P addressing so we can get to MP table */
	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
	invltlb();

	/* examine the MP table for needed info, uses physical addresses */
	x = mptable_pass2();

	/* tear the temporary V == P mapping down again */
	*(int *)PTD = 0;
	invltlb();

	/* can't process default configs till the CPU APIC is pmapped */
	/* x is the default configuration type, or 0 when a real table was parsed */
	if (x)
		default_mp_table(x);

	/* post scan cleanup */
	fix_mp_table();
	setup_apic_irq_mapping();

#if defined(APIC_IO)

	/* fill the LOGICAL io_apic_versions table */
	for (apic = 0; apic < mp_napics; ++apic) {
		ux = io_apic_read(apic, IOAPIC_VER);
		io_apic_versions[apic] = ux;
		io_apic_set_id(apic, IO_TO_ID(apic));
	}

	/* program each IO APIC in the system */
	for (apic = 0; apic < mp_napics; ++apic)
		if (io_apic_setup(apic) < 0)
			panic("IO APIC setup failure");

	/* install a 'Spurious INTerrupt' vector */
	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for TLB invalidation */
	/* three vectors: whole TLB, single page, and address range */
	setidt(XINVLTLB_OFFSET, Xinvltlb,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(XINVLPG_OFFSET, Xinvlpg,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(XINVLRNG_OFFSET, Xinvlrng,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forwarding hardclock() */
	setidt(XHARDCLOCK_OFFSET, Xhardclock,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forwarding statclock() */
	setidt(XSTATCLOCK_OFFSET, Xstatclock,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#ifdef LAZY_SWITCH
	/* install an inter-CPU IPI for lazy pmap release */
	setidt(XLAZYPMAP_OFFSET, Xlazypmap,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif

	/* install an inter-CPU IPI for all-CPU rendezvous */
	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forcing an additional software trap */
	setidt(XCPUAST_OFFSET, Xcpuast,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for CPU stop/restart */
	setidt(XCPUSTOP_OFFSET, Xcpustop,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#if defined(TEST_TEST1)
	/* install a "fake hardware INTerrupt" vector */
	setidt(XTEST1_OFFSET, Xtest1,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif  /** TEST_TEST1 */

#endif	/* APIC_IO */

	/* initialize all SMP locks */
	init_locks();

	/* start each Application Processor */
	start_all_aps(boot_addr);
}


/*
 * look for the MP spec signature
 *
 * 'target' is the address to start at (it is read through KERNBASE +
 * target) and 'count' is the length of the area in 32-bit words.
 * Returns the address of the signature, expressed like 'target', or -1
 * when the area does not contain it.
 */

/* string defined by the Intel MP Spec as identifying the MP table */
#define MP_SIG		0x5f504d5f	/* _MP_ */
/* advance a 32-bit word index by 4 words, i.e. 16 bytes per probe */
#define NEXT(X)		((X) += 4)
static int
search_for_sig(u_int32_t target, int count)
{
	int     x;
	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);

	for (x = 0; x < count; NEXT(x))
		if (addr[x] == MP_SIG)
			/* make array index a byte index */
			return (target + (x * sizeof(u_int32_t)));

	return -1;
}


69452506Simpstatic basetable_entry basetable_entry_types[] = 69555720Simp{ 69655720Simp {0, 20, "Processor"}, 697150097Simp {1, 8, "Bus"}, 69870715Sjon {2, 8, "I/O APIC"}, 69952506Simp {3, 8, "I/O INT"}, 700188212Swkoszek {4, 8, "Local INT"} 70170746Simp}; 70270746Simp 70361788Simptypedef struct BUSDATA { 70461788Simp u_char bus_id; 70570715Sjon enum busTypes bus_type; 70670748Simp} bus_datum; 70755720Simp 70855720Simptypedef struct INTDATA { 70955720Simp u_char int_type; 71052506Simp u_short int_flags; 71152506Simp u_char src_bus_id; 71252506Simp u_char src_bus_irq; 71352506Simp u_char dst_apic_id; 71452506Simp u_char dst_apic_int; 71552506Simp u_char int_vector; 71652506Simp} io_int, local_int; 71782383Simp 71852506Simptypedef struct BUSTYPENAME { 71952506Simp u_char type; 72052506Simp char name[7]; 72152506Simp} bus_type_name; 72252506Simp 72352506Simpstatic bus_type_name bus_type_table[] = 72452506Simp{ 72552506Simp {CBUS, "CBUS"}, 72652506Simp {CBUSII, "CBUSII"}, 72752506Simp {EISA, "EISA"}, 72852506Simp {MCA, "MCA"}, 72952506Simp {UNKNOWN_BUSTYPE, "---"}, 730121905Simp {ISA, "ISA"}, 731121905Simp {MCA, "MCA"}, 73252506Simp {UNKNOWN_BUSTYPE, "---"}, 73352506Simp {UNKNOWN_BUSTYPE, "---"}, 73452506Simp {UNKNOWN_BUSTYPE, "---"}, 73552506Simp {UNKNOWN_BUSTYPE, "---"}, 73670715Sjon {UNKNOWN_BUSTYPE, "---"}, 737188212Swkoszek {PCI, "PCI"}, 738188212Swkoszek {UNKNOWN_BUSTYPE, "---"}, 73970715Sjon {UNKNOWN_BUSTYPE, "---"}, 74055500Simp {UNKNOWN_BUSTYPE, "---"}, 74155500Simp {UNKNOWN_BUSTYPE, "---"}, 74255500Simp {XPRESS, "XPRESS"}, 74355500Simp {UNKNOWN_BUSTYPE, "---"} 74455500Simp}; 74555500Simp/* from MP spec v1.4, table 5-1 */ 74670715Sjonstatic int default_data[7][5] = 74755500Simp{ 74855500Simp/* nbus, id0, type0, id1, type1 */ 74955500Simp {1, 0, ISA, 255, 255}, 75052506Simp {1, 0, EISA, 255, 255}, 75152506Simp {1, 0, EISA, 255, 255}, 75252506Simp {1, 0, MCA, 255, 255}, 75352506Simp {2, 0, ISA, 1, PCI}, 75452506Simp {2, 0, EISA, 1, PCI}, 75552506Simp {2, 0, MCA, 
1, PCI} 75652506Simp}; 75752506Simp 75852506Simp 75952506Simp/* the bus data */ 76052506Simpstatic bus_datum *bus_data; 76182378Sjon 76265098Simp/* the IO INT data, one entry per possible APIC INTerrupt */ 76352506Simpstatic io_int *io_apic_ints; 76452506Simp 76552506Simpstatic int nintrs; 76652506Simp 76752506Simpstatic int processor_entry(proc_entry_ptr entry, int cpu); 76882378Sjonstatic int bus_entry(bus_entry_ptr entry, int bus); 76955720Simpstatic int io_apic_entry(io_apic_entry_ptr entry, int apic); 77052506Simpstatic int int_entry(int_entry_ptr entry, int intr); 77152506Simpstatic int lookup_bus_type(char *name); 77255720Simp 77352506Simp 77452506Simp/* 77561788Simp * 1st pass on motherboard's Intel MP specification table. 77652506Simp * 777181392Simp * initializes: 77852506Simp * mp_ncpus = 1 77970715Sjon * 78082378Sjon * determines: 78182378Sjon * cpu_apic_address (common to all CPUs) 78282378Sjon * io_apic_address[N] 783144927Simp * mp_naps 784144927Simp * mp_nbusses 78582378Sjon * mp_napics 78682378Sjon * nintrs 78770715Sjon */ 78870715Sjonstatic void 78952506Simpmptable_pass1(void) 79052506Simp{ 79152506Simp int x; 79252506Simp mpcth_t cth; 79352506Simp int totalSize; 79452506Simp void* position; 79552506Simp int count; 79652506Simp int type; 79752506Simp u_int id_mask; 79852506Simp 79952506Simp POSTCODE(MPTABLE_PASS1_POST); 80082378Sjon 80182378Sjon /* clear various tables */ 80252506Simp for (x = 0; x < NAPICID; ++x) { 80352506Simp io_apic_address[x] = ~0; /* IO APIC address table */ 80452506Simp } 80552506Simp 80652506Simp /* init everything to empty */ 80770715Sjon mp_naps = 0; 80855720Simp mp_nbusses = 0; 80955720Simp mp_napics = 0; 81052506Simp nintrs = 0; 81152506Simp id_mask = 0; 81252506Simp 81352506Simp /* check for use of 'default' configuration */ 81452506Simp if (MPFPS_MPFB1 != 0) { 81552506Simp /* use default addresses */ 81682378Sjon cpu_apic_address = DEFAULT_APIC_BASE; 81752506Simp io_apic_address[0] = DEFAULT_IO_APIC_BASE; 
81852506Simp 81953873Simp /* fill in with defaults */ 82053873Simp mp_naps = 2; /* includes BSP */ 82153873Simp mp_maxid = 1; 82253873Simp mp_nbusses = default_data[MPFPS_MPFB1 - 1][0]; 82353873Simp#if defined(APIC_IO) 82452506Simp mp_napics = 1; 82552506Simp nintrs = 16; 82652506Simp#endif /* APIC_IO */ 82767333Simp } 828150391Simp else { 82952506Simp if ((cth = mpfps->pap) == 0) 83052506Simp panic("MP Configuration Table Header MISSING!"); 83159193Simp 83259193Simp cpu_apic_address = (vm_offset_t) cth->apic_address; 83359193Simp 83464850Simp /* walk the table, recording info of interest */ 835150362Simp totalSize = cth->base_table_length - sizeof(struct MPCTH); 83661788Simp position = (u_char *) cth + sizeof(struct MPCTH); 83759193Simp count = cth->entry_count; 83861788Simp 839150362Simp while (count--) { 840150362Simp switch (type = *(u_char *) position) { 841153773Simp case 0: /* processor_entry */ 84274632Simp if (((proc_entry_ptr)position)->cpu_flags 84359193Simp & PROCENTRY_FLAG_EN) { 84459193Simp ++mp_naps; 84582378Sjon mp_maxid++; 84682378Sjon id_mask |= 1 << 84782378Sjon ((proc_entry_ptr)position)->apic_id; 848106362Simp } 849150362Simp break; 850150362Simp case 1: /* bus_entry */ 85182378Sjon ++mp_nbusses; 85282378Sjon break; 85387975Simp case 2: /* io_apic_entry */ 85487975Simp if (((io_apic_entry_ptr)position)->apic_flags 85587975Simp & IOAPICENTRY_FLAG_EN) 856106362Simp io_apic_address[mp_napics++] = 85787975Simp (vm_offset_t)((io_apic_entry_ptr) 85887975Simp position)->apic_address; 85987975Simp break; 86087975Simp case 3: /* int_entry */ 86187975Simp ++nintrs; 86287975Simp break; 86387975Simp case 4: /* int_entry */ 86487975Simp break; 86587975Simp default: 86687975Simp panic("mpfps Base Table HOSED!"); 86753873Simp /* NOTREACHED */ 86853873Simp } 86953873Simp 87053873Simp totalSize -= basetable_entry_types[type].length; 87153873Simp (u_char*)position += basetable_entry_types[type].length; 87253873Simp } 87353873Simp } 87453873Simp 87553873Simp /* 
qualify the numbers */ 87653873Simp if (mp_naps > MAXCPU) { 87753873Simp printf("Warning: only using %d of %d available CPUs!\n", 87876424Simp MAXCPU, mp_naps); 87953873Simp mp_naps = MAXCPU; 88053873Simp } 88153873Simp 88253873Simp /* See if we need to fixup HT logical CPUs. */ 88353873Simp mptable_hyperthread_fixup(id_mask); 88453873Simp 88553873Simp /* 88653873Simp * Count the BSP. 88753873Simp * This is also used as a counter while starting the APs. 88853873Simp */ 88953873Simp mp_ncpus = 1; 89053873Simp 89153873Simp --mp_naps; /* subtract the BSP */ 89253873Simp} 89353873Simp 89453873Simp 89553873Simp/* 89653873Simp * 2nd pass on motherboard's Intel MP specification table. 89753873Simp * 89853873Simp * sets: 89966847Simp * boot_cpu_id 90053873Simp * ID_TO_IO(N), phy APIC ID to log CPU/IO table 90153873Simp * CPU_TO_ID(N), logical CPU to APIC ID table 90253873Simp * IO_TO_ID(N), logical IO to APIC ID table 90353873Simp * bus_data[N] 90453873Simp * io_apic_ints[N] 90553873Simp */ 90676424Simpstatic int 90753873Simpmptable_pass2(void) 90853873Simp{ 90953873Simp struct PROCENTRY proc; 91053873Simp int x; 91153873Simp mpcth_t cth; 91253873Simp int totalSize; 91370715Sjon void* position; 91453873Simp int count; 915147963Simp int type; 916147963Simp int apic, bus, cpu, intr; 91753873Simp int i, j; 91853873Simp int pgeflag; 91953873Simp 92053873Simp POSTCODE(MPTABLE_PASS2_POST); 92153873Simp 92253873Simp /* Initialize fake proc entry for use with HT fixup. */ 92353873Simp bzero(&proc, sizeof(proc)); 92453873Simp proc.type = 0; 92553873Simp proc.cpu_flags = PROCENTRY_FLAG_EN; 926147963Simp 92753873Simp pgeflag = 0; /* XXX - Not used under SMP yet. 
*/ 92866847Simp 92953873Simp MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics, 93053873Simp M_DEVBUF, M_WAITOK); 93153873Simp MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics, 93253873Simp M_DEVBUF, M_WAITOK); 93374632Simp MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1), 93453873Simp M_DEVBUF, M_WAITOK); 93574632Simp MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses, 93653873Simp M_DEVBUF, M_WAITOK); 93774632Simp 93853873Simp bzero(ioapic, sizeof(ioapic_t *) * mp_napics); 93974632Simp 94053873Simp for (i = 0; i < mp_napics; i++) { 94174632Simp for (j = 0; j < mp_napics; j++) { 94253873Simp /* same page frame as a previous IO apic? */ 94374632Simp if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 94453873Simp (io_apic_address[i] & PG_FRAME)) { 94553873Simp ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace 94682378Sjon + (NPTEPG-2-j) * PAGE_SIZE 94782378Sjon + (io_apic_address[i] & PAGE_MASK)); 94882378Sjon break; 94982378Sjon } 95082378Sjon /* use this slot if available */ 95153873Simp if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) { 95253873Simp SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW | 95353873Simp pgeflag | (io_apic_address[i] & PG_FRAME)); 95453873Simp ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace 95553873Simp + (NPTEPG-2-j) * PAGE_SIZE 95653873Simp + (io_apic_address[i] & PAGE_MASK)); 95766847Simp break; 95853873Simp } 95953873Simp } 96053873Simp } 96153873Simp 96276424Simp /* clear various tables */ 96374632Simp for (x = 0; x < NAPICID; ++x) { 96470715Sjon ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */ 96576424Simp CPU_TO_ID(x) = -1; /* logical CPU to APIC ID table */ 96653873Simp IO_TO_ID(x) = -1; /* logical IO to APIC ID table */ 96776424Simp } 96853873Simp 96953873Simp /* clear bus data table */ 97074632Simp for (x = 0; x < mp_nbusses; ++x) 97153873Simp bus_data[x].bus_id = 0xff; 97253873Simp 97353873Simp /* clear IO APIC INT table */ 97453873Simp for (x = 0; x < (nintrs + 1); 
++x) { 97553873Simp io_apic_ints[x].int_type = 0xff; 97666847Simp io_apic_ints[x].int_vector = 0xff; 97753873Simp } 97853873Simp 97953873Simp /* setup the cpu/apic mapping arrays */ 98053873Simp boot_cpu_id = -1; 98159193Simp 98259193Simp /* record whether PIC or virtual-wire mode */ 983188179Simp picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0; 98459193Simp 98574632Simp /* check for use of 'default' configuration */ 98674632Simp if (MPFPS_MPFB1 != 0) 98759193Simp return MPFPS_MPFB1; /* return default configuration type */ 98859193Simp 98959193Simp if ((cth = mpfps->pap) == 0) 99059193Simp panic("MP Configuration Table Header MISSING!"); 991140692Simp 99270715Sjon /* walk the table, recording info of interest */ 99359193Simp totalSize = cth->base_table_length - sizeof(struct MPCTH); 99474632Simp position = (u_char *) cth + sizeof(struct MPCTH); 99574632Simp count = cth->entry_count; 99659193Simp apic = bus = intr = 0; 99759193Simp cpu = 1; /* pre-count the BSP */ 998104641Simp 999104641Simp while (count--) { 1000104641Simp switch (type = *(u_char *) position) { 1001104641Simp case 0: 1002147963Simp if (processor_entry(position, cpu)) { 1003104641Simp if (logical_cpus != 0 && 1004148019Simp cpu % logical_cpus != 0) 1005104641Simp logical_cpus_mask |= (1 << cpu); 1006104641Simp ++cpu; 1007148012Simp } 1008148012Simp if (need_hyperthreading_fixup) { 1009148012Simp /* 1010148012Simp * Create fake mptable processor entries 1011148012Simp * and feed them to processor_entry() to 1012148012Simp * enumerate the logical CPUs. 
1013148012Simp */ 1014104641Simp proc.apic_id = ((proc_entry_ptr)position)->apic_id; 1015104641Simp for (i = 1; i < logical_cpus; i++) { 1016104641Simp proc.apic_id++; 101766058Simp (void)processor_entry(&proc, cpu); 1018104641Simp logical_cpus_mask |= (1 << cpu); 1019104641Simp cpu++; 1020104641Simp } 1021104641Simp } 1022147963Simp break; 1023104641Simp case 1: 1024147963Simp if (bus_entry(position, bus)) 1025104641Simp ++bus; 1026104641Simp break; 1027104641Simp case 2: 1028147963Simp if (io_apic_entry(position, apic)) 1029147963Simp ++apic; 1030147963Simp break; 1031147963Simp case 3: 1032147963Simp if (int_entry(position, intr)) 1033147963Simp ++intr; 1034147963Simp break; 1035147963Simp case 4: 1036158086Simp /* int_entry(position); */ 1037147963Simp break; 1038147963Simp default: 1039147963Simp panic("mpfps Base Table HOSED!"); 1040147963Simp /* NOTREACHED */ 1041147963Simp } 1042147963Simp 1043147963Simp totalSize -= basetable_entry_types[type].length; 1044147963Simp (u_char *) position += basetable_entry_types[type].length; 1045147963Simp } 1046147963Simp 1047147963Simp if (boot_cpu_id == -1) 1048104641Simp panic("NO BSP found!"); 1049104641Simp 1050104641Simp /* report fact that its NOT a default configuration */ 1051104641Simp return 0; 1052104641Simp} 1053147963Simp 1054104641Simp/* 1055147963Simp * Check if we should perform a hyperthreading "fix-up" to 1056104641Simp * enumerate any logical CPU's that aren't already listed 1057147963Simp * in the table. 1058147963Simp * 1059104641Simp * XXX: We assume that all of the physical CPUs in the 1060104641Simp * system have the same number of logical CPUs. 1061147963Simp * 1062104641Simp * XXX: We assume that APIC ID's are allocated such that 1063104641Simp * the APIC ID's for a physical processor are aligned 1064104641Simp * with the number of logical CPU's in the processor. 
1065104641Simp */ 1066188179Simpstatic void 106766058Simpmptable_hyperthread_fixup(u_int id_mask) 106866847Simp{ 1069147963Simp u_int i, id; 107066779Simp 107166779Simp /* Nothing to do if there is no HTT support. */ 1072147963Simp if ((cpu_feature & CPUID_HTT) == 0) 1073147963Simp return; 107466779Simp logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 107566779Simp if (logical_cpus <= 1) 1076147963Simp return; 1077189318Simp 1078189318Simp /* 1079189318Simp * For each APIC ID of a CPU that is set in the mask, 1080189318Simp * scan the other candidate APIC ID's for this 108166779Simp * physical processor. If any of those ID's are 1082147963Simp * already in the table, then kill the fixup. 108366779Simp */ 108466779Simp for (id = 0; id <= MAXCPU; id++) { 1085147963Simp if ((id_mask & 1 << id) == 0) 108666779Simp continue; 108766779Simp /* First, make sure we are on a logical_cpus boundary. */ 1088147963Simp if (id % logical_cpus != 0) 108966779Simp return; 109090964Sshiba for (i = id + 1; i < id + logical_cpus; i++) 1091147963Simp if ((id_mask & 1 << i) != 0) 109290964Sshiba return; 109375761Simp } 1094147963Simp 109575761Simp /* 109666779Simp * Ok, the ID's checked out, so enable the fixup. We have to fixup 1097147963Simp * mp_naps and mp_maxid right now. 
109866779Simp */ 109966779Simp need_hyperthreading_fixup = 1; 1100147963Simp mp_maxid *= logical_cpus; 110166779Simp mp_naps *= logical_cpus; 110266779Simp} 1103147963Simp 110466779Simpvoid 110566779Simpassign_apic_irq(int apic, int intpin, int irq) 1106147963Simp{ 110766779Simp int x; 110867167Simp 1109147963Simp if (int_to_apicintpin[irq].ioapic != -1) 111067167Simp panic("assign_apic_irq: inconsistent table"); 111166779Simp 111266779Simp int_to_apicintpin[irq].ioapic = apic; 111366058Simp int_to_apicintpin[irq].int_pin = intpin; 111466058Simp int_to_apicintpin[irq].apic_address = ioapic[apic]; 111566779Simp int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin; 111666779Simp 111766779Simp for (x = 0; x < nintrs; x++) { 111882378Sjon if ((io_apic_ints[x].int_type == 0 || 111982378Sjon io_apic_ints[x].int_type == 3) && 112082378Sjon io_apic_ints[x].int_vector == 0xff && 112182378Sjon io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) && 112282378Sjon io_apic_ints[x].dst_apic_int == intpin) 112382378Sjon io_apic_ints[x].int_vector = irq; 112482378Sjon } 112582378Sjon} 112682378Sjon 112782378Sjonvoid 1128181342Simprevoke_apic_irq(int irq) 112982378Sjon{ 113082378Sjon int x; 113166779Simp int oldapic; 113266058Simp int oldintpin; 113367242Simp 113467242Simp if (int_to_apicintpin[irq].ioapic == -1) 113567242Simp panic("revoke_apic_irq: inconsistent table"); 113667242Simp 113782378Sjon oldapic = int_to_apicintpin[irq].ioapic; 113882378Sjon oldintpin = int_to_apicintpin[irq].int_pin; 113982378Sjon 1140121905Simp int_to_apicintpin[irq].ioapic = -1; 1141104641Simp int_to_apicintpin[irq].int_pin = 0; 114267242Simp int_to_apicintpin[irq].apic_address = NULL; 1143121905Simp int_to_apicintpin[irq].redirindex = 0; 114482378Sjon 114582378Sjon for (x = 0; x < nintrs; x++) { 114682378Sjon if ((io_apic_ints[x].int_type == 0 || 114782378Sjon io_apic_ints[x].int_type == 3) && 114870715Sjon io_apic_ints[x].int_vector != 0xff && 114982378Sjon io_apic_ints[x].dst_apic_id == 
IO_TO_ID(oldapic) && 115082378Sjon io_apic_ints[x].dst_apic_int == oldintpin) 115170715Sjon io_apic_ints[x].int_vector = 0xff; 1152121905Simp } 1153121905Simp} 1154121905Simp 1155144955Simp 1156121905Simpstatic void 1157144955Simpallocate_apic_irq(int intr) 1158121905Simp{ 1159121905Simp int apic; 1160121905Simp int intpin; 1161121905Simp int irq; 1162121905Simp 1163121905Simp if (io_apic_ints[intr].int_vector != 0xff) 1164121905Simp return; /* Interrupt handler already assigned */ 1165121905Simp 116667269Simp if (io_apic_ints[intr].int_type != 0 && 1167121905Simp (io_apic_ints[intr].int_type != 3 || 1168144955Simp (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) && 1169144955Simp io_apic_ints[intr].dst_apic_int == 0))) 1170121905Simp return; /* Not INT or ExtInt on != (0, 0) */ 1171113242Simp 1172104641Simp irq = 0; 1173144955Simp while (irq < APIC_INTMAPSIZE && 1174144955Simp int_to_apicintpin[irq].ioapic != -1) 1175144955Simp irq++; 1176104641Simp 1177104641Simp if (irq >= APIC_INTMAPSIZE) 1178104641Simp return; /* No free interrupt handlers */ 1179104641Simp 118067242Simp apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id); 118167242Simp intpin = io_apic_ints[intr].dst_apic_int; 118267242Simp 118367242Simp assign_apic_irq(apic, intpin, irq); 118467242Simp io_apic_setup_intpin(apic, intpin); 118567242Simp} 118682378Sjon 118782378Sjon 118882378Sjonstatic void 118970715Sjonswap_apic_id(int apic, int oldid, int newid) 119082378Sjon{ 119182378Sjon int x; 119282378Sjon int oapic; 119370715Sjon 119482378Sjon 119570715Sjon if (oldid == newid) 119682378Sjon return; /* Nothing to do */ 119770715Sjon 119882378Sjon printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n", 119982378Sjon apic, oldid, newid); 1200188212Swkoszek 120182378Sjon /* Swap physical APIC IDs in interrupt entries */ 120282378Sjon for (x = 0; x < nintrs; x++) { 120370715Sjon if (io_apic_ints[x].dst_apic_id == oldid) 120482378Sjon io_apic_ints[x].dst_apic_id = newid; 120582378Sjon else if 
(io_apic_ints[x].dst_apic_id == newid) 120682378Sjon io_apic_ints[x].dst_apic_id = oldid; 120770715Sjon } 1208144955Simp 1209144955Simp /* Swap physical APIC IDs in IO_TO_ID mappings */ 1210144955Simp for (oapic = 0; oapic < mp_napics; oapic++) 1211144955Simp if (IO_TO_ID(oapic) == newid) 1212144955Simp break; 1213144955Simp 1214144955Simp if (oapic < mp_napics) { 121567242Simp printf("Changing APIC ID for IO APIC #%d from " 121667242Simp "%d to %d in MP table\n", 121767333Simp oapic, newid, oldid); 121867333Simp IO_TO_ID(oapic) = oldid; 121967333Simp } 122067333Simp IO_TO_ID(apic) = newid; 1221147963Simp} 122267333Simp 122382378Sjon 122467333Simpstatic void 122567333Simpfix_id_to_io_mapping(void) 1226166901Spiso{ 1227170163Spiso int x; 122870762Simp 122982378Sjon for (x = 0; x < NAPICID; x++) 123082378Sjon ID_TO_IO(x) = -1; 1231102923Simp 123282378Sjon for (x = 0; x <= mp_naps; x++) 123382383Simp if (CPU_TO_ID(x) < NAPICID) 1234102923Simp ID_TO_IO(CPU_TO_ID(x)) = x; 1235102923Simp 1236102923Simp for (x = 0; x < mp_napics; x++) 1237102923Simp if (IO_TO_ID(x) < NAPICID) 1238102923Simp ID_TO_IO(IO_TO_ID(x)) = x; 1239102923Simp} 1240102923Simp 1241102923Simp 1242102923Simpstatic int 1243102923Simpfirst_free_apic_id(void) 1244116311Simp{ 1245116311Simp int freeid, x; 1246116311Simp 1247116311Simp for (freeid = 0; freeid < NAPICID; freeid++) { 124882383Simp for (x = 0; x <= mp_naps; x++) 1249102923Simp if (CPU_TO_ID(x) == freeid) 1250102923Simp break; 1251102923Simp if (x <= mp_naps) 1252102923Simp continue; 1253102923Simp for (x = 0; x < mp_napics; x++) 1254102923Simp if (IO_TO_ID(x) == freeid) 1255102923Simp break; 1256102923Simp if (x < mp_napics) 1257166901Spiso continue; 1258170163Spiso return freeid; 1259170163Spiso } 1260170849Simp return freeid; 1261166901Spiso} 1262170163Spiso 126370715Sjon 126470715Sjonstatic int 1265170163Spisoio_apic_id_acceptable(int apic, int id) 1266170163Spiso{ 1267170163Spiso int cpu; /* Logical CPU number */ 1268170163Spiso int oapic; 
/* Logical IO APIC number for other IO APIC */ 1269170163Spiso 1270170163Spiso if (id >= NAPICID) 1271170163Spiso return 0; /* Out of range */ 1272170163Spiso 127370715Sjon for (cpu = 0; cpu <= mp_naps; cpu++) 127470762Simp if (CPU_TO_ID(cpu) == id) 1275166901Spiso return 0; /* Conflict with CPU */ 1276166901Spiso 127770715Sjon for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++) 1278102713Simp if (IO_TO_ID(oapic) == id) 127970715Sjon return 0; /* Conflict with other APIC */ 1280147963Simp 128190445Simp return 1; /* ID is acceptable for IO APIC */ 128270715Sjon} 1283170163Spiso 1284101762Simp 1285170163Spiso/* 1286170849Simp * parse an Intel MP specification table 128790445Simp */ 128890445Simpstatic void 1289170163Spisofix_mp_table(void) 1290147963Simp{ 1291147963Simp int x; 1292147963Simp int id; 1293102713Simp int bus_0 = 0; /* Stop GCC warning */ 1294147963Simp int bus_pci = 0; /* Stop GCC warning */ 1295147963Simp int num_pci_bus; 1296102713Simp int apic; /* IO APIC unit number */ 1297102713Simp int freeid; /* Free physical APIC ID */ 129874632Simp int physid; /* Current physical IO APIC ID */ 129970715Sjon 130070715Sjon /* 130170715Sjon * Fix mis-numbering of the PCI bus and its INT entries if the BIOS 130270762Simp * did it wrong. The MP spec says that when more than 1 PCI bus 130370762Simp * exists the BIOS must begin with bus entries for the PCI bus and use 130470715Sjon * actual PCI bus numbering. This implies that when only 1 PCI bus 1305102713Simp * exists the BIOS can choose to ignore this ordering, and indeed many 130670715Sjon * MP motherboards do ignore it. This causes a problem when the PCI 1307147963Simp * sub-system makes requests of the MP sub-system based on PCI bus 130882378Sjon * numbers. So here we look for the situation and renumber the 130970715Sjon * busses and associated INTs in an effort to "make it right". 
1310102713Simp */ 1311147963Simp 1312147963Simp /* find bus 0, PCI bus, count the number of PCI busses */ 1313102713Simp for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) { 1314102713Simp if (bus_data[x].bus_id == 0) { 131590445Simp bus_0 = x; 131682378Sjon } 1317147963Simp if (bus_data[x].bus_type == PCI) { 1318147963Simp ++num_pci_bus; 1319147963Simp bus_pci = x; 132082378Sjon } 132170715Sjon } 132282378Sjon /* 132370715Sjon * bus_0 == slot of bus with ID of 0 132470715Sjon * bus_pci == slot of last PCI bus encountered 1325121905Simp */ 1326121905Simp 1327121905Simp /* check the 1 PCI bus case for sanity */ 1328121905Simp /* if it is number 0 all is well */ 1329121905Simp if (num_pci_bus == 1 && 1330147963Simp bus_data[bus_pci].bus_id != 0) { 1331121905Simp 1332121905Simp /* mis-numbered, swap with whichever bus uses slot 0 */ 1333121905Simp 1334121905Simp /* swap the bus entry types */ 1335121905Simp bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type; 1336121905Simp bus_data[bus_0].bus_type = PCI; 1337121905Simp 1338121905Simp /* swap each relavant INTerrupt entry */ 1339121905Simp id = bus_data[bus_pci].bus_id; 1340121905Simp for (x = 0; x < nintrs; ++x) { 1341121905Simp if (io_apic_ints[x].src_bus_id == id) { 1342121905Simp io_apic_ints[x].src_bus_id = 0; 1343121905Simp } 1344121905Simp else if (io_apic_ints[x].src_bus_id == 0) { 1345121905Simp io_apic_ints[x].src_bus_id = id; 1346121905Simp } 1347121905Simp } 1348121905Simp } 1349121905Simp 1350121905Simp /* Assign IO APIC IDs. 1351121905Simp * 1352121905Simp * First try the existing ID. If a conflict is detected, try 1353121905Simp * the ID in the MP table. If a conflict is still detected, find 1354121905Simp * a free id. 1355121905Simp * 1356150098Simp * We cannot use the ID_TO_IO table before all conflicts has been 1357150098Simp * resolved and the table has been corrected. 
1358150098Simp */ 1359150098Simp for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */ 1360150098Simp 1361150098Simp /* First try to use the value set by the BIOS */ 1362150098Simp physid = io_apic_get_id(apic); 1363150098Simp if (io_apic_id_acceptable(apic, physid)) { 1364150098Simp if (IO_TO_ID(apic) != physid) 1365150098Simp swap_apic_id(apic, IO_TO_ID(apic), physid); 1366150098Simp continue; 1367150098Simp } 1368150098Simp 1369150098Simp /* Then check if the value in the MP table is acceptable */ 1370150098Simp if (io_apic_id_acceptable(apic, IO_TO_ID(apic))) 1371150098Simp continue; 1372150098Simp 1373150098Simp /* Last resort, find a free APIC ID and use it */ 1374150098Simp freeid = first_free_apic_id(); 1375150098Simp if (freeid >= NAPICID) 1376150098Simp panic("No free physical APIC IDs found"); 1377150098Simp 1378150098Simp if (io_apic_id_acceptable(apic, freeid)) { 1379150098Simp swap_apic_id(apic, IO_TO_ID(apic), freeid); 1380150098Simp continue; 1381150098Simp } 1382150098Simp panic("Free physical APIC ID not usable"); 1383150098Simp } 1384150098Simp fix_id_to_io_mapping(); 1385150098Simp 1386150098Simp /* detect and fix broken Compaq MP table */ 1387150098Simp if (apic_int_type(0, 0) == -1) { 1388150098Simp printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n"); 1389150098Simp io_apic_ints[nintrs].int_type = 3; /* ExtInt */ 1390150098Simp io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */ 1391150098Simp /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */ 1392150098Simp io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0); 1393150098Simp io_apic_ints[nintrs].dst_apic_int = 0; /* Pin 0 */ 1394150098Simp nintrs++; 1395150098Simp } 1396150098Simp} 1397150098Simp 1398150098Simp 1399150098Simp/* Assign low level interrupt handlers */ 1400150098Simpstatic void 1401150098Simpsetup_apic_irq_mapping(void) 1402150098Simp{ 1403150098Simp int x; 1404150098Simp int int_vector; 1405150098Simp 1406150098Simp /* Clear array */ 
1407150098Simp for (x = 0; x < APIC_INTMAPSIZE; x++) { 1408150098Simp int_to_apicintpin[x].ioapic = -1; 1409150098Simp int_to_apicintpin[x].int_pin = 0; 1410150098Simp int_to_apicintpin[x].apic_address = NULL; 1411150098Simp int_to_apicintpin[x].redirindex = 0; 1412150098Simp } 1413150098Simp 1414150098Simp /* First assign ISA/EISA interrupts */ 1415150098Simp for (x = 0; x < nintrs; x++) { 1416222764Simp int_vector = io_apic_ints[x].src_bus_irq; 1417222764Simp if (int_vector < APIC_INTMAPSIZE && 1418150098Simp io_apic_ints[x].int_vector == 0xff && 1419150098Simp int_to_apicintpin[int_vector].ioapic == -1 && 1420150098Simp (apic_int_is_bus_type(x, ISA) || 1421150098Simp apic_int_is_bus_type(x, EISA)) && 1422150098Simp io_apic_ints[x].int_type == 0) { 1423150098Simp assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id), 1424150098Simp io_apic_ints[x].dst_apic_int, 1425150098Simp int_vector); 1426150098Simp } 1427150098Simp } 1428150098Simp 1429150098Simp /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */ 1430150098Simp for (x = 0; x < nintrs; x++) { 1431150098Simp if (io_apic_ints[x].dst_apic_int == 0 && 1432222764Simp io_apic_ints[x].dst_apic_id == IO_TO_ID(0) && 1433222764Simp io_apic_ints[x].int_vector == 0xff && 1434150098Simp int_to_apicintpin[0].ioapic == -1 && 1435150098Simp io_apic_ints[x].int_type == 3) { 1436150098Simp assign_apic_irq(0, 0, 0); 1437150098Simp break; 1438150098Simp } 1439150098Simp } 144052506Simp /* PCI interrupt assignment is deferred */ 144152506Simp} 144252506Simp 144359193Simp 144482378Sjonstatic int 144552506Simpprocessor_entry(proc_entry_ptr entry, int cpu) 144687975Simp{ 144787975Simp /* check for usability */ 144852506Simp if (!(entry->cpu_flags & PROCENTRY_FLAG_EN)) 144952506Simp return 0; 145052506Simp 145166779Simp if(entry->apic_id >= NAPICID) 145267333Simp panic("CPU APIC ID out of range (0..%d)", NAPICID - 1); 145367242Simp /* check for BSP flag */ 145467242Simp if (entry->cpu_flags & PROCENTRY_FLAG_BP) { 
1455121905Simp boot_cpu_id = entry->apic_id; 1456121905Simp CPU_TO_ID(0) = entry->apic_id; 145770715Sjon ID_TO_CPU(entry->apic_id) = 0; 145870715Sjon return 0; /* its already been counted */ 145952506Simp } 146052506Simp 146152506Simp /* add another AP to list, if less than max number of CPUs */ 1462104641Simp else if (cpu < MAXCPU) { 146366058Simp CPU_TO_ID(cpu) = entry->apic_id; 1464104641Simp ID_TO_CPU(entry->apic_id) = cpu; 1465104641Simp return 1; 146652506Simp } 146759193Simp 146859193Simp return 0; 146959193Simp} 147059193Simp 147159193Simp 1472150098Simpstatic int 1473147711Simpbus_entry(bus_entry_ptr entry, int bus) 1474150098Simp{ 1475150098Simp int x; 1476150098Simp char c, name[8]; 1477150098Simp 147859193Simp /* encode the name into an index */ 147952506Simp for (x = 0; x < 6; ++x) { 148052506Simp if ((c = entry->bus_type[x]) == ' ') 148152506Simp break; 148252506Simp name[x] = c; 148352506Simp } 148452506Simp name[x] = '\0'; 148564850Simp 148652506Simp if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE) 148752506Simp panic("unknown bus type: '%s'", name); 148852506Simp 148952506Simp bus_data[bus].bus_id = entry->bus_id; 1490101905Simp bus_data[bus].bus_type = x; 149153873Simp 1492101905Simp return 1; 149364927Simp} 1494 1495 1496static int 1497io_apic_entry(io_apic_entry_ptr entry, int apic) 1498{ 1499 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN)) 1500 return 0; 1501 1502 IO_TO_ID(apic) = entry->apic_id; 1503 if (entry->apic_id < NAPICID) 1504 ID_TO_IO(entry->apic_id) = apic; 1505 1506 return 1; 1507} 1508 1509 1510static int 1511lookup_bus_type(char *name) 1512{ 1513 int x; 1514 1515 for (x = 0; x < MAX_BUSTYPE; ++x) 1516 if (strcmp(bus_type_table[x].name, name) == 0) 1517 return bus_type_table[x].type; 1518 1519 return UNKNOWN_BUSTYPE; 1520} 1521 1522 1523static int 1524int_entry(int_entry_ptr entry, int intr) 1525{ 1526 int apic; 1527 1528 io_apic_ints[intr].int_type = entry->int_type; 1529 io_apic_ints[intr].int_flags = entry->int_flags; 1530 
io_apic_ints[intr].src_bus_id = entry->src_bus_id; 1531 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq; 1532 if (entry->dst_apic_id == 255) { 1533 /* This signal goes to all IO APICS. Select an IO APIC 1534 with sufficient number of interrupt pins */ 1535 for (apic = 0; apic < mp_napics; apic++) 1536 if (((io_apic_read(apic, IOAPIC_VER) & 1537 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >= 1538 entry->dst_apic_int) 1539 break; 1540 if (apic < mp_napics) 1541 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic); 1542 else 1543 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id; 1544 } else 1545 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id; 1546 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int; 1547 1548 return 1; 1549} 1550 1551 1552static int 1553apic_int_is_bus_type(int intr, int bus_type) 1554{ 1555 int bus; 1556 1557 for (bus = 0; bus < mp_nbusses; ++bus) 1558 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id) 1559 && ((int) bus_data[bus].bus_type == bus_type)) 1560 return 1; 1561 1562 return 0; 1563} 1564 1565 1566/* 1567 * Given a traditional ISA INT mask, return an APIC mask. 1568 */ 1569u_int 1570isa_apic_mask(u_int isa_mask) 1571{ 1572 int isa_irq; 1573 int apic_pin; 1574 1575#if defined(SKIP_IRQ15_REDIRECT) 1576 if (isa_mask == (1 << 15)) { 1577 printf("skipping ISA IRQ15 redirect\n"); 1578 return isa_mask; 1579 } 1580#endif /* SKIP_IRQ15_REDIRECT */ 1581 1582 isa_irq = ffs(isa_mask); /* find its bit position */ 1583 if (isa_irq == 0) /* doesn't exist */ 1584 return 0; 1585 --isa_irq; /* make it zero based */ 1586 1587 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */ 1588 if (apic_pin == -1) 1589 return 0; 1590 1591 return (1 << apic_pin); /* convert pin# to a mask */ 1592} 1593 1594 1595/* 1596 * Determine which APIC pin an ISA/EISA INT is attached to. 
1597 */ 1598#define INTTYPE(I) (io_apic_ints[(I)].int_type) 1599#define INTPIN(I) (io_apic_ints[(I)].dst_apic_int) 1600#define INTIRQ(I) (io_apic_ints[(I)].int_vector) 1601#define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id)) 1602 1603#define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq) 1604int 1605isa_apic_irq(int isa_irq) 1606{ 1607 int intr; 1608 1609 for (intr = 0; intr < nintrs; ++intr) { /* check each record */ 1610 if (INTTYPE(intr) == 0) { /* standard INT */ 1611 if (SRCBUSIRQ(intr) == isa_irq) { 1612 if (apic_int_is_bus_type(intr, ISA) || 1613 apic_int_is_bus_type(intr, EISA)) { 1614 if (INTIRQ(intr) == 0xff) 1615 return -1; /* unassigned */ 1616 return INTIRQ(intr); /* found */ 1617 } 1618 } 1619 } 1620 } 1621 return -1; /* NOT found */ 1622} 1623 1624 1625/* 1626 * Determine which APIC pin a PCI INT is attached to. 1627 */ 1628#define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id) 1629#define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f) 1630#define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03) 1631int 1632pci_apic_irq(int pciBus, int pciDevice, int pciInt) 1633{ 1634 int intr; 1635 1636 --pciInt; /* zero based */ 1637 1638 for (intr = 0; intr < nintrs; ++intr) /* check each record */ 1639 if ((INTTYPE(intr) == 0) /* standard INT */ 1640 && (SRCBUSID(intr) == pciBus) 1641 && (SRCBUSDEVICE(intr) == pciDevice) 1642 && (SRCBUSLINE(intr) == pciInt)) /* a candidate IRQ */ 1643 if (apic_int_is_bus_type(intr, PCI)) { 1644 if (INTIRQ(intr) == 0xff) 1645 allocate_apic_irq(intr); 1646 if (INTIRQ(intr) == 0xff) 1647 return -1; /* unassigned */ 1648 return INTIRQ(intr); /* exact match */ 1649 } 1650 1651 return -1; /* NOT found */ 1652} 1653 1654int 1655next_apic_irq(int irq) 1656{ 1657 int intr, ointr; 1658 int bus, bustype; 1659 1660 bus = 0; 1661 bustype = 0; 1662 for (intr = 0; intr < nintrs; intr++) { 1663 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0) 1664 continue; 1665 bus = SRCBUSID(intr); 1666 bustype = apic_bus_type(bus); 1667 if 
(bustype != ISA && 1668 bustype != EISA && 1669 bustype != PCI) 1670 continue; 1671 break; 1672 } 1673 if (intr >= nintrs) { 1674 return -1; 1675 } 1676 for (ointr = intr + 1; ointr < nintrs; ointr++) { 1677 if (INTTYPE(ointr) != 0) 1678 continue; 1679 if (bus != SRCBUSID(ointr)) 1680 continue; 1681 if (bustype == PCI) { 1682 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr)) 1683 continue; 1684 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr)) 1685 continue; 1686 } 1687 if (bustype == ISA || bustype == EISA) { 1688 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr)) 1689 continue; 1690 } 1691 if (INTPIN(intr) == INTPIN(ointr)) 1692 continue; 1693 break; 1694 } 1695 if (ointr >= nintrs) { 1696 return -1; 1697 } 1698 return INTIRQ(ointr); 1699} 1700#undef SRCBUSLINE 1701#undef SRCBUSDEVICE 1702#undef SRCBUSID 1703#undef SRCBUSIRQ 1704 1705#undef INTPIN 1706#undef INTIRQ 1707#undef INTAPIC 1708#undef INTTYPE 1709 1710 1711/* 1712 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt. 1713 * 1714 * XXX FIXME: 1715 * Exactly what this means is unclear at this point. It is a solution 1716 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard 1717 * could route any of the ISA INTs to upper (>15) IRQ values. But most would 1718 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an 1719 * option. 
1720 */ 1721int 1722undirect_isa_irq(int rirq) 1723{ 1724#if defined(READY) 1725 if (bootverbose) 1726 printf("Freeing redirected ISA irq %d.\n", rirq); 1727 /** FIXME: tickle the MB redirector chip */ 1728 return -1; 1729#else 1730 if (bootverbose) 1731 printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq); 1732 return 0; 1733#endif /* READY */ 1734} 1735 1736 1737/* 1738 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt 1739 */ 1740int 1741undirect_pci_irq(int rirq) 1742{ 1743#if defined(READY) 1744 if (bootverbose) 1745 printf("Freeing redirected PCI irq %d.\n", rirq); 1746 1747 /** FIXME: tickle the MB redirector chip */ 1748 return -1; 1749#else 1750 if (bootverbose) 1751 printf("Freeing (NOT implemented) redirected PCI irq %d.\n", 1752 rirq); 1753 return 0; 1754#endif /* READY */ 1755} 1756 1757 1758/* 1759 * given a bus ID, return: 1760 * the bus type if found 1761 * -1 if NOT found 1762 */ 1763int 1764apic_bus_type(int id) 1765{ 1766 int x; 1767 1768 for (x = 0; x < mp_nbusses; ++x) 1769 if (bus_data[x].bus_id == id) 1770 return bus_data[x].bus_type; 1771 1772 return -1; 1773} 1774 1775 1776/* 1777 * given a LOGICAL APIC# and pin#, return: 1778 * the associated src bus ID if found 1779 * -1 if NOT found 1780 */ 1781int 1782apic_src_bus_id(int apic, int pin) 1783{ 1784 int x; 1785 1786 /* search each of the possible INTerrupt sources */ 1787 for (x = 0; x < nintrs; ++x) 1788 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1789 (pin == io_apic_ints[x].dst_apic_int)) 1790 return (io_apic_ints[x].src_bus_id); 1791 1792 return -1; /* NOT found */ 1793} 1794 1795 1796/* 1797 * given a LOGICAL APIC# and pin#, return: 1798 * the associated src bus IRQ if found 1799 * -1 if NOT found 1800 */ 1801int 1802apic_src_bus_irq(int apic, int pin) 1803{ 1804 int x; 1805 1806 for (x = 0; x < nintrs; x++) 1807 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1808 (pin == io_apic_ints[x].dst_apic_int)) 1809 return 
(io_apic_ints[x].src_bus_irq); 1810 1811 return -1; /* NOT found */ 1812} 1813 1814 1815/* 1816 * given a LOGICAL APIC# and pin#, return: 1817 * the associated INTerrupt type if found 1818 * -1 if NOT found 1819 */ 1820int 1821apic_int_type(int apic, int pin) 1822{ 1823 int x; 1824 1825 /* search each of the possible INTerrupt sources */ 1826 for (x = 0; x < nintrs; ++x) 1827 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1828 (pin == io_apic_ints[x].dst_apic_int)) 1829 return (io_apic_ints[x].int_type); 1830 1831 return -1; /* NOT found */ 1832} 1833 1834int 1835apic_irq(int apic, int pin) 1836{ 1837 int x; 1838 int res; 1839 1840 for (x = 0; x < nintrs; ++x) 1841 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1842 (pin == io_apic_ints[x].dst_apic_int)) { 1843 res = io_apic_ints[x].int_vector; 1844 if (res == 0xff) 1845 return -1; 1846 if (apic != int_to_apicintpin[res].ioapic) 1847 panic("apic_irq: inconsistent table"); 1848 if (pin != int_to_apicintpin[res].int_pin) 1849 panic("apic_irq inconsistent table (2)"); 1850 return res; 1851 } 1852 return -1; 1853} 1854 1855 1856/* 1857 * given a LOGICAL APIC# and pin#, return: 1858 * the associated trigger mode if found 1859 * -1 if NOT found 1860 */ 1861int 1862apic_trigger(int apic, int pin) 1863{ 1864 int x; 1865 1866 /* search each of the possible INTerrupt sources */ 1867 for (x = 0; x < nintrs; ++x) 1868 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1869 (pin == io_apic_ints[x].dst_apic_int)) 1870 return ((io_apic_ints[x].int_flags >> 2) & 0x03); 1871 1872 return -1; /* NOT found */ 1873} 1874 1875 1876/* 1877 * given a LOGICAL APIC# and pin#, return: 1878 * the associated 'active' level if found 1879 * -1 if NOT found 1880 */ 1881int 1882apic_polarity(int apic, int pin) 1883{ 1884 int x; 1885 1886 /* search each of the possible INTerrupt sources */ 1887 for (x = 0; x < nintrs; ++x) 1888 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) && 1889 (pin == io_apic_ints[x].dst_apic_int)) 
1890 return (io_apic_ints[x].int_flags & 0x03); 1891 1892 return -1; /* NOT found */ 1893} 1894 1895 1896/* 1897 * set data according to MP defaults 1898 * FIXME: probably not complete yet... 1899 */ 1900static void 1901default_mp_table(int type) 1902{ 1903 int ap_cpu_id; 1904#if defined(APIC_IO) 1905 int io_apic_id; 1906 int pin; 1907#endif /* APIC_IO */ 1908 1909#if 0 1910 printf(" MP default config type: %d\n", type); 1911 switch (type) { 1912 case 1: 1913 printf(" bus: ISA, APIC: 82489DX\n"); 1914 break; 1915 case 2: 1916 printf(" bus: EISA, APIC: 82489DX\n"); 1917 break; 1918 case 3: 1919 printf(" bus: EISA, APIC: 82489DX\n"); 1920 break; 1921 case 4: 1922 printf(" bus: MCA, APIC: 82489DX\n"); 1923 break; 1924 case 5: 1925 printf(" bus: ISA+PCI, APIC: Integrated\n"); 1926 break; 1927 case 6: 1928 printf(" bus: EISA+PCI, APIC: Integrated\n"); 1929 break; 1930 case 7: 1931 printf(" bus: MCA+PCI, APIC: Integrated\n"); 1932 break; 1933 default: 1934 printf(" future type\n"); 1935 break; 1936 /* NOTREACHED */ 1937 } 1938#endif /* 0 */ 1939 1940 boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24; 1941 ap_cpu_id = (boot_cpu_id == 0) ? 
1 : 0; 1942 1943 /* BSP */ 1944 CPU_TO_ID(0) = boot_cpu_id; 1945 ID_TO_CPU(boot_cpu_id) = 0; 1946 1947 /* one and only AP */ 1948 CPU_TO_ID(1) = ap_cpu_id; 1949 ID_TO_CPU(ap_cpu_id) = 1; 1950 1951#if defined(APIC_IO) 1952 /* one and only IO APIC */ 1953 io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24; 1954 1955 /* 1956 * sanity check, refer to MP spec section 3.6.6, last paragraph 1957 * necessary as some hardware isn't properly setting up the IO APIC 1958 */ 1959#if defined(REALLY_ANAL_IOAPICID_VALUE) 1960 if (io_apic_id != 2) { 1961#else 1962 if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) { 1963#endif /* REALLY_ANAL_IOAPICID_VALUE */ 1964 io_apic_set_id(0, 2); 1965 io_apic_id = 2; 1966 } 1967 IO_TO_ID(0) = io_apic_id; 1968 ID_TO_IO(io_apic_id) = 0; 1969#endif /* APIC_IO */ 1970 1971 /* fill out bus entries */ 1972 switch (type) { 1973 case 1: 1974 case 2: 1975 case 3: 1976 case 4: 1977 case 5: 1978 case 6: 1979 case 7: 1980 bus_data[0].bus_id = default_data[type - 1][1]; 1981 bus_data[0].bus_type = default_data[type - 1][2]; 1982 bus_data[1].bus_id = default_data[type - 1][3]; 1983 bus_data[1].bus_type = default_data[type - 1][4]; 1984 break; 1985 1986 /* case 4: case 7: MCA NOT supported */ 1987 default: /* illegal/reserved */ 1988 panic("BAD default MP config: %d", type); 1989 /* NOTREACHED */ 1990 } 1991 1992#if defined(APIC_IO) 1993 /* general cases from MP v1.4, table 5-2 */ 1994 for (pin = 0; pin < 16; ++pin) { 1995 io_apic_ints[pin].int_type = 0; 1996 io_apic_ints[pin].int_flags = 0x05; /* edge/active-hi */ 1997 io_apic_ints[pin].src_bus_id = 0; 1998 io_apic_ints[pin].src_bus_irq = pin; /* IRQ2 caught below */ 1999 io_apic_ints[pin].dst_apic_id = io_apic_id; 2000 io_apic_ints[pin].dst_apic_int = pin; /* 1-to-1 */ 2001 } 2002 2003 /* special cases from MP v1.4, table 5-2 */ 2004 if (type == 2) { 2005 io_apic_ints[2].int_type = 0xff; /* N/C */ 2006 io_apic_ints[13].int_type = 0xff; /* N/C */ 2007#if !defined(APIC_MIXED_MODE) 
2008 /** FIXME: ??? */ 2009 panic("sorry, can't support type 2 default yet"); 2010#endif /* APIC_MIXED_MODE */ 2011 } 2012 else 2013 io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */ 2014 2015 if (type == 7) 2016 io_apic_ints[0].int_type = 0xff; /* N/C */ 2017 else 2018 io_apic_ints[0].int_type = 3; /* vectored 8259 */ 2019#endif /* APIC_IO */ 2020} 2021 2022 2023/* 2024 * start each AP in our list 2025 */ 2026static int 2027start_all_aps(u_int boot_addr) 2028{ 2029 int x, i, pg; 2030#ifndef PC98 2031 u_char mpbiosreason; 2032#endif 2033 u_long mpbioswarmvec; 2034 struct pcpu *pc; 2035 char *stack; 2036 uintptr_t kptbase; 2037 2038 POSTCODE(START_ALL_APS_POST); 2039 2040 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 2041 2042 /* initialize BSP's local APIC */ 2043 apic_initialize(); 2044 bsp_apic_ready = 1; 2045 2046 /* install the AP 1st level boot code */ 2047 install_ap_tramp(boot_addr); 2048 2049 2050 /* save the current value of the warm-start vector */ 2051 mpbioswarmvec = *((u_long *) WARMBOOT_OFF); 2052#ifndef PC98 2053 outb(CMOS_REG, BIOS_RESET); 2054 mpbiosreason = inb(CMOS_DATA); 2055#endif 2056 2057 /* set up temporary P==V mapping for AP boot */ 2058 /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 2059 kptbase = (uintptr_t)(void *)KPTphys; 2060 for (x = 0; x < NKPT; x++) 2061 PTD[x] = (pd_entry_t)(PG_V | PG_RW | 2062 ((kptbase + x * PAGE_SIZE) & PG_FRAME)); 2063 invltlb(); 2064 2065 /* start each AP */ 2066 for (x = 1; x <= mp_naps; ++x) { 2067 2068 /* This is a bit verbose, it will go away soon. 
*/ 2069 2070 /* first page of AP's private space */ 2071 pg = x * i386_btop(sizeof(struct privatespace)); 2072 2073 /* allocate a new private data page */ 2074 pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE); 2075 2076 /* wire it into the private page table page */ 2077 SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc)); 2078 2079 /* allocate and set up an idle stack data page */ 2080 stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */ 2081 for (i = 0; i < KSTACK_PAGES; i++) 2082 SMPpt[pg + 1 + i] = (pt_entry_t) 2083 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); 2084 2085 /* prime data page for it to use */ 2086 pcpu_init(pc, x, sizeof(struct pcpu)); 2087 2088 /* setup a vector to our boot code */ 2089 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 2090 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4); 2091#ifndef PC98 2092 outb(CMOS_REG, BIOS_RESET); 2093 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 2094#endif 2095 2096 bootSTK = &SMP_prvspace[x].idlekstack[KSTACK_PAGES * PAGE_SIZE]; 2097 bootAP = x; 2098 2099 /* attempt to start the Application Processor */ 2100 CHECK_INIT(99); /* setup checkpoints */ 2101 if (!start_ap(x, boot_addr)) { 2102 printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x)); 2103 CHECK_PRINT("trace"); /* show checkpoints */ 2104 /* better panic as the AP may be running loose */ 2105 printf("panic y/n? 
[y] "); 2106 if (cngetc() != 'n') 2107 panic("bye-bye"); 2108 } 2109 CHECK_PRINT("trace"); /* show checkpoints */ 2110 2111 /* record its version info */ 2112 cpu_apic_versions[x] = cpu_apic_versions[0]; 2113 2114 all_cpus |= (1 << x); /* record AP in CPU map */ 2115 } 2116 2117 /* build our map of 'other' CPUs */ 2118 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 2119 2120 /* fill in our (BSP) APIC version */ 2121 cpu_apic_versions[0] = lapic.version; 2122 2123 /* restore the warmstart vector */ 2124 *(u_long *) WARMBOOT_OFF = mpbioswarmvec; 2125#ifndef PC98 2126 outb(CMOS_REG, BIOS_RESET); 2127 outb(CMOS_DATA, mpbiosreason); 2128#endif 2129 2130 /* 2131 * Set up the idle context for the BSP. Similar to above except 2132 * that some was done by locore, some by pmap.c and some is implicit 2133 * because the BSP is cpu#0 and the page is initially zero, and also 2134 * because we can refer to variables by name on the BSP.. 2135 */ 2136 2137 /* Allocate and setup BSP idle stack */ 2138 stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); 2139 for (i = 0; i < KSTACK_PAGES; i++) 2140 SMPpt[1 + i] = (pt_entry_t) 2141 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack)); 2142 2143 for (x = 0; x < NKPT; x++) 2144 PTD[x] = 0; 2145 pmap_set_opt(); 2146 2147 /* number of APs actually started */ 2148 return mp_ncpus - 1; 2149} 2150 2151 2152/* 2153 * load the 1st level AP boot code into base memory. 
2154 */ 2155 2156/* targets for relocation */ 2157extern void bigJump(void); 2158extern void bootCodeSeg(void); 2159extern void bootDataSeg(void); 2160extern void MPentry(void); 2161extern u_int MP_GDT; 2162extern u_int mp_gdtbase; 2163 2164static void 2165install_ap_tramp(u_int boot_addr) 2166{ 2167 int x; 2168 int size = *(int *) ((u_long) & bootMP_size); 2169 u_char *src = (u_char *) ((u_long) bootMP); 2170 u_char *dst = (u_char *) boot_addr + KERNBASE; 2171 u_int boot_base = (u_int) bootMP; 2172 u_int8_t *dst8; 2173 u_int16_t *dst16; 2174 u_int32_t *dst32; 2175 2176 POSTCODE(INSTALL_AP_TRAMP_POST); 2177 2178 for (x = 0; x < size; ++x) 2179 *dst++ = *src++; 2180 2181 /* 2182 * modify addresses in code we just moved to basemem. unfortunately we 2183 * need fairly detailed info about mpboot.s for this to work. changes 2184 * to mpboot.s might require changes here. 2185 */ 2186 2187 /* boot code is located in KERNEL space */ 2188 dst = (u_char *) boot_addr + KERNBASE; 2189 2190 /* modify the lgdt arg */ 2191 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 2192 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base); 2193 2194 /* modify the ljmp target for MPentry() */ 2195 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 2196 *dst32 = ((u_int) MPentry - KERNBASE); 2197 2198 /* modify the target for boot code segment */ 2199 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 2200 dst8 = (u_int8_t *) (dst16 + 1); 2201 *dst16 = (u_int) boot_addr & 0xffff; 2202 *dst8 = ((u_int) boot_addr >> 16) & 0xff; 2203 2204 /* modify the target for boot data segment */ 2205 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 2206 dst8 = (u_int8_t *) (dst16 + 1); 2207 *dst16 = (u_int) boot_addr & 0xffff; 2208 *dst8 = ((u_int) boot_addr >> 16) & 0xff; 2209} 2210 2211 2212/* 2213 * this function starts the AP (application processor) identified 2214 * by the APIC ID 'physicalCpu'. 
It does quite a "song and dance" 2215 * to accomplish this. This is necessary because of the nuances 2216 * of the different hardware we might encounter. It ain't pretty, 2217 * but it seems to work. 2218 */ 2219static int 2220start_ap(int logical_cpu, u_int boot_addr) 2221{ 2222 int physical_cpu; 2223 int vector; 2224 int cpus; 2225 u_long icr_lo, icr_hi; 2226 2227 POSTCODE(START_AP_POST); 2228 2229 /* get the PHYSICAL APIC ID# */ 2230 physical_cpu = CPU_TO_ID(logical_cpu); 2231 2232 /* calculate the vector */ 2233 vector = (boot_addr >> 12) & 0xff; 2234 2235 /* used as a watchpoint to signal AP startup */ 2236 cpus = mp_ncpus; 2237 2238 /* 2239 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting 2240 * and running the target CPU. OR this INIT IPI might be latched (P5 2241 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be 2242 * ignored. 2243 */ 2244 2245 /* setup the address for the target AP */ 2246 icr_hi = lapic.icr_hi & ~APIC_ID_MASK; 2247 icr_hi |= (physical_cpu << 24); 2248 lapic.icr_hi = icr_hi; 2249 2250 /* do an INIT IPI: assert RESET */ 2251 icr_lo = lapic.icr_lo & 0xfff00000; 2252 lapic.icr_lo = icr_lo | 0x0000c500; 2253 2254 /* wait for pending status end */ 2255 while (lapic.icr_lo & APIC_DELSTAT_MASK) 2256 /* spin */ ; 2257 2258 /* do an INIT IPI: deassert RESET */ 2259 lapic.icr_lo = icr_lo | 0x00008500; 2260 2261 /* wait for pending status end */ 2262 u_sleep(10000); /* wait ~10mS */ 2263 while (lapic.icr_lo & APIC_DELSTAT_MASK) 2264 /* spin */ ; 2265 2266 /* 2267 * next we do a STARTUP IPI: the previous INIT IPI might still be 2268 * latched, (P5 bug) this 1st STARTUP would then terminate 2269 * immediately, and the previously started INIT IPI would continue. OR 2270 * the previous INIT IPI has already run. and this STARTUP IPI will 2271 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI 2272 * will run. 
2273 */ 2274 2275 /* do a STARTUP IPI */ 2276 lapic.icr_lo = icr_lo | 0x00000600 | vector; 2277 while (lapic.icr_lo & APIC_DELSTAT_MASK) 2278 /* spin */ ; 2279 u_sleep(200); /* wait ~200uS */ 2280 2281 /* 2282 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF 2283 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR 2284 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is 2285 * recognized after hardware RESET or INIT IPI. 2286 */ 2287 2288 lapic.icr_lo = icr_lo | 0x00000600 | vector; 2289 while (lapic.icr_lo & APIC_DELSTAT_MASK) 2290 /* spin */ ; 2291 u_sleep(200); /* wait ~200uS */ 2292 2293 /* wait for it to start */ 2294 set_apic_timer(5000000);/* == 5 seconds */ 2295 while (read_apic_timer()) 2296 if (mp_ncpus > cpus) 2297 return 1; /* return SUCCESS */ 2298 2299 return 0; /* return FAILURE */ 2300} 2301 2302#if defined(APIC_IO) 2303 2304#ifdef COUNT_XINVLTLB_HITS 2305u_int xhits_gbl[MAXCPU]; 2306u_int xhits_pg[MAXCPU]; 2307u_int xhits_rng[MAXCPU]; 2308SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); 2309SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, 2310 sizeof(xhits_gbl), "IU", ""); 2311SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, 2312 sizeof(xhits_pg), "IU", ""); 2313SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, 2314 sizeof(xhits_rng), "IU", ""); 2315 2316u_int ipi_global; 2317u_int ipi_page; 2318u_int ipi_range; 2319u_int ipi_range_size; 2320SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); 2321SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); 2322SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); 2323SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, 2324 0, ""); 2325 2326u_int ipi_masked_global; 2327u_int ipi_masked_page; 2328u_int ipi_masked_range; 2329u_int ipi_masked_range_size; 2330SYSCTL_INT(_debug_xhits, OID_AUTO, 
ipi_masked_global, CTLFLAG_RW, 2331 &ipi_masked_global, 0, ""); 2332SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, 2333 &ipi_masked_page, 0, ""); 2334SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, 2335 &ipi_masked_range, 0, ""); 2336SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, 2337 &ipi_masked_range_size, 0, ""); 2338#endif 2339 2340/* 2341 * Flush the TLB on all other CPU's 2342 */ 2343static void 2344smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) 2345{ 2346 u_int ncpu; 2347 register_t eflags; 2348 2349 ncpu = mp_ncpus - 1; /* does not shootdown self */ 2350 if (ncpu < 1) 2351 return; /* no other cpus */ 2352 eflags = read_eflags(); 2353 if ((eflags & PSL_I) == 0) 2354 panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled"); 2355 mtx_lock_spin(&smp_tlb_mtx); 2356 smp_tlb_addr1 = addr1; 2357 smp_tlb_addr2 = addr2; 2358 atomic_store_rel_int(&smp_tlb_wait, 0); 2359 ipi_all_but_self(vector); 2360 while (smp_tlb_wait < ncpu) 2361 ia32_pause(); 2362 mtx_unlock_spin(&smp_tlb_mtx); 2363} 2364 2365/* 2366 * This is about as magic as it gets. fortune(1) has got similar code 2367 * for reversing bits in a word. Who thinks up this stuff?? 2368 * 2369 * Yes, it does appear to be consistently faster than: 2370 * while (i = ffs(m)) { 2371 * m >>= i; 2372 * bits++; 2373 * } 2374 * and 2375 * while (lsb = (m & -m)) { // This is magic too 2376 * m &= ~lsb; // or: m ^= lsb 2377 * bits++; 2378 * } 2379 * Both of these latter forms do some very strange things on gcc-3.1 with 2380 * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2. 2381 * There is probably an SSE or MMX popcnt instruction. 2382 * 2383 * I wonder if this should be in libkern? 2384 * 2385 * XXX Stop the presses! 
Another one: 2386 * static __inline u_int32_t 2387 * popcnt1(u_int32_t v) 2388 * { 2389 * v -= ((v >> 1) & 0x55555555); 2390 * v = (v & 0x33333333) + ((v >> 2) & 0x33333333); 2391 * v = (v + (v >> 4)) & 0x0F0F0F0F; 2392 * return (v * 0x01010101) >> 24; 2393 * } 2394 * The downside is that it has a multiply. With a pentium3 with 2395 * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use 2396 * an imull, and in that case it is faster. In most other cases 2397 * it appears slightly slower. 2398 */ 2399static __inline u_int32_t 2400popcnt(u_int32_t m) 2401{ 2402 2403 m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1); 2404 m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2); 2405 m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4); 2406 m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8); 2407 m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16); 2408 return m; 2409} 2410 2411static void 2412smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) 2413{ 2414 int ncpu, othercpus; 2415 register_t eflags; 2416 2417 othercpus = mp_ncpus - 1; 2418 if (mask == (u_int)-1) { 2419 ncpu = othercpus; 2420 if (ncpu < 1) 2421 return; 2422 } else { 2423 /* XXX there should be a pcpu self mask */ 2424 mask &= ~(1 << PCPU_GET(cpuid)); 2425 if (mask == 0) 2426 return; 2427 ncpu = popcnt(mask); 2428 if (ncpu > othercpus) { 2429 /* XXX this should be a panic offence */ 2430 printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", 2431 ncpu, othercpus); 2432 ncpu = othercpus; 2433 } 2434 /* XXX should be a panic, implied by mask == 0 above */ 2435 if (ncpu < 1) 2436 return; 2437 } 2438 eflags = read_eflags(); 2439 if ((eflags & PSL_I) == 0) 2440 panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled"); 2441 mtx_lock_spin(&smp_tlb_mtx); 2442 smp_tlb_addr1 = addr1; 2443 smp_tlb_addr2 = addr2; 2444 atomic_store_rel_int(&smp_tlb_wait, 0); 2445 if (mask == (u_int)-1) 2446 ipi_all_but_self(vector); 2447 else 2448 ipi_selected(mask, 
vector); 2449 while (smp_tlb_wait < ncpu) 2450 ia32_pause(); 2451 mtx_unlock_spin(&smp_tlb_mtx); 2452} 2453#endif 2454 2455void 2456smp_invltlb(void) 2457{ 2458#if defined(APIC_IO) 2459 if (smp_started) { 2460 smp_tlb_shootdown(IPI_INVLTLB, 0, 0); 2461#ifdef COUNT_XINVLTLB_HITS 2462 ipi_global++; 2463#endif 2464 } 2465#endif /* APIC_IO */ 2466} 2467 2468void 2469smp_invlpg(vm_offset_t addr) 2470{ 2471#if defined(APIC_IO) 2472 if (smp_started) { 2473 smp_tlb_shootdown(IPI_INVLPG, addr, 0); 2474#ifdef COUNT_XINVLTLB_HITS 2475 ipi_page++; 2476#endif 2477 } 2478#endif /* APIC_IO */ 2479} 2480 2481void 2482smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) 2483{ 2484#if defined(APIC_IO) 2485 if (smp_started) { 2486 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); 2487#ifdef COUNT_XINVLTLB_HITS 2488 ipi_range++; 2489 ipi_range_size += (addr2 - addr1) / PAGE_SIZE; 2490#endif 2491 } 2492#endif /* APIC_IO */ 2493} 2494 2495void 2496smp_masked_invltlb(u_int mask) 2497{ 2498#if defined(APIC_IO) 2499 if (smp_started) { 2500 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); 2501#ifdef COUNT_XINVLTLB_HITS 2502 ipi_masked_global++; 2503#endif 2504 } 2505#endif /* APIC_IO */ 2506} 2507 2508void 2509smp_masked_invlpg(u_int mask, vm_offset_t addr) 2510{ 2511#if defined(APIC_IO) 2512 if (smp_started) { 2513 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); 2514#ifdef COUNT_XINVLTLB_HITS 2515 ipi_masked_page++; 2516#endif 2517 } 2518#endif /* APIC_IO */ 2519} 2520 2521void 2522smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) 2523{ 2524#if defined(APIC_IO) 2525 if (smp_started) { 2526 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); 2527#ifdef COUNT_XINVLTLB_HITS 2528 ipi_masked_range++; 2529 ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; 2530#endif 2531 } 2532#endif /* APIC_IO */ 2533} 2534 2535 2536/* 2537 * This is called once the rest of the system is up and running and we're 2538 * ready to let the AP's out of the pen. 
2539 */ 2540void 2541ap_init(void) 2542{ 2543 u_int apic_id; 2544 2545 /* spin until all the AP's are ready */ 2546 while (!aps_ready) 2547 ia32_pause(); 2548 2549 /* BSP may have changed PTD while we were waiting */ 2550 invltlb(); 2551 2552#if defined(I586_CPU) && !defined(NO_F00F_HACK) 2553 lidt(&r_idt); 2554#endif 2555 2556 /* set up CPU registers and state */ 2557 cpu_setregs(); 2558 2559 /* set up FPU state on the AP */ 2560 npxinit(__INITIAL_NPXCW__); 2561 2562 /* set up SSE registers */ 2563 enable_sse(); 2564 2565 /* A quick check from sanity claus */ 2566 apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); 2567 if (PCPU_GET(cpuid) != apic_id) { 2568 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 2569 printf("SMP: apic_id = %d\n", apic_id); 2570 printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]); 2571 panic("cpuid mismatch! boom!!"); 2572 } 2573 2574 /* Init local apic for irq's */ 2575 apic_initialize(); 2576 2577 /* Set memory range attributes for this CPU to match the BSP */ 2578 mem_range_AP_init(); 2579 2580 mtx_lock_spin(&ap_boot_mtx); 2581 2582 smp_cpus++; 2583 2584 CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); 2585 printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); 2586 2587 /* Build our map of 'other' CPUs. 
*/ 2588 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 2589 2590 if (bootverbose) 2591 apic_dump("ap_init()"); 2592 2593 if (smp_cpus == mp_ncpus) { 2594 /* enable IPI's, tlb shootdown, freezes etc */ 2595 atomic_store_rel_int(&smp_started, 1); 2596 smp_active = 1; /* historic */ 2597 } 2598 2599 mtx_unlock_spin(&ap_boot_mtx); 2600 2601 /* wait until all the AP's are up */ 2602 while (smp_started == 0) 2603 ia32_pause(); 2604 2605 /* ok, now grab sched_lock and enter the scheduler */ 2606 mtx_lock_spin(&sched_lock); 2607 2608 binuptime(PCPU_PTR(switchtime)); 2609 PCPU_SET(switchticks, ticks); 2610 2611 cpu_throw(NULL, choosethread()); /* doesn't return */ 2612 2613 panic("scheduler returned us to %s", __func__); 2614} 2615 2616/* 2617 * For statclock, we send an IPI to all CPU's to have them call this 2618 * function. 2619 * 2620 * WARNING! unpend() will call statclock() directly and skip this 2621 * routine. 2622 */ 2623void 2624forwarded_statclock(struct clockframe frame) 2625{ 2626 2627 if (profprocs != 0) 2628 profclock(&frame); 2629 if (pscnt == psdiv) 2630 statclock(&frame); 2631} 2632 2633void 2634forward_statclock(void) 2635{ 2636 int map; 2637 2638 CTR0(KTR_SMP, "forward_statclock"); 2639 2640 if (!smp_started || cold || panicstr) 2641 return; 2642 2643 map = PCPU_GET(other_cpus) & ~stopped_cpus ; 2644 if (map != 0) 2645 ipi_selected(map, IPI_STATCLOCK); 2646} 2647 2648/* 2649 * For each hardclock(), we send an IPI to all other CPU's to have them 2650 * execute this function. It would be nice to reduce contention on 2651 * sched_lock if we could simply peek at the CPU to determine the user/kernel 2652 * state and call hardclock_process() on the CPU receiving the clock interrupt 2653 * and then just use a simple IPI to handle any ast's if needed. 2654 * 2655 * WARNING! unpend() will call hardclock_process() directly and skip this 2656 * routine. 
2657 */ 2658void 2659forwarded_hardclock(struct clockframe frame) 2660{ 2661 2662 hardclock_process(&frame); 2663} 2664 2665void 2666forward_hardclock(void) 2667{ 2668 u_int map; 2669 2670 CTR0(KTR_SMP, "forward_hardclock"); 2671 2672 if (!smp_started || cold || panicstr) 2673 return; 2674 2675 map = PCPU_GET(other_cpus) & ~stopped_cpus ; 2676 if (map != 0) 2677 ipi_selected(map, IPI_HARDCLOCK); 2678} 2679 2680#ifdef APIC_INTR_REORDER 2681/* 2682 * Maintain mapping from softintr vector to isr bit in local apic. 2683 */ 2684void 2685set_lapic_isrloc(int intr, int vector) 2686{ 2687 if (intr < 0 || intr > 32) 2688 panic("set_apic_isrloc: bad intr argument: %d",intr); 2689 if (vector < ICU_OFFSET || vector > 255) 2690 panic("set_apic_isrloc: bad vector argument: %d",vector); 2691 apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2); 2692 apic_isrbit_location[intr].bit = (1<<(vector & 31)); 2693} 2694#endif 2695 2696/* 2697 * send an IPI to a set of cpus. 2698 */ 2699void 2700ipi_selected(u_int32_t cpus, u_int ipi) 2701{ 2702 2703 CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); 2704 selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED); 2705} 2706 2707/* 2708 * send an IPI INTerrupt containing 'vector' to all CPUs, including myself 2709 */ 2710void 2711ipi_all(u_int ipi) 2712{ 2713 2714 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 2715 apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED); 2716} 2717 2718/* 2719 * send an IPI to all CPUs EXCEPT myself 2720 */ 2721void 2722ipi_all_but_self(u_int ipi) 2723{ 2724 2725 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 2726 apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED); 2727} 2728 2729/* 2730 * send an IPI to myself 2731 */ 2732void 2733ipi_self(u_int ipi) 2734{ 2735 2736 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 2737 apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED); 2738} 2739 2740static void 2741release_aps(void *dummy __unused) 2742{ 2743 2744 if (mp_ncpus == 1) 2745 return; 2746 
mtx_lock_spin(&sched_lock); 2747 atomic_store_rel_int(&aps_ready, 1); 2748 while (smp_started == 0) 2749 ia32_pause(); 2750 mtx_unlock_spin(&sched_lock); 2751} 2752 2753SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); 2754 2755static int hlt_cpus_mask; 2756static int hlt_logical_cpus = 1; 2757static struct sysctl_ctx_list logical_cpu_clist; 2758 2759static int 2760sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) 2761{ 2762 u_int mask; 2763 int error; 2764 2765 mask = hlt_cpus_mask; 2766 error = sysctl_handle_int(oidp, &mask, 0, req); 2767 if (error || !req->newptr) 2768 return (error); 2769 2770 if (logical_cpus_mask != 0 && 2771 (mask & logical_cpus_mask) == logical_cpus_mask) 2772 hlt_logical_cpus = 1; 2773 else 2774 hlt_logical_cpus = 0; 2775 2776 if ((mask & all_cpus) == all_cpus) 2777 mask &= ~(1<<0); 2778 hlt_cpus_mask = mask; 2779 return (error); 2780} 2781SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, 2782 0, 0, sysctl_hlt_cpus, "IU", ""); 2783 2784static int 2785sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) 2786{ 2787 int disable, error; 2788 2789 disable = hlt_logical_cpus; 2790 error = sysctl_handle_int(oidp, &disable, 0, req); 2791 if (error || !req->newptr) 2792 return (error); 2793 2794 if (disable) 2795 hlt_cpus_mask |= logical_cpus_mask; 2796 else 2797 hlt_cpus_mask &= ~logical_cpus_mask; 2798 2799 if ((hlt_cpus_mask & all_cpus) == all_cpus) 2800 hlt_cpus_mask &= ~(1<<0); 2801 2802 hlt_logical_cpus = disable; 2803 return (error); 2804} 2805 2806static void 2807cpu_hlt_setup(void *dummy __unused) 2808{ 2809 2810 if (logical_cpus_mask != 0) { 2811 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", 2812 &hlt_logical_cpus); 2813 sysctl_ctx_init(&logical_cpu_clist); 2814 SYSCTL_ADD_PROC(&logical_cpu_clist, 2815 SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, 2816 "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0, 2817 sysctl_hlt_logical_cpus, "IU", ""); 2818 SYSCTL_ADD_UINT(&logical_cpu_clist, 2819 SYSCTL_STATIC_CHILDREN(_machdep), 
OID_AUTO, 2820 "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD, 2821 &logical_cpus_mask, 0, ""); 2822 2823 if (hlt_logical_cpus) 2824 hlt_cpus_mask |= logical_cpus_mask; 2825 } 2826} 2827SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); 2828 2829int 2830mp_grab_cpu_hlt(void) 2831{ 2832 u_int mask = PCPU_GET(cpumask); 2833 int retval; 2834 2835 retval = mask & hlt_cpus_mask; 2836 while (mask & hlt_cpus_mask) 2837 __asm __volatile("sti; hlt" : : : "memory"); 2838 return (retval); 2839} 2840