1/*- 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD: stable/11/sys/i386/i386/mp_machdep.c 347700 2019-05-16 14:42:16Z markj $"); 28 29#include "opt_apic.h" 30#include "opt_cpu.h" 31#include "opt_kstack_pages.h" 32#include "opt_pmap.h" 33#include "opt_sched.h" 34#include "opt_smp.h" 35 36#if !defined(lint) 37#if !defined(SMP) 38#error How did you get here? 39#endif 40 41#ifndef DEV_APIC 42#error The apic device is required for SMP, add "device apic" to your config file. 43#endif 44#endif /* not lint */ 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/bus.h> 49#include <sys/cons.h> /* cngetc() */ 50#include <sys/cpuset.h> 51#ifdef GPROF 52#include <sys/gmon.h> 53#endif 54#include <sys/kernel.h> 55#include <sys/ktr.h> 56#include <sys/lock.h> 57#include <sys/malloc.h> 58#include <sys/memrange.h> 59#include <sys/mutex.h> 60#include <sys/pcpu.h> 61#include <sys/proc.h> 62#include <sys/sched.h> 63#include <sys/smp.h> 64#include <sys/sysctl.h> 65 66#include <vm/vm.h> 67#include <vm/vm_param.h> 68#include <vm/pmap.h> 69#include <vm/vm_kern.h> 70#include <vm/vm_extern.h> 71 72#include <x86/apicreg.h> 73#include <machine/clock.h> 74#include <machine/cpu.h> 75#include <machine/cputypes.h> 76#include <x86/mca.h> 77#include <machine/md_var.h> 78#include <machine/pcb.h> 79#include <machine/psl.h> 80#include <machine/smp.h> 81#include <machine/specialreg.h> 82#include <x86/ucode.h> 83 84#define WARMBOOT_TARGET 0 85#define WARMBOOT_OFF (KERNBASE + 0x0467) 86#define WARMBOOT_SEG (KERNBASE + 0x0469) 87 88#define CMOS_REG (0x70) 89#define CMOS_DATA (0x71) 90#define BIOS_RESET (0x0f) 91#define BIOS_WARM (0x0a) 92 93/* 94 * this code MUST be enabled here and in mpboot.s. 95 * it follows the very early stages of AP boot by placing values in CMOS ram. 96 * it NORMALLY will never be needed and thus the primitive method for enabling. 97 * 98#define CHECK_POINTS 99 */ 100 101#if defined(CHECK_POINTS) && !defined(PC98) 102#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 103#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 104 105#define CHECK_INIT(D); \ 106 CHECK_WRITE(0x34, (D)); \ 107 CHECK_WRITE(0x35, (D)); \ 108 CHECK_WRITE(0x36, (D)); \ 109 CHECK_WRITE(0x37, (D)); \ 110 CHECK_WRITE(0x38, (D)); \ 111 CHECK_WRITE(0x39, (D)); 112 113#define CHECK_PRINT(S); \ 114 printf("%s: %d, %d, %d, %d, %d, %d\n", \ 115 (S), \ 116 CHECK_READ(0x34), \ 117 CHECK_READ(0x35), \ 118 CHECK_READ(0x36), \ 119 CHECK_READ(0x37), \ 120 CHECK_READ(0x38), \ 121 CHECK_READ(0x39)); 122 123#else /* CHECK_POINTS */ 124 125#define CHECK_INIT(D) 126#define CHECK_PRINT(S) 127#define CHECK_WRITE(A, D) 128 129#endif /* CHECK_POINTS */ 130 131extern struct pcpu __pcpu[]; 132 133/* 134 * Local data and functions. 135 */ 136 137static void install_ap_tramp(void); 138static int start_all_aps(void); 139static int start_ap(int apic_id); 140 141static u_int boot_address; 142 143/* 144 * Calculate usable address in base memory for AP trampoline code. 145 */ 146u_int 147mp_bootaddress(u_int basemem) 148{ 149 150 boot_address = trunc_page(basemem); /* round down to 4k boundary */ 151 if ((basemem - boot_address) < bootMP_size) 152 boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ 153 154 return boot_address; 155} 156 157/* 158 * Initialize the IPI handlers and start up the AP's. 159 */ 160void 161cpu_mp_start(void) 162{ 163 int i; 164 165 /* Initialize the logical ID to APIC ID table. */ 166 for (i = 0; i < MAXCPU; i++) { 167 cpu_apic_ids[i] = -1; 168 cpu_ipi_pending[i] = 0; 169 } 170 171 /* Install an inter-CPU IPI for TLB invalidation */ 172 setidt(IPI_INVLTLB, IDTVEC(invltlb), 173 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 174 setidt(IPI_INVLPG, IDTVEC(invlpg), 175 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 176 setidt(IPI_INVLRNG, IDTVEC(invlrng), 177 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 178 179 /* Install an inter-CPU IPI for cache invalidation. */ 180 setidt(IPI_INVLCACHE, IDTVEC(invlcache), 181 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 182 183 /* Install an inter-CPU IPI for all-CPU rendezvous */ 184 setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), 185 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 186 187 /* Install generic inter-CPU IPI handler */ 188 setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), 189 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 190 191 /* Install an inter-CPU IPI for CPU stop/restart */ 192 setidt(IPI_STOP, IDTVEC(cpustop), 193 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 194 195 /* Install an inter-CPU IPI for CPU suspend/resume */ 196 setidt(IPI_SUSPEND, IDTVEC(cpususpend), 197 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 198 199 /* Set boot_cpu_id if needed. */ 200 if (boot_cpu_id == -1) { 201 boot_cpu_id = PCPU_GET(apic_id); 202 cpu_info[boot_cpu_id].cpu_bsp = 1; 203 } else 204 KASSERT(boot_cpu_id == PCPU_GET(apic_id), 205 ("BSP's APIC ID doesn't match boot_cpu_id")); 206 207 /* Probe logical/physical core configuration. */ 208 topo_probe(); 209 210 assign_cpu_ids(); 211 212 /* Start each Application Processor */ 213 start_all_aps(); 214 215 set_interrupt_apic_ids(); 216} 217 218/* 219 * AP CPU's call this to initialize themselves. 220 */ 221void 222init_secondary(void) 223{ 224 struct pcpu *pc; 225 vm_offset_t addr; 226 int gsel_tss; 227 int x, myid; 228 u_int cr0; 229 230 /* bootAP is set in start_ap() to our ID. */ 231 myid = bootAP; 232 233 /* Update microcode before doing anything else. */ 234 ucode_load_ap(myid); 235 236 /* Get per-cpu data */ 237 pc = &__pcpu[myid]; 238 239 /* prime data page for it to use */ 240 pcpu_init(pc, myid, sizeof(struct pcpu)); 241 dpcpu_init(dpcpu, myid); 242 pc->pc_apic_id = cpu_apic_ids[myid]; 243 pc->pc_prvspace = pc; 244 pc->pc_curthread = 0; 245 246 fix_cpuid(); 247 248 gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 249 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 250 251 for (x = 0; x < NGDT; x++) { 252 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); 253 } 254 255 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 256 r_gdt.rd_base = (int) &gdt[myid * NGDT]; 257 lgdt(&r_gdt); /* does magic intra-segment return */ 258 259 lidt(&r_idt); 260 261 lldt(_default_ldt); 262 PCPU_SET(currentldt, _default_ldt); 263 264 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 265 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 266 PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 267 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 268 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 269 PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); 270 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 271 ltr(gsel_tss); 272 273 PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); 274 275 /* 276 * Set to a known state: 277 * Set by mpboot.s: CR0_PG, CR0_PE 278 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 279 */ 280 cr0 = rcr0(); 281 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); 282 load_cr0(cr0); 283 CHECK_WRITE(0x38, 5); 284 285 /* signal our startup to the BSP. */ 286 mp_naps++; 287 CHECK_WRITE(0x39, 6); 288 289 /* Spin until the BSP releases the AP's. */ 290 while (atomic_load_acq_int(&aps_ready) == 0) 291 ia32_pause(); 292 293 /* BSP may have changed PTD while we were waiting */ 294 invltlb(); 295 for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) 296 invlpg(addr); 297 298#if defined(I586_CPU) && !defined(NO_F00F_HACK) 299 lidt(&r_idt); 300#endif 301 302 init_secondary_tail(); 303} 304 305/* 306 * start each AP in our list 307 */ 308/* Lowest 1MB is already mapped: don't touch*/ 309#define TMPMAP_START 1 310static int 311start_all_aps(void) 312{ 313#ifndef PC98 314 u_char mpbiosreason; 315#endif 316 u_int32_t mpbioswarmvec; 317 int apic_id, cpu, i; 318 319 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 320 321 /* install the AP 1st level boot code */ 322 install_ap_tramp(); 323 324 /* save the current value of the warm-start vector */ 325 mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 326#ifndef PC98 327 outb(CMOS_REG, BIOS_RESET); 328 mpbiosreason = inb(CMOS_DATA); 329#endif 330 331 /* set up temporary P==V mapping for AP boot */ 332 /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 333 for (i = TMPMAP_START; i < NKPT; i++) 334 PTD[i] = PTD[KPTDI + i]; 335 invltlb(); 336 337 /* start each AP */ 338 for (cpu = 1; cpu < mp_ncpus; cpu++) { 339 apic_id = cpu_apic_ids[cpu]; 340 341 /* allocate and set up a boot stack data page */ 342 bootstacks[cpu] = 343 (char *)kmem_malloc(kernel_arena, kstack_pages * PAGE_SIZE, 344 M_WAITOK | M_ZERO); 345 dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, 346 M_WAITOK | M_ZERO); 347 /* setup a vector to our boot code */ 348 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 349 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 350#ifndef PC98 351 outb(CMOS_REG, BIOS_RESET); 352 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 353#endif 354 355 bootSTK = (char *)bootstacks[cpu] + kstack_pages * 356 PAGE_SIZE - 4; 357 bootAP = cpu; 358 359 /* attempt to start the Application Processor */ 360 CHECK_INIT(99); /* setup checkpoints */ 361 if (!start_ap(apic_id)) { 362 printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 363 CHECK_PRINT("trace"); /* show checkpoints */ 364 /* better panic as the AP may be running loose */ 365 printf("panic y/n? [y] "); 366 if (cngetc() != 'n') 367 panic("bye-bye"); 368 } 369 CHECK_PRINT("trace"); /* show checkpoints */ 370 371 CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ 372 } 373 374 /* restore the warmstart vector */ 375 *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 376 377#ifndef PC98 378 outb(CMOS_REG, BIOS_RESET); 379 outb(CMOS_DATA, mpbiosreason); 380#endif 381 382 /* Undo V==P hack from above */ 383 for (i = TMPMAP_START; i < NKPT; i++) 384 PTD[i] = 0; 385 pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 386 387 /* number of APs actually started */ 388 return mp_naps; 389} 390 391/* 392 * load the 1st level AP boot code into base memory. 393 */ 394 395/* targets for relocation */ 396extern void bigJump(void); 397extern void bootCodeSeg(void); 398extern void bootDataSeg(void); 399extern void MPentry(void); 400extern u_int MP_GDT; 401extern u_int mp_gdtbase; 402 403static void 404install_ap_tramp(void) 405{ 406 int x; 407 int size = *(int *) ((u_long) & bootMP_size); 408 vm_offset_t va = boot_address + KERNBASE; 409 u_char *src = (u_char *) ((u_long) bootMP); 410 u_char *dst = (u_char *) va; 411 u_int boot_base = (u_int) bootMP; 412 u_int8_t *dst8; 413 u_int16_t *dst16; 414 u_int32_t *dst32; 415 416 KASSERT (size <= PAGE_SIZE, 417 ("'size' do not fit into PAGE_SIZE, as expected.")); 418 pmap_kenter(va, boot_address); 419 pmap_invalidate_page (kernel_pmap, va); 420 for (x = 0; x < size; ++x) 421 *dst++ = *src++; 422 423 /* 424 * modify addresses in code we just moved to basemem. unfortunately we 425 * need fairly detailed info about mpboot.s for this to work. changes 426 * to mpboot.s might require changes here. 427 */ 428 429 /* boot code is located in KERNEL space */ 430 dst = (u_char *) va; 431 432 /* modify the lgdt arg */ 433 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 434 *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); 435 436 /* modify the ljmp target for MPentry() */ 437 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 438 *dst32 = ((u_int) MPentry - KERNBASE); 439 440 /* modify the target for boot code segment */ 441 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 442 dst8 = (u_int8_t *) (dst16 + 1); 443 *dst16 = (u_int) boot_address & 0xffff; 444 *dst8 = ((u_int) boot_address >> 16) & 0xff; 445 446 /* modify the target for boot data segment */ 447 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 448 dst8 = (u_int8_t *) (dst16 + 1); 449 *dst16 = (u_int) boot_address & 0xffff; 450 *dst8 = ((u_int) boot_address >> 16) & 0xff; 451} 452 453/* 454 * This function starts the AP (application processor) identified 455 * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 456 * to accomplish this. This is necessary because of the nuances 457 * of the different hardware we might encounter. It isn't pretty, 458 * but it seems to work. 459 */ 460static int 461start_ap(int apic_id) 462{ 463 int vector, ms; 464 int cpus; 465 466 /* calculate the vector */ 467 vector = (boot_address >> 12) & 0xff; 468 469 /* used as a watchpoint to signal AP startup */ 470 cpus = mp_naps; 471 472 ipi_startup(apic_id, vector); 473 474 /* Wait up to 5 seconds for it to start. */ 475 for (ms = 0; ms < 5000; ms++) { 476 if (mp_naps > cpus) 477 return 1; /* return SUCCESS */ 478 DELAY(1000); 479 } 480 return 0; /* return FAILURE */ 481} 482