1/*- 2 * Copyright (c) 1996, by Steve Passe 3 * Copyright (c) 2003, by Peter Wemm 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. The name of the developer may NOT be used to endorse or promote products 12 * derived from this software without specific prior written permission. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: stable/11/sys/amd64/amd64/mp_machdep.c 347700 2019-05-16 14:42:16Z markj $"); 29 30#include "opt_cpu.h" 31#include "opt_ddb.h" 32#include "opt_kstack_pages.h" 33#include "opt_sched.h" 34#include "opt_smp.h" 35 36#include <sys/param.h> 37#include <sys/systm.h> 38#include <sys/bus.h> 39#include <sys/cpuset.h> 40#ifdef GPROF 41#include <sys/gmon.h> 42#endif 43#include <sys/kernel.h> 44#include <sys/ktr.h> 45#include <sys/lock.h> 46#include <sys/malloc.h> 47#include <sys/memrange.h> 48#include <sys/mutex.h> 49#include <sys/pcpu.h> 50#include <sys/proc.h> 51#include <sys/sched.h> 52#include <sys/smp.h> 53#include <sys/sysctl.h> 54 55#include <vm/vm.h> 56#include <vm/vm_param.h> 57#include <vm/pmap.h> 58#include <vm/vm_kern.h> 59#include <vm/vm_extern.h> 60 61#include <x86/apicreg.h> 62#include <machine/clock.h> 63#include <machine/cputypes.h> 64#include <machine/cpufunc.h> 65#include <x86/mca.h> 66#include <machine/md_var.h> 67#include <machine/pcb.h> 68#include <machine/psl.h> 69#include <machine/smp.h> 70#include <machine/specialreg.h> 71#include <machine/tss.h> 72#include <x86/ucode.h> 73#include <machine/cpu.h> 74#include <x86/init.h> 75 76#define WARMBOOT_TARGET 0 77#define WARMBOOT_OFF (KERNBASE + 0x0467) 78#define WARMBOOT_SEG (KERNBASE + 0x0469) 79 80#define CMOS_REG (0x70) 81#define CMOS_DATA (0x71) 82#define BIOS_RESET (0x0f) 83#define BIOS_WARM (0x0a) 84 85extern struct pcpu __pcpu[]; 86 87/* Temporary variables for init_secondary() */ 88char *doublefault_stack; 89char *mce_stack; 90char *nmi_stack; 91char *dbg_stack; 92 93/* 94 * Local data and functions. 95 */ 96 97static int start_ap(int apic_id); 98 99static u_int bootMP_size; 100static u_int boot_address; 101 102/* 103 * Calculate usable address in base memory for AP trampoline code. 
 */
u_int
mp_bootaddress(u_int basemem)
{

	bootMP_size = mptramp_end - mptramp_start;
	/* Round the top of base memory down to a 4k page boundary. */
	boot_address = trunc_page(basemem * 1024);
	if (((basemem * 1024) - boot_address) < bootMP_size)
		boot_address -= PAGE_SIZE;	/* not enough room, lower by 4k */
	/*
	 * Reserve three pages directly below the trampoline for the
	 * bootstrap page tables (one page per paging level used by the
	 * trampoline's identity map).
	 */
	mptramp_pagetables = boot_address - (PAGE_SIZE * 3);

	return mptramp_pagetables;
}

/*
 * Initialize the IPI handlers and start up the APs.
 *
 * Installs the interprocessor-interrupt gates in the IDT (selecting the
 * PCID/INVPCID/PTI handler variants to match the boot-time configuration),
 * records the BSP's APIC ID, probes the CPU topology, and finally starts
 * every application processor via init_ops.start_all_aps().
 */
void
cpu_mp_start(void)
{
	int i;

	/* Initialize the logical ID to APIC ID table. */
	for (i = 0; i < MAXCPU; i++) {
		cpu_apic_ids[i] = -1;
		cpu_ipi_pending[i] = 0;
	}

	/*
	 * Install an inter-CPU IPI for TLB invalidation.  The handler
	 * chosen must match the TLB-management mode: INVPCID-based when
	 * the instruction is available, plain PCID otherwise, and the
	 * "_pti" variants when page-table isolation is enabled.
	 */
	if (pmap_pcid_enabled) {
		if (invpcid_works) {
			setidt(IPI_INVLTLB, pti ?
			    IDTVEC(invltlb_invpcid_pti_pti) :
			    IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT,
			    SEL_KPL, 0);
			setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) :
			    IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0);
			setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) :
			    IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0);
		} else {
			setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) :
			    IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0);
			setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) :
			    IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0);
			setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) :
			    IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0);
		}
	} else {
		setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb),
		    SDT_SYSIGT, SEL_KPL, 0);
		setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
		    SDT_SYSIGT, SEL_KPL, 0);
		setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
		    SDT_SYSIGT, SEL_KPL, 0);
	}

	/* Install an inter-CPU IPI for cache invalidation. */
	setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache),
	    SDT_SYSIGT, SEL_KPL, 0);

	/* Install an inter-CPU IPI for all-CPU rendezvous */
	setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) :
	    IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);

	/* Install generic inter-CPU IPI handler */
	setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) :
	    IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0);

	/* Install an inter-CPU IPI for CPU stop/restart */
	setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop),
	    SDT_SYSIGT, SEL_KPL, 0);

	/* Install an inter-CPU IPI for CPU suspend/resume */
	setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend),
	    SDT_SYSIGT, SEL_KPL, 0);

	/* Set boot_cpu_id if needed. */
	if (boot_cpu_id == -1) {
		boot_cpu_id = PCPU_GET(apic_id);
		cpu_info[boot_cpu_id].cpu_bsp = 1;
	} else
		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
		    ("BSP's APIC ID doesn't match boot_cpu_id"));

	/* Probe logical/physical core configuration. */
	topo_probe();

	assign_cpu_ids();

	/* Start each Application Processor */
	init_ops.start_all_aps();

	set_interrupt_apic_ids();
}


/*
 * AP CPUs call this to initialize themselves.
 *
 * Runs on the AP with the temporary trampoline environment still active.
 * Builds the AP's TSS (with dedicated IST stacks for double fault, NMI,
 * MC# and DB#), installs a private GDT and the shared IDT, initializes
 * the per-CPU data area and the GS-base MSRs that anchor it, then spins
 * until the BSP sets aps_ready before entering the scheduler via
 * init_secondary_tail().
 */
void
init_secondary(void)
{
	struct pcpu *pc;
	struct nmi_pcpu *np;
	u_int64_t cr0;
	int cpu, gsel_tss, x;
	struct region_descriptor ap_gdt;

	/* Set by the startup code for us to use */
	cpu = bootAP;

	/* Update microcode before doing anything else. */
	ucode_load_ap(cpu);

	/* Init tss */
	common_tss[cpu] = common_tss[0];
	common_tss[cpu].tss_iobase = sizeof(struct amd64tss) +
	    IOPERM_BITMAP_SIZE;
	/* The double fault stack runs on IST1. */
	common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];

	/*
	 * The NMI stack runs on IST2.  A struct nmi_pcpu is placed at the
	 * top of each IST stack so the handler can find its pcpu pointer.
	 */
	np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
	common_tss[cpu].tss_ist2 = (long) np;

	/* The MC# stack runs on IST3. */
	np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1;
	common_tss[cpu].tss_ist3 = (long) np;

	/* The DB# stack runs on IST4. */
	np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
	common_tss[cpu].tss_ist4 = (long) np;

	/*
	 * Prepare private GDT.  All slots except the per-CPU TSS and LDT
	 * descriptors are copied from the template; those two are system
	 * descriptors (16 bytes, hence the x and x + 1 slots skipped) and
	 * the TSS one is generated below to point at this CPU's TSS.
	 */
	gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
	for (x = 0; x < NGDT; x++) {
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1))
			ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
	}
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
	ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	ap_gdt.rd_base = (long) &gdt[NGDT * cpu];
	lgdt(&ap_gdt);			/* does magic intra-segment return */

	/* Get per-cpu data */
	pc = &__pcpu[cpu];

	/* prime data page for it to use */
	pcpu_init(pc, cpu, sizeof(struct pcpu));
	dpcpu_init(dpcpu, cpu);
	pc->pc_apic_id = cpu_apic_ids[cpu];
	pc->pc_prvspace = pc;
	pc->pc_curthread = 0;
	pc->pc_tssp = &common_tss[cpu];
	pc->pc_commontssp = &common_tss[cpu];
	pc->pc_rsp0 = 0;
	/* PTI trampoline stack top, 16-byte aligned. */
	pc->pc_pti_rsp0 = (((vm_offset_t)&pc->pc_pti_stack +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
	    GPROC0_SEL];
	pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL];
	pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
	pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
	    GUSERLDT_SEL];
	/* See comment in pmap_bootstrap(). */
	pc->pc_pcid_next = PMAP_PCID_KERN + 2;
	pc->pc_pcid_gen = 1;
	common_tss[cpu].tss_rsp0 = 0;

	/* Save the per-cpu pointer for use by the NMI handler. */
	np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
	np->np_pcpu = (register_t) pc;

	/* Save the per-cpu pointer for use by the MC# handler. */
	np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1;
	np->np_pcpu = (register_t) pc;

	/* Save the per-cpu pointer for use by the DB# handler. */
	np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
	np->np_pcpu = (register_t) pc;

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */
	fix_cpuid();

	lidt(&r_idt);

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s:	CR0_PG, CR0_PE
	 * Set by cpu_setregs:	CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);

	amd64_conf_fast_syscall();

	/* signal our startup to the BSP. */
	mp_naps++;

	/* Spin until the BSP releases the APs. */
	while (atomic_load_acq_int(&aps_ready) == 0)
		ia32_pause();

	init_secondary_tail();
}

/*******************************************************************
 * local functions and data
 */

/*
 * Start each AP in our list.
 *
 * Copies the real-mode trampoline and its bootstrap page tables into
 * base memory, points the BIOS warm-start vector at the trampoline,
 * then starts the APs one at a time, handing each one freshly
 * allocated boot, double-fault, MC#, NMI, DB# and dynamic-pcpu
 * memory via the bootstacks[]/bootSTK/bootAP handoff variables.
 * Panics if any AP fails to announce itself.  Returns the number of
 * APs actually started (mp_naps).
 */
int
native_start_all_aps(void)
{
	vm_offset_t va = boot_address + KERNBASE;
	u_int64_t *pt4, *pt3, *pt2;
	u_int32_t mpbioswarmvec;
	int apic_id, cpu, i;
	u_char mpbiosreason;

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* install the AP 1st level boot code */
	pmap_kenter(va, boot_address);
	pmap_invalidate_page(kernel_pmap, va);
	bcopy(mptramp_start, (void *)va, bootMP_size);

	/* Locate the page tables, they'll be below the trampoline */
	pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
	pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
	pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);

	/* Create the initial 1GB replicated page tables */
	for (i = 0; i < 512; i++) {
		/*
		 * Each slot of the level 4 pages points to the same
		 * level 3 page.
		 */
		pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
		    PAGE_SIZE);
		pt4[i] |= PG_V | PG_RW | PG_U;

		/*
		 * Each slot of the level 3 pages points to the same
		 * level 2 page.
		 */
		pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
		    (2 * PAGE_SIZE));
		pt3[i] |= PG_V | PG_RW | PG_U;

		/* The level 2 page slots are mapped with 2MB pages for 1GB. */
		pt2[i] = i * (2 * 1024 * 1024);
		pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
	}

	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
	outb(CMOS_REG, BIOS_RESET);
	mpbiosreason = inb(CMOS_DATA);

	/* setup a vector to our boot code */
	*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
	*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */

	/* start each AP */
	for (cpu = 1; cpu < mp_ncpus; cpu++) {
		apic_id = cpu_apic_ids[cpu];

		/* allocate and set up an idle stack data page */
		bootstacks[cpu] = (void *)kmem_malloc(kernel_arena,
		    kstack_pages * PAGE_SIZE, M_WAITOK | M_ZERO);
		doublefault_stack = (char *)kmem_malloc(kernel_arena,
		    PAGE_SIZE, M_WAITOK | M_ZERO);
		mce_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
		    M_WAITOK | M_ZERO);
		nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
		    M_WAITOK | M_ZERO);
		dbg_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
		    M_WAITOK | M_ZERO);
		dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
		    M_WAITOK | M_ZERO);

		/* Top of the new AP's kernel stack, minus a return slot. */
		bootSTK = (char *)bootstacks[cpu] +
		    kstack_pages * PAGE_SIZE - 8;
		bootAP = cpu;

		/* attempt to start the Application Processor */
		if (!start_ap(apic_id)) {
			/* restore the warmstart vector */
			*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
			panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
		}

		CPU_SET(cpu, &all_cpus);	/* record AP in CPU map */
	}

	/* restore the warmstart vector */
	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, mpbiosreason);

	/* number of APs actually started */
	return mp_naps;
}


/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'physicalCpu'.
 * It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 */
static int
start_ap(int apic_id)
{
	int vector, ms;
	int cpus;

	/* calculate the vector (the trampoline's page number in low memory) */
	vector = (boot_address >> 12) & 0xff;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_naps;

	ipi_startup(apic_id, vector);

	/*
	 * Wait up to 5 seconds for it to start; the AP announces itself
	 * by incrementing mp_naps in init_secondary().
	 */
	for (ms = 0; ms < 5000; ms++) {
		if (mp_naps > cpus)
			return 1;	/* return SUCCESS */
		DELAY(1000);
	}
	return 0;		/* return FAILURE */
}

/*
 * IPI handler: invalidate the whole TLB context using INVPCID
 * (PCID enabled, INVPCID available, no PTI).  For the kernel pmap a
 * global flush is performed (INVPCID_CTXGLOB); otherwise only the
 * target pmap's PCID context is flushed.  Completion is advertised
 * to the initiator through the smp_tlb_done generation counter.
 */
void
invltlb_invpcid_handler(void)
{
	struct invpcid_descr d;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;
	d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
	d.pad = 0;
	d.addr = 0;
	invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
	    INVPCID_CTX);
	PCPU_SET(smp_tlb_done, generation);
}

/*
 * IPI handler: as invltlb_invpcid_handler(), but for the PTI case,
 * where each user pmap has a second (user) PCID that must be flushed
 * as well.
 */
void
invltlb_invpcid_pti_handler(void)
{
	struct invpcid_descr d;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;
	d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
	d.pad = 0;
	d.addr = 0;
	if (smp_tlb_pmap == kernel_pmap) {
		/*
		 * This invalidation actually needs to clear kernel
		 * mappings from the TLB in the current pmap, but
		 * since we were asked for the flush in the kernel
		 * pmap, achieve it by performing global flush.
		 */
		invpcid(&d, INVPCID_CTXGLOB);
	} else {
		/* Flush both the kernel and the user PCID of the pmap. */
		invpcid(&d, INVPCID_CTX);
		d.pcid |= PMAP_PCID_USER_PT;
		invpcid(&d, INVPCID_CTX);
	}
	PCPU_SET(smp_tlb_done, generation);
}

/*
 * IPI handler: full TLB invalidation with PCID enabled but without
 * INVPCID.  The kernel pmap is flushed globally; a non-kernel pmap is
 * flushed by reloading CR3 (without CR3_PCID_SAVE, which discards the
 * cached context), but only if it is the pmap current on this CPU --
 * otherwise the pm_gen mechanism handles invalidation at the next
 * pmap activation.
 */
void
invltlb_pcid_handler(void)
{
	uint64_t kcr3, ucr3;
	uint32_t generation, pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;	/* Overlap with serialization */
	if (smp_tlb_pmap == kernel_pmap) {
		invltlb_glob();
	} else {
		/*
		 * The current pmap might not be equal to
		 * smp_tlb_pmap.  The clearing of the pm_gen in
		 * pmap_invalidate_all() takes care of TLB
		 * invalidation when switching to the pmap on this
		 * CPU.
		 */
		if (PCPU_GET(curpmap) == smp_tlb_pmap) {
			pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
			kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
			ucr3 = smp_tlb_pmap->pm_ucr3;
			if (ucr3 != PMAP_NO_CR3) {
				/* PTI: flush both address spaces. */
				ucr3 |= PMAP_PCID_USER_PT | pcid;
				pmap_pti_pcid_invalidate(ucr3, kcr3);
			} else
				load_cr3(kcr3);
		}
	}
	PCPU_SET(smp_tlb_done, generation);
}

/*
 * IPI handler: invalidate a single page (smp_tlb_addr1) with INVPCID
 * available.  INVLPG handles the current (kernel) PCID; if the pmap
 * has a user CR3 (PTI), the page is additionally invalidated in the
 * user PCID via INVPCID_ADDR.
 */
void
invlpg_invpcid_handler(void)
{
	struct invpcid_descr d;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;	/* Overlap with serialization */
	invlpg(smp_tlb_addr1);
	if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
		d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
		    PMAP_PCID_USER_PT;
		d.pad = 0;
		d.addr = smp_tlb_addr1;
		invpcid(&d, INVPCID_ADDR);
	}
	PCPU_SET(smp_tlb_done, generation);
}

/*
 * IPI handler: invalidate a single page with PCID but without INVPCID.
 * The user-PCID entry is flushed with the pmap_pti_pcid_invlpg()
 * CR3-switching helper, but only when the pmap is current here and
 * PTI is in effect (pm_ucr3 valid).
 */
void
invlpg_pcid_handler(void)
{
	uint64_t kcr3, ucr3;
	uint32_t generation;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;	/* Overlap with serialization */
	invlpg(smp_tlb_addr1);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
		pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
		kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
		ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
		pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
	}
	PCPU_SET(smp_tlb_done, generation);
}

/*
 * IPI handler: invalidate the page range [smp_tlb_addr1, smp_tlb_addr2)
 * with INVPCID available.  INVLPG per page for the kernel PCID, then
 * INVPCID_ADDR per page for the user PCID when PTI is in effect.
 */
void
invlrng_invpcid_handler(void)
{
	struct invpcid_descr d;
	vm_offset_t addr, addr2;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	generation = smp_tlb_generation;	/* Overlap with serialization */
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
	if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
		d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
		    PMAP_PCID_USER_PT;
		d.pad = 0;
		d.addr = smp_tlb_addr1;
		do {
			invpcid(&d, INVPCID_ADDR);
			d.addr += PAGE_SIZE;
		} while (d.addr < addr2);
	}
	PCPU_SET(smp_tlb_done, generation);
}

/*
 * IPI handler: invalidate the page range [smp_tlb_addr1, smp_tlb_addr2)
 * with PCID but without INVPCID; the user-PCID flush goes through the
 * pmap_pti_pcid_invlrng() CR3-switching helper when the pmap is
 * current here and PTI is in effect.
 */
void
invlrng_pcid_handler(void)
{
	vm_offset_t addr, addr2;
	uint64_t kcr3, ucr3;
	uint32_t generation;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	generation = smp_tlb_generation;	/* Overlap with serialization */
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
		pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
		kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
		ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
		pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
	}
	PCPU_SET(smp_tlb_done, generation);
}