/*-
 * Copyright (c) 1996, by Steve Passe
 * Copyright (c) 2003, by Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/amd64/amd64/mp_machdep.c 347700 2019-05-16 14:42:16Z markj $");

#include "opt_cpu.h"
#include "opt_ddb.h"
#include "opt_kstack_pages.h"
#include "opt_sched.h"
#include "opt_smp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#include <x86/apicreg.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/cpufunc.h>
#include <x86/mca.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/tss.h>
#include <x86/ucode.h>
#include <machine/cpu.h>
#include <x86/init.h>

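/*
 * Warm-boot glue: the BIOS warm-reset vector lives at physical 0x467
 * (16-bit offset at 0x467, segment at 0x469), and setting the CMOS
 * shutdown status byte (offset 0x0f) to 0x0a ("warm start") makes the
 * BIOS jump through that vector after a CPU reset instead of running
 * the full POST.
 */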
#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)

extern	struct pcpu __pcpu[];

/* Temporary variables for init_secondary() */
char *doublefault_stack;
char *mce_stack;
char *nmi_stack;
char *dbg_stack;

/*
 * Local data and functions.
 */

static int	start_ap(int apic_id);

static u_int	bootMP_size;
static u_int	boot_address;

/*
 * Calculate usable address in base memory for AP trampoline code.
 */
u_int
mp_bootaddress(u_int basemem)
{

	bootMP_size = mptramp_end - mptramp_start;
	boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */
	if (((basemem * 1024) - boot_address) < bootMP_size)
		boot_address -= PAGE_SIZE;	/* not enough, lower by 4k */
	/* 3 levels of page table pages */
	mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
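	/*
	 * Resulting layout at the top of base memory: three page-table
	 * pages starting at mptramp_pagetables, with the real-mode
	 * trampoline code immediately above them at boot_address.
	 */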

	return mptramp_pagetables;
}

/*
 * Initialize the IPI handlers and start up the APs.
 */
void
cpu_mp_start(void)
{
	int i;

	/* Initialize the logical ID to APIC ID table. */
	for (i = 0; i < MAXCPU; i++) {
		cpu_apic_ids[i] = -1;
		cpu_ipi_pending[i] = 0;
	}

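	/*
	 * Each IPI handler below comes in PTI and non-PTI flavors; the
	 * TLB shootdown handlers additionally come in PCID and INVPCID
	 * variants, so the selection here must match the mode that
	 * pmap chose at bootstrap time.
	 */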
	/* Install an inter-CPU IPI for TLB invalidation */
	if (pmap_pcid_enabled) {
		if (invpcid_works) {
			setidt(IPI_INVLTLB, pti ?
			    IDTVEC(invltlb_invpcid_pti_pti) :
			    IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT,
			    SEL_KPL, 0);
			setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) :
			    IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0);
			setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) :
			    IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0);
		} else {
			setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) :
			    IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0);
			setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) :
			    IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0);
			setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) :
			    IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0);
		}
	} else {
		setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb),
		    SDT_SYSIGT, SEL_KPL, 0);
		setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
		    SDT_SYSIGT, SEL_KPL, 0);
		setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
		    SDT_SYSIGT, SEL_KPL, 0);
	}

	/* Install an inter-CPU IPI for cache invalidation. */
	setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache),
	    SDT_SYSIGT, SEL_KPL, 0);

	/* Install an inter-CPU IPI for all-CPU rendezvous */
	setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) :
	    IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0);

	/* Install generic inter-CPU IPI handler */
	setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) :
	    IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0);

	/* Install an inter-CPU IPI for CPU stop/restart */
	setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop),
	    SDT_SYSIGT, SEL_KPL, 0);

	/* Install an inter-CPU IPI for CPU suspend/resume */
	setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend),
	    SDT_SYSIGT, SEL_KPL, 0);

	/* Set boot_cpu_id if needed. */
	if (boot_cpu_id == -1) {
		boot_cpu_id = PCPU_GET(apic_id);
		cpu_info[boot_cpu_id].cpu_bsp = 1;
	} else
		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
		    ("BSP's APIC ID doesn't match boot_cpu_id"));

	/* Probe logical/physical core configuration. */
	topo_probe();

	assign_cpu_ids();

	/* Start each Application Processor */
	init_ops.start_all_aps();

	set_interrupt_apic_ids();
}

/*
 * AP CPUs call this to initialize themselves.
 */
void
init_secondary(void)
{
	struct pcpu *pc;
	struct nmi_pcpu *np;
	u_int64_t cr0;
	int cpu, gsel_tss, x;
	struct region_descriptor ap_gdt;

	/* Set by the startup code for us to use */
	cpu = bootAP;

	/* Update microcode before doing anything else. */
	ucode_load_ap(cpu);

	/* Init tss */
	common_tss[cpu] = common_tss[0];
	common_tss[cpu].tss_iobase = sizeof(struct amd64tss) +
	    IOPERM_BITMAP_SIZE;
	common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];

	/* The NMI stack runs on IST2. */
	np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
	common_tss[cpu].tss_ist2 = (long) np;

	/* The MC# stack runs on IST3. */
	np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1;
	common_tss[cpu].tss_ist3 = (long) np;

	/* The DB# stack runs on IST4. */
	np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
	common_tss[cpu].tss_ist4 = (long) np;

	/* Prepare private GDT */
	gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
	for (x = 0; x < NGDT; x++) {
		if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
		    x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1))
			ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
	}
	ssdtosyssd(&gdt_segs[GPROC0_SEL],
	    (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
	ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	ap_gdt.rd_base =  (long) &gdt[NGDT * cpu];
	lgdt(&ap_gdt);			/* does magic intra-segment return */

	/* Get per-cpu data */
	pc = &__pcpu[cpu];

	/* prime data page for it to use */
	pcpu_init(pc, cpu, sizeof(struct pcpu));
	dpcpu_init(dpcpu, cpu);
	pc->pc_apic_id = cpu_apic_ids[cpu];
	pc->pc_prvspace = pc;
	pc->pc_curthread = 0;
	pc->pc_tssp = &common_tss[cpu];
	pc->pc_commontssp = &common_tss[cpu];
	pc->pc_rsp0 = 0;
	pc->pc_pti_rsp0 = (((vm_offset_t)&pc->pc_pti_stack +
	    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful);
	pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
	    GPROC0_SEL];
	pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL];
	pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
	pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
	    GUSERLDT_SEL];
	/* See comment in pmap_bootstrap(). */
	pc->pc_pcid_next = PMAP_PCID_KERN + 2;
	pc->pc_pcid_gen = 1;
	common_tss[cpu].tss_rsp0 = 0;

	/* Save the per-cpu pointer for use by the NMI handler. */
	np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1;
	np->np_pcpu = (register_t) pc;

	/* Save the per-cpu pointer for use by the MC# handler. */
	np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1;
	np->np_pcpu = (register_t) pc;

	/* Save the per-cpu pointer for use by the DB# handler. */
	np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1;
	np->np_pcpu = (register_t) pc;
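	/*
	 * These handlers cannot trust the GS base (an NMI or MC# can
	 * arrive while a user GS base is loaded), so they recover the
	 * pcpu pointer from the struct nmi_pcpu at the top of their
	 * own IST stack instead.
	 */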

	wrmsr(MSR_FSBASE, 0);		/* User value */
	wrmsr(MSR_GSBASE, (u_int64_t)pc);
	wrmsr(MSR_KGSBASE, (u_int64_t)pc);	/* XXX User value while we're in the kernel */
	fix_cpuid();

	lidt(&r_idt);

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);

	amd64_conf_fast_syscall();

	/* signal our startup to the BSP. */
	mp_naps++;

	/* Spin until the BSP releases the APs. */
	while (atomic_load_acq_int(&aps_ready) == 0)
		ia32_pause();

	init_secondary_tail();
}

/*******************************************************************
 * local functions and data
 */

/*
 * start each AP in our list
 */
int
native_start_all_aps(void)
{
	vm_offset_t va = boot_address + KERNBASE;
	u_int64_t *pt4, *pt3, *pt2;
	u_int32_t mpbioswarmvec;
	int apic_id, cpu, i;
	u_char mpbiosreason;

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* install the AP 1st level boot code */
	pmap_kenter(va, boot_address);
	pmap_invalidate_page(kernel_pmap, va);
	bcopy(mptramp_start, (void *)va, bootMP_size);

	/* Locate the page tables, they'll be below the trampoline */
	pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE);
	pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
	pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);

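	/*
	 * Because every level 4 and level 3 slot points at the same
	 * next-level page, the low 1GB of physical memory is aliased
	 * at every 1GB-aligned virtual address.  The trampoline can
	 * therefore enable long mode while still executing at its low
	 * identity-mapped address, before switching to the real kernel
	 * page tables.
	 */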
	/* Create the initial 1GB replicated page tables */
	for (i = 0; i < 512; i++) {
		/* Each slot of the level 4 pages points to the same level 3 page */
		pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
		pt4[i] |= PG_V | PG_RW | PG_U;

		/* Each slot of the level 3 pages points to the same level 2 page */
		pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
		pt3[i] |= PG_V | PG_RW | PG_U;

		/* The level 2 page slots are mapped with 2MB pages for 1GB. */
		pt2[i] = i * (2 * 1024 * 1024);
		pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
	}

	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
	outb(CMOS_REG, BIOS_RESET);
	mpbiosreason = inb(CMOS_DATA);

	/* setup a vector to our boot code */
	*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
	*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */

	/* start each AP */
	for (cpu = 1; cpu < mp_ncpus; cpu++) {
		apic_id = cpu_apic_ids[cpu];

		/* allocate and set up an idle stack data page */
		bootstacks[cpu] = (void *)kmem_malloc(kernel_arena,
		    kstack_pages * PAGE_SIZE, M_WAITOK | M_ZERO);
		doublefault_stack = (char *)kmem_malloc(kernel_arena,
		    PAGE_SIZE, M_WAITOK | M_ZERO);
		mce_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
		    M_WAITOK | M_ZERO);
		nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
		    M_WAITOK | M_ZERO);
		dbg_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE,
		    M_WAITOK | M_ZERO);
		dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
		    M_WAITOK | M_ZERO);
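		/*
		 * The allocations above are handed to the AP through
		 * the doublefault_stack/mce_stack/nmi_stack/dbg_stack
		 * globals consumed by init_secondary(); this is safe
		 * only because APs are started strictly one at a time.
		 */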

		bootSTK = (char *)bootstacks[cpu] + kstack_pages * PAGE_SIZE - 8;
		bootAP = cpu;

		/* attempt to start the Application Processor */
		if (!start_ap(apic_id)) {
			/* restore the warmstart vector */
			*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
			panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
		}

		CPU_SET(cpu, &all_cpus);	/* record AP in CPU map */
	}

	/* restore the warmstart vector */
	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, mpbiosreason);

	/* number of APs actually started */
	return mp_naps;
}

/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'apic_id'.  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 */
static int
start_ap(int apic_id)
{
	int vector, ms;
	int cpus;

	/* calculate the vector */
	vector = (boot_address >> 12) & 0xff;
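	/*
	 * A startup IPI delivers its vector as a page number: the AP
	 * begins execution in real mode at physical address
	 * vector << 12, which is exactly where the trampoline was
	 * copied.
	 */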

	/* used as a watchpoint to signal AP startup */
	cpus = mp_naps;

	ipi_startup(apic_id, vector);

	/* Wait up to 5 seconds for it to start. */
	for (ms = 0; ms < 5000; ms++) {
		if (mp_naps > cpus)
			return 1;	/* return SUCCESS */
		DELAY(1000);
	}
	return 0;		/* return FAILURE */
}

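/*
 * TLB shootdown handlers.  The initiating CPU publishes its request in
 * the smp_tlb_* variables and then sends one of the IPIs installed in
 * cpu_mp_start(); each target CPU performs the invalidation and
 * acknowledges by storing the request generation into its per-CPU
 * smp_tlb_done, which the initiator polls.
 */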
void
invltlb_invpcid_handler(void)
{
	struct invpcid_descr d;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;
	d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
	d.pad = 0;
	d.addr = 0;
	invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
	    INVPCID_CTX);
	PCPU_SET(smp_tlb_done, generation);
}

void
invltlb_invpcid_pti_handler(void)
{
	struct invpcid_descr d;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;
	d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
	d.pad = 0;
	d.addr = 0;
	if (smp_tlb_pmap == kernel_pmap) {
		/*
		 * This invalidation actually needs to clear kernel
		 * mappings from the TLB in the current pmap, but
		 * since we were asked for the flush in the kernel
		 * pmap, achieve it by performing global flush.
		 */
		invpcid(&d, INVPCID_CTXGLOB);
	} else {
		invpcid(&d, INVPCID_CTX);
		d.pcid |= PMAP_PCID_USER_PT;
		invpcid(&d, INVPCID_CTX);
	}
	PCPU_SET(smp_tlb_done, generation);
}

void
invltlb_pcid_handler(void)
{
	uint64_t kcr3, ucr3;
	uint32_t generation, pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;	/* Overlap with serialization */
	if (smp_tlb_pmap == kernel_pmap) {
		invltlb_glob();
	} else {
		/*
		 * The current pmap might not be equal to
		 * smp_tlb_pmap.  The clearing of the pm_gen in
		 * pmap_invalidate_all() takes care of TLB
		 * invalidation when switching to the pmap on this
		 * CPU.
		 */
		if (PCPU_GET(curpmap) == smp_tlb_pmap) {
			pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
			kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
			ucr3 = smp_tlb_pmap->pm_ucr3;
			if (ucr3 != PMAP_NO_CR3) {
				ucr3 |= PMAP_PCID_USER_PT | pcid;
				pmap_pti_pcid_invalidate(ucr3, kcr3);
			} else
				load_cr3(kcr3);
		}
	}
	PCPU_SET(smp_tlb_done, generation);
}

void
invlpg_invpcid_handler(void)
{
	struct invpcid_descr d;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;	/* Overlap with serialization */
	invlpg(smp_tlb_addr1);
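	/*
	 * With PTI, the page must also be flushed from the user
	 * page-table copy of the address space, identified by the
	 * PMAP_PCID_USER_PT bit in its PCID.
	 */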
	if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
		d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
		    PMAP_PCID_USER_PT;
		d.pad = 0;
		d.addr = smp_tlb_addr1;
		invpcid(&d, INVPCID_ADDR);
	}
	PCPU_SET(smp_tlb_done, generation);
}

void
invlpg_pcid_handler(void)
{
	uint64_t kcr3, ucr3;
	uint32_t generation;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	generation = smp_tlb_generation;	/* Overlap with serialization */
	invlpg(smp_tlb_addr1);
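	/*
	 * Without INVPCID the user page-table copy can only be flushed
	 * via pmap_pti_pcid_invlpg(), which temporarily reloads %cr3;
	 * that is only valid (and only needed) when the target pmap is
	 * the one currently active on this CPU.  Non-current pmaps are
	 * handled by the pm_gen mechanism when they are next activated.
	 */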
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
		pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
		kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
		ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
		pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
	}
	PCPU_SET(smp_tlb_done, generation);
}

void
invlrng_invpcid_handler(void)
{
	struct invpcid_descr d;
	vm_offset_t addr, addr2;
	uint32_t generation;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	generation = smp_tlb_generation;	/* Overlap with serialization */
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
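	/* If PTI is active, repeat the range walk for the user PCID. */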
	if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
		d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
		    PMAP_PCID_USER_PT;
		d.pad = 0;
		d.addr = smp_tlb_addr1;
		do {
			invpcid(&d, INVPCID_ADDR);
			d.addr += PAGE_SIZE;
		} while (d.addr < addr2);
	}
	PCPU_SET(smp_tlb_done, generation);
}

void
invlrng_pcid_handler(void)
{
	vm_offset_t addr, addr2;
	uint64_t kcr3, ucr3;
	uint32_t generation;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */

	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	generation = smp_tlb_generation;	/* Overlap with serialization */
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
		pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
		kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
		ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
		pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
	}
	PCPU_SET(smp_tlb_done, generation);
}
