mp_x86.c revision 123126
10SN/A/*-
213953Sxuelei * Copyright (c) 1996, by Steve Passe
30SN/A * All rights reserved.
40SN/A *
50SN/A * Redistribution and use in source and binary forms, with or without
60SN/A * modification, are permitted provided that the following conditions
72362SN/A * are met:
80SN/A * 1. Redistributions of source code must retain the above copyright
92362SN/A *    notice, this list of conditions and the following disclaimer.
100SN/A * 2. The name of the developer may NOT be used to endorse or promote products
110SN/A *    derived from this software without specific prior written permission.
120SN/A *
130SN/A * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
140SN/A * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
150SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
160SN/A * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
170SN/A * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
180SN/A * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
190SN/A * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
200SN/A * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
212362SN/A * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
222362SN/A * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
232362SN/A * SUCH DAMAGE.
240SN/A */
250SN/A
260SN/A#include <sys/cdefs.h>
270SN/A__FBSDID("$FreeBSD: head/sys/i386/i386/mp_machdep.c 123126 2003-12-03 14:57:26Z jhb $");
280SN/A
290SN/A#include "opt_apic.h"
300SN/A#include "opt_cpu.h"
310SN/A#include "opt_kstack_pages.h"
327495SN/A
/*
 * Build-time sanity checks for the SMP configuration.
 *
 * The whole group is skipped when "lint" is defined.  The I386_CPU and
 * CPU_DISABLE_CMPXCHG checks are additionally skipped when COMPILING_LINT
 * is defined.
 */
#if !defined(lint)
/* This file must only be built with SMP defined. */
#if !defined(SMP)
#error How did you get here?
#endif

#if defined(I386_CPU) && !defined(COMPILING_LINT)
#error SMP not supported with I386_CPU
#endif
#ifndef DEV_APIC
#error The apic device is required for SMP, add "device apic" to your config file.
#endif
#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
#error SMP not supported with CPU_DISABLE_CMPXCHG
#endif
#endif /* not lint */
480SN/A
490SN/A#include <sys/param.h>
500SN/A#include <sys/systm.h>
510SN/A#include <sys/bus.h>
520SN/A#include <sys/cons.h>	/* cngetc() */
530SN/A#ifdef GPROF
540SN/A#include <sys/gmon.h>
550SN/A#endif
560SN/A#include <sys/kernel.h>
570SN/A#include <sys/ktr.h>
580SN/A#include <sys/lock.h>
590SN/A#include <sys/malloc.h>
600SN/A#include <sys/memrange.h>
610SN/A#include <sys/mutex.h>
620SN/A#include <sys/pcpu.h>
630SN/A#include <sys/proc.h>
640SN/A#include <sys/smp.h>
650SN/A#include <sys/sysctl.h>
660SN/A
670SN/A#include <vm/vm.h>
680SN/A#include <vm/vm_param.h>
690SN/A#include <vm/pmap.h>
700SN/A#include <vm/vm_kern.h>
710SN/A#include <vm/vm_extern.h>
720SN/A
730SN/A#include <machine/apicreg.h>
740SN/A#include <machine/clock.h>
750SN/A#include <machine/md_var.h>
760SN/A#include <machine/pcb.h>
770SN/A#include <machine/smp.h>
780SN/A#include <machine/smptests.h>	/** COUNT_XINVLTLB_HITS */
790SN/A#include <machine/specialreg.h>
800SN/A#include <machine/privatespace.h>
810SN/A
/*
 * Warm-boot vector locations, addressed through the KERNBASE mapping.
 * Before starting each AP, start_all_aps() stores WARMBOOT_TARGET at
 * WARMBOOT_OFF and (boot_address >> 4) at WARMBOOT_SEG.
 */
#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

/*
 * CMOS access: CMOS_REG and CMOS_DATA are the I/O ports used with
 * outb()/inb(); BIOS_RESET is the CMOS register selected through CMOS_REG
 * and BIOS_WARM is the value start_all_aps() writes to it ('warm-start').
 */
#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)
900SN/A
910SN/A/*
920SN/A * this code MUST be enabled here and in mpboot.s.
930SN/A * it follows the very early stages of AP boot by placing values in CMOS ram.
940SN/A * it NORMALLY will never be needed and thus the primitive method for enabling.
950SN/A *
960SN/A#define CHECK_POINTS
970SN/A */
980SN/A
#if defined(CHECK_POINTS) && !defined(PC98)
/*
 * Checkpoint helpers: read or write CMOS register A through the
 * CMOS_REG/CMOS_DATA port pair.  Both are single expressions.
 */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Store D in each of the six checkpoint registers (0x34 - 0x39).
 *
 * Wrapped in do { } while (0) so that the macro is exactly one statement
 * and stays correct as the body of an unbraced if/else.  Invoke it with a
 * trailing semicolon: CHECK_INIT(99);
 */
#define CHECK_INIT(D)				\
do {						\
	CHECK_WRITE(0x34, (D));			\
	CHECK_WRITE(0x35, (D));			\
	CHECK_WRITE(0x36, (D));			\
	CHECK_WRITE(0x37, (D));			\
	CHECK_WRITE(0x38, (D));			\
	CHECK_WRITE(0x39, (D));			\
} while (0)

/*
 * Print the six checkpoint registers, prefixed by the string S.
 * Single statement for the same reason as CHECK_INIT().
 */
#define CHECK_PRINT(S)				\
do {						\
	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
	   (S),					\
	   CHECK_READ(0x34),			\
	   CHECK_READ(0x35),			\
	   CHECK_READ(0x36),			\
	   CHECK_READ(0x37),			\
	   CHECK_READ(0x38),			\
	   CHECK_READ(0x39));			\
} while (0)

#else				/* CHECK_POINTS */

/* Checkpoints disabled: the macros expand to nothing. */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)
#define CHECK_WRITE(A, D)

#endif				/* CHECK_POINTS */
1280SN/A
/*
 * Values to send to the POST hardware.
 *
 * Each value is passed to POSTCODE() on entry to the function it is named
 * after, where that function is part of this file (mp_bootaddress(),
 * cpu_mp_start(), start_all_aps(), install_ap_tramp(), start_ap(),
 * cpu_mp_announce()).
 */
#define MP_BOOTADDRESS_POST	0x10
#define MP_PROBE_POST		0x11
#define MPTABLE_PASS1_POST	0x12

#define MP_START_POST		0x13
#define MP_ENABLE_POST		0x14
#define MPTABLE_PASS2_POST	0x15

#define START_ALL_APS_POST	0x16
#define INSTALL_AP_TRAMP_POST	0x17
#define START_AP_POST		0x18

#define MP_ANNOUNCE_POST	0x19
1451246SN/A
/* lock region used by kernel profiling */
int	mcount_lock;

/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int	current_postcode;

/*
 * Number of application processors (APs) that have checked in.  Each AP
 * increments it in init_secondary(); start_ap() polls it to notice that the
 * AP it just kicked has come up.
 */
int	mp_naps;
/*
 * APIC ID of the designated BSP.  Stays -1 until cpu_add() or
 * cpu_mp_start() records it.
 */
int	boot_cpu_id = -1;
extern	int nkpt;

/*
 * CPU topology map datastructures for HTT. (XXX)
 */
struct cpu_group mp_groups[MAXCPU];
struct cpu_top mp_top;
struct cpu_top *smp_topology;

/*
 * AP uses this during bootstrap.  Do not staticize.
 *
 * start_all_aps() points bootSTK at the end of the idle stack of the AP it
 * is about to start and stores that AP's logical CPU id in bootAP;
 * init_secondary() reads bootAP to find its private space.
 */
char *bootSTK;
static int bootAP;

/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;

/* SMP page table page */
extern pt_entry_t *SMPpt;

struct pcb stoppcbs[MAXCPU];

/*
 * Variables needed for SMP tlb shootdown.  smp_tlb_shootdown() publishes
 * the two addresses, resets smp_tlb_wait to 0 and then spins on
 * smp_tlb_wait; smp_tlb_mtx must be held by its caller.
 */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
struct mtx smp_tlb_mtx;

/*
 * Local data and functions.
 */

/*
 * logical_cpus: taken from cpu_procinfo in cpu_mp_start() when the CPU
 * reports CPUID_HTT, otherwise 0.
 * logical_cpus_mask: each AP whose APIC ID is not a multiple of
 * logical_cpus ORs its cpumask into this in init_secondary().
 */
static u_int logical_cpus;
static u_int logical_cpus_mask;

/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;
1938707SN/A
1948707SN/A/*
1958707SN/A * Store data from cpu_add() until later in the boot when we actually setup
1968707SN/A * the APs.
1978707SN/A */
1988707SN/Astruct cpu_info {
1998707SN/A	int	cpu_present:1;
2008707SN/A	int	cpu_bsp:1;
2018707SN/A} static cpu_info[MAXCPU];
/*
 * Logical CPU id -> APIC ID.  cpu_mp_start() fills it with -1 and records
 * the BSP in slot 0; start_all_aps() records each AP.
 */
static int cpu_apic_ids[MAXCPU];

/* Base-memory address of the AP trampoline, chosen by mp_bootaddress(). */
static u_int boot_address;

/* Forward declarations of the file-local helpers. */
static void	set_logical_apic_ids(void);
static int	start_all_aps(void);
static void	install_ap_tramp(void);
static int	start_ap(int apic_id);
static void	release_aps(void *dummy);

static int	hlt_cpus_mask;
static int	hlt_logical_cpus;
static struct	sysctl_ctx_list logical_cpu_clist;
2157495SN/A
2167495SN/A/*
2178925SN/A * Calculate usable address in base memory for AP trampoline code.
2187495SN/A */
2197495SN/Au_int
2207495SN/Amp_bootaddress(u_int basemem)
2217495SN/A{
2227495SN/A	POSTCODE(MP_BOOTADDRESS_POST);
2237495SN/A
2247495SN/A	boot_address = trunc_page(basemem);	/* round down to 4k boundary */
2258925SN/A	if ((basemem - boot_address) < bootMP_size)
2267495SN/A		boot_address -= PAGE_SIZE;	/* not enough, lower by 4k */
2277495SN/A
2287495SN/A	return boot_address;
2297495SN/A}
2307495SN/A
2317495SN/Avoid
2327495SN/Acpu_add(u_int apic_id, char boot_cpu)
2338925SN/A{
2347495SN/A
2357546SN/A	if (apic_id > MAXCPU) {
2367746SN/A		printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n",
2378953SN/A		    apic_id, MAXCPU);
2388953SN/A		return;
2397746SN/A	}
2408925SN/A	KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
2417746SN/A	    apic_id));
2427746SN/A	cpu_info[apic_id].cpu_present = 1;
2438953SN/A	if (boot_cpu) {
2448953SN/A		KASSERT(boot_cpu_id == -1,
2458953SN/A		    ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
2468953SN/A		    boot_cpu_id));
2478953SN/A		boot_cpu_id = apic_id;
2488953SN/A		cpu_info[apic_id].cpu_bsp = 1;
2498953SN/A	}
2508953SN/A	mp_ncpus++;
2518953SN/A	if (bootverbose)
2527546SN/A		printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
2537546SN/A		    "AP");
2547546SN/A
2557546SN/A}
2567546SN/A
2577546SN/Avoid
2587546SN/Acpu_mp_setmaxid(void)
2598925SN/A{
2607546SN/A
2618961SN/A	mp_maxid = MAXCPU - 1;
2628961SN/A}
2638961SN/A
2648961SN/Aint
2658961SN/Acpu_mp_probe(void)
2668961SN/A{
2678961SN/A
2688961SN/A	/*
26912348Saph	 * Always record BSP in CPU map so that the mbuf init code works
27012348Saph	 * correctly.
27112348Saph	 */
27212348Saph	all_cpus = 1;
27312348Saph	if (mp_ncpus == 0) {
27412348Saph		/*
27512348Saph		 * No CPUs were found, so this must be a UP system.  Setup
27612348Saph		 * the variables to represent a system with a single CPU
27712348Saph		 * with an id of 0.
2788961SN/A		 */
2790SN/A		mp_ncpus = 1;
2800SN/A		return (0);
28111218Sbpb	}
28211218Sbpb
28311218Sbpb	/* At least one CPU was found. */
2840SN/A	if (mp_ncpus == 1) {
28511218Sbpb		/*
28611218Sbpb		 * One CPU was found, so this must be a UP system with
2870SN/A		 * an I/O APIC.
28811218Sbpb		 */
28911218Sbpb		return (0);
29011218Sbpb	}
29111218Sbpb
29211218Sbpb	/* At least two CPUs were found. */
29311218Sbpb	return (1);
29411218Sbpb}
29511218Sbpb
29611218Sbpb/*
2970SN/A * Initialize the IPI handlers and start up the AP's.
29811218Sbpb */
29911218Sbpbvoid
30011218Sbpbcpu_mp_start(void)
30113532Siris{
3020SN/A	int i;
30311218Sbpb
30411218Sbpb	POSTCODE(MP_START_POST);
3050SN/A
30611218Sbpb	/* Initialize the logical ID to APIC ID table. */
30711218Sbpb	for (i = 0; i < MAXCPU; i++)
30811218Sbpb		cpu_apic_ids[i] = -1;
30911218Sbpb
31011218Sbpb	/* Install an inter-CPU IPI for TLB invalidation */
31111218Sbpb	setidt(IPI_INVLTLB, IDTVEC(invltlb),
31211218Sbpb	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
3130SN/A	setidt(IPI_INVLPG, IDTVEC(invlpg),
3140SN/A	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
31511218Sbpb	setidt(IPI_INVLRNG, IDTVEC(invlrng),
3160SN/A	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
3170SN/A
3188707SN/A	/* Install an inter-CPU IPI for forwarding hardclock() */
3198707SN/A	setidt(IPI_HARDCLOCK, IDTVEC(hardclock),
3208707SN/A	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
3210SN/A
3220SN/A	/* Install an inter-CPU IPI for forwarding statclock() */
3230SN/A	setidt(IPI_STATCLOCK, IDTVEC(statclock),
32411218Sbpb	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
32511218Sbpb
32611218Sbpb	/* Install an inter-CPU IPI for lazy pmap release */
32711218Sbpb	setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
32811218Sbpb	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
32911218Sbpb
33011218Sbpb	/* Install an inter-CPU IPI for all-CPU rendezvous */
33111218Sbpb	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
33211218Sbpb	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
33311218Sbpb
33411218Sbpb	/* Install an inter-CPU IPI for forcing an additional software trap */
33511218Sbpb	setidt(IPI_AST, IDTVEC(cpuast),
33611218Sbpb	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
33711218Sbpb
33811218Sbpb	/* Install an inter-CPU IPI for CPU stop/restart */
3390SN/A	setidt(IPI_STOP, IDTVEC(cpustop),
3400SN/A	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
3410SN/A
34211218Sbpb	mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
34311218Sbpb
34411218Sbpb	/* Set boot_cpu_id if needed. */
3450SN/A	if (boot_cpu_id == -1) {
3460SN/A		boot_cpu_id = PCPU_GET(apic_id);
3470SN/A		cpu_info[boot_cpu_id].cpu_bsp = 1;
3480SN/A	} else
3490SN/A		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
3500SN/A		    ("BSP's APIC ID doesn't match boot_cpu_id"));
3510SN/A	cpu_apic_ids[0] = boot_cpu_id;
3520SN/A
3530SN/A	/* Start each Application Processor */
3540SN/A	start_all_aps();
3550SN/A
3560SN/A	/* Setup the initial logical CPUs info. */
3578707SN/A	logical_cpus = logical_cpus_mask = 0;
3588707SN/A	if (cpu_feature & CPUID_HTT)
3598707SN/A		logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
3600SN/A
3610SN/A	set_logical_apic_ids();
3620SN/A}
3630SN/A
3640SN/A
36511218Sbpb/*
36611218Sbpb * Print various information about the SMP system hardware and setup.
36711218Sbpb */
36811218Sbpbvoid
36911218Sbpbcpu_mp_announce(void)
37011218Sbpb{
37111218Sbpb	int i, x;
37211218Sbpb
37311218Sbpb	POSTCODE(MP_ANNOUNCE_POST);
37411218Sbpb
37511218Sbpb	/* List CPUs */
3760SN/A	printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
3770SN/A	for (i = 1, x = 0; x < MAXCPU; x++) {
3780SN/A		if (cpu_info[x].cpu_present && !cpu_info[x].cpu_bsp) {
3790SN/A			KASSERT(i < mp_ncpus,
3800SN/A			    ("mp_ncpus and actual cpus are out of whack"));
38111218Sbpb			printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
38211218Sbpb		}
3830SN/A	}
3840SN/A}
3850SN/A
/*
 * AP CPU's call this to initialize themselves.
 *
 * The AP builds and loads its own GDT, loads the IDT, LDT and TSS,
 * announces itself to the BSP by incrementing mp_naps, and then spins until
 * aps_ready is set.  After that it finishes CPU, FPU, SSE and local APIC
 * setup (the APIC part under ap_boot_mtx), waits for smp_started and enters
 * the scheduler.  This function does not return.
 */
void
init_secondary(void)
{
	int	gsel_tss;
	int	x, myid;
	u_int	cr0;

	/* bootAP is set in start_all_aps() to our logical CPU id. */
	myid = bootAP;
	/* Point the per-CPU segments at this CPU's private space and TSS. */
	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
	gdt_segs[GPROC0_SEL].ssd_base =
		(int) &SMP_prvspace[myid].pcpu.pc_common_tss;
	SMP_prvspace[myid].pcpu.pc_prvspace =
		&SMP_prvspace[myid].pcpu;

	/* Build this CPU's GDT in its own NGDT-sized slice of gdt[]. */
	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	/* Fill in this CPU's TSS and load the task register. */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);
	CHECK_WRITE(0x38, 5);

	/* Disable local APIC just to be sure. */
	lapic_disable();

	/*
	 * Signal our startup to the BSP: start_ap() is polling mp_naps and
	 * treats an increase as success.
	 */
	mp_naps++;
	CHECK_WRITE(0x39, 6);

	/* Spin until the BSP releases the AP's. */
	while (!aps_ready)
		ia32_pause();

	/* BSP may have changed PTD while we were waiting */
	invltlb();
	pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);

	/* NOTE(review): second IDT load, only built with the F00F hack -- presumably the IDT was relocated; confirm. */
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
	lidt(&r_idt);
#endif

	/* set up CPU registers and state */
	cpu_setregs();

	/* set up FPU state on the AP */
	npxinit(__INITIAL_NPXCW__);

	/* set up SSE registers */
	enable_sse();

	/*
	 * Sanity check: the APIC ID recorded in our per-CPU data must be
	 * the one the local APIC actually reports.
	 */
	if (PCPU_GET(apic_id) != lapic_id()) {
		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
		printf("SMP: actual apic_id = %d\n", lapic_id());
		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
		printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
		panic("cpuid mismatch! boom!!");
	}

	/* The remaining setup is done by one AP at a time. */
	mtx_lock_spin(&ap_boot_mtx);

	/* Init local apic for irq's */
	lapic_setup();

	/* Set memory range attributes for this CPU to match the BSP */
	mem_range_AP_init();

	smp_cpus++;

	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));

	/*
	 * Determine if we are a logical CPU: with more than one logical CPU
	 * per package, any APIC ID that is not a multiple of logical_cpus is
	 * recorded in logical_cpus_mask.
	 */
	if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
		logical_cpus_mask |= PCPU_GET(cpumask);

	/* Build our map of 'other' CPUs. */
	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

	if (bootverbose)
		lapic_dump("AP");

	/* The last CPU to arrive declares SMP started. */
	if (smp_cpus == mp_ncpus) {
		/* enable IPI's, tlb shootdown, freezes etc */
		atomic_store_rel_int(&smp_started, 1);
		smp_active = 1;	 /* historic */
	}

	mtx_unlock_spin(&ap_boot_mtx);

	/* wait until all the AP's are up */
	while (smp_started == 0)
		ia32_pause();

	/* ok, now grab sched_lock and enter the scheduler */
	mtx_lock_spin(&sched_lock);

	binuptime(PCPU_PTR(switchtime));
	PCPU_SET(switchticks, ticks);

	cpu_throw(NULL, choosethread());	/* doesn't return */

	panic("scheduler returned us to %s", __func__);
	/* NOTREACHED */
}
5190SN/A
5201246SN/A/*******************************************************************
5211246SN/A * local functions and data
5221246SN/A */
5230SN/A
5240SN/A/*
5258707SN/A * Set the APIC logical IDs.
5268707SN/A *
5278707SN/A * We want to cluster logical CPU's within the same APIC ID cluster.
5288707SN/A * Since logical CPU's are aligned simply filling in the clusters in
5298707SN/A * APIC ID order works fine.  Note that this does not try to balance
5301246SN/A * the number of CPU's in each cluster. (XXX?)
5310SN/A */
5320SN/Astatic void
5330SN/Aset_logical_apic_ids(void)
5340SN/A{
5350SN/A	u_int apic_id, cluster, cluster_id;
5360SN/A
5371246SN/A	/* Force us to allocate cluster 0 at the start. */
5381246SN/A	cluster = -1;
5390SN/A	cluster_id = APIC_MAX_INTRACLUSTER_ID;
5400SN/A	for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
5410SN/A		if (!cpu_info[apic_id].cpu_present)
5420SN/A			continue;
5430SN/A		if (cluster_id == APIC_MAX_INTRACLUSTER_ID) {
5441816SN/A			cluster = ioapic_next_logical_cluster();
5450SN/A			cluster_id = 0;
5460SN/A		} else
5470SN/A			cluster_id++;
5480SN/A		if (bootverbose)
5491246SN/A			printf("APIC ID: physical %u, logical %u:%u\n",
5500SN/A			    apic_id, cluster, cluster_id);
5510SN/A		lapic_set_logical_id(apic_id, cluster, cluster_id);
5521246SN/A	}
5538707SN/A}
5548707SN/A
/*
 * Start each AP in our list.
 *
 * Installs the trampoline, sets up a temporary P==V mapping, and then for
 * every present non-BSP entry of cpu_info[] allocates the AP's private
 * data page and idle stack, points the warm-boot vector at the trampoline
 * and calls start_ap().  Afterwards the warm-boot vector and CMOS reset
 * byte are restored, the BSP's idle stack is set up and the temporary
 * mapping is removed.
 *
 * Returns mp_naps, the number of APs that checked in.
 */
static int
start_all_aps(void)
{
#ifndef PC98
	u_char mpbiosreason;
#endif
	u_long mpbioswarmvec;
	struct pcpu *pc;
	char *stack;
	uintptr_t kptbase;
	int i, pg, apic_id, cpu;

	POSTCODE(START_ALL_APS_POST);

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* install the AP 1st level boot code */
	install_ap_tramp();

	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
#ifndef PC98
	/* ... and of the CMOS reset byte, so both can be restored below */
	outb(CMOS_REG, BIOS_RESET);
	mpbiosreason = inb(CMOS_DATA);
#endif

	/* set up temporary P==V mapping for AP boot */
	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
	kptbase = (uintptr_t)(void *)KPTphys;
	for (i = 0; i < NKPT; i++)
		PTD[i] = (pd_entry_t)(PG_V | PG_RW |
		    ((kptbase + i * PAGE_SIZE) & PG_FRAME));
	invltlb();

	/*
	 * start each AP: 'apic_id' walks cpu_info[], 'cpu' is the logical
	 * id handed to the AP (the BSP is 0, so APs start at 1).
	 */
	for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) {
		if (!cpu_info[apic_id].cpu_present ||
		    cpu_info[apic_id].cpu_bsp)
			continue;
		cpu++;

		/* save APIC ID for this logical ID */
		cpu_apic_ids[cpu] = apic_id;

		/* first page of AP's private space */
		pg = cpu * i386_btop(sizeof(struct privatespace));

		/* allocate a new private data page */
		pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);

		/* wire it into the private page table page */
		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));

		/* allocate and set up an idle stack data page */
		stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
		for (i = 0; i < KSTACK_PAGES; i++)
			SMPpt[pg + 1 + i] = (pt_entry_t)
			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

		/* prime data page for it to use */
		pcpu_init(pc, cpu, sizeof(struct pcpu));
		pc->pc_apic_id = apic_id;

		/* setup a vector to our boot code */
		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
#ifndef PC98
		outb(CMOS_REG, BIOS_RESET);
		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
#endif

		/*
		 * Hand the AP its stack (the end of its idle stack) and its
		 * logical id; init_secondary() reads bootAP.
		 */
		bootSTK = &SMP_prvspace[cpu].idlekstack[KSTACK_PAGES *
		    PAGE_SIZE];
		bootAP = cpu;

		/* attempt to start the Application Processor */
		CHECK_INIT(99);	/* setup checkpoints */
		if (!start_ap(apic_id)) {
			printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
			CHECK_PRINT("trace");	/* show checkpoints */
			/* better panic as the AP may be running loose */
			printf("panic y/n? [y] ");
			if (cngetc() != 'n')
				panic("bye-bye");
		}
		CHECK_PRINT("trace");		/* show checkpoints */

		all_cpus |= (1 << cpu);		/* record AP in CPU map */
	}

	/* build our map of 'other' CPUs */
	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

	/* restore the warmstart vector */
	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
#ifndef PC98
	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, mpbiosreason);
#endif

	/*
	 * Set up the idle context for the BSP.  Similar to above except
	 * that some was done by locore, some by pmap.c and some is implicit
	 * because the BSP is cpu#0 and the page is initially zero and also
	 * because we can refer to variables by name on the BSP..
	 */

	/* Allocate and setup BSP idle stack */
	stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
	for (i = 0; i < KSTACK_PAGES; i++)
		SMPpt[1 + i] = (pt_entry_t)
		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));

	/* tear down the temporary P==V mapping set up above */
	for (i = 0; i < NKPT; i++)
		PTD[i] = 0;
	pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);

	/* number of APs actually started */
	return mp_naps;
}
6780SN/A
/*
 * Load the 1st level AP boot code into base memory.
 */

/*
 * Targets for relocation: symbols inside the boot code whose copies in
 * base memory are patched by install_ap_tramp().
 */
extern void bigJump(void);
extern void bootCodeSeg(void);
extern void bootDataSeg(void);
extern void MPentry(void);
extern u_int MP_GDT;
extern u_int mp_gdtbase;

/*
 * Copy the boot code (bootMP, bootMP_size bytes) to boot_address and fix
 * up the addresses embedded in the copy so that it works from its new
 * location.  The copy is written through the KERNBASE + boot_address
 * mapping entered with pmap_kenter().
 */
static void
install_ap_tramp(void)
{
	int     x;
	int     size = *(int *) ((u_long) & bootMP_size);
	u_char *src = (u_char *) ((u_long) bootMP);
	u_char *dst = (u_char *) boot_address + KERNBASE;
	/* address of the original boot code; patch offsets are taken from it */
	u_int   boot_base = (u_int) bootMP;
	u_int8_t *dst8;
	u_int16_t *dst16;
	u_int32_t *dst32;

	POSTCODE(INSTALL_AP_TRAMP_POST);

	/* map the target page, then copy the code byte by byte */
	pmap_kenter(boot_address + KERNBASE, boot_address);
	for (x = 0; x < size; ++x)
		*dst++ = *src++;

	/*
	 * modify addresses in code we just moved to basemem. unfortunately we
	 * need fairly detailed info about mpboot.s for this to work.  changes
	 * to mpboot.s might require changes here.
	 */

	/* boot code is located in KERNEL space */
	dst = (u_char *) boot_address + KERNBASE;

	/* modify the lgdt arg: address of MP_GDT within the copy */
	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
	*dst32 = boot_address + ((u_int) & MP_GDT - boot_base);

	/* modify the ljmp target for MPentry(); the operand is 1 byte in */
	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
	*dst32 = ((u_int) MPentry - KERNBASE);

	/*
	 * modify the target for boot code segment: low 16 bits of
	 * boot_address, followed by bits 16-23 in the next byte
	 */
	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_address & 0xffff;
	*dst8 = ((u_int) boot_address >> 16) & 0xff;

	/* modify the target for boot data segment, same layout as above */
	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_address & 0xffff;
	*dst8 = ((u_int) boot_address >> 16) & 0xff;
}
7380SN/A
/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'apic_id'.  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 *
 * The sequence is an INIT IPI (assert, then deassert) followed by two
 * STARTUP IPIs.  The AP reports in by incrementing mp_naps.
 *
 * Returns 1 if mp_naps increased within about 5 seconds, 0 otherwise.
 */
static int
start_ap(int apic_id)
{
	int vector, ms;
	int cpus;

	POSTCODE(START_AP_POST);

	/* calculate the vector: the page number of the trampoline */
	vector = (boot_address >> 12) & 0xff;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_naps;

	/*
	 * first we do an INIT/RESET IPI.  this INIT IPI might be run,
	 * resetting and running the target CPU. OR this INIT IPI might be
	 * latched (P5 bug), CPU waiting for STARTUP IPI. OR this INIT IPI
	 * might be ignored.
	 */

	/* do an INIT IPI: assert RESET */
	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);

	/* wait for pending status end */
	lapic_ipi_wait(-1);

	/* do an INIT IPI: deassert RESET */
	lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);

	/* wait for pending status end */
	DELAY(10000);		/* wait ~10mS */
	lapic_ipi_wait(-1);

	/*
	 * next we do a STARTUP IPI: the previous INIT IPI might still be
	 * latched (P5 bug); this 1st STARTUP would then terminate
	 * immediately, and the previously started INIT IPI would continue. OR
	 * the previous INIT IPI has already run, and this STARTUP IPI will
	 * run. OR the previous INIT IPI was ignored, and this STARTUP IPI
	 * will run.
	 */

	/* do a STARTUP IPI */
	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
	    vector, apic_id);
	lapic_ipi_wait(-1);
	DELAY(200);		/* wait ~200uS */

	/*
	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
	 * recognized after hardware RESET or INIT IPI.
	 */

	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
	    vector, apic_id);
	lapic_ipi_wait(-1);
	DELAY(200);		/* wait ~200uS */

	/*
	 * Wait up to 5 seconds for it to start: poll mp_naps, which the AP
	 * increments in init_secondary(), once per millisecond.
	 */
	for (ms = 0; ms < 5000; ms++) {
		if (mp_naps > cpus)
			return 1;	/* return SUCCESS */
		DELAY(1000);
	}
	return 0;		/* return FAILURE */
}
8190SN/A
#ifdef COUNT_XINVLTLB_HITS
/*
 * Optional TLB shootdown statistics, exported under the debug.xhits
 * sysctl node.  Everything is read-write (CTLFLAG_RW).
 */

/* Arrays with one slot per CPU (MAXCPU), exported as opaque data. */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
    sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
    sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
    sizeof(xhits_rng), "IU", "");

/*
 * Scalar counters for the global / page / range cases.
 * NOTE(review): presumably bumped by the unmasked shootdown paths, which
 * are not in this part of the file -- confirm.
 */
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
    0, "");

/*
 * The same set of counters for the "masked" variants.
 * NOTE(review): presumably the targeted (CPU-mask) shootdowns -- confirm.
 */
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
    &ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
    &ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
    &ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
    &ipi_masked_range_size, 0, "");
#endif /* COUNT_XINVLTLB_HITS */
8550SN/A
8560SN/A/*
8570SN/A * Flush the TLB on all other CPU's
8580SN/A */
8590SN/Astatic void
8600SN/Asmp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
8610SN/A{
8620SN/A	u_int ncpu;
8630SN/A
8640SN/A	ncpu = mp_ncpus - 1;	/* does not shootdown self */
8650SN/A	if (ncpu < 1)
8660SN/A		return;		/* no other cpus */
8670SN/A	mtx_assert(&smp_tlb_mtx, MA_OWNED);
8680SN/A	smp_tlb_addr1 = addr1;
8690SN/A	smp_tlb_addr2 = addr2;
8700SN/A	atomic_store_rel_int(&smp_tlb_wait, 0);
8710SN/A	ipi_all_but_self(vector);
8727747SN/A	while (smp_tlb_wait < ncpu)
8730SN/A		ia32_pause();
8740SN/A}
8750SN/A
8760SN/A/*
8770SN/A * This is about as magic as it gets.  fortune(1) has got similar code
8780SN/A * for reversing bits in a word.  Who thinks up this stuff??
8790SN/A *
8800SN/A * Yes, it does appear to be consistently faster than:
8810SN/A * while (i = ffs(m)) {
8820SN/A *	m >>= i;
8830SN/A *	bits++;
8840SN/A * }
8850SN/A * and
8860SN/A * while (lsb = (m & -m)) {	// This is magic too
8870SN/A * 	m &= ~lsb;		// or: m ^= lsb
8880SN/A *	bits++;
8890SN/A * }
8900SN/A * Both of these latter forms do some very strange things on gcc-3.1 with
8910SN/A * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
8920SN/A * There is probably an SSE or MMX popcnt instruction.
8930SN/A *
8940SN/A * I wonder if this should be in libkern?
8950SN/A *
8960SN/A * XXX Stop the presses!  Another one:
8970SN/A * static __inline u_int32_t
8980SN/A * popcnt1(u_int32_t v)
8990SN/A * {
9000SN/A *	v -= ((v >> 1) & 0x55555555);
9018707SN/A *	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
9020SN/A *	v = (v + (v >> 4)) & 0x0F0F0F0F;
9037747SN/A *	return (v * 0x01010101) >> 24;
9040SN/A * }
9050SN/A * The downside is that it has a multiply.  With a pentium3 with
9060SN/A * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
9070SN/A * an imull, and in that case it is faster.  In most other cases
9080SN/A * it appears slightly slower.
9090SN/A *
9100SN/A * Another variant (also from fortune):
9110SN/A * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
9120SN/A * #define  BX_(x)     ((x) - (((x)>>1)&0x77777777)            \
9138707SN/A *                          - (((x)>>2)&0x33333333)            \
9148707SN/A *                          - (((x)>>3)&0x11111111))
9158707SN/A */
static __inline u_int32_t
popcnt(u_int32_t m)
{
	u_int32_t sum;

	/*
	 * Count the set bits in m by adding neighbouring bit fields
	 * pairwise.  The field width doubles at each step (1, 2, 4, 8,
	 * 16 bits) until a single 32-bit field holds the total.
	 */
	sum = ((m >> 1) & 0x55555555) + (m & 0x55555555);
	sum = ((sum >> 2) & 0x33333333) + (sum & 0x33333333);
	sum = ((sum >> 4) & 0x0f0f0f0f) + (sum & 0x0f0f0f0f);
	sum = ((sum >> 8) & 0x00ff00ff) + (sum & 0x00ff00ff);
	sum = (sum >> 16) + (sum & 0x0000ffff);
	return (sum);
}
9270SN/A
9280SN/Astatic void
9290SN/Asmp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
9300SN/A{
9310SN/A	int ncpu, othercpus;
9320SN/A
9330SN/A	othercpus = mp_ncpus - 1;
9340SN/A	if (mask == (u_int)-1) {
9350SN/A		ncpu = othercpus;
9360SN/A		if (ncpu < 1)
9370SN/A			return;
9380SN/A	} else {
9390SN/A		mask &= ~PCPU_GET(cpumask);
9400SN/A		if (mask == 0)
9410SN/A			return;
9420SN/A		ncpu = popcnt(mask);
9430SN/A		if (ncpu > othercpus) {
9440SN/A			/* XXX this should be a panic offence */
9450SN/A			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
9460SN/A			    ncpu, othercpus);
9470SN/A			ncpu = othercpus;
9480SN/A		}
9490SN/A		/* XXX should be a panic, implied by mask == 0 above */
9500SN/A		if (ncpu < 1)
9510SN/A			return;
9520SN/A	}
9530SN/A	mtx_assert(&smp_tlb_mtx, MA_OWNED);
9540SN/A	smp_tlb_addr1 = addr1;
9550SN/A	smp_tlb_addr2 = addr2;
9560SN/A	atomic_store_rel_int(&smp_tlb_wait, 0);
9570SN/A	if (mask == (u_int)-1)
9580SN/A		ipi_all_but_self(vector);
9590SN/A	else
9600SN/A		ipi_selected(mask, vector);
9610SN/A	while (smp_tlb_wait < ncpu)
9620SN/A		ia32_pause();
9630SN/A}
9640SN/A
9650SN/Avoid
9660SN/Asmp_invltlb(void)
9670SN/A{
9680SN/A	if (smp_started) {
9690SN/A		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
9700SN/A#ifdef COUNT_XINVLTLB_HITS
9710SN/A		ipi_global++;
9720SN/A#endif
9730SN/A	}
9740SN/A}
9750SN/A
9760SN/Avoid
9777747SN/Asmp_invlpg(vm_offset_t addr)
9780SN/A{
9790SN/A	if (smp_started) {
9800SN/A		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
9810SN/A#ifdef COUNT_XINVLTLB_HITS
9820SN/A		ipi_page++;
9830SN/A#endif
9840SN/A	}
9850SN/A}
9860SN/A
9870SN/Avoid
9880SN/Asmp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
9890SN/A{
9900SN/A	if (smp_started) {
9910SN/A		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
9920SN/A#ifdef COUNT_XINVLTLB_HITS
9930SN/A		ipi_range++;
9940SN/A		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
9950SN/A#endif
9960SN/A	}
9970SN/A}
9980SN/A
9990SN/Avoid
10000SN/Asmp_masked_invltlb(u_int mask)
10010SN/A{
10020SN/A	if (smp_started) {
10030SN/A		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
10040SN/A#ifdef COUNT_XINVLTLB_HITS
10050SN/A		ipi_masked_global++;
10060SN/A#endif
10070SN/A	}
10080SN/A}
10090SN/A
10100SN/Avoid
10110SN/Asmp_masked_invlpg(u_int mask, vm_offset_t addr)
10120SN/A{
10130SN/A	if (smp_started) {
10140SN/A		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
10150SN/A#ifdef COUNT_XINVLTLB_HITS
10160SN/A		ipi_masked_page++;
10170SN/A#endif
10180SN/A	}
10190SN/A}
10200SN/A
10210SN/Avoid
10220SN/Asmp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
10230SN/A{
10240SN/A	if (smp_started) {
10250SN/A		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
10260SN/A#ifdef COUNT_XINVLTLB_HITS
10270SN/A		ipi_masked_range++;
10280SN/A		ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
10290SN/A#endif
10300SN/A	}
10310SN/A}
10320SN/A
10330SN/A
10340SN/A/*
10350SN/A * For statclock, we send an IPI to all CPU's to have them call this
10360SN/A * function.
10370SN/A */
10380SN/Avoid
10390SN/Aforwarded_statclock(struct clockframe frame)
10400SN/A{
10410SN/A	struct thread *td;
10420SN/A
10430SN/A	CTR0(KTR_SMP, "forwarded_statclock");
10440SN/A	td = curthread;
10450SN/A	td->td_intr_nesting_level++;
10460SN/A	if (profprocs != 0)
10470SN/A		profclock(&frame);
10480SN/A	if (pscnt == psdiv)
10490SN/A		statclock(&frame);
10507747SN/A	td->td_intr_nesting_level--;
10510SN/A}
10520SN/A
10530SN/Avoid
10541246SN/Aforward_statclock(void)
10551246SN/A{
10561246SN/A	int map;
10570SN/A
10580SN/A	CTR0(KTR_SMP, "forward_statclock");
10590SN/A
10600SN/A	if (!smp_started || cold || panicstr)
10610SN/A		return;
10621246SN/A
10631246SN/A	map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
10641246SN/A	if (map != 0)
10650SN/A		ipi_selected(map, IPI_STATCLOCK);
10660SN/A}
10671246SN/A
10681246SN/A/*
10690SN/A * For each hardclock(), we send an IPI to all other CPU's to have them
10700SN/A * execute this function.  It would be nice to reduce contention on
10710SN/A * sched_lock if we could simply peek at the CPU to determine the user/kernel
10720SN/A * state and call hardclock_process() on the CPU receiving the clock interrupt
10730SN/A * and then just use a simple IPI to handle any ast's if needed.
10740SN/A */
10750SN/Avoid
10760SN/Aforwarded_hardclock(struct clockframe frame)
10770SN/A{
10780SN/A	struct thread *td;
10790SN/A
10800SN/A	CTR0(KTR_SMP, "forwarded_hardclock");
10810SN/A	td = curthread;
10820SN/A	td->td_intr_nesting_level++;
10830SN/A	hardclock_process(&frame);
10840SN/A	td->td_intr_nesting_level--;
10850SN/A}
10860SN/A
10870SN/Avoid
10880SN/Aforward_hardclock(void)
10890SN/A{
10900SN/A	u_int map;
10910SN/A
10920SN/A	CTR0(KTR_SMP, "forward_hardclock");
10930SN/A
10940SN/A	if (!smp_started || cold || panicstr)
10958351SN/A		return;
10960SN/A
10977747SN/A	map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask);
10980SN/A	if (map != 0)
10990SN/A		ipi_selected(map, IPI_HARDCLOCK);
11000SN/A}
11010SN/A
11020SN/A/*
11030SN/A * send an IPI to a set of cpus.
11040SN/A */
11050SN/Avoid
11067747SN/Aipi_selected(u_int32_t cpus, u_int ipi)
11077747SN/A{
11080SN/A	int cpu;
11090SN/A
11100SN/A	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
11110SN/A	while ((cpu = ffs(cpus)) != 0) {
11120SN/A		cpu--;
11130SN/A		KASSERT(cpu_apic_ids[cpu] != -1,
11140SN/A		    ("IPI to non-existent CPU %d", cpu));
11150SN/A		lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
11161246SN/A		cpus &= ~(1 << cpu);
11170SN/A	}
11180SN/A}
11190SN/A
11201246SN/A/*
11217747SN/A * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
11220SN/A */
11238707SN/Avoid
11248707SN/Aipi_all(u_int ipi)
11258707SN/A{
11260SN/A
11270SN/A	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
11280SN/A	lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL);
11290SN/A}
113011218Sbpb
113111218Sbpb/*
11320SN/A * send an IPI to all CPUs EXCEPT myself
11330SN/A */
11347747SN/Avoid
113511218Sbpbipi_all_but_self(u_int ipi)
11368707SN/A{
11378707SN/A
11388707SN/A	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
11398707SN/A	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
11408707SN/A}
11418707SN/A
11428707SN/A/*
11438707SN/A * send an IPI to myself
11448707SN/A */
11458707SN/Avoid
11468707SN/Aipi_self(u_int ipi)
11478707SN/A{
11488707SN/A
11498707SN/A	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
11508707SN/A	lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
11518707SN/A}
11528707SN/A
11538707SN/A/*
11548707SN/A * This is called once the rest of the system is up and running and we're
11550SN/A * ready to let the AP's out of the pen.
11560SN/A */
11570SN/Astatic void
11580SN/Arelease_aps(void *dummy __unused)
11590SN/A{
11600SN/A
116114602Sdarcy	if (mp_ncpus == 1)
116214602Sdarcy		return;
116314602Sdarcy	mtx_lock_spin(&sched_lock);
116414602Sdarcy	atomic_store_rel_int(&aps_ready, 1);
116514602Sdarcy	while (smp_started == 0)
11660SN/A		ia32_pause();
11670SN/A	mtx_unlock_spin(&sched_lock);
11680SN/A}
11690SN/ASYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
11700SN/A
11710SN/Astatic int
11720SN/Asysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
11730SN/A{
11740SN/A	u_int mask;
11750SN/A	int error;
11760SN/A
11770SN/A	mask = hlt_cpus_mask;
11780SN/A	error = sysctl_handle_int(oidp, &mask, 0, req);
11790SN/A	if (error || !req->newptr)
11800SN/A		return (error);
11810SN/A
11820SN/A	if (logical_cpus_mask != 0 &&
11830SN/A	    (mask & logical_cpus_mask) == logical_cpus_mask)
11840SN/A		hlt_logical_cpus = 1;
11850SN/A	else
11860SN/A		hlt_logical_cpus = 0;
11871246SN/A
11880SN/A	if ((mask & all_cpus) == all_cpus)
11890SN/A		mask &= ~(1<<0);
11900SN/A	hlt_cpus_mask = mask;
11910SN/A	return (error);
11920SN/A}
11930SN/ASYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
11947747SN/A    0, 0, sysctl_hlt_cpus, "IU", "");
11950SN/A
11960SN/Astatic int
11970SN/Asysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
11980SN/A{
11990SN/A	int disable, error;
12000SN/A
12010SN/A	disable = hlt_logical_cpus;
12020SN/A	error = sysctl_handle_int(oidp, &disable, 0, req);
12030SN/A	if (error || !req->newptr)
12040SN/A		return (error);
12050SN/A
12060SN/A	if (disable)
12070SN/A		hlt_cpus_mask |= logical_cpus_mask;
12080SN/A	else
12090SN/A		hlt_cpus_mask &= ~logical_cpus_mask;
12107747SN/A
12110SN/A	if ((hlt_cpus_mask & all_cpus) == all_cpus)
12120SN/A		hlt_cpus_mask &= ~(1<<0);
12130SN/A
12140SN/A	hlt_logical_cpus = disable;
12150SN/A	return (error);
12160SN/A}
12170SN/A
121812745Smartinstatic void
12190SN/Acpu_hlt_setup(void *dummy __unused)
12200SN/A{
12217495SN/A
12227546SN/A	if (logical_cpus_mask != 0) {
12237546SN/A		TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
12247546SN/A		    &hlt_logical_cpus);
12257546SN/A		sysctl_ctx_init(&logical_cpu_clist);
12267546SN/A		SYSCTL_ADD_PROC(&logical_cpu_clist,
12277586SN/A		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
12287546SN/A		    "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
12297546SN/A		    sysctl_hlt_logical_cpus, "IU", "");
12307546SN/A		SYSCTL_ADD_UINT(&logical_cpu_clist,
12317546SN/A		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
12327546SN/A		    "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
12337546SN/A		    &logical_cpus_mask, 0, "");
12347546SN/A
12350SN/A		if (hlt_logical_cpus)
12360SN/A			hlt_cpus_mask |= logical_cpus_mask;
12370SN/A	}
12380SN/A}
12390SN/ASYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
12400SN/A
12410SN/Aint
12427546SN/Amp_grab_cpu_hlt(void)
12437546SN/A{
12447546SN/A	u_int mask = PCPU_GET(cpumask);
12457546SN/A	int retval;
12467546SN/A
12477546SN/A	retval = mask & hlt_cpus_mask;
12487586SN/A	while (mask & hlt_cpus_mask)
12497546SN/A		__asm __volatile("sti; hlt" : : : "memory");
12507546SN/A	return (retval);
12517747SN/A}
12527586SN/A