subr_smp.c revision 67365
125164Speter/*
225164Speter * Copyright (c) 1996, by Steve Passe
325164Speter * All rights reserved.
425164Speter *
525164Speter * Redistribution and use in source and binary forms, with or without
625164Speter * modification, are permitted provided that the following conditions
725164Speter * are met:
825164Speter * 1. Redistributions of source code must retain the above copyright
925164Speter *    notice, this list of conditions and the following disclaimer.
1025164Speter * 2. The name of the developer may NOT be used to endorse or promote products
1125164Speter *    derived from this software without specific prior written permission.
1225164Speter *
1325164Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1425164Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1525164Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1625164Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1725164Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1825164Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
1925164Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2025164Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2125164Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2225164Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2325164Speter * SUCH DAMAGE.
2425164Speter *
2550477Speter * $FreeBSD: head/sys/kern/subr_smp.c 67365 2000-10-20 07:58:15Z jhb $
2625164Speter */
2725164Speter
2834197Stegge#include "opt_cpu.h"
2938422Smsmith#include "opt_user_ldt.h"
3025164Speter
3131639Sfsmp#ifdef SMP
3231639Sfsmp#include <machine/smptests.h>
3331639Sfsmp#else
3431639Sfsmp#error
3531639Sfsmp#endif
3631639Sfsmp
3728743Sbde#include <sys/param.h>
3865557Sjasone#include <sys/bus.h>
3925164Speter#include <sys/systm.h>
4028808Speter#include <sys/kernel.h>
4128808Speter#include <sys/proc.h>
4228808Speter#include <sys/sysctl.h>
4346703Speter#include <sys/malloc.h>
4446703Speter#include <sys/memrange.h>
4567365Sjhb#include <sys/mutex.h>
4631639Sfsmp#ifdef BETTER_CLOCK
4731639Sfsmp#include <sys/dkstat.h>
4831639Sfsmp#endif
4949558Sphk#include <sys/cons.h>	/* cngetc() */
5025164Speter
5128808Speter#include <vm/vm.h>
5228808Speter#include <vm/vm_param.h>
5328808Speter#include <vm/pmap.h>
5426812Speter#include <vm/vm_kern.h>
5526812Speter#include <vm/vm_extern.h>
5631639Sfsmp#ifdef BETTER_CLOCK
5731639Sfsmp#include <sys/lock.h>
5831639Sfsmp#include <vm/vm_map.h>
5931639Sfsmp#include <sys/user.h>
6031689Stegge#ifdef GPROF
6131689Stegge#include <sys/gmon.h>
6231639Sfsmp#endif
6331689Stegge#endif
6425164Speter
6525164Speter#include <machine/smp.h>
6625164Speter#include <machine/apic.h>
6748924Smsmith#include <machine/atomic.h>
6848924Smsmith#include <machine/cpufunc.h>
6925164Speter#include <machine/mpapic.h>
7048924Smsmith#include <machine/psl.h>
7125164Speter#include <machine/segments.h>
7227697Sfsmp#include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
7326812Speter#include <machine/tss.h>
7426896Stegge#include <machine/specialreg.h>
7535077Speter#include <machine/globaldata.h>
7625164Speter
7725215Sfsmp#if defined(APIC_IO)
7827289Sfsmp#include <machine/md_var.h>		/* setidt() */
7927289Sfsmp#include <i386/isa/icu.h>		/* IPIs */
8027289Sfsmp#include <i386/isa/intr_machdep.h>	/* IPIs */
8125215Sfsmp#endif	/* APIC_IO */
8225164Speter
8328027Sfsmp#if defined(TEST_DEFAULT_CONFIG)
8428027Sfsmp#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
8528027Sfsmp#else
8628027Sfsmp#define MPFPS_MPFB1	mpfps->mpfb1
8728027Sfsmp#endif  /* TEST_DEFAULT_CONFIG */
8828027Sfsmp
8927005Sfsmp#define WARMBOOT_TARGET		0
9027005Sfsmp#define WARMBOOT_OFF		(KERNBASE + 0x0467)
9127005Sfsmp#define WARMBOOT_SEG		(KERNBASE + 0x0469)
9225164Speter
9340067Skato#ifdef PC98
9440067Skato#define BIOS_BASE		(0xe8000)
9540067Skato#define BIOS_SIZE		(0x18000)
9640067Skato#else
9727005Sfsmp#define BIOS_BASE		(0xf0000)
9827005Sfsmp#define BIOS_SIZE		(0x10000)
9940067Skato#endif
10027005Sfsmp#define BIOS_COUNT		(BIOS_SIZE/4)
10125164Speter
10227005Sfsmp#define CMOS_REG		(0x70)
10327005Sfsmp#define CMOS_DATA		(0x71)
10427005Sfsmp#define BIOS_RESET		(0x0f)
10527005Sfsmp#define BIOS_WARM		(0x0a)
10625164Speter
10726155Sfsmp#define PROCENTRY_FLAG_EN	0x01
10826155Sfsmp#define PROCENTRY_FLAG_BP	0x02
10926155Sfsmp#define IOAPICENTRY_FLAG_EN	0x01
11026155Sfsmp
11127005Sfsmp
11226155Sfsmp/* MP Floating Pointer Structure */
11326155Sfsmptypedef struct MPFPS {
11426155Sfsmp	char    signature[4];
11526155Sfsmp	void   *pap;
11626155Sfsmp	u_char  length;
11726155Sfsmp	u_char  spec_rev;
11826155Sfsmp	u_char  checksum;
11926155Sfsmp	u_char  mpfb1;
12026155Sfsmp	u_char  mpfb2;
12126155Sfsmp	u_char  mpfb3;
12226155Sfsmp	u_char  mpfb4;
12326155Sfsmp	u_char  mpfb5;
12426155Sfsmp}      *mpfps_t;
12526155Sfsmp
12626155Sfsmp/* MP Configuration Table Header */
12726155Sfsmptypedef struct MPCTH {
12826155Sfsmp	char    signature[4];
12926155Sfsmp	u_short base_table_length;
13026155Sfsmp	u_char  spec_rev;
13126155Sfsmp	u_char  checksum;
13226155Sfsmp	u_char  oem_id[8];
13326155Sfsmp	u_char  product_id[12];
13426155Sfsmp	void   *oem_table_pointer;
13526155Sfsmp	u_short oem_table_size;
13626155Sfsmp	u_short entry_count;
13726155Sfsmp	void   *apic_address;
13826155Sfsmp	u_short extended_table_length;
13926155Sfsmp	u_char  extended_table_checksum;
14026155Sfsmp	u_char  reserved;
14126155Sfsmp}      *mpcth_t;
14226155Sfsmp
14326155Sfsmp
14426155Sfsmptypedef struct PROCENTRY {
14526155Sfsmp	u_char  type;
14626155Sfsmp	u_char  apic_id;
14726155Sfsmp	u_char  apic_version;
14826155Sfsmp	u_char  cpu_flags;
14926155Sfsmp	u_long  cpu_signature;
15026155Sfsmp	u_long  feature_flags;
15126155Sfsmp	u_long  reserved1;
15226155Sfsmp	u_long  reserved2;
15326155Sfsmp}      *proc_entry_ptr;
15426155Sfsmp
15526155Sfsmptypedef struct BUSENTRY {
15626155Sfsmp	u_char  type;
15726155Sfsmp	u_char  bus_id;
15826155Sfsmp	char    bus_type[6];
15926155Sfsmp}      *bus_entry_ptr;
16026155Sfsmp
16126155Sfsmptypedef struct IOAPICENTRY {
16226155Sfsmp	u_char  type;
16326155Sfsmp	u_char  apic_id;
16426155Sfsmp	u_char  apic_version;
16526155Sfsmp	u_char  apic_flags;
16626155Sfsmp	void   *apic_address;
16726155Sfsmp}      *io_apic_entry_ptr;
16826155Sfsmp
16926155Sfsmptypedef struct INTENTRY {
17026155Sfsmp	u_char  type;
17126155Sfsmp	u_char  int_type;
17226155Sfsmp	u_short int_flags;
17326155Sfsmp	u_char  src_bus_id;
17426155Sfsmp	u_char  src_bus_irq;
17526155Sfsmp	u_char  dst_apic_id;
17626155Sfsmp	u_char  dst_apic_int;
17726155Sfsmp}      *int_entry_ptr;
17826155Sfsmp
17926155Sfsmp/* descriptions of MP basetable entries */
18026155Sfsmptypedef struct BASETABLE_ENTRY {
18126155Sfsmp	u_char  type;
18226155Sfsmp	u_char  length;
18326155Sfsmp	char    name[16];
18426155Sfsmp}       basetable_entry;
18526155Sfsmp
18625164Speter/*
18725164Speter * this code MUST be enabled here and in mpboot.s.
18825164Speter * it follows the very early stages of AP boot by placing values in CMOS ram.
18925164Speter * it NORMALLY will never be needed and thus the primitive method for enabling.
19025164Speter *
19125164Speter#define CHECK_POINTS
19225164Speter */
19325164Speter
#if defined(CHECK_POINTS) && !defined(PC98)
/* read / write one CMOS ram location through the index/data port pair */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Store (D) in all six checkpoint locations (CMOS 0x34 - 0x39).
 * Wrapped in do/while(0) so that "CHECK_INIT(x);" is exactly one statement
 * and stays correct inside an unbraced if/else.
 */
#define CHECK_INIT(D)				\
	do {					\
		CHECK_WRITE(0x34, (D));		\
		CHECK_WRITE(0x35, (D));		\
		CHECK_WRITE(0x36, (D));		\
		CHECK_WRITE(0x37, (D));		\
		CHECK_WRITE(0x38, (D));		\
		CHECK_WRITE(0x39, (D));		\
	} while (0)

/*
 * Print the six checkpoint locations, tagged with the string (S).
 * A single expression; the caller supplies the terminating ';'.
 */
#define CHECK_PRINT(S)				\
	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
	   (S),					\
	   CHECK_READ(0x34),			\
	   CHECK_READ(0x35),			\
	   CHECK_READ(0x36),			\
	   CHECK_READ(0x37),			\
	   CHECK_READ(0x38),			\
	   CHECK_READ(0x39))

#else				/* CHECK_POINTS */

/* checkpoints disabled: both macros expand to nothing */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
22225164Speter
22327005Sfsmp/*
22427005Sfsmp * Values to send to the POST hardware.
22527005Sfsmp */
22627005Sfsmp#define MP_BOOTADDRESS_POST	0x10
22727005Sfsmp#define MP_PROBE_POST		0x11
22829213Sfsmp#define MPTABLE_PASS1_POST	0x12
22929213Sfsmp
23029213Sfsmp#define MP_START_POST		0x13
23129213Sfsmp#define MP_ENABLE_POST		0x14
23227005Sfsmp#define MPTABLE_PASS2_POST	0x15
23327005Sfsmp
23429213Sfsmp#define START_ALL_APS_POST	0x16
23529213Sfsmp#define INSTALL_AP_TRAMP_POST	0x17
23629213Sfsmp#define START_AP_POST		0x18
23729213Sfsmp
23829213Sfsmp#define MP_ANNOUNCE_POST	0x19
23929213Sfsmp
/* used to hold the APs until we are ready to release them */
24165557Sjasonestruct simplelock	ap_boot_lock;
24229213Sfsmp
24327289Sfsmp/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
24427255Sfsmpint	current_postcode;
24527255Sfsmp
24627289Sfsmp/** XXX FIXME: what system files declare these??? */
24725164Speterextern struct region_descriptor r_gdt, r_idt;
24825164Speter
24928669Sfsmpint	bsp_apic_ready = 0;	/* flags useability of BSP apic */
25026155Sfsmpint	mp_ncpus;		/* # of CPUs, including BSP */
25126155Sfsmpint	mp_naps;		/* # of Applications processors */
25226155Sfsmpint	mp_nbusses;		/* # of busses */
25326155Sfsmpint	mp_napics;		/* # of IO APICs */
25426155Sfsmpint	boot_cpu_id;		/* designated BSP */
25525164Spetervm_offset_t cpu_apic_address;
25626108Sfsmpvm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
25729655Sdysonextern	int nkpt;
25825164Speter
25966277Spsu_int32_t cpu_apic_versions[MAXCPU];
26066277Spsu_int32_t *io_apic_versions;
26125164Speter
26234021Stegge#ifdef APIC_INTR_DIAGNOSTIC
26334021Steggeint apic_itrace_enter[32];
26434021Steggeint apic_itrace_tryisrlock[32];
26534021Steggeint apic_itrace_gotisrlock[32];
26634021Steggeint apic_itrace_active[32];
26734021Steggeint apic_itrace_masked[32];
26834021Steggeint apic_itrace_noisrlock[32];
26934021Steggeint apic_itrace_masked2[32];
27034021Steggeint apic_itrace_unmask[32];
27134021Steggeint apic_itrace_noforward[32];
27234021Steggeint apic_itrace_leave[32];
27334021Steggeint apic_itrace_enter2[32];
27434021Steggeint apic_itrace_doreti[32];
27534021Steggeint apic_itrace_splz[32];
27634021Steggeint apic_itrace_eoi[32];
27734021Stegge#ifdef APIC_INTR_DIAGNOSTIC_IRQ
27834021Steggeunsigned short apic_itrace_debugbuffer[32768];
27934021Steggeint apic_itrace_debugbuffer_idx;
28034021Steggestruct simplelock apic_itrace_debuglock;
28134021Stegge#endif
28234021Stegge#endif
28334021Stegge
28434021Stegge#ifdef APIC_INTR_REORDER
28534021Steggestruct {
28634021Stegge	volatile int *location;
28734021Stegge	int bit;
28834021Stegge} apic_isrbit_location[32];
28934021Stegge#endif
29034021Stegge
29138888Steggestruct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
29238888Stegge
29325164Speter/*
29426108Sfsmp * APIC ID logical/physical mapping structures.
29526108Sfsmp * We oversize these to simplify boot-time config.
29625164Speter */
29726108Sfsmpint     cpu_num_to_apic_id[NAPICID];
29826108Sfsmpint     io_num_to_apic_id[NAPICID];
29925164Speterint     apic_id_to_logical[NAPICID];
30025164Speter
30130112Sdyson
30227005Sfsmp/* Bitmap of all available CPUs */
30327005Sfsmpu_int	all_cpus;
30427005Sfsmp
30546129Sluoqi/* AP uses this during bootstrap.  Do not staticize.  */
30646129Sluoqichar *bootSTK;
30748144Sluoqistatic int bootAP;
30826812Speter
30926812Speter/* Hotwire a 0->4MB V==P mapping */
31028808Speterextern pt_entry_t *KPTphys;
31126812Speter
31246129Sluoqi/* SMP page table page */
31346129Sluoqiextern pt_entry_t *SMPpt;
31426812Speter
31566277Spsstruct pcb stoppcbs[MAXCPU];
31636135Stegge
31738888Steggeint smp_started;		/* has the system started? */
31828809Speter
31925164Speter/*
32027289Sfsmp * Local data and functions.
32125164Speter */
32225164Speter
32326155Sfsmpstatic int	mp_capable;
32426155Sfsmpstatic u_int	boot_address;
32526155Sfsmpstatic u_int	base_memory;
32625164Speter
32726155Sfsmpstatic int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
32826155Sfsmpstatic mpfps_t	mpfps;
32926155Sfsmpstatic int	search_for_sig(u_int32_t target, int count);
33026155Sfsmpstatic void	mp_enable(u_int boot_addr);
33125164Speter
33266277Spsstatic void	mptable_pass1(void);
33326155Sfsmpstatic int	mptable_pass2(void);
33426155Sfsmpstatic void	default_mp_table(int type);
33528027Sfsmpstatic void	fix_mp_table(void);
33638888Steggestatic void	setup_apic_irq_mapping(void);
33727634Sfsmpstatic void	init_locks(void);
33826155Sfsmpstatic int	start_all_aps(u_int boot_addr);
33926155Sfsmpstatic void	install_ap_tramp(u_int boot_addr);
34026155Sfsmpstatic int	start_ap(int logicalCpu, u_int boot_addr);
34155420Steggestatic int	apic_int_is_bus_type(int intr, int bus_type);
34265557Sjasonestatic void	release_aps(void *dummy);
34325164Speter
34425164Speter/*
34527289Sfsmp * Calculate usable address in base memory for AP trampoline code.
34625164Speter */
34725164Speteru_int
34825164Spetermp_bootaddress(u_int basemem)
34925164Speter{
35027005Sfsmp	POSTCODE(MP_BOOTADDRESS_POST);
35127005Sfsmp
35225164Speter	base_memory = basemem * 1024;	/* convert to bytes */
35325164Speter
35425164Speter	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
35525164Speter	if ((base_memory - boot_address) < bootMP_size)
35625164Speter		boot_address -= 4096;	/* not enough, lower by 4k */
35725164Speter
35825164Speter	return boot_address;
35925164Speter}
36025164Speter
36125164Speter
36227289Sfsmp/*
36327289Sfsmp * Look for an Intel MP spec table (ie, SMP capable hardware).
36427289Sfsmp */
36526155Sfsmpint
36626155Sfsmpmp_probe(void)
36726155Sfsmp{
36826155Sfsmp	int     x;
36926155Sfsmp	u_long  segment;
37026155Sfsmp	u_int32_t target;
37126155Sfsmp
37227005Sfsmp	POSTCODE(MP_PROBE_POST);
37327005Sfsmp
37426155Sfsmp	/* see if EBDA exists */
37543314Sdillon	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
37626155Sfsmp		/* search first 1K of EBDA */
37726155Sfsmp		target = (u_int32_t) (segment << 4);
37826155Sfsmp		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
37926155Sfsmp			goto found;
38026155Sfsmp	} else {
38126155Sfsmp		/* last 1K of base memory, effective 'top of base' passed in */
38226155Sfsmp		target = (u_int32_t) (base_memory - 0x400);
38326155Sfsmp		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
38426155Sfsmp			goto found;
38526155Sfsmp	}
38626155Sfsmp
38726155Sfsmp	/* search the BIOS */
38826155Sfsmp	target = (u_int32_t) BIOS_BASE;
38926155Sfsmp	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
39026155Sfsmp		goto found;
39126155Sfsmp
39226155Sfsmp	/* nothing found */
39326155Sfsmp	mpfps = (mpfps_t)0;
39426155Sfsmp	mp_capable = 0;
39526155Sfsmp	return 0;
39626155Sfsmp
39727289Sfsmpfound:
39826155Sfsmp	/* calculate needed resources */
39926155Sfsmp	mpfps = (mpfps_t)x;
40066277Sps	mptable_pass1();
40126155Sfsmp
40226155Sfsmp	/* flag fact that we are running multiple processors */
40326155Sfsmp	mp_capable = 1;
40426155Sfsmp	return 1;
40526155Sfsmp}
40626155Sfsmp
40726155Sfsmp
40825164Speter/*
40965557Sjasone * Initialize the SMP hardware and the APIC and start up the AP's.
41025164Speter */
41125164Spetervoid
41225164Spetermp_start(void)
41325164Speter{
41427005Sfsmp	POSTCODE(MP_START_POST);
41527005Sfsmp
41625164Speter	/* look for MP capable motherboard */
41726155Sfsmp	if (mp_capable)
41825164Speter		mp_enable(boot_address);
41926101Sfsmp	else
42026155Sfsmp		panic("MP hardware not found!");
42125164Speter}
42225164Speter
42325164Speter
42425164Speter/*
42527289Sfsmp * Print various information about the SMP system hardware and setup.
42625164Speter */
42725164Spetervoid
42825164Spetermp_announce(void)
42925164Speter{
43025164Speter	int     x;
43125164Speter
43227005Sfsmp	POSTCODE(MP_ANNOUNCE_POST);
43327005Sfsmp
43425164Speter	printf("FreeBSD/SMP: Multiprocessor motherboard\n");
43527489Sfsmp	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
43626812Speter	printf(", version: 0x%08x", cpu_apic_versions[0]);
43726812Speter	printf(", at 0x%08x\n", cpu_apic_address);
43825164Speter	for (x = 1; x <= mp_naps; ++x) {
43927561Sfsmp		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
44026812Speter		printf(", version: 0x%08x", cpu_apic_versions[x]);
44126812Speter		printf(", at 0x%08x\n", cpu_apic_address);
44225164Speter	}
44325164Speter
44425164Speter#if defined(APIC_IO)
44525164Speter	for (x = 0; x < mp_napics; ++x) {
44627489Sfsmp		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
44726812Speter		printf(", version: 0x%08x", io_apic_versions[x]);
44826812Speter		printf(", at 0x%08x\n", io_apic_address[x]);
44925164Speter	}
45025164Speter#else
45125164Speter	printf(" Warning: APIC I/O disabled\n");
45225164Speter#endif	/* APIC_IO */
45325164Speter}
45425164Speter
45525164Speter/*
45625164Speter * AP cpu's call this to sync up protected mode.
45725164Speter */
45825164Spetervoid
45925164Speterinit_secondary(void)
46025164Speter{
46129663Speter	int	gsel_tss;
46248144Sluoqi	int	x, myid = bootAP;
46325164Speter
46446129Sluoqi	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
46546129Sluoqi	gdt_segs[GPROC0_SEL].ssd_base =
46646129Sluoqi		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
46746129Sluoqi	SMP_prvspace[myid].globaldata.gd_prvspace = &SMP_prvspace[myid];
46846129Sluoqi
46946129Sluoqi	for (x = 0; x < NGDT; x++) {
47046129Sluoqi		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
47146129Sluoqi	}
47246129Sluoqi
47346129Sluoqi	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
47446129Sluoqi	r_gdt.rd_base = (int) &gdt[myid * NGDT];
47527289Sfsmp	lgdt(&r_gdt);			/* does magic intra-segment return */
47646129Sluoqi
47725164Speter	lidt(&r_idt);
47846129Sluoqi
47925164Speter	lldt(_default_ldt);
48038422Smsmith#ifdef USER_LDT
48165597Sjake	PCPU_SET(currentldt, _default_ldt);
48238422Smsmith#endif
48325164Speter
48446129Sluoqi	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
48546129Sluoqi	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
48626812Speter	common_tss.tss_esp0 = 0;	/* not used until after switch */
48726812Speter	common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
48826812Speter	common_tss.tss_ioopt = (sizeof common_tss) << 16;
48947081Sluoqi	tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd;
49047081Sluoqi	common_tssd = *tss_gdt;
49125164Speter	ltr(gsel_tss);
49225164Speter
49348144Sluoqi	pmap_set_opt();
49425164Speter}
49525164Speter
49625164Speter
49725164Speter#if defined(APIC_IO)
49827289Sfsmp/*
49927289Sfsmp * Final configuration of the BSP's local APIC:
50027289Sfsmp *  - disable 'pic mode'.
50127289Sfsmp *  - disable 'virtual wire mode'.
50227289Sfsmp *  - enable NMI.
50327289Sfsmp */
50425164Spetervoid
50527289Sfsmpbsp_apic_configure(void)
50625164Speter{
50727289Sfsmp	u_char		byte;
50827289Sfsmp	u_int32_t	temp;
50925164Speter
51027289Sfsmp	/* leave 'pic mode' if necessary */
51125164Speter	if (picmode) {
51225164Speter		outb(0x22, 0x70);	/* select IMCR */
51325164Speter		byte = inb(0x23);	/* current contents */
51427289Sfsmp		byte |= 0x01;		/* mask external INTR */
51525164Speter		outb(0x23, byte);	/* disconnect 8259s/NMI */
51625164Speter	}
51727001Sfsmp
51827001Sfsmp	/* mask lint0 (the 8259 'virtual wire' connection) */
51926812Speter	temp = lapic.lvt_lint0;
52027289Sfsmp	temp |= APIC_LVT_M;		/* set the mask */
52126812Speter	lapic.lvt_lint0 = temp;
52227001Sfsmp
52327001Sfsmp        /* setup lint1 to handle NMI */
52427001Sfsmp        temp = lapic.lvt_lint1;
52527289Sfsmp        temp &= ~APIC_LVT_M;		/* clear the mask */
52627289Sfsmp        lapic.lvt_lint1 = temp;
52727001Sfsmp
52827353Sfsmp	if (bootverbose)
52927561Sfsmp		apic_dump("bsp_apic_configure()");
53025164Speter}
53127005Sfsmp#endif  /* APIC_IO */
53225164Speter
53325164Speter
53425164Speter/*******************************************************************
53525164Speter * local functions and data
53625164Speter */
53725164Speter
53825164Speter/*
53925164Speter * start the SMP system
54025164Speter */
54125164Speterstatic void
54225164Spetermp_enable(u_int boot_addr)
54325164Speter{
54425164Speter	int     x;
54525164Speter#if defined(APIC_IO)
54625164Speter	int     apic;
54725164Speter	u_int   ux;
54825164Speter#endif	/* APIC_IO */
54925164Speter
55027005Sfsmp	POSTCODE(MP_ENABLE_POST);
55127005Sfsmp
55227289Sfsmp	/* turn on 4MB of V == P addressing so we can get to MP table */
55338349Sbde	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
55426812Speter	invltlb();
55526812Speter
55626812Speter	/* examine the MP table for needed info, uses physical addresses */
55726155Sfsmp	x = mptable_pass2();
55825164Speter
55926812Speter	*(int *)PTD = 0;
56026812Speter	invltlb();
56125164Speter
56225164Speter	/* can't process default configs till the CPU APIC is pmapped */
56325164Speter	if (x)
56425164Speter		default_mp_table(x);
56525164Speter
56628027Sfsmp	/* post scan cleanup */
56728027Sfsmp	fix_mp_table();
56838888Stegge	setup_apic_irq_mapping();
56928027Sfsmp
57025164Speter#if defined(APIC_IO)
57127353Sfsmp
57225164Speter	/* fill the LOGICAL io_apic_versions table */
57325164Speter	for (apic = 0; apic < mp_napics; ++apic) {
57425164Speter		ux = io_apic_read(apic, IOAPIC_VER);
57525164Speter		io_apic_versions[apic] = ux;
57661136Smsmith		io_apic_set_id(apic, IO_TO_ID(apic));
57725164Speter	}
57825164Speter
57925216Sfsmp	/* program each IO APIC in the system */
58025164Speter	for (apic = 0; apic < mp_napics; ++apic)
58126266Speter		if (io_apic_setup(apic) < 0)
58226266Speter			panic("IO APIC setup failure");
58325164Speter
58427353Sfsmp	/* install a 'Spurious INTerrupt' vector */
58527353Sfsmp	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
58627353Sfsmp	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
58727353Sfsmp
58825204Sfsmp	/* install an inter-CPU IPI for TLB invalidation */
58927005Sfsmp	setidt(XINVLTLB_OFFSET, Xinvltlb,
59025216Sfsmp	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
59127005Sfsmp
59231639Sfsmp#ifdef BETTER_CLOCK
59331639Sfsmp	/* install an inter-CPU IPI for reading processor state */
59431639Sfsmp	setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
59531639Sfsmp	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
59634020Stegge#endif
59731639Sfsmp
59848924Smsmith	/* install an inter-CPU IPI for all-CPU rendezvous */
59948924Smsmith	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
60048924Smsmith	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
60148924Smsmith
60231639Sfsmp	/* install an inter-CPU IPI for forcing an additional software trap */
60331639Sfsmp	setidt(XCPUAST_OFFSET, Xcpuast,
60431639Sfsmp	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
60531639Sfsmp
60634021Stegge	/* install an inter-CPU IPI for interrupt forwarding */
60734021Stegge	setidt(XFORWARD_IRQ_OFFSET, Xforward_irq,
60834021Stegge	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
60934021Stegge
61027005Sfsmp	/* install an inter-CPU IPI for CPU stop/restart */
61127005Sfsmp	setidt(XCPUSTOP_OFFSET, Xcpustop,
61227005Sfsmp	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
61327255Sfsmp
61427353Sfsmp#if defined(TEST_TEST1)
61527517Sfsmp	/* install a "fake hardware INTerrupt" vector */
61627353Sfsmp	setidt(XTEST1_OFFSET, Xtest1,
61727255Sfsmp	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
61827353Sfsmp#endif  /** TEST_TEST1 */
61927353Sfsmp
62025164Speter#endif	/* APIC_IO */
62125164Speter
62227634Sfsmp	/* initialize all SMP locks */
62327634Sfsmp	init_locks();
62427634Sfsmp
62565557Sjasone	/* obtain the ap_boot_lock */
62665557Sjasone	s_lock(&ap_boot_lock);
62765557Sjasone
62825164Speter	/* start each Application Processor */
62925164Speter	start_all_aps(boot_addr);
63025164Speter}
63125164Speter
63225164Speter
63325164Speter/*
63425164Speter * look for the MP spec signature
63525164Speter */
63625164Speter
63725164Speter/* string defined by the Intel MP Spec as identifying the MP table */
63825164Speter#define MP_SIG		0x5f504d5f	/* _MP_ */
63925164Speter#define NEXT(X)		((X) += 4)
64025164Speterstatic int
64125164Spetersearch_for_sig(u_int32_t target, int count)
64225164Speter{
64325164Speter	int     x;
64425164Speter	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
64525164Speter
64625164Speter	for (x = 0; x < count; NEXT(x))
64725164Speter		if (addr[x] == MP_SIG)
64825164Speter			/* make array index a byte index */
64925164Speter			return (target + (x * sizeof(u_int32_t)));
65025164Speter
65125164Speter	return -1;
65225164Speter}
65325164Speter
65425164Speter
65525164Speterstatic basetable_entry basetable_entry_types[] =
65625164Speter{
65725164Speter	{0, 20, "Processor"},
65825164Speter	{1, 8, "Bus"},
65925164Speter	{2, 8, "I/O APIC"},
66025164Speter	{3, 8, "I/O INT"},
66125164Speter	{4, 8, "Local INT"}
66225164Speter};
66325164Speter
66425164Spetertypedef struct BUSDATA {
66525164Speter	u_char  bus_id;
66625164Speter	enum busTypes bus_type;
66725164Speter}       bus_datum;
66825164Speter
66925164Spetertypedef struct INTDATA {
67025164Speter	u_char  int_type;
67125164Speter	u_short int_flags;
67225164Speter	u_char  src_bus_id;
67325164Speter	u_char  src_bus_irq;
67425164Speter	u_char  dst_apic_id;
67525164Speter	u_char  dst_apic_int;
67638888Stegge	u_char	int_vector;
67725164Speter}       io_int, local_int;
67825164Speter
67925164Spetertypedef struct BUSTYPENAME {
68025164Speter	u_char  type;
68125164Speter	char    name[7];
68225164Speter}       bus_type_name;
68325164Speter
68425164Speterstatic bus_type_name bus_type_table[] =
68525164Speter{
68625164Speter	{CBUS, "CBUS"},
68725164Speter	{CBUSII, "CBUSII"},
68825164Speter	{EISA, "EISA"},
68955891Smdodd	{MCA, "MCA"},
69025164Speter	{UNKNOWN_BUSTYPE, "---"},
69125164Speter	{ISA, "ISA"},
69255891Smdodd	{MCA, "MCA"},
69325164Speter	{UNKNOWN_BUSTYPE, "---"},
69425164Speter	{UNKNOWN_BUSTYPE, "---"},
69525164Speter	{UNKNOWN_BUSTYPE, "---"},
69625164Speter	{UNKNOWN_BUSTYPE, "---"},
69725164Speter	{UNKNOWN_BUSTYPE, "---"},
69825164Speter	{PCI, "PCI"},
69925164Speter	{UNKNOWN_BUSTYPE, "---"},
70025164Speter	{UNKNOWN_BUSTYPE, "---"},
70125164Speter	{UNKNOWN_BUSTYPE, "---"},
70225164Speter	{UNKNOWN_BUSTYPE, "---"},
70325164Speter	{XPRESS, "XPRESS"},
70425164Speter	{UNKNOWN_BUSTYPE, "---"}
70525164Speter};
70625164Speter/* from MP spec v1.4, table 5-1 */
70725164Speterstatic int default_data[7][5] =
70825164Speter{
70925164Speter/*   nbus, id0, type0, id1, type1 */
71025164Speter	{1, 0, ISA, 255, 255},
71125164Speter	{1, 0, EISA, 255, 255},
71225164Speter	{1, 0, EISA, 255, 255},
71355891Smdodd	{1, 0, MCA, 255, 255},
71425164Speter	{2, 0, ISA, 1, PCI},
71525164Speter	{2, 0, EISA, 1, PCI},
71655891Smdodd	{2, 0, MCA, 1, PCI}
71725164Speter};
71825164Speter
71925164Speter
72025164Speter/* the bus data */
72166277Spsstatic bus_datum *bus_data;
72225164Speter
72325164Speter/* the IO INT data, one entry per possible APIC INTerrupt */
72466277Spsstatic io_int  *io_apic_ints;
72525164Speter
72625164Speterstatic int nintrs;
72725164Speter
72826108Sfsmpstatic int processor_entry	__P((proc_entry_ptr entry, int cpu));
72926108Sfsmpstatic int bus_entry		__P((bus_entry_ptr entry, int bus));
73026108Sfsmpstatic int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
73126108Sfsmpstatic int int_entry		__P((int_entry_ptr entry, int intr));
73226108Sfsmpstatic int lookup_bus_type	__P((char *name));
73325164Speter
73425164Speter
73525164Speter/*
73626155Sfsmp * 1st pass on motherboard's Intel MP specification table.
73726155Sfsmp *
73826155Sfsmp * initializes:
73926155Sfsmp *	mp_ncpus = 1
74026155Sfsmp *
74126155Sfsmp * determines:
74226155Sfsmp *	cpu_apic_address (common to all CPUs)
74326155Sfsmp *	io_apic_address[N]
74426155Sfsmp *	mp_naps
74526155Sfsmp *	mp_nbusses
74626155Sfsmp *	mp_napics
74726155Sfsmp *	nintrs
74825164Speter */
74966277Spsstatic void
75026155Sfsmpmptable_pass1(void)
75125164Speter{
75226108Sfsmp	int	x;
75326108Sfsmp	mpcth_t	cth;
75426108Sfsmp	int	totalSize;
75526108Sfsmp	void*	position;
75626108Sfsmp	int	count;
75726108Sfsmp	int	type;
75825164Speter
75927005Sfsmp	POSTCODE(MPTABLE_PASS1_POST);
76027005Sfsmp
76126155Sfsmp	/* clear various tables */
76226155Sfsmp	for (x = 0; x < NAPICID; ++x) {
76326155Sfsmp		io_apic_address[x] = ~0;	/* IO APIC address table */
76426155Sfsmp	}
76525164Speter
76626108Sfsmp	/* init everything to empty */
76726108Sfsmp	mp_naps = 0;
76826108Sfsmp	mp_nbusses = 0;
76926108Sfsmp	mp_napics = 0;
77026108Sfsmp	nintrs = 0;
77126108Sfsmp
77226108Sfsmp	/* check for use of 'default' configuration */
77328027Sfsmp	if (MPFPS_MPFB1 != 0) {
77426108Sfsmp		/* use default addresses */
77526108Sfsmp		cpu_apic_address = DEFAULT_APIC_BASE;
77626108Sfsmp		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
77726108Sfsmp
77826108Sfsmp		/* fill in with defaults */
77926882Sfsmp		mp_naps = 2;		/* includes BSP */
78028027Sfsmp		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
78126108Sfsmp#if defined(APIC_IO)
78226108Sfsmp		mp_napics = 1;
78326108Sfsmp		nintrs = 16;
78426108Sfsmp#endif	/* APIC_IO */
78526108Sfsmp	}
78626108Sfsmp	else {
78726155Sfsmp		if ((cth = mpfps->pap) == 0)
78826108Sfsmp			panic("MP Configuration Table Header MISSING!");
78926108Sfsmp
79026108Sfsmp		cpu_apic_address = (vm_offset_t) cth->apic_address;
79126108Sfsmp
79226108Sfsmp		/* walk the table, recording info of interest */
79326108Sfsmp		totalSize = cth->base_table_length - sizeof(struct MPCTH);
79426108Sfsmp		position = (u_char *) cth + sizeof(struct MPCTH);
79526108Sfsmp		count = cth->entry_count;
79626108Sfsmp
79726108Sfsmp		while (count--) {
79826108Sfsmp			switch (type = *(u_char *) position) {
79926108Sfsmp			case 0: /* processor_entry */
80026108Sfsmp				if (((proc_entry_ptr)position)->cpu_flags
80126108Sfsmp					& PROCENTRY_FLAG_EN)
80226108Sfsmp					++mp_naps;
80326108Sfsmp				break;
80426108Sfsmp			case 1: /* bus_entry */
80526108Sfsmp				++mp_nbusses;
80626108Sfsmp				break;
80726108Sfsmp			case 2: /* io_apic_entry */
80826108Sfsmp				if (((io_apic_entry_ptr)position)->apic_flags
80926108Sfsmp					& IOAPICENTRY_FLAG_EN)
81026108Sfsmp					io_apic_address[mp_napics++] =
81126108Sfsmp					    (vm_offset_t)((io_apic_entry_ptr)
81226108Sfsmp						position)->apic_address;
81326108Sfsmp				break;
81426108Sfsmp			case 3: /* int_entry */
81526108Sfsmp				++nintrs;
81626108Sfsmp				break;
81726108Sfsmp			case 4:	/* int_entry */
81826108Sfsmp				break;
81926108Sfsmp			default:
82026108Sfsmp				panic("mpfps Base Table HOSED!");
82126108Sfsmp				/* NOTREACHED */
82226108Sfsmp			}
82326108Sfsmp
82426108Sfsmp			totalSize -= basetable_entry_types[type].length;
82526108Sfsmp			(u_char*)position += basetable_entry_types[type].length;
82626108Sfsmp		}
82726108Sfsmp	}
82826108Sfsmp
82926108Sfsmp	/* qualify the numbers */
83066277Sps	if (mp_naps > MAXCPU) {
83126108Sfsmp		printf("Warning: only using %d of %d available CPUs!\n",
83266277Sps			MAXCPU, mp_naps);
83366277Sps		mp_naps = MAXCPU;
83428041Sfsmp	}
83526108Sfsmp
83626108Sfsmp	/*
83726108Sfsmp	 * Count the BSP.
83826108Sfsmp	 * This is also used as a counter while starting the APs.
83926108Sfsmp	 */
84026108Sfsmp	mp_ncpus = 1;
84126108Sfsmp
84226108Sfsmp	--mp_naps;	/* subtract the BSP */
84326108Sfsmp}
84426108Sfsmp
84526108Sfsmp
84626108Sfsmp/*
84726155Sfsmp * 2nd pass on motherboard's Intel MP specification table.
84826155Sfsmp *
84926155Sfsmp * sets:
85026155Sfsmp *	boot_cpu_id
85126155Sfsmp *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
85226155Sfsmp *	CPU_TO_ID(N), logical CPU to APIC ID table
85326155Sfsmp *	IO_TO_ID(N), logical IO to APIC ID table
85426155Sfsmp *	bus_data[N]
85526155Sfsmp *	io_apic_ints[N]
85626108Sfsmp */
85726108Sfsmpstatic int
85826155Sfsmpmptable_pass2(void)
85926108Sfsmp{
86026108Sfsmp	int     x;
86126108Sfsmp	mpcth_t cth;
86226108Sfsmp	int     totalSize;
86326108Sfsmp	void*   position;
86426108Sfsmp	int     count;
86526108Sfsmp	int     type;
86626108Sfsmp	int     apic, bus, cpu, intr;
86766277Sps	int	i, j;
86866277Sps	int	pgeflag;
86926108Sfsmp
87027005Sfsmp	POSTCODE(MPTABLE_PASS2_POST);
87127005Sfsmp
87266277Sps	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
87366277Sps
87466277Sps	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
87566277Sps	    M_DEVBUF, M_WAITOK);
87666277Sps	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
87766277Sps	    M_DEVBUF, M_WAITOK);
87866277Sps	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * nintrs,
87966277Sps	    M_DEVBUF, M_WAITOK);
88066277Sps	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
88166277Sps	    M_DEVBUF, M_WAITOK);
88266277Sps
88366277Sps	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
88466277Sps
88566277Sps	for (i = 0; i < mp_napics; i++) {
88666277Sps		for (j = 0; j < mp_napics; j++) {
88766277Sps			/* same page frame as a previous IO apic? */
88866277Sps			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
88966277Sps			    (io_apic_address[i] & PG_FRAME)) {
89066277Sps				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
89166277Sps					+ (NPTEPG-2-j) * PAGE_SIZE
89266277Sps					+ (io_apic_address[i] & PAGE_MASK));
89366277Sps				break;
89466277Sps			}
89566277Sps			/* use this slot if available */
89666277Sps			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
89766277Sps				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
89866277Sps				    pgeflag | (io_apic_address[i] & PG_FRAME));
89966277Sps				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
90066277Sps					+ (NPTEPG-2-j) * PAGE_SIZE
90166277Sps					+ (io_apic_address[i] & PAGE_MASK));
90266277Sps				break;
90366277Sps			}
90466277Sps		}
90566277Sps	}
90666277Sps
90726155Sfsmp	/* clear various tables */
90826155Sfsmp	for (x = 0; x < NAPICID; ++x) {
90926155Sfsmp		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
91026155Sfsmp		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
91126155Sfsmp		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
91226155Sfsmp	}
91326155Sfsmp
91425164Speter	/* clear bus data table */
91566277Sps	for (x = 0; x < mp_nbusses; ++x)
91625164Speter		bus_data[x].bus_id = 0xff;
91725164Speter
91825164Speter	/* clear IO APIC INT table */
91966277Sps	for (x = 0; x < nintrs; ++x) {
92025164Speter		io_apic_ints[x].int_type = 0xff;
92138888Stegge		io_apic_ints[x].int_vector = 0xff;
92238888Stegge	}
92325164Speter
92425164Speter	/* setup the cpu/apic mapping arrays */
92525164Speter	boot_cpu_id = -1;
92625164Speter
92725164Speter	/* record whether PIC or virtual-wire mode */
92826155Sfsmp	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
92925164Speter
93025164Speter	/* check for use of 'default' configuration */
93128027Sfsmp	if (MPFPS_MPFB1 != 0)
93228027Sfsmp		return MPFPS_MPFB1;	/* return default configuration type */
93325164Speter
93426155Sfsmp	if ((cth = mpfps->pap) == 0)
93526108Sfsmp		panic("MP Configuration Table Header MISSING!");
93625164Speter
93726108Sfsmp	/* walk the table, recording info of interest */
93825164Speter	totalSize = cth->base_table_length - sizeof(struct MPCTH);
93925164Speter	position = (u_char *) cth + sizeof(struct MPCTH);
94025164Speter	count = cth->entry_count;
94126108Sfsmp	apic = bus = intr = 0;
94226108Sfsmp	cpu = 1;				/* pre-count the BSP */
94325164Speter
94425164Speter	while (count--) {
94525164Speter		switch (type = *(u_char *) position) {
94625164Speter		case 0:
94726108Sfsmp			if (processor_entry(position, cpu))
94826108Sfsmp				++cpu;
94925164Speter			break;
95025164Speter		case 1:
95126108Sfsmp			if (bus_entry(position, bus))
95226108Sfsmp				++bus;
95325164Speter			break;
95425164Speter		case 2:
95526108Sfsmp			if (io_apic_entry(position, apic))
95626108Sfsmp				++apic;
95725164Speter			break;
95825164Speter		case 3:
95926108Sfsmp			if (int_entry(position, intr))
96026108Sfsmp				++intr;
96125164Speter			break;
96225164Speter		case 4:
96325164Speter			/* int_entry(position); */
96425164Speter			break;
96525164Speter		default:
96626108Sfsmp			panic("mpfps Base Table HOSED!");
96725164Speter			/* NOTREACHED */
96825164Speter		}
96925164Speter
97025164Speter		totalSize -= basetable_entry_types[type].length;
97125164Speter		(u_char *) position += basetable_entry_types[type].length;
97225164Speter	}
97325164Speter
97426101Sfsmp	if (boot_cpu_id == -1)
97526108Sfsmp		panic("NO BSP found!");
97625164Speter
97725164Speter	/* report fact that its NOT a default configuration */
97825164Speter	return 0;
97925164Speter}
98025164Speter
98125164Speter
98255420Steggevoid
98338888Steggeassign_apic_irq(int apic, int intpin, int irq)
98438888Stegge{
98538888Stegge	int x;
98638888Stegge
98738888Stegge	if (int_to_apicintpin[irq].ioapic != -1)
98838888Stegge		panic("assign_apic_irq: inconsistent table");
98938888Stegge
99038888Stegge	int_to_apicintpin[irq].ioapic = apic;
99138888Stegge	int_to_apicintpin[irq].int_pin = intpin;
99238888Stegge	int_to_apicintpin[irq].apic_address = ioapic[apic];
99338888Stegge	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
99438888Stegge
99538888Stegge	for (x = 0; x < nintrs; x++) {
99638888Stegge		if ((io_apic_ints[x].int_type == 0 ||
99738888Stegge		     io_apic_ints[x].int_type == 3) &&
99838888Stegge		    io_apic_ints[x].int_vector == 0xff &&
99938888Stegge		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
100038888Stegge		    io_apic_ints[x].dst_apic_int == intpin)
100138888Stegge			io_apic_ints[x].int_vector = irq;
100238888Stegge	}
100338888Stegge}
100438888Stegge
100555420Steggevoid
100655420Steggerevoke_apic_irq(int irq)
100755420Stegge{
100855420Stegge	int x;
100955420Stegge	int oldapic;
101055420Stegge	int oldintpin;
101155420Stegge
101255420Stegge	if (int_to_apicintpin[irq].ioapic == -1)
101355420Stegge		panic("assign_apic_irq: inconsistent table");
101455420Stegge
101555420Stegge	oldapic = int_to_apicintpin[irq].ioapic;
101655420Stegge	oldintpin = int_to_apicintpin[irq].int_pin;
101755420Stegge
101855420Stegge	int_to_apicintpin[irq].ioapic = -1;
101955420Stegge	int_to_apicintpin[irq].int_pin = 0;
102055420Stegge	int_to_apicintpin[irq].apic_address = NULL;
102155420Stegge	int_to_apicintpin[irq].redirindex = 0;
102255420Stegge
102355420Stegge	for (x = 0; x < nintrs; x++) {
102455420Stegge		if ((io_apic_ints[x].int_type == 0 ||
102555420Stegge		     io_apic_ints[x].int_type == 3) &&
102655420Stegge		    io_apic_ints[x].int_vector == 0xff &&
102755420Stegge		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
102855420Stegge		    io_apic_ints[x].dst_apic_int == oldintpin)
102955420Stegge			io_apic_ints[x].int_vector = 0xff;
103055420Stegge	}
103155420Stegge}
103255420Stegge
103364290Stegge
103464290Stegge
103564290Steggestatic void
103664290Steggeswap_apic_id(int apic, int oldid, int newid)
103764290Stegge{
103864290Stegge	int x;
103964290Stegge	int oapic;
104064290Stegge
104164290Stegge
104264290Stegge	if (oldid == newid)
104364290Stegge		return;			/* Nothing to do */
104464290Stegge
104564290Stegge	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
104664290Stegge	       apic, oldid, newid);
104764290Stegge
104864290Stegge	/* Swap physical APIC IDs in interrupt entries */
104964290Stegge	for (x = 0; x < nintrs; x++) {
105064290Stegge		if (io_apic_ints[x].dst_apic_id == oldid)
105164290Stegge			io_apic_ints[x].dst_apic_id = newid;
105264290Stegge		else if (io_apic_ints[x].dst_apic_id == newid)
105364290Stegge			io_apic_ints[x].dst_apic_id = oldid;
105464290Stegge	}
105564290Stegge
105664290Stegge	/* Swap physical APIC IDs in IO_TO_ID mappings */
105764290Stegge	for (oapic = 0; oapic < mp_napics; oapic++)
105864290Stegge		if (IO_TO_ID(oapic) == newid)
105964290Stegge			break;
106064290Stegge
106164290Stegge	if (oapic < mp_napics) {
106264290Stegge		printf("Changing APIC ID for IO APIC #%d from "
106364290Stegge		       "%d to %d in MP table\n",
106464290Stegge		       oapic, newid, oldid);
106564290Stegge		IO_TO_ID(oapic) = oldid;
106664290Stegge	}
106764290Stegge	IO_TO_ID(apic) = newid;
106864290Stegge}
106964290Stegge
107064290Stegge
107164290Steggestatic void
107264290Steggefix_id_to_io_mapping(void)
107364290Stegge{
107464290Stegge	int x;
107564290Stegge
107664290Stegge	for (x = 0; x < NAPICID; x++)
107764290Stegge		ID_TO_IO(x) = -1;
107864290Stegge
107964290Stegge	for (x = 0; x <= mp_naps; x++)
108064290Stegge		if (CPU_TO_ID(x) < NAPICID)
108164290Stegge			ID_TO_IO(CPU_TO_ID(x)) = x;
108264290Stegge
108364290Stegge	for (x = 0; x < mp_napics; x++)
108464290Stegge		if (IO_TO_ID(x) < NAPICID)
108564290Stegge			ID_TO_IO(IO_TO_ID(x)) = x;
108664290Stegge}
108764290Stegge
108864290Stegge
108964290Steggestatic int
109064290Steggefirst_free_apic_id(void)
109164290Stegge{
109264290Stegge	int freeid, x;
109364290Stegge
109464290Stegge	for (freeid = 0; freeid < NAPICID; freeid++) {
109564290Stegge		for (x = 0; x <= mp_naps; x++)
109664290Stegge			if (CPU_TO_ID(x) == freeid)
109764290Stegge				break;
109864290Stegge		if (x <= mp_naps)
109964290Stegge			continue;
110064290Stegge		for (x = 0; x < mp_napics; x++)
110164290Stegge			if (IO_TO_ID(x) == freeid)
110264290Stegge				break;
110364290Stegge		if (x < mp_napics)
110464290Stegge			continue;
110564290Stegge		return freeid;
110664290Stegge	}
110764290Stegge	return freeid;
110864290Stegge}
110964290Stegge
111064290Stegge
111164290Steggestatic int
111264290Steggeio_apic_id_acceptable(int apic, int id)
111364290Stegge{
111464290Stegge	int cpu;		/* Logical CPU number */
111564290Stegge	int oapic;		/* Logical IO APIC number for other IO APIC */
111664290Stegge
111764290Stegge	if (id >= NAPICID)
111864290Stegge		return 0;	/* Out of range */
111964290Stegge
112064290Stegge	for (cpu = 0; cpu <= mp_naps; cpu++)
112164290Stegge		if (CPU_TO_ID(cpu) == id)
112264290Stegge			return 0;	/* Conflict with CPU */
112364290Stegge
112464290Stegge	for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
112564290Stegge		if (IO_TO_ID(oapic) == id)
112664290Stegge			return 0;	/* Conflict with other APIC */
112764290Stegge
112864290Stegge	return 1;		/* ID is acceptable for IO APIC */
112964290Stegge}
113064290Stegge
113164290Stegge
113225164Speter/*
113325164Speter * parse an Intel MP specification table
113425164Speter */
113525164Speterstatic void
113625164Speterfix_mp_table(void)
113725164Speter{
113825292Sfsmp	int	x;
113925292Sfsmp	int	id;
114042543Seivind	int	bus_0 = 0;	/* Stop GCC warning */
114142543Seivind	int	bus_pci = 0;	/* Stop GCC warning */
114225292Sfsmp	int	num_pci_bus;
114364290Stegge	int	apic;		/* IO APIC unit number */
114464290Stegge	int     freeid;		/* Free physical APIC ID */
114564290Stegge	int	physid;		/* Current physical IO APIC ID */
114625164Speter
114725164Speter	/*
114825164Speter	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
114925164Speter	 * did it wrong.  The MP spec says that when more than 1 PCI bus
115025164Speter	 * exists the BIOS must begin with bus entries for the PCI bus and use
115125164Speter	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
115225164Speter	 * exists the BIOS can choose to ignore this ordering, and indeed many
115325164Speter	 * MP motherboards do ignore it.  This causes a problem when the PCI
115425164Speter	 * sub-system makes requests of the MP sub-system based on PCI bus
115525164Speter	 * numbers.	So here we look for the situation and renumber the
115625164Speter	 * busses and associated INTs in an effort to "make it right".
115725164Speter	 */
115825164Speter
115925292Sfsmp	/* find bus 0, PCI bus, count the number of PCI busses */
116025164Speter	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
116125292Sfsmp		if (bus_data[x].bus_id == 0) {
116225292Sfsmp			bus_0 = x;
116325292Sfsmp		}
116425292Sfsmp		if (bus_data[x].bus_type == PCI) {
116525164Speter			++num_pci_bus;
116625292Sfsmp			bus_pci = x;
116725292Sfsmp		}
116825164Speter	}
116925292Sfsmp	/*
117025292Sfsmp	 * bus_0 == slot of bus with ID of 0
117125292Sfsmp	 * bus_pci == slot of last PCI bus encountered
117225292Sfsmp	 */
117325164Speter
117425164Speter	/* check the 1 PCI bus case for sanity */
117564494Stegge	/* if it is number 0 all is well */
117664494Stegge	if (num_pci_bus == 1 &&
117764494Stegge	    bus_data[bus_pci].bus_id != 0) {
117864494Stegge
117925164Speter		/* mis-numbered, swap with whichever bus uses slot 0 */
118025164Speter
118125292Sfsmp		/* swap the bus entry types */
118225292Sfsmp		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
118325292Sfsmp		bus_data[bus_0].bus_type = PCI;
118425164Speter
118525164Speter		/* swap each relavant INTerrupt entry */
118625292Sfsmp		id = bus_data[bus_pci].bus_id;
118725292Sfsmp		for (x = 0; x < nintrs; ++x) {
118825292Sfsmp			if (io_apic_ints[x].src_bus_id == id) {
118925292Sfsmp				io_apic_ints[x].src_bus_id = 0;
119025292Sfsmp			}
119125292Sfsmp			else if (io_apic_ints[x].src_bus_id == 0) {
119225292Sfsmp				io_apic_ints[x].src_bus_id = id;
119325292Sfsmp			}
119425164Speter		}
119525164Speter	}
119664290Stegge
119764290Stegge	/* Assign IO APIC IDs.
119864290Stegge	 *
119964290Stegge	 * First try the existing ID. If a conflict is detected, try
120064290Stegge	 * the ID in the MP table.  If a conflict is still detected, find
120164290Stegge	 * a free id.
120264290Stegge	 *
120364290Stegge	 * We cannot use the ID_TO_IO table before all conflicts has been
120464290Stegge	 * resolved and the table has been corrected.
120564290Stegge	 */
120664290Stegge	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
120764290Stegge
120864290Stegge		/* First try to use the value set by the BIOS */
120964290Stegge		physid = io_apic_get_id(apic);
121064290Stegge		if (io_apic_id_acceptable(apic, physid)) {
121164290Stegge			if (IO_TO_ID(apic) != physid)
121264290Stegge				swap_apic_id(apic, IO_TO_ID(apic), physid);
121364290Stegge			continue;
121464290Stegge		}
121564290Stegge
121664290Stegge		/* Then check if the value in the MP table is acceptable */
121764290Stegge		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
121864290Stegge			continue;
121964290Stegge
122064290Stegge		/* Last resort, find a free APIC ID and use it */
122164290Stegge		freeid = first_free_apic_id();
122264290Stegge		if (freeid >= NAPICID)
122364290Stegge			panic("No free physical APIC IDs found");
122464290Stegge
122564290Stegge		if (io_apic_id_acceptable(apic, freeid)) {
122664290Stegge			swap_apic_id(apic, IO_TO_ID(apic), freeid);
122764290Stegge			continue;
122864290Stegge		}
122964290Stegge		panic("Free physical APIC ID not usable");
123064290Stegge	}
123164290Stegge	fix_id_to_io_mapping();
123225164Speter}
123325164Speter
123425164Speter
123555420Stegge/* Assign low level interrupt handlers */
123638888Steggestatic void
123738888Steggesetup_apic_irq_mapping(void)
123838888Stegge{
123938888Stegge	int	x;
124038888Stegge	int	int_vector;
124138888Stegge
124255420Stegge	/* Clear array */
124338888Stegge	for (x = 0; x < APIC_INTMAPSIZE; x++) {
124438888Stegge		int_to_apicintpin[x].ioapic = -1;
124538888Stegge		int_to_apicintpin[x].int_pin = 0;
124638888Stegge		int_to_apicintpin[x].apic_address = NULL;
124738888Stegge		int_to_apicintpin[x].redirindex = 0;
124838888Stegge	}
124955420Stegge
125055420Stegge	/* First assign ISA/EISA interrupts */
125138888Stegge	for (x = 0; x < nintrs; x++) {
125255420Stegge		int_vector = io_apic_ints[x].src_bus_irq;
125355420Stegge		if (int_vector < APIC_INTMAPSIZE &&
125455420Stegge		    io_apic_ints[x].int_vector == 0xff &&
125555420Stegge		    int_to_apicintpin[int_vector].ioapic == -1 &&
125655420Stegge		    (apic_int_is_bus_type(x, ISA) ||
125755420Stegge		     apic_int_is_bus_type(x, EISA)) &&
125855420Stegge		    io_apic_ints[x].int_type == 0) {
125955420Stegge			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
126055420Stegge					io_apic_ints[x].dst_apic_int,
126155420Stegge					int_vector);
126255420Stegge		}
126355420Stegge	}
126455420Stegge
126555420Stegge	/* Assign interrupts on first 24 intpins on IOAPIC #0 */
126655420Stegge	for (x = 0; x < nintrs; x++) {
126755420Stegge		int_vector = io_apic_ints[x].dst_apic_int;
126855420Stegge		if (int_vector < APIC_INTMAPSIZE &&
126938888Stegge		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
127038888Stegge		    io_apic_ints[x].int_vector == 0xff &&
127155420Stegge		    int_to_apicintpin[int_vector].ioapic == -1 &&
127238888Stegge		    (io_apic_ints[x].int_type == 0 ||
127338888Stegge		     io_apic_ints[x].int_type == 3)) {
127455420Stegge			assign_apic_irq(0,
127538888Stegge					io_apic_ints[x].dst_apic_int,
127655420Stegge					int_vector);
127738888Stegge		}
127838888Stegge	}
127955420Stegge	/*
128055420Stegge	 * Assign interrupts for remaining intpins.
128155420Stegge	 * Skip IOAPIC #0 intpin 0 if the type is ExtInt, since this indicates
128255420Stegge	 * that an entry for ISA/EISA irq 0 exist, and a fallback to mixed mode
128355420Stegge	 * due to 8254 interrupts not being delivered can reuse that low level
128455420Stegge	 * interrupt handler.
128555420Stegge	 */
128638888Stegge	int_vector = 0;
128738888Stegge	while (int_vector < APIC_INTMAPSIZE &&
128838888Stegge	       int_to_apicintpin[int_vector].ioapic != -1)
128938888Stegge		int_vector++;
129038888Stegge	for (x = 0; x < nintrs && int_vector < APIC_INTMAPSIZE; x++) {
129138888Stegge		if ((io_apic_ints[x].int_type == 0 ||
129255420Stegge		     (io_apic_ints[x].int_type == 3 &&
129355420Stegge		      (io_apic_ints[x].dst_apic_id != IO_TO_ID(0) ||
129455420Stegge		       io_apic_ints[x].dst_apic_int != 0))) &&
129538888Stegge		    io_apic_ints[x].int_vector == 0xff) {
129638888Stegge			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
129738888Stegge					io_apic_ints[x].dst_apic_int,
129838888Stegge					int_vector);
129938888Stegge			int_vector++;
130038888Stegge			while (int_vector < APIC_INTMAPSIZE &&
130138888Stegge			       int_to_apicintpin[int_vector].ioapic != -1)
130238888Stegge				int_vector++;
130338888Stegge		}
130438888Stegge	}
130538888Stegge}
130638888Stegge
130738888Stegge
130826108Sfsmpstatic int
130926108Sfsmpprocessor_entry(proc_entry_ptr entry, int cpu)
131025164Speter{
131125164Speter	/* check for usability */
131255540Sluoqi	if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
131326108Sfsmp		return 0;
131425164Speter
131564290Stegge	if(entry->apic_id >= NAPICID)
131664290Stegge		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
131725164Speter	/* check for BSP flag */
131825164Speter	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
131925164Speter		boot_cpu_id = entry->apic_id;
132026108Sfsmp		CPU_TO_ID(0) = entry->apic_id;
132126108Sfsmp		ID_TO_CPU(entry->apic_id) = 0;
132226108Sfsmp		return 0;	/* its already been counted */
132325164Speter	}
132425164Speter
132526108Sfsmp	/* add another AP to list, if less than max number of CPUs */
132666277Sps	else if (cpu < MAXCPU) {
132726108Sfsmp		CPU_TO_ID(cpu) = entry->apic_id;
132826108Sfsmp		ID_TO_CPU(entry->apic_id) = cpu;
132926108Sfsmp		return 1;
133026108Sfsmp	}
133155540Sluoqi
133255540Sluoqi	return 0;
133325164Speter}
133425164Speter
133525164Speter
133626108Sfsmpstatic int
133726108Sfsmpbus_entry(bus_entry_ptr entry, int bus)
133825164Speter{
133926108Sfsmp	int     x;
134026108Sfsmp	char    c, name[8];
134125164Speter
134225164Speter	/* encode the name into an index */
134326108Sfsmp	for (x = 0; x < 6; ++x) {
134426108Sfsmp		if ((c = entry->bus_type[x]) == ' ')
134525164Speter			break;
134626108Sfsmp		name[x] = c;
134725164Speter	}
134826108Sfsmp	name[x] = '\0';
134925164Speter
135026108Sfsmp	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
135126108Sfsmp		panic("unknown bus type: '%s'", name);
135225164Speter
135326108Sfsmp	bus_data[bus].bus_id = entry->bus_id;
135426108Sfsmp	bus_data[bus].bus_type = x;
135526108Sfsmp
135626108Sfsmp	return 1;
135725164Speter}
135825164Speter
135925164Speter
136026108Sfsmpstatic int
136126108Sfsmpio_apic_entry(io_apic_entry_ptr entry, int apic)
136225164Speter{
136325164Speter	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
136426108Sfsmp		return 0;
136525164Speter
136626108Sfsmp	IO_TO_ID(apic) = entry->apic_id;
136764290Stegge	if (entry->apic_id < NAPICID)
136864290Stegge		ID_TO_IO(entry->apic_id) = apic;
136925164Speter
137026108Sfsmp	return 1;
137125164Speter}
137225164Speter
137325164Speter
137425164Speterstatic int
137525164Speterlookup_bus_type(char *name)
137625164Speter{
137725164Speter	int     x;
137825164Speter
137925164Speter	for (x = 0; x < MAX_BUSTYPE; ++x)
138025164Speter		if (strcmp(bus_type_table[x].name, name) == 0)
138125164Speter			return bus_type_table[x].type;
138225164Speter
138325164Speter	return UNKNOWN_BUSTYPE;
138425164Speter}
138525164Speter
138625164Speter
138726108Sfsmpstatic int
138826108Sfsmpint_entry(int_entry_ptr entry, int intr)
138925164Speter{
139041367Stegge	int apic;
139141367Stegge
139226108Sfsmp	io_apic_ints[intr].int_type = entry->int_type;
139326108Sfsmp	io_apic_ints[intr].int_flags = entry->int_flags;
139426108Sfsmp	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
139526108Sfsmp	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
139641367Stegge	if (entry->dst_apic_id == 255) {
139741367Stegge		/* This signal goes to all IO APICS.  Select an IO APIC
139841367Stegge		   with sufficient number of interrupt pins */
139941367Stegge		for (apic = 0; apic < mp_napics; apic++)
140041367Stegge			if (((io_apic_read(apic, IOAPIC_VER) &
140141367Stegge			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
140241367Stegge			    entry->dst_apic_int)
140341367Stegge				break;
140441367Stegge		if (apic < mp_napics)
140541367Stegge			io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
140641367Stegge		else
140741367Stegge			io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
140841367Stegge	} else
140941367Stegge		io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
141026108Sfsmp	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
141125164Speter
141226108Sfsmp	return 1;
141325164Speter}
141425164Speter
141525164Speter
141625164Speterstatic int
141725164Speterapic_int_is_bus_type(int intr, int bus_type)
141825164Speter{
141925164Speter	int     bus;
142025164Speter
142125164Speter	for (bus = 0; bus < mp_nbusses; ++bus)
142225164Speter		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
142325164Speter		    && ((int) bus_data[bus].bus_type == bus_type))
142425164Speter			return 1;
142525164Speter
142625164Speter	return 0;
142725164Speter}
142825164Speter
142925164Speter
143025164Speter/*
143126950Sfsmp * Given a traditional ISA INT mask, return an APIC mask.
143225164Speter */
143325499Sfsmpu_int
143426950Sfsmpisa_apic_mask(u_int isa_mask)
143525419Sfsmp{
143626950Sfsmp	int isa_irq;
143726950Sfsmp	int apic_pin;
143825419Sfsmp
143927255Sfsmp#if defined(SKIP_IRQ15_REDIRECT)
144027255Sfsmp	if (isa_mask == (1 << 15)) {
144127255Sfsmp		printf("skipping ISA IRQ15 redirect\n");
144227255Sfsmp		return isa_mask;
144327255Sfsmp	}
144427255Sfsmp#endif  /* SKIP_IRQ15_REDIRECT */
144527255Sfsmp
144626950Sfsmp	isa_irq = ffs(isa_mask);		/* find its bit position */
144726950Sfsmp	if (isa_irq == 0)			/* doesn't exist */
144825419Sfsmp		return 0;
144926950Sfsmp	--isa_irq;				/* make it zero based */
145025419Sfsmp
145138888Stegge	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
145226950Sfsmp	if (apic_pin == -1)
145326950Sfsmp		return 0;
145425419Sfsmp
145526950Sfsmp	return (1 << apic_pin);			/* convert pin# to a mask */
145625419Sfsmp}
145725419Sfsmp
145825419Sfsmp
145925419Sfsmp/*
146026950Sfsmp * Determine which APIC pin an ISA/EISA INT is attached to.
146125164Speter */
146226950Sfsmp#define INTTYPE(I)	(io_apic_ints[(I)].int_type)
146326950Sfsmp#define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
146438888Stegge#define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
146538888Stegge#define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
146626950Sfsmp
146725164Speter#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
146825164Speterint
146938888Steggeisa_apic_irq(int isa_irq)
147025164Speter{
147125164Speter	int     intr;
147225164Speter
147326950Sfsmp	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
147426950Sfsmp		if (INTTYPE(intr) == 0) {		/* standard INT */
147526950Sfsmp			if (SRCBUSIRQ(intr) == isa_irq) {
147626950Sfsmp				if (apic_int_is_bus_type(intr, ISA) ||
147726950Sfsmp			            apic_int_is_bus_type(intr, EISA))
147838888Stegge					return INTIRQ(intr);	/* found */
147926950Sfsmp			}
148026950Sfsmp		}
148126950Sfsmp	}
148226950Sfsmp	return -1;					/* NOT found */
148325164Speter}
148425164Speter
148525164Speter
148625164Speter/*
148726950Sfsmp * Determine which APIC pin a PCI INT is attached to.
148825164Speter */
148925164Speter#define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
149025164Speter#define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
149125164Speter#define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
149225164Speterint
149338888Steggepci_apic_irq(int pciBus, int pciDevice, int pciInt)
149425164Speter{
149525164Speter	int     intr;
149625164Speter
149726950Sfsmp	--pciInt;					/* zero based */
149825164Speter
149926950Sfsmp	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
150026950Sfsmp		if ((INTTYPE(intr) == 0)		/* standard INT */
150125164Speter		    && (SRCBUSID(intr) == pciBus)
150225164Speter		    && (SRCBUSDEVICE(intr) == pciDevice)
150325164Speter		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
150426950Sfsmp			if (apic_int_is_bus_type(intr, PCI))
150538888Stegge				return INTIRQ(intr);	/* exact match */
150625164Speter
150726950Sfsmp	return -1;					/* NOT found */
150825164Speter}
150934990Stegge
151034990Steggeint
151138888Steggenext_apic_irq(int irq)
151234990Stegge{
151334990Stegge	int intr, ointr;
151434990Stegge	int bus, bustype;
151534990Stegge
151634990Stegge	bus = 0;
151734990Stegge	bustype = 0;
151834990Stegge	for (intr = 0; intr < nintrs; intr++) {
151938888Stegge		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
152034990Stegge			continue;
152134990Stegge		bus = SRCBUSID(intr);
152234990Stegge		bustype = apic_bus_type(bus);
152334990Stegge		if (bustype != ISA &&
152434990Stegge		    bustype != EISA &&
152534990Stegge		    bustype != PCI)
152634990Stegge			continue;
152734990Stegge		break;
152834990Stegge	}
152934990Stegge	if (intr >= nintrs) {
153034990Stegge		return -1;
153134990Stegge	}
153234990Stegge	for (ointr = intr + 1; ointr < nintrs; ointr++) {
153334990Stegge		if (INTTYPE(ointr) != 0)
153434990Stegge			continue;
153534990Stegge		if (bus != SRCBUSID(ointr))
153634990Stegge			continue;
153734990Stegge		if (bustype == PCI) {
153834990Stegge			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
153934990Stegge				continue;
154034990Stegge			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
154134990Stegge				continue;
154234990Stegge		}
154334990Stegge		if (bustype == ISA || bustype == EISA) {
154434990Stegge			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
154534990Stegge				continue;
154634990Stegge		}
154734990Stegge		if (INTPIN(intr) == INTPIN(ointr))
154834990Stegge			continue;
154934990Stegge		break;
155034990Stegge	}
155134990Stegge	if (ointr >= nintrs) {
155234990Stegge		return -1;
155334990Stegge	}
155438888Stegge	return INTIRQ(ointr);
155534990Stegge}
155625164Speter#undef SRCBUSLINE
155725164Speter#undef SRCBUSDEVICE
155825164Speter#undef SRCBUSID
155934990Stegge#undef SRCBUSIRQ
156025164Speter
156125164Speter#undef INTPIN
156238888Stegge#undef INTIRQ
156338888Stegge#undef INTAPIC
156425164Speter#undef INTTYPE
156525164Speter
156625164Speter
156725499Sfsmp/*
156826950Sfsmp * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
156926950Sfsmp *
157026950Sfsmp * XXX FIXME:
157126950Sfsmp *  Exactly what this means is unclear at this point.  It is a solution
157226950Sfsmp *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
157326950Sfsmp *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
157426950Sfsmp *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
157526950Sfsmp *  option.
157625499Sfsmp */
157725164Speterint
157826950Sfsmpundirect_isa_irq(int rirq)
157925164Speter{
158025164Speter#if defined(READY)
158142880Sjkh	if (bootverbose)
158242880Sjkh	    printf("Freeing redirected ISA irq %d.\n", rirq);
158325164Speter	/** FIXME: tickle the MB redirector chip */
158425164Speter	return ???;
158525164Speter#else
158642880Sjkh	if (bootverbose)
158742880Sjkh	    printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
158825164Speter	return 0;
158925499Sfsmp#endif  /* READY */
159025164Speter}
159125164Speter
159225164Speter
159325164Speter/*
159426950Sfsmp * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
159525499Sfsmp */
159625499Sfsmpint
159726950Sfsmpundirect_pci_irq(int rirq)
159825499Sfsmp{
159925499Sfsmp#if defined(READY)
160026950Sfsmp	if (bootverbose)
160126950Sfsmp		printf("Freeing redirected PCI irq %d.\n", rirq);
160226950Sfsmp
160325499Sfsmp	/** FIXME: tickle the MB redirector chip */
160425499Sfsmp	return ???;
160525499Sfsmp#else
160626950Sfsmp	if (bootverbose)
160726950Sfsmp		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
160826950Sfsmp		       rirq);
160925499Sfsmp	return 0;
161025499Sfsmp#endif  /* READY */
161125499Sfsmp}
161225499Sfsmp
161325499Sfsmp
161425499Sfsmp/*
161525164Speter * given a bus ID, return:
161625164Speter *  the bus type if found
161725164Speter *  -1 if NOT found
161825164Speter */
161925164Speterint
162025164Speterapic_bus_type(int id)
162125164Speter{
162225164Speter	int     x;
162325164Speter
162425164Speter	for (x = 0; x < mp_nbusses; ++x)
162525164Speter		if (bus_data[x].bus_id == id)
162625164Speter			return bus_data[x].bus_type;
162725164Speter
162825164Speter	return -1;
162925164Speter}
163025164Speter
163125164Speter
163225164Speter/*
163325164Speter * given a LOGICAL APIC# and pin#, return:
163425164Speter *  the associated src bus ID if found
163525164Speter *  -1 if NOT found
163625164Speter */
163725164Speterint
163825164Speterapic_src_bus_id(int apic, int pin)
163925164Speter{
164025164Speter	int     x;
164125164Speter
164225164Speter	/* search each of the possible INTerrupt sources */
164325164Speter	for (x = 0; x < nintrs; ++x)
164425164Speter		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
164525164Speter		    (pin == io_apic_ints[x].dst_apic_int))
164625164Speter			return (io_apic_ints[x].src_bus_id);
164725164Speter
164825164Speter	return -1;		/* NOT found */
164925164Speter}
165025164Speter
165125164Speter
165225164Speter/*
165325164Speter * given a LOGICAL APIC# and pin#, return:
165425164Speter *  the associated src bus IRQ if found
165525164Speter *  -1 if NOT found
165625164Speter */
165725164Speterint
165825164Speterapic_src_bus_irq(int apic, int pin)
165925164Speter{
166025164Speter	int     x;
166125164Speter
166225164Speter	for (x = 0; x < nintrs; x++)
166325164Speter		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
166425164Speter		    (pin == io_apic_ints[x].dst_apic_int))
166525164Speter			return (io_apic_ints[x].src_bus_irq);
166625164Speter
166725164Speter	return -1;		/* NOT found */
166825164Speter}
166925164Speter
167025164Speter
167125164Speter/*
167225164Speter * given a LOGICAL APIC# and pin#, return:
167325164Speter *  the associated INTerrupt type if found
167425164Speter *  -1 if NOT found
167525164Speter */
167625164Speterint
167725164Speterapic_int_type(int apic, int pin)
167825164Speter{
167925164Speter	int     x;
168025164Speter
168125164Speter	/* search each of the possible INTerrupt sources */
168225164Speter	for (x = 0; x < nintrs; ++x)
168325164Speter		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
168425164Speter		    (pin == io_apic_ints[x].dst_apic_int))
168525164Speter			return (io_apic_ints[x].int_type);
168625164Speter
168725164Speter	return -1;		/* NOT found */
168825164Speter}
168925164Speter
169038888Steggeint
169138888Steggeapic_irq(int apic, int pin)
169238888Stegge{
169338888Stegge	int x;
169438888Stegge	int res;
169525164Speter
169638888Stegge	for (x = 0; x < nintrs; ++x)
169738888Stegge		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
169838888Stegge		    (pin == io_apic_ints[x].dst_apic_int)) {
169938888Stegge			res = io_apic_ints[x].int_vector;
170038888Stegge			if (res == 0xff)
170138888Stegge				return -1;
170238888Stegge			if (apic != int_to_apicintpin[res].ioapic)
170338888Stegge				panic("apic_irq: inconsistent table");
170438888Stegge			if (pin != int_to_apicintpin[res].int_pin)
170538888Stegge				panic("apic_irq inconsistent table (2)");
170638888Stegge			return res;
170738888Stegge		}
170838888Stegge	return -1;
170938888Stegge}
171038888Stegge
171138888Stegge
171225164Speter/*
171325164Speter * given a LOGICAL APIC# and pin#, return:
171425164Speter *  the associated trigger mode if found
171525164Speter *  -1 if NOT found
171625164Speter */
171725164Speterint
171825164Speterapic_trigger(int apic, int pin)
171925164Speter{
172025164Speter	int     x;
172125164Speter
172225164Speter	/* search each of the possible INTerrupt sources */
172325164Speter	for (x = 0; x < nintrs; ++x)
172425164Speter		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
172525164Speter		    (pin == io_apic_ints[x].dst_apic_int))
172625164Speter			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
172725164Speter
172825164Speter	return -1;		/* NOT found */
172925164Speter}
173025164Speter
173125164Speter
173225164Speter/*
173325164Speter * given a LOGICAL APIC# and pin#, return:
173425164Speter *  the associated 'active' level if found
173525164Speter *  -1 if NOT found
173625164Speter */
173725164Speterint
173825164Speterapic_polarity(int apic, int pin)
173925164Speter{
174025164Speter	int     x;
174125164Speter
174225164Speter	/* search each of the possible INTerrupt sources */
174325164Speter	for (x = 0; x < nintrs; ++x)
174425164Speter		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
174525164Speter		    (pin == io_apic_ints[x].dst_apic_int))
174625164Speter			return (io_apic_ints[x].int_flags & 0x03);
174725164Speter
174825164Speter	return -1;		/* NOT found */
174925164Speter}
175025164Speter
175125164Speter
175225164Speter/*
175325164Speter * set data according to MP defaults
175425164Speter * FIXME: probably not complete yet...
175525164Speter */
175625164Speterstatic void
175725164Speterdefault_mp_table(int type)
175825164Speter{
175925164Speter	int     ap_cpu_id;
176025164Speter#if defined(APIC_IO)
176125164Speter	int     io_apic_id;
176225164Speter	int     pin;
176325164Speter#endif	/* APIC_IO */
176425164Speter
176525164Speter#if 0
176625164Speter	printf("  MP default config type: %d\n", type);
176725164Speter	switch (type) {
176825164Speter	case 1:
176925164Speter		printf("   bus: ISA, APIC: 82489DX\n");
177025164Speter		break;
177125164Speter	case 2:
177225164Speter		printf("   bus: EISA, APIC: 82489DX\n");
177325164Speter		break;
177425164Speter	case 3:
177525164Speter		printf("   bus: EISA, APIC: 82489DX\n");
177625164Speter		break;
177725164Speter	case 4:
177825164Speter		printf("   bus: MCA, APIC: 82489DX\n");
177925164Speter		break;
178025164Speter	case 5:
178125164Speter		printf("   bus: ISA+PCI, APIC: Integrated\n");
178225164Speter		break;
178325164Speter	case 6:
178425164Speter		printf("   bus: EISA+PCI, APIC: Integrated\n");
178525164Speter		break;
178625164Speter	case 7:
178725164Speter		printf("   bus: MCA+PCI, APIC: Integrated\n");
178825164Speter		break;
178925164Speter	default:
179025164Speter		printf("   future type\n");
179125164Speter		break;
179225164Speter		/* NOTREACHED */
179325164Speter	}
179425164Speter#endif	/* 0 */
179525164Speter
179626812Speter	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
179725164Speter	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
179825164Speter
179925164Speter	/* BSP */
180025164Speter	CPU_TO_ID(0) = boot_cpu_id;
180125164Speter	ID_TO_CPU(boot_cpu_id) = 0;
180225164Speter
180325164Speter	/* one and only AP */
180425164Speter	CPU_TO_ID(1) = ap_cpu_id;
180525164Speter	ID_TO_CPU(ap_cpu_id) = 1;
180625164Speter
180726108Sfsmp#if defined(APIC_IO)
180825164Speter	/* one and only IO APIC */
180925164Speter	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
181025164Speter
181125164Speter	/*
181225164Speter	 * sanity check, refer to MP spec section 3.6.6, last paragraph
181325164Speter	 * necessary as some hardware isn't properly setting up the IO APIC
181425164Speter	 */
181525164Speter#if defined(REALLY_ANAL_IOAPICID_VALUE)
181625164Speter	if (io_apic_id != 2) {
181725164Speter#else
181825164Speter	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
181925164Speter#endif	/* REALLY_ANAL_IOAPICID_VALUE */
182061136Smsmith		io_apic_set_id(0, 2);
182125164Speter		io_apic_id = 2;
182225164Speter	}
182325164Speter	IO_TO_ID(0) = io_apic_id;
182425164Speter	ID_TO_IO(io_apic_id) = 0;
182525164Speter#endif	/* APIC_IO */
182625164Speter
182725164Speter	/* fill out bus entries */
182825164Speter	switch (type) {
182925164Speter	case 1:
183025164Speter	case 2:
183125164Speter	case 3:
183255891Smdodd	case 4:
183325164Speter	case 5:
183425164Speter	case 6:
183555891Smdodd	case 7:
183625164Speter		bus_data[0].bus_id = default_data[type - 1][1];
183725164Speter		bus_data[0].bus_type = default_data[type - 1][2];
183825164Speter		bus_data[1].bus_id = default_data[type - 1][3];
183925164Speter		bus_data[1].bus_type = default_data[type - 1][4];
184025164Speter		break;
184125164Speter
184225164Speter	/* case 4: case 7:		   MCA NOT supported */
184325164Speter	default:		/* illegal/reserved */
184426108Sfsmp		panic("BAD default MP config: %d", type);
184526101Sfsmp		/* NOTREACHED */
184625164Speter	}
184725164Speter
184825164Speter#if defined(APIC_IO)
184925164Speter	/* general cases from MP v1.4, table 5-2 */
185025164Speter	for (pin = 0; pin < 16; ++pin) {
185125164Speter		io_apic_ints[pin].int_type = 0;
185227728Sfsmp		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
185325164Speter		io_apic_ints[pin].src_bus_id = 0;
185427728Sfsmp		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
185525164Speter		io_apic_ints[pin].dst_apic_id = io_apic_id;
185627728Sfsmp		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
185725164Speter	}
185825164Speter
185925164Speter	/* special cases from MP v1.4, table 5-2 */
186025164Speter	if (type == 2) {
186125164Speter		io_apic_ints[2].int_type = 0xff;	/* N/C */
186225164Speter		io_apic_ints[13].int_type = 0xff;	/* N/C */
186325164Speter#if !defined(APIC_MIXED_MODE)
186425164Speter		/** FIXME: ??? */
186526108Sfsmp		panic("sorry, can't support type 2 default yet");
186625164Speter#endif	/* APIC_MIXED_MODE */
186726019Sfsmp	}
186826019Sfsmp	else
186925164Speter		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
187025164Speter
187125164Speter	if (type == 7)
187225164Speter		io_apic_ints[0].int_type = 0xff;	/* N/C */
187325164Speter	else
187425164Speter		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
187525164Speter#endif	/* APIC_IO */
187625164Speter}
187725164Speter
187825164Speter
187925164Speter/*
188027634Sfsmp * The SMP simple locks.  Each lock declared in this section is
 * initialized by init_locks() below; com_lock exists only when
 * USE_COMLOCK is defined.
188127634Sfsmp */
188228442Sfsmp
188328487Sfsmp/* critical region around the IO APIC and apic_imen */
188428487Sfsmpstruct simplelock	imen_lock;
188528487Sfsmp
188629213Sfsmp/* critical region around splxx(), cpl, cml, cil and ipending */
188728487Sfsmpstruct simplelock	cpl_lock;
188828487Sfsmp
188928487Sfsmp/* makes the FAST_INTR() routines run sequentially */
189028487Sfsmpstruct simplelock	fast_intr_lock;
189128487Sfsmp
189228487Sfsmp/* critical region around the INTR() routines */
189328487Sfsmpstruct simplelock	intr_lock;
189428487Sfsmp
189531723Stegge/* lock region used by kernel profiling */
189631723Steggestruct simplelock	mcount_lock;
189731723Stegge
189828951Sfsmp#ifdef USE_COMLOCK
189928951Sfsmp/* locks com (tty) data/hardware accesses: a FASTINTR() */
190028442Sfsmpstruct simplelock	com_lock;
190128951Sfsmp#endif /* USE_COMLOCK */
190228442Sfsmp
190348924Smsmith/* lock around the MP rendezvous; private to this file */
190448924Smsmithstatic struct simplelock smp_rv_lock;
190548924Smsmith
190665557Sjasone/* only 1 CPU can panic at a time :) */
190765557Sjasonestruct simplelock	panic_lock;
190865557Sjasone
190927634Sfsmpstatic void
191027634Sfsmpinit_locks(void)
191127634Sfsmp{
191234021Stegge#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
191334021Stegge	s_lock_init((struct simplelock*)&apic_itrace_debuglock);
191434021Stegge#endif
191534021Stegge
191631723Stegge	s_lock_init((struct simplelock*)&mcount_lock);
191731723Stegge
191828442Sfsmp	s_lock_init((struct simplelock*)&fast_intr_lock);
191928442Sfsmp	s_lock_init((struct simplelock*)&intr_lock);
192027780Sfsmp	s_lock_init((struct simplelock*)&imen_lock);
192128442Sfsmp	s_lock_init((struct simplelock*)&cpl_lock);
192248924Smsmith	s_lock_init(&smp_rv_lock);
192365557Sjasone	s_lock_init(&panic_lock);
192428999Sfsmp
192528951Sfsmp#ifdef USE_COMLOCK
192628442Sfsmp	s_lock_init((struct simplelock*)&com_lock);
192728951Sfsmp#endif /* USE_COMLOCK */
192865557Sjasone
192965557Sjasone	s_lock_init(&ap_boot_lock);
193027634Sfsmp}
193127634Sfsmp
193227634Sfsmp/*
193325164Speter * start each AP in our list
193425164Speter */
193525164Speterstatic int
193625164Speterstart_all_aps(u_int boot_addr)
193725164Speter{
193846129Sluoqi	int     x, i, pg;
193925164Speter	u_char  mpbiosreason;
194025164Speter	u_long  mpbioswarmvec;
194135077Speter	struct globaldata *gd;
194229655Sdyson	char *stack;
194325164Speter
194427005Sfsmp	POSTCODE(START_ALL_APS_POST);
194527005Sfsmp
194625164Speter	/* initialize BSP's local APIC */
194727289Sfsmp	apic_initialize();
194828669Sfsmp	bsp_apic_ready = 1;
194925164Speter
195025164Speter	/* install the AP 1st level boot code */
195125164Speter	install_ap_tramp(boot_addr);
195225164Speter
195326812Speter
195425164Speter	/* save the current value of the warm-start vector */
195525164Speter	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
195640179Skato#ifndef PC98
195725164Speter	outb(CMOS_REG, BIOS_RESET);
195825164Speter	mpbiosreason = inb(CMOS_DATA);
195940169Skato#endif
196025164Speter
196127005Sfsmp	/* record BSP in CPU map */
196227005Sfsmp	all_cpus = 1;
196327005Sfsmp
196446129Sluoqi	/* set up 0 -> 4MB P==V mapping for AP boot */
196546129Sluoqi	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
196646129Sluoqi	invltlb();
196746129Sluoqi
196825164Speter	/* start each AP */
196925164Speter	for (x = 1; x <= mp_naps; ++x) {
197025164Speter
197128808Speter		/* This is a bit verbose, it will go away soon.  */
197226812Speter
197346129Sluoqi		/* first page of AP's private space */
197446129Sluoqi		pg = x * i386_btop(sizeof(struct privatespace));
197526812Speter
197628808Speter		/* allocate a new private data page */
197735077Speter		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
197826812Speter
197926812Speter		/* wire it into the private page table page */
198046129Sluoqi		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
198126812Speter
198228808Speter		/* allocate and set up an idle stack data page */
198329655Sdyson		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
198429655Sdyson		for (i = 0; i < UPAGES; i++)
198546129Sluoqi			SMPpt[pg + 5 + i] = (pt_entry_t)
198646129Sluoqi			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
198726812Speter
198846129Sluoqi		SMPpt[pg + 1] = 0;		/* *prv_CMAP1 */
198946129Sluoqi		SMPpt[pg + 2] = 0;		/* *prv_CMAP2 */
199046129Sluoqi		SMPpt[pg + 3] = 0;		/* *prv_CMAP3 */
199146129Sluoqi		SMPpt[pg + 4] = 0;		/* *prv_PMAP1 */
199226812Speter
199328808Speter		/* prime data page for it to use */
199465557Sjasone		SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
199546129Sluoqi		gd->gd_cpuid = x;
199646129Sluoqi		gd->gd_cpu_lockid = x << 24;
199746129Sluoqi		gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
199846129Sluoqi		gd->gd_prv_CMAP2 = &SMPpt[pg + 2];
199946129Sluoqi		gd->gd_prv_CMAP3 = &SMPpt[pg + 3];
200046129Sluoqi		gd->gd_prv_PMAP1 = &SMPpt[pg + 4];
200146129Sluoqi		gd->gd_prv_CADDR1 = SMP_prvspace[x].CPAGE1;
200246129Sluoqi		gd->gd_prv_CADDR2 = SMP_prvspace[x].CPAGE2;
200346129Sluoqi		gd->gd_prv_CADDR3 = SMP_prvspace[x].CPAGE3;
200446129Sluoqi		gd->gd_prv_PADDR1 = (unsigned *)SMP_prvspace[x].PPAGE1;
200526812Speter
200625164Speter		/* setup a vector to our boot code */
200725164Speter		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
200825164Speter		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
200940179Skato#ifndef PC98
201025164Speter		outb(CMOS_REG, BIOS_RESET);
201125164Speter		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
201240169Skato#endif
201325164Speter
201446129Sluoqi		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
201548144Sluoqi		bootAP = x;
201646129Sluoqi
201725164Speter		/* attempt to start the Application Processor */
201825164Speter		CHECK_INIT(99);	/* setup checkpoints */
201925164Speter		if (!start_ap(x, boot_addr)) {
202025164Speter			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
202125164Speter			CHECK_PRINT("trace");	/* show checkpoints */
202226155Sfsmp			/* better panic as the AP may be running loose */
202326155Sfsmp			printf("panic y/n? [y] ");
202426101Sfsmp			if (cngetc() != 'n')
202526108Sfsmp				panic("bye-bye");
202625164Speter		}
202727005Sfsmp		CHECK_PRINT("trace");		/* show checkpoints */
202825164Speter
202925164Speter		/* record its version info */
203025164Speter		cpu_apic_versions[x] = cpu_apic_versions[0];
203127005Sfsmp
203227005Sfsmp		all_cpus |= (1 << x);		/* record AP in CPU map */
203325164Speter	}
203425164Speter
203527005Sfsmp	/* build our map of 'other' CPUs */
203627005Sfsmp	other_cpus = all_cpus & ~(1 << cpuid);
203727005Sfsmp
203825164Speter	/* fill in our (BSP) APIC version */
203926812Speter	cpu_apic_versions[0] = lapic.version;
204025164Speter
204125164Speter	/* restore the warmstart vector */
204225164Speter	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
204340179Skato#ifndef PC98
204425164Speter	outb(CMOS_REG, BIOS_RESET);
204525164Speter	outb(CMOS_DATA, mpbiosreason);
204640169Skato#endif
204725164Speter
204828808Speter	/*
204928808Speter	 * Set up the idle context for the BSP.  Similar to above except
205028808Speter	 * that some was done by locore, some by pmap.c and some is implicit
205128808Speter	 * because the BSP is cpu#0 and the page is initially zero, and also
205228808Speter	 * because we can refer to variables by name on the BSP..
205328808Speter	 */
205428808Speter
205528808Speter	/* Allocate and setup BSP idle stack */
205629655Sdyson	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
205729655Sdyson	for (i = 0; i < UPAGES; i++)
205846129Sluoqi		SMPpt[5 + i] = (pt_entry_t)
205946129Sluoqi		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
206028808Speter
206146129Sluoqi	*(int *)PTD = 0;
206248144Sluoqi	pmap_set_opt();
206327484Sdyson
206425164Speter	/* number of APs actually started */
206525164Speter	return mp_ncpus - 1;
206625164Speter}
206725164Speter
206825164Speter
206925164Speter/*
207025164Speter * load the 1st level AP boot code into base memory.
207125164Speter */
207225164Speter
207325164Speter/* targets for relocation */
207425164Speterextern void bigJump(void);
207525164Speterextern void bootCodeSeg(void);
207625164Speterextern void bootDataSeg(void);
207725164Speterextern void MPentry(void);
207825164Speterextern u_int MP_GDT;
207925164Speterextern u_int mp_gdtbase;
208025164Speter
208125164Speterstatic void
208225164Speterinstall_ap_tramp(u_int boot_addr)
208325164Speter{
208425164Speter	int     x;
208525164Speter	int     size = *(int *) ((u_long) & bootMP_size);
208625164Speter	u_char *src = (u_char *) ((u_long) bootMP);
208725164Speter	u_char *dst = (u_char *) boot_addr + KERNBASE;
208825164Speter	u_int   boot_base = (u_int) bootMP;
208925164Speter	u_int8_t *dst8;
209025164Speter	u_int16_t *dst16;
209125164Speter	u_int32_t *dst32;
209225164Speter
209327005Sfsmp	POSTCODE(INSTALL_AP_TRAMP_POST);
209427005Sfsmp
209525164Speter	for (x = 0; x < size; ++x)
209625164Speter		*dst++ = *src++;
209725164Speter
209825164Speter	/*
209925164Speter	 * modify addresses in code we just moved to basemem. unfortunately we
210025164Speter	 * need fairly detailed info about mpboot.s for this to work.  changes
210125164Speter	 * to mpboot.s might require changes here.
210225164Speter	 */
210325164Speter
210425164Speter	/* boot code is located in KERNEL space */
210525164Speter	dst = (u_char *) boot_addr + KERNBASE;
210625164Speter
210725164Speter	/* modify the lgdt arg */
210825164Speter	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
210925164Speter	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
211025164Speter
211125164Speter	/* modify the ljmp target for MPentry() */
211225164Speter	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
211325164Speter	*dst32 = ((u_int) MPentry - KERNBASE);
211425164Speter
211525164Speter	/* modify the target for boot code segment */
211625164Speter	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
211725164Speter	dst8 = (u_int8_t *) (dst16 + 1);
211825164Speter	*dst16 = (u_int) boot_addr & 0xffff;
211925164Speter	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
212025164Speter
212125164Speter	/* modify the target for boot data segment */
212225164Speter	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
212325164Speter	dst8 = (u_int8_t *) (dst16 + 1);
212425164Speter	*dst16 = (u_int) boot_addr & 0xffff;
212525164Speter	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
212625164Speter}
212725164Speter
212825164Speter
212925164Speter/*
213025164Speter * this function starts the AP (application processor) identified
213125164Speter * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
213225164Speter * to accomplish this.  This is necessary because of the nuances
213325164Speter * of the different hardware we might encounter.  It ain't pretty,
213425164Speter * but it seems to work.
213525164Speter */
213625164Speterstatic int
213725164Speterstart_ap(int logical_cpu, u_int boot_addr)
213825164Speter{
213925164Speter	int     physical_cpu;
214025164Speter	int     vector;
214125164Speter	int     cpus;
214225164Speter	u_long  icr_lo, icr_hi;
214325164Speter
214427005Sfsmp	POSTCODE(START_AP_POST);
214527005Sfsmp
214625164Speter	/* get the PHYSICAL APIC ID# */
214725164Speter	physical_cpu = CPU_TO_ID(logical_cpu);
214825164Speter
214925164Speter	/* calculate the vector */
215025164Speter	vector = (boot_addr >> 12) & 0xff;
215125164Speter
215225164Speter	/* used as a watchpoint to signal AP startup */
215325164Speter	cpus = mp_ncpus;
215425164Speter
215525164Speter	/*
215625164Speter	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
215725164Speter	 * and running the target CPU. OR this INIT IPI might be latched (P5
215825164Speter	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
215925164Speter	 * ignored.
216025164Speter	 */
216125164Speter
216225164Speter	/* setup the address for the target AP */
216326812Speter	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
216425164Speter	icr_hi |= (physical_cpu << 24);
216526812Speter	lapic.icr_hi = icr_hi;
216625164Speter
216725164Speter	/* do an INIT IPI: assert RESET */
216826812Speter	icr_lo = lapic.icr_lo & 0xfff00000;
216926812Speter	lapic.icr_lo = icr_lo | 0x0000c500;
217025164Speter
217125164Speter	/* wait for pending status end */
217226812Speter	while (lapic.icr_lo & APIC_DELSTAT_MASK)
217325164Speter		 /* spin */ ;
217425164Speter
217525164Speter	/* do an INIT IPI: deassert RESET */
217626812Speter	lapic.icr_lo = icr_lo | 0x00008500;
217725164Speter
217825164Speter	/* wait for pending status end */
217925164Speter	u_sleep(10000);		/* wait ~10mS */
218026812Speter	while (lapic.icr_lo & APIC_DELSTAT_MASK)
218125164Speter		 /* spin */ ;
218225164Speter
218325164Speter	/*
218425164Speter	 * next we do a STARTUP IPI: the previous INIT IPI might still be
218525164Speter	 * latched, (P5 bug) this 1st STARTUP would then terminate
218625164Speter	 * immediately, and the previously started INIT IPI would continue. OR
218725164Speter	 * the previous INIT IPI has already run. and this STARTUP IPI will
218825164Speter	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
218925164Speter	 * will run.
219025164Speter	 */
219125164Speter
219225164Speter	/* do a STARTUP IPI */
219326812Speter	lapic.icr_lo = icr_lo | 0x00000600 | vector;
219426812Speter	while (lapic.icr_lo & APIC_DELSTAT_MASK)
219525164Speter		 /* spin */ ;
219625164Speter	u_sleep(200);		/* wait ~200uS */
219725164Speter
219825164Speter	/*
219925164Speter	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
220025164Speter	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
220125164Speter	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
220225164Speter	 * recognized after hardware RESET or INIT IPI.
220325164Speter	 */
220425164Speter
220526812Speter	lapic.icr_lo = icr_lo | 0x00000600 | vector;
220626812Speter	while (lapic.icr_lo & APIC_DELSTAT_MASK)
220725164Speter		 /* spin */ ;
220825164Speter	u_sleep(200);		/* wait ~200uS */
220925164Speter
221025164Speter	/* wait for it to start */
221125164Speter	set_apic_timer(5000000);/* == 5 seconds */
221225164Speter	while (read_apic_timer())
221325164Speter		if (mp_ncpus > cpus)
221425164Speter			return 1;	/* return SUCCESS */
221525164Speter
221625164Speter	return 0;		/* return FAILURE */
221725164Speter}
221825164Speter
/*
 * Ask all other CPUs to flush their TLB by sending them the XINVLTLB IPI.
 * Nothing is sent unless both smp_started and invltlb_ok are set, and the
 * whole body is compiled out without APIC_IO.
 *
 * XXX: Needs to handshake and wait for completion before proceeding.
 */
void
smp_invltlb(void)
{
#if defined(APIC_IO)
	if (!smp_started || !invltlb_ok)
		return;

	all_but_self_ipi(XINVLTLB_OFFSET);
#endif  /* APIC_IO */
}
223225164Speter
223325164Spetervoid
223425164Speterinvlpg(u_int addr)
223525164Speter{
223625164Speter	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
223725215Sfsmp
223825215Sfsmp	/* send a message to the other CPUs */
223925164Speter	smp_invltlb();
224025164Speter}
224125164Speter
224225164Spetervoid
224325164Speterinvltlb(void)
224425164Speter{
224525164Speter	u_long  temp;
224625215Sfsmp
224725164Speter	/*
224825164Speter	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
224925164Speter	 * inlined.
225025164Speter	 */
225125164Speter	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
225225215Sfsmp
225325215Sfsmp	/* send a message to the other CPUs */
225425164Speter	smp_invltlb();
225525164Speter}
225627005Sfsmp
225727005Sfsmp
225827005Sfsmp/*
225927005Sfsmp * When called the executing CPU will send an IPI to all other CPUs
226027005Sfsmp *  requesting that they halt execution.
226127005Sfsmp *
226227005Sfsmp * Usually (but not necessarily) called with 'other_cpus' as its arg.
226327005Sfsmp *
226427005Sfsmp *  - Signals all CPUs in map to stop.
226527005Sfsmp *  - Waits for each to stop.
226627005Sfsmp *
226727005Sfsmp * Returns:
226827005Sfsmp *  -1: error
226927005Sfsmp *   0: NA
227027005Sfsmp *   1: ok
227127005Sfsmp *
227227005Sfsmp * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
227327005Sfsmp *            from executing at same time.
227427005Sfsmp */
227527005Sfsmpint
227628808Speterstop_cpus(u_int map)
227727005Sfsmp{
227828809Speter	if (!smp_started)
227927005Sfsmp		return 0;
228027005Sfsmp
228127353Sfsmp	/* send the Xcpustop IPI to all CPUs in map */
228227005Sfsmp	selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
228336135Stegge
228436135Stegge	while ((stopped_cpus & map) != map)
228527005Sfsmp		/* spin */ ;
228627005Sfsmp
228727005Sfsmp	return 1;
228827005Sfsmp}
228927005Sfsmp
229027005Sfsmp
229127005Sfsmp/*
229227005Sfsmp * Called by a CPU to restart stopped CPUs.
229327005Sfsmp *
229427005Sfsmp * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
229527005Sfsmp *
229627005Sfsmp *  - Signals all CPUs in map to restart.
229727005Sfsmp *  - Waits for each to restart.
229827005Sfsmp *
229927005Sfsmp * Returns:
230027005Sfsmp *  -1: error
230127005Sfsmp *   0: NA
230227005Sfsmp *   1: ok
230327005Sfsmp */
230427005Sfsmpint
230528808Speterrestart_cpus(u_int map)
230627005Sfsmp{
230728809Speter	if (!smp_started)
230827005Sfsmp		return 0;
230927005Sfsmp
231027255Sfsmp	started_cpus = map;		/* signal other cpus to restart */
231127255Sfsmp
231236135Stegge	while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
231327005Sfsmp		/* spin */ ;
231427005Sfsmp
231527005Sfsmp	return 1;
231627005Sfsmp}
231728808Speter
/*
 * SMP state and tunables, each exported under the machdep sysctl node
 * by the SYSCTL_INT() that follows its definition.
 */

/* set to 1 by ap_init() once smp_cpus reaches mp_ncpus */
231828808Speterint smp_active = 0;	/* are the APs allowed to run? */
231928808SpeterSYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
232028808Speter
232128808Speter/* XXX maybe should be hw.ncpu; incremented by each AP in ap_init() */
232233181Seivindstatic int smp_cpus = 1;	/* how many cpus are running */
232328808SpeterSYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
232428808Speter
/* smp_invltlb() sends no IPI while this is 0; set to 1 by ap_init() */
232528808Speterint invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
232628808SpeterSYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
232728808Speter
232833181Seivind/* Warning: Do not staticize.  Used from swtch.s */
232933936Sdysonint do_page_zero_idle = 1; /* bzero pages for fun and profit in idleloop */
233028808SpeterSYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
233128808Speter	   &do_page_zero_idle, 0, "");
233228808Speter
233334021Stegge/* Is forwarding of an interrupt to the CPU holding the ISR lock enabled? */
233434021Steggeint forward_irq_enabled = 1;
233534021SteggeSYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW,
233634021Stegge	   &forward_irq_enabled, 0, "");
233734021Stegge
233834020Stegge/* Enable forwarding of a signal to a process running on a different CPU */
233941362Seivindstatic int forward_signal_enabled = 1;
234034020SteggeSYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
234134020Stegge	   &forward_signal_enabled, 0, "");
234228808Speter
234336135Stegge/* Enable forwarding of roundrobin to all other cpus */
234441362Seivindstatic int forward_roundrobin_enabled = 1;
234536135SteggeSYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
234636135Stegge	   &forward_roundrobin_enabled, 0, "");
234736135Stegge
234828808Speter/*
234928808Speter * This is called once the rest of the system is up and running and we're
235028808Speter * ready to let the AP's out of the pen.
235128808Speter */
235228808Spetervoid ap_init(void);
235328808Speter
235428808Spetervoid
235565557Sjasoneap_init(void)
235628808Speter{
235728808Speter	u_int	apic_id;
235828808Speter
235965557Sjasone	/* lock against other AP's that are waking up */
236065557Sjasone	s_lock(&ap_boot_lock);
236165557Sjasone
236248144Sluoqi	/* BSP may have changed PTD while we're waiting for the lock */
236348144Sluoqi	cpu_invltlb();
236448144Sluoqi
236528808Speter	smp_cpus++;
236628808Speter
236734197Stegge#if defined(I586_CPU) && !defined(NO_F00F_HACK)
236834197Stegge	lidt(&r_idt);
236934197Stegge#endif
237034197Stegge
237128808Speter	/* Build our map of 'other' CPUs. */
237228808Speter	other_cpus = all_cpus & ~(1 << cpuid);
237328808Speter
237428808Speter	printf("SMP: AP CPU #%d Launched!\n", cpuid);
237528808Speter
237664529Speter	/* set up CPU registers and state */
237764529Speter	cpu_setregs();
237828808Speter
237950972Speter	/* set up FPU state on the AP */
238050972Speter	npxinit(__INITIAL_NPXCW__);
238150972Speter
238228808Speter	/* A quick check from sanity claus */
238328808Speter	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
238428808Speter	if (cpuid != apic_id) {
238528808Speter		printf("SMP: cpuid = %d\n", cpuid);
238628808Speter		printf("SMP: apic_id = %d\n", apic_id);
238738505Sbde		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
238828808Speter		panic("cpuid mismatch! boom!!");
238928808Speter	}
239028808Speter
239128808Speter	/* Init local apic for irq's */
239228808Speter	apic_initialize();
239328808Speter
239446215Smsmith	/* Set memory range attributes for this CPU to match the BSP */
239546215Smsmith	mem_range_AP_init();
239646215Smsmith
239728808Speter	/*
239828808Speter	 * Activate smp_invltlb, although strictly speaking, this isn't
239928808Speter	 * quite correct yet.  We should have a bitfield for cpus willing
240028808Speter	 * to accept TLB flush IPI's or something and sync them.
240128808Speter	 */
240238888Stegge	if (smp_cpus == mp_ncpus) {
240338888Stegge		invltlb_ok = 1;
240438888Stegge		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
240538888Stegge		smp_active = 1;	 /* historic */
240638888Stegge	}
240765557Sjasone
240865557Sjasone	/* let other AP's wake up now */
240965557Sjasone	s_unlock(&ap_boot_lock);
241065557Sjasone
241165557Sjasone	/* wait until all the AP's are up */
241265557Sjasone	while (smp_started == 0)
241365557Sjasone		; /* nothing */
241465557Sjasone
241565557Sjasone	/*
241665557Sjasone	 * Set curproc to our per-cpu idleproc so that mutexes have
241765557Sjasone	 * something unique to lock with.
241865557Sjasone	 */
241965557Sjasone	PCPU_SET(curproc,idleproc);
242065557Sjasone
242165557Sjasone	microuptime(&switchtime);
242265557Sjasone	switchticks = ticks;
242365557Sjasone
242465557Sjasone	/* ok, now grab sched_lock and enter the scheduler */
242565557Sjasone	enable_intr();
242665557Sjasone	mtx_enter(&sched_lock, MTX_SPIN);
242765557Sjasone	cpu_throw();	/* doesn't return */
242865557Sjasone
242965557Sjasone	panic("scheduler returned us to ap_init");
243028808Speter}
243130112Sdyson
243231639Sfsmp#ifdef BETTER_CLOCK
243331639Sfsmp
/* values stored in checkstate_cpustate[]; forwarded_statclock() switches on them */
243431639Sfsmp#define CHECKSTATE_USER	0
243531639Sfsmp#define CHECKSTATE_SYS	1
243631639Sfsmp#define CHECKSTATE_INTR	2
243731639Sfsmp
243833181Seivind/* Do not staticize.  Used from apic_vector.s.  One slot per CPU id. */
243966277Spsstruct proc*	checkstate_curproc[MAXCPU];
244066277Spsint		checkstate_cpustate[MAXCPU];
244166277Spsu_long		checkstate_pc[MAXCPU];
244231639Sfsmp
/*
 * Scale the offset of pc from prof->pr_off by prof->pr_scale (a 16-bit
 * fixed-point fraction, hence the shift by 16) and clear the low bit.
 * The result is compared against prof->pr_size by the caller.
 */
244331639Sfsmp#define PC_TO_INDEX(pc, prof)				\
244431639Sfsmp        ((int)(((u_quad_t)((pc) - (prof)->pr_off) *	\
244531639Sfsmp            (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
244631639Sfsmp
244731639Sfsmpstatic void
244831639Sfsmpaddupc_intr_forwarded(struct proc *p, int id, int *astmap)
244931639Sfsmp{
245031639Sfsmp	int i;
245131639Sfsmp	struct uprof *prof;
245231639Sfsmp	u_long pc;
245331639Sfsmp
245431639Sfsmp	pc = checkstate_pc[id];
245531639Sfsmp	prof = &p->p_stats->p_prof;
245631639Sfsmp	if (pc >= prof->pr_off &&
245731639Sfsmp	    (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) {
245831639Sfsmp		if ((p->p_flag & P_OWEUPC) == 0) {
245931639Sfsmp			prof->pr_addr = pc;
246031639Sfsmp			prof->pr_ticks = 1;
246131639Sfsmp			p->p_flag |= P_OWEUPC;
246231639Sfsmp		}
246331639Sfsmp		*astmap |= (1 << id);
246431639Sfsmp	}
246531639Sfsmp}
246631639Sfsmp
246731639Sfsmpstatic void
246831639Sfsmpforwarded_statclock(int id, int pscnt, int *astmap)
246931639Sfsmp{
247031639Sfsmp	struct pstats *pstats;
247131639Sfsmp	long rss;
247231639Sfsmp	struct rusage *ru;
247331639Sfsmp	struct vmspace *vm;
247431639Sfsmp	int cpustate;
247531639Sfsmp	struct proc *p;
247631639Sfsmp#ifdef GPROF
247731639Sfsmp	register struct gmonparam *g;
247831639Sfsmp	int i;
247931639Sfsmp#endif
248031639Sfsmp
248131639Sfsmp	p = checkstate_curproc[id];
248231639Sfsmp	cpustate = checkstate_cpustate[id];
248331639Sfsmp
248465557Sjasone	/* XXX */
248565557Sjasone	if (p->p_ithd)
248665557Sjasone		cpustate = CHECKSTATE_INTR;
248765713Sjhb	else if (p == SMP_prvspace[id].globaldata.gd_idleproc)
248865557Sjasone		cpustate = CHECKSTATE_SYS;
248965557Sjasone
249031639Sfsmp	switch (cpustate) {
249131639Sfsmp	case CHECKSTATE_USER:
249231639Sfsmp		if (p->p_flag & P_PROFIL)
249331639Sfsmp			addupc_intr_forwarded(p, id, astmap);
249431639Sfsmp		if (pscnt > 1)
249531639Sfsmp			return;
249631639Sfsmp		p->p_uticks++;
249731639Sfsmp		if (p->p_nice > NZERO)
249831639Sfsmp			cp_time[CP_NICE]++;
249931639Sfsmp		else
250031639Sfsmp			cp_time[CP_USER]++;
250131639Sfsmp		break;
250231639Sfsmp	case CHECKSTATE_SYS:
250331639Sfsmp#ifdef GPROF
250431639Sfsmp		/*
250531639Sfsmp		 * Kernel statistics are just like addupc_intr, only easier.
250631639Sfsmp		 */
250731639Sfsmp		g = &_gmonparam;
250831639Sfsmp		if (g->state == GMON_PROF_ON) {
250931639Sfsmp			i = checkstate_pc[id] - g->lowpc;
251031639Sfsmp			if (i < g->textsize) {
251131639Sfsmp				i /= HISTFRACTION * sizeof(*g->kcount);
251231639Sfsmp				g->kcount[i]++;
251331639Sfsmp			}
251431639Sfsmp		}
251531639Sfsmp#endif
251631639Sfsmp		if (pscnt > 1)
251731639Sfsmp			return;
251831639Sfsmp
251965782Sjhb		p->p_sticks++;
252065782Sjhb		if (p == SMP_prvspace[id].globaldata.gd_idleproc)
252131639Sfsmp			cp_time[CP_IDLE]++;
252265782Sjhb		else
252331639Sfsmp			cp_time[CP_SYS]++;
252431639Sfsmp		break;
252531639Sfsmp	case CHECKSTATE_INTR:
252631639Sfsmp	default:
252731639Sfsmp#ifdef GPROF
252831639Sfsmp		/*
252931639Sfsmp		 * Kernel statistics are just like addupc_intr, only easier.
253031639Sfsmp		 */
253131639Sfsmp		g = &_gmonparam;
253231639Sfsmp		if (g->state == GMON_PROF_ON) {
253331639Sfsmp			i = checkstate_pc[id] - g->lowpc;
253431639Sfsmp			if (i < g->textsize) {
253531639Sfsmp				i /= HISTFRACTION * sizeof(*g->kcount);
253631639Sfsmp				g->kcount[i]++;
253731639Sfsmp			}
253831639Sfsmp		}
253931639Sfsmp#endif
254031639Sfsmp		if (pscnt > 1)
254131639Sfsmp			return;
254231639Sfsmp		if (p)
254331639Sfsmp			p->p_iticks++;
254431639Sfsmp		cp_time[CP_INTR]++;
254531639Sfsmp	}
254665782Sjhb	schedclock(p);
254731639Sfsmp
254865782Sjhb	/* Update resource usage integrals and maximums. */
254965782Sjhb	if ((pstats = p->p_stats) != NULL &&
255065782Sjhb	    (ru = &pstats->p_ru) != NULL &&
255165782Sjhb	    (vm = p->p_vmspace) != NULL) {
255265782Sjhb		ru->ru_ixrss += pgtok(vm->vm_tsize);
255365782Sjhb		ru->ru_idrss += pgtok(vm->vm_dsize);
255465782Sjhb		ru->ru_isrss += pgtok(vm->vm_ssize);
255565782Sjhb		rss = pgtok(vmspace_resident_count(vm));
255665782Sjhb		if (ru->ru_maxrss < rss)
255765782Sjhb			ru->ru_maxrss = rss;
255831639Sfsmp	}
255931639Sfsmp}
256031639Sfsmp
256131639Sfsmpvoid
256231639Sfsmpforward_statclock(int pscnt)
256331639Sfsmp{
256431639Sfsmp	int map;
256531639Sfsmp	int id;
256631639Sfsmp	int i;
256731639Sfsmp
256831639Sfsmp	/* Kludge. We don't yet have separate locks for the interrupts
256931639Sfsmp	 * and the kernel. This means that we cannot let the other processors
257031639Sfsmp	 * handle complex interrupts while inhibiting them from entering
257131639Sfsmp	 * the kernel in a non-interrupt context.
257231639Sfsmp	 *
257331639Sfsmp	 * What we can do, without changing the locking mechanisms yet,
257431639Sfsmp	 * is letting the other processors handle a very simple interrupt
257531639Sfsmp	 * (wich determines the processor states), and do the main
257631639Sfsmp	 * work ourself.
257731639Sfsmp	 */
257831639Sfsmp
257931720Stegge	if (!smp_started || !invltlb_ok || cold || panicstr)
258031639Sfsmp		return;
258131639Sfsmp
258231639Sfsmp	/* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle ) */
258331639Sfsmp
258431720Stegge	map = other_cpus & ~stopped_cpus ;
258531639Sfsmp	checkstate_probed_cpus = 0;
258631720Stegge	if (map != 0)
258731720Stegge		selected_apic_ipi(map,
258831720Stegge				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
258931639Sfsmp
259031639Sfsmp	i = 0;
259131639Sfsmp	while (checkstate_probed_cpus != map) {
259231639Sfsmp		/* spin */
259331639Sfsmp		i++;
259436135Stegge		if (i == 100000) {
259536135Stegge#ifdef BETTER_CLOCK_DIAGNOSTIC
259631639Sfsmp			printf("forward_statclock: checkstate %x\n",
259731639Sfsmp			       checkstate_probed_cpus);
259836135Stegge#endif
259931720Stegge			break;
260031639Sfsmp		}
260131639Sfsmp	}
260231639Sfsmp
260331639Sfsmp	/*
260431639Sfsmp	 * Step 2: walk through other processors processes, update ticks and
260531639Sfsmp	 * profiling info.
260631639Sfsmp	 */
260731639Sfsmp
260831639Sfsmp	map = 0;
260931639Sfsmp	for (id = 0; id < mp_ncpus; id++) {
261031639Sfsmp		if (id == cpuid)
261131639Sfsmp			continue;
261231639Sfsmp		if (((1 << id) & checkstate_probed_cpus) == 0)
261331720Stegge			continue;
261431639Sfsmp		forwarded_statclock(id, pscnt, &map);
261531639Sfsmp	}
261631639Sfsmp	if (map != 0) {
261731639Sfsmp		checkstate_need_ast |= map;
261831639Sfsmp		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
261931639Sfsmp		i = 0;
262034019Stegge		while ((checkstate_need_ast & map) != 0) {
262131639Sfsmp			/* spin */
262231639Sfsmp			i++;
262334019Stegge			if (i > 100000) {
262434019Stegge#ifdef BETTER_CLOCK_DIAGNOSTIC
262531639Sfsmp				printf("forward_statclock: dropped ast 0x%x\n",
262634019Stegge				       checkstate_need_ast & map);
262734019Stegge#endif
262831639Sfsmp				break;
262931639Sfsmp			}
263031639Sfsmp		}
263131639Sfsmp	}
263231639Sfsmp}
263331639Sfsmp
263431639Sfsmpvoid
263531639Sfsmpforward_hardclock(int pscnt)
263631639Sfsmp{
263731639Sfsmp	int map;
263831639Sfsmp	int id;
263931639Sfsmp	struct proc *p;
264031639Sfsmp	struct pstats *pstats;
264131639Sfsmp	int i;
264231639Sfsmp
264331639Sfsmp	/* Kludge. We don't yet have separate locks for the interrupts
264431639Sfsmp	 * and the kernel. This means that we cannot let the other processors
264531639Sfsmp	 * handle complex interrupts while inhibiting them from entering
264631639Sfsmp	 * the kernel in a non-interrupt context.
264731639Sfsmp	 *
264831639Sfsmp	 * What we can do, without changing the locking mechanisms yet,
264931639Sfsmp	 * is letting the other processors handle a very simple interrupt
265031639Sfsmp	 * (wich determines the processor states), and do the main
265131639Sfsmp	 * work ourself.
265231639Sfsmp	 */
265331639Sfsmp
265431720Stegge	if (!smp_started || !invltlb_ok || cold || panicstr)
265531639Sfsmp		return;
265631639Sfsmp
265731639Sfsmp	/* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle) */
265831639Sfsmp
265931720Stegge	map = other_cpus & ~stopped_cpus ;
266031639Sfsmp	checkstate_probed_cpus = 0;
266131720Stegge	if (map != 0)
266231720Stegge		selected_apic_ipi(map,
266331720Stegge				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
266431720Stegge
266531639Sfsmp	i = 0;
266631639Sfsmp	while (checkstate_probed_cpus != map) {
266731639Sfsmp		/* spin */
266831639Sfsmp		i++;
266936135Stegge		if (i == 100000) {
267036135Stegge#ifdef BETTER_CLOCK_DIAGNOSTIC
267131639Sfsmp			printf("forward_hardclock: checkstate %x\n",
267231639Sfsmp			       checkstate_probed_cpus);
267336135Stegge#endif
267431720Stegge			break;
267531639Sfsmp		}
267631639Sfsmp	}
267731639Sfsmp
267831639Sfsmp	/*
267931639Sfsmp	 * Step 2: walk through other processors processes, update virtual
268031639Sfsmp	 * timer and profiling timer. If stathz == 0, also update ticks and
268131639Sfsmp	 * profiling info.
268231639Sfsmp	 */
268331639Sfsmp
268431639Sfsmp	map = 0;
268531639Sfsmp	for (id = 0; id < mp_ncpus; id++) {
268631639Sfsmp		if (id == cpuid)
268731639Sfsmp			continue;
268831639Sfsmp		if (((1 << id) & checkstate_probed_cpus) == 0)
268931720Stegge			continue;
269031639Sfsmp		p = checkstate_curproc[id];
269131639Sfsmp		if (p) {
269231639Sfsmp			pstats = p->p_stats;
269331639Sfsmp			if (checkstate_cpustate[id] == CHECKSTATE_USER &&
269435058Sphk			    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
269531639Sfsmp			    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
269631639Sfsmp				psignal(p, SIGVTALRM);
269731639Sfsmp				map |= (1 << id);
269831639Sfsmp			}
269935058Sphk			if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
270031639Sfsmp			    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
270131639Sfsmp				psignal(p, SIGPROF);
270231639Sfsmp				map |= (1 << id);
270331639Sfsmp			}
270431639Sfsmp		}
270531639Sfsmp		if (stathz == 0) {
270631639Sfsmp			forwarded_statclock( id, pscnt, &map);
270731639Sfsmp		}
270831639Sfsmp	}
270931639Sfsmp	if (map != 0) {
271031639Sfsmp		checkstate_need_ast |= map;
271131639Sfsmp		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
271231639Sfsmp		i = 0;
271334019Stegge		while ((checkstate_need_ast & map) != 0) {
271431639Sfsmp			/* spin */
271531639Sfsmp			i++;
271634019Stegge			if (i > 100000) {
271734019Stegge#ifdef BETTER_CLOCK_DIAGNOSTIC
271831639Sfsmp				printf("forward_hardclock: dropped ast 0x%x\n",
271934019Stegge				       checkstate_need_ast & map);
272034019Stegge#endif
272131639Sfsmp				break;
272231639Sfsmp			}
272331639Sfsmp		}
272431639Sfsmp	}
272531639Sfsmp}
272631639Sfsmp
272731639Sfsmp#endif /* BETTER_CLOCK */
272834020Stegge
272934020Steggevoid
273034020Steggeforward_signal(struct proc *p)
273134020Stegge{
273234020Stegge	int map;
273334020Stegge	int id;
273434020Stegge	int i;
273534020Stegge
273634020Stegge	/* Kludge. We don't yet have separate locks for the interrupts
273734020Stegge	 * and the kernel. This means that we cannot let the other processors
273834020Stegge	 * handle complex interrupts while inhibiting them from entering
273934020Stegge	 * the kernel in a non-interrupt context.
274034020Stegge	 *
274134020Stegge	 * What we can do, without changing the locking mechanisms yet,
274234020Stegge	 * is letting the other processors handle a very simple interrupt
274334020Stegge	 * (wich determines the processor states), and do the main
274434020Stegge	 * work ourself.
274534020Stegge	 */
274634020Stegge
274734020Stegge	if (!smp_started || !invltlb_ok || cold || panicstr)
274834020Stegge		return;
274934020Stegge	if (!forward_signal_enabled)
275034020Stegge		return;
275134020Stegge	while (1) {
275234020Stegge		if (p->p_stat != SRUN)
275334020Stegge			return;
275444487Sbde		id = p->p_oncpu;
275534020Stegge		if (id == 0xff)
275634020Stegge			return;
275734020Stegge		map = (1<<id);
275834020Stegge		checkstate_need_ast |= map;
275934020Stegge		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
276034020Stegge		i = 0;
276134020Stegge		while ((checkstate_need_ast & map) != 0) {
276234020Stegge			/* spin */
276334020Stegge			i++;
276434020Stegge			if (i > 100000) {
276534020Stegge#if 0
276634020Stegge				printf("forward_signal: dropped ast 0x%x\n",
276734020Stegge				       checkstate_need_ast & map);
276834020Stegge#endif
276934020Stegge				break;
277034020Stegge			}
277134020Stegge		}
277244487Sbde		if (id == p->p_oncpu)
277334020Stegge			return;
277434020Stegge	}
277534020Stegge}
277634021Stegge
277736135Steggevoid
277836135Steggeforward_roundrobin(void)
277936135Stegge{
278036135Stegge	u_int map;
278136135Stegge	int i;
278234021Stegge
278336135Stegge	if (!smp_started || !invltlb_ok || cold || panicstr)
278436135Stegge		return;
278536135Stegge	if (!forward_roundrobin_enabled)
278636135Stegge		return;
278736135Stegge	resched_cpus |= other_cpus;
278836135Stegge	map = other_cpus & ~stopped_cpus ;
278936135Stegge#if 1
279036135Stegge	selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
279136135Stegge#else
279236135Stegge	(void) all_but_self_ipi(XCPUAST_OFFSET);
279336135Stegge#endif
279436135Stegge	i = 0;
279536135Stegge	while ((checkstate_need_ast & map) != 0) {
279636135Stegge		/* spin */
279736135Stegge		i++;
279836135Stegge		if (i > 100000) {
279936135Stegge#if 0
280036135Stegge			printf("forward_roundrobin: dropped ast 0x%x\n",
280136135Stegge			       checkstate_need_ast & map);
280236135Stegge#endif
280336135Stegge			break;
280436135Stegge		}
280536135Stegge	}
280636135Stegge}
280736135Stegge
280836135Stegge
280934021Stegge#ifdef APIC_INTR_REORDER
281034021Stegge/*
281134021Stegge *	Maintain mapping from softintr vector to isr bit in local apic.
281234021Stegge */
281334021Steggevoid
281434021Steggeset_lapic_isrloc(int intr, int vector)
281534021Stegge{
281634021Stegge	if (intr < 0 || intr > 32)
281734021Stegge		panic("set_apic_isrloc: bad intr argument: %d",intr);
281834021Stegge	if (vector < ICU_OFFSET || vector > 255)
281934021Stegge		panic("set_apic_isrloc: bad vector argument: %d",vector);
282034021Stegge	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
282134021Stegge	apic_isrbit_location[intr].bit = (1<<(vector & 31));
282234021Stegge}
282334021Stegge#endif
282448924Smsmith
282548924Smsmith/*
282648924Smsmith * All-CPU rendezvous.  CPUs are signalled, all execute the setup function
282748924Smsmith * (if specified), rendezvous, execute the action function (if specified),
282848924Smsmith * rendezvous again, execute the teardown function (if specified), and then
282948924Smsmith * resume.
283048924Smsmith *
283148924Smsmith * Note that the supplied external functions _must_ be reentrant and aware
283248924Smsmith * that they are running in parallel and in an unknown lock context.
283348924Smsmith */
/*
 * Rendezvous parameters.  smp_rendezvous() fills these in while holding
 * smp_rv_lock and before signalling the other CPUs;
 * smp_rendezvous_action() reads them.  A NULL function pointer means the
 * corresponding phase is skipped.
 */
static void	(*smp_rv_setup_func)(void *);		/* phase 1 */
static void	(*smp_rv_action_func)(void *);		/* phase 2 */
static void	(*smp_rv_teardown_func)(void *);	/* phase 3 */
static void	*smp_rv_func_arg;	/* argument handed to all three */
/* Arrival counters: [0] for the entry barrier, [1] for the exit barrier. */
static volatile int	smp_rv_waiters[2];
283948924Smsmith
284048924Smsmithvoid
284148924Smsmithsmp_rendezvous_action(void)
284248924Smsmith{
284348924Smsmith	/* setup function */
284448924Smsmith	if (smp_rv_setup_func != NULL)
284548924Smsmith		smp_rv_setup_func(smp_rv_func_arg);
284648924Smsmith	/* spin on entry rendezvous */
284748924Smsmith	atomic_add_int(&smp_rv_waiters[0], 1);
284848924Smsmith	while (smp_rv_waiters[0] < mp_ncpus)
284948924Smsmith		;
285048924Smsmith	/* action function */
285148924Smsmith	if (smp_rv_action_func != NULL)
285248924Smsmith		smp_rv_action_func(smp_rv_func_arg);
285348924Smsmith	/* spin on exit rendezvous */
285448924Smsmith	atomic_add_int(&smp_rv_waiters[1], 1);
285548924Smsmith	while (smp_rv_waiters[1] < mp_ncpus)
285648924Smsmith		;
285748924Smsmith	/* teardown function */
285848924Smsmith	if (smp_rv_teardown_func != NULL)
285948924Smsmith		smp_rv_teardown_func(smp_rv_func_arg);
286048924Smsmith}
286148924Smsmith
286248924Smsmithvoid
286348924Smsmithsmp_rendezvous(void (* setup_func)(void *),
286448924Smsmith	       void (* action_func)(void *),
286548924Smsmith	       void (* teardown_func)(void *),
286648924Smsmith	       void *arg)
286748924Smsmith{
286848924Smsmith	u_int	efl;
286948924Smsmith
287048924Smsmith	/* obtain rendezvous lock */
287148924Smsmith	s_lock(&smp_rv_lock);		/* XXX sleep here? NOWAIT flag? */
287248924Smsmith
287348924Smsmith	/* set static function pointers */
287448924Smsmith	smp_rv_setup_func = setup_func;
287548924Smsmith	smp_rv_action_func = action_func;
287648924Smsmith	smp_rv_teardown_func = teardown_func;
287748924Smsmith	smp_rv_func_arg = arg;
287848924Smsmith	smp_rv_waiters[0] = 0;
287948924Smsmith	smp_rv_waiters[1] = 0;
288048924Smsmith
288148924Smsmith	/* disable interrupts on this CPU, save interrupt status */
288248924Smsmith	efl = read_eflags();
288348924Smsmith	write_eflags(efl & ~PSL_I);
288448924Smsmith
288548924Smsmith	/* signal other processors, which will enter the IPI with interrupts off */
288648924Smsmith	all_but_self_ipi(XRENDEZVOUS_OFFSET);
288748924Smsmith
288848924Smsmith	/* call executor function */
288948924Smsmith	smp_rendezvous_action();
289048924Smsmith
289148924Smsmith	/* restore interrupt flag */
289248924Smsmith	write_eflags(efl);
289348924Smsmith
289448924Smsmith	/* release lock */
289548924Smsmith	s_unlock(&smp_rv_lock);
289648924Smsmith}
289765557Sjasone
289865557Sjasonevoid
289965557Sjasonerelease_aps(void *dummy __unused)
290065557Sjasone{
290165557Sjasone	s_unlock(&ap_boot_lock);
290265557Sjasone}
290365557Sjasone
290465557SjasoneSYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
2905