mptable.c revision 48119
19313Ssos/*
29313Ssos * Copyright (c) 1996, by Steve Passe
39313Ssos * All rights reserved.
49313Ssos *
59313Ssos * Redistribution and use in source and binary forms, with or without
69313Ssos * modification, are permitted provided that the following conditions
79313Ssos * are met:
89313Ssos * 1. Redistributions of source code must retain the above copyright
9111798Sdes *    notice, this list of conditions and the following disclaimer.
109313Ssos * 2. The name of the developer may NOT be used to endorse or promote products
119313Ssos *    derived from this software without specific prior written permission.
129313Ssos *
139313Ssos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
149313Ssos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1597748Sschweikh * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
169313Ssos * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
179313Ssos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
189313Ssos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
199313Ssos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
209313Ssos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
219313Ssos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
229313Ssos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
239313Ssos * SUCH DAMAGE.
249313Ssos *
259313Ssos *	$Id: mp_machdep.c,v 1.102 1999/06/01 18:19:42 jlemon Exp $
269313Ssos */
279313Ssos
289313Ssos#include "opt_smp.h"
29116173Sobrien#include "opt_cpu.h"
30116173Sobrien#include "opt_user_ldt.h"
31116173Sobrien
32156874Sru#ifdef SMP
33101189Srwatson#include <machine/smptests.h>
3431784Seivind#else
359313Ssos#error
369313Ssos#endif
3776166Smarkm
3876166Smarkm#include <sys/param.h>
399313Ssos#include <sys/systm.h>
409313Ssos#include <sys/kernel.h>
419313Ssos#include <sys/proc.h>
4231561Sbde#include <sys/sysctl.h>
439313Ssos#include <sys/malloc.h>
4472538Sjlemon#include <sys/memrange.h>
4576166Smarkm#ifdef BETTER_CLOCK
46168014Sjulian#include <sys/dkstat.h>
4776166Smarkm#endif
48162201Snetchild
49166085Skib#include <vm/vm.h>
50102814Siedowse#include <vm/vm_param.h>
5176166Smarkm#include <vm/pmap.h>
5214331Speter#include <vm/vm_kern.h>
53162585Snetchild#include <vm/vm_extern.h>
5476166Smarkm#ifdef BETTER_CLOCK
5512458Sbde#include <sys/lock.h>
56163606Srwatson#include <vm/vm_map.h>
57163606Srwatson#include <sys/user.h>
5872538Sjlemon#ifdef GPROF
5972538Sjlemon#include <sys/gmon.h>
6072538Sjlemon#endif
6172538Sjlemon#endif
62140214Sobrien
63140214Sobrien#include <machine/smp.h>
64140214Sobrien#include <machine/apic.h>
65140214Sobrien#include <machine/mpapic.h>
6664905Smarcel#include <machine/segments.h>
6768583Smarcel#include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
68133816Stjr#include <machine/tss.h>
6964905Smarcel#include <machine/specialreg.h>
709313Ssos#include <machine/cputypes.h>
719313Ssos#include <machine/globaldata.h>
7283366Sjulian
739313Ssos#include <i386/i386/cons.h>	/* cngetc() */
74102814Siedowse
75102814Siedowse#if defined(APIC_IO)
769313Ssos#include <machine/md_var.h>		/* setidt() */
77102814Siedowse#include <i386/isa/icu.h>		/* IPIs */
7814331Speter#include <i386/isa/intr_machdep.h>	/* IPIs */
799313Ssos#endif	/* APIC_IO */
8072543Sjlemon
81102814Siedowse#if defined(TEST_DEFAULT_CONFIG)
829313Ssos#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
83102814Siedowse#else
84102814Siedowse#define MPFPS_MPFB1	mpfps->mpfb1
85102814Siedowse#endif  /* TEST_DEFAULT_CONFIG */
86102814Siedowse
879313Ssos#define WARMBOOT_TARGET		0
889313Ssos#define WARMBOOT_OFF		(KERNBASE + 0x0467)
89168014Sjulian#define WARMBOOT_SEG		(KERNBASE + 0x0469)
90168014Sjulian
91168014Sjulian#ifdef PC98
929313Ssos#define BIOS_BASE		(0xe8000)
9383382Sjhb#define BIOS_SIZE		(0x18000)
94166085Skib#else
95166085Skib#define BIOS_BASE		(0xf0000)
96102814Siedowse#define BIOS_SIZE		(0x10000)
9714331Speter#endif
98102814Siedowse#define BIOS_COUNT		(BIOS_SIZE/4)
99168014Sjulian
100168014Sjulian#define CMOS_REG		(0x70)
101102814Siedowse#define CMOS_DATA		(0x71)
102168014Sjulian#define BIOS_RESET		(0x0f)
103168014Sjulian#define BIOS_WARM		(0x0a)
104102814Siedowse
105168014Sjulian#define PROCENTRY_FLAG_EN	0x01
106168014Sjulian#define PROCENTRY_FLAG_BP	0x02
107168014Sjulian#define IOAPICENTRY_FLAG_EN	0x01
108168014Sjulian
109168014Sjulian
110102814Siedowse/* MP Floating Pointer Structure */
111168014Sjuliantypedef struct MPFPS {
112102814Siedowse	char    signature[4];
113168014Sjulian	void   *pap;
114102814Siedowse	u_char  length;
115168014Sjulian	u_char  spec_rev;
116102814Siedowse	u_char  checksum;
117168014Sjulian	u_char  mpfb1;
118102814Siedowse	u_char  mpfb2;
119168014Sjulian	u_char  mpfb3;
120102814Siedowse	u_char  mpfb4;
121168014Sjulian	u_char  mpfb5;
122102814Siedowse}      *mpfps_t;
123168014Sjulian
124102814Siedowse/* MP Configuration Table Header */
125168014Sjuliantypedef struct MPCTH {
126102814Siedowse	char    signature[4];
127168014Sjulian	u_short base_table_length;
128166085Skib	u_char  spec_rev;
129168014Sjulian	u_char  checksum;
130166085Skib	u_char  oem_id[8];
131166085Skib	u_char  product_id[12];
1329313Ssos	void   *oem_table_pointer;
133168014Sjulian	u_short oem_table_size;
134166085Skib	u_short entry_count;
135166085Skib	void   *apic_address;
136166085Skib	u_short extended_table_length;
137166085Skib	u_char  extended_table_checksum;
138166085Skib	u_char  reserved;
139166085Skib}      *mpcth_t;
140166085Skib
141166085Skib
142166085Skibtypedef struct PROCENTRY {
143166085Skib	u_char  type;
144166085Skib	u_char  apic_id;
145166085Skib	u_char  apic_version;
146166085Skib	u_char  cpu_flags;
147166085Skib	u_long  cpu_signature;
148166085Skib	u_long  feature_flags;
149166085Skib	u_long  reserved1;
150166085Skib	u_long  reserved2;
151166085Skib}      *proc_entry_ptr;
152166085Skib
153166085Skibtypedef struct BUSENTRY {
154166085Skib	u_char  type;
155166085Skib	u_char  bus_id;
156168014Sjulian	char    bus_type[6];
157166085Skib}      *bus_entry_ptr;
158166085Skib
159166085Skibtypedef struct IOAPICENTRY {
160166085Skib	u_char  type;
161166085Skib	u_char  apic_id;
162166085Skib	u_char  apic_version;
163166085Skib	u_char  apic_flags;
164166085Skib	void   *apic_address;
165166085Skib}      *io_apic_entry_ptr;
166166085Skib
167166085Skibtypedef struct INTENTRY {
168166085Skib	u_char  type;
169166085Skib	u_char  int_type;
1709313Ssos	u_short int_flags;
17114331Speter	u_char  src_bus_id;
172166085Skib	u_char  src_bus_irq;
173166085Skib	u_char  dst_apic_id;
17414331Speter	u_char  dst_apic_int;
175168014Sjulian}      *int_entry_ptr;
176168014Sjulian
1779313Ssos/* descriptions of MP basetable entries */
1789313Ssostypedef struct BASETABLE_ENTRY {
1799313Ssos	u_char  type;
180168014Sjulian	u_char  length;
181168014Sjulian	char    name[16];
182168014Sjulian}       basetable_entry;
183168014Sjulian
/*
 * this code MUST be enabled here and in mpboot.s.
 * it follows the very early stages of AP boot by placing values in CMOS ram.
 * it NORMALLY will never be needed and thus the primitive method for enabling.
 *
#define CHECK_POINTS
 */

#if defined(CHECK_POINTS) && !defined(PC98)
/* select CMOS register A, then read / write its data byte */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Store D in CMOS registers 0x34..0x39.
 *
 * Wrapped in do { } while (0) so that "CHECK_INIT(x);" is exactly one
 * statement and is safe as the body of an unbraced if/else.  Note that
 * D is evaluated once per register written.
 */
#define CHECK_INIT(D)				\
	do {					\
		CHECK_WRITE(0x34, (D));		\
		CHECK_WRITE(0x35, (D));		\
		CHECK_WRITE(0x36, (D));		\
		CHECK_WRITE(0x37, (D));		\
		CHECK_WRITE(0x38, (D));		\
		CHECK_WRITE(0x39, (D));		\
	} while (0)

/* Print the label S followed by the six checkpoint bytes. */
#define CHECK_PRINT(S)				\
	do {					\
		printf("%s: %d, %d, %d, %d, %d, %d\n",	\
		   (S),				\
		   CHECK_READ(0x34),		\
		   CHECK_READ(0x35),		\
		   CHECK_READ(0x36),		\
		   CHECK_READ(0x37),		\
		   CHECK_READ(0x38),		\
		   CHECK_READ(0x39));		\
	} while (0)

#else				/* CHECK_POINTS */

/* checkpoints disabled: both macros expand to nothing */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
220168014Sjulian
221168014Sjulian/*
222168014Sjulian * Values to send to the POST hardware.
223168014Sjulian */
224168355Srwatson#define MP_BOOTADDRESS_POST	0x10
225168014Sjulian#define MP_PROBE_POST		0x11
226168014Sjulian#define MPTABLE_PASS1_POST	0x12
227168014Sjulian
228168355Srwatson#define MP_START_POST		0x13
229168355Srwatson#define MP_ENABLE_POST		0x14
230168355Srwatson#define MPTABLE_PASS2_POST	0x15
231168355Srwatson
232168355Srwatson#define START_ALL_APS_POST	0x16
233168355Srwatson#define INSTALL_AP_TRAMP_POST	0x17
234168355Srwatson#define START_AP_POST		0x18
235168355Srwatson
236168355Srwatson#define MP_ANNOUNCE_POST	0x19
237168014Sjulian
238168014Sjulian
239168014Sjulian/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
240168014Sjulianint	current_postcode;
241168014Sjulian
242170486Smjacob/** XXX FIXME: what system files declare these??? */
243170486Smjacobextern struct region_descriptor r_gdt, r_idt;
244168014Sjulian
245168355Srwatsonint	bsp_apic_ready = 0;	/* flags useability of BSP apic */
246168355Srwatsonint	mp_ncpus;		/* # of CPUs, including BSP */
247168355Srwatsonint	mp_naps;		/* # of Applications processors */
248168014Sjulianint	mp_nbusses;		/* # of busses */
249168014Sjulianint	mp_napics;		/* # of IO APICs */
250168014Sjulianint	boot_cpu_id;		/* designated BSP */
2519313Ssosvm_offset_t cpu_apic_address;
252168014Sjulianvm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
253168014Sjulianextern	int nkpt;
254174975Skib
255168014Sjulianu_int32_t cpu_apic_versions[NCPU];
256168014Sjulianu_int32_t io_apic_versions[NAPIC];
257168014Sjulian
258168014Sjulian#ifdef APIC_INTR_DIAGNOSTIC
259174975Skibint apic_itrace_enter[32];
260174975Skibint apic_itrace_tryisrlock[32];
261174975Skibint apic_itrace_gotisrlock[32];
262174975Skibint apic_itrace_active[32];
263168014Sjulianint apic_itrace_masked[32];
264168014Sjulianint apic_itrace_noisrlock[32];
265168014Sjulianint apic_itrace_masked2[32];
266168014Sjulianint apic_itrace_unmask[32];
267168014Sjulianint apic_itrace_noforward[32];
268174975Skibint apic_itrace_leave[32];
269168014Sjulianint apic_itrace_enter2[32];
270174975Skibint apic_itrace_doreti[32];
271168014Sjulianint apic_itrace_splz[32];
272174975Skibint apic_itrace_eoi[32];
273174975Skib#ifdef APIC_INTR_DIAGNOSTIC_IRQ
274168014Sjulianunsigned short apic_itrace_debugbuffer[32768];
275174975Skibint apic_itrace_debugbuffer_idx;
276174975Skibstruct simplelock apic_itrace_debuglock;
277174975Skib#endif
278174975Skib#endif
279174975Skib
280168014Sjulian#ifdef APIC_INTR_REORDER
281168014Sjulianstruct {
282168014Sjulian	volatile int *location;
283168014Sjulian	int bit;
284174975Skib} apic_isrbit_location[32];
285174975Skib#endif
286174975Skib
287174975Skibstruct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
288174975Skib
289168014Sjulian/*
290168014Sjulian * APIC ID logical/physical mapping structures.
291168014Sjulian * We oversize these to simplify boot-time config.
292168014Sjulian */
293168014Sjulianint     cpu_num_to_apic_id[NAPICID];
294168014Sjulianint     io_num_to_apic_id[NAPICID];
295168014Sjulianint     apic_id_to_logical[NAPICID];
296168014Sjulian
297168014Sjulian
298168014Sjulian/* Bitmap of all available CPUs */
299168014Sjulianu_int	all_cpus;
300168014Sjulian
301168014Sjulian/* AP uses this during bootstrap.  Do not staticize.  */
302168014Sjulianchar *bootSTK;
303168014Sjulianint boot_cpuid;
304168014Sjulian
305168014Sjulian/* Hotwire a 0->4MB V==P mapping */
306168014Sjulianextern pt_entry_t *KPTphys;
307168014Sjulian
308168014Sjulian/* SMP page table page */
309168014Sjulianextern pt_entry_t *SMPpt;
310168014Sjulian
311168014Sjulianstruct pcb stoppcbs[NCPU];
312168014Sjulian
31383366Sjulianint smp_started;		/* has the system started? */
3149313Ssos
3159313Ssos/*
31612858Speter * Local data and functions.
31712858Speter */
3189313Ssos
31912858Speterstatic int	mp_capable;
3209313Ssosstatic u_int	boot_address;
32112858Speterstatic u_int	base_memory;
3229313Ssos
3239313Ssosstatic int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
3249313Ssosstatic mpfps_t	mpfps;
32572543Sjlemonstatic int	search_for_sig(u_int32_t target, int count);
32672543Sjlemonstatic void	mp_enable(u_int boot_addr);
32783221Smarcel
3289313Ssosstatic int	mptable_pass1(void);
32912858Speterstatic int	mptable_pass2(void);
33012858Speterstatic void	default_mp_table(int type);
3319313Ssosstatic void	fix_mp_table(void);
33283366Sjulianstatic void	setup_apic_irq_mapping(void);
3339313Ssosstatic void	init_locks(void);
3349313Ssosstatic int	start_all_aps(u_int boot_addr);
3359313Ssosstatic void	install_ap_tramp(u_int boot_addr);
33614331Speterstatic int	start_ap(int logicalCpu, u_int boot_addr);
33783366Sjulian
33814331Speter/*
33914331Speter * Calculate usable address in base memory for AP trampoline code.
34014331Speter */
34114331Speteru_int
34214331Spetermp_bootaddress(u_int basemem)
34314331Speter{
34472543Sjlemon	POSTCODE(MP_BOOTADDRESS_POST);
34572543Sjlemon
34672543Sjlemon	base_memory = basemem * 1024;	/* convert to bytes */
34714331Speter
34814331Speter	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
34914331Speter	if ((base_memory - boot_address) < bootMP_size)
35014331Speter		boot_address -= 4096;	/* not enough, lower by 4k */
35114331Speter
35214331Speter	return boot_address;
35314331Speter}
35483366Sjulian
35514331Speter
35614331Speter/*
357111797Sdes * Look for an Intel MP spec table (ie, SMP capable hardware).
35814331Speter */
35914331Speterint
36083366Sjulianmp_probe(void)
36114331Speter{
36214331Speter	int     x;
36314331Speter	u_long  segment;
3649313Ssos	u_int32_t target;
36583366Sjulian
3669313Ssos	POSTCODE(MP_PROBE_POST);
36714331Speter
36814331Speter	/* see if EBDA exists */
36914331Speter	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
37014331Speter		/* search first 1K of EBDA */
37114331Speter		target = (u_int32_t) (segment << 4);
37283366Sjulian		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
37314331Speter			goto found;
37414331Speter	} else {
37583221Smarcel		/* last 1K of base memory, effective 'top of base' passed in */
37683221Smarcel		target = (u_int32_t) (base_memory - 0x400);
37783221Smarcel		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
37883221Smarcel			goto found;
37983221Smarcel	}
38083221Smarcel
38183221Smarcel	/* search the BIOS */
38283221Smarcel	target = (u_int32_t) BIOS_BASE;
38383221Smarcel	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
38483221Smarcel		goto found;
38583221Smarcel
38683221Smarcel	/* nothing found */
38783221Smarcel	mpfps = (mpfps_t)0;
38883221Smarcel	mp_capable = 0;
38983221Smarcel	return 0;
39083221Smarcel
39183221Smarcelfound:
39283221Smarcel	/* calculate needed resources */
39383221Smarcel	mpfps = (mpfps_t)x;
39483221Smarcel	if (mptable_pass1())
39583221Smarcel		panic("you must reconfigure your kernel");
39683221Smarcel
39783221Smarcel	/* flag fact that we are running multiple processors */
39883221Smarcel	mp_capable = 1;
39983221Smarcel	return 1;
40083221Smarcel}
40183221Smarcel
40283221Smarcel
40383221Smarcel/*
40483221Smarcel * Startup the SMP processors.
40583366Sjulian */
40683221Smarcelvoid
40714331Spetermp_start(void)
408111798Sdes{
40983221Smarcel	POSTCODE(MP_START_POST);
41083221Smarcel
41183221Smarcel	/* look for MP capable motherboard */
41283221Smarcel	if (mp_capable)
41383221Smarcel		mp_enable(boot_address);
41483221Smarcel	else
41583221Smarcel		panic("MP hardware not found!");
41683221Smarcel}
41783221Smarcel
41883221Smarcel
41983221Smarcel/*
42083221Smarcel * Print various information about the SMP system hardware and setup.
42183221Smarcel */
422160276Sjhbvoid
4239313Ssosmp_announce(void)
424160276Sjhb{
425160276Sjhb	int     x;
426160276Sjhb
427160276Sjhb	POSTCODE(MP_ANNOUNCE_POST);
428160276Sjhb
429160276Sjhb	printf("FreeBSD/SMP: Multiprocessor motherboard\n");
430160276Sjhb	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
431160276Sjhb	printf(", version: 0x%08x", cpu_apic_versions[0]);
432160276Sjhb	printf(", at 0x%08x\n", cpu_apic_address);
433160276Sjhb	for (x = 1; x <= mp_naps; ++x) {
43483366Sjulian		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
43583221Smarcel		printf(", version: 0x%08x", cpu_apic_versions[x]);
4369313Ssos		printf(", at 0x%08x\n", cpu_apic_address);
43789306Salfred	}
43889306Salfred
43983221Smarcel#if defined(APIC_IO)
44089306Salfred	for (x = 0; x < mp_napics; ++x) {
4419313Ssos		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
442116678Sphk		printf(", version: 0x%08x", io_apic_versions[x]);
443160276Sjhb		printf(", at 0x%08x\n", io_apic_address[x]);
44489306Salfred	}
445160276Sjhb#else
44689306Salfred	printf(" Warning: APIC I/O disabled\n");
44783221Smarcel#endif	/* APIC_IO */
44889306Salfred}
4499313Ssos
45083221Smarcel/*
4519313Ssos * AP cpu's call this to sync up protected mode.
45283221Smarcel */
45383221Smarcelvoid
454111119Simpinit_secondary(void)
455175202Sattilio{
45683221Smarcel	int	gsel_tss;
4579313Ssos	int	x, myid = boot_cpuid;
45883221Smarcel
45983221Smarcel	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
46083221Smarcel	gdt_segs[GPROC0_SEL].ssd_base =
46183221Smarcel		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
46283221Smarcel	SMP_prvspace[myid].globaldata.gd_prvspace = &SMP_prvspace[myid];
46383221Smarcel
46483366Sjulian	for (x = 0; x < NGDT; x++) {
46583221Smarcel		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
46683221Smarcel	}
4679313Ssos
46883221Smarcel	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
46983221Smarcel	r_gdt.rd_base = (int) &gdt[myid * NGDT];
47083221Smarcel	lgdt(&r_gdt);			/* does magic intra-segment return */
47183221Smarcel
47224654Sdfr	lidt(&r_idt);
473101189Srwatson
474101189Srwatson	lldt(_default_ldt);
475101189Srwatson#ifdef USER_LDT
476101189Srwatson	currentldt = _default_ldt;
477172930Srwatson#endif
478101189Srwatson
479101189Srwatson	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
48083221Smarcel	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
48183221Smarcel	common_tss.tss_esp0 = 0;	/* not used until after switch */
48283221Smarcel	common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
4839313Ssos	common_tss.tss_ioopt = (sizeof common_tss) << 16;
48483221Smarcel	tss_gdt = &gdt[myid * NGDT + GPROC0_SEL].sd;
48583221Smarcel	common_tssd = *tss_gdt;
48683221Smarcel	ltr(gsel_tss);
48783221Smarcel
48883221Smarcel	load_cr0(0x8005003b);		/* XXX! */
4899313Ssos
49083221Smarcel	pmap_set_opt((unsigned *)PTD);
49124654Sdfr
49283221Smarcel	invltlb();
49383221Smarcel}
49483221Smarcel
49583221Smarcel
49683221Smarcel#if defined(APIC_IO)
49783221Smarcel/*
49883221Smarcel * Final configuration of the BSP's local APIC:
49983221Smarcel *  - disable 'pic mode'.
50083221Smarcel *  - disable 'virtual wire mode'.
50183221Smarcel *  - enable NMI.
50283221Smarcel */
50383221Smarcelvoid
50483221Smarcelbsp_apic_configure(void)
50583221Smarcel{
50683221Smarcel	u_char		byte;
50724654Sdfr	u_int32_t	temp;
50824654Sdfr
50983221Smarcel	/* leave 'pic mode' if necessary */
51083221Smarcel	if (picmode) {
51183221Smarcel		outb(0x22, 0x70);	/* select IMCR */
51283221Smarcel		byte = inb(0x23);	/* current contents */
51383221Smarcel		byte |= 0x01;		/* mask external INTR */
51483221Smarcel		outb(0x23, byte);	/* disconnect 8259s/NMI */
51583221Smarcel	}
51683221Smarcel
51783221Smarcel	/* mask lint0 (the 8259 'virtual wire' connection) */
51883221Smarcel	temp = lapic.lvt_lint0;
51983221Smarcel	temp |= APIC_LVT_M;		/* set the mask */
52083221Smarcel	lapic.lvt_lint0 = temp;
52183221Smarcel
52283221Smarcel        /* setup lint1 to handle NMI */
52383221Smarcel        temp = lapic.lvt_lint1;
52483221Smarcel        temp &= ~APIC_LVT_M;		/* clear the mask */
52583221Smarcel        lapic.lvt_lint1 = temp;
52683221Smarcel
52783221Smarcel	if (bootverbose)
52883221Smarcel		apic_dump("bsp_apic_configure()");
52983221Smarcel}
53083221Smarcel#endif  /* APIC_IO */
53183221Smarcel
53283221Smarcel
53383221Smarcel/*******************************************************************
53483221Smarcel * local functions and data
53583221Smarcel */
53683221Smarcel
53783221Smarcel/*
53883221Smarcel * start the SMP system
53983221Smarcel */
54083221Smarcelstatic void
54183221Smarcelmp_enable(u_int boot_addr)
54283221Smarcel{
54383221Smarcel	int     x;
54483221Smarcel#if defined(APIC_IO)
54583221Smarcel	int     apic;
54683221Smarcel	u_int   ux;
54783221Smarcel#endif	/* APIC_IO */
54883221Smarcel
54983221Smarcel	POSTCODE(MP_ENABLE_POST);
55083221Smarcel
55183221Smarcel	/* turn on 4MB of V == P addressing so we can get to MP table */
55283221Smarcel	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
55383221Smarcel	invltlb();
55483221Smarcel
55583221Smarcel	/* examine the MP table for needed info, uses physical addresses */
55683221Smarcel	x = mptable_pass2();
55783221Smarcel
55883221Smarcel	*(int *)PTD = 0;
55983221Smarcel	invltlb();
56083221Smarcel
56183221Smarcel	/* can't process default configs till the CPU APIC is pmapped */
56283221Smarcel	if (x)
56383221Smarcel		default_mp_table(x);
56483221Smarcel
56583221Smarcel	/* post scan cleanup */
56683221Smarcel	fix_mp_table();
56783221Smarcel	setup_apic_irq_mapping();
56883221Smarcel
56983221Smarcel#if defined(APIC_IO)
57083221Smarcel
57183221Smarcel	/* fill the LOGICAL io_apic_versions table */
57283221Smarcel	for (apic = 0; apic < mp_napics; ++apic) {
57383221Smarcel		ux = io_apic_read(apic, IOAPIC_VER);
57483221Smarcel		io_apic_versions[apic] = ux;
57583221Smarcel	}
57683221Smarcel
57783221Smarcel	/* program each IO APIC in the system */
57883221Smarcel	for (apic = 0; apic < mp_napics; ++apic)
57983221Smarcel		if (io_apic_setup(apic) < 0)
58083221Smarcel			panic("IO APIC setup failure");
58183221Smarcel
58283221Smarcel	/* install a 'Spurious INTerrupt' vector */
58383221Smarcel	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
58483221Smarcel	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
58510355Sswallace
5869313Ssos	/* install an inter-CPU IPI for TLB invalidation */
58783221Smarcel	setidt(XINVLTLB_OFFSET, Xinvltlb,
58883221Smarcel	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
5899313Ssos
59083221Smarcel#ifdef BETTER_CLOCK
59183221Smarcel	/* install an inter-CPU IPI for reading processor state */
59283221Smarcel	setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
59310355Sswallace	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
5949313Ssos#endif
59583366Sjulian
59683221Smarcel	/* install an inter-CPU IPI for forcing an additional software trap */
5979313Ssos	setidt(XCPUAST_OFFSET, Xcpuast,
59883221Smarcel	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
59983221Smarcel
60083221Smarcel	/* install an inter-CPU IPI for interrupt forwarding */
601175294Sattilio	setidt(XFORWARD_IRQ_OFFSET, Xforward_irq,
602160276Sjhb	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
60389306Salfred
60483221Smarcel	/* install an inter-CPU IPI for CPU stop/restart */
60583221Smarcel	setidt(XCPUSTOP_OFFSET, Xcpustop,
6069313Ssos	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
60714331Speter
60883221Smarcel#if defined(TEST_TEST1)
60983366Sjulian	/* install a "fake hardware INTerrupt" vector */
61083221Smarcel	setidt(XTEST1_OFFSET, Xtest1,
61183221Smarcel	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
61283221Smarcel#endif  /** TEST_TEST1 */
61383221Smarcel
61483221Smarcel#endif	/* APIC_IO */
61583221Smarcel
61683221Smarcel	/* initialize all SMP locks */
61783366Sjulian	init_locks();
61883221Smarcel
61983221Smarcel	/* start each Application Processor */
62083221Smarcel	start_all_aps(boot_addr);
62183366Sjulian
62283221Smarcel	/*
62383221Smarcel	 * The init process might be started on a different CPU now,
62483221Smarcel	 * and the boot CPU might not call prepare_usermode to get
62583221Smarcel	 * cr0 correctly configured. Thus we initialize cr0 here.
62683221Smarcel	 */
62783221Smarcel	load_cr0(rcr0() | CR0_WP | CR0_AM);
62883221Smarcel}
62983366Sjulian
63083221Smarcel
63183221Smarcel/*
63214331Speter * look for the MP spec signature
63314331Speter */
63414331Speter
63514331Speter/* string defined by the Intel MP Spec as identifying the MP table */
63614331Speter#define MP_SIG		0x5f504d5f	/* _MP_ */
63783366Sjulian#define NEXT(X)		((X) += 4)
63814331Speterstatic int
639102814Siedowsesearch_for_sig(u_int32_t target, int count)
640102814Siedowse{
64114331Speter	int     x;
642162585Snetchild	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
643162585Snetchild
644162585Snetchild	for (x = 0; x < count; NEXT(x))
645162585Snetchild		if (addr[x] == MP_SIG)
646102814Siedowse			/* make array index a byte index */
64714331Speter			return (target + (x * sizeof(u_int32_t)));
64814331Speter
64972543Sjlemon	return -1;
650102814Siedowse}
65114331Speter
652102814Siedowse
653102814Siedowsestatic basetable_entry basetable_entry_types[] =
654162585Snetchild{
655102814Siedowse	{0, 20, "Processor"},
65614331Speter	{1, 8, "Bus"},
65714331Speter	{2, 8, "I/O APIC"},
65814331Speter	{3, 8, "I/O INT"},
65983366Sjulian	{4, 8, "Local INT"}
66014331Speter};
661102814Siedowse
662102814Siedowsetypedef struct BUSDATA {
663162201Snetchild	u_char  bus_id;
66414331Speter	enum busTypes bus_type;
665102814Siedowse}       bus_datum;
66614331Speter
66714331Spetertypedef struct INTDATA {
66872543Sjlemon	u_char  int_type;
669102814Siedowse	u_short int_flags;
67014331Speter	u_char  src_bus_id;
67114331Speter	u_char  src_bus_irq;
672102814Siedowse	u_char  dst_apic_id;
673162201Snetchild	u_char  dst_apic_int;
674162201Snetchild	u_char	int_vector;
675162201Snetchild}       io_int, local_int;
676162201Snetchild
677162201Snetchildtypedef struct BUSTYPENAME {
678102814Siedowse	u_char  type;
679102814Siedowse	char    name[7];
68014331Speter}       bus_type_name;
68114331Speter
68214331Speterstatic bus_type_name bus_type_table[] =
68383366Sjulian{
68414331Speter	{CBUS, "CBUS"},
685102814Siedowse	{CBUSII, "CBUSII"},
686102814Siedowse	{EISA, "EISA"},
68714331Speter	{UNKNOWN_BUSTYPE, "---"},
688102814Siedowse	{UNKNOWN_BUSTYPE, "---"},
68914331Speter	{ISA, "ISA"},
69014331Speter	{UNKNOWN_BUSTYPE, "---"},
69172543Sjlemon	{UNKNOWN_BUSTYPE, "---"},
692102814Siedowse	{UNKNOWN_BUSTYPE, "---"},
69314331Speter	{UNKNOWN_BUSTYPE, "---"},
694102814Siedowse	{UNKNOWN_BUSTYPE, "---"},
695102814Siedowse	{UNKNOWN_BUSTYPE, "---"},
696102814Siedowse	{PCI, "PCI"},
69714331Speter	{UNKNOWN_BUSTYPE, "---"},
69814331Speter	{UNKNOWN_BUSTYPE, "---"},
69914331Speter	{UNKNOWN_BUSTYPE, "---"},
70083366Sjulian	{UNKNOWN_BUSTYPE, "---"},
70114331Speter	{XPRESS, "XPRESS"},
702102814Siedowse	{UNKNOWN_BUSTYPE, "---"}
703102814Siedowse};
70414331Speter/* from MP spec v1.4, table 5-1 */
705102814Siedowsestatic int default_data[7][5] =
70614331Speter{
70714331Speter/*   nbus, id0, type0, id1, type1 */
70872543Sjlemon	{1, 0, ISA, 255, 255},
709102814Siedowse	{1, 0, EISA, 255, 255},
71014331Speter	{1, 0, EISA, 255, 255},
711102814Siedowse	{0, 255, 255, 255, 255},/* MCA not supported */
712102814Siedowse	{2, 0, ISA, 1, PCI},
713102814Siedowse	{2, 0, EISA, 1, PCI},
71414331Speter	{0, 255, 255, 255, 255}	/* MCA not supported */
71514331Speter};
71614331Speter
71783366Sjulian
71814331Speter/* the bus data */
719102814Siedowsestatic bus_datum bus_data[NBUS];
720102814Siedowse
72114331Speter/* the IO INT data, one entry per possible APIC INTerrupt */
722102814Siedowsestatic io_int  io_apic_ints[NINTR];
72314331Speter
72414331Speterstatic int nintrs;
72572543Sjlemon
726102814Siedowsestatic int processor_entry	__P((proc_entry_ptr entry, int cpu));
72714331Speterstatic int bus_entry		__P((bus_entry_ptr entry, int bus));
728102814Siedowsestatic int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
729102814Siedowsestatic int int_entry		__P((int_entry_ptr entry, int intr));
730102814Siedowsestatic int lookup_bus_type	__P((char *name));
73114331Speter
73214331Speter
73314331Speter/*
73483366Sjulian * 1st pass on motherboard's Intel MP specification table.
73514331Speter *
736102814Siedowse * initializes:
737102814Siedowse *	mp_ncpus = 1
73814331Speter *
739102814Siedowse * determines:
74014331Speter *	cpu_apic_address (common to all CPUs)
74114331Speter *	io_apic_address[N]
74272543Sjlemon *	mp_naps
743102814Siedowse *	mp_nbusses
74414331Speter *	mp_napics
745102814Siedowse *	nintrs
746102814Siedowse */
747102814Siedowsestatic int
74814331Spetermptable_pass1(void)
74914331Speter{
75014331Speter	int	x;
75183366Sjulian	mpcth_t	cth;
75214331Speter	int	totalSize;
753102814Siedowse	void*	position;
754102814Siedowse	int	count;
75514331Speter	int	type;
756102814Siedowse	int	mustpanic;
757102814Siedowse
758102814Siedowse	POSTCODE(MPTABLE_PASS1_POST);
759102814Siedowse
760102814Siedowse	mustpanic = 0;
761102814Siedowse
762102814Siedowse	/* clear various tables */
76314331Speter	for (x = 0; x < NAPICID; ++x) {
76414331Speter		io_apic_address[x] = ~0;	/* IO APIC address table */
76572543Sjlemon	}
766102814Siedowse
76714331Speter	/* init everything to empty */
768102814Siedowse	mp_naps = 0;
769102814Siedowse	mp_nbusses = 0;
770102814Siedowse	mp_napics = 0;
771102814Siedowse	nintrs = 0;
77214331Speter
77314331Speter	/* check for use of 'default' configuration */
77414331Speter	if (MPFPS_MPFB1 != 0) {
77583366Sjulian		/* use default addresses */
77614331Speter		cpu_apic_address = DEFAULT_APIC_BASE;
777102814Siedowse		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
778102814Siedowse
77914331Speter		/* fill in with defaults */
780102814Siedowse		mp_naps = 2;		/* includes BSP */
781102814Siedowse		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
782102814Siedowse#if defined(APIC_IO)
783102814Siedowse		mp_napics = 1;
784102814Siedowse		nintrs = 16;
785102814Siedowse#endif	/* APIC_IO */
786102814Siedowse	}
78714331Speter	else {
78814331Speter		if ((cth = mpfps->pap) == 0)
78972543Sjlemon			panic("MP Configuration Table Header MISSING!");
790102814Siedowse
79114331Speter		cpu_apic_address = (vm_offset_t) cth->apic_address;
792102814Siedowse
793102814Siedowse		/* walk the table, recording info of interest */
794102814Siedowse		totalSize = cth->base_table_length - sizeof(struct MPCTH);
795102814Siedowse		position = (u_char *) cth + sizeof(struct MPCTH);
79614331Speter		count = cth->entry_count;
79714331Speter
79814331Speter		while (count--) {
79983366Sjulian			switch (type = *(u_char *) position) {
80014331Speter			case 0: /* processor_entry */
801102814Siedowse				if (((proc_entry_ptr)position)->cpu_flags
802102814Siedowse					& PROCENTRY_FLAG_EN)
80314331Speter					++mp_naps;
804102814Siedowse				break;
80514331Speter			case 1: /* bus_entry */
80614331Speter				++mp_nbusses;
80772543Sjlemon				break;
808102814Siedowse			case 2: /* io_apic_entry */
809102814Siedowse				if (((io_apic_entry_ptr)position)->apic_flags
81014331Speter					& IOAPICENTRY_FLAG_EN)
811102814Siedowse					io_apic_address[mp_napics++] =
812102814Siedowse					    (vm_offset_t)((io_apic_entry_ptr)
813102814Siedowse						position)->apic_address;
814102814Siedowse				break;
81514331Speter			case 3: /* int_entry */
81614331Speter				++nintrs;
81714331Speter				break;
81883366Sjulian			case 4:	/* int_entry */
81914331Speter				break;
820102814Siedowse			default:
821102814Siedowse				panic("mpfps Base Table HOSED!");
82214331Speter				/* NOTREACHED */
823102814Siedowse			}
82414331Speter
82514331Speter			totalSize -= basetable_entry_types[type].length;
82672543Sjlemon			(u_char*)position += basetable_entry_types[type].length;
827102814Siedowse		}
82814331Speter	}
82914331Speter
830102814Siedowse	/* qualify the numbers */
831102814Siedowse	if (mp_naps > NCPU)
832102814Siedowse#if 0 /* XXX FIXME: kern/4255 */
83314331Speter		printf("Warning: only using %d of %d available CPUs!\n",
83414331Speter			NCPU, mp_naps);
83549662Smarcel#else
836156842Snetchild	{
837156842Snetchild		printf("NCPU cannot be different than actual CPU count.\n");
838156842Snetchild		printf(" add 'options NCPU=%d' to your kernel config file,\n",
839156842Snetchild			mp_naps);
840156842Snetchild		printf(" then rerun config & rebuild your SMP kernel\n");
841156842Snetchild		mustpanic = 1;
842156842Snetchild	}
843156842Snetchild#endif /* XXX FIXME: kern/4255 */
844156842Snetchild	if (mp_nbusses > NBUS) {
845156842Snetchild		printf("found %d busses, increase NBUS\n", mp_nbusses);
846156842Snetchild		mustpanic = 1;
847156842Snetchild	}
848156842Snetchild	if (mp_napics > NAPIC) {
849156842Snetchild		printf("found %d apics, increase NAPIC\n", mp_napics);
85083366Sjulian		mustpanic = 1;
85149662Smarcel	}
852102814Siedowse	if (nintrs > NINTR) {
853102814Siedowse		printf("found %d intrs, increase NINTR\n", nintrs);
85449662Smarcel		mustpanic = 1;
855102814Siedowse	}
856102814Siedowse
857102814Siedowse	/*
858102814Siedowse	 * Count the BSP.
859102814Siedowse	 * This is also used as a counter while starting the APs.
860102814Siedowse	 */
861102814Siedowse	mp_ncpus = 1;
86249662Smarcel
86349662Smarcel	--mp_naps;	/* subtract the BSP */
86472543Sjlemon
865102814Siedowse	return mustpanic;
86649662Smarcel}
867102814Siedowse
868102814Siedowse
869102814Siedowse/*
870102814Siedowse * 2nd pass on motherboard's Intel MP specification table.
87149662Smarcel *
87249788Smarcel * sets:
87353713Smarcel *	boot_cpu_id
87483366Sjulian *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
87583366Sjulian *	CPU_TO_ID(N), logical CPU to APIC ID table
87653713Smarcel *	IO_TO_ID(N), logical IO to APIC ID table
87753713Smarcel *	bus_data[N]
87853713Smarcel *	io_apic_ints[N]
87953713Smarcel */
88053713Smarcelstatic int
88183366Sjulianmptable_pass2(void)
88253713Smarcel{
88363285Smarcel	int     x;
88463285Smarcel	mpcth_t cth;
88583366Sjulian	int     totalSize;
88683366Sjulian	void*   position;
88763285Smarcel	int     count;
88863285Smarcel	int     type;
88963285Smarcel	int     apic, bus, cpu, intr;
890162585Snetchild
891162585Snetchild	POSTCODE(MPTABLE_PASS2_POST);
89263285Smarcel
89363285Smarcel	/* clear various tables */
89463285Smarcel	for (x = 0; x < NAPICID; ++x) {
89563285Smarcel		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
89663285Smarcel		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
897162585Snetchild		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
898162585Snetchild	}
899162585Snetchild
900162585Snetchild	/* clear bus data table */
901162585Snetchild	for (x = 0; x < NBUS; ++x)
902162585Snetchild		bus_data[x].bus_id = 0xff;
903162585Snetchild
904162585Snetchild	/* clear IO APIC INT table */
905162585Snetchild	for (x = 0; x < NINTR; ++x) {
906162585Snetchild		io_apic_ints[x].int_type = 0xff;
907162585Snetchild		io_apic_ints[x].int_vector = 0xff;
908162585Snetchild	}
909162585Snetchild
910162585Snetchild	/* setup the cpu/apic mapping arrays */
911162585Snetchild	boot_cpu_id = -1;
91263285Smarcel
91363285Smarcel	/* record whether PIC or virtual-wire mode */
91463285Smarcel	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
91583366Sjulian
91683366Sjulian	/* check for use of 'default' configuration */
91763285Smarcel	if (MPFPS_MPFB1 != 0)
91863285Smarcel		return MPFPS_MPFB1;	/* return default configuration type */
91963285Smarcel
92063285Smarcel	if ((cth = mpfps->pap) == 0)
92163285Smarcel		panic("MP Configuration Table Header MISSING!");
92263285Smarcel
92363285Smarcel	/* walk the table, recording info of interest */
92463285Smarcel	totalSize = cth->base_table_length - sizeof(struct MPCTH);
92583366Sjulian	position = (u_char *) cth + sizeof(struct MPCTH);
92663285Smarcel	count = cth->entry_count;
92772538Sjlemon	apic = bus = intr = 0;
92872538Sjlemon	cpu = 1;				/* pre-count the BSP */
92983366Sjulian
93072538Sjlemon	while (count--) {
93172538Sjlemon		switch (type = *(u_char *) position) {
932111798Sdes		case 0:
933111798Sdes			if (processor_entry(position, cpu))
93473286Sadrian				++cpu;
93573286Sadrian			break;
93673286Sadrian		case 1:
93772538Sjlemon			if (bus_entry(position, bus))
938111798Sdes				++bus;
93973286Sadrian			break;
94072538Sjlemon		case 2:
941111798Sdes			if (io_apic_entry(position, apic))
942127057Stjr				++apic;
94372538Sjlemon			break;
944111798Sdes		case 3:
945127057Stjr			if (int_entry(position, intr))
94672538Sjlemon				++intr;
947111798Sdes			break;
94872538Sjlemon		case 4:
94972538Sjlemon			/* int_entry(position); */
95072538Sjlemon			break;
95172538Sjlemon		default:
95272538Sjlemon			panic("mpfps Base Table HOSED!");
95372538Sjlemon			/* NOTREACHED */
95472538Sjlemon		}
95572538Sjlemon
956127059Stjr		totalSize -= basetable_entry_types[type].length;
95773286Sadrian		(u_char *) position += basetable_entry_types[type].length;
95872538Sjlemon	}
95972538Sjlemon
96072538Sjlemon	if (boot_cpu_id == -1)
96172538Sjlemon		panic("NO BSP found!");
96272538Sjlemon
96372538Sjlemon	/* report fact that its NOT a default configuration */
964127059Stjr	return 0;
96573286Sadrian}
96672538Sjlemon
96772538Sjlemon
96872538Sjlemonstatic void
96972538Sjlemonassign_apic_irq(int apic, int intpin, int irq)
97073286Sadrian{
97172538Sjlemon	int x;
97272538Sjlemon
97372538Sjlemon	if (int_to_apicintpin[irq].ioapic != -1)
97472538Sjlemon		panic("assign_apic_irq: inconsistent table");
97572538Sjlemon
97672538Sjlemon	int_to_apicintpin[irq].ioapic = apic;
97772538Sjlemon	int_to_apicintpin[irq].int_pin = intpin;
978111798Sdes	int_to_apicintpin[irq].apic_address = ioapic[apic];
97972538Sjlemon	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
980111798Sdes
98172538Sjlemon	for (x = 0; x < nintrs; x++) {
982111798Sdes		if ((io_apic_ints[x].int_type == 0 ||
98372538Sjlemon		     io_apic_ints[x].int_type == 3) &&
984111798Sdes		    io_apic_ints[x].int_vector == 0xff &&
98572538Sjlemon		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
98672538Sjlemon		    io_apic_ints[x].dst_apic_int == intpin)
987127059Stjr			io_apic_ints[x].int_vector = irq;
988132708Sphk	}
989132708Sphk}
990132708Sphk
991132708Sphk/*
992127059Stjr * parse an Intel MP specification table
993138353Sphk */
994127059Stjrstatic void
99572538Sjlemonfix_mp_table(void)
99672538Sjlemon{
99772538Sjlemon	int	x;
99883366Sjulian	int	id;
99972538Sjlemon	int	bus_0 = 0;	/* Stop GCC warning */
100083221Smarcel	int	bus_pci = 0;	/* Stop GCC warning */
100172538Sjlemon	int	num_pci_bus;
100272538Sjlemon
100372538Sjlemon	/*
100483366Sjulian	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
100572538Sjlemon	 * did it wrong.  The MP spec says that when more than 1 PCI bus
100672538Sjlemon	 * exists the BIOS must begin with bus entries for the PCI bus and use
100772538Sjlemon	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
100883366Sjulian	 * exists the BIOS can choose to ignore this ordering, and indeed many
100972538Sjlemon	 * MP motherboards do ignore it.  This causes a problem when the PCI
101072538Sjlemon	 * sub-system makes requests of the MP sub-system based on PCI bus
101172538Sjlemon	 * numbers.	So here we look for the situation and renumber the
101272538Sjlemon	 * busses and associated INTs in an effort to "make it right".
101372538Sjlemon	 */
101483366Sjulian
101572538Sjlemon	/* find bus 0, PCI bus, count the number of PCI busses */
101683221Smarcel	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
101783221Smarcel		if (bus_data[x].bus_id == 0) {
101883221Smarcel			bus_0 = x;
101983221Smarcel		}
102083221Smarcel		if (bus_data[x].bus_type == PCI) {
102183221Smarcel			++num_pci_bus;
102283221Smarcel			bus_pci = x;
102383221Smarcel		}
102483221Smarcel	}
102583221Smarcel	/*
102683221Smarcel	 * bus_0 == slot of bus with ID of 0
1027133816Stjr	 * bus_pci == slot of last PCI bus encountered
1028140214Sobrien	 */
1029133816Stjr
1030133816Stjr	/* check the 1 PCI bus case for sanity */
1031133816Stjr	if (num_pci_bus == 1) {
103283221Smarcel
103383221Smarcel		/* if it is number 0 all is well */
103483221Smarcel		if (bus_data[bus_pci].bus_id == 0)
103583221Smarcel			return;
103683221Smarcel
103783221Smarcel		/* mis-numbered, swap with whichever bus uses slot 0 */
103883221Smarcel
103983221Smarcel		/* swap the bus entry types */
104083221Smarcel		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
104183221Smarcel		bus_data[bus_0].bus_type = PCI;
104283221Smarcel
104383221Smarcel		/* swap each relavant INTerrupt entry */
104483221Smarcel		id = bus_data[bus_pci].bus_id;
104583221Smarcel		for (x = 0; x < nintrs; ++x) {
104683221Smarcel			if (io_apic_ints[x].src_bus_id == id) {
104783221Smarcel				io_apic_ints[x].src_bus_id = 0;
104883221Smarcel			}
104983221Smarcel			else if (io_apic_ints[x].src_bus_id == 0) {
105083221Smarcel				io_apic_ints[x].src_bus_id = id;
105183221Smarcel			}
105283221Smarcel		}
105383221Smarcel	}
1054177633Sdfr	/* sanity check if more than 1 PCI bus */
105583221Smarcel	else if (num_pci_bus > 1) {
105683221Smarcel		for (x = 0; x < mp_nbusses; ++x) {
105783221Smarcel			if (bus_data[x].bus_type != PCI)
105883221Smarcel				continue;
105983221Smarcel		}
106083221Smarcel	}
106183221Smarcel}
106283221Smarcel
106383221Smarcel
106483221Smarcelstatic void
106583221Smarcelsetup_apic_irq_mapping(void)
106683221Smarcel{
106783221Smarcel	int	x;
106883221Smarcel	int	int_vector;
106983221Smarcel
107083221Smarcel	/* Assign low level interrupt handlers */
107183221Smarcel	for (x = 0; x < APIC_INTMAPSIZE; x++) {
107283221Smarcel		int_to_apicintpin[x].ioapic = -1;
107383221Smarcel		int_to_apicintpin[x].int_pin = 0;
107483221Smarcel		int_to_apicintpin[x].apic_address = NULL;
107583221Smarcel		int_to_apicintpin[x].redirindex = 0;
107683221Smarcel	}
1077140214Sobrien	for (x = 0; x < nintrs; x++) {
107883221Smarcel		if (io_apic_ints[x].dst_apic_int < APIC_INTMAPSIZE &&
107983221Smarcel		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
108083221Smarcel		    io_apic_ints[x].int_vector == 0xff &&
108183221Smarcel		    (io_apic_ints[x].int_type == 0 ||
108283221Smarcel		     io_apic_ints[x].int_type == 3)) {
108383221Smarcel			assign_apic_irq(0,
1084133816Stjr					io_apic_ints[x].dst_apic_int,
1085140214Sobrien					io_apic_ints[x].dst_apic_int);
1086133816Stjr		}
1087133816Stjr	}
1088133816Stjr	int_vector = 0;
108983221Smarcel	while (int_vector < APIC_INTMAPSIZE &&
109083221Smarcel	       int_to_apicintpin[int_vector].ioapic != -1)
109183221Smarcel		int_vector++;
109283221Smarcel	for (x = 0; x < nintrs && int_vector < APIC_INTMAPSIZE; x++) {
109383221Smarcel		if ((io_apic_ints[x].int_type == 0 ||
109483221Smarcel		     io_apic_ints[x].int_type == 3) &&
109583221Smarcel		    io_apic_ints[x].int_vector == 0xff) {
109683221Smarcel			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
109783221Smarcel					io_apic_ints[x].dst_apic_int,
109883221Smarcel					int_vector);
109983221Smarcel			int_vector++;
110083221Smarcel			while (int_vector < APIC_INTMAPSIZE &&
110183221Smarcel			       int_to_apicintpin[int_vector].ioapic != -1)
110283221Smarcel				int_vector++;
110383221Smarcel		}
110483221Smarcel	}
110583221Smarcel}
110683221Smarcel
110783221Smarcel
110883221Smarcelstatic int
110983221Smarcelprocessor_entry(proc_entry_ptr entry, int cpu)
111083221Smarcel{
1111177633Sdfr	/* check for usability */
111283221Smarcel	if ((cpu >= NCPU) || !(entry->cpu_flags & PROCENTRY_FLAG_EN))
111383221Smarcel		return 0;
111483221Smarcel
111583221Smarcel	/* check for BSP flag */
111683221Smarcel	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
111783221Smarcel		boot_cpu_id = entry->apic_id;
111883221Smarcel		CPU_TO_ID(0) = entry->apic_id;
111983221Smarcel		ID_TO_CPU(entry->apic_id) = 0;
112083221Smarcel		return 0;	/* its already been counted */
112183221Smarcel	}
112283221Smarcel
112383221Smarcel	/* add another AP to list, if less than max number of CPUs */
112483221Smarcel	else {
112583221Smarcel		CPU_TO_ID(cpu) = entry->apic_id;
112683221Smarcel		ID_TO_CPU(entry->apic_id) = cpu;
112783221Smarcel		return 1;
112883221Smarcel	}
112983221Smarcel}
113083221Smarcel
113183221Smarcel
113283221Smarcelstatic int
1133133816Stjrbus_entry(bus_entry_ptr entry, int bus)
113483221Smarcel{
113583221Smarcel	int     x;
113683366Sjulian	char    c, name[8];
113783221Smarcel
1138107680Siedowse	/* encode the name into an index */
1139107680Siedowse	for (x = 0; x < 6; ++x) {
114083221Smarcel		if ((c = entry->bus_type[x]) == ' ')
1141102872Siedowse			break;
114283221Smarcel		name[x] = c;
114383221Smarcel	}
114483221Smarcel	name[x] = '\0';
114583221Smarcel
1146102872Siedowse	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
114783221Smarcel		panic("unknown bus type: '%s'", name);
114883221Smarcel
1149102872Siedowse	bus_data[bus].bus_id = entry->bus_id;
115083221Smarcel	bus_data[bus].bus_type = x;
115183221Smarcel
1152102872Siedowse	return 1;
115383221Smarcel}
115483221Smarcel
1155102872Siedowse
115683366Sjulianstatic int
115783366Sjulianio_apic_entry(io_apic_entry_ptr entry, int apic)
115883221Smarcel{
115983366Sjulian	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
116083221Smarcel		return 0;
116183366Sjulian
116283221Smarcel	IO_TO_ID(apic) = entry->apic_id;
116383366Sjulian	ID_TO_IO(entry->apic_id) = apic;
116483221Smarcel
116583366Sjulian	return 1;
116683221Smarcel}
116783366Sjulian
116883221Smarcel
116983366Sjulianstatic int
117083221Smarcellookup_bus_type(char *name)
117183366Sjulian{
1172144987Smdodd	int     x;
1173144987Smdodd
1174144987Smdodd	for (x = 0; x < MAX_BUSTYPE; ++x)
1175144987Smdodd		if (strcmp(bus_type_table[x].name, name) == 0)
1176144987Smdodd			return bus_type_table[x].type;
1177144987Smdodd
1178144987Smdodd	return UNKNOWN_BUSTYPE;
1179144987Smdodd}
118083221Smarcel
118183221Smarcel
118283221Smarcelstatic int
1183102872Siedowseint_entry(int_entry_ptr entry, int intr)
118483221Smarcel{
1185102872Siedowse	int apic;
118683221Smarcel
1187102872Siedowse	io_apic_ints[intr].int_type = entry->int_type;
118883221Smarcel	io_apic_ints[intr].int_flags = entry->int_flags;
1189102872Siedowse	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
119083221Smarcel	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1191102872Siedowse	if (entry->dst_apic_id == 255) {
1192144987Smdodd		/* This signal goes to all IO APICS.  Select an IO APIC
1193144987Smdodd		   with sufficient number of interrupt pins */
1194144987Smdodd		for (apic = 0; apic < mp_napics; apic++)
1195144987Smdodd			if (((io_apic_read(apic, IOAPIC_VER) &
1196144987Smdodd			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1197144987Smdodd			    entry->dst_apic_int)
1198144987Smdodd				break;
1199144987Smdodd		if (apic < mp_napics)
1200102872Siedowse			io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
120183221Smarcel		else
1202107680Siedowse			io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1203111797Sdes	} else
1204107680Siedowse		io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1205107680Siedowse	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1206107680Siedowse
1207107680Siedowse	return 1;
1208107680Siedowse}
1209107680Siedowse
1210107680Siedowse
1211107680Siedowsestatic int
1212111797Sdesapic_int_is_bus_type(int intr, int bus_type)
1213107680Siedowse{
1214107680Siedowse	int     bus;
1215107680Siedowse
1216111797Sdes	for (bus = 0; bus < mp_nbusses; ++bus)
1217107680Siedowse		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1218107680Siedowse		    && ((int) bus_data[bus].bus_type == bus_type))
1219107680Siedowse			return 1;
1220107680Siedowse
1221107680Siedowse	return 0;
1222107680Siedowse}
1223107680Siedowse
1224107680Siedowse
1225111797Sdes/*
1226107680Siedowse * Given a traditional ISA INT mask, return an APIC mask.
1227107680Siedowse */
1228107680Siedowseu_int
1229107680Siedowseisa_apic_mask(u_int isa_mask)
1230107680Siedowse{
1231107680Siedowse	int isa_irq;
1232107680Siedowse	int apic_pin;
123383221Smarcel
1234102872Siedowse#if defined(SKIP_IRQ15_REDIRECT)
123583221Smarcel	if (isa_mask == (1 << 15)) {
123683221Smarcel		printf("skipping ISA IRQ15 redirect\n");
123783221Smarcel		return isa_mask;
123883221Smarcel	}
123983221Smarcel#endif  /* SKIP_IRQ15_REDIRECT */
124083221Smarcel
124183221Smarcel	isa_irq = ffs(isa_mask);		/* find its bit position */
124289319Salfred	if (isa_irq == 0)			/* doesn't exist */
124389319Salfred		return 0;
124489319Salfred	--isa_irq;				/* make it zero based */
124589306Salfred
124689306Salfred	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
124783221Smarcel	if (apic_pin == -1)
124889306Salfred		return 0;
124989306Salfred
125083221Smarcel	return (1 << apic_pin);			/* convert pin# to a mask */
1251102872Siedowse}
125283221Smarcel
125383221Smarcel
125483221Smarcel/*
125583221Smarcel * Determine which APIC pin an ISA/EISA INT is attached to.
125683221Smarcel */
125783221Smarcel#define INTTYPE(I)	(io_apic_ints[(I)].int_type)
125883366Sjulian#define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
125983221Smarcel#define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
126083221Smarcel#define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
126183221Smarcel
126283221Smarcel#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
126383221Smarcelint
126483221Smarcelisa_apic_irq(int isa_irq)
126583221Smarcel{
126683221Smarcel	int     intr;
126783221Smarcel
126883221Smarcel	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
126983221Smarcel		if (INTTYPE(intr) == 0) {		/* standard INT */
127083366Sjulian			if (SRCBUSIRQ(intr) == isa_irq) {
127183221Smarcel				if (apic_int_is_bus_type(intr, ISA) ||
127283221Smarcel			            apic_int_is_bus_type(intr, EISA))
1273140214Sobrien					return INTIRQ(intr);	/* found */
127483221Smarcel			}
127583366Sjulian		}
127683221Smarcel	}
127783221Smarcel	return -1;					/* NOT found */
1278102872Siedowse}
127983221Smarcel
128083221Smarcel
128183221Smarcel/*
128283221Smarcel * Determine which APIC pin a PCI INT is attached to.
128383221Smarcel */
128483221Smarcel#define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
128583221Smarcel#define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
128683221Smarcel#define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
128799687Srobertint
1288111797Sdespci_apic_irq(int pciBus, int pciDevice, int pciInt)
128983221Smarcel{
129083221Smarcel	int     intr;
129183221Smarcel
1292102872Siedowse	--pciInt;					/* zero based */
1293102872Siedowse
129483221Smarcel	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
129583221Smarcel		if ((INTTYPE(intr) == 0)		/* standard INT */
1296102872Siedowse		    && (SRCBUSID(intr) == pciBus)
1297111797Sdes		    && (SRCBUSDEVICE(intr) == pciDevice)
1298111797Sdes		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
129983221Smarcel			if (apic_int_is_bus_type(intr, PCI))
130099687Srobert				return INTIRQ(intr);	/* exact match */
1301111797Sdes
130283221Smarcel	return -1;					/* NOT found */
130383221Smarcel}
130483221Smarcel
1305102872Siedowseint
1306102872Siedowsenext_apic_irq(int irq)
1307102872Siedowse{
130883221Smarcel	int intr, ointr;
130999687Srobert	int bus, bustype;
1310111797Sdes
131183221Smarcel	bus = 0;
131283221Smarcel	bustype = 0;
131383221Smarcel	for (intr = 0; intr < nintrs; intr++) {
1314102872Siedowse		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1315102872Siedowse			continue;
1316102872Siedowse		bus = SRCBUSID(intr);
131783221Smarcel		bustype = apic_bus_type(bus);
131883221Smarcel		if (bustype != ISA &&
131983366Sjulian		    bustype != EISA &&
132083221Smarcel		    bustype != PCI)
1321133816Stjr			continue;
132285022Smarcel		break;
132385022Smarcel	}
132485022Smarcel	if (intr >= nintrs) {
132585022Smarcel		return -1;
1326102814Siedowse	}
1327102814Siedowse	for (ointr = intr + 1; ointr < nintrs; ointr++) {
132885022Smarcel		if (INTTYPE(ointr) != 0)
1329102814Siedowse			continue;
133085022Smarcel		if (bus != SRCBUSID(ointr))
133185022Smarcel			continue;
133285022Smarcel		if (bustype == PCI) {
1333102814Siedowse			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
133485022Smarcel				continue;
1335102814Siedowse			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1336102814Siedowse				continue;
1337102814Siedowse		}
133885022Smarcel		if (bustype == ISA || bustype == EISA) {
133985022Smarcel			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
134085022Smarcel				continue;
134185022Smarcel		}
134285022Smarcel		if (INTPIN(intr) == INTPIN(ointr))
1343102814Siedowse			continue;
1344102814Siedowse		break;
134585022Smarcel	}
1346102814Siedowse	if (ointr >= nintrs) {
134785022Smarcel		return -1;
134885022Smarcel	}
134985022Smarcel	return INTIRQ(ointr);
1350102814Siedowse}
135185022Smarcel#undef SRCBUSLINE
1352102814Siedowse#undef SRCBUSDEVICE
1353102814Siedowse#undef SRCBUSID
1354102814Siedowse#undef SRCBUSIRQ
135585022Smarcel
1356#undef INTPIN
1357#undef INTIRQ
1358#undef INTAPIC
1359#undef INTTYPE
1360
1361
1362/*
1363 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1364 *
1365 * XXX FIXME:
1366 *  Exactly what this means is unclear at this point.  It is a solution
1367 *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
1368 *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
1369 *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1370 *  option.
1371 */
1372int
1373undirect_isa_irq(int rirq)
1374{
1375#if defined(READY)
1376	if (bootverbose)
1377	    printf("Freeing redirected ISA irq %d.\n", rirq);
1378	/** FIXME: tickle the MB redirector chip */
1379	return ???;
1380#else
1381	if (bootverbose)
1382	    printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1383	return 0;
1384#endif  /* READY */
1385}
1386
1387
1388/*
1389 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1390 */
1391int
1392undirect_pci_irq(int rirq)
1393{
1394#if defined(READY)
1395	if (bootverbose)
1396		printf("Freeing redirected PCI irq %d.\n", rirq);
1397
1398	/** FIXME: tickle the MB redirector chip */
1399	return ???;
1400#else
1401	if (bootverbose)
1402		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1403		       rirq);
1404	return 0;
1405#endif  /* READY */
1406}
1407
1408
1409/*
1410 * given a bus ID, return:
1411 *  the bus type if found
1412 *  -1 if NOT found
1413 */
1414int
1415apic_bus_type(int id)
1416{
1417	int     x;
1418
1419	for (x = 0; x < mp_nbusses; ++x)
1420		if (bus_data[x].bus_id == id)
1421			return bus_data[x].bus_type;
1422
1423	return -1;
1424}
1425
1426
1427/*
1428 * given a LOGICAL APIC# and pin#, return:
1429 *  the associated src bus ID if found
1430 *  -1 if NOT found
1431 */
1432int
1433apic_src_bus_id(int apic, int pin)
1434{
1435	int     x;
1436
1437	/* search each of the possible INTerrupt sources */
1438	for (x = 0; x < nintrs; ++x)
1439		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1440		    (pin == io_apic_ints[x].dst_apic_int))
1441			return (io_apic_ints[x].src_bus_id);
1442
1443	return -1;		/* NOT found */
1444}
1445
1446
1447/*
1448 * given a LOGICAL APIC# and pin#, return:
1449 *  the associated src bus IRQ if found
1450 *  -1 if NOT found
1451 */
1452int
1453apic_src_bus_irq(int apic, int pin)
1454{
1455	int     x;
1456
1457	for (x = 0; x < nintrs; x++)
1458		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1459		    (pin == io_apic_ints[x].dst_apic_int))
1460			return (io_apic_ints[x].src_bus_irq);
1461
1462	return -1;		/* NOT found */
1463}
1464
1465
1466/*
1467 * given a LOGICAL APIC# and pin#, return:
1468 *  the associated INTerrupt type if found
1469 *  -1 if NOT found
1470 */
1471int
1472apic_int_type(int apic, int pin)
1473{
1474	int     x;
1475
1476	/* search each of the possible INTerrupt sources */
1477	for (x = 0; x < nintrs; ++x)
1478		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1479		    (pin == io_apic_ints[x].dst_apic_int))
1480			return (io_apic_ints[x].int_type);
1481
1482	return -1;		/* NOT found */
1483}
1484
1485int
1486apic_irq(int apic, int pin)
1487{
1488	int x;
1489	int res;
1490
1491	for (x = 0; x < nintrs; ++x)
1492		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1493		    (pin == io_apic_ints[x].dst_apic_int)) {
1494			res = io_apic_ints[x].int_vector;
1495			if (res == 0xff)
1496				return -1;
1497			if (apic != int_to_apicintpin[res].ioapic)
1498				panic("apic_irq: inconsistent table");
1499			if (pin != int_to_apicintpin[res].int_pin)
1500				panic("apic_irq inconsistent table (2)");
1501			return res;
1502		}
1503	return -1;
1504}
1505
1506
1507/*
1508 * given a LOGICAL APIC# and pin#, return:
1509 *  the associated trigger mode if found
1510 *  -1 if NOT found
1511 */
1512int
1513apic_trigger(int apic, int pin)
1514{
1515	int     x;
1516
1517	/* search each of the possible INTerrupt sources */
1518	for (x = 0; x < nintrs; ++x)
1519		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1520		    (pin == io_apic_ints[x].dst_apic_int))
1521			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1522
1523	return -1;		/* NOT found */
1524}
1525
1526
1527/*
1528 * given a LOGICAL APIC# and pin#, return:
1529 *  the associated 'active' level if found
1530 *  -1 if NOT found
1531 */
1532int
1533apic_polarity(int apic, int pin)
1534{
1535	int     x;
1536
1537	/* search each of the possible INTerrupt sources */
1538	for (x = 0; x < nintrs; ++x)
1539		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1540		    (pin == io_apic_ints[x].dst_apic_int))
1541			return (io_apic_ints[x].int_flags & 0x03);
1542
1543	return -1;		/* NOT found */
1544}
1545
1546
1547/*
1548 * set data according to MP defaults
1549 * FIXME: probably not complete yet...
1550 */
1551static void
1552default_mp_table(int type)
1553{
1554	int     ap_cpu_id;
1555#if defined(APIC_IO)
1556	u_int32_t ux;
1557	int     io_apic_id;
1558	int     pin;
1559#endif	/* APIC_IO */
1560
1561#if 0
1562	printf("  MP default config type: %d\n", type);
1563	switch (type) {
1564	case 1:
1565		printf("   bus: ISA, APIC: 82489DX\n");
1566		break;
1567	case 2:
1568		printf("   bus: EISA, APIC: 82489DX\n");
1569		break;
1570	case 3:
1571		printf("   bus: EISA, APIC: 82489DX\n");
1572		break;
1573	case 4:
1574		printf("   bus: MCA, APIC: 82489DX\n");
1575		break;
1576	case 5:
1577		printf("   bus: ISA+PCI, APIC: Integrated\n");
1578		break;
1579	case 6:
1580		printf("   bus: EISA+PCI, APIC: Integrated\n");
1581		break;
1582	case 7:
1583		printf("   bus: MCA+PCI, APIC: Integrated\n");
1584		break;
1585	default:
1586		printf("   future type\n");
1587		break;
1588		/* NOTREACHED */
1589	}
1590#endif	/* 0 */
1591
1592	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1593	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1594
1595	/* BSP */
1596	CPU_TO_ID(0) = boot_cpu_id;
1597	ID_TO_CPU(boot_cpu_id) = 0;
1598
1599	/* one and only AP */
1600	CPU_TO_ID(1) = ap_cpu_id;
1601	ID_TO_CPU(ap_cpu_id) = 1;
1602
1603#if defined(APIC_IO)
1604	/* one and only IO APIC */
1605	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1606
1607	/*
1608	 * sanity check, refer to MP spec section 3.6.6, last paragraph
1609	 * necessary as some hardware isn't properly setting up the IO APIC
1610	 */
1611#if defined(REALLY_ANAL_IOAPICID_VALUE)
1612	if (io_apic_id != 2) {
1613#else
1614	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1615#endif	/* REALLY_ANAL_IOAPICID_VALUE */
1616		ux = io_apic_read(0, IOAPIC_ID);	/* get current contents */
1617		ux &= ~APIC_ID_MASK;	/* clear the ID field */
1618		ux |= 0x02000000;	/* set it to '2' */
1619		io_apic_write(0, IOAPIC_ID, ux);	/* write new value */
1620		ux = io_apic_read(0, IOAPIC_ID);	/* re-read && test */
1621		if ((ux & APIC_ID_MASK) != 0x02000000)
1622			panic("can't control IO APIC ID, reg: 0x%08x", ux);
1623		io_apic_id = 2;
1624	}
1625	IO_TO_ID(0) = io_apic_id;
1626	ID_TO_IO(io_apic_id) = 0;
1627#endif	/* APIC_IO */
1628
1629	/* fill out bus entries */
1630	switch (type) {
1631	case 1:
1632	case 2:
1633	case 3:
1634	case 5:
1635	case 6:
1636		bus_data[0].bus_id = default_data[type - 1][1];
1637		bus_data[0].bus_type = default_data[type - 1][2];
1638		bus_data[1].bus_id = default_data[type - 1][3];
1639		bus_data[1].bus_type = default_data[type - 1][4];
1640		break;
1641
1642	/* case 4: case 7:		   MCA NOT supported */
1643	default:		/* illegal/reserved */
1644		panic("BAD default MP config: %d", type);
1645		/* NOTREACHED */
1646	}
1647
1648#if defined(APIC_IO)
1649	/* general cases from MP v1.4, table 5-2 */
1650	for (pin = 0; pin < 16; ++pin) {
1651		io_apic_ints[pin].int_type = 0;
1652		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
1653		io_apic_ints[pin].src_bus_id = 0;
1654		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
1655		io_apic_ints[pin].dst_apic_id = io_apic_id;
1656		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
1657	}
1658
1659	/* special cases from MP v1.4, table 5-2 */
1660	if (type == 2) {
1661		io_apic_ints[2].int_type = 0xff;	/* N/C */
1662		io_apic_ints[13].int_type = 0xff;	/* N/C */
1663#if !defined(APIC_MIXED_MODE)
1664		/** FIXME: ??? */
1665		panic("sorry, can't support type 2 default yet");
1666#endif	/* APIC_MIXED_MODE */
1667	}
1668	else
1669		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
1670
1671	if (type == 7)
1672		io_apic_ints[0].int_type = 0xff;	/* N/C */
1673	else
1674		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
1675#endif	/* APIC_IO */
1676}
1677
1678
1679/*
1680 * initialize all the SMP locks
1681 */
1682
1683/* critical region around IO APIC, apic_imen */
1684struct simplelock	imen_lock;
1685
1686/* critical region around splxx(), cpl, cml, cil, ipending */
1687struct simplelock	cpl_lock;
1688
1689/* Make FAST_INTR() routines sequential */
1690struct simplelock	fast_intr_lock;
1691
1692/* critical region around INTR() routines */
1693struct simplelock	intr_lock;
1694
1695/* lock regions protected in UP kernel via cli/sti */
1696struct simplelock	mpintr_lock;
1697
1698/* lock region used by kernel profiling */
1699struct simplelock	mcount_lock;
1700
1701#ifdef USE_COMLOCK
1702/* locks com (tty) data/hardware accesses: a FASTINTR() */
1703struct simplelock	com_lock;
1704#endif /* USE_COMLOCK */
1705
1706#ifdef USE_CLOCKLOCK
1707/* lock regions around the clock hardware */
1708struct simplelock	clock_lock;
1709#endif /* USE_CLOCKLOCK */
1710
1711static void
1712init_locks(void)
1713{
1714	/*
1715	 * Get the initial mp_lock with a count of 1 for the BSP.
1716	 * This uses a LOGICAL cpu ID, ie BSP == 0.
1717	 */
1718	mp_lock = 0x00000001;
1719
1720	/* ISR uses its own "giant lock" */
1721	isr_lock = FREE_LOCK;
1722
1723#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
1724	s_lock_init((struct simplelock*)&apic_itrace_debuglock);
1725#endif
1726
1727	s_lock_init((struct simplelock*)&mpintr_lock);
1728
1729	s_lock_init((struct simplelock*)&mcount_lock);
1730
1731	s_lock_init((struct simplelock*)&fast_intr_lock);
1732	s_lock_init((struct simplelock*)&intr_lock);
1733	s_lock_init((struct simplelock*)&imen_lock);
1734	s_lock_init((struct simplelock*)&cpl_lock);
1735
1736#ifdef USE_COMLOCK
1737	s_lock_init((struct simplelock*)&com_lock);
1738#endif /* USE_COMLOCK */
1739#ifdef USE_CLOCKLOCK
1740	s_lock_init((struct simplelock*)&clock_lock);
1741#endif /* USE_CLOCKLOCK */
1742}
1743
1744
1745/* Wait for all APs to be fully initialized */
1746extern int wait_ap(unsigned int);
1747
1748/*
1749 * start each AP in our list
1750 */
1751static int
1752start_all_aps(u_int boot_addr)
1753{
1754	int     x, i, pg;
1755	u_char  mpbiosreason;
1756	u_long  mpbioswarmvec;
1757	struct globaldata *gd;
1758	char *stack;
1759
1760	POSTCODE(START_ALL_APS_POST);
1761
1762	/* initialize BSP's local APIC */
1763	apic_initialize();
1764	bsp_apic_ready = 1;
1765
1766	/* install the AP 1st level boot code */
1767	install_ap_tramp(boot_addr);
1768
1769
1770	/* save the current value of the warm-start vector */
1771	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1772#ifndef PC98
1773	outb(CMOS_REG, BIOS_RESET);
1774	mpbiosreason = inb(CMOS_DATA);
1775#endif
1776
1777	/* record BSP in CPU map */
1778	all_cpus = 1;
1779
1780	/* set up 0 -> 4MB P==V mapping for AP boot */
1781	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
1782	invltlb();
1783
1784	/* start each AP */
1785	for (x = 1; x <= mp_naps; ++x) {
1786
1787		/* This is a bit verbose, it will go away soon.  */
1788
1789		/* first page of AP's private space */
1790		pg = x * i386_btop(sizeof(struct privatespace));
1791
1792		/* allocate a new private data page */
1793		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
1794
1795		/* wire it into the private page table page */
1796		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
1797
1798		/* allocate and set up an idle stack data page */
1799		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
1800		for (i = 0; i < UPAGES; i++)
1801			SMPpt[pg + 5 + i] = (pt_entry_t)
1802			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1803
1804		SMPpt[pg + 1] = 0;		/* *prv_CMAP1 */
1805		SMPpt[pg + 2] = 0;		/* *prv_CMAP2 */
1806		SMPpt[pg + 3] = 0;		/* *prv_CMAP3 */
1807		SMPpt[pg + 4] = 0;		/* *prv_PMAP1 */
1808
1809		/* prime data page for it to use */
1810		gd->gd_cpuid = x;
1811		gd->gd_cpu_lockid = x << 24;
1812		gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
1813		gd->gd_prv_CMAP2 = &SMPpt[pg + 2];
1814		gd->gd_prv_CMAP3 = &SMPpt[pg + 3];
1815		gd->gd_prv_PMAP1 = &SMPpt[pg + 4];
1816		gd->gd_prv_CADDR1 = SMP_prvspace[x].CPAGE1;
1817		gd->gd_prv_CADDR2 = SMP_prvspace[x].CPAGE2;
1818		gd->gd_prv_CADDR3 = SMP_prvspace[x].CPAGE3;
1819		gd->gd_prv_PADDR1 = (unsigned *)SMP_prvspace[x].PPAGE1;
1820
1821		/* setup a vector to our boot code */
1822		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1823		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
1824#ifndef PC98
1825		outb(CMOS_REG, BIOS_RESET);
1826		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
1827#endif
1828
1829		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
1830		boot_cpuid = x;
1831
1832		/* attempt to start the Application Processor */
1833		CHECK_INIT(99);	/* setup checkpoints */
1834		if (!start_ap(x, boot_addr)) {
1835			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
1836			CHECK_PRINT("trace");	/* show checkpoints */
1837			/* better panic as the AP may be running loose */
1838			printf("panic y/n? [y] ");
1839			if (cngetc() != 'n')
1840				panic("bye-bye");
1841		}
1842		CHECK_PRINT("trace");		/* show checkpoints */
1843
1844		/* record its version info */
1845		cpu_apic_versions[x] = cpu_apic_versions[0];
1846
1847		all_cpus |= (1 << x);		/* record AP in CPU map */
1848	}
1849
1850	/* build our map of 'other' CPUs */
1851	other_cpus = all_cpus & ~(1 << cpuid);
1852
1853	/* fill in our (BSP) APIC version */
1854	cpu_apic_versions[0] = lapic.version;
1855
1856	/* restore the warmstart vector */
1857	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
1858#ifndef PC98
1859	outb(CMOS_REG, BIOS_RESET);
1860	outb(CMOS_DATA, mpbiosreason);
1861#endif
1862
1863	/*
1864	 * Set up the idle context for the BSP.  Similar to above except
1865	 * that some was done by locore, some by pmap.c and some is implicit
1866	 * because the BSP is cpu#0 and the page is initially zero, and also
1867	 * because we can refer to variables by name on the BSP..
1868	 */
1869
1870	/* Allocate and setup BSP idle stack */
1871	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
1872	for (i = 0; i < UPAGES; i++)
1873		SMPpt[5 + i] = (pt_entry_t)
1874		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1875
1876	*(int *)PTD = 0;
1877	pmap_set_opt_bsp();
1878
1879	/* number of APs actually started */
1880	return mp_ncpus - 1;
1881}
1882
1883
1884/*
1885 * load the 1st level AP boot code into base memory.
1886 */
1887
1888/* targets for relocation */
1889extern void bigJump(void);
1890extern void bootCodeSeg(void);
1891extern void bootDataSeg(void);
1892extern void MPentry(void);
1893extern u_int MP_GDT;
1894extern u_int mp_gdtbase;
1895
1896static void
1897install_ap_tramp(u_int boot_addr)
1898{
1899	int     x;
1900	int     size = *(int *) ((u_long) & bootMP_size);
1901	u_char *src = (u_char *) ((u_long) bootMP);
1902	u_char *dst = (u_char *) boot_addr + KERNBASE;
1903	u_int   boot_base = (u_int) bootMP;
1904	u_int8_t *dst8;
1905	u_int16_t *dst16;
1906	u_int32_t *dst32;
1907
1908	POSTCODE(INSTALL_AP_TRAMP_POST);
1909
1910	for (x = 0; x < size; ++x)
1911		*dst++ = *src++;
1912
1913	/*
1914	 * modify addresses in code we just moved to basemem. unfortunately we
1915	 * need fairly detailed info about mpboot.s for this to work.  changes
1916	 * to mpboot.s might require changes here.
1917	 */
1918
1919	/* boot code is located in KERNEL space */
1920	dst = (u_char *) boot_addr + KERNBASE;
1921
1922	/* modify the lgdt arg */
1923	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
1924	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
1925
1926	/* modify the ljmp target for MPentry() */
1927	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
1928	*dst32 = ((u_int) MPentry - KERNBASE);
1929
1930	/* modify the target for boot code segment */
1931	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
1932	dst8 = (u_int8_t *) (dst16 + 1);
1933	*dst16 = (u_int) boot_addr & 0xffff;
1934	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
1935
1936	/* modify the target for boot data segment */
1937	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
1938	dst8 = (u_int8_t *) (dst16 + 1);
1939	*dst16 = (u_int) boot_addr & 0xffff;
1940	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
1941}
1942
1943
1944/*
1945 * this function starts the AP (application processor) identified
1946 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
1947 * to accomplish this.  This is necessary because of the nuances
1948 * of the different hardware we might encounter.  It ain't pretty,
1949 * but it seems to work.
1950 */
1951static int
1952start_ap(int logical_cpu, u_int boot_addr)
1953{
1954	int     physical_cpu;
1955	int     vector;
1956	int     cpus;
1957	u_long  icr_lo, icr_hi;
1958
1959	POSTCODE(START_AP_POST);
1960
1961	/* get the PHYSICAL APIC ID# */
1962	physical_cpu = CPU_TO_ID(logical_cpu);
1963
1964	/* calculate the vector */
1965	vector = (boot_addr >> 12) & 0xff;
1966
1967	/* used as a watchpoint to signal AP startup */
1968	cpus = mp_ncpus;
1969
1970	/*
1971	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
1972	 * and running the target CPU. OR this INIT IPI might be latched (P5
1973	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
1974	 * ignored.
1975	 */
1976
1977	/* setup the address for the target AP */
1978	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
1979	icr_hi |= (physical_cpu << 24);
1980	lapic.icr_hi = icr_hi;
1981
1982	/* do an INIT IPI: assert RESET */
1983	icr_lo = lapic.icr_lo & 0xfff00000;
1984	lapic.icr_lo = icr_lo | 0x0000c500;
1985
1986	/* wait for pending status end */
1987	while (lapic.icr_lo & APIC_DELSTAT_MASK)
1988		 /* spin */ ;
1989
1990	/* do an INIT IPI: deassert RESET */
1991	lapic.icr_lo = icr_lo | 0x00008500;
1992
1993	/* wait for pending status end */
1994	u_sleep(10000);		/* wait ~10mS */
1995	while (lapic.icr_lo & APIC_DELSTAT_MASK)
1996		 /* spin */ ;
1997
1998	/*
1999	 * next we do a STARTUP IPI: the previous INIT IPI might still be
2000	 * latched, (P5 bug) this 1st STARTUP would then terminate
2001	 * immediately, and the previously started INIT IPI would continue. OR
2002	 * the previous INIT IPI has already run. and this STARTUP IPI will
2003	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
2004	 * will run.
2005	 */
2006
2007	/* do a STARTUP IPI */
2008	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2009	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2010		 /* spin */ ;
2011	u_sleep(200);		/* wait ~200uS */
2012
2013	/*
2014	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
2015	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
2016	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
2017	 * recognized after hardware RESET or INIT IPI.
2018	 */
2019
2020	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2021	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2022		 /* spin */ ;
2023	u_sleep(200);		/* wait ~200uS */
2024
2025	/* wait for it to start */
2026	set_apic_timer(5000000);/* == 5 seconds */
2027	while (read_apic_timer())
2028		if (mp_ncpus > cpus)
2029			return 1;	/* return SUCCESS */
2030
2031	return 0;		/* return FAILURE */
2032}
2033
2034
/*
 * Flush the TLB on all other CPUs.
 *
 * The IPI is only sent on APIC_IO kernels, and only once both smp_started
 * and invltlb_ok are set; otherwise this does nothing.
 *
 * XXX: Needs to handshake and wait for completion before proceeding.
 */
void
smp_invltlb(void)
{
#ifdef APIC_IO
	if (!smp_started || !invltlb_ok)
		return;

	all_but_self_ipi(XINVLTLB_OFFSET);
#endif	/* APIC_IO */
}
2048
2049void
2050invlpg(u_int addr)
2051{
2052	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
2053
2054	/* send a message to the other CPUs */
2055	smp_invltlb();
2056}
2057
2058void
2059invltlb(void)
2060{
2061	u_long  temp;
2062
2063	/*
2064	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
2065	 * inlined.
2066	 */
2067	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
2068
2069	/* send a message to the other CPUs */
2070	smp_invltlb();
2071}
2072
2073
2074/*
2075 * When called the executing CPU will send an IPI to all other CPUs
2076 *  requesting that they halt execution.
2077 *
2078 * Usually (but not necessarily) called with 'other_cpus' as its arg.
2079 *
2080 *  - Signals all CPUs in map to stop.
2081 *  - Waits for each to stop.
2082 *
2083 * Returns:
2084 *  -1: error
2085 *   0: NA
2086 *   1: ok
2087 *
2088 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
2089 *            from executing at same time.
2090 */
2091int
2092stop_cpus(u_int map)
2093{
2094	if (!smp_started)
2095		return 0;
2096
2097	/* send the Xcpustop IPI to all CPUs in map */
2098	selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
2099
2100	while ((stopped_cpus & map) != map)
2101		/* spin */ ;
2102
2103	return 1;
2104}
2105
2106
2107/*
2108 * Called by a CPU to restart stopped CPUs.
2109 *
2110 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2111 *
2112 *  - Signals all CPUs in map to restart.
2113 *  - Waits for each to restart.
2114 *
2115 * Returns:
2116 *  -1: error
2117 *   0: NA
2118 *   1: ok
2119 */
2120int
2121restart_cpus(u_int map)
2122{
2123	if (!smp_started)
2124		return 0;
2125
2126	started_cpus = map;		/* signal other cpus to restart */
2127
2128	while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
2129		/* spin */ ;
2130
2131	return 1;
2132}
2133
/*
 * Run-time SMP state and tunables, all exported under the machdep sysctl
 * tree.
 */

/* Set to 1 by ap_init() once every CPU has been launched ("historic"). */
int smp_active = 0;	/* are the APs allowed to run? */
SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");

/* XXX maybe should be hw.ncpu */
/* Starts at 1 and is incremented by each call to ap_init(); read-only. */
static int smp_cpus = 1;	/* how many cpu's running */
SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");

/*
 * While this is 0, smp_invltlb() sends no IPIs and the forward_*()
 * routines return immediately.  Set to 1 by ap_init().
 */
int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");

/* Warning: Do not staticize.  Used from swtch.s */
int do_page_zero_idle = 1; /* bzero pages for fun and profit in idleloop */
SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
	   &do_page_zero_idle, 0, "");

/* Is forwarding of a interrupt to the CPU holding the ISR lock enabled ? */
int forward_irq_enabled = 1;
SYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW,
	   &forward_irq_enabled, 0, "");

/*
 * Enable forwarding of a signal to a process running on a different CPU.
 * When 0, forward_signal() returns without doing anything.
 */
static int forward_signal_enabled = 1;
SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
	   &forward_signal_enabled, 0, "");

/*
 * Enable forwarding of roundrobin to all other cpus.
 * When 0, forward_roundrobin() returns without doing anything.
 */
static int forward_roundrobin_enabled = 1;
SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
	   &forward_roundrobin_enabled, 0, "");
2163
2164/*
2165 * This is called once the rest of the system is up and running and we're
2166 * ready to let the AP's out of the pen.
2167 */
2168void ap_init(void);
2169
2170void
2171ap_init()
2172{
2173	u_int	apic_id;
2174
2175	smp_cpus++;
2176
2177#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2178	lidt(&r_idt);
2179#endif
2180
2181	/* Build our map of 'other' CPUs. */
2182	other_cpus = all_cpus & ~(1 << cpuid);
2183
2184	printf("SMP: AP CPU #%d Launched!\n", cpuid);
2185
2186	/* XXX FIXME: i386 specific, and redundant: Setup the FPU. */
2187	load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS);
2188
2189	/* A quick check from sanity claus */
2190	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2191	if (cpuid != apic_id) {
2192		printf("SMP: cpuid = %d\n", cpuid);
2193		printf("SMP: apic_id = %d\n", apic_id);
2194		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
2195		panic("cpuid mismatch! boom!!");
2196	}
2197
2198	/* Init local apic for irq's */
2199	apic_initialize();
2200
2201	/* Set memory range attributes for this CPU to match the BSP */
2202	mem_range_AP_init();
2203
2204	/*
2205	 * Activate smp_invltlb, although strictly speaking, this isn't
2206	 * quite correct yet.  We should have a bitfield for cpus willing
2207	 * to accept TLB flush IPI's or something and sync them.
2208	 */
2209	if (smp_cpus == mp_ncpus) {
2210		invltlb_ok = 1;
2211		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
2212		smp_active = 1;	 /* historic */
2213	}
2214}
2215
2216#ifdef BETTER_CLOCK
2217
/*
 * Values found in checkstate_cpustate[]; forwarded_statclock() uses them
 * to pick which time counters to charge (any value other than USER and
 * SYS is handled like INTR there).
 */
#define CHECKSTATE_USER	0
#define CHECKSTATE_SYS	1
#define CHECKSTATE_INTR	2

/*
 * Per-CPU snapshot (current process, state, pc), indexed by cpu id and
 * read by the forward_*clock() code in this file.
 */
/* Do not staticize.  Used from apic_vector.s */
struct proc*	checkstate_curproc[NCPU];
int		checkstate_cpustate[NCPU];
u_long		checkstate_pc[NCPU];

extern long	cp_time[CPUSTATES];

/*
 * Map a pc to an index for profile 'prof': the offset of pc from pr_off
 * is multiplied by pr_scale, shifted right by 16, and the lowest bit is
 * cleared so the result is always even.  Evaluates 'prof' twice.
 */
#define PC_TO_INDEX(pc, prof)				\
        ((int)(((u_quad_t)((pc) - (prof)->pr_off) *	\
            (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
2232
2233static void
2234addupc_intr_forwarded(struct proc *p, int id, int *astmap)
2235{
2236	int i;
2237	struct uprof *prof;
2238	u_long pc;
2239
2240	pc = checkstate_pc[id];
2241	prof = &p->p_stats->p_prof;
2242	if (pc >= prof->pr_off &&
2243	    (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) {
2244		if ((p->p_flag & P_OWEUPC) == 0) {
2245			prof->pr_addr = pc;
2246			prof->pr_ticks = 1;
2247			p->p_flag |= P_OWEUPC;
2248		}
2249		*astmap |= (1 << id);
2250	}
2251}
2252
2253static void
2254forwarded_statclock(int id, int pscnt, int *astmap)
2255{
2256	struct pstats *pstats;
2257	long rss;
2258	struct rusage *ru;
2259	struct vmspace *vm;
2260	int cpustate;
2261	struct proc *p;
2262#ifdef GPROF
2263	register struct gmonparam *g;
2264	int i;
2265#endif
2266
2267	p = checkstate_curproc[id];
2268	cpustate = checkstate_cpustate[id];
2269
2270	switch (cpustate) {
2271	case CHECKSTATE_USER:
2272		if (p->p_flag & P_PROFIL)
2273			addupc_intr_forwarded(p, id, astmap);
2274		if (pscnt > 1)
2275			return;
2276		p->p_uticks++;
2277		if (p->p_nice > NZERO)
2278			cp_time[CP_NICE]++;
2279		else
2280			cp_time[CP_USER]++;
2281		break;
2282	case CHECKSTATE_SYS:
2283#ifdef GPROF
2284		/*
2285		 * Kernel statistics are just like addupc_intr, only easier.
2286		 */
2287		g = &_gmonparam;
2288		if (g->state == GMON_PROF_ON) {
2289			i = checkstate_pc[id] - g->lowpc;
2290			if (i < g->textsize) {
2291				i /= HISTFRACTION * sizeof(*g->kcount);
2292				g->kcount[i]++;
2293			}
2294		}
2295#endif
2296		if (pscnt > 1)
2297			return;
2298
2299		if (!p)
2300			cp_time[CP_IDLE]++;
2301		else {
2302			p->p_sticks++;
2303			cp_time[CP_SYS]++;
2304		}
2305		break;
2306	case CHECKSTATE_INTR:
2307	default:
2308#ifdef GPROF
2309		/*
2310		 * Kernel statistics are just like addupc_intr, only easier.
2311		 */
2312		g = &_gmonparam;
2313		if (g->state == GMON_PROF_ON) {
2314			i = checkstate_pc[id] - g->lowpc;
2315			if (i < g->textsize) {
2316				i /= HISTFRACTION * sizeof(*g->kcount);
2317				g->kcount[i]++;
2318			}
2319		}
2320#endif
2321		if (pscnt > 1)
2322			return;
2323		if (p)
2324			p->p_iticks++;
2325		cp_time[CP_INTR]++;
2326	}
2327	if (p != NULL) {
2328		p->p_cpticks++;
2329		if (++p->p_estcpu == 0)
2330			p->p_estcpu--;
2331		if ((p->p_estcpu & 3) == 0) {
2332			resetpriority(p);
2333			if (p->p_priority >= PUSER)
2334				p->p_priority = p->p_usrpri;
2335		}
2336
2337		/* Update resource usage integrals and maximums. */
2338		if ((pstats = p->p_stats) != NULL &&
2339		    (ru = &pstats->p_ru) != NULL &&
2340		    (vm = p->p_vmspace) != NULL) {
2341			ru->ru_ixrss += pgtok(vm->vm_tsize);
2342			ru->ru_idrss += pgtok(vm->vm_dsize);
2343			ru->ru_isrss += pgtok(vm->vm_ssize);
2344			rss = pgtok(vmspace_resident_count(vm));
2345			if (ru->ru_maxrss < rss)
2346				ru->ru_maxrss = rss;
2347        	}
2348	}
2349}
2350
2351void
2352forward_statclock(int pscnt)
2353{
2354	int map;
2355	int id;
2356	int i;
2357
2358	/* Kludge. We don't yet have separate locks for the interrupts
2359	 * and the kernel. This means that we cannot let the other processors
2360	 * handle complex interrupts while inhibiting them from entering
2361	 * the kernel in a non-interrupt context.
2362	 *
2363	 * What we can do, without changing the locking mechanisms yet,
2364	 * is letting the other processors handle a very simple interrupt
2365	 * (wich determines the processor states), and do the main
2366	 * work ourself.
2367	 */
2368
2369	if (!smp_started || !invltlb_ok || cold || panicstr)
2370		return;
2371
2372	/* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle ) */
2373
2374	map = other_cpus & ~stopped_cpus ;
2375	checkstate_probed_cpus = 0;
2376	if (map != 0)
2377		selected_apic_ipi(map,
2378				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2379
2380	i = 0;
2381	while (checkstate_probed_cpus != map) {
2382		/* spin */
2383		i++;
2384		if (i == 100000) {
2385#ifdef BETTER_CLOCK_DIAGNOSTIC
2386			printf("forward_statclock: checkstate %x\n",
2387			       checkstate_probed_cpus);
2388#endif
2389			break;
2390		}
2391	}
2392
2393	/*
2394	 * Step 2: walk through other processors processes, update ticks and
2395	 * profiling info.
2396	 */
2397
2398	map = 0;
2399	for (id = 0; id < mp_ncpus; id++) {
2400		if (id == cpuid)
2401			continue;
2402		if (((1 << id) & checkstate_probed_cpus) == 0)
2403			continue;
2404		forwarded_statclock(id, pscnt, &map);
2405	}
2406	if (map != 0) {
2407		checkstate_need_ast |= map;
2408		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2409		i = 0;
2410		while ((checkstate_need_ast & map) != 0) {
2411			/* spin */
2412			i++;
2413			if (i > 100000) {
2414#ifdef BETTER_CLOCK_DIAGNOSTIC
2415				printf("forward_statclock: dropped ast 0x%x\n",
2416				       checkstate_need_ast & map);
2417#endif
2418				break;
2419			}
2420		}
2421	}
2422}
2423
2424void
2425forward_hardclock(int pscnt)
2426{
2427	int map;
2428	int id;
2429	struct proc *p;
2430	struct pstats *pstats;
2431	int i;
2432
2433	/* Kludge. We don't yet have separate locks for the interrupts
2434	 * and the kernel. This means that we cannot let the other processors
2435	 * handle complex interrupts while inhibiting them from entering
2436	 * the kernel in a non-interrupt context.
2437	 *
2438	 * What we can do, without changing the locking mechanisms yet,
2439	 * is letting the other processors handle a very simple interrupt
2440	 * (wich determines the processor states), and do the main
2441	 * work ourself.
2442	 */
2443
2444	if (!smp_started || !invltlb_ok || cold || panicstr)
2445		return;
2446
2447	/* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle) */
2448
2449	map = other_cpus & ~stopped_cpus ;
2450	checkstate_probed_cpus = 0;
2451	if (map != 0)
2452		selected_apic_ipi(map,
2453				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2454
2455	i = 0;
2456	while (checkstate_probed_cpus != map) {
2457		/* spin */
2458		i++;
2459		if (i == 100000) {
2460#ifdef BETTER_CLOCK_DIAGNOSTIC
2461			printf("forward_hardclock: checkstate %x\n",
2462			       checkstate_probed_cpus);
2463#endif
2464			break;
2465		}
2466	}
2467
2468	/*
2469	 * Step 2: walk through other processors processes, update virtual
2470	 * timer and profiling timer. If stathz == 0, also update ticks and
2471	 * profiling info.
2472	 */
2473
2474	map = 0;
2475	for (id = 0; id < mp_ncpus; id++) {
2476		if (id == cpuid)
2477			continue;
2478		if (((1 << id) & checkstate_probed_cpus) == 0)
2479			continue;
2480		p = checkstate_curproc[id];
2481		if (p) {
2482			pstats = p->p_stats;
2483			if (checkstate_cpustate[id] == CHECKSTATE_USER &&
2484			    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
2485			    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
2486				psignal(p, SIGVTALRM);
2487				map |= (1 << id);
2488			}
2489			if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
2490			    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
2491				psignal(p, SIGPROF);
2492				map |= (1 << id);
2493			}
2494		}
2495		if (stathz == 0) {
2496			forwarded_statclock( id, pscnt, &map);
2497		}
2498	}
2499	if (map != 0) {
2500		checkstate_need_ast |= map;
2501		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2502		i = 0;
2503		while ((checkstate_need_ast & map) != 0) {
2504			/* spin */
2505			i++;
2506			if (i > 100000) {
2507#ifdef BETTER_CLOCK_DIAGNOSTIC
2508				printf("forward_hardclock: dropped ast 0x%x\n",
2509				       checkstate_need_ast & map);
2510#endif
2511				break;
2512			}
2513		}
2514	}
2515}
2516
2517#endif /* BETTER_CLOCK */
2518
2519void
2520forward_signal(struct proc *p)
2521{
2522	int map;
2523	int id;
2524	int i;
2525
2526	/* Kludge. We don't yet have separate locks for the interrupts
2527	 * and the kernel. This means that we cannot let the other processors
2528	 * handle complex interrupts while inhibiting them from entering
2529	 * the kernel in a non-interrupt context.
2530	 *
2531	 * What we can do, without changing the locking mechanisms yet,
2532	 * is letting the other processors handle a very simple interrupt
2533	 * (wich determines the processor states), and do the main
2534	 * work ourself.
2535	 */
2536
2537	if (!smp_started || !invltlb_ok || cold || panicstr)
2538		return;
2539	if (!forward_signal_enabled)
2540		return;
2541	while (1) {
2542		if (p->p_stat != SRUN)
2543			return;
2544		id = p->p_oncpu;
2545		if (id == 0xff)
2546			return;
2547		map = (1<<id);
2548		checkstate_need_ast |= map;
2549		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2550		i = 0;
2551		while ((checkstate_need_ast & map) != 0) {
2552			/* spin */
2553			i++;
2554			if (i > 100000) {
2555#if 0
2556				printf("forward_signal: dropped ast 0x%x\n",
2557				       checkstate_need_ast & map);
2558#endif
2559				break;
2560			}
2561		}
2562		if (id == p->p_oncpu)
2563			return;
2564	}
2565}
2566
2567void
2568forward_roundrobin(void)
2569{
2570	u_int map;
2571	int i;
2572
2573	if (!smp_started || !invltlb_ok || cold || panicstr)
2574		return;
2575	if (!forward_roundrobin_enabled)
2576		return;
2577	resched_cpus |= other_cpus;
2578	map = other_cpus & ~stopped_cpus ;
2579#if 1
2580	selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2581#else
2582	(void) all_but_self_ipi(XCPUAST_OFFSET);
2583#endif
2584	i = 0;
2585	while ((checkstate_need_ast & map) != 0) {
2586		/* spin */
2587		i++;
2588		if (i > 100000) {
2589#if 0
2590			printf("forward_roundrobin: dropped ast 0x%x\n",
2591			       checkstate_need_ast & map);
2592#endif
2593			break;
2594		}
2595	}
2596}
2597
2598
#ifdef APIC_INTR_REORDER
/*
 * Maintain mapping from softintr vector to isr bit in local apic.
 *
 * Records, for interrupt 'intr', which local APIC in-service word holds
 * the bit for 'vector' and the mask of that bit within the word.
 * Panics if 'intr' or 'vector' is out of range.
 */
void
set_lapic_isrloc(int intr, int vector)
{
	/*
	 * NOTE(review): this accepts intr == 32.  If apic_isrbit_location[]
	 * has 32 entries that is one past the end -- confirm against the
	 * array's declaration and tighten to ">= 32" if so.
	 */
	if (intr < 0 || intr > 32)
		panic("set_lapic_isrloc: bad intr argument: %d", intr);
	if (vector < ICU_OFFSET || vector > 255)
		panic("set_lapic_isrloc: bad vector argument: %d", vector);

	/*
	 * Bits 5 and up of the vector select the word, bits 0-4 the bit in
	 * it.  The word index is scaled by 4 elements from isr0; presumably
	 * successive ISR words are that far apart in the lapic layout --
	 * TODO confirm.
	 */
	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector >> 5) << 2);

	/* unsigned constant: shifting a signed 1 by 31 would overflow */
	apic_isrbit_location[intr].bit = (1U << (vector & 31));
}
#endif
2614