mp_machdep.c revision 89410
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 *    derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: head/sys/amd64/amd64/mp_machdep.c 89410 2002-01-16 00:44:29Z peter $
26 */
27
28#include "opt_cpu.h"
29#include "opt_kstack_pages.h"
30
31#ifdef SMP
32#include <machine/smptests.h>
33#else
34#error
35#endif
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/bus.h>
40#include <sys/cons.h>	/* cngetc() */
41#include <sys/dkstat.h>
42#ifdef GPROF
43#include <sys/gmon.h>
44#endif
45#include <sys/kernel.h>
46#include <sys/ktr.h>
47#include <sys/lock.h>
48#include <sys/malloc.h>
49#include <sys/memrange.h>
50#include <sys/mutex.h>
51#include <sys/pcpu.h>
52#include <sys/proc.h>
53#include <sys/smp.h>
54#include <sys/sysctl.h>
55#include <sys/user.h>
56
57#include <vm/vm.h>
58#include <vm/vm_param.h>
59#include <vm/pmap.h>
60#include <vm/vm_kern.h>
61#include <vm/vm_extern.h>
62#include <vm/vm_map.h>
63
64#include <machine/apic.h>
65#include <machine/atomic.h>
66#include <machine/cpu.h>
67#include <machine/cpufunc.h>
68#include <machine/mpapic.h>
69#include <machine/psl.h>
70#include <machine/segments.h>
71#include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
72#include <machine/tss.h>
73#include <machine/specialreg.h>
74#include <machine/privatespace.h>
75
76#if defined(APIC_IO)
77#include <machine/md_var.h>		/* setidt() */
78#include <i386/isa/icu.h>		/* IPIs */
79#include <i386/isa/intr_machdep.h>	/* IPIs */
80#endif	/* APIC_IO */
81
/*
 * MP feature byte 1 of the floating pointer structure.  A non-zero value
 * selects one of the default configurations (see mptable_pass1());
 * defining TEST_DEFAULT_CONFIG substitutes a fixed value for the one
 * read from the table.
 */
#if defined(TEST_DEFAULT_CONFIG)
#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
#else
#define MPFPS_MPFB1	mpfps->mpfb1
#endif  /* TEST_DEFAULT_CONFIG */

/*
 * NOTE(review): presumably the BIOS warm-boot vector (offset and segment
 * words in low memory, addressed through the KERNBASE mapping) -- confirm
 * against the AP startup code, which is where these are expected to be used.
 */
#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

/*
 * BIOS region scanned for the MP signature by i386_mp_probe().
 * BIOS_COUNT is the region size expressed in 32-bit words, the unit
 * search_for_sig() counts in.
 */
#ifdef PC98
#define BIOS_BASE		(0xe8000)
#define BIOS_SIZE		(0x18000)
#else
#define BIOS_BASE		(0xf0000)
#define BIOS_SIZE		(0x10000)
#endif
#define BIOS_COUNT		(BIOS_SIZE/4)

/*
 * CMOS access: the register index is written to CMOS_REG, then the value is
 * read from or written to CMOS_DATA (see CHECK_READ/CHECK_WRITE).
 * NOTE(review): BIOS_RESET/BIOS_WARM look like the CMOS shutdown-status
 * register index and its warm-start code -- confirm at the point of use.
 */
#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)

/*
 * Flag bits tested in MP table entries: PROCENTRY_FLAG_EN against a
 * processor entry's cpu_flags and IOAPICENTRY_FLAG_EN against an IO APIC
 * entry's apic_flags (see mptable_pass1()).
 * NOTE(review): PROCENTRY_FLAG_BP presumably marks the boot processor.
 */
#define PROCENTRY_FLAG_EN	0x01
#define PROCENTRY_FLAG_BP	0x02
#define IOAPICENTRY_FLAG_EN	0x01
109
110
/*
 * MP Floating Pointer Structure.
 *
 * Located by i386_mp_probe() via search_for_sig(), which looks for the
 * MP_SIG ("_MP_") signature.  The layout mirrors the table in memory, so
 * the field order and sizes must not change.
 */
typedef struct MPFPS {
	char    signature[4];	/* compared (as a 32-bit word) with MP_SIG */
	void   *pap;		/* MP configuration table header, may be 0 */
	u_char  length;
	u_char  spec_rev;
	u_char  checksum;
	u_char  mpfb1;		/* non-zero: default configuration type */
	u_char  mpfb2;		/* bit 7 set: PIC mode, clear: virtual wire */
	u_char  mpfb3;
	u_char  mpfb4;
	u_char  mpfb5;
}      *mpfps_t;
124
/*
 * MP Configuration Table Header.
 *
 * Reached through MPFPS.pap.  The base table entries follow the header
 * directly in memory; both table passes start walking at
 * (u_char *)cth + sizeof(struct MPCTH).
 */
typedef struct MPCTH {
	char    signature[4];
	u_short base_table_length;	/* header plus base entries, in bytes */
	u_char  spec_rev;
	u_char  checksum;
	u_char  oem_id[8];
	u_char  product_id[12];
	void   *oem_table_pointer;
	u_short oem_table_size;
	u_short entry_count;		/* number of base table entries */
	void   *apic_address;		/* local APIC address, all CPUs */
	u_short extended_table_length;
	u_char  extended_table_checksum;
	u_char  reserved;
}      *mpcth_t;
141
142
/* MP base table entry describing one processor (entry type 0). */
typedef struct PROCENTRY {
	u_char  type;		/* entry type code, 0 for this layout */
	u_char  apic_id;
	u_char  apic_version;
	u_char  cpu_flags;	/* PROCENTRY_FLAG_* bits */
	u_long  cpu_signature;
	u_long  feature_flags;
	u_long  reserved1;
	u_long  reserved2;
}      *proc_entry_ptr;
153
/* MP base table entry describing one bus (entry type 1). */
typedef struct BUSENTRY {
	u_char  type;		/* entry type code, 1 for this layout */
	u_char  bus_id;
	char    bus_type[6];	/* bus type name characters from the table */
}      *bus_entry_ptr;
159
/* MP base table entry describing one IO APIC (entry type 2). */
typedef struct IOAPICENTRY {
	u_char  type;		/* entry type code, 2 for this layout */
	u_char  apic_id;
	u_char  apic_version;
	u_char  apic_flags;	/* IOAPICENTRY_FLAG_EN when usable */
	void   *apic_address;	/* recorded in io_apic_address[] by pass 1 */
}      *io_apic_entry_ptr;
167
/*
 * MP base table interrupt entry.  Type 3 entries are counted in nintrs and
 * handed to int_entry(); type 4 entries are currently skipped by both
 * table passes.
 */
typedef struct INTENTRY {
	u_char  type;		/* entry type code */
	u_char  int_type;
	u_short int_flags;
	u_char  src_bus_id;	/* source bus and the IRQ on that bus */
	u_char  src_bus_irq;
	u_char  dst_apic_id;	/* destination APIC and its input pin */
	u_char  dst_apic_int;
}      *int_entry_ptr;
177
/*
 * descriptions of MP basetable entries
 *
 * One record per entry type; 'length' is the size in bytes of that kind of
 * entry and is what the table walkers add to step to the next entry.
 */
typedef struct BASETABLE_ENTRY {
	u_char  type;		/* entry type code */
	u_char  length;		/* entry size in bytes */
	char    name[16];	/* human readable entry name */
}       basetable_entry;
184
185/*
186 * this code MUST be enabled here and in mpboot.s.
187 * it follows the very early stages of AP boot by placing values in CMOS ram.
188 * it NORMALLY will never be needed and thus the primitive method for enabling.
189 *
190#define CHECK_POINTS
191 */
192
#if defined(CHECK_POINTS) && !defined(PC98)
/* Read or write one CMOS byte: select index A, then access the data port. */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Store the value D in CMOS bytes 0x34 through 0x39.
 *
 * NOTE(review): the ';' after the parameter list is part of the expansion,
 * and the body is several statements without a do { } while (0) wrapper,
 * so this is only safe as a full statement inside a braced block.
 */
#define CHECK_INIT(D);				\
	CHECK_WRITE(0x34, (D));			\
	CHECK_WRITE(0x35, (D));			\
	CHECK_WRITE(0x36, (D));			\
	CHECK_WRITE(0x37, (D));			\
	CHECK_WRITE(0x38, (D));			\
	CHECK_WRITE(0x39, (D));

/*
 * Print the label S followed by the six CMOS bytes 0x34 through 0x39.
 * The same stray-';' caveat as for CHECK_INIT applies.
 */
#define CHECK_PRINT(S);				\
	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
	   (S),					\
	   CHECK_READ(0x34),			\
	   CHECK_READ(0x35),			\
	   CHECK_READ(0x36),			\
	   CHECK_READ(0x37),			\
	   CHECK_READ(0x38),			\
	   CHECK_READ(0x39));

#else				/* CHECK_POINTS */

/* Checkpointing disabled: both macros expand to nothing. */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
221
/*
 * Values to send to the POST hardware.
 *
 * Each routine named by a code passes it to POSTCODE() on entry, so the
 * last code shown identifies the SMP bring-up stage that was reached.
 */
#define MP_BOOTADDRESS_POST	0x10
#define MP_PROBE_POST		0x11
#define MPTABLE_PASS1_POST	0x12

#define MP_START_POST		0x13
#define MP_ENABLE_POST		0x14
#define MPTABLE_PASS2_POST	0x15

#define START_ALL_APS_POST	0x16
#define INSTALL_AP_TRAMP_POST	0x17
#define START_AP_POST		0x18

#define MP_ANNOUNCE_POST	0x19
238
/* used to hold the APs until we are ready to release them */
static struct mtx ap_boot_mtx;

/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int	current_postcode;

/** XXX FIXME: what system files declare these??? */
extern struct region_descriptor r_gdt, r_idt;

int	bsp_apic_ready = 0;	/* flags usability of BSP apic */
int	mp_naps;		/* # of Application Processors (BSP excluded) */
int	mp_nbusses;		/* # of busses */
int	mp_napics;		/* # of IO APICs */
int	boot_cpu_id;		/* designated BSP */
vm_offset_t cpu_apic_address;	/* local APIC address, common to all CPUs */
vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
extern	int nkpt;

/* APIC version words, indexed by logical CPU / logical IO APIC number */
u_int32_t cpu_apic_versions[MAXCPU];
u_int32_t *io_apic_versions;	/* allocated in mptable_pass2() */

#ifdef APIC_INTR_REORDER
struct {
	volatile int *location;
	int bit;
} apic_isrbit_location[32];
#endif

/* per-IRQ record of the IO APIC and pin serving it; ioapic == -1 when free */
struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
268
/*
 * APIC ID logical/physical mapping structures.
 * We oversize these to simplify boot-time config.
 * NOTE(review): presumably the storage behind the CPU_TO_ID(), IO_TO_ID()
 * and ID_TO_IO() accessors used throughout this file -- the macros are
 * defined elsewhere.
 */
int     cpu_num_to_apic_id[NAPICID];
int     io_num_to_apic_id[NAPICID];
int     apic_id_to_logical[NAPICID];


/* AP uses this during bootstrap.  Do not staticize.  */
char *bootSTK;
/* logical number of the AP being started; read by init_secondary() */
static int bootAP;

/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;

/* SMP page table page */
extern pt_entry_t *SMPpt;

/* one saved-context slot per possible CPU */
struct pcb stoppcbs[MAXCPU];
289
/*
 * Local data and functions.
 */

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

static int	mp_capable;		/* set by i386_mp_probe() */
static u_int	boot_address;		/* AP trampoline address */
static u_int	base_memory;		/* base memory size in bytes */

static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
static mpfps_t	mpfps;			/* MP floating pointer, 0 if none */
static int	search_for_sig(u_int32_t target, int count);
static void	mp_enable(u_int boot_addr);

static void	mptable_pass1(void);
static int	mptable_pass2(void);
static void	default_mp_table(int type);
static void	fix_mp_table(void);
static void	setup_apic_irq_mapping(void);
static void	init_locks(void);
static int	start_all_aps(u_int boot_addr);
static void	install_ap_tramp(u_int boot_addr);
static int	start_ap(int logicalCpu, u_int boot_addr);
void		ap_init(void);
static int	apic_int_is_bus_type(int intr, int bus_type);
static void	release_aps(void *dummy);
318
/*
 * initialize all the SMP locks
 */

/* lock region used by kernel profiling */
int	mcount_lock;

#ifdef USE_COMLOCK
/* locks com (tty) data/hardware accesses: a FASTINTR() */
struct mtx		com_mtx;
#endif /* USE_COMLOCK */

/*
 * Called from mp_enable() before the APs are started.  The only lock set
 * up here is com_mtx, and only when USE_COMLOCK is configured; otherwise
 * the function body is empty.
 */
static void
init_locks(void)
{

#ifdef USE_COMLOCK
	mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */
}
339
340/*
341 * Calculate usable address in base memory for AP trampoline code.
342 */
343u_int
344mp_bootaddress(u_int basemem)
345{
346	POSTCODE(MP_BOOTADDRESS_POST);
347
348	base_memory = basemem * 1024;	/* convert to bytes */
349
350	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
351	if ((base_memory - boot_address) < bootMP_size)
352		boot_address -= 4096;	/* not enough, lower by 4k */
353
354	return boot_address;
355}
356
357
358/*
359 * Look for an Intel MP spec table (ie, SMP capable hardware).
360 */
361void
362i386_mp_probe(void)
363{
364	int     x;
365	u_long  segment;
366	u_int32_t target;
367
368	POSTCODE(MP_PROBE_POST);
369
370	/* see if EBDA exists */
371	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
372		/* search first 1K of EBDA */
373		target = (u_int32_t) (segment << 4);
374		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
375			goto found;
376	} else {
377		/* last 1K of base memory, effective 'top of base' passed in */
378		target = (u_int32_t) (base_memory - 0x400);
379		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
380			goto found;
381	}
382
383	/* search the BIOS */
384	target = (u_int32_t) BIOS_BASE;
385	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
386		goto found;
387
388	/* nothing found */
389	mpfps = (mpfps_t)0;
390	mp_capable = 0;
391	return;
392
393found:
394	/* calculate needed resources */
395	mpfps = (mpfps_t)x;
396	mptable_pass1();
397
398	/* flag fact that we are running multiple processors */
399	mp_capable = 1;
400}
401
/*
 * Report whether MP hardware was found.
 *
 * Returns mp_capable as left by i386_mp_probe(): 1 when an MP table was
 * located, 0 otherwise.  As a side effect the CPU map all_cpus is set to
 * contain only the boot processor.
 */
int
cpu_mp_probe(void)
{
	/*
	 * Record BSP in CPU map
	 * This is done here so that MBUF init code works correctly.
	 */
	all_cpus = 1;

	return (mp_capable);
}
413
414/*
415 * Initialize the SMP hardware and the APIC and start up the AP's.
416 */
417void
418cpu_mp_start(void)
419{
420	POSTCODE(MP_START_POST);
421
422	/* look for MP capable motherboard */
423	if (mp_capable)
424		mp_enable(boot_address);
425	else
426		panic("MP hardware not found!");
427
428	cpu_setregs();
429}
430
431
432/*
433 * Print various information about the SMP system hardware and setup.
434 */
435void
436cpu_mp_announce(void)
437{
438	int     x;
439
440	POSTCODE(MP_ANNOUNCE_POST);
441
442	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
443	printf(", version: 0x%08x", cpu_apic_versions[0]);
444	printf(", at 0x%08x\n", cpu_apic_address);
445	for (x = 1; x <= mp_naps; ++x) {
446		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
447		printf(", version: 0x%08x", cpu_apic_versions[x]);
448		printf(", at 0x%08x\n", cpu_apic_address);
449	}
450
451#if defined(APIC_IO)
452	for (x = 0; x < mp_napics; ++x) {
453		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
454		printf(", version: 0x%08x", io_apic_versions[x]);
455		printf(", at 0x%08x\n", io_apic_address[x]);
456	}
457#else
458	printf(" Warning: APIC I/O disabled\n");
459#endif	/* APIC_IO */
460}
461
/*
 * AP cpu's call this to sync up protected mode.
 *
 * Builds this CPU's slice of the GDT, loads the GDT, IDT and LDT, sets up
 * and loads the per-CPU TSS, and puts CR0 into a known state.  The
 * statement order matters: descriptor tables must be loaded before the
 * PCPU_* accessors and ltr() are used.
 */
void
init_secondary(void)
{
	int	gsel_tss;
	int	x, myid = bootAP;	/* logical number of this AP */
	u_int	cr0;

	/* point the private-space and TSS segments at this CPU's area */
	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
	gdt_segs[GPROC0_SEL].ssd_base =
		(int) &SMP_prvspace[myid].pcpu.pc_common_tss;
	SMP_prvspace[myid].pcpu.pc_prvspace =
		&SMP_prvspace[myid].pcpu;

	/* convert the soft descriptors into this CPU's NGDT-entry GDT slice */
	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	/* fill in the per-CPU TSS and load the task register with it */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);	/* clear only these three bits */
	load_cr0(cr0);

	pmap_set_opt();
}
511
512
#if defined(APIC_IO)
/*
 * Put the BSP's local APIC into its final interrupt configuration:
 *  - leave 'pic mode' when the MP table reported it (picmode != 0),
 *  - mask LINT0, the 8259 'virtual wire' connection,
 *  - unmask LINT1 so that NMI is delivered.
 * The resulting APIC state is dumped when booting verbosely.
 */
void
bsp_apic_configure(void)
{
	u_char		imcr;
	u_int32_t	lvt;

	if (picmode) {
		/* select the IMCR, then set bit 0 to disconnect 8259s/NMI */
		outb(0x22, 0x70);
		imcr = inb(0x23);
		imcr |= 0x01;
		outb(0x23, imcr);
	}

	/* LINT0: set the mask bit */
	lvt = lapic.lvt_lint0;
	lvt |= APIC_LVT_M;
	lapic.lvt_lint0 = lvt;

	/* LINT1: clear the mask bit so it can carry NMI */
	lvt = lapic.lvt_lint1;
	lvt &= ~APIC_LVT_M;
	lapic.lvt_lint1 = lvt;

	if (bootverbose)
		apic_dump("bsp_apic_configure()");
}
#endif  /* APIC_IO */
548
549
550/*******************************************************************
551 * local functions and data
552 */
553
/*
 * start the SMP system
 *
 * Runs the second MP table pass through a temporary V == P mapping,
 * finishes the APIC ID and IRQ bookkeeping, programs the IO APICs and
 * installs the IPI vectors when APIC_IO is configured, and finally starts
 * the APs using the trampoline address 'boot_addr'.
 */
static void
mp_enable(u_int boot_addr)
{
	int     x;
#if defined(APIC_IO)
	int     apic;
	u_int   ux;
#endif	/* APIC_IO */

	POSTCODE(MP_ENABLE_POST);

	/* turn on 4MB of V == P addressing so we can get to MP table */
	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
	invltlb();

	/* examine the MP table for needed info, uses physical addresses */
	x = mptable_pass2();

	/* remove the temporary V == P mapping again */
	*(int *)PTD = 0;
	invltlb();

	/* can't process default configs till the CPU APIC is pmapped */
	if (x)
		default_mp_table(x);	/* x is the default configuration type */

	/* post scan cleanup */
	fix_mp_table();
	setup_apic_irq_mapping();

#if defined(APIC_IO)

	/* fill the LOGICAL io_apic_versions table */
	for (apic = 0; apic < mp_napics; ++apic) {
		ux = io_apic_read(apic, IOAPIC_VER);
		io_apic_versions[apic] = ux;
		io_apic_set_id(apic, IO_TO_ID(apic));
	}

	/* program each IO APIC in the system */
	for (apic = 0; apic < mp_napics; ++apic)
		if (io_apic_setup(apic) < 0)
			panic("IO APIC setup failure");

	/* install a 'Spurious INTerrupt' vector */
	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for TLB invalidation */
	setidt(XINVLTLB_OFFSET, Xinvltlb,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forwarding hardclock() */
	setidt(XHARDCLOCK_OFFSET, Xhardclock,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forwarding statclock() */
	setidt(XSTATCLOCK_OFFSET, Xstatclock,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for all-CPU rendezvous */
	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forcing an additional software trap */
	setidt(XCPUAST_OFFSET, Xcpuast,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for CPU stop/restart */
	setidt(XCPUSTOP_OFFSET, Xcpustop,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#if defined(TEST_TEST1)
	/* install a "fake hardware INTerrupt" vector */
	setidt(XTEST1_OFFSET, Xtest1,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif  /** TEST_TEST1 */

#endif	/* APIC_IO */

	/* initialize all SMP locks */
	init_locks();

	/* start each Application Processor (the return value is not used) */
	start_all_aps(boot_addr);
}
642
643
644/*
645 * look for the MP spec signature
646 */
647
648/* string defined by the Intel MP Spec as identifying the MP table */
649#define MP_SIG		0x5f504d5f	/* _MP_ */
650#define NEXT(X)		((X) += 4)
651static int
652search_for_sig(u_int32_t target, int count)
653{
654	int     x;
655	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
656
657	for (x = 0; x < count; NEXT(x))
658		if (addr[x] == MP_SIG)
659			/* make array index a byte index */
660			return (target + (x * sizeof(u_int32_t)));
661
662	return -1;
663}
664
665
/*
 * Base table entry descriptions, indexed by entry type code.  The table
 * walkers in mptable_pass1() and mptable_pass2() use the 'length' column
 * to step from one entry to the next.
 */
static basetable_entry basetable_entry_types[] =
{
	/* type, length in bytes, name */
	{0, 20, "Processor"},
	{1, 8, "Bus"},
	{2, 8, "I/O APIC"},
	{3, 8, "I/O INT"},
	{4, 8, "Local INT"}
};
674
/* Kernel-side record of one bus; bus_id is 0xff until filled in. */
typedef struct BUSDATA {
	u_char  bus_id;
	enum busTypes bus_type;
}       bus_datum;
679
/*
 * Kernel-side record of one interrupt entry.  The first six fields carry
 * the same names as those of the MP table INTENTRY; int_vector holds the
 * IRQ assigned by assign_apic_irq(), with 0xff meaning "none assigned".
 */
typedef struct INTDATA {
	u_char  int_type;	/* 0xff marks an unused slot */
	u_short int_flags;
	u_char  src_bus_id;
	u_char  src_bus_irq;
	u_char  dst_apic_id;
	u_char  dst_apic_int;
	u_char	int_vector;
}       io_int, local_int;
689
/* Pairs a bus type code with its name (at most 6 characters plus NUL). */
typedef struct BUSTYPENAME {
	u_char  type;
	char    name[7];
}       bus_type_name;
694
/*
 * Known bus type codes and their names; "---" rows are placeholders with
 * UNKNOWN_BUSTYPE.
 * NOTE(review): presumably consulted by lookup_bus_type(); whether the row
 * position is significant cannot be determined from this part of the file,
 * so the order (including the repeated MCA row) is left untouched.
 */
static bus_type_name bus_type_table[] =
{
	{CBUS, "CBUS"},
	{CBUSII, "CBUSII"},
	{EISA, "EISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{ISA, "ISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{PCI, "PCI"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{XPRESS, "XPRESS"},
	{UNKNOWN_BUSTYPE, "---"}
};
/*
 * from MP spec v1.4, table 5-1
 *
 * Bus layout of each default configuration.  The row index is the default
 * configuration type minus one (see the use of MPFPS_MPFB1 - 1 in
 * mptable_pass1()); 255 fills the columns of a bus that does not exist.
 */
static int default_data[7][5] =
{
/*   nbus, id0, type0, id1, type1 */
	{1, 0, ISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, MCA, 255, 255},
	{2, 0, ISA, 1, PCI},
	{2, 0, EISA, 1, PCI},
	{2, 0, MCA, 1, PCI}
};
729
730
/* the bus data, mp_nbusses entries allocated in mptable_pass2() */
static bus_datum *bus_data;

/*
 * the IO INT data, one entry per possible APIC INTerrupt
 * (nintrs + 1 entries are allocated in mptable_pass2())
 */
static io_int  *io_apic_ints;

/* number of I/O interrupt entries counted by mptable_pass1() */
static int nintrs;

/*
 * Per-entry handlers used by mptable_pass2(); the second argument is the
 * running index for that kind of entry, and a non-zero return makes the
 * caller advance that index.
 */
static int processor_entry	__P((proc_entry_ptr entry, int cpu));
static int bus_entry		__P((bus_entry_ptr entry, int bus));
static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
static int int_entry		__P((int_entry_ptr entry, int intr));
static int lookup_bus_type	__P((char *name));
744
745
746/*
747 * 1st pass on motherboard's Intel MP specification table.
748 *
749 * initializes:
750 *	mp_ncpus = 1
751 *
752 * determines:
753 *	cpu_apic_address (common to all CPUs)
754 *	io_apic_address[N]
755 *	mp_naps
756 *	mp_nbusses
757 *	mp_napics
758 *	nintrs
759 */
760static void
761mptable_pass1(void)
762{
763	int	x;
764	mpcth_t	cth;
765	int	totalSize;
766	void*	position;
767	int	count;
768	int	type;
769
770	POSTCODE(MPTABLE_PASS1_POST);
771
772	/* clear various tables */
773	for (x = 0; x < NAPICID; ++x) {
774		io_apic_address[x] = ~0;	/* IO APIC address table */
775	}
776
777	/* init everything to empty */
778	mp_naps = 0;
779	mp_nbusses = 0;
780	mp_napics = 0;
781	nintrs = 0;
782
783	/* check for use of 'default' configuration */
784	if (MPFPS_MPFB1 != 0) {
785		/* use default addresses */
786		cpu_apic_address = DEFAULT_APIC_BASE;
787		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
788
789		/* fill in with defaults */
790		mp_naps = 2;		/* includes BSP */
791		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
792#if defined(APIC_IO)
793		mp_napics = 1;
794		nintrs = 16;
795#endif	/* APIC_IO */
796	}
797	else {
798		if ((cth = mpfps->pap) == 0)
799			panic("MP Configuration Table Header MISSING!");
800
801		cpu_apic_address = (vm_offset_t) cth->apic_address;
802
803		/* walk the table, recording info of interest */
804		totalSize = cth->base_table_length - sizeof(struct MPCTH);
805		position = (u_char *) cth + sizeof(struct MPCTH);
806		count = cth->entry_count;
807
808		while (count--) {
809			switch (type = *(u_char *) position) {
810			case 0: /* processor_entry */
811				if (((proc_entry_ptr)position)->cpu_flags
812					& PROCENTRY_FLAG_EN)
813					++mp_naps;
814				break;
815			case 1: /* bus_entry */
816				++mp_nbusses;
817				break;
818			case 2: /* io_apic_entry */
819				if (((io_apic_entry_ptr)position)->apic_flags
820					& IOAPICENTRY_FLAG_EN)
821					io_apic_address[mp_napics++] =
822					    (vm_offset_t)((io_apic_entry_ptr)
823						position)->apic_address;
824				break;
825			case 3: /* int_entry */
826				++nintrs;
827				break;
828			case 4:	/* int_entry */
829				break;
830			default:
831				panic("mpfps Base Table HOSED!");
832				/* NOTREACHED */
833			}
834
835			totalSize -= basetable_entry_types[type].length;
836			(u_char*)position += basetable_entry_types[type].length;
837		}
838	}
839
840	/* qualify the numbers */
841	if (mp_naps > MAXCPU) {
842		printf("Warning: only using %d of %d available CPUs!\n",
843			MAXCPU, mp_naps);
844		mp_naps = MAXCPU;
845	}
846
847	/*
848	 * Count the BSP.
849	 * This is also used as a counter while starting the APs.
850	 */
851	mp_ncpus = 1;
852
853	--mp_naps;	/* subtract the BSP */
854}
855
856
857/*
858 * 2nd pass on motherboard's Intel MP specification table.
859 *
860 * sets:
861 *	boot_cpu_id
862 *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
863 *	CPU_TO_ID(N), logical CPU to APIC ID table
864 *	IO_TO_ID(N), logical IO to APIC ID table
865 *	bus_data[N]
866 *	io_apic_ints[N]
867 */
868static int
869mptable_pass2(void)
870{
871	int     x;
872	mpcth_t cth;
873	int     totalSize;
874	void*   position;
875	int     count;
876	int     type;
877	int     apic, bus, cpu, intr;
878	int	i, j;
879	int	pgeflag;
880
881	POSTCODE(MPTABLE_PASS2_POST);
882
883	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
884
885	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
886	    M_DEVBUF, M_WAITOK);
887	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
888	    M_DEVBUF, M_WAITOK);
889	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
890	    M_DEVBUF, M_WAITOK);
891	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
892	    M_DEVBUF, M_WAITOK);
893
894	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
895
896	for (i = 0; i < mp_napics; i++) {
897		for (j = 0; j < mp_napics; j++) {
898			/* same page frame as a previous IO apic? */
899			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
900			    (io_apic_address[i] & PG_FRAME)) {
901				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
902					+ (NPTEPG-2-j) * PAGE_SIZE
903					+ (io_apic_address[i] & PAGE_MASK));
904				break;
905			}
906			/* use this slot if available */
907			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
908				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
909				    pgeflag | (io_apic_address[i] & PG_FRAME));
910				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
911					+ (NPTEPG-2-j) * PAGE_SIZE
912					+ (io_apic_address[i] & PAGE_MASK));
913				break;
914			}
915		}
916	}
917
918	/* clear various tables */
919	for (x = 0; x < NAPICID; ++x) {
920		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
921		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
922		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
923	}
924
925	/* clear bus data table */
926	for (x = 0; x < mp_nbusses; ++x)
927		bus_data[x].bus_id = 0xff;
928
929	/* clear IO APIC INT table */
930	for (x = 0; x < (nintrs + 1); ++x) {
931		io_apic_ints[x].int_type = 0xff;
932		io_apic_ints[x].int_vector = 0xff;
933	}
934
935	/* setup the cpu/apic mapping arrays */
936	boot_cpu_id = -1;
937
938	/* record whether PIC or virtual-wire mode */
939	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
940
941	/* check for use of 'default' configuration */
942	if (MPFPS_MPFB1 != 0)
943		return MPFPS_MPFB1;	/* return default configuration type */
944
945	if ((cth = mpfps->pap) == 0)
946		panic("MP Configuration Table Header MISSING!");
947
948	/* walk the table, recording info of interest */
949	totalSize = cth->base_table_length - sizeof(struct MPCTH);
950	position = (u_char *) cth + sizeof(struct MPCTH);
951	count = cth->entry_count;
952	apic = bus = intr = 0;
953	cpu = 1;				/* pre-count the BSP */
954
955	while (count--) {
956		switch (type = *(u_char *) position) {
957		case 0:
958			if (processor_entry(position, cpu))
959				++cpu;
960			break;
961		case 1:
962			if (bus_entry(position, bus))
963				++bus;
964			break;
965		case 2:
966			if (io_apic_entry(position, apic))
967				++apic;
968			break;
969		case 3:
970			if (int_entry(position, intr))
971				++intr;
972			break;
973		case 4:
974			/* int_entry(position); */
975			break;
976		default:
977			panic("mpfps Base Table HOSED!");
978			/* NOTREACHED */
979		}
980
981		totalSize -= basetable_entry_types[type].length;
982		(u_char *) position += basetable_entry_types[type].length;
983	}
984
985	if (boot_cpu_id == -1)
986		panic("NO BSP found!");
987
988	/* report fact that its NOT a default configuration */
989	return 0;
990}
991
992
993void
994assign_apic_irq(int apic, int intpin, int irq)
995{
996	int x;
997
998	if (int_to_apicintpin[irq].ioapic != -1)
999		panic("assign_apic_irq: inconsistent table");
1000
1001	int_to_apicintpin[irq].ioapic = apic;
1002	int_to_apicintpin[irq].int_pin = intpin;
1003	int_to_apicintpin[irq].apic_address = ioapic[apic];
1004	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1005
1006	for (x = 0; x < nintrs; x++) {
1007		if ((io_apic_ints[x].int_type == 0 ||
1008		     io_apic_ints[x].int_type == 3) &&
1009		    io_apic_ints[x].int_vector == 0xff &&
1010		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1011		    io_apic_ints[x].dst_apic_int == intpin)
1012			io_apic_ints[x].int_vector = irq;
1013	}
1014}
1015
1016void
1017revoke_apic_irq(int irq)
1018{
1019	int x;
1020	int oldapic;
1021	int oldintpin;
1022
1023	if (int_to_apicintpin[irq].ioapic == -1)
1024		panic("assign_apic_irq: inconsistent table");
1025
1026	oldapic = int_to_apicintpin[irq].ioapic;
1027	oldintpin = int_to_apicintpin[irq].int_pin;
1028
1029	int_to_apicintpin[irq].ioapic = -1;
1030	int_to_apicintpin[irq].int_pin = 0;
1031	int_to_apicintpin[irq].apic_address = NULL;
1032	int_to_apicintpin[irq].redirindex = 0;
1033
1034	for (x = 0; x < nintrs; x++) {
1035		if ((io_apic_ints[x].int_type == 0 ||
1036		     io_apic_ints[x].int_type == 3) &&
1037		    io_apic_ints[x].int_vector == 0xff &&
1038		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1039		    io_apic_ints[x].dst_apic_int == oldintpin)
1040			io_apic_ints[x].int_vector = 0xff;
1041	}
1042}
1043
1044
1045static void
1046allocate_apic_irq(int intr)
1047{
1048	int apic;
1049	int intpin;
1050	int irq;
1051
1052	if (io_apic_ints[intr].int_vector != 0xff)
1053		return;		/* Interrupt handler already assigned */
1054
1055	if (io_apic_ints[intr].int_type != 0 &&
1056	    (io_apic_ints[intr].int_type != 3 ||
1057	     (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1058	      io_apic_ints[intr].dst_apic_int == 0)))
1059		return;		/* Not INT or ExtInt on != (0, 0) */
1060
1061	irq = 0;
1062	while (irq < APIC_INTMAPSIZE &&
1063	       int_to_apicintpin[irq].ioapic != -1)
1064		irq++;
1065
1066	if (irq >= APIC_INTMAPSIZE)
1067		return;		/* No free interrupt handlers */
1068
1069	apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1070	intpin = io_apic_ints[intr].dst_apic_int;
1071
1072	assign_apic_irq(apic, intpin, irq);
1073	io_apic_setup_intpin(apic, intpin);
1074}
1075
1076
1077static void
1078swap_apic_id(int apic, int oldid, int newid)
1079{
1080	int x;
1081	int oapic;
1082
1083
1084	if (oldid == newid)
1085		return;			/* Nothing to do */
1086
1087	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
1088	       apic, oldid, newid);
1089
1090	/* Swap physical APIC IDs in interrupt entries */
1091	for (x = 0; x < nintrs; x++) {
1092		if (io_apic_ints[x].dst_apic_id == oldid)
1093			io_apic_ints[x].dst_apic_id = newid;
1094		else if (io_apic_ints[x].dst_apic_id == newid)
1095			io_apic_ints[x].dst_apic_id = oldid;
1096	}
1097
1098	/* Swap physical APIC IDs in IO_TO_ID mappings */
1099	for (oapic = 0; oapic < mp_napics; oapic++)
1100		if (IO_TO_ID(oapic) == newid)
1101			break;
1102
1103	if (oapic < mp_napics) {
1104		printf("Changing APIC ID for IO APIC #%d from "
1105		       "%d to %d in MP table\n",
1106		       oapic, newid, oldid);
1107		IO_TO_ID(oapic) = oldid;
1108	}
1109	IO_TO_ID(apic) = newid;
1110}
1111
1112
1113static void
1114fix_id_to_io_mapping(void)
1115{
1116	int x;
1117
1118	for (x = 0; x < NAPICID; x++)
1119		ID_TO_IO(x) = -1;
1120
1121	for (x = 0; x <= mp_naps; x++)
1122		if (CPU_TO_ID(x) < NAPICID)
1123			ID_TO_IO(CPU_TO_ID(x)) = x;
1124
1125	for (x = 0; x < mp_napics; x++)
1126		if (IO_TO_ID(x) < NAPICID)
1127			ID_TO_IO(IO_TO_ID(x)) = x;
1128}
1129
1130
1131static int
1132first_free_apic_id(void)
1133{
1134	int freeid, x;
1135
1136	for (freeid = 0; freeid < NAPICID; freeid++) {
1137		for (x = 0; x <= mp_naps; x++)
1138			if (CPU_TO_ID(x) == freeid)
1139				break;
1140		if (x <= mp_naps)
1141			continue;
1142		for (x = 0; x < mp_napics; x++)
1143			if (IO_TO_ID(x) == freeid)
1144				break;
1145		if (x < mp_napics)
1146			continue;
1147		return freeid;
1148	}
1149	return freeid;
1150}
1151
1152
1153static int
1154io_apic_id_acceptable(int apic, int id)
1155{
1156	int cpu;		/* Logical CPU number */
1157	int oapic;		/* Logical IO APIC number for other IO APIC */
1158
1159	if (id >= NAPICID)
1160		return 0;	/* Out of range */
1161
1162	for (cpu = 0; cpu <= mp_naps; cpu++)
1163		if (CPU_TO_ID(cpu) == id)
1164			return 0;	/* Conflict with CPU */
1165
1166	for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1167		if (IO_TO_ID(oapic) == id)
1168			return 0;	/* Conflict with other APIC */
1169
1170	return 1;		/* ID is acceptable for IO APIC */
1171}
1172
1173
1174/*
1175 * parse an Intel MP specification table
1176 */
1177static void
1178fix_mp_table(void)
1179{
1180	int	x;
1181	int	id;
1182	int	bus_0 = 0;	/* Stop GCC warning */
1183	int	bus_pci = 0;	/* Stop GCC warning */
1184	int	num_pci_bus;
1185	int	apic;		/* IO APIC unit number */
1186	int     freeid;		/* Free physical APIC ID */
1187	int	physid;		/* Current physical IO APIC ID */
1188
1189	/*
1190	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1191	 * did it wrong.  The MP spec says that when more than 1 PCI bus
1192	 * exists the BIOS must begin with bus entries for the PCI bus and use
1193	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
1194	 * exists the BIOS can choose to ignore this ordering, and indeed many
1195	 * MP motherboards do ignore it.  This causes a problem when the PCI
1196	 * sub-system makes requests of the MP sub-system based on PCI bus
1197	 * numbers.	So here we look for the situation and renumber the
1198	 * busses and associated INTs in an effort to "make it right".
1199	 */
1200
1201	/* find bus 0, PCI bus, count the number of PCI busses */
1202	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1203		if (bus_data[x].bus_id == 0) {
1204			bus_0 = x;
1205		}
1206		if (bus_data[x].bus_type == PCI) {
1207			++num_pci_bus;
1208			bus_pci = x;
1209		}
1210	}
1211	/*
1212	 * bus_0 == slot of bus with ID of 0
1213	 * bus_pci == slot of last PCI bus encountered
1214	 */
1215
1216	/* check the 1 PCI bus case for sanity */
1217	/* if it is number 0 all is well */
1218	if (num_pci_bus == 1 &&
1219	    bus_data[bus_pci].bus_id != 0) {
1220
1221		/* mis-numbered, swap with whichever bus uses slot 0 */
1222
1223		/* swap the bus entry types */
1224		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1225		bus_data[bus_0].bus_type = PCI;
1226
1227		/* swap each relavant INTerrupt entry */
1228		id = bus_data[bus_pci].bus_id;
1229		for (x = 0; x < nintrs; ++x) {
1230			if (io_apic_ints[x].src_bus_id == id) {
1231				io_apic_ints[x].src_bus_id = 0;
1232			}
1233			else if (io_apic_ints[x].src_bus_id == 0) {
1234				io_apic_ints[x].src_bus_id = id;
1235			}
1236		}
1237	}
1238
1239	/* Assign IO APIC IDs.
1240	 *
1241	 * First try the existing ID. If a conflict is detected, try
1242	 * the ID in the MP table.  If a conflict is still detected, find
1243	 * a free id.
1244	 *
1245	 * We cannot use the ID_TO_IO table before all conflicts has been
1246	 * resolved and the table has been corrected.
1247	 */
1248	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
1249
1250		/* First try to use the value set by the BIOS */
1251		physid = io_apic_get_id(apic);
1252		if (io_apic_id_acceptable(apic, physid)) {
1253			if (IO_TO_ID(apic) != physid)
1254				swap_apic_id(apic, IO_TO_ID(apic), physid);
1255			continue;
1256		}
1257
1258		/* Then check if the value in the MP table is acceptable */
1259		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
1260			continue;
1261
1262		/* Last resort, find a free APIC ID and use it */
1263		freeid = first_free_apic_id();
1264		if (freeid >= NAPICID)
1265			panic("No free physical APIC IDs found");
1266
1267		if (io_apic_id_acceptable(apic, freeid)) {
1268			swap_apic_id(apic, IO_TO_ID(apic), freeid);
1269			continue;
1270		}
1271		panic("Free physical APIC ID not usable");
1272	}
1273	fix_id_to_io_mapping();
1274
1275	/* detect and fix broken Compaq MP table */
1276	if (apic_int_type(0, 0) == -1) {
1277		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
1278		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
1279		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
1280		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
1281		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
1282		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
1283		nintrs++;
1284	}
1285}
1286
1287
1288/* Assign low level interrupt handlers */
1289static void
1290setup_apic_irq_mapping(void)
1291{
1292	int	x;
1293	int	int_vector;
1294
1295	/* Clear array */
1296	for (x = 0; x < APIC_INTMAPSIZE; x++) {
1297		int_to_apicintpin[x].ioapic = -1;
1298		int_to_apicintpin[x].int_pin = 0;
1299		int_to_apicintpin[x].apic_address = NULL;
1300		int_to_apicintpin[x].redirindex = 0;
1301	}
1302
1303	/* First assign ISA/EISA interrupts */
1304	for (x = 0; x < nintrs; x++) {
1305		int_vector = io_apic_ints[x].src_bus_irq;
1306		if (int_vector < APIC_INTMAPSIZE &&
1307		    io_apic_ints[x].int_vector == 0xff &&
1308		    int_to_apicintpin[int_vector].ioapic == -1 &&
1309		    (apic_int_is_bus_type(x, ISA) ||
1310		     apic_int_is_bus_type(x, EISA)) &&
1311		    io_apic_ints[x].int_type == 0) {
1312			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1313					io_apic_ints[x].dst_apic_int,
1314					int_vector);
1315		}
1316	}
1317
1318	/* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1319	for (x = 0; x < nintrs; x++) {
1320		if (io_apic_ints[x].dst_apic_int == 0 &&
1321		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1322		    io_apic_ints[x].int_vector == 0xff &&
1323		    int_to_apicintpin[0].ioapic == -1 &&
1324		    io_apic_ints[x].int_type == 3) {
1325			assign_apic_irq(0, 0, 0);
1326			break;
1327		}
1328	}
1329	/* PCI interrupt assignment is deferred */
1330}
1331
1332
1333static int
1334processor_entry(proc_entry_ptr entry, int cpu)
1335{
1336	/* check for usability */
1337	if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1338		return 0;
1339
1340	if(entry->apic_id >= NAPICID)
1341		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
1342	/* check for BSP flag */
1343	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1344		boot_cpu_id = entry->apic_id;
1345		CPU_TO_ID(0) = entry->apic_id;
1346		ID_TO_CPU(entry->apic_id) = 0;
1347		return 0;	/* its already been counted */
1348	}
1349
1350	/* add another AP to list, if less than max number of CPUs */
1351	else if (cpu < MAXCPU) {
1352		CPU_TO_ID(cpu) = entry->apic_id;
1353		ID_TO_CPU(entry->apic_id) = cpu;
1354		return 1;
1355	}
1356
1357	return 0;
1358}
1359
1360
1361static int
1362bus_entry(bus_entry_ptr entry, int bus)
1363{
1364	int     x;
1365	char    c, name[8];
1366
1367	/* encode the name into an index */
1368	for (x = 0; x < 6; ++x) {
1369		if ((c = entry->bus_type[x]) == ' ')
1370			break;
1371		name[x] = c;
1372	}
1373	name[x] = '\0';
1374
1375	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1376		panic("unknown bus type: '%s'", name);
1377
1378	bus_data[bus].bus_id = entry->bus_id;
1379	bus_data[bus].bus_type = x;
1380
1381	return 1;
1382}
1383
1384
1385static int
1386io_apic_entry(io_apic_entry_ptr entry, int apic)
1387{
1388	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1389		return 0;
1390
1391	IO_TO_ID(apic) = entry->apic_id;
1392	if (entry->apic_id < NAPICID)
1393		ID_TO_IO(entry->apic_id) = apic;
1394
1395	return 1;
1396}
1397
1398
1399static int
1400lookup_bus_type(char *name)
1401{
1402	int     x;
1403
1404	for (x = 0; x < MAX_BUSTYPE; ++x)
1405		if (strcmp(bus_type_table[x].name, name) == 0)
1406			return bus_type_table[x].type;
1407
1408	return UNKNOWN_BUSTYPE;
1409}
1410
1411
1412static int
1413int_entry(int_entry_ptr entry, int intr)
1414{
1415	int apic;
1416
1417	io_apic_ints[intr].int_type = entry->int_type;
1418	io_apic_ints[intr].int_flags = entry->int_flags;
1419	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1420	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1421	if (entry->dst_apic_id == 255) {
1422		/* This signal goes to all IO APICS.  Select an IO APIC
1423		   with sufficient number of interrupt pins */
1424		for (apic = 0; apic < mp_napics; apic++)
1425			if (((io_apic_read(apic, IOAPIC_VER) &
1426			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1427			    entry->dst_apic_int)
1428				break;
1429		if (apic < mp_napics)
1430			io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1431		else
1432			io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1433	} else
1434		io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1435	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1436
1437	return 1;
1438}
1439
1440
1441static int
1442apic_int_is_bus_type(int intr, int bus_type)
1443{
1444	int     bus;
1445
1446	for (bus = 0; bus < mp_nbusses; ++bus)
1447		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1448		    && ((int) bus_data[bus].bus_type == bus_type))
1449			return 1;
1450
1451	return 0;
1452}
1453
1454
1455/*
1456 * Given a traditional ISA INT mask, return an APIC mask.
1457 */
1458u_int
1459isa_apic_mask(u_int isa_mask)
1460{
1461	int isa_irq;
1462	int apic_pin;
1463
1464#if defined(SKIP_IRQ15_REDIRECT)
1465	if (isa_mask == (1 << 15)) {
1466		printf("skipping ISA IRQ15 redirect\n");
1467		return isa_mask;
1468	}
1469#endif  /* SKIP_IRQ15_REDIRECT */
1470
1471	isa_irq = ffs(isa_mask);		/* find its bit position */
1472	if (isa_irq == 0)			/* doesn't exist */
1473		return 0;
1474	--isa_irq;				/* make it zero based */
1475
1476	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
1477	if (apic_pin == -1)
1478		return 0;
1479
1480	return (1 << apic_pin);			/* convert pin# to a mask */
1481}
1482
1483
1484/*
1485 * Determine which APIC pin an ISA/EISA INT is attached to.
1486 */
1487#define INTTYPE(I)	(io_apic_ints[(I)].int_type)
1488#define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
1489#define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
1490#define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1491
1492#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
1493int
1494isa_apic_irq(int isa_irq)
1495{
1496	int     intr;
1497
1498	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
1499		if (INTTYPE(intr) == 0) {		/* standard INT */
1500			if (SRCBUSIRQ(intr) == isa_irq) {
1501				if (apic_int_is_bus_type(intr, ISA) ||
1502			            apic_int_is_bus_type(intr, EISA)) {
1503					if (INTIRQ(intr) == 0xff)
1504						return -1; /* unassigned */
1505					return INTIRQ(intr);	/* found */
1506				}
1507			}
1508		}
1509	}
1510	return -1;					/* NOT found */
1511}
1512
1513
1514/*
1515 * Determine which APIC pin a PCI INT is attached to.
1516 */
1517#define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
1518#define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1519#define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
1520int
1521pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1522{
1523	int     intr;
1524
1525	--pciInt;					/* zero based */
1526
1527	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
1528		if ((INTTYPE(intr) == 0)		/* standard INT */
1529		    && (SRCBUSID(intr) == pciBus)
1530		    && (SRCBUSDEVICE(intr) == pciDevice)
1531		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
1532			if (apic_int_is_bus_type(intr, PCI)) {
1533				if (INTIRQ(intr) == 0xff)
1534					allocate_apic_irq(intr);
1535				if (INTIRQ(intr) == 0xff)
1536					return -1;	/* unassigned */
1537				return INTIRQ(intr);	/* exact match */
1538			}
1539
1540	return -1;					/* NOT found */
1541}
1542
1543int
1544next_apic_irq(int irq)
1545{
1546	int intr, ointr;
1547	int bus, bustype;
1548
1549	bus = 0;
1550	bustype = 0;
1551	for (intr = 0; intr < nintrs; intr++) {
1552		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1553			continue;
1554		bus = SRCBUSID(intr);
1555		bustype = apic_bus_type(bus);
1556		if (bustype != ISA &&
1557		    bustype != EISA &&
1558		    bustype != PCI)
1559			continue;
1560		break;
1561	}
1562	if (intr >= nintrs) {
1563		return -1;
1564	}
1565	for (ointr = intr + 1; ointr < nintrs; ointr++) {
1566		if (INTTYPE(ointr) != 0)
1567			continue;
1568		if (bus != SRCBUSID(ointr))
1569			continue;
1570		if (bustype == PCI) {
1571			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1572				continue;
1573			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1574				continue;
1575		}
1576		if (bustype == ISA || bustype == EISA) {
1577			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1578				continue;
1579		}
1580		if (INTPIN(intr) == INTPIN(ointr))
1581			continue;
1582		break;
1583	}
1584	if (ointr >= nintrs) {
1585		return -1;
1586	}
1587	return INTIRQ(ointr);
1588}
1589#undef SRCBUSLINE
1590#undef SRCBUSDEVICE
1591#undef SRCBUSID
1592#undef SRCBUSIRQ
1593
1594#undef INTPIN
1595#undef INTIRQ
1596#undef INTAPIC
1597#undef INTTYPE
1598
1599
1600/*
1601 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1602 *
1603 * XXX FIXME:
1604 *  Exactly what this means is unclear at this point.  It is a solution
1605 *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
1606 *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
1607 *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1608 *  option.
1609 */
1610int
1611undirect_isa_irq(int rirq)
1612{
1613#if defined(READY)
1614	if (bootverbose)
1615	    printf("Freeing redirected ISA irq %d.\n", rirq);
1616	/** FIXME: tickle the MB redirector chip */
1617	return -1;
1618#else
1619	if (bootverbose)
1620	    printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1621	return 0;
1622#endif  /* READY */
1623}
1624
1625
1626/*
1627 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1628 */
1629int
1630undirect_pci_irq(int rirq)
1631{
1632#if defined(READY)
1633	if (bootverbose)
1634		printf("Freeing redirected PCI irq %d.\n", rirq);
1635
1636	/** FIXME: tickle the MB redirector chip */
1637	return -1;
1638#else
1639	if (bootverbose)
1640		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1641		       rirq);
1642	return 0;
1643#endif  /* READY */
1644}
1645
1646
1647/*
1648 * given a bus ID, return:
1649 *  the bus type if found
1650 *  -1 if NOT found
1651 */
1652int
1653apic_bus_type(int id)
1654{
1655	int     x;
1656
1657	for (x = 0; x < mp_nbusses; ++x)
1658		if (bus_data[x].bus_id == id)
1659			return bus_data[x].bus_type;
1660
1661	return -1;
1662}
1663
1664
1665/*
1666 * given a LOGICAL APIC# and pin#, return:
1667 *  the associated src bus ID if found
1668 *  -1 if NOT found
1669 */
1670int
1671apic_src_bus_id(int apic, int pin)
1672{
1673	int     x;
1674
1675	/* search each of the possible INTerrupt sources */
1676	for (x = 0; x < nintrs; ++x)
1677		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1678		    (pin == io_apic_ints[x].dst_apic_int))
1679			return (io_apic_ints[x].src_bus_id);
1680
1681	return -1;		/* NOT found */
1682}
1683
1684
1685/*
1686 * given a LOGICAL APIC# and pin#, return:
1687 *  the associated src bus IRQ if found
1688 *  -1 if NOT found
1689 */
1690int
1691apic_src_bus_irq(int apic, int pin)
1692{
1693	int     x;
1694
1695	for (x = 0; x < nintrs; x++)
1696		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1697		    (pin == io_apic_ints[x].dst_apic_int))
1698			return (io_apic_ints[x].src_bus_irq);
1699
1700	return -1;		/* NOT found */
1701}
1702
1703
1704/*
1705 * given a LOGICAL APIC# and pin#, return:
1706 *  the associated INTerrupt type if found
1707 *  -1 if NOT found
1708 */
1709int
1710apic_int_type(int apic, int pin)
1711{
1712	int     x;
1713
1714	/* search each of the possible INTerrupt sources */
1715	for (x = 0; x < nintrs; ++x)
1716		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1717		    (pin == io_apic_ints[x].dst_apic_int))
1718			return (io_apic_ints[x].int_type);
1719
1720	return -1;		/* NOT found */
1721}
1722
1723int
1724apic_irq(int apic, int pin)
1725{
1726	int x;
1727	int res;
1728
1729	for (x = 0; x < nintrs; ++x)
1730		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1731		    (pin == io_apic_ints[x].dst_apic_int)) {
1732			res = io_apic_ints[x].int_vector;
1733			if (res == 0xff)
1734				return -1;
1735			if (apic != int_to_apicintpin[res].ioapic)
1736				panic("apic_irq: inconsistent table");
1737			if (pin != int_to_apicintpin[res].int_pin)
1738				panic("apic_irq inconsistent table (2)");
1739			return res;
1740		}
1741	return -1;
1742}
1743
1744
1745/*
1746 * given a LOGICAL APIC# and pin#, return:
1747 *  the associated trigger mode if found
1748 *  -1 if NOT found
1749 */
1750int
1751apic_trigger(int apic, int pin)
1752{
1753	int     x;
1754
1755	/* search each of the possible INTerrupt sources */
1756	for (x = 0; x < nintrs; ++x)
1757		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1758		    (pin == io_apic_ints[x].dst_apic_int))
1759			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1760
1761	return -1;		/* NOT found */
1762}
1763
1764
1765/*
1766 * given a LOGICAL APIC# and pin#, return:
1767 *  the associated 'active' level if found
1768 *  -1 if NOT found
1769 */
1770int
1771apic_polarity(int apic, int pin)
1772{
1773	int     x;
1774
1775	/* search each of the possible INTerrupt sources */
1776	for (x = 0; x < nintrs; ++x)
1777		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1778		    (pin == io_apic_ints[x].dst_apic_int))
1779			return (io_apic_ints[x].int_flags & 0x03);
1780
1781	return -1;		/* NOT found */
1782}
1783
1784
1785/*
1786 * set data according to MP defaults
1787 * FIXME: probably not complete yet...
1788 */
1789static void
1790default_mp_table(int type)
1791{
1792	int     ap_cpu_id;
1793#if defined(APIC_IO)
1794	int     io_apic_id;
1795	int     pin;
1796#endif	/* APIC_IO */
1797
1798#if 0
1799	printf("  MP default config type: %d\n", type);
1800	switch (type) {
1801	case 1:
1802		printf("   bus: ISA, APIC: 82489DX\n");
1803		break;
1804	case 2:
1805		printf("   bus: EISA, APIC: 82489DX\n");
1806		break;
1807	case 3:
1808		printf("   bus: EISA, APIC: 82489DX\n");
1809		break;
1810	case 4:
1811		printf("   bus: MCA, APIC: 82489DX\n");
1812		break;
1813	case 5:
1814		printf("   bus: ISA+PCI, APIC: Integrated\n");
1815		break;
1816	case 6:
1817		printf("   bus: EISA+PCI, APIC: Integrated\n");
1818		break;
1819	case 7:
1820		printf("   bus: MCA+PCI, APIC: Integrated\n");
1821		break;
1822	default:
1823		printf("   future type\n");
1824		break;
1825		/* NOTREACHED */
1826	}
1827#endif	/* 0 */
1828
1829	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1830	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1831
1832	/* BSP */
1833	CPU_TO_ID(0) = boot_cpu_id;
1834	ID_TO_CPU(boot_cpu_id) = 0;
1835
1836	/* one and only AP */
1837	CPU_TO_ID(1) = ap_cpu_id;
1838	ID_TO_CPU(ap_cpu_id) = 1;
1839
1840#if defined(APIC_IO)
1841	/* one and only IO APIC */
1842	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1843
1844	/*
1845	 * sanity check, refer to MP spec section 3.6.6, last paragraph
1846	 * necessary as some hardware isn't properly setting up the IO APIC
1847	 */
1848#if defined(REALLY_ANAL_IOAPICID_VALUE)
1849	if (io_apic_id != 2) {
1850#else
1851	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1852#endif	/* REALLY_ANAL_IOAPICID_VALUE */
1853		io_apic_set_id(0, 2);
1854		io_apic_id = 2;
1855	}
1856	IO_TO_ID(0) = io_apic_id;
1857	ID_TO_IO(io_apic_id) = 0;
1858#endif	/* APIC_IO */
1859
1860	/* fill out bus entries */
1861	switch (type) {
1862	case 1:
1863	case 2:
1864	case 3:
1865	case 4:
1866	case 5:
1867	case 6:
1868	case 7:
1869		bus_data[0].bus_id = default_data[type - 1][1];
1870		bus_data[0].bus_type = default_data[type - 1][2];
1871		bus_data[1].bus_id = default_data[type - 1][3];
1872		bus_data[1].bus_type = default_data[type - 1][4];
1873		break;
1874
1875	/* case 4: case 7:		   MCA NOT supported */
1876	default:		/* illegal/reserved */
1877		panic("BAD default MP config: %d", type);
1878		/* NOTREACHED */
1879	}
1880
1881#if defined(APIC_IO)
1882	/* general cases from MP v1.4, table 5-2 */
1883	for (pin = 0; pin < 16; ++pin) {
1884		io_apic_ints[pin].int_type = 0;
1885		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
1886		io_apic_ints[pin].src_bus_id = 0;
1887		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
1888		io_apic_ints[pin].dst_apic_id = io_apic_id;
1889		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
1890	}
1891
1892	/* special cases from MP v1.4, table 5-2 */
1893	if (type == 2) {
1894		io_apic_ints[2].int_type = 0xff;	/* N/C */
1895		io_apic_ints[13].int_type = 0xff;	/* N/C */
1896#if !defined(APIC_MIXED_MODE)
1897		/** FIXME: ??? */
1898		panic("sorry, can't support type 2 default yet");
1899#endif	/* APIC_MIXED_MODE */
1900	}
1901	else
1902		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
1903
1904	if (type == 7)
1905		io_apic_ints[0].int_type = 0xff;	/* N/C */
1906	else
1907		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
1908#endif	/* APIC_IO */
1909}
1910
1911
1912/*
1913 * start each AP in our list
1914 */
1915static int
1916start_all_aps(u_int boot_addr)
1917{
1918	int     x, i, pg;
1919	u_char  mpbiosreason;
1920	u_long  mpbioswarmvec;
1921	struct pcpu *pc;
1922	char *stack;
1923	uintptr_t kptbase;
1924
1925	POSTCODE(START_ALL_APS_POST);
1926
1927	mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
1928
1929	/* initialize BSP's local APIC */
1930	apic_initialize();
1931	bsp_apic_ready = 1;
1932
1933	/* install the AP 1st level boot code */
1934	install_ap_tramp(boot_addr);
1935
1936
1937	/* save the current value of the warm-start vector */
1938	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1939#ifndef PC98
1940	outb(CMOS_REG, BIOS_RESET);
1941	mpbiosreason = inb(CMOS_DATA);
1942#endif
1943
1944	/* set up temporary P==V mapping for AP boot */
1945	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
1946	kptbase = (uintptr_t)(void *)KPTphys;
1947	for (x = 0; x < NKPT; x++)
1948		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
1949		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
1950	invltlb();
1951
1952	/* start each AP */
1953	for (x = 1; x <= mp_naps; ++x) {
1954
1955		/* This is a bit verbose, it will go away soon.  */
1956
1957		/* first page of AP's private space */
1958		pg = x * i386_btop(sizeof(struct privatespace));
1959
1960		/* allocate a new private data page */
1961		pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
1962
1963		/* wire it into the private page table page */
1964		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
1965
1966		/* allocate and set up an idle stack data page */
1967		stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
1968		for (i = 0; i < KSTACK_PAGES; i++)
1969			SMPpt[pg + 1 + i] = (pt_entry_t)
1970			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1971
1972		/* prime data page for it to use */
1973		pcpu_init(pc, x, sizeof(struct pcpu));
1974
1975		/* setup a vector to our boot code */
1976		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1977		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
1978#ifndef PC98
1979		outb(CMOS_REG, BIOS_RESET);
1980		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
1981#endif
1982
1983		bootSTK = &SMP_prvspace[x].idlekstack[KSTACK_PAGES * PAGE_SIZE];
1984		bootAP = x;
1985
1986		/* attempt to start the Application Processor */
1987		CHECK_INIT(99);	/* setup checkpoints */
1988		if (!start_ap(x, boot_addr)) {
1989			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
1990			CHECK_PRINT("trace");	/* show checkpoints */
1991			/* better panic as the AP may be running loose */
1992			printf("panic y/n? [y] ");
1993			if (cngetc() != 'n')
1994				panic("bye-bye");
1995		}
1996		CHECK_PRINT("trace");		/* show checkpoints */
1997
1998		/* record its version info */
1999		cpu_apic_versions[x] = cpu_apic_versions[0];
2000
2001		all_cpus |= (1 << x);		/* record AP in CPU map */
2002	}
2003
2004	/* build our map of 'other' CPUs */
2005	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2006
2007	/* fill in our (BSP) APIC version */
2008	cpu_apic_versions[0] = lapic.version;
2009
2010	/* restore the warmstart vector */
2011	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
2012#ifndef PC98
2013	outb(CMOS_REG, BIOS_RESET);
2014	outb(CMOS_DATA, mpbiosreason);
2015#endif
2016
2017	/*
2018	 * Set up the idle context for the BSP.  Similar to above except
2019	 * that some was done by locore, some by pmap.c and some is implicit
2020	 * because the BSP is cpu#0 and the page is initially zero, and also
2021	 * because we can refer to variables by name on the BSP..
2022	 */
2023
2024	/* Allocate and setup BSP idle stack */
2025	stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
2026	for (i = 0; i < KSTACK_PAGES; i++)
2027		SMPpt[1 + i] = (pt_entry_t)
2028		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
2029
2030	for (x = 0; x < NKPT; x++)
2031		PTD[x] = 0;
2032	pmap_set_opt();
2033
2034	/* number of APs actually started */
2035	return mp_ncpus - 1;
2036}
2037
2038
2039/*
2040 * load the 1st level AP boot code into base memory.
2041 */
2042
2043/* targets for relocation */
2044extern void bigJump(void);
2045extern void bootCodeSeg(void);
2046extern void bootDataSeg(void);
2047extern void MPentry(void);
2048extern u_int MP_GDT;
2049extern u_int mp_gdtbase;
2050
2051static void
2052install_ap_tramp(u_int boot_addr)
2053{
2054	int     x;
2055	int     size = *(int *) ((u_long) & bootMP_size);
2056	u_char *src = (u_char *) ((u_long) bootMP);
2057	u_char *dst = (u_char *) boot_addr + KERNBASE;
2058	u_int   boot_base = (u_int) bootMP;
2059	u_int8_t *dst8;
2060	u_int16_t *dst16;
2061	u_int32_t *dst32;
2062
2063	POSTCODE(INSTALL_AP_TRAMP_POST);
2064
2065	for (x = 0; x < size; ++x)
2066		*dst++ = *src++;
2067
2068	/*
2069	 * modify addresses in code we just moved to basemem. unfortunately we
2070	 * need fairly detailed info about mpboot.s for this to work.  changes
2071	 * to mpboot.s might require changes here.
2072	 */
2073
2074	/* boot code is located in KERNEL space */
2075	dst = (u_char *) boot_addr + KERNBASE;
2076
2077	/* modify the lgdt arg */
2078	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2079	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2080
2081	/* modify the ljmp target for MPentry() */
2082	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2083	*dst32 = ((u_int) MPentry - KERNBASE);
2084
2085	/* modify the target for boot code segment */
2086	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2087	dst8 = (u_int8_t *) (dst16 + 1);
2088	*dst16 = (u_int) boot_addr & 0xffff;
2089	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
2090
2091	/* modify the target for boot data segment */
2092	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2093	dst8 = (u_int8_t *) (dst16 + 1);
2094	*dst16 = (u_int) boot_addr & 0xffff;
2095	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
2096}
2097
2098
2099/*
2100 * this function starts the AP (application processor) identified
2101 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
2102 * to accomplish this.  This is necessary because of the nuances
2103 * of the different hardware we might encounter.  It ain't pretty,
2104 * but it seems to work.
2105 */
2106static int
2107start_ap(int logical_cpu, u_int boot_addr)
2108{
2109	int     physical_cpu;
2110	int     vector;
2111	int     cpus;
2112	u_long  icr_lo, icr_hi;
2113
2114	POSTCODE(START_AP_POST);
2115
2116	/* get the PHYSICAL APIC ID# */
2117	physical_cpu = CPU_TO_ID(logical_cpu);
2118
2119	/* calculate the vector */
2120	vector = (boot_addr >> 12) & 0xff;
2121
2122	/* used as a watchpoint to signal AP startup */
2123	cpus = mp_ncpus;
2124
2125	/*
2126	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
2127	 * and running the target CPU. OR this INIT IPI might be latched (P5
2128	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
2129	 * ignored.
2130	 */
2131
2132	/* setup the address for the target AP */
2133	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
2134	icr_hi |= (physical_cpu << 24);
2135	lapic.icr_hi = icr_hi;
2136
2137	/* do an INIT IPI: assert RESET */
2138	icr_lo = lapic.icr_lo & 0xfff00000;
2139	lapic.icr_lo = icr_lo | 0x0000c500;
2140
2141	/* wait for pending status end */
2142	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2143		 /* spin */ ;
2144
2145	/* do an INIT IPI: deassert RESET */
2146	lapic.icr_lo = icr_lo | 0x00008500;
2147
2148	/* wait for pending status end */
2149	u_sleep(10000);		/* wait ~10mS */
2150	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2151		 /* spin */ ;
2152
2153	/*
2154	 * next we do a STARTUP IPI: the previous INIT IPI might still be
2155	 * latched, (P5 bug) this 1st STARTUP would then terminate
2156	 * immediately, and the previously started INIT IPI would continue. OR
2157	 * the previous INIT IPI has already run. and this STARTUP IPI will
2158	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
2159	 * will run.
2160	 */
2161
2162	/* do a STARTUP IPI */
2163	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2164	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2165		 /* spin */ ;
2166	u_sleep(200);		/* wait ~200uS */
2167
2168	/*
2169	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
2170	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
2171	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
2172	 * recognized after hardware RESET or INIT IPI.
2173	 */
2174
2175	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2176	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2177		 /* spin */ ;
2178	u_sleep(200);		/* wait ~200uS */
2179
2180	/* wait for it to start */
2181	set_apic_timer(5000000);/* == 5 seconds */
2182	while (read_apic_timer())
2183		if (mp_ncpus > cpus)
2184			return 1;	/* return SUCCESS */
2185
2186	return 0;		/* return FAILURE */
2187}
2188
/*
 * Ask all other CPUs to flush their TLB by sending IPI_INVLTLB.
 * Does nothing before smp_started is set, or when APIC_IO is not
 * defined.
 *
 * XXX: Needs to handshake and wait for completion before proceeding.
 */
void
smp_invltlb(void)
{
#if defined(APIC_IO)
	if (!smp_started)
		return;
	ipi_all_but_self(IPI_INVLTLB);
#endif  /* APIC_IO */
}
2202
2203void
2204invlpg(u_int addr)
2205{
2206	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
2207
2208	/* send a message to the other CPUs */
2209	smp_invltlb();
2210}
2211
2212void
2213invltlb(void)
2214{
2215	u_long  temp;
2216
2217	/*
2218	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
2219	 * inlined.
2220	 */
2221	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
2222
2223	/* send a message to the other CPUs */
2224	smp_invltlb();
2225}
2226
2227
2228/*
2229 * This is called once the rest of the system is up and running and we're
2230 * ready to let the AP's out of the pen.
2231 */
2232extern void	enable_sse(void);
2233
2234void
2235ap_init(void)
2236{
2237	u_int	apic_id;
2238
2239	/* spin until all the AP's are ready */
2240	while (!aps_ready)
2241		/* spin */ ;
2242
2243	/* BSP may have changed PTD while we were waiting */
2244	cpu_invltlb();
2245
2246#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2247	lidt(&r_idt);
2248#endif
2249
2250	/* set up CPU registers and state */
2251	cpu_setregs();
2252
2253	/* set up FPU state on the AP */
2254	npxinit(__INITIAL_NPXCW__);
2255
2256	/* set up SSE registers */
2257	enable_sse();
2258
2259	/* A quick check from sanity claus */
2260	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2261	if (PCPU_GET(cpuid) != apic_id) {
2262		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
2263		printf("SMP: apic_id = %d\n", apic_id);
2264		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
2265		panic("cpuid mismatch! boom!!");
2266	}
2267
2268	/* Init local apic for irq's */
2269	apic_initialize();
2270
2271	/* Set memory range attributes for this CPU to match the BSP */
2272	mem_range_AP_init();
2273
2274	mtx_lock_spin(&ap_boot_mtx);
2275
2276	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
2277
2278	smp_cpus++;
2279
2280	/* Build our map of 'other' CPUs. */
2281	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2282
2283	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
2284
2285	if (smp_cpus == mp_ncpus) {
2286		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
2287		smp_active = 1;	 /* historic */
2288	}
2289
2290	mtx_unlock_spin(&ap_boot_mtx);
2291
2292	/* wait until all the AP's are up */
2293	while (smp_started == 0)
2294		; /* nothing */
2295
2296	microuptime(PCPU_PTR(switchtime));
2297	PCPU_SET(switchticks, ticks);
2298
2299	/* ok, now grab sched_lock and enter the scheduler */
2300	mtx_lock_spin(&sched_lock);
2301	cpu_throw();	/* doesn't return */
2302
2303	panic("scheduler returned us to %s", __func__);
2304}
2305
2306/*
2307 * For statclock, we send an IPI to all CPU's to have them call this
2308 * function.
2309 */
2310void
2311forwarded_statclock(struct trapframe frame)
2312{
2313
2314	mtx_lock_spin(&sched_lock);
2315	statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
2316	mtx_unlock_spin(&sched_lock);
2317}
2318
2319void
2320forward_statclock(void)
2321{
2322	int map;
2323
2324	CTR0(KTR_SMP, "forward_statclock");
2325
2326	if (!smp_started || cold || panicstr)
2327		return;
2328
2329	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
2330	if (map != 0)
2331		ipi_selected(map, IPI_STATCLOCK);
2332}
2333
2334/*
2335 * For each hardclock(), we send an IPI to all other CPU's to have them
2336 * execute this function.  It would be nice to reduce contention on
2337 * sched_lock if we could simply peek at the CPU to determine the user/kernel
2338 * state and call hardclock_process() on the CPU receiving the clock interrupt
2339 * and then just use a simple IPI to handle any ast's if needed.
2340 */
2341void
2342forwarded_hardclock(struct trapframe frame)
2343{
2344
2345	mtx_lock_spin(&sched_lock);
2346	hardclock_process(curthread, TRAPF_USERMODE(&frame));
2347	mtx_unlock_spin(&sched_lock);
2348}
2349
2350void
2351forward_hardclock(void)
2352{
2353	u_int map;
2354
2355	CTR0(KTR_SMP, "forward_hardclock");
2356
2357	if (!smp_started || cold || panicstr)
2358		return;
2359
2360	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
2361	if (map != 0)
2362		ipi_selected(map, IPI_HARDCLOCK);
2363}
2364
#ifdef APIC_INTR_REORDER
/*
 * Maintain mapping from softintr vector to isr bit in local apic.
 *
 * For interrupt 'intr', record in apic_isrbit_location[] the address of the
 * local APIC ISR word and the bit mask within that word that correspond to
 * 'vector'.  Panics if either argument is out of range.
 */
void
set_lapic_isrloc(int intr, int vector)
{
	/*
	 * 'intr' indexes apic_isrbit_location[], so the upper bound must be
	 * exclusive; the previous "intr > 32" test let index 32 through.
	 * NOTE(review): assumes the table has 32 entries -- confirm against
	 * its definition.
	 */
	if (intr < 0 || intr >= 32)
		panic("set_lapic_isrloc: bad intr argument: %d", intr);
	if (vector < ICU_OFFSET || vector > 255)
		panic("set_lapic_isrloc: bad vector argument: %d", vector);

	/*
	 * vector >> 5 picks the 32-bit ISR word holding this vector.  The
	 * << 2 presumably accounts for the spacing between consecutive ISR
	 * registers in the lapic layout -- verify against the lapic
	 * structure definition.
	 */
	apic_isrbit_location[intr].location =
	    &lapic.isr0 + ((vector >> 5) << 2);

	/* Shift an unsigned 1: "1 << 31" on a signed int is undefined. */
	apic_isrbit_location[intr].bit = (1U << (vector & 31));
}
#endif
2380
2381/*
2382 * send an IPI to a set of cpus.
2383 */
2384void
2385ipi_selected(u_int32_t cpus, u_int ipi)
2386{
2387
2388	CTR2(KTR_SMP, __func__ ": cpus: %x ipi: %x", cpus, ipi);
2389	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
2390}
2391
2392/*
2393 * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
2394 */
2395void
2396ipi_all(u_int ipi)
2397{
2398
2399	CTR1(KTR_SMP, __func__ ": ipi: %x", ipi);
2400	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
2401}
2402
2403/*
2404 * send an IPI to all CPUs EXCEPT myself
2405 */
2406void
2407ipi_all_but_self(u_int ipi)
2408{
2409
2410	CTR1(KTR_SMP, __func__ ": ipi: %x", ipi);
2411	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
2412}
2413
2414/*
2415 * send an IPI to myself
2416 */
2417void
2418ipi_self(u_int ipi)
2419{
2420
2421	CTR1(KTR_SMP, __func__ ": ipi: %x", ipi);
2422	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
2423}
2424
2425void
2426release_aps(void *dummy __unused)
2427{
2428	atomic_store_rel_int(&aps_ready, 1);
2429}
2430
2431SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
2432