mptable.c revision 91673
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 *    derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: head/sys/i386/i386/mptable.c 91673 2002-03-05 10:01:46Z jeff $
26 */
27
28#include "opt_cpu.h"
29#include "opt_kstack_pages.h"
30
31#ifdef SMP
32#include <machine/smptests.h>
33#else
34#error
35#endif
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/bus.h>
40#include <sys/cons.h>	/* cngetc() */
41#include <sys/dkstat.h>
42#ifdef GPROF
43#include <sys/gmon.h>
44#endif
45#include <sys/kernel.h>
46#include <sys/ktr.h>
47#include <sys/lock.h>
48#include <sys/malloc.h>
49#include <sys/memrange.h>
50#include <sys/mutex.h>
51#include <sys/pcpu.h>
52#include <sys/proc.h>
53#include <sys/smp.h>
54#include <sys/sysctl.h>
55#include <sys/user.h>
56
57#include <vm/vm.h>
58#include <vm/vm_param.h>
59#include <vm/pmap.h>
60#include <vm/vm_kern.h>
61#include <vm/vm_extern.h>
62#include <vm/vm_map.h>
63
64#include <machine/apic.h>
65#include <machine/atomic.h>
66#include <machine/cpu.h>
67#include <machine/cpufunc.h>
68#include <machine/mpapic.h>
69#include <machine/psl.h>
70#include <machine/segments.h>
71#include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
72#include <machine/tss.h>
73#include <machine/specialreg.h>
74#include <machine/privatespace.h>
75
76#if defined(APIC_IO)
77#include <machine/md_var.h>		/* setidt() */
78#include <i386/isa/icu.h>		/* IPIs */
79#include <i386/isa/intr_machdep.h>	/* IPIs */
80#endif	/* APIC_IO */
81
/*
 * MPFPS_MPFB1 yields the MP "feature byte 1" value that the table passes
 * compare against 0 to detect a default configuration.  Defining
 * TEST_DEFAULT_CONFIG replaces the value read from the floating pointer
 * structure with a fixed one.
 */
#if defined(TEST_DEFAULT_CONFIG)
#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
#else
#define MPFPS_MPFB1	mpfps->mpfb1
#endif  /* TEST_DEFAULT_CONFIG */

/*
 * NOTE(review): presumably the BIOS warm-boot vector (offset and segment
 * words) as seen through the kernel mapping -- confirm against the code
 * that uses these.
 */
#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

/* physical BIOS region that i386_mp_probe() scans for the MP signature */
#ifdef PC98
#define BIOS_BASE		(0xe8000)
#define BIOS_SIZE		(0x18000)
#else
#define BIOS_BASE		(0xf0000)
#define BIOS_SIZE		(0x10000)
#endif
/* size of that region in 32-bit words, the unit search_for_sig() counts in */
#define BIOS_COUNT		(BIOS_SIZE/4)

/* CMOS index and data I/O ports, as used by CHECK_READ()/CHECK_WRITE() */
#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
/* NOTE(review): presumably the CMOS shutdown-status register and its
 * "warm start" value -- confirm against the AP start code */
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)

/* bits tested in PROCENTRY.cpu_flags and IOAPICENTRY.apic_flags */
#define PROCENTRY_FLAG_EN	0x01
#define PROCENTRY_FLAG_BP	0x02
#define IOAPICENTRY_FLAG_EN	0x01
110
/*
 * MP Floating Pointer Structure.
 *
 * Located by i386_mp_probe() via search_for_sig(); the layout mirrors the
 * in-memory structure, so field order and sizes must not change.
 */
typedef struct MPFPS {
	char    signature[4];	/* matched as the 32-bit value MP_SIG ("_MP_") */
	void   *pap;		/* MP configuration table header (see mptable_pass1) */
	u_char  length;
	u_char  spec_rev;
	u_char  checksum;
	u_char  mpfb1;		/* non-zero: default configuration type */
	u_char  mpfb2;		/* bit 0x80 set: PIC mode, else virtual wire */
	u_char  mpfb3;
	u_char  mpfb4;
	u_char  mpfb5;
}      *mpfps_t;
124
/*
 * MP Configuration Table Header.
 *
 * The base table entries start immediately after this header
 * (sizeof(struct MPCTH) bytes in); the layout mirrors the in-memory table.
 */
typedef struct MPCTH {
	char    signature[4];
	u_short base_table_length;
	u_char  spec_rev;
	u_char  checksum;
	u_char  oem_id[8];
	u_char  product_id[12];
	void   *oem_table_pointer;
	u_short oem_table_size;
	u_short entry_count;		/* number of base table entries to walk */
	void   *apic_address;		/* recorded as cpu_apic_address */
	u_short extended_table_length;
	u_char  extended_table_checksum;
	u_char  reserved;
}      *mpcth_t;
141
142
/* Base table entry type 0: processor (20 bytes per basetable_entry_types). */
typedef struct PROCENTRY {
	u_char  type;
	u_char  apic_id;
	u_char  apic_version;
	u_char  cpu_flags;		/* PROCENTRY_FLAG_EN / PROCENTRY_FLAG_BP */
	u_long  cpu_signature;
	u_long  feature_flags;
	u_long  reserved1;
	u_long  reserved2;
}      *proc_entry_ptr;
153
/* Base table entry type 1: bus (8 bytes per basetable_entry_types). */
typedef struct BUSENTRY {
	u_char  type;
	u_char  bus_id;
	char    bus_type[6];	/* NOTE(review): presumably a fixed-width name, not NUL-terminated -- confirm */
}      *bus_entry_ptr;
159
/* Base table entry type 2: IO APIC (8 bytes per basetable_entry_types). */
typedef struct IOAPICENTRY {
	u_char  type;
	u_char  apic_id;
	u_char  apic_version;
	u_char  apic_flags;		/* IOAPICENTRY_FLAG_EN */
	void   *apic_address;		/* recorded in io_apic_address[] when enabled */
}      *io_apic_entry_ptr;
167
/*
 * Base table entry types 3 and 4: interrupt assignment (8 bytes per
 * basetable_entry_types).
 */
typedef struct INTENTRY {
	u_char  type;
	u_char  int_type;
	u_short int_flags;
	u_char  src_bus_id;
	u_char  src_bus_irq;
	u_char  dst_apic_id;
	u_char  dst_apic_int;
}      *int_entry_ptr;
177
/*
 * descriptions of MP basetable entries: the entry type code, the size in
 * bytes of an entry of that type, and a printable name.
 */
typedef struct BASETABLE_ENTRY {
	u_char  type;
	u_char  length;
	char    name[16];
}       basetable_entry;
184
/*
 * this code MUST be enabled here and in mpboot.s.
 * it follows the very early stages of AP boot by placing values in CMOS ram.
 * it NORMALLY will never be needed and thus the primitive method for enabling.
 *
#define CHECK_POINTS
 */

#if defined(CHECK_POINTS) && !defined(PC98)
/* read / write one CMOS byte: select index A on CMOS_REG, then use CMOS_DATA */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/* store D into each of the six CMOS check-point bytes 0x34-0x39 */
#define CHECK_INIT(D);				\
	CHECK_WRITE(0x34, (D));			\
	CHECK_WRITE(0x35, (D));			\
	CHECK_WRITE(0x36, (D));			\
	CHECK_WRITE(0x37, (D));			\
	CHECK_WRITE(0x38, (D));			\
	CHECK_WRITE(0x39, (D));

/* print the six CMOS check-point bytes, prefixed by the string S */
#define CHECK_PRINT(S);				\
	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
	   (S),					\
	   CHECK_READ(0x34),			\
	   CHECK_READ(0x35),			\
	   CHECK_READ(0x36),			\
	   CHECK_READ(0x37),			\
	   CHECK_READ(0x38),			\
	   CHECK_READ(0x39));

#else				/* CHECK_POINTS */

/* check points disabled (or PC98): both macros expand to nothing */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
221
/*
 * Values to send to the POST hardware.
 * Each is passed to POSTCODE() on entry to the correspondingly named routine.
 */
#define MP_BOOTADDRESS_POST	0x10
#define MP_PROBE_POST		0x11
#define MPTABLE_PASS1_POST	0x12

#define MP_START_POST		0x13
#define MP_ENABLE_POST		0x14
#define MPTABLE_PASS2_POST	0x15

#define START_ALL_APS_POST	0x16
#define INSTALL_AP_TRAMP_POST	0x17
#define START_AP_POST		0x18

#define MP_ANNOUNCE_POST	0x19
238
/* used to hold the APs until we are ready to release them */
static struct mtx ap_boot_mtx;

/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int	current_postcode;

/** XXX FIXME: what system files declare these??? */
extern struct region_descriptor r_gdt, r_idt;

int	bsp_apic_ready = 0;	/* flags usability of BSP apic */
int	mp_naps;		/* # of Application processors */
int	mp_nbusses;		/* # of busses */
int	mp_napics;		/* # of IO APICs */
int	boot_cpu_id;		/* designated BSP */
vm_offset_t cpu_apic_address;	/* local APIC address, common to all CPUs */
vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
extern	int nkpt;

u_int32_t cpu_apic_versions[MAXCPU];
u_int32_t *io_apic_versions;	/* mp_napics entries, allocated in mptable_pass2() */

#ifdef APIC_INTR_REORDER
struct {
	volatile int *location;
	int bit;
} apic_isrbit_location[32];
#endif

/* per-IRQ record of the IO APIC and pin an interrupt is routed through */
struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];

/*
 * APIC ID logical/physical mapping structures.
 * We oversize these to simplify boot-time config.
 */
int     cpu_num_to_apic_id[NAPICID];
int     io_num_to_apic_id[NAPICID];
int     apic_id_to_logical[NAPICID];


/* AP uses this during bootstrap.  Do not staticize.  */
char *bootSTK;
static int bootAP;		/* read by init_secondary() as the AP's own index */

/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;

/* SMP page table page */
extern pt_entry_t *SMPpt;

struct pcb stoppcbs[MAXCPU];
289
/*
 * Local data and functions.
 */

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

static int	mp_capable;		/* set by i386_mp_probe(): MP table found */
static u_int	boot_address;		/* AP trampoline address, see mp_bootaddress() */
static u_int	base_memory;		/* size of base memory in bytes */

static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
static mpfps_t	mpfps;			/* MP floating pointer structure, or 0 */
static int	search_for_sig(u_int32_t target, int count);
static void	mp_enable(u_int boot_addr);

static void	mptable_pass1(void);
static int	mptable_pass2(void);
static void	default_mp_table(int type);
static void	fix_mp_table(void);
static void	setup_apic_irq_mapping(void);
static void	init_locks(void);
static int	start_all_aps(u_int boot_addr);
static void	install_ap_tramp(u_int boot_addr);
static int	start_ap(int logicalCpu, u_int boot_addr);
void		ap_init(void);
static int	apic_int_is_bus_type(int intr, int bus_type);
static void	release_aps(void *dummy);

/*
 * Locks used by the SMP code; see init_locks().
 */

/* lock region used by kernel profiling */
int	mcount_lock;

#ifdef USE_COMLOCK
/* locks com (tty) data/hardware accesses: a FASTINTR() */
struct mtx		com_mtx;
#endif /* USE_COMLOCK */
330
/*
 * Initialize the SMP locks.
 *
 * Currently this only sets up com_mtx as a spin mutex, and only when
 * USE_COMLOCK is defined; otherwise the function does nothing.
 */
static void
init_locks(void)
{

#ifdef USE_COMLOCK
	mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */
}
339
340/*
341 * Calculate usable address in base memory for AP trampoline code.
342 */
343u_int
344mp_bootaddress(u_int basemem)
345{
346	POSTCODE(MP_BOOTADDRESS_POST);
347
348	base_memory = basemem * 1024;	/* convert to bytes */
349
350	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
351	if ((base_memory - boot_address) < bootMP_size)
352		boot_address -= 4096;	/* not enough, lower by 4k */
353
354	return boot_address;
355}
356
357
358/*
359 * Look for an Intel MP spec table (ie, SMP capable hardware).
360 */
361void
362i386_mp_probe(void)
363{
364	int     x;
365	u_long  segment;
366	u_int32_t target;
367
368	POSTCODE(MP_PROBE_POST);
369
370	/* see if EBDA exists */
371	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
372		/* search first 1K of EBDA */
373		target = (u_int32_t) (segment << 4);
374		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
375			goto found;
376	} else {
377		/* last 1K of base memory, effective 'top of base' passed in */
378		target = (u_int32_t) (base_memory - 0x400);
379		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
380			goto found;
381	}
382
383	/* search the BIOS */
384	target = (u_int32_t) BIOS_BASE;
385	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
386		goto found;
387
388	/* nothing found */
389	mpfps = (mpfps_t)0;
390	mp_capable = 0;
391	return;
392
393found:
394	/* calculate needed resources */
395	mpfps = (mpfps_t)x;
396	mptable_pass1();
397
398	/* flag fact that we are running multiple processors */
399	mp_capable = 1;
400}
401
402int
403cpu_mp_probe(void)
404{
405	/*
406	 * Record BSP in CPU map
407	 * This is done here so that MBUF init code works correctly.
408	 */
409	all_cpus = 1;
410
411	return (mp_capable);
412}
413
414/*
415 * Initialize the SMP hardware and the APIC and start up the AP's.
416 */
417void
418cpu_mp_start(void)
419{
420	POSTCODE(MP_START_POST);
421
422	/* look for MP capable motherboard */
423	if (mp_capable)
424		mp_enable(boot_address);
425	else
426		panic("MP hardware not found!");
427
428	cpu_setregs();
429}
430
431
432/*
433 * Print various information about the SMP system hardware and setup.
434 */
435void
436cpu_mp_announce(void)
437{
438	int     x;
439
440	POSTCODE(MP_ANNOUNCE_POST);
441
442	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
443	printf(", version: 0x%08x", cpu_apic_versions[0]);
444	printf(", at 0x%08x\n", cpu_apic_address);
445	for (x = 1; x <= mp_naps; ++x) {
446		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
447		printf(", version: 0x%08x", cpu_apic_versions[x]);
448		printf(", at 0x%08x\n", cpu_apic_address);
449	}
450
451#if defined(APIC_IO)
452	for (x = 0; x < mp_napics; ++x) {
453		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
454		printf(", version: 0x%08x", io_apic_versions[x]);
455		printf(", at 0x%08x\n", io_apic_address[x]);
456	}
457#else
458	printf(" Warning: APIC I/O disabled\n");
459#endif	/* APIC_IO */
460}
461
/*
 * AP cpu's call this to sync up protected mode.
 *
 * Builds this CPU's private copy of the GDT, loads the GDT, IDT, LDT and
 * task register, then puts CR0 into a known state.  The CPU's index is
 * taken from bootAP.  The statement order matters: per-CPU data is only
 * touched through PCPU_*() after the new GDT has been loaded.
 */
void
init_secondary(void)
{
	int	gsel_tss;
	int	x, myid = bootAP;
	u_int	cr0;

	/* point the private-space and TSS segments at this CPU's area */
	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
	gdt_segs[GPROC0_SEL].ssd_base =
		(int) &SMP_prvspace[myid].pcpu.pc_common_tss;
	SMP_prvspace[myid].pcpu.pc_prvspace =
		&SMP_prvspace[myid].pcpu;

	/* fill this CPU's NGDT-entry slice of the shared gdt[] array */
	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	/* set up the per-CPU TSS and load the task register */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);

	pmap_set_opt();
}
511
512
#if defined(APIC_IO)
/*
 * Final configuration of the BSP's local APIC:
 *  - leave 'pic mode' when the MP table reported it (picmode != 0).
 *  - mask LINT0, the 8259 'virtual wire' connection.
 *  - unmask LINT1 so that it can deliver NMI.
 * With bootverbose set the resulting APIC state is dumped.
 */
void
bsp_apic_configure(void)
{
	u_char		imcr;
	u_int32_t	lvt;

	if (picmode) {
		/* select the IMCR, then set bit 0 of its current contents */
		outb(0x22, 0x70);
		imcr = inb(0x23) | 0x01;	/* mask external INTR */
		outb(0x23, imcr);		/* disconnect 8259s/NMI */
	}

	/* lint0: set the mask bit */
	lvt = lapic.lvt_lint0 | APIC_LVT_M;
	lapic.lvt_lint0 = lvt;

	/* lint1: clear the mask bit */
	lvt = lapic.lvt_lint1 & ~APIC_LVT_M;
	lapic.lvt_lint1 = lvt;

	if (bootverbose)
		apic_dump("bsp_apic_configure()");
}
#endif  /* APIC_IO */
548
549
550/*******************************************************************
551 * local functions and data
552 */
553
/*
 * start the SMP system
 *
 * Runs the second MP table pass under a temporary V == P mapping, applies
 * the post-scan fixups, programs the IO APICs and installs the IPI vectors
 * (APIC_IO only), initializes the SMP locks and finally starts the APs.
 *
 * boot_addr is handed unchanged to start_all_aps().
 */
static void
mp_enable(u_int boot_addr)
{
	int     x;
#if defined(APIC_IO)
	int     apic;
	u_int   ux;
#endif	/* APIC_IO */

	POSTCODE(MP_ENABLE_POST);

	/* turn on 4MB of V == P addressing so we can get to MP table */
	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
	invltlb();

	/* examine the MP table for needed info, uses physical addresses */
	x = mptable_pass2();

	/* remove the temporary mapping again */
	*(int *)PTD = 0;
	invltlb();

	/* can't process default configs till the CPU APIC is pmapped */
	if (x)
		default_mp_table(x);

	/* post scan cleanup */
	fix_mp_table();
	setup_apic_irq_mapping();

#if defined(APIC_IO)

	/* fill the LOGICAL io_apic_versions table */
	for (apic = 0; apic < mp_napics; ++apic) {
		ux = io_apic_read(apic, IOAPIC_VER);
		io_apic_versions[apic] = ux;
		io_apic_set_id(apic, IO_TO_ID(apic));
	}

	/* program each IO APIC in the system */
	for (apic = 0; apic < mp_napics; ++apic)
		if (io_apic_setup(apic) < 0)
			panic("IO APIC setup failure");

	/* install a 'Spurious INTerrupt' vector */
	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for TLB invalidation */
	setidt(XINVLTLB_OFFSET, Xinvltlb,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forwarding hardclock() */
	setidt(XHARDCLOCK_OFFSET, Xhardclock,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forwarding statclock() */
	setidt(XSTATCLOCK_OFFSET, Xstatclock,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for all-CPU rendezvous */
	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for forcing an additional software trap */
	setidt(XCPUAST_OFFSET, Xcpuast,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for CPU stop/restart */
	setidt(XCPUSTOP_OFFSET, Xcpustop,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#if defined(TEST_TEST1)
	/* install a "fake hardware INTerrupt" vector */
	setidt(XTEST1_OFFSET, Xtest1,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif  /** TEST_TEST1 */

#endif	/* APIC_IO */

	/* initialize all SMP locks */
	init_locks();

	/* start each Application Processor */
	start_all_aps(boot_addr);
}
642
643
644/*
645 * look for the MP spec signature
646 */
647
648/* string defined by the Intel MP Spec as identifying the MP table */
649#define MP_SIG		0x5f504d5f	/* _MP_ */
650#define NEXT(X)		((X) += 4)
651static int
652search_for_sig(u_int32_t target, int count)
653{
654	int     x;
655	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
656
657	for (x = 0; x < count; NEXT(x))
658		if (addr[x] == MP_SIG)
659			/* make array index a byte index */
660			return (target + (x * sizeof(u_int32_t)));
661
662	return -1;
663}
664
665
/*
 * Indexed by base table entry type (0-4).  The table passes use the
 * 'length' member to step from one entry to the next.
 */
static basetable_entry basetable_entry_types[] =
{
	{0, 20, "Processor"},
	{1, 8, "Bus"},
	{2, 8, "I/O APIC"},
	{3, 8, "I/O INT"},
	{4, 8, "Local INT"}
};
674
/* one slot of bus_data[]; bus_id is 0xff while the slot is unused */
typedef struct BUSDATA {
	u_char  bus_id;
	enum busTypes bus_type;
}       bus_datum;

/*
 * one slot of io_apic_ints[]: the INTENTRY fields plus the assigned vector.
 * int_type and int_vector are 0xff until filled in / assigned.
 */
typedef struct INTDATA {
	u_char  int_type;
	u_short int_flags;
	u_char  src_bus_id;
	u_char  src_bus_irq;
	u_char  dst_apic_id;
	u_char  dst_apic_int;
	u_char	int_vector;
}       io_int, local_int;

/* pairs a bus type code with its printable name (at most 6 characters) */
typedef struct BUSTYPENAME {
	u_char  type;
	char    name[7];
}       bus_type_name;
694
/*
 * Known bus type names and the bus type code each maps to; "---" rows are
 * placeholders mapped to UNKNOWN_BUSTYPE.
 * NOTE(review): "MCA" appears twice, and the code that searches this table
 * is not visible here -- confirm whether row position is significant before
 * reordering or de-duplicating.
 */
static bus_type_name bus_type_table[] =
{
	{CBUS, "CBUS"},
	{CBUSII, "CBUSII"},
	{EISA, "EISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{ISA, "ISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{PCI, "PCI"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{XPRESS, "XPRESS"},
	{UNKNOWN_BUSTYPE, "---"}
};
/*
 * from MP spec v1.4, table 5-1
 *
 * One row per default configuration type; row [type - 1] describes default
 * configuration 'type' (1-7).  255 marks an absent second bus.
 */
static int default_data[7][5] =
{
/*   nbus, id0, type0, id1, type1 */
	{1, 0, ISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, MCA, 255, 255},
	{2, 0, ISA, 1, PCI},
	{2, 0, EISA, 1, PCI},
	{2, 0, MCA, 1, PCI}
};
729
730
/* the bus data, mp_nbusses entries allocated in mptable_pass2() */
static bus_datum *bus_data;

/* the IO INT data, one entry per possible APIC INTerrupt */
/* (nintrs + 1 entries are allocated in mptable_pass2()) */
static io_int  *io_apic_ints;

/* number of IO INT entries counted by mptable_pass1() */
static int nintrs;

/*
 * Per-entry handlers used by mptable_pass2(); a non-zero return makes the
 * caller advance the matching index (cpu, bus, apic or intr).
 */
static int processor_entry	__P((proc_entry_ptr entry, int cpu));
static int bus_entry		__P((bus_entry_ptr entry, int bus));
static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
static int int_entry		__P((int_entry_ptr entry, int intr));
static int lookup_bus_type	__P((char *name));
744
745
746/*
747 * 1st pass on motherboard's Intel MP specification table.
748 *
749 * initializes:
750 *	mp_ncpus = 1
751 *
752 * determines:
753 *	cpu_apic_address (common to all CPUs)
754 *	io_apic_address[N]
755 *	mp_naps
756 *	mp_nbusses
757 *	mp_napics
758 *	nintrs
759 */
760static void
761mptable_pass1(void)
762{
763	int	x;
764	mpcth_t	cth;
765	int	totalSize;
766	void*	position;
767	int	count;
768	int	type;
769
770	POSTCODE(MPTABLE_PASS1_POST);
771
772	/* clear various tables */
773	for (x = 0; x < NAPICID; ++x) {
774		io_apic_address[x] = ~0;	/* IO APIC address table */
775	}
776
777	/* init everything to empty */
778	mp_naps = 0;
779	mp_nbusses = 0;
780	mp_napics = 0;
781	nintrs = 0;
782
783	/* check for use of 'default' configuration */
784	if (MPFPS_MPFB1 != 0) {
785		/* use default addresses */
786		cpu_apic_address = DEFAULT_APIC_BASE;
787		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
788
789		/* fill in with defaults */
790		mp_naps = 2;		/* includes BSP */
791		mp_maxid = 1;
792		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
793#if defined(APIC_IO)
794		mp_napics = 1;
795		nintrs = 16;
796#endif	/* APIC_IO */
797	}
798	else {
799		if ((cth = mpfps->pap) == 0)
800			panic("MP Configuration Table Header MISSING!");
801
802		cpu_apic_address = (vm_offset_t) cth->apic_address;
803
804		/* walk the table, recording info of interest */
805		totalSize = cth->base_table_length - sizeof(struct MPCTH);
806		position = (u_char *) cth + sizeof(struct MPCTH);
807		count = cth->entry_count;
808
809		while (count--) {
810			switch (type = *(u_char *) position) {
811			case 0: /* processor_entry */
812				if (((proc_entry_ptr)position)->cpu_flags
813				    & PROCENTRY_FLAG_EN) {
814					++mp_naps;
815					mp_maxid++;
816				}
817				break;
818			case 1: /* bus_entry */
819				++mp_nbusses;
820				break;
821			case 2: /* io_apic_entry */
822				if (((io_apic_entry_ptr)position)->apic_flags
823					& IOAPICENTRY_FLAG_EN)
824					io_apic_address[mp_napics++] =
825					    (vm_offset_t)((io_apic_entry_ptr)
826						position)->apic_address;
827				break;
828			case 3: /* int_entry */
829				++nintrs;
830				break;
831			case 4:	/* int_entry */
832				break;
833			default:
834				panic("mpfps Base Table HOSED!");
835				/* NOTREACHED */
836			}
837
838			totalSize -= basetable_entry_types[type].length;
839			(u_char*)position += basetable_entry_types[type].length;
840		}
841	}
842
843	/* qualify the numbers */
844	if (mp_naps > MAXCPU) {
845		printf("Warning: only using %d of %d available CPUs!\n",
846			MAXCPU, mp_naps);
847		mp_naps = MAXCPU;
848	}
849
850	/*
851	 * Count the BSP.
852	 * This is also used as a counter while starting the APs.
853	 */
854	mp_ncpus = 1;
855
856	--mp_naps;	/* subtract the BSP */
857}
858
859
860/*
861 * 2nd pass on motherboard's Intel MP specification table.
862 *
863 * sets:
864 *	boot_cpu_id
865 *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
866 *	CPU_TO_ID(N), logical CPU to APIC ID table
867 *	IO_TO_ID(N), logical IO to APIC ID table
868 *	bus_data[N]
869 *	io_apic_ints[N]
870 */
871static int
872mptable_pass2(void)
873{
874	int     x;
875	mpcth_t cth;
876	int     totalSize;
877	void*   position;
878	int     count;
879	int     type;
880	int     apic, bus, cpu, intr;
881	int	i, j;
882	int	pgeflag;
883
884	POSTCODE(MPTABLE_PASS2_POST);
885
886	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
887
888	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
889	    M_DEVBUF, M_WAITOK);
890	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
891	    M_DEVBUF, M_WAITOK);
892	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
893	    M_DEVBUF, M_WAITOK);
894	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
895	    M_DEVBUF, M_WAITOK);
896
897	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
898
899	for (i = 0; i < mp_napics; i++) {
900		for (j = 0; j < mp_napics; j++) {
901			/* same page frame as a previous IO apic? */
902			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
903			    (io_apic_address[i] & PG_FRAME)) {
904				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
905					+ (NPTEPG-2-j) * PAGE_SIZE
906					+ (io_apic_address[i] & PAGE_MASK));
907				break;
908			}
909			/* use this slot if available */
910			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
911				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
912				    pgeflag | (io_apic_address[i] & PG_FRAME));
913				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
914					+ (NPTEPG-2-j) * PAGE_SIZE
915					+ (io_apic_address[i] & PAGE_MASK));
916				break;
917			}
918		}
919	}
920
921	/* clear various tables */
922	for (x = 0; x < NAPICID; ++x) {
923		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
924		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
925		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
926	}
927
928	/* clear bus data table */
929	for (x = 0; x < mp_nbusses; ++x)
930		bus_data[x].bus_id = 0xff;
931
932	/* clear IO APIC INT table */
933	for (x = 0; x < (nintrs + 1); ++x) {
934		io_apic_ints[x].int_type = 0xff;
935		io_apic_ints[x].int_vector = 0xff;
936	}
937
938	/* setup the cpu/apic mapping arrays */
939	boot_cpu_id = -1;
940
941	/* record whether PIC or virtual-wire mode */
942	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
943
944	/* check for use of 'default' configuration */
945	if (MPFPS_MPFB1 != 0)
946		return MPFPS_MPFB1;	/* return default configuration type */
947
948	if ((cth = mpfps->pap) == 0)
949		panic("MP Configuration Table Header MISSING!");
950
951	/* walk the table, recording info of interest */
952	totalSize = cth->base_table_length - sizeof(struct MPCTH);
953	position = (u_char *) cth + sizeof(struct MPCTH);
954	count = cth->entry_count;
955	apic = bus = intr = 0;
956	cpu = 1;				/* pre-count the BSP */
957
958	while (count--) {
959		switch (type = *(u_char *) position) {
960		case 0:
961			if (processor_entry(position, cpu))
962				++cpu;
963			break;
964		case 1:
965			if (bus_entry(position, bus))
966				++bus;
967			break;
968		case 2:
969			if (io_apic_entry(position, apic))
970				++apic;
971			break;
972		case 3:
973			if (int_entry(position, intr))
974				++intr;
975			break;
976		case 4:
977			/* int_entry(position); */
978			break;
979		default:
980			panic("mpfps Base Table HOSED!");
981			/* NOTREACHED */
982		}
983
984		totalSize -= basetable_entry_types[type].length;
985		(u_char *) position += basetable_entry_types[type].length;
986	}
987
988	if (boot_cpu_id == -1)
989		panic("NO BSP found!");
990
991	/* report fact that its NOT a default configuration */
992	return 0;
993}
994
995
996void
997assign_apic_irq(int apic, int intpin, int irq)
998{
999	int x;
1000
1001	if (int_to_apicintpin[irq].ioapic != -1)
1002		panic("assign_apic_irq: inconsistent table");
1003
1004	int_to_apicintpin[irq].ioapic = apic;
1005	int_to_apicintpin[irq].int_pin = intpin;
1006	int_to_apicintpin[irq].apic_address = ioapic[apic];
1007	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1008
1009	for (x = 0; x < nintrs; x++) {
1010		if ((io_apic_ints[x].int_type == 0 ||
1011		     io_apic_ints[x].int_type == 3) &&
1012		    io_apic_ints[x].int_vector == 0xff &&
1013		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1014		    io_apic_ints[x].dst_apic_int == intpin)
1015			io_apic_ints[x].int_vector = irq;
1016	}
1017}
1018
1019void
1020revoke_apic_irq(int irq)
1021{
1022	int x;
1023	int oldapic;
1024	int oldintpin;
1025
1026	if (int_to_apicintpin[irq].ioapic == -1)
1027		panic("assign_apic_irq: inconsistent table");
1028
1029	oldapic = int_to_apicintpin[irq].ioapic;
1030	oldintpin = int_to_apicintpin[irq].int_pin;
1031
1032	int_to_apicintpin[irq].ioapic = -1;
1033	int_to_apicintpin[irq].int_pin = 0;
1034	int_to_apicintpin[irq].apic_address = NULL;
1035	int_to_apicintpin[irq].redirindex = 0;
1036
1037	for (x = 0; x < nintrs; x++) {
1038		if ((io_apic_ints[x].int_type == 0 ||
1039		     io_apic_ints[x].int_type == 3) &&
1040		    io_apic_ints[x].int_vector == 0xff &&
1041		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1042		    io_apic_ints[x].dst_apic_int == oldintpin)
1043			io_apic_ints[x].int_vector = 0xff;
1044	}
1045}
1046
1047
1048static void
1049allocate_apic_irq(int intr)
1050{
1051	int apic;
1052	int intpin;
1053	int irq;
1054
1055	if (io_apic_ints[intr].int_vector != 0xff)
1056		return;		/* Interrupt handler already assigned */
1057
1058	if (io_apic_ints[intr].int_type != 0 &&
1059	    (io_apic_ints[intr].int_type != 3 ||
1060	     (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1061	      io_apic_ints[intr].dst_apic_int == 0)))
1062		return;		/* Not INT or ExtInt on != (0, 0) */
1063
1064	irq = 0;
1065	while (irq < APIC_INTMAPSIZE &&
1066	       int_to_apicintpin[irq].ioapic != -1)
1067		irq++;
1068
1069	if (irq >= APIC_INTMAPSIZE)
1070		return;		/* No free interrupt handlers */
1071
1072	apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1073	intpin = io_apic_ints[intr].dst_apic_int;
1074
1075	assign_apic_irq(apic, intpin, irq);
1076	io_apic_setup_intpin(apic, intpin);
1077}
1078
1079
1080static void
1081swap_apic_id(int apic, int oldid, int newid)
1082{
1083	int x;
1084	int oapic;
1085
1086
1087	if (oldid == newid)
1088		return;			/* Nothing to do */
1089
1090	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
1091	       apic, oldid, newid);
1092
1093	/* Swap physical APIC IDs in interrupt entries */
1094	for (x = 0; x < nintrs; x++) {
1095		if (io_apic_ints[x].dst_apic_id == oldid)
1096			io_apic_ints[x].dst_apic_id = newid;
1097		else if (io_apic_ints[x].dst_apic_id == newid)
1098			io_apic_ints[x].dst_apic_id = oldid;
1099	}
1100
1101	/* Swap physical APIC IDs in IO_TO_ID mappings */
1102	for (oapic = 0; oapic < mp_napics; oapic++)
1103		if (IO_TO_ID(oapic) == newid)
1104			break;
1105
1106	if (oapic < mp_napics) {
1107		printf("Changing APIC ID for IO APIC #%d from "
1108		       "%d to %d in MP table\n",
1109		       oapic, newid, oldid);
1110		IO_TO_ID(oapic) = oldid;
1111	}
1112	IO_TO_ID(apic) = newid;
1113}
1114
1115
1116static void
1117fix_id_to_io_mapping(void)
1118{
1119	int x;
1120
1121	for (x = 0; x < NAPICID; x++)
1122		ID_TO_IO(x) = -1;
1123
1124	for (x = 0; x <= mp_naps; x++)
1125		if (CPU_TO_ID(x) < NAPICID)
1126			ID_TO_IO(CPU_TO_ID(x)) = x;
1127
1128	for (x = 0; x < mp_napics; x++)
1129		if (IO_TO_ID(x) < NAPICID)
1130			ID_TO_IO(IO_TO_ID(x)) = x;
1131}
1132
1133
1134static int
1135first_free_apic_id(void)
1136{
1137	int freeid, x;
1138
1139	for (freeid = 0; freeid < NAPICID; freeid++) {
1140		for (x = 0; x <= mp_naps; x++)
1141			if (CPU_TO_ID(x) == freeid)
1142				break;
1143		if (x <= mp_naps)
1144			continue;
1145		for (x = 0; x < mp_napics; x++)
1146			if (IO_TO_ID(x) == freeid)
1147				break;
1148		if (x < mp_napics)
1149			continue;
1150		return freeid;
1151	}
1152	return freeid;
1153}
1154
1155
1156static int
1157io_apic_id_acceptable(int apic, int id)
1158{
1159	int cpu;		/* Logical CPU number */
1160	int oapic;		/* Logical IO APIC number for other IO APIC */
1161
1162	if (id >= NAPICID)
1163		return 0;	/* Out of range */
1164
1165	for (cpu = 0; cpu <= mp_naps; cpu++)
1166		if (CPU_TO_ID(cpu) == id)
1167			return 0;	/* Conflict with CPU */
1168
1169	for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1170		if (IO_TO_ID(oapic) == id)
1171			return 0;	/* Conflict with other APIC */
1172
1173	return 1;		/* ID is acceptable for IO APIC */
1174}
1175
1176
1177/*
1178 * parse an Intel MP specification table
1179 */
1180static void
1181fix_mp_table(void)
1182{
1183	int	x;
1184	int	id;
1185	int	bus_0 = 0;	/* Stop GCC warning */
1186	int	bus_pci = 0;	/* Stop GCC warning */
1187	int	num_pci_bus;
1188	int	apic;		/* IO APIC unit number */
1189	int     freeid;		/* Free physical APIC ID */
1190	int	physid;		/* Current physical IO APIC ID */
1191
1192	/*
1193	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1194	 * did it wrong.  The MP spec says that when more than 1 PCI bus
1195	 * exists the BIOS must begin with bus entries for the PCI bus and use
1196	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
1197	 * exists the BIOS can choose to ignore this ordering, and indeed many
1198	 * MP motherboards do ignore it.  This causes a problem when the PCI
1199	 * sub-system makes requests of the MP sub-system based on PCI bus
1200	 * numbers.	So here we look for the situation and renumber the
1201	 * busses and associated INTs in an effort to "make it right".
1202	 */
1203
1204	/* find bus 0, PCI bus, count the number of PCI busses */
1205	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1206		if (bus_data[x].bus_id == 0) {
1207			bus_0 = x;
1208		}
1209		if (bus_data[x].bus_type == PCI) {
1210			++num_pci_bus;
1211			bus_pci = x;
1212		}
1213	}
1214	/*
1215	 * bus_0 == slot of bus with ID of 0
1216	 * bus_pci == slot of last PCI bus encountered
1217	 */
1218
1219	/* check the 1 PCI bus case for sanity */
1220	/* if it is number 0 all is well */
1221	if (num_pci_bus == 1 &&
1222	    bus_data[bus_pci].bus_id != 0) {
1223
1224		/* mis-numbered, swap with whichever bus uses slot 0 */
1225
1226		/* swap the bus entry types */
1227		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1228		bus_data[bus_0].bus_type = PCI;
1229
1230		/* swap each relavant INTerrupt entry */
1231		id = bus_data[bus_pci].bus_id;
1232		for (x = 0; x < nintrs; ++x) {
1233			if (io_apic_ints[x].src_bus_id == id) {
1234				io_apic_ints[x].src_bus_id = 0;
1235			}
1236			else if (io_apic_ints[x].src_bus_id == 0) {
1237				io_apic_ints[x].src_bus_id = id;
1238			}
1239		}
1240	}
1241
1242	/* Assign IO APIC IDs.
1243	 *
1244	 * First try the existing ID. If a conflict is detected, try
1245	 * the ID in the MP table.  If a conflict is still detected, find
1246	 * a free id.
1247	 *
1248	 * We cannot use the ID_TO_IO table before all conflicts has been
1249	 * resolved and the table has been corrected.
1250	 */
1251	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
1252
1253		/* First try to use the value set by the BIOS */
1254		physid = io_apic_get_id(apic);
1255		if (io_apic_id_acceptable(apic, physid)) {
1256			if (IO_TO_ID(apic) != physid)
1257				swap_apic_id(apic, IO_TO_ID(apic), physid);
1258			continue;
1259		}
1260
1261		/* Then check if the value in the MP table is acceptable */
1262		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
1263			continue;
1264
1265		/* Last resort, find a free APIC ID and use it */
1266		freeid = first_free_apic_id();
1267		if (freeid >= NAPICID)
1268			panic("No free physical APIC IDs found");
1269
1270		if (io_apic_id_acceptable(apic, freeid)) {
1271			swap_apic_id(apic, IO_TO_ID(apic), freeid);
1272			continue;
1273		}
1274		panic("Free physical APIC ID not usable");
1275	}
1276	fix_id_to_io_mapping();
1277
1278	/* detect and fix broken Compaq MP table */
1279	if (apic_int_type(0, 0) == -1) {
1280		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
1281		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
1282		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
1283		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
1284		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
1285		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
1286		nintrs++;
1287	}
1288}
1289
1290
1291/* Assign low level interrupt handlers */
1292static void
1293setup_apic_irq_mapping(void)
1294{
1295	int	x;
1296	int	int_vector;
1297
1298	/* Clear array */
1299	for (x = 0; x < APIC_INTMAPSIZE; x++) {
1300		int_to_apicintpin[x].ioapic = -1;
1301		int_to_apicintpin[x].int_pin = 0;
1302		int_to_apicintpin[x].apic_address = NULL;
1303		int_to_apicintpin[x].redirindex = 0;
1304	}
1305
1306	/* First assign ISA/EISA interrupts */
1307	for (x = 0; x < nintrs; x++) {
1308		int_vector = io_apic_ints[x].src_bus_irq;
1309		if (int_vector < APIC_INTMAPSIZE &&
1310		    io_apic_ints[x].int_vector == 0xff &&
1311		    int_to_apicintpin[int_vector].ioapic == -1 &&
1312		    (apic_int_is_bus_type(x, ISA) ||
1313		     apic_int_is_bus_type(x, EISA)) &&
1314		    io_apic_ints[x].int_type == 0) {
1315			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1316					io_apic_ints[x].dst_apic_int,
1317					int_vector);
1318		}
1319	}
1320
1321	/* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1322	for (x = 0; x < nintrs; x++) {
1323		if (io_apic_ints[x].dst_apic_int == 0 &&
1324		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1325		    io_apic_ints[x].int_vector == 0xff &&
1326		    int_to_apicintpin[0].ioapic == -1 &&
1327		    io_apic_ints[x].int_type == 3) {
1328			assign_apic_irq(0, 0, 0);
1329			break;
1330		}
1331	}
1332	/* PCI interrupt assignment is deferred */
1333}
1334
1335
1336static int
1337processor_entry(proc_entry_ptr entry, int cpu)
1338{
1339	/* check for usability */
1340	if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1341		return 0;
1342
1343	if(entry->apic_id >= NAPICID)
1344		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
1345	/* check for BSP flag */
1346	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1347		boot_cpu_id = entry->apic_id;
1348		CPU_TO_ID(0) = entry->apic_id;
1349		ID_TO_CPU(entry->apic_id) = 0;
1350		return 0;	/* its already been counted */
1351	}
1352
1353	/* add another AP to list, if less than max number of CPUs */
1354	else if (cpu < MAXCPU) {
1355		CPU_TO_ID(cpu) = entry->apic_id;
1356		ID_TO_CPU(entry->apic_id) = cpu;
1357		return 1;
1358	}
1359
1360	return 0;
1361}
1362
1363
1364static int
1365bus_entry(bus_entry_ptr entry, int bus)
1366{
1367	int     x;
1368	char    c, name[8];
1369
1370	/* encode the name into an index */
1371	for (x = 0; x < 6; ++x) {
1372		if ((c = entry->bus_type[x]) == ' ')
1373			break;
1374		name[x] = c;
1375	}
1376	name[x] = '\0';
1377
1378	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1379		panic("unknown bus type: '%s'", name);
1380
1381	bus_data[bus].bus_id = entry->bus_id;
1382	bus_data[bus].bus_type = x;
1383
1384	return 1;
1385}
1386
1387
1388static int
1389io_apic_entry(io_apic_entry_ptr entry, int apic)
1390{
1391	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1392		return 0;
1393
1394	IO_TO_ID(apic) = entry->apic_id;
1395	if (entry->apic_id < NAPICID)
1396		ID_TO_IO(entry->apic_id) = apic;
1397
1398	return 1;
1399}
1400
1401
1402static int
1403lookup_bus_type(char *name)
1404{
1405	int     x;
1406
1407	for (x = 0; x < MAX_BUSTYPE; ++x)
1408		if (strcmp(bus_type_table[x].name, name) == 0)
1409			return bus_type_table[x].type;
1410
1411	return UNKNOWN_BUSTYPE;
1412}
1413
1414
1415static int
1416int_entry(int_entry_ptr entry, int intr)
1417{
1418	int apic;
1419
1420	io_apic_ints[intr].int_type = entry->int_type;
1421	io_apic_ints[intr].int_flags = entry->int_flags;
1422	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1423	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1424	if (entry->dst_apic_id == 255) {
1425		/* This signal goes to all IO APICS.  Select an IO APIC
1426		   with sufficient number of interrupt pins */
1427		for (apic = 0; apic < mp_napics; apic++)
1428			if (((io_apic_read(apic, IOAPIC_VER) &
1429			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1430			    entry->dst_apic_int)
1431				break;
1432		if (apic < mp_napics)
1433			io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1434		else
1435			io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1436	} else
1437		io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1438	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1439
1440	return 1;
1441}
1442
1443
1444static int
1445apic_int_is_bus_type(int intr, int bus_type)
1446{
1447	int     bus;
1448
1449	for (bus = 0; bus < mp_nbusses; ++bus)
1450		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1451		    && ((int) bus_data[bus].bus_type == bus_type))
1452			return 1;
1453
1454	return 0;
1455}
1456
1457
1458/*
1459 * Given a traditional ISA INT mask, return an APIC mask.
1460 */
1461u_int
1462isa_apic_mask(u_int isa_mask)
1463{
1464	int isa_irq;
1465	int apic_pin;
1466
1467#if defined(SKIP_IRQ15_REDIRECT)
1468	if (isa_mask == (1 << 15)) {
1469		printf("skipping ISA IRQ15 redirect\n");
1470		return isa_mask;
1471	}
1472#endif  /* SKIP_IRQ15_REDIRECT */
1473
1474	isa_irq = ffs(isa_mask);		/* find its bit position */
1475	if (isa_irq == 0)			/* doesn't exist */
1476		return 0;
1477	--isa_irq;				/* make it zero based */
1478
1479	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
1480	if (apic_pin == -1)
1481		return 0;
1482
1483	return (1 << apic_pin);			/* convert pin# to a mask */
1484}
1485
1486
1487/*
1488 * Determine which APIC pin an ISA/EISA INT is attached to.
1489 */
1490#define INTTYPE(I)	(io_apic_ints[(I)].int_type)
1491#define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
1492#define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
1493#define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1494
1495#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
1496int
1497isa_apic_irq(int isa_irq)
1498{
1499	int     intr;
1500
1501	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
1502		if (INTTYPE(intr) == 0) {		/* standard INT */
1503			if (SRCBUSIRQ(intr) == isa_irq) {
1504				if (apic_int_is_bus_type(intr, ISA) ||
1505			            apic_int_is_bus_type(intr, EISA)) {
1506					if (INTIRQ(intr) == 0xff)
1507						return -1; /* unassigned */
1508					return INTIRQ(intr);	/* found */
1509				}
1510			}
1511		}
1512	}
1513	return -1;					/* NOT found */
1514}
1515
1516
1517/*
1518 * Determine which APIC pin a PCI INT is attached to.
1519 */
1520#define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
1521#define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1522#define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
1523int
1524pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1525{
1526	int     intr;
1527
1528	--pciInt;					/* zero based */
1529
1530	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
1531		if ((INTTYPE(intr) == 0)		/* standard INT */
1532		    && (SRCBUSID(intr) == pciBus)
1533		    && (SRCBUSDEVICE(intr) == pciDevice)
1534		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
1535			if (apic_int_is_bus_type(intr, PCI)) {
1536				if (INTIRQ(intr) == 0xff)
1537					allocate_apic_irq(intr);
1538				if (INTIRQ(intr) == 0xff)
1539					return -1;	/* unassigned */
1540				return INTIRQ(intr);	/* exact match */
1541			}
1542
1543	return -1;					/* NOT found */
1544}
1545
1546int
1547next_apic_irq(int irq)
1548{
1549	int intr, ointr;
1550	int bus, bustype;
1551
1552	bus = 0;
1553	bustype = 0;
1554	for (intr = 0; intr < nintrs; intr++) {
1555		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1556			continue;
1557		bus = SRCBUSID(intr);
1558		bustype = apic_bus_type(bus);
1559		if (bustype != ISA &&
1560		    bustype != EISA &&
1561		    bustype != PCI)
1562			continue;
1563		break;
1564	}
1565	if (intr >= nintrs) {
1566		return -1;
1567	}
1568	for (ointr = intr + 1; ointr < nintrs; ointr++) {
1569		if (INTTYPE(ointr) != 0)
1570			continue;
1571		if (bus != SRCBUSID(ointr))
1572			continue;
1573		if (bustype == PCI) {
1574			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1575				continue;
1576			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1577				continue;
1578		}
1579		if (bustype == ISA || bustype == EISA) {
1580			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1581				continue;
1582		}
1583		if (INTPIN(intr) == INTPIN(ointr))
1584			continue;
1585		break;
1586	}
1587	if (ointr >= nintrs) {
1588		return -1;
1589	}
1590	return INTIRQ(ointr);
1591}
1592#undef SRCBUSLINE
1593#undef SRCBUSDEVICE
1594#undef SRCBUSID
1595#undef SRCBUSIRQ
1596
1597#undef INTPIN
1598#undef INTIRQ
1599#undef INTAPIC
1600#undef INTTYPE
1601
1602
1603/*
1604 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1605 *
1606 * XXX FIXME:
1607 *  Exactly what this means is unclear at this point.  It is a solution
1608 *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
1609 *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
1610 *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1611 *  option.
1612 */
1613int
1614undirect_isa_irq(int rirq)
1615{
1616#if defined(READY)
1617	if (bootverbose)
1618	    printf("Freeing redirected ISA irq %d.\n", rirq);
1619	/** FIXME: tickle the MB redirector chip */
1620	return -1;
1621#else
1622	if (bootverbose)
1623	    printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1624	return 0;
1625#endif  /* READY */
1626}
1627
1628
1629/*
1630 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1631 */
1632int
1633undirect_pci_irq(int rirq)
1634{
1635#if defined(READY)
1636	if (bootverbose)
1637		printf("Freeing redirected PCI irq %d.\n", rirq);
1638
1639	/** FIXME: tickle the MB redirector chip */
1640	return -1;
1641#else
1642	if (bootverbose)
1643		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1644		       rirq);
1645	return 0;
1646#endif  /* READY */
1647}
1648
1649
1650/*
1651 * given a bus ID, return:
1652 *  the bus type if found
1653 *  -1 if NOT found
1654 */
1655int
1656apic_bus_type(int id)
1657{
1658	int     x;
1659
1660	for (x = 0; x < mp_nbusses; ++x)
1661		if (bus_data[x].bus_id == id)
1662			return bus_data[x].bus_type;
1663
1664	return -1;
1665}
1666
1667
1668/*
1669 * given a LOGICAL APIC# and pin#, return:
1670 *  the associated src bus ID if found
1671 *  -1 if NOT found
1672 */
1673int
1674apic_src_bus_id(int apic, int pin)
1675{
1676	int     x;
1677
1678	/* search each of the possible INTerrupt sources */
1679	for (x = 0; x < nintrs; ++x)
1680		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1681		    (pin == io_apic_ints[x].dst_apic_int))
1682			return (io_apic_ints[x].src_bus_id);
1683
1684	return -1;		/* NOT found */
1685}
1686
1687
1688/*
1689 * given a LOGICAL APIC# and pin#, return:
1690 *  the associated src bus IRQ if found
1691 *  -1 if NOT found
1692 */
1693int
1694apic_src_bus_irq(int apic, int pin)
1695{
1696	int     x;
1697
1698	for (x = 0; x < nintrs; x++)
1699		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1700		    (pin == io_apic_ints[x].dst_apic_int))
1701			return (io_apic_ints[x].src_bus_irq);
1702
1703	return -1;		/* NOT found */
1704}
1705
1706
1707/*
1708 * given a LOGICAL APIC# and pin#, return:
1709 *  the associated INTerrupt type if found
1710 *  -1 if NOT found
1711 */
1712int
1713apic_int_type(int apic, int pin)
1714{
1715	int     x;
1716
1717	/* search each of the possible INTerrupt sources */
1718	for (x = 0; x < nintrs; ++x)
1719		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1720		    (pin == io_apic_ints[x].dst_apic_int))
1721			return (io_apic_ints[x].int_type);
1722
1723	return -1;		/* NOT found */
1724}
1725
1726int
1727apic_irq(int apic, int pin)
1728{
1729	int x;
1730	int res;
1731
1732	for (x = 0; x < nintrs; ++x)
1733		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1734		    (pin == io_apic_ints[x].dst_apic_int)) {
1735			res = io_apic_ints[x].int_vector;
1736			if (res == 0xff)
1737				return -1;
1738			if (apic != int_to_apicintpin[res].ioapic)
1739				panic("apic_irq: inconsistent table");
1740			if (pin != int_to_apicintpin[res].int_pin)
1741				panic("apic_irq inconsistent table (2)");
1742			return res;
1743		}
1744	return -1;
1745}
1746
1747
1748/*
1749 * given a LOGICAL APIC# and pin#, return:
1750 *  the associated trigger mode if found
1751 *  -1 if NOT found
1752 */
1753int
1754apic_trigger(int apic, int pin)
1755{
1756	int     x;
1757
1758	/* search each of the possible INTerrupt sources */
1759	for (x = 0; x < nintrs; ++x)
1760		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1761		    (pin == io_apic_ints[x].dst_apic_int))
1762			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1763
1764	return -1;		/* NOT found */
1765}
1766
1767
1768/*
1769 * given a LOGICAL APIC# and pin#, return:
1770 *  the associated 'active' level if found
1771 *  -1 if NOT found
1772 */
1773int
1774apic_polarity(int apic, int pin)
1775{
1776	int     x;
1777
1778	/* search each of the possible INTerrupt sources */
1779	for (x = 0; x < nintrs; ++x)
1780		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1781		    (pin == io_apic_ints[x].dst_apic_int))
1782			return (io_apic_ints[x].int_flags & 0x03);
1783
1784	return -1;		/* NOT found */
1785}
1786
1787
1788/*
1789 * set data according to MP defaults
1790 * FIXME: probably not complete yet...
1791 */
1792static void
1793default_mp_table(int type)
1794{
1795	int     ap_cpu_id;
1796#if defined(APIC_IO)
1797	int     io_apic_id;
1798	int     pin;
1799#endif	/* APIC_IO */
1800
1801#if 0
1802	printf("  MP default config type: %d\n", type);
1803	switch (type) {
1804	case 1:
1805		printf("   bus: ISA, APIC: 82489DX\n");
1806		break;
1807	case 2:
1808		printf("   bus: EISA, APIC: 82489DX\n");
1809		break;
1810	case 3:
1811		printf("   bus: EISA, APIC: 82489DX\n");
1812		break;
1813	case 4:
1814		printf("   bus: MCA, APIC: 82489DX\n");
1815		break;
1816	case 5:
1817		printf("   bus: ISA+PCI, APIC: Integrated\n");
1818		break;
1819	case 6:
1820		printf("   bus: EISA+PCI, APIC: Integrated\n");
1821		break;
1822	case 7:
1823		printf("   bus: MCA+PCI, APIC: Integrated\n");
1824		break;
1825	default:
1826		printf("   future type\n");
1827		break;
1828		/* NOTREACHED */
1829	}
1830#endif	/* 0 */
1831
1832	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1833	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1834
1835	/* BSP */
1836	CPU_TO_ID(0) = boot_cpu_id;
1837	ID_TO_CPU(boot_cpu_id) = 0;
1838
1839	/* one and only AP */
1840	CPU_TO_ID(1) = ap_cpu_id;
1841	ID_TO_CPU(ap_cpu_id) = 1;
1842
1843#if defined(APIC_IO)
1844	/* one and only IO APIC */
1845	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1846
1847	/*
1848	 * sanity check, refer to MP spec section 3.6.6, last paragraph
1849	 * necessary as some hardware isn't properly setting up the IO APIC
1850	 */
1851#if defined(REALLY_ANAL_IOAPICID_VALUE)
1852	if (io_apic_id != 2) {
1853#else
1854	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1855#endif	/* REALLY_ANAL_IOAPICID_VALUE */
1856		io_apic_set_id(0, 2);
1857		io_apic_id = 2;
1858	}
1859	IO_TO_ID(0) = io_apic_id;
1860	ID_TO_IO(io_apic_id) = 0;
1861#endif	/* APIC_IO */
1862
1863	/* fill out bus entries */
1864	switch (type) {
1865	case 1:
1866	case 2:
1867	case 3:
1868	case 4:
1869	case 5:
1870	case 6:
1871	case 7:
1872		bus_data[0].bus_id = default_data[type - 1][1];
1873		bus_data[0].bus_type = default_data[type - 1][2];
1874		bus_data[1].bus_id = default_data[type - 1][3];
1875		bus_data[1].bus_type = default_data[type - 1][4];
1876		break;
1877
1878	/* case 4: case 7:		   MCA NOT supported */
1879	default:		/* illegal/reserved */
1880		panic("BAD default MP config: %d", type);
1881		/* NOTREACHED */
1882	}
1883
1884#if defined(APIC_IO)
1885	/* general cases from MP v1.4, table 5-2 */
1886	for (pin = 0; pin < 16; ++pin) {
1887		io_apic_ints[pin].int_type = 0;
1888		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
1889		io_apic_ints[pin].src_bus_id = 0;
1890		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
1891		io_apic_ints[pin].dst_apic_id = io_apic_id;
1892		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
1893	}
1894
1895	/* special cases from MP v1.4, table 5-2 */
1896	if (type == 2) {
1897		io_apic_ints[2].int_type = 0xff;	/* N/C */
1898		io_apic_ints[13].int_type = 0xff;	/* N/C */
1899#if !defined(APIC_MIXED_MODE)
1900		/** FIXME: ??? */
1901		panic("sorry, can't support type 2 default yet");
1902#endif	/* APIC_MIXED_MODE */
1903	}
1904	else
1905		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
1906
1907	if (type == 7)
1908		io_apic_ints[0].int_type = 0xff;	/* N/C */
1909	else
1910		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
1911#endif	/* APIC_IO */
1912}
1913
1914
1915/*
1916 * start each AP in our list
1917 */
1918static int
1919start_all_aps(u_int boot_addr)
1920{
1921	int     x, i, pg;
1922	u_char  mpbiosreason;
1923	u_long  mpbioswarmvec;
1924	struct pcpu *pc;
1925	char *stack;
1926	uintptr_t kptbase;
1927
1928	POSTCODE(START_ALL_APS_POST);
1929
1930	mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
1931
1932	/* initialize BSP's local APIC */
1933	apic_initialize();
1934	bsp_apic_ready = 1;
1935
1936	/* install the AP 1st level boot code */
1937	install_ap_tramp(boot_addr);
1938
1939
1940	/* save the current value of the warm-start vector */
1941	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1942#ifndef PC98
1943	outb(CMOS_REG, BIOS_RESET);
1944	mpbiosreason = inb(CMOS_DATA);
1945#endif
1946
1947	/* set up temporary P==V mapping for AP boot */
1948	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
1949	kptbase = (uintptr_t)(void *)KPTphys;
1950	for (x = 0; x < NKPT; x++)
1951		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
1952		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
1953	invltlb();
1954
1955	/* start each AP */
1956	for (x = 1; x <= mp_naps; ++x) {
1957
1958		/* This is a bit verbose, it will go away soon.  */
1959
1960		/* first page of AP's private space */
1961		pg = x * i386_btop(sizeof(struct privatespace));
1962
1963		/* allocate a new private data page */
1964		pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
1965
1966		/* wire it into the private page table page */
1967		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
1968
1969		/* allocate and set up an idle stack data page */
1970		stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
1971		for (i = 0; i < KSTACK_PAGES; i++)
1972			SMPpt[pg + 1 + i] = (pt_entry_t)
1973			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1974
1975		/* prime data page for it to use */
1976		pcpu_init(pc, x, sizeof(struct pcpu));
1977
1978		/* setup a vector to our boot code */
1979		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1980		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
1981#ifndef PC98
1982		outb(CMOS_REG, BIOS_RESET);
1983		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
1984#endif
1985
1986		bootSTK = &SMP_prvspace[x].idlekstack[KSTACK_PAGES * PAGE_SIZE];
1987		bootAP = x;
1988
1989		/* attempt to start the Application Processor */
1990		CHECK_INIT(99);	/* setup checkpoints */
1991		if (!start_ap(x, boot_addr)) {
1992			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
1993			CHECK_PRINT("trace");	/* show checkpoints */
1994			/* better panic as the AP may be running loose */
1995			printf("panic y/n? [y] ");
1996			if (cngetc() != 'n')
1997				panic("bye-bye");
1998		}
1999		CHECK_PRINT("trace");		/* show checkpoints */
2000
2001		/* record its version info */
2002		cpu_apic_versions[x] = cpu_apic_versions[0];
2003
2004		all_cpus |= (1 << x);		/* record AP in CPU map */
2005	}
2006
2007	/* build our map of 'other' CPUs */
2008	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2009
2010	/* fill in our (BSP) APIC version */
2011	cpu_apic_versions[0] = lapic.version;
2012
2013	/* restore the warmstart vector */
2014	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
2015#ifndef PC98
2016	outb(CMOS_REG, BIOS_RESET);
2017	outb(CMOS_DATA, mpbiosreason);
2018#endif
2019
2020	/*
2021	 * Set up the idle context for the BSP.  Similar to above except
2022	 * that some was done by locore, some by pmap.c and some is implicit
2023	 * because the BSP is cpu#0 and the page is initially zero, and also
2024	 * because we can refer to variables by name on the BSP..
2025	 */
2026
2027	/* Allocate and setup BSP idle stack */
2028	stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
2029	for (i = 0; i < KSTACK_PAGES; i++)
2030		SMPpt[1 + i] = (pt_entry_t)
2031		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
2032
2033	for (x = 0; x < NKPT; x++)
2034		PTD[x] = 0;
2035	pmap_set_opt();
2036
2037	/* number of APs actually started */
2038	return mp_ncpus - 1;
2039}
2040
2041
2042/*
2043 * load the 1st level AP boot code into base memory.
2044 */
2045
2046/* targets for relocation */
2047extern void bigJump(void);
2048extern void bootCodeSeg(void);
2049extern void bootDataSeg(void);
2050extern void MPentry(void);
2051extern u_int MP_GDT;
2052extern u_int mp_gdtbase;
2053
2054static void
2055install_ap_tramp(u_int boot_addr)
2056{
2057	int     x;
2058	int     size = *(int *) ((u_long) & bootMP_size);
2059	u_char *src = (u_char *) ((u_long) bootMP);
2060	u_char *dst = (u_char *) boot_addr + KERNBASE;
2061	u_int   boot_base = (u_int) bootMP;
2062	u_int8_t *dst8;
2063	u_int16_t *dst16;
2064	u_int32_t *dst32;
2065
2066	POSTCODE(INSTALL_AP_TRAMP_POST);
2067
2068	for (x = 0; x < size; ++x)
2069		*dst++ = *src++;
2070
2071	/*
2072	 * modify addresses in code we just moved to basemem. unfortunately we
2073	 * need fairly detailed info about mpboot.s for this to work.  changes
2074	 * to mpboot.s might require changes here.
2075	 */
2076
2077	/* boot code is located in KERNEL space */
2078	dst = (u_char *) boot_addr + KERNBASE;
2079
2080	/* modify the lgdt arg */
2081	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2082	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2083
2084	/* modify the ljmp target for MPentry() */
2085	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2086	*dst32 = ((u_int) MPentry - KERNBASE);
2087
2088	/* modify the target for boot code segment */
2089	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2090	dst8 = (u_int8_t *) (dst16 + 1);
2091	*dst16 = (u_int) boot_addr & 0xffff;
2092	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
2093
2094	/* modify the target for boot data segment */
2095	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2096	dst8 = (u_int8_t *) (dst16 + 1);
2097	*dst16 = (u_int) boot_addr & 0xffff;
2098	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
2099}
2100
2101
2102/*
2103 * this function starts the AP (application processor) identified
2104 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
2105 * to accomplish this.  This is necessary because of the nuances
2106 * of the different hardware we might encounter.  It ain't pretty,
2107 * but it seems to work.
2108 */
2109static int
2110start_ap(int logical_cpu, u_int boot_addr)
2111{
2112	int     physical_cpu;
2113	int     vector;
2114	int     cpus;
2115	u_long  icr_lo, icr_hi;
2116
2117	POSTCODE(START_AP_POST);
2118
2119	/* get the PHYSICAL APIC ID# */
2120	physical_cpu = CPU_TO_ID(logical_cpu);
2121
2122	/* calculate the vector */
2123	vector = (boot_addr >> 12) & 0xff;
2124
2125	/* used as a watchpoint to signal AP startup */
2126	cpus = mp_ncpus;
2127
2128	/*
2129	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
2130	 * and running the target CPU. OR this INIT IPI might be latched (P5
2131	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
2132	 * ignored.
2133	 */
2134
2135	/* setup the address for the target AP */
2136	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
2137	icr_hi |= (physical_cpu << 24);
2138	lapic.icr_hi = icr_hi;
2139
2140	/* do an INIT IPI: assert RESET */
2141	icr_lo = lapic.icr_lo & 0xfff00000;
2142	lapic.icr_lo = icr_lo | 0x0000c500;
2143
2144	/* wait for pending status end */
2145	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2146		 /* spin */ ;
2147
2148	/* do an INIT IPI: deassert RESET */
2149	lapic.icr_lo = icr_lo | 0x00008500;
2150
2151	/* wait for pending status end */
2152	u_sleep(10000);		/* wait ~10mS */
2153	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2154		 /* spin */ ;
2155
2156	/*
2157	 * next we do a STARTUP IPI: the previous INIT IPI might still be
2158	 * latched, (P5 bug) this 1st STARTUP would then terminate
2159	 * immediately, and the previously started INIT IPI would continue. OR
2160	 * the previous INIT IPI has already run. and this STARTUP IPI will
2161	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
2162	 * will run.
2163	 */
2164
2165	/* do a STARTUP IPI */
2166	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2167	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2168		 /* spin */ ;
2169	u_sleep(200);		/* wait ~200uS */
2170
2171	/*
2172	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
2173	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
2174	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
2175	 * recognized after hardware RESET or INIT IPI.
2176	 */
2177
2178	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2179	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2180		 /* spin */ ;
2181	u_sleep(200);		/* wait ~200uS */
2182
2183	/* wait for it to start */
2184	set_apic_timer(5000000);/* == 5 seconds */
2185	while (read_apic_timer())
2186		if (mp_ncpus > cpus)
2187			return 1;	/* return SUCCESS */
2188
2189	return 0;		/* return FAILURE */
2190}
2191
/*
 * Flush the TLB on all other CPU's
 *
 * XXX: Needs to handshake and wait for completion before proceeding.
 *
 * Sends IPI_INVLTLB to every CPU but the caller once smp_started is
 * set; does nothing before that or when APIC_IO is not defined.
 */
void
smp_invltlb(void)
{
#if defined(APIC_IO)
	if (!smp_started)
		return;
	ipi_all_but_self(IPI_INVLTLB);
#endif  /* APIC_IO */
}
2205
2206void
2207invlpg(u_int addr)
2208{
2209	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
2210
2211	/* send a message to the other CPUs */
2212	smp_invltlb();
2213}
2214
2215void
2216invltlb(void)
2217{
2218	u_long  temp;
2219
2220	/*
2221	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
2222	 * inlined.
2223	 */
2224	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
2225
2226	/* send a message to the other CPUs */
2227	smp_invltlb();
2228}
2229
2230
2231/*
2232 * This is called once the rest of the system is up and running and we're
2233 * ready to let the AP's out of the pen.
2234 */
2235extern void	enable_sse(void);
2236
2237void
2238ap_init(void)
2239{
2240	u_int	apic_id;
2241
2242	/* spin until all the AP's are ready */
2243	while (!aps_ready)
2244		/* spin */ ;
2245
2246	/* BSP may have changed PTD while we were waiting */
2247	cpu_invltlb();
2248
2249#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2250	lidt(&r_idt);
2251#endif
2252
2253	/* set up CPU registers and state */
2254	cpu_setregs();
2255
2256	/* set up FPU state on the AP */
2257	npxinit(__INITIAL_NPXCW__);
2258
2259	/* set up SSE registers */
2260	enable_sse();
2261
2262	/* A quick check from sanity claus */
2263	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2264	if (PCPU_GET(cpuid) != apic_id) {
2265		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
2266		printf("SMP: apic_id = %d\n", apic_id);
2267		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
2268		panic("cpuid mismatch! boom!!");
2269	}
2270
2271	/* Init local apic for irq's */
2272	apic_initialize();
2273
2274	/* Set memory range attributes for this CPU to match the BSP */
2275	mem_range_AP_init();
2276
2277	mtx_lock_spin(&ap_boot_mtx);
2278
2279	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
2280
2281	smp_cpus++;
2282
2283	/* Build our map of 'other' CPUs. */
2284	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2285
2286	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
2287
2288	if (smp_cpus == mp_ncpus) {
2289		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
2290		smp_active = 1;	 /* historic */
2291	}
2292
2293	mtx_unlock_spin(&ap_boot_mtx);
2294
2295	/* wait until all the AP's are up */
2296	while (smp_started == 0)
2297		; /* nothing */
2298
2299	binuptime(PCPU_PTR(switchtime));
2300	PCPU_SET(switchticks, ticks);
2301
2302	/* ok, now grab sched_lock and enter the scheduler */
2303	mtx_lock_spin(&sched_lock);
2304	cpu_throw();	/* doesn't return */
2305
2306	panic("scheduler returned us to %s", __func__);
2307}
2308
2309/*
2310 * For statclock, we send an IPI to all CPU's to have them call this
2311 * function.
2312 */
2313void
2314forwarded_statclock(struct trapframe frame)
2315{
2316
2317	mtx_lock_spin(&sched_lock);
2318	statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
2319	mtx_unlock_spin(&sched_lock);
2320}
2321
2322void
2323forward_statclock(void)
2324{
2325	int map;
2326
2327	CTR0(KTR_SMP, "forward_statclock");
2328
2329	if (!smp_started || cold || panicstr)
2330		return;
2331
2332	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
2333	if (map != 0)
2334		ipi_selected(map, IPI_STATCLOCK);
2335}
2336
2337/*
2338 * For each hardclock(), we send an IPI to all other CPU's to have them
2339 * execute this function.  It would be nice to reduce contention on
2340 * sched_lock if we could simply peek at the CPU to determine the user/kernel
2341 * state and call hardclock_process() on the CPU receiving the clock interrupt
2342 * and then just use a simple IPI to handle any ast's if needed.
2343 */
2344void
2345forwarded_hardclock(struct trapframe frame)
2346{
2347
2348	mtx_lock_spin(&sched_lock);
2349	hardclock_process(curthread, TRAPF_USERMODE(&frame));
2350	mtx_unlock_spin(&sched_lock);
2351}
2352
2353void
2354forward_hardclock(void)
2355{
2356	u_int map;
2357
2358	CTR0(KTR_SMP, "forward_hardclock");
2359
2360	if (!smp_started || cold || panicstr)
2361		return;
2362
2363	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
2364	if (map != 0)
2365		ipi_selected(map, IPI_HARDCLOCK);
2366}
2367
2368#ifdef APIC_INTR_REORDER
2369/*
2370 *	Maintain mapping from softintr vector to isr bit in local apic.
2371 */
2372void
2373set_lapic_isrloc(int intr, int vector)
2374{
2375	if (intr < 0 || intr > 32)
2376		panic("set_apic_isrloc: bad intr argument: %d",intr);
2377	if (vector < ICU_OFFSET || vector > 255)
2378		panic("set_apic_isrloc: bad vector argument: %d",vector);
2379	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
2380	apic_isrbit_location[intr].bit = (1<<(vector & 31));
2381}
2382#endif
2383
2384/*
2385 * send an IPI to a set of cpus.
2386 */
2387void
2388ipi_selected(u_int32_t cpus, u_int ipi)
2389{
2390
2391	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
2392	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
2393}
2394
2395/*
2396 * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
2397 */
2398void
2399ipi_all(u_int ipi)
2400{
2401
2402	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2403	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
2404}
2405
2406/*
2407 * send an IPI to all CPUs EXCEPT myself
2408 */
2409void
2410ipi_all_but_self(u_int ipi)
2411{
2412
2413	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2414	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
2415}
2416
2417/*
2418 * send an IPI to myself
2419 */
2420void
2421ipi_self(u_int ipi)
2422{
2423
2424	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2425	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
2426}
2427
2428void
2429release_aps(void *dummy __unused)
2430{
2431	atomic_store_rel_int(&aps_ready, 1);
2432}
2433
2434SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
2435