mp_machdep.c revision 105216
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 *    derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: head/sys/amd64/amd64/mp_machdep.c 105216 2002-10-16 08:57:14Z phk $
26 */
27
28#include "opt_cpu.h"
29#include "opt_kstack_pages.h"
30
31#ifdef SMP
32#include <machine/smptests.h>
33#else
34#error
35#endif
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/bus.h>
40#include <sys/cons.h>	/* cngetc() */
41#include <sys/dkstat.h>
42#ifdef GPROF
43#include <sys/gmon.h>
44#endif
45#include <sys/kernel.h>
46#include <sys/ktr.h>
47#include <sys/lock.h>
48#include <sys/malloc.h>
49#include <sys/memrange.h>
50#include <sys/mutex.h>
51#include <sys/pcpu.h>
52#include <sys/proc.h>
53#include <sys/smp.h>
54#include <sys/sysctl.h>
55#include <sys/user.h>
56
57#include <vm/vm.h>
58#include <vm/vm_param.h>
59#include <vm/pmap.h>
60#include <vm/vm_kern.h>
61#include <vm/vm_extern.h>
62#include <vm/vm_map.h>
63
64#include <machine/apic.h>
65#include <machine/atomic.h>
66#include <machine/cpu.h>
67#include <machine/cpufunc.h>
68#include <machine/mpapic.h>
69#include <machine/psl.h>
70#include <machine/segments.h>
71#include <machine/smp.h>
72#include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
73#include <machine/tss.h>
74#include <machine/specialreg.h>
75#include <machine/privatespace.h>
76
77#if defined(APIC_IO)
78#include <machine/md_var.h>		/* setidt() */
79#include <i386/isa/icu.h>		/* IPIs */
80#include <i386/isa/intr_machdep.h>	/* IPIs */
81#endif	/* APIC_IO */
82
83#if defined(TEST_DEFAULT_CONFIG)
84#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
85#else
86#define MPFPS_MPFB1	mpfps->mpfb1
87#endif  /* TEST_DEFAULT_CONFIG */
88
89#define WARMBOOT_TARGET		0
90#define WARMBOOT_OFF		(KERNBASE + 0x0467)
91#define WARMBOOT_SEG		(KERNBASE + 0x0469)
92
93#ifdef PC98
94#define BIOS_BASE		(0xe8000)
95#define BIOS_SIZE		(0x18000)
96#else
97#define BIOS_BASE		(0xf0000)
98#define BIOS_SIZE		(0x10000)
99#endif
100#define BIOS_COUNT		(BIOS_SIZE/4)
101
102#define CMOS_REG		(0x70)
103#define CMOS_DATA		(0x71)
104#define BIOS_RESET		(0x0f)
105#define BIOS_WARM		(0x0a)
106
107#define PROCENTRY_FLAG_EN	0x01
108#define PROCENTRY_FLAG_BP	0x02
109#define IOAPICENTRY_FLAG_EN	0x01
110
111
112/* MP Floating Pointer Structure */
/*
 * Layout of the structure located by search_for_sig(); the address found
 * is kept in 'mpfps'.  Fields without a comment are not referenced by the
 * code visible in this part of the file.
 */
113typedef struct MPFPS {
114	char    signature[4];	/* matched as the 32-bit word MP_SIG */
115	void   *pap;		/* MP config table header; 0 panics if mpfb1 == 0 */
116	u_char  length;
117	u_char  spec_rev;
118	u_char  checksum;
119	u_char  mpfb1;		/* non-zero: default configuration type */
120	u_char  mpfb2;		/* bit 0x80 set: PIC mode, else virtual wire */
121	u_char  mpfb3;
122	u_char  mpfb4;
123	u_char  mpfb5;
124}      *mpfps_t;
125
126/* MP Configuration Table Header */
/*
 * Header that precedes the base table entries; reached through
 * mpfps->pap.  The entries start sizeof(struct MPCTH) bytes after it.
 */
127typedef struct MPCTH {
128	char    signature[4];
129	u_short base_table_length;	/* read by the table walkers */
130	u_char  spec_rev;
131	u_char  checksum;
132	u_char  oem_id[8];
133	u_char  product_id[12];
134	void   *oem_table_pointer;
135	u_short oem_table_size;
136	u_short entry_count;		/* number of base table entries to walk */
137	void   *apic_address;		/* becomes cpu_apic_address */
138	u_short extended_table_length;
139	u_char  extended_table_checksum;
140	u_char  reserved;
141}      *mpcth_t;
142
143
/*
 * Base table entry of type 0 (processor).
 * NOTE(review): basetable_entry_types[] gives this entry a length of 20
 * bytes, which matches this layout only where u_long is 32 bits -- confirm
 * before reusing on another ABI.
 */
144typedef struct PROCENTRY {
145	u_char  type;		/* 0 for this entry kind */
146	u_char  apic_id;
147	u_char  apic_version;
148	u_char  cpu_flags;	/* PROCENTRY_FLAG_EN / PROCENTRY_FLAG_BP bits */
149	u_long  cpu_signature;
150	u_long  feature_flags;
151	u_long  reserved1;
152	u_long  reserved2;
153}      *proc_entry_ptr;
154
/* Base table entry of type 1 (bus); 8 bytes per basetable_entry_types[]. */
155typedef struct BUSENTRY {
156	u_char  type;		/* 1 for this entry kind */
157	u_char  bus_id;
158	char    bus_type[6];	/* presumably not NUL terminated -- TODO confirm */
159}      *bus_entry_ptr;
160
/* Base table entry of type 2 (I/O APIC). */
161typedef struct IOAPICENTRY {
162	u_char  type;		/* 2 for this entry kind */
163	u_char  apic_id;
164	u_char  apic_version;
165	u_char  apic_flags;	/* IOAPICENTRY_FLAG_EN: entry is counted */
166	void   *apic_address;	/* recorded in io_apic_address[] by pass 1 */
167}      *io_apic_entry_ptr;
168
/*
 * Base table entry of type 3 or 4; pass 1 counts only type 3 entries
 * in nintrs.
 */
169typedef struct INTENTRY {
170	u_char  type;		/* 3 or 4, see above */
171	u_char  int_type;
172	u_short int_flags;
173	u_char  src_bus_id;
174	u_char  src_bus_irq;
175	u_char  dst_apic_id;
176	u_char  dst_apic_int;
177}      *int_entry_ptr;
178
179/* descriptions of MP basetable entries */
/* One row of basetable_entry_types[]. */
180typedef struct BASETABLE_ENTRY {
181	u_char  type;		/* entry type code (first byte of an entry) */
182	u_char  length;		/* entry size in bytes; used to step the walk */
183	char    name[16];	/* printable name of the entry kind */
184}       basetable_entry;
185
186/*
187 * this code MUST be enabled here and in mpboot.s.
188 * it follows the very early stages of AP boot by placing values in CMOS ram.
189 * it NORMALLY will never be needed and thus the primitive method for enabling.
190 *
191#define CHECK_POINTS
192 */
193
/*
 * Checkpoint helpers.  When CHECK_POINTS is defined (and PC98 is not)
 * they read/write CMOS locations 0x34-0x39 through CMOS_REG/CMOS_DATA;
 * otherwise CHECK_INIT() and CHECK_PRINT() expand to nothing.
 */
194#if defined(CHECK_POINTS) && !defined(PC98)
/* select CMOS register A, then read or write its data byte */
195#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
196#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
197
/*
 * Store D in all six checkpoint bytes.
 * NOTE(review): the ';' after the parameter list is part of the
 * replacement text, and the expansion is several statements that are not
 * wrapped in do { } while (0) -- unsafe as the body of an unbraced if.
 */
198#define CHECK_INIT(D);				\
199	CHECK_WRITE(0x34, (D));			\
200	CHECK_WRITE(0x35, (D));			\
201	CHECK_WRITE(0x36, (D));			\
202	CHECK_WRITE(0x37, (D));			\
203	CHECK_WRITE(0x38, (D));			\
204	CHECK_WRITE(0x39, (D));
205
/* Print the six checkpoint bytes prefixed by the string S (same ';' quirk). */
206#define CHECK_PRINT(S);				\
207	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
208	   (S),					\
209	   CHECK_READ(0x34),			\
210	   CHECK_READ(0x35),			\
211	   CHECK_READ(0x36),			\
212	   CHECK_READ(0x37),			\
213	   CHECK_READ(0x38),			\
214	   CHECK_READ(0x39));
215
216#else				/* CHECK_POINTS */
217
218#define CHECK_INIT(D)
219#define CHECK_PRINT(S)
220
221#endif				/* CHECK_POINTS */
222
223/*
224 * Values to send to the POST hardware.
225 */
226#define MP_BOOTADDRESS_POST	0x10
227#define MP_PROBE_POST		0x11
228#define MPTABLE_PASS1_POST	0x12
229
230#define MP_START_POST		0x13
231#define MP_ENABLE_POST		0x14
232#define MPTABLE_PASS2_POST	0x15
233
234#define START_ALL_APS_POST	0x16
235#define INSTALL_AP_TRAMP_POST	0x17
236#define START_AP_POST		0x18
237
238#define MP_ANNOUNCE_POST	0x19
239
240/* used to hold the AP's until we are ready to release them */
241static struct mtx ap_boot_mtx;
242
243/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
244int	current_postcode;
245
246/** XXX FIXME: what system files declare these??? */
247extern struct region_descriptor r_gdt, r_idt;
248
249int	bsp_apic_ready = 0;	/* flags useability of BSP apic */
250int	mp_naps;		/* # of Applications processors */
251int	mp_nbusses;		/* # of busses */
252int	mp_napics;		/* # of IO APICs */
253int	boot_cpu_id;		/* designated BSP */
254vm_offset_t cpu_apic_address;
255vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
256extern	int nkpt;
257
258u_int32_t cpu_apic_versions[MAXCPU];
259u_int32_t *io_apic_versions;
260
261#ifdef APIC_INTR_REORDER
262struct {
263	volatile int *location;
264	int bit;
265} apic_isrbit_location[32];
266#endif
267
268struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
269
270/*
271 * APIC ID logical/physical mapping structures.
272 * We oversize these to simplify boot-time config.
273 */
274int     cpu_num_to_apic_id[NAPICID];
275int     io_num_to_apic_id[NAPICID];
276int     apic_id_to_logical[NAPICID];
277
278
279/* AP uses this during bootstrap.  Do not staticize.  */
280char *bootSTK;
281static int bootAP;
282
283/* Hotwire a 0->4MB V==P mapping */
284extern pt_entry_t *KPTphys;
285
286/* SMP page table page */
287extern pt_entry_t *SMPpt;
288
289struct pcb stoppcbs[MAXCPU];
290
291#ifdef APIC_IO
292/* Variables needed for SMP tlb shootdown. */
293vm_offset_t smp_tlb_addr1;
294vm_offset_t smp_tlb_addr2;
295volatile int smp_tlb_wait;
296static struct mtx smp_tlb_mtx;
297#endif
298
299/*
300 * Local data and functions.
301 */
302
303/* Set to 1 once we're ready to let the APs out of the pen. */
304static volatile int aps_ready = 0;
305
306static int	mp_capable;
307static u_int	boot_address;
308static u_int	base_memory;
309
310static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
311static mpfps_t	mpfps;
312static int	search_for_sig(u_int32_t target, int count);
313static void	mp_enable(u_int boot_addr);
314
315static void	mptable_pass1(void);
316static int	mptable_pass2(void);
317static void	default_mp_table(int type);
318static void	fix_mp_table(void);
319static void	setup_apic_irq_mapping(void);
320static void	init_locks(void);
321static int	start_all_aps(u_int boot_addr);
322static void	install_ap_tramp(u_int boot_addr);
323static int	start_ap(int logicalCpu, u_int boot_addr);
324void		ap_init(void);
325static int	apic_int_is_bus_type(int intr, int bus_type);
326static void	release_aps(void *dummy);
327
328/*
329 * initialize all the SMP locks
330 */
331
332/* lock region used by kernel profiling */
333int	mcount_lock;
334
335#ifdef USE_COMLOCK
336/* locks com (tty) data/hardware accesses: a FASTINTR() */
337struct mtx		com_mtx;
338#endif /* USE_COMLOCK */
339
/*
 * Create the spin mutexes owned by this file.  Which ones exist depends
 * on the kernel options: "com" under USE_COMLOCK, "tlb" under APIC_IO.
 */
static void
init_locks(void)
{
#if defined(USE_COMLOCK)
	/* serializes com (tty) data/hardware accesses */
	mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
#endif

#if defined(APIC_IO)
	/* guards the smp_tlb_* shootdown variables */
	mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
#endif
}
351
352/*
353 * Calculate usable address in base memory for AP trampoline code.
354 */
355u_int
356mp_bootaddress(u_int basemem)
357{
358	POSTCODE(MP_BOOTADDRESS_POST);
359
360	base_memory = basemem * 1024;	/* convert to bytes */
361
362	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
363	if ((base_memory - boot_address) < bootMP_size)
364		boot_address -= 4096;	/* not enough, lower by 4k */
365
366	return boot_address;
367}
368
369
370/*
371 * Look for an Intel MP spec table (ie, SMP capable hardware).
372 */
373void
374i386_mp_probe(void)
375{
376	int     x;
377	u_long  segment;
378	u_int32_t target;
379
380	POSTCODE(MP_PROBE_POST);
381
382	/* see if EBDA exists */
383	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
384		/* search first 1K of EBDA */
385		target = (u_int32_t) (segment << 4);
386		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
387			goto found;
388	} else {
389		/* last 1K of base memory, effective 'top of base' passed in */
390		target = (u_int32_t) (base_memory - 0x400);
391		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
392			goto found;
393	}
394
395	/* search the BIOS */
396	target = (u_int32_t) BIOS_BASE;
397	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
398		goto found;
399
400	/* nothing found */
401	mpfps = (mpfps_t)0;
402	mp_capable = 0;
403	return;
404
405found:
406	/* calculate needed resources */
407	mpfps = (mpfps_t)x;
408	mptable_pass1();
409
410	/* flag fact that we are running multiple processors */
411	mp_capable = 1;
412}
413
414int
415cpu_mp_probe(void)
416{
417	/*
418	 * Record BSP in CPU map
419	 * This is done here so that MBUF init code works correctly.
420	 */
421	all_cpus = 1;
422
423	return (mp_capable);
424}
425
426/*
427 * Initialize the SMP hardware and the APIC and start up the AP's.
428 */
429void
430cpu_mp_start(void)
431{
432	POSTCODE(MP_START_POST);
433
434	/* look for MP capable motherboard */
435	if (mp_capable)
436		mp_enable(boot_address);
437	else
438		panic("MP hardware not found!");
439
440	cpu_setregs();
441}
442
443
444/*
445 * Print various information about the SMP system hardware and setup.
446 */
447void
448cpu_mp_announce(void)
449{
450	int     x;
451
452	POSTCODE(MP_ANNOUNCE_POST);
453
454	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
455	printf(", version: 0x%08x", cpu_apic_versions[0]);
456	printf(", at 0x%08x\n", cpu_apic_address);
457	for (x = 1; x <= mp_naps; ++x) {
458		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
459		printf(", version: 0x%08x", cpu_apic_versions[x]);
460		printf(", at 0x%08x\n", cpu_apic_address);
461	}
462
463#if defined(APIC_IO)
464	for (x = 0; x < mp_napics; ++x) {
465		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
466		printf(", version: 0x%08x", io_apic_versions[x]);
467		printf(", at 0x%08x\n", io_apic_address[x]);
468	}
469#else
470	printf(" Warning: APIC I/O disabled\n");
471#endif	/* APIC_IO */
472}
473
474/*
475 * AP cpu's call this to sync up protected mode.
476 */
477void
478init_secondary(void)
479{
480	int	gsel_tss;
481	int	x, myid = bootAP;
482	u_int	cr0;
483
484	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
485	gdt_segs[GPROC0_SEL].ssd_base =
486		(int) &SMP_prvspace[myid].pcpu.pc_common_tss;
487	SMP_prvspace[myid].pcpu.pc_prvspace =
488		&SMP_prvspace[myid].pcpu;
489
490	for (x = 0; x < NGDT; x++) {
491		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
492	}
493
494	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
495	r_gdt.rd_base = (int) &gdt[myid * NGDT];
496	lgdt(&r_gdt);			/* does magic intra-segment return */
497
498	lidt(&r_idt);
499
500	lldt(_default_ldt);
501	PCPU_SET(currentldt, _default_ldt);
502
503	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
504	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
505	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
506	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
507	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
508	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
509	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
510	ltr(gsel_tss);
511
512	/*
513	 * Set to a known state:
514	 * Set by mpboot.s: CR0_PG, CR0_PE
515	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
516	 */
517	cr0 = rcr0();
518	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
519	load_cr0(cr0);
520
521	pmap_set_opt();
522}
523
524
525#if defined(APIC_IO)
526/*
527 * Final configuration of the BSP's local APIC:
528 *  - disable 'pic mode'.
529 *  - disable 'virtual wire mode'.
530 *  - enable NMI.
531 */
532void
533bsp_apic_configure(void)
534{
535	u_char		byte;
536	u_int32_t	temp;
537
538	/* leave 'pic mode' if necessary */
539	if (picmode) {
540		outb(0x22, 0x70);	/* select IMCR */
541		byte = inb(0x23);	/* current contents */
542		byte |= 0x01;		/* mask external INTR */
543		outb(0x23, byte);	/* disconnect 8259s/NMI */
544	}
545
546	/* mask lint0 (the 8259 'virtual wire' connection) */
547	temp = lapic.lvt_lint0;
548	temp |= APIC_LVT_M;		/* set the mask */
549	lapic.lvt_lint0 = temp;
550
551        /* setup lint1 to handle NMI */
552        temp = lapic.lvt_lint1;
553        temp &= ~APIC_LVT_M;		/* clear the mask */
554        lapic.lvt_lint1 = temp;
555
556	if (bootverbose)
557		apic_dump("bsp_apic_configure()");
558}
559#endif  /* APIC_IO */
560
561
562/*******************************************************************
563 * local functions and data
564 */
565
566/*
567 * start the SMP system
568 */
569static void
570mp_enable(u_int boot_addr)
571{
572	int     x;
573#if defined(APIC_IO)
574	int     apic;
575	u_int   ux;
576#endif	/* APIC_IO */
577
578	POSTCODE(MP_ENABLE_POST);
579
580	/* turn on 4MB of V == P addressing so we can get to MP table */
581	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
582	invltlb();
583
584	/* examine the MP table for needed info, uses physical addresses */
585	x = mptable_pass2();
586
587	*(int *)PTD = 0;
588	invltlb();
589
590	/* can't process default configs till the CPU APIC is pmapped */
591	if (x)
592		default_mp_table(x);
593
594	/* post scan cleanup */
595	fix_mp_table();
596	setup_apic_irq_mapping();
597
598#if defined(APIC_IO)
599
600	/* fill the LOGICAL io_apic_versions table */
601	for (apic = 0; apic < mp_napics; ++apic) {
602		ux = io_apic_read(apic, IOAPIC_VER);
603		io_apic_versions[apic] = ux;
604		io_apic_set_id(apic, IO_TO_ID(apic));
605	}
606
607	/* program each IO APIC in the system */
608	for (apic = 0; apic < mp_napics; ++apic)
609		if (io_apic_setup(apic) < 0)
610			panic("IO APIC setup failure");
611
612	/* install a 'Spurious INTerrupt' vector */
613	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
614	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
615
616	/* install an inter-CPU IPI for TLB invalidation */
617	setidt(XINVLTLB_OFFSET, Xinvltlb,
618	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
619	setidt(XINVLPG_OFFSET, Xinvlpg,
620	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
621	setidt(XINVLRNG_OFFSET, Xinvlrng,
622	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
623
624	/* install an inter-CPU IPI for forwarding hardclock() */
625	setidt(XHARDCLOCK_OFFSET, Xhardclock,
626	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
627
628	/* install an inter-CPU IPI for forwarding statclock() */
629	setidt(XSTATCLOCK_OFFSET, Xstatclock,
630	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
631
632	/* install an inter-CPU IPI for all-CPU rendezvous */
633	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
634	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
635
636	/* install an inter-CPU IPI for forcing an additional software trap */
637	setidt(XCPUAST_OFFSET, Xcpuast,
638	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
639
640	/* install an inter-CPU IPI for CPU stop/restart */
641	setidt(XCPUSTOP_OFFSET, Xcpustop,
642	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
643
644#if defined(TEST_TEST1)
645	/* install a "fake hardware INTerrupt" vector */
646	setidt(XTEST1_OFFSET, Xtest1,
647	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
648#endif  /** TEST_TEST1 */
649
650#endif	/* APIC_IO */
651
652	/* initialize all SMP locks */
653	init_locks();
654
655	/* start each Application Processor */
656	start_all_aps(boot_addr);
657}
658
659
660/*
661 * look for the MP spec signature
662 */
663
664/* string defined by the Intel MP Spec as identifying the MP table */
665#define MP_SIG		0x5f504d5f	/* _MP_ */
666#define NEXT(X)		((X) += 4)
667static int
668search_for_sig(u_int32_t target, int count)
669{
670	int     x;
671	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
672
673	for (x = 0; x < count; NEXT(x))
674		if (addr[x] == MP_SIG)
675			/* make array index a byte index */
676			return (target + (x * sizeof(u_int32_t)));
677
678	return -1;
679}
680
681
/*
 * Indexed by the type byte of a base table entry (0-4).  The table
 * walkers in mptable_pass1()/mptable_pass2() use .length to step to the
 * next entry, so the row order must match the type codes.
 */
682static basetable_entry basetable_entry_types[] =
683{
684	{0, 20, "Processor"},
685	{1, 8, "Bus"},
686	{2, 8, "I/O APIC"},
687	{3, 8, "I/O INT"},
688	{4, 8, "Local INT"}
689};
690
/* One slot of bus_data[]; mptable_pass2() presets bus_id to 0xff. */
691typedef struct BUSDATA {
692	u_char  bus_id;		/* bus ID; 0xff until filled in */
693	enum busTypes bus_type;	/* e.g. ISA, EISA, MCA, PCI */
694}       bus_datum;
695
/*
 * One slot of io_apic_ints[].  The first six members have the same names
 * as those of struct INTENTRY; int_vector is added by the kernel.
 */
696typedef struct INTDATA {
697	u_char  int_type;	/* 0xff after init; 0 and 3 are the assignable kinds */
698	u_short int_flags;
699	u_char  src_bus_id;
700	u_char  src_bus_irq;
701	u_char  dst_apic_id;	/* physical APIC ID of the destination IO APIC */
702	u_char  dst_apic_int;	/* pin on that IO APIC */
703	u_char	int_vector;	/* assigned irq, 0xff while unassigned */
704}       io_int, local_int;
705
/* One row of bus_type_table[]: a bus type code and its name. */
706typedef struct BUSTYPENAME {
707	u_char  type;		/* enum busTypes value stored as a byte */
708	char    name[7];	/* up to 6 characters plus the terminating NUL */
709}       bus_type_name;
710
/*
 * Bus type codes and their names; "---" rows are placeholders with type
 * UNKNOWN_BUSTYPE.
 * NOTE(review): "MCA" is listed twice (4th and 7th row) -- confirm
 * whether the second row is intentional.
 */
711static bus_type_name bus_type_table[] =
712{
713	{CBUS, "CBUS"},
714	{CBUSII, "CBUSII"},
715	{EISA, "EISA"},
716	{MCA, "MCA"},
717	{UNKNOWN_BUSTYPE, "---"},
718	{ISA, "ISA"},
719	{MCA, "MCA"},
720	{UNKNOWN_BUSTYPE, "---"},
721	{UNKNOWN_BUSTYPE, "---"},
722	{UNKNOWN_BUSTYPE, "---"},
723	{UNKNOWN_BUSTYPE, "---"},
724	{UNKNOWN_BUSTYPE, "---"},
725	{PCI, "PCI"},
726	{UNKNOWN_BUSTYPE, "---"},
727	{UNKNOWN_BUSTYPE, "---"},
728	{UNKNOWN_BUSTYPE, "---"},
729	{UNKNOWN_BUSTYPE, "---"},
730	{XPRESS, "XPRESS"},
731	{UNKNOWN_BUSTYPE, "---"}
732};
733/* from MP spec v1.4, table 5-1 */
/*
 * Bus layout of the default configurations.  Row N-1 describes default
 * configuration type N (mptable_pass1() indexes it with mpfb1 - 1);
 * 255 marks an unused second bus.
 */
734static int default_data[7][5] =
735{
736/*   nbus, id0, type0, id1, type1 */
737	{1, 0, ISA, 255, 255},
738	{1, 0, EISA, 255, 255},
739	{1, 0, EISA, 255, 255},
740	{1, 0, MCA, 255, 255},
741	{2, 0, ISA, 1, PCI},
742	{2, 0, EISA, 1, PCI},
743	{2, 0, MCA, 1, PCI}
744};
745
746
747/* the bus data */
748static bus_datum *bus_data;
749
750/* the IO INT data, one entry per possible APIC INTerrupt */
751static io_int  *io_apic_ints;
752
753static int nintrs;
754
755static int processor_entry(proc_entry_ptr entry, int cpu);
756static int bus_entry(bus_entry_ptr entry, int bus);
757static int io_apic_entry(io_apic_entry_ptr entry, int apic);
758static int int_entry(int_entry_ptr entry, int intr);
759static int lookup_bus_type(char *name);
760
761
762/*
763 * 1st pass on motherboard's Intel MP specification table.
764 *
765 * initializes:
766 *	mp_ncpus = 1
767 *
768 * determines:
769 *	cpu_apic_address (common to all CPUs)
770 *	io_apic_address[N]
771 *	mp_naps
772 *	mp_nbusses
773 *	mp_napics
774 *	nintrs
775 */
776static void
777mptable_pass1(void)
778{
779	int	x;
780	mpcth_t	cth;
781	int	totalSize;
782	void*	position;
783	int	count;
784	int	type;
785
786	POSTCODE(MPTABLE_PASS1_POST);
787
788	/* clear various tables */
789	for (x = 0; x < NAPICID; ++x) {
790		io_apic_address[x] = ~0;	/* IO APIC address table */
791	}
792
793	/* init everything to empty */
794	mp_naps = 0;
795	mp_nbusses = 0;
796	mp_napics = 0;
797	nintrs = 0;
798
799	/* check for use of 'default' configuration */
800	if (MPFPS_MPFB1 != 0) {
801		/* use default addresses */
802		cpu_apic_address = DEFAULT_APIC_BASE;
803		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
804
805		/* fill in with defaults */
806		mp_naps = 2;		/* includes BSP */
807		mp_maxid = 1;
808		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
809#if defined(APIC_IO)
810		mp_napics = 1;
811		nintrs = 16;
812#endif	/* APIC_IO */
813	}
814	else {
815		if ((cth = mpfps->pap) == 0)
816			panic("MP Configuration Table Header MISSING!");
817
818		cpu_apic_address = (vm_offset_t) cth->apic_address;
819
820		/* walk the table, recording info of interest */
821		totalSize = cth->base_table_length - sizeof(struct MPCTH);
822		position = (u_char *) cth + sizeof(struct MPCTH);
823		count = cth->entry_count;
824
825		while (count--) {
826			switch (type = *(u_char *) position) {
827			case 0: /* processor_entry */
828				if (((proc_entry_ptr)position)->cpu_flags
829				    & PROCENTRY_FLAG_EN) {
830					++mp_naps;
831					mp_maxid++;
832				}
833				break;
834			case 1: /* bus_entry */
835				++mp_nbusses;
836				break;
837			case 2: /* io_apic_entry */
838				if (((io_apic_entry_ptr)position)->apic_flags
839					& IOAPICENTRY_FLAG_EN)
840					io_apic_address[mp_napics++] =
841					    (vm_offset_t)((io_apic_entry_ptr)
842						position)->apic_address;
843				break;
844			case 3: /* int_entry */
845				++nintrs;
846				break;
847			case 4:	/* int_entry */
848				break;
849			default:
850				panic("mpfps Base Table HOSED!");
851				/* NOTREACHED */
852			}
853
854			totalSize -= basetable_entry_types[type].length;
855			(u_char*)position += basetable_entry_types[type].length;
856		}
857	}
858
859	/* qualify the numbers */
860	if (mp_naps > MAXCPU) {
861		printf("Warning: only using %d of %d available CPUs!\n",
862			MAXCPU, mp_naps);
863		mp_naps = MAXCPU;
864	}
865
866	/*
867	 * Count the BSP.
868	 * This is also used as a counter while starting the APs.
869	 */
870	mp_ncpus = 1;
871
872	--mp_naps;	/* subtract the BSP */
873}
874
875
876/*
877 * 2nd pass on motherboard's Intel MP specification table.
878 *
879 * sets:
880 *	boot_cpu_id
881 *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
882 *	CPU_TO_ID(N), logical CPU to APIC ID table
883 *	IO_TO_ID(N), logical IO to APIC ID table
884 *	bus_data[N]
885 *	io_apic_ints[N]
886 */
887static int
888mptable_pass2(void)
889{
890	int     x;
891	mpcth_t cth;
892	int     totalSize;
893	void*   position;
894	int     count;
895	int     type;
896	int     apic, bus, cpu, intr;
897	int	i, j;
898	int	pgeflag;
899
900	POSTCODE(MPTABLE_PASS2_POST);
901
902	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
903
904	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
905	    M_DEVBUF, M_WAITOK);
906	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
907	    M_DEVBUF, M_WAITOK);
908	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
909	    M_DEVBUF, M_WAITOK);
910	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
911	    M_DEVBUF, M_WAITOK);
912
913	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
914
915	for (i = 0; i < mp_napics; i++) {
916		for (j = 0; j < mp_napics; j++) {
917			/* same page frame as a previous IO apic? */
918			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
919			    (io_apic_address[i] & PG_FRAME)) {
920				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
921					+ (NPTEPG-2-j) * PAGE_SIZE
922					+ (io_apic_address[i] & PAGE_MASK));
923				break;
924			}
925			/* use this slot if available */
926			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
927				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
928				    pgeflag | (io_apic_address[i] & PG_FRAME));
929				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
930					+ (NPTEPG-2-j) * PAGE_SIZE
931					+ (io_apic_address[i] & PAGE_MASK));
932				break;
933			}
934		}
935	}
936
937	/* clear various tables */
938	for (x = 0; x < NAPICID; ++x) {
939		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
940		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
941		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
942	}
943
944	/* clear bus data table */
945	for (x = 0; x < mp_nbusses; ++x)
946		bus_data[x].bus_id = 0xff;
947
948	/* clear IO APIC INT table */
949	for (x = 0; x < (nintrs + 1); ++x) {
950		io_apic_ints[x].int_type = 0xff;
951		io_apic_ints[x].int_vector = 0xff;
952	}
953
954	/* setup the cpu/apic mapping arrays */
955	boot_cpu_id = -1;
956
957	/* record whether PIC or virtual-wire mode */
958	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
959
960	/* check for use of 'default' configuration */
961	if (MPFPS_MPFB1 != 0)
962		return MPFPS_MPFB1;	/* return default configuration type */
963
964	if ((cth = mpfps->pap) == 0)
965		panic("MP Configuration Table Header MISSING!");
966
967	/* walk the table, recording info of interest */
968	totalSize = cth->base_table_length - sizeof(struct MPCTH);
969	position = (u_char *) cth + sizeof(struct MPCTH);
970	count = cth->entry_count;
971	apic = bus = intr = 0;
972	cpu = 1;				/* pre-count the BSP */
973
974	while (count--) {
975		switch (type = *(u_char *) position) {
976		case 0:
977			if (processor_entry(position, cpu))
978				++cpu;
979			break;
980		case 1:
981			if (bus_entry(position, bus))
982				++bus;
983			break;
984		case 2:
985			if (io_apic_entry(position, apic))
986				++apic;
987			break;
988		case 3:
989			if (int_entry(position, intr))
990				++intr;
991			break;
992		case 4:
993			/* int_entry(position); */
994			break;
995		default:
996			panic("mpfps Base Table HOSED!");
997			/* NOTREACHED */
998		}
999
1000		totalSize -= basetable_entry_types[type].length;
1001		(u_char *) position += basetable_entry_types[type].length;
1002	}
1003
1004	if (boot_cpu_id == -1)
1005		panic("NO BSP found!");
1006
1007	/* report fact that its NOT a default configuration */
1008	return 0;
1009}
1010
1011
1012void
1013assign_apic_irq(int apic, int intpin, int irq)
1014{
1015	int x;
1016
1017	if (int_to_apicintpin[irq].ioapic != -1)
1018		panic("assign_apic_irq: inconsistent table");
1019
1020	int_to_apicintpin[irq].ioapic = apic;
1021	int_to_apicintpin[irq].int_pin = intpin;
1022	int_to_apicintpin[irq].apic_address = ioapic[apic];
1023	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1024
1025	for (x = 0; x < nintrs; x++) {
1026		if ((io_apic_ints[x].int_type == 0 ||
1027		     io_apic_ints[x].int_type == 3) &&
1028		    io_apic_ints[x].int_vector == 0xff &&
1029		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1030		    io_apic_ints[x].dst_apic_int == intpin)
1031			io_apic_ints[x].int_vector = irq;
1032	}
1033}
1034
1035void
1036revoke_apic_irq(int irq)
1037{
1038	int x;
1039	int oldapic;
1040	int oldintpin;
1041
1042	if (int_to_apicintpin[irq].ioapic == -1)
1043		panic("revoke_apic_irq: inconsistent table");
1044
1045	oldapic = int_to_apicintpin[irq].ioapic;
1046	oldintpin = int_to_apicintpin[irq].int_pin;
1047
1048	int_to_apicintpin[irq].ioapic = -1;
1049	int_to_apicintpin[irq].int_pin = 0;
1050	int_to_apicintpin[irq].apic_address = NULL;
1051	int_to_apicintpin[irq].redirindex = 0;
1052
1053	for (x = 0; x < nintrs; x++) {
1054		if ((io_apic_ints[x].int_type == 0 ||
1055		     io_apic_ints[x].int_type == 3) &&
1056		    io_apic_ints[x].int_vector != 0xff &&
1057		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1058		    io_apic_ints[x].dst_apic_int == oldintpin)
1059			io_apic_ints[x].int_vector = 0xff;
1060	}
1061}
1062
1063
1064static void
1065allocate_apic_irq(int intr)
1066{
1067	int apic;
1068	int intpin;
1069	int irq;
1070
1071	if (io_apic_ints[intr].int_vector != 0xff)
1072		return;		/* Interrupt handler already assigned */
1073
1074	if (io_apic_ints[intr].int_type != 0 &&
1075	    (io_apic_ints[intr].int_type != 3 ||
1076	     (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1077	      io_apic_ints[intr].dst_apic_int == 0)))
1078		return;		/* Not INT or ExtInt on != (0, 0) */
1079
1080	irq = 0;
1081	while (irq < APIC_INTMAPSIZE &&
1082	       int_to_apicintpin[irq].ioapic != -1)
1083		irq++;
1084
1085	if (irq >= APIC_INTMAPSIZE)
1086		return;		/* No free interrupt handlers */
1087
1088	apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1089	intpin = io_apic_ints[intr].dst_apic_int;
1090
1091	assign_apic_irq(apic, intpin, irq);
1092	io_apic_setup_intpin(apic, intpin);
1093}
1094
1095
1096static void
1097swap_apic_id(int apic, int oldid, int newid)
1098{
1099	int x;
1100	int oapic;
1101
1102
1103	if (oldid == newid)
1104		return;			/* Nothing to do */
1105
1106	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
1107	       apic, oldid, newid);
1108
1109	/* Swap physical APIC IDs in interrupt entries */
1110	for (x = 0; x < nintrs; x++) {
1111		if (io_apic_ints[x].dst_apic_id == oldid)
1112			io_apic_ints[x].dst_apic_id = newid;
1113		else if (io_apic_ints[x].dst_apic_id == newid)
1114			io_apic_ints[x].dst_apic_id = oldid;
1115	}
1116
1117	/* Swap physical APIC IDs in IO_TO_ID mappings */
1118	for (oapic = 0; oapic < mp_napics; oapic++)
1119		if (IO_TO_ID(oapic) == newid)
1120			break;
1121
1122	if (oapic < mp_napics) {
1123		printf("Changing APIC ID for IO APIC #%d from "
1124		       "%d to %d in MP table\n",
1125		       oapic, newid, oldid);
1126		IO_TO_ID(oapic) = oldid;
1127	}
1128	IO_TO_ID(apic) = newid;
1129}
1130
1131
1132static void
1133fix_id_to_io_mapping(void)
1134{
1135	int x;
1136
1137	for (x = 0; x < NAPICID; x++)
1138		ID_TO_IO(x) = -1;
1139
1140	for (x = 0; x <= mp_naps; x++)
1141		if (CPU_TO_ID(x) < NAPICID)
1142			ID_TO_IO(CPU_TO_ID(x)) = x;
1143
1144	for (x = 0; x < mp_napics; x++)
1145		if (IO_TO_ID(x) < NAPICID)
1146			ID_TO_IO(IO_TO_ID(x)) = x;
1147}
1148
1149
1150static int
1151first_free_apic_id(void)
1152{
1153	int freeid, x;
1154
1155	for (freeid = 0; freeid < NAPICID; freeid++) {
1156		for (x = 0; x <= mp_naps; x++)
1157			if (CPU_TO_ID(x) == freeid)
1158				break;
1159		if (x <= mp_naps)
1160			continue;
1161		for (x = 0; x < mp_napics; x++)
1162			if (IO_TO_ID(x) == freeid)
1163				break;
1164		if (x < mp_napics)
1165			continue;
1166		return freeid;
1167	}
1168	return freeid;
1169}
1170
1171
1172static int
1173io_apic_id_acceptable(int apic, int id)
1174{
1175	int cpu;		/* Logical CPU number */
1176	int oapic;		/* Logical IO APIC number for other IO APIC */
1177
1178	if (id >= NAPICID)
1179		return 0;	/* Out of range */
1180
1181	for (cpu = 0; cpu <= mp_naps; cpu++)
1182		if (CPU_TO_ID(cpu) == id)
1183			return 0;	/* Conflict with CPU */
1184
1185	for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1186		if (IO_TO_ID(oapic) == id)
1187			return 0;	/* Conflict with other APIC */
1188
1189	return 1;		/* ID is acceptable for IO APIC */
1190}
1191
1192
1193/*
1194 * parse an Intel MP specification table
1195 */
1196static void
1197fix_mp_table(void)
1198{
1199	int	x;
1200	int	id;
1201	int	bus_0 = 0;	/* Stop GCC warning */
1202	int	bus_pci = 0;	/* Stop GCC warning */
1203	int	num_pci_bus;
1204	int	apic;		/* IO APIC unit number */
1205	int     freeid;		/* Free physical APIC ID */
1206	int	physid;		/* Current physical IO APIC ID */
1207
1208	/*
1209	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1210	 * did it wrong.  The MP spec says that when more than 1 PCI bus
1211	 * exists the BIOS must begin with bus entries for the PCI bus and use
1212	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
1213	 * exists the BIOS can choose to ignore this ordering, and indeed many
1214	 * MP motherboards do ignore it.  This causes a problem when the PCI
1215	 * sub-system makes requests of the MP sub-system based on PCI bus
1216	 * numbers.	So here we look for the situation and renumber the
1217	 * busses and associated INTs in an effort to "make it right".
1218	 */
1219
1220	/* find bus 0, PCI bus, count the number of PCI busses */
1221	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1222		if (bus_data[x].bus_id == 0) {
1223			bus_0 = x;
1224		}
1225		if (bus_data[x].bus_type == PCI) {
1226			++num_pci_bus;
1227			bus_pci = x;
1228		}
1229	}
1230	/*
1231	 * bus_0 == slot of bus with ID of 0
1232	 * bus_pci == slot of last PCI bus encountered
1233	 */
1234
1235	/* check the 1 PCI bus case for sanity */
1236	/* if it is number 0 all is well */
1237	if (num_pci_bus == 1 &&
1238	    bus_data[bus_pci].bus_id != 0) {
1239
1240		/* mis-numbered, swap with whichever bus uses slot 0 */
1241
1242		/* swap the bus entry types */
1243		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1244		bus_data[bus_0].bus_type = PCI;
1245
1246		/* swap each relavant INTerrupt entry */
1247		id = bus_data[bus_pci].bus_id;
1248		for (x = 0; x < nintrs; ++x) {
1249			if (io_apic_ints[x].src_bus_id == id) {
1250				io_apic_ints[x].src_bus_id = 0;
1251			}
1252			else if (io_apic_ints[x].src_bus_id == 0) {
1253				io_apic_ints[x].src_bus_id = id;
1254			}
1255		}
1256	}
1257
1258	/* Assign IO APIC IDs.
1259	 *
1260	 * First try the existing ID. If a conflict is detected, try
1261	 * the ID in the MP table.  If a conflict is still detected, find
1262	 * a free id.
1263	 *
1264	 * We cannot use the ID_TO_IO table before all conflicts has been
1265	 * resolved and the table has been corrected.
1266	 */
1267	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
1268
1269		/* First try to use the value set by the BIOS */
1270		physid = io_apic_get_id(apic);
1271		if (io_apic_id_acceptable(apic, physid)) {
1272			if (IO_TO_ID(apic) != physid)
1273				swap_apic_id(apic, IO_TO_ID(apic), physid);
1274			continue;
1275		}
1276
1277		/* Then check if the value in the MP table is acceptable */
1278		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
1279			continue;
1280
1281		/* Last resort, find a free APIC ID and use it */
1282		freeid = first_free_apic_id();
1283		if (freeid >= NAPICID)
1284			panic("No free physical APIC IDs found");
1285
1286		if (io_apic_id_acceptable(apic, freeid)) {
1287			swap_apic_id(apic, IO_TO_ID(apic), freeid);
1288			continue;
1289		}
1290		panic("Free physical APIC ID not usable");
1291	}
1292	fix_id_to_io_mapping();
1293
1294	/* detect and fix broken Compaq MP table */
1295	if (apic_int_type(0, 0) == -1) {
1296		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
1297		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
1298		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
1299		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
1300		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
1301		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
1302		nintrs++;
1303	}
1304}
1305
1306
1307/* Assign low level interrupt handlers */
1308static void
1309setup_apic_irq_mapping(void)
1310{
1311	int	x;
1312	int	int_vector;
1313
1314	/* Clear array */
1315	for (x = 0; x < APIC_INTMAPSIZE; x++) {
1316		int_to_apicintpin[x].ioapic = -1;
1317		int_to_apicintpin[x].int_pin = 0;
1318		int_to_apicintpin[x].apic_address = NULL;
1319		int_to_apicintpin[x].redirindex = 0;
1320	}
1321
1322	/* First assign ISA/EISA interrupts */
1323	for (x = 0; x < nintrs; x++) {
1324		int_vector = io_apic_ints[x].src_bus_irq;
1325		if (int_vector < APIC_INTMAPSIZE &&
1326		    io_apic_ints[x].int_vector == 0xff &&
1327		    int_to_apicintpin[int_vector].ioapic == -1 &&
1328		    (apic_int_is_bus_type(x, ISA) ||
1329		     apic_int_is_bus_type(x, EISA)) &&
1330		    io_apic_ints[x].int_type == 0) {
1331			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1332					io_apic_ints[x].dst_apic_int,
1333					int_vector);
1334		}
1335	}
1336
1337	/* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1338	for (x = 0; x < nintrs; x++) {
1339		if (io_apic_ints[x].dst_apic_int == 0 &&
1340		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1341		    io_apic_ints[x].int_vector == 0xff &&
1342		    int_to_apicintpin[0].ioapic == -1 &&
1343		    io_apic_ints[x].int_type == 3) {
1344			assign_apic_irq(0, 0, 0);
1345			break;
1346		}
1347	}
1348	/* PCI interrupt assignment is deferred */
1349}
1350
1351
1352static int
1353processor_entry(proc_entry_ptr entry, int cpu)
1354{
1355	/* check for usability */
1356	if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1357		return 0;
1358
1359	if(entry->apic_id >= NAPICID)
1360		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
1361	/* check for BSP flag */
1362	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1363		boot_cpu_id = entry->apic_id;
1364		CPU_TO_ID(0) = entry->apic_id;
1365		ID_TO_CPU(entry->apic_id) = 0;
1366		return 0;	/* its already been counted */
1367	}
1368
1369	/* add another AP to list, if less than max number of CPUs */
1370	else if (cpu < MAXCPU) {
1371		CPU_TO_ID(cpu) = entry->apic_id;
1372		ID_TO_CPU(entry->apic_id) = cpu;
1373		return 1;
1374	}
1375
1376	return 0;
1377}
1378
1379
1380static int
1381bus_entry(bus_entry_ptr entry, int bus)
1382{
1383	int     x;
1384	char    c, name[8];
1385
1386	/* encode the name into an index */
1387	for (x = 0; x < 6; ++x) {
1388		if ((c = entry->bus_type[x]) == ' ')
1389			break;
1390		name[x] = c;
1391	}
1392	name[x] = '\0';
1393
1394	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1395		panic("unknown bus type: '%s'", name);
1396
1397	bus_data[bus].bus_id = entry->bus_id;
1398	bus_data[bus].bus_type = x;
1399
1400	return 1;
1401}
1402
1403
1404static int
1405io_apic_entry(io_apic_entry_ptr entry, int apic)
1406{
1407	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1408		return 0;
1409
1410	IO_TO_ID(apic) = entry->apic_id;
1411	if (entry->apic_id < NAPICID)
1412		ID_TO_IO(entry->apic_id) = apic;
1413
1414	return 1;
1415}
1416
1417
1418static int
1419lookup_bus_type(char *name)
1420{
1421	int     x;
1422
1423	for (x = 0; x < MAX_BUSTYPE; ++x)
1424		if (strcmp(bus_type_table[x].name, name) == 0)
1425			return bus_type_table[x].type;
1426
1427	return UNKNOWN_BUSTYPE;
1428}
1429
1430
1431static int
1432int_entry(int_entry_ptr entry, int intr)
1433{
1434	int apic;
1435
1436	io_apic_ints[intr].int_type = entry->int_type;
1437	io_apic_ints[intr].int_flags = entry->int_flags;
1438	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1439	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1440	if (entry->dst_apic_id == 255) {
1441		/* This signal goes to all IO APICS.  Select an IO APIC
1442		   with sufficient number of interrupt pins */
1443		for (apic = 0; apic < mp_napics; apic++)
1444			if (((io_apic_read(apic, IOAPIC_VER) &
1445			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1446			    entry->dst_apic_int)
1447				break;
1448		if (apic < mp_napics)
1449			io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1450		else
1451			io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1452	} else
1453		io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1454	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1455
1456	return 1;
1457}
1458
1459
1460static int
1461apic_int_is_bus_type(int intr, int bus_type)
1462{
1463	int     bus;
1464
1465	for (bus = 0; bus < mp_nbusses; ++bus)
1466		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1467		    && ((int) bus_data[bus].bus_type == bus_type))
1468			return 1;
1469
1470	return 0;
1471}
1472
1473
1474/*
1475 * Given a traditional ISA INT mask, return an APIC mask.
1476 */
1477u_int
1478isa_apic_mask(u_int isa_mask)
1479{
1480	int isa_irq;
1481	int apic_pin;
1482
1483#if defined(SKIP_IRQ15_REDIRECT)
1484	if (isa_mask == (1 << 15)) {
1485		printf("skipping ISA IRQ15 redirect\n");
1486		return isa_mask;
1487	}
1488#endif  /* SKIP_IRQ15_REDIRECT */
1489
1490	isa_irq = ffs(isa_mask);		/* find its bit position */
1491	if (isa_irq == 0)			/* doesn't exist */
1492		return 0;
1493	--isa_irq;				/* make it zero based */
1494
1495	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
1496	if (apic_pin == -1)
1497		return 0;
1498
1499	return (1 << apic_pin);			/* convert pin# to a mask */
1500}
1501
1502
1503/*
1504 * Determine which APIC pin an ISA/EISA INT is attached to.
1505 */
1506#define INTTYPE(I)	(io_apic_ints[(I)].int_type)
1507#define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
1508#define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
1509#define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1510
1511#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
1512int
1513isa_apic_irq(int isa_irq)
1514{
1515	int     intr;
1516
1517	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
1518		if (INTTYPE(intr) == 0) {		/* standard INT */
1519			if (SRCBUSIRQ(intr) == isa_irq) {
1520				if (apic_int_is_bus_type(intr, ISA) ||
1521			            apic_int_is_bus_type(intr, EISA)) {
1522					if (INTIRQ(intr) == 0xff)
1523						return -1; /* unassigned */
1524					return INTIRQ(intr);	/* found */
1525				}
1526			}
1527		}
1528	}
1529	return -1;					/* NOT found */
1530}
1531
1532
1533/*
1534 * Determine which APIC pin a PCI INT is attached to.
1535 */
1536#define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
1537#define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1538#define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
1539int
1540pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1541{
1542	int     intr;
1543
1544	--pciInt;					/* zero based */
1545
1546	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
1547		if ((INTTYPE(intr) == 0)		/* standard INT */
1548		    && (SRCBUSID(intr) == pciBus)
1549		    && (SRCBUSDEVICE(intr) == pciDevice)
1550		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
1551			if (apic_int_is_bus_type(intr, PCI)) {
1552				if (INTIRQ(intr) == 0xff)
1553					allocate_apic_irq(intr);
1554				if (INTIRQ(intr) == 0xff)
1555					return -1;	/* unassigned */
1556				return INTIRQ(intr);	/* exact match */
1557			}
1558
1559	return -1;					/* NOT found */
1560}
1561
1562int
1563next_apic_irq(int irq)
1564{
1565	int intr, ointr;
1566	int bus, bustype;
1567
1568	bus = 0;
1569	bustype = 0;
1570	for (intr = 0; intr < nintrs; intr++) {
1571		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1572			continue;
1573		bus = SRCBUSID(intr);
1574		bustype = apic_bus_type(bus);
1575		if (bustype != ISA &&
1576		    bustype != EISA &&
1577		    bustype != PCI)
1578			continue;
1579		break;
1580	}
1581	if (intr >= nintrs) {
1582		return -1;
1583	}
1584	for (ointr = intr + 1; ointr < nintrs; ointr++) {
1585		if (INTTYPE(ointr) != 0)
1586			continue;
1587		if (bus != SRCBUSID(ointr))
1588			continue;
1589		if (bustype == PCI) {
1590			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1591				continue;
1592			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1593				continue;
1594		}
1595		if (bustype == ISA || bustype == EISA) {
1596			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1597				continue;
1598		}
1599		if (INTPIN(intr) == INTPIN(ointr))
1600			continue;
1601		break;
1602	}
1603	if (ointr >= nintrs) {
1604		return -1;
1605	}
1606	return INTIRQ(ointr);
1607}
1608#undef SRCBUSLINE
1609#undef SRCBUSDEVICE
1610#undef SRCBUSID
1611#undef SRCBUSIRQ
1612
1613#undef INTPIN
1614#undef INTIRQ
1615#undef INTAPIC
1616#undef INTTYPE
1617
1618
1619/*
1620 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1621 *
1622 * XXX FIXME:
1623 *  Exactly what this means is unclear at this point.  It is a solution
1624 *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
1625 *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
1626 *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1627 *  option.
1628 */
1629int
1630undirect_isa_irq(int rirq)
1631{
1632#if defined(READY)
1633	if (bootverbose)
1634	    printf("Freeing redirected ISA irq %d.\n", rirq);
1635	/** FIXME: tickle the MB redirector chip */
1636	return -1;
1637#else
1638	if (bootverbose)
1639	    printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1640	return 0;
1641#endif  /* READY */
1642}
1643
1644
1645/*
1646 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1647 */
1648int
1649undirect_pci_irq(int rirq)
1650{
1651#if defined(READY)
1652	if (bootverbose)
1653		printf("Freeing redirected PCI irq %d.\n", rirq);
1654
1655	/** FIXME: tickle the MB redirector chip */
1656	return -1;
1657#else
1658	if (bootverbose)
1659		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1660		       rirq);
1661	return 0;
1662#endif  /* READY */
1663}
1664
1665
1666/*
1667 * given a bus ID, return:
1668 *  the bus type if found
1669 *  -1 if NOT found
1670 */
1671int
1672apic_bus_type(int id)
1673{
1674	int     x;
1675
1676	for (x = 0; x < mp_nbusses; ++x)
1677		if (bus_data[x].bus_id == id)
1678			return bus_data[x].bus_type;
1679
1680	return -1;
1681}
1682
1683
1684/*
1685 * given a LOGICAL APIC# and pin#, return:
1686 *  the associated src bus ID if found
1687 *  -1 if NOT found
1688 */
1689int
1690apic_src_bus_id(int apic, int pin)
1691{
1692	int     x;
1693
1694	/* search each of the possible INTerrupt sources */
1695	for (x = 0; x < nintrs; ++x)
1696		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1697		    (pin == io_apic_ints[x].dst_apic_int))
1698			return (io_apic_ints[x].src_bus_id);
1699
1700	return -1;		/* NOT found */
1701}
1702
1703
1704/*
1705 * given a LOGICAL APIC# and pin#, return:
1706 *  the associated src bus IRQ if found
1707 *  -1 if NOT found
1708 */
1709int
1710apic_src_bus_irq(int apic, int pin)
1711{
1712	int     x;
1713
1714	for (x = 0; x < nintrs; x++)
1715		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1716		    (pin == io_apic_ints[x].dst_apic_int))
1717			return (io_apic_ints[x].src_bus_irq);
1718
1719	return -1;		/* NOT found */
1720}
1721
1722
1723/*
1724 * given a LOGICAL APIC# and pin#, return:
1725 *  the associated INTerrupt type if found
1726 *  -1 if NOT found
1727 */
1728int
1729apic_int_type(int apic, int pin)
1730{
1731	int     x;
1732
1733	/* search each of the possible INTerrupt sources */
1734	for (x = 0; x < nintrs; ++x)
1735		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1736		    (pin == io_apic_ints[x].dst_apic_int))
1737			return (io_apic_ints[x].int_type);
1738
1739	return -1;		/* NOT found */
1740}
1741
1742int
1743apic_irq(int apic, int pin)
1744{
1745	int x;
1746	int res;
1747
1748	for (x = 0; x < nintrs; ++x)
1749		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1750		    (pin == io_apic_ints[x].dst_apic_int)) {
1751			res = io_apic_ints[x].int_vector;
1752			if (res == 0xff)
1753				return -1;
1754			if (apic != int_to_apicintpin[res].ioapic)
1755				panic("apic_irq: inconsistent table");
1756			if (pin != int_to_apicintpin[res].int_pin)
1757				panic("apic_irq inconsistent table (2)");
1758			return res;
1759		}
1760	return -1;
1761}
1762
1763
1764/*
1765 * given a LOGICAL APIC# and pin#, return:
1766 *  the associated trigger mode if found
1767 *  -1 if NOT found
1768 */
1769int
1770apic_trigger(int apic, int pin)
1771{
1772	int     x;
1773
1774	/* search each of the possible INTerrupt sources */
1775	for (x = 0; x < nintrs; ++x)
1776		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1777		    (pin == io_apic_ints[x].dst_apic_int))
1778			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1779
1780	return -1;		/* NOT found */
1781}
1782
1783
1784/*
1785 * given a LOGICAL APIC# and pin#, return:
1786 *  the associated 'active' level if found
1787 *  -1 if NOT found
1788 */
1789int
1790apic_polarity(int apic, int pin)
1791{
1792	int     x;
1793
1794	/* search each of the possible INTerrupt sources */
1795	for (x = 0; x < nintrs; ++x)
1796		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1797		    (pin == io_apic_ints[x].dst_apic_int))
1798			return (io_apic_ints[x].int_flags & 0x03);
1799
1800	return -1;		/* NOT found */
1801}
1802
1803
1804/*
1805 * set data according to MP defaults
1806 * FIXME: probably not complete yet...
1807 */
1808static void
1809default_mp_table(int type)
1810{
1811	int     ap_cpu_id;
1812#if defined(APIC_IO)
1813	int     io_apic_id;
1814	int     pin;
1815#endif	/* APIC_IO */
1816
1817#if 0
1818	printf("  MP default config type: %d\n", type);
1819	switch (type) {
1820	case 1:
1821		printf("   bus: ISA, APIC: 82489DX\n");
1822		break;
1823	case 2:
1824		printf("   bus: EISA, APIC: 82489DX\n");
1825		break;
1826	case 3:
1827		printf("   bus: EISA, APIC: 82489DX\n");
1828		break;
1829	case 4:
1830		printf("   bus: MCA, APIC: 82489DX\n");
1831		break;
1832	case 5:
1833		printf("   bus: ISA+PCI, APIC: Integrated\n");
1834		break;
1835	case 6:
1836		printf("   bus: EISA+PCI, APIC: Integrated\n");
1837		break;
1838	case 7:
1839		printf("   bus: MCA+PCI, APIC: Integrated\n");
1840		break;
1841	default:
1842		printf("   future type\n");
1843		break;
1844		/* NOTREACHED */
1845	}
1846#endif	/* 0 */
1847
1848	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1849	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1850
1851	/* BSP */
1852	CPU_TO_ID(0) = boot_cpu_id;
1853	ID_TO_CPU(boot_cpu_id) = 0;
1854
1855	/* one and only AP */
1856	CPU_TO_ID(1) = ap_cpu_id;
1857	ID_TO_CPU(ap_cpu_id) = 1;
1858
1859#if defined(APIC_IO)
1860	/* one and only IO APIC */
1861	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1862
1863	/*
1864	 * sanity check, refer to MP spec section 3.6.6, last paragraph
1865	 * necessary as some hardware isn't properly setting up the IO APIC
1866	 */
1867#if defined(REALLY_ANAL_IOAPICID_VALUE)
1868	if (io_apic_id != 2) {
1869#else
1870	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1871#endif	/* REALLY_ANAL_IOAPICID_VALUE */
1872		io_apic_set_id(0, 2);
1873		io_apic_id = 2;
1874	}
1875	IO_TO_ID(0) = io_apic_id;
1876	ID_TO_IO(io_apic_id) = 0;
1877#endif	/* APIC_IO */
1878
1879	/* fill out bus entries */
1880	switch (type) {
1881	case 1:
1882	case 2:
1883	case 3:
1884	case 4:
1885	case 5:
1886	case 6:
1887	case 7:
1888		bus_data[0].bus_id = default_data[type - 1][1];
1889		bus_data[0].bus_type = default_data[type - 1][2];
1890		bus_data[1].bus_id = default_data[type - 1][3];
1891		bus_data[1].bus_type = default_data[type - 1][4];
1892		break;
1893
1894	/* case 4: case 7:		   MCA NOT supported */
1895	default:		/* illegal/reserved */
1896		panic("BAD default MP config: %d", type);
1897		/* NOTREACHED */
1898	}
1899
1900#if defined(APIC_IO)
1901	/* general cases from MP v1.4, table 5-2 */
1902	for (pin = 0; pin < 16; ++pin) {
1903		io_apic_ints[pin].int_type = 0;
1904		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
1905		io_apic_ints[pin].src_bus_id = 0;
1906		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
1907		io_apic_ints[pin].dst_apic_id = io_apic_id;
1908		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
1909	}
1910
1911	/* special cases from MP v1.4, table 5-2 */
1912	if (type == 2) {
1913		io_apic_ints[2].int_type = 0xff;	/* N/C */
1914		io_apic_ints[13].int_type = 0xff;	/* N/C */
1915#if !defined(APIC_MIXED_MODE)
1916		/** FIXME: ??? */
1917		panic("sorry, can't support type 2 default yet");
1918#endif	/* APIC_MIXED_MODE */
1919	}
1920	else
1921		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
1922
1923	if (type == 7)
1924		io_apic_ints[0].int_type = 0xff;	/* N/C */
1925	else
1926		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
1927#endif	/* APIC_IO */
1928}
1929
1930
1931/*
1932 * start each AP in our list
1933 */
1934static int
1935start_all_aps(u_int boot_addr)
1936{
1937	int     x, i, pg;
1938	u_char  mpbiosreason;
1939	u_long  mpbioswarmvec;
1940	struct pcpu *pc;
1941	char *stack;
1942	uintptr_t kptbase;
1943
1944	POSTCODE(START_ALL_APS_POST);
1945
1946	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
1947
1948	/* initialize BSP's local APIC */
1949	apic_initialize();
1950	bsp_apic_ready = 1;
1951
1952	/* install the AP 1st level boot code */
1953	install_ap_tramp(boot_addr);
1954
1955
1956	/* save the current value of the warm-start vector */
1957	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1958#ifndef PC98
1959	outb(CMOS_REG, BIOS_RESET);
1960	mpbiosreason = inb(CMOS_DATA);
1961#endif
1962
1963	/* set up temporary P==V mapping for AP boot */
1964	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
1965	kptbase = (uintptr_t)(void *)KPTphys;
1966	for (x = 0; x < NKPT; x++)
1967		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
1968		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
1969	invltlb();
1970
1971	/* start each AP */
1972	for (x = 1; x <= mp_naps; ++x) {
1973
1974		/* This is a bit verbose, it will go away soon.  */
1975
1976		/* first page of AP's private space */
1977		pg = x * i386_btop(sizeof(struct privatespace));
1978
1979		/* allocate a new private data page */
1980		pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
1981
1982		/* wire it into the private page table page */
1983		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
1984
1985		/* allocate and set up an idle stack data page */
1986		stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
1987		for (i = 0; i < KSTACK_PAGES; i++)
1988			SMPpt[pg + 1 + i] = (pt_entry_t)
1989			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1990
1991		/* prime data page for it to use */
1992		pcpu_init(pc, x, sizeof(struct pcpu));
1993
1994		/* setup a vector to our boot code */
1995		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1996		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
1997#ifndef PC98
1998		outb(CMOS_REG, BIOS_RESET);
1999		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
2000#endif
2001
2002		bootSTK = &SMP_prvspace[x].idlekstack[KSTACK_PAGES * PAGE_SIZE];
2003		bootAP = x;
2004
2005		/* attempt to start the Application Processor */
2006		CHECK_INIT(99);	/* setup checkpoints */
2007		if (!start_ap(x, boot_addr)) {
2008			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
2009			CHECK_PRINT("trace");	/* show checkpoints */
2010			/* better panic as the AP may be running loose */
2011			printf("panic y/n? [y] ");
2012			if (cngetc() != 'n')
2013				panic("bye-bye");
2014		}
2015		CHECK_PRINT("trace");		/* show checkpoints */
2016
2017		/* record its version info */
2018		cpu_apic_versions[x] = cpu_apic_versions[0];
2019
2020		all_cpus |= (1 << x);		/* record AP in CPU map */
2021	}
2022
2023	/* build our map of 'other' CPUs */
2024	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2025
2026	/* fill in our (BSP) APIC version */
2027	cpu_apic_versions[0] = lapic.version;
2028
2029	/* restore the warmstart vector */
2030	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
2031#ifndef PC98
2032	outb(CMOS_REG, BIOS_RESET);
2033	outb(CMOS_DATA, mpbiosreason);
2034#endif
2035
2036	/*
2037	 * Set up the idle context for the BSP.  Similar to above except
2038	 * that some was done by locore, some by pmap.c and some is implicit
2039	 * because the BSP is cpu#0 and the page is initially zero, and also
2040	 * because we can refer to variables by name on the BSP..
2041	 */
2042
2043	/* Allocate and setup BSP idle stack */
2044	stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
2045	for (i = 0; i < KSTACK_PAGES; i++)
2046		SMPpt[1 + i] = (pt_entry_t)
2047		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
2048
2049	for (x = 0; x < NKPT; x++)
2050		PTD[x] = 0;
2051	pmap_set_opt();
2052
2053	/* number of APs actually started */
2054	return mp_ncpus - 1;
2055}
2056
2057
2058/*
2059 * load the 1st level AP boot code into base memory.
2060 */
2061
2062/* targets for relocation */
2063extern void bigJump(void);
2064extern void bootCodeSeg(void);
2065extern void bootDataSeg(void);
2066extern void MPentry(void);
2067extern u_int MP_GDT;
2068extern u_int mp_gdtbase;
2069
2070static void
2071install_ap_tramp(u_int boot_addr)
2072{
2073	int     x;
2074	int     size = *(int *) ((u_long) & bootMP_size);
2075	u_char *src = (u_char *) ((u_long) bootMP);
2076	u_char *dst = (u_char *) boot_addr + KERNBASE;
2077	u_int   boot_base = (u_int) bootMP;
2078	u_int8_t *dst8;
2079	u_int16_t *dst16;
2080	u_int32_t *dst32;
2081
2082	POSTCODE(INSTALL_AP_TRAMP_POST);
2083
2084	for (x = 0; x < size; ++x)
2085		*dst++ = *src++;
2086
2087	/*
2088	 * modify addresses in code we just moved to basemem. unfortunately we
2089	 * need fairly detailed info about mpboot.s for this to work.  changes
2090	 * to mpboot.s might require changes here.
2091	 */
2092
2093	/* boot code is located in KERNEL space */
2094	dst = (u_char *) boot_addr + KERNBASE;
2095
2096	/* modify the lgdt arg */
2097	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2098	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2099
2100	/* modify the ljmp target for MPentry() */
2101	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2102	*dst32 = ((u_int) MPentry - KERNBASE);
2103
2104	/* modify the target for boot code segment */
2105	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2106	dst8 = (u_int8_t *) (dst16 + 1);
2107	*dst16 = (u_int) boot_addr & 0xffff;
2108	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
2109
2110	/* modify the target for boot data segment */
2111	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2112	dst8 = (u_int8_t *) (dst16 + 1);
2113	*dst16 = (u_int) boot_addr & 0xffff;
2114	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
2115}
2116
2117
2118/*
2119 * this function starts the AP (application processor) identified
2120 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
2121 * to accomplish this.  This is necessary because of the nuances
2122 * of the different hardware we might encounter.  It ain't pretty,
2123 * but it seems to work.
2124 */
2125static int
2126start_ap(int logical_cpu, u_int boot_addr)
2127{
2128	int     physical_cpu;
2129	int     vector;
2130	int     cpus;
2131	u_long  icr_lo, icr_hi;
2132
2133	POSTCODE(START_AP_POST);
2134
2135	/* get the PHYSICAL APIC ID# */
2136	physical_cpu = CPU_TO_ID(logical_cpu);
2137
2138	/* calculate the vector */
2139	vector = (boot_addr >> 12) & 0xff;
2140
2141	/* used as a watchpoint to signal AP startup */
2142	cpus = mp_ncpus;
2143
2144	/*
2145	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
2146	 * and running the target CPU. OR this INIT IPI might be latched (P5
2147	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
2148	 * ignored.
2149	 */
2150
2151	/* setup the address for the target AP */
2152	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
2153	icr_hi |= (physical_cpu << 24);
2154	lapic.icr_hi = icr_hi;
2155
2156	/* do an INIT IPI: assert RESET */
2157	icr_lo = lapic.icr_lo & 0xfff00000;
2158	lapic.icr_lo = icr_lo | 0x0000c500;
2159
2160	/* wait for pending status end */
2161	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2162		 /* spin */ ;
2163
2164	/* do an INIT IPI: deassert RESET */
2165	lapic.icr_lo = icr_lo | 0x00008500;
2166
2167	/* wait for pending status end */
2168	u_sleep(10000);		/* wait ~10mS */
2169	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2170		 /* spin */ ;
2171
2172	/*
2173	 * next we do a STARTUP IPI: the previous INIT IPI might still be
2174	 * latched, (P5 bug) this 1st STARTUP would then terminate
2175	 * immediately, and the previously started INIT IPI would continue. OR
2176	 * the previous INIT IPI has already run. and this STARTUP IPI will
2177	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
2178	 * will run.
2179	 */
2180
2181	/* do a STARTUP IPI */
2182	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2183	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2184		 /* spin */ ;
2185	u_sleep(200);		/* wait ~200uS */
2186
2187	/*
2188	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
2189	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
2190	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
2191	 * recognized after hardware RESET or INIT IPI.
2192	 */
2193
2194	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2195	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2196		 /* spin */ ;
2197	u_sleep(200);		/* wait ~200uS */
2198
2199	/* wait for it to start */
2200	set_apic_timer(5000000);/* == 5 seconds */
2201	while (read_apic_timer())
2202		if (mp_ncpus > cpus)
2203			return 1;	/* return SUCCESS */
2204
2205	return 0;		/* return FAILURE */
2206}
2207
2208#if defined(APIC_IO)
2209
#ifdef COUNT_XINVLTLB_HITS
/*
 * Debugging counters, compiled in only with COUNT_XINVLTLB_HITS and
 * exported read/write below the debug.xhits sysctl node.  The code that
 * updates them is not in this part of the file; judging by the names
 * they presumably count TLB-invalidation IPIs sent and handled (global /
 * single page / range) -- verify against the users.
 */

/* per-CPU arrays, exported as opaque blobs */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];

/* unmasked variants */
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;

/* masked variants */
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;

SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");

SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW,
    &xhits_gbl, sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW,
    &xhits_pg, sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW,
    &xhits_rng, sizeof(xhits_rng), "IU", "");

SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW,
    &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW,
    &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW,
    &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW,
    &ipi_range_size, 0, "");

SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
    &ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
    &ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
    &ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
    &ipi_masked_range_size, 0, "");
#endif
2245
2246/*
2247 * Flush the TLB on all other CPU's
2248 */
2249static void
2250smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
2251{
2252	u_int ncpu;
2253	register_t eflags;
2254
2255	ncpu = mp_ncpus - 1;	/* does not shootdown self */
2256	if (ncpu < 1)
2257		return;		/* no other cpus */
2258	eflags = read_eflags();
2259	if ((eflags & PSL_I) == 0)
2260		panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
2261	mtx_lock_spin(&smp_tlb_mtx);
2262	smp_tlb_addr1 = addr1;
2263	smp_tlb_addr2 = addr2;
2264	atomic_store_rel_int(&smp_tlb_wait, 0);
2265	ipi_all_but_self(vector);
2266	while (smp_tlb_wait < ncpu)
2267		ia32_pause();
2268	mtx_unlock_spin(&smp_tlb_mtx);
2269}
2270
2271/*
2272 * This is about as magic as it gets.  fortune(1) has got similar code
2273 * for reversing bits in a word.  Who thinks up this stuff??
2274 *
2275 * Yes, it does appear to be consistently faster than:
2276 * while (i = ffs(m)) {
2277 *	m >>= i;
2278 *	bits++;
2279 * }
2280 * and
2281 * while (lsb = (m & -m)) {	// This is magic too
2282 * 	m &= ~lsb;		// or: m ^= lsb
2283 *	bits++;
2284 * }
2285 * Both of these latter forms do some very strange things on gcc-3.1 with
2286 * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
2287 * There is probably an SSE or MMX popcnt instruction.
2288 *
2289 * I wonder if this should be in libkern?
2290 *
2291 * XXX Stop the presses!  Another one:
2292 * static __inline u_int32_t
2293 * popcnt1(u_int32_t v)
2294 * {
2295 *	v -= ((v >> 1) & 0x55555555);
2296 *	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
2297 *	v = (v + (v >> 4)) & 0x0F0F0F0F;
2298 *	return (v * 0x01010101) >> 24;
2299 * }
2300 * The downside is that it has a multiply.  With a pentium3 with
2301 * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
2302 * an imull, and in that case it is faster.  In most other cases
2303 * it appears slightly slower.
2304 */
static __inline u_int32_t
popcnt(u_int32_t m)
{

	/*
	 * Parallel bit count.  Each step adds adjacent fields of width
	 * 1, 2, 4, 8 and finally 16 bits, so after the last step the word
	 * holds the number of bits that were set in the argument.
	 */
	m = ((m >> 1) & 0x55555555) + (m & 0x55555555);
	m = ((m >> 2) & 0x33333333) + (m & 0x33333333);
	m = ((m >> 4) & 0x0f0f0f0f) + (m & 0x0f0f0f0f);
	m = ((m >> 8) & 0x00ff00ff) + (m & 0x00ff00ff);
	m = (m >> 16) + (m & 0x0000ffff);
	return (m);
}
2316
2317static void
2318smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
2319{
2320	int ncpu, othercpus;
2321	register_t eflags;
2322
2323	othercpus = mp_ncpus - 1;
2324	if (mask == (u_int)-1) {
2325		ncpu = othercpus;
2326		if (ncpu < 1)
2327			return;
2328	} else {
2329		/* XXX there should be a pcpu self mask */
2330		mask &= ~(1 << PCPU_GET(cpuid));
2331		if (mask == 0)
2332			return;
2333		ncpu = popcnt(mask);
2334		if (ncpu > othercpus) {
2335			/* XXX this should be a panic offence */
2336			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
2337			    ncpu, othercpus);
2338			ncpu = othercpus;
2339		}
2340		/* XXX should be a panic, implied by mask == 0 above */
2341		if (ncpu < 1)
2342			return;
2343	}
2344	eflags = read_eflags();
2345	if ((eflags & PSL_I) == 0)
2346		panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
2347	mtx_lock_spin(&smp_tlb_mtx);
2348	smp_tlb_addr1 = addr1;
2349	smp_tlb_addr2 = addr2;
2350	atomic_store_rel_int(&smp_tlb_wait, 0);
2351	if (mask == (u_int)-1)
2352		ipi_all_but_self(vector);
2353	else
2354		ipi_selected(mask, vector);
2355	while (smp_tlb_wait < ncpu)
2356		ia32_pause();
2357	mtx_unlock_spin(&smp_tlb_mtx);
2358}
2359#endif
2360
/*
 * Request an IPI_INVLTLB shootdown on all other CPUs.
 *
 * Does nothing while smp_started is still zero, and compiles to an empty
 * function when APIC_IO is not defined.  With COUNT_XINVLTLB_HITS the
 * ipi_global counter is bumped after the shootdown.
 */
void
smp_invltlb(void)
{
#ifdef APIC_IO
	if (!smp_started)
		return;
	smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
	ipi_global++;
#endif
#endif	/* APIC_IO */
}
2373
2374void
2375smp_invlpg(vm_offset_t addr)
2376{
2377#if defined(APIC_IO)
2378	if (smp_started) {
2379		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
2380#ifdef COUNT_XINVLTLB_HITS
2381		ipi_page++;
2382#endif
2383	}
2384#endif  /* APIC_IO */
2385}
2386
2387void
2388smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
2389{
2390#if defined(APIC_IO)
2391	if (smp_started) {
2392		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
2393#ifdef COUNT_XINVLTLB_HITS
2394		ipi_range++;
2395		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
2396#endif
2397	}
2398#endif  /* APIC_IO */
2399}
2400
2401void
2402smp_masked_invltlb(u_int mask)
2403{
2404#if defined(APIC_IO)
2405	if (smp_started) {
2406		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
2407#ifdef COUNT_XINVLTLB_HITS
2408		ipi_masked_global++;
2409#endif
2410	}
2411#endif  /* APIC_IO */
2412}
2413
2414void
2415smp_masked_invlpg(u_int mask, vm_offset_t addr)
2416{
2417#if defined(APIC_IO)
2418	if (smp_started) {
2419		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
2420#ifdef COUNT_XINVLTLB_HITS
2421		ipi_masked_page++;
2422#endif
2423	}
2424#endif  /* APIC_IO */
2425}
2426
2427void
2428smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
2429{
2430#if defined(APIC_IO)
2431	if (smp_started) {
2432		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
2433#ifdef COUNT_XINVLTLB_HITS
2434		ipi_masked_range++;
2435		ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
2436#endif
2437	}
2438#endif  /* APIC_IO */
2439}
2440
2441
2442/*
2443 * This is called once the rest of the system is up and running and we're
2444 * ready to let the AP's out of the pen.
2445 */
2446extern void	enable_sse(void);
2447
2448void
2449ap_init(void)
2450{
2451	u_int	apic_id;
2452
2453	/* spin until all the AP's are ready */
2454	while (!aps_ready)
2455		ia32_pause();
2456
2457	/* BSP may have changed PTD while we were waiting */
2458	invltlb();
2459
2460#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2461	lidt(&r_idt);
2462#endif
2463
2464	/* set up CPU registers and state */
2465	cpu_setregs();
2466
2467	/* set up FPU state on the AP */
2468	npxinit(__INITIAL_NPXCW__);
2469
2470	/* set up SSE registers */
2471	enable_sse();
2472
2473	/* A quick check from sanity claus */
2474	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2475	if (PCPU_GET(cpuid) != apic_id) {
2476		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
2477		printf("SMP: apic_id = %d\n", apic_id);
2478		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
2479		panic("cpuid mismatch! boom!!");
2480	}
2481
2482	/* Init local apic for irq's */
2483	apic_initialize();
2484
2485	/* Set memory range attributes for this CPU to match the BSP */
2486	mem_range_AP_init();
2487
2488	mtx_lock_spin(&ap_boot_mtx);
2489
2490	smp_cpus++;
2491
2492	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
2493	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
2494
2495	/* Build our map of 'other' CPUs. */
2496	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2497
2498	if (bootverbose)
2499		apic_dump("ap_init()");
2500
2501	if (smp_cpus == mp_ncpus) {
2502		/* enable IPI's, tlb shootdown, freezes etc */
2503		atomic_store_rel_int(&smp_started, 1);
2504		smp_active = 1;	 /* historic */
2505	}
2506
2507	mtx_unlock_spin(&ap_boot_mtx);
2508
2509	/* wait until all the AP's are up */
2510	while (smp_started == 0)
2511		ia32_pause();
2512
2513	/* ok, now grab sched_lock and enter the scheduler */
2514	mtx_lock_spin(&sched_lock);
2515
2516	binuptime(PCPU_PTR(switchtime));
2517	PCPU_SET(switchticks, ticks);
2518
2519	cpu_throw();	/* doesn't return */
2520
2521	panic("scheduler returned us to %s", __func__);
2522}
2523
2524/*
2525 * For statclock, we send an IPI to all CPU's to have them call this
2526 * function.
2527 *
2528 * WARNING! unpend() will call statclock_process() directly and skip this
2529 * routine.
2530 */
2531void
2532forwarded_statclock(struct trapframe frame)
2533{
2534
2535	mtx_lock_spin(&sched_lock);
2536	statclock_process(curthread->td_kse, TRAPF_PC(&frame),
2537	    TRAPF_USERMODE(&frame));
2538	mtx_unlock_spin(&sched_lock);
2539}
2540
2541void
2542forward_statclock(void)
2543{
2544	int map;
2545
2546	CTR0(KTR_SMP, "forward_statclock");
2547
2548	if (!smp_started || cold || panicstr)
2549		return;
2550
2551	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
2552	if (map != 0)
2553		ipi_selected(map, IPI_STATCLOCK);
2554}
2555
2556/*
2557 * For each hardclock(), we send an IPI to all other CPU's to have them
2558 * execute this function.  It would be nice to reduce contention on
2559 * sched_lock if we could simply peek at the CPU to determine the user/kernel
2560 * state and call hardclock_process() on the CPU receiving the clock interrupt
2561 * and then just use a simple IPI to handle any ast's if needed.
2562 *
2563 * WARNING! unpend() will call hardclock_process() directly and skip this
2564 * routine.
2565 */
2566void
2567forwarded_hardclock(struct trapframe frame)
2568{
2569
2570	mtx_lock_spin(&sched_lock);
2571	hardclock_process(curthread, TRAPF_USERMODE(&frame));
2572	mtx_unlock_spin(&sched_lock);
2573}
2574
2575void
2576forward_hardclock(void)
2577{
2578	u_int map;
2579
2580	CTR0(KTR_SMP, "forward_hardclock");
2581
2582	if (!smp_started || cold || panicstr)
2583		return;
2584
2585	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
2586	if (map != 0)
2587		ipi_selected(map, IPI_HARDCLOCK);
2588}
2589
2590#ifdef APIC_INTR_REORDER
2591/*
2592 *	Maintain mapping from softintr vector to isr bit in local apic.
2593 */
2594void
2595set_lapic_isrloc(int intr, int vector)
2596{
2597	if (intr < 0 || intr > 32)
2598		panic("set_apic_isrloc: bad intr argument: %d",intr);
2599	if (vector < ICU_OFFSET || vector > 255)
2600		panic("set_apic_isrloc: bad vector argument: %d",vector);
2601	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
2602	apic_isrbit_location[intr].bit = (1<<(vector & 31));
2603}
2604#endif
2605
2606/*
2607 * send an IPI to a set of cpus.
2608 */
2609void
2610ipi_selected(u_int32_t cpus, u_int ipi)
2611{
2612
2613	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
2614	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
2615}
2616
2617/*
2618 * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
2619 */
2620void
2621ipi_all(u_int ipi)
2622{
2623
2624	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2625	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
2626}
2627
2628/*
2629 * send an IPI to all CPUs EXCEPT myself
2630 */
2631void
2632ipi_all_but_self(u_int ipi)
2633{
2634
2635	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2636	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
2637}
2638
2639/*
2640 * send an IPI to myself
2641 */
2642void
2643ipi_self(u_int ipi)
2644{
2645
2646	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2647	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
2648}
2649
2650static void
2651release_aps(void *dummy __unused)
2652{
2653
2654	mtx_lock_spin(&sched_lock);
2655	atomic_store_rel_int(&aps_ready, 1);
2656	while (smp_started == 0)
2657		ia32_pause();
2658	mtx_unlock_spin(&sched_lock);
2659}
2660
2661SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
2662