mp_machdep.c revision 75570
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 *    derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: head/sys/amd64/amd64/mp_machdep.c 75570 2001-04-17 04:18:08Z jhb $
26 */
27
28#include "opt_cpu.h"
29
30#ifdef SMP
31#include <machine/smptests.h>
32#else
33#error
34#endif
35
36#include <sys/param.h>
37#include <sys/bus.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/proc.h>
41#include <sys/sysctl.h>
42#include <sys/malloc.h>
43#include <sys/memrange.h>
44#include <sys/mutex.h>
45#include <sys/dkstat.h>
46#include <sys/cons.h>	/* cngetc() */
47
48#include <vm/vm.h>
49#include <vm/vm_param.h>
50#include <vm/pmap.h>
51#include <vm/vm_kern.h>
52#include <vm/vm_extern.h>
53#include <sys/lock.h>
54#include <vm/vm_map.h>
55#include <sys/user.h>
56#ifdef GPROF
57#include <sys/gmon.h>
58#endif
59
60#include <machine/smp.h>
61#include <machine/apic.h>
62#include <machine/atomic.h>
63#include <machine/cpufunc.h>
64#include <machine/ipl.h>
65#include <machine/mpapic.h>
66#include <machine/psl.h>
67#include <machine/segments.h>
68#include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
69#include <machine/tss.h>
70#include <machine/specialreg.h>
71#include <machine/globaldata.h>
72
73#if defined(APIC_IO)
74#include <machine/md_var.h>		/* setidt() */
75#include <i386/isa/icu.h>		/* IPIs */
76#include <i386/isa/intr_machdep.h>	/* IPIs */
77#endif	/* APIC_IO */
78
79#if defined(TEST_DEFAULT_CONFIG)
80#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
81#else
82#define MPFPS_MPFB1	mpfps->mpfb1
83#endif  /* TEST_DEFAULT_CONFIG */
84
85#define WARMBOOT_TARGET		0
86#define WARMBOOT_OFF		(KERNBASE + 0x0467)
87#define WARMBOOT_SEG		(KERNBASE + 0x0469)
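/*
 * WARMBOOT_OFF and WARMBOOT_SEG are the kernel-virtual addresses of the
 * BIOS warm-boot vector at 0040:0067 (a real-mode far pointer: 16-bit
 * offset at physical 0x467, 16-bit segment at 0x469).  start_all_aps()
 * saves this vector and points it at the AP trampoline.
 */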
88
89#ifdef PC98
90#define BIOS_BASE		(0xe8000)
91#define BIOS_SIZE		(0x18000)
92#else
93#define BIOS_BASE		(0xf0000)
94#define BIOS_SIZE		(0x10000)
95#endif
96#define BIOS_COUNT		(BIOS_SIZE/4)
97
98#define CMOS_REG		(0x70)
99#define CMOS_DATA		(0x71)
100#define BIOS_RESET		(0x0f)
101#define BIOS_WARM		(0x0a)
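/*
 * CMOS_REG/CMOS_DATA are the RTC/CMOS index and data ports.  Writing
 * BIOS_WARM (0x0a) into the shutdown status byte (offset BIOS_RESET, 0x0f)
 * tells the BIOS to skip POST on the next reset and jump through the
 * warm-boot vector above, which is how an AP that restarts through the
 * BIOS after an INIT ends up in the trampoline.
 */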
102
103#define PROCENTRY_FLAG_EN	0x01
104#define PROCENTRY_FLAG_BP	0x02
105#define IOAPICENTRY_FLAG_EN	0x01
106
107
108/* MP Floating Pointer Structure */
109typedef struct MPFPS {
110	char    signature[4];
111	void   *pap;
112	u_char  length;
113	u_char  spec_rev;
114	u_char  checksum;
115	u_char  mpfb1;
116	u_char  mpfb2;
117	u_char  mpfb3;
118	u_char  mpfb4;
119	u_char  mpfb5;
120}      *mpfps_t;
121
122/* MP Configuration Table Header */
123typedef struct MPCTH {
124	char    signature[4];
125	u_short base_table_length;
126	u_char  spec_rev;
127	u_char  checksum;
128	u_char  oem_id[8];
129	u_char  product_id[12];
130	void   *oem_table_pointer;
131	u_short oem_table_size;
132	u_short entry_count;
133	void   *apic_address;
134	u_short extended_table_length;
135	u_char  extended_table_checksum;
136	u_char  reserved;
137}      *mpcth_t;
138
139
140typedef struct PROCENTRY {
141	u_char  type;
142	u_char  apic_id;
143	u_char  apic_version;
144	u_char  cpu_flags;
145	u_long  cpu_signature;
146	u_long  feature_flags;
147	u_long  reserved1;
148	u_long  reserved2;
149}      *proc_entry_ptr;
150
151typedef struct BUSENTRY {
152	u_char  type;
153	u_char  bus_id;
154	char    bus_type[6];
155}      *bus_entry_ptr;
156
157typedef struct IOAPICENTRY {
158	u_char  type;
159	u_char  apic_id;
160	u_char  apic_version;
161	u_char  apic_flags;
162	void   *apic_address;
163}      *io_apic_entry_ptr;
164
165typedef struct INTENTRY {
166	u_char  type;
167	u_char  int_type;
168	u_short int_flags;
169	u_char  src_bus_id;
170	u_char  src_bus_irq;
171	u_char  dst_apic_id;
172	u_char  dst_apic_int;
173}      *int_entry_ptr;
174
175/* descriptions of MP basetable entries */
176typedef struct BASETABLE_ENTRY {
177	u_char  type;
178	u_char  length;
179	char    name[16];
180}       basetable_entry;
181
182/*
183 * this code MUST be enabled here and in mpboot.s.
184 * it follows the very early stages of AP boot by placing values in CMOS ram.
185 * it is NORMALLY never needed, hence the primitive method for enabling it.
186 *
187#define CHECK_POINTS
188 */
189
190#if defined(CHECK_POINTS) && !defined(PC98)
191#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
192#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
193
194#define CHECK_INIT(D);				\
195	CHECK_WRITE(0x34, (D));			\
196	CHECK_WRITE(0x35, (D));			\
197	CHECK_WRITE(0x36, (D));			\
198	CHECK_WRITE(0x37, (D));			\
199	CHECK_WRITE(0x38, (D));			\
200	CHECK_WRITE(0x39, (D));
201
202#define CHECK_PRINT(S);				\
203	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
204	   (S),					\
205	   CHECK_READ(0x34),			\
206	   CHECK_READ(0x35),			\
207	   CHECK_READ(0x36),			\
208	   CHECK_READ(0x37),			\
209	   CHECK_READ(0x38),			\
210	   CHECK_READ(0x39));
211
212#else				/* CHECK_POINTS */
213
214#define CHECK_INIT(D)
215#define CHECK_PRINT(S)
216
217#endif				/* CHECK_POINTS */
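/*
 * When CHECK_POINTS is enabled (here and in mpboot.s), CHECK_INIT() seeds
 * CMOS bytes 0x34-0x39 with a sentinel value before each AP is started and
 * mpboot.s updates them as the AP progresses, so CHECK_PRINT() can show
 * how far a failed AP got.
 */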
218
219/*
220 * Values to send to the POST hardware.
221 */
222#define MP_BOOTADDRESS_POST	0x10
223#define MP_PROBE_POST		0x11
224#define MPTABLE_PASS1_POST	0x12
225
226#define MP_START_POST		0x13
227#define MP_ENABLE_POST		0x14
228#define MPTABLE_PASS2_POST	0x15
229
230#define START_ALL_APS_POST	0x16
231#define INSTALL_AP_TRAMP_POST	0x17
232#define START_AP_POST		0x18
233
234#define MP_ANNOUNCE_POST	0x19
235
236/* used to hold the APs until we are ready to release them */
237struct mtx			ap_boot_mtx;
238
239/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
240int	current_postcode;
241
242/** XXX FIXME: what system files declare these??? */
243extern struct region_descriptor r_gdt, r_idt;
244
245int	bsp_apic_ready = 0;	/* flags usability of BSP apic */
246int	mp_ncpus;		/* # of CPUs, including BSP */
247int	mp_naps;		/* # of Application Processors (APs) */
248int	mp_nbusses;		/* # of busses */
249int	mp_napics;		/* # of IO APICs */
250int	boot_cpu_id;		/* designated BSP */
251vm_offset_t cpu_apic_address;
252vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
253extern	int nkpt;
254
255u_int32_t cpu_apic_versions[MAXCPU];
256u_int32_t *io_apic_versions;
257
258#ifdef APIC_INTR_REORDER
259struct {
260	volatile int *location;
261	int bit;
262} apic_isrbit_location[32];
263#endif
264
265struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
266
267/*
268 * APIC ID logical/physical mapping structures.
269 * We oversize these to simplify boot-time config.
270 */
271int     cpu_num_to_apic_id[NAPICID];
272int     io_num_to_apic_id[NAPICID];
273int     apic_id_to_logical[NAPICID];
274
275
276/* Bitmap of all available CPUs */
277u_int	all_cpus;
278
279/* AP uses this during bootstrap.  Do not staticize.  */
280char *bootSTK;
281static int bootAP;
282
283/* Hotwire a 0->4MB V==P mapping */
284extern pt_entry_t *KPTphys;
285
286/* SMP page table page */
287extern pt_entry_t *SMPpt;
288
289struct pcb stoppcbs[MAXCPU];
290
291int smp_started;		/* has the system started? */
292int smp_active = 0;		/* are the APs allowed to run? */
293SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
294
295/* XXX maybe should be hw.ncpu */
296static int smp_cpus = 1;	/* how many CPUs are running */
297SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
298
299int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
300SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
301
302/* Enable forwarding of a signal to a process running on a different CPU */
303static int forward_signal_enabled = 1;
304SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
305	   &forward_signal_enabled, 0, "");
306
307/* Enable forwarding of roundrobin to all other cpus */
308static int forward_roundrobin_enabled = 1;
309SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
310	   &forward_roundrobin_enabled, 0, "");
311
312
313/*
314 * Local data and functions.
315 */
316
317/* Set to 1 once we're ready to let the APs out of the pen. */
318static volatile int aps_ready = 0;
319
320static int	mp_capable;
321static u_int	boot_address;
322static u_int	base_memory;
323
324static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
325static mpfps_t	mpfps;
326static int	search_for_sig(u_int32_t target, int count);
327static void	mp_enable(u_int boot_addr);
328
329static void	mptable_pass1(void);
330static int	mptable_pass2(void);
331static void	default_mp_table(int type);
332static void	fix_mp_table(void);
333static void	setup_apic_irq_mapping(void);
334static void	init_locks(void);
335static int	start_all_aps(u_int boot_addr);
336static void	install_ap_tramp(u_int boot_addr);
337static int	start_ap(int logicalCpu, u_int boot_addr);
338void		ap_init(void);
339static int	apic_int_is_bus_type(int intr, int bus_type);
340static void	release_aps(void *dummy);
341
342/*
343 * initialize all the SMP locks
344 */
345
346/* critical region around IO APIC, apic_imen */
347struct mtx		imen_mtx;
348
349/* lock region used by kernel profiling */
350struct mtx		mcount_mtx;
351
352#ifdef USE_COMLOCK
353/* locks com (tty) data/hardware accesses: a FASTINTR() */
354struct mtx		com_mtx;
355#endif /* USE_COMLOCK */
356
357/* lock around the MP rendezvous */
358static struct mtx	smp_rv_mtx;
359
360static void
361init_locks(void)
362{
363	/*
364	 * XXX The mcount mutex probably needs to be statically initialized,
365	 * since it will be used even in the function calls that get us to this
366	 * point.
367	 */
368	mtx_init(&mcount_mtx, "mcount", MTX_DEF);
369
370	mtx_init(&smp_rv_mtx, "smp rendezvous", MTX_SPIN);
371
372#ifdef USE_COMLOCK
373	mtx_init(&com_mtx, "com", MTX_SPIN);
374#endif /* USE_COMLOCK */
375
376	mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
377}
378
379/*
380 * Calculate usable address in base memory for AP trampoline code.
381 */
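/*
 * For example, with basemem = 639 (639 KB reported) base_memory becomes
 * 0x9fc00; rounding down to a page boundary gives boot_address = 0x9f000,
 * leaving 0xc00 bytes for the trampoline, and if bootMP_size exceeds that
 * the address drops one more page to 0x9e000.
 */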
382u_int
383mp_bootaddress(u_int basemem)
384{
385	POSTCODE(MP_BOOTADDRESS_POST);
386
387	base_memory = basemem * 1024;	/* convert to bytes */
388
389	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
390	if ((base_memory - boot_address) < bootMP_size)
391		boot_address -= 4096;	/* not enough, lower by 4k */
392
393	return boot_address;
394}
395
396
397/*
398 * Look for an Intel MP spec table (i.e., SMP-capable hardware).
399 */
400int
401mp_probe(void)
402{
403	int     x;
404	u_long  segment;
405	u_int32_t target;
406
407	POSTCODE(MP_PROBE_POST);
408
409	/* see if EBDA exists */
410	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
411		/* search first 1K of EBDA */
412		target = (u_int32_t) (segment << 4);
413		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
414			goto found;
415	} else {
416		/* last 1K of base memory, effective 'top of base' passed in */
417		target = (u_int32_t) (base_memory - 0x400);
418		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
419			goto found;
420	}
421
422	/* search the BIOS */
423	target = (u_int32_t) BIOS_BASE;
424	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
425		goto found;
426
427	/* nothing found */
428	mpfps = (mpfps_t)0;
429	mp_capable = 0;
430	return 0;
431
432found:
433	/* calculate needed resources */
434	mpfps = (mpfps_t)x;
435	mptable_pass1();
436
437	/* flag fact that we are running multiple processors */
438	mp_capable = 1;
439	return 1;
440}
441
442
443/*
444 * Initialize the SMP hardware and the APIC and start up the AP's.
445 */
446void
447mp_start(void)
448{
449	POSTCODE(MP_START_POST);
450
451	/* look for MP capable motherboard */
452	if (mp_capable)
453		mp_enable(boot_address);
454	else
455		panic("MP hardware not found!");
456}
457
458
459/*
460 * Print various information about the SMP system hardware and setup.
461 */
462void
463mp_announce(void)
464{
465	int     x;
466
467	POSTCODE(MP_ANNOUNCE_POST);
468
469	printf("FreeBSD/SMP: Multiprocessor motherboard\n");
470	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
471	printf(", version: 0x%08x", cpu_apic_versions[0]);
472	printf(", at 0x%08x\n", cpu_apic_address);
473	for (x = 1; x <= mp_naps; ++x) {
474		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
475		printf(", version: 0x%08x", cpu_apic_versions[x]);
476		printf(", at 0x%08x\n", cpu_apic_address);
477	}
478
479#if defined(APIC_IO)
480	for (x = 0; x < mp_napics; ++x) {
481		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
482		printf(", version: 0x%08x", io_apic_versions[x]);
483		printf(", at 0x%08x\n", io_apic_address[x]);
484	}
485#else
486	printf(" Warning: APIC I/O disabled\n");
487#endif	/* APIC_IO */
488}
489
490/*
491 * AP CPUs call this to sync up protected mode.
492 */
493void
494init_secondary(void)
495{
496	int	gsel_tss;
497	int	x, myid = bootAP;
498
499	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
500	gdt_segs[GPROC0_SEL].ssd_base =
501		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
502	SMP_prvspace[myid].globaldata.gd_prvspace =
503		&SMP_prvspace[myid].globaldata;
504
505	for (x = 0; x < NGDT; x++) {
506		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
507	}
508
509	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
510	r_gdt.rd_base = (int) &gdt[myid * NGDT];
511	lgdt(&r_gdt);			/* does magic intra-segment return */
512
513	lidt(&r_idt);
514
515	lldt(_default_ldt);
516	PCPU_SET(currentldt, _default_ldt);
517
518	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
519	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
520	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
521	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
522	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
523	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
524	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
525	ltr(gsel_tss);
526
527	pmap_set_opt();
528}
529
530
531#if defined(APIC_IO)
532/*
533 * Final configuration of the BSP's local APIC:
534 *  - disable 'pic mode'.
535 *  - disable 'virtual wire mode'.
536 *  - enable NMI.
537 */
538void
539bsp_apic_configure(void)
540{
541	u_char		byte;
542	u_int32_t	temp;
543
544	/* leave 'pic mode' if necessary */
545	if (picmode) {
546		outb(0x22, 0x70);	/* select IMCR */
547		byte = inb(0x23);	/* current contents */
548		byte |= 0x01;		/* mask external INTR */
549		outb(0x23, byte);	/* disconnect 8259s/NMI */
550	}
551
552	/* mask lint0 (the 8259 'virtual wire' connection) */
553	temp = lapic.lvt_lint0;
554	temp |= APIC_LVT_M;		/* set the mask */
555	lapic.lvt_lint0 = temp;
556
557	/* setup lint1 to handle NMI */
558	temp = lapic.lvt_lint1;
559	temp &= ~APIC_LVT_M;		/* clear the mask */
560	lapic.lvt_lint1 = temp;
561
562	if (bootverbose)
563		apic_dump("bsp_apic_configure()");
564}
565#endif  /* APIC_IO */
566
567
568/*******************************************************************
569 * local functions and data
570 */
571
572/*
573 * start the SMP system
574 */
575static void
576mp_enable(u_int boot_addr)
577{
578	int     x;
579#if defined(APIC_IO)
580	int     apic;
581	u_int   ux;
582#endif	/* APIC_IO */
583
584	POSTCODE(MP_ENABLE_POST);
585
586	/* turn on 4MB of V == P addressing so we can get to MP table */
587	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
588	invltlb();
589
590	/* examine the MP table for needed info, uses physical addresses */
591	x = mptable_pass2();
592
593	*(int *)PTD = 0;
594	invltlb();
595
596	/* can't process default configs till the CPU APIC is pmapped */
597	if (x)
598		default_mp_table(x);
599
600	/* post scan cleanup */
601	fix_mp_table();
602	setup_apic_irq_mapping();
603
604#if defined(APIC_IO)
605
606	/* fill the LOGICAL io_apic_versions table */
607	for (apic = 0; apic < mp_napics; ++apic) {
608		ux = io_apic_read(apic, IOAPIC_VER);
609		io_apic_versions[apic] = ux;
610		io_apic_set_id(apic, IO_TO_ID(apic));
611	}
612
613	/* program each IO APIC in the system */
614	for (apic = 0; apic < mp_napics; ++apic)
615		if (io_apic_setup(apic) < 0)
616			panic("IO APIC setup failure");
617
618	/* install a 'Spurious INTerrupt' vector */
619	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
620	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
621
622	/* install an inter-CPU IPI for TLB invalidation */
623	setidt(XINVLTLB_OFFSET, Xinvltlb,
624	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
625
626	/* install an inter-CPU IPI for reading processor state */
627	setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
628	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
629
630	/* install an inter-CPU IPI for all-CPU rendezvous */
631	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
632	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
633
634	/* install an inter-CPU IPI for forcing an additional software trap */
635	setidt(XCPUAST_OFFSET, Xcpuast,
636	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
637
638	/* install an inter-CPU IPI for CPU stop/restart */
639	setidt(XCPUSTOP_OFFSET, Xcpustop,
640	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
641
642#if defined(TEST_TEST1)
643	/* install a "fake hardware INTerrupt" vector */
644	setidt(XTEST1_OFFSET, Xtest1,
645	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
646#endif  /** TEST_TEST1 */
647
648#endif	/* APIC_IO */
649
650	/* initialize all SMP locks */
651	init_locks();
652
653	/* start each Application Processor */
654	start_all_aps(boot_addr);
655}
656
657
658/*
659 * look for the MP spec signature
660 */
661
662/* string defined by the Intel MP Spec as identifying the MP table */
663#define MP_SIG		0x5f504d5f	/* _MP_ */
664#define NEXT(X)		((X) += 4)
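/*
 * MP_SIG is the four ASCII characters "_MP_" read as a little-endian
 * 32-bit word; search_for_sig() scans 'count' longwords starting at the
 * physical address 'target' (through the KERNBASE mapping) and returns
 * the physical offset of a match, or -1 if none is found.
 */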
665static int
666search_for_sig(u_int32_t target, int count)
667{
668	int     x;
669	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
670
671	for (x = 0; x < count; NEXT(x))
672		if (addr[x] == MP_SIG)
673			/* make array index a byte index */
674			return (target + (x * sizeof(u_int32_t)));
675
676	return -1;
677}
678
679
680static basetable_entry basetable_entry_types[] =
681{
682	{0, 20, "Processor"},
683	{1, 8, "Bus"},
684	{2, 8, "I/O APIC"},
685	{3, 8, "I/O INT"},
686	{4, 8, "Local INT"}
687};
688
689typedef struct BUSDATA {
690	u_char  bus_id;
691	enum busTypes bus_type;
692}       bus_datum;
693
694typedef struct INTDATA {
695	u_char  int_type;
696	u_short int_flags;
697	u_char  src_bus_id;
698	u_char  src_bus_irq;
699	u_char  dst_apic_id;
700	u_char  dst_apic_int;
701	u_char	int_vector;
702}       io_int, local_int;
703
704typedef struct BUSTYPENAME {
705	u_char  type;
706	char    name[7];
707}       bus_type_name;
708
709static bus_type_name bus_type_table[] =
710{
711	{CBUS, "CBUS"},
712	{CBUSII, "CBUSII"},
713	{EISA, "EISA"},
714	{MCA, "MCA"},
715	{UNKNOWN_BUSTYPE, "---"},
716	{ISA, "ISA"},
717	{MCA, "MCA"},
718	{UNKNOWN_BUSTYPE, "---"},
719	{UNKNOWN_BUSTYPE, "---"},
720	{UNKNOWN_BUSTYPE, "---"},
721	{UNKNOWN_BUSTYPE, "---"},
722	{UNKNOWN_BUSTYPE, "---"},
723	{PCI, "PCI"},
724	{UNKNOWN_BUSTYPE, "---"},
725	{UNKNOWN_BUSTYPE, "---"},
726	{UNKNOWN_BUSTYPE, "---"},
727	{UNKNOWN_BUSTYPE, "---"},
728	{XPRESS, "XPRESS"},
729	{UNKNOWN_BUSTYPE, "---"}
730};
731/* from MP spec v1.4, table 5-1 */
732static int default_data[7][5] =
733{
734/*   nbus, id0, type0, id1, type1 */
735	{1, 0, ISA, 255, 255},
736	{1, 0, EISA, 255, 255},
737	{1, 0, EISA, 255, 255},
738	{1, 0, MCA, 255, 255},
739	{2, 0, ISA, 1, PCI},
740	{2, 0, EISA, 1, PCI},
741	{2, 0, MCA, 1, PCI}
742};
743
744
745/* the bus data */
746static bus_datum *bus_data;
747
748/* the IO INT data, one entry per possible APIC INTerrupt */
749static io_int  *io_apic_ints;
750
751static int nintrs;
752
753static int processor_entry	__P((proc_entry_ptr entry, int cpu));
754static int bus_entry		__P((bus_entry_ptr entry, int bus));
755static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
756static int int_entry		__P((int_entry_ptr entry, int intr));
757static int lookup_bus_type	__P((char *name));
758
759
760/*
761 * 1st pass on motherboard's Intel MP specification table.
762 *
763 * initializes:
764 *	mp_ncpus = 1
765 *
766 * determines:
767 *	cpu_apic_address (common to all CPUs)
768 *	io_apic_address[N]
769 *	mp_naps
770 *	mp_nbusses
771 *	mp_napics
772 *	nintrs
773 */
774static void
775mptable_pass1(void)
776{
777	int	x;
778	mpcth_t	cth;
779	int	totalSize;
780	void*	position;
781	int	count;
782	int	type;
783
784	POSTCODE(MPTABLE_PASS1_POST);
785
786	/* clear various tables */
787	for (x = 0; x < NAPICID; ++x) {
788		io_apic_address[x] = ~0;	/* IO APIC address table */
789	}
790
791	/* init everything to empty */
792	mp_naps = 0;
793	mp_nbusses = 0;
794	mp_napics = 0;
795	nintrs = 0;
796
797	/* check for use of 'default' configuration */
798	if (MPFPS_MPFB1 != 0) {
799		/* use default addresses */
800		cpu_apic_address = DEFAULT_APIC_BASE;
801		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
802
803		/* fill in with defaults */
804		mp_naps = 2;		/* includes BSP */
805		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
806#if defined(APIC_IO)
807		mp_napics = 1;
808		nintrs = 16;
809#endif	/* APIC_IO */
810	}
811	else {
812		if ((cth = mpfps->pap) == 0)
813			panic("MP Configuration Table Header MISSING!");
814
815		cpu_apic_address = (vm_offset_t) cth->apic_address;
816
817		/* walk the table, recording info of interest */
818		totalSize = cth->base_table_length - sizeof(struct MPCTH);
819		position = (u_char *) cth + sizeof(struct MPCTH);
820		count = cth->entry_count;
821
822		while (count--) {
823			switch (type = *(u_char *) position) {
824			case 0: /* processor_entry */
825				if (((proc_entry_ptr)position)->cpu_flags
826					& PROCENTRY_FLAG_EN)
827					++mp_naps;
828				break;
829			case 1: /* bus_entry */
830				++mp_nbusses;
831				break;
832			case 2: /* io_apic_entry */
833				if (((io_apic_entry_ptr)position)->apic_flags
834					& IOAPICENTRY_FLAG_EN)
835					io_apic_address[mp_napics++] =
836					    (vm_offset_t)((io_apic_entry_ptr)
837						position)->apic_address;
838				break;
839			case 3: /* int_entry */
840				++nintrs;
841				break;
842			case 4:	/* local_int_entry */
843				break;
844			default:
845				panic("mpfps Base Table HOSED!");
846				/* NOTREACHED */
847			}
848
849			totalSize -= basetable_entry_types[type].length;
850			(u_char*)position += basetable_entry_types[type].length;
851		}
852	}
853
854	/* qualify the numbers */
855	if (mp_naps > MAXCPU) {
856		printf("Warning: only using %d of %d available CPUs!\n",
857			MAXCPU, mp_naps);
858		mp_naps = MAXCPU;
859	}
860
861	/*
862	 * Count the BSP.
863	 * This is also used as a counter while starting the APs.
864	 */
865	mp_ncpus = 1;
866
867	--mp_naps;	/* subtract the BSP */
868}
869
870
871/*
872 * 2nd pass on motherboard's Intel MP specification table.
873 *
874 * sets:
875 *	boot_cpu_id
876 *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
877 *	CPU_TO_ID(N), logical CPU to APIC ID table
878 *	IO_TO_ID(N), logical IO to APIC ID table
879 *	bus_data[N]
880 *	io_apic_ints[N]
881 */
882static int
883mptable_pass2(void)
884{
885	int     x;
886	mpcth_t cth;
887	int     totalSize;
888	void*   position;
889	int     count;
890	int     type;
891	int     apic, bus, cpu, intr;
892	int	i, j;
893	int	pgeflag;
894
895	POSTCODE(MPTABLE_PASS2_POST);
896
897	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
898
899	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
900	    M_DEVBUF, M_WAITOK);
901	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
902	    M_DEVBUF, M_WAITOK);
903	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
904	    M_DEVBUF, M_WAITOK);
905	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
906	    M_DEVBUF, M_WAITOK);
907
908	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
909
910	for (i = 0; i < mp_napics; i++) {
911		for (j = 0; j < mp_napics; j++) {
912			/* same page frame as a previous IO apic? */
913			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
914			    (io_apic_address[i] & PG_FRAME)) {
915				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
916					+ (NPTEPG-2-j) * PAGE_SIZE
917					+ (io_apic_address[i] & PAGE_MASK));
918				break;
919			}
920			/* use this slot if available */
921			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
922				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
923				    pgeflag | (io_apic_address[i] & PG_FRAME));
924				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
925					+ (NPTEPG-2-j) * PAGE_SIZE
926					+ (io_apic_address[i] & PAGE_MASK));
927				break;
928			}
929		}
930	}
931
932	/* clear various tables */
933	for (x = 0; x < NAPICID; ++x) {
934		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
935		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
936		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
937	}
938
939	/* clear bus data table */
940	for (x = 0; x < mp_nbusses; ++x)
941		bus_data[x].bus_id = 0xff;
942
943	/* clear IO APIC INT table */
944	for (x = 0; x < (nintrs + 1); ++x) {
945		io_apic_ints[x].int_type = 0xff;
946		io_apic_ints[x].int_vector = 0xff;
947	}
948
949	/* setup the cpu/apic mapping arrays */
950	boot_cpu_id = -1;
951
952	/* record whether PIC or virtual-wire mode */
953	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
954
955	/* check for use of 'default' configuration */
956	if (MPFPS_MPFB1 != 0)
957		return MPFPS_MPFB1;	/* return default configuration type */
958
959	if ((cth = mpfps->pap) == 0)
960		panic("MP Configuration Table Header MISSING!");
961
962	/* walk the table, recording info of interest */
963	totalSize = cth->base_table_length - sizeof(struct MPCTH);
964	position = (u_char *) cth + sizeof(struct MPCTH);
965	count = cth->entry_count;
966	apic = bus = intr = 0;
967	cpu = 1;				/* pre-count the BSP */
968
969	while (count--) {
970		switch (type = *(u_char *) position) {
971		case 0:
972			if (processor_entry(position, cpu))
973				++cpu;
974			break;
975		case 1:
976			if (bus_entry(position, bus))
977				++bus;
978			break;
979		case 2:
980			if (io_apic_entry(position, apic))
981				++apic;
982			break;
983		case 3:
984			if (int_entry(position, intr))
985				++intr;
986			break;
987		case 4:
988			/* int_entry(position); */
989			break;
990		default:
991			panic("mpfps Base Table HOSED!");
992			/* NOTREACHED */
993		}
994
995		totalSize -= basetable_entry_types[type].length;
996		(u_char *) position += basetable_entry_types[type].length;
997	}
998
999	if (boot_cpu_id == -1)
1000		panic("NO BSP found!");
1001
1002	/* report the fact that it's NOT a default configuration */
1003	return 0;
1004}
1005
1006
1007void
1008assign_apic_irq(int apic, int intpin, int irq)
1009{
1010	int x;
1011
1012	if (int_to_apicintpin[irq].ioapic != -1)
1013		panic("assign_apic_irq: inconsistent table");
1014
1015	int_to_apicintpin[irq].ioapic = apic;
1016	int_to_apicintpin[irq].int_pin = intpin;
1017	int_to_apicintpin[irq].apic_address = ioapic[apic];
1018	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1019
1020	for (x = 0; x < nintrs; x++) {
1021		if ((io_apic_ints[x].int_type == 0 ||
1022		     io_apic_ints[x].int_type == 3) &&
1023		    io_apic_ints[x].int_vector == 0xff &&
1024		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1025		    io_apic_ints[x].dst_apic_int == intpin)
1026			io_apic_ints[x].int_vector = irq;
1027	}
1028}
1029
1030void
1031revoke_apic_irq(int irq)
1032{
1033	int x;
1034	int oldapic;
1035	int oldintpin;
1036
1037	if (int_to_apicintpin[irq].ioapic == -1)
1038		panic("revoke_apic_irq: inconsistent table");
1039
1040	oldapic = int_to_apicintpin[irq].ioapic;
1041	oldintpin = int_to_apicintpin[irq].int_pin;
1042
1043	int_to_apicintpin[irq].ioapic = -1;
1044	int_to_apicintpin[irq].int_pin = 0;
1045	int_to_apicintpin[irq].apic_address = NULL;
1046	int_to_apicintpin[irq].redirindex = 0;
1047
1048	for (x = 0; x < nintrs; x++) {
1049		if ((io_apic_ints[x].int_type == 0 ||
1050		     io_apic_ints[x].int_type == 3) &&
1051		    io_apic_ints[x].int_vector != 0xff &&
1052		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1053		    io_apic_ints[x].dst_apic_int == oldintpin)
1054			io_apic_ints[x].int_vector = 0xff;
1055	}
1056}
1057
1058
1059static void
1060allocate_apic_irq(int intr)
1061{
1062	int apic;
1063	int intpin;
1064	int irq;
1065
1066	if (io_apic_ints[intr].int_vector != 0xff)
1067		return;		/* Interrupt handler already assigned */
1068
1069	if (io_apic_ints[intr].int_type != 0 &&
1070	    (io_apic_ints[intr].int_type != 3 ||
1071	     (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1072	      io_apic_ints[intr].dst_apic_int == 0)))
1073		return;		/* Not INT or ExtInt on != (0, 0) */
1074
1075	irq = 0;
1076	while (irq < APIC_INTMAPSIZE &&
1077	       int_to_apicintpin[irq].ioapic != -1)
1078		irq++;
1079
1080	if (irq >= APIC_INTMAPSIZE)
1081		return;		/* No free interrupt handlers */
1082
1083	apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1084	intpin = io_apic_ints[intr].dst_apic_int;
1085
1086	assign_apic_irq(apic, intpin, irq);
1087	io_apic_setup_intpin(apic, intpin);
1088}
1089
1090
1091static void
1092swap_apic_id(int apic, int oldid, int newid)
1093{
1094	int x;
1095	int oapic;
1096
1097
1098	if (oldid == newid)
1099		return;			/* Nothing to do */
1100
1101	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
1102	       apic, oldid, newid);
1103
1104	/* Swap physical APIC IDs in interrupt entries */
1105	for (x = 0; x < nintrs; x++) {
1106		if (io_apic_ints[x].dst_apic_id == oldid)
1107			io_apic_ints[x].dst_apic_id = newid;
1108		else if (io_apic_ints[x].dst_apic_id == newid)
1109			io_apic_ints[x].dst_apic_id = oldid;
1110	}
1111
1112	/* Swap physical APIC IDs in IO_TO_ID mappings */
1113	for (oapic = 0; oapic < mp_napics; oapic++)
1114		if (IO_TO_ID(oapic) == newid)
1115			break;
1116
1117	if (oapic < mp_napics) {
1118		printf("Changing APIC ID for IO APIC #%d from "
1119		       "%d to %d in MP table\n",
1120		       oapic, newid, oldid);
1121		IO_TO_ID(oapic) = oldid;
1122	}
1123	IO_TO_ID(apic) = newid;
1124}
1125
1126
1127static void
1128fix_id_to_io_mapping(void)
1129{
1130	int x;
1131
1132	for (x = 0; x < NAPICID; x++)
1133		ID_TO_IO(x) = -1;
1134
1135	for (x = 0; x <= mp_naps; x++)
1136		if (CPU_TO_ID(x) < NAPICID)
1137			ID_TO_IO(CPU_TO_ID(x)) = x;
1138
1139	for (x = 0; x < mp_napics; x++)
1140		if (IO_TO_ID(x) < NAPICID)
1141			ID_TO_IO(IO_TO_ID(x)) = x;
1142}
1143
1144
1145static int
1146first_free_apic_id(void)
1147{
1148	int freeid, x;
1149
1150	for (freeid = 0; freeid < NAPICID; freeid++) {
1151		for (x = 0; x <= mp_naps; x++)
1152			if (CPU_TO_ID(x) == freeid)
1153				break;
1154		if (x <= mp_naps)
1155			continue;
1156		for (x = 0; x < mp_napics; x++)
1157			if (IO_TO_ID(x) == freeid)
1158				break;
1159		if (x < mp_napics)
1160			continue;
1161		return freeid;
1162	}
1163	return freeid;
1164}
1165
1166
1167static int
1168io_apic_id_acceptable(int apic, int id)
1169{
1170	int cpu;		/* Logical CPU number */
1171	int oapic;		/* Logical IO APIC number for other IO APIC */
1172
1173	if (id >= NAPICID)
1174		return 0;	/* Out of range */
1175
1176	for (cpu = 0; cpu <= mp_naps; cpu++)
1177		if (CPU_TO_ID(cpu) == id)
1178			return 0;	/* Conflict with CPU */
1179
1180	for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1181		if (IO_TO_ID(oapic) == id)
1182			return 0;	/* Conflict with other APIC */
1183
1184	return 1;		/* ID is acceptable for IO APIC */
1185}
1186
1187
1188/*
1189 * parse an Intel MP specification table
1190 */
1191static void
1192fix_mp_table(void)
1193{
1194	int	x;
1195	int	id;
1196	int	bus_0 = 0;	/* Stop GCC warning */
1197	int	bus_pci = 0;	/* Stop GCC warning */
1198	int	num_pci_bus;
1199	int	apic;		/* IO APIC unit number */
1200	int     freeid;		/* Free physical APIC ID */
1201	int	physid;		/* Current physical IO APIC ID */
1202
1203	/*
1204	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1205	 * did it wrong.  The MP spec says that when more than 1 PCI bus
1206	 * exists the BIOS must begin with bus entries for the PCI bus and use
1207	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
1208	 * exists the BIOS can choose to ignore this ordering, and indeed many
1209	 * MP motherboards do ignore it.  This causes a problem when the PCI
1210	 * sub-system makes requests of the MP sub-system based on PCI bus
1211	 * numbers.	So here we look for the situation and renumber the
1212	 * busses and associated INTs in an effort to "make it right".
1213	 */
1214
1215	/* find bus 0, PCI bus, count the number of PCI busses */
1216	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1217		if (bus_data[x].bus_id == 0) {
1218			bus_0 = x;
1219		}
1220		if (bus_data[x].bus_type == PCI) {
1221			++num_pci_bus;
1222			bus_pci = x;
1223		}
1224	}
1225	/*
1226	 * bus_0 == slot of bus with ID of 0
1227	 * bus_pci == slot of last PCI bus encountered
1228	 */
1229
1230	/* check the 1 PCI bus case for sanity */
1231	/* if it is number 0 all is well */
1232	if (num_pci_bus == 1 &&
1233	    bus_data[bus_pci].bus_id != 0) {
1234
1235		/* mis-numbered, swap with whichever bus uses slot 0 */
1236
1237		/* swap the bus entry types */
1238		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1239		bus_data[bus_0].bus_type = PCI;
1240
1241		/* swap each relevant INTerrupt entry */
1242		id = bus_data[bus_pci].bus_id;
1243		for (x = 0; x < nintrs; ++x) {
1244			if (io_apic_ints[x].src_bus_id == id) {
1245				io_apic_ints[x].src_bus_id = 0;
1246			}
1247			else if (io_apic_ints[x].src_bus_id == 0) {
1248				io_apic_ints[x].src_bus_id = id;
1249			}
1250		}
1251	}
1252
1253	/* Assign IO APIC IDs.
1254	 *
1255	 * First try the existing ID. If a conflict is detected, try
1256	 * the ID in the MP table.  If a conflict is still detected, find
1257	 * a free id.
1258	 *
1259	 * We cannot use the ID_TO_IO table before all conflicts have been
1260	 * resolved and the table has been corrected.
1261	 */
1262	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
1263
1264		/* First try to use the value set by the BIOS */
1265		physid = io_apic_get_id(apic);
1266		if (io_apic_id_acceptable(apic, physid)) {
1267			if (IO_TO_ID(apic) != physid)
1268				swap_apic_id(apic, IO_TO_ID(apic), physid);
1269			continue;
1270		}
1271
1272		/* Then check if the value in the MP table is acceptable */
1273		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
1274			continue;
1275
1276		/* Last resort, find a free APIC ID and use it */
1277		freeid = first_free_apic_id();
1278		if (freeid >= NAPICID)
1279			panic("No free physical APIC IDs found");
1280
1281		if (io_apic_id_acceptable(apic, freeid)) {
1282			swap_apic_id(apic, IO_TO_ID(apic), freeid);
1283			continue;
1284		}
1285		panic("Free physical APIC ID not usable");
1286	}
1287	fix_id_to_io_mapping();
1288
1289	/* detect and fix broken Compaq MP table */
1290	if (apic_int_type(0, 0) == -1) {
1291		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
1292		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
1293		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
1294		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
1295		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
1296		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
1297		nintrs++;
1298	}
1299}
1300
1301
1302/* Assign low level interrupt handlers */
1303static void
1304setup_apic_irq_mapping(void)
1305{
1306	int	x;
1307	int	int_vector;
1308
1309	/* Clear array */
1310	for (x = 0; x < APIC_INTMAPSIZE; x++) {
1311		int_to_apicintpin[x].ioapic = -1;
1312		int_to_apicintpin[x].int_pin = 0;
1313		int_to_apicintpin[x].apic_address = NULL;
1314		int_to_apicintpin[x].redirindex = 0;
1315	}
1316
1317	/* First assign ISA/EISA interrupts */
1318	for (x = 0; x < nintrs; x++) {
1319		int_vector = io_apic_ints[x].src_bus_irq;
1320		if (int_vector < APIC_INTMAPSIZE &&
1321		    io_apic_ints[x].int_vector == 0xff &&
1322		    int_to_apicintpin[int_vector].ioapic == -1 &&
1323		    (apic_int_is_bus_type(x, ISA) ||
1324		     apic_int_is_bus_type(x, EISA)) &&
1325		    io_apic_ints[x].int_type == 0) {
1326			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1327					io_apic_ints[x].dst_apic_int,
1328					int_vector);
1329		}
1330	}
1331
1332	/* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1333	for (x = 0; x < nintrs; x++) {
1334		if (io_apic_ints[x].dst_apic_int == 0 &&
1335		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1336		    io_apic_ints[x].int_vector == 0xff &&
1337		    int_to_apicintpin[0].ioapic == -1 &&
1338		    io_apic_ints[x].int_type == 3) {
1339			assign_apic_irq(0, 0, 0);
1340			break;
1341		}
1342	}
1343	/* PCI interrupt assignment is deferred */
1344}
1345
1346
1347static int
1348processor_entry(proc_entry_ptr entry, int cpu)
1349{
1350	/* check for usability */
1351	if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1352		return 0;
1353
1354	if (entry->apic_id >= NAPICID)
1355		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
1356	/* check for BSP flag */
1357	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1358		boot_cpu_id = entry->apic_id;
1359		CPU_TO_ID(0) = entry->apic_id;
1360		ID_TO_CPU(entry->apic_id) = 0;
1361		return 0;	/* it's already been counted */
1362	}
1363
1364	/* add another AP to list, if less than max number of CPUs */
1365	else if (cpu < MAXCPU) {
1366		CPU_TO_ID(cpu) = entry->apic_id;
1367		ID_TO_CPU(entry->apic_id) = cpu;
1368		return 1;
1369	}
1370
1371	return 0;
1372}
1373
1374
1375static int
1376bus_entry(bus_entry_ptr entry, int bus)
1377{
1378	int     x;
1379	char    c, name[8];
1380
1381	/* encode the name into an index */
1382	for (x = 0; x < 6; ++x) {
1383		if ((c = entry->bus_type[x]) == ' ')
1384			break;
1385		name[x] = c;
1386	}
1387	name[x] = '\0';
1388
1389	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1390		panic("unknown bus type: '%s'", name);
1391
1392	bus_data[bus].bus_id = entry->bus_id;
1393	bus_data[bus].bus_type = x;
1394
1395	return 1;
1396}
1397
1398
1399static int
1400io_apic_entry(io_apic_entry_ptr entry, int apic)
1401{
1402	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1403		return 0;
1404
1405	IO_TO_ID(apic) = entry->apic_id;
1406	if (entry->apic_id < NAPICID)
1407		ID_TO_IO(entry->apic_id) = apic;
1408
1409	return 1;
1410}
1411
1412
1413static int
1414lookup_bus_type(char *name)
1415{
1416	int     x;
1417
1418	for (x = 0; x < MAX_BUSTYPE; ++x)
1419		if (strcmp(bus_type_table[x].name, name) == 0)
1420			return bus_type_table[x].type;
1421
1422	return UNKNOWN_BUSTYPE;
1423}
1424
1425
1426static int
1427int_entry(int_entry_ptr entry, int intr)
1428{
1429	int apic;
1430
1431	io_apic_ints[intr].int_type = entry->int_type;
1432	io_apic_ints[intr].int_flags = entry->int_flags;
1433	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1434	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1435	if (entry->dst_apic_id == 255) {
1436		/* This signal goes to all IO APICs.  Select an IO APIC
1437		   with a sufficient number of interrupt pins */
1438		for (apic = 0; apic < mp_napics; apic++)
1439			if (((io_apic_read(apic, IOAPIC_VER) &
1440			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1441			    entry->dst_apic_int)
1442				break;
1443		if (apic < mp_napics)
1444			io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1445		else
1446			io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1447	} else
1448		io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1449	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1450
1451	return 1;
1452}
1453
1454
1455static int
1456apic_int_is_bus_type(int intr, int bus_type)
1457{
1458	int     bus;
1459
1460	for (bus = 0; bus < mp_nbusses; ++bus)
1461		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1462		    && ((int) bus_data[bus].bus_type == bus_type))
1463			return 1;
1464
1465	return 0;
1466}
1467
1468
1469/*
1470 * Given a traditional ISA INT mask, return an APIC mask.
1471 */
1472u_int
1473isa_apic_mask(u_int isa_mask)
1474{
1475	int isa_irq;
1476	int apic_pin;
1477
1478#if defined(SKIP_IRQ15_REDIRECT)
1479	if (isa_mask == (1 << 15)) {
1480		printf("skipping ISA IRQ15 redirect\n");
1481		return isa_mask;
1482	}
1483#endif  /* SKIP_IRQ15_REDIRECT */
1484
1485	isa_irq = ffs(isa_mask);		/* find its bit position */
1486	if (isa_irq == 0)			/* doesn't exist */
1487		return 0;
1488	--isa_irq;				/* make it zero based */
1489
1490	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
1491	if (apic_pin == -1)
1492		return 0;
1493
1494	return (1 << apic_pin);			/* convert pin# to a mask */
1495}
1496
1497
1498/*
1499 * Determine which APIC pin an ISA/EISA INT is attached to.
1500 */
1501#define INTTYPE(I)	(io_apic_ints[(I)].int_type)
1502#define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
1503#define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
1504#define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1505
1506#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
1507int
1508isa_apic_irq(int isa_irq)
1509{
1510	int     intr;
1511
1512	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
1513		if (INTTYPE(intr) == 0) {		/* standard INT */
1514			if (SRCBUSIRQ(intr) == isa_irq) {
1515				if (apic_int_is_bus_type(intr, ISA) ||
1516			            apic_int_is_bus_type(intr, EISA)) {
1517					if (INTIRQ(intr) == 0xff)
1518						return -1; /* unassigned */
1519					return INTIRQ(intr);	/* found */
1520				}
1521			}
1522		}
1523	}
1524	return -1;					/* NOT found */
1525}
1526
1527
1528/*
1529 * Determine which APIC pin a PCI INT is attached to.
1530 */
1531#define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
1532#define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1533#define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
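/*
 * For PCI source buses the MP spec packs the device number into bits 2-6
 * of the entry's source bus IRQ field and the interrupt line (INTA#-INTD#
 * as 0-3) into bits 0-1, which is what SRCBUSDEVICE() and SRCBUSLINE()
 * extract; pci_apic_irq() takes pciInt as 1-based, hence the --pciInt.
 */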
1534int
1535pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1536{
1537	int     intr;
1538
1539	--pciInt;					/* zero based */
1540
1541	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
1542		if ((INTTYPE(intr) == 0)		/* standard INT */
1543		    && (SRCBUSID(intr) == pciBus)
1544		    && (SRCBUSDEVICE(intr) == pciDevice)
1545		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
1546			if (apic_int_is_bus_type(intr, PCI)) {
1547				if (INTIRQ(intr) == 0xff)
1548					allocate_apic_irq(intr);
1549				if (INTIRQ(intr) == 0xff)
1550					return -1;	/* unassigned */
1551				return INTIRQ(intr);	/* exact match */
1552			}
1553
1554	return -1;					/* NOT found */
1555}
1556
1557int
1558next_apic_irq(int irq)
1559{
1560	int intr, ointr;
1561	int bus, bustype;
1562
1563	bus = 0;
1564	bustype = 0;
1565	for (intr = 0; intr < nintrs; intr++) {
1566		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1567			continue;
1568		bus = SRCBUSID(intr);
1569		bustype = apic_bus_type(bus);
1570		if (bustype != ISA &&
1571		    bustype != EISA &&
1572		    bustype != PCI)
1573			continue;
1574		break;
1575	}
1576	if (intr >= nintrs) {
1577		return -1;
1578	}
1579	for (ointr = intr + 1; ointr < nintrs; ointr++) {
1580		if (INTTYPE(ointr) != 0)
1581			continue;
1582		if (bus != SRCBUSID(ointr))
1583			continue;
1584		if (bustype == PCI) {
1585			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1586				continue;
1587			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1588				continue;
1589		}
1590		if (bustype == ISA || bustype == EISA) {
1591			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1592				continue;
1593		}
1594		if (INTPIN(intr) == INTPIN(ointr))
1595			continue;
1596		break;
1597	}
1598	if (ointr >= nintrs) {
1599		return -1;
1600	}
1601	return INTIRQ(ointr);
1602}
1603#undef SRCBUSLINE
1604#undef SRCBUSDEVICE
1605#undef SRCBUSID
1606#undef SRCBUSIRQ
1607
1608#undef INTPIN
1609#undef INTIRQ
1610#undef INTAPIC
1611#undef INTTYPE
1612
1613
1614/*
1615 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1616 *
1617 * XXX FIXME:
1618 *  Exactly what this means is unclear at this point.  It is a solution
1619 *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
1620 *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
1621 *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1622 *  option.
1623 */
1624int
1625undirect_isa_irq(int rirq)
1626{
1627#if defined(READY)
1628	if (bootverbose)
1629	    printf("Freeing redirected ISA irq %d.\n", rirq);
1630	/** FIXME: tickle the MB redirector chip */
1631	return -1;
1632#else
1633	if (bootverbose)
1634	    printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1635	return 0;
1636#endif  /* READY */
1637}
1638
1639
1640/*
1641 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1642 */
1643int
1644undirect_pci_irq(int rirq)
1645{
1646#if defined(READY)
1647	if (bootverbose)
1648		printf("Freeing redirected PCI irq %d.\n", rirq);
1649
1650	/** FIXME: tickle the MB redirector chip */
1651	return -1;
1652#else
1653	if (bootverbose)
1654		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1655		       rirq);
1656	return 0;
1657#endif  /* READY */
1658}
1659
1660
1661/*
1662 * given a bus ID, return:
1663 *  the bus type if found
1664 *  -1 if NOT found
1665 */
1666int
1667apic_bus_type(int id)
1668{
1669	int     x;
1670
1671	for (x = 0; x < mp_nbusses; ++x)
1672		if (bus_data[x].bus_id == id)
1673			return bus_data[x].bus_type;
1674
1675	return -1;
1676}
1677
1678
1679/*
1680 * given a LOGICAL APIC# and pin#, return:
1681 *  the associated src bus ID if found
1682 *  -1 if NOT found
1683 */
1684int
1685apic_src_bus_id(int apic, int pin)
1686{
1687	int     x;
1688
1689	/* search each of the possible INTerrupt sources */
1690	for (x = 0; x < nintrs; ++x)
1691		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1692		    (pin == io_apic_ints[x].dst_apic_int))
1693			return (io_apic_ints[x].src_bus_id);
1694
1695	return -1;		/* NOT found */
1696}
1697
1698
1699/*
1700 * given a LOGICAL APIC# and pin#, return:
1701 *  the associated src bus IRQ if found
1702 *  -1 if NOT found
1703 */
1704int
1705apic_src_bus_irq(int apic, int pin)
1706{
1707	int     x;
1708
1709	for (x = 0; x < nintrs; x++)
1710		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1711		    (pin == io_apic_ints[x].dst_apic_int))
1712			return (io_apic_ints[x].src_bus_irq);
1713
1714	return -1;		/* NOT found */
1715}
1716
1717
1718/*
1719 * given a LOGICAL APIC# and pin#, return:
1720 *  the associated INTerrupt type if found
1721 *  -1 if NOT found
1722 */
1723int
1724apic_int_type(int apic, int pin)
1725{
1726	int     x;
1727
1728	/* search each of the possible INTerrupt sources */
1729	for (x = 0; x < nintrs; ++x)
1730		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1731		    (pin == io_apic_ints[x].dst_apic_int))
1732			return (io_apic_ints[x].int_type);
1733
1734	return -1;		/* NOT found */
1735}
1736
1737int
1738apic_irq(int apic, int pin)
1739{
1740	int x;
1741	int res;
1742
1743	for (x = 0; x < nintrs; ++x)
1744		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1745		    (pin == io_apic_ints[x].dst_apic_int)) {
1746			res = io_apic_ints[x].int_vector;
1747			if (res == 0xff)
1748				return -1;
1749			if (apic != int_to_apicintpin[res].ioapic)
1750				panic("apic_irq: inconsistent table");
1751			if (pin != int_to_apicintpin[res].int_pin)
1752				panic("apic_irq inconsistent table (2)");
1753			return res;
1754		}
1755	return -1;
1756}
1757
1758
1759/*
1760 * given a LOGICAL APIC# and pin#, return:
1761 *  the associated trigger mode if found
1762 *  -1 if NOT found
1763 */
1764int
1765apic_trigger(int apic, int pin)
1766{
1767	int     x;
1768
1769	/* search each of the possible INTerrupt sources */
1770	for (x = 0; x < nintrs; ++x)
1771		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1772		    (pin == io_apic_ints[x].dst_apic_int))
1773			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1774
1775	return -1;		/* NOT found */
1776}
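/*
 * The int_flags field follows the MP spec I/O interrupt entry layout:
 * bits 0-1 hold the polarity (PO) field and bits 2-3 the trigger mode
 * (EL) field, which apic_trigger() above and apic_polarity() below
 * extract.
 */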
1777
1778
1779/*
1780 * given a LOGICAL APIC# and pin#, return:
1781 *  the associated 'active' level if found
1782 *  -1 if NOT found
1783 */
1784int
1785apic_polarity(int apic, int pin)
1786{
1787	int     x;
1788
1789	/* search each of the possible INTerrupt sources */
1790	for (x = 0; x < nintrs; ++x)
1791		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1792		    (pin == io_apic_ints[x].dst_apic_int))
1793			return (io_apic_ints[x].int_flags & 0x03);
1794
1795	return -1;		/* NOT found */
1796}
1797
1798
1799/*
1800 * set data according to MP defaults
1801 * FIXME: probably not complete yet...
1802 */
1803static void
1804default_mp_table(int type)
1805{
1806	int     ap_cpu_id;
1807#if defined(APIC_IO)
1808	int     io_apic_id;
1809	int     pin;
1810#endif	/* APIC_IO */
1811
1812#if 0
1813	printf("  MP default config type: %d\n", type);
1814	switch (type) {
1815	case 1:
1816		printf("   bus: ISA, APIC: 82489DX\n");
1817		break;
1818	case 2:
1819		printf("   bus: EISA, APIC: 82489DX\n");
1820		break;
1821	case 3:
1822		printf("   bus: EISA, APIC: 82489DX\n");
1823		break;
1824	case 4:
1825		printf("   bus: MCA, APIC: 82489DX\n");
1826		break;
1827	case 5:
1828		printf("   bus: ISA+PCI, APIC: Integrated\n");
1829		break;
1830	case 6:
1831		printf("   bus: EISA+PCI, APIC: Integrated\n");
1832		break;
1833	case 7:
1834		printf("   bus: MCA+PCI, APIC: Integrated\n");
1835		break;
1836	default:
1837		printf("   future type\n");
1838		break;
1839		/* NOTREACHED */
1840	}
1841#endif	/* 0 */
1842
1843	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1844	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1845
1846	/* BSP */
1847	CPU_TO_ID(0) = boot_cpu_id;
1848	ID_TO_CPU(boot_cpu_id) = 0;
1849
1850	/* one and only AP */
1851	CPU_TO_ID(1) = ap_cpu_id;
1852	ID_TO_CPU(ap_cpu_id) = 1;
1853
1854#if defined(APIC_IO)
1855	/* one and only IO APIC */
1856	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1857
1858	/*
1859	 * sanity check, refer to MP spec section 3.6.6, last paragraph;
1860	 * necessary as some hardware doesn't properly set up the IO APIC
1861	 */
1862#if defined(REALLY_ANAL_IOAPICID_VALUE)
1863	if (io_apic_id != 2) {
1864#else
1865	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1866#endif	/* REALLY_ANAL_IOAPICID_VALUE */
1867		io_apic_set_id(0, 2);
1868		io_apic_id = 2;
1869	}
1870	IO_TO_ID(0) = io_apic_id;
1871	ID_TO_IO(io_apic_id) = 0;
1872#endif	/* APIC_IO */
1873
1874	/* fill out bus entries */
1875	switch (type) {
1876	case 1:
1877	case 2:
1878	case 3:
1879	case 4:
1880	case 5:
1881	case 6:
1882	case 7:
1883		bus_data[0].bus_id = default_data[type - 1][1];
1884		bus_data[0].bus_type = default_data[type - 1][2];
1885		bus_data[1].bus_id = default_data[type - 1][3];
1886		bus_data[1].bus_type = default_data[type - 1][4];
1887		break;
1888
1889	/* case 4: case 7:		   MCA NOT supported */
1890	default:		/* illegal/reserved */
1891		panic("BAD default MP config: %d", type);
1892		/* NOTREACHED */
1893	}
1894
1895#if defined(APIC_IO)
1896	/* general cases from MP v1.4, table 5-2 */
1897	for (pin = 0; pin < 16; ++pin) {
1898		io_apic_ints[pin].int_type = 0;
1899		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
1900		io_apic_ints[pin].src_bus_id = 0;
1901		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
1902		io_apic_ints[pin].dst_apic_id = io_apic_id;
1903		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
1904	}
1905
1906	/* special cases from MP v1.4, table 5-2 */
1907	if (type == 2) {
1908		io_apic_ints[2].int_type = 0xff;	/* N/C */
1909		io_apic_ints[13].int_type = 0xff;	/* N/C */
1910#if !defined(APIC_MIXED_MODE)
1911		/** FIXME: ??? */
1912		panic("sorry, can't support type 2 default yet");
1913#endif	/* APIC_MIXED_MODE */
1914	}
1915	else
1916		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
1917
1918	if (type == 7)
1919		io_apic_ints[0].int_type = 0xff;	/* N/C */
1920	else
1921		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
1922#endif	/* APIC_IO */
1923}
1924
1925
1926/*
1927 * start each AP in our list
1928 */
1929static int
1930start_all_aps(u_int boot_addr)
1931{
1932	int     x, i, pg;
1933	u_char  mpbiosreason;
1934	u_long  mpbioswarmvec;
1935	struct globaldata *gd;
1936	char *stack;
1937	uintptr_t kptbase;
1938
1939	POSTCODE(START_ALL_APS_POST);
1940
1941	/* initialize BSP's local APIC */
1942	apic_initialize();
1943	bsp_apic_ready = 1;
1944
1945	/* install the AP 1st level boot code */
1946	install_ap_tramp(boot_addr);
1947
1948
1949	/* save the current value of the warm-start vector */
1950	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1951#ifndef PC98
1952	outb(CMOS_REG, BIOS_RESET);
1953	mpbiosreason = inb(CMOS_DATA);
1954#endif
1955
1956	/* record BSP in CPU map */
1957	all_cpus = 1;
1958
1959	/* set up temporary P==V mapping for AP boot */
1960	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
1961	kptbase = (uintptr_t)(void *)KPTphys;
1962	for (x = 0; x < NKPT; x++)
1963		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
1964		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
1965	invltlb();
1966
1967	/* start each AP */
1968	for (x = 1; x <= mp_naps; ++x) {
1969
1970		/* This is a bit verbose, it will go away soon.  */
1971
1972		/* first page of AP's private space */
1973		pg = x * i386_btop(sizeof(struct privatespace));
1974
1975		/* allocate a new private data page */
1976		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
1977
1978		/* wire it into the private page table page */
1979		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
1980
1981		/* allocate and set up an idle stack data page */
1982		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
1983		for (i = 0; i < UPAGES; i++)
1984			SMPpt[pg + 1 + i] = (pt_entry_t)
1985			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1986
1987		/* prime data page for it to use */
1988		SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
1989		gd->gd_cpuid = x;
1990
1991		/* setup a vector to our boot code */
1992		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1993		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
1994#ifndef PC98
1995		outb(CMOS_REG, BIOS_RESET);
1996		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
1997#endif
1998
1999		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
2000		bootAP = x;
2001
2002		/* attempt to start the Application Processor */
2003		CHECK_INIT(99);	/* setup checkpoints */
2004		if (!start_ap(x, boot_addr)) {
2005			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
2006			CHECK_PRINT("trace");	/* show checkpoints */
2007			/* better panic as the AP may be running loose */
2008			printf("panic y/n? [y] ");
2009			if (cngetc() != 'n')
2010				panic("bye-bye");
2011		}
2012		CHECK_PRINT("trace");		/* show checkpoints */
2013
2014		/* record its version info */
2015		cpu_apic_versions[x] = cpu_apic_versions[0];
2016
2017		all_cpus |= (1 << x);		/* record AP in CPU map */
2018	}
2019
2020	/* build our map of 'other' CPUs */
2021	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
2022
2023	/* fill in our (BSP) APIC version */
2024	cpu_apic_versions[0] = lapic.version;
2025
2026	/* restore the warmstart vector */
2027	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
2028#ifndef PC98
2029	outb(CMOS_REG, BIOS_RESET);
2030	outb(CMOS_DATA, mpbiosreason);
2031#endif
2032
2033	/*
2034	 * Set up the idle context for the BSP.  Similar to above except
2035	 * that some of it was done by locore, some by pmap.c, and some is
2036	 * implicit because the BSP is cpu#0 and the page is initially zero,
2037	 * and also because we can refer to variables by name on the BSP.
2038	 */
2039
2040	/* Allocate and setup BSP idle stack */
2041	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
2042	for (i = 0; i < UPAGES; i++)
2043		SMPpt[1 + i] = (pt_entry_t)
2044		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
2045
2046	for (x = 0; x < NKPT; x++)
2047		PTD[x] = 0;
2048	pmap_set_opt();
2049
2050	/* number of APs actually started */
2051	return mp_ncpus - 1;
2052}
2053
2054
2055/*
2056 * load the 1st level AP boot code into base memory.
2057 */
2058
2059/* targets for relocation */
2060extern void bigJump(void);
2061extern void bootCodeSeg(void);
2062extern void bootDataSeg(void);
2063extern void MPentry(void);
2064extern u_int MP_GDT;
2065extern u_int mp_gdtbase;
2066
2067static void
2068install_ap_tramp(u_int boot_addr)
2069{
2070	int     x;
2071	int     size = *(int *) ((u_long) & bootMP_size);
2072	u_char *src = (u_char *) ((u_long) bootMP);
2073	u_char *dst = (u_char *) boot_addr + KERNBASE;
2074	u_int   boot_base = (u_int) bootMP;
2075	u_int8_t *dst8;
2076	u_int16_t *dst16;
2077	u_int32_t *dst32;
2078
2079	POSTCODE(INSTALL_AP_TRAMP_POST);
2080
2081	for (x = 0; x < size; ++x)
2082		*dst++ = *src++;
2083
2084	/*
2085	 * modify addresses in code we just moved to basemem. unfortunately we
2086	 * need fairly detailed info about mpboot.s for this to work.  changes
2087	 * to mpboot.s might require changes here.
2088	 */
2089
2090	/* boot code is located in KERNEL space */
2091	dst = (u_char *) boot_addr + KERNBASE;
2092
2093	/* modify the lgdt arg */
2094	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2095	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2096
2097	/* modify the ljmp target for MPentry() */
2098	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2099	*dst32 = ((u_int) MPentry - KERNBASE);
2100
2101	/* modify the target for boot code segment */
2102	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2103	dst8 = (u_int8_t *) (dst16 + 1);
2104	*dst16 = (u_int) boot_addr & 0xffff;
2105	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
2106
2107	/* modify the target for boot data segment */
2108	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2109	dst8 = (u_int8_t *) (dst16 + 1);
2110	*dst16 = (u_int) boot_addr & 0xffff;
2111	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
2112}
2113
2114
2115/*
2116 * this function starts the AP (application processor) identified
2117 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
2118 * to accomplish this.  This is necessary because of the nuances
2119 * of the different hardware we might encounter.  It ain't pretty,
2120 * but it seems to work.
2121 */
2122static int
2123start_ap(int logical_cpu, u_int boot_addr)
2124{
2125	int     physical_cpu;
2126	int     vector;
2127	int     cpus;
2128	u_long  icr_lo, icr_hi;
2129
2130	POSTCODE(START_AP_POST);
2131
2132	/* get the PHYSICAL APIC ID# */
2133	physical_cpu = CPU_TO_ID(logical_cpu);
2134
2135	/* calculate the vector */
2136	vector = (boot_addr >> 12) & 0xff;
2137
2138	/* used as a watchpoint to signal AP startup */
2139	cpus = mp_ncpus;
2140
2141	/*
2142	 * First we do an INIT/RESET IPI.  This INIT IPI might be run,
2143	 * resetting and running the target CPU.  OR this INIT IPI might be
2144	 * latched (P5 bug), with the CPU waiting for a STARTUP IPI.  OR this
2145	 * INIT IPI might be ignored.
2146	 */
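	/*
	 * For reference, the ICR values used below appear to break down as:
	 *   0x0000c500  trigger = level, level = assert,   delivery = INIT
	 *   0x00008500  trigger = level, level = deassert, delivery = INIT
	 *   0x00000600  delivery = STARTUP, vector = boot_addr >> 12, i.e.
	 *               the AP begins executing in the page containing the
	 *               trampoline.
	 */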
2147
2148	/* setup the address for the target AP */
2149	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
2150	icr_hi |= (physical_cpu << 24);
2151	lapic.icr_hi = icr_hi;
2152
2153	/* do an INIT IPI: assert RESET */
2154	icr_lo = lapic.icr_lo & 0xfff00000;
2155	lapic.icr_lo = icr_lo | 0x0000c500;
2156
2157	/* wait for pending status end */
2158	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2159		 /* spin */ ;
2160
2161	/* do an INIT IPI: deassert RESET */
2162	lapic.icr_lo = icr_lo | 0x00008500;
2163
2164	/* wait for pending status end */
2165	u_sleep(10000);		/* wait ~10 ms */
2166	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2167		 /* spin */ ;
2168
2169	/*
2170	 * Next we do a STARTUP IPI: the previous INIT IPI might still be
2171	 * latched (P5 bug), in which case this 1st STARTUP IPI would
2172	 * terminate immediately and the previously started INIT IPI would
2173	 * continue.  OR the previous INIT IPI has already run, and this
2174	 * STARTUP IPI will run.  OR the previous INIT IPI was ignored, and
2175	 * this STARTUP IPI will run.
2176	 */
2177
2178	/* do a STARTUP IPI */
2179	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2180	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2181		 /* spin */ ;
2182	u_sleep(200);		/* wait ~200 us */
2183
2184	/*
2185	 * Finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run if
2186	 * the previous STARTUP IPI was cancelled by a latched INIT IPI, or it
2187	 * will be ignored, since only ONE STARTUP IPI is recognized after a
2188	 * hardware RESET or INIT IPI.
2189	 */
2190
2191	lapic.icr_lo = icr_lo | 0x00000600 | vector;
2192	while (lapic.icr_lo & APIC_DELSTAT_MASK)
2193		 /* spin */ ;
2194	u_sleep(200);		/* wait ~200 us */
2195
2196	/* wait for it to start */
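	/*
	 * mp_ncpus is incremented by the AP early in its own startup path
	 * (signalling the BSP), so compare it against the snapshot taken in
	 * 'cpus' above.
	 */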
2197	set_apic_timer(5000000);	/* == 5 seconds */
2198	while (read_apic_timer())
2199		if (mp_ncpus > cpus)
2200			return 1;	/* return SUCCESS */
2201
2202	return 0;		/* return FAILURE */
2203}
2204
2205/*
2206 * Flush the TLB on all other CPUs.
2207 *
2208 * XXX: Needs to handshake and wait for completion before proceeding.
2209 */
2210void
2211smp_invltlb(void)
2212{
2213#if defined(APIC_IO)
2214	if (smp_started && invltlb_ok)
2215		ipi_all_but_self(IPI_INVLTLB);
2216#endif  /* APIC_IO */
2217}
2218
2219void
2220invlpg(u_int addr)
2221{
2222	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
2223
2224	/* send a message to the other CPUs */
2225	smp_invltlb();
2226}
2227
2228void
2229invltlb(void)
2230{
2231	u_long  temp;
2232
2233	/*
2234	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
2235	 * inlined.
2236	 */
2237	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
2238
2239	/* send a message to the other CPUs */
2240	smp_invltlb();
2241}
2242
2243
2244/*
2245 * This is called once the rest of the system is up and running and we're
2246 * ready to let the APs out of the pen.
2247 */
2248void
2249ap_init(void)
2250{
2251	u_int	apic_id;
2252
2253	/* spin until all the APs are ready */
2254	while (!aps_ready)
2255		/* spin */ ;
2256
2257	/*
2258	 * Set curproc to our per-cpu idleproc so that mutexes have
2259	 * something unique to lock with.
2260	 */
2261	PCPU_SET(curproc, PCPU_GET(idleproc));
2262	PCPU_SET(spinlocks, NULL);
2263
2264	/* lock against other APs that are waking up */
2265	mtx_lock_spin(&ap_boot_mtx);
2266
2267	/* BSP may have changed PTD while we're waiting for the lock */
2268	cpu_invltlb();
2269
2270	smp_cpus++;
2271
2272#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2273	lidt(&r_idt);
2274#endif
2275
2276	/* Build our map of 'other' CPUs. */
2277	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
2278
2279	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
2280
2281	/* set up CPU registers and state */
2282	cpu_setregs();
2283
2284	/* set up FPU state on the AP */
2285	npxinit(__INITIAL_NPXCW__);
2286
2287	/* A quick check from sanity claus */
2288	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2289	if (PCPU_GET(cpuid) != apic_id) {
2290		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
2291		printf("SMP: apic_id = %d\n", apic_id);
2292		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
2293		panic("cpuid mismatch! boom!!");
2294	}
2295
2296	/* Init local APIC for IRQs */
2297	apic_initialize();
2298
2299	/* Set memory range attributes for this CPU to match the BSP */
2300	mem_range_AP_init();
2301
2302	/*
2303	 * Activate smp_invltlb, although strictly speaking, this isn't
2304	 * quite correct yet.  We should have a bitfield for cpus willing
2305	 * to accept TLB flush IPIs or something and sync them.
2306	 */
2307	if (smp_cpus == mp_ncpus) {
2308		invltlb_ok = 1;
2309		smp_started = 1; /* enable IPIs, TLB shootdown, freezes, etc. */
2310		smp_active = 1;	 /* historic */
2311	}
2312
2313	/* let other APs wake up now */
2314	mtx_unlock_spin(&ap_boot_mtx);
2315
2316	/* wait until all the APs are up */
2317	while (smp_started == 0)
2318		; /* nothing */
2319
2320	microuptime(PCPU_PTR(switchtime));
2321	PCPU_SET(switchticks, ticks);
2322
2323	/* ok, now grab sched_lock and enter the scheduler */
2324	enable_intr();
2325	mtx_lock_spin(&sched_lock);
2326	cpu_throw();	/* doesn't return */
2327
2328	panic("scheduler returned us to ap_init");
2329}
2330
2331#define CHECKSTATE_USER	0
2332#define CHECKSTATE_SYS	1
2333#define CHECKSTATE_INTR	2
2334
2335/* Do not staticize.  Used from apic_vector.s */
2336struct proc*	checkstate_curproc[MAXCPU];
2337int		checkstate_cpustate[MAXCPU];
2338u_long		checkstate_pc[MAXCPU];
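/*
 * Each CPU's checkstate IPI handler records its current proc, state and pc
 * in these arrays and then flags itself in checkstate_probed_cpus; the
 * forward_statclock()/forward_hardclock() code below consumes the results.
 */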
2339
2340#define PC_TO_INDEX(pc, prof)				\
2341        ((int)(((u_quad_t)((pc) - (prof)->pr_off) *	\
2342            (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
2343
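/*
 * Forwarded flavour of addupc_intr(): credit a profiling tick at
 * checkstate_pc[id] to process 'p' (PC_TO_INDEX converts the pc offset to a
 * profile-buffer index using the 16-bit fixed-point pr_scale) and flag CPU
 * 'id' in *astmap so that it posts the tick from its AST handler.
 */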
2344static void
2345addupc_intr_forwarded(struct proc *p, int id, int *astmap)
2346{
2347	int i;
2348	struct uprof *prof;
2349	u_long pc;
2350
2351	pc = checkstate_pc[id];
2352	prof = &p->p_stats->p_prof;
2353	if (pc >= prof->pr_off &&
2354	    (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) {
2355		mtx_assert(&sched_lock, MA_OWNED);
2356		if ((p->p_sflag & PS_OWEUPC) == 0) {
2357			prof->pr_addr = pc;
2358			prof->pr_ticks = 1;
2359			p->p_sflag |= PS_OWEUPC;
2360		}
2361		*astmap |= (1 << id);
2362	}
2363}
2364
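/*
 * Do the statclock() bookkeeping for CPU 'id' on its behalf, using the
 * state captured in the checkstate_* arrays: bump the tick counters and
 * the appropriate cp_time bucket, call schedclock(), update resource usage
 * and, via addupc_intr_forwarded(), mark 'id' in *astmap when it needs an
 * AST for profiling.
 */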
2365static void
2366forwarded_statclock(int id, int pscnt, int *astmap)
2367{
2368	struct pstats *pstats;
2369	long rss;
2370	struct rusage *ru;
2371	struct vmspace *vm;
2372	int cpustate;
2373	struct proc *p;
2374#ifdef GPROF
2375	register struct gmonparam *g;
2376	int i;
2377#endif
2378
2379	mtx_assert(&sched_lock, MA_OWNED);
2380	p = checkstate_curproc[id];
2381	cpustate = checkstate_cpustate[id];
2382
2383	/* XXX */
2384	if (p->p_ithd)
2385		cpustate = CHECKSTATE_INTR;
2386	else if (p == SMP_prvspace[id].globaldata.gd_idleproc)
2387		cpustate = CHECKSTATE_SYS;
2388
2389	switch (cpustate) {
2390	case CHECKSTATE_USER:
2391		if (p->p_sflag & PS_PROFIL)
2392			addupc_intr_forwarded(p, id, astmap);
2393		if (pscnt > 1)
2394			return;
2395		p->p_uticks++;
2396		if (p->p_nice > NZERO)
2397			cp_time[CP_NICE]++;
2398		else
2399			cp_time[CP_USER]++;
2400		break;
2401	case CHECKSTATE_SYS:
2402#ifdef GPROF
2403		/*
2404		 * Kernel statistics are just like addupc_intr, only easier.
2405		 */
2406		g = &_gmonparam;
2407		if (g->state == GMON_PROF_ON) {
2408			i = checkstate_pc[id] - g->lowpc;
2409			if (i < g->textsize) {
2410				i /= HISTFRACTION * sizeof(*g->kcount);
2411				g->kcount[i]++;
2412			}
2413		}
2414#endif
2415		if (pscnt > 1)
2416			return;
2417
2418		p->p_sticks++;
2419		if (p == SMP_prvspace[id].globaldata.gd_idleproc)
2420			cp_time[CP_IDLE]++;
2421		else
2422			cp_time[CP_SYS]++;
2423		break;
2424	case CHECKSTATE_INTR:
2425	default:
2426#ifdef GPROF
2427		/*
2428		 * Kernel statistics are just like addupc_intr, only easier.
2429		 */
2430		g = &_gmonparam;
2431		if (g->state == GMON_PROF_ON) {
2432			i = checkstate_pc[id] - g->lowpc;
2433			if (i < g->textsize) {
2434				i /= HISTFRACTION * sizeof(*g->kcount);
2435				g->kcount[i]++;
2436			}
2437		}
2438#endif
2439		if (pscnt > 1)
2440			return;
2441		KASSERT(p != NULL, ("NULL process in interrupt state"));
2442		p->p_iticks++;
2443		cp_time[CP_INTR]++;
2444	}
2445
2446	schedclock(p);
2447
2448	/* Update resource usage integrals and maximums. */
2449	if ((pstats = p->p_stats) != NULL &&
2450	    (ru = &pstats->p_ru) != NULL &&
2451	    (vm = p->p_vmspace) != NULL) {
2452		ru->ru_ixrss += pgtok(vm->vm_tsize);
2453		ru->ru_idrss += pgtok(vm->vm_dsize);
2454		ru->ru_isrss += pgtok(vm->vm_ssize);
2455		rss = pgtok(vmspace_resident_count(vm));
2456		if (ru->ru_maxrss < rss)
2457			ru->ru_maxrss = rss;
2458	}
2459}
2460
2461void
2462forward_statclock(int pscnt)
2463{
2464	int map;
2465	int id;
2466	int i;
2467
2468	/* Kludge. We don't yet have separate locks for the interrupts
2469	 * and the kernel. This means that we cannot let the other processors
2470	 * handle complex interrupts while inhibiting them from entering
2471	 * the kernel in a non-interrupt context.
2472	 *
2473	 * What we can do, without changing the locking mechanisms yet,
2474	 * is to let the other processors handle a very simple interrupt
2475	 * (which determines the processor states) and do the main
2476	 * work ourselves.
2477	 */
2478
2479	CTR1(KTR_SMP, "forward_statclock(%d)", pscnt);
2480
2481	if (!smp_started || !invltlb_ok || cold || panicstr)
2482		return;
2483
2484	/* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */
2485
2486	map = PCPU_GET(other_cpus) & ~stopped_cpus;
2487	checkstate_probed_cpus = 0;
2488	if (map != 0)
2489		ipi_selected(map, IPI_CHECKSTATE);
2490
2491	i = 0;
2492	while (checkstate_probed_cpus != map) {
2493		/* spin */
2494		i++;
2495		if (i == 100000) {
2496#ifdef DIAGNOSTIC
2497			printf("forward_statclock: checkstate %x\n",
2498			       checkstate_probed_cpus);
2499#endif
2500			break;
2501		}
2502	}
2503
2504	/*
2505	 * Step 2: walk through other processors' processes, update ticks and
2506	 * profiling info.
2507	 */
2508
2509	map = 0;
2510	for (id = 0; id < mp_ncpus; id++) {
2511		if (id == PCPU_GET(cpuid))
2512			continue;
2513		if (((1 << id) & checkstate_probed_cpus) == 0)
2514			continue;
2515		forwarded_statclock(id, pscnt, &map);
2516	}
2517	if (map != 0) {
2518		checkstate_need_ast |= map;
2519		ipi_selected(map, IPI_AST);
2520		i = 0;
2521		while ((checkstate_need_ast & map) != 0) {
2522			/* spin */
2523			i++;
2524			if (i > 100000) {
2525#ifdef DIAGNOSTIC
2526				printf("forward_statclock: dropped ast 0x%x\n",
2527				       checkstate_need_ast & map);
2528#endif
2529				break;
2530			}
2531		}
2532	}
2533}
2534
2535void
2536forward_hardclock(int pscnt)
2537{
2538	int map;
2539	int id;
2540	struct proc *p;
2541	struct pstats *pstats;
2542	int i;
2543
2544	/* Kludge. We don't yet have separate locks for the interrupts
2545	 * and the kernel. This means that we cannot let the other processors
2546	 * handle complex interrupts while inhibiting them from entering
2547	 * the kernel in a non-interrupt context.
2548	 *
2549	 * What we can do, without changing the locking mechanisms yet,
2550	 * is to let the other processors handle a very simple interrupt
2551	 * (which determines the processor states) and do the main
2552	 * work ourselves.
2553	 */
2554
2555	CTR1(KTR_SMP, "forward_hardclock(%d)", pscnt);
2556
2557	if (!smp_started || !invltlb_ok || cold || panicstr)
2558		return;
2559
2560	/* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */
2561
2562	map = PCPU_GET(other_cpus) & ~stopped_cpus;
2563	checkstate_probed_cpus = 0;
2564	if (map != 0)
2565		ipi_selected(map, IPI_CHECKSTATE);
2566
2567	i = 0;
2568	while (checkstate_probed_cpus != map) {
2569		/* spin */
2570		i++;
2571		if (i == 100000) {
2572#ifdef DIAGNOSTIC
2573			printf("forward_hardclock: checkstate %x\n",
2574			       checkstate_probed_cpus);
2575#endif
2576			break;
2577		}
2578	}
2579
2580	/*
2581	 * Step 2: walk through other processors' processes, update virtual
2582	 * timer and profiling timer. If stathz == 0, also update ticks and
2583	 * profiling info.
2584	 */
2585
2586	map = 0;
2587	for (id = 0; id < mp_ncpus; id++) {
2588		if (id == PCPU_GET(cpuid))
2589			continue;
2590		if (((1 << id) & checkstate_probed_cpus) == 0)
2591			continue;
2592		p = checkstate_curproc[id];
2593		if (p) {
2594			pstats = p->p_stats;
2595			if (checkstate_cpustate[id] == CHECKSTATE_USER &&
2596			    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
2597			    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
2598				p->p_sflag |= PS_ALRMPEND;
2599				map |= (1 << id);
2600			}
2601			if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
2602			    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
2603				p->p_sflag |= PS_PROFPEND;
2604				map |= (1 << id);
2605			}
2606		}
2607		if (stathz == 0) {
2608			forwarded_statclock(id, pscnt, &map);
2609		}
2610	}
2611	if (map != 0) {
2612		checkstate_need_ast |= map;
2613		ipi_selected(map, IPI_AST);
2614		i = 0;
2615		while ((checkstate_need_ast & map) != 0) {
2616			/* spin */
2617			i++;
2618			if (i > 100000) {
2619#ifdef DIAGNOSTIC
2620				printf("forward_hardclock: dropped ast 0x%x\n",
2621				       checkstate_need_ast & map);
2622#endif
2623				break;
2624			}
2625		}
2626	}
2627}
2628
2629void
2630forward_signal(struct proc *p)
2631{
2632	int map;
2633	int id;
2634	int i;
2635
2636	/* Kludge. We don't yet have separate locks for the interrupts
2637	 * and the kernel. This means that we cannot let the other processors
2638	 * handle complex interrupts while inhibiting them from entering
2639	 * the kernel in a non-interrupt context.
2640	 *
2641	 * What we can do, without changing the locking mechanisms yet,
2642	 * is to let the other processors handle a very simple interrupt
2643	 * (which determines the processor states) and do the main
2644	 * work ourselves.
2645	 */
2646
2647	CTR1(KTR_SMP, "forward_signal(%p)", p);
2648
2649	if (!smp_started || !invltlb_ok || cold || panicstr)
2650		return;
2651	if (!forward_signal_enabled)
2652		return;
2653	mtx_lock_spin(&sched_lock);
2654	while (1) {
2655		if (p->p_stat != SRUN) {
2656			mtx_unlock_spin(&sched_lock);
2657			return;
2658		}
2659		id = p->p_oncpu;
2660		mtx_unlock_spin(&sched_lock);
2661		if (id == 0xff)
2662			return;
2663		map = (1<<id);
2664		checkstate_need_ast |= map;
2665		ipi_selected(map, IPI_AST);
2666		i = 0;
2667		while ((checkstate_need_ast & map) != 0) {
2668			/* spin */
2669			i++;
2670			if (i > 100000) {
2671#if 0
2672				printf("forward_signal: dropped ast 0x%x\n",
2673				       checkstate_need_ast & map);
2674#endif
2675				break;
2676			}
2677		}
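		/*
		 * The process may have migrated to another CPU while we were
		 * waiting for the AST to be acknowledged; if so, loop and
		 * signal the new CPU as well.
		 */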
2678		mtx_lock_spin(&sched_lock);
2679		if (id == p->p_oncpu) {
2680			mtx_unlock_spin(&sched_lock);
2681			return;
2682		}
2683	}
2684}
2685
2686void
2687forward_roundrobin(void)
2688{
2689	u_int map;
2690	int i;
2691
2692	CTR0(KTR_SMP, "forward_roundrobin()");
2693
2694	if (!smp_started || !invltlb_ok || cold || panicstr)
2695		return;
2696	if (!forward_roundrobin_enabled)
2697		return;
2698	resched_cpus |= PCPU_GET(other_cpus);
2699	map = PCPU_GET(other_cpus) & ~stopped_cpus;
2700#if 1
2701	ipi_selected(map, IPI_AST);
2702#else
2703	ipi_all_but_self(IPI_AST);
2704#endif
2705	i = 0;
2706	while ((checkstate_need_ast & map) != 0) {
2707		/* spin */
2708		i++;
2709		if (i > 100000) {
2710#if 0
2711			printf("forward_roundrobin: dropped ast 0x%x\n",
2712			       checkstate_need_ast & map);
2713#endif
2714			break;
2715		}
2716	}
2717}
2718
2719/*
2720 * When called the executing CPU will send an IPI to all other CPUs
2721 *  requesting that they halt execution.
2722 *
2723 * Usually (but not necessarily) called with 'other_cpus' as its arg.
2724 *
2725 *  - Signals all CPUs in map to stop.
2726 *  - Waits for each to stop.
2727 *
2728 * Returns:
2729 *  -1: error
2730 *   0: NA
2731 *   1: ok
2732 *
2733 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
2734 *            from executing at same time.
2735 */
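/*
 * Illustrative usage only, following the "usually called with" notes here
 * and on restart_cpus() below:
 *
 *	stop_cpus(PCPU_GET(other_cpus));
 *	... inspect or modify shared state ...
 *	restart_cpus(stopped_cpus);
 */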
2736int
2737stop_cpus(u_int map)
2738{
2739	int count = 0;
2740
2741	if (!smp_started)
2742		return 0;
2743
2744	/* send the Xcpustop IPI to all CPUs in map */
2745	ipi_selected(map, IPI_STOP);
2746
2747	while (count++ < 100000 && (stopped_cpus & map) != map)
2748		/* spin */ ;
2749
2750#ifdef DIAGNOSTIC
2751	if ((stopped_cpus & map) != map)
2752		printf("Warning: CPUs 0x%x did not stop!\n",
2753		    (~(stopped_cpus & map)) & map);
2754#endif
2755
2756	return 1;
2757}
2758
2759
2760/*
2761 * Called by a CPU to restart stopped CPUs.
2762 *
2763 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2764 *
2765 *  - Signals all CPUs in map to restart.
2766 *  - Waits for each to restart.
2767 *
2768 * Returns:
2769 *  -1: error
2770 *   0: NA
2771 *   1: ok
2772 */
2773int
2774restart_cpus(u_int map)
2775{
2776	int count = 0;
2777
2778	if (!smp_started)
2779		return 0;
2780
2781	started_cpus = map;		/* signal other cpus to restart */
2782
2783	/* wait for each to clear its bit */
2784	while (count++ < 100000 && (stopped_cpus & map) != 0)
2785		/* spin */ ;
2786
2787#ifdef DIAGNOSTIC
2788	if ((stopped_cpus & map) != 0)
2789		printf("Warning: CPUs 0x%x did not restart!\n",
2790		    (~(stopped_cpus & map)) & map);
2791#endif
2792
2793	return 1;
2794}
2795
2796
2797#ifdef APIC_INTR_REORDER
2798/*
2799 *	Maintain mapping from softintr vector to isr bit in local apic.
2800 */
2801void
2802set_lapic_isrloc(int intr, int vector)
2803{
2804	if (intr < 0 || intr > 32)
2805		panic("set_lapic_isrloc: bad intr argument: %d", intr);
2806	if (vector < ICU_OFFSET || vector > 255)
2807		panic("set_lapic_isrloc: bad vector argument: %d", vector);
2808	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
2809	apic_isrbit_location[intr].bit = (1<<(vector & 31));
2810}
2811#endif
2812
2813/*
2814 * All-CPU rendezvous.  CPUs are signalled, all execute the setup function
2815 * (if specified), rendezvous, execute the action function (if specified),
2816 * rendezvous again, execute the teardown function (if specified), and then
2817 * resume.
2818 *
2819 * Note that the supplied external functions _must_ be reentrant and aware
2820 * that they are running in parallel and in an unknown lock context.
2821 */
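/*
 * Illustrative example (the callback name is made up): to run wbinvd() on
 * every CPU with all CPUs synchronizing before and after the call:
 *
 *	static void
 *	wbinvd_action(void *dummy)
 *	{
 *		wbinvd();
 *	}
 *
 *	smp_rendezvous(NULL, wbinvd_action, NULL, NULL);
 */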
2822static void (*smp_rv_setup_func)(void *arg);
2823static void (*smp_rv_action_func)(void *arg);
2824static void (*smp_rv_teardown_func)(void *arg);
2825static void *smp_rv_func_arg;
2826static volatile int smp_rv_waiters[2];
2827
2828void
2829smp_rendezvous_action(void)
2830{
2831	/* setup function */
2832	if (smp_rv_setup_func != NULL)
2833		smp_rv_setup_func(smp_rv_func_arg);
2834	/* spin on entry rendezvous */
2835	atomic_add_int(&smp_rv_waiters[0], 1);
2836	while (smp_rv_waiters[0] < mp_ncpus)
2837		;
2838	/* action function */
2839	if (smp_rv_action_func != NULL)
2840		smp_rv_action_func(smp_rv_func_arg);
2841	/* spin on exit rendezvous */
2842	atomic_add_int(&smp_rv_waiters[1], 1);
2843	while (smp_rv_waiters[1] < mp_ncpus)
2844		;
2845	/* teardown function */
2846	if (smp_rv_teardown_func != NULL)
2847		smp_rv_teardown_func(smp_rv_func_arg);
2848}
2849
2850void
2851smp_rendezvous(void (* setup_func)(void *),
2852	       void (* action_func)(void *),
2853	       void (* teardown_func)(void *),
2854	       void *arg)
2855{
2856
2857	/* obtain rendezvous lock */
2858	mtx_lock_spin(&smp_rv_mtx);
2859
2860	/* set static function pointers */
2861	smp_rv_setup_func = setup_func;
2862	smp_rv_action_func = action_func;
2863	smp_rv_teardown_func = teardown_func;
2864	smp_rv_func_arg = arg;
2865	smp_rv_waiters[0] = 0;
2866	smp_rv_waiters[1] = 0;
2867
2868	/*
2869	 * signal other processors, which will enter the IPI with interrupts off
2870	 */
2871	ipi_all_but_self(IPI_RENDEZVOUS);
2872
2873	/* call executor function */
2874	smp_rendezvous_action();
2875
2876	/* release lock */
2877	mtx_unlock_spin(&smp_rv_mtx);
2878}
2879
2880/*
2881 * send an IPI to a set of cpus.
2882 */
2883void
2884ipi_selected(u_int32_t cpus, u_int ipi)
2885{
2886
2887	CTR2(KTR_SMP, __func__ ": cpus: %x ipi: %x", cpus, ipi);
2888	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
2889}
2890
2891/*
2892 * send an IPI to all CPUs, including myself
2893 */
2894void
2895ipi_all(u_int ipi)
2896{
2897
2898	CTR1(KTR_SMP, __func__ ": ipi: %x", ipi);
2899	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
2900}
2901
2902/*
2903 * send an IPI to all CPUs EXCEPT myself
2904 */
2905void
2906ipi_all_but_self(u_int ipi)
2907{
2908
2909	CTR1(KTR_SMP, __func__ ": ipi: %x", ipi);
2910	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
2911}
2912
2913/*
2914 * send an IPI to myself
2915 */
2916void
2917ipi_self(u_int ipi)
2918{
2919
2920	CTR1(KTR_SMP, __func__ ": ipi: %x", ipi);
2921	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
2922}
2923
2924void
2925release_aps(void *dummy __unused)
2926{
2927	atomic_store_rel_int(&aps_ready, 1);
2928}
2929
2930SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
2931