mptable.c revision 117372
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 *    derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/i386/i386/mptable.c 117372 2003-07-10 01:02:59Z peter $");
29
30#include "opt_cpu.h"
31#include "opt_kstack_pages.h"
32
33#ifdef SMP
34#include <machine/smptests.h>
35#else
36#error
37#endif
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/bus.h>
42#include <sys/cons.h>	/* cngetc() */
43#ifdef GPROF
44#include <sys/gmon.h>
45#endif
46#include <sys/kernel.h>
47#include <sys/ktr.h>
48#include <sys/lock.h>
49#include <sys/malloc.h>
50#include <sys/memrange.h>
51#include <sys/mutex.h>
52#include <sys/pcpu.h>
53#include <sys/proc.h>
54#include <sys/smp.h>
55#include <sys/sysctl.h>
56#include <sys/user.h>
57
58#include <vm/vm.h>
59#include <vm/vm_param.h>
60#include <vm/pmap.h>
61#include <vm/vm_kern.h>
62#include <vm/vm_extern.h>
63#include <vm/vm_map.h>
64
65#include <machine/apic.h>
66#include <machine/atomic.h>
67#include <machine/clock.h>
68#include <machine/cpu.h>
69#include <machine/cpufunc.h>
70#include <machine/mpapic.h>
71#include <machine/psl.h>
72#include <machine/segments.h>
73#include <machine/smp.h>
74#include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
75#include <machine/tss.h>
76#include <machine/specialreg.h>
77#include <machine/privatespace.h>
78
79#if defined(APIC_IO)
80#include <machine/md_var.h>		/* setidt() */
81#include <i386/isa/icu.h>		/* IPIs */
82#include <i386/isa/intr_machdep.h>	/* IPIs */
83#endif	/* APIC_IO */
84
85#if defined(TEST_DEFAULT_CONFIG)
86#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
87#else
88#define MPFPS_MPFB1	mpfps->mpfb1
89#endif  /* TEST_DEFAULT_CONFIG */
90
91#define WARMBOOT_TARGET		0
92#define WARMBOOT_OFF		(KERNBASE + 0x0467)
93#define WARMBOOT_SEG		(KERNBASE + 0x0469)
94
95#ifdef PC98
96#define BIOS_BASE		(0xe8000)
97#define BIOS_SIZE		(0x18000)
98#else
99#define BIOS_BASE		(0xf0000)
100#define BIOS_SIZE		(0x10000)
101#endif
102#define BIOS_COUNT		(BIOS_SIZE/4)
103
104#define CMOS_REG		(0x70)
105#define CMOS_DATA		(0x71)
106#define BIOS_RESET		(0x0f)
107#define BIOS_WARM		(0x0a)
108
109#define PROCENTRY_FLAG_EN	0x01
110#define PROCENTRY_FLAG_BP	0x02
111#define IOAPICENTRY_FLAG_EN	0x01
112
113
/*
 * MP Floating Pointer Structure.
 *
 * i386_mp_probe() finds this structure by scanning memory for its
 * signature (see MP_SIG and search_for_sig()) and keeps a pointer to it
 * in 'mpfps'.
 */
typedef struct MPFPS {
	char    signature[4];	/* matched as the 32-bit value MP_SIG */
	void   *pap;		/* MP Configuration Table Header; a value of
				 * 0 makes the table passes panic */
	u_char  length;
	u_char  spec_rev;
	u_char  checksum;
	u_char  mpfb1;		/* non-zero selects a 'default' configuration
				 * type (read through MPFPS_MPFB1) */
	u_char  mpfb2;		/* bit 0x80 set: PIC mode, clear: virtual
				 * wire mode (see mptable_pass2()) */
	u_char  mpfb3;
	u_char  mpfb4;
	u_char  mpfb5;
}      *mpfps_t;
127
/*
 * MP Configuration Table Header.
 *
 * Reached through MPFPS.pap.  The base table entries walked by
 * mptable_pass1() and mptable_pass2() start immediately after this
 * header (at offset sizeof(struct MPCTH)).
 */
typedef struct MPCTH {
	char    signature[4];
	u_short base_table_length;	/* the passes subtract
					 * sizeof(struct MPCTH) from this */
	u_char  spec_rev;
	u_char  checksum;
	u_char  oem_id[8];
	u_char  product_id[12];
	void   *oem_table_pointer;
	u_short oem_table_size;
	u_short entry_count;		/* number of base table entries the
					 * passes iterate over */
	void   *apic_address;		/* copied into cpu_apic_address by
					 * mptable_pass1() */
	u_short extended_table_length;
	u_char  extended_table_checksum;
	u_char  reserved;
}      *mpcth_t;
144
145
/*
 * Base table entry layouts.  The first byte of every entry is its type
 * code; mptable_pass1() and mptable_pass2() switch on it (0 = processor,
 * 1 = bus, 2 = I/O APIC, 3 = I/O INT, 4 = local INT) and advance by the
 * length recorded for that type in basetable_entry_types[].
 */

/* type 0: processor entry */
typedef struct PROCENTRY {
	u_char  type;
	u_char  apic_id;
	u_char  apic_version;
	u_char  cpu_flags;	/* tested against PROCENTRY_FLAG_EN; see also
				 * PROCENTRY_FLAG_BP */
	u_long  cpu_signature;
	u_long  feature_flags;
	u_long  reserved1;
	u_long  reserved2;
}      *proc_entry_ptr;

/* type 1: bus entry */
typedef struct BUSENTRY {
	u_char  type;
	u_char  bus_id;
	char    bus_type[6];
}      *bus_entry_ptr;

/* type 2: I/O APIC entry */
typedef struct IOAPICENTRY {
	u_char  type;
	u_char  apic_id;
	u_char  apic_version;
	u_char  apic_flags;	/* tested against IOAPICENTRY_FLAG_EN */
	void   *apic_address;	/* recorded in io_apic_address[] by
				 * mptable_pass1() */
}      *io_apic_entry_ptr;

/* type 3: I/O interrupt entry (handed to int_entry() by mptable_pass2()) */
typedef struct INTENTRY {
	u_char  type;
	u_char  int_type;
	u_short int_flags;
	u_char  src_bus_id;
	u_char  src_bus_irq;
	u_char  dst_apic_id;
	u_char  dst_apic_int;
}      *int_entry_ptr;
180
/*
 * Description of one kind of MP base table entry; instances live in
 * basetable_entry_types[].
 */
typedef struct BASETABLE_ENTRY {
	u_char  type;		/* entry type code */
	u_char  length;		/* size of such an entry in bytes; the table
				 * walkers advance by this amount */
	char    name[16];	/* human readable name of the entry type */
}       basetable_entry;
187
188/*
189 * this code MUST be enabled here and in mpboot.s.
190 * it follows the very early stages of AP boot by placing values in CMOS ram.
191 * it NORMALLY will never be needed and thus the primitive method for enabling.
192 *
193#define CHECK_POINTS
194 */
195
#if defined(CHECK_POINTS) && !defined(PC98)
/*
 * Access one byte of CMOS RAM: select it through CMOS_REG, then read or
 * write it through CMOS_DATA.
 */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Store D into each of the six checkpoint bytes (CMOS 0x34 - 0x39).
 * D is evaluated once per byte.
 *
 * Wrapped in do/while (0) so that "CHECK_INIT(x);" is exactly one
 * statement and stays correct under an unbraced if/else.
 */
#define CHECK_INIT(D)				\
do {						\
	CHECK_WRITE(0x34, (D));			\
	CHECK_WRITE(0x35, (D));			\
	CHECK_WRITE(0x36, (D));			\
	CHECK_WRITE(0x37, (D));			\
	CHECK_WRITE(0x38, (D));			\
	CHECK_WRITE(0x39, (D));			\
} while (0)

/*
 * Print the six checkpoint bytes, prefixed by the string S.
 * Expands to a single expression; the caller supplies the semicolon.
 */
#define CHECK_PRINT(S)				\
	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
	   (S),					\
	   CHECK_READ(0x34),			\
	   CHECK_READ(0x35),			\
	   CHECK_READ(0x36),			\
	   CHECK_READ(0x37),			\
	   CHECK_READ(0x38),			\
	   CHECK_READ(0x39))

#else				/* CHECK_POINTS */

/* checkpoints disabled: both macros expand to nothing */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
224
225/*
226 * Values to send to the POST hardware.
227 */
228#define MP_BOOTADDRESS_POST	0x10
229#define MP_PROBE_POST		0x11
230#define MPTABLE_PASS1_POST	0x12
231
232#define MP_START_POST		0x13
233#define MP_ENABLE_POST		0x14
234#define MPTABLE_PASS2_POST	0x15
235
236#define START_ALL_APS_POST	0x16
237#define INSTALL_AP_TRAMP_POST	0x17
238#define START_AP_POST		0x18
239
240#define MP_ANNOUNCE_POST	0x19
241
242static int need_hyperthreading_fixup;
243static u_int logical_cpus;
244static u_int logical_cpus_mask;
245
246/* used to hold the AP's until we are ready to release them */
247static struct mtx ap_boot_mtx;
248
249/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
250int	current_postcode;
251
252/** XXX FIXME: what system files declare these??? */
253extern struct region_descriptor r_gdt, r_idt;
254
255int	bsp_apic_ready = 0;	/* flags useability of BSP apic */
256int	mp_naps;		/* # of Applications processors */
257int	mp_nbusses;		/* # of busses */
258int	mp_napics;		/* # of IO APICs */
259int	boot_cpu_id;		/* designated BSP */
260vm_offset_t cpu_apic_address;
261vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
262extern	int nkpt;
263
264u_int32_t cpu_apic_versions[MAXCPU];
265u_int32_t *io_apic_versions;
266
267#ifdef APIC_INTR_REORDER
268struct {
269	volatile int *location;
270	int bit;
271} apic_isrbit_location[32];
272#endif
273
274struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
275
276/*
277 * APIC ID logical/physical mapping structures.
278 * We oversize these to simplify boot-time config.
279 */
280int     cpu_num_to_apic_id[NAPICID];
281int     io_num_to_apic_id[NAPICID];
282int     apic_id_to_logical[NAPICID];
283
284/*
285 * CPU topology map datastructures for HTT.
286 */
287struct	cpu_group	mp_groups[NAPICID];
288struct	cpu_top mp_top;
289struct	cpu_top *smp_topology;
290
291
292/* AP uses this during bootstrap.  Do not staticize.  */
293char *bootSTK;
294static int bootAP;
295
296/* Hotwire a 0->4MB V==P mapping */
297extern pt_entry_t *KPTphys;
298
299/* SMP page table page */
300extern pt_entry_t *SMPpt;
301
302struct pcb stoppcbs[MAXCPU];
303
304#ifdef APIC_IO
305/* Variables needed for SMP tlb shootdown. */
306vm_offset_t smp_tlb_addr1;
307vm_offset_t smp_tlb_addr2;
308volatile int smp_tlb_wait;
309static struct mtx smp_tlb_mtx;
310#endif
311
312/*
313 * Local data and functions.
314 */
315
316/* Set to 1 once we're ready to let the APs out of the pen. */
317static volatile int aps_ready = 0;
318
319static int	mp_capable;
320static u_int	boot_address;
321static u_int	base_memory;
322
323static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
324static mpfps_t	mpfps;
325static int	search_for_sig(u_int32_t target, int count);
326static void	mp_enable(u_int boot_addr);
327
328static void	mptable_hyperthread_fixup(u_int id_mask);
329static void	mptable_pass1(void);
330static int	mptable_pass2(void);
331static void	default_mp_table(int type);
332static void	fix_mp_table(void);
333static void	setup_apic_irq_mapping(void);
334static void	init_locks(void);
335static int	start_all_aps(u_int boot_addr);
336static void	install_ap_tramp(u_int boot_addr);
337static int	start_ap(int logicalCpu, u_int boot_addr);
338void		ap_init(void);
339static int	apic_int_is_bus_type(int intr, int bus_type);
340static void	release_aps(void *dummy);
341
342/*
343 * initialize all the SMP locks
344 */
345
346/* lock region used by kernel profiling */
347int	mcount_lock;
348
349#ifdef USE_COMLOCK
350/* locks com (tty) data/hardware accesses: a FASTINTR() */
351struct mtx		com_mtx;
352#endif /* USE_COMLOCK */
353
/*
 * Initialize the spin mutexes owned by this file.  Each mutex is only
 * set up when the kernel option that declares it is configured.
 */
static void
init_locks(void)
{
#if defined(USE_COMLOCK)
	/* com (tty) data/hardware access lock */
	mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
#endif	/* USE_COMLOCK */

#if defined(APIC_IO)
	/* lock declared together with the SMP tlb shootdown variables */
	mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
#endif	/* APIC_IO */
}
365
366/*
367 * Calculate usable address in base memory for AP trampoline code.
368 */
369u_int
370mp_bootaddress(u_int basemem)
371{
372	POSTCODE(MP_BOOTADDRESS_POST);
373
374	base_memory = basemem * 1024;	/* convert to bytes */
375
376	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
377	if ((base_memory - boot_address) < bootMP_size)
378		boot_address -= 4096;	/* not enough, lower by 4k */
379
380	return boot_address;
381}
382
383
384/*
385 * Look for an Intel MP spec table (ie, SMP capable hardware).
386 */
387void
388i386_mp_probe(void)
389{
390	int     x;
391	u_long  segment;
392	u_int32_t target;
393
394	POSTCODE(MP_PROBE_POST);
395
396	/* see if EBDA exists */
397	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
398		/* search first 1K of EBDA */
399		target = (u_int32_t) (segment << 4);
400		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
401			goto found;
402	} else {
403		/* last 1K of base memory, effective 'top of base' passed in */
404		target = (u_int32_t) (base_memory - 0x400);
405		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
406			goto found;
407	}
408
409	/* search the BIOS */
410	target = (u_int32_t) BIOS_BASE;
411	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
412		goto found;
413
414	/* nothing found */
415	mpfps = (mpfps_t)0;
416	mp_capable = 0;
417	return;
418
419found:
420	/* calculate needed resources */
421	mpfps = (mpfps_t)x;
422	mptable_pass1();
423
424	/* flag fact that we are running multiple processors */
425	mp_capable = 1;
426}
427
428int
429cpu_mp_probe(void)
430{
431	/*
432	 * Record BSP in CPU map
433	 * This is done here so that MBUF init code works correctly.
434	 */
435	all_cpus = 1;
436
437	return (mp_capable);
438}
439
440/*
441 * Initialize the SMP hardware and the APIC and start up the AP's.
442 */
443void
444cpu_mp_start(void)
445{
446	POSTCODE(MP_START_POST);
447
448	/* look for MP capable motherboard */
449	if (mp_capable)
450		mp_enable(boot_address);
451	else
452		panic("MP hardware not found!");
453
454	cpu_setregs();
455}
456
457
458/*
459 * Print various information about the SMP system hardware and setup.
460 */
461void
462cpu_mp_announce(void)
463{
464	int     x;
465
466	POSTCODE(MP_ANNOUNCE_POST);
467
468	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
469	printf(", version: 0x%08x", cpu_apic_versions[0]);
470	printf(", at 0x%08x\n", cpu_apic_address);
471	for (x = 1; x <= mp_naps; ++x) {
472		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
473		printf(", version: 0x%08x", cpu_apic_versions[x]);
474		printf(", at 0x%08x\n", cpu_apic_address);
475	}
476
477#if defined(APIC_IO)
478	for (x = 0; x < mp_napics; ++x) {
479		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
480		printf(", version: 0x%08x", io_apic_versions[x]);
481		printf(", at 0x%08x\n", io_apic_address[x]);
482	}
483#else
484	printf(" Warning: APIC I/O disabled\n");
485#endif	/* APIC_IO */
486}
487
/*
 * AP cpu's call this to sync up protected mode.
 *
 * Builds and loads this CPU's own GDT slice, loads the IDT and default
 * LDT, sets up and loads the per-CPU TSS, normalizes CR0 and finally
 * calls pmap_set_opt().  The CPU being initialized is identified by
 * 'bootAP' (presumably set by the BSP before this AP is started --
 * confirm against start_all_aps()).
 */
void
init_secondary(void)
{
	int	gsel_tss;
	int	x, myid = bootAP;
	u_int	cr0;

	/*
	 * Point the private-space and TSS segment templates at this
	 * CPU's slot in SMP_prvspace[], and record the per-CPU area's
	 * own address inside it.
	 */
	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
	gdt_segs[GPROC0_SEL].ssd_base =
		(int) &SMP_prvspace[myid].pcpu.pc_common_tss;
	SMP_prvspace[myid].pcpu.pc_prvspace =
		&SMP_prvspace[myid].pcpu;

	/* convert the templates into this CPU's NGDT descriptors in gdt[] */
	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	/* load the GDT slice that belongs to this CPU */
	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	/* load the default LDT and remember it as the current one */
	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	/*
	 * Fill in the per-CPU TSS and its descriptor, then load the task
	 * register with the GPROC0_SEL selector.
	 */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	ltr(gsel_tss);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	/* clear the CD, NW and EM bits; leave every other CR0 bit alone */
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);

	pmap_set_opt();
}
537
538
539#if defined(APIC_IO)
540/*
541 * Final configuration of the BSP's local APIC:
542 *  - disable 'pic mode'.
543 *  - disable 'virtual wire mode'.
544 *  - enable NMI.
545 */
546void
547bsp_apic_configure(void)
548{
549	u_char		byte;
550	u_int32_t	temp;
551
552	/* leave 'pic mode' if necessary */
553	if (picmode) {
554		outb(0x22, 0x70);	/* select IMCR */
555		byte = inb(0x23);	/* current contents */
556		byte |= 0x01;		/* mask external INTR */
557		outb(0x23, byte);	/* disconnect 8259s/NMI */
558	}
559
560	/* mask lint0 (the 8259 'virtual wire' connection) */
561	temp = lapic.lvt_lint0;
562	temp |= APIC_LVT_M;		/* set the mask */
563	lapic.lvt_lint0 = temp;
564
565        /* setup lint1 to handle NMI */
566        temp = lapic.lvt_lint1;
567        temp &= ~APIC_LVT_M;		/* clear the mask */
568        lapic.lvt_lint1 = temp;
569
570	if (bootverbose)
571		apic_dump("bsp_apic_configure()");
572}
573#endif  /* APIC_IO */
574
575
576/*******************************************************************
577 * local functions and data
578 */
579
580/*
581 * start the SMP system
582 */
583static void
584mp_enable(u_int boot_addr)
585{
586	int     x;
587#if defined(APIC_IO)
588	int     apic;
589	u_int   ux;
590#endif	/* APIC_IO */
591
592	POSTCODE(MP_ENABLE_POST);
593
594	/* turn on 4MB of V == P addressing so we can get to MP table */
595	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
596	invltlb();
597
598	/* examine the MP table for needed info, uses physical addresses */
599	x = mptable_pass2();
600
601	*(int *)PTD = 0;
602	invltlb();
603
604	/* can't process default configs till the CPU APIC is pmapped */
605	if (x)
606		default_mp_table(x);
607
608	/* post scan cleanup */
609	fix_mp_table();
610	setup_apic_irq_mapping();
611
612#if defined(APIC_IO)
613
614	/* fill the LOGICAL io_apic_versions table */
615	for (apic = 0; apic < mp_napics; ++apic) {
616		ux = io_apic_read(apic, IOAPIC_VER);
617		io_apic_versions[apic] = ux;
618		io_apic_set_id(apic, IO_TO_ID(apic));
619	}
620
621	/* program each IO APIC in the system */
622	for (apic = 0; apic < mp_napics; ++apic)
623		if (io_apic_setup(apic) < 0)
624			panic("IO APIC setup failure");
625
626	/* install a 'Spurious INTerrupt' vector */
627	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
628	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
629
630	/* install an inter-CPU IPI for TLB invalidation */
631	setidt(XINVLTLB_OFFSET, Xinvltlb,
632	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
633	setidt(XINVLPG_OFFSET, Xinvlpg,
634	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
635	setidt(XINVLRNG_OFFSET, Xinvlrng,
636	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
637
638	/* install an inter-CPU IPI for forwarding hardclock() */
639	setidt(XHARDCLOCK_OFFSET, Xhardclock,
640	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
641
642	/* install an inter-CPU IPI for forwarding statclock() */
643	setidt(XSTATCLOCK_OFFSET, Xstatclock,
644	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
645
646	/* install an inter-CPU IPI for lazy pmap release */
647	setidt(XLAZYPMAP_OFFSET, Xlazypmap,
648	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
649
650	/* install an inter-CPU IPI for all-CPU rendezvous */
651	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
652	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
653
654	/* install an inter-CPU IPI for forcing an additional software trap */
655	setidt(XCPUAST_OFFSET, Xcpuast,
656	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
657
658	/* install an inter-CPU IPI for CPU stop/restart */
659	setidt(XCPUSTOP_OFFSET, Xcpustop,
660	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
661
662#if defined(TEST_TEST1)
663	/* install a "fake hardware INTerrupt" vector */
664	setidt(XTEST1_OFFSET, Xtest1,
665	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
666#endif  /** TEST_TEST1 */
667
668#endif	/* APIC_IO */
669
670	/* initialize all SMP locks */
671	init_locks();
672
673	/* start each Application Processor */
674	start_all_aps(boot_addr);
675}
676
677
678/*
679 * look for the MP spec signature
680 */
681
682/* string defined by the Intel MP Spec as identifying the MP table */
683#define MP_SIG		0x5f504d5f	/* _MP_ */
684#define NEXT(X)		((X) += 4)
685static int
686search_for_sig(u_int32_t target, int count)
687{
688	int     x;
689	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
690
691	for (x = 0; x < count; NEXT(x))
692		if (addr[x] == MP_SIG)
693			/* make array index a byte index */
694			return (target + (x * sizeof(u_int32_t)));
695
696	return -1;
697}
698
699
/*
 * The known MP base table entry types: { type code, entry length in
 * bytes, name }.  The array is indexed by type code; mptable_pass1() and
 * mptable_pass2() use the length to step from one entry to the next.
 */
static basetable_entry basetable_entry_types[] =
{
	{0, 20, "Processor"},
	{1, 8, "Bus"},
	{2, 8, "I/O APIC"},
	{3, 8, "I/O INT"},
	{4, 8, "Local INT"}
};
708
/* One element of bus_data[]: a bus id together with its bus type. */
typedef struct BUSDATA {
	u_char  bus_id;		/* initialized to 0xff by mptable_pass2() */
	enum busTypes bus_type;
}       bus_datum;

/*
 * One element of io_apic_ints[].  The first six fields carry the same
 * names as the corresponding INTENTRY fields; int_vector is an extra
 * field maintained by assign_apic_irq() / revoke_apic_irq().
 */
typedef struct INTDATA {
	u_char  int_type;	/* initialized to 0xff by mptable_pass2() */
	u_short int_flags;
	u_char  src_bus_id;
	u_char  src_bus_irq;
	u_char  dst_apic_id;
	u_char  dst_apic_int;
	u_char	int_vector;	/* assigned irq, or 0xff when unassigned */
}       io_int, local_int;

/* Associates a bus type code with its printable name. */
typedef struct BUSTYPENAME {
	u_char  type;
	char    name[7];	/* room for six characters plus the NUL */
}       bus_type_name;
728
/*
 * Bus type codes and their names.  Slots filled with UNKNOWN_BUSTYPE /
 * "---" are placeholders.
 *
 * NOTE(review): presumably searched by name from lookup_bus_type(),
 * which is not visible here -- confirm how the position of an entry
 * matters before reordering.
 * NOTE(review): MCA is listed twice (4th and 7th entry) -- confirm that
 * this is intended.
 */
static bus_type_name bus_type_table[] =
{
	{CBUS, "CBUS"},
	{CBUSII, "CBUSII"},
	{EISA, "EISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{ISA, "ISA"},
	{MCA, "MCA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{PCI, "PCI"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{XPRESS, "XPRESS"},
	{UNKNOWN_BUSTYPE, "---"}
};
/*
 * Bus layout of the seven 'default' configurations; from MP spec v1.4,
 * table 5-1.
 *
 * The row is selected with (default configuration type - 1); see
 * mptable_pass1(), which reads column 0 as the number of busses.
 * Rows with a single bus carry 255 in the id1/type1 columns
 * (presumably meaning "no second bus" -- confirm against
 * default_mp_table()).
 */
static int default_data[7][5] =
{
/*   nbus, id0, type0, id1, type1 */
	{1, 0, ISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, MCA, 255, 255},
	{2, 0, ISA, 1, PCI},
	{2, 0, EISA, 1, PCI},
	{2, 0, MCA, 1, PCI}
};
763
764
765/* the bus data */
766static bus_datum *bus_data;
767
768/* the IO INT data, one entry per possible APIC INTerrupt */
769static io_int  *io_apic_ints;
770
771static int nintrs;
772
773static int processor_entry(proc_entry_ptr entry, int cpu);
774static int bus_entry(bus_entry_ptr entry, int bus);
775static int io_apic_entry(io_apic_entry_ptr entry, int apic);
776static int int_entry(int_entry_ptr entry, int intr);
777static int lookup_bus_type(char *name);
778
779
780/*
781 * 1st pass on motherboard's Intel MP specification table.
782 *
783 * initializes:
784 *	mp_ncpus = 1
785 *
786 * determines:
787 *	cpu_apic_address (common to all CPUs)
788 *	io_apic_address[N]
789 *	mp_naps
790 *	mp_nbusses
791 *	mp_napics
792 *	nintrs
793 */
794static void
795mptable_pass1(void)
796{
797	int	x;
798	mpcth_t	cth;
799	int	totalSize;
800	void*	position;
801	int	count;
802	int	type;
803	u_int	id_mask;
804
805	POSTCODE(MPTABLE_PASS1_POST);
806
807	/* clear various tables */
808	for (x = 0; x < NAPICID; ++x) {
809		io_apic_address[x] = ~0;	/* IO APIC address table */
810	}
811
812	/* init everything to empty */
813	mp_naps = 0;
814	mp_nbusses = 0;
815	mp_napics = 0;
816	nintrs = 0;
817	id_mask = 0;
818
819	/* check for use of 'default' configuration */
820	if (MPFPS_MPFB1 != 0) {
821		/* use default addresses */
822		cpu_apic_address = DEFAULT_APIC_BASE;
823		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
824
825		/* fill in with defaults */
826		mp_naps = 2;		/* includes BSP */
827		mp_maxid = 1;
828		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
829#if defined(APIC_IO)
830		mp_napics = 1;
831		nintrs = 16;
832#endif	/* APIC_IO */
833	}
834	else {
835		if ((cth = mpfps->pap) == 0)
836			panic("MP Configuration Table Header MISSING!");
837
838		cpu_apic_address = (vm_offset_t) cth->apic_address;
839
840		/* walk the table, recording info of interest */
841		totalSize = cth->base_table_length - sizeof(struct MPCTH);
842		position = (u_char *) cth + sizeof(struct MPCTH);
843		count = cth->entry_count;
844
845		while (count--) {
846			switch (type = *(u_char *) position) {
847			case 0: /* processor_entry */
848				if (((proc_entry_ptr)position)->cpu_flags
849				    & PROCENTRY_FLAG_EN) {
850					++mp_naps;
851					mp_maxid++;
852					id_mask |= 1 <<
853					    ((proc_entry_ptr)position)->apic_id;
854				}
855				break;
856			case 1: /* bus_entry */
857				++mp_nbusses;
858				break;
859			case 2: /* io_apic_entry */
860				if (((io_apic_entry_ptr)position)->apic_flags
861					& IOAPICENTRY_FLAG_EN)
862					io_apic_address[mp_napics++] =
863					    (vm_offset_t)((io_apic_entry_ptr)
864						position)->apic_address;
865				break;
866			case 3: /* int_entry */
867				++nintrs;
868				break;
869			case 4:	/* int_entry */
870				break;
871			default:
872				panic("mpfps Base Table HOSED!");
873				/* NOTREACHED */
874			}
875
876			totalSize -= basetable_entry_types[type].length;
877			(u_char*)position += basetable_entry_types[type].length;
878		}
879	}
880
881	/* qualify the numbers */
882	if (mp_naps > MAXCPU) {
883		printf("Warning: only using %d of %d available CPUs!\n",
884			MAXCPU, mp_naps);
885		mp_naps = MAXCPU;
886	}
887
888	/* See if we need to fixup HT logical CPUs. */
889	mptable_hyperthread_fixup(id_mask);
890
891	/*
892	 * Count the BSP.
893	 * This is also used as a counter while starting the APs.
894	 */
895	mp_ncpus = 1;
896
897	--mp_naps;	/* subtract the BSP */
898}
899
900
901/*
902 * 2nd pass on motherboard's Intel MP specification table.
903 *
904 * sets:
905 *	boot_cpu_id
906 *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
907 *	CPU_TO_ID(N), logical CPU to APIC ID table
908 *	IO_TO_ID(N), logical IO to APIC ID table
909 *	bus_data[N]
910 *	io_apic_ints[N]
911 */
912static int
913mptable_pass2(void)
914{
915	struct PROCENTRY proc;
916	int     x;
917	mpcth_t cth;
918	int     totalSize;
919	void*   position;
920	int     count;
921	int     type;
922	int     apic, bus, cpu, intr;
923	int	i, j;
924	int	pgeflag;
925
926	POSTCODE(MPTABLE_PASS2_POST);
927
928	/* Initialize fake proc entry for use with HT fixup. */
929	bzero(&proc, sizeof(proc));
930	proc.type = 0;
931	proc.cpu_flags = PROCENTRY_FLAG_EN;
932
933	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
934
935	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
936	    M_DEVBUF, M_WAITOK);
937	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
938	    M_DEVBUF, M_WAITOK);
939	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
940	    M_DEVBUF, M_WAITOK);
941	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
942	    M_DEVBUF, M_WAITOK);
943
944	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
945
946	for (i = 0; i < mp_napics; i++) {
947		for (j = 0; j < mp_napics; j++) {
948			/* same page frame as a previous IO apic? */
949			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
950			    (io_apic_address[i] & PG_FRAME)) {
951				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
952					+ (NPTEPG-2-j) * PAGE_SIZE
953					+ (io_apic_address[i] & PAGE_MASK));
954				break;
955			}
956			/* use this slot if available */
957			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
958				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
959				    pgeflag | (io_apic_address[i] & PG_FRAME));
960				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
961					+ (NPTEPG-2-j) * PAGE_SIZE
962					+ (io_apic_address[i] & PAGE_MASK));
963				break;
964			}
965		}
966	}
967
968	/* clear various tables */
969	for (x = 0; x < NAPICID; ++x) {
970		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
971		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
972		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
973	}
974
975	/* clear bus data table */
976	for (x = 0; x < mp_nbusses; ++x)
977		bus_data[x].bus_id = 0xff;
978
979	/* clear IO APIC INT table */
980	for (x = 0; x < (nintrs + 1); ++x) {
981		io_apic_ints[x].int_type = 0xff;
982		io_apic_ints[x].int_vector = 0xff;
983	}
984
985	/* setup the cpu/apic mapping arrays */
986	boot_cpu_id = -1;
987
988	/* record whether PIC or virtual-wire mode */
989	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
990
991	/* check for use of 'default' configuration */
992	if (MPFPS_MPFB1 != 0)
993		return MPFPS_MPFB1;	/* return default configuration type */
994
995	if ((cth = mpfps->pap) == 0)
996		panic("MP Configuration Table Header MISSING!");
997
998	/* walk the table, recording info of interest */
999	totalSize = cth->base_table_length - sizeof(struct MPCTH);
1000	position = (u_char *) cth + sizeof(struct MPCTH);
1001	count = cth->entry_count;
1002	apic = bus = intr = 0;
1003	cpu = 1;				/* pre-count the BSP */
1004
1005	while (count--) {
1006		switch (type = *(u_char *) position) {
1007		case 0:
1008			if (processor_entry(position, cpu)) {
1009				if (logical_cpus != 0 &&
1010				    cpu % logical_cpus != 0)
1011					logical_cpus_mask |= (1 << cpu);
1012				++cpu;
1013			}
1014			if (need_hyperthreading_fixup) {
1015				/*
1016				 * Create fake mptable processor entries
1017				 * and feed them to processor_entry() to
1018				 * enumerate the logical CPUs.
1019				 */
1020				proc.apic_id = ((proc_entry_ptr)position)->apic_id;
1021				for (i = 1; i < logical_cpus; i++) {
1022					proc.apic_id++;
1023					(void)processor_entry(&proc, cpu);
1024					logical_cpus_mask |= (1 << cpu);
1025					cpu++;
1026				}
1027			}
1028			break;
1029		case 1:
1030			if (bus_entry(position, bus))
1031				++bus;
1032			break;
1033		case 2:
1034			if (io_apic_entry(position, apic))
1035				++apic;
1036			break;
1037		case 3:
1038			if (int_entry(position, intr))
1039				++intr;
1040			break;
1041		case 4:
1042			/* int_entry(position); */
1043			break;
1044		default:
1045			panic("mpfps Base Table HOSED!");
1046			/* NOTREACHED */
1047		}
1048
1049		totalSize -= basetable_entry_types[type].length;
1050		(u_char *) position += basetable_entry_types[type].length;
1051	}
1052
1053	if (boot_cpu_id == -1)
1054		panic("NO BSP found!");
1055
1056	/* report fact that its NOT a default configuration */
1057	return 0;
1058}
1059
1060/*
1061 * Check if we should perform a hyperthreading "fix-up" to
1062 * enumerate any logical CPU's that aren't already listed
1063 * in the table.
1064 *
1065 * XXX: We assume that all of the physical CPUs in the
1066 * system have the same number of logical CPUs.
1067 *
1068 * XXX: We assume that APIC ID's are allocated such that
1069 * the APIC ID's for a physical processor are aligned
1070 * with the number of logical CPU's in the processor.
1071 */
1072static void
1073mptable_hyperthread_fixup(u_int id_mask)
1074{
1075	u_int i, id;
1076	int logical;
1077
1078	/* Nothing to do if there is no HTT support. */
1079	if ((cpu_feature & CPUID_HTT) == 0)
1080		return;
1081	logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
1082	if (logical_cpus <= 1)
1083		return;
1084
1085	/*
1086	 * For each APIC ID of a CPU that is set in the mask,
1087	 * scan the other candidate APIC ID's for this
1088	 * physical processor.  If any of those ID's are
1089	 * already in the table, then kill the fixup.
1090	 */
1091	for (id = 0; id <= MAXCPU; id++) {
1092		if ((id_mask & 1 << id) == 0)
1093			continue;
1094		/* First, make sure we are on a logical_cpus boundary. */
1095		if (id % logical_cpus != 0)
1096			return;
1097		for (i = id + 1; i < id + logical_cpus; i++)
1098			if ((id_mask & 1 << i) != 0)
1099				return;
1100	}
1101
1102	/*
1103	 * Ok, the ID's checked out, so enable the fixup.  We have to fixup
1104	 * mp_naps and mp_maxid right now.
1105	 */
1106	need_hyperthreading_fixup = 1;
1107	mp_maxid *= logical_cpus;
1108	mp_naps *= logical_cpus;
1109
1110	/*
1111	 * Now setup the cpu topology map.
1112	 */
1113	mp_top.ct_count = mp_naps / logical_cpus;
1114	mp_top.ct_group = mp_groups;
1115
1116	/*
1117	 * The first logical id is directly after the last valid physical id.
1118	 */
1119	logical = mp_top.ct_count + 1;
1120
1121	for (i = 0; i < mp_top.ct_count; i++) {
1122		int j;
1123
1124		mp_groups[i].cg_mask = (1 << i);
1125		for (j = 1; j < logical_cpus; j++)
1126			mp_groups[i].cg_mask |= (1 << logical++);
1127		mp_groups[i].cg_count = logical_cpus;
1128		mp_groups[i].cg_children = 0;
1129	}
1130
1131	smp_topology = &mp_top;
1132}
1133
1134void
1135assign_apic_irq(int apic, int intpin, int irq)
1136{
1137	int x;
1138
1139	if (int_to_apicintpin[irq].ioapic != -1)
1140		panic("assign_apic_irq: inconsistent table");
1141
1142	int_to_apicintpin[irq].ioapic = apic;
1143	int_to_apicintpin[irq].int_pin = intpin;
1144	int_to_apicintpin[irq].apic_address = ioapic[apic];
1145	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1146
1147	for (x = 0; x < nintrs; x++) {
1148		if ((io_apic_ints[x].int_type == 0 ||
1149		     io_apic_ints[x].int_type == 3) &&
1150		    io_apic_ints[x].int_vector == 0xff &&
1151		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1152		    io_apic_ints[x].dst_apic_int == intpin)
1153			io_apic_ints[x].int_vector = irq;
1154	}
1155}
1156
1157void
1158revoke_apic_irq(int irq)
1159{
1160	int x;
1161	int oldapic;
1162	int oldintpin;
1163
1164	if (int_to_apicintpin[irq].ioapic == -1)
1165		panic("revoke_apic_irq: inconsistent table");
1166
1167	oldapic = int_to_apicintpin[irq].ioapic;
1168	oldintpin = int_to_apicintpin[irq].int_pin;
1169
1170	int_to_apicintpin[irq].ioapic = -1;
1171	int_to_apicintpin[irq].int_pin = 0;
1172	int_to_apicintpin[irq].apic_address = NULL;
1173	int_to_apicintpin[irq].redirindex = 0;
1174
1175	for (x = 0; x < nintrs; x++) {
1176		if ((io_apic_ints[x].int_type == 0 ||
1177		     io_apic_ints[x].int_type == 3) &&
1178		    io_apic_ints[x].int_vector != 0xff &&
1179		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1180		    io_apic_ints[x].dst_apic_int == oldintpin)
1181			io_apic_ints[x].int_vector = 0xff;
1182	}
1183}
1184
1185
1186static void
1187allocate_apic_irq(int intr)
1188{
1189	int apic;
1190	int intpin;
1191	int irq;
1192
1193	if (io_apic_ints[intr].int_vector != 0xff)
1194		return;		/* Interrupt handler already assigned */
1195
1196	if (io_apic_ints[intr].int_type != 0 &&
1197	    (io_apic_ints[intr].int_type != 3 ||
1198	     (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1199	      io_apic_ints[intr].dst_apic_int == 0)))
1200		return;		/* Not INT or ExtInt on != (0, 0) */
1201
1202	irq = 0;
1203	while (irq < APIC_INTMAPSIZE &&
1204	       int_to_apicintpin[irq].ioapic != -1)
1205		irq++;
1206
1207	if (irq >= APIC_INTMAPSIZE)
1208		return;		/* No free interrupt handlers */
1209
1210	apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1211	intpin = io_apic_ints[intr].dst_apic_int;
1212
1213	assign_apic_irq(apic, intpin, irq);
1214	io_apic_setup_intpin(apic, intpin);
1215}
1216
1217
1218static void
1219swap_apic_id(int apic, int oldid, int newid)
1220{
1221	int x;
1222	int oapic;
1223
1224
1225	if (oldid == newid)
1226		return;			/* Nothing to do */
1227
1228	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
1229	       apic, oldid, newid);
1230
1231	/* Swap physical APIC IDs in interrupt entries */
1232	for (x = 0; x < nintrs; x++) {
1233		if (io_apic_ints[x].dst_apic_id == oldid)
1234			io_apic_ints[x].dst_apic_id = newid;
1235		else if (io_apic_ints[x].dst_apic_id == newid)
1236			io_apic_ints[x].dst_apic_id = oldid;
1237	}
1238
1239	/* Swap physical APIC IDs in IO_TO_ID mappings */
1240	for (oapic = 0; oapic < mp_napics; oapic++)
1241		if (IO_TO_ID(oapic) == newid)
1242			break;
1243
1244	if (oapic < mp_napics) {
1245		printf("Changing APIC ID for IO APIC #%d from "
1246		       "%d to %d in MP table\n",
1247		       oapic, newid, oldid);
1248		IO_TO_ID(oapic) = oldid;
1249	}
1250	IO_TO_ID(apic) = newid;
1251}
1252
1253
1254static void
1255fix_id_to_io_mapping(void)
1256{
1257	int x;
1258
1259	for (x = 0; x < NAPICID; x++)
1260		ID_TO_IO(x) = -1;
1261
1262	for (x = 0; x <= mp_naps; x++)
1263		if (CPU_TO_ID(x) < NAPICID)
1264			ID_TO_IO(CPU_TO_ID(x)) = x;
1265
1266	for (x = 0; x < mp_napics; x++)
1267		if (IO_TO_ID(x) < NAPICID)
1268			ID_TO_IO(IO_TO_ID(x)) = x;
1269}
1270
1271
1272static int
1273first_free_apic_id(void)
1274{
1275	int freeid, x;
1276
1277	for (freeid = 0; freeid < NAPICID; freeid++) {
1278		for (x = 0; x <= mp_naps; x++)
1279			if (CPU_TO_ID(x) == freeid)
1280				break;
1281		if (x <= mp_naps)
1282			continue;
1283		for (x = 0; x < mp_napics; x++)
1284			if (IO_TO_ID(x) == freeid)
1285				break;
1286		if (x < mp_napics)
1287			continue;
1288		return freeid;
1289	}
1290	return freeid;
1291}
1292
1293
1294static int
1295io_apic_id_acceptable(int apic, int id)
1296{
1297	int cpu;		/* Logical CPU number */
1298	int oapic;		/* Logical IO APIC number for other IO APIC */
1299
1300	if (id >= NAPICID)
1301		return 0;	/* Out of range */
1302
1303	for (cpu = 0; cpu <= mp_naps; cpu++)
1304		if (CPU_TO_ID(cpu) == id)
1305			return 0;	/* Conflict with CPU */
1306
1307	for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1308		if (IO_TO_ID(oapic) == id)
1309			return 0;	/* Conflict with other APIC */
1310
1311	return 1;		/* ID is acceptable for IO APIC */
1312}
1313
1314
1315/*
1316 * parse an Intel MP specification table
1317 */
1318static void
1319fix_mp_table(void)
1320{
1321	int	x;
1322	int	id;
1323	int	bus_0 = 0;	/* Stop GCC warning */
1324	int	bus_pci = 0;	/* Stop GCC warning */
1325	int	num_pci_bus;
1326	int	apic;		/* IO APIC unit number */
1327	int     freeid;		/* Free physical APIC ID */
1328	int	physid;		/* Current physical IO APIC ID */
1329
1330	/*
1331	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1332	 * did it wrong.  The MP spec says that when more than 1 PCI bus
1333	 * exists the BIOS must begin with bus entries for the PCI bus and use
1334	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
1335	 * exists the BIOS can choose to ignore this ordering, and indeed many
1336	 * MP motherboards do ignore it.  This causes a problem when the PCI
1337	 * sub-system makes requests of the MP sub-system based on PCI bus
1338	 * numbers.	So here we look for the situation and renumber the
1339	 * busses and associated INTs in an effort to "make it right".
1340	 */
1341
1342	/* find bus 0, PCI bus, count the number of PCI busses */
1343	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1344		if (bus_data[x].bus_id == 0) {
1345			bus_0 = x;
1346		}
1347		if (bus_data[x].bus_type == PCI) {
1348			++num_pci_bus;
1349			bus_pci = x;
1350		}
1351	}
1352	/*
1353	 * bus_0 == slot of bus with ID of 0
1354	 * bus_pci == slot of last PCI bus encountered
1355	 */
1356
1357	/* check the 1 PCI bus case for sanity */
1358	/* if it is number 0 all is well */
1359	if (num_pci_bus == 1 &&
1360	    bus_data[bus_pci].bus_id != 0) {
1361
1362		/* mis-numbered, swap with whichever bus uses slot 0 */
1363
1364		/* swap the bus entry types */
1365		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1366		bus_data[bus_0].bus_type = PCI;
1367
1368		/* swap each relavant INTerrupt entry */
1369		id = bus_data[bus_pci].bus_id;
1370		for (x = 0; x < nintrs; ++x) {
1371			if (io_apic_ints[x].src_bus_id == id) {
1372				io_apic_ints[x].src_bus_id = 0;
1373			}
1374			else if (io_apic_ints[x].src_bus_id == 0) {
1375				io_apic_ints[x].src_bus_id = id;
1376			}
1377		}
1378	}
1379
1380	/* Assign IO APIC IDs.
1381	 *
1382	 * First try the existing ID. If a conflict is detected, try
1383	 * the ID in the MP table.  If a conflict is still detected, find
1384	 * a free id.
1385	 *
1386	 * We cannot use the ID_TO_IO table before all conflicts has been
1387	 * resolved and the table has been corrected.
1388	 */
1389	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
1390
1391		/* First try to use the value set by the BIOS */
1392		physid = io_apic_get_id(apic);
1393		if (io_apic_id_acceptable(apic, physid)) {
1394			if (IO_TO_ID(apic) != physid)
1395				swap_apic_id(apic, IO_TO_ID(apic), physid);
1396			continue;
1397		}
1398
1399		/* Then check if the value in the MP table is acceptable */
1400		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
1401			continue;
1402
1403		/* Last resort, find a free APIC ID and use it */
1404		freeid = first_free_apic_id();
1405		if (freeid >= NAPICID)
1406			panic("No free physical APIC IDs found");
1407
1408		if (io_apic_id_acceptable(apic, freeid)) {
1409			swap_apic_id(apic, IO_TO_ID(apic), freeid);
1410			continue;
1411		}
1412		panic("Free physical APIC ID not usable");
1413	}
1414	fix_id_to_io_mapping();
1415
1416	/* detect and fix broken Compaq MP table */
1417	if (apic_int_type(0, 0) == -1) {
1418		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
1419		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
1420		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
1421		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
1422		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
1423		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
1424		nintrs++;
1425	}
1426}
1427
1428
1429/* Assign low level interrupt handlers */
1430static void
1431setup_apic_irq_mapping(void)
1432{
1433	int	x;
1434	int	int_vector;
1435
1436	/* Clear array */
1437	for (x = 0; x < APIC_INTMAPSIZE; x++) {
1438		int_to_apicintpin[x].ioapic = -1;
1439		int_to_apicintpin[x].int_pin = 0;
1440		int_to_apicintpin[x].apic_address = NULL;
1441		int_to_apicintpin[x].redirindex = 0;
1442	}
1443
1444	/* First assign ISA/EISA interrupts */
1445	for (x = 0; x < nintrs; x++) {
1446		int_vector = io_apic_ints[x].src_bus_irq;
1447		if (int_vector < APIC_INTMAPSIZE &&
1448		    io_apic_ints[x].int_vector == 0xff &&
1449		    int_to_apicintpin[int_vector].ioapic == -1 &&
1450		    (apic_int_is_bus_type(x, ISA) ||
1451		     apic_int_is_bus_type(x, EISA)) &&
1452		    io_apic_ints[x].int_type == 0) {
1453			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1454					io_apic_ints[x].dst_apic_int,
1455					int_vector);
1456		}
1457	}
1458
1459	/* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1460	for (x = 0; x < nintrs; x++) {
1461		if (io_apic_ints[x].dst_apic_int == 0 &&
1462		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1463		    io_apic_ints[x].int_vector == 0xff &&
1464		    int_to_apicintpin[0].ioapic == -1 &&
1465		    io_apic_ints[x].int_type == 3) {
1466			assign_apic_irq(0, 0, 0);
1467			break;
1468		}
1469	}
1470	/* PCI interrupt assignment is deferred */
1471}
1472
1473
1474static int
1475processor_entry(proc_entry_ptr entry, int cpu)
1476{
1477	/* check for usability */
1478	if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1479		return 0;
1480
1481	if(entry->apic_id >= NAPICID)
1482		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
1483	/* check for BSP flag */
1484	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1485		boot_cpu_id = entry->apic_id;
1486		CPU_TO_ID(0) = entry->apic_id;
1487		ID_TO_CPU(entry->apic_id) = 0;
1488		return 0;	/* its already been counted */
1489	}
1490
1491	/* add another AP to list, if less than max number of CPUs */
1492	else if (cpu < MAXCPU) {
1493		CPU_TO_ID(cpu) = entry->apic_id;
1494		ID_TO_CPU(entry->apic_id) = cpu;
1495		return 1;
1496	}
1497
1498	return 0;
1499}
1500
1501
1502static int
1503bus_entry(bus_entry_ptr entry, int bus)
1504{
1505	int     x;
1506	char    c, name[8];
1507
1508	/* encode the name into an index */
1509	for (x = 0; x < 6; ++x) {
1510		if ((c = entry->bus_type[x]) == ' ')
1511			break;
1512		name[x] = c;
1513	}
1514	name[x] = '\0';
1515
1516	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1517		panic("unknown bus type: '%s'", name);
1518
1519	bus_data[bus].bus_id = entry->bus_id;
1520	bus_data[bus].bus_type = x;
1521
1522	return 1;
1523}
1524
1525
1526static int
1527io_apic_entry(io_apic_entry_ptr entry, int apic)
1528{
1529	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1530		return 0;
1531
1532	IO_TO_ID(apic) = entry->apic_id;
1533	if (entry->apic_id < NAPICID)
1534		ID_TO_IO(entry->apic_id) = apic;
1535
1536	return 1;
1537}
1538
1539
1540static int
1541lookup_bus_type(char *name)
1542{
1543	int     x;
1544
1545	for (x = 0; x < MAX_BUSTYPE; ++x)
1546		if (strcmp(bus_type_table[x].name, name) == 0)
1547			return bus_type_table[x].type;
1548
1549	return UNKNOWN_BUSTYPE;
1550}
1551
1552
1553static int
1554int_entry(int_entry_ptr entry, int intr)
1555{
1556	int apic;
1557
1558	io_apic_ints[intr].int_type = entry->int_type;
1559	io_apic_ints[intr].int_flags = entry->int_flags;
1560	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1561	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1562	if (entry->dst_apic_id == 255) {
1563		/* This signal goes to all IO APICS.  Select an IO APIC
1564		   with sufficient number of interrupt pins */
1565		for (apic = 0; apic < mp_napics; apic++)
1566			if (((io_apic_read(apic, IOAPIC_VER) &
1567			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1568			    entry->dst_apic_int)
1569				break;
1570		if (apic < mp_napics)
1571			io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1572		else
1573			io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1574	} else
1575		io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1576	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1577
1578	return 1;
1579}
1580
1581
1582static int
1583apic_int_is_bus_type(int intr, int bus_type)
1584{
1585	int     bus;
1586
1587	for (bus = 0; bus < mp_nbusses; ++bus)
1588		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1589		    && ((int) bus_data[bus].bus_type == bus_type))
1590			return 1;
1591
1592	return 0;
1593}
1594
1595
1596/*
1597 * Given a traditional ISA INT mask, return an APIC mask.
1598 */
1599u_int
1600isa_apic_mask(u_int isa_mask)
1601{
1602	int isa_irq;
1603	int apic_pin;
1604
1605#if defined(SKIP_IRQ15_REDIRECT)
1606	if (isa_mask == (1 << 15)) {
1607		printf("skipping ISA IRQ15 redirect\n");
1608		return isa_mask;
1609	}
1610#endif  /* SKIP_IRQ15_REDIRECT */
1611
1612	isa_irq = ffs(isa_mask);		/* find its bit position */
1613	if (isa_irq == 0)			/* doesn't exist */
1614		return 0;
1615	--isa_irq;				/* make it zero based */
1616
1617	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
1618	if (apic_pin == -1)
1619		return 0;
1620
1621	return (1 << apic_pin);			/* convert pin# to a mask */
1622}
1623
1624
1625/*
1626 * Determine which APIC pin an ISA/EISA INT is attached to.
1627 */
1628#define INTTYPE(I)	(io_apic_ints[(I)].int_type)
1629#define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
1630#define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
1631#define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1632
1633#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
1634int
1635isa_apic_irq(int isa_irq)
1636{
1637	int     intr;
1638
1639	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
1640		if (INTTYPE(intr) == 0) {		/* standard INT */
1641			if (SRCBUSIRQ(intr) == isa_irq) {
1642				if (apic_int_is_bus_type(intr, ISA) ||
1643			            apic_int_is_bus_type(intr, EISA)) {
1644					if (INTIRQ(intr) == 0xff)
1645						return -1; /* unassigned */
1646					return INTIRQ(intr);	/* found */
1647				}
1648			}
1649		}
1650	}
1651	return -1;					/* NOT found */
1652}
1653
1654
1655/*
1656 * Determine which APIC pin a PCI INT is attached to.
1657 */
1658#define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
1659#define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1660#define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
1661int
1662pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1663{
1664	int     intr;
1665
1666	--pciInt;					/* zero based */
1667
1668	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
1669		if ((INTTYPE(intr) == 0)		/* standard INT */
1670		    && (SRCBUSID(intr) == pciBus)
1671		    && (SRCBUSDEVICE(intr) == pciDevice)
1672		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
1673			if (apic_int_is_bus_type(intr, PCI)) {
1674				if (INTIRQ(intr) == 0xff)
1675					allocate_apic_irq(intr);
1676				if (INTIRQ(intr) == 0xff)
1677					return -1;	/* unassigned */
1678				return INTIRQ(intr);	/* exact match */
1679			}
1680
1681	return -1;					/* NOT found */
1682}
1683
1684int
1685next_apic_irq(int irq)
1686{
1687	int intr, ointr;
1688	int bus, bustype;
1689
1690	bus = 0;
1691	bustype = 0;
1692	for (intr = 0; intr < nintrs; intr++) {
1693		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1694			continue;
1695		bus = SRCBUSID(intr);
1696		bustype = apic_bus_type(bus);
1697		if (bustype != ISA &&
1698		    bustype != EISA &&
1699		    bustype != PCI)
1700			continue;
1701		break;
1702	}
1703	if (intr >= nintrs) {
1704		return -1;
1705	}
1706	for (ointr = intr + 1; ointr < nintrs; ointr++) {
1707		if (INTTYPE(ointr) != 0)
1708			continue;
1709		if (bus != SRCBUSID(ointr))
1710			continue;
1711		if (bustype == PCI) {
1712			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1713				continue;
1714			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1715				continue;
1716		}
1717		if (bustype == ISA || bustype == EISA) {
1718			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1719				continue;
1720		}
1721		if (INTPIN(intr) == INTPIN(ointr))
1722			continue;
1723		break;
1724	}
1725	if (ointr >= nintrs) {
1726		return -1;
1727	}
1728	return INTIRQ(ointr);
1729}
1730#undef SRCBUSLINE
1731#undef SRCBUSDEVICE
1732#undef SRCBUSID
1733#undef SRCBUSIRQ
1734
1735#undef INTPIN
1736#undef INTIRQ
1737#undef INTAPIC
1738#undef INTTYPE
1739
1740
1741/*
1742 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1743 *
1744 * XXX FIXME:
1745 *  Exactly what this means is unclear at this point.  It is a solution
1746 *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
1747 *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
1748 *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1749 *  option.
1750 */
1751int
1752undirect_isa_irq(int rirq)
1753{
1754#if defined(READY)
1755	if (bootverbose)
1756	    printf("Freeing redirected ISA irq %d.\n", rirq);
1757	/** FIXME: tickle the MB redirector chip */
1758	return -1;
1759#else
1760	if (bootverbose)
1761	    printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1762	return 0;
1763#endif  /* READY */
1764}
1765
1766
1767/*
1768 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1769 */
1770int
1771undirect_pci_irq(int rirq)
1772{
1773#if defined(READY)
1774	if (bootverbose)
1775		printf("Freeing redirected PCI irq %d.\n", rirq);
1776
1777	/** FIXME: tickle the MB redirector chip */
1778	return -1;
1779#else
1780	if (bootverbose)
1781		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1782		       rirq);
1783	return 0;
1784#endif  /* READY */
1785}
1786
1787
1788/*
1789 * given a bus ID, return:
1790 *  the bus type if found
1791 *  -1 if NOT found
1792 */
1793int
1794apic_bus_type(int id)
1795{
1796	int     x;
1797
1798	for (x = 0; x < mp_nbusses; ++x)
1799		if (bus_data[x].bus_id == id)
1800			return bus_data[x].bus_type;
1801
1802	return -1;
1803}
1804
1805
1806/*
1807 * given a LOGICAL APIC# and pin#, return:
1808 *  the associated src bus ID if found
1809 *  -1 if NOT found
1810 */
1811int
1812apic_src_bus_id(int apic, int pin)
1813{
1814	int     x;
1815
1816	/* search each of the possible INTerrupt sources */
1817	for (x = 0; x < nintrs; ++x)
1818		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1819		    (pin == io_apic_ints[x].dst_apic_int))
1820			return (io_apic_ints[x].src_bus_id);
1821
1822	return -1;		/* NOT found */
1823}
1824
1825
1826/*
1827 * given a LOGICAL APIC# and pin#, return:
1828 *  the associated src bus IRQ if found
1829 *  -1 if NOT found
1830 */
1831int
1832apic_src_bus_irq(int apic, int pin)
1833{
1834	int     x;
1835
1836	for (x = 0; x < nintrs; x++)
1837		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1838		    (pin == io_apic_ints[x].dst_apic_int))
1839			return (io_apic_ints[x].src_bus_irq);
1840
1841	return -1;		/* NOT found */
1842}
1843
1844
1845/*
1846 * given a LOGICAL APIC# and pin#, return:
1847 *  the associated INTerrupt type if found
1848 *  -1 if NOT found
1849 */
1850int
1851apic_int_type(int apic, int pin)
1852{
1853	int     x;
1854
1855	/* search each of the possible INTerrupt sources */
1856	for (x = 0; x < nintrs; ++x)
1857		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1858		    (pin == io_apic_ints[x].dst_apic_int))
1859			return (io_apic_ints[x].int_type);
1860
1861	return -1;		/* NOT found */
1862}
1863
1864int
1865apic_irq(int apic, int pin)
1866{
1867	int x;
1868	int res;
1869
1870	for (x = 0; x < nintrs; ++x)
1871		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1872		    (pin == io_apic_ints[x].dst_apic_int)) {
1873			res = io_apic_ints[x].int_vector;
1874			if (res == 0xff)
1875				return -1;
1876			if (apic != int_to_apicintpin[res].ioapic)
1877				panic("apic_irq: inconsistent table");
1878			if (pin != int_to_apicintpin[res].int_pin)
1879				panic("apic_irq inconsistent table (2)");
1880			return res;
1881		}
1882	return -1;
1883}
1884
1885
1886/*
1887 * given a LOGICAL APIC# and pin#, return:
1888 *  the associated trigger mode if found
1889 *  -1 if NOT found
1890 */
1891int
1892apic_trigger(int apic, int pin)
1893{
1894	int     x;
1895
1896	/* search each of the possible INTerrupt sources */
1897	for (x = 0; x < nintrs; ++x)
1898		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1899		    (pin == io_apic_ints[x].dst_apic_int))
1900			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1901
1902	return -1;		/* NOT found */
1903}
1904
1905
1906/*
1907 * given a LOGICAL APIC# and pin#, return:
1908 *  the associated 'active' level if found
1909 *  -1 if NOT found
1910 */
1911int
1912apic_polarity(int apic, int pin)
1913{
1914	int     x;
1915
1916	/* search each of the possible INTerrupt sources */
1917	for (x = 0; x < nintrs; ++x)
1918		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1919		    (pin == io_apic_ints[x].dst_apic_int))
1920			return (io_apic_ints[x].int_flags & 0x03);
1921
1922	return -1;		/* NOT found */
1923}
1924
1925
1926/*
1927 * set data according to MP defaults
1928 * FIXME: probably not complete yet...
1929 */
1930static void
1931default_mp_table(int type)
1932{
1933	int     ap_cpu_id;
1934#if defined(APIC_IO)
1935	int     io_apic_id;
1936	int     pin;
1937#endif	/* APIC_IO */
1938
1939#if 0
1940	printf("  MP default config type: %d\n", type);
1941	switch (type) {
1942	case 1:
1943		printf("   bus: ISA, APIC: 82489DX\n");
1944		break;
1945	case 2:
1946		printf("   bus: EISA, APIC: 82489DX\n");
1947		break;
1948	case 3:
1949		printf("   bus: EISA, APIC: 82489DX\n");
1950		break;
1951	case 4:
1952		printf("   bus: MCA, APIC: 82489DX\n");
1953		break;
1954	case 5:
1955		printf("   bus: ISA+PCI, APIC: Integrated\n");
1956		break;
1957	case 6:
1958		printf("   bus: EISA+PCI, APIC: Integrated\n");
1959		break;
1960	case 7:
1961		printf("   bus: MCA+PCI, APIC: Integrated\n");
1962		break;
1963	default:
1964		printf("   future type\n");
1965		break;
1966		/* NOTREACHED */
1967	}
1968#endif	/* 0 */
1969
1970	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1971	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1972
1973	/* BSP */
1974	CPU_TO_ID(0) = boot_cpu_id;
1975	ID_TO_CPU(boot_cpu_id) = 0;
1976
1977	/* one and only AP */
1978	CPU_TO_ID(1) = ap_cpu_id;
1979	ID_TO_CPU(ap_cpu_id) = 1;
1980
1981#if defined(APIC_IO)
1982	/* one and only IO APIC */
1983	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1984
1985	/*
1986	 * sanity check, refer to MP spec section 3.6.6, last paragraph
1987	 * necessary as some hardware isn't properly setting up the IO APIC
1988	 */
1989#if defined(REALLY_ANAL_IOAPICID_VALUE)
1990	if (io_apic_id != 2) {
1991#else
1992	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1993#endif	/* REALLY_ANAL_IOAPICID_VALUE */
1994		io_apic_set_id(0, 2);
1995		io_apic_id = 2;
1996	}
1997	IO_TO_ID(0) = io_apic_id;
1998	ID_TO_IO(io_apic_id) = 0;
1999#endif	/* APIC_IO */
2000
2001	/* fill out bus entries */
2002	switch (type) {
2003	case 1:
2004	case 2:
2005	case 3:
2006	case 4:
2007	case 5:
2008	case 6:
2009	case 7:
2010		bus_data[0].bus_id = default_data[type - 1][1];
2011		bus_data[0].bus_type = default_data[type - 1][2];
2012		bus_data[1].bus_id = default_data[type - 1][3];
2013		bus_data[1].bus_type = default_data[type - 1][4];
2014		break;
2015
2016	/* case 4: case 7:		   MCA NOT supported */
2017	default:		/* illegal/reserved */
2018		panic("BAD default MP config: %d", type);
2019		/* NOTREACHED */
2020	}
2021
2022#if defined(APIC_IO)
2023	/* general cases from MP v1.4, table 5-2 */
2024	for (pin = 0; pin < 16; ++pin) {
2025		io_apic_ints[pin].int_type = 0;
2026		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
2027		io_apic_ints[pin].src_bus_id = 0;
2028		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
2029		io_apic_ints[pin].dst_apic_id = io_apic_id;
2030		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
2031	}
2032
2033	/* special cases from MP v1.4, table 5-2 */
2034	if (type == 2) {
2035		io_apic_ints[2].int_type = 0xff;	/* N/C */
2036		io_apic_ints[13].int_type = 0xff;	/* N/C */
2037#if !defined(APIC_MIXED_MODE)
2038		/** FIXME: ??? */
2039		panic("sorry, can't support type 2 default yet");
2040#endif	/* APIC_MIXED_MODE */
2041	}
2042	else
2043		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
2044
2045	if (type == 7)
2046		io_apic_ints[0].int_type = 0xff;	/* N/C */
2047	else
2048		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
2049#endif	/* APIC_IO */
2050}
2051
2052
2053/*
2054 * start each AP in our list
2055 */
2056static int
2057start_all_aps(u_int boot_addr)
2058{
2059	int     x, i, pg;
2060#ifndef PC98
2061	u_char  mpbiosreason;
2062#endif
2063	u_long  mpbioswarmvec;
2064	struct pcpu *pc;
2065	char *stack;
2066	uintptr_t kptbase;
2067
2068	POSTCODE(START_ALL_APS_POST);
2069
2070	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
2071
2072	/* initialize BSP's local APIC */
2073	apic_initialize();
2074	bsp_apic_ready = 1;
2075
2076	/* install the AP 1st level boot code */
2077	install_ap_tramp(boot_addr);
2078
2079
2080	/* save the current value of the warm-start vector */
2081	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
2082#ifndef PC98
2083	outb(CMOS_REG, BIOS_RESET);
2084	mpbiosreason = inb(CMOS_DATA);
2085#endif
2086
2087	/* set up temporary P==V mapping for AP boot */
2088	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
2089	kptbase = (uintptr_t)(void *)KPTphys;
2090	for (x = 0; x < NKPT; x++)
2091		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
2092		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
2093	invltlb();
2094
2095	/* start each AP */
2096	for (x = 1; x <= mp_naps; ++x) {
2097
2098		/* This is a bit verbose, it will go away soon.  */
2099
2100		/* first page of AP's private space */
2101		pg = x * i386_btop(sizeof(struct privatespace));
2102
2103		/* allocate a new private data page */
2104		pc = (struct pcpu *)kmem_alloc(kernel_map, PAGE_SIZE);
2105
2106		/* wire it into the private page table page */
2107		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(pc));
2108
2109		/* allocate and set up an idle stack data page */
2110		stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); /* XXXKSE */
2111		for (i = 0; i < KSTACK_PAGES; i++)
2112			SMPpt[pg + 1 + i] = (pt_entry_t)
2113			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
2114
2115		/* prime data page for it to use */
2116		pcpu_init(pc, x, sizeof(struct pcpu));
2117
2118		/* setup a vector to our boot code */
2119		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
2120		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
2121#ifndef PC98
2122		outb(CMOS_REG, BIOS_RESET);
2123		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
2124#endif
2125
2126		bootSTK = &SMP_prvspace[x].idlekstack[KSTACK_PAGES * PAGE_SIZE];
2127		bootAP = x;
2128
2129		/* attempt to start the Application Processor */
2130		CHECK_INIT(99);	/* setup checkpoints */
2131		if (!start_ap(x, boot_addr)) {
2132			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
2133			CHECK_PRINT("trace");	/* show checkpoints */
2134			/* better panic as the AP may be running loose */
2135			printf("panic y/n? [y] ");
2136			if (cngetc() != 'n')
2137				panic("bye-bye");
2138		}
2139		CHECK_PRINT("trace");		/* show checkpoints */
2140
2141		/* record its version info */
2142		cpu_apic_versions[x] = cpu_apic_versions[0];
2143
2144		all_cpus |= (1 << x);		/* record AP in CPU map */
2145	}
2146
2147	/* build our map of 'other' CPUs */
2148	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2149
2150	/* fill in our (BSP) APIC version */
2151	cpu_apic_versions[0] = lapic.version;
2152
2153	/* restore the warmstart vector */
2154	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
2155#ifndef PC98
2156	outb(CMOS_REG, BIOS_RESET);
2157	outb(CMOS_DATA, mpbiosreason);
2158#endif
2159
2160	/*
2161	 * Set up the idle context for the BSP.  Similar to above except
2162	 * that some was done by locore, some by pmap.c and some is implicit
2163	 * because the BSP is cpu#0 and the page is initially zero, and also
2164	 * because we can refer to variables by name on the BSP..
2165	 */
2166
2167	/* Allocate and setup BSP idle stack */
2168	stack = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
2169	for (i = 0; i < KSTACK_PAGES; i++)
2170		SMPpt[1 + i] = (pt_entry_t)
2171		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
2172
2173	for (x = 0; x < NKPT; x++)
2174		PTD[x] = 0;
2175	pmap_set_opt();
2176
2177	/* number of APs actually started */
2178	return mp_ncpus - 1;
2179}
2180
2181
2182/*
2183 * load the 1st level AP boot code into base memory.
2184 */
2185
2186/* targets for relocation */
2187extern void bigJump(void);
2188extern void bootCodeSeg(void);
2189extern void bootDataSeg(void);
2190extern void MPentry(void);
2191extern u_int MP_GDT;
2192extern u_int mp_gdtbase;
2193
2194static void
2195install_ap_tramp(u_int boot_addr)
2196{
2197	int     x;
2198	int     size = *(int *) ((u_long) & bootMP_size);
2199	u_char *src = (u_char *) ((u_long) bootMP);
2200	u_char *dst = (u_char *) boot_addr + KERNBASE;
2201	u_int   boot_base = (u_int) bootMP;
2202	u_int8_t *dst8;
2203	u_int16_t *dst16;
2204	u_int32_t *dst32;
2205
2206	POSTCODE(INSTALL_AP_TRAMP_POST);
2207
2208	for (x = 0; x < size; ++x)
2209		*dst++ = *src++;
2210
2211	/*
2212	 * modify addresses in code we just moved to basemem. unfortunately we
2213	 * need fairly detailed info about mpboot.s for this to work.  changes
2214	 * to mpboot.s might require changes here.
2215	 */
2216
2217	/* boot code is located in KERNEL space */
2218	dst = (u_char *) boot_addr + KERNBASE;
2219
2220	/* modify the lgdt arg */
2221	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2222	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2223
2224	/* modify the ljmp target for MPentry() */
2225	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2226	*dst32 = ((u_int) MPentry - KERNBASE);
2227
2228	/* modify the target for boot code segment */
2229	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2230	dst8 = (u_int8_t *) (dst16 + 1);
2231	*dst16 = (u_int) boot_addr & 0xffff;
2232	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
2233
2234	/* modify the target for boot data segment */
2235	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2236	dst8 = (u_int8_t *) (dst16 + 1);
2237	*dst16 = (u_int) boot_addr & 0xffff;
2238	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
2239}
2240
2241
2242/*
2243 * this function starts the AP (application processor) identified
2244 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
2245 * to accomplish this.  This is necessary because of the nuances
2246 * of the different hardware we might encounter.  It ain't pretty,
2247 * but it seems to work.
2248 */
static int
start_ap(int logical_cpu, u_int boot_addr)
{
	/*
	 * Parameters:
	 *   logical_cpu  logical CPU number; translated to a physical APIC
	 *                ID with CPU_TO_ID().
	 *   boot_addr    address the AP should start executing at; only
	 *                bits 12..19 are used, as the STARTUP vector.
	 *
	 * Returns 1 if mp_ncpus rose above its value on entry before the
	 * APIC timer armed at the end of this function read back as zero,
	 * 0 otherwise.
	 *
	 * The order of the lapic register accesses and the delays below is
	 * part of the start-up protocol; do not reorder them.
	 */
	int     physical_cpu;
	int     vector;
	int     cpus;
	u_long  icr_lo, icr_hi;

	POSTCODE(START_AP_POST);

	/* get the PHYSICAL APIC ID# */
	physical_cpu = CPU_TO_ID(logical_cpu);

	/* calculate the vector: the 4K page number of boot_addr, 8 bits */
	vector = (boot_addr >> 12) & 0xff;

	/*
	 * used as a watchpoint to signal AP startup: success is detected
	 * below by mp_ncpus growing past this snapshot.
	 */
	cpus = mp_ncpus;

	/*
	 * first we do an INIT/RESET IPI.  This INIT IPI might be run,
	 * resetting and running the target CPU.  OR this INIT IPI might be
	 * latched (P5 bug), CPU waiting for STARTUP IPI.  OR this INIT IPI
	 * might be ignored.
	 */

	/* setup the address for the target AP: destination goes in bits 24+ */
	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
	icr_hi |= (physical_cpu << 24);
	lapic.icr_hi = icr_hi;

	/*
	 * do an INIT IPI: assert RESET.  Only the upper 12 bits of the
	 * current icr_lo value are carried over; icr_lo is reused as the
	 * base for every command written below.
	 */
	icr_lo = lapic.icr_lo & 0xfff00000;
	lapic.icr_lo = icr_lo | 0x0000c500;

	/* wait for pending status end */
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;

	/* do an INIT IPI: deassert RESET */
	lapic.icr_lo = icr_lo | 0x00008500;

	/* wait for pending status end */
	u_sleep(10000);		/* wait ~10mS */
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;

	/*
	 * next we do a STARTUP IPI: the previous INIT IPI might still be
	 * latched, (P5 bug) this 1st STARTUP would then terminate
	 * immediately, and the previously started INIT IPI would continue. OR
	 * the previous INIT IPI has already run. and this STARTUP IPI will
	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
	 * will run.
	 */

	/* do a STARTUP IPI */
	lapic.icr_lo = icr_lo | 0x00000600 | vector;
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;
	u_sleep(200);		/* wait ~200uS */

	/*
	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
	 * recognized after hardware RESET or INIT IPI.
	 */

	lapic.icr_lo = icr_lo | 0x00000600 | vector;
	while (lapic.icr_lo & APIC_DELSTAT_MASK)
		 /* spin */ ;
	u_sleep(200);		/* wait ~200uS */

	/*
	 * wait for it to start: poll mp_ncpus until the APIC timer runs
	 * out.  NOTE(review): the AP presumably bumps mp_ncpus from its own
	 * start-up path; that code is not in this part of the file.
	 */
	set_apic_timer(5000000);/* == 5 seconds */
	while (read_apic_timer())
		if (mp_ncpus > cpus)
			return 1;	/* return SUCCESS */

	return 0;		/* return FAILURE */
}
2331
2332#if defined(APIC_IO)
2333
2334#ifdef COUNT_XINVLTLB_HITS
2335u_int xhits_gbl[MAXCPU];
2336u_int xhits_pg[MAXCPU];
2337u_int xhits_rng[MAXCPU];
2338SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
2339SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
2340    sizeof(xhits_gbl), "IU", "");
2341SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
2342    sizeof(xhits_pg), "IU", "");
2343SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
2344    sizeof(xhits_rng), "IU", "");
2345
2346u_int ipi_global;
2347u_int ipi_page;
2348u_int ipi_range;
2349u_int ipi_range_size;
2350SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
2351SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
2352SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
2353SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
2354    0, "");
2355
2356u_int ipi_masked_global;
2357u_int ipi_masked_page;
2358u_int ipi_masked_range;
2359u_int ipi_masked_range_size;
2360SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
2361    &ipi_masked_global, 0, "");
2362SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
2363    &ipi_masked_page, 0, "");
2364SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
2365    &ipi_masked_range, 0, "");
2366SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
2367    &ipi_masked_range_size, 0, "");
2368#endif
2369
2370/*
2371 * Flush the TLB on all other CPU's
2372 */
2373static void
2374smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
2375{
2376	u_int ncpu;
2377	register_t eflags;
2378
2379	ncpu = mp_ncpus - 1;	/* does not shootdown self */
2380	if (ncpu < 1)
2381		return;		/* no other cpus */
2382	eflags = read_eflags();
2383	if ((eflags & PSL_I) == 0)
2384		panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
2385	mtx_lock_spin(&smp_tlb_mtx);
2386	smp_tlb_addr1 = addr1;
2387	smp_tlb_addr2 = addr2;
2388	atomic_store_rel_int(&smp_tlb_wait, 0);
2389	ipi_all_but_self(vector);
2390	while (smp_tlb_wait < ncpu)
2391		ia32_pause();
2392	mtx_unlock_spin(&smp_tlb_mtx);
2393}
2394
2395/*
2396 * This is about as magic as it gets.  fortune(1) has got similar code
2397 * for reversing bits in a word.  Who thinks up this stuff??
2398 *
2399 * Yes, it does appear to be consistently faster than:
2400 * while (i = ffs(m)) {
2401 *	m >>= i;
2402 *	bits++;
2403 * }
2404 * and
2405 * while (lsb = (m & -m)) {	// This is magic too
2406 * 	m &= ~lsb;		// or: m ^= lsb
2407 *	bits++;
2408 * }
2409 * Both of these latter forms do some very strange things on gcc-3.1 with
2410 * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
2411 * There is probably an SSE or MMX popcnt instruction.
2412 *
2413 * I wonder if this should be in libkern?
2414 *
2415 * XXX Stop the presses!  Another one:
2416 * static __inline u_int32_t
2417 * popcnt1(u_int32_t v)
2418 * {
2419 *	v -= ((v >> 1) & 0x55555555);
2420 *	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
2421 *	v = (v + (v >> 4)) & 0x0F0F0F0F;
2422 *	return (v * 0x01010101) >> 24;
2423 * }
2424 * The downside is that it has a multiply.  With a pentium3 with
2425 * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
2426 * an imull, and in that case it is faster.  In most other cases
2427 * it appears slightly slower.
2428 */
static __inline u_int32_t
popcnt(u_int32_t m)
{
	u_int32_t sums;

	/*
	 * Count the set bits in 'm' by adding neighbouring fields in
	 * parallel: 1-bit fields into 2-bit sums, then 4, 8, 16 and
	 * finally one 32-bit total.
	 */
	sums = m;
	sums = (sums & 0x55555555) + ((sums >> 1) & 0x55555555);
	sums = (sums & 0x33333333) + ((sums >> 2) & 0x33333333);
	sums = (sums & 0x0f0f0f0f) + ((sums >> 4) & 0x0f0f0f0f);
	sums = (sums & 0x00ff00ff) + ((sums >> 8) & 0x00ff00ff);
	sums = (sums & 0x0000ffff) + ((sums >> 16) & 0x0000ffff);
	return sums;
}
2440
2441static void
2442smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
2443{
2444	int ncpu, othercpus;
2445	register_t eflags;
2446
2447	othercpus = mp_ncpus - 1;
2448	if (mask == (u_int)-1) {
2449		ncpu = othercpus;
2450		if (ncpu < 1)
2451			return;
2452	} else {
2453		/* XXX there should be a pcpu self mask */
2454		mask &= ~(1 << PCPU_GET(cpuid));
2455		if (mask == 0)
2456			return;
2457		ncpu = popcnt(mask);
2458		if (ncpu > othercpus) {
2459			/* XXX this should be a panic offence */
2460			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
2461			    ncpu, othercpus);
2462			ncpu = othercpus;
2463		}
2464		/* XXX should be a panic, implied by mask == 0 above */
2465		if (ncpu < 1)
2466			return;
2467	}
2468	eflags = read_eflags();
2469	if ((eflags & PSL_I) == 0)
2470		panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
2471	mtx_lock_spin(&smp_tlb_mtx);
2472	smp_tlb_addr1 = addr1;
2473	smp_tlb_addr2 = addr2;
2474	atomic_store_rel_int(&smp_tlb_wait, 0);
2475	if (mask == (u_int)-1)
2476		ipi_all_but_self(vector);
2477	else
2478		ipi_selected(mask, vector);
2479	while (smp_tlb_wait < ncpu)
2480		ia32_pause();
2481	mtx_unlock_spin(&smp_tlb_mtx);
2482}
2483#endif
2484
/*
 * Ask every other CPU to invalidate its whole TLB.  Does nothing until
 * smp_started is set, and compiles to an empty function without APIC_IO.
 */
void
smp_invltlb(void)
{
#if defined(APIC_IO)
	if (!smp_started)
		return;

	smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
	ipi_global++;
#endif
#endif  /* APIC_IO */
}
2497
2498void
2499smp_invlpg(vm_offset_t addr)
2500{
2501#if defined(APIC_IO)
2502	if (smp_started) {
2503		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
2504#ifdef COUNT_XINVLTLB_HITS
2505		ipi_page++;
2506#endif
2507	}
2508#endif  /* APIC_IO */
2509}
2510
2511void
2512smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
2513{
2514#if defined(APIC_IO)
2515	if (smp_started) {
2516		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
2517#ifdef COUNT_XINVLTLB_HITS
2518		ipi_range++;
2519		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
2520#endif
2521	}
2522#endif  /* APIC_IO */
2523}
2524
2525void
2526smp_masked_invltlb(u_int mask)
2527{
2528#if defined(APIC_IO)
2529	if (smp_started) {
2530		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
2531#ifdef COUNT_XINVLTLB_HITS
2532		ipi_masked_global++;
2533#endif
2534	}
2535#endif  /* APIC_IO */
2536}
2537
2538void
2539smp_masked_invlpg(u_int mask, vm_offset_t addr)
2540{
2541#if defined(APIC_IO)
2542	if (smp_started) {
2543		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
2544#ifdef COUNT_XINVLTLB_HITS
2545		ipi_masked_page++;
2546#endif
2547	}
2548#endif  /* APIC_IO */
2549}
2550
2551void
2552smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
2553{
2554#if defined(APIC_IO)
2555	if (smp_started) {
2556		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
2557#ifdef COUNT_XINVLTLB_HITS
2558		ipi_masked_range++;
2559		ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
2560#endif
2561	}
2562#endif  /* APIC_IO */
2563}
2564
2565
2566/*
2567 * This is called once the rest of the system is up and running and we're
2568 * ready to let the AP's out of the pen.
2569 */
2570void
2571ap_init(void)
2572{
2573	u_int	apic_id;
2574
2575	/* spin until all the AP's are ready */
2576	while (!aps_ready)
2577		ia32_pause();
2578
2579	/* BSP may have changed PTD while we were waiting */
2580	invltlb();
2581
2582#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2583	lidt(&r_idt);
2584#endif
2585
2586	/* set up CPU registers and state */
2587	cpu_setregs();
2588
2589	/* set up FPU state on the AP */
2590	npxinit(__INITIAL_NPXCW__);
2591
2592	/* set up SSE registers */
2593	enable_sse();
2594
2595	/* A quick check from sanity claus */
2596	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2597	if (PCPU_GET(cpuid) != apic_id) {
2598		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
2599		printf("SMP: apic_id = %d\n", apic_id);
2600		printf("PTD[MPPTDI] = %#jx\n", (uintmax_t)PTD[MPPTDI]);
2601		panic("cpuid mismatch! boom!!");
2602	}
2603
2604	/* Init local apic for irq's */
2605	apic_initialize();
2606
2607	/* Set memory range attributes for this CPU to match the BSP */
2608	mem_range_AP_init();
2609
2610	mtx_lock_spin(&ap_boot_mtx);
2611
2612	smp_cpus++;
2613
2614	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
2615	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
2616
2617	/* Build our map of 'other' CPUs. */
2618	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
2619
2620	if (bootverbose)
2621		apic_dump("ap_init()");
2622
2623	if (smp_cpus == mp_ncpus) {
2624		/* enable IPI's, tlb shootdown, freezes etc */
2625		atomic_store_rel_int(&smp_started, 1);
2626		smp_active = 1;	 /* historic */
2627	}
2628
2629	mtx_unlock_spin(&ap_boot_mtx);
2630
2631	/* wait until all the AP's are up */
2632	while (smp_started == 0)
2633		ia32_pause();
2634
2635	/* ok, now grab sched_lock and enter the scheduler */
2636	mtx_lock_spin(&sched_lock);
2637
2638	binuptime(PCPU_PTR(switchtime));
2639	PCPU_SET(switchticks, ticks);
2640
2641	cpu_throw(NULL, choosethread());	/* doesn't return */
2642
2643	panic("scheduler returned us to %s", __func__);
2644}
2645
2646/*
2647 * For statclock, we send an IPI to all CPU's to have them call this
2648 * function.
2649 *
2650 * WARNING! unpend() will call statclock() directly and skip this
2651 * routine.
2652 */
2653void
2654forwarded_statclock(struct clockframe frame)
2655{
2656
2657	if (profprocs != 0)
2658		profclock(&frame);
2659	if (pscnt == psdiv)
2660		statclock(&frame);
2661}
2662
2663void
2664forward_statclock(void)
2665{
2666	int map;
2667
2668	CTR0(KTR_SMP, "forward_statclock");
2669
2670	if (!smp_started || cold || panicstr)
2671		return;
2672
2673	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
2674	if (map != 0)
2675		ipi_selected(map, IPI_STATCLOCK);
2676}
2677
2678/*
2679 * For each hardclock(), we send an IPI to all other CPU's to have them
2680 * execute this function.  It would be nice to reduce contention on
2681 * sched_lock if we could simply peek at the CPU to determine the user/kernel
2682 * state and call hardclock_process() on the CPU receiving the clock interrupt
2683 * and then just use a simple IPI to handle any ast's if needed.
2684 *
2685 * WARNING! unpend() will call hardclock_process() directly and skip this
2686 * routine.
2687 */
2688void
2689forwarded_hardclock(struct clockframe frame)
2690{
2691
2692	hardclock_process(&frame);
2693}
2694
2695void
2696forward_hardclock(void)
2697{
2698	u_int map;
2699
2700	CTR0(KTR_SMP, "forward_hardclock");
2701
2702	if (!smp_started || cold || panicstr)
2703		return;
2704
2705	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
2706	if (map != 0)
2707		ipi_selected(map, IPI_HARDCLOCK);
2708}
2709
2710#ifdef APIC_INTR_REORDER
2711/*
2712 *	Maintain mapping from softintr vector to isr bit in local apic.
2713 */
2714void
2715set_lapic_isrloc(int intr, int vector)
2716{
2717	if (intr < 0 || intr > 32)
2718		panic("set_apic_isrloc: bad intr argument: %d",intr);
2719	if (vector < ICU_OFFSET || vector > 255)
2720		panic("set_apic_isrloc: bad vector argument: %d",vector);
2721	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
2722	apic_isrbit_location[intr].bit = (1<<(vector & 31));
2723}
2724#endif
2725
2726/*
2727 * send an IPI to a set of cpus.
2728 */
2729void
2730ipi_selected(u_int32_t cpus, u_int ipi)
2731{
2732
2733	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
2734	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
2735}
2736
2737/*
2738 * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
2739 */
2740void
2741ipi_all(u_int ipi)
2742{
2743
2744	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2745	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
2746}
2747
2748/*
2749 * send an IPI to all CPUs EXCEPT myself
2750 */
2751void
2752ipi_all_but_self(u_int ipi)
2753{
2754
2755	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2756	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
2757}
2758
2759/*
2760 * send an IPI to myself
2761 */
2762void
2763ipi_self(u_int ipi)
2764{
2765
2766	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
2767	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
2768}
2769
2770static void
2771release_aps(void *dummy __unused)
2772{
2773
2774	if (mp_ncpus == 1)
2775		return;
2776	mtx_lock_spin(&sched_lock);
2777	atomic_store_rel_int(&aps_ready, 1);
2778	while (smp_started == 0)
2779		ia32_pause();
2780	mtx_unlock_spin(&sched_lock);
2781}
2782
2783SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
2784
/*
 * hlt_cpus_mask: bitmask of CPUs that mp_grab_cpu_hlt() keeps halted;
 * changed through the machdep.hlt_cpus and machdep.hlt_logical_cpus
 * sysctl handlers and seeded by cpu_hlt_setup().
 */
static int	hlt_cpus_mask;
/*
 * hlt_logical_cpus: non-zero when the CPUs in logical_cpus_mask are to be
 * halted.  Defaults to 1 and can be overridden by the
 * machdep.hlt_logical_cpus tunable in cpu_hlt_setup().
 */
static int	hlt_logical_cpus = 1;
/* sysctl context for the oids that cpu_hlt_setup() adds at run time */
static struct	sysctl_ctx_list logical_cpu_clist;
2788
2789static int
2790sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
2791{
2792	u_int mask;
2793	int error;
2794
2795	mask = hlt_cpus_mask;
2796	error = sysctl_handle_int(oidp, &mask, 0, req);
2797	if (error || !req->newptr)
2798		return (error);
2799
2800	if (logical_cpus_mask != 0 &&
2801	    (mask & logical_cpus_mask) == logical_cpus_mask)
2802		hlt_logical_cpus = 1;
2803	else
2804		hlt_logical_cpus = 0;
2805
2806	if ((mask & all_cpus) == all_cpus)
2807		mask &= ~(1<<0);
2808	hlt_cpus_mask = mask;
2809	return (error);
2810}
2811SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
2812    0, 0, sysctl_hlt_cpus, "IU", "");
2813
2814static int
2815sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
2816{
2817	int disable, error;
2818
2819	disable = hlt_logical_cpus;
2820	error = sysctl_handle_int(oidp, &disable, 0, req);
2821	if (error || !req->newptr)
2822		return (error);
2823
2824	if (disable)
2825		hlt_cpus_mask |= logical_cpus_mask;
2826	else
2827		hlt_cpus_mask &= ~logical_cpus_mask;
2828
2829	if ((hlt_cpus_mask & all_cpus) == all_cpus)
2830		hlt_cpus_mask &= ~(1<<0);
2831
2832	hlt_logical_cpus = disable;
2833	return (error);
2834}
2835
2836static void
2837cpu_hlt_setup(void *dummy __unused)
2838{
2839
2840	if (logical_cpus_mask != 0) {
2841		TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
2842		    &hlt_logical_cpus);
2843		sysctl_ctx_init(&logical_cpu_clist);
2844		SYSCTL_ADD_PROC(&logical_cpu_clist,
2845		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
2846		    "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
2847		    sysctl_hlt_logical_cpus, "IU", "");
2848		SYSCTL_ADD_UINT(&logical_cpu_clist,
2849		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
2850		    "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
2851		    &logical_cpus_mask, 0, "");
2852
2853		if (hlt_logical_cpus)
2854			hlt_cpus_mask |= logical_cpus_mask;
2855	}
2856}
2857SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
2858
2859int
2860mp_grab_cpu_hlt(void)
2861{
2862	u_int mask = PCPU_GET(cpumask);
2863	int retval;
2864
2865	retval = mask & hlt_cpus_mask;
2866	while (mask & hlt_cpus_mask)
2867		__asm __volatile("sti; hlt" : : : "memory");
2868	return (retval);
2869}
2870