mptable.c revision 35077
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 *    derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 *	$Id: mp_machdep.c,v 1.72 1998/04/06 08:25:30 phk Exp $
26 */
27
28#include "opt_smp.h"
29#include "opt_vm86.h"
30#include "opt_cpu.h"
31
32#ifdef SMP
33#include <machine/smptests.h>
34#else
35#error
36#endif
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/proc.h>
42#include <sys/sysctl.h>
43#ifdef BETTER_CLOCK
44#include <sys/dkstat.h>
45#endif
46
47#include <vm/vm.h>
48#include <vm/vm_param.h>
49#include <vm/pmap.h>
50#include <vm/vm_kern.h>
51#include <vm/vm_extern.h>
52#ifdef BETTER_CLOCK
53#include <sys/lock.h>
54#include <vm/vm_map.h>
55#include <sys/user.h>
56#ifdef GPROF
57#include <sys/gmon.h>
58#endif
59#endif
60
61#include <machine/smp.h>
62#include <machine/apic.h>
63#include <machine/mpapic.h>
64#include <machine/segments.h>
65#include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
66#include <machine/tss.h>
67#include <machine/specialreg.h>
68#include <machine/cputypes.h>
69#include <machine/globaldata.h>
70
71#include <i386/i386/cons.h>	/* cngetc() */
72
73#if defined(APIC_IO)
74#include <machine/md_var.h>		/* setidt() */
75#include <i386/isa/icu.h>		/* IPIs */
76#include <i386/isa/intr_machdep.h>	/* IPIs */
77#endif	/* APIC_IO */
78
79#if defined(TEST_DEFAULT_CONFIG)
80#define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
81#else
82#define MPFPS_MPFB1	mpfps->mpfb1
83#endif  /* TEST_DEFAULT_CONFIG */
84
85#define WARMBOOT_TARGET		0
86#define WARMBOOT_OFF		(KERNBASE + 0x0467)
87#define WARMBOOT_SEG		(KERNBASE + 0x0469)
88
89#define BIOS_BASE		(0xf0000)
90#define BIOS_SIZE		(0x10000)
91#define BIOS_COUNT		(BIOS_SIZE/4)
92
93#define CMOS_REG		(0x70)
94#define CMOS_DATA		(0x71)
95#define BIOS_RESET		(0x0f)
96#define BIOS_WARM		(0x0a)
97
98#define PROCENTRY_FLAG_EN	0x01
99#define PROCENTRY_FLAG_BP	0x02
100#define IOAPICENTRY_FLAG_EN	0x01
101
102
/*
 * MP Floating Pointer Structure.
 *
 * mp_probe() finds this by scanning memory for the "_MP_" signature and
 * casts the resulting address to mpfps_t, so the field order and sizes
 * are an overlay on the in-memory table and must not be rearranged.
 */
typedef struct MPFPS {
	char    signature[4];
	void   *pap;		/* MP Configuration Table Header, 0 if absent */
	u_char  length;
	u_char  spec_rev;
	u_char  checksum;
	u_char  mpfb1;		/* non-zero: 'default' configuration type */
	u_char  mpfb2;		/* bit 0x80 set: PIC mode, clear: virtual wire */
	u_char  mpfb3;
	u_char  mpfb4;
	u_char  mpfb5;
}      *mpfps_t;
116
/*
 * MP Configuration Table Header.
 *
 * Reached through MPFPS.pap.  The base table entries follow this header
 * directly in memory; the table walkers start at (header + sizeof header)
 * and visit 'entry_count' entries.
 */
typedef struct MPCTH {
	char    signature[4];
	u_short base_table_length;
	u_char  spec_rev;
	u_char  checksum;
	u_char  oem_id[8];
	u_char  product_id[12];
	void   *oem_table_pointer;
	u_short oem_table_size;
	u_short entry_count;	/* number of base table entries to walk */
	void   *apic_address;	/* recorded as cpu_apic_address in pass 1 */
	u_short extended_table_length;
	u_char  extended_table_checksum;
	u_char  reserved;
}      *mpcth_t;
133
134
/*
 * Base table entry, type 0: processor.
 * Overlaid directly on the firmware table; do not reorder fields.
 */
typedef struct PROCENTRY {
	u_char  type;		/* entry type code, 0 for this entry */
	u_char  apic_id;	/* local APIC ID of this CPU */
	u_char  apic_version;
	u_char  cpu_flags;	/* PROCENTRY_FLAG_EN, PROCENTRY_FLAG_BP */
	u_long  cpu_signature;
	u_long  feature_flags;
	u_long  reserved1;
	u_long  reserved2;
}      *proc_entry_ptr;
145
/*
 * Base table entry, type 1: bus.
 * Overlaid directly on the firmware table; do not reorder fields.
 */
typedef struct BUSENTRY {
	u_char  type;		/* entry type code, 1 for this entry */
	u_char  bus_id;
	char    bus_type[6];	/* name, not NUL terminated; bus_entry() stops at ' ' */
}      *bus_entry_ptr;
151
/*
 * Base table entry, type 2: IO APIC.
 * Overlaid directly on the firmware table; do not reorder fields.
 */
typedef struct IOAPICENTRY {
	u_char  type;		/* entry type code, 2 for this entry */
	u_char  apic_id;
	u_char  apic_version;
	u_char  apic_flags;	/* IOAPICENTRY_FLAG_EN: entry is usable */
	void   *apic_address;	/* stored in io_apic_address[] by pass 1 */
}      *io_apic_entry_ptr;
159
/*
 * Base table entry, type 3: IO interrupt assignment.
 * Overlaid directly on the firmware table; do not reorder fields.
 * (Type 4 entries are skipped by both table passes.)
 */
typedef struct INTENTRY {
	u_char  type;		/* entry type code */
	u_char  int_type;	/* 0 is treated as a standard INT by the pin lookups */
	u_short int_flags;
	u_char  src_bus_id;	/* matched against bus_data[].bus_id */
	u_char  src_bus_irq;	/* ISA: IRQ number; PCI: (device << 2) | line */
	u_char  dst_apic_id;
	u_char  dst_apic_int;	/* APIC pin returned by the *_apic_pin() lookups */
}      *int_entry_ptr;
169
/*
 * Description of one kind of MP base table entry.
 * The table walkers use 'length' to step from one entry to the next.
 */
typedef struct BASETABLE_ENTRY {
	u_char  type;		/* entry type code (first byte of each entry) */
	u_char  length;		/* size of that entry in bytes */
	char    name[16];	/* human readable name */
}       basetable_entry;
176
177/*
178 * this code MUST be enabled here and in mpboot.s.
179 * it follows the very early stages of AP boot by placing values in CMOS ram.
180 * it NORMALLY will never be needed and thus the primitive method for enabling.
181 *
182#define CHECK_POINTS
183 */
184
#if defined(CHECK_POINTS)
/*
 * CMOS access helpers: select the register through CMOS_REG, then read
 * or write its value through CMOS_DATA.
 */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Write (D) to each of the six checkpoint registers 0x34 .. 0x39.
 *
 * NOTE(review): the ';' directly after the parameter list is part of the
 * replacement text, and the expansion is several statements rather than
 * one, so this macro is not safe as the body of an unbraced if/else.
 */
#define CHECK_INIT(D);				\
	CHECK_WRITE(0x34, (D));			\
	CHECK_WRITE(0x35, (D));			\
	CHECK_WRITE(0x36, (D));			\
	CHECK_WRITE(0x37, (D));			\
	CHECK_WRITE(0x38, (D));			\
	CHECK_WRITE(0x39, (D));

/*
 * Print the six checkpoint registers, prefixed by the string (S).
 * The same note about the leading ';' applies here.
 */
#define CHECK_PRINT(S);				\
	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
	   (S),					\
	   CHECK_READ(0x34),			\
	   CHECK_READ(0x35),			\
	   CHECK_READ(0x36),			\
	   CHECK_READ(0x37),			\
	   CHECK_READ(0x38),			\
	   CHECK_READ(0x39));

#else				/* CHECK_POINTS */

/* checkpoints disabled: both macros expand to nothing */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
213
214/*
215 * Values to send to the POST hardware.
216 */
217#define MP_BOOTADDRESS_POST	0x10
218#define MP_PROBE_POST		0x11
219#define MPTABLE_PASS1_POST	0x12
220
221#define MP_START_POST		0x13
222#define MP_ENABLE_POST		0x14
223#define MPTABLE_PASS2_POST	0x15
224
225#define START_ALL_APS_POST	0x16
226#define INSTALL_AP_TRAMP_POST	0x17
227#define START_AP_POST		0x18
228
229#define MP_ANNOUNCE_POST	0x19
230
231
232/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
233int	current_postcode;
234
235/** XXX FIXME: what system files declare these??? */
236extern struct region_descriptor r_gdt, r_idt;
237
int	bsp_apic_ready = 0;	/* flags usability of BSP apic */
int	mp_ncpus;		/* # of CPUs, including BSP */
int	mp_naps;		/* # of Application Processors (BSP excluded) */
int	mp_nbusses;		/* # of busses */
int	mp_napics;		/* # of IO APICs */
int	boot_cpu_id;		/* designated BSP (its APIC ID) */
vm_offset_t cpu_apic_address;	/* local APIC address, common to all CPUs */
vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
extern	int nkpt;

/* APIC version registers, indexed by logical CPU / logical IO APIC number */
u_int32_t cpu_apic_versions[NCPU];
u_int32_t io_apic_versions[NAPIC];
250
251#ifdef APIC_INTR_DIAGNOSTIC
252int apic_itrace_enter[32];
253int apic_itrace_tryisrlock[32];
254int apic_itrace_gotisrlock[32];
255int apic_itrace_active[32];
256int apic_itrace_masked[32];
257int apic_itrace_noisrlock[32];
258int apic_itrace_masked2[32];
259int apic_itrace_unmask[32];
260int apic_itrace_noforward[32];
261int apic_itrace_leave[32];
262int apic_itrace_enter2[32];
263int apic_itrace_doreti[32];
264int apic_itrace_splz[32];
265int apic_itrace_eoi[32];
266#ifdef APIC_INTR_DIAGNOSTIC_IRQ
267unsigned short apic_itrace_debugbuffer[32768];
268int apic_itrace_debugbuffer_idx;
269struct simplelock apic_itrace_debuglock;
270#endif
271#endif
272
273#ifdef APIC_INTR_REORDER
274struct {
275	volatile int *location;
276	int bit;
277} apic_isrbit_location[32];
278#endif
279
280/*
281 * APIC ID logical/physical mapping structures.
282 * We oversize these to simplify boot-time config.
283 */
284int     cpu_num_to_apic_id[NAPICID];
285int     io_num_to_apic_id[NAPICID];
286int     apic_id_to_logical[NAPICID];
287
288
289#define NPPROVMTRR		8
290#define PPRO_VMTRRphysBase0	0x200
291#define PPRO_VMTRRphysMask0	0x201
292static struct {
293	u_int64_t base, mask;
294} PPro_vmtrr[NPPROVMTRR];
295
296/* Bitmap of all available CPUs */
297u_int	all_cpus;
298
299/* AP uses this PTD during bootstrap.  Do not staticize.  */
300pd_entry_t *bootPTD;
301
302/* Hotwire a 0->4MB V==P mapping */
303extern pt_entry_t *KPTphys;
304
305/* Virtual address of per-cpu common_tss */
306extern struct i386tss common_tss;
307#ifdef VM86
308extern struct segment_descriptor common_tssd;
309extern u_int private_tss;		/* flag indicating private tss */
310extern u_int my_tr;
311#endif /* VM86 */
312
313/* IdlePTD per cpu */
314pd_entry_t *IdlePTDS[NCPU];
315
316/* "my" private page table page, for BSP init */
317extern pt_entry_t SMP_prvpt[];
318
319/* Private page pointer to curcpu's PTD, used during BSP init */
320extern pd_entry_t *my_idlePTD;
321
322static int smp_started;		/* has the system started? */
323
324/*
325 * Local data and functions.
326 */
327
328static int	mp_capable;
329static u_int	boot_address;
330static u_int	base_memory;
331
332static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
333static mpfps_t	mpfps;
334static int	search_for_sig(u_int32_t target, int count);
335static void	mp_enable(u_int boot_addr);
336
337static int	mptable_pass1(void);
338static int	mptable_pass2(void);
339static void	default_mp_table(int type);
340static void	fix_mp_table(void);
341static void	init_locks(void);
342static int	start_all_aps(u_int boot_addr);
343static void	install_ap_tramp(u_int boot_addr);
344static int	start_ap(int logicalCpu, u_int boot_addr);
345static void	getmtrr(void);
346static void	putmtrr(void);
347static void	putfmtrr(void);
348
349
350/*
351 * Calculate usable address in base memory for AP trampoline code.
352 */
353u_int
354mp_bootaddress(u_int basemem)
355{
356	POSTCODE(MP_BOOTADDRESS_POST);
357
358	base_memory = basemem * 1024;	/* convert to bytes */
359
360	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
361	if ((base_memory - boot_address) < bootMP_size)
362		boot_address -= 4096;	/* not enough, lower by 4k */
363
364	return boot_address;
365}
366
367
368/*
369 * Look for an Intel MP spec table (ie, SMP capable hardware).
370 */
371int
372mp_probe(void)
373{
374	int     x;
375	u_long  segment;
376	u_int32_t target;
377
378	POSTCODE(MP_PROBE_POST);
379
380	/* see if EBDA exists */
381	if (segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) {
382		/* search first 1K of EBDA */
383		target = (u_int32_t) (segment << 4);
384		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
385			goto found;
386	} else {
387		/* last 1K of base memory, effective 'top of base' passed in */
388		target = (u_int32_t) (base_memory - 0x400);
389		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
390			goto found;
391	}
392
393	/* search the BIOS */
394	target = (u_int32_t) BIOS_BASE;
395	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
396		goto found;
397
398	/* nothing found */
399	mpfps = (mpfps_t)0;
400	mp_capable = 0;
401	return 0;
402
403found:
404	/* calculate needed resources */
405	mpfps = (mpfps_t)x;
406	if (mptable_pass1())
407		panic("you must reconfigure your kernel");
408
409	/* flag fact that we are running multiple processors */
410	mp_capable = 1;
411	return 1;
412}
413
414
415/*
416 * Startup the SMP processors.
417 */
418void
419mp_start(void)
420{
421	POSTCODE(MP_START_POST);
422
423	/* look for MP capable motherboard */
424	if (mp_capable)
425		mp_enable(boot_address);
426	else
427		panic("MP hardware not found!");
428}
429
430
431/*
432 * Print various information about the SMP system hardware and setup.
433 */
434void
435mp_announce(void)
436{
437	int     x;
438
439	POSTCODE(MP_ANNOUNCE_POST);
440
441	printf("FreeBSD/SMP: Multiprocessor motherboard\n");
442	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
443	printf(", version: 0x%08x", cpu_apic_versions[0]);
444	printf(", at 0x%08x\n", cpu_apic_address);
445	for (x = 1; x <= mp_naps; ++x) {
446		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
447		printf(", version: 0x%08x", cpu_apic_versions[x]);
448		printf(", at 0x%08x\n", cpu_apic_address);
449	}
450
451#if defined(APIC_IO)
452	for (x = 0; x < mp_napics; ++x) {
453		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
454		printf(", version: 0x%08x", io_apic_versions[x]);
455		printf(", at 0x%08x\n", io_apic_address[x]);
456	}
457#else
458	printf(" Warning: APIC I/O disabled\n");
459#endif	/* APIC_IO */
460}
461
/*
 * AP cpu's call this to sync up protected mode.
 *
 * Loads the shared GDT/IDT/LDT, sets up and loads this CPU's TSS
 * descriptor, programs cr0, removes page directory entry 0 and then
 * applies the saved MTRR state.  The statement order is significant;
 * do not rearrange.
 */
void
init_secondary(void)
{
	int	gsel_tss;
#ifndef VM86
	u_int	my_tr;		/* with VM86 this is the extern declared above */
#endif

	/* the GDT holds NGDT regular descriptors plus one slot per CPU */
	r_gdt.rd_limit = sizeof(gdt[0]) * (NGDT + NCPU) - 1;
	r_gdt.rd_base = (int) gdt;
	lgdt(&r_gdt);			/* does magic intra-segment return */
	lidt(&r_idt);
	lldt(_default_ldt);

	/* this CPU's TSS descriptor lives in the per-CPU slot after NGDT */
	my_tr = NGDT + cpuid;
	gsel_tss = GSEL(my_tr, SEL_KPL);
	/* presumably resets the descriptor type so ltr() accepts it -- confirm */
	gdt[my_tr].sd.sd_type = SDT_SYS386TSS;
	common_tss.tss_esp0 = 0;	/* not used until after switch */
	common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	common_tss.tss_ioopt = (sizeof common_tss) << 16;
#ifdef VM86
	common_tssd = gdt[my_tr].sd;
	private_tss = 0;
#endif /* VM86 */
	ltr(gsel_tss);

	/* 0x8005003b == PG | AM | WP | NE | ET | TS | MP | PE */
	load_cr0(0x8005003b);		/* XXX! */

	/* drop page directory entry 0 (the low V == P window, see mp_enable) */
	PTD[0] = 0;
	pmap_set_opt((unsigned *)PTD);

	/* NOTE(review): presumably installs the MTRR state saved by the BSP */
	putmtrr();
	putfmtrr();

	invltlb();
}
501
502
503#if defined(APIC_IO)
504/*
505 * Final configuration of the BSP's local APIC:
506 *  - disable 'pic mode'.
507 *  - disable 'virtual wire mode'.
508 *  - enable NMI.
509 */
510void
511bsp_apic_configure(void)
512{
513	u_char		byte;
514	u_int32_t	temp;
515
516	/* leave 'pic mode' if necessary */
517	if (picmode) {
518		outb(0x22, 0x70);	/* select IMCR */
519		byte = inb(0x23);	/* current contents */
520		byte |= 0x01;		/* mask external INTR */
521		outb(0x23, byte);	/* disconnect 8259s/NMI */
522	}
523
524	/* mask lint0 (the 8259 'virtual wire' connection) */
525	temp = lapic.lvt_lint0;
526	temp |= APIC_LVT_M;		/* set the mask */
527	lapic.lvt_lint0 = temp;
528
529        /* setup lint1 to handle NMI */
530        temp = lapic.lvt_lint1;
531        temp &= ~APIC_LVT_M;		/* clear the mask */
532        lapic.lvt_lint1 = temp;
533
534	if (bootverbose)
535		apic_dump("bsp_apic_configure()");
536}
537#endif  /* APIC_IO */
538
539
540/*******************************************************************
541 * local functions and data
542 */
543
/*
 * start the SMP system
 *
 * Runs the second MP table pass (under a temporary low V == P mapping),
 * applies default-configuration and PCI-numbering fixups, programs the
 * IO APICs and installs the IPI vectors when APIC_IO is configured,
 * initializes the SMP locks and finally starts the APs.
 *
 * 'boot_addr' is the AP trampoline address chosen by mp_bootaddress().
 * The statement order is significant; do not rearrange.
 */
static void
mp_enable(u_int boot_addr)
{
	int     x;
#if defined(APIC_IO)
	int     apic;
	u_int   ux;
#endif	/* APIC_IO */

	/* NOTE(review): presumably captures the BSP's MTRR state for the APs */
	getmtrr();
	putfmtrr();

	POSTCODE(MP_ENABLE_POST);

	/* turn on 4MB of V == P addressing so we can get to MP table */
	*(int *)PTD = PG_V | PG_RW | ((u_long)KPTphys & PG_FRAME);
	invltlb();

	/* examine the MP table for needed info, uses physical addresses */
	x = mptable_pass2();

	/* tear the temporary V == P mapping down again */
	*(int *)PTD = 0;
	invltlb();

	/* can't process default configs till the CPU APIC is pmapped */
	if (x)
		default_mp_table(x);	/* x is the default configuration type */

	/* post scan cleanup */
	fix_mp_table();

#if defined(APIC_IO)

	/* fill the LOGICAL io_apic_versions table */
	for (apic = 0; apic < mp_napics; ++apic) {
		ux = io_apic_read(apic, IOAPIC_VER);
		io_apic_versions[apic] = ux;
	}

	/* program each IO APIC in the system */
	for (apic = 0; apic < mp_napics; ++apic)
		if (io_apic_setup(apic) < 0)
			panic("IO APIC setup failure");

	/* install a 'Spurious INTerrupt' vector */
	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for TLB invalidation */
	setidt(XINVLTLB_OFFSET, Xinvltlb,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#ifdef BETTER_CLOCK
	/* install an inter-CPU IPI for reading processor state */
	setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif

	/* install an inter-CPU IPI for forcing an additional software trap */
	setidt(XCPUAST_OFFSET, Xcpuast,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for interrupt forwarding */
	setidt(XFORWARD_IRQ_OFFSET, Xforward_irq,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* install an inter-CPU IPI for CPU stop/restart */
	setidt(XCPUSTOP_OFFSET, Xcpustop,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

#if defined(TEST_TEST1)
	/* install a "fake hardware INTerrupt" vector */
	setidt(XTEST1_OFFSET, Xtest1,
	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif  /** TEST_TEST1 */

#endif	/* APIC_IO */

	/* initialize all SMP locks */
	init_locks();

	/* start each Application Processor (the return value is not examined) */
	start_all_aps(boot_addr);

	/*
	 * The init process might be started on a different CPU now,
	 * and the boot CPU might not call prepare_usermode to get
	 * cr0 correctly configured. Thus we initialize cr0 here.
	 */
	load_cr0(rcr0() | CR0_WP | CR0_AM);
}
638
639
640/*
641 * look for the MP spec signature
642 */
643
644/* string defined by the Intel MP Spec as identifying the MP table */
645#define MP_SIG		0x5f504d5f	/* _MP_ */
646#define NEXT(X)		((X) += 4)
647static int
648search_for_sig(u_int32_t target, int count)
649{
650	int     x;
651	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
652
653	for (x = 0; x < count; NEXT(x))
654		if (addr[x] == MP_SIG)
655			/* make array index a byte index */
656			return (target + (x * sizeof(u_int32_t)));
657
658	return -1;
659}
660
661
/*
 * MP base table entry descriptions, indexed by entry type code (0..4).
 * The table walkers use the 'length' column to advance to the next
 * entry, so the row order must match the type codes.
 */
static basetable_entry basetable_entry_types[] =
{
	{0, 20, "Processor"},
	{1, 8, "Bus"},
	{2, 8, "I/O APIC"},
	{3, 8, "I/O INT"},
	{4, 8, "Local INT"}
};
670
/* One recorded bus: the ID from the MP table and its decoded type. */
typedef struct BUSDATA {
	u_char  bus_id;		/* 0xff marks an unused slot (see mptable_pass2) */
	enum busTypes bus_type;
}       bus_datum;
675
/*
 * One recorded interrupt assignment; int_entry() copies these fields
 * from the corresponding INTENTRY in the MP table.
 */
typedef struct INTDATA {
	u_char  int_type;	/* 0xff marks an unused slot (see mptable_pass2) */
	u_short int_flags;
	u_char  src_bus_id;
	u_char  src_bus_irq;
	u_char  dst_apic_id;
	u_char  dst_apic_int;
}       io_int, local_int;
684
/* Pairs a bus type code with the name used for it in the MP table. */
typedef struct BUSTYPENAME {
	u_char  type;
	char    name[7];	/* up to 6 characters plus the terminating NUL */
}       bus_type_name;
689
/*
 * Bus names recognized by lookup_bus_type(), which compares a name
 * against the first MAX_BUSTYPE rows.  The "---" rows are placeholders
 * that map to UNKNOWN_BUSTYPE.
 */
static bus_type_name bus_type_table[] =
{
	{CBUS, "CBUS"},
	{CBUSII, "CBUSII"},
	{EISA, "EISA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{ISA, "ISA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{PCI, "PCI"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{XPRESS, "XPRESS"},
	{UNKNOWN_BUSTYPE, "---"}
};
/*
 * from MP spec v1.4, table 5-1
 *
 * Bus layout of the 'default' configurations.  The row is selected with
 * (default configuration type - 1), so there is one row for each of the
 * types 1..7.  mptable_pass1() reads column 0 (nbus) to set mp_nbusses.
 * 255 fills the columns that a row does not use.
 */
static int default_data[7][5] =
{
/*   nbus, id0, type0, id1, type1 */
	{1, 0, ISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{0, 255, 255, 255, 255},/* MCA not supported */
	{2, 0, ISA, 1, PCI},
	{2, 0, EISA, 1, PCI},
	{0, 255, 255, 255, 255}	/* MCA not supported */
};
724
725
726/* the bus data */
727static bus_datum bus_data[NBUS];
728
729/* the IO INT data, one entry per possible APIC INTerrupt */
730static io_int  io_apic_ints[NINTR];
731
732static int nintrs;
733
734static int processor_entry	__P((proc_entry_ptr entry, int cpu));
735static int bus_entry		__P((bus_entry_ptr entry, int bus));
736static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
737static int int_entry		__P((int_entry_ptr entry, int intr));
738static int lookup_bus_type	__P((char *name));
739
740
741/*
742 * 1st pass on motherboard's Intel MP specification table.
743 *
744 * initializes:
745 *	mp_ncpus = 1
746 *
747 * determines:
748 *	cpu_apic_address (common to all CPUs)
749 *	io_apic_address[N]
750 *	mp_naps
751 *	mp_nbusses
752 *	mp_napics
753 *	nintrs
754 */
755static int
756mptable_pass1(void)
757{
758	int	x;
759	mpcth_t	cth;
760	int	totalSize;
761	void*	position;
762	int	count;
763	int	type;
764	int	mustpanic;
765
766	POSTCODE(MPTABLE_PASS1_POST);
767
768	mustpanic = 0;
769
770	/* clear various tables */
771	for (x = 0; x < NAPICID; ++x) {
772		io_apic_address[x] = ~0;	/* IO APIC address table */
773	}
774
775	/* init everything to empty */
776	mp_naps = 0;
777	mp_nbusses = 0;
778	mp_napics = 0;
779	nintrs = 0;
780
781	/* check for use of 'default' configuration */
782	if (MPFPS_MPFB1 != 0) {
783		/* use default addresses */
784		cpu_apic_address = DEFAULT_APIC_BASE;
785		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
786
787		/* fill in with defaults */
788		mp_naps = 2;		/* includes BSP */
789		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
790#if defined(APIC_IO)
791		mp_napics = 1;
792		nintrs = 16;
793#endif	/* APIC_IO */
794	}
795	else {
796		if ((cth = mpfps->pap) == 0)
797			panic("MP Configuration Table Header MISSING!");
798
799		cpu_apic_address = (vm_offset_t) cth->apic_address;
800
801		/* walk the table, recording info of interest */
802		totalSize = cth->base_table_length - sizeof(struct MPCTH);
803		position = (u_char *) cth + sizeof(struct MPCTH);
804		count = cth->entry_count;
805
806		while (count--) {
807			switch (type = *(u_char *) position) {
808			case 0: /* processor_entry */
809				if (((proc_entry_ptr)position)->cpu_flags
810					& PROCENTRY_FLAG_EN)
811					++mp_naps;
812				break;
813			case 1: /* bus_entry */
814				++mp_nbusses;
815				break;
816			case 2: /* io_apic_entry */
817				if (((io_apic_entry_ptr)position)->apic_flags
818					& IOAPICENTRY_FLAG_EN)
819					io_apic_address[mp_napics++] =
820					    (vm_offset_t)((io_apic_entry_ptr)
821						position)->apic_address;
822				break;
823			case 3: /* int_entry */
824				++nintrs;
825				break;
826			case 4:	/* int_entry */
827				break;
828			default:
829				panic("mpfps Base Table HOSED!");
830				/* NOTREACHED */
831			}
832
833			totalSize -= basetable_entry_types[type].length;
834			(u_char*)position += basetable_entry_types[type].length;
835		}
836	}
837
838	/* qualify the numbers */
839	if (mp_naps > NCPU)
840#if 0 /* XXX FIXME: kern/4255 */
841		printf("Warning: only using %d of %d available CPUs!\n",
842			NCPU, mp_naps);
843#else
844	{
845		printf("NCPU cannot be different than actual CPU count.\n");
846		printf(" add 'options NCPU=%d' to your kernel config file,\n",
847			mp_naps);
848		printf(" then rerun config & rebuild your SMP kernel\n");
849		mustpanic = 1;
850	}
851#endif /* XXX FIXME: kern/4255 */
852	if (mp_nbusses > NBUS) {
853		printf("found %d busses, increase NBUS\n", mp_nbusses);
854		mustpanic = 1;
855	}
856	if (mp_napics > NAPIC) {
857		printf("found %d apics, increase NAPIC\n", mp_napics);
858		mustpanic = 1;
859	}
860	if (nintrs > NINTR) {
861		printf("found %d intrs, increase NINTR\n", nintrs);
862		mustpanic = 1;
863	}
864
865	/*
866	 * Count the BSP.
867	 * This is also used as a counter while starting the APs.
868	 */
869	mp_ncpus = 1;
870
871	--mp_naps;	/* subtract the BSP */
872
873	return mustpanic;
874}
875
876
877/*
878 * 2nd pass on motherboard's Intel MP specification table.
879 *
880 * sets:
881 *	boot_cpu_id
882 *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
883 *	CPU_TO_ID(N), logical CPU to APIC ID table
884 *	IO_TO_ID(N), logical IO to APIC ID table
885 *	bus_data[N]
886 *	io_apic_ints[N]
887 */
888static int
889mptable_pass2(void)
890{
891	int     x;
892	mpcth_t cth;
893	int     totalSize;
894	void*   position;
895	int     count;
896	int     type;
897	int     apic, bus, cpu, intr;
898
899	POSTCODE(MPTABLE_PASS2_POST);
900
901	/* clear various tables */
902	for (x = 0; x < NAPICID; ++x) {
903		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
904		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
905		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
906	}
907
908	/* clear bus data table */
909	for (x = 0; x < NBUS; ++x)
910		bus_data[x].bus_id = 0xff;
911
912	/* clear IO APIC INT table */
913	for (x = 0; x < NINTR; ++x)
914		io_apic_ints[x].int_type = 0xff;
915
916	/* setup the cpu/apic mapping arrays */
917	boot_cpu_id = -1;
918
919	/* record whether PIC or virtual-wire mode */
920	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
921
922	/* check for use of 'default' configuration */
923	if (MPFPS_MPFB1 != 0)
924		return MPFPS_MPFB1;	/* return default configuration type */
925
926	if ((cth = mpfps->pap) == 0)
927		panic("MP Configuration Table Header MISSING!");
928
929	/* walk the table, recording info of interest */
930	totalSize = cth->base_table_length - sizeof(struct MPCTH);
931	position = (u_char *) cth + sizeof(struct MPCTH);
932	count = cth->entry_count;
933	apic = bus = intr = 0;
934	cpu = 1;				/* pre-count the BSP */
935
936	while (count--) {
937		switch (type = *(u_char *) position) {
938		case 0:
939			if (processor_entry(position, cpu))
940				++cpu;
941			break;
942		case 1:
943			if (bus_entry(position, bus))
944				++bus;
945			break;
946		case 2:
947			if (io_apic_entry(position, apic))
948				++apic;
949			break;
950		case 3:
951			if (int_entry(position, intr))
952				++intr;
953			break;
954		case 4:
955			/* int_entry(position); */
956			break;
957		default:
958			panic("mpfps Base Table HOSED!");
959			/* NOTREACHED */
960		}
961
962		totalSize -= basetable_entry_types[type].length;
963		(u_char *) position += basetable_entry_types[type].length;
964	}
965
966	if (boot_cpu_id == -1)
967		panic("NO BSP found!");
968
969	/* report fact that its NOT a default configuration */
970	return 0;
971}
972
973
974/*
975 * parse an Intel MP specification table
976 */
977static void
978fix_mp_table(void)
979{
980	int	x;
981	int	id;
982	int	bus_0;
983	int	bus_pci;
984	int	num_pci_bus;
985
986	/*
987	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
988	 * did it wrong.  The MP spec says that when more than 1 PCI bus
989	 * exists the BIOS must begin with bus entries for the PCI bus and use
990	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
991	 * exists the BIOS can choose to ignore this ordering, and indeed many
992	 * MP motherboards do ignore it.  This causes a problem when the PCI
993	 * sub-system makes requests of the MP sub-system based on PCI bus
994	 * numbers.	So here we look for the situation and renumber the
995	 * busses and associated INTs in an effort to "make it right".
996	 */
997
998	/* find bus 0, PCI bus, count the number of PCI busses */
999	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1000		if (bus_data[x].bus_id == 0) {
1001			bus_0 = x;
1002		}
1003		if (bus_data[x].bus_type == PCI) {
1004			++num_pci_bus;
1005			bus_pci = x;
1006		}
1007	}
1008	/*
1009	 * bus_0 == slot of bus with ID of 0
1010	 * bus_pci == slot of last PCI bus encountered
1011	 */
1012
1013	/* check the 1 PCI bus case for sanity */
1014	if (num_pci_bus == 1) {
1015
1016		/* if it is number 0 all is well */
1017		if (bus_data[bus_pci].bus_id == 0)
1018			return;
1019
1020		/* mis-numbered, swap with whichever bus uses slot 0 */
1021
1022		/* swap the bus entry types */
1023		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1024		bus_data[bus_0].bus_type = PCI;
1025
1026		/* swap each relavant INTerrupt entry */
1027		id = bus_data[bus_pci].bus_id;
1028		for (x = 0; x < nintrs; ++x) {
1029			if (io_apic_ints[x].src_bus_id == id) {
1030				io_apic_ints[x].src_bus_id = 0;
1031			}
1032			else if (io_apic_ints[x].src_bus_id == 0) {
1033				io_apic_ints[x].src_bus_id = id;
1034			}
1035		}
1036	}
1037	/* sanity check if more than 1 PCI bus */
1038	else if (num_pci_bus > 1) {
1039		for (x = 0; x < mp_nbusses; ++x) {
1040			if (bus_data[x].bus_type != PCI)
1041				continue;
1042			if (bus_data[x].bus_id >= num_pci_bus)
1043				panic("bad PCI bus numbering");
1044		}
1045	}
1046}
1047
1048
1049static int
1050processor_entry(proc_entry_ptr entry, int cpu)
1051{
1052	/* check for usability */
1053	if ((cpu >= NCPU) || !(entry->cpu_flags & PROCENTRY_FLAG_EN))
1054		return 0;
1055
1056	/* check for BSP flag */
1057	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1058		boot_cpu_id = entry->apic_id;
1059		CPU_TO_ID(0) = entry->apic_id;
1060		ID_TO_CPU(entry->apic_id) = 0;
1061		return 0;	/* its already been counted */
1062	}
1063
1064	/* add another AP to list, if less than max number of CPUs */
1065	else {
1066		CPU_TO_ID(cpu) = entry->apic_id;
1067		ID_TO_CPU(entry->apic_id) = cpu;
1068		return 1;
1069	}
1070}
1071
1072
1073static int
1074bus_entry(bus_entry_ptr entry, int bus)
1075{
1076	int     x;
1077	char    c, name[8];
1078
1079	/* encode the name into an index */
1080	for (x = 0; x < 6; ++x) {
1081		if ((c = entry->bus_type[x]) == ' ')
1082			break;
1083		name[x] = c;
1084	}
1085	name[x] = '\0';
1086
1087	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1088		panic("unknown bus type: '%s'", name);
1089
1090	bus_data[bus].bus_id = entry->bus_id;
1091	bus_data[bus].bus_type = x;
1092
1093	return 1;
1094}
1095
1096
1097static int
1098io_apic_entry(io_apic_entry_ptr entry, int apic)
1099{
1100	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1101		return 0;
1102
1103	IO_TO_ID(apic) = entry->apic_id;
1104	ID_TO_IO(entry->apic_id) = apic;
1105
1106	return 1;
1107}
1108
1109
1110static int
1111lookup_bus_type(char *name)
1112{
1113	int     x;
1114
1115	for (x = 0; x < MAX_BUSTYPE; ++x)
1116		if (strcmp(bus_type_table[x].name, name) == 0)
1117			return bus_type_table[x].type;
1118
1119	return UNKNOWN_BUSTYPE;
1120}
1121
1122
1123static int
1124int_entry(int_entry_ptr entry, int intr)
1125{
1126	io_apic_ints[intr].int_type = entry->int_type;
1127	io_apic_ints[intr].int_flags = entry->int_flags;
1128	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1129	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1130	io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1131	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1132
1133	return 1;
1134}
1135
1136
1137static int
1138apic_int_is_bus_type(int intr, int bus_type)
1139{
1140	int     bus;
1141
1142	for (bus = 0; bus < mp_nbusses; ++bus)
1143		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1144		    && ((int) bus_data[bus].bus_type == bus_type))
1145			return 1;
1146
1147	return 0;
1148}
1149
1150
1151/*
1152 * Given a traditional ISA INT mask, return an APIC mask.
1153 */
1154u_int
1155isa_apic_mask(u_int isa_mask)
1156{
1157	int isa_irq;
1158	int apic_pin;
1159
1160#if defined(SKIP_IRQ15_REDIRECT)
1161	if (isa_mask == (1 << 15)) {
1162		printf("skipping ISA IRQ15 redirect\n");
1163		return isa_mask;
1164	}
1165#endif  /* SKIP_IRQ15_REDIRECT */
1166
1167	isa_irq = ffs(isa_mask);		/* find its bit position */
1168	if (isa_irq == 0)			/* doesn't exist */
1169		return 0;
1170	--isa_irq;				/* make it zero based */
1171
1172	apic_pin = isa_apic_pin(isa_irq);	/* look for APIC connection */
1173	if (apic_pin == -1)
1174		return 0;
1175
1176	return (1 << apic_pin);			/* convert pin# to a mask */
1177}
1178
1179
1180/*
1181 * Determine which APIC pin an ISA/EISA INT is attached to.
1182 */
1183#define INTTYPE(I)	(io_apic_ints[(I)].int_type)
1184#define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
1185
1186#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
1187int
1188isa_apic_pin(int isa_irq)
1189{
1190	int     intr;
1191
1192	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
1193		if (INTTYPE(intr) == 0) {		/* standard INT */
1194			if (SRCBUSIRQ(intr) == isa_irq) {
1195				if (apic_int_is_bus_type(intr, ISA) ||
1196			            apic_int_is_bus_type(intr, EISA))
1197					return INTPIN(intr);	/* found */
1198			}
1199		}
1200	}
1201	return -1;					/* NOT found */
1202}
1203
1204
1205/*
1206 * Determine which APIC pin a PCI INT is attached to.
1207 */
1208#define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
1209#define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1210#define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
1211int
1212pci_apic_pin(int pciBus, int pciDevice, int pciInt)
1213{
1214	int     intr;
1215
1216	--pciInt;					/* zero based */
1217
1218	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
1219		if ((INTTYPE(intr) == 0)		/* standard INT */
1220		    && (SRCBUSID(intr) == pciBus)
1221		    && (SRCBUSDEVICE(intr) == pciDevice)
1222		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
1223			if (apic_int_is_bus_type(intr, PCI))
1224				return INTPIN(intr);	/* exact match */
1225
1226	return -1;					/* NOT found */
1227}
1228
1229int
1230next_apic_pin(int pin)
1231{
1232	int intr, ointr;
1233	int bus, bustype;
1234
1235	bus = 0;
1236	bustype = 0;
1237	for (intr = 0; intr < nintrs; intr++) {
1238		if (INTPIN(intr) != pin || INTTYPE(intr) != 0)
1239			continue;
1240		bus = SRCBUSID(intr);
1241		bustype = apic_bus_type(bus);
1242		if (bustype != ISA &&
1243		    bustype != EISA &&
1244		    bustype != PCI)
1245			continue;
1246		break;
1247	}
1248	if (intr >= nintrs) {
1249		return -1;
1250	}
1251	for (ointr = intr + 1; ointr < nintrs; ointr++) {
1252		if (INTTYPE(ointr) != 0)
1253			continue;
1254		if (bus != SRCBUSID(ointr))
1255			continue;
1256		if (bustype == PCI) {
1257			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1258				continue;
1259			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1260				continue;
1261		}
1262		if (bustype == ISA || bustype == EISA) {
1263			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1264				continue;
1265		}
1266		if (INTPIN(intr) == INTPIN(ointr))
1267			continue;
1268		break;
1269	}
1270	if (ointr >= nintrs) {
1271		return -1;
1272	}
1273	return INTPIN(ointr);
1274}
1275#undef SRCBUSLINE
1276#undef SRCBUSDEVICE
1277#undef SRCBUSID
1278#undef SRCBUSIRQ
1279
1280#undef INTPIN
1281#undef INTTYPE
1282
1283
1284/*
1285 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1286 *
1287 * XXX FIXME:
1288 *  Exactly what this means is unclear at this point.  It is a solution
1289 *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
1290 *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
1291 *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1292 *  option.
1293 */
1294int
1295undirect_isa_irq(int rirq)
1296{
1297#if defined(READY)
1298	printf("Freeing redirected ISA irq %d.\n", rirq);
1299	/** FIXME: tickle the MB redirector chip */
1300	return ???;
1301#else
1302	printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1303	return 0;
1304#endif  /* READY */
1305}
1306
1307
1308/*
1309 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1310 */
1311int
1312undirect_pci_irq(int rirq)
1313{
1314#if defined(READY)
1315	if (bootverbose)
1316		printf("Freeing redirected PCI irq %d.\n", rirq);
1317
1318	/** FIXME: tickle the MB redirector chip */
1319	return ???;
1320#else
1321	if (bootverbose)
1322		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1323		       rirq);
1324	return 0;
1325#endif  /* READY */
1326}
1327
1328
1329/*
1330 * given a bus ID, return:
1331 *  the bus type if found
1332 *  -1 if NOT found
1333 */
1334int
1335apic_bus_type(int id)
1336{
1337	int     x;
1338
1339	for (x = 0; x < mp_nbusses; ++x)
1340		if (bus_data[x].bus_id == id)
1341			return bus_data[x].bus_type;
1342
1343	return -1;
1344}
1345
1346
1347/*
1348 * given a LOGICAL APIC# and pin#, return:
1349 *  the associated src bus ID if found
1350 *  -1 if NOT found
1351 */
1352int
1353apic_src_bus_id(int apic, int pin)
1354{
1355	int     x;
1356
1357	/* search each of the possible INTerrupt sources */
1358	for (x = 0; x < nintrs; ++x)
1359		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1360		    (pin == io_apic_ints[x].dst_apic_int))
1361			return (io_apic_ints[x].src_bus_id);
1362
1363	return -1;		/* NOT found */
1364}
1365
1366
1367/*
1368 * given a LOGICAL APIC# and pin#, return:
1369 *  the associated src bus IRQ if found
1370 *  -1 if NOT found
1371 */
1372int
1373apic_src_bus_irq(int apic, int pin)
1374{
1375	int     x;
1376
1377	for (x = 0; x < nintrs; x++)
1378		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1379		    (pin == io_apic_ints[x].dst_apic_int))
1380			return (io_apic_ints[x].src_bus_irq);
1381
1382	return -1;		/* NOT found */
1383}
1384
1385
1386/*
1387 * given a LOGICAL APIC# and pin#, return:
1388 *  the associated INTerrupt type if found
1389 *  -1 if NOT found
1390 */
1391int
1392apic_int_type(int apic, int pin)
1393{
1394	int     x;
1395
1396	/* search each of the possible INTerrupt sources */
1397	for (x = 0; x < nintrs; ++x)
1398		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1399		    (pin == io_apic_ints[x].dst_apic_int))
1400			return (io_apic_ints[x].int_type);
1401
1402	return -1;		/* NOT found */
1403}
1404
1405
1406/*
1407 * given a LOGICAL APIC# and pin#, return:
1408 *  the associated trigger mode if found
1409 *  -1 if NOT found
1410 */
1411int
1412apic_trigger(int apic, int pin)
1413{
1414	int     x;
1415
1416	/* search each of the possible INTerrupt sources */
1417	for (x = 0; x < nintrs; ++x)
1418		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1419		    (pin == io_apic_ints[x].dst_apic_int))
1420			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1421
1422	return -1;		/* NOT found */
1423}
1424
1425
1426/*
1427 * given a LOGICAL APIC# and pin#, return:
1428 *  the associated 'active' level if found
1429 *  -1 if NOT found
1430 */
1431int
1432apic_polarity(int apic, int pin)
1433{
1434	int     x;
1435
1436	/* search each of the possible INTerrupt sources */
1437	for (x = 0; x < nintrs; ++x)
1438		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1439		    (pin == io_apic_ints[x].dst_apic_int))
1440			return (io_apic_ints[x].int_flags & 0x03);
1441
1442	return -1;		/* NOT found */
1443}
1444
1445
1446/*
1447 * set data according to MP defaults
1448 * FIXME: probably not complete yet...
1449 */
1450static void
1451default_mp_table(int type)
1452{
1453	int     ap_cpu_id;
1454#if defined(APIC_IO)
1455	u_int32_t ux;
1456	int     io_apic_id;
1457	int     pin;
1458#endif	/* APIC_IO */
1459
1460#if 0
1461	printf("  MP default config type: %d\n", type);
1462	switch (type) {
1463	case 1:
1464		printf("   bus: ISA, APIC: 82489DX\n");
1465		break;
1466	case 2:
1467		printf("   bus: EISA, APIC: 82489DX\n");
1468		break;
1469	case 3:
1470		printf("   bus: EISA, APIC: 82489DX\n");
1471		break;
1472	case 4:
1473		printf("   bus: MCA, APIC: 82489DX\n");
1474		break;
1475	case 5:
1476		printf("   bus: ISA+PCI, APIC: Integrated\n");
1477		break;
1478	case 6:
1479		printf("   bus: EISA+PCI, APIC: Integrated\n");
1480		break;
1481	case 7:
1482		printf("   bus: MCA+PCI, APIC: Integrated\n");
1483		break;
1484	default:
1485		printf("   future type\n");
1486		break;
1487		/* NOTREACHED */
1488	}
1489#endif	/* 0 */
1490
1491	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1492	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1493
1494	/* BSP */
1495	CPU_TO_ID(0) = boot_cpu_id;
1496	ID_TO_CPU(boot_cpu_id) = 0;
1497
1498	/* one and only AP */
1499	CPU_TO_ID(1) = ap_cpu_id;
1500	ID_TO_CPU(ap_cpu_id) = 1;
1501
1502#if defined(APIC_IO)
1503	/* one and only IO APIC */
1504	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1505
1506	/*
1507	 * sanity check, refer to MP spec section 3.6.6, last paragraph
1508	 * necessary as some hardware isn't properly setting up the IO APIC
1509	 */
1510#if defined(REALLY_ANAL_IOAPICID_VALUE)
1511	if (io_apic_id != 2) {
1512#else
1513	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1514#endif	/* REALLY_ANAL_IOAPICID_VALUE */
1515		ux = io_apic_read(0, IOAPIC_ID);	/* get current contents */
1516		ux &= ~APIC_ID_MASK;	/* clear the ID field */
1517		ux |= 0x02000000;	/* set it to '2' */
1518		io_apic_write(0, IOAPIC_ID, ux);	/* write new value */
1519		ux = io_apic_read(0, IOAPIC_ID);	/* re-read && test */
1520		if ((ux & APIC_ID_MASK) != 0x02000000)
1521			panic("can't control IO APIC ID, reg: 0x%08x", ux);
1522		io_apic_id = 2;
1523	}
1524	IO_TO_ID(0) = io_apic_id;
1525	ID_TO_IO(io_apic_id) = 0;
1526#endif	/* APIC_IO */
1527
1528	/* fill out bus entries */
1529	switch (type) {
1530	case 1:
1531	case 2:
1532	case 3:
1533	case 5:
1534	case 6:
1535		bus_data[0].bus_id = default_data[type - 1][1];
1536		bus_data[0].bus_type = default_data[type - 1][2];
1537		bus_data[1].bus_id = default_data[type - 1][3];
1538		bus_data[1].bus_type = default_data[type - 1][4];
1539		break;
1540
1541	/* case 4: case 7:		   MCA NOT supported */
1542	default:		/* illegal/reserved */
1543		panic("BAD default MP config: %d", type);
1544		/* NOTREACHED */
1545	}
1546
1547#if defined(APIC_IO)
1548	/* general cases from MP v1.4, table 5-2 */
1549	for (pin = 0; pin < 16; ++pin) {
1550		io_apic_ints[pin].int_type = 0;
1551		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
1552		io_apic_ints[pin].src_bus_id = 0;
1553		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
1554		io_apic_ints[pin].dst_apic_id = io_apic_id;
1555		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
1556	}
1557
1558	/* special cases from MP v1.4, table 5-2 */
1559	if (type == 2) {
1560		io_apic_ints[2].int_type = 0xff;	/* N/C */
1561		io_apic_ints[13].int_type = 0xff;	/* N/C */
1562#if !defined(APIC_MIXED_MODE)
1563		/** FIXME: ??? */
1564		panic("sorry, can't support type 2 default yet");
1565#endif	/* APIC_MIXED_MODE */
1566	}
1567	else
1568		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
1569
1570	if (type == 7)
1571		io_apic_ints[0].int_type = 0xff;	/* N/C */
1572	else
1573		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
1574#endif	/* APIC_IO */
1575}
1576
1577
1578/*
1579 * initialize all the SMP locks
1580 */
1581
1582/* critical region around IO APIC, apic_imen */
1583struct simplelock	imen_lock;
1584
1585/* critical region around splxx(), cpl, cml, cil, ipending */
1586struct simplelock	cpl_lock;
1587
1588/* Make FAST_INTR() routines sequential */
1589struct simplelock	fast_intr_lock;
1590
1591/* critical region around INTR() routines */
1592struct simplelock	intr_lock;
1593
1594/* lock regions protected in UP kernel via cli/sti */
1595struct simplelock	mpintr_lock;
1596
1597/* lock region used by kernel profiling */
1598struct simplelock	mcount_lock;
1599
1600#ifdef USE_COMLOCK
1601/* locks com (tty) data/hardware accesses: a FASTINTR() */
1602struct simplelock	com_lock;
1603#endif /* USE_COMLOCK */
1604
1605#ifdef USE_CLOCKLOCK
1606/* lock regions around the clock hardware */
1607struct simplelock	clock_lock;
1608#endif /* USE_CLOCKLOCK */
1609
1610static void
1611init_locks(void)
1612{
1613	/*
1614	 * Get the initial mp_lock with a count of 1 for the BSP.
1615	 * This uses a LOGICAL cpu ID, ie BSP == 0.
1616	 */
1617	mp_lock = 0x00000001;
1618
1619	/* ISR uses its own "giant lock" */
1620	isr_lock = FREE_LOCK;
1621
1622#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
1623	s_lock_init((struct simplelock*)&apic_itrace_debuglock);
1624#endif
1625
1626	s_lock_init((struct simplelock*)&mpintr_lock);
1627
1628	s_lock_init((struct simplelock*)&mcount_lock);
1629
1630	s_lock_init((struct simplelock*)&fast_intr_lock);
1631	s_lock_init((struct simplelock*)&intr_lock);
1632	s_lock_init((struct simplelock*)&imen_lock);
1633	s_lock_init((struct simplelock*)&cpl_lock);
1634
1635#ifdef USE_COMLOCK
1636	s_lock_init((struct simplelock*)&com_lock);
1637#endif /* USE_COMLOCK */
1638#ifdef USE_CLOCKLOCK
1639	s_lock_init((struct simplelock*)&clock_lock);
1640#endif /* USE_CLOCKLOCK */
1641}
1642
1643
1644/*
1645 * start each AP in our list
1646 */
1647static int
1648start_all_aps(u_int boot_addr)
1649{
1650	int     x, i;
1651	u_char  mpbiosreason;
1652	u_long  mpbioswarmvec;
1653	pd_entry_t *newptd;
1654	pt_entry_t *newpt;
1655	struct globaldata *gd;
1656	char *stack;
1657	pd_entry_t	*myPTD;
1658
1659	POSTCODE(START_ALL_APS_POST);
1660
1661	/* initialize BSP's local APIC */
1662	apic_initialize();
1663	bsp_apic_ready = 1;
1664
1665	/* install the AP 1st level boot code */
1666	install_ap_tramp(boot_addr);
1667
1668
1669	/* save the current value of the warm-start vector */
1670	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1671	outb(CMOS_REG, BIOS_RESET);
1672	mpbiosreason = inb(CMOS_DATA);
1673
1674	/* record BSP in CPU map */
1675	all_cpus = 1;
1676
1677	/* start each AP */
1678	for (x = 1; x <= mp_naps; ++x) {
1679
1680		/* This is a bit verbose, it will go away soon.  */
1681
1682		/* alloc new page table directory */
1683		newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
1684
1685		/* Store the virtual PTD address for this CPU */
1686		IdlePTDS[x] = newptd;
1687
1688		/* clone currently active one (ie: IdlePTD) */
1689		bcopy(PTD, newptd, PAGE_SIZE);	/* inc prv page pde */
1690
1691		/* set up 0 -> 4MB P==V mapping for AP boot */
1692		newptd[0] = (pd_entry_t) (PG_V | PG_RW |
1693						((u_long)KPTphys & PG_FRAME));
1694
1695		/* store PTD for this AP's boot sequence */
1696		myPTD = (pd_entry_t *)vtophys(newptd);
1697
1698		/* alloc new page table page */
1699		newpt = (pt_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
1700
1701		/* set the new PTD's private page to point there */
1702		newptd[MPPTDI] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt));
1703
1704		/* install self referential entry */
1705		newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd));
1706
1707		/* allocate a new private data page */
1708		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
1709
1710		/* wire it into the private page table page */
1711		newpt[0] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
1712
1713		/* wire the ptp into itself for access */
1714		newpt[1] = (pt_entry_t)(PG_V | PG_RW | vtophys(newpt));
1715
1716		/* copy in the pointer to the local apic */
1717		newpt[2] = SMP_prvpt[2];
1718
1719		/* and the IO apic mapping[s] */
1720		for (i = 16; i < 32; i++)
1721			newpt[i] = SMP_prvpt[i];
1722
1723		/* allocate and set up an idle stack data page */
1724		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
1725		for (i = 0; i < UPAGES; i++)
1726			newpt[i + 3] = (pt_entry_t)(PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1727
1728		newpt[3 + UPAGES] = 0;		/* *prv_CMAP1 */
1729		newpt[4 + UPAGES] = 0;		/* *prv_CMAP2 */
1730		newpt[5 + UPAGES] = 0;		/* *prv_CMAP3 */
1731
1732		/* prime data page for it to use */
1733		gd->cpuid = x;
1734		gd->cpu_lockid = x << 24;
1735		gd->my_idlePTD = myPTD;
1736		gd->prv_CMAP1 = &newpt[3 + UPAGES];
1737		gd->prv_CMAP2 = &newpt[4 + UPAGES];
1738		gd->prv_CMAP3 = &newpt[5 + UPAGES];
1739
1740		/* setup a vector to our boot code */
1741		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1742		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
1743		outb(CMOS_REG, BIOS_RESET);
1744		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
1745
1746		bootPTD = myPTD;
1747		/* attempt to start the Application Processor */
1748		CHECK_INIT(99);	/* setup checkpoints */
1749		if (!start_ap(x, boot_addr)) {
1750			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
1751			CHECK_PRINT("trace");	/* show checkpoints */
1752			/* better panic as the AP may be running loose */
1753			printf("panic y/n? [y] ");
1754			if (cngetc() != 'n')
1755				panic("bye-bye");
1756		}
1757		CHECK_PRINT("trace");		/* show checkpoints */
1758
1759		/* record its version info */
1760		cpu_apic_versions[x] = cpu_apic_versions[0];
1761
1762		all_cpus |= (1 << x);		/* record AP in CPU map */
1763	}
1764
1765	/* build our map of 'other' CPUs */
1766	other_cpus = all_cpus & ~(1 << cpuid);
1767
1768	/* fill in our (BSP) APIC version */
1769	cpu_apic_versions[0] = lapic.version;
1770
1771	/* restore the warmstart vector */
1772	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
1773	outb(CMOS_REG, BIOS_RESET);
1774	outb(CMOS_DATA, mpbiosreason);
1775
1776	/*
1777	 * Set up the idle context for the BSP.  Similar to above except
1778	 * that some was done by locore, some by pmap.c and some is implicit
1779	 * because the BSP is cpu#0 and the page is initially zero, and also
1780	 * because we can refer to variables by name on the BSP..
1781	 */
1782	newptd = (pd_entry_t *)(kmem_alloc(kernel_map, PAGE_SIZE));
1783
1784	bcopy(PTD, newptd, PAGE_SIZE);	/* inc prv page pde */
1785	IdlePTDS[0] = newptd;
1786
1787	/* Point PTD[] to this page instead of IdlePTD's physical page */
1788	newptd[PTDPTDI] = (pd_entry_t)(PG_V | PG_RW | vtophys(newptd));
1789
1790	my_idlePTD = (pd_entry_t *)vtophys(newptd);
1791
1792	/* Allocate and setup BSP idle stack */
1793	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
1794	for (i = 0; i < UPAGES; i++)
1795		SMP_prvpt[i + 3] = (pt_entry_t)(PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1796
1797	pmap_set_opt_bsp();
1798
1799	for (i = 0; i < mp_ncpus; i++) {
1800		bcopy( (int *) PTD + KPTDI, (int *) IdlePTDS[i] + KPTDI, NKPDE * sizeof (int));
1801	}
1802
1803	/* number of APs actually started */
1804	return mp_ncpus - 1;
1805}
1806
1807
1808/*
1809 * load the 1st level AP boot code into base memory.
1810 */
1811
1812/* targets for relocation */
1813extern void bigJump(void);
1814extern void bootCodeSeg(void);
1815extern void bootDataSeg(void);
1816extern void MPentry(void);
1817extern u_int MP_GDT;
1818extern u_int mp_gdtbase;
1819
1820static void
1821install_ap_tramp(u_int boot_addr)
1822{
1823	int     x;
1824	int     size = *(int *) ((u_long) & bootMP_size);
1825	u_char *src = (u_char *) ((u_long) bootMP);
1826	u_char *dst = (u_char *) boot_addr + KERNBASE;
1827	u_int   boot_base = (u_int) bootMP;
1828	u_int8_t *dst8;
1829	u_int16_t *dst16;
1830	u_int32_t *dst32;
1831
1832	POSTCODE(INSTALL_AP_TRAMP_POST);
1833
1834	for (x = 0; x < size; ++x)
1835		*dst++ = *src++;
1836
1837	/*
1838	 * modify addresses in code we just moved to basemem. unfortunately we
1839	 * need fairly detailed info about mpboot.s for this to work.  changes
1840	 * to mpboot.s might require changes here.
1841	 */
1842
1843	/* boot code is located in KERNEL space */
1844	dst = (u_char *) boot_addr + KERNBASE;
1845
1846	/* modify the lgdt arg */
1847	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
1848	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
1849
1850	/* modify the ljmp target for MPentry() */
1851	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
1852	*dst32 = ((u_int) MPentry - KERNBASE);
1853
1854	/* modify the target for boot code segment */
1855	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
1856	dst8 = (u_int8_t *) (dst16 + 1);
1857	*dst16 = (u_int) boot_addr & 0xffff;
1858	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
1859
1860	/* modify the target for boot data segment */
1861	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
1862	dst8 = (u_int8_t *) (dst16 + 1);
1863	*dst16 = (u_int) boot_addr & 0xffff;
1864	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
1865}
1866
1867
1868/*
1869 * this function starts the AP (application processor) identified
1870 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
1871 * to accomplish this.  This is necessary because of the nuances
1872 * of the different hardware we might encounter.  It ain't pretty,
1873 * but it seems to work.
1874 */
1875static int
1876start_ap(int logical_cpu, u_int boot_addr)
1877{
1878	int     physical_cpu;
1879	int     vector;
1880	int     cpus;
1881	u_long  icr_lo, icr_hi;
1882
1883	POSTCODE(START_AP_POST);
1884
1885	/* get the PHYSICAL APIC ID# */
1886	physical_cpu = CPU_TO_ID(logical_cpu);
1887
1888	/* calculate the vector */
1889	vector = (boot_addr >> 12) & 0xff;
1890
1891	/* used as a watchpoint to signal AP startup */
1892	cpus = mp_ncpus;
1893
1894	/*
1895	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
1896	 * and running the target CPU. OR this INIT IPI might be latched (P5
1897	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
1898	 * ignored.
1899	 */
1900
1901	/* setup the address for the target AP */
1902	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
1903	icr_hi |= (physical_cpu << 24);
1904	lapic.icr_hi = icr_hi;
1905
1906	/* do an INIT IPI: assert RESET */
1907	icr_lo = lapic.icr_lo & 0xfff00000;
1908	lapic.icr_lo = icr_lo | 0x0000c500;
1909
1910	/* wait for pending status end */
1911	while (lapic.icr_lo & APIC_DELSTAT_MASK)
1912		 /* spin */ ;
1913
1914	/* do an INIT IPI: deassert RESET */
1915	lapic.icr_lo = icr_lo | 0x00008500;
1916
1917	/* wait for pending status end */
1918	u_sleep(10000);		/* wait ~10mS */
1919	while (lapic.icr_lo & APIC_DELSTAT_MASK)
1920		 /* spin */ ;
1921
1922	/*
1923	 * next we do a STARTUP IPI: the previous INIT IPI might still be
1924	 * latched, (P5 bug) this 1st STARTUP would then terminate
1925	 * immediately, and the previously started INIT IPI would continue. OR
1926	 * the previous INIT IPI has already run. and this STARTUP IPI will
1927	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
1928	 * will run.
1929	 */
1930
1931	/* do a STARTUP IPI */
1932	lapic.icr_lo = icr_lo | 0x00000600 | vector;
1933	while (lapic.icr_lo & APIC_DELSTAT_MASK)
1934		 /* spin */ ;
1935	u_sleep(200);		/* wait ~200uS */
1936
1937	/*
1938	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
1939	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
1940	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
1941	 * recognized after hardware RESET or INIT IPI.
1942	 */
1943
1944	lapic.icr_lo = icr_lo | 0x00000600 | vector;
1945	while (lapic.icr_lo & APIC_DELSTAT_MASK)
1946		 /* spin */ ;
1947	u_sleep(200);		/* wait ~200uS */
1948
1949	/* wait for it to start */
1950	set_apic_timer(5000000);/* == 5 seconds */
1951	while (read_apic_timer())
1952		if (mp_ncpus > cpus)
1953			return 1;	/* return SUCCESS */
1954
1955	return 0;		/* return FAILURE */
1956}
1957
1958
/*
 * Flush the TLB on all other CPU's
 *
 * Sends the XINVLTLB IPI to every CPU but the caller, and only once both
 * 'smp_started' and 'invltlb_ok' are set.  Without APIC_IO this is a no-op.
 *
 * XXX: Needs to handshake and wait for completion before proceeding.
 */
void
smp_invltlb(void)
{
#if defined(APIC_IO)
	if (!smp_started || !invltlb_ok)
		return;

	all_but_self_ipi(XINVLTLB_OFFSET);
#endif  /* APIC_IO */
}
1972
1973void
1974invlpg(u_int addr)
1975{
1976	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
1977
1978	/* send a message to the other CPUs */
1979	smp_invltlb();
1980}
1981
1982void
1983invltlb(void)
1984{
1985	u_long  temp;
1986
1987	/*
1988	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
1989	 * inlined.
1990	 */
1991	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
1992
1993	/* send a message to the other CPUs */
1994	smp_invltlb();
1995}
1996
1997
1998/*
1999 * When called the executing CPU will send an IPI to all other CPUs
2000 *  requesting that they halt execution.
2001 *
2002 * Usually (but not necessarily) called with 'other_cpus' as its arg.
2003 *
2004 *  - Signals all CPUs in map to stop.
2005 *  - Waits for each to stop.
2006 *
2007 * Returns:
2008 *  -1: error
2009 *   0: NA
2010 *   1: ok
2011 *
2012 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
2013 *            from executing at same time.
2014 */
2015int
2016stop_cpus(u_int map)
2017{
2018	if (!smp_started)
2019		return 0;
2020
2021	/* send IPI to all CPUs in map */
2022	stopped_cpus = 0;
2023
2024	/* send the Xcpustop IPI to all CPUs in map */
2025	selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
2026
2027	while (stopped_cpus != map)
2028		/* spin */ ;
2029
2030	return 1;
2031}
2032
2033
2034/*
2035 * Called by a CPU to restart stopped CPUs.
2036 *
2037 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2038 *
2039 *  - Signals all CPUs in map to restart.
2040 *  - Waits for each to restart.
2041 *
2042 * Returns:
2043 *  -1: error
2044 *   0: NA
2045 *   1: ok
2046 */
2047int
2048restart_cpus(u_int map)
2049{
2050	if (!smp_started)
2051		return 0;
2052
2053	started_cpus = map;		/* signal other cpus to restart */
2054
2055	while (started_cpus)		/* wait for each to clear its bit */
2056		/* spin */ ;
2057	stopped_cpus = 0;
2058
2059	return 1;
2060}
2061
2062int smp_active = 0;	/* are the APs allowed to run? */
2063SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
2064
2065/* XXX maybe should be hw.ncpu */
2066static int smp_cpus = 1;	/* how many cpu's running */
2067SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
2068
2069int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
2070SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
2071
2072/* Warning: Do not staticize.  Used from swtch.s */
2073int do_page_zero_idle = 1; /* bzero pages for fun and profit in idleloop */
2074SYSCTL_INT(_machdep, OID_AUTO, do_page_zero_idle, CTLFLAG_RW,
2075	   &do_page_zero_idle, 0, "");
2076
2077/* Is forwarding of a interrupt to the CPU holding the ISR lock enabled ? */
2078int forward_irq_enabled = 1;
2079SYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW,
2080	   &forward_irq_enabled, 0, "");
2081
2082/* Enable forwarding of a signal to a process running on a different CPU */
2083int forward_signal_enabled = 1;
2084SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
2085	   &forward_signal_enabled, 0, "");
2086
2087/*
2088 * This is called once the rest of the system is up and running and we're
2089 * ready to let the AP's out of the pen.
2090 */
2091void ap_init(void);
2092
2093void
2094ap_init()
2095{
2096	u_int   temp;
2097	u_int	apic_id;
2098
2099	smp_cpus++;
2100
2101#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2102	lidt(&r_idt);
2103#endif
2104
2105	/* Build our map of 'other' CPUs. */
2106	other_cpus = all_cpus & ~(1 << cpuid);
2107
2108	printf("SMP: AP CPU #%d Launched!\n", cpuid);
2109
2110	/* XXX FIXME: i386 specific, and redundant: Setup the FPU. */
2111	load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS);
2112
2113	/* A quick check from sanity claus */
2114	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2115	if (cpuid != apic_id) {
2116		printf("SMP: cpuid = %d\n", cpuid);
2117		printf("SMP: apic_id = %d\n", apic_id);
2118		printf("PTD[MPPTDI] = %08x\n", PTD[MPPTDI]);
2119		panic("cpuid mismatch! boom!!");
2120	}
2121
2122	/* Init local apic for irq's */
2123	apic_initialize();
2124
2125	/*
2126	 * Activate smp_invltlb, although strictly speaking, this isn't
2127	 * quite correct yet.  We should have a bitfield for cpus willing
2128	 * to accept TLB flush IPI's or something and sync them.
2129	 */
2130	invltlb_ok = 1;
2131	smp_started = 1;	/* enable IPI's, tlb shootdown, freezes etc */
2132	smp_active = 1;		/* historic */
2133
2134	curproc = NULL;		/* make sure */
2135}
2136
2137void
2138getmtrr()
2139{
2140	int i;
2141
2142	if (cpu_class == CPUCLASS_686) {
2143		for(i = 0; i < NPPROVMTRR; i++) {
2144			PPro_vmtrr[i].base = rdmsr(PPRO_VMTRRphysBase0 + i * 2);
2145			PPro_vmtrr[i].mask = rdmsr(PPRO_VMTRRphysMask0 + i * 2);
2146		}
2147	}
2148}
2149
2150void
2151putmtrr()
2152{
2153	int i;
2154
2155	if (cpu_class == CPUCLASS_686) {
2156		wbinvd();
2157		for(i = 0; i < NPPROVMTRR; i++) {
2158			wrmsr(PPRO_VMTRRphysBase0 + i * 2, PPro_vmtrr[i].base);
2159			wrmsr(PPRO_VMTRRphysMask0 + i * 2, PPro_vmtrr[i].mask);
2160		}
2161	}
2162}
2163
2164void
2165putfmtrr()
2166{
2167	if (cpu_class == CPUCLASS_686) {
2168		wbinvd();
2169		/*
2170		 * Set memory between 0-640K to be WB
2171		 */
2172		wrmsr(0x250, 0x0606060606060606LL);
2173		wrmsr(0x258, 0x0606060606060606LL);
2174		/*
2175		 * Set normal, PC video memory to be WC
2176		 */
2177		wrmsr(0x259, 0x0101010101010101LL);
2178	}
2179}
2180
2181
2182#ifdef BETTER_CLOCK
2183
2184#define CHECKSTATE_USER	0
2185#define CHECKSTATE_SYS	1
2186#define CHECKSTATE_INTR	2
2187
2188/* Do not staticize.  Used from apic_vector.s */
2189struct proc*	checkstate_curproc[NCPU];
2190int		checkstate_cpustate[NCPU];
2191u_long		checkstate_pc[NCPU];
2192
2193extern long	cp_time[CPUSTATES];
2194
2195#define PC_TO_INDEX(pc, prof)				\
2196        ((int)(((u_quad_t)((pc) - (prof)->pr_off) *	\
2197            (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
2198
2199static void
2200addupc_intr_forwarded(struct proc *p, int id, int *astmap)
2201{
2202	int i;
2203	struct uprof *prof;
2204	u_long pc;
2205
2206	pc = checkstate_pc[id];
2207	prof = &p->p_stats->p_prof;
2208	if (pc >= prof->pr_off &&
2209	    (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) {
2210		if ((p->p_flag & P_OWEUPC) == 0) {
2211			prof->pr_addr = pc;
2212			prof->pr_ticks = 1;
2213			p->p_flag |= P_OWEUPC;
2214		}
2215		*astmap |= (1 << id);
2216	}
2217}
2218
2219static void
2220forwarded_statclock(int id, int pscnt, int *astmap)
2221{
2222	struct pstats *pstats;
2223	long rss;
2224	struct rusage *ru;
2225	struct vmspace *vm;
2226	int cpustate;
2227	struct proc *p;
2228#ifdef GPROF
2229	register struct gmonparam *g;
2230	int i;
2231#endif
2232
2233	p = checkstate_curproc[id];
2234	cpustate = checkstate_cpustate[id];
2235
2236	switch (cpustate) {
2237	case CHECKSTATE_USER:
2238		if (p->p_flag & P_PROFIL)
2239			addupc_intr_forwarded(p, id, astmap);
2240		if (pscnt > 1)
2241			return;
2242		p->p_uticks++;
2243		if (p->p_nice > NZERO)
2244			cp_time[CP_NICE]++;
2245		else
2246			cp_time[CP_USER]++;
2247		break;
2248	case CHECKSTATE_SYS:
2249#ifdef GPROF
2250		/*
2251		 * Kernel statistics are just like addupc_intr, only easier.
2252		 */
2253		g = &_gmonparam;
2254		if (g->state == GMON_PROF_ON) {
2255			i = checkstate_pc[id] - g->lowpc;
2256			if (i < g->textsize) {
2257				i /= HISTFRACTION * sizeof(*g->kcount);
2258				g->kcount[i]++;
2259			}
2260		}
2261#endif
2262		if (pscnt > 1)
2263			return;
2264
2265		if (!p)
2266			cp_time[CP_IDLE]++;
2267		else {
2268			p->p_sticks++;
2269			cp_time[CP_SYS]++;
2270		}
2271		break;
2272	case CHECKSTATE_INTR:
2273	default:
2274#ifdef GPROF
2275		/*
2276		 * Kernel statistics are just like addupc_intr, only easier.
2277		 */
2278		g = &_gmonparam;
2279		if (g->state == GMON_PROF_ON) {
2280			i = checkstate_pc[id] - g->lowpc;
2281			if (i < g->textsize) {
2282				i /= HISTFRACTION * sizeof(*g->kcount);
2283				g->kcount[i]++;
2284			}
2285		}
2286#endif
2287		if (pscnt > 1)
2288			return;
2289		if (p)
2290			p->p_iticks++;
2291		cp_time[CP_INTR]++;
2292	}
2293	if (p != NULL) {
2294		p->p_cpticks++;
2295		if (++p->p_estcpu == 0)
2296			p->p_estcpu--;
2297		if ((p->p_estcpu & 3) == 0) {
2298			resetpriority(p);
2299			if (p->p_priority >= PUSER)
2300				p->p_priority = p->p_usrpri;
2301		}
2302
2303		/* Update resource usage integrals and maximums. */
2304		if ((pstats = p->p_stats) != NULL &&
2305		    (ru = &pstats->p_ru) != NULL &&
2306		    (vm = p->p_vmspace) != NULL) {
2307			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
2308			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
2309			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
2310			rss = vm->vm_pmap.pm_stats.resident_count *
2311				PAGE_SIZE / 1024;
2312			if (ru->ru_maxrss < rss)
2313				ru->ru_maxrss = rss;
2314        	}
2315	}
2316}
2317
2318void
2319forward_statclock(int pscnt)
2320{
2321	int map;
2322	int id;
2323	int i;
2324
2325	/* Kludge. We don't yet have separate locks for the interrupts
2326	 * and the kernel. This means that we cannot let the other processors
2327	 * handle complex interrupts while inhibiting them from entering
2328	 * the kernel in a non-interrupt context.
2329	 *
2330	 * What we can do, without changing the locking mechanisms yet,
2331	 * is letting the other processors handle a very simple interrupt
2332	 * (wich determines the processor states), and do the main
2333	 * work ourself.
2334	 */
2335
2336	if (!smp_started || !invltlb_ok || cold || panicstr)
2337		return;
2338
2339	/* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle ) */
2340
2341	map = other_cpus & ~stopped_cpus ;
2342	checkstate_probed_cpus = 0;
2343	if (map != 0)
2344		selected_apic_ipi(map,
2345				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2346
2347	i = 0;
2348	while (checkstate_probed_cpus != map) {
2349		/* spin */
2350		i++;
2351		if (i == 1000000) {
2352			printf("forward_statclock: checkstate %x\n",
2353			       checkstate_probed_cpus);
2354			break;
2355		}
2356	}
2357
2358	/*
2359	 * Step 2: walk through other processors processes, update ticks and
2360	 * profiling info.
2361	 */
2362
2363	map = 0;
2364	for (id = 0; id < mp_ncpus; id++) {
2365		if (id == cpuid)
2366			continue;
2367		if (((1 << id) & checkstate_probed_cpus) == 0)
2368			continue;
2369		forwarded_statclock(id, pscnt, &map);
2370	}
2371	if (map != 0) {
2372		checkstate_need_ast |= map;
2373		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2374		i = 0;
2375		while ((checkstate_need_ast & map) != 0) {
2376			/* spin */
2377			i++;
2378			if (i > 100000) {
2379#ifdef BETTER_CLOCK_DIAGNOSTIC
2380				printf("forward_statclock: dropped ast 0x%x\n",
2381				       checkstate_need_ast & map);
2382#endif
2383				break;
2384			}
2385		}
2386	}
2387}
2388
2389void
2390forward_hardclock(int pscnt)
2391{
2392	int map;
2393	int id;
2394	struct proc *p;
2395	struct pstats *pstats;
2396	int i;
2397
2398	/* Kludge. We don't yet have separate locks for the interrupts
2399	 * and the kernel. This means that we cannot let the other processors
2400	 * handle complex interrupts while inhibiting them from entering
2401	 * the kernel in a non-interrupt context.
2402	 *
2403	 * What we can do, without changing the locking mechanisms yet,
2404	 * is letting the other processors handle a very simple interrupt
2405	 * (wich determines the processor states), and do the main
2406	 * work ourself.
2407	 */
2408
2409	if (!smp_started || !invltlb_ok || cold || panicstr)
2410		return;
2411
2412	/* Step 1: Probe state   (user, cpu, interrupt, spinlock, idle) */
2413
2414	map = other_cpus & ~stopped_cpus ;
2415	checkstate_probed_cpus = 0;
2416	if (map != 0)
2417		selected_apic_ipi(map,
2418				  XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2419
2420	i = 0;
2421	while (checkstate_probed_cpus != map) {
2422		/* spin */
2423		i++;
2424		if (i == 1000000) {
2425			printf("forward_hardclock: checkstate %x\n",
2426			       checkstate_probed_cpus);
2427			break;
2428		}
2429	}
2430
2431	/*
2432	 * Step 2: walk through other processors processes, update virtual
2433	 * timer and profiling timer. If stathz == 0, also update ticks and
2434	 * profiling info.
2435	 */
2436
2437	map = 0;
2438	for (id = 0; id < mp_ncpus; id++) {
2439		if (id == cpuid)
2440			continue;
2441		if (((1 << id) & checkstate_probed_cpus) == 0)
2442			continue;
2443		p = checkstate_curproc[id];
2444		if (p) {
2445			pstats = p->p_stats;
2446			if (checkstate_cpustate[id] == CHECKSTATE_USER &&
2447			    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
2448			    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
2449				psignal(p, SIGVTALRM);
2450				map |= (1 << id);
2451			}
2452			if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
2453			    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
2454				psignal(p, SIGPROF);
2455				map |= (1 << id);
2456			}
2457		}
2458		if (stathz == 0) {
2459			forwarded_statclock( id, pscnt, &map);
2460		}
2461	}
2462	if (map != 0) {
2463		checkstate_need_ast |= map;
2464		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2465		i = 0;
2466		while ((checkstate_need_ast & map) != 0) {
2467			/* spin */
2468			i++;
2469			if (i > 100000) {
2470#ifdef BETTER_CLOCK_DIAGNOSTIC
2471				printf("forward_hardclock: dropped ast 0x%x\n",
2472				       checkstate_need_ast & map);
2473#endif
2474				break;
2475			}
2476		}
2477	}
2478}
2479
2480#endif /* BETTER_CLOCK */
2481
2482void
2483forward_signal(struct proc *p)
2484{
2485	int map;
2486	int id;
2487	int i;
2488
2489	/* Kludge. We don't yet have separate locks for the interrupts
2490	 * and the kernel. This means that we cannot let the other processors
2491	 * handle complex interrupts while inhibiting them from entering
2492	 * the kernel in a non-interrupt context.
2493	 *
2494	 * What we can do, without changing the locking mechanisms yet,
2495	 * is letting the other processors handle a very simple interrupt
2496	 * (wich determines the processor states), and do the main
2497	 * work ourself.
2498	 */
2499
2500	if (!smp_started || !invltlb_ok || cold || panicstr)
2501		return;
2502	if (!forward_signal_enabled)
2503		return;
2504	while (1) {
2505		if (p->p_stat != SRUN)
2506			return;
2507		id = (u_char) p->p_oncpu;
2508		if (id == 0xff)
2509			return;
2510		map = (1<<id);
2511		checkstate_need_ast |= map;
2512		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2513		i = 0;
2514		while ((checkstate_need_ast & map) != 0) {
2515			/* spin */
2516			i++;
2517			if (i > 100000) {
2518#if 0
2519				printf("forward_signal: dropped ast 0x%x\n",
2520				       checkstate_need_ast & map);
2521#endif
2522				break;
2523			}
2524		}
2525		if (id == (u_char) p->p_oncpu)
2526			return;
2527	}
2528}
2529
2530
2531#ifdef APIC_INTR_REORDER
2532/*
2533 *	Maintain mapping from softintr vector to isr bit in local apic.
2534 */
2535void
2536set_lapic_isrloc(int intr, int vector)
2537{
2538	if (intr < 0 || intr > 32)
2539		panic("set_apic_isrloc: bad intr argument: %d",intr);
2540	if (vector < ICU_OFFSET || vector > 255)
2541		panic("set_apic_isrloc: bad vector argument: %d",vector);
2542	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
2543	apic_isrbit_location[intr].bit = (1<<(vector & 31));
2544}
2545#endif
2546