mptable.c revision 26108
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 *    derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 *	$Id: mp_machdep.c,v 1.11 1997/05/24 18:48:53 fsmp Exp $
26 */
27
28#include "opt_smp.h"
29#include "opt_serial.h"
30
31#include <sys/param.h>		/* for KERNBASE */
32#include <sys/types.h>
33#include <sys/sysproto.h>
34#include <sys/time.h>
35#include <sys/systm.h>
36
37#include <vm/vm.h>		/* for KERNBASE */
38#include <vm/vm_param.h>	/* for KERNBASE */
39#include <vm/pmap.h>		/* for KERNBASE */
40#include <machine/pmap.h>	/* for KERNBASE */
41
42#include <machine/smp.h>
43#include <machine/apic.h>
44#include <machine/mpapic.h>
45#include <machine/cpufunc.h>
46#include <machine/segments.h>
47#include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG */
48
49#include <i386/i386/cons.h>	/* cngetc() */
50
51#if defined(APIC_IO)
52#include <i386/include/md_var.h>	/* setidt() */
53#include <i386/isa/icu.h>		/* Xinvltlb() */
54#include <i386/isa/isa_device.h>	/* Xinvltlb() */
55#endif	/* APIC_IO */
56
/*
 * Warm boot vector: an offset word and a segment word, addressed through
 * the kernel's KERNBASE mapping.
 * NOTE(review): the code that stores into these is not in this part of
 * the file; presumably it points a starting AP at the trampoline - confirm.
 */
#define WARMBOOT_TARGET	0
#define WARMBOOT_OFF	(KERNBASE + 0x0467)
#define WARMBOOT_SEG	(KERNBASE + 0x0469)

/* the BIOS region that mp_probe() searches for the MP signature */
#define BIOS_BASE	(0xf0000)
#define BIOS_SIZE	(0x10000)
#define BIOS_COUNT	(BIOS_SIZE/4)	/* BIOS_SIZE counted in 32 bit words */

/* CMOS ram index and data I/O ports, see the CHECK_READ/CHECK_WRITE macros */
#define CMOS_REG	(0x70)
#define CMOS_DATA	(0x71)
/*
 * NOTE(review): presumably a CMOS register index and the value written to
 * it to request a warm start; the user is not in this part of the file.
 */
#define BIOS_RESET	(0x0f)
#define BIOS_WARM	(0x0a)
69
/*
 * this code MUST be enabled here and in mpboot.s.
 * it follows the very early stages of AP boot by placing values in CMOS ram.
 * it NORMALLY will never be needed and thus the primitive method for enabling.
 *
#define CHECK_POINTS
 */

#if defined(CHECK_POINTS)
/* read or write one byte of CMOS ram through the index/data port pair */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * set the six checkpoint bytes (CMOS 0x34 - 0x39) to D.
 * D is evaluated once.  Expands to a single statement, so it is safe as
 * the body of an unbraced if/else; use it as "CHECK_INIT(x);".
 */
#define CHECK_INIT(D)				\
	do {					\
		int check_val = (D);		\
		CHECK_WRITE(0x34, check_val);	\
		CHECK_WRITE(0x35, check_val);	\
		CHECK_WRITE(0x36, check_val);	\
		CHECK_WRITE(0x37, check_val);	\
		CHECK_WRITE(0x38, check_val);	\
		CHECK_WRITE(0x39, check_val);	\
	} while (0)

/*
 * print the six checkpoint bytes, prefixed by the string S.
 * Each byte is fetched in its own statement: every read is an index port
 * write followed by a data port read, and the order of evaluation of
 * function arguments is unspecified, so doing the reads inside the
 * printf() argument list could interleave them.
 */
#define CHECK_PRINT(S)				\
	do {					\
		int check_34 = CHECK_READ(0x34);	\
		int check_35 = CHECK_READ(0x35);	\
		int check_36 = CHECK_READ(0x36);	\
		int check_37 = CHECK_READ(0x37);	\
		int check_38 = CHECK_READ(0x38);	\
		int check_39 = CHECK_READ(0x39);	\
		printf("%s: %d, %d, %d, %d, %d, %d\n",	\
		   (S),				\
		   check_34, check_35, check_36,	\
		   check_37, check_38, check_39);	\
	} while (0)

#else				/* CHECK_POINTS */

/* checkpoints disabled: both macros expand to nothing */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)

#endif				/* CHECK_POINTS */
106
107
/** FIXME: what system files declare these??? */
/* descriptor table registers, loaded by init_secondary() on each AP */
extern struct region_descriptor r_gdt, r_idt;

/* global data */
/* one slot per CPU, NCPU slots each */
struct proc *SMPcurproc[NCPU];
struct pcb *SMPcurpcb[NCPU];
struct timeval SMPruntime[NCPU];

int     mp_ncpus;		/* # of CPUs, including BSP */
int     mp_naps;		/* # of Applications processors */
int     mp_nbusses;		/* # of busses */
int     mp_napics;		/* # of IO APICs */
int     mpenabled;		/* 1 once mp_probe() found an MP table, else 0 */
int     boot_cpu_id;		/* designated BSP */
vm_offset_t cpu_apic_address;	/* local APIC address taken from the MP table */
vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */

/* APIC version registers, printed by mp_announce() */
u_int32_t cpu_apic_versions[NCPU];
u_int32_t io_apic_versions[NAPIC];

/*
 * APIC ID logical/physical mapping structures.
 * We oversize these to simplify boot-time config.
 * NOTE(review): presumably these back the CPU_TO_ID(), IO_TO_ID(),
 * ID_TO_CPU() and ID_TO_IO() macros, which are defined outside this
 * file - confirm in the header.
 */
int     cpu_num_to_apic_id[NAPICID];
int     io_num_to_apic_id[NAPICID];
int     apic_id_to_logical[NAPICID];
135
/*
 * look for MP compliant motherboard.
 */

/* both are set by mp_bootaddress() and consumed by mp_start() */
static u_int boot_address;	/* address chosen for the AP trampoline code */
static u_int base_memory;	/* size of base memory, in bytes */

static int picmode;		/* 0: virtual wire mode, 1: PIC mode */
static u_int mpfps;		/* address of the MP floating pointer, 0 if none */
static int search_for_sig(u_int32_t target, int count);
static int mp_probe(u_int base_top);
static void mp_enable(u_int boot_addr);
148
149
150/*
151 * calculate usable address in base memory for AP trampoline code
152 */
153u_int
154mp_bootaddress(u_int basemem)
155{
156	base_memory = basemem * 1024;	/* convert to bytes */
157
158	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
159	if ((base_memory - boot_address) < bootMP_size)
160		boot_address -= 4096;	/* not enough, lower by 4k */
161
162	return boot_address;
163}
164
165
166/*
167 * startup the SMP processors
168 */
169void
170mp_start(void)
171{
172	/* look for MP capable motherboard */
173	if (mp_probe(base_memory))
174/*
175 * XXX: mp_probe() now does a 1st pass of the motherboard's MP table, so
176 *       by this point we know how many busses, INTs, etc. exist, as well as
177 *       the addresses of the LOCAL and IO APICs.
178 *      This means we have the info necessary for malloc()ing memory for
179 *       boot-time MP structs, as well as pmapping the APICs to known addrs.
180 *      So when we get private pages working we will probably want to move
181 *       mp_enable() further down in the boot process.
182 */
183		mp_enable(boot_address);
184	else
185		panic("MP FPS not found, can't continue!");
186
187	/* finish pmap initialization - turn off V==P mapping at zero */
188	pmap_bootstrap2();
189}
190
191
192/*
193 * print various information about the SMP system hardware and setup
194 */
195void
196mp_announce(void)
197{
198	int     x;
199
200	printf("FreeBSD/SMP: Multiprocessor motherboard\n");
201	printf(" cpu0 (BSP): apic id: %d", CPU_TO_ID(0));
202	printf(", version: 0x%08x\n", cpu_apic_versions[0]);
203	for (x = 1; x <= mp_naps; ++x) {
204		printf(" cpu%d (AP):  apic id: %d", x, CPU_TO_ID(x));
205		printf(", version: 0x%08x\n", cpu_apic_versions[x]);
206	}
207
208#if defined(APIC_IO)
209	for (x = 0; x < mp_napics; ++x) {
210		printf(" io%d (APIC): apic id: %d", x, IO_TO_ID(x));
211		printf(", version: 0x%08x\n", io_apic_versions[x]);
212	}
213#else
214	printf(" Warning: APIC I/O disabled\n");
215#endif	/* APIC_IO */
216}
217
218
219/*
220 * AP cpu's call this to sync up protected mode.
221 */
222void
223init_secondary(void)
224{
225	int     gsel_tss, slot;
226
227	r_gdt.rd_limit = sizeof(gdt[0]) * (NGDT + NCPU) - 1;
228	r_gdt.rd_base = (int) gdt;
229	lgdt(&r_gdt);		/* does magic intra-segment return */
230	lidt(&r_idt);
231	lldt(_default_ldt);
232
233	slot = NGDT + cpunumber();
234	gsel_tss = GSEL(slot, SEL_KPL);
235	gdt[slot].sd.sd_type = SDT_SYS386TSS;
236	ltr(gsel_tss);
237
238	load_cr0(0x8005003b);	/* XXX! */
239}
240
241
#if defined(APIC_IO)
/*
 * Prepare the local APIC: when the board is in PIC mode, set bit 0 of
 * the IMCR first, then mask LVT1.
 */
void
configure_local_apic(void)
{
	u_char	imcr;
	u_int32_t lvt1;

	if (picmode != 0) {
		outb(0x22, 0x70);	/* select IMCR */
		imcr = inb(0x23);	/* current contents */
		imcr |= 0x01;		/* mask external INTR */
		outb(0x23, imcr);	/* disconnect 8259s/NMI */
	}

	/* mask the LVT1: read, set the mask bit, write back */
	lvt1 = apic_base[APIC_LVT1];
	lvt1 |= APIC_LVT_M;
	apic_base[APIC_LVT1] = lvt1;
}
#endif	/* APIC_IO */
261
262
/*******************************************************************
 * local functions and data
 */
/* 1st pass over the MP table, called by mp_probe(); non-zero means panic */
static int preparse_mp_table(void);
/* 2nd pass, called by mp_enable(); returns the default config type or 0 */
static int parse_mp_table(void);
/* fills in the tables for a default config of the given type */
static void default_mp_table(int type);
/* starts the APs; defined further down in this file */
static int start_all_aps(u_int boot_addr);
270
271static int
272mp_probe(u_int base_top)
273{
274	int     x;
275	u_long  segment;
276	u_int32_t target;
277
278	/* see if EBDA exists */
279	if (segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) {
280		/* search first 1K of EBDA */
281		target = (u_int32_t) (segment << 4);
282		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
283			goto found;
284	} else {
285		/*last 1K of base memory, effective 'top of base' is passed in*/
286		target = (u_int32_t) (base_top - 0x400);
287		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
288			goto found;
289	}
290
291	/* search the BIOS */
292	target = (u_int32_t) BIOS_BASE;
293	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
294		goto found;
295
296	/* nothing found */
297	mpfps = mpenabled = 0;
298	return 0;
299
300found:				/* please forgive the 'goto'! */
301	/* calculate needed resources */
302	mpfps = x;
303	if (preparse_mp_table())
304		panic("you must reconfigure your kernel");
305
306	/* flag fact that we are running multiple processors */
307	mpenabled = 1;
308	return 1;
309}
310
311
312/*
313 * start the SMP system
314 */
315static void
316mp_enable(u_int boot_addr)
317{
318	int     x;
319#if defined(APIC_IO)
320	int     apic;
321	u_int   ux;
322#endif	/* APIC_IO */
323
324	/* examine the MP table for needed info */
325	x = parse_mp_table();
326
327	/* create pages for (address common) cpu APIC and each IO APIC */
328	pmap_bootstrap_apics();
329
330	/* can't process default configs till the CPU APIC is pmapped */
331	if (x)
332		default_mp_table(x);
333
334#if defined(APIC_IO)
335	/* fill the LOGICAL io_apic_versions table */
336	for (apic = 0; apic < mp_napics; ++apic) {
337		ux = io_apic_read(apic, IOAPIC_VER);
338		io_apic_versions[apic] = ux;
339	}
340
341	/* program each IO APIC in the system */
342	for (apic = 0; apic < mp_napics; ++apic)
343          if (io_apic_setup(apic) < 0)
344		panic("IO APIC setup failure");
345
346	/* install an inter-CPU IPI for TLB invalidation */
347	setidt(ICU_OFFSET + XINVLTLB_OFFSET, Xinvltlb,
348	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
349#endif	/* APIC_IO */
350
351	/* start each Application Processor */
352	start_all_aps(boot_addr);
353}
354
355
356/*
357 * look for the MP spec signature
358 */
359
360/* string defined by the Intel MP Spec as identifying the MP table */
361#define MP_SIG		0x5f504d5f	/* _MP_ */
362#define NEXT(X)		((X) += 4)
363static int
364search_for_sig(u_int32_t target, int count)
365{
366	int     x;
367	u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
368
369	for (x = 0; x < count; NEXT(x))
370		if (addr[x] == MP_SIG)
371			/* make array index a byte index */
372			return (target + (x * sizeof(u_int32_t)));
373
374	return -1;
375}
376
377
/* bits of PROCENTRY.cpu_flags: processor enabled, processor is the BSP */
#define PROCENTRY_FLAG_EN	0x01
#define PROCENTRY_FLAG_BP	0x02
/* bit of IOAPICENTRY.apic_flags: IO APIC enabled */
#define IOAPICENTRY_FLAG_EN	0x01
381
/*
 * MP Floating Pointer Structure
 * This is what search_for_sig() locates; the layout overlays firmware
 * data, so neither the order nor the size of the members may change.
 */
typedef struct MPFPS {
	char    signature[4];	/* the signature, see MP_SIG */
	void   *pap;		/* MP Configuration Table Header, 0 if none */
	u_char  length;
	u_char  spec_rev;
	u_char  checksum;
	u_char  mpfb1;		/* non-zero: default configuration type */
	u_char  mpfb2;		/* bit 7 set: PIC mode, else virtual wire */
	u_char  mpfb3;
	u_char  mpfb4;
	u_char  mpfb5;
}      *mpfps_t;
/*
 * MP Configuration Table Header
 * Pointed to by MPFPS.pap; the base table entries follow it directly.
 * The layout overlays firmware data and must not change.
 */
typedef struct MPCTH {
	char    signature[4];
	u_short base_table_length;	/* header plus base table entries */
	u_char  spec_rev;
	u_char  checksum;
	u_char  oem_id[8];
	u_char  product_id[12];
	void   *oem_table_pointer;
	u_short oem_table_size;
	u_short entry_count;		/* # of base table entries that follow */
	void   *apic_address;		/* address of the local APIC */
	u_short extended_table_length;
	u_char  extended_table_checksum;
	u_char  reserved;
}      *mpcth_t;
411
412
/*
 * base table entry, type 0: a processor
 * NOTE(review): basetable_entry_types gives this entry 20 bytes, which
 * assumes a 4 byte u_long - true for the i386 target, confirm elsewhere.
 */
typedef struct PROCENTRY {
	u_char  type;
	u_char  apic_id;	/* physical ID of the processor's local APIC */
	u_char  apic_version;
	u_char  cpu_flags;	/* PROCENTRY_FLAG_EN, PROCENTRY_FLAG_BP */
	u_long  cpu_signature;
	u_long  feature_flags;
	u_long  reserved1;
	u_long  reserved2;
}      *proc_entry_ptr;
423
/* base table entry, type 1: a bus */
typedef struct BUSENTRY {
	u_char  type;
	u_char  bus_id;
	char    bus_type[6];	/* name; bus_entry() stops at the first blank */
}      *bus_entry_ptr;
429
/* base table entry, type 2: an IO APIC */
typedef struct IOAPICENTRY {
	u_char  type;
	u_char  apic_id;	/* physical ID of the IO APIC */
	u_char  apic_version;
	u_char  apic_flags;	/* IOAPICENTRY_FLAG_EN */
	void   *apic_address;	/* address of the IO APIC */
}      *io_apic_entry_ptr;
437
/* base table entry, type 3: an IO INTerrupt assignment */
typedef struct INTENTRY {
	u_char  type;
	u_char  int_type;	/* the get_*_apic_irq() lookups want 0 here */
	u_short int_flags;	/* bits 0-1: polarity, bits 2-3: trigger mode */
	u_char  src_bus_id;
	u_char  src_bus_irq;	/* PCI: bits 2-6 device, bits 0-1 INT line */
	u_char  dst_apic_id;
	u_char  dst_apic_int;	/* pin of the destination APIC */
}      *int_entry_ptr;
/* descriptions of MP basetable entries */
typedef struct BASETABLE_ENTRY {
	u_char  type;		/* entry type, the first byte of every entry */
	u_char  length;		/* size of such an entry, in bytes */
	char    name[16];
}       basetable_entry;

/*
 * Indexed by entry type.  The table walkers use 'length' to step from one
 * base table entry to the next.
 */
static basetable_entry basetable_entry_types[] =
{
	{0, 20, "Processor"},
	{1, 8, "Bus"},
	{2, 8, "I/O APIC"},
	{3, 8, "I/O INT"},
	{4, 8, "Local INT"}
};
462
/* what the kernel keeps about one bus; bus_id 0xff marks an unused slot */
typedef struct BUSDATA {
	u_char  bus_id;
	enum busTypes bus_type;
}       bus_datum;
467
/*
 * what the kernel keeps about one INTerrupt assignment: the members of
 * INTENTRY without the leading type byte, see int_entry().
 */
typedef struct INTDATA {
	u_char  int_type;	/* 0xff marks an unused slot */
	u_short int_flags;
	u_char  src_bus_id;
	u_char  src_bus_irq;
	u_char  dst_apic_id;
	u_char  dst_apic_int;
}       io_int, local_int;
476
/* pairs a bus type code with the name used for it in the MP table */
typedef struct BUSTYPENAME {
	u_char  type;
	char    name[7];	/* up to 6 characters plus the terminator */
}       bus_type_name;
481
/*
 * Bus names known to lookup_bus_type(), which searches the first
 * MAX_BUSTYPE slots by name.  The "---" slots are placeholders.
 * NOTE(review): the slot positions look like they follow the numeric
 * values of enum busTypes, which is declared outside this file - confirm
 * before adding or moving an entry.
 */
static bus_type_name bus_type_table[] =
{
	{CBUS, "CBUS"},
	{CBUSII, "CBUSII"},
	{EISA, "EISA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{ISA, "ISA"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{PCI, "PCI"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{UNKNOWN_BUSTYPE, "---"},
	{XPRESS, "XPRESS"},
	{UNKNOWN_BUSTYPE, "---"}
};
/*
 * from MP spec v1.4, table 5-1
 * One row per default configuration type, indexed by (type - 1); 255
 * fills the columns a row does not use.
 */
static int default_data[7][5] =
{
/*   nbus, id0, type0, id1, type1 */
	{1, 0, ISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{1, 0, EISA, 255, 255},
	{0, 255, 255, 255, 255},/* MCA not supported */
	{2, 0, ISA, 1, PCI},
	{2, 0, EISA, 1, PCI},
	{0, 255, 255, 255, 255}	/* MCA not supported */
};
516
517
/* the bus data */
bus_datum bus_data[NBUS];

/* the IO INT data, one entry per possible APIC INTerrupt */
io_int  io_apic_ints[NINTR];

/* # of IO INT entries, counted by preparse_mp_table() */
static int nintrs;
525
/* helpers of parse_mp_table(); the *_entry() ones return 1 if the slot was used */
static void fix_mp_table	__P((void));
static int processor_entry	__P((proc_entry_ptr entry, int cpu));
static int bus_entry		__P((bus_entry_ptr entry, int bus));
static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
static int int_entry		__P((int_entry_ptr entry, int intr));
static int lookup_bus_type	__P((char *name));
532
533
534/*
535 * parse an Intel MP specification table
536 */
537static int
538preparse_mp_table(void)
539{
540	int	x;
541	mpfps_t	fps;
542	mpcth_t	cth;
543	int	totalSize;
544	void*	position;
545	int	count;
546	int	type;
547	int	mustpanic;
548
549	mustpanic = 0;
550
551	/* clear physical APIC ID to logical CPU/IO table */
552	for (x = 0; x < NAPICID; ++x)
553		ID_TO_IO(x) = -1;
554
555	/* clear logical CPU to APIC ID table */
556	for (x = 0; x < NAPICID; ++x)
557		CPU_TO_ID(x) = -1;
558
559	/* clear logical IO to APIC ID table */
560	for (x = 0; x < NAPICID; ++x)
561		IO_TO_ID(x) = -1;
562
563	/* clear IO APIC address table */
564	for (x = 0; x < NAPICID; ++x)
565		io_apic_address[x] = ~0;
566
567	/* local pointer */
568	fps = (mpfps_t) mpfps;
569
570	/* init everything to empty */
571	mp_naps = 0;
572	mp_nbusses = 0;
573	mp_napics = 0;
574	nintrs = 0;
575
576	/* check for use of 'default' configuration */
577	if (fps->mpfb1 != 0) {
578		/* use default addresses */
579		cpu_apic_address = DEFAULT_APIC_BASE;
580		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
581
582		/* fill in with defaults */
583		mp_naps = 1;
584		mp_nbusses = default_data[fps->mpfb1 - 1][0];
585#if defined(APIC_IO)
586		mp_napics = 1;
587		nintrs = 16;
588#endif	/* APIC_IO */
589	}
590	else {
591		if ((cth = fps->pap) == 0)
592			panic("MP Configuration Table Header MISSING!");
593
594		cpu_apic_address = (vm_offset_t) cth->apic_address;
595
596		/* walk the table, recording info of interest */
597		totalSize = cth->base_table_length - sizeof(struct MPCTH);
598		position = (u_char *) cth + sizeof(struct MPCTH);
599		count = cth->entry_count;
600
601		while (count--) {
602			switch (type = *(u_char *) position) {
603			case 0: /* processor_entry */
604				if (((proc_entry_ptr)position)->cpu_flags
605					& PROCENTRY_FLAG_EN)
606					++mp_naps;
607				break;
608			case 1: /* bus_entry */
609				++mp_nbusses;
610				break;
611			case 2: /* io_apic_entry */
612				if (((io_apic_entry_ptr)position)->apic_flags
613					& IOAPICENTRY_FLAG_EN)
614					io_apic_address[mp_napics++] =
615					    (vm_offset_t)((io_apic_entry_ptr)
616						position)->apic_address;
617				break;
618			case 3: /* int_entry */
619				++nintrs;
620				break;
621			case 4:	/* int_entry */
622				break;
623			default:
624				panic("mpfps Base Table HOSED!");
625				/* NOTREACHED */
626			}
627
628			totalSize -= basetable_entry_types[type].length;
629			(u_char*)position += basetable_entry_types[type].length;
630		}
631	}
632
633	/* qualify the numbers */
634	if (mp_naps > NCPU)
635		printf("Warning: only using %d of %d available CPUs!\n",
636			NCPU, mp_naps);
637#if 0
638		/** XXX we consider this legal now (but should we?) */
639		mustpanic = 1;
640#endif
641	if (mp_nbusses > NBUS) {
642		printf("found %d busses, increase NBUS\n", mp_nbusses);
643		mustpanic = 1;
644	}
645	if (mp_napics > NAPIC) {
646		printf("found %d apics, increase NAPIC\n", mp_napics);
647		mustpanic = 1;
648	}
649	if (nintrs > NINTR) {
650		printf("found %d intrs, increase NINTR\n", nintrs);
651		mustpanic = 1;
652	}
653
654	/*
655	 * Count the BSP.
656	 * This is also used as a counter while starting the APs.
657	 */
658	mp_ncpus = 1;
659
660	--mp_naps;	/* subtract the BSP */
661
662	return mustpanic;
663}
664
665
666/*
667 * parse an Intel MP specification table
668 */
669static int
670parse_mp_table(void)
671{
672	int     x;
673	mpfps_t fps;
674	mpcth_t cth;
675	int     totalSize;
676	void*   position;
677	int     count;
678	int     type;
679	int     apic, bus, cpu, intr;
680
681	/* clear bus data table */
682	for (x = 0; x < NBUS; ++x)
683		bus_data[x].bus_id = 0xff;
684
685	/* clear IO APIC INT table */
686	for (x = 0; x < NINTR; ++x)
687		io_apic_ints[x].int_type = 0xff;
688
689	/* setup the cpu/apic mapping arrays */
690	boot_cpu_id = -1;
691
692	/* local pointer */
693	fps = (mpfps_t) mpfps;
694
695	/* record whether PIC or virtual-wire mode */
696	picmode = (fps->mpfb2 & 0x80) ? 1 : 0;
697
698	/* check for use of 'default' configuration */
699#if defined(TEST_DEFAULT_CONFIG)
700	return TEST_DEFAULT_CONFIG;
701#else
702	if (fps->mpfb1 != 0)
703		return fps->mpfb1;	/* return default configuration type */
704#endif	/* TEST_DEFAULT_CONFIG */
705
706	if ((cth = fps->pap) == 0)
707		panic("MP Configuration Table Header MISSING!");
708
709	/* walk the table, recording info of interest */
710	totalSize = cth->base_table_length - sizeof(struct MPCTH);
711	position = (u_char *) cth + sizeof(struct MPCTH);
712	count = cth->entry_count;
713	apic = bus = intr = 0;
714	cpu = 1;				/* pre-count the BSP */
715
716	while (count--) {
717		switch (type = *(u_char *) position) {
718		case 0:
719			if (processor_entry(position, cpu))
720				++cpu;
721			break;
722		case 1:
723			if (bus_entry(position, bus))
724				++bus;
725			break;
726		case 2:
727			if (io_apic_entry(position, apic))
728				++apic;
729			break;
730		case 3:
731			if (int_entry(position, intr))
732				++intr;
733			break;
734		case 4:
735			/* int_entry(position); */
736			break;
737		default:
738			panic("mpfps Base Table HOSED!");
739			/* NOTREACHED */
740		}
741
742		totalSize -= basetable_entry_types[type].length;
743		(u_char *) position += basetable_entry_types[type].length;
744	}
745
746	if (boot_cpu_id == -1)
747		panic("NO BSP found!");
748
749	/* post scan cleanup */
750	fix_mp_table();
751
752	/* report fact that its NOT a default configuration */
753	return 0;
754}
755
756
757/*
758 * parse an Intel MP specification table
759 */
760static void
761fix_mp_table(void)
762{
763	int	x;
764	int	id;
765	int	bus_0;
766	int	bus_pci;
767	int	num_pci_bus;
768
769	/*
770	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
771	 * did it wrong.  The MP spec says that when more than 1 PCI bus
772	 * exists the BIOS must begin with bus entries for the PCI bus and use
773	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
774	 * exists the BIOS can choose to ignore this ordering, and indeed many
775	 * MP motherboards do ignore it.  This causes a problem when the PCI
776	 * sub-system makes requests of the MP sub-system based on PCI bus
777	 * numbers.	So here we look for the situation and renumber the
778	 * busses and associated INTs in an effort to "make it right".
779	 */
780
781	/* find bus 0, PCI bus, count the number of PCI busses */
782	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
783		if (bus_data[x].bus_id == 0) {
784			bus_0 = x;
785		}
786		if (bus_data[x].bus_type == PCI) {
787			++num_pci_bus;
788			bus_pci = x;
789		}
790	}
791	/*
792	 * bus_0 == slot of bus with ID of 0
793	 * bus_pci == slot of last PCI bus encountered
794	 */
795
796	/* check the 1 PCI bus case for sanity */
797	if (num_pci_bus == 1) {
798
799		/* if it is number 0 all is well */
800		if (bus_data[bus_pci].bus_id == 0)
801			return;
802
803		/* mis-numbered, swap with whichever bus uses slot 0 */
804
805		/* swap the bus entry types */
806		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
807		bus_data[bus_0].bus_type = PCI;
808
809		/* swap each relavant INTerrupt entry */
810		id = bus_data[bus_pci].bus_id;
811		for (x = 0; x < nintrs; ++x) {
812			if (io_apic_ints[x].src_bus_id == id) {
813				io_apic_ints[x].src_bus_id = 0;
814			}
815			else if (io_apic_ints[x].src_bus_id == 0) {
816				io_apic_ints[x].src_bus_id = id;
817			}
818		}
819	}
820	/* sanity check if more than 1 PCI bus */
821	else if (num_pci_bus > 1) {
822		for (x = 0; x < mp_nbusses; ++x) {
823			if (bus_data[x].bus_type != PCI)
824				continue;
825			if (bus_data[x].bus_id >= num_pci_bus )
826				panic("bad PCI bus numbering");
827		}
828	}
829}
830
831
832static int
833processor_entry(proc_entry_ptr entry, int cpu)
834{
835	/* check for usability */
836	if ((cpu >= NCPU) || !(entry->cpu_flags & PROCENTRY_FLAG_EN))
837		return 0;
838
839	/* check for BSP flag */
840	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
841		boot_cpu_id = entry->apic_id;
842		CPU_TO_ID(0) = entry->apic_id;
843		ID_TO_CPU(entry->apic_id) = 0;
844		return 0;	/* its already been counted */
845	}
846
847	/* add another AP to list, if less than max number of CPUs */
848	else {
849		CPU_TO_ID(cpu) = entry->apic_id;
850		ID_TO_CPU(entry->apic_id) = cpu;
851		return 1;
852	}
853}
854
855
856static int
857bus_entry(bus_entry_ptr entry, int bus)
858{
859	int     x;
860	char    c, name[8];
861
862	/* encode the name into an index */
863	for (x = 0; x < 6; ++x) {
864		if ((c = entry->bus_type[x]) == ' ')
865			break;
866		name[x] = c;
867	}
868	name[x] = '\0';
869
870	if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
871		panic("unknown bus type: '%s'", name);
872
873	bus_data[bus].bus_id = entry->bus_id;
874	bus_data[bus].bus_type = x;
875
876	return 1;
877}
878
879
880static int
881io_apic_entry(io_apic_entry_ptr entry, int apic)
882{
883	if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
884		return 0;
885
886	IO_TO_ID(apic) = entry->apic_id;
887	ID_TO_IO(entry->apic_id) = apic;
888
889	return 1;
890}
891
892
893static int
894lookup_bus_type(char *name)
895{
896	int     x;
897
898	for (x = 0; x < MAX_BUSTYPE; ++x)
899		if (strcmp(bus_type_table[x].name, name) == 0)
900			return bus_type_table[x].type;
901
902	return UNKNOWN_BUSTYPE;
903}
904
905
906static int
907int_entry(int_entry_ptr entry, int intr)
908{
909	io_apic_ints[intr].int_type = entry->int_type;
910	io_apic_ints[intr].int_flags = entry->int_flags;
911	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
912	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
913	io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
914	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
915
916	return 1;
917}
918
919
920static int
921apic_int_is_bus_type(int intr, int bus_type)
922{
923	int     bus;
924
925	for (bus = 0; bus < mp_nbusses; ++bus)
926		if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
927		    && ((int) bus_data[bus].bus_type == bus_type))
928			return 1;
929
930	return 0;
931}
932
933
934/*
935 * determine which APIC pin an ISA INT is attached to.
936 */
937#define INTTYPE(I)	(io_apic_ints[(I)].int_type)
938#define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
939
940#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
941int
942get_isa_apic_irq(int isaIRQ)
943{
944	int     intr;
945
946#if defined(SMP_TIMER_NC)
947	if (isaIRQ == 0)
948		return -1;
949#endif				/* SMP_TIMER_NC */
950
951	for (intr = 0; intr < nintrs; ++intr)	/* search each INT record */
952		if ((INTTYPE(intr) == 0)
953		    && (SRCBUSIRQ(intr) == isaIRQ))	/* a candidate IRQ */
954			if (apic_int_is_bus_type(intr, ISA))	/* check bus match */
955				return INTPIN(intr);	/* exact match */
956
957	return -1;		/* NOT found */
958}
959#undef SRCBUSIRQ
960
961
962/*
963 *
964 */
965u_int
966get_isa_apic_mask(u_int isaMASK)
967{
968	int apicpin, isairq;
969
970	isairq = ffs(isaMASK);
971	if (isairq == 0) {
972		return 0;
973	}
974	--isairq;
975
976	apicpin = get_isa_apic_irq( isairq );
977	if (apicpin == -1) {
978		apicpin = get_eisa_apic_irq( isairq );
979		if (apicpin == -1) {
980			return 0;
981		}
982	}
983
984	return (1 << apicpin);
985}
986
987
988/*
989 * determine which APIC pin an EISA INT is attached to.
990 */
991#define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
992int
993get_eisa_apic_irq(int eisaIRQ)
994{
995	int     intr;
996
997#if defined(SMP_TIMER_NC)
998	if (eisaIRQ == 0)
999		return -1;
1000#endif				/* SMP_TIMER_NC */
1001
1002	for (intr = 0; intr < nintrs; ++intr)	/* search each INT record */
1003		if ((INTTYPE(intr) == 0)
1004		    && (SRCBUSIRQ(intr) == eisaIRQ))	/* a candidate IRQ */
1005			if (apic_int_is_bus_type(intr, EISA))	/* check bus match */
1006				return INTPIN(intr);	/* exact match */
1007
1008	return -1;		/* NOT found */
1009}
1010#undef SRCBUSIRQ
1011
1012
1013/*
1014 * determine which APIC pin a PCI INT is attached to.
1015 */
1016#define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
1017#define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1018#define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
1019int
1020get_pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1021{
1022	int     intr;
1023
1024	--pciInt;		/* zero based */
1025
1026	for (intr = 0; intr < nintrs; ++intr)	/* search each record */
1027		if ((INTTYPE(intr) == 0)
1028		    && (SRCBUSID(intr) == pciBus)
1029		    && (SRCBUSDEVICE(intr) == pciDevice)
1030		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
1031			if (apic_int_is_bus_type(intr, PCI))	/* check bus match */
1032				return INTPIN(intr);	/* exact match */
1033
1034	return -1;		/* NOT found */
1035}
1036#undef SRCBUSLINE
1037#undef SRCBUSDEVICE
1038#undef SRCBUSID
1039
1040#undef INTPIN
1041#undef INTTYPE
1042
1043
/*
 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
 *
 * Not implemented unless READY is defined: only a message is printed.
 *
 * returns: 0
 */
int
undirect_pci_irq(int rirq)
{
#if !defined(READY)
	printf("Freeing (NOT implemented) redirected PCI irq %d.\n", rirq);
	return 0;
#else
	printf("Freeing redirected PCI irq %d.\n", rirq);
	/** FIXME: tickle the MB redirector chip */
	return ???;
#endif  /* READY */
}
1059
1060
/*
 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
 *
 * Not implemented unless READY is defined: only a message is printed.
 *
 * XXX FIXME:
 *  Exactly what this means is unclear at this point.  It is a solution
 *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
 *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
 *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
 *  option.
 *
 * returns: 0
 */
int
undirect_isa_irq(int rirq)
{
#if !defined(READY)
	printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
	return 0;
#else
	printf("Freeing redirected ISA irq %d.\n", rirq);
	/** FIXME: tickle the MB redirector chip */
	return ???;
#endif  /* READY */
}
1083
1084
1085/*
1086 * given a bus ID, return:
1087 *  the bus type if found
1088 *  -1 if NOT found
1089 */
1090int
1091apic_bus_type(int id)
1092{
1093	int     x;
1094
1095	for (x = 0; x < mp_nbusses; ++x)
1096		if (bus_data[x].bus_id == id)
1097			return bus_data[x].bus_type;
1098
1099	return -1;
1100}
1101
1102
1103/*
1104 * given a LOGICAL APIC# and pin#, return:
1105 *  the associated src bus ID if found
1106 *  -1 if NOT found
1107 */
1108int
1109apic_src_bus_id(int apic, int pin)
1110{
1111	int     x;
1112
1113	/* search each of the possible INTerrupt sources */
1114	for (x = 0; x < nintrs; ++x)
1115		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1116		    (pin == io_apic_ints[x].dst_apic_int))
1117			return (io_apic_ints[x].src_bus_id);
1118
1119	return -1;		/* NOT found */
1120}
1121
1122
1123/*
1124 * given a LOGICAL APIC# and pin#, return:
1125 *  the associated src bus IRQ if found
1126 *  -1 if NOT found
1127 */
1128int
1129apic_src_bus_irq(int apic, int pin)
1130{
1131	int     x;
1132
1133	for (x = 0; x < nintrs; x++)
1134		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1135		    (pin == io_apic_ints[x].dst_apic_int))
1136			return (io_apic_ints[x].src_bus_irq);
1137
1138	return -1;		/* NOT found */
1139}
1140
1141
1142/*
1143 * given a LOGICAL APIC# and pin#, return:
1144 *  the associated INTerrupt type if found
1145 *  -1 if NOT found
1146 */
1147int
1148apic_int_type(int apic, int pin)
1149{
1150	int     x;
1151
1152	/* search each of the possible INTerrupt sources */
1153	for (x = 0; x < nintrs; ++x)
1154		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1155		    (pin == io_apic_ints[x].dst_apic_int))
1156			return (io_apic_ints[x].int_type);
1157
1158	return -1;		/* NOT found */
1159}
1160
1161
1162/*
1163 * given a LOGICAL APIC# and pin#, return:
1164 *  the associated trigger mode if found
1165 *  -1 if NOT found
1166 */
1167int
1168apic_trigger(int apic, int pin)
1169{
1170	int     x;
1171
1172	/* search each of the possible INTerrupt sources */
1173	for (x = 0; x < nintrs; ++x)
1174		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1175		    (pin == io_apic_ints[x].dst_apic_int))
1176			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1177
1178	return -1;		/* NOT found */
1179}
1180
1181
1182/*
1183 * given a LOGICAL APIC# and pin#, return:
1184 *  the associated 'active' level if found
1185 *  -1 if NOT found
1186 */
1187int
1188apic_polarity(int apic, int pin)
1189{
1190	int     x;
1191
1192	/* search each of the possible INTerrupt sources */
1193	for (x = 0; x < nintrs; ++x)
1194		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1195		    (pin == io_apic_ints[x].dst_apic_int))
1196			return (io_apic_ints[x].int_flags & 0x03);
1197
1198	return -1;		/* NOT found */
1199}
1200
1201
1202/*
1203 * set data according to MP defaults
1204 * FIXME: probably not complete yet...
1205 */
1206static void
1207default_mp_table(int type)
1208{
1209	int     ap_cpu_id;
1210#if defined(APIC_IO)
1211	u_int32_t ux;
1212	int     io_apic_id;
1213	int     pin;
1214#endif	/* APIC_IO */
1215
1216#if 0
1217	printf("  MP default config type: %d\n", type);
1218	switch (type) {
1219	case 1:
1220		printf("   bus: ISA, APIC: 82489DX\n");
1221		break;
1222	case 2:
1223		printf("   bus: EISA, APIC: 82489DX\n");
1224		break;
1225	case 3:
1226		printf("   bus: EISA, APIC: 82489DX\n");
1227		break;
1228	case 4:
1229		printf("   bus: MCA, APIC: 82489DX\n");
1230		break;
1231	case 5:
1232		printf("   bus: ISA+PCI, APIC: Integrated\n");
1233		break;
1234	case 6:
1235		printf("   bus: EISA+PCI, APIC: Integrated\n");
1236		break;
1237	case 7:
1238		printf("   bus: MCA+PCI, APIC: Integrated\n");
1239		break;
1240	default:
1241		printf("   future type\n");
1242		break;
1243		/* NOTREACHED */
1244	}
1245#endif	/* 0 */
1246
1247	boot_cpu_id = (apic_base[APIC_ID] & APIC_ID_MASK) >> 24;
1248	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1249
1250	/* BSP */
1251	CPU_TO_ID(0) = boot_cpu_id;
1252	ID_TO_CPU(boot_cpu_id) = 0;
1253
1254	/* one and only AP */
1255	CPU_TO_ID(1) = ap_cpu_id;
1256	ID_TO_CPU(ap_cpu_id) = 1;
1257
1258#if defined(APIC_IO)
1259	/* one and only IO APIC */
1260	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1261
1262	/*
1263	 * sanity check, refer to MP spec section 3.6.6, last paragraph
1264	 * necessary as some hardware isn't properly setting up the IO APIC
1265	 */
1266#if defined(REALLY_ANAL_IOAPICID_VALUE)
1267	if (io_apic_id != 2) {
1268#else
1269	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1270#endif	/* REALLY_ANAL_IOAPICID_VALUE */
1271		ux = io_apic_read(0, IOAPIC_ID);	/* get current contents */
1272		ux &= ~APIC_ID_MASK;	/* clear the ID field */
1273		ux |= 0x02000000;	/* set it to '2' */
1274		io_apic_write(0, IOAPIC_ID, ux);	/* write new value */
1275		ux = io_apic_read(0, IOAPIC_ID);	/* re-read && test */
1276		if ((ux & APIC_ID_MASK) != 0x02000000)
1277			panic("can't control IO APIC ID, reg: 0x%08x", ux);
1278		io_apic_id = 2;
1279	}
1280	IO_TO_ID(0) = io_apic_id;
1281	ID_TO_IO(io_apic_id) = 0;
1282#endif	/* APIC_IO */
1283
1284	/* fill out bus entries */
1285	switch (type) {
1286	case 1:
1287	case 2:
1288	case 3:
1289	case 5:
1290	case 6:
1291		bus_data[0].bus_id = default_data[type - 1][1];
1292		bus_data[0].bus_type = default_data[type - 1][2];
1293		bus_data[1].bus_id = default_data[type - 1][3];
1294		bus_data[1].bus_type = default_data[type - 1][4];
1295		break;
1296
1297	/* case 4: case 7:		   MCA NOT supported */
1298	default:		/* illegal/reserved */
1299		panic("BAD default MP config: %d", type);
1300		/* NOTREACHED */
1301	}
1302
1303#if defined(APIC_IO)
1304	/* general cases from MP v1.4, table 5-2 */
1305	for (pin = 0; pin < 16; ++pin) {
1306		io_apic_ints[pin].int_type = 0;
1307		io_apic_ints[pin].int_flags = 0x05;	/* edge-triggered/active-hi */
1308		io_apic_ints[pin].src_bus_id = 0;
1309		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 is caught below */
1310		io_apic_ints[pin].dst_apic_id = io_apic_id;
1311		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 correspondence */
1312	}
1313
1314	/* special cases from MP v1.4, table 5-2 */
1315	if (type == 2) {
1316		io_apic_ints[2].int_type = 0xff;	/* N/C */
1317		io_apic_ints[13].int_type = 0xff;	/* N/C */
1318#if !defined(APIC_MIXED_MODE)
1319		/** FIXME: ??? */
1320		panic("sorry, can't support type 2 default yet");
1321#endif	/* APIC_MIXED_MODE */
1322	}
1323	else
1324		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
1325
1326	if (type == 7)
1327		io_apic_ints[0].int_type = 0xff;	/* N/C */
1328	else
1329		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
1330#endif	/* APIC_IO */
1331}
1332
1333
1334static void install_ap_tramp(u_int boot_addr);
1335static int start_ap(int logicalCpu, u_int boot_addr);
1336
1337/*
1338 * start each AP in our list
1339 */
1340static int
1341start_all_aps(u_int boot_addr)
1342{
1343	int     x;
1344	u_char  mpbiosreason;
1345	u_long  mpbioswarmvec;
1346
1347	/**
1348         * NOTE: this needs further thought:
1349         *        where does it get released?
1350         *        should it be set to empy?
1351         *
1352         * get the initial mp_lock with a count of 1 for the BSP
1353         */
1354	mp_lock = (apic_base[APIC_ID] & APIC_ID_MASK) + 1;
1355
1356	/* initialize BSP's local APIC */
1357	apic_initialize(1);
1358
1359	/* install the AP 1st level boot code */
1360	install_ap_tramp(boot_addr);
1361
1362	/* save the current value of the warm-start vector */
1363	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1364	outb(CMOS_REG, BIOS_RESET);
1365	mpbiosreason = inb(CMOS_DATA);
1366
1367	/* start each AP */
1368	for (x = 1; x <= mp_naps; ++x) {
1369
1370		/* setup a vector to our boot code */
1371		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
1372		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
1373		outb(CMOS_REG, BIOS_RESET);
1374		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
1375
1376		/* attempt to start the Application Processor */
1377		CHECK_INIT(99);	/* setup checkpoints */
1378		if (!start_ap(x, boot_addr)) {
1379			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
1380			CHECK_PRINT("trace");	/* show checkpoints */
1381			/*
1382			 * better panic as the AP may be running loose
1383			 * somewhere
1384			 */
1385			printf("panic y/n? [n] ");
1386			if (cngetc() != 'n')
1387				panic("bye-bye");
1388		}
1389		CHECK_PRINT("trace");	/* show checkpoints */
1390
1391		/* record its version info */
1392		cpu_apic_versions[x] = cpu_apic_versions[0];
1393	}
1394
1395	/* fill in our (BSP) APIC version */
1396	cpu_apic_versions[0] = apic_base[APIC_VER];
1397
1398	/* restore the warmstart vector */
1399	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
1400	outb(CMOS_REG, BIOS_RESET);
1401	outb(CMOS_DATA, mpbiosreason);
1402
1403	/* number of APs actually started */
1404	return mp_ncpus - 1;
1405}
1406
1407
1408/*
1409 * load the 1st level AP boot code into base memory.
1410 */
1411
1412/* targets for relocation */
1413extern void bigJump(void);
1414extern void bootCodeSeg(void);
1415extern void bootDataSeg(void);
1416extern void MPentry(void);
1417extern u_int MP_GDT;
1418extern u_int mp_gdtbase;
1419
1420static void
1421install_ap_tramp(u_int boot_addr)
1422{
1423	int     x;
1424	int     size = *(int *) ((u_long) & bootMP_size);
1425	u_char *src = (u_char *) ((u_long) bootMP);
1426	u_char *dst = (u_char *) boot_addr + KERNBASE;
1427	u_int   boot_base = (u_int) bootMP;
1428	u_int8_t *dst8;
1429	u_int16_t *dst16;
1430	u_int32_t *dst32;
1431
1432	for (x = 0; x < size; ++x)
1433		*dst++ = *src++;
1434
1435	/*
1436	 * modify addresses in code we just moved to basemem. unfortunately we
1437	 * need fairly detailed info about mpboot.s for this to work.  changes
1438	 * to mpboot.s might require changes here.
1439	 */
1440
1441	/* boot code is located in KERNEL space */
1442	dst = (u_char *) boot_addr + KERNBASE;
1443
1444	/* modify the lgdt arg */
1445	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
1446	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
1447
1448	/* modify the ljmp target for MPentry() */
1449	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
1450	*dst32 = ((u_int) MPentry - KERNBASE);
1451
1452	/* modify the target for boot code segment */
1453	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
1454	dst8 = (u_int8_t *) (dst16 + 1);
1455	*dst16 = (u_int) boot_addr & 0xffff;
1456	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
1457
1458	/* modify the target for boot data segment */
1459	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
1460	dst8 = (u_int8_t *) (dst16 + 1);
1461	*dst16 = (u_int) boot_addr & 0xffff;
1462	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
1463}
1464
1465
1466/*
1467 * this function starts the AP (application processor) identified
1468 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
1469 * to accomplish this.  This is necessary because of the nuances
1470 * of the different hardware we might encounter.  It ain't pretty,
1471 * but it seems to work.
1472 */
1473static int
1474start_ap(int logical_cpu, u_int boot_addr)
1475{
1476	int     physical_cpu;
1477	int     vector;
1478	int     cpus;
1479	u_long  icr_lo, icr_hi;
1480
1481	/* get the PHYSICAL APIC ID# */
1482	physical_cpu = CPU_TO_ID(logical_cpu);
1483
1484	/* calculate the vector */
1485	vector = (boot_addr >> 12) & 0xff;
1486
1487	/* used as a watchpoint to signal AP startup */
1488	cpus = mp_ncpus;
1489
1490	/*
1491	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
1492	 * and running the target CPU. OR this INIT IPI might be latched (P5
1493	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
1494	 * ignored.
1495	 */
1496
1497	/* setup the address for the target AP */
1498	icr_hi = apic_base[APIC_ICR_HI] & ~APIC_ID_MASK;
1499	icr_hi |= (physical_cpu << 24);
1500	apic_base[APIC_ICR_HI] = icr_hi;
1501
1502	/* do an INIT IPI: assert RESET */
1503	icr_lo = apic_base[APIC_ICR_LOW] & 0xfff00000;
1504	apic_base[APIC_ICR_LOW] = icr_lo | 0x0000c500;
1505
1506	/* wait for pending status end */
1507	while (apic_base[APIC_ICR_LOW] & APIC_DELSTAT_MASK)
1508		 /* spin */ ;
1509
1510	/* do an INIT IPI: deassert RESET */
1511	apic_base[APIC_ICR_LOW] = icr_lo | 0x00008500;
1512
1513	/* wait for pending status end */
1514	u_sleep(10000);		/* wait ~10mS */
1515	while (apic_base[APIC_ICR_LOW] & APIC_DELSTAT_MASK)
1516		 /* spin */ ;
1517
1518	/*
1519	 * next we do a STARTUP IPI: the previous INIT IPI might still be
1520	 * latched, (P5 bug) this 1st STARTUP would then terminate
1521	 * immediately, and the previously started INIT IPI would continue. OR
1522	 * the previous INIT IPI has already run. and this STARTUP IPI will
1523	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
1524	 * will run.
1525	 */
1526
1527	/* do a STARTUP IPI */
1528	apic_base[APIC_ICR_LOW] = icr_lo | 0x00000600 | vector;
1529	while (apic_base[APIC_ICR_LOW] & APIC_DELSTAT_MASK)
1530		 /* spin */ ;
1531	u_sleep(200);		/* wait ~200uS */
1532
1533	/*
1534	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
1535	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
1536	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
1537	 * recognized after hardware RESET or INIT IPI.
1538	 */
1539
1540	apic_base[APIC_ICR_LOW] = icr_lo | 0x00000600 | vector;
1541	while (apic_base[APIC_ICR_LOW] & APIC_DELSTAT_MASK)
1542		 /* spin */ ;
1543	u_sleep(200);		/* wait ~200uS */
1544
1545	/* wait for it to start */
1546	set_apic_timer(5000000);/* == 5 seconds */
1547	while (read_apic_timer())
1548		if (mp_ncpus > cpus)
1549			return 1;	/* return SUCCESS */
1550
1551	return 0;		/* return FAILURE */
1552}
1553
1554
/*
 * Ask every other CPU to flush its TLB.
 *
 * With APIC_IO this sends the ICU_OFFSET + XINVLTLB_OFFSET vector via
 * all_but_self_ipi(), but only once both smp_active and invltlb_ok are
 * set.  Without APIC_IO the function does nothing.
 *
 * XXX: Needs to handshake and wait for completion before proceeding.
 */
void
smp_invltlb(void)
{
#ifdef APIC_IO
	/* not ready to send the IPI yet */
	if (!smp_active || !invltlb_ok)
		return;

	all_but_self_ipi(ICU_OFFSET + XINVLTLB_OFFSET);
#endif	/* APIC_IO */
}
1568
1569void
1570invlpg(u_int addr)
1571{
1572	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
1573
1574	/* send a message to the other CPUs */
1575	smp_invltlb();
1576}
1577
1578void
1579invltlb(void)
1580{
1581	u_long  temp;
1582
1583	/*
1584	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
1585	 * inlined.
1586	 */
1587	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
1588
1589	/* send a message to the other CPUs */
1590	smp_invltlb();
1591}
1592