mptable.h (revision 74283 to 74912)
1/*
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 * derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: head/sys/i386/include/mptable.h 74912 2001-03-28 09:03:24Z jhb $
26 */
27
28#include "opt_cpu.h"
29
30#ifdef SMP
31#include <machine/smptests.h>
32#else
33#error
34#endif
35
36#include <sys/param.h>
37#include <sys/bus.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/proc.h>
41#include <sys/sysctl.h>
42#include <sys/malloc.h>
43#include <sys/memrange.h>
44#include <sys/mutex.h>
45#ifdef BETTER_CLOCK
46#include <sys/dkstat.h>
47#endif
48#include <sys/cons.h> /* cngetc() */
49
50#include <vm/vm.h>
51#include <vm/vm_param.h>
52#include <vm/pmap.h>
53#include <vm/vm_kern.h>
54#include <vm/vm_extern.h>
55#ifdef BETTER_CLOCK
56#include <sys/lock.h>
57#include <vm/vm_map.h>
58#include <sys/user.h>
59#ifdef GPROF
60#include <sys/gmon.h>
61#endif
62#endif
63
64#include <machine/smp.h>
65#include <machine/apic.h>
66#include <machine/atomic.h>
67#include <machine/cpufunc.h>
68#include <machine/mpapic.h>
69#include <machine/psl.h>
70#include <machine/segments.h>
71#include <machine/smptests.h> /** TEST_DEFAULT_CONFIG, TEST_TEST1 */
72#include <machine/tss.h>
73#include <machine/specialreg.h>
74#include <machine/globaldata.h>
75
76#if defined(APIC_IO)
77#include <machine/md_var.h> /* setidt() */
78#include <i386/isa/icu.h> /* IPIs */
79#include <i386/isa/intr_machdep.h> /* IPIs */
80#endif /* APIC_IO */
81
82#if defined(TEST_DEFAULT_CONFIG)
83#define MPFPS_MPFB1 TEST_DEFAULT_CONFIG
84#else
85#define MPFPS_MPFB1 mpfps->mpfb1
86#endif /* TEST_DEFAULT_CONFIG */
87
88#define WARMBOOT_TARGET 0
89#define WARMBOOT_OFF (KERNBASE + 0x0467)
90#define WARMBOOT_SEG (KERNBASE + 0x0469)
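/*
 * (0x467 in the BIOS data area is the warm-reset vector: the real-mode
 *  offset:segment pointer the BIOS jumps through when the CMOS
 *  shutdown-status byte (BIOS_RESET, 0x0f) holds BIOS_WARM (0x0a).
 *  start_all_aps() points it at the AP trampoline before starting each AP.)
 */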
91
92#ifdef PC98
93#define BIOS_BASE (0xe8000)
94#define BIOS_SIZE (0x18000)
95#else
96#define BIOS_BASE (0xf0000)
97#define BIOS_SIZE (0x10000)
98#endif
99#define BIOS_COUNT (BIOS_SIZE/4)
100
101#define CMOS_REG (0x70)
102#define CMOS_DATA (0x71)
103#define BIOS_RESET (0x0f)
104#define BIOS_WARM (0x0a)
105
106#define PROCENTRY_FLAG_EN 0x01
107#define PROCENTRY_FLAG_BP 0x02
108#define IOAPICENTRY_FLAG_EN 0x01
109
110
111/* MP Floating Pointer Structure */
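/*
 * (Per the Intel MP spec v1.4: 'pap' is the physical address of the MP
 *  configuration table, or 0 if none; 'mpfb1' selects one of the default
 *  configurations when non-zero; bit 7 of 'mpfb2' means an IMCR is present,
 *  i.e. the system powers up in PIC mode rather than virtual wire mode.
 *  That is how MPFPS_MPFB1 and 'picmode' below use these fields.)
 */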
112typedef struct MPFPS {
113 char signature[4];
114 void *pap;
115 u_char length;
116 u_char spec_rev;
117 u_char checksum;
118 u_char mpfb1;
119 u_char mpfb2;
120 u_char mpfb3;
121 u_char mpfb4;
122 u_char mpfb5;
123} *mpfps_t;
124
125/* MP Configuration Table Header */
126typedef struct MPCTH {
127 char signature[4];
128 u_short base_table_length;
129 u_char spec_rev;
130 u_char checksum;
131 u_char oem_id[8];
132 u_char product_id[12];
133 void *oem_table_pointer;
134 u_short oem_table_size;
135 u_short entry_count;
136 void *apic_address;
137 u_short extended_table_length;
138 u_char extended_table_checksum;
139 u_char reserved;
140} *mpcth_t;
141
142
143typedef struct PROCENTRY {
144 u_char type;
145 u_char apic_id;
146 u_char apic_version;
147 u_char cpu_flags;
148 u_long cpu_signature;
149 u_long feature_flags;
150 u_long reserved1;
151 u_long reserved2;
152} *proc_entry_ptr;
153
154typedef struct BUSENTRY {
155 u_char type;
156 u_char bus_id;
157 char bus_type[6];
158} *bus_entry_ptr;
159
160typedef struct IOAPICENTRY {
161 u_char type;
162 u_char apic_id;
163 u_char apic_version;
164 u_char apic_flags;
165 void *apic_address;
166} *io_apic_entry_ptr;
167
168typedef struct INTENTRY {
169 u_char type;
170 u_char int_type;
171 u_short int_flags;
172 u_char src_bus_id;
173 u_char src_bus_irq;
174 u_char dst_apic_id;
175 u_char dst_apic_int;
176} *int_entry_ptr;
177
178/* descriptions of MP basetable entries */
179typedef struct BASETABLE_ENTRY {
180 u_char type;
181 u_char length;
182 char name[16];
183} basetable_entry;
184
185/*
 186 * This code MUST be enabled both here and in mpboot.s.
 187 * It follows the very early stages of AP boot by placing values in CMOS ram.
 188 * It is NORMALLY never needed, hence the primitive method of enabling it:
 189 * uncomment the define below.
190#define CHECK_POINTS
191 */
192
193#if defined(CHECK_POINTS) && !defined(PC98)
194#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA))
195#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
196
197#define CHECK_INIT(D); \
198 CHECK_WRITE(0x34, (D)); \
199 CHECK_WRITE(0x35, (D)); \
200 CHECK_WRITE(0x36, (D)); \
201 CHECK_WRITE(0x37, (D)); \
202 CHECK_WRITE(0x38, (D)); \
203 CHECK_WRITE(0x39, (D));
204
205#define CHECK_PRINT(S); \
206 printf("%s: %d, %d, %d, %d, %d, %d\n", \
207 (S), \
208 CHECK_READ(0x34), \
209 CHECK_READ(0x35), \
210 CHECK_READ(0x36), \
211 CHECK_READ(0x37), \
212 CHECK_READ(0x38), \
213 CHECK_READ(0x39));
214
215#else /* CHECK_POINTS */
216
217#define CHECK_INIT(D)
218#define CHECK_PRINT(S)
219
220#endif /* CHECK_POINTS */
221
222/*
223 * Values to send to the POST hardware.
224 */
225#define MP_BOOTADDRESS_POST 0x10
226#define MP_PROBE_POST 0x11
227#define MPTABLE_PASS1_POST 0x12
228
229#define MP_START_POST 0x13
230#define MP_ENABLE_POST 0x14
231#define MPTABLE_PASS2_POST 0x15
232
233#define START_ALL_APS_POST 0x16
234#define INSTALL_AP_TRAMP_POST 0x17
235#define START_AP_POST 0x18
236
237#define MP_ANNOUNCE_POST 0x19
238
 239/* used to hold the APs until we are ready to release them */
240struct mtx ap_boot_mtx;
241
242/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
243int current_postcode;
244
245/** XXX FIXME: what system files declare these??? */
246extern struct region_descriptor r_gdt, r_idt;
247
 248int bsp_apic_ready = 0;	/* flags usability of BSP apic */
249int mp_ncpus; /* # of CPUs, including BSP */
 250int mp_naps;	/* # of Application processors */
251int mp_nbusses; /* # of busses */
252int mp_napics; /* # of IO APICs */
253int boot_cpu_id; /* designated BSP */
254vm_offset_t cpu_apic_address;
255vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */
256extern int nkpt;
257
258u_int32_t cpu_apic_versions[MAXCPU];
259u_int32_t *io_apic_versions;
260
261#ifdef APIC_INTR_REORDER
262struct {
263 volatile int *location;
264 int bit;
265} apic_isrbit_location[32];
266#endif
267
268struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE];
269
270/*
271 * APIC ID logical/physical mapping structures.
272 * We oversize these to simplify boot-time config.
273 */
274int cpu_num_to_apic_id[NAPICID];
275int io_num_to_apic_id[NAPICID];
276int apic_id_to_logical[NAPICID];
277
278
279/* Bitmap of all available CPUs */
280u_int all_cpus;
281
282/* AP uses this during bootstrap. Do not staticize. */
283char *bootSTK;
284static int bootAP;
285
286/* Hotwire a 0->4MB V==P mapping */
287extern pt_entry_t *KPTphys;
288
289/* SMP page table page */
290extern pt_entry_t *SMPpt;
291
292struct pcb stoppcbs[MAXCPU];
293
294int smp_started; /* has the system started? */
295int smp_active = 0; /* are the APs allowed to run? */
296SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
297
298/* XXX maybe should be hw.ncpu */
299static int smp_cpus = 1; /* how many cpu's running */
300SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
301
302int invltlb_ok = 0; /* throttle smp_invltlb() till safe */
303SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
304
305/* Enable forwarding of a signal to a process running on a different CPU */
306static int forward_signal_enabled = 1;
307SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
308 &forward_signal_enabled, 0, "");
309
310/* Enable forwarding of roundrobin to all other cpus */
311static int forward_roundrobin_enabled = 1;
312SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
313 &forward_roundrobin_enabled, 0, "");
314
315
316/*
317 * Local data and functions.
318 */
319
320/* Set to 1 once we're ready to let the APs out of the pen. */
321static volatile int aps_ready = 0;
322
323static int mp_capable;
324static u_int boot_address;
325static u_int base_memory;
326
327static int picmode; /* 0: virtual wire mode, 1: PIC mode */
328static mpfps_t mpfps;
329static int search_for_sig(u_int32_t target, int count);
330static void mp_enable(u_int boot_addr);
331
332static void mptable_pass1(void);
333static int mptable_pass2(void);
334static void default_mp_table(int type);
335static void fix_mp_table(void);
336static void setup_apic_irq_mapping(void);
337static void init_locks(void);
338static int start_all_aps(u_int boot_addr);
339static void install_ap_tramp(u_int boot_addr);
340static int start_ap(int logicalCpu, u_int boot_addr);
341void ap_init(void);
342static int apic_int_is_bus_type(int intr, int bus_type);
343static void release_aps(void *dummy);
344
345/*
346 * initialize all the SMP locks
347 */
348
349/* critical region around IO APIC, apic_imen */
350struct mtx imen_mtx;
351
352/* lock region used by kernel profiling */
353struct mtx mcount_mtx;
354
355#ifdef USE_COMLOCK
356/* locks com (tty) data/hardware accesses: a FASTINTR() */
357struct mtx com_mtx;
358#endif /* USE_COMLOCK */
359
360/* lock around the MP rendezvous */
361static struct mtx smp_rv_mtx;
362
363/* only 1 CPU can panic at a time :) */
364struct mtx panic_mtx;
365
366static void
367init_locks(void)
368{
369 /*
370 * XXX The mcount mutex probably needs to be statically initialized,
371 * since it will be used even in the function calls that get us to this
372 * point.
373 */
374 mtx_init(&mcount_mtx, "mcount", MTX_DEF);
375
376 mtx_init(&smp_rv_mtx, "smp rendezvous", MTX_SPIN);
377 mtx_init(&panic_mtx, "panic", MTX_DEF);
378
379#ifdef USE_COMLOCK
380 mtx_init(&com_mtx, "com", MTX_SPIN);
381#endif /* USE_COMLOCK */
382
383 mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
384}
385
386/*
387 * Calculate usable address in base memory for AP trampoline code.
388 */
389u_int
390mp_bootaddress(u_int basemem)
391{
392 POSTCODE(MP_BOOTADDRESS_POST);
393
394 base_memory = basemem * 1024; /* convert to bytes */
395
396 boot_address = base_memory & ~0xfff; /* round down to 4k boundary */
397 if ((base_memory - boot_address) < bootMP_size)
398 boot_address -= 4096; /* not enough, lower by 4k */
399
400 return boot_address;
401}
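/*
 * For example, with a typical basemem of 639 (KB): base_memory becomes
 * 0x9fc00, boot_address rounds down to 0x9f000, and if bootMP_size is
 * larger than the remaining 0xc00 bytes the trampoline drops one more
 * page, to 0x9e000.
 */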
402
403
404/*
405 * Look for an Intel MP spec table (ie, SMP capable hardware).
406 */
407int
408mp_probe(void)
409{
410 int x;
411 u_long segment;
412 u_int32_t target;
413
414 POSTCODE(MP_PROBE_POST);
415
416 /* see if EBDA exists */
417 if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
418 /* search first 1K of EBDA */
419 target = (u_int32_t) (segment << 4);
420 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
421 goto found;
422 } else {
423 /* last 1K of base memory, effective 'top of base' passed in */
424 target = (u_int32_t) (base_memory - 0x400);
425 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
426 goto found;
427 }
428
429 /* search the BIOS */
430 target = (u_int32_t) BIOS_BASE;
431 if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
432 goto found;
433
434 /* nothing found */
435 mpfps = (mpfps_t)0;
436 mp_capable = 0;
437 return 0;
438
439found:
440 /* calculate needed resources */
441 mpfps = (mpfps_t)x;
442 mptable_pass1();
443
444 /* flag fact that we are running multiple processors */
445 mp_capable = 1;
446 return 1;
447}
448
449
450/*
451 * Initialize the SMP hardware and the APIC and start up the AP's.
452 */
453void
454mp_start(void)
455{
456 POSTCODE(MP_START_POST);
457
458 /* look for MP capable motherboard */
459 if (mp_capable)
460 mp_enable(boot_address);
461 else
462 panic("MP hardware not found!");
463}
464
465
466/*
467 * Print various information about the SMP system hardware and setup.
468 */
469void
470mp_announce(void)
471{
472 int x;
473
474 POSTCODE(MP_ANNOUNCE_POST);
475
476 printf("FreeBSD/SMP: Multiprocessor motherboard\n");
477 printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
478 printf(", version: 0x%08x", cpu_apic_versions[0]);
479 printf(", at 0x%08x\n", cpu_apic_address);
480 for (x = 1; x <= mp_naps; ++x) {
481 printf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x));
482 printf(", version: 0x%08x", cpu_apic_versions[x]);
483 printf(", at 0x%08x\n", cpu_apic_address);
484 }
485
486#if defined(APIC_IO)
487 for (x = 0; x < mp_napics; ++x) {
488 printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
489 printf(", version: 0x%08x", io_apic_versions[x]);
490 printf(", at 0x%08x\n", io_apic_address[x]);
491 }
492#else
493 printf(" Warning: APIC I/O disabled\n");
494#endif /* APIC_IO */
495}
496
497/*
 498 * AP CPUs call this to sync up protected mode.
499 */
500void
501init_secondary(void)
502{
503 int gsel_tss;
504 int x, myid = bootAP;
505
506 gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
507 gdt_segs[GPROC0_SEL].ssd_base =
508 (int) &SMP_prvspace[myid].globaldata.gd_common_tss;
509 SMP_prvspace[myid].globaldata.gd_prvspace =
510 &SMP_prvspace[myid].globaldata;
511
512 for (x = 0; x < NGDT; x++) {
513 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
514 }
515
516 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
517 r_gdt.rd_base = (int) &gdt[myid * NGDT];
518 lgdt(&r_gdt); /* does magic intra-segment return */
519
520 lidt(&r_idt);
521
522 lldt(_default_ldt);
523 PCPU_SET(currentldt, _default_ldt);
524
525 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
526 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
527 PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
528 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
529 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
530 PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
531 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
532 ltr(gsel_tss);
533
534 pmap_set_opt();
535}
536
537
538#if defined(APIC_IO)
539/*
540 * Final configuration of the BSP's local APIC:
541 * - disable 'pic mode'.
542 * - disable 'virtual wire mode'.
543 * - enable NMI.
544 */
545void
546bsp_apic_configure(void)
547{
548 u_char byte;
549 u_int32_t temp;
550
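	/*
	 * (Port 0x22/0x23 is the IMCR: writing 0x70 to port 0x22 selects it,
	 *  and setting bit 0 via port 0x23 reroutes the 8259 INTR and NMI
	 *  signals through the local APIC instead of directly to the BSP,
	 *  as the MP spec requires when leaving PIC mode.)
	 */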
551 /* leave 'pic mode' if necessary */
552 if (picmode) {
553 outb(0x22, 0x70); /* select IMCR */
554 byte = inb(0x23); /* current contents */
555 byte |= 0x01; /* mask external INTR */
556 outb(0x23, byte); /* disconnect 8259s/NMI */
557 }
558
559 /* mask lint0 (the 8259 'virtual wire' connection) */
560 temp = lapic.lvt_lint0;
561 temp |= APIC_LVT_M; /* set the mask */
562 lapic.lvt_lint0 = temp;
563
564 /* setup lint1 to handle NMI */
565 temp = lapic.lvt_lint1;
566 temp &= ~APIC_LVT_M; /* clear the mask */
567 lapic.lvt_lint1 = temp;
568
569 if (bootverbose)
570 apic_dump("bsp_apic_configure()");
571}
572#endif /* APIC_IO */
573
574
575/*******************************************************************
576 * local functions and data
577 */
578
579/*
580 * start the SMP system
581 */
582static void
583mp_enable(u_int boot_addr)
584{
585 int x;
586#if defined(APIC_IO)
587 int apic;
588 u_int ux;
589#endif /* APIC_IO */
590
591 POSTCODE(MP_ENABLE_POST);
592
593 /* turn on 4MB of V == P addressing so we can get to MP table */
594 *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
595 invltlb();
596
597 /* examine the MP table for needed info, uses physical addresses */
598 x = mptable_pass2();
599
600 *(int *)PTD = 0;
601 invltlb();
602
603 /* can't process default configs till the CPU APIC is pmapped */
604 if (x)
605 default_mp_table(x);
606
607 /* post scan cleanup */
608 fix_mp_table();
609 setup_apic_irq_mapping();
610
611#if defined(APIC_IO)
612
613 /* fill the LOGICAL io_apic_versions table */
614 for (apic = 0; apic < mp_napics; ++apic) {
615 ux = io_apic_read(apic, IOAPIC_VER);
616 io_apic_versions[apic] = ux;
617 io_apic_set_id(apic, IO_TO_ID(apic));
618 }
619
620 /* program each IO APIC in the system */
621 for (apic = 0; apic < mp_napics; ++apic)
622 if (io_apic_setup(apic) < 0)
623 panic("IO APIC setup failure");
624
625 /* install a 'Spurious INTerrupt' vector */
626 setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
627 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
628
629 /* install an inter-CPU IPI for TLB invalidation */
630 setidt(XINVLTLB_OFFSET, Xinvltlb,
631 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
632
633#ifdef BETTER_CLOCK
634 /* install an inter-CPU IPI for reading processor state */
635 setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
636 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
637#endif
638
639 /* install an inter-CPU IPI for all-CPU rendezvous */
640 setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
641 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
642
643 /* install an inter-CPU IPI for forcing an additional software trap */
644 setidt(XCPUAST_OFFSET, Xcpuast,
645 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
646
647 /* install an inter-CPU IPI for CPU stop/restart */
648 setidt(XCPUSTOP_OFFSET, Xcpustop,
649 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
650
651#if defined(TEST_TEST1)
652 /* install a "fake hardware INTerrupt" vector */
653 setidt(XTEST1_OFFSET, Xtest1,
654 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
655#endif /** TEST_TEST1 */
656
657#endif /* APIC_IO */
658
659 /* initialize all SMP locks */
660 init_locks();
661
662 /* start each Application Processor */
663 start_all_aps(boot_addr);
664}
665
666
667/*
668 * look for the MP spec signature
669 */
670
671/* string defined by the Intel MP Spec as identifying the MP table */
672#define MP_SIG 0x5f504d5f /* _MP_ */
673#define NEXT(X) ((X) += 4)
674static int
675search_for_sig(u_int32_t target, int count)
676{
677 int x;
678 u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
679
680 for (x = 0; x < count; NEXT(x))
681 if (addr[x] == MP_SIG)
682 /* make array index a byte index */
683 return (target + (x * sizeof(u_int32_t)));
684
685 return -1;
686}
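/*
 * The value returned above is a physical byte offset (-1 if the signature
 * was not found).  The scan steps 4 bytes at a time even though the MP spec
 * places the floating pointer structure on a 16-byte boundary, so this is
 * simply a conservative sweep of the candidate regions.
 */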
687
688
689static basetable_entry basetable_entry_types[] =
690{
691 {0, 20, "Processor"},
692 {1, 8, "Bus"},
693 {2, 8, "I/O APIC"},
694 {3, 8, "I/O INT"},
695 {4, 8, "Local INT"}
696};
697
698typedef struct BUSDATA {
699 u_char bus_id;
700 enum busTypes bus_type;
701} bus_datum;
702
703typedef struct INTDATA {
704 u_char int_type;
705 u_short int_flags;
706 u_char src_bus_id;
707 u_char src_bus_irq;
708 u_char dst_apic_id;
709 u_char dst_apic_int;
710 u_char int_vector;
711} io_int, local_int;
712
713typedef struct BUSTYPENAME {
714 u_char type;
715 char name[7];
716} bus_type_name;
717
718static bus_type_name bus_type_table[] =
719{
720 {CBUS, "CBUS"},
721 {CBUSII, "CBUSII"},
722 {EISA, "EISA"},
723 {MCA, "MCA"},
724 {UNKNOWN_BUSTYPE, "---"},
725 {ISA, "ISA"},
726 {MCA, "MCA"},
727 {UNKNOWN_BUSTYPE, "---"},
728 {UNKNOWN_BUSTYPE, "---"},
729 {UNKNOWN_BUSTYPE, "---"},
730 {UNKNOWN_BUSTYPE, "---"},
731 {UNKNOWN_BUSTYPE, "---"},
732 {PCI, "PCI"},
733 {UNKNOWN_BUSTYPE, "---"},
734 {UNKNOWN_BUSTYPE, "---"},
735 {UNKNOWN_BUSTYPE, "---"},
736 {UNKNOWN_BUSTYPE, "---"},
737 {XPRESS, "XPRESS"},
738 {UNKNOWN_BUSTYPE, "---"}
739};
740/* from MP spec v1.4, table 5-1 */
741static int default_data[7][5] =
742{
743/* nbus, id0, type0, id1, type1 */
744 {1, 0, ISA, 255, 255},
745 {1, 0, EISA, 255, 255},
746 {1, 0, EISA, 255, 255},
747 {1, 0, MCA, 255, 255},
748 {2, 0, ISA, 1, PCI},
749 {2, 0, EISA, 1, PCI},
750 {2, 0, MCA, 1, PCI}
751};
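/* (Rows above are indexed by default-configuration type minus one; see the
 *  default_data[MPFPS_MPFB1 - 1] and default_data[type - 1] lookups in
 *  mptable_pass1() and default_mp_table().) */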
752
753
754/* the bus data */
755static bus_datum *bus_data;
756
757/* the IO INT data, one entry per possible APIC INTerrupt */
758static io_int *io_apic_ints;
759
760static int nintrs;
761
762static int processor_entry __P((proc_entry_ptr entry, int cpu));
763static int bus_entry __P((bus_entry_ptr entry, int bus));
764static int io_apic_entry __P((io_apic_entry_ptr entry, int apic));
765static int int_entry __P((int_entry_ptr entry, int intr));
766static int lookup_bus_type __P((char *name));
767
768
769/*
770 * 1st pass on motherboard's Intel MP specification table.
771 *
772 * initializes:
773 * mp_ncpus = 1
774 *
775 * determines:
776 * cpu_apic_address (common to all CPUs)
777 * io_apic_address[N]
778 * mp_naps
779 * mp_nbusses
780 * mp_napics
781 * nintrs
782 */
783static void
784mptable_pass1(void)
785{
786 int x;
787 mpcth_t cth;
788 int totalSize;
789 void* position;
790 int count;
791 int type;
792
793 POSTCODE(MPTABLE_PASS1_POST);
794
795 /* clear various tables */
796 for (x = 0; x < NAPICID; ++x) {
797 io_apic_address[x] = ~0; /* IO APIC address table */
798 }
799
800 /* init everything to empty */
801 mp_naps = 0;
802 mp_nbusses = 0;
803 mp_napics = 0;
804 nintrs = 0;
805
806 /* check for use of 'default' configuration */
807 if (MPFPS_MPFB1 != 0) {
808 /* use default addresses */
809 cpu_apic_address = DEFAULT_APIC_BASE;
810 io_apic_address[0] = DEFAULT_IO_APIC_BASE;
811
812 /* fill in with defaults */
813 mp_naps = 2; /* includes BSP */
814 mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
815#if defined(APIC_IO)
816 mp_napics = 1;
817 nintrs = 16;
818#endif /* APIC_IO */
819 }
820 else {
821 if ((cth = mpfps->pap) == 0)
822 panic("MP Configuration Table Header MISSING!");
823
824 cpu_apic_address = (vm_offset_t) cth->apic_address;
825
826 /* walk the table, recording info of interest */
827 totalSize = cth->base_table_length - sizeof(struct MPCTH);
828 position = (u_char *) cth + sizeof(struct MPCTH);
829 count = cth->entry_count;
830
831 while (count--) {
832 switch (type = *(u_char *) position) {
833 case 0: /* processor_entry */
834 if (((proc_entry_ptr)position)->cpu_flags
835 & PROCENTRY_FLAG_EN)
836 ++mp_naps;
837 break;
838 case 1: /* bus_entry */
839 ++mp_nbusses;
840 break;
841 case 2: /* io_apic_entry */
842 if (((io_apic_entry_ptr)position)->apic_flags
843 & IOAPICENTRY_FLAG_EN)
844 io_apic_address[mp_napics++] =
845 (vm_offset_t)((io_apic_entry_ptr)
846 position)->apic_address;
847 break;
848 case 3: /* int_entry */
849 ++nintrs;
850 break;
 851			case 4: /* local int_entry */
852 break;
853 default:
854 panic("mpfps Base Table HOSED!");
855 /* NOTREACHED */
856 }
857
858 totalSize -= basetable_entry_types[type].length;
859 (u_char*)position += basetable_entry_types[type].length;
860 }
861 }
862
863 /* qualify the numbers */
864 if (mp_naps > MAXCPU) {
865 printf("Warning: only using %d of %d available CPUs!\n",
866 MAXCPU, mp_naps);
867 mp_naps = MAXCPU;
868 }
869
870 /*
871 * Count the BSP.
872 * This is also used as a counter while starting the APs.
873 */
874 mp_ncpus = 1;
875
876 --mp_naps; /* subtract the BSP */
877}
878
879
880/*
881 * 2nd pass on motherboard's Intel MP specification table.
882 *
883 * sets:
884 * boot_cpu_id
885 * ID_TO_IO(N), phy APIC ID to log CPU/IO table
886 * CPU_TO_ID(N), logical CPU to APIC ID table
887 * IO_TO_ID(N), logical IO to APIC ID table
888 * bus_data[N]
889 * io_apic_ints[N]
890 */
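/*
 * Returns 0 when a full MP configuration table was parsed, otherwise the
 * default-configuration type, which mp_enable() hands on to
 * default_mp_table().
 */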
891static int
892mptable_pass2(void)
893{
894 int x;
895 mpcth_t cth;
896 int totalSize;
897 void* position;
898 int count;
899 int type;
900 int apic, bus, cpu, intr;
901 int i, j;
902 int pgeflag;
903
904 POSTCODE(MPTABLE_PASS2_POST);
905
906 pgeflag = 0; /* XXX - Not used under SMP yet. */
907
908 MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
909 M_DEVBUF, M_WAITOK);
910 MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
911 M_DEVBUF, M_WAITOK);
912 MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
913 M_DEVBUF, M_WAITOK);
914 MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
915 M_DEVBUF, M_WAITOK);
916
917 bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
918
919 for (i = 0; i < mp_napics; i++) {
920 for (j = 0; j < mp_napics; j++) {
921 /* same page frame as a previous IO apic? */
922 if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
923 (io_apic_address[i] & PG_FRAME)) {
924 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
925 + (NPTEPG-2-j) * PAGE_SIZE
926 + (io_apic_address[i] & PAGE_MASK));
927 break;
928 }
929 /* use this slot if available */
930 if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
931 SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
932 pgeflag | (io_apic_address[i] & PG_FRAME));
933 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
934 + (NPTEPG-2-j) * PAGE_SIZE
935 + (io_apic_address[i] & PAGE_MASK));
936 break;
937 }
938 }
939 }
940
941 /* clear various tables */
942 for (x = 0; x < NAPICID; ++x) {
943 ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */
944 CPU_TO_ID(x) = -1; /* logical CPU to APIC ID table */
945 IO_TO_ID(x) = -1; /* logical IO to APIC ID table */
946 }
947
948 /* clear bus data table */
949 for (x = 0; x < mp_nbusses; ++x)
950 bus_data[x].bus_id = 0xff;
951
952 /* clear IO APIC INT table */
953 for (x = 0; x < (nintrs + 1); ++x) {
954 io_apic_ints[x].int_type = 0xff;
955 io_apic_ints[x].int_vector = 0xff;
956 }
957
958 /* setup the cpu/apic mapping arrays */
959 boot_cpu_id = -1;
960
961 /* record whether PIC or virtual-wire mode */
962 picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
963
964 /* check for use of 'default' configuration */
965 if (MPFPS_MPFB1 != 0)
966 return MPFPS_MPFB1; /* return default configuration type */
967
968 if ((cth = mpfps->pap) == 0)
969 panic("MP Configuration Table Header MISSING!");
970
971 /* walk the table, recording info of interest */
972 totalSize = cth->base_table_length - sizeof(struct MPCTH);
973 position = (u_char *) cth + sizeof(struct MPCTH);
974 count = cth->entry_count;
975 apic = bus = intr = 0;
976 cpu = 1; /* pre-count the BSP */
977
978 while (count--) {
979 switch (type = *(u_char *) position) {
980 case 0:
981 if (processor_entry(position, cpu))
982 ++cpu;
983 break;
984 case 1:
985 if (bus_entry(position, bus))
986 ++bus;
987 break;
988 case 2:
989 if (io_apic_entry(position, apic))
990 ++apic;
991 break;
992 case 3:
993 if (int_entry(position, intr))
994 ++intr;
995 break;
996 case 4:
997 /* int_entry(position); */
998 break;
999 default:
1000 panic("mpfps Base Table HOSED!");
1001 /* NOTREACHED */
1002 }
1003
1004 totalSize -= basetable_entry_types[type].length;
1005 (u_char *) position += basetable_entry_types[type].length;
1006 }
1007
1008 if (boot_cpu_id == -1)
1009 panic("NO BSP found!");
1010
 1011	/* report the fact that it's NOT a default configuration */
1012 return 0;
1013}
1014
1015
1016void
1017assign_apic_irq(int apic, int intpin, int irq)
1018{
1019 int x;
1020
1021 if (int_to_apicintpin[irq].ioapic != -1)
1022 panic("assign_apic_irq: inconsistent table");
1023
1024 int_to_apicintpin[irq].ioapic = apic;
1025 int_to_apicintpin[irq].int_pin = intpin;
1026 int_to_apicintpin[irq].apic_address = ioapic[apic];
1027 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1028
1029 for (x = 0; x < nintrs; x++) {
1030 if ((io_apic_ints[x].int_type == 0 ||
1031 io_apic_ints[x].int_type == 3) &&
1032 io_apic_ints[x].int_vector == 0xff &&
1033 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1034 io_apic_ints[x].dst_apic_int == intpin)
1035 io_apic_ints[x].int_vector = irq;
1036 }
1037}
1038
1039void
1040revoke_apic_irq(int irq)
1041{
1042 int x;
1043 int oldapic;
1044 int oldintpin;
1045
1046 if (int_to_apicintpin[irq].ioapic == -1)
 1047		panic("revoke_apic_irq: inconsistent table");
1048
1049 oldapic = int_to_apicintpin[irq].ioapic;
1050 oldintpin = int_to_apicintpin[irq].int_pin;
1051
1052 int_to_apicintpin[irq].ioapic = -1;
1053 int_to_apicintpin[irq].int_pin = 0;
1054 int_to_apicintpin[irq].apic_address = NULL;
1055 int_to_apicintpin[irq].redirindex = 0;
1056
1057 for (x = 0; x < nintrs; x++) {
1058 if ((io_apic_ints[x].int_type == 0 ||
1059 io_apic_ints[x].int_type == 3) &&
1060 io_apic_ints[x].int_vector == 0xff &&
1061 io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1062 io_apic_ints[x].dst_apic_int == oldintpin)
1063 io_apic_ints[x].int_vector = 0xff;
1064 }
1065}
1066
1067
1068static void
1069allocate_apic_irq(int intr)
1070{
1071 int apic;
1072 int intpin;
1073 int irq;
1074
1075 if (io_apic_ints[intr].int_vector != 0xff)
1076 return; /* Interrupt handler already assigned */
1077
1078 if (io_apic_ints[intr].int_type != 0 &&
1079 (io_apic_ints[intr].int_type != 3 ||
1080 (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1081 io_apic_ints[intr].dst_apic_int == 0)))
1082 return; /* Not INT or ExtInt on != (0, 0) */
1083
1084 irq = 0;
1085 while (irq < APIC_INTMAPSIZE &&
1086 int_to_apicintpin[irq].ioapic != -1)
1087 irq++;
1088
1089 if (irq >= APIC_INTMAPSIZE)
1090 return; /* No free interrupt handlers */
1091
1092 apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1093 intpin = io_apic_ints[intr].dst_apic_int;
1094
1095 assign_apic_irq(apic, intpin, irq);
1096 io_apic_setup_intpin(apic, intpin);
1097}
1098
1099
1100static void
1101swap_apic_id(int apic, int oldid, int newid)
1102{
1103 int x;
1104 int oapic;
1105
1106
1107 if (oldid == newid)
1108 return; /* Nothing to do */
1109
1110 printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
1111 apic, oldid, newid);
1112
1113 /* Swap physical APIC IDs in interrupt entries */
1114 for (x = 0; x < nintrs; x++) {
1115 if (io_apic_ints[x].dst_apic_id == oldid)
1116 io_apic_ints[x].dst_apic_id = newid;
1117 else if (io_apic_ints[x].dst_apic_id == newid)
1118 io_apic_ints[x].dst_apic_id = oldid;
1119 }
1120
1121 /* Swap physical APIC IDs in IO_TO_ID mappings */
1122 for (oapic = 0; oapic < mp_napics; oapic++)
1123 if (IO_TO_ID(oapic) == newid)
1124 break;
1125
1126 if (oapic < mp_napics) {
1127 printf("Changing APIC ID for IO APIC #%d from "
1128 "%d to %d in MP table\n",
1129 oapic, newid, oldid);
1130 IO_TO_ID(oapic) = oldid;
1131 }
1132 IO_TO_ID(apic) = newid;
1133}
1134
1135
1136static void
1137fix_id_to_io_mapping(void)
1138{
1139 int x;
1140
1141 for (x = 0; x < NAPICID; x++)
1142 ID_TO_IO(x) = -1;
1143
1144 for (x = 0; x <= mp_naps; x++)
1145 if (CPU_TO_ID(x) < NAPICID)
1146 ID_TO_IO(CPU_TO_ID(x)) = x;
1147
1148 for (x = 0; x < mp_napics; x++)
1149 if (IO_TO_ID(x) < NAPICID)
1150 ID_TO_IO(IO_TO_ID(x)) = x;
1151}
1152
1153
1154static int
1155first_free_apic_id(void)
1156{
1157 int freeid, x;
1158
1159 for (freeid = 0; freeid < NAPICID; freeid++) {
1160 for (x = 0; x <= mp_naps; x++)
1161 if (CPU_TO_ID(x) == freeid)
1162 break;
1163 if (x <= mp_naps)
1164 continue;
1165 for (x = 0; x < mp_napics; x++)
1166 if (IO_TO_ID(x) == freeid)
1167 break;
1168 if (x < mp_napics)
1169 continue;
1170 return freeid;
1171 }
1172 return freeid;
1173}
1174
1175
1176static int
1177io_apic_id_acceptable(int apic, int id)
1178{
1179 int cpu; /* Logical CPU number */
1180 int oapic; /* Logical IO APIC number for other IO APIC */
1181
1182 if (id >= NAPICID)
1183 return 0; /* Out of range */
1184
1185 for (cpu = 0; cpu <= mp_naps; cpu++)
1186 if (CPU_TO_ID(cpu) == id)
1187 return 0; /* Conflict with CPU */
1188
1189 for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1190 if (IO_TO_ID(oapic) == id)
1191 return 0; /* Conflict with other APIC */
1192
1193 return 1; /* ID is acceptable for IO APIC */
1194}
1195
1196
1197/*
1198 * parse an Intel MP specification table
1199 */
1200static void
1201fix_mp_table(void)
1202{
1203 int x;
1204 int id;
1205 int bus_0 = 0; /* Stop GCC warning */
1206 int bus_pci = 0; /* Stop GCC warning */
1207 int num_pci_bus;
1208 int apic; /* IO APIC unit number */
1209 int freeid; /* Free physical APIC ID */
1210 int physid; /* Current physical IO APIC ID */
1211
1212 /*
1213 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1214 * did it wrong. The MP spec says that when more than 1 PCI bus
1215 * exists the BIOS must begin with bus entries for the PCI bus and use
1216 * actual PCI bus numbering. This implies that when only 1 PCI bus
1217 * exists the BIOS can choose to ignore this ordering, and indeed many
1218 * MP motherboards do ignore it. This causes a problem when the PCI
1219 * sub-system makes requests of the MP sub-system based on PCI bus
1220 * numbers. So here we look for the situation and renumber the
1221 * busses and associated INTs in an effort to "make it right".
1222 */
1223
1224 /* find bus 0, PCI bus, count the number of PCI busses */
1225 for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1226 if (bus_data[x].bus_id == 0) {
1227 bus_0 = x;
1228 }
1229 if (bus_data[x].bus_type == PCI) {
1230 ++num_pci_bus;
1231 bus_pci = x;
1232 }
1233 }
1234 /*
1235 * bus_0 == slot of bus with ID of 0
1236 * bus_pci == slot of last PCI bus encountered
1237 */
1238
1239 /* check the 1 PCI bus case for sanity */
1240 /* if it is number 0 all is well */
1241 if (num_pci_bus == 1 &&
1242 bus_data[bus_pci].bus_id != 0) {
1243
1244 /* mis-numbered, swap with whichever bus uses slot 0 */
1245
1246 /* swap the bus entry types */
1247 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1248 bus_data[bus_0].bus_type = PCI;
1249
 1250		/* swap each relevant INTerrupt entry */
1251 id = bus_data[bus_pci].bus_id;
1252 for (x = 0; x < nintrs; ++x) {
1253 if (io_apic_ints[x].src_bus_id == id) {
1254 io_apic_ints[x].src_bus_id = 0;
1255 }
1256 else if (io_apic_ints[x].src_bus_id == 0) {
1257 io_apic_ints[x].src_bus_id = id;
1258 }
1259 }
1260 }
1261
1262 /* Assign IO APIC IDs.
1263 *
1264 * First try the existing ID. If a conflict is detected, try
1265 * the ID in the MP table. If a conflict is still detected, find
1266 * a free id.
1267 *
 1268	 * We cannot use the ID_TO_IO table before all conflicts have been
1269 * resolved and the table has been corrected.
1270 */
1271 for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
1272
1273 /* First try to use the value set by the BIOS */
1274 physid = io_apic_get_id(apic);
1275 if (io_apic_id_acceptable(apic, physid)) {
1276 if (IO_TO_ID(apic) != physid)
1277 swap_apic_id(apic, IO_TO_ID(apic), physid);
1278 continue;
1279 }
1280
1281 /* Then check if the value in the MP table is acceptable */
1282 if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
1283 continue;
1284
1285 /* Last resort, find a free APIC ID and use it */
1286 freeid = first_free_apic_id();
1287 if (freeid >= NAPICID)
1288 panic("No free physical APIC IDs found");
1289
1290 if (io_apic_id_acceptable(apic, freeid)) {
1291 swap_apic_id(apic, IO_TO_ID(apic), freeid);
1292 continue;
1293 }
1294 panic("Free physical APIC ID not usable");
1295 }
1296 fix_id_to_io_mapping();
1297
1298 /* detect and fix broken Compaq MP table */
1299 if (apic_int_type(0, 0) == -1) {
1300 printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
1301 io_apic_ints[nintrs].int_type = 3; /* ExtInt */
1302 io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */
1303 /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
1304 io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
1305 io_apic_ints[nintrs].dst_apic_int = 0; /* Pin 0 */
1306 nintrs++;
1307 }
1308}
1309
1310
1311/* Assign low level interrupt handlers */
1312static void
1313setup_apic_irq_mapping(void)
1314{
1315 int x;
1316 int int_vector;
1317
1318 /* Clear array */
1319 for (x = 0; x < APIC_INTMAPSIZE; x++) {
1320 int_to_apicintpin[x].ioapic = -1;
1321 int_to_apicintpin[x].int_pin = 0;
1322 int_to_apicintpin[x].apic_address = NULL;
1323 int_to_apicintpin[x].redirindex = 0;
1324 }
1325
1326 /* First assign ISA/EISA interrupts */
1327 for (x = 0; x < nintrs; x++) {
1328 int_vector = io_apic_ints[x].src_bus_irq;
1329 if (int_vector < APIC_INTMAPSIZE &&
1330 io_apic_ints[x].int_vector == 0xff &&
1331 int_to_apicintpin[int_vector].ioapic == -1 &&
1332 (apic_int_is_bus_type(x, ISA) ||
1333 apic_int_is_bus_type(x, EISA)) &&
1334 io_apic_ints[x].int_type == 0) {
1335 assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1336 io_apic_ints[x].dst_apic_int,
1337 int_vector);
1338 }
1339 }
1340
1341 /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1342 for (x = 0; x < nintrs; x++) {
1343 if (io_apic_ints[x].dst_apic_int == 0 &&
1344 io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1345 io_apic_ints[x].int_vector == 0xff &&
1346 int_to_apicintpin[0].ioapic == -1 &&
1347 io_apic_ints[x].int_type == 3) {
1348 assign_apic_irq(0, 0, 0);
1349 break;
1350 }
1351 }
1352 /* PCI interrupt assignment is deferred */
1353}
1354
1355
1356static int
1357processor_entry(proc_entry_ptr entry, int cpu)
1358{
1359 /* check for usability */
1360 if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1361 return 0;
1362
1363 if(entry->apic_id >= NAPICID)
1364 panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
1365 /* check for BSP flag */
1366 if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1367 boot_cpu_id = entry->apic_id;
1368 CPU_TO_ID(0) = entry->apic_id;
1369 ID_TO_CPU(entry->apic_id) = 0;
 1370		return 0;	/* it's already been counted */
1371 }
1372
1373 /* add another AP to list, if less than max number of CPUs */
1374 else if (cpu < MAXCPU) {
1375 CPU_TO_ID(cpu) = entry->apic_id;
1376 ID_TO_CPU(entry->apic_id) = cpu;
1377 return 1;
1378 }
1379
1380 return 0;
1381}
1382
1383
1384static int
1385bus_entry(bus_entry_ptr entry, int bus)
1386{
1387 int x;
1388 char c, name[8];
1389
1390 /* encode the name into an index */
1391 for (x = 0; x < 6; ++x) {
1392 if ((c = entry->bus_type[x]) == ' ')
1393 break;
1394 name[x] = c;
1395 }
1396 name[x] = '\0';
1397
1398 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1399 panic("unknown bus type: '%s'", name);
1400
1401 bus_data[bus].bus_id = entry->bus_id;
1402 bus_data[bus].bus_type = x;
1403
1404 return 1;
1405}
1406
1407
1408static int
1409io_apic_entry(io_apic_entry_ptr entry, int apic)
1410{
1411 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1412 return 0;
1413
1414 IO_TO_ID(apic) = entry->apic_id;
1415 if (entry->apic_id < NAPICID)
1416 ID_TO_IO(entry->apic_id) = apic;
1417
1418 return 1;
1419}
1420
1421
1422static int
1423lookup_bus_type(char *name)
1424{
1425 int x;
1426
1427 for (x = 0; x < MAX_BUSTYPE; ++x)
1428 if (strcmp(bus_type_table[x].name, name) == 0)
1429 return bus_type_table[x].type;
1430
1431 return UNKNOWN_BUSTYPE;
1432}
1433
1434
1435static int
1436int_entry(int_entry_ptr entry, int intr)
1437{
1438 int apic;
1439
1440 io_apic_ints[intr].int_type = entry->int_type;
1441 io_apic_ints[intr].int_flags = entry->int_flags;
1442 io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1443 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1444 if (entry->dst_apic_id == 255) {
 1445		/* This signal goes to all IO APICs.  Select an IO APIC
 1446		   with a sufficient number of interrupt pins */
1447 for (apic = 0; apic < mp_napics; apic++)
1448 if (((io_apic_read(apic, IOAPIC_VER) &
1449 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1450 entry->dst_apic_int)
1451 break;
1452 if (apic < mp_napics)
1453 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1454 else
1455 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1456 } else
1457 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1458 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1459
1460 return 1;
1461}
1462
1463
1464static int
1465apic_int_is_bus_type(int intr, int bus_type)
1466{
1467 int bus;
1468
1469 for (bus = 0; bus < mp_nbusses; ++bus)
1470 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1471 && ((int) bus_data[bus].bus_type == bus_type))
1472 return 1;
1473
1474 return 0;
1475}
1476
1477
1478/*
1479 * Given a traditional ISA INT mask, return an APIC mask.
1480 */
1481u_int
1482isa_apic_mask(u_int isa_mask)
1483{
1484 int isa_irq;
1485 int apic_pin;
1486
1487#if defined(SKIP_IRQ15_REDIRECT)
1488 if (isa_mask == (1 << 15)) {
1489 printf("skipping ISA IRQ15 redirect\n");
1490 return isa_mask;
1491 }
1492#endif /* SKIP_IRQ15_REDIRECT */
1493
1494 isa_irq = ffs(isa_mask); /* find its bit position */
1495 if (isa_irq == 0) /* doesn't exist */
1496 return 0;
1497 --isa_irq; /* make it zero based */
1498
1499 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */
1500 if (apic_pin == -1)
1501 return 0;
1502
1503 return (1 << apic_pin); /* convert pin# to a mask */
1504}
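/*
 * For example, isa_mask == 0x0200 (ISA IRQ 9): ffs() returns 10, so isa_irq
 * becomes 9; if isa_apic_irq(9) reports 9 the result is 1 << 9, while a mask
 * with no APIC connection collapses to 0.
 */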
1505
1506
1507/*
1508 * Determine which APIC pin an ISA/EISA INT is attached to.
1509 */
1510#define INTTYPE(I) (io_apic_ints[(I)].int_type)
1511#define INTPIN(I) (io_apic_ints[(I)].dst_apic_int)
1512#define INTIRQ(I) (io_apic_ints[(I)].int_vector)
1513#define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1514
1515#define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq)
1516int
1517isa_apic_irq(int isa_irq)
1518{
1519 int intr;
1520
1521 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
1522 if (INTTYPE(intr) == 0) { /* standard INT */
1523 if (SRCBUSIRQ(intr) == isa_irq) {
1524 if (apic_int_is_bus_type(intr, ISA) ||
1525 apic_int_is_bus_type(intr, EISA)) {
1526 if (INTIRQ(intr) == 0xff)
1527 return -1; /* unassigned */
1528 return INTIRQ(intr); /* found */
1529 }
1530 }
1531 }
1532 }
1533 return -1; /* NOT found */
1534}
1535
1536
1537/*
1538 * Determine which APIC pin a PCI INT is attached to.
1539 */
1540#define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id)
1541#define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1542#define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03)
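/*
 * (For PCI source buses the MP spec packs the device number into bits 2..6
 *  of src_bus_irq and the INT line, INTA# == 0 .. INTD# == 3, into bits
 *  0..1; that is what SRCBUSDEVICE()/SRCBUSLINE() decode, and why
 *  pci_apic_irq() first converts its 1-based pciInt argument to a 0-based
 *  line number.)
 */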
1543int
1544pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1545{
1546 int intr;
1547
1548 --pciInt; /* zero based */
1549
1550 for (intr = 0; intr < nintrs; ++intr) /* check each record */
1551 if ((INTTYPE(intr) == 0) /* standard INT */
1552 && (SRCBUSID(intr) == pciBus)
1553 && (SRCBUSDEVICE(intr) == pciDevice)
1554 && (SRCBUSLINE(intr) == pciInt)) /* a candidate IRQ */
1555 if (apic_int_is_bus_type(intr, PCI)) {
1556 if (INTIRQ(intr) == 0xff)
1557 allocate_apic_irq(intr);
1558 if (INTIRQ(intr) == 0xff)
1559 return -1; /* unassigned */
1560 return INTIRQ(intr); /* exact match */
1561 }
1562
1563 return -1; /* NOT found */
1564}
1565
1566int
1567next_apic_irq(int irq)
1568{
1569 int intr, ointr;
1570 int bus, bustype;
1571
1572 bus = 0;
1573 bustype = 0;
1574 for (intr = 0; intr < nintrs; intr++) {
1575 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1576 continue;
1577 bus = SRCBUSID(intr);
1578 bustype = apic_bus_type(bus);
1579 if (bustype != ISA &&
1580 bustype != EISA &&
1581 bustype != PCI)
1582 continue;
1583 break;
1584 }
1585 if (intr >= nintrs) {
1586 return -1;
1587 }
1588 for (ointr = intr + 1; ointr < nintrs; ointr++) {
1589 if (INTTYPE(ointr) != 0)
1590 continue;
1591 if (bus != SRCBUSID(ointr))
1592 continue;
1593 if (bustype == PCI) {
1594 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1595 continue;
1596 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1597 continue;
1598 }
1599 if (bustype == ISA || bustype == EISA) {
1600 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1601 continue;
1602 }
1603 if (INTPIN(intr) == INTPIN(ointr))
1604 continue;
1605 break;
1606 }
1607 if (ointr >= nintrs) {
1608 return -1;
1609 }
1610 return INTIRQ(ointr);
1611}
1612#undef SRCBUSLINE
1613#undef SRCBUSDEVICE
1614#undef SRCBUSID
1615#undef SRCBUSIRQ
1616
1617#undef INTPIN
1618#undef INTIRQ
1619#undef INTAPIC
1620#undef INTTYPE
1621
1622
1623/*
1624 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1625 *
1626 * XXX FIXME:
1627 * Exactly what this means is unclear at this point. It is a solution
1628 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard
1629 * could route any of the ISA INTs to upper (>15) IRQ values. But most would
1630 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1631 * option.
1632 */
1633int
1634undirect_isa_irq(int rirq)
1635{
1636#if defined(READY)
1637 if (bootverbose)
1638 printf("Freeing redirected ISA irq %d.\n", rirq);
1639 /** FIXME: tickle the MB redirector chip */
1640 return -1;
1641#else
1642 if (bootverbose)
1643 printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1644 return 0;
1645#endif /* READY */
1646}
1647
1648
1649/*
1650 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1651 */
1652int
1653undirect_pci_irq(int rirq)
1654{
1655#if defined(READY)
1656 if (bootverbose)
1657 printf("Freeing redirected PCI irq %d.\n", rirq);
1658
1659 /** FIXME: tickle the MB redirector chip */
1660 return -1;
1661#else
1662 if (bootverbose)
1663 printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1664 rirq);
1665 return 0;
1666#endif /* READY */
1667}
1668
1669
1670/*
1671 * given a bus ID, return:
1672 * the bus type if found
1673 * -1 if NOT found
1674 */
1675int
1676apic_bus_type(int id)
1677{
1678 int x;
1679
1680 for (x = 0; x < mp_nbusses; ++x)
1681 if (bus_data[x].bus_id == id)
1682 return bus_data[x].bus_type;
1683
1684 return -1;
1685}
1686
1687
1688/*
1689 * given a LOGICAL APIC# and pin#, return:
1690 * the associated src bus ID if found
1691 * -1 if NOT found
1692 */
1693int
1694apic_src_bus_id(int apic, int pin)
1695{
1696 int x;
1697
1698 /* search each of the possible INTerrupt sources */
1699 for (x = 0; x < nintrs; ++x)
1700 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1701 (pin == io_apic_ints[x].dst_apic_int))
1702 return (io_apic_ints[x].src_bus_id);
1703
1704 return -1; /* NOT found */
1705}
1706
1707
1708/*
1709 * given a LOGICAL APIC# and pin#, return:
1710 * the associated src bus IRQ if found
1711 * -1 if NOT found
1712 */
1713int
1714apic_src_bus_irq(int apic, int pin)
1715{
1716 int x;
1717
1718 for (x = 0; x < nintrs; x++)
1719 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1720 (pin == io_apic_ints[x].dst_apic_int))
1721 return (io_apic_ints[x].src_bus_irq);
1722
1723 return -1; /* NOT found */
1724}
1725
1726
1727/*
1728 * given a LOGICAL APIC# and pin#, return:
1729 * the associated INTerrupt type if found
1730 * -1 if NOT found
1731 */
1732int
1733apic_int_type(int apic, int pin)
1734{
1735 int x;
1736
1737 /* search each of the possible INTerrupt sources */
1738 for (x = 0; x < nintrs; ++x)
1739 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1740 (pin == io_apic_ints[x].dst_apic_int))
1741 return (io_apic_ints[x].int_type);
1742
1743 return -1; /* NOT found */
1744}
1745
1746int
1747apic_irq(int apic, int pin)
1748{
1749 int x;
1750 int res;
1751
1752 for (x = 0; x < nintrs; ++x)
1753 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1754 (pin == io_apic_ints[x].dst_apic_int)) {
1755 res = io_apic_ints[x].int_vector;
1756 if (res == 0xff)
1757 return -1;
1758 if (apic != int_to_apicintpin[res].ioapic)
1759 panic("apic_irq: inconsistent table");
1760 if (pin != int_to_apicintpin[res].int_pin)
1761 panic("apic_irq inconsistent table (2)");
1762 return res;
1763 }
1764 return -1;
1765}
1766
1767
1768/*
1769 * given a LOGICAL APIC# and pin#, return:
1770 * the associated trigger mode if found
1771 * -1 if NOT found
1772 */
1773int
1774apic_trigger(int apic, int pin)
1775{
1776 int x;
1777
1778 /* search each of the possible INTerrupt sources */
1779 for (x = 0; x < nintrs; ++x)
1780 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1781 (pin == io_apic_ints[x].dst_apic_int))
1782 return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1783
1784 return -1; /* NOT found */
1785}
1786
1787
1788/*
1789 * given a LOGICAL APIC# and pin#, return:
1790 * the associated 'active' level if found
1791 * -1 if NOT found
1792 */
1793int
1794apic_polarity(int apic, int pin)
1795{
1796 int x;
1797
1798 /* search each of the possible INTerrupt sources */
1799 for (x = 0; x < nintrs; ++x)
1800 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1801 (pin == io_apic_ints[x].dst_apic_int))
1802 return (io_apic_ints[x].int_flags & 0x03);
1803
1804 return -1; /* NOT found */
1805}
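/*
 * int_flags encoding, per the MP spec: bits 0..1 are the polarity
 * (01 = active high, 11 = active low, 00 = conforms to the bus) and
 * bits 2..3 are the trigger mode (01 = edge, 11 = level, 00 = conforms to
 * the bus); hence the ">> 2" in apic_trigger() and the "& 0x03" in
 * apic_polarity() above.
 */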
1806
1807
1808/*
1809 * set data according to MP defaults
1810 * FIXME: probably not complete yet...
1811 */
1812static void
1813default_mp_table(int type)
1814{
1815 int ap_cpu_id;
1816#if defined(APIC_IO)
1817 int io_apic_id;
1818 int pin;
1819#endif /* APIC_IO */
1820
1821#if 0
1822 printf(" MP default config type: %d\n", type);
1823 switch (type) {
1824 case 1:
1825 printf(" bus: ISA, APIC: 82489DX\n");
1826 break;
1827 case 2:
1828 printf(" bus: EISA, APIC: 82489DX\n");
1829 break;
1830 case 3:
1831 printf(" bus: EISA, APIC: 82489DX\n");
1832 break;
1833 case 4:
1834 printf(" bus: MCA, APIC: 82489DX\n");
1835 break;
1836 case 5:
1837 printf(" bus: ISA+PCI, APIC: Integrated\n");
1838 break;
1839 case 6:
1840 printf(" bus: EISA+PCI, APIC: Integrated\n");
1841 break;
1842 case 7:
1843 printf(" bus: MCA+PCI, APIC: Integrated\n");
1844 break;
1845 default:
1846 printf(" future type\n");
1847 break;
1848 /* NOTREACHED */
1849 }
1850#endif /* 0 */
1851
1852 boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1853 ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1854
1855 /* BSP */
1856 CPU_TO_ID(0) = boot_cpu_id;
1857 ID_TO_CPU(boot_cpu_id) = 0;
1858
1859 /* one and only AP */
1860 CPU_TO_ID(1) = ap_cpu_id;
1861 ID_TO_CPU(ap_cpu_id) = 1;
1862
1863#if defined(APIC_IO)
1864 /* one and only IO APIC */
1865 io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1866
1867 /*
1868 * sanity check, refer to MP spec section 3.6.6, last paragraph
1869 * necessary as some hardware isn't properly setting up the IO APIC
1870 */
1871#if defined(REALLY_ANAL_IOAPICID_VALUE)
1872 if (io_apic_id != 2) {
1873#else
1874 if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1875#endif /* REALLY_ANAL_IOAPICID_VALUE */
1876 io_apic_set_id(0, 2);
1877 io_apic_id = 2;
1878 }
1879 IO_TO_ID(0) = io_apic_id;
1880 ID_TO_IO(io_apic_id) = 0;
1881#endif /* APIC_IO */
1882
1883 /* fill out bus entries */
1884 switch (type) {
1885 case 1:
1886 case 2:
1887 case 3:
1888 case 4:
1889 case 5:
1890 case 6:
1891 case 7:
1892 bus_data[0].bus_id = default_data[type - 1][1];
1893 bus_data[0].bus_type = default_data[type - 1][2];
1894 bus_data[1].bus_id = default_data[type - 1][3];
1895 bus_data[1].bus_type = default_data[type - 1][4];
1896 break;
1897
1898 /* case 4: case 7: MCA NOT supported */
1899 default: /* illegal/reserved */
1900 panic("BAD default MP config: %d", type);
1901 /* NOTREACHED */
1902 }
1903
1904#if defined(APIC_IO)
1905 /* general cases from MP v1.4, table 5-2 */
1906 for (pin = 0; pin < 16; ++pin) {
1907 io_apic_ints[pin].int_type = 0;
1908 io_apic_ints[pin].int_flags = 0x05; /* edge/active-hi */
1909 io_apic_ints[pin].src_bus_id = 0;
1910 io_apic_ints[pin].src_bus_irq = pin; /* IRQ2 caught below */
1911 io_apic_ints[pin].dst_apic_id = io_apic_id;
1912 io_apic_ints[pin].dst_apic_int = pin; /* 1-to-1 */
1913 }
1914
1915 /* special cases from MP v1.4, table 5-2 */
1916 if (type == 2) {
1917 io_apic_ints[2].int_type = 0xff; /* N/C */
1918 io_apic_ints[13].int_type = 0xff; /* N/C */
1919#if !defined(APIC_MIXED_MODE)
1920 /** FIXME: ??? */
1921 panic("sorry, can't support type 2 default yet");
1922#endif /* APIC_MIXED_MODE */
1923 }
1924 else
1925 io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */
1926
1927 if (type == 7)
1928 io_apic_ints[0].int_type = 0xff; /* N/C */
1929 else
1930 io_apic_ints[0].int_type = 3; /* vectored 8259 */
1931#endif /* APIC_IO */
1932}
1933
1934
1935/*
1936 * start each AP in our list
1937 */
1938static int
1939start_all_aps(u_int boot_addr)
1940{
1941 int x, i, pg;
1942 u_char mpbiosreason;
1943 u_long mpbioswarmvec;
1944 struct globaldata *gd;
1945 char *stack;
1946 uintptr_t kptbase;
1947
1948 POSTCODE(START_ALL_APS_POST);
1949
1950 /* initialize BSP's local APIC */
1951 apic_initialize();
1952 bsp_apic_ready = 1;
1953
1954 /* install the AP 1st level boot code */
1955 install_ap_tramp(boot_addr);
1956
1957
1958 /* save the current value of the warm-start vector */
1959 mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1960#ifndef PC98
1961 outb(CMOS_REG, BIOS_RESET);
1962 mpbiosreason = inb(CMOS_DATA);
1963#endif
1964
1965 /* record BSP in CPU map */
1966 all_cpus = 1;
1967
1968 /* set up temporary P==V mapping for AP boot */
1969 /* XXX this is a hack, we should boot the AP on its own stack/PTD */
1970 kptbase = (uintptr_t)(void *)KPTphys;
1971 for (x = 0; x < NKPT; x++)
1972 PTD[x] = (pd_entry_t)(PG_V | PG_RW |
1973 ((kptbase + x * PAGE_SIZE) & PG_FRAME));
1974 invltlb();
1975
1976 /* start each AP */
1977 for (x = 1; x <= mp_naps; ++x) {
1978
1979 /* This is a bit verbose, it will go away soon. */
1980
1981 /* first page of AP's private space */
1982 pg = x * i386_btop(sizeof(struct privatespace));
1983
1984 /* allocate a new private data page */
1985 gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
1986
1987 /* wire it into the private page table page */
1988 SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
1989
1990 /* allocate and set up an idle stack data page */
1991 stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
1992 for (i = 0; i < UPAGES; i++)
1993 SMPpt[pg + 1 + i] = (pt_entry_t)
1994 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1995
1996 /* prime data page for it to use */
1997 SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
1998 gd->gd_cpuid = x;
1999
2000 /* setup a vector to our boot code */
2001 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
2002 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
2003#ifndef PC98
2004 outb(CMOS_REG, BIOS_RESET);
2005 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
2006#endif
2007
2008 bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
2009 bootAP = x;
2010
2011 /* attempt to start the Application Processor */
2012 CHECK_INIT(99); /* setup checkpoints */
2013 if (!start_ap(x, boot_addr)) {
2014 printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
2015 CHECK_PRINT("trace"); /* show checkpoints */
2016 /* better panic as the AP may be running loose */
2017 printf("panic y/n? [y] ");
2018 if (cngetc() != 'n')
2019 panic("bye-bye");
2020 }
2021 CHECK_PRINT("trace"); /* show checkpoints */
2022
2023 /* record its version info */
2024 cpu_apic_versions[x] = cpu_apic_versions[0];
2025
2026 all_cpus |= (1 << x); /* record AP in CPU map */
2027 }
2028
2029 /* build our map of 'other' CPUs */
2030 PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
2031
2032 /* fill in our (BSP) APIC version */
2033 cpu_apic_versions[0] = lapic.version;
2034
2035 /* restore the warmstart vector */
2036 *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
2037#ifndef PC98
2038 outb(CMOS_REG, BIOS_RESET);
2039 outb(CMOS_DATA, mpbiosreason);
2040#endif
2041
2042 /*
2043 * Set up the idle context for the BSP. Similar to above except
 2044	 * that some of it was done by locore, some by pmap.c, and some is implicit
 2045	 * because the BSP is cpu#0 and the page is initially zero, and also
 2046	 * because we can refer to variables by name on the BSP.
2047 */
2048
2049 /* Allocate and setup BSP idle stack */
2050 stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
2051 for (i = 0; i < UPAGES; i++)
2052 SMPpt[1 + i] = (pt_entry_t)
2053 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
2054
2055 for (x = 0; x < NKPT; x++)
2056 PTD[x] = 0;
2057 pmap_set_opt();
2058
2059 /* number of APs actually started */
2060 return mp_ncpus - 1;
2061}
2062
2063
2064/*
2065 * load the 1st level AP boot code into base memory.
2066 */
2067
2068/* targets for relocation */
2069extern void bigJump(void);
2070extern void bootCodeSeg(void);
2071extern void bootDataSeg(void);
2072extern void MPentry(void);
2073extern u_int MP_GDT;
2074extern u_int mp_gdtbase;
2075
2076static void
2077install_ap_tramp(u_int boot_addr)
2078{
2079 int x;
2080 int size = *(int *) ((u_long) & bootMP_size);
2081 u_char *src = (u_char *) ((u_long) bootMP);
2082 u_char *dst = (u_char *) boot_addr + KERNBASE;
2083 u_int boot_base = (u_int) bootMP;
2084 u_int8_t *dst8;
2085 u_int16_t *dst16;
2086 u_int32_t *dst32;
2087
2088 POSTCODE(INSTALL_AP_TRAMP_POST);
2089
2090 for (x = 0; x < size; ++x)
2091 *dst++ = *src++;
2092
2093 /*
2094	 * Modify addresses in the code we just moved to basemem.  Unfortunately
2095	 * we need fairly detailed info about mpboot.s for this to work; changes
2096	 * to mpboot.s might require changes here.
2097 */
2098
2099 /* boot code is located in KERNEL space */
2100 dst = (u_char *) boot_addr + KERNBASE;
2101
2102 /* modify the lgdt arg */
2103 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2104 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2105
2106 /* modify the ljmp target for MPentry() */
2107 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2108 *dst32 = ((u_int) MPentry - KERNBASE);
2109
2110 /* modify the target for boot code segment */
2111 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2112 dst8 = (u_int8_t *) (dst16 + 1);
2113 *dst16 = (u_int) boot_addr & 0xffff;
2114 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2115
2116 /* modify the target for boot data segment */
2117 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2118 dst8 = (u_int8_t *) (dst16 + 1);
2119 *dst16 = (u_int) boot_addr & 0xffff;
2120 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2121}
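
/*
 * Editor's note: a hedged sketch of the constraint the trampoline address
 * installed above must satisfy.  start_ap() below derives the STARTUP IPI
 * vector as (boot_addr >> 12) & 0xff, so boot_addr must be 4K-aligned and
 * lie below 1MB; mp_bootaddress() earlier in this file rounds base memory
 * down accordingly.  The helper name is illustrative and the block is not
 * compiled.
 */
#if 0
static int
boot_addr_is_usable(u_int boot_addr)
{
	/* must be page aligned and within the first megabyte of memory */
	return ((boot_addr & 0xfff) == 0 && boot_addr < 0x100000);
}
#endif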
2122
2123
2124/*
2125 * This function starts the AP (application processor) identified by
2126 * the logical CPU number 'logical_cpu'.  It does quite a "song and dance"
2127 * to accomplish this. This is necessary because of the nuances
2128 * of the different hardware we might encounter. It ain't pretty,
2129 * but it seems to work.
2130 */
2131static int
2132start_ap(int logical_cpu, u_int boot_addr)
2133{
2134 int physical_cpu;
2135 int vector;
2136 int cpus;
2137 u_long icr_lo, icr_hi;
2138
2139 POSTCODE(START_AP_POST);
2140
2141 /* get the PHYSICAL APIC ID# */
2142 physical_cpu = CPU_TO_ID(logical_cpu);
2143
2144 /* calculate the vector */
2145 vector = (boot_addr >> 12) & 0xff;
2146
2147 /* used as a watchpoint to signal AP startup */
2148 cpus = mp_ncpus;
2149
2150 /*
2151	 * First we do an INIT/RESET IPI.  This INIT IPI might be run,
2152	 * resetting and running the target CPU; OR it might be latched (P5
2153	 * bug), leaving the CPU waiting for a STARTUP IPI; OR it might be
2154	 * ignored entirely.
2155 */
2156
2157 /* setup the address for the target AP */
2158 icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
2159 icr_hi |= (physical_cpu << 24);
2160 lapic.icr_hi = icr_hi;
2161
2162 /* do an INIT IPI: assert RESET */
2163 icr_lo = lapic.icr_lo & 0xfff00000;
2164 lapic.icr_lo = icr_lo | 0x0000c500;
2165
2166 /* wait for pending status end */
2167 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2168 /* spin */ ;
2169
2170 /* do an INIT IPI: deassert RESET */
2171 lapic.icr_lo = icr_lo | 0x00008500;
2172
2173 /* wait for pending status end */
2174	u_sleep(10000);		/* wait ~10 ms */
2175 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2176 /* spin */ ;
2177
2178 /*
2179	 * Next we do a STARTUP IPI.  The previous INIT IPI might still be
2180	 * latched (P5 bug); this 1st STARTUP would then terminate
2181	 * immediately and the previously started INIT IPI would continue.
2182	 * OR the previous INIT IPI has already run and this STARTUP IPI
2183	 * will run.  OR the previous INIT IPI was ignored and this STARTUP
2184	 * IPI will run.
2185 */
2186
2187 /* do a STARTUP IPI */
2188 lapic.icr_lo = icr_lo | 0x00000600 | vector;
2189 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2190 /* spin */ ;
2191	u_sleep(200);		/* wait ~200 us */
2192
2193 /*
2194	 * Finally we do a 2nd STARTUP IPI.  This 2nd STARTUP IPI should run
2195	 * IF the previous STARTUP IPI was cancelled by a latched INIT IPI;
2196	 * otherwise it will be ignored, as only ONE STARTUP IPI is
2197	 * recognized after a hardware RESET or INIT IPI.
2198 */
2199
2200 lapic.icr_lo = icr_lo | 0x00000600 | vector;
2201 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2202 /* spin */ ;
2203	u_sleep(200);		/* wait ~200 us */
2204
2205 /* wait for it to start */
2206 set_apic_timer(5000000);/* == 5 seconds */
2207 while (read_apic_timer())
2208 if (mp_ncpus > cpus)
2209 return 1; /* return SUCCESS */
2210
2211 return 0; /* return FAILURE */
2212}
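
/*
 * Editor's note: the "song and dance" in start_ap() above is the classic
 * local APIC INIT/STARTUP/STARTUP sequence.  A minimal sketch, assuming
 * the lapic structure, APIC_ID_MASK/APIC_DELSTAT_MASK and u_sleep() used
 * above; the helper names are illustrative and the bare ICR constants
 * simply mirror the values hard-coded in start_ap().  Not compiled, and
 * not a replacement for the version above.
 */
#if 0
static void
icr_send(u_int apic_id, u_long mode_bits)
{
	/* target the AP, then write delivery mode/level/trigger + vector */
	lapic.icr_hi = (lapic.icr_hi & ~APIC_ID_MASK) | (apic_id << 24);
	lapic.icr_lo = (lapic.icr_lo & 0xfff00000) | mode_bits;
	while (lapic.icr_lo & APIC_DELSTAT_MASK)	/* wait for delivery */
		/* spin */ ;
}

static void
init_sipi_sipi(u_int apic_id, u_int vector)
{
	icr_send(apic_id, 0x0000c500);			/* INIT, assert */
	icr_send(apic_id, 0x00008500);			/* INIT, deassert */
	u_sleep(10000);					/* ~10 ms */
	icr_send(apic_id, 0x00000600 | vector);		/* 1st STARTUP */
	u_sleep(200);					/* ~200 us */
	icr_send(apic_id, 0x00000600 | vector);		/* 2nd STARTUP */
	u_sleep(200);
}
#endif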
2213
2214/*
2215 * Flush the TLB on all other CPUs.
2216 *
2217 * XXX: Needs to handshake and wait for completion before proceeding.
2218 */
2219void
2220smp_invltlb(void)
2221{
2222#if defined(APIC_IO)
2223 if (smp_started && invltlb_ok)
2224 all_but_self_ipi(XINVLTLB_OFFSET);
2225#endif /* APIC_IO */
2226}
2227
2228void
2229invlpg(u_int addr)
2230{
2231 __asm __volatile("invlpg (%0)"::"r"(addr):"memory");
2232
2233 /* send a message to the other CPUs */
2234 smp_invltlb();
2235}
2236
2237void
2238invltlb(void)
2239{
2240 u_long temp;
2241
2242 /*
2243 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
2244 * inlined.
2245 */
2246 __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
2247
2248 /* send a message to the other CPUs */
2249 smp_invltlb();
2250}
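
/*
 * Editor's note: the comment above refers to the rcr3()/load_cr3() pair
 * from <machine/cpufunc.h>.  Once load_cr3() is available as an inline,
 * invltlb() would reduce to the sketch below; this is only an
 * illustration of that remark and is not compiled here.
 */
#if 0
void
invltlb(void)
{
	load_cr3(rcr3());	/* reloading %cr3 flushes the TLB */
	smp_invltlb();		/* and ask the other CPUs to do the same */
}
#endif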
2251
2252
2253/*
2254 * This is called once the rest of the system is up and running and we're
2255 * ready to let the APs out of the pen.
2256 */
2257void
2258ap_init(void)
2259{
2260 u_int apic_id;
2261
2262	/* spin until all the APs are ready */
2263 while (!aps_ready)
2264 /* spin */ ;
2265
2266 /*
2267 * Set curproc to our per-cpu idleproc so that mutexes have
2268 * something unique to lock with.
2269 */
2270 PCPU_SET(curproc, PCPU_GET(idleproc));
26 */
27
28#include "opt_cpu.h"
29
30#ifdef SMP
31#include <machine/smptests.h>
32#else
33#error
34#endif
35
36#include <sys/param.h>
37#include <sys/bus.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/proc.h>
41#include <sys/sysctl.h>
42#include <sys/malloc.h>
43#include <sys/memrange.h>
44#include <sys/mutex.h>
45#ifdef BETTER_CLOCK
46#include <sys/dkstat.h>
47#endif
48#include <sys/cons.h> /* cngetc() */
49
50#include <vm/vm.h>
51#include <vm/vm_param.h>
52#include <vm/pmap.h>
53#include <vm/vm_kern.h>
54#include <vm/vm_extern.h>
55#ifdef BETTER_CLOCK
56#include <sys/lock.h>
57#include <vm/vm_map.h>
58#include <sys/user.h>
59#ifdef GPROF
60#include <sys/gmon.h>
61#endif
62#endif
63
64#include <machine/smp.h>
65#include <machine/apic.h>
66#include <machine/atomic.h>
67#include <machine/cpufunc.h>
68#include <machine/mpapic.h>
69#include <machine/psl.h>
70#include <machine/segments.h>
71#include <machine/smptests.h> /** TEST_DEFAULT_CONFIG, TEST_TEST1 */
72#include <machine/tss.h>
73#include <machine/specialreg.h>
74#include <machine/globaldata.h>
75
76#if defined(APIC_IO)
77#include <machine/md_var.h> /* setidt() */
78#include <i386/isa/icu.h> /* IPIs */
79#include <i386/isa/intr_machdep.h> /* IPIs */
80#endif /* APIC_IO */
81
82#if defined(TEST_DEFAULT_CONFIG)
83#define MPFPS_MPFB1 TEST_DEFAULT_CONFIG
84#else
85#define MPFPS_MPFB1 mpfps->mpfb1
86#endif /* TEST_DEFAULT_CONFIG */
87
88#define WARMBOOT_TARGET 0
89#define WARMBOOT_OFF (KERNBASE + 0x0467)
90#define WARMBOOT_SEG (KERNBASE + 0x0469)
91
92#ifdef PC98
93#define BIOS_BASE (0xe8000)
94#define BIOS_SIZE (0x18000)
95#else
96#define BIOS_BASE (0xf0000)
97#define BIOS_SIZE (0x10000)
98#endif
99#define BIOS_COUNT (BIOS_SIZE/4)
100
101#define CMOS_REG (0x70)
102#define CMOS_DATA (0x71)
103#define BIOS_RESET (0x0f)
104#define BIOS_WARM (0x0a)
105
106#define PROCENTRY_FLAG_EN 0x01
107#define PROCENTRY_FLAG_BP 0x02
108#define IOAPICENTRY_FLAG_EN 0x01
109
110
111/* MP Floating Pointer Structure */
112typedef struct MPFPS {
113 char signature[4];
114 void *pap;
115 u_char length;
116 u_char spec_rev;
117 u_char checksum;
118 u_char mpfb1;
119 u_char mpfb2;
120 u_char mpfb3;
121 u_char mpfb4;
122 u_char mpfb5;
123} *mpfps_t;
124
125/* MP Configuration Table Header */
126typedef struct MPCTH {
127 char signature[4];
128 u_short base_table_length;
129 u_char spec_rev;
130 u_char checksum;
131 u_char oem_id[8];
132 u_char product_id[12];
133 void *oem_table_pointer;
134 u_short oem_table_size;
135 u_short entry_count;
136 void *apic_address;
137 u_short extended_table_length;
138 u_char extended_table_checksum;
139 u_char reserved;
140} *mpcth_t;
141
142
143typedef struct PROCENTRY {
144 u_char type;
145 u_char apic_id;
146 u_char apic_version;
147 u_char cpu_flags;
148 u_long cpu_signature;
149 u_long feature_flags;
150 u_long reserved1;
151 u_long reserved2;
152} *proc_entry_ptr;
153
154typedef struct BUSENTRY {
155 u_char type;
156 u_char bus_id;
157 char bus_type[6];
158} *bus_entry_ptr;
159
160typedef struct IOAPICENTRY {
161 u_char type;
162 u_char apic_id;
163 u_char apic_version;
164 u_char apic_flags;
165 void *apic_address;
166} *io_apic_entry_ptr;
167
168typedef struct INTENTRY {
169 u_char type;
170 u_char int_type;
171 u_short int_flags;
172 u_char src_bus_id;
173 u_char src_bus_irq;
174 u_char dst_apic_id;
175 u_char dst_apic_int;
176} *int_entry_ptr;
177
178/* descriptions of MP basetable entries */
179typedef struct BASETABLE_ENTRY {
180 u_char type;
181 u_char length;
182 char name[16];
183} basetable_entry;
184
185/*
186 * this code MUST be enabled here and in mpboot.s.
187 * it follows the very early stages of AP boot by placing values in CMOS ram.
188 * it NORMALLY will never be needed and thus the primitive method for enabling.
189 *
190#define CHECK_POINTS
191 */
192
193#if defined(CHECK_POINTS) && !defined(PC98)
194#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA))
195#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
196
197#define CHECK_INIT(D); \
198 CHECK_WRITE(0x34, (D)); \
199 CHECK_WRITE(0x35, (D)); \
200 CHECK_WRITE(0x36, (D)); \
201 CHECK_WRITE(0x37, (D)); \
202 CHECK_WRITE(0x38, (D)); \
203 CHECK_WRITE(0x39, (D));
204
205#define CHECK_PRINT(S); \
206 printf("%s: %d, %d, %d, %d, %d, %d\n", \
207 (S), \
208 CHECK_READ(0x34), \
209 CHECK_READ(0x35), \
210 CHECK_READ(0x36), \
211 CHECK_READ(0x37), \
212 CHECK_READ(0x38), \
213 CHECK_READ(0x39));
214
215#else /* CHECK_POINTS */
216
217#define CHECK_INIT(D)
218#define CHECK_PRINT(S)
219
220#endif /* CHECK_POINTS */
221
222/*
223 * Values to send to the POST hardware.
224 */
225#define MP_BOOTADDRESS_POST 0x10
226#define MP_PROBE_POST 0x11
227#define MPTABLE_PASS1_POST 0x12
228
229#define MP_START_POST 0x13
230#define MP_ENABLE_POST 0x14
231#define MPTABLE_PASS2_POST 0x15
232
233#define START_ALL_APS_POST 0x16
234#define INSTALL_AP_TRAMP_POST 0x17
235#define START_AP_POST 0x18
236
237#define MP_ANNOUNCE_POST 0x19
238
239/* used to hold the AP's until we are ready to release them */
240struct mtx ap_boot_mtx;
241
242/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
243int current_postcode;
244
245/** XXX FIXME: what system files declare these??? */
246extern struct region_descriptor r_gdt, r_idt;
247
248int bsp_apic_ready = 0; /* flags useability of BSP apic */
249int mp_ncpus; /* # of CPUs, including BSP */
250int mp_naps; /* # of Applications processors */
251int mp_nbusses; /* # of busses */
252int mp_napics; /* # of IO APICs */
253int boot_cpu_id; /* designated BSP */
254vm_offset_t cpu_apic_address;
255vm_offset_t io_apic_address[NAPICID]; /* NAPICID is more than enough */
256extern int nkpt;
257
258u_int32_t cpu_apic_versions[MAXCPU];
259u_int32_t *io_apic_versions;
260
261#ifdef APIC_INTR_REORDER
262struct {
263 volatile int *location;
264 int bit;
265} apic_isrbit_location[32];
266#endif
267
268struct apic_intmapinfo int_to_apicintpin[APIC_INTMAPSIZE];
269
270/*
271 * APIC ID logical/physical mapping structures.
272 * We oversize these to simplify boot-time config.
273 */
274int cpu_num_to_apic_id[NAPICID];
275int io_num_to_apic_id[NAPICID];
276int apic_id_to_logical[NAPICID];
277
278
279/* Bitmap of all available CPUs */
280u_int all_cpus;
281
282/* AP uses this during bootstrap. Do not staticize. */
283char *bootSTK;
284static int bootAP;
285
286/* Hotwire a 0->4MB V==P mapping */
287extern pt_entry_t *KPTphys;
288
289/* SMP page table page */
290extern pt_entry_t *SMPpt;
291
292struct pcb stoppcbs[MAXCPU];
293
294int smp_started; /* has the system started? */
295int smp_active = 0; /* are the APs allowed to run? */
296SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
297
298/* XXX maybe should be hw.ncpu */
299static int smp_cpus = 1; /* how many cpu's running */
300SYSCTL_INT(_machdep, OID_AUTO, smp_cpus, CTLFLAG_RD, &smp_cpus, 0, "");
301
302int invltlb_ok = 0; /* throttle smp_invltlb() till safe */
303SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
304
305/* Enable forwarding of a signal to a process running on a different CPU */
306static int forward_signal_enabled = 1;
307SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
308 &forward_signal_enabled, 0, "");
309
310/* Enable forwarding of roundrobin to all other cpus */
311static int forward_roundrobin_enabled = 1;
312SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
313 &forward_roundrobin_enabled, 0, "");
314
315
316/*
317 * Local data and functions.
318 */
319
320/* Set to 1 once we're ready to let the APs out of the pen. */
321static volatile int aps_ready = 0;
322
323static int mp_capable;
324static u_int boot_address;
325static u_int base_memory;
326
327static int picmode; /* 0: virtual wire mode, 1: PIC mode */
328static mpfps_t mpfps;
329static int search_for_sig(u_int32_t target, int count);
330static void mp_enable(u_int boot_addr);
331
332static void mptable_pass1(void);
333static int mptable_pass2(void);
334static void default_mp_table(int type);
335static void fix_mp_table(void);
336static void setup_apic_irq_mapping(void);
337static void init_locks(void);
338static int start_all_aps(u_int boot_addr);
339static void install_ap_tramp(u_int boot_addr);
340static int start_ap(int logicalCpu, u_int boot_addr);
341void ap_init(void);
342static int apic_int_is_bus_type(int intr, int bus_type);
343static void release_aps(void *dummy);
344
345/*
346 * initialize all the SMP locks
347 */
348
349/* critical region around IO APIC, apic_imen */
350struct mtx imen_mtx;
351
352/* lock region used by kernel profiling */
353struct mtx mcount_mtx;
354
355#ifdef USE_COMLOCK
356/* locks com (tty) data/hardware accesses: a FASTINTR() */
357struct mtx com_mtx;
358#endif /* USE_COMLOCK */
359
360/* lock around the MP rendezvous */
361static struct mtx smp_rv_mtx;
362
363/* only 1 CPU can panic at a time :) */
364struct mtx panic_mtx;
365
366static void
367init_locks(void)
368{
369 /*
370 * XXX The mcount mutex probably needs to be statically initialized,
371 * since it will be used even in the function calls that get us to this
372 * point.
373 */
374 mtx_init(&mcount_mtx, "mcount", MTX_DEF);
375
376 mtx_init(&smp_rv_mtx, "smp rendezvous", MTX_SPIN);
377 mtx_init(&panic_mtx, "panic", MTX_DEF);
378
379#ifdef USE_COMLOCK
380 mtx_init(&com_mtx, "com", MTX_SPIN);
381#endif /* USE_COMLOCK */
382
383 mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
384}
385
386/*
387 * Calculate usable address in base memory for AP trampoline code.
388 */
389u_int
390mp_bootaddress(u_int basemem)
391{
392 POSTCODE(MP_BOOTADDRESS_POST);
393
394 base_memory = basemem * 1024; /* convert to bytes */
395
396 boot_address = base_memory & ~0xfff; /* round down to 4k boundary */
397 if ((base_memory - boot_address) < bootMP_size)
398 boot_address -= 4096; /* not enough, lower by 4k */
399
400 return boot_address;
401}
402
403
404/*
405 * Look for an Intel MP spec table (ie, SMP capable hardware).
406 */
407int
408mp_probe(void)
409{
410 int x;
411 u_long segment;
412 u_int32_t target;
413
414 POSTCODE(MP_PROBE_POST);
415
416 /* see if EBDA exists */
417 if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
418 /* search first 1K of EBDA */
419 target = (u_int32_t) (segment << 4);
420 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
421 goto found;
422 } else {
423 /* last 1K of base memory, effective 'top of base' passed in */
424 target = (u_int32_t) (base_memory - 0x400);
425 if ((x = search_for_sig(target, 1024 / 4)) >= 0)
426 goto found;
427 }
428
429 /* search the BIOS */
430 target = (u_int32_t) BIOS_BASE;
431 if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
432 goto found;
433
434 /* nothing found */
435 mpfps = (mpfps_t)0;
436 mp_capable = 0;
437 return 0;
438
439found:
440 /* calculate needed resources */
441 mpfps = (mpfps_t)x;
442 mptable_pass1();
443
444 /* flag fact that we are running multiple processors */
445 mp_capable = 1;
446 return 1;
447}
448
449
450/*
451 * Initialize the SMP hardware and the APIC and start up the AP's.
452 */
453void
454mp_start(void)
455{
456 POSTCODE(MP_START_POST);
457
458 /* look for MP capable motherboard */
459 if (mp_capable)
460 mp_enable(boot_address);
461 else
462 panic("MP hardware not found!");
463}
464
465
466/*
467 * Print various information about the SMP system hardware and setup.
468 */
469void
470mp_announce(void)
471{
472 int x;
473
474 POSTCODE(MP_ANNOUNCE_POST);
475
476 printf("FreeBSD/SMP: Multiprocessor motherboard\n");
477 printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
478 printf(", version: 0x%08x", cpu_apic_versions[0]);
479 printf(", at 0x%08x\n", cpu_apic_address);
480 for (x = 1; x <= mp_naps; ++x) {
481 printf(" cpu%d (AP): apic id: %2d", x, CPU_TO_ID(x));
482 printf(", version: 0x%08x", cpu_apic_versions[x]);
483 printf(", at 0x%08x\n", cpu_apic_address);
484 }
485
486#if defined(APIC_IO)
487 for (x = 0; x < mp_napics; ++x) {
488 printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
489 printf(", version: 0x%08x", io_apic_versions[x]);
490 printf(", at 0x%08x\n", io_apic_address[x]);
491 }
492#else
493 printf(" Warning: APIC I/O disabled\n");
494#endif /* APIC_IO */
495}
496
497/*
498 * AP cpu's call this to sync up protected mode.
499 */
500void
501init_secondary(void)
502{
503 int gsel_tss;
504 int x, myid = bootAP;
505
506 gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
507 gdt_segs[GPROC0_SEL].ssd_base =
508 (int) &SMP_prvspace[myid].globaldata.gd_common_tss;
509 SMP_prvspace[myid].globaldata.gd_prvspace =
510 &SMP_prvspace[myid].globaldata;
511
512 for (x = 0; x < NGDT; x++) {
513 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
514 }
515
516 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
517 r_gdt.rd_base = (int) &gdt[myid * NGDT];
518 lgdt(&r_gdt); /* does magic intra-segment return */
519
520 lidt(&r_idt);
521
522 lldt(_default_ldt);
523 PCPU_SET(currentldt, _default_ldt);
524
525 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
526 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
527 PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
528 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
529 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
530 PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
531 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
532 ltr(gsel_tss);
533
534 pmap_set_opt();
535}
536
537
538#if defined(APIC_IO)
539/*
540 * Final configuration of the BSP's local APIC:
541 * - disable 'pic mode'.
542 * - disable 'virtual wire mode'.
543 * - enable NMI.
544 */
545void
546bsp_apic_configure(void)
547{
548 u_char byte;
549 u_int32_t temp;
550
551 /* leave 'pic mode' if necessary */
552 if (picmode) {
553 outb(0x22, 0x70); /* select IMCR */
554 byte = inb(0x23); /* current contents */
555 byte |= 0x01; /* mask external INTR */
556 outb(0x23, byte); /* disconnect 8259s/NMI */
557 }
558
559 /* mask lint0 (the 8259 'virtual wire' connection) */
560 temp = lapic.lvt_lint0;
561 temp |= APIC_LVT_M; /* set the mask */
562 lapic.lvt_lint0 = temp;
563
564 /* setup lint1 to handle NMI */
565 temp = lapic.lvt_lint1;
566 temp &= ~APIC_LVT_M; /* clear the mask */
567 lapic.lvt_lint1 = temp;
568
569 if (bootverbose)
570 apic_dump("bsp_apic_configure()");
571}
572#endif /* APIC_IO */
573
574
575/*******************************************************************
576 * local functions and data
577 */
578
579/*
580 * start the SMP system
581 */
582static void
583mp_enable(u_int boot_addr)
584{
585 int x;
586#if defined(APIC_IO)
587 int apic;
588 u_int ux;
589#endif /* APIC_IO */
590
591 POSTCODE(MP_ENABLE_POST);
592
593 /* turn on 4MB of V == P addressing so we can get to MP table */
594 *(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
595 invltlb();
596
597 /* examine the MP table for needed info, uses physical addresses */
598 x = mptable_pass2();
599
600 *(int *)PTD = 0;
601 invltlb();
602
603 /* can't process default configs till the CPU APIC is pmapped */
604 if (x)
605 default_mp_table(x);
606
607 /* post scan cleanup */
608 fix_mp_table();
609 setup_apic_irq_mapping();
610
611#if defined(APIC_IO)
612
613 /* fill the LOGICAL io_apic_versions table */
614 for (apic = 0; apic < mp_napics; ++apic) {
615 ux = io_apic_read(apic, IOAPIC_VER);
616 io_apic_versions[apic] = ux;
617 io_apic_set_id(apic, IO_TO_ID(apic));
618 }
619
620 /* program each IO APIC in the system */
621 for (apic = 0; apic < mp_napics; ++apic)
622 if (io_apic_setup(apic) < 0)
623 panic("IO APIC setup failure");
624
625 /* install a 'Spurious INTerrupt' vector */
626 setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
627 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
628
629 /* install an inter-CPU IPI for TLB invalidation */
630 setidt(XINVLTLB_OFFSET, Xinvltlb,
631 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
632
633#ifdef BETTER_CLOCK
634 /* install an inter-CPU IPI for reading processor state */
635 setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate,
636 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
637#endif
638
639 /* install an inter-CPU IPI for all-CPU rendezvous */
640 setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
641 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
642
643 /* install an inter-CPU IPI for forcing an additional software trap */
644 setidt(XCPUAST_OFFSET, Xcpuast,
645 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
646
647 /* install an inter-CPU IPI for CPU stop/restart */
648 setidt(XCPUSTOP_OFFSET, Xcpustop,
649 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
650
651#if defined(TEST_TEST1)
652 /* install a "fake hardware INTerrupt" vector */
653 setidt(XTEST1_OFFSET, Xtest1,
654 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
655#endif /** TEST_TEST1 */
656
657#endif /* APIC_IO */
658
659 /* initialize all SMP locks */
660 init_locks();
661
662 /* start each Application Processor */
663 start_all_aps(boot_addr);
664}
665
666
667/*
668 * look for the MP spec signature
669 */
670
671/* string defined by the Intel MP Spec as identifying the MP table */
672#define MP_SIG 0x5f504d5f /* _MP_ */
673#define NEXT(X) ((X) += 4)
674static int
675search_for_sig(u_int32_t target, int count)
676{
677 int x;
678 u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
679
680 for (x = 0; x < count; NEXT(x))
681 if (addr[x] == MP_SIG)
682 /* make array index a byte index */
683 return (target + (x * sizeof(u_int32_t)));
684
685 return -1;
686}
687
688
689static basetable_entry basetable_entry_types[] =
690{
691 {0, 20, "Processor"},
692 {1, 8, "Bus"},
693 {2, 8, "I/O APIC"},
694 {3, 8, "I/O INT"},
695 {4, 8, "Local INT"}
696};
697
698typedef struct BUSDATA {
699 u_char bus_id;
700 enum busTypes bus_type;
701} bus_datum;
702
703typedef struct INTDATA {
704 u_char int_type;
705 u_short int_flags;
706 u_char src_bus_id;
707 u_char src_bus_irq;
708 u_char dst_apic_id;
709 u_char dst_apic_int;
710 u_char int_vector;
711} io_int, local_int;
712
713typedef struct BUSTYPENAME {
714 u_char type;
715 char name[7];
716} bus_type_name;
717
718static bus_type_name bus_type_table[] =
719{
720 {CBUS, "CBUS"},
721 {CBUSII, "CBUSII"},
722 {EISA, "EISA"},
723 {MCA, "MCA"},
724 {UNKNOWN_BUSTYPE, "---"},
725 {ISA, "ISA"},
726 {MCA, "MCA"},
727 {UNKNOWN_BUSTYPE, "---"},
728 {UNKNOWN_BUSTYPE, "---"},
729 {UNKNOWN_BUSTYPE, "---"},
730 {UNKNOWN_BUSTYPE, "---"},
731 {UNKNOWN_BUSTYPE, "---"},
732 {PCI, "PCI"},
733 {UNKNOWN_BUSTYPE, "---"},
734 {UNKNOWN_BUSTYPE, "---"},
735 {UNKNOWN_BUSTYPE, "---"},
736 {UNKNOWN_BUSTYPE, "---"},
737 {XPRESS, "XPRESS"},
738 {UNKNOWN_BUSTYPE, "---"}
739};
740/* from MP spec v1.4, table 5-1 */
741static int default_data[7][5] =
742{
743/* nbus, id0, type0, id1, type1 */
744 {1, 0, ISA, 255, 255},
745 {1, 0, EISA, 255, 255},
746 {1, 0, EISA, 255, 255},
747 {1, 0, MCA, 255, 255},
748 {2, 0, ISA, 1, PCI},
749 {2, 0, EISA, 1, PCI},
750 {2, 0, MCA, 1, PCI}
751};
752
753
754/* the bus data */
755static bus_datum *bus_data;
756
757/* the IO INT data, one entry per possible APIC INTerrupt */
758static io_int *io_apic_ints;
759
760static int nintrs;
761
762static int processor_entry __P((proc_entry_ptr entry, int cpu));
763static int bus_entry __P((bus_entry_ptr entry, int bus));
764static int io_apic_entry __P((io_apic_entry_ptr entry, int apic));
765static int int_entry __P((int_entry_ptr entry, int intr));
766static int lookup_bus_type __P((char *name));
767
768
769/*
770 * 1st pass on motherboard's Intel MP specification table.
771 *
772 * initializes:
773 * mp_ncpus = 1
774 *
775 * determines:
776 * cpu_apic_address (common to all CPUs)
777 * io_apic_address[N]
778 * mp_naps
779 * mp_nbusses
780 * mp_napics
781 * nintrs
782 */
783static void
784mptable_pass1(void)
785{
786 int x;
787 mpcth_t cth;
788 int totalSize;
789 void* position;
790 int count;
791 int type;
792
793 POSTCODE(MPTABLE_PASS1_POST);
794
795 /* clear various tables */
796 for (x = 0; x < NAPICID; ++x) {
797 io_apic_address[x] = ~0; /* IO APIC address table */
798 }
799
800 /* init everything to empty */
801 mp_naps = 0;
802 mp_nbusses = 0;
803 mp_napics = 0;
804 nintrs = 0;
805
806 /* check for use of 'default' configuration */
807 if (MPFPS_MPFB1 != 0) {
808 /* use default addresses */
809 cpu_apic_address = DEFAULT_APIC_BASE;
810 io_apic_address[0] = DEFAULT_IO_APIC_BASE;
811
812 /* fill in with defaults */
813 mp_naps = 2; /* includes BSP */
814 mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
815#if defined(APIC_IO)
816 mp_napics = 1;
817 nintrs = 16;
818#endif /* APIC_IO */
819 }
820 else {
821 if ((cth = mpfps->pap) == 0)
822 panic("MP Configuration Table Header MISSING!");
823
824 cpu_apic_address = (vm_offset_t) cth->apic_address;
825
826 /* walk the table, recording info of interest */
827 totalSize = cth->base_table_length - sizeof(struct MPCTH);
828 position = (u_char *) cth + sizeof(struct MPCTH);
829 count = cth->entry_count;
830
831 while (count--) {
832 switch (type = *(u_char *) position) {
833 case 0: /* processor_entry */
834 if (((proc_entry_ptr)position)->cpu_flags
835 & PROCENTRY_FLAG_EN)
836 ++mp_naps;
837 break;
838 case 1: /* bus_entry */
839 ++mp_nbusses;
840 break;
841 case 2: /* io_apic_entry */
842 if (((io_apic_entry_ptr)position)->apic_flags
843 & IOAPICENTRY_FLAG_EN)
844 io_apic_address[mp_napics++] =
845 (vm_offset_t)((io_apic_entry_ptr)
846 position)->apic_address;
847 break;
848 case 3: /* int_entry */
849 ++nintrs;
850 break;
851 case 4: /* int_entry */
852 break;
853 default:
854 panic("mpfps Base Table HOSED!");
855 /* NOTREACHED */
856 }
857
858 totalSize -= basetable_entry_types[type].length;
859 (u_char*)position += basetable_entry_types[type].length;
860 }
861 }
862
863 /* qualify the numbers */
864 if (mp_naps > MAXCPU) {
865 printf("Warning: only using %d of %d available CPUs!\n",
866 MAXCPU, mp_naps);
867 mp_naps = MAXCPU;
868 }
869
870 /*
871 * Count the BSP.
872 * This is also used as a counter while starting the APs.
873 */
874 mp_ncpus = 1;
875
876 --mp_naps; /* subtract the BSP */
877}
878
879
880/*
881 * 2nd pass on motherboard's Intel MP specification table.
882 *
883 * sets:
884 * boot_cpu_id
885 * ID_TO_IO(N), phy APIC ID to log CPU/IO table
886 * CPU_TO_ID(N), logical CPU to APIC ID table
887 * IO_TO_ID(N), logical IO to APIC ID table
888 * bus_data[N]
889 * io_apic_ints[N]
890 */
891static int
892mptable_pass2(void)
893{
894 int x;
895 mpcth_t cth;
896 int totalSize;
897 void* position;
898 int count;
899 int type;
900 int apic, bus, cpu, intr;
901 int i, j;
902 int pgeflag;
903
904 POSTCODE(MPTABLE_PASS2_POST);
905
906 pgeflag = 0; /* XXX - Not used under SMP yet. */
907
908 MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
909 M_DEVBUF, M_WAITOK);
910 MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
911 M_DEVBUF, M_WAITOK);
912 MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
913 M_DEVBUF, M_WAITOK);
914 MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
915 M_DEVBUF, M_WAITOK);
916
917 bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
918
919 for (i = 0; i < mp_napics; i++) {
920 for (j = 0; j < mp_napics; j++) {
921 /* same page frame as a previous IO apic? */
922 if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
923 (io_apic_address[i] & PG_FRAME)) {
924 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
925 + (NPTEPG-2-j) * PAGE_SIZE
926 + (io_apic_address[i] & PAGE_MASK));
927 break;
928 }
929 /* use this slot if available */
930 if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
931 SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
932 pgeflag | (io_apic_address[i] & PG_FRAME));
933 ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
934 + (NPTEPG-2-j) * PAGE_SIZE
935 + (io_apic_address[i] & PAGE_MASK));
936 break;
937 }
938 }
939 }
940
941 /* clear various tables */
942 for (x = 0; x < NAPICID; ++x) {
943 ID_TO_IO(x) = -1; /* phy APIC ID to log CPU/IO table */
944 CPU_TO_ID(x) = -1; /* logical CPU to APIC ID table */
945 IO_TO_ID(x) = -1; /* logical IO to APIC ID table */
946 }
947
948 /* clear bus data table */
949 for (x = 0; x < mp_nbusses; ++x)
950 bus_data[x].bus_id = 0xff;
951
952 /* clear IO APIC INT table */
953 for (x = 0; x < (nintrs + 1); ++x) {
954 io_apic_ints[x].int_type = 0xff;
955 io_apic_ints[x].int_vector = 0xff;
956 }
957
958 /* setup the cpu/apic mapping arrays */
959 boot_cpu_id = -1;
960
961 /* record whether PIC or virtual-wire mode */
962 picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
963
964 /* check for use of 'default' configuration */
965 if (MPFPS_MPFB1 != 0)
966 return MPFPS_MPFB1; /* return default configuration type */
967
968 if ((cth = mpfps->pap) == 0)
969 panic("MP Configuration Table Header MISSING!");
970
971 /* walk the table, recording info of interest */
972 totalSize = cth->base_table_length - sizeof(struct MPCTH);
973 position = (u_char *) cth + sizeof(struct MPCTH);
974 count = cth->entry_count;
975 apic = bus = intr = 0;
976 cpu = 1; /* pre-count the BSP */
977
978 while (count--) {
979 switch (type = *(u_char *) position) {
980 case 0:
981 if (processor_entry(position, cpu))
982 ++cpu;
983 break;
984 case 1:
985 if (bus_entry(position, bus))
986 ++bus;
987 break;
988 case 2:
989 if (io_apic_entry(position, apic))
990 ++apic;
991 break;
992 case 3:
993 if (int_entry(position, intr))
994 ++intr;
995 break;
996 case 4:
997 /* int_entry(position); */
998 break;
999 default:
1000 panic("mpfps Base Table HOSED!");
1001 /* NOTREACHED */
1002 }
1003
1004 totalSize -= basetable_entry_types[type].length;
1005 (u_char *) position += basetable_entry_types[type].length;
1006 }
1007
1008 if (boot_cpu_id == -1)
1009 panic("NO BSP found!");
1010
1011 /* report fact that its NOT a default configuration */
1012 return 0;
1013}
1014
1015
1016void
1017assign_apic_irq(int apic, int intpin, int irq)
1018{
1019 int x;
1020
1021 if (int_to_apicintpin[irq].ioapic != -1)
1022 panic("assign_apic_irq: inconsistent table");
1023
1024 int_to_apicintpin[irq].ioapic = apic;
1025 int_to_apicintpin[irq].int_pin = intpin;
1026 int_to_apicintpin[irq].apic_address = ioapic[apic];
1027 int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
1028
1029 for (x = 0; x < nintrs; x++) {
1030 if ((io_apic_ints[x].int_type == 0 ||
1031 io_apic_ints[x].int_type == 3) &&
1032 io_apic_ints[x].int_vector == 0xff &&
1033 io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
1034 io_apic_ints[x].dst_apic_int == intpin)
1035 io_apic_ints[x].int_vector = irq;
1036 }
1037}
1038
1039void
1040revoke_apic_irq(int irq)
1041{
1042 int x;
1043 int oldapic;
1044 int oldintpin;
1045
1046 if (int_to_apicintpin[irq].ioapic == -1)
1047 panic("assign_apic_irq: inconsistent table");
1048
1049 oldapic = int_to_apicintpin[irq].ioapic;
1050 oldintpin = int_to_apicintpin[irq].int_pin;
1051
1052 int_to_apicintpin[irq].ioapic = -1;
1053 int_to_apicintpin[irq].int_pin = 0;
1054 int_to_apicintpin[irq].apic_address = NULL;
1055 int_to_apicintpin[irq].redirindex = 0;
1056
1057 for (x = 0; x < nintrs; x++) {
1058 if ((io_apic_ints[x].int_type == 0 ||
1059 io_apic_ints[x].int_type == 3) &&
1060 io_apic_ints[x].int_vector == 0xff &&
1061 io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
1062 io_apic_ints[x].dst_apic_int == oldintpin)
1063 io_apic_ints[x].int_vector = 0xff;
1064 }
1065}
1066
1067
1068static void
1069allocate_apic_irq(int intr)
1070{
1071 int apic;
1072 int intpin;
1073 int irq;
1074
1075 if (io_apic_ints[intr].int_vector != 0xff)
1076 return; /* Interrupt handler already assigned */
1077
1078 if (io_apic_ints[intr].int_type != 0 &&
1079 (io_apic_ints[intr].int_type != 3 ||
1080 (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
1081 io_apic_ints[intr].dst_apic_int == 0)))
1082 return; /* Not INT or ExtInt on != (0, 0) */
1083
1084 irq = 0;
1085 while (irq < APIC_INTMAPSIZE &&
1086 int_to_apicintpin[irq].ioapic != -1)
1087 irq++;
1088
1089 if (irq >= APIC_INTMAPSIZE)
1090 return; /* No free interrupt handlers */
1091
1092 apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
1093 intpin = io_apic_ints[intr].dst_apic_int;
1094
1095 assign_apic_irq(apic, intpin, irq);
1096 io_apic_setup_intpin(apic, intpin);
1097}
1098
1099
1100static void
1101swap_apic_id(int apic, int oldid, int newid)
1102{
1103 int x;
1104 int oapic;
1105
1106
1107 if (oldid == newid)
1108 return; /* Nothing to do */
1109
1110 printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
1111 apic, oldid, newid);
1112
1113 /* Swap physical APIC IDs in interrupt entries */
1114 for (x = 0; x < nintrs; x++) {
1115 if (io_apic_ints[x].dst_apic_id == oldid)
1116 io_apic_ints[x].dst_apic_id = newid;
1117 else if (io_apic_ints[x].dst_apic_id == newid)
1118 io_apic_ints[x].dst_apic_id = oldid;
1119 }
1120
1121 /* Swap physical APIC IDs in IO_TO_ID mappings */
1122 for (oapic = 0; oapic < mp_napics; oapic++)
1123 if (IO_TO_ID(oapic) == newid)
1124 break;
1125
1126 if (oapic < mp_napics) {
1127 printf("Changing APIC ID for IO APIC #%d from "
1128 "%d to %d in MP table\n",
1129 oapic, newid, oldid);
1130 IO_TO_ID(oapic) = oldid;
1131 }
1132 IO_TO_ID(apic) = newid;
1133}
1134
1135
1136static void
1137fix_id_to_io_mapping(void)
1138{
1139 int x;
1140
1141 for (x = 0; x < NAPICID; x++)
1142 ID_TO_IO(x) = -1;
1143
1144 for (x = 0; x <= mp_naps; x++)
1145 if (CPU_TO_ID(x) < NAPICID)
1146 ID_TO_IO(CPU_TO_ID(x)) = x;
1147
1148 for (x = 0; x < mp_napics; x++)
1149 if (IO_TO_ID(x) < NAPICID)
1150 ID_TO_IO(IO_TO_ID(x)) = x;
1151}
1152
1153
1154static int
1155first_free_apic_id(void)
1156{
1157 int freeid, x;
1158
1159 for (freeid = 0; freeid < NAPICID; freeid++) {
1160 for (x = 0; x <= mp_naps; x++)
1161 if (CPU_TO_ID(x) == freeid)
1162 break;
1163 if (x <= mp_naps)
1164 continue;
1165 for (x = 0; x < mp_napics; x++)
1166 if (IO_TO_ID(x) == freeid)
1167 break;
1168 if (x < mp_napics)
1169 continue;
1170 return freeid;
1171 }
1172 return freeid;
1173}
1174
1175
1176static int
1177io_apic_id_acceptable(int apic, int id)
1178{
1179 int cpu; /* Logical CPU number */
1180 int oapic; /* Logical IO APIC number for other IO APIC */
1181
1182 if (id >= NAPICID)
1183 return 0; /* Out of range */
1184
1185 for (cpu = 0; cpu <= mp_naps; cpu++)
1186 if (CPU_TO_ID(cpu) == id)
1187 return 0; /* Conflict with CPU */
1188
1189 for (oapic = 0; oapic < mp_napics && oapic < apic; oapic++)
1190 if (IO_TO_ID(oapic) == id)
1191 return 0; /* Conflict with other APIC */
1192
1193 return 1; /* ID is acceptable for IO APIC */
1194}
1195
1196
1197/*
1198 * parse an Intel MP specification table
1199 */
1200static void
1201fix_mp_table(void)
1202{
1203 int x;
1204 int id;
1205 int bus_0 = 0; /* Stop GCC warning */
1206 int bus_pci = 0; /* Stop GCC warning */
1207 int num_pci_bus;
1208 int apic; /* IO APIC unit number */
1209 int freeid; /* Free physical APIC ID */
1210 int physid; /* Current physical IO APIC ID */
1211
1212 /*
1213 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
1214 * did it wrong. The MP spec says that when more than 1 PCI bus
1215 * exists the BIOS must begin with bus entries for the PCI bus and use
1216 * actual PCI bus numbering. This implies that when only 1 PCI bus
1217 * exists the BIOS can choose to ignore this ordering, and indeed many
1218 * MP motherboards do ignore it. This causes a problem when the PCI
1219 * sub-system makes requests of the MP sub-system based on PCI bus
1220 * numbers. So here we look for the situation and renumber the
1221 * busses and associated INTs in an effort to "make it right".
1222 */
1223
1224 /* find bus 0, PCI bus, count the number of PCI busses */
1225 for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
1226 if (bus_data[x].bus_id == 0) {
1227 bus_0 = x;
1228 }
1229 if (bus_data[x].bus_type == PCI) {
1230 ++num_pci_bus;
1231 bus_pci = x;
1232 }
1233 }
1234 /*
1235 * bus_0 == slot of bus with ID of 0
1236 * bus_pci == slot of last PCI bus encountered
1237 */
1238
1239 /* check the 1 PCI bus case for sanity */
1240 /* if it is number 0 all is well */
1241 if (num_pci_bus == 1 &&
1242 bus_data[bus_pci].bus_id != 0) {
1243
1244 /* mis-numbered, swap with whichever bus uses slot 0 */
1245
1246 /* swap the bus entry types */
1247 bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
1248 bus_data[bus_0].bus_type = PCI;
1249
1250 /* swap each relavant INTerrupt entry */
1251 id = bus_data[bus_pci].bus_id;
1252 for (x = 0; x < nintrs; ++x) {
1253 if (io_apic_ints[x].src_bus_id == id) {
1254 io_apic_ints[x].src_bus_id = 0;
1255 }
1256 else if (io_apic_ints[x].src_bus_id == 0) {
1257 io_apic_ints[x].src_bus_id = id;
1258 }
1259 }
1260 }
1261
1262 /* Assign IO APIC IDs.
1263 *
1264 * First try the existing ID. If a conflict is detected, try
1265 * the ID in the MP table. If a conflict is still detected, find
1266 * a free id.
1267 *
1268 * We cannot use the ID_TO_IO table before all conflicts has been
1269 * resolved and the table has been corrected.
1270 */
1271 for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
1272
1273 /* First try to use the value set by the BIOS */
1274 physid = io_apic_get_id(apic);
1275 if (io_apic_id_acceptable(apic, physid)) {
1276 if (IO_TO_ID(apic) != physid)
1277 swap_apic_id(apic, IO_TO_ID(apic), physid);
1278 continue;
1279 }
1280
1281 /* Then check if the value in the MP table is acceptable */
1282 if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
1283 continue;
1284
1285 /* Last resort, find a free APIC ID and use it */
1286 freeid = first_free_apic_id();
1287 if (freeid >= NAPICID)
1288 panic("No free physical APIC IDs found");
1289
1290 if (io_apic_id_acceptable(apic, freeid)) {
1291 swap_apic_id(apic, IO_TO_ID(apic), freeid);
1292 continue;
1293 }
1294 panic("Free physical APIC ID not usable");
1295 }
1296 fix_id_to_io_mapping();
1297
1298 /* detect and fix broken Compaq MP table */
1299 if (apic_int_type(0, 0) == -1) {
1300 printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
1301 io_apic_ints[nintrs].int_type = 3; /* ExtInt */
1302 io_apic_ints[nintrs].int_vector = 0xff; /* Unassigned */
1303 /* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
1304 io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
1305 io_apic_ints[nintrs].dst_apic_int = 0; /* Pin 0 */
1306 nintrs++;
1307 }
1308}
1309
1310
1311/* Assign low level interrupt handlers */
1312static void
1313setup_apic_irq_mapping(void)
1314{
1315 int x;
1316 int int_vector;
1317
1318 /* Clear array */
1319 for (x = 0; x < APIC_INTMAPSIZE; x++) {
1320 int_to_apicintpin[x].ioapic = -1;
1321 int_to_apicintpin[x].int_pin = 0;
1322 int_to_apicintpin[x].apic_address = NULL;
1323 int_to_apicintpin[x].redirindex = 0;
1324 }
1325
1326 /* First assign ISA/EISA interrupts */
1327 for (x = 0; x < nintrs; x++) {
1328 int_vector = io_apic_ints[x].src_bus_irq;
1329 if (int_vector < APIC_INTMAPSIZE &&
1330 io_apic_ints[x].int_vector == 0xff &&
1331 int_to_apicintpin[int_vector].ioapic == -1 &&
1332 (apic_int_is_bus_type(x, ISA) ||
1333 apic_int_is_bus_type(x, EISA)) &&
1334 io_apic_ints[x].int_type == 0) {
1335 assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id),
1336 io_apic_ints[x].dst_apic_int,
1337 int_vector);
1338 }
1339 }
1340
1341 /* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
1342 for (x = 0; x < nintrs; x++) {
1343 if (io_apic_ints[x].dst_apic_int == 0 &&
1344 io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
1345 io_apic_ints[x].int_vector == 0xff &&
1346 int_to_apicintpin[0].ioapic == -1 &&
1347 io_apic_ints[x].int_type == 3) {
1348 assign_apic_irq(0, 0, 0);
1349 break;
1350 }
1351 }
1352 /* PCI interrupt assignment is deferred */
1353}
1354
1355
1356static int
1357processor_entry(proc_entry_ptr entry, int cpu)
1358{
1359 /* check for usability */
1360 if (!(entry->cpu_flags & PROCENTRY_FLAG_EN))
1361 return 0;
1362
1363 if(entry->apic_id >= NAPICID)
1364 panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
1365 /* check for BSP flag */
1366 if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
1367 boot_cpu_id = entry->apic_id;
1368 CPU_TO_ID(0) = entry->apic_id;
1369 ID_TO_CPU(entry->apic_id) = 0;
1370 return 0; /* its already been counted */
1371 }
1372
1373 /* add another AP to list, if less than max number of CPUs */
1374 else if (cpu < MAXCPU) {
1375 CPU_TO_ID(cpu) = entry->apic_id;
1376 ID_TO_CPU(entry->apic_id) = cpu;
1377 return 1;
1378 }
1379
1380 return 0;
1381}
1382
1383
1384static int
1385bus_entry(bus_entry_ptr entry, int bus)
1386{
1387 int x;
1388 char c, name[8];
1389
1390 /* encode the name into an index */
1391 for (x = 0; x < 6; ++x) {
1392 if ((c = entry->bus_type[x]) == ' ')
1393 break;
1394 name[x] = c;
1395 }
1396 name[x] = '\0';
1397
1398 if ((x = lookup_bus_type(name)) == UNKNOWN_BUSTYPE)
1399 panic("unknown bus type: '%s'", name);
1400
1401 bus_data[bus].bus_id = entry->bus_id;
1402 bus_data[bus].bus_type = x;
1403
1404 return 1;
1405}
1406
1407
1408static int
1409io_apic_entry(io_apic_entry_ptr entry, int apic)
1410{
1411 if (!(entry->apic_flags & IOAPICENTRY_FLAG_EN))
1412 return 0;
1413
1414 IO_TO_ID(apic) = entry->apic_id;
1415 if (entry->apic_id < NAPICID)
1416 ID_TO_IO(entry->apic_id) = apic;
1417
1418 return 1;
1419}
1420
1421
1422static int
1423lookup_bus_type(char *name)
1424{
1425 int x;
1426
1427 for (x = 0; x < MAX_BUSTYPE; ++x)
1428 if (strcmp(bus_type_table[x].name, name) == 0)
1429 return bus_type_table[x].type;
1430
1431 return UNKNOWN_BUSTYPE;
1432}
1433
1434
1435static int
1436int_entry(int_entry_ptr entry, int intr)
1437{
1438 int apic;
1439
1440 io_apic_ints[intr].int_type = entry->int_type;
1441 io_apic_ints[intr].int_flags = entry->int_flags;
1442 io_apic_ints[intr].src_bus_id = entry->src_bus_id;
1443 io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
1444 if (entry->dst_apic_id == 255) {
1445 /* This signal goes to all IO APICS. Select an IO APIC
1446 with sufficient number of interrupt pins */
1447 for (apic = 0; apic < mp_napics; apic++)
1448 if (((io_apic_read(apic, IOAPIC_VER) &
1449 IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
1450 entry->dst_apic_int)
1451 break;
1452 if (apic < mp_napics)
1453 io_apic_ints[intr].dst_apic_id = IO_TO_ID(apic);
1454 else
1455 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1456 } else
1457 io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
1458 io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
1459
1460 return 1;
1461}
1462
1463
1464static int
1465apic_int_is_bus_type(int intr, int bus_type)
1466{
1467 int bus;
1468
1469 for (bus = 0; bus < mp_nbusses; ++bus)
1470 if ((bus_data[bus].bus_id == io_apic_ints[intr].src_bus_id)
1471 && ((int) bus_data[bus].bus_type == bus_type))
1472 return 1;
1473
1474 return 0;
1475}
1476
1477
1478/*
1479 * Given a traditional ISA INT mask, return an APIC mask.
1480 */
1481u_int
1482isa_apic_mask(u_int isa_mask)
1483{
1484 int isa_irq;
1485 int apic_pin;
1486
1487#if defined(SKIP_IRQ15_REDIRECT)
1488 if (isa_mask == (1 << 15)) {
1489 printf("skipping ISA IRQ15 redirect\n");
1490 return isa_mask;
1491 }
1492#endif /* SKIP_IRQ15_REDIRECT */
1493
1494 isa_irq = ffs(isa_mask); /* find its bit position */
1495 if (isa_irq == 0) /* doesn't exist */
1496 return 0;
1497 --isa_irq; /* make it zero based */
1498
1499 apic_pin = isa_apic_irq(isa_irq); /* look for APIC connection */
1500 if (apic_pin == -1)
1501 return 0;
1502
1503 return (1 << apic_pin); /* convert pin# to a mask */
1504}
1505
1506
1507/*
1508 * Determine which APIC pin an ISA/EISA INT is attached to.
1509 */
1510#define INTTYPE(I) (io_apic_ints[(I)].int_type)
1511#define INTPIN(I) (io_apic_ints[(I)].dst_apic_int)
1512#define INTIRQ(I) (io_apic_ints[(I)].int_vector)
1513#define INTAPIC(I) (ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
1514
1515#define SRCBUSIRQ(I) (io_apic_ints[(I)].src_bus_irq)
1516int
1517isa_apic_irq(int isa_irq)
1518{
1519 int intr;
1520
1521 for (intr = 0; intr < nintrs; ++intr) { /* check each record */
1522 if (INTTYPE(intr) == 0) { /* standard INT */
1523 if (SRCBUSIRQ(intr) == isa_irq) {
1524 if (apic_int_is_bus_type(intr, ISA) ||
1525 apic_int_is_bus_type(intr, EISA)) {
1526 if (INTIRQ(intr) == 0xff)
1527 return -1; /* unassigned */
1528 return INTIRQ(intr); /* found */
1529 }
1530 }
1531 }
1532 }
1533 return -1; /* NOT found */
1534}
1535
1536
1537/*
1538 * Determine which APIC pin a PCI INT is attached to.
1539 */
1540#define SRCBUSID(I) (io_apic_ints[(I)].src_bus_id)
1541#define SRCBUSDEVICE(I) ((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
1542#define SRCBUSLINE(I) (io_apic_ints[(I)].src_bus_irq & 0x03)
1543int
1544pci_apic_irq(int pciBus, int pciDevice, int pciInt)
1545{
1546 int intr;
1547
1548 --pciInt; /* zero based */
1549
1550 for (intr = 0; intr < nintrs; ++intr) /* check each record */
1551 if ((INTTYPE(intr) == 0) /* standard INT */
1552 && (SRCBUSID(intr) == pciBus)
1553 && (SRCBUSDEVICE(intr) == pciDevice)
1554 && (SRCBUSLINE(intr) == pciInt)) /* a candidate IRQ */
1555 if (apic_int_is_bus_type(intr, PCI)) {
1556 if (INTIRQ(intr) == 0xff)
1557 allocate_apic_irq(intr);
1558 if (INTIRQ(intr) == 0xff)
1559 return -1; /* unassigned */
1560 return INTIRQ(intr); /* exact match */
1561 }
1562
1563 return -1; /* NOT found */
1564}
1565
1566int
1567next_apic_irq(int irq)
1568{
1569 int intr, ointr;
1570 int bus, bustype;
1571
1572 bus = 0;
1573 bustype = 0;
1574 for (intr = 0; intr < nintrs; intr++) {
1575 if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
1576 continue;
1577 bus = SRCBUSID(intr);
1578 bustype = apic_bus_type(bus);
1579 if (bustype != ISA &&
1580 bustype != EISA &&
1581 bustype != PCI)
1582 continue;
1583 break;
1584 }
1585 if (intr >= nintrs) {
1586 return -1;
1587 }
1588 for (ointr = intr + 1; ointr < nintrs; ointr++) {
1589 if (INTTYPE(ointr) != 0)
1590 continue;
1591 if (bus != SRCBUSID(ointr))
1592 continue;
1593 if (bustype == PCI) {
1594 if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
1595 continue;
1596 if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
1597 continue;
1598 }
1599 if (bustype == ISA || bustype == EISA) {
1600 if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
1601 continue;
1602 }
1603 if (INTPIN(intr) == INTPIN(ointr))
1604 continue;
1605 break;
1606 }
1607 if (ointr >= nintrs) {
1608 return -1;
1609 }
1610 return INTIRQ(ointr);
1611}
1612#undef SRCBUSLINE
1613#undef SRCBUSDEVICE
1614#undef SRCBUSID
1615#undef SRCBUSIRQ
1616
1617#undef INTPIN
1618#undef INTIRQ
1619#undef INTAPIC
1620#undef INTTYPE
1621
1622
1623/*
1624 * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
1625 *
1626 * XXX FIXME:
1627 * Exactly what this means is unclear at this point. It is a solution
1628 * for motherboards that redirect the MBIRQ0 pin. Generically a motherboard
1629 * could route any of the ISA INTs to upper (>15) IRQ values. But most would
1630 * NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
1631 * option.
1632 */
1633int
1634undirect_isa_irq(int rirq)
1635{
1636#if defined(READY)
1637 if (bootverbose)
1638 printf("Freeing redirected ISA irq %d.\n", rirq);
1639 /** FIXME: tickle the MB redirector chip */
1640 return -1;
1641#else
1642 if (bootverbose)
1643 printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
1644 return 0;
1645#endif /* READY */
1646}
1647
1648
1649/*
1650 * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
1651 */
1652int
1653undirect_pci_irq(int rirq)
1654{
1655#if defined(READY)
1656 if (bootverbose)
1657 printf("Freeing redirected PCI irq %d.\n", rirq);
1658
1659 /** FIXME: tickle the MB redirector chip */
1660 return -1;
1661#else
1662 if (bootverbose)
1663 printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
1664 rirq);
1665 return 0;
1666#endif /* READY */
1667}
1668
1669
1670/*
1671 * given a bus ID, return:
1672 * the bus type if found
1673 * -1 if NOT found
1674 */
1675int
1676apic_bus_type(int id)
1677{
1678 int x;
1679
1680 for (x = 0; x < mp_nbusses; ++x)
1681 if (bus_data[x].bus_id == id)
1682 return bus_data[x].bus_type;
1683
1684 return -1;
1685}
1686
1687
1688/*
1689 * given a LOGICAL APIC# and pin#, return:
1690 * the associated src bus ID if found
1691 * -1 if NOT found
1692 */
1693int
1694apic_src_bus_id(int apic, int pin)
1695{
1696 int x;
1697
1698 /* search each of the possible INTerrupt sources */
1699 for (x = 0; x < nintrs; ++x)
1700 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1701 (pin == io_apic_ints[x].dst_apic_int))
1702 return (io_apic_ints[x].src_bus_id);
1703
1704 return -1; /* NOT found */
1705}
1706
1707
1708/*
1709 * given a LOGICAL APIC# and pin#, return:
1710 * the associated src bus IRQ if found
1711 * -1 if NOT found
1712 */
1713int
1714apic_src_bus_irq(int apic, int pin)
1715{
1716 int x;
1717
1718 for (x = 0; x < nintrs; x++)
1719 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1720 (pin == io_apic_ints[x].dst_apic_int))
1721 return (io_apic_ints[x].src_bus_irq);
1722
1723 return -1; /* NOT found */
1724}
1725
1726
1727/*
1728 * given a LOGICAL APIC# and pin#, return:
1729 * the associated INTerrupt type if found
1730 * -1 if NOT found
1731 */
1732int
1733apic_int_type(int apic, int pin)
1734{
1735 int x;
1736
1737 /* search each of the possible INTerrupt sources */
1738 for (x = 0; x < nintrs; ++x)
1739 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1740 (pin == io_apic_ints[x].dst_apic_int))
1741 return (io_apic_ints[x].int_type);
1742
1743 return -1; /* NOT found */
1744}
1745
1746int
1747apic_irq(int apic, int pin)
1748{
1749 int x;
1750 int res;
1751
1752 for (x = 0; x < nintrs; ++x)
1753 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1754 (pin == io_apic_ints[x].dst_apic_int)) {
1755 res = io_apic_ints[x].int_vector;
1756 if (res == 0xff)
1757 return -1;
1758 if (apic != int_to_apicintpin[res].ioapic)
1759 panic("apic_irq: inconsistent table");
1760 if (pin != int_to_apicintpin[res].int_pin)
1761 panic("apic_irq inconsistent table (2)");
1762 return res;
1763 }
1764 return -1;
1765}
1766
1767
1768/*
1769 * given a LOGICAL APIC# and pin#, return:
1770 * the associated trigger mode if found
1771 * -1 if NOT found
1772 */
1773int
1774apic_trigger(int apic, int pin)
1775{
1776 int x;
1777
1778 /* search each of the possible INTerrupt sources */
1779 for (x = 0; x < nintrs; ++x)
1780 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1781 (pin == io_apic_ints[x].dst_apic_int))
1782 return ((io_apic_ints[x].int_flags >> 2) & 0x03);
1783
1784 return -1; /* NOT found */
1785}
1786
1787
1788/*
1789 * given a LOGICAL APIC# and pin#, return:
1790 * the associated 'active' level if found
1791 * -1 if NOT found
1792 */
1793int
1794apic_polarity(int apic, int pin)
1795{
1796 int x;
1797
1798 /* search each of the possible INTerrupt sources */
1799 for (x = 0; x < nintrs; ++x)
1800 if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
1801 (pin == io_apic_ints[x].dst_apic_int))
1802 return (io_apic_ints[x].int_flags & 0x03);
1803
1804 return -1; /* NOT found */
1805}
1806
1807
1808/*
1809 * set data according to MP defaults
1810 * FIXME: probably not complete yet...
1811 */
1812static void
1813default_mp_table(int type)
1814{
1815 int ap_cpu_id;
1816#if defined(APIC_IO)
1817 int io_apic_id;
1818 int pin;
1819#endif /* APIC_IO */
1820
1821#if 0
1822 printf(" MP default config type: %d\n", type);
1823 switch (type) {
1824 case 1:
1825 printf(" bus: ISA, APIC: 82489DX\n");
1826 break;
1827 case 2:
1828 printf(" bus: EISA, APIC: 82489DX\n");
1829 break;
1830 case 3:
1831 printf(" bus: EISA, APIC: 82489DX\n");
1832 break;
1833 case 4:
1834 printf(" bus: MCA, APIC: 82489DX\n");
1835 break;
1836 case 5:
1837 printf(" bus: ISA+PCI, APIC: Integrated\n");
1838 break;
1839 case 6:
1840 printf(" bus: EISA+PCI, APIC: Integrated\n");
1841 break;
1842 case 7:
1843 printf(" bus: MCA+PCI, APIC: Integrated\n");
1844 break;
1845 default:
1846 printf(" future type\n");
1847 break;
1848 /* NOTREACHED */
1849 }
1850#endif /* 0 */
1851
1852 boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
1853 ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
1854
1855 /* BSP */
1856 CPU_TO_ID(0) = boot_cpu_id;
1857 ID_TO_CPU(boot_cpu_id) = 0;
1858
1859 /* one and only AP */
1860 CPU_TO_ID(1) = ap_cpu_id;
1861 ID_TO_CPU(ap_cpu_id) = 1;
1862
1863#if defined(APIC_IO)
1864 /* one and only IO APIC */
1865 io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
1866
1867 /*
1868 * sanity check, refer to MP spec section 3.6.6, last paragraph
1869 * necessary as some hardware isn't properly setting up the IO APIC
1870 */
1871#if defined(REALLY_ANAL_IOAPICID_VALUE)
1872 if (io_apic_id != 2) {
1873#else
1874 if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
1875#endif /* REALLY_ANAL_IOAPICID_VALUE */
1876 io_apic_set_id(0, 2);
1877 io_apic_id = 2;
1878 }
1879 IO_TO_ID(0) = io_apic_id;
1880 ID_TO_IO(io_apic_id) = 0;
1881#endif /* APIC_IO */
1882
1883 /* fill out bus entries */
1884 switch (type) {
1885 case 1:
1886 case 2:
1887 case 3:
1888 case 4:
1889 case 5:
1890 case 6:
1891 case 7:
1892 bus_data[0].bus_id = default_data[type - 1][1];
1893 bus_data[0].bus_type = default_data[type - 1][2];
1894 bus_data[1].bus_id = default_data[type - 1][3];
1895 bus_data[1].bus_type = default_data[type - 1][4];
1896 break;
1897
1898 /* case 4: case 7: MCA NOT supported */
1899 default: /* illegal/reserved */
1900 panic("BAD default MP config: %d", type);
1901 /* NOTREACHED */
1902 }
1903
1904#if defined(APIC_IO)
1905 /* general cases from MP v1.4, table 5-2 */
1906 for (pin = 0; pin < 16; ++pin) {
1907 io_apic_ints[pin].int_type = 0;
1908 io_apic_ints[pin].int_flags = 0x05; /* edge/active-hi */
1909 io_apic_ints[pin].src_bus_id = 0;
1910 io_apic_ints[pin].src_bus_irq = pin; /* IRQ2 caught below */
1911 io_apic_ints[pin].dst_apic_id = io_apic_id;
1912 io_apic_ints[pin].dst_apic_int = pin; /* 1-to-1 */
1913 }
1914
1915 /* special cases from MP v1.4, table 5-2 */
1916 if (type == 2) {
1917 io_apic_ints[2].int_type = 0xff; /* N/C */
1918 io_apic_ints[13].int_type = 0xff; /* N/C */
1919#if !defined(APIC_MIXED_MODE)
1920 /** FIXME: ??? */
1921 panic("sorry, can't support type 2 default yet");
1922#endif /* APIC_MIXED_MODE */
1923 }
1924 else
1925 io_apic_ints[2].src_bus_irq = 0; /* ISA IRQ0 is on APIC INT 2 */
1926
1927 if (type == 7)
1928 io_apic_ints[0].int_type = 0xff; /* N/C */
1929 else
1930 io_apic_ints[0].int_type = 3; /* vectored 8259 */
1931#endif /* APIC_IO */
1932}
1933
1934
1935/*
1936 * start each AP in our list
1937 */
1938static int
1939start_all_aps(u_int boot_addr)
1940{
1941 int x, i, pg;
1942 u_char mpbiosreason;
1943 u_long mpbioswarmvec;
1944 struct globaldata *gd;
1945 char *stack;
1946 uintptr_t kptbase;
1947
1948 POSTCODE(START_ALL_APS_POST);
1949
1950 /* initialize BSP's local APIC */
1951 apic_initialize();
1952 bsp_apic_ready = 1;
1953
1954 /* install the AP 1st level boot code */
1955 install_ap_tramp(boot_addr);
1956
1957
1958 /* save the current value of the warm-start vector */
1959 mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
1960#ifndef PC98
1961 outb(CMOS_REG, BIOS_RESET);
1962 mpbiosreason = inb(CMOS_DATA);
1963#endif
1964
1965 /* record BSP in CPU map */
1966 all_cpus = 1;
1967
1968 /* set up temporary P==V mapping for AP boot */
1969 /* XXX this is a hack, we should boot the AP on its own stack/PTD */
1970 kptbase = (uintptr_t)(void *)KPTphys;
1971 for (x = 0; x < NKPT; x++)
1972 PTD[x] = (pd_entry_t)(PG_V | PG_RW |
1973 ((kptbase + x * PAGE_SIZE) & PG_FRAME));
1974 invltlb();
1975
1976 /* start each AP */
1977 for (x = 1; x <= mp_naps; ++x) {
1978
1979 /* This is a bit verbose, it will go away soon. */
1980
1981 /* first page of AP's private space */
1982 pg = x * i386_btop(sizeof(struct privatespace));
1983
1984 /* allocate a new private data page */
1985 gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
1986
1987 /* wire it into the private page table page */
1988 SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
1989
1990 /* allocate and set up an idle stack data page */
1991 stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
1992 for (i = 0; i < UPAGES; i++)
1993 SMPpt[pg + 1 + i] = (pt_entry_t)
1994 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
1995
1996 /* prime data page for it to use */
1997 SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
1998 gd->gd_cpuid = x;
1999
2000 /* setup a vector to our boot code */
2001 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
2002 *((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
2003#ifndef PC98
2004 outb(CMOS_REG, BIOS_RESET);
2005 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */
2006#endif
2007
2008 bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
2009 bootAP = x;
2010
2011 /* attempt to start the Application Processor */
2012 CHECK_INIT(99); /* setup checkpoints */
2013 if (!start_ap(x, boot_addr)) {
2014 printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
2015 CHECK_PRINT("trace"); /* show checkpoints */
2016 /* better panic as the AP may be running loose */
2017 printf("panic y/n? [y] ");
2018 if (cngetc() != 'n')
2019 panic("bye-bye");
2020 }
2021 CHECK_PRINT("trace"); /* show checkpoints */
2022
2023 /* record its version info */
2024 cpu_apic_versions[x] = cpu_apic_versions[0];
2025
2026 all_cpus |= (1 << x); /* record AP in CPU map */
2027 }
2028
2029 /* build our map of 'other' CPUs */
2030 PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
2031
2032 /* fill in our (BSP) APIC version */
2033 cpu_apic_versions[0] = lapic.version;
2034
2035 /* restore the warmstart vector */
2036 *(u_long *) WARMBOOT_OFF = mpbioswarmvec;
2037#ifndef PC98
2038 outb(CMOS_REG, BIOS_RESET);
2039 outb(CMOS_DATA, mpbiosreason);
2040#endif
2041
2042 /*
2043	 * Set up the idle context for the BSP.  Similar to the above, except
2044	 * that some of it was done by locore, some by pmap.c, and some is
2045	 * implicit because the BSP is cpu#0, the page is initially zero, and
2046	 * we can refer to variables by name on the BSP.
2047 */
2048
2049 /* Allocate and setup BSP idle stack */
2050 stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
2051 for (i = 0; i < UPAGES; i++)
2052 SMPpt[1 + i] = (pt_entry_t)
2053 (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
2054
2055 for (x = 0; x < NKPT; x++)
2056 PTD[x] = 0;
2057 pmap_set_opt();
2058
2059 /* number of APs actually started */
2060 return mp_ncpus - 1;
2061}
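
/*
 * Illustrative sketch, not part of the original source: the warm-boot
 * plumbing in start_all_aps() only works when 'boot_addr' is page
 * aligned and lies below 1MB, since the STARTUP IPI vector is the
 * physical page number (boot_addr >> 12, an 8-bit value) and the
 * real-mode warm-boot segment is boot_addr >> 4.  A hypothetical
 * sanity-check helper could look like this:
 */
#if 0	/* example only, not in the original file */
static __inline int
mp_boot_addr_ok(u_int boot_addr)
{
	/* page aligned and reachable from real mode (below 1MB) */
	return ((boot_addr & PAGE_MASK) == 0 && boot_addr < (1 << 20));
}
#endif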
2062
2063
2064/*
2065 * load the 1st level AP boot code into base memory.
2066 */
2067
2068/* targets for relocation */
2069extern void bigJump(void);
2070extern void bootCodeSeg(void);
2071extern void bootDataSeg(void);
2072extern void MPentry(void);
2073extern u_int MP_GDT;
2074extern u_int mp_gdtbase;
2075
2076static void
2077install_ap_tramp(u_int boot_addr)
2078{
2079 int x;
2080 int size = *(int *) ((u_long) & bootMP_size);
2081 u_char *src = (u_char *) ((u_long) bootMP);
2082 u_char *dst = (u_char *) boot_addr + KERNBASE;
2083 u_int boot_base = (u_int) bootMP;
2084 u_int8_t *dst8;
2085 u_int16_t *dst16;
2086 u_int32_t *dst32;
2087
2088 POSTCODE(INSTALL_AP_TRAMP_POST);
2089
2090 for (x = 0; x < size; ++x)
2091 *dst++ = *src++;
2092
2093 /*
2094 * modify addresses in code we just moved to basemem. unfortunately we
2095 * need fairly detailed info about mpboot.s for this to work. changes
2096 * to mpboot.s might require changes here.
2097 */
2098
2099 /* boot code is located in KERNEL space */
2100 dst = (u_char *) boot_addr + KERNBASE;
2101
2102 /* modify the lgdt arg */
2103 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
2104 *dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
2105
2106 /* modify the ljmp target for MPentry() */
2107 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
2108 *dst32 = ((u_int) MPentry - KERNBASE);
2109
2110 /* modify the target for boot code segment */
2111 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
2112 dst8 = (u_int8_t *) (dst16 + 1);
2113 *dst16 = (u_int) boot_addr & 0xffff;
2114 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2115
2116 /* modify the target for boot data segment */
2117 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
2118 dst8 = (u_int8_t *) (dst16 + 1);
2119 *dst16 = (u_int) boot_addr & 0xffff;
2120 *dst8 = ((u_int) boot_addr >> 16) & 0xff;
2121}
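
/*
 * Illustrative note, assuming bootCodeSeg/bootDataSeg label the base
 * fields of the boot code/data descriptors inside MP_GDT (as the two
 * patch sites above imply): an i386 segment descriptor stores its base
 * address split across adjacent fields, which is why each 16-bit store
 * is followed by an 8-bit store immediately after it:
 *
 *	bytes 2-3: base[15:0]	<- *dst16 = boot_addr & 0xffff
 *	byte  4:   base[23:16]	<- *dst8  = (boot_addr >> 16) & 0xff
 *	byte  7:   base[31:24]	   (stays 0; boot_addr is in base memory)
 */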
2122
2123
2124/*
2125 * this function starts the AP (application processor) identified
2126	 * by the logical cpu id 'logical_cpu'.  It does quite a "song and dance"
2127 * to accomplish this. This is necessary because of the nuances
2128 * of the different hardware we might encounter. It ain't pretty,
2129 * but it seems to work.
2130 */
2131static int
2132start_ap(int logical_cpu, u_int boot_addr)
2133{
2134 int physical_cpu;
2135 int vector;
2136 int cpus;
2137 u_long icr_lo, icr_hi;
2138
2139 POSTCODE(START_AP_POST);
2140
2141 /* get the PHYSICAL APIC ID# */
2142 physical_cpu = CPU_TO_ID(logical_cpu);
2143
2144 /* calculate the vector */
2145 vector = (boot_addr >> 12) & 0xff;
2146
2147 /* used as a watchpoint to signal AP startup */
2148 cpus = mp_ncpus;
2149
2150 /*
2151	 * first we do an INIT/RESET IPI.  This INIT IPI might be run, resetting
2152	 * and running the target CPU.  OR this INIT IPI might be latched (P5
2153	 * bug), leaving the CPU waiting for the STARTUP IPI.  OR this INIT IPI
2154	 * might be ignored.
2155 */
2156
2157 /* setup the address for the target AP */
2158 icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
2159 icr_hi |= (physical_cpu << 24);
2160 lapic.icr_hi = icr_hi;
2161
2162 /* do an INIT IPI: assert RESET */
2163 icr_lo = lapic.icr_lo & 0xfff00000;
2164 lapic.icr_lo = icr_lo | 0x0000c500;
2165
2166 /* wait for pending status end */
2167 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2168 /* spin */ ;
2169
2170 /* do an INIT IPI: deassert RESET */
2171 lapic.icr_lo = icr_lo | 0x00008500;
2172
2173 /* wait for pending status end */
2174 u_sleep(10000); /* wait ~10mS */
2175 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2176 /* spin */ ;
2177
2178 /*
2179	 * next we do a STARTUP IPI: the previous INIT IPI might still be
2180	 * latched (P5 bug); this 1st STARTUP would then terminate
2181	 * immediately, and the previously started INIT IPI would continue.  OR
2182	 * the previous INIT IPI has already run, and this STARTUP IPI will
2183	 * run.  OR the previous INIT IPI was ignored, and this STARTUP IPI
2184	 * will run.
2185 */
2186
2187 /* do a STARTUP IPI */
2188 lapic.icr_lo = icr_lo | 0x00000600 | vector;
2189 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2190 /* spin */ ;
2191 u_sleep(200); /* wait ~200uS */
2192
2193 /*
2194 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
2195 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
2196 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
2197 * recognized after hardware RESET or INIT IPI.
2198 */
2199
2200 lapic.icr_lo = icr_lo | 0x00000600 | vector;
2201 while (lapic.icr_lo & APIC_DELSTAT_MASK)
2202 /* spin */ ;
2203 u_sleep(200); /* wait ~200uS */
2204
2205 /* wait for it to start */
2206	set_apic_timer(5000000);	/* == 5 seconds */
2207 while (read_apic_timer())
2208 if (mp_ncpus > cpus)
2209 return 1; /* return SUCCESS */
2210
2211 return 0; /* return FAILURE */
2212}
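
/*
 * Illustrative decode of the magic ICR values used above (field names
 * are descriptive only, not taken from the APIC register headers):
 *
 *	0x0000c500 = trigger: level (bit 15) | level: assert (bit 14) |
 *		     delivery mode 101b (INIT)    -> INIT IPI, assert
 *	0x00008500 = trigger: level | level: de-assert |
 *		     delivery mode 101b (INIT)    -> INIT IPI, de-assert
 *	0x00000600 | vector = delivery mode 110b (STARTUP), with the
 *		     vector being the boot page number (boot_addr >> 12)
 *
 * The destination APIC ID is set separately via icr_hi (physical ID in
 * bits 31:24), as done at the top of start_ap().
 */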
2213
2214/*
2215 * Flush the TLB on all other CPU's
2216 *
2217	 * XXX: Needs to handshake and wait for completion before proceeding.
2218 */
2219void
2220smp_invltlb(void)
2221{
2222#if defined(APIC_IO)
2223 if (smp_started && invltlb_ok)
2224 all_but_self_ipi(XINVLTLB_OFFSET);
2225#endif /* APIC_IO */
2226}
2227
2228void
2229invlpg(u_int addr)
2230{
2231 __asm __volatile("invlpg (%0)"::"r"(addr):"memory");
2232
2233 /* send a message to the other CPUs */
2234 smp_invltlb();
2235}
2236
2237void
2238invltlb(void)
2239{
2240 u_long temp;
2241
2242 /*
2243 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
2244 * inlined.
2245 */
2246 __asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
2247
2248 /* send a message to the other CPUs */
2249 smp_invltlb();
2250}
2251
2252
2253/*
2254 * This is called once the rest of the system is up and running and we're
2255 * ready to let the AP's out of the pen.
2256 */
2257void
2258ap_init(void)
2259{
2260 u_int apic_id;
2261
2262 /* spin until all the AP's are ready */
2263 while (!aps_ready)
2264 /* spin */ ;
2265
2266 /*
2267 * Set curproc to our per-cpu idleproc so that mutexes have
2268 * something unique to lock with.
2269 */
2270 PCPU_SET(curproc, PCPU_GET(idleproc));
2271 PCPU_SET(spinlocks, NULL);
2271
2272 /* lock against other AP's that are waking up */
2273 mtx_lock_spin(&ap_boot_mtx);
2274
2275 /* BSP may have changed PTD while we're waiting for the lock */
2276 cpu_invltlb();
2277
2278 smp_cpus++;
2279
2280#if defined(I586_CPU) && !defined(NO_F00F_HACK)
2281 lidt(&r_idt);
2282#endif
2283
2284 /* Build our map of 'other' CPUs. */
2285 PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
2286
2287 printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
2288
2289 /* set up CPU registers and state */
2290 cpu_setregs();
2291
2292 /* set up FPU state on the AP */
2293 npxinit(__INITIAL_NPXCW__);
2294
2295 /* A quick check from sanity claus */
2296 apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
2297 if (PCPU_GET(cpuid) != apic_id) {
2298 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
2299 printf("SMP: apic_id = %d\n", apic_id);
2300 printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
2301 panic("cpuid mismatch! boom!!");
2302 }
2303
2304 /* Init local apic for irq's */
2305 apic_initialize();
2306
2307 /* Set memory range attributes for this CPU to match the BSP */
2308 mem_range_AP_init();
2309
2310 /*
2311 * Activate smp_invltlb, although strictly speaking, this isn't
2312 * quite correct yet. We should have a bitfield for cpus willing
2313 * to accept TLB flush IPI's or something and sync them.
2314 */
2315 if (smp_cpus == mp_ncpus) {
2316 invltlb_ok = 1;
2317 smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
2318 smp_active = 1; /* historic */
2319 }
2320
2321 /* let other AP's wake up now */
2322 mtx_unlock_spin(&ap_boot_mtx);
2323
2324 /* wait until all the AP's are up */
2325 while (smp_started == 0)
2326 ; /* nothing */
2327
2328 microuptime(PCPU_PTR(switchtime));
2329 PCPU_SET(switchticks, ticks);
2330
2331 /* ok, now grab sched_lock and enter the scheduler */
2332 enable_intr();
2333 mtx_lock_spin(&sched_lock);
2334 cpu_throw(); /* doesn't return */
2335
2336 panic("scheduler returned us to ap_init");
2337}
2338
2339#ifdef BETTER_CLOCK
2340
2341#define CHECKSTATE_USER 0
2342#define CHECKSTATE_SYS 1
2343#define CHECKSTATE_INTR 2
2344
2345/* Do not staticize. Used from apic_vector.s */
2346struct proc* checkstate_curproc[MAXCPU];
2347int checkstate_cpustate[MAXCPU];
2348u_long checkstate_pc[MAXCPU];
2349
2350#define PC_TO_INDEX(pc, prof) \
2351 ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
2352 (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
2353
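/*
 * Illustrative worked example, not part of the original source:
 * pr_scale is a 16.16 fixed-point fraction, so PC_TO_INDEX() computes
 * ((pc - pr_off) * pr_scale) >> 16 and clears the low bit to get an
 * even byte offset into the array of 16-bit profiling counters.  For
 * instance, with pr_off = 0x1000, pr_scale = 0x8000 (i.e. 0.5) and
 * pc = 0x1234:
 *
 *	(0x234 * 0x8000) >> 16 = 0x11a,   & ~1  ->  0x11a
 */
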
2354static void
2355addupc_intr_forwarded(struct proc *p, int id, int *astmap)
2356{
2357 int i;
2358 struct uprof *prof;
2359 u_long pc;
2360
2361 pc = checkstate_pc[id];
2362 prof = &p->p_stats->p_prof;
2363 if (pc >= prof->pr_off &&
2364 (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) {
2365 mtx_assert(&sched_lock, MA_OWNED);
2366 if ((p->p_sflag & PS_OWEUPC) == 0) {
2367 prof->pr_addr = pc;
2368 prof->pr_ticks = 1;
2369 p->p_sflag |= PS_OWEUPC;
2370 }
2371 *astmap |= (1 << id);
2372 }
2373}
2374
2375static void
2376forwarded_statclock(int id, int pscnt, int *astmap)
2377{
2378 struct pstats *pstats;
2379 long rss;
2380 struct rusage *ru;
2381 struct vmspace *vm;
2382 int cpustate;
2383 struct proc *p;
2384#ifdef GPROF
2385 register struct gmonparam *g;
2386 int i;
2387#endif
2388
2389 mtx_assert(&sched_lock, MA_OWNED);
2390 p = checkstate_curproc[id];
2391 cpustate = checkstate_cpustate[id];
2392
2393 /* XXX */
2394 if (p->p_ithd)
2395 cpustate = CHECKSTATE_INTR;
2396 else if (p == SMP_prvspace[id].globaldata.gd_idleproc)
2397 cpustate = CHECKSTATE_SYS;
2398
2399 switch (cpustate) {
2400 case CHECKSTATE_USER:
2401 if (p->p_sflag & PS_PROFIL)
2402 addupc_intr_forwarded(p, id, astmap);
2403 if (pscnt > 1)
2404 return;
2405 p->p_uticks++;
2406 if (p->p_nice > NZERO)
2407 cp_time[CP_NICE]++;
2408 else
2409 cp_time[CP_USER]++;
2410 break;
2411 case CHECKSTATE_SYS:
2412#ifdef GPROF
2413 /*
2414 * Kernel statistics are just like addupc_intr, only easier.
2415 */
2416 g = &_gmonparam;
2417 if (g->state == GMON_PROF_ON) {
2418 i = checkstate_pc[id] - g->lowpc;
2419 if (i < g->textsize) {
2420 i /= HISTFRACTION * sizeof(*g->kcount);
2421 g->kcount[i]++;
2422 }
2423 }
2424#endif
2425 if (pscnt > 1)
2426 return;
2427
2428 p->p_sticks++;
2429 if (p == SMP_prvspace[id].globaldata.gd_idleproc)
2430 cp_time[CP_IDLE]++;
2431 else
2432 cp_time[CP_SYS]++;
2433 break;
2434 case CHECKSTATE_INTR:
2435 default:
2436#ifdef GPROF
2437 /*
2438 * Kernel statistics are just like addupc_intr, only easier.
2439 */
2440 g = &_gmonparam;
2441 if (g->state == GMON_PROF_ON) {
2442 i = checkstate_pc[id] - g->lowpc;
2443 if (i < g->textsize) {
2444 i /= HISTFRACTION * sizeof(*g->kcount);
2445 g->kcount[i]++;
2446 }
2447 }
2448#endif
2449 if (pscnt > 1)
2450 return;
2451 KASSERT(p != NULL, ("NULL process in interrupt state"));
2452 p->p_iticks++;
2453 cp_time[CP_INTR]++;
2454 }
2455
2456 schedclock(p);
2457
2458 /* Update resource usage integrals and maximums. */
2459 if ((pstats = p->p_stats) != NULL &&
2460 (ru = &pstats->p_ru) != NULL &&
2461 (vm = p->p_vmspace) != NULL) {
2462 ru->ru_ixrss += pgtok(vm->vm_tsize);
2463 ru->ru_idrss += pgtok(vm->vm_dsize);
2464 ru->ru_isrss += pgtok(vm->vm_ssize);
2465 rss = pgtok(vmspace_resident_count(vm));
2466 if (ru->ru_maxrss < rss)
2467 ru->ru_maxrss = rss;
2468 }
2469}
2470
2471void
2472forward_statclock(int pscnt)
2473{
2474 int map;
2475 int id;
2476 int i;
2477
2478 /* Kludge. We don't yet have separate locks for the interrupts
2479 * and the kernel. This means that we cannot let the other processors
2480 * handle complex interrupts while inhibiting them from entering
2481 * the kernel in a non-interrupt context.
2482 *
2483 * What we can do, without changing the locking mechanisms yet,
2484	 * is let the other processors handle a very simple interrupt
2485	 * (which determines the processor states), and do the main
2486	 * work ourselves.
2487 */
2488
2489 CTR1(KTR_SMP, "forward_statclock(%d)", pscnt);
2490
2491 if (!smp_started || !invltlb_ok || cold || panicstr)
2492 return;
2493
2494 /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */
2495
2496	map = PCPU_GET(other_cpus) & ~stopped_cpus;
2497 checkstate_probed_cpus = 0;
2498 if (map != 0)
2499 selected_apic_ipi(map,
2500 XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2501
2502 i = 0;
2503 while (checkstate_probed_cpus != map) {
2504 /* spin */
2505 i++;
2506 if (i == 100000) {
2507#ifdef BETTER_CLOCK_DIAGNOSTIC
2508 printf("forward_statclock: checkstate %x\n",
2509 checkstate_probed_cpus);
2510#endif
2511 break;
2512 }
2513 }
2514
2515 /*
2516	 * Step 2: walk through other processors' processes, update ticks and
2517 * profiling info.
2518 */
2519
2520 map = 0;
2521 for (id = 0; id < mp_ncpus; id++) {
2522 if (id == PCPU_GET(cpuid))
2523 continue;
2524 if (((1 << id) & checkstate_probed_cpus) == 0)
2525 continue;
2526 forwarded_statclock(id, pscnt, &map);
2527 }
2528 if (map != 0) {
2529 checkstate_need_ast |= map;
2530 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2531 i = 0;
2532 while ((checkstate_need_ast & map) != 0) {
2533 /* spin */
2534 i++;
2535 if (i > 100000) {
2536#ifdef BETTER_CLOCK_DIAGNOSTIC
2537 printf("forward_statclock: dropped ast 0x%x\n",
2538 checkstate_need_ast & map);
2539#endif
2540 break;
2541 }
2542 }
2543 }
2544}
2545
2546void
2547forward_hardclock(int pscnt)
2548{
2549 int map;
2550 int id;
2551 struct proc *p;
2552 struct pstats *pstats;
2553 int i;
2554
2555 /* Kludge. We don't yet have separate locks for the interrupts
2556 * and the kernel. This means that we cannot let the other processors
2557 * handle complex interrupts while inhibiting them from entering
2558 * the kernel in a non-interrupt context.
2559 *
2560 * What we can do, without changing the locking mechanisms yet,
2561	 * is let the other processors handle a very simple interrupt
2562	 * (which determines the processor states), and do the main
2563	 * work ourselves.
2564 */
2565
2566 CTR1(KTR_SMP, "forward_hardclock(%d)", pscnt);
2567
2568 if (!smp_started || !invltlb_ok || cold || panicstr)
2569 return;
2570
2571 /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */
2572
2573	map = PCPU_GET(other_cpus) & ~stopped_cpus;
2574 checkstate_probed_cpus = 0;
2575 if (map != 0)
2576 selected_apic_ipi(map,
2577 XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
2578
2579 i = 0;
2580 while (checkstate_probed_cpus != map) {
2581 /* spin */
2582 i++;
2583 if (i == 100000) {
2584#ifdef BETTER_CLOCK_DIAGNOSTIC
2585 printf("forward_hardclock: checkstate %x\n",
2586 checkstate_probed_cpus);
2587#endif
2588 break;
2589 }
2590 }
2591
2592 /*
2593	 * Step 2: walk through other processors' processes, update virtual
2594 * timer and profiling timer. If stathz == 0, also update ticks and
2595 * profiling info.
2596 */
2597
2598 map = 0;
2599 for (id = 0; id < mp_ncpus; id++) {
2600 if (id == PCPU_GET(cpuid))
2601 continue;
2602 if (((1 << id) & checkstate_probed_cpus) == 0)
2603 continue;
2604 p = checkstate_curproc[id];
2605 if (p) {
2606 pstats = p->p_stats;
2607 if (checkstate_cpustate[id] == CHECKSTATE_USER &&
2608 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
2609 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
2610 p->p_sflag |= PS_ALRMPEND;
2611 map |= (1 << id);
2612 }
2613 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
2614 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
2615 p->p_sflag |= PS_PROFPEND;
2616 map |= (1 << id);
2617 }
2618 }
2619 if (stathz == 0) {
2620			forwarded_statclock(id, pscnt, &map);
2621 }
2622 }
2623 if (map != 0) {
2624 checkstate_need_ast |= map;
2625 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2626 i = 0;
2627 while ((checkstate_need_ast & map) != 0) {
2628 /* spin */
2629 i++;
2630 if (i > 100000) {
2631#ifdef BETTER_CLOCK_DIAGNOSTIC
2632 printf("forward_hardclock: dropped ast 0x%x\n",
2633 checkstate_need_ast & map);
2634#endif
2635 break;
2636 }
2637 }
2638 }
2639}
2640
2641#endif /* BETTER_CLOCK */
2642
2643void
2644forward_signal(struct proc *p)
2645{
2646 int map;
2647 int id;
2648 int i;
2649
2650 /* Kludge. We don't yet have separate locks for the interrupts
2651 * and the kernel. This means that we cannot let the other processors
2652 * handle complex interrupts while inhibiting them from entering
2653 * the kernel in a non-interrupt context.
2654 *
2655 * What we can do, without changing the locking mechanisms yet,
2656	 * is let the other processors handle a very simple interrupt
2657	 * (which determines the processor states), and do the main
2658	 * work ourselves.
2659 */
2660
2661 CTR1(KTR_SMP, "forward_signal(%p)", p);
2662
2663 if (!smp_started || !invltlb_ok || cold || panicstr)
2664 return;
2665 if (!forward_signal_enabled)
2666 return;
2667 mtx_lock_spin(&sched_lock);
2668 while (1) {
2669 if (p->p_stat != SRUN) {
2670 mtx_unlock_spin(&sched_lock);
2671 return;
2672 }
2673 id = p->p_oncpu;
2674 mtx_unlock_spin(&sched_lock);
2675 if (id == 0xff)
2676 return;
2677 map = (1<<id);
2678 checkstate_need_ast |= map;
2679 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2680 i = 0;
2681 while ((checkstate_need_ast & map) != 0) {
2682 /* spin */
2683 i++;
2684 if (i > 100000) {
2685#if 0
2686 printf("forward_signal: dropped ast 0x%x\n",
2687 checkstate_need_ast & map);
2688#endif
2689 break;
2690 }
2691 }
2692 mtx_lock_spin(&sched_lock);
2693 if (id == p->p_oncpu) {
2694 mtx_unlock_spin(&sched_lock);
2695 return;
2696 }
2697 }
2698}
2699
2700void
2701forward_roundrobin(void)
2702{
2703 u_int map;
2704 int i;
2705
2706 CTR0(KTR_SMP, "forward_roundrobin()");
2707
2708 if (!smp_started || !invltlb_ok || cold || panicstr)
2709 return;
2710 if (!forward_roundrobin_enabled)
2711 return;
2712 resched_cpus |= PCPU_GET(other_cpus);
2713	map = PCPU_GET(other_cpus) & ~stopped_cpus;
2714#if 1
2715 selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
2716#else
2717 (void) all_but_self_ipi(XCPUAST_OFFSET);
2718#endif
2719 i = 0;
2720 while ((checkstate_need_ast & map) != 0) {
2721 /* spin */
2722 i++;
2723 if (i > 100000) {
2724#if 0
2725 printf("forward_roundrobin: dropped ast 0x%x\n",
2726 checkstate_need_ast & map);
2727#endif
2728 break;
2729 }
2730 }
2731}
2732
2733/*
2734 * When called the executing CPU will send an IPI to all other CPUs
2735 * requesting that they halt execution.
2736 *
2737 * Usually (but not necessarily) called with 'other_cpus' as its arg.
2738 *
2739 * - Signals all CPUs in map to stop.
2740 * - Waits for each to stop.
2741 *
2742 * Returns:
2743 * -1: error
2744 * 0: NA
2745 * 1: ok
2746 *
2747 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
2748	 * from executing it at the same time.
2749 */
2750int
2751stop_cpus(u_int map)
2752{
2753 int count = 0;
2754
2755 if (!smp_started)
2756 return 0;
2757
2758 /* send the Xcpustop IPI to all CPUs in map */
2759 selected_apic_ipi(map, XCPUSTOP_OFFSET, APIC_DELMODE_FIXED);
2760
2761 while (count++ < 100000 && (stopped_cpus & map) != map)
2762 /* spin */ ;
2763
2764#ifdef DIAGNOSTIC
2765 if ((stopped_cpus & map) != map)
2766 printf("Warning: CPUs 0x%x did not stop!\n",
2767 (~(stopped_cpus & map)) & map);
2768#endif
2769
2770 return 1;
2771}
2772
2773
2774/*
2775 * Called by a CPU to restart stopped CPUs.
2776 *
2777 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
2778 *
2779 * - Signals all CPUs in map to restart.
2780 * - Waits for each to restart.
2781 *
2782 * Returns:
2783 * -1: error
2784 * 0: NA
2785 * 1: ok
2786 */
2787int
2788restart_cpus(u_int map)
2789{
2790 int count = 0;
2791
2792 if (!smp_started)
2793 return 0;
2794
2795 started_cpus = map; /* signal other cpus to restart */
2796
2797 /* wait for each to clear its bit */
2798 while (count++ < 100000 && (stopped_cpus & map) != 0)
2799 /* spin */ ;
2800
2801#ifdef DIAGNOSTIC
2802 if ((stopped_cpus & map) != 0)
2803 printf("Warning: CPUs 0x%x did not restart!\n",
2804 (~(stopped_cpus & map)) & map);
2805#endif
2806
2807 return 1;
2808}
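
/*
 * Illustrative usage sketch (hypothetical caller, not from the original
 * source): a CPU that wants the machine to itself, e.g. while poking at
 * shared state from a debugger-like context, would typically pair the
 * two routines above like this:
 */
#if 0	/* example only, not in the original file */
static void
quiesce_other_cpus_example(void)
{
	u_int map = PCPU_GET(other_cpus);

	stop_cpus(map);			/* park everybody else */
	/* ... inspect or modify shared state here ... */
	restart_cpus(stopped_cpus);	/* let them continue */
}
#endif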
2809
2810
2811#ifdef APIC_INTR_REORDER
2812/*
2813 * Maintain mapping from softintr vector to isr bit in local apic.
2814 */
2815void
2816set_lapic_isrloc(int intr, int vector)
2817{
2818 if (intr < 0 || intr > 32)
2819 panic("set_apic_isrloc: bad intr argument: %d",intr);
2820 if (vector < ICU_OFFSET || vector > 255)
2821 panic("set_apic_isrloc: bad vector argument: %d",vector);
2822 apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
2823 apic_isrbit_location[intr].bit = (1<<(vector & 31));
2824}
2825#endif
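
/*
 * Illustrative worked example, not part of the original source: the
 * local APIC ISR is eight 32-bit registers spaced 16 bytes apart, and
 * the lapic fields are u_ints, so &lapic.isr0 + ((vector >> 5) << 2)
 * advances (vector / 32) * 4 u_ints, i.e. (vector / 32) registers.
 * For vector 0x45 (69): 69 >> 5 = 2 selects isr2, and 69 & 31 = 5
 * gives the bit mask 1 << 5 within that register.
 */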
2826
2827/*
2828 * All-CPU rendezvous. CPUs are signalled, all execute the setup function
2829 * (if specified), rendezvous, execute the action function (if specified),
2830 * rendezvous again, execute the teardown function (if specified), and then
2831 * resume.
2832 *
2833 * Note that the supplied external functions _must_ be reentrant and aware
2834 * that they are running in parallel and in an unknown lock context.
2835 */
2836static void (*smp_rv_setup_func)(void *arg);
2837static void (*smp_rv_action_func)(void *arg);
2838static void (*smp_rv_teardown_func)(void *arg);
2839static void *smp_rv_func_arg;
2840static volatile int smp_rv_waiters[2];
2841
2842void
2843smp_rendezvous_action(void)
2844{
2845 /* setup function */
2846 if (smp_rv_setup_func != NULL)
2847 smp_rv_setup_func(smp_rv_func_arg);
2848 /* spin on entry rendezvous */
2849 atomic_add_int(&smp_rv_waiters[0], 1);
2850 while (smp_rv_waiters[0] < mp_ncpus)
2851 ;
2852 /* action function */
2853 if (smp_rv_action_func != NULL)
2854 smp_rv_action_func(smp_rv_func_arg);
2855 /* spin on exit rendezvous */
2856 atomic_add_int(&smp_rv_waiters[1], 1);
2857 while (smp_rv_waiters[1] < mp_ncpus)
2858 ;
2859 /* teardown function */
2860 if (smp_rv_teardown_func != NULL)
2861 smp_rv_teardown_func(smp_rv_func_arg);
2862}
2863
2864void
2865smp_rendezvous(void (* setup_func)(void *),
2866 void (* action_func)(void *),
2867 void (* teardown_func)(void *),
2868 void *arg)
2869{
2870
2871 /* obtain rendezvous lock */
2872 mtx_lock_spin(&smp_rv_mtx);
2873
2874 /* set static function pointers */
2875 smp_rv_setup_func = setup_func;
2876 smp_rv_action_func = action_func;
2877 smp_rv_teardown_func = teardown_func;
2878 smp_rv_func_arg = arg;
2879 smp_rv_waiters[0] = 0;
2880 smp_rv_waiters[1] = 0;
2881
2882 /*
2883	 * signal other processors, which will enter the IPI handler with interrupts off
2884 */
2885 all_but_self_ipi(XRENDEZVOUS_OFFSET);
2886
2887 /* call executor function */
2888 smp_rendezvous_action();
2889
2890 /* release lock */
2891 mtx_unlock_spin(&smp_rv_mtx);
2892}
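
/*
 * Illustrative usage sketch (hypothetical caller and action function,
 * not from the original source): a caller that wants every CPU to flush
 * its TLB in lockstep could use the rendezvous like this.  The action
 * runs on all CPUs, including the caller, with the others spinning in
 * the rendezvous IPI handler until everyone has arrived.
 */
#if 0	/* example only, not in the original file */
static void
rendezvous_invltlb_action(void *arg __unused)
{
	cpu_invltlb();		/* runs on every CPU */
}

static void
smp_rendezvous_example(void)
{
	smp_rendezvous(NULL, rendezvous_invltlb_action, NULL, NULL);
}
#endif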
2893
2894void
2895release_aps(void *dummy __unused)
2896{
2897 atomic_store_rel_int(&aps_ready, 1);
2898}
2899
2900SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);