1/*-
2 * Copyright (c) 1996, by Steve Passe
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. The name of the developer may NOT be used to endorse or promote products
11 *    derived from this software without specific prior written permission.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26#include <sys/cdefs.h>
27__FBSDID("$FreeBSD: stable/11/sys/i386/i386/mp_machdep.c 347700 2019-05-16 14:42:16Z markj $");
28
29#include "opt_apic.h"
30#include "opt_cpu.h"
31#include "opt_kstack_pages.h"
32#include "opt_pmap.h"
33#include "opt_sched.h"
34#include "opt_smp.h"
35
36#if !defined(lint)
37#if !defined(SMP)
38#error How did you get here?
39#endif
40
41#ifndef DEV_APIC
42#error The apic device is required for SMP, add "device apic" to your config file.
43#endif
44#endif /* not lint */
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bus.h>
49#include <sys/cons.h>	/* cngetc() */
50#include <sys/cpuset.h>
51#ifdef GPROF
52#include <sys/gmon.h>
53#endif
54#include <sys/kernel.h>
55#include <sys/ktr.h>
56#include <sys/lock.h>
57#include <sys/malloc.h>
58#include <sys/memrange.h>
59#include <sys/mutex.h>
60#include <sys/pcpu.h>
61#include <sys/proc.h>
62#include <sys/sched.h>
63#include <sys/smp.h>
64#include <sys/sysctl.h>
65
66#include <vm/vm.h>
67#include <vm/vm_param.h>
68#include <vm/pmap.h>
69#include <vm/vm_kern.h>
70#include <vm/vm_extern.h>
71
72#include <x86/apicreg.h>
73#include <machine/clock.h>
74#include <machine/cpu.h>
75#include <machine/cputypes.h>
76#include <x86/mca.h>
77#include <machine/md_var.h>
78#include <machine/pcb.h>
79#include <machine/psl.h>
80#include <machine/smp.h>
81#include <machine/specialreg.h>
82#include <x86/ucode.h>
83
/*
 * Warm-start vector.  WARMBOOT_OFF and WARMBOOT_SEG are the KERNBASE-relative
 * addresses of the two 16-bit words that start_all_aps() fills in with
 * WARMBOOT_TARGET and (boot_address >> 4) respectively before starting an AP.
 */
#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

/*
 * CMOS access: a register number is written to CMOS_REG with outb(), then
 * the value is read from or written to CMOS_DATA.
 */
#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
/* CMOS register used around AP startup, and the 'warm-start' value for it. */
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)
92
93/*
94 * this code MUST be enabled here and in mpboot.s.
95 * it follows the very early stages of AP boot by placing values in CMOS ram.
96 * it NORMALLY will never be needed and thus the primitive method for enabling.
97 *
98#define CHECK_POINTS
99 */
100
#if defined(CHECK_POINTS) && !defined(PC98)
/* Access one CMOS byte: select register A, then read or write the data port. */
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

/*
 * Set all six checkpoint bytes (CMOS registers 0x34-0x39) to D.
 * Wrapped in do/while (0) so the macro behaves as a single statement.
 */
#define CHECK_INIT(D) do {			\
	CHECK_WRITE(0x34, (D));			\
	CHECK_WRITE(0x35, (D));			\
	CHECK_WRITE(0x36, (D));			\
	CHECK_WRITE(0x37, (D));			\
	CHECK_WRITE(0x38, (D));			\
	CHECK_WRITE(0x39, (D));			\
} while (0)

/*
 * Print the six checkpoint bytes, prefixed by the string S.
 * The bytes are read one at a time before printf() is called: the order in
 * which function arguments are evaluated is unspecified, so doing the
 * register-select/read pairs inside the argument list could interleave them.
 */
#define CHECK_PRINT(S) do {						\
	int check_vals_[6];						\
	int check_idx_;							\
									\
	for (check_idx_ = 0; check_idx_ < 6; check_idx_++)		\
		check_vals_[check_idx_] = CHECK_READ(0x34 + check_idx_); \
	printf("%s: %d, %d, %d, %d, %d, %d\n",				\
	   (S),								\
	   check_vals_[0],						\
	   check_vals_[1],						\
	   check_vals_[2],						\
	   check_vals_[3],						\
	   check_vals_[4],						\
	   check_vals_[5]);						\
} while (0)

#else				/* CHECK_POINTS */

/* Checkpoints disabled: the macros expand to nothing. */
#define CHECK_INIT(D)
#define CHECK_PRINT(S)
#define CHECK_WRITE(A, D)

#endif				/* CHECK_POINTS */
130
/* Per-CPU data areas; init_secondary() uses the entry indexed by its CPU ID. */
extern	struct pcpu __pcpu[];

/*
 * Local data and functions.
 */

static void	install_ap_tramp(void);
static int	start_all_aps(void);
static int	start_ap(int apic_id);

/*
 * Physical address of the base-memory page that holds the AP trampoline.
 * Chosen by mp_bootaddress().
 */
static u_int	boot_address;
142
143/*
144 * Calculate usable address in base memory for AP trampoline code.
145 */
146u_int
147mp_bootaddress(u_int basemem)
148{
149
150	boot_address = trunc_page(basemem);	/* round down to 4k boundary */
151	if ((basemem - boot_address) < bootMP_size)
152		boot_address -= PAGE_SIZE;	/* not enough, lower by 4k */
153
154	return boot_address;
155}
156
157/*
158 * Initialize the IPI handlers and start up the AP's.
159 */
160void
161cpu_mp_start(void)
162{
163	int i;
164
165	/* Initialize the logical ID to APIC ID table. */
166	for (i = 0; i < MAXCPU; i++) {
167		cpu_apic_ids[i] = -1;
168		cpu_ipi_pending[i] = 0;
169	}
170
171	/* Install an inter-CPU IPI for TLB invalidation */
172	setidt(IPI_INVLTLB, IDTVEC(invltlb),
173	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
174	setidt(IPI_INVLPG, IDTVEC(invlpg),
175	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
176	setidt(IPI_INVLRNG, IDTVEC(invlrng),
177	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
178
179	/* Install an inter-CPU IPI for cache invalidation. */
180	setidt(IPI_INVLCACHE, IDTVEC(invlcache),
181	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
182
183	/* Install an inter-CPU IPI for all-CPU rendezvous */
184	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
185	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
186
187	/* Install generic inter-CPU IPI handler */
188	setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
189	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
190
191	/* Install an inter-CPU IPI for CPU stop/restart */
192	setidt(IPI_STOP, IDTVEC(cpustop),
193	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
194
195	/* Install an inter-CPU IPI for CPU suspend/resume */
196	setidt(IPI_SUSPEND, IDTVEC(cpususpend),
197	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
198
199	/* Set boot_cpu_id if needed. */
200	if (boot_cpu_id == -1) {
201		boot_cpu_id = PCPU_GET(apic_id);
202		cpu_info[boot_cpu_id].cpu_bsp = 1;
203	} else
204		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
205		    ("BSP's APIC ID doesn't match boot_cpu_id"));
206
207	/* Probe logical/physical core configuration. */
208	topo_probe();
209
210	assign_cpu_ids();
211
212	/* Start each Application Processor */
213	start_all_aps();
214
215	set_interrupt_apic_ids();
216}
217
/*
 * AP CPU's call this to initialize themselves.
 *
 * Sets up this CPU's pcpu area, its slice of the GDT, the IDT, LDT and TSS,
 * puts CR0 into a known state, announces itself to the BSP by incrementing
 * mp_naps, waits for aps_ready, and finishes in init_secondary_tail().
 * The statement order matters: descriptor tables are built before they are
 * loaded, and PCPU_SET() is only used after lgdt().
 */
void
init_secondary(void)
{
	struct pcpu *pc;
	vm_offset_t addr;
	int	gsel_tss;
	int	x, myid;
	u_int	cr0;

	/* bootAP is set in start_all_aps() to our ID. */
	myid = bootAP;

	/* Update microcode before doing anything else. */
	ucode_load_ap(myid);

	/* Get per-cpu data */
	pc = &__pcpu[myid];

	/* prime data page for it to use */
	pcpu_init(pc, myid, sizeof(struct pcpu));
	dpcpu_init(dpcpu, myid);	/* dpcpu was allocated by start_all_aps() */
	pc->pc_apic_id = cpu_apic_ids[myid];
	pc->pc_prvspace = pc;
	pc->pc_curthread = 0;

	fix_cpuid();

	/*
	 * Point the GPRIV_SEL and GPROC0_SEL segment templates at this CPU's
	 * pcpu area and TSS, then build this CPU's NGDT-entry slice of gdt[]
	 * from the templates.
	 */
	gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
	gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;

	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	/* Load the GDT register with this CPU's slice. */
	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	/* Fill in this CPU's TSS and its descriptor, then load the task register. */
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	ltr(gsel_tss);

	PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 * Cleared here: CR0_CD, CR0_NW, CR0_EM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);
	CHECK_WRITE(0x38, 5);

	/*
	 * signal our startup to the BSP: start_ap() polls mp_naps and
	 * reports success once it sees the count go up.
	 */
	mp_naps++;
	CHECK_WRITE(0x39, 6);

	/* Spin until the BSP releases the AP's. */
	while (atomic_load_acq_int(&aps_ready) == 0)
		ia32_pause();

	/*
	 * BSP may have changed PTD while we were waiting.  Besides the full
	 * flush, invalidate page by page the low range that start_all_aps()
	 * temporarily maps P==V and later unmaps.
	 */
	invltlb();
	for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
		invlpg(addr);

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
	/* Load the IDT register again from r_idt. */
	lidt(&r_idt);
#endif

	init_secondary_tail();
}
304
305/*
306 * start each AP in our list
307 */
308/* Lowest 1MB is already mapped: don't touch*/
309#define TMPMAP_START 1
310static int
311start_all_aps(void)
312{
313#ifndef PC98
314	u_char mpbiosreason;
315#endif
316	u_int32_t mpbioswarmvec;
317	int apic_id, cpu, i;
318
319	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
320
321	/* install the AP 1st level boot code */
322	install_ap_tramp();
323
324	/* save the current value of the warm-start vector */
325	mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
326#ifndef PC98
327	outb(CMOS_REG, BIOS_RESET);
328	mpbiosreason = inb(CMOS_DATA);
329#endif
330
331	/* set up temporary P==V mapping for AP boot */
332	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
333	for (i = TMPMAP_START; i < NKPT; i++)
334		PTD[i] = PTD[KPTDI + i];
335	invltlb();
336
337	/* start each AP */
338	for (cpu = 1; cpu < mp_ncpus; cpu++) {
339		apic_id = cpu_apic_ids[cpu];
340
341		/* allocate and set up a boot stack data page */
342		bootstacks[cpu] =
343		    (char *)kmem_malloc(kernel_arena, kstack_pages * PAGE_SIZE,
344		    M_WAITOK | M_ZERO);
345		dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
346		    M_WAITOK | M_ZERO);
347		/* setup a vector to our boot code */
348		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
349		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
350#ifndef PC98
351		outb(CMOS_REG, BIOS_RESET);
352		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
353#endif
354
355		bootSTK = (char *)bootstacks[cpu] + kstack_pages *
356		    PAGE_SIZE - 4;
357		bootAP = cpu;
358
359		/* attempt to start the Application Processor */
360		CHECK_INIT(99);	/* setup checkpoints */
361		if (!start_ap(apic_id)) {
362			printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
363			CHECK_PRINT("trace");	/* show checkpoints */
364			/* better panic as the AP may be running loose */
365			printf("panic y/n? [y] ");
366			if (cngetc() != 'n')
367				panic("bye-bye");
368		}
369		CHECK_PRINT("trace");		/* show checkpoints */
370
371		CPU_SET(cpu, &all_cpus);	/* record AP in CPU map */
372	}
373
374	/* restore the warmstart vector */
375	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
376
377#ifndef PC98
378	outb(CMOS_REG, BIOS_RESET);
379	outb(CMOS_DATA, mpbiosreason);
380#endif
381
382	/* Undo V==P hack from above */
383	for (i = TMPMAP_START; i < NKPT; i++)
384		PTD[i] = 0;
385	pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
386
387	/* number of APs actually started */
388	return mp_naps;
389}
390
/*
 * load the 1st level AP boot code into base memory.
 */

/*
 * targets for relocation
 *
 * These symbols live in the AP boot code (mpboot.s).  install_ap_tramp()
 * only takes their addresses, to find the matching locations inside the
 * copy it places in base memory.
 */
extern void bigJump(void);	/* 32-bit word at bigJump + 1 gets MPentry - KERNBASE */
extern void bootCodeSeg(void);	/* gets the low 24 bits of boot_address */
extern void bootDataSeg(void);	/* gets the low 24 bits of boot_address */
extern void MPentry(void);	/* target stored into the bigJump location */
extern u_int MP_GDT;		/* its relocated address is stored at mp_gdtbase */
extern u_int mp_gdtbase;	/* the lgdt argument that gets patched */
402
403static void
404install_ap_tramp(void)
405{
406	int     x;
407	int     size = *(int *) ((u_long) & bootMP_size);
408	vm_offset_t va = boot_address + KERNBASE;
409	u_char *src = (u_char *) ((u_long) bootMP);
410	u_char *dst = (u_char *) va;
411	u_int   boot_base = (u_int) bootMP;
412	u_int8_t *dst8;
413	u_int16_t *dst16;
414	u_int32_t *dst32;
415
416	KASSERT (size <= PAGE_SIZE,
417	    ("'size' do not fit into PAGE_SIZE, as expected."));
418	pmap_kenter(va, boot_address);
419	pmap_invalidate_page (kernel_pmap, va);
420	for (x = 0; x < size; ++x)
421		*dst++ = *src++;
422
423	/*
424	 * modify addresses in code we just moved to basemem. unfortunately we
425	 * need fairly detailed info about mpboot.s for this to work.  changes
426	 * to mpboot.s might require changes here.
427	 */
428
429	/* boot code is located in KERNEL space */
430	dst = (u_char *) va;
431
432	/* modify the lgdt arg */
433	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
434	*dst32 = boot_address + ((u_int) & MP_GDT - boot_base);
435
436	/* modify the ljmp target for MPentry() */
437	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
438	*dst32 = ((u_int) MPentry - KERNBASE);
439
440	/* modify the target for boot code segment */
441	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
442	dst8 = (u_int8_t *) (dst16 + 1);
443	*dst16 = (u_int) boot_address & 0xffff;
444	*dst8 = ((u_int) boot_address >> 16) & 0xff;
445
446	/* modify the target for boot data segment */
447	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
448	dst8 = (u_int8_t *) (dst16 + 1);
449	*dst16 = (u_int) boot_address & 0xffff;
450	*dst8 = ((u_int) boot_address >> 16) & 0xff;
451}
452
453/*
454 * This function starts the AP (application processor) identified
455 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
456 * to accomplish this.  This is necessary because of the nuances
457 * of the different hardware we might encounter.  It isn't pretty,
458 * but it seems to work.
459 */
460static int
461start_ap(int apic_id)
462{
463	int vector, ms;
464	int cpus;
465
466	/* calculate the vector */
467	vector = (boot_address >> 12) & 0xff;
468
469	/* used as a watchpoint to signal AP startup */
470	cpus = mp_naps;
471
472	ipi_startup(apic_id, vector);
473
474	/* Wait up to 5 seconds for it to start. */
475	for (ms = 0; ms < 5000; ms++) {
476		if (mp_naps > cpus)
477			return 1;	/* return SUCCESS */
478		DELAY(1000);
479	}
480	return 0;		/* return FAILURE */
481}
482