1/*	$OpenBSD: machdep.c,v 1.671 2024/05/26 13:37:32 kettenis Exp $	*/
2/*	$NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $	*/
3
4/*-
5 * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10 * NASA Ames Research Center.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34/*-
35 * Copyright (c) 1993, 1994, 1995, 1996 Charles M. Hannum.  All rights reserved.
36 * Copyright (c) 1992 Terrence R. Lambert.
37 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
38 * All rights reserved.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * William Jolitz.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. Neither the name of the University nor the names of its contributors
52 *    may be used to endorse or promote products derived from this software
53 *    without specific prior written permission.
54 *
55 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * SUCH DAMAGE.
66 *
67 *	@(#)machdep.c	7.4 (Berkeley) 6/3/91
68 */
69
70#include <sys/param.h>
71#include <sys/systm.h>
72#include <sys/signalvar.h>
73#include <sys/proc.h>
74#include <sys/user.h>
75#include <sys/exec.h>
76#include <sys/buf.h>
77#include <sys/reboot.h>
78#include <sys/conf.h>
79#include <sys/malloc.h>
80#include <sys/msgbuf.h>
81#include <sys/mount.h>
82#include <sys/device.h>
83#include <sys/extent.h>
84#include <sys/sysctl.h>
85#include <sys/syscallargs.h>
86#include <sys/core.h>
87#include <sys/kcore.h>
88#include <sys/sensors.h>
89
90#include <dev/cons.h>
91#include <stand/boot/bootarg.h>
92
93#include <net/if.h>
94#include <uvm/uvm_extern.h>
95
96#include <machine/bus.h>
97
98#include <machine/cpu.h>
99#include <machine/cpu_full.h>
100#include <machine/cpufunc.h>
101#include <machine/cpuvar.h>
102#include <machine/kcore.h>
103#include <machine/pio.h>
104#include <machine/psl.h>
105#include <machine/specialreg.h>
106#include <machine/biosvar.h>
107#include <machine/pte.h>
108#ifdef MULTIPROCESSOR
109#include <machine/mpbiosvar.h>
110#endif /* MULTIPROCESSOR */
111
112#include <dev/isa/isareg.h>
113#include <dev/ic/i8042reg.h>
114#include <i386/isa/isa_machdep.h>
115
116#include "acpi.h"
117#if NACPI > 0
118#include <dev/acpi/acpivar.h>
119#endif
120
121#include "apm.h"
122#if NAPM > 0
123#include <machine/apmvar.h>
124#endif
125
126#ifdef DDB
127#include <machine/db_machdep.h>
128#include <ddb/db_extern.h>
129#endif
130
131#include "isa.h"
132#include "isadma.h"
133#include "npx.h"
134#if NNPX > 0
135extern struct proc *npxproc;
136#endif
137
138#include "bios.h"
139
140#ifdef HIBERNATE
141#include <machine/hibernate_var.h>
142#endif /* HIBERNATE */
143
144#include "ukbd.h"
145#include "pckbc.h"
146#if NPCKBC > 0 && NUKBD > 0
147#include <dev/ic/pckbcvar.h>
148#endif
149
150/* #define MACHDEP_DEBUG */
151
152#ifdef MACHDEP_DEBUG
153#define DPRINTF(x...)	do  { printf(x); } while (0)
154#else
155#define DPRINTF(x...)
156#endif	/* MACHDEP_DEBUG */
157
void	replacesmap(void);
int     intr_handler(struct intrframe *, struct intrhand *);

/* the following is used externally (sysctl_hw) */
char machine[] = MACHINE;

/*
 * switchto vectors
 * Hooks a CPU-specific idle driver (e.g. ACPI C-states) can install;
 * NULL means "use the default idle behavior".
 */
void (*cpu_idle_leave_fcn)(void) = NULL;
void (*cpu_idle_cycle_fcn)(void) = NULL;
void (*cpu_idle_enter_fcn)(void) = NULL;


/* Physical-address constraints for ISA (24-bit) and 32-bit DMA. */
struct uvm_constraint_range  isa_constraint = { 0x0, 0x00ffffffUL };
struct uvm_constraint_range  dma_constraint = { 0x0, 0xffffffffUL };
struct uvm_constraint_range *uvm_md_constraints[] = {
	&isa_constraint,
	&dma_constraint,
	NULL
};

extern int	boothowto;
int	physmem;		/* total physical memory, in pages */

/* Physical memory segments recorded for kernel crash dumps. */
struct dumpmem dumpmem[VM_PHYSSEG_MAX];
u_int ndumpmem;

/*
 * These variables are needed by /sbin/savecore
 */
u_long	dumpmag = 0x8fca0101;	/* magic number */
int	dumpsize = 0;		/* pages */
long	dumplo = 0;		/* blocks */

int	cpu_class;		/* CPUCLASS_* of the boot processor */

/* FPU/SIMD capabilities discovered at boot (see cpu setup code). */
int	i386_use_fxsave;
int	i386_has_sse;
int	i386_has_sse2;
int	i386_has_xcrypt;	/* VIA PadLock engines present (C3_HAS_*) */

bootarg_t *bootargp;		/* argument list passed in by the bootloader */
paddr_t avail_end;		/* last usable physical address */

struct vm_map *exec_map = NULL;
struct vm_map *phys_map = NULL;

#if !defined(SMALL_KERNEL)
int p3_early;
void (*update_cpuspeed)(void) = NULL;
void	via_update_sensor(void *args);
#endif
/* sysctl-tunable machdep knobs */
int kbd_reset;
int lid_action = 1;
int pwr_action = 1;
int forceukbd;

/*
 * safepri is a safe priority for sleep to set for a spin-wait
 * during autoconfiguration or after a panic.
 */
int	safepri = 0;

#if !defined(SMALL_KERNEL)
int bus_clock;			/* front-side bus clock, derived per CPU */
#endif
void (*setperf_setup)(struct cpu_info *);
int setperf_prio = 0;		/* for concurrent handlers */

void (*cpusensors_setup)(struct cpu_info *);

/* Clock/delay function pointers; default to the i8254, may be replaced. */
void (*delay_func)(int) = i8254_delay;
void (*initclock_func)(void) = i8254_initclocks;
void (*startclock_func)(void) = i8254_start_both_clocks;
233
234/*
235 * Extent maps to manage I/O and ISA memory hole space.  Allocate
236 * storage for 16 regions in each, initially.  Later, ioport_malloc_safe
237 * will indicate that it's safe to use malloc() to dynamically allocate
238 * region descriptors.
239 *
240 * N.B. At least two regions are _always_ allocated from the iomem
241 * extent map; (0 -> ISA hole) and (end of ISA hole -> end of RAM).
242 *
243 * The extent maps are not static!  Machine-dependent ISA and EISA
244 * routines need access to them for bus address space allocation.
245 */
246static	long ioport_ex_storage[EXTENT_FIXED_STORAGE_SIZE(16) / sizeof(long)];
247static	long iomem_ex_storage[EXTENT_FIXED_STORAGE_SIZE(16) / sizeof(long)];
248struct	extent *ioport_ex;
249struct	extent *iomem_ex;
250static	int ioport_malloc_safe;
251
252void	dumpsys(void);
253int	cpu_dump(void);
254void	init386(paddr_t);
255void	consinit(void);
256void	(*cpuresetfn)(void);
257
258int	bus_mem_add_mapping(bus_addr_t, bus_size_t,
259	    int, bus_space_handle_t *);
260
261#ifdef APERTURE
262int allowaperture = 0;
263#endif
264
265int has_rdrand;
266int has_rdseed;
267
/*
 * Per-vendor CPU identification and setup helpers, dispatched via the
 * i386_cpuid_cpus[] table below.
 */
void	winchip_cpu_setup(struct cpu_info *);
void	amd_family5_setperf_setup(struct cpu_info *);
void	amd_family5_setup(struct cpu_info *);
void	amd_family6_setperf_setup(struct cpu_info *);
void	amd_family6_setup(struct cpu_info *);
void	cyrix3_setperf_setup(struct cpu_info *);
void	cyrix3_cpu_setup(struct cpu_info *);
void	cyrix6x86_cpu_setup(struct cpu_info *);
void	natsem6x86_cpu_setup(struct cpu_info *);
void	intel586_cpu_setup(struct cpu_info *);
void	intel686_cpusensors_setup(struct cpu_info *);
void	intel686_setperf_setup(struct cpu_info *);
void	intel686_common_cpu_setup(struct cpu_info *);
void	intel686_cpu_setup(struct cpu_info *);
void	intel686_p4_cpu_setup(struct cpu_info *);
void	intelcore_update_sensor(void *);
void	tm86_cpu_setup(struct cpu_info *);
char *	intel686_cpu_name(int);
char *	cyrix3_cpu_name(int, int);
char *	tm86_cpu_name(int);
void	cyrix3_get_bus_clock(struct cpu_info *);
void	p4_get_bus_clock(struct cpu_info *);
void	p3_get_bus_clock(struct cpu_info *);
void	p4_update_cpuspeed(void);
void	p3_update_cpuspeed(void);
int	pentium_cpuspeed(int *);
void	enter_shared_special_pages(void);
295
296static __inline u_char
297cyrix_read_reg(u_char reg)
298{
299	outb(0x22, reg);
300	return inb(0x23);
301}
302
303static __inline void
304cyrix_write_reg(u_char reg, u_char data)
305{
306	outb(0x22, reg);
307	outb(0x23, data);
308}
309
310/*
311 * cpuid instruction.  request in eax, result in eax, ebx, ecx, edx.
312 * requires caller to provide u_int32_t regs[4] array.
313 */
314void
315cpuid(u_int32_t ax, u_int32_t *regs)
316{
317	__asm volatile(
318	    "cpuid\n\t"
319	    "movl	%%eax, 0(%2)\n\t"
320	    "movl	%%ebx, 4(%2)\n\t"
321	    "movl	%%ecx, 8(%2)\n\t"
322	    "movl	%%edx, 12(%2)\n\t"
323	    :"=a" (ax)
324	    :"0" (ax), "S" (regs)
325	    :"bx", "cx", "dx");
326}
327
328/*
329 * Machine-dependent startup code
330 */
331void
332cpu_startup(void)
333{
334	unsigned i;
335	vaddr_t minaddr, maxaddr, va;
336	paddr_t pa;
337
338	/*
339	 * Initialize error message buffer (at end of core).
340	 * (space reserved in pmap_bootstrap)
341	 */
342	pa = avail_end;
343	va = (vaddr_t)msgbufp;
344	for (i = 0; i < atop(MSGBUFSIZE); i++) {
345		pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
346		va += PAGE_SIZE;
347		pa += PAGE_SIZE;
348	}
349	pmap_update(pmap_kernel());
350	initmsgbuf((caddr_t)msgbufp, round_page(MSGBUFSIZE));
351
352	printf("%s", version);
353	startclocks();
354	rtcinit();
355
356	printf("real mem  = %llu (%lluMB)\n",
357	    (unsigned long long)ptoa((psize_t)physmem),
358	    (unsigned long long)ptoa((psize_t)physmem)/1024U/1024U);
359
360	/*
361	 * Allocate a submap for exec arguments.  This map effectively
362	 * limits the number of processes exec'ing at any time.
363	 */
364	minaddr = vm_map_min(kernel_map);
365	exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
366				   16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
367
368	/*
369	 * Allocate a submap for physio
370	 */
371	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
372				   VM_PHYS_SIZE, 0, FALSE, NULL);
373
374	printf("avail mem = %llu (%lluMB)\n",
375	    (unsigned long long)ptoa((psize_t)uvmexp.free),
376	    (unsigned long long)ptoa((psize_t)uvmexp.free)/1024U/1024U);
377
378	/*
379	 * Set up buffers, so they can be used to read disk labels.
380	 */
381	bufinit();
382
383	/*
384	 * Configure the system.
385	 */
386	if (boothowto & RB_CONFIG) {
387#ifdef BOOT_CONFIG
388		user_config();
389#else
390		printf("kernel does not support -c; continuing..\n");
391#endif
392	}
393	ioport_malloc_safe = 1;
394
395#ifndef SMALL_KERNEL
396	cpu_ucode_setup();
397#endif
398
399	/* enter the IDT and trampoline code in the u-k maps */
400	enter_shared_special_pages();
401
402	/* initialize CPU0's TSS and GDT and put them in the u-k maps */
403	cpu_enter_pages(&cpu_info_full_primary);
404}
405
406void
407enter_shared_special_pages(void)
408{
409	extern char __kutext_start[], __kutext_end[], __kernel_kutext_phys[];
410	extern char __kudata_start[], __kudata_end[], __kernel_kudata_phys[];
411	vaddr_t	va;
412	paddr_t	pa;
413
414	/* idt */
415	pmap_extract(pmap_kernel(), (vaddr_t)idt, &pa);
416	pmap_enter_special((vaddr_t)idt, pa, PROT_READ, 0);
417
418	/* .kutext section */
419	va = (vaddr_t)__kutext_start;
420	pa = (paddr_t)__kernel_kutext_phys;
421	while (va < (vaddr_t)__kutext_end) {
422		pmap_enter_special(va, pa, PROT_READ | PROT_EXEC, 0);
423		DPRINTF("%s: entered kutext page va 0x%08lx pa 0x%08lx\n",
424		    __func__, (unsigned long)va, (unsigned long)pa);
425		va += PAGE_SIZE;
426		pa += PAGE_SIZE;
427	}
428
429	/* .kudata section */
430	va = (vaddr_t)__kudata_start;
431	pa = (paddr_t)__kernel_kudata_phys;
432	while (va < (vaddr_t)__kudata_end) {
433		pmap_enter_special(va, pa, PROT_READ | PROT_WRITE, 0);
434		DPRINTF("%s: entered kudata page va 0x%08lx pa 0x%08lx\n",
435		    __func__, (unsigned long)va, (unsigned long)pa);
436		va += PAGE_SIZE;
437		pa += PAGE_SIZE;
438	}
439}
440
441/*
442 * Set up proc0's TSS
443 */
444void
445i386_proc0_tss_init(void)
446{
447	struct pcb *pcb;
448
449	curpcb = pcb = &proc0.p_addr->u_pcb;
450	pcb->pcb_cr0 = rcr0();
451	pcb->pcb_kstack = (int)proc0.p_addr + USPACE - 16;
452	proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_kstack - 1;
453}
454
#ifdef MULTIPROCESSOR
/*
 * Record the boot-time CR0 in a secondary CPU's idle PCB so context
 * switches restore a consistent CR0.
 */
void
i386_init_pcb_tss(struct cpu_info *ci)
{
	struct pcb *pcb = ci->ci_idle_pcb;

	pcb->pcb_cr0 = rcr0();
}
#endif	/* MULTIPROCESSOR */
464
465/*
466 * Info for CTL_HW
467 */
468char	cpu_model[120];
469
470const char *classnames[] = {
471	"",
472	"486",
473	"586",
474	"686"
475};
476
477const char *modifiers[] = {
478	"",
479	"OverDrive ",
480	"Dual ",
481	""
482};
483
/*
 * Per-vendor CPU name/class tables, keyed by the CPUID vendor string.
 * Each vendor entry lists families starting at family 4; within a
 * family, model numbers index the name strings (last slot is the
 * default for unknown models) and an optional setup hook is run for
 * that family.
 */
const struct cpu_cpuid_nameclass i386_cpuid_cpus[] = {
	{
		"GenuineIntel",
		CPUVENDOR_INTEL,
		"Intel",
		/* Family 4 */
		{ {
			CPUCLASS_486,
			{
				"486DX", "486DX", "486SX", "486DX2", "486SL",
				"486SX2", 0, "486DX2 W/B",
				"486DX4", 0, 0, 0, 0, 0, 0, 0,
				"486"		/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				"Pentium (A-step)", "Pentium (P5)",
				"Pentium (P54C)", "Pentium (P24T)",
				"Pentium/MMX", "Pentium", 0,
				"Pentium (P54C)", "Pentium/MMX",
				0, 0, 0, 0, 0, 0, 0,
				"Pentium"	/* Default */
			},
			intel586_cpu_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				"Pentium Pro", "Pentium Pro", 0,
				"Pentium II", "Pentium Pro",
				"Pentium II/Celeron",
				"Celeron",
				"Pentium III",
				"Pentium III",
				"Pentium M",
				"Pentium III Xeon",
				"Pentium III", 0,
				"Pentium M",
				"Core Duo/Solo", 0,
				"Pentium Pro, II or III"	/* Default */
			},
			intel686_cpu_setup
		},
		/* Family 7 */
		{
			CPUCLASS_686,
		} ,
		/* Family 8 */
		{
			CPUCLASS_686,
		} ,
		/* Family 9 */
		{
			CPUCLASS_686,
		} ,
		/* Family A */
		{
			CPUCLASS_686,
		} ,
		/* Family B */
		{
			CPUCLASS_686,
		} ,
		/* Family C */
		{
			CPUCLASS_686,
		} ,
		/* Family D */
		{
			CPUCLASS_686,
		} ,
		/* Family E */
		{
			CPUCLASS_686,
		} ,
		/* Family F */
		{
			CPUCLASS_686,
			{
				"Pentium 4", 0, 0, 0,
				0, 0, 0, 0,
				0, 0, 0, 0,
				0, 0, 0, 0,
				"Pentium 4"	/* Default */
			},
			intel686_p4_cpu_setup
		} }
	},
	{
		"AuthenticAMD",
		CPUVENDOR_AMD,
		"AMD",
		/* Family 4 */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, "Am486DX2 W/T",
				0, 0, 0, "Am486DX2 W/B",
				"Am486DX4 W/T or Am5x86 W/T 150",
				"Am486DX4 W/B or Am5x86 W/B 150", 0, 0,
				0, 0, "Am5x86 W/T 133/160",
				"Am5x86 W/B 133/160",
				"Am486 or Am5x86"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				"K5", "K5", "K5", "K5", 0, 0, "K6",
				"K6", "K6-2", "K6-III", 0, 0, 0,
				"K6-2+/III+", 0, 0,
				"K5 or K6"		/* Default */
			},
			amd_family5_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				0, "Athlon Model 1", "Athlon Model 2",
				"Duron Model 3",
				"Athlon Model 4",
				0, "Athlon XP Model 6",
				"Duron Model 7",
				"Athlon XP Model 8",
				0, "Athlon XP Model 10",
				0, 0, 0, 0, 0,
				"K7"		/* Default */
			},
			amd_family6_setup
		},
		/* Family 7 */
		{
			CPUCLASS_686,
		} ,
		/* Family 8 */
		{
			CPUCLASS_686,
		} ,
		/* Family 9 */
		{
			CPUCLASS_686,
		} ,
		/* Family A */
		{
			CPUCLASS_686,
		} ,
		/* Family B */
		{
			CPUCLASS_686,
		} ,
		/* Family C */
		{
			CPUCLASS_686,
		} ,
		/* Family D */
		{
			CPUCLASS_686,
		} ,
		/* Family E */
		{
			CPUCLASS_686,
		} ,
		/* Family F */
		{
			CPUCLASS_686,
			{
				0, 0, 0, 0, "Athlon64",
				"Opteron or Athlon64FX", 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"AMD64"			/* DEFAULT */
			},
			amd_family6_setup
		} }
	},
	{
		"CyrixInstead",
		CPUVENDOR_CYRIX,
		"Cyrix",
		/* Family 4 */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, "MediaGX", 0, 0, 0, 0, "5x86", 0, 0,
				0, 0, 0, 0,
				"486 class"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, "6x86", 0, "GXm", 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0,
				"586 class"	/* Default */
			},
			cyrix6x86_cpu_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				"6x86MX", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0,
				"686 class"	/* Default */
			},
			NULL
		} }
	},
	{
		"CentaurHauls",
		CPUVENDOR_IDT,
		"IDT",
		/* Family 4, not available from IDT */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"		/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, 0, 0, "WinChip C6", 0, 0, 0,
				"WinChip 2", "WinChip 3", 0, 0, 0, 0, 0, 0,
				"WinChip"		/* Default */
			},
			winchip_cpu_setup
		},
		/* Family 6 */
		{
			CPUCLASS_686,
			{
				0, 0, 0, 0, 0, 0,
				"C3 Samuel",
				"C3 Samuel 2/Ezra",
				"C3 Ezra-T",
				"C3 Nehemiah", "C3 Esther", 0, 0, 0, 0, 0,
				"C3"		/* Default */
			},
			cyrix3_cpu_setup
		} }
	},
	{
		"GenuineTMx86",
		CPUVENDOR_TRANSMETA,
		"Transmeta",
		/* Family 4, not available from Transmeta */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"		/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, 0, 0, "TMS5x00", 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0, 0,
				"TMS5x00"		/* Default */
			},
			tm86_cpu_setup
		},
		/* Family 6, not yet available from Transmeta */
		{
			CPUCLASS_686,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"686 class"		/* Default */
			},
			NULL
		},
		/* Family 7 */
		{
			CPUCLASS_686,
		} ,
		/* Family 8 */
		{
			CPUCLASS_686,
		} ,
		/* Family 9 */
		{
			CPUCLASS_686,
		} ,
		/* Family A */
		{
			CPUCLASS_686,
		} ,
		/* Family B */
		{
			CPUCLASS_686,
		} ,
		/* Family C */
		{
			CPUCLASS_686,
		} ,
		/* Family D */
		{
			CPUCLASS_686,
		} ,
		/* Family E */
		{
			CPUCLASS_686,
		} ,
		/* Family F */
		{
			/* Extended processor family - Transmeta Efficeon */
			CPUCLASS_686,
			{
				0, 0, "TM8000", "TM8000",
				0, 0, 0, 0,
				0, 0, 0, 0,
				0, 0, 0, 0,
				"TM8000"	/* Default */
			},
			tm86_cpu_setup
		} }
	},
	{
		"Geode by NSC",
		CPUVENDOR_NS,
		"National Semiconductor",
		/* Family 4, not available from National Semiconductor */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				0, 0, 0, 0, "Geode GX1", 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0,
				"586 class"	/* Default */
			},
			natsem6x86_cpu_setup
		} }
	},
	{
		"SiS SiS SiS ",
		CPUVENDOR_SIS,
		"SiS",
		/* Family 4, not available from SiS */
		{ {
			CPUCLASS_486,
			{
				0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0, 0, 0,
				"486 class"	/* Default */
			},
			NULL
		},
		/* Family 5 */
		{
			CPUCLASS_586,
			{
				"SiS55x", 0, 0, 0, 0, 0, 0, 0, 0, 0,
				0, 0, 0, 0, 0, 0,
				"586 class"	/* Default */
			},
			NULL
		} }
	}
};
870
/*
 * Feature-bit-to-name tables used when printing the CPU feature list
 * at attach time.  Each table pairs a CPUID feature mask with its
 * printable name.
 */

/* CPUID leaf 1, %edx feature bits. */
const struct cpu_cpuid_feature i386_cpuid_features[] = {
	{ CPUID_FPU,	"FPU" },
	{ CPUID_VME,	"V86" },
	{ CPUID_DE,	"DE" },
	{ CPUID_PSE,	"PSE" },
	{ CPUID_TSC,	"TSC" },
	{ CPUID_MSR,	"MSR" },
	{ CPUID_PAE,	"PAE" },
	{ CPUID_MCE,	"MCE" },
	{ CPUID_CX8,	"CX8" },
	{ CPUID_APIC,	"APIC" },
	{ CPUID_SYS1,	"SYS" },
	{ CPUID_SEP,	"SEP" },
	{ CPUID_MTRR,	"MTRR" },
	{ CPUID_PGE,	"PGE" },
	{ CPUID_MCA,	"MCA" },
	{ CPUID_CMOV,	"CMOV" },
	{ CPUID_PAT,	"PAT" },
	{ CPUID_PSE36,	"PSE36" },
	{ CPUID_PSN,	"PSN" },
	{ CPUID_CFLUSH,	"CFLUSH" },
	{ CPUID_DS,	"DS" },
	{ CPUID_ACPI,	"ACPI" },
	{ CPUID_MMX,	"MMX" },
	{ CPUID_FXSR,	"FXSR" },
	{ CPUID_SSE,	"SSE" },
	{ CPUID_SSE2,	"SSE2" },
	{ CPUID_SS,	"SS" },
	{ CPUID_HTT,	"HTT" },
	{ CPUID_TM,	"TM" },
	{ CPUID_PBE,	"PBE" }
};

/* Extended CPUID leaf 0x80000001, %edx feature bits. */
const struct cpu_cpuid_feature i386_ecpuid_features[] = {
	{ CPUID_MPC,		"MPC" },
	{ CPUID_NXE,		"NXE" },
	{ CPUID_MMXX,		"MMXX" },
	{ CPUID_FFXSR,		"FFXSR" },
	{ CPUID_PAGE1GB,	"PAGE1GB" },
	{ CPUID_RDTSCP,		"RDTSCP" },
	{ CPUID_LONG,		"LONG" },
	{ CPUID_3DNOW2,		"3DNOW2" },
	{ CPUID_3DNOW,		"3DNOW" }
};

/* CPUID leaf 1, %ecx feature bits. */
const struct cpu_cpuid_feature i386_cpuid_ecxfeatures[] = {
	{ CPUIDECX_SSE3,	"SSE3" },
	{ CPUIDECX_PCLMUL,	"PCLMUL" },
	{ CPUIDECX_DTES64,	"DTES64" },
	{ CPUIDECX_MWAIT,	"MWAIT" },
	{ CPUIDECX_DSCPL,	"DS-CPL" },
	{ CPUIDECX_VMX,		"VMX" },
	{ CPUIDECX_SMX,		"SMX" },
	{ CPUIDECX_EST,		"EST" },
	{ CPUIDECX_TM2,		"TM2" },
	{ CPUIDECX_SSSE3,	"SSSE3" },
	{ CPUIDECX_CNXTID,	"CNXT-ID" },
	{ CPUIDECX_SDBG,	"SDBG" },
	{ CPUIDECX_FMA3,	"FMA3" },
	{ CPUIDECX_CX16,	"CX16" },
	{ CPUIDECX_XTPR,	"xTPR" },
	{ CPUIDECX_PDCM,	"PDCM" },
	{ CPUIDECX_PCID,	"PCID" },
	{ CPUIDECX_DCA,		"DCA" },
	{ CPUIDECX_SSE41,	"SSE4.1" },
	{ CPUIDECX_SSE42,	"SSE4.2" },
	{ CPUIDECX_X2APIC,	"x2APIC" },
	{ CPUIDECX_MOVBE,	"MOVBE" },
	{ CPUIDECX_POPCNT,	"POPCNT" },
	{ CPUIDECX_DEADLINE,	"DEADLINE" },
	{ CPUIDECX_AES,		"AES" },
	{ CPUIDECX_XSAVE,	"XSAVE" },
	{ CPUIDECX_OSXSAVE,	"OSXSAVE" },
	{ CPUIDECX_AVX,		"AVX" },
	{ CPUIDECX_F16C,	"F16C" },
	{ CPUIDECX_RDRAND,	"RDRAND" },
	{ CPUIDECX_HV,		"HV" },
};

/* Extended CPUID leaf 0x80000001, %ecx feature bits. */
const struct cpu_cpuid_feature i386_ecpuid_ecxfeatures[] = {
	{ CPUIDECX_LAHF,	"LAHF" },
	{ CPUIDECX_CMPLEG,	"CMPLEG" },
	{ CPUIDECX_SVM,		"SVM" },
	{ CPUIDECX_EAPICSP,	"EAPICSP" },
	{ CPUIDECX_AMCR8,	"AMCR8" },
	{ CPUIDECX_ABM,		"ABM" },
	{ CPUIDECX_SSE4A,	"SSE4A" },
	{ CPUIDECX_MASSE,	"MASSE" },
	{ CPUIDECX_3DNOWP,	"3DNOWP" },
	{ CPUIDECX_OSVW,	"OSVW" },
	{ CPUIDECX_IBS,		"IBS" },
	{ CPUIDECX_XOP,		"XOP" },
	{ CPUIDECX_SKINIT,	"SKINIT" },
	{ CPUIDECX_WDT,		"WDT" },
	{ CPUIDECX_LWP,		"LWP" },
	{ CPUIDECX_FMA4,	"FMA4" },
	{ CPUIDECX_TCE,		"TCE" },
	{ CPUIDECX_NODEID,	"NODEID" },
	{ CPUIDECX_TBM,		"TBM" },
	{ CPUIDECX_TOPEXT,	"TOPEXT" },
	{ CPUIDECX_CPCTR,	"CPCTR" },
	{ CPUIDECX_DBKP,	"DBKP" },
	{ CPUIDECX_PERFTSC,	"PERFTSC" },
	{ CPUIDECX_PCTRL3,	"PCTRL3" },
	{ CPUIDECX_MWAITX,	"MWAITX" },
};

/* Structured extended features (CPUID leaf 7, subleaf 0), %ebx bits. */
const struct cpu_cpuid_feature cpu_seff0_ebxfeatures[] = {
	{ SEFF0EBX_FSGSBASE,	"FSGSBASE" },
	{ SEFF0EBX_TSC_ADJUST,	"TSC_ADJUST" },
	{ SEFF0EBX_SGX,		"SGX" },
	{ SEFF0EBX_BMI1,	"BMI1" },
	{ SEFF0EBX_HLE,		"HLE" },
	{ SEFF0EBX_AVX2,	"AVX2" },
	{ SEFF0EBX_SMEP,	"SMEP" },
	{ SEFF0EBX_BMI2,	"BMI2" },
	{ SEFF0EBX_ERMS,	"ERMS" },
	{ SEFF0EBX_INVPCID,	"INVPCID" },
	{ SEFF0EBX_RTM,		"RTM" },
	{ SEFF0EBX_PQM,		"PQM" },
	{ SEFF0EBX_MPX,		"MPX" },
	{ SEFF0EBX_AVX512F,	"AVX512F" },
	{ SEFF0EBX_AVX512DQ,	"AVX512DQ" },
	{ SEFF0EBX_RDSEED,	"RDSEED" },
	{ SEFF0EBX_ADX,		"ADX" },
	{ SEFF0EBX_SMAP,	"SMAP" },
	{ SEFF0EBX_AVX512IFMA,	"AVX512IFMA" },
	{ SEFF0EBX_PCOMMIT,	"PCOMMIT" },
	{ SEFF0EBX_CLFLUSHOPT,	"CLFLUSHOPT" },
	{ SEFF0EBX_CLWB,	"CLWB" },
	{ SEFF0EBX_PT,		"PT" },
	{ SEFF0EBX_AVX512PF,	"AVX512PF" },
	{ SEFF0EBX_AVX512ER,	"AVX512ER" },
	{ SEFF0EBX_AVX512CD,	"AVX512CD" },
	{ SEFF0EBX_SHA,		"SHA" },
	{ SEFF0EBX_AVX512BW,	"AVX512BW" },
	{ SEFF0EBX_AVX512VL,	"AVX512VL" },
};

/* Structured extended features (CPUID leaf 7, subleaf 0), %ecx bits. */
const struct cpu_cpuid_feature cpu_seff0_ecxfeatures[] = {
	{ SEFF0ECX_PREFETCHWT1,	"PREFETCHWT1" },
	{ SEFF0ECX_UMIP,	"UMIP" },
	{ SEFF0ECX_AVX512VBMI,	"AVX512VBMI" },
	{ SEFF0ECX_PKU,		"PKU" },
	{ SEFF0ECX_WAITPKG,	"WAITPKG" },
	};

/* Structured extended features (CPUID leaf 7, subleaf 0), %edx bits. */
const struct cpu_cpuid_feature cpu_seff0_edxfeatures[] = {
	{ SEFF0EDX_AVX512_4FNNIW, "AVX512FNNIW" },
	{ SEFF0EDX_AVX512_4FMAPS, "AVX512FMAPS" },
	{ SEFF0EDX_SRBDS_CTRL,	"SRBDS_CTRL" },
	{ SEFF0EDX_MD_CLEAR,	"MD_CLEAR" },
	{ SEFF0EDX_TSXFA,	"TSXFA" },
	{ SEFF0EDX_IBRS,	"IBRS,IBPB" },
	{ SEFF0EDX_STIBP,	"STIBP" },
	{ SEFF0EDX_L1DF,	"L1DF" },
	 /* SEFF0EDX_ARCH_CAP (not printed) */
	{ SEFF0EDX_SSBD,	"SSBD" },
};

/* Thermal/power management (CPUID leaf 6), %eax bits. */
const struct cpu_cpuid_feature cpu_tpm_eaxfeatures[] = {
	{ TPM_SENSOR,		"SENSOR" },
	{ TPM_ARAT,		"ARAT" },
};

/* Architectural performance monitoring (CPUID leaf 0xa), %eax. */
const struct cpu_cpuid_feature i386_cpuid_eaxperf[] = {
	{ CPUIDEAX_VERID,	"PERF" },
};

/* Advanced power management (CPUID leaf 0x80000007), %edx bits. */
const struct cpu_cpuid_feature i386_cpuid_edxapmi[] = {
	{ CPUIDEDX_ITSC,	"ITSC" },
};

/* XSAVE capabilities (CPUID leaf 0xd, subleaf 1), %eax bits. */
const struct cpu_cpuid_feature cpu_xsave_extfeatures[] = {
	{ XSAVE_XSAVEOPT,	"XSAVEOPT" },
	{ XSAVE_XSAVEC,		"XSAVEC" },
	{ XSAVE_XGETBV1,	"XGETBV1" },
	{ XSAVE_XSAVES,		"XSAVES" },
};
1050
1051void
1052winchip_cpu_setup(struct cpu_info *ci)
1053{
1054
1055	switch ((ci->ci_signature >> 4) & 15) { /* model */
1056	case 4: /* WinChip C6 */
1057		ci->ci_feature_flags &= ~CPUID_TSC;
1058		/* Disable RDTSC instruction from user-level. */
1059		lcr4(rcr4() | CR4_TSD);
1060		printf("%s: TSC disabled\n", ci->ci_dev->dv_xname);
1061		break;
1062	}
1063}
1064
1065#if !defined(SMALL_KERNEL)
1066void
1067cyrix3_setperf_setup(struct cpu_info *ci)
1068{
1069	if (cpu_ecxfeature & CPUIDECX_EST) {
1070		if (rdmsr(MSR_MISC_ENABLE) & (1 << 16))
1071			est_init(ci, CPUVENDOR_VIA);
1072		else
1073			printf("%s: Enhanced SpeedStep disabled by BIOS\n",
1074			    ci->ci_dev->dv_xname);
1075	}
1076}
1077#endif
1078
/*
 * Vendor setup for VIA/Centaur C3 and later CPUs: fix up feature
 * flags on early models, attach the temperature sensor on C7-M/Nano,
 * and probe/enable the PadLock RNG and crypto engines.
 */
void
cyrix3_cpu_setup(struct cpu_info *ci)
{
	int model = (ci->ci_signature >> 4) & 15;
	int step = ci->ci_signature & 15;

	u_int64_t msreg;
	u_int32_t regs[4];
	unsigned int val;
#if !defined(SMALL_KERNEL)
	extern void (*pagezero)(void *, size_t);
	extern void i686_pagezero(void *, size_t);

	pagezero = i686_pagezero;

	setperf_setup = cyrix3_setperf_setup;
#endif

	switch (model) {
	/* Possible earlier models */
	case 0: case 1: case 2:
	case 3: case 4: case 5:
		break;

	case 6: /* C3 Samuel 1 */
	case 7: /* C3 Samuel 2 or C3 Ezra */
	case 8: /* C3 Ezra-T */
		/* 3DNow! is reported in extended CPUID leaf, bit 31. */
		cpuid(0x80000001, regs);
		val = regs[3];
		if (val & (1U << 31)) {
			cpu_feature |= CPUID_3DNOW;
		} else {
			cpu_feature &= ~CPUID_3DNOW;
		}
		break;

	case 9:
		if (step < 3)
			break;
		/*
		 * C3 Nehemiah & later: fall through.
		 */

	case 10: /* C7-M Type A */
	case 13: /* C7-M Type D */
	case 15: /* Nano */
#if !defined(SMALL_KERNEL)
		if (CPU_IS_PRIMARY(ci) &&
		    (model == 10 || model == 13 || model == 15)) {
			/* Setup the sensors structures */
			strlcpy(ci->ci_sensordev.xname, ci->ci_dev->dv_xname,
			    sizeof(ci->ci_sensordev.xname));
			ci->ci_sensor.type = SENSOR_TEMP;
			sensor_task_register(ci, via_update_sensor, 5);
			sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
			sensordev_install(&ci->ci_sensordev);
		}
#endif
		/* FALLTHROUGH: PadLock probing below applies here too */

	default:
		/*
		 * C3 Nehemiah/Esther & later models:
		 * First we check for extended feature flags, and then
		 * (if present) retrieve the ones at 0xC0000001.  In this
		 * bit 2 tells us if the RNG is present.  Bit 3 tells us
		 * if the RNG has been enabled.  In order to use the RNG
		 * we need 3 things:  We need an RNG, we need the FXSR bit
		 * enabled in cr4 (SSE/SSE2 stuff), and we need to have
		 * Bit 6 of MSR 0x110B set to 1 (the default), which will
		 * show up as bit 3 set here.
		 */
		cpuid(0xC0000000, regs); /* Check for RNG */
		val = regs[0];
		if (val >= 0xC0000001) {
			cpuid(0xC0000001, regs);
			val = regs[3];
		} else
			val = 0;

		if (val & (C3_CPUID_HAS_RNG | C3_CPUID_HAS_ACE))
			printf("%s:", ci->ci_dev->dv_xname);

		/* Enable RNG if present and disabled */
		if (val & C3_CPUID_HAS_RNG) {
			extern int viac3_rnd_present;

			if (!(val & C3_CPUID_DO_RNG)) {
				msreg = rdmsr(0x110B);
				msreg |= 0x40;
				wrmsr(0x110B, msreg);
			}
			viac3_rnd_present = 1;
			printf(" RNG");
		}

		/* Enable AES engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_AES;
#endif /* CRYPTO */
			printf(" AES");
		}

		/* Enable ACE2 engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE2) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE2)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_AESCTR;
#endif /* CRYPTO */
			printf(" AES-CTR");
		}

		/* Enable SHA engine if present and disabled */
		if (val & C3_CPUID_HAS_PHE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PHE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28/**/);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_SHA;
#endif /* CRYPTO */
			printf(" SHA1 SHA256");
		}

		/* Enable MM engine if present and disabled */
		if (val & C3_CPUID_HAS_PMM) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PMM)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28/**/);
				wrmsr(0x1107, msreg);
			}
			i386_has_xcrypt |= C3_HAS_MM;
#endif /* CRYPTO */
			printf(" RSA");
		}

		printf("\n");
		break;
	}
}
1230
1231#if !defined(SMALL_KERNEL)
1232void
1233via_update_sensor(void *args)
1234{
1235	struct cpu_info *ci = (struct cpu_info *) args;
1236	u_int64_t msr;
1237
1238	switch (ci->ci_model) {
1239	case 0xa:
1240	case 0xd:
1241		msr = rdmsr(MSR_C7M_TMTEMPERATURE);
1242		break;
1243	case 0xf:
1244		msr = rdmsr(MSR_CENT_TMTEMPERATURE);
1245		break;
1246	}
1247	ci->ci_sensor.value = (msr & 0xffffff);
1248	/* micro degrees */
1249	ci->ci_sensor.value *= 1000000;
1250	ci->ci_sensor.value += 273150000;
1251	ci->ci_sensor.flags &= ~SENSOR_FINVALID;
1252}
1253#endif
1254
/*
 * Model-specific setup for Cyrix 6x86 (M1) and MediaGX (GXm) parts.
 * Applies known hardware-bug workarounds by rewriting the Cyrix
 * configuration registers via cyrix_read_reg()/cyrix_write_reg().
 */
void
cyrix6x86_cpu_setup(struct cpu_info *ci)
{
	extern int clock_broken_latch;

	switch ((ci->ci_signature >> 4) & 15) { /* model */
	case -1: /* M1 w/o cpuid */
		 /* XXX unreachable: the switch operand is masked to
		  * 0..15 so it can never equal -1. */
	case 2:	/* M1 */
		/* set up various cyrix registers */
		/* Enable suspend on halt */
		cyrix_write_reg(0xc2, cyrix_read_reg(0xc2) | 0x08);
		/* enable access to ccr4/ccr5 */
		cyrix_write_reg(0xC3, cyrix_read_reg(0xC3) | 0x10);
		/* cyrix's workaround  for the "coma bug" */
		cyrix_write_reg(0x31, cyrix_read_reg(0x31) | 0xf8);
		cyrix_write_reg(0x32, cyrix_read_reg(0x32) | 0x7f);
		/* read-then-clear register 0x33 */
		cyrix_read_reg(0x33); cyrix_write_reg(0x33, 0);
		cyrix_write_reg(0x3c, cyrix_read_reg(0x3c) | 0x87);
		/* disable access to ccr4/ccr5 */
		cyrix_write_reg(0xC3, cyrix_read_reg(0xC3) & ~0x10);

		printf("%s: xchg bug workaround performed\n",
		    ci->ci_dev->dv_xname);
		break;	/* NOTE(review): old comment asked "fallthrough?"
			 * but the code deliberately breaks here. */
	case 4:	/* GXm */
		/* Unset the TSC bit until calibrate_delay() gets fixed. */
		clock_broken_latch = 1;
		curcpu()->ci_feature_flags &= ~CPUID_TSC;
		printf("%s: TSC disabled\n", ci->ci_dev->dv_xname);
		break;
	}
}
1287
1288void
1289natsem6x86_cpu_setup(struct cpu_info *ci)
1290{
1291	extern int clock_broken_latch;
1292	int model = (ci->ci_signature >> 4) & 15;
1293
1294	clock_broken_latch = 1;
1295	switch (model) {
1296	case 4:
1297		cpu_feature &= ~CPUID_TSC;
1298		printf("%s: TSC disabled\n", ci->ci_dev->dv_xname);
1299		break;
1300	}
1301}
1302
1303void
1304intel586_cpu_setup(struct cpu_info *ci)
1305{
1306	if (!cpu_f00f_bug) {
1307		fix_f00f();
1308		printf("%s: F00F bug workaround installed\n",
1309		    ci->ci_dev->dv_xname);
1310	}
1311}
1312
#if !defined(SMALL_KERNEL)
/*
 * setperf backend hook for AMD family 5 (K6) parts: initialize
 * PowerNow! frequency scaling.
 */
void
amd_family5_setperf_setup(struct cpu_info *ci)
{
	k6_powernow_init();
}
#endif
1320
/*
 * Model-specific setup for AMD family 5 (K5/K6) CPUs: fix up the
 * K5 model 0 PGE misreporting and hook up PowerNow! on late K6s.
 */
void
amd_family5_setup(struct cpu_info *ci)
{
	int model = (ci->ci_signature >> 4) & 15;

	switch (model) {
	case 0:		/* AMD-K5 Model 0 */
		/*
		 * According to the AMD Processor Recognition App Note,
		 * the AMD-K5 Model 0 uses the wrong bit to indicate
		 * support for global PTEs, instead using bit 9 (APIC)
		 * rather than bit 13 (i.e. "0x200" vs. 0x2000".  Oops!).
		 */
		if (cpu_feature & CPUID_APIC)
			cpu_feature = (cpu_feature & ~CPUID_APIC) | CPUID_PGE;
		/*
		 * XXX But pmap_pg_g is already initialized -- need to kick
		 * XXX the pmap somehow.  How does the MP branch do this?
		 */
		break;
	case 12:
	case 13:
		/* K6-2+/K6-III+ support PowerNow! frequency scaling. */
#if !defined(SMALL_KERNEL)
		setperf_setup = amd_family5_setperf_setup;
#endif
		break;
	}
}
1349
#if !defined(SMALL_KERNEL)
/*
 * setperf backend hook for AMD family 6+ CPUs: pick the frequency
 * scaling driver matching the base family (K7 vs. K8), and the
 * k1x driver for extended families >= 0x10.
 */
void
amd_family6_setperf_setup(struct cpu_info *ci)
{
	int family = (ci->ci_signature >> 8) & 15;

	switch (family) {
	case 6:
		k7_powernow_init();
		break;
	case 15:
		k8_powernow_init();
		break;
	}
	/* ci_family includes the extended-family bits. */
	if (ci->ci_family >= 0x10)
		k1x_init(ci);
}
#endif
1368
1369void
1370amd_family6_setup(struct cpu_info *ci)
1371{
1372#if !defined(SMALL_KERNEL)
1373	int family = (ci->ci_signature >> 8) & 15;
1374	extern void (*pagezero)(void *, size_t);
1375	extern void sse2_pagezero(void *, size_t);
1376	extern void i686_pagezero(void *, size_t);
1377
1378	if (cpu_feature & CPUID_SSE2)
1379		pagezero = sse2_pagezero;
1380	else
1381		pagezero = i686_pagezero;
1382
1383	setperf_setup = amd_family6_setperf_setup;
1384
1385	if (family == 0xf) {
1386		amd64_errata(ci);
1387	}
1388#endif
1389}
1390
1391#if !defined(SMALL_KERNEL)
1392/*
1393 * Temperature read on the CPU is relative to the maximum
1394 * temperature supported by the CPU, Tj(Max).
1395 * Refer to:
1396 * 64-ia-32-architectures-software-developer-vol-3c-part-3-manual.pdf
1397 * Section 35 and
1398 * http://www.intel.com/content/dam/www/public/us/en/documents/
1399 * white-papers/cpu-monitoring-dts-peci-paper.pdf
1400 *
1401 * The temperature on Intel CPUs can be between 70 and 105 degC, since
1402 * Westmere we can read the TJmax from the die. For older CPUs we have
1403 * to guess or use undocumented MSRs. Then we subtract the temperature
1404 * portion of thermal status from max to get current temperature.
1405 */
/*
 * Periodic sensor task for Intel Core-family CPUs: derive the die
 * temperature from MSR_THERM_STATUS (which reports degrees below
 * TjMax) and publish it in ci->ci_sensor as micro-degrees Kelvin.
 */
void
intelcore_update_sensor(void *args)
{
	struct cpu_info *ci = (struct cpu_info *) args;
	u_int64_t msr;
	int max = 100;	/* default TjMax guess, degrees C */

	/* Only some Core family chips have MSR_TEMPERATURE_TARGET. */
	if (ci->ci_model == 0x0e &&
	    (rdmsr(MSR_TEMPERATURE_TARGET_UNDOCUMENTED) &
	     MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED))
		max = 85;

	/*
	 * Newer CPUs can tell you what their max temperature is.
	 * See: '64-ia-32-architectures-software-developer-
	 * vol-3c-part-3-manual.pdf'
	 * (The excluded models are Atom variants without the MSR.)
	 */
	if (ci->ci_model > 0x17 && ci->ci_model != 0x1c &&
	    ci->ci_model != 0x26 && ci->ci_model != 0x27 &&
	    ci->ci_model != 0x35 && ci->ci_model != 0x36)
		max = MSR_TEMPERATURE_TARGET_TJMAX(
		    rdmsr(MSR_TEMPERATURE_TARGET));

	msr = rdmsr(MSR_THERM_STATUS);
	if (msr & MSR_THERM_STATUS_VALID_BIT) {
		/* Reading is TjMax minus the reported delta. */
		ci->ci_sensor.value = max - MSR_THERM_STATUS_TEMP(msr);
		/* micro degrees */
		ci->ci_sensor.value *= 1000000;
		/* kelvin */
		ci->ci_sensor.value += 273150000;
		ci->ci_sensor.flags &= ~SENSOR_FINVALID;
	} else {
		/* No valid reading available this pass. */
		ci->ci_sensor.value = 0;
		ci->ci_sensor.flags |= SENSOR_FINVALID;
	}
}
1443
/*
 * Register the on-die temperature sensor for Intel 686-class CPUs.
 * Only done on the primary CPU, and only when the thermal-monitor
 * sensor feature bit (TPM_SENSOR) is present.
 */
void
intel686_cpusensors_setup(struct cpu_info *ci)
{
	if (!CPU_IS_PRIMARY(ci) || (ci->ci_feature_tpmflags & TPM_SENSOR) == 0)
		return;

	/* Setup the sensors structures */
	strlcpy(ci->ci_sensordev.xname, ci->ci_dev->dv_xname,
	    sizeof(ci->ci_sensordev.xname));
	ci->ci_sensor.type = SENSOR_TEMP;
	/* Poll intelcore_update_sensor() every 5 seconds. */
	sensor_task_register(ci, intelcore_update_sensor, 5);
	sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
	sensordev_install(&ci->ci_sensordev);
}
1458#endif
1459
#if !defined(SMALL_KERNEL)
/*
 * setperf backend hook for Intel 686-class CPUs: use Enhanced
 * SpeedStep when the CPU advertises it and the BIOS left it
 * enabled, otherwise fall back to P4 thermal-clock-control (TCC)
 * throttling when ACPI+TM are available.
 */
void
intel686_setperf_setup(struct cpu_info *ci)
{
	int family = (ci->ci_signature >> 8) & 15;
	int step = ci->ci_signature & 15;

	if (cpu_ecxfeature & CPUIDECX_EST) {
		/* MISC_ENABLE bit 16 = Enhanced SpeedStep enable. */
		if (rdmsr(MSR_MISC_ENABLE) & (1 << 16))
			est_init(ci, CPUVENDOR_INTEL);
		else
			printf("%s: Enhanced SpeedStep disabled by BIOS\n",
			    ci->ci_dev->dv_xname);
	} else if ((cpu_feature & (CPUID_ACPI | CPUID_TM)) ==
	    (CPUID_ACPI | CPUID_TM))
		p4tcc_init(family, step);
}
#endif
1478
1479void
1480intel686_common_cpu_setup(struct cpu_info *ci)
1481{
1482
1483#if !defined(SMALL_KERNEL)
1484	setperf_setup = intel686_setperf_setup;
1485	cpusensors_setup = intel686_cpusensors_setup;
1486	{
1487	extern void (*pagezero)(void *, size_t);
1488	extern void sse2_pagezero(void *, size_t);
1489	extern void i686_pagezero(void *, size_t);
1490
1491	if (cpu_feature & CPUID_SSE2)
1492		pagezero = sse2_pagezero;
1493	else
1494		pagezero = i686_pagezero;
1495	}
1496#endif
1497	/*
1498	 * Make sure SYSENTER is disabled.
1499	 */
1500	if (cpu_feature & CPUID_SEP)
1501		wrmsr(MSR_SYSENTER_CS, 0);
1502}
1503
/*
 * Model-specific setup for Intel P6-family (686) CPUs: common setup
 * plus per-model errata and privacy fixes.
 */
void
intel686_cpu_setup(struct cpu_info *ci)
{
	int model = (ci->ci_signature >> 4) & 15;
	int step = ci->ci_signature & 15;
	u_quad_t msr119;

	intel686_common_cpu_setup(ci);

	/*
	 * Original PPro returns SYSCALL in CPUID but is non-functional.
	 * From Intel Application Note #485.
	 */
	if ((model == 1) && (step < 3))
		ci->ci_feature_flags &= ~CPUID_SEP;

	/*
	 * Disable the Pentium3 serial number.
	 */
	if ((model == 7) && (ci->ci_feature_flags & CPUID_PSN)) {
		/* Set the PSN-disable bit in BBL_CR_CTL. */
		msr119 = rdmsr(MSR_BBL_CR_CTL);
		msr119 |= 0x0000000000200000LL;
		wrmsr(MSR_BBL_CR_CTL, msr119);

		printf("%s: disabling processor serial number\n",
			 ci->ci_dev->dv_xname);
		ci->ci_feature_flags &= ~CPUID_PSN;
		/* Hide the PSN cpuid leaf as well. */
		ci->ci_level = 2;
	}

#if !defined(SMALL_KERNEL)
	/* Early PIII (model 8 step 1) needs special cpuspeed handling. */
	p3_early = (model == 8 && step == 1) ? 1 : 0;
	update_cpuspeed = p3_update_cpuspeed;
#endif
}
1539
/*
 * Model-specific setup for Intel Pentium 4: common 686 setup plus
 * the P4 cpuspeed recalculation hook.
 */
void
intel686_p4_cpu_setup(struct cpu_info *ci)
{
	intel686_common_cpu_setup(ci);

#if !defined(SMALL_KERNEL)
	update_cpuspeed = p4_update_cpuspeed;
#endif
}
1549
/*
 * Model-specific setup for Transmeta TM86xx (Crusoe) CPUs:
 * initialize LongRun frequency/voltage scaling.
 */
void
tm86_cpu_setup(struct cpu_info *ci)
{
#if !defined(SMALL_KERNEL)
	longrun_init();
#endif
}
1557
1558char *
1559intel686_cpu_name(int model)
1560{
1561	char *ret = NULL;
1562
1563	switch (model) {
1564	case 5:
1565		switch (cpu_cache_edx & 0xFF) {
1566		case 0x40:
1567		case 0x41:
1568			ret = "Celeron";
1569			break;
1570		/* 0x42 should not exist in this model. */
1571		case 0x43:
1572			ret = "Pentium II";
1573			break;
1574		case 0x44:
1575		case 0x45:
1576			ret = "Pentium II Xeon";
1577			break;
1578		}
1579		break;
1580	case 7:
1581		switch (cpu_cache_edx & 0xFF) {
1582		/* 0x40 - 0x42 should not exist in this model. */
1583		case 0x43:
1584			ret = "Pentium III";
1585			break;
1586		case 0x44:
1587		case 0x45:
1588			ret = "Pentium III Xeon";
1589			break;
1590		}
1591		break;
1592	}
1593
1594	return (ret);
1595}
1596
/*
 * Name VIA/Cyrix C3 model 7 cores: steppings below 8 are Samuel 2,
 * later steppings are Ezra.  Returns NULL for any other model.
 */
char *
cyrix3_cpu_name(int model, int step)
{
	if (model != 7)
		return NULL;

	return step < 8 ? "C3 Samuel 2" : "C3 Ezra";
}
1612
1613/*
1614 * Print identification for the given CPU.
1615 * XXX XXX
1616 * This is not as clean as one might like, because it references
1617 *
1618 * the "cpuid_level" and "cpu_vendor" globals.
1619 * cpuid_level isn't so bad, since both CPU's will hopefully
1620 * be of the same level.
1621 *
1622 * The Intel multiprocessor spec doesn't give us the cpu_vendor
1623 * information; however, the chance of multi-vendor SMP actually
1624 * ever *working* is sufficiently low that it's probably safe to assume
1625 * all processors are of the same vendor.
1626 */
1627void
1628identifycpu(struct cpu_info *ci)
1629{
1630	const char *name, *modifier, *vendorname, *token;
1631	int class = CPUCLASS_486, vendor, i, max;
1632	int family, model, step, modif, cachesize;
1633	const struct cpu_cpuid_nameclass *cpup = NULL;
1634	char *brandstr_from, *brandstr_to;
1635	char *cpu_device = ci->ci_dev->dv_xname;
1636	int skipspace;
1637	extern uint32_t cpu_meltdown;
1638	uint64_t msr, nmsr;
1639
1640	if (cpuid_level == -1) {
1641		name = "486DX";
1642		vendor = CPUVENDOR_INTEL;
1643		vendorname = "Intel";
1644		model = -1;
1645		step = -1;
1646		class = CPUCLASS_486;
1647		ci->cpu_setup = NULL;
1648		modifier = "";
1649		token = "";
1650	} else {
1651		max = sizeof (i386_cpuid_cpus) / sizeof (i386_cpuid_cpus[0]);
1652		modif = (ci->ci_signature >> 12) & 3;
1653		family = (ci->ci_signature >> 8) & 15;
1654		ci->ci_family = family;
1655		model = (ci->ci_signature >> 4) & 15;
1656		ci->ci_model = model;
1657		step = ci->ci_signature & 15;
1658#ifdef CPUDEBUG
1659		printf("%s: cpuid level %d cache eax %x ebx %x ecx %x edx %x\n",
1660		    cpu_device, cpuid_level, cpu_cache_eax, cpu_cache_ebx,
1661		    cpu_cache_ecx, cpu_cache_edx);
1662#endif
1663		if (family < CPU_MINFAMILY)
1664			panic("identifycpu: strange family value");
1665
1666		for (i = 0; i < max; i++) {
1667			if (!strncmp(cpu_vendor,
1668			    i386_cpuid_cpus[i].cpu_id, 12)) {
1669				cpup = &i386_cpuid_cpus[i];
1670				break;
1671			}
1672		}
1673
1674		if (cpup == NULL) {
1675			vendor = CPUVENDOR_UNKNOWN;
1676			if (cpu_vendor[0] != '\0')
1677				vendorname = &cpu_vendor[0];
1678			else
1679				vendorname = "Unknown";
1680			if (family > CPU_MAXFAMILY)
1681				family = CPU_MAXFAMILY;
1682			class = family - 3;
1683			if (class > CPUCLASS_686)
1684				class = CPUCLASS_686;
1685			modifier = "";
1686			name = "";
1687			token = "";
1688			ci->cpu_setup = NULL;
1689		} else {
1690			token = cpup->cpu_id;
1691			vendor = cpup->cpu_vendor;
1692			vendorname = cpup->cpu_vendorname;
1693			/*
1694			 * Special hack for the VIA C3 series.
1695			 *
1696			 * VIA bought Centaur Technology from IDT in Aug 1999
1697			 * and marketed the processors as VIA Cyrix III/C3.
1698			 */
1699			if (vendor == CPUVENDOR_IDT && family >= 6) {
1700				vendor = CPUVENDOR_VIA;
1701				vendorname = "VIA";
1702			}
1703			modifier = modifiers[modif];
1704			if (family > CPU_MAXFAMILY) {
1705				family = CPU_MAXFAMILY;
1706				model = CPU_DEFMODEL;
1707			} else if (model > CPU_MAXMODEL)
1708				model = CPU_DEFMODEL;
1709			i = family - CPU_MINFAMILY;
1710
1711			/* store extended family/model values for later use */
1712			if ((vendor == CPUVENDOR_INTEL &&
1713			    (family == 0x6 || family == 0xf)) ||
1714			    (vendor == CPUVENDOR_AMD && family == 0xf)) {
1715				ci->ci_family += (ci->ci_signature >> 20) &
1716				    0xff;
1717				ci->ci_model += ((ci->ci_signature >> 16) &
1718				    0x0f) << 4;
1719			}
1720
1721			/* Special hack for the PentiumII/III series. */
1722			if (vendor == CPUVENDOR_INTEL && family == 6 &&
1723			    (model == 5 || model == 7)) {
1724				name = intel686_cpu_name(model);
1725			/* Special hack for the VIA C3 series. */
1726			} else if (vendor == CPUVENDOR_VIA && family == 6 &&
1727			    model == 7) {
1728				name = cyrix3_cpu_name(model, step);
1729			/* Special hack for the TMS5x00 series. */
1730			} else if (vendor == CPUVENDOR_TRANSMETA &&
1731			    family == 5 && model == 4) {
1732				name = tm86_cpu_name(model);
1733			} else
1734				name = cpup->cpu_family[i].cpu_models[model];
1735			if (name == NULL) {
1736				name = cpup->cpu_family[i].cpu_models[CPU_DEFMODEL];
1737				if (name == NULL)
1738					name = "";
1739			}
1740			class = cpup->cpu_family[i].cpu_class;
1741			ci->cpu_setup = cpup->cpu_family[i].cpu_setup;
1742		}
1743	}
1744
1745	/* Find the amount of on-chip L2 cache. */
1746	cachesize = -1;
1747	if (vendor == CPUVENDOR_INTEL && cpuid_level >= 2 && family < 0xf) {
1748		int intel_cachetable[] = { 0, 128, 256, 512, 1024, 2048 };
1749
1750		if ((cpu_cache_edx & 0xFF) >= 0x40 &&
1751		    (cpu_cache_edx & 0xFF) <= 0x45)
1752			cachesize = intel_cachetable[(cpu_cache_edx & 0xFF) - 0x40];
1753	} else if (vendor == CPUVENDOR_AMD && class == CPUCLASS_686) {
1754		u_int regs[4];
1755		cpuid(0x80000000, regs);
1756
1757		if (regs[0] >= 0x80000006) {
1758			cpuid(0x80000006, regs);
1759			cachesize = (regs[2] >> 16);
1760		}
1761	}
1762
1763	if (ci->ci_feature_flags & CPUID_CFLUSH) {
1764		u_int regs[4];
1765
1766		/* to get the cacheline size you must do cpuid
1767		 * with eax 0x01
1768		 */
1769
1770		cpuid(0x01, regs);
1771		ci->ci_cflushsz = ((regs[1] >> 8) & 0xff) * 8;
1772	}
1773
1774	if (vendor == CPUVENDOR_INTEL) {
1775		/*
1776		 * PIII, Core Solo and Core Duo CPUs have known
1777		 * errata stating:
1778		 * "Page with PAT set to WC while associated MTRR is UC
1779		 * may consolidate to UC".
1780		 * Because of this it is best we just fallback to mtrrs
1781		 * in this case.
1782		 */
1783		if (ci->ci_family == 6 && ci->ci_model < 15)
1784		    ci->ci_feature_flags &= ~CPUID_PAT;
1785	}
1786
1787	/* Remove leading, trailing and duplicated spaces from cpu_brandstr */
1788	brandstr_from = brandstr_to = cpu_brandstr;
1789	skipspace = 1;
1790	while (*brandstr_from != '\0') {
1791		if (!skipspace || *brandstr_from != ' ') {
1792			skipspace = 0;
1793			*(brandstr_to++) = *brandstr_from;
1794		}
1795		if (*brandstr_from == ' ')
1796			skipspace = 1;
1797		brandstr_from++;
1798	}
1799	if (skipspace && brandstr_to > cpu_brandstr)
1800		brandstr_to--;
1801	*brandstr_to = '\0';
1802
1803	if (cpu_brandstr[0] == '\0') {
1804		snprintf(cpu_brandstr, 48 /* sizeof(cpu_brandstr) */,
1805		    "%s %s%s", vendorname, modifier, name);
1806	}
1807
1808	if (cachesize > -1) {
1809		snprintf(cpu_model, sizeof(cpu_model),
1810		    "%s (%s%s%s%s-class, %dKB L2 cache)",
1811		    cpu_brandstr,
1812		    ((*token) ? "\"" : ""), ((*token) ? token : ""),
1813		    ((*token) ? "\" " : ""), classnames[class], cachesize);
1814	} else {
1815		snprintf(cpu_model, sizeof(cpu_model),
1816		    "%s (%s%s%s%s-class)",
1817		    cpu_brandstr,
1818		    ((*token) ? "\"" : ""), ((*token) ? token : ""),
1819		    ((*token) ? "\" " : ""), classnames[class]);
1820	}
1821
1822	printf("%s: %s", cpu_device, cpu_model);
1823
1824	if (ci->ci_feature_flags && (ci->ci_feature_flags & CPUID_TSC)) {
1825		/* Has TSC, check if it's constant */
1826		switch (vendor) {
1827		case CPUVENDOR_INTEL:
1828			if ((ci->ci_family == 0x0f && ci->ci_model >= 0x03) ||
1829			    (ci->ci_family == 0x06 && ci->ci_model >= 0x0e)) {
1830				ci->ci_flags |= CPUF_CONST_TSC;
1831			}
1832			break;
1833		case CPUVENDOR_VIA:
1834			if (ci->ci_model >= 0x0f) {
1835				ci->ci_flags |= CPUF_CONST_TSC;
1836			}
1837			break;
1838		}
1839		calibrate_cyclecounter();
1840		if (cpuspeed > 994) {
1841			int ghz, fr;
1842
1843			ghz = (cpuspeed + 9) / 1000;
1844			fr = ((cpuspeed + 9) / 10 ) % 100;
1845			if (fr)
1846				printf(" %d.%02d GHz", ghz, fr);
1847			else
1848				printf(" %d GHz", ghz);
1849		} else {
1850			printf(" %d MHz", cpuspeed);
1851		}
1852	}
1853
1854	if (cpuid_level != -1)
1855		printf(", %02x-%02x-%02x", ci->ci_family, ci->ci_model,
1856		    step);
1857
1858	if ((cpu_ecxfeature & CPUIDECX_HV) == 0) {
1859		uint64_t level = 0;
1860		uint32_t dummy;
1861
1862		if (strcmp(cpu_vendor, "AuthenticAMD") == 0 &&
1863		    ci->ci_family >= 0x0f) {
1864			level = rdmsr(MSR_PATCH_LEVEL);
1865		} else if (strcmp(cpu_vendor, "GenuineIntel") == 0 &&
1866		    ci->ci_family >= 6) {
1867			wrmsr(MSR_BIOS_SIGN, 0);
1868			CPUID(1, dummy, dummy, dummy, dummy);
1869			level = rdmsr(MSR_BIOS_SIGN) >> 32;
1870		}
1871		if (level != 0)
1872			printf(", patch %08llx", level);
1873	}
1874
1875	printf("\n");
1876
1877	if (ci->ci_feature_flags) {
1878		int numbits = 0;
1879
1880		printf("%s: ", cpu_device);
1881		max = sizeof(i386_cpuid_features) /
1882		    sizeof(i386_cpuid_features[0]);
1883		for (i = 0; i < max; i++) {
1884			if (ci->ci_feature_flags &
1885			    i386_cpuid_features[i].feature_bit) {
1886				printf("%s%s", (numbits == 0 ? "" : ","),
1887				    i386_cpuid_features[i].feature_name);
1888				numbits++;
1889			}
1890		}
1891		max = sizeof(i386_cpuid_ecxfeatures)
1892			/ sizeof(i386_cpuid_ecxfeatures[0]);
1893		for (i = 0; i < max; i++) {
1894			if (cpu_ecxfeature &
1895			    i386_cpuid_ecxfeatures[i].feature_bit) {
1896				printf("%s%s", (numbits == 0 ? "" : ","),
1897				    i386_cpuid_ecxfeatures[i].feature_name);
1898				numbits++;
1899			}
1900		}
1901		for (i = 0; i < nitems(i386_ecpuid_features); i++) {
1902			if (ecpu_feature &
1903			    i386_ecpuid_features[i].feature_bit) {
1904				printf("%s%s", (numbits == 0 ? "" : ","),
1905				    i386_ecpuid_features[i].feature_name);
1906				numbits++;
1907			}
1908		}
1909		for (i = 0; i < nitems(i386_ecpuid_ecxfeatures); i++) {
1910			if (ecpu_ecxfeature &
1911			    i386_ecpuid_ecxfeatures[i].feature_bit) {
1912				printf("%s%s", (numbits == 0 ? "" : ","),
1913				    i386_ecpuid_ecxfeatures[i].feature_name);
1914				numbits++;
1915			}
1916		}
1917		for (i = 0; i < nitems(i386_cpuid_eaxperf); i++) {
1918			if (cpu_perf_eax &
1919			    i386_cpuid_eaxperf[i].feature_bit) {
1920				printf("%s%s", (numbits == 0 ? "" : ","),
1921				    i386_cpuid_eaxperf[i].feature_name);
1922				numbits++;
1923			}
1924		}
1925		for (i = 0; i < nitems(i386_cpuid_edxapmi); i++) {
1926			if (cpu_apmi_edx &
1927			    i386_cpuid_edxapmi[i].feature_bit) {
1928				printf("%s%s", (numbits == 0 ? "" : ","),
1929				    i386_cpuid_edxapmi[i].feature_name);
1930				numbits++;
1931			}
1932		}
1933
1934		if (cpuid_level >= 0x07) {
1935			u_int dummy;
1936
1937			/* "Structured Extended Feature Flags" */
1938			CPUID_LEAF(0x7, 0, dummy,
1939			    ci->ci_feature_sefflags_ebx,
1940			    ci->ci_feature_sefflags_ecx,
1941			    ci->ci_feature_sefflags_edx);
1942			for (i = 0; i < nitems(cpu_seff0_ebxfeatures); i++)
1943				if (ci->ci_feature_sefflags_ebx &
1944				    cpu_seff0_ebxfeatures[i].feature_bit)
1945					printf("%s%s",
1946					    (numbits == 0 ? "" : ","),
1947					    cpu_seff0_ebxfeatures[i].feature_name);
1948			for (i = 0; i < nitems(cpu_seff0_ecxfeatures); i++)
1949				if (ci->ci_feature_sefflags_ecx &
1950				    cpu_seff0_ecxfeatures[i].feature_bit)
1951					printf("%s%s",
1952					    (numbits == 0 ? "" : ","),
1953					    cpu_seff0_ecxfeatures[i].feature_name);
1954			for (i = 0; i < nitems(cpu_seff0_edxfeatures); i++)
1955				if (ci->ci_feature_sefflags_edx &
1956				    cpu_seff0_edxfeatures[i].feature_bit)
1957					printf("%s%s",
1958					    (numbits == 0 ? "" : ","),
1959					    cpu_seff0_edxfeatures[i].feature_name);
1960		}
1961
1962		if (!strcmp(cpu_vendor, "GenuineIntel") &&
1963		    cpuid_level >= 0x06 ) {
1964			u_int dummy;
1965
1966			CPUID(0x06, ci->ci_feature_tpmflags, dummy,
1967			    dummy, dummy);
1968			max = nitems(cpu_tpm_eaxfeatures);
1969			for (i = 0; i < max; i++)
1970				if (ci->ci_feature_tpmflags &
1971				    cpu_tpm_eaxfeatures[i].feature_bit)
1972					printf(",%s", cpu_tpm_eaxfeatures[i].feature_name);
1973		}
1974
1975		/* xsave subfeatures */
1976		if (cpuid_level >= 0xd) {
1977			uint32_t dummy, val;
1978
1979			CPUID_LEAF(0xd, 1, val, dummy, dummy, dummy);
1980			for (i = 0; i < nitems(cpu_xsave_extfeatures); i++)
1981				if (val & cpu_xsave_extfeatures[i].feature_bit)
1982					printf(",%s",
1983					    cpu_xsave_extfeatures[i].feature_name);
1984		}
1985
1986		if (cpu_meltdown)
1987			printf(",MELTDOWN");
1988
1989		printf("\n");
1990	}
1991
1992	/*
1993	 * "Mitigation G-2" per AMD's Whitepaper "Software Techniques
1994	 * for Managing Speculation on AMD Processors"
1995	 *
1996	 * By setting MSR C001_1029[1]=1, LFENCE becomes a dispatch
1997	 * serializing instruction.
1998	 *
1999	 * This MSR is available on all AMD families >= 10h, except 11h
2000 	 * where LFENCE is always serializing.
2001	 */
2002	if (!strcmp(cpu_vendor, "AuthenticAMD")) {
2003		if (ci->ci_family >= 0x10 && ci->ci_family != 0x11) {
2004			nmsr = msr = rdmsr(MSR_DE_CFG);
2005			nmsr |= DE_CFG_SERIALIZE_LFENCE;
2006			if (msr != nmsr)
2007				wrmsr(MSR_DE_CFG, nmsr);
2008		}
2009		if (family == 0x17 && ci->ci_model >= 0x31 &&
2010		    (cpu_ecxfeature & CPUIDECX_HV) == 0) {
2011			nmsr = msr = rdmsr(MSR_DE_CFG);
2012			nmsr |= DE_CFG_SERIALIZE_9;
2013			if (msr != nmsr)
2014				wrmsr(MSR_DE_CFG, nmsr);
2015		}
2016	}
2017
2018	/*
2019	 * Attempt to disable Silicon Debug and lock the configuration
2020	 * if it's enabled and unlocked.
2021	 */
2022	if (!strcmp(cpu_vendor, "GenuineIntel") &&
2023	    (cpu_ecxfeature & CPUIDECX_SDBG)) {
2024		uint64_t msr;
2025
2026		msr = rdmsr(IA32_DEBUG_INTERFACE);
2027		if ((msr & IA32_DEBUG_INTERFACE_ENABLE) &&
2028		    (msr & IA32_DEBUG_INTERFACE_LOCK) == 0) {
2029			msr &= IA32_DEBUG_INTERFACE_MASK;
2030			msr |= IA32_DEBUG_INTERFACE_LOCK;
2031			wrmsr(IA32_DEBUG_INTERFACE, msr);
2032		} else if (msr & IA32_DEBUG_INTERFACE_ENABLE)
2033			printf("%s: cannot disable silicon debug\n",
2034			    cpu_device);
2035	}
2036
2037	if (CPU_IS_PRIMARY(ci)) {
2038		if (cpu_ecxfeature & CPUIDECX_RDRAND)
2039			has_rdrand = 1;
2040		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_RDSEED)
2041			has_rdseed = 1;
2042		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)
2043			replacesmap();
2044	}
2045
2046#ifndef SMALL_KERNEL
2047	if (cpuspeed != 0 && cpu_cpuspeed == NULL)
2048		cpu_cpuspeed = pentium_cpuspeed;
2049#endif
2050
2051	cpu_class = class;
2052
2053	ci->cpu_class = class;
2054
2055	/*
2056	 * Enable ring 0 write protection.
2057	 */
2058	lcr0(rcr0() | CR0_WP);
2059
2060	/*
2061	 * If we have FXSAVE/FXRESTOR, use them.
2062	 */
2063	if (cpu_feature & CPUID_FXSR) {
2064		i386_use_fxsave = 1;
2065		lcr4(rcr4() | CR4_OSFXSR);
2066
2067		/*
2068		 * If we have SSE/SSE2, enable XMM exceptions, and
2069		 * notify userland.
2070		 */
2071		if (cpu_feature & (CPUID_SSE|CPUID_SSE2)) {
2072			if (cpu_feature & CPUID_SSE)
2073				i386_has_sse = 1;
2074			if (cpu_feature & CPUID_SSE2)
2075				i386_has_sse2 = 1;
2076			lcr4(rcr4() | CR4_OSXMMEXCPT);
2077		}
2078	} else
2079		i386_use_fxsave = 0;
2080
2081}
2082
2083char *
2084tm86_cpu_name(int model)
2085{
2086	u_int32_t regs[4];
2087	char *name = NULL;
2088
2089	cpuid(0x80860001, regs);
2090
2091	switch (model) {
2092	case 4:
2093		if (((regs[1] >> 16) & 0xff) >= 0x3)
2094			name = "TMS5800";
2095		else
2096			name = "TMS5600";
2097	}
2098
2099	return name;
2100}
2101
2102#ifndef SMALL_KERNEL
2103void
2104cyrix3_get_bus_clock(struct cpu_info *ci)
2105{
2106	u_int64_t msr;
2107	int bus;
2108
2109	msr = rdmsr(MSR_EBL_CR_POWERON);
2110	bus = (msr >> 18) & 0x3;
2111	switch (bus) {
2112	case 0:
2113		bus_clock = BUS100;
2114		break;
2115	case 1:
2116		bus_clock = BUS133;
2117		break;
2118	case 2:
2119		bus_clock = BUS200;
2120		break;
2121	case 3:
2122		bus_clock = BUS166;
2123		break;
2124	}
2125}
2126
/*
 * Determine the FSB speed of a Pentium 4 from MSR_EBC_FREQUENCY_ID.
 * The field location and encoding changed between early (model < 2)
 * and later models.
 */
void
p4_get_bus_clock(struct cpu_info *ci)
{
	u_int64_t msr;
	int model, bus;

	model = (ci->ci_signature >> 4) & 15;
	msr = rdmsr(MSR_EBC_FREQUENCY_ID);
	if (model < 2) {
		/* Early P4: bus encoding in bits 23:21. */
		bus = (msr >> 21) & 0x7;
		switch (bus) {
		case 0:
			bus_clock = BUS100;
			break;
		case 1:
			bus_clock = BUS133;
			break;
		default:
			printf("%s: unknown Pentium 4 (model %d) "
			    "EBC_FREQUENCY_ID value %d\n",
			    ci->ci_dev->dv_xname, model, bus);
			break;
		}
	} else {
		/* Model >= 2: bus encoding in bits 18:16. */
		bus = (msr >> 16) & 0x7;
		switch (bus) {
		case 0:
			/* Encoding 0 differs between model 2 and later. */
			bus_clock = (model == 2) ? BUS100 : BUS266;
			break;
		case 1:
			bus_clock = BUS133;
			break;
		case 2:
			bus_clock = BUS200;
			break;
		case 3:
			bus_clock = BUS166;
			break;
		default:
			printf("%s: unknown Pentium 4 (model %d) "
			    "EBC_FREQUENCY_ID value %d\n",
			    ci->ci_dev->dv_xname, model, bus);
			break;
		}
	}
}
2173
/*
 * Determine the FSB speed of P6-family and derived CPUs.  The MSR
 * holding the bus-speed field and its encoding differ per model
 * group (Pentium M, Core, Atom, classic P6), hence the big switch.
 * On an unrecognized encoding the raw EBL_CR_POWERON value is
 * printed for diagnosis (print_msr).
 */
void
p3_get_bus_clock(struct cpu_info *ci)
{
	u_int64_t msr;
	int bus;

	switch (ci->ci_model) {
	case 0x9: /* Pentium M (130 nm, Banias) */
		/* Banias only shipped with a 100 MHz FSB. */
		bus_clock = BUS100;
		break;
	case 0xd: /* Pentium M (90 nm, Dothan) */
		msr = rdmsr(MSR_FSB_FREQ);
		bus = (msr >> 0) & 0x7;
		switch (bus) {
		case 0:
			bus_clock = BUS100;
			break;
		case 1:
			bus_clock = BUS133;
			break;
		default:
			printf("%s: unknown Pentium M FSB_FREQ value %d",
			    ci->ci_dev->dv_xname, bus);
			goto print_msr;
		}
		break;
	case 0x15:	/* EP80579 no FSB */
		break;
	case 0xe: /* Core Duo/Solo */
	case 0xf: /* Core Xeon */
	case 0x16: /* 65nm Celeron */
	case 0x17: /* Core 2 Extreme/45nm Xeon */
	case 0x1d: /* Xeon MP 7400 */
		msr = rdmsr(MSR_FSB_FREQ);
		bus = (msr >> 0) & 0x7;
		/* Core-family encoding is not in ascending order. */
		switch (bus) {
		case 5:
			bus_clock = BUS100;
			break;
		case 1:
			bus_clock = BUS133;
			break;
		case 3:
			bus_clock = BUS166;
			break;
		case 2:
			bus_clock = BUS200;
			break;
		case 0:
			bus_clock = BUS266;
			break;
		case 4:
			bus_clock = BUS333;
			break;
		default:
			printf("%s: unknown Core FSB_FREQ value %d",
			    ci->ci_dev->dv_xname, bus);
			goto print_msr;
		}
		break;
	case 0x1c: /* Atom */
	case 0x26: /* Atom Z6xx */
	case 0x36: /* Atom [DN]2xxx */
		msr = rdmsr(MSR_FSB_FREQ);
		bus = (msr >> 0) & 0x7;
		switch (bus) {
		case 5:
			bus_clock = BUS100;
			break;
		case 1:
			bus_clock = BUS133;
			break;
		case 3:
			bus_clock = BUS166;
			break;
		case 2:
			bus_clock = BUS200;
			break;
		default:
			printf("%s: unknown Atom FSB_FREQ value %d",
			    ci->ci_dev->dv_xname, bus);
			goto print_msr;
		}
		break;
	case 0x1: /* Pentium Pro, model 1 */
	case 0x3: /* Pentium II, model 3 */
	case 0x5: /* Pentium II, II Xeon, Celeron, model 5 */
	case 0x6: /* Celeron, model 6 */
	case 0x7: /* Pentium III, III Xeon, model 7 */
	case 0x8: /* Pentium III, III Xeon, Celeron, model 8 */
	case 0xa: /* Pentium III Xeon, model A */
	case 0xb: /* Pentium III, model B */
		/* Classic P6: bus speed in EBL_CR_POWERON bits 19:18. */
		msr = rdmsr(MSR_EBL_CR_POWERON);
		bus = (msr >> 18) & 0x3;
		switch (bus) {
		case 0:
			bus_clock = BUS66;
			break;
		case 1:
			bus_clock = BUS133;
			break;
		case 2:
			bus_clock = BUS100;
			break;
		default:
			printf("%s: unknown i686 EBL_CR_POWERON value %d",
			    ci->ci_dev->dv_xname, bus);
			goto print_msr;
		}
		break;
	default:
		/* no FSB on modern Intel processors */
		break;
	}
	return;
print_msr:
	/*
	 * Show the EBL_CR_POWERON MSR, so we'll at least have
	 * some extra information, such as clock ratio, etc.
	 */
	printf(" (0x%llx)\n", rdmsr(MSR_EBL_CR_POWERON));
}
2296
/*
 * Recompute cpuspeed for a Pentium 4: bus clock times the core
 * multiplier from EBC_FREQUENCY_ID bits 31:24.
 */
void
p4_update_cpuspeed(void)
{
	struct cpu_info *ci;
	u_int64_t msr;
	int mult;

	ci = curcpu();
	p4_get_bus_clock(ci);

	if (bus_clock == 0) {
		printf("p4_update_cpuspeed: unknown bus clock\n");
		return;
	}

	msr = rdmsr(MSR_EBC_FREQUENCY_ID);
	mult = ((msr >> 24) & 0xff);

	/* NOTE(review): /100 suggests bus_clock carries two extra
	 * digits of precision — confirm against the BUS* defines. */
	cpuspeed = (bus_clock * mult) / 100;
}
2317
/*
 * Recompute cpuspeed for a Pentium III: bus clock times the core
 * multiplier decoded from EBL_CR_POWERON bits 25:22 (multiplier
 * table values appear to be in tenths, hence the /1000 below).
 */
void
p3_update_cpuspeed(void)
{
	struct cpu_info *ci;
	u_int64_t msr;
	int mult;
	/* Multiplier encodings; 0 entries are reserved/unused codes. */
	const u_int8_t mult_code[] = {
	    50, 30, 40, 0, 55, 35, 45, 0, 0, 70, 80, 60, 0, 75, 0, 65 };

	ci = curcpu();
	p3_get_bus_clock(ci);

	if (bus_clock == 0) {
		printf("p3_update_cpuspeed: unknown bus clock\n");
		return;
	}

	msr = rdmsr(MSR_EBL_CR_POWERON);
	mult = (msr >> 22) & 0xf;
	mult = mult_code[mult];
	/* Later steppings add bit 27 as a +4.0x multiplier extension. */
	if (!p3_early)
		mult += ((msr >> 27) & 0x1) * 40;

	cpuspeed = (bus_clock * mult) / 1000;
}
2343
/*
 * cpu_cpuspeed backend: report the cached cpuspeed value (MHz).
 * Always succeeds.
 */
int
pentium_cpuspeed(int *freq)
{
	*freq = cpuspeed;
	return (0);
}
2350#endif	/* !SMALL_KERNEL */
2351
2352/*
2353 * Send an interrupt to process.
2354 *
2355 * Stack is set up to allow sigcode stored
2356 * in u. to call routine, followed by kcall
2357 * to sigreturn routine below.  After sigreturn
2358 * resets the signal mask, the stack, and the
2359 * frame pointer, it returns to the user
2360 * specified pc, psl.
2361 */
/*
 * Push a signal frame onto the user stack and redirect the trapframe so
 * that the process enters the signal trampoline (ps_sigcode), which in
 * turn calls `catcher'.  Returns 0 on success, 1 if the user stack could
 * not be written.
 */
int
sendsig(sig_t catcher, int sig, sigset_t mask, const siginfo_t *ksip,
    int info, int onstack)
{
	struct proc *p = curproc;
	struct trapframe *tf = p->p_md.md_regs;
	struct sigframe *fp, frame;
	register_t sp;

	/*
	 * Build the argument list for the signal handler.
	 */
	bzero(&frame, sizeof(frame));
	frame.sf_signum = sig;

	/*
	 * Allocate space for the signal handler context.
	 * Use the alternate signal stack only when it is enabled, the
	 * caller requested it, and we are not already running on it.
	 */
	if ((p->p_sigstk.ss_flags & SS_DISABLE) == 0 &&
	    !sigonstack(tf->tf_esp) && onstack)
		sp = trunc_page((vaddr_t)p->p_sigstk.ss_sp + p->p_sigstk.ss_size);
	else
		sp = tf->tf_esp;

	frame.sf_sc.sc_fpstate = NULL;
	if (p->p_md.md_flags & MDP_USEDFPU) {
		/* Flush live FPU state to the pcb before copying it out. */
		npxsave_proc(p, 1);
		sp -= sizeof(union savefpu);
		sp &= ~0xf;	/* for XMM regs */
		frame.sf_sc.sc_fpstate = (void *)sp;
		if (copyout(&p->p_addr->u_pcb.pcb_savefpu,
		    (void *)sp, sizeof(union savefpu)))
		    	return 1;

		/* Signal handlers get a completely clean FP state */
		p->p_md.md_flags &= ~MDP_USEDFPU;
	}

	/* Place the sigframe just below the (possibly FPU-adjusted) sp. */
	fp = (struct sigframe *)sp - 1;
	frame.sf_scp = &fp->sf_sc;
	frame.sf_sip = NULL;
	frame.sf_handler = catcher;

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_err = tf->tf_err;
	frame.sf_sc.sc_trapno = tf->tf_trapno;
	frame.sf_sc.sc_mask = mask;
	frame.sf_sc.sc_fs = tf->tf_fs;
	frame.sf_sc.sc_gs = tf->tf_gs;
	frame.sf_sc.sc_es = tf->tf_es;
	frame.sf_sc.sc_ds = tf->tf_ds;
	frame.sf_sc.sc_eflags = tf->tf_eflags;
	frame.sf_sc.sc_edi = tf->tf_edi;
	frame.sf_sc.sc_esi = tf->tf_esi;
	frame.sf_sc.sc_ebp = tf->tf_ebp;
	frame.sf_sc.sc_ebx = tf->tf_ebx;
	frame.sf_sc.sc_edx = tf->tf_edx;
	frame.sf_sc.sc_ecx = tf->tf_ecx;
	frame.sf_sc.sc_eax = tf->tf_eax;
	frame.sf_sc.sc_eip = tf->tf_eip;
	frame.sf_sc.sc_cs = tf->tf_cs;
	frame.sf_sc.sc_esp = tf->tf_esp;
	frame.sf_sc.sc_ss = tf->tf_ss;

	if (info) {
		frame.sf_sip = &fp->sf_si;
		frame.sf_si = *ksip;
	}

	/* XXX don't copyout siginfo if not needed? */
	/*
	 * The cookie binds this sigcontext to its user address so that
	 * sys_sigreturn() can reject forged or relocated contexts.
	 */
	frame.sf_sc.sc_cookie = (long)&fp->sf_sc ^ p->p_p->ps_sigcookie;
	if (copyout(&frame, fp, sizeof(frame)) != 0)
		return 1;

	/*
	 * Build context to run handler in.
	 */
	tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
	tf->tf_eip = p->p_p->ps_sigcode;
	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
	/* Clear trace/direction/v86/alignment-check flags for the handler. */
	tf->tf_eflags &= ~(PSL_T|PSL_D|PSL_VM|PSL_AC);
	tf->tf_esp = (int)fp;
	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);

	return 0;
}
2453
2454/*
2455 * System call to cleanup state after a signal
2456 * has been taken.  Reset signal mask and
2457 * stack state from context left by sendsig (above).
2458 * Return to previous pc and psl as specified by
2459 * context left by sendsig. Check carefully to
2460 * make sure that the user has not modified the
2461 * psl to gain improper privileges or to cause
2462 * a machine fault.
2463 */
int
sys_sigreturn(struct proc *p, void *v, register_t *retval)
{
	struct sys_sigreturn_args /* {
		syscallarg(struct sigcontext *) sigcntxp;
	} */ *uap = v;
	struct sigcontext ksc, *scp = SCARG(uap, sigcntxp);
	struct trapframe *tf = p->p_md.md_regs;
	int error;

	/* Only the sigreturn stub in the signal trampoline may call this. */
	if (PROC_PC(p) != p->p_p->ps_sigcoderet) {
		sigexit(p, SIGILL);
		return (EPERM);
	}

	if ((error = copyin((caddr_t)scp, &ksc, sizeof(*scp))))
		return (error);

	/* The cookie must match the one sendsig() stored for this address. */
	if (ksc.sc_cookie != ((long)scp ^ p->p_p->ps_sigcookie)) {
		sigexit(p, SIGILL);
		return (EFAULT);
	}

	/* Prevent reuse of the sigcontext cookie */
	ksc.sc_cookie = 0;
	(void)copyout(&ksc.sc_cookie, (caddr_t)scp +
	    offsetof(struct sigcontext, sc_cookie), sizeof (ksc.sc_cookie));

	/*
	 * Restore signal ksc.
	 */
	/*
	 * Check for security violations.  If we're returning to
	 * protected mode, the CPU will validate the segment registers
	 * automatically and generate a trap on violations.  We handle
	 * the trap, rather than doing all of the checking here.
	 */
	if (((ksc.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
	    !USERMODE(ksc.sc_cs, ksc.sc_eflags))
		return (EINVAL);

	tf->tf_fs = ksc.sc_fs;
	tf->tf_gs = ksc.sc_gs;
	tf->tf_es = ksc.sc_es;
	tf->tf_ds = ksc.sc_ds;
	tf->tf_eflags = ksc.sc_eflags;
	tf->tf_edi = ksc.sc_edi;
	tf->tf_esi = ksc.sc_esi;
	tf->tf_ebp = ksc.sc_ebp;
	tf->tf_ebx = ksc.sc_ebx;
	tf->tf_edx = ksc.sc_edx;
	tf->tf_ecx = ksc.sc_ecx;
	tf->tf_eax = ksc.sc_eax;
	tf->tf_eip = ksc.sc_eip;
	tf->tf_cs = ksc.sc_cs;
	tf->tf_esp = ksc.sc_esp;
	tf->tf_ss = ksc.sc_ss;

	/* Discard any live FPU state; it will be replaced below if saved. */
	if (p->p_md.md_flags & MDP_USEDFPU)
		npxsave_proc(p, 0);

	if (ksc.sc_fpstate) {
		union savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu;

		if ((error = copyin(ksc.sc_fpstate, sfp, sizeof(*sfp))))
			return (error);
		/* Mask off reserved MXCSR bits so frstor/fxrstor can't fault. */
		if (i386_use_fxsave)
			sfp->sv_xmm.sv_env.en_mxcsr &= fpu_mxcsr_mask;
		p->p_md.md_flags |= MDP_USEDFPU;
	}

	p->p_sigmask = ksc.sc_mask & ~sigcantmask;

	return (EJUSTRETURN);
}
2539
2540#ifdef MULTIPROCESSOR
2541/* force a CPU into the kernel, whether or not it's idle */
2542void
2543cpu_kick(struct cpu_info *ci)
2544{
2545	/* only need to kick other CPUs */
2546	if (ci != curcpu()) {
2547		if (cpu_mwait_size > 0) {
2548			/*
2549			 * If not idling, then send an IPI, else
2550			 * just clear the "keep idling" bit.
2551			 */
2552			if ((ci->ci_mwait & MWAIT_IN_IDLE) == 0)
2553				i386_send_ipi(ci, I386_IPI_NOP);
2554			else
2555				atomic_clearbits_int(&ci->ci_mwait,
2556				    MWAIT_KEEP_IDLING);
2557		} else {
2558			/* no mwait, so need an IPI */
2559			i386_send_ipi(ci, I386_IPI_NOP);
2560		}
2561	}
2562}
2563#endif
2564
2565/*
2566 * Notify the current process (p) that it has a signal pending,
2567 * process as soon as possible.
2568 */
void
signotify(struct proc *p)
{
	/* Post an AST, then kick p's CPU so it notices promptly. */
	aston(p);
	cpu_kick(p->p_cpu);
}
2575
2576#ifdef MULTIPROCESSOR
2577void
2578cpu_unidle(struct cpu_info *ci)
2579{
2580	if (cpu_mwait_size > 0 && (ci->ci_mwait & MWAIT_ONLY)) {
2581		/*
2582		 * Just clear the "keep idling" bit; if it wasn't
2583		 * idling then we didn't need to do anything anyway.
2584		 */
2585		atomic_clearbits_int(&ci->ci_mwait, MWAIT_KEEP_IDLING);
2586		return;
2587	}
2588
2589	if (ci != curcpu())
2590		i386_send_ipi(ci, I386_IPI_NOP);
2591}
2592#endif
2593
int	waittime = -1;		/* -1 until the first sync in boot() */
struct pcb dumppcb;		/* register snapshot taken before dumping */

/*
 * Machine-dependent system shutdown: sync disks, run shutdown hooks and
 * then halt, power down, or reset according to `howto'.  Never returns.
 */
__dead void
boot(int howto)
{
	if ((howto & RB_POWERDOWN) != 0)
		lid_action = 0;

	if ((howto & RB_RESET) != 0)
		goto doreset;

	/* Too early in boot for a clean shutdown: skip straight to halt. */
	if (cold) {
		if ((howto & RB_USERREQ) == 0)
			howto |= RB_HALT;
		goto haltsys;
	}

	boothowto = howto;
	if ((howto & RB_NOSYNC) == 0 && waittime < 0) {
		waittime = 0;
		vfs_shutdown(curproc);

		if ((howto & RB_TIMEBAD) == 0) {
			resettodr();
		} else {
			printf("WARNING: not updating battery clock\n");
		}
	}
	if_downall();

	uvm_shutdown();
	splhigh();
	cold = 1;

	if ((howto & RB_DUMP) != 0)
		dumpsys();

haltsys:
	config_suspend_all(DVACT_POWERDOWN);

#ifdef MULTIPROCESSOR
	i386_broadcast_ipi(I386_IPI_HALT);
#endif

	if ((howto & RB_HALT) != 0) {
#if NACPI > 0 && !defined(SMALL_KERNEL)
		extern int acpi_enabled;

		if (acpi_enabled) {
			delay(500000);
			if ((howto & RB_POWERDOWN) != 0)
				acpi_powerdown();
		}
#endif

#if NAPM > 0
		if ((howto & RB_POWERDOWN) != 0) {
			int rv;

			printf("\nAttempting to power down...\n");
			/*
			 * Turn off, if we can.  But try to turn disk off and
			 * wait a bit first--some disk drives are slow to
			 * clean up and users have reported disk corruption.
			 *
			 * If apm_set_powstate() fails the first time, don't
			 * try to turn the system off.
			 */
			delay(500000);
			apm_set_powstate(APM_DEV_DISK(0xff), APM_SYS_OFF);
			delay(500000);
			rv = apm_set_powstate(APM_DEV_DISK(0xff), APM_SYS_OFF);
			if (rv == 0 || rv == ENXIO) {
				delay(500000);
				(void) apm_set_powstate(APM_DEV_ALLDEVS,
							APM_SYS_OFF);
			}
		}
#endif
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");
		cnpollc(1);	/* for proper keyboard command handling */
		cngetc();
		cnpollc(0);
	}

doreset:
	printf("rebooting...\n");
	cpu_reset();
	for (;;)
		continue;
	/* NOTREACHED */
}
2689
2690/*
2691 * This is called by configure to set dumplo and dumpsize.
2692 * Dumps always skip the first block of disk space
2693 * in case there might be a disk label stored there.
2694 * If there is extra space, put dump at the end to
2695 * reduce the chance that swapping trashes it.
2696 */
void
dumpconf(void)
{
	int nblks;	/* size of dump area */
	int i;

	/* No dump device, or it cannot report a size: nothing to set up. */
	if (dumpdev == NODEV ||
	    (nblks = (bdevsw[major(dumpdev)].d_psize)(dumpdev)) == 0)
		return;
	if (nblks <= ctod(1))
		return;

	/* Always skip the first block, in case there is a label there. */
	if (dumplo < ctod(1))
		dumplo = ctod(1);

	/* Dump size is bounded by the highest page of any memory segment. */
	for (i = 0; i < ndumpmem; i++)
		dumpsize = max(dumpsize, dumpmem[i].end);

	/* Put dump at end of partition, and make it fit. */
	if (dumpsize > dtoc(nblks - dumplo - 1))
		dumpsize = dtoc(nblks - dumplo - 1);
	if (dumplo < nblks - ctod(dumpsize) - 1)
		dumplo = nblks - ctod(dumpsize) - 1;
}
2722
2723/*
2724 * cpu_dump: dump machine-dependent kernel core dump headers.
2725 */
int
cpu_dump(void)
{
	int (*dump)(dev_t, daddr_t, caddr_t, size_t);
	/* One disk block's worth of header, long-aligned. */
	long buf[dbtob(1) / sizeof (long)];
	kcore_seg_t	*segp;

	dump = bdevsw[major(dumpdev)].d_dump;

	segp = (kcore_seg_t *)buf;

	/*
	 * Generate a segment header.
	 */
	CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
	segp->c_size = dbtob(1) - ALIGN(sizeof(*segp));

	/* Write the header block at dumplo; returns the driver's error code. */
	return (dump(dumpdev, dumplo, (caddr_t)buf, dbtob(1)));
}
2745
2746/*
2747 * Doadump comes here after turning off memory management and
2748 * getting on the dump stack, either when called above, or by
2749 * the auto-restart code.
2750 */
2751static vaddr_t dumpspace;
2752
2753vaddr_t
2754reserve_dumppages(vaddr_t p)
2755{
2756
2757	dumpspace = p;
2758	return (p + PAGE_SIZE);
2759}
2760
void
dumpsys(void)
{
	u_int i, j, npg;
	int maddr;
	daddr_t blkno;
	int (*dump)(dev_t, daddr_t, caddr_t, size_t);
	int error;
	char *str;
	extern int msgbufmapped;

	/* Save registers. */
	savectx(&dumppcb);

	msgbufmapped = 0;	/* don't record dump msgs in msgbuf */
	if (dumpdev == NODEV)
		return;

	/*
	 * For dumps during autoconfiguration,
	 * if dump device has already configured...
	 */
	if (dumpsize == 0)
		dumpconf();
	if (dumplo < 0)
		return;
	printf("\ndumping to dev %x, offset %ld\n", dumpdev, dumplo);

	error = (*bdevsw[major(dumpdev)].d_psize)(dumpdev);
	printf("dump ");
	if (error == -1) {
		printf("area unavailable\n");
		return;
	}

#if 0	/* XXX this doesn't work.  grr. */
	/* toss any characters present prior to dump */
	while (sget() != NULL); /*syscons and pccons differ */
#endif

	/* scan through the dumpmem list */
	dump = bdevsw[major(dumpdev)].d_dump;
	/* Write the kcore header first; skip the data loop if that fails. */
	error = cpu_dump();
	for (i = 0; !error && i < ndumpmem; i++) {

		npg = dumpmem[i].end - dumpmem[i].start;
		maddr = ptoa(dumpmem[i].start);
		/* +1: block 0 of the dump area holds the cpu_dump() header. */
		blkno = dumplo + btodb(maddr) + 1;
#if 0
		printf("(%d %lld %d) ", maddr, (long long)blkno, npg);
#endif
		for (j = npg; j--; maddr += NBPG, blkno += btodb(NBPG)) {

			/* Print out how many MBs we have more to go. */
			if (dbtob(blkno - dumplo) % (1024 * 1024) < NBPG)
				printf("%ld ",
				    (ptoa(dumpsize) - maddr) / (1024 * 1024));
#if 0
			printf("(%x %lld) ", maddr, (long long)blkno);
#endif
			/* Map the physical page at the bounce VA, then write it. */
			pmap_enter(pmap_kernel(), dumpspace, maddr,
			    PROT_READ, PMAP_WIRED);
			if ((error = (*dump)(dumpdev, blkno,
			    (caddr_t)dumpspace, NBPG)))
				break;

#if 0	/* XXX this doesn't work.  grr. */
			/* operator aborting dump? */
			if (sget() != NULL) {
				error = EINTR;
				break;
			}
#endif
		}
	}

	switch (error) {

	case 0:		str = "succeeded\n\n";			break;
	case ENXIO:	str = "device bad\n\n";			break;
	case EFAULT:	str = "device not ready\n\n";		break;
	case EINVAL:	str = "area improper\n\n";		break;
	case EIO:	str = "i/o error\n\n";			break;
	case EINTR:	str = "aborted from console\n\n";	break;
	default:	str = "error %d\n\n";			break;
	}
	/* Only the default string contains %d; extra args are harmless. */
	printf(str, error);

	delay(5000000);		/* 5 seconds */
}
2851
2852/*
2853 * Clear registers on exec
2854 */
void
setregs(struct proc *p, struct exec_package *pack, u_long stack,
    struct ps_strings *arginfo)
{
	struct pcb *pcb = &p->p_addr->u_pcb;
	struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map);
	struct trapframe *tf = p->p_md.md_regs;

#if NNPX > 0
	/* If we were using the FPU, forget about it. */
	if (pcb->pcb_fpcpu != NULL)
		npxsave_proc(p, 0);
	p->p_md.md_flags &= ~MDP_USEDFPU;
#endif

	/* Reset the code segment and per-thread fs/gs segment descriptors. */
	initcodesegment(&pmap->pm_codeseg);
	setsegment(&pcb->pcb_threadsegs[TSEG_FS], 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
	setsegment(&pcb->pcb_threadsegs[TSEG_GS], 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);

	/*
	 * And update the GDT since we return to the user process
	 * by leaving the syscall (we don't do another pmap_activate()).
	 */
	curcpu()->ci_gdt[GUCODE_SEL].sd = pmap->pm_codeseg;
	curcpu()->ci_gdt[GUFS_SEL].sd = pcb->pcb_threadsegs[TSEG_FS];
	curcpu()->ci_gdt[GUGS_SEL].sd = pcb->pcb_threadsegs[TSEG_GS];

	/*
	 * And reset the hiexec marker in the pmap.
	 */
	pmap->pm_hiexec = 0;

	/* Fresh user register state for the new image. */
	tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
	tf->tf_edi = 0;
	tf->tf_esi = 0;
	tf->tf_ebp = 0;
	tf->tf_ebx = (int)p->p_p->ps_strings;
	tf->tf_edx = 0;
	tf->tf_ecx = 0;
	tf->tf_eax = 0;
	tf->tf_eip = pack->ep_entry;
	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
	tf->tf_eflags = PSL_USERSET;
	tf->tf_esp = stack;
	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
}
2906
2907/*
2908 * Initialize segments and descriptor tables
2909 */
2910
/* IDT is now a full page, so we can map it in u-k */
union {
	struct gate_descriptor	idt[NIDT];
	char			align[PAGE_SIZE];	/* pad to a whole page */
} _idt_region __aligned(PAGE_SIZE);
#define idt_region _idt_region.idt
/* Live IDT pointer; fix_f00f() may repoint this at a relocated copy. */
struct gate_descriptor *idt = idt_region;

extern  struct user *proc0paddr;	/* proc0's u-area, defined elsewhere */
2920
2921void
2922setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl,
2923    int seg)
2924{
2925
2926	gd->gd_looffset = (int)func;
2927	gd->gd_selector = GSEL(seg, SEL_KPL);
2928	gd->gd_stkcpy = args;
2929	gd->gd_xx = 0;
2930	gd->gd_type = type;
2931	gd->gd_dpl = dpl;
2932	gd->gd_p = 1;
2933	gd->gd_hioffset = (int)func >> 16;
2934}
2935
2936void
2937unsetgate(struct gate_descriptor *gd)
2938{
2939	gd->gd_p = 0;
2940	gd->gd_hioffset = 0;
2941	gd->gd_looffset = 0;
2942	gd->gd_selector = 0;
2943	gd->gd_xx = 0;
2944	gd->gd_stkcpy = 0;
2945	gd->gd_type = 0;
2946	gd->gd_dpl = 0;
2947}
2948
2949void
2950setregion(struct region_descriptor *rd, void *base, size_t limit)
2951{
2952	rd->rd_limit = (int)limit;
2953	rd->rd_base = (int)base;
2954}
2955
2956void
2957initcodesegment(struct segment_descriptor *cs)
2958{
2959	if (cpu_pae) {
2960		/*
2961		 * When code execution is managed using NX feature
2962		 * in pmapae.c, GUCODE_SEL should cover userland.
2963		 */
2964		setsegment(cs, 0, atop(VM_MAXUSER_ADDRESS - 1),
2965		    SDT_MEMERA, SEL_UPL, 1, 1);
2966	} else {
2967		/*
2968		 * For pmap.c's non-PAE/NX line-in-the-sand execution, reset
2969		 * the code segment limit to I386_MAX_EXE_ADDR in the pmap;
2970		 * this gets copied into the GDT for GUCODE_SEL by
2971		 * pmap_activate().  Similarly, reset the base of each of
2972		 * the two thread data segments to zero in the pcb; they'll
2973		 * get copied into the GDT for GUFS_SEL and GUGS_SEL.
2974		 */
2975		setsegment(cs, 0, atop(I386_MAX_EXE_ADDR - 1),
2976		    SDT_MEMERA, SEL_UPL, 1, 1);
2977	}
2978}
2979
2980void
2981setsegment(struct segment_descriptor *sd, void *base, size_t limit, int type,
2982    int dpl, int def32, int gran)
2983{
2984
2985	sd->sd_lolimit = (int)limit;
2986	sd->sd_lobase = (int)base;
2987	sd->sd_type = type;
2988	sd->sd_dpl = dpl;
2989	sd->sd_p = 1;
2990	sd->sd_hilimit = (int)limit >> 16;
2991	sd->sd_xx = 0;
2992	sd->sd_def32 = def32;
2993	sd->sd_gran = gran;
2994	sd->sd_hibase = (int)base >> 24;
2995}
2996
#define	IDTVEC(name)	__CONCAT(X, name)
/* Trap/exception entry points — presumably assembly stubs defined
 * outside this file; TODO confirm against vector/locore sources. */
extern int IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
    IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
    IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page),
    IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(syscall), IDTVEC(mchk),
    IDTVEC(simd);

extern int IDTVEC(f00f_redirect);

/* Set once fix_f00f() has installed the Pentium F00F workaround. */
int cpu_f00f_bug = 0;
3007
/*
 * Work around the Pentium "F00F" lockup erratum: relocate the IDT so
 * that its start sits near the end of a read-only page, then install a
 * redirect handler on the page-fault vector.
 */
void
fix_f00f(void)
{
	struct region_descriptor region;
	vaddr_t va;
	paddr_t pa;
	void *p;

	/* Allocate two new pages */
	/* NOTE(review): km_alloc() return is not checked — presumably
	 * kd_waitok cannot fail here; confirm. */
	va = (vaddr_t)km_alloc(NBPG*2, &kv_any, &kp_zero, &kd_waitok);
	/* Start the IDT 7 descriptors before the first page boundary. */
	p = (void *)(va + NBPG - 7*sizeof(*idt));

	/* Copy over old IDT */
	bcopy(idt, p, sizeof(idt_region));
	idt = p;

	/* Fix up paging redirect */
	setgate(&idt[ 14], &IDTVEC(f00f_redirect), 0, SDT_SYS386IGT, SEL_KPL,
	    GCODE_SEL);

	/* Map first page RO */
	pmap_pte_setbits(va, 0, PG_RW);

	/* add k-u read-only mappings XXX old IDT stays in place */
	/* XXX hshoexer: are f00f affected CPUs affected by meltdown? */
	pmap_extract(pmap_kernel(), va, &pa);
	pmap_enter_special(va, pa, PROT_READ, 0);
	pmap_extract(pmap_kernel(), va + PAGE_SIZE, &pa);
	pmap_enter_special(va + PAGE_SIZE, pa, PROT_READ, 0);

	/* Reload idtr */
	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);

	/* Tell the rest of the world */
	cpu_f00f_bug = 1;
}
3045
#ifdef MULTIPROCESSOR
/*
 * Load this CPU's IDTR with the shared IDT (used by secondary CPUs).
 */
void
cpu_init_idt(void)
{
	struct region_descriptor region;

	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);
}
#endif /* MULTIPROCESSOR */
3055
/*
 * init386: early machine-dependent initialization.  Builds the boot
 * GDT/IDT, creates the I/O port and memory extent maps, attaches the
 * console, bootstraps the pmap, and registers the BIOS-supplied free
 * memory with UVM.
 */
void
init386(paddr_t first_avail)
{
	int i, kb;
	struct region_descriptor region;
	bios_memmap_t *im;

	proc0.p_addr = proc0paddr;
	cpu_info_primary.ci_self = &cpu_info_primary;
	cpu_info_primary.ci_curpcb = &proc0.p_addr->u_pcb;
	cpu_info_primary.ci_tss = &cpu_info_full_primary.cif_tss;
	cpu_info_primary.ci_nmi_tss = &cpu_info_full_primary.cif_nmi_tss;
	cpu_info_primary.ci_gdt = (void *)&cpu_info_full_primary.cif_gdt;

	/* make bootstrap gdt gates and memory segments */
	setsegment(&cpu_info_primary.ci_gdt[GCODE_SEL].sd, 0, 0xfffff,
	    SDT_MEMERA, SEL_KPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GICODE_SEL].sd, 0, 0xfffff,
	    SDT_MEMERA, SEL_KPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GDATA_SEL].sd, 0, 0xfffff,
	    SDT_MEMRWA, SEL_KPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GUCODE_SEL].sd, 0,
	    atop(I386_MAX_EXE_ADDR) - 1, SDT_MEMERA, SEL_UPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GUDATA_SEL].sd, 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GCPU_SEL].sd, &cpu_info_primary,
	    sizeof(struct cpu_info)-1, SDT_MEMRWA, SEL_KPL, 0, 0);
	setsegment(&cpu_info_primary.ci_gdt[GUFS_SEL].sd, 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GUGS_SEL].sd, 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1);
	setsegment(&cpu_info_primary.ci_gdt[GTSS_SEL].sd,
	    cpu_info_primary.ci_tss, sizeof(struct i386tss)-1,
	    SDT_SYS386TSS, SEL_KPL, 0, 0);
	setsegment(&cpu_info_primary.ci_gdt[GNMITSS_SEL].sd,
	    cpu_info_primary.ci_nmi_tss, sizeof(struct i386tss)-1,
	    SDT_SYS386TSS, SEL_KPL, 0, 0);

	/* exceptions */
	setgate(&idt[  0], &IDTVEC(div),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  1], &IDTVEC(dbg),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	/* NMI runs on its own TSS via a task gate. */
	setgate(&idt[  2], NULL,             0, SDT_SYSTASKGT, SEL_KPL, GNMITSS_SEL);
	setgate(&idt[  3], &IDTVEC(bpt),     0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL);
	setgate(&idt[  4], &IDTVEC(ofl),     0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL);
	setgate(&idt[  5], &IDTVEC(bnd),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  6], &IDTVEC(ill),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  7], &IDTVEC(dna),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  8], &IDTVEC(dble),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[  9], &IDTVEC(fpusegm), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 10], &IDTVEC(tss),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 11], &IDTVEC(missing), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 12], &IDTVEC(stk),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 13], &IDTVEC(prot),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 14], &IDTVEC(page),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 15], &IDTVEC(rsvd),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 16], &IDTVEC(fpu),     0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 17], &IDTVEC(align),   0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 18], &IDTVEC(mchk),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	setgate(&idt[ 19], &IDTVEC(simd),    0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	for (i = 20; i < NRSVIDT; i++)
		setgate(&idt[i], &IDTVEC(rsvd), 0, SDT_SYS386IGT, SEL_KPL, GCODE_SEL);
	for (i = NRSVIDT; i < NIDT; i++)
		unsetgate(&idt[i]);
	/* int $0x80 system call gate, callable from user mode. */
	setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL, GCODE_SEL);

	setregion(&region, cpu_info_primary.ci_gdt, GDT_SIZE - 1);
	lgdt(&region);
	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);

	/*
	 * Initialize the I/O port and I/O mem extent maps.
	 * Note: we don't have to check the return value since
	 * creation of a fixed extent map will never fail (since
	 * descriptor storage has already been allocated).
	 *
	 * N.B. The iomem extent manages _all_ physical addresses
	 * on the machine.  When the amount of RAM is found, the two
	 * extents of RAM are allocated from the map (0 -> ISA hole
	 * and end of ISA hole -> end of RAM).
	 */
	ioport_ex = extent_create("ioport", 0x0, 0xffff, M_DEVBUF,
	    (caddr_t)ioport_ex_storage, sizeof(ioport_ex_storage),
	    EX_NOCOALESCE|EX_NOWAIT);
	iomem_ex = extent_create("iomem", 0x0, 0xffffffff, M_DEVBUF,
	    (caddr_t)iomem_ex_storage, sizeof(iomem_ex_storage),
	    EX_NOCOALESCE|EX_NOWAIT);

#if NISA > 0
	isa_defaultirq();
#endif

	/*
	 * Attach the glass console early in case we need to display a panic.
	 */
	cninit();

	/*
	 * Saving SSE registers won't work if the save area isn't
	 * 16-byte aligned.
	 */
	if (offsetof(struct user, u_pcb.pcb_savefpu) & 0xf)
		panic("init386: pcb_savefpu not 16-byte aligned");

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE);

	/*
	 * Boot arguments are in a single page specified by /boot.
	 *
	 * We require the "new" vector form, as well as memory ranges
	 * to be given in bytes rather than KB.
	 */
	if ((bootapiver & (BAPIV_VECTOR | BAPIV_BMEMMAP)) ==
	    (BAPIV_VECTOR | BAPIV_BMEMMAP)) {
		if (bootargc > NBPG)
			panic("too many boot args");

		if (extent_alloc_region(iomem_ex, (paddr_t)bootargv, bootargc,
		    EX_NOWAIT))
			panic("cannot reserve /boot args memory");

		pmap_enter(pmap_kernel(), (vaddr_t)bootargp, (paddr_t)bootargv,
		    PROT_READ | PROT_WRITE,
		    PROT_READ | PROT_WRITE | PMAP_WIRED);

		bios_getopt();

	} else
		panic("/boot too old: upgrade!");

#ifdef DIAGNOSTIC
	if (bios_memmap == NULL)
		panic("no BIOS memory map supplied");
#endif

	/*
	 * account all the memory passed in the map from /boot
	 * calculate avail_end and count the physmem.
	 */
	avail_end = 0;
	physmem = 0;
#ifdef DEBUG
	printf("memmap:");
#endif
	/* NOTE(review): `i' indexes dumpmem[] below with no visible bound
	 * check — confirm the BIOS map cannot have more free regions than
	 * dumpmem[] holds. */
	for(i = 0, im = bios_memmap; im->type != BIOS_MAP_END; im++)
		if (im->type == BIOS_MAP_FREE) {
			paddr_t a, e;
#ifdef DEBUG
			printf(" %llx-%llx", im->addr, im->addr + im->size);
#endif

			/* Ignore regions entirely above 4GB on i386. */
			if (im->addr >= 0x100000000ULL) {
#ifdef DEBUG
				printf("-H");
#endif
				continue;
			}

			a = round_page(im->addr);
			if (im->addr + im->size <= 0xfffff000ULL)
				e = trunc_page(im->addr + im->size);
			else {
#ifdef DEBUG
				printf("-T");
#endif
				e = 0xfffff000;
			}

			/* skip first 16 pages due to SMI corruption */
			if (a < 16 * NBPG)
				a = 16 * NBPG;

#ifdef MULTIPROCESSOR
			/* skip MP trampoline code page */
			if (a < MP_TRAMPOLINE + NBPG)
				a = MP_TRAMPOLINE + NBPG;

			/* skip MP trampoline data page */
			if (a < MP_TRAMP_DATA + NBPG)
				a = MP_TRAMP_DATA + NBPG;
#endif /* MULTIPROCESSOR */

#if NACPI > 0 && !defined(SMALL_KERNEL)
			/* skip ACPI resume trampoline code page */
			if (a < ACPI_TRAMPOLINE + NBPG)
				a = ACPI_TRAMPOLINE + NBPG;

			/* skip ACPI resume trampoline data page */
			if (a < ACPI_TRAMP_DATA + NBPG)
				a = ACPI_TRAMP_DATA + NBPG;
#endif /* ACPI */

#ifdef HIBERNATE
			/* skip hibernate reserved pages */
			if (a < HIBERNATE_HIBALLOC_PAGE + PAGE_SIZE)
				a = HIBERNATE_HIBALLOC_PAGE + PAGE_SIZE;
#endif /* HIBERNATE */

			/* skip shorter than page regions */
			if (a >= e || (e - a) < NBPG) {
#ifdef DEBUG
				printf("-S");
#endif
				continue;
			}

			/*
			 * XXX Some buggy ACPI BIOSes use memory that
			 * they declare as free. Current worst offender
			 * is Supermicro 5019D-FTN4.  Typically the
			 * affected memory areas are small blocks
			 * between areas reserved for ACPI and other
			 * BIOS goo.  So skip areas smaller than 32 MB
			 * above the 16 MB boundary (to avoid
			 * affecting legacy stuff).
			 */
			if (a > 16*1024*1024 && (e - a) < 32*1024*1024) {
#ifdef DEBUG
				printf("-X");
#endif
				continue;
			}

			/* skip legacy IO region */
			if ((a > IOM_BEGIN && a < IOM_END) ||
			    (e > IOM_BEGIN && e < IOM_END)) {
#ifdef DEBUG
				printf("-I");
#endif
				continue;
			}

			if (extent_alloc_region(iomem_ex, a, e - a, EX_NOWAIT))
				/* XXX What should we do? */
				printf("\nWARNING: CAN'T ALLOCATE RAM (%lx-%lx)"
				    " FROM IOMEM EXTENT MAP!\n", a, e);

			physmem += atop(e - a);
			dumpmem[i].start = atop(a);
			dumpmem[i].end = atop(e);
			i++;
			avail_end = max(avail_end, e);
		}

	ndumpmem = i;
	/* Reserve space for the message buffer at the top of memory. */
	avail_end -= round_page(MSGBUFSIZE);

#ifdef DEBUG
	printf(": %lx\n", avail_end);
#endif
	if (physmem < atop(4 * 1024 * 1024)) {
		printf("\awarning: too little memory available;"
		    "running in degraded mode\npress a key to confirm\n\n");
		cnpollc(1);
		cngetc();
		cnpollc(0);
	}

#ifdef DEBUG
	printf("physload: ");
#endif
	/* Hand UVM any RAM between the ISA hole and the kernel text. */
	kb = atop(KERNTEXTOFF - KERNBASE);
	if (kb > atop(IOM_END)) {
		paddr_t lim = atop(IOM_END);
#ifdef DEBUG
		printf(" %lx-%x (<16M)", lim, kb);
#endif
		uvm_page_physload(lim, kb, lim, kb, 0);
	}

	for (i = 0; i < ndumpmem; i++) {
		paddr_t a, e;

		a = dumpmem[i].start;
		e = dumpmem[i].end;
		/* Clip each segment to [first_avail, avail_end). */
		if (a < atop(first_avail) && e > atop(first_avail))
			a = atop(first_avail);
		if (e > atop(avail_end))
			e = atop(avail_end);

		if (a < e) {
#ifdef DEBUG
				printf(" %lx-%lx", a, e);
#endif
				uvm_page_physload(a, e, a, e, 0);
		}
	}
#ifdef DEBUG
	printf("\n");
#endif

	tlbflush();
#if 0
#if NISADMA > 0
	/*
	 * Some motherboards/BIOSes remap the 384K of RAM that would
	 * normally be covered by the ISA hole to the end of memory
	 * so that it can be used.  However, on a 16M system, this
	 * would cause bounce buffers to be allocated and used.
	 * This is not desirable behaviour, as more than 384K of
	 * bounce buffers might be allocated.  As a work-around,
	 * we round memory down to the nearest 1M boundary if
	 * we're using any isadma devices and the remapped memory
	 * is what puts us over 16M.
	 */
	if (extmem > (15*1024) && extmem < (16*1024)) {
		printf("Warning: ignoring %dk of remapped memory\n",
		    extmem - (15*1024));
		extmem = (15*1024);
	}
#endif
#endif

#ifdef DDB
	db_machine_init();
	ddb_init();
	if (boothowto & RB_KDB)
		db_enter();
#endif

	softintr_init();
}
3379
3380/*
3381 * consinit:
3382 * initialize the system console.
3383 */
void
consinit(void)
{
	/* Already done in init386(). */
}
3389
/*
 * Reset the machine, trying progressively cruder methods: a registered
 * platform reset hook, the keyboard controller's reset line, and
 * finally deliberately-induced triple faults.  Never returns.
 */
void
cpu_reset(void)
{
	struct region_descriptor region;

	intr_disable();

	/* Platform-specific reset hook (if one was registered) first. */
	if (cpuresetfn)
		(*cpuresetfn)();

	/*
	 * The keyboard controller has 4 random output pins, one of which is
	 * connected to the RESET pin on the CPU in many PCs.  We tell the
	 * keyboard controller to pulse this line a couple of times.
	 */
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);

	/*
	 * Try to cause a triple fault and watchdog reset by setting the
	 * IDT to point to nothing.
	 */
	bzero((caddr_t)idt, sizeof(idt_region));
	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);
	/* Divide by zero with no #DE handler installed -> triple fault. */
	__asm volatile("divl %0,%1" : : "q" (0), "a" (0));

	/*
	 * Try to cause a triple fault and watchdog reset by unmapping the
	 * entire address space.
	 */
	bzero((caddr_t)PTD, NBPG);
	tlbflush();

	for (;;)
		continue;
	/* NOTREACHED */
}
3430
/*
 * Initialize the system clock via the configured backend.
 */
void
cpu_initclocks(void)
{
	(*initclock_func)();		/* lapic or i8254 */
}
3436
/*
 * Start the clock interrupt source selected by cpu_initclocks().
 */
void
cpu_startclock(void)
{
	(*startclock_func)();
}
3442
3443void
3444need_resched(struct cpu_info *ci)
3445{
3446	ci->ci_want_resched = 1;
3447
3448	/* There's a risk we'll be called before the idle threads start */
3449	if (ci->ci_curproc) {
3450		aston(ci->ci_curproc);
3451		cpu_kick(ci);
3452	}
3453}
3454
3455/* Allocate an IDT vector slot within the given range.
3456 * XXX needs locking to avoid MP allocation races.
3457 */
3458
3459int
3460idt_vec_alloc(int low, int high)
3461{
3462	int vec;
3463
3464	for (vec = low; vec <= high; vec++)
3465		if (idt[vec].gd_p == 0)
3466			return (vec);
3467	return (0);
3468}
3469
/*
 * Install an interrupt gate for `vec', entered via GICODE_SEL.
 */
void
idt_vec_set(int vec, void (*function)(void))
{
	setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL, GICODE_SEL);
}
3475
3476void
3477idt_vec_free(int vec)
3478{
3479	unsetgate(&idt[vec]);
3480}
3481
/*
 * Simple integer sysctl variables dispatched generically from the
 * default case of cpu_sysctl() via sysctl_bounded_arr().
 */
const struct sysctl_bounded_args cpuctl_vars[] = {
	{ CPU_LIDACTION, &lid_action, 0, 2 },	/* writable, clamped to 0..2 */
	{ CPU_CPUID, &cpu_id, SYSCTL_INT_READONLY },
	{ CPU_OSFXSR, &i386_use_fxsave, SYSCTL_INT_READONLY },
	{ CPU_SSE, &i386_has_sse, SYSCTL_INT_READONLY },
	{ CPU_SSE2, &i386_has_sse2, SYSCTL_INT_READONLY },
	{ CPU_XCRYPT, &i386_has_xcrypt, SYSCTL_INT_READONLY },
};
3490
3491/*
3492 * machine dependent system variables.
3493 */
int
cpu_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	dev_t dev;

	switch (name[0]) {
	case CPU_CONSDEV:
		/* Report the console device, or NODEV if none attached. */
		if (namelen != 1)
			return (ENOTDIR);		/* overloaded */

		if (cn_tab != NULL)
			dev = cn_tab->cn_dev;
		else
			dev = NODEV;
		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
#if NBIOS > 0
	case CPU_BIOS:
		/* Delegate the remaining name components to the BIOS code. */
		return bios_sysctl(name + 1, namelen - 1, oldp, oldlenp,
		    newp, newlen, p);
#endif
	case CPU_BLK2CHR:
		/* Translate a block device number to its character twin. */
		if (namelen != 2)
			return (ENOTDIR);		/* overloaded */
		dev = blktochr((dev_t)name[1]);
		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
	case CPU_CHR2BLK:
		/* Translate a character device number to its block twin. */
		if (namelen != 2)
			return (ENOTDIR);		/* overloaded */
		dev = chrtoblk((dev_t)name[1]);
		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
	case CPU_ALLOWAPERTURE:
#ifdef APERTURE
		/* At securelevel > 0 the value may only be lowered. */
		if (securelevel > 0)
			return (sysctl_int_lower(oldp, oldlenp, newp, newlen,
			    &allowaperture));
		else
			return (sysctl_int(oldp, oldlenp, newp, newlen,
			    &allowaperture));
#else
		return (sysctl_rdint(oldp, oldlenp, newp, 0));
#endif
	case CPU_CPUVENDOR:
		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_vendor));
	case CPU_CPUFEATURE:
		return (sysctl_rdint(oldp, oldlenp, newp, curcpu()->ci_feature_flags));
	case CPU_KBDRESET:
		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
		    &kbd_reset));
#if NPCKBC > 0 && NUKBD > 0
	case CPU_FORCEUKBD:
		{
		int error;

		/* Once forced on, forceukbd becomes read-only. */
		if (forceukbd)
			return (sysctl_rdint(oldp, oldlenp, newp, forceukbd));

		error = sysctl_int(oldp, oldlenp, newp, newlen, &forceukbd);
		/* Hand the console keyboard over to ukbd when enabled. */
		if (forceukbd)
			pckbc_release_console();
		return (error);
		}
#endif
	default:
		/* Remaining MIBs are plain ints; see cpuctl_vars[] above. */
		return (sysctl_bounded_arr(cpuctl_vars, nitems(cpuctl_vars),
		    name, namelen, oldp, oldlenp, newp, newlen));
	}
	/* NOTREACHED */
}
3563
/*
 * Map a region of bus space, recording the region in the appropriate
 * extent map so conflicting mappings are refused.  For I/O space the
 * handle is the port number itself; for memory space a kernel virtual
 * mapping is created (or the static ISA hole mapping is reused).
 */
int
bus_space_map(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size, int flags,
    bus_space_handle_t *bshp)
{
	int error;
	struct extent *ex;

	/*
	 * Pick the appropriate extent map.
	 */
	if (t == I386_BUS_SPACE_IO) {
		ex = ioport_ex;
		/* Ports cannot be linearly addressed through a pointer. */
		if (flags & BUS_SPACE_MAP_LINEAR)
			return (EINVAL);
	} else if (t == I386_BUS_SPACE_MEM) {
		ex = iomem_ex;
	} else {
		panic("bus_space_map: bad bus space tag");
	}

	/*
	 * Before we go any further, let's make sure that this
	 * region is available.
	 */
	error = extent_alloc_region(ex, bpa, size,
	    EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0));
	if (error)
		return (error);

	/*
	 * For I/O space, that's all she wrote.
	 */
	if (t == I386_BUS_SPACE_IO) {
		*bshp = bpa;
		return (0);
	}

	/* The ISA hole is permanently mapped; use it directly. */
	if (IOM_BEGIN <= bpa && bpa <= IOM_END) {
		*bshp = (bus_space_handle_t)ISA_HOLE_VADDR(bpa);
		return (0);
	}

	/*
	 * For memory space, map the bus physical address to
	 * a kernel virtual address.
	 */
	error = bus_mem_add_mapping(bpa, size, flags, bshp);
	if (error) {
		/* Roll back the extent reservation made above. */
		if (extent_free(ex, bpa, size, EX_NOWAIT |
		    (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
			printf("bus_space_map: pa 0x%lx, size 0x%lx\n",
			    bpa, size);
			printf("bus_space_map: can't free region\n");
		}
	}

	return (error);
}
3622
3623int
3624_bus_space_map(bus_space_tag_t t, bus_addr_t bpa, bus_size_t size,
3625    int flags, bus_space_handle_t *bshp)
3626{
3627	/*
3628	 * For I/O space, that's all she wrote.
3629	 */
3630	if (t == I386_BUS_SPACE_IO) {
3631		*bshp = bpa;
3632		return (0);
3633	}
3634
3635	/*
3636	 * For memory space, map the bus physical address to
3637	 * a kernel virtual address.
3638	 */
3639	return (bus_mem_add_mapping(bpa, size, flags, bshp));
3640}
3641
3642int
3643bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend,
3644    bus_size_t size, bus_size_t alignment, bus_size_t boundary,
3645    int flags, bus_addr_t *bpap, bus_space_handle_t *bshp)
3646{
3647	struct extent *ex;
3648	u_long bpa;
3649	int error;
3650
3651	/*
3652	 * Pick the appropriate extent map.
3653	 */
3654	if (t == I386_BUS_SPACE_IO) {
3655		ex = ioport_ex;
3656	} else if (t == I386_BUS_SPACE_MEM) {
3657		ex = iomem_ex;
3658	} else {
3659		panic("bus_space_alloc: bad bus space tag");
3660	}
3661
3662	/*
3663	 * Sanity check the allocation against the extent's boundaries.
3664	 */
3665	if (rstart < ex->ex_start || rend > ex->ex_end)
3666		panic("bus_space_alloc: bad region start/end");
3667
3668	/*
3669	 * Do the requested allocation.
3670	 */
3671	error = extent_alloc_subregion(ex, rstart, rend, size, alignment, 0,
3672	    boundary, EX_NOWAIT | (ioport_malloc_safe ?  EX_MALLOCOK : 0),
3673	    &bpa);
3674
3675	if (error)
3676		return (error);
3677
3678	/*
3679	 * For I/O space, that's all she wrote.
3680	 */
3681	if (t == I386_BUS_SPACE_IO) {
3682		*bshp = *bpap = bpa;
3683		return (0);
3684	}
3685
3686	/*
3687	 * For memory space, map the bus physical address to
3688	 * a kernel virtual address.
3689	 */
3690	error = bus_mem_add_mapping(bpa, size, flags, bshp);
3691	if (error) {
3692		if (extent_free(iomem_ex, bpa, size, EX_NOWAIT |
3693		    (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
3694			printf("bus_space_alloc: pa 0x%lx, size 0x%lx\n",
3695			    bpa, size);
3696			printf("bus_space_alloc: can't free region\n");
3697		}
3698	}
3699
3700	*bpap = bpa;
3701
3702	return (error);
3703}
3704
/*
 * Back a bus memory region with a kernel virtual mapping.  The
 * mapping covers whole pages around [bpa, bpa+size); the returned
 * handle preserves the sub-page offset of "bpa".  Pages are mapped
 * uncached by default; BUS_SPACE_MAP_CACHEABLE and
 * BUS_SPACE_MAP_PREFETCHABLE select cached / write-combining
 * attributes instead.  Returns 0 or ENOMEM.
 */
int
bus_mem_add_mapping(bus_addr_t bpa, bus_size_t size, int flags,
    bus_space_handle_t *bshp)
{
	paddr_t pa, endpa;
	vaddr_t va;
	bus_size_t map_size;
	int pmap_flags = PMAP_NOCACHE;

	pa = trunc_page(bpa);
	endpa = round_page(bpa + size);

#ifdef DIAGNOSTIC
	/* endpa == 0 means round_page() wrapped at the top of the space. */
	if (endpa <= pa && endpa != 0)
		panic("bus_mem_add_mapping: overflow");
#endif

	map_size = endpa - pa;

	va = (vaddr_t)km_alloc(map_size, &kv_any, &kp_none, &kd_nowait);
	if (va == 0)
		return (ENOMEM);

	/* Handle points at bpa's offset within the first mapped page. */
	*bshp = (bus_space_handle_t)(va + (bpa & PGOFSET));

	if (flags & BUS_SPACE_MAP_CACHEABLE)
		pmap_flags = 0;
	else if (flags & BUS_SPACE_MAP_PREFETCHABLE)
		pmap_flags = PMAP_WC;

	for (; map_size > 0;
	    pa += PAGE_SIZE, va += PAGE_SIZE, map_size -= PAGE_SIZE)
		pmap_kenter_pa(va, pa | pmap_flags,
		    PROT_READ | PROT_WRITE);
	pmap_update(pmap_kernel());

	return 0;
}
3743
/*
 * Undo a bus_space_map(): tear down any kernel virtual mapping and
 * release the region from the owning extent map.
 */
void
bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size)
{
	struct extent *ex;
	u_long va, endva;
	bus_addr_t bpa;

	/*
	 * Find the correct extent and bus physical address.
	 */
	if (t == I386_BUS_SPACE_IO) {
		ex = ioport_ex;
		bpa = bsh;
	} else if (t == I386_BUS_SPACE_MEM) {
		ex = iomem_ex;
		/*
		 * Handles inside the static ISA hole mapping translate
		 * directly and have no dynamic mapping to tear down.
		 */
		bpa = (bus_addr_t)ISA_PHYSADDR(bsh);
		if (IOM_BEGIN <= bpa && bpa <= IOM_END)
			goto ok;

		va = trunc_page(bsh);
		endva = round_page(bsh + size);

#ifdef DIAGNOSTIC
		if (endva <= va)
			panic("bus_space_unmap: overflow");
#endif

		/* Recover the real physical address from the page tables. */
		(void) pmap_extract(pmap_kernel(), va, &bpa);
		bpa += (bsh & PGOFSET);

		pmap_kremove(va, endva - va);
		pmap_update(pmap_kernel());

		/*
		 * Free the kernel virtual mapping.
		 */
		km_free((void *)va, endva - va, &kv_any, &kp_none);
	} else
		panic("bus_space_unmap: bad bus space tag");

ok:
	if (extent_free(ex, bpa, size,
	    EX_NOWAIT | (ioport_malloc_safe ? EX_MALLOCOK : 0))) {
		printf("bus_space_unmap: %s 0x%lx, size 0x%lx\n",
		    (t == I386_BUS_SPACE_IO) ? "port" : "pa", bpa, size);
		printf("bus_space_unmap: can't free region\n");
	}
}
3792
3793void
3794_bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size,
3795    bus_addr_t *adrp)
3796{
3797	u_long va, endva;
3798	bus_addr_t bpa;
3799
3800	/*
3801	 * Find the correct bus physical address.
3802	 */
3803	if (t == I386_BUS_SPACE_IO) {
3804		bpa = bsh;
3805	} else if (t == I386_BUS_SPACE_MEM) {
3806		bpa = (bus_addr_t)ISA_PHYSADDR(bsh);
3807		if (IOM_BEGIN <= bpa && bpa <= IOM_END)
3808			goto ok;
3809
3810		va = trunc_page(bsh);
3811		endva = round_page(bsh + size);
3812
3813#ifdef DIAGNOSTIC
3814		if (endva <= va)
3815			panic("_bus_space_unmap: overflow");
3816#endif
3817
3818		(void) pmap_extract(pmap_kernel(), va, &bpa);
3819		bpa += (bsh & PGOFSET);
3820
3821		pmap_kremove(va, endva - va);
3822		pmap_update(pmap_kernel());
3823
3824		/*
3825		 * Free the kernel virtual mapping.
3826		 */
3827		km_free((void *)va, endva - va, &kv_any, &kp_none);
3828	} else
3829		panic("bus_space_unmap: bad bus space tag");
3830
3831ok:
3832	if (adrp != NULL)
3833		*adrp = bpa;
3834}
3835
3836void
3837bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size)
3838{
3839
3840	/* bus_space_unmap() does all that we need to do. */
3841	bus_space_unmap(t, bsh, size);
3842}
3843
3844int
3845bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh,
3846    bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp)
3847{
3848	*nbshp = bsh + offset;
3849	return (0);
3850}
3851
3852paddr_t
3853bus_space_mmap(bus_space_tag_t t, bus_addr_t addr, off_t off, int prot, int flags)
3854{
3855	/* Can't mmap I/O space. */
3856	if (t == I386_BUS_SPACE_IO)
3857		return (-1);
3858
3859	return (addr + off);
3860}
3861
3862#ifdef DIAGNOSTIC
3863void
3864splassert_check(int wantipl, const char *func)
3865{
3866	if (lapic_tpr < wantipl)
3867		splassert_fail(wantipl, lapic_tpr, func);
3868	if (wantipl == IPL_NONE && curcpu()->ci_idepth != 0)
3869		splassert_fail(-1, curcpu()->ci_idepth, func);
3870}
3871#endif
3872
3873int
3874copyin32(const uint32_t *uaddr, uint32_t *kaddr)
3875{
3876	if ((vaddr_t)uaddr & 0x3)
3877		return EFAULT;
3878
3879	/* copyin(9) is atomic */
3880	return copyin(uaddr, kaddr, sizeof(uint32_t));
3881}
3882
3883/*
3884 * True if the system has any non-level interrupts which are shared
3885 * on the same pin.
3886 */
3887int	intr_shared_edge;
3888
3889/*
3890 * Software interrupt registration
3891 *
3892 * We hand-code this to ensure that it's atomic.
3893 */
void
softintr(int sir)
{
	struct cpu_info *ci = curcpu();

	/* A single "orl" into ci_ipending so the update cannot be torn. */
	__asm volatile("orl %1, %0" :
	    "=m" (ci->ci_ipending) : "ir" (1 << sir));
}
3902
3903/*
3904 * Raise current interrupt priority level, and return the old one.
3905 */
int
splraise(int ncpl)
{
	int ocpl;

	KASSERT(ncpl >= IPL_NONE);

	/* _SPLRAISE stores the previous level into ocpl. */
	_SPLRAISE(ocpl, ncpl);
	return (ocpl);
}
3916
3917/*
3918 * Restore an old interrupt priority level.  If any thereby unmasked
3919 * interrupts are pending, call Xspllower() to process them.
3920 */
void
splx(int ncpl)
{
	/* _SPLX lowers the level and handles any pending interrupts. */
	_SPLX(ncpl);
}
3926
3927/*
3928 * Same as splx(), but we return the old value of spl, for the
3929 * benefit of some splsoftclock() callers.
3930 */
3931int
3932spllower(int ncpl)
3933{
3934	int ocpl = lapic_tpr;
3935
3936	splx(ncpl);
3937	return (ocpl);
3938}
3939
3940int
3941intr_handler(struct intrframe *frame, struct intrhand *ih)
3942{
3943	int rc;
3944#ifdef MULTIPROCESSOR
3945	int need_lock;
3946
3947	if (ih->ih_flags & IPL_MPSAFE)
3948		need_lock = 0;
3949	else
3950		need_lock = 1;
3951
3952	if (need_lock)
3953		__mp_lock(&kernel_lock);
3954#endif
3955	rc = (*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : frame);
3956#ifdef MULTIPROCESSOR
3957	if (need_lock)
3958		__mp_unlock(&kernel_lock);
3959#endif
3960	return rc;
3961}
3962
void
intr_barrier(void *ih)
{
	/*
	 * "ih" is unused; a global sched_barrier() is issued regardless
	 * of which handler the caller wants to synchronize against.
	 */
	sched_barrier(NULL);
}
3968
3969#ifdef SUSPEND
3970
void
intr_enable_wakeup(void)
{
	/* Nothing to do on this platform. */
}
3975
void
intr_disable_wakeup(void)
{
	/* Nothing to do on this platform. */
}
3980
3981#endif
3982
3983unsigned int
3984cpu_rnd_messybits(void)
3985{
3986	struct timespec ts;
3987
3988	nanotime(&ts);
3989	return (ts.tv_nsec ^ (ts.tv_sec << 20));
3990}
3991
int i386_delay_quality;	/* quality score of the installed delay_func */
3993
3994void
3995delay_init(void(*fn)(int), int fn_quality)
3996{
3997	if (fn_quality > i386_delay_quality) {
3998		delay_func = fn;
3999		i386_delay_quality = fn_quality;
4000	}
4001}
4002
4003void
4004delay_fini(void (*fn)(int))
4005{
4006	if (delay_func == fn) {
4007		delay_func = i8254_delay;
4008		i386_delay_quality = 0;
4009	}
4010}
4011