machdep.c revision 15565
/*-
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 *	$Id: machdep.c,v 1.187 1996/05/02 14:19:47 phk Exp $
 */
40227825Stheraven
41227825Stheraven#include "npx.h"
42227825Stheraven#include "isa.h"
43227825Stheraven#include "opt_sysvipc.h"
44227825Stheraven#include "opt_ddb.h"
45227825Stheraven#include "opt_bounce.h"
46227825Stheraven#include "opt_machdep.h"
47227825Stheraven#include "opt_perfmon.h"
48227825Stheraven
49227825Stheraven#include <sys/param.h>
50227825Stheraven#include <sys/systm.h>
51227825Stheraven#include <sys/sysproto.h>
52227825Stheraven#include <sys/signalvar.h>
53227825Stheraven#include <sys/kernel.h>
54227825Stheraven#include <sys/proc.h>
55227825Stheraven#include <sys/buf.h>
56227825Stheraven#include <sys/reboot.h>
57227825Stheraven#include <sys/conf.h>
58227825Stheraven#include <sys/file.h>
59227825Stheraven#include <sys/callout.h>
60227825Stheraven#include <sys/malloc.h>
61227825Stheraven#include <sys/mbuf.h>
62227825Stheraven#include <sys/mount.h>
63227825Stheraven#include <sys/msgbuf.h>
64227825Stheraven#include <sys/ioctl.h>
65227825Stheraven#include <sys/sysent.h>
66227825Stheraven#include <sys/tty.h>
67227825Stheraven#include <sys/sysctl.h>
68227825Stheraven#include <sys/devconf.h>
69227825Stheraven#include <sys/vmmeter.h>
70227825Stheraven
71227825Stheraven#ifdef SYSVSHM
72227825Stheraven#include <sys/shm.h>
73227825Stheraven#endif
74227825Stheraven
75227825Stheraven#ifdef SYSVMSG
76227825Stheraven#include <sys/msg.h>
77227825Stheraven#endif
78227825Stheraven
79227825Stheraven#ifdef SYSVSEM
80227825Stheraven#include <sys/sem.h>
81227825Stheraven#endif
82227825Stheraven
83227825Stheraven#include <vm/vm.h>
84227825Stheraven#include <vm/vm_param.h>
85227825Stheraven#include <vm/vm_prot.h>
86227825Stheraven#include <vm/lock.h>
87227825Stheraven#include <vm/vm_kern.h>
88227825Stheraven#include <vm/vm_object.h>
89227825Stheraven#include <vm/vm_page.h>
90227825Stheraven#include <vm/vm_map.h>
91227825Stheraven#include <vm/vm_pager.h>
92227825Stheraven#include <vm/vm_extern.h>
93227825Stheraven
94227825Stheraven#include <sys/user.h>
95227825Stheraven#include <sys/exec.h>
96227825Stheraven#include <sys/vnode.h>
97227825Stheraven
98227825Stheraven#include <ddb/ddb.h>
99227825Stheraven
100227825Stheraven#include <net/netisr.h>
101227825Stheraven
102227825Stheraven#include <machine/cpu.h>
103227825Stheraven#include <machine/npx.h>
104227825Stheraven#include <machine/reg.h>
105227825Stheraven#include <machine/psl.h>
106227825Stheraven#include <machine/clock.h>
107227825Stheraven#include <machine/specialreg.h>
108227825Stheraven#include <machine/sysarch.h>
109227825Stheraven#include <machine/cons.h>
110227825Stheraven#include <machine/devconf.h>
111227825Stheraven#include <machine/bootinfo.h>
112227825Stheraven#include <machine/md_var.h>
113249998Sdim#ifdef PERFMON
114227825Stheraven#include <machine/perfmon.h>
115227825Stheraven#endif
116227825Stheraven
117227825Stheraven#include <i386/isa/isa.h>
118227825Stheraven#include <i386/isa/isa_device.h>
119227825Stheraven#include <i386/isa/rtc.h>
120227825Stheraven#include <machine/random.h>
121262801Sdim
122262801Sdimextern void init386 __P((int first));
123232950Stheravenextern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
124227825Stheravenextern int ptrace_single_step __P((struct proc *p));
125262801Sdimextern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
126262801Sdimextern void dblfault_handler __P((void));
127227825Stheraven
128227825Stheravenextern void i486_bzero	__P((void *, size_t));
129227825Stheravenextern void i586_bzero	__P((void *, size_t));
130262801Sdimextern void i686_bzero	__P((void *, size_t));
131262801Sdim
132227825Stheravenstatic void cpu_startup __P((void *));
133262801SdimSYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
134227825Stheraven
135227825Stheravenstatic void identifycpu(void);
136227825Stheraven
137227825Stheravenchar machine[] = "i386";
138227825StheravenSYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
139227825Stheraven
140227825Stheravenstatic char cpu_model[128];
141227825StheravenSYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "");
142232950Stheraven
143227825Stheravenstruct kern_devconf kdc_cpu0 = {
144232950Stheraven	0, 0, 0,		/* filled in by dev_attach */
145253159Stheraven	"cpu", 0, { MDDT_CPU },
146227825Stheraven	0, 0, 0, CPU_EXTERNALLEN,
147241903Sdim	0,			/* CPU has no parent */
148227825Stheraven	0,			/* no parentdata */
149241903Sdim	DC_BUSY,		/* the CPU is always busy */
150241903Sdim	cpu_model,		/* no sense in duplication */
151241903Sdim	DC_CLS_CPU		/* class */
152227825Stheraven};
153227825Stheraven
154227825Stheraven#ifndef PANIC_REBOOT_WAIT_TIME
155227825Stheraven#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
156227825Stheraven#endif
157227825Stheraven
158227825Stheraven#ifdef BOUNCE_BUFFERS
159227825Stheravenextern char *bouncememory;
160227825Stheravenextern int maxbkva;
161227825Stheraven#ifdef BOUNCEPAGES
162227825Stheravenint	bouncepages = BOUNCEPAGES;
163227825Stheraven#else
164227825Stheravenint	bouncepages = 0;
165227825Stheraven#endif
166227825Stheraven#endif	/* BOUNCE_BUFFERS */
167227825Stheraven
168232950Stheravenextern int freebufspace;
169227825Stheravenint	msgbufmapped = 0;		/* set when safe to use msgbuf */
170253159Stheravenint _udatasel, _ucodesel;
171253159Stheravenu_int	atdevbase;
172227825Stheraven
173241903Sdim
174227825Stheravenint physmem = 0;
175241903Sdimint cold = 1;
176241903Sdim
177241903Sdimstatic int
178227825Stheravensysctl_hw_physmem SYSCTL_HANDLER_ARGS
179227825Stheraven{
180227825Stheraven	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
181227825Stheraven	return (error);
182227825Stheraven}
183227825Stheraven
184227825StheravenSYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
185227825Stheraven	0, 0, sysctl_hw_physmem, "I", "");
186227825Stheraven
187227825Stheravenstatic int
188227825Stheravensysctl_hw_usermem SYSCTL_HANDLER_ARGS
189227825Stheraven{
190232950Stheraven	int error = sysctl_handle_int(oidp, 0,
191227825Stheraven		ctob(physmem - cnt.v_wire_count), req);
192227825Stheraven	return (error);
193227825Stheraven}
194227825Stheraven
195227825StheravenSYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
196227825Stheraven	0, 0, sysctl_hw_usermem, "I", "");
197227825Stheraven
198227825Stheravenint boothowto = 0, bootverbose = 0, Maxmem = 0;
199227825Stheravenstatic int	badpages = 0;
200227825Stheravenlong dumplo;
201227825Stheravenextern int bootdev;
202227825Stheraven
203227825Stheravenvm_offset_t phys_avail[10];
204227825Stheraven
205227825Stheraven/* must be 2 less so 0 0 can signal end of chunks */
206227825Stheraven#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
207227825Stheraven
208227825Stheravenint cpu_class = CPUCLASS_386;	/* smallest common denominator */
209227825Stheraven
210227825Stheravenstatic void dumpsys __P((void));
211227825Stheravenstatic void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */
212227825Stheraven
213227825Stheravenstatic vm_offset_t buffer_sva, buffer_eva;
214227825Stheravenvm_offset_t clean_sva, clean_eva;
215227825Stheravenstatic vm_offset_t pager_sva, pager_eva;
216227825Stheravenextern struct linker_set netisr_set;
217227825Stheraven
218227825Stheraven#define offsetof(type, member)	((size_t)(&((type *)0)->member))
219262801Sdim
220227825Stheravenstatic void
221262801Sdimcpu_startup(dummy)
222262801Sdim	void *dummy;
223262801Sdim{
224262801Sdim	register unsigned i;
225262801Sdim	register caddr_t v;
226262801Sdim	vm_offset_t maxaddr;
227262801Sdim	vm_size_t size = 0;
228262801Sdim	int firstaddr;
229262801Sdim	vm_offset_t minaddr;
230262801Sdim
231262801Sdim	if (boothowto & RB_VERBOSE)
232262801Sdim		bootverbose++;
233262801Sdim
234262801Sdim	/*
235262801Sdim	 * Initialize error message buffer (at end of core).
236227825Stheraven	 */
237227825Stheraven
238227825Stheraven	/* avail_end was pre-decremented in init386() to compensate */
239227825Stheraven	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
240227825Stheraven		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
241227825Stheraven			   avail_end + i * PAGE_SIZE,
242227825Stheraven			   VM_PROT_ALL, TRUE);
243262801Sdim	msgbufmapped = 1;
244262801Sdim
245262801Sdim	/*
246262801Sdim	 * Good {morning,afternoon,evening,night}.
247262801Sdim	 */
248232950Stheraven	printf(version);
249227825Stheraven	cpu_class = i386_cpus[cpu].cpu_class;
250262801Sdim	startrtclock();
251262801Sdim	identifycpu();
252262801Sdim	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
253262801Sdim	/*
254262801Sdim	 * Display any holes after the first chunk of extended memory.
255227825Stheraven	 */
256227825Stheraven	if (badpages != 0) {
257227825Stheraven		int indx = 1;
258262801Sdim
259262801Sdim		/*
260262801Sdim		 * XXX skip reporting ISA hole & unmanaged kernel memory
261262801Sdim		 */
262262801Sdim		if (phys_avail[0] == PAGE_SIZE)
263227825Stheraven			indx += 2;
264227825Stheraven
265227825Stheraven		printf("Physical memory hole(s):\n");
266227825Stheraven		for (; phys_avail[indx + 1] != 0; indx += 2) {
267227825Stheraven			int size = phys_avail[indx + 1] - phys_avail[indx];
268227825Stheraven
269227825Stheraven			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
270232950Stheraven			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
271227825Stheraven		}
272227825Stheraven	}
273227825Stheraven
274227825Stheraven	/*
275227825Stheraven	 * Quickly wire in netisrs.
276227825Stheraven	 */
277227825Stheraven	setup_netisrs(&netisr_set);
278227825Stheraven
279227825Stheraven/*
280227825Stheraven#ifdef ISDN
281227825Stheraven	DONET(isdnintr, NETISR_ISDN);
282227825Stheraven#endif
283227825Stheraven*/
284227825Stheraven
285227825Stheraven	/*
286227825Stheraven	 * Allocate space for system data structures.
287227825Stheraven	 * The first available kernel virtual address is in "v".
288227825Stheraven	 * As pages of kernel virtual memory are allocated, "v" is incremented.
289227825Stheraven	 * As pages of memory are allocated and cleared,
290227825Stheraven	 * "firstaddr" is incremented.
291262801Sdim	 * An index into the kernel page table corresponding to the
292227825Stheraven	 * virtual memory address maintained in "v" is kept in "mapaddr".
293262801Sdim	 */
294262801Sdim
295262801Sdim	/*
296262801Sdim	 * Make two passes.  The first pass calculates how much memory is
297262801Sdim	 * needed and allocates it.  The second pass assigns virtual
298262801Sdim	 * addresses to the various data structures.
299262801Sdim	 */
300262801Sdim	firstaddr = 0;
301262801Sdimagain:
302262801Sdim	v = (caddr_t)firstaddr;
303262801Sdim
304262801Sdim#define	valloc(name, type, num) \
305262801Sdim	    (name) = (type *)v; v = (caddr_t)((name)+(num))
306262801Sdim#define	valloclim(name, type, num, lim) \
307262801Sdim	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
308262801Sdim	valloc(callout, struct callout, ncallout);
309262801Sdim#ifdef SYSVSHM
310262801Sdim	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
311262801Sdim#endif
312262801Sdim#ifdef SYSVSEM
313262801Sdim	valloc(sema, struct semid_ds, seminfo.semmni);
314262801Sdim	valloc(sem, struct sem, seminfo.semmns);
315262801Sdim	/* This is pretty disgusting! */
316262801Sdim	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
317262801Sdim#endif
318262801Sdim#ifdef SYSVMSG
319227825Stheraven	valloc(msgpool, char, msginfo.msgmax);
320227825Stheraven	valloc(msgmaps, struct msgmap, msginfo.msgseg);
321227825Stheraven	valloc(msghdrs, struct msg, msginfo.msgtql);
322227825Stheraven	valloc(msqids, struct msqid_ds, msginfo.msgmni);
323227825Stheraven#endif
324227825Stheraven
325227825Stheraven	if (nbuf == 0) {
326227825Stheraven		nbuf = 30;
327227825Stheraven		if( physmem > 1024)
328227825Stheraven			nbuf += min((physmem - 1024) / 12, 1024);
329249998Sdim	}
330227825Stheraven	nswbuf = min(nbuf, 128);
331227825Stheraven
332227825Stheraven	valloc(swbuf, struct buf, nswbuf);
333227825Stheraven	valloc(buf, struct buf, nbuf);
334227825Stheraven
335227825Stheraven#ifdef BOUNCE_BUFFERS
336227825Stheraven	/*
337227825Stheraven	 * If there is more than 16MB of memory, allocate some bounce buffers
338227825Stheraven	 */
339227825Stheraven	if (Maxmem > 4096) {
340227825Stheraven		if (bouncepages == 0) {
341227825Stheraven			bouncepages = 64;
342227825Stheraven			bouncepages += ((Maxmem - 4096) / 2048) * 32;
343227825Stheraven		}
344227825Stheraven		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
345227825Stheraven		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
346227825Stheraven	}
347227825Stheraven#endif
348227825Stheraven
349227825Stheraven	/*
350227825Stheraven	 * End of first pass, size has been calculated so allocate memory
351227825Stheraven	 */
352227825Stheraven	if (firstaddr == 0) {
353227825Stheraven		size = (vm_size_t)(v - firstaddr);
354227825Stheraven		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
355227825Stheraven		if (firstaddr == 0)
356227825Stheraven			panic("startup: no room for tables");
357227825Stheraven		goto again;
358227825Stheraven	}
359227825Stheraven
360227825Stheraven	/*
361227825Stheraven	 * End of second pass, addresses have been assigned
362227825Stheraven	 */
363227825Stheraven	if ((vm_size_t)(v - firstaddr) != size)
364227825Stheraven		panic("startup: table size inconsistency");
365227825Stheraven
366227825Stheraven#ifdef BOUNCE_BUFFERS
367227825Stheraven	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
368227825Stheraven			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
369227825Stheraven				maxbkva + pager_map_size, TRUE);
370227825Stheraven	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
371227825Stheraven#else
372227825Stheraven	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
373227825Stheraven			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
374227825Stheraven#endif
375227825Stheraven	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
376227825Stheraven				(nbuf*MAXBSIZE), TRUE);
377227825Stheraven	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
378227825Stheraven				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
379227825Stheraven	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
380227825Stheraven				(16*ARG_MAX), TRUE);
381262801Sdim	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
382227825Stheraven				(maxproc*UPAGES*PAGE_SIZE), FALSE);
383227825Stheraven
384227825Stheraven	/*
385227825Stheraven	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
386227825Stheraven	 * we use the more space efficient malloc in place of kmem_alloc.
387227825Stheraven	 */
388227825Stheraven	mclrefcnt = (char *)malloc(nmbclusters+PAGE_SIZE/MCLBYTES,
389227825Stheraven				   M_MBUF, M_NOWAIT);
390227825Stheraven	bzero(mclrefcnt, nmbclusters+PAGE_SIZE/MCLBYTES);
391227825Stheraven	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
392227825Stheraven			       nmbclusters * MCLBYTES, FALSE);
393227825Stheraven	/*
394227825Stheraven	 * Initialize callouts
395232950Stheraven	 */
396227825Stheraven	callfree = callout;
397227825Stheraven	for (i = 1; i < ncallout; i++)
398227825Stheraven		callout[i-1].c_next = &callout[i];
399227825Stheraven
400227825Stheraven        if (boothowto & RB_CONFIG) {
401227825Stheraven		userconfig();
402227825Stheraven		cninit();	/* the preferred console may have changed */
403232950Stheraven	}
404227825Stheraven
405227825Stheraven#ifdef BOUNCE_BUFFERS
406227825Stheraven	/*
407227825Stheraven	 * init bounce buffers
408227825Stheraven	 */
409227825Stheraven	vm_bounce_init();
410227825Stheraven#endif
411227825Stheraven
412227825Stheraven	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
413227825Stheraven	    ptoa(cnt.v_free_count) / 1024);
414227825Stheraven
415227825Stheraven	/*
416227825Stheraven	 * Set up buffers, so they can be used to read disk labels.
417227825Stheraven	 */
418227825Stheraven	bufinit();
419227825Stheraven	vm_pager_bufferinit();
420227825Stheraven
421227825Stheraven	/*
422227825Stheraven	 * In verbose mode, print out the BIOS's idea of the disk geometries.
423227825Stheraven	 */
424227825Stheraven	if (bootverbose) {
425227825Stheraven		printf("BIOS Geometries:\n");
426227825Stheraven		for (i = 0; i < N_BIOS_GEOM; i++) {
427227825Stheraven			unsigned long bios_geom;
428227825Stheraven			int max_cylinder, max_head, max_sector;
429227825Stheraven
430227825Stheraven			bios_geom = bootinfo.bi_bios_geom[i];
431227825Stheraven
432232950Stheraven			/*
433227825Stheraven			 * XXX the bootstrap punts a 1200K floppy geometry
434227825Stheraven			 * when the get-disk-geometry interrupt fails.  Skip
435253159Stheraven			 * drives that have this geometry.
436253159Stheraven			 */
437227825Stheraven			if (bios_geom == 0x4f010f)
438241903Sdim				continue;
439227825Stheraven
440241903Sdim			printf(" %x:%08lx ", i, bios_geom);
441241903Sdim			max_cylinder = bios_geom >> 16;
442241903Sdim			max_head = (bios_geom >> 8) & 0xff;
443227825Stheraven			max_sector = bios_geom & 0xff;
444227825Stheraven			printf(
445227825Stheraven		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
446227825Stheraven			       max_cylinder, max_cylinder + 1,
447227825Stheraven			       max_head, max_head + 1,
448227825Stheraven			       max_sector, max_sector);
449227825Stheraven		}
450227825Stheraven		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
451227825Stheraven	}
452227825Stheraven}
453227825Stheraven
454227825Stheravenint
455227825Stheravenregister_netisr(num, handler)
456227825Stheraven	int num;
457227825Stheraven	netisr_t *handler;
458227825Stheraven{
459232950Stheraven
460227825Stheraven	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
461232950Stheraven		printf("register_netisr: bad isr number: %d\n", num);
462253159Stheraven		return (EINVAL);
463227825Stheraven	}
464241903Sdim	netisrs[num] = handler;
465227825Stheraven	return (0);
466241903Sdim}
467241903Sdim
468241903Sdimstatic void
469227825Stheravensetup_netisrs(ls)
470227825Stheraven	struct linker_set *ls;
471227825Stheraven{
472227825Stheraven	int i;
473227825Stheraven	const struct netisrtab *nit;
474227825Stheraven
475227825Stheraven	for(i = 0; ls->ls_items[i]; i++) {
476227825Stheraven		nit = (const struct netisrtab *)ls->ls_items[i];
477227825Stheraven		register_netisr(nit->nit_num, nit->nit_isr);
478227825Stheraven	}
479227825Stheraven}
480227825Stheraven
481227825Stheravenstatic struct cpu_nameclass i386_cpus[] = {
482227825Stheraven	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
483227825Stheraven	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
484227825Stheraven	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
485232950Stheraven	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
486227825Stheraven	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
487253159Stheraven	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
488253159Stheraven	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
489227825Stheraven	{ "Pentium Pro",	CPUCLASS_686 },		/* CPU_686 */
490241903Sdim};
491227825Stheraven
492241903Sdimstatic void
493241903Sdimidentifycpu()
494241903Sdim{
495227825Stheraven	printf("CPU: ");
496227825Stheraven	strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
497227825Stheraven
498227825Stheraven#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
499227825Stheraven	if (!strcmp(cpu_vendor,"GenuineIntel")) {
500227825Stheraven		if ((cpu_id & 0xf00) > 3) {
501227825Stheraven			cpu_model[0] = '\0';
502227825Stheraven
503227825Stheraven			switch (cpu_id & 0x3000) {
504227825Stheraven			case 0x1000:
505227825Stheraven				strcpy(cpu_model, "Overdrive ");
506227825Stheraven				break;
507232950Stheraven			case 0x2000:
508227825Stheraven				strcpy(cpu_model, "Dual ");
509227825Stheraven				break;
510227825Stheraven			}
511227825Stheraven
512227825Stheraven			switch (cpu_id & 0xf00) {
513227825Stheraven			case 0x400:
514227825Stheraven				strcat(cpu_model, "i486 ");
515227825Stheraven				break;
516227825Stheraven			case 0x500:
517227825Stheraven				strcat(cpu_model, "Pentium"); /* nb no space */
518227825Stheraven				break;
519227825Stheraven			case 0x600:
520227825Stheraven				strcat(cpu_model, "Pentium Pro");
521227825Stheraven				break;
522227825Stheraven			default:
523227825Stheraven				strcat(cpu_model, "unknown");
524227825Stheraven				break;
525227825Stheraven			}
526227825Stheraven
527232950Stheraven			switch (cpu_id & 0xff0) {
528227825Stheraven			case 0x400:
529227825Stheraven				strcat(cpu_model, "DX"); break;
530227825Stheraven			case 0x410:
531				strcat(cpu_model, "DX"); break;
532			case 0x420:
533				strcat(cpu_model, "SX"); break;
534			case 0x430:
535				strcat(cpu_model, "DX2"); break;
536			case 0x440:
537				strcat(cpu_model, "SL"); break;
538			case 0x450:
539				strcat(cpu_model, "SX2"); break;
540			case 0x470:
541				strcat(cpu_model, "DX2 Write-Back Enhanced");
542				break;
543			case 0x480:
544				strcat(cpu_model, "DX4"); break;
545				break;
546			}
547		}
548	}
549#endif
550	printf("%s (", cpu_model);
551	switch(cpu_class) {
552	case CPUCLASS_286:
553		printf("286");
554		break;
555#if defined(I386_CPU)
556	case CPUCLASS_386:
557		printf("386");
558		break;
559#endif
560#if defined(I486_CPU)
561	case CPUCLASS_486:
562		printf("486");
563		bzero = i486_bzero;
564		break;
565#endif
566#if defined(I586_CPU)
567	case CPUCLASS_586:
568		printf("%d.%02d-MHz ",
569		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100,
570		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100);
571		printf("586");
572		break;
573#endif
574#if defined(I686_CPU)
575	case CPUCLASS_686:
576		printf("%d.%02d-MHz ",
577		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100,
578		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100);
579		printf("686");
580		break;
581#endif
582	default:
583		printf("unknown");	/* will panic below... */
584	}
585	printf("-class CPU)\n");
586#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
587	if(*cpu_vendor)
588		printf("  Origin = \"%s\"",cpu_vendor);
589	if(cpu_id)
590		printf("  Id = 0x%lx",cpu_id);
591
592	if (!strcmp(cpu_vendor, "GenuineIntel")) {
593		printf("  Stepping=%ld", cpu_id & 0xf);
594		if (cpu_high > 0) {
595			printf("\n  Features=0x%b", cpu_feature,
596			"\020"
597			"\001FPU"
598			"\002VME"
599			"\003DE"
600			"\004PSE"
601			"\005TSC"
602			"\006MSR"
603			"\007PAE"
604			"\010MCE"
605			"\011CX8"
606			"\012APIC"
607			"\013<b10>"
608			"\014<b11>"
609			"\015MTRR"
610			"\016PGE"
611			"\017MCA"
612			"\020CMOV"
613			);
614		}
615	}
616	/* Avoid ugly blank lines: only print newline when we have to. */
617	if (*cpu_vendor || cpu_id)
618		printf("\n");
619#endif
620	/*
621	 * Now that we have told the user what they have,
622	 * let them know if that machine type isn't configured.
623	 */
624	switch (cpu_class) {
625	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
626#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU)
627#error This kernel is not configured for one of the supported CPUs
628#endif
629#if !defined(I386_CPU)
630	case CPUCLASS_386:
631#endif
632#if !defined(I486_CPU)
633	case CPUCLASS_486:
634#endif
635#if !defined(I586_CPU)
636	case CPUCLASS_586:
637#endif
638#if !defined(I686_CPU)
639	case CPUCLASS_686:
640#endif
641		panic("CPU class not configured");
642	default:
643		break;
644	}
645#ifdef PERFMON
646	perfmon_init();
647#endif
648	dev_attach(&kdc_cpu0);
649}
650
651/*
652 * Send an interrupt to process.
653 *
654 * Stack is set up to allow sigcode stored
655 * at top to call routine, followed by kcall
656 * to sigreturn routine below.  After sigreturn
657 * resets the signal mask, the stack, and the
658 * frame pointer, it returns to the user
659 * specified pc, psl.
660 */
661void
662sendsig(catcher, sig, mask, code)
663	sig_t catcher;
664	int sig, mask;
665	u_long code;
666{
667	register struct proc *p = curproc;
668	register int *regs;
669	register struct sigframe *fp;
670	struct sigframe sf;
671	struct sigacts *psp = p->p_sigacts;
672	int oonstack;
673
674	regs = p->p_md.md_regs;
675        oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
676	/*
677	 * Allocate and validate space for the signal handler context.
678	 */
679        if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
680	    (psp->ps_sigonstack & sigmask(sig))) {
681		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
682		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
683		psp->ps_sigstk.ss_flags |= SS_ONSTACK;
684	} else {
685		fp = (struct sigframe *)regs[tESP] - 1;
686	}
687
688	/*
689	 * grow() will return FALSE if the fp will not fit inside the stack
690	 *	and the stack can not be grown. useracc will return FALSE
691	 *	if access is denied.
692	 */
693	if ((grow(p, (int)fp) == FALSE) ||
694	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
695		/*
696		 * Process has trashed its stack; give it an illegal
697		 * instruction to halt it in its tracks.
698		 */
699		SIGACTION(p, SIGILL) = SIG_DFL;
700		sig = sigmask(SIGILL);
701		p->p_sigignore &= ~sig;
702		p->p_sigcatch &= ~sig;
703		p->p_sigmask &= ~sig;
704		psignal(p, SIGILL);
705		return;
706	}
707
708	/*
709	 * Build the argument list for the signal handler.
710	 */
711	if (p->p_sysent->sv_sigtbl) {
712		if (sig < p->p_sysent->sv_sigsize)
713			sig = p->p_sysent->sv_sigtbl[sig];
714		else
715			sig = p->p_sysent->sv_sigsize + 1;
716	}
717	sf.sf_signum = sig;
718	sf.sf_code = code;
719	sf.sf_scp = &fp->sf_sc;
720	sf.sf_addr = (char *) regs[tERR];
721	sf.sf_handler = catcher;
722
723	/* save scratch registers */
724	sf.sf_sc.sc_eax = regs[tEAX];
725	sf.sf_sc.sc_ebx = regs[tEBX];
726	sf.sf_sc.sc_ecx = regs[tECX];
727	sf.sf_sc.sc_edx = regs[tEDX];
728	sf.sf_sc.sc_esi = regs[tESI];
729	sf.sf_sc.sc_edi = regs[tEDI];
730	sf.sf_sc.sc_cs = regs[tCS];
731	sf.sf_sc.sc_ds = regs[tDS];
732	sf.sf_sc.sc_ss = regs[tSS];
733	sf.sf_sc.sc_es = regs[tES];
734	sf.sf_sc.sc_isp = regs[tISP];
735
736	/*
737	 * Build the signal context to be used by sigreturn.
738	 */
739	sf.sf_sc.sc_onstack = oonstack;
740	sf.sf_sc.sc_mask = mask;
741	sf.sf_sc.sc_sp = regs[tESP];
742	sf.sf_sc.sc_fp = regs[tEBP];
743	sf.sf_sc.sc_pc = regs[tEIP];
744	sf.sf_sc.sc_ps = regs[tEFLAGS];
745
746	/*
747	 * Copy the sigframe out to the user's stack.
748	 */
749	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
750		/*
751		 * Something is wrong with the stack pointer.
752		 * ...Kill the process.
753		 */
754		sigexit(p, SIGILL);
755	};
756
757	regs[tESP] = (int)fp;
758	regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode));
759	regs[tEFLAGS] &= ~PSL_VM;
760	regs[tCS] = _ucodesel;
761	regs[tDS] = _udatasel;
762	regs[tES] = _udatasel;
763	regs[tSS] = _udatasel;
764}
765
766/*
767 * System call to cleanup state after a signal
768 * has been taken.  Reset signal mask and
769 * stack state from context left by sendsig (above).
770 * Return to previous pc and psl as specified by
771 * context left by sendsig. Check carefully to
772 * make sure that the user has not modified the
773 * state to gain improper privileges.
774 */
775int
776sigreturn(p, uap, retval)
777	struct proc *p;
778	struct sigreturn_args /* {
779		struct sigcontext *sigcntxp;
780	} */ *uap;
781	int *retval;
782{
783	register struct sigcontext *scp;
784	register struct sigframe *fp;
785	register int *regs = p->p_md.md_regs;
786	int eflags;
787
788	/*
789	 * (XXX old comment) regs[tESP] points to the return address.
790	 * The user scp pointer is above that.
791	 * The return address is faked in the signal trampoline code
792	 * for consistency.
793	 */
794	scp = uap->sigcntxp;
795	fp = (struct sigframe *)
796	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
797
798	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
799		return(EINVAL);
800
801	/*
802	 * Don't allow users to change privileged or reserved flags.
803	 */
804#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
805	eflags = scp->sc_ps;
806	/*
807	 * XXX do allow users to change the privileged flag PSL_RF.  The
808	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
809	 * sometimes set it there too.  tf_eflags is kept in the signal
810	 * context during signal handling and there is no other place
811	 * to remember it, so the PSL_RF bit may be corrupted by the
812	 * signal handler without us knowing.  Corruption of the PSL_RF
813	 * bit at worst causes one more or one less debugger trap, so
814	 * allowing it is fairly harmless.
815	 */
816	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
817#ifdef DEBUG
818    		printf("sigreturn: eflags = 0x%x\n", eflags);
819#endif
820    		return(EINVAL);
821	}
822
823	/*
824	 * Don't allow users to load a valid privileged %cs.  Let the
825	 * hardware check for invalid selectors, excess privilege in
826	 * other selectors, invalid %eip's and invalid %esp's.
827	 */
828#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
829	if (!CS_SECURE(scp->sc_cs)) {
830#ifdef DEBUG
831    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
832#endif
833		trapsignal(p, SIGBUS, T_PROTFLT);
834		return(EINVAL);
835	}
836
837	/* restore scratch registers */
838	regs[tEAX] = scp->sc_eax;
839	regs[tEBX] = scp->sc_ebx;
840	regs[tECX] = scp->sc_ecx;
841	regs[tEDX] = scp->sc_edx;
842	regs[tESI] = scp->sc_esi;
843	regs[tEDI] = scp->sc_edi;
844	regs[tCS] = scp->sc_cs;
845	regs[tDS] = scp->sc_ds;
846	regs[tES] = scp->sc_es;
847	regs[tSS] = scp->sc_ss;
848	regs[tISP] = scp->sc_isp;
849
850	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
851		return(EINVAL);
852
853	if (scp->sc_onstack & 01)
854		p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK;
855	else
856		p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
857	p->p_sigmask = scp->sc_mask &~
858	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
859	regs[tEBP] = scp->sc_fp;
860	regs[tESP] = scp->sc_sp;
861	regs[tEIP] = scp->sc_pc;
862	regs[tEFLAGS] = eflags;
863	return(EJUSTRETURN);
864}
865
/*
 * Shutdown sync state.  boot() only runs its disk sync while this is
 * still negative, and sets it to 0 when the sync starts.
 */
static int	waittime = -1;
/* PCB that boot() fills with savectx() right before calling dumpsys(). */
static struct pcb dumppcb;
868
869__dead void
870boot(howto)
871	int howto;
872{
873	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
874		register struct buf *bp;
875		int iter, nbusy;
876
877		waittime = 0;
878		printf("\nsyncing disks... ");
879
880		sync(&proc0, NULL, NULL);
881
882		for (iter = 0; iter < 20; iter++) {
883			nbusy = 0;
884			for (bp = &buf[nbuf]; --bp >= buf; ) {
885				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
886					nbusy++;
887				}
888			}
889			if (nbusy == 0)
890				break;
891			printf("%d ", nbusy);
892			DELAY(40000 * iter);
893		}
894		if (nbusy) {
895			/*
896			 * Failed to sync all blocks. Indicate this and don't
897			 * unmount filesystems (thus forcing an fsck on reboot).
898			 */
899			printf("giving up\n");
900#ifdef SHOW_BUSYBUFS
901			nbusy = 0;
902			for (bp = &buf[nbuf]; --bp >= buf; ) {
903				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
904					nbusy++;
905					printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno);
906				}
907			}
908			DELAY(5000000);	/* 5 seconds */
909#endif
910		} else {
911			printf("done\n");
912			/*
913			 * Unmount filesystems
914			 */
915			if (panicstr == 0)
916				vfs_unmountall();
917		}
918		DELAY(100000);			/* wait for console output to finish */
919		dev_shutdownall(FALSE);
920	}
921	splhigh();
922	if (howto & RB_HALT) {
923		printf("\n");
924		printf("The operating system has halted.\n");
925		printf("Please press any key to reboot.\n\n");
926		cngetc();
927	} else {
928		if (howto & RB_DUMP) {
929			if (!cold) {
930				savectx(&dumppcb);
931				dumppcb.pcb_ptd = rcr3();
932				dumpsys();
933			}
934
935			if (PANIC_REBOOT_WAIT_TIME != 0) {
936				if (PANIC_REBOOT_WAIT_TIME != -1) {
937					int loop;
938					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
939						PANIC_REBOOT_WAIT_TIME);
940					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
941						DELAY(1000 * 100); /* 1/10th second */
942						if (cncheckc()) /* Did user type a key? */
943							break;
944					}
945					if (!loop)
946						goto die;
947				}
948			} else { /* zero time specified - reboot NOW */
949				goto die;
950			}
951			printf("--> Press a key on the console to reboot <--\n");
952			cngetc();
953		}
954	}
955die:
956	printf("Rebooting...\n");
957	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
958	cpu_reset();
959	for(;;) ;
960	/* NOTREACHED */
961}
962
/*
 * Magic number for savecore
 *
 * exported (symorder) and used at least by savecore(8)
 *
 */
u_long		dumpmag = 0x8fca0101UL;

/* Set to Maxmem by dumpsys() just before the dump driver is invoked. */
static int	dumpsize = 0;		/* also for savecore */

/*
 * Crash-dump enable switch: dumpsys() returns immediately when this is 0.
 * Registered below under the _machdep sysctl node as "do_dump" with
 * CTLFLAG_RW.
 */
static int	dodump = 1;
SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, "");
975
976/*
977 * Doadump comes here after turning off memory management and
978 * getting on the dump stack, either when called above, or by
979 * the auto-restart code.
980 */
981static void
982dumpsys()
983{
984
985	if (!dodump)
986		return;
987	if (dumpdev == NODEV)
988		return;
989	if ((minor(dumpdev)&07) != 1)
990		return;
991	if (!(bdevsw[major(dumpdev)]))
992		return;
993	if (!(bdevsw[major(dumpdev)]->d_dump))
994		return;
995	dumpsize = Maxmem;
996	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
997	printf("dump ");
998	switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) {
999
1000	case ENXIO:
1001		printf("device bad\n");
1002		break;
1003
1004	case EFAULT:
1005		printf("device not ready\n");
1006		break;
1007
1008	case EINVAL:
1009		printf("area improper\n");
1010		break;
1011
1012	case EIO:
1013		printf("i/o error\n");
1014		break;
1015
1016	case EINTR:
1017		printf("aborted from console\n");
1018		break;
1019
1020	default:
1021		printf("succeeded\n");
1022		break;
1023	}
1024}
1025
1026/*
1027 * Clear registers on exec
1028 */
1029void
1030setregs(p, entry, stack)
1031	struct proc *p;
1032	u_long entry;
1033	u_long stack;
1034{
1035	int *regs = p->p_md.md_regs;
1036
1037#ifdef USER_LDT
1038	struct pcb *pcb = &p->p_addr->u_pcb;
1039
1040	/* was i386_user_cleanup() in NetBSD */
1041	if (pcb->pcb_ldt) {
1042		if (pcb == curpcb)
1043			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
1044		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
1045			pcb->pcb_ldt_len * sizeof(union descriptor));
1046		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
1047 	}
1048#endif
1049
1050	bzero(regs, sizeof(struct trapframe));
1051	regs[tEIP] = entry;
1052	regs[tESP] = stack;
1053	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
1054	regs[tSS] = _udatasel;
1055	regs[tDS] = _udatasel;
1056	regs[tES] = _udatasel;
1057	regs[tCS] = _ucodesel;
1058
1059	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
1060	load_cr0(rcr0() | CR0_TS);	/* start emulating */
1061#if	NNPX > 0
1062	npxinit(__INITIAL_NPXCW__);
1063#endif	/* NNPX > 0 */
1064}
1065
1066static int
1067sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
1068{
1069	int error;
1070	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
1071		req);
1072	if (!error && req->newptr)
1073		resettodr();
1074	return (error);
1075}
1076
/* adjkerntz: read/write int, routed through sysctl_machdep_adjkerntz(). */
SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

/* disable_rtc_set: read/write int backed directly by the variable. */
SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

/* bootinfo: read-only copy of the bootinfo structure. */
SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
	CTLFLAG_RD, &bootinfo, bootinfo, "");

/* wall_cmos_clock: read/write int backed directly by the variable. */
SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
	CTLFLAG_RW, &wall_cmos_clock, 0, "");
1088
/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

/* Both are set by init386() to GSEL(GLDT_SEL, SEL_KPL), the default LDT. */
int currentldt;
int _default_ldt;
union descriptor gdt[NGDT];		/* global descriptor table */
struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */

/*
 * TSS and private one-page stack for the double fault handler;
 * init386() fills in dblfault_tss and IDT vector 8 is a task gate that
 * refers to it through GPANIC_SEL.
 */
static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];

/* Assigned to proc0.p_addr at the start of init386(); defined elsewhere. */
extern  struct user *proc0paddr;
1107
1108/* software prototypes -- in more palatable form */
1109struct soft_segment_descriptor gdt_segs[] = {
1110/* GNULL_SEL	0 Null Descriptor */
1111{	0x0,			/* segment base address  */
1112	0x0,			/* length */
1113	0,			/* segment type */
1114	0,			/* segment descriptor priority level */
1115	0,			/* segment descriptor present */
1116	0, 0,
1117	0,			/* default 32 vs 16 bit size */
1118	0  			/* limit granularity (byte/page units)*/ },
1119/* GCODE_SEL	1 Code Descriptor for kernel */
1120{	0x0,			/* segment base address  */
1121	0xfffff,		/* length - all address space */
1122	SDT_MEMERA,		/* segment type */
1123	0,			/* segment descriptor priority level */
1124	1,			/* segment descriptor present */
1125	0, 0,
1126	1,			/* default 32 vs 16 bit size */
1127	1  			/* limit granularity (byte/page units)*/ },
1128/* GDATA_SEL	2 Data Descriptor for kernel */
1129{	0x0,			/* segment base address  */
1130	0xfffff,		/* length - all address space */
1131	SDT_MEMRWA,		/* segment type */
1132	0,			/* segment descriptor priority level */
1133	1,			/* segment descriptor present */
1134	0, 0,
1135	1,			/* default 32 vs 16 bit size */
1136	1  			/* limit granularity (byte/page units)*/ },
1137/* GLDT_SEL	3 LDT Descriptor */
1138{	(int) ldt,		/* segment base address  */
1139	sizeof(ldt)-1,		/* length - all address space */
1140	SDT_SYSLDT,		/* segment type */
1141	0,			/* segment descriptor priority level */
1142	1,			/* segment descriptor present */
1143	0, 0,
1144	0,			/* unused - default 32 vs 16 bit size */
1145	0  			/* limit granularity (byte/page units)*/ },
1146/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1147{	0x0,			/* segment base address  */
1148	0x0,			/* length - all address space */
1149	0,			/* segment type */
1150	0,			/* segment descriptor priority level */
1151	0,			/* segment descriptor present */
1152	0, 0,
1153	0,			/* default 32 vs 16 bit size */
1154	0  			/* limit granularity (byte/page units)*/ },
1155/* GPANIC_SEL	5 Panic Tss Descriptor */
1156{	(int) &dblfault_tss,	/* segment base address  */
1157	sizeof(struct i386tss)-1,/* length - all address space */
1158	SDT_SYS386TSS,		/* segment type */
1159	0,			/* segment descriptor priority level */
1160	1,			/* segment descriptor present */
1161	0, 0,
1162	0,			/* unused - default 32 vs 16 bit size */
1163	0  			/* limit granularity (byte/page units)*/ },
1164/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1165{	(int) kstack,		/* segment base address  */
1166	sizeof(struct i386tss)-1,/* length - all address space */
1167	SDT_SYS386TSS,		/* segment type */
1168	0,			/* segment descriptor priority level */
1169	1,			/* segment descriptor present */
1170	0, 0,
1171	0,			/* unused - default 32 vs 16 bit size */
1172	0  			/* limit granularity (byte/page units)*/ },
1173/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1174{	(int) ldt,		/* segment base address  */
1175	(512 * sizeof(union descriptor)-1),		/* length */
1176	SDT_SYSLDT,		/* segment type */
1177	0,			/* segment descriptor priority level */
1178	1,			/* segment descriptor present */
1179	0, 0,
1180	0,			/* unused - default 32 vs 16 bit size */
1181	0  			/* limit granularity (byte/page units)*/ },
1182/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1183{	0,			/* segment base address (overwritten by APM)  */
1184	0xfffff,		/* length */
1185	SDT_MEMERA,		/* segment type */
1186	0,			/* segment descriptor priority level */
1187	1,			/* segment descriptor present */
1188	0, 0,
1189	1,			/* default 32 vs 16 bit size */
1190	1  			/* limit granularity (byte/page units)*/ },
1191/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1192{	0,			/* segment base address (overwritten by APM)  */
1193	0xfffff,		/* length */
1194	SDT_MEMERA,		/* segment type */
1195	0,			/* segment descriptor priority level */
1196	1,			/* segment descriptor present */
1197	0, 0,
1198	0,			/* default 32 vs 16 bit size */
1199	1  			/* limit granularity (byte/page units)*/ },
1200/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1201{	0,			/* segment base address (overwritten by APM) */
1202	0xfffff,		/* length */
1203	SDT_MEMRWA,		/* segment type */
1204	0,			/* segment descriptor priority level */
1205	1,			/* segment descriptor present */
1206	0, 0,
1207	1,			/* default 32 vs 16 bit size */
1208	1  			/* limit granularity (byte/page units)*/ },
1209};
1210
1211static struct soft_segment_descriptor ldt_segs[] = {
1212	/* Null Descriptor - overwritten by call gate */
1213{	0x0,			/* segment base address  */
1214	0x0,			/* length - all address space */
1215	0,			/* segment type */
1216	0,			/* segment descriptor priority level */
1217	0,			/* segment descriptor present */
1218	0, 0,
1219	0,			/* default 32 vs 16 bit size */
1220	0  			/* limit granularity (byte/page units)*/ },
1221	/* Null Descriptor - overwritten by call gate */
1222{	0x0,			/* segment base address  */
1223	0x0,			/* length - all address space */
1224	0,			/* segment type */
1225	0,			/* segment descriptor priority level */
1226	0,			/* segment descriptor present */
1227	0, 0,
1228	0,			/* default 32 vs 16 bit size */
1229	0  			/* limit granularity (byte/page units)*/ },
1230	/* Null Descriptor - overwritten by call gate */
1231{	0x0,			/* segment base address  */
1232	0x0,			/* length - all address space */
1233	0,			/* segment type */
1234	0,			/* segment descriptor priority level */
1235	0,			/* segment descriptor present */
1236	0, 0,
1237	0,			/* default 32 vs 16 bit size */
1238	0  			/* limit granularity (byte/page units)*/ },
1239	/* Code Descriptor for user */
1240{	0x0,			/* segment base address  */
1241	0xfffff,		/* length - all address space */
1242	SDT_MEMERA,		/* segment type */
1243	SEL_UPL,		/* segment descriptor priority level */
1244	1,			/* segment descriptor present */
1245	0, 0,
1246	1,			/* default 32 vs 16 bit size */
1247	1  			/* limit granularity (byte/page units)*/ },
1248	/* Data Descriptor for user */
1249{	0x0,			/* segment base address  */
1250	0xfffff,		/* length - all address space */
1251	SDT_MEMRWA,		/* segment type */
1252	SEL_UPL,		/* segment descriptor priority level */
1253	1,			/* segment descriptor present */
1254	0, 0,
1255	1,			/* default 32 vs 16 bit size */
1256	1  			/* limit granularity (byte/page units)*/ },
1257};
1258
1259void
1260setidt(idx, func, typ, dpl, selec)
1261	int idx;
1262	inthand_t *func;
1263	int typ;
1264	int dpl;
1265	int selec;
1266{
1267	struct gate_descriptor *ip = idt + idx;
1268
1269	ip->gd_looffset = (int)func;
1270	ip->gd_selector = selec;
1271	ip->gd_stkcpy = 0;
1272	ip->gd_xx = 0;
1273	ip->gd_type = typ;
1274	ip->gd_dpl = dpl;
1275	ip->gd_p = 1;
1276	ip->gd_hioffset = ((int)func)>>16 ;
1277}
1278
/* Name of an interrupt/trap entry point: IDTVEC(foo) expands to Xfoo. */
#define	IDTVEC(name)	__CONCAT(X,name)

/*
 * Entry points defined elsewhere.  init386() installs them in the IDT,
 * except syscall, whose address goes into the LDT call gate.
 */
extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(syscall), IDTVEC(int0x80_syscall);
1287
1288void
1289sdtossd(sd, ssd)
1290	struct segment_descriptor *sd;
1291	struct soft_segment_descriptor *ssd;
1292{
1293	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1294	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1295	ssd->ssd_type  = sd->sd_type;
1296	ssd->ssd_dpl   = sd->sd_dpl;
1297	ssd->ssd_p     = sd->sd_p;
1298	ssd->ssd_def32 = sd->sd_def32;
1299	ssd->ssd_gran  = sd->sd_gran;
1300}
1301
/*
 * Early machine-dependent initialization.
 *
 * In order, this routine:
 *	- points proc0 at its user area and initializes the console,
 *	- builds gdt[] and ldt[] from gdt_segs[]/ldt_segs[] and fills the
 *	  IDT with the exception, double fault and int 0x80 gates,
 *	- loads GDTR, IDTR and LDTR,
 *	- reads base/extended memory sizes from the RTC CMOS RAM, derives
 *	  Maxmem and calls pmap_bootstrap(),
 *	- pattern-tests every page from avail_start up to Maxmem and
 *	  records the good ranges as start/end pairs in phys_avail[],
 *	- reserves room for the message buffer at the end of the last
 *	  range,
 *	- sets up proc0's TSS fields and the double fault TSS and loads
 *	  the task register,
 *	- installs the system call gate in the LDT and records the user
 *	  code/data selectors.
 *
 *	first	passed through unchanged to pmap_bootstrap()
 */
void
init386(first)
	int first;
{
	int x;
	unsigned biosbasemem, biosextmem;
	struct gate_descriptor *gdp;
	int gsel_tss;
	/* table descriptors - used to load tables by microp */
	struct region_descriptor r_gdt, r_idt;
	int	pagesinbase, pagesinext;
	int	target_page, pa_indx;

	proc0.p_addr = proc0paddr;

	atdevbase = ISA_HOLE_START + KERNBASE;

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	/*
	 * make gdt memory segments, the code segment goes up to end of the
	 * page with etext in it, the data segment goes to the end of
	 * the address space
	 */
	/*
	 * XXX text protection is temporarily (?) disabled.  The limit was
	 * i386_btop(round_page(etext)) - 1.
	 */
	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
	for (x = 0; x < NGDT; x++)
		ssdtosd(&gdt_segs[x], &gdt[x].sd);

	/* make ldt memory segments */
	/*
	 * The data segment limit must not cover the user area because we
	 * don't want the user area to be writable in copyout() etc. (page
	 * level protection is lost in kernel mode on 386's).  Also, we
	 * don't want the user area to be writable directly (page level
	 * protection of the user area is not available on 486's with
	 * CR0_WP set, because there is no user-read/kernel-write mode).
	 *
	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
	 * should be spelled ...MAX_USER...
	 */
#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
	/*
	 * The code segment limit has to cover the user area until we move
	 * the signal trampoline out of the user area.  This is safe because
	 * the code segment cannot be written to directly.
	 */
#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE)
	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
	/* Note. eventually want private ldts per process */
	for (x = 0; x < NLDT; x++)
		ssdtosd(&ldt_segs[x], &ldt[x].sd);

	/* exceptions */
	/* Default every vector to the "rsvd" handler, then override below. */
	for (x = 0; x < NIDT; x++)
		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	/* Vectors 3, 4 and 0x80 use SEL_UPL as the gate privilege level. */
 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	/* Vector 8 is a task gate to GPANIC_SEL (dblfault_tss), not a handler address. */
	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(0x80, &IDTVEC(int0x80_syscall),
			SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));

#include	"isa.h"
#if	NISA >0
	isa_defaultirq();
#endif
	rand_initialize();

	/* Load the descriptor table registers with the tables built above. */
	r_gdt.rd_limit = sizeof(gdt) - 1;
	r_gdt.rd_base =  (int) gdt;
	lgdt(&r_gdt);

	r_idt.rd_limit = sizeof(idt) - 1;
	r_idt.rd_base = (int) idt;
	lidt(&r_idt);

	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
	lldt(_default_ldt);
	currentldt = _default_ldt;

#ifdef DDB
	kdb_init();
	if (boothowto & RB_KDB)
		Debugger("Boot flags requested debugger");
#endif

	/* Use BIOS values stored in RTC CMOS RAM, since probing
	 * breaks certain 386 AT relics.
	 */
	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);

	/*
	 * Print a warning if the official BIOS interface disagrees
	 * with the hackish interface used above.  Eventually only
	 * the official interface should be used.
	 */
	if (bootinfo.bi_memsizes_valid) {
		if (bootinfo.bi_basemem != biosbasemem)
			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
			       bootinfo.bi_basemem, biosbasemem);
		if (bootinfo.bi_extmem != biosextmem)
			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
			       bootinfo.bi_extmem, biosextmem);
	}

	/*
	 * If BIOS tells us that it has more than 640k in the basemem,
	 *	don't believe it - set it to 640k.
	 */
	if (biosbasemem > 640)
		biosbasemem = 640;

	/*
	 * Some 386 machines might give us a bogus number for extended
	 *	mem. If this happens, stop now.
	 */
#ifndef LARGEMEM
	if (biosextmem > 65536) {
		panic("extended memory beyond limit of 64MB");
		/* NOTREACHED */
	}
#endif

	/* The RTC values are in kilobytes; convert them to page counts. */
	pagesinbase = biosbasemem * 1024 / PAGE_SIZE;
	pagesinext = biosextmem * 1024 / PAGE_SIZE;

	/*
	 * Special hack for chipsets that still remap the 384k hole when
	 *	there's 16MB of memory - this really confuses people that
	 *	are trying to use bus mastering ISA controllers with the
	 *	"16MB limit"; they only have 16MB, but the remapping puts
	 *	them beyond the limit.
	 */
	/*
	 * If extended memory is between 15-16MB (16-17MB phys address range),
	 *	chop it to 15MB.
	 */
	if ((pagesinext > 3840) && (pagesinext < 4096))
		pagesinext = 3840;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  Extended memory
	 * starts at physical address 0x100000, hence the offset added
	 * to pagesinext.
	 */
	Maxmem = pagesinext + 0x100000/PAGE_SIZE;

#ifdef MAXMEM
	/*
	 * Compile-time override of the probed size.
	 * NOTE(review): the division by 4 presumably converts kilobytes
	 * to 4K pages -- confirm the units of MAXMEM.
	 */
	Maxmem = MAXMEM/4;
#endif

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap (first, 0);

	/*
	 * Size up each available chunk of physical memory.
	 */

	/*
	 * We currently don't bother testing base memory.
	 * XXX  ...but we probably should.
	 */
	pa_indx = 0;
	badpages = 0;
	if (pagesinbase > 1) {
		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
		physmem = pagesinbase - 1;
	} else {
		/* point at first chunk end */
		pa_indx++;
	}

	/*
	 * Walk every page from avail_start to the top of memory.  On
	 * entry to each iteration pa_indx indexes the end address of the
	 * chunk currently being extended.
	 */
	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
		int tmp, page_bad = FALSE;

		/*
		 * map page into kernel: valid, read/write, non-cacheable
		 */
		*(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page;
		pmap_update();

		/* Save the first word so it can be restored after the test. */
		tmp = *(int *)CADDR1;
		/*
		 * Test for alternating 1's and 0's
		 */
		*(volatile int *)CADDR1 = 0xaaaaaaaa;
		if (*(volatile int *)CADDR1 != 0xaaaaaaaa) {
			page_bad = TRUE;
		}
		/*
		 * Test for alternating 0's and 1's
		 */
		*(volatile int *)CADDR1 = 0x55555555;
		if (*(volatile int *)CADDR1 != 0x55555555) {
			page_bad = TRUE;
		}
		/*
		 * Test for all 1's
		 */
		*(volatile int *)CADDR1 = 0xffffffff;
		if (*(volatile int *)CADDR1 != 0xffffffff) {
			page_bad = TRUE;
		}
		/*
		 * Test for all 0's
		 */
		*(volatile int *)CADDR1 = 0x0;
		if (*(volatile int *)CADDR1 != 0x0) {
			/*
			 * test of page failed
			 */
			page_bad = TRUE;
		}
		/*
		 * Restore original value.
		 */
		*(int *)CADDR1 = tmp;

		/*
		 * Adjust array of valid/good pages.
		 */
		if (page_bad == FALSE) {
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 */
			if (phys_avail[pa_indx] == target_page) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf("Too many holes in the physical address space, giving up\n");
					/* Step back to the end of the last complete chunk. */
					pa_indx--;
					break;
				}
				phys_avail[pa_indx++] = target_page;	/* start */
				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
			}
			physmem++;
		} else {
			badpages++;
			/* Redundant: page_bad is re-initialized on every iteration. */
			page_bad = FALSE;
		}
	}

	/* Tear down the temporary test mapping. */
	*(int *)CMAP1 = 0;
	pmap_update();

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
		/* Chunk too small: drop it (both start and end) and retry with the previous one. */
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));

	avail_end = phys_avail[pa_indx];

	/* now running on new page tables, configured,and u/iom is accessible */

	/* make a initial tss so microp can get interrupt stack on syscall! */
	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*PAGE_SIZE;
	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);

	/*
	 * Double fault TSS: every stack pointer refers to the top of the
	 * private dblfault_stack, all data selectors are the kernel data
	 * segment, and execution starts at dblfault_handler.
	 */
	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
	    dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
	dblfault_tss.tss_cr3 = IdlePTD;
	dblfault_tss.tss_eip = (int) dblfault_handler;
	dblfault_tss.tss_eflags = PSL_KERNEL;
	dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs =
		GSEL(GDATA_SEL, SEL_KPL);
	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);

	/*
	 * Store sizeof(struct i386tss) in the upper 16 bits of tss_ioopt
	 * of the TSS that the GPROC0_SEL descriptor points at.
	 */
	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
		(sizeof(struct i386tss))<<16;

	ltr(gsel_tss);

	/* make a call gate to reenter kernel with */
	gdp = &ldt[LSYS5CALLS_SEL].gd;

	x = (int) &IDTVEC(syscall);
	/* The post-increment is a no-op in effect: x is not read again. */
	gdp->gd_looffset = x++;
	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
	gdp->gd_stkcpy = 1;
	gdp->gd_type = SDT_SYS386CGT;
	gdp->gd_dpl = SEL_UPL;
	gdp->gd_p = 1;
	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;

	/* transfer to user mode */

	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);

	/* setup proc 0's pcb */
	proc0.p_addr->u_pcb.pcb_flags = 0;
	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
}
1642
/*
 * The registers are in the frame; the frame is in the user area of
 * the process in question; when the process is active, the registers
 * are in "the kernel stack"; when it's not, they're still there, but
 * things get flipped around.  So, since p->p_md.md_regs is the whole address
 * of the register set, take its offset from the kernel stack, and
 * index into the user block.  Don't you just *love* virtual memory?
 * (I'm starting to think seymour is right...)
 *
 * TF_REGP(p) therefore yields a struct trapframe pointer located at
 * p->p_addr plus the byte offset of p->p_md.md_regs from kstack.
 * Note that `p' is evaluated twice.
 */
#define	TF_REGP(p)	((struct trapframe *) \
			 ((char *)(p)->p_addr \
			  + ((char *)(p)->p_md.md_regs - kstack)))
1655
1656int
1657ptrace_set_pc(p, addr)
1658	struct proc *p;
1659	unsigned int addr;
1660{
1661	TF_REGP(p)->tf_eip = addr;
1662	return (0);
1663}
1664
1665int
1666ptrace_single_step(p)
1667	struct proc *p;
1668{
1669	TF_REGP(p)->tf_eflags |= PSL_T;
1670	return (0);
1671}
1672
1673int ptrace_write_u(p, off, data)
1674	struct proc *p;
1675	vm_offset_t off;
1676	int data;
1677{
1678	struct trapframe frame_copy;
1679	vm_offset_t min;
1680	struct trapframe *tp;
1681
1682	/*
1683	 * Privileged kernel state is scattered all over the user area.
1684	 * Only allow write access to parts of regs and to fpregs.
1685	 */
1686	min = (char *)p->p_md.md_regs - kstack;
1687	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1688		tp = TF_REGP(p);
1689		frame_copy = *tp;
1690		*(int *)((char *)&frame_copy + (off - min)) = data;
1691		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1692		    !CS_SECURE(frame_copy.tf_cs))
1693			return (EINVAL);
1694		*(int*)((char *)p->p_addr + off) = data;
1695		return (0);
1696	}
1697	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1698	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1699		*(int*)((char *)p->p_addr + off) = data;
1700		return (0);
1701	}
1702	return (EFAULT);
1703}
1704
1705int
1706fill_regs(p, regs)
1707	struct proc *p;
1708	struct reg *regs;
1709{
1710	struct trapframe *tp;
1711
1712	tp = TF_REGP(p);
1713	regs->r_es = tp->tf_es;
1714	regs->r_ds = tp->tf_ds;
1715	regs->r_edi = tp->tf_edi;
1716	regs->r_esi = tp->tf_esi;
1717	regs->r_ebp = tp->tf_ebp;
1718	regs->r_ebx = tp->tf_ebx;
1719	regs->r_edx = tp->tf_edx;
1720	regs->r_ecx = tp->tf_ecx;
1721	regs->r_eax = tp->tf_eax;
1722	regs->r_eip = tp->tf_eip;
1723	regs->r_cs = tp->tf_cs;
1724	regs->r_eflags = tp->tf_eflags;
1725	regs->r_esp = tp->tf_esp;
1726	regs->r_ss = tp->tf_ss;
1727	return (0);
1728}
1729
1730int
1731set_regs(p, regs)
1732	struct proc *p;
1733	struct reg *regs;
1734{
1735	struct trapframe *tp;
1736
1737	tp = TF_REGP(p);
1738	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1739	    !CS_SECURE(regs->r_cs))
1740		return (EINVAL);
1741	tp->tf_es = regs->r_es;
1742	tp->tf_ds = regs->r_ds;
1743	tp->tf_edi = regs->r_edi;
1744	tp->tf_esi = regs->r_esi;
1745	tp->tf_ebp = regs->r_ebp;
1746	tp->tf_ebx = regs->r_ebx;
1747	tp->tf_edx = regs->r_edx;
1748	tp->tf_ecx = regs->r_ecx;
1749	tp->tf_eax = regs->r_eax;
1750	tp->tf_eip = regs->r_eip;
1751	tp->tf_cs = regs->r_cs;
1752	tp->tf_eflags = regs->r_eflags;
1753	tp->tf_esp = regs->r_esp;
1754	tp->tf_ss = regs->r_ss;
1755	return (0);
1756}
1757
#ifndef DDB
/*
 * Stand-in compiled only when DDB is not configured: there is no
 * debugger to enter, so the request is merely reported on the console.
 *
 *	msg	reason text supplied by the caller
 */
void
Debugger(const char *msg)
{

	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* !DDB */
1765
1766#include <sys/disklabel.h>
1767#define b_cylin	b_resid
1768/*
1769 * Determine the size of the transfer, and make sure it is
1770 * within the boundaries of the partition. Adjust transfer
1771 * if needed, and signal errors or early completion.
1772 */
1773int
1774bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1775{
1776        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1777        int labelsect = lp->d_partitions[0].p_offset;
1778        int maxsz = p->p_size,
1779                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1780
1781        /* overwriting disk label ? */
1782        /* XXX should also protect bootstrap in first 8K */
1783        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1784#if LABELSECTOR != 0
1785            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1786#endif
1787            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1788                bp->b_error = EROFS;
1789                goto bad;
1790        }
1791
1792#if     defined(DOSBBSECTOR) && defined(notyet)
1793        /* overwriting master boot record? */
1794        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1795            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1796                bp->b_error = EROFS;
1797                goto bad;
1798        }
1799#endif
1800
1801        /* beyond partition? */
1802        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1803                /* if exactly at end of disk, return an EOF */
1804                if (bp->b_blkno == maxsz) {
1805                        bp->b_resid = bp->b_bcount;
1806                        return(0);
1807                }
1808                /* or truncate if part of it fits */
1809                sz = maxsz - bp->b_blkno;
1810                if (sz <= 0) {
1811                        bp->b_error = EINVAL;
1812                        goto bad;
1813                }
1814                bp->b_bcount = sz << DEV_BSHIFT;
1815        }
1816
1817        /* calculate cylinder for disksort to order transfers with */
1818        bp->b_pblkno = bp->b_blkno + p->p_offset;
1819        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1820        return(1);
1821
1822bad:
1823        bp->b_flags |= B_ERROR;
1824        return(-1);
1825}
1826
/*
 * Copy the drive number out to the sysctl request `req'.
 * Returns whatever SYSCTL_OUT() returns.
 */
int
disk_externalize(int drive, struct sysctl_req *req)
{
	int error;

	error = SYSCTL_OUT(req, &drive, sizeof(drive));
	return (error);
}
1832