machdep.c revision 1825
1224133Sdim/*-
2224133Sdim * Copyright (c) 1992 Terrence R. Lambert.
3224133Sdim * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4224133Sdim * All rights reserved.
5224133Sdim *
6224133Sdim * This code is derived from software contributed to Berkeley by
7224133Sdim * William Jolitz.
8224133Sdim *
9224133Sdim * Redistribution and use in source and binary forms, with or without
10224133Sdim * modification, are permitted provided that the following conditions
11224133Sdim * are met:
12224133Sdim * 1. Redistributions of source code must retain the above copyright
13224133Sdim *    notice, this list of conditions and the following disclaimer.
14224133Sdim * 2. Redistributions in binary form must reproduce the above copyright
15224133Sdim *    notice, this list of conditions and the following disclaimer in the
16224133Sdim *    documentation and/or other materials provided with the distribution.
17224133Sdim * 3. All advertising materials mentioning features or use of this software
18224133Sdim *    must display the following acknowledgement:
19224133Sdim *	This product includes software developed by the University of
20224133Sdim *	California, Berkeley and its contributors.
21224133Sdim * 4. Neither the name of the University nor the names of its contributors
22224133Sdim *    may be used to endorse or promote products derived from this software
23224133Sdim *    without specific prior written permission.
24224133Sdim *
25224133Sdim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26224133Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27224133Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28224133Sdim * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29224133Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30224133Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31224133Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32224133Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33224133Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34224133Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35224133Sdim * SUCH DAMAGE.
36224133Sdim *
37243830Sdim *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38243830Sdim *	$Id: machdep.c,v 1.44 1994/06/04 11:01:15 davidg Exp $
39243830Sdim */
40243830Sdim
41224133Sdim#include "npx.h"
42234353Sdim#include "isa.h"
43234353Sdim
44224133Sdim#include <sys/param.h>
45224133Sdim#include <sys/systm.h>
46224133Sdim#include <sys/signalvar.h>
47249423Sdim#include <sys/kernel.h>
48224133Sdim#include <sys/map.h>
49224133Sdim#include <sys/proc.h>
50224133Sdim#include <sys/user.h>
51224133Sdim#include <sys/buf.h>
52224133Sdim#include <sys/reboot.h>
53224133Sdim#include <sys/conf.h>
54224133Sdim#include <sys/file.h>
55224133Sdim#include <sys/callout.h>
56224133Sdim#include <sys/malloc.h>
57224133Sdim#include <sys/mbuf.h>
58224133Sdim#include <sys/msgbuf.h>
59224133Sdim#include <sys/ioctl.h>
60224133Sdim#include <sys/tty.h>
61224133Sdim#include <sys/sysctl.h>
62224133Sdim
63224133Sdim#ifdef SYSVSHM
64243830Sdim#include "sys/shm.h"
65224133Sdim#endif
66224133Sdim
67224133Sdim#ifdef SYSVMSG
68266715Sdim#include "msg.h"
69266715Sdim#endif
70266715Sdim
71266715Sdim#ifdef SYSVSEM
72266715Sdim#include "sem.h"
73266715Sdim#endif
74266715Sdim
75266715Sdim#include "vm/vm.h"
76266715Sdim#include "vm/vm_kern.h"
77266715Sdim#include "vm/vm_page.h"
78266715Sdim
79224133Sdim#include "sys/exec.h"
80224133Sdim#include "sys/vnode.h"
81224133Sdim
82224133Sdimextern vm_offset_t avail_start, avail_end;
83224133Sdim
84224133Sdim#include "machine/cpu.h"
85234353Sdim#include "machine/reg.h"
86234353Sdim#include "machine/psl.h"
87224133Sdim#include "machine/specialreg.h"
88249423Sdim#include "machine/sysarch.h"
89249423Sdim#include "machine/cons.h"
90224133Sdim
91249423Sdim#include "i386/isa/isa.h"
92249423Sdim#include "i386/isa/rtc.h"
93249423Sdim
94249423Sdimstatic void identifycpu(void);
95249423Sdimstatic void initcpu(void);
96249423Sdimstatic int test_page(int *, int);
97249423Sdim
98249423Sdimextern int grow(struct proc *,u_int);
99224133Sdimconst char machine[] = "PC-Class";
100224133Sdimconst char *cpu_model;
101224133Sdim
102224133Sdim#ifndef PANIC_REBOOT_WAIT_TIME
103224133Sdim#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
104224133Sdim#endif
105224133Sdim
106224133Sdim/*
107224133Sdim * Declare these as initialized data so we can patch them.
108224133Sdim */
109224133Sdimint	nswbuf = 0;
110224133Sdim#ifdef	NBUF
111224133Sdimint	nbuf = NBUF;
112224133Sdim#else
113224133Sdimint	nbuf = 0;
114239462Sdim#endif
115239462Sdim#ifdef	BUFPAGES
116239462Sdimint	bufpages = BUFPAGES;
117239462Sdim#else
118224133Sdimint	bufpages = 0;
119224133Sdim#endif
120224133Sdim#ifdef BOUNCEPAGES
121224133Sdimint	bouncepages = BOUNCEPAGES;
122224133Sdim#else
123224133Sdimint	bouncepages = 0;
124224133Sdim#endif
125224133Sdimint	msgbufmapped = 0;		/* set when safe to use msgbuf */
126224133Sdimextern int freebufspace;
127224133Sdimextern char *bouncememory;
128224133Sdim
129224133Sdimint _udatasel, _ucodesel;
130224133Sdim
131224133Sdim/*
132224133Sdim * Machine-dependent startup code
133224133Sdim */
134224133Sdimint boothowto = 0, Maxmem = 0, badpages = 0, physmem = 0;
135224133Sdimlong dumplo;
136224133Sdimextern int bootdev;
137224133Sdimint biosmem;
138234353Sdim
139234353Sdimvm_offset_t	phys_avail[6];
140234353Sdim
141224133Sdimextern cyloffset;
142224133Sdim
143224133Sdimint cpu_class;
144224133Sdim
145224133Sdimvoid dumpsys __P((void));
146224133Sdimvm_offset_t buffer_sva, buffer_eva;
147224133Sdimvm_offset_t clean_sva, clean_eva;
148224133Sdimvm_offset_t pager_sva, pager_eva;
149263508Sdimint maxbkva, pager_map_size;
150263508Sdim
151224133Sdim#define offsetof(type, member)	((size_t)(&((type *)0)->member))
152234353Sdim
153234353Sdimvoid
154234353Sdimcpu_startup()
155234353Sdim{
156234353Sdim	register int unixsize;
157234353Sdim	register unsigned i;
158234353Sdim	register struct pte *pte;
159234353Sdim	int mapaddr, j;
160234353Sdim	register caddr_t v;
161234353Sdim	int maxbufs, base, residual;
162234353Sdim	extern long Usrptsize;
163234353Sdim	vm_offset_t minaddr, maxaddr;
164234353Sdim	vm_size_t size = 0;
165234353Sdim	int firstaddr;
166234353Sdim
167234353Sdim	/*
168	 * Initialize error message buffer (at end of core).
169	 */
170
171	/* avail_end was pre-decremented in init_386() to compensate */
172	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
173		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
174			   avail_end + i * NBPG,
175			   VM_PROT_ALL, TRUE);
176	msgbufmapped = 1;
177
178	/*
179	 * Good {morning,afternoon,evening,night}.
180	 */
181	printf(version);
182	identifycpu();
183	printf("real memory  = %d (%d pages)\n", ptoa(physmem), physmem);
184	if (badpages)
185		printf("bad memory   = %d (%d pages)\n", ptoa(badpages), badpages);
186
187	/*
188	 * Allocate space for system data structures.
189	 * The first available kernel virtual address is in "v".
190	 * As pages of kernel virtual memory are allocated, "v" is incremented.
191	 * As pages of memory are allocated and cleared,
192	 * "firstaddr" is incremented.
193	 * An index into the kernel page table corresponding to the
194	 * virtual memory address maintained in "v" is kept in "mapaddr".
195	 */
196
197	/*
198	 * Make two passes.  The first pass calculates how much memory is
199	 * needed and allocates it.  The second pass assigns virtual
200	 * addresses to the various data structures.
201	 */
202	firstaddr = 0;
203again:
204	v = (caddr_t)firstaddr;
205
206#define	valloc(name, type, num) \
207	    (name) = (type *)v; v = (caddr_t)((name)+(num))
208#define	valloclim(name, type, num, lim) \
209	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
210	valloc(callout, struct callout, ncallout);
211#ifdef SYSVSHM
212	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
213#endif
214#ifdef SYSVSEM
215	valloc(sema, struct semid_ds, seminfo.semmni);
216	valloc(sem, struct sem, seminfo.semmns);
217	/* This is pretty disgusting! */
218	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
219#endif
220#ifdef SYSVMSG
221	valloc(msgpool, char, msginfo.msgmax);
222	valloc(msgmaps, struct msgmap, msginfo.msgseg);
223	valloc(msghdrs, struct msg, msginfo.msgtql);
224	valloc(msqids, struct msqid_ds, msginfo.msgmni);
225#endif
226	/*
227	 * Determine how many buffers to allocate.
228	 * Use 20% of memory of memory beyond the first 2MB
229	 * Insure a minimum of 16 fs buffers.
230	 * We allocate 1/2 as many swap buffer headers as file i/o buffers.
231	 */
232	if (bufpages == 0)
233		bufpages = ((physmem << PGSHIFT) - 2048*1024) / NBPG / 5;
234	if (bufpages < 64)
235		bufpages = 64;
236
237	/*
238	 * We must still limit the maximum number of buffers to be no
239	 * more than 2/5's of the size of the kernal malloc region, this
240	 * will only take effect for machines with lots of memory
241	 */
242	bufpages = min(bufpages, (VM_KMEM_SIZE / NBPG) * 2 / 5);
243	if (nbuf == 0) {
244		nbuf = bufpages / 2;
245		if (nbuf < 32)
246			nbuf = 32;
247	}
248	freebufspace = bufpages * NBPG;
249	if (nswbuf == 0) {
250		nswbuf = (nbuf / 2) &~ 1;	/* force even */
251		if (nswbuf > 256)
252			nswbuf = 256;		/* sanity */
253	}
254	valloc(swbuf, struct buf, nswbuf);
255	valloc(buf, struct buf, nbuf);
256
257#ifndef NOBOUNCE
258	/*
259	 * If there is more than 16MB of memory, allocate some bounce buffers
260	 */
261	if (Maxmem > 4096) {
262		if (bouncepages == 0)
263			bouncepages = 96;	/* largest physio size + extra */
264		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
265		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
266	}
267#endif
268
269	/*
270	 * End of first pass, size has been calculated so allocate memory
271	 */
272	if (firstaddr == 0) {
273		size = (vm_size_t)(v - firstaddr);
274		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
275		if (firstaddr == 0)
276			panic("startup: no room for tables");
277		goto again;
278	}
279
280	/*
281	 * End of second pass, addresses have been assigned
282	 */
283	if ((vm_size_t)(v - firstaddr) != size)
284		panic("startup: table size inconsistency");
285
286	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
287			(nbuf*MAXBSIZE) + VM_PHYS_SIZE + maxbkva + pager_map_size, TRUE);
288
289	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
290	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
291				  pager_map_size, TRUE);
292
293	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
294				(nbuf * MAXBSIZE), TRUE);
295	/*
296	 * Allocate a submap for physio
297	 */
298	phys_map = kmem_suballoc(clean_map, &minaddr, &maxaddr,
299				 VM_PHYS_SIZE, TRUE);
300
301	/*
302	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
303	 * we use the more space efficient malloc in place of kmem_alloc.
304	 */
305	mclrefcnt = (char *)malloc(NMBCLUSTERS+CLBYTES/MCLBYTES,
306				   M_MBUF, M_NOWAIT);
307	bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES);
308	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
309			       VM_MBUF_SIZE, FALSE);
310	/*
311	 * Initialize callouts
312	 */
313	callfree = callout;
314	for (i = 1; i < ncallout; i++)
315		callout[i-1].c_next = &callout[i];
316
317	printf("avail memory = %d (%d pages)\n", ptoa(cnt.v_free_count), cnt.v_free_count);
318	printf("using %d buffers containing %d bytes of memory\n",
319		nbuf, bufpages * CLBYTES);
320
321#ifndef NOBOUNCE
322	/*
323	 * init bounce buffers
324	 */
325	vm_bounce_init();
326#endif
327
328	/*
329	 * Set up CPU-specific registers, cache, etc.
330	 */
331	initcpu();
332
333	/*
334	 * Set up buffers, so they can be used to read disk labels.
335	 */
336	bufinit();
337
338	/*
339	 * Configure the system.
340	 */
341	configure();
342}
343
344
/*
 * Mapping from the CPU id probed at boot (the global `cpu`) to a
 * printable name and a coarse class; indexed by the CPU_* constants
 * noted on each entry.  identifycpu() bounds-checks against this table.
 */
struct cpu_nameclass i386_cpus[] = {
	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
	{ "i586",		CPUCLASS_586 },		/* CPU_586   */
};
353
/*
 * Print the CPU type and class determined by the boot-time probe (the
 * global `cpu` index into i386_cpus[]), record them in cpu_class and
 * cpu_model, and panic if either the id is out of range or the class
 * is one this kernel was not compiled to support.
 */
static void
identifycpu()
{
	printf("CPU: ");
	/* bounds-check the probed id against the name table */
	if (cpu >= 0 && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
		printf("%s", i386_cpus[cpu].cpu_name);
		cpu_class = i386_cpus[cpu].cpu_class;
		cpu_model = i386_cpus[cpu].cpu_name;
	} else {
		printf("unknown cpu type %d\n", cpu);
		panic("startup: bad cpu id");
	}
	printf(" (");
	switch(cpu_class) {
	case CPUCLASS_286:
		printf("286");
		break;
	case CPUCLASS_386:
		printf("386");
		break;
	case CPUCLASS_486:
		printf("486");
		break;
	case CPUCLASS_586:
		printf("586");
		break;
	default:
		printf("unknown");	/* will panic below... */
	}
	printf("-class CPU)");
	printf("\n");	/* cpu speed would be nice, but how? */

	/*
	 * Now that we have told the user what they have,
	 * let them know if that machine type isn't configured.
	 * The cases deliberately fall through to the panic: each
	 * #if removes the case label for classes that ARE configured,
	 * so only unconfigured classes reach it.
	 */
	switch (cpu_class) {
	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
#error This kernel is not configured for one of the supported CPUs
#endif
#if !defined(I386_CPU)
	case CPUCLASS_386:
#endif
#if !defined(I486_CPU)
	case CPUCLASS_486:
#endif
#if !defined(I586_CPU)
	case CPUCLASS_586:
#endif
		panic("CPU class not configured");
	default:
		break;
	}
}
409
#ifdef PGINPROF
/*
 * Return the difference (in microseconds)
 * between the  current time and a previous
 * time as represented  by the arguments.
 * If there is a pending clock interrupt
 * which has not been serviced due to high
 * ipl, return error code.
 */
/*ARGSUSED*/
vmtime(otime, olbolt, oicr)
	register int otime, olbolt, oicr;
{

	/*
	 * 60 ticks/sec assumed (16667us per tick); computed from the
	 * globals `time` and `lbolt`.  NOTE(review): oicr is unused
	 * here, and no error code is actually returned -- the comment
	 * above appears inherited from the VAX original.
	 */
	return (((time.tv_sec-otime)*60 + lbolt-olbolt)*16667);
}
#endif
427
428extern int kstack[];
429
430/*
431 * Send an interrupt to process.
432 *
433 * Stack is set up to allow sigcode stored
434 * in u. to call routine, followed by kcall
435 * to sigreturn routine below.  After sigreturn
436 * resets the signal mask, the stack, and the
437 * frame pointer, it returns to the user
438 * specified pc, psl.
439 */
/*
 * Deliver signal `sig` to the current process: build a struct sigframe
 * on the user stack (or on the alternate signal stack when one is
 * configured, not active, and selected for this signal), save the
 * interrupted register state into the embedded sigcontext, and redirect
 * the user PC to the signal trampoline, which invokes `catcher` and
 * then the sigreturn() syscall below.  `mask` is the signal mask to
 * restore on return; `code` is the trap code passed to the handler.
 */
void
sendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig, mask;
	unsigned code;
{
	register struct proc *p = curproc;
	register int *regs;
	register struct sigframe *fp;
	struct sigacts *psp = p->p_sigacts;
	int oonstack, frmtrap;	/* NOTE(review): frmtrap is never used */

	regs = p->p_md.md_regs;
        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
	/*
	 * Allocate and validate space for the signal handler
	 * context. Note that if the stack is in P0 space, the
	 * call to grow() is a nop, and the useracc() check
	 * will fail if the process has not already allocated
	 * the space with a `brk'.
	 */
        if ((psp->ps_flags & SAS_ALTSTACK) &&
	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
	    (psp->ps_sigonstack & sigmask(sig))) {
		/* place the frame at the top of the alternate stack */
		fp = (struct sigframe *)(psp->ps_sigstk.ss_base +
		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
	} else {
		/* push the frame below the current user stack pointer */
		fp = (struct sigframe *)(regs[tESP]
			- sizeof(struct sigframe));
	}

	/*
	 * grow() will return FALSE if the fp will not fit inside the stack
	 *	and the stack can not be grown. useracc will return FALSE
	 *	if access is denied.
	 */
	if ((grow(p, (int)fp) == FALSE) ||
	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		SIGACTION(p, SIGILL) = SIG_DFL;
		sig = sigmask(SIGILL);	/* sig is reused as a mask here */
		p->p_sigignore &= ~sig;
		p->p_sigcatch &= ~sig;
		p->p_sigmask &= ~sig;
		psignal(p, SIGILL);
		return;
	}

	/*
	 * Build the argument list for the signal handler.
	 */
	fp->sf_signum = sig;
	fp->sf_code = code;
	fp->sf_scp = &fp->sf_sc;
	fp->sf_addr = (char *) regs[tERR];
	fp->sf_handler = catcher;

	/* save scratch registers */
	fp->sf_sc.sc_eax = regs[tEAX];
	fp->sf_sc.sc_ebx = regs[tEBX];
	fp->sf_sc.sc_ecx = regs[tECX];
	fp->sf_sc.sc_edx = regs[tEDX];
	fp->sf_sc.sc_esi = regs[tESI];
	fp->sf_sc.sc_edi = regs[tEDI];
	fp->sf_sc.sc_cs = regs[tCS];
	fp->sf_sc.sc_ds = regs[tDS];
	fp->sf_sc.sc_ss = regs[tSS];
	fp->sf_sc.sc_es = regs[tES];
	fp->sf_sc.sc_isp = regs[tISP];

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	fp->sf_sc.sc_onstack = oonstack;
	fp->sf_sc.sc_mask = mask;
	fp->sf_sc.sc_sp = regs[tESP];
	fp->sf_sc.sc_fp = regs[tEBP];
	fp->sf_sc.sc_pc = regs[tEIP];
	fp->sf_sc.sc_ps = regs[tEFLAGS];
	/* vector user execution into the trampoline saved in the pcb */
	regs[tESP] = (int)fp;
	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
	regs[tEFLAGS] &= ~PSL_VM;	/* never deliver in vm86 mode */
	regs[tCS] = _ucodesel;
	regs[tDS] = _udatasel;
	regs[tES] = _udatasel;
	regs[tSS] = _udatasel;
}
531
532/*
533 * System call to cleanup state after a signal
534 * has been taken.  Reset signal mask and
535 * stack state from context left by sendsig (above).
536 * Return to previous pc and psl as specified by
537 * context left by sendsig. Check carefully to
538 * make sure that the user has not modified the
539 * psl to gain improper privileges or to cause
540 * a machine fault.
541 */
542struct sigreturn_args {
543	struct sigcontext *sigcntxp;
544};
545
546int
547sigreturn(p, uap, retval)
548	struct proc *p;
549	struct sigreturn_args *uap;
550	int *retval;
551{
552	register struct sigcontext *scp;
553	register struct sigframe *fp;
554	register int *regs = p->p_md.md_regs;
555	int eflags;
556
557	/*
558	 * (XXX old comment) regs[tESP] points to the return address.
559	 * The user scp pointer is above that.
560	 * The return address is faked in the signal trampoline code
561	 * for consistency.
562	 */
563	scp = uap->sigcntxp;
564	fp = (struct sigframe *)
565	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
566
567	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
568		return(EINVAL);
569
570	eflags = scp->sc_ps;
571	if ((eflags & PSL_USERCLR) != 0 ||
572	    (eflags & PSL_USERSET) != PSL_USERSET ||
573	    (eflags & PSL_IOPL) < (regs[tEFLAGS] & PSL_IOPL)) {
574#ifdef DEBUG
575    		printf("sigreturn:  eflags=0x%x\n", eflags);
576#endif
577    		return(EINVAL);
578	}
579
580	/*
581	 * Sanity check the user's selectors and error if they
582	 * are suspect.
583	 */
584#define max_ldt_sel(pcb) \
585	((pcb)->pcb_ldt ? (pcb)->pcb_ldt_len : (sizeof(ldt) / sizeof(ldt[0])))
586
587#define valid_ldt_sel(sel) \
588	(ISLDT(sel) && ISPL(sel) == SEL_UPL && \
589	 IDXSEL(sel) < max_ldt_sel(&p->p_addr->u_pcb))
590
591#define null_sel(sel) \
592	(!ISLDT(sel) && IDXSEL(sel) == 0)
593
594	if ((scp->sc_cs&0xffff != _ucodesel && !valid_ldt_sel(scp->sc_cs)) ||
595	    (scp->sc_ss&0xffff != _udatasel && !valid_ldt_sel(scp->sc_ss)) ||
596	    (scp->sc_ds&0xffff != _udatasel && !valid_ldt_sel(scp->sc_ds) &&
597	     !null_sel(scp->sc_ds)) ||
598	    (scp->sc_es&0xffff != _udatasel && !valid_ldt_sel(scp->sc_es) &&
599	     !null_sel(scp->sc_es))) {
600#ifdef DEBUG
601    		printf("sigreturn:  cs=0x%x ss=0x%x ds=0x%x es=0x%x\n",
602			scp->sc_cs, scp->sc_ss, scp->sc_ds, scp->sc_es);
603#endif
604		trapsignal(p, SIGBUS, T_PROTFLT);
605		return(EINVAL);
606	}
607
608#undef max_ldt_sel
609#undef valid_ldt_sel
610#undef null_sel
611
612	/* restore scratch registers */
613	regs[tEAX] = scp->sc_eax;
614	regs[tEBX] = scp->sc_ebx;
615	regs[tECX] = scp->sc_ecx;
616	regs[tEDX] = scp->sc_edx;
617	regs[tESI] = scp->sc_esi;
618	regs[tEDI] = scp->sc_edi;
619	regs[tCS] = scp->sc_cs;
620	regs[tDS] = scp->sc_ds;
621	regs[tES] = scp->sc_es;
622	regs[tSS] = scp->sc_ss;
623	regs[tISP] = scp->sc_isp;
624
625	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
626		return(EINVAL);
627
628	if (scp->sc_onstack & 01)
629		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
630	else
631		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
632	p->p_sigmask = scp->sc_mask &~
633	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
634	regs[tEBP] = scp->sc_fp;
635	regs[tESP] = scp->sc_sp;
636	regs[tEIP] = scp->sc_pc;
637	regs[tEFLAGS] = eflags;
638	return(EJUSTRETURN);
639}
640
/*
 * a simple function to make the system panic (and dump a vmcore)
 * in a predictable fashion
 *
 * Intended to be invoked deliberately (e.g. from a debugger) to force
 * a crash dump; the panic string identifies the dump as intentional.
 */
void diediedie()
{
	panic("because you said to!");
}
649
650int	waittime = -1;
651struct pcb dumppcb;
652
/*
 * Reboot or halt the machine.  `arghowto` is a mask of RB_* flags
 * (see sys/reboot.h): unless RB_NOSYNC is set the disks are synced
 * and dirty buffers flushed first; RB_HALT stops at the console
 * instead of resetting; RB_DUMP takes a crash dump (and optionally
 * waits for the operator) before the reset.  Never returns.
 */
void
boot(arghowto)
	int arghowto;
{
	register long dummy;		/* r12 is reserved */
	register int howto;		/* r11 == how to boot */
	register int devtype;		/* r10 == major of root dev */
	extern int cold;
	int nomsg = 1;

	/* too early in bootstrap to shut down cleanly; spin forever */
	if (cold) {
		printf("hit reset please");
		for(;;);
	}
	howto = arghowto;
	/* waittime guards against recursive panics re-entering the sync */
	if ((howto&RB_NOSYNC) == 0 && waittime < 0) {
		register struct buf *bp;
		int iter, nbusy;

		waittime = 0;
		(void) splnet();
		printf("syncing disks... ");
		/*
		 * Release inodes held by texts before update.
		 */
		if (panicstr == 0)
			vnode_pager_umount(NULL);
		sync(curproc, NULL, NULL);
		/*
		 * Unmount filesystems
		 */
#if 0
		if (panicstr == 0)
			vfs_unmountall();
#endif

		/* wait, with increasing backoff, for busy buffers to drain */
		for (iter = 0; iter < 20; iter++) {
			nbusy = 0;
			for (bp = &buf[nbuf]; --bp >= buf; )
				if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
					nbusy++;
			if (nbusy == 0)
				break;
			if (nomsg) {
				printf("updating disks before rebooting... ");
				nomsg = 0;
			}
			printf("%d ", nbusy);
			DELAY(40000 * iter);
		}
		if (nbusy)
			printf("giving up\n");
		else
			printf("done\n");
		DELAY(10000);			/* wait for printf to finish */
	}
	splhigh();
	devtype = major(rootdev);
	if (howto&RB_HALT) {
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");
		cngetc();
	} else {
		if (howto & RB_DUMP) {
			/* snapshot our own context so the dump is debuggable */
			savectx(&dumppcb, 0);
			dumppcb.pcb_ptd = rcr3();
			dumpsys();

			/*
			 * PANIC_REBOOT_WAIT_TIME: >0 = countdown with
			 * keypress abort, -1 = wait for keypress forever,
			 * 0 = reboot immediately.
			 */
			if (PANIC_REBOOT_WAIT_TIME != 0) {
				if (PANIC_REBOOT_WAIT_TIME != -1) {
					int loop;
					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
						PANIC_REBOOT_WAIT_TIME);
					for (loop = PANIC_REBOOT_WAIT_TIME; loop > 0; --loop) {
						DELAY(1000 * 1000); /* one second */
						if (sgetc(1)) /* Did user type a key? */
							break;
					}
					if (!loop)
						goto die;
				}
			} else { /* zero time specified - reboot NOW */
				goto die;
			}
			printf("--> Press a key on the console to reboot <--\n");
			cngetc();
		}
	}
#ifdef lint
	dummy = 0; dummy = dummy;
	printf("howto %d, devtype %d\n", arghowto, devtype);
#endif
die:
	printf("Rebooting...\n");
	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
	cpu_reset();
	for(;;) ;
	/* NOTREACHED */
}
753
754unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
755int		dumpsize = 0;		/* also for savecore */
756/*
757 * Doadump comes here after turning off memory management and
758 * getting on the dump stack, either when called above, or by
759 * the auto-restart code.
760 */
761void
762dumpsys()
763{
764
765	if (dumpdev == NODEV)
766		return;
767	if ((minor(dumpdev)&07) != 1)
768		return;
769	dumpsize = Maxmem;
770	printf("\ndumping to dev %x, offset %d\n", dumpdev, dumplo);
771	printf("dump ");
772	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
773
774	case ENXIO:
775		printf("device bad\n");
776		break;
777
778	case EFAULT:
779		printf("device not ready\n");
780		break;
781
782	case EINVAL:
783		printf("area improper\n");
784		break;
785
786	case EIO:
787		printf("i/o error\n");
788		break;
789
790	case EINTR:
791		printf("aborted from console\n");
792		break;
793
794	default:
795		printf("succeeded\n");
796		break;
797	}
798}
799
#ifdef HZ
/*
 * If HZ is defined we use this code, otherwise the code in
 * /sys/i386/i386/microtime.s is used.  The other code only works
 * for HZ=100.
 *
 * Return the current time of day in *tvp: the global `time`,
 * advanced by one tick, then normalized.  splhigh() keeps the copy
 * of `time` atomic with respect to the clock interrupt.
 */
microtime(tvp)
	register struct timeval *tvp;
{
	int s = splhigh();

	*tvp = time;
	tvp->tv_usec += tick;
	/*
	 * Normalize so tv_usec stays in [0, 1000000).  The previous
	 * test used `>`, which could leave tv_usec == 1000000 -- an
	 * invalid (un-normalized) timeval.
	 */
	while (tvp->tv_usec >= 1000000) {
		tvp->tv_sec++;
		tvp->tv_usec -= 1000000;
	}
	splx(s);
}
#endif /* HZ */
820
/*
 * Perform model-specific CPU setup at boot.  Nothing is required for
 * the processors currently supported, so this is intentionally empty.
 */
static void
initcpu()
{
}
825
826/*
827 * Clear registers on exec
828 */
829void
830setregs(p, entry, stack)
831	struct proc *p;
832	u_long entry;
833	u_long stack;
834{
835	p->p_md.md_regs[tEBP] = 0;	/* bottom of the fp chain */
836	p->p_md.md_regs[tEIP] = entry;
837	p->p_md.md_regs[tESP] = stack;
838	p->p_md.md_regs[tSS] = _udatasel;
839	p->p_md.md_regs[tDS] = _udatasel;
840	p->p_md.md_regs[tES] = _udatasel;
841	p->p_md.md_regs[tCS] = _ucodesel;
842
843	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
844	load_cr0(rcr0() | CR0_TS);	/* start emulating */
845#if	NNPX > 0
846	npxinit(__INITIAL_NPXCW__);
847#endif	/* NNPX > 0 */
848}
849
850/*
851 * machine dependent system variables.
852 */
853int
854cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
855	int *name;
856	u_int namelen;
857	void *oldp;
858	size_t *oldlenp;
859	void *newp;
860	size_t newlen;
861	struct proc *p;
862{
863
864	/* all sysctl names at this level are terminal */
865	if (namelen != 1)
866		return (ENOTDIR);               /* overloaded */
867
868	switch (name[0]) {
869	case CPU_CONSDEV:
870		return (sysctl_rdstruct(oldp, oldlenp, newp, &cn_tty->t_dev,
871		   sizeof cn_tty->t_dev));
872	default:
873		return (EOPNOTSUPP);
874	}
875	/* NOTREACHED */
876}
877
878/*
879 * Initialize 386 and configure to run kernel
880 */
881
882/*
883 * Initialize segments & interrupt table
884 */
885
886union descriptor gdt[NGDT];
887union descriptor ldt[NLDT];		/* local descriptor table */
888struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
889
890int _default_ldt, currentldt;
891
892struct	i386tss	tss, panic_tss;
893
894extern  struct user *proc0paddr;
895
/* software prototypes -- in more palatable form */
/*
 * Boot-time GDT, one entry per G*_SEL slot; converted to hardware
 * descriptors by ssdtosd() in init386().
 */
struct soft_segment_descriptor gdt_segs[] = {
	/* Null Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Code Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
	/* Data Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
	/* LDT Descriptor */
{	(int) ldt,			/* segment base address  */
	sizeof(ldt)-1,		/* length - all address space */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - Placeholder */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Panic Tss Descriptor */
{	(int) &panic_tss,		/* segment base address  */
	sizeof(tss)-1,		/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Proc 0 Tss Descriptor */
{	(int) kstack,			/* segment base address  */
	sizeof(tss)-1,		/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* User LDT Descriptor per process */
{	(int) ldt,			/* segment base address  */
	(512 * sizeof(union descriptor)-1),		/* length */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
};
971
/*
 * Default per-process LDT: three null slots (replaced by call gates at
 * runtime) followed by the user code and data segments.
 */
struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
	/* Data Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ } };
1018
1019void
1020setidt(idx, func, typ, dpl)
1021	int idx;
1022	void (*func)();
1023	int typ;
1024	int dpl;
1025{
1026	struct gate_descriptor *ip = idt + idx;
1027
1028	ip->gd_looffset = (int)func;
1029	ip->gd_selector = 8;
1030	ip->gd_stkcpy = 0;
1031	ip->gd_xx = 0;
1032	ip->gd_type = typ;
1033	ip->gd_dpl = dpl;
1034	ip->gd_p = 1;
1035	ip->gd_hioffset = ((int)func)>>16 ;
1036}
1037
1038#define	IDTVEC(name)	__CONCAT(X,name)
1039typedef void idtvec_t();
1040
1041extern idtvec_t
1042	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1043	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1044	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1045	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(rsvd0),
1046	IDTVEC(rsvd1), IDTVEC(rsvd2), IDTVEC(rsvd3), IDTVEC(rsvd4),
1047	IDTVEC(rsvd5), IDTVEC(rsvd6), IDTVEC(rsvd7), IDTVEC(rsvd8),
1048	IDTVEC(rsvd9), IDTVEC(rsvd10), IDTVEC(rsvd11), IDTVEC(rsvd12),
1049	IDTVEC(rsvd13), IDTVEC(rsvd14), IDTVEC(syscall);
1050
1051int _gsel_tss;
1052
/*
 * 386-dependent kernel startup.
 *
 * Builds the GDT, LDT and IDT, loads them into the processor, sizes
 * physical memory from the RTC CMOS values, pattern-tests extended
 * memory, records the usable physical memory chunks in phys_avail[],
 * and sets up the initial TSS, the system call gate and proc0's pcb.
 *
 * first: used as the lowest page boundary for the memory test and
 *	passed to pmap_bootstrap(); presumably the first free physical
 *	address after the kernel -- NOTE(review): confirm against the
 *	locore startup code.
 */
void
init386(first)
	int first;
{
	extern ssdtosd(), lgdt(), lidt(), lldt(), etext;
	int x, *pi;		/* NOTE(review): pi appears unused */
	unsigned biosbasemem, biosextmem;
	struct gate_descriptor *gdp;
	extern int sigcode,szsigcode;
	/* table descriptors - used to load tables by microp */
	struct region_descriptor r_gdt, r_idt;
	int	pagesinbase, pagesinext;
	int	target_page;
	extern struct pte *CMAP1;
	extern caddr_t CADDR1;

	proc0.p_addr = proc0paddr;

	/*
	 * Initialize the console before we print anything out.
	 */

	cninit ();

	/*
	 * make gdt memory segments, the code segment goes up to end of the
	 * page with etext in it, the data segment goes to the end of
	 * the address space
	 */
	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(i386_round_page(&etext)) - 1;
	/* i386_btop(0) - 1 wraps to an all-ones limit: whole address space */
	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
	for (x=0; x < NGDT; x++) ssdtosd(gdt_segs+x, gdt+x);

	/* make ldt memory segments */
	/*
	 * The data segment limit must not cover the user area because we
	 * don't want the user area to be writable in copyout() etc. (page
	 * level protection is lost in kernel mode on 386's).  Also, we
	 * don't want the user area to be writable directly (page level
	 * protection of the user area is not available on 486's with
	 * CR0_WP set, because there is no user-read/kernel-write mode).
	 *
	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
	 * should be spelled ...MAX_USER...
	 */
#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
	/*
	 * The code segment limit has to cover the user area until we move
	 * the signal trampoline out of the user area.  This is safe because
	 * the code segment cannot be written to directly.
	 */
#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
	/* Note. eventually want private ldts per process */
	for (x=0; x < 5; x++) ssdtosd(ldt_segs+x, ldt+x);

	/* exceptions */
	/* vectors 3 (breakpoint) and 4 (overflow) are reachable from user
	 * mode (SEL_UPL); everything else requires kernel privilege */
	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
	setidt(17, &IDTVEC(rsvd0),  SDT_SYS386TGT, SEL_KPL);
	setidt(18, &IDTVEC(rsvd1),  SDT_SYS386TGT, SEL_KPL);
	setidt(19, &IDTVEC(rsvd2),  SDT_SYS386TGT, SEL_KPL);
	setidt(20, &IDTVEC(rsvd3),  SDT_SYS386TGT, SEL_KPL);
	setidt(21, &IDTVEC(rsvd4),  SDT_SYS386TGT, SEL_KPL);
	setidt(22, &IDTVEC(rsvd5),  SDT_SYS386TGT, SEL_KPL);
	setidt(23, &IDTVEC(rsvd6),  SDT_SYS386TGT, SEL_KPL);
	setidt(24, &IDTVEC(rsvd7),  SDT_SYS386TGT, SEL_KPL);
	setidt(25, &IDTVEC(rsvd8),  SDT_SYS386TGT, SEL_KPL);
	setidt(26, &IDTVEC(rsvd9),  SDT_SYS386TGT, SEL_KPL);
	setidt(27, &IDTVEC(rsvd10),  SDT_SYS386TGT, SEL_KPL);
	setidt(28, &IDTVEC(rsvd11),  SDT_SYS386TGT, SEL_KPL);
	setidt(29, &IDTVEC(rsvd12),  SDT_SYS386TGT, SEL_KPL);
	setidt(30, &IDTVEC(rsvd13),  SDT_SYS386TGT, SEL_KPL);
	setidt(31, &IDTVEC(rsvd14),  SDT_SYS386TGT, SEL_KPL);

#include	"isa.h"
#if	NISA >0
	isa_defaultirq();
#endif

	/* load the new descriptor tables into the processor */
	r_gdt.rd_limit = sizeof(gdt) - 1;
	r_gdt.rd_base =  (int) gdt;
	lgdt(&r_gdt);

	r_idt.rd_limit = sizeof(idt) - 1;
	r_idt.rd_base = (int) idt;
	lidt(&r_idt);

	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
	lldt(_default_ldt);
	currentldt = _default_ldt;

#include "ddb.h"
#if NDDB > 0
	kdb_init();
	if (boothowto & RB_KDB)
		Debugger("Boot flags requested debugger");
#endif

	/* Use BIOS values stored in RTC CMOS RAM, since probing
	 * breaks certain 386 AT relics.
	 */
	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);

	/*
	 * If BIOS tells us that it has more than 640k in the basemem,
	 *	don't believe it - set it to 640k.
	 */
	if (biosbasemem > 640)
		biosbasemem = 640;

	/*
	 * Some 386 machines might give us a bogus number for extended
	 *	mem. If this happens, stop now.
	 */
#ifndef LARGEMEM
	if (biosextmem > 65536) {
		panic("extended memory beyond limit of 64MB");
		/* NOTREACHED */
	}
#endif

	pagesinbase = biosbasemem * 1024 / NBPG;
	pagesinext = biosextmem * 1024 / NBPG;

	/*
	 * Special hack for chipsets that still remap the 384k hole when
	 *	there's 16MB of memory - this really confuses people that
	 *	are trying to use bus mastering ISA controllers with the
	 *	"16MB limit"; they only have 16MB, but the remapping puts
	 *	them beyond the limit.
	 * XXX - this should be removed when bounce buffers are
	 *	implemented.
	 */
	/*
	 * If extended memory is between 15-16MB (16-17MB phys address range),
	 *	chop it to 15MB.
	 */
	if ((pagesinext > 3840) && (pagesinext < 4096))
		pagesinext = 3840;

	/*
	 * Maxmem isn't the "maximum memory", it's the highest page of
	 * of the physical address space. It should be "Maxphyspage".
	 */
	Maxmem = pagesinext + 0x100000/PAGE_SIZE;

#ifdef MAXMEM
	/* MAXMEM is given in kilobytes; /4 converts to 4K pages */
	if (MAXMEM/4 < Maxmem)
		Maxmem = MAXMEM/4;
#endif
	/*
	 * Calculate number of physical pages, but account for Maxmem
	 *	limitation above.
	 */
	physmem = pagesinbase +
	    (min(pagesinext + 0x100000/PAGE_SIZE, Maxmem) - 0x100000/PAGE_SIZE);

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap (first, 0);

	/*
	 * Do simple memory test over range of extended memory that BIOS
	 *	indicates exists. Adjust Maxmem to the highest page of
	 *	good memory.
	 */
	printf("Testing memory (%dMB)...", ptoa(Maxmem)/1024/1024);

	for (target_page = Maxmem - 1; target_page >= atop(first); target_page--) {

		/*
		 * map page into kernel: valid, read/write, non-cacheable
		 */
		*(int *)CMAP1 = PG_V | PG_KW | PG_N | ptoa(target_page);
		tlbflush();

		/*
		 * Test for alternating 1's and 0's
		 */
		filli(0xaaaaaaaa, CADDR1, PAGE_SIZE/sizeof(int));
		if (test_page((int *)CADDR1, 0xaaaaaaaa)) {
			Maxmem = target_page;
			badpages++;
			continue;
		}
		/*
		 * Test for alternating 0's and 1's
		 */
		filli(0x55555555, CADDR1, PAGE_SIZE/sizeof(int));
		if (test_page((int *)CADDR1, 0x55555555)) {
			Maxmem = target_page;
			badpages++;
			continue;
		}
		/*
		 * Test for all 1's
		 */
		filli(0xffffffff, CADDR1, PAGE_SIZE/sizeof(int));
		if (test_page((int *)CADDR1, 0xffffffff)) {
			Maxmem = target_page;
			badpages++;
			continue;
		}
		/*
		 * Test zeroing of page
		 */
		bzero(CADDR1, PAGE_SIZE);
		if (test_page((int *)CADDR1, 0)) {
			/*
			 * test of page failed
			 */
			Maxmem = target_page;
			badpages++;
			continue;
		}
	}
	printf("done.\n");

	/* unmap the test window again */
	*(int *)CMAP1 = 0;
	tlbflush();

	/* reserve space for the message buffer at the top of good memory */
	avail_end = (Maxmem << PAGE_SHIFT)
		    - i386_round_page(sizeof(struct msgbuf));

	/*
	 * Initialize pointers to the two chunks of memory; for use
	 *	later in vm_page_startup.
	 */
	/* avail_start is initialized in pmap_bootstrap */
	x = 0;
	if (pagesinbase > 1) {
		phys_avail[x++] = NBPG;		/* skip first page of memory */
		phys_avail[x++] = pagesinbase * NBPG;	/* memory up to the ISA hole */
	}
	phys_avail[x++] = avail_start;	/* memory up to the end */
	phys_avail[x++] = avail_end;
	phys_avail[x++] = 0;		/* no more chunks */
	phys_avail[x++] = 0;

	/* now running on new page tables, configured,and u/iom is accessible */

	/* make a initial tss so microp can get interrupt stack on syscall! */
	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
	_gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);

	/* NOTE(review): packs sizeof(tss) into the upper 16 bits, which
	 * places the I/O permission map base past the end of the TSS
	 * (i.e. no I/O bitmap) -- confirm against struct i386tss */
	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
		(sizeof(tss))<<16;

	ltr(_gsel_tss);

	/* make a call gate to reenter kernel with */
	gdp = &ldt[LSYS5CALLS_SEL].gd;

	x = (int) &IDTVEC(syscall);
	/* NOTE(review): the post-increment is dead; x is not used again */
	gdp->gd_looffset = x++;
	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
	gdp->gd_stkcpy = 1;
	gdp->gd_type = SDT_SYS386CGT;
	gdp->gd_dpl = SEL_UPL;
	gdp->gd_p = 1;
	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;

	/* transfer to user mode */

	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);

	/* setup proc 0's pcb */
	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
	proc0.p_addr->u_pcb.pcb_flags = 0;
	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
}
1343
1344int
1345test_page(address, pattern)
1346	int *address;
1347	int pattern;
1348{
1349	int *x;
1350
1351	for (x = address; x < (int *)((char *)address + PAGE_SIZE); x++) {
1352		if (*x != pattern)
1353			return (1);
1354	}
1355	return(0);
1356}
1357
1358/*
1359 * insert an element into a queue
1360 */
1361#undef insque
1362void				/* XXX replace with inline FIXME! */
1363_insque(element, head)
1364	register struct prochd *element, *head;
1365{
1366	element->ph_link = head->ph_link;
1367	head->ph_link = (struct proc *)element;
1368	element->ph_rlink = (struct proc *)head;
1369	((struct prochd *)(element->ph_link))->ph_rlink=(struct proc *)element;
1370}
1371
1372/*
1373 * remove an element from a queue
1374 */
1375#undef remque
1376void				/* XXX replace with inline FIXME! */
1377_remque(element)
1378	register struct prochd *element;
1379{
1380	((struct prochd *)(element->ph_link))->ph_rlink = element->ph_rlink;
1381	((struct prochd *)(element->ph_rlink))->ph_link = element->ph_link;
1382	element->ph_rlink = (struct proc *)0;
1383}
1384
1385/*
1386 * The registers are in the frame; the frame is in the user area of
1387 * the process in question; when the process is active, the registers
1388 * are in "the kernel stack"; when it's not, they're still there, but
1389 * things get flipped around.  So, since p->p_md.md_regs is the whole address
1390 * of the register set, take its offset from the kernel stack, and
1391 * index into the user block.  Don't you just *love* virtual memory?
1392 * (I'm starting to think seymour is right...)
1393 */
1394
1395int
1396ptrace_set_pc (struct proc *p, unsigned int addr) {
1397	void *regs = (char*)p->p_addr +
1398		((char*) p->p_md.md_regs - (char*) kstack);
1399
1400	((struct trapframe *)regs)->tf_eip = addr;
1401	return 0;
1402}
1403
1404int
1405ptrace_single_step (struct proc *p) {
1406	void *regs = (char*)p->p_addr +
1407		((char*) p->p_md.md_regs - (char*) kstack);
1408
1409	((struct trapframe *)regs)->tf_eflags |= PSL_T;
1410	return 0;
1411}
1412
1413/*
1414 * Copy the registers to user-space.
1415 */
1416
1417int
1418ptrace_getregs (struct proc *p, unsigned int *addr) {
1419	int error;
1420	struct reg regs = {0};
1421
1422	if (error = fill_regs (p, &regs))
1423		return error;
1424
1425	return copyout (&regs, addr, sizeof (regs));
1426}
1427
1428int
1429ptrace_setregs (struct proc *p, unsigned int *addr) {
1430	int error;
1431	struct reg regs = {0};
1432
1433	if (error = copyin (addr, &regs, sizeof(regs)))
1434		return error;
1435
1436	return set_regs (p, &regs);
1437}
1438
1439int
1440fill_regs(struct proc *p, struct reg *regs) {
1441	int error;
1442	struct trapframe *tp;
1443	void *ptr = (char*)p->p_addr +
1444		((char*) p->p_md.md_regs - (char*) kstack);
1445
1446	tp = ptr;
1447	regs->r_es = tp->tf_es;
1448	regs->r_ds = tp->tf_ds;
1449	regs->r_edi = tp->tf_edi;
1450	regs->r_esi = tp->tf_esi;
1451	regs->r_ebp = tp->tf_ebp;
1452	regs->r_ebx = tp->tf_ebx;
1453	regs->r_edx = tp->tf_edx;
1454	regs->r_ecx = tp->tf_ecx;
1455	regs->r_eax = tp->tf_eax;
1456	regs->r_eip = tp->tf_eip;
1457	regs->r_cs = tp->tf_cs;
1458	regs->r_eflags = tp->tf_eflags;
1459	regs->r_esp = tp->tf_esp;
1460	regs->r_ss = tp->tf_ss;
1461	return 0;
1462}
1463
1464int
1465set_regs (struct proc *p, struct reg *regs) {
1466	int error;
1467	struct trapframe *tp;
1468	void *ptr = (char*)p->p_addr +
1469		((char*) p->p_md.md_regs - (char*) kstack);
1470
1471	tp = ptr;
1472	tp->tf_es = regs->r_es;
1473	tp->tf_ds = regs->r_ds;
1474	tp->tf_edi = regs->r_edi;
1475	tp->tf_esi = regs->r_esi;
1476	tp->tf_ebp = regs->r_ebp;
1477	tp->tf_ebx = regs->r_ebx;
1478	tp->tf_edx = regs->r_edx;
1479	tp->tf_ecx = regs->r_ecx;
1480	tp->tf_eax = regs->r_eax;
1481	tp->tf_eip = regs->r_eip;
1482	tp->tf_cs = regs->r_cs;
1483	tp->tf_eflags = regs->r_eflags;
1484	tp->tf_esp = regs->r_esp;
1485	tp->tf_ss = regs->r_ss;
1486	return 0;
1487}
1488
#include "ddb.h"
#if NDDB <= 0
/*
 * Stub for kernels configured without the DDB debugger: just report
 * that a debugger entry was requested and return to the caller.
 */
void
Debugger(const char *msg)
{
	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
1497
#include <sys/disklabel.h>
#define b_cylin	b_resid
#define dkpart(dev)              (minor(dev) & 7)
/*
 * Determine the size of the transfer, and make sure it is
 * within the boundaries of the partition. Adjust transfer
 * if needed, and signal errors or early completion.
 *
 * Returns 1 if the (possibly truncated) transfer may proceed,
 * 0 for a transfer starting exactly at the end of the partition
 * (treated as EOF, b_resid set), and -1 on error (B_ERROR set
 * and bp->b_error filled in).
 */
int
bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
{
        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
        int labelsect = lp->d_partitions[0].p_offset;
        int maxsz = p->p_size,
                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;

        /* overwriting disk label ? */
        /* XXX should also protect bootstrap in first 8K */
        /* writes that touch the label sector are refused unless the
         * caller has explicitly enabled label writing (wlabel != 0) */
        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
#if LABELSECTOR != 0
            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
#endif
            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
                bp->b_error = EROFS;
                goto bad;
        }

#if     defined(DOSBBSECTOR) && defined(notyet)
        /* overwriting master boot record? */
        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
                bp->b_error = EROFS;
                goto bad;
        }
#endif

        /* beyond partition? */
        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
                /* if exactly at end of disk, return an EOF */
                if (bp->b_blkno == maxsz) {
                        bp->b_resid = bp->b_bcount;
                        return(0);
                }
                /* or truncate if part of it fits */
                sz = maxsz - bp->b_blkno;
                if (sz <= 0) {
                        bp->b_error = EINVAL;
                        goto bad;
                }
                bp->b_bcount = sz << DEV_BSHIFT;
        }

        /* calculate cylinder for disksort to order transfers with */
        bp->b_pblkno = bp->b_blkno + p->p_offset;
        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
        return(1);

bad:
        bp->b_flags |= B_ERROR;
        return(-1);
}
1559
1560