machdep.c revision 3844
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.78 1994/10/20 00:07:49 phk Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/signalvar.h>
47#include <sys/kernel.h>
48#include <sys/proc.h>
49#include <sys/user.h>
50#include <sys/buf.h>
51#include <sys/reboot.h>
52#include <sys/conf.h>
53#include <sys/file.h>
54#include <sys/callout.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/msgbuf.h>
58#include <sys/ioctl.h>
59#include <sys/sysent.h>
60#include <sys/tty.h>
61#include <sys/sysctl.h>
62
63#ifdef SYSVSHM
64#include <sys/shm.h>
65#endif
66
67#ifdef SYSVMSG
68#include <sys/msg.h>
69#endif
70
71#ifdef SYSVSEM
72#include <sys/sem.h>
73#endif
74
75#include <vm/vm.h>
76#include <vm/vm_kern.h>
77#include <vm/vm_page.h>
78
79#include <sys/exec.h>
80#include <sys/vnode.h>
81
82#include <net/netisr.h>
83
84extern vm_offset_t avail_start, avail_end;
85
86#include <machine/cpu.h>
87#include <machine/reg.h>
88#include <machine/psl.h>
89#include <machine/specialreg.h>
90#include <machine/sysarch.h>
91#include <machine/cons.h>
92#include <machine/devconf.h>
93
94#include <i386/isa/isa.h>
95#include <i386/isa/rtc.h>
96#include "ether.h"
97
98static void identifycpu(void);
99static void initcpu(void);
100static int test_page(int *, int);
101
102char machine[] = "i386";
103char cpu_model[sizeof("Cy486DLC") + 1];
104
105#ifndef PANIC_REBOOT_WAIT_TIME
106#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
107#endif
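/*
 * A wait time of 0 reboots immediately after a panic dump; -1 skips the
 * countdown and waits for a console keypress instead (see boot() below).
 */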
108
109/*
110 * Declare these as initialized data so we can patch them.
111 */
112int	nswbuf = 0;
113#ifdef	NBUF
114int	nbuf = NBUF;
115#else
116int	nbuf = 0;
117#endif
118#ifdef	BUFPAGES
119int	bufpages = BUFPAGES;
120#else
121int	bufpages = 0;
122#endif
123
124#ifdef BOUNCE_BUFFERS
125extern char *bouncememory;
126extern int maxbkva;
127#ifdef BOUNCEPAGES
128int	bouncepages = BOUNCEPAGES;
129#else
130int	bouncepages = 0;
131#endif
132#endif	/* BOUNCE_BUFFERS */
133
134extern int freebufspace;
135int	msgbufmapped = 0;		/* set when safe to use msgbuf */
136int _udatasel, _ucodesel;
137
138extern int adjkerntz, disable_rtc_set;	/* from	clock.c	*/
139
140/*
141 * Machine-dependent startup code
142 */
143int boothowto = 0, Maxmem = 0, badpages = 0, physmem = 0;
144long dumplo;
145extern int bootdev;
146int biosmem;
147
148vm_offset_t	phys_avail[6];
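/*
 * phys_avail[] holds start/end pairs of usable physical memory, terminated
 * by a pair of zeroes; it is filled in by init386() below and handed to the
 * VM system at vm_page_startup() time.
 */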
149
150int cpu_class;
151
152void dumpsys __P((void));
153vm_offset_t buffer_sva, buffer_eva;
154vm_offset_t clean_sva, clean_eva;
155vm_offset_t pager_sva, pager_eva;
156extern int pager_map_size;
157
158#define offsetof(type, member)	((size_t)(&((type *)0)->member))
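/*
 * offsetof() evaluates to the byte offset of `member' within `type' via the
 * usual null-pointer cast; sigreturn() uses it below to recover the start of
 * the sigframe from the user's sigcontext pointer.
 */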
159
160void
161cpu_startup()
162{
163	register unsigned i;
164	register caddr_t v;
165	extern void (*netisrs[32])(void);
166	vm_offset_t maxaddr;
167	vm_size_t size = 0;
168	int firstaddr;
169#ifdef BOUNCE_BUFFERS
170	vm_offset_t minaddr;
171#endif /* BOUNCE_BUFFERS */
172
173	/*
174	 * Initialize error message buffer (at end of core).
175	 */
176
177	/* avail_end was pre-decremented in init386() to compensate */
178	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
179		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
180			   avail_end + i * NBPG,
181			   VM_PROT_ALL, TRUE);
182	msgbufmapped = 1;
183
184	/*
185	 * Good {morning,afternoon,evening,night}.
186	 */
187	printf(version);
188	startrtclock();
189	identifycpu();
190	printf("real memory  = %d (%d pages)\n", ptoa(physmem), physmem);
191	if (badpages)
192		printf("bad memory   = %d (%d pages)\n", ptoa(badpages), badpages);
193
194	/*
195	 * Quickly wire in netisrs.
196	 */
197#define DONET(isr, n) do { extern void isr(void); netisrs[n] = isr; } while(0)
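/*
 * DONET(isr, n) registers handler `isr' in slot `n' of the netisr dispatch
 * table; e.g. DONET(ipintr, NETISR_IP) expands (inside the do/while) to
 *	extern void ipintr(void); netisrs[NETISR_IP] = ipintr;
 */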
198#ifdef INET
199#if NETHER > 0
200	DONET(arpintr, NETISR_ARP);
201#endif
202	DONET(ipintr, NETISR_IP);
203#endif
204#ifdef NS
205	DONET(nsintr, NETISR_NS);
206#endif
207#ifdef ISO
208	DONET(clnlintr, NETISR_ISO);
209#endif
210#ifdef CCITT
211	DONET(ccittintr, NETISR_CCITT);
212#endif
213#undef DONET
214
215	/*
216	 * Allocate space for system data structures.
217	 * The first available kernel virtual address is in "v".
218	 * As pages of kernel virtual memory are allocated, "v" is incremented.
219	 * As pages of memory are allocated and cleared,
220	 * "firstaddr" is incremented.
221	 * An index into the kernel page table corresponding to the
222	 * virtual memory address maintained in "v" is kept in "mapaddr".
223	 */
224
225	/*
226	 * Make two passes.  The first pass calculates how much memory is
227	 * needed and allocates it.  The second pass assigns virtual
228	 * addresses to the various data structures.
229	 */
230	firstaddr = 0;
231again:
232	v = (caddr_t)firstaddr;
233
234#define	valloc(name, type, num) \
235	    (name) = (type *)v; v = (caddr_t)((name)+(num))
236#define	valloclim(name, type, num, lim) \
237	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
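	/*
	 * valloc() carves `num' objects of `type' out of the region starting
	 * at v; e.g. valloc(buf, struct buf, nbuf) expands to
	 *	buf = (struct buf *)v; v = (caddr_t)(buf + nbuf);
	 * On the first pass v simply counts up from 0 so that `size' can be
	 * computed; on the second pass it starts at the memory obtained from
	 * kmem_alloc() below.
	 */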
238	valloc(callout, struct callout, ncallout);
239#ifdef SYSVSHM
240	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
241#endif
242#ifdef SYSVSEM
243	valloc(sema, struct semid_ds, seminfo.semmni);
244	valloc(sem, struct sem, seminfo.semmns);
245	/* This is pretty disgusting! */
246	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
247#endif
248#ifdef SYSVMSG
249	valloc(msgpool, char, msginfo.msgmax);
250	valloc(msgmaps, struct msgmap, msginfo.msgseg);
251	valloc(msghdrs, struct msg, msginfo.msgtql);
252	valloc(msqids, struct msqid_ds, msginfo.msgmni);
253#endif
254	/*
255	 * Determine how many buffers to allocate.
256	 * Use roughly 1/6 of the memory beyond the first 2MB.
257	 * Ensure a minimum of 64 buffer pages (32 buffers).
258	 * We allocate 1/2 as many swap buffer headers as file i/o buffers.
259	 */
260	if (bufpages == 0)
261		bufpages = ((physmem << PGSHIFT) - 2048*1024) / NBPG / 6;
262	if (bufpages < 64)
263		bufpages = 64;
264
265	/*
266	 * We must still limit the maximum number of buffers to no more
267	 * than 750 (1500 buffer pages) because we'll run out of kernel VM otherwise.
268	 */
269	bufpages = min(bufpages, 1500);
270	if (nbuf == 0) {
271		nbuf = bufpages / 2;
272		if (nbuf < 32)
273			nbuf = 32;
274	}
275	freebufspace = bufpages * NBPG;
276	if (nswbuf == 0) {
277		nswbuf = (nbuf / 2) &~ 1;	/* force even */
278		if (nswbuf > 64)
279			nswbuf = 64;		/* sanity */
280	}
281	valloc(swbuf, struct buf, nswbuf);
282	valloc(buf, struct buf, nbuf);
283
284#ifdef BOUNCE_BUFFERS
285	/*
286	 * If there is more than 16MB of memory, allocate some bounce buffers
287	 */
288	if (Maxmem > 4096) {
289		if (bouncepages == 0)
290			bouncepages = 96;	/* largest physio size + extra */
291		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
292		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
293	}
294#endif
295
296	/*
297	 * End of first pass, size has been calculated so allocate memory
298	 */
299	if (firstaddr == 0) {
300		size = (vm_size_t)(v - firstaddr);
301		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
302		if (firstaddr == 0)
303			panic("startup: no room for tables");
304		goto again;
305	}
306
307	/*
308	 * End of second pass, addresses have been assigned
309	 */
310	if ((vm_size_t)(v - firstaddr) != size)
311		panic("startup: table size inconsistency");
312
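	/*
	 * Carve a single "clean" submap out of the kernel map, big enough for
	 * the buffer cache and the pager (plus bounce-buffer KVA when
	 * BOUNCE_BUFFERS is configured); buffer_map, pager_map (and io_map in
	 * the bounce case) are then carved out of clean_map below.
	 */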
313#ifdef BOUNCE_BUFFERS
314	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
315			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
316				maxbkva + pager_map_size, TRUE);
317	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
318#else
319	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
320			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
321#endif
322	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
323				(nbuf*MAXBSIZE), TRUE);
324	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
325				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
326
327	/*
328	 * Finally, allocate the mbuf pool.  Since mclrefcnt is an odd size
329	 * we use the more space-efficient malloc in place of kmem_alloc.
330	 */
331	mclrefcnt = (char *)malloc(NMBCLUSTERS+CLBYTES/MCLBYTES,
332				   M_MBUF, M_NOWAIT);
333	bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES);
334	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
335			       VM_MBUF_SIZE, FALSE);
336	/*
337	 * Initialize callouts
338	 */
339	callfree = callout;
340	for (i = 1; i < ncallout; i++)
341		callout[i-1].c_next = &callout[i];
342
343	printf("avail memory = %d (%d pages)\n", ptoa(cnt.v_free_count), cnt.v_free_count);
344	printf("using %d buffers containing %d bytes of memory\n",
345		nbuf, bufpages * CLBYTES);
346
347#ifdef BOUNCE_BUFFERS
348	/*
349	 * init bounce buffers
350	 */
351	vm_bounce_init();
352#endif
353
354	/*
355	 * Set up CPU-specific registers, cache, etc.
356	 */
357	initcpu();
358
359	/*
360	 * Set up buffers, so they can be used to read disk labels.
361	 */
362	bufinit();
363	vm_pager_bufferinit();
364
365	/*
366	 * Configure the system.
367	 */
368	configure();
369}
370
371
372struct cpu_nameclass i386_cpus[] = {
373	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
374	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
375	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
376	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
377	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
378	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
379	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
380};
381
382static void
383identifycpu()
384{
385	extern u_long cpu_id;
386	extern char cpu_vendor[];
387	printf("CPU: ");
388	if (cpu >= 0
389	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
390		printf("%s", i386_cpus[cpu].cpu_name);
391		cpu_class = i386_cpus[cpu].cpu_class;
392		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
393	} else {
394		printf("unknown cpu type %d\n", cpu);
395		panic("startup: bad cpu id");
396	}
397	printf(" (");
398	switch(cpu_class) {
399	case CPUCLASS_286:
400		printf("286");
401		break;
402	case CPUCLASS_386:
403		printf("386");
404		break;
405	case CPUCLASS_486:
406		printf("486");
407		break;
408	case CPUCLASS_586:
409		printf("Pentium");
410		break;
411	default:
412		printf("unknown");	/* will panic below... */
413	}
414	printf("-class CPU)");
415#ifdef I586_CPU
416	if(cpu_class == CPUCLASS_586) {
417		extern void calibrate_cyclecounter();
418		extern int pentium_mhz;
419		calibrate_cyclecounter();
420		printf(" %d MHz", pentium_mhz);
421	}
422#endif
423	if(cpu_id)
424		printf("  Id = 0x%lx",cpu_id);
425	if(*cpu_vendor)
426		printf("  Origin = \"%s\"",cpu_vendor);
427	printf("\n");	/* cpu speed would be nice, but how? */
428
429	/*
430	 * Now that we have told the user what they have,
431	 * let them know if that machine type isn't configured.
432	 */
433	switch (cpu_class) {
434	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
435#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
436#error This kernel is not configured for one of the supported CPUs
437#endif
438#if !defined(I386_CPU)
439	case CPUCLASS_386:
440#endif
441#if !defined(I486_CPU)
442	case CPUCLASS_486:
443#endif
444#if !defined(I586_CPU)
445	case CPUCLASS_586:
446#endif
447		panic("CPU class not configured");
448	default:
449		break;
450	}
451}
452
453#ifdef PGINPROF
454/*
455 * Return the difference (in microseconds)
456 * between the current time and a previous
457 * time as represented by the arguments.
458 * If there is a pending clock interrupt
459 * which has not been serviced due to high
460 * ipl, return error code.
461 */
462/*ARGSUSED*/
463vmtime(otime, olbolt, oicr)
464	register int otime, olbolt, oicr;
465{
466
467	return (((time.tv_sec-otime)*60 + lbolt-olbolt)*16667);
468}
469#endif
470
471extern int kstack[];
472
473/*
474 * Send a signal to a process.
475 *
476 * The user stack is set up so that the signal trampoline
477 * (sigcode, copied into the u. area) calls the handler and
478 * then performs a sigreturn() system call (handled below).
479 * After sigreturn resets the signal mask, the stack, and the
480 * frame pointer, it returns to the user-specified
481 * pc and psl.
482 */
483void
484sendsig(catcher, sig, mask, code)
485	sig_t catcher;
486	int sig, mask;
487	unsigned code;
488{
489	register struct proc *p = curproc;
490	register int *regs;
491	register struct sigframe *fp;
492	struct sigacts *psp = p->p_sigacts;
493	int oonstack;
494
495	regs = p->p_md.md_regs;
496        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
497	/*
498	 * Allocate and validate space for the signal handler
499	 * context. Note that if the stack is in P0 space, the
500	 * call to grow() is a nop, and the useracc() check
501	 * will fail if the process has not already allocated
502	 * the space with a `brk'.
503	 */
504        if ((psp->ps_flags & SAS_ALTSTACK) &&
505	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
506	    (psp->ps_sigonstack & sigmask(sig))) {
507		fp = (struct sigframe *)(psp->ps_sigstk.ss_base +
508		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
509		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
510	} else {
511		fp = (struct sigframe *)(regs[tESP]
512			- sizeof(struct sigframe));
513	}
514
515	/*
516	 * grow() will return FALSE if the fp will not fit inside the stack
517	 *	and the stack can not be grown. useracc will return FALSE
518	 *	if access is denied.
519	 */
520	if ((grow(p, (int)fp) == FALSE) ||
521	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
522		/*
523		 * Process has trashed its stack; give it an illegal
524		 * instruction to halt it in its tracks.
525		 */
526		SIGACTION(p, SIGILL) = SIG_DFL;
527		sig = sigmask(SIGILL);
528		p->p_sigignore &= ~sig;
529		p->p_sigcatch &= ~sig;
530		p->p_sigmask &= ~sig;
531		psignal(p, SIGILL);
532		return;
533	}
534
535	/*
536	 * Build the argument list for the signal handler.
537	 */
538	if (p->p_sysent->sv_sigtbl) {
539		if (sig < p->p_sysent->sv_sigsize)
540			sig = p->p_sysent->sv_sigtbl[sig];
541		else
542			sig = p->p_sysent->sv_sigsize + 1;
543	}
544	fp->sf_signum = sig;
545	fp->sf_code = code;
546	fp->sf_scp = &fp->sf_sc;
547	fp->sf_addr = (char *) regs[tERR];
548	fp->sf_handler = catcher;
549
550	/* save scratch registers */
551	fp->sf_sc.sc_eax = regs[tEAX];
552	fp->sf_sc.sc_ebx = regs[tEBX];
553	fp->sf_sc.sc_ecx = regs[tECX];
554	fp->sf_sc.sc_edx = regs[tEDX];
555	fp->sf_sc.sc_esi = regs[tESI];
556	fp->sf_sc.sc_edi = regs[tEDI];
557	fp->sf_sc.sc_cs = regs[tCS];
558	fp->sf_sc.sc_ds = regs[tDS];
559	fp->sf_sc.sc_ss = regs[tSS];
560	fp->sf_sc.sc_es = regs[tES];
561	fp->sf_sc.sc_isp = regs[tISP];
562
563	/*
564	 * Build the signal context to be used by sigreturn.
565	 */
566	fp->sf_sc.sc_onstack = oonstack;
567	fp->sf_sc.sc_mask = mask;
568	fp->sf_sc.sc_sp = regs[tESP];
569	fp->sf_sc.sc_fp = regs[tEBP];
570	fp->sf_sc.sc_pc = regs[tEIP];
571	fp->sf_sc.sc_ps = regs[tEFLAGS];
572	regs[tESP] = (int)fp;
573	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
574	regs[tEFLAGS] &= ~PSL_VM;
575	regs[tCS] = _ucodesel;
576	regs[tDS] = _udatasel;
577	regs[tES] = _udatasel;
578	regs[tSS] = _udatasel;
579}
580
581/*
582 * System call to cleanup state after a signal
583 * has been taken.  Reset signal mask and
584 * stack state from context left by sendsig (above).
585 * Return to previous pc and psl as specified by
586 * context left by sendsig. Check carefully to
587 * make sure that the user has not modified the
588 * psl to gain improper privileges or to cause
589 * a machine fault.
590 */
591struct sigreturn_args {
592	struct sigcontext *sigcntxp;
593};
594
595int
596sigreturn(p, uap, retval)
597	struct proc *p;
598	struct sigreturn_args *uap;
599	int *retval;
600{
601	register struct sigcontext *scp;
602	register struct sigframe *fp;
603	register int *regs = p->p_md.md_regs;
604	int eflags;
605
606	/*
607	 * (XXX old comment) regs[tESP] points to the return address.
608	 * The user scp pointer is above that.
609	 * The return address is faked in the signal trampoline code
610	 * for consistency.
611	 */
612	scp = uap->sigcntxp;
613	fp = (struct sigframe *)
614	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
615
616	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
617		return(EINVAL);
618
619	eflags = scp->sc_ps;
620	if ((eflags & PSL_USERCLR) != 0 ||
621	    (eflags & PSL_USERSET) != PSL_USERSET ||
622	    (eflags & PSL_IOPL) < (regs[tEFLAGS] & PSL_IOPL)) {
623#ifdef DEBUG
624    		printf("sigreturn:  eflags=0x%x\n", eflags);
625#endif
626    		return(EINVAL);
627	}
628
629	/*
630	 * Sanity check the user's selectors and error if they
631	 * are suspect.
632	 */
633#define max_ldt_sel(pcb) \
634	((pcb)->pcb_ldt ? (pcb)->pcb_ldt_len : (sizeof(ldt) / sizeof(ldt[0])))
635
636#define valid_ldt_sel(sel) \
637	(ISLDT(sel) && ISPL(sel) == SEL_UPL && \
638	 IDXSEL(sel) < max_ldt_sel(&p->p_addr->u_pcb))
639
640#define null_sel(sel) \
641	(!ISLDT(sel) && IDXSEL(sel) == 0)
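	/*
	 * Each selector must be the standard user code/data selector, a
	 * user-privilege selector within the process's LDT, or (for %ds and
	 * %es only) the null selector.
	 */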
642
643	if (((scp->sc_cs&0xffff) != _ucodesel && !valid_ldt_sel(scp->sc_cs)) ||
644	    ((scp->sc_ss&0xffff) != _udatasel && !valid_ldt_sel(scp->sc_ss)) ||
645	    ((scp->sc_ds&0xffff) != _udatasel && !valid_ldt_sel(scp->sc_ds) &&
646	     !null_sel(scp->sc_ds)) ||
647	    ((scp->sc_es&0xffff) != _udatasel && !valid_ldt_sel(scp->sc_es) &&
648	     !null_sel(scp->sc_es))) {
649#ifdef DEBUG
650    		printf("sigreturn:  cs=0x%x ss=0x%x ds=0x%x es=0x%x\n",
651			scp->sc_cs, scp->sc_ss, scp->sc_ds, scp->sc_es);
652#endif
653		trapsignal(p, SIGBUS, T_PROTFLT);
654		return(EINVAL);
655	}
656
657#undef max_ldt_sel
658#undef valid_ldt_sel
659#undef null_sel
660
661	/* restore scratch registers */
662	regs[tEAX] = scp->sc_eax;
663	regs[tEBX] = scp->sc_ebx;
664	regs[tECX] = scp->sc_ecx;
665	regs[tEDX] = scp->sc_edx;
666	regs[tESI] = scp->sc_esi;
667	regs[tEDI] = scp->sc_edi;
668	regs[tCS] = scp->sc_cs;
669	regs[tDS] = scp->sc_ds;
670	regs[tES] = scp->sc_es;
671	regs[tSS] = scp->sc_ss;
672	regs[tISP] = scp->sc_isp;
673
674	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
675		return(EINVAL);
676
677	if (scp->sc_onstack & 01)
678		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
679	else
680		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
681	p->p_sigmask = scp->sc_mask &~
682	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
683	regs[tEBP] = scp->sc_fp;
684	regs[tESP] = scp->sc_sp;
685	regs[tEIP] = scp->sc_pc;
686	regs[tEFLAGS] = eflags;
687	return(EJUSTRETURN);
688}
689
690/*
691 * a simple function to make the system panic (and dump a vmcore)
692 * in a predictable fashion
693 */
694void diediedie()
695{
696	panic("because you said to!");
697}
698
699int	waittime = -1;
700struct pcb dumppcb;
701
702__dead void
703boot(arghowto)
704	int arghowto;
705{
706	register long dummy;		/* unused; see the lint hack below */
707	register int howto;		/* how to boot */
708	register int devtype;		/* major number of root dev */
709	extern int cold;
710
711	if (cold) {
712		printf("hit reset please");
713		for(;;);
714	}
715	howto = arghowto;
716	if ((howto&RB_NOSYNC) == 0 && waittime < 0) {
717		register struct buf *bp;
718		int iter, nbusy;
719
720		waittime = 0;
721		printf("\nsyncing disks... ");
722		/*
723		 * Release inodes held by texts before update.
724		 */
725		if (panicstr == 0)
726			vnode_pager_umount(NULL);
727		sync(curproc, NULL, NULL);
728
729		for (iter = 0; iter < 20; iter++) {
730			nbusy = 0;
731			for (bp = &buf[nbuf]; --bp >= buf; )
732				if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
733					nbusy++;
734			if (nbusy == 0)
735				break;
736			printf("%d ", nbusy);
737			DELAY(40000 * iter);
738		}
739		if (nbusy) {
740			/*
741			 * Failed to sync all blocks. Indicate this and don't
742			 * unmount filesystems (thus forcing an fsck on reboot).
743			 */
744			printf("giving up\n");
745		} else {
746			printf("done\n");
747			/*
748			 * Unmount filesystems
749			 */
750			if (panicstr == 0)
751				vfs_unmountall();
752		}
753		DELAY(100000);			/* wait for console output to finish */
754	}
755	splhigh();
756	devtype = major(rootdev);
757	if (howto&RB_HALT) {
758		printf("\n");
759		printf("The operating system has halted.\n");
760		printf("Please press any key to reboot.\n\n");
761		cngetc();
762	} else {
763		if (howto & RB_DUMP) {
764			savectx(&dumppcb, 0);
765			dumppcb.pcb_ptd = rcr3();
766			dumpsys();
767
768			if (PANIC_REBOOT_WAIT_TIME != 0) {
769				if (PANIC_REBOOT_WAIT_TIME != -1) {
770					int loop;
771					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
772						PANIC_REBOOT_WAIT_TIME);
773					for (loop = PANIC_REBOOT_WAIT_TIME; loop > 0; --loop) {
774						DELAY(1000 * 1000); /* one second */
775						if (cncheckc()) /* Did user type a key? */
776							break;
777					}
778					if (!loop)
779						goto die;
780				}
781			} else { /* zero time specified - reboot NOW */
782				goto die;
783			}
784			printf("--> Press a key on the console to reboot <--\n");
785			cngetc();
786		}
787	}
788#ifdef lint
789	dummy = 0; dummy = dummy;
790	printf("howto %d, devtype %d\n", arghowto, devtype);
791#endif
792die:
793	printf("Rebooting...\n");
794	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
795	cpu_reset();
796	for(;;) ;
797	/* NOTREACHED */
798}
799
800unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
801int		dumpsize = 0;		/* also for savecore */
802/*
803 * Doadump comes here after turning off memory management and
804 * getting on the dump stack, either when called above, or by
805 * the auto-restart code.
806 */
807void
808dumpsys()
809{
810
811	if (dumpdev == NODEV)
812		return;
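	/* only dump to minor & 07 == 1, conventionally the `b' (swap) partition */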
813	if ((minor(dumpdev)&07) != 1)
814		return;
815	dumpsize = Maxmem;
816	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
817	printf("dump ");
818	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
819
820	case ENXIO:
821		printf("device bad\n");
822		break;
823
824	case EFAULT:
825		printf("device not ready\n");
826		break;
827
828	case EINVAL:
829		printf("area improper\n");
830		break;
831
832	case EIO:
833		printf("i/o error\n");
834		break;
835
836	case EINTR:
837		printf("aborted from console\n");
838		break;
839
840	default:
841		printf("succeeded\n");
842		break;
843	}
844}
845
846#ifdef HZ
847/*
848 * If HZ is defined we use this code, otherwise the code in
849 * /sys/i386/i386/microtime.s is used.  The other code only works
850 * for HZ=100.
851 */
852microtime(tvp)
853	register struct timeval *tvp;
854{
855	int s = splhigh();
856
857	*tvp = time;
858	tvp->tv_usec += tick;
859	while (tvp->tv_usec > 1000000) {
860		tvp->tv_sec++;
861		tvp->tv_usec -= 1000000;
862	}
863	splx(s);
864}
865#endif /* HZ */
866
867static void
868initcpu()
869{
870}
871
872/*
873 * Clear registers on exec
874 */
875void
876setregs(p, entry, stack)
877	struct proc *p;
878	u_long entry;
879	u_long stack;
880{
881	p->p_md.md_regs[tEBP] = 0;	/* bottom of the fp chain */
882	p->p_md.md_regs[tEIP] = entry;
883	p->p_md.md_regs[tESP] = stack;
884	p->p_md.md_regs[tSS] = _udatasel;
885	p->p_md.md_regs[tDS] = _udatasel;
886	p->p_md.md_regs[tES] = _udatasel;
887	p->p_md.md_regs[tCS] = _ucodesel;
888
889	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
890	load_cr0(rcr0() | CR0_TS);	/* start emulating */
891#if	NNPX > 0
892	npxinit(__INITIAL_NPXCW__);
893#endif	/* NNPX > 0 */
894}
895
896/*
897 * machine dependent system variables.
898 */
899int
900cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
901	int *name;
902	u_int namelen;
903	void *oldp;
904	size_t *oldlenp;
905	void *newp;
906	size_t newlen;
907	struct proc *p;
908{
909
910	/* all sysctl names at this level are terminal */
911	if (namelen != 1)
912		return (ENOTDIR);               /* overloaded */
913
914	switch (name[0]) {
915	case CPU_CONSDEV:
916		return (sysctl_rdstruct(oldp, oldlenp, newp, &cn_tty->t_dev,
917		   sizeof cn_tty->t_dev));
918	case CPU_ADJKERNTZ:
919		return (sysctl_int(oldp, oldlenp, newp, newlen, &adjkerntz));
920	case CPU_DISRTCSET:
921		return (sysctl_int(oldp, oldlenp, newp,	newlen,	&disable_rtc_set));
922	default:
923		return (EOPNOTSUPP);
924	}
925	/* NOTREACHED */
926}
927
928/*
929 * Initialize 386 and configure to run kernel
930 */
931
932/*
933 * Initialize segments & interrupt table
934 */
935
936union descriptor gdt[NGDT];
937union descriptor ldt[NLDT];		/* local descriptor table */
938struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
939
940int _default_ldt, currentldt;
941
942struct	i386tss	tss, panic_tss;
943
944extern  struct user *proc0paddr;
945
946/* software prototypes -- in more palatable form */
947struct soft_segment_descriptor gdt_segs[] = {
948/* GNULL_SEL	0 Null Descriptor */
949{	0x0,			/* segment base address  */
950	0x0,			/* length */
951	0,			/* segment type */
952	0,			/* segment descriptor priority level */
953	0,			/* segment descriptor present */
954	0, 0,
955	0,			/* default 32 vs 16 bit size */
956	0  			/* limit granularity (byte/page units)*/ },
957/* GCODE_SEL	1 Code Descriptor for kernel */
958{	0x0,			/* segment base address  */
959	0xfffff,		/* length - all address space */
960	SDT_MEMERA,		/* segment type */
961	0,			/* segment descriptor priority level */
962	1,			/* segment descriptor present */
963	0, 0,
964	1,			/* default 32 vs 16 bit size */
965	1  			/* limit granularity (byte/page units)*/ },
966/* GDATA_SEL	2 Data Descriptor for kernel */
967{	0x0,			/* segment base address  */
968	0xfffff,		/* length - all address space */
969	SDT_MEMRWA,		/* segment type */
970	0,			/* segment descriptor priority level */
971	1,			/* segment descriptor present */
972	0, 0,
973	1,			/* default 32 vs 16 bit size */
974	1  			/* limit granularity (byte/page units)*/ },
975/* GLDT_SEL	3 LDT Descriptor */
976{	(int) ldt,		/* segment base address  */
977	sizeof(ldt)-1,		/* length - all address space */
978	SDT_SYSLDT,		/* segment type */
979	0,			/* segment descriptor priority level */
980	1,			/* segment descriptor present */
981	0, 0,
982	0,			/* unused - default 32 vs 16 bit size */
983	0  			/* limit granularity (byte/page units)*/ },
984/* GTGATE_SEL	4 Null Descriptor - Placeholder */
985{	0x0,			/* segment base address  */
986	0x0,			/* length - all address space */
987	0,			/* segment type */
988	0,			/* segment descriptor priority level */
989	0,			/* segment descriptor present */
990	0, 0,
991	0,			/* default 32 vs 16 bit size */
992	0  			/* limit granularity (byte/page units)*/ },
993/* GPANIC_SEL	5 Panic Tss Descriptor */
994{	(int) &panic_tss,	/* segment base address  */
995	sizeof(tss)-1,		/* length - all address space */
996	SDT_SYS386TSS,		/* segment type */
997	0,			/* segment descriptor priority level */
998	1,			/* segment descriptor present */
999	0, 0,
1000	0,			/* unused - default 32 vs 16 bit size */
1001	0  			/* limit granularity (byte/page units)*/ },
1002/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1003{	(int) kstack,		/* segment base address  */
1004	sizeof(tss)-1,		/* length - all address space */
1005	SDT_SYS386TSS,		/* segment type */
1006	0,			/* segment descriptor priority level */
1007	1,			/* segment descriptor present */
1008	0, 0,
1009	0,			/* unused - default 32 vs 16 bit size */
1010	0  			/* limit granularity (byte/page units)*/ },
1011/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1012{	(int) ldt,		/* segment base address  */
1013	(512 * sizeof(union descriptor)-1),		/* length */
1014	SDT_SYSLDT,		/* segment type */
1015	0,			/* segment descriptor priority level */
1016	1,			/* segment descriptor present */
1017	0, 0,
1018	0,			/* unused - default 32 vs 16 bit size */
1019	0  			/* limit granularity (byte/page units)*/ },
1020/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1021{	0,			/* segment base address (overwritten by APM)  */
1022	0xfffff,		/* length */
1023	SDT_MEMERA,		/* segment type */
1024	0,			/* segment descriptor priority level */
1025	1,			/* segment descriptor present */
1026	0, 0,
1027	1,			/* default 32 vs 16 bit size */
1028	1  			/* limit granularity (byte/page units)*/ },
1029/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1030{	0,			/* segment base address (overwritten by APM)  */
1031	0xfffff,		/* length */
1032	SDT_MEMERA,		/* segment type */
1033	0,			/* segment descriptor priority level */
1034	1,			/* segment descriptor present */
1035	0, 0,
1036	0,			/* default 32 vs 16 bit size */
1037	1  			/* limit granularity (byte/page units)*/ },
1038/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1039{	0,			/* segment base address (overwritten by APM) */
1040	0xfffff,		/* length */
1041	SDT_MEMRWA,		/* segment type */
1042	0,			/* segment descriptor priority level */
1043	1,			/* segment descriptor present */
1044	0, 0,
1045	1,			/* default 32 vs 16 bit size */
1046	1  			/* limit granularity (byte/page units)*/ },
1047};
1048
1049struct soft_segment_descriptor ldt_segs[] = {
1050	/* Null Descriptor - overwritten by call gate */
1051{	0x0,			/* segment base address  */
1052	0x0,			/* length - all address space */
1053	0,			/* segment type */
1054	0,			/* segment descriptor priority level */
1055	0,			/* segment descriptor present */
1056	0, 0,
1057	0,			/* default 32 vs 16 bit size */
1058	0  			/* limit granularity (byte/page units)*/ },
1059	/* Null Descriptor - overwritten by call gate */
1060{	0x0,			/* segment base address  */
1061	0x0,			/* length - all address space */
1062	0,			/* segment type */
1063	0,			/* segment descriptor priority level */
1064	0,			/* segment descriptor present */
1065	0, 0,
1066	0,			/* default 32 vs 16 bit size */
1067	0  			/* limit granularity (byte/page units)*/ },
1068	/* Null Descriptor - overwritten by call gate */
1069{	0x0,			/* segment base address  */
1070	0x0,			/* length - all address space */
1071	0,			/* segment type */
1072	0,			/* segment descriptor priority level */
1073	0,			/* segment descriptor present */
1074	0, 0,
1075	0,			/* default 32 vs 16 bit size */
1076	0  			/* limit granularity (byte/page units)*/ },
1077	/* Code Descriptor for user */
1078{	0x0,			/* segment base address  */
1079	0xfffff,		/* length - all address space */
1080	SDT_MEMERA,		/* segment type */
1081	SEL_UPL,		/* segment descriptor priority level */
1082	1,			/* segment descriptor present */
1083	0, 0,
1084	1,			/* default 32 vs 16 bit size */
1085	1  			/* limit granularity (byte/page units)*/ },
1086	/* Data Descriptor for user */
1087{	0x0,			/* segment base address  */
1088	0xfffff,		/* length - all address space */
1089	SDT_MEMRWA,		/* segment type */
1090	SEL_UPL,		/* segment descriptor priority level */
1091	1,			/* segment descriptor present */
1092	0, 0,
1093	1,			/* default 32 vs 16 bit size */
1094	1  			/* limit granularity (byte/page units)*/ } };
1095
1096void
1097setidt(idx, func, typ, dpl)
1098	int idx;
1099	void (*func)();
1100	int typ;
1101	int dpl;
1102{
1103	struct gate_descriptor *ip = idt + idx;
1104
1105	ip->gd_looffset = (int)func;
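	/* all gates use the kernel code segment: GSEL(GCODE_SEL, SEL_KPL) == 8 */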
1106	ip->gd_selector = 8;
1107	ip->gd_stkcpy = 0;
1108	ip->gd_xx = 0;
1109	ip->gd_type = typ;
1110	ip->gd_dpl = dpl;
1111	ip->gd_p = 1;
1112	ip->gd_hioffset = ((int)func)>>16 ;
1113}
1114
1115#define	IDTVEC(name)	__CONCAT(X,name)
1116typedef void idtvec_t();
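/*
 * IDTVEC(name) expands to Xname; these externs are the low-level assembler
 * trap and interrupt entry points that get installed into the IDT below.
 */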
1117
1118extern idtvec_t
1119	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1120	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1121	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1122	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(rsvd0),
1123	IDTVEC(rsvd1), IDTVEC(rsvd2), IDTVEC(rsvd3), IDTVEC(rsvd4),
1124	IDTVEC(rsvd5), IDTVEC(rsvd6), IDTVEC(rsvd7), IDTVEC(rsvd8),
1125	IDTVEC(rsvd9), IDTVEC(rsvd10), IDTVEC(rsvd11), IDTVEC(rsvd12),
1126	IDTVEC(rsvd13), IDTVEC(rsvd14), IDTVEC(syscall);
1127
1128int _gsel_tss;
1129
1130/* added sdtossd() by HOSOKAWA Tatsumi <hosokawa@mt.cs.keio.ac.jp> */
1131int
1132sdtossd(sd, ssd)
1133	struct segment_descriptor *sd;
1134	struct soft_segment_descriptor *ssd;
1135{
1136	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1137	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1138	ssd->ssd_type  = sd->sd_type;
1139	ssd->ssd_dpl   = sd->sd_dpl;
1140	ssd->ssd_p     = sd->sd_p;
1141	ssd->ssd_def32 = sd->sd_def32;
1142	ssd->ssd_gran  = sd->sd_gran;
1143	return 0;
1144}
1145
1146void
1147init386(first)
1148	int first;
1149{
1150	extern lgdt(), lidt(), lldt();
1151	int x;
1152	unsigned biosbasemem, biosextmem;
1153	struct gate_descriptor *gdp;
1154	extern int sigcode,szsigcode;
1155	/* table descriptors - used by the CPU to load the descriptor tables */
1156	struct region_descriptor r_gdt, r_idt;
1157	int	pagesinbase, pagesinext;
1158	int	target_page;
1159	extern struct pte *CMAP1;
1160	extern caddr_t CADDR1;
1161
1162	proc0.p_addr = proc0paddr;
1163
1164	/*
1165	 * Initialize the console before we print anything out.
1166	 */
1167
1168	cninit ();
1169
1170	/*
1171	 * make gdt memory segments, the code segment goes up to end of the
1172	 * page with etext in it, the data segment goes to the end of
1173	 * the address space
1174	 */
1175	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1 /* i386_btop(i386_round_page(&etext)) - 1 */;
1176	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1177	for (x=0; x < NGDT; x++) ssdtosd(gdt_segs+x, gdt+x);
1178
1179	/* make ldt memory segments */
1180	/*
1181	 * The data segment limit must not cover the user area because we
1182	 * don't want the user area to be writable in copyout() etc. (page
1183	 * level protection is lost in kernel mode on 386's).  Also, we
1184	 * don't want the user area to be writable directly (page level
1185	 * protection of the user area is not available on 486's with
1186	 * CR0_WP set, because there is no user-read/kernel-write mode).
1187	 *
1188	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1189	 * should be spelled ...MAX_USER...
1190	 */
1191#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1192	/*
1193	 * The code segment limit has to cover the user area until we move
1194	 * the signal trampoline out of the user area.  This is safe because
1195	 * the code segment cannot be written to directly.
1196	 */
1197#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1198	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1199	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1200	/* Note. eventually want private ldts per process */
1201	for (x=0; x < 5; x++) ssdtosd(ldt_segs+x, ldt+x);
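	/* 5 == number of entries in ldt_segs[]; the remaining LDT slots stay null */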
1202
1203	/* exceptions */
1204	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1205	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1206	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1207 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1208	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1209	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1210	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1211	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1212	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1213	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1214	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1215	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1216	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1217	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1218	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1219	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1220	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1221	setidt(17, &IDTVEC(rsvd0),  SDT_SYS386TGT, SEL_KPL);
1222	setidt(18, &IDTVEC(rsvd1),  SDT_SYS386TGT, SEL_KPL);
1223	setidt(19, &IDTVEC(rsvd2),  SDT_SYS386TGT, SEL_KPL);
1224	setidt(20, &IDTVEC(rsvd3),  SDT_SYS386TGT, SEL_KPL);
1225	setidt(21, &IDTVEC(rsvd4),  SDT_SYS386TGT, SEL_KPL);
1226	setidt(22, &IDTVEC(rsvd5),  SDT_SYS386TGT, SEL_KPL);
1227	setidt(23, &IDTVEC(rsvd6),  SDT_SYS386TGT, SEL_KPL);
1228	setidt(24, &IDTVEC(rsvd7),  SDT_SYS386TGT, SEL_KPL);
1229	setidt(25, &IDTVEC(rsvd8),  SDT_SYS386TGT, SEL_KPL);
1230	setidt(26, &IDTVEC(rsvd9),  SDT_SYS386TGT, SEL_KPL);
1231	setidt(27, &IDTVEC(rsvd10),  SDT_SYS386TGT, SEL_KPL);
1232	setidt(28, &IDTVEC(rsvd11),  SDT_SYS386TGT, SEL_KPL);
1233	setidt(29, &IDTVEC(rsvd12),  SDT_SYS386TGT, SEL_KPL);
1234	setidt(30, &IDTVEC(rsvd13),  SDT_SYS386TGT, SEL_KPL);
1235	setidt(31, &IDTVEC(rsvd14),  SDT_SYS386TGT, SEL_KPL);
1236
1237#include	"isa.h"
1238#if	NISA > 0
1239	isa_defaultirq();
1240#endif
1241
1242	r_gdt.rd_limit = sizeof(gdt) - 1;
1243	r_gdt.rd_base =  (int) gdt;
1244	lgdt(&r_gdt);
1245
1246	r_idt.rd_limit = sizeof(idt) - 1;
1247	r_idt.rd_base = (int) idt;
1248	lidt(&r_idt);
1249
1250	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1251	lldt(_default_ldt);
1252	currentldt = _default_ldt;
1253
1254#ifdef DDB
1255	kdb_init();
1256	if (boothowto & RB_KDB)
1257		Debugger("Boot flags requested debugger");
1258#endif
1259
1260	/* Use BIOS values stored in RTC CMOS RAM, since probing
1261	 * breaks certain 386 AT relics.
1262	 */
1263	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1264	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1265
1266	/*
1267	 * If BIOS tells us that it has more than 640k in the basemem,
1268	 *	don't believe it - set it to 640k.
1269	 */
1270	if (biosbasemem > 640)
1271		biosbasemem = 640;
1272
1273	/*
1274	 * Some 386 machines might give us a bogus number for extended
1275	 *	mem. If this happens, stop now.
1276	 */
1277#ifndef LARGEMEM
1278	if (biosextmem > 65536) {
1279		panic("extended memory beyond limit of 64MB");
1280		/* NOTREACHED */
1281	}
1282#endif
1283
1284	pagesinbase = biosbasemem * 1024 / NBPG;
1285	pagesinext = biosextmem * 1024 / NBPG;
1286
1287	/*
1288	 * Special hack for chipsets that still remap the 384k hole when
1289	 *	there's 16MB of memory - this really confuses people that
1290	 *	are trying to use bus mastering ISA controllers with the
1291	 *	"16MB limit"; they only have 16MB, but the remapping puts
1292	 *	them beyond the limit.
1293	 * XXX - this should be removed when bounce buffers are
1294	 *	implemented.
1295	 */
1296	/*
1297	 * If extended memory is between 15-16MB (16-17MB phys address range),
1298	 *	chop it to 15MB.
1299	 */
1300	if ((pagesinext > 3840) && (pagesinext < 4096))
1301		pagesinext = 3840;
1302
1303	/*
1304	 * Maxmem isn't the "maximum memory", it's the highest page
1305	 * of the physical address space.  It should be "Maxphyspage".
1306	 */
1307	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1308
1309#ifdef MAXMEM
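	/*
	 * MAXMEM is specified in kilobytes in the kernel config;
	 * MAXMEM/4 is therefore a count of 4K pages, like Maxmem.
	 */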
1310	if (MAXMEM/4 < Maxmem)
1311		Maxmem = MAXMEM/4;
1312#endif
1313	/*
1314	 * Calculate number of physical pages, but account for Maxmem
1315	 *	limitation above.
1316	 */
1317	physmem = pagesinbase +
1318	    (min(pagesinext + 0x100000/PAGE_SIZE, Maxmem) - 0x100000/PAGE_SIZE);
1319
1320	/* call pmap initialization to make new kernel address space */
1321	pmap_bootstrap (first, 0);
1322
1323	/*
1324	 * Do simple memory test over range of extended memory that BIOS
1325	 *	indicates exists. Adjust Maxmem to the highest page of
1326	 *	good memory.
1327	 */
1328	printf("Testing memory (%dMB)...", ptoa(Maxmem)/1024/1024);
1329
1330	for (target_page = Maxmem - 1; target_page >= atop(first); target_page--) {
1331
1332		/*
1333		 * map page into kernel: valid, read/write, non-cacheable
1334		 */
1335		*(int *)CMAP1 = PG_V | PG_KW | PG_N | ptoa(target_page);
1336		pmap_update();
1337
1338		/*
1339		 * Test for alternating 1's and 0's
1340		 */
1341		filli(0xaaaaaaaa, CADDR1, PAGE_SIZE/sizeof(int));
1342		if (test_page((int *)CADDR1, 0xaaaaaaaa)) {
1343			Maxmem = target_page;
1344			badpages++;
1345			continue;
1346		}
1347		/*
1348		 * Test for alternating 0's and 1's
1349		 */
1350		filli(0x55555555, CADDR1, PAGE_SIZE/sizeof(int));
1351		if (test_page((int *)CADDR1, 0x55555555)) {
1352			Maxmem = target_page;
1353			badpages++;
1354			continue;
1355		}
1356		/*
1357		 * Test for all 1's
1358		 */
1359		filli(0xffffffff, CADDR1, PAGE_SIZE/sizeof(int));
1360		if (test_page((int *)CADDR1, 0xffffffff)) {
1361			Maxmem = target_page;
1362			badpages++;
1363			continue;
1364		}
1365		/*
1366		 * Test zeroing of page
1367		 */
1368		bzero(CADDR1, PAGE_SIZE);
1369		if (test_page((int *)CADDR1, 0)) {
1370			/*
1371			 * test of page failed
1372			 */
1373			Maxmem = target_page;
1374			badpages++;
1375			continue;
1376		}
1377	}
1378	printf("done.\n");
1379
1380	*(int *)CMAP1 = 0;
1381	pmap_update();
1382
1383	avail_end = (Maxmem << PAGE_SHIFT)
1384		    - i386_round_page(sizeof(struct msgbuf));
1385
1386	/*
1387	 * Initialize pointers to the two chunks of memory; for use
1388	 *	later in vm_page_startup.
1389	 */
1390	/* avail_start is initialized in pmap_bootstrap */
1391	x = 0;
1392	if (pagesinbase > 1) {
1393		phys_avail[x++] = NBPG;		/* skip first page of memory */
1394		phys_avail[x++] = pagesinbase * NBPG;	/* memory up to the ISA hole */
1395	}
1396	phys_avail[x++] = avail_start;	/* memory up to the end */
1397	phys_avail[x++] = avail_end;
1398	phys_avail[x++] = 0;		/* no more chunks */
1399	phys_avail[x++] = 0;
1400
1401	/* now running on new page tables, configured, and u/iom is accessible */
1402
1403	/* make an initial TSS so the CPU can get an interrupt stack on syscall */
1404	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1405	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1406	_gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1407
1408	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1409		(sizeof(tss))<<16;
1410
1411	ltr(_gsel_tss);
1412
1413	/* make a call gate to reenter kernel with */
1414	gdp = &ldt[LSYS5CALLS_SEL].gd;
1415
1416	x = (int) &IDTVEC(syscall);
1417	gdp->gd_looffset = x++;
1418	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1419	gdp->gd_stkcpy = 1;
1420	gdp->gd_type = SDT_SYS386CGT;
1421	gdp->gd_dpl = SEL_UPL;
1422	gdp->gd_p = 1;
1423	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1424
1425	/* transfer to user mode */
1426
1427	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1428	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1429
1430	/* setup proc 0's pcb */
1431	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1432	proc0.p_addr->u_pcb.pcb_flags = 0;
1433	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1434}
1435
1436int
1437test_page(address, pattern)
1438	int *address;
1439	int pattern;
1440{
1441	int *x;
1442
1443	for (x = address; x < (int *)((char *)address + PAGE_SIZE); x++) {
1444		if (*x != pattern)
1445			return (1);
1446	}
1447	return(0);
1448}
1449
1450/*
1451 * The registers are in the frame; the frame is in the user area of
1452 * the process in question; when the process is active, the registers
1453 * are in "the kernel stack"; when it's not, they're still there, but
1454 * things get flipped around.  So, since p->p_md.md_regs is the whole address
1455 * of the register set, take its offset from the kernel stack, and
1456 * index into the user block.  Don't you just *love* virtual memory?
1457 * (I'm starting to think seymour is right...)
1458 */
1459
1460int
1461ptrace_set_pc (struct proc *p, unsigned int addr) {
1462	void *regs = (char*)p->p_addr +
1463		((char*) p->p_md.md_regs - (char*) kstack);
1464
1465	((struct trapframe *)regs)->tf_eip = addr;
1466	return 0;
1467}
1468
1469int
1470ptrace_single_step (struct proc *p) {
1471	void *regs = (char*)p->p_addr +
1472		((char*) p->p_md.md_regs - (char*) kstack);
1473
1474	((struct trapframe *)regs)->tf_eflags |= PSL_T;
1475	return 0;
1476}
1477
1478/*
1479 * Copy the registers to user-space.
1480 */
1481
1482int
1483ptrace_getregs (struct proc *p, unsigned int *addr) {
1484	int error;
1485	struct reg regs = {0};
1486
1487	error = fill_regs (p, &regs);
1488	if (error)
1489		return error;
1490
1491	return copyout (&regs, addr, sizeof (regs));
1492}
1493
1494int
1495ptrace_setregs (struct proc *p, unsigned int *addr) {
1496	int error;
1497	struct reg regs = {0};
1498
1499	error = copyin (addr, &regs, sizeof(regs));
1500	if (error)
1501		return error;
1502
1503	return set_regs (p, &regs);
1504}
1505
1506int
1507fill_regs(struct proc *p, struct reg *regs) {
1508	struct trapframe *tp;
1509	void *ptr = (char*)p->p_addr +
1510		((char*) p->p_md.md_regs - (char*) kstack);
1511
1512	tp = ptr;
1513	regs->r_es = tp->tf_es;
1514	regs->r_ds = tp->tf_ds;
1515	regs->r_edi = tp->tf_edi;
1516	regs->r_esi = tp->tf_esi;
1517	regs->r_ebp = tp->tf_ebp;
1518	regs->r_ebx = tp->tf_ebx;
1519	regs->r_edx = tp->tf_edx;
1520	regs->r_ecx = tp->tf_ecx;
1521	regs->r_eax = tp->tf_eax;
1522	regs->r_eip = tp->tf_eip;
1523	regs->r_cs = tp->tf_cs;
1524	regs->r_eflags = tp->tf_eflags;
1525	regs->r_esp = tp->tf_esp;
1526	regs->r_ss = tp->tf_ss;
1527	return 0;
1528}
1529
1530int
1531set_regs (struct proc *p, struct reg *regs) {
1532	struct trapframe *tp;
1533	void *ptr = (char*)p->p_addr +
1534		((char*) p->p_md.md_regs - (char*) kstack);
1535
1536	tp = ptr;
1537	tp->tf_es = regs->r_es;
1538	tp->tf_ds = regs->r_ds;
1539	tp->tf_edi = regs->r_edi;
1540	tp->tf_esi = regs->r_esi;
1541	tp->tf_ebp = regs->r_ebp;
1542	tp->tf_ebx = regs->r_ebx;
1543	tp->tf_edx = regs->r_edx;
1544	tp->tf_ecx = regs->r_ecx;
1545	tp->tf_eax = regs->r_eax;
1546	tp->tf_eip = regs->r_eip;
1547	tp->tf_cs = regs->r_cs;
1548	tp->tf_eflags = regs->r_eflags;
1549	tp->tf_esp = regs->r_esp;
1550	tp->tf_ss = regs->r_ss;
1551	return 0;
1552}
1553
1554#ifndef DDB
1555void
1556Debugger(const char *msg)
1557{
1558	printf("Debugger(\"%s\") called.\n", msg);
1559}
1560#endif /* no DDB */
1561
1562#include <sys/disklabel.h>
1563#define b_cylin	b_resid
1564#define dkpart(dev)              (minor(dev) & 7)
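/*
 * b_cylin overlays b_resid while the buffer is queued (used below to give
 * disksort a cylinder to sort on); dkpart() extracts the partition number
 * from the minor device number.
 */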
1565/*
1566 * Determine the size of the transfer, and make sure it is
1567 * within the boundaries of the partition. Adjust transfer
1568 * if needed, and signal errors or early completion.
1569 */
1570int
1571bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1572{
1573        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1574        int labelsect = lp->d_partitions[0].p_offset;
1575        int maxsz = p->p_size,
1576                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1577
1578        /* overwriting disk label ? */
1579        /* XXX should also protect bootstrap in first 8K */
1580        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1581#if LABELSECTOR != 0
1582            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1583#endif
1584            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1585                bp->b_error = EROFS;
1586                goto bad;
1587        }
1588
1589#if     defined(DOSBBSECTOR) && defined(notyet)
1590        /* overwriting master boot record? */
1591        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1592            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1593                bp->b_error = EROFS;
1594                goto bad;
1595        }
1596#endif
1597
1598        /* beyond partition? */
1599        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1600                /* if exactly at the end of the partition, return an EOF */
1601                if (bp->b_blkno == maxsz) {
1602                        bp->b_resid = bp->b_bcount;
1603                        return(0);
1604                }
1605                /* or truncate if part of it fits */
1606                sz = maxsz - bp->b_blkno;
1607                if (sz <= 0) {
1608                        bp->b_error = EINVAL;
1609                        goto bad;
1610                }
1611                bp->b_bcount = sz << DEV_BSHIFT;
1612        }
1613
1614        /* calculate cylinder for disksort to order transfers with */
1615        bp->b_pblkno = bp->b_blkno + p->p_offset;
1616        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1617        return(1);
1618
1619bad:
1620        bp->b_flags |= B_ERROR;
1621        return(-1);
1622}
1623
1624int
1625disk_externalize(int drive, void *userp, size_t *maxlen)
1626{
1627	if(*maxlen < sizeof drive) {
1628		return ENOMEM;
1629	}
1630
1631	*maxlen -= sizeof drive;
1632	return copyout(&drive, userp, sizeof drive);
1633}
1634
1635