machdep.c revision 7103
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.114 1995/03/16 18:11:27 bde Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/signalvar.h>
47#include <sys/kernel.h>
48#include <sys/proc.h>
49#include <sys/user.h>
50#include <sys/buf.h>
51#include <sys/reboot.h>
52#include <sys/conf.h>
53#include <sys/file.h>
54#include <sys/callout.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/mount.h>
58#include <sys/msgbuf.h>
59#include <sys/ioctl.h>
60#include <sys/sysent.h>
61#include <sys/tty.h>
62#include <sys/sysctl.h>
63#include <sys/devconf.h>
64
65#ifdef SYSVSHM
66#include <sys/shm.h>
67#endif
68
69#ifdef SYSVMSG
70#include <sys/msg.h>
71#endif
72
73#ifdef SYSVSEM
74#include <sys/sem.h>
75#endif
76
77#include <vm/vm.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_page.h>
80
81#include <sys/exec.h>
82#include <sys/vnode.h>
83
84#include <ddb/ddb.h>
85
86#include <net/netisr.h>
87
88/* XXX correctly declaring all the netisr's is painful. */
89#include <net/if.h>
90#include <net/route.h>
91
92#include <netinet/in.h>
93#include <netinet/in_systm.h>
94#include <netinet/ip.h>
95#include <netinet/if_ether.h>
96#include <netinet/ip_var.h>
97
98#include <netns/ns.h>
99#include <netns/ns_if.h>
100
101#include <netiso/iso.h>
102#include <netiso/iso_var.h>
103
104#include <netccitt/dll.h>
105#include <netccitt/x25.h>
106#include <netccitt/pk.h>
107#include <sys/socketvar.h>
108#include <netccitt/pk_var.h>
109
110#include "ether.h"
111
112#include <machine/cpu.h>
113#include <machine/npx.h>
114#include <machine/reg.h>
115#include <machine/psl.h>
116#include <machine/clock.h>
117#include <machine/specialreg.h>
118#include <machine/sysarch.h>
119#include <machine/cons.h>
120#include <machine/devconf.h>
121#include <machine/bootinfo.h>
122#include <machine/md_var.h>
123
124#include <i386/isa/isa.h>
125#include <i386/isa/isa_device.h>
126#include <i386/isa/rtc.h>
127
128static void identifycpu(void);
129static void initcpu(void);
130
131char machine[] = "i386";
132char cpu_model[sizeof("Cy486DLC") + 1];
133
134#ifndef PANIC_REBOOT_WAIT_TIME
135#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
136#endif
137
138/*
139 * Declare these as initialized data so we can patch them.
140 */
141int	nswbuf = 0;
142#ifdef	NBUF
143int	nbuf = NBUF;
144#else
145int	nbuf = 0;
146#endif
147
148#ifdef BOUNCE_BUFFERS
149extern char *bouncememory;
150extern int maxbkva;
151#ifdef BOUNCEPAGES
152int	bouncepages = BOUNCEPAGES;
153#else
154int	bouncepages = 0;
155#endif
156#endif	/* BOUNCE_BUFFERS */
157
158extern int freebufspace;
159int	msgbufmapped = 0;		/* set when safe to use msgbuf */
160int _udatasel, _ucodesel;
161
162
163/*
164 * Machine-dependent startup code
165 */
166int boothowto = 0, bootverbose = 0, Maxmem = 0, badpages = 0, physmem = 0;
167long dumplo;
168extern int bootdev;
169int biosmem;
170
171vm_offset_t	phys_avail[6];
172
173int cpu_class;
174
175void dumpsys __P((void));
176vm_offset_t buffer_sva, buffer_eva;
177vm_offset_t clean_sva, clean_eva;
178vm_offset_t pager_sva, pager_eva;
179extern int pager_map_size;
180
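/*
 * The macro below computes the byte offset of "member" within "type" by
 * taking the member's address in a structure imagined at address zero.
 */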
181#define offsetof(type, member)	((size_t)(&((type *)0)->member))
182
183void
184cpu_startup()
185{
186	register unsigned i;
187	register caddr_t v;
188	vm_offset_t maxaddr;
189	vm_size_t size = 0;
190	int firstaddr;
191	vm_offset_t minaddr;
192
193	if (boothowto & RB_VERBOSE)
194		bootverbose++;
195
196	/*
197	 * Initialize error message buffer (at end of core).
198	 */
199
200	/* avail_end was pre-decremented in init_386() to compensate */
201	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
202		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp + i * NBPG,
203			   avail_end + i * NBPG,
204			   VM_PROT_ALL, TRUE);
205	msgbufmapped = 1;
206
207	/*
208	 * Good {morning,afternoon,evening,night}.
209	 */
210	printf(version);
211	startrtclock();
212	identifycpu();
213	printf("real memory  = %d (%d pages)\n", ptoa(physmem), physmem);
214	if (badpages)
215		printf("bad memory   = %d (%d pages)\n", ptoa(badpages), badpages);
216
217	/*
218	 * Quickly wire in netisrs.
219	 */
220#define DONET(isr, n) do { netisrs[n] = isr; } while(0)
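/* DONET() registers a protocol's software-interrupt handler in the netisrs[] dispatch table. */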
221#ifdef INET
222#if NETHER > 0
223	DONET(arpintr, NETISR_ARP);
224#endif
225	DONET(ipintr, NETISR_IP);
226#endif
227#ifdef NS
228	DONET(nsintr, NETISR_NS);
229#endif
230#ifdef ISO
231	DONET(clnlintr, NETISR_ISO);
232#endif
233#ifdef CCITT
234	DONET(ccittintr, NETISR_CCITT);
235#endif
236#ifdef ISDN
237	DONET(isdnintr, NETISR_ISDN);
238#endif
239#undef DONET
240
241	/*
242	 * Allocate space for system data structures.
243	 * The first available kernel virtual address is in "v".
244	 * As pages of kernel virtual memory are allocated, "v" is incremented.
245	 * As pages of memory are allocated and cleared,
246	 * "firstaddr" is incremented.
247	 * An index into the kernel page table corresponding to the
248	 * virtual memory address maintained in "v" is kept in "mapaddr".
249	 */
250
251	/*
252	 * Make two passes.  The first pass calculates how much memory is
253	 * needed and allocates it.  The second pass assigns virtual
254	 * addresses to the various data structures.
255	 */
256	firstaddr = 0;
257again:
258	v = (caddr_t)firstaddr;
259
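/*
 * valloc()/valloclim() carve "num" objects of "type" out of the arena at
 * "v" and advance "v".  The pointers assigned on the sizing pass (when
 * firstaddr == 0) are provisional; they are reassigned on the second pass
 * once kmem_alloc() has provided real storage.
 */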
260#define	valloc(name, type, num) \
261	    (name) = (type *)v; v = (caddr_t)((name)+(num))
262#define	valloclim(name, type, num, lim) \
263	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
264	valloc(callout, struct callout, ncallout);
265#ifdef SYSVSHM
266	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
267#endif
268#ifdef SYSVSEM
269	valloc(sema, struct semid_ds, seminfo.semmni);
270	valloc(sem, struct sem, seminfo.semmns);
271	/* This is pretty disgusting! */
272	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
273#endif
274#ifdef SYSVMSG
275	valloc(msgpool, char, msginfo.msgmax);
276	valloc(msgmaps, struct msgmap, msginfo.msgseg);
277	valloc(msghdrs, struct msg, msginfo.msgtql);
278	valloc(msqids, struct msqid_ds, msginfo.msgmni);
279#endif
280
281	if (nbuf == 0) {
282		nbuf = 30;
283		if( physmem > 1024)
284			nbuf += min((physmem - 1024) / 12, 1024);
285	}
286	nswbuf = min(nbuf, 128);
287
288	valloc(swbuf, struct buf, nswbuf);
289	valloc(buf, struct buf, nbuf);
290
291#ifdef BOUNCE_BUFFERS
292	/*
293	 * If there is more than 16MB of memory, allocate some bounce buffers
294	 */
295	if (Maxmem > 4096) {
296		if (bouncepages == 0) {
297			bouncepages = 64;
298			bouncepages += ((Maxmem - 4096) / 2048) * 32;
299		}
300		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
301		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
302	}
303#endif
304
305	/*
306	 * End of first pass, size has been calculated so allocate memory
307	 */
308	if (firstaddr == 0) {
309		size = (vm_size_t)(v - firstaddr);
310		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
311		if (firstaddr == 0)
312			panic("startup: no room for tables");
313		goto again;
314	}
315
316	/*
317	 * End of second pass, addresses have been assigned
318	 */
319	if ((vm_size_t)(v - firstaddr) != size)
320		panic("startup: table size inconsistency");
321
322#ifdef BOUNCE_BUFFERS
323	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
324			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
325				maxbkva + pager_map_size, TRUE);
326	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
327#else
328	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
329			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
330#endif
331	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
332				(nbuf*MAXBSIZE), TRUE);
333	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
334				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
335	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
336				(16*ARG_MAX), TRUE);
337	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
338				(maxproc*UPAGES*PAGE_SIZE), FALSE);
339
340	/*
341	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
342	 * we use the more space efficient malloc in place of kmem_alloc.
343	 */
344	mclrefcnt = (char *)malloc(NMBCLUSTERS+CLBYTES/MCLBYTES,
345				   M_MBUF, M_NOWAIT);
346	bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES);
347	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
348			       VM_MBUF_SIZE, FALSE);
349	/*
350	 * Initialize callouts
351	 */
352	callfree = callout;
353	for (i = 1; i < ncallout; i++)
354		callout[i-1].c_next = &callout[i];
355
356        if (boothowto & RB_CONFIG)
357		userconfig();
358	printf("avail memory = %d (%d pages)\n", ptoa(cnt.v_free_count), cnt.v_free_count);
359
360#ifdef BOUNCE_BUFFERS
361	/*
362	 * init bounce buffers
363	 */
364	vm_bounce_init();
365#endif
366
367	/*
368	 * Set up CPU-specific registers, cache, etc.
369	 */
370	initcpu();
371
372	/*
373	 * Set up buffers, so they can be used to read disk labels.
374	 */
375	bufinit();
376	vm_pager_bufferinit();
377
378	/*
379	 * Configure the system.
380	 */
381	configure();
382	if (bootverbose) {
383		printf("BIOS Geometries:");
384		for (i=0; i < N_BIOS_GEOM; i++)
385			printf(" %x:%x\n", i, bootinfo.bi_bios_geom[i]);
386		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
387	}
388}
389
390
391struct cpu_nameclass i386_cpus[] = {
392	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
393	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
394	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
395	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
396	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
397	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
398	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
399};
400
401static void
402identifycpu()
403{
404	printf("CPU: ");
405	if (cpu >= 0
406	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
407		printf("%s", i386_cpus[cpu].cpu_name);
408		cpu_class = i386_cpus[cpu].cpu_class;
409		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
410	} else {
411		printf("unknown cpu type %d\n", cpu);
412		panic("startup: bad cpu id");
413	}
414	printf(" (");
415	switch(cpu_class) {
416	case CPUCLASS_286:
417		printf("286");
418		break;
419	case CPUCLASS_386:
420		printf("386");
421		break;
422	case CPUCLASS_486:
423		printf("486");
424		break;
425	case CPUCLASS_586:
426		printf("Pentium");
427		break;
428	default:
429		printf("unknown");	/* will panic below... */
430	}
431	printf("-class CPU)");
432#ifdef I586_CPU
433	if(cpu_class == CPUCLASS_586) {
434		calibrate_cyclecounter();
435		printf(" %d MHz", pentium_mhz);
436	}
437#endif
438	if(*cpu_vendor)
439		printf("  Origin = \"%s\"",cpu_vendor);
440	if(cpu_id)
441		printf("  Id = 0x%lx",cpu_id);
442	printf("\n");	/* cpu speed would be nice, but how? */
443	if (!strcmp(cpu_vendor,"GenuineIntel")) {
444		printf("  This is a");
445		if ((cpu_id & 0xf00) > 3) {
446			switch (cpu_id & 0x3000) {
447			    case 0x1000: printf("Overdrive "); break;
448			    case 0x2000: printf("Dual "); break;
449			}
450			if ((cpu_id & 0xf00) == 0x400)
451			    printf("n i486");
452			else if ((cpu_id & 0xf00) == 0x500)
453			    printf(" Pentium ");
454			else
455			    printf(" unknown CPU");
456			switch (cpu_id & 0xff0) {
457			    case 0x400: printf("DX"); break;
458			    case 0x410: printf("DX"); break;
459			    case 0x420: printf("SX"); break;
460			    case 0x430: printf("DX2"); break;
461			    case 0x440: printf("SL"); break;
462			    case 0x450: printf("SX2"); break;
463			    case 0x470: printf("DX2 Write-Back Enhanced");
464				break;
465			    case 0x480: printf("DX4"); break;
466			    case 0x510: printf("510\\60 or 567\\66"); break;
467			    case 0x520: printf("735\\90 or 815\\100"); break;
468			}
469		}
470		printf("  Stepping=%d", cpu_id & 0xf);
471		if (cpu_high > 0) {
472			printf("  Features=0x%lx",cpu_feature);
473			if (cpu_feature & 0x1) printf(" FPU");
474			if (cpu_feature & 0x2) printf(" VME");
475			if (cpu_feature & 0x8) printf(" PSE");
476			if (cpu_feature & 0x80) printf(" MCE");
477			if (cpu_feature & 0x100) printf(" CX8");
478			if (cpu_feature & 0x200) printf(" APIC");
479		}
480		printf("\n");
481	}
482
483	/*
484	 * Now that we have told the user what they have,
485	 * let them know if that machine type isn't configured.
486	 */
487	switch (cpu_class) {
488	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
489#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
490#error This kernel is not configured for one of the supported CPUs
491#endif
492#if !defined(I386_CPU)
493	case CPUCLASS_386:
494#endif
495#if !defined(I486_CPU)
496	case CPUCLASS_486:
497#endif
498#if !defined(I586_CPU)
499	case CPUCLASS_586:
500#endif
501		panic("CPU class not configured");
502	default:
503		break;
504	}
505}
506
507/*
508 * Send an interrupt to process.
509 *
510 * Stack is set up to allow sigcode stored
511 * in u. to call routine, followed by kcall
512 * to sigreturn routine below.  After sigreturn
513 * resets the signal mask, the stack, and the
514 * frame pointer, it returns to the user
515 * specified pc, psl.
516 */
517void
518sendsig(catcher, sig, mask, code)
519	sig_t catcher;
520	int sig, mask;
521	unsigned code;
522{
523	register struct proc *p = curproc;
524	register int *regs;
525	register struct sigframe *fp;
526	struct sigframe sf;
527	struct sigacts *psp = p->p_sigacts;
528	int oonstack;
529
530	regs = p->p_md.md_regs;
531        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
532	/*
533	 * Allocate and validate space for the signal handler
534	 * context. Note that if the stack is in P0 space, the
535	 * call to grow() is a nop, and the useracc() check
536	 * will fail if the process has not already allocated
537	 * the space with a `brk'.
538	 */
539        if ((psp->ps_flags & SAS_ALTSTACK) &&
540	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
541	    (psp->ps_sigonstack & sigmask(sig))) {
542		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
543		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
544		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
545	} else {
546		fp = (struct sigframe *)(regs[tESP]
547			- sizeof(struct sigframe));
548	}
549
550	/*
551	 * grow() will return FALSE if the fp will not fit inside the stack
552	 *	and the stack can not be grown. useracc will return FALSE
553	 *	if access is denied.
554	 */
555	if ((grow(p, (int)fp) == FALSE) ||
556	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
557		/*
558		 * Process has trashed its stack; give it an illegal
559		 * instruction to halt it in its tracks.
560		 */
561		SIGACTION(p, SIGILL) = SIG_DFL;
562		sig = sigmask(SIGILL);
563		p->p_sigignore &= ~sig;
564		p->p_sigcatch &= ~sig;
565		p->p_sigmask &= ~sig;
566		psignal(p, SIGILL);
567		return;
568	}
569
570	/*
571	 * Build the argument list for the signal handler.
572	 */
573	if (p->p_sysent->sv_sigtbl) {
574		if (sig < p->p_sysent->sv_sigsize)
575			sig = p->p_sysent->sv_sigtbl[sig];
576		else
577			sig = p->p_sysent->sv_sigsize + 1;
578	}
579	sf.sf_signum = sig;
580	sf.sf_code = code;
581	sf.sf_scp = &fp->sf_sc;
582	sf.sf_addr = (char *) regs[tERR];
583	sf.sf_handler = catcher;
584
585	/* save scratch registers */
586	sf.sf_sc.sc_eax = regs[tEAX];
587	sf.sf_sc.sc_ebx = regs[tEBX];
588	sf.sf_sc.sc_ecx = regs[tECX];
589	sf.sf_sc.sc_edx = regs[tEDX];
590	sf.sf_sc.sc_esi = regs[tESI];
591	sf.sf_sc.sc_edi = regs[tEDI];
592	sf.sf_sc.sc_cs = regs[tCS];
593	sf.sf_sc.sc_ds = regs[tDS];
594	sf.sf_sc.sc_ss = regs[tSS];
595	sf.sf_sc.sc_es = regs[tES];
596	sf.sf_sc.sc_isp = regs[tISP];
597
598	/*
599	 * Build the signal context to be used by sigreturn.
600	 */
601	sf.sf_sc.sc_onstack = oonstack;
602	sf.sf_sc.sc_mask = mask;
603	sf.sf_sc.sc_sp = regs[tESP];
604	sf.sf_sc.sc_fp = regs[tEBP];
605	sf.sf_sc.sc_pc = regs[tEIP];
606	sf.sf_sc.sc_ps = regs[tEFLAGS];
607
608	/*
609	 * Copy the sigframe out to the user's stack.
610	 */
611	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
612		/*
613		 * Something is wrong with the stack pointer.
614		 * ...Kill the process.
615		 */
616		sigexit(p, SIGILL);
617	};
618
619	regs[tESP] = (int)fp;
620	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
621	regs[tEFLAGS] &= ~PSL_VM;
622	regs[tCS] = _ucodesel;
623	regs[tDS] = _udatasel;
624	regs[tES] = _udatasel;
625	regs[tSS] = _udatasel;
626}
627
628/*
629 * System call to cleanup state after a signal
630 * has been taken.  Reset signal mask and
631 * stack state from context left by sendsig (above).
632 * Return to previous pc and psl as specified by
633 * context left by sendsig. Check carefully to
634 * make sure that the user has not modified the
635 * state to gain improper privileges.
636 */
637struct sigreturn_args {
638	struct sigcontext *sigcntxp;
639};
640
641int
642sigreturn(p, uap, retval)
643	struct proc *p;
644	struct sigreturn_args *uap;
645	int *retval;
646{
647	register struct sigcontext *scp;
648	register struct sigframe *fp;
649	register int *regs = p->p_md.md_regs;
650	int eflags;
651
652	/*
653	 * (XXX old comment) regs[tESP] points to the return address.
654	 * The user scp pointer is above that.
655	 * The return address is faked in the signal trampoline code
656	 * for consistency.
657	 */
658	scp = uap->sigcntxp;
659	fp = (struct sigframe *)
660	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
661
662	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
663		return(EINVAL);
664
665	/*
666	 * Don't allow users to change privileged or reserved flags.
667	 */
668#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
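	/*
	 * EFLAGS_SECURE() is true only if every bit that differs between the
	 * new and old eflags values lies within PSL_USERCHANGE.
	 */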
669	eflags = scp->sc_ps;
670	/*
671	 * XXX do allow users to change the privileged flag PSL_RF.  The
672	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
673	 * sometimes set it there too.  tf_eflags is kept in the signal
674	 * context during signal handling and there is no other place
675	 * to remember it, so the PSL_RF bit may be corrupted by the
676	 * signal handler without us knowing.  Corruption of the PSL_RF
677	 * bit at worst causes one more or one less debugger trap, so
678	 * allowing it is fairly harmless.
679	 */
680	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
681#ifdef DEBUG
682    		printf("sigreturn: eflags = 0x%x\n", eflags);
683#endif
684    		return(EINVAL);
685	}
686
687	/*
688	 * Don't allow users to load a valid privileged %cs.  Let the
689	 * hardware check for invalid selectors, excess privilege in
690	 * other selectors, invalid %eip's and invalid %esp's.
691	 */
692#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
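	/* CS_SECURE() accepts a %cs value only if its selector privilege level is SEL_UPL (user). */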
693	if (!CS_SECURE(scp->sc_cs)) {
694#ifdef DEBUG
695    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
696#endif
697		trapsignal(p, SIGBUS, T_PROTFLT);
698		return(EINVAL);
699	}
700
701	/* restore scratch registers */
702	regs[tEAX] = scp->sc_eax;
703	regs[tEBX] = scp->sc_ebx;
704	regs[tECX] = scp->sc_ecx;
705	regs[tEDX] = scp->sc_edx;
706	regs[tESI] = scp->sc_esi;
707	regs[tEDI] = scp->sc_edi;
708	regs[tCS] = scp->sc_cs;
709	regs[tDS] = scp->sc_ds;
710	regs[tES] = scp->sc_es;
711	regs[tSS] = scp->sc_ss;
712	regs[tISP] = scp->sc_isp;
713
714	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
715		return(EINVAL);
716
717	if (scp->sc_onstack & 01)
718		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
719	else
720		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
721	p->p_sigmask = scp->sc_mask &~
722	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
723	regs[tEBP] = scp->sc_fp;
724	regs[tESP] = scp->sc_sp;
725	regs[tEIP] = scp->sc_pc;
726	regs[tEFLAGS] = eflags;
727	return(EJUSTRETURN);
728}
729
730/*
731 * a simple function to make the system panic (and dump a vmcore)
732 * in a predictable fashion
733 */
734void diediedie()
735{
736	panic("because you said to!");
737}
738
739int	waittime = -1;
740struct pcb dumppcb;
741
742__dead void
743boot(arghowto)
744	int arghowto;
745{
746	register long dummy;		/* r12 is reserved */
747	register int howto;		/* r11 == how to boot */
748	register int devtype;		/* r10 == major of root dev */
749
750	if (cold) {
751		printf("hit reset please");
752		for(;;);
753	}
754	howto = arghowto;
755	if ((howto&RB_NOSYNC) == 0 && waittime < 0) {
756		register struct buf *bp;
757		int iter, nbusy;
758
759		waittime = 0;
760		printf("\nsyncing disks... ");
761		/*
762		 * Release inodes held by texts before update.
763		 */
764		if (panicstr == 0)
765			vnode_pager_umount(NULL);
766		sync(&proc0, NULL, NULL);
767
768		for (iter = 0; iter < 20; iter++) {
769			nbusy = 0;
770			for (bp = &buf[nbuf]; --bp >= buf; ) {
771				if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) {
772					nbusy++;
773				}
774			}
775			if (nbusy == 0)
776				break;
777			printf("%d ", nbusy);
778			DELAY(40000 * iter);
779		}
780		if (nbusy) {
781			/*
782			 * Failed to sync all blocks. Indicate this and don't
783			 * unmount filesystems (thus forcing an fsck on reboot).
784			 */
785			printf("giving up\n");
786		} else {
787			printf("done\n");
788			/*
789			 * Unmount filesystems
790			 */
791			if (panicstr == 0)
792				vfs_unmountall();
793		}
794		DELAY(100000);			/* wait for console output to finish */
795		dev_shutdownall(FALSE);
796	}
797	splhigh();
798	devtype = major(rootdev);
799	if (howto&RB_HALT) {
800		printf("\n");
801		printf("The operating system has halted.\n");
802		printf("Please press any key to reboot.\n\n");
803		cngetc();
804	} else {
805		if (howto & RB_DUMP) {
806			savectx(&dumppcb, 0);
807			dumppcb.pcb_ptd = rcr3();
808			dumpsys();
809
810			if (PANIC_REBOOT_WAIT_TIME != 0) {
811				if (PANIC_REBOOT_WAIT_TIME != -1) {
812					int loop;
813					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
814						PANIC_REBOOT_WAIT_TIME);
815					for (loop = PANIC_REBOOT_WAIT_TIME; loop > 0; --loop) {
816						DELAY(1000 * 1000); /* one second */
817						if (cncheckc()) /* Did user type a key? */
818							break;
819					}
820					if (!loop)
821						goto die;
822				}
823			} else { /* zero time specified - reboot NOW */
824				goto die;
825			}
826			printf("--> Press a key on the console to reboot <--\n");
827			cngetc();
828		}
829	}
830#ifdef lint
831	dummy = 0; dummy = dummy;
832	printf("howto %d, devtype %d\n", arghowto, devtype);
833#endif
834die:
835	printf("Rebooting...\n");
836	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
837	cpu_reset();
838	for(;;) ;
839	/* NOTREACHED */
840}
841
842unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
843int		dumpsize = 0;		/* also for savecore */
844
845#ifdef DODUMP
846int		dodump = 1;
847#else
848int		dodump = 0;
849#endif
850/*
851 * Doadump comes here after turning off memory management and
852 * getting on the dump stack, either when called above, or by
853 * the auto-restart code.
854 */
855void
856dumpsys()
857{
858
859	if (!dodump)
860		return;
861	if (dumpdev == NODEV)
862		return;
863	if ((minor(dumpdev)&07) != 1)
864		return;
865	dumpsize = Maxmem;
866	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
867	printf("dump ");
868	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
869
870	case ENXIO:
871		printf("device bad\n");
872		break;
873
874	case EFAULT:
875		printf("device not ready\n");
876		break;
877
878	case EINVAL:
879		printf("area improper\n");
880		break;
881
882	case EIO:
883		printf("i/o error\n");
884		break;
885
886	case EINTR:
887		printf("aborted from console\n");
888		break;
889
890	default:
891		printf("succeeded\n");
892		break;
893	}
894}
895
896static void
897initcpu()
898{
899}
900
901/*
902 * Clear registers on exec
903 */
904void
905setregs(p, entry, stack)
906	struct proc *p;
907	u_long entry;
908	u_long stack;
909{
910	int *regs = p->p_md.md_regs;
911
912	bzero(regs, sizeof(struct trapframe));
913	regs[tEIP] = entry;
914	regs[tESP] = stack;
915	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
916	regs[tSS] = _udatasel;
917	regs[tDS] = _udatasel;
918	regs[tES] = _udatasel;
919	regs[tCS] = _ucodesel;
920
921	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
922	load_cr0(rcr0() | CR0_TS);	/* start emulating */
923#if	NNPX > 0
924	npxinit(__INITIAL_NPXCW__);
925#endif	/* NNPX > 0 */
926}
927
928/*
929 * machine dependent system variables.
930 */
931int
932cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
933	int *name;
934	u_int namelen;
935	void *oldp;
936	size_t *oldlenp;
937	void *newp;
938	size_t newlen;
939	struct proc *p;
940{
941	int error;
942
943	/* all sysctl names at this level are terminal */
944	if (namelen != 1)
945		return (ENOTDIR);               /* overloaded */
946
947	switch (name[0]) {
948	case CPU_CONSDEV:
949		return (sysctl_rdstruct(oldp, oldlenp, newp, &cn_tty->t_dev,
950		   sizeof cn_tty->t_dev));
951	case CPU_ADJKERNTZ:
952		error = sysctl_int(oldp, oldlenp, newp, newlen, &adjkerntz);
953		if (!error && newp)
954			resettodr();
955		return error;
956	case CPU_DISRTCSET:
957		return (sysctl_int(oldp, oldlenp, newp,	newlen,	&disable_rtc_set));
958	default:
959		return (EOPNOTSUPP);
960	}
961	/* NOTREACHED */
962}
963
964/*
965 * Initialize 386 and configure to run kernel
966 */
967
968/*
969 * Initialize segments & interrupt table
970 */
971
972int currentldt;
973int _default_ldt;
974union descriptor gdt[NGDT];		/* global descriptor table */
975struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
976union descriptor ldt[NLDT];		/* local descriptor table */
977
978struct	i386tss	tss, panic_tss;
979
980extern  struct user *proc0paddr;
981
982/* software prototypes -- in more palatable form */
983struct soft_segment_descriptor gdt_segs[] = {
984/* GNULL_SEL	0 Null Descriptor */
985{	0x0,			/* segment base address  */
986	0x0,			/* length */
987	0,			/* segment type */
988	0,			/* segment descriptor priority level */
989	0,			/* segment descriptor present */
990	0, 0,
991	0,			/* default 32 vs 16 bit size */
992	0  			/* limit granularity (byte/page units)*/ },
993/* GCODE_SEL	1 Code Descriptor for kernel */
994{	0x0,			/* segment base address  */
995	0xfffff,		/* length - all address space */
996	SDT_MEMERA,		/* segment type */
997	0,			/* segment descriptor priority level */
998	1,			/* segment descriptor present */
999	0, 0,
1000	1,			/* default 32 vs 16 bit size */
1001	1  			/* limit granularity (byte/page units)*/ },
1002/* GDATA_SEL	2 Data Descriptor for kernel */
1003{	0x0,			/* segment base address  */
1004	0xfffff,		/* length - all address space */
1005	SDT_MEMRWA,		/* segment type */
1006	0,			/* segment descriptor priority level */
1007	1,			/* segment descriptor present */
1008	0, 0,
1009	1,			/* default 32 vs 16 bit size */
1010	1  			/* limit granularity (byte/page units)*/ },
1011/* GLDT_SEL	3 LDT Descriptor */
1012{	(int) ldt,		/* segment base address  */
1013	sizeof(ldt)-1,		/* length - all address space */
1014	SDT_SYSLDT,		/* segment type */
1015	0,			/* segment descriptor priority level */
1016	1,			/* segment descriptor present */
1017	0, 0,
1018	0,			/* unused - default 32 vs 16 bit size */
1019	0  			/* limit granularity (byte/page units)*/ },
1020/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1021{	0x0,			/* segment base address  */
1022	0x0,			/* length - all address space */
1023	0,			/* segment type */
1024	0,			/* segment descriptor priority level */
1025	0,			/* segment descriptor present */
1026	0, 0,
1027	0,			/* default 32 vs 16 bit size */
1028	0  			/* limit granularity (byte/page units)*/ },
1029/* GPANIC_SEL	5 Panic Tss Descriptor */
1030{	(int) &panic_tss,	/* segment base address  */
1031	sizeof(tss)-1,		/* length - all address space */
1032	SDT_SYS386TSS,		/* segment type */
1033	0,			/* segment descriptor priority level */
1034	1,			/* segment descriptor present */
1035	0, 0,
1036	0,			/* unused - default 32 vs 16 bit size */
1037	0  			/* limit granularity (byte/page units)*/ },
1038/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1039{	(int) kstack,		/* segment base address  */
1040	sizeof(tss)-1,		/* length - all address space */
1041	SDT_SYS386TSS,		/* segment type */
1042	0,			/* segment descriptor priority level */
1043	1,			/* segment descriptor present */
1044	0, 0,
1045	0,			/* unused - default 32 vs 16 bit size */
1046	0  			/* limit granularity (byte/page units)*/ },
1047/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1048{	(int) ldt,		/* segment base address  */
1049	(512 * sizeof(union descriptor)-1),		/* length */
1050	SDT_SYSLDT,		/* segment type */
1051	0,			/* segment descriptor priority level */
1052	1,			/* segment descriptor present */
1053	0, 0,
1054	0,			/* unused - default 32 vs 16 bit size */
1055	0  			/* limit granularity (byte/page units)*/ },
1056/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1057{	0,			/* segment base address (overwritten by APM)  */
1058	0xfffff,		/* length */
1059	SDT_MEMERA,		/* segment type */
1060	0,			/* segment descriptor priority level */
1061	1,			/* segment descriptor present */
1062	0, 0,
1063	1,			/* default 32 vs 16 bit size */
1064	1  			/* limit granularity (byte/page units)*/ },
1065/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1066{	0,			/* segment base address (overwritten by APM)  */
1067	0xfffff,		/* length */
1068	SDT_MEMERA,		/* segment type */
1069	0,			/* segment descriptor priority level */
1070	1,			/* segment descriptor present */
1071	0, 0,
1072	0,			/* default 32 vs 16 bit size */
1073	1  			/* limit granularity (byte/page units)*/ },
1074/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1075{	0,			/* segment base address (overwritten by APM) */
1076	0xfffff,		/* length */
1077	SDT_MEMRWA,		/* segment type */
1078	0,			/* segment descriptor priority level */
1079	1,			/* segment descriptor present */
1080	0, 0,
1081	1,			/* default 32 vs 16 bit size */
1082	1  			/* limit granularity (byte/page units)*/ },
1083};
1084
1085struct soft_segment_descriptor ldt_segs[] = {
1086	/* Null Descriptor - overwritten by call gate */
1087{	0x0,			/* segment base address  */
1088	0x0,			/* length - all address space */
1089	0,			/* segment type */
1090	0,			/* segment descriptor priority level */
1091	0,			/* segment descriptor present */
1092	0, 0,
1093	0,			/* default 32 vs 16 bit size */
1094	0  			/* limit granularity (byte/page units)*/ },
1095	/* Null Descriptor - overwritten by call gate */
1096{	0x0,			/* segment base address  */
1097	0x0,			/* length - all address space */
1098	0,			/* segment type */
1099	0,			/* segment descriptor priority level */
1100	0,			/* segment descriptor present */
1101	0, 0,
1102	0,			/* default 32 vs 16 bit size */
1103	0  			/* limit granularity (byte/page units)*/ },
1104	/* Null Descriptor - overwritten by call gate */
1105{	0x0,			/* segment base address  */
1106	0x0,			/* length - all address space */
1107	0,			/* segment type */
1108	0,			/* segment descriptor priority level */
1109	0,			/* segment descriptor present */
1110	0, 0,
1111	0,			/* default 32 vs 16 bit size */
1112	0  			/* limit granularity (byte/page units)*/ },
1113	/* Code Descriptor for user */
1114{	0x0,			/* segment base address  */
1115	0xfffff,		/* length - all address space */
1116	SDT_MEMERA,		/* segment type */
1117	SEL_UPL,		/* segment descriptor priority level */
1118	1,			/* segment descriptor present */
1119	0, 0,
1120	1,			/* default 32 vs 16 bit size */
1121	1  			/* limit granularity (byte/page units)*/ },
1122	/* Data Descriptor for user */
1123{	0x0,			/* segment base address  */
1124	0xfffff,		/* length - all address space */
1125	SDT_MEMRWA,		/* segment type */
1126	SEL_UPL,		/* segment descriptor priority level */
1127	1,			/* segment descriptor present */
1128	0, 0,
1129	1,			/* default 32 vs 16 bit size */
1130	1  			/* limit granularity (byte/page units)*/ },
1131};
1132
1133void
1134setidt(idx, func, typ, dpl)
1135	int idx;
1136	inthand_t *func;
1137	int typ;
1138	int dpl;
1139{
1140	struct gate_descriptor *ip = idt + idx;
1141
1142	ip->gd_looffset = (int)func;
1143	ip->gd_selector = 8;
1144	ip->gd_stkcpy = 0;
1145	ip->gd_xx = 0;
1146	ip->gd_type = typ;
1147	ip->gd_dpl = dpl;
1148	ip->gd_p = 1;
1149	ip->gd_hioffset = ((int)func)>>16 ;
1150}
1151
1152#define	IDTVEC(name)	__CONCAT(X,name)
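/*
 * IDTVEC(name) expands to Xname, the naming convention for the low-level
 * trap and interrupt entry points declared below and wired into the IDT
 * by init386().
 */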
1153
1154extern inthand_t
1155	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1156	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1157	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1158	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1159	IDTVEC(syscall);
1160
1161#ifdef COMPAT_LINUX
1162extern inthand_t
1163	IDTVEC(linux_syscall);
1164#endif
1165
1166void
1167sdtossd(sd, ssd)
1168	struct segment_descriptor *sd;
1169	struct soft_segment_descriptor *ssd;
1170{
1171	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1172	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1173	ssd->ssd_type  = sd->sd_type;
1174	ssd->ssd_dpl   = sd->sd_dpl;
1175	ssd->ssd_p     = sd->sd_p;
1176	ssd->ssd_def32 = sd->sd_def32;
1177	ssd->ssd_gran  = sd->sd_gran;
1178}
1179
1180void
1181init386(first)
1182	int first;
1183{
1184	int x;
1185	unsigned biosbasemem, biosextmem;
1186	struct gate_descriptor *gdp;
1187	int gsel_tss;
1188	/* table descriptors - used to load tables by microp */
1189	struct region_descriptor r_gdt, r_idt;
1190	int	pagesinbase, pagesinext;
1191	int	target_page;
1192
1193	proc0.p_addr = proc0paddr;
1194
1195	/*
1196	 * Initialize the console before we print anything out.
1197	 */
1198
1199	cninit ();
1200
1201	/*
1202	 * make gdt memory segments, the code segment goes up to end of the
1203	 * page with etext in it, the data segment goes to the end of
1204	 * the address space
1205	 */
1206	/*
1207	 * XXX text protection is temporarily (?) disabled.  The limit was
1208	 * i386_btop(i386_round_page(etext)) - 1.
1209	 */
1210	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1211	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1212	for (x = 0; x < NGDT; x++)
1213		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1214
1215	/* make ldt memory segments */
1216	/*
1217	 * The data segment limit must not cover the user area because we
1218	 * don't want the user area to be writable in copyout() etc. (page
1219	 * level protection is lost in kernel mode on 386's).  Also, we
1220	 * don't want the user area to be writable directly (page level
1221	 * protection of the user area is not available on 486's with
1222	 * CR0_WP set, because there is no user-read/kernel-write mode).
1223	 *
1224	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1225	 * should be spelled ...MAX_USER...
1226	 */
1227#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1228	/*
1229	 * The code segment limit has to cover the user area until we move
1230	 * the signal trampoline out of the user area.  This is safe because
1231	 * the code segment cannot be written to directly.
1232	 */
1233#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1234	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1235	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1236	/* Note. eventually want private ldts per process */
1237	for (x = 0; x < NLDT; x++)
1238		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1239
1240	/* exceptions */
1241	for (x = 0; x < NIDT; x++)
1242		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
1243	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1244	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1245	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1246 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1247	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1248	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1249	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1250	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1251	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1252	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1253	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1254	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1255	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1256	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1257	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1258	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1259	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1260	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
1261#ifdef COMPAT_LINUX
1262 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL);
1263#endif
1264
1265#include	"isa.h"
1266#if	NISA >0
1267	isa_defaultirq();
1268#endif
1269
1270	r_gdt.rd_limit = sizeof(gdt) - 1;
1271	r_gdt.rd_base =  (int) gdt;
1272	lgdt(&r_gdt);
1273
1274	r_idt.rd_limit = sizeof(idt) - 1;
1275	r_idt.rd_base = (int) idt;
1276	lidt(&r_idt);
1277
1278	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1279	lldt(_default_ldt);
1280	currentldt = _default_ldt;
1281
1282#ifdef DDB
1283	kdb_init();
1284	if (boothowto & RB_KDB)
1285		Debugger("Boot flags requested debugger");
1286#endif
1287
1288	/* Use BIOS values stored in RTC CMOS RAM, since probing
1289	 * breaks certain 386 AT relics.
1290	 */
1291	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1292	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1293
1294	/*
1295	 * Print a warning if the official BIOS interface disagrees
1296	 * with the hackish interface used above.  Eventually only
1297	 * the official interface should be used.
1298	 */
1299	if (bootinfo.bi_memsizes_valid) {
1300		if (bootinfo.bi_basemem != biosbasemem)
1301			printf("BIOS basemem (%dK) != RTC basemem (%dK)\n",
1302			       bootinfo.bi_basemem, biosbasemem);
1303		if (bootinfo.bi_extmem != biosextmem)
1304			printf("BIOS extmem (%dK) != RTC extmem (%dK)\n",
1305			       bootinfo.bi_extmem, biosextmem);
1306	}
1307
1308	/*
1309	 * If BIOS tells us that it has more than 640k in the basemem,
1310	 *	don't believe it - set it to 640k.
1311	 */
1312	if (biosbasemem > 640)
1313		biosbasemem = 640;
1314
1315	/*
1316	 * Some 386 machines might give us a bogus number for extended
1317	 *	mem. If this happens, stop now.
1318	 */
1319#ifndef LARGEMEM
1320	if (biosextmem > 65536) {
1321		panic("extended memory beyond limit of 64MB");
1322		/* NOTREACHED */
1323	}
1324#endif
1325
1326	pagesinbase = biosbasemem * 1024 / NBPG;
1327	pagesinext = biosextmem * 1024 / NBPG;
1328
1329	/*
1330	 * Special hack for chipsets that still remap the 384k hole when
1331	 *	there's 16MB of memory - this really confuses people that
1332	 *	are trying to use bus mastering ISA controllers with the
1333	 *	"16MB limit"; they only have 16MB, but the remapping puts
1334	 *	them beyond the limit.
1335	 */
1336	/*
1337	 * If extended memory is between 15-16MB (16-17MB phys address range),
1338	 *	chop it to 15MB.
1339	 */
1340	if ((pagesinext > 3840) && (pagesinext < 4096))
1341		pagesinext = 3840;
1342
1343	/*
1344	 * Maxmem isn't the "maximum memory", it's one larger than the
1345	 * highest page of the physical address space. It should be
1346	 * called something like "Maxphyspage".
1347	 */
1348	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
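	/*
	 * 0x100000/PAGE_SIZE is the number of pages below the 1MB boundary
	 * where extended memory starts, so Maxmem ends up as a page frame
	 * count measured from physical address 0.
	 */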
1349
1350#ifdef MAXMEM
1351	Maxmem = MAXMEM/4;
1352#endif
1353	/*
1354	 * Calculate number of physical pages, but account for Maxmem
1355	 *	adjustment above.
1356	 */
1357	physmem = pagesinbase + Maxmem - 0x100000/PAGE_SIZE;
1358
1359	/* call pmap initialization to make new kernel address space */
1360	pmap_bootstrap (first, 0);
1361
1362	/*
1363	 * Do a quick, non-destructive check over extended memory to verify
1364	 * what the BIOS tells us agrees with reality. Adjust down Maxmem
1365	 * if we find that the page can't be correctly written to/read from.
1366	 */
1367
1368	for (target_page = Maxmem - 1; target_page >= atop(first); target_page--) {
1369		int tmp;
1370
1371		/*
1372		 * map page into kernel: valid, read/write, non-cacheable
1373		 */
1374		*(int *)CMAP1 = PG_V | PG_KW | PG_N | ptoa(target_page);
1375		pmap_update();
1376
1377		tmp = *(int *)CADDR1;
1378		/*
1379		 * Test for alternating 1's and 0's
1380		 */
1381		*(int *)CADDR1 = 0xaaaaaaaa;
1382		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1383			Maxmem = target_page;
1384			badpages++;
1385			continue;
1386		}
1387		/*
1388		 * Test for alternating 0's and 1's
1389		 */
1390		*(int *)CADDR1 = 0x55555555;
1391		if (*(int *)CADDR1 != 0x55555555) {
1392			Maxmem = target_page;
1393			badpages++;
1394			continue;
1395		}
1396		/*
1397		 * Test for all 1's
1398		 */
1399		*(int *)CADDR1 = 0xffffffff;
1400		if (*(int *)CADDR1 != 0xffffffff) {
1401			Maxmem = target_page;
1402			badpages++;
1403			continue;
1404		}
1405		/*
1406		 * Test for all 0's
1407		 */
1408		*(int *)CADDR1 = 0x0;
1409		if (*(int *)CADDR1 != 0x0) {
1410			/*
1411			 * test of page failed
1412			 */
1413			Maxmem = target_page;
1414			badpages++;
1415			continue;
1416		}
1417		*(int *)CADDR1 = tmp;
1418	}
1419	if (badpages != 0)
1420		printf("WARNING: BIOS extended memory size and reality don't agree.\n");
1421
1422	*(int *)CMAP1 = 0;
1423	pmap_update();
1424
1425	avail_end = (Maxmem << PAGE_SHIFT)
1426		    - i386_round_page(sizeof(struct msgbuf));
1427
1428	/*
1429	 * Initialize pointers to the two chunks of memory; for use
1430	 *	later in vm_page_startup.
1431	 */
1432	/* avail_start is initialized in pmap_bootstrap */
1433	x = 0;
1434	if (pagesinbase > 1) {
1435		phys_avail[x++] = NBPG;		/* skip first page of memory */
1436		phys_avail[x++] = pagesinbase * NBPG;	/* memory up to the ISA hole */
1437	}
1438	phys_avail[x++] = avail_start;	/* memory up to the end */
1439	phys_avail[x++] = avail_end;
1440	phys_avail[x++] = 0;		/* no more chunks */
1441	phys_avail[x++] = 0;
1442
1443	/* now running on new page tables, configured, and u/iom is accessible */
1444
1445	/* make an initial tss so microp can get interrupt stack on syscall! */
1446	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1447	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1448	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1449
1450	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1451		(sizeof(tss))<<16;
1452
1453	ltr(gsel_tss);
1454
1455	/* make a call gate to reenter kernel with */
1456	gdp = &ldt[LSYS5CALLS_SEL].gd;
1457
1458	x = (int) &IDTVEC(syscall);
1459	gdp->gd_looffset = x++;
1460	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1461	gdp->gd_stkcpy = 1;
1462	gdp->gd_type = SDT_SYS386CGT;
1463	gdp->gd_dpl = SEL_UPL;
1464	gdp->gd_p = 1;
1465	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1466
1467	/* transfer to user mode */
1468
1469	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1470	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1471
1472	/* setup proc 0's pcb */
1473	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1474	proc0.p_addr->u_pcb.pcb_flags = 0;
1475	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1476}
1477
1478/*
1479 * The registers are in the frame; the frame is in the user area of
1480 * the process in question; when the process is active, the registers
1481 * are in "the kernel stack"; when it's not, they're still there, but
1482 * things get flipped around.  So, since p->p_md.md_regs is the whole address
1483 * of the register set, take its offset from the kernel stack, and
1484 * index into the user block.  Don't you just *love* virtual memory?
1485 * (I'm starting to think seymour is right...)
1486 */
1487#define	TF_REGP(p)	((struct trapframe *) \
1488			 ((char *)(p)->p_addr \
1489			  + ((char *)(p)->p_md.md_regs - kstack)))
1490
1491int
1492ptrace_set_pc(p, addr)
1493	struct proc *p;
1494	unsigned int addr;
1495{
1496	TF_REGP(p)->tf_eip = addr;
1497	return (0);
1498}
1499
1500int
1501ptrace_single_step(p)
1502	struct proc *p;
1503{
1504	TF_REGP(p)->tf_eflags |= PSL_T;
1505	return (0);
1506}
1507
1508int
1509ptrace_getregs(p, addr)
1510	struct proc *p;
1511	unsigned int *addr;
1512{
1513	int error;
1514	struct reg regs;
1515
1516	error = fill_regs(p, &regs);
1517	if (error)
1518		return (error);
1519	return (copyout(&regs, addr, sizeof regs));
1520}
1521
1522int
1523ptrace_setregs(p, addr)
1524	struct proc *p;
1525	unsigned int *addr;
1526{
1527	int error;
1528	struct reg regs;
1529
1530	error = copyin(addr, &regs, sizeof regs);
1531	if (error)
1532		return (error);
1533	return (set_regs(p, &regs));
1534}
1535
1536int ptrace_write_u(p, off, data)
1537	struct proc *p;
1538	vm_offset_t off;
1539	int data;
1540{
1541	struct trapframe frame_copy;
1542	vm_offset_t min;
1543	struct trapframe *tp;
1544
1545	/*
1546	 * Privileged kernel state is scattered all over the user area.
1547	 * Only allow write access to parts of regs and to fpregs.
1548	 */
1549	min = (char *)p->p_md.md_regs - kstack;
1550	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1551		tp = TF_REGP(p);
1552		frame_copy = *tp;
1553		*(int *)((char *)&frame_copy + (off - min)) = data;
1554		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1555		    !CS_SECURE(frame_copy.tf_cs))
1556			return (EINVAL);
1557		*(int*)((char *)p->p_addr + off) = data;
1558		return (0);
1559	}
1560	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1561	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1562		*(int*)((char *)p->p_addr + off) = data;
1563		return (0);
1564	}
1565	return (EFAULT);
1566}
1567
1568int
1569fill_regs(p, regs)
1570	struct proc *p;
1571	struct reg *regs;
1572{
1573	struct trapframe *tp;
1574
1575	tp = TF_REGP(p);
1576	regs->r_es = tp->tf_es;
1577	regs->r_ds = tp->tf_ds;
1578	regs->r_edi = tp->tf_edi;
1579	regs->r_esi = tp->tf_esi;
1580	regs->r_ebp = tp->tf_ebp;
1581	regs->r_ebx = tp->tf_ebx;
1582	regs->r_edx = tp->tf_edx;
1583	regs->r_ecx = tp->tf_ecx;
1584	regs->r_eax = tp->tf_eax;
1585	regs->r_eip = tp->tf_eip;
1586	regs->r_cs = tp->tf_cs;
1587	regs->r_eflags = tp->tf_eflags;
1588	regs->r_esp = tp->tf_esp;
1589	regs->r_ss = tp->tf_ss;
1590	return (0);
1591}
1592
1593int
1594set_regs(p, regs)
1595	struct proc *p;
1596	struct reg *regs;
1597{
1598	struct trapframe *tp;
1599
1600	tp = TF_REGP(p);
1601	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1602	    !CS_SECURE(regs->r_cs))
1603		return (EINVAL);
1604	tp->tf_es = regs->r_es;
1605	tp->tf_ds = regs->r_ds;
1606	tp->tf_edi = regs->r_edi;
1607	tp->tf_esi = regs->r_esi;
1608	tp->tf_ebp = regs->r_ebp;
1609	tp->tf_ebx = regs->r_ebx;
1610	tp->tf_edx = regs->r_edx;
1611	tp->tf_ecx = regs->r_ecx;
1612	tp->tf_eax = regs->r_eax;
1613	tp->tf_eip = regs->r_eip;
1614	tp->tf_cs = regs->r_cs;
1615	tp->tf_eflags = regs->r_eflags;
1616	tp->tf_esp = regs->r_esp;
1617	tp->tf_ss = regs->r_ss;
1618	return (0);
1619}
1620
1621#ifndef DDB
1622void
1623Debugger(const char *msg)
1624{
1625	printf("Debugger(\"%s\") called.\n", msg);
1626}
1627#endif /* no DDB */
1628
1629#include <sys/disklabel.h>
1630#define b_cylin	b_resid
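/*
 * b_cylin is overlaid on b_resid so bounds_check_with_label() can record
 * the cylinder number that disksort uses to order transfers.
 */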
1631/*
1632 * Determine the size of the transfer, and make sure it is
1633 * within the boundaries of the partition. Adjust transfer
1634 * if needed, and signal errors or early completion.
1635 */
1636int
1637bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1638{
1639        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1640        int labelsect = lp->d_partitions[0].p_offset;
1641        int maxsz = p->p_size,
1642                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1643
1644        /* overwriting disk label ? */
1645        /* XXX should also protect bootstrap in first 8K */
1646        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1647#if LABELSECTOR != 0
1648            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1649#endif
1650            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1651                bp->b_error = EROFS;
1652                goto bad;
1653        }
1654
1655#if     defined(DOSBBSECTOR) && defined(notyet)
1656        /* overwriting master boot record? */
1657        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1658            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1659                bp->b_error = EROFS;
1660                goto bad;
1661        }
1662#endif
1663
1664        /* beyond partition? */
1665        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1666                /* if exactly at end of disk, return an EOF */
1667                if (bp->b_blkno == maxsz) {
1668                        bp->b_resid = bp->b_bcount;
1669                        return(0);
1670                }
1671                /* or truncate if part of it fits */
1672                sz = maxsz - bp->b_blkno;
1673                if (sz <= 0) {
1674                        bp->b_error = EINVAL;
1675                        goto bad;
1676                }
1677                bp->b_bcount = sz << DEV_BSHIFT;
1678        }
1679
1680        /* calculate cylinder for disksort to order transfers with */
1681        bp->b_pblkno = bp->b_blkno + p->p_offset;
1682        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1683        return(1);
1684
1685bad:
1686        bp->b_flags |= B_ERROR;
1687        return(-1);
1688}
1689
1690int
1691disk_externalize(int drive, void *userp, size_t *maxlen)
1692{
1693	if(*maxlen < sizeof drive) {
1694		return ENOMEM;
1695	}
1696
1697	*maxlen -= sizeof drive;
1698	return copyout(&drive, userp, sizeof drive);
1699}
1700