machdep.c revision 9578
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.132 1995/07/16 10:33:38 phk Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/signalvar.h>
47#include <sys/kernel.h>
48#include <sys/proc.h>
49#include <sys/user.h>
50#include <sys/buf.h>
51#include <sys/reboot.h>
52#include <sys/conf.h>
53#include <sys/file.h>
54#include <sys/callout.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/mount.h>
58#include <sys/msgbuf.h>
59#include <sys/ioctl.h>
60#include <sys/sysent.h>
61#include <sys/tty.h>
62#include <sys/sysctl.h>
63#include <sys/devconf.h>
64
65#ifdef SYSVSHM
66#include <sys/shm.h>
67#endif
68
69#ifdef SYSVMSG
70#include <sys/msg.h>
71#endif
72
73#ifdef SYSVSEM
74#include <sys/sem.h>
75#endif
76
77#include <vm/vm.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_page.h>
80#include <vm/vm_pager.h>
81
82#include <sys/exec.h>
83#include <sys/vnode.h>
84
85#include <ddb/ddb.h>
86
87#include <net/netisr.h>
88
89/* XXX correctly declaring all the netisr's is painful. */
90#include <net/if.h>
91#include <net/route.h>
92
93#include <netinet/in.h>
94#include <netinet/in_systm.h>
95#include <netinet/ip.h>
96#include <netinet/if_ether.h>
97#include <netinet/ip_var.h>
98
99#include <netns/ns.h>
100#include <netns/ns_if.h>
101
102#include <netiso/iso.h>
103#include <netiso/iso_var.h>
104
105#include <netccitt/dll.h>
106#include <netccitt/x25.h>
107#include <netccitt/pk.h>
108#include <sys/socketvar.h>
109#include <netccitt/pk_var.h>
110
111#include "ether.h"
112
113#include <machine/cpu.h>
114#include <machine/npx.h>
115#include <machine/reg.h>
116#include <machine/psl.h>
117#include <machine/clock.h>
118#include <machine/specialreg.h>
119#include <machine/sysarch.h>
120#include <machine/cons.h>
121#include <machine/devconf.h>
122#include <machine/bootinfo.h>
123#include <machine/md_var.h>
124
125#include <i386/isa/isa.h>
126#include <i386/isa/isa_device.h>
127#include <i386/isa/rtc.h>
128
/* File-local helpers; both are called from cpu_startup(). */
static void identifycpu(void);
static void initcpu(void);

/* Name of the machine architecture. */
char machine[] = "i386";
/*
 * Printable CPU description.  Written by identifycpu() and also
 * referenced from kdc_cpu0 so the text is not stored twice.
 */
char cpu_model[128];
134
/*
 * Device-configuration record describing the CPU.  It is handed to
 * dev_attach() at the end of identifycpu().  One of its fields points
 * straight at cpu_model rather than holding a private copy of the text.
 */
struct kern_devconf kdc_cpu0 = {
	0, 0, 0,		/* filled in by dev_attach */
	"cpu", 0, { MDDT_CPU },
	0, 0, 0, CPU_EXTERNALLEN,
	0,			/* CPU has no parent */
	0,			/* no parentdata */
	DC_BUSY,		/* the CPU is always busy */
	cpu_model,		/* no sense in duplication */
	DC_CLS_CPU		/* class */
};
145
/*
 * Seconds boot() waits after a crash dump before rebooting on its own.
 * boot() treats 0 as "reboot immediately" and -1 as "wait for a key
 * press with no countdown".
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif

/*
 * Declare these as initialized data so we can patch them.
 *
 * nbuf:   number of buffer headers; when left at 0, cpu_startup()
 *         derives a value from physmem.
 * nswbuf: number of swap buffer headers.  Note that cpu_startup()
 *         unconditionally overwrites it with min(nbuf, 128), so a
 *         patched value here does not survive startup.
 */
int	nswbuf = 0;
#ifdef	NBUF
int	nbuf = NBUF;
#else
int	nbuf = 0;
#endif

#ifdef BOUNCE_BUFFERS
/* Start of the bounce-buffer memory; assigned by valloc() in cpu_startup(). */
extern char *bouncememory;
/* Size added to clean_map for io_map in cpu_startup(). */
extern int maxbkva;
/*
 * Number of bounce pages.  When 0 and Maxmem exceeds 4096,
 * cpu_startup() computes a value from Maxmem.
 */
#ifdef BOUNCEPAGES
int	bouncepages = BOUNCEPAGES;
#else
int	bouncepages = 0;
#endif
#endif	/* BOUNCE_BUFFERS */

extern int freebufspace;
int	msgbufmapped = 0;		/* set when safe to use msgbuf */
/*
 * User data and code segment selectors.  sendsig() and setregs() load
 * them into the saved %ds/%es/%ss and %cs of a process.
 */
int _udatasel, _ucodesel;
173
174
/*
 * Machine-dependent startup code
 */
/*
 * boothowto:   RB_* boot flags (tested against RB_VERBOSE, RB_CONFIG).
 * bootverbose: incremented by cpu_startup() when RB_VERBOSE is set.
 * Maxmem:      memory size; cpu_startup() equates "> 4096" with "more
 *              than 16MB", so presumably counted in 4K pages -- confirm.
 * badpages:    non-zero makes cpu_startup() print the memory holes.
 * physmem:     used by cpu_startup() to size the buffer cache.
 */
int boothowto = 0, bootverbose = 0, Maxmem = 0, badpages = 0, physmem = 0;
long dumplo;		/* dump offset printed by dumpsys() */
extern int bootdev;
int biosmem;

/*
 * Table of physical memory chunk boundaries; cpu_startup() walks it in
 * steps of two entries until it meets a zero entry.
 */
vm_offset_t	phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)

/* CPUCLASS_* value of the running processor; set by identifycpu(). */
int cpu_class;

void dumpsys __P((void));
void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */

/*
 * Start/end addresses of the buffer, clean and pager submaps, filled in
 * by the kmem_suballoc() calls in cpu_startup().
 */
vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
vm_offset_t pager_sva, pager_eva;
extern int pager_map_size;
/* Linker set of netisr table entries consumed by setup_netisrs(). */
extern struct linker_set netisr_set;

/* Byte offset of `member' within `type'; used by sigreturn(). */
#define offsetof(type, member)	((size_t)(&((type *)0)->member))
200
201void
202cpu_startup()
203{
204	register unsigned i;
205	register caddr_t v;
206	vm_offset_t maxaddr;
207	vm_size_t size = 0;
208	int firstaddr, indx;
209	vm_offset_t minaddr;
210
211	if (boothowto & RB_VERBOSE)
212		bootverbose++;
213
214	/*
215	 * Initialize error message buffer (at end of core).
216	 */
217
218	/* avail_end was pre-decremented in init_386() to compensate */
219	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
220		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
221			   avail_end + i * NBPG,
222			   VM_PROT_ALL, TRUE);
223	msgbufmapped = 1;
224
225	/*
226	 * Good {morning,afternoon,evening,night}.
227	 */
228	printf(version);
229	startrtclock();
230	identifycpu();
231	/*
232	 * Display any holes after the first chunk of extended memory.
233	 */
234	if (badpages != 0) {
235		int indx = 1;
236
237		/*
238		 * XXX skip reporting ISA hole & unmanaged kernel memory
239		 */
240		if (phys_avail[0] == PAGE_SIZE)
241			indx += 2;
242
243		printf("Physical memory hole(s):\n");
244		for (; phys_avail[indx + 1] != 0; indx += 2) {
245			int size = phys_avail[indx + 1] - phys_avail[indx];
246
247			printf("0x%08x - 0x%08x, %d bytes (%d pages)\n", phys_avail[indx],
248			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
249		}
250	}
251
252	/*
253	 * Quickly wire in netisrs.
254	 */
255	setup_netisrs(&netisr_set);
256
257/*
258#ifdef ISDN
259	DONET(isdnintr, NETISR_ISDN);
260#endif
261*/
262
263	/*
264	 * Allocate space for system data structures.
265	 * The first available kernel virtual address is in "v".
266	 * As pages of kernel virtual memory are allocated, "v" is incremented.
267	 * As pages of memory are allocated and cleared,
268	 * "firstaddr" is incremented.
269	 * An index into the kernel page table corresponding to the
270	 * virtual memory address maintained in "v" is kept in "mapaddr".
271	 */
272
273	/*
274	 * Make two passes.  The first pass calculates how much memory is
275	 * needed and allocates it.  The second pass assigns virtual
276	 * addresses to the various data structures.
277	 */
278	firstaddr = 0;
279again:
280	v = (caddr_t)firstaddr;
281
282#define	valloc(name, type, num) \
283	    (name) = (type *)v; v = (caddr_t)((name)+(num))
284#define	valloclim(name, type, num, lim) \
285	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
286	valloc(callout, struct callout, ncallout);
287#ifdef SYSVSHM
288	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
289#endif
290#ifdef SYSVSEM
291	valloc(sema, struct semid_ds, seminfo.semmni);
292	valloc(sem, struct sem, seminfo.semmns);
293	/* This is pretty disgusting! */
294	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
295#endif
296#ifdef SYSVMSG
297	valloc(msgpool, char, msginfo.msgmax);
298	valloc(msgmaps, struct msgmap, msginfo.msgseg);
299	valloc(msghdrs, struct msg, msginfo.msgtql);
300	valloc(msqids, struct msqid_ds, msginfo.msgmni);
301#endif
302
303	if (nbuf == 0) {
304		nbuf = 30;
305		if( physmem > 1024)
306			nbuf += min((physmem - 1024) / 12, 1024);
307	}
308	nswbuf = min(nbuf, 128);
309
310	valloc(swbuf, struct buf, nswbuf);
311	valloc(buf, struct buf, nbuf);
312
313#ifdef BOUNCE_BUFFERS
314	/*
315	 * If there is more than 16MB of memory, allocate some bounce buffers
316	 */
317	if (Maxmem > 4096) {
318		if (bouncepages == 0) {
319			bouncepages = 64;
320			bouncepages += ((Maxmem - 4096) / 2048) * 32;
321		}
322		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
323		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
324	}
325#endif
326
327	/*
328	 * End of first pass, size has been calculated so allocate memory
329	 */
330	if (firstaddr == 0) {
331		size = (vm_size_t)(v - firstaddr);
332		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
333		if (firstaddr == 0)
334			panic("startup: no room for tables");
335		goto again;
336	}
337
338	/*
339	 * End of second pass, addresses have been assigned
340	 */
341	if ((vm_size_t)(v - firstaddr) != size)
342		panic("startup: table size inconsistency");
343
344#ifdef BOUNCE_BUFFERS
345	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
346			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
347				maxbkva + pager_map_size, TRUE);
348	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
349#else
350	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
351			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
352#endif
353	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
354				(nbuf*MAXBSIZE), TRUE);
355	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
356				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
357	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
358				(16*ARG_MAX), TRUE);
359	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
360				(maxproc*UPAGES*PAGE_SIZE), FALSE);
361
362	/*
363	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
364	 * we use the more space efficient malloc in place of kmem_alloc.
365	 */
366	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
367				   M_MBUF, M_NOWAIT);
368	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
369	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
370			       nmbclusters * MCLBYTES, FALSE);
371	/*
372	 * Initialize callouts
373	 */
374	callfree = callout;
375	for (i = 1; i < ncallout; i++)
376		callout[i-1].c_next = &callout[i];
377
378        if (boothowto & RB_CONFIG)
379		userconfig();
380	printf("avail memory = %d (%d pages)\n", ptoa(cnt.v_free_count), cnt.v_free_count);
381
382#ifdef BOUNCE_BUFFERS
383	/*
384	 * init bounce buffers
385	 */
386	vm_bounce_init();
387#endif
388
389	/*
390	 * Set up CPU-specific registers, cache, etc.
391	 */
392	initcpu();
393
394	/*
395	 * Set up buffers, so they can be used to read disk labels.
396	 */
397	bufinit();
398	vm_pager_bufferinit();
399
400	/*
401	 * Configure the system.
402	 */
403	configure();
404	if (bootverbose) {
405		printf("BIOS Geometries:\n");
406		for (i=0; i < N_BIOS_GEOM; i++) {
407			int j = bootinfo.bi_bios_geom[i];
408			if (j == 0x4f010f)
409				continue;
410			printf(" %x:%08x", i, j);
411			printf(" %d cyl, %d heads, %d sects\n",
412				j >> 16, (j >> 8) & 0xff, j & 0xff);
413
414		}
415		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
416	}
417}
418
419void
420setup_netisrs(struct linker_set *ls)
421{
422	int i;
423	const struct netisrtab *nit;
424
425	for(i = 0; ls->ls_items[i]; i++) {
426		nit = (const struct netisrtab *)ls->ls_items[i];
427		netisrs[nit->nit_num] = nit->nit_isr;
428	}
429}
430
/*
 * CPU name and class table.  identifycpu() indexes it directly with the
 * global `cpu' value, so the order of the rows must match the CPU_*
 * identifiers noted on each line.
 */
struct cpu_nameclass i386_cpus[] = {
	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
};
440
441static void
442identifycpu()
443{
444	printf("CPU: ");
445	if (cpu >= 0
446	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
447		cpu_class = i386_cpus[cpu].cpu_class;
448		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
449	} else {
450		printf("unknown cpu type %d\n", cpu);
451		panic("startup: bad cpu id");
452	}
453
454#if defined(I586_CPU)
455	if(cpu_class == CPUCLASS_586) {
456		calibrate_cyclecounter();
457		printf("%d-MHz ", pentium_mhz);
458	}
459#endif
460#if defined(I486_CPU) || defined(I586_CPU)
461	if (!strcmp(cpu_vendor,"GenuineIntel")) {
462		if ((cpu_id & 0xf00) > 3) {
463			cpu_model[0] = '\0';
464
465			switch (cpu_id & 0x3000) {
466			case 0x1000:
467				strcpy(cpu_model, "Overdrive ");
468				break;
469			case 0x2000:
470				strcpy(cpu_model, "Dual ");
471				break;
472			}
473			if ((cpu_id & 0xf00) == 0x400) {
474				strcat(cpu_model, "i486 ");
475#if defined(I586_CPU)
476			} else if ((cpu_id & 0xf00) == 0x500) {
477				strcat(cpu_model, "Pentium ");
478#endif
479			} else {
480				strcat(cpu_model, "unknown ");
481			}
482
483			switch (cpu_id & 0xff0) {
484			case 0x400:
485				strcat(cpu_model, "DX"); break;
486			case 0x410:
487				strcat(cpu_model, "DX"); break;
488			case 0x420:
489				strcat(cpu_model, "SX"); break;
490			case 0x430:
491				strcat(cpu_model, "DX2"); break;
492			case 0x440:
493				strcat(cpu_model, "SL"); break;
494			case 0x450:
495				strcat(cpu_model, "SX2"); break;
496			case 0x470:
497				strcat(cpu_model, "DX2 Write-Back Enhanced");
498				break;
499			case 0x480:
500				strcat(cpu_model, "DX4"); break;
501#if defined(I586_CPU)
502			case 0x510:
503				if (pentium_mhz == 60) {
504					strcat(cpu_model, "510\\60");
505				} else if (pentium_mhz == 66) {
506					strcat(cpu_model, "567\\66");
507				} else {
508					strcat(cpu_model,"510\\60 or 567\\66");
509				}
510				break;
511			case 0x520:
512				if (pentium_mhz == 90) {
513					strcat(cpu_model, "735\\90");
514				} else if (pentium_mhz == 100) {
515					strcat(cpu_model, "815\\100");
516				} else {
517					strcat(cpu_model,"735\\90 or 815\\100");
518				}
519				break;
520#endif
521			}
522		}
523	}
524#endif
525	printf("%s (", cpu_model);
526	switch(cpu_class) {
527	case CPUCLASS_286:
528		printf("286");
529		break;
530#if defined(I386_CPU)
531	case CPUCLASS_386:
532		printf("386");
533		break;
534#endif
535#if defined(I486_CPU)
536	case CPUCLASS_486:
537		printf("486");
538		break;
539#endif
540#if defined(I586_CPU)
541	case CPUCLASS_586:
542		printf("Pentium");
543		break;
544#endif
545	default:
546		printf("unknown");	/* will panic below... */
547	}
548	printf("-class CPU)\n");
549#if defined(I486_CPU) || defined(I586_CPU)
550	if(*cpu_vendor)
551		printf("  Origin = \"%s\"",cpu_vendor);
552	if(cpu_id)
553		printf("  Id = 0x%lx",cpu_id);
554
555	if (!strcmp(cpu_vendor, "GenuineIntel")) {
556		printf("  Stepping=%ld", cpu_id & 0xf);
557		if (cpu_high > 0) {
558#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
559			printf("\n  Features=0x%b", cpu_feature, FEATUREFMT);
560		}
561	}
562	/* Avoid ugly blank lines: only print newline when we have to. */
563	if (*cpu_vendor || cpu_id)
564		printf("\n");
565#endif
566	/*
567	 * Now that we have told the user what they have,
568	 * let them know if that machine type isn't configured.
569	 */
570	switch (cpu_class) {
571	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
572#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
573#error This kernel is not configured for one of the supported CPUs
574#endif
575#if !defined(I386_CPU)
576	case CPUCLASS_386:
577#endif
578#if !defined(I486_CPU)
579	case CPUCLASS_486:
580#endif
581#if !defined(I586_CPU)
582	case CPUCLASS_586:
583#endif
584		panic("CPU class not configured");
585	default:
586		break;
587	}
588	dev_attach(&kdc_cpu0);
589}
590
/*
 * Send an interrupt to process.
 *
 * Builds a struct sigframe holding the handler arguments and the
 * interrupted register state, copies it onto the user stack (or onto
 * the alternate signal stack when that is enabled, wanted for this
 * signal and not already in use), and then rewrites the saved
 * registers so the process resumes at the pcb_sigc code of the pcb at
 * kstack with %esp pointing at the new frame.  The context stored in
 * the frame is what sigreturn() (below) later uses to restore the
 * signal mask, the stack, the frame pointer and the user pc and psl.
 *
 * catcher: handler address, stored in the frame as sf_handler
 * sig:     signal number; translated through p_sysent->sv_sigtbl
 *          when the process has such a table
 * mask:    signal mask stored in the context as sc_mask
 * code:    stored in the frame as sf_code
 *
 * If the frame cannot be placed, the process is sent a SIGILL with the
 * default action forced; if the copyout fails, sigexit() is called.
 */
void
sendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig, mask;
	unsigned code;
{
	register struct proc *p = curproc;
	register int *regs;
	register struct sigframe *fp;
	struct sigframe sf;
	struct sigacts *psp = p->p_sigacts;
	int oonstack;

	regs = p->p_md.md_regs;
	/* remember whether we were already on the signal stack */
        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
	/*
	 * Allocate and validate space for the signal handler
	 * context: at the top of the alternate stack if it is to be
	 * used, otherwise just below the current user %esp.
	 */
        if ((psp->ps_flags & SAS_ALTSTACK) &&
	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
	    (psp->ps_sigonstack & sigmask(sig))) {
		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
	} else {
		fp = (struct sigframe *)(regs[tESP]
			- sizeof(struct sigframe));
	}

	/*
	 * grow() will return FALSE if the fp will not fit inside the stack
	 *	and the stack can not be grown. useracc will return FALSE
	 *	if access is denied.
	 */
	if ((grow(p, (int)fp) == FALSE) ||
	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 * `sig' is reused here as the SIGILL mask bit so the
		 * signal can be neither ignored, caught nor blocked.
		 */
		SIGACTION(p, SIGILL) = SIG_DFL;
		sig = sigmask(SIGILL);
		p->p_sigignore &= ~sig;
		p->p_sigcatch &= ~sig;
		p->p_sigmask &= ~sig;
		psignal(p, SIGILL);
		return;
	}

	/*
	 * Build the argument list for the signal handler.
	 * Signals beyond the translation table map to sv_sigsize + 1.
	 */
	if (p->p_sysent->sv_sigtbl) {
		if (sig < p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[sig];
		else
			sig = p->p_sysent->sv_sigsize + 1;
	}
	sf.sf_signum = sig;
	sf.sf_code = code;
	/* user-space address of the context inside the frame */
	sf.sf_scp = &fp->sf_sc;
	sf.sf_addr = (char *) regs[tERR];
	sf.sf_handler = catcher;

	/* save scratch registers */
	sf.sf_sc.sc_eax = regs[tEAX];
	sf.sf_sc.sc_ebx = regs[tEBX];
	sf.sf_sc.sc_ecx = regs[tECX];
	sf.sf_sc.sc_edx = regs[tEDX];
	sf.sf_sc.sc_esi = regs[tESI];
	sf.sf_sc.sc_edi = regs[tEDI];
	sf.sf_sc.sc_cs = regs[tCS];
	sf.sf_sc.sc_ds = regs[tDS];
	sf.sf_sc.sc_ss = regs[tSS];
	sf.sf_sc.sc_es = regs[tES];
	sf.sf_sc.sc_isp = regs[tISP];

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	sf.sf_sc.sc_onstack = oonstack;
	sf.sf_sc.sc_mask = mask;
	sf.sf_sc.sc_sp = regs[tESP];
	sf.sf_sc.sc_fp = regs[tEBP];
	sf.sf_sc.sc_pc = regs[tEIP];
	sf.sf_sc.sc_ps = regs[tEFLAGS];

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		sigexit(p, SIGILL);
	};

	/*
	 * Redirect the process: stack at the new frame, pc at the
	 * pcb_sigc code, PSL_VM cleared, user selectors loaded.
	 */
	regs[tESP] = (int)fp;
	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
	regs[tEFLAGS] &= ~PSL_VM;
	regs[tCS] = _ucodesel;
	regs[tDS] = _udatasel;
	regs[tES] = _udatasel;
	regs[tSS] = _udatasel;
}
711
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 *
 * Returns EINVAL when the frame is not accessible, when the saved
 * eflags differ from the current ones outside PSL_USERCHANGE (PSL_RF
 * excepted), or when the saved %cs is not a user-privilege selector
 * (in which case a SIGBUS is also posted).  Returns EJUSTRETURN on
 * success.
 */
/* Argument block: user pointer to the sigcontext to restore. */
struct sigreturn_args {
	struct sigcontext *sigcntxp;
};

int
sigreturn(p, uap, retval)
	struct proc *p;
	struct sigreturn_args *uap;
	int *retval;
{
	register struct sigcontext *scp;
	register struct sigframe *fp;
	register int *regs = p->p_md.md_regs;
	int eflags;

	/*
	 * (XXX old comment) regs[tESP] points to the return address.
	 * The user scp pointer is above that.
	 * The return address is faked in the signal trampoline code
	 * for consistency.
	 */
	scp = uap->sigcntxp;
	/* step back from the context to the sigframe that contains it */
	fp = (struct sigframe *)
	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));

	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
		return(EINVAL);

	/*
	 * Don't allow users to change privileged or reserved flags.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = scp->sc_ps;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
#ifdef DEBUG
    		printf("sigreturn: eflags = 0x%x\n", eflags);
#endif
    		return(EINVAL);
	}

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(scp->sc_cs)) {
#ifdef DEBUG
    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
#endif
		trapsignal(p, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/* restore scratch registers */
	regs[tEAX] = scp->sc_eax;
	regs[tEBX] = scp->sc_ebx;
	regs[tECX] = scp->sc_ecx;
	regs[tEDX] = scp->sc_edx;
	regs[tESI] = scp->sc_esi;
	regs[tEDI] = scp->sc_edi;
	regs[tCS] = scp->sc_cs;
	regs[tDS] = scp->sc_ds;
	regs[tES] = scp->sc_es;
	regs[tSS] = scp->sc_ss;
	regs[tISP] = scp->sc_isp;

	/*
	 * NOTE(review): *scp lies inside the *fp range already checked
	 * above and has been read from by this point, and the registers
	 * above are already overwritten if this check were to fail.
	 */
	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
		return(EINVAL);

	/* restore the "on signal stack" state saved by sendsig() */
	if (scp->sc_onstack & 01)
		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
	else
		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
	/* SIGKILL, SIGCONT and SIGSTOP are never left blocked */
	p->p_sigmask = scp->sc_mask &~
	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
	regs[tEBP] = scp->sc_fp;
	regs[tESP] = scp->sc_sp;
	regs[tEIP] = scp->sc_pc;
	regs[tEFLAGS] = eflags;
	return(EJUSTRETURN);
}
813
/*
 * Deliberately panic the system, so that a crash (and a vmcore dump)
 * can be provoked on demand in a reproducible way.
 */
void
diediedie()
{

	panic("because you said to!");
}
822
823int	waittime = -1;
824struct pcb dumppcb;
825
826__dead void
827boot(arghowto)
828	int arghowto;
829{
830	register long dummy;		/* r12 is reserved */
831	register int howto;		/* r11 == how to boot */
832	register int devtype;		/* r10 == major of root dev */
833
834	if (cold) {
835		printf("hit reset please");
836		for(;;);
837	}
838	howto = arghowto;
839	if ((howto&RB_NOSYNC) == 0 && waittime < 0) {
840		register struct buf *bp;
841		int iter, nbusy;
842
843		waittime = 0;
844		printf("\nsyncing disks... ");
845
846		sync(&proc0, NULL, NULL);
847
848		for (iter = 0; iter < 20; iter++) {
849			nbusy = 0;
850			for (bp = &buf[nbuf]; --bp >= buf; ) {
851				if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) {
852					nbusy++;
853				}
854			}
855			if (nbusy == 0)
856				break;
857			printf("%d ", nbusy);
858			DELAY(40000 * iter);
859		}
860		if (nbusy) {
861			/*
862			 * Failed to sync all blocks. Indicate this and don't
863			 * unmount filesystems (thus forcing an fsck on reboot).
864			 */
865			printf("giving up\n");
866		} else {
867			printf("done\n");
868			/*
869			 * Unmount filesystems
870			 */
871			if (panicstr == 0)
872				vfs_unmountall();
873		}
874		DELAY(100000);			/* wait for console output to finish */
875		dev_shutdownall(FALSE);
876	}
877	splhigh();
878	devtype = major(rootdev);
879	if (howto&RB_HALT) {
880		printf("\n");
881		printf("The operating system has halted.\n");
882		printf("Please press any key to reboot.\n\n");
883		cngetc();
884	} else {
885		if (howto & RB_DUMP) {
886			savectx(&dumppcb, 0);
887			dumppcb.pcb_ptd = rcr3();
888			dumpsys();
889
890			if (PANIC_REBOOT_WAIT_TIME != 0) {
891				if (PANIC_REBOOT_WAIT_TIME != -1) {
892					int loop;
893					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
894						PANIC_REBOOT_WAIT_TIME);
895					for (loop = PANIC_REBOOT_WAIT_TIME; loop > 0; --loop) {
896						DELAY(1000 * 1000); /* one second */
897						if (cncheckc()) /* Did user type a key? */
898							break;
899					}
900					if (!loop)
901						goto die;
902				}
903			} else { /* zero time specified - reboot NOW */
904				goto die;
905			}
906			printf("--> Press a key on the console to reboot <--\n");
907			cngetc();
908		}
909	}
910#ifdef lint
911	dummy = 0; dummy = dummy;
912	printf("howto %d, devtype %d\n", arghowto, devtype);
913#endif
914die:
915	printf("Rebooting...\n");
916	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
917	cpu_reset();
918	for(;;) ;
919	/* NOTREACHED */
920}
921
922unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
923int		dumpsize = 0;		/* also for savecore */
924
925int		dodump = 1;
926
927/*
928 * Doadump comes here after turning off memory management and
929 * getting on the dump stack, either when called above, or by
930 * the auto-restart code.
931 */
932void
933dumpsys()
934{
935
936	if (!dodump)
937		return;
938	if (dumpdev == NODEV)
939		return;
940	if ((minor(dumpdev)&07) != 1)
941		return;
942	dumpsize = Maxmem;
943	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
944	printf("dump ");
945	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
946
947	case ENXIO:
948		printf("device bad\n");
949		break;
950
951	case EFAULT:
952		printf("device not ready\n");
953		break;
954
955	case EINVAL:
956		printf("area improper\n");
957		break;
958
959	case EIO:
960		printf("i/o error\n");
961		break;
962
963	case EINTR:
964		printf("aborted from console\n");
965		break;
966
967	default:
968		printf("succeeded\n");
969		break;
970	}
971}
972
/*
 * Hook for CPU-specific setup, called from cpu_startup().
 * Currently there is nothing to do.
 */
static void
initcpu(void)
{

}
977
978/*
979 * Clear registers on exec
980 */
981void
982setregs(p, entry, stack)
983	struct proc *p;
984	u_long entry;
985	u_long stack;
986{
987	int *regs = p->p_md.md_regs;
988
989	bzero(regs, sizeof(struct trapframe));
990	regs[tEIP] = entry;
991	regs[tESP] = stack;
992	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
993	regs[tSS] = _udatasel;
994	regs[tDS] = _udatasel;
995	regs[tES] = _udatasel;
996	regs[tCS] = _ucodesel;
997
998	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
999	load_cr0(rcr0() | CR0_TS);	/* start emulating */
1000#if	NNPX > 0
1001	npxinit(__INITIAL_NPXCW__);
1002#endif	/* NNPX > 0 */
1003}
1004
1005/*
1006 * machine dependent system variables.
1007 */
1008int
1009cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1010	int *name;
1011	u_int namelen;
1012	void *oldp;
1013	size_t *oldlenp;
1014	void *newp;
1015	size_t newlen;
1016	struct proc *p;
1017{
1018	dev_t consdev;
1019	int error;
1020
1021	/* all sysctl names at this level are terminal */
1022	if (namelen != 1)
1023		return (ENOTDIR);               /* overloaded */
1024
1025	switch (name[0]) {
1026	case CPU_CONSDEV:
1027		consdev = (cn_tty == NULL ? NODEV : cn_tty->t_dev);
1028		return (sysctl_rdstruct(oldp, oldlenp, newp, &consdev,
1029					sizeof consdev));
1030	case CPU_ADJKERNTZ:
1031		error = sysctl_int(oldp, oldlenp, newp, newlen, &adjkerntz);
1032		if (!error && newp)
1033			resettodr();
1034		return error;
1035	case CPU_DISRTCSET:
1036		return (sysctl_int(oldp, oldlenp, newp,	newlen,	&disable_rtc_set));
1037	case CPU_BOOTINFO:
1038		return (sysctl_rdstruct(oldp, oldlenp, newp, &bootinfo,
1039					sizeof bootinfo));
1040	default:
1041		return (EOPNOTSUPP);
1042	}
1043	/* NOTREACHED */
1044}
1045
1046/*
1047 * Initialize 386 and configure to run kernel
1048 */
1049
1050/*
1051 * Initialize segments & interrupt table
1052 */
1053
int currentldt;
int _default_ldt;
union descriptor gdt[NGDT];		/* global descriptor table */
struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
/* gdt_segs[] uses this array as the base of its LDT descriptors */
union descriptor ldt[NLDT];		/* local descriptor table */

/* panic_tss is the base of the GPANIC_SEL descriptor in gdt_segs[] */
struct	i386tss	tss, panic_tss;

extern  struct user *proc0paddr;
1063
/* software prototypes -- in more palatable form */
/*
 * One entry per GDT slot, in selector order (the slot number is given
 * in the comment heading each entry).  Fields, in order: base address,
 * limit, type, privilege level, present bit, two unused fields,
 * default operand size (32 vs 16 bit) and limit granularity.
 */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	1 Code Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	2 Data Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GLDT_SEL	3 LDT Descriptor */
{	(int) ldt,		/* segment base address  */
	sizeof(ldt)-1,		/* length - size of ldt[] in bytes, minus one */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GTGATE_SEL	4 Null Descriptor - Placeholder */
{	0x0,			/* segment base address  */
	0x0,			/* length - empty placeholder */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GPANIC_SEL	5 Panic Tss Descriptor */
{	(int) &panic_tss,	/* segment base address  */
	sizeof(tss)-1,		/* length - size of a struct i386tss, minus one */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
{	(int) kstack,		/* segment base address  */
	sizeof(tss)-1,		/* length - size of a struct i386tss, minus one */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GUSERLDT_SEL	7 User LDT Descriptor per process */
{	(int) ldt,		/* segment base address  */
	(512 * sizeof(union descriptor)-1),		/* length */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
{	0,			/* segment base address (overwritten by APM) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};
1166
1167struct soft_segment_descriptor ldt_segs[] = {
1168	/* Null Descriptor - overwritten by call gate */
1169{	0x0,			/* segment base address  */
1170	0x0,			/* length - all address space */
1171	0,			/* segment type */
1172	0,			/* segment descriptor priority level */
1173	0,			/* segment descriptor present */
1174	0, 0,
1175	0,			/* default 32 vs 16 bit size */
1176	0  			/* limit granularity (byte/page units)*/ },
1177	/* Null Descriptor - overwritten by call gate */
1178{	0x0,			/* segment base address  */
1179	0x0,			/* length - all address space */
1180	0,			/* segment type */
1181	0,			/* segment descriptor priority level */
1182	0,			/* segment descriptor present */
1183	0, 0,
1184	0,			/* default 32 vs 16 bit size */
1185	0  			/* limit granularity (byte/page units)*/ },
1186	/* Null Descriptor - overwritten by call gate */
1187{	0x0,			/* segment base address  */
1188	0x0,			/* length - all address space */
1189	0,			/* segment type */
1190	0,			/* segment descriptor priority level */
1191	0,			/* segment descriptor present */
1192	0, 0,
1193	0,			/* default 32 vs 16 bit size */
1194	0  			/* limit granularity (byte/page units)*/ },
1195	/* Code Descriptor for user */
1196{	0x0,			/* segment base address  */
1197	0xfffff,		/* length - all address space */
1198	SDT_MEMERA,		/* segment type */
1199	SEL_UPL,		/* segment descriptor priority level */
1200	1,			/* segment descriptor present */
1201	0, 0,
1202	1,			/* default 32 vs 16 bit size */
1203	1  			/* limit granularity (byte/page units)*/ },
1204	/* Data Descriptor for user */
1205{	0x0,			/* segment base address  */
1206	0xfffff,		/* length - all address space */
1207	SDT_MEMRWA,		/* segment type */
1208	SEL_UPL,		/* segment descriptor priority level */
1209	1,			/* segment descriptor present */
1210	0, 0,
1211	1,			/* default 32 vs 16 bit size */
1212	1  			/* limit granularity (byte/page units)*/ },
1213};
1214
1215void
1216setidt(idx, func, typ, dpl)
1217	int idx;
1218	inthand_t *func;
1219	int typ;
1220	int dpl;
1221{
1222	struct gate_descriptor *ip = idt + idx;
1223
1224	ip->gd_looffset = (int)func;
1225	ip->gd_selector = 8;
1226	ip->gd_stkcpy = 0;
1227	ip->gd_xx = 0;
1228	ip->gd_type = typ;
1229	ip->gd_dpl = dpl;
1230	ip->gd_p = 1;
1231	ip->gd_hioffset = ((int)func)>>16 ;
1232}
1233
/*
 * IDTVEC(name) builds the identifier of an interrupt/exception entry
 * point by handing the prefix X and `name' to __CONCAT (presumably
 * token pasting, e.g. IDTVEC(div) -> Xdiv; confirm against the
 * definition of __CONCAT).
 */
1234#define	IDTVEC(name)	__CONCAT(X,name)
1235
/*
 * Entry points that init386() installs in the IDT with setidt(), plus
 * the syscall entry it places in the LDT call gate.
 */
1236extern inthand_t
1237	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1238	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1239	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1240	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1241	IDTVEC(syscall);
1242
/* Entry point that init386() installs at vector 0x80 under COMPAT_LINUX. */
1243#ifdef COMPAT_LINUX
1244extern inthand_t
1245	IDTVEC(linux_syscall);
1246#endif
1247
1248void
1249sdtossd(sd, ssd)
1250	struct segment_descriptor *sd;
1251	struct soft_segment_descriptor *ssd;
1252{
1253	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1254	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1255	ssd->ssd_type  = sd->sd_type;
1256	ssd->ssd_dpl   = sd->sd_dpl;
1257	ssd->ssd_p     = sd->sd_p;
1258	ssd->ssd_def32 = sd->sd_def32;
1259	ssd->ssd_gran  = sd->sd_gran;
1260}
1261
/*
 * Early machine-dependent initialization.
 *
 * In order, this routine: attaches proc0's user area, initializes the
 * console, builds the GDT, LDT and IDT from their templates and loads
 * them, reads the base/extended memory sizes from the RTC CMOS, calls
 * pmap_bootstrap(), tests every page from `first' up to Maxmem to build
 * phys_avail[], reserves room for the message buffer at the end of the
 * last chunk, loads the task register with proc0's TSS, installs the
 * system call gate in the LDT, and fills in proc0's pcb.
 *
 *	first	passed to pmap_bootstrap() and used as the first physical
 *		address handed to the page test loop
 */
1262void
1263init386(first)
1264	int first;
1265{
1266	int x;
1267	unsigned biosbasemem, biosextmem;
1268	struct gate_descriptor *gdp;
1269	int gsel_tss;
1270	/* table descriptors - used to load tables by microp */
1271	struct region_descriptor r_gdt, r_idt;
1272	int	pagesinbase, pagesinext;
1273	int	target_page, pa_indx;
1274
1275	proc0.p_addr = proc0paddr;
1276
1277	/*
1278	 * Initialize the console before we print anything out.
1279	 */
1280
1281	cninit ();
1282
1283	/*
1284	 * make gdt memory segments, the code segment goes up to end of the
1285	 * page with etext in it, the data segment goes to the end of
1286	 * the address space
1287	 */
1288	/*
1289	 * XXX text protection is temporarily (?) disabled.  The limit was
1290	 * i386_btop(i386_round_page(etext)) - 1.
1291	 */
	/*
	 * NOTE(review): i386_btop(0) - 1 presumably yields an all-ones
	 * value, i.e. the largest limit the descriptor can hold -- confirm.
	 */
1292	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1293	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
	/* Convert every software GDT template into its hardware form. */
1294	for (x = 0; x < NGDT; x++)
1295		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1296
1297	/* make ldt memory segments */
1298	/*
1299	 * The data segment limit must not cover the user area because we
1300	 * don't want the user area to be writable in copyout() etc. (page
1301	 * level protection is lost in kernel mode on 386's).  Also, we
1302	 * don't want the user area to be writable directly (page level
1303	 * protection of the user area is not available on 486's with
1304	 * CR0_WP set, because there is no user-read/kernel-write mode).
1305	 *
1306	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1307	 * should be spelled ...MAX_USER...
1308	 */
1309#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1310	/*
1311	 * The code segment limit has to cover the user area until we move
1312	 * the signal trampoline out of the user area.  This is safe because
1313	 * the code segment cannot be written to directly.
1314	 */
1315#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1316	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1317	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1318	/* Note. eventually want private ldts per process */
1319	for (x = 0; x < NLDT; x++)
1320		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1321
1322	/* exceptions */
	/*
	 * Point every vector at the "rsvd" handler first, then override
	 * the individual vectors below.  Only vectors 3 and 4 (and the
	 * Linux syscall vector) are installed with SEL_UPL.
	 */
1323	for (x = 0; x < NIDT; x++)
1324		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
1325	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1326	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1327	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1328 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1329	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1330	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1331	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1332	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1333	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1334	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1335	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1336	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1337	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1338	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1339	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1340	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1341	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1342	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
1343#ifdef COMPAT_LINUX
1344 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL);
1345#endif
1346
1347#include	"isa.h"
1348#if	NISA >0
1349	isa_defaultirq();
1350#endif
1351
	/* Load the freshly built descriptor tables. */
1352	r_gdt.rd_limit = sizeof(gdt) - 1;
1353	r_gdt.rd_base =  (int) gdt;
1354	lgdt(&r_gdt);
1355
1356	r_idt.rd_limit = sizeof(idt) - 1;
1357	r_idt.rd_base = (int) idt;
1358	lidt(&r_idt);
1359
1360	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1361	lldt(_default_ldt);
1362	currentldt = _default_ldt;
1363
1364#ifdef DDB
1365	kdb_init();
1366	if (boothowto & RB_KDB)
1367		Debugger("Boot flags requested debugger");
1368#endif
1369
1370	/* Use BIOS values stored in RTC CMOS RAM, since probing
1371	 * breaks certain 386 AT relics.
1372	 */
1373	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1374	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1375
1376	/*
1377	 * Print a warning if the official BIOS interface disagrees
1378	 * with the hackish interface used above.  Eventually only
1379	 * the official interface should be used.
1380	 */
1381	if (bootinfo.bi_memsizes_valid) {
1382		if (bootinfo.bi_basemem != biosbasemem)
1383			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1384			       bootinfo.bi_basemem, biosbasemem);
1385		if (bootinfo.bi_extmem != biosextmem)
1386			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1387			       bootinfo.bi_extmem, biosextmem);
1388	}
1389
1390	/*
1391	 * If BIOS tells us that it has more than 640k in the basemem,
1392	 *	don't believe it - set it to 640k.
1393	 */
1394	if (biosbasemem > 640)
1395		biosbasemem = 640;
1396
1397	/*
1398	 * Some 386 machines might give us a bogus number for extended
1399	 *	mem. If this happens, stop now.
1400	 */
1401#ifndef LARGEMEM
1402	if (biosextmem > 65536) {
1403		panic("extended memory beyond limit of 64MB");
1404		/* NOTREACHED */
1405	}
1406#endif
1407
	/* The sizes above are in kilobytes; convert them to page counts. */
1408	pagesinbase = biosbasemem * 1024 / NBPG;
1409	pagesinext = biosextmem * 1024 / NBPG;
1410
1411	/*
1412	 * Special hack for chipsets that still remap the 384k hole when
1413	 *	there's 16MB of memory - this really confuses people that
1414	 *	are trying to use bus mastering ISA controllers with the
1415	 *	"16MB limit"; they only have 16MB, but the remapping puts
1416	 *	them beyond the limit.
1417	 */
1418	/*
1419	 * If extended memory is between 15-16MB (16-17MB phys address range),
1420	 *	chop it to 15MB.
1421	 */
1422	if ((pagesinext > 3840) && (pagesinext < 4096))
1423		pagesinext = 3840;
1424
1425	/*
1426	 * Maxmem isn't the "maximum memory", it's one larger than the
1427	 * highest page of the physical address space.  Extended memory
1428	 * starts at 1MB, hence the 0x100000/PAGE_SIZE pages added here.
1429	 */
1429	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1430
	/*
	 * NOTE(review): MAXMEM is presumably given in kilobytes, so /4
	 * would convert it to 4K pages -- confirm.
	 */
1431#ifdef MAXMEM
1432	Maxmem = MAXMEM/4;
1433#endif
1434
1435	/* call pmap initialization to make new kernel address space */
1436	pmap_bootstrap (first, 0);
1437
1438	/*
1439	 * Size up each available chunk of physical memory.
1440	 */
1441
1442	/*
1443	 * We currently don't bother testing base memory.
1444	 * XXX  ...but we probably should.
1445	 */
	/*
	 * phys_avail[] holds (start, end) pairs; pa_indx is left pointing
	 * at the "end" slot of the chunk currently being built.
	 */
1446	pa_indx = 0;
1447	badpages = 0;
1448	if (pagesinbase > 1) {
1449		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
1450		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
1451		physmem = pagesinbase - 1;
1452	} else {
1453		/* point at first chunk end */
1454		pa_indx++;
1455	}
1456
	/*
	 * Walk every page from `first' up to Maxmem, mapping each one
	 * through CMAP1/CADDR1 and checking that the first word of the
	 * page holds four test patterns.
	 */
1457	for (target_page = first; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
1458		int tmp, page_bad = FALSE;
1459
1460		/*
1461		 * map page into kernel: valid, read/write, non-cacheable
1462		 */
1463		*(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page;
1464		pmap_update();
1465
		/* Save the word so it can be restored after the test. */
1466		tmp = *(int *)CADDR1;
1467		/*
1468		 * Test for alternating 1's and 0's
1469		 */
1470		*(int *)CADDR1 = 0xaaaaaaaa;
1471		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1472			page_bad = TRUE;
1473		}
1474		/*
1475		 * Test for alternating 0's and 1's
1476		 */
1477		*(int *)CADDR1 = 0x55555555;
1478		if (*(int *)CADDR1 != 0x55555555) {
1479			page_bad = TRUE;
1480		}
1481		/*
1482		 * Test for all 1's
1483		 */
1484		*(int *)CADDR1 = 0xffffffff;
1485		if (*(int *)CADDR1 != 0xffffffff) {
1486			page_bad = TRUE;
1487		}
1488		/*
1489		 * Test for all 0's
1490		 */
1491		*(int *)CADDR1 = 0x0;
1492		if (*(int *)CADDR1 != 0x0) {
1493			/*
1494			 * test of page failed
1495			 */
1496			page_bad = TRUE;
1497		}
1498		/*
1499		 * Restore original value.
1500		 */
1501		*(int *)CADDR1 = tmp;
1502
1503		/*
1504		 * Adjust array of valid/good pages.
1505		 */
1506		if (page_bad == FALSE) {
1507			/*
1508			 * If this good page is a continuation of the
1509			 * previous set of good pages, then just increase
1510			 * the end pointer. Otherwise start a new chunk.
1511			 * Note that "end" points one higher than end,
1512			 * making the range >= start and < end.
1513			 */
1514			if (phys_avail[pa_indx] == target_page) {
1515				phys_avail[pa_indx] += PAGE_SIZE;
1516			} else {
1517				pa_indx++;
1518				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1519					printf("Too many holes in the physical address space, giving up\n");
1520					pa_indx--;
1521					break;
1522				}
1523				phys_avail[pa_indx++] = target_page;	/* start */
1524				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
1525			}
1526			physmem++;
1527		} else {
1528			badpages++;
			/*
			 * Redundant: page_bad is re-initialized to FALSE
			 * at the top of every iteration.
			 */
1529			page_bad = FALSE;
1530		}
1531	}
1532
	/* Tear down the temporary test mapping. */
1533	*(int *)CMAP1 = 0;
1534	pmap_update();
1535
1536	/*
1537	 * XXX
1538	 * The last chunk must contain at least one page plus the message
1539	 * buffer to avoid complicating other code (message buffer address
1540	 * calculation, etc.).
1541	 */
	/*
	 * Chunks that are too small are dropped from the end of
	 * phys_avail[].  NOTE(review): nothing here stops pa_indx from
	 * running below 1 if no chunk is large enough.
	 */
1542	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1543	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
1544		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1545		phys_avail[pa_indx--] = 0;
1546		phys_avail[pa_indx--] = 0;
1547	}
1548
1549	Maxmem = atop(phys_avail[pa_indx]);
1550
1551	/* Trim off space for the message buffer. */
1552	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));
1553
1554	avail_end = phys_avail[pa_indx];
1555
1556	/* now running on new page tables, configured,and u/iom is accessible */
1557
1558	/* make an initial tss so microp can get interrupt stack on syscall! */
1559	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1560	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1561	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1562
	/*
	 * NOTE(review): storing sizeof(tss) in the upper 16 bits of
	 * tss_ioopt presumably places the I/O map base at the end of the
	 * TSS, i.e. no I/O permission bitmap -- confirm.
	 */
1563	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1564		(sizeof(tss))<<16;
1565
1566	ltr(gsel_tss);
1567
1568	/* make a call gate to reenter kernel with */
1569	gdp = &ldt[LSYS5CALLS_SEL].gd;
1570
1571	x = (int) &IDTVEC(syscall);
	/* The post-increment has no effect: x is not read again below. */
1572	gdp->gd_looffset = x++;
1573	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1574	gdp->gd_stkcpy = 1;
1575	gdp->gd_type = SDT_SYS386CGT;
1576	gdp->gd_dpl = SEL_UPL;
1577	gdp->gd_p = 1;
1578	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1579
1580	/* transfer to user mode */
1581
1582	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1583	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1584
1585	/* setup proc 0's pcb */
1586	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1587	proc0.p_addr->u_pcb.pcb_flags = 0;
1588	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1589}
1590
/*
 * TF_REGP(p) yields a pointer to process p's saved trap frame inside
 * its user area.
 *
 * p->p_md.md_regs holds the address of the register set as seen on
 * "the kernel stack".  The frame itself lives in the process's user
 * area, so the offset of md_regs from kstack is applied to p->p_addr
 * to find it there.
 */
#define	TF_REGP(p) \
	((struct trapframe *) \
	 ((char *)(p)->p_addr + ((char *)(p)->p_md.md_regs - kstack)))
1603
1604int
1605ptrace_set_pc(p, addr)
1606	struct proc *p;
1607	unsigned int addr;
1608{
1609	TF_REGP(p)->tf_eip = addr;
1610	return (0);
1611}
1612
1613int
1614ptrace_single_step(p)
1615	struct proc *p;
1616{
1617	TF_REGP(p)->tf_eflags |= PSL_T;
1618	return (0);
1619}
1620
1621int
1622ptrace_getregs(p, addr)
1623	struct proc *p;
1624	unsigned int *addr;
1625{
1626	int error;
1627	struct reg regs;
1628
1629	error = fill_regs(p, &regs);
1630	if (error)
1631		return (error);
1632	return (copyout(&regs, addr, sizeof regs));
1633}
1634
1635int
1636ptrace_setregs(p, addr)
1637	struct proc *p;
1638	unsigned int *addr;
1639{
1640	int error;
1641	struct reg regs;
1642
1643	error = copyin(addr, &regs, sizeof regs);
1644	if (error)
1645		return (error);
1646	return (set_regs(p, &regs));
1647}
1648
1649int ptrace_write_u(p, off, data)
1650	struct proc *p;
1651	vm_offset_t off;
1652	int data;
1653{
1654	struct trapframe frame_copy;
1655	vm_offset_t min;
1656	struct trapframe *tp;
1657
1658	/*
1659	 * Privileged kernel state is scattered all over the user area.
1660	 * Only allow write access to parts of regs and to fpregs.
1661	 */
1662	min = (char *)p->p_md.md_regs - kstack;
1663	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1664		tp = TF_REGP(p);
1665		frame_copy = *tp;
1666		*(int *)((char *)&frame_copy + (off - min)) = data;
1667		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1668		    !CS_SECURE(frame_copy.tf_cs))
1669			return (EINVAL);
1670		*(int*)((char *)p->p_addr + off) = data;
1671		return (0);
1672	}
1673	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1674	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1675		*(int*)((char *)p->p_addr + off) = data;
1676		return (0);
1677	}
1678	return (EFAULT);
1679}
1680
1681int
1682fill_regs(p, regs)
1683	struct proc *p;
1684	struct reg *regs;
1685{
1686	struct trapframe *tp;
1687
1688	tp = TF_REGP(p);
1689	regs->r_es = tp->tf_es;
1690	regs->r_ds = tp->tf_ds;
1691	regs->r_edi = tp->tf_edi;
1692	regs->r_esi = tp->tf_esi;
1693	regs->r_ebp = tp->tf_ebp;
1694	regs->r_ebx = tp->tf_ebx;
1695	regs->r_edx = tp->tf_edx;
1696	regs->r_ecx = tp->tf_ecx;
1697	regs->r_eax = tp->tf_eax;
1698	regs->r_eip = tp->tf_eip;
1699	regs->r_cs = tp->tf_cs;
1700	regs->r_eflags = tp->tf_eflags;
1701	regs->r_esp = tp->tf_esp;
1702	regs->r_ss = tp->tf_ss;
1703	return (0);
1704}
1705
1706int
1707set_regs(p, regs)
1708	struct proc *p;
1709	struct reg *regs;
1710{
1711	struct trapframe *tp;
1712
1713	tp = TF_REGP(p);
1714	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1715	    !CS_SECURE(regs->r_cs))
1716		return (EINVAL);
1717	tp->tf_es = regs->r_es;
1718	tp->tf_ds = regs->r_ds;
1719	tp->tf_edi = regs->r_edi;
1720	tp->tf_esi = regs->r_esi;
1721	tp->tf_ebp = regs->r_ebp;
1722	tp->tf_ebx = regs->r_ebx;
1723	tp->tf_edx = regs->r_edx;
1724	tp->tf_ecx = regs->r_ecx;
1725	tp->tf_eax = regs->r_eax;
1726	tp->tf_eip = regs->r_eip;
1727	tp->tf_cs = regs->r_cs;
1728	tp->tf_eflags = regs->r_eflags;
1729	tp->tf_esp = regs->r_esp;
1730	tp->tf_ss = regs->r_ss;
1731	return (0);
1732}
1733
#ifndef DDB
/*
 * Stand-in used when the kernel is built without DDB: report that the
 * debugger was requested, print the caller's message, and return.
 */
void
Debugger(const char *msg)
{

	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* !DDB */
1741
1742#include <sys/disklabel.h>
1743#define b_cylin	b_resid
1744/*
1745 * Determine the size of the transfer, and make sure it is
1746 * within the boundaries of the partition. Adjust transfer
1747 * if needed, and signal errors or early completion.
1748 */
1749int
1750bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1751{
1752        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1753        int labelsect = lp->d_partitions[0].p_offset;
1754        int maxsz = p->p_size,
1755                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1756
1757        /* overwriting disk label ? */
1758        /* XXX should also protect bootstrap in first 8K */
1759        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1760#if LABELSECTOR != 0
1761            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1762#endif
1763            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1764                bp->b_error = EROFS;
1765                goto bad;
1766        }
1767
1768#if     defined(DOSBBSECTOR) && defined(notyet)
1769        /* overwriting master boot record? */
1770        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1771            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1772                bp->b_error = EROFS;
1773                goto bad;
1774        }
1775#endif
1776
1777        /* beyond partition? */
1778        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1779                /* if exactly at end of disk, return an EOF */
1780                if (bp->b_blkno == maxsz) {
1781                        bp->b_resid = bp->b_bcount;
1782                        return(0);
1783                }
1784                /* or truncate if part of it fits */
1785                sz = maxsz - bp->b_blkno;
1786                if (sz <= 0) {
1787                        bp->b_error = EINVAL;
1788                        goto bad;
1789                }
1790                bp->b_bcount = sz << DEV_BSHIFT;
1791        }
1792
1793        /* calculate cylinder for disksort to order transfers with */
1794        bp->b_pblkno = bp->b_blkno + p->p_offset;
1795        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1796        return(1);
1797
1798bad:
1799        bp->b_flags |= B_ERROR;
1800        return(-1);
1801}
1802
/*
 * Copy the drive number out to userp.
 *
 * *maxlen is the space remaining in the caller's buffer.  If it cannot
 * hold an int, ENOMEM is returned and nothing is changed; otherwise
 * *maxlen is reduced by sizeof(int) and the result of copyout() is
 * returned.
 */
int
disk_externalize(int drive, void *userp, size_t *maxlen)
{
	if (*maxlen >= sizeof drive) {
		*maxlen -= sizeof drive;
		return copyout(&drive, userp, sizeof drive);
	}

	return ENOMEM;
}
1813