machdep.c revision 8481
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.125 1995/05/11 19:26:08 rgrimes Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/signalvar.h>
47#include <sys/kernel.h>
48#include <sys/proc.h>
49#include <sys/user.h>
50#include <sys/buf.h>
51#include <sys/reboot.h>
52#include <sys/conf.h>
53#include <sys/file.h>
54#include <sys/callout.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/mount.h>
58#include <sys/msgbuf.h>
59#include <sys/ioctl.h>
60#include <sys/sysent.h>
61#include <sys/tty.h>
62#include <sys/sysctl.h>
63#include <sys/devconf.h>
64
65#ifdef SYSVSHM
66#include <sys/shm.h>
67#endif
68
69#ifdef SYSVMSG
70#include <sys/msg.h>
71#endif
72
73#ifdef SYSVSEM
74#include <sys/sem.h>
75#endif
76
77#include <vm/vm.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_page.h>
80
81#include <sys/exec.h>
82#include <sys/vnode.h>
83
84#include <ddb/ddb.h>
85
86#include <net/netisr.h>
87
88/* XXX correctly declaring all the netisrs is painful. */
89#include <net/if.h>
90#include <net/route.h>
91
92#include <netinet/in.h>
93#include <netinet/in_systm.h>
94#include <netinet/ip.h>
95#include <netinet/if_ether.h>
96#include <netinet/ip_var.h>
97
98#include <netns/ns.h>
99#include <netns/ns_if.h>
100
101#include <netiso/iso.h>
102#include <netiso/iso_var.h>
103
104#include <netccitt/dll.h>
105#include <netccitt/x25.h>
106#include <netccitt/pk.h>
107#include <sys/socketvar.h>
108#include <netccitt/pk_var.h>
109
110#include "ether.h"
111
112#include <machine/cpu.h>
113#include <machine/npx.h>
114#include <machine/reg.h>
115#include <machine/psl.h>
116#include <machine/clock.h>
117#include <machine/specialreg.h>
118#include <machine/sysarch.h>
119#include <machine/cons.h>
120#include <machine/devconf.h>
121#include <machine/bootinfo.h>
122#include <machine/md_var.h>
123
124#include <i386/isa/isa.h>
125#include <i386/isa/isa_device.h>
126#include <i386/isa/rtc.h>
127
128static void identifycpu(void);
129static void initcpu(void);
130
131char machine[] = "i386";
132char cpu_model[128];
133
134struct kern_devconf kdc_cpu0 = {
135	0, 0, 0,		/* filled in by dev_attach */
136	"cpu", 0, { MDDT_CPU },
137	0, 0, 0, CPU_EXTERNALLEN,
138	0,			/* CPU has no parent */
139	0,			/* no parentdata */
140	DC_BUSY,		/* the CPU is always busy */
141	cpu_model,		/* no sense in duplication */
142	DC_CLS_CPU		/* class */
143};
144
145#ifndef PANIC_REBOOT_WAIT_TIME
146#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
147#endif
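/*
 * Presumably this can be overridden from the kernel config file, e.g.
 * ``options "PANIC_REBOOT_WAIT_TIME=30"'' (assumed syntax); per boot()
 * below, -1 waits indefinitely for a keypress and 0 reboots at once.
 */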
148
149/*
150 * Declare these as initialized data so we can patch them.
151 */
152int	nswbuf = 0;
153#ifdef	NBUF
154int	nbuf = NBUF;
155#else
156int	nbuf = 0;
157#endif
158
159#ifdef BOUNCE_BUFFERS
160extern char *bouncememory;
161extern int maxbkva;
162#ifdef BOUNCEPAGES
163int	bouncepages = BOUNCEPAGES;
164#else
165int	bouncepages = 0;
166#endif
167#endif	/* BOUNCE_BUFFERS */
168
169extern int freebufspace;
170int	msgbufmapped = 0;		/* set when safe to use msgbuf */
171int _udatasel, _ucodesel;
172
173
174/*
175 * Machine-dependent startup code
176 */
177int boothowto = 0, bootverbose = 0, Maxmem = 0, badpages = 0, physmem = 0;
178long dumplo;
179extern int bootdev;
180int biosmem;
181
182vm_offset_t	phys_avail[6];
183
184int cpu_class;
185
186void dumpsys __P((void));
187void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */
188
189vm_offset_t buffer_sva, buffer_eva;
190vm_offset_t clean_sva, clean_eva;
191vm_offset_t pager_sva, pager_eva;
192extern int pager_map_size;
193extern struct linker_set netisr_set;
194
195#define offsetof(type, member)	((size_t)(&((type *)0)->member))
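/*
 * For example, offsetof(struct sigframe, sf_sc) is the byte offset of
 * the saved sigcontext within a signal frame; sigreturn() below uses
 * it to recover the frame address from the user-supplied scp pointer.
 */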
196
197void
198cpu_startup()
199{
200	register unsigned i;
201	register caddr_t v;
202	vm_offset_t maxaddr;
203	vm_size_t size = 0;
204	int firstaddr;
205	vm_offset_t minaddr;
206
207	if (boothowto & RB_VERBOSE)
208		bootverbose++;
209
210	/*
211	 * Initialize error message buffer (at end of core).
212	 */
213
214	/* avail_end was pre-decremented in init_386() to compensate */
215	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
216		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
217			   avail_end + i * NBPG,
218			   VM_PROT_ALL, TRUE);
219	msgbufmapped = 1;
220
221	/*
222	 * Good {morning,afternoon,evening,night}.
223	 */
224	printf(version);
225	startrtclock();
226	identifycpu();
227	printf("real memory  = %d (%d pages)\n", ptoa(physmem), physmem);
228	if (badpages)
229		printf("bad memory   = %d (%d pages)\n", ptoa(badpages), badpages);
230
231	/*
232	 * Quickly wire in netisrs.
233	 */
234	setup_netisrs(&netisr_set);
235
236/*
237#ifdef ISDN
238	DONET(isdnintr, NETISR_ISDN);
239#endif
240*/
241
242	/*
243	 * Allocate space for system data structures.
244	 * The first available kernel virtual address is in "v".
245	 * As pages of kernel virtual memory are allocated, "v" is incremented.
246	 * As pages of memory are allocated and cleared,
247	 * "firstaddr" is incremented.
248	 * An index into the kernel page table corresponding to the
249	 * virtual memory address maintained in "v" is kept in "mapaddr".
250	 */
251
252	/*
253	 * Make two passes.  The first pass calculates how much memory is
254	 * needed and allocates it.  The second pass assigns virtual
255	 * addresses to the various data structures.
256	 */
257	firstaddr = 0;
258again:
259	v = (caddr_t)firstaddr;
260
261#define	valloc(name, type, num) \
262	    (name) = (type *)v; v = (caddr_t)((name)+(num))
263#define	valloclim(name, type, num, lim) \
264	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
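	/*
	 * Each valloc() assigns the current value of "v" to the named
	 * pointer and then advances v past num elements, e.g.
	 *	valloc(callout, struct callout, ncallout);
	 * reserves ncallout callout structures.  On the first pass v
	 * starts at 0, so only the total size is accumulated; on the
	 * second pass v walks through the block kmem_alloc()ed below.
	 */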
265	valloc(callout, struct callout, ncallout);
266#ifdef SYSVSHM
267	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
268#endif
269#ifdef SYSVSEM
270	valloc(sema, struct semid_ds, seminfo.semmni);
271	valloc(sem, struct sem, seminfo.semmns);
272	/* This is pretty disgusting! */
273	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
274#endif
275#ifdef SYSVMSG
276	valloc(msgpool, char, msginfo.msgmax);
277	valloc(msgmaps, struct msgmap, msginfo.msgseg);
278	valloc(msghdrs, struct msg, msginfo.msgtql);
279	valloc(msqids, struct msqid_ds, msginfo.msgmni);
280#endif
281
282	if (nbuf == 0) {
283		nbuf = 30;
284		if( physmem > 1024)
285			nbuf += min((physmem - 1024) / 12, 1024);
286	}
287	nswbuf = min(nbuf, 128);
288
289	valloc(swbuf, struct buf, nswbuf);
290	valloc(buf, struct buf, nbuf);
291
292#ifdef BOUNCE_BUFFERS
293	/*
294	 * If there is more than 16MB of memory, allocate some bounce buffers
295	 */
296	if (Maxmem > 4096) {
297		if (bouncepages == 0) {
298			bouncepages = 64;
299			bouncepages += ((Maxmem - 4096) / 2048) * 32;
300		}
301		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
302		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
303	}
304#endif
305
306	/*
307	 * End of first pass, size has been calculated so allocate memory
308	 */
309	if (firstaddr == 0) {
310		size = (vm_size_t)(v - firstaddr);
311		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
312		if (firstaddr == 0)
313			panic("startup: no room for tables");
314		goto again;
315	}
316
317	/*
318	 * End of second pass, addresses have been assigned
319	 */
320	if ((vm_size_t)(v - firstaddr) != size)
321		panic("startup: table size inconsistency");
322
323#ifdef BOUNCE_BUFFERS
324	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
325			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
326				maxbkva + pager_map_size, TRUE);
327	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
328#else
329	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
330			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
331#endif
332	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
333				(nbuf*MAXBSIZE), TRUE);
334	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
335				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
336	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
337				(16*ARG_MAX), TRUE);
338	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
339				(maxproc*UPAGES*PAGE_SIZE), FALSE);
340
341	/*
342	 * Finally, allocate the mbuf pool.  Since mclrefcnt is an odd size,
343	 * we use the more space-efficient malloc in place of kmem_alloc.
344	 */
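	/*
	 * mclrefcnt apparently holds one reference-count byte per mbuf
	 * cluster (NMBCLUSTERS of them plus a little slop), hence the
	 * odd size noted above.
	 */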
345	mclrefcnt = (char *)malloc(NMBCLUSTERS+CLBYTES/MCLBYTES,
346				   M_MBUF, M_NOWAIT);
347	bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES);
348	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
349			       VM_MBUF_SIZE, FALSE);
350	/*
351	 * Initialize callouts
352	 */
353	callfree = callout;
354	for (i = 1; i < ncallout; i++)
355		callout[i-1].c_next = &callout[i];
356
357        if (boothowto & RB_CONFIG)
358		userconfig();
359	printf("avail memory = %d (%d pages)\n", ptoa(cnt.v_free_count), cnt.v_free_count);
360
361#ifdef BOUNCE_BUFFERS
362	/*
363	 * init bounce buffers
364	 */
365	vm_bounce_init();
366#endif
367
368	/*
369	 * Set up CPU-specific registers, cache, etc.
370	 */
371	initcpu();
372
373	/*
374	 * Set up buffers, so they can be used to read disk labels.
375	 */
376	bufinit();
377	vm_pager_bufferinit();
378
379	/*
380	 * Configure the system.
381	 */
382	configure();
383	if (bootverbose) {
384		printf("BIOS Geometries:\n");
385		for (i=0; i < N_BIOS_GEOM; i++) {
386			int j = bootinfo.bi_bios_geom[i];
387			if (j == 0x4f010f)
388				continue;
389			printf(" %x:%08x", i, j);
390			printf(" %d cyl, %d heads, %d sects\n",
391				j >> 16, (j >> 8) & 0xff, j & 0xff);
392
393		}
394		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
395	}
396}
397
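/*
 * Each protocol that wants a software interrupt contributes a struct
 * netisrtab entry to netisr_set at link time; setup_netisrs() copies
 * each handler into the netisrs[] slot named by its NETISR_* number.
 */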
398void
399setup_netisrs(struct linker_set *ls)
400{
401	int i;
402	const struct netisrtab *nit;
403
404	for(i = 0; ls->ls_items[i]; i++) {
405		nit = (const struct netisrtab *)ls->ls_items[i];
406		netisrs[nit->nit_num] = nit->nit_isr;
407	}
408}
409
410struct cpu_nameclass i386_cpus[] = {
411	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
412	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
413	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
414	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
415	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
416	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
417	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
418};
419
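/*
 * identifycpu() decodes cpu_id roughly as follows (judging from the
 * masks used below): bits 0-3 stepping, bits 4-7 model, bits 8-11
 * family (4 = i486, 5 = Pentium), bits 12-13 type (e.g. OverDrive).
 */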
420static void
421identifycpu()
422{
423	printf("CPU: ");
424	if (cpu >= 0
425	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
426		cpu_class = i386_cpus[cpu].cpu_class;
427		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
428	} else {
429		printf("unknown cpu type %d\n", cpu);
430		panic("startup: bad cpu id");
431	}
432
433#if defined(I586_CPU)
434	if(cpu_class == CPUCLASS_586) {
435		calibrate_cyclecounter();
436		printf("%d-MHz ", pentium_mhz);
437	}
438#endif
439#if defined(I486_CPU) || defined(I586_CPU)
440	if (!strcmp(cpu_vendor,"GenuineIntel")) {
441		if ((cpu_id & 0xf00) > 3) {
442			cpu_model[0] = '\0';
443
444			switch (cpu_id & 0x3000) {
445			case 0x1000:
446				strcpy(cpu_model, "Overdrive ");
447				break;
448			case 0x2000:
449				strcpy(cpu_model, "Dual ");
450				break;
451			}
452			if ((cpu_id & 0xf00) == 0x400) {
453				strcat(cpu_model, "i486 ");
454#if defined(I586_CPU)
455			} else if ((cpu_id & 0xf00) == 0x500) {
456				strcat(cpu_model, "Pentium ");
457#endif
458			} else {
459				strcat(cpu_model, "unknown ");
460			}
461
462			switch (cpu_id & 0xff0) {
463			case 0x400:
464				strcat(cpu_model, "DX"); break;
465			case 0x410:
466				strcat(cpu_model, "DX"); break;
467			case 0x420:
468				strcat(cpu_model, "SX"); break;
469			case 0x430:
470				strcat(cpu_model, "DX2"); break;
471			case 0x440:
472				strcat(cpu_model, "SL"); break;
473			case 0x450:
474				strcat(cpu_model, "SX2"); break;
475			case 0x470:
476				strcat(cpu_model, "DX2 Write-Back Enhanced");
477				break;
478			case 0x480:
479				strcat(cpu_model, "DX4"); break;
480#if defined(I586_CPU)
481			case 0x510:
482				if (pentium_mhz == 60) {
483					strcat(cpu_model, "510\\60");
484				} else if (pentium_mhz == 66) {
485					strcat(cpu_model, "567\\66");
486				} else {
487					strcat(cpu_model,"510\\60 or 567\\66");
488				}
489				break;
490			case 0x520:
491				if (pentium_mhz == 90) {
492					strcat(cpu_model, "735\\90");
493				} else if (pentium_mhz == 100) {
494					strcat(cpu_model, "815\\100");
495				} else {
496					strcat(cpu_model,"735\\90 or 815\\100");
497				}
498				break;
499#endif
500			}
501		}
502	}
503#endif
504	printf("%s (", cpu_model);
505	switch(cpu_class) {
506	case CPUCLASS_286:
507		printf("286");
508		break;
509#if defined(I386_CPU)
510	case CPUCLASS_386:
511		printf("386");
512		break;
513#endif
514#if defined(I486_CPU)
515	case CPUCLASS_486:
516		printf("486");
517		break;
518#endif
519#if defined(I586_CPU)
520	case CPUCLASS_586:
521		printf("Pentium");
522		break;
523#endif
524	default:
525		printf("unknown");	/* will panic below... */
526	}
527	printf("-class CPU)\n");
528#if defined(I486_CPU) || defined(I586_CPU)
529	if(*cpu_vendor)
530		printf("  Origin = \"%s\"",cpu_vendor);
531	if(cpu_id)
532		printf("  Id = 0x%lx",cpu_id);
533
534	if (!strcmp(cpu_vendor, "GenuineIntel")) {
535		printf("  Stepping=%ld", cpu_id & 0xf);
536		if (cpu_high > 0) {
537#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
538			printf("\n  Features=0x%b", cpu_feature, FEATUREFMT);
539		}
540	}
541	/* Avoid ugly blank lines: only print newline when we have to. */
542	if (*cpu_vendor || cpu_id)
543		printf("\n");
544#endif
545	/*
546	 * Now that we have told the user what they have,
547	 * let them know if that machine type isn't configured.
548	 */
549	switch (cpu_class) {
550	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
551#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
552#error This kernel is not configured for one of the supported CPUs
553#endif
554#if !defined(I386_CPU)
555	case CPUCLASS_386:
556#endif
557#if !defined(I486_CPU)
558	case CPUCLASS_486:
559#endif
560#if !defined(I586_CPU)
561	case CPUCLASS_586:
562#endif
563		panic("CPU class not configured");
564	default:
565		break;
566	}
567	dev_attach(&kdc_cpu0);
568}
569
570/*
571 * Send an interrupt to process.
572 *
573 * The stack is set up so that the signal trampoline (the sigcode
574 * copied into the u. area) can call the handler, followed by a
575 * sigreturn system call.  After sigreturn
576 * resets the signal mask, the stack, and the
577 * frame pointer, it returns to the user
578 * specified pc, psl.
579 */
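/*
 * On return to user mode, %esp points at the struct sigframe copied
 * out below (signal number, code, sigcontext pointer, fault address,
 * handler address) and %eip at the trampoline in the u. area
 * (pcb_sigc), which calls the handler and then enters sigreturn().
 */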
580void
581sendsig(catcher, sig, mask, code)
582	sig_t catcher;
583	int sig, mask;
584	unsigned code;
585{
586	register struct proc *p = curproc;
587	register int *regs;
588	register struct sigframe *fp;
589	struct sigframe sf;
590	struct sigacts *psp = p->p_sigacts;
591	int oonstack;
592
593	regs = p->p_md.md_regs;
594        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
595	/*
596	 * Allocate and validate space for the signal handler
597	 * context. Note that if the stack is in P0 space, the
598	 * call to grow() is a nop, and the useracc() check
599	 * will fail if the process has not already allocated
600	 * the space with a `brk'.
601	 */
602        if ((psp->ps_flags & SAS_ALTSTACK) &&
603	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
604	    (psp->ps_sigonstack & sigmask(sig))) {
605		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
606		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
607		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
608	} else {
609		fp = (struct sigframe *)(regs[tESP]
610			- sizeof(struct sigframe));
611	}
612
613	/*
614	 * grow() will return FALSE if the fp will not fit inside the stack
615	 *	and the stack cannot be grown.  useracc() will return FALSE
616	 *	if access is denied.
617	 */
618	if ((grow(p, (int)fp) == FALSE) ||
619	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
620		/*
621		 * Process has trashed its stack; give it an illegal
622		 * instruction to halt it in its tracks.
623		 */
624		SIGACTION(p, SIGILL) = SIG_DFL;
625		sig = sigmask(SIGILL);
626		p->p_sigignore &= ~sig;
627		p->p_sigcatch &= ~sig;
628		p->p_sigmask &= ~sig;
629		psignal(p, SIGILL);
630		return;
631	}
632
633	/*
634	 * Build the argument list for the signal handler.
635	 */
636	if (p->p_sysent->sv_sigtbl) {
637		if (sig < p->p_sysent->sv_sigsize)
638			sig = p->p_sysent->sv_sigtbl[sig];
639		else
640			sig = p->p_sysent->sv_sigsize + 1;
641	}
642	sf.sf_signum = sig;
643	sf.sf_code = code;
644	sf.sf_scp = &fp->sf_sc;
645	sf.sf_addr = (char *) regs[tERR];
646	sf.sf_handler = catcher;
647
648	/* save scratch registers */
649	sf.sf_sc.sc_eax = regs[tEAX];
650	sf.sf_sc.sc_ebx = regs[tEBX];
651	sf.sf_sc.sc_ecx = regs[tECX];
652	sf.sf_sc.sc_edx = regs[tEDX];
653	sf.sf_sc.sc_esi = regs[tESI];
654	sf.sf_sc.sc_edi = regs[tEDI];
655	sf.sf_sc.sc_cs = regs[tCS];
656	sf.sf_sc.sc_ds = regs[tDS];
657	sf.sf_sc.sc_ss = regs[tSS];
658	sf.sf_sc.sc_es = regs[tES];
659	sf.sf_sc.sc_isp = regs[tISP];
660
661	/*
662	 * Build the signal context to be used by sigreturn.
663	 */
664	sf.sf_sc.sc_onstack = oonstack;
665	sf.sf_sc.sc_mask = mask;
666	sf.sf_sc.sc_sp = regs[tESP];
667	sf.sf_sc.sc_fp = regs[tEBP];
668	sf.sf_sc.sc_pc = regs[tEIP];
669	sf.sf_sc.sc_ps = regs[tEFLAGS];
670
671	/*
672	 * Copy the sigframe out to the user's stack.
673	 */
674	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
675		/*
676		 * Something is wrong with the stack pointer.
677		 * ...Kill the process.
678		 */
679		sigexit(p, SIGILL);
680	}
681
682	regs[tESP] = (int)fp;
683	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
684	regs[tEFLAGS] &= ~PSL_VM;
685	regs[tCS] = _ucodesel;
686	regs[tDS] = _udatasel;
687	regs[tES] = _udatasel;
688	regs[tSS] = _udatasel;
689}
690
691/*
692 * System call to cleanup state after a signal
693 * has been taken.  Reset signal mask and
694 * stack state from context left by sendsig (above).
695 * Return to previous pc and psl as specified by
696 * context left by sendsig. Check carefully to
697 * make sure that the user has not modified the
698 * state to gain improper privileges.
699 */
700struct sigreturn_args {
701	struct sigcontext *sigcntxp;
702};
703
704int
705sigreturn(p, uap, retval)
706	struct proc *p;
707	struct sigreturn_args *uap;
708	int *retval;
709{
710	register struct sigcontext *scp;
711	register struct sigframe *fp;
712	register int *regs = p->p_md.md_regs;
713	int eflags;
714
715	/*
716	 * (XXX old comment) regs[tESP] points to the return address.
717	 * The user scp pointer is above that.
718	 * The return address is faked in the signal trampoline code
719	 * for consistency.
720	 */
721	scp = uap->sigcntxp;
722	fp = (struct sigframe *)
723	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
724
725	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
726		return(EINVAL);
727
728	/*
729	 * Don't allow users to change privileged or reserved flags.
730	 */
731#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
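	/*
	 * EFLAGS_SECURE(new, old) is true iff the two eflags values
	 * differ only in bits the user may change (PSL_USERCHANGE);
	 * set_regs() and ptrace_write_u() below apply the same check.
	 */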
732	eflags = scp->sc_ps;
733	/*
734	 * XXX do allow users to change the privileged flag PSL_RF.  The
735	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
736	 * sometimes set it there too.  tf_eflags is kept in the signal
737	 * context during signal handling and there is no other place
738	 * to remember it, so the PSL_RF bit may be corrupted by the
739	 * signal handler without us knowing.  Corruption of the PSL_RF
740	 * bit at worst causes one more or one less debugger trap, so
741	 * allowing it is fairly harmless.
742	 */
743	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
744#ifdef DEBUG
745    		printf("sigreturn: eflags = 0x%x\n", eflags);
746#endif
747    		return(EINVAL);
748	}
749
750	/*
751	 * Don't allow users to load a valid privileged %cs.  Let the
752	 * hardware check for invalid selectors, excess privilege in
753	 * other selectors, invalid %eip's and invalid %esp's.
754	 */
755#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
756	if (!CS_SECURE(scp->sc_cs)) {
757#ifdef DEBUG
758    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
759#endif
760		trapsignal(p, SIGBUS, T_PROTFLT);
761		return(EINVAL);
762	}
763
764	/* restore scratch registers */
765	regs[tEAX] = scp->sc_eax;
766	regs[tEBX] = scp->sc_ebx;
767	regs[tECX] = scp->sc_ecx;
768	regs[tEDX] = scp->sc_edx;
769	regs[tESI] = scp->sc_esi;
770	regs[tEDI] = scp->sc_edi;
771	regs[tCS] = scp->sc_cs;
772	regs[tDS] = scp->sc_ds;
773	regs[tES] = scp->sc_es;
774	regs[tSS] = scp->sc_ss;
775	regs[tISP] = scp->sc_isp;
776
777	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
778		return(EINVAL);
779
780	if (scp->sc_onstack & 01)
781		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
782	else
783		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
784	p->p_sigmask = scp->sc_mask &~
785	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
786	regs[tEBP] = scp->sc_fp;
787	regs[tESP] = scp->sc_sp;
788	regs[tEIP] = scp->sc_pc;
789	regs[tEFLAGS] = eflags;
790	return(EJUSTRETURN);
791}
792
793/*
794 * a simple function to make the system panic (and dump a vmcore)
795 * in a predictable fashion
796 */
797void diediedie()
798{
799	panic("because you said to!");
800}
801
802int	waittime = -1;
803struct pcb dumppcb;
804
805__dead void
806boot(arghowto)
807	int arghowto;
808{
809	register long dummy;		/* only referenced under #ifdef lint */
810	register int howto;		/* how to boot */
811	register int devtype;		/* major of root dev */
812
813	if (cold) {
814		printf("hit reset please");
815		for(;;);
816	}
817	howto = arghowto;
818	if ((howto&RB_NOSYNC) == 0 && waittime < 0) {
819		register struct buf *bp;
820		int iter, nbusy;
821
822		waittime = 0;
823		printf("\nsyncing disks... ");
824		/*
825		 * Release inodes held by texts before update.
826		 */
827		if (panicstr == 0)
828			vnode_pager_umount(NULL);
829		sync(&proc0, NULL, NULL);
830
831		for (iter = 0; iter < 20; iter++) {
832			nbusy = 0;
833			for (bp = &buf[nbuf]; --bp >= buf; ) {
834				if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) {
835					nbusy++;
836				}
837			}
838			if (nbusy == 0)
839				break;
840			printf("%d ", nbusy);
841			DELAY(40000 * iter);
842		}
843		if (nbusy) {
844			/*
845			 * Failed to sync all blocks. Indicate this and don't
846			 * unmount filesystems (thus forcing an fsck on reboot).
847			 */
848			printf("giving up\n");
849		} else {
850			printf("done\n");
851			/*
852			 * Unmount filesystems
853			 */
854			if (panicstr == 0)
855				vfs_unmountall();
856		}
857		DELAY(100000);			/* wait for console output to finish */
858		dev_shutdownall(FALSE);
859	}
860	splhigh();
861	devtype = major(rootdev);
862	if (howto&RB_HALT) {
863		printf("\n");
864		printf("The operating system has halted.\n");
865		printf("Please press any key to reboot.\n\n");
866		cngetc();
867	} else {
868		if (howto & RB_DUMP) {
869			savectx(&dumppcb, 0);
870			dumppcb.pcb_ptd = rcr3();
871			dumpsys();
872
873			if (PANIC_REBOOT_WAIT_TIME != 0) {
874				if (PANIC_REBOOT_WAIT_TIME != -1) {
875					int loop;
876					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
877						PANIC_REBOOT_WAIT_TIME);
878					for (loop = PANIC_REBOOT_WAIT_TIME; loop > 0; --loop) {
879						DELAY(1000 * 1000); /* one second */
880						if (cncheckc()) /* Did user type a key? */
881							break;
882					}
883					if (!loop)
884						goto die;
885				}
886			} else { /* zero time specified - reboot NOW */
887				goto die;
888			}
889			printf("--> Press a key on the console to reboot <--\n");
890			cngetc();
891		}
892	}
893#ifdef lint
894	dummy = 0; dummy = dummy;
895	printf("howto %d, devtype %d\n", arghowto, devtype);
896#endif
897die:
898	printf("Rebooting...\n");
899	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
900	cpu_reset();
901	for(;;) ;
902	/* NOTREACHED */
903}
904
905unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
906int		dumpsize = 0;		/* also for savecore */
907
908int		dodump = 1;
909
910/*
911 * Doadump comes here after turning off memory management and
912 * getting on the dump stack, either when called above, or by
913 * the auto-restart code.
914 */
915void
916dumpsys()
917{
918
919	if (!dodump)
920		return;
921	if (dumpdev == NODEV)
922		return;
923	if ((minor(dumpdev)&07) != 1)
924		return;
925	dumpsize = Maxmem;
926	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
927	printf("dump ");
928	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
929
930	case ENXIO:
931		printf("device bad\n");
932		break;
933
934	case EFAULT:
935		printf("device not ready\n");
936		break;
937
938	case EINVAL:
939		printf("area improper\n");
940		break;
941
942	case EIO:
943		printf("i/o error\n");
944		break;
945
946	case EINTR:
947		printf("aborted from console\n");
948		break;
949
950	default:
951		printf("succeeded\n");
952		break;
953	}
954}
955
956static void
957initcpu()
958{
959}
960
961/*
962 * Clear registers on exec
963 */
964void
965setregs(p, entry, stack)
966	struct proc *p;
967	u_long entry;
968	u_long stack;
969{
970	int *regs = p->p_md.md_regs;
971
972	bzero(regs, sizeof(struct trapframe));
973	regs[tEIP] = entry;
974	regs[tESP] = stack;
975	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
976	regs[tSS] = _udatasel;
977	regs[tDS] = _udatasel;
978	regs[tES] = _udatasel;
979	regs[tCS] = _ucodesel;
980
981	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
982	load_cr0(rcr0() | CR0_TS);	/* start emulating */
983#if	NNPX > 0
984	npxinit(__INITIAL_NPXCW__);
985#endif	/* NNPX > 0 */
986}
987
988/*
989 * machine dependent system variables.
990 */
991int
992cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
993	int *name;
994	u_int namelen;
995	void *oldp;
996	size_t *oldlenp;
997	void *newp;
998	size_t newlen;
999	struct proc *p;
1000{
1001	int error;
1002
1003	/* all sysctl names at this level are terminal */
1004	if (namelen != 1)
1005		return (ENOTDIR);               /* overloaded */
1006
1007	switch (name[0]) {
1008	case CPU_CONSDEV:
1009		return (sysctl_rdstruct(oldp, oldlenp, newp, &cn_tty->t_dev,
1010		   sizeof cn_tty->t_dev));
1011	case CPU_ADJKERNTZ:
1012		error = sysctl_int(oldp, oldlenp, newp, newlen, &adjkerntz);
1013		if (!error && newp)
1014			resettodr();
1015		return error;
1016	case CPU_DISRTCSET:
1017		return (sysctl_int(oldp, oldlenp, newp,	newlen,	&disable_rtc_set));
1018	default:
1019		return (EOPNOTSUPP);
1020	}
1021	/* NOTREACHED */
1022}
1023
1024/*
1025 * Initialize 386 and configure to run kernel
1026 */
1027
1028/*
1029 * Initialize segments & interrupt table
1030 */
1031
1032int currentldt;
1033int _default_ldt;
1034union descriptor gdt[NGDT];		/* global descriptor table */
1035struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
1036union descriptor ldt[NLDT];		/* local descriptor table */
1037
1038struct	i386tss	tss, panic_tss;
1039
1040extern  struct user *proc0paddr;
1041
1042/* software prototypes -- in more palatable form */
1043struct soft_segment_descriptor gdt_segs[] = {
1044/* GNULL_SEL	0 Null Descriptor */
1045{	0x0,			/* segment base address  */
1046	0x0,			/* length */
1047	0,			/* segment type */
1048	0,			/* segment descriptor priority level */
1049	0,			/* segment descriptor present */
1050	0, 0,
1051	0,			/* default 32 vs 16 bit size */
1052	0  			/* limit granularity (byte/page units)*/ },
1053/* GCODE_SEL	1 Code Descriptor for kernel */
1054{	0x0,			/* segment base address  */
1055	0xfffff,		/* length - all address space */
1056	SDT_MEMERA,		/* segment type */
1057	0,			/* segment descriptor priority level */
1058	1,			/* segment descriptor present */
1059	0, 0,
1060	1,			/* default 32 vs 16 bit size */
1061	1  			/* limit granularity (byte/page units)*/ },
1062/* GDATA_SEL	2 Data Descriptor for kernel */
1063{	0x0,			/* segment base address  */
1064	0xfffff,		/* length - all address space */
1065	SDT_MEMRWA,		/* segment type */
1066	0,			/* segment descriptor priority level */
1067	1,			/* segment descriptor present */
1068	0, 0,
1069	1,			/* default 32 vs 16 bit size */
1070	1  			/* limit granularity (byte/page units)*/ },
1071/* GLDT_SEL	3 LDT Descriptor */
1072{	(int) ldt,		/* segment base address  */
1073	sizeof(ldt)-1,		/* length - all address space */
1074	SDT_SYSLDT,		/* segment type */
1075	0,			/* segment descriptor priority level */
1076	1,			/* segment descriptor present */
1077	0, 0,
1078	0,			/* unused - default 32 vs 16 bit size */
1079	0  			/* limit granularity (byte/page units)*/ },
1080/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1081{	0x0,			/* segment base address  */
1082	0x0,			/* length - all address space */
1083	0,			/* segment type */
1084	0,			/* segment descriptor priority level */
1085	0,			/* segment descriptor present */
1086	0, 0,
1087	0,			/* default 32 vs 16 bit size */
1088	0  			/* limit granularity (byte/page units)*/ },
1089/* GPANIC_SEL	5 Panic Tss Descriptor */
1090{	(int) &panic_tss,	/* segment base address  */
1091	sizeof(tss)-1,		/* length - all address space */
1092	SDT_SYS386TSS,		/* segment type */
1093	0,			/* segment descriptor priority level */
1094	1,			/* segment descriptor present */
1095	0, 0,
1096	0,			/* unused - default 32 vs 16 bit size */
1097	0  			/* limit granularity (byte/page units)*/ },
1098/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1099{	(int) kstack,		/* segment base address  */
1100	sizeof(tss)-1,		/* length - all address space */
1101	SDT_SYS386TSS,		/* segment type */
1102	0,			/* segment descriptor priority level */
1103	1,			/* segment descriptor present */
1104	0, 0,
1105	0,			/* unused - default 32 vs 16 bit size */
1106	0  			/* limit granularity (byte/page units)*/ },
1107/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1108{	(int) ldt,		/* segment base address  */
1109	(512 * sizeof(union descriptor)-1),		/* length */
1110	SDT_SYSLDT,		/* segment type */
1111	0,			/* segment descriptor priority level */
1112	1,			/* segment descriptor present */
1113	0, 0,
1114	0,			/* unused - default 32 vs 16 bit size */
1115	0  			/* limit granularity (byte/page units)*/ },
1116/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1117{	0,			/* segment base address (overwritten by APM)  */
1118	0xfffff,		/* length */
1119	SDT_MEMERA,		/* segment type */
1120	0,			/* segment descriptor priority level */
1121	1,			/* segment descriptor present */
1122	0, 0,
1123	1,			/* default 32 vs 16 bit size */
1124	1  			/* limit granularity (byte/page units)*/ },
1125/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1126{	0,			/* segment base address (overwritten by APM)  */
1127	0xfffff,		/* length */
1128	SDT_MEMERA,		/* segment type */
1129	0,			/* segment descriptor priority level */
1130	1,			/* segment descriptor present */
1131	0, 0,
1132	0,			/* default 32 vs 16 bit size */
1133	1  			/* limit granularity (byte/page units)*/ },
1134/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1135{	0,			/* segment base address (overwritten by APM) */
1136	0xfffff,		/* length */
1137	SDT_MEMRWA,		/* segment type */
1138	0,			/* segment descriptor priority level */
1139	1,			/* segment descriptor present */
1140	0, 0,
1141	1,			/* default 32 vs 16 bit size */
1142	1  			/* limit granularity (byte/page units)*/ },
1143};
1144
1145struct soft_segment_descriptor ldt_segs[] = {
1146	/* Null Descriptor - overwritten by call gate */
1147{	0x0,			/* segment base address  */
1148	0x0,			/* length - all address space */
1149	0,			/* segment type */
1150	0,			/* segment descriptor priority level */
1151	0,			/* segment descriptor present */
1152	0, 0,
1153	0,			/* default 32 vs 16 bit size */
1154	0  			/* limit granularity (byte/page units)*/ },
1155	/* Null Descriptor - overwritten by call gate */
1156{	0x0,			/* segment base address  */
1157	0x0,			/* length - all address space */
1158	0,			/* segment type */
1159	0,			/* segment descriptor priority level */
1160	0,			/* segment descriptor present */
1161	0, 0,
1162	0,			/* default 32 vs 16 bit size */
1163	0  			/* limit granularity (byte/page units)*/ },
1164	/* Null Descriptor - overwritten by call gate */
1165{	0x0,			/* segment base address  */
1166	0x0,			/* length - all address space */
1167	0,			/* segment type */
1168	0,			/* segment descriptor priority level */
1169	0,			/* segment descriptor present */
1170	0, 0,
1171	0,			/* default 32 vs 16 bit size */
1172	0  			/* limit granularity (byte/page units)*/ },
1173	/* Code Descriptor for user */
1174{	0x0,			/* segment base address  */
1175	0xfffff,		/* length - all address space */
1176	SDT_MEMERA,		/* segment type */
1177	SEL_UPL,		/* segment descriptor priority level */
1178	1,			/* segment descriptor present */
1179	0, 0,
1180	1,			/* default 32 vs 16 bit size */
1181	1  			/* limit granularity (byte/page units)*/ },
1182	/* Data Descriptor for user */
1183{	0x0,			/* segment base address  */
1184	0xfffff,		/* length - all address space */
1185	SDT_MEMRWA,		/* segment type */
1186	SEL_UPL,		/* segment descriptor priority level */
1187	1,			/* segment descriptor present */
1188	0, 0,
1189	1,			/* default 32 vs 16 bit size */
1190	1  			/* limit granularity (byte/page units)*/ },
1191};
1192
1193void
1194setidt(idx, func, typ, dpl)
1195	int idx;
1196	inthand_t *func;
1197	int typ;
1198	int dpl;
1199{
1200	struct gate_descriptor *ip = idt + idx;
1201
1202	ip->gd_looffset = (int)func;
1203	ip->gd_selector = 8;
1204	ip->gd_stkcpy = 0;
1205	ip->gd_xx = 0;
1206	ip->gd_type = typ;
1207	ip->gd_dpl = dpl;
1208	ip->gd_p = 1;
1209	ip->gd_hioffset = ((int)func)>>16 ;
1210}
1211
1212#define	IDTVEC(name)	__CONCAT(X,name)
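/*
 * IDTVEC(name) expands to Xname, the entry point (in assembler) for
 * the corresponding trap or interrupt vector, e.g. IDTVEC(div) == Xdiv.
 */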
1213
1214extern inthand_t
1215	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1216	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1217	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1218	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1219	IDTVEC(syscall);
1220
1221#ifdef COMPAT_LINUX
1222extern inthand_t
1223	IDTVEC(linux_syscall);
1224#endif
1225
1226void
1227sdtossd(sd, ssd)
1228	struct segment_descriptor *sd;
1229	struct soft_segment_descriptor *ssd;
1230{
1231	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1232	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1233	ssd->ssd_type  = sd->sd_type;
1234	ssd->ssd_dpl   = sd->sd_dpl;
1235	ssd->ssd_p     = sd->sd_p;
1236	ssd->ssd_def32 = sd->sd_def32;
1237	ssd->ssd_gran  = sd->sd_gran;
1238}
1239
1240void
1241init386(first)
1242	int first;
1243{
1244	int x;
1245	unsigned biosbasemem, biosextmem;
1246	struct gate_descriptor *gdp;
1247	int gsel_tss;
1248	/* table descriptors - used by the processor to load the GDT and IDT */
1249	struct region_descriptor r_gdt, r_idt;
1250	int	pagesinbase, pagesinext;
1251	int	target_page;
1252
1253	proc0.p_addr = proc0paddr;
1254
1255	/*
1256	 * Initialize the console before we print anything out.
1257	 */
1258
1259	cninit ();
1260
1261	/*
1262	 * make gdt memory segments, the code segment goes up to end of the
1263	 * page with etext in it, the data segment goes to the end of
1264	 * the address space
1265	 */
1266	/*
1267	 * XXX text protection is temporarily (?) disabled.  The limit was
1268	 * i386_btop(i386_round_page(etext)) - 1.
1269	 */
1270	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1271	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1272	for (x = 0; x < NGDT; x++)
1273		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1274
1275	/* make ldt memory segments */
1276	/*
1277	 * The data segment limit must not cover the user area because we
1278	 * don't want the user area to be writable in copyout() etc. (page
1279	 * level protection is lost in kernel mode on 386's).  Also, we
1280	 * don't want the user area to be writable directly (page level
1281	 * protection of the user area is not available on 486's with
1282	 * CR0_WP set, because there is no user-read/kernel-write mode).
1283	 *
1284	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1285	 * should be spelled ...MAX_USER...
1286	 */
1287#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1288	/*
1289	 * The code segment limit has to cover the user area until we move
1290	 * the signal trampoline out of the user area.  This is safe because
1291	 * the code segment cannot be written to directly.
1292	 */
1293#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1294	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1295	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1296	/* Note. eventually want private ldts per process */
1297	for (x = 0; x < NLDT; x++)
1298		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1299
1300	/* exceptions */
1301	for (x = 0; x < NIDT; x++)
1302		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
1303	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1304	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1305	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1306 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1307	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1308	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1309	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1310	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1311	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1312	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1313	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1314	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1315	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1316	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1317	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1318	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1319	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1320	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
1321#ifdef COMPAT_LINUX
1322 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL);
1323#endif
1324
1325#include	"isa.h"
1326#if	NISA >0
1327	isa_defaultirq();
1328#endif
1329
1330	r_gdt.rd_limit = sizeof(gdt) - 1;
1331	r_gdt.rd_base =  (int) gdt;
1332	lgdt(&r_gdt);
1333
1334	r_idt.rd_limit = sizeof(idt) - 1;
1335	r_idt.rd_base = (int) idt;
1336	lidt(&r_idt);
1337
1338	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1339	lldt(_default_ldt);
1340	currentldt = _default_ldt;
1341
1342#ifdef DDB
1343	kdb_init();
1344	if (boothowto & RB_KDB)
1345		Debugger("Boot flags requested debugger");
1346#endif
1347
1348	/* Use BIOS values stored in RTC CMOS RAM, since probing
1349	 * breaks certain 386 AT relics.
1350	 */
1351	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1352	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1353
1354	/*
1355	 * Print a warning if the official BIOS interface disagrees
1356	 * with the hackish interface used above.  Eventually only
1357	 * the official interface should be used.
1358	 */
1359	if (bootinfo.bi_memsizes_valid) {
1360		if (bootinfo.bi_basemem != biosbasemem)
1361			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1362			       bootinfo.bi_basemem, biosbasemem);
1363		if (bootinfo.bi_extmem != biosextmem)
1364			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1365			       bootinfo.bi_extmem, biosextmem);
1366	}
1367
1368	/*
1369	 * If BIOS tells us that it has more than 640k in the basemem,
1370	 *	don't believe it - set it to 640k.
1371	 */
1372	if (biosbasemem > 640)
1373		biosbasemem = 640;
1374
1375	/*
1376	 * Some 386 machines might give us a bogus number for extended
1377	 *	mem. If this happens, stop now.
1378	 */
1379#ifndef LARGEMEM
1380	if (biosextmem > 65536) {
1381		panic("extended memory beyond limit of 64MB");
1382		/* NOTREACHED */
1383	}
1384#endif
1385
1386	pagesinbase = biosbasemem * 1024 / NBPG;
1387	pagesinext = biosextmem * 1024 / NBPG;
1388
1389	/*
1390	 * Special hack for chipsets that still remap the 384k hole when
1391	 *	there's 16MB of memory - this really confuses people that
1392	 *	are trying to use bus mastering ISA controllers with the
1393	 *	"16MB limit"; they only have 16MB, but the remapping puts
1394	 *	them beyond the limit.
1395	 */
1396	/*
1397	 * If extended memory is between 15-16MB (16-17MB phys address range),
1398	 *	chop it to 15MB.
1399	 */
1400	if ((pagesinext > 3840) && (pagesinext < 4096))
1401		pagesinext = 3840;
1402
1403	/*
1404	 * Maxmem isn't the "maximum memory", it's one larger than the
1405	 * highest page of the physical address space.  It should be
1406	 * called something like "Maxphyspage".
1407	 */
1408	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1409
1410#ifdef MAXMEM
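	/*
	 * MAXMEM from the kernel config file (presumably given in
	 * kilobytes) overrides the probed size; dividing by 4 converts
	 * it to 4K pages.
	 */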
1411	Maxmem = MAXMEM/4;
1412#endif
1413	/*
1414	 * Calculate number of physical pages, but account for Maxmem
1415	 *	adjustment above.
1416	 */
1417	physmem = pagesinbase + Maxmem - 0x100000/PAGE_SIZE;
1418
1419	/* call pmap initialization to make new kernel address space */
1420	pmap_bootstrap (first, 0);
1421
1422	/*
1423	 * Do a quick, non-destructive check over extended memory to verify
1424	 * that what the BIOS tells us agrees with reality.  Adjust Maxmem
1425	 * down if we find a page that can't be correctly written and read back.
1426	 */
1427
1428	for (target_page = Maxmem - 1; target_page >= atop(first); target_page--) {
1429		int tmp;
1430
1431		/*
1432		 * map page into kernel: valid, read/write, non-cacheable
1433		 */
1434		*(int *)CMAP1 = PG_V | PG_KW | PG_N | ptoa(target_page);
1435		pmap_update();
1436
1437		tmp = *(int *)CADDR1;
1438		/*
1439		 * Test for alternating 1's and 0's
1440		 */
1441		*(int *)CADDR1 = 0xaaaaaaaa;
1442		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1443			Maxmem = target_page;
1444			badpages++;
1445			continue;
1446		}
1447		/*
1448		 * Test for alternating 0's and 1's
1449		 */
1450		*(int *)CADDR1 = 0x55555555;
1451		if (*(int *)CADDR1 != 0x55555555) {
1452			Maxmem = target_page;
1453			badpages++;
1454			continue;
1455		}
1456		/*
1457		 * Test for all 1's
1458		 */
1459		*(int *)CADDR1 = 0xffffffff;
1460		if (*(int *)CADDR1 != 0xffffffff) {
1461			Maxmem = target_page;
1462			badpages++;
1463			continue;
1464		}
1465		/*
1466		 * Test for all 0's
1467		 */
1468		*(int *)CADDR1 = 0x0;
1469		if (*(int *)CADDR1 != 0x0) {
1470			/*
1471			 * test of page failed
1472			 */
1473			Maxmem = target_page;
1474			badpages++;
1475			continue;
1476		}
1477		*(int *)CADDR1 = tmp;
1478	}
1479	if (badpages != 0)
1480		printf("WARNING: BIOS extended memory size and reality don't agree.\n");
1481
1482	*(int *)CMAP1 = 0;
1483	pmap_update();
1484
1485	avail_end = (Maxmem << PAGE_SHIFT)
1486		    - i386_round_page(sizeof(struct msgbuf));
1487
1488	/*
1489	 * Initialize pointers to the two chunks of memory; for use
1490	 *	later in vm_page_startup.
1491	 */
1492	/* avail_start is initialized in pmap_bootstrap */
1493	x = 0;
1494	if (pagesinbase > 1) {
1495		phys_avail[x++] = NBPG;		/* skip first page of memory */
1496		phys_avail[x++] = pagesinbase * NBPG;	/* memory up to the ISA hole */
1497	}
1498	phys_avail[x++] = avail_start;	/* memory up to the end */
1499	phys_avail[x++] = avail_end;
1500	phys_avail[x++] = 0;		/* no more chunks */
1501	phys_avail[x++] = 0;
1502
1503	/* now running on new page tables, configured, and u/iom is accessible */
1504
1505	/* make an initial TSS so the processor can get the interrupt stack on syscall! */
1506	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1507	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1508	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1509
1510	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1511		(sizeof(tss))<<16;
1512
1513	ltr(gsel_tss);
1514
1515	/* make a call gate to reenter kernel with */
1516	gdp = &ldt[LSYS5CALLS_SEL].gd;
1517
1518	x = (int) &IDTVEC(syscall);
1519	gdp->gd_looffset = x++;
1520	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1521	gdp->gd_stkcpy = 1;
1522	gdp->gd_type = SDT_SYS386CGT;
1523	gdp->gd_dpl = SEL_UPL;
1524	gdp->gd_p = 1;
1525	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1526
1527	/* transfer to user mode */
1528
1529	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1530	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1531
1532	/* setup proc 0's pcb */
1533	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1534	proc0.p_addr->u_pcb.pcb_flags = 0;
1535	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1536}
1537
1538/*
1539 * The registers are in the frame; the frame is in the user area of
1540 * the process in question; when the process is active, the registers
1541 * are in "the kernel stack"; when it's not, they're still there, but
1542 * things get flipped around.  So, since p->p_md.md_regs is the whole address
1543 * of the register set, take its offset from the kernel stack, and
1544 * index into the user block.  Don't you just *love* virtual memory?
1545 * (I'm starting to think seymour is right...)
1546 */
1547#define	TF_REGP(p)	((struct trapframe *) \
1548			 ((char *)(p)->p_addr \
1549			  + ((char *)(p)->p_md.md_regs - kstack)))
1550
1551int
1552ptrace_set_pc(p, addr)
1553	struct proc *p;
1554	unsigned int addr;
1555{
1556	TF_REGP(p)->tf_eip = addr;
1557	return (0);
1558}
1559
1560int
1561ptrace_single_step(p)
1562	struct proc *p;
1563{
1564	TF_REGP(p)->tf_eflags |= PSL_T;
1565	return (0);
1566}
1567
1568int
1569ptrace_getregs(p, addr)
1570	struct proc *p;
1571	unsigned int *addr;
1572{
1573	int error;
1574	struct reg regs;
1575
1576	error = fill_regs(p, &regs);
1577	if (error)
1578		return (error);
1579	return (copyout(&regs, addr, sizeof regs));
1580}
1581
1582int
1583ptrace_setregs(p, addr)
1584	struct proc *p;
1585	unsigned int *addr;
1586{
1587	int error;
1588	struct reg regs;
1589
1590	error = copyin(addr, &regs, sizeof regs);
1591	if (error)
1592		return (error);
1593	return (set_regs(p, &regs));
1594}
1595
1596int ptrace_write_u(p, off, data)
1597	struct proc *p;
1598	vm_offset_t off;
1599	int data;
1600{
1601	struct trapframe frame_copy;
1602	vm_offset_t min;
1603	struct trapframe *tp;
1604
1605	/*
1606	 * Privileged kernel state is scattered all over the user area.
1607	 * Only allow write access to parts of regs and to fpregs.
1608	 */
1609	min = (char *)p->p_md.md_regs - kstack;
1610	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1611		tp = TF_REGP(p);
1612		frame_copy = *tp;
1613		*(int *)((char *)&frame_copy + (off - min)) = data;
1614		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1615		    !CS_SECURE(frame_copy.tf_cs))
1616			return (EINVAL);
1617		*(int*)((char *)p->p_addr + off) = data;
1618		return (0);
1619	}
1620	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1621	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1622		*(int*)((char *)p->p_addr + off) = data;
1623		return (0);
1624	}
1625	return (EFAULT);
1626}
1627
1628int
1629fill_regs(p, regs)
1630	struct proc *p;
1631	struct reg *regs;
1632{
1633	struct trapframe *tp;
1634
1635	tp = TF_REGP(p);
1636	regs->r_es = tp->tf_es;
1637	regs->r_ds = tp->tf_ds;
1638	regs->r_edi = tp->tf_edi;
1639	regs->r_esi = tp->tf_esi;
1640	regs->r_ebp = tp->tf_ebp;
1641	regs->r_ebx = tp->tf_ebx;
1642	regs->r_edx = tp->tf_edx;
1643	regs->r_ecx = tp->tf_ecx;
1644	regs->r_eax = tp->tf_eax;
1645	regs->r_eip = tp->tf_eip;
1646	regs->r_cs = tp->tf_cs;
1647	regs->r_eflags = tp->tf_eflags;
1648	regs->r_esp = tp->tf_esp;
1649	regs->r_ss = tp->tf_ss;
1650	return (0);
1651}
1652
1653int
1654set_regs(p, regs)
1655	struct proc *p;
1656	struct reg *regs;
1657{
1658	struct trapframe *tp;
1659
1660	tp = TF_REGP(p);
1661	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1662	    !CS_SECURE(regs->r_cs))
1663		return (EINVAL);
1664	tp->tf_es = regs->r_es;
1665	tp->tf_ds = regs->r_ds;
1666	tp->tf_edi = regs->r_edi;
1667	tp->tf_esi = regs->r_esi;
1668	tp->tf_ebp = regs->r_ebp;
1669	tp->tf_ebx = regs->r_ebx;
1670	tp->tf_edx = regs->r_edx;
1671	tp->tf_ecx = regs->r_ecx;
1672	tp->tf_eax = regs->r_eax;
1673	tp->tf_eip = regs->r_eip;
1674	tp->tf_cs = regs->r_cs;
1675	tp->tf_eflags = regs->r_eflags;
1676	tp->tf_esp = regs->r_esp;
1677	tp->tf_ss = regs->r_ss;
1678	return (0);
1679}
1680
1681#ifndef DDB
1682void
1683Debugger(const char *msg)
1684{
1685	printf("Debugger(\"%s\") called.\n", msg);
1686}
1687#endif /* no DDB */
1688
1689#include <sys/disklabel.h>
1690#define b_cylin	b_resid
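/*
 * b_resid is presumably free until the transfer completes, so it is
 * borrowed here to carry the cylinder number that
 * bounds_check_with_label() computes for disksort() to sort on.
 */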
1691/*
1692 * Determine the size of the transfer, and make sure it is
1693 * within the boundaries of the partition. Adjust transfer
1694 * if needed, and signal errors or early completion.
1695 */
1696int
1697bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1698{
1699        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1700        int labelsect = lp->d_partitions[0].p_offset;
1701        int maxsz = p->p_size,
1702                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1703
1704        /* overwriting disk label ? */
1705        /* XXX should also protect bootstrap in first 8K */
1706        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1707#if LABELSECTOR != 0
1708            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1709#endif
1710            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1711                bp->b_error = EROFS;
1712                goto bad;
1713        }
1714
1715#if     defined(DOSBBSECTOR) && defined(notyet)
1716        /* overwriting master boot record? */
1717        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1718            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1719                bp->b_error = EROFS;
1720                goto bad;
1721        }
1722#endif
1723
1724        /* beyond partition? */
1725        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1726                /* if exactly at end of disk, return an EOF */
1727                if (bp->b_blkno == maxsz) {
1728                        bp->b_resid = bp->b_bcount;
1729                        return(0);
1730                }
1731                /* or truncate if part of it fits */
1732                sz = maxsz - bp->b_blkno;
1733                if (sz <= 0) {
1734                        bp->b_error = EINVAL;
1735                        goto bad;
1736                }
1737                bp->b_bcount = sz << DEV_BSHIFT;
1738        }
1739
1740        /* calculate cylinder for disksort to order transfers with */
1741        bp->b_pblkno = bp->b_blkno + p->p_offset;
1742        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1743        return(1);
1744
1745bad:
1746        bp->b_flags |= B_ERROR;
1747        return(-1);
1748}
1749
1750int
1751disk_externalize(int drive, void *userp, size_t *maxlen)
1752{
1753	if(*maxlen < sizeof drive) {
1754		return ENOMEM;
1755	}
1756
1757	*maxlen -= sizeof drive;
1758	return copyout(&drive, userp, sizeof drive);
1759}
1760