machdep.c revision 12827
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.158 1995/12/13 15:12:23 julian Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/signalvar.h>
48#include <sys/kernel.h>
49#include <sys/proc.h>
50#include <sys/buf.h>
51#include <sys/reboot.h>
52#include <sys/conf.h>
53#include <sys/file.h>
54#include <sys/callout.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/mount.h>
58#include <sys/msgbuf.h>
59#include <sys/ioctl.h>
60#include <sys/sysent.h>
61#include <sys/tty.h>
62#include <sys/sysctl.h>
63#include <sys/devconf.h>
64#include <sys/vmmeter.h>
65
66#ifdef SYSVSHM
67#include <sys/shm.h>
68#endif
69
70#ifdef SYSVMSG
71#include <sys/msg.h>
72#endif
73
74#ifdef SYSVSEM
75#include <sys/sem.h>
76#endif
77
78#include <vm/vm.h>
79#include <vm/vm_param.h>
80#include <vm/vm_prot.h>
81#include <vm/lock.h>
82#include <vm/vm_kern.h>
83#include <vm/vm_object.h>
84#include <vm/vm_page.h>
85#include <vm/vm_map.h>
86#include <vm/vm_pager.h>
87#include <vm/vm_extern.h>
88
89#include <sys/user.h>
90#include <sys/exec.h>
91#include <sys/vnode.h>
92
93#include <ddb/ddb.h>
94
95#include <net/netisr.h>
96
97#include <machine/cpu.h>
98#include <machine/npx.h>
99#include <machine/reg.h>
100#include <machine/psl.h>
101#include <machine/clock.h>
102#include <machine/specialreg.h>
103#include <machine/sysarch.h>
104#include <machine/cons.h>
105#include <machine/devconf.h>
106#include <machine/bootinfo.h>
107#include <machine/md_var.h>
108
109#include <i386/isa/isa.h>
110#include <i386/isa/isa_device.h>
111#include <i386/isa/rtc.h>
112#include <machine/random.h>
113
114extern void init386 __P((int first));
115extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
116extern int ptrace_single_step __P((struct proc *p));
117extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
118
119static void cpu_startup __P((void *));
120SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
121
122static void identifycpu(void);
123
124char machine[] = "i386";
125SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
126
127static char cpu_model[128];
128SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "");
129
130struct kern_devconf kdc_cpu0 = {
131	0, 0, 0,		/* filled in by dev_attach */
132	"cpu", 0, { MDDT_CPU },
133	0, 0, 0, CPU_EXTERNALLEN,
134	0,			/* CPU has no parent */
135	0,			/* no parentdata */
136	DC_BUSY,		/* the CPU is always busy */
137	cpu_model,		/* no sense in duplication */
138	DC_CLS_CPU		/* class */
139};
140
/*
 * Seconds boot() waits after a dump before rebooting automatically.
 * boot() treats 0 as "reboot at once" and -1 as "wait for a key".
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif

#ifdef BOUNCE_BUFFERS
extern char *bouncememory;
extern int maxbkva;
/* Number of bounce pages; 0 lets cpu_startup() choose a value. */
#ifdef BOUNCEPAGES
int	bouncepages = BOUNCEPAGES;
#else
int	bouncepages = 0;
#endif
#endif	/* BOUNCE_BUFFERS */

extern int freebufspace;

/* Set by cpu_startup() once the message buffer has been mapped. */
int	msgbufmapped = 0;

/* User data and code segment selectors loaded by sendsig()/setregs(). */
int _udatasel;
int _ucodesel;

/* Physical memory size in pages (converted with ctob() for sysctl). */
int physmem = 0;
161
162static int
163sysctl_hw_physmem SYSCTL_HANDLER_ARGS
164{
165	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
166	return (error);
167}
168
169SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
170	0, 0, sysctl_hw_physmem, "I", "");
171
172static int
173sysctl_hw_usermem SYSCTL_HANDLER_ARGS
174{
175	int error = sysctl_handle_int(oidp, 0,
176		ctob(physmem - cnt.v_wire_count), req);
177	return (error);
178}
179
180SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
181	0, 0, sysctl_hw_usermem, "I", "");
182
183int boothowto = 0, bootverbose = 0, Maxmem = 0;
184static int	badpages = 0;
185long dumplo;
186extern int bootdev;
187
188vm_offset_t phys_avail[10];
189
190/* must be 2 less so 0 0 can signal end of chunks */
191#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
192
193int cpu_class;
194
195static void dumpsys __P((void));
196static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */
197
198static vm_offset_t buffer_sva, buffer_eva;
199vm_offset_t clean_sva, clean_eva;
200static vm_offset_t pager_sva, pager_eva;
201extern struct linker_set netisr_set;
202
203#define offsetof(type, member)	((size_t)(&((type *)0)->member))
204
205static void
206cpu_startup(dummy)
207	void *dummy;
208{
209	register unsigned i;
210	register caddr_t v;
211	vm_offset_t maxaddr;
212	vm_size_t size = 0;
213	int firstaddr;
214	vm_offset_t minaddr;
215
216	if (boothowto & RB_VERBOSE)
217		bootverbose++;
218
219	/*
220	 * Initialize error message buffer (at end of core).
221	 */
222
223	/* avail_end was pre-decremented in init_386() to compensate */
224	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
225		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
226			   avail_end + i * NBPG,
227			   VM_PROT_ALL, TRUE);
228	msgbufmapped = 1;
229
230	/*
231	 * Good {morning,afternoon,evening,night}.
232	 */
233	printf(version);
234	startrtclock();
235	identifycpu();
236	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
237	/*
238	 * Display any holes after the first chunk of extended memory.
239	 */
240	if (badpages != 0) {
241		int indx = 1;
242
243		/*
244		 * XXX skip reporting ISA hole & unmanaged kernel memory
245		 */
246		if (phys_avail[0] == PAGE_SIZE)
247			indx += 2;
248
249		printf("Physical memory hole(s):\n");
250		for (; phys_avail[indx + 1] != 0; indx += 2) {
251			int size = phys_avail[indx + 1] - phys_avail[indx];
252
253			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
254			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
255		}
256	}
257
258	/*
259	 * Quickly wire in netisrs.
260	 */
261	setup_netisrs(&netisr_set);
262
263/*
264#ifdef ISDN
265	DONET(isdnintr, NETISR_ISDN);
266#endif
267*/
268
269	/*
270	 * Allocate space for system data structures.
271	 * The first available kernel virtual address is in "v".
272	 * As pages of kernel virtual memory are allocated, "v" is incremented.
273	 * As pages of memory are allocated and cleared,
274	 * "firstaddr" is incremented.
275	 * An index into the kernel page table corresponding to the
276	 * virtual memory address maintained in "v" is kept in "mapaddr".
277	 */
278
279	/*
280	 * Make two passes.  The first pass calculates how much memory is
281	 * needed and allocates it.  The second pass assigns virtual
282	 * addresses to the various data structures.
283	 */
284	firstaddr = 0;
285again:
286	v = (caddr_t)firstaddr;
287
288#define	valloc(name, type, num) \
289	    (name) = (type *)v; v = (caddr_t)((name)+(num))
290#define	valloclim(name, type, num, lim) \
291	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
292	valloc(callout, struct callout, ncallout);
293#ifdef SYSVSHM
294	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
295#endif
296#ifdef SYSVSEM
297	valloc(sema, struct semid_ds, seminfo.semmni);
298	valloc(sem, struct sem, seminfo.semmns);
299	/* This is pretty disgusting! */
300	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
301#endif
302#ifdef SYSVMSG
303	valloc(msgpool, char, msginfo.msgmax);
304	valloc(msgmaps, struct msgmap, msginfo.msgseg);
305	valloc(msghdrs, struct msg, msginfo.msgtql);
306	valloc(msqids, struct msqid_ds, msginfo.msgmni);
307#endif
308
309	if (nbuf == 0) {
310		nbuf = 30;
311		if( physmem > 1024)
312			nbuf += min((physmem - 1024) / 12, 1024);
313	}
314	nswbuf = min(nbuf, 128);
315
316	valloc(swbuf, struct buf, nswbuf);
317	valloc(buf, struct buf, nbuf);
318
319#ifdef BOUNCE_BUFFERS
320	/*
321	 * If there is more than 16MB of memory, allocate some bounce buffers
322	 */
323	if (Maxmem > 4096) {
324		if (bouncepages == 0) {
325			bouncepages = 64;
326			bouncepages += ((Maxmem - 4096) / 2048) * 32;
327		}
328		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
329		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
330	}
331#endif
332
333	/*
334	 * End of first pass, size has been calculated so allocate memory
335	 */
336	if (firstaddr == 0) {
337		size = (vm_size_t)(v - firstaddr);
338		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
339		if (firstaddr == 0)
340			panic("startup: no room for tables");
341		goto again;
342	}
343
344	/*
345	 * End of second pass, addresses have been assigned
346	 */
347	if ((vm_size_t)(v - firstaddr) != size)
348		panic("startup: table size inconsistency");
349
350#ifdef BOUNCE_BUFFERS
351	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
352			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
353				maxbkva + pager_map_size, TRUE);
354	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
355#else
356	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
357			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
358#endif
359	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
360				(nbuf*MAXBSIZE), TRUE);
361	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
362				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
363	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
364				(16*ARG_MAX), TRUE);
365	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
366				(maxproc*UPAGES*PAGE_SIZE), FALSE);
367
368	/*
369	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
370	 * we use the more space efficient malloc in place of kmem_alloc.
371	 */
372	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
373				   M_MBUF, M_NOWAIT);
374	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
375	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
376			       nmbclusters * MCLBYTES, FALSE);
377	/*
378	 * Initialize callouts
379	 */
380	callfree = callout;
381	for (i = 1; i < ncallout; i++)
382		callout[i-1].c_next = &callout[i];
383
384        if (boothowto & RB_CONFIG) {
385		userconfig();
386		cninit();	/* the preferred console may have changed */
387	}
388
389#ifdef BOUNCE_BUFFERS
390	/*
391	 * init bounce buffers
392	 */
393	vm_bounce_init();
394#endif
395	/*
396	 * XXX allocate a contiguous area for ISA (non busmaster) DMA
397	 * operations. This _should_ only be done if the DMA channels
398	 * will actually be used, but for now we do it always.
399	 */
400#define DMAPAGES 8
401	isaphysmem =
402	    vm_page_alloc_contig(DMAPAGES * PAGE_SIZE, 0, 0xfffffful, 64*1024);
403
404	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
405	    ptoa(cnt.v_free_count) / 1024);
406
407	/*
408	 * Set up buffers, so they can be used to read disk labels.
409	 */
410	bufinit();
411	vm_pager_bufferinit();
412
413	/*
414	 * In verbose mode, print out the BIOS's idea of the disk geometries.
415	 */
416	if (bootverbose) {
417		printf("BIOS Geometries:\n");
418		for (i = 0; i < N_BIOS_GEOM; i++) {
419			unsigned long bios_geom;
420			int max_cylinder, max_head, max_sector;
421
422			bios_geom = bootinfo.bi_bios_geom[i];
423
424			/*
425			 * XXX the bootstrap punts a 1200K floppy geometry
426			 * when the get-disk-geometry interrupt fails.  Skip
427			 * drives that have this geometry.
428			 */
429			if (bios_geom == 0x4f010f)
430				continue;
431
432			printf(" %x:%08lx ", i, bios_geom);
433			max_cylinder = bios_geom >> 16;
434			max_head = (bios_geom >> 8) & 0xff;
435			max_sector = bios_geom & 0xff;
436			printf(
437		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
438			       max_cylinder, max_cylinder + 1,
439			       max_head, max_head + 1,
440			       max_sector, max_sector);
441		}
442		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
443	}
444}
445
446int
447register_netisr(num, handler)
448	int num;
449	netisr_t *handler;
450{
451
452	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
453		printf("register_netisr: bad isr number: %d\n", num);
454		return (EINVAL);
455	}
456	netisrs[num] = handler;
457	return (0);
458}
459
460static void
461setup_netisrs(ls)
462	struct linker_set *ls;
463{
464	int i;
465	const struct netisrtab *nit;
466
467	for(i = 0; ls->ls_items[i]; i++) {
468		nit = (const struct netisrtab *)ls->ls_items[i];
469		register_netisr(nit->nit_num, nit->nit_isr);
470	}
471}
472
473static struct cpu_nameclass i386_cpus[] = {
474	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
475	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
476	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
477	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
478	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
479	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
480	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
481};
482
483static void
484identifycpu()
485{
486	printf("CPU: ");
487	if (cpu >= 0
488	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
489		cpu_class = i386_cpus[cpu].cpu_class;
490		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
491	} else {
492		printf("unknown cpu type %d\n", cpu);
493		panic("startup: bad cpu id");
494	}
495
496#if defined(I586_CPU)
497	if(cpu_class == CPUCLASS_586) {
498		calibrate_cyclecounter();
499	}
500#endif
501#if defined(I486_CPU) || defined(I586_CPU)
502	if (!strcmp(cpu_vendor,"GenuineIntel")) {
503		if ((cpu_id & 0xf00) > 3) {
504			cpu_model[0] = '\0';
505
506			switch (cpu_id & 0x3000) {
507			case 0x1000:
508				strcpy(cpu_model, "Overdrive ");
509				break;
510			case 0x2000:
511				strcpy(cpu_model, "Dual ");
512				break;
513			}
514			if ((cpu_id & 0xf00) == 0x400) {
515				strcat(cpu_model, "i486 ");
516#if defined(I586_CPU)
517			} else if ((cpu_id & 0xf00) == 0x500) {
518				strcat(cpu_model, "Pentium"); /* nb no space */
519#endif
520			} else {
521				strcat(cpu_model, "unknown ");
522			}
523
524			switch (cpu_id & 0xff0) {
525			case 0x400:
526				strcat(cpu_model, "DX"); break;
527			case 0x410:
528				strcat(cpu_model, "DX"); break;
529			case 0x420:
530				strcat(cpu_model, "SX"); break;
531			case 0x430:
532				strcat(cpu_model, "DX2"); break;
533			case 0x440:
534				strcat(cpu_model, "SL"); break;
535			case 0x450:
536				strcat(cpu_model, "SX2"); break;
537			case 0x470:
538				strcat(cpu_model, "DX2 Write-Back Enhanced");
539				break;
540			case 0x480:
541				strcat(cpu_model, "DX4"); break;
542#if defined(I586_CPU)
543			case 0x510:
544			case 0x520:
545				/*
546				 * We used to do all sorts of nonsense here
547				 * to print out iCOMP numbers.  Since these
548				 * are meaningless except to Intel
549				 * marketroids, there seems to be little
550				 * sense in doing so.
551				 */
552				break;
553#endif
554			}
555		}
556	}
557#endif
558	printf("%s (", cpu_model);
559	switch(cpu_class) {
560	case CPUCLASS_286:
561		printf("286");
562		break;
563#if defined(I386_CPU)
564	case CPUCLASS_386:
565		printf("386");
566		break;
567#endif
568#if defined(I486_CPU)
569	case CPUCLASS_486:
570		printf("486");
571		break;
572#endif
573#if defined(I586_CPU)
574	case CPUCLASS_586:
575		printf("%d.%02d-MHz ",
576		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100,
577		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100);
578		printf("586");
579		break;
580#endif
581	default:
582		printf("unknown");	/* will panic below... */
583	}
584	printf("-class CPU)\n");
585#if defined(I486_CPU) || defined(I586_CPU)
586	if(*cpu_vendor)
587		printf("  Origin = \"%s\"",cpu_vendor);
588	if(cpu_id)
589		printf("  Id = 0x%lx",cpu_id);
590
591	if (!strcmp(cpu_vendor, "GenuineIntel")) {
592		printf("  Stepping=%ld", cpu_id & 0xf);
593		if (cpu_high > 0) {
594#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
595			printf("\n  Features=0x%b", cpu_feature, FEATUREFMT);
596		}
597	}
598	/* Avoid ugly blank lines: only print newline when we have to. */
599	if (*cpu_vendor || cpu_id)
600		printf("\n");
601#endif
602	/*
603	 * Now that we have told the user what they have,
604	 * let them know if that machine type isn't configured.
605	 */
606	switch (cpu_class) {
607	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
608#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
609#error This kernel is not configured for one of the supported CPUs
610#endif
611#if !defined(I386_CPU)
612	case CPUCLASS_386:
613#endif
614#if !defined(I486_CPU)
615	case CPUCLASS_486:
616#endif
617#if !defined(I586_CPU)
618	case CPUCLASS_586:
619#endif
620		panic("CPU class not configured");
621	default:
622		break;
623	}
624	dev_attach(&kdc_cpu0);
625}
626
627/*
628 * Send an interrupt to process.
629 *
630 * Stack is set up to allow sigcode stored
631 * in u. to call routine, followed by kcall
632 * to sigreturn routine below.  After sigreturn
633 * resets the signal mask, the stack, and the
634 * frame pointer, it returns to the user
635 * specified pc, psl.
636 */
637void
638sendsig(catcher, sig, mask, code)
639	sig_t catcher;
640	int sig, mask;
641	unsigned code;
642{
643	register struct proc *p = curproc;
644	register int *regs;
645	register struct sigframe *fp;
646	struct sigframe sf;
647	struct sigacts *psp = p->p_sigacts;
648	int oonstack;
649
650	regs = p->p_md.md_regs;
651        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
652	/*
653	 * Allocate and validate space for the signal handler
654	 * context. Note that if the stack is in P0 space, the
655	 * call to grow() is a nop, and the useracc() check
656	 * will fail if the process has not already allocated
657	 * the space with a `brk'.
658	 */
659        if ((psp->ps_flags & SAS_ALTSTACK) &&
660	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
661	    (psp->ps_sigonstack & sigmask(sig))) {
662		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
663		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
664		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
665	} else {
666		fp = (struct sigframe *)(regs[tESP]
667			- sizeof(struct sigframe));
668	}
669
670	/*
671	 * grow() will return FALSE if the fp will not fit inside the stack
672	 *	and the stack can not be grown. useracc will return FALSE
673	 *	if access is denied.
674	 */
675	if ((grow(p, (int)fp) == FALSE) ||
676	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
677		/*
678		 * Process has trashed its stack; give it an illegal
679		 * instruction to halt it in its tracks.
680		 */
681		SIGACTION(p, SIGILL) = SIG_DFL;
682		sig = sigmask(SIGILL);
683		p->p_sigignore &= ~sig;
684		p->p_sigcatch &= ~sig;
685		p->p_sigmask &= ~sig;
686		psignal(p, SIGILL);
687		return;
688	}
689
690	/*
691	 * Build the argument list for the signal handler.
692	 */
693	if (p->p_sysent->sv_sigtbl) {
694		if (sig < p->p_sysent->sv_sigsize)
695			sig = p->p_sysent->sv_sigtbl[sig];
696		else
697			sig = p->p_sysent->sv_sigsize + 1;
698	}
699	sf.sf_signum = sig;
700	sf.sf_code = code;
701	sf.sf_scp = &fp->sf_sc;
702	sf.sf_addr = (char *) regs[tERR];
703	sf.sf_handler = catcher;
704
705	/* save scratch registers */
706	sf.sf_sc.sc_eax = regs[tEAX];
707	sf.sf_sc.sc_ebx = regs[tEBX];
708	sf.sf_sc.sc_ecx = regs[tECX];
709	sf.sf_sc.sc_edx = regs[tEDX];
710	sf.sf_sc.sc_esi = regs[tESI];
711	sf.sf_sc.sc_edi = regs[tEDI];
712	sf.sf_sc.sc_cs = regs[tCS];
713	sf.sf_sc.sc_ds = regs[tDS];
714	sf.sf_sc.sc_ss = regs[tSS];
715	sf.sf_sc.sc_es = regs[tES];
716	sf.sf_sc.sc_isp = regs[tISP];
717
718	/*
719	 * Build the signal context to be used by sigreturn.
720	 */
721	sf.sf_sc.sc_onstack = oonstack;
722	sf.sf_sc.sc_mask = mask;
723	sf.sf_sc.sc_sp = regs[tESP];
724	sf.sf_sc.sc_fp = regs[tEBP];
725	sf.sf_sc.sc_pc = regs[tEIP];
726	sf.sf_sc.sc_ps = regs[tEFLAGS];
727
728	/*
729	 * Copy the sigframe out to the user's stack.
730	 */
731	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
732		/*
733		 * Something is wrong with the stack pointer.
734		 * ...Kill the process.
735		 */
736		sigexit(p, SIGILL);
737	};
738
739	regs[tESP] = (int)fp;
740	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
741	regs[tEFLAGS] &= ~PSL_VM;
742	regs[tCS] = _ucodesel;
743	regs[tDS] = _udatasel;
744	regs[tES] = _udatasel;
745	regs[tSS] = _udatasel;
746}
747
748/*
749 * System call to cleanup state after a signal
750 * has been taken.  Reset signal mask and
751 * stack state from context left by sendsig (above).
752 * Return to previous pc and psl as specified by
753 * context left by sendsig. Check carefully to
754 * make sure that the user has not modified the
755 * state to gain improper privileges.
756 */
757int
758sigreturn(p, uap, retval)
759	struct proc *p;
760	struct sigreturn_args /* {
761		struct sigcontext *sigcntxp;
762	} */ *uap;
763	int *retval;
764{
765	register struct sigcontext *scp;
766	register struct sigframe *fp;
767	register int *regs = p->p_md.md_regs;
768	int eflags;
769
770	/*
771	 * (XXX old comment) regs[tESP] points to the return address.
772	 * The user scp pointer is above that.
773	 * The return address is faked in the signal trampoline code
774	 * for consistency.
775	 */
776	scp = uap->sigcntxp;
777	fp = (struct sigframe *)
778	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
779
780	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
781		return(EINVAL);
782
783	/*
784	 * Don't allow users to change privileged or reserved flags.
785	 */
786#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
787	eflags = scp->sc_ps;
788	/*
789	 * XXX do allow users to change the privileged flag PSL_RF.  The
790	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
791	 * sometimes set it there too.  tf_eflags is kept in the signal
792	 * context during signal handling and there is no other place
793	 * to remember it, so the PSL_RF bit may be corrupted by the
794	 * signal handler without us knowing.  Corruption of the PSL_RF
795	 * bit at worst causes one more or one less debugger trap, so
796	 * allowing it is fairly harmless.
797	 */
798	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
799#ifdef DEBUG
800    		printf("sigreturn: eflags = 0x%x\n", eflags);
801#endif
802    		return(EINVAL);
803	}
804
805	/*
806	 * Don't allow users to load a valid privileged %cs.  Let the
807	 * hardware check for invalid selectors, excess privilege in
808	 * other selectors, invalid %eip's and invalid %esp's.
809	 */
810#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
811	if (!CS_SECURE(scp->sc_cs)) {
812#ifdef DEBUG
813    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
814#endif
815		trapsignal(p, SIGBUS, T_PROTFLT);
816		return(EINVAL);
817	}
818
819	/* restore scratch registers */
820	regs[tEAX] = scp->sc_eax;
821	regs[tEBX] = scp->sc_ebx;
822	regs[tECX] = scp->sc_ecx;
823	regs[tEDX] = scp->sc_edx;
824	regs[tESI] = scp->sc_esi;
825	regs[tEDI] = scp->sc_edi;
826	regs[tCS] = scp->sc_cs;
827	regs[tDS] = scp->sc_ds;
828	regs[tES] = scp->sc_es;
829	regs[tSS] = scp->sc_ss;
830	regs[tISP] = scp->sc_isp;
831
832	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
833		return(EINVAL);
834
835	if (scp->sc_onstack & 01)
836		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
837	else
838		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
839	p->p_sigmask = scp->sc_mask &~
840	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
841	regs[tEBP] = scp->sc_fp;
842	regs[tESP] = scp->sc_sp;
843	regs[tEIP] = scp->sc_pc;
844	regs[tEFLAGS] = eflags;
845	return(EJUSTRETURN);
846}
847
848static int	waittime = -1;
849static struct pcb dumppcb;
850
851__dead void
852boot(howto)
853	int howto;
854{
855	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
856		register struct buf *bp;
857		int iter, nbusy;
858
859		waittime = 0;
860		printf("\nsyncing disks... ");
861
862		sync(&proc0, NULL, NULL);
863
864		for (iter = 0; iter < 20; iter++) {
865			nbusy = 0;
866			for (bp = &buf[nbuf]; --bp >= buf; ) {
867				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
868					nbusy++;
869				}
870			}
871			if (nbusy == 0)
872				break;
873			printf("%d ", nbusy);
874			DELAY(40000 * iter);
875		}
876		if (nbusy) {
877			/*
878			 * Failed to sync all blocks. Indicate this and don't
879			 * unmount filesystems (thus forcing an fsck on reboot).
880			 */
881			printf("giving up\n");
882#ifdef SHOW_BUSYBUFS
883			nbusy = 0;
884			for (bp = &buf[nbuf]; --bp >= buf; ) {
885				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
886					nbusy++;
887					printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno);
888				}
889			}
890			DELAY(5000000);	/* 5 seconds */
891#endif
892		} else {
893			printf("done\n");
894			/*
895			 * Unmount filesystems
896			 */
897			if (panicstr == 0)
898				vfs_unmountall();
899		}
900		DELAY(100000);			/* wait for console output to finish */
901		dev_shutdownall(FALSE);
902	}
903	splhigh();
904	if (howto & RB_HALT) {
905		printf("\n");
906		printf("The operating system has halted.\n");
907		printf("Please press any key to reboot.\n\n");
908		cngetc();
909	} else {
910		if (howto & RB_DUMP) {
911			if (!cold) {
912				savectx(&dumppcb, 0);
913				dumppcb.pcb_ptd = rcr3();
914				dumpsys();
915			}
916
917			if (PANIC_REBOOT_WAIT_TIME != 0) {
918				if (PANIC_REBOOT_WAIT_TIME != -1) {
919					int loop;
920					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
921						PANIC_REBOOT_WAIT_TIME);
922					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
923						DELAY(1000 * 100); /* 1/10th second */
924						if (cncheckc()) /* Did user type a key? */
925							break;
926					}
927					if (!loop)
928						goto die;
929				}
930			} else { /* zero time specified - reboot NOW */
931				goto die;
932			}
933			printf("--> Press a key on the console to reboot <--\n");
934			cngetc();
935		}
936	}
937die:
938	printf("Rebooting...\n");
939	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
940	cpu_reset();
941	for(;;) ;
942	/* NOTREACHED */
943}
944
945/*
946 * Magic number for savecore
947 *
948 * exported (symorder) and used at least by savecore(8)
949 *
950 */
951u_long		dumpmag = 0x8fca0101UL;
952
953static int	dumpsize = 0;		/* also for savecore */
954
955static int	dodump = 1;
956SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, "");
957
958/*
959 * Doadump comes here after turning off memory management and
960 * getting on the dump stack, either when called above, or by
961 * the auto-restart code.
962 */
963static void
964dumpsys()
965{
966
967	if (!dodump)
968		return;
969	if (dumpdev == NODEV)
970		return;
971	if ((minor(dumpdev)&07) != 1)
972		return;
973	dumpsize = Maxmem;
974	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
975	printf("dump ");
976	switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) {
977
978	case ENXIO:
979		printf("device bad\n");
980		break;
981
982	case EFAULT:
983		printf("device not ready\n");
984		break;
985
986	case EINVAL:
987		printf("area improper\n");
988		break;
989
990	case EIO:
991		printf("i/o error\n");
992		break;
993
994	case EINTR:
995		printf("aborted from console\n");
996		break;
997
998	default:
999		printf("succeeded\n");
1000		break;
1001	}
1002}
1003
1004/*
1005 * Clear registers on exec
1006 */
1007void
1008setregs(p, entry, stack)
1009	struct proc *p;
1010	u_long entry;
1011	u_long stack;
1012{
1013	int *regs = p->p_md.md_regs;
1014
1015	bzero(regs, sizeof(struct trapframe));
1016	regs[tEIP] = entry;
1017	regs[tESP] = stack;
1018	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
1019	regs[tSS] = _udatasel;
1020	regs[tDS] = _udatasel;
1021	regs[tES] = _udatasel;
1022	regs[tCS] = _ucodesel;
1023
1024	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
1025	load_cr0(rcr0() | CR0_TS);	/* start emulating */
1026#if	NNPX > 0
1027	npxinit(__INITIAL_NPXCW__);
1028#endif	/* NNPX > 0 */
1029}
1030
1031static int
1032sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
1033{
1034	int error;
1035	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
1036		req);
1037	if (!error && req->newptr)
1038		resettodr();
1039	return (error);
1040}
1041
1042SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
1043	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
1044
1045SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
1046	CTLFLAG_RW, &disable_rtc_set, 0, "");
1047
1048SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
1049	CTLFLAG_RD, &bootinfo, bootinfo, "");
1050
1051/*
1052 * Initialize 386 and configure to run kernel
1053 */
1054
1055/*
1056 * Initialize segments & interrupt table
1057 */
1058
1059int currentldt;
1060int _default_ldt;
1061union descriptor gdt[NGDT];		/* global descriptor table */
1062struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
1063union descriptor ldt[NLDT];		/* local descriptor table */
1064
1065static struct	i386tss	tss, panic_tss;
1066
1067extern  struct user *proc0paddr;
1068
1069/* software prototypes -- in more palatable form */
1070struct soft_segment_descriptor gdt_segs[] = {
1071/* GNULL_SEL	0 Null Descriptor */
1072{	0x0,			/* segment base address  */
1073	0x0,			/* length */
1074	0,			/* segment type */
1075	0,			/* segment descriptor priority level */
1076	0,			/* segment descriptor present */
1077	0, 0,
1078	0,			/* default 32 vs 16 bit size */
1079	0  			/* limit granularity (byte/page units)*/ },
1080/* GCODE_SEL	1 Code Descriptor for kernel */
1081{	0x0,			/* segment base address  */
1082	0xfffff,		/* length - all address space */
1083	SDT_MEMERA,		/* segment type */
1084	0,			/* segment descriptor priority level */
1085	1,			/* segment descriptor present */
1086	0, 0,
1087	1,			/* default 32 vs 16 bit size */
1088	1  			/* limit granularity (byte/page units)*/ },
1089/* GDATA_SEL	2 Data Descriptor for kernel */
1090{	0x0,			/* segment base address  */
1091	0xfffff,		/* length - all address space */
1092	SDT_MEMRWA,		/* segment type */
1093	0,			/* segment descriptor priority level */
1094	1,			/* segment descriptor present */
1095	0, 0,
1096	1,			/* default 32 vs 16 bit size */
1097	1  			/* limit granularity (byte/page units)*/ },
1098/* GLDT_SEL	3 LDT Descriptor */
1099{	(int) ldt,		/* segment base address  */
1100	sizeof(ldt)-1,		/* length - all address space */
1101	SDT_SYSLDT,		/* segment type */
1102	0,			/* segment descriptor priority level */
1103	1,			/* segment descriptor present */
1104	0, 0,
1105	0,			/* unused - default 32 vs 16 bit size */
1106	0  			/* limit granularity (byte/page units)*/ },
1107/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1108{	0x0,			/* segment base address  */
1109	0x0,			/* length - all address space */
1110	0,			/* segment type */
1111	0,			/* segment descriptor priority level */
1112	0,			/* segment descriptor present */
1113	0, 0,
1114	0,			/* default 32 vs 16 bit size */
1115	0  			/* limit granularity (byte/page units)*/ },
1116/* GPANIC_SEL	5 Panic Tss Descriptor */
1117{	(int) &panic_tss,	/* segment base address  */
1118	sizeof(tss)-1,		/* length - all address space */
1119	SDT_SYS386TSS,		/* segment type */
1120	0,			/* segment descriptor priority level */
1121	1,			/* segment descriptor present */
1122	0, 0,
1123	0,			/* unused - default 32 vs 16 bit size */
1124	0  			/* limit granularity (byte/page units)*/ },
1125/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1126{	(int) kstack,		/* segment base address  */
1127	sizeof(tss)-1,		/* length - all address space */
1128	SDT_SYS386TSS,		/* segment type */
1129	0,			/* segment descriptor priority level */
1130	1,			/* segment descriptor present */
1131	0, 0,
1132	0,			/* unused - default 32 vs 16 bit size */
1133	0  			/* limit granularity (byte/page units)*/ },
1134/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1135{	(int) ldt,		/* segment base address  */
1136	(512 * sizeof(union descriptor)-1),		/* length */
1137	SDT_SYSLDT,		/* segment type */
1138	0,			/* segment descriptor priority level */
1139	1,			/* segment descriptor present */
1140	0, 0,
1141	0,			/* unused - default 32 vs 16 bit size */
1142	0  			/* limit granularity (byte/page units)*/ },
1143/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1144{	0,			/* segment base address (overwritten by APM)  */
1145	0xfffff,		/* length */
1146	SDT_MEMERA,		/* segment type */
1147	0,			/* segment descriptor priority level */
1148	1,			/* segment descriptor present */
1149	0, 0,
1150	1,			/* default 32 vs 16 bit size */
1151	1  			/* limit granularity (byte/page units)*/ },
1152/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1153{	0,			/* segment base address (overwritten by APM)  */
1154	0xfffff,		/* length */
1155	SDT_MEMERA,		/* segment type */
1156	0,			/* segment descriptor priority level */
1157	1,			/* segment descriptor present */
1158	0, 0,
1159	0,			/* default 32 vs 16 bit size */
1160	1  			/* limit granularity (byte/page units)*/ },
1161/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1162{	0,			/* segment base address (overwritten by APM) */
1163	0xfffff,		/* length */
1164	SDT_MEMRWA,		/* segment type */
1165	0,			/* segment descriptor priority level */
1166	1,			/* segment descriptor present */
1167	0, 0,
1168	1,			/* default 32 vs 16 bit size */
1169	1  			/* limit granularity (byte/page units)*/ },
1170};
1171
1172static struct soft_segment_descriptor ldt_segs[] = {
1173	/* Null Descriptor - overwritten by call gate */
1174{	0x0,			/* segment base address  */
1175	0x0,			/* length - all address space */
1176	0,			/* segment type */
1177	0,			/* segment descriptor priority level */
1178	0,			/* segment descriptor present */
1179	0, 0,
1180	0,			/* default 32 vs 16 bit size */
1181	0  			/* limit granularity (byte/page units)*/ },
1182	/* Null Descriptor - overwritten by call gate */
1183{	0x0,			/* segment base address  */
1184	0x0,			/* length - all address space */
1185	0,			/* segment type */
1186	0,			/* segment descriptor priority level */
1187	0,			/* segment descriptor present */
1188	0, 0,
1189	0,			/* default 32 vs 16 bit size */
1190	0  			/* limit granularity (byte/page units)*/ },
1191	/* Null Descriptor - overwritten by call gate */
1192{	0x0,			/* segment base address  */
1193	0x0,			/* length - all address space */
1194	0,			/* segment type */
1195	0,			/* segment descriptor priority level */
1196	0,			/* segment descriptor present */
1197	0, 0,
1198	0,			/* default 32 vs 16 bit size */
1199	0  			/* limit granularity (byte/page units)*/ },
1200	/* Code Descriptor for user */
1201{	0x0,			/* segment base address  */
1202	0xfffff,		/* length - all address space */
1203	SDT_MEMERA,		/* segment type */
1204	SEL_UPL,		/* segment descriptor priority level */
1205	1,			/* segment descriptor present */
1206	0, 0,
1207	1,			/* default 32 vs 16 bit size */
1208	1  			/* limit granularity (byte/page units)*/ },
1209	/* Data Descriptor for user */
1210{	0x0,			/* segment base address  */
1211	0xfffff,		/* length - all address space */
1212	SDT_MEMRWA,		/* segment type */
1213	SEL_UPL,		/* segment descriptor priority level */
1214	1,			/* segment descriptor present */
1215	0, 0,
1216	1,			/* default 32 vs 16 bit size */
1217	1  			/* limit granularity (byte/page units)*/ },
1218};
1219
1220void
1221setidt(idx, func, typ, dpl)
1222	int idx;
1223	inthand_t *func;
1224	int typ;
1225	int dpl;
1226{
1227	struct gate_descriptor *ip = idt + idx;
1228
1229	ip->gd_looffset = (int)func;
1230	ip->gd_selector = 8;
1231	ip->gd_stkcpy = 0;
1232	ip->gd_xx = 0;
1233	ip->gd_type = typ;
1234	ip->gd_dpl = dpl;
1235	ip->gd_p = 1;
1236	ip->gd_hioffset = ((int)func)>>16 ;
1237}
1238
1239#define	IDTVEC(name)	__CONCAT(X,name)
1240
1241extern inthand_t
1242	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1243	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1244	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1245	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1246	IDTVEC(syscall);
1247
1248#if defined(COMPAT_LINUX) || defined(LINUX)
1249extern inthand_t
1250	IDTVEC(linux_syscall);
1251#endif
1252
1253void
1254sdtossd(sd, ssd)
1255	struct segment_descriptor *sd;
1256	struct soft_segment_descriptor *ssd;
1257{
1258	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1259	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1260	ssd->ssd_type  = sd->sd_type;
1261	ssd->ssd_dpl   = sd->sd_dpl;
1262	ssd->ssd_p     = sd->sd_p;
1263	ssd->ssd_def32 = sd->sd_def32;
1264	ssd->ssd_gran  = sd->sd_gran;
1265}
1266
1267void
1268init386(first)
1269	int first;
1270{
1271	int x;
1272	unsigned biosbasemem, biosextmem;
1273	struct gate_descriptor *gdp;
1274	int gsel_tss;
1275	/* table descriptors - used to load tables by microp */
1276	struct region_descriptor r_gdt, r_idt;
1277	int	pagesinbase, pagesinext;
1278	int	target_page, pa_indx;
1279
1280	proc0.p_addr = proc0paddr;
1281
1282	/*
1283	 * Initialize the console before we print anything out.
1284	 */
1285	cninit();
1286
1287	/*
1288	 * make gdt memory segments, the code segment goes up to end of the
1289	 * page with etext in it, the data segment goes to the end of
1290	 * the address space
1291	 */
1292	/*
1293	 * XXX text protection is temporarily (?) disabled.  The limit was
1294	 * i386_btop(i386_round_page(etext)) - 1.
1295	 */
1296	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1297	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1298	for (x = 0; x < NGDT; x++)
1299		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1300
1301	/* make ldt memory segments */
1302	/*
1303	 * The data segment limit must not cover the user area because we
1304	 * don't want the user area to be writable in copyout() etc. (page
1305	 * level protection is lost in kernel mode on 386's).  Also, we
1306	 * don't want the user area to be writable directly (page level
1307	 * protection of the user area is not available on 486's with
1308	 * CR0_WP set, because there is no user-read/kernel-write mode).
1309	 *
1310	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1311	 * should be spelled ...MAX_USER...
1312	 */
1313#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1314	/*
1315	 * The code segment limit has to cover the user area until we move
1316	 * the signal trampoline out of the user area.  This is safe because
1317	 * the code segment cannot be written to directly.
1318	 */
1319#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1320	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1321	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1322	/* Note. eventually want private ldts per process */
1323	for (x = 0; x < NLDT; x++)
1324		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1325
1326	/* exceptions */
1327	for (x = 0; x < NIDT; x++)
1328		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
1329	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1330	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1331	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1332 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1333	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1334	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1335	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1336	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1337	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1338	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1339	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1340	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1341	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1342	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1343	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1344	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1345	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1346	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
1347#if defined(COMPAT_LINUX) || defined(LINUX)
1348 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL);
1349#endif
1350
1351#include	"isa.h"
1352#if	NISA >0
1353	isa_defaultirq();
1354#endif
1355	rand_initialize();
1356
1357	r_gdt.rd_limit = sizeof(gdt) - 1;
1358	r_gdt.rd_base =  (int) gdt;
1359	lgdt(&r_gdt);
1360
1361	r_idt.rd_limit = sizeof(idt) - 1;
1362	r_idt.rd_base = (int) idt;
1363	lidt(&r_idt);
1364
1365	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1366	lldt(_default_ldt);
1367	currentldt = _default_ldt;
1368
1369#ifdef DDB
1370	kdb_init();
1371	if (boothowto & RB_KDB)
1372		Debugger("Boot flags requested debugger");
1373#endif
1374
1375	/* Use BIOS values stored in RTC CMOS RAM, since probing
1376	 * breaks certain 386 AT relics.
1377	 */
1378	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1379	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1380
1381	/*
1382	 * Print a warning if the official BIOS interface disagrees
1383	 * with the hackish interface used above.  Eventually only
1384	 * the official interface should be used.
1385	 */
1386	if (bootinfo.bi_memsizes_valid) {
1387		if (bootinfo.bi_basemem != biosbasemem)
1388			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1389			       bootinfo.bi_basemem, biosbasemem);
1390		if (bootinfo.bi_extmem != biosextmem)
1391			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1392			       bootinfo.bi_extmem, biosextmem);
1393	}
1394
1395	/*
1396	 * If BIOS tells us that it has more than 640k in the basemem,
1397	 *	don't believe it - set it to 640k.
1398	 */
1399	if (biosbasemem > 640)
1400		biosbasemem = 640;
1401
1402	/*
1403	 * Some 386 machines might give us a bogus number for extended
1404	 *	mem. If this happens, stop now.
1405	 */
1406#ifndef LARGEMEM
1407	if (biosextmem > 65536) {
1408		panic("extended memory beyond limit of 64MB");
1409		/* NOTREACHED */
1410	}
1411#endif
1412
1413	pagesinbase = biosbasemem * 1024 / NBPG;
1414	pagesinext = biosextmem * 1024 / NBPG;
1415
1416	/*
1417	 * Special hack for chipsets that still remap the 384k hole when
1418	 *	there's 16MB of memory - this really confuses people that
1419	 *	are trying to use bus mastering ISA controllers with the
1420	 *	"16MB limit"; they only have 16MB, but the remapping puts
1421	 *	them beyond the limit.
1422	 */
1423	/*
1424	 * If extended memory is between 15-16MB (16-17MB phys address range),
1425	 *	chop it to 15MB.
1426	 */
1427	if ((pagesinext > 3840) && (pagesinext < 4096))
1428		pagesinext = 3840;
1429
1430	/*
1431	 * Maxmem isn't the "maximum memory", it's one larger than the
1432	 * highest page of of the physical address space. It
1433	 */
1434	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1435
1436#ifdef MAXMEM
1437	Maxmem = MAXMEM/4;
1438#endif
1439
1440	/* call pmap initialization to make new kernel address space */
1441	pmap_bootstrap (first, 0);
1442
1443	/*
1444	 * Size up each available chunk of physical memory.
1445	 */
1446
1447	/*
1448	 * We currently don't bother testing base memory.
1449	 * XXX  ...but we probably should.
1450	 */
1451	pa_indx = 0;
1452	badpages = 0;
1453	if (pagesinbase > 1) {
1454		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
1455		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
1456		physmem = pagesinbase - 1;
1457	} else {
1458		/* point at first chunk end */
1459		pa_indx++;
1460	}
1461
1462	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
1463		int tmp, page_bad = FALSE;
1464
1465		/*
1466		 * map page into kernel: valid, read/write, non-cacheable
1467		 */
1468		*(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page;
1469		pmap_update();
1470
1471		tmp = *(int *)CADDR1;
1472		/*
1473		 * Test for alternating 1's and 0's
1474		 */
1475		*(int *)CADDR1 = 0xaaaaaaaa;
1476		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1477			page_bad = TRUE;
1478		}
1479		/*
1480		 * Test for alternating 0's and 1's
1481		 */
1482		*(int *)CADDR1 = 0x55555555;
1483		if (*(int *)CADDR1 != 0x55555555) {
1484			page_bad = TRUE;
1485		}
1486		/*
1487		 * Test for all 1's
1488		 */
1489		*(int *)CADDR1 = 0xffffffff;
1490		if (*(int *)CADDR1 != 0xffffffff) {
1491			page_bad = TRUE;
1492		}
1493		/*
1494		 * Test for all 0's
1495		 */
1496		*(int *)CADDR1 = 0x0;
1497		if (*(int *)CADDR1 != 0x0) {
1498			/*
1499			 * test of page failed
1500			 */
1501			page_bad = TRUE;
1502		}
1503		/*
1504		 * Restore original value.
1505		 */
1506		*(int *)CADDR1 = tmp;
1507
1508		/*
1509		 * Adjust array of valid/good pages.
1510		 */
1511		if (page_bad == FALSE) {
1512			/*
1513			 * If this good page is a continuation of the
1514			 * previous set of good pages, then just increase
1515			 * the end pointer. Otherwise start a new chunk.
1516			 * Note that "end" points one higher than end,
1517			 * making the range >= start and < end.
1518			 */
1519			if (phys_avail[pa_indx] == target_page) {
1520				phys_avail[pa_indx] += PAGE_SIZE;
1521			} else {
1522				pa_indx++;
1523				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1524					printf("Too many holes in the physical address space, giving up\n");
1525					pa_indx--;
1526					break;
1527				}
1528				phys_avail[pa_indx++] = target_page;	/* start */
1529				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
1530			}
1531			physmem++;
1532		} else {
1533			badpages++;
1534			page_bad = FALSE;
1535		}
1536	}
1537
1538	*(int *)CMAP1 = 0;
1539	pmap_update();
1540
1541	/*
1542	 * XXX
1543	 * The last chunk must contain at least one page plus the message
1544	 * buffer to avoid complicating other code (message buffer address
1545	 * calculation, etc.).
1546	 */
1547	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1548	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
1549		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1550		phys_avail[pa_indx--] = 0;
1551		phys_avail[pa_indx--] = 0;
1552	}
1553
1554	Maxmem = atop(phys_avail[pa_indx]);
1555
1556	/* Trim off space for the message buffer. */
1557	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));
1558
1559	avail_end = phys_avail[pa_indx];
1560
1561	/* now running on new page tables, configured,and u/iom is accessible */
1562
1563	/* make a initial tss so microp can get interrupt stack on syscall! */
1564	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1565	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1566	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1567
1568	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1569		(sizeof(tss))<<16;
1570
1571	ltr(gsel_tss);
1572
1573	/* make a call gate to reenter kernel with */
1574	gdp = &ldt[LSYS5CALLS_SEL].gd;
1575
1576	x = (int) &IDTVEC(syscall);
1577	gdp->gd_looffset = x++;
1578	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1579	gdp->gd_stkcpy = 1;
1580	gdp->gd_type = SDT_SYS386CGT;
1581	gdp->gd_dpl = SEL_UPL;
1582	gdp->gd_p = 1;
1583	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1584
1585	/* transfer to user mode */
1586
1587	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1588	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1589
1590	/* setup proc 0's pcb */
1591	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1592	proc0.p_addr->u_pcb.pcb_flags = 0;
1593	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1594}
1595
/*
 * Locate a process's saved trap frame through its user area.
 *
 * p->p_md.md_regs holds the frame's address as seen on the kernel stack.
 * Its byte offset from kstack is applied to p->p_addr, so the result
 * points at the same frame inside the user block of the process.
 */
#define	TF_REGP(p)							\
	((struct trapframe *)((char *)(p)->p_addr +			\
	    ((char *)(p)->p_md.md_regs - kstack)))
1608
1609int
1610ptrace_set_pc(p, addr)
1611	struct proc *p;
1612	unsigned int addr;
1613{
1614	TF_REGP(p)->tf_eip = addr;
1615	return (0);
1616}
1617
1618int
1619ptrace_single_step(p)
1620	struct proc *p;
1621{
1622	TF_REGP(p)->tf_eflags |= PSL_T;
1623	return (0);
1624}
1625
1626int ptrace_write_u(p, off, data)
1627	struct proc *p;
1628	vm_offset_t off;
1629	int data;
1630{
1631	struct trapframe frame_copy;
1632	vm_offset_t min;
1633	struct trapframe *tp;
1634
1635	/*
1636	 * Privileged kernel state is scattered all over the user area.
1637	 * Only allow write access to parts of regs and to fpregs.
1638	 */
1639	min = (char *)p->p_md.md_regs - kstack;
1640	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1641		tp = TF_REGP(p);
1642		frame_copy = *tp;
1643		*(int *)((char *)&frame_copy + (off - min)) = data;
1644		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1645		    !CS_SECURE(frame_copy.tf_cs))
1646			return (EINVAL);
1647		*(int*)((char *)p->p_addr + off) = data;
1648		return (0);
1649	}
1650	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1651	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1652		*(int*)((char *)p->p_addr + off) = data;
1653		return (0);
1654	}
1655	return (EFAULT);
1656}
1657
1658int
1659fill_regs(p, regs)
1660	struct proc *p;
1661	struct reg *regs;
1662{
1663	struct trapframe *tp;
1664
1665	tp = TF_REGP(p);
1666	regs->r_es = tp->tf_es;
1667	regs->r_ds = tp->tf_ds;
1668	regs->r_edi = tp->tf_edi;
1669	regs->r_esi = tp->tf_esi;
1670	regs->r_ebp = tp->tf_ebp;
1671	regs->r_ebx = tp->tf_ebx;
1672	regs->r_edx = tp->tf_edx;
1673	regs->r_ecx = tp->tf_ecx;
1674	regs->r_eax = tp->tf_eax;
1675	regs->r_eip = tp->tf_eip;
1676	regs->r_cs = tp->tf_cs;
1677	regs->r_eflags = tp->tf_eflags;
1678	regs->r_esp = tp->tf_esp;
1679	regs->r_ss = tp->tf_ss;
1680	return (0);
1681}
1682
1683int
1684set_regs(p, regs)
1685	struct proc *p;
1686	struct reg *regs;
1687{
1688	struct trapframe *tp;
1689
1690	tp = TF_REGP(p);
1691	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1692	    !CS_SECURE(regs->r_cs))
1693		return (EINVAL);
1694	tp->tf_es = regs->r_es;
1695	tp->tf_ds = regs->r_ds;
1696	tp->tf_edi = regs->r_edi;
1697	tp->tf_esi = regs->r_esi;
1698	tp->tf_ebp = regs->r_ebp;
1699	tp->tf_ebx = regs->r_ebx;
1700	tp->tf_edx = regs->r_edx;
1701	tp->tf_ecx = regs->r_ecx;
1702	tp->tf_eax = regs->r_eax;
1703	tp->tf_eip = regs->r_eip;
1704	tp->tf_cs = regs->r_cs;
1705	tp->tf_eflags = regs->r_eflags;
1706	tp->tf_esp = regs->r_esp;
1707	tp->tf_ss = regs->r_ss;
1708	return (0);
1709}
1710
#ifndef DDB
/*
 * Stand-in for Debugger() in kernels built without DDB: it only reports
 * that it was called, echoing msg, and then returns to the caller.
 */
void
Debugger(const char *msg)
{

	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* !DDB */
1718
1719#include <sys/disklabel.h>
1720#define b_cylin	b_resid
1721/*
1722 * Determine the size of the transfer, and make sure it is
1723 * within the boundaries of the partition. Adjust transfer
1724 * if needed, and signal errors or early completion.
1725 */
1726int
1727bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1728{
1729        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1730        int labelsect = lp->d_partitions[0].p_offset;
1731        int maxsz = p->p_size,
1732                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1733
1734        /* overwriting disk label ? */
1735        /* XXX should also protect bootstrap in first 8K */
1736        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1737#if LABELSECTOR != 0
1738            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1739#endif
1740            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1741                bp->b_error = EROFS;
1742                goto bad;
1743        }
1744
1745#if     defined(DOSBBSECTOR) && defined(notyet)
1746        /* overwriting master boot record? */
1747        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1748            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1749                bp->b_error = EROFS;
1750                goto bad;
1751        }
1752#endif
1753
1754        /* beyond partition? */
1755        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1756                /* if exactly at end of disk, return an EOF */
1757                if (bp->b_blkno == maxsz) {
1758                        bp->b_resid = bp->b_bcount;
1759                        return(0);
1760                }
1761                /* or truncate if part of it fits */
1762                sz = maxsz - bp->b_blkno;
1763                if (sz <= 0) {
1764                        bp->b_error = EINVAL;
1765                        goto bad;
1766                }
1767                bp->b_bcount = sz << DEV_BSHIFT;
1768        }
1769
1770        /* calculate cylinder for disksort to order transfers with */
1771        bp->b_pblkno = bp->b_blkno + p->p_offset;
1772        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1773        return(1);
1774
1775bad:
1776        bp->b_flags |= B_ERROR;
1777        return(-1);
1778}
1779
/*
 * disk_externalize: hand the drive number to a sysctl request through
 * SYSCTL_OUT and return whatever SYSCTL_OUT returns.
 */
int
disk_externalize(int drive, struct sysctl_req *req)
{
	int error;

	error = SYSCTL_OUT(req, &drive, sizeof(drive));
	return (error);
}
1785