machdep.c revision 12929
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.160 1995/12/16 18:52:08 peter Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/signalvar.h>
48#include <sys/kernel.h>
49#include <sys/proc.h>
50#include <sys/buf.h>
51#include <sys/reboot.h>
52#include <sys/conf.h>
53#include <sys/file.h>
54#include <sys/callout.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/mount.h>
58#include <sys/msgbuf.h>
59#include <sys/ioctl.h>
60#include <sys/sysent.h>
61#include <sys/tty.h>
62#include <sys/sysctl.h>
63#include <sys/devconf.h>
64#include <sys/vmmeter.h>
65
66#ifdef SYSVSHM
67#include <sys/shm.h>
68#endif
69
70#ifdef SYSVMSG
71#include <sys/msg.h>
72#endif
73
74#ifdef SYSVSEM
75#include <sys/sem.h>
76#endif
77
78#include <vm/vm.h>
79#include <vm/vm_param.h>
80#include <vm/vm_prot.h>
81#include <vm/lock.h>
82#include <vm/vm_kern.h>
83#include <vm/vm_object.h>
84#include <vm/vm_page.h>
85#include <vm/vm_map.h>
86#include <vm/vm_pager.h>
87#include <vm/vm_extern.h>
88
89#include <sys/user.h>
90#include <sys/exec.h>
91#include <sys/vnode.h>
92
93#include <ddb/ddb.h>
94
95#include <net/netisr.h>
96
97#include <machine/cpu.h>
98#include <machine/npx.h>
99#include <machine/reg.h>
100#include <machine/psl.h>
101#include <machine/clock.h>
102#include <machine/specialreg.h>
103#include <machine/sysarch.h>
104#include <machine/cons.h>
105#include <machine/devconf.h>
106#include <machine/bootinfo.h>
107#include <machine/md_var.h>
108
109#include <i386/isa/isa.h>
110#include <i386/isa/isa_device.h>
111#include <i386/isa/rtc.h>
112#include <machine/random.h>
113
114extern void init386 __P((int first));
115extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
116extern int ptrace_single_step __P((struct proc *p));
117extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
118extern void dblfault_handler __P((void));
119
120static void cpu_startup __P((void *));
121SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
122
123static void identifycpu(void);
124
125char machine[] = "i386";
126SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
127
128static char cpu_model[128];
129SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "");
130
/*
 * Device-configuration record for the CPU.  identifycpu() passes it to
 * dev_attach() once the model string has been built.  The record points
 * at cpu_model instead of keeping its own copy of the string.
 */
struct kern_devconf kdc_cpu0 = {
	0, 0, 0,		/* filled in by dev_attach */
	"cpu", 0, { MDDT_CPU },
	0, 0, 0, CPU_EXTERNALLEN,
	0,			/* CPU has no parent */
	0,			/* no parentdata */
	DC_BUSY,		/* the CPU is always busy */
	cpu_model,		/* no sense in duplication */
	DC_CLS_CPU		/* class */
};
141
/*
 * Delay used by boot() after a dump before rebooting automatically.
 * As boot() reads it: 0 reboots immediately, -1 skips the countdown and
 * waits for a key press, anything else is a countdown in seconds.
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif

#ifdef BOUNCE_BUFFERS
extern char	*bouncememory;
extern int	maxbkva;

/* Number of bounce pages; 0 lets cpu_startup() pick a value. */
#ifdef BOUNCEPAGES
int	bouncepages = BOUNCEPAGES;
#else
int	bouncepages = 0;
#endif
#endif	/* BOUNCE_BUFFERS */

extern int	freebufspace;

int	msgbufmapped = 0;		/* set when safe to use msgbuf */

/* User data and code segment selectors loaded by sendsig()/setregs(). */
int	_udatasel;
int	_ucodesel;

/* Physical memory size in pages (converted with ctob() for sysctl). */
int	physmem = 0;
162
163static int
164sysctl_hw_physmem SYSCTL_HANDLER_ARGS
165{
166	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
167	return (error);
168}
169
170SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
171	0, 0, sysctl_hw_physmem, "I", "");
172
173static int
174sysctl_hw_usermem SYSCTL_HANDLER_ARGS
175{
176	int error = sysctl_handle_int(oidp, 0,
177		ctob(physmem - cnt.v_wire_count), req);
178	return (error);
179}
180
181SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
182	0, 0, sysctl_hw_usermem, "I", "");
183
184int boothowto = 0, bootverbose = 0, Maxmem = 0;
185static int	badpages = 0;
186long dumplo;
187extern int bootdev;
188
189vm_offset_t phys_avail[10];
190
191/* must be 2 less so 0 0 can signal end of chunks */
192#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
193
194int cpu_class;
195
196static void dumpsys __P((void));
197static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */
198
199static vm_offset_t buffer_sva, buffer_eva;
200vm_offset_t clean_sva, clean_eva;
201static vm_offset_t pager_sva, pager_eva;
202extern struct linker_set netisr_set;
203
204#define offsetof(type, member)	((size_t)(&((type *)0)->member))
205
206static void
207cpu_startup(dummy)
208	void *dummy;
209{
210	register unsigned i;
211	register caddr_t v;
212	vm_offset_t maxaddr;
213	vm_size_t size = 0;
214	int firstaddr;
215	vm_offset_t minaddr;
216
217	if (boothowto & RB_VERBOSE)
218		bootverbose++;
219
220	/*
221	 * Initialize error message buffer (at end of core).
222	 */
223
224	/* avail_end was pre-decremented in init_386() to compensate */
225	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
226		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
227			   avail_end + i * NBPG,
228			   VM_PROT_ALL, TRUE);
229	msgbufmapped = 1;
230
231	/*
232	 * Good {morning,afternoon,evening,night}.
233	 */
234	printf(version);
235	startrtclock();
236	identifycpu();
237	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
238	/*
239	 * Display any holes after the first chunk of extended memory.
240	 */
241	if (badpages != 0) {
242		int indx = 1;
243
244		/*
245		 * XXX skip reporting ISA hole & unmanaged kernel memory
246		 */
247		if (phys_avail[0] == PAGE_SIZE)
248			indx += 2;
249
250		printf("Physical memory hole(s):\n");
251		for (; phys_avail[indx + 1] != 0; indx += 2) {
252			int size = phys_avail[indx + 1] - phys_avail[indx];
253
254			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
255			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
256		}
257	}
258
259	/*
260	 * Quickly wire in netisrs.
261	 */
262	setup_netisrs(&netisr_set);
263
264/*
265#ifdef ISDN
266	DONET(isdnintr, NETISR_ISDN);
267#endif
268*/
269
270	/*
271	 * Allocate space for system data structures.
272	 * The first available kernel virtual address is in "v".
273	 * As pages of kernel virtual memory are allocated, "v" is incremented.
274	 * As pages of memory are allocated and cleared,
275	 * "firstaddr" is incremented.
276	 * An index into the kernel page table corresponding to the
277	 * virtual memory address maintained in "v" is kept in "mapaddr".
278	 */
279
280	/*
281	 * Make two passes.  The first pass calculates how much memory is
282	 * needed and allocates it.  The second pass assigns virtual
283	 * addresses to the various data structures.
284	 */
285	firstaddr = 0;
286again:
287	v = (caddr_t)firstaddr;
288
289#define	valloc(name, type, num) \
290	    (name) = (type *)v; v = (caddr_t)((name)+(num))
291#define	valloclim(name, type, num, lim) \
292	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
293	valloc(callout, struct callout, ncallout);
294#ifdef SYSVSHM
295	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
296#endif
297#ifdef SYSVSEM
298	valloc(sema, struct semid_ds, seminfo.semmni);
299	valloc(sem, struct sem, seminfo.semmns);
300	/* This is pretty disgusting! */
301	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
302#endif
303#ifdef SYSVMSG
304	valloc(msgpool, char, msginfo.msgmax);
305	valloc(msgmaps, struct msgmap, msginfo.msgseg);
306	valloc(msghdrs, struct msg, msginfo.msgtql);
307	valloc(msqids, struct msqid_ds, msginfo.msgmni);
308#endif
309
310	if (nbuf == 0) {
311		nbuf = 30;
312		if( physmem > 1024)
313			nbuf += min((physmem - 1024) / 12, 1024);
314	}
315	nswbuf = min(nbuf, 128);
316
317	valloc(swbuf, struct buf, nswbuf);
318	valloc(buf, struct buf, nbuf);
319
320#ifdef BOUNCE_BUFFERS
321	/*
322	 * If there is more than 16MB of memory, allocate some bounce buffers
323	 */
324	if (Maxmem > 4096) {
325		if (bouncepages == 0) {
326			bouncepages = 64;
327			bouncepages += ((Maxmem - 4096) / 2048) * 32;
328		}
329		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
330		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
331	}
332#endif
333
334	/*
335	 * End of first pass, size has been calculated so allocate memory
336	 */
337	if (firstaddr == 0) {
338		size = (vm_size_t)(v - firstaddr);
339		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
340		if (firstaddr == 0)
341			panic("startup: no room for tables");
342		goto again;
343	}
344
345	/*
346	 * End of second pass, addresses have been assigned
347	 */
348	if ((vm_size_t)(v - firstaddr) != size)
349		panic("startup: table size inconsistency");
350
351#ifdef BOUNCE_BUFFERS
352	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
353			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
354				maxbkva + pager_map_size, TRUE);
355	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
356#else
357	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
358			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
359#endif
360	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
361				(nbuf*MAXBSIZE), TRUE);
362	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
363				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
364	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
365				(16*ARG_MAX), TRUE);
366	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
367				(maxproc*UPAGES*PAGE_SIZE), FALSE);
368
369	/*
370	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
371	 * we use the more space efficient malloc in place of kmem_alloc.
372	 */
373	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
374				   M_MBUF, M_NOWAIT);
375	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
376	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
377			       nmbclusters * MCLBYTES, FALSE);
378	/*
379	 * Initialize callouts
380	 */
381	callfree = callout;
382	for (i = 1; i < ncallout; i++)
383		callout[i-1].c_next = &callout[i];
384
385        if (boothowto & RB_CONFIG) {
386		userconfig();
387		cninit();	/* the preferred console may have changed */
388	}
389
390#ifdef BOUNCE_BUFFERS
391	/*
392	 * init bounce buffers
393	 */
394	vm_bounce_init();
395#endif
396	/*
397	 * XXX allocate a contiguous area for ISA (non busmaster) DMA
398	 * operations. This _should_ only be done if the DMA channels
399	 * will actually be used, but for now we do it always.
400	 */
401#define DMAPAGES 8
402	isaphysmem =
403	    vm_page_alloc_contig(DMAPAGES * PAGE_SIZE, 0, 0xfffffful, 64*1024);
404
405	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
406	    ptoa(cnt.v_free_count) / 1024);
407
408	/*
409	 * Set up buffers, so they can be used to read disk labels.
410	 */
411	bufinit();
412	vm_pager_bufferinit();
413
414	/*
415	 * In verbose mode, print out the BIOS's idea of the disk geometries.
416	 */
417	if (bootverbose) {
418		printf("BIOS Geometries:\n");
419		for (i = 0; i < N_BIOS_GEOM; i++) {
420			unsigned long bios_geom;
421			int max_cylinder, max_head, max_sector;
422
423			bios_geom = bootinfo.bi_bios_geom[i];
424
425			/*
426			 * XXX the bootstrap punts a 1200K floppy geometry
427			 * when the get-disk-geometry interrupt fails.  Skip
428			 * drives that have this geometry.
429			 */
430			if (bios_geom == 0x4f010f)
431				continue;
432
433			printf(" %x:%08lx ", i, bios_geom);
434			max_cylinder = bios_geom >> 16;
435			max_head = (bios_geom >> 8) & 0xff;
436			max_sector = bios_geom & 0xff;
437			printf(
438		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
439			       max_cylinder, max_cylinder + 1,
440			       max_head, max_head + 1,
441			       max_sector, max_sector);
442		}
443		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
444	}
445}
446
447int
448register_netisr(num, handler)
449	int num;
450	netisr_t *handler;
451{
452
453	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
454		printf("register_netisr: bad isr number: %d\n", num);
455		return (EINVAL);
456	}
457	netisrs[num] = handler;
458	return (0);
459}
460
461static void
462setup_netisrs(ls)
463	struct linker_set *ls;
464{
465	int i;
466	const struct netisrtab *nit;
467
468	for(i = 0; ls->ls_items[i]; i++) {
469		nit = (const struct netisrtab *)ls->ls_items[i];
470		register_netisr(nit->nit_num, nit->nit_isr);
471	}
472}
473
/*
 * Default model name and CPU class for each CPU type.  identifycpu()
 * indexes this table with the global "cpu", so row order matters.
 * NOTE(review): the trailing comments presumably name the CPU_* constant
 * equal to each row's index -- confirm against the header defining them.
 */
static struct cpu_nameclass i386_cpus[] = {
	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
};
483
484static void
485identifycpu()
486{
487	printf("CPU: ");
488	if (cpu >= 0
489	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
490		cpu_class = i386_cpus[cpu].cpu_class;
491		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
492	} else {
493		printf("unknown cpu type %d\n", cpu);
494		panic("startup: bad cpu id");
495	}
496
497#if defined(I586_CPU)
498	if(cpu_class == CPUCLASS_586) {
499		calibrate_cyclecounter();
500	}
501#endif
502#if defined(I486_CPU) || defined(I586_CPU)
503	if (!strcmp(cpu_vendor,"GenuineIntel")) {
504		if ((cpu_id & 0xf00) > 3) {
505			cpu_model[0] = '\0';
506
507			switch (cpu_id & 0x3000) {
508			case 0x1000:
509				strcpy(cpu_model, "Overdrive ");
510				break;
511			case 0x2000:
512				strcpy(cpu_model, "Dual ");
513				break;
514			}
515			if ((cpu_id & 0xf00) == 0x400) {
516				strcat(cpu_model, "i486 ");
517#if defined(I586_CPU)
518			} else if ((cpu_id & 0xf00) == 0x500) {
519				strcat(cpu_model, "Pentium"); /* nb no space */
520#endif
521			} else {
522				strcat(cpu_model, "unknown ");
523			}
524
525			switch (cpu_id & 0xff0) {
526			case 0x400:
527				strcat(cpu_model, "DX"); break;
528			case 0x410:
529				strcat(cpu_model, "DX"); break;
530			case 0x420:
531				strcat(cpu_model, "SX"); break;
532			case 0x430:
533				strcat(cpu_model, "DX2"); break;
534			case 0x440:
535				strcat(cpu_model, "SL"); break;
536			case 0x450:
537				strcat(cpu_model, "SX2"); break;
538			case 0x470:
539				strcat(cpu_model, "DX2 Write-Back Enhanced");
540				break;
541			case 0x480:
542				strcat(cpu_model, "DX4"); break;
543#if defined(I586_CPU)
544			case 0x510:
545			case 0x520:
546				/*
547				 * We used to do all sorts of nonsense here
548				 * to print out iCOMP numbers.  Since these
549				 * are meaningless except to Intel
550				 * marketroids, there seems to be little
551				 * sense in doing so.
552				 */
553				break;
554#endif
555			}
556		}
557	}
558#endif
559	printf("%s (", cpu_model);
560	switch(cpu_class) {
561	case CPUCLASS_286:
562		printf("286");
563		break;
564#if defined(I386_CPU)
565	case CPUCLASS_386:
566		printf("386");
567		break;
568#endif
569#if defined(I486_CPU)
570	case CPUCLASS_486:
571		printf("486");
572		break;
573#endif
574#if defined(I586_CPU)
575	case CPUCLASS_586:
576		printf("%d.%02d-MHz ",
577		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100,
578		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100);
579		printf("586");
580		break;
581#endif
582	default:
583		printf("unknown");	/* will panic below... */
584	}
585	printf("-class CPU)\n");
586#if defined(I486_CPU) || defined(I586_CPU)
587	if(*cpu_vendor)
588		printf("  Origin = \"%s\"",cpu_vendor);
589	if(cpu_id)
590		printf("  Id = 0x%lx",cpu_id);
591
592	if (!strcmp(cpu_vendor, "GenuineIntel")) {
593		printf("  Stepping=%ld", cpu_id & 0xf);
594		if (cpu_high > 0) {
595#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
596			printf("\n  Features=0x%b", cpu_feature, FEATUREFMT);
597		}
598	}
599	/* Avoid ugly blank lines: only print newline when we have to. */
600	if (*cpu_vendor || cpu_id)
601		printf("\n");
602#endif
603	/*
604	 * Now that we have told the user what they have,
605	 * let them know if that machine type isn't configured.
606	 */
607	switch (cpu_class) {
608	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
609#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
610#error This kernel is not configured for one of the supported CPUs
611#endif
612#if !defined(I386_CPU)
613	case CPUCLASS_386:
614#endif
615#if !defined(I486_CPU)
616	case CPUCLASS_486:
617#endif
618#if !defined(I586_CPU)
619	case CPUCLASS_586:
620#endif
621		panic("CPU class not configured");
622	default:
623		break;
624	}
625	dev_attach(&kdc_cpu0);
626}
627
628/*
629 * Send an interrupt to process.
630 *
631 * Stack is set up to allow sigcode stored
632 * in u. to call routine, followed by kcall
633 * to sigreturn routine below.  After sigreturn
634 * resets the signal mask, the stack, and the
635 * frame pointer, it returns to the user
636 * specified pc, psl.
637 */
638void
639sendsig(catcher, sig, mask, code)
640	sig_t catcher;
641	int sig, mask;
642	unsigned code;
643{
644	register struct proc *p = curproc;
645	register int *regs;
646	register struct sigframe *fp;
647	struct sigframe sf;
648	struct sigacts *psp = p->p_sigacts;
649	int oonstack;
650
651	regs = p->p_md.md_regs;
652        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
653	/*
654	 * Allocate and validate space for the signal handler
655	 * context. Note that if the stack is in P0 space, the
656	 * call to grow() is a nop, and the useracc() check
657	 * will fail if the process has not already allocated
658	 * the space with a `brk'.
659	 */
660        if ((psp->ps_flags & SAS_ALTSTACK) &&
661	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
662	    (psp->ps_sigonstack & sigmask(sig))) {
663		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
664		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
665		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
666	} else {
667		fp = (struct sigframe *)(regs[tESP]
668			- sizeof(struct sigframe));
669	}
670
671	/*
672	 * grow() will return FALSE if the fp will not fit inside the stack
673	 *	and the stack can not be grown. useracc will return FALSE
674	 *	if access is denied.
675	 */
676	if ((grow(p, (int)fp) == FALSE) ||
677	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
678		/*
679		 * Process has trashed its stack; give it an illegal
680		 * instruction to halt it in its tracks.
681		 */
682		SIGACTION(p, SIGILL) = SIG_DFL;
683		sig = sigmask(SIGILL);
684		p->p_sigignore &= ~sig;
685		p->p_sigcatch &= ~sig;
686		p->p_sigmask &= ~sig;
687		psignal(p, SIGILL);
688		return;
689	}
690
691	/*
692	 * Build the argument list for the signal handler.
693	 */
694	if (p->p_sysent->sv_sigtbl) {
695		if (sig < p->p_sysent->sv_sigsize)
696			sig = p->p_sysent->sv_sigtbl[sig];
697		else
698			sig = p->p_sysent->sv_sigsize + 1;
699	}
700	sf.sf_signum = sig;
701	sf.sf_code = code;
702	sf.sf_scp = &fp->sf_sc;
703	sf.sf_addr = (char *) regs[tERR];
704	sf.sf_handler = catcher;
705
706	/* save scratch registers */
707	sf.sf_sc.sc_eax = regs[tEAX];
708	sf.sf_sc.sc_ebx = regs[tEBX];
709	sf.sf_sc.sc_ecx = regs[tECX];
710	sf.sf_sc.sc_edx = regs[tEDX];
711	sf.sf_sc.sc_esi = regs[tESI];
712	sf.sf_sc.sc_edi = regs[tEDI];
713	sf.sf_sc.sc_cs = regs[tCS];
714	sf.sf_sc.sc_ds = regs[tDS];
715	sf.sf_sc.sc_ss = regs[tSS];
716	sf.sf_sc.sc_es = regs[tES];
717	sf.sf_sc.sc_isp = regs[tISP];
718
719	/*
720	 * Build the signal context to be used by sigreturn.
721	 */
722	sf.sf_sc.sc_onstack = oonstack;
723	sf.sf_sc.sc_mask = mask;
724	sf.sf_sc.sc_sp = regs[tESP];
725	sf.sf_sc.sc_fp = regs[tEBP];
726	sf.sf_sc.sc_pc = regs[tEIP];
727	sf.sf_sc.sc_ps = regs[tEFLAGS];
728
729	/*
730	 * Copy the sigframe out to the user's stack.
731	 */
732	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
733		/*
734		 * Something is wrong with the stack pointer.
735		 * ...Kill the process.
736		 */
737		sigexit(p, SIGILL);
738	};
739
740	regs[tESP] = (int)fp;
741	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
742	regs[tEFLAGS] &= ~PSL_VM;
743	regs[tCS] = _ucodesel;
744	regs[tDS] = _udatasel;
745	regs[tES] = _udatasel;
746	regs[tSS] = _udatasel;
747}
748
749/*
750 * System call to cleanup state after a signal
751 * has been taken.  Reset signal mask and
752 * stack state from context left by sendsig (above).
753 * Return to previous pc and psl as specified by
754 * context left by sendsig. Check carefully to
755 * make sure that the user has not modified the
756 * state to gain improper privileges.
757 */
758int
759sigreturn(p, uap, retval)
760	struct proc *p;
761	struct sigreturn_args /* {
762		struct sigcontext *sigcntxp;
763	} */ *uap;
764	int *retval;
765{
766	register struct sigcontext *scp;
767	register struct sigframe *fp;
768	register int *regs = p->p_md.md_regs;
769	int eflags;
770
771	/*
772	 * (XXX old comment) regs[tESP] points to the return address.
773	 * The user scp pointer is above that.
774	 * The return address is faked in the signal trampoline code
775	 * for consistency.
776	 */
777	scp = uap->sigcntxp;
778	fp = (struct sigframe *)
779	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
780
781	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
782		return(EINVAL);
783
784	/*
785	 * Don't allow users to change privileged or reserved flags.
786	 */
787#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
788	eflags = scp->sc_ps;
789	/*
790	 * XXX do allow users to change the privileged flag PSL_RF.  The
791	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
792	 * sometimes set it there too.  tf_eflags is kept in the signal
793	 * context during signal handling and there is no other place
794	 * to remember it, so the PSL_RF bit may be corrupted by the
795	 * signal handler without us knowing.  Corruption of the PSL_RF
796	 * bit at worst causes one more or one less debugger trap, so
797	 * allowing it is fairly harmless.
798	 */
799	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
800#ifdef DEBUG
801    		printf("sigreturn: eflags = 0x%x\n", eflags);
802#endif
803    		return(EINVAL);
804	}
805
806	/*
807	 * Don't allow users to load a valid privileged %cs.  Let the
808	 * hardware check for invalid selectors, excess privilege in
809	 * other selectors, invalid %eip's and invalid %esp's.
810	 */
811#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
812	if (!CS_SECURE(scp->sc_cs)) {
813#ifdef DEBUG
814    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
815#endif
816		trapsignal(p, SIGBUS, T_PROTFLT);
817		return(EINVAL);
818	}
819
820	/* restore scratch registers */
821	regs[tEAX] = scp->sc_eax;
822	regs[tEBX] = scp->sc_ebx;
823	regs[tECX] = scp->sc_ecx;
824	regs[tEDX] = scp->sc_edx;
825	regs[tESI] = scp->sc_esi;
826	regs[tEDI] = scp->sc_edi;
827	regs[tCS] = scp->sc_cs;
828	regs[tDS] = scp->sc_ds;
829	regs[tES] = scp->sc_es;
830	regs[tSS] = scp->sc_ss;
831	regs[tISP] = scp->sc_isp;
832
833	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
834		return(EINVAL);
835
836	if (scp->sc_onstack & 01)
837		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
838	else
839		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
840	p->p_sigmask = scp->sc_mask &~
841	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
842	regs[tEBP] = scp->sc_fp;
843	regs[tESP] = scp->sc_sp;
844	regs[tEIP] = scp->sc_pc;
845	regs[tEFLAGS] = eflags;
846	return(EJUSTRETURN);
847}
848
849static int	waittime = -1;
850static struct pcb dumppcb;
851
852__dead void
853boot(howto)
854	int howto;
855{
856	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
857		register struct buf *bp;
858		int iter, nbusy;
859
860		waittime = 0;
861		printf("\nsyncing disks... ");
862
863		sync(&proc0, NULL, NULL);
864
865		for (iter = 0; iter < 20; iter++) {
866			nbusy = 0;
867			for (bp = &buf[nbuf]; --bp >= buf; ) {
868				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
869					nbusy++;
870				}
871			}
872			if (nbusy == 0)
873				break;
874			printf("%d ", nbusy);
875			DELAY(40000 * iter);
876		}
877		if (nbusy) {
878			/*
879			 * Failed to sync all blocks. Indicate this and don't
880			 * unmount filesystems (thus forcing an fsck on reboot).
881			 */
882			printf("giving up\n");
883#ifdef SHOW_BUSYBUFS
884			nbusy = 0;
885			for (bp = &buf[nbuf]; --bp >= buf; ) {
886				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
887					nbusy++;
888					printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno);
889				}
890			}
891			DELAY(5000000);	/* 5 seconds */
892#endif
893		} else {
894			printf("done\n");
895			/*
896			 * Unmount filesystems
897			 */
898			if (panicstr == 0)
899				vfs_unmountall();
900		}
901		DELAY(100000);			/* wait for console output to finish */
902		dev_shutdownall(FALSE);
903	}
904	splhigh();
905	if (howto & RB_HALT) {
906		printf("\n");
907		printf("The operating system has halted.\n");
908		printf("Please press any key to reboot.\n\n");
909		cngetc();
910	} else {
911		if (howto & RB_DUMP) {
912			if (!cold) {
913				savectx(&dumppcb, 0);
914				dumppcb.pcb_ptd = rcr3();
915				dumpsys();
916			}
917
918			if (PANIC_REBOOT_WAIT_TIME != 0) {
919				if (PANIC_REBOOT_WAIT_TIME != -1) {
920					int loop;
921					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
922						PANIC_REBOOT_WAIT_TIME);
923					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
924						DELAY(1000 * 100); /* 1/10th second */
925						if (cncheckc()) /* Did user type a key? */
926							break;
927					}
928					if (!loop)
929						goto die;
930				}
931			} else { /* zero time specified - reboot NOW */
932				goto die;
933			}
934			printf("--> Press a key on the console to reboot <--\n");
935			cngetc();
936		}
937	}
938die:
939	printf("Rebooting...\n");
940	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
941	cpu_reset();
942	for(;;) ;
943	/* NOTREACHED */
944}
945
/*
 * Magic number for savecore
 *
 * exported (symorder) and used at least by savecore(8)
 *
 */
u_long		dumpmag = 0x8fca0101UL;

/* Set to Maxmem by dumpsys() just before the dump is written. */
static int	dumpsize = 0;		/* also for savecore */

/*
 * Run-time switch for crash dumps, writable through the machdep.do_dump
 * sysctl; dumpsys() returns immediately when it is zero.
 */
static int	dodump = 1;
SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, "");
958
959/*
960 * Doadump comes here after turning off memory management and
961 * getting on the dump stack, either when called above, or by
962 * the auto-restart code.
963 */
964static void
965dumpsys()
966{
967
968	if (!dodump)
969		return;
970	if (dumpdev == NODEV)
971		return;
972	if ((minor(dumpdev)&07) != 1)
973		return;
974	if (!(bdevsw[major(dumpdev)]))
975		return;
976	if (!(bdevsw[major(dumpdev)]->d_dump))
977		return;
978	dumpsize = Maxmem;
979	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
980	printf("dump ");
981	switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) {
982
983	case ENXIO:
984		printf("device bad\n");
985		break;
986
987	case EFAULT:
988		printf("device not ready\n");
989		break;
990
991	case EINVAL:
992		printf("area improper\n");
993		break;
994
995	case EIO:
996		printf("i/o error\n");
997		break;
998
999	case EINTR:
1000		printf("aborted from console\n");
1001		break;
1002
1003	default:
1004		printf("succeeded\n");
1005		break;
1006	}
1007}
1008
1009/*
1010 * Clear registers on exec
1011 */
1012void
1013setregs(p, entry, stack)
1014	struct proc *p;
1015	u_long entry;
1016	u_long stack;
1017{
1018	int *regs = p->p_md.md_regs;
1019
1020	bzero(regs, sizeof(struct trapframe));
1021	regs[tEIP] = entry;
1022	regs[tESP] = stack;
1023	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
1024	regs[tSS] = _udatasel;
1025	regs[tDS] = _udatasel;
1026	regs[tES] = _udatasel;
1027	regs[tCS] = _ucodesel;
1028
1029	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
1030	load_cr0(rcr0() | CR0_TS);	/* start emulating */
1031#if	NNPX > 0
1032	npxinit(__INITIAL_NPXCW__);
1033#endif	/* NNPX > 0 */
1034}
1035
1036static int
1037sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
1038{
1039	int error;
1040	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
1041		req);
1042	if (!error && req->newptr)
1043		resettodr();
1044	return (error);
1045}
1046
1047SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
1048	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
1049
/* Read/write switch exported as machdep.disable_rtc_set. */
SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

/* Read-only export of the bootinfo structure. */
SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
	CTLFLAG_RD, &bootinfo, bootinfo, "");

/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

int currentldt;
int _default_ldt;
union descriptor gdt[NGDT];		/* global descriptor table */
struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */

/*
 * TSS used as the base of the GPANIC_SEL descriptor in gdt_segs[].
 * NOTE(review): dblfault_stack is presumably the stack that TSS runs
 * on -- confirm where it is wired up.
 */
static struct i386tss dblfault_tss;
static char dblfault_stack[512];

extern  struct user *proc0paddr;
1074
1075/* software prototypes -- in more palatable form */
1076struct soft_segment_descriptor gdt_segs[] = {
1077/* GNULL_SEL	0 Null Descriptor */
1078{	0x0,			/* segment base address  */
1079	0x0,			/* length */
1080	0,			/* segment type */
1081	0,			/* segment descriptor priority level */
1082	0,			/* segment descriptor present */
1083	0, 0,
1084	0,			/* default 32 vs 16 bit size */
1085	0  			/* limit granularity (byte/page units)*/ },
1086/* GCODE_SEL	1 Code Descriptor for kernel */
1087{	0x0,			/* segment base address  */
1088	0xfffff,		/* length - all address space */
1089	SDT_MEMERA,		/* segment type */
1090	0,			/* segment descriptor priority level */
1091	1,			/* segment descriptor present */
1092	0, 0,
1093	1,			/* default 32 vs 16 bit size */
1094	1  			/* limit granularity (byte/page units)*/ },
1095/* GDATA_SEL	2 Data Descriptor for kernel */
1096{	0x0,			/* segment base address  */
1097	0xfffff,		/* length - all address space */
1098	SDT_MEMRWA,		/* segment type */
1099	0,			/* segment descriptor priority level */
1100	1,			/* segment descriptor present */
1101	0, 0,
1102	1,			/* default 32 vs 16 bit size */
1103	1  			/* limit granularity (byte/page units)*/ },
1104/* GLDT_SEL	3 LDT Descriptor */
1105{	(int) ldt,		/* segment base address  */
1106	sizeof(ldt)-1,		/* length - all address space */
1107	SDT_SYSLDT,		/* segment type */
1108	0,			/* segment descriptor priority level */
1109	1,			/* segment descriptor present */
1110	0, 0,
1111	0,			/* unused - default 32 vs 16 bit size */
1112	0  			/* limit granularity (byte/page units)*/ },
1113/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1114{	0x0,			/* segment base address  */
1115	0x0,			/* length - all address space */
1116	0,			/* segment type */
1117	0,			/* segment descriptor priority level */
1118	0,			/* segment descriptor present */
1119	0, 0,
1120	0,			/* default 32 vs 16 bit size */
1121	0  			/* limit granularity (byte/page units)*/ },
1122/* GPANIC_SEL	5 Panic Tss Descriptor */
1123{	(int) &dblfault_tss,	/* segment base address  */
1124	sizeof(struct i386tss)-1,/* length - all address space */
1125	SDT_SYS386TSS,		/* segment type */
1126	0,			/* segment descriptor priority level */
1127	1,			/* segment descriptor present */
1128	0, 0,
1129	0,			/* unused - default 32 vs 16 bit size */
1130	0  			/* limit granularity (byte/page units)*/ },
1131/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1132{	(int) kstack,		/* segment base address  */
1133	sizeof(struct i386tss)-1,/* length - all address space */
1134	SDT_SYS386TSS,		/* segment type */
1135	0,			/* segment descriptor priority level */
1136	1,			/* segment descriptor present */
1137	0, 0,
1138	0,			/* unused - default 32 vs 16 bit size */
1139	0  			/* limit granularity (byte/page units)*/ },
1140/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1141{	(int) ldt,		/* segment base address  */
1142	(512 * sizeof(union descriptor)-1),		/* length */
1143	SDT_SYSLDT,		/* segment type */
1144	0,			/* segment descriptor priority level */
1145	1,			/* segment descriptor present */
1146	0, 0,
1147	0,			/* unused - default 32 vs 16 bit size */
1148	0  			/* limit granularity (byte/page units)*/ },
1149/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1150{	0,			/* segment base address (overwritten by APM)  */
1151	0xfffff,		/* length */
1152	SDT_MEMERA,		/* segment type */
1153	0,			/* segment descriptor priority level */
1154	1,			/* segment descriptor present */
1155	0, 0,
1156	1,			/* default 32 vs 16 bit size */
1157	1  			/* limit granularity (byte/page units)*/ },
1158/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1159{	0,			/* segment base address (overwritten by APM)  */
1160	0xfffff,		/* length */
1161	SDT_MEMERA,		/* segment type */
1162	0,			/* segment descriptor priority level */
1163	1,			/* segment descriptor present */
1164	0, 0,
1165	0,			/* default 32 vs 16 bit size */
1166	1  			/* limit granularity (byte/page units)*/ },
1167/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1168{	0,			/* segment base address (overwritten by APM) */
1169	0xfffff,		/* length */
1170	SDT_MEMRWA,		/* segment type */
1171	0,			/* segment descriptor priority level */
1172	1,			/* segment descriptor present */
1173	0, 0,
1174	1,			/* default 32 vs 16 bit size */
1175	1  			/* limit granularity (byte/page units)*/ },
1176};
1177
1178static struct soft_segment_descriptor ldt_segs[] = {
1179	/* Null Descriptor - overwritten by call gate */
1180{	0x0,			/* segment base address  */
1181	0x0,			/* length - all address space */
1182	0,			/* segment type */
1183	0,			/* segment descriptor priority level */
1184	0,			/* segment descriptor present */
1185	0, 0,
1186	0,			/* default 32 vs 16 bit size */
1187	0  			/* limit granularity (byte/page units)*/ },
1188	/* Null Descriptor - overwritten by call gate */
1189{	0x0,			/* segment base address  */
1190	0x0,			/* length - all address space */
1191	0,			/* segment type */
1192	0,			/* segment descriptor priority level */
1193	0,			/* segment descriptor present */
1194	0, 0,
1195	0,			/* default 32 vs 16 bit size */
1196	0  			/* limit granularity (byte/page units)*/ },
1197	/* Null Descriptor - overwritten by call gate */
1198{	0x0,			/* segment base address  */
1199	0x0,			/* length - all address space */
1200	0,			/* segment type */
1201	0,			/* segment descriptor priority level */
1202	0,			/* segment descriptor present */
1203	0, 0,
1204	0,			/* default 32 vs 16 bit size */
1205	0  			/* limit granularity (byte/page units)*/ },
1206	/* Code Descriptor for user */
1207{	0x0,			/* segment base address  */
1208	0xfffff,		/* length - all address space */
1209	SDT_MEMERA,		/* segment type */
1210	SEL_UPL,		/* segment descriptor priority level */
1211	1,			/* segment descriptor present */
1212	0, 0,
1213	1,			/* default 32 vs 16 bit size */
1214	1  			/* limit granularity (byte/page units)*/ },
1215	/* Data Descriptor for user */
1216{	0x0,			/* segment base address  */
1217	0xfffff,		/* length - all address space */
1218	SDT_MEMRWA,		/* segment type */
1219	SEL_UPL,		/* segment descriptor priority level */
1220	1,			/* segment descriptor present */
1221	0, 0,
1222	1,			/* default 32 vs 16 bit size */
1223	1  			/* limit granularity (byte/page units)*/ },
1224};
1225
1226void
1227setidt(idx, func, typ, dpl, selec)
1228	int idx;
1229	inthand_t *func;
1230	int typ;
1231	int dpl;
1232	int selec;
1233{
1234	struct gate_descriptor *ip = idt + idx;
1235
1236	ip->gd_looffset = (int)func;
1237	ip->gd_selector = selec;
1238	ip->gd_stkcpy = 0;
1239	ip->gd_xx = 0;
1240	ip->gd_type = typ;
1241	ip->gd_dpl = dpl;
1242	ip->gd_p = 1;
1243	ip->gd_hioffset = ((int)func)>>16 ;
1244}
1245
/* IDTVEC(name) pastes an X in front of name, giving the entry point symbol. */
1246#define	IDTVEC(name)	__CONCAT(X,name)
1247
/*
 * Trap and system call entry points, defined outside this file.
 * init386() installs them in the IDT (and IDTVEC(syscall) in the LDT
 * call gate).
 */
1248extern inthand_t
1249	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1250	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
1251	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1252	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1253	IDTVEC(syscall);
1254
/* Entry point installed at vector 0x80 when Linux emulation is configured. */
1255#if defined(COMPAT_LINUX) || defined(LINUX)
1256extern inthand_t
1257	IDTVEC(linux_syscall);
1258#endif
1259
1260void
1261sdtossd(sd, ssd)
1262	struct segment_descriptor *sd;
1263	struct soft_segment_descriptor *ssd;
1264{
1265	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1266	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1267	ssd->ssd_type  = sd->sd_type;
1268	ssd->ssd_dpl   = sd->sd_dpl;
1269	ssd->ssd_p     = sd->sd_p;
1270	ssd->ssd_def32 = sd->sd_def32;
1271	ssd->ssd_gran  = sd->sd_gran;
1272}
1273
1274void
1275init386(first)
1276	int first;
1277{
1278	int x;
1279	unsigned biosbasemem, biosextmem;
1280	struct gate_descriptor *gdp;
1281	int gsel_tss;
1282	/* table descriptors - used to load tables by microp */
1283	struct region_descriptor r_gdt, r_idt;
1284	int	pagesinbase, pagesinext;
1285	int	target_page, pa_indx;
1286
1287	proc0.p_addr = proc0paddr;
1288
1289	/*
1290	 * Initialize the console before we print anything out.
1291	 */
1292	cninit();
1293
1294	/*
1295	 * make gdt memory segments, the code segment goes up to end of the
1296	 * page with etext in it, the data segment goes to the end of
1297	 * the address space
1298	 */
1299	/*
1300	 * XXX text protection is temporarily (?) disabled.  The limit was
1301	 * i386_btop(i386_round_page(etext)) - 1.
1302	 */
1303	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1304	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1305	for (x = 0; x < NGDT; x++)
1306		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1307
1308	/* make ldt memory segments */
1309	/*
1310	 * The data segment limit must not cover the user area because we
1311	 * don't want the user area to be writable in copyout() etc. (page
1312	 * level protection is lost in kernel mode on 386's).  Also, we
1313	 * don't want the user area to be writable directly (page level
1314	 * protection of the user area is not available on 486's with
1315	 * CR0_WP set, because there is no user-read/kernel-write mode).
1316	 *
1317	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1318	 * should be spelled ...MAX_USER...
1319	 */
1320#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1321	/*
1322	 * The code segment limit has to cover the user area until we move
1323	 * the signal trampoline out of the user area.  This is safe because
1324	 * the code segment cannot be written to directly.
1325	 */
1326#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1327	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1328	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1329	/* Note. eventually want private ldts per process */
1330	for (x = 0; x < NLDT; x++)
1331		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1332
1333	/* exceptions */
1334	for (x = 0; x < NIDT; x++)
1335		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1336	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1337	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1338	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1339 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1340	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1341	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1342	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1343	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1344	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
1345	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1346	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1347	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1348	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1349	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1350	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1351	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1352	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1353	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1354#if defined(COMPAT_LINUX) || defined(LINUX)
1355 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1356#endif
1357
1358#include	"isa.h"
1359#if	NISA >0
1360	isa_defaultirq();
1361#endif
1362	rand_initialize();
1363
1364	r_gdt.rd_limit = sizeof(gdt) - 1;
1365	r_gdt.rd_base =  (int) gdt;
1366	lgdt(&r_gdt);
1367
1368	r_idt.rd_limit = sizeof(idt) - 1;
1369	r_idt.rd_base = (int) idt;
1370	lidt(&r_idt);
1371
1372	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1373	lldt(_default_ldt);
1374	currentldt = _default_ldt;
1375
1376#ifdef DDB
1377	kdb_init();
1378	if (boothowto & RB_KDB)
1379		Debugger("Boot flags requested debugger");
1380#endif
1381
1382	/* Use BIOS values stored in RTC CMOS RAM, since probing
1383	 * breaks certain 386 AT relics.
1384	 */
1385	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1386	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1387
1388	/*
1389	 * Print a warning if the official BIOS interface disagrees
1390	 * with the hackish interface used above.  Eventually only
1391	 * the official interface should be used.
1392	 */
1393	if (bootinfo.bi_memsizes_valid) {
1394		if (bootinfo.bi_basemem != biosbasemem)
1395			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1396			       bootinfo.bi_basemem, biosbasemem);
1397		if (bootinfo.bi_extmem != biosextmem)
1398			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1399			       bootinfo.bi_extmem, biosextmem);
1400	}
1401
1402	/*
1403	 * If BIOS tells us that it has more than 640k in the basemem,
1404	 *	don't believe it - set it to 640k.
1405	 */
1406	if (biosbasemem > 640)
1407		biosbasemem = 640;
1408
1409	/*
1410	 * Some 386 machines might give us a bogus number for extended
1411	 *	mem. If this happens, stop now.
1412	 */
1413#ifndef LARGEMEM
1414	if (biosextmem > 65536) {
1415		panic("extended memory beyond limit of 64MB");
1416		/* NOTREACHED */
1417	}
1418#endif
1419
1420	pagesinbase = biosbasemem * 1024 / NBPG;
1421	pagesinext = biosextmem * 1024 / NBPG;
1422
1423	/*
1424	 * Special hack for chipsets that still remap the 384k hole when
1425	 *	there's 16MB of memory - this really confuses people that
1426	 *	are trying to use bus mastering ISA controllers with the
1427	 *	"16MB limit"; they only have 16MB, but the remapping puts
1428	 *	them beyond the limit.
1429	 */
1430	/*
1431	 * If extended memory is between 15-16MB (16-17MB phys address range),
1432	 *	chop it to 15MB.
1433	 */
1434	if ((pagesinext > 3840) && (pagesinext < 4096))
1435		pagesinext = 3840;
1436
1437	/*
1438	 * Maxmem isn't the "maximum memory", it's one larger than the
1439	 * highest page of of the physical address space. It
1440	 */
1441	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1442
1443#ifdef MAXMEM
1444	Maxmem = MAXMEM/4;
1445#endif
1446
1447	/* call pmap initialization to make new kernel address space */
1448	pmap_bootstrap (first, 0);
1449
1450	/*
1451	 * Size up each available chunk of physical memory.
1452	 */
1453
1454	/*
1455	 * We currently don't bother testing base memory.
1456	 * XXX  ...but we probably should.
1457	 */
1458	pa_indx = 0;
1459	badpages = 0;
1460	if (pagesinbase > 1) {
1461		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
1462		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
1463		physmem = pagesinbase - 1;
1464	} else {
1465		/* point at first chunk end */
1466		pa_indx++;
1467	}
1468
1469	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
1470		int tmp, page_bad = FALSE;
1471
1472		/*
1473		 * map page into kernel: valid, read/write, non-cacheable
1474		 */
1475		*(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page;
1476		pmap_update();
1477
1478		tmp = *(int *)CADDR1;
1479		/*
1480		 * Test for alternating 1's and 0's
1481		 */
1482		*(int *)CADDR1 = 0xaaaaaaaa;
1483		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1484			page_bad = TRUE;
1485		}
1486		/*
1487		 * Test for alternating 0's and 1's
1488		 */
1489		*(int *)CADDR1 = 0x55555555;
1490		if (*(int *)CADDR1 != 0x55555555) {
1491			page_bad = TRUE;
1492		}
1493		/*
1494		 * Test for all 1's
1495		 */
1496		*(int *)CADDR1 = 0xffffffff;
1497		if (*(int *)CADDR1 != 0xffffffff) {
1498			page_bad = TRUE;
1499		}
1500		/*
1501		 * Test for all 0's
1502		 */
1503		*(int *)CADDR1 = 0x0;
1504		if (*(int *)CADDR1 != 0x0) {
1505			/*
1506			 * test of page failed
1507			 */
1508			page_bad = TRUE;
1509		}
1510		/*
1511		 * Restore original value.
1512		 */
1513		*(int *)CADDR1 = tmp;
1514
1515		/*
1516		 * Adjust array of valid/good pages.
1517		 */
1518		if (page_bad == FALSE) {
1519			/*
1520			 * If this good page is a continuation of the
1521			 * previous set of good pages, then just increase
1522			 * the end pointer. Otherwise start a new chunk.
1523			 * Note that "end" points one higher than end,
1524			 * making the range >= start and < end.
1525			 */
1526			if (phys_avail[pa_indx] == target_page) {
1527				phys_avail[pa_indx] += PAGE_SIZE;
1528			} else {
1529				pa_indx++;
1530				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1531					printf("Too many holes in the physical address space, giving up\n");
1532					pa_indx--;
1533					break;
1534				}
1535				phys_avail[pa_indx++] = target_page;	/* start */
1536				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
1537			}
1538			physmem++;
1539		} else {
1540			badpages++;
1541			page_bad = FALSE;
1542		}
1543	}
1544
1545	*(int *)CMAP1 = 0;
1546	pmap_update();
1547
1548	/*
1549	 * XXX
1550	 * The last chunk must contain at least one page plus the message
1551	 * buffer to avoid complicating other code (message buffer address
1552	 * calculation, etc.).
1553	 */
1554	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1555	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
1556		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1557		phys_avail[pa_indx--] = 0;
1558		phys_avail[pa_indx--] = 0;
1559	}
1560
1561	Maxmem = atop(phys_avail[pa_indx]);
1562
1563	/* Trim off space for the message buffer. */
1564	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));
1565
1566	avail_end = phys_avail[pa_indx];
1567
1568	/* now running on new page tables, configured,and u/iom is accessible */
1569
1570	/* make a initial tss so microp can get interrupt stack on syscall! */
1571	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1572	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1573	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1574
1575	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
1576	    dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
1577	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
1578	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
1579	dblfault_tss.tss_cr3 = IdlePTD;
1580	dblfault_tss.tss_eip = (int) dblfault_handler;
1581	dblfault_tss.tss_eflags = PSL_KERNEL;
1582	dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs =
1583		GSEL(GDATA_SEL, SEL_KPL);
1584	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
1585	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
1586
1587	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1588		(sizeof(struct i386tss))<<16;
1589
1590	ltr(gsel_tss);
1591
1592	/* make a call gate to reenter kernel with */
1593	gdp = &ldt[LSYS5CALLS_SEL].gd;
1594
1595	x = (int) &IDTVEC(syscall);
1596	gdp->gd_looffset = x++;
1597	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1598	gdp->gd_stkcpy = 1;
1599	gdp->gd_type = SDT_SYS386CGT;
1600	gdp->gd_dpl = SEL_UPL;
1601	gdp->gd_p = 1;
1602	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1603
1604	/* transfer to user mode */
1605
1606	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1607	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1608
1609	/* setup proc 0's pcb */
1610	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1611	proc0.p_addr->u_pcb.pcb_flags = 0;
1612	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1613}
1614
1615/*
1616 * The registers are in the frame; the frame is in the user area of
1617 * the process in question; when the process is active, the registers
1618 * are in "the kernel stack"; when it's not, they're still there, but
1619 * things get flipped around.  So, since p->p_md.md_regs is the whole address
1620 * of the register set, take its offset from the kernel stack, and
1621 * index into the user block.  Don't you just *love* virtual memory?
1622 * (I'm starting to think seymour is right...)
1623 */
/*
 * TF_REGP(p) evaluates to a struct trapframe pointer at
 * p->p_addr + (p->p_md.md_regs - kstack).  Note that `p' is evaluated
 * twice, so the argument should not have side effects.
 */
1624#define	TF_REGP(p)	((struct trapframe *) \
1625			 ((char *)(p)->p_addr \
1626			  + ((char *)(p)->p_md.md_regs - kstack)))
1627
1628int
1629ptrace_set_pc(p, addr)
1630	struct proc *p;
1631	unsigned int addr;
1632{
1633	TF_REGP(p)->tf_eip = addr;
1634	return (0);
1635}
1636
1637int
1638ptrace_single_step(p)
1639	struct proc *p;
1640{
1641	TF_REGP(p)->tf_eflags |= PSL_T;
1642	return (0);
1643}
1644
1645int ptrace_write_u(p, off, data)
1646	struct proc *p;
1647	vm_offset_t off;
1648	int data;
1649{
1650	struct trapframe frame_copy;
1651	vm_offset_t min;
1652	struct trapframe *tp;
1653
1654	/*
1655	 * Privileged kernel state is scattered all over the user area.
1656	 * Only allow write access to parts of regs and to fpregs.
1657	 */
1658	min = (char *)p->p_md.md_regs - kstack;
1659	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1660		tp = TF_REGP(p);
1661		frame_copy = *tp;
1662		*(int *)((char *)&frame_copy + (off - min)) = data;
1663		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1664		    !CS_SECURE(frame_copy.tf_cs))
1665			return (EINVAL);
1666		*(int*)((char *)p->p_addr + off) = data;
1667		return (0);
1668	}
1669	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1670	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1671		*(int*)((char *)p->p_addr + off) = data;
1672		return (0);
1673	}
1674	return (EFAULT);
1675}
1676
1677int
1678fill_regs(p, regs)
1679	struct proc *p;
1680	struct reg *regs;
1681{
1682	struct trapframe *tp;
1683
1684	tp = TF_REGP(p);
1685	regs->r_es = tp->tf_es;
1686	regs->r_ds = tp->tf_ds;
1687	regs->r_edi = tp->tf_edi;
1688	regs->r_esi = tp->tf_esi;
1689	regs->r_ebp = tp->tf_ebp;
1690	regs->r_ebx = tp->tf_ebx;
1691	regs->r_edx = tp->tf_edx;
1692	regs->r_ecx = tp->tf_ecx;
1693	regs->r_eax = tp->tf_eax;
1694	regs->r_eip = tp->tf_eip;
1695	regs->r_cs = tp->tf_cs;
1696	regs->r_eflags = tp->tf_eflags;
1697	regs->r_esp = tp->tf_esp;
1698	regs->r_ss = tp->tf_ss;
1699	return (0);
1700}
1701
1702int
1703set_regs(p, regs)
1704	struct proc *p;
1705	struct reg *regs;
1706{
1707	struct trapframe *tp;
1708
1709	tp = TF_REGP(p);
1710	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1711	    !CS_SECURE(regs->r_cs))
1712		return (EINVAL);
1713	tp->tf_es = regs->r_es;
1714	tp->tf_ds = regs->r_ds;
1715	tp->tf_edi = regs->r_edi;
1716	tp->tf_esi = regs->r_esi;
1717	tp->tf_ebp = regs->r_ebp;
1718	tp->tf_ebx = regs->r_ebx;
1719	tp->tf_edx = regs->r_edx;
1720	tp->tf_ecx = regs->r_ecx;
1721	tp->tf_eax = regs->r_eax;
1722	tp->tf_eip = regs->r_eip;
1723	tp->tf_cs = regs->r_cs;
1724	tp->tf_eflags = regs->r_eflags;
1725	tp->tf_esp = regs->r_esp;
1726	tp->tf_ss = regs->r_ss;
1727	return (0);
1728}
1729
#ifndef DDB
/*
 * Stand-in used when the kernel is built without DDB: there is no
 * debugger to enter, so just report that one was requested and why.
 */
void
Debugger(const char *msg)
{
	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
1737
1738#include <sys/disklabel.h>
1739#define b_cylin	b_resid
1740/*
1741 * Determine the size of the transfer, and make sure it is
1742 * within the boundaries of the partition. Adjust transfer
1743 * if needed, and signal errors or early completion.
1744 */
1745int
1746bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1747{
1748        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1749        int labelsect = lp->d_partitions[0].p_offset;
1750        int maxsz = p->p_size,
1751                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1752
1753        /* overwriting disk label ? */
1754        /* XXX should also protect bootstrap in first 8K */
1755        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1756#if LABELSECTOR != 0
1757            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1758#endif
1759            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1760                bp->b_error = EROFS;
1761                goto bad;
1762        }
1763
1764#if     defined(DOSBBSECTOR) && defined(notyet)
1765        /* overwriting master boot record? */
1766        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1767            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1768                bp->b_error = EROFS;
1769                goto bad;
1770        }
1771#endif
1772
1773        /* beyond partition? */
1774        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1775                /* if exactly at end of disk, return an EOF */
1776                if (bp->b_blkno == maxsz) {
1777                        bp->b_resid = bp->b_bcount;
1778                        return(0);
1779                }
1780                /* or truncate if part of it fits */
1781                sz = maxsz - bp->b_blkno;
1782                if (sz <= 0) {
1783                        bp->b_error = EINVAL;
1784                        goto bad;
1785                }
1786                bp->b_bcount = sz << DEV_BSHIFT;
1787        }
1788
1789        /* calculate cylinder for disksort to order transfers with */
1790        bp->b_pblkno = bp->b_blkno + p->p_offset;
1791        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1792        return(1);
1793
1794bad:
1795        bp->b_flags |= B_ERROR;
1796        return(-1);
1797}
1798
/*
 * Copy the drive number out to the sysctl request `req' as a plain int.
 * Returns whatever SYSCTL_OUT() returns.
 */
int
disk_externalize(int drive, struct sysctl_req *req)
{
	int error;

	error = SYSCTL_OUT(req, &drive, sizeof(drive));
	return (error);
}
1804