machdep.c revision 12186
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.148 1995/11/04 16:00:22 markm Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/signalvar.h>
48#include <sys/kernel.h>
49#include <sys/proc.h>
50#include <sys/user.h>
51#include <sys/buf.h>
52#include <sys/reboot.h>
53#include <sys/conf.h>
54#include <sys/file.h>
55#include <sys/callout.h>
56#include <sys/malloc.h>
57#include <sys/mbuf.h>
58#include <sys/mount.h>
59#include <sys/msgbuf.h>
60#include <sys/ioctl.h>
61#include <sys/sysent.h>
62#include <sys/tty.h>
63#include <sys/sysctl.h>
64#include <sys/devconf.h>
65
66#ifdef SYSVSHM
67#include <sys/shm.h>
68#endif
69
70#ifdef SYSVMSG
71#include <sys/msg.h>
72#endif
73
74#ifdef SYSVSEM
75#include <sys/sem.h>
76#endif
77
78#include <vm/vm.h>
79#include <vm/vm_kern.h>
80#include <vm/vm_page.h>
81#include <vm/vm_pager.h>
82
83#include <sys/exec.h>
84#include <sys/vnode.h>
85
86#include <ddb/ddb.h>
87
88#include <net/netisr.h>
89
90/* XXX correctly declaring all the netisr's is painful. */
91#include <net/if.h>
92#include <net/route.h>
93
94#include <netinet/in.h>
95#include <netinet/in_systm.h>
96#include <netinet/ip.h>
97#include <netinet/if_ether.h>
98#include <netinet/ip_var.h>
99
100#include <netns/ns.h>
101#include <netns/ns_if.h>
102
103#include <netiso/iso.h>
104#include <netiso/iso_var.h>
105
106#include <netccitt/dll.h>
107#include <netccitt/x25.h>
108#include <netccitt/pk.h>
109#include <sys/socketvar.h>
110#include <netccitt/pk_var.h>
111
112#include "ether.h"
113
114#include <machine/cpu.h>
115#include <machine/npx.h>
116#include <machine/reg.h>
117#include <machine/psl.h>
118#include <machine/clock.h>
119#include <machine/specialreg.h>
120#include <machine/sysarch.h>
121#include <machine/cons.h>
122#include <machine/devconf.h>
123#include <machine/bootinfo.h>
124#include <machine/md_var.h>
125
126#include <i386/isa/isa.h>
127#include <i386/isa/isa_device.h>
128#include <i386/isa/rtc.h>
129#include <machine/random.h>
130
131extern void diediedie __P((void));
132extern void init386 __P((int first));
133extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
134extern int ptrace_single_step __P((struct proc *p));
135extern int ptrace_getregs __P((struct proc *p, unsigned int *addr));
136extern int ptrace_setregs __P((struct proc *p, unsigned int *addr));
137extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
138
139static void cpu_startup __P((void *));
140SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
141
142static void identifycpu(void);
143
144char machine[] = "i386";
145char cpu_model[128];
146
147struct kern_devconf kdc_cpu0 = {
148	0, 0, 0,		/* filled in by dev_attach */
149	"cpu", 0, { MDDT_CPU },
150	0, 0, 0, CPU_EXTERNALLEN,
151	0,			/* CPU has no parent */
152	0,			/* no parentdata */
153	DC_BUSY,		/* the CPU is always busy */
154	cpu_model,		/* no sense in duplication */
155	DC_CLS_CPU		/* class */
156};
157
158#ifndef PANIC_REBOOT_WAIT_TIME
159#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
160#endif
161
162#ifdef BOUNCE_BUFFERS
163extern char *bouncememory;
164extern int maxbkva;
165#ifdef BOUNCEPAGES
166int	bouncepages = BOUNCEPAGES;
167#else
168int	bouncepages = 0;
169#endif
170#endif	/* BOUNCE_BUFFERS */
171
172extern int freebufspace;
173int	msgbufmapped = 0;		/* set when safe to use msgbuf */
174int _udatasel, _ucodesel;
175
176
177/*
178 * Machine-dependent startup code
179 */
180int boothowto = 0, bootverbose = 0, Maxmem = 0, badpages = 0, physmem = 0;
181long dumplo;
182extern int bootdev;
183int biosmem;
184
185vm_offset_t phys_avail[10];
186
187/* must be 2 less so 0 0 can signal end of chunks */
188#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
189
190int cpu_class;
191
192void dumpsys __P((void));
193void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */
194
195vm_offset_t buffer_sva, buffer_eva;
196vm_offset_t clean_sva, clean_eva;
197vm_offset_t pager_sva, pager_eva;
198extern struct linker_set netisr_set;
199
200#define offsetof(type, member)	((size_t)(&((type *)0)->member))
201
202static void
203cpu_startup(dummy)
204	void *dummy;
205{
206	register unsigned i;
207	register caddr_t v;
208	vm_offset_t maxaddr;
209	vm_size_t size = 0;
210	int firstaddr, indx;
211	vm_offset_t minaddr;
212
213	if (boothowto & RB_VERBOSE)
214		bootverbose++;
215
216	/*
217	 * Initialize error message buffer (at end of core).
218	 */
219
220	/* avail_end was pre-decremented in init_386() to compensate */
221	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
222		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
223			   avail_end + i * NBPG,
224			   VM_PROT_ALL, TRUE);
225	msgbufmapped = 1;
226
227	/*
228	 * Good {morning,afternoon,evening,night}.
229	 */
230	printf(version);
231	startrtclock();
232	identifycpu();
233	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
234	/*
235	 * Display any holes after the first chunk of extended memory.
236	 */
237	if (badpages != 0) {
238		int indx = 1;
239
240		/*
241		 * XXX skip reporting ISA hole & unmanaged kernel memory
242		 */
243		if (phys_avail[0] == PAGE_SIZE)
244			indx += 2;
245
246		printf("Physical memory hole(s):\n");
247		for (; phys_avail[indx + 1] != 0; indx += 2) {
248			int size = phys_avail[indx + 1] - phys_avail[indx];
249
250			printf("0x%08x - 0x%08x, %d bytes (%d pages)\n", phys_avail[indx],
251			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
252		}
253	}
254
255	/*
256	 * Quickly wire in netisrs.
257	 */
258	setup_netisrs(&netisr_set);
259
260/*
261#ifdef ISDN
262	DONET(isdnintr, NETISR_ISDN);
263#endif
264*/
265
266	/*
267	 * Allocate space for system data structures.
268	 * The first available kernel virtual address is in "v".
269	 * As pages of kernel virtual memory are allocated, "v" is incremented.
270	 * As pages of memory are allocated and cleared,
271	 * "firstaddr" is incremented.
272	 * An index into the kernel page table corresponding to the
273	 * virtual memory address maintained in "v" is kept in "mapaddr".
274	 */
275
276	/*
277	 * Make two passes.  The first pass calculates how much memory is
278	 * needed and allocates it.  The second pass assigns virtual
279	 * addresses to the various data structures.
280	 */
281	firstaddr = 0;
282again:
283	v = (caddr_t)firstaddr;
284
285#define	valloc(name, type, num) \
286	    (name) = (type *)v; v = (caddr_t)((name)+(num))
287#define	valloclim(name, type, num, lim) \
288	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
289	valloc(callout, struct callout, ncallout);
290#ifdef SYSVSHM
291	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
292#endif
293#ifdef SYSVSEM
294	valloc(sema, struct semid_ds, seminfo.semmni);
295	valloc(sem, struct sem, seminfo.semmns);
296	/* This is pretty disgusting! */
297	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
298#endif
299#ifdef SYSVMSG
300	valloc(msgpool, char, msginfo.msgmax);
301	valloc(msgmaps, struct msgmap, msginfo.msgseg);
302	valloc(msghdrs, struct msg, msginfo.msgtql);
303	valloc(msqids, struct msqid_ds, msginfo.msgmni);
304#endif
305
306	if (nbuf == 0) {
307		nbuf = 30;
308		if( physmem > 1024)
309			nbuf += min((physmem - 1024) / 12, 1024);
310	}
311	nswbuf = min(nbuf, 128);
312
313	valloc(swbuf, struct buf, nswbuf);
314	valloc(buf, struct buf, nbuf);
315
316#ifdef BOUNCE_BUFFERS
317	/*
318	 * If there is more than 16MB of memory, allocate some bounce buffers
319	 */
320	if (Maxmem > 4096) {
321		if (bouncepages == 0) {
322			bouncepages = 64;
323			bouncepages += ((Maxmem - 4096) / 2048) * 32;
324		}
325		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
326		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
327	}
328#endif
329
330	/*
331	 * End of first pass, size has been calculated so allocate memory
332	 */
333	if (firstaddr == 0) {
334		size = (vm_size_t)(v - firstaddr);
335		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
336		if (firstaddr == 0)
337			panic("startup: no room for tables");
338		goto again;
339	}
340
341	/*
342	 * End of second pass, addresses have been assigned
343	 */
344	if ((vm_size_t)(v - firstaddr) != size)
345		panic("startup: table size inconsistency");
346
347#ifdef BOUNCE_BUFFERS
348	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
349			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
350				maxbkva + pager_map_size, TRUE);
351	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
352#else
353	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
354			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
355#endif
356	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
357				(nbuf*MAXBSIZE), TRUE);
358	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
359				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
360	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
361				(16*ARG_MAX), TRUE);
362	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
363				(maxproc*UPAGES*PAGE_SIZE), FALSE);
364
365	/*
366	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
367	 * we use the more space efficient malloc in place of kmem_alloc.
368	 */
369	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
370				   M_MBUF, M_NOWAIT);
371	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
372	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
373			       nmbclusters * MCLBYTES, FALSE);
374	/*
375	 * Initialize callouts
376	 */
377	callfree = callout;
378	for (i = 1; i < ncallout; i++)
379		callout[i-1].c_next = &callout[i];
380
381        if (boothowto & RB_CONFIG) {
382		userconfig();
383		cninit();	/* the preferred console may have changed */
384	}
385
386#ifdef BOUNCE_BUFFERS
387	/*
388	 * init bounce buffers
389	 */
390	vm_bounce_init();
391#endif
392	/*
393	 * XXX allocate a contiguous area for ISA (non busmaster) DMA
394	 * operations. This _should_ only be done if the DMA channels
395	 * will actually be used, but for now we do it always.
396	 */
397#define DMAPAGES 8
398	isaphysmem =
399	    vm_page_alloc_contig(DMAPAGES * PAGE_SIZE, 0, 0xfffffful, 64*1024);
400
401	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
402	    ptoa(cnt.v_free_count) / 1024);
403
404	/*
405	 * Set up buffers, so they can be used to read disk labels.
406	 */
407	bufinit();
408	vm_pager_bufferinit();
409
410	/*
411	 * In verbose mode, print out the BIOS's idea of the disk geometries.
412	 */
413	if (bootverbose) {
414		printf("BIOS Geometries:\n");
415		for (i = 0; i < N_BIOS_GEOM; i++) {
416			unsigned long bios_geom;
417			int max_cylinder, max_head, max_sector;
418
419			bios_geom = bootinfo.bi_bios_geom[i];
420
421			/*
422			 * XXX the bootstrap punts a 1200K floppy geometry
423			 * when the get-disk-geometry interrupt fails.  Skip
424			 * drives that have this geometry.
425			 */
426			if (bios_geom == 0x4f010f)
427				continue;
428
429			printf(" %x:%08x ", i, bios_geom);
430			max_cylinder = bios_geom >> 16;
431			max_head = (bios_geom >> 8) & 0xff;
432			max_sector = bios_geom & 0xff;
433			printf(
434		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
435			       max_cylinder, max_cylinder + 1,
436			       max_head, max_head + 1,
437			       max_sector, max_sector);
438		}
439		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
440	}
441}
442
443int
444register_netisr(num, handler)
445	int num;
446	netisr_t *handler;
447{
448
449	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
450		printf("register_netisr: bad isr number: %d\n", num);
451		return (EINVAL);
452	}
453	netisrs[num] = handler;
454	return (0);
455}
456
457void
458setup_netisrs(ls)
459	struct linker_set *ls;
460{
461	int i;
462	const struct netisrtab *nit;
463
464	for(i = 0; ls->ls_items[i]; i++) {
465		nit = (const struct netisrtab *)ls->ls_items[i];
466		register_netisr(nit->nit_num, nit->nit_isr);
467	}
468}
469
470struct cpu_nameclass i386_cpus[] = {
471	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
472	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
473	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
474	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
475	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
476	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
477	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
478};
479
480static void
481identifycpu()
482{
483	printf("CPU: ");
484	if (cpu >= 0
485	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
486		cpu_class = i386_cpus[cpu].cpu_class;
487		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
488	} else {
489		printf("unknown cpu type %d\n", cpu);
490		panic("startup: bad cpu id");
491	}
492
493#if defined(I586_CPU)
494	if(cpu_class == CPUCLASS_586) {
495		calibrate_cyclecounter();
496		printf("%d-MHz ", pentium_mhz);
497	}
498#endif
499#if defined(I486_CPU) || defined(I586_CPU)
500	if (!strcmp(cpu_vendor,"GenuineIntel")) {
501		if ((cpu_id & 0xf00) > 3) {
502			cpu_model[0] = '\0';
503
504			switch (cpu_id & 0x3000) {
505			case 0x1000:
506				strcpy(cpu_model, "Overdrive ");
507				break;
508			case 0x2000:
509				strcpy(cpu_model, "Dual ");
510				break;
511			}
512			if ((cpu_id & 0xf00) == 0x400) {
513				strcat(cpu_model, "i486 ");
514#if defined(I586_CPU)
515			} else if ((cpu_id & 0xf00) == 0x500) {
516				strcat(cpu_model, "Pentium ");
517#endif
518			} else {
519				strcat(cpu_model, "unknown ");
520			}
521
522			switch (cpu_id & 0xff0) {
523			case 0x400:
524				strcat(cpu_model, "DX"); break;
525			case 0x410:
526				strcat(cpu_model, "DX"); break;
527			case 0x420:
528				strcat(cpu_model, "SX"); break;
529			case 0x430:
530				strcat(cpu_model, "DX2"); break;
531			case 0x440:
532				strcat(cpu_model, "SL"); break;
533			case 0x450:
534				strcat(cpu_model, "SX2"); break;
535			case 0x470:
536				strcat(cpu_model, "DX2 Write-Back Enhanced");
537				break;
538			case 0x480:
539				strcat(cpu_model, "DX4"); break;
540#if defined(I586_CPU)
541			case 0x510:
542				if (pentium_mhz == 60) {
543					strcat(cpu_model, "510\\60");
544				} else if (pentium_mhz == 66) {
545					strcat(cpu_model, "567\\66");
546				} else {
547					strcat(cpu_model,"510\\60 or 567\\66");
548				}
549				break;
550			case 0x520:
551				if (pentium_mhz == 90) {
552					strcat(cpu_model, "735\\90");
553				} else if (pentium_mhz == 100) {
554					strcat(cpu_model, "815\\100");
555				} else {
556					strcat(cpu_model,"735\\90 or 815\\100");
557				}
558				break;
559#endif
560			}
561		}
562	}
563#endif
564	printf("%s (", cpu_model);
565	switch(cpu_class) {
566	case CPUCLASS_286:
567		printf("286");
568		break;
569#if defined(I386_CPU)
570	case CPUCLASS_386:
571		printf("386");
572		break;
573#endif
574#if defined(I486_CPU)
575	case CPUCLASS_486:
576		printf("486");
577		break;
578#endif
579#if defined(I586_CPU)
580	case CPUCLASS_586:
581		printf("Pentium");
582		break;
583#endif
584	default:
585		printf("unknown");	/* will panic below... */
586	}
587	printf("-class CPU)\n");
588#if defined(I486_CPU) || defined(I586_CPU)
589	if(*cpu_vendor)
590		printf("  Origin = \"%s\"",cpu_vendor);
591	if(cpu_id)
592		printf("  Id = 0x%lx",cpu_id);
593
594	if (!strcmp(cpu_vendor, "GenuineIntel")) {
595		printf("  Stepping=%ld", cpu_id & 0xf);
596		if (cpu_high > 0) {
597#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
598			printf("\n  Features=0x%b", cpu_feature, FEATUREFMT);
599		}
600	}
601	/* Avoid ugly blank lines: only print newline when we have to. */
602	if (*cpu_vendor || cpu_id)
603		printf("\n");
604#endif
605	/*
606	 * Now that we have told the user what they have,
607	 * let them know if that machine type isn't configured.
608	 */
609	switch (cpu_class) {
610	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
611#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
612#error This kernel is not configured for one of the supported CPUs
613#endif
614#if !defined(I386_CPU)
615	case CPUCLASS_386:
616#endif
617#if !defined(I486_CPU)
618	case CPUCLASS_486:
619#endif
620#if !defined(I586_CPU)
621	case CPUCLASS_586:
622#endif
623		panic("CPU class not configured");
624	default:
625		break;
626	}
627	dev_attach(&kdc_cpu0);
628}
629
630/*
631 * Send an interrupt to process.
632 *
633 * Stack is set up to allow sigcode stored
634 * in u. to call routine, followed by kcall
635 * to sigreturn routine below.  After sigreturn
636 * resets the signal mask, the stack, and the
637 * frame pointer, it returns to the user
638 * specified pc, psl.
639 */
640void
641sendsig(catcher, sig, mask, code)
642	sig_t catcher;
643	int sig, mask;
644	unsigned code;
645{
646	register struct proc *p = curproc;
647	register int *regs;
648	register struct sigframe *fp;
649	struct sigframe sf;
650	struct sigacts *psp = p->p_sigacts;
651	int oonstack;
652
653	regs = p->p_md.md_regs;
654        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
655	/*
656	 * Allocate and validate space for the signal handler
657	 * context. Note that if the stack is in P0 space, the
658	 * call to grow() is a nop, and the useracc() check
659	 * will fail if the process has not already allocated
660	 * the space with a `brk'.
661	 */
662        if ((psp->ps_flags & SAS_ALTSTACK) &&
663	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
664	    (psp->ps_sigonstack & sigmask(sig))) {
665		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
666		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
667		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
668	} else {
669		fp = (struct sigframe *)(regs[tESP]
670			- sizeof(struct sigframe));
671	}
672
673	/*
674	 * grow() will return FALSE if the fp will not fit inside the stack
675	 *	and the stack can not be grown. useracc will return FALSE
676	 *	if access is denied.
677	 */
678	if ((grow(p, (int)fp) == FALSE) ||
679	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
680		/*
681		 * Process has trashed its stack; give it an illegal
682		 * instruction to halt it in its tracks.
683		 */
684		SIGACTION(p, SIGILL) = SIG_DFL;
685		sig = sigmask(SIGILL);
686		p->p_sigignore &= ~sig;
687		p->p_sigcatch &= ~sig;
688		p->p_sigmask &= ~sig;
689		psignal(p, SIGILL);
690		return;
691	}
692
693	/*
694	 * Build the argument list for the signal handler.
695	 */
696	if (p->p_sysent->sv_sigtbl) {
697		if (sig < p->p_sysent->sv_sigsize)
698			sig = p->p_sysent->sv_sigtbl[sig];
699		else
700			sig = p->p_sysent->sv_sigsize + 1;
701	}
702	sf.sf_signum = sig;
703	sf.sf_code = code;
704	sf.sf_scp = &fp->sf_sc;
705	sf.sf_addr = (char *) regs[tERR];
706	sf.sf_handler = catcher;
707
708	/* save scratch registers */
709	sf.sf_sc.sc_eax = regs[tEAX];
710	sf.sf_sc.sc_ebx = regs[tEBX];
711	sf.sf_sc.sc_ecx = regs[tECX];
712	sf.sf_sc.sc_edx = regs[tEDX];
713	sf.sf_sc.sc_esi = regs[tESI];
714	sf.sf_sc.sc_edi = regs[tEDI];
715	sf.sf_sc.sc_cs = regs[tCS];
716	sf.sf_sc.sc_ds = regs[tDS];
717	sf.sf_sc.sc_ss = regs[tSS];
718	sf.sf_sc.sc_es = regs[tES];
719	sf.sf_sc.sc_isp = regs[tISP];
720
721	/*
722	 * Build the signal context to be used by sigreturn.
723	 */
724	sf.sf_sc.sc_onstack = oonstack;
725	sf.sf_sc.sc_mask = mask;
726	sf.sf_sc.sc_sp = regs[tESP];
727	sf.sf_sc.sc_fp = regs[tEBP];
728	sf.sf_sc.sc_pc = regs[tEIP];
729	sf.sf_sc.sc_ps = regs[tEFLAGS];
730
731	/*
732	 * Copy the sigframe out to the user's stack.
733	 */
734	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
735		/*
736		 * Something is wrong with the stack pointer.
737		 * ...Kill the process.
738		 */
739		sigexit(p, SIGILL);
740	};
741
742	regs[tESP] = (int)fp;
743	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
744	regs[tEFLAGS] &= ~PSL_VM;
745	regs[tCS] = _ucodesel;
746	regs[tDS] = _udatasel;
747	regs[tES] = _udatasel;
748	regs[tSS] = _udatasel;
749}
750
751/*
752 * System call to cleanup state after a signal
753 * has been taken.  Reset signal mask and
754 * stack state from context left by sendsig (above).
755 * Return to previous pc and psl as specified by
756 * context left by sendsig. Check carefully to
757 * make sure that the user has not modified the
758 * state to gain improper privileges.
759 */
760int
761sigreturn(p, uap, retval)
762	struct proc *p;
763	struct sigreturn_args /* {
764		struct sigcontext *sigcntxp;
765	} */ *uap;
766	int *retval;
767{
768	register struct sigcontext *scp;
769	register struct sigframe *fp;
770	register int *regs = p->p_md.md_regs;
771	int eflags;
772
773	/*
774	 * (XXX old comment) regs[tESP] points to the return address.
775	 * The user scp pointer is above that.
776	 * The return address is faked in the signal trampoline code
777	 * for consistency.
778	 */
779	scp = uap->sigcntxp;
780	fp = (struct sigframe *)
781	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
782
783	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
784		return(EINVAL);
785
786	/*
787	 * Don't allow users to change privileged or reserved flags.
788	 */
789#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
790	eflags = scp->sc_ps;
791	/*
792	 * XXX do allow users to change the privileged flag PSL_RF.  The
793	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
794	 * sometimes set it there too.  tf_eflags is kept in the signal
795	 * context during signal handling and there is no other place
796	 * to remember it, so the PSL_RF bit may be corrupted by the
797	 * signal handler without us knowing.  Corruption of the PSL_RF
798	 * bit at worst causes one more or one less debugger trap, so
799	 * allowing it is fairly harmless.
800	 */
801	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
802#ifdef DEBUG
803    		printf("sigreturn: eflags = 0x%x\n", eflags);
804#endif
805    		return(EINVAL);
806	}
807
808	/*
809	 * Don't allow users to load a valid privileged %cs.  Let the
810	 * hardware check for invalid selectors, excess privilege in
811	 * other selectors, invalid %eip's and invalid %esp's.
812	 */
813#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
814	if (!CS_SECURE(scp->sc_cs)) {
815#ifdef DEBUG
816    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
817#endif
818		trapsignal(p, SIGBUS, T_PROTFLT);
819		return(EINVAL);
820	}
821
822	/* restore scratch registers */
823	regs[tEAX] = scp->sc_eax;
824	regs[tEBX] = scp->sc_ebx;
825	regs[tECX] = scp->sc_ecx;
826	regs[tEDX] = scp->sc_edx;
827	regs[tESI] = scp->sc_esi;
828	regs[tEDI] = scp->sc_edi;
829	regs[tCS] = scp->sc_cs;
830	regs[tDS] = scp->sc_ds;
831	regs[tES] = scp->sc_es;
832	regs[tSS] = scp->sc_ss;
833	regs[tISP] = scp->sc_isp;
834
835	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
836		return(EINVAL);
837
838	if (scp->sc_onstack & 01)
839		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
840	else
841		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
842	p->p_sigmask = scp->sc_mask &~
843	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
844	regs[tEBP] = scp->sc_fp;
845	regs[tESP] = scp->sc_sp;
846	regs[tEIP] = scp->sc_pc;
847	regs[tEFLAGS] = eflags;
848	return(EJUSTRETURN);
849}
850
/*
 * Panic the system on request, always with the same message, so
 * that a crash (and the resulting vmcore) can be produced in a
 * predictable way.
 */
void
diediedie()
{

	panic("because you said to!");
}
859
860int	waittime = -1;
861struct pcb dumppcb;
862
863__dead void
864boot(howto)
865	int howto;
866{
867	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
868		register struct buf *bp;
869		int iter, nbusy;
870
871		waittime = 0;
872		printf("\nsyncing disks... ");
873
874		sync(&proc0, NULL, NULL);
875
876		for (iter = 0; iter < 20; iter++) {
877			nbusy = 0;
878			for (bp = &buf[nbuf]; --bp >= buf; ) {
879				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
880					nbusy++;
881				}
882			}
883			if (nbusy == 0)
884				break;
885			printf("%d ", nbusy);
886			DELAY(40000 * iter);
887		}
888		if (nbusy) {
889			/*
890			 * Failed to sync all blocks. Indicate this and don't
891			 * unmount filesystems (thus forcing an fsck on reboot).
892			 */
893			printf("giving up\n");
894#ifdef SHOW_BUSYBUFS
895			nbusy = 0;
896			for (bp = &buf[nbuf]; --bp >= buf; ) {
897				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
898					nbusy++;
899					printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno);
900				}
901			}
902			DELAY(5000000);	/* 5 seconds */
903#endif
904		} else {
905			printf("done\n");
906			/*
907			 * Unmount filesystems
908			 */
909			if (panicstr == 0)
910				vfs_unmountall();
911		}
912		DELAY(100000);			/* wait for console output to finish */
913		dev_shutdownall(FALSE);
914	}
915	splhigh();
916	if (howto & RB_HALT) {
917		printf("\n");
918		printf("The operating system has halted.\n");
919		printf("Please press any key to reboot.\n\n");
920		cngetc();
921	} else {
922		if (howto & RB_DUMP) {
923			if (!cold) {
924				savectx(&dumppcb, 0);
925				dumppcb.pcb_ptd = rcr3();
926				dumpsys();
927			}
928
929			if (PANIC_REBOOT_WAIT_TIME != 0) {
930				if (PANIC_REBOOT_WAIT_TIME != -1) {
931					int loop;
932					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
933						PANIC_REBOOT_WAIT_TIME);
934					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
935						DELAY(1000 * 100); /* 1/10th second */
936						if (cncheckc()) /* Did user type a key? */
937							break;
938					}
939					if (!loop)
940						goto die;
941				}
942			} else { /* zero time specified - reboot NOW */
943				goto die;
944			}
945			printf("--> Press a key on the console to reboot <--\n");
946			cngetc();
947		}
948	}
949die:
950	printf("Rebooting...\n");
951	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
952	cpu_reset();
953	for(;;) ;
954	/* NOTREACHED */
955}
956
957unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
958int		dumpsize = 0;		/* also for savecore */
959
960int		dodump = 1;
961
962/*
963 * Doadump comes here after turning off memory management and
964 * getting on the dump stack, either when called above, or by
965 * the auto-restart code.
966 */
967void
968dumpsys()
969{
970
971	if (!dodump)
972		return;
973	if (dumpdev == NODEV)
974		return;
975	if ((minor(dumpdev)&07) != 1)
976		return;
977	dumpsize = Maxmem;
978	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
979	printf("dump ");
980	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
981
982	case ENXIO:
983		printf("device bad\n");
984		break;
985
986	case EFAULT:
987		printf("device not ready\n");
988		break;
989
990	case EINVAL:
991		printf("area improper\n");
992		break;
993
994	case EIO:
995		printf("i/o error\n");
996		break;
997
998	case EINTR:
999		printf("aborted from console\n");
1000		break;
1001
1002	default:
1003		printf("succeeded\n");
1004		break;
1005	}
1006}
1007
1008/*
1009 * Clear registers on exec
1010 */
1011void
1012setregs(p, entry, stack)
1013	struct proc *p;
1014	u_long entry;
1015	u_long stack;
1016{
1017	int *regs = p->p_md.md_regs;
1018
1019	bzero(regs, sizeof(struct trapframe));
1020	regs[tEIP] = entry;
1021	regs[tESP] = stack;
1022	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
1023	regs[tSS] = _udatasel;
1024	regs[tDS] = _udatasel;
1025	regs[tES] = _udatasel;
1026	regs[tCS] = _ucodesel;
1027
1028	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
1029	load_cr0(rcr0() | CR0_TS);	/* start emulating */
1030#if	NNPX > 0
1031	npxinit(__INITIAL_NPXCW__);
1032#endif	/* NNPX > 0 */
1033}
1034
1035static int
1036sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
1037{
1038	int error;
1039	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
1040		oldp, oldlenp, newp, newlen);
1041	if (!error && newp)
1042		resettodr();
1043	return (error);
1044}
1045
1046SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz,
1047	CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "");
1048
1049static int
1050sysctl_machdep_consdev SYSCTL_HANDLER_ARGS
1051{
1052	dev_t consdev;
1053	consdev = (cn_tty == NULL ? NODEV : cn_tty->t_dev);
1054	return (sysctl_handle_opaque(oidp, &consdev, sizeof consdev,
1055		oldp, oldlenp, newp, newlen));
1056}
1057
1058SYSCTL_PROC(_machdep, CPU_CONSDEV, consdev,
1059	CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_machdep_consdev, "");
1060
1061SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
1062	CTLFLAG_RW, &disable_rtc_set, 0, "");
1063
1064SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
1065	CTLFLAG_RD, &bootinfo, bootinfo, "");
1066
1067/*
1068 * Initialize 386 and configure to run kernel
1069 */
1070
1071/*
1072 * Initialize segments & interrupt table
1073 */
1074
1075int currentldt;
1076int _default_ldt;
1077union descriptor gdt[NGDT];		/* global descriptor table */
1078struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
1079union descriptor ldt[NLDT];		/* local descriptor table */
1080
1081struct	i386tss	tss, panic_tss;
1082
1083extern  struct user *proc0paddr;
1084
1085/* software prototypes -- in more palatable form */
1086struct soft_segment_descriptor gdt_segs[] = {
1087/* GNULL_SEL	0 Null Descriptor */
1088{	0x0,			/* segment base address  */
1089	0x0,			/* length */
1090	0,			/* segment type */
1091	0,			/* segment descriptor priority level */
1092	0,			/* segment descriptor present */
1093	0, 0,
1094	0,			/* default 32 vs 16 bit size */
1095	0  			/* limit granularity (byte/page units)*/ },
1096/* GCODE_SEL	1 Code Descriptor for kernel */
1097{	0x0,			/* segment base address  */
1098	0xfffff,		/* length - all address space */
1099	SDT_MEMERA,		/* segment type */
1100	0,			/* segment descriptor priority level */
1101	1,			/* segment descriptor present */
1102	0, 0,
1103	1,			/* default 32 vs 16 bit size */
1104	1  			/* limit granularity (byte/page units)*/ },
1105/* GDATA_SEL	2 Data Descriptor for kernel */
1106{	0x0,			/* segment base address  */
1107	0xfffff,		/* length - all address space */
1108	SDT_MEMRWA,		/* segment type */
1109	0,			/* segment descriptor priority level */
1110	1,			/* segment descriptor present */
1111	0, 0,
1112	1,			/* default 32 vs 16 bit size */
1113	1  			/* limit granularity (byte/page units)*/ },
1114/* GLDT_SEL	3 LDT Descriptor */
1115{	(int) ldt,		/* segment base address  */
1116	sizeof(ldt)-1,		/* length - all address space */
1117	SDT_SYSLDT,		/* segment type */
1118	0,			/* segment descriptor priority level */
1119	1,			/* segment descriptor present */
1120	0, 0,
1121	0,			/* unused - default 32 vs 16 bit size */
1122	0  			/* limit granularity (byte/page units)*/ },
1123/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1124{	0x0,			/* segment base address  */
1125	0x0,			/* length - all address space */
1126	0,			/* segment type */
1127	0,			/* segment descriptor priority level */
1128	0,			/* segment descriptor present */
1129	0, 0,
1130	0,			/* default 32 vs 16 bit size */
1131	0  			/* limit granularity (byte/page units)*/ },
1132/* GPANIC_SEL	5 Panic Tss Descriptor */
1133{	(int) &panic_tss,	/* segment base address  */
1134	sizeof(tss)-1,		/* length - all address space */
1135	SDT_SYS386TSS,		/* segment type */
1136	0,			/* segment descriptor priority level */
1137	1,			/* segment descriptor present */
1138	0, 0,
1139	0,			/* unused - default 32 vs 16 bit size */
1140	0  			/* limit granularity (byte/page units)*/ },
1141/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1142{	(int) kstack,		/* segment base address  */
1143	sizeof(tss)-1,		/* length - all address space */
1144	SDT_SYS386TSS,		/* segment type */
1145	0,			/* segment descriptor priority level */
1146	1,			/* segment descriptor present */
1147	0, 0,
1148	0,			/* unused - default 32 vs 16 bit size */
1149	0  			/* limit granularity (byte/page units)*/ },
1150/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1151{	(int) ldt,		/* segment base address  */
1152	(512 * sizeof(union descriptor)-1),		/* length */
1153	SDT_SYSLDT,		/* segment type */
1154	0,			/* segment descriptor priority level */
1155	1,			/* segment descriptor present */
1156	0, 0,
1157	0,			/* unused - default 32 vs 16 bit size */
1158	0  			/* limit granularity (byte/page units)*/ },
1159/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1160{	0,			/* segment base address (overwritten by APM)  */
1161	0xfffff,		/* length */
1162	SDT_MEMERA,		/* segment type */
1163	0,			/* segment descriptor priority level */
1164	1,			/* segment descriptor present */
1165	0, 0,
1166	1,			/* default 32 vs 16 bit size */
1167	1  			/* limit granularity (byte/page units)*/ },
1168/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1169{	0,			/* segment base address (overwritten by APM)  */
1170	0xfffff,		/* length */
1171	SDT_MEMERA,		/* segment type */
1172	0,			/* segment descriptor priority level */
1173	1,			/* segment descriptor present */
1174	0, 0,
1175	0,			/* default 32 vs 16 bit size */
1176	1  			/* limit granularity (byte/page units)*/ },
1177/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1178{	0,			/* segment base address (overwritten by APM) */
1179	0xfffff,		/* length */
1180	SDT_MEMRWA,		/* segment type */
1181	0,			/* segment descriptor priority level */
1182	1,			/* segment descriptor present */
1183	0, 0,
1184	1,			/* default 32 vs 16 bit size */
1185	1  			/* limit granularity (byte/page units)*/ },
1186};
1187
1188struct soft_segment_descriptor ldt_segs[] = {
1189	/* Null Descriptor - overwritten by call gate */
1190{	0x0,			/* segment base address  */
1191	0x0,			/* length - all address space */
1192	0,			/* segment type */
1193	0,			/* segment descriptor priority level */
1194	0,			/* segment descriptor present */
1195	0, 0,
1196	0,			/* default 32 vs 16 bit size */
1197	0  			/* limit granularity (byte/page units)*/ },
1198	/* Null Descriptor - overwritten by call gate */
1199{	0x0,			/* segment base address  */
1200	0x0,			/* length - all address space */
1201	0,			/* segment type */
1202	0,			/* segment descriptor priority level */
1203	0,			/* segment descriptor present */
1204	0, 0,
1205	0,			/* default 32 vs 16 bit size */
1206	0  			/* limit granularity (byte/page units)*/ },
1207	/* Null Descriptor - overwritten by call gate */
1208{	0x0,			/* segment base address  */
1209	0x0,			/* length - all address space */
1210	0,			/* segment type */
1211	0,			/* segment descriptor priority level */
1212	0,			/* segment descriptor present */
1213	0, 0,
1214	0,			/* default 32 vs 16 bit size */
1215	0  			/* limit granularity (byte/page units)*/ },
1216	/* Code Descriptor for user */
1217{	0x0,			/* segment base address  */
1218	0xfffff,		/* length - all address space */
1219	SDT_MEMERA,		/* segment type */
1220	SEL_UPL,		/* segment descriptor priority level */
1221	1,			/* segment descriptor present */
1222	0, 0,
1223	1,			/* default 32 vs 16 bit size */
1224	1  			/* limit granularity (byte/page units)*/ },
1225	/* Data Descriptor for user */
1226{	0x0,			/* segment base address  */
1227	0xfffff,		/* length - all address space */
1228	SDT_MEMRWA,		/* segment type */
1229	SEL_UPL,		/* segment descriptor priority level */
1230	1,			/* segment descriptor present */
1231	0, 0,
1232	1,			/* default 32 vs 16 bit size */
1233	1  			/* limit granularity (byte/page units)*/ },
1234};
1235
1236void
1237setidt(idx, func, typ, dpl)
1238	int idx;
1239	inthand_t *func;
1240	int typ;
1241	int dpl;
1242{
1243	struct gate_descriptor *ip = idt + idx;
1244
1245	ip->gd_looffset = (int)func;
1246	ip->gd_selector = 8;
1247	ip->gd_stkcpy = 0;
1248	ip->gd_xx = 0;
1249	ip->gd_type = typ;
1250	ip->gd_dpl = dpl;
1251	ip->gd_p = 1;
1252	ip->gd_hioffset = ((int)func)>>16 ;
1253}
1254
1255#define	IDTVEC(name)	__CONCAT(X,name)
1256
1257extern inthand_t
1258	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1259	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1260	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1261	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1262	IDTVEC(syscall);
1263
1264#ifdef COMPAT_LINUX
1265extern inthand_t
1266	IDTVEC(linux_syscall);
1267#endif
1268
1269void
1270sdtossd(sd, ssd)
1271	struct segment_descriptor *sd;
1272	struct soft_segment_descriptor *ssd;
1273{
1274	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1275	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1276	ssd->ssd_type  = sd->sd_type;
1277	ssd->ssd_dpl   = sd->sd_dpl;
1278	ssd->ssd_p     = sd->sd_p;
1279	ssd->ssd_def32 = sd->sd_def32;
1280	ssd->ssd_gran  = sd->sd_gran;
1281}
1282
1283void
1284init386(first)
1285	int first;
1286{
1287	int x;
1288	unsigned biosbasemem, biosextmem;
1289	struct gate_descriptor *gdp;
1290	int gsel_tss;
1291	/* table descriptors - used to load tables by microp */
1292	struct region_descriptor r_gdt, r_idt;
1293	int	pagesinbase, pagesinext;
1294	int	target_page, pa_indx;
1295
1296	proc0.p_addr = proc0paddr;
1297
1298	/*
1299	 * Initialize the console before we print anything out.
1300	 */
1301	cninit();
1302
1303	/*
1304	 * make gdt memory segments, the code segment goes up to end of the
1305	 * page with etext in it, the data segment goes to the end of
1306	 * the address space
1307	 */
1308	/*
1309	 * XXX text protection is temporarily (?) disabled.  The limit was
1310	 * i386_btop(i386_round_page(etext)) - 1.
1311	 */
1312	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1313	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1314	for (x = 0; x < NGDT; x++)
1315		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1316
1317	/* make ldt memory segments */
1318	/*
1319	 * The data segment limit must not cover the user area because we
1320	 * don't want the user area to be writable in copyout() etc. (page
1321	 * level protection is lost in kernel mode on 386's).  Also, we
1322	 * don't want the user area to be writable directly (page level
1323	 * protection of the user area is not available on 486's with
1324	 * CR0_WP set, because there is no user-read/kernel-write mode).
1325	 *
1326	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1327	 * should be spelled ...MAX_USER...
1328	 */
1329#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1330	/*
1331	 * The code segment limit has to cover the user area until we move
1332	 * the signal trampoline out of the user area.  This is safe because
1333	 * the code segment cannot be written to directly.
1334	 */
1335#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1336	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1337	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1338	/* Note. eventually want private ldts per process */
1339	for (x = 0; x < NLDT; x++)
1340		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1341
1342	/* exceptions */
1343	for (x = 0; x < NIDT; x++)
1344		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
1345	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1346	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1347	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1348 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1349	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1350	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1351	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1352	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1353	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1354	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1355	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1356	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1357	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1358	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1359	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1360	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1361	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1362	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
1363#ifdef COMPAT_LINUX
1364 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL);
1365#endif
1366
1367#include	"isa.h"
1368#if	NISA >0
1369	isa_defaultirq();
1370#endif
1371	rand_initialize();
1372
1373	r_gdt.rd_limit = sizeof(gdt) - 1;
1374	r_gdt.rd_base =  (int) gdt;
1375	lgdt(&r_gdt);
1376
1377	r_idt.rd_limit = sizeof(idt) - 1;
1378	r_idt.rd_base = (int) idt;
1379	lidt(&r_idt);
1380
1381	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1382	lldt(_default_ldt);
1383	currentldt = _default_ldt;
1384
1385#ifdef DDB
1386	kdb_init();
1387	if (boothowto & RB_KDB)
1388		Debugger("Boot flags requested debugger");
1389#endif
1390
1391	/* Use BIOS values stored in RTC CMOS RAM, since probing
1392	 * breaks certain 386 AT relics.
1393	 */
1394	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1395	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1396
1397	/*
1398	 * Print a warning if the official BIOS interface disagrees
1399	 * with the hackish interface used above.  Eventually only
1400	 * the official interface should be used.
1401	 */
1402	if (bootinfo.bi_memsizes_valid) {
1403		if (bootinfo.bi_basemem != biosbasemem)
1404			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1405			       bootinfo.bi_basemem, biosbasemem);
1406		if (bootinfo.bi_extmem != biosextmem)
1407			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1408			       bootinfo.bi_extmem, biosextmem);
1409	}
1410
1411	/*
1412	 * If BIOS tells us that it has more than 640k in the basemem,
1413	 *	don't believe it - set it to 640k.
1414	 */
1415	if (biosbasemem > 640)
1416		biosbasemem = 640;
1417
1418	/*
1419	 * Some 386 machines might give us a bogus number for extended
1420	 *	mem. If this happens, stop now.
1421	 */
1422#ifndef LARGEMEM
1423	if (biosextmem > 65536) {
1424		panic("extended memory beyond limit of 64MB");
1425		/* NOTREACHED */
1426	}
1427#endif
1428
1429	pagesinbase = biosbasemem * 1024 / NBPG;
1430	pagesinext = biosextmem * 1024 / NBPG;
1431
1432	/*
1433	 * Special hack for chipsets that still remap the 384k hole when
1434	 *	there's 16MB of memory - this really confuses people that
1435	 *	are trying to use bus mastering ISA controllers with the
1436	 *	"16MB limit"; they only have 16MB, but the remapping puts
1437	 *	them beyond the limit.
1438	 */
1439	/*
1440	 * If extended memory is between 15-16MB (16-17MB phys address range),
1441	 *	chop it to 15MB.
1442	 */
1443	if ((pagesinext > 3840) && (pagesinext < 4096))
1444		pagesinext = 3840;
1445
1446	/*
1447	 * Maxmem isn't the "maximum memory", it's one larger than the
1448	 * highest page of of the physical address space. It
1449	 */
1450	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1451
1452#ifdef MAXMEM
1453	Maxmem = MAXMEM/4;
1454#endif
1455
1456	/* call pmap initialization to make new kernel address space */
1457	pmap_bootstrap (first, 0);
1458
1459	/*
1460	 * Size up each available chunk of physical memory.
1461	 */
1462
1463	/*
1464	 * We currently don't bother testing base memory.
1465	 * XXX  ...but we probably should.
1466	 */
1467	pa_indx = 0;
1468	badpages = 0;
1469	if (pagesinbase > 1) {
1470		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
1471		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
1472		physmem = pagesinbase - 1;
1473	} else {
1474		/* point at first chunk end */
1475		pa_indx++;
1476	}
1477
1478	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
1479		int tmp, page_bad = FALSE;
1480
1481		/*
1482		 * map page into kernel: valid, read/write, non-cacheable
1483		 */
1484		*(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page;
1485		pmap_update();
1486
1487		tmp = *(int *)CADDR1;
1488		/*
1489		 * Test for alternating 1's and 0's
1490		 */
1491		*(int *)CADDR1 = 0xaaaaaaaa;
1492		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1493			page_bad = TRUE;
1494		}
1495		/*
1496		 * Test for alternating 0's and 1's
1497		 */
1498		*(int *)CADDR1 = 0x55555555;
1499		if (*(int *)CADDR1 != 0x55555555) {
1500			page_bad = TRUE;
1501		}
1502		/*
1503		 * Test for all 1's
1504		 */
1505		*(int *)CADDR1 = 0xffffffff;
1506		if (*(int *)CADDR1 != 0xffffffff) {
1507			page_bad = TRUE;
1508		}
1509		/*
1510		 * Test for all 0's
1511		 */
1512		*(int *)CADDR1 = 0x0;
1513		if (*(int *)CADDR1 != 0x0) {
1514			/*
1515			 * test of page failed
1516			 */
1517			page_bad = TRUE;
1518		}
1519		/*
1520		 * Restore original value.
1521		 */
1522		*(int *)CADDR1 = tmp;
1523
1524		/*
1525		 * Adjust array of valid/good pages.
1526		 */
1527		if (page_bad == FALSE) {
1528			/*
1529			 * If this good page is a continuation of the
1530			 * previous set of good pages, then just increase
1531			 * the end pointer. Otherwise start a new chunk.
1532			 * Note that "end" points one higher than end,
1533			 * making the range >= start and < end.
1534			 */
1535			if (phys_avail[pa_indx] == target_page) {
1536				phys_avail[pa_indx] += PAGE_SIZE;
1537			} else {
1538				pa_indx++;
1539				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1540					printf("Too many holes in the physical address space, giving up\n");
1541					pa_indx--;
1542					break;
1543				}
1544				phys_avail[pa_indx++] = target_page;	/* start */
1545				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
1546			}
1547			physmem++;
1548		} else {
1549			badpages++;
1550			page_bad = FALSE;
1551		}
1552	}
1553
1554	*(int *)CMAP1 = 0;
1555	pmap_update();
1556
1557	/*
1558	 * XXX
1559	 * The last chunk must contain at least one page plus the message
1560	 * buffer to avoid complicating other code (message buffer address
1561	 * calculation, etc.).
1562	 */
1563	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1564	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
1565		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1566		phys_avail[pa_indx--] = 0;
1567		phys_avail[pa_indx--] = 0;
1568	}
1569
1570	Maxmem = atop(phys_avail[pa_indx]);
1571
1572	/* Trim off space for the message buffer. */
1573	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));
1574
1575	avail_end = phys_avail[pa_indx];
1576
1577	/* now running on new page tables, configured,and u/iom is accessible */
1578
1579	/* make a initial tss so microp can get interrupt stack on syscall! */
1580	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1581	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1582	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1583
1584	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1585		(sizeof(tss))<<16;
1586
1587	ltr(gsel_tss);
1588
1589	/* make a call gate to reenter kernel with */
1590	gdp = &ldt[LSYS5CALLS_SEL].gd;
1591
1592	x = (int) &IDTVEC(syscall);
1593	gdp->gd_looffset = x++;
1594	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1595	gdp->gd_stkcpy = 1;
1596	gdp->gd_type = SDT_SYS386CGT;
1597	gdp->gd_dpl = SEL_UPL;
1598	gdp->gd_p = 1;
1599	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1600
1601	/* transfer to user mode */
1602
1603	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1604	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1605
1606	/* setup proc 0's pcb */
1607	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1608	proc0.p_addr->u_pcb.pcb_flags = 0;
1609	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1610}
1611
/*
 * TF_REGP(p): trapframe of process `p', addressed through its user area.
 *
 * The registers are in the frame; the frame is in the user area of the
 * process in question.  p->p_md.md_regs holds the frame's address as
 * seen on "the kernel stack", so its offset from kstack is taken and
 * applied to p->p_addr to index into the user block.  Don't you just
 * *love* virtual memory?  (I'm starting to think seymour is right...)
 */
#define	TF_REGP(p) \
	((struct trapframe *)((char *)(p)->p_addr + \
	    ((char *)(p)->p_md.md_regs - kstack)))
1624
1625int
1626ptrace_set_pc(p, addr)
1627	struct proc *p;
1628	unsigned int addr;
1629{
1630	TF_REGP(p)->tf_eip = addr;
1631	return (0);
1632}
1633
1634int
1635ptrace_single_step(p)
1636	struct proc *p;
1637{
1638	TF_REGP(p)->tf_eflags |= PSL_T;
1639	return (0);
1640}
1641
1642int
1643ptrace_getregs(p, addr)
1644	struct proc *p;
1645	unsigned int *addr;
1646{
1647	int error;
1648	struct reg regs;
1649
1650	error = fill_regs(p, &regs);
1651	if (error)
1652		return (error);
1653	return (copyout(&regs, addr, sizeof regs));
1654}
1655
1656int
1657ptrace_setregs(p, addr)
1658	struct proc *p;
1659	unsigned int *addr;
1660{
1661	int error;
1662	struct reg regs;
1663
1664	error = copyin(addr, &regs, sizeof regs);
1665	if (error)
1666		return (error);
1667	return (set_regs(p, &regs));
1668}
1669
1670int ptrace_write_u(p, off, data)
1671	struct proc *p;
1672	vm_offset_t off;
1673	int data;
1674{
1675	struct trapframe frame_copy;
1676	vm_offset_t min;
1677	struct trapframe *tp;
1678
1679	/*
1680	 * Privileged kernel state is scattered all over the user area.
1681	 * Only allow write access to parts of regs and to fpregs.
1682	 */
1683	min = (char *)p->p_md.md_regs - kstack;
1684	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1685		tp = TF_REGP(p);
1686		frame_copy = *tp;
1687		*(int *)((char *)&frame_copy + (off - min)) = data;
1688		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1689		    !CS_SECURE(frame_copy.tf_cs))
1690			return (EINVAL);
1691		*(int*)((char *)p->p_addr + off) = data;
1692		return (0);
1693	}
1694	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1695	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1696		*(int*)((char *)p->p_addr + off) = data;
1697		return (0);
1698	}
1699	return (EFAULT);
1700}
1701
1702int
1703fill_regs(p, regs)
1704	struct proc *p;
1705	struct reg *regs;
1706{
1707	struct trapframe *tp;
1708
1709	tp = TF_REGP(p);
1710	regs->r_es = tp->tf_es;
1711	regs->r_ds = tp->tf_ds;
1712	regs->r_edi = tp->tf_edi;
1713	regs->r_esi = tp->tf_esi;
1714	regs->r_ebp = tp->tf_ebp;
1715	regs->r_ebx = tp->tf_ebx;
1716	regs->r_edx = tp->tf_edx;
1717	regs->r_ecx = tp->tf_ecx;
1718	regs->r_eax = tp->tf_eax;
1719	regs->r_eip = tp->tf_eip;
1720	regs->r_cs = tp->tf_cs;
1721	regs->r_eflags = tp->tf_eflags;
1722	regs->r_esp = tp->tf_esp;
1723	regs->r_ss = tp->tf_ss;
1724	return (0);
1725}
1726
1727int
1728set_regs(p, regs)
1729	struct proc *p;
1730	struct reg *regs;
1731{
1732	struct trapframe *tp;
1733
1734	tp = TF_REGP(p);
1735	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1736	    !CS_SECURE(regs->r_cs))
1737		return (EINVAL);
1738	tp->tf_es = regs->r_es;
1739	tp->tf_ds = regs->r_ds;
1740	tp->tf_edi = regs->r_edi;
1741	tp->tf_esi = regs->r_esi;
1742	tp->tf_ebp = regs->r_ebp;
1743	tp->tf_ebx = regs->r_ebx;
1744	tp->tf_edx = regs->r_edx;
1745	tp->tf_ecx = regs->r_ecx;
1746	tp->tf_eax = regs->r_eax;
1747	tp->tf_eip = regs->r_eip;
1748	tp->tf_cs = regs->r_cs;
1749	tp->tf_eflags = regs->r_eflags;
1750	tp->tf_esp = regs->r_esp;
1751	tp->tf_ss = regs->r_ss;
1752	return (0);
1753}
1754
#ifndef DDB
/*
 * Stand-in used when the kernel is built without DDB: there is no
 * debugger to enter, so just report the call and its message and
 * return to the caller.
 */
void
Debugger(const char *msg)
{

	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
1762
1763#include <sys/disklabel.h>
1764#define b_cylin	b_resid
1765/*
1766 * Determine the size of the transfer, and make sure it is
1767 * within the boundaries of the partition. Adjust transfer
1768 * if needed, and signal errors or early completion.
1769 */
1770int
1771bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1772{
1773        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1774        int labelsect = lp->d_partitions[0].p_offset;
1775        int maxsz = p->p_size,
1776                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1777
1778        /* overwriting disk label ? */
1779        /* XXX should also protect bootstrap in first 8K */
1780        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1781#if LABELSECTOR != 0
1782            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1783#endif
1784            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1785                bp->b_error = EROFS;
1786                goto bad;
1787        }
1788
1789#if     defined(DOSBBSECTOR) && defined(notyet)
1790        /* overwriting master boot record? */
1791        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1792            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1793                bp->b_error = EROFS;
1794                goto bad;
1795        }
1796#endif
1797
1798        /* beyond partition? */
1799        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1800                /* if exactly at end of disk, return an EOF */
1801                if (bp->b_blkno == maxsz) {
1802                        bp->b_resid = bp->b_bcount;
1803                        return(0);
1804                }
1805                /* or truncate if part of it fits */
1806                sz = maxsz - bp->b_blkno;
1807                if (sz <= 0) {
1808                        bp->b_error = EINVAL;
1809                        goto bad;
1810                }
1811                bp->b_bcount = sz << DEV_BSHIFT;
1812        }
1813
1814        /* calculate cylinder for disksort to order transfers with */
1815        bp->b_pblkno = bp->b_blkno + p->p_offset;
1816        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1817        return(1);
1818
1819bad:
1820        bp->b_flags |= B_ERROR;
1821        return(-1);
1822}
1823
/*
 * Export a drive number to user space.
 *
 *	drive	value to export
 *	userp	destination handed to copyout()
 *	maxlen	in: space remaining at userp; out: reduced by sizeof drive
 *
 * Returns ENOMEM, leaving *maxlen untouched, when fewer than
 * sizeof drive bytes remain.  Otherwise *maxlen is reduced first and
 * the result of copyout() is returned.
 */
int
disk_externalize(int drive, void *userp, size_t *maxlen)
{
	const size_t need = sizeof drive;

	if (*maxlen < need)
		return (ENOMEM);

	*maxlen -= need;
	return (copyout(&drive, userp, need));
}
1834