machdep.c revision 12623
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.153 1995/11/29 19:57:01 wollman Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/signalvar.h>
48#include <sys/kernel.h>
49#include <sys/proc.h>
50#include <sys/user.h>
51#include <sys/buf.h>
52#include <sys/reboot.h>
53#include <sys/conf.h>
54#include <sys/file.h>
55#include <sys/callout.h>
56#include <sys/malloc.h>
57#include <sys/mbuf.h>
58#include <sys/mount.h>
59#include <sys/msgbuf.h>
60#include <sys/ioctl.h>
61#include <sys/sysent.h>
62#include <sys/tty.h>
63#include <sys/sysctl.h>
64#include <sys/devconf.h>
65
66#ifdef SYSVSHM
67#include <sys/shm.h>
68#endif
69
70#ifdef SYSVMSG
71#include <sys/msg.h>
72#endif
73
74#ifdef SYSVSEM
75#include <sys/sem.h>
76#endif
77
78#include <vm/vm.h>
79#include <vm/vm_kern.h>
80#include <vm/vm_page.h>
81#include <vm/vm_pager.h>
82
83#include <sys/exec.h>
84#include <sys/vnode.h>
85
86#include <ddb/ddb.h>
87
88#include <net/netisr.h>
89
90/* XXX correctly declaring all the netisr's is painful. */
91#include <net/if.h>
92#include <net/route.h>
93
94#include <netinet/in.h>
95#include <netinet/in_systm.h>
96#include <netinet/ip.h>
97#include <netinet/if_ether.h>
98#include <netinet/ip_var.h>
99
100#include <netns/ns.h>
101#include <netns/ns_if.h>
102
103#include <netiso/iso.h>
104#include <netiso/iso_var.h>
105
106#include <netccitt/dll.h>
107#include <netccitt/x25.h>
108#include <netccitt/pk.h>
109#include <sys/socketvar.h>
110#include <netccitt/pk_var.h>
111
112#include "ether.h"
113
114#include <machine/cpu.h>
115#include <machine/npx.h>
116#include <machine/reg.h>
117#include <machine/psl.h>
118#include <machine/clock.h>
119#include <machine/specialreg.h>
120#include <machine/sysarch.h>
121#include <machine/cons.h>
122#include <machine/devconf.h>
123#include <machine/bootinfo.h>
124#include <machine/md_var.h>
125
126#include <i386/isa/isa.h>
127#include <i386/isa/isa_device.h>
128#include <i386/isa/rtc.h>
129#include <machine/random.h>
130
131extern void diediedie __P((void));
132extern void init386 __P((int first));
133extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
134extern int ptrace_single_step __P((struct proc *p));
135extern int ptrace_getregs __P((struct proc *p, unsigned int *addr));
136extern int ptrace_setregs __P((struct proc *p, unsigned int *addr));
137extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
138
139static void cpu_startup __P((void *));
140SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
141
142static void identifycpu(void);
143
144char machine[] = "i386";
145SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
146
147char cpu_model[128];
148SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "");
149
150struct kern_devconf kdc_cpu0 = {
151	0, 0, 0,		/* filled in by dev_attach */
152	"cpu", 0, { MDDT_CPU },
153	0, 0, 0, CPU_EXTERNALLEN,
154	0,			/* CPU has no parent */
155	0,			/* no parentdata */
156	DC_BUSY,		/* the CPU is always busy */
157	cpu_model,		/* no sense in duplication */
158	DC_CLS_CPU		/* class */
159};
160
/*
 * Seconds boot() waits after a dump before rebooting automatically.
 * May be overridden at build time; boot() treats 0 as "reboot at once"
 * and -1 as "skip the countdown and wait for a key".
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif

#ifdef BOUNCE_BUFFERS
extern char *bouncememory;
extern int maxbkva;
/* Number of bounce pages; 0 lets cpu_startup() pick a value. */
#ifdef BOUNCEPAGES
int	bouncepages = BOUNCEPAGES;
#else
int	bouncepages = 0;
#endif
#endif	/* BOUNCE_BUFFERS */

extern int freebufspace;

int	msgbufmapped = 0;		/* set when safe to use msgbuf */

/* User data and code segment selectors loaded into user trapframes. */
int	_udatasel;
int	_ucodesel;

/* Physical memory size in clicks (converted with ctob() for sysctl). */
int	physmem = 0;
181
182static int
183sysctl_hw_physmem SYSCTL_HANDLER_ARGS
184{
185	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
186	return (error);
187}
188
189SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
190	0, 0, sysctl_hw_physmem, "I", "");
191
192static int
193sysctl_hw_usermem SYSCTL_HANDLER_ARGS
194{
195	int error = sysctl_handle_int(oidp, 0,
196		ctob(physmem - cnt.v_wire_count), req);
197	return (error);
198}
199
200SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
201	0, 0, sysctl_hw_usermem, "I", "");
202
203int boothowto = 0, bootverbose = 0, Maxmem = 0, badpages = 0;
204long dumplo;
205extern int bootdev;
206int biosmem;
207
208vm_offset_t phys_avail[10];
209
210/* must be 2 less so 0 0 can signal end of chunks */
211#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
212
213int cpu_class;
214
215void dumpsys __P((void));
216void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */
217
218vm_offset_t buffer_sva, buffer_eva;
219vm_offset_t clean_sva, clean_eva;
220vm_offset_t pager_sva, pager_eva;
221extern struct linker_set netisr_set;
222
223#define offsetof(type, member)	((size_t)(&((type *)0)->member))
224
225static void
226cpu_startup(dummy)
227	void *dummy;
228{
229	register unsigned i;
230	register caddr_t v;
231	vm_offset_t maxaddr;
232	vm_size_t size = 0;
233	int firstaddr;
234	vm_offset_t minaddr;
235
236	if (boothowto & RB_VERBOSE)
237		bootverbose++;
238
239	/*
240	 * Initialize error message buffer (at end of core).
241	 */
242
243	/* avail_end was pre-decremented in init_386() to compensate */
244	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
245		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
246			   avail_end + i * NBPG,
247			   VM_PROT_ALL, TRUE);
248	msgbufmapped = 1;
249
250	/*
251	 * Good {morning,afternoon,evening,night}.
252	 */
253	printf(version);
254	startrtclock();
255	identifycpu();
256	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
257	/*
258	 * Display any holes after the first chunk of extended memory.
259	 */
260	if (badpages != 0) {
261		int indx = 1;
262
263		/*
264		 * XXX skip reporting ISA hole & unmanaged kernel memory
265		 */
266		if (phys_avail[0] == PAGE_SIZE)
267			indx += 2;
268
269		printf("Physical memory hole(s):\n");
270		for (; phys_avail[indx + 1] != 0; indx += 2) {
271			int size = phys_avail[indx + 1] - phys_avail[indx];
272
273			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
274			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
275		}
276	}
277
278	/*
279	 * Quickly wire in netisrs.
280	 */
281	setup_netisrs(&netisr_set);
282
283/*
284#ifdef ISDN
285	DONET(isdnintr, NETISR_ISDN);
286#endif
287*/
288
289	/*
290	 * Allocate space for system data structures.
291	 * The first available kernel virtual address is in "v".
292	 * As pages of kernel virtual memory are allocated, "v" is incremented.
293	 * As pages of memory are allocated and cleared,
294	 * "firstaddr" is incremented.
295	 * An index into the kernel page table corresponding to the
296	 * virtual memory address maintained in "v" is kept in "mapaddr".
297	 */
298
299	/*
300	 * Make two passes.  The first pass calculates how much memory is
301	 * needed and allocates it.  The second pass assigns virtual
302	 * addresses to the various data structures.
303	 */
304	firstaddr = 0;
305again:
306	v = (caddr_t)firstaddr;
307
308#define	valloc(name, type, num) \
309	    (name) = (type *)v; v = (caddr_t)((name)+(num))
310#define	valloclim(name, type, num, lim) \
311	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
312	valloc(callout, struct callout, ncallout);
313#ifdef SYSVSHM
314	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
315#endif
316#ifdef SYSVSEM
317	valloc(sema, struct semid_ds, seminfo.semmni);
318	valloc(sem, struct sem, seminfo.semmns);
319	/* This is pretty disgusting! */
320	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
321#endif
322#ifdef SYSVMSG
323	valloc(msgpool, char, msginfo.msgmax);
324	valloc(msgmaps, struct msgmap, msginfo.msgseg);
325	valloc(msghdrs, struct msg, msginfo.msgtql);
326	valloc(msqids, struct msqid_ds, msginfo.msgmni);
327#endif
328
329	if (nbuf == 0) {
330		nbuf = 30;
331		if( physmem > 1024)
332			nbuf += min((physmem - 1024) / 12, 1024);
333	}
334	nswbuf = min(nbuf, 128);
335
336	valloc(swbuf, struct buf, nswbuf);
337	valloc(buf, struct buf, nbuf);
338
339#ifdef BOUNCE_BUFFERS
340	/*
341	 * If there is more than 16MB of memory, allocate some bounce buffers
342	 */
343	if (Maxmem > 4096) {
344		if (bouncepages == 0) {
345			bouncepages = 64;
346			bouncepages += ((Maxmem - 4096) / 2048) * 32;
347		}
348		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
349		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
350	}
351#endif
352
353	/*
354	 * End of first pass, size has been calculated so allocate memory
355	 */
356	if (firstaddr == 0) {
357		size = (vm_size_t)(v - firstaddr);
358		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
359		if (firstaddr == 0)
360			panic("startup: no room for tables");
361		goto again;
362	}
363
364	/*
365	 * End of second pass, addresses have been assigned
366	 */
367	if ((vm_size_t)(v - firstaddr) != size)
368		panic("startup: table size inconsistency");
369
370#ifdef BOUNCE_BUFFERS
371	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
372			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
373				maxbkva + pager_map_size, TRUE);
374	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
375#else
376	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
377			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
378#endif
379	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
380				(nbuf*MAXBSIZE), TRUE);
381	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
382				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
383	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
384				(16*ARG_MAX), TRUE);
385	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
386				(maxproc*UPAGES*PAGE_SIZE), FALSE);
387
388	/*
389	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
390	 * we use the more space efficient malloc in place of kmem_alloc.
391	 */
392	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
393				   M_MBUF, M_NOWAIT);
394	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
395	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
396			       nmbclusters * MCLBYTES, FALSE);
397	/*
398	 * Initialize callouts
399	 */
400	callfree = callout;
401	for (i = 1; i < ncallout; i++)
402		callout[i-1].c_next = &callout[i];
403
404        if (boothowto & RB_CONFIG) {
405		userconfig();
406		cninit();	/* the preferred console may have changed */
407	}
408
409#ifdef BOUNCE_BUFFERS
410	/*
411	 * init bounce buffers
412	 */
413	vm_bounce_init();
414#endif
415	/*
416	 * XXX allocate a contiguous area for ISA (non busmaster) DMA
417	 * operations. This _should_ only be done if the DMA channels
418	 * will actually be used, but for now we do it always.
419	 */
420#define DMAPAGES 8
421	isaphysmem =
422	    vm_page_alloc_contig(DMAPAGES * PAGE_SIZE, 0, 0xfffffful, 64*1024);
423
424	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
425	    ptoa(cnt.v_free_count) / 1024);
426
427	/*
428	 * Set up buffers, so they can be used to read disk labels.
429	 */
430	bufinit();
431	vm_pager_bufferinit();
432
433	/*
434	 * In verbose mode, print out the BIOS's idea of the disk geometries.
435	 */
436	if (bootverbose) {
437		printf("BIOS Geometries:\n");
438		for (i = 0; i < N_BIOS_GEOM; i++) {
439			unsigned long bios_geom;
440			int max_cylinder, max_head, max_sector;
441
442			bios_geom = bootinfo.bi_bios_geom[i];
443
444			/*
445			 * XXX the bootstrap punts a 1200K floppy geometry
446			 * when the get-disk-geometry interrupt fails.  Skip
447			 * drives that have this geometry.
448			 */
449			if (bios_geom == 0x4f010f)
450				continue;
451
452			printf(" %x:%08lx ", i, bios_geom);
453			max_cylinder = bios_geom >> 16;
454			max_head = (bios_geom >> 8) & 0xff;
455			max_sector = bios_geom & 0xff;
456			printf(
457		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
458			       max_cylinder, max_cylinder + 1,
459			       max_head, max_head + 1,
460			       max_sector, max_sector);
461		}
462		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
463	}
464}
465
466int
467register_netisr(num, handler)
468	int num;
469	netisr_t *handler;
470{
471
472	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
473		printf("register_netisr: bad isr number: %d\n", num);
474		return (EINVAL);
475	}
476	netisrs[num] = handler;
477	return (0);
478}
479
480void
481setup_netisrs(ls)
482	struct linker_set *ls;
483{
484	int i;
485	const struct netisrtab *nit;
486
487	for(i = 0; ls->ls_items[i]; i++) {
488		nit = (const struct netisrtab *)ls->ls_items[i];
489		register_netisr(nit->nit_num, nit->nit_isr);
490	}
491}
492
493struct cpu_nameclass i386_cpus[] = {
494	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
495	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
496	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
497	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
498	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
499	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
500	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
501};
502
503static void
504identifycpu()
505{
506	printf("CPU: ");
507	if (cpu >= 0
508	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
509		cpu_class = i386_cpus[cpu].cpu_class;
510		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
511	} else {
512		printf("unknown cpu type %d\n", cpu);
513		panic("startup: bad cpu id");
514	}
515
516#if defined(I586_CPU)
517	if(cpu_class == CPUCLASS_586) {
518		calibrate_cyclecounter();
519	}
520#endif
521#if defined(I486_CPU) || defined(I586_CPU)
522	if (!strcmp(cpu_vendor,"GenuineIntel")) {
523		if ((cpu_id & 0xf00) > 3) {
524			cpu_model[0] = '\0';
525
526			switch (cpu_id & 0x3000) {
527			case 0x1000:
528				strcpy(cpu_model, "Overdrive ");
529				break;
530			case 0x2000:
531				strcpy(cpu_model, "Dual ");
532				break;
533			}
534			if ((cpu_id & 0xf00) == 0x400) {
535				strcat(cpu_model, "i486 ");
536#if defined(I586_CPU)
537			} else if ((cpu_id & 0xf00) == 0x500) {
538				strcat(cpu_model, "Pentium"); /* nb no space */
539#endif
540			} else {
541				strcat(cpu_model, "unknown ");
542			}
543
544			switch (cpu_id & 0xff0) {
545			case 0x400:
546				strcat(cpu_model, "DX"); break;
547			case 0x410:
548				strcat(cpu_model, "DX"); break;
549			case 0x420:
550				strcat(cpu_model, "SX"); break;
551			case 0x430:
552				strcat(cpu_model, "DX2"); break;
553			case 0x440:
554				strcat(cpu_model, "SL"); break;
555			case 0x450:
556				strcat(cpu_model, "SX2"); break;
557			case 0x470:
558				strcat(cpu_model, "DX2 Write-Back Enhanced");
559				break;
560			case 0x480:
561				strcat(cpu_model, "DX4"); break;
562#if defined(I586_CPU)
563			case 0x510:
564			case 0x520:
565				/*
566				 * We used to do all sorts of nonsense here
567				 * to print out iCOMP numbers.  Since these
568				 * are meaningless except to Intel
569				 * marketroids, there seems to be little
570				 * sense in doing so.
571				 */
572				break;
573#endif
574			}
575		}
576	}
577#endif
578	printf("%s (", cpu_model);
579	switch(cpu_class) {
580	case CPUCLASS_286:
581		printf("286");
582		break;
583#if defined(I386_CPU)
584	case CPUCLASS_386:
585		printf("386");
586		break;
587#endif
588#if defined(I486_CPU)
589	case CPUCLASS_486:
590		printf("486");
591		break;
592#endif
593#if defined(I586_CPU)
594	case CPUCLASS_586:
595		printf("%d.%02d-MHz ",
596		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100,
597		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100);
598		printf("586");
599		break;
600#endif
601	default:
602		printf("unknown");	/* will panic below... */
603	}
604	printf("-class CPU)\n");
605#if defined(I486_CPU) || defined(I586_CPU)
606	if(*cpu_vendor)
607		printf("  Origin = \"%s\"",cpu_vendor);
608	if(cpu_id)
609		printf("  Id = 0x%lx",cpu_id);
610
611	if (!strcmp(cpu_vendor, "GenuineIntel")) {
612		printf("  Stepping=%ld", cpu_id & 0xf);
613		if (cpu_high > 0) {
614#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
615			printf("\n  Features=0x%b", cpu_feature, FEATUREFMT);
616		}
617	}
618	/* Avoid ugly blank lines: only print newline when we have to. */
619	if (*cpu_vendor || cpu_id)
620		printf("\n");
621#endif
622	/*
623	 * Now that we have told the user what they have,
624	 * let them know if that machine type isn't configured.
625	 */
626	switch (cpu_class) {
627	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
628#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
629#error This kernel is not configured for one of the supported CPUs
630#endif
631#if !defined(I386_CPU)
632	case CPUCLASS_386:
633#endif
634#if !defined(I486_CPU)
635	case CPUCLASS_486:
636#endif
637#if !defined(I586_CPU)
638	case CPUCLASS_586:
639#endif
640		panic("CPU class not configured");
641	default:
642		break;
643	}
644	dev_attach(&kdc_cpu0);
645}
646
647/*
648 * Send an interrupt to process.
649 *
650 * Stack is set up to allow sigcode stored
651 * in u. to call routine, followed by kcall
652 * to sigreturn routine below.  After sigreturn
653 * resets the signal mask, the stack, and the
654 * frame pointer, it returns to the user
655 * specified pc, psl.
656 */
657void
658sendsig(catcher, sig, mask, code)
659	sig_t catcher;
660	int sig, mask;
661	unsigned code;
662{
663	register struct proc *p = curproc;
664	register int *regs;
665	register struct sigframe *fp;
666	struct sigframe sf;
667	struct sigacts *psp = p->p_sigacts;
668	int oonstack;
669
670	regs = p->p_md.md_regs;
671        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
672	/*
673	 * Allocate and validate space for the signal handler
674	 * context. Note that if the stack is in P0 space, the
675	 * call to grow() is a nop, and the useracc() check
676	 * will fail if the process has not already allocated
677	 * the space with a `brk'.
678	 */
679        if ((psp->ps_flags & SAS_ALTSTACK) &&
680	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
681	    (psp->ps_sigonstack & sigmask(sig))) {
682		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
683		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
684		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
685	} else {
686		fp = (struct sigframe *)(regs[tESP]
687			- sizeof(struct sigframe));
688	}
689
690	/*
691	 * grow() will return FALSE if the fp will not fit inside the stack
692	 *	and the stack can not be grown. useracc will return FALSE
693	 *	if access is denied.
694	 */
695	if ((grow(p, (int)fp) == FALSE) ||
696	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
697		/*
698		 * Process has trashed its stack; give it an illegal
699		 * instruction to halt it in its tracks.
700		 */
701		SIGACTION(p, SIGILL) = SIG_DFL;
702		sig = sigmask(SIGILL);
703		p->p_sigignore &= ~sig;
704		p->p_sigcatch &= ~sig;
705		p->p_sigmask &= ~sig;
706		psignal(p, SIGILL);
707		return;
708	}
709
710	/*
711	 * Build the argument list for the signal handler.
712	 */
713	if (p->p_sysent->sv_sigtbl) {
714		if (sig < p->p_sysent->sv_sigsize)
715			sig = p->p_sysent->sv_sigtbl[sig];
716		else
717			sig = p->p_sysent->sv_sigsize + 1;
718	}
719	sf.sf_signum = sig;
720	sf.sf_code = code;
721	sf.sf_scp = &fp->sf_sc;
722	sf.sf_addr = (char *) regs[tERR];
723	sf.sf_handler = catcher;
724
725	/* save scratch registers */
726	sf.sf_sc.sc_eax = regs[tEAX];
727	sf.sf_sc.sc_ebx = regs[tEBX];
728	sf.sf_sc.sc_ecx = regs[tECX];
729	sf.sf_sc.sc_edx = regs[tEDX];
730	sf.sf_sc.sc_esi = regs[tESI];
731	sf.sf_sc.sc_edi = regs[tEDI];
732	sf.sf_sc.sc_cs = regs[tCS];
733	sf.sf_sc.sc_ds = regs[tDS];
734	sf.sf_sc.sc_ss = regs[tSS];
735	sf.sf_sc.sc_es = regs[tES];
736	sf.sf_sc.sc_isp = regs[tISP];
737
738	/*
739	 * Build the signal context to be used by sigreturn.
740	 */
741	sf.sf_sc.sc_onstack = oonstack;
742	sf.sf_sc.sc_mask = mask;
743	sf.sf_sc.sc_sp = regs[tESP];
744	sf.sf_sc.sc_fp = regs[tEBP];
745	sf.sf_sc.sc_pc = regs[tEIP];
746	sf.sf_sc.sc_ps = regs[tEFLAGS];
747
748	/*
749	 * Copy the sigframe out to the user's stack.
750	 */
751	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
752		/*
753		 * Something is wrong with the stack pointer.
754		 * ...Kill the process.
755		 */
756		sigexit(p, SIGILL);
757	};
758
759	regs[tESP] = (int)fp;
760	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
761	regs[tEFLAGS] &= ~PSL_VM;
762	regs[tCS] = _ucodesel;
763	regs[tDS] = _udatasel;
764	regs[tES] = _udatasel;
765	regs[tSS] = _udatasel;
766}
767
768/*
769 * System call to cleanup state after a signal
770 * has been taken.  Reset signal mask and
771 * stack state from context left by sendsig (above).
772 * Return to previous pc and psl as specified by
773 * context left by sendsig. Check carefully to
774 * make sure that the user has not modified the
775 * state to gain improper privileges.
776 */
777int
778sigreturn(p, uap, retval)
779	struct proc *p;
780	struct sigreturn_args /* {
781		struct sigcontext *sigcntxp;
782	} */ *uap;
783	int *retval;
784{
785	register struct sigcontext *scp;
786	register struct sigframe *fp;
787	register int *regs = p->p_md.md_regs;
788	int eflags;
789
790	/*
791	 * (XXX old comment) regs[tESP] points to the return address.
792	 * The user scp pointer is above that.
793	 * The return address is faked in the signal trampoline code
794	 * for consistency.
795	 */
796	scp = uap->sigcntxp;
797	fp = (struct sigframe *)
798	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
799
800	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
801		return(EINVAL);
802
803	/*
804	 * Don't allow users to change privileged or reserved flags.
805	 */
806#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
807	eflags = scp->sc_ps;
808	/*
809	 * XXX do allow users to change the privileged flag PSL_RF.  The
810	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
811	 * sometimes set it there too.  tf_eflags is kept in the signal
812	 * context during signal handling and there is no other place
813	 * to remember it, so the PSL_RF bit may be corrupted by the
814	 * signal handler without us knowing.  Corruption of the PSL_RF
815	 * bit at worst causes one more or one less debugger trap, so
816	 * allowing it is fairly harmless.
817	 */
818	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
819#ifdef DEBUG
820    		printf("sigreturn: eflags = 0x%x\n", eflags);
821#endif
822    		return(EINVAL);
823	}
824
825	/*
826	 * Don't allow users to load a valid privileged %cs.  Let the
827	 * hardware check for invalid selectors, excess privilege in
828	 * other selectors, invalid %eip's and invalid %esp's.
829	 */
830#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
831	if (!CS_SECURE(scp->sc_cs)) {
832#ifdef DEBUG
833    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
834#endif
835		trapsignal(p, SIGBUS, T_PROTFLT);
836		return(EINVAL);
837	}
838
839	/* restore scratch registers */
840	regs[tEAX] = scp->sc_eax;
841	regs[tEBX] = scp->sc_ebx;
842	regs[tECX] = scp->sc_ecx;
843	regs[tEDX] = scp->sc_edx;
844	regs[tESI] = scp->sc_esi;
845	regs[tEDI] = scp->sc_edi;
846	regs[tCS] = scp->sc_cs;
847	regs[tDS] = scp->sc_ds;
848	regs[tES] = scp->sc_es;
849	regs[tSS] = scp->sc_ss;
850	regs[tISP] = scp->sc_isp;
851
852	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
853		return(EINVAL);
854
855	if (scp->sc_onstack & 01)
856		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
857	else
858		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
859	p->p_sigmask = scp->sc_mask &~
860	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
861	regs[tEBP] = scp->sc_fp;
862	regs[tESP] = scp->sc_sp;
863	regs[tEIP] = scp->sc_pc;
864	regs[tEFLAGS] = eflags;
865	return(EJUSTRETURN);
866}
867
/*
 * Panic on request.  Gives a predictable way to bring the system down
 * through panic() (and so take a vmcore) from a known point.
 */
void
diediedie()
{

	panic("because you said to!");
}
876
877int	waittime = -1;
878struct pcb dumppcb;
879
880__dead void
881boot(howto)
882	int howto;
883{
884	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
885		register struct buf *bp;
886		int iter, nbusy;
887
888		waittime = 0;
889		printf("\nsyncing disks... ");
890
891		sync(&proc0, NULL, NULL);
892
893		for (iter = 0; iter < 20; iter++) {
894			nbusy = 0;
895			for (bp = &buf[nbuf]; --bp >= buf; ) {
896				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
897					nbusy++;
898				}
899			}
900			if (nbusy == 0)
901				break;
902			printf("%d ", nbusy);
903			DELAY(40000 * iter);
904		}
905		if (nbusy) {
906			/*
907			 * Failed to sync all blocks. Indicate this and don't
908			 * unmount filesystems (thus forcing an fsck on reboot).
909			 */
910			printf("giving up\n");
911#ifdef SHOW_BUSYBUFS
912			nbusy = 0;
913			for (bp = &buf[nbuf]; --bp >= buf; ) {
914				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
915					nbusy++;
916					printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno);
917				}
918			}
919			DELAY(5000000);	/* 5 seconds */
920#endif
921		} else {
922			printf("done\n");
923			/*
924			 * Unmount filesystems
925			 */
926			if (panicstr == 0)
927				vfs_unmountall();
928		}
929		DELAY(100000);			/* wait for console output to finish */
930		dev_shutdownall(FALSE);
931	}
932	splhigh();
933	if (howto & RB_HALT) {
934		printf("\n");
935		printf("The operating system has halted.\n");
936		printf("Please press any key to reboot.\n\n");
937		cngetc();
938	} else {
939		if (howto & RB_DUMP) {
940			if (!cold) {
941				savectx(&dumppcb, 0);
942				dumppcb.pcb_ptd = rcr3();
943				dumpsys();
944			}
945
946			if (PANIC_REBOOT_WAIT_TIME != 0) {
947				if (PANIC_REBOOT_WAIT_TIME != -1) {
948					int loop;
949					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
950						PANIC_REBOOT_WAIT_TIME);
951					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
952						DELAY(1000 * 100); /* 1/10th second */
953						if (cncheckc()) /* Did user type a key? */
954							break;
955					}
956					if (!loop)
957						goto die;
958				}
959			} else { /* zero time specified - reboot NOW */
960				goto die;
961			}
962			printf("--> Press a key on the console to reboot <--\n");
963			cngetc();
964		}
965	}
966die:
967	printf("Rebooting...\n");
968	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
969	cpu_reset();
970	for(;;) ;
971	/* NOTREACHED */
972}
973
974unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
975int		dumpsize = 0;		/* also for savecore */
976
977int		dodump = 1;
978
979/*
980 * Doadump comes here after turning off memory management and
981 * getting on the dump stack, either when called above, or by
982 * the auto-restart code.
983 */
984void
985dumpsys()
986{
987
988	if (!dodump)
989		return;
990	if (dumpdev == NODEV)
991		return;
992	if ((minor(dumpdev)&07) != 1)
993		return;
994	dumpsize = Maxmem;
995	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
996	printf("dump ");
997	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
998
999	case ENXIO:
1000		printf("device bad\n");
1001		break;
1002
1003	case EFAULT:
1004		printf("device not ready\n");
1005		break;
1006
1007	case EINVAL:
1008		printf("area improper\n");
1009		break;
1010
1011	case EIO:
1012		printf("i/o error\n");
1013		break;
1014
1015	case EINTR:
1016		printf("aborted from console\n");
1017		break;
1018
1019	default:
1020		printf("succeeded\n");
1021		break;
1022	}
1023}
1024
1025/*
1026 * Clear registers on exec
1027 */
1028void
1029setregs(p, entry, stack)
1030	struct proc *p;
1031	u_long entry;
1032	u_long stack;
1033{
1034	int *regs = p->p_md.md_regs;
1035
1036	bzero(regs, sizeof(struct trapframe));
1037	regs[tEIP] = entry;
1038	regs[tESP] = stack;
1039	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
1040	regs[tSS] = _udatasel;
1041	regs[tDS] = _udatasel;
1042	regs[tES] = _udatasel;
1043	regs[tCS] = _ucodesel;
1044
1045	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
1046	load_cr0(rcr0() | CR0_TS);	/* start emulating */
1047#if	NNPX > 0
1048	npxinit(__INITIAL_NPXCW__);
1049#endif	/* NNPX > 0 */
1050}
1051
1052static int
1053sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
1054{
1055	int error;
1056	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
1057		req);
1058	if (!error && req->newptr)
1059		resettodr();
1060	return (error);
1061}
1062
1063SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
1064	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
1065
1066static int
1067sysctl_machdep_consdev SYSCTL_HANDLER_ARGS
1068{
1069	dev_t consdev;
1070	consdev = (cn_tty == NULL ? NODEV : cn_tty->t_dev);
1071	return (sysctl_handle_opaque(oidp, &consdev, sizeof consdev, req));
1072}
1073
1074SYSCTL_PROC(_machdep, CPU_CONSDEV, consdev, CTLTYPE_OPAQUE|CTLFLAG_RD,
1075	0, 0, sysctl_machdep_consdev, "T,dev_t", "");
1076
1077SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
1078	CTLFLAG_RW, &disable_rtc_set, 0, "");
1079
1080SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
1081	CTLFLAG_RD, &bootinfo, bootinfo, "");
1082
1083/*
1084 * Initialize 386 and configure to run kernel
1085 */
1086
1087/*
1088 * Initialize segments & interrupt table
1089 */
1090
1091int currentldt;
1092int _default_ldt;
1093union descriptor gdt[NGDT];		/* global descriptor table */
1094struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
1095union descriptor ldt[NLDT];		/* local descriptor table */
1096
1097struct	i386tss	tss, panic_tss;
1098
1099extern  struct user *proc0paddr;
1100
/*
 * Software prototypes of the GDT entries, in a more palatable form.
 * init386() converts each entry with ssdtosd() into gdt[]; the position
 * of an entry in this array is its GDT index, so the order must match
 * the G*_SEL constants named in the per-entry comments.
 */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* limit (unused for the null descriptor) */
	0,			/* segment type */
	0,			/* descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	1 Code Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* limit - all address space (reset by init386()) */
	SDT_MEMERA,		/* segment type */
	0,			/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	2 Data Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* limit - all address space (reset by init386()) */
	SDT_MEMRWA,		/* segment type */
	0,			/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GLDT_SEL	3 LDT Descriptor */
{	(int) ldt,		/* segment base address  */
	sizeof(ldt)-1,		/* limit - the whole ldt[] array, in bytes */
	SDT_SYSLDT,		/* segment type */
	0,			/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GTGATE_SEL	4 Null Descriptor - Placeholder */
{	0x0,			/* segment base address  */
	0x0,			/* limit (unused for the placeholder) */
	0,			/* segment type */
	0,			/* descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GPANIC_SEL	5 Panic Tss Descriptor */
{	(int) &panic_tss,	/* segment base address  */
	sizeof(tss)-1,		/* limit - one struct i386tss, in bytes */
	SDT_SYS386TSS,		/* segment type */
	0,			/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
{	(int) kstack,		/* segment base address  */
	sizeof(tss)-1,		/* limit - one struct i386tss, in bytes */
	SDT_SYS386TSS,		/* segment type */
	0,			/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GUSERLDT_SEL	7 User LDT Descriptor per process */
{	(int) ldt,		/* segment base address  */
	(512 * sizeof(union descriptor)-1),		/* limit - room for 512 descriptors */
	SDT_SYSLDT,		/* segment type */
	0,			/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* limit */
	SDT_MEMERA,		/* segment type */
	0,			/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* limit */
	SDT_MEMERA,		/* segment type */
	0,			/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
{	0,			/* segment base address (overwritten by APM) */
	0xfffff,		/* limit */
	SDT_MEMRWA,		/* segment type */
	0,			/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};
1203
/*
 * Software prototypes of the default LDT entries.  init386() converts
 * each entry with ssdtosd() into ldt[]; the position of an entry is its
 * LDT index.  init386() later rewrites ldt[LSYS5CALLS_SEL] as a call
 * gate and patches the limits of the user code and data entries
 * (LUCODE_SEL / LUDATA_SEL) before the conversion.
 */
struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* limit (unused for a null descriptor) */
	0,			/* segment type */
	0,			/* descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* limit (unused for a null descriptor) */
	0,			/* segment type */
	0,			/* descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* limit (unused for a null descriptor) */
	0,			/* segment type */
	0,			/* descriptor privilege level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* limit - all address space (reset by init386()) */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
	/* Data Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* limit - all address space (reset by init386()) */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};
1251
1252void
1253setidt(idx, func, typ, dpl)
1254	int idx;
1255	inthand_t *func;
1256	int typ;
1257	int dpl;
1258{
1259	struct gate_descriptor *ip = idt + idx;
1260
1261	ip->gd_looffset = (int)func;
1262	ip->gd_selector = 8;
1263	ip->gd_stkcpy = 0;
1264	ip->gd_xx = 0;
1265	ip->gd_type = typ;
1266	ip->gd_dpl = dpl;
1267	ip->gd_p = 1;
1268	ip->gd_hioffset = ((int)func)>>16 ;
1269}
1270
/* Name of the entry point for vector "name": the identifier X<name>. */
#define	IDTVEC(name)	__CONCAT(X,name)

/* Exception and system call entry points installed by init386(). */
extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(syscall);

#ifdef COMPAT_LINUX
/* Entry point installed at vector 0x80 by init386(). */
extern inthand_t
	IDTVEC(linux_syscall);
#endif
1284
1285void
1286sdtossd(sd, ssd)
1287	struct segment_descriptor *sd;
1288	struct soft_segment_descriptor *ssd;
1289{
1290	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1291	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1292	ssd->ssd_type  = sd->sd_type;
1293	ssd->ssd_dpl   = sd->sd_dpl;
1294	ssd->ssd_p     = sd->sd_p;
1295	ssd->ssd_def32 = sd->sd_def32;
1296	ssd->ssd_gran  = sd->sd_gran;
1297}
1298
/*
 * Machine-dependent early initialization for the i386.
 *
 * In order, this routine: attaches proc 0's user area, initializes the
 * console, builds and loads the GDT, LDT and IDT, reads the memory
 * sizes from the RTC CMOS, bootstraps the pmap, probes extended memory
 * page by page (filling in phys_avail[], physmem and badpages),
 * reserves room for the message buffer (setting Maxmem and avail_end),
 * loads the proc 0 TSS, installs the system call gate in the LDT and
 * finishes setting up proc 0's pcb.
 *
 *	first	passed unchanged to pmap_bootstrap()
 */
void
init386(first)
	int first;
{
	int x;
	unsigned biosbasemem, biosextmem;
	struct gate_descriptor *gdp;
	int gsel_tss;
	/* table descriptors - used to load tables by microp */
	struct region_descriptor r_gdt, r_idt;
	int	pagesinbase, pagesinext;
	int	target_page, pa_indx;

	proc0.p_addr = proc0paddr;

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	/*
	 * make gdt memory segments, the code segment goes up to end of the
	 * page with etext in it, the data segment goes to the end of
	 * the address space
	 */
	/*
	 * XXX text protection is temporarily (?) disabled.  The limit was
	 * i386_btop(i386_round_page(etext)) - 1.
	 */
	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
	/* NOTE(review): assumes gdt_segs[] has at least NGDT entries. */
	for (x = 0; x < NGDT; x++)
		ssdtosd(&gdt_segs[x], &gdt[x].sd);

	/* make ldt memory segments */
	/*
	 * The data segment limit must not cover the user area because we
	 * don't want the user area to be writable in copyout() etc. (page
	 * level protection is lost in kernel mode on 386's).  Also, we
	 * don't want the user area to be writable directly (page level
	 * protection of the user area is not available on 486's with
	 * CR0_WP set, because there is no user-read/kernel-write mode).
	 *
	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
	 * should be spelled ...MAX_USER...
	 */
#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
	/*
	 * The code segment limit has to cover the user area until we move
	 * the signal trampoline out of the user area.  This is safe because
	 * the code segment cannot be written to directly.
	 */
#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
	/* Note. eventually want private ldts per process */
	/* NOTE(review): assumes ldt_segs[] has at least NLDT entries. */
	for (x = 0; x < NLDT; x++)
		ssdtosd(&ldt_segs[x], &ldt[x].sd);

	/*
	 * exceptions: point every vector at the "rsvd" handler first, then
	 * install the specific handlers for vectors 0 through 17.  Only
	 * the breakpoint and overflow vectors (and the Linux system call
	 * vector) are installed with user privilege level.
	 */
	for (x = 0; x < NIDT; x++)
		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
#ifdef COMPAT_LINUX
 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL);
#endif

#include	"isa.h"
#if	NISA >0
	isa_defaultirq();
#endif
	rand_initialize();

	/* Load the descriptor table registers with the tables built above. */
	r_gdt.rd_limit = sizeof(gdt) - 1;
	r_gdt.rd_base =  (int) gdt;
	lgdt(&r_gdt);

	r_idt.rd_limit = sizeof(idt) - 1;
	r_idt.rd_base = (int) idt;
	lidt(&r_idt);

	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
	lldt(_default_ldt);
	currentldt = _default_ldt;

#ifdef DDB
	kdb_init();
	if (boothowto & RB_KDB)
		Debugger("Boot flags requested debugger");
#endif

	/* Use BIOS values stored in RTC CMOS RAM, since probing
	 * breaks certain 386 AT relics.  Both values are in kilobytes.
	 */
	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);

	/*
	 * Print a warning if the official BIOS interface disagrees
	 * with the hackish interface used above.  Eventually only
	 * the official interface should be used.
	 */
	if (bootinfo.bi_memsizes_valid) {
		if (bootinfo.bi_basemem != biosbasemem)
			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
			       bootinfo.bi_basemem, biosbasemem);
		if (bootinfo.bi_extmem != biosextmem)
			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
			       bootinfo.bi_extmem, biosextmem);
	}

	/*
	 * If BIOS tells us that it has more than 640k in the basemem,
	 *	don't believe it - set it to 640k.
	 */
	if (biosbasemem > 640)
		biosbasemem = 640;

	/*
	 * Some 386 machines might give us a bogus number for extended
	 *	mem. If this happens, stop now.
	 */
#ifndef LARGEMEM
	if (biosextmem > 65536) {
		panic("extended memory beyond limit of 64MB");
		/* NOTREACHED */
	}
#endif

	/* Convert the kilobyte counts into page counts. */
	pagesinbase = biosbasemem * 1024 / NBPG;
	pagesinext = biosextmem * 1024 / NBPG;

	/*
	 * Special hack for chipsets that still remap the 384k hole when
	 *	there's 16MB of memory - this really confuses people that
	 *	are trying to use bus mastering ISA controllers with the
	 *	"16MB limit"; they only have 16MB, but the remapping puts
	 *	them beyond the limit.
	 */
	/*
	 * If extended memory is between 15-16MB (16-17MB phys address range),
	 *	chop it to 15MB.
	 */
	if ((pagesinext > 3840) && (pagesinext < 4096))
		pagesinext = 3840;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  Extended memory
	 * starts at 1MB, hence the 0x100000/PAGE_SIZE term.
	 */
	Maxmem = pagesinext + 0x100000/PAGE_SIZE;

#ifdef MAXMEM
	/* Compile-time override of the value computed above. */
	Maxmem = MAXMEM/4;
#endif

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap (first, 0);

	/*
	 * Size up each available chunk of physical memory.
	 * phys_avail[] holds (start, end) pairs; pa_indx is left pointing
	 * at the end slot of the chunk currently being extended.
	 */

	/*
	 * We currently don't bother testing base memory.
	 * XXX  ...but we probably should.
	 */
	pa_indx = 0;
	badpages = 0;
	if (pagesinbase > 1) {
		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
		physmem = pagesinbase - 1;
	} else {
		/* point at first chunk end */
		pa_indx++;
	}

	/*
	 * Probe each page from avail_start up to Maxmem through the
	 * CMAP1/CADDR1 window.  Only the first int of each page is
	 * pattern-tested.
	 */
	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
		int tmp, page_bad = FALSE;

		/*
		 * map page into kernel: valid, read/write, non-cacheable
		 */
		*(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page;
		pmap_update();

		/* Remember the current contents so they can be restored. */
		tmp = *(int *)CADDR1;
		/*
		 * Test for alternating 1's and 0's
		 */
		*(int *)CADDR1 = 0xaaaaaaaa;
		if (*(int *)CADDR1 != 0xaaaaaaaa) {
			page_bad = TRUE;
		}
		/*
		 * Test for alternating 0's and 1's
		 */
		*(int *)CADDR1 = 0x55555555;
		if (*(int *)CADDR1 != 0x55555555) {
			page_bad = TRUE;
		}
		/*
		 * Test for all 1's
		 */
		*(int *)CADDR1 = 0xffffffff;
		if (*(int *)CADDR1 != 0xffffffff) {
			page_bad = TRUE;
		}
		/*
		 * Test for all 0's
		 */
		*(int *)CADDR1 = 0x0;
		if (*(int *)CADDR1 != 0x0) {
			/*
			 * test of page failed
			 */
			page_bad = TRUE;
		}
		/*
		 * Restore original value.
		 */
		*(int *)CADDR1 = tmp;

		/*
		 * Adjust array of valid/good pages.
		 */
		if (page_bad == FALSE) {
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer. Otherwise start a new chunk.
			 * Note that "end" points one higher than end,
			 * making the range >= start and < end.
			 */
			if (phys_avail[pa_indx] == target_page) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf("Too many holes in the physical address space, giving up\n");
					/* Step back to the end slot of the last chunk. */
					pa_indx--;
					break;
				}
				phys_avail[pa_indx++] = target_page;	/* start */
				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
			}
			physmem++;
		} else {
			badpages++;
			/* Redundant: page_bad is re-initialized each iteration. */
			page_bad = FALSE;
		}
	}

	/* Tear down the temporary test mapping. */
	*(int *)CMAP1 = 0;
	pmap_update();

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).  Chunks that are too small are dropped from
	 * the end of phys_avail[] and their pages subtracted from physmem.
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	/* Maxmem now reflects the end of the last usable chunk. */
	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));

	avail_end = phys_avail[pa_indx];

	/* now running on new page tables, configured, and u/iom is accessible */

	/* make a initial tss so microp can get interrupt stack on syscall! */
	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);

	/*
	 * Store sizeof(tss) in the upper 16 bits of tss_ioopt of the
	 * proc 0 TSS.
	 * NOTE(review): presumably this puts the I/O permission bitmap
	 * offset at the end of the TSS so that no bitmap is present --
	 * confirm.
	 */
	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
		(sizeof(tss))<<16;

	ltr(gsel_tss);

	/* make a call gate to reenter kernel with */
	gdp = &ldt[LSYS5CALLS_SEL].gd;

	x = (int) &IDTVEC(syscall);
	/* x is not used again, so the post-increment below has no effect. */
	gdp->gd_looffset = x++;
	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
	gdp->gd_stkcpy = 1;
	gdp->gd_type = SDT_SYS386CGT;
	gdp->gd_dpl = SEL_UPL;
	gdp->gd_p = 1;
	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;

	/* transfer to user mode */

	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);

	/* setup proc 0's pcb */
	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
	proc0.p_addr->u_pcb.pcb_flags = 0;
	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
}
1627
/*
 * The registers are in the frame; the frame is in the user area of
 * the process in question; when the process is active, the registers
 * are in "the kernel stack"; when it's not, they're still there, but
 * things get flipped around.  So, since p->p_md.md_regs is the whole address
 * of the register set, take its offset from the kernel stack, and
 * index into the user block.  Don't you just *love* virtual memory?
 * (I'm starting to think seymour is right...)
 *
 * TF_REGP(p) therefore yields a struct trapframe pointer located at
 * p->p_addr plus the byte offset of p->p_md.md_regs from kstack.
 * Note that p is evaluated twice.
 */
#define	TF_REGP(p)	((struct trapframe *) \
			 ((char *)(p)->p_addr \
			  + ((char *)(p)->p_md.md_regs - kstack)))
1640
1641int
1642ptrace_set_pc(p, addr)
1643	struct proc *p;
1644	unsigned int addr;
1645{
1646	TF_REGP(p)->tf_eip = addr;
1647	return (0);
1648}
1649
1650int
1651ptrace_single_step(p)
1652	struct proc *p;
1653{
1654	TF_REGP(p)->tf_eflags |= PSL_T;
1655	return (0);
1656}
1657
1658int
1659ptrace_getregs(p, addr)
1660	struct proc *p;
1661	unsigned int *addr;
1662{
1663	int error;
1664	struct reg regs;
1665
1666	error = fill_regs(p, &regs);
1667	if (error)
1668		return (error);
1669	return (copyout(&regs, addr, sizeof regs));
1670}
1671
1672int
1673ptrace_setregs(p, addr)
1674	struct proc *p;
1675	unsigned int *addr;
1676{
1677	int error;
1678	struct reg regs;
1679
1680	error = copyin(addr, &regs, sizeof regs);
1681	if (error)
1682		return (error);
1683	return (set_regs(p, &regs));
1684}
1685
1686int ptrace_write_u(p, off, data)
1687	struct proc *p;
1688	vm_offset_t off;
1689	int data;
1690{
1691	struct trapframe frame_copy;
1692	vm_offset_t min;
1693	struct trapframe *tp;
1694
1695	/*
1696	 * Privileged kernel state is scattered all over the user area.
1697	 * Only allow write access to parts of regs and to fpregs.
1698	 */
1699	min = (char *)p->p_md.md_regs - kstack;
1700	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1701		tp = TF_REGP(p);
1702		frame_copy = *tp;
1703		*(int *)((char *)&frame_copy + (off - min)) = data;
1704		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1705		    !CS_SECURE(frame_copy.tf_cs))
1706			return (EINVAL);
1707		*(int*)((char *)p->p_addr + off) = data;
1708		return (0);
1709	}
1710	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1711	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1712		*(int*)((char *)p->p_addr + off) = data;
1713		return (0);
1714	}
1715	return (EFAULT);
1716}
1717
1718int
1719fill_regs(p, regs)
1720	struct proc *p;
1721	struct reg *regs;
1722{
1723	struct trapframe *tp;
1724
1725	tp = TF_REGP(p);
1726	regs->r_es = tp->tf_es;
1727	regs->r_ds = tp->tf_ds;
1728	regs->r_edi = tp->tf_edi;
1729	regs->r_esi = tp->tf_esi;
1730	regs->r_ebp = tp->tf_ebp;
1731	regs->r_ebx = tp->tf_ebx;
1732	regs->r_edx = tp->tf_edx;
1733	regs->r_ecx = tp->tf_ecx;
1734	regs->r_eax = tp->tf_eax;
1735	regs->r_eip = tp->tf_eip;
1736	regs->r_cs = tp->tf_cs;
1737	regs->r_eflags = tp->tf_eflags;
1738	regs->r_esp = tp->tf_esp;
1739	regs->r_ss = tp->tf_ss;
1740	return (0);
1741}
1742
1743int
1744set_regs(p, regs)
1745	struct proc *p;
1746	struct reg *regs;
1747{
1748	struct trapframe *tp;
1749
1750	tp = TF_REGP(p);
1751	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1752	    !CS_SECURE(regs->r_cs))
1753		return (EINVAL);
1754	tp->tf_es = regs->r_es;
1755	tp->tf_ds = regs->r_ds;
1756	tp->tf_edi = regs->r_edi;
1757	tp->tf_esi = regs->r_esi;
1758	tp->tf_ebp = regs->r_ebp;
1759	tp->tf_ebx = regs->r_ebx;
1760	tp->tf_edx = regs->r_edx;
1761	tp->tf_ecx = regs->r_ecx;
1762	tp->tf_eax = regs->r_eax;
1763	tp->tf_eip = regs->r_eip;
1764	tp->tf_cs = regs->r_cs;
1765	tp->tf_eflags = regs->r_eflags;
1766	tp->tf_esp = regs->r_esp;
1767	tp->tf_ss = regs->r_ss;
1768	return (0);
1769}
1770
#ifndef DDB
/*
 * Stand-in used when the kernel is built without DDB: there is no
 * debugger to enter, so the request is only reported with printf().
 */
void
Debugger(const char *msg)
{

	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
1778
1779#include <sys/disklabel.h>
1780#define b_cylin	b_resid
1781/*
1782 * Determine the size of the transfer, and make sure it is
1783 * within the boundaries of the partition. Adjust transfer
1784 * if needed, and signal errors or early completion.
1785 */
1786int
1787bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1788{
1789        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1790        int labelsect = lp->d_partitions[0].p_offset;
1791        int maxsz = p->p_size,
1792                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1793
1794        /* overwriting disk label ? */
1795        /* XXX should also protect bootstrap in first 8K */
1796        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1797#if LABELSECTOR != 0
1798            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1799#endif
1800            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1801                bp->b_error = EROFS;
1802                goto bad;
1803        }
1804
1805#if     defined(DOSBBSECTOR) && defined(notyet)
1806        /* overwriting master boot record? */
1807        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1808            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1809                bp->b_error = EROFS;
1810                goto bad;
1811        }
1812#endif
1813
1814        /* beyond partition? */
1815        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1816                /* if exactly at end of disk, return an EOF */
1817                if (bp->b_blkno == maxsz) {
1818                        bp->b_resid = bp->b_bcount;
1819                        return(0);
1820                }
1821                /* or truncate if part of it fits */
1822                sz = maxsz - bp->b_blkno;
1823                if (sz <= 0) {
1824                        bp->b_error = EINVAL;
1825                        goto bad;
1826                }
1827                bp->b_bcount = sz << DEV_BSHIFT;
1828        }
1829
1830        /* calculate cylinder for disksort to order transfers with */
1831        bp->b_pblkno = bp->b_blkno + p->p_offset;
1832        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1833        return(1);
1834
1835bad:
1836        bp->b_flags |= B_ERROR;
1837        return(-1);
1838}
1839
/*
 * Export the drive number to a sysctl request.
 * Returns whatever SYSCTL_OUT() returns.
 */
int
disk_externalize(int drive, struct sysctl_req *req)
{

	return (SYSCTL_OUT(req, &drive, sizeof drive));
}
1845