machdep.c revision 11390
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.143 1995/09/15 08:31:14 davidg Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/signalvar.h>
48#include <sys/kernel.h>
49#include <sys/proc.h>
50#include <sys/user.h>
51#include <sys/buf.h>
52#include <sys/reboot.h>
53#include <sys/conf.h>
54#include <sys/file.h>
55#include <sys/callout.h>
56#include <sys/malloc.h>
57#include <sys/mbuf.h>
58#include <sys/mount.h>
59#include <sys/msgbuf.h>
60#include <sys/ioctl.h>
61#include <sys/sysent.h>
62#include <sys/tty.h>
63#include <sys/sysctl.h>
64#include <sys/devconf.h>
65
66#ifdef SYSVSHM
67#include <sys/shm.h>
68#endif
69
70#ifdef SYSVMSG
71#include <sys/msg.h>
72#endif
73
74#ifdef SYSVSEM
75#include <sys/sem.h>
76#endif
77
78#include <vm/vm.h>
79#include <vm/vm_kern.h>
80#include <vm/vm_page.h>
81#include <vm/vm_pager.h>
82
83#include <sys/exec.h>
84#include <sys/vnode.h>
85
86#include <ddb/ddb.h>
87
88#include <net/netisr.h>
89
90/* XXX correctly declaring all the netisr's is painful. */
91#include <net/if.h>
92#include <net/route.h>
93
94#include <netinet/in.h>
95#include <netinet/in_systm.h>
96#include <netinet/ip.h>
97#include <netinet/if_ether.h>
98#include <netinet/ip_var.h>
99
100#include <netns/ns.h>
101#include <netns/ns_if.h>
102
103#include <netiso/iso.h>
104#include <netiso/iso_var.h>
105
106#include <netccitt/dll.h>
107#include <netccitt/x25.h>
108#include <netccitt/pk.h>
109#include <sys/socketvar.h>
110#include <netccitt/pk_var.h>
111
112#include "ether.h"
113
114#include <machine/cpu.h>
115#include <machine/npx.h>
116#include <machine/reg.h>
117#include <machine/psl.h>
118#include <machine/clock.h>
119#include <machine/specialreg.h>
120#include <machine/sysarch.h>
121#include <machine/cons.h>
122#include <machine/devconf.h>
123#include <machine/bootinfo.h>
124#include <machine/md_var.h>
125
126#include <i386/isa/isa.h>
127#include <i386/isa/isa_device.h>
128#include <i386/isa/rtc.h>
129
/*
 * Prototypes for routines that have no shared header declaration here.
 * diediedie() is defined later in this file; the definitions of the
 * others are not visible in this part of the file.
 */
extern void diediedie __P((void));
extern void init386 __P((int first));
extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
extern int ptrace_single_step __P((struct proc *p));
extern int ptrace_getregs __P((struct proc *p, unsigned int *addr));
extern int ptrace_setregs __P((struct proc *p, unsigned int *addr));
extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
137
/*
 * cpu_startup() is registered through SYSINIT() under SI_SUB_CPU with
 * order SI_ORDER_FIRST; it is passed NULL as its argument.
 */
static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)

static void identifycpu(void);

char machine[] = "i386";	/* machine type name */
char cpu_model[128];		/* CPU description, filled in by identifycpu() */
145
/*
 * Device-configuration record for the CPU.  identifycpu() hands it to
 * dev_attach() once the CPU has been identified.  The description field
 * points straight at cpu_model rather than holding a copy.
 */
struct kern_devconf kdc_cpu0 = {
	0, 0, 0,		/* filled in by dev_attach */
	"cpu", 0, { MDDT_CPU },
	0, 0, 0, CPU_EXTERNALLEN,
	0,			/* CPU has no parent */
	0,			/* no parentdata */
	DC_BUSY,		/* the CPU is always busy */
	cpu_model,		/* no sense in duplication */
	DC_CLS_CPU		/* class */
};
156
/*
 * Seconds boot() waits for a console keypress after a crash dump before
 * rebooting on its own.  boot() treats 0 as "reboot immediately" and -1
 * as "skip the countdown and wait for a key".
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif

#ifdef BOUNCE_BUFFERS
extern char *bouncememory;
extern int maxbkva;
/*
 * Number of bounce pages.  When left at 0, cpu_startup() computes a
 * value from Maxmem (only if Maxmem exceeds 4096 pages).
 */
#ifdef BOUNCEPAGES
int	bouncepages = BOUNCEPAGES;
#else
int	bouncepages = 0;
#endif
#endif	/* BOUNCE_BUFFERS */

extern int freebufspace;
int	msgbufmapped = 0;		/* set when safe to use msgbuf */
/*
 * User data and code segment selectors; sendsig() and setregs() store
 * them into the trapframe's segment register slots.
 */
int _udatasel, _ucodesel;


/*
 * Machine-dependent startup code
 */
int boothowto = 0, bootverbose = 0, Maxmem = 0, badpages = 0, physmem = 0;
long dumplo;
extern int bootdev;
int biosmem;

/*
 * Physical memory chunk boundaries.  cpu_startup() walks the array in
 * steps of two and stops at a zero entry when reporting holes.
 */
vm_offset_t phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)

int cpu_class;			/* CPUCLASS_* value, set by identifycpu() */

void dumpsys __P((void));
void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */

/* Start/end addresses of the submaps created in cpu_startup(). */
vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
vm_offset_t pager_sva, pager_eva;
extern struct linker_set netisr_set;

/* Byte offset of `member' within `type'; used by sigreturn(). */
#define offsetof(type, member)	((size_t)(&((type *)0)->member))
200
201static void
202cpu_startup(dummy)
203	void *dummy;
204{
205	register unsigned i;
206	register caddr_t v;
207	vm_offset_t maxaddr;
208	vm_size_t size = 0;
209	int firstaddr, indx;
210	vm_offset_t minaddr;
211
212	if (boothowto & RB_VERBOSE)
213		bootverbose++;
214
215	/*
216	 * Initialize error message buffer (at end of core).
217	 */
218
219	/* avail_end was pre-decremented in init_386() to compensate */
220	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
221		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
222			   avail_end + i * NBPG,
223			   VM_PROT_ALL, TRUE);
224	msgbufmapped = 1;
225
226	/*
227	 * Good {morning,afternoon,evening,night}.
228	 */
229	printf(version);
230	startrtclock();
231	identifycpu();
232	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
233	/*
234	 * Display any holes after the first chunk of extended memory.
235	 */
236	if (badpages != 0) {
237		int indx = 1;
238
239		/*
240		 * XXX skip reporting ISA hole & unmanaged kernel memory
241		 */
242		if (phys_avail[0] == PAGE_SIZE)
243			indx += 2;
244
245		printf("Physical memory hole(s):\n");
246		for (; phys_avail[indx + 1] != 0; indx += 2) {
247			int size = phys_avail[indx + 1] - phys_avail[indx];
248
249			printf("0x%08x - 0x%08x, %d bytes (%d pages)\n", phys_avail[indx],
250			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
251		}
252	}
253
254	/*
255	 * Quickly wire in netisrs.
256	 */
257	setup_netisrs(&netisr_set);
258
259/*
260#ifdef ISDN
261	DONET(isdnintr, NETISR_ISDN);
262#endif
263*/
264
265	/*
266	 * Allocate space for system data structures.
267	 * The first available kernel virtual address is in "v".
268	 * As pages of kernel virtual memory are allocated, "v" is incremented.
269	 * As pages of memory are allocated and cleared,
270	 * "firstaddr" is incremented.
271	 * An index into the kernel page table corresponding to the
272	 * virtual memory address maintained in "v" is kept in "mapaddr".
273	 */
274
275	/*
276	 * Make two passes.  The first pass calculates how much memory is
277	 * needed and allocates it.  The second pass assigns virtual
278	 * addresses to the various data structures.
279	 */
280	firstaddr = 0;
281again:
282	v = (caddr_t)firstaddr;
283
284#define	valloc(name, type, num) \
285	    (name) = (type *)v; v = (caddr_t)((name)+(num))
286#define	valloclim(name, type, num, lim) \
287	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
288	valloc(callout, struct callout, ncallout);
289#ifdef SYSVSHM
290	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
291#endif
292#ifdef SYSVSEM
293	valloc(sema, struct semid_ds, seminfo.semmni);
294	valloc(sem, struct sem, seminfo.semmns);
295	/* This is pretty disgusting! */
296	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
297#endif
298#ifdef SYSVMSG
299	valloc(msgpool, char, msginfo.msgmax);
300	valloc(msgmaps, struct msgmap, msginfo.msgseg);
301	valloc(msghdrs, struct msg, msginfo.msgtql);
302	valloc(msqids, struct msqid_ds, msginfo.msgmni);
303#endif
304
305	if (nbuf == 0) {
306		nbuf = 30;
307		if( physmem > 1024)
308			nbuf += min((physmem - 1024) / 12, 1024);
309	}
310	nswbuf = min(nbuf, 128);
311
312	valloc(swbuf, struct buf, nswbuf);
313	valloc(buf, struct buf, nbuf);
314
315#ifdef BOUNCE_BUFFERS
316	/*
317	 * If there is more than 16MB of memory, allocate some bounce buffers
318	 */
319	if (Maxmem > 4096) {
320		if (bouncepages == 0) {
321			bouncepages = 64;
322			bouncepages += ((Maxmem - 4096) / 2048) * 32;
323		}
324		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
325		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
326	}
327#endif
328
329	/*
330	 * End of first pass, size has been calculated so allocate memory
331	 */
332	if (firstaddr == 0) {
333		size = (vm_size_t)(v - firstaddr);
334		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
335		if (firstaddr == 0)
336			panic("startup: no room for tables");
337		goto again;
338	}
339
340	/*
341	 * End of second pass, addresses have been assigned
342	 */
343	if ((vm_size_t)(v - firstaddr) != size)
344		panic("startup: table size inconsistency");
345
346#ifdef BOUNCE_BUFFERS
347	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
348			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
349				maxbkva + pager_map_size, TRUE);
350	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
351#else
352	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
353			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
354#endif
355	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
356				(nbuf*MAXBSIZE), TRUE);
357	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
358				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
359	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
360				(16*ARG_MAX), TRUE);
361	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
362				(maxproc*UPAGES*PAGE_SIZE), FALSE);
363
364	/*
365	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
366	 * we use the more space efficient malloc in place of kmem_alloc.
367	 */
368	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
369				   M_MBUF, M_NOWAIT);
370	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
371	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
372			       nmbclusters * MCLBYTES, FALSE);
373	/*
374	 * Initialize callouts
375	 */
376	callfree = callout;
377	for (i = 1; i < ncallout; i++)
378		callout[i-1].c_next = &callout[i];
379
380        if (boothowto & RB_CONFIG) {
381		userconfig();
382		cninit();	/* the preferred console may have changed */
383	}
384
385#ifdef BOUNCE_BUFFERS
386	/*
387	 * init bounce buffers
388	 */
389	vm_bounce_init();
390#endif
391	/*
392	 * XXX allocate a contiguous area for ISA (non busmaster) DMA
393	 * operations. This _should_ only be done if the DMA channels
394	 * will actually be used, but for now we do it always.
395	 */
396#define DMAPAGES 8
397	isaphysmem =
398	    vm_page_alloc_contig(DMAPAGES * PAGE_SIZE, 0, 0xfffffful, 64*1024);
399
400	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
401	    ptoa(cnt.v_free_count) / 1024);
402
403	/*
404	 * Set up buffers, so they can be used to read disk labels.
405	 */
406	bufinit();
407	vm_pager_bufferinit();
408
409	/*
410	 * In verbose mode, print out the BIOS's idea of the disk geometries.
411	 */
412	if (bootverbose) {
413		printf("BIOS Geometries:\n");
414		for (i = 0; i < N_BIOS_GEOM; i++) {
415			unsigned long bios_geom;
416			int max_cylinder, max_head, max_sector;
417
418			bios_geom = bootinfo.bi_bios_geom[i];
419
420			/*
421			 * XXX the bootstrap punts a 1200K floppy geometry
422			 * when the get-disk-geometry interrupt fails.  Skip
423			 * drives that have this geometry.
424			 */
425			if (bios_geom == 0x4f010f)
426				continue;
427
428			printf(" %x:%08x ", i, bios_geom);
429			max_cylinder = bios_geom >> 16;
430			max_head = (bios_geom >> 8) & 0xff;
431			max_sector = bios_geom & 0xff;
432			printf(
433		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
434			       max_cylinder, max_cylinder + 1,
435			       max_head, max_head + 1,
436			       max_sector, max_sector);
437		}
438		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
439	}
440}
441
442void
443setup_netisrs(struct linker_set *ls)
444{
445	int i;
446	const struct netisrtab *nit;
447
448	for(i = 0; ls->ls_items[i]; i++) {
449		nit = (const struct netisrtab *)ls->ls_items[i];
450		netisrs[nit->nit_num] = nit->nit_isr;
451	}
452}
453
/*
 * CPU model names and classes.  identifycpu() indexes this table with the
 * global `cpu' value, so entry order matters; the trailing comments name
 * the index each row is expected to match (presumably the CPU_* constants
 * from a machine header -- confirm there before reordering).
 */
struct cpu_nameclass i386_cpus[] = {
	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
};
463
464static void
465identifycpu()
466{
467	printf("CPU: ");
468	if (cpu >= 0
469	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
470		cpu_class = i386_cpus[cpu].cpu_class;
471		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
472	} else {
473		printf("unknown cpu type %d\n", cpu);
474		panic("startup: bad cpu id");
475	}
476
477#if defined(I586_CPU)
478	if(cpu_class == CPUCLASS_586) {
479		calibrate_cyclecounter();
480		printf("%d-MHz ", pentium_mhz);
481	}
482#endif
483#if defined(I486_CPU) || defined(I586_CPU)
484	if (!strcmp(cpu_vendor,"GenuineIntel")) {
485		if ((cpu_id & 0xf00) > 3) {
486			cpu_model[0] = '\0';
487
488			switch (cpu_id & 0x3000) {
489			case 0x1000:
490				strcpy(cpu_model, "Overdrive ");
491				break;
492			case 0x2000:
493				strcpy(cpu_model, "Dual ");
494				break;
495			}
496			if ((cpu_id & 0xf00) == 0x400) {
497				strcat(cpu_model, "i486 ");
498#if defined(I586_CPU)
499			} else if ((cpu_id & 0xf00) == 0x500) {
500				strcat(cpu_model, "Pentium ");
501#endif
502			} else {
503				strcat(cpu_model, "unknown ");
504			}
505
506			switch (cpu_id & 0xff0) {
507			case 0x400:
508				strcat(cpu_model, "DX"); break;
509			case 0x410:
510				strcat(cpu_model, "DX"); break;
511			case 0x420:
512				strcat(cpu_model, "SX"); break;
513			case 0x430:
514				strcat(cpu_model, "DX2"); break;
515			case 0x440:
516				strcat(cpu_model, "SL"); break;
517			case 0x450:
518				strcat(cpu_model, "SX2"); break;
519			case 0x470:
520				strcat(cpu_model, "DX2 Write-Back Enhanced");
521				break;
522			case 0x480:
523				strcat(cpu_model, "DX4"); break;
524#if defined(I586_CPU)
525			case 0x510:
526				if (pentium_mhz == 60) {
527					strcat(cpu_model, "510\\60");
528				} else if (pentium_mhz == 66) {
529					strcat(cpu_model, "567\\66");
530				} else {
531					strcat(cpu_model,"510\\60 or 567\\66");
532				}
533				break;
534			case 0x520:
535				if (pentium_mhz == 90) {
536					strcat(cpu_model, "735\\90");
537				} else if (pentium_mhz == 100) {
538					strcat(cpu_model, "815\\100");
539				} else {
540					strcat(cpu_model,"735\\90 or 815\\100");
541				}
542				break;
543#endif
544			}
545		}
546	}
547#endif
548	printf("%s (", cpu_model);
549	switch(cpu_class) {
550	case CPUCLASS_286:
551		printf("286");
552		break;
553#if defined(I386_CPU)
554	case CPUCLASS_386:
555		printf("386");
556		break;
557#endif
558#if defined(I486_CPU)
559	case CPUCLASS_486:
560		printf("486");
561		break;
562#endif
563#if defined(I586_CPU)
564	case CPUCLASS_586:
565		printf("Pentium");
566		break;
567#endif
568	default:
569		printf("unknown");	/* will panic below... */
570	}
571	printf("-class CPU)\n");
572#if defined(I486_CPU) || defined(I586_CPU)
573	if(*cpu_vendor)
574		printf("  Origin = \"%s\"",cpu_vendor);
575	if(cpu_id)
576		printf("  Id = 0x%lx",cpu_id);
577
578	if (!strcmp(cpu_vendor, "GenuineIntel")) {
579		printf("  Stepping=%ld", cpu_id & 0xf);
580		if (cpu_high > 0) {
581#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
582			printf("\n  Features=0x%b", cpu_feature, FEATUREFMT);
583		}
584	}
585	/* Avoid ugly blank lines: only print newline when we have to. */
586	if (*cpu_vendor || cpu_id)
587		printf("\n");
588#endif
589	/*
590	 * Now that we have told the user what they have,
591	 * let them know if that machine type isn't configured.
592	 */
593	switch (cpu_class) {
594	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
595#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
596#error This kernel is not configured for one of the supported CPUs
597#endif
598#if !defined(I386_CPU)
599	case CPUCLASS_386:
600#endif
601#if !defined(I486_CPU)
602	case CPUCLASS_486:
603#endif
604#if !defined(I586_CPU)
605	case CPUCLASS_586:
606#endif
607		panic("CPU class not configured");
608	default:
609		break;
610	}
611	dev_attach(&kdc_cpu0);
612}
613
614/*
615 * Send an interrupt to process.
616 *
617 * Stack is set up to allow sigcode stored
618 * in u. to call routine, followed by kcall
619 * to sigreturn routine below.  After sigreturn
620 * resets the signal mask, the stack, and the
621 * frame pointer, it returns to the user
622 * specified pc, psl.
623 */
624void
625sendsig(catcher, sig, mask, code)
626	sig_t catcher;
627	int sig, mask;
628	unsigned code;
629{
630	register struct proc *p = curproc;
631	register int *regs;
632	register struct sigframe *fp;
633	struct sigframe sf;
634	struct sigacts *psp = p->p_sigacts;
635	int oonstack;
636
637	regs = p->p_md.md_regs;
638        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
639	/*
640	 * Allocate and validate space for the signal handler
641	 * context. Note that if the stack is in P0 space, the
642	 * call to grow() is a nop, and the useracc() check
643	 * will fail if the process has not already allocated
644	 * the space with a `brk'.
645	 */
646        if ((psp->ps_flags & SAS_ALTSTACK) &&
647	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
648	    (psp->ps_sigonstack & sigmask(sig))) {
649		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
650		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
651		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
652	} else {
653		fp = (struct sigframe *)(regs[tESP]
654			- sizeof(struct sigframe));
655	}
656
657	/*
658	 * grow() will return FALSE if the fp will not fit inside the stack
659	 *	and the stack can not be grown. useracc will return FALSE
660	 *	if access is denied.
661	 */
662	if ((grow(p, (int)fp) == FALSE) ||
663	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
664		/*
665		 * Process has trashed its stack; give it an illegal
666		 * instruction to halt it in its tracks.
667		 */
668		SIGACTION(p, SIGILL) = SIG_DFL;
669		sig = sigmask(SIGILL);
670		p->p_sigignore &= ~sig;
671		p->p_sigcatch &= ~sig;
672		p->p_sigmask &= ~sig;
673		psignal(p, SIGILL);
674		return;
675	}
676
677	/*
678	 * Build the argument list for the signal handler.
679	 */
680	if (p->p_sysent->sv_sigtbl) {
681		if (sig < p->p_sysent->sv_sigsize)
682			sig = p->p_sysent->sv_sigtbl[sig];
683		else
684			sig = p->p_sysent->sv_sigsize + 1;
685	}
686	sf.sf_signum = sig;
687	sf.sf_code = code;
688	sf.sf_scp = &fp->sf_sc;
689	sf.sf_addr = (char *) regs[tERR];
690	sf.sf_handler = catcher;
691
692	/* save scratch registers */
693	sf.sf_sc.sc_eax = regs[tEAX];
694	sf.sf_sc.sc_ebx = regs[tEBX];
695	sf.sf_sc.sc_ecx = regs[tECX];
696	sf.sf_sc.sc_edx = regs[tEDX];
697	sf.sf_sc.sc_esi = regs[tESI];
698	sf.sf_sc.sc_edi = regs[tEDI];
699	sf.sf_sc.sc_cs = regs[tCS];
700	sf.sf_sc.sc_ds = regs[tDS];
701	sf.sf_sc.sc_ss = regs[tSS];
702	sf.sf_sc.sc_es = regs[tES];
703	sf.sf_sc.sc_isp = regs[tISP];
704
705	/*
706	 * Build the signal context to be used by sigreturn.
707	 */
708	sf.sf_sc.sc_onstack = oonstack;
709	sf.sf_sc.sc_mask = mask;
710	sf.sf_sc.sc_sp = regs[tESP];
711	sf.sf_sc.sc_fp = regs[tEBP];
712	sf.sf_sc.sc_pc = regs[tEIP];
713	sf.sf_sc.sc_ps = regs[tEFLAGS];
714
715	/*
716	 * Copy the sigframe out to the user's stack.
717	 */
718	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
719		/*
720		 * Something is wrong with the stack pointer.
721		 * ...Kill the process.
722		 */
723		sigexit(p, SIGILL);
724	};
725
726	regs[tESP] = (int)fp;
727	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
728	regs[tEFLAGS] &= ~PSL_VM;
729	regs[tCS] = _ucodesel;
730	regs[tDS] = _udatasel;
731	regs[tES] = _udatasel;
732	regs[tSS] = _udatasel;
733}
734
735/*
736 * System call to cleanup state after a signal
737 * has been taken.  Reset signal mask and
738 * stack state from context left by sendsig (above).
739 * Return to previous pc and psl as specified by
740 * context left by sendsig. Check carefully to
741 * make sure that the user has not modified the
742 * state to gain improper privileges.
743 */
744int
745sigreturn(p, uap, retval)
746	struct proc *p;
747	struct sigreturn_args /* {
748		struct sigcontext *sigcntxp;
749	} */ *uap;
750	int *retval;
751{
752	register struct sigcontext *scp;
753	register struct sigframe *fp;
754	register int *regs = p->p_md.md_regs;
755	int eflags;
756
757	/*
758	 * (XXX old comment) regs[tESP] points to the return address.
759	 * The user scp pointer is above that.
760	 * The return address is faked in the signal trampoline code
761	 * for consistency.
762	 */
763	scp = uap->sigcntxp;
764	fp = (struct sigframe *)
765	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
766
767	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
768		return(EINVAL);
769
770	/*
771	 * Don't allow users to change privileged or reserved flags.
772	 */
773#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
774	eflags = scp->sc_ps;
775	/*
776	 * XXX do allow users to change the privileged flag PSL_RF.  The
777	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
778	 * sometimes set it there too.  tf_eflags is kept in the signal
779	 * context during signal handling and there is no other place
780	 * to remember it, so the PSL_RF bit may be corrupted by the
781	 * signal handler without us knowing.  Corruption of the PSL_RF
782	 * bit at worst causes one more or one less debugger trap, so
783	 * allowing it is fairly harmless.
784	 */
785	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
786#ifdef DEBUG
787    		printf("sigreturn: eflags = 0x%x\n", eflags);
788#endif
789    		return(EINVAL);
790	}
791
792	/*
793	 * Don't allow users to load a valid privileged %cs.  Let the
794	 * hardware check for invalid selectors, excess privilege in
795	 * other selectors, invalid %eip's and invalid %esp's.
796	 */
797#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
798	if (!CS_SECURE(scp->sc_cs)) {
799#ifdef DEBUG
800    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
801#endif
802		trapsignal(p, SIGBUS, T_PROTFLT);
803		return(EINVAL);
804	}
805
806	/* restore scratch registers */
807	regs[tEAX] = scp->sc_eax;
808	regs[tEBX] = scp->sc_ebx;
809	regs[tECX] = scp->sc_ecx;
810	regs[tEDX] = scp->sc_edx;
811	regs[tESI] = scp->sc_esi;
812	regs[tEDI] = scp->sc_edi;
813	regs[tCS] = scp->sc_cs;
814	regs[tDS] = scp->sc_ds;
815	regs[tES] = scp->sc_es;
816	regs[tSS] = scp->sc_ss;
817	regs[tISP] = scp->sc_isp;
818
819	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
820		return(EINVAL);
821
822	if (scp->sc_onstack & 01)
823		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
824	else
825		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
826	p->p_sigmask = scp->sc_mask &~
827	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
828	regs[tEBP] = scp->sc_fp;
829	regs[tESP] = scp->sc_sp;
830	regs[tEIP] = scp->sc_pc;
831	regs[tEFLAGS] = eflags;
832	return(EJUSTRETURN);
833}
834
/*
 * Deliberately panic the system, so that a crash (and vmcore dump)
 * can be triggered in a predictable way.
 */
void
diediedie()
{

	panic("because you said to!");
}
843
/*
 * waittime is negative until boot() begins its disk sync, which sets it
 * to 0 so the sync is attempted only once.  dumppcb receives the context
 * saved by boot() (savectx) just before dumpsys() is called.
 */
int	waittime = -1;
struct pcb dumppcb;
846
847__dead void
848boot(howto)
849	int howto;
850{
851	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
852		register struct buf *bp;
853		int iter, nbusy;
854
855		waittime = 0;
856		printf("\nsyncing disks... ");
857
858		sync(&proc0, NULL, NULL);
859
860		for (iter = 0; iter < 20; iter++) {
861			nbusy = 0;
862			for (bp = &buf[nbuf]; --bp >= buf; ) {
863				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
864					nbusy++;
865				}
866			}
867			if (nbusy == 0)
868				break;
869			printf("%d ", nbusy);
870			DELAY(40000 * iter);
871		}
872		if (nbusy) {
873			/*
874			 * Failed to sync all blocks. Indicate this and don't
875			 * unmount filesystems (thus forcing an fsck on reboot).
876			 */
877			printf("giving up\n");
878		} else {
879			printf("done\n");
880			/*
881			 * Unmount filesystems
882			 */
883			if (panicstr == 0)
884				vfs_unmountall();
885		}
886		DELAY(100000);			/* wait for console output to finish */
887		dev_shutdownall(FALSE);
888	}
889	splhigh();
890	if (howto & RB_HALT) {
891		printf("\n");
892		printf("The operating system has halted.\n");
893		printf("Please press any key to reboot.\n\n");
894		cngetc();
895	} else {
896		if (howto & RB_DUMP) {
897			if (!cold) {
898				savectx(&dumppcb, 0);
899				dumppcb.pcb_ptd = rcr3();
900				dumpsys();
901			}
902
903			if (PANIC_REBOOT_WAIT_TIME != 0) {
904				if (PANIC_REBOOT_WAIT_TIME != -1) {
905					int loop;
906					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
907						PANIC_REBOOT_WAIT_TIME);
908					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
909						DELAY(1000 * 100); /* 1/10th second */
910						if (cncheckc()) /* Did user type a key? */
911							break;
912					}
913					if (!loop)
914						goto die;
915				}
916			} else { /* zero time specified - reboot NOW */
917				goto die;
918			}
919			printf("--> Press a key on the console to reboot <--\n");
920			cngetc();
921		}
922	}
923die:
924	printf("Rebooting...\n");
925	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
926	cpu_reset();
927	for(;;) ;
928	/* NOTREACHED */
929}
930
unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
int		dumpsize = 0;		/* also for savecore; dumpsys() sets it to Maxmem */

int		dodump = 1;		/* dumpsys() does nothing when this is 0 */
935
936/*
937 * Doadump comes here after turning off memory management and
938 * getting on the dump stack, either when called above, or by
939 * the auto-restart code.
940 */
941void
942dumpsys()
943{
944
945	if (!dodump)
946		return;
947	if (dumpdev == NODEV)
948		return;
949	if ((minor(dumpdev)&07) != 1)
950		return;
951	dumpsize = Maxmem;
952	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
953	printf("dump ");
954	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
955
956	case ENXIO:
957		printf("device bad\n");
958		break;
959
960	case EFAULT:
961		printf("device not ready\n");
962		break;
963
964	case EINVAL:
965		printf("area improper\n");
966		break;
967
968	case EIO:
969		printf("i/o error\n");
970		break;
971
972	case EINTR:
973		printf("aborted from console\n");
974		break;
975
976	default:
977		printf("succeeded\n");
978		break;
979	}
980}
981
982/*
983 * Clear registers on exec
984 */
985void
986setregs(p, entry, stack)
987	struct proc *p;
988	u_long entry;
989	u_long stack;
990{
991	int *regs = p->p_md.md_regs;
992
993	bzero(regs, sizeof(struct trapframe));
994	regs[tEIP] = entry;
995	regs[tESP] = stack;
996	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
997	regs[tSS] = _udatasel;
998	regs[tDS] = _udatasel;
999	regs[tES] = _udatasel;
1000	regs[tCS] = _ucodesel;
1001
1002	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
1003	load_cr0(rcr0() | CR0_TS);	/* start emulating */
1004#if	NNPX > 0
1005	npxinit(__INITIAL_NPXCW__);
1006#endif	/* NNPX > 0 */
1007}
1008
1009/*
1010 * machine dependent system variables.
1011 */
1012int
1013cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1014	int *name;
1015	u_int namelen;
1016	void *oldp;
1017	size_t *oldlenp;
1018	void *newp;
1019	size_t newlen;
1020	struct proc *p;
1021{
1022	dev_t consdev;
1023	int error;
1024
1025	/* all sysctl names at this level are terminal */
1026	if (namelen != 1)
1027		return (ENOTDIR);               /* overloaded */
1028
1029	switch (name[0]) {
1030	case CPU_CONSDEV:
1031		consdev = (cn_tty == NULL ? NODEV : cn_tty->t_dev);
1032		return (sysctl_rdstruct(oldp, oldlenp, newp, &consdev,
1033					sizeof consdev));
1034	case CPU_ADJKERNTZ:
1035		error = sysctl_int(oldp, oldlenp, newp, newlen, &adjkerntz);
1036		if (!error && newp)
1037			resettodr();
1038		return error;
1039	case CPU_DISRTCSET:
1040		return (sysctl_int(oldp, oldlenp, newp,	newlen,	&disable_rtc_set));
1041	case CPU_BOOTINFO:
1042		return (sysctl_rdstruct(oldp, oldlenp, newp, &bootinfo,
1043					sizeof bootinfo));
1044	default:
1045		return (EOPNOTSUPP);
1046	}
1047	/* NOTREACHED */
1048}
1049
1050/*
1051 * Initialize 386 and configure to run kernel
1052 */
1053
1054/*
1055 * Initialize segments & interrupt table
1056 */
1057
/* NOTE(review): roles of currentldt/_default_ldt are not shown here -- confirm at their users. */
int currentldt;
int _default_ldt;
union descriptor gdt[NGDT];		/* global descriptor table */
struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */

/*
 * Task state segments.  panic_tss is the base of the GPANIC_SEL
 * descriptor in gdt_segs[] below.
 */
struct	i386tss	tss, panic_tss;

extern  struct user *proc0paddr;
1067
1068/* software prototypes -- in more palatable form */
1069struct soft_segment_descriptor gdt_segs[] = {
1070/* GNULL_SEL	0 Null Descriptor */
1071{	0x0,			/* segment base address  */
1072	0x0,			/* length */
1073	0,			/* segment type */
1074	0,			/* segment descriptor priority level */
1075	0,			/* segment descriptor present */
1076	0, 0,
1077	0,			/* default 32 vs 16 bit size */
1078	0  			/* limit granularity (byte/page units)*/ },
1079/* GCODE_SEL	1 Code Descriptor for kernel */
1080{	0x0,			/* segment base address  */
1081	0xfffff,		/* length - all address space */
1082	SDT_MEMERA,		/* segment type */
1083	0,			/* segment descriptor priority level */
1084	1,			/* segment descriptor present */
1085	0, 0,
1086	1,			/* default 32 vs 16 bit size */
1087	1  			/* limit granularity (byte/page units)*/ },
1088/* GDATA_SEL	2 Data Descriptor for kernel */
1089{	0x0,			/* segment base address  */
1090	0xfffff,		/* length - all address space */
1091	SDT_MEMRWA,		/* segment type */
1092	0,			/* segment descriptor priority level */
1093	1,			/* segment descriptor present */
1094	0, 0,
1095	1,			/* default 32 vs 16 bit size */
1096	1  			/* limit granularity (byte/page units)*/ },
1097/* GLDT_SEL	3 LDT Descriptor */
1098{	(int) ldt,		/* segment base address  */
1099	sizeof(ldt)-1,		/* length - all address space */
1100	SDT_SYSLDT,		/* segment type */
1101	0,			/* segment descriptor priority level */
1102	1,			/* segment descriptor present */
1103	0, 0,
1104	0,			/* unused - default 32 vs 16 bit size */
1105	0  			/* limit granularity (byte/page units)*/ },
1106/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1107{	0x0,			/* segment base address  */
1108	0x0,			/* length - all address space */
1109	0,			/* segment type */
1110	0,			/* segment descriptor priority level */
1111	0,			/* segment descriptor present */
1112	0, 0,
1113	0,			/* default 32 vs 16 bit size */
1114	0  			/* limit granularity (byte/page units)*/ },
1115/* GPANIC_SEL	5 Panic Tss Descriptor */
1116{	(int) &panic_tss,	/* segment base address  */
1117	sizeof(tss)-1,		/* length - all address space */
1118	SDT_SYS386TSS,		/* segment type */
1119	0,			/* segment descriptor priority level */
1120	1,			/* segment descriptor present */
1121	0, 0,
1122	0,			/* unused - default 32 vs 16 bit size */
1123	0  			/* limit granularity (byte/page units)*/ },
1124/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1125{	(int) kstack,		/* segment base address  */
1126	sizeof(tss)-1,		/* length - all address space */
1127	SDT_SYS386TSS,		/* segment type */
1128	0,			/* segment descriptor priority level */
1129	1,			/* segment descriptor present */
1130	0, 0,
1131	0,			/* unused - default 32 vs 16 bit size */
1132	0  			/* limit granularity (byte/page units)*/ },
1133/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1134{	(int) ldt,		/* segment base address  */
1135	(512 * sizeof(union descriptor)-1),		/* length */
1136	SDT_SYSLDT,		/* segment type */
1137	0,			/* segment descriptor priority level */
1138	1,			/* segment descriptor present */
1139	0, 0,
1140	0,			/* unused - default 32 vs 16 bit size */
1141	0  			/* limit granularity (byte/page units)*/ },
1142/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1143{	0,			/* segment base address (overwritten by APM)  */
1144	0xfffff,		/* length */
1145	SDT_MEMERA,		/* segment type */
1146	0,			/* segment descriptor priority level */
1147	1,			/* segment descriptor present */
1148	0, 0,
1149	1,			/* default 32 vs 16 bit size */
1150	1  			/* limit granularity (byte/page units)*/ },
1151/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1152{	0,			/* segment base address (overwritten by APM)  */
1153	0xfffff,		/* length */
1154	SDT_MEMERA,		/* segment type */
1155	0,			/* segment descriptor priority level */
1156	1,			/* segment descriptor present */
1157	0, 0,
1158	0,			/* default 32 vs 16 bit size */
1159	1  			/* limit granularity (byte/page units)*/ },
1160/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1161{	0,			/* segment base address (overwritten by APM) */
1162	0xfffff,		/* length */
1163	SDT_MEMRWA,		/* segment type */
1164	0,			/* segment descriptor priority level */
1165	1,			/* segment descriptor present */
1166	0, 0,
1167	1,			/* default 32 vs 16 bit size */
1168	1  			/* limit granularity (byte/page units)*/ },
1169};
1170
1171struct soft_segment_descriptor ldt_segs[] = {
1172	/* Null Descriptor - overwritten by call gate */
1173{	0x0,			/* segment base address  */
1174	0x0,			/* length - all address space */
1175	0,			/* segment type */
1176	0,			/* segment descriptor priority level */
1177	0,			/* segment descriptor present */
1178	0, 0,
1179	0,			/* default 32 vs 16 bit size */
1180	0  			/* limit granularity (byte/page units)*/ },
1181	/* Null Descriptor - overwritten by call gate */
1182{	0x0,			/* segment base address  */
1183	0x0,			/* length - all address space */
1184	0,			/* segment type */
1185	0,			/* segment descriptor priority level */
1186	0,			/* segment descriptor present */
1187	0, 0,
1188	0,			/* default 32 vs 16 bit size */
1189	0  			/* limit granularity (byte/page units)*/ },
1190	/* Null Descriptor - overwritten by call gate */
1191{	0x0,			/* segment base address  */
1192	0x0,			/* length - all address space */
1193	0,			/* segment type */
1194	0,			/* segment descriptor priority level */
1195	0,			/* segment descriptor present */
1196	0, 0,
1197	0,			/* default 32 vs 16 bit size */
1198	0  			/* limit granularity (byte/page units)*/ },
1199	/* Code Descriptor for user */
1200{	0x0,			/* segment base address  */
1201	0xfffff,		/* length - all address space */
1202	SDT_MEMERA,		/* segment type */
1203	SEL_UPL,		/* segment descriptor priority level */
1204	1,			/* segment descriptor present */
1205	0, 0,
1206	1,			/* default 32 vs 16 bit size */
1207	1  			/* limit granularity (byte/page units)*/ },
1208	/* Data Descriptor for user */
1209{	0x0,			/* segment base address  */
1210	0xfffff,		/* length - all address space */
1211	SDT_MEMRWA,		/* segment type */
1212	SEL_UPL,		/* segment descriptor priority level */
1213	1,			/* segment descriptor present */
1214	0, 0,
1215	1,			/* default 32 vs 16 bit size */
1216	1  			/* limit granularity (byte/page units)*/ },
1217};
1218
1219void
1220setidt(idx, func, typ, dpl)
1221	int idx;
1222	inthand_t *func;
1223	int typ;
1224	int dpl;
1225{
1226	struct gate_descriptor *ip = idt + idx;
1227
1228	ip->gd_looffset = (int)func;
1229	ip->gd_selector = 8;
1230	ip->gd_stkcpy = 0;
1231	ip->gd_xx = 0;
1232	ip->gd_type = typ;
1233	ip->gd_dpl = dpl;
1234	ip->gd_p = 1;
1235	ip->gd_hioffset = ((int)func)>>16 ;
1236}
1237
1238#define	IDTVEC(name)	__CONCAT(X,name)
1239
1240extern inthand_t
1241	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1242	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1243	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1244	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1245	IDTVEC(syscall);
1246
1247#ifdef COMPAT_LINUX
1248extern inthand_t
1249	IDTVEC(linux_syscall);
1250#endif
1251
1252void
1253sdtossd(sd, ssd)
1254	struct segment_descriptor *sd;
1255	struct soft_segment_descriptor *ssd;
1256{
1257	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1258	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1259	ssd->ssd_type  = sd->sd_type;
1260	ssd->ssd_dpl   = sd->sd_dpl;
1261	ssd->ssd_p     = sd->sd_p;
1262	ssd->ssd_def32 = sd->sd_def32;
1263	ssd->ssd_gran  = sd->sd_gran;
1264}
1265
1266void
1267init386(first)
1268	int first;
1269{
1270	int x;
1271	unsigned biosbasemem, biosextmem;
1272	struct gate_descriptor *gdp;
1273	int gsel_tss;
1274	/* table descriptors - used to load tables by microp */
1275	struct region_descriptor r_gdt, r_idt;
1276	int	pagesinbase, pagesinext;
1277	int	target_page, pa_indx;
1278
1279	proc0.p_addr = proc0paddr;
1280
1281	/*
1282	 * Initialize the console before we print anything out.
1283	 */
1284	cninit();
1285
1286	/*
1287	 * make gdt memory segments, the code segment goes up to end of the
1288	 * page with etext in it, the data segment goes to the end of
1289	 * the address space
1290	 */
1291	/*
1292	 * XXX text protection is temporarily (?) disabled.  The limit was
1293	 * i386_btop(i386_round_page(etext)) - 1.
1294	 */
1295	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1296	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1297	for (x = 0; x < NGDT; x++)
1298		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1299
1300	/* make ldt memory segments */
1301	/*
1302	 * The data segment limit must not cover the user area because we
1303	 * don't want the user area to be writable in copyout() etc. (page
1304	 * level protection is lost in kernel mode on 386's).  Also, we
1305	 * don't want the user area to be writable directly (page level
1306	 * protection of the user area is not available on 486's with
1307	 * CR0_WP set, because there is no user-read/kernel-write mode).
1308	 *
1309	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1310	 * should be spelled ...MAX_USER...
1311	 */
1312#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1313	/*
1314	 * The code segment limit has to cover the user area until we move
1315	 * the signal trampoline out of the user area.  This is safe because
1316	 * the code segment cannot be written to directly.
1317	 */
1318#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1319	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1320	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1321	/* Note. eventually want private ldts per process */
1322	for (x = 0; x < NLDT; x++)
1323		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1324
1325	/* exceptions */
1326	for (x = 0; x < NIDT; x++)
1327		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
1328	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1329	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1330	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1331 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1332	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1333	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1334	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1335	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1336	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1337	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1338	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1339	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1340	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1341	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1342	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1343	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1344	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1345	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
1346#ifdef COMPAT_LINUX
1347 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL);
1348#endif
1349
1350#include	"isa.h"
1351#if	NISA >0
1352	isa_defaultirq();
1353#endif
1354
1355	r_gdt.rd_limit = sizeof(gdt) - 1;
1356	r_gdt.rd_base =  (int) gdt;
1357	lgdt(&r_gdt);
1358
1359	r_idt.rd_limit = sizeof(idt) - 1;
1360	r_idt.rd_base = (int) idt;
1361	lidt(&r_idt);
1362
1363	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1364	lldt(_default_ldt);
1365	currentldt = _default_ldt;
1366
1367#ifdef DDB
1368	kdb_init();
1369	if (boothowto & RB_KDB)
1370		Debugger("Boot flags requested debugger");
1371#endif
1372
1373	/* Use BIOS values stored in RTC CMOS RAM, since probing
1374	 * breaks certain 386 AT relics.
1375	 */
1376	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1377	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1378
1379	/*
1380	 * Print a warning if the official BIOS interface disagrees
1381	 * with the hackish interface used above.  Eventually only
1382	 * the official interface should be used.
1383	 */
1384	if (bootinfo.bi_memsizes_valid) {
1385		if (bootinfo.bi_basemem != biosbasemem)
1386			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1387			       bootinfo.bi_basemem, biosbasemem);
1388		if (bootinfo.bi_extmem != biosextmem)
1389			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1390			       bootinfo.bi_extmem, biosextmem);
1391	}
1392
1393	/*
1394	 * If BIOS tells us that it has more than 640k in the basemem,
1395	 *	don't believe it - set it to 640k.
1396	 */
1397	if (biosbasemem > 640)
1398		biosbasemem = 640;
1399
1400	/*
1401	 * Some 386 machines might give us a bogus number for extended
1402	 *	mem. If this happens, stop now.
1403	 */
1404#ifndef LARGEMEM
1405	if (biosextmem > 65536) {
1406		panic("extended memory beyond limit of 64MB");
1407		/* NOTREACHED */
1408	}
1409#endif
1410
1411	pagesinbase = biosbasemem * 1024 / NBPG;
1412	pagesinext = biosextmem * 1024 / NBPG;
1413
1414	/*
1415	 * Special hack for chipsets that still remap the 384k hole when
1416	 *	there's 16MB of memory - this really confuses people that
1417	 *	are trying to use bus mastering ISA controllers with the
1418	 *	"16MB limit"; they only have 16MB, but the remapping puts
1419	 *	them beyond the limit.
1420	 */
1421	/*
1422	 * If extended memory is between 15-16MB (16-17MB phys address range),
1423	 *	chop it to 15MB.
1424	 */
1425	if ((pagesinext > 3840) && (pagesinext < 4096))
1426		pagesinext = 3840;
1427
1428	/*
1429	 * Maxmem isn't the "maximum memory", it's one larger than the
1430	 * highest page of of the physical address space. It
1431	 */
1432	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1433
1434#ifdef MAXMEM
1435	Maxmem = MAXMEM/4;
1436#endif
1437
1438	/* call pmap initialization to make new kernel address space */
1439	pmap_bootstrap (first, 0);
1440
1441	/*
1442	 * Size up each available chunk of physical memory.
1443	 */
1444
1445	/*
1446	 * We currently don't bother testing base memory.
1447	 * XXX  ...but we probably should.
1448	 */
1449	pa_indx = 0;
1450	badpages = 0;
1451	if (pagesinbase > 1) {
1452		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
1453		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
1454		physmem = pagesinbase - 1;
1455	} else {
1456		/* point at first chunk end */
1457		pa_indx++;
1458	}
1459
1460	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
1461		int tmp, page_bad = FALSE;
1462
1463		/*
1464		 * map page into kernel: valid, read/write, non-cacheable
1465		 */
1466		*(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page;
1467		pmap_update();
1468
1469		tmp = *(int *)CADDR1;
1470		/*
1471		 * Test for alternating 1's and 0's
1472		 */
1473		*(int *)CADDR1 = 0xaaaaaaaa;
1474		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1475			page_bad = TRUE;
1476		}
1477		/*
1478		 * Test for alternating 0's and 1's
1479		 */
1480		*(int *)CADDR1 = 0x55555555;
1481		if (*(int *)CADDR1 != 0x55555555) {
1482			page_bad = TRUE;
1483		}
1484		/*
1485		 * Test for all 1's
1486		 */
1487		*(int *)CADDR1 = 0xffffffff;
1488		if (*(int *)CADDR1 != 0xffffffff) {
1489			page_bad = TRUE;
1490		}
1491		/*
1492		 * Test for all 0's
1493		 */
1494		*(int *)CADDR1 = 0x0;
1495		if (*(int *)CADDR1 != 0x0) {
1496			/*
1497			 * test of page failed
1498			 */
1499			page_bad = TRUE;
1500		}
1501		/*
1502		 * Restore original value.
1503		 */
1504		*(int *)CADDR1 = tmp;
1505
1506		/*
1507		 * Adjust array of valid/good pages.
1508		 */
1509		if (page_bad == FALSE) {
1510			/*
1511			 * If this good page is a continuation of the
1512			 * previous set of good pages, then just increase
1513			 * the end pointer. Otherwise start a new chunk.
1514			 * Note that "end" points one higher than end,
1515			 * making the range >= start and < end.
1516			 */
1517			if (phys_avail[pa_indx] == target_page) {
1518				phys_avail[pa_indx] += PAGE_SIZE;
1519			} else {
1520				pa_indx++;
1521				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1522					printf("Too many holes in the physical address space, giving up\n");
1523					pa_indx--;
1524					break;
1525				}
1526				phys_avail[pa_indx++] = target_page;	/* start */
1527				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
1528			}
1529			physmem++;
1530		} else {
1531			badpages++;
1532			page_bad = FALSE;
1533		}
1534	}
1535
1536	*(int *)CMAP1 = 0;
1537	pmap_update();
1538
1539	/*
1540	 * XXX
1541	 * The last chunk must contain at leat one page plus the message
1542	 * buffer to avoid complicating other code (message buffer address
1543	 * calculation, etc.).
1544	 */
1545	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1546	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
1547		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1548		phys_avail[pa_indx--] = 0;
1549		phys_avail[pa_indx--] = 0;
1550	}
1551
1552	Maxmem = atop(phys_avail[pa_indx]);
1553
1554	/* Trim off space for the message buffer. */
1555	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));
1556
1557	avail_end = phys_avail[pa_indx];
1558
1559	/* now running on new page tables, configured,and u/iom is accessible */
1560
1561	/* make a initial tss so microp can get interrupt stack on syscall! */
1562	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1563	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1564	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1565
1566	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1567		(sizeof(tss))<<16;
1568
1569	ltr(gsel_tss);
1570
1571	/* make a call gate to reenter kernel with */
1572	gdp = &ldt[LSYS5CALLS_SEL].gd;
1573
1574	x = (int) &IDTVEC(syscall);
1575	gdp->gd_looffset = x++;
1576	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1577	gdp->gd_stkcpy = 1;
1578	gdp->gd_type = SDT_SYS386CGT;
1579	gdp->gd_dpl = SEL_UPL;
1580	gdp->gd_p = 1;
1581	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1582
1583	/* transfer to user mode */
1584
1585	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1586	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1587
1588	/* setup proc 0's pcb */
1589	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1590	proc0.p_addr->u_pcb.pcb_flags = 0;
1591	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1592}
1593
/*
 * A process's saved registers live in a trap frame inside its user area.
 * p->p_md.md_regs records the frame's address relative to the kernel
 * stack mapping (kstack).  TF_REGP() takes the offset of that address
 * from kstack and applies it to p->p_addr, so the same frame can be
 * reached through the process's user block whether or not the process
 * is the one currently running.
 */
#define	TF_REGP(p) \
	((struct trapframe *)((char *)(p)->p_addr + \
	    ((char *)(p)->p_md.md_regs - kstack)))
1606
1607int
1608ptrace_set_pc(p, addr)
1609	struct proc *p;
1610	unsigned int addr;
1611{
1612	TF_REGP(p)->tf_eip = addr;
1613	return (0);
1614}
1615
1616int
1617ptrace_single_step(p)
1618	struct proc *p;
1619{
1620	TF_REGP(p)->tf_eflags |= PSL_T;
1621	return (0);
1622}
1623
1624int
1625ptrace_getregs(p, addr)
1626	struct proc *p;
1627	unsigned int *addr;
1628{
1629	int error;
1630	struct reg regs;
1631
1632	error = fill_regs(p, &regs);
1633	if (error)
1634		return (error);
1635	return (copyout(&regs, addr, sizeof regs));
1636}
1637
1638int
1639ptrace_setregs(p, addr)
1640	struct proc *p;
1641	unsigned int *addr;
1642{
1643	int error;
1644	struct reg regs;
1645
1646	error = copyin(addr, &regs, sizeof regs);
1647	if (error)
1648		return (error);
1649	return (set_regs(p, &regs));
1650}
1651
1652int ptrace_write_u(p, off, data)
1653	struct proc *p;
1654	vm_offset_t off;
1655	int data;
1656{
1657	struct trapframe frame_copy;
1658	vm_offset_t min;
1659	struct trapframe *tp;
1660
1661	/*
1662	 * Privileged kernel state is scattered all over the user area.
1663	 * Only allow write access to parts of regs and to fpregs.
1664	 */
1665	min = (char *)p->p_md.md_regs - kstack;
1666	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1667		tp = TF_REGP(p);
1668		frame_copy = *tp;
1669		*(int *)((char *)&frame_copy + (off - min)) = data;
1670		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1671		    !CS_SECURE(frame_copy.tf_cs))
1672			return (EINVAL);
1673		*(int*)((char *)p->p_addr + off) = data;
1674		return (0);
1675	}
1676	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1677	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1678		*(int*)((char *)p->p_addr + off) = data;
1679		return (0);
1680	}
1681	return (EFAULT);
1682}
1683
1684int
1685fill_regs(p, regs)
1686	struct proc *p;
1687	struct reg *regs;
1688{
1689	struct trapframe *tp;
1690
1691	tp = TF_REGP(p);
1692	regs->r_es = tp->tf_es;
1693	regs->r_ds = tp->tf_ds;
1694	regs->r_edi = tp->tf_edi;
1695	regs->r_esi = tp->tf_esi;
1696	regs->r_ebp = tp->tf_ebp;
1697	regs->r_ebx = tp->tf_ebx;
1698	regs->r_edx = tp->tf_edx;
1699	regs->r_ecx = tp->tf_ecx;
1700	regs->r_eax = tp->tf_eax;
1701	regs->r_eip = tp->tf_eip;
1702	regs->r_cs = tp->tf_cs;
1703	regs->r_eflags = tp->tf_eflags;
1704	regs->r_esp = tp->tf_esp;
1705	regs->r_ss = tp->tf_ss;
1706	return (0);
1707}
1708
1709int
1710set_regs(p, regs)
1711	struct proc *p;
1712	struct reg *regs;
1713{
1714	struct trapframe *tp;
1715
1716	tp = TF_REGP(p);
1717	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1718	    !CS_SECURE(regs->r_cs))
1719		return (EINVAL);
1720	tp->tf_es = regs->r_es;
1721	tp->tf_ds = regs->r_ds;
1722	tp->tf_edi = regs->r_edi;
1723	tp->tf_esi = regs->r_esi;
1724	tp->tf_ebp = regs->r_ebp;
1725	tp->tf_ebx = regs->r_ebx;
1726	tp->tf_edx = regs->r_edx;
1727	tp->tf_ecx = regs->r_ecx;
1728	tp->tf_eax = regs->r_eax;
1729	tp->tf_eip = regs->r_eip;
1730	tp->tf_cs = regs->r_cs;
1731	tp->tf_eflags = regs->r_eflags;
1732	tp->tf_esp = regs->r_esp;
1733	tp->tf_ss = regs->r_ss;
1734	return (0);
1735}
1736
#ifndef DDB
/*
 * Stand-in used when the kernel is built without DDB: there is no
 * debugger to enter, so just report the request with printf().
 */
void
Debugger(const char *msg)
{
	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
1744
1745#include <sys/disklabel.h>
1746#define b_cylin	b_resid
1747/*
1748 * Determine the size of the transfer, and make sure it is
1749 * within the boundaries of the partition. Adjust transfer
1750 * if needed, and signal errors or early completion.
1751 */
1752int
1753bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1754{
1755        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1756        int labelsect = lp->d_partitions[0].p_offset;
1757        int maxsz = p->p_size,
1758                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1759
1760        /* overwriting disk label ? */
1761        /* XXX should also protect bootstrap in first 8K */
1762        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1763#if LABELSECTOR != 0
1764            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1765#endif
1766            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1767                bp->b_error = EROFS;
1768                goto bad;
1769        }
1770
1771#if     defined(DOSBBSECTOR) && defined(notyet)
1772        /* overwriting master boot record? */
1773        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1774            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1775                bp->b_error = EROFS;
1776                goto bad;
1777        }
1778#endif
1779
1780        /* beyond partition? */
1781        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1782                /* if exactly at end of disk, return an EOF */
1783                if (bp->b_blkno == maxsz) {
1784                        bp->b_resid = bp->b_bcount;
1785                        return(0);
1786                }
1787                /* or truncate if part of it fits */
1788                sz = maxsz - bp->b_blkno;
1789                if (sz <= 0) {
1790                        bp->b_error = EINVAL;
1791                        goto bad;
1792                }
1793                bp->b_bcount = sz << DEV_BSHIFT;
1794        }
1795
1796        /* calculate cylinder for disksort to order transfers with */
1797        bp->b_pblkno = bp->b_blkno + p->p_offset;
1798        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1799        return(1);
1800
1801bad:
1802        bp->b_flags |= B_ERROR;
1803        return(-1);
1804}
1805
/*
 * Copy the integer `drive' out to `userp' with copyout().
 *
 * `*maxlen' is the space remaining at `userp'.  If it is smaller than
 * an int, ENOMEM is returned and nothing is changed; otherwise it is
 * reduced by sizeof(int) and the result of copyout() is returned.
 */
int
disk_externalize(int drive, void *userp, size_t *maxlen)
{
	const size_t need = sizeof drive;

	if (*maxlen < need)
		return ENOMEM;

	*maxlen -= need;
	return copyout(&drive, userp, need);
}
1816