machdep.c revision 9507
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.130 1995/06/28 04:46:11 davidg Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/signalvar.h>
47#include <sys/kernel.h>
48#include <sys/proc.h>
49#include <sys/user.h>
50#include <sys/buf.h>
51#include <sys/reboot.h>
52#include <sys/conf.h>
53#include <sys/file.h>
54#include <sys/callout.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/mount.h>
58#include <sys/msgbuf.h>
59#include <sys/ioctl.h>
60#include <sys/sysent.h>
61#include <sys/tty.h>
62#include <sys/sysctl.h>
63#include <sys/devconf.h>
64
65#ifdef SYSVSHM
66#include <sys/shm.h>
67#endif
68
69#ifdef SYSVMSG
70#include <sys/msg.h>
71#endif
72
73#ifdef SYSVSEM
74#include <sys/sem.h>
75#endif
76
77#include <vm/vm.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_page.h>
80#include <vm/vm_pager.h>
81
82#include <sys/exec.h>
83#include <sys/vnode.h>
84
85#include <ddb/ddb.h>
86
87#include <net/netisr.h>
88
89/* XXX correctly declaring all the netisr's is painful. */
90#include <net/if.h>
91#include <net/route.h>
92
93#include <netinet/in.h>
94#include <netinet/in_systm.h>
95#include <netinet/ip.h>
96#include <netinet/if_ether.h>
97#include <netinet/ip_var.h>
98
99#include <netns/ns.h>
100#include <netns/ns_if.h>
101
102#include <netiso/iso.h>
103#include <netiso/iso_var.h>
104
105#include <netccitt/dll.h>
106#include <netccitt/x25.h>
107#include <netccitt/pk.h>
108#include <sys/socketvar.h>
109#include <netccitt/pk_var.h>
110
111#include "ether.h"
112
113#include <machine/cpu.h>
114#include <machine/npx.h>
115#include <machine/reg.h>
116#include <machine/psl.h>
117#include <machine/clock.h>
118#include <machine/specialreg.h>
119#include <machine/sysarch.h>
120#include <machine/cons.h>
121#include <machine/devconf.h>
122#include <machine/bootinfo.h>
123#include <machine/md_var.h>
124
125#include <i386/isa/isa.h>
126#include <i386/isa/isa_device.h>
127#include <i386/isa/rtc.h>
128
129static void identifycpu(void);
130static void initcpu(void);
131
132char machine[] = "i386";
133char cpu_model[128];
134
135struct kern_devconf kdc_cpu0 = {
136	0, 0, 0,		/* filled in by dev_attach */
137	"cpu", 0, { MDDT_CPU },
138	0, 0, 0, CPU_EXTERNALLEN,
139	0,			/* CPU has no parent */
140	0,			/* no parentdata */
141	DC_BUSY,		/* the CPU is always busy */
142	cpu_model,		/* no sense in duplication */
143	DC_CLS_CPU		/* class */
144};
145
146#ifndef PANIC_REBOOT_WAIT_TIME
147#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
148#endif
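/*
 * For example (assuming the usual config(8) syntax for valued options),
 * this default is meant to be overridden from the kernel config file:
 *
 *	options "PANIC_REBOOT_WAIT_TIME=30"
 *
 * As boot() below shows, 0 reboots immediately after the panic dump and
 * -1 waits forever for a console keystroke.
 */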
149
150/*
151 * Declare these as initialized data so we can patch them.
152 */
153int	nswbuf = 0;
154#ifdef	NBUF
155int	nbuf = NBUF;
156#else
157int	nbuf = 0;
158#endif
159
160#ifdef BOUNCE_BUFFERS
161extern char *bouncememory;
162extern int maxbkva;
163#ifdef BOUNCEPAGES
164int	bouncepages = BOUNCEPAGES;
165#else
166int	bouncepages = 0;
167#endif
168#endif	/* BOUNCE_BUFFERS */
169
170extern int freebufspace;
171int	msgbufmapped = 0;		/* set when safe to use msgbuf */
172int _udatasel, _ucodesel;
173
174
175/*
176 * Machine-dependent startup code
177 */
178int boothowto = 0, bootverbose = 0, Maxmem = 0, badpages = 0, physmem = 0;
179long dumplo;
180extern int bootdev;
181int biosmem;
182
183vm_offset_t	phys_avail[6];
184
185int cpu_class;
186
187void dumpsys __P((void));
188void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */
189
190vm_offset_t buffer_sva, buffer_eva;
191vm_offset_t clean_sva, clean_eva;
192vm_offset_t pager_sva, pager_eva;
193extern int pager_map_size;
194extern struct linker_set netisr_set;
195
196#define offsetof(type, member)	((size_t)(&((type *)0)->member))
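/*
 * A minimal usage sketch: offsetof(struct sigframe, sf_sc) is the byte
 * offset of sf_sc within struct sigframe, which lets sigreturn() below
 * step back from the user's sigcontext pointer to its enclosing frame:
 *
 *	fp = (struct sigframe *)
 *	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
 */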
197
198void
199cpu_startup()
200{
201	register unsigned i;
202	register caddr_t v;
203	vm_offset_t maxaddr;
204	vm_size_t size = 0;
205	int firstaddr;
206	vm_offset_t minaddr;
207
208	if (boothowto & RB_VERBOSE)
209		bootverbose++;
210
211	/*
212	 * Initialize error message buffer (at end of core).
213	 */
214
215	/* avail_end was pre-decremented in init_386() to compensate */
216	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
217		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
218			   avail_end + i * NBPG,
219			   VM_PROT_ALL, TRUE);
220	msgbufmapped = 1;
221
222	/*
223	 * Good {morning,afternoon,evening,night}.
224	 */
225	printf(version);
226	startrtclock();
227	identifycpu();
228	printf("real memory  = %d (%d pages)\n", ptoa(physmem), physmem);
229	if (badpages)
230		printf("bad memory   = %d (%d pages)\n", ptoa(badpages), badpages);
231
232	/*
233	 * Quickly wire in netisrs.
234	 */
235	setup_netisrs(&netisr_set);
236
237/*
238#ifdef ISDN
239	DONET(isdnintr, NETISR_ISDN);
240#endif
241*/
242
243	/*
244	 * Allocate space for system data structures.
245	 * The first available kernel virtual address is in "v".
246	 * As pages of kernel virtual memory are allocated, "v" is incremented.
247	 * As pages of memory are allocated and cleared,
248	 * "firstaddr" is incremented.
249	 * An index into the kernel page table corresponding to the
250	 * virtual memory address maintained in "v" is kept in "mapaddr".
251	 */
252
253	/*
254	 * Make two passes.  The first pass calculates how much memory is
255	 * needed and allocates it.  The second pass assigns virtual
256	 * addresses to the various data structures.
257	 */
258	firstaddr = 0;
259again:
260	v = (caddr_t)firstaddr;
261
262#define	valloc(name, type, num) \
263	    (name) = (type *)v; v = (caddr_t)((name)+(num))
264#define	valloclim(name, type, num, lim) \
265	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
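	/*
	 * For reference, a use such as valloc(callout, struct callout, ncallout)
	 * expands to
	 *
	 *	callout = (struct callout *)v; v = (caddr_t)(callout + ncallout);
	 *
	 * i.e. it carves "num" objects out of the running pointer v without
	 * touching memory, which is what makes the two-pass sizing scheme work.
	 */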
266	valloc(callout, struct callout, ncallout);
267#ifdef SYSVSHM
268	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
269#endif
270#ifdef SYSVSEM
271	valloc(sema, struct semid_ds, seminfo.semmni);
272	valloc(sem, struct sem, seminfo.semmns);
273	/* This is pretty disgusting! */
274	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
275#endif
276#ifdef SYSVMSG
277	valloc(msgpool, char, msginfo.msgmax);
278	valloc(msgmaps, struct msgmap, msginfo.msgseg);
279	valloc(msghdrs, struct msg, msginfo.msgtql);
280	valloc(msqids, struct msqid_ds, msginfo.msgmni);
281#endif
282
283	if (nbuf == 0) {
284		nbuf = 30;
285		if( physmem > 1024)
286			nbuf += min((physmem - 1024) / 12, 1024);
287	}
288	nswbuf = min(nbuf, 128);
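	/*
	 * Worked example (assuming 4K pages): a 16MB machine has physmem == 4096,
	 * so nbuf = 30 + min((4096 - 1024) / 12, 1024) = 30 + 256 = 286 buffers
	 * and nswbuf = min(286, 128) = 128.
	 */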
289
290	valloc(swbuf, struct buf, nswbuf);
291	valloc(buf, struct buf, nbuf);
292
293#ifdef BOUNCE_BUFFERS
294	/*
295	 * If there is more than 16MB of memory, allocate some bounce buffers
296	 */
297	if (Maxmem > 4096) {
298		if (bouncepages == 0) {
299			bouncepages = 64;
300			bouncepages += ((Maxmem - 4096) / 2048) * 32;
301		}
302		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
303		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
304	}
305#endif
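	/*
	 * Worked example (again assuming 4K pages): with BOUNCE_BUFFERS and
	 * 32MB of RAM, Maxmem == 8192, so bouncepages = 64 +
	 * ((8192 - 4096) / 2048) * 32 = 64 + 64 = 128 pages, i.e. 512K of
	 * bounce memory for ISA devices that can only DMA below 16MB.
	 */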
306
307	/*
308	 * End of first pass, size has been calculated so allocate memory
309	 */
310	if (firstaddr == 0) {
311		size = (vm_size_t)(v - firstaddr);
312		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
313		if (firstaddr == 0)
314			panic("startup: no room for tables");
315		goto again;
316	}
317
318	/*
319	 * End of second pass, addresses have been assigned
320	 */
321	if ((vm_size_t)(v - firstaddr) != size)
322		panic("startup: table size inconsistency");
323
324#ifdef BOUNCE_BUFFERS
325	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
326			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
327				maxbkva + pager_map_size, TRUE);
328	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
329#else
330	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
331			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
332#endif
333	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
334				(nbuf*MAXBSIZE), TRUE);
335	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
336				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
337	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
338				(16*ARG_MAX), TRUE);
339	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
340				(maxproc*UPAGES*PAGE_SIZE), FALSE);
341
342	/*
343	 * Finally, allocate the mbuf pool.  Since mclrefcnt is an odd size,
344	 * the more space-efficient malloc is used in place of kmem_alloc.
345	 */
346	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
347				   M_MBUF, M_NOWAIT);
348	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
349	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
350			       nmbclusters * MCLBYTES, FALSE);
351	/*
352	 * Initialize callouts
353	 */
354	callfree = callout;
355	for (i = 1; i < ncallout; i++)
356		callout[i-1].c_next = &callout[i];
357
358        if (boothowto & RB_CONFIG)
359		userconfig();
360	printf("avail memory = %d (%d pages)\n", ptoa(cnt.v_free_count), cnt.v_free_count);
361
362#ifdef BOUNCE_BUFFERS
363	/*
364	 * init bounce buffers
365	 */
366	vm_bounce_init();
367#endif
368
369	/*
370	 * Set up CPU-specific registers, cache, etc.
371	 */
372	initcpu();
373
374	/*
375	 * Set up buffers, so they can be used to read disk labels.
376	 */
377	bufinit();
378	vm_pager_bufferinit();
379
380	/*
381	 * Configure the system.
382	 */
383	configure();
384	if (bootverbose) {
385		printf("BIOS Geometries:\n");
386		for (i=0; i < N_BIOS_GEOM; i++) {
387			int j = bootinfo.bi_bios_geom[i];
388			if (j == 0x4f010f)
389				continue;
390			printf(" %x:%08x", i, j);
391			printf(" %d cyl, %d heads, %d sects\n",
392				j >> 16, (j >> 8) & 0xff, j & 0xff);
393
394		}
395		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
396	}
397}
398
399void
400setup_netisrs(struct linker_set *ls)
401{
402	int i;
403	const struct netisrtab *nit;
404
405	for(i = 0; ls->ls_items[i]; i++) {
406		nit = (const struct netisrtab *)ls->ls_items[i];
407		netisrs[nit->nit_num] = nit->nit_isr;
408	}
409}
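/*
 * Each item in the netisr_set linker set is a struct netisrtab that pairs
 * a NETISR_* number with its handler, so the loop above is conceptually
 * equivalent to a series of assignments such as
 *
 *	netisrs[NETISR_IP] = ipintr;
 *
 * one per protocol compiled into the kernel.
 */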
410
411struct cpu_nameclass i386_cpus[] = {
412	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
413	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
414	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
415	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
416	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
417	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
418	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
419};
420
421static void
422identifycpu()
423{
424	printf("CPU: ");
425	if (cpu >= 0
426	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
427		cpu_class = i386_cpus[cpu].cpu_class;
428		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
429	} else {
430		printf("unknown cpu type %d\n", cpu);
431		panic("startup: bad cpu id");
432	}
433
434#if defined(I586_CPU)
435	if(cpu_class == CPUCLASS_586) {
436		calibrate_cyclecounter();
437		printf("%d-MHz ", pentium_mhz);
438	}
439#endif
440#if defined(I486_CPU) || defined(I586_CPU)
441	if (!strcmp(cpu_vendor,"GenuineIntel")) {
442		if ((cpu_id & 0xf00) > 3) {
443			cpu_model[0] = '\0';
444
445			switch (cpu_id & 0x3000) {
446			case 0x1000:
447				strcpy(cpu_model, "Overdrive ");
448				break;
449			case 0x2000:
450				strcpy(cpu_model, "Dual ");
451				break;
452			}
453			if ((cpu_id & 0xf00) == 0x400) {
454				strcat(cpu_model, "i486 ");
455#if defined(I586_CPU)
456			} else if ((cpu_id & 0xf00) == 0x500) {
457				strcat(cpu_model, "Pentium ");
458#endif
459			} else {
460				strcat(cpu_model, "unknown ");
461			}
462
463			switch (cpu_id & 0xff0) {
464			case 0x400:
465				strcat(cpu_model, "DX"); break;
466			case 0x410:
467				strcat(cpu_model, "DX"); break;
468			case 0x420:
469				strcat(cpu_model, "SX"); break;
470			case 0x430:
471				strcat(cpu_model, "DX2"); break;
472			case 0x440:
473				strcat(cpu_model, "SL"); break;
474			case 0x450:
475				strcat(cpu_model, "SX2"); break;
476			case 0x470:
477				strcat(cpu_model, "DX2 Write-Back Enhanced");
478				break;
479			case 0x480:
480				strcat(cpu_model, "DX4"); break;
481#if defined(I586_CPU)
482			case 0x510:
483				if (pentium_mhz == 60) {
484					strcat(cpu_model, "510\\60");
485				} else if (pentium_mhz == 66) {
486					strcat(cpu_model, "567\\66");
487				} else {
488					strcat(cpu_model,"510\\60 or 567\\66");
489				}
490				break;
491			case 0x520:
492				if (pentium_mhz == 90) {
493					strcat(cpu_model, "735\\90");
494				} else if (pentium_mhz == 100) {
495					strcat(cpu_model, "815\\100");
496				} else {
497					strcat(cpu_model,"735\\90 or 815\\100");
498				}
499				break;
500#endif
501			}
502		}
503	}
504#endif
505	printf("%s (", cpu_model);
506	switch(cpu_class) {
507	case CPUCLASS_286:
508		printf("286");
509		break;
510#if defined(I386_CPU)
511	case CPUCLASS_386:
512		printf("386");
513		break;
514#endif
515#if defined(I486_CPU)
516	case CPUCLASS_486:
517		printf("486");
518		break;
519#endif
520#if defined(I586_CPU)
521	case CPUCLASS_586:
522		printf("Pentium");
523		break;
524#endif
525	default:
526		printf("unknown");	/* will panic below... */
527	}
528	printf("-class CPU)\n");
529#if defined(I486_CPU) || defined(I586_CPU)
530	if(*cpu_vendor)
531		printf("  Origin = \"%s\"",cpu_vendor);
532	if(cpu_id)
533		printf("  Id = 0x%lx",cpu_id);
534
535	if (!strcmp(cpu_vendor, "GenuineIntel")) {
536		printf("  Stepping=%ld", cpu_id & 0xf);
537		if (cpu_high > 0) {
538#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
539			printf("\n  Features=0x%b", cpu_feature, FEATUREFMT);
540		}
541	}
542	/* Avoid ugly blank lines: only print newline when we have to. */
543	if (*cpu_vendor || cpu_id)
544		printf("\n");
545#endif
546	/*
547	 * Now that we have told the user what they have,
548	 * let them know if that machine type isn't configured.
549	 */
550	switch (cpu_class) {
551	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
552#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
553#error This kernel is not configured for one of the supported CPUs
554#endif
555#if !defined(I386_CPU)
556	case CPUCLASS_386:
557#endif
558#if !defined(I486_CPU)
559	case CPUCLASS_486:
560#endif
561#if !defined(I586_CPU)
562	case CPUCLASS_586:
563#endif
564		panic("CPU class not configured");
565	default:
566		break;
567	}
568	dev_attach(&kdc_cpu0);
569}
570
571/*
572 * Send an interrupt to process.
573 *
574 * The stack is set up so that the signal trampoline (sigcode) stored
575 * in the u. area calls the handler, followed by a kernel call
576 * into the sigreturn routine below.  After sigreturn
577 * resets the signal mask, the stack, and the
578 * frame pointer, it returns to the user-
579 * specified pc and psl.
580 */
581void
582sendsig(catcher, sig, mask, code)
583	sig_t catcher;
584	int sig, mask;
585	unsigned code;
586{
587	register struct proc *p = curproc;
588	register int *regs;
589	register struct sigframe *fp;
590	struct sigframe sf;
591	struct sigacts *psp = p->p_sigacts;
592	int oonstack;
593
594	regs = p->p_md.md_regs;
595        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
596	/*
597	 * Allocate and validate space for the signal handler
598	 * context. Note that if the stack is in P0 space, the
599	 * call to grow() is a nop, and the useracc() check
600	 * will fail if the process has not already allocated
601	 * the space with a `brk'.
602	 */
603        if ((psp->ps_flags & SAS_ALTSTACK) &&
604	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
605	    (psp->ps_sigonstack & sigmask(sig))) {
606		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
607		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
608		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
609	} else {
610		fp = (struct sigframe *)(regs[tESP]
611			- sizeof(struct sigframe));
612	}
613
614	/*
615	 * grow() will return FALSE if fp will not fit inside the stack
616	 *	and the stack cannot be grown.  useracc() will return FALSE
617	 *	if access is denied.
618	 */
619	if ((grow(p, (int)fp) == FALSE) ||
620	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
621		/*
622		 * Process has trashed its stack; give it an illegal
623		 * instruction to halt it in its tracks.
624		 */
625		SIGACTION(p, SIGILL) = SIG_DFL;
626		sig = sigmask(SIGILL);
627		p->p_sigignore &= ~sig;
628		p->p_sigcatch &= ~sig;
629		p->p_sigmask &= ~sig;
630		psignal(p, SIGILL);
631		return;
632	}
633
634	/*
635	 * Build the argument list for the signal handler.
636	 */
637	if (p->p_sysent->sv_sigtbl) {
638		if (sig < p->p_sysent->sv_sigsize)
639			sig = p->p_sysent->sv_sigtbl[sig];
640		else
641			sig = p->p_sysent->sv_sigsize + 1;
642	}
643	sf.sf_signum = sig;
644	sf.sf_code = code;
645	sf.sf_scp = &fp->sf_sc;
646	sf.sf_addr = (char *) regs[tERR];
647	sf.sf_handler = catcher;
648
649	/* save scratch registers */
650	sf.sf_sc.sc_eax = regs[tEAX];
651	sf.sf_sc.sc_ebx = regs[tEBX];
652	sf.sf_sc.sc_ecx = regs[tECX];
653	sf.sf_sc.sc_edx = regs[tEDX];
654	sf.sf_sc.sc_esi = regs[tESI];
655	sf.sf_sc.sc_edi = regs[tEDI];
656	sf.sf_sc.sc_cs = regs[tCS];
657	sf.sf_sc.sc_ds = regs[tDS];
658	sf.sf_sc.sc_ss = regs[tSS];
659	sf.sf_sc.sc_es = regs[tES];
660	sf.sf_sc.sc_isp = regs[tISP];
661
662	/*
663	 * Build the signal context to be used by sigreturn.
664	 */
665	sf.sf_sc.sc_onstack = oonstack;
666	sf.sf_sc.sc_mask = mask;
667	sf.sf_sc.sc_sp = regs[tESP];
668	sf.sf_sc.sc_fp = regs[tEBP];
669	sf.sf_sc.sc_pc = regs[tEIP];
670	sf.sf_sc.sc_ps = regs[tEFLAGS];
671
672	/*
673	 * Copy the sigframe out to the user's stack.
674	 */
675	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
676		/*
677		 * Something is wrong with the stack pointer.
678		 * ...Kill the process.
679		 */
680		sigexit(p, SIGILL);
681	};
682
683	regs[tESP] = (int)fp;
684	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
685	regs[tEFLAGS] &= ~PSL_VM;
686	regs[tCS] = _ucodesel;
687	regs[tDS] = _udatasel;
688	regs[tES] = _udatasel;
689	regs[tSS] = _udatasel;
690}
691
692/*
693 * System call to cleanup state after a signal
694 * has been taken.  Reset signal mask and
695 * stack state from context left by sendsig (above).
696 * Return to previous pc and psl as specified by
697 * context left by sendsig. Check carefully to
698 * make sure that the user has not modified the
699 * state to gain improper privileges.
700 */
701struct sigreturn_args {
702	struct sigcontext *sigcntxp;
703};
704
705int
706sigreturn(p, uap, retval)
707	struct proc *p;
708	struct sigreturn_args *uap;
709	int *retval;
710{
711	register struct sigcontext *scp;
712	register struct sigframe *fp;
713	register int *regs = p->p_md.md_regs;
714	int eflags;
715
716	/*
717	 * (XXX old comment) regs[tESP] points to the return address.
718	 * The user scp pointer is above that.
719	 * The return address is faked in the signal trampoline code
720	 * for consistency.
721	 */
722	scp = uap->sigcntxp;
723	fp = (struct sigframe *)
724	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
725
726	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
727		return(EINVAL);
728
729	/*
730	 * Don't allow users to change privileged or reserved flags.
731	 */
732#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
733	eflags = scp->sc_ps;
734	/*
735	 * XXX do allow users to change the privileged flag PSL_RF.  The
736	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
737	 * sometimes set it there too.  tf_eflags is kept in the signal
738	 * context during signal handling and there is no other place
739	 * to remember it, so the PSL_RF bit may be corrupted by the
740	 * signal handler without us knowing.  Corruption of the PSL_RF
741	 * bit at worst causes one more or one less debugger trap, so
742	 * allowing it is fairly harmless.
743	 */
744	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
745#ifdef DEBUG
746    		printf("sigreturn: eflags = 0x%x\n", eflags);
747#endif
748    		return(EINVAL);
749	}
750
751	/*
752	 * Don't allow users to load a valid privileged %cs.  Let the
753	 * hardware check for invalid selectors, excess privilege in
754	 * other selectors, invalid %eip's and invalid %esp's.
755	 */
756#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
757	if (!CS_SECURE(scp->sc_cs)) {
758#ifdef DEBUG
759    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
760#endif
761		trapsignal(p, SIGBUS, T_PROTFLT);
762		return(EINVAL);
763	}
764
765	/* restore scratch registers */
766	regs[tEAX] = scp->sc_eax;
767	regs[tEBX] = scp->sc_ebx;
768	regs[tECX] = scp->sc_ecx;
769	regs[tEDX] = scp->sc_edx;
770	regs[tESI] = scp->sc_esi;
771	regs[tEDI] = scp->sc_edi;
772	regs[tCS] = scp->sc_cs;
773	regs[tDS] = scp->sc_ds;
774	regs[tES] = scp->sc_es;
775	regs[tSS] = scp->sc_ss;
776	regs[tISP] = scp->sc_isp;
777
778	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
779		return(EINVAL);
780
781	if (scp->sc_onstack & 01)
782		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
783	else
784		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
785	p->p_sigmask = scp->sc_mask &~
786	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
787	regs[tEBP] = scp->sc_fp;
788	regs[tESP] = scp->sc_sp;
789	regs[tEIP] = scp->sc_pc;
790	regs[tEFLAGS] = eflags;
791	return(EJUSTRETURN);
792}
793
794/*
795 * a simple function to make the system panic (and dump a vmcore)
796 * in a predictable fashion
797 */
798void diediedie()
799{
800	panic("because you said to!");
801}
802
803int	waittime = -1;
804struct pcb dumppcb;
805
806__dead void
807boot(arghowto)
808	int arghowto;
809{
810	register long dummy;		/* r12 is reserved */
811	register int howto;		/* r11 == how to boot */
812	register int devtype;		/* r10 == major of root dev */
813
814	if (cold) {
815		printf("hit reset please");
816		for(;;);
817	}
818	howto = arghowto;
819	if ((howto&RB_NOSYNC) == 0 && waittime < 0) {
820		register struct buf *bp;
821		int iter, nbusy;
822
823		waittime = 0;
824		printf("\nsyncing disks... ");
825
826		sync(&proc0, NULL, NULL);
827
828		for (iter = 0; iter < 20; iter++) {
829			nbusy = 0;
830			for (bp = &buf[nbuf]; --bp >= buf; ) {
831				if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) {
832					nbusy++;
833				}
834			}
835			if (nbusy == 0)
836				break;
837			printf("%d ", nbusy);
838			DELAY(40000 * iter);
839		}
840		if (nbusy) {
841			/*
842			 * Failed to sync all blocks. Indicate this and don't
843			 * unmount filesystems (thus forcing an fsck on reboot).
844			 */
845			printf("giving up\n");
846		} else {
847			printf("done\n");
848			/*
849			 * Unmount filesystems
850			 */
851			if (panicstr == 0)
852				vfs_unmountall();
853		}
854		DELAY(100000);			/* wait for console output to finish */
855		dev_shutdownall(FALSE);
856	}
857	splhigh();
858	devtype = major(rootdev);
859	if (howto&RB_HALT) {
860		printf("\n");
861		printf("The operating system has halted.\n");
862		printf("Please press any key to reboot.\n\n");
863		cngetc();
864	} else {
865		if (howto & RB_DUMP) {
866			savectx(&dumppcb, 0);
867			dumppcb.pcb_ptd = rcr3();
868			dumpsys();
869
870			if (PANIC_REBOOT_WAIT_TIME != 0) {
871				if (PANIC_REBOOT_WAIT_TIME != -1) {
872					int loop;
873					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
874						PANIC_REBOOT_WAIT_TIME);
875					for (loop = PANIC_REBOOT_WAIT_TIME; loop > 0; --loop) {
876						DELAY(1000 * 1000); /* one second */
877						if (cncheckc()) /* Did user type a key? */
878							break;
879					}
880					if (!loop)
881						goto die;
882				}
883			} else { /* zero time specified - reboot NOW */
884				goto die;
885			}
886			printf("--> Press a key on the console to reboot <--\n");
887			cngetc();
888		}
889	}
890#ifdef lint
891	dummy = 0; dummy = dummy;
892	printf("howto %d, devtype %d\n", arghowto, devtype);
893#endif
894die:
895	printf("Rebooting...\n");
896	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
897	cpu_reset();
898	for(;;) ;
899	/* NOTREACHED */
900}
901
902unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
903int		dumpsize = 0;		/* also for savecore */
904
905int		dodump = 1;
906
907/*
908 * dumpsys() comes here after turning off memory management and
909 * getting on the dump stack, either when called above, or by
910 * the auto-restart code.
911 */
912void
913dumpsys()
914{
915
916	if (!dodump)
917		return;
918	if (dumpdev == NODEV)
919		return;
920	if ((minor(dumpdev)&07) != 1)
921		return;
922	dumpsize = Maxmem;
923	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
924	printf("dump ");
925	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
926
927	case ENXIO:
928		printf("device bad\n");
929		break;
930
931	case EFAULT:
932		printf("device not ready\n");
933		break;
934
935	case EINVAL:
936		printf("area improper\n");
937		break;
938
939	case EIO:
940		printf("i/o error\n");
941		break;
942
943	case EINTR:
944		printf("aborted from console\n");
945		break;
946
947	default:
948		printf("succeeded\n");
949		break;
950	}
951}
952
953static void
954initcpu()
955{
956}
957
958/*
959 * Clear registers on exec
960 */
961void
962setregs(p, entry, stack)
963	struct proc *p;
964	u_long entry;
965	u_long stack;
966{
967	int *regs = p->p_md.md_regs;
968
969	bzero(regs, sizeof(struct trapframe));
970	regs[tEIP] = entry;
971	regs[tESP] = stack;
972	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
973	regs[tSS] = _udatasel;
974	regs[tDS] = _udatasel;
975	regs[tES] = _udatasel;
976	regs[tCS] = _ucodesel;
977
978	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
979	load_cr0(rcr0() | CR0_TS);	/* start emulating */
980#if	NNPX > 0
981	npxinit(__INITIAL_NPXCW__);
982#endif	/* NNPX > 0 */
983}
984
985/*
986 * machine dependent system variables.
987 */
988int
989cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
990	int *name;
991	u_int namelen;
992	void *oldp;
993	size_t *oldlenp;
994	void *newp;
995	size_t newlen;
996	struct proc *p;
997{
998	dev_t consdev;
999	int error;
1000
1001	/* all sysctl names at this level are terminal */
1002	if (namelen != 1)
1003		return (ENOTDIR);               /* overloaded */
1004
1005	switch (name[0]) {
1006	case CPU_CONSDEV:
1007		consdev = (cn_tty == NULL ? NODEV : cn_tty->t_dev);
1008		return (sysctl_rdstruct(oldp, oldlenp, newp, &consdev,
1009					sizeof consdev));
1010	case CPU_ADJKERNTZ:
1011		error = sysctl_int(oldp, oldlenp, newp, newlen, &adjkerntz);
1012		if (!error && newp)
1013			resettodr();
1014		return error;
1015	case CPU_DISRTCSET:
1016		return (sysctl_int(oldp, oldlenp, newp,	newlen,	&disable_rtc_set));
1017	default:
1018		return (EOPNOTSUPP);
1019	}
1020	/* NOTREACHED */
1021}
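/*
 * Assuming the standard CTL_MACHDEP name mapping, these nodes appear to
 * userland under the "machdep" sysctl tree, e.g.:
 *
 *	sysctl machdep.consdev
 *	sysctl machdep.adjkerntz
 *	sysctl -w machdep.disable_rtc_set=1
 */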
1022
1023/*
1024 * Initialize 386 and configure to run kernel
1025 */
1026
1027/*
1028 * Initialize segments & interrupt table
1029 */
1030
1031int currentldt;
1032int _default_ldt;
1033union descriptor gdt[NGDT];		/* global descriptor table */
1034struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
1035union descriptor ldt[NLDT];		/* local descriptor table */
1036
1037struct	i386tss	tss, panic_tss;
1038
1039extern  struct user *proc0paddr;
1040
1041/* software prototypes -- in more palatable form */
1042struct soft_segment_descriptor gdt_segs[] = {
1043/* GNULL_SEL	0 Null Descriptor */
1044{	0x0,			/* segment base address  */
1045	0x0,			/* length */
1046	0,			/* segment type */
1047	0,			/* segment descriptor priority level */
1048	0,			/* segment descriptor present */
1049	0, 0,
1050	0,			/* default 32 vs 16 bit size */
1051	0  			/* limit granularity (byte/page units)*/ },
1052/* GCODE_SEL	1 Code Descriptor for kernel */
1053{	0x0,			/* segment base address  */
1054	0xfffff,		/* length - all address space */
1055	SDT_MEMERA,		/* segment type */
1056	0,			/* segment descriptor priority level */
1057	1,			/* segment descriptor present */
1058	0, 0,
1059	1,			/* default 32 vs 16 bit size */
1060	1  			/* limit granularity (byte/page units)*/ },
1061/* GDATA_SEL	2 Data Descriptor for kernel */
1062{	0x0,			/* segment base address  */
1063	0xfffff,		/* length - all address space */
1064	SDT_MEMRWA,		/* segment type */
1065	0,			/* segment descriptor priority level */
1066	1,			/* segment descriptor present */
1067	0, 0,
1068	1,			/* default 32 vs 16 bit size */
1069	1  			/* limit granularity (byte/page units)*/ },
1070/* GLDT_SEL	3 LDT Descriptor */
1071{	(int) ldt,		/* segment base address  */
1072	sizeof(ldt)-1,		/* length - all address space */
1073	SDT_SYSLDT,		/* segment type */
1074	0,			/* segment descriptor priority level */
1075	1,			/* segment descriptor present */
1076	0, 0,
1077	0,			/* unused - default 32 vs 16 bit size */
1078	0  			/* limit granularity (byte/page units)*/ },
1079/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1080{	0x0,			/* segment base address  */
1081	0x0,			/* length - all address space */
1082	0,			/* segment type */
1083	0,			/* segment descriptor priority level */
1084	0,			/* segment descriptor present */
1085	0, 0,
1086	0,			/* default 32 vs 16 bit size */
1087	0  			/* limit granularity (byte/page units)*/ },
1088/* GPANIC_SEL	5 Panic Tss Descriptor */
1089{	(int) &panic_tss,	/* segment base address  */
1090	sizeof(tss)-1,		/* length - all address space */
1091	SDT_SYS386TSS,		/* segment type */
1092	0,			/* segment descriptor priority level */
1093	1,			/* segment descriptor present */
1094	0, 0,
1095	0,			/* unused - default 32 vs 16 bit size */
1096	0  			/* limit granularity (byte/page units)*/ },
1097/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1098{	(int) kstack,		/* segment base address  */
1099	sizeof(tss)-1,		/* length - all address space */
1100	SDT_SYS386TSS,		/* segment type */
1101	0,			/* segment descriptor priority level */
1102	1,			/* segment descriptor present */
1103	0, 0,
1104	0,			/* unused - default 32 vs 16 bit size */
1105	0  			/* limit granularity (byte/page units)*/ },
1106/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1107{	(int) ldt,		/* segment base address  */
1108	(512 * sizeof(union descriptor)-1),		/* length */
1109	SDT_SYSLDT,		/* segment type */
1110	0,			/* segment descriptor priority level */
1111	1,			/* segment descriptor present */
1112	0, 0,
1113	0,			/* unused - default 32 vs 16 bit size */
1114	0  			/* limit granularity (byte/page units)*/ },
1115/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1116{	0,			/* segment base address (overwritten by APM)  */
1117	0xfffff,		/* length */
1118	SDT_MEMERA,		/* segment type */
1119	0,			/* segment descriptor priority level */
1120	1,			/* segment descriptor present */
1121	0, 0,
1122	1,			/* default 32 vs 16 bit size */
1123	1  			/* limit granularity (byte/page units)*/ },
1124/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1125{	0,			/* segment base address (overwritten by APM)  */
1126	0xfffff,		/* length */
1127	SDT_MEMERA,		/* segment type */
1128	0,			/* segment descriptor priority level */
1129	1,			/* segment descriptor present */
1130	0, 0,
1131	0,			/* default 32 vs 16 bit size */
1132	1  			/* limit granularity (byte/page units)*/ },
1133/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1134{	0,			/* segment base address (overwritten by APM) */
1135	0xfffff,		/* length */
1136	SDT_MEMRWA,		/* segment type */
1137	0,			/* segment descriptor priority level */
1138	1,			/* segment descriptor present */
1139	0, 0,
1140	1,			/* default 32 vs 16 bit size */
1141	1  			/* limit granularity (byte/page units)*/ },
1142};
1143
1144struct soft_segment_descriptor ldt_segs[] = {
1145	/* Null Descriptor - overwritten by call gate */
1146{	0x0,			/* segment base address  */
1147	0x0,			/* length - all address space */
1148	0,			/* segment type */
1149	0,			/* segment descriptor priority level */
1150	0,			/* segment descriptor present */
1151	0, 0,
1152	0,			/* default 32 vs 16 bit size */
1153	0  			/* limit granularity (byte/page units)*/ },
1154	/* Null Descriptor - overwritten by call gate */
1155{	0x0,			/* segment base address  */
1156	0x0,			/* length - all address space */
1157	0,			/* segment type */
1158	0,			/* segment descriptor priority level */
1159	0,			/* segment descriptor present */
1160	0, 0,
1161	0,			/* default 32 vs 16 bit size */
1162	0  			/* limit granularity (byte/page units)*/ },
1163	/* Null Descriptor - overwritten by call gate */
1164{	0x0,			/* segment base address  */
1165	0x0,			/* length - all address space */
1166	0,			/* segment type */
1167	0,			/* segment descriptor priority level */
1168	0,			/* segment descriptor present */
1169	0, 0,
1170	0,			/* default 32 vs 16 bit size */
1171	0  			/* limit granularity (byte/page units)*/ },
1172	/* Code Descriptor for user */
1173{	0x0,			/* segment base address  */
1174	0xfffff,		/* length - all address space */
1175	SDT_MEMERA,		/* segment type */
1176	SEL_UPL,		/* segment descriptor priority level */
1177	1,			/* segment descriptor present */
1178	0, 0,
1179	1,			/* default 32 vs 16 bit size */
1180	1  			/* limit granularity (byte/page units)*/ },
1181	/* Data Descriptor for user */
1182{	0x0,			/* segment base address  */
1183	0xfffff,		/* length - all address space */
1184	SDT_MEMRWA,		/* segment type */
1185	SEL_UPL,		/* segment descriptor priority level */
1186	1,			/* segment descriptor present */
1187	0, 0,
1188	1,			/* default 32 vs 16 bit size */
1189	1  			/* limit granularity (byte/page units)*/ },
1190};
1191
1192void
1193setidt(idx, func, typ, dpl)
1194	int idx;
1195	inthand_t *func;
1196	int typ;
1197	int dpl;
1198{
1199	struct gate_descriptor *ip = idt + idx;
1200
1201	ip->gd_looffset = (int)func;
1202	ip->gd_selector = 8;
1203	ip->gd_stkcpy = 0;
1204	ip->gd_xx = 0;
1205	ip->gd_type = typ;
1206	ip->gd_dpl = dpl;
1207	ip->gd_p = 1;
1208	ip->gd_hioffset = ((int)func)>>16 ;
1209}
1210
1211#define	IDTVEC(name)	__CONCAT(X,name)
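/*
 * IDTVEC(name) simply pastes an "X" onto the name, so IDTVEC(div) is the
 * assembler entry point Xdiv, IDTVEC(syscall) is Xsyscall, and so on;
 * init386() below wires these into the IDT with setidt().
 */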
1212
1213extern inthand_t
1214	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1215	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1216	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1217	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1218	IDTVEC(syscall);
1219
1220#ifdef COMPAT_LINUX
1221extern inthand_t
1222	IDTVEC(linux_syscall);
1223#endif
1224
1225void
1226sdtossd(sd, ssd)
1227	struct segment_descriptor *sd;
1228	struct soft_segment_descriptor *ssd;
1229{
1230	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1231	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1232	ssd->ssd_type  = sd->sd_type;
1233	ssd->ssd_dpl   = sd->sd_dpl;
1234	ssd->ssd_p     = sd->sd_p;
1235	ssd->ssd_def32 = sd->sd_def32;
1236	ssd->ssd_gran  = sd->sd_gran;
1237}
1238
1239void
1240init386(first)
1241	int first;
1242{
1243	int x;
1244	unsigned biosbasemem, biosextmem;
1245	struct gate_descriptor *gdp;
1246	int gsel_tss;
1247	/* table descriptors - used by the CPU to load the descriptor tables */
1248	struct region_descriptor r_gdt, r_idt;
1249	int	pagesinbase, pagesinext;
1250	int	target_page;
1251
1252	proc0.p_addr = proc0paddr;
1253
1254	/*
1255	 * Initialize the console before we print anything out.
1256	 */
1257
1258	cninit ();
1259
1260	/*
1261	 * Make the GDT memory segments.  The code segment goes up to the end
1262	 * of the page containing etext; the data segment goes to the end of
1263	 * the address space.
1264	 */
1265	/*
1266	 * XXX text protection is temporarily (?) disabled.  The limit was
1267	 * i386_btop(i386_round_page(etext)) - 1.
1268	 */
1269	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1270	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1271	for (x = 0; x < NGDT; x++)
1272		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1273
1274	/* make ldt memory segments */
1275	/*
1276	 * The data segment limit must not cover the user area because we
1277	 * don't want the user area to be writable in copyout() etc. (page
1278	 * level protection is lost in kernel mode on 386's).  Also, we
1279	 * don't want the user area to be writable directly (page level
1280	 * protection of the user area is not available on 486's with
1281	 * CR0_WP set, because there is no user-read/kernel-write mode).
1282	 *
1283	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1284	 * should be spelled ...MAX_USER...
1285	 */
1286#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1287	/*
1288	 * The code segment limit has to cover the user area until we move
1289	 * the signal trampoline out of the user area.  This is safe because
1290	 * the code segment cannot be written to directly.
1291	 */
1292#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1293	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1294	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1295	/* Note: eventually we want private LDTs per process */
1296	for (x = 0; x < NLDT; x++)
1297		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1298
1299	/* exceptions */
1300	for (x = 0; x < NIDT; x++)
1301		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
1302	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1303	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1304	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1305 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1306	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1307	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1308	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1309	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1310	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1311	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1312	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1313	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1314	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1315	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1316	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1317	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1318	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1319	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
1320#ifdef COMPAT_LINUX
1321 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL);
1322#endif
1323
1324#include	"isa.h"
1325#if	NISA >0
1326	isa_defaultirq();
1327#endif
1328
1329	r_gdt.rd_limit = sizeof(gdt) - 1;
1330	r_gdt.rd_base =  (int) gdt;
1331	lgdt(&r_gdt);
1332
1333	r_idt.rd_limit = sizeof(idt) - 1;
1334	r_idt.rd_base = (int) idt;
1335	lidt(&r_idt);
1336
1337	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1338	lldt(_default_ldt);
1339	currentldt = _default_ldt;
1340
1341#ifdef DDB
1342	kdb_init();
1343	if (boothowto & RB_KDB)
1344		Debugger("Boot flags requested debugger");
1345#endif
1346
1347	/* Use BIOS values stored in RTC CMOS RAM, since probing
1348	 * breaks certain 386 AT relics.
1349	 */
1350	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1351	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1352
1353	/*
1354	 * Print a warning if the official BIOS interface disagrees
1355	 * with the hackish interface used above.  Eventually only
1356	 * the official interface should be used.
1357	 */
1358	if (bootinfo.bi_memsizes_valid) {
1359		if (bootinfo.bi_basemem != biosbasemem)
1360			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1361			       bootinfo.bi_basemem, biosbasemem);
1362		if (bootinfo.bi_extmem != biosextmem)
1363			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1364			       bootinfo.bi_extmem, biosextmem);
1365	}
1366
1367	/*
1368	 * If BIOS tells us that it has more than 640k in the basemem,
1369	 *	don't believe it - set it to 640k.
1370	 */
1371	if (biosbasemem > 640)
1372		biosbasemem = 640;
1373
1374	/*
1375	 * Some 386 machines might give us a bogus number for extended
1376	 *	mem. If this happens, stop now.
1377	 */
1378#ifndef LARGEMEM
1379	if (biosextmem > 65536) {
1380		panic("extended memory beyond limit of 64MB");
1381		/* NOTREACHED */
1382	}
1383#endif
1384
1385	pagesinbase = biosbasemem * 1024 / NBPG;
1386	pagesinext = biosextmem * 1024 / NBPG;
1387
1388	/*
1389	 * Special hack for chipsets that still remap the 384k hole when
1390	 *	there's 16MB of memory - this really confuses people that
1391	 *	are trying to use bus mastering ISA controllers with the
1392	 *	"16MB limit"; they only have 16MB, but the remapping puts
1393	 *	them beyond the limit.
1394	 */
1395	/*
1396	 * If extended memory is between 15-16MB (16-17MB phys address range),
1397	 *	chop it to 15MB.
1398	 */
1399	if ((pagesinext > 3840) && (pagesinext < 4096))
1400		pagesinext = 3840;
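	/*
	 * In 4K pages, 3840 pages == 15MB and 4096 pages == 16MB, so the test
	 * above catches exactly the range that such chipsets remap the 384K
	 * hole into.
	 */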
1401
1402	/*
1403	 * Maxmem isn't the "maximum memory", it's one larger than the
1404 * highest page of the physical address space. It should be
1405	 * called something like "Maxphyspage".
1406	 */
1407	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1408
1409#ifdef MAXMEM
1410	Maxmem = MAXMEM/4;
1411#endif
1412	/*
1413	 * Calculate number of physical pages, but account for Maxmem
1414	 *	adjustment above.
1415	 */
1416	physmem = pagesinbase + Maxmem - 0x100000/PAGE_SIZE;
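	/*
	 * Worked example: with 640K base and 15360K extended memory,
	 * pagesinbase == 160, pagesinext == 3840 and 0x100000/PAGE_SIZE == 256
	 * (the pages of the first megabyte), so Maxmem == 3840 + 256 == 4096
	 * (top of memory at 16MB) and physmem == 160 + 4096 - 256 == 4000 pages.
	 */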
1417
1418	/* call pmap initialization to make new kernel address space */
1419	pmap_bootstrap (first, 0);
1420
1421	/*
1422	 * Do a quick, non-destructive check over extended memory to verify
1423	 * what the BIOS tells us agrees with reality. Adjust down Maxmem
1424	 * if we find that the page can't be correctly written to/read from.
1425	 */
1426
1427	for (target_page = Maxmem - 1; target_page >= atop(first); target_page--) {
1428		int tmp;
1429
1430		/*
1431		 * map page into kernel: valid, read/write, non-cacheable
1432		 */
1433		*(int *)CMAP1 = PG_V | PG_KW | PG_N | ptoa(target_page);
1434		pmap_update();
1435
1436		tmp = *(int *)CADDR1;
1437		/*
1438		 * Test for alternating 1's and 0's
1439		 */
1440		*(int *)CADDR1 = 0xaaaaaaaa;
1441		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1442			Maxmem = target_page;
1443			badpages++;
1444			continue;
1445		}
1446		/*
1447		 * Test for alternating 0's and 1's
1448		 */
1449		*(int *)CADDR1 = 0x55555555;
1450		if (*(int *)CADDR1 != 0x55555555) {
1451			Maxmem = target_page;
1452			badpages++;
1453			continue;
1454		}
1455		/*
1456		 * Test for all 1's
1457		 */
1458		*(int *)CADDR1 = 0xffffffff;
1459		if (*(int *)CADDR1 != 0xffffffff) {
1460			Maxmem = target_page;
1461			badpages++;
1462			continue;
1463		}
1464		/*
1465		 * Test for all 0's
1466		 */
1467		*(int *)CADDR1 = 0x0;
1468		if (*(int *)CADDR1 != 0x0) {
1469			/*
1470			 * test of page failed
1471			 */
1472			Maxmem = target_page;
1473			badpages++;
1474			continue;
1475		}
1476		*(int *)CADDR1 = tmp;
1477	}
1478	if (badpages != 0)
1479		printf("WARNING: BIOS extended memory size and reality don't agree.\n");
1480
1481	*(int *)CMAP1 = 0;
1482	pmap_update();
1483
1484	avail_end = (Maxmem << PAGE_SHIFT)
1485		    - i386_round_page(sizeof(struct msgbuf));
1486
1487	/*
1488	 * Initialize pointers to the two chunks of memory, for use
1489	 *	later in vm_page_startup.
1490	 */
1491	/* avail_start is initialized in pmap_bootstrap */
1492	x = 0;
1493	if (pagesinbase > 1) {
1494		phys_avail[x++] = NBPG;		/* skip first page of memory */
1495		phys_avail[x++] = pagesinbase * NBPG;	/* memory up to the ISA hole */
1496	}
1497	phys_avail[x++] = avail_start;	/* memory up to the end */
1498	phys_avail[x++] = avail_end;
1499	phys_avail[x++] = 0;		/* no more chunks */
1500	phys_avail[x++] = 0;
1501
1502	/* now running on new page tables, configured, and u/iom is accessible */
1503
1504	/* make an initial TSS so the CPU can get the interrupt stack on syscall! */
1505	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1506	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1507	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1508
1509	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1510		(sizeof(tss))<<16;
1511
1512	ltr(gsel_tss);
1513
1514	/* make a call gate to reenter kernel with */
1515	gdp = &ldt[LSYS5CALLS_SEL].gd;
1516
1517	x = (int) &IDTVEC(syscall);
1518	gdp->gd_looffset = x++;
1519	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1520	gdp->gd_stkcpy = 1;
1521	gdp->gd_type = SDT_SYS386CGT;
1522	gdp->gd_dpl = SEL_UPL;
1523	gdp->gd_p = 1;
1524	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1525
1526	/* transfer to user mode */
1527
1528	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1529	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1530
1531	/* setup proc 0's pcb */
1532	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1533	proc0.p_addr->u_pcb.pcb_flags = 0;
1534	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1535}
1536
1537/*
1538 * The registers are in the frame; the frame is in the user area of
1539 * the process in question; when the process is active, the registers
1540 * are in "the kernel stack"; when it's not, they're still there, but
1541 * things get flipped around.  So, since p->p_md.md_regs is the absolute address
1542 * of the register set, take its offset from the kernel stack, and
1543 * index into the user block.  Don't you just *love* virtual memory?
1544 * (I'm starting to think seymour is right...)
1545 */
1546#define	TF_REGP(p)	((struct trapframe *) \
1547			 ((char *)(p)->p_addr \
1548			  + ((char *)(p)->p_md.md_regs - kstack)))
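/*
 * In other words, (char *)p->p_md.md_regs - kstack is the offset of the
 * trapframe within the U area, and adding that offset to p->p_addr reaches
 * the same frame through the process's user-structure mapping whether or
 * not the process is currently running.
 */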
1549
1550int
1551ptrace_set_pc(p, addr)
1552	struct proc *p;
1553	unsigned int addr;
1554{
1555	TF_REGP(p)->tf_eip = addr;
1556	return (0);
1557}
1558
1559int
1560ptrace_single_step(p)
1561	struct proc *p;
1562{
1563	TF_REGP(p)->tf_eflags |= PSL_T;
1564	return (0);
1565}
1566
1567int
1568ptrace_getregs(p, addr)
1569	struct proc *p;
1570	unsigned int *addr;
1571{
1572	int error;
1573	struct reg regs;
1574
1575	error = fill_regs(p, &regs);
1576	if (error)
1577		return (error);
1578	return (copyout(&regs, addr, sizeof regs));
1579}
1580
1581int
1582ptrace_setregs(p, addr)
1583	struct proc *p;
1584	unsigned int *addr;
1585{
1586	int error;
1587	struct reg regs;
1588
1589	error = copyin(addr, &regs, sizeof regs);
1590	if (error)
1591		return (error);
1592	return (set_regs(p, &regs));
1593}
1594
1595int ptrace_write_u(p, off, data)
1596	struct proc *p;
1597	vm_offset_t off;
1598	int data;
1599{
1600	struct trapframe frame_copy;
1601	vm_offset_t min;
1602	struct trapframe *tp;
1603
1604	/*
1605	 * Privileged kernel state is scattered all over the user area.
1606	 * Only allow write access to parts of regs and to fpregs.
1607	 */
1608	min = (char *)p->p_md.md_regs - kstack;
1609	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1610		tp = TF_REGP(p);
1611		frame_copy = *tp;
1612		*(int *)((char *)&frame_copy + (off - min)) = data;
1613		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1614		    !CS_SECURE(frame_copy.tf_cs))
1615			return (EINVAL);
1616		*(int*)((char *)p->p_addr + off) = data;
1617		return (0);
1618	}
1619	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1620	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1621		*(int*)((char *)p->p_addr + off) = data;
1622		return (0);
1623	}
1624	return (EFAULT);
1625}
1626
1627int
1628fill_regs(p, regs)
1629	struct proc *p;
1630	struct reg *regs;
1631{
1632	struct trapframe *tp;
1633
1634	tp = TF_REGP(p);
1635	regs->r_es = tp->tf_es;
1636	regs->r_ds = tp->tf_ds;
1637	regs->r_edi = tp->tf_edi;
1638	regs->r_esi = tp->tf_esi;
1639	regs->r_ebp = tp->tf_ebp;
1640	regs->r_ebx = tp->tf_ebx;
1641	regs->r_edx = tp->tf_edx;
1642	regs->r_ecx = tp->tf_ecx;
1643	regs->r_eax = tp->tf_eax;
1644	regs->r_eip = tp->tf_eip;
1645	regs->r_cs = tp->tf_cs;
1646	regs->r_eflags = tp->tf_eflags;
1647	regs->r_esp = tp->tf_esp;
1648	regs->r_ss = tp->tf_ss;
1649	return (0);
1650}
1651
1652int
1653set_regs(p, regs)
1654	struct proc *p;
1655	struct reg *regs;
1656{
1657	struct trapframe *tp;
1658
1659	tp = TF_REGP(p);
1660	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1661	    !CS_SECURE(regs->r_cs))
1662		return (EINVAL);
1663	tp->tf_es = regs->r_es;
1664	tp->tf_ds = regs->r_ds;
1665	tp->tf_edi = regs->r_edi;
1666	tp->tf_esi = regs->r_esi;
1667	tp->tf_ebp = regs->r_ebp;
1668	tp->tf_ebx = regs->r_ebx;
1669	tp->tf_edx = regs->r_edx;
1670	tp->tf_ecx = regs->r_ecx;
1671	tp->tf_eax = regs->r_eax;
1672	tp->tf_eip = regs->r_eip;
1673	tp->tf_cs = regs->r_cs;
1674	tp->tf_eflags = regs->r_eflags;
1675	tp->tf_esp = regs->r_esp;
1676	tp->tf_ss = regs->r_ss;
1677	return (0);
1678}
1679
1680#ifndef DDB
1681void
1682Debugger(const char *msg)
1683{
1684	printf("Debugger(\"%s\") called.\n", msg);
1685}
1686#endif /* no DDB */
1687
1688#include <sys/disklabel.h>
1689#define b_cylin	b_resid
1690/*
1691 * Determine the size of the transfer, and make sure it is
1692 * within the boundaries of the partition. Adjust transfer
1693 * if needed, and signal errors or early completion.
1694 */
1695int
1696bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1697{
1698        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1699        int labelsect = lp->d_partitions[0].p_offset;
1700        int maxsz = p->p_size,
1701                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1702
1703        /* overwriting disk label ? */
1704        /* XXX should also protect bootstrap in first 8K */
1705        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1706#if LABELSECTOR != 0
1707            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1708#endif
1709            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1710                bp->b_error = EROFS;
1711                goto bad;
1712        }
1713
1714#if     defined(DOSBBSECTOR) && defined(notyet)
1715        /* overwriting master boot record? */
1716        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1717            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1718                bp->b_error = EROFS;
1719                goto bad;
1720        }
1721#endif
1722
1723        /* beyond partition? */
1724        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1725                /* if exactly at end of disk, return an EOF */
1726                if (bp->b_blkno == maxsz) {
1727                        bp->b_resid = bp->b_bcount;
1728                        return(0);
1729                }
1730                /* or truncate if part of it fits */
1731                sz = maxsz - bp->b_blkno;
1732                if (sz <= 0) {
1733                        bp->b_error = EINVAL;
1734                        goto bad;
1735                }
1736                bp->b_bcount = sz << DEV_BSHIFT;
1737        }
1738
1739        /* calculate cylinder for disksort to order transfers with */
1740        bp->b_pblkno = bp->b_blkno + p->p_offset;
1741        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1742        return(1);
1743
1744bad:
1745        bp->b_flags |= B_ERROR;
1746        return(-1);
1747}
1748
1749int
1750disk_externalize(int drive, void *userp, size_t *maxlen)
1751{
1752	if(*maxlen < sizeof drive) {
1753		return ENOMEM;
1754	}
1755
1756	*maxlen -= sizeof drive;
1757	return copyout(&drive, userp, sizeof drive);
1758}
1759