machdep.c revision 7792
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.117 1995/04/12 20:47:28 wollman Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/signalvar.h>
47#include <sys/kernel.h>
48#include <sys/proc.h>
49#include <sys/user.h>
50#include <sys/buf.h>
51#include <sys/reboot.h>
52#include <sys/conf.h>
53#include <sys/file.h>
54#include <sys/callout.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/mount.h>
58#include <sys/msgbuf.h>
59#include <sys/ioctl.h>
60#include <sys/sysent.h>
61#include <sys/tty.h>
62#include <sys/sysctl.h>
63#include <sys/devconf.h>
64
65#ifdef SYSVSHM
66#include <sys/shm.h>
67#endif
68
69#ifdef SYSVMSG
70#include <sys/msg.h>
71#endif
72
73#ifdef SYSVSEM
74#include <sys/sem.h>
75#endif
76
77#include <vm/vm.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_page.h>
80
81#include <sys/exec.h>
82#include <sys/vnode.h>
83
84#include <ddb/ddb.h>
85
86#include <net/netisr.h>
87
88/* XXX correctly declaring all the netisr's is painful. */
89#include <net/if.h>
90#include <net/route.h>
91
92#include <netinet/in.h>
93#include <netinet/in_systm.h>
94#include <netinet/ip.h>
95#include <netinet/if_ether.h>
96#include <netinet/ip_var.h>
97
98#include <netns/ns.h>
99#include <netns/ns_if.h>
100
101#include <netiso/iso.h>
102#include <netiso/iso_var.h>
103
104#include <netccitt/dll.h>
105#include <netccitt/x25.h>
106#include <netccitt/pk.h>
107#include <sys/socketvar.h>
108#include <netccitt/pk_var.h>
109
110#include "ether.h"
111
112#include <machine/cpu.h>
113#include <machine/npx.h>
114#include <machine/reg.h>
115#include <machine/psl.h>
116#include <machine/clock.h>
117#include <machine/specialreg.h>
118#include <machine/sysarch.h>
119#include <machine/cons.h>
120#include <machine/devconf.h>
121#include <machine/bootinfo.h>
122#include <machine/md_var.h>
123
124#include <i386/isa/isa.h>
125#include <i386/isa/isa_device.h>
126#include <i386/isa/rtc.h>
127
128static void identifycpu(void);
129static void initcpu(void);
130
131char machine[] = "i386";
132char cpu_model[128];
133
134struct kern_devconf kdc_cpu0 = {
135	0, 0, 0,		/* filled in by dev_attach */
136	"cpu", 0, { MDDT_CPU },
137	0, 0, 0, CPU_EXTERNALLEN,
138	0,			/* CPU has no parent */
139	0,			/* no parentdata */
140	DC_BUSY,		/* the CPU is always busy */
141	cpu_model,		/* no sense in duplication */
142	DC_CLS_CPU		/* class */
143};
144
145#ifndef PANIC_REBOOT_WAIT_TIME
146#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
147#endif
148
149/*
150 * Declare these as initialized data so we can patch them.
151 */
152int	nswbuf = 0;
153#ifdef	NBUF
154int	nbuf = NBUF;
155#else
156int	nbuf = 0;
157#endif
158
159#ifdef BOUNCE_BUFFERS
160extern char *bouncememory;
161extern int maxbkva;
162#ifdef BOUNCEPAGES
163int	bouncepages = BOUNCEPAGES;
164#else
165int	bouncepages = 0;
166#endif
167#endif	/* BOUNCE_BUFFERS */
168
169extern int freebufspace;
170int	msgbufmapped = 0;		/* set when safe to use msgbuf */
171int _udatasel, _ucodesel;
172
173
174/*
175 * Machine-dependent startup code
176 */
177int boothowto = 0, bootverbose = 0, Maxmem = 0, badpages = 0, physmem = 0;
178long dumplo;
179extern int bootdev;
180int biosmem;
181
182vm_offset_t	phys_avail[6];
183
184int cpu_class;
185
186void dumpsys __P((void));
187vm_offset_t buffer_sva, buffer_eva;
188vm_offset_t clean_sva, clean_eva;
189vm_offset_t pager_sva, pager_eva;
190extern int pager_map_size;
191
192#define offsetof(type, member)	((size_t)(&((type *)0)->member))
193
194void
195cpu_startup()
196{
197	register unsigned i;
198	register caddr_t v;
199	vm_offset_t maxaddr;
200	vm_size_t size = 0;
201	int firstaddr;
202	vm_offset_t minaddr;
203
204	if (boothowto & RB_VERBOSE)
205		bootverbose++;
206
207	/*
208	 * Initialize error message buffer (at end of core).
209	 */
210
211	/* avail_end was pre-decremented in init_386() to compensate */
212	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
213		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
214			   avail_end + i * NBPG,
215			   VM_PROT_ALL, TRUE);
216	msgbufmapped = 1;
217
218	/*
219	 * Good {morning,afternoon,evening,night}.
220	 */
221	printf(version);
222	startrtclock();
223	identifycpu();
224	printf("real memory  = %d (%d pages)\n", ptoa(physmem), physmem);
225	if (badpages)
226		printf("bad memory   = %d (%d pages)\n", ptoa(badpages), badpages);
227
228	/*
229	 * Quickly wire in netisrs.
230	 */
231#define DONET(isr, n) do { netisrs[n] = isr; } while(0)
232#ifdef INET
233#if NETHER > 0
234	DONET(arpintr, NETISR_ARP);
235#endif
236	DONET(ipintr, NETISR_IP);
237#endif
238#ifdef NS
239	DONET(nsintr, NETISR_NS);
240#endif
241#ifdef ISO
242	DONET(clnlintr, NETISR_ISO);
243#endif
244#ifdef CCITT
245	DONET(ccittintr, NETISR_CCITT);
246#endif
247#ifdef ISDN
248	DONET(isdnintr, NETISR_ISDN);
249#endif
250#undef DONET
251
252	/*
253	 * Allocate space for system data structures.
254	 * The first available kernel virtual address is in "v".
255	 * As pages of kernel virtual memory are allocated, "v" is incremented.
256	 * As pages of memory are allocated and cleared,
257	 * "firstaddr" is incremented.
258	 * An index into the kernel page table corresponding to the
259	 * virtual memory address maintained in "v" is kept in "mapaddr".
260	 */
261
262	/*
263	 * Make two passes.  The first pass calculates how much memory is
264	 * needed and allocates it.  The second pass assigns virtual
265	 * addresses to the various data structures.
266	 */
267	firstaddr = 0;
268again:
269	v = (caddr_t)firstaddr;
270
271#define	valloc(name, type, num) \
272	    (name) = (type *)v; v = (caddr_t)((name)+(num))
273#define	valloclim(name, type, num, lim) \
274	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
275	valloc(callout, struct callout, ncallout);
276#ifdef SYSVSHM
277	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
278#endif
279#ifdef SYSVSEM
280	valloc(sema, struct semid_ds, seminfo.semmni);
281	valloc(sem, struct sem, seminfo.semmns);
282	/* This is pretty disgusting! */
283	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
284#endif
285#ifdef SYSVMSG
286	valloc(msgpool, char, msginfo.msgmax);
287	valloc(msgmaps, struct msgmap, msginfo.msgseg);
288	valloc(msghdrs, struct msg, msginfo.msgtql);
289	valloc(msqids, struct msqid_ds, msginfo.msgmni);
290#endif
291
292	if (nbuf == 0) {
293		nbuf = 30;
294		if( physmem > 1024)
295			nbuf += min((physmem - 1024) / 12, 1024);
296	}
297	nswbuf = min(nbuf, 128);
298
299	valloc(swbuf, struct buf, nswbuf);
300	valloc(buf, struct buf, nbuf);
301
302#ifdef BOUNCE_BUFFERS
303	/*
304	 * If there is more than 16MB of memory, allocate some bounce buffers
305	 */
306	if (Maxmem > 4096) {
307		if (bouncepages == 0) {
308			bouncepages = 64;
309			bouncepages += ((Maxmem - 4096) / 2048) * 32;
310		}
311		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
312		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
313	}
314#endif
315
316	/*
317	 * End of first pass, size has been calculated so allocate memory
318	 */
319	if (firstaddr == 0) {
320		size = (vm_size_t)(v - firstaddr);
321		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
322		if (firstaddr == 0)
323			panic("startup: no room for tables");
324		goto again;
325	}
326
327	/*
328	 * End of second pass, addresses have been assigned
329	 */
330	if ((vm_size_t)(v - firstaddr) != size)
331		panic("startup: table size inconsistency");
332
333#ifdef BOUNCE_BUFFERS
334	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
335			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
336				maxbkva + pager_map_size, TRUE);
337	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
338#else
339	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
340			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
341#endif
342	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
343				(nbuf*MAXBSIZE), TRUE);
344	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
345				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
346	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
347				(16*ARG_MAX), TRUE);
348	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
349				(maxproc*UPAGES*PAGE_SIZE), FALSE);
350
351	/*
352	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
353	 * we use the more space efficient malloc in place of kmem_alloc.
354	 */
355	mclrefcnt = (char *)malloc(NMBCLUSTERS+CLBYTES/MCLBYTES,
356				   M_MBUF, M_NOWAIT);
357	bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES);
358	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
359			       VM_MBUF_SIZE, FALSE);
360	/*
361	 * Initialize callouts
362	 */
363	callfree = callout;
364	for (i = 1; i < ncallout; i++)
365		callout[i-1].c_next = &callout[i];
366
367        if (boothowto & RB_CONFIG)
368		userconfig();
369	printf("avail memory = %d (%d pages)\n", ptoa(cnt.v_free_count), cnt.v_free_count);
370
371#ifdef BOUNCE_BUFFERS
372	/*
373	 * init bounce buffers
374	 */
375	vm_bounce_init();
376#endif
377
378	/*
379	 * Set up CPU-specific registers, cache, etc.
380	 */
381	initcpu();
382
383	/*
384	 * Set up buffers, so they can be used to read disk labels.
385	 */
386	bufinit();
387	vm_pager_bufferinit();
388
389	/*
390	 * Configure the system.
391	 */
392	configure();
393	if (bootverbose) {
394		printf("BIOS Geometries:");
395		for (i=0; i < N_BIOS_GEOM; i++)
396			printf(" %x:%x\n", i, bootinfo.bi_bios_geom[i]);
397		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
398	}
399}
400
401
402struct cpu_nameclass i386_cpus[] = {
403	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
404	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
405	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
406	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
407	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
408	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
409	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
410};
411
412static void
413identifycpu()
414{
415	printf("CPU: ");
416	if (cpu >= 0
417	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
418		cpu_class = i386_cpus[cpu].cpu_class;
419		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
420	} else {
421		printf("unknown cpu type %d\n", cpu);
422		panic("startup: bad cpu id");
423	}
424
425#ifdef I586_CPU
426	if(cpu_class == CPUCLASS_586) {
427		calibrate_cyclecounter();
428		printf("%d-MHz ", pentium_mhz);
429	}
430#endif
431	if (!strcmp(cpu_vendor,"GenuineIntel")) {
432		if ((cpu_id & 0xf00) > 3) {
433			cpu_model[0] = '\0';
434
435			switch (cpu_id & 0x3000) {
436			case 0x1000:
437				strcpy(cpu_model, "Overdrive ");
438				break;
439			case 0x2000:
440				strcpy(cpu_model, "Dual ");
441				break;
442			}
443			if ((cpu_id & 0xf00) == 0x400) {
444				strcat(cpu_model, "i486 ");
445			} else if ((cpu_id & 0xf00) == 0x500) {
446				strcat(cpu_model, "Pentium ");
447			} else {
448				strcat(cpu_model, "unknown ");
449			}
450
451			switch (cpu_id & 0xff0) {
452			case 0x400:
453				strcat(cpu_model, "DX"); break;
454			case 0x410:
455				strcat(cpu_model, "DX"); break;
456			case 0x420:
457				strcat(cpu_model, "SX"); break;
458			case 0x430:
459				strcat(cpu_model, "DX2"); break;
460			case 0x440:
461				strcat(cpu_model, "SL"); break;
462			case 0x450:
463				strcat(cpu_model, "SX2"); break;
464			case 0x470:
465				strcat(cpu_model, "DX2 Write-Back Enhanced");
466				break;
467			case 0x480:
468				strcat(cpu_model, "DX4"); break;
469#ifdef I586_CPU
470			case 0x510:
471				if (pentium_mhz == 60) {
472					strcat(cpu_model, "510\\60");
473				} else if (pentium_mhz == 66) {
474					strcat(cpu_model, "567\\66");
475				} else {
476					strcat(cpu_model,"510\\60 or 567\\66");
477				}
478				break;
479			case 0x520:
480				if (pentium_mhz == 90) {
481					strcat(cpu_model, "735\\90");
482				} else if (pentium_mhz == 100) {
483					strcat(cpu_model, "815\\100");
484				} else {
485					strcat(cpu_model,"735\\90 or 815\\100");
486				}
487				break;
488#endif
489			}
490		}
491	}
492
493	printf("%s (", cpu_model);
494	switch(cpu_class) {
495	case CPUCLASS_286:
496		printf("286");
497		break;
498	case CPUCLASS_386:
499		printf("386");
500		break;
501	case CPUCLASS_486:
502		printf("486");
503		break;
504	case CPUCLASS_586:
505		printf("Pentium");
506		break;
507	default:
508		printf("unknown");	/* will panic below... */
509	}
510	printf("-class CPU)\n");
511	if(*cpu_vendor)
512		printf("  Origin = \"%s\"",cpu_vendor);
513	if(cpu_id)
514		printf("  Id = 0x%lx",cpu_id);
515
516	if (!strcmp(cpu_vendor, "GenuineIntel")) {
517		printf("  Stepping=%d", cpu_id & 0xf);
518		if (cpu_high > 0) {
519#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
520			printf("  Features=0x%b", cpu_feature, FEATUREFMT);
521		}
522	}
523	printf("\n");
524
525	/*
526	 * Now that we have told the user what they have,
527	 * let them know if that machine type isn't configured.
528	 */
529	switch (cpu_class) {
530	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
531#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
532#error This kernel is not configured for one of the supported CPUs
533#endif
534#if !defined(I386_CPU)
535	case CPUCLASS_386:
536#endif
537#if !defined(I486_CPU)
538	case CPUCLASS_486:
539#endif
540#if !defined(I586_CPU)
541	case CPUCLASS_586:
542#endif
543		panic("CPU class not configured");
544	default:
545		break;
546	}
547	dev_attach(&kdc_cpu0);
548}
549
550/*
551 * Send an interrupt to process.
552 *
553 * Stack is set up to allow sigcode stored
554 * in u. to call routine, followed by kcall
555 * to sigreturn routine below.  After sigreturn
556 * resets the signal mask, the stack, and the
557 * frame pointer, it returns to the user
558 * specified pc, psl.
559 */
560void
561sendsig(catcher, sig, mask, code)
562	sig_t catcher;
563	int sig, mask;
564	unsigned code;
565{
566	register struct proc *p = curproc;
567	register int *regs;
568	register struct sigframe *fp;
569	struct sigframe sf;
570	struct sigacts *psp = p->p_sigacts;
571	int oonstack;
572
573	regs = p->p_md.md_regs;
574        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
575	/*
576	 * Allocate and validate space for the signal handler
577	 * context. Note that if the stack is in P0 space, the
578	 * call to grow() is a nop, and the useracc() check
579	 * will fail if the process has not already allocated
580	 * the space with a `brk'.
581	 */
582        if ((psp->ps_flags & SAS_ALTSTACK) &&
583	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
584	    (psp->ps_sigonstack & sigmask(sig))) {
585		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
586		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
587		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
588	} else {
589		fp = (struct sigframe *)(regs[tESP]
590			- sizeof(struct sigframe));
591	}
592
593	/*
594	 * grow() will return FALSE if the fp will not fit inside the stack
595	 *	and the stack can not be grown. useracc will return FALSE
596	 *	if access is denied.
597	 */
598	if ((grow(p, (int)fp) == FALSE) ||
599	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
600		/*
601		 * Process has trashed its stack; give it an illegal
602		 * instruction to halt it in its tracks.
603		 */
604		SIGACTION(p, SIGILL) = SIG_DFL;
605		sig = sigmask(SIGILL);
606		p->p_sigignore &= ~sig;
607		p->p_sigcatch &= ~sig;
608		p->p_sigmask &= ~sig;
609		psignal(p, SIGILL);
610		return;
611	}
612
613	/*
614	 * Build the argument list for the signal handler.
615	 */
616	if (p->p_sysent->sv_sigtbl) {
617		if (sig < p->p_sysent->sv_sigsize)
618			sig = p->p_sysent->sv_sigtbl[sig];
619		else
620			sig = p->p_sysent->sv_sigsize + 1;
621	}
622	sf.sf_signum = sig;
623	sf.sf_code = code;
624	sf.sf_scp = &fp->sf_sc;
625	sf.sf_addr = (char *) regs[tERR];
626	sf.sf_handler = catcher;
627
628	/* save scratch registers */
629	sf.sf_sc.sc_eax = regs[tEAX];
630	sf.sf_sc.sc_ebx = regs[tEBX];
631	sf.sf_sc.sc_ecx = regs[tECX];
632	sf.sf_sc.sc_edx = regs[tEDX];
633	sf.sf_sc.sc_esi = regs[tESI];
634	sf.sf_sc.sc_edi = regs[tEDI];
635	sf.sf_sc.sc_cs = regs[tCS];
636	sf.sf_sc.sc_ds = regs[tDS];
637	sf.sf_sc.sc_ss = regs[tSS];
638	sf.sf_sc.sc_es = regs[tES];
639	sf.sf_sc.sc_isp = regs[tISP];
640
641	/*
642	 * Build the signal context to be used by sigreturn.
643	 */
644	sf.sf_sc.sc_onstack = oonstack;
645	sf.sf_sc.sc_mask = mask;
646	sf.sf_sc.sc_sp = regs[tESP];
647	sf.sf_sc.sc_fp = regs[tEBP];
648	sf.sf_sc.sc_pc = regs[tEIP];
649	sf.sf_sc.sc_ps = regs[tEFLAGS];
650
651	/*
652	 * Copy the sigframe out to the user's stack.
653	 */
654	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
655		/*
656		 * Something is wrong with the stack pointer.
657		 * ...Kill the process.
658		 */
659		sigexit(p, SIGILL);
660	};
661
662	regs[tESP] = (int)fp;
663	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
664	regs[tEFLAGS] &= ~PSL_VM;
665	regs[tCS] = _ucodesel;
666	regs[tDS] = _udatasel;
667	regs[tES] = _udatasel;
668	regs[tSS] = _udatasel;
669}
670
671/*
672 * System call to cleanup state after a signal
673 * has been taken.  Reset signal mask and
674 * stack state from context left by sendsig (above).
675 * Return to previous pc and psl as specified by
676 * context left by sendsig. Check carefully to
677 * make sure that the user has not modified the
678 * state to gain improper privileges.
679 */
680struct sigreturn_args {
681	struct sigcontext *sigcntxp;
682};
683
684int
685sigreturn(p, uap, retval)
686	struct proc *p;
687	struct sigreturn_args *uap;
688	int *retval;
689{
690	register struct sigcontext *scp;
691	register struct sigframe *fp;
692	register int *regs = p->p_md.md_regs;
693	int eflags;
694
695	/*
696	 * (XXX old comment) regs[tESP] points to the return address.
697	 * The user scp pointer is above that.
698	 * The return address is faked in the signal trampoline code
699	 * for consistency.
700	 */
701	scp = uap->sigcntxp;
702	fp = (struct sigframe *)
703	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
704
705	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
706		return(EINVAL);
707
708	/*
709	 * Don't allow users to change privileged or reserved flags.
710	 */
711#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
712	eflags = scp->sc_ps;
713	/*
714	 * XXX do allow users to change the privileged flag PSL_RF.  The
715	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
716	 * sometimes set it there too.  tf_eflags is kept in the signal
717	 * context during signal handling and there is no other place
718	 * to remember it, so the PSL_RF bit may be corrupted by the
719	 * signal handler without us knowing.  Corruption of the PSL_RF
720	 * bit at worst causes one more or one less debugger trap, so
721	 * allowing it is fairly harmless.
722	 */
723	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
724#ifdef DEBUG
725    		printf("sigreturn: eflags = 0x%x\n", eflags);
726#endif
727    		return(EINVAL);
728	}
729
730	/*
731	 * Don't allow users to load a valid privileged %cs.  Let the
732	 * hardware check for invalid selectors, excess privilege in
733	 * other selectors, invalid %eip's and invalid %esp's.
734	 */
735#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
736	if (!CS_SECURE(scp->sc_cs)) {
737#ifdef DEBUG
738    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
739#endif
740		trapsignal(p, SIGBUS, T_PROTFLT);
741		return(EINVAL);
742	}
743
744	/* restore scratch registers */
745	regs[tEAX] = scp->sc_eax;
746	regs[tEBX] = scp->sc_ebx;
747	regs[tECX] = scp->sc_ecx;
748	regs[tEDX] = scp->sc_edx;
749	regs[tESI] = scp->sc_esi;
750	regs[tEDI] = scp->sc_edi;
751	regs[tCS] = scp->sc_cs;
752	regs[tDS] = scp->sc_ds;
753	regs[tES] = scp->sc_es;
754	regs[tSS] = scp->sc_ss;
755	regs[tISP] = scp->sc_isp;
756
757	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
758		return(EINVAL);
759
760	if (scp->sc_onstack & 01)
761		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
762	else
763		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
764	p->p_sigmask = scp->sc_mask &~
765	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
766	regs[tEBP] = scp->sc_fp;
767	regs[tESP] = scp->sc_sp;
768	regs[tEIP] = scp->sc_pc;
769	regs[tEFLAGS] = eflags;
770	return(EJUSTRETURN);
771}
772
773/*
774 * a simple function to make the system panic (and dump a vmcore)
775 * in a predictable fashion
776 */
void
diediedie()
{
	/* Deliberate, on-demand panic. */
	panic("because you said to!");
}
781
782int	waittime = -1;
783struct pcb dumppcb;
784
785__dead void
786boot(arghowto)
787	int arghowto;
788{
789	register long dummy;		/* r12 is reserved */
790	register int howto;		/* r11 == how to boot */
791	register int devtype;		/* r10 == major of root dev */
792
793	if (cold) {
794		printf("hit reset please");
795		for(;;);
796	}
797	howto = arghowto;
798	if ((howto&RB_NOSYNC) == 0 && waittime < 0) {
799		register struct buf *bp;
800		int iter, nbusy;
801
802		waittime = 0;
803		printf("\nsyncing disks... ");
804		/*
805		 * Release inodes held by texts before update.
806		 */
807		if (panicstr == 0)
808			vnode_pager_umount(NULL);
809		sync(&proc0, NULL, NULL);
810
811		for (iter = 0; iter < 20; iter++) {
812			nbusy = 0;
813			for (bp = &buf[nbuf]; --bp >= buf; ) {
814				if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) {
815					nbusy++;
816				}
817			}
818			if (nbusy == 0)
819				break;
820			printf("%d ", nbusy);
821			DELAY(40000 * iter);
822		}
823		if (nbusy) {
824			/*
825			 * Failed to sync all blocks. Indicate this and don't
826			 * unmount filesystems (thus forcing an fsck on reboot).
827			 */
828			printf("giving up\n");
829		} else {
830			printf("done\n");
831			/*
832			 * Unmount filesystems
833			 */
834			if (panicstr == 0)
835				vfs_unmountall();
836		}
837		DELAY(100000);			/* wait for console output to finish */
838		dev_shutdownall(FALSE);
839	}
840	splhigh();
841	devtype = major(rootdev);
842	if (howto&RB_HALT) {
843		printf("\n");
844		printf("The operating system has halted.\n");
845		printf("Please press any key to reboot.\n\n");
846		cngetc();
847	} else {
848		if (howto & RB_DUMP) {
849			savectx(&dumppcb, 0);
850			dumppcb.pcb_ptd = rcr3();
851			dumpsys();
852
853			if (PANIC_REBOOT_WAIT_TIME != 0) {
854				if (PANIC_REBOOT_WAIT_TIME != -1) {
855					int loop;
856					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
857						PANIC_REBOOT_WAIT_TIME);
858					for (loop = PANIC_REBOOT_WAIT_TIME; loop > 0; --loop) {
859						DELAY(1000 * 1000); /* one second */
860						if (cncheckc()) /* Did user type a key? */
861							break;
862					}
863					if (!loop)
864						goto die;
865				}
866			} else { /* zero time specified - reboot NOW */
867				goto die;
868			}
869			printf("--> Press a key on the console to reboot <--\n");
870			cngetc();
871		}
872	}
873#ifdef lint
874	dummy = 0; dummy = dummy;
875	printf("howto %d, devtype %d\n", arghowto, devtype);
876#endif
877die:
878	printf("Rebooting...\n");
879	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
880	cpu_reset();
881	for(;;) ;
882	/* NOTREACHED */
883}
884
885unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
886int		dumpsize = 0;		/* also for savecore */
887
888#ifdef DODUMP
889int		dodump = 1;
890#else
891int		dodump = 0;
892#endif
893/*
894 * Doadump comes here after turning off memory management and
895 * getting on the dump stack, either when called above, or by
896 * the auto-restart code.
897 */
898void
899dumpsys()
900{
901
902	if (!dodump)
903		return;
904	if (dumpdev == NODEV)
905		return;
906	if ((minor(dumpdev)&07) != 1)
907		return;
908	dumpsize = Maxmem;
909	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
910	printf("dump ");
911	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
912
913	case ENXIO:
914		printf("device bad\n");
915		break;
916
917	case EFAULT:
918		printf("device not ready\n");
919		break;
920
921	case EINVAL:
922		printf("area improper\n");
923		break;
924
925	case EIO:
926		printf("i/o error\n");
927		break;
928
929	case EINTR:
930		printf("aborted from console\n");
931		break;
932
933	default:
934		printf("succeeded\n");
935		break;
936	}
937}
938
static void
initcpu()
{
	/* Nothing to set up; cpu_startup() calls this as a hook. */
}
943
944/*
945 * Clear registers on exec
946 */
947void
948setregs(p, entry, stack)
949	struct proc *p;
950	u_long entry;
951	u_long stack;
952{
953	int *regs = p->p_md.md_regs;
954
955	bzero(regs, sizeof(struct trapframe));
956	regs[tEIP] = entry;
957	regs[tESP] = stack;
958	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
959	regs[tSS] = _udatasel;
960	regs[tDS] = _udatasel;
961	regs[tES] = _udatasel;
962	regs[tCS] = _ucodesel;
963
964	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
965	load_cr0(rcr0() | CR0_TS);	/* start emulating */
966#if	NNPX > 0
967	npxinit(__INITIAL_NPXCW__);
968#endif	/* NNPX > 0 */
969}
970
971/*
972 * machine dependent system variables.
973 */
974int
975cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
976	int *name;
977	u_int namelen;
978	void *oldp;
979	size_t *oldlenp;
980	void *newp;
981	size_t newlen;
982	struct proc *p;
983{
984	int error;
985
986	/* all sysctl names at this level are terminal */
987	if (namelen != 1)
988		return (ENOTDIR);               /* overloaded */
989
990	switch (name[0]) {
991	case CPU_CONSDEV:
992		return (sysctl_rdstruct(oldp, oldlenp, newp, &cn_tty->t_dev,
993		   sizeof cn_tty->t_dev));
994	case CPU_ADJKERNTZ:
995		error = sysctl_int(oldp, oldlenp, newp, newlen, &adjkerntz);
996		if (!error && newp)
997			resettodr();
998		return error;
999	case CPU_DISRTCSET:
1000		return (sysctl_int(oldp, oldlenp, newp,	newlen,	&disable_rtc_set));
1001	default:
1002		return (EOPNOTSUPP);
1003	}
1004	/* NOTREACHED */
1005}
1006
1007/*
1008 * Initialize 386 and configure to run kernel
1009 */
1010
1011/*
1012 * Initialize segments & interrupt table
1013 */
1014
1015int currentldt;
1016int _default_ldt;
1017union descriptor gdt[NGDT];		/* global descriptor table */
1018struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
1019union descriptor ldt[NLDT];		/* local descriptor table */
1020
1021struct	i386tss	tss, panic_tss;
1022
1023extern  struct user *proc0paddr;
1024
1025/* software prototypes -- in more palatable form */
1026struct soft_segment_descriptor gdt_segs[] = {
1027/* GNULL_SEL	0 Null Descriptor */
1028{	0x0,			/* segment base address  */
1029	0x0,			/* length */
1030	0,			/* segment type */
1031	0,			/* segment descriptor priority level */
1032	0,			/* segment descriptor present */
1033	0, 0,
1034	0,			/* default 32 vs 16 bit size */
1035	0  			/* limit granularity (byte/page units)*/ },
1036/* GCODE_SEL	1 Code Descriptor for kernel */
1037{	0x0,			/* segment base address  */
1038	0xfffff,		/* length - all address space */
1039	SDT_MEMERA,		/* segment type */
1040	0,			/* segment descriptor priority level */
1041	1,			/* segment descriptor present */
1042	0, 0,
1043	1,			/* default 32 vs 16 bit size */
1044	1  			/* limit granularity (byte/page units)*/ },
1045/* GDATA_SEL	2 Data Descriptor for kernel */
1046{	0x0,			/* segment base address  */
1047	0xfffff,		/* length - all address space */
1048	SDT_MEMRWA,		/* segment type */
1049	0,			/* segment descriptor priority level */
1050	1,			/* segment descriptor present */
1051	0, 0,
1052	1,			/* default 32 vs 16 bit size */
1053	1  			/* limit granularity (byte/page units)*/ },
1054/* GLDT_SEL	3 LDT Descriptor */
1055{	(int) ldt,		/* segment base address  */
1056	sizeof(ldt)-1,		/* length - all address space */
1057	SDT_SYSLDT,		/* segment type */
1058	0,			/* segment descriptor priority level */
1059	1,			/* segment descriptor present */
1060	0, 0,
1061	0,			/* unused - default 32 vs 16 bit size */
1062	0  			/* limit granularity (byte/page units)*/ },
1063/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1064{	0x0,			/* segment base address  */
1065	0x0,			/* length - all address space */
1066	0,			/* segment type */
1067	0,			/* segment descriptor priority level */
1068	0,			/* segment descriptor present */
1069	0, 0,
1070	0,			/* default 32 vs 16 bit size */
1071	0  			/* limit granularity (byte/page units)*/ },
1072/* GPANIC_SEL	5 Panic Tss Descriptor */
1073{	(int) &panic_tss,	/* segment base address  */
1074	sizeof(tss)-1,		/* length - all address space */
1075	SDT_SYS386TSS,		/* segment type */
1076	0,			/* segment descriptor priority level */
1077	1,			/* segment descriptor present */
1078	0, 0,
1079	0,			/* unused - default 32 vs 16 bit size */
1080	0  			/* limit granularity (byte/page units)*/ },
1081/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1082{	(int) kstack,		/* segment base address  */
1083	sizeof(tss)-1,		/* length - all address space */
1084	SDT_SYS386TSS,		/* segment type */
1085	0,			/* segment descriptor priority level */
1086	1,			/* segment descriptor present */
1087	0, 0,
1088	0,			/* unused - default 32 vs 16 bit size */
1089	0  			/* limit granularity (byte/page units)*/ },
1090/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1091{	(int) ldt,		/* segment base address  */
1092	(512 * sizeof(union descriptor)-1),		/* length */
1093	SDT_SYSLDT,		/* segment type */
1094	0,			/* segment descriptor priority level */
1095	1,			/* segment descriptor present */
1096	0, 0,
1097	0,			/* unused - default 32 vs 16 bit size */
1098	0  			/* limit granularity (byte/page units)*/ },
1099/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1100{	0,			/* segment base address (overwritten by APM)  */
1101	0xfffff,		/* length */
1102	SDT_MEMERA,		/* segment type */
1103	0,			/* segment descriptor priority level */
1104	1,			/* segment descriptor present */
1105	0, 0,
1106	1,			/* default 32 vs 16 bit size */
1107	1  			/* limit granularity (byte/page units)*/ },
1108/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1109{	0,			/* segment base address (overwritten by APM)  */
1110	0xfffff,		/* length */
1111	SDT_MEMERA,		/* segment type */
1112	0,			/* segment descriptor priority level */
1113	1,			/* segment descriptor present */
1114	0, 0,
1115	0,			/* default 32 vs 16 bit size */
1116	1  			/* limit granularity (byte/page units)*/ },
1117/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1118{	0,			/* segment base address (overwritten by APM) */
1119	0xfffff,		/* length */
1120	SDT_MEMRWA,		/* segment type */
1121	0,			/* segment descriptor priority level */
1122	1,			/* segment descriptor present */
1123	0, 0,
1124	1,			/* default 32 vs 16 bit size */
1125	1  			/* limit granularity (byte/page units)*/ },
1126};
1127
/*
 * Software templates for the default LDT.  init386() patches the limits
 * of the user code and data entries and then converts every entry into
 * hardware format with ssdtosd().
 *
 * Each initializer is positional: base, limit, type, privilege level,
 * present bit, two fields that are always zero here, default operand
 * size and limit granularity.
 */
struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* limit (zero: empty slot) */
	0,			/* segment type */
	0,			/* segment descriptor privilege level */
	0,			/* segment descriptor present (not present) */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* limit (zero: empty slot) */
	0,			/* segment type */
	0,			/* segment descriptor privilege level */
	0,			/* segment descriptor present (not present) */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* limit (zero: empty slot) */
	0,			/* segment type */
	0,			/* segment descriptor privilege level */
	0,			/* segment descriptor present (not present) */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* limit - placeholder, init386() sets the real one */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
	/* Data Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* limit - placeholder, init386() sets the real one */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor privilege level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};
1175
1176void
1177setidt(idx, func, typ, dpl)
1178	int idx;
1179	inthand_t *func;
1180	int typ;
1181	int dpl;
1182{
1183	struct gate_descriptor *ip = idt + idx;
1184
1185	ip->gd_looffset = (int)func;
1186	ip->gd_selector = 8;
1187	ip->gd_stkcpy = 0;
1188	ip->gd_xx = 0;
1189	ip->gd_type = typ;
1190	ip->gd_dpl = dpl;
1191	ip->gd_p = 1;
1192	ip->gd_hioffset = ((int)func)>>16 ;
1193}
1194
/*
 * IDTVEC(name) builds the symbol name of a trap/interrupt entry point
 * by handing the prefix X and `name' to __CONCAT().
 */
#define	IDTVEC(name)	__CONCAT(X,name)

/*
 * Entry points for the processor exceptions and the system call gate.
 * They are only declared here (presumably defined in assembly -- confirm);
 * init386() installs their addresses with setidt() and in the LDT call gate.
 */
extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(syscall);

#ifdef COMPAT_LINUX
/* Entry point installed at vector 0x80 by init386() when COMPAT_LINUX is set. */
extern inthand_t
	IDTVEC(linux_syscall);
#endif
1208
1209void
1210sdtossd(sd, ssd)
1211	struct segment_descriptor *sd;
1212	struct soft_segment_descriptor *ssd;
1213{
1214	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1215	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1216	ssd->ssd_type  = sd->sd_type;
1217	ssd->ssd_dpl   = sd->sd_dpl;
1218	ssd->ssd_p     = sd->sd_p;
1219	ssd->ssd_def32 = sd->sd_def32;
1220	ssd->ssd_gran  = sd->sd_gran;
1221}
1222
1223void
1224init386(first)
1225	int first;
1226{
1227	int x;
1228	unsigned biosbasemem, biosextmem;
1229	struct gate_descriptor *gdp;
1230	int gsel_tss;
1231	/* table descriptors - used to load tables by microp */
1232	struct region_descriptor r_gdt, r_idt;
1233	int	pagesinbase, pagesinext;
1234	int	target_page;
1235
1236	proc0.p_addr = proc0paddr;
1237
1238	/*
1239	 * Initialize the console before we print anything out.
1240	 */
1241
1242	cninit ();
1243
1244	/*
1245	 * make gdt memory segments, the code segment goes up to end of the
1246	 * page with etext in it, the data segment goes to the end of
1247	 * the address space
1248	 */
1249	/*
1250	 * XXX text protection is temporarily (?) disabled.  The limit was
1251	 * i386_btop(i386_round_page(etext)) - 1.
1252	 */
1253	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1254	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1255	for (x = 0; x < NGDT; x++)
1256		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1257
1258	/* make ldt memory segments */
1259	/*
1260	 * The data segment limit must not cover the user area because we
1261	 * don't want the user area to be writable in copyout() etc. (page
1262	 * level protection is lost in kernel mode on 386's).  Also, we
1263	 * don't want the user area to be writable directly (page level
1264	 * protection of the user area is not available on 486's with
1265	 * CR0_WP set, because there is no user-read/kernel-write mode).
1266	 *
1267	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1268	 * should be spelled ...MAX_USER...
1269	 */
1270#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1271	/*
1272	 * The code segment limit has to cover the user area until we move
1273	 * the signal trampoline out of the user area.  This is safe because
1274	 * the code segment cannot be written to directly.
1275	 */
1276#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1277	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1278	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1279	/* Note. eventually want private ldts per process */
1280	for (x = 0; x < NLDT; x++)
1281		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1282
1283	/* exceptions */
1284	for (x = 0; x < NIDT; x++)
1285		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
1286	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1287	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1288	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1289 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1290	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1291	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1292	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1293	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1294	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1295	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1296	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1297	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1298	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1299	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1300	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1301	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1302	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1303	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
1304#ifdef COMPAT_LINUX
1305 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL);
1306#endif
1307
1308#include	"isa.h"
1309#if	NISA >0
1310	isa_defaultirq();
1311#endif
1312
1313	r_gdt.rd_limit = sizeof(gdt) - 1;
1314	r_gdt.rd_base =  (int) gdt;
1315	lgdt(&r_gdt);
1316
1317	r_idt.rd_limit = sizeof(idt) - 1;
1318	r_idt.rd_base = (int) idt;
1319	lidt(&r_idt);
1320
1321	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1322	lldt(_default_ldt);
1323	currentldt = _default_ldt;
1324
1325#ifdef DDB
1326	kdb_init();
1327	if (boothowto & RB_KDB)
1328		Debugger("Boot flags requested debugger");
1329#endif
1330
1331	/* Use BIOS values stored in RTC CMOS RAM, since probing
1332	 * breaks certain 386 AT relics.
1333	 */
1334	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1335	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1336
1337	/*
1338	 * Print a warning if the official BIOS interface disagrees
1339	 * with the hackish interface used above.  Eventually only
1340	 * the official interface should be used.
1341	 */
1342	if (bootinfo.bi_memsizes_valid) {
1343		if (bootinfo.bi_basemem != biosbasemem)
1344			printf("BIOS basemem (%dK) != RTC basemem (%dK)\n",
1345			       bootinfo.bi_basemem, biosbasemem);
1346		if (bootinfo.bi_extmem != biosextmem)
1347			printf("BIOS extmem (%dK) != RTC extmem (%dK)\n",
1348			       bootinfo.bi_extmem, biosextmem);
1349	}
1350
1351	/*
1352	 * If BIOS tells us that it has more than 640k in the basemem,
1353	 *	don't believe it - set it to 640k.
1354	 */
1355	if (biosbasemem > 640)
1356		biosbasemem = 640;
1357
1358	/*
1359	 * Some 386 machines might give us a bogus number for extended
1360	 *	mem. If this happens, stop now.
1361	 */
1362#ifndef LARGEMEM
1363	if (biosextmem > 65536) {
1364		panic("extended memory beyond limit of 64MB");
1365		/* NOTREACHED */
1366	}
1367#endif
1368
1369	pagesinbase = biosbasemem * 1024 / NBPG;
1370	pagesinext = biosextmem * 1024 / NBPG;
1371
1372	/*
1373	 * Special hack for chipsets that still remap the 384k hole when
1374	 *	there's 16MB of memory - this really confuses people that
1375	 *	are trying to use bus mastering ISA controllers with the
1376	 *	"16MB limit"; they only have 16MB, but the remapping puts
1377	 *	them beyond the limit.
1378	 */
1379	/*
1380	 * If extended memory is between 15-16MB (16-17MB phys address range),
1381	 *	chop it to 15MB.
1382	 */
1383	if ((pagesinext > 3840) && (pagesinext < 4096))
1384		pagesinext = 3840;
1385
1386	/*
1387	 * Maxmem isn't the "maximum memory", it's one larger than the
1388	 * highest page of of the physical address space. It should be
1389	 * called something like "Maxphyspage".
1390	 */
1391	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1392
1393#ifdef MAXMEM
1394	Maxmem = MAXMEM/4;
1395#endif
1396	/*
1397	 * Calculate number of physical pages, but account for Maxmem
1398	 *	adjustment above.
1399	 */
1400	physmem = pagesinbase + Maxmem - 0x100000/PAGE_SIZE;
1401
1402	/* call pmap initialization to make new kernel address space */
1403	pmap_bootstrap (first, 0);
1404
1405	/*
1406	 * Do a quick, non-destructive check over extended memory to verify
1407	 * what the BIOS tells us agrees with reality. Adjust down Maxmem
1408	 * if we find that the page can't be correctly written to/read from.
1409	 */
1410
1411	for (target_page = Maxmem - 1; target_page >= atop(first); target_page--) {
1412		int tmp;
1413
1414		/*
1415		 * map page into kernel: valid, read/write, non-cacheable
1416		 */
1417		*(int *)CMAP1 = PG_V | PG_KW | PG_N | ptoa(target_page);
1418		pmap_update();
1419
1420		tmp = *(int *)CADDR1;
1421		/*
1422		 * Test for alternating 1's and 0's
1423		 */
1424		*(int *)CADDR1 = 0xaaaaaaaa;
1425		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1426			Maxmem = target_page;
1427			badpages++;
1428			continue;
1429		}
1430		/*
1431		 * Test for alternating 0's and 1's
1432		 */
1433		*(int *)CADDR1 = 0x55555555;
1434		if (*(int *)CADDR1 != 0x55555555) {
1435			Maxmem = target_page;
1436			badpages++;
1437			continue;
1438		}
1439		/*
1440		 * Test for all 1's
1441		 */
1442		*(int *)CADDR1 = 0xffffffff;
1443		if (*(int *)CADDR1 != 0xffffffff) {
1444			Maxmem = target_page;
1445			badpages++;
1446			continue;
1447		}
1448		/*
1449		 * Test for all 0's
1450		 */
1451		*(int *)CADDR1 = 0x0;
1452		if (*(int *)CADDR1 != 0x0) {
1453			/*
1454			 * test of page failed
1455			 */
1456			Maxmem = target_page;
1457			badpages++;
1458			continue;
1459		}
1460		*(int *)CADDR1 = tmp;
1461	}
1462	if (badpages != 0)
1463		printf("WARNING: BIOS extended memory size and reality don't agree.\n");
1464
1465	*(int *)CMAP1 = 0;
1466	pmap_update();
1467
1468	avail_end = (Maxmem << PAGE_SHIFT)
1469		    - i386_round_page(sizeof(struct msgbuf));
1470
1471	/*
1472	 * Initialize pointers to the two chunks of memory; for use
1473	 *	later in vm_page_startup.
1474	 */
1475	/* avail_start is initialized in pmap_bootstrap */
1476	x = 0;
1477	if (pagesinbase > 1) {
1478		phys_avail[x++] = NBPG;		/* skip first page of memory */
1479		phys_avail[x++] = pagesinbase * NBPG;	/* memory up to the ISA hole */
1480	}
1481	phys_avail[x++] = avail_start;	/* memory up to the end */
1482	phys_avail[x++] = avail_end;
1483	phys_avail[x++] = 0;		/* no more chunks */
1484	phys_avail[x++] = 0;
1485
1486	/* now running on new page tables, configured,and u/iom is accessible */
1487
1488	/* make a initial tss so microp can get interrupt stack on syscall! */
1489	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1490	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1491	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1492
1493	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1494		(sizeof(tss))<<16;
1495
1496	ltr(gsel_tss);
1497
1498	/* make a call gate to reenter kernel with */
1499	gdp = &ldt[LSYS5CALLS_SEL].gd;
1500
1501	x = (int) &IDTVEC(syscall);
1502	gdp->gd_looffset = x++;
1503	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1504	gdp->gd_stkcpy = 1;
1505	gdp->gd_type = SDT_SYS386CGT;
1506	gdp->gd_dpl = SEL_UPL;
1507	gdp->gd_p = 1;
1508	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1509
1510	/* transfer to user mode */
1511
1512	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1513	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1514
1515	/* setup proc 0's pcb */
1516	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1517	proc0.p_addr->u_pcb.pcb_flags = 0;
1518	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1519}
1520
/*
 * Locate a process's saved register frame through its user area.
 *
 * p->p_md.md_regs holds the frame's address expressed relative to the
 * kernel stack (kstack).  TF_REGP() takes the byte offset of that
 * address from kstack and applies the same offset to p->p_addr, giving
 * a struct trapframe pointer inside the process's user block.
 */
#define	TF_REGP(p) \
	((struct trapframe *)((char *)(p)->p_addr + \
	    ((char *)(p)->p_md.md_regs - kstack)))
1533
1534int
1535ptrace_set_pc(p, addr)
1536	struct proc *p;
1537	unsigned int addr;
1538{
1539	TF_REGP(p)->tf_eip = addr;
1540	return (0);
1541}
1542
1543int
1544ptrace_single_step(p)
1545	struct proc *p;
1546{
1547	TF_REGP(p)->tf_eflags |= PSL_T;
1548	return (0);
1549}
1550
1551int
1552ptrace_getregs(p, addr)
1553	struct proc *p;
1554	unsigned int *addr;
1555{
1556	int error;
1557	struct reg regs;
1558
1559	error = fill_regs(p, &regs);
1560	if (error)
1561		return (error);
1562	return (copyout(&regs, addr, sizeof regs));
1563}
1564
1565int
1566ptrace_setregs(p, addr)
1567	struct proc *p;
1568	unsigned int *addr;
1569{
1570	int error;
1571	struct reg regs;
1572
1573	error = copyin(addr, &regs, sizeof regs);
1574	if (error)
1575		return (error);
1576	return (set_regs(p, &regs));
1577}
1578
1579int ptrace_write_u(p, off, data)
1580	struct proc *p;
1581	vm_offset_t off;
1582	int data;
1583{
1584	struct trapframe frame_copy;
1585	vm_offset_t min;
1586	struct trapframe *tp;
1587
1588	/*
1589	 * Privileged kernel state is scattered all over the user area.
1590	 * Only allow write access to parts of regs and to fpregs.
1591	 */
1592	min = (char *)p->p_md.md_regs - kstack;
1593	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1594		tp = TF_REGP(p);
1595		frame_copy = *tp;
1596		*(int *)((char *)&frame_copy + (off - min)) = data;
1597		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1598		    !CS_SECURE(frame_copy.tf_cs))
1599			return (EINVAL);
1600		*(int*)((char *)p->p_addr + off) = data;
1601		return (0);
1602	}
1603	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1604	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1605		*(int*)((char *)p->p_addr + off) = data;
1606		return (0);
1607	}
1608	return (EFAULT);
1609}
1610
1611int
1612fill_regs(p, regs)
1613	struct proc *p;
1614	struct reg *regs;
1615{
1616	struct trapframe *tp;
1617
1618	tp = TF_REGP(p);
1619	regs->r_es = tp->tf_es;
1620	regs->r_ds = tp->tf_ds;
1621	regs->r_edi = tp->tf_edi;
1622	regs->r_esi = tp->tf_esi;
1623	regs->r_ebp = tp->tf_ebp;
1624	regs->r_ebx = tp->tf_ebx;
1625	regs->r_edx = tp->tf_edx;
1626	regs->r_ecx = tp->tf_ecx;
1627	regs->r_eax = tp->tf_eax;
1628	regs->r_eip = tp->tf_eip;
1629	regs->r_cs = tp->tf_cs;
1630	regs->r_eflags = tp->tf_eflags;
1631	regs->r_esp = tp->tf_esp;
1632	regs->r_ss = tp->tf_ss;
1633	return (0);
1634}
1635
1636int
1637set_regs(p, regs)
1638	struct proc *p;
1639	struct reg *regs;
1640{
1641	struct trapframe *tp;
1642
1643	tp = TF_REGP(p);
1644	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1645	    !CS_SECURE(regs->r_cs))
1646		return (EINVAL);
1647	tp->tf_es = regs->r_es;
1648	tp->tf_ds = regs->r_ds;
1649	tp->tf_edi = regs->r_edi;
1650	tp->tf_esi = regs->r_esi;
1651	tp->tf_ebp = regs->r_ebp;
1652	tp->tf_ebx = regs->r_ebx;
1653	tp->tf_edx = regs->r_edx;
1654	tp->tf_ecx = regs->r_ecx;
1655	tp->tf_eax = regs->r_eax;
1656	tp->tf_eip = regs->r_eip;
1657	tp->tf_cs = regs->r_cs;
1658	tp->tf_eflags = regs->r_eflags;
1659	tp->tf_esp = regs->r_esp;
1660	tp->tf_ss = regs->r_ss;
1661	return (0);
1662}
1663
#ifndef DDB
/*
 * Stand-in for Debugger() in kernels built without DDB: it only
 * reports that it was called, quoting `msg', and then returns.
 */
void
Debugger(const char *msg)
{
	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
1671
1672#include <sys/disklabel.h>
1673#define b_cylin	b_resid
1674/*
1675 * Determine the size of the transfer, and make sure it is
1676 * within the boundaries of the partition. Adjust transfer
1677 * if needed, and signal errors or early completion.
1678 */
1679int
1680bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1681{
1682        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1683        int labelsect = lp->d_partitions[0].p_offset;
1684        int maxsz = p->p_size,
1685                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1686
1687        /* overwriting disk label ? */
1688        /* XXX should also protect bootstrap in first 8K */
1689        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1690#if LABELSECTOR != 0
1691            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1692#endif
1693            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1694                bp->b_error = EROFS;
1695                goto bad;
1696        }
1697
1698#if     defined(DOSBBSECTOR) && defined(notyet)
1699        /* overwriting master boot record? */
1700        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1701            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1702                bp->b_error = EROFS;
1703                goto bad;
1704        }
1705#endif
1706
1707        /* beyond partition? */
1708        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1709                /* if exactly at end of disk, return an EOF */
1710                if (bp->b_blkno == maxsz) {
1711                        bp->b_resid = bp->b_bcount;
1712                        return(0);
1713                }
1714                /* or truncate if part of it fits */
1715                sz = maxsz - bp->b_blkno;
1716                if (sz <= 0) {
1717                        bp->b_error = EINVAL;
1718                        goto bad;
1719                }
1720                bp->b_bcount = sz << DEV_BSHIFT;
1721        }
1722
1723        /* calculate cylinder for disksort to order transfers with */
1724        bp->b_pblkno = bp->b_blkno + p->p_offset;
1725        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1726        return(1);
1727
1728bad:
1729        bp->b_flags |= B_ERROR;
1730        return(-1);
1731}
1732
/*
 * Copy the drive number `drive' out to `userp'.
 *
 * `*maxlen' is the space remaining at `userp'.  If it is smaller than
 * an int, ENOMEM is returned and nothing is changed; otherwise
 * `*maxlen' is reduced by the size of the int and the result of
 * copyout() is returned.
 */
int
disk_externalize(int drive, void *userp, size_t *maxlen)
{
	const size_t need = sizeof drive;

	if (*maxlen < need)
		return ENOMEM;

	*maxlen -= need;
	return copyout(&drive, userp, need);
}
1743