machdep.c revision 17118
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.194 1996/07/08 19:44:39 wollman Exp $
39 */
40
41#include "npx.h"
42#include "opt_sysvipc.h"
43#include "opt_ddb.h"
44#include "opt_bounce.h"
45#include "opt_machdep.h"
46#include "opt_perfmon.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/sysproto.h>
51#include <sys/signalvar.h>
52#include <sys/kernel.h>
53#include <sys/proc.h>
54#include <sys/buf.h>
55#include <sys/reboot.h>
56#include <sys/conf.h>
57#include <sys/file.h>
58#include <sys/callout.h>
59#include <sys/malloc.h>
60#include <sys/mbuf.h>
61#include <sys/mount.h>
62#include <sys/msgbuf.h>
63#include <sys/ioctl.h>
64#include <sys/sysent.h>
65#include <sys/tty.h>
66#include <sys/sysctl.h>
67#include <sys/devconf.h>
68#include <sys/vmmeter.h>
69
70#ifdef SYSVSHM
71#include <sys/shm.h>
72#endif
73
74#ifdef SYSVMSG
75#include <sys/msg.h>
76#endif
77
78#ifdef SYSVSEM
79#include <sys/sem.h>
80#endif
81
82#include <vm/vm.h>
83#include <vm/vm_param.h>
84#include <vm/vm_prot.h>
85#include <vm/lock.h>
86#include <vm/vm_kern.h>
87#include <vm/vm_object.h>
88#include <vm/vm_page.h>
89#include <vm/vm_map.h>
90#include <vm/vm_pager.h>
91#include <vm/vm_extern.h>
92
93#include <sys/user.h>
94#include <sys/exec.h>
95#include <sys/vnode.h>
96
97#include <ddb/ddb.h>
98
99#include <net/netisr.h>
100
101#include <machine/cpu.h>
102#include <machine/npx.h>
103#include <machine/reg.h>
104#include <machine/psl.h>
105#include <machine/clock.h>
106#include <machine/specialreg.h>
107#include <machine/sysarch.h>
108#include <machine/cons.h>
109#include <machine/devconf.h>
110#include <machine/bootinfo.h>
111#include <machine/md_var.h>
112#ifdef PERFMON
113#include <machine/perfmon.h>
114#endif
115
116#include <i386/isa/isa_device.h>
117#include <i386/isa/rtc.h>
118#include <machine/random.h>
119
120extern void init386 __P((int first));
121extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
122extern int ptrace_single_step __P((struct proc *p));
123extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
124extern void dblfault_handler __P((void));
125
126extern void identifycpu(void);	/* XXX header file */
127extern void earlysetcpuclass(void);	/* same header file */
128
129static void cpu_startup __P((void *));
130SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
131
132
133#ifndef PANIC_REBOOT_WAIT_TIME
134#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
135#endif
136
137#ifdef BOUNCE_BUFFERS
138extern char *bouncememory;
139extern int maxbkva;
140#ifdef BOUNCEPAGES
141int	bouncepages = BOUNCEPAGES;
142#else
143int	bouncepages = 0;
144#endif
145#endif	/* BOUNCE_BUFFERS */
146
147extern int freebufspace;
148int	msgbufmapped = 0;		/* set when safe to use msgbuf */
149int _udatasel, _ucodesel;
150u_int	atdevbase;
151
152
153int physmem = 0;
154int cold = 1;
155
156static int
157sysctl_hw_physmem SYSCTL_HANDLER_ARGS
158{
159	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
160	return (error);
161}
162
163SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
164	0, 0, sysctl_hw_physmem, "I", "");
165
166static int
167sysctl_hw_usermem SYSCTL_HANDLER_ARGS
168{
169	int error = sysctl_handle_int(oidp, 0,
170		ctob(physmem - cnt.v_wire_count), req);
171	return (error);
172}
173
174SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
175	0, 0, sysctl_hw_usermem, "I", "");
176
177int boothowto = 0, bootverbose = 0, Maxmem = 0;
178static int	badpages = 0;
179long dumplo;
180extern int bootdev;
181
182vm_offset_t phys_avail[10];
183
184/* must be 2 less so 0 0 can signal end of chunks */
185#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
186
187static void dumpsys __P((void));
188static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */
189
190static vm_offset_t buffer_sva, buffer_eva;
191vm_offset_t clean_sva, clean_eva;
192static vm_offset_t pager_sva, pager_eva;
193extern struct linker_set netisr_set;
194
195#define offsetof(type, member)	((size_t)(&((type *)0)->member))
196
197static void
198cpu_startup(dummy)
199	void *dummy;
200{
201	register unsigned i;
202	register caddr_t v;
203	vm_offset_t maxaddr;
204	vm_size_t size = 0;
205	int firstaddr;
206	vm_offset_t minaddr;
207
208	if (boothowto & RB_VERBOSE)
209		bootverbose++;
210
211	/*
212	 * Initialize error message buffer (at end of core).
213	 */
214
215	/* avail_end was pre-decremented in init386() to compensate */
216	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
217		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
218			   avail_end + i * PAGE_SIZE,
219			   VM_PROT_ALL, TRUE);
220	msgbufmapped = 1;
221
222	/*
223	 * Good {morning,afternoon,evening,night}.
224	 */
225	printf(version);
226	earlysetcpuclass();
227	startrtclock();
228	identifycpu();
229#ifdef PERFMON
230	perfmon_init();
231#endif
232	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
233	/*
234	 * Display any holes after the first chunk of extended memory.
235	 */
236	if (badpages != 0) {
237		int indx = 1;
238
239		/*
240		 * XXX skip reporting ISA hole & unmanaged kernel memory
241		 */
242		if (phys_avail[0] == PAGE_SIZE)
243			indx += 2;
244
245		printf("Physical memory hole(s):\n");
246		for (; phys_avail[indx + 1] != 0; indx += 2) {
247			int size = phys_avail[indx + 1] - phys_avail[indx];
248
249			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
250			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
251		}
252	}
253
254	/*
255	 * Quickly wire in netisrs.
256	 */
257	setup_netisrs(&netisr_set);
258
259/*
260#ifdef ISDN
261	DONET(isdnintr, NETISR_ISDN);
262#endif
263*/
264
265	/*
266	 * Allocate space for system data structures.
267	 * The first available kernel virtual address is in "v".
268	 * As pages of kernel virtual memory are allocated, "v" is incremented.
269	 * As pages of memory are allocated and cleared,
270	 * "firstaddr" is incremented.
271	 * An index into the kernel page table corresponding to the
272	 * virtual memory address maintained in "v" is kept in "mapaddr".
273	 */
274
275	/*
276	 * Make two passes.  The first pass calculates how much memory is
277	 * needed and allocates it.  The second pass assigns virtual
278	 * addresses to the various data structures.
279	 */
280	firstaddr = 0;
281again:
282	v = (caddr_t)firstaddr;
283
284#define	valloc(name, type, num) \
285	    (name) = (type *)v; v = (caddr_t)((name)+(num))
286#define	valloclim(name, type, num, lim) \
287	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
288	valloc(callout, struct callout, ncallout);
289#ifdef SYSVSHM
290	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
291#endif
292#ifdef SYSVSEM
293	valloc(sema, struct semid_ds, seminfo.semmni);
294	valloc(sem, struct sem, seminfo.semmns);
295	/* This is pretty disgusting! */
296	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
297#endif
298#ifdef SYSVMSG
299	valloc(msgpool, char, msginfo.msgmax);
300	valloc(msgmaps, struct msgmap, msginfo.msgseg);
301	valloc(msghdrs, struct msg, msginfo.msgtql);
302	valloc(msqids, struct msqid_ds, msginfo.msgmni);
303#endif
304
305	if (nbuf == 0) {
306		nbuf = 30;
307		if( physmem > 1024)
308			nbuf += min((physmem - 1024) / 12, 1024);
309	}
310	nswbuf = min(nbuf, 128);
311
312	valloc(swbuf, struct buf, nswbuf);
313	valloc(buf, struct buf, nbuf);
314
315#ifdef BOUNCE_BUFFERS
316	/*
317	 * If there is more than 16MB of memory, allocate some bounce buffers
318	 */
319	if (Maxmem > 4096) {
320		if (bouncepages == 0) {
321			bouncepages = 64;
322			bouncepages += ((Maxmem - 4096) / 2048) * 32;
323		}
324		v = (caddr_t)((vm_offset_t)round_page(v));
325		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
326	}
327#endif
328
329	/*
330	 * End of first pass, size has been calculated so allocate memory
331	 */
332	if (firstaddr == 0) {
333		size = (vm_size_t)(v - firstaddr);
334		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
335		if (firstaddr == 0)
336			panic("startup: no room for tables");
337		goto again;
338	}
339
340	/*
341	 * End of second pass, addresses have been assigned
342	 */
343	if ((vm_size_t)(v - firstaddr) != size)
344		panic("startup: table size inconsistency");
345
346#ifdef BOUNCE_BUFFERS
347	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
348			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
349				maxbkva + pager_map_size, TRUE);
350	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
351#else
352	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
353			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
354#endif
355	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
356				(nbuf*MAXBSIZE), TRUE);
357	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
358				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
359	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
360				(16*ARG_MAX), TRUE);
361	exech_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
362				(32*ARG_MAX), TRUE);
363	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
364				(maxproc*UPAGES*PAGE_SIZE), FALSE);
365
366	/*
367	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
368	 * we use the more space efficient malloc in place of kmem_alloc.
369	 */
370	mclrefcnt = (char *)malloc(nmbclusters+PAGE_SIZE/MCLBYTES,
371				   M_MBUF, M_NOWAIT);
372	bzero(mclrefcnt, nmbclusters+PAGE_SIZE/MCLBYTES);
373	mcl_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
374			       nmbclusters * MCLBYTES, FALSE);
375	{
376		vm_size_t mb_map_size;
377		mb_map_size = nmbufs * MSIZE;
378		mb_map = kmem_suballoc(kmem_map, &minaddr, &maxaddr,
379				       round_page(mb_map_size), FALSE);
380	}
381
382	/*
383	 * Initialize callouts
384	 */
385	callfree = callout;
386	for (i = 1; i < ncallout; i++)
387		callout[i-1].c_next = &callout[i];
388
389        if (boothowto & RB_CONFIG) {
390		userconfig();
391		cninit();	/* the preferred console may have changed */
392	}
393
394#ifdef BOUNCE_BUFFERS
395	/*
396	 * init bounce buffers
397	 */
398	vm_bounce_init();
399#endif
400
401	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
402	    ptoa(cnt.v_free_count) / 1024);
403
404	/*
405	 * Set up buffers, so they can be used to read disk labels.
406	 */
407	bufinit();
408	vm_pager_bufferinit();
409
410	/*
411	 * In verbose mode, print out the BIOS's idea of the disk geometries.
412	 */
413	if (bootverbose) {
414		printf("BIOS Geometries:\n");
415		for (i = 0; i < N_BIOS_GEOM; i++) {
416			unsigned long bios_geom;
417			int max_cylinder, max_head, max_sector;
418
419			bios_geom = bootinfo.bi_bios_geom[i];
420
421			/*
422			 * XXX the bootstrap punts a 1200K floppy geometry
423			 * when the get-disk-geometry interrupt fails.  Skip
424			 * drives that have this geometry.
425			 */
426			if (bios_geom == 0x4f010f)
427				continue;
428
429			printf(" %x:%08lx ", i, bios_geom);
430			max_cylinder = bios_geom >> 16;
431			max_head = (bios_geom >> 8) & 0xff;
432			max_sector = bios_geom & 0xff;
433			printf(
434		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
435			       max_cylinder, max_cylinder + 1,
436			       max_head, max_head + 1,
437			       max_sector, max_sector);
438		}
439		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
440	}
441}
442
443int
444register_netisr(num, handler)
445	int num;
446	netisr_t *handler;
447{
448
449	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
450		printf("register_netisr: bad isr number: %d\n", num);
451		return (EINVAL);
452	}
453	netisrs[num] = handler;
454	return (0);
455}
456
457static void
458setup_netisrs(ls)
459	struct linker_set *ls;
460{
461	int i;
462	const struct netisrtab *nit;
463
464	for(i = 0; ls->ls_items[i]; i++) {
465		nit = (const struct netisrtab *)ls->ls_items[i];
466		register_netisr(nit->nit_num, nit->nit_isr);
467	}
468}
469
470/*
471 * Send an interrupt to process.
472 *
473 * Stack is set up to allow sigcode stored
474 * at top to call routine, followed by kcall
475 * to sigreturn routine below.  After sigreturn
476 * resets the signal mask, the stack, and the
477 * frame pointer, it returns to the user
478 * specified pc, psl.
479 */
480void
481sendsig(catcher, sig, mask, code)
482	sig_t catcher;
483	int sig, mask;
484	u_long code;
485{
486	register struct proc *p = curproc;
487	register int *regs;
488	register struct sigframe *fp;
489	struct sigframe sf;
490	struct sigacts *psp = p->p_sigacts;
491	int oonstack;
492
493	regs = p->p_md.md_regs;
494        oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
495	/*
496	 * Allocate and validate space for the signal handler context.
497	 */
498        if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
499	    (psp->ps_sigonstack & sigmask(sig))) {
500		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
501		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
502		psp->ps_sigstk.ss_flags |= SS_ONSTACK;
503	} else {
504		fp = (struct sigframe *)regs[tESP] - 1;
505	}
506
507	/*
508	 * grow() will return FALSE if the fp will not fit inside the stack
509	 *	and the stack can not be grown. useracc will return FALSE
510	 *	if access is denied.
511	 */
512	if ((grow(p, (int)fp) == FALSE) ||
513	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
514		/*
515		 * Process has trashed its stack; give it an illegal
516		 * instruction to halt it in its tracks.
517		 */
518		SIGACTION(p, SIGILL) = SIG_DFL;
519		sig = sigmask(SIGILL);
520		p->p_sigignore &= ~sig;
521		p->p_sigcatch &= ~sig;
522		p->p_sigmask &= ~sig;
523		psignal(p, SIGILL);
524		return;
525	}
526
527	/*
528	 * Build the argument list for the signal handler.
529	 */
530	if (p->p_sysent->sv_sigtbl) {
531		if (sig < p->p_sysent->sv_sigsize)
532			sig = p->p_sysent->sv_sigtbl[sig];
533		else
534			sig = p->p_sysent->sv_sigsize + 1;
535	}
536	sf.sf_signum = sig;
537	sf.sf_code = code;
538	sf.sf_scp = &fp->sf_sc;
539	sf.sf_addr = (char *) regs[tERR];
540	sf.sf_handler = catcher;
541
542	/* save scratch registers */
543	sf.sf_sc.sc_eax = regs[tEAX];
544	sf.sf_sc.sc_ebx = regs[tEBX];
545	sf.sf_sc.sc_ecx = regs[tECX];
546	sf.sf_sc.sc_edx = regs[tEDX];
547	sf.sf_sc.sc_esi = regs[tESI];
548	sf.sf_sc.sc_edi = regs[tEDI];
549	sf.sf_sc.sc_cs = regs[tCS];
550	sf.sf_sc.sc_ds = regs[tDS];
551	sf.sf_sc.sc_ss = regs[tSS];
552	sf.sf_sc.sc_es = regs[tES];
553	sf.sf_sc.sc_isp = regs[tISP];
554
555	/*
556	 * Build the signal context to be used by sigreturn.
557	 */
558	sf.sf_sc.sc_onstack = oonstack;
559	sf.sf_sc.sc_mask = mask;
560	sf.sf_sc.sc_sp = regs[tESP];
561	sf.sf_sc.sc_fp = regs[tEBP];
562	sf.sf_sc.sc_pc = regs[tEIP];
563	sf.sf_sc.sc_ps = regs[tEFLAGS];
564
565	/*
566	 * Copy the sigframe out to the user's stack.
567	 */
568	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
569		/*
570		 * Something is wrong with the stack pointer.
571		 * ...Kill the process.
572		 */
573		sigexit(p, SIGILL);
574	};
575
576	regs[tESP] = (int)fp;
577	regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode));
578	regs[tEFLAGS] &= ~PSL_VM;
579	regs[tCS] = _ucodesel;
580	regs[tDS] = _udatasel;
581	regs[tES] = _udatasel;
582	regs[tSS] = _udatasel;
583}
584
585/*
586 * System call to cleanup state after a signal
587 * has been taken.  Reset signal mask and
588 * stack state from context left by sendsig (above).
589 * Return to previous pc and psl as specified by
590 * context left by sendsig. Check carefully to
591 * make sure that the user has not modified the
592 * state to gain improper privileges.
593 */
594int
595sigreturn(p, uap, retval)
596	struct proc *p;
597	struct sigreturn_args /* {
598		struct sigcontext *sigcntxp;
599	} */ *uap;
600	int *retval;
601{
602	register struct sigcontext *scp;
603	register struct sigframe *fp;
604	register int *regs = p->p_md.md_regs;
605	int eflags;
606
607	/*
608	 * (XXX old comment) regs[tESP] points to the return address.
609	 * The user scp pointer is above that.
610	 * The return address is faked in the signal trampoline code
611	 * for consistency.
612	 */
613	scp = uap->sigcntxp;
614	fp = (struct sigframe *)
615	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
616
617	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
618		return(EINVAL);
619
620	/*
621	 * Don't allow users to change privileged or reserved flags.
622	 */
623#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
624	eflags = scp->sc_ps;
625	/*
626	 * XXX do allow users to change the privileged flag PSL_RF.  The
627	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
628	 * sometimes set it there too.  tf_eflags is kept in the signal
629	 * context during signal handling and there is no other place
630	 * to remember it, so the PSL_RF bit may be corrupted by the
631	 * signal handler without us knowing.  Corruption of the PSL_RF
632	 * bit at worst causes one more or one less debugger trap, so
633	 * allowing it is fairly harmless.
634	 */
635	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
636#ifdef DEBUG
637    		printf("sigreturn: eflags = 0x%x\n", eflags);
638#endif
639    		return(EINVAL);
640	}
641
642	/*
643	 * Don't allow users to load a valid privileged %cs.  Let the
644	 * hardware check for invalid selectors, excess privilege in
645	 * other selectors, invalid %eip's and invalid %esp's.
646	 */
647#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
648	if (!CS_SECURE(scp->sc_cs)) {
649#ifdef DEBUG
650    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
651#endif
652		trapsignal(p, SIGBUS, T_PROTFLT);
653		return(EINVAL);
654	}
655
656	/* restore scratch registers */
657	regs[tEAX] = scp->sc_eax;
658	regs[tEBX] = scp->sc_ebx;
659	regs[tECX] = scp->sc_ecx;
660	regs[tEDX] = scp->sc_edx;
661	regs[tESI] = scp->sc_esi;
662	regs[tEDI] = scp->sc_edi;
663	regs[tCS] = scp->sc_cs;
664	regs[tDS] = scp->sc_ds;
665	regs[tES] = scp->sc_es;
666	regs[tSS] = scp->sc_ss;
667	regs[tISP] = scp->sc_isp;
668
669	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
670		return(EINVAL);
671
672	if (scp->sc_onstack & 01)
673		p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK;
674	else
675		p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
676	p->p_sigmask = scp->sc_mask &~
677	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
678	regs[tEBP] = scp->sc_fp;
679	regs[tESP] = scp->sc_sp;
680	regs[tEIP] = scp->sc_pc;
681	regs[tEFLAGS] = eflags;
682	return(EJUSTRETURN);
683}
684
685static int	waittime = -1;
686static struct pcb dumppcb;
687
688__dead void
689boot(howto)
690	int howto;
691{
692	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
693		register struct buf *bp;
694		int iter, nbusy;
695
696		waittime = 0;
697		printf("\nsyncing disks... ");
698
699		sync(&proc0, NULL, NULL);
700
701		for (iter = 0; iter < 20; iter++) {
702			nbusy = 0;
703			for (bp = &buf[nbuf]; --bp >= buf; ) {
704				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
705					nbusy++;
706				}
707			}
708			if (nbusy == 0)
709				break;
710			printf("%d ", nbusy);
711			DELAY(40000 * iter);
712		}
713		if (nbusy) {
714			/*
715			 * Failed to sync all blocks. Indicate this and don't
716			 * unmount filesystems (thus forcing an fsck on reboot).
717			 */
718			printf("giving up\n");
719#ifdef SHOW_BUSYBUFS
720			nbusy = 0;
721			for (bp = &buf[nbuf]; --bp >= buf; ) {
722				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
723					nbusy++;
724					printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno);
725				}
726			}
727			DELAY(5000000);	/* 5 seconds */
728#endif
729		} else {
730			printf("done\n");
731			/*
732			 * Unmount filesystems
733			 */
734			if (panicstr == 0)
735				vfs_unmountall();
736		}
737		DELAY(100000);			/* wait for console output to finish */
738		dev_shutdownall(FALSE);
739	}
740	splhigh();
741	if (howto & RB_HALT) {
742		printf("\n");
743		printf("The operating system has halted.\n");
744		printf("Please press any key to reboot.\n\n");
745		cngetc();
746	} else {
747		if (howto & RB_DUMP) {
748			if (!cold) {
749				savectx(&dumppcb);
750				dumppcb.pcb_cr3 = rcr3();
751				dumpsys();
752			}
753
754			if (PANIC_REBOOT_WAIT_TIME != 0) {
755				if (PANIC_REBOOT_WAIT_TIME != -1) {
756					int loop;
757					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
758						PANIC_REBOOT_WAIT_TIME);
759					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
760						DELAY(1000 * 100); /* 1/10th second */
761						if (cncheckc()) /* Did user type a key? */
762							break;
763					}
764					if (!loop)
765						goto die;
766				}
767			} else { /* zero time specified - reboot NOW */
768				goto die;
769			}
770			printf("--> Press a key on the console to reboot <--\n");
771			cngetc();
772		}
773	}
774die:
775	printf("Rebooting...\n");
776	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
777	cpu_reset();
778	for(;;) ;
779	/* NOTREACHED */
780}
781
782/*
783 * Magic number for savecore
784 *
785 * exported (symorder) and used at least by savecore(8)
786 *
787 */
788static u_long const	dumpmag = 0x8fca0101UL;
789
790static int	dumpsize = 0;		/* also for savecore */
791
792static int	dodump = 1;
793SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, "");
794
795/*
796 * Doadump comes here after turning off memory management and
797 * getting on the dump stack, either when called above, or by
798 * the auto-restart code.
799 */
800static void
801dumpsys()
802{
803
804	if (!dodump)
805		return;
806	if (dumpdev == NODEV)
807		return;
808	if ((minor(dumpdev)&07) != 1)
809		return;
810	if (!(bdevsw[major(dumpdev)]))
811		return;
812	if (!(bdevsw[major(dumpdev)]->d_dump))
813		return;
814	dumpsize = Maxmem;
815	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
816	printf("dump ");
817	switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) {
818
819	case ENXIO:
820		printf("device bad\n");
821		break;
822
823	case EFAULT:
824		printf("device not ready\n");
825		break;
826
827	case EINVAL:
828		printf("area improper\n");
829		break;
830
831	case EIO:
832		printf("i/o error\n");
833		break;
834
835	case EINTR:
836		printf("aborted from console\n");
837		break;
838
839	default:
840		printf("succeeded\n");
841		break;
842	}
843}
844
845/*
846 * Clear registers on exec
847 */
848void
849setregs(p, entry, stack)
850	struct proc *p;
851	u_long entry;
852	u_long stack;
853{
854	int *regs = p->p_md.md_regs;
855
856#ifdef USER_LDT
857	struct pcb *pcb = &p->p_addr->u_pcb;
858
859	/* was i386_user_cleanup() in NetBSD */
860	if (pcb->pcb_ldt) {
861		if (pcb == curpcb)
862			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
863		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
864			pcb->pcb_ldt_len * sizeof(union descriptor));
865		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
866 	}
867#endif
868
869	bzero(regs, sizeof(struct trapframe));
870	regs[tEIP] = entry;
871	regs[tESP] = stack;
872	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
873	regs[tSS] = _udatasel;
874	regs[tDS] = _udatasel;
875	regs[tES] = _udatasel;
876	regs[tCS] = _ucodesel;
877
878	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
879	load_cr0(rcr0() | CR0_TS);	/* start emulating */
880#if	NNPX > 0
881	npxinit(__INITIAL_NPXCW__);
882#endif	/* NNPX > 0 */
883}
884
885static int
886sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
887{
888	int error;
889	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
890		req);
891	if (!error && req->newptr)
892		resettodr();
893	return (error);
894}
895
896SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
897	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
898
899SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
900	CTLFLAG_RW, &disable_rtc_set, 0, "");
901
902SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
903	CTLFLAG_RD, &bootinfo, bootinfo, "");
904
905SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
906	CTLFLAG_RW, &wall_cmos_clock, 0, "");
907
908/*
909 * Initialize 386 and configure to run kernel
910 */
911
912/*
913 * Initialize segments & interrupt table
914 */
915
916int currentldt;
917int _default_ldt;
918union descriptor gdt[NGDT];		/* global descriptor table */
919struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
920union descriptor ldt[NLDT];		/* local descriptor table */
921
922static struct i386tss dblfault_tss;
923static char dblfault_stack[PAGE_SIZE];
924
925extern  struct user *proc0paddr;
926
927/* software prototypes -- in more palatable form */
928struct soft_segment_descriptor gdt_segs[] = {
929/* GNULL_SEL	0 Null Descriptor */
930{	0x0,			/* segment base address  */
931	0x0,			/* length */
932	0,			/* segment type */
933	0,			/* segment descriptor priority level */
934	0,			/* segment descriptor present */
935	0, 0,
936	0,			/* default 32 vs 16 bit size */
937	0  			/* limit granularity (byte/page units)*/ },
938/* GCODE_SEL	1 Code Descriptor for kernel */
939{	0x0,			/* segment base address  */
940	0xfffff,		/* length - all address space */
941	SDT_MEMERA,		/* segment type */
942	0,			/* segment descriptor priority level */
943	1,			/* segment descriptor present */
944	0, 0,
945	1,			/* default 32 vs 16 bit size */
946	1  			/* limit granularity (byte/page units)*/ },
947/* GDATA_SEL	2 Data Descriptor for kernel */
948{	0x0,			/* segment base address  */
949	0xfffff,		/* length - all address space */
950	SDT_MEMRWA,		/* segment type */
951	0,			/* segment descriptor priority level */
952	1,			/* segment descriptor present */
953	0, 0,
954	1,			/* default 32 vs 16 bit size */
955	1  			/* limit granularity (byte/page units)*/ },
956/* GLDT_SEL	3 LDT Descriptor */
957{	(int) ldt,		/* segment base address  */
958	sizeof(ldt)-1,		/* length - all address space */
959	SDT_SYSLDT,		/* segment type */
960	0,			/* segment descriptor priority level */
961	1,			/* segment descriptor present */
962	0, 0,
963	0,			/* unused - default 32 vs 16 bit size */
964	0  			/* limit granularity (byte/page units)*/ },
965/* GTGATE_SEL	4 Null Descriptor - Placeholder */
966{	0x0,			/* segment base address  */
967	0x0,			/* length - all address space */
968	0,			/* segment type */
969	0,			/* segment descriptor priority level */
970	0,			/* segment descriptor present */
971	0, 0,
972	0,			/* default 32 vs 16 bit size */
973	0  			/* limit granularity (byte/page units)*/ },
974/* GPANIC_SEL	5 Panic Tss Descriptor */
975{	(int) &dblfault_tss,	/* segment base address  */
976	sizeof(struct i386tss)-1,/* length - all address space */
977	SDT_SYS386TSS,		/* segment type */
978	0,			/* segment descriptor priority level */
979	1,			/* segment descriptor present */
980	0, 0,
981	0,			/* unused - default 32 vs 16 bit size */
982	0  			/* limit granularity (byte/page units)*/ },
983/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
984{	(int) kstack,		/* segment base address  */
985	sizeof(struct i386tss)-1,/* length - all address space */
986	SDT_SYS386TSS,		/* segment type */
987	0,			/* segment descriptor priority level */
988	1,			/* segment descriptor present */
989	0, 0,
990	0,			/* unused - default 32 vs 16 bit size */
991	0  			/* limit granularity (byte/page units)*/ },
992/* GUSERLDT_SEL	7 User LDT Descriptor per process */
993{	(int) ldt,		/* segment base address  */
994	(512 * sizeof(union descriptor)-1),		/* length */
995	SDT_SYSLDT,		/* segment type */
996	0,			/* segment descriptor priority level */
997	1,			/* segment descriptor present */
998	0, 0,
999	0,			/* unused - default 32 vs 16 bit size */
1000	0  			/* limit granularity (byte/page units)*/ },
1001/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1002{	0,			/* segment base address (overwritten by APM)  */
1003	0xfffff,		/* length */
1004	SDT_MEMERA,		/* segment type */
1005	0,			/* segment descriptor priority level */
1006	1,			/* segment descriptor present */
1007	0, 0,
1008	1,			/* default 32 vs 16 bit size */
1009	1  			/* limit granularity (byte/page units)*/ },
1010/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1011{	0,			/* segment base address (overwritten by APM)  */
1012	0xfffff,		/* length */
1013	SDT_MEMERA,		/* segment type */
1014	0,			/* segment descriptor priority level */
1015	1,			/* segment descriptor present */
1016	0, 0,
1017	0,			/* default 32 vs 16 bit size */
1018	1  			/* limit granularity (byte/page units)*/ },
1019/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1020{	0,			/* segment base address (overwritten by APM) */
1021	0xfffff,		/* length */
1022	SDT_MEMRWA,		/* segment type */
1023	0,			/* segment descriptor priority level */
1024	1,			/* segment descriptor present */
1025	0, 0,
1026	1,			/* default 32 vs 16 bit size */
1027	1  			/* limit granularity (byte/page units)*/ },
1028};
1029
1030static struct soft_segment_descriptor ldt_segs[] = {
1031	/* Null Descriptor - overwritten by call gate */
1032{	0x0,			/* segment base address  */
1033	0x0,			/* length - all address space */
1034	0,			/* segment type */
1035	0,			/* segment descriptor priority level */
1036	0,			/* segment descriptor present */
1037	0, 0,
1038	0,			/* default 32 vs 16 bit size */
1039	0  			/* limit granularity (byte/page units)*/ },
1040	/* Null Descriptor - overwritten by call gate */
1041{	0x0,			/* segment base address  */
1042	0x0,			/* length - all address space */
1043	0,			/* segment type */
1044	0,			/* segment descriptor priority level */
1045	0,			/* segment descriptor present */
1046	0, 0,
1047	0,			/* default 32 vs 16 bit size */
1048	0  			/* limit granularity (byte/page units)*/ },
1049	/* Null Descriptor - overwritten by call gate */
1050{	0x0,			/* segment base address  */
1051	0x0,			/* length - all address space */
1052	0,			/* segment type */
1053	0,			/* segment descriptor priority level */
1054	0,			/* segment descriptor present */
1055	0, 0,
1056	0,			/* default 32 vs 16 bit size */
1057	0  			/* limit granularity (byte/page units)*/ },
1058	/* Code Descriptor for user */
1059{	0x0,			/* segment base address  */
1060	0xfffff,		/* length - all address space */
1061	SDT_MEMERA,		/* segment type */
1062	SEL_UPL,		/* segment descriptor priority level */
1063	1,			/* segment descriptor present */
1064	0, 0,
1065	1,			/* default 32 vs 16 bit size */
1066	1  			/* limit granularity (byte/page units)*/ },
1067	/* Data Descriptor for user */
1068{	0x0,			/* segment base address  */
1069	0xfffff,		/* length - all address space */
1070	SDT_MEMRWA,		/* segment type */
1071	SEL_UPL,		/* segment descriptor priority level */
1072	1,			/* segment descriptor present */
1073	0, 0,
1074	1,			/* default 32 vs 16 bit size */
1075	1  			/* limit granularity (byte/page units)*/ },
1076};
1077
1078void
1079setidt(idx, func, typ, dpl, selec)
1080	int idx;
1081	inthand_t *func;
1082	int typ;
1083	int dpl;
1084	int selec;
1085{
1086	struct gate_descriptor *ip = idt + idx;
1087
1088	ip->gd_looffset = (int)func;
1089	ip->gd_selector = selec;
1090	ip->gd_stkcpy = 0;
1091	ip->gd_xx = 0;
1092	ip->gd_type = typ;
1093	ip->gd_dpl = dpl;
1094	ip->gd_p = 1;
1095	ip->gd_hioffset = ((int)func)>>16 ;
1096}
1097
1098#define	IDTVEC(name)	__CONCAT(X,name)
1099
1100extern inthand_t
1101	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1102	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
1103	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1104	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1105	IDTVEC(syscall), IDTVEC(int0x80_syscall);
1106
1107void
1108sdtossd(sd, ssd)
1109	struct segment_descriptor *sd;
1110	struct soft_segment_descriptor *ssd;
1111{
1112	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1113	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1114	ssd->ssd_type  = sd->sd_type;
1115	ssd->ssd_dpl   = sd->sd_dpl;
1116	ssd->ssd_p     = sd->sd_p;
1117	ssd->ssd_def32 = sd->sd_def32;
1118	ssd->ssd_gran  = sd->sd_gran;
1119}
1120
1121void
1122init386(first)
1123	int first;
1124{
1125	int x;
1126	unsigned biosbasemem, biosextmem;
1127	struct gate_descriptor *gdp;
1128	int gsel_tss;
1129	/* table descriptors - used to load tables by microp */
1130	struct region_descriptor r_gdt, r_idt;
1131	int	pagesinbase, pagesinext;
1132	int	target_page, pa_indx;
1133
1134	proc0.p_addr = proc0paddr;
1135
1136	atdevbase = ISA_HOLE_START + KERNBASE;
1137
1138	/*
1139	 * Initialize the console before we print anything out.
1140	 */
1141	cninit();
1142
1143	/*
1144	 * make gdt memory segments, the code segment goes up to end of the
1145	 * page with etext in it, the data segment goes to the end of
1146	 * the address space
1147	 */
1148	/*
1149	 * XXX text protection is temporarily (?) disabled.  The limit was
1150	 * i386_btop(round_page(etext)) - 1.
1151	 */
1152	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1153	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1154	for (x = 0; x < NGDT; x++)
1155		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1156
1157	/* make ldt memory segments */
1158	/*
1159	 * The data segment limit must not cover the user area because we
1160	 * don't want the user area to be writable in copyout() etc. (page
1161	 * level protection is lost in kernel mode on 386's).  Also, we
1162	 * don't want the user area to be writable directly (page level
1163	 * protection of the user area is not available on 486's with
1164	 * CR0_WP set, because there is no user-read/kernel-write mode).
1165	 *
1166	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1167	 * should be spelled ...MAX_USER...
1168	 */
1169#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1170	/*
1171	 * The code segment limit has to cover the user area until we move
1172	 * the signal trampoline out of the user area.  This is safe because
1173	 * the code segment cannot be written to directly.
1174	 */
1175#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE)
1176	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1177	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1178	/* Note. eventually want private ldts per process */
1179	for (x = 0; x < NLDT; x++)
1180		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1181
1182	/* exceptions */
1183	for (x = 0; x < NIDT; x++)
1184		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1185	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1186	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1187	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1188 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1189	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1190	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1191	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1192	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1193	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
1194	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1195	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1196	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1197	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1198	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1199	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1200	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1201	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1202	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1203 	setidt(0x80, &IDTVEC(int0x80_syscall),
1204			SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1205
1206#include	"isa.h"
1207#if	NISA >0
1208	isa_defaultirq();
1209#endif
1210	rand_initialize();
1211
1212	r_gdt.rd_limit = sizeof(gdt) - 1;
1213	r_gdt.rd_base =  (int) gdt;
1214	lgdt(&r_gdt);
1215
1216	r_idt.rd_limit = sizeof(idt) - 1;
1217	r_idt.rd_base = (int) idt;
1218	lidt(&r_idt);
1219
1220	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1221	lldt(_default_ldt);
1222	currentldt = _default_ldt;
1223
1224#ifdef DDB
1225	kdb_init();
1226	if (boothowto & RB_KDB)
1227		Debugger("Boot flags requested debugger");
1228#endif
1229
1230	/* Use BIOS values stored in RTC CMOS RAM, since probing
1231	 * breaks certain 386 AT relics.
1232	 */
1233	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1234	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1235
1236	/*
1237	 * Print a warning if the official BIOS interface disagrees
1238	 * with the hackish interface used above.  Eventually only
1239	 * the official interface should be used.
1240	 */
1241	if (bootinfo.bi_memsizes_valid) {
1242		if (bootinfo.bi_basemem != biosbasemem)
1243			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1244			       bootinfo.bi_basemem, biosbasemem);
1245		if (bootinfo.bi_extmem != biosextmem)
1246			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1247			       bootinfo.bi_extmem, biosextmem);
1248	}
1249
1250	/*
1251	 * If BIOS tells us that it has more than 640k in the basemem,
1252	 *	don't believe it - set it to 640k.
1253	 */
1254	if (biosbasemem > 640)
1255		biosbasemem = 640;
1256
1257	/*
1258	 * Some 386 machines might give us a bogus number for extended
1259	 *	mem. If this happens, stop now.
1260	 */
1261#ifndef LARGEMEM
1262	if (biosextmem > 65536) {
1263		panic("extended memory beyond limit of 64MB");
1264		/* NOTREACHED */
1265	}
1266#endif
1267
1268	pagesinbase = biosbasemem * 1024 / PAGE_SIZE;
1269	pagesinext = biosextmem * 1024 / PAGE_SIZE;
1270
1271	/*
1272	 * Special hack for chipsets that still remap the 384k hole when
1273	 *	there's 16MB of memory - this really confuses people that
1274	 *	are trying to use bus mastering ISA controllers with the
1275	 *	"16MB limit"; they only have 16MB, but the remapping puts
1276	 *	them beyond the limit.
1277	 */
1278	/*
1279	 * If extended memory is between 15-16MB (16-17MB phys address range),
1280	 *	chop it to 15MB.
1281	 */
1282	if ((pagesinext > 3840) && (pagesinext < 4096))
1283		pagesinext = 3840;
1284
1285	/*
1286	 * Maxmem isn't the "maximum memory", it's one larger than the
1287	 * highest page of the physical address space.  It should be
1288	 * called something like "Maxphyspage".
1289	 */
1290	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1291
1292#ifdef MAXMEM
1293	Maxmem = MAXMEM/4;
1294#endif
1295
1296	/* call pmap initialization to make new kernel address space */
1297	pmap_bootstrap (first, 0);
1298
1299	/*
1300	 * Size up each available chunk of physical memory.
1301	 */
1302
1303	/*
1304	 * We currently don't bother testing base memory.
1305	 * XXX  ...but we probably should.
1306	 */
1307	pa_indx = 0;
1308	badpages = 0;
1309	if (pagesinbase > 1) {
1310		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
1311		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
1312		physmem = pagesinbase - 1;
1313	} else {
1314		/* point at first chunk end */
1315		pa_indx++;
1316	}
1317
1318	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
1319		int tmp, page_bad = FALSE;
1320
1321		/*
1322		 * map page into kernel: valid, read/write, non-cacheable
1323		 */
1324		*(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page;
1325		pmap_update();
1326
1327		tmp = *(int *)CADDR1;
1328		/*
1329		 * Test for alternating 1's and 0's
1330		 */
1331		*(volatile int *)CADDR1 = 0xaaaaaaaa;
1332		if (*(volatile int *)CADDR1 != 0xaaaaaaaa) {
1333			page_bad = TRUE;
1334		}
1335		/*
1336		 * Test for alternating 0's and 1's
1337		 */
1338		*(volatile int *)CADDR1 = 0x55555555;
1339		if (*(volatile int *)CADDR1 != 0x55555555) {
1340			page_bad = TRUE;
1341		}
1342		/*
1343		 * Test for all 1's
1344		 */
1345		*(volatile int *)CADDR1 = 0xffffffff;
1346		if (*(volatile int *)CADDR1 != 0xffffffff) {
1347			page_bad = TRUE;
1348		}
1349		/*
1350		 * Test for all 0's
1351		 */
1352		*(volatile int *)CADDR1 = 0x0;
1353		if (*(volatile int *)CADDR1 != 0x0) {
1354			/*
1355			 * test of page failed
1356			 */
1357			page_bad = TRUE;
1358		}
1359		/*
1360		 * Restore original value.
1361		 */
1362		*(int *)CADDR1 = tmp;
1363
1364		/*
1365		 * Adjust array of valid/good pages.
1366		 */
1367		if (page_bad == FALSE) {
1368			/*
1369			 * If this good page is a continuation of the
1370			 * previous set of good pages, then just increase
1371			 * the end pointer. Otherwise start a new chunk.
1372			 * Note that "end" points one higher than end,
1373			 * making the range >= start and < end.
1374			 */
1375			if (phys_avail[pa_indx] == target_page) {
1376				phys_avail[pa_indx] += PAGE_SIZE;
1377			} else {
1378				pa_indx++;
1379				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1380					printf("Too many holes in the physical address space, giving up\n");
1381					pa_indx--;
1382					break;
1383				}
1384				phys_avail[pa_indx++] = target_page;	/* start */
1385				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
1386			}
1387			physmem++;
1388		} else {
1389			badpages++;
1390			page_bad = FALSE;
1391		}
1392	}
1393
1394	*(int *)CMAP1 = 0;
1395	pmap_update();
1396
1397	/*
1398	 * XXX
1399	 * The last chunk must contain at least one page plus the message
1400	 * buffer to avoid complicating other code (message buffer address
1401	 * calculation, etc.).
1402	 */
1403	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1404	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
1405		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1406		phys_avail[pa_indx--] = 0;
1407		phys_avail[pa_indx--] = 0;
1408	}
1409
1410	Maxmem = atop(phys_avail[pa_indx]);
1411
1412	/* Trim off space for the message buffer. */
1413	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));
1414
1415	avail_end = phys_avail[pa_indx];
1416
1417	/* now running on new page tables, configured,and u/iom is accessible */
1418
1419	/* make a initial tss so microp can get interrupt stack on syscall! */
1420	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*PAGE_SIZE;
1421	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1422	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1423
1424	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
1425	    dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
1426	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
1427	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
1428	dblfault_tss.tss_cr3 = IdlePTD;
1429	dblfault_tss.tss_eip = (int) dblfault_handler;
1430	dblfault_tss.tss_eflags = PSL_KERNEL;
1431	dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs =
1432		GSEL(GDATA_SEL, SEL_KPL);
1433	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
1434	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
1435
1436	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1437		(sizeof(struct i386tss))<<16;
1438
1439	ltr(gsel_tss);
1440
1441	/* make a call gate to reenter kernel with */
1442	gdp = &ldt[LSYS5CALLS_SEL].gd;
1443
1444	x = (int) &IDTVEC(syscall);
1445	gdp->gd_looffset = x++;
1446	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1447	gdp->gd_stkcpy = 1;
1448	gdp->gd_type = SDT_SYS386CGT;
1449	gdp->gd_dpl = SEL_UPL;
1450	gdp->gd_p = 1;
1451	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1452
1453	/* transfer to user mode */
1454
1455	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1456	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1457
1458	/* setup proc 0's pcb */
1459	proc0.p_addr->u_pcb.pcb_flags = 0;
1460	proc0.p_addr->u_pcb.pcb_cr3 = IdlePTD;
1461}
1462
/*
 * TF_REGP(p) - find process p's saved registers via its user area.
 *
 * The registers live in a trap frame, and the frame lives in the user
 * area of the process.  While the process is active the frame is
 * reached through "the kernel stack"; when it is not, the frame is
 * still in the user area but no longer at that address.  Since
 * p->p_md.md_regs holds the full kernel-stack address of the register
 * set, subtract kstack to get its offset and apply that offset to
 * p->p_addr, the user block.
 *
 * Note that `p' is evaluated twice.
 */
#define	TF_REGP(p)							\
	((struct trapframe *)						\
	    ((char *)(p)->p_addr +					\
	     ((char *)(p)->p_md.md_regs - kstack)))
1475
1476int
1477ptrace_set_pc(p, addr)
1478	struct proc *p;
1479	unsigned int addr;
1480{
1481	TF_REGP(p)->tf_eip = addr;
1482	return (0);
1483}
1484
1485int
1486ptrace_single_step(p)
1487	struct proc *p;
1488{
1489	TF_REGP(p)->tf_eflags |= PSL_T;
1490	return (0);
1491}
1492
1493int ptrace_write_u(p, off, data)
1494	struct proc *p;
1495	vm_offset_t off;
1496	int data;
1497{
1498	struct trapframe frame_copy;
1499	vm_offset_t min;
1500	struct trapframe *tp;
1501
1502	/*
1503	 * Privileged kernel state is scattered all over the user area.
1504	 * Only allow write access to parts of regs and to fpregs.
1505	 */
1506	min = (char *)p->p_md.md_regs - kstack;
1507	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1508		tp = TF_REGP(p);
1509		frame_copy = *tp;
1510		*(int *)((char *)&frame_copy + (off - min)) = data;
1511		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1512		    !CS_SECURE(frame_copy.tf_cs))
1513			return (EINVAL);
1514		*(int*)((char *)p->p_addr + off) = data;
1515		return (0);
1516	}
1517	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1518	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1519		*(int*)((char *)p->p_addr + off) = data;
1520		return (0);
1521	}
1522	return (EFAULT);
1523}
1524
1525int
1526fill_regs(p, regs)
1527	struct proc *p;
1528	struct reg *regs;
1529{
1530	struct trapframe *tp;
1531
1532	tp = TF_REGP(p);
1533	regs->r_es = tp->tf_es;
1534	regs->r_ds = tp->tf_ds;
1535	regs->r_edi = tp->tf_edi;
1536	regs->r_esi = tp->tf_esi;
1537	regs->r_ebp = tp->tf_ebp;
1538	regs->r_ebx = tp->tf_ebx;
1539	regs->r_edx = tp->tf_edx;
1540	regs->r_ecx = tp->tf_ecx;
1541	regs->r_eax = tp->tf_eax;
1542	regs->r_eip = tp->tf_eip;
1543	regs->r_cs = tp->tf_cs;
1544	regs->r_eflags = tp->tf_eflags;
1545	regs->r_esp = tp->tf_esp;
1546	regs->r_ss = tp->tf_ss;
1547	return (0);
1548}
1549
1550int
1551set_regs(p, regs)
1552	struct proc *p;
1553	struct reg *regs;
1554{
1555	struct trapframe *tp;
1556
1557	tp = TF_REGP(p);
1558	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1559	    !CS_SECURE(regs->r_cs))
1560		return (EINVAL);
1561	tp->tf_es = regs->r_es;
1562	tp->tf_ds = regs->r_ds;
1563	tp->tf_edi = regs->r_edi;
1564	tp->tf_esi = regs->r_esi;
1565	tp->tf_ebp = regs->r_ebp;
1566	tp->tf_ebx = regs->r_ebx;
1567	tp->tf_edx = regs->r_edx;
1568	tp->tf_ecx = regs->r_ecx;
1569	tp->tf_eax = regs->r_eax;
1570	tp->tf_eip = regs->r_eip;
1571	tp->tf_cs = regs->r_cs;
1572	tp->tf_eflags = regs->r_eflags;
1573	tp->tf_esp = regs->r_esp;
1574	tp->tf_ss = regs->r_ss;
1575	return (0);
1576}
1577
#ifndef DDB
/*
 * Stand-in used when DDB is not configured: report that a debugger
 * entry was requested, quoting `msg', and return to the caller.
 */
void
Debugger(const char *msg)
{

	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* !DDB */
1585
1586#include <sys/disklabel.h>
1587#define b_cylin	b_resid
1588/*
1589 * Determine the size of the transfer, and make sure it is
1590 * within the boundaries of the partition. Adjust transfer
1591 * if needed, and signal errors or early completion.
1592 */
1593int
1594bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1595{
1596        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1597        int labelsect = lp->d_partitions[0].p_offset;
1598        int maxsz = p->p_size,
1599                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1600
1601        /* overwriting disk label ? */
1602        /* XXX should also protect bootstrap in first 8K */
1603        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1604#if LABELSECTOR != 0
1605            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1606#endif
1607            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1608                bp->b_error = EROFS;
1609                goto bad;
1610        }
1611
1612#if     defined(DOSBBSECTOR) && defined(notyet)
1613        /* overwriting master boot record? */
1614        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1615            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1616                bp->b_error = EROFS;
1617                goto bad;
1618        }
1619#endif
1620
1621        /* beyond partition? */
1622        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1623                /* if exactly at end of disk, return an EOF */
1624                if (bp->b_blkno == maxsz) {
1625                        bp->b_resid = bp->b_bcount;
1626                        return(0);
1627                }
1628                /* or truncate if part of it fits */
1629                sz = maxsz - bp->b_blkno;
1630                if (sz <= 0) {
1631                        bp->b_error = EINVAL;
1632                        goto bad;
1633                }
1634                bp->b_bcount = sz << DEV_BSHIFT;
1635        }
1636
1637        /* calculate cylinder for disksort to order transfers with */
1638        bp->b_pblkno = bp->b_blkno + p->p_offset;
1639        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1640        return(1);
1641
1642bad:
1643        bp->b_flags |= B_ERROR;
1644        return(-1);
1645}
1646
/*
 * Hand the drive number `drive' to the sysctl request `req' via
 * SYSCTL_OUT() and return that macro's result.
 */
int
disk_externalize(int drive, struct sysctl_req *req)
{
	int error;

	error = SYSCTL_OUT(req, &drive, sizeof(drive));
	return (error);
}
1652