/*-
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 *	$Id: machdep.c,v 1.202 1996/09/06 23:07:03 phk Exp $
 */

#include "npx.h"
#include "opt_sysvipc.h"
#include "opt_ddb.h"
#include "opt_bounce.h"
#include "opt_machdep.h"
#include "opt_perfmon.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/reboot.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/callout.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/msgbuf.h>
#include <sys/ioctl.h>
#include <sys/sysent.h>
#include <sys/tty.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#ifdef SYSVMSG
#include <sys/msg.h>
#endif

#ifdef SYSVSEM
#include <sys/sem.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <sys/user.h>
#include <sys/exec.h>
#include <sys/vnode.h>

#include <ddb/ddb.h>

#include <net/netisr.h>

#include <machine/cpu.h>
#include <machine/npx.h>
#include <machine/reg.h>
#include <machine/psl.h>
#include <machine/clock.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>
#include <machine/cons.h>
#include <machine/bootinfo.h>
#include <machine/md_var.h>
#ifdef PERFMON
#include <machine/perfmon.h>
#endif

#include <i386/isa/isa_device.h>
#include <i386/isa/rtc.h>
#include <machine/random.h>

extern void init386 __P((int first));
extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
extern int ptrace_single_step __P((struct proc *p));
extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
extern void dblfault_handler __P((void));

extern void identifycpu(void);	/* XXX header file */
extern void earlysetcpuclass(void);	/* same header file */

static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)


#ifdef BOUNCE_BUFFERS
extern char *bouncememory;
extern int maxbkva;
#ifdef BOUNCEPAGES
int	bouncepages = BOUNCEPAGES;
#else
int	bouncepages = 0;
#endif
#endif	/* BOUNCE_BUFFERS */

extern int freebufspace;
int	msgbufmapped = 0;		/* set when safe to use msgbuf */
int _udatasel, _ucodesel;
u_int	atdevbase;


int physmem = 0;
int cold = 1;

static int
sysctl_hw_physmem SYSCTL_HANDLER_ARGS
{
	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_physmem, "I", "");

static int
sysctl_hw_usermem SYSCTL_HANDLER_ARGS
{
	int error = sysctl_handle_int(oidp, 0,
		ctob(physmem - cnt.v_wire_count), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_usermem, "I", "");

int boothowto = 0, bootverbose = 0, Maxmem = 0;
static int	badpages = 0;
long dumplo;
extern int bootdev;

vm_offset_t phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
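
/*
 * Illustrative note (not in the original source): phys_avail[] holds
 * (start, end) pairs of usable physical memory, terminated by a pair
 * of zeroes.  With hypothetical addresses, an 8MB machine with the
 * ISA hole skipped might end up with:
 *	phys_avail[0] = 0x00001000	phys_avail[1] = 0x000a0000
 *	phys_avail[2] = 0x00100000	phys_avail[3] = 0x00800000
 *	phys_avail[4] = 0		phys_avail[5] = 0
 * See the memory sizing loop in init386() below for how the pairs
 * are actually filled in.
 */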

static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */

static vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
extern struct linker_set netisr_set;

#define offsetof(type, member)	((size_t)(&((type *)0)->member))
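
/*
 * Illustrative note (not in the original source): this classic
 * offsetof idiom computes a member's byte offset by pretending a
 * struct sits at address 0.  For example, sigreturn() below uses
 * offsetof(struct sigframe, sf_sc) to step back from a user's
 * sigcontext pointer to the enclosing sigframe.
 */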

static void
cpu_startup(dummy)
	void *dummy;
{
	register unsigned i;
	register caddr_t v;
	vm_offset_t maxaddr;
	vm_size_t size = 0;
	int firstaddr;
	vm_offset_t minaddr;

	if (boothowto & RB_VERBOSE)
		bootverbose++;

	/*
	 * Initialize error message buffer (at end of core).
	 */

	/* avail_end was pre-decremented in init386() to compensate */
	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
			   avail_end + i * PAGE_SIZE,
			   VM_PROT_ALL, TRUE);
	msgbufmapped = 1;

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	printf(version);
	earlysetcpuclass();
	startrtclock();
	identifycpu();
#ifdef PERFMON
	perfmon_init();
#endif
	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (badpages != 0) {
		int indx = 1;

		/*
		 * XXX skip reporting ISA hole & unmanaged kernel memory
		 */
		if (phys_avail[0] == PAGE_SIZE)
			indx += 2;

		printf("Physical memory hole(s):\n");
		for (; phys_avail[indx + 1] != 0; indx += 2) {
			int size = phys_avail[indx + 1] - phys_avail[indx];

			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
		}
	}

	/*
	 * Quickly wire in netisrs.
	 */
	setup_netisrs(&netisr_set);

/*
#ifdef ISDN
	DONET(isdnintr, NETISR_ISDN);
#endif
*/

	/*
	 * Allocate space for system data structures.
	 * The first available kernel virtual address is in "v".
	 * As pages of kernel virtual memory are allocated, "v" is incremented.
	 * As pages of memory are allocated and cleared,
	 * "firstaddr" is incremented.
	 * An index into the kernel page table corresponding to the
	 * virtual memory address maintained in "v" is kept in "mapaddr".
	 */

	/*
	 * Make two passes.  The first pass calculates how much memory is
	 * needed and allocates it.  The second pass assigns virtual
	 * addresses to the various data structures.
	 */
	firstaddr = 0;
again:
	v = (caddr_t)firstaddr;

#define	valloc(name, type, num) \
	    (name) = (type *)v; v = (caddr_t)((name)+(num))
#define	valloclim(name, type, num, lim) \
	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
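
	/*
	 * Illustrative note (not in the original source): valloc() only
	 * advances the cursor "v"; it allocates no memory itself.  With
	 * hypothetical numbers, if sizeof(struct callout) == 16 and
	 * ncallout == 256, valloc(callout, struct callout, ncallout)
	 * sets callout = v and bumps v by 4096 bytes.  On the first pass
	 * v starts at 0, so the final v is simply the total size handed
	 * to kmem_alloc(); the second pass replays the same walk over
	 * the real allocation.
	 */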
	valloc(callout, struct callout, ncallout);
#ifdef SYSVSHM
	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
#ifdef SYSVSEM
	valloc(sema, struct semid_ds, seminfo.semmni);
	valloc(sem, struct sem, seminfo.semmns);
	/* This is pretty disgusting! */
	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
#endif
#ifdef SYSVMSG
	valloc(msgpool, char, msginfo.msgmax);
	valloc(msgmaps, struct msgmap, msginfo.msgseg);
	valloc(msghdrs, struct msg, msginfo.msgtql);
	valloc(msqids, struct msqid_ds, msginfo.msgmni);
#endif

	if (nbuf == 0) {
		nbuf = 30;
		if (physmem > 1024)
			nbuf += min((physmem - 1024) / 12, 1024);
	}
	nswbuf = min(nbuf, 128);

	valloc(swbuf, struct buf, nswbuf);
	valloc(buf, struct buf, nbuf);

#ifdef BOUNCE_BUFFERS
	/*
	 * If there is more than 16MB of memory, allocate some bounce buffers.
	 */
	if (Maxmem > 4096) {
		if (bouncepages == 0) {
			bouncepages = 64;
			bouncepages += ((Maxmem - 4096) / 2048) * 32;
		}
		v = (caddr_t)((vm_offset_t)round_page(v));
		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
	}
#endif

	/*
	 * End of first pass; the size has been calculated, so allocate memory.
	 */
	if (firstaddr == 0) {
		size = (vm_size_t)(v - firstaddr);
		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
		if (firstaddr == 0)
			panic("startup: no room for tables");
		goto again;
	}

	/*
	 * End of second pass; addresses have been assigned.
	 */
	if ((vm_size_t)(v - firstaddr) != size)
		panic("startup: table size inconsistency");

#ifdef BOUNCE_BUFFERS
	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
				maxbkva + pager_map_size, TRUE);
	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
#else
	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
#endif
	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
				(nbuf*MAXBSIZE), TRUE);
	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
				(16*ARG_MAX), TRUE);
	exech_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
				(32*ARG_MAX), TRUE);
	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
				(maxproc*UPAGES*PAGE_SIZE), FALSE);

	/*
	 * Finally, allocate the mbuf pool.  Since mclrefcnt is an odd size
	 * we use the more space-efficient malloc in place of kmem_alloc.
	 */
	mclrefcnt = (char *)malloc(nmbclusters+PAGE_SIZE/MCLBYTES,
				   M_MBUF, M_NOWAIT);
	bzero(mclrefcnt, nmbclusters+PAGE_SIZE/MCLBYTES);
	mcl_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
			       nmbclusters * MCLBYTES, FALSE);
	{
		vm_size_t mb_map_size;
		mb_map_size = nmbufs * MSIZE;
		mb_map = kmem_suballoc(kmem_map, &minaddr, &maxaddr,
				       round_page(mb_map_size), FALSE);
	}

	/*
	 * Initialize callouts.
	 */
	callfree = callout;
	for (i = 1; i < ncallout; i++)
		callout[i-1].c_next = &callout[i];

	if (boothowto & RB_CONFIG) {
		userconfig();
		cninit();	/* the preferred console may have changed */
	}

#ifdef BOUNCE_BUFFERS
	/*
	 * Initialize bounce buffers.
	 */
	vm_bounce_init();
#endif

	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
	    ptoa(cnt.v_free_count) / 1024);

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
	vm_pager_bufferinit();

	/*
	 * In verbose mode, print out the BIOS's idea of the disk geometries.
	 */
	if (bootverbose) {
		printf("BIOS Geometries:\n");
		for (i = 0; i < N_BIOS_GEOM; i++) {
			unsigned long bios_geom;
			int max_cylinder, max_head, max_sector;

			bios_geom = bootinfo.bi_bios_geom[i];

			/*
			 * XXX the bootstrap punts a 1200K floppy geometry
			 * when the get-disk-geometry interrupt fails.  Skip
			 * drives that have this geometry.
			 */
			if (bios_geom == 0x4f010f)
				continue;

			printf(" %x:%08lx ", i, bios_geom);
			max_cylinder = bios_geom >> 16;
			max_head = (bios_geom >> 8) & 0xff;
			max_sector = bios_geom & 0xff;
			printf(
		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
			       max_cylinder, max_cylinder + 1,
			       max_head, max_head + 1,
			       max_sector, max_sector);
		}
		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
	}
}

int
register_netisr(num, handler)
	int num;
	netisr_t *handler;
{

	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
		printf("register_netisr: bad isr number: %d\n", num);
		return (EINVAL);
	}
	netisrs[num] = handler;
	return (0);
}

static void
setup_netisrs(ls)
	struct linker_set *ls;
{
	int i;
	const struct netisrtab *nit;

	for (i = 0; ls->ls_items[i]; i++) {
		nit = (const struct netisrtab *)ls->ls_items[i];
		register_netisr(nit->nit_num, nit->nit_isr);
	}
}

/*
 * Send an interrupt to a process.
 *
 * The stack is set up so that the sigcode stored at its top calls the
 * handler routine, followed by a kcall to the sigreturn routine below.
 * After sigreturn resets the signal mask, the stack, and the frame
 * pointer, it returns to the user-specified pc and psl.
 */
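/*
 * Illustrative sketch (not in the original source): after sendsig()
 * the user stack roughly looks like this, growing downward:
 *
 *	[ old stack contents          ]  <- old %esp
 *	[ struct sigframe:            ]
 *	[   sf_signum, sf_code, ...   ]  arguments for the handler
 *	[   sf_sc (saved sigcontext)  ]  restored later by sigreturn()
 *	                                 <- new %esp (== fp)
 *
 * %eip is then pointed at the signal trampoline just below PS_STRINGS,
 * which calls the handler and then sigreturn().
 */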
void
sendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig, mask;
	u_long code;
{
	register struct proc *p = curproc;
	register int *regs;
	register struct sigframe *fp;
	struct sigframe sf;
	struct sigacts *psp = p->p_sigacts;
	int oonstack;

	regs = p->p_md.md_regs;
	oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
	/*
	 * Allocate and validate space for the signal handler context.
	 */
	if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
	    (psp->ps_sigonstack & sigmask(sig))) {
		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
		psp->ps_sigstk.ss_flags |= SS_ONSTACK;
	} else {
		fp = (struct sigframe *)regs[tESP] - 1;
	}

	/*
	 * grow() will return FALSE if the fp will not fit inside the stack
	 *	and the stack cannot be grown.  useracc() will return FALSE
	 *	if access is denied.
	 */
	if ((grow(p, (int)fp) == FALSE) ||
	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		SIGACTION(p, SIGILL) = SIG_DFL;
		sig = sigmask(SIGILL);
		p->p_sigignore &= ~sig;
		p->p_sigcatch &= ~sig;
		p->p_sigmask &= ~sig;
		psignal(p, SIGILL);
		return;
	}

	/*
	 * Build the argument list for the signal handler.
	 */
	if (p->p_sysent->sv_sigtbl) {
		if (sig < p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[sig];
		else
			sig = p->p_sysent->sv_sigsize + 1;
	}
	sf.sf_signum = sig;
	sf.sf_code = code;
	sf.sf_scp = &fp->sf_sc;
	sf.sf_addr = (char *) regs[tERR];
	sf.sf_handler = catcher;

	/* save scratch registers */
	sf.sf_sc.sc_eax = regs[tEAX];
	sf.sf_sc.sc_ebx = regs[tEBX];
	sf.sf_sc.sc_ecx = regs[tECX];
	sf.sf_sc.sc_edx = regs[tEDX];
	sf.sf_sc.sc_esi = regs[tESI];
	sf.sf_sc.sc_edi = regs[tEDI];
	sf.sf_sc.sc_cs = regs[tCS];
	sf.sf_sc.sc_ds = regs[tDS];
	sf.sf_sc.sc_ss = regs[tSS];
	sf.sf_sc.sc_es = regs[tES];
	sf.sf_sc.sc_isp = regs[tISP];

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	sf.sf_sc.sc_onstack = oonstack;
	sf.sf_sc.sc_mask = mask;
	sf.sf_sc.sc_sp = regs[tESP];
	sf.sf_sc.sc_fp = regs[tEBP];
	sf.sf_sc.sc_pc = regs[tEIP];
	sf.sf_sc.sc_ps = regs[tEFLAGS];

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		sigexit(p, SIGILL);
	}

	regs[tESP] = (int)fp;
	regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode));
	regs[tEFLAGS] &= ~PSL_VM;
	regs[tCS] = _ucodesel;
	regs[tDS] = _udatasel;
	regs[tES] = _udatasel;
	regs[tSS] = _udatasel;
}

/*
 * System call to clean up state after a signal has been taken.  Reset
 * the signal mask and stack state from the context left by sendsig
 * (above).  Return to the previous pc and psl as specified by that
 * context.  Check carefully to make sure that the user has not
 * modified the state to gain improper privileges.
 */
int
sigreturn(p, uap, retval)
	struct proc *p;
	struct sigreturn_args /* {
		struct sigcontext *sigcntxp;
	} */ *uap;
	int *retval;
{
	register struct sigcontext *scp;
	register struct sigframe *fp;
	register int *regs = p->p_md.md_regs;
	int eflags;

	/*
	 * (XXX old comment) regs[tESP] points to the return address.
	 * The user scp pointer is above that.
	 * The return address is faked in the signal trampoline code
	 * for consistency.
	 */
	scp = uap->sigcntxp;
	fp = (struct sigframe *)
	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));

	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
		return (EINVAL);

	/*
	 * Don't allow users to change privileged or reserved flags.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
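	/*
	 * Illustrative note (not in the original source): XORing the
	 * proposed eflags with the current ones leaves a 1 in every bit
	 * that would change; masking out PSL_USERCHANGE then keeps only
	 * the privileged and reserved bits.  If any of those survive,
	 * the user tried to flip a bit it may not touch and the check
	 * fails.
	 */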
	eflags = scp->sc_ps;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
#ifdef DEBUG
		printf("sigreturn: eflags = 0x%x\n", eflags);
#endif
		return (EINVAL);
	}

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(scp->sc_cs)) {
#ifdef DEBUG
		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
#endif
		trapsignal(p, SIGBUS, T_PROTFLT);
		return (EINVAL);
	}

	/* restore scratch registers */
	regs[tEAX] = scp->sc_eax;
	regs[tEBX] = scp->sc_ebx;
	regs[tECX] = scp->sc_ecx;
	regs[tEDX] = scp->sc_edx;
	regs[tESI] = scp->sc_esi;
	regs[tEDI] = scp->sc_edi;
	regs[tCS] = scp->sc_cs;
	regs[tDS] = scp->sc_ds;
	regs[tES] = scp->sc_es;
	regs[tSS] = scp->sc_ss;
	regs[tISP] = scp->sc_isp;

	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
		return (EINVAL);

	if (scp->sc_onstack & 01)
		p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK;
	else
		p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
	p->p_sigmask = scp->sc_mask &~
	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
	regs[tEBP] = scp->sc_fp;
	regs[tESP] = scp->sc_sp;
	regs[tEIP] = scp->sc_pc;
	regs[tEFLAGS] = eflags;
	return (EJUSTRETURN);
}

/*
 * Machine dependent boot() routine.
 *
 * I haven't seen anything to put here yet.
 * Possibly some stuff might be grafted back here from boot().
 */
void
cpu_boot(int howto)
{
}

/*
 * Clear registers on exec
 */
void
setregs(p, entry, stack)
	struct proc *p;
	u_long entry;
	u_long stack;
{
	int *regs = p->p_md.md_regs;

#ifdef USER_LDT
	struct pcb *pcb = &p->p_addr->u_pcb;

	/* was i386_user_cleanup() in NetBSD */
	if (pcb->pcb_ldt) {
		if (pcb == curpcb)
			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
			pcb->pcb_ldt_len * sizeof(union descriptor));
		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
	}
#endif

	bzero(regs, sizeof(struct trapframe));
	regs[tEIP] = entry;
	regs[tESP] = stack;
	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
	regs[tSS] = _udatasel;
	regs[tDS] = _udatasel;
	regs[tES] = _udatasel;
	regs[tCS] = _ucodesel;

	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
	load_cr0(rcr0() | CR0_TS);	/* start emulating */
#if	NNPX > 0
	npxinit(__INITIAL_NPXCW__);
#endif	/* NNPX > 0 */
}

static int
sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
{
	int error;
	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
		req);
	if (!error && req->newptr)
		resettodr();
	return (error);
}

SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
	CTLFLAG_RD, &bootinfo, bootinfo, "");

SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
	CTLFLAG_RW, &wall_cmos_clock, 0, "");

/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

int currentldt;
int _default_ldt;
union descriptor gdt[NGDT];		/* global descriptor table */
struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */

static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];

extern struct user *proc0paddr;

/* software prototypes -- in more palatable form */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	1 Code Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	2 Data Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GLDT_SEL	3 LDT Descriptor */
{	(int) ldt,		/* segment base address  */
	sizeof(ldt)-1,		/* length - all address space */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GTGATE_SEL	4 Null Descriptor - Placeholder */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GPANIC_SEL	5 Panic Tss Descriptor */
{	(int) &dblfault_tss,	/* segment base address  */
	sizeof(struct i386tss)-1,/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
{	(int) kstack,		/* segment base address  */
	sizeof(struct i386tss)-1,/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GUSERLDT_SEL	7 User LDT Descriptor per process */
{	(int) ldt,		/* segment base address  */
	(512 * sizeof(union descriptor)-1),		/* length */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
{	0,			/* segment base address (overwritten by APM) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};

static struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
	/* Data Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};

void
setidt(idx, func, typ, dpl, selec)
	int idx;
	inthand_t *func;
	int typ;
	int dpl;
	int selec;
{
	struct gate_descriptor *ip = idt + idx;

	ip->gd_looffset = (int)func;
	ip->gd_selector = selec;
	ip->gd_stkcpy = 0;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;
	ip->gd_hioffset = ((int)func)>>16;
}

#define	IDTVEC(name)	__CONCAT(X,name)
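
/*
 * Illustrative note (not in the original source): IDTVEC(div) pastes
 * the "X" prefix on, yielding Xdiv; the externs below thus name the
 * low-level assembler trap stubs that the setidt() calls in init386()
 * install into the IDT.
 */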

extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(syscall), IDTVEC(int0x80_syscall);

void
sdtossd(sd, ssd)
	struct segment_descriptor *sd;
	struct soft_segment_descriptor *ssd;
{
	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
	ssd->ssd_type  = sd->sd_type;
	ssd->ssd_dpl   = sd->sd_dpl;
	ssd->ssd_p     = sd->sd_p;
	ssd->ssd_def32 = sd->sd_def32;
	ssd->ssd_gran  = sd->sd_gran;
}

void
init386(first)
	int first;
{
	int x;
	unsigned biosbasemem, biosextmem;
	struct gate_descriptor *gdp;
	int gsel_tss;
	/* table descriptors - used to load tables by microp */
	struct region_descriptor r_gdt, r_idt;
	int	pagesinbase, pagesinext;
	int	target_page, pa_indx;

	proc0.p_addr = proc0paddr;

	atdevbase = ISA_HOLE_START + KERNBASE;

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	/*
	 * Make gdt memory segments.  The code segment goes up to the end
	 * of the page with etext in it; the data segment goes to the end
	 * of the address space.
	 */
	/*
	 * XXX text protection is temporarily (?) disabled.  The limit was
	 * i386_btop(round_page(etext)) - 1.
	 */
	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
	for (x = 0; x < NGDT; x++)
		ssdtosd(&gdt_segs[x], &gdt[x].sd);

	/* make ldt memory segments */
	/*
	 * The data segment limit must not cover the user area because we
	 * don't want the user area to be writable in copyout() etc. (page
	 * level protection is lost in kernel mode on 386's).  Also, we
	 * don't want the user area to be writable directly (page level
	 * protection of the user area is not available on 486's with
	 * CR0_WP set, because there is no user-read/kernel-write mode).
	 *
	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
	 * should be spelled ...MAX_USER...
	 */
#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
	/*
	 * The code segment limit has to cover the user area until we move
	 * the signal trampoline out of the user area.  This is safe because
	 * the code segment cannot be written to directly.
	 */
#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE)
	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
	/* Note: eventually we want private ldts per process. */
	for (x = 0; x < NLDT; x++)
		ssdtosd(&ldt_segs[x], &ldt[x].sd);

	/* exceptions */
	for (x = 0; x < NIDT; x++)
		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(18, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(0x80, &IDTVEC(int0x80_syscall),
			SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));

#include	"isa.h"
#if	NISA > 0
	isa_defaultirq();
#endif
	rand_initialize();

	r_gdt.rd_limit = sizeof(gdt) - 1;
	r_gdt.rd_base = (int) gdt;
	lgdt(&r_gdt);

	r_idt.rd_limit = sizeof(idt) - 1;
	r_idt.rd_base = (int) idt;
	lidt(&r_idt);

	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
	lldt(_default_ldt);
	currentldt = _default_ldt;

#ifdef DDB
	kdb_init();
	if (boothowto & RB_KDB)
		Debugger("Boot flags requested debugger");
#endif

	/*
	 * Use BIOS values stored in RTC CMOS RAM, since probing
	 * breaks certain 386 AT relics.
	 */
	biosbasemem = rtcin(RTC_BASELO) + (rtcin(RTC_BASEHI) << 8);
	biosextmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);

	/*
	 * If the BIOS tells us that it has more than 640k in the basemem,
	 *	don't believe it - set it to 640k.
	 */
	if (biosbasemem > 640) {
		printf("Preposterous RTC basemem of %dK, truncating to 640K\n",
		       biosbasemem);
		biosbasemem = 640;
	}
	if (bootinfo.bi_memsizes_valid && bootinfo.bi_basemem > 640) {
		printf("Preposterous BIOS basemem of %dK, truncating to 640K\n",
		       bootinfo.bi_basemem);
		bootinfo.bi_basemem = 640;
	}

	/*
	 * Warn if the official BIOS interface disagrees with the RTC
	 * interface used above about the amount of base memory or the
	 * amount of extended memory.  Prefer the BIOS value for the base
	 * memory.  This is necessary for machines that `steal' base
	 * memory for use as BIOS memory, at least if we are going to use
	 * the BIOS for apm.  Prefer the RTC value for extended memory.
	 * Eventually the hackish interface shouldn't even be looked at.
	 */
	if (bootinfo.bi_memsizes_valid) {
		if (bootinfo.bi_basemem != biosbasemem) {
			vm_offset_t pa;

			printf(
	"BIOS basemem (%ldK) != RTC basemem (%dK), setting to BIOS value\n",
			       bootinfo.bi_basemem, biosbasemem);
			biosbasemem = bootinfo.bi_basemem;

			/*
			 * XXX if biosbasemem is now < 640, there is a `hole'
			 * between the end of base memory and the start of
			 * ISA memory.  The hole may be empty or it may
			 * contain BIOS code or data.  Map it read/write so
			 * that the BIOS can write to it.  (Memory from 0 to
			 * the physical end of the kernel is mapped read-only
			 * to begin with and then parts of it are remapped.
			 * The parts that aren't remapped form holes that
			 * remain read-only and are unused by the kernel.
			 * The base memory area is below the physical end of
			 * the kernel and right now forms a read-only hole.
			 * The part of it from 0 to
			 * (trunc_page(biosbasemem * 1024) - 1) will be
			 * remapped and used by the kernel later.)
			 *
			 * This code is similar to the code used in
			 * pmap_mapdev, but since no memory needs to be
			 * allocated we simply change the mapping.
			 */
			for (pa = trunc_page(biosbasemem * 1024);
			     pa < ISA_HOLE_START; pa += PAGE_SIZE) {
				unsigned *pte;

				pte = (unsigned *)vtopte(pa + KERNBASE);
				*pte = pa | PG_RW | PG_V;
			}
		}
		if (bootinfo.bi_extmem != biosextmem)
			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
			       bootinfo.bi_extmem, biosextmem);
	}

	pagesinbase = biosbasemem * 1024 / PAGE_SIZE;
	pagesinext = biosextmem * 1024 / PAGE_SIZE;

	/*
	 * Special hack for chipsets that still remap the 384k hole when
	 *	there's 16MB of memory - this really confuses people that
	 *	are trying to use bus mastering ISA controllers with the
	 *	"16MB limit"; they only have 16MB, but the remapping puts
	 *	them beyond the limit.
	 */
	/*
	 * If extended memory is between 15-16MB (16-17MB phys address range),
	 *	chop it to 15MB.
	 */
	if ((pagesinext > 3840) && (pagesinext < 4096))
		pagesinext = 3840;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = pagesinext + 0x100000/PAGE_SIZE;

#ifdef MAXMEM
	Maxmem = MAXMEM/4;
#endif
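
	/*
	 * Illustrative arithmetic (not in the original source), assuming
	 * the usual 4K PAGE_SIZE: a machine reporting 15360K of extended
	 * memory has pagesinext = 15360 * 1024 / 4096 = 3840 pages;
	 * extended memory starts at 1MB, so Maxmem = 3840 + 0x100000/4096
	 * = 4096 pages, i.e. the physical address space tops out at 16MB.
	 * MAXMEM, when configured, is given in kilobytes, hence the
	 * division by 4 above.
	 */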

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap (first, 0);

	/*
	 * Size up each available chunk of physical memory.
	 */

	/*
	 * We currently don't bother testing base memory.
	 * XXX  ...but we probably should.
	 */
	pa_indx = 0;
	badpages = 0;
	if (pagesinbase > 1) {
		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
		physmem = pagesinbase - 1;
	} else {
		/* point at first chunk end */
		pa_indx++;
	}

	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
		int tmp, page_bad = FALSE;

		/*
		 * map page into kernel: valid, read/write, non-cacheable
		 */
		*(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page;
		pmap_update();
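		/*
		 * Illustrative note (not in the original source): CMAP1
		 * is a spare kernel PTE and CADDR1 the virtual address
		 * it maps, reserved by pmap for temporary windows like
		 * this one; pmap_update() flushes the TLB so the new
		 * mapping is seen before the page is probed.
		 */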

		tmp = *(int *)CADDR1;
		/*
		 * Test for alternating 1's and 0's
		 */
		*(volatile int *)CADDR1 = 0xaaaaaaaa;
		if (*(volatile int *)CADDR1 != 0xaaaaaaaa) {
			page_bad = TRUE;
		}
		/*
		 * Test for alternating 0's and 1's
		 */
		*(volatile int *)CADDR1 = 0x55555555;
		if (*(volatile int *)CADDR1 != 0x55555555) {
			page_bad = TRUE;
		}
		/*
		 * Test for all 1's
		 */
		*(volatile int *)CADDR1 = 0xffffffff;
		if (*(volatile int *)CADDR1 != 0xffffffff) {
			page_bad = TRUE;
		}
		/*
		 * Test for all 0's
		 */
		*(volatile int *)CADDR1 = 0x0;
		if (*(volatile int *)CADDR1 != 0x0) {
			/*
			 * test of page failed
			 */
			page_bad = TRUE;
		}
		/*
		 * Restore original value.
		 */
		*(int *)CADDR1 = tmp;

		/*
		 * Adjust array of valid/good pages.
		 */
		if (page_bad == FALSE) {
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer.  Otherwise start a new chunk.
			 * Note that the stored "end" is exclusive: it
			 * points one page past the last good one, making
			 * the range >= start and < end.
			 */
			if (phys_avail[pa_indx] == target_page) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf("Too many holes in the physical address space, giving up\n");
					pa_indx--;
					break;
				}
				phys_avail[pa_indx++] = target_page;	/* start */
				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
			}
			physmem++;
		} else {
			badpages++;
			page_bad = FALSE;
		}
	}

	*(int *)CMAP1 = 0;
	pmap_update();

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));

	avail_end = phys_avail[pa_indx];

	/* now running on new page tables, configured, and u/iom is accessible */

	/* make an initial tss so the cpu can get the interrupt stack on syscall! */
	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*PAGE_SIZE;
	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);

	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
	    dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
	dblfault_tss.tss_cr3 = IdlePTD;
	dblfault_tss.tss_eip = (int) dblfault_handler;
	dblfault_tss.tss_eflags = PSL_KERNEL;
	dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs =
		GSEL(GDATA_SEL, SEL_KPL);
	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);

	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
		(sizeof(struct i386tss))<<16;

	ltr(gsel_tss);

	/* make a call gate to reenter kernel with */
	gdp = &ldt[LSYS5CALLS_SEL].gd;

	x = (int) &IDTVEC(syscall);
	gdp->gd_looffset = x++;
	gdp->gd_selector = GSEL(GCODE_SEL, SEL_KPL);
	gdp->gd_stkcpy = 1;
	gdp->gd_type = SDT_SYS386CGT;
	gdp->gd_dpl = SEL_UPL;
	gdp->gd_p = 1;
	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >> 16;

	/* transfer to user mode */

	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);

	/* setup proc 0's pcb */
	proc0.p_addr->u_pcb.pcb_flags = 0;
	proc0.p_addr->u_pcb.pcb_cr3 = IdlePTD;
}

/*
 * The registers are in the frame; the frame is in the user area of
 * the process in question; when the process is active, the registers
 * are in "the kernel stack"; when it's not, they're still there, but
 * things get flipped around.  So, since p->p_md.md_regs is the whole address
 * of the register set, take its offset from the kernel stack, and
 * index into the user block.  Don't you just *love* virtual memory?
 * (I'm starting to think seymour is right...)
 */
#define	TF_REGP(p)	((struct trapframe *) \
			 ((char *)(p)->p_addr \
			  + ((char *)(p)->p_md.md_regs - kstack)))
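
/*
 * Illustrative arithmetic (not in the original source), with
 * hypothetical addresses: if the trapframe lives 0x1f70 bytes above
 * kstack while the process runs, then for a swapped-in-but-inactive
 * process the same registers live at (char *)p->p_addr + 0x1f70,
 * which is exactly what TF_REGP() computes.
 */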

int
ptrace_set_pc(p, addr)
	struct proc *p;
	unsigned int addr;
{
	TF_REGP(p)->tf_eip = addr;
	return (0);
}

int
ptrace_single_step(p)
	struct proc *p;
{
	TF_REGP(p)->tf_eflags |= PSL_T;
	return (0);
}

int
ptrace_write_u(p, off, data)
	struct proc *p;
	vm_offset_t off;
	int data;
{
	struct trapframe frame_copy;
	vm_offset_t min;
	struct trapframe *tp;

	/*
	 * Privileged kernel state is scattered all over the user area.
	 * Only allow write access to parts of regs and to fpregs.
	 */
	min = (char *)p->p_md.md_regs - kstack;
	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
		tp = TF_REGP(p);
		frame_copy = *tp;
		*(int *)((char *)&frame_copy + (off - min)) = data;
		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
		    !CS_SECURE(frame_copy.tf_cs))
			return (EINVAL);
		*(int *)((char *)p->p_addr + off) = data;
		return (0);
	}
	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
		*(int *)((char *)p->p_addr + off) = data;
		return (0);
	}
	return (EFAULT);
}

int
fill_regs(p, regs)
	struct proc *p;
	struct reg *regs;
{
	struct trapframe *tp;

	tp = TF_REGP(p);
	regs->r_es = tp->tf_es;
	regs->r_ds = tp->tf_ds;
	regs->r_edi = tp->tf_edi;
	regs->r_esi = tp->tf_esi;
	regs->r_ebp = tp->tf_ebp;
	regs->r_ebx = tp->tf_ebx;
	regs->r_edx = tp->tf_edx;
	regs->r_ecx = tp->tf_ecx;
	regs->r_eax = tp->tf_eax;
	regs->r_eip = tp->tf_eip;
	regs->r_cs = tp->tf_cs;
	regs->r_eflags = tp->tf_eflags;
	regs->r_esp = tp->tf_esp;
	regs->r_ss = tp->tf_ss;
	return (0);
}

int
set_regs(p, regs)
	struct proc *p;
	struct reg *regs;
{
	struct trapframe *tp;

	tp = TF_REGP(p);
	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
	    !CS_SECURE(regs->r_cs))
		return (EINVAL);
	tp->tf_es = regs->r_es;
	tp->tf_ds = regs->r_ds;
	tp->tf_edi = regs->r_edi;
	tp->tf_esi = regs->r_esi;
	tp->tf_ebp = regs->r_ebp;
	tp->tf_ebx = regs->r_ebx;
	tp->tf_edx = regs->r_edx;
	tp->tf_ecx = regs->r_ecx;
	tp->tf_eax = regs->r_eax;
	tp->tf_eip = regs->r_eip;
	tp->tf_cs = regs->r_cs;
	tp->tf_eflags = regs->r_eflags;
	tp->tf_esp = regs->r_esp;
	tp->tf_ss = regs->r_ss;
	return (0);
}

#ifndef DDB
void
Debugger(const char *msg)
{
	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */

#include <sys/disklabel.h>
#define b_cylin	b_resid
/*
 * Determine the size of the transfer, and make sure it is
 * within the boundaries of the partition.  Adjust the transfer
 * if needed, and signal errors or early completion.
 */
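/*
 * Illustrative arithmetic (not in the original source): with DEV_BSIZE
 * of 512, a 4608-byte request has sz = 9 blocks; if only 3 of them fit
 * inside the partition, b_bcount is truncated to 3 << DEV_BSHIFT =
 * 1536 bytes and the caller completes the I/O short.
 */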
int
bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
{
	struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
	int labelsect = lp->d_partitions[0].p_offset;
	int maxsz = p->p_size,
		sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;

	/* overwriting disk label? */
	/* XXX should also protect bootstrap in first 8K */
	if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
#if LABELSECTOR != 0
	    bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
#endif
	    (bp->b_flags & B_READ) == 0 && wlabel == 0) {
		bp->b_error = EROFS;
		goto bad;
	}

#if	defined(DOSBBSECTOR) && defined(notyet)
	/* overwriting master boot record? */
	if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
	    (bp->b_flags & B_READ) == 0 && wlabel == 0) {
		bp->b_error = EROFS;
		goto bad;
	}
#endif

	/* beyond partition? */
	if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
		/* if exactly at end of disk, return an EOF */
		if (bp->b_blkno == maxsz) {
			bp->b_resid = bp->b_bcount;
			return (0);
		}
		/* or truncate if part of it fits */
		sz = maxsz - bp->b_blkno;
		if (sz <= 0) {
			bp->b_error = EINVAL;
			goto bad;
		}
		bp->b_bcount = sz << DEV_BSHIFT;
	}

	/* calculate cylinder for disksort to order transfers with */
	bp->b_pblkno = bp->b_blkno + p->p_offset;
	bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
	return (1);

bad:
	bp->b_flags |= B_ERROR;
	return (-1);
}