/*-
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 *	$Id: machdep.c,v 1.240 1997/05/07 20:12:47 peter Exp $
 */

#include "npx.h"
#include "opt_sysvipc.h"
#include "opt_ddb.h"
#include "opt_bounce.h"
#include "opt_machdep.h"
#include "opt_perfmon.h"
#include "opt_smp.h"
#include "opt_userconfig.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/reboot.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/callout.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/msgbuf.h>
#include <sys/sysent.h>
#include <sys/tty.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#ifdef SYSVMSG
#include <sys/msg.h>
#endif

#ifdef SYSVSEM
#include <sys/sem.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <sys/user.h>
#include <sys/exec.h>
#include <sys/vnode.h>

#include <ddb/ddb.h>

#include <net/netisr.h>

#include <machine/cpu.h>
#include <machine/npx.h>
#include <machine/reg.h>
#include <machine/psl.h>
#include <machine/clock.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>
#include <machine/cons.h>
#include <machine/bootinfo.h>
#include <machine/md_var.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#ifdef PERFMON
#include <machine/perfmon.h>
#endif

#include <i386/isa/isa_device.h>
#include <i386/isa/rtc.h>
#include <machine/random.h>

extern void init386 __P((int first));
extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
extern int ptrace_single_step __P((struct proc *p));
extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
extern void dblfault_handler __P((void));

extern void printcpuinfo(void);	/* XXX header file */
extern void earlysetcpuclass(void);	/* same header file */
extern void finishidentcpu(void);
extern void panicifcpuunsupported(void);
extern void initializecpu(void);
extern void init_sets(void);

static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)


#ifdef BOUNCE_BUFFERS
extern char *bouncememory;
extern int maxbkva;
#ifdef BOUNCEPAGES
int	bouncepages = BOUNCEPAGES;
#else
int	bouncepages = 0;
#endif
#endif	/* BOUNCE_BUFFERS */

extern int freebufspace;
int	msgbufmapped = 0;		/* set when safe to use msgbuf */
int _udatasel, _ucodesel;
u_int	atdevbase;


int physmem = 0;
int cold = 1;

static int
sysctl_hw_physmem SYSCTL_HANDLER_ARGS
{
	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_physmem, "I", "");

static int
sysctl_hw_usermem SYSCTL_HANDLER_ARGS
{
	int error = sysctl_handle_int(oidp, 0,
		ctob(physmem - cnt.v_wire_count), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_usermem, "I", "");

int boothowto = 0, bootverbose = 0, Maxmem = 0;
static int	badpages = 0;
long dumplo;
extern int bootdev;

vm_offset_t phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
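/*
 * Layout note: phys_avail[] is filled with (start, end) pairs describing
 * usable physical memory, e.g. phys_avail[0]/phys_avail[1] bound the first
 * chunk; a (0, 0) pair terminates the list (see the scan loop in init386()).
 */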

static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */

static vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
extern struct linker_set netisr_set;

#define offsetof(type, member)	((size_t)(&((type *)0)->member))
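/*
 * (The classic null-pointer idiom: pretend a "type" object sits at address
 * zero and take the address of "member"; the result is the member's byte
 * offset within the type.)
 */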

static void
cpu_startup(dummy)
	void *dummy;
{
	register unsigned i;
	register caddr_t v;
	vm_offset_t maxaddr;
	vm_size_t size = 0;
	int firstaddr;
	vm_offset_t minaddr;

	if (boothowto & RB_VERBOSE)
		bootverbose++;

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	printf(version);
#ifdef SMP
	mp_announce();
#endif
	earlysetcpuclass();
	startrtclock();
	printcpuinfo();
	panicifcpuunsupported();
#ifdef PERFMON
	perfmon_init();
#endif
	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (badpages != 0) {
		int indx = 1;

		/*
		 * XXX skip reporting ISA hole & unmanaged kernel memory
		 */
		if (phys_avail[0] == PAGE_SIZE)
			indx += 2;

		printf("Physical memory hole(s):\n");
		for (; phys_avail[indx + 1] != 0; indx += 2) {
			int size = phys_avail[indx + 1] - phys_avail[indx];

			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
		}
	}

	/*
	 * Quickly wire in netisrs.
	 */
	setup_netisrs(&netisr_set);

	/*
	 * Allocate space for system data structures.
	 * The first available kernel virtual address is in "v".
	 * As pages of kernel virtual memory are allocated, "v" is incremented.
	 * As pages of memory are allocated and cleared,
	 * "firstaddr" is incremented.
	 * An index into the kernel page table corresponding to the
	 * virtual memory address maintained in "v" is kept in "mapaddr".
	 */

	/*
	 * Make two passes.  The first pass calculates how much memory is
	 * needed and allocates it.  The second pass assigns virtual
	 * addresses to the various data structures.
	 */
	firstaddr = 0;
again:
	v = (caddr_t)firstaddr;

#define	valloc(name, type, num) \
	    (name) = (type *)v; v = (caddr_t)((name)+(num))
#define	valloclim(name, type, num, lim) \
	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
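/*
 * For example, valloc(callout, struct callout, ncallout) expands to
 *	callout = (struct callout *)v; v = (caddr_t)(callout + ncallout);
 * i.e. each valloc() just carves the next chunk off the cursor "v"
 * without allocating anything itself.
 */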
	valloc(callout, struct callout, ncallout);
#ifdef SYSVSHM
	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
#ifdef SYSVSEM
	valloc(sema, struct semid_ds, seminfo.semmni);
	valloc(sem, struct sem, seminfo.semmns);
	/* This is pretty disgusting! */
	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
#endif
#ifdef SYSVMSG
	valloc(msgpool, char, msginfo.msgmax);
	valloc(msgmaps, struct msgmap, msginfo.msgseg);
	valloc(msghdrs, struct msg, msginfo.msgtql);
	valloc(msqids, struct msqid_ds, msginfo.msgmni);
#endif

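	/*
	 * Default the buffer cache size from physical memory: 30 buffers,
	 * plus one per 8 pages above the first 1024 pages (4MB with 4K
	 * pages), capped at 2048 extra.
	 */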
	if (nbuf == 0) {
		nbuf = 30;
		if (physmem > 1024)
			nbuf += min((physmem - 1024) / 8, 2048);
	}
	nswbuf = max(min(nbuf/4, 128), 16);

	valloc(swbuf, struct buf, nswbuf);
	valloc(buf, struct buf, nbuf);

#ifdef BOUNCE_BUFFERS
	/*
	 * If there is more than 16MB of memory, allocate some bounce buffers
	 */
	if (Maxmem > 4096) {
		if (bouncepages == 0) {
			bouncepages = 64;
			bouncepages += ((Maxmem - 4096) / 2048) * 32;
			if (bouncepages > 128)
				bouncepages = 128;
		}
		v = (caddr_t)((vm_offset_t)round_page(v));
		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
	}
#endif

	/*
	 * End of first pass, size has been calculated so allocate memory
	 */
	if (firstaddr == 0) {
		size = (vm_size_t)(v - firstaddr);
		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
		if (firstaddr == 0)
			panic("startup: no room for tables");
		goto again;
	}

	/*
	 * End of second pass, addresses have been assigned
	 */
	if ((vm_size_t)(v - firstaddr) != size)
		panic("startup: table size inconsistency");

#ifdef BOUNCE_BUFFERS
	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) +
				maxbkva + pager_map_size, TRUE);
	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
#else
	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
#endif
	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
				(nbuf*BKVASIZE), TRUE);
	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
				(16*ARG_MAX), TRUE);
	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
				(maxproc*UPAGES*PAGE_SIZE), FALSE);

#if defined(SMP) && defined(SMP_PRIVPAGES)
	/* Per-cpu pages.. (the story so far is... subject to change)
	 * ========= For the per-cpu data page ========
	 * 1 private data page
	 * 1 PDE	(per-cpu PTD entry page)
	 * 1 PT		(per-cpu page table page)
	 * ============ For the idle loop =============
	 * 2 UPAGEs	(per-cpu idle procs)
	 * 1 PTD	(for per-cpu equiv of IdlePTD)
	 * ============================================
	 * = total of 6 pages per cpu.  The BSP reuses the ones allocated
	 * by locore.s during boot to remove special cases at runtime.
	 */
	ppage_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
				(NCPU*6*PAGE_SIZE), FALSE);
#endif

	/*
	 * Finally, allocate the mbuf pool.  Since mclrefcnt is an odd-sized
	 * array, we use the more space-efficient malloc in place of
	 * kmem_alloc.
	 */
	{
		vm_offset_t mb_map_size;

		mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES;
		mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE));
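		/*
		 * mclrefcnt holds one refcount byte per MCLBYTES-sized piece
		 * of the map; plain malloc is cheaper than kmem_alloc here.
		 */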
		mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT);
		bzero(mclrefcnt, mb_map_size / MCLBYTES);
		mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
			mb_map_size, FALSE);
	}

	/*
	 * Initialize callouts
	 */
	callfree = callout;
	for (i = 1; i < ncallout; i++)
		callout[i-1].c_next = &callout[i];

#if defined(USERCONFIG)
#if defined(USERCONFIG_BOOT)
	if (1) {
#else
	if (boothowto & RB_CONFIG) {
#endif
		userconfig();
		cninit();	/* the preferred console may have changed */
	}
#endif

#ifdef BOUNCE_BUFFERS
	/*
	 * init bounce buffers
	 */
	vm_bounce_init();
#endif

	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
	    ptoa(cnt.v_free_count) / 1024);

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
	vm_pager_bufferinit();
}

int
register_netisr(num, handler)
	int num;
	netisr_t *handler;
{

	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))) {
		printf("register_netisr: bad isr number: %d\n", num);
		return (EINVAL);
	}
	netisrs[num] = handler;
	return (0);
}

static void
setup_netisrs(ls)
	struct linker_set *ls;
{
	int i;
	const struct netisrtab *nit;

	for (i = 0; ls->ls_items[i]; i++) {
		nit = (const struct netisrtab *)ls->ls_items[i];
		register_netisr(nit->nit_num, nit->nit_isr);
	}
}

/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 */
void
sendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig, mask;
	u_long code;
{
	register struct proc *p = curproc;
	register struct trapframe *regs;
	register struct sigframe *fp;
	struct sigframe sf;
	struct sigacts *psp = p->p_sigacts;
	int oonstack;

	regs = p->p_md.md_regs;
	oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
	/*
	 * Allocate and validate space for the signal handler context.
	 */
	if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
	    (psp->ps_sigonstack & sigmask(sig))) {
		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
		psp->ps_sigstk.ss_flags |= SS_ONSTACK;
	} else {
		fp = (struct sigframe *)regs->tf_esp - 1;
	}

	/*
	 * grow() will return FALSE if the fp will not fit inside the stack
	 *	and the stack cannot be grown.  useracc will return FALSE
	 *	if access is denied.
	 */
	if ((grow(p, (int)fp) == FALSE) ||
	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		SIGACTION(p, SIGILL) = SIG_DFL;
		sig = sigmask(SIGILL);
		p->p_sigignore &= ~sig;
		p->p_sigcatch &= ~sig;
		p->p_sigmask &= ~sig;
		psignal(p, SIGILL);
		return;
	}

	/*
	 * Build the argument list for the signal handler.
	 */
	if (p->p_sysent->sv_sigtbl) {
		if (sig < p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[sig];
		else
			sig = p->p_sysent->sv_sigsize + 1;
	}
	sf.sf_signum = sig;
	sf.sf_code = code;
	sf.sf_scp = &fp->sf_sc;
	sf.sf_addr = (char *) regs->tf_err;
	sf.sf_handler = catcher;

	/* save scratch registers */
	sf.sf_sc.sc_eax = regs->tf_eax;
	sf.sf_sc.sc_ebx = regs->tf_ebx;
	sf.sf_sc.sc_ecx = regs->tf_ecx;
	sf.sf_sc.sc_edx = regs->tf_edx;
	sf.sf_sc.sc_esi = regs->tf_esi;
	sf.sf_sc.sc_edi = regs->tf_edi;
	sf.sf_sc.sc_cs = regs->tf_cs;
	sf.sf_sc.sc_ds = regs->tf_ds;
	sf.sf_sc.sc_ss = regs->tf_ss;
	sf.sf_sc.sc_es = regs->tf_es;
	sf.sf_sc.sc_isp = regs->tf_isp;

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	sf.sf_sc.sc_onstack = oonstack;
	sf.sf_sc.sc_mask = mask;
	sf.sf_sc.sc_sp = regs->tf_esp;
	sf.sf_sc.sc_fp = regs->tf_ebp;
	sf.sf_sc.sc_pc = regs->tf_eip;
	sf.sf_sc.sc_ps = regs->tf_eflags;

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		sigexit(p, SIGILL);
	}

	regs->tf_esp = (int)fp;
	regs->tf_eip = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode));
	regs->tf_eflags &= ~PSL_VM;
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_ss = _udatasel;
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 */
int
sigreturn(p, uap, retval)
	struct proc *p;
	struct sigreturn_args /* {
		struct sigcontext *sigcntxp;
	} */ *uap;
	int *retval;
{
	register struct sigcontext *scp;
	register struct sigframe *fp;
	register struct trapframe *regs = p->p_md.md_regs;
	int eflags;

	/*
	 * (XXX old comment) regs->tf_esp points to the return address.
	 * The user scp pointer is above that.
	 * The return address is faked in the signal trampoline code
	 * for consistency.
	 */
	scp = uap->sigcntxp;
	fp = (struct sigframe *)
	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));

	if (useracc((caddr_t)fp, sizeof (*fp), B_WRITE) == 0)
		return(EFAULT);

	/*
	 * Don't allow users to change privileged or reserved flags.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
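	/*
	 * (That is, XOR the new and old flag words and insist that every
	 * bit that differs lies within PSL_USERCHANGE, the set of flags a
	 * user process may legitimately modify.)
	 */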
	eflags = scp->sc_ps;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
#ifdef DEBUG
		printf("sigreturn: eflags = 0x%x\n", eflags);
#endif
		return(EINVAL);
	}

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
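	/*
	 * (ISPL() extracts the privilege level from the selector's low two
	 * bits; requiring SEL_UPL means %cs must be a ring-3 selector.)
	 */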
	if (!CS_SECURE(scp->sc_cs)) {
#ifdef DEBUG
		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
#endif
		trapsignal(p, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/* restore scratch registers */
	regs->tf_eax = scp->sc_eax;
	regs->tf_ebx = scp->sc_ebx;
	regs->tf_ecx = scp->sc_ecx;
	regs->tf_edx = scp->sc_edx;
	regs->tf_esi = scp->sc_esi;
	regs->tf_edi = scp->sc_edi;
	regs->tf_cs = scp->sc_cs;
	regs->tf_ds = scp->sc_ds;
	regs->tf_es = scp->sc_es;
	regs->tf_ss = scp->sc_ss;
	regs->tf_isp = scp->sc_isp;

	if (useracc((caddr_t)scp, sizeof (*scp), B_WRITE) == 0)
		return(EINVAL);

	if (scp->sc_onstack & 01)
		p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK;
	else
		p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
	p->p_sigmask = scp->sc_mask & ~sigcantmask;
	regs->tf_ebp = scp->sc_fp;
	regs->tf_esp = scp->sc_sp;
	regs->tf_eip = scp->sc_pc;
	regs->tf_eflags = eflags;
	return(EJUSTRETURN);
}

/*
 * Machine dependent boot() routine
 *
 * I haven't seen anything to put here yet
 * Possibly some stuff might be grafted back here from boot()
 */
void
cpu_boot(int howto)
{
}

/*
 * Shutdown the CPU as much as possible
 */
void
cpu_halt(void)
{
	for (;;)
		__asm__ ("hlt");
}

/*
 * Clear registers on exec
 */
void
setregs(p, entry, stack)
	struct proc *p;
	u_long entry;
	u_long stack;
{
	struct trapframe *regs = p->p_md.md_regs;

#ifdef USER_LDT
	struct pcb *pcb = &p->p_addr->u_pcb;

	/* was i386_user_cleanup() in NetBSD */
	if (pcb->pcb_ldt) {
		if (pcb == curpcb)
			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
			pcb->pcb_ldt_len * sizeof(union descriptor));
		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
	}
#endif

	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_eip = entry;
	regs->tf_esp = stack;
	regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_cs = _ucodesel;

	/*
	 * Initialize the math emulator (if any) for the current process.
	 * Actually, just clear the bit that says that the emulator has
	 * been initialized.  Initialization is delayed until the process
	 * traps to the emulator (if it is done at all) mainly because
	 * emulators don't provide an entry point for initialization.
	 */
	p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP;

	/*
	 * Arrange to trap the next npx or `fwait' instruction (see npx.c
	 * for why fwait must be trapped at least if there is an npx or an
	 * emulator).  This is mainly to handle the case where npx0 is not
	 * configured, since the npx routines normally set up the trap
	 * otherwise.  It should be done only at boot time, but doing it
	 * here allows modifying `npx_exists' for testing the emulator on
	 * systems with an npx.
	 */
	load_cr0(rcr0() | CR0_MP | CR0_TS);

#if NNPX > 0
	/* Initialize the npx (if any) for the current process. */
	npxinit(__INITIAL_NPXCW__);
#endif
}

static int
sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
{
	int error;
	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
		req);
	if (!error && req->newptr)
		resettodr();
	return (error);
}

SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
	CTLFLAG_RD, &bootinfo, bootinfo, "");

SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
	CTLFLAG_RW, &wall_cmos_clock, 0, "");

/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

int currentldt;
int _default_ldt;
#ifdef SMP
union descriptor gdt[NGDT + NCPU];		/* global descriptor table */
#else
union descriptor gdt[NGDT];		/* global descriptor table */
#endif
struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */
#ifdef SMP
/* table descriptors - used to load tables by the microprocessor */
struct region_descriptor r_gdt, r_idt;
#endif

#ifdef SMP
struct i386tss SMPcommon_tss[NCPU];	/* One tss per cpu */
struct i386tss *SMPcommon_tss_ptr[NCPU]; /* for the benefit of asmp code */
#else
struct i386tss common_tss;
#endif

static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];

extern  struct user *proc0paddr;

#ifdef TSS_IS_CACHED			/* cpu_switch helper */
struct segment_descriptor *tssptr;
int gsel_tss;
#endif

/* software prototypes -- in more palatable form */
struct soft_segment_descriptor gdt_segs[
#ifdef SMP
					NGDT + NCPU
#endif
						   ] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	1 Code Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	2 Data Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GLDT_SEL	3 LDT Descriptor */
{	(int) ldt,		/* segment base address  */
	sizeof(ldt)-1,		/* length - all address space */
	SDT_SYSLDT,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GTGATE_SEL	4 Null Descriptor - Placeholder */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GPANIC_SEL	5 Panic Tss Descriptor */
{	(int) &dblfault_tss,	/* segment base address  */
	sizeof(struct i386tss)-1,/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
{
#ifdef SMP
	(int) &SMPcommon_tss[0],/* segment base address */
#else
	(int) &common_tss,	/* segment base address */
#endif
	sizeof(struct i386tss)-1,/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GUSERLDT_SEL	7 User LDT Descriptor per process */
{	(int) ldt,		/* segment base address  */
	(512 * sizeof(union descriptor)-1),		/* length */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
{	0,			/* segment base address (overwritten by APM) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};

static struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Code Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
	/* Data Descriptor for user */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};

void
setidt(idx, func, typ, dpl, selec)
	int idx;
	inthand_t *func;
	int typ;
	int dpl;
	int selec;
{
	struct gate_descriptor *ip = idt + idx;

	ip->gd_looffset = (int)func;
	ip->gd_selector = selec;
	ip->gd_stkcpy = 0;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;
	ip->gd_hioffset = ((int)func)>>16;
}
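
/*
 * For instance, the breakpoint gate installed in init386() below,
 *	setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL,
 *	       GSEL(GCODE_SEL, SEL_KPL));
 * is a trap gate callable from user mode (DPL is SEL_UPL) that enters
 * the kernel through the Xbpt stub via the kernel code selector.
 */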

#define	IDTVEC(name)	__CONCAT(X,name)
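/* IDTVEC(div) thus names the assembler entry point Xdiv, and so on. */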

extern inthand_t
	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
	IDTVEC(syscall), IDTVEC(int0x80_syscall);

void
sdtossd(sd, ssd)
	struct segment_descriptor *sd;
	struct soft_segment_descriptor *ssd;
{
	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
	ssd->ssd_type  = sd->sd_type;
	ssd->ssd_dpl   = sd->sd_dpl;
	ssd->ssd_p     = sd->sd_p;
	ssd->ssd_def32 = sd->sd_def32;
	ssd->ssd_gran  = sd->sd_gran;
}

void
init386(first)
	int first;
{
	int x;
	unsigned biosbasemem, biosextmem;
	struct gate_descriptor *gdp;
#ifndef TSS_IS_CACHED
	int gsel_tss;
#endif
	struct isa_device *idp;
#ifndef SMP
	/* table descriptors - used to load tables by the microprocessor */
	struct region_descriptor r_gdt, r_idt;
#endif
	int	pagesinbase, pagesinext;
	int	target_page, pa_indx;
	int	off;

	proc0.p_addr = proc0paddr;

	atdevbase = ISA_HOLE_START + KERNBASE;

	/*
	 * Fill in the length fields of all linker sets (necessary for ELF).
	 */
	init_sets();

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	/*
	 * make gdt memory segments, the code segment goes up to end of the
	 * page with etext in it, the data segment goes to the end of
	 * the address space
	 */
	/*
	 * XXX text protection is temporarily (?) disabled.  The limit was
	 * i386_btop(round_page(etext)) - 1.
	 */
	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
#ifdef BDE_DEBUGGER
#define	NGDT1	8		/* avoid overwriting db entries with APM ones */
#else
#define	NGDT1	(sizeof gdt_segs / sizeof gdt_segs[0])
#endif
	for (x = 0; x < NGDT1; x++)
		ssdtosd(&gdt_segs[x], &gdt[x].sd);

#ifdef SMP
	/*
	 * Oh puke!
	 */
	for (x = 0; x < NCPU; x++) {
		SMPcommon_tss_ptr[x] = &SMPcommon_tss[x];
		gdt_segs[NGDT + x] = gdt_segs[GPROC0_SEL];
		gdt_segs[NGDT + x].ssd_base = (int) SMPcommon_tss_ptr[x];
		ssdtosd(&gdt_segs[NGDT + x], &gdt[NGDT + x].sd);
	}
#endif

	/* make ldt memory segments */
	/*
	 * The data segment limit must not cover the user area because we
	 * don't want the user area to be writable in copyout() etc. (page
	 * level protection is lost in kernel mode on 386's).  Also, we
	 * don't want the user area to be writable directly (page level
	 * protection of the user area is not available on 486's with
	 * CR0_WP set, because there is no user-read/kernel-write mode).
	 *
	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
	 * should be spelled ...MAX_USER...
	 */
#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
	/*
	 * The code segment limit has to cover the user area until we move
	 * the signal trampoline out of the user area.  This is safe because
	 * the code segment cannot be written to directly.
	 */
#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE)
	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
	for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
		ssdtosd(&ldt_segs[x], &ldt[x].sd);

	/* exceptions */
	for (x = 0; x < NIDT; x++)
		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(18, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(0x80, &IDTVEC(int0x80_syscall),
			SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));

#include	"isa.h"
#if	NISA > 0
	isa_defaultirq();
#endif
	rand_initialize();

	r_gdt.rd_limit = sizeof(gdt) - 1;
	r_gdt.rd_base = (int) gdt;
	lgdt(&r_gdt);

	r_idt.rd_limit = sizeof(idt) - 1;
	r_idt.rd_base = (int) idt;
	lidt(&r_idt);

	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
	lldt(_default_ldt);
	currentldt = _default_ldt;

#ifdef DDB
	kdb_init();
	if (boothowto & RB_KDB)
		Debugger("Boot flags requested debugger");
#endif

	finishidentcpu();	/* Final stage of CPU initialization */
	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	initializecpu();	/* Initialize CPU registers */

	/* Use BIOS values stored in RTC CMOS RAM, since probing
	 * breaks certain 386 AT relics.
	 */
	biosbasemem = rtcin(RTC_BASELO) + (rtcin(RTC_BASEHI)<<8);
	biosextmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI)<<8);

	/*
	 * If BIOS tells us that it has more than 640k in the basemem,
	 *	don't believe it - set it to 640k.
	 */
	if (biosbasemem > 640) {
		printf("Preposterous RTC basemem of %dK, truncating to 640K\n",
		       biosbasemem);
		biosbasemem = 640;
	}
	if (bootinfo.bi_memsizes_valid && bootinfo.bi_basemem > 640) {
		printf("Preposterous BIOS basemem of %dK, truncating to 640K\n",
		       bootinfo.bi_basemem);
		bootinfo.bi_basemem = 640;
	}

	/*
	 * Warn if the official BIOS interface disagrees with the RTC
	 * interface used above about the amount of base memory or the
	 * amount of extended memory.  Prefer the BIOS value for the base
	 * memory.  This is necessary for machines that `steal' base
	 * memory for use as BIOS memory, at least if we are going to use
	 * the BIOS for apm.  Prefer the RTC value for extended memory.
	 * Eventually the hackish interface shouldn't even be looked at.
	 */
	if (bootinfo.bi_memsizes_valid) {
		if (bootinfo.bi_basemem != biosbasemem) {
			vm_offset_t pa;

			printf(
	"BIOS basemem (%ldK) != RTC basemem (%dK), setting to BIOS value\n",
			       bootinfo.bi_basemem, biosbasemem);
			biosbasemem = bootinfo.bi_basemem;

			/*
			 * XXX if biosbasemem is now < 640, there is a `hole'
			 * between the end of base memory and the start of
			 * ISA memory.  The hole may be empty or it may
			 * contain BIOS code or data.  Map it read/write so
			 * that the BIOS can write to it.  (Memory from 0 to
			 * the physical end of the kernel is mapped read-only
			 * to begin with and then parts of it are remapped.
			 * The parts that aren't remapped form holes that
			 * remain read-only and are unused by the kernel.
			 * The base memory area is below the physical end of
			 * the kernel and right now forms a read-only hole.
			 * The part of it from 0 to
			 * (trunc_page(biosbasemem * 1024) - 1) will be
			 * remapped and used by the kernel later.)
			 *
			 * This code is similar to the code used in
			 * pmap_mapdev, but since no memory needs to be
			 * allocated we simply change the mapping.
			 */
			for (pa = trunc_page(biosbasemem * 1024);
			     pa < ISA_HOLE_START; pa += PAGE_SIZE) {
				unsigned *pte;

				pte = (unsigned *)vtopte(pa + KERNBASE);
				*pte = pa | PG_RW | PG_V;
			}
		}
		if (bootinfo.bi_extmem != biosextmem)
			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
			       bootinfo.bi_extmem, biosextmem);
	}

#ifdef SMP
	/* make hole for AP bootstrap code */
	pagesinbase = mp_bootaddress(biosbasemem) / PAGE_SIZE;
#else
	pagesinbase = biosbasemem * 1024 / PAGE_SIZE;
#endif

	pagesinext = biosextmem * 1024 / PAGE_SIZE;

	/*
	 * Special hack for chipsets that still remap the 384k hole when
	 *	there's 16MB of memory - this really confuses people that
	 *	are trying to use bus mastering ISA controllers with the
	 *	"16MB limit"; they only have 16MB, but the remapping puts
	 *	them beyond the limit.
	 */
	/*
	 * If extended memory is between 15-16MB (16-17MB phys address range),
	 *	chop it to 15MB.
	 */
	if ((pagesinext > 3840) && (pagesinext < 4096))
		pagesinext = 3840;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
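	/*
	 * (0x100000/PAGE_SIZE is the first megabyte, i.e. base memory plus
	 * the ISA hole, expressed in pages.)
	 */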

#ifdef MAXMEM
	Maxmem = MAXMEM/4;
#endif

#if NNPX > 0
	idp = find_isadev(isa_devtab_null, &npxdriver, 0);
	if (idp != NULL && idp->id_msize != 0)
		Maxmem = idp->id_msize / 4;
#endif

	/* call pmap initialization to make new kernel address space */
	pmap_bootstrap (first, 0);

#ifdef SMP
	/* fire up the APs and APICs */
	mp_start();
#endif

	/*
	 * Size up each available chunk of physical memory.
	 */

	/*
	 * We currently don't bother testing base memory.
	 * XXX  ...but we probably should.
	 */
	pa_indx = 0;
	badpages = 0;
	if (pagesinbase > 1) {
		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
		physmem = pagesinbase - 1;
	} else {
		/* point at first chunk end */
		pa_indx++;
	}

	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
		int tmp, page_bad = FALSE;

		/*
		 * map page into kernel: valid, read/write, non-cacheable
		 */
		*(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page;
		invltlb();

		tmp = *(int *)CADDR1;
		/*
		 * Test for alternating 1's and 0's
		 */
		*(volatile int *)CADDR1 = 0xaaaaaaaa;
		if (*(volatile int *)CADDR1 != 0xaaaaaaaa) {
			page_bad = TRUE;
		}
		/*
		 * Test for alternating 0's and 1's
		 */
		*(volatile int *)CADDR1 = 0x55555555;
		if (*(volatile int *)CADDR1 != 0x55555555) {
			page_bad = TRUE;
		}
		/*
		 * Test for all 1's
		 */
		*(volatile int *)CADDR1 = 0xffffffff;
		if (*(volatile int *)CADDR1 != 0xffffffff) {
			page_bad = TRUE;
		}
		/*
		 * Test for all 0's
		 */
		*(volatile int *)CADDR1 = 0x0;
		if (*(volatile int *)CADDR1 != 0x0) {
			/*
			 * test of page failed
			 */
			page_bad = TRUE;
		}
		/*
		 * Restore original value.
		 */
		*(int *)CADDR1 = tmp;

		/*
		 * Adjust array of valid/good pages.
		 */
		if (page_bad == FALSE) {
			/*
			 * If this good page is a continuation of the
			 * previous set of good pages, then just increase
			 * the end pointer.  Otherwise start a new chunk.
			 * Note that "end" points one page beyond the last
			 * good page, making the range >= start and < end.
			 */
			if (phys_avail[pa_indx] == target_page) {
				phys_avail[pa_indx] += PAGE_SIZE;
			} else {
				pa_indx++;
				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
					printf("Too many holes in the physical address space, giving up\n");
					pa_indx--;
					break;
				}
				phys_avail[pa_indx++] = target_page;	/* start */
				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
			}
			physmem++;
		} else {
			badpages++;
			page_bad = FALSE;
		}
	}

	*(int *)CMAP1 = 0;
	invltlb();

	/*
	 * XXX
	 * The last chunk must contain at least one page plus the message
	 * buffer to avoid complicating other code (message buffer address
	 * calculation, etc.).
	 */
	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
		phys_avail[pa_indx--] = 0;
		phys_avail[pa_indx--] = 0;
	}

	Maxmem = atop(phys_avail[pa_indx]);

	/* Trim off space for the message buffer. */
	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));

	avail_end = phys_avail[pa_indx];

	/* now running on new page tables, configured, and u/iom is accessible */

	/* Map the message buffer. */
	for (off = 0; off < round_page(sizeof(struct msgbuf)); off += PAGE_SIZE)
		pmap_enter(kernel_pmap, (vm_offset_t)msgbufp + off,
			   avail_end + off, VM_PROT_ALL, TRUE);
	msgbufmapped = 1;

#ifdef SMP
	for (x = 0; x < NCPU; x++) {
		/* make an initial tss so cpu can get interrupt stack on syscall! */
		SMPcommon_tss[x].tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE;
		SMPcommon_tss[x].tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
		SMPcommon_tss[x].tss_ioopt = (sizeof SMPcommon_tss[x]) << 16;
	}
	gsel_tss = GSEL(NGDT + cpunumber(), SEL_KPL);
	ltr(gsel_tss);
#else
	/* make an initial tss so cpu can get interrupt stack on syscall! */
	common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE;
	common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	common_tss.tss_ioopt = (sizeof common_tss) << 16;
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	ltr(gsel_tss);
#endif

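	/*
	 * Set up a separate TSS for double faults.  IDT vector 8 was
	 * installed above as a task gate through GPANIC_SEL, so a double
	 * fault task-switches onto dblfault_stack, which remains usable
	 * even when the normal kernel stack is not.
	 */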
	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
	    dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
	dblfault_tss.tss_cr3 = IdlePTD;
	dblfault_tss.tss_eip = (int) dblfault_handler;
	dblfault_tss.tss_eflags = PSL_KERNEL;
	dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs =
	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);

#ifdef TSS_IS_CACHED			/* cpu_switch helper */
	tssptr = &gdt[GPROC0_SEL].sd;
#endif

	/* make a call gate to reenter kernel with */
	gdp = &ldt[LSYS5CALLS_SEL].gd;

	x = (int) &IDTVEC(syscall);
	gdp->gd_looffset = x++;
	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
	gdp->gd_stkcpy = 1;
	gdp->gd_type = SDT_SYS386CGT;
	gdp->gd_dpl = SEL_UPL;
	gdp->gd_p = 1;
	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >> 16;

	/* XXX does this work? */
	ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];

	/* transfer to user mode */

	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);

	/* setup proc 0's pcb */
	proc0.p_addr->u_pcb.pcb_flags = 0;
	proc0.p_addr->u_pcb.pcb_cr3 = IdlePTD;
	proc0.p_addr->u_pcb.pcb_mpnest = 1;
}

int
ptrace_set_pc(p, addr)
	struct proc *p;
	unsigned int addr;
{
	p->p_md.md_regs->tf_eip = addr;
	return (0);
}

int
ptrace_single_step(p)
	struct proc *p;
{
	p->p_md.md_regs->tf_eflags |= PSL_T;
	return (0);
}

int
ptrace_write_u(p, off, data)
	struct proc *p;
	vm_offset_t off;
	int data;
{
	struct trapframe frame_copy;
	vm_offset_t min;
	struct trapframe *tp;

	/*
	 * Privileged kernel state is scattered all over the user area.
	 * Only allow write access to parts of regs and to fpregs.
	 */
	min = (char *)p->p_md.md_regs - (char *)p->p_addr;
	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
		tp = p->p_md.md_regs;
		frame_copy = *tp;
		*(int *)((char *)&frame_copy + (off - min)) = data;
		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
		    !CS_SECURE(frame_copy.tf_cs))
			return (EINVAL);
		*(int *)((char *)p->p_addr + off) = data;
		return (0);
	}
	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
		*(int *)((char *)p->p_addr + off) = data;
		return (0);
	}
	return (EFAULT);
}

int
fill_regs(p, regs)
	struct proc *p;
	struct reg *regs;
{
	struct trapframe *tp;

	tp = p->p_md.md_regs;
	regs->r_es = tp->tf_es;
	regs->r_ds = tp->tf_ds;
	regs->r_edi = tp->tf_edi;
	regs->r_esi = tp->tf_esi;
	regs->r_ebp = tp->tf_ebp;
	regs->r_ebx = tp->tf_ebx;
	regs->r_edx = tp->tf_edx;
	regs->r_ecx = tp->tf_ecx;
	regs->r_eax = tp->tf_eax;
	regs->r_eip = tp->tf_eip;
	regs->r_cs = tp->tf_cs;
	regs->r_eflags = tp->tf_eflags;
	regs->r_esp = tp->tf_esp;
	regs->r_ss = tp->tf_ss;
	return (0);
}

int
set_regs(p, regs)
	struct proc *p;
	struct reg *regs;
{
	struct trapframe *tp;

	tp = p->p_md.md_regs;
	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
	    !CS_SECURE(regs->r_cs))
		return (EINVAL);
	tp->tf_es = regs->r_es;
	tp->tf_ds = regs->r_ds;
	tp->tf_edi = regs->r_edi;
	tp->tf_esi = regs->r_esi;
	tp->tf_ebp = regs->r_ebp;
	tp->tf_ebx = regs->r_ebx;
	tp->tf_edx = regs->r_edx;
	tp->tf_ecx = regs->r_ecx;
	tp->tf_eax = regs->r_eax;
	tp->tf_eip = regs->r_eip;
	tp->tf_cs = regs->r_cs;
	tp->tf_eflags = regs->r_eflags;
	tp->tf_esp = regs->r_esp;
	tp->tf_ss = regs->r_ss;
	return (0);
}

#ifndef DDB
void
Debugger(const char *msg)
{
	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */

#include <sys/disklabel.h>

/*
 * Determine the size of the transfer, and make sure it is
 * within the boundaries of the partition.  Adjust transfer
 * if needed, and signal errors or early completion.
 */
int
bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
{
	struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
	int labelsect = lp->d_partitions[0].p_offset;
	int maxsz = p->p_size,
		sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;

	/* overwriting disk label ? */
	/* XXX should also protect bootstrap in first 8K */
	if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
#if LABELSECTOR != 0
	    bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
#endif
	    (bp->b_flags & B_READ) == 0 && wlabel == 0) {
		bp->b_error = EROFS;
		goto bad;
	}

#if	defined(DOSBBSECTOR) && defined(notyet)
	/* overwriting master boot record? */
	if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
	    (bp->b_flags & B_READ) == 0 && wlabel == 0) {
		bp->b_error = EROFS;
		goto bad;
	}
#endif

	/* beyond partition? */
	if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
		/* if exactly at end of disk, return an EOF */
		if (bp->b_blkno == maxsz) {
			bp->b_resid = bp->b_bcount;
			return(0);
		}
		/* or truncate if part of it fits */
		sz = maxsz - bp->b_blkno;
		if (sz <= 0) {
			bp->b_error = EINVAL;
			goto bad;
		}
		bp->b_bcount = sz << DEV_BSHIFT;
	}

	bp->b_pblkno = bp->b_blkno + p->p_offset;
	return(1);

bad:
	bp->b_flags |= B_ERROR;
	return(-1);
}

#ifdef DDB

/*
 * Provide inb() and outb() as functions.  They are normally only
 * available as macros calling inlined functions, thus cannot be
 * called inside DDB.
 *
 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
 */

#undef inb
#undef outb

/* silence compiler warnings */
u_char inb(u_int);
void outb(u_int, u_char);

u_char
inb(u_int port)
{
	u_char	data;
	/*
	 * We use %%dx and not %1 here because i/o is done at %dx and not at
	 * %edx, while gcc generates inferior code (movw instead of movl)
	 * if we tell it to load (u_short) port.
	 */
	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
	return (data);
}

void
outb(u_int port, u_char data)
{
	u_char	al;
	/*
	 * Use an unnecessary assignment to help gcc's register allocator.
	 * This makes a large difference for gcc-1.40 and a tiny difference
	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
	 * best results.  gcc-2.6.0 can't handle this.
	 */
	al = data;
	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
}

#endif /* DDB */