machdep.c revision 12701
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.155 1995/12/07 12:45:32 davidg Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/signalvar.h>
48#include <sys/kernel.h>
49#include <sys/proc.h>
50#include <sys/buf.h>
51#include <sys/reboot.h>
52#include <sys/conf.h>
53#include <sys/file.h>
54#include <sys/callout.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/mount.h>
58#include <sys/msgbuf.h>
59#include <sys/ioctl.h>
60#include <sys/sysent.h>
61#include <sys/tty.h>
62#include <sys/sysctl.h>
63#include <sys/devconf.h>
64#include <sys/vmmeter.h>
65
66#ifdef SYSVSHM
67#include <sys/shm.h>
68#endif
69
70#ifdef SYSVMSG
71#include <sys/msg.h>
72#endif
73
74#ifdef SYSVSEM
75#include <sys/sem.h>
76#endif
77
78#include <vm/vm.h>
79#include <vm/vm_param.h>
80#include <vm/vm_prot.h>
81#include <vm/lock.h>
82#include <vm/vm_kern.h>
83#include <vm/vm_object.h>
84#include <vm/vm_page.h>
85#include <vm/vm_map.h>
86#include <vm/vm_pager.h>
87#include <vm/vm_extern.h>
88
89#include <sys/user.h>
90#include <sys/exec.h>
91#include <sys/vnode.h>
92
93#include <ddb/ddb.h>
94
95#include <net/netisr.h>
96
97/* XXX correctly declaring all the netisr's is painful. */
98#include <net/if.h>
99#include <net/route.h>
100
101#include <netinet/in.h>
102#include <netinet/in_systm.h>
103#include <netinet/ip.h>
104#include <netinet/if_ether.h>
105#include <netinet/ip_var.h>
106
107#include <netns/ns.h>
108#include <netns/ns_if.h>
109
110#include <netiso/iso.h>
111#include <netiso/iso_var.h>
112
113#include <netccitt/dll.h>
114#include <netccitt/x25.h>
115#include <netccitt/pk.h>
116#include <sys/socketvar.h>
117#include <netccitt/pk_var.h>
118
119#include "ether.h"
120
121#include <machine/cpu.h>
122#include <machine/npx.h>
123#include <machine/reg.h>
124#include <machine/psl.h>
125#include <machine/clock.h>
126#include <machine/specialreg.h>
127#include <machine/sysarch.h>
128#include <machine/cons.h>
129#include <machine/devconf.h>
130#include <machine/bootinfo.h>
131#include <machine/md_var.h>
132
133#include <i386/isa/isa.h>
134#include <i386/isa/isa_device.h>
135#include <i386/isa/rtc.h>
136#include <machine/random.h>
137
138extern void diediedie __P((void));
139extern void init386 __P((int first));
140extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
141extern int ptrace_single_step __P((struct proc *p));
142extern int ptrace_getregs __P((struct proc *p, unsigned int *addr));
143extern int ptrace_setregs __P((struct proc *p, unsigned int *addr));
144extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
145
146static void cpu_startup __P((void *));
147SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
148
149static void identifycpu(void);
150
151char machine[] = "i386";
152SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
153
154char cpu_model[128];
155SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "");
156
157struct kern_devconf kdc_cpu0 = {
158	0, 0, 0,		/* filled in by dev_attach */
159	"cpu", 0, { MDDT_CPU },
160	0, 0, 0, CPU_EXTERNALLEN,
161	0,			/* CPU has no parent */
162	0,			/* no parentdata */
163	DC_BUSY,		/* the CPU is always busy */
164	cpu_model,		/* no sense in duplication */
165	DC_CLS_CPU		/* class */
166};
167
168#ifndef PANIC_REBOOT_WAIT_TIME
169#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
170#endif
171
172#ifdef BOUNCE_BUFFERS
173extern char *bouncememory;
174extern int maxbkva;
175#ifdef BOUNCEPAGES
176int	bouncepages = BOUNCEPAGES;
177#else
178int	bouncepages = 0;
179#endif
180#endif	/* BOUNCE_BUFFERS */
181
182extern int freebufspace;
183int	msgbufmapped = 0;		/* set when safe to use msgbuf */
184int _udatasel, _ucodesel;
185
186
187int physmem = 0;
188
189static int
190sysctl_hw_physmem SYSCTL_HANDLER_ARGS
191{
192	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
193	return (error);
194}
195
196SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
197	0, 0, sysctl_hw_physmem, "I", "");
198
199static int
200sysctl_hw_usermem SYSCTL_HANDLER_ARGS
201{
202	int error = sysctl_handle_int(oidp, 0,
203		ctob(physmem - cnt.v_wire_count), req);
204	return (error);
205}
206
207SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
208	0, 0, sysctl_hw_usermem, "I", "");
209
/*
 * boothowto:	boot flags; cpu_startup() tests RB_VERBOSE and RB_CONFIG.
 * bootverbose:	incremented by cpu_startup() when RB_VERBOSE is set.
 * Maxmem:	printed by cpu_startup() as "real memory" after ptoa().
 * badpages:	when non-zero, cpu_startup() lists physical memory holes.
 */
int boothowto = 0, bootverbose = 0, Maxmem = 0, badpages = 0;
long dumplo;		/* offset printed by dumpsys() for the dump device */
extern int bootdev;
int biosmem;

/*
 * Physical address table; cpu_startup() walks it two entries at a time
 * and stops at a zero entry.
 */
vm_offset_t phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)

int cpu_class;		/* set by identifycpu() from the i386_cpus[] table */

void dumpsys __P((void));
void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */

/* Start/end addresses filled in by the kmem_suballoc() calls in cpu_startup(). */
vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
vm_offset_t pager_sva, pager_eva;
extern struct linker_set netisr_set;	/* passed to setup_netisrs() */

/* Byte offset of a member within a struct type; used by sigreturn(). */
#define offsetof(type, member)	((size_t)(&((type *)0)->member))
231
232static void
233cpu_startup(dummy)
234	void *dummy;
235{
236	register unsigned i;
237	register caddr_t v;
238	vm_offset_t maxaddr;
239	vm_size_t size = 0;
240	int firstaddr;
241	vm_offset_t minaddr;
242
243	if (boothowto & RB_VERBOSE)
244		bootverbose++;
245
246	/*
247	 * Initialize error message buffer (at end of core).
248	 */
249
250	/* avail_end was pre-decremented in init_386() to compensate */
251	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
252		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
253			   avail_end + i * NBPG,
254			   VM_PROT_ALL, TRUE);
255	msgbufmapped = 1;
256
257	/*
258	 * Good {morning,afternoon,evening,night}.
259	 */
260	printf(version);
261	startrtclock();
262	identifycpu();
263	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
264	/*
265	 * Display any holes after the first chunk of extended memory.
266	 */
267	if (badpages != 0) {
268		int indx = 1;
269
270		/*
271		 * XXX skip reporting ISA hole & unmanaged kernel memory
272		 */
273		if (phys_avail[0] == PAGE_SIZE)
274			indx += 2;
275
276		printf("Physical memory hole(s):\n");
277		for (; phys_avail[indx + 1] != 0; indx += 2) {
278			int size = phys_avail[indx + 1] - phys_avail[indx];
279
280			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
281			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
282		}
283	}
284
285	/*
286	 * Quickly wire in netisrs.
287	 */
288	setup_netisrs(&netisr_set);
289
290/*
291#ifdef ISDN
292	DONET(isdnintr, NETISR_ISDN);
293#endif
294*/
295
296	/*
297	 * Allocate space for system data structures.
298	 * The first available kernel virtual address is in "v".
299	 * As pages of kernel virtual memory are allocated, "v" is incremented.
300	 * As pages of memory are allocated and cleared,
301	 * "firstaddr" is incremented.
302	 * An index into the kernel page table corresponding to the
303	 * virtual memory address maintained in "v" is kept in "mapaddr".
304	 */
305
306	/*
307	 * Make two passes.  The first pass calculates how much memory is
308	 * needed and allocates it.  The second pass assigns virtual
309	 * addresses to the various data structures.
310	 */
311	firstaddr = 0;
312again:
313	v = (caddr_t)firstaddr;
314
315#define	valloc(name, type, num) \
316	    (name) = (type *)v; v = (caddr_t)((name)+(num))
317#define	valloclim(name, type, num, lim) \
318	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
319	valloc(callout, struct callout, ncallout);
320#ifdef SYSVSHM
321	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
322#endif
323#ifdef SYSVSEM
324	valloc(sema, struct semid_ds, seminfo.semmni);
325	valloc(sem, struct sem, seminfo.semmns);
326	/* This is pretty disgusting! */
327	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
328#endif
329#ifdef SYSVMSG
330	valloc(msgpool, char, msginfo.msgmax);
331	valloc(msgmaps, struct msgmap, msginfo.msgseg);
332	valloc(msghdrs, struct msg, msginfo.msgtql);
333	valloc(msqids, struct msqid_ds, msginfo.msgmni);
334#endif
335
336	if (nbuf == 0) {
337		nbuf = 30;
338		if( physmem > 1024)
339			nbuf += min((physmem - 1024) / 12, 1024);
340	}
341	nswbuf = min(nbuf, 128);
342
343	valloc(swbuf, struct buf, nswbuf);
344	valloc(buf, struct buf, nbuf);
345
346#ifdef BOUNCE_BUFFERS
347	/*
348	 * If there is more than 16MB of memory, allocate some bounce buffers
349	 */
350	if (Maxmem > 4096) {
351		if (bouncepages == 0) {
352			bouncepages = 64;
353			bouncepages += ((Maxmem - 4096) / 2048) * 32;
354		}
355		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
356		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
357	}
358#endif
359
360	/*
361	 * End of first pass, size has been calculated so allocate memory
362	 */
363	if (firstaddr == 0) {
364		size = (vm_size_t)(v - firstaddr);
365		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
366		if (firstaddr == 0)
367			panic("startup: no room for tables");
368		goto again;
369	}
370
371	/*
372	 * End of second pass, addresses have been assigned
373	 */
374	if ((vm_size_t)(v - firstaddr) != size)
375		panic("startup: table size inconsistency");
376
377#ifdef BOUNCE_BUFFERS
378	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
379			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
380				maxbkva + pager_map_size, TRUE);
381	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
382#else
383	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
384			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
385#endif
386	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
387				(nbuf*MAXBSIZE), TRUE);
388	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
389				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
390	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
391				(16*ARG_MAX), TRUE);
392	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
393				(maxproc*UPAGES*PAGE_SIZE), FALSE);
394
395	/*
396	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
397	 * we use the more space efficient malloc in place of kmem_alloc.
398	 */
399	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
400				   M_MBUF, M_NOWAIT);
401	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
402	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
403			       nmbclusters * MCLBYTES, FALSE);
404	/*
405	 * Initialize callouts
406	 */
407	callfree = callout;
408	for (i = 1; i < ncallout; i++)
409		callout[i-1].c_next = &callout[i];
410
411        if (boothowto & RB_CONFIG) {
412		userconfig();
413		cninit();	/* the preferred console may have changed */
414	}
415
416#ifdef BOUNCE_BUFFERS
417	/*
418	 * init bounce buffers
419	 */
420	vm_bounce_init();
421#endif
422	/*
423	 * XXX allocate a contiguous area for ISA (non busmaster) DMA
424	 * operations. This _should_ only be done if the DMA channels
425	 * will actually be used, but for now we do it always.
426	 */
427#define DMAPAGES 8
428	isaphysmem =
429	    vm_page_alloc_contig(DMAPAGES * PAGE_SIZE, 0, 0xfffffful, 64*1024);
430
431	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
432	    ptoa(cnt.v_free_count) / 1024);
433
434	/*
435	 * Set up buffers, so they can be used to read disk labels.
436	 */
437	bufinit();
438	vm_pager_bufferinit();
439
440	/*
441	 * In verbose mode, print out the BIOS's idea of the disk geometries.
442	 */
443	if (bootverbose) {
444		printf("BIOS Geometries:\n");
445		for (i = 0; i < N_BIOS_GEOM; i++) {
446			unsigned long bios_geom;
447			int max_cylinder, max_head, max_sector;
448
449			bios_geom = bootinfo.bi_bios_geom[i];
450
451			/*
452			 * XXX the bootstrap punts a 1200K floppy geometry
453			 * when the get-disk-geometry interrupt fails.  Skip
454			 * drives that have this geometry.
455			 */
456			if (bios_geom == 0x4f010f)
457				continue;
458
459			printf(" %x:%08lx ", i, bios_geom);
460			max_cylinder = bios_geom >> 16;
461			max_head = (bios_geom >> 8) & 0xff;
462			max_sector = bios_geom & 0xff;
463			printf(
464		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
465			       max_cylinder, max_cylinder + 1,
466			       max_head, max_head + 1,
467			       max_sector, max_sector);
468		}
469		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
470	}
471}
472
473int
474register_netisr(num, handler)
475	int num;
476	netisr_t *handler;
477{
478
479	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
480		printf("register_netisr: bad isr number: %d\n", num);
481		return (EINVAL);
482	}
483	netisrs[num] = handler;
484	return (0);
485}
486
487void
488setup_netisrs(ls)
489	struct linker_set *ls;
490{
491	int i;
492	const struct netisrtab *nit;
493
494	for(i = 0; ls->ls_items[i]; i++) {
495		nit = (const struct netisrtab *)ls->ls_items[i];
496		register_netisr(nit->nit_num, nit->nit_isr);
497	}
498}
499
500struct cpu_nameclass i386_cpus[] = {
501	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
502	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
503	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
504	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
505	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
506	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
507	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
508};
509
510static void
511identifycpu()
512{
513	printf("CPU: ");
514	if (cpu >= 0
515	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
516		cpu_class = i386_cpus[cpu].cpu_class;
517		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
518	} else {
519		printf("unknown cpu type %d\n", cpu);
520		panic("startup: bad cpu id");
521	}
522
523#if defined(I586_CPU)
524	if(cpu_class == CPUCLASS_586) {
525		calibrate_cyclecounter();
526	}
527#endif
528#if defined(I486_CPU) || defined(I586_CPU)
529	if (!strcmp(cpu_vendor,"GenuineIntel")) {
530		if ((cpu_id & 0xf00) > 3) {
531			cpu_model[0] = '\0';
532
533			switch (cpu_id & 0x3000) {
534			case 0x1000:
535				strcpy(cpu_model, "Overdrive ");
536				break;
537			case 0x2000:
538				strcpy(cpu_model, "Dual ");
539				break;
540			}
541			if ((cpu_id & 0xf00) == 0x400) {
542				strcat(cpu_model, "i486 ");
543#if defined(I586_CPU)
544			} else if ((cpu_id & 0xf00) == 0x500) {
545				strcat(cpu_model, "Pentium"); /* nb no space */
546#endif
547			} else {
548				strcat(cpu_model, "unknown ");
549			}
550
551			switch (cpu_id & 0xff0) {
552			case 0x400:
553				strcat(cpu_model, "DX"); break;
554			case 0x410:
555				strcat(cpu_model, "DX"); break;
556			case 0x420:
557				strcat(cpu_model, "SX"); break;
558			case 0x430:
559				strcat(cpu_model, "DX2"); break;
560			case 0x440:
561				strcat(cpu_model, "SL"); break;
562			case 0x450:
563				strcat(cpu_model, "SX2"); break;
564			case 0x470:
565				strcat(cpu_model, "DX2 Write-Back Enhanced");
566				break;
567			case 0x480:
568				strcat(cpu_model, "DX4"); break;
569#if defined(I586_CPU)
570			case 0x510:
571			case 0x520:
572				/*
573				 * We used to do all sorts of nonsense here
574				 * to print out iCOMP numbers.  Since these
575				 * are meaningless except to Intel
576				 * marketroids, there seems to be little
577				 * sense in doing so.
578				 */
579				break;
580#endif
581			}
582		}
583	}
584#endif
585	printf("%s (", cpu_model);
586	switch(cpu_class) {
587	case CPUCLASS_286:
588		printf("286");
589		break;
590#if defined(I386_CPU)
591	case CPUCLASS_386:
592		printf("386");
593		break;
594#endif
595#if defined(I486_CPU)
596	case CPUCLASS_486:
597		printf("486");
598		break;
599#endif
600#if defined(I586_CPU)
601	case CPUCLASS_586:
602		printf("%d.%02d-MHz ",
603		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100,
604		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100);
605		printf("586");
606		break;
607#endif
608	default:
609		printf("unknown");	/* will panic below... */
610	}
611	printf("-class CPU)\n");
612#if defined(I486_CPU) || defined(I586_CPU)
613	if(*cpu_vendor)
614		printf("  Origin = \"%s\"",cpu_vendor);
615	if(cpu_id)
616		printf("  Id = 0x%lx",cpu_id);
617
618	if (!strcmp(cpu_vendor, "GenuineIntel")) {
619		printf("  Stepping=%ld", cpu_id & 0xf);
620		if (cpu_high > 0) {
621#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
622			printf("\n  Features=0x%b", cpu_feature, FEATUREFMT);
623		}
624	}
625	/* Avoid ugly blank lines: only print newline when we have to. */
626	if (*cpu_vendor || cpu_id)
627		printf("\n");
628#endif
629	/*
630	 * Now that we have told the user what they have,
631	 * let them know if that machine type isn't configured.
632	 */
633	switch (cpu_class) {
634	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
635#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
636#error This kernel is not configured for one of the supported CPUs
637#endif
638#if !defined(I386_CPU)
639	case CPUCLASS_386:
640#endif
641#if !defined(I486_CPU)
642	case CPUCLASS_486:
643#endif
644#if !defined(I586_CPU)
645	case CPUCLASS_586:
646#endif
647		panic("CPU class not configured");
648	default:
649		break;
650	}
651	dev_attach(&kdc_cpu0);
652}
653
654/*
655 * Send an interrupt to process.
656 *
657 * Stack is set up to allow sigcode stored
658 * in u. to call routine, followed by kcall
659 * to sigreturn routine below.  After sigreturn
660 * resets the signal mask, the stack, and the
661 * frame pointer, it returns to the user
662 * specified pc, psl.
663 */
664void
665sendsig(catcher, sig, mask, code)
666	sig_t catcher;
667	int sig, mask;
668	unsigned code;
669{
670	register struct proc *p = curproc;
671	register int *regs;
672	register struct sigframe *fp;
673	struct sigframe sf;
674	struct sigacts *psp = p->p_sigacts;
675	int oonstack;
676
677	regs = p->p_md.md_regs;
678        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
679	/*
680	 * Allocate and validate space for the signal handler
681	 * context. Note that if the stack is in P0 space, the
682	 * call to grow() is a nop, and the useracc() check
683	 * will fail if the process has not already allocated
684	 * the space with a `brk'.
685	 */
686        if ((psp->ps_flags & SAS_ALTSTACK) &&
687	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
688	    (psp->ps_sigonstack & sigmask(sig))) {
689		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
690		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
691		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
692	} else {
693		fp = (struct sigframe *)(regs[tESP]
694			- sizeof(struct sigframe));
695	}
696
697	/*
698	 * grow() will return FALSE if the fp will not fit inside the stack
699	 *	and the stack can not be grown. useracc will return FALSE
700	 *	if access is denied.
701	 */
702	if ((grow(p, (int)fp) == FALSE) ||
703	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
704		/*
705		 * Process has trashed its stack; give it an illegal
706		 * instruction to halt it in its tracks.
707		 */
708		SIGACTION(p, SIGILL) = SIG_DFL;
709		sig = sigmask(SIGILL);
710		p->p_sigignore &= ~sig;
711		p->p_sigcatch &= ~sig;
712		p->p_sigmask &= ~sig;
713		psignal(p, SIGILL);
714		return;
715	}
716
717	/*
718	 * Build the argument list for the signal handler.
719	 */
720	if (p->p_sysent->sv_sigtbl) {
721		if (sig < p->p_sysent->sv_sigsize)
722			sig = p->p_sysent->sv_sigtbl[sig];
723		else
724			sig = p->p_sysent->sv_sigsize + 1;
725	}
726	sf.sf_signum = sig;
727	sf.sf_code = code;
728	sf.sf_scp = &fp->sf_sc;
729	sf.sf_addr = (char *) regs[tERR];
730	sf.sf_handler = catcher;
731
732	/* save scratch registers */
733	sf.sf_sc.sc_eax = regs[tEAX];
734	sf.sf_sc.sc_ebx = regs[tEBX];
735	sf.sf_sc.sc_ecx = regs[tECX];
736	sf.sf_sc.sc_edx = regs[tEDX];
737	sf.sf_sc.sc_esi = regs[tESI];
738	sf.sf_sc.sc_edi = regs[tEDI];
739	sf.sf_sc.sc_cs = regs[tCS];
740	sf.sf_sc.sc_ds = regs[tDS];
741	sf.sf_sc.sc_ss = regs[tSS];
742	sf.sf_sc.sc_es = regs[tES];
743	sf.sf_sc.sc_isp = regs[tISP];
744
745	/*
746	 * Build the signal context to be used by sigreturn.
747	 */
748	sf.sf_sc.sc_onstack = oonstack;
749	sf.sf_sc.sc_mask = mask;
750	sf.sf_sc.sc_sp = regs[tESP];
751	sf.sf_sc.sc_fp = regs[tEBP];
752	sf.sf_sc.sc_pc = regs[tEIP];
753	sf.sf_sc.sc_ps = regs[tEFLAGS];
754
755	/*
756	 * Copy the sigframe out to the user's stack.
757	 */
758	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
759		/*
760		 * Something is wrong with the stack pointer.
761		 * ...Kill the process.
762		 */
763		sigexit(p, SIGILL);
764	};
765
766	regs[tESP] = (int)fp;
767	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
768	regs[tEFLAGS] &= ~PSL_VM;
769	regs[tCS] = _ucodesel;
770	regs[tDS] = _udatasel;
771	regs[tES] = _udatasel;
772	regs[tSS] = _udatasel;
773}
774
775/*
776 * System call to cleanup state after a signal
777 * has been taken.  Reset signal mask and
778 * stack state from context left by sendsig (above).
779 * Return to previous pc and psl as specified by
780 * context left by sendsig. Check carefully to
781 * make sure that the user has not modified the
782 * state to gain improper privileges.
783 */
784int
785sigreturn(p, uap, retval)
786	struct proc *p;
787	struct sigreturn_args /* {
788		struct sigcontext *sigcntxp;
789	} */ *uap;
790	int *retval;
791{
792	register struct sigcontext *scp;
793	register struct sigframe *fp;
794	register int *regs = p->p_md.md_regs;
795	int eflags;
796
797	/*
798	 * (XXX old comment) regs[tESP] points to the return address.
799	 * The user scp pointer is above that.
800	 * The return address is faked in the signal trampoline code
801	 * for consistency.
802	 */
803	scp = uap->sigcntxp;
804	fp = (struct sigframe *)
805	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
806
807	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
808		return(EINVAL);
809
810	/*
811	 * Don't allow users to change privileged or reserved flags.
812	 */
813#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
814	eflags = scp->sc_ps;
815	/*
816	 * XXX do allow users to change the privileged flag PSL_RF.  The
817	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
818	 * sometimes set it there too.  tf_eflags is kept in the signal
819	 * context during signal handling and there is no other place
820	 * to remember it, so the PSL_RF bit may be corrupted by the
821	 * signal handler without us knowing.  Corruption of the PSL_RF
822	 * bit at worst causes one more or one less debugger trap, so
823	 * allowing it is fairly harmless.
824	 */
825	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
826#ifdef DEBUG
827    		printf("sigreturn: eflags = 0x%x\n", eflags);
828#endif
829    		return(EINVAL);
830	}
831
832	/*
833	 * Don't allow users to load a valid privileged %cs.  Let the
834	 * hardware check for invalid selectors, excess privilege in
835	 * other selectors, invalid %eip's and invalid %esp's.
836	 */
837#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
838	if (!CS_SECURE(scp->sc_cs)) {
839#ifdef DEBUG
840    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
841#endif
842		trapsignal(p, SIGBUS, T_PROTFLT);
843		return(EINVAL);
844	}
845
846	/* restore scratch registers */
847	regs[tEAX] = scp->sc_eax;
848	regs[tEBX] = scp->sc_ebx;
849	regs[tECX] = scp->sc_ecx;
850	regs[tEDX] = scp->sc_edx;
851	regs[tESI] = scp->sc_esi;
852	regs[tEDI] = scp->sc_edi;
853	regs[tCS] = scp->sc_cs;
854	regs[tDS] = scp->sc_ds;
855	regs[tES] = scp->sc_es;
856	regs[tSS] = scp->sc_ss;
857	regs[tISP] = scp->sc_isp;
858
859	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
860		return(EINVAL);
861
862	if (scp->sc_onstack & 01)
863		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
864	else
865		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
866	p->p_sigmask = scp->sc_mask &~
867	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
868	regs[tEBP] = scp->sc_fp;
869	regs[tESP] = scp->sc_sp;
870	regs[tEIP] = scp->sc_pc;
871	regs[tEFLAGS] = eflags;
872	return(EJUSTRETURN);
873}
874
/*
 * Panic on request.  Gives a predictable way to bring the system down
 * through panic() (and so obtain a vmcore).
 */
void
diediedie()
{

	panic("because you said to!");
}
883
884int	waittime = -1;
885struct pcb dumppcb;
886
887__dead void
888boot(howto)
889	int howto;
890{
891	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
892		register struct buf *bp;
893		int iter, nbusy;
894
895		waittime = 0;
896		printf("\nsyncing disks... ");
897
898		sync(&proc0, NULL, NULL);
899
900		for (iter = 0; iter < 20; iter++) {
901			nbusy = 0;
902			for (bp = &buf[nbuf]; --bp >= buf; ) {
903				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
904					nbusy++;
905				}
906			}
907			if (nbusy == 0)
908				break;
909			printf("%d ", nbusy);
910			DELAY(40000 * iter);
911		}
912		if (nbusy) {
913			/*
914			 * Failed to sync all blocks. Indicate this and don't
915			 * unmount filesystems (thus forcing an fsck on reboot).
916			 */
917			printf("giving up\n");
918#ifdef SHOW_BUSYBUFS
919			nbusy = 0;
920			for (bp = &buf[nbuf]; --bp >= buf; ) {
921				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
922					nbusy++;
923					printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno);
924				}
925			}
926			DELAY(5000000);	/* 5 seconds */
927#endif
928		} else {
929			printf("done\n");
930			/*
931			 * Unmount filesystems
932			 */
933			if (panicstr == 0)
934				vfs_unmountall();
935		}
936		DELAY(100000);			/* wait for console output to finish */
937		dev_shutdownall(FALSE);
938	}
939	splhigh();
940	if (howto & RB_HALT) {
941		printf("\n");
942		printf("The operating system has halted.\n");
943		printf("Please press any key to reboot.\n\n");
944		cngetc();
945	} else {
946		if (howto & RB_DUMP) {
947			if (!cold) {
948				savectx(&dumppcb, 0);
949				dumppcb.pcb_ptd = rcr3();
950				dumpsys();
951			}
952
953			if (PANIC_REBOOT_WAIT_TIME != 0) {
954				if (PANIC_REBOOT_WAIT_TIME != -1) {
955					int loop;
956					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
957						PANIC_REBOOT_WAIT_TIME);
958					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
959						DELAY(1000 * 100); /* 1/10th second */
960						if (cncheckc()) /* Did user type a key? */
961							break;
962					}
963					if (!loop)
964						goto die;
965				}
966			} else { /* zero time specified - reboot NOW */
967				goto die;
968			}
969			printf("--> Press a key on the console to reboot <--\n");
970			cngetc();
971		}
972	}
973die:
974	printf("Rebooting...\n");
975	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
976	cpu_reset();
977	for(;;) ;
978	/* NOTREACHED */
979}
980
981unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
982int		dumpsize = 0;		/* also for savecore */
983
984int		dodump = 1;
985
986/*
987 * Doadump comes here after turning off memory management and
988 * getting on the dump stack, either when called above, or by
989 * the auto-restart code.
990 */
991void
992dumpsys()
993{
994
995	if (!dodump)
996		return;
997	if (dumpdev == NODEV)
998		return;
999	if ((minor(dumpdev)&07) != 1)
1000		return;
1001	dumpsize = Maxmem;
1002	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
1003	printf("dump ");
1004	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
1005
1006	case ENXIO:
1007		printf("device bad\n");
1008		break;
1009
1010	case EFAULT:
1011		printf("device not ready\n");
1012		break;
1013
1014	case EINVAL:
1015		printf("area improper\n");
1016		break;
1017
1018	case EIO:
1019		printf("i/o error\n");
1020		break;
1021
1022	case EINTR:
1023		printf("aborted from console\n");
1024		break;
1025
1026	default:
1027		printf("succeeded\n");
1028		break;
1029	}
1030}
1031
1032/*
1033 * Clear registers on exec
1034 */
1035void
1036setregs(p, entry, stack)
1037	struct proc *p;
1038	u_long entry;
1039	u_long stack;
1040{
1041	int *regs = p->p_md.md_regs;
1042
1043	bzero(regs, sizeof(struct trapframe));
1044	regs[tEIP] = entry;
1045	regs[tESP] = stack;
1046	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
1047	regs[tSS] = _udatasel;
1048	regs[tDS] = _udatasel;
1049	regs[tES] = _udatasel;
1050	regs[tCS] = _ucodesel;
1051
1052	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
1053	load_cr0(rcr0() | CR0_TS);	/* start emulating */
1054#if	NNPX > 0
1055	npxinit(__INITIAL_NPXCW__);
1056#endif	/* NNPX > 0 */
1057}
1058
1059static int
1060sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
1061{
1062	int error;
1063	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
1064		req);
1065	if (!error && req->newptr)
1066		resettodr();
1067	return (error);
1068}
1069
1070SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
1071	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
1072
1073SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
1074	CTLFLAG_RW, &disable_rtc_set, 0, "");
1075
1076SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
1077	CTLFLAG_RD, &bootinfo, bootinfo, "");
1078
1079/*
1080 * Initialize 386 and configure to run kernel
1081 */
1082
1083/*
1084 * Initialize segments & interrupt table
1085 */
1086
1087int currentldt;
1088int _default_ldt;
1089union descriptor gdt[NGDT];		/* global descriptor table */
1090struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
1091union descriptor ldt[NLDT];		/* local descriptor table */
1092
1093struct	i386tss	tss, panic_tss;
1094
1095extern  struct user *proc0paddr;
1096
1097/* software prototypes -- in more palatable form */
1098struct soft_segment_descriptor gdt_segs[] = {
1099/* GNULL_SEL	0 Null Descriptor */
1100{	0x0,			/* segment base address  */
1101	0x0,			/* length */
1102	0,			/* segment type */
1103	0,			/* segment descriptor priority level */
1104	0,			/* segment descriptor present */
1105	0, 0,
1106	0,			/* default 32 vs 16 bit size */
1107	0  			/* limit granularity (byte/page units)*/ },
1108/* GCODE_SEL	1 Code Descriptor for kernel */
1109{	0x0,			/* segment base address  */
1110	0xfffff,		/* length - all address space */
1111	SDT_MEMERA,		/* segment type */
1112	0,			/* segment descriptor priority level */
1113	1,			/* segment descriptor present */
1114	0, 0,
1115	1,			/* default 32 vs 16 bit size */
1116	1  			/* limit granularity (byte/page units)*/ },
1117/* GDATA_SEL	2 Data Descriptor for kernel */
1118{	0x0,			/* segment base address  */
1119	0xfffff,		/* length - all address space */
1120	SDT_MEMRWA,		/* segment type */
1121	0,			/* segment descriptor priority level */
1122	1,			/* segment descriptor present */
1123	0, 0,
1124	1,			/* default 32 vs 16 bit size */
1125	1  			/* limit granularity (byte/page units)*/ },
1126/* GLDT_SEL	3 LDT Descriptor */
1127{	(int) ldt,		/* segment base address  */
1128	sizeof(ldt)-1,		/* length - all address space */
1129	SDT_SYSLDT,		/* segment type */
1130	0,			/* segment descriptor priority level */
1131	1,			/* segment descriptor present */
1132	0, 0,
1133	0,			/* unused - default 32 vs 16 bit size */
1134	0  			/* limit granularity (byte/page units)*/ },
1135/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1136{	0x0,			/* segment base address  */
1137	0x0,			/* length - all address space */
1138	0,			/* segment type */
1139	0,			/* segment descriptor priority level */
1140	0,			/* segment descriptor present */
1141	0, 0,
1142	0,			/* default 32 vs 16 bit size */
1143	0  			/* limit granularity (byte/page units)*/ },
1144/* GPANIC_SEL	5 Panic Tss Descriptor */
1145{	(int) &panic_tss,	/* segment base address  */
1146	sizeof(tss)-1,		/* length - all address space */
1147	SDT_SYS386TSS,		/* segment type */
1148	0,			/* segment descriptor priority level */
1149	1,			/* segment descriptor present */
1150	0, 0,
1151	0,			/* unused - default 32 vs 16 bit size */
1152	0  			/* limit granularity (byte/page units)*/ },
1153/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1154{	(int) kstack,		/* segment base address  */
1155	sizeof(tss)-1,		/* length - all address space */
1156	SDT_SYS386TSS,		/* segment type */
1157	0,			/* segment descriptor priority level */
1158	1,			/* segment descriptor present */
1159	0, 0,
1160	0,			/* unused - default 32 vs 16 bit size */
1161	0  			/* limit granularity (byte/page units)*/ },
1162/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1163{	(int) ldt,		/* segment base address  */
1164	(512 * sizeof(union descriptor)-1),		/* length */
1165	SDT_SYSLDT,		/* segment type */
1166	0,			/* segment descriptor priority level */
1167	1,			/* segment descriptor present */
1168	0, 0,
1169	0,			/* unused - default 32 vs 16 bit size */
1170	0  			/* limit granularity (byte/page units)*/ },
1171/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1172{	0,			/* segment base address (overwritten by APM)  */
1173	0xfffff,		/* length */
1174	SDT_MEMERA,		/* segment type */
1175	0,			/* segment descriptor priority level */
1176	1,			/* segment descriptor present */
1177	0, 0,
1178	1,			/* default 32 vs 16 bit size */
1179	1  			/* limit granularity (byte/page units)*/ },
1180/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1181{	0,			/* segment base address (overwritten by APM)  */
1182	0xfffff,		/* length */
1183	SDT_MEMERA,		/* segment type */
1184	0,			/* segment descriptor priority level */
1185	1,			/* segment descriptor present */
1186	0, 0,
1187	0,			/* default 32 vs 16 bit size */
1188	1  			/* limit granularity (byte/page units)*/ },
1189/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1190{	0,			/* segment base address (overwritten by APM) */
1191	0xfffff,		/* length */
1192	SDT_MEMRWA,		/* segment type */
1193	0,			/* segment descriptor priority level */
1194	1,			/* segment descriptor present */
1195	0, 0,
1196	1,			/* default 32 vs 16 bit size */
1197	1  			/* limit granularity (byte/page units)*/ },
1198};
1199
1200struct soft_segment_descriptor ldt_segs[] = {
1201	/* Null Descriptor - overwritten by call gate */
1202{	0x0,			/* segment base address  */
1203	0x0,			/* length - all address space */
1204	0,			/* segment type */
1205	0,			/* segment descriptor priority level */
1206	0,			/* segment descriptor present */
1207	0, 0,
1208	0,			/* default 32 vs 16 bit size */
1209	0  			/* limit granularity (byte/page units)*/ },
1210	/* Null Descriptor - overwritten by call gate */
1211{	0x0,			/* segment base address  */
1212	0x0,			/* length - all address space */
1213	0,			/* segment type */
1214	0,			/* segment descriptor priority level */
1215	0,			/* segment descriptor present */
1216	0, 0,
1217	0,			/* default 32 vs 16 bit size */
1218	0  			/* limit granularity (byte/page units)*/ },
1219	/* Null Descriptor - overwritten by call gate */
1220{	0x0,			/* segment base address  */
1221	0x0,			/* length - all address space */
1222	0,			/* segment type */
1223	0,			/* segment descriptor priority level */
1224	0,			/* segment descriptor present */
1225	0, 0,
1226	0,			/* default 32 vs 16 bit size */
1227	0  			/* limit granularity (byte/page units)*/ },
1228	/* Code Descriptor for user */
1229{	0x0,			/* segment base address  */
1230	0xfffff,		/* length - all address space */
1231	SDT_MEMERA,		/* segment type */
1232	SEL_UPL,		/* segment descriptor priority level */
1233	1,			/* segment descriptor present */
1234	0, 0,
1235	1,			/* default 32 vs 16 bit size */
1236	1  			/* limit granularity (byte/page units)*/ },
1237	/* Data Descriptor for user */
1238{	0x0,			/* segment base address  */
1239	0xfffff,		/* length - all address space */
1240	SDT_MEMRWA,		/* segment type */
1241	SEL_UPL,		/* segment descriptor priority level */
1242	1,			/* segment descriptor present */
1243	0, 0,
1244	1,			/* default 32 vs 16 bit size */
1245	1  			/* limit granularity (byte/page units)*/ },
1246};
1247
1248void
1249setidt(idx, func, typ, dpl)
1250	int idx;
1251	inthand_t *func;
1252	int typ;
1253	int dpl;
1254{
1255	struct gate_descriptor *ip = idt + idx;
1256
1257	ip->gd_looffset = (int)func;
1258	ip->gd_selector = 8;
1259	ip->gd_stkcpy = 0;
1260	ip->gd_xx = 0;
1261	ip->gd_type = typ;
1262	ip->gd_dpl = dpl;
1263	ip->gd_p = 1;
1264	ip->gd_hioffset = ((int)func)>>16 ;
1265}
1266
/* Builds a handler symbol by passing X and the vector name to __CONCAT. */
1267#define	IDTVEC(name)	__CONCAT(X,name)
1268
/*
 * Handler entry points that init386() installs in the IDT (and, for
 * syscall, in the LDT call gate).  They are defined outside this part of
 * the file.
 */
1269extern inthand_t
1270	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1271	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1272	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1273	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1274	IDTVEC(syscall);
1275
/* Entry point for the vector 0x80 gate, present only with COMPAT_LINUX. */
1276#ifdef COMPAT_LINUX
1277extern inthand_t
1278	IDTVEC(linux_syscall);
1279#endif
1280
1281void
1282sdtossd(sd, ssd)
1283	struct segment_descriptor *sd;
1284	struct soft_segment_descriptor *ssd;
1285{
1286	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1287	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1288	ssd->ssd_type  = sd->sd_type;
1289	ssd->ssd_dpl   = sd->sd_dpl;
1290	ssd->ssd_p     = sd->sd_p;
1291	ssd->ssd_def32 = sd->sd_def32;
1292	ssd->ssd_gran  = sd->sd_gran;
1293}
1294
/*
 * Machine-dependent early initialization.  In order, this routine:
 *   - installs proc0paddr as proc0.p_addr and initializes the console;
 *   - converts gdt_segs[] and ldt_segs[] to hardware descriptors and
 *     fills the IDT with exception gates;
 *   - loads the GDT, IDT and default LDT;
 *   - sizes base and extended memory from RTC CMOS values and derives
 *     Maxmem;
 *   - calls pmap_bootstrap(first, 0);
 *   - pattern-tests every page from avail_start up to Maxmem and records
 *     the good ranges in phys_avail[];
 *   - reserves room for the message buffer at the end of the last chunk;
 *   - sets up and loads the proc0 TSS, builds the system call gate in the
 *     LDT, sets the user selectors and initializes proc0's pcb.
 *
 * first is passed unchanged to pmap_bootstrap().
 */
1295void
1296init386(first)
1297	int first;
1298{
1299	int x;
1300	unsigned biosbasemem, biosextmem;
1301	struct gate_descriptor *gdp;
1302	int gsel_tss;
1303	/* table descriptors - used to load tables by microp */
1304	struct region_descriptor r_gdt, r_idt;
1305	int	pagesinbase, pagesinext;
1306	int	target_page, pa_indx;
1307
1308	proc0.p_addr = proc0paddr;
1309
1310	/*
1311	 * Initialize the console before we print anything out.
1312	 */
1313	cninit();
1314
1315	/*
1316	 * make gdt memory segments, the code segment goes up to end of the
1317	 * page with etext in it, the data segment goes to the end of
1318	 * the address space
1319	 */
1320	/*
1321	 * XXX text protection is temporarily (?) disabled.  The limit was
1322	 * i386_btop(i386_round_page(etext)) - 1.
1323	 */
1324	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1325	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1326	for (x = 0; x < NGDT; x++)
1327		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1328
1329	/* make ldt memory segments */
1330	/*
1331	 * The data segment limit must not cover the user area because we
1332	 * don't want the user area to be writable in copyout() etc. (page
1333	 * level protection is lost in kernel mode on 386's).  Also, we
1334	 * don't want the user area to be writable directly (page level
1335	 * protection of the user area is not available on 486's with
1336	 * CR0_WP set, because there is no user-read/kernel-write mode).
1337	 *
1338	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1339	 * should be spelled ...MAX_USER...
1340	 */
1341#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1342	/*
1343	 * The code segment limit has to cover the user area until we move
1344	 * the signal trampoline out of the user area.  This is safe because
1345	 * the code segment cannot be written to directly.
1346	 */
1347#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1348	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1349	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1350	/* Note. eventually want private ldts per process */
1351	for (x = 0; x < NLDT; x++)
1352		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1353
1354	/* exceptions */
	/* Point every vector at the rsvd handler first, then override the known ones. */
1355	for (x = 0; x < NIDT; x++)
1356		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
1357	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1358	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1359	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1360 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1361	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1362	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1363	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1364	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1365	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1366	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1367	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1368	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1369	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1370	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1371	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1372	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1373	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1374	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
1375#ifdef COMPAT_LINUX
1376 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL);
1377#endif
1378
1379#include	"isa.h"
1380#if	NISA >0
1381	isa_defaultirq();
1382#endif
1383	rand_initialize();
1384
	/* Load the descriptor tables built above. */
1385	r_gdt.rd_limit = sizeof(gdt) - 1;
1386	r_gdt.rd_base =  (int) gdt;
1387	lgdt(&r_gdt);
1388
1389	r_idt.rd_limit = sizeof(idt) - 1;
1390	r_idt.rd_base = (int) idt;
1391	lidt(&r_idt);
1392
1393	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1394	lldt(_default_ldt);
1395	currentldt = _default_ldt;
1396
1397#ifdef DDB
1398	kdb_init();
1399	if (boothowto & RB_KDB)
1400		Debugger("Boot flags requested debugger");
1401#endif
1402
1403	/* Use BIOS values stored in RTC CMOS RAM, since probing
1404	 * breaks certain 386 AT relics.
1405	 */
1406	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1407	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1408
1409	/*
1410	 * Print a warning if the official BIOS interface disagrees
1411	 * with the hackish interface used above.  Eventually only
1412	 * the official interface should be used.
1413	 */
1414	if (bootinfo.bi_memsizes_valid) {
1415		if (bootinfo.bi_basemem != biosbasemem)
1416			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1417			       bootinfo.bi_basemem, biosbasemem);
1418		if (bootinfo.bi_extmem != biosextmem)
1419			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1420			       bootinfo.bi_extmem, biosextmem);
1421	}
1422
1423	/*
1424	 * If BIOS tells us that it has more than 640k in the basemem,
1425	 *	don't believe it - set it to 640k.
1426	 */
1427	if (biosbasemem > 640)
1428		biosbasemem = 640;
1429
1430	/*
1431	 * Some 386 machines might give us a bogus number for extended
1432	 *	mem. If this happens, stop now.
1433	 */
1434#ifndef LARGEMEM
1435	if (biosextmem > 65536) {
1436		panic("extended memory beyond limit of 64MB");
1437		/* NOTREACHED */
1438	}
1439#endif
1440
	/* The BIOS sizes are in kilobytes; convert them to page counts. */
1441	pagesinbase = biosbasemem * 1024 / NBPG;
1442	pagesinext = biosextmem * 1024 / NBPG;
1443
1444	/*
1445	 * Special hack for chipsets that still remap the 384k hole when
1446	 *	there's 16MB of memory - this really confuses people that
1447	 *	are trying to use bus mastering ISA controllers with the
1448	 *	"16MB limit"; they only have 16MB, but the remapping puts
1449	 *	them beyond the limit.
1450	 */
1451	/*
1452	 * If extended memory is between 15-16MB (16-17MB phys address range),
1453	 *	chop it to 15MB.
1454	 */
1455	if ((pagesinext > 3840) && (pagesinext < 4096))
1456		pagesinext = 3840;
1457
1458	/*
1459	 * Maxmem isn't the "maximum memory", it's one larger than the
1460	 * highest page of the physical address space.
1461	 */
1462	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1463
	/* NOTE(review): presumably MAXMEM is in kilobytes and 4 is the page size in KB - confirm. */
1464#ifdef MAXMEM
1465	Maxmem = MAXMEM/4;
1466#endif
1467
1468	/* call pmap initialization to make new kernel address space */
1469	pmap_bootstrap (first, 0);
1470
1471	/*
1472	 * Size up each available chunk of physical memory.
1473	 */
1474
1475	/*
1476	 * We currently don't bother testing base memory.
1477	 * XXX  ...but we probably should.
1478	 */
1479	pa_indx = 0;
1480	badpages = 0;
1481	if (pagesinbase > 1) {
1482		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
1483		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
1484		physmem = pagesinbase - 1;
1485	} else {
1486		/* point at first chunk end */
1487		pa_indx++;
1488	}
1489
	/*
	 * Walk the pages from avail_start up to Maxmem, mapping each one
	 * through CMAP1/CADDR1 and checking that four bit patterns written
	 * to its first word read back unchanged.
	 */
1490	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
1491		int tmp, page_bad = FALSE;
1492
1493		/*
1494		 * map page into kernel: valid, read/write, non-cacheable
1495		 */
1496		*(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page;
1497		pmap_update();
1498
1499		tmp = *(int *)CADDR1;
1500		/*
1501		 * Test for alternating 1's and 0's
1502		 */
1503		*(int *)CADDR1 = 0xaaaaaaaa;
1504		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1505			page_bad = TRUE;
1506		}
1507		/*
1508		 * Test for alternating 0's and 1's
1509		 */
1510		*(int *)CADDR1 = 0x55555555;
1511		if (*(int *)CADDR1 != 0x55555555) {
1512			page_bad = TRUE;
1513		}
1514		/*
1515		 * Test for all 1's
1516		 */
1517		*(int *)CADDR1 = 0xffffffff;
1518		if (*(int *)CADDR1 != 0xffffffff) {
1519			page_bad = TRUE;
1520		}
1521		/*
1522		 * Test for all 0's
1523		 */
1524		*(int *)CADDR1 = 0x0;
1525		if (*(int *)CADDR1 != 0x0) {
1526			/*
1527			 * test of page failed
1528			 */
1529			page_bad = TRUE;
1530		}
1531		/*
1532		 * Restore original value.
1533		 */
1534		*(int *)CADDR1 = tmp;
1535
1536		/*
1537		 * Adjust array of valid/good pages.
1538		 */
1539		if (page_bad == FALSE) {
1540			/*
1541			 * If this good page is a continuation of the
1542			 * previous set of good pages, then just increase
1543			 * the end pointer. Otherwise start a new chunk.
1544			 * Note that "end" points one higher than end,
1545			 * making the range >= start and < end.
1546			 */
1547			if (phys_avail[pa_indx] == target_page) {
1548				phys_avail[pa_indx] += PAGE_SIZE;
1549			} else {
1550				pa_indx++;
1551				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1552					printf("Too many holes in the physical address space, giving up\n");
1553					pa_indx--;
1554					break;
1555				}
1556				phys_avail[pa_indx++] = target_page;	/* start */
1557				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
1558			}
1559			physmem++;
1560		} else {
1561			badpages++;
			/* NOTE(review): redundant - page_bad is re-initialized at the top of every iteration. */
1562			page_bad = FALSE;
1563		}
1564	}
1565
	/* Remove the temporary test mapping. */
1566	*(int *)CMAP1 = 0;
1567	pmap_update();
1568
1569	/*
1570	 * XXX
1571	 * The last chunk must contain at least one page plus the message
1572	 * buffer to avoid complicating other code (message buffer address
1573	 * calculation, etc.).
1574	 */
1575	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1576	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
1577		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1578		phys_avail[pa_indx--] = 0;
1579		phys_avail[pa_indx--] = 0;
1580	}
1581
1582	Maxmem = atop(phys_avail[pa_indx]);
1583
1584	/* Trim off space for the message buffer. */
1585	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));
1586
1587	avail_end = phys_avail[pa_indx];
1588
1589	/* now running on new page tables, configured,and u/iom is accessible */
1590
1591	/* make a initial tss so microp can get interrupt stack on syscall! */
1592	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1593	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1594	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1595
	/* Store sizeof(tss) in the upper 16 bits of tss_ioopt of the TSS at the GPROC0_SEL base. */
1596	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1597		(sizeof(tss))<<16;
1598
1599	ltr(gsel_tss);
1600
1601	/* make a call gate to reenter kernel with */
1602	gdp = &ldt[LSYS5CALLS_SEL].gd;
1603
1604	x = (int) &IDTVEC(syscall);
	/* NOTE(review): the post-increment has no effect; x is not read again. */
1605	gdp->gd_looffset = x++;
1606	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1607	gdp->gd_stkcpy = 1;
1608	gdp->gd_type = SDT_SYS386CGT;
1609	gdp->gd_dpl = SEL_UPL;
1610	gdp->gd_p = 1;
1611	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1612
1613	/* transfer to user mode */
1614
1615	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1616	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1617
1618	/* setup proc 0's pcb */
1619	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1620	proc0.p_addr->u_pcb.pcb_flags = 0;
1621	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1622}
1623
/*
 * TF_REGP(p) yields a struct trapframe pointer for process p: the byte
 * offset of p->p_md.md_regs from kstack, applied to p->p_addr.  The
 * original explanation follows.
 */
1624/*
1625 * The registers are in the frame; the frame is in the user area of
1626 * the process in question; when the process is active, the registers
1627 * are in "the kernel stack"; when it's not, they're still there, but
1628 * things get flipped around.  So, since p->p_md.md_regs is the whole address
1629 * of the register set, take its offset from the kernel stack, and
1630 * index into the user block.  Don't you just *love* virtual memory?
1631 * (I'm starting to think seymour is right...)
1632 */
1633#define	TF_REGP(p)	((struct trapframe *) \
1634			 ((char *)(p)->p_addr \
1635			  + ((char *)(p)->p_md.md_regs - kstack)))
1636
1637int
1638ptrace_set_pc(p, addr)
1639	struct proc *p;
1640	unsigned int addr;
1641{
1642	TF_REGP(p)->tf_eip = addr;
1643	return (0);
1644}
1645
1646int
1647ptrace_single_step(p)
1648	struct proc *p;
1649{
1650	TF_REGP(p)->tf_eflags |= PSL_T;
1651	return (0);
1652}
1653
1654int
1655ptrace_getregs(p, addr)
1656	struct proc *p;
1657	unsigned int *addr;
1658{
1659	int error;
1660	struct reg regs;
1661
1662	error = fill_regs(p, &regs);
1663	if (error)
1664		return (error);
1665	return (copyout(&regs, addr, sizeof regs));
1666}
1667
1668int
1669ptrace_setregs(p, addr)
1670	struct proc *p;
1671	unsigned int *addr;
1672{
1673	int error;
1674	struct reg regs;
1675
1676	error = copyin(addr, &regs, sizeof regs);
1677	if (error)
1678		return (error);
1679	return (set_regs(p, &regs));
1680}
1681
1682int ptrace_write_u(p, off, data)
1683	struct proc *p;
1684	vm_offset_t off;
1685	int data;
1686{
1687	struct trapframe frame_copy;
1688	vm_offset_t min;
1689	struct trapframe *tp;
1690
1691	/*
1692	 * Privileged kernel state is scattered all over the user area.
1693	 * Only allow write access to parts of regs and to fpregs.
1694	 */
1695	min = (char *)p->p_md.md_regs - kstack;
1696	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1697		tp = TF_REGP(p);
1698		frame_copy = *tp;
1699		*(int *)((char *)&frame_copy + (off - min)) = data;
1700		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1701		    !CS_SECURE(frame_copy.tf_cs))
1702			return (EINVAL);
1703		*(int*)((char *)p->p_addr + off) = data;
1704		return (0);
1705	}
1706	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1707	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1708		*(int*)((char *)p->p_addr + off) = data;
1709		return (0);
1710	}
1711	return (EFAULT);
1712}
1713
1714int
1715fill_regs(p, regs)
1716	struct proc *p;
1717	struct reg *regs;
1718{
1719	struct trapframe *tp;
1720
1721	tp = TF_REGP(p);
1722	regs->r_es = tp->tf_es;
1723	regs->r_ds = tp->tf_ds;
1724	regs->r_edi = tp->tf_edi;
1725	regs->r_esi = tp->tf_esi;
1726	regs->r_ebp = tp->tf_ebp;
1727	regs->r_ebx = tp->tf_ebx;
1728	regs->r_edx = tp->tf_edx;
1729	regs->r_ecx = tp->tf_ecx;
1730	regs->r_eax = tp->tf_eax;
1731	regs->r_eip = tp->tf_eip;
1732	regs->r_cs = tp->tf_cs;
1733	regs->r_eflags = tp->tf_eflags;
1734	regs->r_esp = tp->tf_esp;
1735	regs->r_ss = tp->tf_ss;
1736	return (0);
1737}
1738
1739int
1740set_regs(p, regs)
1741	struct proc *p;
1742	struct reg *regs;
1743{
1744	struct trapframe *tp;
1745
1746	tp = TF_REGP(p);
1747	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1748	    !CS_SECURE(regs->r_cs))
1749		return (EINVAL);
1750	tp->tf_es = regs->r_es;
1751	tp->tf_ds = regs->r_ds;
1752	tp->tf_edi = regs->r_edi;
1753	tp->tf_esi = regs->r_esi;
1754	tp->tf_ebp = regs->r_ebp;
1755	tp->tf_ebx = regs->r_ebx;
1756	tp->tf_edx = regs->r_edx;
1757	tp->tf_ecx = regs->r_ecx;
1758	tp->tf_eax = regs->r_eax;
1759	tp->tf_eip = regs->r_eip;
1760	tp->tf_cs = regs->r_cs;
1761	tp->tf_eflags = regs->r_eflags;
1762	tp->tf_esp = regs->r_esp;
1763	tp->tf_ss = regs->r_ss;
1764	return (0);
1765}
1766
#if !defined(DDB)
/*
 * Stand-in used when the kernel is built without DDB: it only reports that
 * the debugger was requested, together with the caller's message.
 */
void
Debugger(const char *msg)
{

	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* !DDB */
1774
1775#include <sys/disklabel.h>
1776#define b_cylin	b_resid
1777/*
1778 * Determine the size of the transfer, and make sure it is
1779 * within the boundaries of the partition. Adjust transfer
1780 * if needed, and signal errors or early completion.
1781 */
1782int
1783bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1784{
1785        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1786        int labelsect = lp->d_partitions[0].p_offset;
1787        int maxsz = p->p_size,
1788                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1789
1790        /* overwriting disk label ? */
1791        /* XXX should also protect bootstrap in first 8K */
1792        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1793#if LABELSECTOR != 0
1794            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1795#endif
1796            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1797                bp->b_error = EROFS;
1798                goto bad;
1799        }
1800
1801#if     defined(DOSBBSECTOR) && defined(notyet)
1802        /* overwriting master boot record? */
1803        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1804            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1805                bp->b_error = EROFS;
1806                goto bad;
1807        }
1808#endif
1809
1810        /* beyond partition? */
1811        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1812                /* if exactly at end of disk, return an EOF */
1813                if (bp->b_blkno == maxsz) {
1814                        bp->b_resid = bp->b_bcount;
1815                        return(0);
1816                }
1817                /* or truncate if part of it fits */
1818                sz = maxsz - bp->b_blkno;
1819                if (sz <= 0) {
1820                        bp->b_error = EINVAL;
1821                        goto bad;
1822                }
1823                bp->b_bcount = sz << DEV_BSHIFT;
1824        }
1825
1826        /* calculate cylinder for disksort to order transfers with */
1827        bp->b_pblkno = bp->b_blkno + p->p_offset;
1828        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1829        return(1);
1830
1831bad:
1832        bp->b_flags |= B_ERROR;
1833        return(-1);
1834}
1835
/*
 * Hand the drive number to the sysctl request with SYSCTL_OUT() and
 * return its result.
 */
int
disk_externalize(int drive, struct sysctl_req *req)
{
	int status;

	status = SYSCTL_OUT(req, &drive, sizeof(drive));
	return (status);
}
1841