machdep.c revision 13228
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.166 1995/12/30 23:13:32 davidg Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43#include "opt_sysvipc.h"
44#include "opt_ddb.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/sysproto.h>
49#include <sys/signalvar.h>
50#include <sys/kernel.h>
51#include <sys/proc.h>
52#include <sys/buf.h>
53#include <sys/reboot.h>
54#include <sys/conf.h>
55#include <sys/file.h>
56#include <sys/callout.h>
57#include <sys/malloc.h>
58#include <sys/mbuf.h>
59#include <sys/mount.h>
60#include <sys/msgbuf.h>
61#include <sys/ioctl.h>
62#include <sys/sysent.h>
63#include <sys/tty.h>
64#include <sys/sysctl.h>
65#include <sys/devconf.h>
66#include <sys/vmmeter.h>
67
68#ifdef SYSVSHM
69#include <sys/shm.h>
70#endif
71
72#ifdef SYSVMSG
73#include <sys/msg.h>
74#endif
75
76#ifdef SYSVSEM
77#include <sys/sem.h>
78#endif
79
80#include <vm/vm.h>
81#include <vm/vm_param.h>
82#include <vm/vm_prot.h>
83#include <vm/lock.h>
84#include <vm/vm_kern.h>
85#include <vm/vm_object.h>
86#include <vm/vm_page.h>
87#include <vm/vm_map.h>
88#include <vm/vm_pager.h>
89#include <vm/vm_extern.h>
90
91#include <sys/user.h>
92#include <sys/exec.h>
93#include <sys/vnode.h>
94
95#include <ddb/ddb.h>
96
97#include <net/netisr.h>
98
99#include <machine/cpu.h>
100#include <machine/npx.h>
101#include <machine/reg.h>
102#include <machine/psl.h>
103#include <machine/clock.h>
104#include <machine/specialreg.h>
105#include <machine/sysarch.h>
106#include <machine/cons.h>
107#include <machine/devconf.h>
108#include <machine/bootinfo.h>
109#include <machine/md_var.h>
110
111#include <i386/isa/isa.h>
112#include <i386/isa/isa_device.h>
113#include <i386/isa/rtc.h>
114#include <machine/random.h>
115
/* Routines used in this file whose definitions are not visible here. */
extern void init386 __P((int first));
extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
extern int ptrace_single_step __P((struct proc *p));
extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
extern void dblfault_handler __P((void));

/*
 * CPU-class specific bzero implementations; identifycpu() assigns one of
 * them to "bzero" once the CPU class is known.
 */
extern void i486_bzero	__P((void *, size_t));
extern void i586_bzero	__P((void *, size_t));
extern void i686_bzero	__P((void *, size_t));

/* cpu_startup() is registered with SYSINIT at SI_SUB_CPU, SI_ORDER_FIRST. */
static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)

static void identifycpu(void);

/* Machine name string, exported read-only through the HW_MACHINE sysctl. */
char machine[] = "i386";
SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");

/*
 * CPU model string, filled in by identifycpu() and exported read-only
 * through the HW_MODEL sysctl.
 */
static char cpu_model[128];
SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "");

/*
 * Device-configuration record for the CPU; identifycpu() hands it to
 * dev_attach() once the model string has been built.
 */
struct kern_devconf kdc_cpu0 = {
	0, 0, 0,		/* filled in by dev_attach */
	"cpu", 0, { MDDT_CPU },
	0, 0, 0, CPU_EXTERNALLEN,
	0,			/* CPU has no parent */
	0,			/* no parentdata */
	DC_BUSY,		/* the CPU is always busy */
	cpu_model,		/* no sense in duplication */
	DC_CLS_CPU		/* class */
};
147
/*
 * How long boot() waits for a console key after a crash dump before
 * rebooting on its own.  0 reboots immediately; -1 skips the countdown
 * and waits for a key press.
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif

#ifdef BOUNCE_BUFFERS
extern char *bouncememory;
extern int maxbkva;
/*
 * Number of bounce pages.  When left at 0, cpu_startup() chooses a value
 * from Maxmem (only if Maxmem exceeds 4096 pages).
 */
#ifdef BOUNCEPAGES
int	bouncepages = BOUNCEPAGES;
#else
int	bouncepages = 0;
#endif
#endif	/* BOUNCE_BUFFERS */

extern int freebufspace;
int	msgbufmapped = 0;		/* set when safe to use msgbuf */
/*
 * Selectors that sendsig() and setregs() load into the user data
 * (%ds/%es/%ss) and code (%cs) segment registers.
 */
int _udatasel, _ucodesel;


/*
 * Physical memory size; the hw.physmem handler converts it with ctob(),
 * so it is presumably counted in clicks (pages) -- set outside this
 * part of the file.
 */
int physmem = 0;
169static int
170sysctl_hw_physmem SYSCTL_HANDLER_ARGS
171{
172	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
173	return (error);
174}
175
176SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
177	0, 0, sysctl_hw_physmem, "I", "");
178
179static int
180sysctl_hw_usermem SYSCTL_HANDLER_ARGS
181{
182	int error = sysctl_handle_int(oidp, 0,
183		ctob(physmem - cnt.v_wire_count), req);
184	return (error);
185}
186
187SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
188	0, 0, sysctl_hw_usermem, "I", "");
189
/*
 * boothowto:   RB_* boot flags (tested against RB_VERBOSE and RB_CONFIG
 *              in cpu_startup()).
 * bootverbose: bumped by cpu_startup() when RB_VERBOSE is set.
 * Maxmem:      memory size in pages (printed via ptoa() in cpu_startup()).
 */
int boothowto = 0, bootverbose = 0, Maxmem = 0;
/* Non-zero makes cpu_startup() print the physical memory holes. */
static int	badpages = 0;
long dumplo;
extern int bootdev;

/*
 * Table of physical memory chunks.  cpu_startup() walks it in pairs and
 * stops at a zero entry; presumably each pair is a (start, end) address
 * -- filled in outside this part of the file.
 */
vm_offset_t phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)

/* CPUCLASS_* value for the running CPU; set by identifycpu(). */
int cpu_class;

static void dumpsys __P((void));
static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */

/*
 * Start/end addresses returned by kmem_suballoc() in cpu_startup() for
 * buffer_map, clean_map and pager_map respectively.
 */
static vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
/* Linker set of netisr table entries, consumed by setup_netisrs(). */
extern struct linker_set netisr_set;

/* Byte offset of "member" within "type" (used by sigreturn()). */
#define offsetof(type, member)	((size_t)(&((type *)0)->member))
211
212static void
213cpu_startup(dummy)
214	void *dummy;
215{
216	register unsigned i;
217	register caddr_t v;
218	vm_offset_t maxaddr;
219	vm_size_t size = 0;
220	int firstaddr;
221	vm_offset_t minaddr;
222
223	if (boothowto & RB_VERBOSE)
224		bootverbose++;
225
226	/*
227	 * Initialize error message buffer (at end of core).
228	 */
229
230	/* avail_end was pre-decremented in init_386() to compensate */
231	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
232		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
233			   avail_end + i * NBPG,
234			   VM_PROT_ALL, TRUE);
235	msgbufmapped = 1;
236
237	/*
238	 * Good {morning,afternoon,evening,night}.
239	 */
240	printf(version);
241	startrtclock();
242	identifycpu();
243	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
244	/*
245	 * Display any holes after the first chunk of extended memory.
246	 */
247	if (badpages != 0) {
248		int indx = 1;
249
250		/*
251		 * XXX skip reporting ISA hole & unmanaged kernel memory
252		 */
253		if (phys_avail[0] == PAGE_SIZE)
254			indx += 2;
255
256		printf("Physical memory hole(s):\n");
257		for (; phys_avail[indx + 1] != 0; indx += 2) {
258			int size = phys_avail[indx + 1] - phys_avail[indx];
259
260			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
261			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
262		}
263	}
264
265	/*
266	 * Quickly wire in netisrs.
267	 */
268	setup_netisrs(&netisr_set);
269
270/*
271#ifdef ISDN
272	DONET(isdnintr, NETISR_ISDN);
273#endif
274*/
275
276	/*
277	 * Allocate space for system data structures.
278	 * The first available kernel virtual address is in "v".
279	 * As pages of kernel virtual memory are allocated, "v" is incremented.
280	 * As pages of memory are allocated and cleared,
281	 * "firstaddr" is incremented.
282	 * An index into the kernel page table corresponding to the
283	 * virtual memory address maintained in "v" is kept in "mapaddr".
284	 */
285
286	/*
287	 * Make two passes.  The first pass calculates how much memory is
288	 * needed and allocates it.  The second pass assigns virtual
289	 * addresses to the various data structures.
290	 */
291	firstaddr = 0;
292again:
293	v = (caddr_t)firstaddr;
294
295#define	valloc(name, type, num) \
296	    (name) = (type *)v; v = (caddr_t)((name)+(num))
297#define	valloclim(name, type, num, lim) \
298	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
299	valloc(callout, struct callout, ncallout);
300#ifdef SYSVSHM
301	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
302#endif
303#ifdef SYSVSEM
304	valloc(sema, struct semid_ds, seminfo.semmni);
305	valloc(sem, struct sem, seminfo.semmns);
306	/* This is pretty disgusting! */
307	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
308#endif
309#ifdef SYSVMSG
310	valloc(msgpool, char, msginfo.msgmax);
311	valloc(msgmaps, struct msgmap, msginfo.msgseg);
312	valloc(msghdrs, struct msg, msginfo.msgtql);
313	valloc(msqids, struct msqid_ds, msginfo.msgmni);
314#endif
315
316	if (nbuf == 0) {
317		nbuf = 30;
318		if( physmem > 1024)
319			nbuf += min((physmem - 1024) / 12, 1024);
320	}
321	nswbuf = min(nbuf, 128);
322
323	valloc(swbuf, struct buf, nswbuf);
324	valloc(buf, struct buf, nbuf);
325
326#ifdef BOUNCE_BUFFERS
327	/*
328	 * If there is more than 16MB of memory, allocate some bounce buffers
329	 */
330	if (Maxmem > 4096) {
331		if (bouncepages == 0) {
332			bouncepages = 64;
333			bouncepages += ((Maxmem - 4096) / 2048) * 32;
334		}
335		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
336		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
337	}
338#endif
339
340	/*
341	 * End of first pass, size has been calculated so allocate memory
342	 */
343	if (firstaddr == 0) {
344		size = (vm_size_t)(v - firstaddr);
345		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
346		if (firstaddr == 0)
347			panic("startup: no room for tables");
348		goto again;
349	}
350
351	/*
352	 * End of second pass, addresses have been assigned
353	 */
354	if ((vm_size_t)(v - firstaddr) != size)
355		panic("startup: table size inconsistency");
356
357#ifdef BOUNCE_BUFFERS
358	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
359			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
360				maxbkva + pager_map_size, TRUE);
361	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
362#else
363	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
364			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
365#endif
366	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
367				(nbuf*MAXBSIZE), TRUE);
368	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
369				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
370	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
371				(16*ARG_MAX), TRUE);
372	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
373				(maxproc*UPAGES*PAGE_SIZE), FALSE);
374
375	/*
376	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
377	 * we use the more space efficient malloc in place of kmem_alloc.
378	 */
379	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
380				   M_MBUF, M_NOWAIT);
381	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
382	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
383			       nmbclusters * MCLBYTES, FALSE);
384	/*
385	 * Initialize callouts
386	 */
387	callfree = callout;
388	for (i = 1; i < ncallout; i++)
389		callout[i-1].c_next = &callout[i];
390
391        if (boothowto & RB_CONFIG) {
392		userconfig();
393		cninit();	/* the preferred console may have changed */
394	}
395
396#ifdef BOUNCE_BUFFERS
397	/*
398	 * init bounce buffers
399	 */
400	vm_bounce_init();
401#endif
402	/*
403	 * XXX allocate a contiguous area for ISA (non busmaster) DMA
404	 * operations. This _should_ only be done if the DMA channels
405	 * will actually be used, but for now we do it always.
406	 */
407#define DMAPAGES 8
408	isaphysmem =
409	    vm_page_alloc_contig(DMAPAGES * PAGE_SIZE, 0, 0xfffffful, 64*1024);
410
411	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
412	    ptoa(cnt.v_free_count) / 1024);
413
414	/*
415	 * Set up buffers, so they can be used to read disk labels.
416	 */
417	bufinit();
418	vm_pager_bufferinit();
419
420	/*
421	 * In verbose mode, print out the BIOS's idea of the disk geometries.
422	 */
423	if (bootverbose) {
424		printf("BIOS Geometries:\n");
425		for (i = 0; i < N_BIOS_GEOM; i++) {
426			unsigned long bios_geom;
427			int max_cylinder, max_head, max_sector;
428
429			bios_geom = bootinfo.bi_bios_geom[i];
430
431			/*
432			 * XXX the bootstrap punts a 1200K floppy geometry
433			 * when the get-disk-geometry interrupt fails.  Skip
434			 * drives that have this geometry.
435			 */
436			if (bios_geom == 0x4f010f)
437				continue;
438
439			printf(" %x:%08lx ", i, bios_geom);
440			max_cylinder = bios_geom >> 16;
441			max_head = (bios_geom >> 8) & 0xff;
442			max_sector = bios_geom & 0xff;
443			printf(
444		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
445			       max_cylinder, max_cylinder + 1,
446			       max_head, max_head + 1,
447			       max_sector, max_sector);
448		}
449		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
450	}
451}
452
453int
454register_netisr(num, handler)
455	int num;
456	netisr_t *handler;
457{
458
459	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
460		printf("register_netisr: bad isr number: %d\n", num);
461		return (EINVAL);
462	}
463	netisrs[num] = handler;
464	return (0);
465}
466
467static void
468setup_netisrs(ls)
469	struct linker_set *ls;
470{
471	int i;
472	const struct netisrtab *nit;
473
474	for(i = 0; ls->ls_items[i]; i++) {
475		nit = (const struct netisrtab *)ls->ls_items[i];
476		register_netisr(nit->nit_num, nit->nit_isr);
477	}
478}
479
480static struct cpu_nameclass i386_cpus[] = {
481	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
482	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
483	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
484	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
485	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
486	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
487	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
488	{ "Pentium Pro",	CPUCLASS_686 },		/* CPU_686 */
489};
490
491static void
492identifycpu()
493{
494	printf("CPU: ");
495	if (cpu >= 0
496	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
497		cpu_class = i386_cpus[cpu].cpu_class;
498		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
499	} else {
500		printf("unknown cpu type %d\n", cpu);
501		panic("startup: bad cpu id");
502	}
503
504#if defined(I586_CPU) || defined(I686_CPU)
505	if (cpu_class == CPUCLASS_586 || cpu_class == CPUCLASS_686) {
506		calibrate_cyclecounter();
507	}
508#endif
509#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
510	if (!strcmp(cpu_vendor,"GenuineIntel")) {
511		if ((cpu_id & 0xf00) > 3) {
512			cpu_model[0] = '\0';
513
514			switch (cpu_id & 0x3000) {
515			case 0x1000:
516				strcpy(cpu_model, "Overdrive ");
517				break;
518			case 0x2000:
519				strcpy(cpu_model, "Dual ");
520				break;
521			}
522
523			switch (cpu_id & 0xf00) {
524			case 0x400:
525				strcat(cpu_model, "i486 ");
526				break;
527			case 0x500:
528				strcat(cpu_model, "Pentium"); /* nb no space */
529				break;
530			case 0x600:
531				strcat(cpu_model, "Pentium Pro");
532				break;
533			default:
534				strcat(cpu_model, "unknown");
535				break;
536			}
537
538			switch (cpu_id & 0xff0) {
539			case 0x400:
540				strcat(cpu_model, "DX"); break;
541			case 0x410:
542				strcat(cpu_model, "DX"); break;
543			case 0x420:
544				strcat(cpu_model, "SX"); break;
545			case 0x430:
546				strcat(cpu_model, "DX2"); break;
547			case 0x440:
548				strcat(cpu_model, "SL"); break;
549			case 0x450:
550				strcat(cpu_model, "SX2"); break;
551			case 0x470:
552				strcat(cpu_model, "DX2 Write-Back Enhanced");
553				break;
554			case 0x480:
555				strcat(cpu_model, "DX4"); break;
556				break;
557			}
558		}
559	}
560#endif
561	printf("%s (", cpu_model);
562	switch(cpu_class) {
563	case CPUCLASS_286:
564		printf("286");
565		break;
566#if defined(I386_CPU)
567	case CPUCLASS_386:
568		printf("386");
569		break;
570#endif
571#if defined(I486_CPU)
572	case CPUCLASS_486:
573		printf("486");
574		bzero = i486_bzero;
575		break;
576#endif
577#if defined(I586_CPU)
578	case CPUCLASS_586:
579		printf("%d.%02d-MHz ",
580		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100,
581		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100);
582		printf("586");
583		bzero = i586_bzero;
584		break;
585#endif
586#if defined(I686_CPU)
587	case CPUCLASS_686:
588		printf("%d.%02d-MHz ",
589		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100,
590		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100);
591		printf("686");
592		bzero = i686_bzero;
593		break;
594#endif
595	default:
596		printf("unknown");	/* will panic below... */
597	}
598	printf("-class CPU)\n");
599#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
600	if(*cpu_vendor)
601		printf("  Origin = \"%s\"",cpu_vendor);
602	if(cpu_id)
603		printf("  Id = 0x%lx",cpu_id);
604
605	if (!strcmp(cpu_vendor, "GenuineIntel")) {
606		printf("  Stepping=%ld", cpu_id & 0xf);
607		if (cpu_high > 0) {
608#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC"
609			printf("\n  Features=0x%b", cpu_feature, FEATUREFMT);
610		}
611	}
612	/* Avoid ugly blank lines: only print newline when we have to. */
613	if (*cpu_vendor || cpu_id)
614		printf("\n");
615#endif
616	/*
617	 * Now that we have told the user what they have,
618	 * let them know if that machine type isn't configured.
619	 */
620	switch (cpu_class) {
621	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
622#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU)
623#error This kernel is not configured for one of the supported CPUs
624#endif
625#if !defined(I386_CPU)
626	case CPUCLASS_386:
627#endif
628#if !defined(I486_CPU)
629	case CPUCLASS_486:
630#endif
631#if !defined(I586_CPU)
632	case CPUCLASS_586:
633#endif
634#if !defined(I686_CPU)
635	case CPUCLASS_686:
636#endif
637		panic("CPU class not configured");
638	default:
639		break;
640	}
641	dev_attach(&kdc_cpu0);
642}
643
644/*
645 * Send an interrupt to process.
646 *
647 * Stack is set up to allow sigcode stored
648 * in u. to call routine, followed by kcall
649 * to sigreturn routine below.  After sigreturn
650 * resets the signal mask, the stack, and the
651 * frame pointer, it returns to the user
652 * specified pc, psl.
653 */
654void
655sendsig(catcher, sig, mask, code)
656	sig_t catcher;
657	int sig, mask;
658	unsigned code;
659{
660	register struct proc *p = curproc;
661	register int *regs;
662	register struct sigframe *fp;
663	struct sigframe sf;
664	struct sigacts *psp = p->p_sigacts;
665	int oonstack;
666
667	regs = p->p_md.md_regs;
668        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
669	/*
670	 * Allocate and validate space for the signal handler
671	 * context. Note that if the stack is in P0 space, the
672	 * call to grow() is a nop, and the useracc() check
673	 * will fail if the process has not already allocated
674	 * the space with a `brk'.
675	 */
676        if ((psp->ps_flags & SAS_ALTSTACK) &&
677	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
678	    (psp->ps_sigonstack & sigmask(sig))) {
679		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
680		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
681		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
682	} else {
683		fp = (struct sigframe *)(regs[tESP]
684			- sizeof(struct sigframe));
685	}
686
687	/*
688	 * grow() will return FALSE if the fp will not fit inside the stack
689	 *	and the stack can not be grown. useracc will return FALSE
690	 *	if access is denied.
691	 */
692	if ((grow(p, (int)fp) == FALSE) ||
693	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
694		/*
695		 * Process has trashed its stack; give it an illegal
696		 * instruction to halt it in its tracks.
697		 */
698		SIGACTION(p, SIGILL) = SIG_DFL;
699		sig = sigmask(SIGILL);
700		p->p_sigignore &= ~sig;
701		p->p_sigcatch &= ~sig;
702		p->p_sigmask &= ~sig;
703		psignal(p, SIGILL);
704		return;
705	}
706
707	/*
708	 * Build the argument list for the signal handler.
709	 */
710	if (p->p_sysent->sv_sigtbl) {
711		if (sig < p->p_sysent->sv_sigsize)
712			sig = p->p_sysent->sv_sigtbl[sig];
713		else
714			sig = p->p_sysent->sv_sigsize + 1;
715	}
716	sf.sf_signum = sig;
717	sf.sf_code = code;
718	sf.sf_scp = &fp->sf_sc;
719	sf.sf_addr = (char *) regs[tERR];
720	sf.sf_handler = catcher;
721
722	/* save scratch registers */
723	sf.sf_sc.sc_eax = regs[tEAX];
724	sf.sf_sc.sc_ebx = regs[tEBX];
725	sf.sf_sc.sc_ecx = regs[tECX];
726	sf.sf_sc.sc_edx = regs[tEDX];
727	sf.sf_sc.sc_esi = regs[tESI];
728	sf.sf_sc.sc_edi = regs[tEDI];
729	sf.sf_sc.sc_cs = regs[tCS];
730	sf.sf_sc.sc_ds = regs[tDS];
731	sf.sf_sc.sc_ss = regs[tSS];
732	sf.sf_sc.sc_es = regs[tES];
733	sf.sf_sc.sc_isp = regs[tISP];
734
735	/*
736	 * Build the signal context to be used by sigreturn.
737	 */
738	sf.sf_sc.sc_onstack = oonstack;
739	sf.sf_sc.sc_mask = mask;
740	sf.sf_sc.sc_sp = regs[tESP];
741	sf.sf_sc.sc_fp = regs[tEBP];
742	sf.sf_sc.sc_pc = regs[tEIP];
743	sf.sf_sc.sc_ps = regs[tEFLAGS];
744
745	/*
746	 * Copy the sigframe out to the user's stack.
747	 */
748	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
749		/*
750		 * Something is wrong with the stack pointer.
751		 * ...Kill the process.
752		 */
753		sigexit(p, SIGILL);
754	};
755
756	regs[tESP] = (int)fp;
757	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
758	regs[tEFLAGS] &= ~PSL_VM;
759	regs[tCS] = _ucodesel;
760	regs[tDS] = _udatasel;
761	regs[tES] = _udatasel;
762	regs[tSS] = _udatasel;
763}
764
765/*
766 * System call to cleanup state after a signal
767 * has been taken.  Reset signal mask and
768 * stack state from context left by sendsig (above).
769 * Return to previous pc and psl as specified by
770 * context left by sendsig. Check carefully to
771 * make sure that the user has not modified the
772 * state to gain improper privileges.
773 */
774int
775sigreturn(p, uap, retval)
776	struct proc *p;
777	struct sigreturn_args /* {
778		struct sigcontext *sigcntxp;
779	} */ *uap;
780	int *retval;
781{
782	register struct sigcontext *scp;
783	register struct sigframe *fp;
784	register int *regs = p->p_md.md_regs;
785	int eflags;
786
787	/*
788	 * (XXX old comment) regs[tESP] points to the return address.
789	 * The user scp pointer is above that.
790	 * The return address is faked in the signal trampoline code
791	 * for consistency.
792	 */
793	scp = uap->sigcntxp;
794	fp = (struct sigframe *)
795	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
796
797	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
798		return(EINVAL);
799
800	/*
801	 * Don't allow users to change privileged or reserved flags.
802	 */
803#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
804	eflags = scp->sc_ps;
805	/*
806	 * XXX do allow users to change the privileged flag PSL_RF.  The
807	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
808	 * sometimes set it there too.  tf_eflags is kept in the signal
809	 * context during signal handling and there is no other place
810	 * to remember it, so the PSL_RF bit may be corrupted by the
811	 * signal handler without us knowing.  Corruption of the PSL_RF
812	 * bit at worst causes one more or one less debugger trap, so
813	 * allowing it is fairly harmless.
814	 */
815	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
816#ifdef DEBUG
817    		printf("sigreturn: eflags = 0x%x\n", eflags);
818#endif
819    		return(EINVAL);
820	}
821
822	/*
823	 * Don't allow users to load a valid privileged %cs.  Let the
824	 * hardware check for invalid selectors, excess privilege in
825	 * other selectors, invalid %eip's and invalid %esp's.
826	 */
827#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
828	if (!CS_SECURE(scp->sc_cs)) {
829#ifdef DEBUG
830    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
831#endif
832		trapsignal(p, SIGBUS, T_PROTFLT);
833		return(EINVAL);
834	}
835
836	/* restore scratch registers */
837	regs[tEAX] = scp->sc_eax;
838	regs[tEBX] = scp->sc_ebx;
839	regs[tECX] = scp->sc_ecx;
840	regs[tEDX] = scp->sc_edx;
841	regs[tESI] = scp->sc_esi;
842	regs[tEDI] = scp->sc_edi;
843	regs[tCS] = scp->sc_cs;
844	regs[tDS] = scp->sc_ds;
845	regs[tES] = scp->sc_es;
846	regs[tSS] = scp->sc_ss;
847	regs[tISP] = scp->sc_isp;
848
849	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
850		return(EINVAL);
851
852	if (scp->sc_onstack & 01)
853		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
854	else
855		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
856	p->p_sigmask = scp->sc_mask &~
857	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
858	regs[tEBP] = scp->sc_fp;
859	regs[tESP] = scp->sc_sp;
860	regs[tEIP] = scp->sc_pc;
861	regs[tEFLAGS] = eflags;
862	return(EJUSTRETURN);
863}
864
865static int	waittime = -1;
866static struct pcb dumppcb;
867
868__dead void
869boot(howto)
870	int howto;
871{
872	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
873		register struct buf *bp;
874		int iter, nbusy;
875
876		waittime = 0;
877		printf("\nsyncing disks... ");
878
879		sync(&proc0, NULL, NULL);
880
881		for (iter = 0; iter < 20; iter++) {
882			nbusy = 0;
883			for (bp = &buf[nbuf]; --bp >= buf; ) {
884				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
885					nbusy++;
886				}
887			}
888			if (nbusy == 0)
889				break;
890			printf("%d ", nbusy);
891			DELAY(40000 * iter);
892		}
893		if (nbusy) {
894			/*
895			 * Failed to sync all blocks. Indicate this and don't
896			 * unmount filesystems (thus forcing an fsck on reboot).
897			 */
898			printf("giving up\n");
899#ifdef SHOW_BUSYBUFS
900			nbusy = 0;
901			for (bp = &buf[nbuf]; --bp >= buf; ) {
902				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
903					nbusy++;
904					printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno);
905				}
906			}
907			DELAY(5000000);	/* 5 seconds */
908#endif
909		} else {
910			printf("done\n");
911			/*
912			 * Unmount filesystems
913			 */
914			if (panicstr == 0)
915				vfs_unmountall();
916		}
917		DELAY(100000);			/* wait for console output to finish */
918		dev_shutdownall(FALSE);
919	}
920	splhigh();
921	if (howto & RB_HALT) {
922		printf("\n");
923		printf("The operating system has halted.\n");
924		printf("Please press any key to reboot.\n\n");
925		cngetc();
926	} else {
927		if (howto & RB_DUMP) {
928			if (!cold) {
929				savectx(&dumppcb, 0);
930				dumppcb.pcb_ptd = rcr3();
931				dumpsys();
932			}
933
934			if (PANIC_REBOOT_WAIT_TIME != 0) {
935				if (PANIC_REBOOT_WAIT_TIME != -1) {
936					int loop;
937					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
938						PANIC_REBOOT_WAIT_TIME);
939					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
940						DELAY(1000 * 100); /* 1/10th second */
941						if (cncheckc()) /* Did user type a key? */
942							break;
943					}
944					if (!loop)
945						goto die;
946				}
947			} else { /* zero time specified - reboot NOW */
948				goto die;
949			}
950			printf("--> Press a key on the console to reboot <--\n");
951			cngetc();
952		}
953	}
954die:
955	printf("Rebooting...\n");
956	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
957	cpu_reset();
958	for(;;) ;
959	/* NOTREACHED */
960}
961
962/*
963 * Magic number for savecore
964 *
965 * exported (symorder) and used at least by savecore(8)
966 *
967 */
968u_long		dumpmag = 0x8fca0101UL;
969
970static int	dumpsize = 0;		/* also for savecore */
971
972static int	dodump = 1;
973SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, "");
974
975/*
976 * Doadump comes here after turning off memory management and
977 * getting on the dump stack, either when called above, or by
978 * the auto-restart code.
979 */
980static void
981dumpsys()
982{
983
984	if (!dodump)
985		return;
986	if (dumpdev == NODEV)
987		return;
988	if ((minor(dumpdev)&07) != 1)
989		return;
990	if (!(bdevsw[major(dumpdev)]))
991		return;
992	if (!(bdevsw[major(dumpdev)]->d_dump))
993		return;
994	dumpsize = Maxmem;
995	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
996	printf("dump ");
997	switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) {
998
999	case ENXIO:
1000		printf("device bad\n");
1001		break;
1002
1003	case EFAULT:
1004		printf("device not ready\n");
1005		break;
1006
1007	case EINVAL:
1008		printf("area improper\n");
1009		break;
1010
1011	case EIO:
1012		printf("i/o error\n");
1013		break;
1014
1015	case EINTR:
1016		printf("aborted from console\n");
1017		break;
1018
1019	default:
1020		printf("succeeded\n");
1021		break;
1022	}
1023}
1024
1025/*
1026 * Clear registers on exec
1027 */
1028void
1029setregs(p, entry, stack)
1030	struct proc *p;
1031	u_long entry;
1032	u_long stack;
1033{
1034	int *regs = p->p_md.md_regs;
1035
1036	bzero(regs, sizeof(struct trapframe));
1037	regs[tEIP] = entry;
1038	regs[tESP] = stack;
1039	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
1040	regs[tSS] = _udatasel;
1041	regs[tDS] = _udatasel;
1042	regs[tES] = _udatasel;
1043	regs[tCS] = _ucodesel;
1044
1045	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
1046	load_cr0(rcr0() | CR0_TS);	/* start emulating */
1047#if	NNPX > 0
1048	npxinit(__INITIAL_NPXCW__);
1049#endif	/* NNPX > 0 */
1050}
1051
1052static int
1053sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
1054{
1055	int error;
1056	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
1057		req);
1058	if (!error && req->newptr)
1059		resettodr();
1060	return (error);
1061}
1062
1063SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
1064	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
1065
1066SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
1067	CTLFLAG_RW, &disable_rtc_set, 0, "");
1068
1069SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
1070	CTLFLAG_RD, &bootinfo, bootinfo, "");
1071
1072/*
1073 * Initialize 386 and configure to run kernel
1074 */
1075
1076/*
1077 * Initialize segments & interrupt table
1078 */
1079
/* LDT bookkeeping; both are set outside this part of the file. */
int currentldt;
int _default_ldt;
union descriptor gdt[NGDT];		/* global descriptor table */
struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */

/*
 * TSS referenced by the GPANIC_SEL entry of gdt_segs below.  The stack
 * is presumably the one the double-fault handler runs on -- its use is
 * not visible in this part of the file.
 */
static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];

extern  struct user *proc0paddr;
1090
1091/* software prototypes -- in more palatable form */
1092struct soft_segment_descriptor gdt_segs[] = {
1093/* GNULL_SEL	0 Null Descriptor */
1094{	0x0,			/* segment base address  */
1095	0x0,			/* length */
1096	0,			/* segment type */
1097	0,			/* segment descriptor priority level */
1098	0,			/* segment descriptor present */
1099	0, 0,
1100	0,			/* default 32 vs 16 bit size */
1101	0  			/* limit granularity (byte/page units)*/ },
1102/* GCODE_SEL	1 Code Descriptor for kernel */
1103{	0x0,			/* segment base address  */
1104	0xfffff,		/* length - all address space */
1105	SDT_MEMERA,		/* segment type */
1106	0,			/* segment descriptor priority level */
1107	1,			/* segment descriptor present */
1108	0, 0,
1109	1,			/* default 32 vs 16 bit size */
1110	1  			/* limit granularity (byte/page units)*/ },
1111/* GDATA_SEL	2 Data Descriptor for kernel */
1112{	0x0,			/* segment base address  */
1113	0xfffff,		/* length - all address space */
1114	SDT_MEMRWA,		/* segment type */
1115	0,			/* segment descriptor priority level */
1116	1,			/* segment descriptor present */
1117	0, 0,
1118	1,			/* default 32 vs 16 bit size */
1119	1  			/* limit granularity (byte/page units)*/ },
1120/* GLDT_SEL	3 LDT Descriptor */
1121{	(int) ldt,		/* segment base address  */
1122	sizeof(ldt)-1,		/* length - all address space */
1123	SDT_SYSLDT,		/* segment type */
1124	0,			/* segment descriptor priority level */
1125	1,			/* segment descriptor present */
1126	0, 0,
1127	0,			/* unused - default 32 vs 16 bit size */
1128	0  			/* limit granularity (byte/page units)*/ },
1129/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1130{	0x0,			/* segment base address  */
1131	0x0,			/* length - all address space */
1132	0,			/* segment type */
1133	0,			/* segment descriptor priority level */
1134	0,			/* segment descriptor present */
1135	0, 0,
1136	0,			/* default 32 vs 16 bit size */
1137	0  			/* limit granularity (byte/page units)*/ },
1138/* GPANIC_SEL	5 Panic Tss Descriptor */
1139{	(int) &dblfault_tss,	/* segment base address  */
1140	sizeof(struct i386tss)-1,/* length - all address space */
1141	SDT_SYS386TSS,		/* segment type */
1142	0,			/* segment descriptor priority level */
1143	1,			/* segment descriptor present */
1144	0, 0,
1145	0,			/* unused - default 32 vs 16 bit size */
1146	0  			/* limit granularity (byte/page units)*/ },
1147/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1148{	(int) kstack,		/* segment base address  */
1149	sizeof(struct i386tss)-1,/* length - all address space */
1150	SDT_SYS386TSS,		/* segment type */
1151	0,			/* segment descriptor priority level */
1152	1,			/* segment descriptor present */
1153	0, 0,
1154	0,			/* unused - default 32 vs 16 bit size */
1155	0  			/* limit granularity (byte/page units)*/ },
1156/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1157{	(int) ldt,		/* segment base address  */
1158	(512 * sizeof(union descriptor)-1),		/* length */
1159	SDT_SYSLDT,		/* segment type */
1160	0,			/* segment descriptor priority level */
1161	1,			/* segment descriptor present */
1162	0, 0,
1163	0,			/* unused - default 32 vs 16 bit size */
1164	0  			/* limit granularity (byte/page units)*/ },
1165/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1166{	0,			/* segment base address (overwritten by APM)  */
1167	0xfffff,		/* length */
1168	SDT_MEMERA,		/* segment type */
1169	0,			/* segment descriptor priority level */
1170	1,			/* segment descriptor present */
1171	0, 0,
1172	1,			/* default 32 vs 16 bit size */
1173	1  			/* limit granularity (byte/page units)*/ },
1174/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1175{	0,			/* segment base address (overwritten by APM)  */
1176	0xfffff,		/* length */
1177	SDT_MEMERA,		/* segment type */
1178	0,			/* segment descriptor priority level */
1179	1,			/* segment descriptor present */
1180	0, 0,
1181	0,			/* default 32 vs 16 bit size */
1182	1  			/* limit granularity (byte/page units)*/ },
1183/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1184{	0,			/* segment base address (overwritten by APM) */
1185	0xfffff,		/* length */
1186	SDT_MEMRWA,		/* segment type */
1187	0,			/* segment descriptor priority level */
1188	1,			/* segment descriptor present */
1189	0, 0,
1190	1,			/* default 32 vs 16 bit size */
1191	1  			/* limit granularity (byte/page units)*/ },
1192};
1193
1194static struct soft_segment_descriptor ldt_segs[] = {
1195	/* Null Descriptor - overwritten by call gate */
1196{	0x0,			/* segment base address  */
1197	0x0,			/* length - all address space */
1198	0,			/* segment type */
1199	0,			/* segment descriptor priority level */
1200	0,			/* segment descriptor present */
1201	0, 0,
1202	0,			/* default 32 vs 16 bit size */
1203	0  			/* limit granularity (byte/page units)*/ },
1204	/* Null Descriptor - overwritten by call gate */
1205{	0x0,			/* segment base address  */
1206	0x0,			/* length - all address space */
1207	0,			/* segment type */
1208	0,			/* segment descriptor priority level */
1209	0,			/* segment descriptor present */
1210	0, 0,
1211	0,			/* default 32 vs 16 bit size */
1212	0  			/* limit granularity (byte/page units)*/ },
1213	/* Null Descriptor - overwritten by call gate */
1214{	0x0,			/* segment base address  */
1215	0x0,			/* length - all address space */
1216	0,			/* segment type */
1217	0,			/* segment descriptor priority level */
1218	0,			/* segment descriptor present */
1219	0, 0,
1220	0,			/* default 32 vs 16 bit size */
1221	0  			/* limit granularity (byte/page units)*/ },
1222	/* Code Descriptor for user */
1223{	0x0,			/* segment base address  */
1224	0xfffff,		/* length - all address space */
1225	SDT_MEMERA,		/* segment type */
1226	SEL_UPL,		/* segment descriptor priority level */
1227	1,			/* segment descriptor present */
1228	0, 0,
1229	1,			/* default 32 vs 16 bit size */
1230	1  			/* limit granularity (byte/page units)*/ },
1231	/* Data Descriptor for user */
1232{	0x0,			/* segment base address  */
1233	0xfffff,		/* length - all address space */
1234	SDT_MEMRWA,		/* segment type */
1235	SEL_UPL,		/* segment descriptor priority level */
1236	1,			/* segment descriptor present */
1237	0, 0,
1238	1,			/* default 32 vs 16 bit size */
1239	1  			/* limit granularity (byte/page units)*/ },
1240};
1241
1242void
1243setidt(idx, func, typ, dpl, selec)
1244	int idx;
1245	inthand_t *func;
1246	int typ;
1247	int dpl;
1248	int selec;
1249{
1250	struct gate_descriptor *ip = idt + idx;
1251
1252	ip->gd_looffset = (int)func;
1253	ip->gd_selector = selec;
1254	ip->gd_stkcpy = 0;
1255	ip->gd_xx = 0;
1256	ip->gd_type = typ;
1257	ip->gd_dpl = dpl;
1258	ip->gd_p = 1;
1259	ip->gd_hioffset = ((int)func)>>16 ;
1260}
1261
1262#define	IDTVEC(name)	__CONCAT(X,name)
1263
1264extern inthand_t
1265	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1266	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
1267	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1268	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1269	IDTVEC(syscall);
1270
1271#if defined(COMPAT_LINUX) || defined(LINUX)
1272extern inthand_t
1273	IDTVEC(linux_syscall);
1274#endif
1275
1276void
1277sdtossd(sd, ssd)
1278	struct segment_descriptor *sd;
1279	struct soft_segment_descriptor *ssd;
1280{
1281	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1282	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1283	ssd->ssd_type  = sd->sd_type;
1284	ssd->ssd_dpl   = sd->sd_dpl;
1285	ssd->ssd_p     = sd->sd_p;
1286	ssd->ssd_def32 = sd->sd_def32;
1287	ssd->ssd_gran  = sd->sd_gran;
1288}
1289
1290void
1291init386(first)
1292	int first;
1293{
1294	int x;
1295	unsigned biosbasemem, biosextmem;
1296	struct gate_descriptor *gdp;
1297	int gsel_tss;
1298	/* table descriptors - used to load tables by microp */
1299	struct region_descriptor r_gdt, r_idt;
1300	int	pagesinbase, pagesinext;
1301	int	target_page, pa_indx;
1302
1303	proc0.p_addr = proc0paddr;
1304
1305	/*
1306	 * Initialize the console before we print anything out.
1307	 */
1308	cninit();
1309
1310	/*
1311	 * make gdt memory segments, the code segment goes up to end of the
1312	 * page with etext in it, the data segment goes to the end of
1313	 * the address space
1314	 */
1315	/*
1316	 * XXX text protection is temporarily (?) disabled.  The limit was
1317	 * i386_btop(i386_round_page(etext)) - 1.
1318	 */
1319	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1320	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1321	for (x = 0; x < NGDT; x++)
1322		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1323
1324	/* make ldt memory segments */
1325	/*
1326	 * The data segment limit must not cover the user area because we
1327	 * don't want the user area to be writable in copyout() etc. (page
1328	 * level protection is lost in kernel mode on 386's).  Also, we
1329	 * don't want the user area to be writable directly (page level
1330	 * protection of the user area is not available on 486's with
1331	 * CR0_WP set, because there is no user-read/kernel-write mode).
1332	 *
1333	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1334	 * should be spelled ...MAX_USER...
1335	 */
1336#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1337	/*
1338	 * The code segment limit has to cover the user area until we move
1339	 * the signal trampoline out of the user area.  This is safe because
1340	 * the code segment cannot be written to directly.
1341	 */
1342#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1343	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1344	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1345	/* Note. eventually want private ldts per process */
1346	for (x = 0; x < NLDT; x++)
1347		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1348
1349	/* exceptions */
1350	for (x = 0; x < NIDT; x++)
1351		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1352	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1353	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1354	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1355 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1356	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1357	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1358	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1359	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1360	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
1361	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1362	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1363	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1364	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1365	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1366	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1367	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1368	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1369	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1370#if defined(COMPAT_LINUX) || defined(LINUX)
1371 	setidt(0x80, &IDTVEC(linux_syscall),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1372#endif
1373
1374#include	"isa.h"
1375#if	NISA >0
1376	isa_defaultirq();
1377#endif
1378	rand_initialize();
1379
1380	r_gdt.rd_limit = sizeof(gdt) - 1;
1381	r_gdt.rd_base =  (int) gdt;
1382	lgdt(&r_gdt);
1383
1384	r_idt.rd_limit = sizeof(idt) - 1;
1385	r_idt.rd_base = (int) idt;
1386	lidt(&r_idt);
1387
1388	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1389	lldt(_default_ldt);
1390	currentldt = _default_ldt;
1391
1392#ifdef DDB
1393	kdb_init();
1394	if (boothowto & RB_KDB)
1395		Debugger("Boot flags requested debugger");
1396#endif
1397
1398	/* Use BIOS values stored in RTC CMOS RAM, since probing
1399	 * breaks certain 386 AT relics.
1400	 */
1401	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1402	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1403
1404	/*
1405	 * Print a warning if the official BIOS interface disagrees
1406	 * with the hackish interface used above.  Eventually only
1407	 * the official interface should be used.
1408	 */
1409	if (bootinfo.bi_memsizes_valid) {
1410		if (bootinfo.bi_basemem != biosbasemem)
1411			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1412			       bootinfo.bi_basemem, biosbasemem);
1413		if (bootinfo.bi_extmem != biosextmem)
1414			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1415			       bootinfo.bi_extmem, biosextmem);
1416	}
1417
1418	/*
1419	 * If BIOS tells us that it has more than 640k in the basemem,
1420	 *	don't believe it - set it to 640k.
1421	 */
1422	if (biosbasemem > 640)
1423		biosbasemem = 640;
1424
1425	/*
1426	 * Some 386 machines might give us a bogus number for extended
1427	 *	mem. If this happens, stop now.
1428	 */
1429#ifndef LARGEMEM
1430	if (biosextmem > 65536) {
1431		panic("extended memory beyond limit of 64MB");
1432		/* NOTREACHED */
1433	}
1434#endif
1435
1436	pagesinbase = biosbasemem * 1024 / NBPG;
1437	pagesinext = biosextmem * 1024 / NBPG;
1438
1439	/*
1440	 * Special hack for chipsets that still remap the 384k hole when
1441	 *	there's 16MB of memory - this really confuses people that
1442	 *	are trying to use bus mastering ISA controllers with the
1443	 *	"16MB limit"; they only have 16MB, but the remapping puts
1444	 *	them beyond the limit.
1445	 */
1446	/*
1447	 * If extended memory is between 15-16MB (16-17MB phys address range),
1448	 *	chop it to 15MB.
1449	 */
1450	if ((pagesinext > 3840) && (pagesinext < 4096))
1451		pagesinext = 3840;
1452
1453	/*
1454	 * Maxmem isn't the "maximum memory", it's one larger than the
1455	 * highest page of of the physical address space. It
1456	 */
1457	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1458
1459#ifdef MAXMEM
1460	Maxmem = MAXMEM/4;
1461#endif
1462
1463	/* call pmap initialization to make new kernel address space */
1464	pmap_bootstrap (first, 0);
1465
1466	/*
1467	 * Size up each available chunk of physical memory.
1468	 */
1469
1470	/*
1471	 * We currently don't bother testing base memory.
1472	 * XXX  ...but we probably should.
1473	 */
1474	pa_indx = 0;
1475	badpages = 0;
1476	if (pagesinbase > 1) {
1477		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
1478		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
1479		physmem = pagesinbase - 1;
1480	} else {
1481		/* point at first chunk end */
1482		pa_indx++;
1483	}
1484
1485	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
1486		int tmp, page_bad = FALSE;
1487
1488		/*
1489		 * map page into kernel: valid, read/write, non-cacheable
1490		 */
1491		*(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page;
1492		pmap_update();
1493
1494		tmp = *(int *)CADDR1;
1495		/*
1496		 * Test for alternating 1's and 0's
1497		 */
1498		*(volatile int *)CADDR1 = 0xaaaaaaaa;
1499		if (*(volatile int *)CADDR1 != 0xaaaaaaaa) {
1500			page_bad = TRUE;
1501		}
1502		/*
1503		 * Test for alternating 0's and 1's
1504		 */
1505		*(volatile int *)CADDR1 = 0x55555555;
1506		if (*(volatile int *)CADDR1 != 0x55555555) {
1507			page_bad = TRUE;
1508		}
1509		/*
1510		 * Test for all 1's
1511		 */
1512		*(volatile int *)CADDR1 = 0xffffffff;
1513		if (*(volatile int *)CADDR1 != 0xffffffff) {
1514			page_bad = TRUE;
1515		}
1516		/*
1517		 * Test for all 0's
1518		 */
1519		*(volatile int *)CADDR1 = 0x0;
1520		if (*(volatile int *)CADDR1 != 0x0) {
1521			/*
1522			 * test of page failed
1523			 */
1524			page_bad = TRUE;
1525		}
1526		/*
1527		 * Restore original value.
1528		 */
1529		*(int *)CADDR1 = tmp;
1530
1531		/*
1532		 * Adjust array of valid/good pages.
1533		 */
1534		if (page_bad == FALSE) {
1535			/*
1536			 * If this good page is a continuation of the
1537			 * previous set of good pages, then just increase
1538			 * the end pointer. Otherwise start a new chunk.
1539			 * Note that "end" points one higher than end,
1540			 * making the range >= start and < end.
1541			 */
1542			if (phys_avail[pa_indx] == target_page) {
1543				phys_avail[pa_indx] += PAGE_SIZE;
1544			} else {
1545				pa_indx++;
1546				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1547					printf("Too many holes in the physical address space, giving up\n");
1548					pa_indx--;
1549					break;
1550				}
1551				phys_avail[pa_indx++] = target_page;	/* start */
1552				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
1553			}
1554			physmem++;
1555		} else {
1556			badpages++;
1557			page_bad = FALSE;
1558		}
1559	}
1560
1561	*(int *)CMAP1 = 0;
1562	pmap_update();
1563
1564	/*
1565	 * XXX
1566	 * The last chunk must contain at least one page plus the message
1567	 * buffer to avoid complicating other code (message buffer address
1568	 * calculation, etc.).
1569	 */
1570	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1571	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
1572		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1573		phys_avail[pa_indx--] = 0;
1574		phys_avail[pa_indx--] = 0;
1575	}
1576
1577	Maxmem = atop(phys_avail[pa_indx]);
1578
1579	/* Trim off space for the message buffer. */
1580	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));
1581
1582	avail_end = phys_avail[pa_indx];
1583
1584	/* now running on new page tables, configured,and u/iom is accessible */
1585
1586	/* make a initial tss so microp can get interrupt stack on syscall! */
1587	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1588	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1589	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1590
1591	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
1592	    dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
1593	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
1594	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
1595	dblfault_tss.tss_cr3 = IdlePTD;
1596	dblfault_tss.tss_eip = (int) dblfault_handler;
1597	dblfault_tss.tss_eflags = PSL_KERNEL;
1598	dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs =
1599		GSEL(GDATA_SEL, SEL_KPL);
1600	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
1601	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
1602
1603	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1604		(sizeof(struct i386tss))<<16;
1605
1606	ltr(gsel_tss);
1607
1608	/* make a call gate to reenter kernel with */
1609	gdp = &ldt[LSYS5CALLS_SEL].gd;
1610
1611	x = (int) &IDTVEC(syscall);
1612	gdp->gd_looffset = x++;
1613	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1614	gdp->gd_stkcpy = 1;
1615	gdp->gd_type = SDT_SYS386CGT;
1616	gdp->gd_dpl = SEL_UPL;
1617	gdp->gd_p = 1;
1618	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1619
1620	/* transfer to user mode */
1621
1622	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1623	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1624
1625	/* setup proc 0's pcb */
1626	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1627	proc0.p_addr->u_pcb.pcb_flags = 0;
1628	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1629}
1630
/*
 * Locate a process's saved registers (trap frame) through its user area.
 *
 * p->p_md.md_regs is an address within the kernel stack as seen while the
 * process is running.  Subtracting kstack turns it into an offset, and
 * adding that offset to p->p_addr gives the same frame inside the
 * process's user block, which is how it must be reached when the process
 * is not the one currently running.
 */
#define	TF_REGP(p) \
	((struct trapframe *)((char *)(p)->p_addr + \
	    ((char *)(p)->p_md.md_regs - kstack)))
1643
1644int
1645ptrace_set_pc(p, addr)
1646	struct proc *p;
1647	unsigned int addr;
1648{
1649	TF_REGP(p)->tf_eip = addr;
1650	return (0);
1651}
1652
1653int
1654ptrace_single_step(p)
1655	struct proc *p;
1656{
1657	TF_REGP(p)->tf_eflags |= PSL_T;
1658	return (0);
1659}
1660
1661int ptrace_write_u(p, off, data)
1662	struct proc *p;
1663	vm_offset_t off;
1664	int data;
1665{
1666	struct trapframe frame_copy;
1667	vm_offset_t min;
1668	struct trapframe *tp;
1669
1670	/*
1671	 * Privileged kernel state is scattered all over the user area.
1672	 * Only allow write access to parts of regs and to fpregs.
1673	 */
1674	min = (char *)p->p_md.md_regs - kstack;
1675	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1676		tp = TF_REGP(p);
1677		frame_copy = *tp;
1678		*(int *)((char *)&frame_copy + (off - min)) = data;
1679		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1680		    !CS_SECURE(frame_copy.tf_cs))
1681			return (EINVAL);
1682		*(int*)((char *)p->p_addr + off) = data;
1683		return (0);
1684	}
1685	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1686	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1687		*(int*)((char *)p->p_addr + off) = data;
1688		return (0);
1689	}
1690	return (EFAULT);
1691}
1692
1693int
1694fill_regs(p, regs)
1695	struct proc *p;
1696	struct reg *regs;
1697{
1698	struct trapframe *tp;
1699
1700	tp = TF_REGP(p);
1701	regs->r_es = tp->tf_es;
1702	regs->r_ds = tp->tf_ds;
1703	regs->r_edi = tp->tf_edi;
1704	regs->r_esi = tp->tf_esi;
1705	regs->r_ebp = tp->tf_ebp;
1706	regs->r_ebx = tp->tf_ebx;
1707	regs->r_edx = tp->tf_edx;
1708	regs->r_ecx = tp->tf_ecx;
1709	regs->r_eax = tp->tf_eax;
1710	regs->r_eip = tp->tf_eip;
1711	regs->r_cs = tp->tf_cs;
1712	regs->r_eflags = tp->tf_eflags;
1713	regs->r_esp = tp->tf_esp;
1714	regs->r_ss = tp->tf_ss;
1715	return (0);
1716}
1717
1718int
1719set_regs(p, regs)
1720	struct proc *p;
1721	struct reg *regs;
1722{
1723	struct trapframe *tp;
1724
1725	tp = TF_REGP(p);
1726	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1727	    !CS_SECURE(regs->r_cs))
1728		return (EINVAL);
1729	tp->tf_es = regs->r_es;
1730	tp->tf_ds = regs->r_ds;
1731	tp->tf_edi = regs->r_edi;
1732	tp->tf_esi = regs->r_esi;
1733	tp->tf_ebp = regs->r_ebp;
1734	tp->tf_ebx = regs->r_ebx;
1735	tp->tf_edx = regs->r_edx;
1736	tp->tf_ecx = regs->r_ecx;
1737	tp->tf_eax = regs->r_eax;
1738	tp->tf_eip = regs->r_eip;
1739	tp->tf_cs = regs->r_cs;
1740	tp->tf_eflags = regs->r_eflags;
1741	tp->tf_esp = regs->r_esp;
1742	tp->tf_ss = regs->r_ss;
1743	return (0);
1744}
1745
#ifndef DDB
/*
 * Fallback used when DDB is not compiled in: there is no debugger to
 * enter, so just report that one was requested and with what message.
 */
void
Debugger(const char *msg)
{

	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
1753
1754#include <sys/disklabel.h>
1755#define b_cylin	b_resid
1756/*
1757 * Determine the size of the transfer, and make sure it is
1758 * within the boundaries of the partition. Adjust transfer
1759 * if needed, and signal errors or early completion.
1760 */
1761int
1762bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1763{
1764        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1765        int labelsect = lp->d_partitions[0].p_offset;
1766        int maxsz = p->p_size,
1767                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1768
1769        /* overwriting disk label ? */
1770        /* XXX should also protect bootstrap in first 8K */
1771        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1772#if LABELSECTOR != 0
1773            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1774#endif
1775            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1776                bp->b_error = EROFS;
1777                goto bad;
1778        }
1779
1780#if     defined(DOSBBSECTOR) && defined(notyet)
1781        /* overwriting master boot record? */
1782        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1783            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1784                bp->b_error = EROFS;
1785                goto bad;
1786        }
1787#endif
1788
1789        /* beyond partition? */
1790        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1791                /* if exactly at end of disk, return an EOF */
1792                if (bp->b_blkno == maxsz) {
1793                        bp->b_resid = bp->b_bcount;
1794                        return(0);
1795                }
1796                /* or truncate if part of it fits */
1797                sz = maxsz - bp->b_blkno;
1798                if (sz <= 0) {
1799                        bp->b_error = EINVAL;
1800                        goto bad;
1801                }
1802                bp->b_bcount = sz << DEV_BSHIFT;
1803        }
1804
1805        /* calculate cylinder for disksort to order transfers with */
1806        bp->b_pblkno = bp->b_blkno + p->p_offset;
1807        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1808        return(1);
1809
1810bad:
1811        bp->b_flags |= B_ERROR;
1812        return(-1);
1813}
1814
/*
 * Copy the drive number out to the sysctl request `req' and return
 * whatever SYSCTL_OUT() reports.
 */
int
disk_externalize(int drive, struct sysctl_req *req)
{
	int error;

	error = SYSCTL_OUT(req, &drive, sizeof(drive));
	return (error);
}
1820