machdep.c revision 15392
1/*-
2 * Copyright (c) 1992 Terrence R. Lambert.
3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38 *	$Id: machdep.c,v 1.183 1996/04/25 06:20:10 phk Exp $
39 */
40
41#include "npx.h"
42#include "isa.h"
43#include "opt_sysvipc.h"
44#include "opt_ddb.h"
45#include "opt_bounce.h"
46#include "opt_machdep.h"
47#include "opt_perfmon.h"
48
49#include <sys/param.h>
50#include <sys/systm.h>
51#include <sys/sysproto.h>
52#include <sys/signalvar.h>
53#include <sys/kernel.h>
54#include <sys/proc.h>
55#include <sys/buf.h>
56#include <sys/reboot.h>
57#include <sys/conf.h>
58#include <sys/file.h>
59#include <sys/callout.h>
60#include <sys/malloc.h>
61#include <sys/mbuf.h>
62#include <sys/mount.h>
63#include <sys/msgbuf.h>
64#include <sys/ioctl.h>
65#include <sys/sysent.h>
66#include <sys/tty.h>
67#include <sys/sysctl.h>
68#include <sys/devconf.h>
69#include <sys/vmmeter.h>
70
71#ifdef SYSVSHM
72#include <sys/shm.h>
73#endif
74
75#ifdef SYSVMSG
76#include <sys/msg.h>
77#endif
78
79#ifdef SYSVSEM
80#include <sys/sem.h>
81#endif
82
83#include <vm/vm.h>
84#include <vm/vm_param.h>
85#include <vm/vm_prot.h>
86#include <vm/lock.h>
87#include <vm/vm_kern.h>
88#include <vm/vm_object.h>
89#include <vm/vm_page.h>
90#include <vm/vm_map.h>
91#include <vm/vm_pager.h>
92#include <vm/vm_extern.h>
93
94#include <sys/user.h>
95#include <sys/exec.h>
96#include <sys/vnode.h>
97
98#include <ddb/ddb.h>
99
100#include <net/netisr.h>
101
102#include <machine/cpu.h>
103#include <machine/npx.h>
104#include <machine/reg.h>
105#include <machine/psl.h>
106#include <machine/clock.h>
107#include <machine/specialreg.h>
108#include <machine/sysarch.h>
109#include <machine/cons.h>
110#include <machine/devconf.h>
111#include <machine/bootinfo.h>
112#include <machine/md_var.h>
113#ifdef PERFMON
114#include <machine/perfmon.h>
115#endif
116
117#include <i386/isa/isa.h>
118#include <i386/isa/isa_device.h>
119#include <i386/isa/rtc.h>
120#include <machine/random.h>
121
/*
 * Prototypes for routines that are used by, or belong to, the
 * machine-dependent startup code but have no shared header declaration.
 */
extern void init386 __P((int first));
extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
extern int ptrace_single_step __P((struct proc *p));
extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
extern void dblfault_handler __P((void));

/* Alternative bzero() implementations; identifycpu() installs i486_bzero. */
extern void i486_bzero	__P((void *, size_t));
extern void i586_bzero	__P((void *, size_t));
extern void i686_bzero	__P((void *, size_t));

/* cpu_startup() is registered to run first within the SI_SUB_CPU stage. */
static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)

static void identifycpu(void);

/* Machine name, exported read-only (CTLFLAG_RD) as HW_MACHINE. */
char machine[] = "i386";
SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");

/* CPU model string built by identifycpu(); exported read-only as HW_MODEL. */
static char cpu_model[128];
SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "");
142
/*
 * Device-configuration record describing the CPU.  It is handed to
 * dev_attach() at the end of identifycpu() and shares the cpu_model
 * buffer as its description string.
 */
struct kern_devconf kdc_cpu0 = {
	0, 0, 0,		/* filled in by dev_attach */
	"cpu", 0, { MDDT_CPU },
	0, 0, 0, CPU_EXTERNALLEN,
	0,			/* CPU has no parent */
	0,			/* no parentdata */
	DC_BUSY,		/* the CPU is always busy */
	cpu_model,		/* no sense in duplication */
	DC_CLS_CPU		/* class */
};
153
/*
 * Seconds boot() waits for a console key press after a dump before
 * rebooting automatically.  boot() treats 0 as "reboot immediately" and
 * -1 as "wait for a key press indefinitely".
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif

#ifdef BOUNCE_BUFFERS
extern char *bouncememory;
extern int maxbkva;
/*
 * Number of bounce pages.  When left at 0, cpu_startup() picks a value
 * based on Maxmem.
 */
#ifdef BOUNCEPAGES
int	bouncepages = BOUNCEPAGES;
#else
int	bouncepages = 0;
#endif
#endif	/* BOUNCE_BUFFERS */

extern int freebufspace;
int	msgbufmapped = 0;		/* set when safe to use msgbuf */
/* User data and code segment selectors loaded by sendsig() and setregs(). */
int _udatasel, _ucodesel;


int physmem = 0;	/* reported through ctob() by the hw sysctl handlers */
int cold = 1;		/* boot() skips the disk sync and the dump while set */
175
176static int
177sysctl_hw_physmem SYSCTL_HANDLER_ARGS
178{
179	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
180	return (error);
181}
182
183SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
184	0, 0, sysctl_hw_physmem, "I", "");
185
186static int
187sysctl_hw_usermem SYSCTL_HANDLER_ARGS
188{
189	int error = sysctl_handle_int(oidp, 0,
190		ctob(physmem - cnt.v_wire_count), req);
191	return (error);
192}
193
194SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
195	0, 0, sysctl_hw_usermem, "I", "");
196
/*
 * Boot flags, verbosity level and memory size.  cpu_startup() bumps
 * bootverbose when RB_VERBOSE is set in boothowto and prints Maxmem
 * after converting it with ptoa().
 */
int boothowto = 0, bootverbose = 0, Maxmem = 0;
/* When nonzero, cpu_startup() lists the physical memory holes. */
static int	badpages = 0;
long dumplo;		/* printed as the dump offset by dumpsys() */
extern int bootdev;

/* Address pairs describing usable physical memory; see the note below. */
vm_offset_t phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)

int cpu_class = CPUCLASS_386;	/* smallest common denominator */

static void dumpsys __P((void));
static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */

/* Bounds of the submaps carved out of kernel_map by cpu_startup(). */
static vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
extern struct linker_set netisr_set;

/* Byte offset of a member within a structure type; used by sigreturn(). */
#define offsetof(type, member)	((size_t)(&((type *)0)->member))
218
219static void
220cpu_startup(dummy)
221	void *dummy;
222{
223	register unsigned i;
224	register caddr_t v;
225	vm_offset_t maxaddr;
226	vm_size_t size = 0;
227	int firstaddr;
228	vm_offset_t minaddr;
229
230	if (boothowto & RB_VERBOSE)
231		bootverbose++;
232
233	/*
234	 * Initialize error message buffer (at end of core).
235	 */
236
237	/* avail_end was pre-decremented in init_386() to compensate */
238	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
239		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
240			   avail_end + i * NBPG,
241			   VM_PROT_ALL, TRUE);
242	msgbufmapped = 1;
243
244	/*
245	 * Good {morning,afternoon,evening,night}.
246	 */
247	printf(version);
248	startrtclock();
249	identifycpu();
250	printf("real memory  = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
251	/*
252	 * Display any holes after the first chunk of extended memory.
253	 */
254	if (badpages != 0) {
255		int indx = 1;
256
257		/*
258		 * XXX skip reporting ISA hole & unmanaged kernel memory
259		 */
260		if (phys_avail[0] == PAGE_SIZE)
261			indx += 2;
262
263		printf("Physical memory hole(s):\n");
264		for (; phys_avail[indx + 1] != 0; indx += 2) {
265			int size = phys_avail[indx + 1] - phys_avail[indx];
266
267			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
268			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
269		}
270	}
271
272	/*
273	 * Quickly wire in netisrs.
274	 */
275	setup_netisrs(&netisr_set);
276
277/*
278#ifdef ISDN
279	DONET(isdnintr, NETISR_ISDN);
280#endif
281*/
282
283	/*
284	 * Allocate space for system data structures.
285	 * The first available kernel virtual address is in "v".
286	 * As pages of kernel virtual memory are allocated, "v" is incremented.
287	 * As pages of memory are allocated and cleared,
288	 * "firstaddr" is incremented.
289	 * An index into the kernel page table corresponding to the
290	 * virtual memory address maintained in "v" is kept in "mapaddr".
291	 */
292
293	/*
294	 * Make two passes.  The first pass calculates how much memory is
295	 * needed and allocates it.  The second pass assigns virtual
296	 * addresses to the various data structures.
297	 */
298	firstaddr = 0;
299again:
300	v = (caddr_t)firstaddr;
301
302#define	valloc(name, type, num) \
303	    (name) = (type *)v; v = (caddr_t)((name)+(num))
304#define	valloclim(name, type, num, lim) \
305	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
306	valloc(callout, struct callout, ncallout);
307#ifdef SYSVSHM
308	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
309#endif
310#ifdef SYSVSEM
311	valloc(sema, struct semid_ds, seminfo.semmni);
312	valloc(sem, struct sem, seminfo.semmns);
313	/* This is pretty disgusting! */
314	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
315#endif
316#ifdef SYSVMSG
317	valloc(msgpool, char, msginfo.msgmax);
318	valloc(msgmaps, struct msgmap, msginfo.msgseg);
319	valloc(msghdrs, struct msg, msginfo.msgtql);
320	valloc(msqids, struct msqid_ds, msginfo.msgmni);
321#endif
322
323	if (nbuf == 0) {
324		nbuf = 30;
325		if( physmem > 1024)
326			nbuf += min((physmem - 1024) / 12, 1024);
327	}
328	nswbuf = min(nbuf, 128);
329
330	valloc(swbuf, struct buf, nswbuf);
331	valloc(buf, struct buf, nbuf);
332
333#ifdef BOUNCE_BUFFERS
334	/*
335	 * If there is more than 16MB of memory, allocate some bounce buffers
336	 */
337	if (Maxmem > 4096) {
338		if (bouncepages == 0) {
339			bouncepages = 64;
340			bouncepages += ((Maxmem - 4096) / 2048) * 32;
341		}
342		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
343		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
344	}
345#endif
346
347	/*
348	 * End of first pass, size has been calculated so allocate memory
349	 */
350	if (firstaddr == 0) {
351		size = (vm_size_t)(v - firstaddr);
352		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
353		if (firstaddr == 0)
354			panic("startup: no room for tables");
355		goto again;
356	}
357
358	/*
359	 * End of second pass, addresses have been assigned
360	 */
361	if ((vm_size_t)(v - firstaddr) != size)
362		panic("startup: table size inconsistency");
363
364#ifdef BOUNCE_BUFFERS
365	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
366			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
367				maxbkva + pager_map_size, TRUE);
368	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
369#else
370	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
371			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
372#endif
373	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
374				(nbuf*MAXBSIZE), TRUE);
375	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
376				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
377	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
378				(16*ARG_MAX), TRUE);
379	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
380				(maxproc*UPAGES*PAGE_SIZE), FALSE);
381
382	/*
383	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
384	 * we use the more space efficient malloc in place of kmem_alloc.
385	 */
386	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
387				   M_MBUF, M_NOWAIT);
388	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
389	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
390			       nmbclusters * MCLBYTES, FALSE);
391	/*
392	 * Initialize callouts
393	 */
394	callfree = callout;
395	for (i = 1; i < ncallout; i++)
396		callout[i-1].c_next = &callout[i];
397
398        if (boothowto & RB_CONFIG) {
399		userconfig();
400		cninit();	/* the preferred console may have changed */
401	}
402
403#ifdef BOUNCE_BUFFERS
404	/*
405	 * init bounce buffers
406	 */
407	vm_bounce_init();
408#endif
409
410	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
411	    ptoa(cnt.v_free_count) / 1024);
412
413	/*
414	 * Set up buffers, so they can be used to read disk labels.
415	 */
416	bufinit();
417	vm_pager_bufferinit();
418
419	/*
420	 * In verbose mode, print out the BIOS's idea of the disk geometries.
421	 */
422	if (bootverbose) {
423		printf("BIOS Geometries:\n");
424		for (i = 0; i < N_BIOS_GEOM; i++) {
425			unsigned long bios_geom;
426			int max_cylinder, max_head, max_sector;
427
428			bios_geom = bootinfo.bi_bios_geom[i];
429
430			/*
431			 * XXX the bootstrap punts a 1200K floppy geometry
432			 * when the get-disk-geometry interrupt fails.  Skip
433			 * drives that have this geometry.
434			 */
435			if (bios_geom == 0x4f010f)
436				continue;
437
438			printf(" %x:%08lx ", i, bios_geom);
439			max_cylinder = bios_geom >> 16;
440			max_head = (bios_geom >> 8) & 0xff;
441			max_sector = bios_geom & 0xff;
442			printf(
443		"0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n",
444			       max_cylinder, max_cylinder + 1,
445			       max_head, max_head + 1,
446			       max_sector, max_sector);
447		}
448		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
449	}
450}
451
452int
453register_netisr(num, handler)
454	int num;
455	netisr_t *handler;
456{
457
458	if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
459		printf("register_netisr: bad isr number: %d\n", num);
460		return (EINVAL);
461	}
462	netisrs[num] = handler;
463	return (0);
464}
465
466static void
467setup_netisrs(ls)
468	struct linker_set *ls;
469{
470	int i;
471	const struct netisrtab *nit;
472
473	for(i = 0; ls->ls_items[i]; i++) {
474		nit = (const struct netisrtab *)ls->ls_items[i];
475		register_netisr(nit->nit_num, nit->nit_isr);
476	}
477}
478
/*
 * CPU name and class table.  identifycpu() indexes it with the global
 * "cpu" value; the trailing comment on each row names the CPU_* constant
 * that row is meant to correspond to, so the order must not change.
 */
static struct cpu_nameclass i386_cpus[] = {
	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
	{ "Pentium Pro",	CPUCLASS_686 },		/* CPU_686 */
};
489
490static void
491identifycpu()
492{
493	printf("CPU: ");
494	if (cpu >= 0
495	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
496		cpu_class = i386_cpus[cpu].cpu_class;
497		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
498	} else {
499		printf("unknown cpu type %d\n", cpu);
500		panic("startup: bad cpu id");
501	}
502
503#if defined(I586_CPU) || defined(I686_CPU)
504	if (cpu_class == CPUCLASS_586 || cpu_class == CPUCLASS_686) {
505		calibrate_cyclecounter();
506	}
507#endif
508#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
509	if (!strcmp(cpu_vendor,"GenuineIntel")) {
510		if ((cpu_id & 0xf00) > 3) {
511			cpu_model[0] = '\0';
512
513			switch (cpu_id & 0x3000) {
514			case 0x1000:
515				strcpy(cpu_model, "Overdrive ");
516				break;
517			case 0x2000:
518				strcpy(cpu_model, "Dual ");
519				break;
520			}
521
522			switch (cpu_id & 0xf00) {
523			case 0x400:
524				strcat(cpu_model, "i486 ");
525				break;
526			case 0x500:
527				strcat(cpu_model, "Pentium"); /* nb no space */
528				break;
529			case 0x600:
530				strcat(cpu_model, "Pentium Pro");
531				break;
532			default:
533				strcat(cpu_model, "unknown");
534				break;
535			}
536
537			switch (cpu_id & 0xff0) {
538			case 0x400:
539				strcat(cpu_model, "DX"); break;
540			case 0x410:
541				strcat(cpu_model, "DX"); break;
542			case 0x420:
543				strcat(cpu_model, "SX"); break;
544			case 0x430:
545				strcat(cpu_model, "DX2"); break;
546			case 0x440:
547				strcat(cpu_model, "SL"); break;
548			case 0x450:
549				strcat(cpu_model, "SX2"); break;
550			case 0x470:
551				strcat(cpu_model, "DX2 Write-Back Enhanced");
552				break;
553			case 0x480:
554				strcat(cpu_model, "DX4"); break;
555				break;
556			}
557		}
558	}
559#endif
560	printf("%s (", cpu_model);
561	switch(cpu_class) {
562	case CPUCLASS_286:
563		printf("286");
564		break;
565#if defined(I386_CPU)
566	case CPUCLASS_386:
567		printf("386");
568		break;
569#endif
570#if defined(I486_CPU)
571	case CPUCLASS_486:
572		printf("486");
573		bzero = i486_bzero;
574		break;
575#endif
576#if defined(I586_CPU)
577	case CPUCLASS_586:
578		printf("%d.%02d-MHz ",
579		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100,
580		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100);
581		printf("586");
582		break;
583#endif
584#if defined(I686_CPU)
585	case CPUCLASS_686:
586		printf("%d.%02d-MHz ",
587		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100,
588		       ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100);
589		printf("686");
590		break;
591#endif
592	default:
593		printf("unknown");	/* will panic below... */
594	}
595	printf("-class CPU)\n");
596#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU)
597	if(*cpu_vendor)
598		printf("  Origin = \"%s\"",cpu_vendor);
599	if(cpu_id)
600		printf("  Id = 0x%lx",cpu_id);
601
602	if (!strcmp(cpu_vendor, "GenuineIntel")) {
603		printf("  Stepping=%ld", cpu_id & 0xf);
604		if (cpu_high > 0) {
605			printf("\n  Features=0x%b", cpu_feature,
606			"\020"
607			"\001FPU"
608			"\002VME"
609			"\003DE"
610			"\004PSE"
611			"\005TSC"
612			"\006MSR"
613			"\007PAE"
614			"\010MCE"
615			"\011CX8"
616			"\012APIC"
617			"\013<b10>"
618			"\014<b11>"
619			"\015MTRR"
620			"\016PGE"
621			"\017MCA"
622			"\020CMOV"
623			);
624		}
625	}
626	/* Avoid ugly blank lines: only print newline when we have to. */
627	if (*cpu_vendor || cpu_id)
628		printf("\n");
629#endif
630	/*
631	 * Now that we have told the user what they have,
632	 * let them know if that machine type isn't configured.
633	 */
634	switch (cpu_class) {
635	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
636#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU)
637#error This kernel is not configured for one of the supported CPUs
638#endif
639#if !defined(I386_CPU)
640	case CPUCLASS_386:
641#endif
642#if !defined(I486_CPU)
643	case CPUCLASS_486:
644#endif
645#if !defined(I586_CPU)
646	case CPUCLASS_586:
647#endif
648#if !defined(I686_CPU)
649	case CPUCLASS_686:
650#endif
651		panic("CPU class not configured");
652	default:
653		break;
654	}
655#ifdef PERFMON
656	perfmon_init();
657#endif
658	dev_attach(&kdc_cpu0);
659}
660
661/*
662 * Send an interrupt to process.
663 *
664 * Stack is set up to allow sigcode stored
665 * at top to call routine, followed by kcall
666 * to sigreturn routine below.  After sigreturn
667 * resets the signal mask, the stack, and the
668 * frame pointer, it returns to the user
669 * specified pc, psl.
670 */
671void
672sendsig(catcher, sig, mask, code)
673	sig_t catcher;
674	int sig, mask;
675	u_long code;
676{
677	register struct proc *p = curproc;
678	register int *regs;
679	register struct sigframe *fp;
680	struct sigframe sf;
681	struct sigacts *psp = p->p_sigacts;
682	int oonstack;
683
684	regs = p->p_md.md_regs;
685        oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
686	/*
687	 * Allocate and validate space for the signal handler context.
688	 */
689        if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
690	    (psp->ps_sigonstack & sigmask(sig))) {
691		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
692		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
693		psp->ps_sigstk.ss_flags |= SS_ONSTACK;
694	} else {
695		fp = (struct sigframe *)regs[tESP] - 1;
696	}
697
698	/*
699	 * grow() will return FALSE if the fp will not fit inside the stack
700	 *	and the stack can not be grown. useracc will return FALSE
701	 *	if access is denied.
702	 */
703	if ((grow(p, (int)fp) == FALSE) ||
704	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
705		/*
706		 * Process has trashed its stack; give it an illegal
707		 * instruction to halt it in its tracks.
708		 */
709		SIGACTION(p, SIGILL) = SIG_DFL;
710		sig = sigmask(SIGILL);
711		p->p_sigignore &= ~sig;
712		p->p_sigcatch &= ~sig;
713		p->p_sigmask &= ~sig;
714		psignal(p, SIGILL);
715		return;
716	}
717
718	/*
719	 * Build the argument list for the signal handler.
720	 */
721	if (p->p_sysent->sv_sigtbl) {
722		if (sig < p->p_sysent->sv_sigsize)
723			sig = p->p_sysent->sv_sigtbl[sig];
724		else
725			sig = p->p_sysent->sv_sigsize + 1;
726	}
727	sf.sf_signum = sig;
728	sf.sf_code = code;
729	sf.sf_scp = &fp->sf_sc;
730	sf.sf_addr = (char *) regs[tERR];
731	sf.sf_handler = catcher;
732
733	/* save scratch registers */
734	sf.sf_sc.sc_eax = regs[tEAX];
735	sf.sf_sc.sc_ebx = regs[tEBX];
736	sf.sf_sc.sc_ecx = regs[tECX];
737	sf.sf_sc.sc_edx = regs[tEDX];
738	sf.sf_sc.sc_esi = regs[tESI];
739	sf.sf_sc.sc_edi = regs[tEDI];
740	sf.sf_sc.sc_cs = regs[tCS];
741	sf.sf_sc.sc_ds = regs[tDS];
742	sf.sf_sc.sc_ss = regs[tSS];
743	sf.sf_sc.sc_es = regs[tES];
744	sf.sf_sc.sc_isp = regs[tISP];
745
746	/*
747	 * Build the signal context to be used by sigreturn.
748	 */
749	sf.sf_sc.sc_onstack = oonstack;
750	sf.sf_sc.sc_mask = mask;
751	sf.sf_sc.sc_sp = regs[tESP];
752	sf.sf_sc.sc_fp = regs[tEBP];
753	sf.sf_sc.sc_pc = regs[tEIP];
754	sf.sf_sc.sc_ps = regs[tEFLAGS];
755
756	/*
757	 * Copy the sigframe out to the user's stack.
758	 */
759	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
760		/*
761		 * Something is wrong with the stack pointer.
762		 * ...Kill the process.
763		 */
764		sigexit(p, SIGILL);
765	};
766
767	regs[tESP] = (int)fp;
768	regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode));
769	regs[tEFLAGS] &= ~PSL_VM;
770	regs[tCS] = _ucodesel;
771	regs[tDS] = _udatasel;
772	regs[tES] = _udatasel;
773	regs[tSS] = _udatasel;
774}
775
776/*
777 * System call to cleanup state after a signal
778 * has been taken.  Reset signal mask and
779 * stack state from context left by sendsig (above).
780 * Return to previous pc and psl as specified by
781 * context left by sendsig. Check carefully to
782 * make sure that the user has not modified the
783 * state to gain improper privileges.
784 */
785int
786sigreturn(p, uap, retval)
787	struct proc *p;
788	struct sigreturn_args /* {
789		struct sigcontext *sigcntxp;
790	} */ *uap;
791	int *retval;
792{
793	register struct sigcontext *scp;
794	register struct sigframe *fp;
795	register int *regs = p->p_md.md_regs;
796	int eflags;
797
798	/*
799	 * (XXX old comment) regs[tESP] points to the return address.
800	 * The user scp pointer is above that.
801	 * The return address is faked in the signal trampoline code
802	 * for consistency.
803	 */
804	scp = uap->sigcntxp;
805	fp = (struct sigframe *)
806	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
807
808	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
809		return(EINVAL);
810
811	/*
812	 * Don't allow users to change privileged or reserved flags.
813	 */
814#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
815	eflags = scp->sc_ps;
816	/*
817	 * XXX do allow users to change the privileged flag PSL_RF.  The
818	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
819	 * sometimes set it there too.  tf_eflags is kept in the signal
820	 * context during signal handling and there is no other place
821	 * to remember it, so the PSL_RF bit may be corrupted by the
822	 * signal handler without us knowing.  Corruption of the PSL_RF
823	 * bit at worst causes one more or one less debugger trap, so
824	 * allowing it is fairly harmless.
825	 */
826	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
827#ifdef DEBUG
828    		printf("sigreturn: eflags = 0x%x\n", eflags);
829#endif
830    		return(EINVAL);
831	}
832
833	/*
834	 * Don't allow users to load a valid privileged %cs.  Let the
835	 * hardware check for invalid selectors, excess privilege in
836	 * other selectors, invalid %eip's and invalid %esp's.
837	 */
838#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
839	if (!CS_SECURE(scp->sc_cs)) {
840#ifdef DEBUG
841    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
842#endif
843		trapsignal(p, SIGBUS, T_PROTFLT);
844		return(EINVAL);
845	}
846
847	/* restore scratch registers */
848	regs[tEAX] = scp->sc_eax;
849	regs[tEBX] = scp->sc_ebx;
850	regs[tECX] = scp->sc_ecx;
851	regs[tEDX] = scp->sc_edx;
852	regs[tESI] = scp->sc_esi;
853	regs[tEDI] = scp->sc_edi;
854	regs[tCS] = scp->sc_cs;
855	regs[tDS] = scp->sc_ds;
856	regs[tES] = scp->sc_es;
857	regs[tSS] = scp->sc_ss;
858	regs[tISP] = scp->sc_isp;
859
860	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
861		return(EINVAL);
862
863	if (scp->sc_onstack & 01)
864		p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK;
865	else
866		p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
867	p->p_sigmask = scp->sc_mask &~
868	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
869	regs[tEBP] = scp->sc_fp;
870	regs[tESP] = scp->sc_sp;
871	regs[tEIP] = scp->sc_pc;
872	regs[tEFLAGS] = eflags;
873	return(EJUSTRETURN);
874}
875
876static int	waittime = -1;
877static struct pcb dumppcb;
878
879__dead void
880boot(howto)
881	int howto;
882{
883	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
884		register struct buf *bp;
885		int iter, nbusy;
886
887		waittime = 0;
888		printf("\nsyncing disks... ");
889
890		sync(&proc0, NULL, NULL);
891
892		for (iter = 0; iter < 20; iter++) {
893			nbusy = 0;
894			for (bp = &buf[nbuf]; --bp >= buf; ) {
895				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
896					nbusy++;
897				}
898			}
899			if (nbusy == 0)
900				break;
901			printf("%d ", nbusy);
902			DELAY(40000 * iter);
903		}
904		if (nbusy) {
905			/*
906			 * Failed to sync all blocks. Indicate this and don't
907			 * unmount filesystems (thus forcing an fsck on reboot).
908			 */
909			printf("giving up\n");
910#ifdef SHOW_BUSYBUFS
911			nbusy = 0;
912			for (bp = &buf[nbuf]; --bp >= buf; ) {
913				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
914					nbusy++;
915					printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno);
916				}
917			}
918			DELAY(5000000);	/* 5 seconds */
919#endif
920		} else {
921			printf("done\n");
922			/*
923			 * Unmount filesystems
924			 */
925			if (panicstr == 0)
926				vfs_unmountall();
927		}
928		DELAY(100000);			/* wait for console output to finish */
929		dev_shutdownall(FALSE);
930	}
931	splhigh();
932	if (howto & RB_HALT) {
933		printf("\n");
934		printf("The operating system has halted.\n");
935		printf("Please press any key to reboot.\n\n");
936		cngetc();
937	} else {
938		if (howto & RB_DUMP) {
939			if (!cold) {
940				savectx(&dumppcb);
941				dumppcb.pcb_ptd = rcr3();
942				dumpsys();
943			}
944
945			if (PANIC_REBOOT_WAIT_TIME != 0) {
946				if (PANIC_REBOOT_WAIT_TIME != -1) {
947					int loop;
948					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
949						PANIC_REBOOT_WAIT_TIME);
950					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
951						DELAY(1000 * 100); /* 1/10th second */
952						if (cncheckc()) /* Did user type a key? */
953							break;
954					}
955					if (!loop)
956						goto die;
957				}
958			} else { /* zero time specified - reboot NOW */
959				goto die;
960			}
961			printf("--> Press a key on the console to reboot <--\n");
962			cngetc();
963		}
964	}
965die:
966	printf("Rebooting...\n");
967	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
968	cpu_reset();
969	for(;;) ;
970	/* NOTREACHED */
971}
972
973/*
974 * Magic number for savecore
975 *
976 * exported (symorder) and used at least by savecore(8)
977 *
978 */
u_long		dumpmag = 0x8fca0101UL;

/* Set to Maxmem by dumpsys() just before the dump is written. */
static int	dumpsize = 0;		/* also for savecore */

/* Read-write machdep sysctl; dumpsys() does nothing while this is 0. */
static int	dodump = 1;
SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, "");
985
986/*
987 * Doadump comes here after turning off memory management and
988 * getting on the dump stack, either when called above, or by
989 * the auto-restart code.
990 */
991static void
992dumpsys()
993{
994
995	if (!dodump)
996		return;
997	if (dumpdev == NODEV)
998		return;
999	if ((minor(dumpdev)&07) != 1)
1000		return;
1001	if (!(bdevsw[major(dumpdev)]))
1002		return;
1003	if (!(bdevsw[major(dumpdev)]->d_dump))
1004		return;
1005	dumpsize = Maxmem;
1006	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
1007	printf("dump ");
1008	switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) {
1009
1010	case ENXIO:
1011		printf("device bad\n");
1012		break;
1013
1014	case EFAULT:
1015		printf("device not ready\n");
1016		break;
1017
1018	case EINVAL:
1019		printf("area improper\n");
1020		break;
1021
1022	case EIO:
1023		printf("i/o error\n");
1024		break;
1025
1026	case EINTR:
1027		printf("aborted from console\n");
1028		break;
1029
1030	default:
1031		printf("succeeded\n");
1032		break;
1033	}
1034}
1035
1036/*
1037 * Clear registers on exec
1038 */
1039void
1040setregs(p, entry, stack)
1041	struct proc *p;
1042	u_long entry;
1043	u_long stack;
1044{
1045	int *regs = p->p_md.md_regs;
1046
1047#ifdef USER_LDT
1048	struct pcb *pcb = &p->p_addr->u_pcb;
1049
1050	/* was i386_user_cleanup() in NetBSD */
1051	if (pcb->pcb_ldt) {
1052		if (pcb == curpcb)
1053			lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
1054		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
1055			pcb->pcb_ldt_len * sizeof(union descriptor));
1056		pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
1057 	}
1058#endif
1059
1060	bzero(regs, sizeof(struct trapframe));
1061	regs[tEIP] = entry;
1062	regs[tESP] = stack;
1063	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
1064	regs[tSS] = _udatasel;
1065	regs[tDS] = _udatasel;
1066	regs[tES] = _udatasel;
1067	regs[tCS] = _ucodesel;
1068
1069	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
1070	load_cr0(rcr0() | CR0_TS);	/* start emulating */
1071#if	NNPX > 0
1072	npxinit(__INITIAL_NPXCW__);
1073#endif	/* NNPX > 0 */
1074}
1075
1076static int
1077sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
1078{
1079	int error;
1080	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
1081		req);
1082	if (!error && req->newptr)
1083		resettodr();
1084	return (error);
1085}
1086
1087SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
1088	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
1089
/* Read-write integer backed directly by disable_rtc_set. */
SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

/* Read-only copy of the bootinfo structure. */
SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
	CTLFLAG_RD, &bootinfo, bootinfo, "");

/* Read-write integer backed directly by wall_cmos_clock. */
SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
	CTLFLAG_RW, &wall_cmos_clock, 0, "");
1098
1099/*
1100 * Initialize 386 and configure to run kernel
1101 */
1102
1103/*
1104 * Initialize segments & interrupt table
1105 */
1106
int currentldt;
int _default_ldt;
union descriptor gdt[NGDT];		/* global descriptor table */
struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */

/*
 * TSS referenced by the GPANIC_SEL descriptor in gdt_segs[], and a
 * one-page stack.  NOTE(review): presumably the stack used while
 * running dblfault_handler() -- confirm against the TSS setup code.
 */
static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];

extern  struct user *proc0paddr;
1117
1118/* software prototypes -- in more palatable form */
1119struct soft_segment_descriptor gdt_segs[] = {
1120/* GNULL_SEL	0 Null Descriptor */
1121{	0x0,			/* segment base address  */
1122	0x0,			/* length */
1123	0,			/* segment type */
1124	0,			/* segment descriptor priority level */
1125	0,			/* segment descriptor present */
1126	0, 0,
1127	0,			/* default 32 vs 16 bit size */
1128	0  			/* limit granularity (byte/page units)*/ },
1129/* GCODE_SEL	1 Code Descriptor for kernel */
1130{	0x0,			/* segment base address  */
1131	0xfffff,		/* length - all address space */
1132	SDT_MEMERA,		/* segment type */
1133	0,			/* segment descriptor priority level */
1134	1,			/* segment descriptor present */
1135	0, 0,
1136	1,			/* default 32 vs 16 bit size */
1137	1  			/* limit granularity (byte/page units)*/ },
1138/* GDATA_SEL	2 Data Descriptor for kernel */
1139{	0x0,			/* segment base address  */
1140	0xfffff,		/* length - all address space */
1141	SDT_MEMRWA,		/* segment type */
1142	0,			/* segment descriptor priority level */
1143	1,			/* segment descriptor present */
1144	0, 0,
1145	1,			/* default 32 vs 16 bit size */
1146	1  			/* limit granularity (byte/page units)*/ },
1147/* GLDT_SEL	3 LDT Descriptor */
1148{	(int) ldt,		/* segment base address  */
1149	sizeof(ldt)-1,		/* length - all address space */
1150	SDT_SYSLDT,		/* segment type */
1151	0,			/* segment descriptor priority level */
1152	1,			/* segment descriptor present */
1153	0, 0,
1154	0,			/* unused - default 32 vs 16 bit size */
1155	0  			/* limit granularity (byte/page units)*/ },
1156/* GTGATE_SEL	4 Null Descriptor - Placeholder */
1157{	0x0,			/* segment base address  */
1158	0x0,			/* length - all address space */
1159	0,			/* segment type */
1160	0,			/* segment descriptor priority level */
1161	0,			/* segment descriptor present */
1162	0, 0,
1163	0,			/* default 32 vs 16 bit size */
1164	0  			/* limit granularity (byte/page units)*/ },
1165/* GPANIC_SEL	5 Panic Tss Descriptor */
1166{	(int) &dblfault_tss,	/* segment base address  */
1167	sizeof(struct i386tss)-1,/* length - all address space */
1168	SDT_SYS386TSS,		/* segment type */
1169	0,			/* segment descriptor priority level */
1170	1,			/* segment descriptor present */
1171	0, 0,
1172	0,			/* unused - default 32 vs 16 bit size */
1173	0  			/* limit granularity (byte/page units)*/ },
1174/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1175{	(int) kstack,		/* segment base address  */
1176	sizeof(struct i386tss)-1,/* length - all address space */
1177	SDT_SYS386TSS,		/* segment type */
1178	0,			/* segment descriptor priority level */
1179	1,			/* segment descriptor present */
1180	0, 0,
1181	0,			/* unused - default 32 vs 16 bit size */
1182	0  			/* limit granularity (byte/page units)*/ },
1183/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1184{	(int) ldt,		/* segment base address  */
1185	(512 * sizeof(union descriptor)-1),		/* length */
1186	SDT_SYSLDT,		/* segment type */
1187	0,			/* segment descriptor priority level */
1188	1,			/* segment descriptor present */
1189	0, 0,
1190	0,			/* unused - default 32 vs 16 bit size */
1191	0  			/* limit granularity (byte/page units)*/ },
1192/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1193{	0,			/* segment base address (overwritten by APM)  */
1194	0xfffff,		/* length */
1195	SDT_MEMERA,		/* segment type */
1196	0,			/* segment descriptor priority level */
1197	1,			/* segment descriptor present */
1198	0, 0,
1199	1,			/* default 32 vs 16 bit size */
1200	1  			/* limit granularity (byte/page units)*/ },
1201/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1202{	0,			/* segment base address (overwritten by APM)  */
1203	0xfffff,		/* length */
1204	SDT_MEMERA,		/* segment type */
1205	0,			/* segment descriptor priority level */
1206	1,			/* segment descriptor present */
1207	0, 0,
1208	0,			/* default 32 vs 16 bit size */
1209	1  			/* limit granularity (byte/page units)*/ },
1210/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1211{	0,			/* segment base address (overwritten by APM) */
1212	0xfffff,		/* length */
1213	SDT_MEMRWA,		/* segment type */
1214	0,			/* segment descriptor priority level */
1215	1,			/* segment descriptor present */
1216	0, 0,
1217	1,			/* default 32 vs 16 bit size */
1218	1  			/* limit granularity (byte/page units)*/ },
1219};
1220
1221static struct soft_segment_descriptor ldt_segs[] = {
1222	/* Null Descriptor - overwritten by call gate */
1223{	0x0,			/* segment base address  */
1224	0x0,			/* length - all address space */
1225	0,			/* segment type */
1226	0,			/* segment descriptor priority level */
1227	0,			/* segment descriptor present */
1228	0, 0,
1229	0,			/* default 32 vs 16 bit size */
1230	0  			/* limit granularity (byte/page units)*/ },
1231	/* Null Descriptor - overwritten by call gate */
1232{	0x0,			/* segment base address  */
1233	0x0,			/* length - all address space */
1234	0,			/* segment type */
1235	0,			/* segment descriptor priority level */
1236	0,			/* segment descriptor present */
1237	0, 0,
1238	0,			/* default 32 vs 16 bit size */
1239	0  			/* limit granularity (byte/page units)*/ },
1240	/* Null Descriptor - overwritten by call gate */
1241{	0x0,			/* segment base address  */
1242	0x0,			/* length - all address space */
1243	0,			/* segment type */
1244	0,			/* segment descriptor priority level */
1245	0,			/* segment descriptor present */
1246	0, 0,
1247	0,			/* default 32 vs 16 bit size */
1248	0  			/* limit granularity (byte/page units)*/ },
1249	/* Code Descriptor for user */
1250{	0x0,			/* segment base address  */
1251	0xfffff,		/* length - all address space */
1252	SDT_MEMERA,		/* segment type */
1253	SEL_UPL,		/* segment descriptor priority level */
1254	1,			/* segment descriptor present */
1255	0, 0,
1256	1,			/* default 32 vs 16 bit size */
1257	1  			/* limit granularity (byte/page units)*/ },
1258	/* Data Descriptor for user */
1259{	0x0,			/* segment base address  */
1260	0xfffff,		/* length - all address space */
1261	SDT_MEMRWA,		/* segment type */
1262	SEL_UPL,		/* segment descriptor priority level */
1263	1,			/* segment descriptor present */
1264	0, 0,
1265	1,			/* default 32 vs 16 bit size */
1266	1  			/* limit granularity (byte/page units)*/ },
1267};
1268
1269void
1270setidt(idx, func, typ, dpl, selec)
1271	int idx;
1272	inthand_t *func;
1273	int typ;
1274	int dpl;
1275	int selec;
1276{
1277	struct gate_descriptor *ip = idt + idx;
1278
1279	ip->gd_looffset = (int)func;
1280	ip->gd_selector = selec;
1281	ip->gd_stkcpy = 0;
1282	ip->gd_xx = 0;
1283	ip->gd_type = typ;
1284	ip->gd_dpl = dpl;
1285	ip->gd_p = 1;
1286	ip->gd_hioffset = ((int)func)>>16 ;
1287}
1288
1289#define	IDTVEC(name)	__CONCAT(X,name)
1290
1291extern inthand_t
1292	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1293	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
1294	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1295	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1296	IDTVEC(syscall), IDTVEC(int0x80_syscall);
1297
1298void
1299sdtossd(sd, ssd)
1300	struct segment_descriptor *sd;
1301	struct soft_segment_descriptor *ssd;
1302{
1303	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1304	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1305	ssd->ssd_type  = sd->sd_type;
1306	ssd->ssd_dpl   = sd->sd_dpl;
1307	ssd->ssd_p     = sd->sd_p;
1308	ssd->ssd_def32 = sd->sd_def32;
1309	ssd->ssd_gran  = sd->sd_gran;
1310}
1311
1312void
1313init386(first)
1314	int first;
1315{
1316	int x;
1317	unsigned biosbasemem, biosextmem;
1318	struct gate_descriptor *gdp;
1319	int gsel_tss;
1320	/* table descriptors - used to load tables by microp */
1321	struct region_descriptor r_gdt, r_idt;
1322	int	pagesinbase, pagesinext;
1323	int	target_page, pa_indx;
1324
1325	proc0.p_addr = proc0paddr;
1326
1327	/*
1328	 * Initialize the console before we print anything out.
1329	 */
1330	cninit();
1331
1332	/*
1333	 * make gdt memory segments, the code segment goes up to end of the
1334	 * page with etext in it, the data segment goes to the end of
1335	 * the address space
1336	 */
1337	/*
1338	 * XXX text protection is temporarily (?) disabled.  The limit was
1339	 * i386_btop(round_page(etext)) - 1.
1340	 */
1341	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1342	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1343	for (x = 0; x < NGDT; x++)
1344		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1345
1346	/* make ldt memory segments */
1347	/*
1348	 * The data segment limit must not cover the user area because we
1349	 * don't want the user area to be writable in copyout() etc. (page
1350	 * level protection is lost in kernel mode on 386's).  Also, we
1351	 * don't want the user area to be writable directly (page level
1352	 * protection of the user area is not available on 486's with
1353	 * CR0_WP set, because there is no user-read/kernel-write mode).
1354	 *
1355	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1356	 * should be spelled ...MAX_USER...
1357	 */
1358#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1359	/*
1360	 * The code segment limit has to cover the user area until we move
1361	 * the signal trampoline out of the user area.  This is safe because
1362	 * the code segment cannot be written to directly.
1363	 */
1364#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1365	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1366	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1367	/* Note. eventually want private ldts per process */
1368	for (x = 0; x < NLDT; x++)
1369		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1370
1371	/* exceptions */
1372	for (x = 0; x < NIDT; x++)
1373		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1374	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1375	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1376	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1377 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1378	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1379	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1380	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1381	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1382	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
1383	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1384	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1385	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1386	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1387	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1388	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1389	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1390	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1391	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
1392 	setidt(0x80, &IDTVEC(int0x80_syscall),
1393			SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1394
1395#include	"isa.h"
1396#if	NISA >0
1397	isa_defaultirq();
1398#endif
1399	rand_initialize();
1400
1401	r_gdt.rd_limit = sizeof(gdt) - 1;
1402	r_gdt.rd_base =  (int) gdt;
1403	lgdt(&r_gdt);
1404
1405	r_idt.rd_limit = sizeof(idt) - 1;
1406	r_idt.rd_base = (int) idt;
1407	lidt(&r_idt);
1408
1409	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1410	lldt(_default_ldt);
1411	currentldt = _default_ldt;
1412
1413#ifdef DDB
1414	kdb_init();
1415	if (boothowto & RB_KDB)
1416		Debugger("Boot flags requested debugger");
1417#endif
1418
1419	/* Use BIOS values stored in RTC CMOS RAM, since probing
1420	 * breaks certain 386 AT relics.
1421	 */
1422	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1423	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1424
1425	/*
1426	 * Print a warning if the official BIOS interface disagrees
1427	 * with the hackish interface used above.  Eventually only
1428	 * the official interface should be used.
1429	 */
1430	if (bootinfo.bi_memsizes_valid) {
1431		if (bootinfo.bi_basemem != biosbasemem)
1432			printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n",
1433			       bootinfo.bi_basemem, biosbasemem);
1434		if (bootinfo.bi_extmem != biosextmem)
1435			printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n",
1436			       bootinfo.bi_extmem, biosextmem);
1437	}
1438
1439	/*
1440	 * If BIOS tells us that it has more than 640k in the basemem,
1441	 *	don't believe it - set it to 640k.
1442	 */
1443	if (biosbasemem > 640)
1444		biosbasemem = 640;
1445
1446	/*
1447	 * Some 386 machines might give us a bogus number for extended
1448	 *	mem. If this happens, stop now.
1449	 */
1450#ifndef LARGEMEM
1451	if (biosextmem > 65536) {
1452		panic("extended memory beyond limit of 64MB");
1453		/* NOTREACHED */
1454	}
1455#endif
1456
1457	pagesinbase = biosbasemem * 1024 / NBPG;
1458	pagesinext = biosextmem * 1024 / NBPG;
1459
1460	/*
1461	 * Special hack for chipsets that still remap the 384k hole when
1462	 *	there's 16MB of memory - this really confuses people that
1463	 *	are trying to use bus mastering ISA controllers with the
1464	 *	"16MB limit"; they only have 16MB, but the remapping puts
1465	 *	them beyond the limit.
1466	 */
1467	/*
1468	 * If extended memory is between 15-16MB (16-17MB phys address range),
1469	 *	chop it to 15MB.
1470	 */
1471	if ((pagesinext > 3840) && (pagesinext < 4096))
1472		pagesinext = 3840;
1473
1474	/*
1475	 * Maxmem isn't the "maximum memory", it's one larger than the
1476	 * highest page of of the physical address space. It
1477	 */
1478	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1479
1480#ifdef MAXMEM
1481	Maxmem = MAXMEM/4;
1482#endif
1483
1484	/* call pmap initialization to make new kernel address space */
1485	pmap_bootstrap (first, 0);
1486
1487	/*
1488	 * Size up each available chunk of physical memory.
1489	 */
1490
1491	/*
1492	 * We currently don't bother testing base memory.
1493	 * XXX  ...but we probably should.
1494	 */
1495	pa_indx = 0;
1496	badpages = 0;
1497	if (pagesinbase > 1) {
1498		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
1499		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
1500		physmem = pagesinbase - 1;
1501	} else {
1502		/* point at first chunk end */
1503		pa_indx++;
1504	}
1505
1506	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
1507		int tmp, page_bad = FALSE;
1508
1509		/*
1510		 * map page into kernel: valid, read/write, non-cacheable
1511		 */
1512		*(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page;
1513		pmap_update();
1514
1515		tmp = *(int *)CADDR1;
1516		/*
1517		 * Test for alternating 1's and 0's
1518		 */
1519		*(volatile int *)CADDR1 = 0xaaaaaaaa;
1520		if (*(volatile int *)CADDR1 != 0xaaaaaaaa) {
1521			page_bad = TRUE;
1522		}
1523		/*
1524		 * Test for alternating 0's and 1's
1525		 */
1526		*(volatile int *)CADDR1 = 0x55555555;
1527		if (*(volatile int *)CADDR1 != 0x55555555) {
1528			page_bad = TRUE;
1529		}
1530		/*
1531		 * Test for all 1's
1532		 */
1533		*(volatile int *)CADDR1 = 0xffffffff;
1534		if (*(volatile int *)CADDR1 != 0xffffffff) {
1535			page_bad = TRUE;
1536		}
1537		/*
1538		 * Test for all 0's
1539		 */
1540		*(volatile int *)CADDR1 = 0x0;
1541		if (*(volatile int *)CADDR1 != 0x0) {
1542			/*
1543			 * test of page failed
1544			 */
1545			page_bad = TRUE;
1546		}
1547		/*
1548		 * Restore original value.
1549		 */
1550		*(int *)CADDR1 = tmp;
1551
1552		/*
1553		 * Adjust array of valid/good pages.
1554		 */
1555		if (page_bad == FALSE) {
1556			/*
1557			 * If this good page is a continuation of the
1558			 * previous set of good pages, then just increase
1559			 * the end pointer. Otherwise start a new chunk.
1560			 * Note that "end" points one higher than end,
1561			 * making the range >= start and < end.
1562			 */
1563			if (phys_avail[pa_indx] == target_page) {
1564				phys_avail[pa_indx] += PAGE_SIZE;
1565			} else {
1566				pa_indx++;
1567				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
1568					printf("Too many holes in the physical address space, giving up\n");
1569					pa_indx--;
1570					break;
1571				}
1572				phys_avail[pa_indx++] = target_page;	/* start */
1573				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
1574			}
1575			physmem++;
1576		} else {
1577			badpages++;
1578			page_bad = FALSE;
1579		}
1580	}
1581
1582	*(int *)CMAP1 = 0;
1583	pmap_update();
1584
1585	/*
1586	 * XXX
1587	 * The last chunk must contain at least one page plus the message
1588	 * buffer to avoid complicating other code (message buffer address
1589	 * calculation, etc.).
1590	 */
1591	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
1592	    round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) {
1593		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
1594		phys_avail[pa_indx--] = 0;
1595		phys_avail[pa_indx--] = 0;
1596	}
1597
1598	Maxmem = atop(phys_avail[pa_indx]);
1599
1600	/* Trim off space for the message buffer. */
1601	phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf));
1602
1603	avail_end = phys_avail[pa_indx];
1604
1605	/* now running on new page tables, configured,and u/iom is accessible */
1606
1607	/* make a initial tss so microp can get interrupt stack on syscall! */
1608	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1609	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1610	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1611
1612	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
1613	    dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
1614	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
1615	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
1616	dblfault_tss.tss_cr3 = IdlePTD;
1617	dblfault_tss.tss_eip = (int) dblfault_handler;
1618	dblfault_tss.tss_eflags = PSL_KERNEL;
1619	dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs =
1620		GSEL(GDATA_SEL, SEL_KPL);
1621	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
1622	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
1623
1624	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1625		(sizeof(struct i386tss))<<16;
1626
1627	ltr(gsel_tss);
1628
1629	/* make a call gate to reenter kernel with */
1630	gdp = &ldt[LSYS5CALLS_SEL].gd;
1631
1632	x = (int) &IDTVEC(syscall);
1633	gdp->gd_looffset = x++;
1634	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1635	gdp->gd_stkcpy = 1;
1636	gdp->gd_type = SDT_SYS386CGT;
1637	gdp->gd_dpl = SEL_UPL;
1638	gdp->gd_p = 1;
1639	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1640
1641	/* transfer to user mode */
1642
1643	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1644	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1645
1646	/* setup proc 0's pcb */
1647	proc0.p_addr->u_pcb.pcb_flags = 0;
1648	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1649}
1650
/*
 * Locate a process's saved register frame through its user area.
 *
 * p->p_md.md_regs is an address relative to the kernel stack.  Its byte
 * offset from kstack is applied to the base of the process's user area
 * (p->p_addr) instead, which yields a pointer that is usable whether or
 * not the process is the one currently running.
 */
#define	TF_REGP(p) \
	((struct trapframe *)((char *)(p)->p_addr + ((char *)(p)->p_md.md_regs - kstack)))
1663
1664int
1665ptrace_set_pc(p, addr)
1666	struct proc *p;
1667	unsigned int addr;
1668{
1669	TF_REGP(p)->tf_eip = addr;
1670	return (0);
1671}
1672
1673int
1674ptrace_single_step(p)
1675	struct proc *p;
1676{
1677	TF_REGP(p)->tf_eflags |= PSL_T;
1678	return (0);
1679}
1680
1681int ptrace_write_u(p, off, data)
1682	struct proc *p;
1683	vm_offset_t off;
1684	int data;
1685{
1686	struct trapframe frame_copy;
1687	vm_offset_t min;
1688	struct trapframe *tp;
1689
1690	/*
1691	 * Privileged kernel state is scattered all over the user area.
1692	 * Only allow write access to parts of regs and to fpregs.
1693	 */
1694	min = (char *)p->p_md.md_regs - kstack;
1695	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1696		tp = TF_REGP(p);
1697		frame_copy = *tp;
1698		*(int *)((char *)&frame_copy + (off - min)) = data;
1699		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1700		    !CS_SECURE(frame_copy.tf_cs))
1701			return (EINVAL);
1702		*(int*)((char *)p->p_addr + off) = data;
1703		return (0);
1704	}
1705	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1706	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1707		*(int*)((char *)p->p_addr + off) = data;
1708		return (0);
1709	}
1710	return (EFAULT);
1711}
1712
1713int
1714fill_regs(p, regs)
1715	struct proc *p;
1716	struct reg *regs;
1717{
1718	struct trapframe *tp;
1719
1720	tp = TF_REGP(p);
1721	regs->r_es = tp->tf_es;
1722	regs->r_ds = tp->tf_ds;
1723	regs->r_edi = tp->tf_edi;
1724	regs->r_esi = tp->tf_esi;
1725	regs->r_ebp = tp->tf_ebp;
1726	regs->r_ebx = tp->tf_ebx;
1727	regs->r_edx = tp->tf_edx;
1728	regs->r_ecx = tp->tf_ecx;
1729	regs->r_eax = tp->tf_eax;
1730	regs->r_eip = tp->tf_eip;
1731	regs->r_cs = tp->tf_cs;
1732	regs->r_eflags = tp->tf_eflags;
1733	regs->r_esp = tp->tf_esp;
1734	regs->r_ss = tp->tf_ss;
1735	return (0);
1736}
1737
1738int
1739set_regs(p, regs)
1740	struct proc *p;
1741	struct reg *regs;
1742{
1743	struct trapframe *tp;
1744
1745	tp = TF_REGP(p);
1746	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1747	    !CS_SECURE(regs->r_cs))
1748		return (EINVAL);
1749	tp->tf_es = regs->r_es;
1750	tp->tf_ds = regs->r_ds;
1751	tp->tf_edi = regs->r_edi;
1752	tp->tf_esi = regs->r_esi;
1753	tp->tf_ebp = regs->r_ebp;
1754	tp->tf_ebx = regs->r_ebx;
1755	tp->tf_edx = regs->r_edx;
1756	tp->tf_ecx = regs->r_ecx;
1757	tp->tf_eax = regs->r_eax;
1758	tp->tf_eip = regs->r_eip;
1759	tp->tf_cs = regs->r_cs;
1760	tp->tf_eflags = regs->r_eflags;
1761	tp->tf_esp = regs->r_esp;
1762	tp->tf_ss = regs->r_ss;
1763	return (0);
1764}
1765
#ifndef DDB
/*
 * Stand-in used when the kernel is built without DDB: there is no
 * debugger to enter, so just report that one was requested and return.
 */
void
Debugger(const char *msg)
{

	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
1773
1774#include <sys/disklabel.h>
1775#define b_cylin	b_resid
1776/*
1777 * Determine the size of the transfer, and make sure it is
1778 * within the boundaries of the partition. Adjust transfer
1779 * if needed, and signal errors or early completion.
1780 */
1781int
1782bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1783{
1784        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1785        int labelsect = lp->d_partitions[0].p_offset;
1786        int maxsz = p->p_size,
1787                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1788
1789        /* overwriting disk label ? */
1790        /* XXX should also protect bootstrap in first 8K */
1791        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1792#if LABELSECTOR != 0
1793            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1794#endif
1795            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1796                bp->b_error = EROFS;
1797                goto bad;
1798        }
1799
1800#if     defined(DOSBBSECTOR) && defined(notyet)
1801        /* overwriting master boot record? */
1802        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1803            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1804                bp->b_error = EROFS;
1805                goto bad;
1806        }
1807#endif
1808
1809        /* beyond partition? */
1810        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1811                /* if exactly at end of disk, return an EOF */
1812                if (bp->b_blkno == maxsz) {
1813                        bp->b_resid = bp->b_bcount;
1814                        return(0);
1815                }
1816                /* or truncate if part of it fits */
1817                sz = maxsz - bp->b_blkno;
1818                if (sz <= 0) {
1819                        bp->b_error = EINVAL;
1820                        goto bad;
1821                }
1822                bp->b_bcount = sz << DEV_BSHIFT;
1823        }
1824
1825        /* calculate cylinder for disksort to order transfers with */
1826        bp->b_pblkno = bp->b_blkno + p->p_offset;
1827        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1828        return(1);
1829
1830bad:
1831        bp->b_flags |= B_ERROR;
1832        return(-1);
1833}
1834
/*
 * Copy the drive number out to the sysctl request `req' and return
 * whatever SYSCTL_OUT() reports.
 */
int
disk_externalize(int drive, struct sysctl_req *req)
{

	return (SYSCTL_OUT(req, &drive, sizeof(drive)));
}
1840