machdep.c revision 6308
1178172Simp/*-
2178172Simp * Copyright (c) 1992 Terrence R. Lambert.
3178172Simp * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
4178172Simp * All rights reserved.
5178172Simp *
6178172Simp * This code is derived from software contributed to Berkeley by
7178172Simp * William Jolitz.
8178172Simp *
9178172Simp * Redistribution and use in source and binary forms, with or without
10178172Simp * modification, are permitted provided that the following conditions
11178172Simp * are met:
12178172Simp * 1. Redistributions of source code must retain the above copyright
13178172Simp *    notice, this list of conditions and the following disclaimer.
14178172Simp * 2. Redistributions in binary form must reproduce the above copyright
15178172Simp *    notice, this list of conditions and the following disclaimer in the
16178172Simp *    documentation and/or other materials provided with the distribution.
17178172Simp * 3. All advertising materials mentioning features or use of this software
18178172Simp *    must display the following acknowledgement:
19178172Simp *	This product includes software developed by the University of
20178172Simp *	California, Berkeley and its contributors.
21178172Simp * 4. Neither the name of the University nor the names of its contributors
22178172Simp *    may be used to endorse or promote products derived from this software
23178172Simp *    without specific prior written permission.
24178172Simp *
25178172Simp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26178172Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27178172Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28178172Simp * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29178172Simp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30178172Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31178172Simp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32178172Simp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33178172Simp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34178172Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35178172Simp * SUCH DAMAGE.
36178172Simp *
37178172Simp *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
38178172Simp *	$Id: machdep.c,v 1.107 1995/02/10 07:44:03 davidg Exp $
39178172Simp */
40178172Simp
41178172Simp#include "npx.h"
42202046Simp#include "isa.h"
43178172Simp
44178172Simp#include <sys/param.h>
45178172Simp#include <sys/systm.h>
46178172Simp#include <sys/signalvar.h>
47178172Simp#include <sys/kernel.h>
48178172Simp#include <sys/proc.h>
49178172Simp#include <sys/user.h>
50178172Simp#include <sys/buf.h>
51178172Simp#include <sys/reboot.h>
52178172Simp#include <sys/conf.h>
53209613Sjhb#include <sys/file.h>
54178172Simp#include <sys/callout.h>
55178172Simp#include <sys/malloc.h>
56178172Simp#include <sys/mbuf.h>
57178172Simp#include <sys/msgbuf.h>
58178172Simp#include <sys/ioctl.h>
59178172Simp#include <sys/sysent.h>
60178172Simp#include <sys/tty.h>
61178172Simp#include <sys/sysctl.h>
62178172Simp
63178172Simp#ifdef SYSVSHM
64178172Simp#include <sys/shm.h>
65178172Simp#endif
66178172Simp
67178172Simp#ifdef SYSVMSG
68178172Simp#include <sys/msg.h>
69178172Simp#endif
70178172Simp
71178172Simp#ifdef SYSVSEM
72178172Simp#include <sys/sem.h>
73178172Simp#endif
74178172Simp
75178172Simp#include <vm/vm.h>
76178172Simp#include <vm/vm_kern.h>
77178172Simp#include <vm/vm_page.h>
78178172Simp
79178172Simp#include <sys/exec.h>
80178172Simp#include <sys/vnode.h>
81178172Simp
82178172Simp#include <net/netisr.h>
83178172Simp
84178172Simpextern vm_offset_t avail_start, avail_end;
85178172Simp
86178172Simp#include "ether.h"
87178172Simp
88178172Simp#include <machine/cpu.h>
89178172Simp#include <machine/npx.h>
90178172Simp#include <machine/reg.h>
91178172Simp#include <machine/psl.h>
92178172Simp#include <machine/clock.h>
93178172Simp#include <machine/specialreg.h>
94178172Simp#include <machine/sysarch.h>
95178172Simp#include <machine/cons.h>
96178172Simp#include <machine/devconf.h>
97178172Simp#include <machine/bootinfo.h>
98178172Simp
99178172Simp#include <i386/isa/isa.h>
100178172Simp#include <i386/isa/isa_device.h>
101178172Simp#include <i386/isa/rtc.h>
102178172Simp
103178172Simpstatic void identifycpu(void);
104178172Simpstatic void initcpu(void);
105178172Simp
106178172Simpchar machine[] = "i386";
107178172Simpchar cpu_model[sizeof("Cy486DLC") + 1];
108178172Simp
109178172Simp#ifndef PANIC_REBOOT_WAIT_TIME
110178172Simp#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
111178172Simp#endif
112232585Sjmallett
113178172Simp/*
114178172Simp * Declare these as initialized data so we can patch them.
115178172Simp */
116178172Simpint	nswbuf = 0;
117178172Simp#ifdef	NBUF
118178172Simpint	nbuf = NBUF;
119178172Simp#else
120178172Simpint	nbuf = 0;
121178172Simp#endif
122178172Simp
123178172Simp#ifdef BOUNCE_BUFFERS
124178172Simpextern char *bouncememory;
125178172Simpextern int maxbkva;
126178172Simp#ifdef BOUNCEPAGES
127178172Simpint	bouncepages = BOUNCEPAGES;
128178172Simp#else
129178172Simpint	bouncepages = 0;
130178172Simp#endif
131178172Simp#endif	/* BOUNCE_BUFFERS */
132178172Simp
133178172Simpextern int freebufspace;
134178172Simpint	msgbufmapped = 0;		/* set when safe to use msgbuf */
135178172Simpint _udatasel, _ucodesel;
136178172Simp
137178172Simp
138209500Sjchandra/*
139178172Simp * Machine-dependent startup code
140178172Simp */
141209500Sjchandraint boothowto = 0, bootverbose = 0, Maxmem = 0, badpages = 0, physmem = 0;
142178172Simplong dumplo;
143178172Simpextern int bootdev;
144178172Simpint biosmem;
145178172Simp
146178172Simpvm_offset_t	phys_avail[6];
147209500Sjchandra
148178172Simpint cpu_class;
149178172Simp
150178172Simpvoid dumpsys __P((void));
151178172Simpvm_offset_t buffer_sva, buffer_eva;
152178172Simpvm_offset_t clean_sva, clean_eva;
153178172Simpvm_offset_t pager_sva, pager_eva;
154178172Simpextern int pager_map_size;
155178172Simp
156178172Simp#define offsetof(type, member)	((size_t)(&((type *)0)->member))
157178172Simp
158178172Simpvoid
159178172Simpcpu_startup()
160178172Simp{
161178172Simp	register unsigned i;
162178172Simp	register caddr_t v;
163178172Simp	extern void (*netisrs[32])(void);
164178172Simp	vm_offset_t maxaddr;
165178172Simp	vm_size_t size = 0;
166178172Simp	int firstaddr;
167178172Simp	vm_offset_t minaddr;
168178172Simp
169178172Simp	if (boothowto & RB_VERBOSE)
170209500Sjchandra		bootverbose++;
171209500Sjchandra
172209500Sjchandra	/*
173178172Simp	 * Initialize error message buffer (at end of core).
174178172Simp	 */
175178172Simp
176209500Sjchandra	/* avail_end was pre-decremented in init_386() to compensate */
177178172Simp	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
178178172Simp		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
179178172Simp			   avail_end + i * NBPG,
180178172Simp			   VM_PROT_ALL, TRUE);
181178172Simp	msgbufmapped = 1;
182178172Simp
183178172Simp	/*
184178172Simp	 * Good {morning,afternoon,evening,night}.
185178172Simp	 */
186178172Simp	printf(version);
187178172Simp	startrtclock();
188178172Simp	identifycpu();
189178172Simp	printf("real memory  = %d (%d pages)\n", ptoa(physmem), physmem);
190178172Simp	if (badpages)
191178172Simp		printf("bad memory   = %d (%d pages)\n", ptoa(badpages), badpages);
192178172Simp
193178172Simp	/*
194178172Simp	 * Quickly wire in netisrs.
195178172Simp	 */
196178172Simp#define DONET(isr, n) do { extern void isr(void); netisrs[n] = isr; } while(0)
197178172Simp#ifdef INET
198178172Simp#if NETHER > 0
199178172Simp	DONET(arpintr, NETISR_ARP);
200178172Simp#endif
201178172Simp	DONET(ipintr, NETISR_IP);
202178172Simp#endif
203178172Simp#ifdef NS
204178172Simp	DONET(nsintr, NETISR_NS);
205178172Simp#endif
206178172Simp#ifdef ISO
207178172Simp	DONET(clnlintr, NETISR_ISO);
208178172Simp#endif
209178172Simp#ifdef CCITT
210225617Skmacy	DONET(ccittintr, NETISR_CCITT);
211178172Simp#endif
212178172Simp#ifdef ISDN
213178172Simp	DONET(isdnintr, NETISR_ISDN);
214178172Simp#endif
215178172Simp#undef DONET
216178172Simp
217178172Simp	/*
218178172Simp	 * Allocate space for system data structures.
219232586Sjmallett	 * The first available kernel virtual address is in "v".
220232586Sjmallett	 * As pages of kernel virtual memory are allocated, "v" is incremented.
221232586Sjmallett	 * As pages of memory are allocated and cleared,
222178172Simp	 * "firstaddr" is incremented.
223232586Sjmallett	 * An index into the kernel page table corresponding to the
224178172Simp	 * virtual memory address maintained in "v" is kept in "mapaddr".
225232586Sjmallett	 */
226178172Simp
227178172Simp	/*
228178172Simp	 * Make two passes.  The first pass calculates how much memory is
229178172Simp	 * needed and allocates it.  The second pass assigns virtual
230178172Simp	 * addresses to the various data structures.
231178172Simp	 */
232178172Simp	firstaddr = 0;
233178172Simpagain:
234178172Simp	v = (caddr_t)firstaddr;
235178172Simp
236178172Simp#define	valloc(name, type, num) \
237178172Simp	    (name) = (type *)v; v = (caddr_t)((name)+(num))
238178172Simp#define	valloclim(name, type, num, lim) \
239178172Simp	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
240178172Simp	valloc(callout, struct callout, ncallout);
241178172Simp#ifdef SYSVSHM
242178172Simp	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
243178172Simp#endif
244178172Simp#ifdef SYSVSEM
245178172Simp	valloc(sema, struct semid_ds, seminfo.semmni);
246178172Simp	valloc(sem, struct sem, seminfo.semmns);
247178172Simp	/* This is pretty disgusting! */
248178172Simp	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
249178172Simp#endif
250178172Simp#ifdef SYSVMSG
251178172Simp	valloc(msgpool, char, msginfo.msgmax);
252178172Simp	valloc(msgmaps, struct msgmap, msginfo.msgseg);
253178172Simp	valloc(msghdrs, struct msg, msginfo.msgtql);
254178172Simp	valloc(msqids, struct msqid_ds, msginfo.msgmni);
255178172Simp#endif
256178172Simp
257178172Simp	if (nbuf == 0) {
258178172Simp		nbuf = 32;
259178172Simp		if( physmem > 1024)
260178172Simp			nbuf += min((physmem - 1024) / 20, 1024);
261178172Simp	}
262178172Simp	nswbuf = min(nbuf, 128);
263178172Simp
264178172Simp	valloc(swbuf, struct buf, nswbuf);
265178172Simp	valloc(buf, struct buf, nbuf);
266178172Simp
267178172Simp#ifdef BOUNCE_BUFFERS
268178172Simp	/*
269178172Simp	 * If there is more than 16MB of memory, allocate some bounce buffers
270178172Simp	 */
271178172Simp	if (Maxmem > 4096) {
272178172Simp		if (bouncepages == 0) {
273178172Simp			bouncepages = 64;
274178172Simp			bouncepages += ((Maxmem - 4096) / 2048) * 32;
275178172Simp		}
276178172Simp		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
277178172Simp		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
278178172Simp	}
279210038Simp#endif
280178172Simp
281178172Simp	/*
282178172Simp	 * End of first pass, size has been calculated so allocate memory
283178172Simp	 */
284178172Simp	if (firstaddr == 0) {
285178172Simp		size = (vm_size_t)(v - firstaddr);
286178172Simp		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
287178172Simp		if (firstaddr == 0)
288178172Simp			panic("startup: no room for tables");
289178172Simp		goto again;
290178172Simp	}
291178172Simp
292178172Simp	/*
293202046Simp	 * End of second pass, addresses have been assigned
294178172Simp	 */
295178172Simp	if ((vm_size_t)(v - firstaddr) != size)
296178172Simp		panic("startup: table size inconsistency");
297178172Simp
298178172Simp#ifdef BOUNCE_BUFFERS
299178172Simp	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
300178172Simp			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
301178172Simp				maxbkva + pager_map_size, TRUE);
302178172Simp	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
303178172Simp#else
304178172Simp	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
305178172Simp			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
306178172Simp#endif
307178172Simp	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
308178172Simp				(nbuf*MAXBSIZE), TRUE);
309178172Simp	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
310178172Simp				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
311178172Simp	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
312178172Simp				(16*ARG_MAX), TRUE);
313178172Simp	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
314178172Simp				(maxproc*UPAGES*PAGE_SIZE), FALSE);
315178172Simp
316178172Simp	/*
317178172Simp	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
318178172Simp	 * we use the more space efficient malloc in place of kmem_alloc.
319178172Simp	 */
320178172Simp	mclrefcnt = (char *)malloc(NMBCLUSTERS+CLBYTES/MCLBYTES,
321178172Simp				   M_MBUF, M_NOWAIT);
322178172Simp	bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES);
323178172Simp	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
324178172Simp			       VM_MBUF_SIZE, FALSE);
325178172Simp	/*
326178172Simp	 * Initialize callouts
327178172Simp	 */
328178172Simp	callfree = callout;
329178172Simp	for (i = 1; i < ncallout; i++)
330178172Simp		callout[i-1].c_next = &callout[i];
331178172Simp
332178172Simp        if (boothowto & RB_CONFIG)
333178172Simp		userconfig();
334178172Simp	printf("avail memory = %d (%d pages)\n", ptoa(cnt.v_free_count), cnt.v_free_count);
335178172Simp
336178172Simp#ifdef BOUNCE_BUFFERS
337178172Simp	/*
338178172Simp	 * init bounce buffers
339178172Simp	 */
340178172Simp	vm_bounce_init();
341178172Simp#endif
342178172Simp
343178172Simp	/*
344178172Simp	 * Set up CPU-specific registers, cache, etc.
345178172Simp	 */
346178172Simp	initcpu();
347178172Simp
348178172Simp	/*
349178172Simp	 * Set up buffers, so they can be used to read disk labels.
350178172Simp	 */
351178172Simp	bufinit();
352178172Simp	vm_pager_bufferinit();
353178172Simp
354178172Simp	/*
355178172Simp	 * Configure the system.
356178172Simp	 */
357178172Simp	configure();
358178172Simp	if (bootverbose) {
359178172Simp		printf("BIOS Geometries:");
360178172Simp		for (i=0; i < N_BIOS_GEOM; i++)
361178172Simp			printf(" %x:%x\n", i, bootinfo.bi_bios_geom[i]);
362178172Simp		printf(" %d accounted for\n", bootinfo.bi_n_bios_used);
363178172Simp	}
364178172Simp}
365178172Simp
366178172Simp
367178172Simpstruct cpu_nameclass i386_cpus[] = {
368178172Simp	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
369178172Simp	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
370178172Simp	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
371178172Simp	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
372178172Simp	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
373178172Simp	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
374178172Simp	{ "Cy486DLC",		CPUCLASS_486 },		/* CPU_486DLC */
375178172Simp};
376178172Simp
377178172Simpstatic void
378178172Simpidentifycpu()
379202046Simp{
380202046Simp	extern u_long cpu_id, cpu_high, cpu_feature;
381202046Simp	extern char cpu_vendor[];
382202046Simp	printf("CPU: ");
383202046Simp	if (cpu >= 0
384202046Simp	    && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) {
385178172Simp		printf("%s", i386_cpus[cpu].cpu_name);
386178172Simp		cpu_class = i386_cpus[cpu].cpu_class;
387178172Simp		strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model);
388202046Simp	} else {
389178172Simp		printf("unknown cpu type %d\n", cpu);
390178172Simp		panic("startup: bad cpu id");
391178172Simp	}
392178172Simp	printf(" (");
393278347Skib	switch(cpu_class) {
394178172Simp	case CPUCLASS_286:
395178172Simp		printf("286");
396178172Simp		break;
397178172Simp	case CPUCLASS_386:
398178172Simp		printf("386");
399178172Simp		break;
400178172Simp	case CPUCLASS_486:
401178172Simp		printf("486");
402178172Simp		break;
403178172Simp	case CPUCLASS_586:
404178172Simp		printf("Pentium");
405178172Simp		break;
406178172Simp	default:
407178172Simp		printf("unknown");	/* will panic below... */
408178172Simp	}
409202046Simp	printf("-class CPU)");
410178172Simp#ifdef I586_CPU
411178172Simp	if(cpu_class == CPUCLASS_586) {
412178172Simp		calibrate_cyclecounter();
413178172Simp		printf(" %d MHz", pentium_mhz);
414178172Simp	}
415178172Simp#endif
416178172Simp	if(*cpu_vendor)
417178172Simp		printf("  Origin = \"%s\"",cpu_vendor);
418178172Simp	if(cpu_id)
419178172Simp		printf("  Id = 0x%lx",cpu_id);
420178172Simp	printf("\n");	/* cpu speed would be nice, but how? */
421178172Simp	if (!strcmp(cpu_vendor,"GenuineIntel")) {
422178172Simp		printf("  This is a");
423178172Simp		if ((cpu_id & 0xf00) > 3) {
424178172Simp			switch (cpu_id & 0x3000) {
425178172Simp			    case 0x1000: printf("Overdrive "); break;
426178172Simp			    case 0x2000: printf("Dual "); break;
427178172Simp			}
428178172Simp			if ((cpu_id & 0xf00) == 0x400)
429178172Simp			    printf("n i486");
430178172Simp			else if ((cpu_id & 0xf00) == 0x500)
431178172Simp			    printf(" Pentium ");
432178172Simp			else
433178172Simp			    printf(" unknown CPU");
434178172Simp			switch (cpu_id & 0xff0) {
435178172Simp			    case 0x400: printf("DX"); break;
436178172Simp			    case 0x410: printf("DX"); break;
437178172Simp			    case 0x420: printf("SX"); break;
438178172Simp			    case 0x430: printf("DX2"); break;
439178172Simp			    case 0x440: printf("SL"); break;
440178172Simp			    case 0x450: printf("SX2"); break;
441205642Snwhitehorn			    case 0x470: printf("DX2 Write-Back Enhanced");
442178172Simp				break;
443178172Simp			    case 0x480: printf("DX4"); break;
444178172Simp			    case 0x510: printf("510\\60 or 567\\66"); break;
445178172Simp			    case 0x520: printf("735\\90 or 815\\100"); break;
446178172Simp			}
447210595Sjmallett		}
448210595Sjmallett		printf("  Stepping=%d", cpu_id & 0xf);
449210595Sjmallett		if (cpu_high > 0) {
450178172Simp			printf("  Features=0x%lx",cpu_feature);
451178172Simp			if (cpu_feature & 0x1) printf(" FPU");
452210595Sjmallett			if (cpu_feature & 0x2) printf(" VME");
453210595Sjmallett			if (cpu_feature & 0x8) printf(" PSE");
454210595Sjmallett			if (cpu_feature & 0x80) printf(" MCE");
455210595Sjmallett			if (cpu_feature & 0x100) printf(" CX8");
456210595Sjmallett			if (cpu_feature & 0x200) printf(" APIC");
457210595Sjmallett		}
458210595Sjmallett		printf("\n");
459210595Sjmallett	}
460210595Sjmallett
461210595Sjmallett	/*
462210595Sjmallett	 * Now that we have told the user what they have,
463210595Sjmallett	 * let them know if that machine type isn't configured.
464210595Sjmallett	 */
465210595Sjmallett	switch (cpu_class) {
466210595Sjmallett	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
467210595Sjmallett#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU)
468210595Sjmallett#error This kernel is not configured for one of the supported CPUs
469210595Sjmallett#endif
470210595Sjmallett#if !defined(I386_CPU)
471210595Sjmallett	case CPUCLASS_386:
472210595Sjmallett#endif
473210595Sjmallett#if !defined(I486_CPU)
474210595Sjmallett	case CPUCLASS_486:
475210595Sjmallett#endif
476210595Sjmallett#if !defined(I586_CPU)
477210595Sjmallett	case CPUCLASS_586:
478205642Snwhitehorn#endif
479205642Snwhitehorn		panic("CPU class not configured");
480210038Simp	default:
481210038Simp		break;
482210644Sjchandra	}
483210038Simp}
484210644Sjchandra
485211218Sjchandraextern char kstack[];
486178172Simp
487178172Simp/*
488178172Simp * Send an interrupt to process.
489178172Simp *
490178172Simp * Stack is set up to allow sigcode stored
491178172Simp * in u. to call routine, followed by kcall
492178172Simp * to sigreturn routine below.  After sigreturn
493178172Simp * resets the signal mask, the stack, and the
494178172Simp * frame pointer, it returns to the user
495178172Simp * specified pc, psl.
496178172Simp */
497178172Simpvoid
498178172Simpsendsig(catcher, sig, mask, code)
499178172Simp	sig_t catcher;
500178172Simp	int sig, mask;
501178172Simp	unsigned code;
502178172Simp{
503178172Simp	register struct proc *p = curproc;
504205642Snwhitehorn	register int *regs;
505178172Simp	register struct sigframe *fp;
506178172Simp	struct sigacts *psp = p->p_sigacts;
507178172Simp	int oonstack;
508178172Simp
509178172Simp	regs = p->p_md.md_regs;
510178172Simp        oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
511178172Simp	/*
512178172Simp	 * Allocate and validate space for the signal handler
513178172Simp	 * context. Note that if the stack is in P0 space, the
514178172Simp	 * call to grow() is a nop, and the useracc() check
515178172Simp	 * will fail if the process has not already allocated
516178172Simp	 * the space with a `brk'.
517178172Simp	 */
518178172Simp        if ((psp->ps_flags & SAS_ALTSTACK) &&
519178172Simp	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
520178172Simp	    (psp->ps_sigonstack & sigmask(sig))) {
521178172Simp		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
522178172Simp		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
523178172Simp		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
524178172Simp	} else {
525178172Simp		fp = (struct sigframe *)(regs[tESP]
526178172Simp			- sizeof(struct sigframe));
527178172Simp	}
528178172Simp
529178172Simp	/*
530178172Simp	 * grow() will return FALSE if the fp will not fit inside the stack
531178172Simp	 *	and the stack can not be grown. useracc will return FALSE
532178172Simp	 *	if access is denied.
533178172Simp	 */
534178172Simp	if ((grow(p, (int)fp) == FALSE) ||
535178172Simp	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
536178172Simp		/*
537178172Simp		 * Process has trashed its stack; give it an illegal
538		 * instruction to halt it in its tracks.
539		 */
540		SIGACTION(p, SIGILL) = SIG_DFL;
541		sig = sigmask(SIGILL);
542		p->p_sigignore &= ~sig;
543		p->p_sigcatch &= ~sig;
544		p->p_sigmask &= ~sig;
545		psignal(p, SIGILL);
546		return;
547	}
548
549	/*
550	 * Build the argument list for the signal handler.
551	 */
552	if (p->p_sysent->sv_sigtbl) {
553		if (sig < p->p_sysent->sv_sigsize)
554			sig = p->p_sysent->sv_sigtbl[sig];
555		else
556			sig = p->p_sysent->sv_sigsize + 1;
557	}
558	fp->sf_signum = sig;
559	fp->sf_code = code;
560	fp->sf_scp = &fp->sf_sc;
561	fp->sf_addr = (char *) regs[tERR];
562	fp->sf_handler = catcher;
563
564	/* save scratch registers */
565	fp->sf_sc.sc_eax = regs[tEAX];
566	fp->sf_sc.sc_ebx = regs[tEBX];
567	fp->sf_sc.sc_ecx = regs[tECX];
568	fp->sf_sc.sc_edx = regs[tEDX];
569	fp->sf_sc.sc_esi = regs[tESI];
570	fp->sf_sc.sc_edi = regs[tEDI];
571	fp->sf_sc.sc_cs = regs[tCS];
572	fp->sf_sc.sc_ds = regs[tDS];
573	fp->sf_sc.sc_ss = regs[tSS];
574	fp->sf_sc.sc_es = regs[tES];
575	fp->sf_sc.sc_isp = regs[tISP];
576
577	/*
578	 * Build the signal context to be used by sigreturn.
579	 */
580	fp->sf_sc.sc_onstack = oonstack;
581	fp->sf_sc.sc_mask = mask;
582	fp->sf_sc.sc_sp = regs[tESP];
583	fp->sf_sc.sc_fp = regs[tEBP];
584	fp->sf_sc.sc_pc = regs[tEIP];
585	fp->sf_sc.sc_ps = regs[tEFLAGS];
586	regs[tESP] = (int)fp;
587	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
588	regs[tEFLAGS] &= ~PSL_VM;
589	regs[tCS] = _ucodesel;
590	regs[tDS] = _udatasel;
591	regs[tES] = _udatasel;
592	regs[tSS] = _udatasel;
593}
594
595/*
596 * System call to cleanup state after a signal
597 * has been taken.  Reset signal mask and
598 * stack state from context left by sendsig (above).
599 * Return to previous pc and psl as specified by
600 * context left by sendsig. Check carefully to
601 * make sure that the user has not modified the
602 * state to gain improper privileges.
603 */
604struct sigreturn_args {
605	struct sigcontext *sigcntxp;
606};
607
608int
609sigreturn(p, uap, retval)
610	struct proc *p;
611	struct sigreturn_args *uap;
612	int *retval;
613{
614	register struct sigcontext *scp;
615	register struct sigframe *fp;
616	register int *regs = p->p_md.md_regs;
617	int eflags;
618
619	/*
620	 * (XXX old comment) regs[tESP] points to the return address.
621	 * The user scp pointer is above that.
622	 * The return address is faked in the signal trampoline code
623	 * for consistency.
624	 */
625	scp = uap->sigcntxp;
626	fp = (struct sigframe *)
627	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));
628
629	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
630		return(EINVAL);
631
632	/*
633	 * Don't allow users to change privileged or reserved flags.
634	 */
635#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
636	eflags = scp->sc_ps;
637	/*
638	 * XXX do allow users to change the privileged flag PSL_RF.  The
639	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
640	 * sometimes set it there too.  tf_eflags is kept in the signal
641	 * context during signal handling and there is no other place
642	 * to remember it, so the PSL_RF bit may be corrupted by the
643	 * signal handler without us knowing.  Corruption of the PSL_RF
644	 * bit at worst causes one more or one less debugger trap, so
645	 * allowing it is fairly harmless.
646	 */
647	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
648#ifdef DEBUG
649    		printf("sigreturn: eflags = 0x%x\n", eflags);
650#endif
651    		return(EINVAL);
652	}
653
654	/*
655	 * Don't allow users to load a valid privileged %cs.  Let the
656	 * hardware check for invalid selectors, excess privilege in
657	 * other selectors, invalid %eip's and invalid %esp's.
658	 */
659#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
660	if (!CS_SECURE(scp->sc_cs)) {
661#ifdef DEBUG
662    		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
663#endif
664		trapsignal(p, SIGBUS, T_PROTFLT);
665		return(EINVAL);
666	}
667
668	/* restore scratch registers */
669	regs[tEAX] = scp->sc_eax;
670	regs[tEBX] = scp->sc_ebx;
671	regs[tECX] = scp->sc_ecx;
672	regs[tEDX] = scp->sc_edx;
673	regs[tESI] = scp->sc_esi;
674	regs[tEDI] = scp->sc_edi;
675	regs[tCS] = scp->sc_cs;
676	regs[tDS] = scp->sc_ds;
677	regs[tES] = scp->sc_es;
678	regs[tSS] = scp->sc_ss;
679	regs[tISP] = scp->sc_isp;
680
681	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
682		return(EINVAL);
683
684	if (scp->sc_onstack & 01)
685		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
686	else
687		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
688	p->p_sigmask = scp->sc_mask &~
689	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
690	regs[tEBP] = scp->sc_fp;
691	regs[tESP] = scp->sc_sp;
692	regs[tEIP] = scp->sc_pc;
693	regs[tEFLAGS] = eflags;
694	return(EJUSTRETURN);
695}
696
/*
 * Deliberately panic the system with a fixed message, so that a
 * panic (and a vmcore dump) can be triggered in a predictable way.
 */
void
diediedie()
{

	panic("because you said to!");
}
705
706int	waittime = -1;
707struct pcb dumppcb;
708
709__dead void
710boot(arghowto)
711	int arghowto;
712{
713	register long dummy;		/* r12 is reserved */
714	register int howto;		/* r11 == how to boot */
715	register int devtype;		/* r10 == major of root dev */
716	extern int cold;
717
718	if (cold) {
719		printf("hit reset please");
720		for(;;);
721	}
722	howto = arghowto;
723	if ((howto&RB_NOSYNC) == 0 && waittime < 0) {
724		register struct buf *bp;
725		int iter, nbusy;
726
727		waittime = 0;
728		printf("\nsyncing disks... ");
729		/*
730		 * Release inodes held by texts before update.
731		 */
732		if (panicstr == 0)
733			vnode_pager_umount(NULL);
734		sync(curproc, NULL, NULL);
735
736		for (iter = 0; iter < 20; iter++) {
737			nbusy = 0;
738			for (bp = &buf[nbuf]; --bp >= buf; ) {
739				if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) {
740					nbusy++;
741				}
742			}
743			if (nbusy == 0)
744				break;
745			printf("%d ", nbusy);
746			DELAY(40000 * iter);
747		}
748		if (nbusy) {
749			/*
750			 * Failed to sync all blocks. Indicate this and don't
751			 * unmount filesystems (thus forcing an fsck on reboot).
752			 */
753			printf("giving up\n");
754		} else {
755			printf("done\n");
756			/*
757			 * Unmount filesystems
758			 */
759			if (panicstr == 0)
760				vfs_unmountall();
761		}
762		DELAY(100000);			/* wait for console output to finish */
763	}
764	splhigh();
765	devtype = major(rootdev);
766	if (howto&RB_HALT) {
767		printf("\n");
768		printf("The operating system has halted.\n");
769		printf("Please press any key to reboot.\n\n");
770		cngetc();
771	} else {
772		if (howto & RB_DUMP) {
773			savectx(&dumppcb, 0);
774			dumppcb.pcb_ptd = rcr3();
775			dumpsys();
776
777			if (PANIC_REBOOT_WAIT_TIME != 0) {
778				if (PANIC_REBOOT_WAIT_TIME != -1) {
779					int loop;
780					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
781						PANIC_REBOOT_WAIT_TIME);
782					for (loop = PANIC_REBOOT_WAIT_TIME; loop > 0; --loop) {
783						DELAY(1000 * 1000); /* one second */
784						if (cncheckc()) /* Did user type a key? */
785							break;
786					}
787					if (!loop)
788						goto die;
789				}
790			} else { /* zero time specified - reboot NOW */
791				goto die;
792			}
793			printf("--> Press a key on the console to reboot <--\n");
794			cngetc();
795		}
796	}
797#ifdef lint
798	dummy = 0; dummy = dummy;
799	printf("howto %d, devtype %d\n", arghowto, devtype);
800#endif
801die:
802	printf("Rebooting...\n");
803	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
804	cpu_reset();
805	for(;;) ;
806	/* NOTREACHED */
807}
808
809unsigned long	dumpmag = 0x8fca0101UL;	/* magic number for savecore */
810int		dumpsize = 0;		/* also for savecore */
811
812#ifdef DODUMP
813int		dodump = 1;
814#else
815int		dodump = 0;
816#endif
817/*
818 * Doadump comes here after turning off memory management and
819 * getting on the dump stack, either when called above, or by
820 * the auto-restart code.
821 */
822void
823dumpsys()
824{
825
826	if (!dodump)
827		return;
828	if (dumpdev == NODEV)
829		return;
830	if ((minor(dumpdev)&07) != 1)
831		return;
832	dumpsize = Maxmem;
833	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
834	printf("dump ");
835	switch ((*bdevsw[major(dumpdev)].d_dump)(dumpdev)) {
836
837	case ENXIO:
838		printf("device bad\n");
839		break;
840
841	case EFAULT:
842		printf("device not ready\n");
843		break;
844
845	case EINVAL:
846		printf("area improper\n");
847		break;
848
849	case EIO:
850		printf("i/o error\n");
851		break;
852
853	case EINTR:
854		printf("aborted from console\n");
855		break;
856
857	default:
858		printf("succeeded\n");
859		break;
860	}
861}
862
/*
 * Hook for CPU-specific setup, called from cpu_startup().
 * Currently there is nothing to do.
 */
static void
initcpu()
{

	/* nothing */
}
867
868/*
869 * Clear registers on exec
870 */
871void
872setregs(p, entry, stack)
873	struct proc *p;
874	u_long entry;
875	u_long stack;
876{
877	int *regs = p->p_md.md_regs;
878
879	bzero(regs, sizeof(struct trapframe));
880	regs[tEIP] = entry;
881	regs[tESP] = stack;
882	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
883	regs[tSS] = _udatasel;
884	regs[tDS] = _udatasel;
885	regs[tES] = _udatasel;
886	regs[tCS] = _ucodesel;
887
888	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
889	load_cr0(rcr0() | CR0_TS);	/* start emulating */
890#if	NNPX > 0
891	npxinit(__INITIAL_NPXCW__);
892#endif	/* NNPX > 0 */
893}
894
895/*
896 * machine dependent system variables.
897 */
898int
899cpu_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
900	int *name;
901	u_int namelen;
902	void *oldp;
903	size_t *oldlenp;
904	void *newp;
905	size_t newlen;
906	struct proc *p;
907{
908	int error;
909
910	/* all sysctl names at this level are terminal */
911	if (namelen != 1)
912		return (ENOTDIR);               /* overloaded */
913
914	switch (name[0]) {
915	case CPU_CONSDEV:
916		return (sysctl_rdstruct(oldp, oldlenp, newp, &cn_tty->t_dev,
917		   sizeof cn_tty->t_dev));
918	case CPU_ADJKERNTZ:
919		error = sysctl_int(oldp, oldlenp, newp, newlen, &adjkerntz);
920		if (!error && newp)
921			resettodr();
922		return error;
923	case CPU_DISRTCSET:
924		return (sysctl_int(oldp, oldlenp, newp,	newlen,	&disable_rtc_set));
925	default:
926		return (EOPNOTSUPP);
927	}
928	/* NOTREACHED */
929}
930
931/*
932 * Initialize 386 and configure to run kernel
933 */
934
935/*
936 * Initialize segments & interrupt table
937 */
938
939int currentldt;
940int _default_ldt;
941union descriptor gdt[NGDT];		/* global descriptor table */
942struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
943union descriptor ldt[NLDT];		/* local descriptor table */
944
945struct	i386tss	tss, panic_tss;
946
947extern  struct user *proc0paddr;
948
949/* software prototypes -- in more palatable form */
950struct soft_segment_descriptor gdt_segs[] = {
951/* GNULL_SEL	0 Null Descriptor */
952{	0x0,			/* segment base address  */
953	0x0,			/* length */
954	0,			/* segment type */
955	0,			/* segment descriptor priority level */
956	0,			/* segment descriptor present */
957	0, 0,
958	0,			/* default 32 vs 16 bit size */
959	0  			/* limit granularity (byte/page units)*/ },
960/* GCODE_SEL	1 Code Descriptor for kernel */
961{	0x0,			/* segment base address  */
962	0xfffff,		/* length - all address space */
963	SDT_MEMERA,		/* segment type */
964	0,			/* segment descriptor priority level */
965	1,			/* segment descriptor present */
966	0, 0,
967	1,			/* default 32 vs 16 bit size */
968	1  			/* limit granularity (byte/page units)*/ },
969/* GDATA_SEL	2 Data Descriptor for kernel */
970{	0x0,			/* segment base address  */
971	0xfffff,		/* length - all address space */
972	SDT_MEMRWA,		/* segment type */
973	0,			/* segment descriptor priority level */
974	1,			/* segment descriptor present */
975	0, 0,
976	1,			/* default 32 vs 16 bit size */
977	1  			/* limit granularity (byte/page units)*/ },
978/* GLDT_SEL	3 LDT Descriptor */
979{	(int) ldt,		/* segment base address  */
980	sizeof(ldt)-1,		/* length - all address space */
981	SDT_SYSLDT,		/* segment type */
982	0,			/* segment descriptor priority level */
983	1,			/* segment descriptor present */
984	0, 0,
985	0,			/* unused - default 32 vs 16 bit size */
986	0  			/* limit granularity (byte/page units)*/ },
987/* GTGATE_SEL	4 Null Descriptor - Placeholder */
988{	0x0,			/* segment base address  */
989	0x0,			/* length - all address space */
990	0,			/* segment type */
991	0,			/* segment descriptor priority level */
992	0,			/* segment descriptor present */
993	0, 0,
994	0,			/* default 32 vs 16 bit size */
995	0  			/* limit granularity (byte/page units)*/ },
996/* GPANIC_SEL	5 Panic Tss Descriptor */
997{	(int) &panic_tss,	/* segment base address  */
998	sizeof(tss)-1,		/* length - all address space */
999	SDT_SYS386TSS,		/* segment type */
1000	0,			/* segment descriptor priority level */
1001	1,			/* segment descriptor present */
1002	0, 0,
1003	0,			/* unused - default 32 vs 16 bit size */
1004	0  			/* limit granularity (byte/page units)*/ },
1005/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
1006{	(int) kstack,		/* segment base address  */
1007	sizeof(tss)-1,		/* length - all address space */
1008	SDT_SYS386TSS,		/* segment type */
1009	0,			/* segment descriptor priority level */
1010	1,			/* segment descriptor present */
1011	0, 0,
1012	0,			/* unused - default 32 vs 16 bit size */
1013	0  			/* limit granularity (byte/page units)*/ },
1014/* GUSERLDT_SEL	7 User LDT Descriptor per process */
1015{	(int) ldt,		/* segment base address  */
1016	(512 * sizeof(union descriptor)-1),		/* length */
1017	SDT_SYSLDT,		/* segment type */
1018	0,			/* segment descriptor priority level */
1019	1,			/* segment descriptor present */
1020	0, 0,
1021	0,			/* unused - default 32 vs 16 bit size */
1022	0  			/* limit granularity (byte/page units)*/ },
1023/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
1024{	0,			/* segment base address (overwritten by APM)  */
1025	0xfffff,		/* length */
1026	SDT_MEMERA,		/* segment type */
1027	0,			/* segment descriptor priority level */
1028	1,			/* segment descriptor present */
1029	0, 0,
1030	1,			/* default 32 vs 16 bit size */
1031	1  			/* limit granularity (byte/page units)*/ },
1032/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
1033{	0,			/* segment base address (overwritten by APM)  */
1034	0xfffff,		/* length */
1035	SDT_MEMERA,		/* segment type */
1036	0,			/* segment descriptor priority level */
1037	1,			/* segment descriptor present */
1038	0, 0,
1039	0,			/* default 32 vs 16 bit size */
1040	1  			/* limit granularity (byte/page units)*/ },
1041/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
1042{	0,			/* segment base address (overwritten by APM) */
1043	0xfffff,		/* length */
1044	SDT_MEMRWA,		/* segment type */
1045	0,			/* segment descriptor priority level */
1046	1,			/* segment descriptor present */
1047	0, 0,
1048	1,			/* default 32 vs 16 bit size */
1049	1  			/* limit granularity (byte/page units)*/ },
1050};
1051
1052struct soft_segment_descriptor ldt_segs[] = {
1053	/* Null Descriptor - overwritten by call gate */
1054{	0x0,			/* segment base address  */
1055	0x0,			/* length - all address space */
1056	0,			/* segment type */
1057	0,			/* segment descriptor priority level */
1058	0,			/* segment descriptor present */
1059	0, 0,
1060	0,			/* default 32 vs 16 bit size */
1061	0  			/* limit granularity (byte/page units)*/ },
1062	/* Null Descriptor - overwritten by call gate */
1063{	0x0,			/* segment base address  */
1064	0x0,			/* length - all address space */
1065	0,			/* segment type */
1066	0,			/* segment descriptor priority level */
1067	0,			/* segment descriptor present */
1068	0, 0,
1069	0,			/* default 32 vs 16 bit size */
1070	0  			/* limit granularity (byte/page units)*/ },
1071	/* Null Descriptor - overwritten by call gate */
1072{	0x0,			/* segment base address  */
1073	0x0,			/* length - all address space */
1074	0,			/* segment type */
1075	0,			/* segment descriptor priority level */
1076	0,			/* segment descriptor present */
1077	0, 0,
1078	0,			/* default 32 vs 16 bit size */
1079	0  			/* limit granularity (byte/page units)*/ },
1080	/* Code Descriptor for user */
1081{	0x0,			/* segment base address  */
1082	0xfffff,		/* length - all address space */
1083	SDT_MEMERA,		/* segment type */
1084	SEL_UPL,		/* segment descriptor priority level */
1085	1,			/* segment descriptor present */
1086	0, 0,
1087	1,			/* default 32 vs 16 bit size */
1088	1  			/* limit granularity (byte/page units)*/ },
1089	/* Data Descriptor for user */
1090{	0x0,			/* segment base address  */
1091	0xfffff,		/* length - all address space */
1092	SDT_MEMRWA,		/* segment type */
1093	SEL_UPL,		/* segment descriptor priority level */
1094	1,			/* segment descriptor present */
1095	0, 0,
1096	1,			/* default 32 vs 16 bit size */
1097	1  			/* limit granularity (byte/page units)*/ },
1098};
1099
1100void
1101setidt(idx, func, typ, dpl)
1102	int idx;
1103	inthand_t *func;
1104	int typ;
1105	int dpl;
1106{
1107	struct gate_descriptor *ip = idt + idx;
1108
1109	ip->gd_looffset = (int)func;
1110	ip->gd_selector = 8;
1111	ip->gd_stkcpy = 0;
1112	ip->gd_xx = 0;
1113	ip->gd_type = typ;
1114	ip->gd_dpl = dpl;
1115	ip->gd_p = 1;
1116	ip->gd_hioffset = ((int)func)>>16 ;
1117}
1118
1119#define	IDTVEC(name)	__CONCAT(X,name)
1120
1121extern inthand_t
1122	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
1123	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm),
1124	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
1125	IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
1126	IDTVEC(syscall);
1127
1128void
1129sdtossd(sd, ssd)
1130	struct segment_descriptor *sd;
1131	struct soft_segment_descriptor *ssd;
1132{
1133	ssd->ssd_base  = (sd->sd_hibase << 24) | sd->sd_lobase;
1134	ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit;
1135	ssd->ssd_type  = sd->sd_type;
1136	ssd->ssd_dpl   = sd->sd_dpl;
1137	ssd->ssd_p     = sd->sd_p;
1138	ssd->ssd_def32 = sd->sd_def32;
1139	ssd->ssd_gran  = sd->sd_gran;
1140}
1141
1142void
1143init386(first)
1144	int first;
1145{
1146	extern char etext[];
1147	int x;
1148	unsigned biosbasemem, biosextmem;
1149	struct gate_descriptor *gdp;
1150	int gsel_tss;
1151	extern int sigcode,szsigcode;
1152	/* table descriptors - used to load tables by microp */
1153	struct region_descriptor r_gdt, r_idt;
1154	int	pagesinbase, pagesinext;
1155	int	target_page;
1156	extern struct pte *CMAP1;
1157	extern caddr_t CADDR1;
1158
1159	proc0.p_addr = proc0paddr;
1160
1161	/*
1162	 * Initialize the console before we print anything out.
1163	 */
1164
1165	cninit ();
1166
1167	/*
1168	 * make gdt memory segments, the code segment goes up to end of the
1169	 * page with etext in it, the data segment goes to the end of
1170	 * the address space
1171	 */
1172	/*
1173	 * XXX text protection is temporarily (?) disabled.  The limit was
1174	 * i386_btop(i386_round_page(etext)) - 1.
1175	 */
1176	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
1177	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
1178	for (x = 0; x < NGDT; x++)
1179		ssdtosd(&gdt_segs[x], &gdt[x].sd);
1180
1181	/* make ldt memory segments */
1182	/*
1183	 * The data segment limit must not cover the user area because we
1184	 * don't want the user area to be writable in copyout() etc. (page
1185	 * level protection is lost in kernel mode on 386's).  Also, we
1186	 * don't want the user area to be writable directly (page level
1187	 * protection of the user area is not available on 486's with
1188	 * CR0_WP set, because there is no user-read/kernel-write mode).
1189	 *
1190	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
1191	 * should be spelled ...MAX_USER...
1192	 */
1193#define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
1194	/*
1195	 * The code segment limit has to cover the user area until we move
1196	 * the signal trampoline out of the user area.  This is safe because
1197	 * the code segment cannot be written to directly.
1198	 */
1199#define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * NBPG)
1200	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
1201	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
1202	/* Note. eventually want private ldts per process */
1203	for (x = 0; x < NLDT; x++)
1204		ssdtosd(&ldt_segs[x], &ldt[x].sd);
1205
1206	/* exceptions */
1207	for (x = 0; x < NIDT; x++)
1208		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL);
1209	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL);
1210	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL);
1211	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL);
1212 	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL);
1213	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL);
1214	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL);
1215	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL);
1216	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL);
1217	setidt(8, &IDTVEC(dble),  SDT_SYS386TGT, SEL_KPL);
1218	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL);
1219	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL);
1220	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL);
1221	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL);
1222	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL);
1223	setidt(14, &IDTVEC(page),  SDT_SYS386TGT, SEL_KPL);
1224	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL);
1225	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL);
1226	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL);
1227
1228#include	"isa.h"
1229#if	NISA >0
1230	isa_defaultirq();
1231#endif
1232
1233	r_gdt.rd_limit = sizeof(gdt) - 1;
1234	r_gdt.rd_base =  (int) gdt;
1235	lgdt(&r_gdt);
1236
1237	r_idt.rd_limit = sizeof(idt) - 1;
1238	r_idt.rd_base = (int) idt;
1239	lidt(&r_idt);
1240
1241	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
1242	lldt(_default_ldt);
1243	currentldt = _default_ldt;
1244
1245#ifdef DDB
1246	kdb_init();
1247	if (boothowto & RB_KDB)
1248		Debugger("Boot flags requested debugger");
1249#endif
1250
1251	/* Use BIOS values stored in RTC CMOS RAM, since probing
1252	 * breaks certain 386 AT relics.
1253	 */
1254	biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8);
1255	biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8);
1256
1257	/*
1258	 * Print a warning if the official BIOS interface disagrees
1259	 * with the hackish interface used above.  Eventually only
1260	 * the official interface should be used.
1261	 */
1262	if (bootinfo.bi_memsizes_valid) {
1263		if (bootinfo.bi_basemem != biosbasemem)
1264			printf("BIOS basemem (%dK) != RTC basemem (%dK)\n",
1265			       bootinfo.bi_basemem, biosbasemem);
1266		if (bootinfo.bi_extmem != biosextmem)
1267			printf("BIOS extmem (%dK) != RTC extmem (%dK)\n",
1268			       bootinfo.bi_extmem, biosextmem);
1269	}
1270
1271	/*
1272	 * If BIOS tells us that it has more than 640k in the basemem,
1273	 *	don't believe it - set it to 640k.
1274	 */
1275	if (biosbasemem > 640)
1276		biosbasemem = 640;
1277
1278	/*
1279	 * Some 386 machines might give us a bogus number for extended
1280	 *	mem. If this happens, stop now.
1281	 */
1282#ifndef LARGEMEM
1283	if (biosextmem > 65536) {
1284		panic("extended memory beyond limit of 64MB");
1285		/* NOTREACHED */
1286	}
1287#endif
1288
1289	pagesinbase = biosbasemem * 1024 / NBPG;
1290	pagesinext = biosextmem * 1024 / NBPG;
1291
1292	/*
1293	 * Special hack for chipsets that still remap the 384k hole when
1294	 *	there's 16MB of memory - this really confuses people that
1295	 *	are trying to use bus mastering ISA controllers with the
1296	 *	"16MB limit"; they only have 16MB, but the remapping puts
1297	 *	them beyond the limit.
1298	 */
1299	/*
1300	 * If extended memory is between 15-16MB (16-17MB phys address range),
1301	 *	chop it to 15MB.
1302	 */
1303	if ((pagesinext > 3840) && (pagesinext < 4096))
1304		pagesinext = 3840;
1305
1306	/*
1307	 * Maxmem isn't the "maximum memory", it's one larger than the
1308	 * highest page of of the physical address space. It should be
1309	 * called something like "Maxphyspage".
1310	 */
1311	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
1312
1313#ifdef MAXMEM
1314	Maxmem = MAXMEM/4;
1315#endif
1316	/*
1317	 * Calculate number of physical pages, but account for Maxmem
1318	 *	adjustment above.
1319	 */
1320	physmem = pagesinbase + Maxmem - 0x100000/PAGE_SIZE;
1321
1322	/* call pmap initialization to make new kernel address space */
1323	pmap_bootstrap (first, 0);
1324
1325	/*
1326	 * Do a quick, non-destructive check over extended memory to verify
1327	 * what the BIOS tells us agrees with reality. Adjust down Maxmem
1328	 * if we find that the page can't be correctly written to/read from.
1329	 */
1330
1331	for (target_page = Maxmem - 1; target_page >= atop(first); target_page--) {
1332		int tmp;
1333
1334		/*
1335		 * map page into kernel: valid, read/write, non-cacheable
1336		 */
1337		*(int *)CMAP1 = PG_V | PG_KW | PG_N | ptoa(target_page);
1338		pmap_update();
1339
1340		tmp = *(int *)CADDR1;
1341		/*
1342		 * Test for alternating 1's and 0's
1343		 */
1344		*(int *)CADDR1 = 0xaaaaaaaa;
1345		if (*(int *)CADDR1 != 0xaaaaaaaa) {
1346			Maxmem = target_page;
1347			badpages++;
1348			continue;
1349		}
1350		/*
1351		 * Test for alternating 0's and 1's
1352		 */
1353		*(int *)CADDR1 = 0x55555555;
1354		if (*(int *)CADDR1 != 0x55555555) {
1355			Maxmem = target_page;
1356			badpages++;
1357			continue;
1358		}
1359		/*
1360		 * Test for all 1's
1361		 */
1362		*(int *)CADDR1 = 0xffffffff;
1363		if (*(int *)CADDR1 != 0xffffffff) {
1364			Maxmem = target_page;
1365			badpages++;
1366			continue;
1367		}
1368		/*
1369		 * Test for all 0's
1370		 */
1371		*(int *)CADDR1 = 0x0;
1372		if (*(int *)CADDR1 != 0x0) {
1373			/*
1374			 * test of page failed
1375			 */
1376			Maxmem = target_page;
1377			badpages++;
1378			continue;
1379		}
1380		*(int *)CADDR1 = tmp;
1381	}
1382	if (badpages != 0)
1383		printf("WARNING: BIOS extended memory size and reality don't agree.\n");
1384
1385	*(int *)CMAP1 = 0;
1386	pmap_update();
1387
1388	avail_end = (Maxmem << PAGE_SHIFT)
1389		    - i386_round_page(sizeof(struct msgbuf));
1390
1391	/*
1392	 * Initialize pointers to the two chunks of memory; for use
1393	 *	later in vm_page_startup.
1394	 */
1395	/* avail_start is initialized in pmap_bootstrap */
1396	x = 0;
1397	if (pagesinbase > 1) {
1398		phys_avail[x++] = NBPG;		/* skip first page of memory */
1399		phys_avail[x++] = pagesinbase * NBPG;	/* memory up to the ISA hole */
1400	}
1401	phys_avail[x++] = avail_start;	/* memory up to the end */
1402	phys_avail[x++] = avail_end;
1403	phys_avail[x++] = 0;		/* no more chunks */
1404	phys_avail[x++] = 0;
1405
1406	/* now running on new page tables, configured,and u/iom is accessible */
1407
1408	/* make a initial tss so microp can get interrupt stack on syscall! */
1409	proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG;
1410	proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
1411	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
1412
1413	((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt =
1414		(sizeof(tss))<<16;
1415
1416	ltr(gsel_tss);
1417
1418	/* make a call gate to reenter kernel with */
1419	gdp = &ldt[LSYS5CALLS_SEL].gd;
1420
1421	x = (int) &IDTVEC(syscall);
1422	gdp->gd_looffset = x++;
1423	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
1424	gdp->gd_stkcpy = 1;
1425	gdp->gd_type = SDT_SYS386CGT;
1426	gdp->gd_dpl = SEL_UPL;
1427	gdp->gd_p = 1;
1428	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
1429
1430	/* transfer to user mode */
1431
1432	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
1433	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
1434
1435	/* setup proc 0's pcb */
1436	bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode);
1437	proc0.p_addr->u_pcb.pcb_flags = 0;
1438	proc0.p_addr->u_pcb.pcb_ptd = IdlePTD;
1439}
1440
/*
 * Locate the saved register frame of process `p' inside its user block.
 *
 * p->p_md.md_regs holds the frame's address as seen on the kernel stack.
 * Its byte offset from the start of kstack is taken and applied to
 * p->p_addr, the process's user area, so the frame is reached through
 * the user block rather than through the kernel-stack address.
 */
#define	TF_REGP(p)							\
	((struct trapframe *)						\
	 ((char *)(p)->p_addr + ((char *)(p)->p_md.md_regs - kstack)))
1453
1454int
1455ptrace_set_pc(p, addr)
1456	struct proc *p;
1457	unsigned int addr;
1458{
1459	TF_REGP(p)->tf_eip = addr;
1460	return (0);
1461}
1462
1463int
1464ptrace_single_step(p)
1465	struct proc *p;
1466{
1467	TF_REGP(p)->tf_eflags |= PSL_T;
1468	return (0);
1469}
1470
1471int
1472ptrace_getregs(p, addr)
1473	struct proc *p;
1474	unsigned int *addr;
1475{
1476	int error;
1477	struct reg regs;
1478
1479	error = fill_regs(p, &regs);
1480	if (error)
1481		return (error);
1482	return (copyout(&regs, addr, sizeof regs));
1483}
1484
1485int
1486ptrace_setregs(p, addr)
1487	struct proc *p;
1488	unsigned int *addr;
1489{
1490	int error;
1491	struct reg regs;
1492
1493	error = copyin(addr, &regs, sizeof regs);
1494	if (error)
1495		return (error);
1496	return (set_regs(p, &regs));
1497}
1498
1499int ptrace_write_u(p, off, data)
1500	struct proc *p;
1501	vm_offset_t off;
1502	int data;
1503{
1504	struct trapframe frame_copy;
1505	vm_offset_t min;
1506	struct trapframe *tp;
1507
1508	/*
1509	 * Privileged kernel state is scattered all over the user area.
1510	 * Only allow write access to parts of regs and to fpregs.
1511	 */
1512	min = (char *)p->p_md.md_regs - kstack;
1513	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
1514		tp = TF_REGP(p);
1515		frame_copy = *tp;
1516		*(int *)((char *)&frame_copy + (off - min)) = data;
1517		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
1518		    !CS_SECURE(frame_copy.tf_cs))
1519			return (EINVAL);
1520		*(int*)((char *)p->p_addr + off) = data;
1521		return (0);
1522	}
1523	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
1524	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
1525		*(int*)((char *)p->p_addr + off) = data;
1526		return (0);
1527	}
1528	return (EFAULT);
1529}
1530
1531int
1532fill_regs(p, regs)
1533	struct proc *p;
1534	struct reg *regs;
1535{
1536	struct trapframe *tp;
1537
1538	tp = TF_REGP(p);
1539	regs->r_es = tp->tf_es;
1540	regs->r_ds = tp->tf_ds;
1541	regs->r_edi = tp->tf_edi;
1542	regs->r_esi = tp->tf_esi;
1543	regs->r_ebp = tp->tf_ebp;
1544	regs->r_ebx = tp->tf_ebx;
1545	regs->r_edx = tp->tf_edx;
1546	regs->r_ecx = tp->tf_ecx;
1547	regs->r_eax = tp->tf_eax;
1548	regs->r_eip = tp->tf_eip;
1549	regs->r_cs = tp->tf_cs;
1550	regs->r_eflags = tp->tf_eflags;
1551	regs->r_esp = tp->tf_esp;
1552	regs->r_ss = tp->tf_ss;
1553	return (0);
1554}
1555
1556int
1557set_regs(p, regs)
1558	struct proc *p;
1559	struct reg *regs;
1560{
1561	struct trapframe *tp;
1562
1563	tp = TF_REGP(p);
1564	if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) ||
1565	    !CS_SECURE(regs->r_cs))
1566		return (EINVAL);
1567	tp->tf_es = regs->r_es;
1568	tp->tf_ds = regs->r_ds;
1569	tp->tf_edi = regs->r_edi;
1570	tp->tf_esi = regs->r_esi;
1571	tp->tf_ebp = regs->r_ebp;
1572	tp->tf_ebx = regs->r_ebx;
1573	tp->tf_edx = regs->r_edx;
1574	tp->tf_ecx = regs->r_ecx;
1575	tp->tf_eax = regs->r_eax;
1576	tp->tf_eip = regs->r_eip;
1577	tp->tf_cs = regs->r_cs;
1578	tp->tf_eflags = regs->r_eflags;
1579	tp->tf_esp = regs->r_esp;
1580	tp->tf_ss = regs->r_ss;
1581	return (0);
1582}
1583
#ifndef DDB
/*
 * Stand-in compiled only when DDB is not defined: it just reports that
 * a debugger entry was requested, quoting the caller's message.
 */
void
Debugger(const char *msg)
{
	printf("Debugger(\"%s\") called.\n", msg);
}
#endif /* no DDB */
1591
1592#include <sys/disklabel.h>
1593#define b_cylin	b_resid
1594/*
1595 * Determine the size of the transfer, and make sure it is
1596 * within the boundaries of the partition. Adjust transfer
1597 * if needed, and signal errors or early completion.
1598 */
1599int
1600bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
1601{
1602        struct partition *p = lp->d_partitions + dkpart(bp->b_dev);
1603        int labelsect = lp->d_partitions[0].p_offset;
1604        int maxsz = p->p_size,
1605                sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
1606
1607        /* overwriting disk label ? */
1608        /* XXX should also protect bootstrap in first 8K */
1609        if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect &&
1610#if LABELSECTOR != 0
1611            bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
1612#endif
1613            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1614                bp->b_error = EROFS;
1615                goto bad;
1616        }
1617
1618#if     defined(DOSBBSECTOR) && defined(notyet)
1619        /* overwriting master boot record? */
1620        if (bp->b_blkno + p->p_offset <= DOSBBSECTOR &&
1621            (bp->b_flags & B_READ) == 0 && wlabel == 0) {
1622                bp->b_error = EROFS;
1623                goto bad;
1624        }
1625#endif
1626
1627        /* beyond partition? */
1628        if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) {
1629                /* if exactly at end of disk, return an EOF */
1630                if (bp->b_blkno == maxsz) {
1631                        bp->b_resid = bp->b_bcount;
1632                        return(0);
1633                }
1634                /* or truncate if part of it fits */
1635                sz = maxsz - bp->b_blkno;
1636                if (sz <= 0) {
1637                        bp->b_error = EINVAL;
1638                        goto bad;
1639                }
1640                bp->b_bcount = sz << DEV_BSHIFT;
1641        }
1642
1643        /* calculate cylinder for disksort to order transfers with */
1644        bp->b_pblkno = bp->b_blkno + p->p_offset;
1645        bp->b_cylin = bp->b_pblkno / lp->d_secpercyl;
1646        return(1);
1647
1648bad:
1649        bp->b_flags |= B_ERROR;
1650        return(-1);
1651}
1652
/*
 * Copy the drive number `drive' out to the address `userp'.
 *
 * `*maxlen' is the space remaining at the destination.  If it is too
 * small to hold an int, ENOMEM is returned and nothing is copied;
 * otherwise it is reduced by the size of an int and the result of the
 * copyout() is returned.
 */
int
disk_externalize(int drive, void *userp, size_t *maxlen)
{
	if (*maxlen >= sizeof drive) {
		*maxlen -= sizeof drive;
		return copyout(&drive, userp, sizeof drive);
	}

	return ENOMEM;
}
1663