machdep.c revision 17118
1/*- 2 * Copyright (c) 1992 Terrence R. Lambert. 3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 38 * $Id: machdep.c,v 1.194 1996/07/08 19:44:39 wollman Exp $ 39 */ 40 41#include "npx.h" 42#include "opt_sysvipc.h" 43#include "opt_ddb.h" 44#include "opt_bounce.h" 45#include "opt_machdep.h" 46#include "opt_perfmon.h" 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/sysproto.h> 51#include <sys/signalvar.h> 52#include <sys/kernel.h> 53#include <sys/proc.h> 54#include <sys/buf.h> 55#include <sys/reboot.h> 56#include <sys/conf.h> 57#include <sys/file.h> 58#include <sys/callout.h> 59#include <sys/malloc.h> 60#include <sys/mbuf.h> 61#include <sys/mount.h> 62#include <sys/msgbuf.h> 63#include <sys/ioctl.h> 64#include <sys/sysent.h> 65#include <sys/tty.h> 66#include <sys/sysctl.h> 67#include <sys/devconf.h> 68#include <sys/vmmeter.h> 69 70#ifdef SYSVSHM 71#include <sys/shm.h> 72#endif 73 74#ifdef SYSVMSG 75#include <sys/msg.h> 76#endif 77 78#ifdef SYSVSEM 79#include <sys/sem.h> 80#endif 81 82#include <vm/vm.h> 83#include <vm/vm_param.h> 84#include <vm/vm_prot.h> 85#include <vm/lock.h> 86#include <vm/vm_kern.h> 87#include <vm/vm_object.h> 88#include <vm/vm_page.h> 89#include <vm/vm_map.h> 90#include <vm/vm_pager.h> 91#include <vm/vm_extern.h> 92 93#include <sys/user.h> 94#include <sys/exec.h> 95#include <sys/vnode.h> 96 97#include <ddb/ddb.h> 98 99#include <net/netisr.h> 100 101#include <machine/cpu.h> 102#include <machine/npx.h> 103#include <machine/reg.h> 104#include <machine/psl.h> 105#include <machine/clock.h> 106#include <machine/specialreg.h> 107#include <machine/sysarch.h> 108#include <machine/cons.h> 109#include <machine/devconf.h> 110#include <machine/bootinfo.h> 111#include <machine/md_var.h> 112#ifdef PERFMON 113#include <machine/perfmon.h> 114#endif 115 116#include <i386/isa/isa_device.h> 117#include <i386/isa/rtc.h> 118#include <machine/random.h> 119 120extern void init386 __P((int first)); 121extern int ptrace_set_pc __P((struct proc *p, unsigned int addr)); 122extern int ptrace_single_step __P((struct proc *p)); 123extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data)); 124extern void dblfault_handler __P((void)); 125 126extern void identifycpu(void); /* XXX header file */ 127extern void earlysetcpuclass(void); /* same header file */ 128 129static void cpu_startup __P((void *)); 130SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) 131 132 133#ifndef PANIC_REBOOT_WAIT_TIME 134#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 135#endif 136 137#ifdef BOUNCE_BUFFERS 138extern char *bouncememory; 139extern int maxbkva; 140#ifdef BOUNCEPAGES 141int bouncepages = BOUNCEPAGES; 142#else 143int bouncepages = 0; 144#endif 145#endif /* BOUNCE_BUFFERS */ 146 147extern int freebufspace; 148int msgbufmapped = 0; /* set when safe to use msgbuf */ 149int _udatasel, _ucodesel; 150u_int atdevbase; 151 152 153int physmem = 0; 154int cold = 1; 155 156static int 157sysctl_hw_physmem SYSCTL_HANDLER_ARGS 158{ 159 int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); 160 return (error); 161} 162 163SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 164 0, 0, sysctl_hw_physmem, "I", ""); 165 166static int 167sysctl_hw_usermem SYSCTL_HANDLER_ARGS 168{ 169 int error = sysctl_handle_int(oidp, 0, 170 ctob(physmem - cnt.v_wire_count), req); 171 return (error); 172} 173 174SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 175 0, 0, sysctl_hw_usermem, "I", ""); 176 177int boothowto = 0, bootverbose = 0, Maxmem = 0; 178static int badpages = 0; 179long dumplo; 180extern int bootdev; 181 182vm_offset_t phys_avail[10]; 183 184/* must be 2 less so 0 0 can signal end of chunks */ 185#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) 186 187static void dumpsys __P((void)); 188static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */ 189 190static vm_offset_t buffer_sva, buffer_eva; 191vm_offset_t clean_sva, clean_eva; 192static vm_offset_t pager_sva, pager_eva; 193extern struct linker_set netisr_set; 194 195#define offsetof(type, member) ((size_t)(&((type *)0)->member)) 196 197static void 198cpu_startup(dummy) 199 void *dummy; 200{ 201 register unsigned i; 202 register caddr_t v; 203 vm_offset_t maxaddr; 204 vm_size_t size = 0; 205 int firstaddr; 206 vm_offset_t minaddr; 207 208 if (boothowto & RB_VERBOSE) 209 bootverbose++; 210 211 /* 212 * Initialize error message buffer (at end of core). 213 */ 214 215 /* avail_end was pre-decremented in init386() to compensate */ 216 for (i = 0; i < btoc(sizeof (struct msgbuf)); i++) 217 pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp, 218 avail_end + i * PAGE_SIZE, 219 VM_PROT_ALL, TRUE); 220 msgbufmapped = 1; 221 222 /* 223 * Good {morning,afternoon,evening,night}. 224 */ 225 printf(version); 226 earlysetcpuclass(); 227 startrtclock(); 228 identifycpu(); 229#ifdef PERFMON 230 perfmon_init(); 231#endif 232 printf("real memory = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); 233 /* 234 * Display any holes after the first chunk of extended memory. 235 */ 236 if (badpages != 0) { 237 int indx = 1; 238 239 /* 240 * XXX skip reporting ISA hole & unmanaged kernel memory 241 */ 242 if (phys_avail[0] == PAGE_SIZE) 243 indx += 2; 244 245 printf("Physical memory hole(s):\n"); 246 for (; phys_avail[indx + 1] != 0; indx += 2) { 247 int size = phys_avail[indx + 1] - phys_avail[indx]; 248 249 printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx], 250 phys_avail[indx + 1] - 1, size, size / PAGE_SIZE); 251 } 252 } 253 254 /* 255 * Quickly wire in netisrs. 256 */ 257 setup_netisrs(&netisr_set); 258 259/* 260#ifdef ISDN 261 DONET(isdnintr, NETISR_ISDN); 262#endif 263*/ 264 265 /* 266 * Allocate space for system data structures. 267 * The first available kernel virtual address is in "v". 268 * As pages of kernel virtual memory are allocated, "v" is incremented. 269 * As pages of memory are allocated and cleared, 270 * "firstaddr" is incremented. 271 * An index into the kernel page table corresponding to the 272 * virtual memory address maintained in "v" is kept in "mapaddr". 273 */ 274 275 /* 276 * Make two passes. The first pass calculates how much memory is 277 * needed and allocates it. The second pass assigns virtual 278 * addresses to the various data structures. 279 */ 280 firstaddr = 0; 281again: 282 v = (caddr_t)firstaddr; 283 284#define valloc(name, type, num) \ 285 (name) = (type *)v; v = (caddr_t)((name)+(num)) 286#define valloclim(name, type, num, lim) \ 287 (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) 288 valloc(callout, struct callout, ncallout); 289#ifdef SYSVSHM 290 valloc(shmsegs, struct shmid_ds, shminfo.shmmni); 291#endif 292#ifdef SYSVSEM 293 valloc(sema, struct semid_ds, seminfo.semmni); 294 valloc(sem, struct sem, seminfo.semmns); 295 /* This is pretty disgusting! */ 296 valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int)); 297#endif 298#ifdef SYSVMSG 299 valloc(msgpool, char, msginfo.msgmax); 300 valloc(msgmaps, struct msgmap, msginfo.msgseg); 301 valloc(msghdrs, struct msg, msginfo.msgtql); 302 valloc(msqids, struct msqid_ds, msginfo.msgmni); 303#endif 304 305 if (nbuf == 0) { 306 nbuf = 30; 307 if( physmem > 1024) 308 nbuf += min((physmem - 1024) / 12, 1024); 309 } 310 nswbuf = min(nbuf, 128); 311 312 valloc(swbuf, struct buf, nswbuf); 313 valloc(buf, struct buf, nbuf); 314 315#ifdef BOUNCE_BUFFERS 316 /* 317 * If there is more than 16MB of memory, allocate some bounce buffers 318 */ 319 if (Maxmem > 4096) { 320 if (bouncepages == 0) { 321 bouncepages = 64; 322 bouncepages += ((Maxmem - 4096) / 2048) * 32; 323 } 324 v = (caddr_t)((vm_offset_t)round_page(v)); 325 valloc(bouncememory, char, bouncepages * PAGE_SIZE); 326 } 327#endif 328 329 /* 330 * End of first pass, size has been calculated so allocate memory 331 */ 332 if (firstaddr == 0) { 333 size = (vm_size_t)(v - firstaddr); 334 firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); 335 if (firstaddr == 0) 336 panic("startup: no room for tables"); 337 goto again; 338 } 339 340 /* 341 * End of second pass, addresses have been assigned 342 */ 343 if ((vm_size_t)(v - firstaddr) != size) 344 panic("startup: table size inconsistency"); 345 346#ifdef BOUNCE_BUFFERS 347 clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, 348 (nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + 349 maxbkva + pager_map_size, TRUE); 350 io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE); 351#else 352 clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, 353 (nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE); 354#endif 355 buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, 356 (nbuf*MAXBSIZE), TRUE); 357 pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, 358 (nswbuf*MAXPHYS) + pager_map_size, TRUE); 359 exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 360 (16*ARG_MAX), TRUE); 361 exech_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 362 (32*ARG_MAX), TRUE); 363 u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 364 (maxproc*UPAGES*PAGE_SIZE), FALSE); 365 366 /* 367 * Finally, allocate mbuf pool. Since mclrefcnt is an off-size 368 * we use the more space efficient malloc in place of kmem_alloc. 369 */ 370 mclrefcnt = (char *)malloc(nmbclusters+PAGE_SIZE/MCLBYTES, 371 M_MBUF, M_NOWAIT); 372 bzero(mclrefcnt, nmbclusters+PAGE_SIZE/MCLBYTES); 373 mcl_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, 374 nmbclusters * MCLBYTES, FALSE); 375 { 376 vm_size_t mb_map_size; 377 mb_map_size = nmbufs * MSIZE; 378 mb_map = kmem_suballoc(kmem_map, &minaddr, &maxaddr, 379 round_page(mb_map_size), FALSE); 380 } 381 382 /* 383 * Initialize callouts 384 */ 385 callfree = callout; 386 for (i = 1; i < ncallout; i++) 387 callout[i-1].c_next = &callout[i]; 388 389 if (boothowto & RB_CONFIG) { 390 userconfig(); 391 cninit(); /* the preferred console may have changed */ 392 } 393 394#ifdef BOUNCE_BUFFERS 395 /* 396 * init bounce buffers 397 */ 398 vm_bounce_init(); 399#endif 400 401 printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count), 402 ptoa(cnt.v_free_count) / 1024); 403 404 /* 405 * Set up buffers, so they can be used to read disk labels. 406 */ 407 bufinit(); 408 vm_pager_bufferinit(); 409 410 /* 411 * In verbose mode, print out the BIOS's idea of the disk geometries. 412 */ 413 if (bootverbose) { 414 printf("BIOS Geometries:\n"); 415 for (i = 0; i < N_BIOS_GEOM; i++) { 416 unsigned long bios_geom; 417 int max_cylinder, max_head, max_sector; 418 419 bios_geom = bootinfo.bi_bios_geom[i]; 420 421 /* 422 * XXX the bootstrap punts a 1200K floppy geometry 423 * when the get-disk-geometry interrupt fails. Skip 424 * drives that have this geometry. 425 */ 426 if (bios_geom == 0x4f010f) 427 continue; 428 429 printf(" %x:%08lx ", i, bios_geom); 430 max_cylinder = bios_geom >> 16; 431 max_head = (bios_geom >> 8) & 0xff; 432 max_sector = bios_geom & 0xff; 433 printf( 434 "0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n", 435 max_cylinder, max_cylinder + 1, 436 max_head, max_head + 1, 437 max_sector, max_sector); 438 } 439 printf(" %d accounted for\n", bootinfo.bi_n_bios_used); 440 } 441} 442 443int 444register_netisr(num, handler) 445 int num; 446 netisr_t *handler; 447{ 448 449 if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { 450 printf("register_netisr: bad isr number: %d\n", num); 451 return (EINVAL); 452 } 453 netisrs[num] = handler; 454 return (0); 455} 456 457static void 458setup_netisrs(ls) 459 struct linker_set *ls; 460{ 461 int i; 462 const struct netisrtab *nit; 463 464 for(i = 0; ls->ls_items[i]; i++) { 465 nit = (const struct netisrtab *)ls->ls_items[i]; 466 register_netisr(nit->nit_num, nit->nit_isr); 467 } 468} 469 470/* 471 * Send an interrupt to process. 472 * 473 * Stack is set up to allow sigcode stored 474 * at top to call routine, followed by kcall 475 * to sigreturn routine below. After sigreturn 476 * resets the signal mask, the stack, and the 477 * frame pointer, it returns to the user 478 * specified pc, psl. 479 */ 480void 481sendsig(catcher, sig, mask, code) 482 sig_t catcher; 483 int sig, mask; 484 u_long code; 485{ 486 register struct proc *p = curproc; 487 register int *regs; 488 register struct sigframe *fp; 489 struct sigframe sf; 490 struct sigacts *psp = p->p_sigacts; 491 int oonstack; 492 493 regs = p->p_md.md_regs; 494 oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK; 495 /* 496 * Allocate and validate space for the signal handler context. 497 */ 498 if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack && 499 (psp->ps_sigonstack & sigmask(sig))) { 500 fp = (struct sigframe *)(psp->ps_sigstk.ss_sp + 501 psp->ps_sigstk.ss_size - sizeof(struct sigframe)); 502 psp->ps_sigstk.ss_flags |= SS_ONSTACK; 503 } else { 504 fp = (struct sigframe *)regs[tESP] - 1; 505 } 506 507 /* 508 * grow() will return FALSE if the fp will not fit inside the stack 509 * and the stack can not be grown. useracc will return FALSE 510 * if access is denied. 511 */ 512 if ((grow(p, (int)fp) == FALSE) || 513 (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) { 514 /* 515 * Process has trashed its stack; give it an illegal 516 * instruction to halt it in its tracks. 517 */ 518 SIGACTION(p, SIGILL) = SIG_DFL; 519 sig = sigmask(SIGILL); 520 p->p_sigignore &= ~sig; 521 p->p_sigcatch &= ~sig; 522 p->p_sigmask &= ~sig; 523 psignal(p, SIGILL); 524 return; 525 } 526 527 /* 528 * Build the argument list for the signal handler. 529 */ 530 if (p->p_sysent->sv_sigtbl) { 531 if (sig < p->p_sysent->sv_sigsize) 532 sig = p->p_sysent->sv_sigtbl[sig]; 533 else 534 sig = p->p_sysent->sv_sigsize + 1; 535 } 536 sf.sf_signum = sig; 537 sf.sf_code = code; 538 sf.sf_scp = &fp->sf_sc; 539 sf.sf_addr = (char *) regs[tERR]; 540 sf.sf_handler = catcher; 541 542 /* save scratch registers */ 543 sf.sf_sc.sc_eax = regs[tEAX]; 544 sf.sf_sc.sc_ebx = regs[tEBX]; 545 sf.sf_sc.sc_ecx = regs[tECX]; 546 sf.sf_sc.sc_edx = regs[tEDX]; 547 sf.sf_sc.sc_esi = regs[tESI]; 548 sf.sf_sc.sc_edi = regs[tEDI]; 549 sf.sf_sc.sc_cs = regs[tCS]; 550 sf.sf_sc.sc_ds = regs[tDS]; 551 sf.sf_sc.sc_ss = regs[tSS]; 552 sf.sf_sc.sc_es = regs[tES]; 553 sf.sf_sc.sc_isp = regs[tISP]; 554 555 /* 556 * Build the signal context to be used by sigreturn. 557 */ 558 sf.sf_sc.sc_onstack = oonstack; 559 sf.sf_sc.sc_mask = mask; 560 sf.sf_sc.sc_sp = regs[tESP]; 561 sf.sf_sc.sc_fp = regs[tEBP]; 562 sf.sf_sc.sc_pc = regs[tEIP]; 563 sf.sf_sc.sc_ps = regs[tEFLAGS]; 564 565 /* 566 * Copy the sigframe out to the user's stack. 567 */ 568 if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) { 569 /* 570 * Something is wrong with the stack pointer. 571 * ...Kill the process. 572 */ 573 sigexit(p, SIGILL); 574 }; 575 576 regs[tESP] = (int)fp; 577 regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode)); 578 regs[tEFLAGS] &= ~PSL_VM; 579 regs[tCS] = _ucodesel; 580 regs[tDS] = _udatasel; 581 regs[tES] = _udatasel; 582 regs[tSS] = _udatasel; 583} 584 585/* 586 * System call to cleanup state after a signal 587 * has been taken. Reset signal mask and 588 * stack state from context left by sendsig (above). 589 * Return to previous pc and psl as specified by 590 * context left by sendsig. Check carefully to 591 * make sure that the user has not modified the 592 * state to gain improper privileges. 593 */ 594int 595sigreturn(p, uap, retval) 596 struct proc *p; 597 struct sigreturn_args /* { 598 struct sigcontext *sigcntxp; 599 } */ *uap; 600 int *retval; 601{ 602 register struct sigcontext *scp; 603 register struct sigframe *fp; 604 register int *regs = p->p_md.md_regs; 605 int eflags; 606 607 /* 608 * (XXX old comment) regs[tESP] points to the return address. 609 * The user scp pointer is above that. 610 * The return address is faked in the signal trampoline code 611 * for consistency. 612 */ 613 scp = uap->sigcntxp; 614 fp = (struct sigframe *) 615 ((caddr_t)scp - offsetof(struct sigframe, sf_sc)); 616 617 if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0) 618 return(EINVAL); 619 620 /* 621 * Don't allow users to change privileged or reserved flags. 622 */ 623#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 624 eflags = scp->sc_ps; 625 /* 626 * XXX do allow users to change the privileged flag PSL_RF. The 627 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 628 * sometimes set it there too. tf_eflags is kept in the signal 629 * context during signal handling and there is no other place 630 * to remember it, so the PSL_RF bit may be corrupted by the 631 * signal handler without us knowing. Corruption of the PSL_RF 632 * bit at worst causes one more or one less debugger trap, so 633 * allowing it is fairly harmless. 634 */ 635 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) { 636#ifdef DEBUG 637 printf("sigreturn: eflags = 0x%x\n", eflags); 638#endif 639 return(EINVAL); 640 } 641 642 /* 643 * Don't allow users to load a valid privileged %cs. Let the 644 * hardware check for invalid selectors, excess privilege in 645 * other selectors, invalid %eip's and invalid %esp's. 646 */ 647#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 648 if (!CS_SECURE(scp->sc_cs)) { 649#ifdef DEBUG 650 printf("sigreturn: cs = 0x%x\n", scp->sc_cs); 651#endif 652 trapsignal(p, SIGBUS, T_PROTFLT); 653 return(EINVAL); 654 } 655 656 /* restore scratch registers */ 657 regs[tEAX] = scp->sc_eax; 658 regs[tEBX] = scp->sc_ebx; 659 regs[tECX] = scp->sc_ecx; 660 regs[tEDX] = scp->sc_edx; 661 regs[tESI] = scp->sc_esi; 662 regs[tEDI] = scp->sc_edi; 663 regs[tCS] = scp->sc_cs; 664 regs[tDS] = scp->sc_ds; 665 regs[tES] = scp->sc_es; 666 regs[tSS] = scp->sc_ss; 667 regs[tISP] = scp->sc_isp; 668 669 if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0) 670 return(EINVAL); 671 672 if (scp->sc_onstack & 01) 673 p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK; 674 else 675 p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK; 676 p->p_sigmask = scp->sc_mask &~ 677 (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP)); 678 regs[tEBP] = scp->sc_fp; 679 regs[tESP] = scp->sc_sp; 680 regs[tEIP] = scp->sc_pc; 681 regs[tEFLAGS] = eflags; 682 return(EJUSTRETURN); 683} 684 685static int waittime = -1; 686static struct pcb dumppcb; 687 688__dead void 689boot(howto) 690 int howto; 691{ 692 if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) { 693 register struct buf *bp; 694 int iter, nbusy; 695 696 waittime = 0; 697 printf("\nsyncing disks... "); 698 699 sync(&proc0, NULL, NULL); 700 701 for (iter = 0; iter < 20; iter++) { 702 nbusy = 0; 703 for (bp = &buf[nbuf]; --bp >= buf; ) { 704 if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) { 705 nbusy++; 706 } 707 } 708 if (nbusy == 0) 709 break; 710 printf("%d ", nbusy); 711 DELAY(40000 * iter); 712 } 713 if (nbusy) { 714 /* 715 * Failed to sync all blocks. Indicate this and don't 716 * unmount filesystems (thus forcing an fsck on reboot). 717 */ 718 printf("giving up\n"); 719#ifdef SHOW_BUSYBUFS 720 nbusy = 0; 721 for (bp = &buf[nbuf]; --bp >= buf; ) { 722 if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) { 723 nbusy++; 724 printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno); 725 } 726 } 727 DELAY(5000000); /* 5 seconds */ 728#endif 729 } else { 730 printf("done\n"); 731 /* 732 * Unmount filesystems 733 */ 734 if (panicstr == 0) 735 vfs_unmountall(); 736 } 737 DELAY(100000); /* wait for console output to finish */ 738 dev_shutdownall(FALSE); 739 } 740 splhigh(); 741 if (howto & RB_HALT) { 742 printf("\n"); 743 printf("The operating system has halted.\n"); 744 printf("Please press any key to reboot.\n\n"); 745 cngetc(); 746 } else { 747 if (howto & RB_DUMP) { 748 if (!cold) { 749 savectx(&dumppcb); 750 dumppcb.pcb_cr3 = rcr3(); 751 dumpsys(); 752 } 753 754 if (PANIC_REBOOT_WAIT_TIME != 0) { 755 if (PANIC_REBOOT_WAIT_TIME != -1) { 756 int loop; 757 printf("Automatic reboot in %d seconds - press a key on the console to abort\n", 758 PANIC_REBOOT_WAIT_TIME); 759 for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) { 760 DELAY(1000 * 100); /* 1/10th second */ 761 if (cncheckc()) /* Did user type a key? */ 762 break; 763 } 764 if (!loop) 765 goto die; 766 } 767 } else { /* zero time specified - reboot NOW */ 768 goto die; 769 } 770 printf("--> Press a key on the console to reboot <--\n"); 771 cngetc(); 772 } 773 } 774die: 775 printf("Rebooting...\n"); 776 DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ 777 cpu_reset(); 778 for(;;) ; 779 /* NOTREACHED */ 780} 781 782/* 783 * Magic number for savecore 784 * 785 * exported (symorder) and used at least by savecore(8) 786 * 787 */ 788static u_long const dumpmag = 0x8fca0101UL; 789 790static int dumpsize = 0; /* also for savecore */ 791 792static int dodump = 1; 793SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, ""); 794 795/* 796 * Doadump comes here after turning off memory management and 797 * getting on the dump stack, either when called above, or by 798 * the auto-restart code. 799 */ 800static void 801dumpsys() 802{ 803 804 if (!dodump) 805 return; 806 if (dumpdev == NODEV) 807 return; 808 if ((minor(dumpdev)&07) != 1) 809 return; 810 if (!(bdevsw[major(dumpdev)])) 811 return; 812 if (!(bdevsw[major(dumpdev)]->d_dump)) 813 return; 814 dumpsize = Maxmem; 815 printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo); 816 printf("dump "); 817 switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) { 818 819 case ENXIO: 820 printf("device bad\n"); 821 break; 822 823 case EFAULT: 824 printf("device not ready\n"); 825 break; 826 827 case EINVAL: 828 printf("area improper\n"); 829 break; 830 831 case EIO: 832 printf("i/o error\n"); 833 break; 834 835 case EINTR: 836 printf("aborted from console\n"); 837 break; 838 839 default: 840 printf("succeeded\n"); 841 break; 842 } 843} 844 845/* 846 * Clear registers on exec 847 */ 848void 849setregs(p, entry, stack) 850 struct proc *p; 851 u_long entry; 852 u_long stack; 853{ 854 int *regs = p->p_md.md_regs; 855 856#ifdef USER_LDT 857 struct pcb *pcb = &p->p_addr->u_pcb; 858 859 /* was i386_user_cleanup() in NetBSD */ 860 if (pcb->pcb_ldt) { 861 if (pcb == curpcb) 862 lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); 863 kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt, 864 pcb->pcb_ldt_len * sizeof(union descriptor)); 865 pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0; 866 } 867#endif 868 869 bzero(regs, sizeof(struct trapframe)); 870 regs[tEIP] = entry; 871 regs[tESP] = stack; 872 regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T); 873 regs[tSS] = _udatasel; 874 regs[tDS] = _udatasel; 875 regs[tES] = _udatasel; 876 regs[tCS] = _ucodesel; 877 878 p->p_addr->u_pcb.pcb_flags = 0; /* no fp at all */ 879 load_cr0(rcr0() | CR0_TS); /* start emulating */ 880#if NNPX > 0 881 npxinit(__INITIAL_NPXCW__); 882#endif /* NNPX > 0 */ 883} 884 885static int 886sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS 887{ 888 int error; 889 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, 890 req); 891 if (!error && req->newptr) 892 resettodr(); 893 return (error); 894} 895 896SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, 897 &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); 898 899SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, 900 CTLFLAG_RW, &disable_rtc_set, 0, ""); 901 902SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 903 CTLFLAG_RD, &bootinfo, bootinfo, ""); 904 905SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, 906 CTLFLAG_RW, &wall_cmos_clock, 0, ""); 907 908/* 909 * Initialize 386 and configure to run kernel 910 */ 911 912/* 913 * Initialize segments & interrupt table 914 */ 915 916int currentldt; 917int _default_ldt; 918union descriptor gdt[NGDT]; /* global descriptor table */ 919struct gate_descriptor idt[NIDT]; /* interrupt descriptor table */ 920union descriptor ldt[NLDT]; /* local descriptor table */ 921 922static struct i386tss dblfault_tss; 923static char dblfault_stack[PAGE_SIZE]; 924 925extern struct user *proc0paddr; 926 927/* software prototypes -- in more palatable form */ 928struct soft_segment_descriptor gdt_segs[] = { 929/* GNULL_SEL 0 Null Descriptor */ 930{ 0x0, /* segment base address */ 931 0x0, /* length */ 932 0, /* segment type */ 933 0, /* segment descriptor priority level */ 934 0, /* segment descriptor present */ 935 0, 0, 936 0, /* default 32 vs 16 bit size */ 937 0 /* limit granularity (byte/page units)*/ }, 938/* GCODE_SEL 1 Code Descriptor for kernel */ 939{ 0x0, /* segment base address */ 940 0xfffff, /* length - all address space */ 941 SDT_MEMERA, /* segment type */ 942 0, /* segment descriptor priority level */ 943 1, /* segment descriptor present */ 944 0, 0, 945 1, /* default 32 vs 16 bit size */ 946 1 /* limit granularity (byte/page units)*/ }, 947/* GDATA_SEL 2 Data Descriptor for kernel */ 948{ 0x0, /* segment base address */ 949 0xfffff, /* length - all address space */ 950 SDT_MEMRWA, /* segment type */ 951 0, /* segment descriptor priority level */ 952 1, /* segment descriptor present */ 953 0, 0, 954 1, /* default 32 vs 16 bit size */ 955 1 /* limit granularity (byte/page units)*/ }, 956/* GLDT_SEL 3 LDT Descriptor */ 957{ (int) ldt, /* segment base address */ 958 sizeof(ldt)-1, /* length - all address space */ 959 SDT_SYSLDT, /* segment type */ 960 0, /* segment descriptor priority level */ 961 1, /* segment descriptor present */ 962 0, 0, 963 0, /* unused - default 32 vs 16 bit size */ 964 0 /* limit granularity (byte/page units)*/ }, 965/* GTGATE_SEL 4 Null Descriptor - Placeholder */ 966{ 0x0, /* segment base address */ 967 0x0, /* length - all address space */ 968 0, /* segment type */ 969 0, /* segment descriptor priority level */ 970 0, /* segment descriptor present */ 971 0, 0, 972 0, /* default 32 vs 16 bit size */ 973 0 /* limit granularity (byte/page units)*/ }, 974/* GPANIC_SEL 5 Panic Tss Descriptor */ 975{ (int) &dblfault_tss, /* segment base address */ 976 sizeof(struct i386tss)-1,/* length - all address space */ 977 SDT_SYS386TSS, /* segment type */ 978 0, /* segment descriptor priority level */ 979 1, /* segment descriptor present */ 980 0, 0, 981 0, /* unused - default 32 vs 16 bit size */ 982 0 /* limit granularity (byte/page units)*/ }, 983/* GPROC0_SEL 6 Proc 0 Tss Descriptor */ 984{ (int) kstack, /* segment base address */ 985 sizeof(struct i386tss)-1,/* length - all address space */ 986 SDT_SYS386TSS, /* segment type */ 987 0, /* segment descriptor priority level */ 988 1, /* segment descriptor present */ 989 0, 0, 990 0, /* unused - default 32 vs 16 bit size */ 991 0 /* limit granularity (byte/page units)*/ }, 992/* GUSERLDT_SEL 7 User LDT Descriptor per process */ 993{ (int) ldt, /* segment base address */ 994 (512 * sizeof(union descriptor)-1), /* length */ 995 SDT_SYSLDT, /* segment type */ 996 0, /* segment descriptor priority level */ 997 1, /* segment descriptor present */ 998 0, 0, 999 0, /* unused - default 32 vs 16 bit size */ 1000 0 /* limit granularity (byte/page units)*/ }, 1001/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */ 1002{ 0, /* segment base address (overwritten by APM) */ 1003 0xfffff, /* length */ 1004 SDT_MEMERA, /* segment type */ 1005 0, /* segment descriptor priority level */ 1006 1, /* segment descriptor present */ 1007 0, 0, 1008 1, /* default 32 vs 16 bit size */ 1009 1 /* limit granularity (byte/page units)*/ }, 1010/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */ 1011{ 0, /* segment base address (overwritten by APM) */ 1012 0xfffff, /* length */ 1013 SDT_MEMERA, /* segment type */ 1014 0, /* segment descriptor priority level */ 1015 1, /* segment descriptor present */ 1016 0, 0, 1017 0, /* default 32 vs 16 bit size */ 1018 1 /* limit granularity (byte/page units)*/ }, 1019/* GAPMDATA_SEL 10 APM BIOS 32-bit interface (Data) */ 1020{ 0, /* segment base address (overwritten by APM) */ 1021 0xfffff, /* length */ 1022 SDT_MEMRWA, /* segment type */ 1023 0, /* segment descriptor priority level */ 1024 1, /* segment descriptor present */ 1025 0, 0, 1026 1, /* default 32 vs 16 bit size */ 1027 1 /* limit granularity (byte/page units)*/ }, 1028}; 1029 1030static struct soft_segment_descriptor ldt_segs[] = { 1031 /* Null Descriptor - overwritten by call gate */ 1032{ 0x0, /* segment base address */ 1033 0x0, /* length - all address space */ 1034 0, /* segment type */ 1035 0, /* segment descriptor priority level */ 1036 0, /* segment descriptor present */ 1037 0, 0, 1038 0, /* default 32 vs 16 bit size */ 1039 0 /* limit granularity (byte/page units)*/ }, 1040 /* Null Descriptor - overwritten by call gate */ 1041{ 0x0, /* segment base address */ 1042 0x0, /* length - all address space */ 1043 0, /* segment type */ 1044 0, /* segment descriptor priority level */ 1045 0, /* segment descriptor present */ 1046 0, 0, 1047 0, /* default 32 vs 16 bit size */ 1048 0 /* limit granularity (byte/page units)*/ }, 1049 /* Null Descriptor - overwritten by call gate */ 1050{ 0x0, /* segment base address */ 1051 0x0, /* length - all address space */ 1052 0, /* segment type */ 1053 0, /* segment descriptor priority level */ 1054 0, /* segment descriptor present */ 1055 0, 0, 1056 0, /* default 32 vs 16 bit size */ 1057 0 /* limit granularity (byte/page units)*/ }, 1058 /* Code Descriptor for user */ 1059{ 0x0, /* segment base address */ 1060 0xfffff, /* length - all address space */ 1061 SDT_MEMERA, /* segment type */ 1062 SEL_UPL, /* segment descriptor priority level */ 1063 1, /* segment descriptor present */ 1064 0, 0, 1065 1, /* default 32 vs 16 bit size */ 1066 1 /* limit granularity (byte/page units)*/ }, 1067 /* Data Descriptor for user */ 1068{ 0x0, /* segment base address */ 1069 0xfffff, /* length - all address space */ 1070 SDT_MEMRWA, /* segment type */ 1071 SEL_UPL, /* segment descriptor priority level */ 1072 1, /* segment descriptor present */ 1073 0, 0, 1074 1, /* default 32 vs 16 bit size */ 1075 1 /* limit granularity (byte/page units)*/ }, 1076}; 1077 1078void 1079setidt(idx, func, typ, dpl, selec) 1080 int idx; 1081 inthand_t *func; 1082 int typ; 1083 int dpl; 1084 int selec; 1085{ 1086 struct gate_descriptor *ip = idt + idx; 1087 1088 ip->gd_looffset = (int)func; 1089 ip->gd_selector = selec; 1090 ip->gd_stkcpy = 0; 1091 ip->gd_xx = 0; 1092 ip->gd_type = typ; 1093 ip->gd_dpl = dpl; 1094 ip->gd_p = 1; 1095 ip->gd_hioffset = ((int)func)>>16 ; 1096} 1097 1098#define IDTVEC(name) __CONCAT(X,name) 1099 1100extern inthand_t 1101 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), 1102 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), 1103 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), 1104 IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), 1105 IDTVEC(syscall), IDTVEC(int0x80_syscall); 1106 1107void 1108sdtossd(sd, ssd) 1109 struct segment_descriptor *sd; 1110 struct soft_segment_descriptor *ssd; 1111{ 1112 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; 1113 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; 1114 ssd->ssd_type = sd->sd_type; 1115 ssd->ssd_dpl = sd->sd_dpl; 1116 ssd->ssd_p = sd->sd_p; 1117 ssd->ssd_def32 = sd->sd_def32; 1118 ssd->ssd_gran = sd->sd_gran; 1119} 1120 1121void 1122init386(first) 1123 int first; 1124{ 1125 int x; 1126 unsigned biosbasemem, biosextmem; 1127 struct gate_descriptor *gdp; 1128 int gsel_tss; 1129 /* table descriptors - used to load tables by microp */ 1130 struct region_descriptor r_gdt, r_idt; 1131 int pagesinbase, pagesinext; 1132 int target_page, pa_indx; 1133 1134 proc0.p_addr = proc0paddr; 1135 1136 atdevbase = ISA_HOLE_START + KERNBASE; 1137 1138 /* 1139 * Initialize the console before we print anything out. 1140 */ 1141 cninit(); 1142 1143 /* 1144 * make gdt memory segments, the code segment goes up to end of the 1145 * page with etext in it, the data segment goes to the end of 1146 * the address space 1147 */ 1148 /* 1149 * XXX text protection is temporarily (?) disabled. The limit was 1150 * i386_btop(round_page(etext)) - 1. 1151 */ 1152 gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; 1153 gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; 1154 for (x = 0; x < NGDT; x++) 1155 ssdtosd(&gdt_segs[x], &gdt[x].sd); 1156 1157 /* make ldt memory segments */ 1158 /* 1159 * The data segment limit must not cover the user area because we 1160 * don't want the user area to be writable in copyout() etc. (page 1161 * level protection is lost in kernel mode on 386's). Also, we 1162 * don't want the user area to be writable directly (page level 1163 * protection of the user area is not available on 486's with 1164 * CR0_WP set, because there is no user-read/kernel-write mode). 1165 * 1166 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it 1167 * should be spelled ...MAX_USER... 1168 */ 1169#define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS 1170 /* 1171 * The code segment limit has to cover the user area until we move 1172 * the signal trampoline out of the user area. This is safe because 1173 * the code segment cannot be written to directly. 1174 */ 1175#define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) 1176 ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; 1177 ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; 1178 /* Note. eventually want private ldts per process */ 1179 for (x = 0; x < NLDT; x++) 1180 ssdtosd(&ldt_segs[x], &ldt[x].sd); 1181 1182 /* exceptions */ 1183 for (x = 0; x < NIDT; x++) 1184 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1185 setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1186 setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1187 setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1188 setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1189 setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1190 setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1191 setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1192 setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1193 setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); 1194 setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1195 setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1196 setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1197 setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1198 setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1199 setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1200 setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1201 setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1202 setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1203 setidt(0x80, &IDTVEC(int0x80_syscall), 1204 SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1205 1206#include "isa.h" 1207#if NISA >0 1208 isa_defaultirq(); 1209#endif 1210 rand_initialize(); 1211 1212 r_gdt.rd_limit = sizeof(gdt) - 1; 1213 r_gdt.rd_base = (int) gdt; 1214 lgdt(&r_gdt); 1215 1216 r_idt.rd_limit = sizeof(idt) - 1; 1217 r_idt.rd_base = (int) idt; 1218 lidt(&r_idt); 1219 1220 _default_ldt = GSEL(GLDT_SEL, SEL_KPL); 1221 lldt(_default_ldt); 1222 currentldt = _default_ldt; 1223 1224#ifdef DDB 1225 kdb_init(); 1226 if (boothowto & RB_KDB) 1227 Debugger("Boot flags requested debugger"); 1228#endif 1229 1230 /* Use BIOS values stored in RTC CMOS RAM, since probing 1231 * breaks certain 386 AT relics. 1232 */ 1233 biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); 1234 biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); 1235 1236 /* 1237 * Print a warning if the official BIOS interface disagrees 1238 * with the hackish interface used above. Eventually only 1239 * the official interface should be used. 1240 */ 1241 if (bootinfo.bi_memsizes_valid) { 1242 if (bootinfo.bi_basemem != biosbasemem) 1243 printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n", 1244 bootinfo.bi_basemem, biosbasemem); 1245 if (bootinfo.bi_extmem != biosextmem) 1246 printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n", 1247 bootinfo.bi_extmem, biosextmem); 1248 } 1249 1250 /* 1251 * If BIOS tells us that it has more than 640k in the basemem, 1252 * don't believe it - set it to 640k. 1253 */ 1254 if (biosbasemem > 640) 1255 biosbasemem = 640; 1256 1257 /* 1258 * Some 386 machines might give us a bogus number for extended 1259 * mem. If this happens, stop now. 1260 */ 1261#ifndef LARGEMEM 1262 if (biosextmem > 65536) { 1263 panic("extended memory beyond limit of 64MB"); 1264 /* NOTREACHED */ 1265 } 1266#endif 1267 1268 pagesinbase = biosbasemem * 1024 / PAGE_SIZE; 1269 pagesinext = biosextmem * 1024 / PAGE_SIZE; 1270 1271 /* 1272 * Special hack for chipsets that still remap the 384k hole when 1273 * there's 16MB of memory - this really confuses people that 1274 * are trying to use bus mastering ISA controllers with the 1275 * "16MB limit"; they only have 16MB, but the remapping puts 1276 * them beyond the limit. 1277 */ 1278 /* 1279 * If extended memory is between 15-16MB (16-17MB phys address range), 1280 * chop it to 15MB. 1281 */ 1282 if ((pagesinext > 3840) && (pagesinext < 4096)) 1283 pagesinext = 3840; 1284 1285 /* 1286 * Maxmem isn't the "maximum memory", it's one larger than the 1287 * highest page of the physical address space. It should be 1288 * called something like "Maxphyspage". 1289 */ 1290 Maxmem = pagesinext + 0x100000/PAGE_SIZE; 1291 1292#ifdef MAXMEM 1293 Maxmem = MAXMEM/4; 1294#endif 1295 1296 /* call pmap initialization to make new kernel address space */ 1297 pmap_bootstrap (first, 0); 1298 1299 /* 1300 * Size up each available chunk of physical memory. 1301 */ 1302 1303 /* 1304 * We currently don't bother testing base memory. 1305 * XXX ...but we probably should. 1306 */ 1307 pa_indx = 0; 1308 badpages = 0; 1309 if (pagesinbase > 1) { 1310 phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ 1311 phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ 1312 physmem = pagesinbase - 1; 1313 } else { 1314 /* point at first chunk end */ 1315 pa_indx++; 1316 } 1317 1318 for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { 1319 int tmp, page_bad = FALSE; 1320 1321 /* 1322 * map page into kernel: valid, read/write, non-cacheable 1323 */ 1324 *(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page; 1325 pmap_update(); 1326 1327 tmp = *(int *)CADDR1; 1328 /* 1329 * Test for alternating 1's and 0's 1330 */ 1331 *(volatile int *)CADDR1 = 0xaaaaaaaa; 1332 if (*(volatile int *)CADDR1 != 0xaaaaaaaa) { 1333 page_bad = TRUE; 1334 } 1335 /* 1336 * Test for alternating 0's and 1's 1337 */ 1338 *(volatile int *)CADDR1 = 0x55555555; 1339 if (*(volatile int *)CADDR1 != 0x55555555) { 1340 page_bad = TRUE; 1341 } 1342 /* 1343 * Test for all 1's 1344 */ 1345 *(volatile int *)CADDR1 = 0xffffffff; 1346 if (*(volatile int *)CADDR1 != 0xffffffff) { 1347 page_bad = TRUE; 1348 } 1349 /* 1350 * Test for all 0's 1351 */ 1352 *(volatile int *)CADDR1 = 0x0; 1353 if (*(volatile int *)CADDR1 != 0x0) { 1354 /* 1355 * test of page failed 1356 */ 1357 page_bad = TRUE; 1358 } 1359 /* 1360 * Restore original value. 1361 */ 1362 *(int *)CADDR1 = tmp; 1363 1364 /* 1365 * Adjust array of valid/good pages. 1366 */ 1367 if (page_bad == FALSE) { 1368 /* 1369 * If this good page is a continuation of the 1370 * previous set of good pages, then just increase 1371 * the end pointer. Otherwise start a new chunk. 1372 * Note that "end" points one higher than end, 1373 * making the range >= start and < end. 1374 */ 1375 if (phys_avail[pa_indx] == target_page) { 1376 phys_avail[pa_indx] += PAGE_SIZE; 1377 } else { 1378 pa_indx++; 1379 if (pa_indx == PHYS_AVAIL_ARRAY_END) { 1380 printf("Too many holes in the physical address space, giving up\n"); 1381 pa_indx--; 1382 break; 1383 } 1384 phys_avail[pa_indx++] = target_page; /* start */ 1385 phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ 1386 } 1387 physmem++; 1388 } else { 1389 badpages++; 1390 page_bad = FALSE; 1391 } 1392 } 1393 1394 *(int *)CMAP1 = 0; 1395 pmap_update(); 1396 1397 /* 1398 * XXX 1399 * The last chunk must contain at least one page plus the message 1400 * buffer to avoid complicating other code (message buffer address 1401 * calculation, etc.). 1402 */ 1403 while (phys_avail[pa_indx - 1] + PAGE_SIZE + 1404 round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) { 1405 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); 1406 phys_avail[pa_indx--] = 0; 1407 phys_avail[pa_indx--] = 0; 1408 } 1409 1410 Maxmem = atop(phys_avail[pa_indx]); 1411 1412 /* Trim off space for the message buffer. */ 1413 phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf)); 1414 1415 avail_end = phys_avail[pa_indx]; 1416 1417 /* now running on new page tables, configured,and u/iom is accessible */ 1418 1419 /* make a initial tss so microp can get interrupt stack on syscall! */ 1420 proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*PAGE_SIZE; 1421 proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; 1422 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 1423 1424 dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = 1425 dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; 1426 dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = 1427 dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); 1428 dblfault_tss.tss_cr3 = IdlePTD; 1429 dblfault_tss.tss_eip = (int) dblfault_handler; 1430 dblfault_tss.tss_eflags = PSL_KERNEL; 1431 dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs = 1432 GSEL(GDATA_SEL, SEL_KPL); 1433 dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); 1434 dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); 1435 1436 ((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt = 1437 (sizeof(struct i386tss))<<16; 1438 1439 ltr(gsel_tss); 1440 1441 /* make a call gate to reenter kernel with */ 1442 gdp = &ldt[LSYS5CALLS_SEL].gd; 1443 1444 x = (int) &IDTVEC(syscall); 1445 gdp->gd_looffset = x++; 1446 gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); 1447 gdp->gd_stkcpy = 1; 1448 gdp->gd_type = SDT_SYS386CGT; 1449 gdp->gd_dpl = SEL_UPL; 1450 gdp->gd_p = 1; 1451 gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; 1452 1453 /* transfer to user mode */ 1454 1455 _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); 1456 _udatasel = LSEL(LUDATA_SEL, SEL_UPL); 1457 1458 /* setup proc 0's pcb */ 1459 proc0.p_addr->u_pcb.pcb_flags = 0; 1460 proc0.p_addr->u_pcb.pcb_cr3 = IdlePTD; 1461} 1462 1463/* 1464 * The registers are in the frame; the frame is in the user area of 1465 * the process in question; when the process is active, the registers 1466 * are in "the kernel stack"; when it's not, they're still there, but 1467 * things get flipped around. So, since p->p_md.md_regs is the whole address 1468 * of the register set, take its offset from the kernel stack, and 1469 * index into the user block. Don't you just *love* virtual memory? 1470 * (I'm starting to think seymour is right...) 1471 */ 1472#define TF_REGP(p) ((struct trapframe *) \ 1473 ((char *)(p)->p_addr \ 1474 + ((char *)(p)->p_md.md_regs - kstack))) 1475 1476int 1477ptrace_set_pc(p, addr) 1478 struct proc *p; 1479 unsigned int addr; 1480{ 1481 TF_REGP(p)->tf_eip = addr; 1482 return (0); 1483} 1484 1485int 1486ptrace_single_step(p) 1487 struct proc *p; 1488{ 1489 TF_REGP(p)->tf_eflags |= PSL_T; 1490 return (0); 1491} 1492 1493int ptrace_write_u(p, off, data) 1494 struct proc *p; 1495 vm_offset_t off; 1496 int data; 1497{ 1498 struct trapframe frame_copy; 1499 vm_offset_t min; 1500 struct trapframe *tp; 1501 1502 /* 1503 * Privileged kernel state is scattered all over the user area. 1504 * Only allow write access to parts of regs and to fpregs. 1505 */ 1506 min = (char *)p->p_md.md_regs - kstack; 1507 if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { 1508 tp = TF_REGP(p); 1509 frame_copy = *tp; 1510 *(int *)((char *)&frame_copy + (off - min)) = data; 1511 if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || 1512 !CS_SECURE(frame_copy.tf_cs)) 1513 return (EINVAL); 1514 *(int*)((char *)p->p_addr + off) = data; 1515 return (0); 1516 } 1517 min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); 1518 if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { 1519 *(int*)((char *)p->p_addr + off) = data; 1520 return (0); 1521 } 1522 return (EFAULT); 1523} 1524 1525int 1526fill_regs(p, regs) 1527 struct proc *p; 1528 struct reg *regs; 1529{ 1530 struct trapframe *tp; 1531 1532 tp = TF_REGP(p); 1533 regs->r_es = tp->tf_es; 1534 regs->r_ds = tp->tf_ds; 1535 regs->r_edi = tp->tf_edi; 1536 regs->r_esi = tp->tf_esi; 1537 regs->r_ebp = tp->tf_ebp; 1538 regs->r_ebx = tp->tf_ebx; 1539 regs->r_edx = tp->tf_edx; 1540 regs->r_ecx = tp->tf_ecx; 1541 regs->r_eax = tp->tf_eax; 1542 regs->r_eip = tp->tf_eip; 1543 regs->r_cs = tp->tf_cs; 1544 regs->r_eflags = tp->tf_eflags; 1545 regs->r_esp = tp->tf_esp; 1546 regs->r_ss = tp->tf_ss; 1547 return (0); 1548} 1549 1550int 1551set_regs(p, regs) 1552 struct proc *p; 1553 struct reg *regs; 1554{ 1555 struct trapframe *tp; 1556 1557 tp = TF_REGP(p); 1558 if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) || 1559 !CS_SECURE(regs->r_cs)) 1560 return (EINVAL); 1561 tp->tf_es = regs->r_es; 1562 tp->tf_ds = regs->r_ds; 1563 tp->tf_edi = regs->r_edi; 1564 tp->tf_esi = regs->r_esi; 1565 tp->tf_ebp = regs->r_ebp; 1566 tp->tf_ebx = regs->r_ebx; 1567 tp->tf_edx = regs->r_edx; 1568 tp->tf_ecx = regs->r_ecx; 1569 tp->tf_eax = regs->r_eax; 1570 tp->tf_eip = regs->r_eip; 1571 tp->tf_cs = regs->r_cs; 1572 tp->tf_eflags = regs->r_eflags; 1573 tp->tf_esp = regs->r_esp; 1574 tp->tf_ss = regs->r_ss; 1575 return (0); 1576} 1577 1578#ifndef DDB 1579void 1580Debugger(const char *msg) 1581{ 1582 printf("Debugger(\"%s\") called.\n", msg); 1583} 1584#endif /* no DDB */ 1585 1586#include <sys/disklabel.h> 1587#define b_cylin b_resid 1588/* 1589 * Determine the size of the transfer, and make sure it is 1590 * within the boundaries of the partition. Adjust transfer 1591 * if needed, and signal errors or early completion. 1592 */ 1593int 1594bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) 1595{ 1596 struct partition *p = lp->d_partitions + dkpart(bp->b_dev); 1597 int labelsect = lp->d_partitions[0].p_offset; 1598 int maxsz = p->p_size, 1599 sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; 1600 1601 /* overwriting disk label ? */ 1602 /* XXX should also protect bootstrap in first 8K */ 1603 if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect && 1604#if LABELSECTOR != 0 1605 bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect && 1606#endif 1607 (bp->b_flags & B_READ) == 0 && wlabel == 0) { 1608 bp->b_error = EROFS; 1609 goto bad; 1610 } 1611 1612#if defined(DOSBBSECTOR) && defined(notyet) 1613 /* overwriting master boot record? */ 1614 if (bp->b_blkno + p->p_offset <= DOSBBSECTOR && 1615 (bp->b_flags & B_READ) == 0 && wlabel == 0) { 1616 bp->b_error = EROFS; 1617 goto bad; 1618 } 1619#endif 1620 1621 /* beyond partition? */ 1622 if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) { 1623 /* if exactly at end of disk, return an EOF */ 1624 if (bp->b_blkno == maxsz) { 1625 bp->b_resid = bp->b_bcount; 1626 return(0); 1627 } 1628 /* or truncate if part of it fits */ 1629 sz = maxsz - bp->b_blkno; 1630 if (sz <= 0) { 1631 bp->b_error = EINVAL; 1632 goto bad; 1633 } 1634 bp->b_bcount = sz << DEV_BSHIFT; 1635 } 1636 1637 /* calculate cylinder for disksort to order transfers with */ 1638 bp->b_pblkno = bp->b_blkno + p->p_offset; 1639 bp->b_cylin = bp->b_pblkno / lp->d_secpercyl; 1640 return(1); 1641 1642bad: 1643 bp->b_flags |= B_ERROR; 1644 return(-1); 1645} 1646 1647int 1648disk_externalize(int drive, struct sysctl_req *req) 1649{ 1650 return SYSCTL_OUT(req, &drive, sizeof drive); 1651} 1652