/* machdep.c — revision 12827 */
1/*- 2 * Copyright (c) 1992 Terrence R. Lambert. 3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 38 * $Id: machdep.c,v 1.158 1995/12/13 15:12:23 julian Exp $ 39 */ 40 41#include "npx.h" 42#include "isa.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/sysproto.h> 47#include <sys/signalvar.h> 48#include <sys/kernel.h> 49#include <sys/proc.h> 50#include <sys/buf.h> 51#include <sys/reboot.h> 52#include <sys/conf.h> 53#include <sys/file.h> 54#include <sys/callout.h> 55#include <sys/malloc.h> 56#include <sys/mbuf.h> 57#include <sys/mount.h> 58#include <sys/msgbuf.h> 59#include <sys/ioctl.h> 60#include <sys/sysent.h> 61#include <sys/tty.h> 62#include <sys/sysctl.h> 63#include <sys/devconf.h> 64#include <sys/vmmeter.h> 65 66#ifdef SYSVSHM 67#include <sys/shm.h> 68#endif 69 70#ifdef SYSVMSG 71#include <sys/msg.h> 72#endif 73 74#ifdef SYSVSEM 75#include <sys/sem.h> 76#endif 77 78#include <vm/vm.h> 79#include <vm/vm_param.h> 80#include <vm/vm_prot.h> 81#include <vm/lock.h> 82#include <vm/vm_kern.h> 83#include <vm/vm_object.h> 84#include <vm/vm_page.h> 85#include <vm/vm_map.h> 86#include <vm/vm_pager.h> 87#include <vm/vm_extern.h> 88 89#include <sys/user.h> 90#include <sys/exec.h> 91#include <sys/vnode.h> 92 93#include <ddb/ddb.h> 94 95#include <net/netisr.h> 96 97#include <machine/cpu.h> 98#include <machine/npx.h> 99#include <machine/reg.h> 100#include <machine/psl.h> 101#include <machine/clock.h> 102#include <machine/specialreg.h> 103#include <machine/sysarch.h> 
104#include <machine/cons.h> 105#include <machine/devconf.h> 106#include <machine/bootinfo.h> 107#include <machine/md_var.h> 108 109#include <i386/isa/isa.h> 110#include <i386/isa/isa_device.h> 111#include <i386/isa/rtc.h> 112#include <machine/random.h> 113 114extern void init386 __P((int first)); 115extern int ptrace_set_pc __P((struct proc *p, unsigned int addr)); 116extern int ptrace_single_step __P((struct proc *p)); 117extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data)); 118 119static void cpu_startup __P((void *)); 120SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) 121 122static void identifycpu(void); 123 124char machine[] = "i386"; 125SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); 126 127static char cpu_model[128]; 128SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, ""); 129 130struct kern_devconf kdc_cpu0 = { 131 0, 0, 0, /* filled in by dev_attach */ 132 "cpu", 0, { MDDT_CPU }, 133 0, 0, 0, CPU_EXTERNALLEN, 134 0, /* CPU has no parent */ 135 0, /* no parentdata */ 136 DC_BUSY, /* the CPU is always busy */ 137 cpu_model, /* no sense in duplication */ 138 DC_CLS_CPU /* class */ 139}; 140 141#ifndef PANIC_REBOOT_WAIT_TIME 142#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 143#endif 144 145#ifdef BOUNCE_BUFFERS 146extern char *bouncememory; 147extern int maxbkva; 148#ifdef BOUNCEPAGES 149int bouncepages = BOUNCEPAGES; 150#else 151int bouncepages = 0; 152#endif 153#endif /* BOUNCE_BUFFERS */ 154 155extern int freebufspace; 156int msgbufmapped = 0; /* set when safe to use msgbuf */ 157int _udatasel, _ucodesel; 158 159 160int physmem = 0; 161 162static int 163sysctl_hw_physmem SYSCTL_HANDLER_ARGS 164{ 165 int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); 166 return (error); 167} 168 169SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 170 0, 0, sysctl_hw_physmem, "I", ""); 171 172static int 173sysctl_hw_usermem SYSCTL_HANDLER_ARGS 174{ 175 int error = 
sysctl_handle_int(oidp, 0, 176 ctob(physmem - cnt.v_wire_count), req); 177 return (error); 178} 179 180SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 181 0, 0, sysctl_hw_usermem, "I", ""); 182 183int boothowto = 0, bootverbose = 0, Maxmem = 0; 184static int badpages = 0; 185long dumplo; 186extern int bootdev; 187 188vm_offset_t phys_avail[10]; 189 190/* must be 2 less so 0 0 can signal end of chunks */ 191#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) 192 193int cpu_class; 194 195static void dumpsys __P((void)); 196static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */ 197 198static vm_offset_t buffer_sva, buffer_eva; 199vm_offset_t clean_sva, clean_eva; 200static vm_offset_t pager_sva, pager_eva; 201extern struct linker_set netisr_set; 202 203#define offsetof(type, member) ((size_t)(&((type *)0)->member)) 204 205static void 206cpu_startup(dummy) 207 void *dummy; 208{ 209 register unsigned i; 210 register caddr_t v; 211 vm_offset_t maxaddr; 212 vm_size_t size = 0; 213 int firstaddr; 214 vm_offset_t minaddr; 215 216 if (boothowto & RB_VERBOSE) 217 bootverbose++; 218 219 /* 220 * Initialize error message buffer (at end of core). 221 */ 222 223 /* avail_end was pre-decremented in init_386() to compensate */ 224 for (i = 0; i < btoc(sizeof (struct msgbuf)); i++) 225 pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp, 226 avail_end + i * NBPG, 227 VM_PROT_ALL, TRUE); 228 msgbufmapped = 1; 229 230 /* 231 * Good {morning,afternoon,evening,night}. 232 */ 233 printf(version); 234 startrtclock(); 235 identifycpu(); 236 printf("real memory = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); 237 /* 238 * Display any holes after the first chunk of extended memory. 
239 */ 240 if (badpages != 0) { 241 int indx = 1; 242 243 /* 244 * XXX skip reporting ISA hole & unmanaged kernel memory 245 */ 246 if (phys_avail[0] == PAGE_SIZE) 247 indx += 2; 248 249 printf("Physical memory hole(s):\n"); 250 for (; phys_avail[indx + 1] != 0; indx += 2) { 251 int size = phys_avail[indx + 1] - phys_avail[indx]; 252 253 printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx], 254 phys_avail[indx + 1] - 1, size, size / PAGE_SIZE); 255 } 256 } 257 258 /* 259 * Quickly wire in netisrs. 260 */ 261 setup_netisrs(&netisr_set); 262 263/* 264#ifdef ISDN 265 DONET(isdnintr, NETISR_ISDN); 266#endif 267*/ 268 269 /* 270 * Allocate space for system data structures. 271 * The first available kernel virtual address is in "v". 272 * As pages of kernel virtual memory are allocated, "v" is incremented. 273 * As pages of memory are allocated and cleared, 274 * "firstaddr" is incremented. 275 * An index into the kernel page table corresponding to the 276 * virtual memory address maintained in "v" is kept in "mapaddr". 277 */ 278 279 /* 280 * Make two passes. The first pass calculates how much memory is 281 * needed and allocates it. The second pass assigns virtual 282 * addresses to the various data structures. 283 */ 284 firstaddr = 0; 285again: 286 v = (caddr_t)firstaddr; 287 288#define valloc(name, type, num) \ 289 (name) = (type *)v; v = (caddr_t)((name)+(num)) 290#define valloclim(name, type, num, lim) \ 291 (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) 292 valloc(callout, struct callout, ncallout); 293#ifdef SYSVSHM 294 valloc(shmsegs, struct shmid_ds, shminfo.shmmni); 295#endif 296#ifdef SYSVSEM 297 valloc(sema, struct semid_ds, seminfo.semmni); 298 valloc(sem, struct sem, seminfo.semmns); 299 /* This is pretty disgusting! 
*/ 300 valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int)); 301#endif 302#ifdef SYSVMSG 303 valloc(msgpool, char, msginfo.msgmax); 304 valloc(msgmaps, struct msgmap, msginfo.msgseg); 305 valloc(msghdrs, struct msg, msginfo.msgtql); 306 valloc(msqids, struct msqid_ds, msginfo.msgmni); 307#endif 308 309 if (nbuf == 0) { 310 nbuf = 30; 311 if( physmem > 1024) 312 nbuf += min((physmem - 1024) / 12, 1024); 313 } 314 nswbuf = min(nbuf, 128); 315 316 valloc(swbuf, struct buf, nswbuf); 317 valloc(buf, struct buf, nbuf); 318 319#ifdef BOUNCE_BUFFERS 320 /* 321 * If there is more than 16MB of memory, allocate some bounce buffers 322 */ 323 if (Maxmem > 4096) { 324 if (bouncepages == 0) { 325 bouncepages = 64; 326 bouncepages += ((Maxmem - 4096) / 2048) * 32; 327 } 328 v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)); 329 valloc(bouncememory, char, bouncepages * PAGE_SIZE); 330 } 331#endif 332 333 /* 334 * End of first pass, size has been calculated so allocate memory 335 */ 336 if (firstaddr == 0) { 337 size = (vm_size_t)(v - firstaddr); 338 firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); 339 if (firstaddr == 0) 340 panic("startup: no room for tables"); 341 goto again; 342 } 343 344 /* 345 * End of second pass, addresses have been assigned 346 */ 347 if ((vm_size_t)(v - firstaddr) != size) 348 panic("startup: table size inconsistency"); 349 350#ifdef BOUNCE_BUFFERS 351 clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, 352 (nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + 353 maxbkva + pager_map_size, TRUE); 354 io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE); 355#else 356 clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, 357 (nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE); 358#endif 359 buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, 360 (nbuf*MAXBSIZE), TRUE); 361 pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, 362 (nswbuf*MAXPHYS) + 
pager_map_size, TRUE); 363 exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 364 (16*ARG_MAX), TRUE); 365 u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 366 (maxproc*UPAGES*PAGE_SIZE), FALSE); 367 368 /* 369 * Finally, allocate mbuf pool. Since mclrefcnt is an off-size 370 * we use the more space efficient malloc in place of kmem_alloc. 371 */ 372 mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES, 373 M_MBUF, M_NOWAIT); 374 bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES); 375 mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, 376 nmbclusters * MCLBYTES, FALSE); 377 /* 378 * Initialize callouts 379 */ 380 callfree = callout; 381 for (i = 1; i < ncallout; i++) 382 callout[i-1].c_next = &callout[i]; 383 384 if (boothowto & RB_CONFIG) { 385 userconfig(); 386 cninit(); /* the preferred console may have changed */ 387 } 388 389#ifdef BOUNCE_BUFFERS 390 /* 391 * init bounce buffers 392 */ 393 vm_bounce_init(); 394#endif 395 /* 396 * XXX allocate a contiguous area for ISA (non busmaster) DMA 397 * operations. This _should_ only be done if the DMA channels 398 * will actually be used, but for now we do it always. 399 */ 400#define DMAPAGES 8 401 isaphysmem = 402 vm_page_alloc_contig(DMAPAGES * PAGE_SIZE, 0, 0xfffffful, 64*1024); 403 404 printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count), 405 ptoa(cnt.v_free_count) / 1024); 406 407 /* 408 * Set up buffers, so they can be used to read disk labels. 409 */ 410 bufinit(); 411 vm_pager_bufferinit(); 412 413 /* 414 * In verbose mode, print out the BIOS's idea of the disk geometries. 415 */ 416 if (bootverbose) { 417 printf("BIOS Geometries:\n"); 418 for (i = 0; i < N_BIOS_GEOM; i++) { 419 unsigned long bios_geom; 420 int max_cylinder, max_head, max_sector; 421 422 bios_geom = bootinfo.bi_bios_geom[i]; 423 424 /* 425 * XXX the bootstrap punts a 1200K floppy geometry 426 * when the get-disk-geometry interrupt fails. Skip 427 * drives that have this geometry. 
428 */ 429 if (bios_geom == 0x4f010f) 430 continue; 431 432 printf(" %x:%08lx ", i, bios_geom); 433 max_cylinder = bios_geom >> 16; 434 max_head = (bios_geom >> 8) & 0xff; 435 max_sector = bios_geom & 0xff; 436 printf( 437 "0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n", 438 max_cylinder, max_cylinder + 1, 439 max_head, max_head + 1, 440 max_sector, max_sector); 441 } 442 printf(" %d accounted for\n", bootinfo.bi_n_bios_used); 443 } 444} 445 446int 447register_netisr(num, handler) 448 int num; 449 netisr_t *handler; 450{ 451 452 if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { 453 printf("register_netisr: bad isr number: %d\n", num); 454 return (EINVAL); 455 } 456 netisrs[num] = handler; 457 return (0); 458} 459 460static void 461setup_netisrs(ls) 462 struct linker_set *ls; 463{ 464 int i; 465 const struct netisrtab *nit; 466 467 for(i = 0; ls->ls_items[i]; i++) { 468 nit = (const struct netisrtab *)ls->ls_items[i]; 469 register_netisr(nit->nit_num, nit->nit_isr); 470 } 471} 472 473static struct cpu_nameclass i386_cpus[] = { 474 { "Intel 80286", CPUCLASS_286 }, /* CPU_286 */ 475 { "i386SX", CPUCLASS_386 }, /* CPU_386SX */ 476 { "i386DX", CPUCLASS_386 }, /* CPU_386 */ 477 { "i486SX", CPUCLASS_486 }, /* CPU_486SX */ 478 { "i486DX", CPUCLASS_486 }, /* CPU_486 */ 479 { "Pentium", CPUCLASS_586 }, /* CPU_586 */ 480 { "Cy486DLC", CPUCLASS_486 }, /* CPU_486DLC */ 481}; 482 483static void 484identifycpu() 485{ 486 printf("CPU: "); 487 if (cpu >= 0 488 && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) { 489 cpu_class = i386_cpus[cpu].cpu_class; 490 strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model); 491 } else { 492 printf("unknown cpu type %d\n", cpu); 493 panic("startup: bad cpu id"); 494 } 495 496#if defined(I586_CPU) 497 if(cpu_class == CPUCLASS_586) { 498 calibrate_cyclecounter(); 499 } 500#endif 501#if defined(I486_CPU) || defined(I586_CPU) 502 if (!strcmp(cpu_vendor,"GenuineIntel")) { 503 if ((cpu_id & 0xf00) > 3) { 504 
cpu_model[0] = '\0'; 505 506 switch (cpu_id & 0x3000) { 507 case 0x1000: 508 strcpy(cpu_model, "Overdrive "); 509 break; 510 case 0x2000: 511 strcpy(cpu_model, "Dual "); 512 break; 513 } 514 if ((cpu_id & 0xf00) == 0x400) { 515 strcat(cpu_model, "i486 "); 516#if defined(I586_CPU) 517 } else if ((cpu_id & 0xf00) == 0x500) { 518 strcat(cpu_model, "Pentium"); /* nb no space */ 519#endif 520 } else { 521 strcat(cpu_model, "unknown "); 522 } 523 524 switch (cpu_id & 0xff0) { 525 case 0x400: 526 strcat(cpu_model, "DX"); break; 527 case 0x410: 528 strcat(cpu_model, "DX"); break; 529 case 0x420: 530 strcat(cpu_model, "SX"); break; 531 case 0x430: 532 strcat(cpu_model, "DX2"); break; 533 case 0x440: 534 strcat(cpu_model, "SL"); break; 535 case 0x450: 536 strcat(cpu_model, "SX2"); break; 537 case 0x470: 538 strcat(cpu_model, "DX2 Write-Back Enhanced"); 539 break; 540 case 0x480: 541 strcat(cpu_model, "DX4"); break; 542#if defined(I586_CPU) 543 case 0x510: 544 case 0x520: 545 /* 546 * We used to do all sorts of nonsense here 547 * to print out iCOMP numbers. Since these 548 * are meaningless except to Intel 549 * marketroids, there seems to be little 550 * sense in doing so. 551 */ 552 break; 553#endif 554 } 555 } 556 } 557#endif 558 printf("%s (", cpu_model); 559 switch(cpu_class) { 560 case CPUCLASS_286: 561 printf("286"); 562 break; 563#if defined(I386_CPU) 564 case CPUCLASS_386: 565 printf("386"); 566 break; 567#endif 568#if defined(I486_CPU) 569 case CPUCLASS_486: 570 printf("486"); 571 break; 572#endif 573#if defined(I586_CPU) 574 case CPUCLASS_586: 575 printf("%d.%02d-MHz ", 576 ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100, 577 ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100); 578 printf("586"); 579 break; 580#endif 581 default: 582 printf("unknown"); /* will panic below... 
*/ 583 } 584 printf("-class CPU)\n"); 585#if defined(I486_CPU) || defined(I586_CPU) 586 if(*cpu_vendor) 587 printf(" Origin = \"%s\"",cpu_vendor); 588 if(cpu_id) 589 printf(" Id = 0x%lx",cpu_id); 590 591 if (!strcmp(cpu_vendor, "GenuineIntel")) { 592 printf(" Stepping=%ld", cpu_id & 0xf); 593 if (cpu_high > 0) { 594#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC" 595 printf("\n Features=0x%b", cpu_feature, FEATUREFMT); 596 } 597 } 598 /* Avoid ugly blank lines: only print newline when we have to. */ 599 if (*cpu_vendor || cpu_id) 600 printf("\n"); 601#endif 602 /* 603 * Now that we have told the user what they have, 604 * let them know if that machine type isn't configured. 605 */ 606 switch (cpu_class) { 607 case CPUCLASS_286: /* a 286 should not make it this far, anyway */ 608#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) 609#error This kernel is not configured for one of the supported CPUs 610#endif 611#if !defined(I386_CPU) 612 case CPUCLASS_386: 613#endif 614#if !defined(I486_CPU) 615 case CPUCLASS_486: 616#endif 617#if !defined(I586_CPU) 618 case CPUCLASS_586: 619#endif 620 panic("CPU class not configured"); 621 default: 622 break; 623 } 624 dev_attach(&kdc_cpu0); 625} 626 627/* 628 * Send an interrupt to process. 629 * 630 * Stack is set up to allow sigcode stored 631 * in u. to call routine, followed by kcall 632 * to sigreturn routine below. After sigreturn 633 * resets the signal mask, the stack, and the 634 * frame pointer, it returns to the user 635 * specified pc, psl. 
 */
/*
 * sendsig: push a struct sigframe (handler args + saved machine context)
 * onto the user stack — the alternate signal stack if configured for this
 * signal — and redirect the trapframe so the process resumes in the signal
 * trampoline, which calls "catcher" and then sigreturn().
 */
void
sendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig, mask;
	unsigned code;
{
	register struct proc *p = curproc;
	register int *regs;
	register struct sigframe *fp;
	struct sigframe sf;
	struct sigacts *psp = p->p_sigacts;
	int oonstack;

	regs = p->p_md.md_regs;
	oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
	/*
	 * Allocate and validate space for the signal handler
	 * context. Note that if the stack is in P0 space, the
	 * call to grow() is a nop, and the useracc() check
	 * will fail if the process has not already allocated
	 * the space with a `brk'.
	 */
	if ((psp->ps_flags & SAS_ALTSTACK) &&
	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
	    (psp->ps_sigonstack & sigmask(sig))) {
		/* deliver on the alternate stack, top-down */
		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
	} else {
		/* deliver just below the current user stack pointer */
		fp = (struct sigframe *)(regs[tESP]
			- sizeof(struct sigframe));
	}

	/*
	 * grow() will return FALSE if the fp will not fit inside the stack
	 *	and the stack can not be grown. useracc will return FALSE
	 *	if access is denied.
	 */
	if ((grow(p, (int)fp) == FALSE) ||
	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		SIGACTION(p, SIGILL) = SIG_DFL;
		sig = sigmask(SIGILL);
		p->p_sigignore &= ~sig;
		p->p_sigcatch &= ~sig;
		p->p_sigmask &= ~sig;
		psignal(p, SIGILL);
		return;
	}

	/*
	 * Build the argument list for the signal handler.
	 */
	/* translate the signal number for emulated ABIs (e.g. iBCS2) */
	if (p->p_sysent->sv_sigtbl) {
		if (sig < p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[sig];
		else
			sig = p->p_sysent->sv_sigsize + 1;
	}
	sf.sf_signum = sig;
	sf.sf_code = code;
	sf.sf_scp = &fp->sf_sc;
	sf.sf_addr = (char *) regs[tERR];
	sf.sf_handler = catcher;

	/* save scratch registers */
	sf.sf_sc.sc_eax = regs[tEAX];
	sf.sf_sc.sc_ebx = regs[tEBX];
	sf.sf_sc.sc_ecx = regs[tECX];
	sf.sf_sc.sc_edx = regs[tEDX];
	sf.sf_sc.sc_esi = regs[tESI];
	sf.sf_sc.sc_edi = regs[tEDI];
	sf.sf_sc.sc_cs = regs[tCS];
	sf.sf_sc.sc_ds = regs[tDS];
	sf.sf_sc.sc_ss = regs[tSS];
	sf.sf_sc.sc_es = regs[tES];
	sf.sf_sc.sc_isp = regs[tISP];

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	sf.sf_sc.sc_onstack = oonstack;
	sf.sf_sc.sc_mask = mask;
	sf.sf_sc.sc_sp = regs[tESP];
	sf.sf_sc.sc_fp = regs[tEBP];
	sf.sf_sc.sc_pc = regs[tEIP];
	sf.sf_sc.sc_ps = regs[tEFLAGS];

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		sigexit(p, SIGILL);
	};

	/* resume in the trampoline with flat user segments and VM86 off */
	regs[tESP] = (int)fp;
	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
	regs[tEFLAGS] &= ~PSL_VM;
	regs[tCS] = _ucodesel;
	regs[tDS] = _udatasel;
	regs[tES] = _udatasel;
	regs[tSS] = _udatasel;
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig.  Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
756 */ 757int 758sigreturn(p, uap, retval) 759 struct proc *p; 760 struct sigreturn_args /* { 761 struct sigcontext *sigcntxp; 762 } */ *uap; 763 int *retval; 764{ 765 register struct sigcontext *scp; 766 register struct sigframe *fp; 767 register int *regs = p->p_md.md_regs; 768 int eflags; 769 770 /* 771 * (XXX old comment) regs[tESP] points to the return address. 772 * The user scp pointer is above that. 773 * The return address is faked in the signal trampoline code 774 * for consistency. 775 */ 776 scp = uap->sigcntxp; 777 fp = (struct sigframe *) 778 ((caddr_t)scp - offsetof(struct sigframe, sf_sc)); 779 780 if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0) 781 return(EINVAL); 782 783 /* 784 * Don't allow users to change privileged or reserved flags. 785 */ 786#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 787 eflags = scp->sc_ps; 788 /* 789 * XXX do allow users to change the privileged flag PSL_RF. The 790 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 791 * sometimes set it there too. tf_eflags is kept in the signal 792 * context during signal handling and there is no other place 793 * to remember it, so the PSL_RF bit may be corrupted by the 794 * signal handler without us knowing. Corruption of the PSL_RF 795 * bit at worst causes one more or one less debugger trap, so 796 * allowing it is fairly harmless. 797 */ 798 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) { 799#ifdef DEBUG 800 printf("sigreturn: eflags = 0x%x\n", eflags); 801#endif 802 return(EINVAL); 803 } 804 805 /* 806 * Don't allow users to load a valid privileged %cs. Let the 807 * hardware check for invalid selectors, excess privilege in 808 * other selectors, invalid %eip's and invalid %esp's. 
809 */ 810#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 811 if (!CS_SECURE(scp->sc_cs)) { 812#ifdef DEBUG 813 printf("sigreturn: cs = 0x%x\n", scp->sc_cs); 814#endif 815 trapsignal(p, SIGBUS, T_PROTFLT); 816 return(EINVAL); 817 } 818 819 /* restore scratch registers */ 820 regs[tEAX] = scp->sc_eax; 821 regs[tEBX] = scp->sc_ebx; 822 regs[tECX] = scp->sc_ecx; 823 regs[tEDX] = scp->sc_edx; 824 regs[tESI] = scp->sc_esi; 825 regs[tEDI] = scp->sc_edi; 826 regs[tCS] = scp->sc_cs; 827 regs[tDS] = scp->sc_ds; 828 regs[tES] = scp->sc_es; 829 regs[tSS] = scp->sc_ss; 830 regs[tISP] = scp->sc_isp; 831 832 if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0) 833 return(EINVAL); 834 835 if (scp->sc_onstack & 01) 836 p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK; 837 else 838 p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK; 839 p->p_sigmask = scp->sc_mask &~ 840 (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP)); 841 regs[tEBP] = scp->sc_fp; 842 regs[tESP] = scp->sc_sp; 843 regs[tEIP] = scp->sc_pc; 844 regs[tEFLAGS] = eflags; 845 return(EJUSTRETURN); 846} 847 848static int waittime = -1; 849static struct pcb dumppcb; 850 851__dead void 852boot(howto) 853 int howto; 854{ 855 if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) { 856 register struct buf *bp; 857 int iter, nbusy; 858 859 waittime = 0; 860 printf("\nsyncing disks... "); 861 862 sync(&proc0, NULL, NULL); 863 864 for (iter = 0; iter < 20; iter++) { 865 nbusy = 0; 866 for (bp = &buf[nbuf]; --bp >= buf; ) { 867 if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) { 868 nbusy++; 869 } 870 } 871 if (nbusy == 0) 872 break; 873 printf("%d ", nbusy); 874 DELAY(40000 * iter); 875 } 876 if (nbusy) { 877 /* 878 * Failed to sync all blocks. Indicate this and don't 879 * unmount filesystems (thus forcing an fsck on reboot). 
880 */ 881 printf("giving up\n"); 882#ifdef SHOW_BUSYBUFS 883 nbusy = 0; 884 for (bp = &buf[nbuf]; --bp >= buf; ) { 885 if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) { 886 nbusy++; 887 printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno); 888 } 889 } 890 DELAY(5000000); /* 5 seconds */ 891#endif 892 } else { 893 printf("done\n"); 894 /* 895 * Unmount filesystems 896 */ 897 if (panicstr == 0) 898 vfs_unmountall(); 899 } 900 DELAY(100000); /* wait for console output to finish */ 901 dev_shutdownall(FALSE); 902 } 903 splhigh(); 904 if (howto & RB_HALT) { 905 printf("\n"); 906 printf("The operating system has halted.\n"); 907 printf("Please press any key to reboot.\n\n"); 908 cngetc(); 909 } else { 910 if (howto & RB_DUMP) { 911 if (!cold) { 912 savectx(&dumppcb, 0); 913 dumppcb.pcb_ptd = rcr3(); 914 dumpsys(); 915 } 916 917 if (PANIC_REBOOT_WAIT_TIME != 0) { 918 if (PANIC_REBOOT_WAIT_TIME != -1) { 919 int loop; 920 printf("Automatic reboot in %d seconds - press a key on the console to abort\n", 921 PANIC_REBOOT_WAIT_TIME); 922 for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) { 923 DELAY(1000 * 100); /* 1/10th second */ 924 if (cncheckc()) /* Did user type a key? 
*/ 925 break; 926 } 927 if (!loop) 928 goto die; 929 } 930 } else { /* zero time specified - reboot NOW */ 931 goto die; 932 } 933 printf("--> Press a key on the console to reboot <--\n"); 934 cngetc(); 935 } 936 } 937die: 938 printf("Rebooting...\n"); 939 DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ 940 cpu_reset(); 941 for(;;) ; 942 /* NOTREACHED */ 943} 944 945/* 946 * Magic number for savecore 947 * 948 * exported (symorder) and used at least by savecore(8) 949 * 950 */ 951u_long dumpmag = 0x8fca0101UL; 952 953static int dumpsize = 0; /* also for savecore */ 954 955static int dodump = 1; 956SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, ""); 957 958/* 959 * Doadump comes here after turning off memory management and 960 * getting on the dump stack, either when called above, or by 961 * the auto-restart code. 962 */ 963static void 964dumpsys() 965{ 966 967 if (!dodump) 968 return; 969 if (dumpdev == NODEV) 970 return; 971 if ((minor(dumpdev)&07) != 1) 972 return; 973 dumpsize = Maxmem; 974 printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo); 975 printf("dump "); 976 switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) { 977 978 case ENXIO: 979 printf("device bad\n"); 980 break; 981 982 case EFAULT: 983 printf("device not ready\n"); 984 break; 985 986 case EINVAL: 987 printf("area improper\n"); 988 break; 989 990 case EIO: 991 printf("i/o error\n"); 992 break; 993 994 case EINTR: 995 printf("aborted from console\n"); 996 break; 997 998 default: 999 printf("succeeded\n"); 1000 break; 1001 } 1002} 1003 1004/* 1005 * Clear registers on exec 1006 */ 1007void 1008setregs(p, entry, stack) 1009 struct proc *p; 1010 u_long entry; 1011 u_long stack; 1012{ 1013 int *regs = p->p_md.md_regs; 1014 1015 bzero(regs, sizeof(struct trapframe)); 1016 regs[tEIP] = entry; 1017 regs[tESP] = stack; 1018 regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T); 1019 regs[tSS] = _udatasel; 1020 regs[tDS] = _udatasel; 1021 regs[tES] = 
	    _udatasel;
	regs[tCS] = _ucodesel;

	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
	load_cr0(rcr0() | CR0_TS);	/* start emulating */
#if NNPX > 0
	npxinit(__INITIAL_NPXCW__);
#endif	/* NNPX > 0 */
}

/*
 * sysctl handler for machdep.adjkerntz: on a successful write, push the
 * new timezone adjustment back out to the real-time clock.
 */
static int
sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
{
	int error;
	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
		req);
	if (!error && req->newptr)
		resettodr();
	return (error);
}

SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
	CTLFLAG_RD, &bootinfo, bootinfo, "");

/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

int currentldt;
int _default_ldt;
union descriptor gdt[NGDT];		/* global descriptor table */
struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */

static struct i386tss tss, panic_tss;

extern struct user *proc0paddr;

/* software prototypes -- in more palatable form */
/* Each entry is expanded into a hardware descriptor at init time;
 * slot order must match the G*_SEL selector constants. */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address  */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	1 Code Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	2 Data Descriptor for kernel */
{	0x0,			/* segment base address  */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GLDT_SEL	3 LDT Descriptor */
{	(int) ldt,		/* segment base address  */
	sizeof(ldt)-1,		/* length - all address space */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GTGATE_SEL	4 Null Descriptor - Placeholder */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GPANIC_SEL	5 Panic Tss Descriptor */
{	(int) &panic_tss,	/* segment base address  */
	sizeof(tss)-1,		/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
{	(int) kstack,		/* segment base address  */
	sizeof(tss)-1,		/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GUSERLDT_SEL	7 User LDT Descriptor per process */
{	(int) ldt,		/* segment base address  */
	(512 * sizeof(union descriptor)-1),		/* length */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1  			/* limit granularity (byte/page units)*/ },
};

static struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0  			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address  */
	0x0,			/* length - all address
space */ 1185 0, /* segment type */ 1186 0, /* segment descriptor priority level */ 1187 0, /* segment descriptor present */ 1188 0, 0, 1189 0, /* default 32 vs 16 bit size */ 1190 0 /* limit granularity (byte/page units)*/ }, 1191 /* Null Descriptor - overwritten by call gate */ 1192{ 0x0, /* segment base address */ 1193 0x0, /* length - all address space */ 1194 0, /* segment type */ 1195 0, /* segment descriptor priority level */ 1196 0, /* segment descriptor present */ 1197 0, 0, 1198 0, /* default 32 vs 16 bit size */ 1199 0 /* limit granularity (byte/page units)*/ }, 1200 /* Code Descriptor for user */ 1201{ 0x0, /* segment base address */ 1202 0xfffff, /* length - all address space */ 1203 SDT_MEMERA, /* segment type */ 1204 SEL_UPL, /* segment descriptor priority level */ 1205 1, /* segment descriptor present */ 1206 0, 0, 1207 1, /* default 32 vs 16 bit size */ 1208 1 /* limit granularity (byte/page units)*/ }, 1209 /* Data Descriptor for user */ 1210{ 0x0, /* segment base address */ 1211 0xfffff, /* length - all address space */ 1212 SDT_MEMRWA, /* segment type */ 1213 SEL_UPL, /* segment descriptor priority level */ 1214 1, /* segment descriptor present */ 1215 0, 0, 1216 1, /* default 32 vs 16 bit size */ 1217 1 /* limit granularity (byte/page units)*/ }, 1218}; 1219 1220void 1221setidt(idx, func, typ, dpl) 1222 int idx; 1223 inthand_t *func; 1224 int typ; 1225 int dpl; 1226{ 1227 struct gate_descriptor *ip = idt + idx; 1228 1229 ip->gd_looffset = (int)func; 1230 ip->gd_selector = 8; 1231 ip->gd_stkcpy = 0; 1232 ip->gd_xx = 0; 1233 ip->gd_type = typ; 1234 ip->gd_dpl = dpl; 1235 ip->gd_p = 1; 1236 ip->gd_hioffset = ((int)func)>>16 ; 1237} 1238 1239#define IDTVEC(name) __CONCAT(X,name) 1240 1241extern inthand_t 1242 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), 1243 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(dble), IDTVEC(fpusegm), 1244 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), 1245 IDTVEC(page), IDTVEC(rsvd), 
IDTVEC(fpu), IDTVEC(align), 1246 IDTVEC(syscall); 1247 1248#if defined(COMPAT_LINUX) || defined(LINUX) 1249extern inthand_t 1250 IDTVEC(linux_syscall); 1251#endif 1252 1253void 1254sdtossd(sd, ssd) 1255 struct segment_descriptor *sd; 1256 struct soft_segment_descriptor *ssd; 1257{ 1258 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; 1259 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; 1260 ssd->ssd_type = sd->sd_type; 1261 ssd->ssd_dpl = sd->sd_dpl; 1262 ssd->ssd_p = sd->sd_p; 1263 ssd->ssd_def32 = sd->sd_def32; 1264 ssd->ssd_gran = sd->sd_gran; 1265} 1266 1267void 1268init386(first) 1269 int first; 1270{ 1271 int x; 1272 unsigned biosbasemem, biosextmem; 1273 struct gate_descriptor *gdp; 1274 int gsel_tss; 1275 /* table descriptors - used to load tables by microp */ 1276 struct region_descriptor r_gdt, r_idt; 1277 int pagesinbase, pagesinext; 1278 int target_page, pa_indx; 1279 1280 proc0.p_addr = proc0paddr; 1281 1282 /* 1283 * Initialize the console before we print anything out. 1284 */ 1285 cninit(); 1286 1287 /* 1288 * make gdt memory segments, the code segment goes up to end of the 1289 * page with etext in it, the data segment goes to the end of 1290 * the address space 1291 */ 1292 /* 1293 * XXX text protection is temporarily (?) disabled. The limit was 1294 * i386_btop(i386_round_page(etext)) - 1. 1295 */ 1296 gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; 1297 gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; 1298 for (x = 0; x < NGDT; x++) 1299 ssdtosd(&gdt_segs[x], &gdt[x].sd); 1300 1301 /* make ldt memory segments */ 1302 /* 1303 * The data segment limit must not cover the user area because we 1304 * don't want the user area to be writable in copyout() etc. (page 1305 * level protection is lost in kernel mode on 386's). Also, we 1306 * don't want the user area to be writable directly (page level 1307 * protection of the user area is not available on 486's with 1308 * CR0_WP set, because there is no user-read/kernel-write mode). 
1309 * 1310 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it 1311 * should be spelled ...MAX_USER... 1312 */ 1313#define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS 1314 /* 1315 * The code segment limit has to cover the user area until we move 1316 * the signal trampoline out of the user area. This is safe because 1317 * the code segment cannot be written to directly. 1318 */ 1319#define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * NBPG) 1320 ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; 1321 ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; 1322 /* Note. eventually want private ldts per process */ 1323 for (x = 0; x < NLDT; x++) 1324 ssdtosd(&ldt_segs[x], &ldt[x].sd); 1325 1326 /* exceptions */ 1327 for (x = 0; x < NIDT; x++) 1328 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL); 1329 setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL); 1330 setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL); 1331 setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL); 1332 setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL); 1333 setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL); 1334 setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL); 1335 setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL); 1336 setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL); 1337 setidt(8, &IDTVEC(dble), SDT_SYS386TGT, SEL_KPL); 1338 setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL); 1339 setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL); 1340 setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL); 1341 setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL); 1342 setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL); 1343 setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL); 1344 setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL); 1345 setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL); 1346 setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL); 1347#if defined(COMPAT_LINUX) || defined(LINUX) 1348 setidt(0x80, &IDTVEC(linux_syscall), SDT_SYS386TGT, SEL_UPL); 1349#endif 1350 
1351#include "isa.h" 1352#if NISA >0 1353 isa_defaultirq(); 1354#endif 1355 rand_initialize(); 1356 1357 r_gdt.rd_limit = sizeof(gdt) - 1; 1358 r_gdt.rd_base = (int) gdt; 1359 lgdt(&r_gdt); 1360 1361 r_idt.rd_limit = sizeof(idt) - 1; 1362 r_idt.rd_base = (int) idt; 1363 lidt(&r_idt); 1364 1365 _default_ldt = GSEL(GLDT_SEL, SEL_KPL); 1366 lldt(_default_ldt); 1367 currentldt = _default_ldt; 1368 1369#ifdef DDB 1370 kdb_init(); 1371 if (boothowto & RB_KDB) 1372 Debugger("Boot flags requested debugger"); 1373#endif 1374 1375 /* Use BIOS values stored in RTC CMOS RAM, since probing 1376 * breaks certain 386 AT relics. 1377 */ 1378 biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); 1379 biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); 1380 1381 /* 1382 * Print a warning if the official BIOS interface disagrees 1383 * with the hackish interface used above. Eventually only 1384 * the official interface should be used. 1385 */ 1386 if (bootinfo.bi_memsizes_valid) { 1387 if (bootinfo.bi_basemem != biosbasemem) 1388 printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n", 1389 bootinfo.bi_basemem, biosbasemem); 1390 if (bootinfo.bi_extmem != biosextmem) 1391 printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n", 1392 bootinfo.bi_extmem, biosextmem); 1393 } 1394 1395 /* 1396 * If BIOS tells us that it has more than 640k in the basemem, 1397 * don't believe it - set it to 640k. 1398 */ 1399 if (biosbasemem > 640) 1400 biosbasemem = 640; 1401 1402 /* 1403 * Some 386 machines might give us a bogus number for extended 1404 * mem. If this happens, stop now. 
1405 */ 1406#ifndef LARGEMEM 1407 if (biosextmem > 65536) { 1408 panic("extended memory beyond limit of 64MB"); 1409 /* NOTREACHED */ 1410 } 1411#endif 1412 1413 pagesinbase = biosbasemem * 1024 / NBPG; 1414 pagesinext = biosextmem * 1024 / NBPG; 1415 1416 /* 1417 * Special hack for chipsets that still remap the 384k hole when 1418 * there's 16MB of memory - this really confuses people that 1419 * are trying to use bus mastering ISA controllers with the 1420 * "16MB limit"; they only have 16MB, but the remapping puts 1421 * them beyond the limit. 1422 */ 1423 /* 1424 * If extended memory is between 15-16MB (16-17MB phys address range), 1425 * chop it to 15MB. 1426 */ 1427 if ((pagesinext > 3840) && (pagesinext < 4096)) 1428 pagesinext = 3840; 1429 1430 /* 1431 * Maxmem isn't the "maximum memory", it's one larger than the 1432 * highest page of of the physical address space. It 1433 */ 1434 Maxmem = pagesinext + 0x100000/PAGE_SIZE; 1435 1436#ifdef MAXMEM 1437 Maxmem = MAXMEM/4; 1438#endif 1439 1440 /* call pmap initialization to make new kernel address space */ 1441 pmap_bootstrap (first, 0); 1442 1443 /* 1444 * Size up each available chunk of physical memory. 1445 */ 1446 1447 /* 1448 * We currently don't bother testing base memory. 1449 * XXX ...but we probably should. 
1450 */ 1451 pa_indx = 0; 1452 badpages = 0; 1453 if (pagesinbase > 1) { 1454 phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ 1455 phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ 1456 physmem = pagesinbase - 1; 1457 } else { 1458 /* point at first chunk end */ 1459 pa_indx++; 1460 } 1461 1462 for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { 1463 int tmp, page_bad = FALSE; 1464 1465 /* 1466 * map page into kernel: valid, read/write, non-cacheable 1467 */ 1468 *(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page; 1469 pmap_update(); 1470 1471 tmp = *(int *)CADDR1; 1472 /* 1473 * Test for alternating 1's and 0's 1474 */ 1475 *(int *)CADDR1 = 0xaaaaaaaa; 1476 if (*(int *)CADDR1 != 0xaaaaaaaa) { 1477 page_bad = TRUE; 1478 } 1479 /* 1480 * Test for alternating 0's and 1's 1481 */ 1482 *(int *)CADDR1 = 0x55555555; 1483 if (*(int *)CADDR1 != 0x55555555) { 1484 page_bad = TRUE; 1485 } 1486 /* 1487 * Test for all 1's 1488 */ 1489 *(int *)CADDR1 = 0xffffffff; 1490 if (*(int *)CADDR1 != 0xffffffff) { 1491 page_bad = TRUE; 1492 } 1493 /* 1494 * Test for all 0's 1495 */ 1496 *(int *)CADDR1 = 0x0; 1497 if (*(int *)CADDR1 != 0x0) { 1498 /* 1499 * test of page failed 1500 */ 1501 page_bad = TRUE; 1502 } 1503 /* 1504 * Restore original value. 1505 */ 1506 *(int *)CADDR1 = tmp; 1507 1508 /* 1509 * Adjust array of valid/good pages. 1510 */ 1511 if (page_bad == FALSE) { 1512 /* 1513 * If this good page is a continuation of the 1514 * previous set of good pages, then just increase 1515 * the end pointer. Otherwise start a new chunk. 1516 * Note that "end" points one higher than end, 1517 * making the range >= start and < end. 
1518 */ 1519 if (phys_avail[pa_indx] == target_page) { 1520 phys_avail[pa_indx] += PAGE_SIZE; 1521 } else { 1522 pa_indx++; 1523 if (pa_indx == PHYS_AVAIL_ARRAY_END) { 1524 printf("Too many holes in the physical address space, giving up\n"); 1525 pa_indx--; 1526 break; 1527 } 1528 phys_avail[pa_indx++] = target_page; /* start */ 1529 phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ 1530 } 1531 physmem++; 1532 } else { 1533 badpages++; 1534 page_bad = FALSE; 1535 } 1536 } 1537 1538 *(int *)CMAP1 = 0; 1539 pmap_update(); 1540 1541 /* 1542 * XXX 1543 * The last chunk must contain at least one page plus the message 1544 * buffer to avoid complicating other code (message buffer address 1545 * calculation, etc.). 1546 */ 1547 while (phys_avail[pa_indx - 1] + PAGE_SIZE + 1548 round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) { 1549 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); 1550 phys_avail[pa_indx--] = 0; 1551 phys_avail[pa_indx--] = 0; 1552 } 1553 1554 Maxmem = atop(phys_avail[pa_indx]); 1555 1556 /* Trim off space for the message buffer. */ 1557 phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf)); 1558 1559 avail_end = phys_avail[pa_indx]; 1560 1561 /* now running on new page tables, configured,and u/iom is accessible */ 1562 1563 /* make a initial tss so microp can get interrupt stack on syscall! 
*/ 1564 proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG; 1565 proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; 1566 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 1567 1568 ((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt = 1569 (sizeof(tss))<<16; 1570 1571 ltr(gsel_tss); 1572 1573 /* make a call gate to reenter kernel with */ 1574 gdp = &ldt[LSYS5CALLS_SEL].gd; 1575 1576 x = (int) &IDTVEC(syscall); 1577 gdp->gd_looffset = x++; 1578 gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); 1579 gdp->gd_stkcpy = 1; 1580 gdp->gd_type = SDT_SYS386CGT; 1581 gdp->gd_dpl = SEL_UPL; 1582 gdp->gd_p = 1; 1583 gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; 1584 1585 /* transfer to user mode */ 1586 1587 _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); 1588 _udatasel = LSEL(LUDATA_SEL, SEL_UPL); 1589 1590 /* setup proc 0's pcb */ 1591 bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode); 1592 proc0.p_addr->u_pcb.pcb_flags = 0; 1593 proc0.p_addr->u_pcb.pcb_ptd = IdlePTD; 1594} 1595 1596/* 1597 * The registers are in the frame; the frame is in the user area of 1598 * the process in question; when the process is active, the registers 1599 * are in "the kernel stack"; when it's not, they're still there, but 1600 * things get flipped around. So, since p->p_md.md_regs is the whole address 1601 * of the register set, take its offset from the kernel stack, and 1602 * index into the user block. Don't you just *love* virtual memory? 1603 * (I'm starting to think seymour is right...) 
1604 */ 1605#define TF_REGP(p) ((struct trapframe *) \ 1606 ((char *)(p)->p_addr \ 1607 + ((char *)(p)->p_md.md_regs - kstack))) 1608 1609int 1610ptrace_set_pc(p, addr) 1611 struct proc *p; 1612 unsigned int addr; 1613{ 1614 TF_REGP(p)->tf_eip = addr; 1615 return (0); 1616} 1617 1618int 1619ptrace_single_step(p) 1620 struct proc *p; 1621{ 1622 TF_REGP(p)->tf_eflags |= PSL_T; 1623 return (0); 1624} 1625 1626int ptrace_write_u(p, off, data) 1627 struct proc *p; 1628 vm_offset_t off; 1629 int data; 1630{ 1631 struct trapframe frame_copy; 1632 vm_offset_t min; 1633 struct trapframe *tp; 1634 1635 /* 1636 * Privileged kernel state is scattered all over the user area. 1637 * Only allow write access to parts of regs and to fpregs. 1638 */ 1639 min = (char *)p->p_md.md_regs - kstack; 1640 if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { 1641 tp = TF_REGP(p); 1642 frame_copy = *tp; 1643 *(int *)((char *)&frame_copy + (off - min)) = data; 1644 if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || 1645 !CS_SECURE(frame_copy.tf_cs)) 1646 return (EINVAL); 1647 *(int*)((char *)p->p_addr + off) = data; 1648 return (0); 1649 } 1650 min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); 1651 if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { 1652 *(int*)((char *)p->p_addr + off) = data; 1653 return (0); 1654 } 1655 return (EFAULT); 1656} 1657 1658int 1659fill_regs(p, regs) 1660 struct proc *p; 1661 struct reg *regs; 1662{ 1663 struct trapframe *tp; 1664 1665 tp = TF_REGP(p); 1666 regs->r_es = tp->tf_es; 1667 regs->r_ds = tp->tf_ds; 1668 regs->r_edi = tp->tf_edi; 1669 regs->r_esi = tp->tf_esi; 1670 regs->r_ebp = tp->tf_ebp; 1671 regs->r_ebx = tp->tf_ebx; 1672 regs->r_edx = tp->tf_edx; 1673 regs->r_ecx = tp->tf_ecx; 1674 regs->r_eax = tp->tf_eax; 1675 regs->r_eip = tp->tf_eip; 1676 regs->r_cs = tp->tf_cs; 1677 regs->r_eflags = tp->tf_eflags; 1678 regs->r_esp = tp->tf_esp; 1679 regs->r_ss = tp->tf_ss; 1680 return (0); 
1681} 1682 1683int 1684set_regs(p, regs) 1685 struct proc *p; 1686 struct reg *regs; 1687{ 1688 struct trapframe *tp; 1689 1690 tp = TF_REGP(p); 1691 if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) || 1692 !CS_SECURE(regs->r_cs)) 1693 return (EINVAL); 1694 tp->tf_es = regs->r_es; 1695 tp->tf_ds = regs->r_ds; 1696 tp->tf_edi = regs->r_edi; 1697 tp->tf_esi = regs->r_esi; 1698 tp->tf_ebp = regs->r_ebp; 1699 tp->tf_ebx = regs->r_ebx; 1700 tp->tf_edx = regs->r_edx; 1701 tp->tf_ecx = regs->r_ecx; 1702 tp->tf_eax = regs->r_eax; 1703 tp->tf_eip = regs->r_eip; 1704 tp->tf_cs = regs->r_cs; 1705 tp->tf_eflags = regs->r_eflags; 1706 tp->tf_esp = regs->r_esp; 1707 tp->tf_ss = regs->r_ss; 1708 return (0); 1709} 1710 1711#ifndef DDB 1712void 1713Debugger(const char *msg) 1714{ 1715 printf("Debugger(\"%s\") called.\n", msg); 1716} 1717#endif /* no DDB */ 1718 1719#include <sys/disklabel.h> 1720#define b_cylin b_resid 1721/* 1722 * Determine the size of the transfer, and make sure it is 1723 * within the boundaries of the partition. Adjust transfer 1724 * if needed, and signal errors or early completion. 1725 */ 1726int 1727bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) 1728{ 1729 struct partition *p = lp->d_partitions + dkpart(bp->b_dev); 1730 int labelsect = lp->d_partitions[0].p_offset; 1731 int maxsz = p->p_size, 1732 sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; 1733 1734 /* overwriting disk label ? */ 1735 /* XXX should also protect bootstrap in first 8K */ 1736 if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect && 1737#if LABELSECTOR != 0 1738 bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect && 1739#endif 1740 (bp->b_flags & B_READ) == 0 && wlabel == 0) { 1741 bp->b_error = EROFS; 1742 goto bad; 1743 } 1744 1745#if defined(DOSBBSECTOR) && defined(notyet) 1746 /* overwriting master boot record? 
*/ 1747 if (bp->b_blkno + p->p_offset <= DOSBBSECTOR && 1748 (bp->b_flags & B_READ) == 0 && wlabel == 0) { 1749 bp->b_error = EROFS; 1750 goto bad; 1751 } 1752#endif 1753 1754 /* beyond partition? */ 1755 if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) { 1756 /* if exactly at end of disk, return an EOF */ 1757 if (bp->b_blkno == maxsz) { 1758 bp->b_resid = bp->b_bcount; 1759 return(0); 1760 } 1761 /* or truncate if part of it fits */ 1762 sz = maxsz - bp->b_blkno; 1763 if (sz <= 0) { 1764 bp->b_error = EINVAL; 1765 goto bad; 1766 } 1767 bp->b_bcount = sz << DEV_BSHIFT; 1768 } 1769 1770 /* calculate cylinder for disksort to order transfers with */ 1771 bp->b_pblkno = bp->b_blkno + p->p_offset; 1772 bp->b_cylin = bp->b_pblkno / lp->d_secpercyl; 1773 return(1); 1774 1775bad: 1776 bp->b_flags |= B_ERROR; 1777 return(-1); 1778} 1779 1780int 1781disk_externalize(int drive, struct sysctl_req *req) 1782{ 1783 return SYSCTL_OUT(req, &drive, sizeof drive); 1784} 1785