machdep.c revision 13226
1/*- 2 * Copyright (c) 1992 Terrence R. Lambert. 3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 38 * $Id: machdep.c,v 1.166 1995/12/30 23:13:32 davidg Exp $ 39 */ 40 41#include "npx.h" 42#include "isa.h" 43#include "opt_sysvipc.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/sysproto.h> 48#include <sys/signalvar.h> 49#include <sys/kernel.h> 50#include <sys/proc.h> 51#include <sys/buf.h> 52#include <sys/reboot.h> 53#include <sys/conf.h> 54#include <sys/file.h> 55#include <sys/callout.h> 56#include <sys/malloc.h> 57#include <sys/mbuf.h> 58#include <sys/mount.h> 59#include <sys/msgbuf.h> 60#include <sys/ioctl.h> 61#include <sys/sysent.h> 62#include <sys/tty.h> 63#include <sys/sysctl.h> 64#include <sys/devconf.h> 65#include <sys/vmmeter.h> 66 67#ifdef SYSVSHM 68#include <sys/shm.h> 69#endif 70 71#ifdef SYSVMSG 72#include <sys/msg.h> 73#endif 74 75#ifdef SYSVSEM 76#include <sys/sem.h> 77#endif 78 79#include <vm/vm.h> 80#include <vm/vm_param.h> 81#include <vm/vm_prot.h> 82#include <vm/lock.h> 83#include <vm/vm_kern.h> 84#include <vm/vm_object.h> 85#include <vm/vm_page.h> 86#include <vm/vm_map.h> 87#include <vm/vm_pager.h> 88#include <vm/vm_extern.h> 89 90#include <sys/user.h> 91#include <sys/exec.h> 92#include <sys/vnode.h> 93 94#include <ddb/ddb.h> 95 96#include <net/netisr.h> 97 98#include <machine/cpu.h> 99#include <machine/npx.h> 100#include <machine/reg.h> 101#include <machine/psl.h> 102#include <machine/clock.h> 103#include <machine/specialreg.h> 
104#include <machine/sysarch.h> 105#include <machine/cons.h> 106#include <machine/devconf.h> 107#include <machine/bootinfo.h> 108#include <machine/md_var.h> 109 110#include <i386/isa/isa.h> 111#include <i386/isa/isa_device.h> 112#include <i386/isa/rtc.h> 113#include <machine/random.h> 114 115extern void init386 __P((int first)); 116extern int ptrace_set_pc __P((struct proc *p, unsigned int addr)); 117extern int ptrace_single_step __P((struct proc *p)); 118extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data)); 119extern void dblfault_handler __P((void)); 120 121extern void i486_bzero __P((void *, size_t)); 122extern void i586_bzero __P((void *, size_t)); 123extern void i686_bzero __P((void *, size_t)); 124 125static void cpu_startup __P((void *)); 126SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) 127 128static void identifycpu(void); 129 130char machine[] = "i386"; 131SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); 132 133static char cpu_model[128]; 134SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, ""); 135 136struct kern_devconf kdc_cpu0 = { 137 0, 0, 0, /* filled in by dev_attach */ 138 "cpu", 0, { MDDT_CPU }, 139 0, 0, 0, CPU_EXTERNALLEN, 140 0, /* CPU has no parent */ 141 0, /* no parentdata */ 142 DC_BUSY, /* the CPU is always busy */ 143 cpu_model, /* no sense in duplication */ 144 DC_CLS_CPU /* class */ 145}; 146 147#ifndef PANIC_REBOOT_WAIT_TIME 148#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 149#endif 150 151#ifdef BOUNCE_BUFFERS 152extern char *bouncememory; 153extern int maxbkva; 154#ifdef BOUNCEPAGES 155int bouncepages = BOUNCEPAGES; 156#else 157int bouncepages = 0; 158#endif 159#endif /* BOUNCE_BUFFERS */ 160 161extern int freebufspace; 162int msgbufmapped = 0; /* set when safe to use msgbuf */ 163int _udatasel, _ucodesel; 164 165 166int physmem = 0; 167 168static int 169sysctl_hw_physmem SYSCTL_HANDLER_ARGS 170{ 171 int error = sysctl_handle_int(oidp, 0, 
	    ctob(physmem), req);	/* report physical memory in bytes */
	return (error);
}

SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_physmem, "I", "");

/*
 * sysctl hw.usermem: physical memory minus wired pages, i.e. the part
 * of RAM in principle available to user processes.
 */
static int
sysctl_hw_usermem SYSCTL_HANDLER_ARGS
{
	int error = sysctl_handle_int(oidp, 0,
		ctob(physmem - cnt.v_wire_count), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_usermem, "I", "");

int boothowto = 0, bootverbose = 0, Maxmem = 0;
static int	badpages = 0;	/* nonzero when phys_avail[] contains holes */
long dumplo;
extern int bootdev;

/* start/end pairs of usable physical memory chunks, terminated by 0 0 */
vm_offset_t phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)

int cpu_class;

static void dumpsys __P((void));
static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */

static vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
extern struct linker_set netisr_set;

#define offsetof(type, member)	((size_t)(&((type *)0)->member))

/*
 * Machine-dependent startup, run once via SYSINIT at SI_SUB_CPU:
 * maps the kernel message buffer, identifies the CPU, sizes and
 * allocates the statically-sized kernel tables (callouts, SysV IPC,
 * buffer cache), carves up the kernel VM submaps, and sets up the
 * mbuf pool, bounce buffers and ISA DMA area.
 */
static void
cpu_startup(dummy)
	void *dummy;
{
	register unsigned i;
	register caddr_t v;
	vm_offset_t maxaddr;
	vm_size_t size = 0;
	int firstaddr;
	vm_offset_t minaddr;

	if (boothowto & RB_VERBOSE)
		bootverbose++;

	/*
	 * Initialize error message buffer (at end of core).
	 */

	/* avail_end was pre-decremented in init_386() to compensate */
	for (i = 0; i < btoc(sizeof (struct msgbuf)); i++)
		pmap_enter(pmap_kernel(), (vm_offset_t)msgbufp,
			   avail_end + i * NBPG,
			   VM_PROT_ALL, TRUE);
	msgbufmapped = 1;	/* printf may now log to the msgbuf */

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	printf(version);
	startrtclock();
	identifycpu();
	printf("real memory = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (badpages != 0) {
		int indx = 1;

		/*
		 * XXX skip reporting ISA hole & unmanaged kernel memory
		 */
		if (phys_avail[0] == PAGE_SIZE)
			indx += 2;

		printf("Physical memory hole(s):\n");
		for (; phys_avail[indx + 1] != 0; indx += 2) {
			int size = phys_avail[indx + 1] - phys_avail[indx];

			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
			    phys_avail[indx + 1] - 1, size, size / PAGE_SIZE);
		}
	}

	/*
	 * Quickly wire in netisrs.
	 */
	setup_netisrs(&netisr_set);

/*
#ifdef ISDN
	DONET(isdnintr, NETISR_ISDN);
#endif
*/

	/*
	 * Allocate space for system data structures.
	 * The first available kernel virtual address is in "v".
	 * As pages of kernel virtual memory are allocated, "v" is incremented.
	 * As pages of memory are allocated and cleared,
	 * "firstaddr" is incremented.
	 * An index into the kernel page table corresponding to the
	 * virtual memory address maintained in "v" is kept in "mapaddr".
	 */

	/*
	 * Make two passes.  The first pass calculates how much memory is
	 * needed and allocates it.  The second pass assigns virtual
	 * addresses to the various data structures.
	 */
	firstaddr = 0;
again:
	v = (caddr_t)firstaddr;

/* carve "num" elements of "type" out of v; NB: not brace-protected macros */
#define	valloc(name, type, num) \
	    (name) = (type *)v; v = (caddr_t)((name)+(num))
#define	valloclim(name, type, num, lim) \
	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
	valloc(callout, struct callout, ncallout);
#ifdef SYSVSHM
	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
#ifdef SYSVSEM
	valloc(sema, struct semid_ds, seminfo.semmni);
	valloc(sem, struct sem, seminfo.semmns);
	/* This is pretty disgusting! */
	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
#endif
#ifdef SYSVMSG
	valloc(msgpool, char, msginfo.msgmax);
	valloc(msgmaps, struct msgmap, msginfo.msgseg);
	valloc(msghdrs, struct msg, msginfo.msgtql);
	valloc(msqids, struct msqid_ds, msginfo.msgmni);
#endif

	/* auto-size the buffer cache from physical memory if not configured */
	if (nbuf == 0) {
		nbuf = 30;
		if( physmem > 1024)
			nbuf += min((physmem - 1024) / 12, 1024);
	}
	nswbuf = min(nbuf, 128);

	valloc(swbuf, struct buf, nswbuf);
	valloc(buf, struct buf, nbuf);

#ifdef BOUNCE_BUFFERS
	/*
	 * If there is more than 16MB of memory, allocate some bounce buffers
	 * (Maxmem is in pages; 4096 pages == 16MB with 4K pages).
	 */
	if (Maxmem > 4096) {
		if (bouncepages == 0) {
			bouncepages = 64;
			bouncepages += ((Maxmem - 4096) / 2048) * 32;
		}
		/* round v up to a page boundary before the bounce pool */
		v = (caddr_t)((vm_offset_t)((vm_offset_t)v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1));
		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
	}
#endif

	/*
	 * End of first pass, size has been calculated so allocate memory
	 */
	if (firstaddr == 0) {
		size = (vm_size_t)(v - firstaddr);
		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
		if (firstaddr == 0)
			panic("startup: no room for tables");
		goto again;
	}

	/*
	 * End of second pass, addresses have been assigned
	 */
	if ((vm_size_t)(v - firstaddr) != size)
		panic("startup: table size inconsistency");

#ifdef BOUNCE_BUFFERS
	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) +
			maxbkva + pager_map_size, TRUE);
	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
#else
	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
			(nbuf*MAXBSIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
#endif
	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
				(nbuf*MAXBSIZE), TRUE);
	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
				(16*ARG_MAX), TRUE);
	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
				(maxproc*UPAGES*PAGE_SIZE), FALSE);

	/*
	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
	 * we use the more space efficient malloc in place of kmem_alloc.
	 */
	mclrefcnt = (char *)malloc(nmbclusters+CLBYTES/MCLBYTES,
				   M_MBUF, M_NOWAIT);
	bzero(mclrefcnt, nmbclusters+CLBYTES/MCLBYTES);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
			       nmbclusters * MCLBYTES, FALSE);
	/*
	 * Initialize callouts: thread the freshly allocated array into
	 * a singly linked free list headed by callfree.
	 */
	callfree = callout;
	for (i = 1; i < ncallout; i++)
		callout[i-1].c_next = &callout[i];

	if (boothowto & RB_CONFIG) {
		userconfig();
		cninit();	/* the preferred console may have changed */
	}

#ifdef BOUNCE_BUFFERS
	/*
	 * init bounce buffers
	 */
	vm_bounce_init();
#endif
	/*
	 * XXX allocate a contiguous area for ISA (non busmaster) DMA
	 * operations. This _should_ only be done if the DMA channels
	 * will actually be used, but for now we do it always.
	 */
#define DMAPAGES 8
	isaphysmem =
	    vm_page_alloc_contig(DMAPAGES * PAGE_SIZE, 0, 0xfffffful, 64*1024);

	printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count),
	    ptoa(cnt.v_free_count) / 1024);

	/*
	 * Set up buffers, so they can be used to read disk labels.
	 */
	bufinit();
	vm_pager_bufferinit();

	/*
	 * In verbose mode, print out the BIOS's idea of the disk geometries.
	 */
	if (bootverbose) {
		printf("BIOS Geometries:\n");
		for (i = 0; i < N_BIOS_GEOM; i++) {
			unsigned long bios_geom;
			int max_cylinder, max_head, max_sector;

			bios_geom = bootinfo.bi_bios_geom[i];

			/*
			 * XXX the bootstrap punts a 1200K floppy geometry
			 * when the get-disk-geometry interrupt fails.  Skip
			 * drives that have this geometry.
434 */ 435 if (bios_geom == 0x4f010f) 436 continue; 437 438 printf(" %x:%08lx ", i, bios_geom); 439 max_cylinder = bios_geom >> 16; 440 max_head = (bios_geom >> 8) & 0xff; 441 max_sector = bios_geom & 0xff; 442 printf( 443 "0..%d=%d cylinders, 0..%d=%d heads, 1..%d=%d sectors\n", 444 max_cylinder, max_cylinder + 1, 445 max_head, max_head + 1, 446 max_sector, max_sector); 447 } 448 printf(" %d accounted for\n", bootinfo.bi_n_bios_used); 449 } 450} 451 452int 453register_netisr(num, handler) 454 int num; 455 netisr_t *handler; 456{ 457 458 if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { 459 printf("register_netisr: bad isr number: %d\n", num); 460 return (EINVAL); 461 } 462 netisrs[num] = handler; 463 return (0); 464} 465 466static void 467setup_netisrs(ls) 468 struct linker_set *ls; 469{ 470 int i; 471 const struct netisrtab *nit; 472 473 for(i = 0; ls->ls_items[i]; i++) { 474 nit = (const struct netisrtab *)ls->ls_items[i]; 475 register_netisr(nit->nit_num, nit->nit_isr); 476 } 477} 478 479static struct cpu_nameclass i386_cpus[] = { 480 { "Intel 80286", CPUCLASS_286 }, /* CPU_286 */ 481 { "i386SX", CPUCLASS_386 }, /* CPU_386SX */ 482 { "i386DX", CPUCLASS_386 }, /* CPU_386 */ 483 { "i486SX", CPUCLASS_486 }, /* CPU_486SX */ 484 { "i486DX", CPUCLASS_486 }, /* CPU_486 */ 485 { "Pentium", CPUCLASS_586 }, /* CPU_586 */ 486 { "Cy486DLC", CPUCLASS_486 }, /* CPU_486DLC */ 487 { "Pentium Pro", CPUCLASS_686 }, /* CPU_686 */ 488}; 489 490static void 491identifycpu() 492{ 493 printf("CPU: "); 494 if (cpu >= 0 495 && cpu < (sizeof i386_cpus/sizeof(struct cpu_nameclass))) { 496 cpu_class = i386_cpus[cpu].cpu_class; 497 strncpy(cpu_model, i386_cpus[cpu].cpu_name, sizeof cpu_model); 498 } else { 499 printf("unknown cpu type %d\n", cpu); 500 panic("startup: bad cpu id"); 501 } 502 503#if defined(I586_CPU) || defined(I686_CPU) 504 if (cpu_class == CPUCLASS_586 || cpu_class == CPUCLASS_686) { 505 calibrate_cyclecounter(); 506 } 507#endif 508#if defined(I486_CPU) || 
defined(I586_CPU) || defined(I686_CPU) 509 if (!strcmp(cpu_vendor,"GenuineIntel")) { 510 if ((cpu_id & 0xf00) > 3) { 511 cpu_model[0] = '\0'; 512 513 switch (cpu_id & 0x3000) { 514 case 0x1000: 515 strcpy(cpu_model, "Overdrive "); 516 break; 517 case 0x2000: 518 strcpy(cpu_model, "Dual "); 519 break; 520 } 521 522 switch (cpu_id & 0xf00) { 523 case 0x400: 524 strcat(cpu_model, "i486 "); 525 break; 526 case 0x500: 527 strcat(cpu_model, "Pentium"); /* nb no space */ 528 break; 529 case 0x600: 530 strcat(cpu_model, "Pentium Pro"); 531 break; 532 default: 533 strcat(cpu_model, "unknown"); 534 break; 535 } 536 537 switch (cpu_id & 0xff0) { 538 case 0x400: 539 strcat(cpu_model, "DX"); break; 540 case 0x410: 541 strcat(cpu_model, "DX"); break; 542 case 0x420: 543 strcat(cpu_model, "SX"); break; 544 case 0x430: 545 strcat(cpu_model, "DX2"); break; 546 case 0x440: 547 strcat(cpu_model, "SL"); break; 548 case 0x450: 549 strcat(cpu_model, "SX2"); break; 550 case 0x470: 551 strcat(cpu_model, "DX2 Write-Back Enhanced"); 552 break; 553 case 0x480: 554 strcat(cpu_model, "DX4"); break; 555 break; 556 } 557 } 558 } 559#endif 560 printf("%s (", cpu_model); 561 switch(cpu_class) { 562 case CPUCLASS_286: 563 printf("286"); 564 break; 565#if defined(I386_CPU) 566 case CPUCLASS_386: 567 printf("386"); 568 break; 569#endif 570#if defined(I486_CPU) 571 case CPUCLASS_486: 572 printf("486"); 573 bzero = i486_bzero; 574 break; 575#endif 576#if defined(I586_CPU) 577 case CPUCLASS_586: 578 printf("%d.%02d-MHz ", 579 ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100, 580 ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100); 581 printf("586"); 582 bzero = i586_bzero; 583 break; 584#endif 585#if defined(I686_CPU) 586 case CPUCLASS_686: 587 printf("%d.%02d-MHz ", 588 ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) / 100, 589 ((100 * i586_ctr_rate) >> I586_CTR_RATE_SHIFT) % 100); 590 printf("686"); 591 bzero = i686_bzero; 592 break; 593#endif 594 default: 595 printf("unknown"); /* will panic 
below... */ 596 } 597 printf("-class CPU)\n"); 598#if defined(I486_CPU) || defined(I586_CPU) || defined(I686_CPU) 599 if(*cpu_vendor) 600 printf(" Origin = \"%s\"",cpu_vendor); 601 if(cpu_id) 602 printf(" Id = 0x%lx",cpu_id); 603 604 if (!strcmp(cpu_vendor, "GenuineIntel")) { 605 printf(" Stepping=%ld", cpu_id & 0xf); 606 if (cpu_high > 0) { 607#define FEATUREFMT "\020\001FPU\002VME\003PSE\004MCE\005CX8\006APIC" 608 printf("\n Features=0x%b", cpu_feature, FEATUREFMT); 609 } 610 } 611 /* Avoid ugly blank lines: only print newline when we have to. */ 612 if (*cpu_vendor || cpu_id) 613 printf("\n"); 614#endif 615 /* 616 * Now that we have told the user what they have, 617 * let them know if that machine type isn't configured. 618 */ 619 switch (cpu_class) { 620 case CPUCLASS_286: /* a 286 should not make it this far, anyway */ 621#if !defined(I386_CPU) && !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU) 622#error This kernel is not configured for one of the supported CPUs 623#endif 624#if !defined(I386_CPU) 625 case CPUCLASS_386: 626#endif 627#if !defined(I486_CPU) 628 case CPUCLASS_486: 629#endif 630#if !defined(I586_CPU) 631 case CPUCLASS_586: 632#endif 633#if !defined(I686_CPU) 634 case CPUCLASS_686: 635#endif 636 panic("CPU class not configured"); 637 default: 638 break; 639 } 640 dev_attach(&kdc_cpu0); 641} 642 643/* 644 * Send an interrupt to process. 645 * 646 * Stack is set up to allow sigcode stored 647 * in u. to call routine, followed by kcall 648 * to sigreturn routine below. After sigreturn 649 * resets the signal mask, the stack, and the 650 * frame pointer, it returns to the user 651 * specified pc, psl. 
 */
void
sendsig(catcher, sig, mask, code)
	sig_t catcher;
	int sig, mask;
	unsigned code;
{
	register struct proc *p = curproc;
	register int *regs;
	register struct sigframe *fp;
	struct sigframe sf;
	struct sigacts *psp = p->p_sigacts;
	int oonstack;

	regs = p->p_md.md_regs;		/* saved user trapframe */
	oonstack = psp->ps_sigstk.ss_flags & SA_ONSTACK;
	/*
	 * Allocate and validate space for the signal handler
	 * context. Note that if the stack is in P0 space, the
	 * call to grow() is a nop, and the useracc() check
	 * will fail if the process has not already allocated
	 * the space with a `brk'.
	 *
	 * Use the alternate signal stack only when configured,
	 * not already active, and requested for this signal.
	 */
	if ((psp->ps_flags & SAS_ALTSTACK) &&
	    (psp->ps_sigstk.ss_flags & SA_ONSTACK) == 0 &&
	    (psp->ps_sigonstack & sigmask(sig))) {
		fp = (struct sigframe *)(psp->ps_sigstk.ss_sp +
		    psp->ps_sigstk.ss_size - sizeof(struct sigframe));
		psp->ps_sigstk.ss_flags |= SA_ONSTACK;
	} else {
		/* push the frame below the current user stack pointer */
		fp = (struct sigframe *)(regs[tESP]
		    - sizeof(struct sigframe));
	}

	/*
	 * grow() will return FALSE if the fp will not fit inside the stack
	 * and the stack can not be grown. useracc will return FALSE
	 * if access is denied.
	 */
	if ((grow(p, (int)fp) == FALSE) ||
	    (useracc((caddr_t)fp, sizeof (struct sigframe), B_WRITE) == FALSE)) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		SIGACTION(p, SIGILL) = SIG_DFL;
		sig = sigmask(SIGILL);
		p->p_sigignore &= ~sig;
		p->p_sigcatch &= ~sig;
		p->p_sigmask &= ~sig;
		psignal(p, SIGILL);
		return;
	}

	/*
	 * Build the argument list for the signal handler.
	 * Translate the signal number through the emulation's
	 * signal table if one is present (e.g. for iBCS2 binaries).
	 */
	if (p->p_sysent->sv_sigtbl) {
		if (sig < p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[sig];
		else
			sig = p->p_sysent->sv_sigsize + 1;
	}
	sf.sf_signum = sig;
	sf.sf_code = code;
	sf.sf_scp = &fp->sf_sc;		/* user address of the context */
	sf.sf_addr = (char *) regs[tERR];
	sf.sf_handler = catcher;

	/* save scratch registers */
	sf.sf_sc.sc_eax = regs[tEAX];
	sf.sf_sc.sc_ebx = regs[tEBX];
	sf.sf_sc.sc_ecx = regs[tECX];
	sf.sf_sc.sc_edx = regs[tEDX];
	sf.sf_sc.sc_esi = regs[tESI];
	sf.sf_sc.sc_edi = regs[tEDI];
	sf.sf_sc.sc_cs = regs[tCS];
	sf.sf_sc.sc_ds = regs[tDS];
	sf.sf_sc.sc_ss = regs[tSS];
	sf.sf_sc.sc_es = regs[tES];
	sf.sf_sc.sc_isp = regs[tISP];

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	sf.sf_sc.sc_onstack = oonstack;
	sf.sf_sc.sc_mask = mask;
	sf.sf_sc.sc_sp = regs[tESP];
	sf.sf_sc.sc_fp = regs[tEBP];
	sf.sf_sc.sc_pc = regs[tEIP];
	sf.sf_sc.sc_ps = regs[tEFLAGS];

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		sigexit(p, SIGILL);
	};

	/* redirect the trapframe so the return-to-user enters the handler */
	regs[tESP] = (int)fp;
	regs[tEIP] = (int)((struct pcb *)kstack)->pcb_sigc;
	regs[tEFLAGS] &= ~PSL_VM;
	regs[tCS] = _ucodesel;
	regs[tDS] = _udatasel;
	regs[tES] = _udatasel;
	regs[tSS] = _udatasel;
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig.  Check carefully to
 * make sure that the user has not modified the
 * state to gain improper privileges.
 */
int
sigreturn(p, uap, retval)
	struct proc *p;
	struct sigreturn_args /* {
		struct sigcontext *sigcntxp;
	} */ *uap;
	int *retval;
{
	register struct sigcontext *scp;
	register struct sigframe *fp;
	register int *regs = p->p_md.md_regs;
	int eflags;

	/*
	 * (XXX old comment) regs[tESP] points to the return address.
	 * The user scp pointer is above that.
	 * The return address is faked in the signal trampoline code
	 * for consistency.
	 */
	scp = uap->sigcntxp;
	/* back up from the context to the enclosing sigframe */
	fp = (struct sigframe *)
	     ((caddr_t)scp - offsetof(struct sigframe, sf_sc));

	if (useracc((caddr_t)fp, sizeof (*fp), 0) == 0)
		return(EINVAL);

	/*
	 * Don't allow users to change privileged or reserved flags.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = scp->sc_ps;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
#ifdef DEBUG
		printf("sigreturn: eflags = 0x%x\n", eflags);
#endif
		return(EINVAL);
	}

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(scp->sc_cs)) {
#ifdef DEBUG
		printf("sigreturn: cs = 0x%x\n", scp->sc_cs);
#endif
		trapsignal(p, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/* restore scratch registers */
	regs[tEAX] = scp->sc_eax;
	regs[tEBX] = scp->sc_ebx;
	regs[tECX] = scp->sc_ecx;
	regs[tEDX] = scp->sc_edx;
	regs[tESI] = scp->sc_esi;
	regs[tEDI] = scp->sc_edi;
	regs[tCS] = scp->sc_cs;
	regs[tDS] = scp->sc_ds;
	regs[tES] = scp->sc_es;
	regs[tSS] = scp->sc_ss;
	regs[tISP] = scp->sc_isp;

	if (useracc((caddr_t)scp, sizeof (*scp), 0) == 0)
		return(EINVAL);

	/* restore signal-stack state and the blocked-signal mask */
	if (scp->sc_onstack & 01)
		p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK;
	else
		p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK;
	p->p_sigmask = scp->sc_mask &~
	    (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
	regs[tEBP] = scp->sc_fp;
	regs[tESP] = scp->sc_sp;
	regs[tEIP] = scp->sc_pc;
	regs[tEFLAGS] = eflags;
	return(EJUSTRETURN);	/* trapframe already holds the return state */
}

static int	waittime = -1;	/* -1 until the disks have been synced once */
static struct pcb dumppcb;	/* context saved for the crash dump */

/*
 * Halt or reboot the machine: sync disks (unless RB_NOSYNC), take a
 * crash dump if requested, then halt or reset depending on "howto".
 */
__dead void
boot(howto)
	int howto;
{
	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
		register struct buf *bp;
		int iter, nbusy;

		waittime = 0;
		printf("\nsyncing disks... ");

		sync(&proc0, NULL, NULL);

		/* wait for dirty buffers to drain, with increasing delays */
		for (iter = 0; iter < 20; iter++) {
			nbusy = 0;
			for (bp = &buf[nbuf]; --bp >= buf; ) {
				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
					nbusy++;
				}
			}
			if (nbusy == 0)
				break;
			printf("%d ", nbusy);
			DELAY(40000 * iter);
		}
		if (nbusy) {
			/*
			 * Failed to sync all blocks. Indicate this and don't
			 * unmount filesystems (thus forcing an fsck on reboot).
			 */
			printf("giving up\n");
#ifdef SHOW_BUSYBUFS
			nbusy = 0;
			for (bp = &buf[nbuf]; --bp >= buf; ) {
				if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) {
					nbusy++;
					printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno);
				}
			}
			DELAY(5000000);	/* 5 seconds */
#endif
		} else {
			printf("done\n");
			/*
			 * Unmount filesystems
			 */
			if (panicstr == 0)
				vfs_unmountall();
		}
		DELAY(100000);		/* wait for console output to finish */
		dev_shutdownall(FALSE);
	}
	splhigh();			/* block all further interrupts */
	if (howto & RB_HALT) {
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");
		cngetc();
	} else {
		if (howto & RB_DUMP) {
			if (!cold) {
				/* save our context so the dump is debuggable */
				savectx(&dumppcb, 0);
				dumppcb.pcb_ptd = rcr3();
				dumpsys();
			}

			if (PANIC_REBOOT_WAIT_TIME != 0) {
				if (PANIC_REBOOT_WAIT_TIME != -1) {
					int loop;
					printf("Automatic reboot in %d seconds - press a key on the console to abort\n",
						PANIC_REBOOT_WAIT_TIME);
					for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) {
						DELAY(1000 * 100); /* 1/10th second */
						if (cncheckc()) /* Did user type a key? */
							break;
					}
					/* timed out with no keypress: reboot */
					if (!loop)
						goto die;
				}
			} else { /* zero time specified - reboot NOW */
				goto die;
			}
			printf("--> Press a key on the console to reboot <--\n");
			cngetc();
		}
	}
die:
	printf("Rebooting...\n");
	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
	cpu_reset();
	for(;;) ;
	/* NOTREACHED */
}

/*
 * Magic number for savecore
 *
 * exported (symorder) and used at least by savecore(8)
 *
 */
u_long dumpmag = 0x8fca0101UL;

static int	dumpsize = 0;		/* also for savecore */

static int	dodump = 1;		/* machdep.do_dump sysctl knob */
SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, "");

/*
 * Doadump comes here after turning off memory management and
 * getting on the dump stack, either when called above, or by
 * the auto-restart code.
 */
static void
dumpsys()
{

	if (!dodump)
		return;
	if (dumpdev == NODEV)
		return;
	/* only partition 'b' (minor & 07 == 1) is acceptable as dumpdev */
	if ((minor(dumpdev)&07) != 1)
		return;
	if (!(bdevsw[major(dumpdev)]))
		return;
	if (!(bdevsw[major(dumpdev)]->d_dump))
		return;
	dumpsize = Maxmem;
	printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo);
	printf("dump ");
	/* the driver's d_dump writes all of physical memory to the device */
	switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) {

	case ENXIO:
		printf("device bad\n");
		break;

	case EFAULT:
		printf("device not ready\n");
		break;

	case EINVAL:
		printf("area improper\n");
		break;

	case EIO:
		printf("i/o error\n");
		break;

	case EINTR:
		printf("aborted from console\n");
		break;

	default:
		printf("succeeded\n");
		break;
	}
}

/*
 * Clear registers on exec
 */
void
setregs(p, entry, stack)
	struct proc *p;
	u_long entry;
	u_long stack;
{
	int *regs = p->p_md.md_regs;

	/* wipe the whole trapframe, then set only what exec needs */
	bzero(regs, sizeof(struct trapframe));
	regs[tEIP] = entry;
	regs[tESP] = stack;
	regs[tEFLAGS] = PSL_USER | (regs[tEFLAGS] & PSL_T);
	regs[tSS] = _udatasel;
	regs[tDS] = _udatasel;
	regs[tES] = _udatasel;
	regs[tCS] = _ucodesel;

	p->p_addr->u_pcb.pcb_flags = 0;	/* no fp at all */
	load_cr0(rcr0() | CR0_TS);	/* start emulating */
#if	NNPX > 0
	npxinit(__INITIAL_NPXCW__);
#endif	/* NNPX > 0 */
}

/*
 * sysctl machdep.adjkerntz: read/write the RTC offset; a successful
 * write also pushes the new time-of-day down to the RTC.
 */
static int
sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS
{
	int error;
	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
		req);
	if (!error && req->newptr)
		resettodr();
	return (error);
}

SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");

SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
	CTLFLAG_RW, &disable_rtc_set, 0, "");

SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo,
	CTLFLAG_RD, &bootinfo, bootinfo, "");

/*
 * Initialize 386 and configure to run kernel
 */

/*
 * Initialize segments & interrupt table
 */

int currentldt;
int _default_ldt;
union descriptor gdt[NGDT];		/* global descriptor table */
struct gate_descriptor idt[NIDT];	/* interrupt descriptor table */
union descriptor ldt[NLDT];		/* local descriptor table */

/* dedicated TSS + stack used to survive a double fault */
static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];

extern struct user *proc0paddr;

/* software prototypes -- in more palatable form */
struct soft_segment_descriptor gdt_segs[] = {
/* GNULL_SEL	0 Null Descriptor */
{	0x0,			/* segment base address */
	0x0,			/* length */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GCODE_SEL	1 Code Descriptor for kernel */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GDATA_SEL	2 Data Descriptor for kernel */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GLDT_SEL	3 LDT Descriptor */
{	(int) ldt,		/* segment base address */
	sizeof(ldt)-1,		/* length - all address space */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GTGATE_SEL	4 Null Descriptor - Placeholder */
{	0x0,			/* segment base address */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GPANIC_SEL	5 Panic Tss Descriptor */
{	(int) &dblfault_tss,	/* segment base address */
	sizeof(struct i386tss)-1,/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GPROC0_SEL	6 Proc 0 Tss Descriptor */
{	(int) kstack,		/* segment base address */
	sizeof(struct i386tss)-1,/* length - all address space */
	SDT_SYS386TSS,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GUSERLDT_SEL	7 User LDT Descriptor per process */
{	(int) ldt,		/* segment base address */
	(512 * sizeof(union descriptor)-1),		/* length */
	SDT_SYSLDT,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* unused - default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */
{	0,			/* segment base address (overwritten by APM)  */
	0xfffff,		/* length */
	SDT_MEMERA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
/* GAPMDATA_SEL	10 APM BIOS 32-bit interface (Data) */
{	0,			/* segment base address (overwritten by APM) */
	0xfffff,		/* length */
	SDT_MEMRWA,		/* segment type */
	0,			/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
};

static struct soft_segment_descriptor ldt_segs[] = {
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
	/* Null Descriptor - overwritten by call gate */
{	0x0,			/* segment base address */
	0x0,			/* length - all address space */
	0,			/* segment type */
	0,			/* segment descriptor priority level */
	0,			/* segment descriptor present */
	0, 0,
	0,			/* default 32 vs 16 bit size */
	0			/* limit granularity (byte/page units)*/ },
	/* Code Descriptor for user */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMERA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
	/* Data Descriptor for user */
{	0x0,			/* segment base address */
	0xfffff,		/* length - all address space */
	SDT_MEMRWA,		/* segment type */
	SEL_UPL,		/* segment descriptor priority level */
	1,			/* segment descriptor present */
	0, 0,
	1,			/* default 32 vs 16 bit size */
	1			/* limit granularity (byte/page units)*/ },
};

/*
 * Fill in IDT slot "idx" with an interrupt/trap gate of type "typ",
 * privilege "dpl", pointing at handler "func" in code segment "selec".
 */
void
setidt(idx, func, typ, dpl, selec)
	int idx;
	inthand_t *func;
	int typ;
	int dpl;
	int selec;
{
	struct gate_descriptor *ip = idt + idx;

	ip->gd_looffset = (int)func;	/* low 16 bits of handler offset */
	ip->gd_selector = selec;
	ip->gd_stkcpy = 0;
	ip->gd_xx = 0;
	ip->gd_type = typ;
	ip->gd_dpl = dpl;
	ip->gd_p = 1;			/* gate present */
	ip->gd_hioffset = ((int)func)>>16 ;	/* high 16 bits of offset */
}

#define	IDTVEC(name)	__CONCAT(X,name)

extern inthand_t
	IDTVEC(div),
IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), 1265 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), 1266 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), 1267 IDTVEC(page), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), 1268 IDTVEC(syscall); 1269 1270#if defined(COMPAT_LINUX) || defined(LINUX) 1271extern inthand_t 1272 IDTVEC(linux_syscall); 1273#endif 1274 1275void 1276sdtossd(sd, ssd) 1277 struct segment_descriptor *sd; 1278 struct soft_segment_descriptor *ssd; 1279{ 1280 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; 1281 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; 1282 ssd->ssd_type = sd->sd_type; 1283 ssd->ssd_dpl = sd->sd_dpl; 1284 ssd->ssd_p = sd->sd_p; 1285 ssd->ssd_def32 = sd->sd_def32; 1286 ssd->ssd_gran = sd->sd_gran; 1287} 1288 1289void 1290init386(first) 1291 int first; 1292{ 1293 int x; 1294 unsigned biosbasemem, biosextmem; 1295 struct gate_descriptor *gdp; 1296 int gsel_tss; 1297 /* table descriptors - used to load tables by microp */ 1298 struct region_descriptor r_gdt, r_idt; 1299 int pagesinbase, pagesinext; 1300 int target_page, pa_indx; 1301 1302 proc0.p_addr = proc0paddr; 1303 1304 /* 1305 * Initialize the console before we print anything out. 1306 */ 1307 cninit(); 1308 1309 /* 1310 * make gdt memory segments, the code segment goes up to end of the 1311 * page with etext in it, the data segment goes to the end of 1312 * the address space 1313 */ 1314 /* 1315 * XXX text protection is temporarily (?) disabled. The limit was 1316 * i386_btop(i386_round_page(etext)) - 1. 1317 */ 1318 gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; 1319 gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; 1320 for (x = 0; x < NGDT; x++) 1321 ssdtosd(&gdt_segs[x], &gdt[x].sd); 1322 1323 /* make ldt memory segments */ 1324 /* 1325 * The data segment limit must not cover the user area because we 1326 * don't want the user area to be writable in copyout() etc. (page 1327 * level protection is lost in kernel mode on 386's). 
Also, we 1328 * don't want the user area to be writable directly (page level 1329 * protection of the user area is not available on 486's with 1330 * CR0_WP set, because there is no user-read/kernel-write mode). 1331 * 1332 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it 1333 * should be spelled ...MAX_USER... 1334 */ 1335#define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS 1336 /* 1337 * The code segment limit has to cover the user area until we move 1338 * the signal trampoline out of the user area. This is safe because 1339 * the code segment cannot be written to directly. 1340 */ 1341#define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * NBPG) 1342 ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; 1343 ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; 1344 /* Note. eventually want private ldts per process */ 1345 for (x = 0; x < NLDT; x++) 1346 ssdtosd(&ldt_segs[x], &ldt[x].sd); 1347 1348 /* exceptions */ 1349 for (x = 0; x < NIDT; x++) 1350 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1351 setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1352 setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1353 setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1354 setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1355 setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1356 setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1357 setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1358 setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1359 setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); 1360 setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1361 setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1362 setidt(11, 
&IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1363 setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1364 setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1365 setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1366 setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1367 setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1368 setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1369#if defined(COMPAT_LINUX) || defined(LINUX) 1370 setidt(0x80, &IDTVEC(linux_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1371#endif 1372 1373#include "isa.h" 1374#if NISA >0 1375 isa_defaultirq(); 1376#endif 1377 rand_initialize(); 1378 1379 r_gdt.rd_limit = sizeof(gdt) - 1; 1380 r_gdt.rd_base = (int) gdt; 1381 lgdt(&r_gdt); 1382 1383 r_idt.rd_limit = sizeof(idt) - 1; 1384 r_idt.rd_base = (int) idt; 1385 lidt(&r_idt); 1386 1387 _default_ldt = GSEL(GLDT_SEL, SEL_KPL); 1388 lldt(_default_ldt); 1389 currentldt = _default_ldt; 1390 1391#ifdef DDB 1392 kdb_init(); 1393 if (boothowto & RB_KDB) 1394 Debugger("Boot flags requested debugger"); 1395#endif 1396 1397 /* Use BIOS values stored in RTC CMOS RAM, since probing 1398 * breaks certain 386 AT relics. 1399 */ 1400 biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); 1401 biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); 1402 1403 /* 1404 * Print a warning if the official BIOS interface disagrees 1405 * with the hackish interface used above. Eventually only 1406 * the official interface should be used. 
1407 */ 1408 if (bootinfo.bi_memsizes_valid) { 1409 if (bootinfo.bi_basemem != biosbasemem) 1410 printf("BIOS basemem (%ldK) != RTC basemem (%dK)\n", 1411 bootinfo.bi_basemem, biosbasemem); 1412 if (bootinfo.bi_extmem != biosextmem) 1413 printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n", 1414 bootinfo.bi_extmem, biosextmem); 1415 } 1416 1417 /* 1418 * If BIOS tells us that it has more than 640k in the basemem, 1419 * don't believe it - set it to 640k. 1420 */ 1421 if (biosbasemem > 640) 1422 biosbasemem = 640; 1423 1424 /* 1425 * Some 386 machines might give us a bogus number for extended 1426 * mem. If this happens, stop now. 1427 */ 1428#ifndef LARGEMEM 1429 if (biosextmem > 65536) { 1430 panic("extended memory beyond limit of 64MB"); 1431 /* NOTREACHED */ 1432 } 1433#endif 1434 1435 pagesinbase = biosbasemem * 1024 / NBPG; 1436 pagesinext = biosextmem * 1024 / NBPG; 1437 1438 /* 1439 * Special hack for chipsets that still remap the 384k hole when 1440 * there's 16MB of memory - this really confuses people that 1441 * are trying to use bus mastering ISA controllers with the 1442 * "16MB limit"; they only have 16MB, but the remapping puts 1443 * them beyond the limit. 1444 */ 1445 /* 1446 * If extended memory is between 15-16MB (16-17MB phys address range), 1447 * chop it to 15MB. 1448 */ 1449 if ((pagesinext > 3840) && (pagesinext < 4096)) 1450 pagesinext = 3840; 1451 1452 /* 1453 * Maxmem isn't the "maximum memory", it's one larger than the 1454 * highest page of of the physical address space. It 1455 */ 1456 Maxmem = pagesinext + 0x100000/PAGE_SIZE; 1457 1458#ifdef MAXMEM 1459 Maxmem = MAXMEM/4; 1460#endif 1461 1462 /* call pmap initialization to make new kernel address space */ 1463 pmap_bootstrap (first, 0); 1464 1465 /* 1466 * Size up each available chunk of physical memory. 1467 */ 1468 1469 /* 1470 * We currently don't bother testing base memory. 1471 * XXX ...but we probably should. 
1472 */ 1473 pa_indx = 0; 1474 badpages = 0; 1475 if (pagesinbase > 1) { 1476 phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ 1477 phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ 1478 physmem = pagesinbase - 1; 1479 } else { 1480 /* point at first chunk end */ 1481 pa_indx++; 1482 } 1483 1484 for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { 1485 int tmp, page_bad = FALSE; 1486 1487 /* 1488 * map page into kernel: valid, read/write, non-cacheable 1489 */ 1490 *(int *)CMAP1 = PG_V | PG_KW | PG_N | target_page; 1491 pmap_update(); 1492 1493 tmp = *(int *)CADDR1; 1494 /* 1495 * Test for alternating 1's and 0's 1496 */ 1497 *(volatile int *)CADDR1 = 0xaaaaaaaa; 1498 if (*(volatile int *)CADDR1 != 0xaaaaaaaa) { 1499 page_bad = TRUE; 1500 } 1501 /* 1502 * Test for alternating 0's and 1's 1503 */ 1504 *(volatile int *)CADDR1 = 0x55555555; 1505 if (*(volatile int *)CADDR1 != 0x55555555) { 1506 page_bad = TRUE; 1507 } 1508 /* 1509 * Test for all 1's 1510 */ 1511 *(volatile int *)CADDR1 = 0xffffffff; 1512 if (*(volatile int *)CADDR1 != 0xffffffff) { 1513 page_bad = TRUE; 1514 } 1515 /* 1516 * Test for all 0's 1517 */ 1518 *(volatile int *)CADDR1 = 0x0; 1519 if (*(volatile int *)CADDR1 != 0x0) { 1520 /* 1521 * test of page failed 1522 */ 1523 page_bad = TRUE; 1524 } 1525 /* 1526 * Restore original value. 1527 */ 1528 *(int *)CADDR1 = tmp; 1529 1530 /* 1531 * Adjust array of valid/good pages. 1532 */ 1533 if (page_bad == FALSE) { 1534 /* 1535 * If this good page is a continuation of the 1536 * previous set of good pages, then just increase 1537 * the end pointer. Otherwise start a new chunk. 1538 * Note that "end" points one higher than end, 1539 * making the range >= start and < end. 
1540 */ 1541 if (phys_avail[pa_indx] == target_page) { 1542 phys_avail[pa_indx] += PAGE_SIZE; 1543 } else { 1544 pa_indx++; 1545 if (pa_indx == PHYS_AVAIL_ARRAY_END) { 1546 printf("Too many holes in the physical address space, giving up\n"); 1547 pa_indx--; 1548 break; 1549 } 1550 phys_avail[pa_indx++] = target_page; /* start */ 1551 phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ 1552 } 1553 physmem++; 1554 } else { 1555 badpages++; 1556 page_bad = FALSE; 1557 } 1558 } 1559 1560 *(int *)CMAP1 = 0; 1561 pmap_update(); 1562 1563 /* 1564 * XXX 1565 * The last chunk must contain at least one page plus the message 1566 * buffer to avoid complicating other code (message buffer address 1567 * calculation, etc.). 1568 */ 1569 while (phys_avail[pa_indx - 1] + PAGE_SIZE + 1570 round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) { 1571 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); 1572 phys_avail[pa_indx--] = 0; 1573 phys_avail[pa_indx--] = 0; 1574 } 1575 1576 Maxmem = atop(phys_avail[pa_indx]); 1577 1578 /* Trim off space for the message buffer. */ 1579 phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf)); 1580 1581 avail_end = phys_avail[pa_indx]; 1582 1583 /* now running on new page tables, configured,and u/iom is accessible */ 1584 1585 /* make a initial tss so microp can get interrupt stack on syscall! 
*/ 1586 proc0.p_addr->u_pcb.pcb_tss.tss_esp0 = (int) kstack + UPAGES*NBPG; 1587 proc0.p_addr->u_pcb.pcb_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; 1588 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 1589 1590 dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = 1591 dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; 1592 dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = 1593 dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); 1594 dblfault_tss.tss_cr3 = IdlePTD; 1595 dblfault_tss.tss_eip = (int) dblfault_handler; 1596 dblfault_tss.tss_eflags = PSL_KERNEL; 1597 dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs = 1598 GSEL(GDATA_SEL, SEL_KPL); 1599 dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); 1600 dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); 1601 1602 ((struct i386tss *)gdt_segs[GPROC0_SEL].ssd_base)->tss_ioopt = 1603 (sizeof(struct i386tss))<<16; 1604 1605 ltr(gsel_tss); 1606 1607 /* make a call gate to reenter kernel with */ 1608 gdp = &ldt[LSYS5CALLS_SEL].gd; 1609 1610 x = (int) &IDTVEC(syscall); 1611 gdp->gd_looffset = x++; 1612 gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); 1613 gdp->gd_stkcpy = 1; 1614 gdp->gd_type = SDT_SYS386CGT; 1615 gdp->gd_dpl = SEL_UPL; 1616 gdp->gd_p = 1; 1617 gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; 1618 1619 /* transfer to user mode */ 1620 1621 _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); 1622 _udatasel = LSEL(LUDATA_SEL, SEL_UPL); 1623 1624 /* setup proc 0's pcb */ 1625 bcopy(&sigcode, proc0.p_addr->u_pcb.pcb_sigc, szsigcode); 1626 proc0.p_addr->u_pcb.pcb_flags = 0; 1627 proc0.p_addr->u_pcb.pcb_ptd = IdlePTD; 1628} 1629 1630/* 1631 * The registers are in the frame; the frame is in the user area of 1632 * the process in question; when the process is active, the registers 1633 * are in "the kernel stack"; when it's not, they're still there, but 1634 * things get flipped around. 
So, since p->p_md.md_regs is the whole address 1635 * of the register set, take its offset from the kernel stack, and 1636 * index into the user block. Don't you just *love* virtual memory? 1637 * (I'm starting to think seymour is right...) 1638 */ 1639#define TF_REGP(p) ((struct trapframe *) \ 1640 ((char *)(p)->p_addr \ 1641 + ((char *)(p)->p_md.md_regs - kstack))) 1642 1643int 1644ptrace_set_pc(p, addr) 1645 struct proc *p; 1646 unsigned int addr; 1647{ 1648 TF_REGP(p)->tf_eip = addr; 1649 return (0); 1650} 1651 1652int 1653ptrace_single_step(p) 1654 struct proc *p; 1655{ 1656 TF_REGP(p)->tf_eflags |= PSL_T; 1657 return (0); 1658} 1659 1660int ptrace_write_u(p, off, data) 1661 struct proc *p; 1662 vm_offset_t off; 1663 int data; 1664{ 1665 struct trapframe frame_copy; 1666 vm_offset_t min; 1667 struct trapframe *tp; 1668 1669 /* 1670 * Privileged kernel state is scattered all over the user area. 1671 * Only allow write access to parts of regs and to fpregs. 1672 */ 1673 min = (char *)p->p_md.md_regs - kstack; 1674 if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { 1675 tp = TF_REGP(p); 1676 frame_copy = *tp; 1677 *(int *)((char *)&frame_copy + (off - min)) = data; 1678 if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || 1679 !CS_SECURE(frame_copy.tf_cs)) 1680 return (EINVAL); 1681 *(int*)((char *)p->p_addr + off) = data; 1682 return (0); 1683 } 1684 min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); 1685 if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { 1686 *(int*)((char *)p->p_addr + off) = data; 1687 return (0); 1688 } 1689 return (EFAULT); 1690} 1691 1692int 1693fill_regs(p, regs) 1694 struct proc *p; 1695 struct reg *regs; 1696{ 1697 struct trapframe *tp; 1698 1699 tp = TF_REGP(p); 1700 regs->r_es = tp->tf_es; 1701 regs->r_ds = tp->tf_ds; 1702 regs->r_edi = tp->tf_edi; 1703 regs->r_esi = tp->tf_esi; 1704 regs->r_ebp = tp->tf_ebp; 1705 regs->r_ebx = tp->tf_ebx; 1706 regs->r_edx = 
tp->tf_edx; 1707 regs->r_ecx = tp->tf_ecx; 1708 regs->r_eax = tp->tf_eax; 1709 regs->r_eip = tp->tf_eip; 1710 regs->r_cs = tp->tf_cs; 1711 regs->r_eflags = tp->tf_eflags; 1712 regs->r_esp = tp->tf_esp; 1713 regs->r_ss = tp->tf_ss; 1714 return (0); 1715} 1716 1717int 1718set_regs(p, regs) 1719 struct proc *p; 1720 struct reg *regs; 1721{ 1722 struct trapframe *tp; 1723 1724 tp = TF_REGP(p); 1725 if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) || 1726 !CS_SECURE(regs->r_cs)) 1727 return (EINVAL); 1728 tp->tf_es = regs->r_es; 1729 tp->tf_ds = regs->r_ds; 1730 tp->tf_edi = regs->r_edi; 1731 tp->tf_esi = regs->r_esi; 1732 tp->tf_ebp = regs->r_ebp; 1733 tp->tf_ebx = regs->r_ebx; 1734 tp->tf_edx = regs->r_edx; 1735 tp->tf_ecx = regs->r_ecx; 1736 tp->tf_eax = regs->r_eax; 1737 tp->tf_eip = regs->r_eip; 1738 tp->tf_cs = regs->r_cs; 1739 tp->tf_eflags = regs->r_eflags; 1740 tp->tf_esp = regs->r_esp; 1741 tp->tf_ss = regs->r_ss; 1742 return (0); 1743} 1744 1745#ifndef DDB 1746void 1747Debugger(const char *msg) 1748{ 1749 printf("Debugger(\"%s\") called.\n", msg); 1750} 1751#endif /* no DDB */ 1752 1753#include <sys/disklabel.h> 1754#define b_cylin b_resid 1755/* 1756 * Determine the size of the transfer, and make sure it is 1757 * within the boundaries of the partition. Adjust transfer 1758 * if needed, and signal errors or early completion. 1759 */ 1760int 1761bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) 1762{ 1763 struct partition *p = lp->d_partitions + dkpart(bp->b_dev); 1764 int labelsect = lp->d_partitions[0].p_offset; 1765 int maxsz = p->p_size, 1766 sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; 1767 1768 /* overwriting disk label ? 
*/ 1769 /* XXX should also protect bootstrap in first 8K */ 1770 if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect && 1771#if LABELSECTOR != 0 1772 bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect && 1773#endif 1774 (bp->b_flags & B_READ) == 0 && wlabel == 0) { 1775 bp->b_error = EROFS; 1776 goto bad; 1777 } 1778 1779#if defined(DOSBBSECTOR) && defined(notyet) 1780 /* overwriting master boot record? */ 1781 if (bp->b_blkno + p->p_offset <= DOSBBSECTOR && 1782 (bp->b_flags & B_READ) == 0 && wlabel == 0) { 1783 bp->b_error = EROFS; 1784 goto bad; 1785 } 1786#endif 1787 1788 /* beyond partition? */ 1789 if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) { 1790 /* if exactly at end of disk, return an EOF */ 1791 if (bp->b_blkno == maxsz) { 1792 bp->b_resid = bp->b_bcount; 1793 return(0); 1794 } 1795 /* or truncate if part of it fits */ 1796 sz = maxsz - bp->b_blkno; 1797 if (sz <= 0) { 1798 bp->b_error = EINVAL; 1799 goto bad; 1800 } 1801 bp->b_bcount = sz << DEV_BSHIFT; 1802 } 1803 1804 /* calculate cylinder for disksort to order transfers with */ 1805 bp->b_pblkno = bp->b_blkno + p->p_offset; 1806 bp->b_cylin = bp->b_pblkno / lp->d_secpercyl; 1807 return(1); 1808 1809bad: 1810 bp->b_flags |= B_ERROR; 1811 return(-1); 1812} 1813 1814int 1815disk_externalize(int drive, struct sysctl_req *req) 1816{ 1817 return SYSCTL_OUT(req, &drive, sizeof drive); 1818} 1819