machdep.c revision 31337
/*-
 * Copyright (c) 1992 Terrence R. Lambert.
 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
 *	$Id: machdep.c,v 1.272 1997/11/20 19:30:29 bde Exp $
 */

#include "apm.h"
#include "npx.h"
#include "opt_sysvipc.h"
#include "opt_ddb.h"
#include "opt_bounce.h"
#include "opt_maxmem.h"
#include "opt_perfmon.h"
#include "opt_smp.h"
#include "opt_userconfig.h"
#include "opt_vm86.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/reboot.h>
#include <sys/conf.h>
#include <sys/callout.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/msgbuf.h>
#include <sys/sysent.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#ifdef SYSVMSG
#include <sys/msg.h>
#endif

#ifdef SYSVSEM
#include <sys/sem.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <sys/user.h>
#include <sys/exec.h>

#include <ddb/ddb.h>

#include <net/netisr.h>

#if NAPM > 0
#include <machine/apm_bios.h>
#endif
#include <machine/cpu.h>
#include <machine/reg.h>
#include <machine/clock.h>
#include <machine/specialreg.h>
#include <machine/cons.h>
#include <machine/bootinfo.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
#include <machine/pcb_ext.h>		/* pcb.h included via sys/user.h */
#ifdef SMP
#include <machine/smp.h>
#endif
#ifdef PERFMON
#include <machine/perfmon.h>
#endif

#include <i386/isa/isa_device.h>
#include <i386/isa/intr_machdep.h>
#include <i386/isa/rtc.h>
#include <machine/random.h>

extern void init386 __P((int first));
extern int ptrace_set_pc __P((struct proc *p, unsigned int addr));
extern int ptrace_single_step __P((struct proc *p));
extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data));
extern void dblfault_handler __P((void));

extern void printcpuinfo(void);	/* XXX header file */
extern void earlysetcpuclass(void);	/* same header file */
extern void finishidentcpu(void);
extern void panicifcpuunsupported(void);
extern void initializecpu(void);

static void cpu_startup __P((void *));
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)

static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

#ifdef BOUNCE_BUFFERS
#ifdef BOUNCEPAGES
int bouncepages = BOUNCEPAGES;
#else
int bouncepages = 0;
#endif
#endif	/* BOUNCE_BUFFERS */

int msgbufmapped = 0;		/* set when safe to use msgbuf */
int _udatasel, _ucodesel;
u_int atdevbase;


int physmem = 0;
int cold = 1;

static int
sysctl_hw_physmem SYSCTL_HANDLER_ARGS
{
	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_physmem, "I", "");

static int
sysctl_hw_usermem SYSCTL_HANDLER_ARGS
{
	int error = sysctl_handle_int(oidp, 0,
		ctob(physmem - cnt.v_wire_count), req);
	return (error);
}

SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
	0, 0, sysctl_hw_usermem, "I", "");

int boothowto = 0, bootverbose = 0, Maxmem = 0;
long dumplo;

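/*
 * Illustrative sketch only (not part of the original file): the two
 * handlers above export the physical and user-available memory sizes in
 * bytes as the integer sysctls hw.physmem and hw.usermem.  A userland
 * program can read them through sysctl(3) roughly as follows; the
 * variable names below are arbitrary.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int mib[2] = { CTL_HW, HW_PHYSMEM };
 *		unsigned int bytes;
 *		size_t len = sizeof(bytes);
 *
 *		if (sysctl(mib, 2, &bytes, &len, NULL, 0) == -1)
 *			return (1);
 *		printf("hw.physmem: %u bytes\n", bytes);
 *		return (0);
 *	}
 */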
vm_offset_t phys_avail[10];

/* must be 2 less so 0 0 can signal end of chunks */
#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)

static void setup_netisrs __P((struct linker_set *));	/* XXX declare elsewhere */

static vm_offset_t buffer_sva, buffer_eva;
vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
extern struct linker_set netisr_set;

#define offsetof(type, member)	((size_t)(&((type *)0)->member))

static void
cpu_startup(dummy)
	void *dummy;
{
	register unsigned i;
	register caddr_t v;
	vm_offset_t maxaddr;
	vm_size_t size = 0;
	int firstaddr;
	vm_offset_t minaddr;

	if (boothowto & RB_VERBOSE)
		bootverbose++;

	/*
	 * Good {morning,afternoon,evening,night}.
	 */
	printf(version);
	earlysetcpuclass();
	startrtclock();
	printcpuinfo();
	panicifcpuunsupported();
#ifdef PERFMON
	perfmon_init();
#endif
	printf("real memory = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
	/*
	 * Display any holes after the first chunk of extended memory.
	 */
	if (bootverbose) {
		int indx;

		printf("Physical memory chunk(s):\n");
		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
			int size1 = phys_avail[indx + 1] - phys_avail[indx];

			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
			    phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE);
		}
	}

	/*
	 * Quickly wire in netisrs.
	 */
	setup_netisrs(&netisr_set);

	/*
	 * Calculate callout wheel size
	 */
	for (callwheelsize = 1, callwheelbits = 0;
	     callwheelsize < ncallout;
	     callwheelsize <<= 1, ++callwheelbits)
		;
	callwheelmask = callwheelsize - 1;

	/*
	 * Allocate space for system data structures.
	 * The first available kernel virtual address is in "v".
	 * As pages of kernel virtual memory are allocated, "v" is incremented.
	 * As pages of memory are allocated and cleared,
	 * "firstaddr" is incremented.
	 * An index into the kernel page table corresponding to the
	 * virtual memory address maintained in "v" is kept in "mapaddr".
	 */

	/*
	 * Make two passes.  The first pass calculates how much memory is
	 * needed and allocates it.  The second pass assigns virtual
	 * addresses to the various data structures.
	 */
	firstaddr = 0;
again:
	v = (caddr_t)firstaddr;

#define	valloc(name, type, num) \
	    (name) = (type *)v; v = (caddr_t)((name)+(num))
#define	valloclim(name, type, num, lim) \
	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
	valloc(callout, struct callout, ncallout);
	valloc(callwheel, struct callout_tailq, callwheelsize);
#ifdef SYSVSHM
	valloc(shmsegs, struct shmid_ds, shminfo.shmmni);
#endif
#ifdef SYSVSEM
	valloc(sema, struct semid_ds, seminfo.semmni);
	valloc(sem, struct sem, seminfo.semmns);
	/* This is pretty disgusting! */
	valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int));
#endif
#ifdef SYSVMSG
	valloc(msgpool, char, msginfo.msgmax);
	valloc(msgmaps, struct msgmap, msginfo.msgseg);
	valloc(msghdrs, struct msg, msginfo.msgtql);
	valloc(msqids, struct msqid_ds, msginfo.msgmni);
#endif

	if (nbuf == 0) {
		nbuf = 30;
		if( physmem > 1024)
			nbuf += min((physmem - 1024) / 8, 2048);
	}
	nswbuf = max(min(nbuf/4, 128), 16);

	valloc(swbuf, struct buf, nswbuf);
	valloc(buf, struct buf, nbuf);

#ifdef BOUNCE_BUFFERS
	/*
	 * If there is more than 16MB of memory, allocate some bounce buffers
	 */
	if (Maxmem > 4096) {
		if (bouncepages == 0) {
			bouncepages = 64;
			bouncepages += ((Maxmem - 4096) / 2048) * 32;
			if (bouncepages > 128)
				bouncepages = 128;
		}
		v = (caddr_t)((vm_offset_t)round_page(v));
		valloc(bouncememory, char, bouncepages * PAGE_SIZE);
	}
#endif

	/*
	 * End of first pass, size has been calculated so allocate memory
	 */
	if (firstaddr == 0) {
		size = (vm_size_t)(v - firstaddr);
		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
		if (firstaddr == 0)
			panic("startup: no room for tables");
		goto again;
	}

	/*
	 * End of second pass, addresses have been assigned
	 */
	if ((vm_size_t)(v - firstaddr) != size)
		panic("startup: table size inconsistency");

#ifdef BOUNCE_BUFFERS
	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) +
				maxbkva + pager_map_size, TRUE);
	io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE);
#else
	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE);
#endif
	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
				(nbuf*BKVASIZE), TRUE);
	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
				(nswbuf*MAXPHYS) + pager_map_size, TRUE);
	pager_map->system_map = 1;
	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
				(16*ARG_MAX), TRUE);
	u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
				(maxproc*UPAGES*PAGE_SIZE), FALSE);

	/*
	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
	 * we use the more space efficient malloc in place of kmem_alloc.
355 */ 356 { 357 vm_offset_t mb_map_size; 358 359 mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; 360 mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); 361 mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); 362 bzero(mclrefcnt, mb_map_size / MCLBYTES); 363 mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, 364 mb_map_size, FALSE); 365 mb_map->system_map = 1; 366 } 367 368 /* 369 * Initialize callouts 370 */ 371 SLIST_INIT(&callfree); 372 for (i = 0; i < ncallout; i++) { 373 SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle); 374 } 375 376 for (i = 0; i < callwheelsize; i++) { 377 TAILQ_INIT(&callwheel[i]); 378 } 379 380#if defined(USERCONFIG) 381#if defined(USERCONFIG_BOOT) 382 if (1) { 383#else 384 if (boothowto & RB_CONFIG) { 385#endif 386 userconfig(); 387 cninit(); /* the preferred console may have changed */ 388 } 389#endif 390 391#ifdef BOUNCE_BUFFERS 392 /* 393 * init bounce buffers 394 */ 395 vm_bounce_init(); 396#endif 397 398 printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count), 399 ptoa(cnt.v_free_count) / 1024); 400 401 /* 402 * Set up buffers, so they can be used to read disk labels. 403 */ 404 bufinit(); 405 vm_pager_bufferinit(); 406 407#ifdef SMP 408 /* 409 * OK, enough kmem_alloc/malloc state should be up, lets get on with it! 410 */ 411 mp_start(); /* fire up the APs and APICs */ 412 mp_announce(); 413#endif /* SMP */ 414} 415 416int 417register_netisr(num, handler) 418 int num; 419 netisr_t *handler; 420{ 421 422 if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { 423 printf("register_netisr: bad isr number: %d\n", num); 424 return (EINVAL); 425 } 426 netisrs[num] = handler; 427 return (0); 428} 429 430static void 431setup_netisrs(ls) 432 struct linker_set *ls; 433{ 434 int i; 435 const struct netisrtab *nit; 436 437 for(i = 0; ls->ls_items[i]; i++) { 438 nit = (const struct netisrtab *)ls->ls_items[i]; 439 register_netisr(nit->nit_num, nit->nit_isr); 440 } 441} 442 443/* 444 * Send an interrupt to process. 445 * 446 * Stack is set up to allow sigcode stored 447 * at top to call routine, followed by kcall 448 * to sigreturn routine below. After sigreturn 449 * resets the signal mask, the stack, and the 450 * frame pointer, it returns to the user 451 * specified pc, psl. 452 */ 453void 454sendsig(catcher, sig, mask, code) 455 sig_t catcher; 456 int sig, mask; 457 u_long code; 458{ 459 register struct proc *p = curproc; 460 register struct trapframe *regs; 461 register struct sigframe *fp; 462 struct sigframe sf; 463 struct sigacts *psp = p->p_sigacts; 464 int oonstack; 465 466 regs = p->p_md.md_regs; 467 oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK; 468 /* 469 * Allocate and validate space for the signal handler context. 470 */ 471 if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack && 472 (psp->ps_sigonstack & sigmask(sig))) { 473 fp = (struct sigframe *)(psp->ps_sigstk.ss_sp + 474 psp->ps_sigstk.ss_size - sizeof(struct sigframe)); 475 psp->ps_sigstk.ss_flags |= SS_ONSTACK; 476 } else { 477 fp = (struct sigframe *)regs->tf_esp - 1; 478 } 479 480 /* 481 * grow() will return FALSE if the fp will not fit inside the stack 482 * and the stack can not be grown. useracc will return FALSE 483 * if access is denied. 484 */ 485 if ((grow(p, (int)fp) == FALSE) || 486 (useracc((caddr_t)fp, sizeof(struct sigframe), B_WRITE) == FALSE)) { 487 /* 488 * Process has trashed its stack; give it an illegal 489 * instruction to halt it in its tracks. 
490 */ 491 SIGACTION(p, SIGILL) = SIG_DFL; 492 sig = sigmask(SIGILL); 493 p->p_sigignore &= ~sig; 494 p->p_sigcatch &= ~sig; 495 p->p_sigmask &= ~sig; 496 psignal(p, SIGILL); 497 return; 498 } 499 500 /* 501 * Build the argument list for the signal handler. 502 */ 503 if (p->p_sysent->sv_sigtbl) { 504 if (sig < p->p_sysent->sv_sigsize) 505 sig = p->p_sysent->sv_sigtbl[sig]; 506 else 507 sig = p->p_sysent->sv_sigsize + 1; 508 } 509 sf.sf_signum = sig; 510 sf.sf_code = code; 511 sf.sf_scp = &fp->sf_sc; 512 sf.sf_addr = (char *) regs->tf_err; 513 sf.sf_handler = catcher; 514 515 /* save scratch registers */ 516 sf.sf_sc.sc_eax = regs->tf_eax; 517 sf.sf_sc.sc_ebx = regs->tf_ebx; 518 sf.sf_sc.sc_ecx = regs->tf_ecx; 519 sf.sf_sc.sc_edx = regs->tf_edx; 520 sf.sf_sc.sc_esi = regs->tf_esi; 521 sf.sf_sc.sc_edi = regs->tf_edi; 522 sf.sf_sc.sc_cs = regs->tf_cs; 523 sf.sf_sc.sc_ds = regs->tf_ds; 524 sf.sf_sc.sc_ss = regs->tf_ss; 525 sf.sf_sc.sc_es = regs->tf_es; 526 sf.sf_sc.sc_isp = regs->tf_isp; 527 528 /* 529 * Build the signal context to be used by sigreturn. 530 */ 531 sf.sf_sc.sc_onstack = oonstack; 532 sf.sf_sc.sc_mask = mask; 533 sf.sf_sc.sc_sp = regs->tf_esp; 534 sf.sf_sc.sc_fp = regs->tf_ebp; 535 sf.sf_sc.sc_pc = regs->tf_eip; 536 sf.sf_sc.sc_ps = regs->tf_eflags; 537 sf.sf_sc.sc_trapno = regs->tf_trapno; 538 sf.sf_sc.sc_err = regs->tf_err; 539 540 /* 541 * If we're a vm86 process, we want to save the segment registers. 542 * We also change eflags to be our emulated eflags, not the actual 543 * eflags. 544 */ 545 if (regs->tf_eflags & PSL_VM) { 546 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 547 struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; 548 549 sf.sf_sc.sc_gs = tf->tf_vm86_gs; 550 sf.sf_sc.sc_fs = tf->tf_vm86_fs; 551 sf.sf_sc.sc_es = tf->tf_vm86_es; 552 sf.sf_sc.sc_ds = tf->tf_vm86_ds; 553 554 if (vm86->vm86_has_vme == 0) 555 sf.sf_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) 556 | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); 557 558 /* 559 * We should never have PSL_T set when returning from vm86 560 * mode. It may be set here if we deliver a signal before 561 * getting to vm86 mode, so turn it off. 562 */ 563 tf->tf_eflags &= ~(PSL_VM | PSL_T | PSL_VIF | PSL_VIP); 564 } 565 566 /* 567 * Copy the sigframe out to the user's stack. 568 */ 569 if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) { 570 /* 571 * Something is wrong with the stack pointer. 572 * ...Kill the process. 573 */ 574 sigexit(p, SIGILL); 575 } 576 577 regs->tf_esp = (int)fp; 578 regs->tf_eip = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode)); 579 regs->tf_cs = _ucodesel; 580 regs->tf_ds = _udatasel; 581 regs->tf_es = _udatasel; 582 regs->tf_ss = _udatasel; 583} 584 585/* 586 * System call to cleanup state after a signal 587 * has been taken. Reset signal mask and 588 * stack state from context left by sendsig (above). 589 * Return to previous pc and psl as specified by 590 * context left by sendsig. Check carefully to 591 * make sure that the user has not modified the 592 * state to gain improper privileges. 593 */ 594int 595sigreturn(p, uap) 596 struct proc *p; 597 struct sigreturn_args /* { 598 struct sigcontext *sigcntxp; 599 } */ *uap; 600{ 601 register struct sigcontext *scp; 602 register struct sigframe *fp; 603 register struct trapframe *regs = p->p_md.md_regs; 604 int eflags; 605 606 /* 607 * (XXX old comment) regs->tf_esp points to the return address. 608 * The user scp pointer is above that. 
609 * The return address is faked in the signal trampoline code 610 * for consistency. 611 */ 612 scp = uap->sigcntxp; 613 fp = (struct sigframe *) 614 ((caddr_t)scp - offsetof(struct sigframe, sf_sc)); 615 616 if (useracc((caddr_t)fp, sizeof (*fp), B_WRITE) == 0) 617 return(EFAULT); 618 619 eflags = scp->sc_ps; 620 if (eflags & PSL_VM) { 621 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 622 struct vm86_kernel *vm86; 623 624 /* 625 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't 626 * set up the vm86 area, and we can't enter vm86 mode. 627 */ 628 if (p->p_addr->u_pcb.pcb_ext == 0) 629 return (EINVAL); 630 vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; 631 if (vm86->vm86_inited == 0) 632 return (EINVAL); 633 634 /* go back to user mode if both flags are set */ 635 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) 636 trapsignal(p, SIGBUS, 0); 637 638#define VM_USERCHANGE (PSL_USERCHANGE | PSL_RF) 639#define VME_USERCHANGE (VM_USERCHANGE | PSL_VIP | PSL_VIF) 640 if (vm86->vm86_has_vme) { 641 eflags = (tf->tf_eflags & ~VME_USERCHANGE) | 642 (eflags & VME_USERCHANGE) | PSL_VM; 643 } else { 644 vm86->vm86_eflags = eflags; /* save VIF, VIP */ 645 eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; 646 } 647 tf->tf_vm86_ds = scp->sc_ds; 648 tf->tf_vm86_es = scp->sc_es; 649 tf->tf_vm86_fs = scp->sc_fs; 650 tf->tf_vm86_gs = scp->sc_gs; 651 tf->tf_ds = _udatasel; 652 tf->tf_es = _udatasel; 653 } else { 654 /* 655 * Don't allow users to change privileged or reserved flags. 656 */ 657#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 658 /* 659 * XXX do allow users to change the privileged flag PSL_RF. 660 * The cpu sets PSL_RF in tf_eflags for faults. Debuggers 661 * should sometimes set it there too. tf_eflags is kept in 662 * the signal context during signal handling and there is no 663 * other place to remember it, so the PSL_RF bit may be 664 * corrupted by the signal handler without us knowing. 665 * Corruption of the PSL_RF bit at worst causes one more or 666 * one less debugger trap, so allowing it is fairly harmless. 667 */ 668 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { 669#ifdef DEBUG 670 printf("sigreturn: eflags = 0x%x\n", eflags); 671#endif 672 return(EINVAL); 673 } 674 675 /* 676 * Don't allow users to load a valid privileged %cs. Let the 677 * hardware check for invalid selectors, excess privilege in 678 * other selectors, invalid %eip's and invalid %esp's. 
679 */ 680#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 681 if (!CS_SECURE(scp->sc_cs)) { 682#ifdef DEBUG 683 printf("sigreturn: cs = 0x%x\n", scp->sc_cs); 684#endif 685 trapsignal(p, SIGBUS, T_PROTFLT); 686 return(EINVAL); 687 } 688 regs->tf_ds = scp->sc_ds; 689 regs->tf_es = scp->sc_es; 690 } 691 /* restore scratch registers */ 692 regs->tf_eax = scp->sc_eax; 693 regs->tf_ebx = scp->sc_ebx; 694 regs->tf_ecx = scp->sc_ecx; 695 regs->tf_edx = scp->sc_edx; 696 regs->tf_esi = scp->sc_esi; 697 regs->tf_edi = scp->sc_edi; 698 regs->tf_cs = scp->sc_cs; 699 regs->tf_ss = scp->sc_ss; 700 regs->tf_isp = scp->sc_isp; 701 702 if (useracc((caddr_t)scp, sizeof (*scp), B_WRITE) == 0) 703 return(EINVAL); 704 705 if (scp->sc_onstack & 01) 706 p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK; 707 else 708 p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK; 709 p->p_sigmask = scp->sc_mask & ~sigcantmask; 710 regs->tf_ebp = scp->sc_fp; 711 regs->tf_esp = scp->sc_sp; 712 regs->tf_eip = scp->sc_pc; 713 regs->tf_eflags = eflags; 714 return(EJUSTRETURN); 715} 716 717/* 718 * Machine dependent boot() routine 719 * 720 * I haven't seen anything to put here yet 721 * Possibly some stuff might be grafted back here from boot() 722 */ 723void 724cpu_boot(int howto) 725{ 726} 727 728/* 729 * Shutdown the CPU as much as possible 730 */ 731void 732cpu_halt(void) 733{ 734 for (;;) 735 __asm__ ("hlt"); 736} 737 738/* 739 * Turn the power off. 740 */ 741void 742cpu_power_down(void) 743{ 744#if NAPM > 0 745 apm_power_off(); 746#endif 747} 748 749/* 750 * Clear registers on exec 751 */ 752void 753setregs(p, entry, stack) 754 struct proc *p; 755 u_long entry; 756 u_long stack; 757{ 758 struct trapframe *regs = p->p_md.md_regs; 759 760#ifdef USER_LDT 761 struct pcb *pcb = &p->p_addr->u_pcb; 762 763 /* was i386_user_cleanup() in NetBSD */ 764 if (pcb->pcb_ldt) { 765 if (pcb == curpcb) 766 lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); 767 kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt, 768 pcb->pcb_ldt_len * sizeof(union descriptor)); 769 pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0; 770 } 771#endif 772 773 bzero((char *)regs, sizeof(struct trapframe)); 774 regs->tf_eip = entry; 775 regs->tf_esp = stack; 776 regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); 777 regs->tf_ss = _udatasel; 778 regs->tf_ds = _udatasel; 779 regs->tf_es = _udatasel; 780 regs->tf_cs = _ucodesel; 781 782 /* 783 * Initialize the math emulator (if any) for the current process. 784 * Actually, just clear the bit that says that the emulator has 785 * been initialized. Initialization is delayed until the process 786 * traps to the emulator (if it is done at all) mainly because 787 * emulators don't provide an entry point for initialization. 788 */ 789 p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP; 790 791 /* 792 * Arrange to trap the next npx or `fwait' instruction (see npx.c 793 * for why fwait must be trapped at least if there is an npx or an 794 * emulator). This is mainly to handle the case where npx0 is not 795 * configured, since the npx routines normally set up the trap 796 * otherwise. It should be done only at boot time, but doing it 797 * here allows modifying `npx_exists' for testing the emulator on 798 * systems with an npx. 799 */ 800 load_cr0(rcr0() | CR0_MP | CR0_TS); 801 802#if NNPX > 0 803 /* Initialize the npx (if any) for the current process. 
*/ 804 npxinit(__INITIAL_NPXCW__); 805#endif 806} 807 808static int 809sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS 810{ 811 int error; 812 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, 813 req); 814 if (!error && req->newptr) 815 resettodr(); 816 return (error); 817} 818 819SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, 820 &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); 821 822SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, 823 CTLFLAG_RW, &disable_rtc_set, 0, ""); 824 825SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 826 CTLFLAG_RD, &bootinfo, bootinfo, ""); 827 828SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, 829 CTLFLAG_RW, &wall_cmos_clock, 0, ""); 830 831/* 832 * Initialize 386 and configure to run kernel 833 */ 834 835/* 836 * Initialize segments & interrupt table 837 */ 838 839int currentldt; 840int _default_ldt; 841#ifdef SMP 842union descriptor gdt[NGDT + NCPU]; /* global descriptor table */ 843#else 844union descriptor gdt[NGDT]; /* global descriptor table */ 845#endif 846struct gate_descriptor idt[NIDT]; /* interrupt descriptor table */ 847union descriptor ldt[NLDT]; /* local descriptor table */ 848#ifdef SMP 849/* table descriptors - used to load tables by microp */ 850struct region_descriptor r_gdt, r_idt; 851#endif 852 853#ifdef SMP 854extern struct i386tss common_tss; /* One tss per cpu */ 855#ifdef VM86 856extern struct segment_descriptor common_tssd; 857extern int private_tss; 858extern u_int my_tr; 859#endif /* VM86 */ 860#else 861struct i386tss common_tss; 862#ifdef VM86 863struct segment_descriptor common_tssd; 864u_int private_tss; /* flag indicating private tss */ 865u_int my_tr; /* which task register setting */ 866#endif /* VM86 */ 867#endif 868 869static struct i386tss dblfault_tss; 870static char dblfault_stack[PAGE_SIZE]; 871 872extern struct user *proc0paddr; 873 874 875/* software prototypes -- in more palatable form */ 876struct soft_segment_descriptor gdt_segs[ 877#ifdef SMP 878 NGDT + NCPU 879#endif 880 ] = { 881/* GNULL_SEL 0 Null Descriptor */ 882{ 0x0, /* segment base address */ 883 0x0, /* length */ 884 0, /* segment type */ 885 0, /* segment descriptor priority level */ 886 0, /* segment descriptor present */ 887 0, 0, 888 0, /* default 32 vs 16 bit size */ 889 0 /* limit granularity (byte/page units)*/ }, 890/* GCODE_SEL 1 Code Descriptor for kernel */ 891{ 0x0, /* segment base address */ 892 0xfffff, /* length - all address space */ 893 SDT_MEMERA, /* segment type */ 894 0, /* segment descriptor priority level */ 895 1, /* segment descriptor present */ 896 0, 0, 897 1, /* default 32 vs 16 bit size */ 898 1 /* limit granularity (byte/page units)*/ }, 899/* GDATA_SEL 2 Data Descriptor for kernel */ 900{ 0x0, /* segment base address */ 901 0xfffff, /* length - all address space */ 902 SDT_MEMRWA, /* segment type */ 903 0, /* segment descriptor priority level */ 904 1, /* segment descriptor present */ 905 0, 0, 906 1, /* default 32 vs 16 bit size */ 907 1 /* limit granularity (byte/page units)*/ }, 908/* GLDT_SEL 3 LDT Descriptor */ 909{ (int) ldt, /* segment base address */ 910 sizeof(ldt)-1, /* length - all address space */ 911 SDT_SYSLDT, /* segment type */ 912 SEL_UPL, /* segment descriptor priority level */ 913 1, /* segment descriptor present */ 914 0, 0, 915 0, /* unused - default 32 vs 16 bit size */ 916 0 /* limit granularity (byte/page units)*/ }, 917/* GTGATE_SEL 4 Null Descriptor - Placeholder */ 918{ 0x0, /* segment base address */ 919 0x0, /* length - all address space */ 920 0, /* 
segment type */ 921 0, /* segment descriptor priority level */ 922 0, /* segment descriptor present */ 923 0, 0, 924 0, /* default 32 vs 16 bit size */ 925 0 /* limit granularity (byte/page units)*/ }, 926/* GPANIC_SEL 5 Panic Tss Descriptor */ 927{ (int) &dblfault_tss, /* segment base address */ 928 sizeof(struct i386tss)-1,/* length - all address space */ 929 SDT_SYS386TSS, /* segment type */ 930 0, /* segment descriptor priority level */ 931 1, /* segment descriptor present */ 932 0, 0, 933 0, /* unused - default 32 vs 16 bit size */ 934 0 /* limit granularity (byte/page units)*/ }, 935/* GPROC0_SEL 6 Proc 0 Tss Descriptor */ 936{ 937 (int) &common_tss, /* segment base address */ 938 sizeof(struct i386tss)-1,/* length - all address space */ 939 SDT_SYS386TSS, /* segment type */ 940 0, /* segment descriptor priority level */ 941 1, /* segment descriptor present */ 942 0, 0, 943 0, /* unused - default 32 vs 16 bit size */ 944 0 /* limit granularity (byte/page units)*/ }, 945/* GUSERLDT_SEL 7 User LDT Descriptor per process */ 946{ (int) ldt, /* segment base address */ 947 (512 * sizeof(union descriptor)-1), /* length */ 948 SDT_SYSLDT, /* segment type */ 949 0, /* segment descriptor priority level */ 950 1, /* segment descriptor present */ 951 0, 0, 952 0, /* unused - default 32 vs 16 bit size */ 953 0 /* limit granularity (byte/page units)*/ }, 954/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */ 955{ 0, /* segment base address (overwritten by APM) */ 956 0xfffff, /* length */ 957 SDT_MEMERA, /* segment type */ 958 0, /* segment descriptor priority level */ 959 1, /* segment descriptor present */ 960 0, 0, 961 1, /* default 32 vs 16 bit size */ 962 1 /* limit granularity (byte/page units)*/ }, 963/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */ 964{ 0, /* segment base address (overwritten by APM) */ 965 0xfffff, /* length */ 966 SDT_MEMERA, /* segment type */ 967 0, /* segment descriptor priority level */ 968 1, /* segment descriptor present */ 969 0, 0, 970 0, /* default 32 vs 16 bit size */ 971 1 /* limit granularity (byte/page units)*/ }, 972/* GAPMDATA_SEL 10 APM BIOS 32-bit interface (Data) */ 973{ 0, /* segment base address (overwritten by APM) */ 974 0xfffff, /* length */ 975 SDT_MEMRWA, /* segment type */ 976 0, /* segment descriptor priority level */ 977 1, /* segment descriptor present */ 978 0, 0, 979 1, /* default 32 vs 16 bit size */ 980 1 /* limit granularity (byte/page units)*/ }, 981}; 982 983static struct soft_segment_descriptor ldt_segs[] = { 984 /* Null Descriptor - overwritten by call gate */ 985{ 0x0, /* segment base address */ 986 0x0, /* length - all address space */ 987 0, /* segment type */ 988 0, /* segment descriptor priority level */ 989 0, /* segment descriptor present */ 990 0, 0, 991 0, /* default 32 vs 16 bit size */ 992 0 /* limit granularity (byte/page units)*/ }, 993 /* Null Descriptor - overwritten by call gate */ 994{ 0x0, /* segment base address */ 995 0x0, /* length - all address space */ 996 0, /* segment type */ 997 0, /* segment descriptor priority level */ 998 0, /* segment descriptor present */ 999 0, 0, 1000 0, /* default 32 vs 16 bit size */ 1001 0 /* limit granularity (byte/page units)*/ }, 1002 /* Null Descriptor - overwritten by call gate */ 1003{ 0x0, /* segment base address */ 1004 0x0, /* length - all address space */ 1005 0, /* segment type */ 1006 0, /* segment descriptor priority level */ 1007 0, /* segment descriptor present */ 1008 0, 0, 1009 0, /* default 32 vs 16 bit size */ 1010 0 /* limit granularity 
(byte/page units)*/ }, 1011 /* Code Descriptor for user */ 1012{ 0x0, /* segment base address */ 1013 0xfffff, /* length - all address space */ 1014 SDT_MEMERA, /* segment type */ 1015 SEL_UPL, /* segment descriptor priority level */ 1016 1, /* segment descriptor present */ 1017 0, 0, 1018 1, /* default 32 vs 16 bit size */ 1019 1 /* limit granularity (byte/page units)*/ }, 1020 /* Data Descriptor for user */ 1021{ 0x0, /* segment base address */ 1022 0xfffff, /* length - all address space */ 1023 SDT_MEMRWA, /* segment type */ 1024 SEL_UPL, /* segment descriptor priority level */ 1025 1, /* segment descriptor present */ 1026 0, 0, 1027 1, /* default 32 vs 16 bit size */ 1028 1 /* limit granularity (byte/page units)*/ }, 1029}; 1030 1031void 1032setidt(idx, func, typ, dpl, selec) 1033 int idx; 1034 inthand_t *func; 1035 int typ; 1036 int dpl; 1037 int selec; 1038{ 1039 struct gate_descriptor *ip = idt + idx; 1040 1041 ip->gd_looffset = (int)func; 1042 ip->gd_selector = selec; 1043 ip->gd_stkcpy = 0; 1044 ip->gd_xx = 0; 1045 ip->gd_type = typ; 1046 ip->gd_dpl = dpl; 1047 ip->gd_p = 1; 1048 ip->gd_hioffset = ((int)func)>>16 ; 1049} 1050 1051#define IDTVEC(name) __CONCAT(X,name) 1052 1053extern inthand_t 1054 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), 1055 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), 1056 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), 1057 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), 1058 IDTVEC(syscall), IDTVEC(int0x80_syscall); 1059 1060void 1061sdtossd(sd, ssd) 1062 struct segment_descriptor *sd; 1063 struct soft_segment_descriptor *ssd; 1064{ 1065 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; 1066 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; 1067 ssd->ssd_type = sd->sd_type; 1068 ssd->ssd_dpl = sd->sd_dpl; 1069 ssd->ssd_p = sd->sd_p; 1070 ssd->ssd_def32 = sd->sd_def32; 1071 ssd->ssd_gran = sd->sd_gran; 1072} 1073 1074void 1075init386(first) 1076 int first; 1077{ 1078 int x; 1079 unsigned biosbasemem, biosextmem; 1080 struct gate_descriptor *gdp; 1081 int gsel_tss; 1082 1083 struct isa_device *idp; 1084#ifndef SMP 1085 /* table descriptors - used to load tables by microp */ 1086 struct region_descriptor r_gdt, r_idt; 1087#endif 1088 int pagesinbase, pagesinext; 1089 int target_page, pa_indx; 1090 int off; 1091 int speculative_mprobe; 1092 1093 /* 1094 * Prevent lowering of the ipl if we call tsleep() early. 1095 */ 1096 safepri = cpl; 1097 1098 proc0.p_addr = proc0paddr; 1099 1100 atdevbase = ISA_HOLE_START + KERNBASE; 1101 1102 /* 1103 * Initialize the console before we print anything out. 1104 */ 1105 cninit(); 1106 1107 /* 1108 * make gdt memory segments, the code segment goes up to end of the 1109 * page with etext in it, the data segment goes to the end of 1110 * the address space 1111 */ 1112 /* 1113 * XXX text protection is temporarily (?) disabled. The limit was 1114 * i386_btop(round_page(etext)) - 1. 1115 */ 1116 gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; 1117 gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; 1118#ifdef BDE_DEBUGGER 1119#define NGDT1 8 /* avoid overwriting db entries with APM ones */ 1120#else 1121#define NGDT1 (sizeof gdt_segs / sizeof gdt_segs[0]) 1122#endif 1123 for (x = 0; x < NGDT1; x++) 1124 ssdtosd(&gdt_segs[x], &gdt[x].sd); 1125#ifdef VM86 1126 common_tssd = gdt[GPROC0_SEL].sd; 1127#endif /* VM86 */ 1128 1129#ifdef SMP 1130 /* 1131 * Spin these up now. init_secondary() grabs them. 
We could use 1132 * #for(x,y,z) / #endfor cpp directives if they existed. 1133 */ 1134 for (x = 0; x < NCPU; x++) { 1135 gdt_segs[NGDT + x] = gdt_segs[GPROC0_SEL]; 1136 ssdtosd(&gdt_segs[NGDT + x], &gdt[NGDT + x].sd); 1137 } 1138#endif 1139 1140 /* make ldt memory segments */ 1141 /* 1142 * The data segment limit must not cover the user area because we 1143 * don't want the user area to be writable in copyout() etc. (page 1144 * level protection is lost in kernel mode on 386's). Also, we 1145 * don't want the user area to be writable directly (page level 1146 * protection of the user area is not available on 486's with 1147 * CR0_WP set, because there is no user-read/kernel-write mode). 1148 * 1149 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it 1150 * should be spelled ...MAX_USER... 1151 */ 1152#define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS 1153 /* 1154 * The code segment limit has to cover the user area until we move 1155 * the signal trampoline out of the user area. This is safe because 1156 * the code segment cannot be written to directly. 1157 */ 1158#define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) 1159 ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; 1160 ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; 1161 for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) 1162 ssdtosd(&ldt_segs[x], &ldt[x].sd); 1163 1164 /* exceptions */ 1165 for (x = 0; x < NIDT; x++) 1166 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1167 setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1168 setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1169 setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1170 setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1171 setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1172 setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1173 setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1174 setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1175 setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); 1176 setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1177 setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1178 setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1179 setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1180 setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1181 setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1182 setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1183 setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1184 setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1185 setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1186 setidt(0x80, &IDTVEC(int0x80_syscall), 1187 SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1188 1189#include "isa.h" 1190#if NISA >0 1191 isa_defaultirq(); 1192#endif 1193 rand_initialize(); 1194 1195 r_gdt.rd_limit = sizeof(gdt) - 1; 1196 r_gdt.rd_base = (int) gdt; 1197 lgdt(&r_gdt); 1198 1199 r_idt.rd_limit = sizeof(idt) - 1; 1200 r_idt.rd_base = (int) idt; 1201 lidt(&r_idt); 1202 1203 _default_ldt = GSEL(GLDT_SEL, SEL_KPL); 1204 
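	/*
	 * Worked example (added for illustration, assuming the conventional
	 * GSEL() encoding from <machine/segments.h>, i.e. selector =
	 * (index << 3) | rpl with the table-indicator bit clear for the
	 * GDT): GLDT_SEL is GDT slot 3 and SEL_KPL is ring 0, so
	 * _default_ldt becomes (3 << 3) | 0 = 0x18, the selector loaded
	 * into the LDT register by the lldt() below.
	 */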
lldt(_default_ldt); 1205 currentldt = _default_ldt; 1206 1207#ifdef DDB 1208 kdb_init(); 1209 if (boothowto & RB_KDB) 1210 Debugger("Boot flags requested debugger"); 1211#endif 1212 1213 finishidentcpu(); /* Final stage of CPU initialization */ 1214 setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1215 initializecpu(); /* Initialize CPU registers */ 1216 1217 /* Use BIOS values stored in RTC CMOS RAM, since probing 1218 * breaks certain 386 AT relics. 1219 */ 1220 biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); 1221 biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); 1222 1223 /* 1224 * If BIOS tells us that it has more than 640k in the basemem, 1225 * don't believe it - set it to 640k. 1226 */ 1227 if (biosbasemem > 640) { 1228 printf("Preposterous RTC basemem of %dK, truncating to 640K\n", 1229 biosbasemem); 1230 biosbasemem = 640; 1231 } 1232 if (bootinfo.bi_memsizes_valid && bootinfo.bi_basemem > 640) { 1233 printf("Preposterous BIOS basemem of %dK, truncating to 640K\n", 1234 bootinfo.bi_basemem); 1235 bootinfo.bi_basemem = 640; 1236 } 1237 1238 /* 1239 * Warn if the official BIOS interface disagrees with the RTC 1240 * interface used above about the amount of base memory or the 1241 * amount of extended memory. Prefer the BIOS value for the base 1242 * memory. This is necessary for machines that `steal' base 1243 * memory for use as BIOS memory, at least if we are going to use 1244 * the BIOS for apm. Prefer the RTC value for extended memory. 1245 * Eventually the hackish interface shouldn't even be looked at. 1246 */ 1247 if (bootinfo.bi_memsizes_valid) { 1248 if (bootinfo.bi_basemem != biosbasemem) { 1249 vm_offset_t pa; 1250 1251 printf( 1252 "BIOS basemem (%ldK) != RTC basemem (%dK), setting to BIOS value\n", 1253 bootinfo.bi_basemem, biosbasemem); 1254 biosbasemem = bootinfo.bi_basemem; 1255 1256 /* 1257 * XXX if biosbasemem is now < 640, there is `hole' 1258 * between the end of base memory and the start of 1259 * ISA memory. The hole may be empty or it may 1260 * contain BIOS code or data. Map it read/write so 1261 * that the BIOS can write to it. (Memory from 0 to 1262 * the physical end of the kernel is mapped read-only 1263 * to begin with and then parts of it are remapped. 1264 * The parts that aren't remapped form holes that 1265 * remain read-only and are unused by the kernel. 1266 * The base memory area is below the physical end of 1267 * the kernel and right now forms a read-only hole. 1268 * The part of it from 0 to 1269 * (trunc_page(biosbasemem * 1024) - 1) will be 1270 * remapped and used by the kernel later.) 1271 * 1272 * This code is similar to the code used in 1273 * pmap_mapdev, but since no memory needs to be 1274 * allocated we simply change the mapping. 
1275 */ 1276 for (pa = trunc_page(biosbasemem * 1024); 1277 pa < ISA_HOLE_START; pa += PAGE_SIZE) { 1278 unsigned *pte; 1279 1280 pte = (unsigned *)vtopte(pa + KERNBASE); 1281 *pte = pa | PG_RW | PG_V; 1282 } 1283 } 1284 if (bootinfo.bi_extmem != biosextmem) 1285 printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n", 1286 bootinfo.bi_extmem, biosextmem); 1287 } 1288 1289#ifdef SMP 1290 /* make hole for AP bootstrap code */ 1291 pagesinbase = mp_bootaddress(biosbasemem) / PAGE_SIZE; 1292#else 1293 pagesinbase = biosbasemem * 1024 / PAGE_SIZE; 1294#endif 1295 1296 pagesinext = biosextmem * 1024 / PAGE_SIZE; 1297 1298 /* 1299 * Special hack for chipsets that still remap the 384k hole when 1300 * there's 16MB of memory - this really confuses people that 1301 * are trying to use bus mastering ISA controllers with the 1302 * "16MB limit"; they only have 16MB, but the remapping puts 1303 * them beyond the limit. 1304 */ 1305 /* 1306 * If extended memory is between 15-16MB (16-17MB phys address range), 1307 * chop it to 15MB. 1308 */ 1309 if ((pagesinext > 3840) && (pagesinext < 4096)) 1310 pagesinext = 3840; 1311 1312 /* 1313 * Maxmem isn't the "maximum memory", it's one larger than the 1314 * highest page of the physical address space. It should be 1315 * called something like "Maxphyspage". 1316 */ 1317 Maxmem = pagesinext + 0x100000/PAGE_SIZE; 1318 /* 1319 * Indicate that we wish to do a speculative search for memory beyond 1320 * the end of the reported size if the indicated amount is 64MB (0x4000 1321 * pages) - which is the largest amount that the BIOS/bootblocks can 1322 * currently report. If a specific amount of memory is indicated via 1323 * the MAXMEM option or the npx0 "msize", then don't do the speculative 1324 * memory probe. 1325 */ 1326 if (Maxmem >= 0x4000) 1327 speculative_mprobe = TRUE; 1328 else 1329 speculative_mprobe = FALSE; 1330 1331#ifdef MAXMEM 1332 Maxmem = MAXMEM/4; 1333 speculative_mprobe = FALSE; 1334#endif 1335 1336#if NNPX > 0 1337 idp = find_isadev(isa_devtab_null, &npxdriver, 0); 1338 if (idp != NULL && idp->id_msize != 0) { 1339 Maxmem = idp->id_msize / 4; 1340 speculative_mprobe = FALSE; 1341 } 1342#endif 1343 1344#ifdef SMP 1345 /* look for the MP hardware - needed for apic addresses */ 1346 mp_probe(); 1347#endif 1348 1349 /* call pmap initialization to make new kernel address space */ 1350 pmap_bootstrap (first, 0); 1351 1352 /* 1353 * Size up each available chunk of physical memory. 1354 */ 1355 1356 /* 1357 * We currently don't bother testing base memory. 1358 * XXX ...but we probably should. 
1359 */ 1360 pa_indx = 0; 1361 if (pagesinbase > 1) { 1362 phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ 1363 phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ 1364 physmem = pagesinbase - 1; 1365 } else { 1366 /* point at first chunk end */ 1367 pa_indx++; 1368 } 1369 1370 for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { 1371 int tmp, page_bad; 1372 1373 page_bad = FALSE; 1374 1375 /* 1376 * map page into kernel: valid, read/write, non-cacheable 1377 */ 1378 *(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page; 1379 invltlb(); 1380 1381 tmp = *(int *)CADDR1; 1382 /* 1383 * Test for alternating 1's and 0's 1384 */ 1385 *(volatile int *)CADDR1 = 0xaaaaaaaa; 1386 if (*(volatile int *)CADDR1 != 0xaaaaaaaa) { 1387 page_bad = TRUE; 1388 } 1389 /* 1390 * Test for alternating 0's and 1's 1391 */ 1392 *(volatile int *)CADDR1 = 0x55555555; 1393 if (*(volatile int *)CADDR1 != 0x55555555) { 1394 page_bad = TRUE; 1395 } 1396 /* 1397 * Test for all 1's 1398 */ 1399 *(volatile int *)CADDR1 = 0xffffffff; 1400 if (*(volatile int *)CADDR1 != 0xffffffff) { 1401 page_bad = TRUE; 1402 } 1403 /* 1404 * Test for all 0's 1405 */ 1406 *(volatile int *)CADDR1 = 0x0; 1407 if (*(volatile int *)CADDR1 != 0x0) { 1408 /* 1409 * test of page failed 1410 */ 1411 page_bad = TRUE; 1412 } 1413 /* 1414 * Restore original value. 1415 */ 1416 *(int *)CADDR1 = tmp; 1417 1418 /* 1419 * Adjust array of valid/good pages. 1420 */ 1421 if (page_bad == FALSE) { 1422 /* 1423 * If this good page is a continuation of the 1424 * previous set of good pages, then just increase 1425 * the end pointer. Otherwise start a new chunk. 1426 * Note that "end" points one higher than end, 1427 * making the range >= start and < end. 1428 * If we're also doing a speculative memory 1429 * test and we at or past the end, bump up Maxmem 1430 * so that we keep going. The first bad page 1431 * will terminate the loop. 1432 */ 1433 if (phys_avail[pa_indx] == target_page) { 1434 phys_avail[pa_indx] += PAGE_SIZE; 1435 if (speculative_mprobe == TRUE && 1436 phys_avail[pa_indx] >= (64*1024*1024)) 1437 Maxmem++; 1438 } else { 1439 pa_indx++; 1440 if (pa_indx == PHYS_AVAIL_ARRAY_END) { 1441 printf("Too many holes in the physical address space, giving up\n"); 1442 pa_indx--; 1443 break; 1444 } 1445 phys_avail[pa_indx++] = target_page; /* start */ 1446 phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ 1447 } 1448 physmem++; 1449 } 1450 } 1451 1452 *(int *)CMAP1 = 0; 1453 invltlb(); 1454 1455 /* 1456 * XXX 1457 * The last chunk must contain at least one page plus the message 1458 * buffer to avoid complicating other code (message buffer address 1459 * calculation, etc.). 1460 */ 1461 while (phys_avail[pa_indx - 1] + PAGE_SIZE + 1462 round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) { 1463 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); 1464 phys_avail[pa_indx--] = 0; 1465 phys_avail[pa_indx--] = 0; 1466 } 1467 1468 Maxmem = atop(phys_avail[pa_indx]); 1469 1470 /* Trim off space for the message buffer. */ 1471 phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf)); 1472 1473 avail_end = phys_avail[pa_indx]; 1474 1475 /* now running on new page tables, configured,and u/iom is accessible */ 1476 1477 /* Map the message buffer. 
*/ 1478 for (off = 0; off < round_page(sizeof(struct msgbuf)); off += PAGE_SIZE) 1479 pmap_enter(kernel_pmap, (vm_offset_t)msgbufp + off, 1480 avail_end + off, VM_PROT_ALL, TRUE); 1481 msgbufmapped = 1; 1482 1483 /* make an initial tss so cpu can get interrupt stack on syscall! */ 1484#ifdef VM86 1485 common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16; 1486#else 1487 common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE; 1488#endif /* VM86 */ 1489 common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; 1490 common_tss.tss_ioopt = (sizeof common_tss) << 16; 1491 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 1492 ltr(gsel_tss); 1493#ifdef VM86 1494 private_tss = 0; 1495 my_tr = GPROC0_SEL; 1496#endif 1497 1498 dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = 1499 dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; 1500 dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = 1501 dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); 1502 dblfault_tss.tss_cr3 = (int)IdlePTD; 1503 dblfault_tss.tss_eip = (int) dblfault_handler; 1504 dblfault_tss.tss_eflags = PSL_KERNEL; 1505 dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = 1506 dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); 1507 dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); 1508 dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); 1509 1510 /* make a call gate to reenter kernel with */ 1511 gdp = &ldt[LSYS5CALLS_SEL].gd; 1512 1513 x = (int) &IDTVEC(syscall); 1514 gdp->gd_looffset = x++; 1515 gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); 1516 gdp->gd_stkcpy = 1; 1517 gdp->gd_type = SDT_SYS386CGT; 1518 gdp->gd_dpl = SEL_UPL; 1519 gdp->gd_p = 1; 1520 gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; 1521 1522 /* XXX does this work? */ 1523 ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; 1524 1525 /* transfer to user mode */ 1526 1527 _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); 1528 _udatasel = LSEL(LUDATA_SEL, SEL_UPL); 1529 1530 /* setup proc 0's pcb */ 1531 proc0.p_addr->u_pcb.pcb_flags = 0; 1532 proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD; 1533 proc0.p_addr->u_pcb.pcb_mpnest = 1; 1534 proc0.p_addr->u_pcb.pcb_ext = 0; 1535} 1536 1537int 1538ptrace_set_pc(p, addr) 1539 struct proc *p; 1540 unsigned int addr; 1541{ 1542 p->p_md.md_regs->tf_eip = addr; 1543 return (0); 1544} 1545 1546int 1547ptrace_single_step(p) 1548 struct proc *p; 1549{ 1550 p->p_md.md_regs->tf_eflags |= PSL_T; 1551 return (0); 1552} 1553 1554int ptrace_write_u(p, off, data) 1555 struct proc *p; 1556 vm_offset_t off; 1557 int data; 1558{ 1559 struct trapframe frame_copy; 1560 vm_offset_t min; 1561 struct trapframe *tp; 1562 1563 /* 1564 * Privileged kernel state is scattered all over the user area. 1565 * Only allow write access to parts of regs and to fpregs. 
1566 */ 1567 min = (char *)p->p_md.md_regs - (char *)p->p_addr; 1568 if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { 1569 tp = p->p_md.md_regs; 1570 frame_copy = *tp; 1571 *(int *)((char *)&frame_copy + (off - min)) = data; 1572 if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || 1573 !CS_SECURE(frame_copy.tf_cs)) 1574 return (EINVAL); 1575 *(int*)((char *)p->p_addr + off) = data; 1576 return (0); 1577 } 1578 min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); 1579 if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { 1580 *(int*)((char *)p->p_addr + off) = data; 1581 return (0); 1582 } 1583 return (EFAULT); 1584} 1585 1586int 1587fill_regs(p, regs) 1588 struct proc *p; 1589 struct reg *regs; 1590{ 1591 struct pcb *pcb; 1592 struct trapframe *tp; 1593 1594 tp = p->p_md.md_regs; 1595 regs->r_es = tp->tf_es; 1596 regs->r_ds = tp->tf_ds; 1597 regs->r_edi = tp->tf_edi; 1598 regs->r_esi = tp->tf_esi; 1599 regs->r_ebp = tp->tf_ebp; 1600 regs->r_ebx = tp->tf_ebx; 1601 regs->r_edx = tp->tf_edx; 1602 regs->r_ecx = tp->tf_ecx; 1603 regs->r_eax = tp->tf_eax; 1604 regs->r_eip = tp->tf_eip; 1605 regs->r_cs = tp->tf_cs; 1606 regs->r_eflags = tp->tf_eflags; 1607 regs->r_esp = tp->tf_esp; 1608 regs->r_ss = tp->tf_ss; 1609 pcb = &p->p_addr->u_pcb; 1610 regs->r_fs = pcb->pcb_fs; 1611 regs->r_gs = pcb->pcb_gs; 1612 return (0); 1613} 1614 1615int 1616set_regs(p, regs) 1617 struct proc *p; 1618 struct reg *regs; 1619{ 1620 struct pcb *pcb; 1621 struct trapframe *tp; 1622 1623 tp = p->p_md.md_regs; 1624 if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) || 1625 !CS_SECURE(regs->r_cs)) 1626 return (EINVAL); 1627 tp->tf_es = regs->r_es; 1628 tp->tf_ds = regs->r_ds; 1629 tp->tf_edi = regs->r_edi; 1630 tp->tf_esi = regs->r_esi; 1631 tp->tf_ebp = regs->r_ebp; 1632 tp->tf_ebx = regs->r_ebx; 1633 tp->tf_edx = regs->r_edx; 1634 tp->tf_ecx = regs->r_ecx; 1635 tp->tf_eax = regs->r_eax; 1636 tp->tf_eip = regs->r_eip; 1637 tp->tf_cs = regs->r_cs; 1638 tp->tf_eflags = regs->r_eflags; 1639 tp->tf_esp = regs->r_esp; 1640 tp->tf_ss = regs->r_ss; 1641 pcb = &p->p_addr->u_pcb; 1642 pcb->pcb_fs = regs->r_fs; 1643 pcb->pcb_gs = regs->r_gs; 1644 return (0); 1645} 1646 1647#ifndef DDB 1648void 1649Debugger(const char *msg) 1650{ 1651 printf("Debugger(\"%s\") called.\n", msg); 1652} 1653#endif /* no DDB */ 1654 1655#include <sys/disklabel.h> 1656 1657/* 1658 * Determine the size of the transfer, and make sure it is 1659 * within the boundaries of the partition. Adjust transfer 1660 * if needed, and signal errors or early completion. 1661 */ 1662int 1663bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) 1664{ 1665 struct partition *p = lp->d_partitions + dkpart(bp->b_dev); 1666 int labelsect = lp->d_partitions[0].p_offset; 1667 int maxsz = p->p_size, 1668 sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; 1669 1670 /* overwriting disk label ? */ 1671 /* XXX should also protect bootstrap in first 8K */ 1672 if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect && 1673#if LABELSECTOR != 0 1674 bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect && 1675#endif 1676 (bp->b_flags & B_READ) == 0 && wlabel == 0) { 1677 bp->b_error = EROFS; 1678 goto bad; 1679 } 1680 1681#if defined(DOSBBSECTOR) && defined(notyet) 1682 /* overwriting master boot record? */ 1683 if (bp->b_blkno + p->p_offset <= DOSBBSECTOR && 1684 (bp->b_flags & B_READ) == 0 && wlabel == 0) { 1685 bp->b_error = EROFS; 1686 goto bad; 1687 } 1688#endif 1689 1690 /* beyond partition? 
*/ 1691 if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) { 1692 /* if exactly at end of disk, return an EOF */ 1693 if (bp->b_blkno == maxsz) { 1694 bp->b_resid = bp->b_bcount; 1695 return(0); 1696 } 1697 /* or truncate if part of it fits */ 1698 sz = maxsz - bp->b_blkno; 1699 if (sz <= 0) { 1700 bp->b_error = EINVAL; 1701 goto bad; 1702 } 1703 bp->b_bcount = sz << DEV_BSHIFT; 1704 } 1705 1706 bp->b_pblkno = bp->b_blkno + p->p_offset; 1707 return(1); 1708 1709bad: 1710 bp->b_flags |= B_ERROR; 1711 return(-1); 1712} 1713 1714#ifdef DDB 1715 1716/* 1717 * Provide inb() and outb() as functions. They are normally only 1718 * available as macros calling inlined functions, thus cannot be 1719 * called inside DDB. 1720 * 1721 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined. 1722 */ 1723 1724#undef inb 1725#undef outb 1726 1727/* silence compiler warnings */ 1728u_char inb(u_int); 1729void outb(u_int, u_char); 1730 1731u_char 1732inb(u_int port) 1733{ 1734 u_char data; 1735 /* 1736 * We use %%dx and not %1 here because i/o is done at %dx and not at 1737 * %edx, while gcc generates inferior code (movw instead of movl) 1738 * if we tell it to load (u_short) port. 1739 */ 1740 __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); 1741 return (data); 1742} 1743 1744void 1745outb(u_int port, u_char data) 1746{ 1747 u_char al; 1748 /* 1749 * Use an unnecessary assignment to help gcc's register allocator. 1750 * This make a large difference for gcc-1.40 and a tiny difference 1751 * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for 1752 * best results. gcc-2.6.0 can't handle this. 1753 */ 1754 al = data; 1755 __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); 1756} 1757 1758#endif /* DDB */ 1759
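
/*
 * Illustrative sketch only (not part of the original file): a disk
 * driver's strategy routine typically uses bounds_check_with_label()
 * as shown below.  The driver names are hypothetical; the point is the
 * return-value contract: 1 means the (possibly clipped) transfer may
 * proceed and bp->b_pblkno holds the absolute sector, 0 means EOF with
 * b_resid already set, and -1 means an error with B_ERROR already set.
 *
 *	void
 *	foostrategy(struct buf *bp)
 *	{
 *		struct disklabel *lp = foodisklabel(bp->b_dev);
 *
 *		if (bounds_check_with_label(bp, lp, foo_writelabel) <= 0) {
 *			biodone(bp);
 *			return;
 *		}
 *		fooqueue(bp);
 *	}
 */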