machdep.c revision 29109
1/*- 2 * Copyright (c) 1992 Terrence R. Lambert. 3 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 38 * $Id: machdep.c,v 1.261 1997/09/02 20:05:28 bde Exp $ 39 */ 40 41#include "apm.h" 42#include "npx.h" 43#include "opt_sysvipc.h" 44#include "opt_ddb.h" 45#include "opt_bounce.h" 46#include "opt_maxmem.h" 47#include "opt_perfmon.h" 48#include "opt_smp.h" 49#include "opt_userconfig.h" 50 51#include <sys/param.h> 52#include <sys/systm.h> 53#include <sys/sysproto.h> 54#include <sys/signalvar.h> 55#include <sys/kernel.h> 56#include <sys/proc.h> 57#include <sys/buf.h> 58#include <sys/reboot.h> 59#include <sys/conf.h> 60#include <sys/callout.h> 61#include <sys/malloc.h> 62#include <sys/mbuf.h> 63#include <sys/msgbuf.h> 64#include <sys/sysent.h> 65#include <sys/sysctl.h> 66#include <sys/vmmeter.h> 67 68#ifdef SYSVSHM 69#include <sys/shm.h> 70#endif 71 72#ifdef SYSVMSG 73#include <sys/msg.h> 74#endif 75 76#ifdef SYSVSEM 77#include <sys/sem.h> 78#endif 79 80#include <vm/vm.h> 81#include <vm/vm_param.h> 82#include <vm/vm_prot.h> 83#include <sys/lock.h> 84#include <vm/vm_kern.h> 85#include <vm/vm_object.h> 86#include <vm/vm_page.h> 87#include <vm/vm_map.h> 88#include <vm/vm_pager.h> 89#include <vm/vm_extern.h> 90 91#include <sys/user.h> 92#include <sys/exec.h> 93 94#include <ddb/ddb.h> 95 96#include <net/netisr.h> 97 98#if NAPM > 0 99#include <machine/apm_bios.h> 100#endif 101#include <machine/cpu.h> 102#include <machine/reg.h> 103#include <machine/clock.h> 104#include <machine/specialreg.h> 105#include <machine/cons.h> 106#include <machine/bootinfo.h> 107#include <machine/md_var.h> 108#ifdef SMP 109#include <machine/smp.h> 110#endif 111#ifdef PERFMON 112#include <machine/perfmon.h> 113#endif 114 115#include <i386/isa/isa_device.h> 116#include <i386/isa/intr_machdep.h> 117#include <i386/isa/rtc.h> 118#include <machine/random.h> 119 120extern void init386 __P((int first)); 121extern int ptrace_set_pc __P((struct proc *p, unsigned int addr)); 122extern int ptrace_single_step __P((struct proc *p)); 123extern int ptrace_write_u __P((struct proc *p, vm_offset_t off, int data)); 124extern void dblfault_handler __P((void)); 125 126extern void printcpuinfo(void); /* XXX header file */ 127extern void earlysetcpuclass(void); /* same header file */ 128extern void finishidentcpu(void); 129extern void panicifcpuunsupported(void); 130extern void initializecpu(void); 131 132static void cpu_startup __P((void *)); 133SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) 134 135 136#ifdef BOUNCE_BUFFERS 137extern char *bouncememory; 138extern int maxbkva; 139#ifdef BOUNCEPAGES 140int bouncepages = BOUNCEPAGES; 141#else 142int bouncepages = 0; 143#endif 144#endif /* BOUNCE_BUFFERS */ 145 146extern int freebufspace; 147int msgbufmapped = 0; /* set when safe to use msgbuf */ 148int _udatasel, _ucodesel; 149u_int atdevbase; 150 151 152int physmem = 0; 153int cold = 1; 154 155static int 156sysctl_hw_physmem SYSCTL_HANDLER_ARGS 157{ 158 int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); 159 return (error); 160} 161 162SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 163 0, 0, sysctl_hw_physmem, "I", ""); 164 165static int 166sysctl_hw_usermem SYSCTL_HANDLER_ARGS 167{ 168 int error = sysctl_handle_int(oidp, 0, 169 ctob(physmem - cnt.v_wire_count), req); 170 return (error); 171} 172 173SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 174 0, 0, sysctl_hw_usermem, "I", ""); 175 176int boothowto = 0, bootverbose = 0, Maxmem = 0; 177long dumplo; 178extern int bootdev; 179 180vm_offset_t phys_avail[10]; 181 182/* must be 2 less so 0 0 can signal end of chunks */ 183#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) 184 185static void setup_netisrs __P((struct linker_set *)); /* XXX declare elsewhere */ 186 187static vm_offset_t buffer_sva, buffer_eva; 188vm_offset_t clean_sva, clean_eva; 189static vm_offset_t pager_sva, pager_eva; 190extern struct linker_set netisr_set; 191 192#define offsetof(type, member) ((size_t)(&((type *)0)->member)) 193 194static void 195cpu_startup(dummy) 196 void *dummy; 197{ 198 register unsigned i; 199 register caddr_t v; 200 vm_offset_t maxaddr; 201 vm_size_t size = 0; 202 int firstaddr; 203 vm_offset_t minaddr; 204 205 if (boothowto & RB_VERBOSE) 206 bootverbose++; 207 208 /* 209 * Good {morning,afternoon,evening,night}. 210 */ 211 printf(version); 212 earlysetcpuclass(); 213 startrtclock(); 214 printcpuinfo(); 215 panicifcpuunsupported(); 216#ifdef PERFMON 217 perfmon_init(); 218#endif 219 printf("real memory = %d (%dK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024); 220 /* 221 * Display any holes after the first chunk of extended memory. 222 */ 223 if (bootverbose) { 224 int indx; 225 226 printf("Physical memory chunk(s):\n"); 227 for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { 228 int size = phys_avail[indx + 1] - phys_avail[indx]; 229 230 printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx], 231 phys_avail[indx + 1] - 1, size, size / PAGE_SIZE); 232 } 233 } 234 235 /* 236 * Quickly wire in netisrs. 237 */ 238 setup_netisrs(&netisr_set); 239 240 /* 241 * Allocate space for system data structures. 242 * The first available kernel virtual address is in "v". 243 * As pages of kernel virtual memory are allocated, "v" is incremented. 244 * As pages of memory are allocated and cleared, 245 * "firstaddr" is incremented. 246 * An index into the kernel page table corresponding to the 247 * virtual memory address maintained in "v" is kept in "mapaddr". 248 */ 249 250 /* 251 * Make two passes. The first pass calculates how much memory is 252 * needed and allocates it. The second pass assigns virtual 253 * addresses to the various data structures. 254 */ 255 firstaddr = 0; 256again: 257 v = (caddr_t)firstaddr; 258 259#define valloc(name, type, num) \ 260 (name) = (type *)v; v = (caddr_t)((name)+(num)) 261#define valloclim(name, type, num, lim) \ 262 (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) 263 valloc(callout, struct callout, ncallout); 264#ifdef SYSVSHM 265 valloc(shmsegs, struct shmid_ds, shminfo.shmmni); 266#endif 267#ifdef SYSVSEM 268 valloc(sema, struct semid_ds, seminfo.semmni); 269 valloc(sem, struct sem, seminfo.semmns); 270 /* This is pretty disgusting! */ 271 valloc(semu, int, (seminfo.semmnu * seminfo.semusz) / sizeof(int)); 272#endif 273#ifdef SYSVMSG 274 valloc(msgpool, char, msginfo.msgmax); 275 valloc(msgmaps, struct msgmap, msginfo.msgseg); 276 valloc(msghdrs, struct msg, msginfo.msgtql); 277 valloc(msqids, struct msqid_ds, msginfo.msgmni); 278#endif 279 280 if (nbuf == 0) { 281 nbuf = 30; 282 if( physmem > 1024) 283 nbuf += min((physmem - 1024) / 8, 2048); 284 } 285 nswbuf = max(min(nbuf/4, 128), 16); 286 287 valloc(swbuf, struct buf, nswbuf); 288 valloc(buf, struct buf, nbuf); 289 290#ifdef BOUNCE_BUFFERS 291 /* 292 * If there is more than 16MB of memory, allocate some bounce buffers 293 */ 294 if (Maxmem > 4096) { 295 if (bouncepages == 0) { 296 bouncepages = 64; 297 bouncepages += ((Maxmem - 4096) / 2048) * 32; 298 if (bouncepages > 128) 299 bouncepages = 128; 300 } 301 v = (caddr_t)((vm_offset_t)round_page(v)); 302 valloc(bouncememory, char, bouncepages * PAGE_SIZE); 303 } 304#endif 305 306 /* 307 * End of first pass, size has been calculated so allocate memory 308 */ 309 if (firstaddr == 0) { 310 size = (vm_size_t)(v - firstaddr); 311 firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); 312 if (firstaddr == 0) 313 panic("startup: no room for tables"); 314 goto again; 315 } 316 317 /* 318 * End of second pass, addresses have been assigned 319 */ 320 if ((vm_size_t)(v - firstaddr) != size) 321 panic("startup: table size inconsistency"); 322 323#ifdef BOUNCE_BUFFERS 324 clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, 325 (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + 326 maxbkva + pager_map_size, TRUE); 327 io_map = kmem_suballoc(clean_map, &minaddr, &maxaddr, maxbkva, FALSE); 328#else 329 clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, 330 (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size, TRUE); 331#endif 332 buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, 333 (nbuf*BKVASIZE), TRUE); 334 pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, 335 (nswbuf*MAXPHYS) + pager_map_size, TRUE); 336 pager_map->system_map = 1; 337 exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 338 (16*ARG_MAX), TRUE); 339 u_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 340 (maxproc*UPAGES*PAGE_SIZE), FALSE); 341 342 /* 343 * Finally, allocate mbuf pool. Since mclrefcnt is an off-size 344 * we use the more space efficient malloc in place of kmem_alloc. 345 */ 346 { 347 vm_offset_t mb_map_size; 348 349 mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; 350 mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); 351 mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); 352 bzero(mclrefcnt, mb_map_size / MCLBYTES); 353 mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, 354 mb_map_size, FALSE); 355 mb_map->system_map = 1; 356 } 357 358 /* 359 * Initialize callouts 360 */ 361 callfree = callout; 362 for (i = 1; i < ncallout; i++) 363 callout[i-1].c_next = &callout[i]; 364 365#if defined(USERCONFIG) 366#if defined(USERCONFIG_BOOT) 367 if (1) { 368#else 369 if (boothowto & RB_CONFIG) { 370#endif 371 userconfig(); 372 cninit(); /* the preferred console may have changed */ 373 } 374#endif 375 376#ifdef BOUNCE_BUFFERS 377 /* 378 * init bounce buffers 379 */ 380 vm_bounce_init(); 381#endif 382 383 printf("avail memory = %d (%dK bytes)\n", ptoa(cnt.v_free_count), 384 ptoa(cnt.v_free_count) / 1024); 385 386 /* 387 * Set up buffers, so they can be used to read disk labels. 388 */ 389 bufinit(); 390 vm_pager_bufferinit(); 391 392#ifdef SMP 393 /* 394 * OK, enough kmem_alloc/malloc state should be up, lets get on with it! 395 */ 396 mp_start(); /* fire up the APs and APICs */ 397 mp_announce(); 398#endif /* SMP */ 399} 400 401int 402register_netisr(num, handler) 403 int num; 404 netisr_t *handler; 405{ 406 407 if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { 408 printf("register_netisr: bad isr number: %d\n", num); 409 return (EINVAL); 410 } 411 netisrs[num] = handler; 412 return (0); 413} 414 415static void 416setup_netisrs(ls) 417 struct linker_set *ls; 418{ 419 int i; 420 const struct netisrtab *nit; 421 422 for(i = 0; ls->ls_items[i]; i++) { 423 nit = (const struct netisrtab *)ls->ls_items[i]; 424 register_netisr(nit->nit_num, nit->nit_isr); 425 } 426} 427 428/* 429 * Send an interrupt to process. 430 * 431 * Stack is set up to allow sigcode stored 432 * at top to call routine, followed by kcall 433 * to sigreturn routine below. After sigreturn 434 * resets the signal mask, the stack, and the 435 * frame pointer, it returns to the user 436 * specified pc, psl. 437 */ 438void 439sendsig(catcher, sig, mask, code) 440 sig_t catcher; 441 int sig, mask; 442 u_long code; 443{ 444 register struct proc *p = curproc; 445 register struct trapframe *regs; 446 register struct sigframe *fp; 447 struct sigframe sf; 448 struct sigacts *psp = p->p_sigacts; 449 int oonstack; 450 451 regs = p->p_md.md_regs; 452 oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK; 453 /* 454 * Allocate and validate space for the signal handler context. 455 */ 456 if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack && 457 (psp->ps_sigonstack & sigmask(sig))) { 458 fp = (struct sigframe *)(psp->ps_sigstk.ss_sp + 459 psp->ps_sigstk.ss_size - sizeof(struct sigframe)); 460 psp->ps_sigstk.ss_flags |= SS_ONSTACK; 461 } else { 462 fp = (struct sigframe *)regs->tf_esp - 1; 463 } 464 465 /* 466 * grow() will return FALSE if the fp will not fit inside the stack 467 * and the stack can not be grown. useracc will return FALSE 468 * if access is denied. 469 */ 470 if ((grow(p, (int)fp) == FALSE) || 471 (useracc((caddr_t)fp, sizeof(struct sigframe), B_WRITE) == FALSE)) { 472 /* 473 * Process has trashed its stack; give it an illegal 474 * instruction to halt it in its tracks. 475 */ 476 SIGACTION(p, SIGILL) = SIG_DFL; 477 sig = sigmask(SIGILL); 478 p->p_sigignore &= ~sig; 479 p->p_sigcatch &= ~sig; 480 p->p_sigmask &= ~sig; 481 psignal(p, SIGILL); 482 return; 483 } 484 485 /* 486 * Build the argument list for the signal handler. 487 */ 488 if (p->p_sysent->sv_sigtbl) { 489 if (sig < p->p_sysent->sv_sigsize) 490 sig = p->p_sysent->sv_sigtbl[sig]; 491 else 492 sig = p->p_sysent->sv_sigsize + 1; 493 } 494 sf.sf_signum = sig; 495 sf.sf_code = code; 496 sf.sf_scp = &fp->sf_sc; 497 sf.sf_addr = (char *) regs->tf_err; 498 sf.sf_handler = catcher; 499 500 /* save scratch registers */ 501 sf.sf_sc.sc_eax = regs->tf_eax; 502 sf.sf_sc.sc_ebx = regs->tf_ebx; 503 sf.sf_sc.sc_ecx = regs->tf_ecx; 504 sf.sf_sc.sc_edx = regs->tf_edx; 505 sf.sf_sc.sc_esi = regs->tf_esi; 506 sf.sf_sc.sc_edi = regs->tf_edi; 507 sf.sf_sc.sc_cs = regs->tf_cs; 508 sf.sf_sc.sc_ds = regs->tf_ds; 509 sf.sf_sc.sc_ss = regs->tf_ss; 510 sf.sf_sc.sc_es = regs->tf_es; 511 sf.sf_sc.sc_isp = regs->tf_isp; 512 513 /* 514 * Build the signal context to be used by sigreturn. 515 */ 516 sf.sf_sc.sc_onstack = oonstack; 517 sf.sf_sc.sc_mask = mask; 518 sf.sf_sc.sc_sp = regs->tf_esp; 519 sf.sf_sc.sc_fp = regs->tf_ebp; 520 sf.sf_sc.sc_pc = regs->tf_eip; 521 sf.sf_sc.sc_ps = regs->tf_eflags; 522 sf.sf_sc.sc_trapno = regs->tf_trapno; 523 sf.sf_sc.sc_err = regs->tf_err; 524 525 /* 526 * If we're a vm86 process, we want to save the segment registers. 527 * We also change eflags to be our emulated eflags, not the actual 528 * eflags. 529 */ 530 if (regs->tf_eflags & PSL_VM) { 531 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 532 struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; 533 534 sf.sf_sc.sc_gs = tf->tf_vm86_gs; 535 sf.sf_sc.sc_fs = tf->tf_vm86_fs; 536 sf.sf_sc.sc_es = tf->tf_vm86_es; 537 sf.sf_sc.sc_ds = tf->tf_vm86_ds; 538 539 if (vm86->vm86_has_vme == 0) 540 sf.sf_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) 541 | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); 542 543 /* 544 * We should never have PSL_T set when returning from vm86 545 * mode. It may be set here if we deliver a signal before 546 * getting to vm86 mode, so turn it off. 547 */ 548 tf->tf_eflags &= ~(PSL_VM | PSL_T | PSL_VIF | PSL_VIP); 549 } 550 551 /* 552 * Copy the sigframe out to the user's stack. 553 */ 554 if (copyout(&sf, fp, sizeof(struct sigframe)) != 0) { 555 /* 556 * Something is wrong with the stack pointer. 557 * ...Kill the process. 558 */ 559 sigexit(p, SIGILL); 560 } 561 562 regs->tf_esp = (int)fp; 563 regs->tf_eip = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode)); 564 regs->tf_cs = _ucodesel; 565 regs->tf_ds = _udatasel; 566 regs->tf_es = _udatasel; 567 regs->tf_ss = _udatasel; 568} 569 570/* 571 * System call to cleanup state after a signal 572 * has been taken. Reset signal mask and 573 * stack state from context left by sendsig (above). 574 * Return to previous pc and psl as specified by 575 * context left by sendsig. Check carefully to 576 * make sure that the user has not modified the 577 * state to gain improper privileges. 578 */ 579int 580sigreturn(p, uap, retval) 581 struct proc *p; 582 struct sigreturn_args /* { 583 struct sigcontext *sigcntxp; 584 } */ *uap; 585 int *retval; 586{ 587 register struct sigcontext *scp; 588 register struct sigframe *fp; 589 register struct trapframe *regs = p->p_md.md_regs; 590 int eflags; 591 592 /* 593 * (XXX old comment) regs->tf_esp points to the return address. 594 * The user scp pointer is above that. 595 * The return address is faked in the signal trampoline code 596 * for consistency. 597 */ 598 scp = uap->sigcntxp; 599 fp = (struct sigframe *) 600 ((caddr_t)scp - offsetof(struct sigframe, sf_sc)); 601 602 if (useracc((caddr_t)fp, sizeof (*fp), B_WRITE) == 0) 603 return(EFAULT); 604 605 eflags = scp->sc_ps; 606 if (eflags & PSL_VM) { 607 struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; 608 struct vm86_kernel *vm86; 609 610 /* 611 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't 612 * set up the vm86 area, and we can't enter vm86 mode. 613 */ 614 if (p->p_addr->u_pcb.pcb_ext == 0) 615 return (EINVAL); 616 vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; 617 if (vm86->vm86_inited == 0) 618 return (EINVAL); 619 620 /* go back to user mode if both flags are set */ 621 if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) 622 trapsignal(p, SIGBUS, 0); 623 624#define VM_USERCHANGE (PSL_USERCHANGE | PSL_RF) 625#define VME_USERCHANGE (VM_USERCHANGE | PSL_VIP | PSL_VIF) 626 if (vm86->vm86_has_vme) { 627 eflags = (tf->tf_eflags & ~VME_USERCHANGE) | 628 (eflags & VME_USERCHANGE) | PSL_VM; 629 } else { 630 vm86->vm86_eflags = eflags; /* save VIF, VIP */ 631 eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; 632 } 633 tf->tf_vm86_ds = scp->sc_ds; 634 tf->tf_vm86_es = scp->sc_es; 635 tf->tf_vm86_fs = scp->sc_fs; 636 tf->tf_vm86_gs = scp->sc_gs; 637 tf->tf_ds = _udatasel; 638 tf->tf_es = _udatasel; 639 } else { 640 /* 641 * Don't allow users to change privileged or reserved flags. 642 */ 643#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 644 /* 645 * XXX do allow users to change the privileged flag PSL_RF. 646 * The cpu sets PSL_RF in tf_eflags for faults. Debuggers 647 * should sometimes set it there too. tf_eflags is kept in 648 * the signal context during signal handling and there is no 649 * other place to remember it, so the PSL_RF bit may be 650 * corrupted by the signal handler without us knowing. 651 * Corruption of the PSL_RF bit at worst causes one more or 652 * one less debugger trap, so allowing it is fairly harmless. 653 */ 654 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { 655#ifdef DEBUG 656 printf("sigreturn: eflags = 0x%x\n", eflags); 657#endif 658 return(EINVAL); 659 } 660 661 /* 662 * Don't allow users to load a valid privileged %cs. Let the 663 * hardware check for invalid selectors, excess privilege in 664 * other selectors, invalid %eip's and invalid %esp's. 665 */ 666#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 667 if (!CS_SECURE(scp->sc_cs)) { 668#ifdef DEBUG 669 printf("sigreturn: cs = 0x%x\n", scp->sc_cs); 670#endif 671 trapsignal(p, SIGBUS, T_PROTFLT); 672 return(EINVAL); 673 } 674 regs->tf_ds = scp->sc_ds; 675 regs->tf_es = scp->sc_es; 676 } 677 /* restore scratch registers */ 678 regs->tf_eax = scp->sc_eax; 679 regs->tf_ebx = scp->sc_ebx; 680 regs->tf_ecx = scp->sc_ecx; 681 regs->tf_edx = scp->sc_edx; 682 regs->tf_esi = scp->sc_esi; 683 regs->tf_edi = scp->sc_edi; 684 regs->tf_cs = scp->sc_cs; 685 regs->tf_ss = scp->sc_ss; 686 regs->tf_isp = scp->sc_isp; 687 688 if (useracc((caddr_t)scp, sizeof (*scp), B_WRITE) == 0) 689 return(EINVAL); 690 691 if (scp->sc_onstack & 01) 692 p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK; 693 else 694 p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK; 695 p->p_sigmask = scp->sc_mask & ~sigcantmask; 696 regs->tf_ebp = scp->sc_fp; 697 regs->tf_esp = scp->sc_sp; 698 regs->tf_eip = scp->sc_pc; 699 regs->tf_eflags = eflags; 700 return(EJUSTRETURN); 701} 702 703/* 704 * Machine dependent boot() routine 705 * 706 * I haven't seen anything to put here yet 707 * Possibly some stuff might be grafted back here from boot() 708 */ 709void 710cpu_boot(int howto) 711{ 712} 713 714/* 715 * Shutdown the CPU as much as possible 716 */ 717void 718cpu_halt(void) 719{ 720 for (;;) 721 __asm__ ("hlt"); 722} 723 724/* 725 * Turn the power off. 726 */ 727void 728cpu_power_down(void) 729{ 730#if NAPM > 0 731 apm_power_off(); 732#endif 733} 734 735/* 736 * Clear registers on exec 737 */ 738void 739setregs(p, entry, stack) 740 struct proc *p; 741 u_long entry; 742 u_long stack; 743{ 744 struct trapframe *regs = p->p_md.md_regs; 745 746#ifdef USER_LDT 747 struct pcb *pcb = &p->p_addr->u_pcb; 748 749 /* was i386_user_cleanup() in NetBSD */ 750 if (pcb->pcb_ldt) { 751 if (pcb == curpcb) 752 lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); 753 kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt, 754 pcb->pcb_ldt_len * sizeof(union descriptor)); 755 pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0; 756 } 757#endif 758 759 bzero((char *)regs, sizeof(struct trapframe)); 760 regs->tf_eip = entry; 761 regs->tf_esp = stack; 762 regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); 763 regs->tf_ss = _udatasel; 764 regs->tf_ds = _udatasel; 765 regs->tf_es = _udatasel; 766 regs->tf_cs = _ucodesel; 767 768 /* 769 * Initialize the math emulator (if any) for the current process. 770 * Actually, just clear the bit that says that the emulator has 771 * been initialized. Initialization is delayed until the process 772 * traps to the emulator (if it is done at all) mainly because 773 * emulators don't provide an entry point for initialization. 774 */ 775 p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP; 776 777 /* 778 * Arrange to trap the next npx or `fwait' instruction (see npx.c 779 * for why fwait must be trapped at least if there is an npx or an 780 * emulator). This is mainly to handle the case where npx0 is not 781 * configured, since the npx routines normally set up the trap 782 * otherwise. It should be done only at boot time, but doing it 783 * here allows modifying `npx_exists' for testing the emulator on 784 * systems with an npx. 785 */ 786 load_cr0(rcr0() | CR0_MP | CR0_TS); 787 788#if NNPX > 0 789 /* Initialize the npx (if any) for the current process. */ 790 npxinit(__INITIAL_NPXCW__); 791#endif 792} 793 794static int 795sysctl_machdep_adjkerntz SYSCTL_HANDLER_ARGS 796{ 797 int error; 798 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, 799 req); 800 if (!error && req->newptr) 801 resettodr(); 802 return (error); 803} 804 805SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, 806 &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); 807 808SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, 809 CTLFLAG_RW, &disable_rtc_set, 0, ""); 810 811SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 812 CTLFLAG_RD, &bootinfo, bootinfo, ""); 813 814SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, 815 CTLFLAG_RW, &wall_cmos_clock, 0, ""); 816 817/* 818 * Initialize 386 and configure to run kernel 819 */ 820 821/* 822 * Initialize segments & interrupt table 823 */ 824 825int currentldt; 826int _default_ldt; 827#ifdef SMP 828union descriptor gdt[NGDT + NCPU]; /* global descriptor table */ 829#else 830union descriptor gdt[NGDT]; /* global descriptor table */ 831#endif 832struct gate_descriptor idt[NIDT]; /* interrupt descriptor table */ 833union descriptor ldt[NLDT]; /* local descriptor table */ 834#ifdef SMP 835/* table descriptors - used to load tables by microp */ 836struct region_descriptor r_gdt, r_idt; 837#endif 838 839#ifdef SMP 840extern struct i386tss common_tss; /* One tss per cpu */ 841#ifdef VM86 842extern struct segment_descriptor common_tssd; 843#endif /* VM86 */ 844#else 845struct i386tss common_tss; 846#ifdef VM86 847struct segment_descriptor common_tssd; 848u_int private_tss = 0; /* flag indicating private tss */ 849#endif /* VM86 */ 850#endif 851 852static struct i386tss dblfault_tss; 853static char dblfault_stack[PAGE_SIZE]; 854 855extern struct user *proc0paddr; 856 857 858/* software prototypes -- in more palatable form */ 859struct soft_segment_descriptor gdt_segs[ 860#ifdef SMP 861 NGDT + NCPU 862#endif 863 ] = { 864/* GNULL_SEL 0 Null Descriptor */ 865{ 0x0, /* segment base address */ 866 0x0, /* length */ 867 0, /* segment type */ 868 0, /* segment descriptor priority level */ 869 0, /* segment descriptor present */ 870 0, 0, 871 0, /* default 32 vs 16 bit size */ 872 0 /* limit granularity (byte/page units)*/ }, 873/* GCODE_SEL 1 Code Descriptor for kernel */ 874{ 0x0, /* segment base address */ 875 0xfffff, /* length - all address space */ 876 SDT_MEMERA, /* segment type */ 877 0, /* segment descriptor priority level */ 878 1, /* segment descriptor present */ 879 0, 0, 880 1, /* default 32 vs 16 bit size */ 881 1 /* limit granularity (byte/page units)*/ }, 882/* GDATA_SEL 2 Data Descriptor for kernel */ 883{ 0x0, /* segment base address */ 884 0xfffff, /* length - all address space */ 885 SDT_MEMRWA, /* segment type */ 886 0, /* segment descriptor priority level */ 887 1, /* segment descriptor present */ 888 0, 0, 889 1, /* default 32 vs 16 bit size */ 890 1 /* limit granularity (byte/page units)*/ }, 891/* GLDT_SEL 3 LDT Descriptor */ 892{ (int) ldt, /* segment base address */ 893 sizeof(ldt)-1, /* length - all address space */ 894 SDT_SYSLDT, /* segment type */ 895 SEL_UPL, /* segment descriptor priority level */ 896 1, /* segment descriptor present */ 897 0, 0, 898 0, /* unused - default 32 vs 16 bit size */ 899 0 /* limit granularity (byte/page units)*/ }, 900/* GTGATE_SEL 4 Null Descriptor - Placeholder */ 901{ 0x0, /* segment base address */ 902 0x0, /* length - all address space */ 903 0, /* segment type */ 904 0, /* segment descriptor priority level */ 905 0, /* segment descriptor present */ 906 0, 0, 907 0, /* default 32 vs 16 bit size */ 908 0 /* limit granularity (byte/page units)*/ }, 909/* GPANIC_SEL 5 Panic Tss Descriptor */ 910{ (int) &dblfault_tss, /* segment base address */ 911 sizeof(struct i386tss)-1,/* length - all address space */ 912 SDT_SYS386TSS, /* segment type */ 913 0, /* segment descriptor priority level */ 914 1, /* segment descriptor present */ 915 0, 0, 916 0, /* unused - default 32 vs 16 bit size */ 917 0 /* limit granularity (byte/page units)*/ }, 918/* GPROC0_SEL 6 Proc 0 Tss Descriptor */ 919{ 920 (int) &common_tss, /* segment base address */ 921 sizeof(struct i386tss)-1,/* length - all address space */ 922 SDT_SYS386TSS, /* segment type */ 923 0, /* segment descriptor priority level */ 924 1, /* segment descriptor present */ 925 0, 0, 926 0, /* unused - default 32 vs 16 bit size */ 927 0 /* limit granularity (byte/page units)*/ }, 928/* GUSERLDT_SEL 7 User LDT Descriptor per process */ 929{ (int) ldt, /* segment base address */ 930 (512 * sizeof(union descriptor)-1), /* length */ 931 SDT_SYSLDT, /* segment type */ 932 0, /* segment descriptor priority level */ 933 1, /* segment descriptor present */ 934 0, 0, 935 0, /* unused - default 32 vs 16 bit size */ 936 0 /* limit granularity (byte/page units)*/ }, 937/* GAPMCODE32_SEL 8 APM BIOS 32-bit interface (32bit Code) */ 938{ 0, /* segment base address (overwritten by APM) */ 939 0xfffff, /* length */ 940 SDT_MEMERA, /* segment type */ 941 0, /* segment descriptor priority level */ 942 1, /* segment descriptor present */ 943 0, 0, 944 1, /* default 32 vs 16 bit size */ 945 1 /* limit granularity (byte/page units)*/ }, 946/* GAPMCODE16_SEL 9 APM BIOS 32-bit interface (16bit Code) */ 947{ 0, /* segment base address (overwritten by APM) */ 948 0xfffff, /* length */ 949 SDT_MEMERA, /* segment type */ 950 0, /* segment descriptor priority level */ 951 1, /* segment descriptor present */ 952 0, 0, 953 0, /* default 32 vs 16 bit size */ 954 1 /* limit granularity (byte/page units)*/ }, 955/* GAPMDATA_SEL 10 APM BIOS 32-bit interface (Data) */ 956{ 0, /* segment base address (overwritten by APM) */ 957 0xfffff, /* length */ 958 SDT_MEMRWA, /* segment type */ 959 0, /* segment descriptor priority level */ 960 1, /* segment descriptor present */ 961 0, 0, 962 1, /* default 32 vs 16 bit size */ 963 1 /* limit granularity (byte/page units)*/ }, 964}; 965 966static struct soft_segment_descriptor ldt_segs[] = { 967 /* Null Descriptor - overwritten by call gate */ 968{ 0x0, /* segment base address */ 969 0x0, /* length - all address space */ 970 0, /* segment type */ 971 0, /* segment descriptor priority level */ 972 0, /* segment descriptor present */ 973 0, 0, 974 0, /* default 32 vs 16 bit size */ 975 0 /* limit granularity (byte/page units)*/ }, 976 /* Null Descriptor - overwritten by call gate */ 977{ 0x0, /* segment base address */ 978 0x0, /* length - all address space */ 979 0, /* segment type */ 980 0, /* segment descriptor priority level */ 981 0, /* segment descriptor present */ 982 0, 0, 983 0, /* default 32 vs 16 bit size */ 984 0 /* limit granularity (byte/page units)*/ }, 985 /* Null Descriptor - overwritten by call gate */ 986{ 0x0, /* segment base address */ 987 0x0, /* length - all address space */ 988 0, /* segment type */ 989 0, /* segment descriptor priority level */ 990 0, /* segment descriptor present */ 991 0, 0, 992 0, /* default 32 vs 16 bit size */ 993 0 /* limit granularity (byte/page units)*/ }, 994 /* Code Descriptor for user */ 995{ 0x0, /* segment base address */ 996 0xfffff, /* length - all address space */ 997 SDT_MEMERA, /* segment type */ 998 SEL_UPL, /* segment descriptor priority level */ 999 1, /* segment descriptor present */ 1000 0, 0, 1001 1, /* default 32 vs 16 bit size */ 1002 1 /* limit granularity (byte/page units)*/ }, 1003 /* Data Descriptor for user */ 1004{ 0x0, /* segment base address */ 1005 0xfffff, /* length - all address space */ 1006 SDT_MEMRWA, /* segment type */ 1007 SEL_UPL, /* segment descriptor priority level */ 1008 1, /* segment descriptor present */ 1009 0, 0, 1010 1, /* default 32 vs 16 bit size */ 1011 1 /* limit granularity (byte/page units)*/ }, 1012}; 1013 1014void 1015setidt(idx, func, typ, dpl, selec) 1016 int idx; 1017 inthand_t *func; 1018 int typ; 1019 int dpl; 1020 int selec; 1021{ 1022 struct gate_descriptor *ip = idt + idx; 1023 1024 ip->gd_looffset = (int)func; 1025 ip->gd_selector = selec; 1026 ip->gd_stkcpy = 0; 1027 ip->gd_xx = 0; 1028 ip->gd_type = typ; 1029 ip->gd_dpl = dpl; 1030 ip->gd_p = 1; 1031 ip->gd_hioffset = ((int)func)>>16 ; 1032} 1033 1034#define IDTVEC(name) __CONCAT(X,name) 1035 1036extern inthand_t 1037 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), 1038 IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), 1039 IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), 1040 IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), 1041 IDTVEC(syscall), IDTVEC(int0x80_syscall); 1042 1043void 1044sdtossd(sd, ssd) 1045 struct segment_descriptor *sd; 1046 struct soft_segment_descriptor *ssd; 1047{ 1048 ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; 1049 ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; 1050 ssd->ssd_type = sd->sd_type; 1051 ssd->ssd_dpl = sd->sd_dpl; 1052 ssd->ssd_p = sd->sd_p; 1053 ssd->ssd_def32 = sd->sd_def32; 1054 ssd->ssd_gran = sd->sd_gran; 1055} 1056 1057void 1058init386(first) 1059 int first; 1060{ 1061 int x; 1062 unsigned biosbasemem, biosextmem; 1063 struct gate_descriptor *gdp; 1064 int gsel_tss; 1065 1066 struct isa_device *idp; 1067#ifndef SMP 1068 /* table descriptors - used to load tables by microp */ 1069 struct region_descriptor r_gdt, r_idt; 1070#endif 1071 int pagesinbase, pagesinext; 1072 int target_page, pa_indx; 1073 int off; 1074 int speculative_mtest; 1075 1076 proc0.p_addr = proc0paddr; 1077 1078 atdevbase = ISA_HOLE_START + KERNBASE; 1079 1080 /* 1081 * Initialize the console before we print anything out. 1082 */ 1083 cninit(); 1084 1085 /* 1086 * make gdt memory segments, the code segment goes up to end of the 1087 * page with etext in it, the data segment goes to the end of 1088 * the address space 1089 */ 1090 /* 1091 * XXX text protection is temporarily (?) disabled. The limit was 1092 * i386_btop(round_page(etext)) - 1. 1093 */ 1094 gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; 1095 gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; 1096#ifdef BDE_DEBUGGER 1097#define NGDT1 8 /* avoid overwriting db entries with APM ones */ 1098#else 1099#define NGDT1 (sizeof gdt_segs / sizeof gdt_segs[0]) 1100#endif 1101 for (x = 0; x < NGDT1; x++) 1102 ssdtosd(&gdt_segs[x], &gdt[x].sd); 1103#ifdef VM86 1104 common_tssd = gdt[GPROC0_SEL].sd; 1105#endif /* VM86 */ 1106 1107#ifdef SMP 1108 /* 1109 * Spin these up now. init_secondary() grabs them. We could use 1110 * #for(x,y,z) / #endfor cpp directives if they existed. 1111 */ 1112 for (x = 0; x < NCPU; x++) { 1113 gdt_segs[NGDT + x] = gdt_segs[GPROC0_SEL]; 1114 ssdtosd(&gdt_segs[NGDT + x], &gdt[NGDT + x].sd); 1115 } 1116#endif 1117 1118 /* make ldt memory segments */ 1119 /* 1120 * The data segment limit must not cover the user area because we 1121 * don't want the user area to be writable in copyout() etc. (page 1122 * level protection is lost in kernel mode on 386's). Also, we 1123 * don't want the user area to be writable directly (page level 1124 * protection of the user area is not available on 486's with 1125 * CR0_WP set, because there is no user-read/kernel-write mode). 1126 * 1127 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it 1128 * should be spelled ...MAX_USER... 1129 */ 1130#define VM_END_USER_RW_ADDRESS VM_MAXUSER_ADDRESS 1131 /* 1132 * The code segment limit has to cover the user area until we move 1133 * the signal trampoline out of the user area. This is safe because 1134 * the code segment cannot be written to directly. 1135 */ 1136#define VM_END_USER_R_ADDRESS (VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE) 1137 ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1; 1138 ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1; 1139 for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) 1140 ssdtosd(&ldt_segs[x], &ldt[x].sd); 1141 1142 /* exceptions */ 1143 for (x = 0; x < NIDT; x++) 1144 setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1145 setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1146 setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1147 setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1148 setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1149 setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1150 setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1151 setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1152 setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1153 setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); 1154 setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1155 setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1156 setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1157 setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1158 setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1159 setidt(14, &IDTVEC(page), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1160 setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1161 setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1162 setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1163 setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1164 setidt(0x80, &IDTVEC(int0x80_syscall), 1165 SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1166 1167#include "isa.h" 1168#if NISA >0 1169 isa_defaultirq(); 1170#endif 1171 rand_initialize(); 1172 1173 r_gdt.rd_limit = sizeof(gdt) - 1; 1174 r_gdt.rd_base = (int) gdt; 1175 lgdt(&r_gdt); 1176 1177 r_idt.rd_limit = sizeof(idt) - 1; 1178 r_idt.rd_base = (int) idt; 1179 lidt(&r_idt); 1180 1181 _default_ldt = GSEL(GLDT_SEL, SEL_KPL); 1182 lldt(_default_ldt); 1183 currentldt = _default_ldt; 1184 1185#ifdef DDB 1186 kdb_init(); 1187 if (boothowto & RB_KDB) 1188 Debugger("Boot flags requested debugger"); 1189#endif 1190 1191 finishidentcpu(); /* Final stage of CPU initialization */ 1192 setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 1193 initializecpu(); /* Initialize CPU registers */ 1194 1195 /* Use BIOS values stored in RTC CMOS RAM, since probing 1196 * breaks certain 386 AT relics. 1197 */ 1198 biosbasemem = rtcin(RTC_BASELO)+ (rtcin(RTC_BASEHI)<<8); 1199 biosextmem = rtcin(RTC_EXTLO)+ (rtcin(RTC_EXTHI)<<8); 1200 1201 /* 1202 * If BIOS tells us that it has more than 640k in the basemem, 1203 * don't believe it - set it to 640k. 1204 */ 1205 if (biosbasemem > 640) { 1206 printf("Preposterous RTC basemem of %dK, truncating to 640K\n", 1207 biosbasemem); 1208 biosbasemem = 640; 1209 } 1210 if (bootinfo.bi_memsizes_valid && bootinfo.bi_basemem > 640) { 1211 printf("Preposterous BIOS basemem of %dK, truncating to 640K\n", 1212 bootinfo.bi_basemem); 1213 bootinfo.bi_basemem = 640; 1214 } 1215 1216 /* 1217 * Warn if the official BIOS interface disagrees with the RTC 1218 * interface used above about the amount of base memory or the 1219 * amount of extended memory. Prefer the BIOS value for the base 1220 * memory. This is necessary for machines that `steal' base 1221 * memory for use as BIOS memory, at least if we are going to use 1222 * the BIOS for apm. Prefer the RTC value for extended memory. 1223 * Eventually the hackish interface shouldn't even be looked at. 1224 */ 1225 if (bootinfo.bi_memsizes_valid) { 1226 if (bootinfo.bi_basemem != biosbasemem) { 1227 vm_offset_t pa; 1228 1229 printf( 1230 "BIOS basemem (%ldK) != RTC basemem (%dK), setting to BIOS value\n", 1231 bootinfo.bi_basemem, biosbasemem); 1232 biosbasemem = bootinfo.bi_basemem; 1233 1234 /* 1235 * XXX if biosbasemem is now < 640, there is `hole' 1236 * between the end of base memory and the start of 1237 * ISA memory. The hole may be empty or it may 1238 * contain BIOS code or data. Map it read/write so 1239 * that the BIOS can write to it. (Memory from 0 to 1240 * the physical end of the kernel is mapped read-only 1241 * to begin with and then parts of it are remapped. 1242 * The parts that aren't remapped form holes that 1243 * remain read-only and are unused by the kernel. 1244 * The base memory area is below the physical end of 1245 * the kernel and right now forms a read-only hole. 1246 * The part of it from 0 to 1247 * (trunc_page(biosbasemem * 1024) - 1) will be 1248 * remapped and used by the kernel later.) 1249 * 1250 * This code is similar to the code used in 1251 * pmap_mapdev, but since no memory needs to be 1252 * allocated we simply change the mapping. 1253 */ 1254 for (pa = trunc_page(biosbasemem * 1024); 1255 pa < ISA_HOLE_START; pa += PAGE_SIZE) { 1256 unsigned *pte; 1257 1258 pte = (unsigned *)vtopte(pa + KERNBASE); 1259 *pte = pa | PG_RW | PG_V; 1260 } 1261 } 1262 if (bootinfo.bi_extmem != biosextmem) 1263 printf("BIOS extmem (%ldK) != RTC extmem (%dK)\n", 1264 bootinfo.bi_extmem, biosextmem); 1265 } 1266 1267#ifdef SMP 1268 /* make hole for AP bootstrap code */ 1269 pagesinbase = mp_bootaddress(biosbasemem) / PAGE_SIZE; 1270#else 1271 pagesinbase = biosbasemem * 1024 / PAGE_SIZE; 1272#endif 1273 1274 pagesinext = biosextmem * 1024 / PAGE_SIZE; 1275 1276 /* 1277 * Special hack for chipsets that still remap the 384k hole when 1278 * there's 16MB of memory - this really confuses people that 1279 * are trying to use bus mastering ISA controllers with the 1280 * "16MB limit"; they only have 16MB, but the remapping puts 1281 * them beyond the limit. 1282 */ 1283 /* 1284 * If extended memory is between 15-16MB (16-17MB phys address range), 1285 * chop it to 15MB. 1286 */ 1287 if ((pagesinext > 3840) && (pagesinext < 4096)) 1288 pagesinext = 3840; 1289 1290 /* 1291 * Maxmem isn't the "maximum memory", it's one larger than the 1292 * highest page of the physical address space. It should be 1293 * called something like "Maxphyspage". 1294 */ 1295 Maxmem = pagesinext + 0x100000/PAGE_SIZE; 1296 /* 1297 * Indicate that we wish to do a speculative search for memory beyond 1298 * the end of the reported size if the indicated amount is 64MB (0x4000 1299 * pages) - which is the largest amount that the BIOS/bootblocks can 1300 * currently report. If a specific amount of memory is indicated via 1301 * the MAXMEM option or the npx0 "msize", then don't do the speculative 1302 * memory test. 1303 */ 1304 if (Maxmem == 0x4000) 1305 speculative_mtest = TRUE; 1306 else 1307 speculative_mtest = FALSE; 1308 1309#ifdef MAXMEM 1310 Maxmem = MAXMEM/4; 1311 speculative_mtest = FALSE; 1312#endif 1313 1314#if NNPX > 0 1315 idp = find_isadev(isa_devtab_null, &npxdriver, 0); 1316 if (idp != NULL && idp->id_msize != 0) { 1317 Maxmem = idp->id_msize / 4; 1318 speculative_mtest = FALSE; 1319 } 1320#endif 1321 1322#ifdef SMP 1323 /* look for the MP hardware - needed for apic addresses */ 1324 mp_probe(); 1325#endif 1326 1327 /* call pmap initialization to make new kernel address space */ 1328 pmap_bootstrap (first, 0); 1329 1330 /* 1331 * Size up each available chunk of physical memory. 1332 */ 1333 1334 /* 1335 * We currently don't bother testing base memory. 1336 * XXX ...but we probably should. 1337 */ 1338 pa_indx = 0; 1339 if (pagesinbase > 1) { 1340 phys_avail[pa_indx++] = PAGE_SIZE; /* skip first page of memory */ 1341 phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */ 1342 physmem = pagesinbase - 1; 1343 } else { 1344 /* point at first chunk end */ 1345 pa_indx++; 1346 } 1347 1348 for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) { 1349 int tmp, page_bad; 1350 1351 page_bad = FALSE; 1352 1353 /* 1354 * map page into kernel: valid, read/write, non-cacheable 1355 */ 1356 *(int *)CMAP1 = PG_V | PG_RW | PG_N | target_page; 1357 invltlb(); 1358 1359 tmp = *(int *)CADDR1; 1360 /* 1361 * Test for alternating 1's and 0's 1362 */ 1363 *(volatile int *)CADDR1 = 0xaaaaaaaa; 1364 if (*(volatile int *)CADDR1 != 0xaaaaaaaa) { 1365 page_bad = TRUE; 1366 } 1367 /* 1368 * Test for alternating 0's and 1's 1369 */ 1370 *(volatile int *)CADDR1 = 0x55555555; 1371 if (*(volatile int *)CADDR1 != 0x55555555) { 1372 page_bad = TRUE; 1373 } 1374 /* 1375 * Test for all 1's 1376 */ 1377 *(volatile int *)CADDR1 = 0xffffffff; 1378 if (*(volatile int *)CADDR1 != 0xffffffff) { 1379 page_bad = TRUE; 1380 } 1381 /* 1382 * Test for all 0's 1383 */ 1384 *(volatile int *)CADDR1 = 0x0; 1385 if (*(volatile int *)CADDR1 != 0x0) { 1386 /* 1387 * test of page failed 1388 */ 1389 page_bad = TRUE; 1390 } 1391 /* 1392 * Restore original value. 1393 */ 1394 *(int *)CADDR1 = tmp; 1395 1396 /* 1397 * Adjust array of valid/good pages. 1398 */ 1399 if (page_bad == FALSE) { 1400 /* 1401 * If this good page is a continuation of the 1402 * previous set of good pages, then just increase 1403 * the end pointer. Otherwise start a new chunk. 1404 * Note that "end" points one higher than end, 1405 * making the range >= start and < end. 1406 * If we're also doing a speculative memory 1407 * test and we at or past the end, bump up Maxmem 1408 * so that we keep going. The first bad page 1409 * will terminate the loop. 1410 */ 1411 if (phys_avail[pa_indx] == target_page) { 1412 phys_avail[pa_indx] += PAGE_SIZE; 1413 if (speculative_mtest == TRUE && 1414 phys_avail[pa_indx] >= (64*1024*1024)) 1415 Maxmem++; 1416 } else { 1417 pa_indx++; 1418 if (pa_indx == PHYS_AVAIL_ARRAY_END) { 1419 printf("Too many holes in the physical address space, giving up\n"); 1420 pa_indx--; 1421 break; 1422 } 1423 phys_avail[pa_indx++] = target_page; /* start */ 1424 phys_avail[pa_indx] = target_page + PAGE_SIZE; /* end */ 1425 } 1426 physmem++; 1427 } 1428 } 1429 1430 *(int *)CMAP1 = 0; 1431 invltlb(); 1432 1433 /* 1434 * XXX 1435 * The last chunk must contain at least one page plus the message 1436 * buffer to avoid complicating other code (message buffer address 1437 * calculation, etc.). 1438 */ 1439 while (phys_avail[pa_indx - 1] + PAGE_SIZE + 1440 round_page(sizeof(struct msgbuf)) >= phys_avail[pa_indx]) { 1441 physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); 1442 phys_avail[pa_indx--] = 0; 1443 phys_avail[pa_indx--] = 0; 1444 } 1445 1446 Maxmem = atop(phys_avail[pa_indx]); 1447 1448 /* Trim off space for the message buffer. */ 1449 phys_avail[pa_indx] -= round_page(sizeof(struct msgbuf)); 1450 1451 avail_end = phys_avail[pa_indx]; 1452 1453 /* now running on new page tables, configured,and u/iom is accessible */ 1454 1455 /* Map the message buffer. */ 1456 for (off = 0; off < round_page(sizeof(struct msgbuf)); off += PAGE_SIZE) 1457 pmap_enter(kernel_pmap, (vm_offset_t)msgbufp + off, 1458 avail_end + off, VM_PROT_ALL, TRUE); 1459 msgbufmapped = 1; 1460 1461 /* make an initial tss so cpu can get interrupt stack on syscall! */ 1462#ifdef VM86 1463 common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16; 1464#else 1465 common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE; 1466#endif /* VM86 */ 1467 common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; 1468 common_tss.tss_ioopt = (sizeof common_tss) << 16; 1469 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 1470 ltr(gsel_tss); 1471 1472 dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = 1473 dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; 1474 dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = 1475 dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); 1476 dblfault_tss.tss_cr3 = (int)IdlePTD; 1477 dblfault_tss.tss_eip = (int) dblfault_handler; 1478 dblfault_tss.tss_eflags = PSL_KERNEL; 1479 dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = 1480 dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); 1481 dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); 1482 dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); 1483 1484 /* make a call gate to reenter kernel with */ 1485 gdp = &ldt[LSYS5CALLS_SEL].gd; 1486 1487 x = (int) &IDTVEC(syscall); 1488 gdp->gd_looffset = x++; 1489 gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); 1490 gdp->gd_stkcpy = 1; 1491 gdp->gd_type = SDT_SYS386CGT; 1492 gdp->gd_dpl = SEL_UPL; 1493 gdp->gd_p = 1; 1494 gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; 1495 1496 /* XXX does this work? */ 1497 ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; 1498 1499 /* transfer to user mode */ 1500 1501 _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); 1502 _udatasel = LSEL(LUDATA_SEL, SEL_UPL); 1503 1504 /* setup proc 0's pcb */ 1505 proc0.p_addr->u_pcb.pcb_flags = 0; 1506 proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD; 1507 proc0.p_addr->u_pcb.pcb_mpnest = 1; 1508 proc0.p_addr->u_pcb.pcb_ext = 0; 1509} 1510 1511int 1512ptrace_set_pc(p, addr) 1513 struct proc *p; 1514 unsigned int addr; 1515{ 1516 p->p_md.md_regs->tf_eip = addr; 1517 return (0); 1518} 1519 1520int 1521ptrace_single_step(p) 1522 struct proc *p; 1523{ 1524 p->p_md.md_regs->tf_eflags |= PSL_T; 1525 return (0); 1526} 1527 1528int ptrace_write_u(p, off, data) 1529 struct proc *p; 1530 vm_offset_t off; 1531 int data; 1532{ 1533 struct trapframe frame_copy; 1534 vm_offset_t min; 1535 struct trapframe *tp; 1536 1537 /* 1538 * Privileged kernel state is scattered all over the user area. 1539 * Only allow write access to parts of regs and to fpregs. 1540 */ 1541 min = (char *)p->p_md.md_regs - (char *)p->p_addr; 1542 if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { 1543 tp = p->p_md.md_regs; 1544 frame_copy = *tp; 1545 *(int *)((char *)&frame_copy + (off - min)) = data; 1546 if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || 1547 !CS_SECURE(frame_copy.tf_cs)) 1548 return (EINVAL); 1549 *(int*)((char *)p->p_addr + off) = data; 1550 return (0); 1551 } 1552 min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); 1553 if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { 1554 *(int*)((char *)p->p_addr + off) = data; 1555 return (0); 1556 } 1557 return (EFAULT); 1558} 1559 1560int 1561fill_regs(p, regs) 1562 struct proc *p; 1563 struct reg *regs; 1564{ 1565 struct pcb *pcb; 1566 struct trapframe *tp; 1567 1568 tp = p->p_md.md_regs; 1569 regs->r_es = tp->tf_es; 1570 regs->r_ds = tp->tf_ds; 1571 regs->r_edi = tp->tf_edi; 1572 regs->r_esi = tp->tf_esi; 1573 regs->r_ebp = tp->tf_ebp; 1574 regs->r_ebx = tp->tf_ebx; 1575 regs->r_edx = tp->tf_edx; 1576 regs->r_ecx = tp->tf_ecx; 1577 regs->r_eax = tp->tf_eax; 1578 regs->r_eip = tp->tf_eip; 1579 regs->r_cs = tp->tf_cs; 1580 regs->r_eflags = tp->tf_eflags; 1581 regs->r_esp = tp->tf_esp; 1582 regs->r_ss = tp->tf_ss; 1583 pcb = &p->p_addr->u_pcb; 1584 regs->r_fs = pcb->pcb_fs; 1585 regs->r_gs = pcb->pcb_gs; 1586 return (0); 1587} 1588 1589int 1590set_regs(p, regs) 1591 struct proc *p; 1592 struct reg *regs; 1593{ 1594 struct pcb *pcb; 1595 struct trapframe *tp; 1596 1597 tp = p->p_md.md_regs; 1598 if (!EFLAGS_SECURE(regs->r_eflags, tp->tf_eflags) || 1599 !CS_SECURE(regs->r_cs)) 1600 return (EINVAL); 1601 tp->tf_es = regs->r_es; 1602 tp->tf_ds = regs->r_ds; 1603 tp->tf_edi = regs->r_edi; 1604 tp->tf_esi = regs->r_esi; 1605 tp->tf_ebp = regs->r_ebp; 1606 tp->tf_ebx = regs->r_ebx; 1607 tp->tf_edx = regs->r_edx; 1608 tp->tf_ecx = regs->r_ecx; 1609 tp->tf_eax = regs->r_eax; 1610 tp->tf_eip = regs->r_eip; 1611 tp->tf_cs = regs->r_cs; 1612 tp->tf_eflags = regs->r_eflags; 1613 tp->tf_esp = regs->r_esp; 1614 tp->tf_ss = regs->r_ss; 1615 pcb = &p->p_addr->u_pcb; 1616 pcb->pcb_fs = regs->r_fs; 1617 pcb->pcb_gs = regs->r_gs; 1618 return (0); 1619} 1620 1621#ifndef DDB 1622void 1623Debugger(const char *msg) 1624{ 1625 printf("Debugger(\"%s\") called.\n", msg); 1626} 1627#endif /* no DDB */ 1628 1629#include <sys/disklabel.h> 1630 1631/* 1632 * Determine the size of the transfer, and make sure it is 1633 * within the boundaries of the partition. Adjust transfer 1634 * if needed, and signal errors or early completion. 1635 */ 1636int 1637bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) 1638{ 1639 struct partition *p = lp->d_partitions + dkpart(bp->b_dev); 1640 int labelsect = lp->d_partitions[0].p_offset; 1641 int maxsz = p->p_size, 1642 sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; 1643 1644 /* overwriting disk label ? */ 1645 /* XXX should also protect bootstrap in first 8K */ 1646 if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect && 1647#if LABELSECTOR != 0 1648 bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect && 1649#endif 1650 (bp->b_flags & B_READ) == 0 && wlabel == 0) { 1651 bp->b_error = EROFS; 1652 goto bad; 1653 } 1654 1655#if defined(DOSBBSECTOR) && defined(notyet) 1656 /* overwriting master boot record? */ 1657 if (bp->b_blkno + p->p_offset <= DOSBBSECTOR && 1658 (bp->b_flags & B_READ) == 0 && wlabel == 0) { 1659 bp->b_error = EROFS; 1660 goto bad; 1661 } 1662#endif 1663 1664 /* beyond partition? */ 1665 if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) { 1666 /* if exactly at end of disk, return an EOF */ 1667 if (bp->b_blkno == maxsz) { 1668 bp->b_resid = bp->b_bcount; 1669 return(0); 1670 } 1671 /* or truncate if part of it fits */ 1672 sz = maxsz - bp->b_blkno; 1673 if (sz <= 0) { 1674 bp->b_error = EINVAL; 1675 goto bad; 1676 } 1677 bp->b_bcount = sz << DEV_BSHIFT; 1678 } 1679 1680 bp->b_pblkno = bp->b_blkno + p->p_offset; 1681 return(1); 1682 1683bad: 1684 bp->b_flags |= B_ERROR; 1685 return(-1); 1686} 1687 1688#ifdef DDB 1689 1690/* 1691 * Provide inb() and outb() as functions. They are normally only 1692 * available as macros calling inlined functions, thus cannot be 1693 * called inside DDB. 1694 * 1695 * The actual code is stolen from <machine/cpufunc.h>, and de-inlined. 1696 */ 1697 1698#undef inb 1699#undef outb 1700 1701/* silence compiler warnings */ 1702u_char inb(u_int); 1703void outb(u_int, u_char); 1704 1705u_char 1706inb(u_int port) 1707{ 1708 u_char data; 1709 /* 1710 * We use %%dx and not %1 here because i/o is done at %dx and not at 1711 * %edx, while gcc generates inferior code (movw instead of movl) 1712 * if we tell it to load (u_short) port. 1713 */ 1714 __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); 1715 return (data); 1716} 1717 1718void 1719outb(u_int port, u_char data) 1720{ 1721 u_char al; 1722 /* 1723 * Use an unnecessary assignment to help gcc's register allocator. 1724 * This make a large difference for gcc-1.40 and a tiny difference 1725 * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for 1726 * best results. gcc-2.6.0 can't handle this. 1727 */ 1728 al = data; 1729 __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); 1730} 1731 1732#endif /* DDB */ 1733