vm_machdep.c revision 1312
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 * $Id: vm_machdep.c,v 1.15 1994/03/24 23:12:35 davidg Exp $
 */

#include "npx.h"
#include "param.h"
#include "systm.h"
#include "proc.h"
#include "malloc.h"
#include "buf.h"
#include "user.h"

#include "../include/cpu.h"

#include "vm/vm.h"
#include "vm/vm_kern.h"

#ifndef NOBOUNCE

caddr_t		bouncememory;
vm_offset_t	bouncepa, bouncepaend;
int		bouncepages, bpwait;
vm_map_t	bounce_map;
int		bmwait, bmfreeing;

#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
int		bounceallocarraysize;
unsigned	*bounceallocarray;
int		bouncefree;

#define SIXTEENMEG (4096*4096)
#define MAXBKVA 512

/* special list that can be used at interrupt time for eventual kva free */
struct kvasfree {
	vm_offset_t addr;
	vm_offset_t size;
} kvaf[MAXBKVA];

int		kvasfreecnt;

/*
 * get bounce buffer pages (count physically contiguous)
 * (only 1 implemented now)
 */
vm_offset_t
vm_bounce_page_find(count)
	int count;
{
	int bit;
	int s, i;

	if (count != 1)
		panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

	s = splbio();
retry:
	for (i = 0; i < bounceallocarraysize; i++) {
		if (bounceallocarray[i] != 0xffffffff) {
			if ((bit = ffs(~bounceallocarray[i])) != 0) {
				bounceallocarray[i] |= 1 << (bit - 1);
				bouncefree -= count;
				splx(s);
				return bouncepa + (i * BITS_IN_UNSIGNED + (bit - 1)) * NBPG;
			}
		}
	}
	/* no free page -- sleep until vm_bounce_page_free wakes us */
	bpwait = 1;
	tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
	goto retry;
}

/*
 * free count bounce buffer pages
 */
void
vm_bounce_page_free(pa, count)
	vm_offset_t pa;
	int count;
{
	int allocindex;
	int index;
	int bit;

	if (count != 1)
		panic("vm_bounce_page_free -- no support for > 1 page yet!!!\n");

	index = (pa - bouncepa) / NBPG;

	if ((index < 0) || (index >= bouncepages))
		panic("vm_bounce_page_free -- bad index\n");

	allocindex = index / BITS_IN_UNSIGNED;
	bit = index % BITS_IN_UNSIGNED;

	bounceallocarray[allocindex] &= ~(1 << bit);

	bouncefree += count;
	if (bpwait) {
		bpwait = 0;
		wakeup((caddr_t) &bounceallocarray);
	}
}
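
/*
 * Worked example of the bitmap mapping used by the two routines above
 * (illustrative values assumed): with BITS_IN_UNSIGNED == 32 and
 * NBPG == 4096, the bounce page at pa == bouncepa + 33 * NBPG has
 * index 33, giving allocindex = 33 / 32 = 1 and bit = 33 % 32 = 1,
 * i.e. bit 1 of bounceallocarray[1].  vm_bounce_page_find() computes
 * the inverse: a free bit found by ffs() at 1-based position 2 in
 * word 1 yields bouncepa + (1 * 32 + (2 - 1)) * NBPG, the same page.
 */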

/*
 * allocate count bounce buffer kva pages
 */
vm_offset_t
vm_bounce_kva(count)
	int count;
{
	int tofree;
	int i;
	int startfree;
	vm_offset_t kva = 0;
	int s = splbio();
	int size = count * NBPG;

	startfree = 0;
more:
	if (!bmfreeing && (tofree = kvasfreecnt)) {
		bmfreeing = 1;
		for (i = startfree; i < kvasfreecnt; i++) {
			/*
			 * if we have a kva of the right size, no sense
			 * in freeing/reallocating...
			 * might affect fragmentation short term, but
			 * as long as the amount of bounce_map is
			 * significantly more than the maximum transfer
			 * size, I don't think that it is a problem.
			 */
			pmap_remove(kernel_pmap,
				kvaf[i].addr, kvaf[i].addr + kvaf[i].size);
			if (!kva && kvaf[i].size == size) {
				kva = kvaf[i].addr;
			} else {
				kmem_free_wakeup(bounce_map, kvaf[i].addr,
					kvaf[i].size);
			}
		}
		if (kvasfreecnt != tofree) {
			startfree = i;
			bmfreeing = 0;
			goto more;
		}
		kvasfreecnt = 0;
		bmfreeing = 0;
	}

	if (!kva && !(kva = kmem_alloc_pageable(bounce_map, size))) {
		bmwait = 1;
		tsleep((caddr_t) bounce_map, PRIBIO, "bmwait", 0);
		goto more;
	}
	splx(s);

	return kva;
}

/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
	vm_offset_t minaddr, maxaddr;

	if (bouncepages == 0)
		return;

	bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
	bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

	if (!bounceallocarray)
		panic("Cannot allocate bounce resource array\n");

	bzero(bounceallocarray, bounceallocarraysize * sizeof(unsigned));

	bounce_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, MAXBKVA * NBPG, FALSE);

	bouncepa = pmap_kextract((vm_offset_t) bouncememory);
	bouncepaend = bouncepa + bouncepages * NBPG;
	bouncefree = bouncepages;
	kvasfreecnt = 0;
}

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
	struct buf *bp;
{
	int countvmpg;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	vm_offset_t va, kva;
	vm_offset_t pa;
	int dobounceflag = 0;
	int bounceindex;
	int i;
	int s;

	if (bouncepages == 0)
		return;

	vastart = (vm_offset_t) bp->b_un.b_addr;
	vaend = (vm_offset_t) bp->b_un.b_addr + bp->b_bcount;

	vapstart = i386_trunc_page(vastart);
	vapend = i386_round_page(vaend);
	countvmpg = (vapend - vapstart) / NBPG;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG)
			++dobounceflag;
		va += NBPG;
	}
	if (dobounceflag == 0)
		return;

	if (bouncepages < dobounceflag)
		panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
	kva = vm_bounce_kva(countvmpg);
	va = vapstart;
	for (i = 0; i < countvmpg; i++) {
		pa = pmap_kextract(va);
		if (pa >= SIXTEENMEG) {
			/*
			 * allocate a replacement page
			 */
			vm_offset_t bpa = vm_bounce_page_find(1);
			pmap_kenter(kva + (NBPG * i), bpa);
			/*
			 * if we are writing, then copy the data into the page
			 */
			if ((bp->b_flags & B_READ) == 0)
				bcopy((caddr_t) va, (caddr_t) kva + (NBPG * i), NBPG);
		} else {
			/*
			 * use original page
			 */
			pmap_kenter(kva + (NBPG * i), pa);
		}
		va += NBPG;
	}
	pmap_update();

/*
 * flag the buffer as being bounced
 */
	bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
	bp->b_savekva = bp->b_un.b_addr;
/*
 * put our new kva into the buffer (offset by original offset)
 */
	bp->b_un.b_addr = (caddr_t) (((vm_offset_t) kva) |
				((vm_offset_t) bp->b_savekva & (NBPG - 1)));
	return;
}
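
/*
 * Worked example of the b_addr substitution above (addresses assumed
 * for illustration): if the original b_addr is 0xfe431234, the page
 * offset 0xfe431234 & (NBPG - 1) == 0x234 is preserved; with a
 * replacement kva of 0xf8800000 from vm_bounce_kva(), the buffer's
 * new b_addr becomes 0xf8800000 | 0x234 == 0xf8800234.  The driver
 * thus sees the same byte alignment, but every page behind the new
 * mapping is DMA-reachable below SIXTEENMEG.
 */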

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
	struct buf *bp;
{
	int i;
	vm_offset_t origkva, bouncekva;
	vm_offset_t vastart, vaend;
	vm_offset_t vapstart, vapend;
	int countbounce = 0;
	vm_offset_t firstbouncepa = 0;
	int firstbounceindex;
	int countvmpg;
	vm_offset_t bcount;
	int s;

/*
 * if this isn't a bounced buffer, then just return
 */
	if ((bp->b_flags & B_BOUNCE) == 0)
		return;

	origkva = (vm_offset_t) bp->b_savekva;
	bouncekva = (vm_offset_t) bp->b_un.b_addr;

	vastart = bouncekva;
	vaend = bouncekva + bp->b_bcount;
	bcount = bp->b_bcount;

	vapstart = i386_trunc_page(vastart);
	vapend = i386_round_page(vaend);

	countvmpg = (vapend - vapstart) / NBPG;

/*
 * check every page in the kva space for b_addr
 */
	for (i = 0; i < countvmpg; i++) {
		vm_offset_t mybouncepa;
		vm_offset_t copycount;

		copycount = i386_round_page(bouncekva + 1) - bouncekva;
		mybouncepa = pmap_kextract(i386_trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
		if ((mybouncepa >= bouncepa) && (mybouncepa < bouncepaend)) {
			if (copycount > bcount)
				copycount = bcount;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
			if (bp->b_flags & B_READ)
				bcopy((caddr_t) bouncekva, (caddr_t) origkva, copycount);
/*
 * free the bounce allocation
 */
			vm_bounce_page_free(i386_trunc_page(mybouncepa), 1);
		}

		origkva += copycount;
		bouncekva += copycount;
		bcount -= copycount;
	}

/*
 * add the old kva into the "to free" list
 */
	bouncekva = i386_trunc_page((vm_offset_t) bp->b_un.b_addr);
	kvaf[kvasfreecnt].addr = bouncekva;
	kvaf[kvasfreecnt++].size = countvmpg * NBPG;
	if (bmwait) {
		/*
		 * if anyone is waiting on the bounce-map, then wakeup
		 */
		wakeup((caddr_t) bounce_map);
		bmwait = 0;
	}

	bp->b_un.b_addr = bp->b_savekva;
	bp->b_savekva = 0;
	bp->b_flags &= ~B_BOUNCE;

	return;
}

#endif /* NOBOUNCE */
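
#if 0
/*
 * Sketch of where the bounce hooks above sit in an ISA disk driver's
 * I/O path; the "xx" driver, its queue xxtab, and xxstart() are
 * hypothetical names, for illustration only.  vm_bounce_alloc() is
 * applied before the disk sort (per its comment), and
 * vm_bounce_free() runs from the biodone() path when the transfer
 * completes.
 */
void
xxstrategy(bp)
	struct buf *bp;
{
	vm_bounce_alloc(bp);	/* substitute DMA-reachable pages if needed */
	disksort(&xxtab, bp);	/* sort/queue using the (possibly new) b_addr */
	xxstart();		/* start the controller */
}
#endif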

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the kernel stack and pcb, making the child
 * ready to run, and marking it so that it can return differently
 * than the parent.  Returns 1 in the child process, 0 in the parent.
 * We currently double-map the user area so that the stack is at the same
 * address in each process; in the future we will probably relocate
 * the frame pointers on the stack after copying.
 */
int
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	register struct user *up = p2->p_addr;
	int foo, offset, addr, i;
	extern char kstack[];
	extern int mvesp();

	/*
	 * Copy pcb and stack from proc p1 to p2.
	 * We do this as cheaply as possible, copying only the active
	 * part of the stack.  The stack and pcb need to agree;
	 * this is tricky, as the final pcb is constructed by savectx,
	 * but its frame isn't yet on the stack when the stack is copied.
	 * swtch compensates for this when the child eventually runs.
	 * This should be done differently, with a single call
	 * that copies and updates the pcb+stack,
	 * replacing the bcopy and savectx.
	 */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
	offset = mvesp() - (int)kstack;
	bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset,
	    (unsigned) ctob(UPAGES) - offset);
	p2->p_regs = p1->p_regs;

	/*
	 * Wire top of address space of child to its kstack.
	 * First, fault in a page of pte's to map it.
	 */
#if 0
	addr = trunc_page((u_int)vtopte(kstack));
	vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE);
	for (i = 0; i < UPAGES; i++)
		pmap_enter(&p2->p_vmspace->vm_pmap, kstack+i*NBPG,
			pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG),
			/*
			 * The user area has to be mapped writable because
			 * it contains the kernel stack (when CR0_WP is on
			 * on a 486 there is no user-read/kernel-write
			 * mode).  It is protected from user mode access
			 * by the segment limits.
			 */
			VM_PROT_READ|VM_PROT_WRITE, TRUE);
#endif
	pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb);

	/*
	 * Arrange for a non-local goto when the new process
	 * is started, to resume here, returning nonzero from setjmp.
	 */
	if (savectx(up, 1)) {
		/*
		 * Return 1 in child.
		 */
		return (1);
	}
	return (0);
}
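
/*
 * The savectx() call above behaves like setjmp(): it returns 0 to the
 * parent immediately, and 1 later in the child when the saved context
 * is first resumed, which is how one call site yields the two return
 * values documented in the function comment.  A rough sketch of how
 * machine-independent fork code can consume this (not the literal
 * caller):
 *
 *	if (cpu_fork(p1, p2))
 *		return (0);	child path: back to user with pid 0
 *	...			parent path: finish fork bookkeeping
 */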

#ifdef notyet
/*
 * cpu_exit is called as the last action during exit.
 *
 * We change to an inactive address space and a "safe" stack,
 * passing thru an argument to the new stack.  Now, safely isolated
 * from the resources we're shedding, we release the address space
 * and any remaining machine-dependent resources, including the
 * memory for the user structure and kernel stack.
 *
 * Next, we assign a dummy context to be written over by swtch,
 * calling it to send this process off to oblivion.
 * [The nullpcb allows us to minimize cost in swtch() by not having
 * a special case].
 */
struct proc *swtch_to_inactive();
volatile void
cpu_exit(p)
	register struct proc *p;
{
	static struct pcb nullpcb;	/* pcb to overwrite on last swtch */

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */

	/* move to inactive space and stack, passing arg across */
	p = swtch_to_inactive(p);

	/* drop per-process resources */
	vmspace_free(p->p_vmspace);
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));

	p->p_addr = (struct user *) &nullpcb;
	splclock();
	swtch();
	/* NOTREACHED */
}
#else
void
cpu_exit(p)
	register struct proc *p;
{

#if NNPX > 0
	npxexit(p);
#endif	/* NNPX */
	splclock();
	curproc = 0;
	swtch();
	/*
	 * This is to shut up the compiler, and if swtch() failed I suppose
	 * this would be a good thing.  This keeps gcc happy because panic
	 * is a volatile void function as well.
	 */
	panic("cpu_exit");
}

void
cpu_wait(p)
	struct proc *p;
{
/*	extern vm_map_t upages_map; */
	extern char kstack[];

	/* drop per-process resources */
	pmap_remove(vm_map_pmap(kernel_map), (vm_offset_t) p->p_addr,
		((vm_offset_t) p->p_addr) + ctob(UPAGES));
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
	vmspace_free(p->p_vmspace);
}
#endif

/*
 * Set a red zone in the kernel stack after the u. area.
 */
void
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken.  a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
	vm_offset_t pa;

	pa = pmap_kextract((vm_offset_t)addr);
	if (pa == 0)
		panic("kvtop: zero page frame");
	return ((u_long)pa);
}

extern vm_map_t phys_map;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap")
 */
void
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr;
	register long flags = bp->b_flags;
	struct proc *p;
	int off;
	vm_offset_t kva;
	register vm_offset_t pa;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	addr = bp->b_saveaddr = bp->b_un.b_addr;
	off = (int)addr & PGOFSET;
	p = bp->b_proc;
	npf = btoc(round_page(bp->b_bcount + off));
	kva = kmem_alloc_wait(phys_map, ctob(npf));
	bp->b_un.b_addr = (caddr_t) (kva + off);
	while (npf--) {
		pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr);
		if (pa == 0)
			panic("vmapbuf: null page frame");
		pmap_kenter(kva, trunc_page(pa));
		addr += PAGE_SIZE;
		kva += PAGE_SIZE;
	}
	pmap_update();
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr = bp->b_un.b_addr;
	vm_offset_t kva;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	npf = btoc(round_page(bp->b_bcount + ((int)addr & PGOFSET)));
	kva = (vm_offset_t)((int)addr & ~PGOFSET);
	kmem_free_wakeup(phys_map, kva, ctob(npf));
	bp->b_un.b_addr = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}
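
#if 0
/*
 * Sketch of the raw-I/O lifecycle vmapbuf()/vunmapbuf() serve; the
 * physio() logic is abridged and uio_base is an illustrative name.
 * The caller is assumed to have verified and wired the user pages.
 */
	bp->b_flags |= B_PHYS;
	bp->b_proc = p;
	bp->b_un.b_addr = uio_base;	/* user VA of the transfer */
	vmapbuf(bp);			/* double-map user pages into phys_map */
	(*strat)(bp);			/* driver sees only kernel VAs */
	biowait(bp);
	vunmapbuf(bp);			/* free the PTEs, restore b_addr */
#endif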

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset()
{
	/* force a shutdown by unmapping entire address space ! */
	bzero((caddr_t) PTD, NBPG);

	/* "good night, sweet prince .... <THUNK!>" */
	tlbflush();
	/* NOTREACHED */
	while (1);
}

/*
 * Grow the user stack to allow for 'sp'.  This version grows the stack in
 * chunks of SGROWSIZ.
 */
int
grow(p, sp)
	struct proc *p;
	int sp;
{
	unsigned int nss;
	caddr_t v;
	struct vmspace *vm = p->p_vmspace;

	if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
		return (1);

	nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

	if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
		return (0);

	if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
	    SGROWSIZ) < nss) {
		int grow_amount;
		/*
		 * If necessary, grow the VM that the stack occupies
		 * to allow for the rlimit.  This allows us to not have
		 * to allocate all of the VM up-front in execve (which
		 * is expensive).
		 * Grow the VM by the amount requested rounded up to
		 * the nearest SGROWSIZ to provide for some hysteresis.
		 */
		grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
		v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
		    SGROWSIZ) - grow_amount;
		/*
		 * If there isn't enough room to extend by SGROWSIZ, then
		 * just extend to the maximum size
		 */
		if (v < vm->vm_maxsaddr) {
			v = vm->vm_maxsaddr;
			grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
		}
		if (vm_allocate(&vm->vm_map, (vm_offset_t *)&v,
		    grow_amount, FALSE) != KERN_SUCCESS) {
			return (0);
		}
		vm->vm_ssize += grow_amount >> PAGE_SHIFT;
	}

	return (1);
}
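
/*
 * Worked example of the hysteresis above (values assumed for
 * illustration): with PAGE_SIZE == 4096, SGROWSIZ == 128K, a current
 * vm_ssize of 32 pages (128K mapped), and a fault requiring
 * nss == 132K, grow_amount = roundup(132K - 128K, SGROWSIZ) == 128K,
 * so the stack grows by a full chunk rather than one page and nearby
 * faults avoid repeated vm_allocate() calls.
 */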