vm_machdep.c revision 1415
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 40 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ 41 * $Id: vm_machdep.c,v 1.20 1994/04/20 07:06:20 davidg Exp $ 42 */ 43 44#include "npx.h" 45#include "param.h" 46#include "systm.h" 47#include "proc.h" 48#include "malloc.h" 49#include "buf.h" 50#include "user.h" 51 52#include "../include/cpu.h" 53 54#include "vm/vm.h" 55#include "vm/vm_kern.h" 56 57#define b_cylin b_resid 58 59#define MAXCLSTATS 256 60int clstats[MAXCLSTATS]; 61int rqstats[MAXCLSTATS]; 62 63 64#ifndef NOBOUNCE 65 66caddr_t bouncememory; 67vm_offset_t bouncepa, bouncepaend; 68int bouncepages, bpwait; 69vm_map_t io_map; 70int bmwait, bmfreeing; 71 72#define BITS_IN_UNSIGNED (8*sizeof(unsigned)) 73int bounceallocarraysize; 74unsigned *bounceallocarray; 75int bouncefree; 76 77#define SIXTEENMEG (4096*4096) 78#define MAXBKVA 1024 79 80/* special list that can be used at interrupt time for eventual kva free */ 81struct kvasfree { 82 vm_offset_t addr; 83 vm_offset_t size; 84} kvaf[MAXBKVA]; 85 86int kvasfreecnt; 87 88vm_offset_t vm_bounce_kva(); 89/* 90 * get bounce buffer pages (count physically contiguous) 91 * (only 1 inplemented now) 92 */ 93vm_offset_t 94vm_bounce_page_find(count) 95 int count; 96{ 97 int bit; 98 int s,i; 99 100 if (count != 1) 101 panic("vm_bounce_page_find -- no support for > 1 page yet!!!"); 102 103 s = splbio(); 104retry: 105 for (i = 0; i < bounceallocarraysize; i++) { 106 if 
(bounceallocarray[i] != 0xffffffff) { 107 if (bit = ffs(~bounceallocarray[i])) { 108 bounceallocarray[i] |= 1 << (bit - 1) ; 109 bouncefree -= count; 110 splx(s); 111 return bouncepa + (i * BITS_IN_UNSIGNED + (bit - 1)) * NBPG; 112 } 113 } 114 } 115 bpwait = 1; 116 tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0); 117 goto retry; 118} 119 120void 121vm_bounce_kva_free(addr, size, now) 122 vm_offset_t addr; 123 vm_offset_t size; 124 int now; 125{ 126 int s = splbio(); 127 kvaf[kvasfreecnt].addr = addr; 128 kvaf[kvasfreecnt++].size = size; 129 if( now) { 130 /* 131 * this will do wakeups 132 */ 133 vm_bounce_kva(0,0); 134 } else { 135 if (bmwait) { 136 /* 137 * if anyone is waiting on the bounce-map, then wakeup 138 */ 139 wakeup((caddr_t) io_map); 140 bmwait = 0; 141 } 142 } 143 splx(s); 144} 145 146/* 147 * free count bounce buffer pages 148 */ 149void 150vm_bounce_page_free(pa, count) 151 vm_offset_t pa; 152 int count; 153{ 154 int allocindex; 155 int index; 156 int bit; 157 158 if (count != 1) 159 panic("vm_bounce_page_free -- no support for > 1 page yet!!!\n"); 160 161 index = (pa - bouncepa) / NBPG; 162 163 if ((index < 0) || (index >= bouncepages)) 164 panic("vm_bounce_page_free -- bad index\n"); 165 166 allocindex = index / BITS_IN_UNSIGNED; 167 bit = index % BITS_IN_UNSIGNED; 168 169 bounceallocarray[allocindex] &= ~(1 << bit); 170 171 bouncefree += count; 172 if (bpwait) { 173 bpwait = 0; 174 wakeup((caddr_t) &bounceallocarray); 175 } 176} 177 178/* 179 * allocate count bounce buffer kva pages 180 */ 181vm_offset_t 182vm_bounce_kva(count, waitok) 183 int count; 184 int waitok; 185{ 186 int tofree; 187 int i; 188 int startfree; 189 vm_offset_t kva = 0; 190 int s = splbio(); 191 int size = count; 192 startfree = 0; 193more: 194 if (!bmfreeing && (tofree = kvasfreecnt)) { 195 bmfreeing = 1; 196 for (i = startfree; i < kvasfreecnt; i++) { 197 /* 198 * if we have a kva of the right size, no sense 199 * in freeing/reallocating... 
200 * might affect fragmentation short term, but 201 * as long as the amount of io_map is 202 * significantly more than the maximum transfer 203 * size, I don't think that it is a problem. 204 */ 205 pmap_remove(kernel_pmap, 206 kvaf[i].addr, kvaf[i].addr + kvaf[i].size); 207 if( size && !kva && kvaf[i].size == size) { 208 kva = kvaf[i].addr; 209 } else { 210 kmem_free_wakeup(io_map, kvaf[i].addr, 211 kvaf[i].size); 212 } 213 } 214 if (kvasfreecnt != tofree) { 215 startfree = i; 216 bmfreeing = 0; 217 goto more; 218 } 219 kvasfreecnt = 0; 220 bmfreeing = 0; 221 } 222 223 if( size == 0) { 224 splx(s); 225 return NULL; 226 } 227 228 if (!kva && !(kva = kmem_alloc_pageable(io_map, size))) { 229 if( !waitok) { 230 splx(s); 231 return NULL; 232 } 233 bmwait = 1; 234 tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0); 235 goto more; 236 } 237 splx(s); 238 239 return kva; 240} 241 242/* 243 * same as vm_bounce_kva -- but really allocate 244 */ 245vm_offset_t 246vm_bounce_kva_alloc(count) 247int count; 248{ 249 int i; 250 vm_offset_t kva; 251 vm_offset_t pa; 252 if( bouncepages == 0) { 253 kva = (vm_offset_t) malloc(count*NBPG, M_TEMP, M_WAITOK); 254 return kva; 255 } 256 kva = vm_bounce_kva(count, 1); 257 for(i=0;i<count;i++) { 258 pa = vm_bounce_page_find(1); 259 pmap_kenter(kva + i * NBPG, pa); 260 } 261 return kva; 262} 263 264/* 265 * same as vm_bounce_kva_free -- but really free 266 */ 267void 268vm_bounce_kva_alloc_free(kva, count) 269 vm_offset_t kva; 270 int count; 271{ 272 int i; 273 vm_offset_t pa; 274 if( bouncepages == 0) { 275 free((caddr_t) kva, M_TEMP); 276 return; 277 } 278 for(i = 0; i < count; i++) { 279 pa = pmap_kextract(kva + i * NBPG); 280 vm_bounce_page_free(pa, 1); 281 } 282 vm_bounce_kva_free(kva, count); 283} 284 285/* 286 * do the things necessary to the struct buf to implement 287 * bounce buffers... 
inserted before the disk sort 288 */ 289void 290vm_bounce_alloc(bp) 291 struct buf *bp; 292{ 293 int countvmpg; 294 vm_offset_t vastart, vaend; 295 vm_offset_t vapstart, vapend; 296 vm_offset_t va, kva; 297 vm_offset_t pa; 298 int dobounceflag = 0; 299 int bounceindex; 300 int i; 301 int s; 302 303 if (bouncepages == 0) 304 return; 305 306 if (bp->b_bufsize < bp->b_bcount) { 307 printf("vm_bounce_alloc: b_bufsize(%d) < b_bcount(%d) !!!!\n", 308 bp->b_bufsize, bp->b_bcount); 309 bp->b_bufsize = bp->b_bcount; 310 } 311 312 vastart = (vm_offset_t) bp->b_un.b_addr; 313 vaend = (vm_offset_t) bp->b_un.b_addr + bp->b_bufsize; 314 315 vapstart = i386_trunc_page(vastart); 316 vapend = i386_round_page(vaend); 317 countvmpg = (vapend - vapstart) / NBPG; 318 319/* 320 * if any page is above 16MB, then go into bounce-buffer mode 321 */ 322 va = vapstart; 323 for (i = 0; i < countvmpg; i++) { 324 pa = pmap_kextract(va); 325 if (pa >= SIXTEENMEG) 326 ++dobounceflag; 327 va += NBPG; 328 } 329 if (dobounceflag == 0) 330 return; 331 332 if (bouncepages < dobounceflag) 333 panic("Not enough bounce buffers!!!"); 334 335/* 336 * allocate a replacement kva for b_addr 337 */ 338 kva = vm_bounce_kva(countvmpg*NBPG, 1); 339 va = vapstart; 340 for (i = 0; i < countvmpg; i++) { 341 pa = pmap_kextract(va); 342 if (pa >= SIXTEENMEG) { 343 /* 344 * allocate a replacement page 345 */ 346 vm_offset_t bpa = vm_bounce_page_find(1); 347 pmap_kenter(kva + (NBPG * i), bpa); 348 /* 349 * if we are writing, the copy the data into the page 350 */ 351 if ((bp->b_flags & B_READ) == 0) { 352 pmap_update(); 353 bcopy((caddr_t) va, (caddr_t) kva + (NBPG * i), NBPG); 354 } 355 } else { 356 /* 357 * use original page 358 */ 359 pmap_kenter(kva + (NBPG * i), pa); 360 } 361 va += NBPG; 362 } 363 pmap_update(); 364 365/* 366 * flag the buffer as being bounced 367 */ 368 bp->b_flags |= B_BOUNCE; 369/* 370 * save the original buffer kva 371 */ 372 bp->b_savekva = bp->b_un.b_addr; 373/* 374 * put our new kva into the 
buffer (offset by original offset) 375 */ 376 bp->b_un.b_addr = (caddr_t) (((vm_offset_t) kva) | 377 ((vm_offset_t) bp->b_savekva & (NBPG - 1))); 378 return; 379} 380 381/* 382 * hook into biodone to free bounce buffer 383 */ 384void 385vm_bounce_free(bp) 386 struct buf *bp; 387{ 388 int i; 389 vm_offset_t origkva, bouncekva; 390 vm_offset_t vastart, vaend; 391 vm_offset_t vapstart, vapend; 392 int countbounce = 0; 393 vm_offset_t firstbouncepa = 0; 394 int firstbounceindex; 395 int countvmpg; 396 vm_offset_t bcount; 397 int s; 398 399/* 400 * if this isn't a bounced buffer, then just return 401 */ 402 if ((bp->b_flags & B_BOUNCE) == 0) 403 return; 404 405 origkva = (vm_offset_t) bp->b_savekva; 406 bouncekva = (vm_offset_t) bp->b_un.b_addr; 407 408 vastart = bouncekva; 409 vaend = bouncekva + bp->b_bufsize; 410 bcount = bp->b_bufsize; 411 412 vapstart = i386_trunc_page(vastart); 413 vapend = i386_round_page(vaend); 414 415 countvmpg = (vapend - vapstart) / NBPG; 416 417/* 418 * check every page in the kva space for b_addr 419 */ 420 for (i = 0; i < countvmpg; i++) { 421 vm_offset_t mybouncepa; 422 vm_offset_t copycount; 423 424 copycount = i386_round_page(bouncekva + 1) - bouncekva; 425 mybouncepa = pmap_kextract(i386_trunc_page(bouncekva)); 426 427/* 428 * if this is a bounced pa, then process as one 429 */ 430 if ((mybouncepa >= bouncepa) && (mybouncepa < bouncepaend)) { 431 if (copycount > bcount) 432 copycount = bcount; 433/* 434 * if this is a read, then copy from bounce buffer into original buffer 435 */ 436 if (bp->b_flags & B_READ) 437 bcopy((caddr_t) bouncekva, (caddr_t) origkva, copycount); 438/* 439 * free the bounce allocation 440 */ 441 vm_bounce_page_free(i386_trunc_page(mybouncepa), 1); 442 } 443 444 origkva += copycount; 445 bouncekva += copycount; 446 bcount -= copycount; 447 } 448 449/* 450 * add the old kva into the "to free" list 451 */ 452 bouncekva = i386_trunc_page((vm_offset_t) bp->b_un.b_addr); 453 vm_bounce_kva_free( bouncekva, 
countvmpg*NBPG, 0); 454 bp->b_un.b_addr = bp->b_savekva; 455 bp->b_savekva = 0; 456 bp->b_flags &= ~B_BOUNCE; 457 458 return; 459} 460 461#endif /* NOBOUNCE */ 462 463/* 464 * init the bounce buffer system 465 */ 466void 467vm_bounce_init() 468{ 469 vm_offset_t minaddr, maxaddr; 470 471 io_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, MAXBKVA * NBPG, FALSE); 472 kvasfreecnt = 0; 473 474#ifndef NOBOUNCE 475 if (bouncepages == 0) 476 return; 477 478 bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED; 479 bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT); 480 481 if (!bounceallocarray) 482 panic("Cannot allocate bounce resource array\n"); 483 484 bzero(bounceallocarray, bounceallocarraysize * sizeof(long)); 485 486 487 bouncepa = pmap_kextract((vm_offset_t) bouncememory); 488 bouncepaend = bouncepa + bouncepages * NBPG; 489 bouncefree = bouncepages; 490#endif 491 492} 493 494 495static void 496cldiskvamerge( kvanew, orig1, orig1cnt, orig2, orig2cnt) 497 vm_offset_t kvanew; 498 vm_offset_t orig1, orig1cnt; 499 vm_offset_t orig2, orig2cnt; 500{ 501 int i; 502 vm_offset_t pa; 503/* 504 * enter the transfer physical addresses into the new kva 505 */ 506 for(i=0;i<orig1cnt;i++) { 507 vm_offset_t pa; 508 pa = pmap_kextract((caddr_t) orig1 + i * PAGE_SIZE); 509 pmap_kenter(kvanew + i * PAGE_SIZE, pa); 510 } 511 512 for(i=0;i<orig2cnt;i++) { 513 vm_offset_t pa; 514 pa = pmap_kextract((caddr_t) orig2 + i * PAGE_SIZE); 515 pmap_kenter(kvanew + (i + orig1cnt) * PAGE_SIZE, pa); 516 } 517 pmap_update(); 518} 519 520void 521cldisksort(struct buf *dp, struct buf *bp, vm_offset_t maxio) 522{ 523 register struct buf *ap, *newbp; 524 int i, trycount=0; 525 vm_offset_t orig1pages, orig2pages; 526 vm_offset_t orig1begin, orig2begin; 527 vm_offset_t kvanew, kvaorig; 528 529 if( bp->b_bcount < MAXCLSTATS*PAGE_SIZE) 530 ++rqstats[bp->b_bcount/PAGE_SIZE]; 531 /* 532 * If nothing on the activity queue, then 533 * we become 
the only thing. 534 */ 535 ap = dp->b_actf; 536 if(ap == NULL) { 537 dp->b_actf = bp; 538 dp->b_actl = bp; 539 bp->av_forw = NULL; 540 return; 541 } 542 543 /* 544 * If we lie after the first (currently active) 545 * request, then we must locate the second request list 546 * and add ourselves to it. 547 */ 548 549 if (bp->b_pblkno < ap->b_pblkno) { 550 while (ap->av_forw) { 551 /* 552 * Check for an ``inversion'' in the 553 * normally ascending block numbers, 554 * indicating the start of the second request list. 555 */ 556 if (ap->av_forw->b_pblkno < ap->b_pblkno) { 557 /* 558 * Search the second request list 559 * for the first request at a larger 560 * block number. We go before that; 561 * if there is no such request, we go at end. 562 */ 563 do { 564 if (bp->b_pblkno < ap->av_forw->b_pblkno) 565 goto insert; 566 ap = ap->av_forw; 567 } while (ap->av_forw); 568 goto insert; /* after last */ 569 } 570 ap = ap->av_forw; 571 } 572 /* 573 * No inversions... we will go after the last, and 574 * be the first request in the second request list. 575 */ 576 goto insert; 577 } 578 /* 579 * Request is at/after the current request... 580 * sort in the first request list. 581 */ 582 while (ap->av_forw) { 583 /* 584 * We want to go after the current request 585 * if there is an inversion after it (i.e. it is 586 * the end of the first request list), or if 587 * the next request is a larger block than our request. 588 */ 589 if (ap->av_forw->b_pblkno < ap->b_pblkno || 590 bp->b_pblkno < ap->av_forw->b_pblkno ) 591 goto insert; 592 ap = ap->av_forw; 593 } 594 595insert: 596 597 /* 598 * read clustering with new read-ahead disk drives hurts mostly, so 599 * we don't bother... 600 */ 601 if( bp->b_flags & B_READ) 602 goto nocluster; 603 /* 604 * we currently only cluster I/O transfers that are at page-aligned 605 * kvas and transfers that are multiples of page lengths. 
606 */ 607 if ((bp->b_flags & B_BAD) == 0 && 608 ((bp->b_bcount & PAGE_MASK) == 0) && 609 (((vm_offset_t) bp->b_un.b_addr & PAGE_MASK) == 0)) { 610 if( maxio > MAXCLSTATS*PAGE_SIZE) 611 maxio = MAXCLSTATS*PAGE_SIZE; 612 /* 613 * merge with previous? 614 * conditions: 615 * 1) We reside physically immediately after the previous block. 616 * 2) The previous block is not first on the device queue because 617 * such a block might be active. 618 * 3) The mode of the two I/Os is identical. 619 * 4) The previous kva is page aligned and the previous transfer 620 * is a multiple of a page in length. 621 * 5) And the total I/O size would be below the maximum. 622 */ 623 if( (ap->b_pblkno + (ap->b_bcount / DEV_BSIZE) == bp->b_pblkno) && 624 (dp->b_actf != ap) && 625 ((ap->b_flags & ~B_CLUSTER) == bp->b_flags) && 626 ((ap->b_flags & B_BAD) == 0) && 627 ((ap->b_bcount & PAGE_MASK) == 0) && 628 (((vm_offset_t) ap->b_un.b_addr & PAGE_MASK) == 0) && 629 (ap->b_bcount + bp->b_bcount < maxio)) { 630 631 orig1begin = (vm_offset_t) ap->b_un.b_addr; 632 orig1pages = ap->b_bcount / PAGE_SIZE; 633 634 orig2begin = (vm_offset_t) bp->b_un.b_addr; 635 orig2pages = bp->b_bcount / PAGE_SIZE; 636 /* 637 * see if we can allocate a kva, if we cannot, the don't 638 * cluster. 
639 */ 640 kvanew = vm_bounce_kva( PAGE_SIZE * (orig1pages + orig2pages), 0); 641 if( !kvanew) { 642 goto nocluster; 643 } 644 645 646 if( (ap->b_flags & B_CLUSTER) == 0) { 647 648 /* 649 * get a physical buf pointer 650 */ 651 newbp = (struct buf *)trypbuf(); 652 if( !newbp) { 653 vm_bounce_kva_free( kvanew, PAGE_SIZE * (orig1pages + orig2pages), 1); 654 goto nocluster; 655 } 656 657 cldiskvamerge( kvanew, orig1begin, orig1pages, orig2begin, orig2pages); 658 659 /* 660 * build the new bp to be handed off to the device 661 */ 662 663 --clstats[ap->b_bcount/PAGE_SIZE]; 664 *newbp = *ap; 665 newbp->b_flags |= B_CLUSTER; 666 newbp->b_un.b_addr = (caddr_t) kvanew; 667 newbp->b_bcount += bp->b_bcount; 668 newbp->b_bufsize = newbp->b_bcount; 669 newbp->b_clusterf = ap; 670 newbp->b_clusterl = bp; 671 ++clstats[newbp->b_bcount/PAGE_SIZE]; 672 673 /* 674 * enter the new bp onto the device queue 675 */ 676 if( ap->av_forw) 677 ap->av_forw->av_back = newbp; 678 else 679 dp->b_actl = newbp; 680 681 if( dp->b_actf != ap ) 682 ap->av_back->av_forw = newbp; 683 else 684 dp->b_actf = newbp; 685 686 /* 687 * enter the previous bps onto the cluster queue 688 */ 689 ap->av_forw = bp; 690 bp->av_back = ap; 691 692 ap->av_back = NULL; 693 bp->av_forw = NULL; 694 695 } else { 696 vm_offset_t addr; 697 698 cldiskvamerge( kvanew, orig1begin, orig1pages, orig2begin, orig2pages); 699 /* 700 * free the old kva 701 */ 702 vm_bounce_kva_free( orig1begin, ap->b_bufsize, 0); 703 --clstats[ap->b_bcount/PAGE_SIZE]; 704 705 ap->b_un.b_addr = (caddr_t) kvanew; 706 707 ap->b_clusterl->av_forw = bp; 708 bp->av_forw = NULL; 709 bp->av_back = ap->b_clusterl; 710 ap->b_clusterl = bp; 711 712 ap->b_bcount += bp->b_bcount; 713 ap->b_bufsize = ap->b_bcount; 714 ++clstats[ap->b_bcount/PAGE_SIZE]; 715 } 716 return; 717 /* 718 * merge with next? 719 * conditions: 720 * 1) We reside physically before the next block. 721 * 3) The mode of the two I/Os is identical. 
722 * 4) The next kva is page aligned and the next transfer 723 * is a multiple of a page in length. 724 * 5) And the total I/O size would be below the maximum. 725 */ 726 } else if( ap->av_forw && 727 (bp->b_pblkno + (bp->b_bcount / DEV_BSIZE) == ap->av_forw->b_pblkno) && 728 (bp->b_flags == (ap->av_forw->b_flags & ~B_CLUSTER)) && 729 ((ap->av_forw->b_flags & B_BAD) == 0) && 730 ((ap->av_forw->b_bcount & PAGE_MASK) == 0) && 731 (((vm_offset_t) ap->av_forw->b_un.b_addr & PAGE_MASK) == 0) && 732 (ap->av_forw->b_bcount + bp->b_bcount < maxio)) { 733 734 orig1begin = (vm_offset_t) bp->b_un.b_addr; 735 orig1pages = bp->b_bcount / PAGE_SIZE; 736 737 orig2begin = (vm_offset_t) ap->av_forw->b_un.b_addr; 738 orig2pages = ap->av_forw->b_bcount / PAGE_SIZE; 739 740 /* 741 * see if we can allocate a kva, if we cannot, the don't 742 * cluster. 743 */ 744 kvanew = vm_bounce_kva( PAGE_SIZE * (orig1pages + orig2pages), 0); 745 if( !kvanew) { 746 goto nocluster; 747 } 748 749 /* 750 * if next isn't a cluster we need to create one 751 */ 752 if( (ap->av_forw->b_flags & B_CLUSTER) == 0) { 753 754 /* 755 * get a physical buf pointer 756 */ 757 newbp = (struct buf *)trypbuf(); 758 if( !newbp) { 759 vm_bounce_kva_free( kvanew, PAGE_SIZE * (orig1pages + orig2pages), 1); 760 goto nocluster; 761 } 762 763 cldiskvamerge( kvanew, orig1begin, orig1pages, orig2begin, orig2pages); 764 ap = ap->av_forw; 765 --clstats[ap->b_bcount/PAGE_SIZE]; 766 *newbp = *ap; 767 newbp->b_flags |= B_CLUSTER; 768 newbp->b_un.b_addr = (caddr_t) kvanew; 769 newbp->b_blkno = bp->b_blkno; 770 newbp->b_pblkno = bp->b_pblkno; 771 newbp->b_bcount += bp->b_bcount; 772 newbp->b_bufsize = newbp->b_bcount; 773 newbp->b_clusterf = bp; 774 newbp->b_clusterl = ap; 775 ++clstats[newbp->b_bcount/PAGE_SIZE]; 776 777 if( ap->av_forw) 778 ap->av_forw->av_back = newbp; 779 else 780 dp->b_actl = newbp; 781 782 if( dp->b_actf != ap ) 783 ap->av_back->av_forw = newbp; 784 else 785 dp->b_actf = newbp; 786 787 bp->av_forw = ap; 788 
ap->av_back = bp; 789 790 bp->av_back = NULL; 791 ap->av_forw = NULL; 792 } else { 793 vm_offset_t addr; 794 795 cldiskvamerge( kvanew, orig1begin, orig1pages, orig2begin, orig2pages); 796 ap = ap->av_forw; 797 vm_bounce_kva_free( orig2begin, ap->b_bufsize, 0); 798 799 ap->b_un.b_addr = (caddr_t) kvanew; 800 bp->av_forw = ap->b_clusterf; 801 ap->b_clusterf->av_back = bp; 802 ap->b_clusterf = bp; 803 bp->av_back = NULL; 804 --clstats[ap->b_bcount/PAGE_SIZE]; 805 806 ap->b_blkno = bp->b_blkno; 807 ap->b_pblkno = bp->b_pblkno; 808 ap->b_bcount += bp->b_bcount; 809 ap->b_bufsize = ap->b_bcount; 810 ++clstats[ap->b_bcount/PAGE_SIZE]; 811 812 } 813 return; 814 } 815 } 816 /* 817 * don't merge 818 */ 819nocluster: 820 ++clstats[bp->b_bcount/PAGE_SIZE]; 821 bp->av_forw = ap->av_forw; 822 if( bp->av_forw) 823 bp->av_forw->av_back = bp; 824 else 825 dp->b_actl = bp; 826 827 ap->av_forw = bp; 828 bp->av_back = ap; 829} 830 831/* 832 * quick version of vm_fault 833 */ 834 835void 836vm_fault_quick( v, prot) 837 vm_offset_t v; 838 int prot; 839{ 840 if( (cpu_class == CPUCLASS_386) && 841 (prot & VM_PROT_WRITE)) 842 vm_fault(&curproc->p_vmspace->vm_map, v, 843 VM_PROT_READ|VM_PROT_WRITE, FALSE); 844 else if( prot & VM_PROT_WRITE) 845 *(volatile char *)v += 0; 846 else 847 *(volatile char *)v; 848} 849 850 851/* 852 * Finish a fork operation, with process p2 nearly set up. 853 * Copy and update the kernel stack and pcb, making the child 854 * ready to run, and marking it so that it can return differently 855 * than the parent. Returns 1 in the child process, 0 in the parent. 856 * We currently double-map the user area so that the stack is at the same 857 * address in each process; in the future we will probably relocate 858 * the frame pointers on the stack after copying. 
859 */ 860int 861cpu_fork(p1, p2) 862 register struct proc *p1, *p2; 863{ 864 register struct user *up = p2->p_addr; 865 int foo, offset, addr, i; 866 extern char kstack[]; 867 extern int mvesp(); 868 869 /* 870 * Copy pcb and stack from proc p1 to p2. 871 * We do this as cheaply as possible, copying only the active 872 * part of the stack. The stack and pcb need to agree; 873 * this is tricky, as the final pcb is constructed by savectx, 874 * but its frame isn't yet on the stack when the stack is copied. 875 * swtch compensates for this when the child eventually runs. 876 * This should be done differently, with a single call 877 * that copies and updates the pcb+stack, 878 * replacing the bcopy and savectx. 879 */ 880 p2->p_addr->u_pcb = p1->p_addr->u_pcb; 881 offset = mvesp() - (int)kstack; 882 bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset, 883 (unsigned) ctob(UPAGES) - offset); 884 p2->p_regs = p1->p_regs; 885 886 /* 887 * Wire top of address space of child to it's kstack. 888 * First, fault in a page of pte's to map it. 889 */ 890#if 0 891 addr = trunc_page((u_int)vtopte(kstack)); 892 vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE); 893 for (i=0; i < UPAGES; i++) 894 pmap_enter(&p2->p_vmspace->vm_pmap, kstack+i*NBPG, 895 pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG), 896 /* 897 * The user area has to be mapped writable because 898 * it contains the kernel stack (when CR0_WP is on 899 * on a 486 there is no user-read/kernel-write 900 * mode). It is protected from user mode access 901 * by the segment limits. 902 */ 903 VM_PROT_READ|VM_PROT_WRITE, TRUE); 904#endif 905 pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb); 906 907 /* 908 * 909 * Arrange for a non-local goto when the new process 910 * is started, to resume here, returning nonzero from setjmp. 911 */ 912 if (savectx(up, 1)) { 913 /* 914 * Return 1 in child. 
915 */ 916 return (1); 917 } 918 return (0); 919} 920 921#ifdef notyet 922/* 923 * cpu_exit is called as the last action during exit. 924 * 925 * We change to an inactive address space and a "safe" stack, 926 * passing thru an argument to the new stack. Now, safely isolated 927 * from the resources we're shedding, we release the address space 928 * and any remaining machine-dependent resources, including the 929 * memory for the user structure and kernel stack. 930 * 931 * Next, we assign a dummy context to be written over by swtch, 932 * calling it to send this process off to oblivion. 933 * [The nullpcb allows us to minimize cost in swtch() by not having 934 * a special case]. 935 */ 936struct proc *swtch_to_inactive(); 937volatile void 938cpu_exit(p) 939 register struct proc *p; 940{ 941 static struct pcb nullpcb; /* pcb to overwrite on last swtch */ 942 943#if NNPX > 0 944 npxexit(p); 945#endif /* NNPX */ 946 947 /* move to inactive space and stack, passing arg accross */ 948 p = swtch_to_inactive(p); 949 950 /* drop per-process resources */ 951 vmspace_free(p->p_vmspace); 952 kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); 953 954 p->p_addr = (struct user *) &nullpcb; 955 splclock(); 956 swtch(); 957 /* NOTREACHED */ 958} 959#else 960void 961cpu_exit(p) 962 register struct proc *p; 963{ 964 965#if NNPX > 0 966 npxexit(p); 967#endif /* NNPX */ 968 splclock(); 969 curproc = 0; 970 swtch(); 971 /* 972 * This is to shutup the compiler, and if swtch() failed I suppose 973 * this would be a good thing. This keeps gcc happy because panic 974 * is a volatile void function as well. 
975 */ 976 panic("cpu_exit"); 977} 978 979void 980cpu_wait(p) struct proc *p; { 981/* extern vm_map_t upages_map; */ 982 extern char kstack[]; 983 984 /* drop per-process resources */ 985 pmap_remove(vm_map_pmap(kernel_map), (vm_offset_t) p->p_addr, 986 ((vm_offset_t) p->p_addr) + ctob(UPAGES)); 987 kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); 988 vmspace_free(p->p_vmspace); 989} 990#endif 991 992/* 993 * Set a red zone in the kernel stack after the u. area. 994 */ 995void 996setredzone(pte, vaddr) 997 u_short *pte; 998 caddr_t vaddr; 999{ 1000/* eventually do this by setting up an expand-down stack segment 1001 for ss0: selector, allowing stack access down to top of u. 1002 this means though that protection violations need to be handled 1003 thru a double fault exception that must do an integral task 1004 switch to a known good context, within which a dump can be 1005 taken. a sensible scheme might be to save the initial context 1006 used by sched (that has physical memory mapped 1:1 at bottom) 1007 and take the dump while still in mapped mode */ 1008} 1009 1010/* 1011 * Convert kernel VA to physical address 1012 */ 1013u_long 1014kvtop(void *addr) 1015{ 1016 vm_offset_t va; 1017 1018 va = pmap_kextract((vm_offset_t)addr); 1019 if (va == 0) 1020 panic("kvtop: zero page frame"); 1021 return((int)va); 1022} 1023 1024extern vm_map_t phys_map; 1025 1026/* 1027 * Map an IO request into kernel virtual address space. 1028 * 1029 * All requests are (re)mapped into kernel VA space. 1030 * Notice that we use b_bufsize for the size of the buffer 1031 * to be mapped. b_bcount might be modified by the driver. 
1032 */ 1033void 1034vmapbuf(bp) 1035 register struct buf *bp; 1036{ 1037 register int npf; 1038 register caddr_t addr; 1039 register long flags = bp->b_flags; 1040 struct proc *p; 1041 int off; 1042 vm_offset_t kva; 1043 register vm_offset_t pa; 1044 1045 if ((flags & B_PHYS) == 0) 1046 panic("vmapbuf"); 1047 addr = bp->b_saveaddr = bp->b_un.b_addr; 1048 off = (int)addr & PGOFSET; 1049 p = bp->b_proc; 1050 npf = btoc(round_page(bp->b_bufsize + off)); 1051 kva = kmem_alloc_wait(phys_map, ctob(npf)); 1052 bp->b_un.b_addr = (caddr_t) (kva + off); 1053 while (npf--) { 1054 pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr); 1055 if (pa == 0) 1056 panic("vmapbuf: null page frame"); 1057 pmap_kenter(kva, trunc_page(pa)); 1058 addr += PAGE_SIZE; 1059 kva += PAGE_SIZE; 1060 } 1061 pmap_update(); 1062} 1063 1064/* 1065 * Free the io map PTEs associated with this IO operation. 1066 * We also invalidate the TLB entries and restore the original b_addr. 1067 */ 1068void 1069vunmapbuf(bp) 1070 register struct buf *bp; 1071{ 1072 register int npf; 1073 register caddr_t addr = bp->b_un.b_addr; 1074 vm_offset_t kva; 1075 1076 if ((bp->b_flags & B_PHYS) == 0) 1077 panic("vunmapbuf"); 1078 npf = btoc(round_page(bp->b_bufsize + ((int)addr & PGOFSET))); 1079 kva = (vm_offset_t)((int)addr & ~PGOFSET); 1080 kmem_free_wakeup(phys_map, kva, ctob(npf)); 1081 bp->b_un.b_addr = bp->b_saveaddr; 1082 bp->b_saveaddr = NULL; 1083} 1084 1085/* 1086 * Force reset the processor by invalidating the entire address space! 1087 */ 1088void 1089cpu_reset() { 1090 1091 /* force a shutdown by unmapping entire address space ! */ 1092 bzero((caddr_t) PTD, NBPG); 1093 1094 /* "good night, sweet prince .... <THUNK!>" */ 1095 tlbflush(); 1096 /* NOTREACHED */ 1097 while(1); 1098} 1099 1100/* 1101 * Grow the user stack to allow for 'sp'. This version grows the stack in 1102 * chunks of SGROWSIZ. 
1103 */ 1104int 1105grow(p, sp) 1106 struct proc *p; 1107 int sp; 1108{ 1109 unsigned int nss; 1110 caddr_t v; 1111 struct vmspace *vm = p->p_vmspace; 1112 1113 if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK) 1114 return (1); 1115 1116 nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE); 1117 1118 if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur) 1119 return (0); 1120 1121 if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT, 1122 SGROWSIZ) < nss) { 1123 int grow_amount; 1124 /* 1125 * If necessary, grow the VM that the stack occupies 1126 * to allow for the rlimit. This allows us to not have 1127 * to allocate all of the VM up-front in execve (which 1128 * is expensive). 1129 * Grow the VM by the amount requested rounded up to 1130 * the nearest SGROWSIZ to provide for some hysteresis. 1131 */ 1132 grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ); 1133 v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT, 1134 SGROWSIZ) - grow_amount; 1135 /* 1136 * If there isn't enough room to extend by SGROWSIZ, then 1137 * just extend to the maximum size 1138 */ 1139 if (v < vm->vm_maxsaddr) { 1140 v = vm->vm_maxsaddr; 1141 grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT); 1142 } 1143 if (vm_allocate(&vm->vm_map, (vm_offset_t *)&v, 1144 grow_amount, FALSE) != KERN_SUCCESS) { 1145 return (0); 1146 } 1147 vm->vm_ssize += grow_amount >> PAGE_SHIFT; 1148 } 1149 1150 return (1); 1151} 1152