vm_machdep.c revision 1379
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)vm_machdep.c  7.3 (Berkeley) 5/13/91
 *      Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 *      $Id: vm_machdep.c,v 1.19 1994/04/14 07:49:40 davidg Exp $
 */

#include "npx.h"
#include "param.h"
#include "systm.h"
#include "proc.h"
#include "malloc.h"
#include "buf.h"
#include "user.h"

#include "../include/cpu.h"

#include "vm/vm.h"
#include "vm/vm_kern.h"

#define b_cylin b_resid

#define MAXCLSTATS 256
int clstats[MAXCLSTATS];
int rqstats[MAXCLSTATS];


#ifndef NOBOUNCE

caddr_t bouncememory;
vm_offset_t bouncepa, bouncepaend;
int bouncepages, bpwait;
vm_map_t io_map;
int bmwait, bmfreeing;

#define BITS_IN_UNSIGNED (8*sizeof(unsigned))
int bounceallocarraysize;
unsigned *bounceallocarray;
int bouncefree;

#define SIXTEENMEG (4096*4096)
#define MAXBKVA 1024

/* special list that can be used at interrupt time for eventual kva free */
struct kvasfree {
    vm_offset_t addr;
    vm_offset_t size;
} kvaf[MAXBKVA];

int kvasfreecnt;

vm_offset_t vm_bounce_kva();
/*
 * get bounce buffer pages (count physically contiguous)
 * (only 1 implemented now)
 */
vm_offset_t
vm_bounce_page_find(count)
    int count;
{
    int bit;
    int s, i;

    if (count != 1)
        panic("vm_bounce_page_find -- no support for > 1 page yet!!!");

    s = splbio();
retry:
    for (i = 0; i < bounceallocarraysize; i++) {
        if (bounceallocarray[i] != 0xffffffff) {
            if (bit = ffs(~bounceallocarray[i])) {
                bounceallocarray[i] |= 1 << (bit - 1);
                bouncefree -= count;
                splx(s);
                return bouncepa + (i * BITS_IN_UNSIGNED + (bit - 1)) * NBPG;
            }
        }
    }
    bpwait = 1;
    tsleep((caddr_t) &bounceallocarray, PRIBIO, "bncwai", 0);
    goto retry;
}

void
vm_bounce_kva_free(addr, size, now)
    vm_offset_t addr;
    vm_offset_t size;
    int now;
{
    int s = splbio();
    kvaf[kvasfreecnt].addr = addr;
    kvaf[kvasfreecnt++].size = size;
    if( now) {
        /*
         * this will do wakeups
         */
        vm_bounce_kva(0, 0);
    } else {
        if (bmwait) {
            /*
             * if anyone is waiting on the bounce-map, then wakeup
             */
            wakeup((caddr_t) io_map);
            bmwait = 0;
        }
    }
    splx(s);
}

/*
 * free count bounce buffer pages
 */
void
vm_bounce_page_free(pa, count)
    vm_offset_t pa;
    int count;
{
    int allocindex;
    int index;
    int bit;

    if (count != 1)
        panic("vm_bounce_page_free -- no support for > 1 page yet!!!\n");

    index = (pa - bouncepa) / NBPG;

    if ((index < 0) || (index >= bouncepages))
        panic("vm_bounce_page_free -- bad index\n");

    allocindex = index / BITS_IN_UNSIGNED;
    bit = index % BITS_IN_UNSIGNED;

    bounceallocarray[allocindex] &= ~(1 << bit);

    bouncefree += count;
    if (bpwait) {
        bpwait = 0;
        wakeup((caddr_t) &bounceallocarray);
    }
}

/*
 * allocate count bounce buffer kva pages
 */
vm_offset_t
vm_bounce_kva(count, waitok)
    int count;
    int waitok;
{
    int tofree;
    int i;
    int startfree;
    vm_offset_t kva = 0;
    int s = splbio();
    int size = count;
    startfree = 0;
more:
    if (!bmfreeing && (tofree = kvasfreecnt)) {
        bmfreeing = 1;
        for (i = startfree; i < kvasfreecnt; i++) {
            /*
             * if we have a kva of the right size, no sense
             * in freeing/reallocating...
             * might affect fragmentation short term, but
             * as long as the amount of io_map is
             * significantly more than the maximum transfer
             * size, I don't think that it is a problem.
             */
            pmap_remove(kernel_pmap,
                kvaf[i].addr, kvaf[i].addr + kvaf[i].size);
            if( size && !kva && kvaf[i].size == size) {
                kva = kvaf[i].addr;
            } else {
                kmem_free_wakeup(io_map, kvaf[i].addr,
                    kvaf[i].size);
            }
        }
        if (kvasfreecnt != tofree) {
            startfree = i;
            bmfreeing = 0;
            goto more;
        }
        kvasfreecnt = 0;
        bmfreeing = 0;
    }

    if( size == 0) {
        splx(s);
        return NULL;
    }

    if (!kva && !(kva = kmem_alloc_pageable(io_map, size))) {
        if( !waitok) {
            splx(s);
            return NULL;
        }
        bmwait = 1;
        tsleep((caddr_t) io_map, PRIBIO, "bmwait", 0);
        goto more;
    }
    splx(s);

    return kva;
}

/*
 * same as vm_bounce_kva -- but really allocate
 */
vm_offset_t
vm_bounce_kva_alloc(count)
    int count;
{
    int i;
    vm_offset_t kva;
    vm_offset_t pa;
    if( bouncepages == 0) {
        kva = (vm_offset_t) malloc(count*NBPG, M_TEMP, M_WAITOK);
        return kva;
    }
    /* vm_bounce_kva takes a size in bytes, not a page count */
    kva = vm_bounce_kva(count*NBPG, 1);
    for(i=0;i<count;i++) {
        pa = vm_bounce_page_find(1);
        pmap_kenter(kva + i * NBPG, pa);
    }
    return kva;
}

/*
 * same as vm_bounce_kva_free -- but really free
 */
void
vm_bounce_kva_alloc_free(kva, count)
    vm_offset_t kva;
    int count;
{
    int i;
    vm_offset_t pa;
    if( bouncepages == 0) {
        free((caddr_t) kva, M_TEMP);
        return;
    }
    for(i = 0; i < count; i++) {
        pa = pmap_kextract(kva + i * NBPG);
        vm_bounce_page_free(pa, 1);
    }
    /* vm_bounce_kva_free takes a byte size and a "free now" flag */
    vm_bounce_kva_free(kva, count*NBPG, 0);
}

/*
 * do the things necessary to the struct buf to implement
 * bounce buffers...  inserted before the disk sort
 */
void
vm_bounce_alloc(bp)
    struct buf *bp;
{
    int countvmpg;
    vm_offset_t vastart, vaend;
    vm_offset_t vapstart, vapend;
    vm_offset_t va, kva;
    vm_offset_t pa;
    int dobounceflag = 0;
    int bounceindex;
    int i;
    int s;

    if (bouncepages == 0)
        return;

    if (bp->b_bufsize < bp->b_bcount) {
        printf("vm_bounce_alloc: b_bufsize(%d) < b_bcount(%d) !!!!\n",
            bp->b_bufsize, bp->b_bcount);
        bp->b_bufsize = bp->b_bcount;
    }

    vastart = (vm_offset_t) bp->b_un.b_addr;
    vaend = (vm_offset_t) bp->b_un.b_addr + bp->b_bufsize;

    vapstart = i386_trunc_page(vastart);
    vapend = i386_round_page(vaend);
    countvmpg = (vapend - vapstart) / NBPG;

/*
 * if any page is above 16MB, then go into bounce-buffer mode
 */
    va = vapstart;
    for (i = 0; i < countvmpg; i++) {
        pa = pmap_kextract(va);
        if (pa >= SIXTEENMEG)
            ++dobounceflag;
        va += NBPG;
    }
    if (dobounceflag == 0)
        return;

    if (bouncepages < dobounceflag)
        panic("Not enough bounce buffers!!!");

/*
 * allocate a replacement kva for b_addr
 */
    kva = vm_bounce_kva(countvmpg*NBPG, 1);
    va = vapstart;
    for (i = 0; i < countvmpg; i++) {
        pa = pmap_kextract(va);
        if (pa >= SIXTEENMEG) {
            /*
             * allocate a replacement page
             */
            vm_offset_t bpa = vm_bounce_page_find(1);
            pmap_kenter(kva + (NBPG * i), bpa);
            /*
             * if we are writing, then copy the data into the page
             */
            if ((bp->b_flags & B_READ) == 0) {
                pmap_update();
                bcopy((caddr_t) va, (caddr_t) kva + (NBPG * i), NBPG);
            }
        } else {
            /*
             * use original page
             */
            pmap_kenter(kva + (NBPG * i), pa);
        }
        va += NBPG;
    }
    pmap_update();

/*
 * flag the buffer as being bounced
 */
    bp->b_flags |= B_BOUNCE;
/*
 * save the original buffer kva
 */
    bp->b_savekva = bp->b_un.b_addr;
/*
 * put our new kva into the buffer (offset by original offset)
 */
    bp->b_un.b_addr = (caddr_t) (((vm_offset_t) kva) |
        ((vm_offset_t) bp->b_savekva & (NBPG - 1)));
    return;
}

/*
 * hook into biodone to free bounce buffer
 */
void
vm_bounce_free(bp)
    struct buf *bp;
{
    int i;
    vm_offset_t origkva, bouncekva;
    vm_offset_t vastart, vaend;
    vm_offset_t vapstart, vapend;
    int countbounce = 0;
    vm_offset_t firstbouncepa = 0;
    int firstbounceindex;
    int countvmpg;
    vm_offset_t bcount;
    int s;

/*
 * if this isn't a bounced buffer, then just return
 */
    if ((bp->b_flags & B_BOUNCE) == 0)
        return;

    origkva = (vm_offset_t) bp->b_savekva;
    bouncekva = (vm_offset_t) bp->b_un.b_addr;

    vastart = bouncekva;
    vaend = bouncekva + bp->b_bufsize;
    bcount = bp->b_bufsize;

    vapstart = i386_trunc_page(vastart);
    vapend = i386_round_page(vaend);

    countvmpg = (vapend - vapstart) / NBPG;

/*
 * check every page in the kva space for b_addr
 */
    for (i = 0; i < countvmpg; i++) {
        vm_offset_t mybouncepa;
        vm_offset_t copycount;

        copycount = i386_round_page(bouncekva + 1) - bouncekva;
        mybouncepa = pmap_kextract(i386_trunc_page(bouncekva));

/*
 * if this is a bounced pa, then process as one
 */
        if ((mybouncepa >= bouncepa) && (mybouncepa < bouncepaend)) {
            if (copycount > bcount)
                copycount = bcount;
/*
 * if this is a read, then copy from bounce buffer into original buffer
 */
            if (bp->b_flags & B_READ)
                bcopy((caddr_t) bouncekva, (caddr_t) origkva, copycount);
/*
 * free the bounce allocation
 */
            vm_bounce_page_free(i386_trunc_page(mybouncepa), 1);
        }

        origkva += copycount;
        bouncekva += copycount;
        bcount -= copycount;
    }

/*
 * add the old kva into the "to free" list
 */
    bouncekva = i386_trunc_page((vm_offset_t) bp->b_un.b_addr);
    vm_bounce_kva_free( bouncekva, countvmpg*NBPG, 0);
    bp->b_un.b_addr = bp->b_savekva;
    bp->b_savekva = 0;
    bp->b_flags &= ~B_BOUNCE;

    return;
}

#endif /* NOBOUNCE */

/*
 * init the bounce buffer system
 */
void
vm_bounce_init()
{
    vm_offset_t minaddr, maxaddr;

    io_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, MAXBKVA * NBPG, FALSE);
    kvasfreecnt = 0;

#ifndef NOBOUNCE
    if (bouncepages == 0)
        return;

    bounceallocarraysize = (bouncepages + BITS_IN_UNSIGNED - 1) / BITS_IN_UNSIGNED;
    bounceallocarray = malloc(bounceallocarraysize * sizeof(unsigned), M_TEMP, M_NOWAIT);

    if (!bounceallocarray)
        panic("Cannot allocate bounce resource array\n");

    bzero(bounceallocarray, bounceallocarraysize * sizeof(long));


    bouncepa = pmap_kextract((vm_offset_t) bouncememory);
    bouncepaend = bouncepa + bouncepages * NBPG;
    bouncefree = bouncepages;
#endif

}


/*
 * map the physical pages behind two existing kva ranges into one new,
 * contiguous kva range, so that the two transfers can be issued to the
 * device as a single clustered I/O
 */
static void
cldiskvamerge( kvanew, orig1, orig1cnt, orig2, orig2cnt)
    vm_offset_t kvanew;
    vm_offset_t orig1, orig1cnt;
    vm_offset_t orig2, orig2cnt;
{
    int i;
    vm_offset_t pa;
/*
 * enter the transfer physical addresses into the new kva
 */
    for(i=0;i<orig1cnt;i++) {
        vm_offset_t pa;
        pa = pmap_kextract((caddr_t) orig1 + i * PAGE_SIZE);
        pmap_kenter(kvanew + i * PAGE_SIZE, pa);
    }

    for(i=0;i<orig2cnt;i++) {
        vm_offset_t pa;
        pa = pmap_kextract((caddr_t) orig2 + i * PAGE_SIZE);
        pmap_kenter(kvanew + (i + orig1cnt) * PAGE_SIZE, pa);
    }
    pmap_update();
}

void
cldisksort(struct buf *dp, struct buf *bp, vm_offset_t maxio)
{
    register struct buf *ap, *newbp;
    int i, trycount=0;
    vm_offset_t orig1pages, orig2pages;
    vm_offset_t orig1begin, orig2begin;
    vm_offset_t kvanew, kvaorig;

    if( bp->b_bcount < MAXCLSTATS*PAGE_SIZE)
        ++rqstats[bp->b_bcount/PAGE_SIZE];
    /*
     * If nothing on the activity queue, then
     * we become the only thing.
     */
    ap = dp->b_actf;
    if(ap == NULL) {
        dp->b_actf = bp;
        dp->b_actl = bp;
        bp->av_forw = NULL;
        return;
    }

    /*
     * If we lie after the first (currently active)
     * request, then we must locate the second request list
     * and add ourselves to it.
     */

    if (bp->b_pblkno < ap->b_pblkno) {
        while (ap->av_forw) {
            /*
             * Check for an ``inversion'' in the
             * normally ascending block numbers,
             * indicating the start of the second request list.
             */
            if (ap->av_forw->b_pblkno < ap->b_pblkno) {
                /*
                 * Search the second request list
                 * for the first request at a larger
                 * block number.  We go before that;
                 * if there is no such request, we go at end.
                 */
                do {
                    if (bp->b_pblkno < ap->av_forw->b_pblkno)
                        goto insert;
                    ap = ap->av_forw;
                } while (ap->av_forw);
                goto insert;        /* after last */
            }
            ap = ap->av_forw;
        }
        /*
         * No inversions... we will go after the last, and
         * be the first request in the second request list.
         */
        goto insert;
    }
    /*
     * Request is at/after the current request...
     * sort in the first request list.
     */
    while (ap->av_forw) {
        /*
         * We want to go after the current request
         * if there is an inversion after it (i.e. it is
         * the end of the first request list), or if
         * the next request is a larger block than our request.
         */
        if (ap->av_forw->b_pblkno < ap->b_pblkno ||
            bp->b_pblkno < ap->av_forw->b_pblkno )
            goto insert;
        ap = ap->av_forw;
    }

insert:

#if 0
    /*
     * read clustering with new read-ahead disk drives hurts mostly, so
     * we don't bother...
     */
    if( bp->b_flags & B_READ)
        goto nocluster;
#endif
    /*
     * we currently only cluster I/O transfers that are at page-aligned
     * kvas and transfers that are multiples of page lengths.
     */
    if ((bp->b_flags & B_BAD) == 0 &&
        ((bp->b_bcount & PAGE_MASK) == 0) &&
        (((vm_offset_t) bp->b_un.b_addr & PAGE_MASK) == 0)) {
        if( maxio > MAXCLSTATS*PAGE_SIZE)
            maxio = MAXCLSTATS*PAGE_SIZE;
        /*
         * merge with previous?
         * conditions:
         *      1) We reside physically immediately after the previous block.
         *      2) The previous block is not first on the device queue because
         *         such a block might be active.
         *      3) The mode of the two I/Os is identical.
         *      4) The previous kva is page aligned and the previous transfer
         *         is a multiple of a page in length.
         *      5) And the total I/O size would be below the maximum.
         */
        if( (ap->b_pblkno + (ap->b_bcount / DEV_BSIZE) == bp->b_pblkno) &&
            (dp->b_actf != ap) &&
            ((ap->b_flags & ~B_CLUSTER) == bp->b_flags) &&
            ((ap->b_flags & B_BAD) == 0) &&
            ((ap->b_bcount & PAGE_MASK) == 0) &&
            (((vm_offset_t) ap->b_un.b_addr & PAGE_MASK) == 0) &&
            (ap->b_bcount + bp->b_bcount < maxio)) {

            orig1begin = (vm_offset_t) ap->b_un.b_addr;
            orig1pages = ap->b_bcount / PAGE_SIZE;

            orig2begin = (vm_offset_t) bp->b_un.b_addr;
            orig2pages = bp->b_bcount / PAGE_SIZE;
            /*
             * see if we can allocate a kva; if we cannot, then don't
             * cluster.
             */
            kvanew = vm_bounce_kva( PAGE_SIZE * (orig1pages + orig2pages), 0);
            if( !kvanew) {
                goto nocluster;
            }


            if( (ap->b_flags & B_CLUSTER) == 0) {

                /*
                 * get a physical buf pointer
                 */
                newbp = (struct buf *)trypbuf();
                if( !newbp) {
                    vm_bounce_kva_free( kvanew, PAGE_SIZE * (orig1pages + orig2pages), 1);
                    goto nocluster;
                }

                cldiskvamerge( kvanew, orig1begin, orig1pages, orig2begin, orig2pages);

                /*
                 * build the new bp to be handed off to the device
                 */

                --clstats[ap->b_bcount/PAGE_SIZE];
                *newbp = *ap;
                newbp->b_flags |= B_CLUSTER;
                newbp->b_un.b_addr = (caddr_t) kvanew;
                newbp->b_bcount += bp->b_bcount;
                newbp->b_bufsize = newbp->b_bcount;
                newbp->b_clusterf = ap;
                newbp->b_clusterl = bp;
                ++clstats[newbp->b_bcount/PAGE_SIZE];

                /*
                 * enter the new bp onto the device queue
                 */
                if( ap->av_forw)
                    ap->av_forw->av_back = newbp;
                else
                    dp->b_actl = newbp;

                if( dp->b_actf != ap )
                    ap->av_back->av_forw = newbp;
                else
                    dp->b_actf = newbp;

                /*
                 * enter the previous bps onto the cluster queue
                 */
                ap->av_forw = bp;
                bp->av_back = ap;

                ap->av_back = NULL;
                bp->av_forw = NULL;

            } else {
                vm_offset_t addr;

                cldiskvamerge( kvanew, orig1begin, orig1pages, orig2begin, orig2pages);
                /*
                 * free the old kva
                 */
                vm_bounce_kva_free( orig1begin, ap->b_bufsize, 0);
                --clstats[ap->b_bcount/PAGE_SIZE];

                ap->b_un.b_addr = (caddr_t) kvanew;

                ap->b_clusterl->av_forw = bp;
                bp->av_forw = NULL;
                bp->av_back = ap->b_clusterl;
                ap->b_clusterl = bp;

                ap->b_bcount += bp->b_bcount;
                ap->b_bufsize = ap->b_bcount;
                ++clstats[ap->b_bcount/PAGE_SIZE];
            }
            return;
        /*
         * merge with next?
         * conditions:
         *      1) We reside physically before the next block.
         *      3) The mode of the two I/Os is identical.
         *      4) The next kva is page aligned and the next transfer
         *         is a multiple of a page in length.
         *      5) And the total I/O size would be below the maximum.
         */
        } else if( ap->av_forw &&
            (bp->b_pblkno + (bp->b_bcount / DEV_BSIZE) == ap->av_forw->b_pblkno) &&
            (bp->b_flags == (ap->av_forw->b_flags & ~B_CLUSTER)) &&
            ((ap->av_forw->b_flags & B_BAD) == 0) &&
            ((ap->av_forw->b_bcount & PAGE_MASK) == 0) &&
            (((vm_offset_t) ap->av_forw->b_un.b_addr & PAGE_MASK) == 0) &&
            (ap->av_forw->b_bcount + bp->b_bcount < maxio)) {

            orig1begin = (vm_offset_t) bp->b_un.b_addr;
            orig1pages = bp->b_bcount / PAGE_SIZE;

            orig2begin = (vm_offset_t) ap->av_forw->b_un.b_addr;
            orig2pages = ap->av_forw->b_bcount / PAGE_SIZE;

            /*
             * see if we can allocate a kva; if we cannot, then don't
             * cluster.
             */
            kvanew = vm_bounce_kva( PAGE_SIZE * (orig1pages + orig2pages), 0);
            if( !kvanew) {
                goto nocluster;
            }

            /*
             * if next isn't a cluster we need to create one
             */
            if( (ap->av_forw->b_flags & B_CLUSTER) == 0) {

                /*
                 * get a physical buf pointer
                 */
                newbp = (struct buf *)trypbuf();
                if( !newbp) {
                    vm_bounce_kva_free( kvanew, PAGE_SIZE * (orig1pages + orig2pages), 1);
                    goto nocluster;
                }

                cldiskvamerge( kvanew, orig1begin, orig1pages, orig2begin, orig2pages);
                ap = ap->av_forw;
                --clstats[ap->b_bcount/PAGE_SIZE];
                *newbp = *ap;
                newbp->b_flags |= B_CLUSTER;
                newbp->b_un.b_addr = (caddr_t) kvanew;
                newbp->b_blkno = bp->b_blkno;
                newbp->b_pblkno = bp->b_pblkno;
                newbp->b_bcount += bp->b_bcount;
                newbp->b_bufsize = newbp->b_bcount;
                newbp->b_clusterf = bp;
                newbp->b_clusterl = ap;
                ++clstats[newbp->b_bcount/PAGE_SIZE];

                if( ap->av_forw)
                    ap->av_forw->av_back = newbp;
                else
                    dp->b_actl = newbp;

                if( dp->b_actf != ap )
                    ap->av_back->av_forw = newbp;
                else
                    dp->b_actf = newbp;

                bp->av_forw = ap;
                ap->av_back = bp;

                bp->av_back = NULL;
                ap->av_forw = NULL;
            } else {
                vm_offset_t addr;

                cldiskvamerge( kvanew, orig1begin, orig1pages, orig2begin, orig2pages);
                ap = ap->av_forw;
                vm_bounce_kva_free( orig2begin, ap->b_bufsize, 0);

                ap->b_un.b_addr = (caddr_t) kvanew;
                bp->av_forw = ap->b_clusterf;
                ap->b_clusterf->av_back = bp;
                ap->b_clusterf = bp;
                bp->av_back = NULL;
                --clstats[ap->b_bcount/PAGE_SIZE];

                ap->b_blkno = bp->b_blkno;
                ap->b_pblkno = bp->b_pblkno;
                ap->b_bcount += bp->b_bcount;
                ap->b_bufsize = ap->b_bcount;
                ++clstats[ap->b_bcount/PAGE_SIZE];

            }
            return;
        }
    }
    /*
     * don't merge
     */
nocluster:
    ++clstats[bp->b_bcount/PAGE_SIZE];
    bp->av_forw = ap->av_forw;
    if( bp->av_forw)
        bp->av_forw->av_back = bp;
    else
        dp->b_actl = bp;

    ap->av_forw = bp;
    bp->av_back = ap;
}


/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the kernel stack and pcb, making the child
 * ready to run, and marking it so that it can return differently
 * than the parent.  Returns 1 in the child process, 0 in the parent.
 * We currently double-map the user area so that the stack is at the same
 * address in each process; in the future we will probably relocate
 * the frame pointers on the stack after copying.
 */
int
cpu_fork(p1, p2)
    register struct proc *p1, *p2;
{
    register struct user *up = p2->p_addr;
    int foo, offset, addr, i;
    extern char kstack[];
    extern int mvesp();

    /*
     * Copy pcb and stack from proc p1 to p2.
     * We do this as cheaply as possible, copying only the active
     * part of the stack.  The stack and pcb need to agree;
     * this is tricky, as the final pcb is constructed by savectx,
     * but its frame isn't yet on the stack when the stack is copied.
     * swtch compensates for this when the child eventually runs.
     * This should be done differently, with a single call
     * that copies and updates the pcb+stack,
     * replacing the bcopy and savectx.
     */
    p2->p_addr->u_pcb = p1->p_addr->u_pcb;
    offset = mvesp() - (int)kstack;
    bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset,
        (unsigned) ctob(UPAGES) - offset);
    p2->p_regs = p1->p_regs;

    /*
     * Wire top of address space of child to its kstack.
     * First, fault in a page of pte's to map it.
     */
#if 0
    addr = trunc_page((u_int)vtopte(kstack));
    vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE);
    for (i=0; i < UPAGES; i++)
        pmap_enter(&p2->p_vmspace->vm_pmap, kstack+i*NBPG,
            pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG),
            /*
             * The user area has to be mapped writable because
             * it contains the kernel stack (when CR0_WP is on
             * on a 486 there is no user-read/kernel-write
             * mode).  It is protected from user mode access
             * by the segment limits.
             */
            VM_PROT_READ|VM_PROT_WRITE, TRUE);
#endif
    pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb);

    /*
     * Arrange for a non-local goto when the new process
     * is started, to resume here, returning nonzero from setjmp.
     */
    if (savectx(up, 1)) {
        /*
         * Return 1 in child.
         */
        return (1);
    }
    return (0);
}

#ifdef notyet
/*
 * cpu_exit is called as the last action during exit.
 *
 * We change to an inactive address space and a "safe" stack,
 * passing thru an argument to the new stack. Now, safely isolated
 * from the resources we're shedding, we release the address space
 * and any remaining machine-dependent resources, including the
 * memory for the user structure and kernel stack.
 *
 * Next, we assign a dummy context to be written over by swtch,
 * calling it to send this process off to oblivion.
 * [The nullpcb allows us to minimize cost in swtch() by not having
 * a special case].
 */
struct proc *swtch_to_inactive();
volatile void
cpu_exit(p)
    register struct proc *p;
{
    static struct pcb nullpcb;  /* pcb to overwrite on last swtch */

#if NNPX > 0
    npxexit(p);
#endif  /* NNPX */

    /* move to inactive space and stack, passing arg across */
    p = swtch_to_inactive(p);

    /* drop per-process resources */
    vmspace_free(p->p_vmspace);
    kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));

    p->p_addr = (struct user *) &nullpcb;
    splclock();
    swtch();
    /* NOTREACHED */
}
#else
void
cpu_exit(p)
    register struct proc *p;
{

#if NNPX > 0
    npxexit(p);
#endif  /* NNPX */
    splclock();
    curproc = 0;
    swtch();
    /*
     * This is to shut up the compiler, and if swtch() failed I suppose
     * this would be a good thing.  This keeps gcc happy because panic
     * is a volatile void function as well.
     */
    panic("cpu_exit");
}

void
cpu_wait(p) struct proc *p; {
/*  extern vm_map_t upages_map; */
    extern char kstack[];

    /* drop per-process resources */
    pmap_remove(vm_map_pmap(kernel_map), (vm_offset_t) p->p_addr,
        ((vm_offset_t) p->p_addr) + ctob(UPAGES));
    kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
    vmspace_free(p->p_vmspace);
}
#endif

/*
 * Set a red zone in the kernel stack after the u. area.
 */
void
setredzone(pte, vaddr)
    u_short *pte;
    caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken.  A sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}

/*
 * Convert kernel VA to physical address
 */
u_long
kvtop(void *addr)
{
    vm_offset_t va;

    va = pmap_kextract((vm_offset_t)addr);
    if (va == 0)
        panic("kvtop: zero page frame");
    return((int)va);
}

extern vm_map_t phys_map;

/*
 * Map an IO request into kernel virtual address space.
 *
 * All requests are (re)mapped into kernel VA space.
 * Notice that we use b_bufsize for the size of the buffer
 * to be mapped.  b_bcount might be modified by the driver.
 */
void
vmapbuf(bp)
    register struct buf *bp;
{
    register int npf;
    register caddr_t addr;
    register long flags = bp->b_flags;
    struct proc *p;
    int off;
    vm_offset_t kva;
    register vm_offset_t pa;

    if ((flags & B_PHYS) == 0)
        panic("vmapbuf");
    addr = bp->b_saveaddr = bp->b_un.b_addr;
    off = (int)addr & PGOFSET;
    p = bp->b_proc;
    npf = btoc(round_page(bp->b_bufsize + off));
    kva = kmem_alloc_wait(phys_map, ctob(npf));
    bp->b_un.b_addr = (caddr_t) (kva + off);
    while (npf--) {
        pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr);
        if (pa == 0)
            panic("vmapbuf: null page frame");
        pmap_kenter(kva, trunc_page(pa));
        addr += PAGE_SIZE;
        kva += PAGE_SIZE;
    }
    pmap_update();
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
void
vunmapbuf(bp)
    register struct buf *bp;
{
    register int npf;
    register caddr_t addr = bp->b_un.b_addr;
    vm_offset_t kva;

    if ((bp->b_flags & B_PHYS) == 0)
        panic("vunmapbuf");
    npf = btoc(round_page(bp->b_bufsize + ((int)addr & PGOFSET)));
    kva = (vm_offset_t)((int)addr & ~PGOFSET);
    kmem_free_wakeup(phys_map, kva, ctob(npf));
    bp->b_un.b_addr = bp->b_saveaddr;
    bp->b_saveaddr = NULL;
}

/*
 * Force reset the processor by invalidating the entire address space!
 */
void
cpu_reset() {

    /* force a shutdown by unmapping entire address space ! */
    bzero((caddr_t) PTD, NBPG);

    /* "good night, sweet prince .... <THUNK!>" */
    tlbflush();
    /* NOTREACHED */
    while(1);
}

/*
 * Grow the user stack to allow for 'sp'. This version grows the stack in
 *      chunks of SGROWSIZ.
 */
int
grow(p, sp)
    struct proc *p;
    int sp;
{
    unsigned int nss;
    caddr_t v;
    struct vmspace *vm = p->p_vmspace;

    if ((caddr_t)sp <= vm->vm_maxsaddr || (unsigned)sp >= (unsigned)USRSTACK)
        return (1);

    nss = roundup(USRSTACK - (unsigned)sp, PAGE_SIZE);

    if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
        return (0);

    if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
        SGROWSIZ) < nss) {
        int grow_amount;
        /*
         * If necessary, grow the VM that the stack occupies
         * to allow for the rlimit. This allows us to not have
         * to allocate all of the VM up-front in execve (which
         * is expensive).
         * Grow the VM by the amount requested rounded up to
         * the nearest SGROWSIZ to provide for some hysteresis.
         */
        grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
        v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
            SGROWSIZ) - grow_amount;
        /*
         * If there isn't enough room to extend by SGROWSIZ, then
         * just extend to the maximum size
         */
        if (v < vm->vm_maxsaddr) {
            v = vm->vm_maxsaddr;
            grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
        }
        if (vm_allocate(&vm->vm_map, (vm_offset_t *)&v,
            grow_amount, FALSE) != KERN_SUCCESS) {
            return (0);
        }
        vm->vm_ssize += grow_amount >> PAGE_SHIFT;
    }

    return (1);
}