1/* $OpenBSD: uvm_pager.c,v 1.91 2023/08/11 17:53:22 mpi Exp $ */ 2/* $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $ */ 3 4/* 5 * Copyright (c) 1997 Charles D. Cranor and Washington University. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp 29 */ 30 31/* 32 * uvm_pager.c: generic functions used to assist the pagers. 33 */ 34 35#include <sys/param.h> 36#include <sys/systm.h> 37#include <sys/malloc.h> 38#include <sys/pool.h> 39#include <sys/buf.h> 40#include <sys/atomic.h> 41 42#include <uvm/uvm.h> 43 44const struct uvm_pagerops *uvmpagerops[] = { 45 &aobj_pager, 46 &uvm_deviceops, 47 &uvm_vnodeops, 48}; 49 50/* 51 * the pager map: provides KVA for I/O 52 * 53 * Each uvm_pseg has room for MAX_PAGERMAP_SEGS pager io space of 54 * MAXBSIZE bytes. 55 * 56 * The number of uvm_pseg instances is dynamic using an array segs. 57 * At most UVM_PSEG_COUNT instances can exist. 58 * 59 * psegs[0/1] always exist (so that the pager can always map in pages). 60 * psegs[0/1] element 0 are always reserved for the pagedaemon. 61 * 62 * Any other pseg is automatically created when no space is available 63 * and automatically destroyed when it is no longer in use. 64 */ 65#define MAX_PAGER_SEGS 16 66#define PSEG_NUMSEGS (PAGER_MAP_SIZE / MAX_PAGER_SEGS / MAXBSIZE) 67struct uvm_pseg { 68 /* Start of virtual space; 0 if not inited. */ 69 vaddr_t start; 70 /* Bitmap of the segments in use in this pseg. */ 71 int use; 72}; 73struct mutex uvm_pseg_lck; 74struct uvm_pseg psegs[PSEG_NUMSEGS]; 75 76#define UVM_PSEG_FULL(pseg) ((pseg)->use == (1 << MAX_PAGER_SEGS) - 1) 77#define UVM_PSEG_EMPTY(pseg) ((pseg)->use == 0) 78#define UVM_PSEG_INUSE(pseg,id) (((pseg)->use & (1 << (id))) != 0) 79 80void uvm_pseg_init(struct uvm_pseg *); 81vaddr_t uvm_pseg_get(int); 82void uvm_pseg_release(vaddr_t); 83 84/* 85 * uvm_pager_init: init pagers (at boot time) 86 */ 87void 88uvm_pager_init(void) 89{ 90 int lcv; 91 92 /* init pager map */ 93 uvm_pseg_init(&psegs[0]); 94 uvm_pseg_init(&psegs[1]); 95 mtx_init(&uvm_pseg_lck, IPL_VM); 96 97 /* init ASYNC I/O queue */ 98 TAILQ_INIT(&uvm.aio_done); 99 100 /* call pager init functions */ 101 for (lcv = 0 ; lcv < sizeof(uvmpagerops)/sizeof(struct uvm_pagerops *); 102 lcv++) { 103 if (uvmpagerops[lcv]->pgo_init) 104 uvmpagerops[lcv]->pgo_init(); 105 } 106} 107 108/* 109 * Initialize a uvm_pseg. 110 * 111 * May fail, in which case seg->start == 0. 112 * 113 * Caller locks uvm_pseg_lck. 114 */ 115void 116uvm_pseg_init(struct uvm_pseg *pseg) 117{ 118 KASSERT(pseg->start == 0); 119 KASSERT(pseg->use == 0); 120 pseg->start = (vaddr_t)km_alloc(MAX_PAGER_SEGS * MAXBSIZE, 121 &kv_any, &kp_none, &kd_trylock); 122} 123 124/* 125 * Acquire a pager map segment. 126 * 127 * Returns a vaddr for paging. 0 on failure. 128 * 129 * Caller does not lock. 130 */ 131vaddr_t 132uvm_pseg_get(int flags) 133{ 134 int i; 135 struct uvm_pseg *pseg; 136 137 /* 138 * XXX Prevent lock ordering issue in uvm_unmap_detach(). A real 139 * fix would be to move the KERNEL_LOCK() out of uvm_unmap_detach(). 140 * 141 * witness_checkorder() at witness_checkorder+0xba0 142 * __mp_lock() at __mp_lock+0x5f 143 * uvm_unmap_detach() at uvm_unmap_detach+0xc5 144 * uvm_map() at uvm_map+0x857 145 * uvm_km_valloc_try() at uvm_km_valloc_try+0x65 146 * uvm_pseg_get() at uvm_pseg_get+0x6f 147 * uvm_pagermapin() at uvm_pagermapin+0x45 148 * uvn_io() at uvn_io+0xcf 149 * uvn_get() at uvn_get+0x156 150 * uvm_fault_lower() at uvm_fault_lower+0x28a 151 * uvm_fault() at uvm_fault+0x1b3 152 * upageflttrap() at upageflttrap+0x62 153 */ 154 KERNEL_LOCK(); 155 mtx_enter(&uvm_pseg_lck); 156 157pager_seg_restart: 158 /* Find first pseg that has room. */ 159 for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) { 160 if (UVM_PSEG_FULL(pseg)) 161 continue; 162 163 if (pseg->start == 0) { 164 /* Need initialization. */ 165 uvm_pseg_init(pseg); 166 if (pseg->start == 0) 167 goto pager_seg_fail; 168 } 169 170 /* Keep indexes 0,1 reserved for pagedaemon. */ 171 if ((pseg == &psegs[0] || pseg == &psegs[1]) && 172 (curproc != uvm.pagedaemon_proc)) 173 i = 2; 174 else 175 i = 0; 176 177 for (; i < MAX_PAGER_SEGS; i++) { 178 if (!UVM_PSEG_INUSE(pseg, i)) { 179 pseg->use |= 1 << i; 180 mtx_leave(&uvm_pseg_lck); 181 KERNEL_UNLOCK(); 182 return pseg->start + i * MAXBSIZE; 183 } 184 } 185 } 186 187pager_seg_fail: 188 if ((flags & UVMPAGER_MAPIN_WAITOK) != 0) { 189 msleep_nsec(&psegs, &uvm_pseg_lck, PVM, "pagerseg", INFSLP); 190 goto pager_seg_restart; 191 } 192 193 mtx_leave(&uvm_pseg_lck); 194 KERNEL_UNLOCK(); 195 return 0; 196} 197 198/* 199 * Release a pager map segment. 200 * 201 * Caller does not lock. 202 * 203 * Deallocates pseg if it is no longer in use. 204 */ 205void 206uvm_pseg_release(vaddr_t segaddr) 207{ 208 int id; 209 struct uvm_pseg *pseg; 210 vaddr_t va = 0; 211 212 mtx_enter(&uvm_pseg_lck); 213 for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) { 214 if (pseg->start <= segaddr && 215 segaddr < pseg->start + MAX_PAGER_SEGS * MAXBSIZE) 216 break; 217 } 218 KASSERT(pseg != &psegs[PSEG_NUMSEGS]); 219 220 id = (segaddr - pseg->start) / MAXBSIZE; 221 KASSERT(id >= 0 && id < MAX_PAGER_SEGS); 222 223 /* test for no remainder */ 224 KDASSERT(segaddr == pseg->start + id * MAXBSIZE); 225 226 227 KASSERT(UVM_PSEG_INUSE(pseg, id)); 228 229 pseg->use &= ~(1 << id); 230 wakeup(&psegs); 231 232 if ((pseg != &psegs[0] && pseg != &psegs[1]) && UVM_PSEG_EMPTY(pseg)) { 233 va = pseg->start; 234 pseg->start = 0; 235 } 236 237 mtx_leave(&uvm_pseg_lck); 238 239 if (va) { 240 km_free((void *)va, MAX_PAGER_SEGS * MAXBSIZE, 241 &kv_any, &kp_none); 242 } 243} 244 245/* 246 * uvm_pagermapin: map pages into KVA for I/O that needs mappings 247 * 248 * We basically just km_valloc a blank map entry to reserve the space in the 249 * kernel map and then use pmap_enter() to put the mappings in by hand. 250 */ 251vaddr_t 252uvm_pagermapin(struct vm_page **pps, int npages, int flags) 253{ 254 vaddr_t kva, cva; 255 vm_prot_t prot; 256 vsize_t size; 257 struct vm_page *pp; 258 259#if defined(__HAVE_PMAP_DIRECT) 260 /* 261 * Use direct mappings for single page, unless there is a risk 262 * of aliasing. 263 */ 264 if (npages == 1 && PMAP_PREFER_ALIGN() == 0) { 265 KASSERT(pps[0]); 266 KASSERT(pps[0]->pg_flags & PG_BUSY); 267 return pmap_map_direct(pps[0]); 268 } 269#endif 270 271 prot = PROT_READ; 272 if (flags & UVMPAGER_MAPIN_READ) 273 prot |= PROT_WRITE; 274 size = ptoa(npages); 275 276 KASSERT(size <= MAXBSIZE); 277 278 kva = uvm_pseg_get(flags); 279 if (kva == 0) 280 return 0; 281 282 for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) { 283 pp = *pps++; 284 KASSERT(pp); 285 KASSERT(pp->pg_flags & PG_BUSY); 286 /* Allow pmap_enter to fail. */ 287 if (pmap_enter(pmap_kernel(), cva, VM_PAGE_TO_PHYS(pp), 288 prot, PMAP_WIRED | PMAP_CANFAIL | prot) != 0) { 289 pmap_remove(pmap_kernel(), kva, cva); 290 pmap_update(pmap_kernel()); 291 uvm_pseg_release(kva); 292 return 0; 293 } 294 } 295 pmap_update(pmap_kernel()); 296 return kva; 297} 298 299/* 300 * uvm_pagermapout: remove KVA mapping 301 * 302 * We remove our mappings by hand and then remove the mapping. 303 */ 304void 305uvm_pagermapout(vaddr_t kva, int npages) 306{ 307#if defined(__HAVE_PMAP_DIRECT) 308 /* 309 * Use direct mappings for single page, unless there is a risk 310 * of aliasing. 311 */ 312 if (npages == 1 && PMAP_PREFER_ALIGN() == 0) { 313 pmap_unmap_direct(kva); 314 return; 315 } 316#endif 317 318 pmap_remove(pmap_kernel(), kva, kva + ((vsize_t)npages << PAGE_SHIFT)); 319 pmap_update(pmap_kernel()); 320 uvm_pseg_release(kva); 321 322} 323 324/* 325 * uvm_mk_pcluster 326 * 327 * generic "make 'pager put' cluster" function. a pager can either 328 * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this 329 * generic function, or [3] set it to a pager specific function. 330 * 331 * => caller must lock object _and_ pagequeues (since we need to look 332 * at active vs. inactive bits, etc.) 333 * => caller must make center page busy and write-protect it 334 * => we mark all cluster pages busy for the caller 335 * => the caller must unbusy all pages (and check wanted/released 336 * status if it drops the object lock) 337 * => flags: 338 * PGO_ALLPAGES: all pages in object are valid targets 339 * !PGO_ALLPAGES: use "lo" and "hi" to limit range of cluster 340 * PGO_DOACTCLUST: include active pages in cluster. 341 * PGO_FREE: set the PG_RELEASED bits on the cluster so they'll be freed 342 * in async io (caller must clean on error). 343 * NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST. 344 * PG_CLEANCHK is only a hint, but clearing will help reduce 345 * the number of calls we make to the pmap layer. 346 */ 347 348struct vm_page ** 349uvm_mk_pcluster(struct uvm_object *uobj, struct vm_page **pps, int *npages, 350 struct vm_page *center, int flags, voff_t mlo, voff_t mhi) 351{ 352 struct vm_page **ppsp, *pclust; 353 voff_t lo, hi, curoff; 354 int center_idx, forward, incr; 355 356 /* 357 * center page should already be busy and write protected. XXX: 358 * suppose page is wired? if we lock, then a process could 359 * fault/block on it. if we don't lock, a process could write the 360 * pages in the middle of an I/O. (consider an msync()). let's 361 * lock it for now (better to delay than corrupt data?). 362 */ 363 /* get cluster boundaries, check sanity, and apply our limits as well.*/ 364 uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi); 365 if ((flags & PGO_ALLPAGES) == 0) { 366 if (lo < mlo) 367 lo = mlo; 368 if (hi > mhi) 369 hi = mhi; 370 } 371 if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */ 372 pps[0] = center; 373 *npages = 1; 374 return pps; 375 } 376 377 /* now determine the center and attempt to cluster around the edges */ 378 center_idx = (center->offset - lo) >> PAGE_SHIFT; 379 pps[center_idx] = center; /* plug in the center page */ 380 ppsp = &pps[center_idx]; 381 *npages = 1; 382 383 /* 384 * attempt to cluster around the left [backward], and then 385 * the right side [forward]. 386 * 387 * note that for inactive pages (pages that have been deactivated) 388 * there are no valid mappings and PG_CLEAN should be up to date. 389 * [i.e. there is no need to query the pmap with pmap_is_modified 390 * since there are no mappings]. 391 */ 392 for (forward = 0 ; forward <= 1 ; forward++) { 393 incr = forward ? PAGE_SIZE : -PAGE_SIZE; 394 curoff = center->offset + incr; 395 for ( ;(forward == 0 && curoff >= lo) || 396 (forward && curoff < hi); 397 curoff += incr) { 398 399 pclust = uvm_pagelookup(uobj, curoff); /* lookup page */ 400 if (pclust == NULL) { 401 break; /* no page */ 402 } 403 /* handle active pages */ 404 /* NOTE: inactive pages don't have pmap mappings */ 405 if ((pclust->pg_flags & PQ_INACTIVE) == 0) { 406 if ((flags & PGO_DOACTCLUST) == 0) { 407 /* dont want mapped pages at all */ 408 break; 409 } 410 411 /* make sure "clean" bit is sync'd */ 412 if ((pclust->pg_flags & PG_CLEANCHK) == 0) { 413 if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) 414 == PG_CLEAN && 415 pmap_is_modified(pclust)) 416 atomic_clearbits_int( 417 &pclust->pg_flags, 418 PG_CLEAN); 419 /* now checked */ 420 atomic_setbits_int(&pclust->pg_flags, 421 PG_CLEANCHK); 422 } 423 } 424 425 /* is page available for cleaning and does it need it */ 426 if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) != 0) { 427 break; /* page is already clean or is busy */ 428 } 429 430 /* yes! enroll the page in our array */ 431 atomic_setbits_int(&pclust->pg_flags, PG_BUSY); 432 UVM_PAGE_OWN(pclust, "uvm_mk_pcluster"); 433 434 /* 435 * If we want to free after io is done, and we're 436 * async, set the released flag 437 */ 438 if ((flags & (PGO_FREE|PGO_SYNCIO)) == PGO_FREE) 439 atomic_setbits_int(&pclust->pg_flags, 440 PG_RELEASED); 441 442 /* XXX: protect wired page? see above comment. */ 443 pmap_page_protect(pclust, PROT_READ); 444 if (!forward) { 445 ppsp--; /* back up one page */ 446 *ppsp = pclust; 447 } else { 448 /* move forward one page */ 449 ppsp[*npages] = pclust; 450 } 451 (*npages)++; 452 } 453 } 454 455 /* 456 * done! return the cluster array to the caller!!! 457 */ 458 return ppsp; 459} 460 461/* 462 * uvm_pager_put: high level pageout routine 463 * 464 * we want to pageout page "pg" to backing store, clustering if 465 * possible. 466 * 467 * => page queues must be locked by caller 468 * => if page is not swap-backed, then "uobj" points to the object 469 * backing it. 470 * => if page is swap-backed, then "uobj" should be NULL. 471 * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN 472 * for swap-backed memory, "pg" can be NULL if there is no page 473 * of interest [sometimes the case for the pagedaemon] 474 * => "ppsp_ptr" should point to an array of npages vm_page pointers 475 * for possible cluster building 476 * => flags (first two for non-swap-backed pages) 477 * PGO_ALLPAGES: all pages in uobj are valid targets 478 * PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets 479 * PGO_SYNCIO: do SYNC I/O (no async) 480 * PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O 481 * PGO_FREE: tell the aio daemon to free pages in the async case. 482 * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range 483 * if (!uobj) start is the (daddr_t) of the starting swapblk 484 * => return state: 485 * 1. we return the VM_PAGER status code of the pageout 486 * 2. we return with the page queues unlocked 487 * 3. on errors we always drop the cluster. thus, if we return 488 * !PEND, !OK, then the caller only has to worry about 489 * un-busying the main page (not the cluster pages). 490 * 4. on success, if !PGO_PDFREECLUST, we return the cluster 491 * with all pages busy (caller must un-busy and check 492 * wanted/released flags). 493 */ 494int 495uvm_pager_put(struct uvm_object *uobj, struct vm_page *pg, 496 struct vm_page ***ppsp_ptr, int *npages, int flags, 497 voff_t start, voff_t stop) 498{ 499 int result; 500 daddr_t swblk; 501 struct vm_page **ppsp = *ppsp_ptr; 502 503 /* 504 * note that uobj is null if we are doing a swap-backed pageout. 505 * note that uobj is !null if we are doing normal object pageout. 506 * note that the page queues must be locked to cluster. 507 */ 508 if (uobj) { /* if !swap-backed */ 509 /* 510 * attempt to build a cluster for pageout using its 511 * make-put-cluster function (if it has one). 512 */ 513 if (uobj->pgops->pgo_mk_pcluster) { 514 ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp, 515 npages, pg, flags, start, stop); 516 *ppsp_ptr = ppsp; /* update caller's pointer */ 517 } else { 518 ppsp[0] = pg; 519 *npages = 1; 520 } 521 522 swblk = 0; /* XXX: keep gcc happy */ 523 } else { 524 /* 525 * for swap-backed pageout, the caller (the pagedaemon) has 526 * already built the cluster for us. the starting swap 527 * block we are writing to has been passed in as "start." 528 * "pg" could be NULL if there is no page we are especially 529 * interested in (in which case the whole cluster gets dropped 530 * in the event of an error or a sync "done"). 531 */ 532 swblk = start; 533 /* ppsp and npages should be ok */ 534 } 535 536 /* now that we've clustered we can unlock the page queues */ 537 uvm_unlock_pageq(); 538 539 /* 540 * now attempt the I/O. if we have a failure and we are 541 * clustered, we will drop the cluster and try again. 542 */ 543ReTry: 544 if (uobj) { 545 result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags); 546 } else { 547 /* XXX daddr_t -> int */ 548 result = uvm_swap_put(swblk, ppsp, *npages, flags); 549 } 550 551 /* 552 * we have attempted the I/O. 553 * 554 * if the I/O was a success then: 555 * if !PGO_PDFREECLUST, we return the cluster to the 556 * caller (who must un-busy all pages) 557 * else we un-busy cluster pages for the pagedaemon 558 * 559 * if I/O is pending (async i/o) then we return the pending code. 560 * [in this case the async i/o done function must clean up when 561 * i/o is done...] 562 */ 563 if (result == VM_PAGER_PEND || result == VM_PAGER_OK) { 564 if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) { 565 /* drop cluster */ 566 if (*npages > 1 || pg == NULL) 567 uvm_pager_dropcluster(uobj, pg, ppsp, npages, 568 PGO_PDFREECLUST); 569 } 570 return (result); 571 } 572 573 /* 574 * a pager error occurred (even after dropping the cluster, if there 575 * was one). give up! the caller only has one page ("pg") 576 * to worry about. 577 */ 578 if (*npages > 1 || pg == NULL) { 579 uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP); 580 581 /* 582 * for failed swap-backed pageouts with a "pg", 583 * we need to reset pg's swslot to either: 584 * "swblk" (for transient errors, so we can retry), 585 * or 0 (for hard errors). 586 */ 587 if (uobj == NULL && pg != NULL) { 588 /* XXX daddr_t -> int */ 589 int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0; 590 if (pg->pg_flags & PQ_ANON) { 591 rw_enter(pg->uanon->an_lock, RW_WRITE); 592 pg->uanon->an_swslot = nswblk; 593 rw_exit(pg->uanon->an_lock); 594 } else { 595 rw_enter(pg->uobject->vmobjlock, RW_WRITE); 596 uao_set_swslot(pg->uobject, 597 pg->offset >> PAGE_SHIFT, 598 nswblk); 599 rw_exit(pg->uobject->vmobjlock); 600 } 601 } 602 if (result == VM_PAGER_AGAIN) { 603 /* 604 * for transient failures, free all the swslots that 605 * we're not going to retry with. 606 */ 607 if (uobj == NULL) { 608 if (pg) { 609 /* XXX daddr_t -> int */ 610 uvm_swap_free(swblk + 1, *npages - 1); 611 } else { 612 /* XXX daddr_t -> int */ 613 uvm_swap_free(swblk, *npages); 614 } 615 } 616 if (pg) { 617 ppsp[0] = pg; 618 *npages = 1; 619 goto ReTry; 620 } 621 } else if (uobj == NULL) { 622 /* 623 * for hard errors on swap-backed pageouts, 624 * mark the swslots as bad. note that we do not 625 * free swslots that we mark bad. 626 */ 627 /* XXX daddr_t -> int */ 628 uvm_swap_markbad(swblk, *npages); 629 } 630 } 631 632 /* 633 * a pager error occurred (even after dropping the cluster, if there 634 * was one). give up! the caller only has one page ("pg") 635 * to worry about. 636 */ 637 638 return result; 639} 640 641/* 642 * uvm_pager_dropcluster: drop a cluster we have built (because we 643 * got an error, or, if PGO_PDFREECLUST we are un-busying the 644 * cluster pages on behalf of the pagedaemon). 645 * 646 * => uobj, if non-null, is a non-swap-backed object 647 * => page queues are not locked 648 * => pg is our page of interest (the one we clustered around, can be null) 649 * => ppsp/npages is our current cluster 650 * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster 651 * pages on behalf of the pagedaemon. 652 * PGO_REALLOCSWAP: drop previously allocated swap slots for 653 * clustered swap-backed pages (except for "pg" if !NULL) 654 * "swblk" is the start of swap alloc (e.g. for ppsp[0]) 655 * [only meaningful if swap-backed (uobj == NULL)] 656 */ 657 658void 659uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg, 660 struct vm_page **ppsp, int *npages, int flags) 661{ 662 int lcv; 663 664 KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); 665 666 /* drop all pages but "pg" */ 667 for (lcv = 0 ; lcv < *npages ; lcv++) { 668 /* skip "pg" or empty slot */ 669 if (ppsp[lcv] == pg || ppsp[lcv] == NULL) 670 continue; 671 672 /* 673 * Note that PQ_ANON bit can't change as long as we are holding 674 * the PG_BUSY bit (so there is no need to lock the page 675 * queues to test it). 676 */ 677 if (!uobj) { 678 if (ppsp[lcv]->pg_flags & PQ_ANON) { 679 rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE); 680 if (flags & PGO_REALLOCSWAP) 681 /* zap swap block */ 682 ppsp[lcv]->uanon->an_swslot = 0; 683 } else { 684 rw_enter(ppsp[lcv]->uobject->vmobjlock, 685 RW_WRITE); 686 if (flags & PGO_REALLOCSWAP) 687 uao_set_swslot(ppsp[lcv]->uobject, 688 ppsp[lcv]->offset >> PAGE_SHIFT, 0); 689 } 690 } 691 692 /* did someone want the page while we had it busy-locked? */ 693 if (ppsp[lcv]->pg_flags & PG_WANTED) { 694 wakeup(ppsp[lcv]); 695 } 696 697 /* if page was released, release it. otherwise un-busy it */ 698 if (ppsp[lcv]->pg_flags & PG_RELEASED && 699 ppsp[lcv]->pg_flags & PQ_ANON) { 700 /* kills anon and frees pg */ 701 uvm_anon_release(ppsp[lcv]->uanon); 702 continue; 703 } else { 704 /* 705 * if we were planning on async io then we would 706 * have PG_RELEASED set, clear that with the others. 707 */ 708 atomic_clearbits_int(&ppsp[lcv]->pg_flags, 709 PG_BUSY|PG_WANTED|PG_FAKE|PG_RELEASED); 710 UVM_PAGE_OWN(ppsp[lcv], NULL); 711 } 712 713 /* 714 * if we are operating on behalf of the pagedaemon and we 715 * had a successful pageout update the page! 716 */ 717 if (flags & PGO_PDFREECLUST) { 718 pmap_clear_reference(ppsp[lcv]); 719 pmap_clear_modify(ppsp[lcv]); 720 atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN); 721 } 722 723 /* if anonymous cluster, unlock object and move on */ 724 if (!uobj) { 725 if (ppsp[lcv]->pg_flags & PQ_ANON) 726 rw_exit(ppsp[lcv]->uanon->an_lock); 727 else 728 rw_exit(ppsp[lcv]->uobject->vmobjlock); 729 } 730 } 731} 732 733/* 734 * interrupt-context iodone handler for single-buf i/os 735 * or the top-level buf of a nested-buf i/o. 736 * 737 * => must be at splbio(). 738 */ 739 740void 741uvm_aio_biodone(struct buf *bp) 742{ 743 splassert(IPL_BIO); 744 745 /* reset b_iodone for when this is a single-buf i/o. */ 746 bp->b_iodone = uvm_aio_aiodone; 747 748 mtx_enter(&uvm.aiodoned_lock); 749 TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist); 750 wakeup(&uvm.aiodoned); 751 mtx_leave(&uvm.aiodoned_lock); 752} 753 754void 755uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, boolean_t write, 756 int error) 757{ 758 struct vm_page *pg; 759 struct rwlock *slock; 760 boolean_t swap; 761 int i, swslot; 762 763 slock = NULL; 764 pg = pgs[0]; 765 swap = (pg->uanon != NULL && pg->uobject == NULL) || 766 (pg->pg_flags & PQ_AOBJ) != 0; 767 768 KASSERT(swap); 769 KASSERT(write); 770 771 if (error) { 772 if (pg->uobject != NULL) { 773 swslot = uao_find_swslot(pg->uobject, 774 pg->offset >> PAGE_SHIFT); 775 } else { 776 swslot = pg->uanon->an_swslot; 777 } 778 KASSERT(swslot); 779 } 780 781 for (i = 0; i < npages; i++) { 782 int anon_disposed = 0; 783 784 pg = pgs[i]; 785 KASSERT((pg->pg_flags & PG_FAKE) == 0); 786 787 /* 788 * lock each page's object (or anon) individually since 789 * each page may need a different lock. 790 */ 791 if (pg->uobject != NULL) { 792 slock = pg->uobject->vmobjlock; 793 } else { 794 slock = pg->uanon->an_lock; 795 } 796 rw_enter(slock, RW_WRITE); 797 anon_disposed = (pg->pg_flags & PG_RELEASED) != 0; 798 KASSERT(!anon_disposed || pg->uobject != NULL || 799 pg->uanon->an_ref == 0); 800 uvm_lock_pageq(); 801 802 /* 803 * if this was a successful write, 804 * mark the page PG_CLEAN. 805 */ 806 if (!error) { 807 pmap_clear_reference(pg); 808 pmap_clear_modify(pg); 809 atomic_setbits_int(&pg->pg_flags, PG_CLEAN); 810 } 811 812 /* 813 * unlock everything for this page now. 814 */ 815 if (pg->uobject == NULL && anon_disposed) { 816 uvm_unlock_pageq(); 817 uvm_anon_release(pg->uanon); 818 } else { 819 uvm_page_unbusy(&pg, 1); 820 uvm_unlock_pageq(); 821 rw_exit(slock); 822 } 823 } 824 825 if (error) { 826 uvm_swap_markbad(swslot, npages); 827 } 828} 829 830/* 831 * uvm_aio_aiodone: do iodone processing for async i/os. 832 * this should be called in thread context, not interrupt context. 833 */ 834void 835uvm_aio_aiodone(struct buf *bp) 836{ 837 int npages = bp->b_bufsize >> PAGE_SHIFT; 838 struct vm_page *pgs[MAXPHYS >> PAGE_SHIFT]; 839 int i, error; 840 boolean_t write; 841 842 KASSERT(npages <= MAXPHYS >> PAGE_SHIFT); 843 splassert(IPL_BIO); 844 845 error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0; 846 write = (bp->b_flags & B_READ) == 0; 847 848 for (i = 0; i < npages; i++) 849 pgs[i] = uvm_atopg((vaddr_t)bp->b_data + 850 ((vsize_t)i << PAGE_SHIFT)); 851 uvm_pagermapout((vaddr_t)bp->b_data, npages); 852#ifdef UVM_SWAP_ENCRYPT 853 /* 854 * XXX - assumes that we only get ASYNC writes. used to be above. 855 */ 856 if (pgs[0]->pg_flags & PQ_ENCRYPT) { 857 uvm_swap_freepages(pgs, npages); 858 goto freed; 859 } 860#endif /* UVM_SWAP_ENCRYPT */ 861 862 uvm_aio_aiodone_pages(pgs, npages, write, error); 863 864#ifdef UVM_SWAP_ENCRYPT 865freed: 866#endif 867 pool_put(&bufpool, bp); 868} 869