/*	$OpenBSD: uvm_pdaemon.c,v 1.114 2024/05/01 12:54:27 mpi Exp $	*/
/*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/atomic.h>

#ifdef HIBERNATE
#include <sys/hibernate.h>
#endif

#include <uvm/uvm.h>

#include "drm.h"

#if NDRM > 0
extern void drmbackoff(long);
#endif

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define UVMPD_NUMDIRTYREACTS	16


/*
 * local prototypes
 */

struct rwlock	*uvmpd_trylockowner(struct vm_page *);
void		uvmpd_scan(struct uvm_pmalloc *, struct uvm_constraint_range *);
void		uvmpd_scan_inactive(struct uvm_pmalloc *,
		    struct uvm_constraint_range *, struct pglist *);
void		uvmpd_tune(void);
void		uvmpd_drop(struct pglist *);
int		uvmpd_dropswap(struct vm_page *);

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	uint64_t timo = INFSLP;

#ifdef DIAGNOSTIC
	if (curproc == &proc0)
		panic("%s: cannot sleep for memory during boot", __func__);
#endif

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */
	if (curproc == uvm.pagedaemon_proc) {
		printf("uvm_wait emergency bufbackoff\n");
		if (bufbackoff(NULL, 4) == 0)
			return;
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = MSEC_TO_NSEC(125);	/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_lock_fpageq();
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}
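
/*
 * Illustrative sketch (not part of the kernel logic): the typical
 * uvm_wait() caller is a sleepable allocation path that retries, e.g.:
 *
 *	struct vm_page *pg;
 *
 *	while ((pg = uvm_pagealloc(uobj, off, NULL, 0)) == NULL)
 *		uvm_wait("pgwait");
 *
 * "pgwait", "uobj" and "off" are placeholders here; the points that
 * matter are that the caller holds no locks and is not the page
 * daemon itself.
 */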

/*
 * uvmpd_tune: tune paging parameters
 */
void
uvmpd_tune(void)
{
	int val;

	val = uvmexp.npages / 30;

	/* XXX:  what are these values good for? */
	val = max(val, (16*1024) >> PAGE_SHIFT);

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val;

	uvmexp.wiredmax = uvmexp.npages / 3;
}

/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
volatile int uvm_nowait_failed;

/*
 * uvm_pageout: the main loop for the pagedaemon
 */
void
uvm_pageout(void *arg)
{
	struct uvm_constraint_range constraint;
	struct uvm_pmalloc *pma;
	int free;

	/* ensure correct priority and set paging parameters... */
	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvmpd_tune();

	for (;;) {
		long size;

		uvm_lock_fpageq();
		if (!uvm_nowait_failed && TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
			    "pgdaemon", INFSLP);
			uvmexp.pdwoke++;
		}

		if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
			pma->pm_flags |= UVM_PMA_BUSY;
			constraint = pma->pm_constraint;
		} else {
			if (uvm_nowait_failed) {
				/*
				 * XXX realistically, this is what our
				 * nowait callers probably care about
				 */
				constraint = dma_constraint;
				uvm_nowait_failed = 0;
			} else
				constraint = no_constraint;
		}
		free = uvmexp.free - BUFPAGES_DEFICIT;
		uvm_unlock_fpageq();

		/*
		 * now lock page queues and recompute inactive count
		 */
		uvm_lock_pageq();
		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}
		uvm_unlock_pageq();

		/* Reclaim pages from the buffer cache if possible. */
		size = 0;
		if (pma != NULL)
			size += pma->pm_size >> PAGE_SHIFT;
		if (free < uvmexp.freetarg)
			size += uvmexp.freetarg - free;
		if (size == 0)
			size = 16; /* XXX */

		(void) bufbackoff(&constraint, size * 2);
#if NDRM > 0
		drmbackoff(size * 2);
#endif
		uvm_pmr_cache_drain();

		/*
		 * scan if needed
		 */
		uvm_lock_pageq();
		free = uvmexp.free - BUFPAGES_DEFICIT;
		if (pma != NULL || (free < uvmexp.freetarg) ||
		    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
			uvmpd_scan(pma, &constraint);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		uvm_lock_fpageq();
		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		if (pma != NULL) {
			/*
			 * XXX If UVM_PMA_FREED isn't set, no pages
			 * were freed.  Should we set UVM_PMA_FAIL in
			 * that case?
			 */
			pma->pm_flags &= ~UVM_PMA_BUSY;
			if (pma->pm_flags & UVM_PMA_FREED) {
				pma->pm_flags &= ~UVM_PMA_LINKED;
				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
				    pmq);
				wakeup(pma);
			}
		}
		uvm_unlock_fpageq();

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		uvm_unlock_pageq();

		sched_pause(yield);
	}
	/*NOTREACHED*/
}
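
/*
 * Worked example of the tuning and target arithmetic above
 * (illustrative numbers only, assuming 4KB pages and 1GB of managed
 * memory, i.e. npages == 262144):
 *
 *	val      = 262144 / 30            =  8738
 *	val      = max(8738, 16384 >> 12) =  8738	(floor is 4 pages)
 *	freemin  = 8738			(already > reserve_kernel + 1)
 *	freetarg = (8738 * 4) / 3         = 11650
 *	wiredmax = 262144 / 3             = 87381
 *
 * and in the main loop, with e.g. 60000 active and 30000 inactive pages:
 *
 *	inactarg = (60000 + 30000) / 3    = 30000	(> freetarg, kept)
 *
 * the exact figures vary per machine; the example only shows how the
 * targets relate to one another.
 */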

/*
 * uvm_aiodone_daemon: main loop for the aiodone daemon.
 */
void
uvm_aiodone_daemon(void *arg)
{
	int s, free;
	struct buf *bp, *nbp;

	uvm.aiodoned_proc = curproc;

	for (;;) {
		/*
		 * Check for done aio structures. If we've got structures to
		 * process, do so. Otherwise sleep while avoiding races.
		 */
		mtx_enter(&uvm.aiodoned_lock);
		while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
			msleep_nsec(&uvm.aiodoned, &uvm.aiodoned_lock,
			    PVM, "aiodoned", INFSLP);
		/* Take the list for ourselves. */
		TAILQ_INIT(&uvm.aio_done);
		mtx_leave(&uvm.aiodoned_lock);

		/* process each i/o that's done. */
		free = uvmexp.free;
		while (bp != NULL) {
			if (bp->b_flags & B_PDAEMON) {
				uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			s = splbio();	/* b_iodone must be called at splbio */
			(*bp->b_iodone)(bp);
			splx(s);
			bp = nbp;

			sched_pause(yield);
		}
		uvm_lock_fpageq();
		wakeup(free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
		    &uvmexp.free);
		uvm_unlock_fpageq();
	}
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => return the locked rwlock on success.  otherwise, return NULL.
 */
struct rwlock *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct rwlock *slock;

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = anon->an_lock;
	}

	if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
		return NULL;
	}

	return slock;
}

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return 1 if a page had an associated slot.
 */
int
uvmpd_dropswap(struct vm_page *pg)
{
	struct vm_anon *anon = pg->uanon;
	int slot, result = 0;

	if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		result = 1;
	} else if (pg->pg_flags & PQ_AOBJ) {
		slot = uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
		if (slot)
			result = 1;
	}

	return result;
}
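
/*
 * Illustrative sketch of how the two helpers above combine (placeholder
 * code, not compiled): the scanners below already hold the page queue
 * lock, which is the wrong lock order (the owner lock normally comes
 * first), so the owner can only be try-locked:
 *
 *	slock = uvmpd_trylockowner(pg);
 *	if (slock == NULL)
 *		continue;		-- contended: skip page, no harm done
 *	if (uvmpd_dropswap(pg))
 *		...			-- pg gave up its swap slot
 *	rw_exit(slock);
 */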

/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we exit once our free target is met or the list is exhausted
 */
void
uvmpd_scan_inactive(struct uvm_pmalloc *pma,
    struct uvm_constraint_range *constraint, struct pglist *pglst)
{
	int free, result;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
	int npages;
	struct vm_page *swpps[SWCLUSTPAGES];	/* XXX: see below */
	struct rwlock *slock;
	int swnpages, swcpages;			/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;
	paddr_t paddr;

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	dirtyreacts = 0;
	p = NULL;

	/* Start with the first page on the list that fits in `constraint' */
	TAILQ_FOREACH(p, pglst, pageq) {
		paddr = atop(VM_PAGE_TO_PHYS(p));
		if (paddr >= constraint->ucr_low &&
		    paddr < constraint->ucr_high)
			break;
	}

	for (; p != NULL || swslot != 0; p = nextpg) {
		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		uobj = NULL;
		anon = NULL;
		if (p) {
			/*
			 * see if we've met our target
			 */
			free = uvmexp.free - BUFPAGES_DEFICIT;
			if (((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging >= uvmexp.freetarg << 2)) ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
				nextpg = NULL;
			}
		}
		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			anon = p->uanon;
			uobj = p->uobject;

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * deadlock.
			 */
			slock = uvmpd_trylockowner(p);
			if (slock == NULL) {
				continue;
			}

			/*
			 * move referenced pages back to active queue
			 * and skip to next page.
			 */
			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				rw_exit(slock);
				uvmexp.pdreact++;
				continue;
			}

			if (p->pg_flags & PG_BUSY) {
				rw_exit(slock);
				uvmexp.pdbusy++;
				continue;
			}

			/* does the page belong to an object? */
			if (uobj != NULL) {
				uvmexp.pdobscan++;
			} else {
				KASSERT(anon != NULL);
				uvmexp.pdanscan++;
			}

			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
				uvmexp.pdfreed++;

				if (anon) {
					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */
					KASSERT(anon->an_swslot != 0);

					/* remove from object */
					anon->an_page = NULL;
				}
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if ((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging > uvmexp.freetarg << 2)) {
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out:
			 * swap is full and every slot holds a page that
			 * lives only in swap, so no slot can be freed.
			 * reactivate this page so that we eventually cycle
			 * all pages thru the inactive queue.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
				dirtyreacts++;
				uvm_pageactivate(p);
				rw_exit(slock);
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
				uvmpd_dropswap(p);
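
			/*
			 * illustrative note on the two predicates above:
			 * roughly, uvm_swapisfilled() means every swap slot
			 * is allocated, while uvm_swapisfull() additionally
			 * means every slot holds the only copy of its page.
			 * when swap is merely filled, freeing this page's own
			 * slot lets some other page be paged out; when it is
			 * truly full, nothing can be written out and the page
			 * was instead reactivated above.
			 */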

			/*
			 * the page we are looking at is dirty.  we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.  we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */
			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
			atomic_setbits_int(&p->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(p, PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {
				/* free old swap slot (if any) */
				uvmpd_dropswap(p);

				/* start new cluster (if necessary) */
				if (swslot == 0) {
					swnpages = SWCLUSTPAGES;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						atomic_clearbits_int(
						    &p->pg_flags,
						    PG_BUSY);
						UVM_PAGE_OWN(p, NULL);
						rw_exit(slock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/* add block to cluster */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;
				rw_exit(slock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}
		} else {
			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {
			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/*
			 * if this is final pageout we could have a few
			 * extra swap blocks
			 */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: page queues
		 * OUT: locked:
		 *     !locked: pageqs
		 */

		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */
		if (swap_backed)
			swslot = 0;		/* done with this cluster */
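
		/*
		 * cluster bookkeeping example (illustrative numbers):
		 * suppose uvm_swap_alloc() returned swslot == 100 with
		 * swnpages == 8.  the first page queued got slot 100
		 * (swcpages == 0), the next 101, and so on.  if the list
		 * ran out after swcpages == 5 pages, slots 105..107 were
		 * just returned with uvm_swap_free(100 + 5, 3) before the
		 * pageout of the partial cluster.
		 */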

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.  in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.  if that happens, our page "p"
		 * may no longer be on the inactive queue.  so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */
		if (result == VM_PAGER_PEND) {
			uvmexp.paging += npages;
			uvm_lock_pageq();
			uvmexp.pdpending++;
			if (p) {
				if (p->pg_flags & PQ_INACTIVE)
					nextpg = TAILQ_NEXT(p, pageq);
				else
					nextpg = TAILQ_FIRST(pglst);
			} else {
				nextpg = NULL;
			}
			continue;
		}

		/* clean up "p" if we have one */
		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: temporary resource shortage, we
			 *     skip to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.  we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/*
			 * relock p's object: page queues not locked yet, so
			 * no need for "try"
			 */
			/* !swap_backed case: already locked... */
			if (swap_backed) {
				rw_enter(slock, RW_WRITE);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->pg_flags & PG_WANTED)
				wakeup(p);

			atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? Can only happen for anons */
			if (p->pg_flags & PG_RELEASED) {
				KASSERT(anon != NULL);
				/*
				 * remove page so we can get nextpg,
				 * also zero out anon so we don't use
				 * it after the free.
				 */
				anon->an_page = NULL;
				p->uanon = NULL;

				rw_exit(anon->an_lock);
				uvm_anfree(anon);	/* kills anon */
				pmap_page_protect(p, PROT_NONE);
				anon = NULL;
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				/* free released page */
				uvm_pagefree(p);
			} else {	/* page was not released during I/O */
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				if (result != VM_PAGER_OK) {
					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(p);
					/* XXXCDC: if (swap_backed) FREE p's
					 * swap block? */
				} else {
					/* pageout was a success... */
					pmap_clear_reference(p);
					pmap_clear_modify(p);
					atomic_setbits_int(&p->pg_flags,
					    PG_CLEAN);
				}
			}

			/*
			 * drop object lock (if there is an object left).  do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */
			rw_exit(slock);

			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
				nextpg = TAILQ_FIRST(pglst);	/* reload! */
			}
		} else {
			/*
			 * if p is null in this loop, make sure it stays null
			 * in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}
	}
}
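
/*
 * Note on the nextpg revalidation in uvmpd_scan_inactive() above
 * (illustrative scenario): while the owner lock and page queue lock
 * were dropped for I/O, another thread may have re-queued the page
 * cached in nextpg, e.g. moved it to the active queue.  advancing from
 * it would then walk the wrong queue, hence the PQ_INACTIVE check and
 * the restart from TAILQ_FIRST(pglst).
 */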

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */
void
uvmpd_scan(struct uvm_pmalloc *pma, struct uvm_constraint_range *constraint)
{
	int free, inactive_shortage, swap_shortage, pages_freed;
	struct vm_page *p, *nextpg;
	struct rwlock *slock;
	paddr_t paddr;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	uvmexp.pdrevs++;		/* counter */

	/*
	 * get current "free" page count
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;

#ifdef __HAVE_PMAP_COLLECT
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (free < uvmexp.freetarg) {
		uvmexp.pdswout++;
		uvm_unlock_pageq();
		uvm_swapout_threads();
		uvm_lock_pageq();
	}
#endif

	/*
	 * now we want to work on meeting our targets.  first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */
	pages_freed = uvmexp.pdfreed;
	uvmpd_scan_inactive(pma, constraint, &uvm.page_inactive);
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */
	inactive_shortage = uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;
	swap_shortage = 0;
	if (free < uvmexp.freetarg && uvm_swapisfilled() && !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - free;
	}

	for (p = TAILQ_FIRST(&uvm.page_active);
	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	    p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->pg_flags & PG_BUSY) {
			continue;
		}

		/*
		 * skip this page if it doesn't match the constraint.
		 */
		paddr = atop(VM_PAGE_TO_PHYS(p));
		if (paddr < constraint->ucr_low ||
		    paddr >= constraint->ucr_high)
			continue;

		/*
		 * lock the page's owner.
		 */
		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			continue;
		}

		/*
		 * skip this page if it's busy.
		 */
		if ((p->pg_flags & PG_BUSY) != 0) {
			rw_exit(slock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if (uvmpd_dropswap(p)) {
				atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
				swap_shortage--;
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			pmap_page_protect(p, PROT_NONE);
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}

		/*
		 * we're done with this page.
		 */
		rw_exit(slock);
	}
}
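
/*
 * Example of the swap-shortage pass above (illustrative numbers): with
 * freetarg == 11650, free == 10000, swap filled but not full, and an
 * inactive scan that freed nothing, swap_shortage starts at 1650.  the
 * active-queue loop may then strip the swap slots from up to 1650
 * pages, clearing PG_CLEAN since the only backing copy is gone, so
 * that those slots can be reused to page something else out.
 */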

#ifdef HIBERNATE

/*
 * uvmpd_drop: drop clean pages from list
 */
void
uvmpd_drop(struct pglist *pglst)
{
	struct vm_page *p, *nextpg;

	for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);

		if (p->pg_flags & PQ_ANON || p->uobject == NULL)
			continue;

		if (p->pg_flags & PG_BUSY)
			continue;

		if (p->pg_flags & PG_CLEAN) {
			struct uvm_object *uobj = p->uobject;

			rw_enter(uobj->vmobjlock, RW_WRITE);
			uvm_lock_pageq();
			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
			}
			uvm_unlock_pageq();
			rw_exit(uobj->vmobjlock);
		}
	}
}

void
uvmpd_hibernate(void)
{
	uvmpd_drop(&uvm.page_inactive);
	uvmpd_drop(&uvm.page_active);
}

#endif