/*	$OpenBSD: uvm_pdaemon.c,v 1.114 2024/05/01 12:54:27 mpi Exp $	*/
/*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/atomic.h>

#ifdef HIBERNATE
#include <sys/hibernate.h>
#endif

#include <uvm/uvm.h>

#include "drm.h"

#if NDRM > 0
extern void drmbackoff(long);
#endif

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define UVMPD_NUMDIRTYREACTS 16


/*
 * local prototypes
 */

struct rwlock	*uvmpd_trylockowner(struct vm_page *);
void		uvmpd_scan(struct uvm_pmalloc *, struct uvm_constraint_range *);
void		uvmpd_scan_inactive(struct uvm_pmalloc *,
		    struct uvm_constraint_range *, struct pglist *);
void		uvmpd_tune(void);
void		uvmpd_drop(struct pglist *);
int		uvmpd_dropswap(struct vm_page *);

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

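/*
 * Illustrative usage (a pattern used by callers elsewhere in UVM, not part
 * of this file): retry a failed page allocation once the page daemon has
 * had a chance to free memory, e.g.
 *
 *	while ((pg = uvm_pagealloc(uobj, off, NULL, 0)) == NULL)
 *		uvm_wait("somewait");	(arbitrary wait message)
 */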
void
uvm_wait(const char *wmsg)
{
	uint64_t timo = INFSLP;

#ifdef DIAGNOSTIC
	if (curproc == &proc0)
		panic("%s: cannot sleep for memory during boot", __func__);
#endif

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */
	if (curproc == uvm.pagedaemon_proc) {
		printf("uvm_wait emergency bufbackoff\n");
		if (bufbackoff(NULL, 4) == 0)
			return;
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.   but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = MSEC_TO_NSEC(125);	/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_lock_fpageq();
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
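	/* PNORELOCK: msleep_nsec() drops fpageqlock and does not retake it. */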
	msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}

/*
 * uvmpd_tune: tune paging parameters
 */
void
uvmpd_tune(void)
{
	int val;

	val = uvmexp.npages / 30;

	/* XXX:  what are these values good for? */
	val = max(val, (16*1024) >> PAGE_SHIFT);

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val;
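	/*
	 * Illustrative numbers (assuming 4 KB pages and the usual small
	 * reserve_kernel): with 1 GB of RAM, npages is 262144, giving
	 * freemin = 262144 / 30 = 8738 and freetarg = (8738 * 4) / 3 = 11650.
	 */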

	uvmexp.wiredmax = uvmexp.npages / 3;
}

/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
volatile int uvm_nowait_failed;

/*
 * uvm_pageout: the main loop for the pagedaemon
 */
void
uvm_pageout(void *arg)
{
	struct uvm_constraint_range constraint;
	struct uvm_pmalloc *pma;
	int free;

	/* ensure correct priority and set paging parameters... */
	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvmpd_tune();

	for (;;) {
		long size;

		uvm_lock_fpageq();
		if (!uvm_nowait_failed && TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
			    "pgdaemon", INFSLP);
			uvmexp.pdwoke++;
		}

		if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
			pma->pm_flags |= UVM_PMA_BUSY;
			constraint = pma->pm_constraint;
		} else {
			if (uvm_nowait_failed) {
				/*
				 * XXX realistically, this is what our
				 * nowait callers probably care about
				 */
				constraint = dma_constraint;
				uvm_nowait_failed = 0;
			} else
				constraint = no_constraint;
		}
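		/* treat pages already owed to the buffer cache as not free */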
		free = uvmexp.free - BUFPAGES_DEFICIT;
		uvm_unlock_fpageq();

		/*
		 * now lock page queues and recompute the inactive target
		 */
		uvm_lock_pageq();
		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}
		uvm_unlock_pageq();

		/* Reclaim pages from the buffer cache if possible. */
		size = 0;
		if (pma != NULL)
			size += pma->pm_size >> PAGE_SHIFT;
		if (free < uvmexp.freetarg)
			size += uvmexp.freetarg - free;
		if (size == 0)
			size = 16; /* XXX */

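		/* ask the buffer cache (and DRM, if present) to give back
		 * twice the computed shortfall */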
		(void) bufbackoff(&constraint, size * 2);
#if NDRM > 0
		drmbackoff(size * 2);
#endif
		uvm_pmr_cache_drain();

		/*
		 * scan if needed: an outstanding pmalloc request, free pages
		 * below the target, or too few inactive pages.
		 */
		uvm_lock_pageq();
		free = uvmexp.free - BUFPAGES_DEFICIT;
		if (pma != NULL || (free < uvmexp.freetarg) ||
		    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
			uvmpd_scan(pma, &constraint);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		uvm_lock_fpageq();
		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		if (pma != NULL) {
			/*
			 * XXX If UVM_PMA_FREED isn't set, no pages
			 * were freed.  Should we set UVM_PMA_FAIL in
			 * that case?
			 */
			pma->pm_flags &= ~UVM_PMA_BUSY;
			if (pma->pm_flags & UVM_PMA_FREED) {
				pma->pm_flags &= ~UVM_PMA_LINKED;
				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
				    pmq);
				wakeup(pma);
			}
		}
		uvm_unlock_fpageq();

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		uvm_unlock_pageq();

		sched_pause(yield);
	}
	/*NOTREACHED*/
}


/*
 * uvm_aiodone_daemon:  main loop for the aiodone daemon.
 */
void
uvm_aiodone_daemon(void *arg)
{
	int s, free;
	struct buf *bp, *nbp;

	uvm.aiodoned_proc = curproc;

	for (;;) {
		/*
		 * Check for done aio structures. If we've got structures to
		 * process, do so. Otherwise sleep while avoiding races.
		 */
		mtx_enter(&uvm.aiodoned_lock);
		while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
			msleep_nsec(&uvm.aiodoned, &uvm.aiodoned_lock,
			    PVM, "aiodoned", INFSLP);
		/* Take the list for ourselves. */
		TAILQ_INIT(&uvm.aio_done);
		mtx_leave(&uvm.aiodoned_lock);

		/* process each i/o that's done. */
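		/*
		 * Snapshot the free count; it decides below whether to wake
		 * the pagedaemon or the sleepers in uvm_wait().
		 */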
		free = uvmexp.free;
		while (bp != NULL) {
			if (bp->b_flags & B_PDAEMON) {
				uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			s = splbio();	/* b_iodone must be called at splbio */
			(*bp->b_iodone)(bp);
			splx(s);
			bp = nbp;

			sched_pause(yield);
		}
		uvm_lock_fpageq();
		wakeup(free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
		    &uvmexp.free);
		uvm_unlock_fpageq();
	}
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => return the locked rwlock on success.  otherwise, return NULL.
 */
struct rwlock *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct rwlock *slock;

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = anon->an_lock;
	}

	if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
		return NULL;
	}

	return slock;
}

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return 1 if a page had an associated slot.
 */
int
uvmpd_dropswap(struct vm_page *pg)
{
	struct vm_anon *anon = pg->uanon;
	int slot, result = 0;

	if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		result = 1;
	} else if (pg->pg_flags & PQ_AOBJ) {
		slot = uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
		if (slot)
			result = 1;
	}

	return result;
}

/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we stop early once we have met our free target
 */
void
uvmpd_scan_inactive(struct uvm_pmalloc *pma,
    struct uvm_constraint_range *constraint, struct pglist *pglst)
{
	int free, result;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
	int npages;
	struct vm_page *swpps[SWCLUSTPAGES]; 	/* XXX: see below */
	struct rwlock *slock;
	int swnpages, swcpages;				/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;
	paddr_t paddr;

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
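	/*
	 * A cluster occupies a run of consecutive swap slots: each page
	 * added below gets slot swslot + swcpages, and the run is written
	 * with a single pageout once the cluster fills or the scan ends.
	 */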
	swslot = 0;
	swnpages = swcpages = 0;
	dirtyreacts = 0;
	p = NULL;

	/* Start with the first page on the list that fits in `constraint' */
	TAILQ_FOREACH(p, pglst, pageq) {
		paddr = atop(VM_PAGE_TO_PHYS(p));
		if (paddr >= constraint->ucr_low &&
		    paddr < constraint->ucr_high)
			break;
	}

	for (; p != NULL || swslot != 0; p = nextpg) {
		/*
		 * note that p can be NULL iff we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		uobj = NULL;
		anon = NULL;
		if (p) {
			/*
			 * see if we've met our target
			 */
			free = uvmexp.free - BUFPAGES_DEFICIT;
			if (((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging >= uvmexp.freetarg << 2)) ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
				nextpg = NULL;
			}
		}
		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			anon = p->uanon;
			uobj = p->uobject;

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * deadlock.
			 */
			slock = uvmpd_trylockowner(p);
			if (slock == NULL) {
				continue;
			}

			/*
			 * move referenced pages back to active queue
			 * and skip to next page.
			 */
			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				rw_exit(slock);
				uvmexp.pdreact++;
				continue;
			}

			if (p->pg_flags & PG_BUSY) {
				rw_exit(slock);
				uvmexp.pdbusy++;
				continue;
			}

			/* does the page belong to an object? */
			if (uobj != NULL) {
				uvmexp.pdobscan++;
			} else {
				KASSERT(anon != NULL);
				uvmexp.pdanscan++;
			}

			/*
			 * we now have the page queues locked.
			 * the page is not busy.   if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
				uvmexp.pdfreed++;

				if (anon) {

					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */

					KASSERT(anon->an_swslot != 0);

					/* remove from object */
					anon->an_page = NULL;
				}
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if ((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging > uvmexp.freetarg << 2)) {
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out:
			 * swap is completely full of swap-only pages, so
			 * no slot can be freed for it.  reactivate it so
			 * that we eventually cycle all pages thru the
			 * inactive queue.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
				dirtyreacts++;
				uvm_pageactivate(p);
				rw_exit(slock);
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
				uvmpd_dropswap(p);

			/*
			 * the page we are looking at is dirty.   we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.   we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */

			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
			atomic_setbits_int(&p->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(p, PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {
				/* free old swap slot (if any) */
				uvmpd_dropswap(p);

				/* start new cluster (if necessary) */
				if (swslot == 0) {
					swnpages = SWCLUSTPAGES;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						atomic_clearbits_int(
						    &p->pg_flags,
						    PG_BUSY);
						UVM_PAGE_OWN(p, NULL);
						rw_exit(slock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/* add block to cluster */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;
				rw_exit(slock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}
		} else {
			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {
			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is the final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: page queues
		 * OUT: locked:
		 *     !locked: pageqs
		 */

		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */

		if (swap_backed)
			swslot = 0;		/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.   in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.   if that happens, our page "p"
		 * may no longer be on the inactive queue.   so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */

		if (result == VM_PAGER_PEND) {
			uvmexp.paging += npages;
			uvm_lock_pageq();
			uvmexp.pdpending++;
			if (p) {
				if (p->pg_flags & PQ_INACTIVE)
					nextpg = TAILQ_NEXT(p, pageq);
				else
					nextpg = TAILQ_FIRST(pglst);
			} else {
				nextpg = NULL;
			}
			continue;
		}

		/* clean up "p" if we have one */
		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.   we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/* relock p's object: page queues not locked yet, so
			 * no need for "try" */

			/* !swap_backed case: already locked... */
			if (swap_backed) {
				rw_enter(slock, RW_WRITE);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->pg_flags & PG_WANTED)
				wakeup(p);

			atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? Can only happen for anons */
			if (p->pg_flags & PG_RELEASED) {
				KASSERT(anon != NULL);
				/*
				 * remove page so we can get nextpg,
				 * also zero out anon so we don't use
				 * it after the free.
				 */
				anon->an_page = NULL;
				p->uanon = NULL;

				rw_exit(anon->an_lock);
				uvm_anfree(anon);	/* kills anon */
				pmap_page_protect(p, PROT_NONE);
				anon = NULL;
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				/* free released page */
				uvm_pagefree(p);
			} else {	/* page was not released during I/O */
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				if (result != VM_PAGER_OK) {
					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(p);
					/* XXXCDC: if (swap_backed) FREE p's
					 * swap block? */
				} else {
					/* pageout was a success... */
					pmap_clear_reference(p);
					pmap_clear_modify(p);
					atomic_setbits_int(&p->pg_flags,
					    PG_CLEAN);
				}
			}

			/*
			 * drop object lock (if there is an object left).   do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */
			rw_exit(slock);

			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
				nextpg = TAILQ_FIRST(pglst);	/* reload! */
			}
		} else {
			/*
			 * if p is null in this loop, make sure it stays null
			 * in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}
	}
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

void
uvmpd_scan(struct uvm_pmalloc *pma, struct uvm_constraint_range *constraint)
{
	int free, inactive_shortage, swap_shortage, pages_freed;
	struct vm_page *p, *nextpg;
	struct rwlock *slock;
	paddr_t paddr;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	uvmexp.pdrevs++;		/* counter */

	/*
	 * get current "free" page count
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;

#ifdef __HAVE_PMAP_COLLECT
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (free < uvmexp.freetarg) {
		uvmexp.pdswout++;
		uvm_unlock_pageq();
		uvm_swapout_threads();
		uvm_lock_pageq();
	}
#endif

	/*
	 * now we want to work on meeting our targets.   first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */

	pages_freed = uvmexp.pdfreed;
	uvmpd_scan_inactive(pma, constraint, &uvm.page_inactive);
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * we have done the scan to get free pages.   now we work on meeting
	 * our inactive target.
	 */
	inactive_shortage = uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;
	swap_shortage = 0;
	if (free < uvmexp.freetarg && uvm_swapisfilled() && !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - free;
	}

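	/*
	 * The loop below strips swap slots from up to swap_shortage active
	 * pages so those slots can be reused for new pageouts.
	 */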
	for (p = TAILQ_FIRST(&uvm.page_active);
	     p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	     p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->pg_flags & PG_BUSY) {
			continue;
		}

		/*
		 * skip this page if it doesn't match the constraint.
		 */
		paddr = atop(VM_PAGE_TO_PHYS(p));
		if (paddr < constraint->ucr_low ||
		    paddr >= constraint->ucr_high)
			continue;

		/*
		 * lock the page's owner.
		 */
		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			continue;
		}

		/*
		 * skip this page if it's busy.
		 */
		if ((p->pg_flags & PG_BUSY) != 0) {
			rw_exit(slock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if (uvmpd_dropswap(p)) {
				atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
				swap_shortage--;
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			pmap_page_protect(p, PROT_NONE);
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}

		/*
		 * we're done with this page.
		 */
		rw_exit(slock);
	}
}

#ifdef HIBERNATE

/*
 * uvmpd_drop: drop clean pages from list
 */
void
uvmpd_drop(struct pglist *pglst)
{
	struct vm_page *p, *nextpg;

	for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);

		if (p->pg_flags & PQ_ANON || p->uobject == NULL)
			continue;

		if (p->pg_flags & PG_BUSY)
			continue;

		if (p->pg_flags & PG_CLEAN) {
			struct uvm_object * uobj = p->uobject;

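			/*
			 * PG_CLEAN was checked above without any locks held;
			 * re-check it below once the object lock and page
			 * queue lock are taken.
			 */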
			rw_enter(uobj->vmobjlock, RW_WRITE);
			uvm_lock_pageq();
			/*
			 * we now have the page queues locked.
			 * the page is not busy.   if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
			}
			uvm_unlock_pageq();
			rw_exit(uobj->vmobjlock);
		}
	}
}

void
uvmpd_hibernate(void)
{
	uvmpd_drop(&uvm.page_inactive);
	uvmpd_drop(&uvm.page_active);
}

#endif
