vm_pageout.c revision 58714
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 *    must display the following acknowledgement:
22 *	This product includes software developed by the University of
23 *	California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 *    may be used to endorse or promote products derived from this software
26 *    without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 *	from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
41 *
42 *
43 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
44 * All rights reserved.
45 *
46 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
47 *
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
53 *
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57 *
58 * Carnegie Mellon requests users of this software to return to
59 *
60 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
61 *  School of Computer Science
62 *  Carnegie Mellon University
63 *  Pittsburgh PA 15213-3890
64 *
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
67 *
68 * $FreeBSD: head/sys/vm/vm_pageout.c 58708 2000-03-27 21:33:32Z dillon $
69 */
70
71/*
72 *	The proverbial page-out daemon.
73 */
74
75#include "opt_vm.h"
76#include <sys/param.h>
77#include <sys/systm.h>
78#include <sys/kernel.h>
79#include <sys/proc.h>
80#include <sys/kthread.h>
81#include <sys/resourcevar.h>
82#include <sys/signalvar.h>
83#include <sys/vnode.h>
84#include <sys/vmmeter.h>
85#include <sys/sysctl.h>
86
87#include <vm/vm.h>
88#include <vm/vm_param.h>
89#include <sys/lock.h>
90#include <vm/vm_object.h>
91#include <vm/vm_page.h>
92#include <vm/vm_map.h>
93#include <vm/vm_pageout.h>
94#include <vm/vm_pager.h>
95#include <vm/swap_pager.h>
96#include <vm/vm_extern.h>
97
98/*
99 * System initialization
100 */
101
102/* the kernel process "vm_pageout"*/
103static void vm_pageout __P((void));
104static int vm_pageout_clean __P((vm_page_t));
105static int vm_pageout_scan __P((void));
106static int vm_pageout_free_page_calc __P((vm_size_t count));
107struct proc *pageproc;
108
109static struct kproc_desc page_kp = {
110	"pagedaemon",
111	vm_pageout,
112	&pageproc
113};
114SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, &page_kp)
115
116#if !defined(NO_SWAPPING)
117/* the kernel process "vm_daemon"*/
118static void vm_daemon __P((void));
119static struct	proc *vmproc;
120
121static struct kproc_desc vm_kp = {
122	"vmdaemon",
123	vm_daemon,
124	&vmproc
125};
126SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp)
127#endif
128
129
130int vm_pages_needed=0;		/* Event on which pageout daemon sleeps */
131int vm_pageout_deficit=0;	/* Estimated number of pages deficit */
132int vm_pageout_pages_needed=0;	/* flag saying that the pageout daemon needs pages */
133
134#if !defined(NO_SWAPPING)
135static int vm_pageout_req_swapout;	/* XXX */
136static int vm_daemon_needed;
137#endif
138extern int vm_swap_size;
139static int vm_pageout_stats_max=0, vm_pageout_stats_interval = 0;
140static int vm_pageout_full_stats_interval = 0;
141static int vm_pageout_stats_free_max=0, vm_pageout_algorithm_lru=0;
142static int defer_swap_pageouts=0;
143static int disable_swap_pageouts=0;
144
145static int max_page_launder=100;
146#if defined(NO_SWAPPING)
147static int vm_swap_enabled=0;
148static int vm_swap_idle_enabled=0;
149#else
150static int vm_swap_enabled=1;
151static int vm_swap_idle_enabled=0;
152#endif
153
154SYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm,
155	CTLFLAG_RW, &vm_pageout_algorithm_lru, 0, "LRU page mgmt");
156
157SYSCTL_INT(_vm, OID_AUTO, pageout_stats_max,
158	CTLFLAG_RW, &vm_pageout_stats_max, 0, "Max pageout stats scan length");
159
160SYSCTL_INT(_vm, OID_AUTO, pageout_full_stats_interval,
161	CTLFLAG_RW, &vm_pageout_full_stats_interval, 0, "Interval for full stats scan");
162
163SYSCTL_INT(_vm, OID_AUTO, pageout_stats_interval,
164	CTLFLAG_RW, &vm_pageout_stats_interval, 0, "Interval for partial stats scan");
165
166SYSCTL_INT(_vm, OID_AUTO, pageout_stats_free_max,
167	CTLFLAG_RW, &vm_pageout_stats_free_max, 0, "Not implemented");
168
169#if defined(NO_SWAPPING)
170SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
171	CTLFLAG_RD, &vm_swap_enabled, 0, "");
172SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
173	CTLFLAG_RD, &vm_swap_idle_enabled, 0, "");
174#else
175SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
176	CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout");
177SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
178	CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
179#endif
180
181SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
182	CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");
183
184SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
185	CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
186
187SYSCTL_INT(_vm, OID_AUTO, max_page_launder,
188	CTLFLAG_RW, &max_page_launder, 0, "Maximum number of pages to clean per pass");
189
190
191#define VM_PAGEOUT_PAGE_COUNT 16
192int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
193
194int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
195
196#if !defined(NO_SWAPPING)
197typedef void freeer_fcn_t __P((vm_map_t, vm_object_t, vm_pindex_t, int));
198static void vm_pageout_map_deactivate_pages __P((vm_map_t, vm_pindex_t));
199static freeer_fcn_t vm_pageout_object_deactivate_pages;
200static void vm_req_vmdaemon __P((void));
201#endif
202static void vm_pageout_page_stats(void);
203
204/*
205 * vm_pageout_clean:
206 *
207 * Clean the page and remove it from the laundry.
208 *
209 * We set the busy bit to cause potential page faults on this page to
210 * block.  Note the careful timing, however: the busy bit isn't set until
211 * late, so until then we cannot do anything that will mess with the page.
212 */
213
214static int
215vm_pageout_clean(m)
216	vm_page_t m;
217{
218	register vm_object_t object;
219	vm_page_t mc[2*vm_pageout_page_count];
220	int pageout_count;
221	int ib, is, page_base;
222	vm_pindex_t pindex = m->pindex;
223
224	object = m->object;
225
226	/*
227	 * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP
228	 * with the new swapper, but we could have serious problems paging
229	 * out other object types if there is insufficient memory.
230	 *
231	 * Unfortunately, checking free memory here is far too late, so the
232	 * check has been moved up a procedural level.
233	 */
234
235	/*
236	 * Don't mess with the page if it's busy.
237	 */
238	if ((m->hold_count != 0) ||
239	    ((m->busy != 0) || (m->flags & PG_BUSY)))
240		return 0;
241
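	/*
	 * The mc[] array holds 2 * vm_pageout_page_count entries and the
	 * target page is seeded into the middle slot, so the cluster can
	 * grow backwards (toward lower pindexes, decrementing page_base)
	 * as well as forwards from the target.
	 */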
242	mc[vm_pageout_page_count] = m;
243	pageout_count = 1;
244	page_base = vm_pageout_page_count;
245	ib = 1;
246	is = 1;
247
248	/*
249	 * Scan object for clusterable pages.
250	 *
251	 * We can cluster ONLY if: ->> the page is NOT
252	 * clean, wired, busy, held, or mapped into a
253	 * buffer, and one of the following:
254	 * 1) The page is inactive, or a seldom used
255	 *    active page.
256	 * -or-
257	 * 2) we force the issue.
258	 *
259	 * During heavy mmap/modification loads the pageout
260	 * daemon can really fragment the underlying file
261 * due to flushing pages out of order and not trying to
262 * align the clusters (which leaves sporadic out-of-order
263	 * holes).  To solve this problem we do the reverse scan
264	 * first and attempt to align our cluster, then do a
265	 * forward scan if room remains.
266	 */
267
268more:
269	while (ib && pageout_count < vm_pageout_page_count) {
270		vm_page_t p;
271
272		if (ib > pindex) {
273			ib = 0;
274			break;
275		}
276
277		if ((p = vm_page_lookup(object, pindex - ib)) == NULL) {
278			ib = 0;
279			break;
280		}
281		if (((p->queue - p->pc) == PQ_CACHE) ||
282		    (p->flags & PG_BUSY) || p->busy) {
283			ib = 0;
284			break;
285		}
286		vm_page_test_dirty(p);
287		if ((p->dirty & p->valid) == 0 ||
288		    p->queue != PQ_INACTIVE ||
289		    p->wire_count != 0 ||
290		    p->hold_count != 0) {
291			ib = 0;
292			break;
293		}
294		mc[--page_base] = p;
295		++pageout_count;
296		++ib;
297		/*
298		 * alignment boundary, stop here and switch directions.  Do
299		 * not clear ib.
300		 */
301		if ((pindex - (ib - 1)) % vm_pageout_page_count == 0)
302			break;
303	}
304
305	while (pageout_count < vm_pageout_page_count &&
306	    pindex + is < object->size) {
307		vm_page_t p;
308
309		if ((p = vm_page_lookup(object, pindex + is)) == NULL)
310			break;
311		if (((p->queue - p->pc) == PQ_CACHE) ||
312		    (p->flags & PG_BUSY) || p->busy) {
313			break;
314		}
315		vm_page_test_dirty(p);
316		if ((p->dirty & p->valid) == 0 ||
317		    p->queue != PQ_INACTIVE ||
318		    p->wire_count != 0 ||
319		    p->hold_count != 0) {
320			break;
321		}
322		mc[page_base + pageout_count] = p;
323		++pageout_count;
324		++is;
325	}
326
327	/*
328	 * If we exhausted our forward scan, continue with the reverse scan
329	 * when possible, even past a page boundary.  This catches boundary
330	 * conditions.
331	 */
332	if (ib && pageout_count < vm_pageout_page_count)
333		goto more;
334
335	/*
336	 * we allow reads during pageouts...
337	 */
338	return vm_pageout_flush(&mc[page_base], pageout_count, 0);
339}
340
341/*
342 * vm_pageout_flush() - launder the given pages
343 *
344 *	The given pages are laundered.  Note that we set up for the start of
345 *	I/O (i.e. busy the page), mark it read-only, and bump the object's
346 *	paging-in-progress count all in here rather than in the parent.  If we want
347 *	the parent to do more sophisticated things we may have to change
348 *	the ordering.
349 */
350
351int
352vm_pageout_flush(mc, count, flags)
353	vm_page_t *mc;
354	int count;
355	int flags;
356{
357	register vm_object_t object;
358	int pageout_status[count];
359	int numpagedout = 0;
360	int i;
361
362	/*
363	 * Initiate I/O.  Bump the vm_page_t->busy counter and
364	 * mark the pages read-only.
365	 *
366	 * We do not have to fixup the clean/dirty bits here... we can
367	 * allow the pager to do it after the I/O completes.
368	 */
369
370	for (i = 0; i < count; i++) {
371		vm_page_io_start(mc[i]);
372		vm_page_protect(mc[i], VM_PROT_READ);
373	}
374
375	object = mc[0]->object;
376	vm_object_pip_add(object, count);
377
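	/*
	 * Hand the cluster to the pager.  Pageouts against the kernel_object
	 * are forced synchronous (OBJPC_SYNC); everything else uses the
	 * caller-supplied flags as-is.
	 */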
378	vm_pager_put_pages(object, mc, count,
379	    (flags | ((object == kernel_object) ? OBJPC_SYNC : 0)),
380	    pageout_status);
381
382	for (i = 0; i < count; i++) {
383		vm_page_t mt = mc[i];
384
385		switch (pageout_status[i]) {
386		case VM_PAGER_OK:
387			numpagedout++;
388			break;
389		case VM_PAGER_PEND:
390			numpagedout++;
391			break;
392		case VM_PAGER_BAD:
393			/*
394			 * Page outside of range of object. Right now we
395			 * essentially lose the changes by pretending it
396			 * worked.
397			 */
398			pmap_clear_modify(VM_PAGE_TO_PHYS(mt));
399			vm_page_undirty(mt);
400			break;
401		case VM_PAGER_ERROR:
402		case VM_PAGER_FAIL:
403			/*
404			 * If the page couldn't be paged out, then reactivate the
405			 * page so it doesn't clog the inactive list.  (We
406			 * will try paging it out again later.)
407			 */
408			vm_page_activate(mt);
409			break;
410		case VM_PAGER_AGAIN:
411			break;
412		}
413
414		/*
415		 * If the operation is still going, leave the page busy to
416		 * block all other accesses. Also, leave the paging in
417		 * progress indicator set so that we don't attempt an object
418		 * collapse.
419		 */
420		if (pageout_status[i] != VM_PAGER_PEND) {
421			vm_object_pip_wakeup(object);
422			vm_page_io_finish(mt);
423		}
424	}
425	return numpagedout;
426}
427
428#if !defined(NO_SWAPPING)
429/*
430 *	vm_pageout_object_deactivate_pages
431 *
432 *	deactivate enough pages to satisfy the inactive target
433 *	requirements or, if vm_page_proc_limit is set,
434 *	deactivate all of the pages in the object and its
435 *	backing_objects.
436 *
437 *	The object and map must be locked.
438 */
439static void
440vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
441	vm_map_t map;
442	vm_object_t object;
443	vm_pindex_t desired;
444	int map_remove_only;
445{
446	register vm_page_t p, next;
447	int rcount;
448	int remove_mode;
449	int s;
450
451	if (object->type == OBJT_DEVICE)
452		return;
453
454	while (object) {
455		if (pmap_resident_count(vm_map_pmap(map)) <= desired)
456			return;
457		if (object->paging_in_progress)
458			return;
459
460		remove_mode = map_remove_only;
461		if (object->shadow_count > 1)
462			remove_mode = 1;
463	/*
464	 * scan the object's entire memory queue
465	 */
466		rcount = object->resident_page_count;
467		p = TAILQ_FIRST(&object->memq);
468		while (p && (rcount-- > 0)) {
469			int actcount;
470			if (pmap_resident_count(vm_map_pmap(map)) <= desired)
471				return;
472			next = TAILQ_NEXT(p, listq);
473			cnt.v_pdpages++;
474			if (p->wire_count != 0 ||
475			    p->hold_count != 0 ||
476			    p->busy != 0 ||
477			    (p->flags & PG_BUSY) ||
478			    !pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) {
479				p = next;
480				continue;
481			}
482
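			/*
			 * Harvest the hardware reference bits.
			 * pmap_ts_referenced() returns the number of
			 * mappings found referenced and clears those bits.
			 */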
483			actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(p));
484			if (actcount) {
485				vm_page_flag_set(p, PG_REFERENCED);
486			} else if (p->flags & PG_REFERENCED) {
487				actcount = 1;
488			}
489
490			if ((p->queue != PQ_ACTIVE) &&
491				(p->flags & PG_REFERENCED)) {
492				vm_page_activate(p);
493				p->act_count += actcount;
494				vm_page_flag_clear(p, PG_REFERENCED);
495			} else if (p->queue == PQ_ACTIVE) {
496				if ((p->flags & PG_REFERENCED) == 0) {
497					p->act_count -= min(p->act_count, ACT_DECLINE);
498					if (!remove_mode && (vm_pageout_algorithm_lru || (p->act_count == 0))) {
499						vm_page_protect(p, VM_PROT_NONE);
500						vm_page_deactivate(p);
501					} else {
502						s = splvm();
503						TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
504						TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
505						splx(s);
506					}
507				} else {
508					vm_page_activate(p);
509					vm_page_flag_clear(p, PG_REFERENCED);
510					if (p->act_count < (ACT_MAX - ACT_ADVANCE))
511						p->act_count += ACT_ADVANCE;
512					s = splvm();
513					TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
514					TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, p, pageq);
515					splx(s);
516				}
517			} else if (p->queue == PQ_INACTIVE) {
518				vm_page_protect(p, VM_PROT_NONE);
519			}
520			p = next;
521		}
522		object = object->backing_object;
523	}
524	return;
525}
526
527/*
528 * deactivate some number of pages in a map; try to do it fairly, but
529 * that is really hard to do.
530 */
531static void
532vm_pageout_map_deactivate_pages(map, desired)
533	vm_map_t map;
534	vm_pindex_t desired;
535{
536	vm_map_entry_t tmpe;
537	vm_object_t obj, bigobj;
538
539	if (lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, (void *)0, curproc)) {
540		return;
541	}
542
543	bigobj = NULL;
544
545	/*
546	 * first, search out the biggest object, and try to free pages from
547	 * that.
548	 */
549	tmpe = map->header.next;
550	while (tmpe != &map->header) {
551		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
552			obj = tmpe->object.vm_object;
553			if ((obj != NULL) && (obj->shadow_count <= 1) &&
554				((bigobj == NULL) ||
555				 (bigobj->resident_page_count < obj->resident_page_count))) {
556				bigobj = obj;
557			}
558		}
559		tmpe = tmpe->next;
560	}
561
562	if (bigobj)
563		vm_pageout_object_deactivate_pages(map, bigobj, desired, 0);
564
565	/*
566	 * Next, hunt around for other pages to deactivate.  We actually
567	 * do this search sort of wrong -- .text first is not the best idea.
568	 */
569	tmpe = map->header.next;
570	while (tmpe != &map->header) {
571		if (pmap_resident_count(vm_map_pmap(map)) <= desired)
572			break;
573		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
574			obj = tmpe->object.vm_object;
575			if (obj)
576				vm_pageout_object_deactivate_pages(map, obj, desired, 0);
577		}
578		tmpe = tmpe->next;
579	}
580
581	/*
582	 * Remove all mappings if a process is swapped out; this will free page
583	 * table pages.
584	 */
585	if (desired == 0)
586		pmap_remove(vm_map_pmap(map),
587			VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
588	vm_map_unlock(map);
589	return;
590}
591#endif
592
593/*
594 * Don't try to be fancy - being fancy can lead to VOP_LOCK's and therefore
595 * to vnode deadlocks.  We only do it for OBJT_DEFAULT and OBJT_SWAP objects
596 * which we know can be trivially freed.
597 */
598
599void
600vm_pageout_page_free(vm_page_t m) {
601	vm_object_t object = m->object;
602	int type = object->type;
603
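	/*
	 * Take a temporary reference for OBJT_SWAP/OBJT_DEFAULT objects so
	 * the object cannot be torn down while its (possibly last) page is
	 * being freed.
	 */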
604	if (type == OBJT_SWAP || type == OBJT_DEFAULT)
605		vm_object_reference(object);
606	vm_page_busy(m);
607	vm_page_protect(m, VM_PROT_NONE);
608	vm_page_free(m);
609	if (type == OBJT_SWAP || type == OBJT_DEFAULT)
610		vm_object_deallocate(object);
611}
612
613/*
614 *	vm_pageout_scan does the dirty work for the pageout daemon.
615 */
616static int
617vm_pageout_scan()
618{
619	vm_page_t m, next;
620	int page_shortage, maxscan, pcount;
621	int addl_page_shortage, addl_page_shortage_init;
622	int maxlaunder;
623	int launder_loop = 0;
624	struct proc *p, *bigproc;
625	vm_offset_t size, bigsize;
626	vm_object_t object;
627	int force_wakeup = 0;
628	int actcount;
629	int vnodes_skipped = 0;
630	int s;
631
632	/*
633	 * Do whatever cleanup that the pmap code can.
634	 */
635	pmap_collect();
636
637	addl_page_shortage_init = vm_pageout_deficit;
638	vm_pageout_deficit = 0;
639
640	if (max_page_launder == 0)
641		max_page_launder = 1;
642
643	/*
644	 * Calculate the number of pages we want to either free or move
645	 * to the cache.
646	 */
647
648	page_shortage = vm_paging_target() + addl_page_shortage_init;
649
650	/*
651	 * Figure out what to do with dirty pages when they are encountered.
652	 * Assume that 1/3 of the pages on the inactive list are clean.  If
653	 * we think we can reach our target, disable laundering (do not
654	 * clean any dirty pages).  If we miss the target we will loop back
655	 * up and do a laundering run.
656	 */
657
658	if (cnt.v_inactive_count / 3 > page_shortage) {
659		maxlaunder = 0;
660		launder_loop = 0;
661	} else {
662		maxlaunder =
663		    (cnt.v_inactive_target > max_page_launder) ?
664		    max_page_launder : cnt.v_inactive_target;
665		launder_loop = 1;
666	}
667
668	/*
669	 * Start scanning the inactive queue for pages we can move to the
670	 * cache or free.  The scan will stop when the target is reached or
671	 * we have scanned the entire inactive queue.
672	 */
673
674rescan0:
675	addl_page_shortage = addl_page_shortage_init;
676	maxscan = cnt.v_inactive_count;
677	for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
678	     m != NULL && maxscan-- > 0 && page_shortage > 0;
679	     m = next) {
680
681		cnt.v_pdpages++;
682
683		if (m->queue != PQ_INACTIVE) {
684			goto rescan0;
685		}
686
687		next = TAILQ_NEXT(m, pageq);
688
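		/*
		 * Held pages cannot be paged out; requeue at the tail and
		 * note the additional shortage so the later active-queue
		 * scan compensates.
		 */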
689		if (m->hold_count) {
690			s = splvm();
691			TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
692			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
693			splx(s);
694			addl_page_shortage++;
695			continue;
696		}
697		/*
698		 * Don't mess with busy pages; keep them at the front of the
699		 * queue since they are most likely being paged out.
700		 */
701		if (m->busy || (m->flags & PG_BUSY)) {
702			addl_page_shortage++;
703			continue;
704		}
705
706		/*
707		 * If the object is not being used, we ignore previous
708		 * references.
709		 */
710		if (m->object->ref_count == 0) {
711			vm_page_flag_clear(m, PG_REFERENCED);
712			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
713
714		/*
715		 * Otherwise, if the page has been referenced while in the
716		 * inactive queue, we bump the "activation count" upwards,
717		 * making it less likely that the page will be added back to
718		 * the inactive queue prematurely again.  Here we check the
719		 * page tables (or emulated bits, if any), since the upper
720		 * level VM system knows nothing about these existing
721		 * references.
722		 */
723		} else if (((m->flags & PG_REFERENCED) == 0) &&
724			(actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(m)))) {
725			vm_page_activate(m);
726			m->act_count += (actcount + ACT_ADVANCE);
727			continue;
728		}
729
730		/*
731		 * If the upper level VM system knows about any page
732		 * references, we activate the page.  We also set the
733		 * "activation count" higher than normal so that we are less
734		 * likely to place pages back onto the inactive queue again.
735		 */
736		if ((m->flags & PG_REFERENCED) != 0) {
737			vm_page_flag_clear(m, PG_REFERENCED);
738			actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(m));
739			vm_page_activate(m);
740			m->act_count += (actcount + ACT_ADVANCE + 1);
741			continue;
742		}
743
744		/*
745		 * If the upper level VM system doesn't know anything about
746		 * the page being dirty, we have to check for it again.  As
747		 * far as the VM code knows, any partially dirty pages are
748		 * fully dirty.
749		 */
750		if (m->dirty == 0) {
751			vm_page_test_dirty(m);
752		} else {
753			vm_page_dirty(m);
754		}
755
756		/*
757		 * Invalid pages can be easily freed
758		 */
759		if (m->valid == 0) {
760			vm_pageout_page_free(m);
761			cnt.v_dfree++;
762			--page_shortage;
763
764		/*
765		 * Clean pages can be placed onto the cache queue.
766		 */
767		} else if (m->dirty == 0) {
768			vm_page_cache(m);
769			--page_shortage;
770
771		/*
772		 * Dirty pages need to be paged out.  Note that we clean
773		 * only a limited number of pages per pagedaemon pass.
774		 */
775		} else if (maxlaunder > 0) {
776			int written;
777			int swap_pageouts_ok;
778			struct vnode *vp = NULL;
779
780			object = m->object;
781
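			/*
			 * Decide whether a swap-backed page may be laundered.
			 * defer_swap_pageouts and disable_swap_pageouts are
			 * sysctl-controlled; a deferral is overridden when
			 * free memory hits the minimum.  Other object types
			 * are always eligible at this point.
			 */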
782			if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
783				swap_pageouts_ok = 1;
784			} else {
785				swap_pageouts_ok = !(defer_swap_pageouts || disable_swap_pageouts);
786				swap_pageouts_ok |= (!disable_swap_pageouts && defer_swap_pageouts &&
787				vm_page_count_min());
788
789			}
790
791			/*
792			 * We don't bother paging objects that are "dead".
793			 * Those objects are in a "rundown" state.
794			 */
795			if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) {
796				s = splvm();
797				TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
798				TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
799				splx(s);
800				continue;
801			}
802
803			/*
804			 * For now we protect against potential memory
805			 * deadlocks by requiring significant memory to be
806			 * free if the object is not OBJT_DEFAULT or OBJT_SWAP.
807			 * We do not 'trust' any other object type to operate
808			 * with low memory, not even OBJT_DEVICE.  The VM
809			 * allocator will special case allocations done by
810			 * the pageout daemon so the check below actually
811			 * does have some hysteresis in it.  It isn't the best
812			 * solution, though.
813			 */
814
815			if (object->type != OBJT_DEFAULT &&
816			    object->type != OBJT_SWAP &&
817			    cnt.v_free_count < cnt.v_free_reserved) {
818				s = splvm();
819				TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
820				TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m,
821				    pageq);
822				splx(s);
823				continue;
824			}
825
826			/*
827			 * Presumably we have sufficient free memory to do
828			 * the more sophisticated checks and locking required
829			 * for vnodes.
830			 *
831			 * The object is already known NOT to be dead.  The
832			 * vget() may still block, though, because
833			 * VOP_ISLOCKED() doesn't check to see if an inode
834			 * (v_data) is associated with the vnode.  If it isn't,
835			 * vget() will load it in from disk.  Worse, vget()
836			 * may actually get stuck waiting on "inode" if another
837			 * process is in the process of bringing the inode in.
838			 * This is bad news for us either way.
839			 *
840			 * So for the moment we check v_data == NULL as a
841			 * workaround.  This means that vnodes which do not
842			 * use v_data in the way we expect probably will not
843			 * wind up being paged out by the pager and it will be
844			 * up to the syncer to get them.  That's better then
845			 * up to the syncer to get them.  That's better than
846			 *
847			 * This whole code section is bogus - we need to fix
848			 * the vnode pager to handle vm_page_t's without us
849			 * having to do any sophisticated VOP tests.
850			 */
851
852			if (object->type == OBJT_VNODE) {
853				vp = object->handle;
854
855				if (VOP_ISLOCKED(vp, NULL) ||
856				    vp->v_data == NULL ||
857				    vget(vp, LK_EXCLUSIVE|LK_NOOBJ, curproc)) {
858					if ((m->queue == PQ_INACTIVE) &&
859						(m->hold_count == 0) &&
860						(m->busy == 0) &&
861						(m->flags & PG_BUSY) == 0) {
862						s = splvm();
863						TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
864						TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
865						splx(s);
866					}
867					if (object->flags & OBJ_MIGHTBEDIRTY)
868						vnodes_skipped++;
869					continue;
870				}
871
872				/*
873				 * The page might have been moved to another queue
874				 * during potential blocking in vget() above.
875				 */
876				if (m->queue != PQ_INACTIVE) {
877					if (object->flags & OBJ_MIGHTBEDIRTY)
878						vnodes_skipped++;
879					vput(vp);
880					continue;
881				}
882
883				/*
884				 * The page may have been busied while we were blocked in
885				 * vget() above.  We don't move the page back onto the end
886				 * of the queue, so the statistics remain more accurate.
887				 */
888				if (m->busy || (m->flags & PG_BUSY)) {
889					vput(vp);
890					continue;
891				}
892
893				/*
894				 * If the page has become held, then skip it
895				 */
896				if (m->hold_count) {
897					s = splvm();
898					TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
899					TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
900					splx(s);
901					if (object->flags & OBJ_MIGHTBEDIRTY)
902						vnodes_skipped++;
903					vput(vp);
904					continue;
905				}
906			}
907
908			/*
909			 * If a page is dirty, then it is either being washed
910			 * (but not yet cleaned) or it is still in the
911			 * laundry.  If it is still in the laundry, then we
912			 * start the cleaning operation.
913			 */
914			written = vm_pageout_clean(m);
915			if (vp)
916				vput(vp);
917
918			maxlaunder -= written;
919		}
920	}
921
922	/*
923	 * If we still have a page shortage and we didn't launder anything,
924	 * run the inactive scan again and launder something this time.
925	 */
926
927	if (launder_loop == 0 && page_shortage > 0) {
928		launder_loop = 1;
929		maxlaunder =
930		    (cnt.v_inactive_target > max_page_launder) ?
931		    max_page_launder : cnt.v_inactive_target;
932		goto rescan0;
933	}
934
935	/*
936	 * Compute the page shortage from the point of view of having to
937	 * move pages from the active queue to the inactive queue.
938	 */
939
940	page_shortage = (cnt.v_inactive_target + cnt.v_cache_min) -
941	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
942	page_shortage += addl_page_shortage;
943
944	/*
945	 * Scan the active queue for things we can deactivate
946	 */
947
948	pcount = cnt.v_active_count;
949	m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
950
951	while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
952
953		/*
954		 * This is a consistency check, and should likely be a panic
955		 * or warning.
956		 */
957		if (m->queue != PQ_ACTIVE) {
958			break;
959		}
960
961		next = TAILQ_NEXT(m, pageq);
962		/*
963		 * Don't deactivate pages that are busy.
964		 */
965		if ((m->busy != 0) ||
966		    (m->flags & PG_BUSY) ||
967		    (m->hold_count != 0)) {
968			s = splvm();
969			TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
970			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
971			splx(s);
972			m = next;
973			continue;
974		}
975
976		/*
977		 * The count for pagedaemon pages is done after checking the
978		 * page for eligibility...
979		 */
980		cnt.v_pdpages++;
981
982		/*
983		 * Check to see "how much" the page has been used.
984		 */
985		actcount = 0;
986		if (m->object->ref_count != 0) {
987			if (m->flags & PG_REFERENCED) {
988				actcount += 1;
989			}
990			actcount += pmap_ts_referenced(VM_PAGE_TO_PHYS(m));
991			if (actcount) {
992				m->act_count += ACT_ADVANCE + actcount;
993				if (m->act_count > ACT_MAX)
994					m->act_count = ACT_MAX;
995			}
996		}
997
998		/*
999		 * Since we have "tested" this bit, we need to clear it now.
1000		 */
1001		vm_page_flag_clear(m, PG_REFERENCED);
1002
1003		/*
1004		 * Only if an object is currently being used do we use the
1005		 * page activation count stats.
1006		 */
1007		if (actcount && (m->object->ref_count != 0)) {
1008			s = splvm();
1009			TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1010			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1011			splx(s);
1012		} else {
1013			m->act_count -= min(m->act_count, ACT_DECLINE);
1014			if (vm_pageout_algorithm_lru ||
1015				(m->object->ref_count == 0) || (m->act_count == 0)) {
1016				page_shortage--;
1017				if (m->object->ref_count == 0) {
1018					vm_page_protect(m, VM_PROT_NONE);
1019					if (m->dirty == 0)
1020						vm_page_cache(m);
1021					else
1022						vm_page_deactivate(m);
1023				} else {
1024					vm_page_deactivate(m);
1025				}
1026			} else {
1027				s = splvm();
1028				TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1029				TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1030				splx(s);
1031			}
1032		}
1033		m = next;
1034	}
1035
1036	s = splvm();
1037
1038	/*
1039	 * We try to maintain some *really* free pages; this allows interrupt
1040	 * code to be guaranteed space.  Since both cache and free queues
1041	 * are considered basically 'free', moving pages from cache to free
1042	 * does not affect other calculations.
1043	 */
1044
1045	while (cnt.v_free_count < cnt.v_free_reserved) {
1046		static int cache_rover = 0;
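		/*
		 * cache_rover walks the PQ_CACHE queues with a prime stride
		 * (PQ_PRIME2, masked by PQ_L2_MASK) so that frees are spread
		 * across the page-coloring buckets.
		 */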
1047		m = vm_page_list_find(PQ_CACHE, cache_rover, FALSE);
1048		if (!m)
1049			break;
1050		if ((m->flags & PG_BUSY) || m->busy || m->hold_count || m->wire_count) {
1051#ifdef INVARIANTS
1052			printf("Warning: busy page %p found in cache\n", m);
1053#endif
1054			vm_page_deactivate(m);
1055			continue;
1056		}
1057		cache_rover = (cache_rover + PQ_PRIME2) & PQ_L2_MASK;
1058		vm_pageout_page_free(m);
1059		cnt.v_dfree++;
1060	}
1061	splx(s);
1062
1063#if !defined(NO_SWAPPING)
1064	/*
1065	 * Idle process swapout -- run once per second.
1066	 */
1067	if (vm_swap_idle_enabled) {
1068		static long lsec;
1069		if (time_second != lsec) {
1070			vm_pageout_req_swapout |= VM_SWAP_IDLE;
1071			vm_req_vmdaemon();
1072			lsec = time_second;
1073		}
1074	}
1075#endif
1076
1077	/*
1078	 * If we didn't get enough free pages, and we have skipped a vnode
1079	 * in a writeable object, wakeup the sync daemon.  And kick swapout
1080	 * if we did not get enough free pages.
1081	 */
1082	if (vm_paging_target() > 0) {
1083		if (vnodes_skipped && vm_page_count_min())
1084			(void) speedup_syncer();
1085#if !defined(NO_SWAPPING)
1086		if (vm_swap_enabled && vm_page_count_target()) {
1087			vm_req_vmdaemon();
1088			vm_pageout_req_swapout |= VM_SWAP_NORMAL;
1089		}
1090#endif
1091	}
1092
1093	/*
1094	 * make sure that we have swap space -- if we are low on memory and
1095	 * swap -- then kill the biggest process.
1096	 */
1097	if ((vm_swap_size == 0 || swap_pager_full) && vm_page_count_min()) {
1098		bigproc = NULL;
1099		bigsize = 0;
1100		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1101			/*
1102			 * Skip system and held (p_lock) processes, init, and low pids while swap remains.
1103			 */
1104			if ((p->p_flag & P_SYSTEM) || (p->p_lock > 0) ||
1105			    (p->p_pid == 1) ||
1106			    ((p->p_pid < 48) && (vm_swap_size != 0))) {
1107				continue;
1108			}
1109			/*
1110			 * if the process is in a non-running type state,
1111			 * don't touch it.
1112			 */
1113			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
1114				continue;
1115			}
1116			/*
1117			 * get the process size
1118			 */
1119			size = vmspace_resident_count(p->p_vmspace);
1120			/*
1121			 * if this process is bigger than the biggest one,
1122			 * remember it.
1123			 */
1124			if (size > bigsize) {
1125				bigproc = p;
1126				bigsize = size;
1127			}
1128		}
1129		if (bigproc != NULL) {
1130			killproc(bigproc, "out of swap space");
1131			bigproc->p_estcpu = 0;
1132			bigproc->p_nice = PRIO_MIN;
1133			resetpriority(bigproc);
1134			wakeup(&cnt.v_free_count);
1135		}
1136	}
1137	return force_wakeup;
1138}
1139
1140/*
1141 * This routine tries to maintain the pseudo LRU active queue,
1142 * so that during long periods of time when there is no paging,
1143 * some statistic accumulation still occurs.  This code
1144 * helps the situation where paging just starts to occur.
1145 */
1146static void
1147vm_pageout_page_stats()
1148{
1149	int s;
1150	vm_page_t m,next;
1151	int pcount,tpcount;		/* Number of pages to check */
1152	static int fullintervalcount = 0;
1153	int page_shortage;
1154	int s0;
1155
1156	page_shortage =
1157	    (cnt.v_inactive_target + cnt.v_cache_max + cnt.v_free_min) -
1158	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
1159
1160	if (page_shortage <= 0)
1161		return;
1162
1163	s0 = splvm();
1164
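	/*
	 * Until a full-stats interval has accumulated, limit the scan to a
	 * slice of the active queue proportional to vm_pageout_stats_max;
	 * a full run examines the entire active queue.
	 */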
1165	pcount = cnt.v_active_count;
1166	fullintervalcount += vm_pageout_stats_interval;
1167	if (fullintervalcount < vm_pageout_full_stats_interval) {
1168		tpcount = (vm_pageout_stats_max * cnt.v_active_count) / cnt.v_page_count;
1169		if (pcount > tpcount)
1170			pcount = tpcount;
1171	}
1172
1173	m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
1174	while ((m != NULL) && (pcount-- > 0)) {
1175		int actcount;
1176
1177		if (m->queue != PQ_ACTIVE) {
1178			break;
1179		}
1180
1181		next = TAILQ_NEXT(m, pageq);
1182		/*
1183		 * Don't deactivate pages that are busy.
1184		 */
1185		if ((m->busy != 0) ||
1186		    (m->flags & PG_BUSY) ||
1187		    (m->hold_count != 0)) {
1188			s = splvm();
1189			TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1190			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1191			splx(s);
1192			m = next;
1193			continue;
1194		}
1195
1196		actcount = 0;
1197		if (m->flags & PG_REFERENCED) {
1198			vm_page_flag_clear(m, PG_REFERENCED);
1199			actcount += 1;
1200		}
1201
1202		actcount += pmap_ts_referenced(VM_PAGE_TO_PHYS(m));
1203		if (actcount) {
1204			m->act_count += ACT_ADVANCE + actcount;
1205			if (m->act_count > ACT_MAX)
1206				m->act_count = ACT_MAX;
1207			s = splvm();
1208			TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1209			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1210			splx(s);
1211		} else {
1212			if (m->act_count == 0) {
1213				/*
1214				 * We turn off page access, so that we have more accurate
1215				 * RSS stats.  We don't do this in the normal page deactivation
1216				 * when the system is under VM load, because the cost of
1217				 * the large number of page protect operations would be higher
1218				 * than the value of doing the operation.
1219				 */
1220				vm_page_protect(m, VM_PROT_NONE);
1221				vm_page_deactivate(m);
1222			} else {
1223				m->act_count -= min(m->act_count, ACT_DECLINE);
1224				s = splvm();
1225				TAILQ_REMOVE(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1226				TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
1227				splx(s);
1228			}
1229		}
1230
1231		m = next;
1232	}
1233	splx(s0);
1234}
1235
1236static int
1237vm_pageout_free_page_calc(count)
1238vm_size_t count;
1239{
1240	if (count < cnt.v_page_count)
1241		 return 0;
1242	/*
1243	 * free_reserved needs to include enough for the largest swap pager
1244	 * structures plus enough for any pv_entry structs when paging.
1245	 */
1246	if (cnt.v_page_count > 1024)
1247		cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200;
1248	else
1249		cnt.v_free_min = 4;
1250	cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
1251		cnt.v_interrupt_free_min;
1252	cnt.v_free_reserved = vm_pageout_page_count +
1253		cnt.v_pageout_free_min + (count / 768) + PQ_L2_SIZE;
1254	cnt.v_free_severe = cnt.v_free_min / 2;
1255	cnt.v_free_min += cnt.v_free_reserved;
1256	cnt.v_free_severe += cnt.v_free_reserved;
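	/* The resulting ordering is v_free_reserved < v_free_severe < v_free_min. */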
1257	return 1;
1258}
1259
1260
1261/*
1262 *	vm_pageout is the high level pageout daemon.
1263 */
1264static void
1265vm_pageout()
1266{
1267	/*
1268	 * Initialize some paging parameters.
1269	 */
1270
1271	cnt.v_interrupt_free_min = 2;
1272	if (cnt.v_page_count < 2000)
1273		vm_pageout_page_count = 8;
1274
1275	vm_pageout_free_page_calc(cnt.v_page_count);
1276	/*
1277	 * free_reserved needs to include enough for the largest swap pager
1278	 * structures plus enough for any pv_entry structs when paging.
1279	 */
1280	if (cnt.v_free_count > 6144)
1281		cnt.v_free_target = 3 * cnt.v_free_min + cnt.v_free_reserved;
1282	else
1283		cnt.v_free_target = 2 * cnt.v_free_min + cnt.v_free_reserved;
1284
1285	if (cnt.v_free_count > 2048) {
1286		cnt.v_cache_min = cnt.v_free_target;
1287		cnt.v_cache_max = 2 * cnt.v_cache_min;
1288		cnt.v_inactive_target = (3 * cnt.v_free_target) / 2;
1289	} else {
1290		cnt.v_cache_min = 0;
1291		cnt.v_cache_max = 0;
1292		cnt.v_inactive_target = cnt.v_free_count / 4;
1293	}
1294	if (cnt.v_inactive_target > cnt.v_free_count / 3)
1295		cnt.v_inactive_target = cnt.v_free_count / 3;
1296
1297	/* XXX does not really belong here */
1298	if (vm_page_max_wired == 0)
1299		vm_page_max_wired = cnt.v_free_count / 3;
1300
1301	if (vm_pageout_stats_max == 0)
1302		vm_pageout_stats_max = cnt.v_free_target;
1303
1304	/*
1305	 * Set interval in seconds for stats scan.
1306	 */
1307	if (vm_pageout_stats_interval == 0)
1308		vm_pageout_stats_interval = 5;
1309	if (vm_pageout_full_stats_interval == 0)
1310		vm_pageout_full_stats_interval = vm_pageout_stats_interval * 4;
1311
1312
1313	/*
1314	 * Set maximum free per pass
1315	 */
1316	if (vm_pageout_stats_free_max == 0)
1317		vm_pageout_stats_free_max = 5;
1318
1319	max_page_launder = (cnt.v_page_count > 1800 ? 32 : 16);
1320
1321	curproc->p_flag |= P_BUFEXHAUST;
1322	swap_pager_swap_init();
1323	/*
1324	 * The pageout daemon is never done, so loop forever.
1325	 */
1326	while (TRUE) {
1327		int error;
1328		int s = splvm();
1329
1330		if (vm_pages_needed && vm_page_count_min()) {
1331			/*
1332			 * Still not done, sleep a bit and go again
1333			 */
1334			vm_pages_needed = 0;
1335			tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
1336		} else {
1337			/*
1338			 * Good enough, sleep & handle stats
1339			 */
1340			vm_pages_needed = 0;
1341			error = tsleep(&vm_pages_needed,
1342				PVM, "psleep", vm_pageout_stats_interval * hz);
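			/*
			 * A non-zero error with no pageout request pending
			 * means the timed sleep simply expired; refresh the
			 * page activity statistics and go back to sleep.
			 */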
1343			if (error && !vm_pages_needed) {
1344				splx(s);
1345				vm_pageout_page_stats();
1346				continue;
1347			}
1348		}
1349
1350		if (vm_pages_needed)
1351			cnt.v_pdwakeups++;
1352		vm_pages_needed = 0;
1353		splx(s);
1354		vm_pageout_scan();
1355		vm_pageout_deficit = 0;
1356		wakeup(&cnt.v_free_count);
1357	}
1358}
1359
1360void
1361pagedaemon_wakeup()
1362{
1363	if (!vm_pages_needed && curproc != pageproc) {
1364		vm_pages_needed++;
1365		wakeup(&vm_pages_needed);
1366	}
1367}
1368
1369#if !defined(NO_SWAPPING)
1370static void
1371vm_req_vmdaemon()
1372{
1373	static int lastrun = 0;
1374
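	/*
	 * Wake the vmdaemon at most about once per second; the
	 * (ticks < lastrun) test handles the tick counter wrapping.
	 */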
1375	if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
1376		wakeup(&vm_daemon_needed);
1377		lastrun = ticks;
1378	}
1379}
1380
1381static void
1382vm_daemon()
1383{
1384	struct proc *p;
1385
1386	while (TRUE) {
1387		tsleep(&vm_daemon_needed, PPAUSE, "psleep", 0);
1388		if (vm_pageout_req_swapout) {
1389			swapout_procs(vm_pageout_req_swapout);
1390			vm_pageout_req_swapout = 0;
1391		}
1392		/*
1393		 * scan the processes for those exceeding their rlimits or
1394		 * that are swapped out -- deactivate their pages
1395		 */
1396
1397		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1398			vm_pindex_t limit, size;
1399
1400			/*
1401			 * if this is a system process or it is already
1402			 * exiting, skip it.
1403			 */
1404			if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
1405				continue;
1406			}
1407			/*
1408			 * if the process is in a non-running type state,
1409			 * don't touch it.
1410			 */
1411			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
1412				continue;
1413			}
1414			/*
1415			 * get a limit
1416			 */
1417			limit = OFF_TO_IDX(
1418			    qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
1419				p->p_rlimit[RLIMIT_RSS].rlim_max));
1420
1421			/*
1422			 * let processes that are swapped out really be
1423			 * swapped out: set the limit to nothing (this will
1424			 * force a swap-out).
1425			 */
1426			if ((p->p_flag & P_INMEM) == 0)
1427				limit = 0;	/* XXX */
1428
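			/*
			 * If the resident set size (in pages) meets or
			 * exceeds the limit, deactivate pages in the
			 * process's map to bring it back under the limit.
			 */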
1429			size = vmspace_resident_count(p->p_vmspace);
1430			if (limit >= 0 && size >= limit) {
1431				vm_pageout_map_deactivate_pages(
1432				    &p->p_vmspace->vm_map, limit);
1433			}
1434		}
1435	}
1436}
1437#endif
1438