vm_pageout.c revision 49937
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 *    must display the following acknowledgement:
22 *	This product includes software developed by the University of
23 *	California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 *    may be used to endorse or promote products derived from this software
26 *    without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 *	from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
41 *
42 *
43 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
44 * All rights reserved.
45 *
46 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
47 *
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
53 *
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57 *
58 * Carnegie Mellon requests users of this software to return to
59 *
60 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
61 *  School of Computer Science
62 *  Carnegie Mellon University
63 *  Pittsburgh PA 15213-3890
64 *
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
67 *
68 * $Id: vm_pageout.c,v 1.144 1999/07/04 00:25:37 mckusick Exp $
69 */
70
71/*
72 *	The proverbial page-out daemon.
73 */
74
75#include "opt_vm.h"
76#include <sys/param.h>
77#include <sys/systm.h>
78#include <sys/kernel.h>
79#include <sys/proc.h>
80#include <sys/kthread.h>
81#include <sys/resourcevar.h>
82#include <sys/signalvar.h>
83#include <sys/vnode.h>
84#include <sys/vmmeter.h>
85#include <sys/sysctl.h>
86
87#include <vm/vm.h>
88#include <vm/vm_param.h>
89#include <vm/vm_prot.h>
90#include <sys/lock.h>
91#include <vm/vm_object.h>
92#include <vm/vm_page.h>
93#include <vm/vm_map.h>
94#include <vm/vm_pageout.h>
95#include <vm/vm_pager.h>
96#include <vm/swap_pager.h>
97#include <vm/vm_extern.h>
98
99/*
100 * System initialization
101 */
102
103/* the kernel process "vm_pageout"*/
104static void vm_pageout __P((void));
105static int vm_pageout_clean __P((vm_page_t));
106static int vm_pageout_scan __P((void));
107static int vm_pageout_free_page_calc __P((vm_size_t count));
108struct proc *pageproc;
109
110static struct kproc_desc page_kp = {
111	"pagedaemon",
112	vm_pageout,
113	&pageproc
114};
115SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, &page_kp)
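/*
 * kproc_start() forks the "pagedaemon" kernel process from the descriptor
 * above during the SI_SUB_KTHREAD_PAGE stage of boot and records its proc
 * pointer in pageproc.
 */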
116
117#if !defined(NO_SWAPPING)
118/* the kernel process "vm_daemon"*/
119static void vm_daemon __P((void));
120static struct	proc *vmproc;
121
122static struct kproc_desc vm_kp = {
123	"vmdaemon",
124	vm_daemon,
125	&vmproc
126};
127SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp)
128#endif
129
130
131int vm_pages_needed=0;		/* Event on which pageout daemon sleeps */
132int vm_pageout_deficit=0;	/* Estimated number of pages deficit */
133int vm_pageout_pages_needed=0;	/* flag saying that the pageout daemon needs pages */
134
135extern int npendingio;
136#if !defined(NO_SWAPPING)
137static int vm_pageout_req_swapout;	/* XXX */
138static int vm_daemon_needed;
139#endif
140extern int nswiodone;
141extern int vm_swap_size;
142static int vm_pageout_stats_max=0, vm_pageout_stats_interval = 0;
143static int vm_pageout_full_stats_interval = 0;
144static int vm_pageout_stats_free_max=0, vm_pageout_algorithm_lru=0;
145static int defer_swap_pageouts=0;
146static int disable_swap_pageouts=0;
147
148static int max_page_launder=100;
149#if defined(NO_SWAPPING)
150static int vm_swap_enabled=0;
151static int vm_swap_idle_enabled=0;
152#else
153static int vm_swap_enabled=1;
154static int vm_swap_idle_enabled=0;
155#endif
156
157SYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm,
158	CTLFLAG_RW, &vm_pageout_algorithm_lru, 0, "LRU page mgmt");
159
160SYSCTL_INT(_vm, OID_AUTO, pageout_stats_max,
161	CTLFLAG_RW, &vm_pageout_stats_max, 0, "Max pageout stats scan length");
162
163SYSCTL_INT(_vm, OID_AUTO, pageout_full_stats_interval,
164	CTLFLAG_RW, &vm_pageout_full_stats_interval, 0, "Interval for full stats scan");
165
166SYSCTL_INT(_vm, OID_AUTO, pageout_stats_interval,
167	CTLFLAG_RW, &vm_pageout_stats_interval, 0, "Interval for partial stats scan");
168
169SYSCTL_INT(_vm, OID_AUTO, pageout_stats_free_max,
170	CTLFLAG_RW, &vm_pageout_stats_free_max, 0, "Not implemented");
171
172#if defined(NO_SWAPPING)
173SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
174	CTLFLAG_RD, &vm_swap_enabled, 0, "");
175SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
176	CTLFLAG_RD, &vm_swap_idle_enabled, 0, "");
177#else
178SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled,
179	CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout");
180SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
181	CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
182#endif
183
184SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
185	CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");
186
187SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
188	CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
189
190SYSCTL_INT(_vm, OID_AUTO, max_page_launder,
191	CTLFLAG_RW, &max_page_launder, 0, "Maximum number of pages to clean per pass");
192
193
194#define VM_PAGEOUT_PAGE_COUNT 16
195int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
196
197int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
198
199#if !defined(NO_SWAPPING)
200typedef void freeer_fcn_t __P((vm_map_t, vm_object_t, vm_pindex_t, int));
201static void vm_pageout_map_deactivate_pages __P((vm_map_t, vm_pindex_t));
202static freeer_fcn_t vm_pageout_object_deactivate_pages;
203static void vm_req_vmdaemon __P((void));
204#endif
205static void vm_pageout_page_stats(void);
206
207/*
208 * vm_pageout_clean:
209 *
210 * Clean the page and remove it from the laundry.
211 *
212 * We set the busy bit to cause potential page faults on this page to
213 * block.  Note the careful timing, however: the busy bit isn't set until
214 * late, and until then we cannot do anything that will mess with the page.
215 */
216
217static int
218vm_pageout_clean(m)
219	vm_page_t m;
220{
221	register vm_object_t object;
222	vm_page_t mc[2*vm_pageout_page_count];
223	int pageout_count;
224	int i, forward_okay, backward_okay, page_base;
225	vm_pindex_t pindex = m->pindex;
226
227	object = m->object;
228
229	/*
230	 * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP
231	 * with the new swapper, but we could have serious problems paging
232	 * out other object types if there is insufficient memory.
233	 *
234	 * Unfortunately, checking free memory here is far too late, so the
235	 * check has been moved up a procedural level.
236	 */
237
238	/*
239	 * Don't mess with the page if it's busy.
240	 */
241	if ((m->hold_count != 0) ||
242	    ((m->busy != 0) || (m->flags & PG_BUSY)))
243		return 0;
244
245	mc[vm_pageout_page_count] = m;
246	pageout_count = 1;
247	page_base = vm_pageout_page_count;
248	forward_okay = TRUE;
249	if (pindex != 0)
250		backward_okay = TRUE;
251	else
252		backward_okay = FALSE;
253	/*
254	 * Scan the object for clusterable pages.
255	 *
256	 * We can cluster ONLY if the page is NOT
257	 * clean, wired, busy, held, or mapped into a
258	 * buffer, and one of the following holds:
259	 * 1) The page is inactive, or a seldom-used
260	 *    active page.
261	 * -or-
262	 * 2) We force the issue.
263	 */
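	/*
	 * As a rough illustration (assuming the default vm_pageout_page_count
	 * of 16): mc[] has 32 slots, the target page sits at mc[16], forward
	 * candidates fill mc[17] and up, backward candidates fill mc[15] and
	 * down, and page_base tracks the lowest occupied slot so the final
	 * flush covers one contiguous run.
	 */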
264	for (i = 1; (i < vm_pageout_page_count) && (forward_okay || backward_okay); i++) {
265		vm_page_t p;
266
267		/*
268		 * See if forward page is clusterable.
269		 */
270		if (forward_okay) {
271			/*
272			 * Stop forward scan at end of object.
273			 */
274			if ((pindex + i) > object->size) {
275				forward_okay = FALSE;
276				goto do_backward;
277			}
278			p = vm_page_lookup(object, pindex + i);
279			if (p) {
280				if (((p->queue - p->pc) == PQ_CACHE) ||
281					(p->flags & PG_BUSY) || p->busy) {
282					forward_okay = FALSE;
283					goto do_backward;
284				}
285				vm_page_test_dirty(p);
286				if ((p->dirty & p->valid) != 0 &&
287				    (p->queue == PQ_INACTIVE) &&
288				    (p->wire_count == 0) &&
289				    (p->hold_count == 0)) {
290					mc[vm_pageout_page_count + i] = p;
291					pageout_count++;
292					if (pageout_count == vm_pageout_page_count)
293						break;
294				} else {
295					forward_okay = FALSE;
296				}
297			} else {
298				forward_okay = FALSE;
299			}
300		}
301do_backward:
302		/*
303		 * See if backward page is clusterable.
304		 */
305		if (backward_okay) {
306			/*
307			 * Stop backward scan at beginning of object.
308			 */
309			if ((pindex - i) == 0) {
310				backward_okay = FALSE;
311			}
312			p = vm_page_lookup(object, pindex - i);
313			if (p) {
314				if (((p->queue - p->pc) == PQ_CACHE) ||
315					(p->flags & PG_BUSY) || p->busy) {
316					backward_okay = FALSE;
317					continue;
318				}
319				vm_page_test_dirty(p);
320				if ((p->dirty & p->valid) != 0 &&
321				    (p->queue == PQ_INACTIVE) &&
322				    (p->wire_count == 0) &&
323				    (p->hold_count == 0)) {
324					mc[vm_pageout_page_count - i] = p;
325					pageout_count++;
326					page_base--;
327					if (pageout_count == vm_pageout_page_count)
328						break;
329				} else {
330					backward_okay = FALSE;
331				}
332			} else {
333				backward_okay = FALSE;
334			}
335		}
336	}
337
338	/*
339	 * we allow reads during pageouts...
340	 */
341	return vm_pageout_flush(&mc[page_base], pageout_count, 0);
342}
343
344/*
345 * vm_pageout_flush() - launder the given pages
346 *
347 *	The given pages are laundered.  Note that we set up for the start of
348 *	I/O (i.e. busy the page), mark it read-only, and bump the object's
349 *	paging-in-progress count all in here rather than in the parent.  If we
350 *	want the parent to do more sophisticated things we may have to change
351 *	the ordering.
352 */
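/*
 * For example (illustrative numbers only): if three pages are flushed and
 * the pager reports VM_PAGER_OK, VM_PAGER_PEND, and VM_PAGER_ERROR, the
 * return value is 2.  The pending page is left busy, with the object's
 * paging-in-progress count held, until the pager completes the I/O; the
 * error page is reactivated so it does not clog the inactive queue.
 */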
353
354int
355vm_pageout_flush(mc, count, flags)
356	vm_page_t *mc;
357	int count;
358	int flags;
359{
360	register vm_object_t object;
361	int pageout_status[count];
362	int numpagedout = 0;
363	int i;
364
365	/*
366	 * Initiate I/O.  Bump the vm_page_t->busy counter and
367	 * mark the pages read-only.
368	 *
369	 * We do not have to fixup the clean/dirty bits here... we can
370	 * allow the pager to do it after the I/O completes.
371	 */
372
373	for (i = 0; i < count; i++) {
374		vm_page_io_start(mc[i]);
375		vm_page_protect(mc[i], VM_PROT_READ);
376	}
377
378	object = mc[0]->object;
379	vm_object_pip_add(object, count);
380
381	vm_pager_put_pages(object, mc, count,
382	    (flags | ((object == kernel_object) ? OBJPC_SYNC : 0)),
383	    pageout_status);
384
385	for (i = 0; i < count; i++) {
386		vm_page_t mt = mc[i];
387
388		switch (pageout_status[i]) {
389		case VM_PAGER_OK:
390			numpagedout++;
391			break;
392		case VM_PAGER_PEND:
393			numpagedout++;
394			break;
395		case VM_PAGER_BAD:
396			/*
397			 * Page outside of range of object. Right now we
398			 * essentially lose the changes by pretending it
399			 * worked.
400			 */
401			pmap_clear_modify(VM_PAGE_TO_PHYS(mt));
402			mt->dirty = 0;
403			break;
404		case VM_PAGER_ERROR:
405		case VM_PAGER_FAIL:
406			/*
407			 * If the page couldn't be paged out, then reactivate it
408			 * so it doesn't clog the inactive list.  (We will try
409			 * paging it out again later.)
410			 */
411			vm_page_activate(mt);
412			break;
413		case VM_PAGER_AGAIN:
414			break;
415		}
416
417		/*
418		 * If the operation is still going, leave the page busy to
419		 * block all other accesses. Also, leave the paging in
420		 * progress indicator set so that we don't attempt an object
421		 * collapse.
422		 */
423		if (pageout_status[i] != VM_PAGER_PEND) {
424			vm_object_pip_wakeup(object);
425			vm_page_io_finish(mt);
426		}
427	}
428	return numpagedout;
429}
430
431#if !defined(NO_SWAPPING)
432/*
433 *	vm_pageout_object_deactivate_pages
434 *
435 *	deactivate enough pages to satisfy the inactive target
436 *	requirements or if vm_page_proc_limit is set, then
437 *	deactivate all of the pages in the object and its
438 *	backing_objects.
439 *
440 *	The object and map must be locked.
441 */
442static void
443vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
444	vm_map_t map;
445	vm_object_t object;
446	vm_pindex_t desired;
447	int map_remove_only;
448{
449	register vm_page_t p, next;
450	int rcount;
451	int remove_mode;
452	int s;
453
454	if (object->type == OBJT_DEVICE)
455		return;
456
457	while (object) {
458		if (pmap_resident_count(vm_map_pmap(map)) <= desired)
459			return;
460		if (object->paging_in_progress)
461			return;
462
463		remove_mode = map_remove_only;
464		if (object->shadow_count > 1)
465			remove_mode = 1;
466		/*
467		 * Scan the object's entire memory queue.
468		 */
469		rcount = object->resident_page_count;
470		p = TAILQ_FIRST(&object->memq);
471		while (p && (rcount-- > 0)) {
472			int actcount;
473			if (pmap_resident_count(vm_map_pmap(map)) <= desired)
474				return;
475			next = TAILQ_NEXT(p, listq);
476			cnt.v_pdpages++;
477			if (p->wire_count != 0 ||
478			    p->hold_count != 0 ||
479			    p->busy != 0 ||
480			    (p->flags & PG_BUSY) ||
481			    !pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) {
482				p = next;
483				continue;
484			}
485
486			actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(p));
487			if (actcount) {
488				vm_page_flag_set(p, PG_REFERENCED);
489			} else if (p->flags & PG_REFERENCED) {
490				actcount = 1;
491			}
492
493			if ((p->queue != PQ_ACTIVE) &&
494				(p->flags & PG_REFERENCED)) {
495				vm_page_activate(p);
496				p->act_count += actcount;
497				vm_page_flag_clear(p, PG_REFERENCED);
498			} else if (p->queue == PQ_ACTIVE) {
499				if ((p->flags & PG_REFERENCED) == 0) {
500					p->act_count -= min(p->act_count, ACT_DECLINE);
501					if (!remove_mode && (vm_pageout_algorithm_lru || (p->act_count == 0))) {
502						vm_page_protect(p, VM_PROT_NONE);
503						vm_page_deactivate(p);
504					} else {
505						s = splvm();
506						TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
507						TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
508						splx(s);
509					}
510				} else {
511					vm_page_activate(p);
512					vm_page_flag_clear(p, PG_REFERENCED);
513					if (p->act_count < (ACT_MAX - ACT_ADVANCE))
514						p->act_count += ACT_ADVANCE;
515					s = splvm();
516					TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
517					TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
518					splx(s);
519				}
520			} else if (p->queue == PQ_INACTIVE) {
521				vm_page_protect(p, VM_PROT_NONE);
522			}
523			p = next;
524		}
525		object = object->backing_object;
526	}
527	return;
528}
529
530/*
531 * Deactivate some number of pages in a map; try to do it fairly, but
532 * that is really hard to do.
533 */
534static void
535vm_pageout_map_deactivate_pages(map, desired)
536	vm_map_t map;
537	vm_pindex_t desired;
538{
539	vm_map_entry_t tmpe;
540	vm_object_t obj, bigobj;
541
542	if (lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, (void *)0, curproc)) {
543		return;
544	}
545
546	bigobj = NULL;
547
548	/*
549	 * first, search out the biggest object, and try to free pages from
550	 * that.
551	 */
552	tmpe = map->header.next;
553	while (tmpe != &map->header) {
554		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
555			obj = tmpe->object.vm_object;
556			if ((obj != NULL) && (obj->shadow_count <= 1) &&
557				((bigobj == NULL) ||
558				 (bigobj->resident_page_count < obj->resident_page_count))) {
559				bigobj = obj;
560			}
561		}
562		tmpe = tmpe->next;
563	}
564
565	if (bigobj)
566		vm_pageout_object_deactivate_pages(map, bigobj, desired, 0);
567
568	/*
569	 * Next, hunt around for other pages to deactivate.  We actually
570	 * do this search sort of wrong -- .text first is not the best idea.
571	 */
572	tmpe = map->header.next;
573	while (tmpe != &map->header) {
574		if (pmap_resident_count(vm_map_pmap(map)) <= desired)
575			break;
576		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
577			obj = tmpe->object.vm_object;
578			if (obj)
579				vm_pageout_object_deactivate_pages(map, obj, desired, 0);
580		}
581		tmpe = tmpe->next;
582	}
583
584	/*
585	 * Remove all mappings if a process is swapped out; this will free page
586	 * table pages.
587	 */
588	if (desired == 0)
589		pmap_remove(vm_map_pmap(map),
590			VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
591	vm_map_unlock(map);
592	return;
593}
594#endif
595
596/*
597 * Don't try to be fancy - being fancy can lead to VOP_LOCK's and therefore
598 * to vnode deadlocks.  We only do it for OBJT_DEFAULT and OBJT_SWAP objects
599 * which we know can be trivially freed.
600 */
601
602void
603vm_pageout_page_free(vm_page_t m) {
604	vm_object_t object = m->object;
605	int type = object->type;
606
607	if (type == OBJT_SWAP || type == OBJT_DEFAULT)
608		vm_object_reference(object);
609	vm_page_busy(m);
610	vm_page_protect(m, VM_PROT_NONE);
611	vm_page_free(m);
612	if (type == OBJT_SWAP || type == OBJT_DEFAULT)
613		vm_object_deallocate(object);
614}
615
616/*
617 *	vm_pageout_scan does the dirty work for the pageout daemon.
618 */
619static int
620vm_pageout_scan()
621{
622	vm_page_t m, next;
623	int page_shortage, maxscan, pcount;
624	int addl_page_shortage, addl_page_shortage_init;
625	int maxlaunder;
626	int launder_loop = 0;
627	struct proc *p, *bigproc;
628	vm_offset_t size, bigsize;
629	vm_object_t object;
630	int force_wakeup = 0;
631	int actcount;
632	int vnodes_skipped = 0;
633	int s;
634
635	/*
636	 * Do whatever cleanup that the pmap code can.
637	 */
638	pmap_collect();
639
640	addl_page_shortage_init = vm_pageout_deficit;
641	vm_pageout_deficit = 0;
642
643	if (max_page_launder == 0)
644		max_page_launder = 1;
645
646	/*
647	 * Calculate the number of pages we want to either free or move
648	 * to the cache.
649	 */
650
651	page_shortage = (cnt.v_free_target + cnt.v_cache_min) -
652	    (cnt.v_free_count + cnt.v_cache_count);
653	page_shortage += addl_page_shortage_init;
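	/*
	 * Illustrative numbers only: with a free target of 1024 pages, a
	 * cache minimum of 512, and 300 free plus 400 cached pages on hand,
	 * the initial shortage is (1024 + 512) - (300 + 400) = 836 pages,
	 * plus whatever deficit was recorded by recent failed allocations.
	 */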
654
655	/*
656	 * Figure out what to do with dirty pages when they are encountered.
657	 * Assume that 1/3 of the pages on the inactive list are clean.  If
658	 * we think we can reach our target, disable laundering (do not
659	 * clean any dirty pages).  If we miss the target we will loop back
660	 * up and do a laundering run.
661	 */
662
663	if (cnt.v_inactive_count / 3 > page_shortage) {
664		maxlaunder = 0;
665		launder_loop = 0;
666	} else {
667		maxlaunder =
668		    (cnt.v_inactive_target > max_page_launder) ?
669		    max_page_launder : cnt.v_inactive_target;
670		launder_loop = 1;
671	}
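	/*
	 * For instance, with 3000 inactive pages and a shortage of 800,
	 * 3000 / 3 = 1000 exceeds the shortage, so the first pass runs with
	 * laundering disabled; only if the clean pages alone fail to cover
	 * the shortage do we loop back and launder dirty pages as well.
	 */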
672
673	/*
674	 * Start scanning the inactive queue for pages we can move to the
675	 * cache or free.  The scan will stop when the target is reached or
676	 * we have scanned the entire inactive queue.
677	 */
678
679rescan0:
680	addl_page_shortage = addl_page_shortage_init;
681	maxscan = cnt.v_inactive_count;
682	for (m = TAILQ_FIRST(&vm_page_queue_inactive);
683	     m != NULL && maxscan-- > 0 && page_shortage > 0;
684	     m = next) {
685
686		cnt.v_pdpages++;
687
688		if (m->queue != PQ_INACTIVE) {
689			goto rescan0;
690		}
691
692		next = TAILQ_NEXT(m, pageq);
693
694		if (m->hold_count) {
695			s = splvm();
696			TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
697			TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
698			splx(s);
699			addl_page_shortage++;
700			continue;
701		}
702		/*
703		 * Don't mess with busy pages; keep them at the front of the
704		 * queue, as they are most likely being paged out.
705		 */
706		if (m->busy || (m->flags & PG_BUSY)) {
707			addl_page_shortage++;
708			continue;
709		}
710
711		/*
712		 * If the object is not being used, we ignore previous
713		 * references.
714		 */
715		if (m->object->ref_count == 0) {
716			vm_page_flag_clear(m, PG_REFERENCED);
717			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
718
719		/*
720		 * Otherwise, if the page has been referenced while in the
721		 * inactive queue, we bump the "activation count" upwards,
722		 * making it less likely that the page will be added back to
723		 * the inactive queue prematurely again.  Here we check the
724		 * page tables (or emulated bits, if any), since the upper
725		 * level VM system does not know anything about existing
726		 * references.
727		 */
728		} else if (((m->flags & PG_REFERENCED) == 0) &&
729			(actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(m)))) {
730			vm_page_activate(m);
731			m->act_count += (actcount + ACT_ADVANCE);
732			continue;
733		}
734
735		/*
736		 * If the upper level VM system knows about any page
737		 * references, we activate the page.  We also set the
738		 * "activation count" higher than normal so that we are less
739		 * likely to place the page back onto the inactive queue again.
740		 */
741		if ((m->flags & PG_REFERENCED) != 0) {
742			vm_page_flag_clear(m, PG_REFERENCED);
743			actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(m));
744			vm_page_activate(m);
745			m->act_count += (actcount + ACT_ADVANCE + 1);
746			continue;
747		}
748
749		/*
750		 * If the upper level VM system doesn't know anything about
751		 * the page being dirty, we have to check for it again.  As
752		 * far as the VM code knows, any partially dirty pages are
753		 * fully dirty.
754		 */
755		if (m->dirty == 0) {
756			vm_page_test_dirty(m);
757		} else {
758			vm_page_dirty(m);
759		}
760
761		/*
762		 * Invalid pages can be easily freed
763		 */
764		if (m->valid == 0) {
765			vm_pageout_page_free(m);
766			cnt.v_dfree++;
767			--page_shortage;
768
769		/*
770		 * Clean pages can be placed onto the cache queue.
771		 */
772		} else if (m->dirty == 0) {
773			vm_page_cache(m);
774			--page_shortage;
775
776		/*
777		 * Dirty pages need to be paged out.  Note that we clean
778		 * only a limited number of pages per pagedaemon pass.
779		 */
780		} else if (maxlaunder > 0) {
781			int written;
782			int swap_pageouts_ok;
783			struct vnode *vp = NULL;
784
785			object = m->object;
786
787			if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) {
788				swap_pageouts_ok = 1;
789			} else {
790				swap_pageouts_ok = !(defer_swap_pageouts || disable_swap_pageouts);
791				swap_pageouts_ok |= (!disable_swap_pageouts && defer_swap_pageouts &&
792					(cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min);
793
794			}
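			/*
			 * In plain terms: swap-backed pages are laundered
			 * unless swap pageouts are deferred or disabled; if
			 * they are merely deferred, we still launder them once
			 * free + cache memory drops below v_free_min.
			 */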
795
796			/*
797			 * We don't bother paging objects that are "dead".
798			 * Those objects are in a "rundown" state.
799			 */
800			if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) {
801				s = splvm();
802				TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
803				TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
804				splx(s);
805				continue;
806			}
807
808			/*
809			 * For now we protect against potential memory
810			 * deadlocks by requiring significant memory to be
811			 * free if the object is not OBJT_DEFAULT or OBJT_SWAP.
812			 * We do not 'trust' any other object type to operate
813			 * with low memory, not even OBJT_DEVICE.  The VM
814			 * allocator will special case allocations done by
815			 * the pageout daemon so the check below actually
816			 * does have some hysteresis in it.  It isn't the best
817			 * solution, though.
818			 */
819
820			if (object->type != OBJT_DEFAULT &&
821			    object->type != OBJT_SWAP &&
822			    cnt.v_free_count < cnt.v_free_reserved) {
823				s = splvm();
824				TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
825				TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m,
826				    pageq);
827				splx(s);
828				continue;
829			}
830
831			/*
832			 * Presumably we have sufficient free memory to do
833			 * the more sophisticated checks and locking required
834			 * for vnodes.
835			 *
836			 * The object is already known NOT to be dead.  The
837			 * vget() may still block, though, because
838			 * VOP_ISLOCKED() doesn't check to see if an inode
839			 * (v_data) is associated with the vnode.  If it isn't,
840			 * vget() will load it in from disk.  Worse, vget()
841			 * may actually get stuck waiting on "inode" if another
842			 * process is in the process of bringing the inode in.
843			 * This is bad news for us either way.
844			 *
845			 * So for the moment we check v_data == NULL as a
846			 * workaround.  This means that vnodes which do not
847			 * use v_data in the way we expect probably will not
848			 * wind up being paged out by the pager and it will be
849			 * up to the syncer to get them.  That's better than
850			 * us blocking here.
851			 *
852			 * This whole code section is bogus - we need to fix
853			 * the vnode pager to handle vm_page_t's without us
854			 * having to do any sophisticated VOP tests.
855			 */
856
857			if (object->type == OBJT_VNODE) {
858				vp = object->handle;
859
860				if (VOP_ISLOCKED(vp) ||
861				    vp->v_data == NULL ||
862				    vget(vp, LK_EXCLUSIVE|LK_NOOBJ, curproc)) {
863					if ((m->queue == PQ_INACTIVE) &&
864						(m->hold_count == 0) &&
865						(m->busy == 0) &&
866						(m->flags & PG_BUSY) == 0) {
867						s = splvm();
868						TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
869						TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
870						splx(s);
871					}
872					if (object->flags & OBJ_MIGHTBEDIRTY)
873						vnodes_skipped++;
874					continue;
875				}
876
877				/*
878				 * The page might have been moved to another queue
879				 * during potential blocking in vget() above.
880				 */
881				if (m->queue != PQ_INACTIVE) {
882					if (object->flags & OBJ_MIGHTBEDIRTY)
883						vnodes_skipped++;
884					vput(vp);
885					continue;
886				}
887
888				/*
889				 * The page may have been busied while we were blocked in
890				 * vget() above.  We don't move the page back onto the end
891				 * of the queue; the statistics are more accurate if we don't.
892				 */
893				if (m->busy || (m->flags & PG_BUSY)) {
894					vput(vp);
895					continue;
896				}
897
898				/*
899				 * If the page has become held, then skip it
900				 */
901				if (m->hold_count) {
902					s = splvm();
903					TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
904					TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
905					splx(s);
906					if (object->flags & OBJ_MIGHTBEDIRTY)
907						vnodes_skipped++;
908					vput(vp);
909					continue;
910				}
911			}
912
913			/*
914			 * If a page is dirty, then it is either being washed
915			 * (but not yet cleaned) or it is still in the
916			 * laundry.  If it is still in the laundry, then we
917			 * start the cleaning operation.
918			 */
919			written = vm_pageout_clean(m);
920			if (vp)
921				vput(vp);
922
923			maxlaunder -= written;
924		}
925	}
926
927	/*
928	 * If we still have a page shortage and we didn't launder anything,
929	 * run the inactive scan again and launder something this time.
930	 */
931
932	if (launder_loop == 0 && page_shortage > 0) {
933		launder_loop = 1;
934		maxlaunder =
935		    (cnt.v_inactive_target > max_page_launder) ?
936		    max_page_launder : cnt.v_inactive_target;
937		goto rescan0;
938	}
939
940	/*
941	 * Compute the page shortage from the point of view of having to
942	 * move pages from the active queue to the inactive queue.
943	 */
944
945	page_shortage = (cnt.v_inactive_target + cnt.v_cache_min) -
946	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
947	page_shortage += addl_page_shortage;
948
949	/*
950	 * Scan the active queue for things we can deactivate
951	 */
952
953	pcount = cnt.v_active_count;
954	m = TAILQ_FIRST(&vm_page_queue_active);
955
956	while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
957
958		/*
959		 * This is a consistency check, and should likely be a panic
960		 * or warning.
961		 */
962		if (m->queue != PQ_ACTIVE) {
963			break;
964		}
965
966		next = TAILQ_NEXT(m, pageq);
967		/*
968		 * Don't deactivate pages that are busy.
969		 */
970		if ((m->busy != 0) ||
971		    (m->flags & PG_BUSY) ||
972		    (m->hold_count != 0)) {
973			s = splvm();
974			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
975			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
976			splx(s);
977			m = next;
978			continue;
979		}
980
981		/*
982		 * The count for pagedaemon pages is done after checking the
983		 * page for eligibility...
984		 */
985		cnt.v_pdpages++;
986
987		/*
988		 * Check to see "how much" the page has been used.
989		 */
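		/*
		 * Roughly: each scan that finds a reference adds ACT_ADVANCE
		 * plus the pmap reference count to act_count (capped at
		 * ACT_MAX); each scan that finds none subtracts ACT_DECLINE,
		 * and the page becomes a deactivation candidate once act_count
		 * reaches zero (or immediately, under the strict LRU
		 * algorithm).
		 */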
990		actcount = 0;
991		if (m->object->ref_count != 0) {
992			if (m->flags & PG_REFERENCED) {
993				actcount += 1;
994			}
995			actcount += pmap_ts_referenced(VM_PAGE_TO_PHYS(m));
996			if (actcount) {
997				m->act_count += ACT_ADVANCE + actcount;
998				if (m->act_count > ACT_MAX)
999					m->act_count = ACT_MAX;
1000			}
1001		}
1002
1003		/*
1004		 * Since we have "tested" this bit, we need to clear it now.
1005		 */
1006		vm_page_flag_clear(m, PG_REFERENCED);
1007
1008		/*
1009		 * Only if the object is currently being used do we use the
1010		 * page activation count stats.
1011		 */
1012		if (actcount && (m->object->ref_count != 0)) {
1013			s = splvm();
1014			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
1015			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
1016			splx(s);
1017		} else {
1018			m->act_count -= min(m->act_count, ACT_DECLINE);
1019			if (vm_pageout_algorithm_lru ||
1020				(m->object->ref_count == 0) || (m->act_count == 0)) {
1021				page_shortage--;
1022				if (m->object->ref_count == 0) {
1023					vm_page_protect(m, VM_PROT_NONE);
1024					if (m->dirty == 0)
1025						vm_page_cache(m);
1026					else
1027						vm_page_deactivate(m);
1028				} else {
1029					vm_page_deactivate(m);
1030				}
1031			} else {
1032				s = splvm();
1033				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
1034				TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
1035				splx(s);
1036			}
1037		}
1038		m = next;
1039	}
1040
1041	s = splvm();
1042
1043	/*
1044	 * We try to maintain some *really* free pages; this allows interrupt
1045	 * code to be guaranteed space.  Since both cache and free queues
1046	 * are considered basically 'free', moving pages from cache to free
1047	 * does not affect other calculations.
1048	 */
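	/*
	 * The rover below steps through the PQ_CACHE queues by PQ_PRIME2
	 * (masked with PQ_L2_MASK), so successive passes pull pages from
	 * different page-coloring queues instead of draining a single queue.
	 */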
1049
1050	while (cnt.v_free_count < cnt.v_free_reserved) {
1051		static int cache_rover = 0;
1052		m = vm_page_list_find(PQ_CACHE, cache_rover, FALSE);
1053		if (!m)
1054			break;
1055		if ((m->flags & PG_BUSY) || m->busy || m->hold_count || m->wire_count) {
1056#ifdef INVARIANTS
1057			printf("Warning: busy page %p found in cache\n", m);
1058#endif
1059			vm_page_deactivate(m);
1060			continue;
1061		}
1062		cache_rover = (cache_rover + PQ_PRIME2) & PQ_L2_MASK;
1063		vm_pageout_page_free(m);
1064		cnt.v_dfree++;
1065	}
1066	splx(s);
1067
1068#if !defined(NO_SWAPPING)
1069	/*
1070	 * Idle process swapout -- run once per second.
1071	 */
1072	if (vm_swap_idle_enabled) {
1073		static long lsec;
1074		if (time_second != lsec) {
1075			vm_pageout_req_swapout |= VM_SWAP_IDLE;
1076			vm_req_vmdaemon();
1077			lsec = time_second;
1078		}
1079	}
1080#endif
1081
1082	/*
1083	 * If we didn't get enough free pages, and we have skipped a vnode
1084	 * in a writeable object, wake up the sync daemon.  And kick swapout
1085	 * if we did not get enough free pages.
1086	 */
1087	if ((cnt.v_cache_count + cnt.v_free_count) <
1088		(cnt.v_free_target + cnt.v_cache_min) ) {
1089		if (vnodes_skipped &&
1090		    (cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_min) {
1091			(void) speedup_syncer();
1092		}
1093#if !defined(NO_SWAPPING)
1094		if (vm_swap_enabled &&
1095			(cnt.v_free_count + cnt.v_cache_count < cnt.v_free_target)) {
1096			vm_req_vmdaemon();
1097			vm_pageout_req_swapout |= VM_SWAP_NORMAL;
1098		}
1099#endif
1100	}
1101
1102	/*
1103	 * Make sure that we have swap space -- if we are low on memory and
1104	 * swap, then kill the biggest process.
1105	 */
1106	if ((vm_swap_size == 0 || swap_pager_full) &&
1107	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min)) {
1108		bigproc = NULL;
1109		bigsize = 0;
1110		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1111			/*
1112			 * if this is a system process, skip it
1113			 */
1114			if ((p->p_flag & P_SYSTEM) || (p->p_lock > 0) ||
1115			    (p->p_pid == 1) ||
1116			    ((p->p_pid < 48) && (vm_swap_size != 0))) {
1117				continue;
1118			}
1119			/*
1120			 * if the process is in a non-running type state,
1121			 * don't touch it.
1122			 */
1123			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
1124				continue;
1125			}
1126			/*
1127			 * get the process size
1128			 */
1129			size = vmspace_resident_count(p->p_vmspace);
1130			/*
1131			 * if this process is bigger than the biggest one so far,
1132			 * remember it.
1133			 */
1134			if (size > bigsize) {
1135				bigproc = p;
1136				bigsize = size;
1137			}
1138		}
1139		if (bigproc != NULL) {
1140			killproc(bigproc, "out of swap space");
1141			bigproc->p_estcpu = 0;
1142			bigproc->p_nice = PRIO_MIN;
1143			resetpriority(bigproc);
1144			wakeup(&cnt.v_free_count);
1145		}
1146	}
1147	return force_wakeup;
1148}
1149
1150/*
1151 * This routine tries to maintain the pseudo-LRU active queue so that
1152 * some statistics accumulation still occurs during long periods in
1153 * which there is no paging.  This code helps the situation where
1154 * paging just starts to occur.
1155 */
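/*
 * As a rough example (illustrative numbers): with vm_pageout_stats_max set
 * to 1024 on a machine with 8192 active pages out of 32768 total, a partial
 * pass checks at most (1024 * 8192) / 32768 = 256 active pages; once enough
 * partial intervals have accumulated to reach
 * vm_pageout_full_stats_interval, the cap is dropped and the whole active
 * queue is scanned.
 */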
1156static void
1157vm_pageout_page_stats()
1158{
1159	int s;
1160	vm_page_t m,next;
1161	int pcount,tpcount;		/* Number of pages to check */
1162	static int fullintervalcount = 0;
1163	int page_shortage;
1164
1165	page_shortage = (cnt.v_inactive_target + cnt.v_cache_max + cnt.v_free_min) -
1166	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
1167	if (page_shortage <= 0)
1168		return;
1169
1170	pcount = cnt.v_active_count;
1171	fullintervalcount += vm_pageout_stats_interval;
1172	if (fullintervalcount < vm_pageout_full_stats_interval) {
1173		tpcount = (vm_pageout_stats_max * cnt.v_active_count) / cnt.v_page_count;
1174		if (pcount > tpcount)
1175			pcount = tpcount;
1176	} else {
		fullintervalcount = 0;
	}
1177
1178	m = TAILQ_FIRST(&vm_page_queue_active);
1179	while ((m != NULL) && (pcount-- > 0)) {
1180		int actcount;
1181
1182		if (m->queue != PQ_ACTIVE) {
1183			break;
1184		}
1185
1186		next = TAILQ_NEXT(m, pageq);
1187		/*
1188		 * Don't deactivate pages that are busy.
1189		 */
1190		if ((m->busy != 0) ||
1191		    (m->flags & PG_BUSY) ||
1192		    (m->hold_count != 0)) {
1193			s = splvm();
1194			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
1195			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
1196			splx(s);
1197			m = next;
1198			continue;
1199		}
1200
1201		actcount = 0;
1202		if (m->flags & PG_REFERENCED) {
1203			vm_page_flag_clear(m, PG_REFERENCED);
1204			actcount += 1;
1205		}
1206
1207		actcount += pmap_ts_referenced(VM_PAGE_TO_PHYS(m));
1208		if (actcount) {
1209			m->act_count += ACT_ADVANCE + actcount;
1210			if (m->act_count > ACT_MAX)
1211				m->act_count = ACT_MAX;
1212			s = splvm();
1213			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
1214			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
1215			splx(s);
1216		} else {
1217			if (m->act_count == 0) {
1218				/*
1219				 * We turn off page access, so that we have more accurate
1220				 * RSS stats.  We don't do this in the normal page deactivation
1221				 * when the system is loaded VM-wise, because the cost of
1222				 * the large number of page protect operations would be higher
1223				 * than the value of doing the operation.
1224				 */
1225				vm_page_protect(m, VM_PROT_NONE);
1226				vm_page_deactivate(m);
1227			} else {
1228				m->act_count -= min(m->act_count, ACT_DECLINE);
1229				s = splvm();
1230				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
1231				TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
1232				splx(s);
1233			}
1234		}
1235
1236		m = next;
1237	}
1238}
1239
1240static int
1241vm_pageout_free_page_calc(count)
1242vm_size_t count;
1243{
1244	if (count < cnt.v_page_count)
1245		 return 0;
1246	/*
1247	 * free_reserved needs to include enough for the largest swap pager
1248	 * structures plus enough for any pv_entry structs when paging.
1249	 */
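	/*
	 * Rough worked example (illustrative, assuming 4K pages and the usual
	 * 64K MAXBSIZE): on a 32768-page machine, v_free_min starts at
	 * 4 + (32768 - 1024) / 200 = 162, v_pageout_free_min is
	 * 2 * 64K / 4K + v_interrupt_free_min = 32 + 2 = 34, and
	 * v_free_reserved adds vm_pageout_page_count, 32768 / 768 = 42, and
	 * PQ_L2_SIZE on top of that before being folded back into v_free_min.
	 */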
1250	if (cnt.v_page_count > 1024)
1251		cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200;
1252	else
1253		cnt.v_free_min = 4;
1254	cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
1255		cnt.v_interrupt_free_min;
1256	cnt.v_free_reserved = vm_pageout_page_count +
1257		cnt.v_pageout_free_min + (count / 768) + PQ_L2_SIZE;
1258	cnt.v_free_min += cnt.v_free_reserved;
1259	return 1;
1260}
1261
1262
1263/*
1264 *	vm_pageout is the high level pageout daemon.
1265 */
1266static void
1267vm_pageout()
1268{
1269	/*
1270	 * Initialize some paging parameters.
1271	 */
1272
1273	cnt.v_interrupt_free_min = 2;
1274	if (cnt.v_page_count < 2000)
1275		vm_pageout_page_count = 8;
1276
1277	vm_pageout_free_page_calc(cnt.v_page_count);
1278	/*
1279	 * Scale the free-page, cache, and inactive targets from the amount
1280	 * of free memory available at startup.
1281	 */
1282	if (cnt.v_free_count > 6144)
1283		cnt.v_free_target = 3 * cnt.v_free_min + cnt.v_free_reserved;
1284	else
1285		cnt.v_free_target = 2 * cnt.v_free_min + cnt.v_free_reserved;
1286
1287	if (cnt.v_free_count > 2048) {
1288		cnt.v_cache_min = cnt.v_free_target;
1289		cnt.v_cache_max = 2 * cnt.v_cache_min;
1290		cnt.v_inactive_target = (3 * cnt.v_free_target) / 2;
1291	} else {
1292		cnt.v_cache_min = 0;
1293		cnt.v_cache_max = 0;
1294		cnt.v_inactive_target = cnt.v_free_count / 4;
1295	}
1296	if (cnt.v_inactive_target > cnt.v_free_count / 3)
1297		cnt.v_inactive_target = cnt.v_free_count / 3;
1298
1299	/* XXX does not really belong here */
1300	if (vm_page_max_wired == 0)
1301		vm_page_max_wired = cnt.v_free_count / 3;
1302
1303	if (vm_pageout_stats_max == 0)
1304		vm_pageout_stats_max = cnt.v_free_target;
1305
1306	/*
1307	 * Set interval in seconds for stats scan.
1308	 */
1309	if (vm_pageout_stats_interval == 0)
1310		vm_pageout_stats_interval = 5;
1311	if (vm_pageout_full_stats_interval == 0)
1312		vm_pageout_full_stats_interval = vm_pageout_stats_interval * 4;
1313
1314
1315	/*
1316	 * Set maximum free per pass
1317	 */
1318	if (vm_pageout_stats_free_max == 0)
1319		vm_pageout_stats_free_max = 5;
1320
1321	max_page_launder = (cnt.v_page_count > 1800 ? 32 : 16);
1322
1323	curproc->p_flag |= P_BUFEXHAUST;
1324	swap_pager_swap_init();
1325	/*
1326	 * The pageout daemon is never done, so loop forever.
1327	 */
1328	while (TRUE) {
1329		int error;
1330		int s = splvm();
1331		if (!vm_pages_needed ||
1332			((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_min)) {
1333			vm_pages_needed = 0;
1334			error = tsleep(&vm_pages_needed,
1335				PVM, "psleep", vm_pageout_stats_interval * hz);
1336			if (error && !vm_pages_needed) {
1337				splx(s);
1338				vm_pageout_page_stats();
1339				continue;
1340			}
1341		} else if (vm_pages_needed) {
1342			vm_pages_needed = 0;
1343			tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
1344		}
1345
1346		if (vm_pages_needed)
1347			cnt.v_pdwakeups++;
1348		vm_pages_needed = 0;
1349		splx(s);
1350		vm_pageout_scan();
1351		vm_pageout_deficit = 0;
1352		wakeup(&cnt.v_free_count);
1353	}
1354}
1355
1356void
1357pagedaemon_wakeup()
1358{
1359	if (!vm_pages_needed && curproc != pageproc) {
1360		vm_pages_needed++;
1361		wakeup(&vm_pages_needed);
1362	}
1363}
1364
1365#if !defined(NO_SWAPPING)
1366static void
1367vm_req_vmdaemon()
1368{
1369	static int lastrun = 0;
1370
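	/*
	 * Rate-limit wakeups of the vm daemon to roughly once per second;
	 * the second test handles the ticks counter wrapping around.
	 */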
1371	if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
1372		wakeup(&vm_daemon_needed);
1373		lastrun = ticks;
1374	}
1375}
1376
1377static void
1378vm_daemon()
1379{
1380	struct proc *p;
1381
1382	while (TRUE) {
1383		tsleep(&vm_daemon_needed, PPAUSE, "psleep", 0);
1384		if (vm_pageout_req_swapout) {
1385			swapout_procs(vm_pageout_req_swapout);
1386			vm_pageout_req_swapout = 0;
1387		}
1388		/*
1389		 * Scan the processes; if a process has exceeded its RSS rlimit
1390		 * or is swapped out, deactivate its pages.
1391		 */
1392
1393		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1394			vm_pindex_t limit, size;
1395
1396			/*
1397			 * if this is a system process or is already exiting,
1398			 * skip it.
1399			 */
1400			if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
1401				continue;
1402			}
1403			/*
1404			 * if the process is in a non-running type state,
1405			 * don't touch it.
1406			 */
1407			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
1408				continue;
1409			}
1410			/*
1411			 * get a limit
1412			 */
1413			limit = OFF_TO_IDX(
1414			    qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
1415				p->p_rlimit[RLIMIT_RSS].rlim_max));
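			/*
			 * For example (illustrative numbers): with a 16 MB RSS
			 * limit and 4K pages, the limit works out to 4096
			 * pages; a process resident beyond that, or one that
			 * is swapped out (handled below), has its map walked
			 * and excess pages deactivated.
			 */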
1416
1417			/*
1418			 * let processes that are swapped out really be
1419			 * swapped out: set the limit to nothing (this will force
1420			 * a swap-out).
1421			 */
1422			if ((p->p_flag & P_INMEM) == 0)
1423				limit = 0;	/* XXX */
1424
1425			size = vmspace_resident_count(p->p_vmspace);
1426			if (limit >= 0 && size >= limit) {
1427				vm_pageout_map_deactivate_pages(
1428				    &p->p_vmspace->vm_map, limit);
1429			}
1430		}
1431	}
1432}
1433#endif
1434