/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_pageout.c,v 1.47 1995/04/16 14:12:15 davidg Exp $
 */

/*
 *	The proverbial page-out daemon.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/swap_pager.h>
#include <vm/vnode_pager.h>

int vm_pages_needed;		/* Event on which pageout daemon sleeps */

int vm_pageout_pages_needed;	/* flag saying that the pageout daemon needs pages */

extern int npendingio;
int vm_pageout_req_swapout;	/* XXX */
int vm_daemon_needed;
extern int nswiodone;
extern int swap_pager_full;
extern int vm_swap_size;
extern int vfs_update_wakeup;

#define MAXSCAN 1024		/* maximum number of pages to scan in queues */

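/* maximum number of pages to launder per pass over the inactive queue */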
#define MAXLAUNDER (cnt.v_page_count > 1800 ? 32 : 16)

#define VM_PAGEOUT_PAGE_COUNT 8
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;

int vm_page_max_wired;		/* XXX max # of wired pages system-wide */

/*
 * vm_pageout_clean:
 *	Clean the page m (and, when possible, a cluster of nearby dirty
 *	pages) by handing them to the object's pager.  Returns the number
 *	of pages successfully written or queued for pageout.
 */
int
vm_pageout_clean(m, sync)
	register vm_page_t m;
	int sync;
{
	/*
	 * Clean the page and remove it from the laundry.
	 *
	 * We set the busy bit to cause potential page faults on this page to
	 * block.
	 *
	 * And we set pageout-in-progress to keep the object from disappearing
	 * during pageout.  This guarantees that the page won't move from the
	 * inactive queue.  (However, any other page on the inactive queue may
	 * move!)
	 */

	register vm_object_t object;
	register vm_pager_t pager;
	int pageout_status[VM_PAGEOUT_PAGE_COUNT];
	vm_page_t ms[VM_PAGEOUT_PAGE_COUNT], mb[VM_PAGEOUT_PAGE_COUNT];
	int pageout_count, b_pageout_count;
	int anyok = 0;
	int i;
	vm_offset_t offset = m->offset;

	object = m->object;
	if (!object) {
		printf("pager: object missing\n");
		return 0;
	}
	if (!object->pager && (object->flags & OBJ_INTERNAL) == 0) {
		printf("pager: non internal obj without pager\n");
	}
	/*
	 * Try to collapse the object before making a pager for it.  We must
	 * unlock the page queues first. We try to defer the creation of a
	 * pager until all shadows are not paging.  This allows
	 * vm_object_collapse to work better and helps control swap space
	 * size. (J. Dyson 11 Nov 93)
	 */

	if (!object->pager &&
	    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
		return 0;

	if ((!sync && m->hold_count != 0) ||
	    ((m->busy != 0) || (m->flags & PG_BUSY)))
		return 0;

	if (!sync && object->shadow) {
		vm_object_collapse(object);
	}
	pageout_count = 1;
	ms[0] = m;

	pager = object->pager;
	if (pager) {
		for (i = 1; i < vm_pageout_page_count; i++) {
			vm_page_t mt;

			ms[i] = mt = vm_page_lookup(object, offset + i * NBPG);
			if (mt) {
				if (mt->flags & (PG_BUSY|PG_CACHE) || mt->busy)
					break;
				/*
				 * We can cluster ONLY if the page is dirty
				 * (not clean) and is not wired, busy, held,
				 * or mapped into a buffer, and one of the
				 * following holds: 1) the page is inactive
				 * (or a seldom-used active page), or 2) we
				 * force the issue (VM_PAGEOUT_FORCE).
				 */
				vm_page_test_dirty(mt);
				if ((mt->dirty & mt->valid) != 0
				    && ((mt->flags & PG_INACTIVE) ||
						(sync == VM_PAGEOUT_FORCE))
				    && (mt->wire_count == 0)
				    && (mt->hold_count == 0))
					pageout_count++;
				else
					break;
			} else
				break;
		}

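		/*
		 * If the forward scan did not fill the cluster, look at the
		 * pages immediately preceding the target page (by object
		 * offset) and try to add them to the same pageout.
		 */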
		if ((pageout_count < vm_pageout_page_count) && (offset != 0)) {
			b_pageout_count = 0;
			for (i = 0; i < vm_pageout_page_count-pageout_count; i++) {
				vm_page_t mt;

				mt = vm_page_lookup(object, offset - (i + 1) * NBPG);
				if (mt) {
					if (mt->flags & (PG_BUSY|PG_CACHE) || mt->busy)
						break;
					vm_page_test_dirty(mt);
					if ((mt->dirty & mt->valid) != 0
					    && ((mt->flags & PG_INACTIVE) ||
							(sync == VM_PAGEOUT_FORCE))
					    && (mt->wire_count == 0)
					    && (mt->hold_count == 0)) {
						mb[b_pageout_count] = mt;
						b_pageout_count++;
						if ((offset - (i + 1) * NBPG) == 0)
							break;
					} else
						break;
				} else
					break;
			}
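			/*
			 * Pages found by the backward scan were collected in
			 * mb[] in reverse order; shift ms[] up and copy them
			 * in ahead of the original run so ms[] remains in
			 * ascending offset order.
			 */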
			if (b_pageout_count > 0) {
				for (i = pageout_count - 1; i >= 0; --i) {
					ms[i + b_pageout_count] = ms[i];
				}
				for (i = 0; i < b_pageout_count; i++) {
					ms[i] = mb[b_pageout_count - (i + 1)];
				}
				pageout_count += b_pageout_count;
			}
		}

		/*
		 * We allow reads during pageouts: the pages are only
		 * write-protected (VM_PROT_READ), so existing mappings can
		 * still be read while any modification faults and blocks on
		 * the busy page.
		 */
		for (i = 0; i < pageout_count; i++) {
			ms[i]->flags |= PG_BUSY;
			vm_page_protect(ms[i], VM_PROT_READ);
		}
		object->paging_in_progress += pageout_count;
	} else {

		m->flags |= PG_BUSY;

		vm_page_protect(m, VM_PROT_READ);

		object->paging_in_progress++;

		pager = vm_pager_allocate(PG_DFLT, (caddr_t) 0,
		    object->size, VM_PROT_ALL, 0);
		if (pager != NULL) {
			object->pager = pager;
		}
	}

	/*
	 * If there is no pager for the page, use the default pager.  If
	 * there's no place to put the page at the moment, leave it in the
	 * laundry and hope that there will be paging space later.
	 */

	if ((pager && pager->pg_type == PG_SWAP) ||
	    (cnt.v_free_count + cnt.v_cache_count) >= cnt.v_pageout_free_min) {
		if (pageout_count == 1) {
			pageout_status[0] = pager ?
			    vm_pager_put(pager, m,
			    ((sync || (object == kernel_object)) ? TRUE : FALSE)) :
			    VM_PAGER_FAIL;
		} else {
			if (!pager) {
				for (i = 0; i < pageout_count; i++)
					pageout_status[i] = VM_PAGER_FAIL;
			} else {
				vm_pager_put_pages(pager, ms, pageout_count,
				    ((sync || (object == kernel_object)) ? TRUE : FALSE),
				    pageout_status);
			}
		}
	} else {
		for (i = 0; i < pageout_count; i++)
			pageout_status[i] = VM_PAGER_FAIL;
	}

	for (i = 0; i < pageout_count; i++) {
		switch (pageout_status[i]) {
		case VM_PAGER_OK:
			++anyok;
			break;
		case VM_PAGER_PEND:
			++anyok;
			break;
		case VM_PAGER_BAD:
			/*
			 * Page outside of range of object. Right now we
			 * essentially lose the changes by pretending it
			 * worked.
			 */
			pmap_clear_modify(VM_PAGE_TO_PHYS(ms[i]));
			ms[i]->dirty = 0;
			break;
		case VM_PAGER_ERROR:
		case VM_PAGER_FAIL:
			/*
			 * If the page couldn't be paged out, then reactivate
			 * it so it doesn't clog the inactive list.  (We will
			 * try paging it out again later.)
			 */
			if (ms[i]->flags & PG_INACTIVE)
				vm_page_activate(ms[i]);
			break;
		case VM_PAGER_AGAIN:
			break;
		}

		/*
		 * If the operation is still going, leave the page busy to
		 * block all other accesses. Also, leave the paging in
		 * progress indicator set so that we don't attempt an object
		 * collapse.
		 */
		if (pageout_status[i] != VM_PAGER_PEND) {
			vm_object_pip_wakeup(object);
			if ((ms[i]->flags & (PG_REFERENCED|PG_WANTED)) ||
			    pmap_is_referenced(VM_PAGE_TO_PHYS(ms[i]))) {
				pmap_clear_reference(VM_PAGE_TO_PHYS(ms[i]));
				ms[i]->flags &= ~PG_REFERENCED;
				if (ms[i]->flags & PG_INACTIVE)
					vm_page_activate(ms[i]);
			}
			PAGE_WAKEUP(ms[i]);
		}
	}
	return anyok;
}

/*
 *	vm_pageout_object_deactivate_pages
 *
 *	deactivate enough pages to satisfy the inactive target
 *	requirements or if vm_page_proc_limit is set, then
 *	deactivate all of the pages in the object and its
 *	shadows.
 *
 *	The object and map must be locked.
 */
int
vm_pageout_object_deactivate_pages(map, object, count, map_remove_only)
	vm_map_t map;
	vm_object_t object;
	int count;
	int map_remove_only;
{
	register vm_page_t p, next;
	int rcount;
	int dcount;

	dcount = 0;
	if (count == 0)
		count = 1;

	if (object->pager && (object->pager->pg_type == PG_DEVICE))
		return 0;

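	/*
	 * Also work on the shadow (backing) object.  If the shadow is not
	 * shared we may deactivate its pages as well; otherwise only remove
	 * the mappings (map_remove_only).
	 */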
	if (object->shadow) {
		if (object->shadow->ref_count == 1)
			dcount += vm_pageout_object_deactivate_pages(map, object->shadow, count / 2 + 1, map_remove_only);
		else
			vm_pageout_object_deactivate_pages(map, object->shadow, count, 1);
	}
	if (object->paging_in_progress || !vm_object_lock_try(object))
		return dcount;

	/*
	 * scan the object's entire memory queue
	 */
	rcount = object->resident_page_count;
	p = object->memq.tqh_first;
	while (p && (rcount-- > 0)) {
		next = p->listq.tqe_next;
		cnt.v_pdpages++;
		vm_page_lock_queues();
		if (p->wire_count != 0 ||
		    p->hold_count != 0 ||
		    p->busy != 0 ||
		    !pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) {
			p = next;
			continue;
		}
		/*
		 * if a page is active, not wired and is in the process's
		 * pmap, then deactivate the page.
		 */
		if ((p->flags & (PG_ACTIVE | PG_BUSY)) == PG_ACTIVE) {
			if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p)) &&
			    (p->flags & (PG_REFERENCED|PG_WANTED)) == 0) {
				p->act_count -= min(p->act_count, ACT_DECLINE);
				/*
				 * if the page act_count is zero -- then we
				 * deactivate
				 */
				if (!p->act_count) {
					if (!map_remove_only)
						vm_page_deactivate(p);
					vm_page_protect(p, VM_PROT_NONE);
					/*
					 * Otherwise (act_count is still
					 * nonzero), we will deactivate the
					 * page on a later pass; for now, move
					 * it to the end of the queue so the
					 * other pages in memory get aged too.
					 */
				} else {
					TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
					TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
				}
				/*
				 * see if we are done yet
				 */
				if (p->flags & PG_INACTIVE) {
					--count;
					++dcount;
					if (count <= 0 &&
					    cnt.v_inactive_count > cnt.v_inactive_target) {
						vm_page_unlock_queues();
						vm_object_unlock(object);
						return dcount;
					}
				}
			} else {
				/*
				 * Move the page to the bottom of the queue.
				 */
				pmap_clear_reference(VM_PAGE_TO_PHYS(p));
				p->flags &= ~PG_REFERENCED;
				if (p->act_count < ACT_MAX)
					p->act_count += ACT_ADVANCE;

				TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
			}
		} else if ((p->flags & (PG_INACTIVE | PG_BUSY)) == PG_INACTIVE) {
			vm_page_protect(p, VM_PROT_NONE);
		}
		vm_page_unlock_queues();
		p = next;
	}
	vm_object_unlock(object);
	return dcount;
}

/*
 * deactivate some number of pages in a map, try to do it fairly, but
 * that is really hard to do.
 */
void
vm_pageout_map_deactivate_pages(map, entry, count, freeer)
	vm_map_t map;
	vm_map_entry_t entry;
	int *count;
	int (*freeer) (vm_map_t, vm_object_t, int);
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;

	if (*count <= 0)
		return;
	vm_map_reference(map);
	if (!lock_try_read(&map->lock)) {
		vm_map_deallocate(map);
		return;
	}
	if (entry == 0) {
		tmpe = map->header.next;
		while (tmpe != &map->header && *count > 0) {
			vm_pageout_map_deactivate_pages(map, tmpe, count, freeer);
			tmpe = tmpe->next;
		}
	} else if (entry->is_sub_map || entry->is_a_map) {
		tmpm = entry->object.share_map;
		tmpe = tmpm->header.next;
		while (tmpe != &tmpm->header && *count > 0) {
			vm_pageout_map_deactivate_pages(tmpm, tmpe, count, freeer);
			tmpe = tmpe->next;
		}
	} else if ((obj = entry->object.vm_object) != 0) {
		*count -= (*freeer) (map, obj, *count);
	}
	lock_read_done(&map->lock);
	vm_map_deallocate(map);
	return;
}

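/*
 *	vm_req_vmdaemon requests a wakeup of the vm daemon, at most ten times
 *	per second (the ticks < lastrun test handles clock wraparound).
 */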
void
vm_req_vmdaemon()
{
	static int lastrun = 0;

	if ((ticks > (lastrun + hz / 10)) || (ticks < lastrun)) {
		wakeup((caddr_t) &vm_daemon_needed);
		lastrun = ticks;
	}
}

/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 */
int
vm_pageout_scan()
{
	vm_page_t m;
	int page_shortage, maxscan, maxlaunder, pcount;
	int pages_freed;
	vm_page_t next;
	struct proc *p, *bigproc;
	vm_offset_t size, bigsize;
	vm_object_t object;
	int force_wakeup = 0;
	int vnodes_skipped = 0;

	pages_freed = 0;

	/*
	 * Start scanning the inactive queue for pages we can free. We keep
	 * scanning until we have enough free pages or we have scanned through
	 * the entire queue.  If we encounter dirty pages, we start cleaning
	 * them.
	 */

	maxlaunder = (cnt.v_inactive_target > MAXLAUNDER) ?
	    MAXLAUNDER : cnt.v_inactive_target;

rescan1:
	maxscan = cnt.v_inactive_count;
	m = vm_page_queue_inactive.tqh_first;
	while ((m != NULL) && (maxscan-- > 0) &&
	    ((cnt.v_cache_count + cnt.v_free_count) < (cnt.v_cache_min + cnt.v_free_target))) {
		vm_page_t next;

		cnt.v_pdpages++;
		next = m->pageq.tqe_next;

#if defined(VM_DIAGNOSE)
		if ((m->flags & PG_INACTIVE) == 0) {
			printf("vm_pageout_scan: page not inactive?\n");
			break;
		}
#endif

		/*
		 * don't mess with busy pages
		 */
		if (m->hold_count || m->busy || (m->flags & PG_BUSY)) {
			TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
			m = next;
			continue;
		}
		if (((m->flags & PG_REFERENCED) == 0) &&
		    pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
			m->flags |= PG_REFERENCED;
		}
		if (m->object->ref_count == 0) {
			m->flags &= ~PG_REFERENCED;
			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
		}
		if ((m->flags & (PG_REFERENCED|PG_WANTED)) != 0) {
			m->flags &= ~PG_REFERENCED;
			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
			vm_page_activate(m);
			if (m->act_count < ACT_MAX)
				m->act_count += ACT_ADVANCE;
			m = next;
			continue;
		}

		vm_page_test_dirty(m);
		if (m->dirty == 0) {
			if (m->bmapped == 0) {
				if (m->valid == 0) {
					pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
					vm_page_free(m);
					cnt.v_dfree++;
				} else {
					vm_page_cache(m);
				}
				++pages_freed;
			} else {
				m = next;
				continue;
			}
		} else if (maxlaunder > 0) {
			int written;
			struct vnode *vp = NULL;

			object = m->object;
			if ((object->flags & OBJ_DEAD) || !vm_object_lock_try(object)) {
				m = next;
				continue;
			}

			if (object->pager && object->pager->pg_type == PG_VNODE) {
				vp = ((vn_pager_t) object->pager->pg_data)->vnp_vp;
				if (VOP_ISLOCKED(vp) || vget(vp, 1)) {
					vm_object_unlock(object);
					if (object->flags & OBJ_WRITEABLE)
						++vnodes_skipped;
					m = next;
					continue;
				}
			}

			/*
			 * If a page is dirty, then it is either being washed
			 * (but not yet cleaned) or it is still in the
			 * laundry.  If it is still in the laundry, then we
			 * start the cleaning operation.
			 */
			written = vm_pageout_clean(m, 0);

			if (vp)
				vput(vp);

			vm_object_unlock(object);

			if (!next) {
				break;
			}
			maxlaunder -= written;
			/*
			 * if the next page has been re-activated, start
			 * scanning again
			 */
			if ((next->flags & PG_INACTIVE) == 0) {
				goto rescan1;
			}
		}
		m = next;
	}

	/*
	 * Compute the page shortage.  If we are still very low on memory, be
	 * sure that we move at least a minimal number of pages from the
	 * active queue to the inactive queue.
	 */

	page_shortage = cnt.v_inactive_target -
	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
	if (page_shortage <= 0) {
		if (pages_freed == 0) {
			page_shortage = cnt.v_free_min - cnt.v_free_count;
		} else {
			page_shortage = 1;
		}
	}
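	/*
	 * Scan the active queue, aging pages (decrementing act_count when
	 * they have not been referenced) and deactivating or caching them
	 * once their act_count reaches zero, until the shortage is met.
	 */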
	maxscan = MAXSCAN;
	pcount = cnt.v_active_count;
	m = vm_page_queue_active.tqh_first;
	while ((m != NULL) && (maxscan > 0) && (pcount-- > 0) && (page_shortage > 0)) {

		cnt.v_pdpages++;
		next = m->pageq.tqe_next;

		/*
		 * Don't deactivate pages that are busy.
		 */
		if ((m->busy != 0) ||
		    (m->flags & PG_BUSY) ||
		    (m->hold_count != 0)) {
			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			m = next;
			continue;
		}
		if (m->object->ref_count && ((m->flags & (PG_REFERENCED|PG_WANTED)) ||
			pmap_is_referenced(VM_PAGE_TO_PHYS(m)))) {
			int s;

			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
			m->flags &= ~PG_REFERENCED;
			if (m->act_count < ACT_MAX) {
				m->act_count += ACT_ADVANCE;
			}
			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
		} else {
			m->flags &= ~PG_REFERENCED;
			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
			m->act_count -= min(m->act_count, ACT_DECLINE);

			/*
			 * if the page act_count is zero -- then we deactivate
			 */
			if (!m->act_count && (page_shortage > 0)) {
				if (m->object->ref_count == 0) {
					--page_shortage;
					vm_page_test_dirty(m);
					if ((m->bmapped == 0) && (m->dirty == 0) ) {
						m->act_count = 0;
						vm_page_cache(m);
					} else {
						vm_page_deactivate(m);
					}
				} else {
					vm_page_deactivate(m);
					--page_shortage;
				}
			} else if (m->act_count) {
				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			}
		}
		maxscan--;
		m = next;
	}


	/*
	 * We try to maintain some *really* free pages; this allows interrupt
	 * code to be guaranteed space.
	 */
	while (cnt.v_free_count < cnt.v_free_reserved) {
		m = vm_page_queue_cache.tqh_first;
		if (!m)
			break;
		vm_page_free(m);
		cnt.v_dfree++;
	}

	/*
	 * If we didn't get enough free pages and we skipped a vnode in a
	 * writeable object, wake up the sync daemon.  Also kick off a
	 * swapout if we are still short of free pages.
	 */
	if ((cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_target) {
		if (vnodes_skipped &&
		    (cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_min) {
			if (!vfs_update_wakeup) {
				vfs_update_wakeup = 1;
				wakeup((caddr_t) &vfs_update_wakeup);
			}
		}
		/*
		 * now swap processes out if we are in low memory conditions
		 */
		if (!swap_pager_full && vm_swap_size &&
			vm_pageout_req_swapout == 0) {
			vm_pageout_req_swapout = 1;
			vm_req_vmdaemon();
		}
	}

	if ((cnt.v_inactive_count + cnt.v_free_count + cnt.v_cache_count) <
	    (cnt.v_inactive_target + cnt.v_free_min)) {
		vm_req_vmdaemon();
	}

	/*
	 * Make sure that we have swap space -- if we are low on both memory
	 * and swap, kill the biggest process.
	 */
	if ((vm_swap_size == 0 || swap_pager_full) &&
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min)) {
		bigproc = NULL;
		bigsize = 0;
		for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
			/*
			 * Skip system processes, init, and low-numbered pids
			 * (the latter only while some swap remains).
			 */
			if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) ||
			    ((p->p_pid < 48) && (vm_swap_size != 0))) {
				continue;
			}
			/*
			 * if the process is in a non-running type state,
			 * don't touch it.
			 */
			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
				continue;
			}
			/*
			 * get the process size
			 */
			size = p->p_vmspace->vm_pmap.pm_stats.resident_count;
			/*
			 * if this process is bigger than the biggest one so
			 * far, remember it.
			 */
			if (size > bigsize) {
				bigproc = p;
				bigsize = size;
			}
		}
		if (bigproc != NULL) {
			printf("Process %lu killed by vm_pageout -- out of swap\n", (u_long) bigproc->p_pid);
			psignal(bigproc, SIGKILL);
			bigproc->p_estcpu = 0;
			bigproc->p_nice = PRIO_MIN;
			resetpriority(bigproc);
			wakeup((caddr_t) &cnt.v_free_count);
		}
	}
	return force_wakeup;
}

/*
 *	vm_pageout is the high level pageout daemon.
 */
void
vm_pageout()
{
	(void) spl0();

	/*
	 * Initialize some paging parameters.
	 */

	cnt.v_interrupt_free_min = 2;

	if (cnt.v_page_count > 1024)
		cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200;
	else
		cnt.v_free_min = 4;
	/*
	 * free_reserved needs to include enough for the largest swap pager
	 * structures plus enough for any pv_entry structs when paging.
	 */
	cnt.v_pageout_free_min = 6 + cnt.v_page_count / 1024 +
				cnt.v_interrupt_free_min;
	cnt.v_free_reserved = cnt.v_pageout_free_min + 2;
	cnt.v_free_target = 3 * cnt.v_free_min + cnt.v_free_reserved;
	cnt.v_free_min += cnt.v_free_reserved;

	if (cnt.v_page_count > 1024) {
		cnt.v_cache_max = (cnt.v_free_count - 1024) / 2;
		cnt.v_cache_min = (cnt.v_free_count - 1024) / 8;
		cnt.v_inactive_target = 2*cnt.v_cache_min + 192;
	} else {
		cnt.v_cache_min = 0;
		cnt.v_cache_max = 0;
		cnt.v_inactive_target = cnt.v_free_count / 4;
	}

	/* XXX does not really belong here */
	if (vm_page_max_wired == 0)
		vm_page_max_wired = cnt.v_free_count / 3;

	(void) swap_pager_alloc(0, 0, 0, 0);
	/*
	 * The pageout daemon is never done, so loop forever.
	 */
	while (TRUE) {
		int s = splhigh();

		if (!vm_pages_needed ||
			((cnt.v_free_count >= cnt.v_free_reserved) &&
			 (cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min))) {
			vm_pages_needed = 0;
			tsleep((caddr_t) &vm_pages_needed, PVM, "psleep", 0);
		}
		vm_pages_needed = 0;
		splx(s);
		cnt.v_pdwakeups++;
		vm_pager_sync();
		vm_pageout_scan();
		vm_pager_sync();
		wakeup((caddr_t) &cnt.v_free_count);
		wakeup((caddr_t) kmem_map);
	}
}

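/*
 *	vm_daemon handles deferred swapout requests from the pageout daemon
 *	and enforces per-process resident-set-size limits by deactivating
 *	pages.
 */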
void
vm_daemon()
{
	vm_object_t object;
	struct proc *p;

	while (TRUE) {
		tsleep((caddr_t) &vm_daemon_needed, PUSER, "psleep", 0);
		if (vm_pageout_req_swapout) {
			swapout_threads();
			vm_pageout_req_swapout = 0;
		}
		/*
		 * Scan the processes: if a process exceeds its RSS rlimit or
		 * is swapped out, deactivate some of its pages.
		 */

		for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
			int overage;
			quad_t limit;
			vm_offset_t size;

			/*
			 * if this is a system process or a process that is
			 * exiting, skip it.
			 */
			if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
				continue;
			}
			/*
			 * if the process is in a non-running type state,
			 * don't touch it.
			 */
			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
				continue;
			}
			/*
			 * get a limit
			 */
			limit = qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
			    p->p_rlimit[RLIMIT_RSS].rlim_max);

			/*
			 * Let processes that are swapped out really be
			 * swapped out: set the limit to nothing (this will
			 * force a swap-out).
			 */
			if ((p->p_flag & P_INMEM) == 0)
				limit = 0;	/* XXX */

			size = p->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG;
			if (limit >= 0 && size >= limit) {
				overage = (size - limit) / NBPG;
				vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map,
				    (vm_map_entry_t) 0, &overage, vm_pageout_object_deactivate_pages);
			}
		}
	}

	/*
	 * we remove cached objects that have no RSS...
	 */
restart:
	vm_object_cache_lock();
	object = vm_object_cached_list.tqh_first;
	while (object) {
		vm_object_cache_unlock();
		/*
		 * if there are no resident pages -- get rid of the object
		 */
		if (object->resident_page_count == 0) {
			if (object != vm_object_lookup(object->pager))
				panic("vm_object_cache_trim: I'm sooo confused.");
			pager_cache(object, FALSE);
			goto restart;
		}
		object = object->cached_list.tqe_next;
		vm_object_cache_lock();
	}
	vm_object_cache_unlock();
}
961