vm_pageout.c revision 212360
118334Speter/*- 256392Sobrien * Copyright (c) 1991 Regents of the University of California. 318334Speter * All rights reserved. 418334Speter * Copyright (c) 1994 John S. Dyson 518334Speter * All rights reserved. 618334Speter * Copyright (c) 1994 David Greenman 718334Speter * All rights reserved. 818334Speter * Copyright (c) 2005 Yahoo! Technologies Norway AS 918334Speter * All rights reserved. 1018334Speter * 1118334Speter * This code is derived from software contributed to Berkeley by 1218334Speter * The Mach Operating System project at Carnegie-Mellon University. 1318334Speter * 1418334Speter * Redistribution and use in source and binary forms, with or without 1518334Speter * modification, are permitted provided that the following conditions 1618334Speter * are met: 1718334Speter * 1. Redistributions of source code must retain the above copyright 1818334Speter * notice, this list of conditions and the following disclaimer. 1918334Speter * 2. Redistributions in binary form must reproduce the above copyright 2018334Speter * notice, this list of conditions and the following disclaimer in the 2152558Sobrien * documentation and/or other materials provided with the distribution. 2252558Sobrien * 3. All advertising materials mentioning features or use of this software 2318334Speter * must display the following acknowledgement: 2418334Speter * This product includes software developed by the University of 2518334Speter * California, Berkeley and its contributors. 2618334Speter * 4. Neither the name of the University nor the names of its contributors 2718334Speter * may be used to endorse or promote products derived from this software 2818334Speter * without specific prior written permission. 2950615Sobrien * 3050615Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 3150615Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 3218334Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3318334Speter * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 3418334Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3550615Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3650615Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3718334Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3850615Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3950615Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 4050615Sobrien * SUCH DAMAGE. 4118334Speter * 4218334Speter * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 4318334Speter * 4418334Speter * 4518334Speter * Copyright (c) 1987, 1990 Carnegie-Mellon University. 4618334Speter * All rights reserved. 4718334Speter * 4850615Sobrien * Authors: Avadis Tevanian, Jr., Michael Wayne Young 4950615Sobrien * 5050615Sobrien * Permission to use, copy, modify and distribute this software and 5118334Speter * its documentation is hereby granted, provided that both the copyright 5218334Speter * notice and this permission notice appear in all copies of the 5350615Sobrien * software, derivative works or modified versions, and any portions 5450615Sobrien * thereof, and that both notices appear in supporting documentation. 5552558Sobrien * 5652558Sobrien * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 5752558Sobrien * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 5818334Speter * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 5950615Sobrien * 6050615Sobrien * Carnegie Mellon requests users of this software to return to 6150615Sobrien * 6250615Sobrien * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 6350615Sobrien * School of Computer Science 6450615Sobrien * Carnegie Mellon University 6550615Sobrien * Pittsburgh PA 15213-3890 6650615Sobrien * 6750615Sobrien * any improvements or extensions that they make and grant Carnegie the 6850615Sobrien * rights to redistribute these changes. 6950615Sobrien */ 7050615Sobrien 7150615Sobrien/* 7250615Sobrien * The proverbial page-out daemon. 7350615Sobrien */ 7450615Sobrien 7518334Speter#include <sys/cdefs.h> 7618334Speter__FBSDID("$FreeBSD: head/sys/vm/vm_pageout.c 212360 2010-09-09 13:32:58Z nwhitehorn $"); 7718334Speter 7818334Speter#include "opt_vm.h" 7918334Speter#include <sys/param.h> 8018334Speter#include <sys/systm.h> 8118334Speter#include <sys/kernel.h> 8218334Speter#include <sys/eventhandler.h> 8318334Speter#include <sys/lock.h> 8418334Speter#include <sys/mutex.h> 8518334Speter#include <sys/proc.h> 8618334Speter#include <sys/kthread.h> 8718334Speter#include <sys/ktr.h> 8818334Speter#include <sys/mount.h> 8918334Speter#include <sys/resourcevar.h> 9018334Speter#include <sys/sched.h> 9118334Speter#include <sys/signalvar.h> 9218334Speter#include <sys/vnode.h> 9318334Speter#include <sys/vmmeter.h> 9418334Speter#include <sys/sx.h> 9518334Speter#include <sys/sysctl.h> 9618334Speter 9718334Speter#include <vm/vm.h> 9818334Speter#include <vm/vm_param.h> 9918334Speter#include <vm/vm_object.h> 10018334Speter#include <vm/vm_page.h> 10118334Speter#include <vm/vm_map.h> 10218334Speter#include <vm/vm_pageout.h> 10318334Speter#include <vm/vm_pager.h> 10450615Sobrien#include <vm/swap_pager.h> 10550615Sobrien#include <vm/vm_extern.h> 10650615Sobrien#include <vm/uma.h> 10750615Sobrien 10850615Sobrien/* 10950615Sobrien * System initialization 11050615Sobrien */ 11150615Sobrien 11250615Sobrien/* the kernel process "vm_pageout"*/ 11350615Sobrienstatic void vm_pageout(void); 11450615Sobrienstatic int vm_pageout_clean(vm_page_t); 11550615Sobrienstatic void vm_pageout_scan(int pass); 11650615Sobrien 11750615Sobrienstruct proc *pageproc; 11850615Sobrien 11950615Sobrienstatic struct kproc_desc page_kp = { 12050615Sobrien "pagedaemon", 12150615Sobrien vm_pageout, 12250615Sobrien &pageproc 12350615Sobrien}; 12450615SobrienSYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, 12550615Sobrien &page_kp); 12650615Sobrien 12750615Sobrien#if !defined(NO_SWAPPING) 12850615Sobrien/* the kernel process "vm_daemon"*/ 12950615Sobrienstatic void vm_daemon(void); 13050615Sobrienstatic struct proc *vmproc; 13150615Sobrien 13250615Sobrienstatic struct kproc_desc vm_kp = { 13350615Sobrien "vmdaemon", 13450615Sobrien vm_daemon, 13550615Sobrien &vmproc 13650615Sobrien}; 13752558SobrienSYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); 13852558Sobrien#endif 13952558Sobrien 14052558Sobrien 14118334Speterint vm_pages_needed; /* Event on which pageout daemon sleeps */ 14218334Speterint vm_pageout_deficit; /* Estimated number of pages deficit */ 14318334Speterint vm_pageout_pages_needed; /* flag saying that the pageout daemon needs pages */ 14418334Speter 14518334Speter#if !defined(NO_SWAPPING) 14618334Speterstatic int vm_pageout_req_swapout; /* XXX */ 14718334Speterstatic int vm_daemon_needed; 14818334Speterstatic struct mtx vm_daemon_mtx; 14918334Speter/* Allow for use by vm_pageout before vm_daemon is initialized. */ 15018334SpeterMTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); 15118334Speter#endif 15218334Speterstatic int vm_max_launder = 32; 15318334Speterstatic int vm_pageout_stats_max=0, vm_pageout_stats_interval = 0; 15450615Sobrienstatic int vm_pageout_full_stats_interval = 0; 15550615Sobrienstatic int vm_pageout_algorithm=0; 15618334Speterstatic int defer_swap_pageouts=0; 15718334Speterstatic int disable_swap_pageouts=0; 15818334Speter 15918334Speter#if defined(NO_SWAPPING) 16018334Speterstatic int vm_swap_enabled=0; 16118334Speterstatic int vm_swap_idle_enabled=0; 16218334Speter#else 16318334Speterstatic int vm_swap_enabled=1; 16418334Speterstatic int vm_swap_idle_enabled=0; 16518334Speter#endif 16618334Speter 16750615SobrienSYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm, 16818334Speter CTLFLAG_RW, &vm_pageout_algorithm, 0, "LRU page mgmt"); 16950615Sobrien 17050615SobrienSYSCTL_INT(_vm, OID_AUTO, max_launder, 17150615Sobrien CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout"); 17218334Speter 17352558SobrienSYSCTL_INT(_vm, OID_AUTO, pageout_stats_max, 17452558Sobrien CTLFLAG_RW, &vm_pageout_stats_max, 0, "Max pageout stats scan length"); 17552558Sobrien 17652558SobrienSYSCTL_INT(_vm, OID_AUTO, pageout_full_stats_interval, 17752558Sobrien CTLFLAG_RW, &vm_pageout_full_stats_interval, 0, "Interval for full stats scan"); 17852558Sobrien 17952558SobrienSYSCTL_INT(_vm, OID_AUTO, pageout_stats_interval, 18052558Sobrien CTLFLAG_RW, &vm_pageout_stats_interval, 0, "Interval for partial stats scan"); 18152558Sobrien 18252558Sobrien#if defined(NO_SWAPPING) 18352558SobrienSYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, 18452558Sobrien CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); 18552558SobrienSYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, 18652558Sobrien CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); 18752558Sobrien#else 18852558SobrienSYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, 18952558Sobrien CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); 19050615SobrienSYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, 19152558Sobrien CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); 19252558Sobrien#endif 19352558Sobrien 19452558SobrienSYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts, 19550615Sobrien CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem"); 19652558Sobrien 19752558SobrienSYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, 19852558Sobrien CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); 19952558Sobrien 20052558Sobrienstatic int pageout_lock_miss; 20152558SobrienSYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, 20252558Sobrien CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); 20352558Sobrien 20452558Sobrien#define VM_PAGEOUT_PAGE_COUNT 16 20552558Sobrienint vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; 20652558Sobrien 20752558Sobrienint vm_page_max_wired; /* XXX max # of wired pages system-wide */ 20852558SobrienSYSCTL_INT(_vm, OID_AUTO, max_wired, 20952558Sobrien CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); 21052558Sobrien 21152558Sobrien#if !defined(NO_SWAPPING) 21252558Sobrienstatic void vm_pageout_map_deactivate_pages(vm_map_t, long); 21352558Sobrienstatic void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); 21452558Sobrienstatic void vm_req_vmdaemon(int req); 21552558Sobrien#endif 21652558Sobrienstatic void vm_pageout_page_stats(void); 21752558Sobrien 21852558Sobrienstatic void 21950615Sobrienvm_pageout_init_marker(vm_page_t marker, u_short queue) 22052558Sobrien{ 22152558Sobrien 22250615Sobrien bzero(marker, sizeof(*marker)); 22350615Sobrien marker->flags = PG_FICTITIOUS | PG_MARKER; 22418334Speter marker->oflags = VPO_BUSY; 22552558Sobrien marker->queue = queue; 22652558Sobrien marker->wire_count = 1; 22752558Sobrien} 22852558Sobrien 22952558Sobrien/* 23052558Sobrien * vm_pageout_fallback_object_lock: 23152558Sobrien * 23252558Sobrien * Lock vm object currently associated with `m'. VM_OBJECT_TRYLOCK is 23352558Sobrien * known to have failed and page queue must be either PQ_ACTIVE or 23452558Sobrien * PQ_INACTIVE. To avoid lock order violation, unlock the page queues 23550615Sobrien * while locking the vm object. Use marker page to detect page queue 23650615Sobrien * changes and maintain notion of next page on page queue. Return 23718334Speter * TRUE if no changes were detected, FALSE otherwise. vm object is 23818334Speter * locked on return. 23918334Speter * 24018334Speter * This function depends on both the lock portion of struct vm_object 24118334Speter * and normal struct vm_page being type stable. 24218334Speter */ 24318334Speterboolean_t 24418334Spetervm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) 24518334Speter{ 24618334Speter struct vm_page marker; 24718334Speter boolean_t unchanged; 24818334Speter u_short queue; 24918334Speter vm_object_t object; 25018334Speter 25118334Speter queue = m->queue; 25218334Speter vm_pageout_init_marker(&marker, queue); 25318334Speter object = m->object; 25418334Speter 25518334Speter TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl, 25618334Speter m, &marker, pageq); 25718334Speter vm_page_unlock_queues(); 25818334Speter vm_page_unlock(m); 25918334Speter VM_OBJECT_LOCK(object); 26018334Speter vm_page_lock(m); 26152558Sobrien vm_page_lock_queues(); 26252558Sobrien 26352558Sobrien /* Page queue might have changed. */ 26418334Speter *next = TAILQ_NEXT(&marker, pageq); 26518334Speter unchanged = (m->queue == queue && 26618334Speter m->object == object && 26718334Speter &marker == TAILQ_NEXT(m, pageq)); 26818334Speter TAILQ_REMOVE(&vm_page_queues[queue].pl, 26918334Speter &marker, pageq); 27018334Speter return (unchanged); 27118334Speter} 27218334Speter 27318334Speter/* 27418334Speter * Lock the page while holding the page queue lock. Use marker page 27518334Speter * to detect page queue changes and maintain notion of next page on 27618334Speter * page queue. Return TRUE if no changes were detected, FALSE 27752558Sobrien * otherwise. The page is locked on return. The page queue lock might 27818334Speter * be dropped and reacquired. 27918334Speter * 28018334Speter * This function depends on normal struct vm_page being type stable. 28118334Speter */ 28218334Speterboolean_t 28318334Spetervm_pageout_page_lock(vm_page_t m, vm_page_t *next) 28418334Speter{ 28518334Speter struct vm_page marker; 28618334Speter boolean_t unchanged; 28718334Speter u_short queue; 28818334Speter 28918334Speter vm_page_lock_assert(m, MA_NOTOWNED); 29050615Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 29118334Speter 29250615Sobrien if (vm_page_trylock(m)) 29318334Speter return (TRUE); 29418334Speter 29550615Sobrien queue = m->queue; 29618334Speter vm_pageout_init_marker(&marker, queue); 29718334Speter 29850615Sobrien TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl, m, &marker, pageq); 29918334Speter vm_page_unlock_queues(); 30018334Speter vm_page_lock(m); 30118334Speter vm_page_lock_queues(); 30252558Sobrien 30318334Speter /* Page queue might have changed. */ 30418334Speter *next = TAILQ_NEXT(&marker, pageq); 30550615Sobrien unchanged = (m->queue == queue && &marker == TAILQ_NEXT(m, pageq)); 30618334Speter TAILQ_REMOVE(&vm_page_queues[queue].pl, &marker, pageq); 30750615Sobrien return (unchanged); 30818334Speter} 30950615Sobrien 31018334Speter/* 31150615Sobrien * vm_pageout_clean: 31250615Sobrien * 31350615Sobrien * Clean the page and remove it from the laundry. 31450615Sobrien * 31552558Sobrien * We set the busy bit to cause potential page faults on this page to 31618334Speter * block. Note the careful timing, however, the busy bit isn't set till 31718334Speter * late and we cannot do anything that will mess with the page. 31818334Speter */ 31918334Speterstatic int 32018334Spetervm_pageout_clean(vm_page_t m) 32118334Speter{ 32218334Speter vm_object_t object; 32318334Speter vm_page_t mc[2*vm_pageout_page_count], pb, ps; 32418334Speter int pageout_count; 32518334Speter int ib, is, page_base; 32618334Speter vm_pindex_t pindex = m->pindex; 32718334Speter 32818334Speter vm_page_lock_assert(m, MA_OWNED); 32918334Speter VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 33018334Speter 33118334Speter /* 33218334Speter * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP 33318334Speter * with the new swapper, but we could have serious problems paging 33418334Speter * out other object types if there is insufficient memory. 33518334Speter * 33618334Speter * Unfortunately, checking free memory here is far too late, so the 33718334Speter * check has been moved up a procedural level. 33818334Speter */ 33918334Speter 34018334Speter /* 34118334Speter * Can't clean the page if it's busy or held. 34218334Speter */ 34318334Speter KASSERT(m->busy == 0 && (m->oflags & VPO_BUSY) == 0, 34418334Speter ("vm_pageout_clean: page %p is busy", m)); 34518334Speter KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m)); 34618334Speter 34718334Speter mc[vm_pageout_page_count] = pb = ps = m; 34818334Speter pageout_count = 1; 34950615Sobrien page_base = vm_pageout_page_count; 35050615Sobrien ib = 1; 35150615Sobrien is = 1; 35250615Sobrien 35350615Sobrien /* 35450615Sobrien * Scan object for clusterable pages. 35550615Sobrien * 35618334Speter * We can cluster ONLY if: ->> the page is NOT 35718334Speter * clean, wired, busy, held, or mapped into a 35818334Speter * buffer, and one of the following: 35918334Speter * 1) The page is inactive, or a seldom used 36018334Speter * active page. 36118334Speter * -or- 36250615Sobrien * 2) we force the issue. 36350615Sobrien * 36450615Sobrien * During heavy mmap/modification loads the pageout 36550615Sobrien * daemon can really fragment the underlying file 36650615Sobrien * due to flushing pages out of order and not trying 36750615Sobrien * align the clusters (which leave sporatic out-of-order 36850615Sobrien * holes). To solve this problem we do the reverse scan 36918334Speter * first and attempt to align our cluster, then do a 37052558Sobrien * forward scan if room remains. 37118334Speter */ 37218334Speter object = m->object; 37318334Spetermore: 37452558Sobrien while (ib && pageout_count < vm_pageout_page_count) { 37552558Sobrien vm_page_t p; 37652558Sobrien 37718334Speter if (ib > pindex) { 37852558Sobrien ib = 0; 37952558Sobrien break; 38052558Sobrien } 38152558Sobrien 38218334Speter if ((p = vm_page_prev(pb)) == NULL || 38318334Speter (p->oflags & VPO_BUSY) != 0 || p->busy != 0) { 38418334Speter ib = 0; 38552558Sobrien break; 38618334Speter } 38718334Speter vm_page_lock(p); 38818334Speter vm_page_test_dirty(p); 38918334Speter if (p->dirty == 0 || 39018334Speter p->queue != PQ_INACTIVE || 39118334Speter p->hold_count != 0) { /* may be undergoing I/O */ 39218334Speter vm_page_unlock(p); 39318334Speter ib = 0; 39418334Speter break; 39550615Sobrien } 39650615Sobrien vm_page_unlock(p); 39750615Sobrien mc[--page_base] = pb = p; 39850615Sobrien ++pageout_count; 39950615Sobrien ++ib; 40050615Sobrien /* 40150615Sobrien * alignment boundry, stop here and switch directions. Do 40250615Sobrien * not clear ib. 40350615Sobrien */ 40450615Sobrien if ((pindex - (ib - 1)) % vm_pageout_page_count == 0) 40550615Sobrien break; 40650615Sobrien } 40718334Speter 40818334Speter while (pageout_count < vm_pageout_page_count && 40918334Speter pindex + is < object->size) { 41018334Speter vm_page_t p; 41118334Speter 41218334Speter if ((p = vm_page_next(ps)) == NULL || 41318334Speter (p->oflags & VPO_BUSY) != 0 || p->busy != 0) 41418334Speter break; 41518334Speter vm_page_lock(p); 41618334Speter vm_page_test_dirty(p); 41718334Speter if (p->dirty == 0 || 41818334Speter p->queue != PQ_INACTIVE || 41918334Speter p->hold_count != 0) { /* may be undergoing I/O */ 42018334Speter vm_page_unlock(p); 42118334Speter break; 42218334Speter } 42318334Speter vm_page_unlock(p); 42418334Speter mc[page_base + pageout_count] = ps = p; 42518334Speter ++pageout_count; 42618334Speter ++is; 42718334Speter } 42818334Speter 42918334Speter /* 43018334Speter * If we exhausted our forward scan, continue with the reverse scan 43118334Speter * when possible, even past a page boundry. This catches boundry 43218334Speter * conditions. 43318334Speter */ 43418334Speter if (ib && pageout_count < vm_pageout_page_count) 43518334Speter goto more; 43618334Speter 43718334Speter vm_page_unlock(m); 43818334Speter /* 43918334Speter * we allow reads during pageouts... 44018334Speter */ 44118334Speter return (vm_pageout_flush(&mc[page_base], pageout_count, 0)); 44218334Speter} 44318334Speter 44418334Speter/* 44518334Speter * vm_pageout_flush() - launder the given pages 44618334Speter * 44718334Speter * The given pages are laundered. Note that we setup for the start of 44818334Speter * I/O ( i.e. busy the page ), mark it read-only, and bump the object 44918334Speter * reference count all in here rather then in the parent. If we want 45018334Speter * the parent to do more sophisticated things we may have to change 45118334Speter * the ordering. 45218334Speter */ 45318334Speterint 45418334Spetervm_pageout_flush(vm_page_t *mc, int count, int flags) 45518334Speter{ 45618334Speter vm_object_t object = mc[0]->object; 45718334Speter int pageout_status[count]; 45818334Speter int numpagedout = 0; 45918334Speter int i; 46018334Speter 46118334Speter VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 46218334Speter mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); 46318334Speter 46418334Speter /* 46518334Speter * Initiate I/O. Bump the vm_page_t->busy counter and 46618334Speter * mark the pages read-only. 46718334Speter * 46818334Speter * We do not have to fixup the clean/dirty bits here... we can 46918334Speter * allow the pager to do it after the I/O completes. 47018334Speter * 47118334Speter * NOTE! mc[i]->dirty may be partial or fragmented due to an 47218334Speter * edge case with file fragments. 47318334Speter */ 47418334Speter for (i = 0; i < count; i++) { 47518334Speter KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, 47618334Speter ("vm_pageout_flush: partially invalid page %p index %d/%d", 47718334Speter mc[i], i, count)); 47818334Speter vm_page_io_start(mc[i]); 47918334Speter pmap_remove_write(mc[i]); 48018334Speter } 48118334Speter vm_object_pip_add(object, count); 48218334Speter 48318334Speter vm_pager_put_pages(object, mc, count, flags, pageout_status); 48418334Speter 48518334Speter for (i = 0; i < count; i++) { 48618334Speter vm_page_t mt = mc[i]; 48718334Speter 48818334Speter KASSERT(pageout_status[i] == VM_PAGER_PEND || 48918334Speter (mt->flags & PG_WRITEABLE) == 0, 49018334Speter ("vm_pageout_flush: page %p is not write protected", mt)); 49118334Speter switch (pageout_status[i]) { 49218334Speter case VM_PAGER_OK: 49318334Speter case VM_PAGER_PEND: 49418334Speter numpagedout++; 49518334Speter break; 49618334Speter case VM_PAGER_BAD: 49718334Speter /* 49818334Speter * Page outside of range of object. Right now we 49918334Speter * essentially lose the changes by pretending it 50018334Speter * worked. 50118334Speter */ 50218334Speter vm_page_undirty(mt); 50318334Speter break; 50418334Speter case VM_PAGER_ERROR: 50518334Speter case VM_PAGER_FAIL: 50618334Speter /* 50718334Speter * If page couldn't be paged out, then reactivate the 50818334Speter * page so it doesn't clog the inactive list. (We 50918334Speter * will try paging out it again later). 51018334Speter */ 51118334Speter vm_page_lock(mt); 51218334Speter vm_page_activate(mt); 51318334Speter vm_page_unlock(mt); 51450615Sobrien break; 51550615Sobrien case VM_PAGER_AGAIN: 51650615Sobrien break; 51750615Sobrien } 51850615Sobrien 51950615Sobrien /* 52050615Sobrien * If the operation is still going, leave the page busy to 52150615Sobrien * block all other accesses. Also, leave the paging in 52250615Sobrien * progress indicator set so that we don't attempt an object 52350615Sobrien * collapse. 52450615Sobrien */ 52550615Sobrien if (pageout_status[i] != VM_PAGER_PEND) { 52650615Sobrien vm_object_pip_wakeup(object); 52750615Sobrien vm_page_io_finish(mt); 52850615Sobrien if (vm_page_count_severe()) { 52918334Speter vm_page_lock(mt); 53018334Speter vm_page_try_to_cache(mt); 53118334Speter vm_page_unlock(mt); 53218334Speter } 53318334Speter } 53418334Speter } 53518334Speter return (numpagedout); 53618334Speter} 53718334Speter 53818334Speter#if !defined(NO_SWAPPING) 53918334Speter/* 54018334Speter * vm_pageout_object_deactivate_pages 54118334Speter * 54218334Speter * Deactivate enough pages to satisfy the inactive target 54318334Speter * requirements. 54418334Speter * 54550615Sobrien * The object and map must be locked. 54650615Sobrien */ 54750615Sobrienstatic void 54850615Sobrienvm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, 54950615Sobrien long desired) 55052558Sobrien{ 55152558Sobrien vm_object_t backing_object, object; 55252558Sobrien vm_page_t p; 55352558Sobrien int actcount, remove_mode; 55418334Speter 55518334Speter VM_OBJECT_LOCK_ASSERT(first_object, MA_OWNED); 55618334Speter if (first_object->type == OBJT_DEVICE || 55718334Speter first_object->type == OBJT_SG) 55818334Speter return; 55918334Speter for (object = first_object;; object = backing_object) { 56018334Speter if (pmap_resident_count(pmap) <= desired) 56150615Sobrien goto unlock_return; 56218334Speter VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 56318334Speter if (object->type == OBJT_PHYS || object->paging_in_progress) 56418334Speter goto unlock_return; 56552558Sobrien 56652558Sobrien remove_mode = 0; 56752558Sobrien if (object->shadow_count > 1) 56852558Sobrien remove_mode = 1; 56952558Sobrien /* 57052558Sobrien * Scan the object's entire memory queue. 57152558Sobrien */ 57252558Sobrien TAILQ_FOREACH(p, &object->memq, listq) { 57352558Sobrien if (pmap_resident_count(pmap) <= desired) 57452558Sobrien goto unlock_return; 57552558Sobrien if ((p->oflags & VPO_BUSY) != 0 || p->busy != 0) 57618334Speter continue; 57718334Speter PCPU_INC(cnt.v_pdpages); 57818334Speter vm_page_lock(p); 57918334Speter if (p->wire_count != 0 || p->hold_count != 0 || 58052558Sobrien !pmap_page_exists_quick(pmap, p)) { 58118334Speter vm_page_unlock(p); 58218334Speter continue; 58318334Speter } 58452558Sobrien actcount = pmap_ts_referenced(p); 58552558Sobrien if ((p->flags & PG_REFERENCED) != 0) { 58652558Sobrien if (actcount == 0) 58752558Sobrien actcount = 1; 58818334Speter vm_page_lock_queues(); 58918334Speter vm_page_flag_clear(p, PG_REFERENCED); 59018334Speter vm_page_unlock_queues(); 59118334Speter } 59250615Sobrien if (p->queue != PQ_ACTIVE && actcount != 0) { 59350615Sobrien vm_page_activate(p); 59450615Sobrien p->act_count += actcount; 59550615Sobrien } else if (p->queue == PQ_ACTIVE) { 59618334Speter if (actcount == 0) { 59718334Speter p->act_count -= min(p->act_count, 59818334Speter ACT_DECLINE); 59918334Speter if (!remove_mode && 60018334Speter (vm_pageout_algorithm || 60150615Sobrien p->act_count == 0)) { 60250615Sobrien pmap_remove_all(p); 60350615Sobrien vm_page_deactivate(p); 60450615Sobrien } else { 60518334Speter vm_page_lock_queues(); 60618334Speter vm_page_requeue(p); 60718334Speter vm_page_unlock_queues(); 60818334Speter } 60918334Speter } else { 61018334Speter vm_page_activate(p); 61118334Speter if (p->act_count < ACT_MAX - 61218334Speter ACT_ADVANCE) 61318334Speter p->act_count += ACT_ADVANCE; 61418334Speter vm_page_lock_queues(); 61518334Speter vm_page_requeue(p); 61618334Speter vm_page_unlock_queues(); 61718334Speter } 61818334Speter } else if (p->queue == PQ_INACTIVE) 61918334Speter pmap_remove_all(p); 62050615Sobrien vm_page_unlock(p); 62150615Sobrien } 62250615Sobrien if ((backing_object = object->backing_object) == NULL) 62350615Sobrien goto unlock_return; 62450615Sobrien VM_OBJECT_LOCK(backing_object); 62518334Speter if (object != first_object) 62618334Speter VM_OBJECT_UNLOCK(object); 62718334Speter } 62818334Speterunlock_return: 62918334Speter if (object != first_object) 63018334Speter VM_OBJECT_UNLOCK(object); 63118334Speter} 63218334Speter 63318334Speter/* 63418334Speter * deactivate some number of pages in a map, try to do it fairly, but 63518334Speter * that is really hard to do. 63618334Speter */ 63718334Speterstatic void 63818334Spetervm_pageout_map_deactivate_pages(map, desired) 63918334Speter vm_map_t map; 64018334Speter long desired; 64118334Speter{ 64218334Speter vm_map_entry_t tmpe; 64318334Speter vm_object_t obj, bigobj; 64418334Speter int nothingwired; 64518334Speter 64618334Speter if (!vm_map_trylock(map)) 64718334Speter return; 64850615Sobrien 64950615Sobrien bigobj = NULL; 65018334Speter nothingwired = TRUE; 65150615Sobrien 65250615Sobrien /* 65350615Sobrien * first, search out the biggest object, and try to free pages from 65450615Sobrien * that. 65550615Sobrien */ 65650615Sobrien tmpe = map->header.next; 65750615Sobrien while (tmpe != &map->header) { 65850615Sobrien if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 65950615Sobrien obj = tmpe->object.vm_object; 66050615Sobrien if (obj != NULL && VM_OBJECT_TRYLOCK(obj)) { 66118334Speter if (obj->shadow_count <= 1 && 66218334Speter (bigobj == NULL || 66318334Speter bigobj->resident_page_count < obj->resident_page_count)) { 66418334Speter if (bigobj != NULL) 66518334Speter VM_OBJECT_UNLOCK(bigobj); 66618334Speter bigobj = obj; 66718334Speter } else 66818334Speter VM_OBJECT_UNLOCK(obj); 66918334Speter } 67018334Speter } 67118334Speter if (tmpe->wired_count > 0) 67218334Speter nothingwired = FALSE; 67318334Speter tmpe = tmpe->next; 67418334Speter } 67518334Speter 67618334Speter if (bigobj != NULL) { 67718334Speter vm_pageout_object_deactivate_pages(map->pmap, bigobj, desired); 67818334Speter VM_OBJECT_UNLOCK(bigobj); 67918334Speter } 68018334Speter /* 68118334Speter * Next, hunt around for other pages to deactivate. We actually 68250615Sobrien * do this search sort of wrong -- .text first is not the best idea. 68350615Sobrien */ 68450615Sobrien tmpe = map->header.next; 68550615Sobrien while (tmpe != &map->header) { 68650615Sobrien if (pmap_resident_count(vm_map_pmap(map)) <= desired) 68750615Sobrien break; 68850615Sobrien if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 68950615Sobrien obj = tmpe->object.vm_object; 69050615Sobrien if (obj != NULL) { 69150615Sobrien VM_OBJECT_LOCK(obj); 69250615Sobrien vm_pageout_object_deactivate_pages(map->pmap, obj, desired); 69350615Sobrien VM_OBJECT_UNLOCK(obj); 69450615Sobrien } 69550615Sobrien } 69650615Sobrien tmpe = tmpe->next; 69752558Sobrien } 69850615Sobrien 69950615Sobrien /* 70050615Sobrien * Remove all mappings if a process is swapped out, this will free page 70150615Sobrien * table pages. 70250615Sobrien */ 70318334Speter if (desired == 0 && nothingwired) { 70418334Speter tmpe = map->header.next; 70518334Speter while (tmpe != &map->header) { 70618334Speter pmap_remove(vm_map_pmap(map), tmpe->start, tmpe->end); 70718334Speter tmpe = tmpe->next; 70818334Speter } 70918334Speter } 71018334Speter vm_map_unlock(map); 71118334Speter} 71250615Sobrien#endif /* !defined(NO_SWAPPING) */ 71350615Sobrien 71450615Sobrien/* 71518334Speter * vm_pageout_scan does the dirty work for the pageout daemon. 71618334Speter */ 71718334Speterstatic void 71850615Sobrienvm_pageout_scan(int pass) 71950615Sobrien{ 72050615Sobrien vm_page_t m, next; 72150615Sobrien struct vm_page marker; 72250615Sobrien int page_shortage, maxscan, pcount; 72350615Sobrien int addl_page_shortage, addl_page_shortage_init; 72450615Sobrien vm_object_t object; 72550615Sobrien int actcount; 72650615Sobrien int vnodes_skipped = 0; 72718334Speter int maxlaunder; 72818334Speter 72918334Speter /* 73018334Speter * Decrease registered cache sizes. 73118334Speter */ 73218334Speter EVENTHANDLER_INVOKE(vm_lowmem, 0); 73318334Speter /* 73418334Speter * We do this explicitly after the caches have been drained above. 73518334Speter */ 73618334Speter uma_reclaim(); 73718334Speter 73818334Speter addl_page_shortage_init = atomic_readandclear_int(&vm_pageout_deficit); 73938510Sbde 74038510Sbde /* 74138510Sbde * Calculate the number of pages we want to either free or move 74250615Sobrien * to the cache. 74350615Sobrien */ 74450615Sobrien page_shortage = vm_paging_target() + addl_page_shortage_init; 74550615Sobrien 74650615Sobrien vm_pageout_init_marker(&marker, PQ_INACTIVE); 74750615Sobrien 74850615Sobrien /* 74950615Sobrien * Start scanning the inactive queue for pages we can move to the 75050615Sobrien * cache or free. The scan will stop when the target is reached or 75150615Sobrien * we have scanned the entire inactive queue. Note that m->act_count 75250615Sobrien * is not used to form decisions for the inactive queue, only for the 75350615Sobrien * active queue. 75450615Sobrien * 75550615Sobrien * maxlaunder limits the number of dirty pages we flush per scan. 75650615Sobrien * For most systems a smaller value (16 or 32) is more robust under 75750615Sobrien * extreme memory and disk pressure because any unnecessary writes 75850615Sobrien * to disk can result in extreme performance degredation. However, 75950615Sobrien * systems with excessive dirty pages (especially when MAP_NOSYNC is 76050615Sobrien * used) will die horribly with limited laundering. If the pageout 76150615Sobrien * daemon cannot clean enough pages in the first pass, we let it go 76250615Sobrien * all out in succeeding passes. 76350615Sobrien */ 76450615Sobrien if ((maxlaunder = vm_max_launder) <= 1) 76550615Sobrien maxlaunder = 1; 76650615Sobrien if (pass) 76750615Sobrien maxlaunder = 10000; 76850615Sobrien vm_page_lock_queues(); 76950615Sobrienrescan0: 77052558Sobrien addl_page_shortage = addl_page_shortage_init; 77152558Sobrien maxscan = cnt.v_inactive_count; 77250615Sobrien 77352558Sobrien for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl); 77452558Sobrien m != NULL && maxscan-- > 0 && page_shortage > 0; 77552558Sobrien m = next) { 77650615Sobrien 77752558Sobrien cnt.v_pdpages++; 77852558Sobrien 77950615Sobrien if (m->queue != PQ_INACTIVE) 78050615Sobrien goto rescan0; 78150615Sobrien 78252558Sobrien next = TAILQ_NEXT(m, pageq); 78350615Sobrien 78450615Sobrien /* 78550615Sobrien * skip marker pages 78650615Sobrien */ 78752558Sobrien if (m->flags & PG_MARKER) 78850615Sobrien continue; 78950615Sobrien 79050615Sobrien /* 79150615Sobrien * Lock the page. 79250615Sobrien */ 79350615Sobrien if (!vm_pageout_page_lock(m, &next)) { 79450615Sobrien vm_page_unlock(m); 79550615Sobrien addl_page_shortage++; 79650615Sobrien continue; 79750615Sobrien } 79850615Sobrien 79950615Sobrien /* 80050615Sobrien * A held page may be undergoing I/O, so skip it. 80150615Sobrien */ 80250615Sobrien if (m->hold_count) { 80350615Sobrien vm_page_unlock(m); 80450615Sobrien vm_page_requeue(m); 80550615Sobrien addl_page_shortage++; 80650615Sobrien continue; 80750615Sobrien } 80850615Sobrien 80950615Sobrien /* 81050615Sobrien * Don't mess with busy pages, keep in the front of the 81150615Sobrien * queue, most likely are being paged out. 81250615Sobrien */ 81352558Sobrien object = m->object; 81450615Sobrien if (!VM_OBJECT_TRYLOCK(object) && 81550615Sobrien (!vm_pageout_fallback_object_lock(m, &next) || 81650615Sobrien m->hold_count != 0)) { 81750615Sobrien VM_OBJECT_UNLOCK(object); 81852558Sobrien vm_page_unlock(m); 81950615Sobrien addl_page_shortage++; 82050615Sobrien continue; 82152558Sobrien } 82250615Sobrien if (m->busy || (m->oflags & VPO_BUSY)) { 82350615Sobrien vm_page_unlock(m); 82450615Sobrien VM_OBJECT_UNLOCK(object); 82552558Sobrien addl_page_shortage++; 82652558Sobrien continue; 82752558Sobrien } 82852558Sobrien 82952558Sobrien /* 83052558Sobrien * If the object is not being used, we ignore previous 83152558Sobrien * references. 83252558Sobrien */ 83352558Sobrien if (object->ref_count == 0) { 83452558Sobrien vm_page_flag_clear(m, PG_REFERENCED); 83552558Sobrien KASSERT(!pmap_page_is_mapped(m), 83618334Speter ("vm_pageout_scan: page %p is mapped", m)); 83718334Speter 83818334Speter /* 83918334Speter * Otherwise, if the page has been referenced while in the 84018334Speter * inactive queue, we bump the "activation count" upwards, 84118334Speter * making it less likely that the page will be added back to 84250615Sobrien * the inactive queue prematurely again. Here we check the 84318334Speter * page tables (or emulated bits, if any), given the upper 84450615Sobrien * level VM system not knowing anything about existing 84550615Sobrien * references. 84650615Sobrien */ 84750615Sobrien } else if (((m->flags & PG_REFERENCED) == 0) && 84850615Sobrien (actcount = pmap_ts_referenced(m))) { 84950615Sobrien vm_page_activate(m); 85052558Sobrien VM_OBJECT_UNLOCK(object); 85152558Sobrien m->act_count += (actcount + ACT_ADVANCE); 85250615Sobrien vm_page_unlock(m); 85350615Sobrien continue; 85450615Sobrien } 85550615Sobrien 85650615Sobrien /* 85750615Sobrien * If the upper level VM system knows about any page 85850615Sobrien * references, we activate the page. We also set the 85950615Sobrien * "activation count" higher than normal so that we will less 86050615Sobrien * likely place pages back onto the inactive queue again. 86150615Sobrien */ 86250615Sobrien if ((m->flags & PG_REFERENCED) != 0) { 86350615Sobrien vm_page_flag_clear(m, PG_REFERENCED); 86450615Sobrien actcount = pmap_ts_referenced(m); 86550615Sobrien vm_page_activate(m); 86650615Sobrien VM_OBJECT_UNLOCK(object); 86752558Sobrien m->act_count += (actcount + ACT_ADVANCE + 1); 86850615Sobrien vm_page_unlock(m); 86952558Sobrien continue; 87050615Sobrien } 87150615Sobrien 87250615Sobrien /* 87350615Sobrien * If the upper level VM system does not believe that the page 87450615Sobrien * is fully dirty, but it is mapped for write access, then we 87550615Sobrien * consult the pmap to see if the page's dirty status should 87650615Sobrien * be updated. 87750615Sobrien */ 87850615Sobrien if (m->dirty != VM_PAGE_BITS_ALL && 87950615Sobrien (m->flags & PG_WRITEABLE) != 0) { 88050615Sobrien /* 88150615Sobrien * Avoid a race condition: Unless write access is 88250615Sobrien * removed from the page, another processor could 88350615Sobrien * modify it before all access is removed by the call 88450615Sobrien * to vm_page_cache() below. If vm_page_cache() finds 88550615Sobrien * that the page has been modified when it removes all 88650615Sobrien * access, it panics because it cannot cache dirty 88750615Sobrien * pages. In principle, we could eliminate just write 88850615Sobrien * access here rather than all access. In the expected 88950615Sobrien * case, when there are no last instant modifications 89050615Sobrien * to the page, removing all access will be cheaper 89150615Sobrien * overall. 89250615Sobrien */ 89350615Sobrien if (pmap_is_modified(m)) 89450615Sobrien vm_page_dirty(m); 89550615Sobrien else if (m->dirty == 0) 89650615Sobrien pmap_remove_all(m); 89750615Sobrien } 89850615Sobrien 89950615Sobrien if (m->valid == 0) { 90050615Sobrien /* 90150615Sobrien * Invalid pages can be easily freed 90250615Sobrien */ 90350615Sobrien vm_page_free(m); 90450615Sobrien cnt.v_dfree++; 90550615Sobrien --page_shortage; 90650615Sobrien } else if (m->dirty == 0) { 90750615Sobrien /* 90850615Sobrien * Clean pages can be placed onto the cache queue. 90950615Sobrien * This effectively frees them. 91050615Sobrien */ 91150615Sobrien vm_page_cache(m); 91250615Sobrien --page_shortage; 91350615Sobrien } else if ((m->flags & PG_WINATCFLS) == 0 && pass == 0) { 91450615Sobrien /* 91550615Sobrien * Dirty pages need to be paged out, but flushing 91650615Sobrien * a page is extremely expensive verses freeing 91750615Sobrien * a clean page. Rather then artificially limiting 91850615Sobrien * the number of pages we can flush, we instead give 91950615Sobrien * dirty pages extra priority on the inactive queue 92050615Sobrien * by forcing them to be cycled through the queue 92150615Sobrien * twice before being flushed, after which the 92250615Sobrien * (now clean) page will cycle through once more 92350615Sobrien * before being freed. This significantly extends 92450615Sobrien * the thrash point for a heavily loaded machine. 92552558Sobrien */ 92650615Sobrien vm_page_flag_set(m, PG_WINATCFLS); 92750615Sobrien vm_page_requeue(m); 92850615Sobrien } else if (maxlaunder > 0) { 92950615Sobrien /* 93050615Sobrien * We always want to try to flush some dirty pages if 93150615Sobrien * we encounter them, to keep the system stable. 93250615Sobrien * Normally this number is small, but under extreme 93350615Sobrien * pressure where there are insufficient clean pages 93450615Sobrien * on the inactive queue, we may have to go all out. 93550615Sobrien */ 93650615Sobrien int swap_pageouts_ok, vfslocked = 0; 93750615Sobrien struct vnode *vp = NULL; 93850615Sobrien struct mount *mp = NULL; 93950615Sobrien 94050615Sobrien if ((object->type != OBJT_SWAP) && (object->type != OBJT_DEFAULT)) { 94150615Sobrien swap_pageouts_ok = 1; 94250615Sobrien } else { 94350615Sobrien swap_pageouts_ok = !(defer_swap_pageouts || disable_swap_pageouts); 94452558Sobrien swap_pageouts_ok |= (!disable_swap_pageouts && defer_swap_pageouts && 94550615Sobrien vm_page_count_min()); 94650615Sobrien 94750615Sobrien } 94850615Sobrien 94950615Sobrien /* 95050615Sobrien * We don't bother paging objects that are "dead". 95150615Sobrien * Those objects are in a "rundown" state. 95250615Sobrien */ 95352558Sobrien if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) { 95452558Sobrien vm_page_unlock(m); 95550615Sobrien VM_OBJECT_UNLOCK(object); 95650615Sobrien vm_page_requeue(m); 95750615Sobrien continue; 95850615Sobrien } 95950615Sobrien 96052558Sobrien /* 96152558Sobrien * Following operations may unlock 96252558Sobrien * vm_page_queue_mtx, invalidating the 'next' 96350615Sobrien * pointer. To prevent an inordinate number 96450615Sobrien * of restarts we use our marker to remember 96550615Sobrien * our place. 96650615Sobrien * 96750615Sobrien */ 96850615Sobrien TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, 96950615Sobrien m, &marker, pageq); 97050615Sobrien /* 97150615Sobrien * The object is already known NOT to be dead. It 97250615Sobrien * is possible for the vget() to block the whole 97350615Sobrien * pageout daemon, but the new low-memory handling 97450615Sobrien * code should prevent it. 97550615Sobrien * 97650615Sobrien * The previous code skipped locked vnodes and, worse, 97750615Sobrien * reordered pages in the queue. This results in 97850615Sobrien * completely non-deterministic operation and, on a 97950615Sobrien * busy system, can lead to extremely non-optimal 98050615Sobrien * pageouts. For example, it can cause clean pages 98152558Sobrien * to be freed and dirty pages to be moved to the end 98252558Sobrien * of the queue. Since dirty pages are also moved to 98352558Sobrien * the end of the queue once-cleaned, this gives 98452558Sobrien * way too large a weighting to defering the freeing 98552558Sobrien * of dirty pages. 98652558Sobrien * 98752558Sobrien * We can't wait forever for the vnode lock, we might 98852558Sobrien * deadlock due to a vn_read() getting stuck in 98918334Speter * vm_wait while holding this vnode. We skip the 99018334Speter * vnode if we can't get it in a reasonable amount 99150615Sobrien * of time. 99250615Sobrien */ 99318334Speter if (object->type == OBJT_VNODE) { 99418334Speter vm_page_unlock_queues(); 99550615Sobrien vm_page_unlock(m); 99618334Speter vp = object->handle; 99752558Sobrien if (vp->v_type == VREG && 99852558Sobrien vn_start_write(vp, &mp, V_NOWAIT) != 0) { 99950615Sobrien mp = NULL; 100050615Sobrien ++pageout_lock_miss; 100150615Sobrien if (object->flags & OBJ_MIGHTBEDIRTY) 100250615Sobrien vnodes_skipped++; 100350615Sobrien vm_page_lock_queues(); 100450615Sobrien goto unlock_and_continue; 100518334Speter } 100650615Sobrien KASSERT(mp != NULL, 100750615Sobrien ("vp %p with NULL v_mount", vp)); 100850615Sobrien vm_object_reference_locked(object); 100952558Sobrien VM_OBJECT_UNLOCK(object); 101018334Speter vfslocked = VFS_LOCK_GIANT(vp->v_mount); 101150615Sobrien if (vget(vp, LK_EXCLUSIVE | LK_TIMELOCK, 101250615Sobrien curthread)) { 101350615Sobrien VM_OBJECT_LOCK(object); 101450615Sobrien vm_page_lock_queues(); 101550615Sobrien ++pageout_lock_miss; 101650615Sobrien if (object->flags & OBJ_MIGHTBEDIRTY) 101750615Sobrien vnodes_skipped++; 101850615Sobrien vp = NULL; 101918334Speter goto unlock_and_continue; 102050615Sobrien } 102150615Sobrien VM_OBJECT_LOCK(object); 102250615Sobrien vm_page_lock(m); 102350615Sobrien vm_page_lock_queues(); 102418334Speter /* 102550615Sobrien * The page might have been moved to another 102650615Sobrien * queue during potential blocking in vget() 102750615Sobrien * above. The page might have been freed and 102850615Sobrien * reused for another vnode. 102950615Sobrien */ 103050615Sobrien if (m->queue != PQ_INACTIVE || 103150615Sobrien m->object != object || 103250615Sobrien TAILQ_NEXT(m, pageq) != &marker) { 103350615Sobrien vm_page_unlock(m); 103450615Sobrien if (object->flags & OBJ_MIGHTBEDIRTY) 103550615Sobrien vnodes_skipped++; 103652558Sobrien goto unlock_and_continue; 103750615Sobrien } 103850615Sobrien 103950615Sobrien /* 104050615Sobrien * The page may have been busied during the 104150615Sobrien * blocking in vget(). We don't move the 104218334Speter * page back onto the end of the queue so that 104350615Sobrien * statistics are more correct if we don't. 104450615Sobrien */ 104550615Sobrien if (m->busy || (m->oflags & VPO_BUSY)) { 104650615Sobrien vm_page_unlock(m); 104752558Sobrien goto unlock_and_continue; 104852558Sobrien } 104952558Sobrien 105050615Sobrien /* 105150615Sobrien * If the page has become held it might 105252558Sobrien * be undergoing I/O, so skip it 105350615Sobrien */ 105450615Sobrien if (m->hold_count) { 105552558Sobrien vm_page_unlock(m); 105652558Sobrien vm_page_requeue(m); 105752558Sobrien if (object->flags & OBJ_MIGHTBEDIRTY) 105850615Sobrien vnodes_skipped++; 105950615Sobrien goto unlock_and_continue; 106050615Sobrien } 106150615Sobrien } 106250615Sobrien 106350615Sobrien /* 106450615Sobrien * If a page is dirty, then it is either being washed 106550615Sobrien * (but not yet cleaned) or it is still in the 106650615Sobrien * laundry. If it is still in the laundry, then we 106750615Sobrien * start the cleaning operation. 106850615Sobrien * 106950615Sobrien * decrement page_shortage on success to account for 107050615Sobrien * the (future) cleaned page. Otherwise we could wind 107150615Sobrien * up laundering or cleaning too many pages. 107250615Sobrien */ 107350615Sobrien vm_page_unlock_queues(); 107450615Sobrien if (vm_pageout_clean(m) != 0) { 107550615Sobrien --page_shortage; 107650615Sobrien --maxlaunder; 107750615Sobrien } 107850615Sobrien vm_page_lock_queues(); 107950615Sobrienunlock_and_continue: 108050615Sobrien vm_page_lock_assert(m, MA_NOTOWNED); 108150615Sobrien VM_OBJECT_UNLOCK(object); 108250615Sobrien if (mp != NULL) { 108350615Sobrien vm_page_unlock_queues(); 108450615Sobrien if (vp != NULL) 108550615Sobrien vput(vp); 108650615Sobrien VFS_UNLOCK_GIANT(vfslocked); 108750615Sobrien vm_object_deallocate(object); 108850615Sobrien vn_finished_write(mp); 108950615Sobrien vm_page_lock_queues(); 109050615Sobrien } 109150615Sobrien next = TAILQ_NEXT(&marker, pageq); 109250615Sobrien TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, 109350615Sobrien &marker, pageq); 109450615Sobrien vm_page_lock_assert(m, MA_NOTOWNED); 109550615Sobrien continue; 109650615Sobrien } 109750615Sobrien vm_page_unlock(m); 109850615Sobrien VM_OBJECT_UNLOCK(object); 109950615Sobrien } 110050615Sobrien 110152558Sobrien /* 110250615Sobrien * Compute the number of pages we want to try to move from the 110350615Sobrien * active queue to the inactive queue. 110450615Sobrien */ 110550615Sobrien page_shortage = vm_paging_target() + 110650615Sobrien cnt.v_inactive_target - cnt.v_inactive_count; 110750615Sobrien page_shortage += addl_page_shortage; 110850615Sobrien 110918334Speter /* 111050615Sobrien * Scan the active queue for things we can deactivate. We nominally 111150615Sobrien * track the per-page activity counter and use it to locate 111250615Sobrien * deactivation candidates. 111350615Sobrien */ 111450615Sobrien pcount = cnt.v_active_count; 111550615Sobrien m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl); 111652558Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 111752558Sobrien 111852558Sobrien while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { 111952558Sobrien 112052558Sobrien KASSERT(m->queue == PQ_ACTIVE, 112152558Sobrien ("vm_pageout_scan: page %p isn't active", m)); 112250615Sobrien 112352558Sobrien next = TAILQ_NEXT(m, pageq); 112450615Sobrien if ((m->flags & PG_MARKER) != 0) { 112552558Sobrien m = next; 112652558Sobrien continue; 112750615Sobrien } 112850615Sobrien if (!vm_pageout_page_lock(m, &next)) { 112952558Sobrien vm_page_unlock(m); 113052558Sobrien m = next; 113150615Sobrien continue; 113250615Sobrien } 113350615Sobrien object = m->object; 113452558Sobrien if (!VM_OBJECT_TRYLOCK(object) && 113550615Sobrien !vm_pageout_fallback_object_lock(m, &next)) { 113650615Sobrien VM_OBJECT_UNLOCK(object); 113750615Sobrien vm_page_unlock(m); 113850615Sobrien m = next; 113950615Sobrien continue; 114050615Sobrien } 114150615Sobrien 114250615Sobrien /* 114350615Sobrien * Don't deactivate pages that are busy. 114450615Sobrien */ 114550615Sobrien if ((m->busy != 0) || 114650615Sobrien (m->oflags & VPO_BUSY) || 114752558Sobrien (m->hold_count != 0)) { 114850615Sobrien vm_page_unlock(m); 114950615Sobrien VM_OBJECT_UNLOCK(object); 115050615Sobrien vm_page_requeue(m); 115150615Sobrien m = next; 115250615Sobrien continue; 115350615Sobrien } 115450615Sobrien 115550615Sobrien /* 115650615Sobrien * The count for pagedaemon pages is done after checking the 115750615Sobrien * page for eligibility... 115818334Speter */ 115950615Sobrien cnt.v_pdpages++; 116018334Speter 116150615Sobrien /* 116250615Sobrien * Check to see "how much" the page has been used. 116350615Sobrien */ 116450615Sobrien actcount = 0; 116550615Sobrien if (object->ref_count != 0) { 116650615Sobrien if (m->flags & PG_REFERENCED) { 116750615Sobrien actcount += 1; 116850615Sobrien } 116950615Sobrien actcount += pmap_ts_referenced(m); 117052558Sobrien if (actcount) { 117150615Sobrien m->act_count += ACT_ADVANCE + actcount; 117252558Sobrien if (m->act_count > ACT_MAX) 117350615Sobrien m->act_count = ACT_MAX; 117450615Sobrien } 117550615Sobrien } 117650615Sobrien 117750615Sobrien /* 117850615Sobrien * Since we have "tested" this bit, we need to clear it now. 117950615Sobrien */ 118050615Sobrien vm_page_flag_clear(m, PG_REFERENCED); 118152558Sobrien 118252558Sobrien /* 118352558Sobrien * Only if an object is currently being used, do we use the 118450615Sobrien * page activation count stats. 118550615Sobrien */ 118650615Sobrien if (actcount && (object->ref_count != 0)) { 118718334Speter vm_page_requeue(m); 118818334Speter } else { 118918334Speter m->act_count -= min(m->act_count, ACT_DECLINE); 119018334Speter if (vm_pageout_algorithm || 119118334Speter object->ref_count == 0 || 119218334Speter m->act_count == 0) { 119318334Speter page_shortage--; 119418334Speter if (object->ref_count == 0) { 119518334Speter KASSERT(!pmap_page_is_mapped(m), 119618334Speter ("vm_pageout_scan: page %p is mapped", m)); 119718334Speter if (m->dirty == 0) 119818334Speter vm_page_cache(m); 119918334Speter else 120018334Speter vm_page_deactivate(m); 120118334Speter } else { 120218334Speter vm_page_deactivate(m); 120318334Speter } 120418334Speter } else { 120518334Speter vm_page_requeue(m); 120618334Speter } 120718334Speter } 120818334Speter vm_page_unlock(m); 120918334Speter VM_OBJECT_UNLOCK(object); 121018334Speter m = next; 121118334Speter } 121218334Speter vm_page_unlock_queues(); 121318334Speter#if !defined(NO_SWAPPING) 121418334Speter /* 121518334Speter * Idle process swapout -- run once per second. 121618334Speter */ 121718334Speter if (vm_swap_idle_enabled) { 121818334Speter static long lsec; 121918334Speter if (time_second != lsec) { 122018334Speter vm_req_vmdaemon(VM_SWAP_IDLE); 122118334Speter lsec = time_second; 122218334Speter } 122318334Speter } 122418334Speter#endif 122518334Speter 122618334Speter /* 122718334Speter * If we didn't get enough free pages, and we have skipped a vnode 122818334Speter * in a writeable object, wakeup the sync daemon. And kick swapout 122918334Speter * if we did not get enough free pages. 123018334Speter */ 123118334Speter if (vm_paging_target() > 0) { 123218334Speter if (vnodes_skipped && vm_page_count_min()) 123318334Speter (void) speedup_syncer(); 123418334Speter#if !defined(NO_SWAPPING) 123518334Speter if (vm_swap_enabled && vm_page_count_target()) 123618334Speter vm_req_vmdaemon(VM_SWAP_NORMAL); 123718334Speter#endif 123818334Speter } 123918334Speter 124018334Speter /* 124118334Speter * If we are critically low on one of RAM or swap and low on 124218334Speter * the other, kill the largest process. However, we avoid 124318334Speter * doing this on the first pass in order to give ourselves a 124418334Speter * chance to flush out dirty vnode-backed pages and to allow 124518334Speter * active pages to be moved to the inactive queue and reclaimed. 124618334Speter */ 124718334Speter if (pass != 0 && 124818334Speter ((swap_pager_avail < 64 && vm_page_count_min()) || 124918334Speter (swap_pager_full && vm_paging_target() > 0))) 125018334Speter vm_pageout_oom(VM_OOM_MEM); 125118334Speter} 125218334Speter 125350615Sobrien 125418334Spetervoid 125550615Sobrienvm_pageout_oom(int shortage) 125650615Sobrien{ 125750615Sobrien struct proc *p, *bigproc; 125850615Sobrien vm_offset_t size, bigsize; 125950615Sobrien struct thread *td; 126050615Sobrien struct vmspace *vm; 126150615Sobrien 126250615Sobrien /* 126350615Sobrien * We keep the process bigproc locked once we find it to keep anyone 126450615Sobrien * from messing with it; however, there is a possibility of 126550615Sobrien * deadlock if process B is bigproc and one of it's child processes 126650615Sobrien * attempts to propagate a signal to B while we are waiting for A's 126750615Sobrien * lock while walking this list. To avoid this, we don't block on 126818334Speter * the process lock but just skip a process if it is already locked. 126918334Speter */ 127018334Speter bigproc = NULL; 127118334Speter bigsize = 0; 127218334Speter sx_slock(&allproc_lock); 127318334Speter FOREACH_PROC_IN_SYSTEM(p) { 127418334Speter int breakout; 127550615Sobrien 127618334Speter if (PROC_TRYLOCK(p) == 0) 127752558Sobrien continue; 127852558Sobrien /* 127952558Sobrien * If this is a system, protected or killed process, skip it. 128052558Sobrien */ 128152558Sobrien if ((p->p_flag & (P_INEXEC | P_PROTECTED | P_SYSTEM)) || 128252558Sobrien (p->p_pid == 1) || P_KILLED(p) || 128352558Sobrien ((p->p_pid < 48) && (swap_pager_avail != 0))) { 128452558Sobrien PROC_UNLOCK(p); 128552558Sobrien continue; 128652558Sobrien } 128752558Sobrien /* 128852558Sobrien * If the process is in a non-running type state, 128952558Sobrien * don't touch it. Check all the threads individually. 129052558Sobrien */ 129152558Sobrien breakout = 0; 129252558Sobrien FOREACH_THREAD_IN_PROC(p, td) { 129352558Sobrien thread_lock(td); 129452558Sobrien if (!TD_ON_RUNQ(td) && 129552558Sobrien !TD_IS_RUNNING(td) && 129652558Sobrien !TD_IS_SLEEPING(td)) { 129752558Sobrien thread_unlock(td); 129852558Sobrien breakout = 1; 129952558Sobrien break; 130052558Sobrien } 130152558Sobrien thread_unlock(td); 130252558Sobrien } 130352558Sobrien if (breakout) { 130452558Sobrien PROC_UNLOCK(p); 130552558Sobrien continue; 130652558Sobrien } 130752558Sobrien /* 130852558Sobrien * get the process size 130918334Speter */ 131018334Speter vm = vmspace_acquire_ref(p); 131118334Speter if (vm == NULL) { 131218334Speter PROC_UNLOCK(p); 131318334Speter continue; 131418334Speter } 131518334Speter if (!vm_map_trylock_read(&vm->vm_map)) { 131650615Sobrien vmspace_free(vm); 131718334Speter PROC_UNLOCK(p); 131818334Speter continue; 131950615Sobrien } 132018334Speter size = vmspace_swap_count(vm); 132118334Speter vm_map_unlock_read(&vm->vm_map); 132250615Sobrien if (shortage == VM_OOM_MEM) 132318334Speter size += vmspace_resident_count(vm); 132418334Speter vmspace_free(vm); 132518334Speter /* 132652558Sobrien * if the this process is bigger than the biggest one 132718334Speter * remember it. 132850615Sobrien */ 132918334Speter if (size > bigsize) { 133050615Sobrien if (bigproc != NULL) 133118334Speter PROC_UNLOCK(bigproc); 133218334Speter bigproc = p; 133318334Speter bigsize = size; 133418334Speter } else 133518334Speter PROC_UNLOCK(p); 133618334Speter } 133718334Speter sx_sunlock(&allproc_lock); 133818334Speter if (bigproc != NULL) { 133952558Sobrien killproc(bigproc, "out of swap space"); 134018334Speter sched_nice(bigproc, PRIO_MIN); 134118334Speter PROC_UNLOCK(bigproc); 134218334Speter wakeup(&cnt.v_free_count); 134318334Speter } 134450615Sobrien} 134550615Sobrien 134650615Sobrien/* 134750615Sobrien * This routine tries to maintain the pseudo LRU active queue, 134852558Sobrien * so that during long periods of time where there is no paging, 134918334Speter * that some statistic accumulation still occurs. This code 135018334Speter * helps the situation where paging just starts to occur. 135118334Speter */ 135218334Speterstatic void 135318334Spetervm_pageout_page_stats() 135450615Sobrien{ 135550615Sobrien vm_object_t object; 135650615Sobrien vm_page_t m,next; 135750615Sobrien int pcount,tpcount; /* Number of pages to check */ 135850615Sobrien static int fullintervalcount = 0; 135950615Sobrien int page_shortage; 136050615Sobrien 136150615Sobrien page_shortage = 136250615Sobrien (cnt.v_inactive_target + cnt.v_cache_max + cnt.v_free_min) - 136350615Sobrien (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count); 136418334Speter 136550615Sobrien if (page_shortage <= 0) 136650615Sobrien return; 136750615Sobrien 136850615Sobrien vm_page_lock_queues(); 136950615Sobrien pcount = cnt.v_active_count; 137050615Sobrien fullintervalcount += vm_pageout_stats_interval; 137150615Sobrien if (fullintervalcount < vm_pageout_full_stats_interval) { 137250615Sobrien tpcount = (int64_t)vm_pageout_stats_max * cnt.v_active_count / 137350615Sobrien cnt.v_page_count; 137450615Sobrien if (pcount > tpcount) 137550615Sobrien pcount = tpcount; 137650615Sobrien } else { 137750615Sobrien fullintervalcount = 0; 137850615Sobrien } 137918334Speter 138018334Speter m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl); 138150615Sobrien while ((m != NULL) && (pcount-- > 0)) { 138250615Sobrien int actcount; 138350615Sobrien 138450615Sobrien KASSERT(m->queue == PQ_ACTIVE, 138550615Sobrien ("vm_pageout_page_stats: page %p isn't active", m)); 138650615Sobrien 138718334Speter next = TAILQ_NEXT(m, pageq); 138850615Sobrien if ((m->flags & PG_MARKER) != 0) { 138950615Sobrien m = next; 139050615Sobrien continue; 139150615Sobrien } 139250615Sobrien vm_page_lock_assert(m, MA_NOTOWNED); 139350615Sobrien if (!vm_pageout_page_lock(m, &next)) { 139450615Sobrien vm_page_unlock(m); 139550615Sobrien m = next; 139650615Sobrien continue; 139750615Sobrien } 139850615Sobrien object = m->object; 139950615Sobrien if (!VM_OBJECT_TRYLOCK(object) && 140050615Sobrien !vm_pageout_fallback_object_lock(m, &next)) { 140150615Sobrien VM_OBJECT_UNLOCK(object); 140250615Sobrien vm_page_unlock(m); 140350615Sobrien m = next; 140418334Speter continue; 140518334Speter } 140618334Speter 140718334Speter /* 140818334Speter * Don't deactivate pages that are busy. 140918334Speter */ 141018334Speter if ((m->busy != 0) || 141152558Sobrien (m->oflags & VPO_BUSY) || 141218334Speter (m->hold_count != 0)) { 141318334Speter vm_page_unlock(m); 141418334Speter VM_OBJECT_UNLOCK(object); 141518334Speter vm_page_requeue(m); 141618334Speter m = next; 141718334Speter continue; 141818334Speter } 141918334Speter 142018334Speter actcount = 0; 142118334Speter if (m->flags & PG_REFERENCED) { 142218334Speter vm_page_flag_clear(m, PG_REFERENCED); 142318334Speter actcount += 1; 142418334Speter } 142518334Speter 142618334Speter actcount += pmap_ts_referenced(m); 142718334Speter if (actcount) { 142818334Speter m->act_count += ACT_ADVANCE + actcount; 142918334Speter if (m->act_count > ACT_MAX) 143018334Speter m->act_count = ACT_MAX; 143118334Speter vm_page_requeue(m); 143218334Speter } else { 143318334Speter if (m->act_count == 0) { 143418334Speter /* 143518334Speter * We turn off page access, so that we have 143652558Sobrien * more accurate RSS stats. We don't do this 143718334Speter * in the normal page deactivation when the 143818334Speter * system is loaded VM wise, because the 143918334Speter * cost of the large number of page protect 144018334Speter * operations would be higher than the value 144118334Speter * of doing the operation. 144218334Speter */ 144318334Speter pmap_remove_all(m); 144418334Speter vm_page_deactivate(m); 144518334Speter } else { 144618334Speter m->act_count -= min(m->act_count, ACT_DECLINE); 144718334Speter vm_page_requeue(m); 144818334Speter } 144918334Speter } 145052558Sobrien vm_page_unlock(m); 145118334Speter VM_OBJECT_UNLOCK(object); 145218334Speter m = next; 145318334Speter } 145418334Speter vm_page_unlock_queues(); 145518334Speter} 145618334Speter 145718334Speter/* 145818334Speter * vm_pageout is the high level pageout daemon. 145952558Sobrien */ 146018334Speterstatic void 146152558Sobrienvm_pageout() 146218334Speter{ 146318334Speter int error, pass; 146418334Speter 146518334Speter /* 146618334Speter * Initialize some paging parameters. 146718334Speter */ 146818334Speter cnt.v_interrupt_free_min = 2; 146952558Sobrien if (cnt.v_page_count < 2000) 147018334Speter vm_pageout_page_count = 8; 147152558Sobrien 147252558Sobrien /* 147352558Sobrien * v_free_reserved needs to include enough for the largest 147452558Sobrien * swap pager structures plus enough for any pv_entry structs 147552558Sobrien * when paging. 147652558Sobrien */ 147752558Sobrien if (cnt.v_page_count > 1024) 147852558Sobrien cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200; 147952558Sobrien else 148052558Sobrien cnt.v_free_min = 4; 148152558Sobrien cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + 148252558Sobrien cnt.v_interrupt_free_min; 148352558Sobrien cnt.v_free_reserved = vm_pageout_page_count + 148452558Sobrien cnt.v_pageout_free_min + (cnt.v_page_count / 768); 148550615Sobrien cnt.v_free_severe = cnt.v_free_min / 2; 148652558Sobrien cnt.v_free_min += cnt.v_free_reserved; 148718334Speter cnt.v_free_severe += cnt.v_free_reserved; 148818334Speter 148918334Speter /* 149018334Speter * v_free_target and v_cache_min control pageout hysteresis. Note 149118334Speter * that these are more a measure of the VM cache queue hysteresis 149218334Speter * then the VM free queue. Specifically, v_free_target is the 149318334Speter * high water mark (free+cache pages). 149418334Speter * 149518334Speter * v_free_reserved + v_cache_min (mostly means v_cache_min) is the 149618334Speter * low water mark, while v_free_min is the stop. v_cache_min must 149718334Speter * be big enough to handle memory needs while the pageout daemon 149818334Speter * is signalled and run to free more pages. 149918334Speter */ 150018334Speter if (cnt.v_free_count > 6144) 150118334Speter cnt.v_free_target = 4 * cnt.v_free_min + cnt.v_free_reserved; 150218334Speter else 150318334Speter cnt.v_free_target = 2 * cnt.v_free_min + cnt.v_free_reserved; 150418334Speter 150518334Speter if (cnt.v_free_count > 2048) { 150650615Sobrien cnt.v_cache_min = cnt.v_free_target; 150718334Speter cnt.v_cache_max = 2 * cnt.v_cache_min; 150852558Sobrien cnt.v_inactive_target = (3 * cnt.v_free_target) / 2; 150918334Speter } else { 151018334Speter cnt.v_cache_min = 0; 151118334Speter cnt.v_cache_max = 0; 151218334Speter cnt.v_inactive_target = cnt.v_free_count / 4; 151318334Speter } 151418334Speter if (cnt.v_inactive_target > cnt.v_free_count / 3) 151518334Speter cnt.v_inactive_target = cnt.v_free_count / 3; 151618334Speter 151718334Speter /* XXX does not really belong here */ 151818334Speter if (vm_page_max_wired == 0) 151918334Speter vm_page_max_wired = cnt.v_free_count / 3; 152018334Speter 152118334Speter if (vm_pageout_stats_max == 0) 152218334Speter vm_pageout_stats_max = cnt.v_free_target; 152318334Speter 152418334Speter /* 152518334Speter * Set interval in seconds for stats scan. 152618334Speter */ 152718334Speter if (vm_pageout_stats_interval == 0) 152818334Speter vm_pageout_stats_interval = 5; 152918334Speter if (vm_pageout_full_stats_interval == 0) 153018334Speter vm_pageout_full_stats_interval = vm_pageout_stats_interval * 4; 153118334Speter 153218334Speter swap_pager_swap_init(); 153318334Speter pass = 0; 153418334Speter /* 153550615Sobrien * The pageout daemon is never done, so loop forever. 153618334Speter */ 153718334Speter while (TRUE) { 153818334Speter /* 153918334Speter * If we have enough free memory, wakeup waiters. Do 154018334Speter * not clear vm_pages_needed until we reach our target, 154118334Speter * otherwise we may be woken up over and over again and 154218334Speter * waste a lot of cpu. 154318334Speter */ 154418334Speter mtx_lock(&vm_page_queue_free_mtx); 154518334Speter if (vm_pages_needed && !vm_page_count_min()) { 154618334Speter if (!vm_paging_needed()) 154752558Sobrien vm_pages_needed = 0; 154818334Speter wakeup(&cnt.v_free_count); 154918334Speter } 155018334Speter if (vm_pages_needed) { 155118334Speter /* 155218334Speter * Still not done, take a second pass without waiting 155318334Speter * (unlimited dirty cleaning), otherwise sleep a bit 155418334Speter * and try again. 155552558Sobrien */ 155618334Speter ++pass; 155752558Sobrien if (pass > 1) 155852558Sobrien msleep(&vm_pages_needed, 155952558Sobrien &vm_page_queue_free_mtx, PVM, "psleep", 156052558Sobrien hz / 2); 156118334Speter } else { 156218334Speter /* 156318334Speter * Good enough, sleep & handle stats. Prime the pass 156418334Speter * for the next run. 156518334Speter */ 156618334Speter if (pass > 1) 156750615Sobrien pass = 1; 156818334Speter else 156952558Sobrien pass = 0; 157052558Sobrien error = msleep(&vm_pages_needed, 157118334Speter &vm_page_queue_free_mtx, PVM, "psleep", 157218334Speter vm_pageout_stats_interval * hz); 157352558Sobrien if (error && !vm_pages_needed) { 157452558Sobrien mtx_unlock(&vm_page_queue_free_mtx); 157518334Speter pass = 0; 157618334Speter vm_pageout_page_stats(); 157718334Speter continue; 157852558Sobrien } 157918334Speter } 158018334Speter if (vm_pages_needed) 158118334Speter cnt.v_pdwakeups++; 158218334Speter mtx_unlock(&vm_page_queue_free_mtx); 158318334Speter vm_pageout_scan(pass); 158418334Speter } 158518334Speter} 158618334Speter 158718334Speter/* 158818334Speter * Unless the free page queue lock is held by the caller, this function 158952558Sobrien * should be regarded as advisory. Specifically, the caller should 159018334Speter * not msleep() on &cnt.v_free_count following this function unless 159118334Speter * the free page queue lock is held until the msleep() is performed. 159252558Sobrien */ 159352558Sobrienvoid 159452558Sobrienpagedaemon_wakeup() 159552558Sobrien{ 159618334Speter 159718334Speter if (!vm_pages_needed && curthread->td_proc != pageproc) { 159818334Speter vm_pages_needed = 1; 159952558Sobrien wakeup(&vm_pages_needed); 160052558Sobrien } 160118334Speter} 160218334Speter 160318334Speter#if !defined(NO_SWAPPING) 160418334Speterstatic void 160518334Spetervm_req_vmdaemon(int req) 160652558Sobrien{ 160752558Sobrien static int lastrun = 0; 160852558Sobrien 160918334Speter mtx_lock(&vm_daemon_mtx); 161018334Speter vm_pageout_req_swapout |= req; 161152558Sobrien if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { 161252558Sobrien wakeup(&vm_daemon_needed); 161318334Speter lastrun = ticks; 161452558Sobrien } 161552558Sobrien mtx_unlock(&vm_daemon_mtx); 161652558Sobrien} 161752558Sobrien 161852558Sobrienstatic void 161952558Sobrienvm_daemon() 162052558Sobrien{ 162152558Sobrien struct rlimit rsslim; 162252558Sobrien struct proc *p; 162352558Sobrien struct thread *td; 162452558Sobrien struct vmspace *vm; 162552558Sobrien int breakout, swapout_flags; 162652558Sobrien 162752558Sobrien while (TRUE) { 162852558Sobrien mtx_lock(&vm_daemon_mtx); 162952558Sobrien msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", 0); 163018334Speter swapout_flags = vm_pageout_req_swapout; 163118334Speter vm_pageout_req_swapout = 0; 163252558Sobrien mtx_unlock(&vm_daemon_mtx); 163352558Sobrien if (swapout_flags) 163452558Sobrien swapout_procs(swapout_flags); 163552558Sobrien 163652558Sobrien /* 163752558Sobrien * scan the processes for exceeding their rlimits or if 163852558Sobrien * process is swapped out -- deactivate pages 163952558Sobrien */ 164018334Speter sx_slock(&allproc_lock); 164152558Sobrien FOREACH_PROC_IN_SYSTEM(p) { 164252558Sobrien vm_pindex_t limit, size; 164352558Sobrien 164452558Sobrien /* 164552558Sobrien * if this is a system process or if we have already 164652558Sobrien * looked at this process, skip it. 164752558Sobrien */ 164852558Sobrien PROC_LOCK(p); 164952558Sobrien if (p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) { 165052558Sobrien PROC_UNLOCK(p); 165152558Sobrien continue; 165252558Sobrien } 165352558Sobrien /* 165418334Speter * if the process is in a non-running type state, 165552558Sobrien * don't touch it. 165618334Speter */ 165718334Speter breakout = 0; 165852558Sobrien FOREACH_THREAD_IN_PROC(p, td) { 165918334Speter thread_lock(td); 166018334Speter if (!TD_ON_RUNQ(td) && 166118334Speter !TD_IS_RUNNING(td) && 166218334Speter !TD_IS_SLEEPING(td)) { 166318334Speter thread_unlock(td); 166418334Speter breakout = 1; 166552558Sobrien break; 166652558Sobrien } 166752558Sobrien thread_unlock(td); 166852558Sobrien } 166952558Sobrien if (breakout) { 167052558Sobrien PROC_UNLOCK(p); 167152558Sobrien continue; 167252558Sobrien } 167352558Sobrien /* 167452558Sobrien * get a limit 167552558Sobrien */ 167652558Sobrien lim_rlimit(p, RLIMIT_RSS, &rsslim); 167752558Sobrien limit = OFF_TO_IDX( 167852558Sobrien qmin(rsslim.rlim_cur, rsslim.rlim_max)); 167952558Sobrien 168052558Sobrien /* 168152558Sobrien * let processes that are swapped out really be 168252558Sobrien * swapped out set the limit to nothing (will force a 168352558Sobrien * swap-out.) 168452558Sobrien */ 168552558Sobrien if ((p->p_flag & P_INMEM) == 0) 168652558Sobrien limit = 0; /* XXX */ 168752558Sobrien vm = vmspace_acquire_ref(p); 168852558Sobrien PROC_UNLOCK(p); 168952558Sobrien if (vm == NULL) 169052558Sobrien continue; 169152558Sobrien 169252558Sobrien size = vmspace_resident_count(vm); 169352558Sobrien if (limit >= 0 && size >= limit) { 169452558Sobrien vm_pageout_map_deactivate_pages( 169518334Speter &vm->vm_map, limit); 169618334Speter } 169718334Speter vmspace_free(vm); 169818334Speter } 169918334Speter sx_sunlock(&allproc_lock); 170018334Speter } 170152558Sobrien} 170218334Speter#endif /* !defined(NO_SWAPPING) */ 170352558Sobrien