vm_pageout.c revision 8692
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_pageout.c,v 1.49 1995/05/10 18:56:06 davidg Exp $
 */

/*
 *	The proverbial page-out daemon.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/swap_pager.h>
#include <vm/vnode_pager.h>

int vm_pages_needed;		/* Event on which pageout daemon sleeps */

int vm_pageout_pages_needed;	/* flag saying that the pageout daemon needs pages */

extern int npendingio;
int vm_pageout_req_swapout;	/* XXX */
int vm_daemon_needed;
extern int nswiodone;
extern int swap_pager_full;
extern int vm_swap_size;
extern int vfs_update_wakeup;

#define MAXSCAN 1024		/* maximum number of pages to scan in queues */

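/*
 * MAXLAUNDER bounds the number of dirty pages pushed to their pager
 * during one pass of the inactive-queue scan.
 */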
#define MAXLAUNDER (cnt.v_page_count > 1800 ? 32 : 16)

#define VM_PAGEOUT_PAGE_COUNT 8
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;

int vm_page_max_wired;		/* XXX max # of wired pages system-wide */

/*
 * vm_pageout_clean:
 * 	cleans a vm_page
 */
int
vm_pageout_clean(m, sync)
	register vm_page_t m;
	int sync;
{
	/*
	 * Clean the page and remove it from the laundry.
	 *
	 * We set the busy bit to cause potential page faults on this page to
	 * block.
	 *
	 * And we set pageout-in-progress to keep the object from disappearing
	 * during pageout.  This guarantees that the page won't move from the
	 * inactive queue.  (However, any other page on the inactive queue may
	 * move!)
	 */

	register vm_object_t object;
	register vm_pager_t pager;
	int pageout_status[VM_PAGEOUT_PAGE_COUNT];
	vm_page_t ms[VM_PAGEOUT_PAGE_COUNT], mb[VM_PAGEOUT_PAGE_COUNT];
	int pageout_count, b_pageout_count;
	int anyok = 0;
	int i;
	vm_offset_t offset = m->offset;

	object = m->object;
	if (!object) {
		printf("pager: object missing\n");
		return 0;
	}
	if (!object->pager && (object->flags & OBJ_INTERNAL) == 0) {
		printf("pager: non internal obj without pager\n");
	}
	/*
	 * Try to collapse the object before making a pager for it.  We must
	 * unlock the page queues first. We try to defer the creation of a
	 * pager until all shadows are not paging.  This allows
	 * vm_object_collapse to work better and helps control swap space
	 * size. (J. Dyson 11 Nov 93)
	 */

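	/*
	 * If the object has no pager yet and free memory is below the
	 * pageout reserve, don't create one now: pager allocation itself
	 * needs pages (swap pager metadata, pv entries), so defer and
	 * leave the page in the laundry.
	 */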
	if (!object->pager &&
	    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
		return 0;

	if ((!sync && m->hold_count != 0) ||
	    ((m->busy != 0) || (m->flags & PG_BUSY)))
		return 0;

	if (!sync && object->shadow) {
		vm_object_collapse(object);
	}
	pageout_count = 1;
	ms[0] = m;

	pager = object->pager;
	if (pager) {
		for (i = 1; i < vm_pageout_page_count; i++) {
			vm_page_t mt;

			ms[i] = mt = vm_page_lookup(object, offset + i * NBPG);
			if (mt) {
				if (mt->flags & (PG_BUSY|PG_CACHE) || mt->busy)
					break;
				/*
				 * We can cluster ONLY if the page is dirty
				 * (not clean) and is not wired, busy, held,
				 * or mapped into a buffer, and one of the
				 * following holds: 1) the page is inactive
				 * (or a seldom used active page), or 2) we
				 * force the issue (sync == VM_PAGEOUT_FORCE).
				 */
				vm_page_test_dirty(mt);
				if ((mt->dirty & mt->valid) != 0
				    && ((mt->flags & PG_INACTIVE) ||
						(sync == VM_PAGEOUT_FORCE))
				    && (mt->wire_count == 0)
				    && (mt->hold_count == 0))
					pageout_count++;
				else
					break;
			} else
				break;
		}

		if ((pageout_count < vm_pageout_page_count) && (offset != 0)) {
			b_pageout_count = 0;
			for (i = 0; i < vm_pageout_page_count - pageout_count; i++) {
				vm_page_t mt;

				mt = vm_page_lookup(object, offset - (i + 1) * NBPG);
				if (mt) {
					if (mt->flags & (PG_BUSY|PG_CACHE) || mt->busy)
						break;
					vm_page_test_dirty(mt);
					if ((mt->dirty & mt->valid) != 0
					    && ((mt->flags & PG_INACTIVE) ||
							(sync == VM_PAGEOUT_FORCE))
					    && (mt->wire_count == 0)
					    && (mt->hold_count == 0)) {
						mb[b_pageout_count] = mt;
						b_pageout_count++;
						if ((offset - (i + 1) * NBPG) == 0)
							break;
					} else
						break;
				} else
					break;
			}
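			/*
			 * Prepend the backward-scanned pages so that the
			 * cluster handed to the pager stays in ascending
			 * offset order.
			 */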
			if (b_pageout_count > 0) {
				for (i = pageout_count - 1; i >= 0; --i) {
					ms[i + b_pageout_count] = ms[i];
				}
				for (i = 0; i < b_pageout_count; i++) {
					ms[i] = mb[b_pageout_count - (i + 1)];
				}
				pageout_count += b_pageout_count;
			}
		}

		/*
		 * we allow reads during pageouts...
		 */
		for (i = 0; i < pageout_count; i++) {
			ms[i]->flags |= PG_BUSY;
			vm_page_protect(ms[i], VM_PROT_READ);
		}
		object->paging_in_progress += pageout_count;
	} else {

		m->flags |= PG_BUSY;

		vm_page_protect(m, VM_PROT_READ);

		object->paging_in_progress++;

		pager = vm_pager_allocate(PG_DFLT, 0,
		    object->size, VM_PROT_ALL, 0);
		if (pager != NULL) {
			object->pager = pager;
		}
	}

	/*
	 * If there is no pager for the page, use the default pager.  If
	 * there's no place to put the page at the moment, leave it in the
	 * laundry and hope that there will be paging space later.
	 */

	if ((pager && pager->pg_type == PG_SWAP) ||
	    (cnt.v_free_count + cnt.v_cache_count) >= cnt.v_pageout_free_min) {
		if (pageout_count == 1) {
			pageout_status[0] = pager ?
			    vm_pager_put(pager, m,
			    ((sync || (object == kernel_object)) ? TRUE : FALSE)) :
			    VM_PAGER_FAIL;
		} else {
			if (!pager) {
				for (i = 0; i < pageout_count; i++)
					pageout_status[i] = VM_PAGER_FAIL;
			} else {
				vm_pager_put_pages(pager, ms, pageout_count,
				    ((sync || (object == kernel_object)) ? TRUE : FALSE),
				    pageout_status);
			}
		}
	} else {
		for (i = 0; i < pageout_count; i++)
			pageout_status[i] = VM_PAGER_FAIL;
	}

	for (i = 0; i < pageout_count; i++) {
		switch (pageout_status[i]) {
		case VM_PAGER_OK:
			++anyok;
			break;
		case VM_PAGER_PEND:
			++anyok;
			break;
		case VM_PAGER_BAD:
			/*
			 * Page outside of range of object. Right now we
			 * essentially lose the changes by pretending it
			 * worked.
			 */
			pmap_clear_modify(VM_PAGE_TO_PHYS(ms[i]));
			ms[i]->dirty = 0;
			break;
		case VM_PAGER_ERROR:
		case VM_PAGER_FAIL:
			/*
			 * If the page couldn't be paged out, then reactivate
			 * it so it doesn't clog the inactive list.  (We
			 * will try paging it out again later).
			 */
			if (ms[i]->flags & PG_INACTIVE)
				vm_page_activate(ms[i]);
			break;
		case VM_PAGER_AGAIN:
			break;
		}


		/*
		 * If the operation is still going, leave the page busy to
		 * block all other accesses. Also, leave the paging in
		 * progress indicator set so that we don't attempt an object
		 * collapse.
		 */
		if (pageout_status[i] != VM_PAGER_PEND) {
			vm_object_pip_wakeup(object);
			if ((ms[i]->flags & (PG_REFERENCED|PG_WANTED)) ||
			    pmap_is_referenced(VM_PAGE_TO_PHYS(ms[i]))) {
				pmap_clear_reference(VM_PAGE_TO_PHYS(ms[i]));
				ms[i]->flags &= ~PG_REFERENCED;
				if (ms[i]->flags & PG_INACTIVE)
					vm_page_activate(ms[i]);
			}
			PAGE_WAKEUP(ms[i]);
		}
	}
	return anyok;
}

/*
 *	vm_pageout_object_deactivate_pages
 *
 *	Deactivate enough pages to satisfy the inactive target
 *	requirements; or, if vm_page_proc_limit is set, deactivate
 *	all of the pages in the object and its shadows.
 *
 *	The object and map must be locked.
 */
int
vm_pageout_object_deactivate_pages(map, object, count, map_remove_only)
	vm_map_t map;
	vm_object_t object;
	int count;
	int map_remove_only;
{
	register vm_page_t p, next;
	int rcount;
	int dcount;

	dcount = 0;
	if (count == 0)
		count = 1;

	if (object->pager && (object->pager->pg_type == PG_DEVICE))
		return 0;

	if (object->shadow) {
		if (object->shadow->ref_count == 1)
			dcount += vm_pageout_object_deactivate_pages(map, object->shadow, count / 2 + 1, map_remove_only);
		else
			vm_pageout_object_deactivate_pages(map, object->shadow, count, 1);
	}
	if (object->paging_in_progress || !vm_object_lock_try(object))
		return dcount;

	/*
	 * scan the object's entire memory queue
	 */
	rcount = object->resident_page_count;
	p = object->memq.tqh_first;
	while (p && (rcount-- > 0)) {
		next = p->listq.tqe_next;
		cnt.v_pdpages++;
		vm_page_lock_queues();
		if (p->wire_count != 0 ||
		    p->hold_count != 0 ||
		    p->busy != 0 ||
		    !pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) {
			p = next;
			continue;
		}
		/*
		 * if a page is active, not wired and is in the process's
		 * pmap, then deactivate the page.
		 */
		if ((p->flags & (PG_ACTIVE | PG_BUSY)) == PG_ACTIVE) {
			if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p)) &&
			    (p->flags & (PG_REFERENCED|PG_WANTED)) == 0) {
				p->act_count -= min(p->act_count, ACT_DECLINE);
				/*
				 * if the page act_count is zero -- then we
				 * deactivate
				 */
				if (!p->act_count) {
					if (!map_remove_only)
						vm_page_deactivate(p);
					vm_page_protect(p, VM_PROT_NONE);
					/*
					 * Otherwise, since the page will be
					 * deactivated on a later pass, move
					 * it to the end of the queue so the
					 * other pages in memory get aged.
					 */
				} else {
					TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
					TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
				}
				/*
				 * see if we are done yet
				 */
				if (p->flags & PG_INACTIVE) {
					--count;
					++dcount;
					if (count <= 0 &&
					    cnt.v_inactive_count > cnt.v_inactive_target) {
						vm_page_unlock_queues();
						vm_object_unlock(object);
						return dcount;
					}
				}
			} else {
				/*
				 * Move the page to the bottom of the queue.
				 */
				pmap_clear_reference(VM_PAGE_TO_PHYS(p));
				p->flags &= ~PG_REFERENCED;
				if (p->act_count < ACT_MAX)
					p->act_count += ACT_ADVANCE;

				TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
			}
		} else if ((p->flags & (PG_INACTIVE | PG_BUSY)) == PG_INACTIVE) {
			vm_page_protect(p, VM_PROT_NONE);
		}
		vm_page_unlock_queues();
		p = next;
	}
	vm_object_unlock(object);
	return dcount;
}


/*
 * deactivate some number of pages in a map; try to do it fairly, but
 * that is really hard to do.
 */

void
vm_pageout_map_deactivate_pages(map, entry, count, freeer)
	vm_map_t map;
	vm_map_entry_t entry;
	int *count;
	int (*freeer) (vm_map_t, vm_object_t, int);
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;

	if (*count <= 0)
		return;
	vm_map_reference(map);
	if (!lock_try_read(&map->lock)) {
		vm_map_deallocate(map);
		return;
	}
	if (entry == 0) {
		tmpe = map->header.next;
		while (tmpe != &map->header && *count > 0) {
			vm_pageout_map_deactivate_pages(map, tmpe, count, freeer);
			tmpe = tmpe->next;
		}
	} else if (entry->is_sub_map || entry->is_a_map) {
		tmpm = entry->object.share_map;
		tmpe = tmpm->header.next;
		while (tmpe != &tmpm->header && *count > 0) {
			vm_pageout_map_deactivate_pages(tmpm, tmpe, count, freeer);
			tmpe = tmpe->next;
		}
	} else if ((obj = entry->object.vm_object) != 0) {
		*count -= (*freeer) (map, obj, *count);
	}
	lock_read_done(&map->lock);
	vm_map_deallocate(map);
	return;
}

void
vm_req_vmdaemon()
{
	static int lastrun = 0;

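	/*
	 * Wake the vm daemon at most about ten times per second; the
	 * second comparison catches wrap-around of the ticks counter.
	 */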
	if ((ticks > (lastrun + hz / 10)) || (ticks < lastrun)) {
		wakeup((caddr_t) &vm_daemon_needed);
		lastrun = ticks;
	}
}

/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 */
int
vm_pageout_scan()
{
	vm_page_t m;
	int page_shortage, maxscan, maxlaunder, pcount;
	int pages_freed;
	vm_page_t next;
	struct proc *p, *bigproc;
	vm_offset_t size, bigsize;
	vm_object_t object;
	int force_wakeup = 0;
	int vnodes_skipped = 0;

	pages_freed = 0;

	/*
	 * Start scanning the inactive queue for pages we can free. We keep
	 * scanning until we have enough free pages or we have scanned through
	 * the entire queue.  If we encounter dirty pages, we start cleaning
	 * them.
	 */

	maxlaunder = (cnt.v_inactive_target > MAXLAUNDER) ?
	    MAXLAUNDER : cnt.v_inactive_target;

rescan1:
	maxscan = cnt.v_inactive_count;
	m = vm_page_queue_inactive.tqh_first;
	while ((m != NULL) && (maxscan-- > 0) &&
	    ((cnt.v_cache_count + cnt.v_free_count) < (cnt.v_cache_min + cnt.v_free_target))) {
		vm_page_t next;

		cnt.v_pdpages++;
		next = m->pageq.tqe_next;

#if defined(VM_DIAGNOSE)
		if ((m->flags & PG_INACTIVE) == 0) {
			printf("vm_pageout_scan: page not inactive?\n");
			break;
		}
#endif

		/*
		 * don't mess with busy pages
		 */
		if (m->hold_count || m->busy || (m->flags & PG_BUSY)) {
			TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
			m = next;
			continue;
		}
		if (((m->flags & PG_REFERENCED) == 0) &&
		    pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
			m->flags |= PG_REFERENCED;
		}
		if (m->object->ref_count == 0) {
			m->flags &= ~PG_REFERENCED;
			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
		}
		if ((m->flags & (PG_REFERENCED|PG_WANTED)) != 0) {
			m->flags &= ~PG_REFERENCED;
			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
			vm_page_activate(m);
			if (m->act_count < ACT_MAX)
				m->act_count += ACT_ADVANCE;
			m = next;
			continue;
		}

		vm_page_test_dirty(m);
		if (m->dirty == 0) {
			if (m->bmapped == 0) {
				if (m->valid == 0) {
					pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
					vm_page_free(m);
					cnt.v_dfree++;
				} else {
					vm_page_cache(m);
				}
				++pages_freed;
			} else {
				m = next;
				continue;
			}
		} else if (maxlaunder > 0) {
			int written;
			struct vnode *vp = NULL;

			object = m->object;
			if ((object->flags & OBJ_DEAD) || !vm_object_lock_try(object)) {
				m = next;
				continue;
			}

			if (object->pager && object->pager->pg_type == PG_VNODE) {
				vp = ((vn_pager_t) object->pager->pg_data)->vnp_vp;
				if (VOP_ISLOCKED(vp) || vget(vp, 1)) {
					vm_object_unlock(object);
					if (object->flags & OBJ_WRITEABLE)
						++vnodes_skipped;
					m = next;
					continue;
				}
			}

			/*
			 * If a page is dirty, then it is either being washed
			 * (but not yet cleaned) or it is still in the
			 * laundry.  If it is still in the laundry, then we
			 * start the cleaning operation.
			 */
			written = vm_pageout_clean(m, 0);

			if (vp)
				vput(vp);

			vm_object_unlock(object);

			if (!next) {
				break;
			}
			maxlaunder -= written;
			/*
			 * if the next page has been re-activated, start
			 * scanning again
			 */
			if ((next->flags & PG_INACTIVE) == 0) {
				goto rescan1;
			}
		}
		m = next;
	}

	/*
	 * Compute the page shortage.  If we are still very low on memory,
	 * be sure that we will move a minimal number of pages from active
	 * to inactive.
	 */

	page_shortage = cnt.v_inactive_target -
	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
	if (page_shortage <= 0) {
		if (pages_freed == 0) {
			page_shortage = cnt.v_free_min - cnt.v_free_count;
		} else {
			page_shortage = 1;
		}
	}
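	/*
	 * Scan the active queue: pages that have been referenced get their
	 * act_count bumped and move to the tail of the queue; the rest
	 * decay, and once act_count reaches zero a page is deactivated (or
	 * moved straight to the cache when it is clean, unmapped, and its
	 * object has no references).
	 */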
	maxscan = MAXSCAN;
	pcount = cnt.v_active_count;
	m = vm_page_queue_active.tqh_first;
	while ((m != NULL) && (maxscan > 0) && (pcount-- > 0) && (page_shortage > 0)) {

		cnt.v_pdpages++;
		next = m->pageq.tqe_next;

		/*
		 * Don't deactivate pages that are busy.
		 */
		if ((m->busy != 0) ||
		    (m->flags & PG_BUSY) ||
		    (m->hold_count != 0)) {
			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			m = next;
			continue;
		}
		if (m->object->ref_count && ((m->flags & (PG_REFERENCED|PG_WANTED)) ||
			pmap_is_referenced(VM_PAGE_TO_PHYS(m)))) {
			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
			m->flags &= ~PG_REFERENCED;
			if (m->act_count < ACT_MAX) {
				m->act_count += ACT_ADVANCE;
			}
			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
		} else {
			m->flags &= ~PG_REFERENCED;
			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
			m->act_count -= min(m->act_count, ACT_DECLINE);

			/*
			 * if the page act_count is zero -- then we deactivate
			 */
			if (!m->act_count && (page_shortage > 0)) {
				if (m->object->ref_count == 0) {
					--page_shortage;
					vm_page_test_dirty(m);
					if ((m->bmapped == 0) && (m->dirty == 0)) {
						m->act_count = 0;
						vm_page_cache(m);
					} else {
						vm_page_deactivate(m);
					}
				} else {
					vm_page_deactivate(m);
					--page_shortage;
				}
			} else if (m->act_count) {
				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			}
		}
		maxscan--;
		m = next;
	}

	/*
	 * We try to maintain some *really* free pages; this allows interrupt
	 * code to be guaranteed space.
	 */
	while (cnt.v_free_count < cnt.v_free_reserved) {
		m = vm_page_queue_cache.tqh_first;
		if (!m)
			break;
		vm_page_free(m);
		cnt.v_dfree++;
	}

	/*
	 * If we didn't get enough free pages, and we have skipped a vnode
	 * in a writeable object, wake up the sync daemon.  Also kick off
	 * swapout if we did not get enough free pages.
	 */
	if ((cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_target) {
		if (vnodes_skipped &&
		    (cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_min) {
			if (!vfs_update_wakeup) {
				vfs_update_wakeup = 1;
				wakeup((caddr_t) &vfs_update_wakeup);
			}
		}
		/*
		 * now swap processes out if we are in low memory conditions
		 */
		if (!swap_pager_full && vm_swap_size &&
			vm_pageout_req_swapout == 0) {
			vm_pageout_req_swapout = 1;
			vm_req_vmdaemon();
		}
	}

	if ((cnt.v_inactive_count + cnt.v_free_count + cnt.v_cache_count) <
	    (cnt.v_inactive_target + cnt.v_free_min)) {
		vm_req_vmdaemon();
	}

	/*
	 * make sure that we have swap space -- if we are low on memory and
	 * swap, then kill the biggest process.
	 */
	if ((vm_swap_size == 0 || swap_pager_full) &&
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min)) {
		bigproc = NULL;
		bigsize = 0;
		for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
			/*
			 * if this is a system process, skip it
			 */
			if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) ||
			    ((p->p_pid < 48) && (vm_swap_size != 0))) {
				continue;
			}
			/*
			 * if the process is in a non-running type state,
			 * don't touch it.
			 */
			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
				continue;
			}
			/*
			 * get the process size
			 */
			size = p->p_vmspace->vm_pmap.pm_stats.resident_count;
			/*
			 * if this process is bigger than the biggest one,
			 * remember it.
			 */
			if (size > bigsize) {
				bigproc = p;
				bigsize = size;
			}
		}
		if (bigproc != NULL) {
			printf("Process %lu killed by vm_pageout -- out of swap\n", (u_long) bigproc->p_pid);
			psignal(bigproc, SIGKILL);
			bigproc->p_estcpu = 0;
			bigproc->p_nice = PRIO_MIN;
			resetpriority(bigproc);
			wakeup((caddr_t) &cnt.v_free_count);
		}
	}
	return force_wakeup;
}

/*
 *	vm_pageout is the high level pageout daemon.
 */
void
vm_pageout()
{
	(void) spl0();

	/*
	 * Initialize some paging parameters.
	 */

	cnt.v_interrupt_free_min = 2;

	if (cnt.v_page_count > 1024)
		cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200;
	else
		cnt.v_free_min = 4;
	/*
	 * free_reserved needs to include enough for the largest swap pager
	 * structures plus enough for any pv_entry structs when paging.
	 */
	cnt.v_pageout_free_min = 6 + cnt.v_page_count / 1024 +
				cnt.v_interrupt_free_min;
	cnt.v_free_reserved = cnt.v_pageout_free_min + 6;
	cnt.v_free_target = 3 * cnt.v_free_min + cnt.v_free_reserved;
	cnt.v_free_min += cnt.v_free_reserved;

	if (cnt.v_page_count > 1024) {
		cnt.v_cache_max = (cnt.v_free_count - 1024) / 2;
		cnt.v_cache_min = (cnt.v_free_count - 1024) / 8;
		cnt.v_inactive_target = 2 * cnt.v_cache_min + 192;
	} else {
		cnt.v_cache_min = 0;
		cnt.v_cache_max = 0;
		cnt.v_inactive_target = cnt.v_free_count / 4;
	}

	/* XXX does not really belong here */
	if (vm_page_max_wired == 0)
		vm_page_max_wired = cnt.v_free_count / 3;


	(void) swap_pager_alloc(0, 0, 0, 0);
	/*
	 * The pageout daemon is never done, so loop forever.
	 */
	while (TRUE) {
		int s = splhigh();

		if (!vm_pages_needed ||
			((cnt.v_free_count >= cnt.v_free_reserved) &&
			 (cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min))) {
			vm_pages_needed = 0;
			tsleep((caddr_t) &vm_pages_needed, PVM, "psleep", 0);
		}
		vm_pages_needed = 0;
		splx(s);
		cnt.v_pdwakeups++;
		vm_pager_sync();
		vm_pageout_scan();
		vm_pager_sync();
		wakeup((caddr_t) &cnt.v_free_count);
		wakeup((caddr_t) kmem_map);
	}
}

void
vm_daemon()
{
	vm_object_t object;
	struct proc *p;

	while (TRUE) {
		tsleep((caddr_t) &vm_daemon_needed, PUSER, "psleep", 0);
		if (vm_pageout_req_swapout) {
			swapout_threads();
			vm_pageout_req_swapout = 0;
		}
		/*
		 * scan the processes for exceeding their rlimits or if
		 * the process is swapped out -- deactivate pages
		 */

		for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
			int overage;
			quad_t limit;
			vm_offset_t size;

			/*
			 * if this is a system process or the process is
			 * exiting, skip it.
			 */
			if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
				continue;
			}
			/*
			 * if the process is in a non-running type state,
			 * don't touch it.
			 */
			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
				continue;
			}
			/*
			 * get a limit
			 */
			limit = qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
			    p->p_rlimit[RLIMIT_RSS].rlim_max);

			/*
			 * let processes that are swapped out really be
			 * swapped out: set the limit to nothing (this will
			 * force a swap-out.)
			 */
			if ((p->p_flag & P_INMEM) == 0)
				limit = 0;	/* XXX */

			size = p->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG;
			if (limit >= 0 && size >= limit) {
				overage = (size - limit) / NBPG;
				vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map,
				    (vm_map_entry_t) 0, &overage, vm_pageout_object_deactivate_pages);
			}
		}
	}

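	/*
	 * XXX: the code below is unreachable as written -- the
	 * while (TRUE) loop above never exits.
	 */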
	/*
	 * we remove cached objects that have no RSS...
	 */
restart:
	vm_object_cache_lock();
	object = vm_object_cached_list.tqh_first;
	while (object) {
		vm_object_cache_unlock();
		/*
		 * if there are no resident pages -- get rid of the object
		 */
		if (object->resident_page_count == 0) {
			if (object != vm_object_lookup(object->pager))
				panic("vm_object_cache_trim: I'm sooo confused.");
			pager_cache(object, FALSE);
			goto restart;
		}
		object = object->cached_list.tqe_next;
		vm_object_cache_lock();
	}
	vm_object_cache_unlock();
}
961