vm_object.c revision 179159
1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
33 *
34 *
35 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
36 * All rights reserved.
37 *
38 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
39 *
40 * Permission to use, copy, modify and distribute this software and
41 * its documentation is hereby granted, provided that both the copyright
42 * notice and this permission notice appear in all copies of the
43 * software, derivative works or modified versions, and any portions
44 * thereof, and that both notices appear in supporting documentation.
45 *
46 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
47 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
48 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
49 *
50 * Carnegie Mellon requests users of this software to return to
51 *
52 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
53 *  School of Computer Science
54 *  Carnegie Mellon University
55 *  Pittsburgh PA 15213-3890
56 *
57 * any improvements or extensions that they make and grant Carnegie the
58 * rights to redistribute these changes.
59 */
60
61/*
62 *	Virtual memory object module.
63 */
64
65#include <sys/cdefs.h>
66__FBSDID("$FreeBSD: head/sys/vm/vm_object.c 179159 2008-05-20 19:05:43Z ups $");
67
68#include "opt_vm.h"
69
70#include <sys/param.h>
71#include <sys/systm.h>
72#include <sys/lock.h>
73#include <sys/mman.h>
74#include <sys/mount.h>
75#include <sys/kernel.h>
76#include <sys/sysctl.h>
77#include <sys/mutex.h>
78#include <sys/proc.h>		/* for curproc, pageproc */
79#include <sys/socket.h>
80#include <sys/vnode.h>
81#include <sys/vmmeter.h>
82#include <sys/sx.h>
83
84#include <vm/vm.h>
85#include <vm/vm_param.h>
86#include <vm/pmap.h>
87#include <vm/vm_map.h>
88#include <vm/vm_object.h>
89#include <vm/vm_page.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_pager.h>
92#include <vm/swap_pager.h>
93#include <vm/vm_kern.h>
94#include <vm/vm_extern.h>
95#include <vm/vm_reserv.h>
96#include <vm/uma.h>
97
98#define EASY_SCAN_FACTOR       8
99
100#define MSYNC_FLUSH_HARDSEQ	0x01
101#define MSYNC_FLUSH_SOFTSEQ	0x02
102
103/*
104 * msync / VM object flushing optimizations
105 */
106static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ;
107SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
108        CTLFLAG_RW, &msync_flush_flags, 0, "");
109
110static int old_msync;
111SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
112    "Use old (insecure) msync behavior");
113
114static void	vm_object_qcollapse(vm_object_t object);
115static int	vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);
116static void	vm_object_vndeallocate(vm_object_t object);
117
118/*
119 *	Virtual memory objects maintain the actual data
120 *	associated with allocated virtual memory.  A given
121 *	page of memory exists within exactly one object.
122 *
123 *	An object is only deallocated when all "references"
124 *	are given up.  Only one "reference" to a given
125 *	region of an object should be writeable.
126 *
127 *	Associated with each object is a list of all resident
128 *	memory pages belonging to that object; this list is
129 *	maintained by the "vm_page" module, and locked by the object's
130 *	lock.
131 *
132 *	Each object also records a "pager" routine which is
133 *	used to retrieve (and store) pages to the proper backing
134 *	storage.  In addition, objects may be backed by other
135 *	objects from which they were virtual-copied.
136 *
137 *	The only items within the object structure which are
138 *	modified after time of creation are:
139 *		reference count		locked by object's lock
140 *		pager routine		locked by object's lock
141 *
142 */
143
144struct object_q vm_object_list;
145struct mtx vm_object_list_mtx;	/* lock for object list and count */
146
147struct vm_object kernel_object_store;
148struct vm_object kmem_object_store;
149
150SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, "VM object stats");
151
152static long object_collapses;
153SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
154    &object_collapses, 0, "VM object collapses");
155
156static long object_bypasses;
157SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
158    &object_bypasses, 0, "VM object bypasses");
159
160static uma_zone_t obj_zone;
161
162static int vm_object_zinit(void *mem, int size, int flags);
163
164#ifdef INVARIANTS
165static void vm_object_zdtor(void *mem, int size, void *arg);
166
167static void
168vm_object_zdtor(void *mem, int size, void *arg)
169{
170	vm_object_t object;
171
172	object = (vm_object_t)mem;
173	KASSERT(TAILQ_EMPTY(&object->memq),
174	    ("object %p has resident pages",
175	    object));
176#if VM_NRESERVLEVEL > 0
177	KASSERT(LIST_EMPTY(&object->rvq),
178	    ("object %p has reservations",
179	    object));
180#endif
181	KASSERT(object->cache == NULL,
182	    ("object %p has cached pages",
183	    object));
184	KASSERT(object->paging_in_progress == 0,
185	    ("object %p paging_in_progress = %d",
186	    object, object->paging_in_progress));
187	KASSERT(object->resident_page_count == 0,
188	    ("object %p resident_page_count = %d",
189	    object, object->resident_page_count));
190	KASSERT(object->shadow_count == 0,
191	    ("object %p shadow_count = %d",
192	    object, object->shadow_count));
193}
194#endif
195
196static int
197vm_object_zinit(void *mem, int size, int flags)
198{
199	vm_object_t object;
200
201	object = (vm_object_t)mem;
202	bzero(&object->mtx, sizeof(object->mtx));
203	VM_OBJECT_LOCK_INIT(object, "standard object");
204
205	/* These are true for any object that has been freed */
206	object->paging_in_progress = 0;
207	object->resident_page_count = 0;
208	object->shadow_count = 0;
209	return (0);
210}
211
212void
213_vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
214{
215
216	TAILQ_INIT(&object->memq);
217	LIST_INIT(&object->shadow_head);
218
219	object->root = NULL;
220	object->type = type;
221	object->size = size;
222	object->generation = 1;
223	object->ref_count = 1;
224	object->flags = 0;
225	if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
226		object->flags = OBJ_ONEMAPPING;
227	object->pg_color = 0;
228	object->handle = NULL;
229	object->backing_object = NULL;
230	object->backing_object_offset = (vm_ooffset_t) 0;
231#if VM_NRESERVLEVEL > 0
232	LIST_INIT(&object->rvq);
233#endif
234	object->cache = NULL;
235
236	mtx_lock(&vm_object_list_mtx);
237	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
238	mtx_unlock(&vm_object_list_mtx);
239}
240
241/*
242 *	vm_object_init:
243 *
244 *	Initialize the VM objects module.
245 */
246void
247vm_object_init(void)
248{
249	TAILQ_INIT(&vm_object_list);
250	mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
251
252	VM_OBJECT_LOCK_INIT(&kernel_object_store, "kernel object");
253	_vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
254	    kernel_object);
255#if VM_NRESERVLEVEL > 0
256	kernel_object->flags |= OBJ_COLORED;
257	kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
258#endif
259
260	VM_OBJECT_LOCK_INIT(&kmem_object_store, "kmem object");
261	_vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
262	    kmem_object);
263#if VM_NRESERVLEVEL > 0
264	kmem_object->flags |= OBJ_COLORED;
265	kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
266#endif
267
268	/*
269	 * The lock portion of struct vm_object must be type stable due
270	 * to vm_pageout_fallback_object_lock locking a vm object
271	 * without holding any references to it.
272	 */
273	obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
274#ifdef INVARIANTS
275	    vm_object_zdtor,
276#else
277	    NULL,
278#endif
279	    vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
280}
281
282void
283vm_object_clear_flag(vm_object_t object, u_short bits)
284{
285
286	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
287	object->flags &= ~bits;
288}
289
290void
291vm_object_pip_add(vm_object_t object, short i)
292{
293
294	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
295	object->paging_in_progress += i;
296}
297
298void
299vm_object_pip_subtract(vm_object_t object, short i)
300{
301
302	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
303	object->paging_in_progress -= i;
304}
305
306void
307vm_object_pip_wakeup(vm_object_t object)
308{
309
310	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
311	object->paging_in_progress--;
312	if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
313		vm_object_clear_flag(object, OBJ_PIPWNT);
314		wakeup(object);
315	}
316}
317
318void
319vm_object_pip_wakeupn(vm_object_t object, short i)
320{
321
322	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
323	if (i)
324		object->paging_in_progress -= i;
325	if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
326		vm_object_clear_flag(object, OBJ_PIPWNT);
327		wakeup(object);
328	}
329}
330
331void
332vm_object_pip_wait(vm_object_t object, char *waitid)
333{
334
335	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
336	while (object->paging_in_progress) {
337		object->flags |= OBJ_PIPWNT;
338		msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
339	}
340}
341
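/*
 * A minimal usage sketch of the paging_in_progress protocol above: a
 * pager-style operation bumps the count under the object lock, drops the
 * lock for the actual I/O, and then wakes any OBJ_PIPWNT waiters (for
 * example a thread in vm_object_pip_wait() or vm_object_terminate()).
 * The helper name below is illustrative only.
 */
#if 0
static void
example_pip_bracket(vm_object_t object)
{

	VM_OBJECT_LOCK(object);
	vm_object_pip_add(object, 1);	/* announce pending paging I/O */
	VM_OBJECT_UNLOCK(object);

	/* ... perform the page-in or page-out without the object lock ... */

	VM_OBJECT_LOCK(object);
	vm_object_pip_wakeup(object);	/* drop the count, wake waiters */
	VM_OBJECT_UNLOCK(object);
}
#endif
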
342/*
343 *	vm_object_allocate:
344 *
345 *	Returns a new object with the given size.
346 */
347vm_object_t
348vm_object_allocate(objtype_t type, vm_pindex_t size)
349{
350	vm_object_t object;
351
352	object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
353	_vm_object_allocate(type, size, object);
354	return (object);
355}
356
357
358/*
359 *	vm_object_reference:
360 *
361 *	Gets another reference to the given object.  Note: OBJ_DEAD
362 *	objects can be referenced during final cleaning.
363 */
364void
365vm_object_reference(vm_object_t object)
366{
367	if (object == NULL)
368		return;
369	VM_OBJECT_LOCK(object);
370	vm_object_reference_locked(object);
371	VM_OBJECT_UNLOCK(object);
372}
373
374/*
375 *	vm_object_reference_locked:
376 *
377 *	Gets another reference to the given object.
378 *
379 *	The object must be locked.
380 */
381void
382vm_object_reference_locked(vm_object_t object)
383{
384	struct vnode *vp;
385
386	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
387	object->ref_count++;
388	if (object->type == OBJT_VNODE) {
389		vp = object->handle;
390		vref(vp);
391	}
392}
393
394/*
395 * Handle deallocating an object of type OBJT_VNODE.
396 */
397static void
398vm_object_vndeallocate(vm_object_t object)
399{
400	struct vnode *vp = (struct vnode *) object->handle;
401
402	VFS_ASSERT_GIANT(vp->v_mount);
403	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
404	KASSERT(object->type == OBJT_VNODE,
405	    ("vm_object_vndeallocate: not a vnode object"));
406	KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
407#ifdef INVARIANTS
408	if (object->ref_count == 0) {
409		vprint("vm_object_vndeallocate", vp);
410		panic("vm_object_vndeallocate: bad object reference count");
411	}
412#endif
413
414	object->ref_count--;
415	if (object->ref_count == 0) {
416		mp_fixme("Unlocked vflag access.");
417		vp->v_vflag &= ~VV_TEXT;
418	}
419	VM_OBJECT_UNLOCK(object);
420	/*
421	 * vrele may need a vop lock
422	 */
423	vrele(vp);
424}
425
426/*
427 *	vm_object_deallocate:
428 *
429 *	Release a reference to the specified object,
430 *	gained either through a vm_object_allocate
431 *	or a vm_object_reference call.  When all references
432 *	are gone, storage associated with this object
433 *	may be relinquished.
434 *
435 *	No object may be locked.
436 */
437void
438vm_object_deallocate(vm_object_t object)
439{
440	vm_object_t temp;
441
442	while (object != NULL) {
443		int vfslocked;
444
445		vfslocked = 0;
446	restart:
447		VM_OBJECT_LOCK(object);
448		if (object->type == OBJT_VNODE) {
449			struct vnode *vp = (struct vnode *) object->handle;
450
451			/*
452			 * Conditionally acquire Giant for a vnode-backed
453			 * object.  We have to be careful since the type of
454			 * a vnode object can change while the object is
455			 * unlocked.
456			 */
457			if (VFS_NEEDSGIANT(vp->v_mount) && !vfslocked) {
458				vfslocked = 1;
459				if (!mtx_trylock(&Giant)) {
460					VM_OBJECT_UNLOCK(object);
461					mtx_lock(&Giant);
462					goto restart;
463				}
464			}
465			vm_object_vndeallocate(object);
466			VFS_UNLOCK_GIANT(vfslocked);
467			return;
468		} else
469			/*
470			 * This is to handle the case that the object
471			 * changed type while we dropped its lock to
472			 * obtain Giant.
473			 */
474			VFS_UNLOCK_GIANT(vfslocked);
475
476		KASSERT(object->ref_count != 0,
477			("vm_object_deallocate: object deallocated too many times: %d", object->type));
478
479		/*
480		 * If the reference count goes to 0 we start calling
481		 * vm_object_terminate() on the object chain.
482		 * A ref count of 1 may be a special case depending on the
483		 * shadow count being 0 or 1.
484		 */
485		object->ref_count--;
486		if (object->ref_count > 1) {
487			VM_OBJECT_UNLOCK(object);
488			return;
489		} else if (object->ref_count == 1) {
490			if (object->shadow_count == 0 &&
491			    object->handle == NULL &&
492			    (object->type == OBJT_DEFAULT ||
493			     object->type == OBJT_SWAP)) {
494				vm_object_set_flag(object, OBJ_ONEMAPPING);
495			} else if ((object->shadow_count == 1) &&
496			    (object->handle == NULL) &&
497			    (object->type == OBJT_DEFAULT ||
498			     object->type == OBJT_SWAP)) {
499				vm_object_t robject;
500
501				robject = LIST_FIRST(&object->shadow_head);
502				KASSERT(robject != NULL,
503				    ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
504					 object->ref_count,
505					 object->shadow_count));
506				if (!VM_OBJECT_TRYLOCK(robject)) {
507					/*
508					 * Avoid a potential deadlock.
509					 */
510					object->ref_count++;
511					VM_OBJECT_UNLOCK(object);
512					/*
513					 * More likely than not the thread
514					 * holding robject's lock has lower
515					 * priority than the current thread.
516					 * Let the lower priority thread run.
517					 */
518					pause("vmo_de", 1);
519					continue;
520				}
521				/*
522				 * Collapse object into its shadow unless its
523				 * shadow is dead.  In that case, object will
524				 * be deallocated by the thread that is
525				 * deallocating its shadow.
526				 */
527				if ((robject->flags & OBJ_DEAD) == 0 &&
528				    (robject->handle == NULL) &&
529				    (robject->type == OBJT_DEFAULT ||
530				     robject->type == OBJT_SWAP)) {
531
532					robject->ref_count++;
533retry:
534					if (robject->paging_in_progress) {
535						VM_OBJECT_UNLOCK(object);
536						vm_object_pip_wait(robject,
537						    "objde1");
538						temp = robject->backing_object;
539						if (object == temp) {
540							VM_OBJECT_LOCK(object);
541							goto retry;
542						}
543					} else if (object->paging_in_progress) {
544						VM_OBJECT_UNLOCK(robject);
545						object->flags |= OBJ_PIPWNT;
546						msleep(object,
547						    VM_OBJECT_MTX(object),
548						    PDROP | PVM, "objde2", 0);
549						VM_OBJECT_LOCK(robject);
550						temp = robject->backing_object;
551						if (object == temp) {
552							VM_OBJECT_LOCK(object);
553							goto retry;
554						}
555					} else
556						VM_OBJECT_UNLOCK(object);
557
558					if (robject->ref_count == 1) {
559						robject->ref_count--;
560						object = robject;
561						goto doterm;
562					}
563					object = robject;
564					vm_object_collapse(object);
565					VM_OBJECT_UNLOCK(object);
566					continue;
567				}
568				VM_OBJECT_UNLOCK(robject);
569			}
570			VM_OBJECT_UNLOCK(object);
571			return;
572		}
573doterm:
574		temp = object->backing_object;
575		if (temp != NULL) {
576			VM_OBJECT_LOCK(temp);
577			LIST_REMOVE(object, shadow_list);
578			temp->shadow_count--;
579			temp->generation++;
580			VM_OBJECT_UNLOCK(temp);
581			object->backing_object = NULL;
582		}
583		/*
584		 * Don't double-terminate, we could be in a termination
585		 * recursion due to the terminate having to sync data
586		 * to disk.
587		 */
588		if ((object->flags & OBJ_DEAD) == 0)
589			vm_object_terminate(object);
590		else
591			VM_OBJECT_UNLOCK(object);
592		object = temp;
593	}
594}
595
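/*
 * A minimal lifecycle sketch for the allocate/reference/deallocate interface
 * above: create a two-page anonymous object, take a second reference, then
 * drop both references; the final vm_object_deallocate() terminates the
 * object.  The helper name is illustrative only, and no object locks may be
 * held across these calls.
 */
#if 0
static void
example_object_lifecycle(void)
{
	vm_object_t object;

	object = vm_object_allocate(OBJT_DEFAULT, 2);	/* size is in pages */
	vm_object_reference(object);		/* ref_count 1 -> 2 */
	vm_object_deallocate(object);		/* ref_count 2 -> 1 */
	vm_object_deallocate(object);		/* last reference, terminates */
}
#endif
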
596/*
597 *	vm_object_destroy removes the object from the global object list
598 *      and frees the space for the object.
599 */
600void
601vm_object_destroy(vm_object_t object)
602{
603
604	/*
605	 * Remove the object from the global object list.
606	 */
607	mtx_lock(&vm_object_list_mtx);
608	TAILQ_REMOVE(&vm_object_list, object, object_list);
609	mtx_unlock(&vm_object_list_mtx);
610
611	/*
612	 * Free the space for the object.
613	 */
614	uma_zfree(obj_zone, object);
615
616}
617
618
619
620/*
621 *	vm_object_terminate actually destroys the specified object, freeing
622 *	up all previously used resources.
623 *
624 *	The object must be locked.
625 *	This routine may block.
626 */
627void
628vm_object_terminate(vm_object_t object)
629{
630	vm_page_t p;
631
632	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
633
634	/*
635	 * Make sure no one uses us.
636	 */
637	vm_object_set_flag(object, OBJ_DEAD);
638
639	/*
640	 * wait for the pageout daemon to be done with the object
641	 */
642	vm_object_pip_wait(object, "objtrm");
643
644	KASSERT(!object->paging_in_progress,
645		("vm_object_terminate: pageout in progress"));
646
647	/*
648	 * Clean and free the pages, as appropriate. All references to the
649	 * object are gone, so we don't need to lock it.
650	 */
651	if (object->type == OBJT_VNODE) {
652		struct vnode *vp = (struct vnode *)object->handle;
653
654		/*
655		 * Clean pages and flush buffers.
656		 */
657		vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
658		VM_OBJECT_UNLOCK(object);
659
660		vinvalbuf(vp, V_SAVE, NULL, 0, 0);
661
662		VM_OBJECT_LOCK(object);
663	}
664
665	KASSERT(object->ref_count == 0,
666		("vm_object_terminate: object with references, ref_count=%d",
667		object->ref_count));
668
669	/*
670	 * Now free any remaining pages. For internal objects, this also
671	 * removes them from paging queues. Don't free wired pages, just
672	 * remove them from the object.
673	 */
674	vm_page_lock_queues();
675	while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
676		KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0,
677			("vm_object_terminate: freeing busy page %p "
678			"p->busy = %d, p->flags %x\n", p, p->busy, p->flags));
679		if (p->wire_count == 0) {
680			vm_page_free(p);
681			cnt.v_pfree++;
682		} else {
683			vm_page_remove(p);
684		}
685	}
686	vm_page_unlock_queues();
687
688#if VM_NRESERVLEVEL > 0
689	if (__predict_false(!LIST_EMPTY(&object->rvq)))
690		vm_reserv_break_all(object);
691#endif
692	if (__predict_false(object->cache != NULL))
693		vm_page_cache_free(object, 0, 0);
694
695	/*
696	 * Let the pager know object is dead.
697	 */
698	vm_pager_deallocate(object);
699	VM_OBJECT_UNLOCK(object);
700
701	vm_object_destroy(object);
702}
703
704/*
705 *	vm_object_page_clean
706 *
707 *	Clean all dirty pages in the specified range of object.  Leaves page
708 * 	on whatever queue it is currently on.   If NOSYNC is set then do not
709 *	write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC),
710 *	leaving the object dirty.
711 *
712 *	When stuffing pages asynchronously, allow clustering.  XXX we need a
713 *	synchronous clustering mode implementation.
714 *
715 *	Odd semantics: if "end" is zero, we clean to the end of the object.
716 *
717 *	The object must be locked.
718 */
719void
720vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags)
721{
722	vm_page_t p, np;
723	vm_pindex_t tstart, tend;
724	vm_pindex_t pi;
725	int clearobjflags;
726	int pagerflags;
727	int curgeneration;
728
729	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
730	if (object->type != OBJT_VNODE ||
731		(object->flags & OBJ_MIGHTBEDIRTY) == 0)
732		return;
733
734	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
735	pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
736
737	vm_object_set_flag(object, OBJ_CLEANING);
738
739	tstart = start;
740	if (end == 0) {
741		tend = object->size;
742	} else {
743		tend = end;
744	}
745
746	vm_page_lock_queues();
747	/*
748	 * If the caller is smart and only msync()s a range he knows is
749	 * dirty, we may be able to avoid an object scan.  This results in
750 * a phenomenal improvement in performance.  We cannot do this
751	 * as a matter of course because the object may be huge - e.g.
752 * the size might be in the gigabytes or terabytes.
753	 */
754	if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
755		vm_pindex_t tscan;
756		int scanlimit;
757		int scanreset;
758
759		scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
760		if (scanreset < 16)
761			scanreset = 16;
762		pagerflags |= VM_PAGER_IGNORE_CLEANCHK;
763
764		scanlimit = scanreset;
765		tscan = tstart;
766		while (tscan < tend) {
767			curgeneration = object->generation;
768			p = vm_page_lookup(object, tscan);
769			if (p == NULL || p->valid == 0) {
770				if (--scanlimit == 0)
771					break;
772				++tscan;
773				continue;
774			}
775			vm_page_test_dirty(p);
776			if ((p->dirty & p->valid) == 0) {
777				if (--scanlimit == 0)
778					break;
779				++tscan;
780				continue;
781			}
782			/*
783			 * If we have been asked to skip nosync pages and
784			 * this is a nosync page, we can't continue.
785			 */
786			if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) {
787				if (--scanlimit == 0)
788					break;
789				++tscan;
790				continue;
791			}
792			scanlimit = scanreset;
793
794			/*
795			 * This returns 0 if it was unable to busy the first
796			 * page (i.e. had to sleep).
797			 */
798			tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
799		}
800
801		/*
802		 * If everything was dirty and we flushed it successfully,
803		 * and the requested range is not the entire object, we
804		 * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
805		 * return immediately.
806		 */
807		if (tscan >= tend && (tstart || tend < object->size)) {
808			vm_page_unlock_queues();
809			vm_object_clear_flag(object, OBJ_CLEANING);
810			return;
811		}
812		pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK;
813	}
814
815	/*
816	 * Generally set CLEANCHK interlock and make the page read-only so
817	 * we can then clear the object flags.
818	 *
819	 * However, if this is a nosync mmap then the object is likely to
820	 * stay dirty so do not mess with the page and do not clear the
821	 * object flags.
822	 */
823	clearobjflags = 1;
824	TAILQ_FOREACH(p, &object->memq, listq) {
825		p->oflags |= VPO_CLEANCHK;
826		if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC))
827			clearobjflags = 0;
828		else
829			pmap_remove_write(p);
830	}
831
832	if (clearobjflags && (tstart == 0) && (tend == object->size)) {
833		struct vnode *vp;
834
835		vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY);
836		if (object->type == OBJT_VNODE &&
837		    (vp = (struct vnode *)object->handle) != NULL) {
838			VI_LOCK(vp);
839			if (vp->v_iflag & VI_OBJDIRTY)
840				vp->v_iflag &= ~VI_OBJDIRTY;
841			VI_UNLOCK(vp);
842		}
843	}
844
845rescan:
846	curgeneration = object->generation;
847
848	for (p = TAILQ_FIRST(&object->memq); p; p = np) {
849		int n;
850
851		np = TAILQ_NEXT(p, listq);
852
853again:
854		pi = p->pindex;
855		if ((p->oflags & VPO_CLEANCHK) == 0 ||
856			(pi < tstart) || (pi >= tend) ||
857		    p->valid == 0) {
858			p->oflags &= ~VPO_CLEANCHK;
859			continue;
860		}
861
862		vm_page_test_dirty(p);
863		if ((p->dirty & p->valid) == 0) {
864			p->oflags &= ~VPO_CLEANCHK;
865			continue;
866		}
867
868		/*
869		 * If we have been asked to skip nosync pages and this is a
870		 * nosync page, skip it.  Note that the object flags were
871		 * not cleared in this case so we do not have to set them.
872		 */
873		if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) {
874			p->oflags &= ~VPO_CLEANCHK;
875			continue;
876		}
877
878		n = vm_object_page_collect_flush(object, p,
879			curgeneration, pagerflags);
880		if (n == 0)
881			goto rescan;
882
883		if (object->generation != curgeneration)
884			goto rescan;
885
886		/*
887		 * Try to optimize the next page.  If we can't we pick up
888		 * our (random) scan where we left off.
889		 */
890		if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
891			if ((p = vm_page_lookup(object, pi + n)) != NULL)
892				goto again;
893		}
894	}
895	vm_page_unlock_queues();
896#if 0
897	VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
898#endif
899
900	vm_object_clear_flag(object, OBJ_CLEANING);
901	return;
902}
903
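/*
 * A minimal usage sketch for vm_object_page_clean(): flush every dirty,
 * resident page of a vnode-backed object to its pager synchronously, using
 * the "end == 0" convention documented above.  Real callers such as
 * vm_object_sync() additionally arrange vn_start_write() and vnode locking
 * around the call; the helper name here is illustrative only.
 */
#if 0
static void
example_flush_object(vm_object_t object)
{

	VM_OBJECT_LOCK(object);
	vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
	VM_OBJECT_UNLOCK(object);
}
#endif
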
904static int
905vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
906{
907	int runlen;
908	int maxf;
909	int chkb;
910	int maxb;
911	int i;
912	vm_pindex_t pi;
913	vm_page_t maf[vm_pageout_page_count];
914	vm_page_t mab[vm_pageout_page_count];
915	vm_page_t ma[vm_pageout_page_count];
916
917	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
918	pi = p->pindex;
919	while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
920		vm_page_lock_queues();
921		if (object->generation != curgeneration) {
922			return(0);
923		}
924	}
925	maxf = 0;
926	for(i = 1; i < vm_pageout_page_count; i++) {
927		vm_page_t tp;
928
929		if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
930			if ((tp->oflags & VPO_BUSY) ||
931				((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
932				 (tp->oflags & VPO_CLEANCHK) == 0) ||
933				(tp->busy != 0))
934				break;
935			vm_page_test_dirty(tp);
936			if ((tp->dirty & tp->valid) == 0) {
937				tp->oflags &= ~VPO_CLEANCHK;
938				break;
939			}
940			maf[ i - 1 ] = tp;
941			maxf++;
942			continue;
943		}
944		break;
945	}
946
947	maxb = 0;
948	chkb = vm_pageout_page_count -  maxf;
949	if (chkb) {
950		for(i = 1; i < chkb;i++) {
951			vm_page_t tp;
952
953			if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
954				if ((tp->oflags & VPO_BUSY) ||
955					((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
956					 (tp->oflags & VPO_CLEANCHK) == 0) ||
957					(tp->busy != 0))
958					break;
959				vm_page_test_dirty(tp);
960				if ((tp->dirty & tp->valid) == 0) {
961					tp->oflags &= ~VPO_CLEANCHK;
962					break;
963				}
964				mab[ i - 1 ] = tp;
965				maxb++;
966				continue;
967			}
968			break;
969		}
970	}
971
972	for(i = 0; i < maxb; i++) {
973		int index = (maxb - i) - 1;
974		ma[index] = mab[i];
975		ma[index]->oflags &= ~VPO_CLEANCHK;
976	}
977	p->oflags &= ~VPO_CLEANCHK;
978	ma[maxb] = p;
979	for(i = 0; i < maxf; i++) {
980		int index = (maxb + i) + 1;
981		ma[index] = maf[i];
982		ma[index]->oflags &= ~VPO_CLEANCHK;
983	}
984	runlen = maxb + maxf + 1;
985
986	vm_pageout_flush(ma, runlen, pagerflags);
987	for (i = 0; i < runlen; i++) {
988		if (ma[i]->valid & ma[i]->dirty) {
989			pmap_remove_write(ma[i]);
990			ma[i]->oflags |= VPO_CLEANCHK;
991
992			/*
993			 * maxf will end up being the actual number of pages
994			 * we wrote out contiguously, non-inclusive of the
995			 * first page.  We do not count look-behind pages.
996			 */
997			if (i >= maxb + 1 && (maxf > i - maxb - 1))
998				maxf = i - maxb - 1;
999		}
1000	}
1001	return(maxf + 1);
1002}
1003
1004/*
1005 * Note that there is absolutely no sense in writing out
1006 * anonymous objects, so we track down the vnode object
1007 * to write out.
1008 * We invalidate (remove) all pages from the address space
1009 * for semantic correctness.
1010 *
1011 * Note: certain anonymous maps, such as MAP_NOSYNC maps,
1012 * may start out with a NULL object.
1013 */
1014void
1015vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
1016    boolean_t syncio, boolean_t invalidate)
1017{
1018	vm_object_t backing_object;
1019	struct vnode *vp;
1020	struct mount *mp;
1021	int flags;
1022
1023	if (object == NULL)
1024		return;
1025	VM_OBJECT_LOCK(object);
1026	while ((backing_object = object->backing_object) != NULL) {
1027		VM_OBJECT_LOCK(backing_object);
1028		offset += object->backing_object_offset;
1029		VM_OBJECT_UNLOCK(object);
1030		object = backing_object;
1031		if (object->size < OFF_TO_IDX(offset + size))
1032			size = IDX_TO_OFF(object->size) - offset;
1033	}
1034	/*
1035	 * Flush pages if writing is allowed, invalidate them
1036	 * if invalidation requested.  Pages undergoing I/O
1037	 * will be ignored by vm_object_page_remove().
1038	 *
1039	 * We cannot lock the vnode and then wait for paging
1040	 * to complete without deadlocking against vm_fault.
1041	 * Instead we simply call vm_object_page_remove() and
1042	 * allow it to block internally on a page-by-page
1043	 * basis when it encounters pages undergoing async
1044	 * I/O.
1045	 */
1046	if (object->type == OBJT_VNODE &&
1047	    (object->flags & OBJ_MIGHTBEDIRTY) != 0) {
1048		int vfslocked;
1049		vp = object->handle;
1050		VM_OBJECT_UNLOCK(object);
1051		(void) vn_start_write(vp, &mp, V_WAIT);
1052		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1053		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1054		flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1055		flags |= invalidate ? OBJPC_INVAL : 0;
1056		VM_OBJECT_LOCK(object);
1057		vm_object_page_clean(object,
1058		    OFF_TO_IDX(offset),
1059		    OFF_TO_IDX(offset + size + PAGE_MASK),
1060		    flags);
1061		VM_OBJECT_UNLOCK(object);
1062		VOP_UNLOCK(vp, 0);
1063		VFS_UNLOCK_GIANT(vfslocked);
1064		vn_finished_write(mp);
1065		VM_OBJECT_LOCK(object);
1066	}
1067	if ((object->type == OBJT_VNODE ||
1068	     object->type == OBJT_DEVICE) && invalidate) {
1069		boolean_t purge;
1070		purge = old_msync || (object->type == OBJT_DEVICE);
1071		vm_object_page_remove(object,
1072		    OFF_TO_IDX(offset),
1073		    OFF_TO_IDX(offset + size + PAGE_MASK),
1074		    purge ? FALSE : TRUE);
1075	}
1076	VM_OBJECT_UNLOCK(object);
1077}
1078
1079/*
1080 *	vm_object_madvise:
1081 *
1082 *	Implements the madvise function at the object/page level.
1083 *
1084 *	MADV_WILLNEED	(any object)
1085 *
1086 *	    Activate the specified pages if they are resident.
1087 *
1088 *	MADV_DONTNEED	(any object)
1089 *
1090 *	    Deactivate the specified pages if they are resident.
1091 *
1092 *	MADV_FREE	(OBJT_DEFAULT/OBJT_SWAP objects,
1093 *			 OBJ_ONEMAPPING only)
1094 *
1095 *	    Deactivate and clean the specified pages if they are
1096 *	    resident.  This permits the process to reuse the pages
1097 *	    without faulting or the kernel to reclaim the pages
1098 *	    without I/O.
1099 */
1100void
1101vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
1102{
1103	vm_pindex_t end, tpindex;
1104	vm_object_t backing_object, tobject;
1105	vm_page_t m;
1106
1107	if (object == NULL)
1108		return;
1109	VM_OBJECT_LOCK(object);
1110	end = pindex + count;
1111	/*
1112	 * Locate and adjust resident pages
1113	 */
1114	for (; pindex < end; pindex += 1) {
1115relookup:
1116		tobject = object;
1117		tpindex = pindex;
1118shadowlookup:
1119		/*
1120		 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
1121		 * and those pages must be OBJ_ONEMAPPING.
1122		 */
1123		if (advise == MADV_FREE) {
1124			if ((tobject->type != OBJT_DEFAULT &&
1125			     tobject->type != OBJT_SWAP) ||
1126			    (tobject->flags & OBJ_ONEMAPPING) == 0) {
1127				goto unlock_tobject;
1128			}
1129		}
1130		m = vm_page_lookup(tobject, tpindex);
1131		if (m == NULL && advise == MADV_WILLNEED) {
1132			/*
1133			 * If the page is cached, reactivate it.
1134			 */
1135			m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED |
1136			    VM_ALLOC_NOBUSY);
1137		}
1138		if (m == NULL) {
1139			/*
1140			 * There may be swap even if there is no backing page
1141			 */
1142			if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1143				swap_pager_freespace(tobject, tpindex, 1);
1144			/*
1145			 * next object
1146			 */
1147			backing_object = tobject->backing_object;
1148			if (backing_object == NULL)
1149				goto unlock_tobject;
1150			VM_OBJECT_LOCK(backing_object);
1151			tpindex += OFF_TO_IDX(tobject->backing_object_offset);
1152			if (tobject != object)
1153				VM_OBJECT_UNLOCK(tobject);
1154			tobject = backing_object;
1155			goto shadowlookup;
1156		}
1157		/*
1158		 * If the page is busy or not in a normal active state,
1159		 * we skip it.  If the page is not managed there are no
1160		 * page queues to mess with.  Things can break if we mess
1161		 * with pages in any of the below states.
1162		 */
1163		vm_page_lock_queues();
1164		if (m->hold_count ||
1165		    m->wire_count ||
1166		    (m->flags & PG_UNMANAGED) ||
1167		    m->valid != VM_PAGE_BITS_ALL) {
1168			vm_page_unlock_queues();
1169			goto unlock_tobject;
1170		}
1171		if ((m->oflags & VPO_BUSY) || m->busy) {
1172			vm_page_flag_set(m, PG_REFERENCED);
1173			vm_page_unlock_queues();
1174			if (object != tobject)
1175				VM_OBJECT_UNLOCK(object);
1176			m->oflags |= VPO_WANTED;
1177			msleep(m, VM_OBJECT_MTX(tobject), PDROP | PVM, "madvpo", 0);
1178			VM_OBJECT_LOCK(object);
1179  			goto relookup;
1180		}
1181		if (advise == MADV_WILLNEED) {
1182			vm_page_activate(m);
1183		} else if (advise == MADV_DONTNEED) {
1184			vm_page_dontneed(m);
1185		} else if (advise == MADV_FREE) {
1186			/*
1187			 * Mark the page clean.  This will allow the page
1188			 * to be freed up by the system.  However, such pages
1189			 * are often reused quickly by malloc()/free()
1190			 * so we do not do anything that would cause
1191			 * a page fault if we can help it.
1192			 *
1193			 * Specifically, we do not try to actually free
1194			 * the page now nor do we try to put it in the
1195			 * cache (which would cause a page fault on reuse).
1196			 *
1197			 * But we do make the page as freeable as we
1198			 * can without actually taking the step of unmapping
1199			 * it.
1200			 */
1201			pmap_clear_modify(m);
1202			m->dirty = 0;
1203			m->act_count = 0;
1204			vm_page_dontneed(m);
1205		}
1206		vm_page_unlock_queues();
1207		if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1208			swap_pager_freespace(tobject, tpindex, 1);
1209unlock_tobject:
1210		if (tobject != object)
1211			VM_OBJECT_UNLOCK(tobject);
1212	}
1213	VM_OBJECT_UNLOCK(object);
1214}
1215
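/*
 * A minimal usage sketch for vm_object_madvise(): apply MADV_DONTNEED to a
 * page-aligned byte range of an object, converting the byte offset and
 * length into a page index and page count.  The helper name is illustrative
 * only; the real caller of this interface is the vm_map layer.
 */
#if 0
static void
example_object_dontneed(vm_object_t object, vm_ooffset_t offset, vm_size_t len)
{

	/* vm_object_madvise() acquires and releases the object lock itself. */
	vm_object_madvise(object, OFF_TO_IDX(offset), atop(len),
	    MADV_DONTNEED);
}
#endif
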
1216/*
1217 *	vm_object_shadow:
1218 *
1219 *	Create a new object which is backed by the
1220 *	specified existing object range.  The source
1221 *	object reference is deallocated.
1222 *
1223 *	The new object and offset into that object
1224 *	are returned in the source parameters.
1225 */
1226void
1227vm_object_shadow(
1228	vm_object_t *object,	/* IN/OUT */
1229	vm_ooffset_t *offset,	/* IN/OUT */
1230	vm_size_t length)
1231{
1232	vm_object_t source;
1233	vm_object_t result;
1234
1235	source = *object;
1236
1237	/*
1238	 * Don't create the new object if the old object isn't shared.
1239	 */
1240	if (source != NULL) {
1241		VM_OBJECT_LOCK(source);
1242		if (source->ref_count == 1 &&
1243		    source->handle == NULL &&
1244		    (source->type == OBJT_DEFAULT ||
1245		     source->type == OBJT_SWAP)) {
1246			VM_OBJECT_UNLOCK(source);
1247			return;
1248		}
1249		VM_OBJECT_UNLOCK(source);
1250	}
1251
1252	/*
1253	 * Allocate a new object with the given length.
1254	 */
1255	result = vm_object_allocate(OBJT_DEFAULT, length);
1256
1257	/*
1258	 * The new object shadows the source object, adding a reference to it.
1259	 * Our caller changes his reference to point to the new object,
1260	 * removing a reference to the source object.  Net result: no change
1261	 * of reference count.
1262	 *
1263	 * Try to optimize the result object's page color when shadowing
1264	 * in order to maintain page coloring consistency in the combined
1265	 * shadowed object.
1266	 */
1267	result->backing_object = source;
1268	/*
1269	 * Store the offset into the source object, and fix up the offset into
1270	 * the new object.
1271	 */
1272	result->backing_object_offset = *offset;
1273	if (source != NULL) {
1274		VM_OBJECT_LOCK(source);
1275		LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
1276		source->shadow_count++;
1277		source->generation++;
1278#if VM_NRESERVLEVEL > 0
1279		result->flags |= source->flags & (OBJ_NEEDGIANT | OBJ_COLORED);
1280		result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) &
1281		    ((1 << (VM_NFREEORDER - 1)) - 1);
1282#else
1283		result->flags |= source->flags & OBJ_NEEDGIANT;
1284#endif
1285		VM_OBJECT_UNLOCK(source);
1286	}
1287
1288
1289	/*
1290	 * Return the new things
1291	 */
1292	*offset = 0;
1293	*object = result;
1294}
1295
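/*
 * A minimal usage sketch for vm_object_shadow(), mirroring how the vm_map
 * layer gives a map entry a copy-on-write shadow of its current object.  The
 * entry's object pointer and offset are rewritten in place, and the caller's
 * reference moves to the new shadow object, so no extra
 * vm_object_deallocate() is needed.  The helper name is illustrative only.
 */
#if 0
static void
example_shadow_entry(vm_map_entry_t entry)
{

	vm_object_shadow(&entry->object.vm_object, &entry->offset,
	    atop(entry->end - entry->start));
}
#endif
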
1296/*
1297 *	vm_object_split:
1298 *
1299 * Split the pages in a map entry into a new object.  This affords
1300 * easier removal of unused pages, and keeps object inheritance from
1301 * having a negative impact on memory usage.
1302 */
1303void
1304vm_object_split(vm_map_entry_t entry)
1305{
1306	vm_page_t m, m_next;
1307	vm_object_t orig_object, new_object, source;
1308	vm_pindex_t idx, offidxstart;
1309	vm_size_t size;
1310
1311	orig_object = entry->object.vm_object;
1312	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
1313		return;
1314	if (orig_object->ref_count <= 1)
1315		return;
1316	VM_OBJECT_UNLOCK(orig_object);
1317
1318	offidxstart = OFF_TO_IDX(entry->offset);
1319	size = atop(entry->end - entry->start);
1320
1321	/*
1322	 * If swap_pager_copy() is later called, it will convert new_object
1323	 * into a swap object.
1324	 */
1325	new_object = vm_object_allocate(OBJT_DEFAULT, size);
1326
1327	/*
1328	 * At this point, the new object is still private, so the order in
1329	 * which the original and new objects are locked does not matter.
1330	 */
1331	VM_OBJECT_LOCK(new_object);
1332	VM_OBJECT_LOCK(orig_object);
1333	source = orig_object->backing_object;
1334	if (source != NULL) {
1335		VM_OBJECT_LOCK(source);
1336		if ((source->flags & OBJ_DEAD) != 0) {
1337			VM_OBJECT_UNLOCK(source);
1338			VM_OBJECT_UNLOCK(orig_object);
1339			VM_OBJECT_UNLOCK(new_object);
1340			vm_object_deallocate(new_object);
1341			VM_OBJECT_LOCK(orig_object);
1342			return;
1343		}
1344		LIST_INSERT_HEAD(&source->shadow_head,
1345				  new_object, shadow_list);
1346		source->shadow_count++;
1347		source->generation++;
1348		vm_object_reference_locked(source);	/* for new_object */
1349		vm_object_clear_flag(source, OBJ_ONEMAPPING);
1350		VM_OBJECT_UNLOCK(source);
1351		new_object->backing_object_offset =
1352			orig_object->backing_object_offset + entry->offset;
1353		new_object->backing_object = source;
1354	}
1355	new_object->flags |= orig_object->flags & OBJ_NEEDGIANT;
1356retry:
1357	if ((m = TAILQ_FIRST(&orig_object->memq)) != NULL) {
1358		if (m->pindex < offidxstart) {
1359			m = vm_page_splay(offidxstart, orig_object->root);
1360			if ((orig_object->root = m)->pindex < offidxstart)
1361				m = TAILQ_NEXT(m, listq);
1362		}
1363	}
1364	vm_page_lock_queues();
1365	for (; m != NULL && (idx = m->pindex - offidxstart) < size;
1366	    m = m_next) {
1367		m_next = TAILQ_NEXT(m, listq);
1368
1369		/*
1370		 * We must wait for pending I/O to complete before we can
1371		 * rename the page.
1372		 *
1373		 * We do not have to VM_PROT_NONE the page as mappings should
1374		 * not be changed by this operation.
1375		 */
1376		if ((m->oflags & VPO_BUSY) || m->busy) {
1377			vm_page_flag_set(m, PG_REFERENCED);
1378			vm_page_unlock_queues();
1379			VM_OBJECT_UNLOCK(new_object);
1380			m->oflags |= VPO_WANTED;
1381			msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0);
1382			VM_OBJECT_LOCK(new_object);
1383			goto retry;
1384		}
1385		vm_page_rename(m, new_object, idx);
1386		/* page automatically made dirty by rename and cache handled */
1387		vm_page_busy(m);
1388	}
1389	vm_page_unlock_queues();
1390	if (orig_object->type == OBJT_SWAP) {
1391		/*
1392		 * swap_pager_copy() can sleep, in which case the orig_object's
1393		 * and new_object's locks are released and reacquired.
1394		 */
1395		swap_pager_copy(orig_object, new_object, offidxstart, 0);
1396
1397		/*
1398		 * Transfer any cached pages from orig_object to new_object.
1399		 */
1400		if (__predict_false(orig_object->cache != NULL))
1401			vm_page_cache_transfer(orig_object, offidxstart,
1402			    new_object);
1403	}
1404	VM_OBJECT_UNLOCK(orig_object);
1405	TAILQ_FOREACH(m, &new_object->memq, listq)
1406		vm_page_wakeup(m);
1407	VM_OBJECT_UNLOCK(new_object);
1408	entry->object.vm_object = new_object;
1409	entry->offset = 0LL;
1410	vm_object_deallocate(orig_object);
1411	VM_OBJECT_LOCK(new_object);
1412}
1413
1414#define	OBSC_TEST_ALL_SHADOWED	0x0001
1415#define	OBSC_COLLAPSE_NOWAIT	0x0002
1416#define	OBSC_COLLAPSE_WAIT	0x0004
1417
1418static int
1419vm_object_backing_scan(vm_object_t object, int op)
1420{
1421	int r = 1;
1422	vm_page_t p;
1423	vm_object_t backing_object;
1424	vm_pindex_t backing_offset_index;
1425
1426	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1427	VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED);
1428
1429	backing_object = object->backing_object;
1430	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1431
1432	/*
1433	 * Initial conditions
1434	 */
1435	if (op & OBSC_TEST_ALL_SHADOWED) {
1436		/*
1437		 * We do not want to have to test for the existence of cache
1438		 * or swap pages in the backing object.  XXX but with the
1439		 * new swapper this would be pretty easy to do.
1440		 *
1441		 * XXX what about anonymous MAP_SHARED memory that hasn't
1442		 * been ZFOD faulted yet?  If we do not test for this, the
1443		 * shadow test may succeed! XXX
1444		 */
1445		if (backing_object->type != OBJT_DEFAULT) {
1446			return (0);
1447		}
1448	}
1449	if (op & OBSC_COLLAPSE_WAIT) {
1450		vm_object_set_flag(backing_object, OBJ_DEAD);
1451	}
1452
1453	/*
1454	 * Our scan
1455	 */
1456	p = TAILQ_FIRST(&backing_object->memq);
1457	while (p) {
1458		vm_page_t next = TAILQ_NEXT(p, listq);
1459		vm_pindex_t new_pindex = p->pindex - backing_offset_index;
1460
1461		if (op & OBSC_TEST_ALL_SHADOWED) {
1462			vm_page_t pp;
1463
1464			/*
1465			 * Ignore pages outside the parent object's range
1466			 * and outside the parent object's mapping of the
1467			 * backing object.
1468			 *
1469			 * note that we do not busy the backing object's
1470			 * page.
1471			 */
1472			if (
1473			    p->pindex < backing_offset_index ||
1474			    new_pindex >= object->size
1475			) {
1476				p = next;
1477				continue;
1478			}
1479
1480			/*
1481			 * See if the parent has the page or if the parent's
1482			 * object pager has the page.  If the parent has the
1483			 * page but the page is not valid, the parent's
1484			 * object pager must have the page.
1485			 *
1486			 * If this fails, the parent does not completely shadow
1487			 * the object and we might as well give up now.
1488			 */
1489
1490			pp = vm_page_lookup(object, new_pindex);
1491			if (
1492			    (pp == NULL || pp->valid == 0) &&
1493			    !vm_pager_has_page(object, new_pindex, NULL, NULL)
1494			) {
1495				r = 0;
1496				break;
1497			}
1498		}
1499
1500		/*
1501		 * Check for busy page
1502		 */
1503		if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
1504			vm_page_t pp;
1505
1506			if (op & OBSC_COLLAPSE_NOWAIT) {
1507				if ((p->oflags & VPO_BUSY) ||
1508				    !p->valid ||
1509				    p->busy) {
1510					p = next;
1511					continue;
1512				}
1513			} else if (op & OBSC_COLLAPSE_WAIT) {
1514				if ((p->oflags & VPO_BUSY) || p->busy) {
1515					vm_page_lock_queues();
1516					vm_page_flag_set(p, PG_REFERENCED);
1517					vm_page_unlock_queues();
1518					VM_OBJECT_UNLOCK(object);
1519					p->oflags |= VPO_WANTED;
1520					msleep(p, VM_OBJECT_MTX(backing_object),
1521					    PDROP | PVM, "vmocol", 0);
1522					VM_OBJECT_LOCK(object);
1523					VM_OBJECT_LOCK(backing_object);
1524					/*
1525					 * If we slept, anything could have
1526					 * happened.  Since the object is
1527					 * marked dead, the backing offset
1528					 * should not have changed so we
1529					 * just restart our scan.
1530					 */
1531					p = TAILQ_FIRST(&backing_object->memq);
1532					continue;
1533				}
1534			}
1535
1536			KASSERT(
1537			    p->object == backing_object,
1538			    ("vm_object_backing_scan: object mismatch")
1539			);
1540
1541			/*
1542			 * Destroy any associated swap
1543			 */
1544			if (backing_object->type == OBJT_SWAP) {
1545				swap_pager_freespace(
1546				    backing_object,
1547				    p->pindex,
1548				    1
1549				);
1550			}
1551
1552			if (
1553			    p->pindex < backing_offset_index ||
1554			    new_pindex >= object->size
1555			) {
1556				/*
1557				 * Page is out of the parent object's range, we
1558				 * can simply destroy it.
1559				 */
1560				vm_page_lock_queues();
1561				KASSERT(!pmap_page_is_mapped(p),
1562				    ("freeing mapped page %p", p));
1563				if (p->wire_count == 0)
1564					vm_page_free(p);
1565				else
1566					vm_page_remove(p);
1567				vm_page_unlock_queues();
1568				p = next;
1569				continue;
1570			}
1571
1572			pp = vm_page_lookup(object, new_pindex);
1573			if (
1574			    pp != NULL ||
1575			    vm_pager_has_page(object, new_pindex, NULL, NULL)
1576			) {
1577				/*
1578				 * page already exists in parent OR swap exists
1579				 * for this location in the parent.  Destroy
1580				 * the original page from the backing object.
1581				 *
1582				 * Leave the parent's page alone
1583				 */
1584				vm_page_lock_queues();
1585				KASSERT(!pmap_page_is_mapped(p),
1586				    ("freeing mapped page %p", p));
1587				if (p->wire_count == 0)
1588					vm_page_free(p);
1589				else
1590					vm_page_remove(p);
1591				vm_page_unlock_queues();
1592				p = next;
1593				continue;
1594			}
1595
1596#if VM_NRESERVLEVEL > 0
1597			/*
1598			 * Rename the reservation.
1599			 */
1600			vm_reserv_rename(p, object, backing_object,
1601			    backing_offset_index);
1602#endif
1603
1604			/*
1605			 * Page does not exist in parent, rename the
1606			 * page from the backing object to the main object.
1607			 *
1608			 * If the page was mapped to a process, it can remain
1609			 * mapped through the rename.
1610			 */
1611			vm_page_lock_queues();
1612			vm_page_rename(p, object, new_pindex);
1613			vm_page_unlock_queues();
1614			/* page automatically made dirty by rename */
1615		}
1616		p = next;
1617	}
1618	return (r);
1619}
1620
1621
1622/*
1623 * this version of collapse allows the operation to occur earlier and
1624 * when paging_in_progress is true for an object...  This is not a complete
1625 * operation, but should plug 99.9% of the rest of the leaks.
1626 */
1627static void
1628vm_object_qcollapse(vm_object_t object)
1629{
1630	vm_object_t backing_object = object->backing_object;
1631
1632	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1633	VM_OBJECT_LOCK_ASSERT(backing_object, MA_OWNED);
1634
1635	if (backing_object->ref_count != 1)
1636		return;
1637
1638	vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
1639}
1640
1641/*
1642 *	vm_object_collapse:
1643 *
1644 *	Collapse an object with the object backing it.
1645 *	Pages in the backing object are moved into the
1646 *	parent, and the backing object is deallocated.
1647 */
1648void
1649vm_object_collapse(vm_object_t object)
1650{
1651	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1652
1653	while (TRUE) {
1654		vm_object_t backing_object;
1655
1656		/*
1657		 * Verify that the conditions are right for collapse:
1658		 *
1659		 * The object exists and the backing object exists.
1660		 */
1661		if ((backing_object = object->backing_object) == NULL)
1662			break;
1663
1664		/*
1665		 * we check the backing object first, because it is most likely
1666		 * not collapsable.
1667		 * not collapsible.
1668		VM_OBJECT_LOCK(backing_object);
1669		if (backing_object->handle != NULL ||
1670		    (backing_object->type != OBJT_DEFAULT &&
1671		     backing_object->type != OBJT_SWAP) ||
1672		    (backing_object->flags & OBJ_DEAD) ||
1673		    object->handle != NULL ||
1674		    (object->type != OBJT_DEFAULT &&
1675		     object->type != OBJT_SWAP) ||
1676		    (object->flags & OBJ_DEAD)) {
1677			VM_OBJECT_UNLOCK(backing_object);
1678			break;
1679		}
1680
1681		if (
1682		    object->paging_in_progress != 0 ||
1683		    backing_object->paging_in_progress != 0
1684		) {
1685			vm_object_qcollapse(object);
1686			VM_OBJECT_UNLOCK(backing_object);
1687			break;
1688		}
1689		/*
1690		 * We know that we can either collapse the backing object (if
1691		 * the parent is the only reference to it) or (perhaps) have
1692		 * the parent bypass the object if the parent happens to shadow
1693		 * all the resident pages in the entire backing object.
1694		 *
1695		 * This is ignoring pager-backed pages such as swap pages.
1696		 * vm_object_backing_scan fails the shadowing test in this
1697		 * case.
1698		 */
1699		if (backing_object->ref_count == 1) {
1700			/*
1701			 * If there is exactly one reference to the backing
1702			 * object, we can collapse it into the parent.
1703			 */
1704			vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
1705
1706#if VM_NRESERVLEVEL > 0
1707			/*
1708			 * Break any reservations from backing_object.
1709			 */
1710			if (__predict_false(!LIST_EMPTY(&backing_object->rvq)))
1711				vm_reserv_break_all(backing_object);
1712#endif
1713
1714			/*
1715			 * Move the pager from backing_object to object.
1716			 */
1717			if (backing_object->type == OBJT_SWAP) {
1718				/*
1719				 * swap_pager_copy() can sleep, in which case
1720				 * the backing_object's and object's locks are
1721				 * released and reacquired.
1722				 */
1723				swap_pager_copy(
1724				    backing_object,
1725				    object,
1726				    OFF_TO_IDX(object->backing_object_offset), TRUE);
1727
1728				/*
1729				 * Free any cached pages from backing_object.
1730				 */
1731				if (__predict_false(backing_object->cache != NULL))
1732					vm_page_cache_free(backing_object, 0, 0);
1733			}
1734			/*
1735			 * Object now shadows whatever backing_object did.
1736			 * Note that the reference to
1737			 * backing_object->backing_object moves from within
1738			 * backing_object to within object.
1739			 */
1740			LIST_REMOVE(object, shadow_list);
1741			backing_object->shadow_count--;
1742			backing_object->generation++;
1743			if (backing_object->backing_object) {
1744				VM_OBJECT_LOCK(backing_object->backing_object);
1745				LIST_REMOVE(backing_object, shadow_list);
1746				LIST_INSERT_HEAD(
1747				    &backing_object->backing_object->shadow_head,
1748				    object, shadow_list);
1749				/*
1750				 * The shadow_count has not changed.
1751				 */
1752				backing_object->backing_object->generation++;
1753				VM_OBJECT_UNLOCK(backing_object->backing_object);
1754			}
1755			object->backing_object = backing_object->backing_object;
1756			object->backing_object_offset +=
1757			    backing_object->backing_object_offset;
1758
1759			/*
1760			 * Discard backing_object.
1761			 *
1762			 * Since the backing object has no pages, no pager left,
1763			 * and no object references within it, all that is
1764			 * necessary is to dispose of it.
1765			 */
1766			KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
1767			VM_OBJECT_UNLOCK(backing_object);
1768
1769			mtx_lock(&vm_object_list_mtx);
1770			TAILQ_REMOVE(
1771			    &vm_object_list,
1772			    backing_object,
1773			    object_list
1774			);
1775			mtx_unlock(&vm_object_list_mtx);
1776
1777			uma_zfree(obj_zone, backing_object);
1778
1779			object_collapses++;
1780		} else {
1781			vm_object_t new_backing_object;
1782
1783			/*
1784			 * If we do not entirely shadow the backing object,
1785			 * there is nothing we can do so we give up.
1786			 */
1787			if (object->resident_page_count != object->size &&
1788			    vm_object_backing_scan(object,
1789			    OBSC_TEST_ALL_SHADOWED) == 0) {
1790				VM_OBJECT_UNLOCK(backing_object);
1791				break;
1792			}
1793
1794			/*
1795			 * Make the parent shadow the next object in the
1796			 * chain.  Deallocating backing_object will not remove
1797			 * it, since its reference count is at least 2.
1798			 */
1799			LIST_REMOVE(object, shadow_list);
1800			backing_object->shadow_count--;
1801			backing_object->generation++;
1802
1803			new_backing_object = backing_object->backing_object;
1804			if ((object->backing_object = new_backing_object) != NULL) {
1805				VM_OBJECT_LOCK(new_backing_object);
1806				LIST_INSERT_HEAD(
1807				    &new_backing_object->shadow_head,
1808				    object,
1809				    shadow_list
1810				);
1811				new_backing_object->shadow_count++;
1812				new_backing_object->generation++;
1813				vm_object_reference_locked(new_backing_object);
1814				VM_OBJECT_UNLOCK(new_backing_object);
1815				object->backing_object_offset +=
1816					backing_object->backing_object_offset;
1817			}
1818
1819			/*
1820			 * Drop the reference count on backing_object. Since
1821			 * its ref_count was at least 2, it will not vanish.
1822			 */
1823			backing_object->ref_count--;
1824			VM_OBJECT_UNLOCK(backing_object);
1825			object_bypasses++;
1826		}
1827
1828		/*
1829		 * Try again with this object's new backing object.
1830		 */
1831	}
1832}
1833
1834/*
1835 *	vm_object_page_remove:
1836 *
1837 *	For the given object, either frees or invalidates each of the
1838 *	specified pages.  In general, a page is freed.  However, if a
1839 *	page is wired for any reason other than the existence of a
1840 *	managed, wired mapping, then it may be invalidated but not
1841 *	removed from the object.  Pages are specified by the given
1842 *	range ["start", "end") and Boolean "clean_only".  As a
1843 *	special case, if "end" is zero, then the range extends from
1844 *	"start" to the end of the object.  If "clean_only" is TRUE,
1845 *	then only the non-dirty pages within the specified range are
1846 *	affected.
1847 *
1848 *	In general, this operation should only be performed on objects
1849 *	that contain managed pages.  There are two exceptions.  First,
1850 *	it may be performed on the kernel and kmem objects.  Second,
1851 *	it may be used by msync(..., MS_INVALIDATE) to invalidate
1852 *	device-backed pages.
1853 *
1854 *	The object must be locked.
1855 */
1856void
1857vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
1858    boolean_t clean_only)
1859{
1860	vm_page_t p, next;
1861	int wirings;
1862
1863	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1864	if (object->resident_page_count == 0)
1865		goto skipmemq;
1866
1867	/*
1868	 * Since physically-backed objects do not use managed pages, we can't
1869	 * remove pages from the object (we must instead remove the page
1870	 * references, and then destroy the object).
1871	 */
1872	KASSERT(object->type != OBJT_PHYS || object == kernel_object ||
1873	    object == kmem_object,
1874	    ("attempt to remove pages from a physical object"));
1875
1876	vm_object_pip_add(object, 1);
1877again:
1878	vm_page_lock_queues();
1879	if ((p = TAILQ_FIRST(&object->memq)) != NULL) {
1880		if (p->pindex < start) {
1881			p = vm_page_splay(start, object->root);
1882			if ((object->root = p)->pindex < start)
1883				p = TAILQ_NEXT(p, listq);
1884		}
1885	}
1886	/*
1887	 * Assert: the variable p is either (1) the page with the
1888	 * least pindex greater than or equal to the parameter "start"
1889	 * or (2) NULL.
1890	 */
1891	for (;
1892	     p != NULL && (p->pindex < end || end == 0);
1893	     p = next) {
1894		next = TAILQ_NEXT(p, listq);
1895
1896		/*
1897		 * If the page is wired for any reason besides the
1898		 * existence of managed, wired mappings, then it cannot
1899		 * be freed.  For example, fictitious pages, which
1900		 * represent device memory, are inherently wired and
1901		 * cannot be freed.  They can, however, be invalidated
1902		 * if "clean_only" is FALSE.
1903		 */
1904		if ((wirings = p->wire_count) != 0 &&
1905		    (wirings = pmap_page_wired_mappings(p)) != p->wire_count) {
1906			/* Fictitious pages do not have managed mappings. */
1907			if ((p->flags & PG_FICTITIOUS) == 0)
1908				pmap_remove_all(p);
1909			/* Account for removal of managed, wired mappings. */
1910			p->wire_count -= wirings;
1911			if (!clean_only)
1912				p->valid = 0;
1913			continue;
1914		}
1915		if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
1916			goto again;
1917		KASSERT((p->flags & PG_FICTITIOUS) == 0,
1918		    ("vm_object_page_remove: page %p is fictitious", p));
1919		if (clean_only && p->valid) {
1920			pmap_remove_write(p);
1921			if (p->valid & p->dirty)
1922				continue;
1923		}
1924		pmap_remove_all(p);
1925		/* Account for removal of managed, wired mappings. */
1926		if (wirings != 0)
1927			p->wire_count -= wirings;
1928		vm_page_free(p);
1929	}
1930	vm_page_unlock_queues();
1931	vm_object_pip_wakeup(object);
1932skipmemq:
1933	if (__predict_false(object->cache != NULL))
1934		vm_page_cache_free(object, start, end);
1935}
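
/*
 * A minimal usage sketch (the object pointer "obj" is illustrative,
 * not taken from a particular caller): discard every resident page of
 * the object, dirty or not, by relying on the "end == 0" convention
 * described above.  The object lock must be held across the call:
 *
 *	VM_OBJECT_LOCK(obj);
 *	vm_object_page_remove(obj, 0, 0, FALSE);
 *	VM_OBJECT_UNLOCK(obj);
 *
 * vm_object_coalesce() below uses the same interface to purge pages
 * left over from a previous deallocation.
 */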
1936
1937/*
1938 *	Routine:	vm_object_coalesce
1939 *	Function:	Coalesces two objects backing up adjoining
1940 *			regions of memory into a single object.
1941 *
1942 *	returns TRUE if objects were combined.
1943 *
1944 *	NOTE:	Only works at the moment if the second object is NULL -
1945 *		if it's not, which object do we lock first?
1946 *
1947 *	Parameters:
1948 *		prev_object	First object to coalesce
1949 *		prev_offset	Offset into prev_object
1950 *		prev_size	Size of reference to prev_object
1951 *		next_size	Size of reference to the second object
1952 *
1953 *	Conditions:
1954 *	The object must *not* be locked.
1955 */
1956boolean_t
1957vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
1958	vm_size_t prev_size, vm_size_t next_size)
1959{
1960	vm_pindex_t next_pindex;
1961
1962	if (prev_object == NULL)
1963		return (TRUE);
1964	VM_OBJECT_LOCK(prev_object);
1965	if (prev_object->type != OBJT_DEFAULT &&
1966	    prev_object->type != OBJT_SWAP) {
1967		VM_OBJECT_UNLOCK(prev_object);
1968		return (FALSE);
1969	}
1970
1971	/*
1972	 * Try to collapse the object first
1973	 */
1974	vm_object_collapse(prev_object);
1975
1976	/*
1977	 * Can't coalesce if the object has more than one reference, is paged
1978	 * out, shadows another object, or has a copy elsewhere (any of which
1979	 * mean that the pages not mapped to prev_entry may be in use anyway).
1980	 */
1981	if (prev_object->backing_object != NULL) {
1982		VM_OBJECT_UNLOCK(prev_object);
1983		return (FALSE);
1984	}
1985
1986	prev_size >>= PAGE_SHIFT;
1987	next_size >>= PAGE_SHIFT;
1988	next_pindex = OFF_TO_IDX(prev_offset) + prev_size;
1989
1990	if ((prev_object->ref_count > 1) &&
1991	    (prev_object->size != next_pindex)) {
1992		VM_OBJECT_UNLOCK(prev_object);
1993		return (FALSE);
1994	}
1995
1996	/*
1997	 * Remove any pages that may still be in the object from a previous
1998	 * deallocation.
1999	 */
2000	if (next_pindex < prev_object->size) {
2001		vm_object_page_remove(prev_object,
2002				      next_pindex,
2003				      next_pindex + next_size, FALSE);
2004		if (prev_object->type == OBJT_SWAP)
2005			swap_pager_freespace(prev_object,
2006					     next_pindex, next_size);
2007	}
2008
2009	/*
2010	 * Extend the object if necessary.
2011	 */
2012	if (next_pindex + next_size > prev_object->size)
2013		prev_object->size = next_pindex + next_size;
2014
2015	VM_OBJECT_UNLOCK(prev_object);
2016	return (TRUE);
2017}
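
/*
 * A minimal calling sketch ("grow" and the surrounding control flow
 * are illustrative, not taken from a specific caller): when appending
 * "grow" bytes of anonymous memory directly after an existing mapping
 * of prev_size bytes backed by prev_object at prev_offset,
 *
 *	if (vm_object_coalesce(prev_object, prev_offset, prev_size, grow))
 *		the existing object now also covers the appended range;
 *	else
 *		a separate object must be created for the new range.
 *
 * prev_object must be unlocked on entry; the routine takes and drops
 * the object lock itself.
 */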
2018
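/*
 *	vm_object_set_writeable_dirty:
 *
 *	Record that the object's pages may be dirtied: set
 *	OBJ_MIGHTBEDIRTY on the object and, if the object is backed by
 *	a vnode, mark that vnode with VI_OBJDIRTY as well.
 *
 *	The object must be locked.
 */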
2019void
2020vm_object_set_writeable_dirty(vm_object_t object)
2021{
2022	struct vnode *vp;
2023
2024	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2025	if ((object->flags & OBJ_MIGHTBEDIRTY) != 0)
2026		return;
2027	vm_object_set_flag(object, OBJ_MIGHTBEDIRTY);
2028	if (object->type == OBJT_VNODE &&
2029	    (vp = (struct vnode *)object->handle) != NULL) {
2030		VI_LOCK(vp);
2031		vp->v_iflag |= VI_OBJDIRTY;
2032		VI_UNLOCK(vp);
2033	}
2034}
2035
2036#include "opt_ddb.h"
2037#ifdef DDB
2038#include <sys/kernel.h>
2039
2040#include <sys/cons.h>
2041
2042#include <ddb/ddb.h>
2043
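/*
 * Helper for the DDB commands below: returns 1 if the object is
 * reachable from the given map entry, descending into submaps and
 * walking backing-object chains.  When entry is NULL, every entry of
 * the map is examined instead.
 */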
2044static int
2045_vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
2046{
2047	vm_map_t tmpm;
2048	vm_map_entry_t tmpe;
2049	vm_object_t obj;
2050	int entcount;
2051
2052	if (map == 0)
2053		return 0;
2054
2055	if (entry == 0) {
2056		tmpe = map->header.next;
2057		entcount = map->nentries;
2058		while (entcount-- && (tmpe != &map->header)) {
2059			if (_vm_object_in_map(map, object, tmpe)) {
2060				return 1;
2061			}
2062			tmpe = tmpe->next;
2063		}
2064	} else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2065		tmpm = entry->object.sub_map;
2066		tmpe = tmpm->header.next;
2067		entcount = tmpm->nentries;
2068		while (entcount-- && tmpe != &tmpm->header) {
2069			if (_vm_object_in_map(tmpm, object, tmpe)) {
2070				return 1;
2071			}
2072			tmpe = tmpe->next;
2073		}
2074	} else if ((obj = entry->object.vm_object) != NULL) {
2075		for (; obj; obj = obj->backing_object)
2076			if (obj == object) {
2077				return 1;
2078			}
2079	}
2080	return 0;
2081}
2082
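/*
 * Returns 1 if the object is mapped into any process's address space
 * or into one of the kernel_map, kmem_map, pager_map, or buffer_map.
 */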
2083static int
2084vm_object_in_map(vm_object_t object)
2085{
2086	struct proc *p;
2087
2088	/* sx_slock(&allproc_lock); */
2089	FOREACH_PROC_IN_SYSTEM(p) {
2090		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
2091			continue;
2092		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
2093			/* sx_sunlock(&allproc_lock); */
2094			return 1;
2095		}
2096	}
2097	/* sx_sunlock(&allproc_lock); */
2098	if (_vm_object_in_map(kernel_map, object, 0))
2099		return 1;
2100	if (_vm_object_in_map(kmem_map, object, 0))
2101		return 1;
2102	if (_vm_object_in_map(pager_map, object, 0))
2103		return 1;
2104	if (_vm_object_in_map(buffer_map, object, 0))
2105		return 1;
2106	return 0;
2107}
2108
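/*
 * "show vmochk": sanity-check every internal (handle-less default or
 * swap) object, warning about any that has a zero reference count or
 * is not mapped anywhere.
 */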
2109DB_SHOW_COMMAND(vmochk, vm_object_check)
2110{
2111	vm_object_t object;
2112
2113	/*
2114	 * make sure that internal objs are in a map somewhere
2115	 * and none have zero ref counts.
2116	 */
2117	TAILQ_FOREACH(object, &vm_object_list, object_list) {
2118		if (object->handle == NULL &&
2119		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2120			if (object->ref_count == 0) {
2121				db_printf("vmochk: internal obj has zero ref count: %ld\n",
2122					(long)object->size);
2123			}
2124			if (!vm_object_in_map(object)) {
2125				db_printf(
2126			"vmochk: internal obj is not in a map: "
2127			"ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
2128				    object->ref_count, (u_long)object->size,
2129				    (u_long)object->size,
2130				    (void *)object->backing_object);
2131			}
2132		}
2133	}
2134}
2135
2136/*
2137 *	vm_object_print:	[ debug ]
2138 */
2139DB_SHOW_COMMAND(object, vm_object_print_static)
2140{
2141	/* XXX convert args. */
2142	vm_object_t object = (vm_object_t)addr;
2143	boolean_t full = have_addr;
2144
2145	vm_page_t p;
2146
2147	/* XXX count is an (unused) arg.  Avoid shadowing it. */
2148#define	count	was_count
2149
2150	int count;
2151
2152	if (object == NULL)
2153		return;
2154
2155	db_iprintf(
2156	    "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x\n",
2157	    object, (int)object->type, (uintmax_t)object->size,
2158	    object->resident_page_count, object->ref_count, object->flags);
2159	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
2160	    object->shadow_count,
2161	    object->backing_object ? object->backing_object->ref_count : 0,
2162	    object->backing_object, (uintmax_t)object->backing_object_offset);
2163
2164	if (!full)
2165		return;
2166
2167	db_indent += 2;
2168	count = 0;
2169	TAILQ_FOREACH(p, &object->memq, listq) {
2170		if (count == 0)
2171			db_iprintf("memory:=");
2172		else if (count == 6) {
2173			db_printf("\n");
2174			db_iprintf(" ...");
2175			count = 0;
2176		} else
2177			db_printf(",");
2178		count++;
2179
2180		db_printf("(off=0x%jx,page=0x%jx)",
2181		    (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
2182	}
2183	if (count != 0)
2184		db_printf("\n");
2185	db_indent -= 2;
2186}
2187
2188/* XXX. */
2189#undef count
2190
2191/* XXX need this non-static entry for calling from vm_map_print. */
2192void
2193vm_object_print(
2194        /* db_expr_t */ long addr,
2195	boolean_t have_addr,
2196	/* db_expr_t */ long count,
2197	char *modif)
2198{
2199	vm_object_print_static(addr, have_addr, count, modif);
2200}
2201
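/*
 * "show vmopag": for every object, print its resident pages (the
 * first 128 page indices at most) as runs of physically contiguous
 * pages, pausing after roughly 18 lines of output; press space to
 * continue, any other key to stop.
 */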
2202DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
2203{
2204	vm_object_t object;
2205	int nl = 0;
2206	int c;
2207
2208	TAILQ_FOREACH(object, &vm_object_list, object_list) {
2209		vm_pindex_t idx, fidx;
2210		vm_pindex_t osize;
2211		vm_paddr_t pa = -1;
2212		int rcount;
2213		vm_page_t m;
2214
2215		db_printf("new object: %p\n", (void *)object);
2216		if (nl > 18) {
2217			c = cngetc();
2218			if (c != ' ')
2219				return;
2220			nl = 0;
2221		}
2222		nl++;
2223		rcount = 0;
2224		fidx = 0;
2225		osize = object->size;
2226		if (osize > 128)
2227			osize = 128;
2228		for (idx = 0; idx < osize; idx++) {
2229			m = vm_page_lookup(object, idx);
2230			if (m == NULL) {
2231				if (rcount) {
2232					db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2233						(long)fidx, rcount, (long)pa);
2234					if (nl > 18) {
2235						c = cngetc();
2236						if (c != ' ')
2237							return;
2238						nl = 0;
2239					}
2240					nl++;
2241					rcount = 0;
2242				}
2243				continue;
2244			}
2245
2246
2247			if (rcount &&
2248				(VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
2249				++rcount;
2250				continue;
2251			}
2252			if (rcount) {
2253				db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2254					(long)fidx, rcount, (long)pa);
2255				if (nl > 18) {
2256					c = cngetc();
2257					if (c != ' ')
2258						return;
2259					nl = 0;
2260				}
2261				nl++;
2262			}
2263			fidx = idx;
2264			pa = VM_PAGE_TO_PHYS(m);
2265			rcount = 1;
2266		}
2267		if (rcount) {
2268			db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2269				(long)fidx, rcount, (long)pa);
2270			if (nl > 18) {
2271				c = cngetc();
2272				if (c != ' ')
2273					return;
2274				nl = 0;
2275			}
2276			nl++;
2277		}
2278	}
2279}
2280#endif /* DDB */
2281