vm_object.c revision 218304
1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
33 *
34 *
35 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
36 * All rights reserved.
37 *
38 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
39 *
40 * Permission to use, copy, modify and distribute this software and
41 * its documentation is hereby granted, provided that both the copyright
42 * notice and this permission notice appear in all copies of the
43 * software, derivative works or modified versions, and any portions
44 * thereof, and that both notices appear in supporting documentation.
45 *
46 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
47 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
48 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
49 *
50 * Carnegie Mellon requests users of this software to return to
51 *
52 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
53 *  School of Computer Science
54 *  Carnegie Mellon University
55 *  Pittsburgh PA 15213-3890
56 *
57 * any improvements or extensions that they make and grant Carnegie the
58 * rights to redistribute these changes.
59 */
60
61/*
62 *	Virtual memory object module.
63 */
64
65#include <sys/cdefs.h>
66__FBSDID("$FreeBSD: head/sys/vm/vm_object.c 218304 2011-02-04 21:49:24Z alc $");
67
68#include "opt_vm.h"
69
70#include <sys/param.h>
71#include <sys/systm.h>
72#include <sys/lock.h>
73#include <sys/mman.h>
74#include <sys/mount.h>
75#include <sys/kernel.h>
76#include <sys/sysctl.h>
77#include <sys/mutex.h>
78#include <sys/proc.h>		/* for curproc, pageproc */
79#include <sys/socket.h>
80#include <sys/resourcevar.h>
81#include <sys/vnode.h>
82#include <sys/vmmeter.h>
83#include <sys/sx.h>
84
85#include <vm/vm.h>
86#include <vm/vm_param.h>
87#include <vm/pmap.h>
88#include <vm/vm_map.h>
89#include <vm/vm_object.h>
90#include <vm/vm_page.h>
91#include <vm/vm_pageout.h>
92#include <vm/vm_pager.h>
93#include <vm/swap_pager.h>
94#include <vm/vm_kern.h>
95#include <vm/vm_extern.h>
96#include <vm/vm_reserv.h>
97#include <vm/uma.h>
98
99static int old_msync;
100SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
101    "Use old (insecure) msync behavior");
102
103static int	vm_object_page_collect_flush(vm_object_t object, vm_page_t p,
104		    int pagerflags, int flags, int *clearobjflags);
105static boolean_t vm_object_page_remove_write(vm_page_t p, int flags,
106		    int *clearobjflags);
107static void	vm_object_qcollapse(vm_object_t object);
108static void	vm_object_vndeallocate(vm_object_t object);
109
110/*
111 *	Virtual memory objects maintain the actual data
112 *	associated with allocated virtual memory.  A given
113 *	page of memory exists within exactly one object.
114 *
115 *	An object is only deallocated when all "references"
116 *	are given up.  Only one "reference" to a given
117 *	region of an object should be writeable.
118 *
119 *	Associated with each object is a list of all resident
120 *	memory pages belonging to that object; this list is
121 *	maintained by the "vm_page" module, and locked by the object's
122 *	lock.
123 *
124 *	Each object also records a "pager" routine which is
125 *	used to retrieve (and store) pages to the proper backing
126 *	storage.  In addition, objects may be backed by other
127 *	objects from which they were virtual-copied.
128 *
129 *	The only items within the object structure which are
130 *	modified after time of creation are:
131 *		reference count		locked by object's lock
132 *		pager routine		locked by object's lock
133 *
134 */
135
136struct object_q vm_object_list;
137struct mtx vm_object_list_mtx;	/* lock for object list and count */
138
139struct vm_object kernel_object_store;
140struct vm_object kmem_object_store;
141
142SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, "VM object stats");
143
144static long object_collapses;
145SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
146    &object_collapses, 0, "VM object collapses");
147
148static long object_bypasses;
149SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
150    &object_bypasses, 0, "VM object bypasses");
151
152static uma_zone_t obj_zone;
153
154static int vm_object_zinit(void *mem, int size, int flags);
155
156#ifdef INVARIANTS
157static void vm_object_zdtor(void *mem, int size, void *arg);
158
159static void
160vm_object_zdtor(void *mem, int size, void *arg)
161{
162	vm_object_t object;
163
164	object = (vm_object_t)mem;
165	KASSERT(TAILQ_EMPTY(&object->memq),
166	    ("object %p has resident pages",
167	    object));
168#if VM_NRESERVLEVEL > 0
169	KASSERT(LIST_EMPTY(&object->rvq),
170	    ("object %p has reservations",
171	    object));
172#endif
173	KASSERT(object->cache == NULL,
174	    ("object %p has cached pages",
175	    object));
176	KASSERT(object->paging_in_progress == 0,
177	    ("object %p paging_in_progress = %d",
178	    object, object->paging_in_progress));
179	KASSERT(object->resident_page_count == 0,
180	    ("object %p resident_page_count = %d",
181	    object, object->resident_page_count));
182	KASSERT(object->shadow_count == 0,
183	    ("object %p shadow_count = %d",
184	    object, object->shadow_count));
185}
186#endif
187
188static int
189vm_object_zinit(void *mem, int size, int flags)
190{
191	vm_object_t object;
192
193	object = (vm_object_t)mem;
194	bzero(&object->mtx, sizeof(object->mtx));
195	VM_OBJECT_LOCK_INIT(object, "standard object");
196
197	/* These are true for any object that has been freed */
198	object->paging_in_progress = 0;
199	object->resident_page_count = 0;
200	object->shadow_count = 0;
201	return (0);
202}
203
204void
205_vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
206{
207
208	TAILQ_INIT(&object->memq);
209	LIST_INIT(&object->shadow_head);
210
211	object->root = NULL;
212	object->type = type;
213	object->size = size;
214	object->generation = 1;
215	object->ref_count = 1;
216	object->memattr = VM_MEMATTR_DEFAULT;
217	object->flags = 0;
218	object->cred = NULL;
219	object->charge = 0;
220	if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
221		object->flags = OBJ_ONEMAPPING;
222	object->pg_color = 0;
223	object->handle = NULL;
224	object->backing_object = NULL;
225	object->backing_object_offset = (vm_ooffset_t) 0;
226#if VM_NRESERVLEVEL > 0
227	LIST_INIT(&object->rvq);
228#endif
229	object->cache = NULL;
230
231	mtx_lock(&vm_object_list_mtx);
232	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
233	mtx_unlock(&vm_object_list_mtx);
234}
235
236/*
237 *	vm_object_init:
238 *
239 *	Initialize the VM objects module.
240 */
241void
242vm_object_init(void)
243{
244	TAILQ_INIT(&vm_object_list);
245	mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
246
247	VM_OBJECT_LOCK_INIT(kernel_object, "kernel object");
248	_vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
249	    kernel_object);
250#if VM_NRESERVLEVEL > 0
251	kernel_object->flags |= OBJ_COLORED;
252	kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
253#endif
254
255	VM_OBJECT_LOCK_INIT(kmem_object, "kmem object");
256	_vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
257	    kmem_object);
258#if VM_NRESERVLEVEL > 0
259	kmem_object->flags |= OBJ_COLORED;
260	kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
261#endif
262
263	/*
264	 * The lock portion of struct vm_object must be type stable due
265	 * to vm_pageout_fallback_object_lock locking a vm object
266	 * without holding any references to it.
267	 */
268	obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
269#ifdef INVARIANTS
270	    vm_object_zdtor,
271#else
272	    NULL,
273#endif
274	    vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
275}
276
277void
278vm_object_clear_flag(vm_object_t object, u_short bits)
279{
280
281	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
282	object->flags &= ~bits;
283}
284
285/*
286 *	Sets the default memory attribute for the specified object.  Pages
287 *	that are allocated to this object are by default assigned this memory
288 *	attribute.
289 *
290 *	Presently, this function must be called before any pages are allocated
291 *	to the object.  In the future, this requirement may be relaxed for
292 *	"default" and "swap" objects.
293 */
294int
295vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr)
296{
297
298	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
299	switch (object->type) {
300	case OBJT_DEFAULT:
301	case OBJT_DEVICE:
302	case OBJT_PHYS:
303	case OBJT_SG:
304	case OBJT_SWAP:
305	case OBJT_VNODE:
306		if (!TAILQ_EMPTY(&object->memq))
307			return (KERN_FAILURE);
308		break;
309	case OBJT_DEAD:
310		return (KERN_INVALID_ARGUMENT);
311	}
312	object->memattr = memattr;
313	return (KERN_SUCCESS);
314}
315
316void
317vm_object_pip_add(vm_object_t object, short i)
318{
319
320	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
321	object->paging_in_progress += i;
322}
323
324void
325vm_object_pip_subtract(vm_object_t object, short i)
326{
327
328	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
329	object->paging_in_progress -= i;
330}
331
332void
333vm_object_pip_wakeup(vm_object_t object)
334{
335
336	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
337	object->paging_in_progress--;
338	if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
339		vm_object_clear_flag(object, OBJ_PIPWNT);
340		wakeup(object);
341	}
342}
343
344void
345vm_object_pip_wakeupn(vm_object_t object, short i)
346{
347
348	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
349	if (i)
350		object->paging_in_progress -= i;
351	if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
352		vm_object_clear_flag(object, OBJ_PIPWNT);
353		wakeup(object);
354	}
355}
356
357void
358vm_object_pip_wait(vm_object_t object, char *waitid)
359{
360
361	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
362	while (object->paging_in_progress) {
363		object->flags |= OBJ_PIPWNT;
364		msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
365	}
366}
367
368/*
369 *	vm_object_allocate:
370 *
371 *	Returns a new object with the given size.
372 */
373vm_object_t
374vm_object_allocate(objtype_t type, vm_pindex_t size)
375{
376	vm_object_t object;
377
378	object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
379	_vm_object_allocate(type, size, object);
380	return (object);
381}
382
383
384/*
385 *	vm_object_reference:
386 *
387 *	Gets another reference to the given object.  Note: OBJ_DEAD
388 *	objects can be referenced during final cleaning.
389 */
390void
391vm_object_reference(vm_object_t object)
392{
393	if (object == NULL)
394		return;
395	VM_OBJECT_LOCK(object);
396	vm_object_reference_locked(object);
397	VM_OBJECT_UNLOCK(object);
398}
399
400/*
401 *	vm_object_reference_locked:
402 *
403 *	Gets another reference to the given object.
404 *
405 *	The object must be locked.
406 */
407void
408vm_object_reference_locked(vm_object_t object)
409{
410	struct vnode *vp;
411
412	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
413	object->ref_count++;
414	if (object->type == OBJT_VNODE) {
415		vp = object->handle;
416		vref(vp);
417	}
418}
419
420/*
421 * Handle deallocating an object of type OBJT_VNODE.
422 */
423static void
424vm_object_vndeallocate(vm_object_t object)
425{
426	struct vnode *vp = (struct vnode *) object->handle;
427
428	VFS_ASSERT_GIANT(vp->v_mount);
429	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
430	KASSERT(object->type == OBJT_VNODE,
431	    ("vm_object_vndeallocate: not a vnode object"));
432	KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
433#ifdef INVARIANTS
434	if (object->ref_count == 0) {
435		vprint("vm_object_vndeallocate", vp);
436		panic("vm_object_vndeallocate: bad object reference count");
437	}
438#endif
439
440	object->ref_count--;
441	if (object->ref_count == 0) {
442		mp_fixme("Unlocked vflag access.");
443		vp->v_vflag &= ~VV_TEXT;
444	}
445	VM_OBJECT_UNLOCK(object);
446	/*
447	 * vrele may need a vop lock
448	 */
449	vrele(vp);
450}
451
452/*
453 *	vm_object_deallocate:
454 *
455 *	Release a reference to the specified object,
456 *	gained either through a vm_object_allocate
457 *	or a vm_object_reference call.  When all references
458 *	are gone, storage associated with this object
459 *	may be relinquished.
460 *
461 *	No object may be locked.
462 */
463void
464vm_object_deallocate(vm_object_t object)
465{
466	vm_object_t temp;
467
468	while (object != NULL) {
469		int vfslocked;
470
471		vfslocked = 0;
472	restart:
473		VM_OBJECT_LOCK(object);
474		if (object->type == OBJT_VNODE) {
475			struct vnode *vp = (struct vnode *) object->handle;
476
477			/*
478			 * Conditionally acquire Giant for a vnode-backed
479			 * object.  We have to be careful since the type of
480			 * a vnode object can change while the object is
481			 * unlocked.
482			 */
483			if (VFS_NEEDSGIANT(vp->v_mount) && !vfslocked) {
484				vfslocked = 1;
485				if (!mtx_trylock(&Giant)) {
486					VM_OBJECT_UNLOCK(object);
487					mtx_lock(&Giant);
488					goto restart;
489				}
490			}
491			vm_object_vndeallocate(object);
492			VFS_UNLOCK_GIANT(vfslocked);
493			return;
494		} else
495			/*
496			 * This is to handle the case that the object
497			 * changed type while we dropped its lock to
498			 * obtain Giant.
499			 */
500			VFS_UNLOCK_GIANT(vfslocked);
501
502		KASSERT(object->ref_count != 0,
503			("vm_object_deallocate: object deallocated too many times: %d", object->type));
504
505		/*
506		 * If the reference count goes to 0 we start calling
507		 * vm_object_terminate() on the object chain.
508		 * A ref count of 1 may be a special case depending on the
509		 * shadow count being 0 or 1.
510		 */
511		object->ref_count--;
512		if (object->ref_count > 1) {
513			VM_OBJECT_UNLOCK(object);
514			return;
515		} else if (object->ref_count == 1) {
516			if (object->shadow_count == 0 &&
517			    object->handle == NULL &&
518			    (object->type == OBJT_DEFAULT ||
519			     object->type == OBJT_SWAP)) {
520				vm_object_set_flag(object, OBJ_ONEMAPPING);
521			} else if ((object->shadow_count == 1) &&
522			    (object->handle == NULL) &&
523			    (object->type == OBJT_DEFAULT ||
524			     object->type == OBJT_SWAP)) {
525				vm_object_t robject;
526
527				robject = LIST_FIRST(&object->shadow_head);
528				KASSERT(robject != NULL,
529				    ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
530					 object->ref_count,
531					 object->shadow_count));
532				if (!VM_OBJECT_TRYLOCK(robject)) {
533					/*
534					 * Avoid a potential deadlock.
535					 */
536					object->ref_count++;
537					VM_OBJECT_UNLOCK(object);
538					/*
539					 * More likely than not the thread
540					 * holding robject's lock has lower
541					 * priority than the current thread.
542					 * Let the lower priority thread run.
543					 */
544					pause("vmo_de", 1);
545					continue;
546				}
547				/*
548				 * Collapse object into its shadow unless its
549				 * shadow is dead.  In that case, object will
550				 * be deallocated by the thread that is
551				 * deallocating its shadow.
552				 */
553				if ((robject->flags & OBJ_DEAD) == 0 &&
554				    (robject->handle == NULL) &&
555				    (robject->type == OBJT_DEFAULT ||
556				     robject->type == OBJT_SWAP)) {
557
558					robject->ref_count++;
559retry:
560					if (robject->paging_in_progress) {
561						VM_OBJECT_UNLOCK(object);
562						vm_object_pip_wait(robject,
563						    "objde1");
564						temp = robject->backing_object;
565						if (object == temp) {
566							VM_OBJECT_LOCK(object);
567							goto retry;
568						}
569					} else if (object->paging_in_progress) {
570						VM_OBJECT_UNLOCK(robject);
571						object->flags |= OBJ_PIPWNT;
572						msleep(object,
573						    VM_OBJECT_MTX(object),
574						    PDROP | PVM, "objde2", 0);
575						VM_OBJECT_LOCK(robject);
576						temp = robject->backing_object;
577						if (object == temp) {
578							VM_OBJECT_LOCK(object);
579							goto retry;
580						}
581					} else
582						VM_OBJECT_UNLOCK(object);
583
584					if (robject->ref_count == 1) {
585						robject->ref_count--;
586						object = robject;
587						goto doterm;
588					}
589					object = robject;
590					vm_object_collapse(object);
591					VM_OBJECT_UNLOCK(object);
592					continue;
593				}
594				VM_OBJECT_UNLOCK(robject);
595			}
596			VM_OBJECT_UNLOCK(object);
597			return;
598		}
599doterm:
600		temp = object->backing_object;
601		if (temp != NULL) {
602			VM_OBJECT_LOCK(temp);
603			LIST_REMOVE(object, shadow_list);
604			temp->shadow_count--;
605			VM_OBJECT_UNLOCK(temp);
606			object->backing_object = NULL;
607		}
608		/*
609		 * Don't double-terminate, we could be in a termination
610		 * recursion due to the terminate having to sync data
611		 * to disk.
612		 */
613		if ((object->flags & OBJ_DEAD) == 0)
614			vm_object_terminate(object);
615		else
616			VM_OBJECT_UNLOCK(object);
617		object = temp;
618	}
619}
620
621/*
622 *	vm_object_destroy removes the object from the global object list
623 *      and frees the space for the object.
624 */
625void
626vm_object_destroy(vm_object_t object)
627{
628
629	/*
630	 * Remove the object from the global object list.
631	 */
632	mtx_lock(&vm_object_list_mtx);
633	TAILQ_REMOVE(&vm_object_list, object, object_list);
634	mtx_unlock(&vm_object_list_mtx);
635
636	/*
637	 * Release the allocation charge.
638	 */
639	if (object->cred != NULL) {
640		KASSERT(object->type == OBJT_DEFAULT ||
641		    object->type == OBJT_SWAP,
642		    ("vm_object_terminate: non-swap obj %p has cred",
643		     object));
644		swap_release_by_cred(object->charge, object->cred);
645		object->charge = 0;
646		crfree(object->cred);
647		object->cred = NULL;
648	}
649
650	/*
651	 * Free the space for the object.
652	 */
653	uma_zfree(obj_zone, object);
654}
655
656/*
657 *	vm_object_terminate actually destroys the specified object, freeing
658 *	up all previously used resources.
659 *
660 *	The object must be locked.
661 *	This routine may block.
662 */
663void
664vm_object_terminate(vm_object_t object)
665{
666	vm_page_t p, p_next;
667
668	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
669
670	/*
671	 * Make sure no one uses us.
672	 */
673	vm_object_set_flag(object, OBJ_DEAD);
674
675	/*
676	 * wait for the pageout daemon to be done with the object
677	 */
678	vm_object_pip_wait(object, "objtrm");
679
680	KASSERT(!object->paging_in_progress,
681		("vm_object_terminate: pageout in progress"));
682
683	/*
684	 * Clean and free the pages, as appropriate. All references to the
685	 * object are gone, so we don't need to lock it.
686	 */
687	if (object->type == OBJT_VNODE) {
688		struct vnode *vp = (struct vnode *)object->handle;
689
690		/*
691		 * Clean pages and flush buffers.
692		 */
693		vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
694		VM_OBJECT_UNLOCK(object);
695
696		vinvalbuf(vp, V_SAVE, 0, 0);
697
698		VM_OBJECT_LOCK(object);
699	}
700
701	KASSERT(object->ref_count == 0,
702		("vm_object_terminate: object with references, ref_count=%d",
703		object->ref_count));
704
705	/*
706	 * Free any remaining pageable pages.  This also removes them from the
707	 * paging queues.  However, don't free wired pages, just remove them
708	 * from the object.  Rather than incrementally removing each page from
709	 * the object, the page and object are reset to any empty state.
710	 */
711	TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
712		KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0,
713		    ("vm_object_terminate: freeing busy page %p", p));
714		vm_page_lock(p);
715		/*
716		 * Optimize the page's removal from the object by resetting
717		 * its "object" field.  Specifically, if the page is not
718		 * wired, then the effect of this assignment is that
719		 * vm_page_free()'s call to vm_page_remove() will return
720		 * immediately without modifying the page or the object.
721		 */
722		p->object = NULL;
723		if (p->wire_count == 0) {
724			vm_page_free(p);
725			PCPU_INC(cnt.v_pfree);
726		}
727		vm_page_unlock(p);
728	}
729	/*
730	 * If the object contained any pages, then reset it to an empty state.
731	 * None of the object's fields, including "resident_page_count", were
732	 * modified by the preceding loop.
733	 */
734	if (object->resident_page_count != 0) {
735		object->root = NULL;
736		TAILQ_INIT(&object->memq);
737		object->resident_page_count = 0;
738		if (object->type == OBJT_VNODE)
739			vdrop(object->handle);
740	}
741
742#if VM_NRESERVLEVEL > 0
743	if (__predict_false(!LIST_EMPTY(&object->rvq)))
744		vm_reserv_break_all(object);
745#endif
746	if (__predict_false(object->cache != NULL))
747		vm_page_cache_free(object, 0, 0);
748
749	/*
750	 * Let the pager know object is dead.
751	 */
752	vm_pager_deallocate(object);
753	VM_OBJECT_UNLOCK(object);
754
755	vm_object_destroy(object);
756}
757
758/*
759 * Make the page read-only so that we can clear the object flags.  However, if
760 * this is a nosync mmap then the object is likely to stay dirty so do not
761 * mess with the page and do not clear the object flags.  Returns TRUE if the
762 * page should be flushed, and FALSE otherwise.
763 */
764static boolean_t
765vm_object_page_remove_write(vm_page_t p, int flags, int *clearobjflags)
766{
767
768	/*
769	 * If we have been asked to skip nosync pages and this is a
770	 * nosync page, skip it.  Note that the object flags were not
771	 * cleared in this case so we do not have to set them.
772	 */
773	if ((flags & OBJPC_NOSYNC) != 0 && (p->oflags & VPO_NOSYNC) != 0) {
774		*clearobjflags = 0;
775		return (FALSE);
776	} else {
777		pmap_remove_write(p);
778		return (p->dirty != 0);
779	}
780}
781
782/*
783 *	vm_object_page_clean
784 *
785 *	Clean all dirty pages in the specified range of object.  Leaves page
786 * 	on whatever queue it is currently on.   If NOSYNC is set then do not
787 *	write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC),
788 *	leaving the object dirty.
789 *
790 *	When stuffing pages asynchronously, allow clustering.  XXX we need a
791 *	synchronous clustering mode implementation.
792 *
793 *	Odd semantics: if start == end, we clean everything.
794 *
795 *	The object must be locked.
796 */
797void
798vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
799    int flags)
800{
801	vm_page_t np, p;
802	vm_pindex_t pi, tend;
803	int clearobjflags, curgeneration, n, pagerflags;
804
805	mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED);
806	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
807	KASSERT(object->type == OBJT_VNODE, ("Not a vnode object"));
808	if ((object->flags & OBJ_MIGHTBEDIRTY) == 0 ||
809	    object->resident_page_count == 0)
810		return;
811
812	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ?
813	    VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
814	pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0;
815
816	tend = (end == 0) ? object->size : end;
817	clearobjflags = start == 0 && tend == object->size;
818
819rescan:
820	curgeneration = object->generation;
821
822	for (p = vm_page_find_least(object, start); p != NULL; p = np) {
823		pi = p->pindex;
824		if (pi >= tend)
825			break;
826		np = TAILQ_NEXT(p, listq);
827		if (p->valid == 0)
828			continue;
829		if (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
830			if (object->generation != curgeneration)
831				goto rescan;
832			np = vm_page_find_least(object, pi);
833			continue;
834		}
835		if (!vm_object_page_remove_write(p, flags, &clearobjflags))
836			continue;
837
838		n = vm_object_page_collect_flush(object, p, pagerflags,
839		    flags, &clearobjflags);
840		if (object->generation != curgeneration)
841			goto rescan;
842		np = vm_page_find_least(object, pi + n);
843	}
844#if 0
845	VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0);
846#endif
847
848	if (clearobjflags)
849		vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY);
850}
851
852static int
853vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
854    int flags, int *clearobjflags)
855{
856	vm_page_t ma[vm_pageout_page_count], p_first, tp;
857	int count, i, mreq, runlen;
858
859	mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED);
860	vm_page_lock_assert(p, MA_NOTOWNED);
861	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
862
863	count = 1;
864	mreq = 0;
865
866	for (tp = p; count < vm_pageout_page_count; count++) {
867		tp = vm_page_next(tp);
868		if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0)
869			break;
870		if (!vm_object_page_remove_write(tp, flags, clearobjflags))
871			break;
872	}
873
874	for (p_first = p; count < vm_pageout_page_count; count++) {
875		tp = vm_page_prev(p_first);
876		if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0)
877			break;
878		if (!vm_object_page_remove_write(tp, flags, clearobjflags))
879			break;
880		p_first = tp;
881		mreq++;
882	}
883
884	for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++)
885		ma[i] = tp;
886
887	vm_pageout_flush(ma, count, pagerflags, mreq, &runlen);
888	return (runlen);
889}
890
891/*
892 * Note that there is absolutely no sense in writing out
893 * anonymous objects, so we track down the vnode object
894 * to write out.
895 * We invalidate (remove) all pages from the address space
896 * for semantic correctness.
897 *
898 * Note: certain anonymous maps, such as MAP_NOSYNC maps,
899 * may start out with a NULL object.
900 */
901void
902vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
903    boolean_t syncio, boolean_t invalidate)
904{
905	vm_object_t backing_object;
906	struct vnode *vp;
907	struct mount *mp;
908	int flags;
909
910	if (object == NULL)
911		return;
912	VM_OBJECT_LOCK(object);
913	while ((backing_object = object->backing_object) != NULL) {
914		VM_OBJECT_LOCK(backing_object);
915		offset += object->backing_object_offset;
916		VM_OBJECT_UNLOCK(object);
917		object = backing_object;
918		if (object->size < OFF_TO_IDX(offset + size))
919			size = IDX_TO_OFF(object->size) - offset;
920	}
921	/*
922	 * Flush pages if writing is allowed, invalidate them
923	 * if invalidation requested.  Pages undergoing I/O
924	 * will be ignored by vm_object_page_remove().
925	 *
926	 * We cannot lock the vnode and then wait for paging
927	 * to complete without deadlocking against vm_fault.
928	 * Instead we simply call vm_object_page_remove() and
929	 * allow it to block internally on a page-by-page
930	 * basis when it encounters pages undergoing async
931	 * I/O.
932	 */
933	if (object->type == OBJT_VNODE &&
934	    (object->flags & OBJ_MIGHTBEDIRTY) != 0) {
935		int vfslocked;
936		vp = object->handle;
937		VM_OBJECT_UNLOCK(object);
938		(void) vn_start_write(vp, &mp, V_WAIT);
939		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
940		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
941		flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
942		flags |= invalidate ? OBJPC_INVAL : 0;
943		VM_OBJECT_LOCK(object);
944		vm_object_page_clean(object,
945		    OFF_TO_IDX(offset),
946		    OFF_TO_IDX(offset + size + PAGE_MASK),
947		    flags);
948		VM_OBJECT_UNLOCK(object);
949		VOP_UNLOCK(vp, 0);
950		VFS_UNLOCK_GIANT(vfslocked);
951		vn_finished_write(mp);
952		VM_OBJECT_LOCK(object);
953	}
954	if ((object->type == OBJT_VNODE ||
955	     object->type == OBJT_DEVICE) && invalidate) {
956		boolean_t purge;
957		purge = old_msync || (object->type == OBJT_DEVICE);
958		vm_object_page_remove(object,
959		    OFF_TO_IDX(offset),
960		    OFF_TO_IDX(offset + size + PAGE_MASK),
961		    purge ? FALSE : TRUE);
962	}
963	VM_OBJECT_UNLOCK(object);
964}
965
966/*
967 *	vm_object_madvise:
968 *
969 *	Implements the madvise function at the object/page level.
970 *
971 *	MADV_WILLNEED	(any object)
972 *
973 *	    Activate the specified pages if they are resident.
974 *
975 *	MADV_DONTNEED	(any object)
976 *
977 *	    Deactivate the specified pages if they are resident.
978 *
979 *	MADV_FREE	(OBJT_DEFAULT/OBJT_SWAP objects,
980 *			 OBJ_ONEMAPPING only)
981 *
982 *	    Deactivate and clean the specified pages if they are
983 *	    resident.  This permits the process to reuse the pages
984 *	    without faulting or the kernel to reclaim the pages
985 *	    without I/O.
986 */
987void
988vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
989{
990	vm_pindex_t end, tpindex;
991	vm_object_t backing_object, tobject;
992	vm_page_t m;
993
994	if (object == NULL)
995		return;
996	VM_OBJECT_LOCK(object);
997	end = pindex + count;
998	/*
999	 * Locate and adjust resident pages
1000	 */
1001	for (; pindex < end; pindex += 1) {
1002relookup:
1003		tobject = object;
1004		tpindex = pindex;
1005shadowlookup:
1006		/*
1007		 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
1008		 * and those pages must be OBJ_ONEMAPPING.
1009		 */
1010		if (advise == MADV_FREE) {
1011			if ((tobject->type != OBJT_DEFAULT &&
1012			     tobject->type != OBJT_SWAP) ||
1013			    (tobject->flags & OBJ_ONEMAPPING) == 0) {
1014				goto unlock_tobject;
1015			}
1016		} else if (tobject->type == OBJT_PHYS)
1017			goto unlock_tobject;
1018		m = vm_page_lookup(tobject, tpindex);
1019		if (m == NULL && advise == MADV_WILLNEED) {
1020			/*
1021			 * If the page is cached, reactivate it.
1022			 */
1023			m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED |
1024			    VM_ALLOC_NOBUSY);
1025		}
1026		if (m == NULL) {
1027			/*
1028			 * There may be swap even if there is no backing page
1029			 */
1030			if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1031				swap_pager_freespace(tobject, tpindex, 1);
1032			/*
1033			 * next object
1034			 */
1035			backing_object = tobject->backing_object;
1036			if (backing_object == NULL)
1037				goto unlock_tobject;
1038			VM_OBJECT_LOCK(backing_object);
1039			tpindex += OFF_TO_IDX(tobject->backing_object_offset);
1040			if (tobject != object)
1041				VM_OBJECT_UNLOCK(tobject);
1042			tobject = backing_object;
1043			goto shadowlookup;
1044		} else if (m->valid != VM_PAGE_BITS_ALL)
1045			goto unlock_tobject;
1046		/*
1047		 * If the page is not in a normal state, skip it.
1048		 */
1049		vm_page_lock(m);
1050		if (m->hold_count != 0 || m->wire_count != 0) {
1051			vm_page_unlock(m);
1052			goto unlock_tobject;
1053		}
1054		KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1055		    ("vm_object_madvise: page %p is not managed", m));
1056		if ((m->oflags & VPO_BUSY) || m->busy) {
1057			if (advise == MADV_WILLNEED) {
1058				/*
1059				 * Reference the page before unlocking and
1060				 * sleeping so that the page daemon is less
1061				 * likely to reclaim it.
1062				 */
1063				vm_page_lock_queues();
1064				vm_page_flag_set(m, PG_REFERENCED);
1065				vm_page_unlock_queues();
1066			}
1067			vm_page_unlock(m);
1068			if (object != tobject)
1069				VM_OBJECT_UNLOCK(object);
1070			m->oflags |= VPO_WANTED;
1071			msleep(m, VM_OBJECT_MTX(tobject), PDROP | PVM, "madvpo",
1072			    0);
1073			VM_OBJECT_LOCK(object);
1074  			goto relookup;
1075		}
1076		if (advise == MADV_WILLNEED) {
1077			vm_page_activate(m);
1078		} else if (advise == MADV_DONTNEED) {
1079			vm_page_dontneed(m);
1080		} else if (advise == MADV_FREE) {
1081			/*
1082			 * Mark the page clean.  This will allow the page
1083			 * to be freed up by the system.  However, such pages
1084			 * are often reused quickly by malloc()/free()
1085			 * so we do not do anything that would cause
1086			 * a page fault if we can help it.
1087			 *
1088			 * Specifically, we do not try to actually free
1089			 * the page now nor do we try to put it in the
1090			 * cache (which would cause a page fault on reuse).
1091			 *
1092			 * But we do make the page is freeable as we
1093			 * can without actually taking the step of unmapping
1094			 * it.
1095			 */
1096			pmap_clear_modify(m);
1097			m->dirty = 0;
1098			m->act_count = 0;
1099			vm_page_dontneed(m);
1100		}
1101		vm_page_unlock(m);
1102		if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1103			swap_pager_freespace(tobject, tpindex, 1);
1104unlock_tobject:
1105		if (tobject != object)
1106			VM_OBJECT_UNLOCK(tobject);
1107	}
1108	VM_OBJECT_UNLOCK(object);
1109}
1110
1111/*
1112 *	vm_object_shadow:
1113 *
1114 *	Create a new object which is backed by the
1115 *	specified existing object range.  The source
1116 *	object reference is deallocated.
1117 *
1118 *	The new object and offset into that object
1119 *	are returned in the source parameters.
1120 */
1121void
1122vm_object_shadow(
1123	vm_object_t *object,	/* IN/OUT */
1124	vm_ooffset_t *offset,	/* IN/OUT */
1125	vm_size_t length)
1126{
1127	vm_object_t source;
1128	vm_object_t result;
1129
1130	source = *object;
1131
1132	/*
1133	 * Don't create the new object if the old object isn't shared.
1134	 */
1135	if (source != NULL) {
1136		VM_OBJECT_LOCK(source);
1137		if (source->ref_count == 1 &&
1138		    source->handle == NULL &&
1139		    (source->type == OBJT_DEFAULT ||
1140		     source->type == OBJT_SWAP)) {
1141			VM_OBJECT_UNLOCK(source);
1142			return;
1143		}
1144		VM_OBJECT_UNLOCK(source);
1145	}
1146
1147	/*
1148	 * Allocate a new object with the given length.
1149	 */
1150	result = vm_object_allocate(OBJT_DEFAULT, atop(length));
1151
1152	/*
1153	 * The new object shadows the source object, adding a reference to it.
1154	 * Our caller changes his reference to point to the new object,
1155	 * removing a reference to the source object.  Net result: no change
1156	 * of reference count.
1157	 *
1158	 * Try to optimize the result object's page color when shadowing
1159	 * in order to maintain page coloring consistency in the combined
1160	 * shadowed object.
1161	 */
1162	result->backing_object = source;
1163	/*
1164	 * Store the offset into the source object, and fix up the offset into
1165	 * the new object.
1166	 */
1167	result->backing_object_offset = *offset;
1168	if (source != NULL) {
1169		VM_OBJECT_LOCK(source);
1170		LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
1171		source->shadow_count++;
1172#if VM_NRESERVLEVEL > 0
1173		result->flags |= source->flags & OBJ_COLORED;
1174		result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) &
1175		    ((1 << (VM_NFREEORDER - 1)) - 1);
1176#endif
1177		VM_OBJECT_UNLOCK(source);
1178	}
1179
1180
1181	/*
1182	 * Return the new things
1183	 */
1184	*offset = 0;
1185	*object = result;
1186}
1187
1188/*
1189 *	vm_object_split:
1190 *
1191 * Split the pages in a map entry into a new object.  This affords
1192 * easier removal of unused pages, and keeps object inheritance from
1193 * being a negative impact on memory usage.
1194 */
1195void
1196vm_object_split(vm_map_entry_t entry)
1197{
1198	vm_page_t m, m_next;
1199	vm_object_t orig_object, new_object, source;
1200	vm_pindex_t idx, offidxstart;
1201	vm_size_t size;
1202
1203	orig_object = entry->object.vm_object;
1204	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
1205		return;
1206	if (orig_object->ref_count <= 1)
1207		return;
1208	VM_OBJECT_UNLOCK(orig_object);
1209
1210	offidxstart = OFF_TO_IDX(entry->offset);
1211	size = atop(entry->end - entry->start);
1212
1213	/*
1214	 * If swap_pager_copy() is later called, it will convert new_object
1215	 * into a swap object.
1216	 */
1217	new_object = vm_object_allocate(OBJT_DEFAULT, size);
1218
1219	/*
1220	 * At this point, the new object is still private, so the order in
1221	 * which the original and new objects are locked does not matter.
1222	 */
1223	VM_OBJECT_LOCK(new_object);
1224	VM_OBJECT_LOCK(orig_object);
1225	source = orig_object->backing_object;
1226	if (source != NULL) {
1227		VM_OBJECT_LOCK(source);
1228		if ((source->flags & OBJ_DEAD) != 0) {
1229			VM_OBJECT_UNLOCK(source);
1230			VM_OBJECT_UNLOCK(orig_object);
1231			VM_OBJECT_UNLOCK(new_object);
1232			vm_object_deallocate(new_object);
1233			VM_OBJECT_LOCK(orig_object);
1234			return;
1235		}
1236		LIST_INSERT_HEAD(&source->shadow_head,
1237				  new_object, shadow_list);
1238		source->shadow_count++;
1239		vm_object_reference_locked(source);	/* for new_object */
1240		vm_object_clear_flag(source, OBJ_ONEMAPPING);
1241		VM_OBJECT_UNLOCK(source);
1242		new_object->backing_object_offset =
1243			orig_object->backing_object_offset + entry->offset;
1244		new_object->backing_object = source;
1245	}
1246	if (orig_object->cred != NULL) {
1247		new_object->cred = orig_object->cred;
1248		crhold(orig_object->cred);
1249		new_object->charge = ptoa(size);
1250		KASSERT(orig_object->charge >= ptoa(size),
1251		    ("orig_object->charge < 0"));
1252		orig_object->charge -= ptoa(size);
1253	}
1254retry:
1255	m = vm_page_find_least(orig_object, offidxstart);
1256	for (; m != NULL && (idx = m->pindex - offidxstart) < size;
1257	    m = m_next) {
1258		m_next = TAILQ_NEXT(m, listq);
1259
1260		/*
1261		 * We must wait for pending I/O to complete before we can
1262		 * rename the page.
1263		 *
1264		 * We do not have to VM_PROT_NONE the page as mappings should
1265		 * not be changed by this operation.
1266		 */
1267		if ((m->oflags & VPO_BUSY) || m->busy) {
1268			VM_OBJECT_UNLOCK(new_object);
1269			m->oflags |= VPO_WANTED;
1270			msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0);
1271			VM_OBJECT_LOCK(new_object);
1272			goto retry;
1273		}
1274		vm_page_lock(m);
1275		vm_page_rename(m, new_object, idx);
1276		vm_page_unlock(m);
1277		/* page automatically made dirty by rename and cache handled */
1278		vm_page_busy(m);
1279	}
1280	if (orig_object->type == OBJT_SWAP) {
1281		/*
1282		 * swap_pager_copy() can sleep, in which case the orig_object's
1283		 * and new_object's locks are released and reacquired.
1284		 */
1285		swap_pager_copy(orig_object, new_object, offidxstart, 0);
1286
1287		/*
1288		 * Transfer any cached pages from orig_object to new_object.
1289		 */
1290		if (__predict_false(orig_object->cache != NULL))
1291			vm_page_cache_transfer(orig_object, offidxstart,
1292			    new_object);
1293	}
1294	VM_OBJECT_UNLOCK(orig_object);
1295	TAILQ_FOREACH(m, &new_object->memq, listq)
1296		vm_page_wakeup(m);
1297	VM_OBJECT_UNLOCK(new_object);
1298	entry->object.vm_object = new_object;
1299	entry->offset = 0LL;
1300	vm_object_deallocate(orig_object);
1301	VM_OBJECT_LOCK(new_object);
1302}
1303
1304#define	OBSC_TEST_ALL_SHADOWED	0x0001
1305#define	OBSC_COLLAPSE_NOWAIT	0x0002
1306#define	OBSC_COLLAPSE_WAIT	0x0004
1307
1308static int
1309vm_object_backing_scan(vm_object_t object, int op)
1310{
1311	int r = 1;
1312	vm_page_t p;
1313	vm_object_t backing_object;
1314	vm_pindex_t backing_offset_index;
1315
1316	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1317	VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED);
1318
1319	backing_object = object->backing_object;
1320	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1321
1322	/*
1323	 * Initial conditions
1324	 */
1325	if (op & OBSC_TEST_ALL_SHADOWED) {
1326		/*
1327		 * We do not want to have to test for the existence of cache
1328		 * or swap pages in the backing object.  XXX but with the
1329		 * new swapper this would be pretty easy to do.
1330		 *
1331		 * XXX what about anonymous MAP_SHARED memory that hasn't
1332		 * been ZFOD faulted yet?  If we do not test for this, the
1333		 * shadow test may succeed! XXX
1334		 */
1335		if (backing_object->type != OBJT_DEFAULT) {
1336			return (0);
1337		}
1338	}
1339	if (op & OBSC_COLLAPSE_WAIT) {
1340		vm_object_set_flag(backing_object, OBJ_DEAD);
1341	}
1342
1343	/*
1344	 * Our scan
1345	 */
1346	p = TAILQ_FIRST(&backing_object->memq);
1347	while (p) {
1348		vm_page_t next = TAILQ_NEXT(p, listq);
1349		vm_pindex_t new_pindex = p->pindex - backing_offset_index;
1350
1351		if (op & OBSC_TEST_ALL_SHADOWED) {
1352			vm_page_t pp;
1353
1354			/*
1355			 * Ignore pages outside the parent object's range
1356			 * and outside the parent object's mapping of the
1357			 * backing object.
1358			 *
1359			 * note that we do not busy the backing object's
1360			 * page.
1361			 */
1362			if (
1363			    p->pindex < backing_offset_index ||
1364			    new_pindex >= object->size
1365			) {
1366				p = next;
1367				continue;
1368			}
1369
1370			/*
1371			 * See if the parent has the page or if the parent's
1372			 * object pager has the page.  If the parent has the
1373			 * page but the page is not valid, the parent's
1374			 * object pager must have the page.
1375			 *
1376			 * If this fails, the parent does not completely shadow
1377			 * the object and we might as well give up now.
1378			 */
1379
1380			pp = vm_page_lookup(object, new_pindex);
1381			if (
1382			    (pp == NULL || pp->valid == 0) &&
1383			    !vm_pager_has_page(object, new_pindex, NULL, NULL)
1384			) {
1385				r = 0;
1386				break;
1387			}
1388		}
1389
1390		/*
1391		 * Check for busy page
1392		 */
1393		if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
1394			vm_page_t pp;
1395
1396			if (op & OBSC_COLLAPSE_NOWAIT) {
1397				if ((p->oflags & VPO_BUSY) ||
1398				    !p->valid ||
1399				    p->busy) {
1400					p = next;
1401					continue;
1402				}
1403			} else if (op & OBSC_COLLAPSE_WAIT) {
1404				if ((p->oflags & VPO_BUSY) || p->busy) {
1405					VM_OBJECT_UNLOCK(object);
1406					p->oflags |= VPO_WANTED;
1407					msleep(p, VM_OBJECT_MTX(backing_object),
1408					    PDROP | PVM, "vmocol", 0);
1409					VM_OBJECT_LOCK(object);
1410					VM_OBJECT_LOCK(backing_object);
1411					/*
1412					 * If we slept, anything could have
1413					 * happened.  Since the object is
1414					 * marked dead, the backing offset
1415					 * should not have changed so we
1416					 * just restart our scan.
1417					 */
1418					p = TAILQ_FIRST(&backing_object->memq);
1419					continue;
1420				}
1421			}
1422
1423			KASSERT(
1424			    p->object == backing_object,
1425			    ("vm_object_backing_scan: object mismatch")
1426			);
1427
1428			/*
1429			 * Destroy any associated swap
1430			 */
1431			if (backing_object->type == OBJT_SWAP) {
1432				swap_pager_freespace(
1433				    backing_object,
1434				    p->pindex,
1435				    1
1436				);
1437			}
1438
1439			if (
1440			    p->pindex < backing_offset_index ||
1441			    new_pindex >= object->size
1442			) {
1443				/*
1444				 * Page is out of the parent object's range, we
1445				 * can simply destroy it.
1446				 */
1447				vm_page_lock(p);
1448				KASSERT(!pmap_page_is_mapped(p),
1449				    ("freeing mapped page %p", p));
1450				if (p->wire_count == 0)
1451					vm_page_free(p);
1452				else
1453					vm_page_remove(p);
1454				vm_page_unlock(p);
1455				p = next;
1456				continue;
1457			}
1458
1459			pp = vm_page_lookup(object, new_pindex);
1460			if (
1461			    pp != NULL ||
1462			    vm_pager_has_page(object, new_pindex, NULL, NULL)
1463			) {
1464				/*
1465				 * page already exists in parent OR swap exists
1466				 * for this location in the parent.  Destroy
1467				 * the original page from the backing object.
1468				 *
1469				 * Leave the parent's page alone
1470				 */
1471				vm_page_lock(p);
1472				KASSERT(!pmap_page_is_mapped(p),
1473				    ("freeing mapped page %p", p));
1474				if (p->wire_count == 0)
1475					vm_page_free(p);
1476				else
1477					vm_page_remove(p);
1478				vm_page_unlock(p);
1479				p = next;
1480				continue;
1481			}
1482
1483#if VM_NRESERVLEVEL > 0
1484			/*
1485			 * Rename the reservation.
1486			 */
1487			vm_reserv_rename(p, object, backing_object,
1488			    backing_offset_index);
1489#endif
1490
1491			/*
1492			 * Page does not exist in parent, rename the
1493			 * page from the backing object to the main object.
1494			 *
1495			 * If the page was mapped to a process, it can remain
1496			 * mapped through the rename.
1497			 */
1498			vm_page_lock(p);
1499			vm_page_rename(p, object, new_pindex);
1500			vm_page_unlock(p);
1501			/* page automatically made dirty by rename */
1502		}
1503		p = next;
1504	}
1505	return (r);
1506}
1507
1508
1509/*
1510 * this version of collapse allows the operation to occur earlier and
1511 * when paging_in_progress is true for an object...  This is not a complete
1512 * operation, but should plug 99.9% of the rest of the leaks.
1513 */
1514static void
1515vm_object_qcollapse(vm_object_t object)
1516{
1517	vm_object_t backing_object = object->backing_object;
1518
1519	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1520	VM_OBJECT_LOCK_ASSERT(backing_object, MA_OWNED);
1521
1522	if (backing_object->ref_count != 1)
1523		return;
1524
1525	vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
1526}
1527
1528/*
1529 *	vm_object_collapse:
1530 *
1531 *	Collapse an object with the object backing it.
1532 *	Pages in the backing object are moved into the
1533 *	parent, and the backing object is deallocated.
1534 */
1535void
1536vm_object_collapse(vm_object_t object)
1537{
1538	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1539
1540	while (TRUE) {
1541		vm_object_t backing_object;
1542
1543		/*
1544		 * Verify that the conditions are right for collapse:
1545		 *
1546		 * The object exists and the backing object exists.
1547		 */
1548		if ((backing_object = object->backing_object) == NULL)
1549			break;
1550
1551		/*
1552		 * we check the backing object first, because it is most likely
1553		 * not collapsable.
1554		 */
1555		VM_OBJECT_LOCK(backing_object);
1556		if (backing_object->handle != NULL ||
1557		    (backing_object->type != OBJT_DEFAULT &&
1558		     backing_object->type != OBJT_SWAP) ||
1559		    (backing_object->flags & OBJ_DEAD) ||
1560		    object->handle != NULL ||
1561		    (object->type != OBJT_DEFAULT &&
1562		     object->type != OBJT_SWAP) ||
1563		    (object->flags & OBJ_DEAD)) {
1564			VM_OBJECT_UNLOCK(backing_object);
1565			break;
1566		}
1567
1568		if (
1569		    object->paging_in_progress != 0 ||
1570		    backing_object->paging_in_progress != 0
1571		) {
1572			vm_object_qcollapse(object);
1573			VM_OBJECT_UNLOCK(backing_object);
1574			break;
1575		}
1576		/*
1577		 * We know that we can either collapse the backing object (if
1578		 * the parent is the only reference to it) or (perhaps) have
1579		 * the parent bypass the object if the parent happens to shadow
1580		 * all the resident pages in the entire backing object.
1581		 *
1582		 * This is ignoring pager-backed pages such as swap pages.
1583		 * vm_object_backing_scan fails the shadowing test in this
1584		 * case.
1585		 */
1586		if (backing_object->ref_count == 1) {
1587			/*
1588			 * If there is exactly one reference to the backing
1589			 * object, we can collapse it into the parent.
1590			 */
1591			vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
1592
1593#if VM_NRESERVLEVEL > 0
1594			/*
1595			 * Break any reservations from backing_object.
1596			 */
1597			if (__predict_false(!LIST_EMPTY(&backing_object->rvq)))
1598				vm_reserv_break_all(backing_object);
1599#endif
1600
1601			/*
1602			 * Move the pager from backing_object to object.
1603			 */
1604			if (backing_object->type == OBJT_SWAP) {
1605				/*
1606				 * swap_pager_copy() can sleep, in which case
1607				 * the backing_object's and object's locks are
1608				 * released and reacquired.
1609				 */
1610				swap_pager_copy(
1611				    backing_object,
1612				    object,
1613				    OFF_TO_IDX(object->backing_object_offset), TRUE);
1614
1615				/*
1616				 * Free any cached pages from backing_object.
1617				 */
1618				if (__predict_false(backing_object->cache != NULL))
1619					vm_page_cache_free(backing_object, 0, 0);
1620			}
1621			/*
1622			 * Object now shadows whatever backing_object did.
1623			 * Note that the reference to
1624			 * backing_object->backing_object moves from within
1625			 * backing_object to within object.
1626			 */
1627			LIST_REMOVE(object, shadow_list);
1628			backing_object->shadow_count--;
1629			if (backing_object->backing_object) {
1630				VM_OBJECT_LOCK(backing_object->backing_object);
1631				LIST_REMOVE(backing_object, shadow_list);
1632				LIST_INSERT_HEAD(
1633				    &backing_object->backing_object->shadow_head,
1634				    object, shadow_list);
1635				/*
1636				 * The shadow_count has not changed.
1637				 */
1638				VM_OBJECT_UNLOCK(backing_object->backing_object);
1639			}
1640			object->backing_object = backing_object->backing_object;
1641			object->backing_object_offset +=
1642			    backing_object->backing_object_offset;
1643
1644			/*
1645			 * Discard backing_object.
1646			 *
1647			 * Since the backing object has no pages, no pager left,
1648			 * and no object references within it, all that is
1649			 * necessary is to dispose of it.
1650			 */
1651			KASSERT(backing_object->ref_count == 1, (
1652"backing_object %p was somehow re-referenced during collapse!",
1653			    backing_object));
1654			VM_OBJECT_UNLOCK(backing_object);
1655			vm_object_destroy(backing_object);
1656
1657			object_collapses++;
1658		} else {
1659			vm_object_t new_backing_object;
1660
1661			/*
1662			 * If we do not entirely shadow the backing object,
1663			 * there is nothing we can do so we give up.
1664			 */
1665			if (object->resident_page_count != object->size &&
1666			    vm_object_backing_scan(object,
1667			    OBSC_TEST_ALL_SHADOWED) == 0) {
1668				VM_OBJECT_UNLOCK(backing_object);
1669				break;
1670			}
1671
1672			/*
1673			 * Make the parent shadow the next object in the
1674			 * chain.  Deallocating backing_object will not remove
1675			 * it, since its reference count is at least 2.
1676			 */
1677			LIST_REMOVE(object, shadow_list);
1678			backing_object->shadow_count--;
1679
1680			new_backing_object = backing_object->backing_object;
1681			if ((object->backing_object = new_backing_object) != NULL) {
1682				VM_OBJECT_LOCK(new_backing_object);
1683				LIST_INSERT_HEAD(
1684				    &new_backing_object->shadow_head,
1685				    object,
1686				    shadow_list
1687				);
1688				new_backing_object->shadow_count++;
1689				vm_object_reference_locked(new_backing_object);
1690				VM_OBJECT_UNLOCK(new_backing_object);
1691				object->backing_object_offset +=
1692					backing_object->backing_object_offset;
1693			}
1694
1695			/*
1696			 * Drop the reference count on backing_object. Since
1697			 * its ref_count was at least 2, it will not vanish.
1698			 */
1699			backing_object->ref_count--;
1700			VM_OBJECT_UNLOCK(backing_object);
1701			object_bypasses++;
1702		}
1703
1704		/*
1705		 * Try again with this object's new backing object.
1706		 */
1707	}
1708}
1709
1710/*
1711 *	vm_object_page_remove:
1712 *
1713 *	For the given object, either frees or invalidates each of the
1714 *	specified pages.  In general, a page is freed.  However, if a
1715 *	page is wired for any reason other than the existence of a
1716 *	managed, wired mapping, then it may be invalidated but not
1717 *	removed from the object.  Pages are specified by the given
1718 *	range ["start", "end") and Boolean "clean_only".  As a
1719 *	special case, if "end" is zero, then the range extends from
1720 *	"start" to the end of the object.  If "clean_only" is TRUE,
1721 *	then only the non-dirty pages within the specified range are
1722 *	affected.
1723 *
1724 *	In general, this operation should only be performed on objects
1725 *	that contain managed pages.  There are two exceptions.  First,
1726 *	it may be performed on the kernel and kmem objects.  Second,
1727 *	it may be used by msync(..., MS_INVALIDATE) to invalidate
1728 *	device-backed pages.  In both of these cases, "clean_only"
1729 *	must be FALSE.
1730 *
1731 *	The object must be locked.
1732 */
1733void
1734vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
1735    boolean_t clean_only)
1736{
1737	vm_page_t p, next;
1738	int wirings;
1739
1740	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1741	if (object->resident_page_count == 0)
1742		goto skipmemq;
1743
1744	/*
1745	 * Since physically-backed objects do not use managed pages, we can't
1746	 * remove pages from the object (we must instead remove the page
1747	 * references, and then destroy the object).
1748	 */
1749	KASSERT(object->type != OBJT_PHYS || object == kernel_object ||
1750	    object == kmem_object,
1751	    ("attempt to remove pages from a physical object"));
1752
1753	vm_object_pip_add(object, 1);
1754again:
1755	p = vm_page_find_least(object, start);
1756
1757	/*
1758	 * Assert: the variable p is either (1) the page with the
1759	 * least pindex greater than or equal to the parameter pindex
1760	 * or (2) NULL.
1761	 */
1762	for (;
1763	     p != NULL && (p->pindex < end || end == 0);
1764	     p = next) {
1765		next = TAILQ_NEXT(p, listq);
1766
1767		/*
1768		 * If the page is wired for any reason besides the
1769		 * existence of managed, wired mappings, then it cannot
1770		 * be freed.  For example, fictitious pages, which
1771		 * represent device memory, are inherently wired and
1772		 * cannot be freed.  They can, however, be invalidated
1773		 * if "clean_only" is FALSE.
1774		 */
1775		vm_page_lock(p);
1776		if ((wirings = p->wire_count) != 0 &&
1777		    (wirings = pmap_page_wired_mappings(p)) != p->wire_count) {
1778			/* Fictitious pages do not have managed mappings. */
1779			if ((p->flags & PG_FICTITIOUS) == 0)
1780				pmap_remove_all(p);
1781			/* Account for removal of managed, wired mappings. */
1782			p->wire_count -= wirings;
1783			if (!clean_only) {
1784				p->valid = 0;
1785				vm_page_undirty(p);
1786			}
1787			vm_page_unlock(p);
1788			continue;
1789		}
1790		if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
1791			goto again;
1792		KASSERT((p->flags & PG_FICTITIOUS) == 0,
1793		    ("vm_object_page_remove: page %p is fictitious", p));
1794		if (clean_only && p->valid) {
1795			pmap_remove_write(p);
1796			if (p->dirty) {
1797				vm_page_unlock(p);
1798				continue;
1799			}
1800		}
1801		pmap_remove_all(p);
1802		/* Account for removal of managed, wired mappings. */
1803		if (wirings != 0)
1804			p->wire_count -= wirings;
1805		vm_page_free(p);
1806		vm_page_unlock(p);
1807	}
1808	vm_object_pip_wakeup(object);
1809skipmemq:
1810	if (__predict_false(object->cache != NULL))
1811		vm_page_cache_free(object, start, end);
1812}
1813
1814/*
1815 *	Populate the specified range of the object with valid pages.  Returns
1816 *	TRUE if the range is successfully populated and FALSE otherwise.
1817 *
1818 *	Note: This function should be optimized to pass a larger array of
1819 *	pages to vm_pager_get_pages() before it is applied to a non-
1820 *	OBJT_DEVICE object.
1821 *
1822 *	The object must be locked.
1823 */
1824boolean_t
1825vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
1826{
1827	vm_page_t m, ma[1];
1828	vm_pindex_t pindex;
1829	int rv;
1830
1831	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1832	for (pindex = start; pindex < end; pindex++) {
1833		m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL |
1834		    VM_ALLOC_RETRY);
1835		if (m->valid != VM_PAGE_BITS_ALL) {
1836			ma[0] = m;
1837			rv = vm_pager_get_pages(object, ma, 1, 0);
1838			m = vm_page_lookup(object, pindex);
1839			if (m == NULL)
1840				break;
1841			if (rv != VM_PAGER_OK) {
1842				vm_page_lock(m);
1843				vm_page_free(m);
1844				vm_page_unlock(m);
1845				break;
1846			}
1847		}
1848		/*
1849		 * Keep "m" busy because a subsequent iteration may unlock
1850		 * the object.
1851		 */
1852	}
1853	if (pindex > start) {
1854		m = vm_page_lookup(object, start);
1855		while (m != NULL && m->pindex < pindex) {
1856			vm_page_wakeup(m);
1857			m = TAILQ_NEXT(m, listq);
1858		}
1859	}
1860	return (pindex == end);
1861}
1862
1863/*
1864 *	Routine:	vm_object_coalesce
1865 *	Function:	Coalesces two objects backing up adjoining
1866 *			regions of memory into a single object.
1867 *
1868 *	returns TRUE if objects were combined.
1869 *
1870 *	NOTE:	Only works at the moment if the second object is NULL -
1871 *		if it's not, which object do we lock first?
1872 *
1873 *	Parameters:
1874 *		prev_object	First object to coalesce
1875 *		prev_offset	Offset into prev_object
1876 *		prev_size	Size of reference to prev_object
1877 *		next_size	Size of reference to the second object
1878 *		reserved	Indicator that extension region has
1879 *				swap accounted for
1880 *
1881 *	Conditions:
1882 *	The object must *not* be locked.
1883 */
1884boolean_t
1885vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
1886    vm_size_t prev_size, vm_size_t next_size, boolean_t reserved)
1887{
1888	vm_pindex_t next_pindex;
1889
1890	if (prev_object == NULL)
1891		return (TRUE);
1892	VM_OBJECT_LOCK(prev_object);
1893	if (prev_object->type != OBJT_DEFAULT &&
1894	    prev_object->type != OBJT_SWAP) {
1895		VM_OBJECT_UNLOCK(prev_object);
1896		return (FALSE);
1897	}
1898
1899	/*
1900	 * Try to collapse the object first
1901	 */
1902	vm_object_collapse(prev_object);
1903
1904	/*
1905	 * Can't coalesce if: . more than one reference . paged out . shadows
1906	 * another object . has a copy elsewhere (any of which mean that the
1907	 * pages not mapped to prev_entry may be in use anyway)
1908	 */
1909	if (prev_object->backing_object != NULL) {
1910		VM_OBJECT_UNLOCK(prev_object);
1911		return (FALSE);
1912	}
1913
1914	prev_size >>= PAGE_SHIFT;
1915	next_size >>= PAGE_SHIFT;
1916	next_pindex = OFF_TO_IDX(prev_offset) + prev_size;
1917
1918	if ((prev_object->ref_count > 1) &&
1919	    (prev_object->size != next_pindex)) {
1920		VM_OBJECT_UNLOCK(prev_object);
1921		return (FALSE);
1922	}
1923
1924	/*
1925	 * Account for the charge.
1926	 */
1927	if (prev_object->cred != NULL) {
1928
1929		/*
1930		 * If prev_object was charged, then this mapping,
1931		 * althought not charged now, may become writable
1932		 * later. Non-NULL cred in the object would prevent
1933		 * swap reservation during enabling of the write
1934		 * access, so reserve swap now. Failed reservation
1935		 * cause allocation of the separate object for the map
1936		 * entry, and swap reservation for this entry is
1937		 * managed in appropriate time.
1938		 */
1939		if (!reserved && !swap_reserve_by_cred(ptoa(next_size),
1940		    prev_object->cred)) {
1941			return (FALSE);
1942		}
1943		prev_object->charge += ptoa(next_size);
1944	}
1945
1946	/*
1947	 * Remove any pages that may still be in the object from a previous
1948	 * deallocation.
1949	 */
1950	if (next_pindex < prev_object->size) {
1951		vm_object_page_remove(prev_object,
1952				      next_pindex,
1953				      next_pindex + next_size, FALSE);
1954		if (prev_object->type == OBJT_SWAP)
1955			swap_pager_freespace(prev_object,
1956					     next_pindex, next_size);
1957#if 0
1958		if (prev_object->cred != NULL) {
1959			KASSERT(prev_object->charge >=
1960			    ptoa(prev_object->size - next_pindex),
1961			    ("object %p overcharged 1 %jx %jx", prev_object,
1962				(uintmax_t)next_pindex, (uintmax_t)next_size));
1963			prev_object->charge -= ptoa(prev_object->size -
1964			    next_pindex);
1965		}
1966#endif
1967	}
1968
1969	/*
1970	 * Extend the object if necessary.
1971	 */
1972	if (next_pindex + next_size > prev_object->size)
1973		prev_object->size = next_pindex + next_size;
1974
1975	VM_OBJECT_UNLOCK(prev_object);
1976	return (TRUE);
1977}
1978
1979void
1980vm_object_set_writeable_dirty(vm_object_t object)
1981{
1982
1983	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1984	if (object->type != OBJT_VNODE)
1985		return;
1986	object->generation++;
1987	if ((object->flags & OBJ_MIGHTBEDIRTY) != 0)
1988		return;
1989	vm_object_set_flag(object, OBJ_MIGHTBEDIRTY);
1990}
1991
1992#include "opt_ddb.h"
1993#ifdef DDB
1994#include <sys/kernel.h>
1995
1996#include <sys/cons.h>
1997
1998#include <ddb/ddb.h>
1999
2000static int
2001_vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
2002{
2003	vm_map_t tmpm;
2004	vm_map_entry_t tmpe;
2005	vm_object_t obj;
2006	int entcount;
2007
2008	if (map == 0)
2009		return 0;
2010
2011	if (entry == 0) {
2012		tmpe = map->header.next;
2013		entcount = map->nentries;
2014		while (entcount-- && (tmpe != &map->header)) {
2015			if (_vm_object_in_map(map, object, tmpe)) {
2016				return 1;
2017			}
2018			tmpe = tmpe->next;
2019		}
2020	} else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2021		tmpm = entry->object.sub_map;
2022		tmpe = tmpm->header.next;
2023		entcount = tmpm->nentries;
2024		while (entcount-- && tmpe != &tmpm->header) {
2025			if (_vm_object_in_map(tmpm, object, tmpe)) {
2026				return 1;
2027			}
2028			tmpe = tmpe->next;
2029		}
2030	} else if ((obj = entry->object.vm_object) != NULL) {
2031		for (; obj; obj = obj->backing_object)
2032			if (obj == object) {
2033				return 1;
2034			}
2035	}
2036	return 0;
2037}
2038
2039static int
2040vm_object_in_map(vm_object_t object)
2041{
2042	struct proc *p;
2043
2044	/* sx_slock(&allproc_lock); */
2045	FOREACH_PROC_IN_SYSTEM(p) {
2046		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
2047			continue;
2048		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
2049			/* sx_sunlock(&allproc_lock); */
2050			return 1;
2051		}
2052	}
2053	/* sx_sunlock(&allproc_lock); */
2054	if (_vm_object_in_map(kernel_map, object, 0))
2055		return 1;
2056	if (_vm_object_in_map(kmem_map, object, 0))
2057		return 1;
2058	if (_vm_object_in_map(pager_map, object, 0))
2059		return 1;
2060	if (_vm_object_in_map(buffer_map, object, 0))
2061		return 1;
2062	return 0;
2063}
2064
2065DB_SHOW_COMMAND(vmochk, vm_object_check)
2066{
2067	vm_object_t object;
2068
2069	/*
2070	 * make sure that internal objs are in a map somewhere
2071	 * and none have zero ref counts.
2072	 */
2073	TAILQ_FOREACH(object, &vm_object_list, object_list) {
2074		if (object->handle == NULL &&
2075		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2076			if (object->ref_count == 0) {
2077				db_printf("vmochk: internal obj has zero ref count: %ld\n",
2078					(long)object->size);
2079			}
2080			if (!vm_object_in_map(object)) {
2081				db_printf(
2082			"vmochk: internal obj is not in a map: "
2083			"ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
2084				    object->ref_count, (u_long)object->size,
2085				    (u_long)object->size,
2086				    (void *)object->backing_object);
2087			}
2088		}
2089	}
2090}
2091
2092/*
2093 *	vm_object_print:	[ debug ]
2094 */
2095DB_SHOW_COMMAND(object, vm_object_print_static)
2096{
2097	/* XXX convert args. */
2098	vm_object_t object = (vm_object_t)addr;
2099	boolean_t full = have_addr;
2100
2101	vm_page_t p;
2102
2103	/* XXX count is an (unused) arg.  Avoid shadowing it. */
2104#define	count	was_count
2105
2106	int count;
2107
2108	if (object == NULL)
2109		return;
2110
2111	db_iprintf(
2112	    "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x ruid %d charge %jx\n",
2113	    object, (int)object->type, (uintmax_t)object->size,
2114	    object->resident_page_count, object->ref_count, object->flags,
2115	    object->cred ? object->cred->cr_ruid : -1, (uintmax_t)object->charge);
2116	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
2117	    object->shadow_count,
2118	    object->backing_object ? object->backing_object->ref_count : 0,
2119	    object->backing_object, (uintmax_t)object->backing_object_offset);
2120
2121	if (!full)
2122		return;
2123
2124	db_indent += 2;
2125	count = 0;
2126	TAILQ_FOREACH(p, &object->memq, listq) {
2127		if (count == 0)
2128			db_iprintf("memory:=");
2129		else if (count == 6) {
2130			db_printf("\n");
2131			db_iprintf(" ...");
2132			count = 0;
2133		} else
2134			db_printf(",");
2135		count++;
2136
2137		db_printf("(off=0x%jx,page=0x%jx)",
2138		    (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
2139	}
2140	if (count != 0)
2141		db_printf("\n");
2142	db_indent -= 2;
2143}
2144
2145/* XXX. */
2146#undef count
2147
2148/* XXX need this non-static entry for calling from vm_map_print. */
2149void
2150vm_object_print(
2151        /* db_expr_t */ long addr,
2152	boolean_t have_addr,
2153	/* db_expr_t */ long count,
2154	char *modif)
2155{
2156	vm_object_print_static(addr, have_addr, count, modif);
2157}
2158
2159DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
2160{
2161	vm_object_t object;
2162	vm_pindex_t fidx;
2163	vm_paddr_t pa;
2164	vm_page_t m, prev_m;
2165	int rcount, nl, c;
2166
2167	nl = 0;
2168	TAILQ_FOREACH(object, &vm_object_list, object_list) {
2169		db_printf("new object: %p\n", (void *)object);
2170		if (nl > 18) {
2171			c = cngetc();
2172			if (c != ' ')
2173				return;
2174			nl = 0;
2175		}
2176		nl++;
2177		rcount = 0;
2178		fidx = 0;
2179		pa = -1;
2180		TAILQ_FOREACH(m, &object->memq, listq) {
2181			if (m->pindex > 128)
2182				break;
2183			if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL &&
2184			    prev_m->pindex + 1 != m->pindex) {
2185				if (rcount) {
2186					db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2187						(long)fidx, rcount, (long)pa);
2188					if (nl > 18) {
2189						c = cngetc();
2190						if (c != ' ')
2191							return;
2192						nl = 0;
2193					}
2194					nl++;
2195					rcount = 0;
2196				}
2197			}
2198			if (rcount &&
2199				(VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
2200				++rcount;
2201				continue;
2202			}
2203			if (rcount) {
2204				db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2205					(long)fidx, rcount, (long)pa);
2206				if (nl > 18) {
2207					c = cngetc();
2208					if (c != ' ')
2209						return;
2210					nl = 0;
2211				}
2212				nl++;
2213			}
2214			fidx = m->pindex;
2215			pa = VM_PAGE_TO_PHYS(m);
2216			rcount = 1;
2217		}
2218		if (rcount) {
2219			db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2220				(long)fidx, rcount, (long)pa);
2221			if (nl > 18) {
2222				c = cngetc();
2223				if (c != ' ')
2224					return;
2225				nl = 0;
2226			}
2227			nl++;
2228		}
2229	}
2230}
2231#endif /* DDB */
2232