1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
33 *
34 *
35 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
36 * All rights reserved.
37 *
38 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
39 *
40 * Permission to use, copy, modify and distribute this software and
41 * its documentation is hereby granted, provided that both the copyright
42 * notice and this permission notice appear in all copies of the
43 * software, derivative works or modified versions, and any portions
44 * thereof, and that both notices appear in supporting documentation.
45 *
46 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
47 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
48 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
49 *
50 * Carnegie Mellon requests users of this software to return to
51 *
52 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
53 *  School of Computer Science
54 *  Carnegie Mellon University
55 *  Pittsburgh PA 15213-3890
56 *
57 * any improvements or extensions that they make and grant Carnegie the
58 * rights to redistribute these changes.
59 */
60
61/*
62 *	Virtual memory object module.
63 */
64
65#include <sys/cdefs.h>
66__FBSDID("$FreeBSD: head/sys/vm/vm_object.c 160960 2006-08-03 23:56:11Z alc $");
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/lock.h>
71#include <sys/mman.h>
72#include <sys/mount.h>
73#include <sys/kernel.h>
74#include <sys/sysctl.h>
75#include <sys/mutex.h>
76#include <sys/proc.h>		/* for curproc, pageproc */
77#include <sys/socket.h>
78#include <sys/vnode.h>
79#include <sys/vmmeter.h>
80#include <sys/sx.h>
81
82#include <vm/vm.h>
83#include <vm/vm_param.h>
84#include <vm/pmap.h>
85#include <vm/vm_map.h>
86#include <vm/vm_object.h>
87#include <vm/vm_page.h>
88#include <vm/vm_pageout.h>
89#include <vm/vm_pager.h>
90#include <vm/swap_pager.h>
91#include <vm/vm_kern.h>
92#include <vm/vm_extern.h>
93#include <vm/uma.h>
94
95#define EASY_SCAN_FACTOR       8
96
97#define MSYNC_FLUSH_HARDSEQ	0x01
98#define MSYNC_FLUSH_SOFTSEQ	0x02
99
100/*
101 * msync / VM object flushing optimizations
102 */
103static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ;
104SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
105        CTLFLAG_RW, &msync_flush_flags, 0, "");
106
107static int old_msync;
108SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
109    "Use old (insecure) msync behavior");
110
111static void	vm_object_qcollapse(vm_object_t object);
112static int	vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);
113static void	vm_object_vndeallocate(vm_object_t object);
114
115/*
116 *	Virtual memory objects maintain the actual data
117 *	associated with allocated virtual memory.  A given
118 *	page of memory exists within exactly one object.
119 *
120 *	An object is only deallocated when all "references"
121 *	are given up.  Only one "reference" to a given
122 *	region of an object should be writeable.
123 *
124 *	Associated with each object is a list of all resident
125 *	memory pages belonging to that object; this list is
126 *	maintained by the "vm_page" module, and locked by the object's
127 *	lock.
128 *
129 *	Each object also records a "pager" routine which is
130 *	used to retrieve (and store) pages to the proper backing
131 *	storage.  In addition, objects may be backed by other
132 *	objects from which they were virtual-copied.
133 *
134 *	The only items within the object structure which are
135 *	modified after time of creation are:
136 *		reference count		locked by object's lock
137 *		pager routine		locked by object's lock
138 *
139 */
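
/*
 * Hedged illustration (comment only, not compiled code): the object
 * lifecycle sketched above.  "len" is a hypothetical byte length; the
 * real callers are the vm_map and fault paths.
 *
 *	vm_object_t obj;
 *
 *	obj = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(len));
 *	vm_object_reference(obj);	(take a second reference)
 *	vm_object_deallocate(obj);	(drop it again)
 *	vm_object_deallocate(obj);	(last reference; storage may be freed)
 */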
140
141struct object_q vm_object_list;
142struct mtx vm_object_list_mtx;	/* lock for object list and count */
143
144struct vm_object kernel_object_store;
145struct vm_object kmem_object_store;
146
147SYSCTL_DECL(_vm_stats);
148SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, "VM object stats");
149
150static long object_collapses;
151SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
152    &object_collapses, 0, "VM object collapses");
153
154static long object_bypasses;
155SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
156    &object_bypasses, 0, "VM object bypasses");
157
158/*
159 * next_index determines the page color that is assigned to the next
160 * allocated object.  Accesses to next_index are not synchronized
161 * because the effects of two or more object allocations using
162 * next_index simultaneously are inconsequential.  At any given time,
163 * numerous objects have the same page color.
164 */
165static int next_index;
166
167static uma_zone_t obj_zone;
168
169static int vm_object_zinit(void *mem, int size, int flags);
170
171#ifdef INVARIANTS
172static void vm_object_zdtor(void *mem, int size, void *arg);
173
174static void
175vm_object_zdtor(void *mem, int size, void *arg)
176{
177	vm_object_t object;
178
179	object = (vm_object_t)mem;
180	KASSERT(TAILQ_EMPTY(&object->memq),
181	    ("object %p has resident pages",
182	    object));
183	KASSERT(object->paging_in_progress == 0,
184	    ("object %p paging_in_progress = %d",
185	    object, object->paging_in_progress));
186	KASSERT(object->resident_page_count == 0,
187	    ("object %p resident_page_count = %d",
188	    object, object->resident_page_count));
189	KASSERT(object->shadow_count == 0,
190	    ("object %p shadow_count = %d",
191	    object, object->shadow_count));
192}
193#endif
194
195static int
196vm_object_zinit(void *mem, int size, int flags)
197{
198	vm_object_t object;
199
200	object = (vm_object_t)mem;
201	bzero(&object->mtx, sizeof(object->mtx));
202	VM_OBJECT_LOCK_INIT(object, "standard object");
203
204	/* These are true for any object that has been freed */
205	object->paging_in_progress = 0;
206	object->resident_page_count = 0;
207	object->shadow_count = 0;
208	return (0);
209}
210
211void
212_vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
213{
214	int incr;
215
216	TAILQ_INIT(&object->memq);
217	LIST_INIT(&object->shadow_head);
218
219	object->root = NULL;
220	object->type = type;
221	object->size = size;
222	object->generation = 1;
223	object->ref_count = 1;
224	object->flags = 0;
225	if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
226		object->flags = OBJ_ONEMAPPING;
227	incr = PQ_MAXLENGTH;
228	if (size <= incr)
229		incr = size;
230	object->pg_color = next_index;
231	next_index = (object->pg_color + incr) & PQ_COLORMASK;
232	object->handle = NULL;
233	object->backing_object = NULL;
234	object->backing_object_offset = (vm_ooffset_t) 0;
235
236	mtx_lock(&vm_object_list_mtx);
237	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
238	mtx_unlock(&vm_object_list_mtx);
239}
240
241/*
242 *	vm_object_init:
243 *
244 *	Initialize the VM objects module.
245 */
246void
247vm_object_init(void)
248{
249	TAILQ_INIT(&vm_object_list);
250	mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
251
252	VM_OBJECT_LOCK_INIT(&kernel_object_store, "kernel object");
253	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
254	    kernel_object);
255
256	VM_OBJECT_LOCK_INIT(&kmem_object_store, "kmem object");
257	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
258	    kmem_object);
259
260	/*
261	 * The lock portion of struct vm_object must be type stable due
262	 * to vm_pageout_fallback_object_lock locking a vm object
263	 * without holding any references to it.
264	 */
265	obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
266#ifdef INVARIANTS
267	    vm_object_zdtor,
268#else
269	    NULL,
270#endif
271	    vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
272}
273
274void
275vm_object_clear_flag(vm_object_t object, u_short bits)
276{
277
278	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
279	object->flags &= ~bits;
280}
281
282void
283vm_object_pip_add(vm_object_t object, short i)
284{
285
286	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
287	object->paging_in_progress += i;
288}
289
290void
291vm_object_pip_subtract(vm_object_t object, short i)
292{
293
294	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
295	object->paging_in_progress -= i;
296}
297
298void
299vm_object_pip_wakeup(vm_object_t object)
300{
301
302	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
303	object->paging_in_progress--;
304	if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
305		vm_object_clear_flag(object, OBJ_PIPWNT);
306		wakeup(object);
307	}
308}
309
310void
311vm_object_pip_wakeupn(vm_object_t object, short i)
312{
313
314	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
315	if (i)
316		object->paging_in_progress -= i;
317	if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
318		vm_object_clear_flag(object, OBJ_PIPWNT);
319		wakeup(object);
320	}
321}
322
323void
324vm_object_pip_wait(vm_object_t object, char *waitid)
325{
326
327	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
328	while (object->paging_in_progress) {
329		object->flags |= OBJ_PIPWNT;
330		msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
331	}
332}
333
334/*
335 *	vm_object_allocate:
336 *
337 *	Returns a new object with the given size.
338 */
339vm_object_t
340vm_object_allocate(objtype_t type, vm_pindex_t size)
341{
342	vm_object_t object;
343
344	object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
345	_vm_object_allocate(type, size, object);
346	return (object);
347}
348
349
350/*
351 *	vm_object_reference:
352 *
353 *	Gets another reference to the given object.  Note: OBJ_DEAD
354 *	objects can be referenced during final cleaning.
355 */
356void
357vm_object_reference(vm_object_t object)
358{
359	struct vnode *vp;
360
361	if (object == NULL)
362		return;
363	VM_OBJECT_LOCK(object);
364	object->ref_count++;
365	if (object->type == OBJT_VNODE) {
366		int vfslocked;
367
368		vp = object->handle;
369		VM_OBJECT_UNLOCK(object);
370		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
371		vget(vp, LK_RETRY, curthread);
372		VFS_UNLOCK_GIANT(vfslocked);
373	} else
374		VM_OBJECT_UNLOCK(object);
375}
376
377/*
378 *	vm_object_reference_locked:
379 *
380 *	Gets another reference to the given object.
381 *
382 *	The object must be locked.
383 */
384void
385vm_object_reference_locked(vm_object_t object)
386{
387	struct vnode *vp;
388
389	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
390	KASSERT((object->flags & OBJ_DEAD) == 0,
391	    ("vm_object_reference_locked: dead object referenced"));
392	object->ref_count++;
393	if (object->type == OBJT_VNODE) {
394		vp = object->handle;
395		vref(vp);
396	}
397}
398
399/*
400 * Handle deallocating an object of type OBJT_VNODE.
401 */
402static void
403vm_object_vndeallocate(vm_object_t object)
404{
405	struct vnode *vp = (struct vnode *) object->handle;
406
407	VFS_ASSERT_GIANT(vp->v_mount);
408	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
409	KASSERT(object->type == OBJT_VNODE,
410	    ("vm_object_vndeallocate: not a vnode object"));
411	KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
412#ifdef INVARIANTS
413	if (object->ref_count == 0) {
414		vprint("vm_object_vndeallocate", vp);
415		panic("vm_object_vndeallocate: bad object reference count");
416	}
417#endif
418
419	object->ref_count--;
420	if (object->ref_count == 0) {
421		mp_fixme("Unlocked vflag access.");
422		vp->v_vflag &= ~VV_TEXT;
423	}
424	VM_OBJECT_UNLOCK(object);
425	/*
426	 * vrele may need a vop lock
427	 */
428	vrele(vp);
429}
430
431/*
432 *	vm_object_deallocate:
433 *
434 *	Release a reference to the specified object,
435 *	gained either through a vm_object_allocate
436 *	or a vm_object_reference call.  When all references
437 *	are gone, storage associated with this object
438 *	may be relinquished.
439 *
440 *	No object may be locked.
441 */
442void
443vm_object_deallocate(vm_object_t object)
444{
445	vm_object_t temp;
446
447	while (object != NULL) {
448		int vfslocked;
449
450		vfslocked = 0;
451	restart:
452		VM_OBJECT_LOCK(object);
453		if (object->type == OBJT_VNODE) {
454			struct vnode *vp = (struct vnode *) object->handle;
455
456			/*
457			 * Conditionally acquire Giant for a vnode-backed
458			 * object.  We have to be careful since the type of
459			 * a vnode object can change while the object is
460			 * unlocked.
461			 */
462			if (VFS_NEEDSGIANT(vp->v_mount) && !vfslocked) {
463				vfslocked = 1;
464				if (!mtx_trylock(&Giant)) {
465					VM_OBJECT_UNLOCK(object);
466					mtx_lock(&Giant);
467					goto restart;
468				}
469			}
470			vm_object_vndeallocate(object);
471			VFS_UNLOCK_GIANT(vfslocked);
472			return;
473		} else
474			/*
475			 * This is to handle the case that the object
476			 * changed type while we dropped its lock to
477			 * obtain Giant.
478			 */
479			VFS_UNLOCK_GIANT(vfslocked);
480
481		KASSERT(object->ref_count != 0,
482			("vm_object_deallocate: object deallocated too many times: %d", object->type));
483
484		/*
485		 * If the reference count goes to 0 we start calling
486		 * vm_object_terminate() on the object chain.
487		 * A ref count of 1 may be a special case depending on the
488		 * shadow count being 0 or 1.
489		 */
490		object->ref_count--;
491		if (object->ref_count > 1) {
492			VM_OBJECT_UNLOCK(object);
493			return;
494		} else if (object->ref_count == 1) {
495			if (object->shadow_count == 0) {
496				vm_object_set_flag(object, OBJ_ONEMAPPING);
497			} else if ((object->shadow_count == 1) &&
498			    (object->handle == NULL) &&
499			    (object->type == OBJT_DEFAULT ||
500			     object->type == OBJT_SWAP)) {
501				vm_object_t robject;
502
503				robject = LIST_FIRST(&object->shadow_head);
504				KASSERT(robject != NULL,
505				    ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
506					 object->ref_count,
507					 object->shadow_count));
508				if (!VM_OBJECT_TRYLOCK(robject)) {
509					/*
510					 * Avoid a potential deadlock.
511					 */
512					object->ref_count++;
513					VM_OBJECT_UNLOCK(object);
514					/*
515					 * More likely than not the thread
516					 * holding robject's lock has lower
517					 * priority than the current thread.
518					 * Let the lower priority thread run.
519					 */
520					tsleep(&proc0, PVM, "vmo_de", 1);
521					continue;
522				}
523				/*
524				 * Collapse object into its shadow unless its
525				 * shadow is dead.  In that case, object will
526				 * be deallocated by the thread that is
527				 * deallocating its shadow.
528				 */
529				if ((robject->flags & OBJ_DEAD) == 0 &&
530				    (robject->handle == NULL) &&
531				    (robject->type == OBJT_DEFAULT ||
532				     robject->type == OBJT_SWAP)) {
533
534					robject->ref_count++;
535retry:
536					if (robject->paging_in_progress) {
537						VM_OBJECT_UNLOCK(object);
538						vm_object_pip_wait(robject,
539						    "objde1");
540						temp = robject->backing_object;
541						if (object == temp) {
542							VM_OBJECT_LOCK(object);
543							goto retry;
544						}
545					} else if (object->paging_in_progress) {
546						VM_OBJECT_UNLOCK(robject);
547						object->flags |= OBJ_PIPWNT;
548						msleep(object,
549						    VM_OBJECT_MTX(object),
550						    PDROP | PVM, "objde2", 0);
551						VM_OBJECT_LOCK(robject);
552						temp = robject->backing_object;
553						if (object == temp) {
554							VM_OBJECT_LOCK(object);
555							goto retry;
556						}
557					} else
558						VM_OBJECT_UNLOCK(object);
559
560					if (robject->ref_count == 1) {
561						robject->ref_count--;
562						object = robject;
563						goto doterm;
564					}
565					object = robject;
566					vm_object_collapse(object);
567					VM_OBJECT_UNLOCK(object);
568					continue;
569				}
570				VM_OBJECT_UNLOCK(robject);
571			}
572			VM_OBJECT_UNLOCK(object);
573			return;
574		}
575doterm:
576		temp = object->backing_object;
577		if (temp != NULL) {
578			VM_OBJECT_LOCK(temp);
579			LIST_REMOVE(object, shadow_list);
580			temp->shadow_count--;
581			temp->generation++;
582			VM_OBJECT_UNLOCK(temp);
583			object->backing_object = NULL;
584		}
585		/*
586		 * Don't double-terminate, we could be in a termination
587		 * recursion due to the terminate having to sync data
588		 * to disk.
589		 */
590		if ((object->flags & OBJ_DEAD) == 0)
591			vm_object_terminate(object);
592		else
593			VM_OBJECT_UNLOCK(object);
594		object = temp;
595	}
596}
597
598/*
599 *	vm_object_terminate actually destroys the specified object, freeing
600 *	up all previously used resources.
601 *
602 *	The object must be locked.
603 *	This routine may block.
604 */
605void
606vm_object_terminate(vm_object_t object)
607{
608	vm_page_t p;
609
610	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
611
612	/*
613	 * Make sure no one uses us.
614	 */
615	vm_object_set_flag(object, OBJ_DEAD);
616
617	/*
618	 * wait for the pageout daemon to be done with the object
619	 */
620	vm_object_pip_wait(object, "objtrm");
621
622	KASSERT(!object->paging_in_progress,
623		("vm_object_terminate: pageout in progress"));
624
625	/*
626	 * Clean and free the pages, as appropriate. All references to the
627	 * object are gone, so we don't need to lock it.
628	 */
629	if (object->type == OBJT_VNODE) {
630		struct vnode *vp = (struct vnode *)object->handle;
631
632		/*
633		 * Clean pages and flush buffers.
634		 */
635		vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
636		VM_OBJECT_UNLOCK(object);
637
638		vinvalbuf(vp, V_SAVE, NULL, 0, 0);
639
640		VM_OBJECT_LOCK(object);
641	}
642
643	KASSERT(object->ref_count == 0,
644		("vm_object_terminate: object with references, ref_count=%d",
645		object->ref_count));
646
647	/*
648	 * Now free any remaining pages. For internal objects, this also
649	 * removes them from paging queues. Don't free wired pages, just
650	 * remove them from the object.
651	 */
652	vm_page_lock_queues();
653	while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
654		KASSERT(!p->busy && (p->flags & PG_BUSY) == 0,
655			("vm_object_terminate: freeing busy page %p "
656			"p->busy = %d, p->flags %x\n", p, p->busy, p->flags));
657		if (p->wire_count == 0) {
658			vm_page_free(p);
659			cnt.v_pfree++;
660		} else {
661			vm_page_remove(p);
662		}
663	}
664	vm_page_unlock_queues();
665
666	/*
667	 * Let the pager know object is dead.
668	 */
669	vm_pager_deallocate(object);
670	VM_OBJECT_UNLOCK(object);
671
672	/*
673	 * Remove the object from the global object list.
674	 */
675	mtx_lock(&vm_object_list_mtx);
676	TAILQ_REMOVE(&vm_object_list, object, object_list);
677	mtx_unlock(&vm_object_list_mtx);
678
679	/*
680	 * Free the space for the object.
681	 */
682	uma_zfree(obj_zone, object);
683}
684
685/*
686 *	vm_object_page_clean
687 *
688 *	Clean all dirty pages in the specified range of the object.  Leaves the
689 * 	page on whatever queue it is currently on.   If NOSYNC is set then do not
690 *	write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
691 *	leaving the object dirty.
692 *
693 *	When stuffing pages asynchronously, allow clustering.  XXX we need a
694 *	synchronous clustering mode implementation.
695 *
696 *	Odd semantics: if start == end, we clean everything.
697 *
698 *	The object must be locked.
699 */
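/*
 * Hedged example (comment only): a call of the form used by the
 * vm_object_sync() path below; "obj", "off" and "len" are hypothetical.
 * Passing start == end == 0 instead cleans the entire object, per the
 * odd semantics noted above.
 *
 *	VM_OBJECT_LOCK(obj);
 *	vm_object_page_clean(obj, OFF_TO_IDX(off),
 *	    OFF_TO_IDX(off + len + PAGE_MASK), OBJPC_SYNC);
 *	VM_OBJECT_UNLOCK(obj);
 */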
700void
701vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags)
702{
703	vm_page_t p, np;
704	vm_pindex_t tstart, tend;
705	vm_pindex_t pi;
706	int clearobjflags;
707	int pagerflags;
708	int curgeneration;
709
710	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
711	if (object->type != OBJT_VNODE ||
712		(object->flags & OBJ_MIGHTBEDIRTY) == 0)
713		return;
714
715	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
716	pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
717
718	vm_object_set_flag(object, OBJ_CLEANING);
719
720	tstart = start;
721	if (end == 0) {
722		tend = object->size;
723	} else {
724		tend = end;
725	}
726
727	vm_page_lock_queues();
728	/*
729	 * If the caller is smart and only msync()s a range he knows is
730	 * dirty, we may be able to avoid an object scan.  This results in
731	 * a phenomenal improvement in performance.  We cannot do this
732	 * as a matter of course because the object may be huge - e.g.
733	 * the size might be in the gigabytes or terabytes.
734	 */
735	if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
736		vm_pindex_t tscan;
737		int scanlimit;
738		int scanreset;
739
740		scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
741		if (scanreset < 16)
742			scanreset = 16;
743		pagerflags |= VM_PAGER_IGNORE_CLEANCHK;
744
745		scanlimit = scanreset;
746		tscan = tstart;
747		while (tscan < tend) {
748			curgeneration = object->generation;
749			p = vm_page_lookup(object, tscan);
750			if (p == NULL || p->valid == 0 ||
751			    VM_PAGE_INQUEUE1(p, PQ_CACHE)) {
752				if (--scanlimit == 0)
753					break;
754				++tscan;
755				continue;
756			}
757			vm_page_test_dirty(p);
758			if ((p->dirty & p->valid) == 0) {
759				if (--scanlimit == 0)
760					break;
761				++tscan;
762				continue;
763			}
764			/*
765			 * If we have been asked to skip nosync pages and
766			 * this is a nosync page, we can't continue.
767			 */
768			if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
769				if (--scanlimit == 0)
770					break;
771				++tscan;
772				continue;
773			}
774			scanlimit = scanreset;
775
776			/*
777			 * This returns 0 if it was unable to busy the first
778			 * page (i.e. had to sleep).
779			 */
780			tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
781		}
782
783		/*
784		 * If everything was dirty and we flushed it successfully,
785		 * and the requested range is not the entire object, we
786		 * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
787		 * return immediately.
788		 */
789		if (tscan >= tend && (tstart || tend < object->size)) {
790			vm_page_unlock_queues();
791			vm_object_clear_flag(object, OBJ_CLEANING);
792			return;
793		}
794		pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK;
795	}
796
797	/*
798	 * Generally set CLEANCHK interlock and make the page read-only so
799	 * we can then clear the object flags.
800	 *
801	 * However, if this is a nosync mmap then the object is likely to
802	 * stay dirty so do not mess with the page and do not clear the
803	 * object flags.
804	 */
805	clearobjflags = 1;
806	TAILQ_FOREACH(p, &object->memq, listq) {
807		vm_page_flag_set(p, PG_CLEANCHK);
808		if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
809			clearobjflags = 0;
810		else
811			pmap_remove_write(p);
812	}
813
814	if (clearobjflags && (tstart == 0) && (tend == object->size)) {
815		struct vnode *vp;
816
817		vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY);
818		if (object->type == OBJT_VNODE &&
819		    (vp = (struct vnode *)object->handle) != NULL) {
820			VI_LOCK(vp);
821			if (vp->v_iflag & VI_OBJDIRTY)
822				vp->v_iflag &= ~VI_OBJDIRTY;
823			VI_UNLOCK(vp);
824		}
825	}
826
827rescan:
828	curgeneration = object->generation;
829
830	for (p = TAILQ_FIRST(&object->memq); p; p = np) {
831		int n;
832
833		np = TAILQ_NEXT(p, listq);
834
835again:
836		pi = p->pindex;
837		if (((p->flags & PG_CLEANCHK) == 0) ||
838			(pi < tstart) || (pi >= tend) ||
839			(p->valid == 0) ||
840		    VM_PAGE_INQUEUE1(p, PQ_CACHE)) {
841			vm_page_flag_clear(p, PG_CLEANCHK);
842			continue;
843		}
844
845		vm_page_test_dirty(p);
846		if ((p->dirty & p->valid) == 0) {
847			vm_page_flag_clear(p, PG_CLEANCHK);
848			continue;
849		}
850
851		/*
852		 * If we have been asked to skip nosync pages and this is a
853		 * nosync page, skip it.  Note that the object flags were
854		 * not cleared in this case so we do not have to set them.
855		 */
856		if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
857			vm_page_flag_clear(p, PG_CLEANCHK);
858			continue;
859		}
860
861		n = vm_object_page_collect_flush(object, p,
862			curgeneration, pagerflags);
863		if (n == 0)
864			goto rescan;
865
866		if (object->generation != curgeneration)
867			goto rescan;
868
869		/*
870		 * Try to optimize the next page.  If we can't we pick up
871		 * our (random) scan where we left off.
872		 */
873		if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
874			if ((p = vm_page_lookup(object, pi + n)) != NULL)
875				goto again;
876		}
877	}
878	vm_page_unlock_queues();
879#if 0
880	VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
881#endif
882
883	vm_object_clear_flag(object, OBJ_CLEANING);
884	return;
885}
886
887static int
888vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
889{
890	int runlen;
891	int maxf;
892	int chkb;
893	int maxb;
894	int i;
895	vm_pindex_t pi;
896	vm_page_t maf[vm_pageout_page_count];
897	vm_page_t mab[vm_pageout_page_count];
898	vm_page_t ma[vm_pageout_page_count];
899
900	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
901	pi = p->pindex;
902	while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
903		vm_page_lock_queues();
904		if (object->generation != curgeneration) {
905			return(0);
906		}
907	}
908	maxf = 0;
909	for(i = 1; i < vm_pageout_page_count; i++) {
910		vm_page_t tp;
911
912		if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
913			if ((tp->flags & PG_BUSY) ||
914				((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
915				 (tp->flags & PG_CLEANCHK) == 0) ||
916				(tp->busy != 0))
917				break;
918			if (VM_PAGE_INQUEUE1(tp, PQ_CACHE)) {
919				vm_page_flag_clear(tp, PG_CLEANCHK);
920				break;
921			}
922			vm_page_test_dirty(tp);
923			if ((tp->dirty & tp->valid) == 0) {
924				vm_page_flag_clear(tp, PG_CLEANCHK);
925				break;
926			}
927			maf[ i - 1 ] = tp;
928			maxf++;
929			continue;
930		}
931		break;
932	}
933
934	maxb = 0;
935	chkb = vm_pageout_page_count -  maxf;
936	if (chkb) {
937		for(i = 1; i < chkb;i++) {
938			vm_page_t tp;
939
940			if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
941				if ((tp->flags & PG_BUSY) ||
942					((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
943					 (tp->flags & PG_CLEANCHK) == 0) ||
944					(tp->busy != 0))
945					break;
946				if (VM_PAGE_INQUEUE1(tp, PQ_CACHE)) {
947					vm_page_flag_clear(tp, PG_CLEANCHK);
948					break;
949				}
950				vm_page_test_dirty(tp);
951				if ((tp->dirty & tp->valid) == 0) {
952					vm_page_flag_clear(tp, PG_CLEANCHK);
953					break;
954				}
955				mab[ i - 1 ] = tp;
956				maxb++;
957				continue;
958			}
959			break;
960		}
961	}
962
963	for(i = 0; i < maxb; i++) {
964		int index = (maxb - i) - 1;
965		ma[index] = mab[i];
966		vm_page_flag_clear(ma[index], PG_CLEANCHK);
967	}
968	vm_page_flag_clear(p, PG_CLEANCHK);
969	ma[maxb] = p;
970	for(i = 0; i < maxf; i++) {
971		int index = (maxb + i) + 1;
972		ma[index] = maf[i];
973		vm_page_flag_clear(ma[index], PG_CLEANCHK);
974	}
975	runlen = maxb + maxf + 1;
976
977	vm_pageout_flush(ma, runlen, pagerflags);
978	for (i = 0; i < runlen; i++) {
979		if (ma[i]->valid & ma[i]->dirty) {
980			pmap_remove_write(ma[i]);
981			vm_page_flag_set(ma[i], PG_CLEANCHK);
982
983			/*
984			 * maxf will end up being the actual number of pages
985			 * we wrote out contiguously, non-inclusive of the
986			 * first page.  We do not count look-behind pages.
987			 */
988			if (i >= maxb + 1 && (maxf > i - maxb - 1))
989				maxf = i - maxb - 1;
990		}
991	}
992	return(maxf + 1);
993}
994
995/*
996 * Note that there is absolutely no sense in writing out
997 * anonymous objects, so we track down the vnode object
998 * to write out.
999 * We invalidate (remove) all pages from the address space
1000 * for semantic correctness.
1001 *
1002 * Note: certain anonymous maps, such as MAP_NOSYNC maps,
1003 * may start out with a NULL object.
1004 */
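/*
 * Hedged sketch (comment only): msync(2) normally reaches this routine
 * through vm_map_sync() with byte-granular bounds; the names below are
 * hypothetical placeholders for a map entry's object and offsets.
 *
 *	vm_object_sync(entry_object, entry_offset + (start - entry_start),
 *	    end - start, syncio, invalidate);
 */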
1005void
1006vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
1007    boolean_t syncio, boolean_t invalidate)
1008{
1009	vm_object_t backing_object;
1010	struct vnode *vp;
1011	struct mount *mp;
1012	int flags;
1013
1014	if (object == NULL)
1015		return;
1016	VM_OBJECT_LOCK(object);
1017	while ((backing_object = object->backing_object) != NULL) {
1018		VM_OBJECT_LOCK(backing_object);
1019		offset += object->backing_object_offset;
1020		VM_OBJECT_UNLOCK(object);
1021		object = backing_object;
1022		if (object->size < OFF_TO_IDX(offset + size))
1023			size = IDX_TO_OFF(object->size) - offset;
1024	}
1025	/*
1026	 * Flush pages if writing is allowed, invalidate them
1027	 * if invalidation requested.  Pages undergoing I/O
1028	 * will be ignored by vm_object_page_remove().
1029	 *
1030	 * We cannot lock the vnode and then wait for paging
1031	 * to complete without deadlocking against vm_fault.
1032	 * Instead we simply call vm_object_page_remove() and
1033	 * allow it to block internally on a page-by-page
1034	 * basis when it encounters pages undergoing async
1035	 * I/O.
1036	 */
1037	if (object->type == OBJT_VNODE &&
1038	    (object->flags & OBJ_MIGHTBEDIRTY) != 0) {
1039		int vfslocked;
1040		vp = object->handle;
1041		VM_OBJECT_UNLOCK(object);
1042		(void) vn_start_write(vp, &mp, V_WAIT);
1043		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1044		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
1045		flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1046		flags |= invalidate ? OBJPC_INVAL : 0;
1047		VM_OBJECT_LOCK(object);
1048		vm_object_page_clean(object,
1049		    OFF_TO_IDX(offset),
1050		    OFF_TO_IDX(offset + size + PAGE_MASK),
1051		    flags);
1052		VM_OBJECT_UNLOCK(object);
1053		VOP_UNLOCK(vp, 0, curthread);
1054		VFS_UNLOCK_GIANT(vfslocked);
1055		vn_finished_write(mp);
1056		VM_OBJECT_LOCK(object);
1057	}
1058	if ((object->type == OBJT_VNODE ||
1059	     object->type == OBJT_DEVICE) && invalidate) {
1060		boolean_t purge;
1061		purge = old_msync || (object->type == OBJT_DEVICE);
1062		vm_object_page_remove(object,
1063		    OFF_TO_IDX(offset),
1064		    OFF_TO_IDX(offset + size + PAGE_MASK),
1065		    purge ? FALSE : TRUE);
1066	}
1067	VM_OBJECT_UNLOCK(object);
1068}
1069
1070/*
1071 *	vm_object_madvise:
1072 *
1073 *	Implements the madvise function at the object/page level.
1074 *
1075 *	MADV_WILLNEED	(any object)
1076 *
1077 *	    Activate the specified pages if they are resident.
1078 *
1079 *	MADV_DONTNEED	(any object)
1080 *
1081 *	    Deactivate the specified pages if they are resident.
1082 *
1083 *	MADV_FREE	(OBJT_DEFAULT/OBJT_SWAP objects,
1084 *			 OBJ_ONEMAPPING only)
1085 *
1086 *	    Deactivate and clean the specified pages if they are
1087 *	    resident.  This permits the process to reuse the pages
1088 *	    without faulting or the kernel to reclaim the pages
1089 *	    without I/O.
1090 */
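/*
 * Hedged illustration (comment only): madvise(2) ultimately reduces to a
 * call of this shape; "obj", "pindex" and "npages" stand in for values
 * derived from the map entry and are hypothetical here.
 *
 *	vm_object_madvise(obj, pindex, npages, MADV_FREE);
 */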
1091void
1092vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
1093{
1094	vm_pindex_t end, tpindex;
1095	vm_object_t backing_object, tobject;
1096	vm_page_t m;
1097
1098	if (object == NULL)
1099		return;
1100	VM_OBJECT_LOCK(object);
1101	end = pindex + count;
1102	/*
1103	 * Locate and adjust resident pages
1104	 */
1105	for (; pindex < end; pindex += 1) {
1106relookup:
1107		tobject = object;
1108		tpindex = pindex;
1109shadowlookup:
1110		/*
1111		 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
1112		 * and those pages must be OBJ_ONEMAPPING.
1113		 */
1114		if (advise == MADV_FREE) {
1115			if ((tobject->type != OBJT_DEFAULT &&
1116			     tobject->type != OBJT_SWAP) ||
1117			    (tobject->flags & OBJ_ONEMAPPING) == 0) {
1118				goto unlock_tobject;
1119			}
1120		}
1121		m = vm_page_lookup(tobject, tpindex);
1122		if (m == NULL) {
1123			/*
1124			 * There may be swap even if there is no backing page
1125			 */
1126			if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1127				swap_pager_freespace(tobject, tpindex, 1);
1128			/*
1129			 * next object
1130			 */
1131			backing_object = tobject->backing_object;
1132			if (backing_object == NULL)
1133				goto unlock_tobject;
1134			VM_OBJECT_LOCK(backing_object);
1135			tpindex += OFF_TO_IDX(tobject->backing_object_offset);
1136			if (tobject != object)
1137				VM_OBJECT_UNLOCK(tobject);
1138			tobject = backing_object;
1139			goto shadowlookup;
1140		}
1141		/*
1142		 * If the page is busy or not in a normal active state,
1143		 * we skip it.  If the page is not managed there are no
1144		 * page queues to mess with.  Things can break if we mess
1145		 * with pages in any of the below states.
1146		 */
1147		vm_page_lock_queues();
1148		if (m->hold_count ||
1149		    m->wire_count ||
1150		    (m->flags & PG_UNMANAGED) ||
1151		    m->valid != VM_PAGE_BITS_ALL) {
1152			vm_page_unlock_queues();
1153			goto unlock_tobject;
1154		}
1155		if ((m->flags & PG_BUSY) || m->busy) {
1156			vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
1157			vm_page_unlock_queues();
1158			if (object != tobject)
1159				VM_OBJECT_UNLOCK(object);
1160			msleep(m, VM_OBJECT_MTX(tobject), PDROP | PVM, "madvpo", 0);
1161			VM_OBJECT_LOCK(object);
1162  			goto relookup;
1163		}
1164		if (advise == MADV_WILLNEED) {
1165			vm_page_activate(m);
1166		} else if (advise == MADV_DONTNEED) {
1167			vm_page_dontneed(m);
1168		} else if (advise == MADV_FREE) {
1169			/*
1170			 * Mark the page clean.  This will allow the page
1171			 * to be freed up by the system.  However, such pages
1172			 * are often reused quickly by malloc()/free()
1173			 * so we do not do anything that would cause
1174			 * a page fault if we can help it.
1175			 *
1176			 * Specifically, we do not try to actually free
1177			 * the page now nor do we try to put it in the
1178			 * cache (which would cause a page fault on reuse).
1179			 *
1180 *			 But we do make the page as freeable as we
1181			 * can without actually taking the step of unmapping
1182			 * it.
1183			 */
1184			pmap_clear_modify(m);
1185			m->dirty = 0;
1186			m->act_count = 0;
1187			vm_page_dontneed(m);
1188		}
1189		vm_page_unlock_queues();
1190		if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1191			swap_pager_freespace(tobject, tpindex, 1);
1192unlock_tobject:
1193		if (tobject != object)
1194			VM_OBJECT_UNLOCK(tobject);
1195	}
1196	VM_OBJECT_UNLOCK(object);
1197}
1198
1199/*
1200 *	vm_object_shadow:
1201 *
1202 *	Create a new object which is backed by the
1203 *	specified existing object range.  The source
1204 *	object reference is deallocated.
1205 *
1206 *	The new object and offset into that object
1207 *	are returned in the source parameters.
1208 */
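/*
 * Hedged usage sketch (comment only), e.g. for a copy-on-write setup as
 * performed by the map-copy path.  "obj", "off" and "len" are
 * hypothetical stand-ins for a map entry's object, offset and byte
 * length.
 *
 *	vm_object_shadow(&obj, &off, atop(len));
 *	(on return, obj and off name the new shadow object; the old
 *	 reference has been consumed)
 */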
1209void
1210vm_object_shadow(
1211	vm_object_t *object,	/* IN/OUT */
1212	vm_ooffset_t *offset,	/* IN/OUT */
1213	vm_size_t length)
1214{
1215	vm_object_t source;
1216	vm_object_t result;
1217
1218	source = *object;
1219
1220	/*
1221	 * Don't create the new object if the old object isn't shared.
1222	 */
1223	if (source != NULL) {
1224		VM_OBJECT_LOCK(source);
1225		if (source->ref_count == 1 &&
1226		    source->handle == NULL &&
1227		    (source->type == OBJT_DEFAULT ||
1228		     source->type == OBJT_SWAP)) {
1229			VM_OBJECT_UNLOCK(source);
1230			return;
1231		}
1232		VM_OBJECT_UNLOCK(source);
1233	}
1234
1235	/*
1236	 * Allocate a new object with the given length.
1237	 */
1238	result = vm_object_allocate(OBJT_DEFAULT, length);
1239
1240	/*
1241	 * The new object shadows the source object, adding a reference to it.
1242	 * Our caller changes his reference to point to the new object,
1243	 * removing a reference to the source object.  Net result: no change
1244	 * of reference count.
1245	 *
1246	 * Try to optimize the result object's page color when shadowing
1247	 * in order to maintain page coloring consistency in the combined
1248	 * shadowed object.
1249	 */
1250	result->backing_object = source;
1251	/*
1252	 * Store the offset into the source object, and fix up the offset into
1253	 * the new object.
1254	 */
1255	result->backing_object_offset = *offset;
1256	if (source != NULL) {
1257		VM_OBJECT_LOCK(source);
1258		LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
1259		source->shadow_count++;
1260		source->generation++;
1261		if (length < source->size)
1262			length = source->size;
1263		if (length > PQ_MAXLENGTH || source->generation > 1)
1264			length = PQ_MAXLENGTH;
1265		result->pg_color = (source->pg_color +
1266		    length * source->generation) & PQ_COLORMASK;
1267		result->flags |= source->flags & OBJ_NEEDGIANT;
1268		VM_OBJECT_UNLOCK(source);
1269		next_index = (result->pg_color + PQ_MAXLENGTH) & PQ_COLORMASK;
1270	}
1271
1272
1273	/*
1274	 * Return the new things
1275	 */
1276	*offset = 0;
1277	*object = result;
1278}
1279
1280/*
1281 *	vm_object_split:
1282 *
1283 * Split the pages in a map entry into a new object.  This affords
1284 * easier removal of unused pages, and keeps object inheritance from
1285 * having a negative impact on memory usage.
1286 */
1287void
1288vm_object_split(vm_map_entry_t entry)
1289{
1290	vm_page_t m;
1291	vm_object_t orig_object, new_object, source;
1292	vm_pindex_t offidxstart, offidxend;
1293	vm_size_t idx, size;
1294
1295	orig_object = entry->object.vm_object;
1296	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
1297		return;
1298	if (orig_object->ref_count <= 1)
1299		return;
1300	VM_OBJECT_UNLOCK(orig_object);
1301
1302	offidxstart = OFF_TO_IDX(entry->offset);
1303	offidxend = offidxstart + OFF_TO_IDX(entry->end - entry->start);
1304	size = offidxend - offidxstart;
1305
1306	/*
1307	 * If swap_pager_copy() is later called, it will convert new_object
1308	 * into a swap object.
1309	 */
1310	new_object = vm_object_allocate(OBJT_DEFAULT, size);
1311
1312	VM_OBJECT_LOCK(new_object);
1313	VM_OBJECT_LOCK(orig_object);
1314	source = orig_object->backing_object;
1315	if (source != NULL) {
1316		VM_OBJECT_LOCK(source);
1317		LIST_INSERT_HEAD(&source->shadow_head,
1318				  new_object, shadow_list);
1319		source->shadow_count++;
1320		source->generation++;
1321		vm_object_reference_locked(source);	/* for new_object */
1322		vm_object_clear_flag(source, OBJ_ONEMAPPING);
1323		VM_OBJECT_UNLOCK(source);
1324		new_object->backing_object_offset =
1325			orig_object->backing_object_offset + entry->offset;
1326		new_object->backing_object = source;
1327	}
1328	new_object->flags |= orig_object->flags & OBJ_NEEDGIANT;
1329	vm_page_lock_queues();
1330	for (idx = 0; idx < size; idx++) {
1331	retry:
1332		m = vm_page_lookup(orig_object, offidxstart + idx);
1333		if (m == NULL)
1334			continue;
1335
1336		/*
1337		 * We must wait for pending I/O to complete before we can
1338		 * rename the page.
1339		 *
1340		 * We do not have to VM_PROT_NONE the page as mappings should
1341		 * not be changed by this operation.
1342		 */
1343		if ((m->flags & PG_BUSY) || m->busy) {
1344			vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
1345			vm_page_unlock_queues();
1346			VM_OBJECT_UNLOCK(new_object);
1347			msleep(m, VM_OBJECT_MTX(orig_object), PDROP | PVM, "spltwt", 0);
1348			VM_OBJECT_LOCK(new_object);
1349			VM_OBJECT_LOCK(orig_object);
1350			vm_page_lock_queues();
1351			goto retry;
1352		}
1353		vm_page_rename(m, new_object, idx);
1354		/* page automatically made dirty by rename and cache handled */
1355		vm_page_busy(m);
1356	}
1357	vm_page_unlock_queues();
1358	if (orig_object->type == OBJT_SWAP) {
1359		/*
1360		 * swap_pager_copy() can sleep, in which case the orig_object's
1361		 * and new_object's locks are released and reacquired.
1362		 */
1363		swap_pager_copy(orig_object, new_object, offidxstart, 0);
1364	}
1365	VM_OBJECT_UNLOCK(orig_object);
1366	vm_page_lock_queues();
1367	TAILQ_FOREACH(m, &new_object->memq, listq)
1368		vm_page_wakeup(m);
1369	vm_page_unlock_queues();
1370	VM_OBJECT_UNLOCK(new_object);
1371	entry->object.vm_object = new_object;
1372	entry->offset = 0LL;
1373	vm_object_deallocate(orig_object);
1374	VM_OBJECT_LOCK(new_object);
1375}
1376
1377#define	OBSC_TEST_ALL_SHADOWED	0x0001
1378#define	OBSC_COLLAPSE_NOWAIT	0x0002
1379#define	OBSC_COLLAPSE_WAIT	0x0004
1380
1381static int
1382vm_object_backing_scan(vm_object_t object, int op)
1383{
1384	int r = 1;
1385	vm_page_t p;
1386	vm_object_t backing_object;
1387	vm_pindex_t backing_offset_index;
1388
1389	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1390	VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED);
1391
1392	backing_object = object->backing_object;
1393	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1394
1395	/*
1396	 * Initial conditions
1397	 */
1398	if (op & OBSC_TEST_ALL_SHADOWED) {
1399		/*
1400		 * We do not want to have to test for the existence of
1401		 * swap pages in the backing object.  XXX but with the
1402		 * new swapper this would be pretty easy to do.
1403		 *
1404		 * XXX what about anonymous MAP_SHARED memory that hasn't
1405		 * been ZFOD faulted yet?  If we do not test for this, the
1406		 * shadow test may succeed! XXX
1407		 */
1408		if (backing_object->type != OBJT_DEFAULT) {
1409			return (0);
1410		}
1411	}
1412	if (op & OBSC_COLLAPSE_WAIT) {
1413		vm_object_set_flag(backing_object, OBJ_DEAD);
1414	}
1415
1416	/*
1417	 * Our scan
1418	 */
1419	p = TAILQ_FIRST(&backing_object->memq);
1420	while (p) {
1421		vm_page_t next = TAILQ_NEXT(p, listq);
1422		vm_pindex_t new_pindex = p->pindex - backing_offset_index;
1423
1424		if (op & OBSC_TEST_ALL_SHADOWED) {
1425			vm_page_t pp;
1426
1427			/*
1428			 * Ignore pages outside the parent object's range
1429			 * and outside the parent object's mapping of the
1430			 * backing object.
1431			 *
1432			 * note that we do not busy the backing object's
1433			 * page.
1434			 */
1435			if (
1436			    p->pindex < backing_offset_index ||
1437			    new_pindex >= object->size
1438			) {
1439				p = next;
1440				continue;
1441			}
1442
1443			/*
1444			 * See if the parent has the page or if the parent's
1445			 * object pager has the page.  If the parent has the
1446			 * page but the page is not valid, the parent's
1447			 * object pager must have the page.
1448			 *
1449			 * If this fails, the parent does not completely shadow
1450			 * the object and we might as well give up now.
1451			 */
1452
1453			pp = vm_page_lookup(object, new_pindex);
1454			if (
1455			    (pp == NULL || pp->valid == 0) &&
1456			    !vm_pager_has_page(object, new_pindex, NULL, NULL)
1457			) {
1458				r = 0;
1459				break;
1460			}
1461		}
1462
1463		/*
1464		 * Check for busy page
1465		 */
1466		if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
1467			vm_page_t pp;
1468
1469			if (op & OBSC_COLLAPSE_NOWAIT) {
1470				if ((p->flags & PG_BUSY) ||
1471				    !p->valid ||
1472				    p->busy) {
1473					p = next;
1474					continue;
1475				}
1476			} else if (op & OBSC_COLLAPSE_WAIT) {
1477				if ((p->flags & PG_BUSY) || p->busy) {
1478					vm_page_lock_queues();
1479					vm_page_flag_set(p,
1480					    PG_WANTED | PG_REFERENCED);
1481					vm_page_unlock_queues();
1482					VM_OBJECT_UNLOCK(object);
1483					msleep(p, VM_OBJECT_MTX(backing_object),
1484					    PDROP | PVM, "vmocol", 0);
1485					VM_OBJECT_LOCK(object);
1486					VM_OBJECT_LOCK(backing_object);
1487					/*
1488					 * If we slept, anything could have
1489					 * happened.  Since the object is
1490					 * marked dead, the backing offset
1491					 * should not have changed so we
1492					 * just restart our scan.
1493					 */
1494					p = TAILQ_FIRST(&backing_object->memq);
1495					continue;
1496				}
1497			}
1498
1499			KASSERT(
1500			    p->object == backing_object,
1501			    ("vm_object_backing_scan: object mismatch")
1502			);
1503
1504			/*
1505			 * Destroy any associated swap
1506			 */
1507			if (backing_object->type == OBJT_SWAP) {
1508				swap_pager_freespace(
1509				    backing_object,
1510				    p->pindex,
1511				    1
1512				);
1513			}
1514
1515			if (
1516			    p->pindex < backing_offset_index ||
1517			    new_pindex >= object->size
1518			) {
1519				/*
1520				 * Page is out of the parent object's range, we
1521				 * can simply destroy it.
1522				 */
1523				vm_page_lock_queues();
1524				KASSERT(!pmap_page_is_mapped(p),
1525				    ("freeing mapped page %p", p));
1526				if (p->wire_count == 0)
1527					vm_page_free(p);
1528				else
1529					vm_page_remove(p);
1530				vm_page_unlock_queues();
1531				p = next;
1532				continue;
1533			}
1534
1535			pp = vm_page_lookup(object, new_pindex);
1536			if (
1537			    pp != NULL ||
1538			    vm_pager_has_page(object, new_pindex, NULL, NULL)
1539			) {
1540				/*
1541				 * page already exists in parent OR swap exists
1542				 * for this location in the parent.  Destroy
1543				 * the original page from the backing object.
1544				 *
1545				 * Leave the parent's page alone
1546				 */
1547				vm_page_lock_queues();
1548				KASSERT(!pmap_page_is_mapped(p),
1549				    ("freeing mapped page %p", p));
1550				if (p->wire_count == 0)
1551					vm_page_free(p);
1552				else
1553					vm_page_remove(p);
1554				vm_page_unlock_queues();
1555				p = next;
1556				continue;
1557			}
1558
1559			/*
1560			 * Page does not exist in parent, rename the
1561			 * page from the backing object to the main object.
1562			 *
1563			 * If the page was mapped to a process, it can remain
1564			 * mapped through the rename.
1565			 */
1566			vm_page_lock_queues();
1567			vm_page_rename(p, object, new_pindex);
1568			vm_page_unlock_queues();
1569			/* page automatically made dirty by rename */
1570		}
1571		p = next;
1572	}
1573	return (r);
1574}
1575
1576
1577/*
1578 * this version of collapse allows the operation to occur earlier and
1579 * when paging_in_progress is true for an object...  This is not a complete
1580 * operation, but should plug 99.9% of the rest of the leaks.
1581 */
1582static void
1583vm_object_qcollapse(vm_object_t object)
1584{
1585	vm_object_t backing_object = object->backing_object;
1586
1587	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1588	VM_OBJECT_LOCK_ASSERT(backing_object, MA_OWNED);
1589
1590	if (backing_object->ref_count != 1)
1591		return;
1592
1593	vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
1594}
1595
1596/*
1597 *	vm_object_collapse:
1598 *
1599 *	Collapse an object with the object backing it.
1600 *	Pages in the backing object are moved into the
1601 *	parent, and the backing object is deallocated.
1602 */
1603void
1604vm_object_collapse(vm_object_t object)
1605{
1606	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1607
1608	while (TRUE) {
1609		vm_object_t backing_object;
1610
1611		/*
1612		 * Verify that the conditions are right for collapse:
1613		 *
1614		 * The object exists and the backing object exists.
1615		 */
1616		if ((backing_object = object->backing_object) == NULL)
1617			break;
1618
1619		/*
1620		 * We check the backing object first because it is most likely
1621		 * not collapsible.
1622		 */
1623		VM_OBJECT_LOCK(backing_object);
1624		if (backing_object->handle != NULL ||
1625		    (backing_object->type != OBJT_DEFAULT &&
1626		     backing_object->type != OBJT_SWAP) ||
1627		    (backing_object->flags & OBJ_DEAD) ||
1628		    object->handle != NULL ||
1629		    (object->type != OBJT_DEFAULT &&
1630		     object->type != OBJT_SWAP) ||
1631		    (object->flags & OBJ_DEAD)) {
1632			VM_OBJECT_UNLOCK(backing_object);
1633			break;
1634		}
1635
1636		if (
1637		    object->paging_in_progress != 0 ||
1638		    backing_object->paging_in_progress != 0
1639		) {
1640			vm_object_qcollapse(object);
1641			VM_OBJECT_UNLOCK(backing_object);
1642			break;
1643		}
1644		/*
1645		 * We know that we can either collapse the backing object (if
1646		 * the parent is the only reference to it) or (perhaps) have
1647		 * the parent bypass the object if the parent happens to shadow
1648		 * all the resident pages in the entire backing object.
1649		 *
1650		 * This is ignoring pager-backed pages such as swap pages.
1651		 * vm_object_backing_scan fails the shadowing test in this
1652		 * case.
1653		 */
1654		if (backing_object->ref_count == 1) {
1655			/*
1656			 * If there is exactly one reference to the backing
1657			 * object, we can collapse it into the parent.
1658			 */
1659			vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
1660
1661			/*
1662			 * Move the pager from backing_object to object.
1663			 */
1664			if (backing_object->type == OBJT_SWAP) {
1665				/*
1666				 * swap_pager_copy() can sleep, in which case
1667				 * the backing_object's and object's locks are
1668				 * released and reacquired.
1669				 */
1670				swap_pager_copy(
1671				    backing_object,
1672				    object,
1673				    OFF_TO_IDX(object->backing_object_offset), TRUE);
1674			}
1675			/*
1676			 * Object now shadows whatever backing_object did.
1677			 * Note that the reference to
1678			 * backing_object->backing_object moves from within
1679			 * backing_object to within object.
1680			 */
1681			LIST_REMOVE(object, shadow_list);
1682			backing_object->shadow_count--;
1683			backing_object->generation++;
1684			if (backing_object->backing_object) {
1685				VM_OBJECT_LOCK(backing_object->backing_object);
1686				LIST_REMOVE(backing_object, shadow_list);
1687				LIST_INSERT_HEAD(
1688				    &backing_object->backing_object->shadow_head,
1689				    object, shadow_list);
1690				/*
1691				 * The shadow_count has not changed.
1692				 */
1693				backing_object->backing_object->generation++;
1694				VM_OBJECT_UNLOCK(backing_object->backing_object);
1695			}
1696			object->backing_object = backing_object->backing_object;
1697			object->backing_object_offset +=
1698			    backing_object->backing_object_offset;
1699
1700			/*
1701			 * Discard backing_object.
1702			 *
1703			 * Since the backing object has no pages, no pager left,
1704			 * and no object references within it, all that is
1705			 * necessary is to dispose of it.
1706			 */
1707			KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
1708			VM_OBJECT_UNLOCK(backing_object);
1709
1710			mtx_lock(&vm_object_list_mtx);
1711			TAILQ_REMOVE(
1712			    &vm_object_list,
1713			    backing_object,
1714			    object_list
1715			);
1716			mtx_unlock(&vm_object_list_mtx);
1717
1718			uma_zfree(obj_zone, backing_object);
1719
1720			object_collapses++;
1721		} else {
1722			vm_object_t new_backing_object;
1723
1724			/*
1725			 * If we do not entirely shadow the backing object,
1726			 * there is nothing we can do so we give up.
1727			 */
1728			if (object->resident_page_count != object->size &&
1729			    vm_object_backing_scan(object,
1730			    OBSC_TEST_ALL_SHADOWED) == 0) {
1731				VM_OBJECT_UNLOCK(backing_object);
1732				break;
1733			}
1734
1735			/*
1736			 * Make the parent shadow the next object in the
1737			 * chain.  Deallocating backing_object will not remove
1738			 * it, since its reference count is at least 2.
1739			 */
1740			LIST_REMOVE(object, shadow_list);
1741			backing_object->shadow_count--;
1742			backing_object->generation++;
1743
1744			new_backing_object = backing_object->backing_object;
1745			if ((object->backing_object = new_backing_object) != NULL) {
1746				VM_OBJECT_LOCK(new_backing_object);
1747				LIST_INSERT_HEAD(
1748				    &new_backing_object->shadow_head,
1749				    object,
1750				    shadow_list
1751				);
1752				new_backing_object->shadow_count++;
1753				new_backing_object->generation++;
1754				vm_object_reference_locked(new_backing_object);
1755				VM_OBJECT_UNLOCK(new_backing_object);
1756				object->backing_object_offset +=
1757					backing_object->backing_object_offset;
1758			}
1759
1760			/*
1761			 * Drop the reference count on backing_object. Since
1762			 * its ref_count was at least 2, it will not vanish.
1763			 */
1764			backing_object->ref_count--;
1765			VM_OBJECT_UNLOCK(backing_object);
1766			object_bypasses++;
1767		}
1768
1769		/*
1770		 * Try again with this object's new backing object.
1771		 */
1772	}
1773}
1774
1775/*
1776 *	vm_object_page_remove:
1777 *
1778 *	Removes all physical pages in the given range from the
1779 *	object's list of pages.  If the range's end is zero, all
1780 *	physical pages from the range's start to the end of the object
1781 *	are deleted.
1782 *
1783 *	The object must be locked.
1784 */
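/*
 * Hedged example (comment only), mirroring the invalidation call in
 * vm_object_sync() above; "obj", "off" and "len" are hypothetical.
 *
 *	VM_OBJECT_LOCK(obj);
 *	vm_object_page_remove(obj, OFF_TO_IDX(off),
 *	    OFF_TO_IDX(off + len + PAGE_MASK), FALSE);
 *	VM_OBJECT_UNLOCK(obj);
 */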
1785void
1786vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
1787    boolean_t clean_only)
1788{
1789	vm_page_t p, next;
1790
1791	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1792	if (object->resident_page_count == 0)
1793		return;
1794
1795	/*
1796	 * Since physically-backed objects do not use managed pages, we can't
1797	 * remove pages from the object (we must instead remove the page
1798	 * references, and then destroy the object).
1799	 */
1800	KASSERT(object->type != OBJT_PHYS,
1801	    ("attempt to remove pages from a physical object"));
1802
1803	vm_object_pip_add(object, 1);
1804again:
1805	vm_page_lock_queues();
1806	if ((p = TAILQ_FIRST(&object->memq)) != NULL) {
1807		if (p->pindex < start) {
1808			p = vm_page_splay(start, object->root);
1809			if ((object->root = p)->pindex < start)
1810				p = TAILQ_NEXT(p, listq);
1811		}
1812	}
1813	/*
1814	 * Assert: the variable p is either (1) the page with the
1815	 * least pindex greater than or equal to the parameter pindex
1816	 * or (2) NULL.
1817	 */
1818	for (;
1819	     p != NULL && (p->pindex < end || end == 0);
1820	     p = next) {
1821		next = TAILQ_NEXT(p, listq);
1822
1823		if (p->wire_count != 0) {
1824			pmap_remove_all(p);
1825			if (!clean_only)
1826				p->valid = 0;
1827			continue;
1828		}
1829		if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
1830			goto again;
1831		if (clean_only && p->valid) {
1832			pmap_remove_write(p);
1833			if (p->valid & p->dirty)
1834				continue;
1835		}
1836		pmap_remove_all(p);
1837		vm_page_free(p);
1838	}
1839	vm_page_unlock_queues();
1840	vm_object_pip_wakeup(object);
1841}
1842
1843/*
1844 *	Routine:	vm_object_coalesce
1845 *	Function:	Coalesces two objects backing up adjoining
1846 *			regions of memory into a single object.
1847 *
1848 *	returns TRUE if objects were combined.
1849 *
1850 *	NOTE:	Only works at the moment if the second object is NULL -
1851 *		if it's not, which object do we lock first?
1852 *
1853 *	Parameters:
1854 *		prev_object	First object to coalesce
1855 *		prev_offset	Offset into prev_object
1856 *		prev_size	Size of reference to prev_object
1857 *		next_size	Size of reference to the second object
1858 *
1859 *	Conditions:
1860 *	The object must *not* be locked.
1861 */
1862boolean_t
1863vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
1864	vm_size_t prev_size, vm_size_t next_size)
1865{
1866	vm_pindex_t next_pindex;
1867
1868	if (prev_object == NULL)
1869		return (TRUE);
1870	VM_OBJECT_LOCK(prev_object);
1871	if (prev_object->type != OBJT_DEFAULT &&
1872	    prev_object->type != OBJT_SWAP) {
1873		VM_OBJECT_UNLOCK(prev_object);
1874		return (FALSE);
1875	}
1876
1877	/*
1878	 * Try to collapse the object first
1879	 */
1880	vm_object_collapse(prev_object);
1881
1882	/*
1883	 * Can't coalesce if: more than one reference, paged out, shadows
1884	 * another object, or has a copy elsewhere (any of which mean that the
1885	 * pages not mapped to prev_entry may be in use anyway).
1886	 */
	if (prev_object->backing_object != NULL) {
		VM_OBJECT_UNLOCK(prev_object);
		return (FALSE);
	}

	prev_size >>= PAGE_SHIFT;
	next_size >>= PAGE_SHIFT;
	next_pindex = OFF_TO_IDX(prev_offset) + prev_size;

	if ((prev_object->ref_count > 1) &&
	    (prev_object->size != next_pindex)) {
		VM_OBJECT_UNLOCK(prev_object);
		return (FALSE);
	}

	/*
	 * Remove any pages that may still be in the object from a previous
	 * deallocation.
	 */
	if (next_pindex < prev_object->size) {
		vm_object_page_remove(prev_object,
				      next_pindex,
				      next_pindex + next_size, FALSE);
		if (prev_object->type == OBJT_SWAP)
			swap_pager_freespace(prev_object,
					     next_pindex, next_size);
	}

	/*
	 * Extend the object if necessary.
	 */
	if (next_pindex + next_size > prev_object->size)
		prev_object->size = next_pindex + next_size;

	VM_OBJECT_UNLOCK(prev_object);
	return (TRUE);
}

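/*
 * Editor's illustrative sketch (disabled): how a map-layer caller might
 * ask whether a newly mapped range can simply extend the previous entry's
 * anonymous object instead of allocating a new one.  The identifiers
 * prev_entry and new_size are hypothetical.
 */
#if 0
	/* Inside a hypothetical map-layer function: */
	if (vm_object_coalesce(prev_entry->object.vm_object,
	    prev_entry->offset,
	    (vm_size_t)(prev_entry->end - prev_entry->start),
	    (vm_size_t)new_size)) {
		/* Reuse prev_entry's object; its size was grown as needed. */
	}
#endif

/*
 *	vm_object_set_writeable_dirty:
 *
 *	Sets OBJ_MIGHTBEDIRTY to record that the object may contain dirty
 *	pages.  For vnode-backed objects the vnode is additionally marked
 *	VI_OBJDIRTY so that code checking that flag treats it as
 *	potentially dirty.
 *
 *	The object must be locked.
 */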
void
vm_object_set_writeable_dirty(vm_object_t object)
{
	struct vnode *vp;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if ((object->flags & OBJ_MIGHTBEDIRTY) != 0)
		return;
	vm_object_set_flag(object, OBJ_MIGHTBEDIRTY);
	if (object->type == OBJT_VNODE &&
	    (vp = (struct vnode *)object->handle) != NULL) {
		VI_LOCK(vp);
		vp->v_iflag |= VI_OBJDIRTY;
		VI_UNLOCK(vp);
	}
}

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <sys/cons.h>

#include <ddb/ddb.h>

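/*
 *	_vm_object_in_map:
 *
 *	Helper for the DDB commands below.  Returns non-zero if the given
 *	object backs, directly or through its chain of backing objects,
 *	some entry of the given map.  With entry == NULL every entry in
 *	the map is examined; submap entries are descended into.
 */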
static int
_vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;
	int entcount;

	if (map == NULL)
		return (0);

	if (entry == NULL) {
		tmpe = map->header.next;
		entcount = map->nentries;
		while (entcount-- && (tmpe != &map->header)) {
			if (_vm_object_in_map(map, object, tmpe)) {
				return (1);
			}
			tmpe = tmpe->next;
		}
	} else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
		tmpm = entry->object.sub_map;
		tmpe = tmpm->header.next;
		entcount = tmpm->nentries;
		while (entcount-- && tmpe != &tmpm->header) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
				return (1);
			}
			tmpe = tmpe->next;
		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
				return (1);
			}
	}
	return (0);
}

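/*
 *	vm_object_in_map:
 *
 *	Returns non-zero if the object is reachable from the map of any
 *	process or from one of the kernel maps checked below (kernel_map,
 *	kmem_map, pager_map, buffer_map).
 */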
static int
vm_object_in_map(vm_object_t object)
{
	struct proc *p;

	/* sx_slock(&allproc_lock); */
	LIST_FOREACH(p, &allproc, p_list) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
			continue;
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, NULL)) {
			/* sx_sunlock(&allproc_lock); */
			return (1);
		}
	}
	/* sx_sunlock(&allproc_lock); */
	if (_vm_object_in_map(kernel_map, object, NULL))
		return (1);
	if (_vm_object_in_map(kmem_map, object, NULL))
		return (1);
	if (_vm_object_in_map(pager_map, object, NULL))
		return (1);
	if (_vm_object_in_map(buffer_map, object, NULL))
		return (1);
	return (0);
}

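/*
 *	"show vmochk" from the DDB prompt: sanity-check that every internal
 *	(anonymous default or swap) object is referenced and reachable from
 *	some map.
 */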
DB_SHOW_COMMAND(vmochk, vm_object_check)
{
	vm_object_t object;

	/*
	 * Make sure that internal objects are in a map somewhere
	 * and that none have zero ref counts.
	 */
	TAILQ_FOREACH(object, &vm_object_list, object_list) {
		if (object->handle == NULL &&
		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
			if (object->ref_count == 0) {
				db_printf("vmochk: internal obj has zero ref count: size %ld\n",
					(long)object->size);
			}
			if (!vm_object_in_map(object)) {
				db_printf(
			"vmochk: internal obj is not in a map: "
			"ref: %d, size: %lu (0x%lx), backing_object: %p\n",
				    object->ref_count, (u_long)object->size,
				    (u_long)object->size,
				    (void *)object->backing_object);
			}
		}
	}
}

/*
 *	vm_object_print:	[ debug ]
 */
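/*
 *	Reached from the DDB prompt as "show object <addr>".  When an
 *	explicit address is given (have_addr), the resident pages are
 *	listed in addition to the summary.
 */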
DB_SHOW_COMMAND(object, vm_object_print_static)
{
	/* XXX convert args. */
	vm_object_t object = (vm_object_t)addr;
	boolean_t full = have_addr;

	vm_page_t p;

	/* XXX count is an (unused) arg.  Avoid shadowing it. */
#define	count	was_count

	int count;

	if (object == NULL)
		return;

	db_iprintf(
	    "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x\n",
	    object, (int)object->type, (uintmax_t)object->size,
	    object->resident_page_count, object->ref_count, object->flags);
	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
	    object->shadow_count,
	    object->backing_object ? object->backing_object->ref_count : 0,
	    object->backing_object, (uintmax_t)object->backing_object_offset);

	if (!full)
		return;

	db_indent += 2;
	count = 0;
	TAILQ_FOREACH(p, &object->memq, listq) {
		if (count == 0)
			db_iprintf("memory:=");
		else if (count == 6) {
			db_printf("\n");
			db_iprintf(" ...");
			count = 0;
		} else
			db_printf(",");
		count++;

		db_printf("(off=0x%jx,page=0x%jx)",
		    (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
	}
	if (count != 0)
		db_printf("\n");
	db_indent -= 2;
}

/* XXX. */
#undef count

/* XXX need this non-static entry for calling from vm_map_print. */
void
vm_object_print(
	/* db_expr_t */ long addr,
	boolean_t have_addr,
	/* db_expr_t */ long count,
	char *modif)
{
	vm_object_print_static(addr, have_addr, count, modif);
}

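/*
 *	"show vmopag" from the DDB prompt: for each object on
 *	vm_object_list, print runs of resident pages (grouped by physical
 *	contiguity) among the first 128 page indices.  Output pauses every
 *	screenful; press space to continue, any other key to stop.
 */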
DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
{
	vm_object_t object;
	int nl = 0;
	int c;

	TAILQ_FOREACH(object, &vm_object_list, object_list) {
		vm_pindex_t idx, fidx;
		vm_pindex_t osize;
		vm_paddr_t pa = -1, padiff;
		int rcount;
		vm_page_t m;

		db_printf("new object: %p\n", (void *)object);
		if (nl > 18) {
			c = cngetc();
			if (c != ' ')
				return;
			nl = 0;
		}
		nl++;
		rcount = 0;
		fidx = 0;
		osize = object->size;
		if (osize > 128)
			osize = 128;
		for (idx = 0; idx < osize; idx++) {
			m = vm_page_lookup(object, idx);
			if (m == NULL) {
				if (rcount) {
					db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
						(long)fidx, rcount, (long)pa);
					if (nl > 18) {
						c = cngetc();
						if (c != ' ')
							return;
						nl = 0;
					}
					nl++;
					rcount = 0;
				}
				continue;
			}

			if (rcount &&
				(VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
				++rcount;
				continue;
			}
			if (rcount) {
				padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
				padiff >>= PAGE_SHIFT;
				padiff &= PQ_COLORMASK;
				if (padiff == 0) {
					pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
					++rcount;
					continue;
				}
				db_printf(" index(%ld)run(%d)pa(0x%lx)",
					(long)fidx, rcount, (long)pa);
				db_printf("pd(%ld)\n", (long)padiff);
				if (nl > 18) {
					c = cngetc();
					if (c != ' ')
						return;
					nl = 0;
				}
				nl++;
			}
			fidx = idx;
			pa = VM_PAGE_TO_PHYS(m);
			rcount = 1;
		}
		if (rcount) {
			db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
				(long)fidx, rcount, (long)pa);
			if (nl > 18) {
				c = cngetc();
				if (c != ' ')
					return;
				nl = 0;
			}
			nl++;
		}
	}
}
#endif /* DDB */