vm_object.c revision 71576
1/*
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
37 *
38 *
39 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57 *  School of Computer Science
58 *  Carnegie Mellon University
59 *  Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 *
64 * $FreeBSD: head/sys/vm/vm_object.c 71576 2001-01-24 12:35:55Z jasone $
65 */
66
67/*
68 *	Virtual memory object module.
69 */
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/proc.h>		/* for curproc, pageproc */
74#include <sys/vnode.h>
75#include <sys/vmmeter.h>
76#include <sys/mman.h>
77#include <sys/mount.h>
78#include <sys/mutex.h>
79
80#include <vm/vm.h>
81#include <vm/vm_param.h>
82#include <vm/pmap.h>
83#include <vm/vm_map.h>
84#include <vm/vm_object.h>
85#include <vm/vm_page.h>
86#include <vm/vm_pageout.h>
87#include <vm/vm_pager.h>
88#include <vm/vm_zone.h>
89#include <vm/swap_pager.h>
90#include <vm/vm_kern.h>
91#include <vm/vm_extern.h>
92
93static void	vm_object_qcollapse __P((vm_object_t object));
94
95/*
96 *	Virtual memory objects maintain the actual data
97 *	associated with allocated virtual memory.  A given
98 *	page of memory exists within exactly one object.
99 *
100 *	An object is only deallocated when all "references"
101 *	are given up.  Only one "reference" to a given
102 *	region of an object should be writeable.
103 *
104 *	Associated with each object is a list of all resident
105 *	memory pages belonging to that object; this list is
106 *	maintained by the "vm_page" module, and locked by the object's
107 *	lock.
108 *
109 *	Each object also records a "pager" routine which is
110 *	used to retrieve (and store) pages to the proper backing
111 *	storage.  In addition, objects may be backed by other
112 *	objects from which they were virtual-copied.
113 *
114 *	The only items within the object structure which are
115 *	modified after time of creation are:
116 *		reference count		locked by object's lock
117 *		pager routine		locked by object's lock
118 *
119 */
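
/*
 * Illustrative sketch (an editorial example, not kernel code): the
 * typical lifetime of an anonymous object as seen by a client of this
 * module.  The entry points named below are the real ones defined in
 * this file; everything around them is only an example.
 *
 *	vm_object_t obj;
 *
 *	obj = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(len));
 *	vm_object_reference(obj);	(a second user takes a reference)
 *	...
 *	vm_object_deallocate(obj);	(drop the extra reference)
 *	vm_object_deallocate(obj);	(last reference: object terminated)
 */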
120
121struct object_q vm_object_list;
122static struct mtx vm_object_list_mtx;
123static long vm_object_count;		/* count of all objects */
124vm_object_t kernel_object;
125vm_object_t kmem_object;
126static struct vm_object kernel_object_store;
127static struct vm_object kmem_object_store;
128extern int vm_pageout_page_count;
129
130static long object_collapses;
131static long object_bypasses;
132static int next_index;
133static vm_zone_t obj_zone;
134static struct vm_zone obj_zone_store;
135static int object_hash_rand;
136#define VM_OBJECTS_INIT 256
137static struct vm_object vm_objects_init[VM_OBJECTS_INIT];
138
139void
140_vm_object_allocate(type, size, object)
141	objtype_t type;
142	vm_size_t size;
143	vm_object_t object;
144{
145	int incr;
146	TAILQ_INIT(&object->memq);
147	TAILQ_INIT(&object->shadow_head);
148
149	object->type = type;
150	object->size = size;
151	object->ref_count = 1;
152	object->flags = 0;
153	if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
154		vm_object_set_flag(object, OBJ_ONEMAPPING);
155	object->paging_in_progress = 0;
156	object->resident_page_count = 0;
157	object->shadow_count = 0;
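	/*
	 * Give the object a starting page color and advance next_index so
	 * that successive objects are spread across the PQ_L2_SIZE page
	 * colors.  The stride is the object's size, capped at
	 * PQ_L2_SIZE / 3 + PQ_PRIME1, so one large object cannot dominate
	 * the color space.
	 */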
158	object->pg_color = next_index;
159	if ( size > (PQ_L2_SIZE / 3 + PQ_PRIME1))
160		incr = PQ_L2_SIZE / 3 + PQ_PRIME1;
161	else
162		incr = size;
163	next_index = (next_index + incr) & PQ_L2_MASK;
164	object->handle = NULL;
165	object->backing_object = NULL;
166	object->backing_object_offset = (vm_ooffset_t) 0;
167	/*
168	 * Try to generate a number that will spread objects out in the
169	 * hash table.  We 'wipe' new objects across the hash in 128 page
170	 * increments plus 1 more to offset it a little more by the time
171	 * it wraps around.
172	 */
173	object->hash_rand = object_hash_rand - 129;
174
175	object->generation++;
176
177	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
178	vm_object_count++;
179	object_hash_rand = object->hash_rand;
180}
181
182/*
183 *	vm_object_init:
184 *
185 *	Initialize the VM objects module.
186 */
187void
188vm_object_init()
189{
190	TAILQ_INIT(&vm_object_list);
191	mtx_init(&vm_object_list_mtx, "vm object_list", MTX_DEF);
192	vm_object_count = 0;
193
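	/*
	 * kernel_object and kmem_object live in statically allocated
	 * storage, and obj_zone is bootstrapped from the static
	 * vm_objects_init[] array, because the zone allocator cannot yet
	 * allocate memory this early in boot.  vm_object_init2() completes
	 * the zone setup later.
	 */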
194	kernel_object = &kernel_object_store;
195	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
196	    kernel_object);
197
198	kmem_object = &kmem_object_store;
199	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
200	    kmem_object);
201
202	obj_zone = &obj_zone_store;
203	zbootinit(obj_zone, "VM OBJECT", sizeof (struct vm_object),
204		vm_objects_init, VM_OBJECTS_INIT);
205}
206
207void
208vm_object_init2() {
209	zinitna(obj_zone, NULL, NULL, 0, 0, 0, 1);
210}
211
212/*
213 *	vm_object_allocate:
214 *
215 *	Returns a new object with the given size.
216 */
217
218vm_object_t
219vm_object_allocate(type, size)
220	objtype_t type;
221	vm_size_t size;
222{
223	vm_object_t result;
224
225	result = (vm_object_t) zalloc(obj_zone);
226
227	_vm_object_allocate(type, size, result);
228
229	return (result);
230}
231
232
233/*
234 *	vm_object_reference:
235 *
236 *	Gets another reference to the given object.
237 */
238void
239vm_object_reference(object)
240	vm_object_t object;
241{
242	if (object == NULL)
243		return;
244
245	KASSERT(!(object->flags & OBJ_DEAD),
246	    ("vm_object_reference: attempting to reference dead obj"));
247
248	object->ref_count++;
249	if (object->type == OBJT_VNODE) {
250		while (vget((struct vnode *) object->handle, LK_RETRY|LK_NOOBJ, curproc)) {
251			printf("vm_object_reference: delay in getting object\n");
252		}
253	}
254}
255
256void
257vm_object_vndeallocate(object)
258	vm_object_t object;
259{
260	struct vnode *vp = (struct vnode *) object->handle;
261
262	KASSERT(object->type == OBJT_VNODE,
263	    ("vm_object_vndeallocate: not a vnode object"));
264	KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
265#ifdef INVARIANTS
266	if (object->ref_count == 0) {
267		vprint("vm_object_vndeallocate", vp);
268		panic("vm_object_vndeallocate: bad object reference count");
269	}
270#endif
271
272	object->ref_count--;
273	if (object->ref_count == 0) {
274		vp->v_flag &= ~VTEXT;
275		vm_object_clear_flag(object, OBJ_OPT);
276	}
277	vrele(vp);
278}
279
280/*
281 *	vm_object_deallocate:
282 *
283 *	Release a reference to the specified object,
284 *	gained either through a vm_object_allocate
285 *	or a vm_object_reference call.  When all references
286 *	are gone, storage associated with this object
287 *	may be relinquished.
288 *
289 *	No object may be locked.
290 */
291void
292vm_object_deallocate(object)
293	vm_object_t object;
294{
295	vm_object_t temp;
296
297	while (object != NULL) {
298
299		if (object->type == OBJT_VNODE) {
300			vm_object_vndeallocate(object);
301			return;
302		}
303
304		if (object->ref_count == 0) {
305			panic("vm_object_deallocate: object deallocated too many times: %d", object->type);
306		} else if (object->ref_count > 2) {
307			object->ref_count--;
308			return;
309		}
310
311		/*
312		 * Here on a ref_count of one or two.  A count of two with no shadows
313		 * lets us restore OBJ_ONEMAPPING; two with one shadow invites a collapse.
314		 */
315		if ((object->ref_count == 2) && (object->shadow_count == 0)) {
316			vm_object_set_flag(object, OBJ_ONEMAPPING);
317			object->ref_count--;
318			return;
319		} else if ((object->ref_count == 2) && (object->shadow_count == 1)) {
320			object->ref_count--;
321			if ((object->handle == NULL) &&
322			    (object->type == OBJT_DEFAULT ||
323			     object->type == OBJT_SWAP)) {
324				vm_object_t robject;
325
326				robject = TAILQ_FIRST(&object->shadow_head);
327				KASSERT(robject != NULL,
328				    ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
329					 object->ref_count,
330					 object->shadow_count));
331				if ((robject->handle == NULL) &&
332				    (robject->type == OBJT_DEFAULT ||
333				     robject->type == OBJT_SWAP)) {
334
335					robject->ref_count++;
336
337					while (
338						robject->paging_in_progress ||
339						object->paging_in_progress
340					) {
341						vm_object_pip_sleep(robject, "objde1");
342						vm_object_pip_sleep(object, "objde2");
343					}
344
345					if (robject->ref_count == 1) {
346						robject->ref_count--;
347						object = robject;
348						goto doterm;
349					}
350
351					object = robject;
352					vm_object_collapse(object);
353					continue;
354				}
355			}
356
357			return;
358
359		} else {
360			object->ref_count--;
361			if (object->ref_count != 0)
362				return;
363		}
364
365doterm:
366
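		/*
		 * The last reference is gone: unlink the object from its
		 * backing object's shadow list, terminate it, and then loop
		 * to release the reference that it held on the backing
		 * object.
		 */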
367		temp = object->backing_object;
368		if (temp) {
369			TAILQ_REMOVE(&temp->shadow_head, object, shadow_list);
370			temp->shadow_count--;
371			if (temp->ref_count == 0)
372				vm_object_clear_flag(temp, OBJ_OPT);
373			temp->generation++;
374			object->backing_object = NULL;
375		}
376		vm_object_terminate(object);
377		/* unlocks and deallocates object */
378		object = temp;
379	}
380}
381
382/*
383 *	vm_object_terminate actually destroys the specified object, freeing
384 *	up all previously used resources.
385 *
386 *	The object must be locked.
387 *	This routine may block.
388 */
389void
390vm_object_terminate(object)
391	vm_object_t object;
392{
393	vm_page_t p;
394	int s;
395
396	/*
397	 * Make sure no one uses us.
398	 */
399	vm_object_set_flag(object, OBJ_DEAD);
400
401	/*
402	 * wait for the pageout daemon to be done with the object
403	 */
404	vm_object_pip_wait(object, "objtrm");
405
406	KASSERT(!object->paging_in_progress,
407		("vm_object_terminate: pageout in progress"));
408
409	/*
410	 * Clean and free the pages, as appropriate. All references to the
411	 * object are gone, so we don't need to lock it.
412	 */
413	if (object->type == OBJT_VNODE) {
414		struct vnode *vp;
415
416		/*
417		 * Freeze optimized copies.
418		 */
419		vm_freeze_copyopts(object, 0, object->size);
420
421		/*
422		 * Clean pages and flush buffers.
423		 */
424		vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
425
426		vp = (struct vnode *) object->handle;
427		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
428	}
429
430	if (object->ref_count != 0)
431		panic("vm_object_terminate: object with references, ref_count=%d", object->ref_count);
432
433	/*
434	 * Now free any remaining pages. For internal objects, this also
435	 * removes them from paging queues. Don't free wired pages, just
436	 * remove them from the object.
437	 */
438	s = splvm();
439	while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
440		if (p->busy || (p->flags & PG_BUSY))
441			panic("vm_object_terminate: freeing busy page %p\n", p);
442		if (p->wire_count == 0) {
443			vm_page_busy(p);
444			vm_page_free(p);
445			cnt.v_pfree++;
446		} else {
447			vm_page_busy(p);
448			vm_page_remove(p);
449		}
450	}
451	splx(s);
452
453	/*
454	 * Let the pager know object is dead.
455	 */
456	vm_pager_deallocate(object);
457
458	/*
459	 * Remove the object from the global object list.
460	 */
461	mtx_enter(&vm_object_list_mtx, MTX_DEF);
462	TAILQ_REMOVE(&vm_object_list, object, object_list);
463	mtx_exit(&vm_object_list_mtx, MTX_DEF);
464
465	wakeup(object);
466
467	/*
468	 * Free the space for the object.
469	 */
470	zfree(obj_zone, object);
471}
472
473/*
474 *	vm_object_page_clean
475 *
476 *	Clean all dirty pages in the specified range of object.  Leaves page
477 * 	on whatever queue it is currently on.   If NOSYNC is set then do not
478 *	write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
479 *	leaving the object dirty.
480 *
481 *	Odd semantics: if end == 0, we clean to the end of the object.
482 *
483 *	The object must be locked.
484 */
485
486void
487vm_object_page_clean(object, start, end, flags)
488	vm_object_t object;
489	vm_pindex_t start;
490	vm_pindex_t end;
491	int flags;
492{
493	vm_page_t p, np, tp;
494	vm_offset_t tstart, tend;
495	vm_pindex_t pi;
496	int s;
497	struct vnode *vp;
498	int runlen;
499	int maxf;
500	int chkb;
501	int maxb;
502	int i;
503	int clearobjflags;
504	int pagerflags;
505	vm_page_t maf[vm_pageout_page_count];
506	vm_page_t mab[vm_pageout_page_count];
507	vm_page_t ma[vm_pageout_page_count];
508	int curgeneration;
509
510	if (object->type != OBJT_VNODE ||
511		(object->flags & OBJ_MIGHTBEDIRTY) == 0)
512		return;
513
514	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : 0;
515	pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
516
517	vp = object->handle;
518
519	vm_object_set_flag(object, OBJ_CLEANING);
520
521	tstart = start;
522	if (end == 0) {
523		tend = object->size;
524	} else {
525		tend = end;
526	}
527
528	/*
529	 * Generally set CLEANCHK interlock and make the page read-only so
530	 * we can then clear the object flags.
531	 *
532	 * However, if this is a nosync mmap then the object is likely to
533	 * stay dirty so do not mess with the page and do not clear the
534	 * object flags.
535	 */
536
537	clearobjflags = 1;
538
539	for(p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) {
540		vm_page_flag_set(p, PG_CLEANCHK);
541		if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
542			clearobjflags = 0;
543		else
544			vm_page_protect(p, VM_PROT_READ);
545	}
546
547	if (clearobjflags && (tstart == 0) && (tend == object->size)) {
548		vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
549	}
550
551rescan:
552	curgeneration = object->generation;
553
554	for(p = TAILQ_FIRST(&object->memq); p; p = np) {
555		np = TAILQ_NEXT(p, listq);
556
557		pi = p->pindex;
558		if (((p->flags & PG_CLEANCHK) == 0) ||
559			(pi < tstart) || (pi >= tend) ||
560			(p->valid == 0) ||
561			((p->queue - p->pc) == PQ_CACHE)) {
562			vm_page_flag_clear(p, PG_CLEANCHK);
563			continue;
564		}
565
566		vm_page_test_dirty(p);
567		if ((p->dirty & p->valid) == 0) {
568			vm_page_flag_clear(p, PG_CLEANCHK);
569			continue;
570		}
571
572		/*
573		 * If we have been asked to skip nosync pages and this is a
574		 * nosync page, skip it.  Note that the object flags were
575		 * not cleared in this case so we do not have to set them.
576		 */
577		if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
578			vm_page_flag_clear(p, PG_CLEANCHK);
579			continue;
580		}
581
582		s = splvm();
583		while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
584			if (object->generation != curgeneration) {
585				splx(s);
586				goto rescan;
587			}
588		}
589
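		/*
		 * Build a cluster of contiguous dirty pages around p by
		 * scanning forward (maf[]) and then backward (mab[]) from
		 * pi, stopping at the first page that is busy, clean,
		 * cached, or missing, so the whole run can be handed to
		 * vm_pageout_flush() as a single I/O.
		 */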
590		maxf = 0;
591		for(i=1;i<vm_pageout_page_count;i++) {
592			if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
593				if ((tp->flags & PG_BUSY) ||
594					(tp->flags & PG_CLEANCHK) == 0 ||
595					(tp->busy != 0))
596					break;
597				if((tp->queue - tp->pc) == PQ_CACHE) {
598					vm_page_flag_clear(tp, PG_CLEANCHK);
599					break;
600				}
601				vm_page_test_dirty(tp);
602				if ((tp->dirty & tp->valid) == 0) {
603					vm_page_flag_clear(tp, PG_CLEANCHK);
604					break;
605				}
606				maf[ i - 1 ] = tp;
607				maxf++;
608				continue;
609			}
610			break;
611		}
612
613		maxb = 0;
614		chkb = vm_pageout_page_count -  maxf;
615		if (chkb) {
616			for(i = 1; i < chkb;i++) {
617				if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
618					if ((tp->flags & PG_BUSY) ||
619						(tp->flags & PG_CLEANCHK) == 0 ||
620						(tp->busy != 0))
621						break;
622					if((tp->queue - tp->pc) == PQ_CACHE) {
623						vm_page_flag_clear(tp, PG_CLEANCHK);
624						break;
625					}
626					vm_page_test_dirty(tp);
627					if ((tp->dirty & tp->valid) == 0) {
628						vm_page_flag_clear(tp, PG_CLEANCHK);
629						break;
630					}
631					mab[ i - 1 ] = tp;
632					maxb++;
633					continue;
634				}
635				break;
636			}
637		}
638
639		for(i=0;i<maxb;i++) {
640			int index = (maxb - i) - 1;
641			ma[index] = mab[i];
642			vm_page_flag_clear(ma[index], PG_CLEANCHK);
643		}
644		vm_page_flag_clear(p, PG_CLEANCHK);
645		ma[maxb] = p;
646		for(i=0;i<maxf;i++) {
647			int index = (maxb + i) + 1;
648			ma[index] = maf[i];
649			vm_page_flag_clear(ma[index], PG_CLEANCHK);
650		}
651		runlen = maxb + maxf + 1;
652
653		splx(s);
654		vm_pageout_flush(ma, runlen, pagerflags);
655		for (i = 0; i<runlen; i++) {
656			if (ma[i]->valid & ma[i]->dirty) {
657				vm_page_protect(ma[i], VM_PROT_READ);
658				vm_page_flag_set(ma[i], PG_CLEANCHK);
659			}
660		}
661		if (object->generation != curgeneration)
662			goto rescan;
663	}
664
665#if 0
666	VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
667#endif
668
669	vm_object_clear_flag(object, OBJ_CLEANING);
670	return;
671}
672
673#ifdef not_used
674/* XXX I cannot tell if this should be an exported symbol */
675/*
676 *	vm_object_deactivate_pages
677 *
678 *	Deactivate all pages in the specified object.  (Keep its pages
679 *	in memory even though it is no longer referenced.)
680 *
681 *	The object must be locked.
682 */
683static void
684vm_object_deactivate_pages(object)
685	vm_object_t object;
686{
687	vm_page_t p, next;
688
689	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
690		next = TAILQ_NEXT(p, listq);
691		vm_page_deactivate(p);
692	}
693}
694#endif
695
696/*
697 * Same as vm_object_pmap_copy, except range checking really
698 * works, and is meant for small sections of an object.
699 *
700 * This code protects resident pages by making them read-only
701 * and is typically called on a fork or split when a page
702 * is converted to copy-on-write.
703 *
704 * NOTE: If the page is already at VM_PROT_NONE, calling
705 * vm_page_protect will have no effect.
706 */
707
708void
709vm_object_pmap_copy_1(object, start, end)
710	vm_object_t object;
711	vm_pindex_t start;
712	vm_pindex_t end;
713{
714	vm_pindex_t idx;
715	vm_page_t p;
716
717	if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0)
718		return;
719
720	for (idx = start; idx < end; idx++) {
721		p = vm_page_lookup(object, idx);
722		if (p == NULL)
723			continue;
724		vm_page_protect(p, VM_PROT_READ);
725	}
726}
727
728/*
729 *	vm_object_pmap_remove:
730 *
731 *	Removes all physical pages in the specified
732 *	object range from all physical maps.
733 *
734 *	The object must *not* be locked.
735 */
736void
737vm_object_pmap_remove(object, start, end)
738	vm_object_t object;
739	vm_pindex_t start;
740	vm_pindex_t end;
741{
742	vm_page_t p;
743
744	if (object == NULL)
745		return;
746	for (p = TAILQ_FIRST(&object->memq);
747		p != NULL;
748		p = TAILQ_NEXT(p, listq)) {
749		if (p->pindex >= start && p->pindex < end)
750			vm_page_protect(p, VM_PROT_NONE);
751	}
752	if ((start == 0) && (object->size == end))
753		vm_object_clear_flag(object, OBJ_WRITEABLE);
754}
755
756/*
757 *	vm_object_madvise:
758 *
759 *	Implements the madvise function at the object/page level.
760 *
761 *	MADV_WILLNEED	(any object)
762 *
763 *	    Activate the specified pages if they are resident.
764 *
765 *	MADV_DONTNEED	(any object)
766 *
767 *	    Deactivate the specified pages if they are resident.
768 *
769 *	MADV_FREE	(OBJT_DEFAULT/OBJT_SWAP objects,
770 *			 OBJ_ONEMAPPING only)
771 *
772 *	    Deactivate and clean the specified pages if they are
773 *	    resident.  This permits the process to reuse the pages
774 *	    without faulting or the kernel to reclaim the pages
775 *	    without I/O.
776 */
777void
778vm_object_madvise(object, pindex, count, advise)
779	vm_object_t object;
780	vm_pindex_t pindex;
781	int count;
782	int advise;
783{
784	vm_pindex_t end, tpindex;
785	vm_object_t tobject;
786	vm_page_t m;
787
788	if (object == NULL)
789		return;
790
791	end = pindex + count;
792
793	/*
794	 * Locate and adjust resident pages
795	 */
796
797	for (; pindex < end; pindex += 1) {
798relookup:
799		tobject = object;
800		tpindex = pindex;
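		/*
		 * Search the shadow chain top-down for this index: if the
		 * page is not resident in the current object, translate the
		 * index and retry in the backing object until a page (or
		 * backing swap) is found or the chain ends.
		 */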
801shadowlookup:
802		/*
803		 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
804		 * and those pages must be OBJ_ONEMAPPING.
805		 */
806		if (advise == MADV_FREE) {
807			if ((tobject->type != OBJT_DEFAULT &&
808			     tobject->type != OBJT_SWAP) ||
809			    (tobject->flags & OBJ_ONEMAPPING) == 0) {
810				continue;
811			}
812		}
813
814		m = vm_page_lookup(tobject, tpindex);
815
816		if (m == NULL) {
817			/*
818			 * There may be swap even if there is no backing page
819			 */
820			if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
821				swap_pager_freespace(tobject, tpindex, 1);
822
823			/*
824			 * next object
825			 */
826			tpindex += OFF_TO_IDX(tobject->backing_object_offset);
827			tobject = tobject->backing_object;
828			if (tobject == NULL)
829				continue;
830			goto shadowlookup;
831		}
832
833		/*
834		 * If the page is busy or not in a normal active state,
835		 * we skip it.  If the page is not managed there are no
836		 * page queues to mess with.  Things can break if we mess
837		 * with pages in any of the below states.
838		 */
839		if (
840		    m->hold_count ||
841		    m->wire_count ||
842		    (m->flags & PG_UNMANAGED) ||
843		    m->valid != VM_PAGE_BITS_ALL
844		) {
845			continue;
846		}
847
848 		if (vm_page_sleep_busy(m, TRUE, "madvpo"))
849  			goto relookup;
850
851		if (advise == MADV_WILLNEED) {
852			vm_page_activate(m);
853		} else if (advise == MADV_DONTNEED) {
854			vm_page_dontneed(m);
855		} else if (advise == MADV_FREE) {
856			/*
857			 * Mark the page clean.  This will allow the page
858			 * to be freed up by the system.  However, such pages
859			 * are often reused quickly by malloc()/free()
860			 * so we do not do anything that would cause
861			 * a page fault if we can help it.
862			 *
863			 * Specifically, we do not try to actually free
864			 * the page now nor do we try to put it in the
865			 * cache (which would cause a page fault on reuse).
866			 *
867			 * But we do make the page as freeable as we
868			 * can without actually taking the step of unmapping
869			 * it.
870			 */
871			pmap_clear_modify(m);
872			m->dirty = 0;
873			m->act_count = 0;
874			vm_page_dontneed(m);
875			if (tobject->type == OBJT_SWAP)
876				swap_pager_freespace(tobject, tpindex, 1);
877		}
878	}
879}
880
881/*
882 *	vm_object_shadow:
883 *
884 *	Create a new object which is backed by the
885 *	specified existing object range.  The source
886 *	object reference is deallocated.
887 *
888 *	The new object and offset into that object
889 *	are returned in the source parameters.
890 */
891
892void
893vm_object_shadow(object, offset, length)
894	vm_object_t *object;	/* IN/OUT */
895	vm_ooffset_t *offset;	/* IN/OUT */
896	vm_size_t length;
897{
898	vm_object_t source;
899	vm_object_t result;
900
901	source = *object;
902
903	/*
904	 * Don't create the new object if the old object isn't shared.
905	 */
906
907	if (source != NULL &&
908	    source->ref_count == 1 &&
909	    source->handle == NULL &&
910	    (source->type == OBJT_DEFAULT ||
911	     source->type == OBJT_SWAP))
912		return;
913
914	/*
915	 * Allocate a new object with the given length
916	 */
917
918	if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
919		panic("vm_object_shadow: no object for shadowing");
920
921	/*
922	 * The new object shadows the source object, adding a reference to it.
923	 * Our caller changes his reference to point to the new object,
924	 * removing a reference to the source object.  Net result: no change
925	 * of reference count.
926	 *
927	 * Try to optimize the result object's page color when shadowing
928	 * in order to maintain page coloring consistency in the combined
929	 * shadowed object.
930	 */
931	result->backing_object = source;
932	if (source) {
933		TAILQ_INSERT_TAIL(&source->shadow_head, result, shadow_list);
934		source->shadow_count++;
935		source->generation++;
936		result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & PQ_L2_MASK;
937	}
938
939	/*
940	 * Store the offset into the source object, and fix up the offset into
941	 * the new object.
942	 */
943
944	result->backing_object_offset = *offset;
945
946	/*
947	 * Return the new things
948	 */
949
950	*offset = 0;
951	*object = result;
952}
953
954#define	OBSC_TEST_ALL_SHADOWED	0x0001
955#define	OBSC_COLLAPSE_NOWAIT	0x0002
956#define	OBSC_COLLAPSE_WAIT	0x0004
957
958static __inline int
959vm_object_backing_scan(vm_object_t object, int op)
960{
961	int s;
962	int r = 1;
963	vm_page_t p;
964	vm_object_t backing_object;
965	vm_pindex_t backing_offset_index;
966
967	s = splvm();
968
969	backing_object = object->backing_object;
970	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
971
972	/*
973	 * Initial conditions
974	 */
975
976	if (op & OBSC_TEST_ALL_SHADOWED) {
977		/*
978		 * We do not want to have to test for the existence of
979		 * swap pages in the backing object.  XXX but with the
980		 * new swapper this would be pretty easy to do.
981		 *
982		 * XXX what about anonymous MAP_SHARED memory that hasn't
983		 * been ZFOD faulted yet?  If we do not test for this, the
984		 * shadow test may succeed! XXX
985		 */
986		if (backing_object->type != OBJT_DEFAULT) {
987			splx(s);
988			return(0);
989		}
990	}
991	if (op & OBSC_COLLAPSE_WAIT) {
992		vm_object_set_flag(backing_object, OBJ_DEAD);
993	}
994
995	/*
996	 * Our scan
997	 */
998
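	/*
	 * Walk every resident page of the backing object.  With
	 * OBSC_TEST_ALL_SHADOWED we only verify that the parent has (or can
	 * page in) its own copy of each page; with the OBSC_COLLAPSE_*
	 * operations we free pages that the parent already covers and
	 * rename the rest into the parent.
	 */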
999	p = TAILQ_FIRST(&backing_object->memq);
1000	while (p) {
1001		vm_page_t next = TAILQ_NEXT(p, listq);
1002		vm_pindex_t new_pindex = p->pindex - backing_offset_index;
1003
1004		if (op & OBSC_TEST_ALL_SHADOWED) {
1005			vm_page_t pp;
1006
1007			/*
1008			 * Ignore pages outside the parent object's range
1009			 * and outside the parent object's mapping of the
1010			 * backing object.
1011			 *
1012			 * note that we do not busy the backing object's
1013			 * page.
1014			 */
1015
1016			if (
1017			    p->pindex < backing_offset_index ||
1018			    new_pindex >= object->size
1019			) {
1020				p = next;
1021				continue;
1022			}
1023
1024			/*
1025			 * See if the parent has the page or if the parent's
1026			 * object pager has the page.  If the parent has the
1027			 * page but the page is not valid, the parent's
1028			 * object pager must have the page.
1029			 *
1030			 * If this fails, the parent does not completely shadow
1031			 * the object and we might as well give up now.
1032			 */
1033
1034			pp = vm_page_lookup(object, new_pindex);
1035			if (
1036			    (pp == NULL || pp->valid == 0) &&
1037			    !vm_pager_has_page(object, new_pindex, NULL, NULL)
1038			) {
1039				r = 0;
1040				break;
1041			}
1042		}
1043
1044		/*
1045		 * Check for busy page
1046		 */
1047
1048		if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
1049			vm_page_t pp;
1050
1051			if (op & OBSC_COLLAPSE_NOWAIT) {
1052				if (
1053				    (p->flags & PG_BUSY) ||
1054				    !p->valid ||
1055				    p->hold_count ||
1056				    p->wire_count ||
1057				    p->busy
1058				) {
1059					p = next;
1060					continue;
1061				}
1062			} else if (op & OBSC_COLLAPSE_WAIT) {
1063				if (vm_page_sleep_busy(p, TRUE, "vmocol")) {
1064					/*
1065					 * If we slept, anything could have
1066					 * happened.  Since the object is
1067					 * marked dead, the backing offset
1068					 * should not have changed so we
1069					 * just restart our scan.
1070					 */
1071					p = TAILQ_FIRST(&backing_object->memq);
1072					continue;
1073				}
1074			}
1075
1076			/*
1077			 * Busy the page
1078			 */
1079			vm_page_busy(p);
1080
1081			KASSERT(
1082			    p->object == backing_object,
1083			    ("vm_object_qcollapse(): object mismatch")
1084			);
1085
1086			/*
1087			 * Destroy any associated swap
1088			 */
1089			if (backing_object->type == OBJT_SWAP) {
1090				swap_pager_freespace(
1091				    backing_object,
1092				    p->pindex,
1093				    1
1094				);
1095			}
1096
1097			if (
1098			    p->pindex < backing_offset_index ||
1099			    new_pindex >= object->size
1100			) {
1101				/*
1102				 * Page is out of the parent object's range, we
1103				 * can simply destroy it.
1104				 */
1105				vm_page_protect(p, VM_PROT_NONE);
1106				vm_page_free(p);
1107				p = next;
1108				continue;
1109			}
1110
1111			pp = vm_page_lookup(object, new_pindex);
1112			if (
1113			    pp != NULL ||
1114			    vm_pager_has_page(object, new_pindex, NULL, NULL)
1115			) {
1116				/*
1117				 * page already exists in parent OR swap exists
1118				 * for this location in the parent.  Destroy
1119				 * the original page from the backing object.
1120				 *
1121				 * Leave the parent's page alone
1122				 */
1123				vm_page_protect(p, VM_PROT_NONE);
1124				vm_page_free(p);
1125				p = next;
1126				continue;
1127			}
1128
1129			/*
1130			 * Page does not exist in parent, rename the
1131			 * page from the backing object to the main object.
1132			 *
1133			 * If the page was mapped to a process, it can remain
1134			 * mapped through the rename.
1135			 */
1136			if ((p->queue - p->pc) == PQ_CACHE)
1137				vm_page_deactivate(p);
1138
1139			vm_page_rename(p, object, new_pindex);
1140			/* page automatically made dirty by rename */
1141		}
1142		p = next;
1143	}
1144	splx(s);
1145	return(r);
1146}
1147
1148
1149/*
1150 * This version of collapse allows the operation to occur earlier, even
1151 * while paging_in_progress is true for an object.  It is not a complete
1152 * operation, but should plug 99.9% of the rest of the leaks.
1153 */
1154static void
1155vm_object_qcollapse(object)
1156	vm_object_t object;
1157{
1158	vm_object_t backing_object = object->backing_object;
1159
1160	if (backing_object->ref_count != 1)
1161		return;
1162
1163	backing_object->ref_count += 2;
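	/*
	 * Holding two extra references keeps the backing object's ref_count
	 * above one for the duration of the scan, so it can be neither
	 * terminated nor collapsed away while its pages are being freed or
	 * renamed.
	 */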
1164
1165	vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
1166
1167	backing_object->ref_count -= 2;
1168}
1169
1170/*
1171 *	vm_object_collapse:
1172 *
1173 *	Collapse an object with the object backing it.
1174 *	Pages in the backing object are moved into the
1175 *	parent, and the backing object is deallocated.
1176 */
1177void
1178vm_object_collapse(object)
1179	vm_object_t object;
1180{
1181	while (TRUE) {
1182		vm_object_t backing_object;
1183
1184		/*
1185		 * Verify that the conditions are right for collapse:
1186		 *
1187		 * The object exists and the backing object exists.
1188		 */
1189		if (object == NULL)
1190			break;
1191
1192		if ((backing_object = object->backing_object) == NULL)
1193			break;
1194
1195		/*
1196		 * We check the backing object first, because it is most likely
1197		 * not collapsible.
1198		 */
1199		if (backing_object->handle != NULL ||
1200		    (backing_object->type != OBJT_DEFAULT &&
1201		     backing_object->type != OBJT_SWAP) ||
1202		    (backing_object->flags & OBJ_DEAD) ||
1203		    object->handle != NULL ||
1204		    (object->type != OBJT_DEFAULT &&
1205		     object->type != OBJT_SWAP) ||
1206		    (object->flags & OBJ_DEAD)) {
1207			break;
1208		}
1209
1210		if (
1211		    object->paging_in_progress != 0 ||
1212		    backing_object->paging_in_progress != 0
1213		) {
1214			vm_object_qcollapse(object);
1215			break;
1216		}
1217
1218		/*
1219		 * We know that we can either collapse the backing object (if
1220		 * the parent is the only reference to it) or (perhaps) have
1221		 * the parent bypass the object if the parent happens to shadow
1222		 * all the resident pages in the entire backing object.
1223		 *
1224		 * This is ignoring pager-backed pages such as swap pages.
1225		 * vm_object_backing_scan fails the shadowing test in this
1226		 * case.
1227		 */
1228
1229		if (backing_object->ref_count == 1) {
1230			/*
1231			 * If there is exactly one reference to the backing
1232			 * object, we can collapse it into the parent.
1233			 */
1234
1235			vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
1236
1237			/*
1238			 * Move the pager from backing_object to object.
1239			 */
1240
1241			if (backing_object->type == OBJT_SWAP) {
1242				vm_object_pip_add(backing_object, 1);
1243
1244				/*
1245				 * scrap the paging_offset junk and do a
1246				 * discrete copy.  This also removes major
1247				 * assumptions about how the swap-pager
1248				 * works from where it doesn't belong.  The
1249				 * new swapper is able to optimize the
1250				 * destroy-source case.
1251				 */
1252
1253				vm_object_pip_add(object, 1);
1254				swap_pager_copy(
1255				    backing_object,
1256				    object,
1257				    OFF_TO_IDX(object->backing_object_offset), TRUE);
1258				vm_object_pip_wakeup(object);
1259
1260				vm_object_pip_wakeup(backing_object);
1261			}
1262			/*
1263			 * Object now shadows whatever backing_object did.
1264			 * Note that the reference to
1265			 * backing_object->backing_object moves from within
1266			 * backing_object to within object.
1267			 */
1268
1269			TAILQ_REMOVE(
1270			    &object->backing_object->shadow_head,
1271			    object,
1272			    shadow_list
1273			);
1274			object->backing_object->shadow_count--;
1275			object->backing_object->generation++;
1276			if (backing_object->backing_object) {
1277				TAILQ_REMOVE(
1278				    &backing_object->backing_object->shadow_head,
1279				    backing_object,
1280				    shadow_list
1281				);
1282				backing_object->backing_object->shadow_count--;
1283				backing_object->backing_object->generation++;
1284			}
1285			object->backing_object = backing_object->backing_object;
1286			if (object->backing_object) {
1287				TAILQ_INSERT_TAIL(
1288				    &object->backing_object->shadow_head,
1289				    object,
1290				    shadow_list
1291				);
1292				object->backing_object->shadow_count++;
1293				object->backing_object->generation++;
1294			}
1295
1296			object->backing_object_offset +=
1297			    backing_object->backing_object_offset;
1298
1299			/*
1300			 * Discard backing_object.
1301			 *
1302			 * Since the backing object has no pages, no pager left,
1303			 * and no object references within it, all that is
1304			 * necessary is to dispose of it.
1305			 */
1306
1307			TAILQ_REMOVE(
1308			    &vm_object_list,
1309			    backing_object,
1310			    object_list
1311			);
1312			vm_object_count--;
1313
1314			zfree(obj_zone, backing_object);
1315
1316			object_collapses++;
1317		} else {
1318			vm_object_t new_backing_object;
1319
1320			/*
1321			 * If we do not entirely shadow the backing object,
1322			 * there is nothing we can do so we give up.
1323			 */
1324
1325			if (vm_object_backing_scan(object, OBSC_TEST_ALL_SHADOWED) == 0) {
1326				break;
1327			}
1328
1329			/*
1330			 * Make the parent shadow the next object in the
1331			 * chain.  Deallocating backing_object will not remove
1332			 * it, since its reference count is at least 2.
1333			 */
1334
1335			TAILQ_REMOVE(
1336			    &backing_object->shadow_head,
1337			    object,
1338			    shadow_list
1339			);
1340			backing_object->shadow_count--;
1341			backing_object->generation++;
1342
1343			new_backing_object = backing_object->backing_object;
1344			if ((object->backing_object = new_backing_object) != NULL) {
1345				vm_object_reference(new_backing_object);
1346				TAILQ_INSERT_TAIL(
1347				    &new_backing_object->shadow_head,
1348				    object,
1349				    shadow_list
1350				);
1351				new_backing_object->shadow_count++;
1352				new_backing_object->generation++;
1353				object->backing_object_offset +=
1354					backing_object->backing_object_offset;
1355			}
1356
1357			/*
1358			 * Drop the reference count on backing_object. Since
1359			 * its ref_count was at least 2, it will not vanish;
1360			 * so we don't need to call vm_object_deallocate, but
1361			 * we do anyway.
1362			 */
1363			vm_object_deallocate(backing_object);
1364			object_bypasses++;
1365		}
1366
1367		/*
1368		 * Try again with this object's new backing object.
1369		 */
1370	}
1371}
1372
1373/*
1374 *	vm_object_page_remove: [internal]
1375 *
1376 *	Removes all physical pages in the specified
1377 *	object range from the object's list of pages.
1378 *
1379 *	The object must be locked.
1380 */
1381void
1382vm_object_page_remove(object, start, end, clean_only)
1383	vm_object_t object;
1384	vm_pindex_t start;
1385	vm_pindex_t end;
1386	boolean_t clean_only;
1387{
1388	vm_page_t p, next;
1389	unsigned int size;
1390	int all;
1391
1392	if (object == NULL ||
1393	    object->resident_page_count == 0)
1394		return;
1395
1396	all = ((end == 0) && (start == 0));
1397
1398	/*
1399	 * Since physically-backed objects do not use managed pages, we can't
1400	 * remove pages from the object (we must instead remove the page
1401	 * references, and then destroy the object).
1402	 */
1403	KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object"));
1404
1405	vm_object_pip_add(object, 1);
1406again:
1407	size = end - start;
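	/*
	 * If the range spans more than a quarter of the object's resident
	 * pages (or the whole object), walk the resident page list once;
	 * otherwise look up each index individually, which is cheaper for
	 * small ranges.
	 */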
1408	if (all || size > object->resident_page_count / 4) {
1409		for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
1410			next = TAILQ_NEXT(p, listq);
1411			if (all || ((start <= p->pindex) && (p->pindex < end))) {
1412				if (p->wire_count != 0) {
1413					vm_page_protect(p, VM_PROT_NONE);
1414					if (!clean_only)
1415						p->valid = 0;
1416					continue;
1417				}
1418
1419				/*
1420				 * The busy flags are only cleared at
1421				 * interrupt -- minimize the spl transitions
1422				 */
1423
1424 				if (vm_page_sleep_busy(p, TRUE, "vmopar"))
1425 					goto again;
1426
1427				if (clean_only && p->valid) {
1428					vm_page_test_dirty(p);
1429					if (p->valid & p->dirty)
1430						continue;
1431				}
1432
1433				vm_page_busy(p);
1434				vm_page_protect(p, VM_PROT_NONE);
1435				vm_page_free(p);
1436			}
1437		}
1438	} else {
1439		while (size > 0) {
1440			if ((p = vm_page_lookup(object, start)) != 0) {
1441
1442				if (p->wire_count != 0) {
1443					vm_page_protect(p, VM_PROT_NONE);
1444					if (!clean_only)
1445						p->valid = 0;
1446					start += 1;
1447					size -= 1;
1448					continue;
1449				}
1450
1451				/*
1452				 * The busy flags are only cleared at
1453				 * interrupt -- minimize the spl transitions
1454				 */
1455 				if (vm_page_sleep_busy(p, TRUE, "vmopar"))
1456					goto again;
1457
1458				if (clean_only && p->valid) {
1459					vm_page_test_dirty(p);
1460					if (p->valid & p->dirty) {
1461						start += 1;
1462						size -= 1;
1463						continue;
1464					}
1465				}
1466
1467				vm_page_busy(p);
1468				vm_page_protect(p, VM_PROT_NONE);
1469				vm_page_free(p);
1470			}
1471			start += 1;
1472			size -= 1;
1473		}
1474	}
1475	vm_object_pip_wakeup(object);
1476}
1477
1478/*
1479 *	Routine:	vm_object_coalesce
1480 *	Function:	Coalesces two objects backing up adjoining
1481 *			regions of memory into a single object.
1482 *
1483 *	returns TRUE if objects were combined.
1484 *
1485 *	NOTE:	Only works at the moment if the second object is NULL -
1486 *		if it's not, which object do we lock first?
1487 *
1488 *	Parameters:
1489 *		prev_object	First object to coalesce
1490 *		prev_offset	Offset into prev_object
1491 *		next_object	Second object to coalesce
1492 *		next_offset	Offset into next_object
1493 *
1494 *		prev_size	Size of reference to prev_object
1495 *		next_size	Size of reference to next_object
1496 *
1497 *	Conditions:
1498 *	The object must *not* be locked.
1499 */
1500boolean_t
1501vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size)
1502	vm_object_t prev_object;
1503	vm_pindex_t prev_pindex;
1504	vm_size_t prev_size, next_size;
1505{
1506	vm_pindex_t next_pindex;
1507
1508	if (prev_object == NULL) {
1509		return (TRUE);
1510	}
1511
1512	if (prev_object->type != OBJT_DEFAULT &&
1513	    prev_object->type != OBJT_SWAP) {
1514		return (FALSE);
1515	}
1516
1517	/*
1518	 * Try to collapse the object first
1519	 */
1520	vm_object_collapse(prev_object);
1521
1522	/*
1523	 * Can't coalesce if the object has more than one reference, is paged
1524	 * out, shadows another object, or has a copy elsewhere (any of which
1525	 * mean that the pages not mapped to prev_entry may be in use anyway).
1526	 */
1527
1528	if (prev_object->backing_object != NULL) {
1529		return (FALSE);
1530	}
1531
1532	prev_size >>= PAGE_SHIFT;
1533	next_size >>= PAGE_SHIFT;
1534	next_pindex = prev_pindex + prev_size;
1535
1536	if ((prev_object->ref_count > 1) &&
1537	    (prev_object->size != next_pindex)) {
1538		return (FALSE);
1539	}
1540
1541	/*
1542	 * Remove any pages that may still be in the object from a previous
1543	 * deallocation.
1544	 */
1545	if (next_pindex < prev_object->size) {
1546		vm_object_page_remove(prev_object,
1547				      next_pindex,
1548				      next_pindex + next_size, FALSE);
1549		if (prev_object->type == OBJT_SWAP)
1550			swap_pager_freespace(prev_object,
1551					     next_pindex, next_size);
1552	}
1553
1554	/*
1555	 * Extend the object if necessary.
1556	 */
1557	if (next_pindex + next_size > prev_object->size)
1558		prev_object->size = next_pindex + next_size;
1559
1560	return (TRUE);
1561}
1562
1563#include "opt_ddb.h"
1564#ifdef DDB
1565#include <sys/kernel.h>
1566
1567#include <sys/cons.h>
1568
1569#include <ddb/ddb.h>
1570
1571static int	_vm_object_in_map __P((vm_map_t map, vm_object_t object,
1572				       vm_map_entry_t entry));
1573static int	vm_object_in_map __P((vm_object_t object));
1574
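/*
 * Return non-zero if the given object backs any entry of the given map,
 * descending into submaps and following backing-object chains.
 */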
1575static int
1576_vm_object_in_map(map, object, entry)
1577	vm_map_t map;
1578	vm_object_t object;
1579	vm_map_entry_t entry;
1580{
1581	vm_map_t tmpm;
1582	vm_map_entry_t tmpe;
1583	vm_object_t obj;
1584	int entcount;
1585
1586	if (map == 0)
1587		return 0;
1588
1589	if (entry == 0) {
1590		tmpe = map->header.next;
1591		entcount = map->nentries;
1592		while (entcount-- && (tmpe != &map->header)) {
1593			if( _vm_object_in_map(map, object, tmpe)) {
1594				return 1;
1595			}
1596			tmpe = tmpe->next;
1597		}
1598	} else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
1599		tmpm = entry->object.sub_map;
1600		tmpe = tmpm->header.next;
1601		entcount = tmpm->nentries;
1602		while (entcount-- && tmpe != &tmpm->header) {
1603			if( _vm_object_in_map(tmpm, object, tmpe)) {
1604				return 1;
1605			}
1606			tmpe = tmpe->next;
1607		}
1608	} else if ((obj = entry->object.vm_object) != NULL) {
1609		for(; obj; obj=obj->backing_object)
1610			if( obj == object) {
1611				return 1;
1612			}
1613	}
1614	return 0;
1615}
1616
1617static int
1618vm_object_in_map( object)
1619	vm_object_t object;
1620{
1621	struct proc *p;
1622	ALLPROC_LOCK(AP_SHARED);
1623	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
1624		if( !p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
1625			continue;
1626		if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
1627			ALLPROC_LOCK(AP_RELEASE);
1628			return 1;
1629		}
1630	}
1631	ALLPROC_LOCK(AP_RELEASE);
1632	if( _vm_object_in_map( kernel_map, object, 0))
1633		return 1;
1634	if( _vm_object_in_map( kmem_map, object, 0))
1635		return 1;
1636	if( _vm_object_in_map( pager_map, object, 0))
1637		return 1;
1638	if( _vm_object_in_map( buffer_map, object, 0))
1639		return 1;
1640	if( _vm_object_in_map( mb_map, object, 0))
1641		return 1;
1642	return 0;
1643}
1644
1645DB_SHOW_COMMAND(vmochk, vm_object_check)
1646{
1647	vm_object_t object;
1648
1649	/*
1650	 * make sure that internal objs are in a map somewhere
1651	 * and none have zero ref counts.
1652	 */
1653	for (object = TAILQ_FIRST(&vm_object_list);
1654			object != NULL;
1655			object = TAILQ_NEXT(object, object_list)) {
1656		if (object->handle == NULL &&
1657		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
1658			if (object->ref_count == 0) {
1659				db_printf("vmochk: internal obj has zero ref count: %ld\n",
1660					(long)object->size);
1661			}
1662			if (!vm_object_in_map(object)) {
1663				db_printf(
1664			"vmochk: internal obj is not in a map: "
1665			"ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
1666				    object->ref_count, (u_long)object->size,
1667				    (u_long)object->size,
1668				    (void *)object->backing_object);
1669			}
1670		}
1671	}
1672}
1673
1674/*
1675 *	vm_object_print:	[ debug ]
1676 */
1677DB_SHOW_COMMAND(object, vm_object_print_static)
1678{
1679	/* XXX convert args. */
1680	vm_object_t object = (vm_object_t)addr;
1681	boolean_t full = have_addr;
1682
1683	vm_page_t p;
1684
1685	/* XXX count is an (unused) arg.  Avoid shadowing it. */
1686#define	count	was_count
1687
1688	int count;
1689
1690	if (object == NULL)
1691		return;
1692
1693	db_iprintf(
1694	    "Object %p: type=%d, size=0x%lx, res=%d, ref=%d, flags=0x%x\n",
1695	    object, (int)object->type, (u_long)object->size,
1696	    object->resident_page_count, object->ref_count, object->flags);
1697	/*
1698	 * XXX no %qd in kernel.  Truncate object->backing_object_offset.
1699	 */
1700	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%lx\n",
1701	    object->shadow_count,
1702	    object->backing_object ? object->backing_object->ref_count : 0,
1703	    object->backing_object, (long)object->backing_object_offset);
1704
1705	if (!full)
1706		return;
1707
1708	db_indent += 2;
1709	count = 0;
1710	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) {
1711		if (count == 0)
1712			db_iprintf("memory:=");
1713		else if (count == 6) {
1714			db_printf("\n");
1715			db_iprintf(" ...");
1716			count = 0;
1717		} else
1718			db_printf(",");
1719		count++;
1720
1721		db_printf("(off=0x%lx,page=0x%lx)",
1722		    (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p));
1723	}
1724	if (count != 0)
1725		db_printf("\n");
1726	db_indent -= 2;
1727}
1728
1729/* XXX. */
1730#undef count
1731
1732/* XXX need this non-static entry for calling from vm_map_print. */
1733void
1734vm_object_print(addr, have_addr, count, modif)
1735        /* db_expr_t */ long addr;
1736	boolean_t have_addr;
1737	/* db_expr_t */ long count;
1738	char *modif;
1739{
1740	vm_object_print_static(addr, have_addr, count, modif);
1741}
1742
1743DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
1744{
1745	vm_object_t object;
1746	int nl = 0;
1747	int c;
1748	for (object = TAILQ_FIRST(&vm_object_list);
1749			object != NULL;
1750			object = TAILQ_NEXT(object, object_list)) {
1751		vm_pindex_t idx, fidx;
1752		vm_pindex_t osize;
1753		vm_offset_t pa = -1, padiff;
1754		int rcount;
1755		vm_page_t m;
1756
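		/*
		 * Print up to the first 128 pages of the object, coalescing
		 * physically contiguous pages into runs and pausing for a
		 * keypress roughly every 18 lines of output.
		 */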
1757		db_printf("new object: %p\n", (void *)object);
1758		if ( nl > 18) {
1759			c = cngetc();
1760			if (c != ' ')
1761				return;
1762			nl = 0;
1763		}
1764		nl++;
1765		rcount = 0;
1766		fidx = 0;
1767		osize = object->size;
1768		if (osize > 128)
1769			osize = 128;
1770		for(idx=0;idx<osize;idx++) {
1771			m = vm_page_lookup(object, idx);
1772			if (m == NULL) {
1773				if (rcount) {
1774					db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
1775						(long)fidx, rcount, (long)pa);
1776					if ( nl > 18) {
1777						c = cngetc();
1778						if (c != ' ')
1779							return;
1780						nl = 0;
1781					}
1782					nl++;
1783					rcount = 0;
1784				}
1785				continue;
1786			}
1787
1788
1789			if (rcount &&
1790				(VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
1791				++rcount;
1792				continue;
1793			}
1794			if (rcount) {
1795				padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
1796				padiff >>= PAGE_SHIFT;
1797				padiff &= PQ_L2_MASK;
1798				if (padiff == 0) {
1799					pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
1800					++rcount;
1801					continue;
1802				}
1803				db_printf(" index(%ld)run(%d)pa(0x%lx)",
1804					(long)fidx, rcount, (long)pa);
1805				db_printf("pd(%ld)\n", (long)padiff);
1806				if ( nl > 18) {
1807					c = cngetc();
1808					if (c != ' ')
1809						return;
1810					nl = 0;
1811				}
1812				nl++;
1813			}
1814			fidx = idx;
1815			pa = VM_PAGE_TO_PHYS(m);
1816			rcount = 1;
1817		}
1818		if (rcount) {
1819			db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
1820				(long)fidx, rcount, (long)pa);
1821			if ( nl > 18) {
1822				c = cngetc();
1823				if (c != ' ')
1824					return;
1825				nl = 0;
1826			}
1827			nl++;
1828		}
1829	}
1830}
1831#endif /* DDB */
1832