vm_object.c revision 7870
1/*
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
37 *
38 *
39 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57 *  School of Computer Science
58 *  Carnegie Mellon University
59 *  Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 *
64 * $Id: vm_object.c,v 1.41 1995/04/09 06:03:49 davidg Exp $
65 */
66
67/*
68 *	Virtual memory object module.
69 */
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/kernel.h>
74#include <sys/proc.h>		/* for curproc, pageproc */
75#include <sys/malloc.h>
76#include <sys/vnode.h>
77#include <sys/mount.h>
78
79#include <vm/vm.h>
80#include <vm/vm_page.h>
81#include <vm/vm_pageout.h>
82#include <vm/vm_pager.h>
83#include <vm/swap_pager.h>
84#include <vm/vnode_pager.h>
85#include <vm/vm_kern.h>
86
87static void _vm_object_allocate(vm_size_t, vm_object_t);
88
89
90/*
91 *	Virtual memory objects maintain the actual data
92 *	associated with allocated virtual memory.  A given
93 *	page of memory exists within exactly one object.
94 *
95 *	An object is only deallocated when all "references"
96 *	are given up.  Only one "reference" to a given
97 *	region of an object should be writeable.
98 *
99 *	Associated with each object is a list of all resident
100 *	memory pages belonging to that object; this list is
101 *	maintained by the "vm_page" module, and locked by the object's
102 *	lock.
103 *
104 *	Each object also records a "pager" routine which is
105 *	used to retrieve (and store) pages to the proper backing
106 *	storage.  In addition, objects may be backed by other
107 *	objects from which they were virtual-copied.
108 *
109 *	The only items within the object structure which are
110 *	modified after time of creation are:
111 *		reference count		locked by object's lock
112 *		pager routine		locked by object's lock
113 *
114 */
115
116
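/*
 * Illustrative sketch of the object life cycle implemented below.  The
 * function name is hypothetical; the three vm_object_* calls are the
 * routines defined in this file.  vm_object_allocate() hands back an
 * object holding one reference, every additional consumer takes its own
 * reference, and each reference is eventually dropped again.
 */
#if 0	/* example only -- not compiled */
static void
example_object_lifecycle(void)
{
	vm_object_t obj;

	obj = vm_object_allocate(4 * PAGE_SIZE);	/* ref_count == 1 */
	vm_object_reference(obj);			/* ref_count == 2 */

	/* ... both holders use the object ... */

	vm_object_deallocate(obj);			/* ref_count == 1 */
	vm_object_deallocate(obj);			/* last reference: the
							 * object is terminated,
							 * or cached if marked
							 * OBJ_CANPERSIST */
}
#endif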
117struct vm_object kernel_object_store;
118struct vm_object kmem_object_store;
119
120int vm_object_cache_max;
121
122#define	VM_OBJECT_HASH_COUNT	509
123
124struct vm_object_hash_head vm_object_hashtable[VM_OBJECT_HASH_COUNT];
125
126long object_collapses = 0;
127long object_bypasses = 0;
128
129static void
130_vm_object_allocate(size, object)
131	vm_size_t size;
132	register vm_object_t object;
133{
134	TAILQ_INIT(&object->memq);
135	TAILQ_INIT(&object->reverse_shadow_head);
136
137	object->size = size;
138	object->ref_count = 1;
139	vm_object_lock_init(object);
140	object->flags = OBJ_INTERNAL;	/* pager will reset */
141	object->paging_in_progress = 0;
142	object->resident_page_count = 0;
143
144	object->pager = NULL;
145	object->paging_offset = 0;
146	object->shadow = NULL;
147	object->shadow_offset = (vm_offset_t) 0;
148	object->copy = NULL;
149
150	object->last_read = 0;
151
152	simple_lock(&vm_object_list_lock);
153	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
154	vm_object_count++;
155	simple_unlock(&vm_object_list_lock);
156}
157
158/*
159 *	vm_object_init:
160 *
161 *	Initialize the VM objects module.
162 */
163void
164vm_object_init(vm_offset_t nothing)
165{
166	register int i;
167
168	TAILQ_INIT(&vm_object_cached_list);
169	TAILQ_INIT(&vm_object_list);
170	vm_object_count = 0;
171	simple_lock_init(&vm_cache_lock);
172	simple_lock_init(&vm_object_list_lock);
173
174	vm_object_cache_max = 84;
175	if (cnt.v_page_count > 1000)
176		vm_object_cache_max += (cnt.v_page_count - 1000) / 4;
177
178	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
179		TAILQ_INIT(&vm_object_hashtable[i]);
180
181	kernel_object = &kernel_object_store;
182	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
183	    kernel_object);
184
185	kmem_object = &kmem_object_store;
186	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
187	    kmem_object);
188}
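/*
 * Worked example for the sizing above: a machine with 8192 physical
 * pages gets vm_object_cache_max = 84 + (8192 - 1000) / 4 = 1882
 * cacheable objects, while machines with 1000 pages or fewer stay at
 * the base value of 84.
 */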
189
190/*
191 *	vm_object_allocate:
192 *
193 *	Returns a new object with the given size.
194 */
195
196vm_object_t
197vm_object_allocate(size)
198	vm_size_t size;
199{
200	register vm_object_t result;
201
202	result = (vm_object_t)
203	    malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK);
204
205
206	_vm_object_allocate(size, result);
207
208	return (result);
209}
210
211
212/*
213 *	vm_object_reference:
214 *
215 *	Gets another reference to the given object.
216 */
217inline void
218vm_object_reference(object)
219	register vm_object_t object;
220{
221	if (object == NULL)
222		return;
223
224	vm_object_lock(object);
225	object->ref_count++;
226	vm_object_unlock(object);
227}
228
229/*
230 *	vm_object_deallocate:
231 *
232 *	Release a reference to the specified object,
233 *	gained either through a vm_object_allocate
234 *	or a vm_object_reference call.  When all references
235 *	are gone, storage associated with this object
236 *	may be relinquished.
237 *
238 *	No object may be locked.
239 */
240void
241vm_object_deallocate(object)
242	vm_object_t object;
243{
244	vm_object_t temp;
245	vm_pager_t pager;
246
247	while (object != NULL) {
248
249		if (object->ref_count == 0)
250			panic("vm_object_deallocate: object deallocated too many times");
251
252		/*
253		 * The cache holds a reference (uncounted) to the object; we
254		 * must lock it before removing the object.
255		 */
256
257		vm_object_cache_lock();
258
259		/*
260		 * Lose the reference
261		 */
262		vm_object_lock(object);
263
264		object->ref_count--;
265
266		if (object->ref_count != 0) {
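			/*
			 * When the one remaining reference to an internal
			 * object is presumably the shadow reference held by
			 * another internal object, wait for pending paging on
			 * both and then collapse (or continue deallocating)
			 * that shadowing object, trimming anonymous shadow
			 * chains as references drain away.
			 */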
267			if ((object->ref_count == 1) &&
268			    (object->flags & OBJ_INTERNAL)) {
269				vm_object_t robject;
270				robject = object->reverse_shadow_head.tqh_first;
271				if ((robject != NULL) &&
272				    (robject->flags & OBJ_INTERNAL)) {
273					int s;
274					robject->ref_count += 2;
275					object->ref_count += 2;
276
277					do {
278						s = splhigh();
279						while (robject->paging_in_progress) {
280							robject->flags |= OBJ_PIPWNT;
281							tsleep(robject, PVM, "objde1", 0);
282						}
283
284						while (object->paging_in_progress) {
285							object->flags |= OBJ_PIPWNT;
286							tsleep(object, PVM, "objde2", 0);
287						}
288						splx(s);
289
290					} while( object->paging_in_progress || robject->paging_in_progress);
291
292					object->ref_count -= 2;
293					robject->ref_count -= 2;
294					if( robject->ref_count == 0) {
295						vm_object_unlock(object);
296						vm_object_cache_unlock();
297						robject->ref_count += 1;
298						object = robject;
299						continue;
300					}
301					vm_object_cache_unlock();
302					vm_object_unlock(object);
303					vm_object_lock(robject);
304					vm_object_collapse(robject);
305					return;
306				}
307			}
308			vm_object_unlock(object);
309			/*
310			 * If there are still references, then we are done.
311			 */
312			vm_object_cache_unlock();
313			return;
314		}
315
316		pager = object->pager;
317
318		if (pager && pager->pg_type == PG_VNODE) {
319			vn_pager_t vnp = (vn_pager_t) pager->pg_data;
320
321			vnp->vnp_vp->v_flag &= ~VTEXT;
322		}
323
324		/*
325		 * See if this object can persist and has some resident
326		 * pages.  If so, enter it in the cache.
327		 */
328		if (object->flags & OBJ_CANPERSIST) {
329			if (object->resident_page_count != 0) {
330				TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
331				    cached_list);
332				vm_object_cached++;
333				vm_object_cache_unlock();
334
335				vm_object_unlock(object);
336
337				vm_object_cache_trim();
338				return;
339			} else {
340				object->flags &= ~OBJ_CANPERSIST;
341			}
342		}
343
344		/*
345		 * Make sure no one can look us up now.
346		 */
347		object->flags |= OBJ_DEAD;
348		if ((object->flags & OBJ_INTERNAL) == 0)
349			vm_object_remove(pager);
350		vm_object_cache_unlock();
351
352		temp = object->shadow;
353		if (temp)
354			TAILQ_REMOVE(&temp->reverse_shadow_head, object, reverse_shadow_list);
355		vm_object_terminate(object);
356		/* unlocks and deallocates object */
357		object = temp;
358	}
359}
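/*
 * Illustrative sketch of the "persist" path above.  A pager-backed object
 * marked OBJ_CANPERSIST survives its last reference on the cached-object
 * list, and a later holder of the same pager can revive it instead of
 * re-reading its pages.  The function and parameter names are hypothetical;
 * vm_object_deallocate() and vm_object_lookup() are the routines from this
 * file.
 */
#if 0	/* example only -- not compiled */
static void
example_cache_revival(vm_pager_t pager, vm_object_t object)
{
	/*
	 * Last reference goes away; with OBJ_CANPERSIST set and resident
	 * pages present, the object moves to vm_object_cached_list with
	 * ref_count == 0 instead of being terminated.
	 */
	vm_object_deallocate(object);

	/*
	 * Later, the pager hashes back to the cached object; the lookup
	 * pulls it off the cached list and returns it with one reference,
	 * resident pages intact.
	 */
	object = vm_object_lookup(pager);
}
#endif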
360
361/*
362 *	vm_object_terminate actually destroys the specified object, freeing
363 *	up all previously used resources.
364 *
365 *	The object must be locked.
366 */
367void
368vm_object_terminate(object)
369	register vm_object_t object;
370{
371	register vm_page_t p, next;
372	vm_object_t shadow_object;
373	int s;
374
375	/*
376	 * Detach the object from its shadow if we are the shadow's copy.
377	 */
378	if ((shadow_object = object->shadow) != NULL) {
379		vm_object_lock(shadow_object);
380		if (shadow_object->copy == object)
381			shadow_object->copy = NULL;
382		vm_object_unlock(shadow_object);
383	}
384
385	/*
386	 * wait for the pageout daemon to be done with the object
387	 */
388	s = splhigh();
389	while (object->paging_in_progress) {
390		vm_object_unlock(object);
391		object->flags |= OBJ_PIPWNT;
392		tsleep((caddr_t) object, PVM, "objtrm", 0);
393		vm_object_lock(object);
394	}
395	splx(s);
396
397	if (object->paging_in_progress != 0)
398		panic("vm_object_terminate: pageout in progress");
399
400	/*
401	 * Clean and free the pages, as appropriate. All references to the
402	 * object are gone, so we don't need to lock it.
403	 */
404	if (object->pager && (object->pager->pg_type == PG_VNODE)) {
405		vn_pager_t vnp = object->pager->pg_data;
406		struct vnode *vp;
407
408		vp = vnp->vnp_vp;
409		VOP_LOCK(vp);
410		(void) _vm_object_page_clean(object, 0, 0, TRUE);
411		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
412		VOP_UNLOCK(vp);
413	}
414
415	/*
416	 * Now free the pages. For internal objects, this also removes them
417	 * from paging queues.
418	 */
419	while ((p = object->memq.tqh_first) != NULL) {
420		VM_PAGE_CHECK(p);
421		vm_page_lock_queues();
422		if (p->flags & PG_BUSY)
423			printf("vm_object_terminate: freeing busy page\n");
424		PAGE_WAKEUP(p);
425		vm_page_free(p);
426		cnt.v_pfree++;
427		vm_page_unlock_queues();
428	}
429	vm_object_unlock(object);
430
431	/*
432	 * Let the pager know object is dead.
433	 */
434	if (object->pager != NULL)
435		vm_pager_deallocate(object->pager);
436
437	simple_lock(&vm_object_list_lock);
438	TAILQ_REMOVE(&vm_object_list, object, object_list);
439	vm_object_count--;
440	simple_unlock(&vm_object_list_lock);
441
442	wakeup(object);
443
444	/*
445	 * Free the space for the object.
446	 */
447	free((caddr_t) object, M_VMOBJ);
448}
449
450/*
451 *	vm_object_page_clean
452 *
453 *	Clean all dirty pages in the specified range of object.
454 *	Leaves page on whatever queue it is currently on.
455 *
456 *	Odd semantics: if start == end, we clean everything.
457 *
458 *	The object must be locked.
459 */
460
461void
462_vm_object_page_clean(object, start, end, syncio)
463	vm_object_t object;
464	vm_offset_t start;
465	vm_offset_t end;
466	boolean_t syncio;
467{
468	register vm_page_t p;
469	register vm_offset_t tstart, tend;
470	int pass;
471	int pgcount, s;
472	int allclean;
473	int entireobj;
474
475	if (object->pager == NULL || (object->flags & OBJ_WRITEABLE) == 0)
476		return;
477
478	if (start != end) {
479		start = trunc_page(start);
480		end = round_page(end);
481	}
482
483	pass = 0;
484startover:
485	tstart = start;
486	if (end == 0) {
487		tend = object->size;
488	} else {
489		tend = end;
490	}
491	entireobj = 0;
492	if (tstart == 0 && tend == object->size) {
493		object->flags &= ~OBJ_WRITEABLE;
494		entireobj = 1;
495	}
496	/*
497	 * Wait until potential collapse operation is complete
498	 */
499	if (object->flags & OBJ_INTERNAL) {
500		s = splhigh();
501		while (object->paging_in_progress) {
502			object->flags |= OBJ_PIPWNT;
503			tsleep(object, PVM, "objpcw", 0);
504		}
505		splx(s);
506	}
507
508	pgcount = object->resident_page_count;
509
510	if (pass == 0 &&
511	    (pgcount < 128 || pgcount > (object->size / (8 * PAGE_SIZE)))) {
512		allclean = 1;
513		for(; pgcount && (tstart < tend); tstart += PAGE_SIZE) {
514			p = vm_page_lookup(object, tstart);
515			if (!p)
516				continue;
517			--pgcount;
518			s = splhigh();
519			TAILQ_REMOVE(&object->memq, p, listq);
520			TAILQ_INSERT_TAIL(&object->memq, p, listq);
521			splx(s);
522			if (entireobj)
523				vm_page_protect(p, VM_PROT_READ);
524			if ((p->flags & (PG_BUSY|PG_CACHE)) || p->busy ||
525				p->valid == 0) {
526				continue;
527			}
528			vm_page_test_dirty(p);
529			if ((p->valid & p->dirty) != 0) {
530				vm_offset_t tincr;
531				tincr = vm_pageout_clean(p, VM_PAGEOUT_FORCE);
532				pgcount -= (tincr - 1);
533				tincr *= PAGE_SIZE;
534				tstart += tincr - PAGE_SIZE;
535				allclean = 0;
536			}
537		}
538		if (!allclean) {
539			pass = 1;
540			goto startover;
541		}
542		return;
543	}
544
545	allclean = 1;
546	while ((p = object->memq.tqh_first) != NULL && pgcount > 0) {
547
548		if (p->flags & PG_CACHE) {
549			goto donext;
550		}
551
552		if (entireobj || (p->offset >= tstart && p->offset < tend)) {
553			if (entireobj)
554				vm_page_protect(p, VM_PROT_READ);
555
556			if (p->valid == 0) {
557				goto donext;
558			}
559
560			s = splhigh();
561			if ((p->flags & PG_BUSY) || p->busy) {
562				allclean = 0;
563				if (pass > 0) {
564					p->flags |= PG_WANTED;
565					tsleep(p, PVM, "objpcn", 0);
566					splx(s);
567					continue;
568				} else {
569					splx(s);
570					goto donext;
571				}
572			}
573
574			TAILQ_REMOVE(&object->memq, p, listq);
575			TAILQ_INSERT_TAIL(&object->memq, p, listq);
576			splx(s);
577
578			pgcount--;
579			vm_page_test_dirty(p);
580			if ((p->valid & p->dirty) != 0) {
581				vm_pageout_clean(p, VM_PAGEOUT_FORCE);
582				allclean = 0;
583			}
584			continue;
585		}
586	donext:
587		TAILQ_REMOVE(&object->memq, p, listq);
588		TAILQ_INSERT_TAIL(&object->memq, p, listq);
589		pgcount--;
590	}
591	if ((!allclean && (pass == 0)) ||
592	    (entireobj && (object->flags & OBJ_WRITEABLE))) {
593		pass = 1;
594		if (entireobj)
595			object->flags &= ~OBJ_WRITEABLE;
596		goto startover;
597	}
598	return;
599}
600
601
602void
603vm_object_page_clean(object, start, end, syncio)
604	register vm_object_t object;
605	register vm_offset_t start;
606	register vm_offset_t end;
607	boolean_t syncio;
608{
609	if (object->pager && (object->flags & OBJ_WRITEABLE) &&
610		(object->pager->pg_type == PG_VNODE)) {
611		vn_pager_t vnp = (vn_pager_t) object->pager->pg_data;
612		struct vnode *vp;
613
614		vp = vnp->vnp_vp;
615		vget(vp, 1);
616		_vm_object_page_clean(object, start, end, syncio);
617		vput(vp);
618	} else {
619		_vm_object_page_clean(object, start, end, syncio);
620	}
621}
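/*
 * Illustrative sketch of how the cleaning entry point above is driven,
 * following the "start == end cleans everything" convention documented
 * before _vm_object_page_clean().  The caller is assumed to hold the
 * object lock; the function name is hypothetical.
 */
#if 0	/* example only -- not compiled */
static void
example_page_clean(vm_object_t object)
{
	/* flush every dirty resident page in the object */
	vm_object_page_clean(object, 0, 0, FALSE);

	/* flush only the first 16 pages; the range is page-aligned inside */
	vm_object_page_clean(object, 0, 16 * PAGE_SIZE, FALSE);
}
#endif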
622
623void
624vm_object_cache_clean()
625{
626	vm_object_t object;
627	vm_object_cache_lock();
628	while(1) {
629		object = vm_object_cached_list.tqh_first;
630		while( object) {
631			if( (object->flags & OBJ_WRITEABLE) &&
632				object->pager &&
633				object->pager->pg_type == PG_VNODE) {
634				vm_object_page_clean(object, 0, 0, 0);
635				goto loop;
636			}
637			object = object->cached_list.tqe_next;
638		}
639		return;
640loop:	continue;
641	}
642}
643
644/*
645 *	vm_object_deactivate_pages
646 *
647 *	Deactivate all pages in the specified object.  (Keep its pages
648 *	in memory even though it is no longer referenced.)
649 *
650 *	The object must be locked.
651 */
652void
653vm_object_deactivate_pages(object)
654	register vm_object_t object;
655{
656	register vm_page_t p, next;
657
658	for (p = object->memq.tqh_first; p != NULL; p = next) {
659		next = p->listq.tqe_next;
660		vm_page_lock_queues();
661		vm_page_deactivate(p);
662		vm_page_unlock_queues();
663	}
664}
665
666/*
667 *	Trim the object cache to size.
668 */
669void
670vm_object_cache_trim()
671{
672	register vm_object_t object;
673
674	vm_object_cache_lock();
675	while (vm_object_cached > vm_object_cache_max) {
676		object = vm_object_cached_list.tqh_first;
677		vm_object_cache_unlock();
678
679		if (object != vm_object_lookup(object->pager))
680			panic("vm_object_cache_trim: I'm sooo confused.");
681
682		pager_cache(object, FALSE);
683
684		vm_object_cache_lock();
685	}
686	vm_object_cache_unlock();
687}
688
689
690/*
691 *	vm_object_pmap_copy:
692 *
693 *	Makes all physical pages in the specified
694 *	object range copy-on-write.  No writeable
695 *	references to these pages should remain.
696 *
697 *	The object must *not* be locked.
698 */
699void
700vm_object_pmap_copy(object, start, end)
701	register vm_object_t object;
702	register vm_offset_t start;
703	register vm_offset_t end;
704{
705	register vm_page_t p;
706
707	if (object == NULL)
708		return;
709
710	vm_object_lock(object);
711	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
712		if ((start <= p->offset) && (p->offset < end)) {
713			vm_page_protect(p, VM_PROT_READ);
714			p->flags |= PG_COPYONWRITE;
715		}
716	}
717	vm_object_unlock(object);
718}
719
720/*
721 *	vm_object_pmap_remove:
722 *
723 *	Removes all physical pages in the specified
724 *	object range from all physical maps.
725 *
726 *	The object must *not* be locked.
727 */
728void
729vm_object_pmap_remove(object, start, end)
730	register vm_object_t object;
731	register vm_offset_t start;
732	register vm_offset_t end;
733{
734	register vm_page_t p;
735	int s;
736
737	if (object == NULL)
738		return;
739	++object->paging_in_progress;
740
741	vm_object_lock(object);
742again:
743	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
744		if ((start <= p->offset) && (p->offset < end)) {
745			s = splhigh();
746			if ((p->flags & PG_BUSY) || p->busy) {
747				p->flags |= PG_WANTED;
748				tsleep((caddr_t) p, PVM, "vmopmr", 0);
749				splx(s);
750				goto again;
751			}
752			splx(s);
753			vm_page_protect(p, VM_PROT_NONE);
754		}
755	}
756	vm_object_unlock(object);
757	vm_object_pip_wakeup(object);
758}
759
760/*
761 *	vm_object_copy:
762 *
763 *	Create a new object which is a copy of an existing
764 *	object, and mark all of the pages in the existing
765 *	object 'copy-on-write'.  The new object has one reference.
766 *	Returns the new object.
767 *
768 *	May defer the copy until later if the object has no pager or
769 *	only an internal (default) pager.
770 */
771void
772vm_object_copy(src_object, src_offset, size,
773    dst_object, dst_offset, src_needs_copy)
774	register vm_object_t src_object;
775	vm_offset_t src_offset;
776	vm_size_t size;
777	vm_object_t *dst_object;/* OUT */
778	vm_offset_t *dst_offset;/* OUT */
779	boolean_t *src_needs_copy;	/* OUT */
780{
781	register vm_object_t new_copy;
782	register vm_object_t old_copy;
783	vm_offset_t new_start, new_end;
784
785	register vm_page_t p;
786
787	if (src_object == NULL) {
788		/*
789		 * Nothing to copy
790		 */
791		*dst_object = NULL;
792		*dst_offset = 0;
793		*src_needs_copy = FALSE;
794		return;
795	}
796	/*
797	 * If the object's pager is null_pager or the default pager, we don't
798	 * have to make a copy of it.  Instead, we set the needs copy flag and
799	 * make a shadow later.
800	 */
801
802	vm_object_lock(src_object);
803
804	/*
805	 * Try to collapse the object before copying it.
806	 */
807
808	vm_object_collapse(src_object);
809
810	if (src_object->pager == NULL ||
811	    (src_object->flags & OBJ_INTERNAL)) {
812
813		/*
814		 * Make another reference to the object
815		 */
816		src_object->ref_count++;
817
818		/*
819		 * Mark all of the pages copy-on-write.
820		 */
821		for (p = src_object->memq.tqh_first; p; p = p->listq.tqe_next)
822			if (src_offset <= p->offset &&
823			    p->offset < src_offset + size)
824				p->flags |= PG_COPYONWRITE;
825		vm_object_unlock(src_object);
826
827		*dst_object = src_object;
828		*dst_offset = src_offset;
829
830		/*
831		 * Must make a shadow when write is desired
832		 */
833		*src_needs_copy = TRUE;
834		return;
835	}
836	/*
837	 * If the object has a pager, the pager wants to see all of the
838	 * changes.  We need a copy-object for the changed pages.
839	 *
840	 * If there is a copy-object, and it is empty, no changes have been made
841	 * to the object since the copy-object was made.  We can use the same
842	 * copy-object.
843	 */
844
845Retry1:
846	old_copy = src_object->copy;
847	if (old_copy != NULL) {
848		/*
849		 * Try to get the locks (out of order)
850		 */
851		if (!vm_object_lock_try(old_copy)) {
852			vm_object_unlock(src_object);
853
854			/* should spin a bit here... */
855			tsleep((caddr_t) old_copy, PVM, "cpylck", 1);
856			vm_object_lock(src_object);
857			goto Retry1;
858		}
859		if (old_copy->resident_page_count == 0 &&
860		    old_copy->pager == NULL) {
861			/*
862			 * Return another reference to the existing
863			 * copy-object.
864			 */
865			old_copy->ref_count++;
866			vm_object_unlock(old_copy);
867			vm_object_unlock(src_object);
868			*dst_object = old_copy;
869			*dst_offset = src_offset;
870			*src_needs_copy = FALSE;
871			return;
872		}
873		vm_object_unlock(old_copy);
874	}
875	vm_object_unlock(src_object);
876
877	/*
878	 * If the object has a pager, the pager wants to see all of the
879	 * changes.  We must make a copy-object and put the changed pages
880	 * there.
881	 *
882	 * The copy-object is always made large enough to completely shadow the
883	 * original object, since it may have several users who want to shadow
884	 * the original object at different points.
885	 */
886
887	new_copy = vm_object_allocate(src_object->size);
888
889Retry2:
890	vm_object_lock(src_object);
891	/*
892	 * Copy object may have changed while we were unlocked
893	 */
894	old_copy = src_object->copy;
895	if (old_copy != NULL) {
896		/*
897		 * Try to get the locks (out of order)
898		 */
899		if (!vm_object_lock_try(old_copy)) {
900			vm_object_unlock(src_object);
901			tsleep((caddr_t) old_copy, PVM, "cpylck", 1);
902			goto Retry2;
903		}
904		/*
905		 * Consistency check
906		 */
907		if (old_copy->shadow != src_object ||
908		    old_copy->shadow_offset != (vm_offset_t) 0)
909			panic("vm_object_copy: copy/shadow inconsistency");
910
911		/*
912		 * Make the old copy-object shadow the new one. It will
913		 * receive no more pages from the original object.
914		 */
915
916		src_object->ref_count--;	/* remove ref. from old_copy */
917		if (old_copy->shadow)
918			TAILQ_REMOVE(&old_copy->shadow->reverse_shadow_head, old_copy, reverse_shadow_list);
919		old_copy->shadow = new_copy;
920		TAILQ_INSERT_TAIL(&old_copy->shadow->reverse_shadow_head, old_copy, reverse_shadow_list);
921		new_copy->ref_count++;	/* locking not needed - we have the
922					 * only pointer */
923		vm_object_unlock(old_copy);	/* done with old_copy */
924	}
925	new_start = (vm_offset_t) 0;	/* always shadow original at 0 */
926	new_end = (vm_offset_t) new_copy->size;	/* for the whole object */
927
928	/*
929	 * Point the new copy at the existing object.
930	 */
931
932	new_copy->shadow = src_object;
933	TAILQ_INSERT_TAIL(&new_copy->shadow->reverse_shadow_head, new_copy, reverse_shadow_list);
934	new_copy->shadow_offset = new_start;
935	src_object->ref_count++;
936	src_object->copy = new_copy;
937
938	/*
939	 * Mark all the affected pages of the existing object copy-on-write.
940	 */
941	for (p = src_object->memq.tqh_first; p != NULL; p = p->listq.tqe_next)
942		if ((new_start <= p->offset) && (p->offset < new_end))
943			p->flags |= PG_COPYONWRITE;
944
945	vm_object_unlock(src_object);
946
947	*dst_object = new_copy;
948	*dst_offset = src_offset - new_start;
949	*src_needs_copy = FALSE;
950}
951
952/*
953 *	vm_object_shadow:
954 *
955 *	Create a new object which is backed by the
956 *	specified existing object range.  The source
957 *	object reference is deallocated.
958 *
959 *	The new object and offset into that object
960 *	are returned in the source parameters.
961 */
962
963void
964vm_object_shadow(object, offset, length)
965	vm_object_t *object;	/* IN/OUT */
966	vm_offset_t *offset;	/* IN/OUT */
967	vm_size_t length;
968{
969	register vm_object_t source;
970	register vm_object_t result;
971
972	source = *object;
973
974	/*
975	 * Allocate a new object with the given length
976	 */
977
978	if ((result = vm_object_allocate(length)) == NULL)
979		panic("vm_object_shadow: no object for shadowing");
980
981	/*
982	 * The new object shadows the source object, adding a reference to it.
983	 * Our caller changes his reference to point to the new object,
984	 * removing a reference to the source object.  Net result: no change
985	 * of reference count.
986	 */
987	result->shadow = source;
988	if (source)
989		TAILQ_INSERT_TAIL(&result->shadow->reverse_shadow_head, result, reverse_shadow_list);
990
991	/*
992	 * Store the offset into the source object, and fix up the offset into
993	 * the new object.
994	 */
995
996	result->shadow_offset = *offset;
997
998	/*
999	 * Return the new things
1000	 */
1001
1002	*offset = 0;
1003	*object = result;
1004}
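/*
 * Illustrative sketch of how a caller combines vm_object_copy() and
 * vm_object_shadow().  For an internal (pagerless) source, the copy is
 * deferred: the pages are merely marked copy-on-write and src_needs_copy
 * is returned TRUE, and the caller interposes a shadow object before
 * allowing writes.  The local names are hypothetical.
 */
#if 0	/* example only -- not compiled */
static void
example_copy_then_shadow(vm_object_t src_object, vm_offset_t src_offset,
    vm_size_t size)
{
	vm_object_t dst_object;
	vm_offset_t dst_offset;
	boolean_t src_needs_copy;

	vm_object_copy(src_object, src_offset, size,
	    &dst_object, &dst_offset, &src_needs_copy);

	if (src_needs_copy) {
		/*
		 * A writeable mapping is wanted: push a new internal object
		 * in front of dst_object.  The pointer and offset are
		 * updated in place and the reference held on dst_object is
		 * consumed by the new shadow.
		 */
		vm_object_shadow(&dst_object, &dst_offset, size);
	}
}
#endif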
1005
1006/*
1007 *	vm_object_hash hashes a pager pointer into an object hash table bucket.
1008 */
1009
1010#define vm_object_hash(pager) \
1011	(((unsigned)pager >> 5)%VM_OBJECT_HASH_COUNT)
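/*
 * For example, a pager structure at address 0x1000 hashes to bucket
 * (0x1000 >> 5) % 509 == 128.
 */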
1012
1013/*
1014 *	vm_object_lookup looks in the object hash table for an object
1015 *	with the specified pager, reviving it from the cache if necessary.
1016 */
1017
1018vm_object_t
1019vm_object_lookup(pager)
1020	vm_pager_t pager;
1021{
1022	register vm_object_hash_entry_t entry;
1023	vm_object_t object;
1024
1025	vm_object_cache_lock();
1026
1027	for (entry = vm_object_hashtable[vm_object_hash(pager)].tqh_first;
1028	    entry != NULL;
1029	    entry = entry->hash_links.tqe_next) {
1030		object = entry->object;
1031		if (object->pager == pager) {
1032			vm_object_lock(object);
1033			if (object->ref_count == 0) {
1034				TAILQ_REMOVE(&vm_object_cached_list, object,
1035				    cached_list);
1036				vm_object_cached--;
1037			}
1038			object->ref_count++;
1039			vm_object_unlock(object);
1040			vm_object_cache_unlock();
1041			return (object);
1042		}
1043	}
1044
1045	vm_object_cache_unlock();
1046	return (NULL);
1047}
1048
1049/*
1050 *	vm_object_enter enters the specified object/pager pair into
1051 *	the hash table.
1052 */
1053
1054void
1055vm_object_enter(object, pager)
1056	vm_object_t object;
1057	vm_pager_t pager;
1058{
1059	struct vm_object_hash_head *bucket;
1060	register vm_object_hash_entry_t entry;
1061
1062	/*
1063	 * We don't cache null objects, and we can't cache objects with the
1064	 * null pager.
1065	 */
1066
1067	if (object == NULL)
1068		return;
1069	if (pager == NULL)
1070		return;
1071
1072	bucket = &vm_object_hashtable[vm_object_hash(pager)];
1073	entry = (vm_object_hash_entry_t)
1074	    malloc((u_long) sizeof *entry, M_VMOBJHASH, M_WAITOK);
1075	entry->object = object;
1076
1077	vm_object_cache_lock();
1078	TAILQ_INSERT_TAIL(bucket, entry, hash_links);
1079	vm_object_cache_unlock();
1080}
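/*
 * Illustrative sketch of the pairing a pager implementation is expected
 * to follow: vm_object_enter() makes the object findable through
 * vm_object_lookup(pager), and vm_object_remove() (called from
 * vm_object_deallocate() for non-internal objects) undoes it.  The flag
 * handling shown is schematic, not a verbatim pager.
 */
#if 0	/* example only -- not compiled */
static vm_object_t
example_pager_object_setup(vm_pager_t pager, vm_size_t size)
{
	vm_object_t object;

	object = vm_object_lookup(pager);
	if (object == NULL) {
		object = vm_object_allocate(size);
		object->pager = pager;
		object->flags &= ~OBJ_INTERNAL;		/* externally pageable */
		object->flags |= OBJ_CANPERSIST;	/* may ride the cache */
		vm_object_enter(object, pager);
	}
	return (object);
}
#endif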
1081
1082/*
1083 *	vm_object_remove:
1084 *
1085 *	Remove the pager from the hash table.
1086 *	Note:  This assumes that the object cache
1087 *	is locked.  XXX this should be fixed
1088 *	by reorganizing vm_object_deallocate.
1089 */
1090void
1091vm_object_remove(pager)
1092	register vm_pager_t pager;
1093{
1094	struct vm_object_hash_head *bucket;
1095	register vm_object_hash_entry_t entry;
1096	register vm_object_t object;
1097
1098	bucket = &vm_object_hashtable[vm_object_hash(pager)];
1099
1100	for (entry = bucket->tqh_first;
1101	    entry != NULL;
1102	    entry = entry->hash_links.tqe_next) {
1103		object = entry->object;
1104		if (object->pager == pager) {
1105			TAILQ_REMOVE(bucket, entry, hash_links);
1106			free((caddr_t) entry, M_VMOBJHASH);
1107			break;
1108		}
1109	}
1110}
1111
1112/*
1113 * this version of collapse allows the operation to occur earlier and
1114 * when paging_in_progress is true for an object...  This is not a complete
1115 * operation, but should plug 99.9% of the rest of the leaks.
1116 */
1117static void
1118vm_object_qcollapse(object)
1119	register vm_object_t object;
1120{
1121	register vm_object_t backing_object;
1122	register vm_offset_t backing_offset, new_offset;
1123	register vm_page_t p, pp;
1124	register vm_size_t size;
1125
1126	backing_object = object->shadow;
1127	if (backing_object->shadow != NULL &&
1128	    backing_object->shadow->copy == backing_object)
1129		return;
1130	if (backing_object->ref_count != 1)
1131		return;
1132
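	/*
	 * The temporary reference bias below (+2, dropped again at the end
	 * of the scan) appears intended to keep the backing object alive
	 * and out of other deallocation/collapse paths while its pages are
	 * freed or renamed.
	 */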
1133	backing_object->ref_count += 2;
1134
1135	backing_offset = object->shadow_offset;
1136	size = object->size;
1137	p = backing_object->memq.tqh_first;
1138	while (p) {
1139		vm_page_t next;
1140
1141		next = p->listq.tqe_next;
1142		if ((p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) ||
1143		    !p->valid || p->hold_count || p->wire_count || p->busy) {
1144			p = next;
1145			continue;
1146		}
1147		vm_page_protect(p, VM_PROT_NONE);
1148		new_offset = (p->offset - backing_offset);
1149		if (p->offset < backing_offset ||
1150		    new_offset >= size) {
1151			if (backing_object->pager)
1152				swap_pager_freespace(backing_object->pager,
1153				    backing_object->paging_offset + p->offset, PAGE_SIZE);
1154			vm_page_lock_queues();
1155			vm_page_free(p);
1156			vm_page_unlock_queues();
1157		} else {
1158			pp = vm_page_lookup(object, new_offset);
1159			if (pp != NULL || (object->pager && vm_pager_has_page(object->pager,
1160				    object->paging_offset + new_offset))) {
1161				if (backing_object->pager)
1162					swap_pager_freespace(backing_object->pager,
1163					    backing_object->paging_offset + p->offset, PAGE_SIZE);
1164				vm_page_lock_queues();
1165				vm_page_free(p);
1166				vm_page_unlock_queues();
1167			} else {
1168				if( backing_object->pager)
1169					swap_pager_freespace(backing_object->pager,
1170					    backing_object->paging_offset + p->offset, PAGE_SIZE);
1171				vm_page_rename(p, object, new_offset);
1172				p->dirty = VM_PAGE_BITS_ALL;
1173			}
1174		}
1175		p = next;
1176	}
1177	backing_object->ref_count -= 2;
1178}
1179
1180boolean_t vm_object_collapse_allowed = TRUE;
1181
1182/*
1183 *	vm_object_collapse:
1184 *
1185 *	Collapse an object with the object backing it.
1186 *	Pages in the backing object are moved into the
1187 *	parent, and the backing object is deallocated.
1188 *
1189 *	Requires that the object be locked and the page
1190 *	queues be unlocked.
1191 *
1192 *	This routine has significant changes by John S. Dyson
1193 *	to fix some swap memory leaks.  18 Dec 93
1194 *
1195 */
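/*
 * Illustration of the two cases handled below, for a chain
 * "object -> backing_object -> next":
 *
 *	backing_object->ref_count == 1 (collapse):
 *		backing_object's resident pages and pager are pulled up
 *		into object, object->shadow is repointed at "next", and
 *		backing_object is freed.
 *
 *	backing_object->ref_count > 1 (bypass):
 *		only taken when backing_object has no pager and every
 *		backing page visible to object is already covered by
 *		object's own pages or pager; object->shadow is simply
 *		repointed at "next" and backing_object loses one reference.
 */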
1196void
1197vm_object_collapse(object)
1198	register vm_object_t object;
1199
1200{
1201	register vm_object_t backing_object;
1202	register vm_offset_t backing_offset;
1203	register vm_size_t size;
1204	register vm_offset_t new_offset;
1205	register vm_page_t p, pp;
1206
1207	if (!vm_object_collapse_allowed)
1208		return;
1209
1210	while (TRUE) {
1211		/*
1212		 * Verify that the conditions are right for collapse:
1213		 *
1214		 * The object exists and no pages in it are currently being paged
1215		 * out.
1216		 */
1217		if (object == NULL)
1218			return;
1219
1220		/*
1221		 * Make sure there is a backing object.
1222		 */
1223		if ((backing_object = object->shadow) == NULL)
1224			return;
1225
1226		/*
1227		 * we check the backing object first, because it is most likely
1228		 * !OBJ_INTERNAL.
1229		 */
1230		if ((backing_object->flags & OBJ_INTERNAL) == 0 ||
1231		    (backing_object->flags & OBJ_DEAD) ||
1232		    (object->flags & OBJ_INTERNAL) == 0 ||
1233		    (object->flags & OBJ_DEAD))
1234			return;
1235
1236		if (object->paging_in_progress != 0 ||
1237		    backing_object->paging_in_progress != 0) {
1238			if (vm_object_lock_try(backing_object)) {
1239				vm_object_qcollapse(object);
1240				vm_object_unlock(backing_object);
1241			}
1242			return;
1243		}
1244
1245		vm_object_lock(backing_object);
1246
1247		/*
1248		 * The backing object can't be a copy-object: the
1249		 * shadow_offset for the copy-object must stay as 0.
1250		 * Furthermore (for the 'we have all the pages' case), if we
1251		 * bypass backing_object and just shadow the next object in
1252		 * the chain, old pages from that object would then have to be
1253		 * copied BOTH into the (former) backing_object and into the
1254		 * parent object.
1255		 */
1256		if (backing_object->shadow != NULL &&
1257		    backing_object->shadow->copy == backing_object) {
1258			vm_object_unlock(backing_object);
1259			return;
1260		}
1261
1262		/*
1263		 * We know that we can either collapse the backing object (if
1264		 * the parent is the only reference to it) or (perhaps) remove
1265		 * the parent's reference to it.
1266		 */
1267
1268		backing_offset = object->shadow_offset;
1269		size = object->size;
1270
1271		/*
1272		 * If there is exactly one reference to the backing object, we
1273		 * can collapse it into the parent.
1274		 */
1275
1276		if (backing_object->ref_count == 1) {
1277
1278			backing_object->flags |= OBJ_DEAD;
1279			/*
1280			 * We can collapse the backing object.
1281			 *
1282			 * Move all in-memory pages from backing_object to the
1283			 * parent.  Pages that have been paged out will be
1284			 * overwritten by any of the parent's pages that
1285			 * shadow them.
1286			 */
1287
1288			while ((p = backing_object->memq.tqh_first) != 0) {
1289
1290				new_offset = (p->offset - backing_offset);
1291
1292				/*
1293				 * If the parent has a page here, or if this
1294				 * page falls outside the parent, dispose of
1295				 * it.
1296				 *
1297				 * Otherwise, move it as planned.
1298				 */
1299
1300				if (p->offset < backing_offset ||
1301				    new_offset >= size) {
1302					vm_page_lock_queues();
1303					vm_page_protect(p, VM_PROT_NONE);
1304					PAGE_WAKEUP(p);
1305					vm_page_free(p);
1306					vm_page_unlock_queues();
1307				} else {
1308					pp = vm_page_lookup(object, new_offset);
1309					if (pp != NULL || (object->pager && vm_pager_has_page(object->pager,
1310					    object->paging_offset + new_offset))) {
1311						vm_page_lock_queues();
1312						vm_page_protect(p, VM_PROT_NONE);
1313						PAGE_WAKEUP(p);
1314						vm_page_free(p);
1315						vm_page_unlock_queues();
1316					} else {
1317						vm_page_rename(p, object, new_offset);
1318					}
1319				}
1320			}
1321
1322			/*
1323			 * Move the pager from backing_object to object.
1324			 */
1325
1326			if (backing_object->pager) {
1327				backing_object->paging_in_progress++;
1328				if (object->pager) {
1329					vm_pager_t bopager;
1330
1331					object->paging_in_progress++;
1332					/*
1333					 * copy shadow object pages into ours
1334					 * and destroy unneeded pages in
1335					 * shadow object.
1336					 */
1337					bopager = backing_object->pager;
1338					backing_object->pager = NULL;
1339					swap_pager_copy(
1340					    bopager, backing_object->paging_offset,
1341					    object->pager, object->paging_offset,
1342					    object->shadow_offset);
1343					vm_object_pip_wakeup(object);
1344				} else {
1345					object->paging_in_progress++;
1346					/*
1347					 * grab the shadow objects pager
1348					 */
1349					object->pager = backing_object->pager;
1350					object->paging_offset = backing_object->paging_offset + backing_offset;
1351					backing_object->pager = NULL;
1352					/*
1353					 * free unnecessary blocks
1354					 */
1355					swap_pager_freespace(object->pager, 0, object->paging_offset);
1356					vm_object_pip_wakeup(object);
1357				}
1358
1359				vm_object_pip_wakeup(backing_object);
1360			}
1361			/*
1362			 * Object now shadows whatever backing_object did.
1363			 * Note that the reference to backing_object->shadow
1364			 * moves from within backing_object to within object.
1365			 */
1366
1367			TAILQ_REMOVE(&object->shadow->reverse_shadow_head, object,
1368			    reverse_shadow_list);
1369			if (backing_object->shadow)
1370				TAILQ_REMOVE(&backing_object->shadow->reverse_shadow_head,
1371				    backing_object, reverse_shadow_list);
1372			object->shadow = backing_object->shadow;
1373			if (object->shadow)
1374				TAILQ_INSERT_TAIL(&object->shadow->reverse_shadow_head,
1375				    object, reverse_shadow_list);
1376
1377			object->shadow_offset += backing_object->shadow_offset;
1378			/*
1379			 * Discard backing_object.
1380			 *
1381			 * Since the backing object has no pages, no pager left,
1382			 * and no object references within it, all that is
1383			 * necessary is to dispose of it.
1384			 */
1385
1386			vm_object_unlock(backing_object);
1387
1388			simple_lock(&vm_object_list_lock);
1389			TAILQ_REMOVE(&vm_object_list, backing_object,
1390			    object_list);
1391			vm_object_count--;
1392			simple_unlock(&vm_object_list_lock);
1393
1394			free((caddr_t) backing_object, M_VMOBJ);
1395
1396			object_collapses++;
1397		} else {
1398			/*
1399			 * If all of the pages in the backing object are
1400			 * shadowed by the parent object, the parent object no
1401			 * longer has to shadow the backing object; it can
1402			 * shadow the next one in the chain.
1403			 *
1404			 * The backing object must not be paged out - we'd have
1405			 * to check all of the paged-out pages, as well.
1406			 */
1407
1408			if (backing_object->pager != NULL) {
1409				vm_object_unlock(backing_object);
1410				return;
1411			}
1412			/*
1413			 * Should have a check for a 'small' number of pages
1414			 * here.
1415			 */
1416
1417			for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) {
1418				new_offset = (p->offset - backing_offset);
1419
1420				/*
1421				 * If the parent has a page here, or if this
1422				 * page falls outside the parent, keep going.
1423				 *
1424				 * Otherwise, the backing_object must be left in
1425				 * the chain.
1426				 */
1427
1428				if (p->offset >= backing_offset &&
1429				    new_offset <= size &&
1430				    ((pp = vm_page_lookup(object, new_offset)) == NULL ||
1431					!pp->valid) &&
1432				    (!object->pager || !vm_pager_has_page(object->pager, object->paging_offset + new_offset))) {
1433					/*
1434					 * Page still needed. Can't go any
1435					 * further.
1436					 */
1437					vm_object_unlock(backing_object);
1438					return;
1439				}
1440			}
1441
1442			/*
1443			 * Make the parent shadow the next object in the
1444			 * chain.  Deallocating backing_object will not remove
1445			 * it, since its reference count is at least 2.
1446			 */
1447
1448			TAILQ_REMOVE(&object->shadow->reverse_shadow_head,
1449			    object, reverse_shadow_list);
1450			vm_object_reference(object->shadow = backing_object->shadow);
1451			if (object->shadow)
1452				TAILQ_INSERT_TAIL(&object->shadow->reverse_shadow_head,
1453				    object, reverse_shadow_list);
1454			object->shadow_offset += backing_object->shadow_offset;
1455
1456			/*
1457			 * Backing object might have had a copy pointer to us.
1458			 * If it did, clear it.
1459			 */
1460			if (backing_object->copy == object) {
1461				backing_object->copy = NULL;
1462			}
1463			/*
1464			 * Drop the reference count on backing_object. Since
1465			 * its ref_count was at least 2, it will not vanish;
1466			 * so we don't need to call vm_object_deallocate.
1467			 */
1468			if (backing_object->ref_count == 1)
1469				printf("should have called obj deallocate\n");
1470			backing_object->ref_count--;
1471			vm_object_unlock(backing_object);
1472
1473			object_bypasses++;
1474
1475		}
1476
1477		/*
1478		 * Try again with this object's new backing object.
1479		 */
1480	}
1481}
1482
1483/*
1484 *	vm_object_page_remove: [internal]
1485 *
1486 *	Removes all physical pages in the specified
1487 *	object range from the object's list of pages.
1488 *
1489 *	The object must be locked.
1490 */
1491void
1492vm_object_page_remove(object, start, end, clean_only)
1493	register vm_object_t object;
1494	register vm_offset_t start;
1495	register vm_offset_t end;
1496	boolean_t clean_only;
1497{
1498	register vm_page_t p, next;
1499	vm_offset_t size;
1500	int s;
1501
1502	if (object == NULL)
1503		return;
1504
1505	object->paging_in_progress++;
1506	start = trunc_page(start);
1507	end = round_page(end);
1508again:
1509	size = end - start;
1510	if (size > 4 * PAGE_SIZE || size >= object->size / 4) {
1511		for (p = object->memq.tqh_first; p != NULL; p = next) {
1512			next = p->listq.tqe_next;
1513			if ((start <= p->offset) && (p->offset < end)) {
1514				s = splhigh();
1515				if (p->bmapped) {
1516					splx(s);
1517					continue;
1518				}
1519				if ((p->flags & PG_BUSY) || p->busy) {
1520					p->flags |= PG_WANTED;
1521					tsleep((caddr_t) p, PVM, "vmopar", 0);
1522					splx(s);
1523					goto again;
1524				}
1525				splx(s);
1526				if (clean_only) {
1527					vm_page_test_dirty(p);
1528					if (p->valid & p->dirty)
1529						continue;
1530				}
1531				vm_page_protect(p, VM_PROT_NONE);
1532				vm_page_lock_queues();
1533				PAGE_WAKEUP(p);
1534				vm_page_free(p);
1535				vm_page_unlock_queues();
1536			}
1537		}
1538	} else {
1539		while (size > 0) {
1540			while ((p = vm_page_lookup(object, start)) != 0) {
1541				s = splhigh();
1542				if (p->bmapped) {
1543					splx(s);
1544					break;
1545				}
1546				if ((p->flags & PG_BUSY) || p->busy) {
1547					p->flags |= PG_WANTED;
1548					tsleep((caddr_t) p, PVM, "vmopar", 0);
1549					splx(s);
1550					goto again;
1551				}
1552				splx(s);
1553				if (clean_only) {
1554					vm_page_test_dirty(p);
1555					if (p->valid & p->dirty)
1556						break;
1557				}
1558				vm_page_protect(p, VM_PROT_NONE);
1559				vm_page_lock_queues();
1560				PAGE_WAKEUP(p);
1561				vm_page_free(p);
1562				vm_page_unlock_queues();
1563			}
1564			start += PAGE_SIZE;
1565			size -= PAGE_SIZE;
1566		}
1567	}
1568	vm_object_pip_wakeup(object);
1569}
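/*
 * Illustrative sketch: a typical use of vm_object_page_remove() is
 * throwing away the tail of an object when the backing store shrinks;
 * passing clean_only discards only pages that are not dirty.  The
 * function and size names are hypothetical.
 */
#if 0	/* example only -- not compiled */
static void
example_truncate_object(vm_object_t object, vm_size_t oldsize,
    vm_size_t newsize)
{
	if (newsize < oldsize)
		vm_object_page_remove(object, round_page(newsize),
		    round_page(oldsize), FALSE);
}
#endif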
1570
1571/*
1572 *	Routine:	vm_object_coalesce
1573 *	Function:	Coalesces two objects backing up adjoining
1574 *			regions of memory into a single object.
1575 *
1576 *	returns TRUE if objects were combined.
1577 *
1578 *	NOTE:	Only works at the moment if the second object is NULL -
1579 *		if it's not, which object do we lock first?
1580 *
1581 *	Parameters:
1582 *		prev_object	First object to coalesce
1583 *		prev_offset	Offset into prev_object
1584 *		next_object	Second object to coalesce
1585 *		next_offset	Offset into next_object
1586 *
1587 *		prev_size	Size of reference to prev_object
1588 *		next_size	Size of reference to next_object
1589 *
1590 *	Conditions:
1591 *	The object must *not* be locked.
1592 */
1593boolean_t
1594vm_object_coalesce(prev_object, next_object,
1595    prev_offset, next_offset,
1596    prev_size, next_size)
1597	register vm_object_t prev_object;
1598	vm_object_t next_object;
1599	vm_offset_t prev_offset, next_offset;
1600	vm_size_t prev_size, next_size;
1601{
1602	vm_size_t newsize;
1603
1604	if (next_object != NULL) {
1605		return (FALSE);
1606	}
1607	if (prev_object == NULL) {
1608		return (TRUE);
1609	}
1610	vm_object_lock(prev_object);
1611
1612	/*
1613	 * Try to collapse the object first
1614	 */
1615	vm_object_collapse(prev_object);
1616
1617	/*
1618	 * Can't coalesce if: more than one reference, paged out, shadows
1619	 * another object, or has a copy elsewhere (any of which mean that
1620	 * the pages not mapped to prev_entry may be in use anyway).
1621	 */
1622
1623	if (prev_object->ref_count > 1 ||
1624	    prev_object->pager != NULL ||
1625	    prev_object->shadow != NULL ||
1626	    prev_object->copy != NULL) {
1627		vm_object_unlock(prev_object);
1628		return (FALSE);
1629	}
1630	/*
1631	 * Remove any pages that may still be in the object from a previous
1632	 * deallocation.
1633	 */
1634
1635	vm_object_page_remove(prev_object,
1636	    prev_offset + prev_size,
1637	    prev_offset + prev_size + next_size, FALSE);
1638
1639	/*
1640	 * Extend the object if necessary.
1641	 */
1642	newsize = prev_offset + prev_size + next_size;
1643	if (newsize > prev_object->size)
1644		prev_object->size = newsize;
1645
1646	vm_object_unlock(prev_object);
1647	return (TRUE);
1648}
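/*
 * Worked example: growing an anonymous region in place.  With
 * prev_offset = 0, prev_size = 4 pages and next_size = 2 pages, a
 * successful coalesce clears any stale pages in the two-page window past
 * the old end and extends prev_object->size to cover 6 pages, so the
 * caller can reuse prev_object instead of allocating a second object.
 * The wrapper below is only an illustration.
 */
#if 0	/* example only -- not compiled */
static boolean_t
example_coalesce(vm_object_t prev_object)
{
	return (vm_object_coalesce(prev_object, NULL,
	    (vm_offset_t) 0, (vm_offset_t) 0,
	    (vm_size_t) (4 * PAGE_SIZE), (vm_size_t) (2 * PAGE_SIZE)));
}
#endif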
1649
1650/*
1651 * returns page after looking up in shadow chain
1652 */
1653
1654vm_page_t
1655vm_object_page_lookup(object, offset)
1656	vm_object_t object;
1657	vm_offset_t offset;
1658{
1659	vm_page_t m;
1660
1661	if (!(m = vm_page_lookup(object, offset))) {
1662		if (!object->shadow)
1663			return 0;
1664		else
1665			return vm_object_page_lookup(object->shadow, offset + object->shadow_offset);
1666	}
1667	return m;
1668}
1669
1670int
1671_vm_object_in_map(map, object, entry)
1672	vm_map_t map;
1673	vm_object_t object;
1674	vm_map_entry_t entry;
1675{
1676	vm_map_t tmpm;
1677	vm_map_entry_t tmpe;
1678	vm_object_t obj;
1679	int entcount;
1680
1681	if (map == 0)
1682		return 0;
1683
1684	if (entry == 0) {
1685		tmpe = map->header.next;
1686		entcount = map->nentries;
1687		while (entcount-- && (tmpe != &map->header)) {
1688			if( _vm_object_in_map(map, object, tmpe)) {
1689				return 1;
1690			}
1691			tmpe = tmpe->next;
1692		}
1693	} else if (entry->is_sub_map || entry->is_a_map) {
1694		tmpm = entry->object.share_map;
1695		tmpe = tmpm->header.next;
1696		entcount = tmpm->nentries;
1697		while (entcount-- && tmpe != &tmpm->header) {
1698			if( _vm_object_in_map(tmpm, object, tmpe)) {
1699				return 1;
1700			}
1701			tmpe = tmpe->next;
1702		}
1703	} else if ((obj = entry->object.vm_object) != NULL) {
1704		for(; obj; obj=obj->shadow)
1705			if( obj == object) {
1706				return 1;
1707			}
1708	}
1709	return 0;
1710}
1711
1712int
1713vm_object_in_map( object)
1714	vm_object_t object;
1715{
1716	struct proc *p;
1717	for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
1718		if( !p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
1719			continue;
1720/*
1721		if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
1722			continue;
1723		}
1724*/
1725		if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0))
1726			return 1;
1727	}
1728	if( _vm_object_in_map( kernel_map, object, 0))
1729		return 1;
1730	if( _vm_object_in_map( kmem_map, object, 0))
1731		return 1;
1732	if( _vm_object_in_map( pager_map, object, 0))
1733		return 1;
1734	if( _vm_object_in_map( buffer_map, object, 0))
1735		return 1;
1736	if( _vm_object_in_map( io_map, object, 0))
1737		return 1;
1738	if( _vm_object_in_map( phys_map, object, 0))
1739		return 1;
1740	if( _vm_object_in_map( mb_map, object, 0))
1741		return 1;
1742	if( _vm_object_in_map( u_map, object, 0))
1743		return 1;
1744	return 0;
1745}
1746
1747void
1748vm_object_check() {
1749	int i;
1750	int maxhash = 0;
1751	vm_object_t object;
1752	vm_object_hash_entry_t entry;
1753
1754	/*
1755	 * make sure that no internal objs are hashed
1756	 */
1757	for (i=0; i<VM_OBJECT_HASH_COUNT;i++) {
1758		int lsize = 0;
1759		for (entry = vm_object_hashtable[i].tqh_first;
1760		    entry != NULL;
1761		    entry = entry->hash_links.tqe_next) {
1762			if( entry->object->flags & OBJ_INTERNAL) {
1763				printf("vmochk: internal obj on hash: size: %d\n", entry->object->size);
1764			}
1765			++lsize;
1766		}
1767		if( lsize > maxhash)
1768			maxhash = lsize;
1769	}
1770
1771	printf("maximum object hash queue size: %d\n",  maxhash);
1772
1773	/*
1774	 * make sure that internal objs are in a map somewhere
1775	 * and none have zero ref counts.
1776	 */
1777	for (object = vm_object_list.tqh_first;
1778			object != NULL;
1779			object = object->object_list.tqe_next) {
1780		if( object->flags & OBJ_INTERNAL) {
1781			if( object->ref_count == 0) {
1782				printf("vmochk: internal obj has zero ref count: %d\n",
1783					object->size);
1784			}
1785			if( !vm_object_in_map(object)) {
1786				printf("vmochk: internal obj is not in a map: ref: %d, size: %d, pager: 0x%x, shadow: 0x%x\n", object->ref_count, object->size, object->pager, object->shadow);
1787			}
1788		}
1789	}
1790}
1791
1792#define DEBUG
1793#if defined(DEBUG) || defined(DDB)
1794/*
1795 *	vm_object_print:	[ debug ]
1796 */
1797void
1798vm_object_print(object, full)
1799	vm_object_t object;
1800	boolean_t full;
1801{
1802	register vm_page_t p;
1803
1804	register int count;
1805
1806	if (object == NULL)
1807		return;
1808
1809	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
1810	    (int) object, (int) object->size,
1811	    object->resident_page_count, object->ref_count);
1812	printf("pager=0x%x+0x%x, shadow=(0x%x)+0x%x\n",
1813	    (int) object->pager, (int) object->paging_offset,
1814	    (int) object->shadow, (int) object->shadow_offset);
1815	printf("cache: next=%p, prev=%p\n",
1816	    object->cached_list.tqe_next, object->cached_list.tqe_prev);
1817
1818	if (!full)
1819		return;
1820
1821	indent += 2;
1822	count = 0;
1823	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
1824		if (count == 0)
1825			iprintf("memory:=");
1826		else if (count == 6) {
1827			printf("\n");
1828			iprintf(" ...");
1829			count = 0;
1830		} else
1831			printf(",");
1832		count++;
1833
1834		printf("(off=0x%lx,page=0x%lx)",
1835		    (u_long) p->offset, (u_long) VM_PAGE_TO_PHYS(p));
1836	}
1837	if (count != 0)
1838		printf("\n");
1839	indent -= 2;
1840}
1841#endif				/* defined(DEBUG) || defined(DDB) */
1842