vm_object.c revision 6129
1/*
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
37 *
38 *
39 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57 *  School of Computer Science
58 *  Carnegie Mellon University
59 *  Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 *
64 * $Id: vm_object.c,v 1.20 1995/01/25 20:36:29 davidg Exp $
65 */
66
67/*
68 *	Virtual memory object module.
69 */
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/kernel.h>
74#include <sys/proc.h>		/* for curproc, pageproc */
75#include <sys/malloc.h>
76#include <sys/vnode.h>
77#include <sys/mount.h>
78
79#include <vm/vm.h>
80#include <vm/vm_page.h>
81#include <vm/vm_pageout.h>
82#include <vm/vm_pager.h>
83#include <vm/swap_pager.h>
84#include <vm/vnode_pager.h>
85#include <vm/vm_kern.h>
86
87static void _vm_object_allocate(vm_size_t, vm_object_t);
88
89/*
90 *	Virtual memory objects maintain the actual data
91 *	associated with allocated virtual memory.  A given
92 *	page of memory exists within exactly one object.
93 *
94 *	An object is only deallocated when all "references"
95 *	are given up.  Only one "reference" to a given
96 *	region of an object should be writeable.
97 *
98 *	Associated with each object is a list of all resident
99 *	memory pages belonging to that object; this list is
100 *	maintained by the "vm_page" module, and locked by the object's
101 *	lock.
102 *
103 *	Each object also records a "pager" routine which is
104 *	used to retrieve (and store) pages to the proper backing
105 *	storage.  In addition, objects may be backed by other
106 *	objects from which they were virtual-copied.
107 *
108 *	The only items within the object structure which are
109 *	modified after time of creation are:
110 *		reference count		locked by object's lock
111 *		pager routine		locked by object's lock
112 *
113 */
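
/*
 *	An illustrative example of a shadow chain: after a copy-on-write
 *	fault or fork, vm_object_shadow() leaves a structure like
 *
 *		new object --shadow--> original object --pager--> backing store
 *
 *	A fault that misses the front object's resident pages and pager is
 *	retried in its shadow at (offset + shadow_offset), and so on down
 *	the chain.
 */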
114
115
116struct vm_object kernel_object_store;
117struct vm_object kmem_object_store;
118
119int vm_object_cache_max;
120
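/* A prime bucket count helps vm_object_hash() spread pagers across buckets. */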
121#define	VM_OBJECT_HASH_COUNT	509
122
123struct vm_object_hash_head vm_object_hashtable[VM_OBJECT_HASH_COUNT];
124
125long object_collapses = 0;
126long object_bypasses = 0;
127
128static void
129_vm_object_allocate(size, object)
130	vm_size_t size;
131	register vm_object_t object;
132{
133	TAILQ_INIT(&object->memq);
134	TAILQ_INIT(&object->reverse_shadow_head);
135
136	object->size = size;
137	object->ref_count = 1;
138	vm_object_lock_init(object);
139	object->flags = OBJ_INTERNAL;	/* vm_allocate_with_pager will reset */
140	object->paging_in_progress = 0;
141	object->resident_page_count = 0;
142
143	object->pager = NULL;
144	object->paging_offset = 0;
145	object->shadow = NULL;
146	object->shadow_offset = (vm_offset_t) 0;
147	object->copy = NULL;
148
149	object->last_read = 0;
150
151	simple_lock(&vm_object_list_lock);
152	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
153	vm_object_count++;
154	cnt.v_nzfod += atop(size);
155	simple_unlock(&vm_object_list_lock);
156}
157
158/*
159 *	vm_object_init:
160 *
161 *	Initialize the VM objects module.
162 */
163void
164vm_object_init(vm_offset_t nothing)
165{
166	register int i;
167
168	TAILQ_INIT(&vm_object_cached_list);
169	TAILQ_INIT(&vm_object_list);
170	vm_object_count = 0;
171	simple_lock_init(&vm_cache_lock);
172	simple_lock_init(&vm_object_list_lock);
173	vm_object_cache_max = (cnt.v_page_count - 500) / 8;
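	/*
	 * Heuristic: let persisting (cached) objects hold at most roughly
	 * one eighth of the pages beyond the first 500.
	 */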
174
175	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
176		TAILQ_INIT(&vm_object_hashtable[i]);
177
178	kernel_object = &kernel_object_store;
179	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
180	    kernel_object);
181
182	kmem_object = &kmem_object_store;
183	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
184	    kmem_object);
185}
186
187/*
188 *	vm_object_allocate:
189 *
190 *	Returns a new object with the given size.
191 */
192
193vm_object_t
194vm_object_allocate(size)
195	vm_size_t size;
196{
197	register vm_object_t result;
198
199	result = (vm_object_t)
200	    malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK);
201
202
203	_vm_object_allocate(size, result);
204
205	return (result);
206}
207
208
209/*
210 *	vm_object_reference:
211 *
212 *	Gets another reference to the given object.
213 */
214inline void
215vm_object_reference(object)
216	register vm_object_t object;
217{
218	if (object == NULL)
219		return;
220
221	vm_object_lock(object);
222	object->ref_count++;
223	vm_object_unlock(object);
224}
225
226/*
227 *	vm_object_deallocate:
228 *
229 *	Release a reference to the specified object,
230 *	gained either through a vm_object_allocate
231 *	or a vm_object_reference call.  When all references
232 *	are gone, storage associated with this object
233 *	may be relinquished.
234 *
235 *	No object may be locked.
236 */
237void
238vm_object_deallocate(object)
239	vm_object_t object;
240{
241	vm_object_t temp;
242
243	while (object != NULL) {
244
245		/*
246		 * The cache holds a reference (uncounted) to the object; we
247		 * must lock it before removing the object.
248		 */
249
250		vm_object_cache_lock();
251
252		/*
253		 * Lose the reference
254		 */
255		vm_object_lock(object);
256		if (--(object->ref_count) != 0) {
257			if (object->ref_count == 1) {
258				vm_object_t robject;
259				robject = object->reverse_shadow_head.tqh_first;
260				if( robject) {
261					int s;
262					robject->ref_count += 2;
263					object->ref_count += 1;
264
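					/*
					 * The extra references taken above keep
					 * both objects from going away while we
					 * sleep waiting for their pageouts to
					 * drain; they are dropped again below.
					 */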
265					do {
266						s = splhigh();
267						while( robject->paging_in_progress) {
268							tsleep(robject, PVM, "objde1", 0);
269						}
270
271						while( object->paging_in_progress) {
272							tsleep(object, PVM, "objde2", 0);
273						}
274						splx(s);
275
276					} while( object->paging_in_progress || robject->paging_in_progress);
277
278					object->ref_count -= 1;
279					robject->ref_count -= 2;
280					if( robject->ref_count == 0) {
281						vm_object_unlock(object);
282						vm_object_cache_unlock();
283						robject->ref_count += 1;
284						vm_object_deallocate(robject);
285						return;
286					}
287					vm_object_cache_unlock();
288					vm_object_unlock(object);
289					vm_object_lock(robject);
290					vm_object_collapse(robject);
291					return;
292				}
293			}
294			vm_object_unlock(object);
295			/*
296			 * If there are still references, then we are done.
297			 */
298			vm_object_cache_unlock();
299			return;
300		}
301		/*
302		 * See if this object can persist.  If so, enter it in the
302		 * cache and trim the cache back to its maximum size.
304		 */
305
306		if (object->flags & OBJ_CANPERSIST) {
307
308			TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
309			    cached_list);
310			vm_object_cached++;
311			vm_object_cache_unlock();
312
313			vm_object_unlock(object);
314
315			vm_object_cache_trim();
316			return;
317		}
318		/*
319		 * Make sure no one can look us up now.
320		 */
321		object->flags |= OBJ_DEAD;
322		vm_object_remove(object->pager);
323		vm_object_cache_unlock();
324
325		temp = object->shadow;
326		if (temp)
327			TAILQ_REMOVE(&temp->reverse_shadow_head, object, reverse_shadow_list);
328		vm_object_terminate(object);
329		/* unlocks and deallocates object */
330		object = temp;
331	}
332}
333
334/*
335 *	vm_object_terminate actually destroys the specified object, freeing
336 *	up all previously used resources.
337 *
338 *	The object must be locked.
339 */
340void
341vm_object_terminate(object)
342	register vm_object_t object;
343{
344	register vm_page_t p, next;
345	vm_object_t shadow_object;
346	int s;
347	struct vnode *vp = NULL;
348
349	/*
350	 * Detach the object from its shadow if we are the shadow's copy.
351	 */
352	if ((shadow_object = object->shadow) != NULL) {
353		vm_object_lock(shadow_object);
354		if (shadow_object->copy == object)
355			shadow_object->copy = NULL;
356/*
357		else if (shadow_object->copy != NULL)
358			panic("vm_object_terminate: copy/shadow inconsistency");
359*/
360		vm_object_unlock(shadow_object);
361	}
362	if (object->pager && (object->pager->pg_type == PG_VNODE)) {
363		vn_pager_t vnp = object->pager->pg_data;
364
365		vp = vnp->vnp_vp;
366		VOP_FSYNC(vp, NOCRED, MNT_WAIT, NULL);
367		vinvalbuf(vp, 0, NOCRED, NULL, 0, 0);
368	}
369	/*
370	 * Wait until the pageout daemon is through with the object.
371	 */
372
373	s = splhigh();
374	while (object->paging_in_progress) {
375		vm_object_unlock(object);
376		tsleep((caddr_t) object, PVM, "objtrm", 0);
377		vm_object_lock(object);
378	}
379	splx(s);
380
381	/*
382	 * While the paging system is locked, pull the object's pages off the
383	 * active and inactive queues.  This keeps the pageout daemon from
384	 * playing with them during vm_pager_deallocate.
385	 *
386	 * We can't free the pages yet, because the object's pager may have to
387	 * write them out before deallocating the paging space.
388	 */
389
390	for (p = object->memq.tqh_first; p; p = next) {
391		VM_PAGE_CHECK(p);
392		next = p->listq.tqe_next;
393
394		vm_page_lock_queues();
395		if (p->flags & PG_CACHE)
396			vm_page_free(p);
397		else
398			vm_page_unqueue(p);
399		vm_page_unlock_queues();
400		p = next;
401	}
402
403	if (object->paging_in_progress != 0)
404		panic("vm_object_terminate: pageout in progress");
405
406	/*
407	 * Clean and free the pages, as appropriate. All references to the
408	 * object are gone, so we don't need to lock it.
409	 */
410
411	if (((object->flags & OBJ_INTERNAL) == 0) &&
412	    object->pager && (object->pager->pg_type != PG_DEVICE)) {
413		(void) vm_object_page_clean(object, 0, 0, TRUE, TRUE);
414	}
415	/*
416	 * One last time -- get rid of buffers that might have been created
417	 * by the vm_object_page_clean pass above.
418	 */
419	if (vp != NULL) {
420		vm_object_unlock(object);
421		vinvalbuf(vp, 0, NOCRED, NULL, 0, 0);
422		vm_object_lock(object);
423	}
424	/*
425	 * Now free the pages. For internal objects, this also removes them
426	 * from paging queues.
427	 */
428	while ((p = object->memq.tqh_first) != NULL) {
429		VM_PAGE_CHECK(p);
430		vm_page_lock_queues();
431		PAGE_WAKEUP(p);
432		vm_page_free(p);
433		cnt.v_pfree++;
434		vm_page_unlock_queues();
435	}
436	vm_object_unlock(object);
437
438	/*
439	 * Let the pager know object is dead.
440	 */
441	if (object->pager != NULL)
442		vm_pager_deallocate(object->pager);
443
444	simple_lock(&vm_object_list_lock);
445	TAILQ_REMOVE(&vm_object_list, object, object_list);
446	vm_object_count--;
447	simple_unlock(&vm_object_list_lock);
448
449	/*
450	 * Free the space for the object.
451	 */
452	free((caddr_t) object, M_VMOBJ);
453}
454
455/*
456 *	vm_object_page_clean
457 *
458 *	Clean all dirty pages in the specified range of object.
459 *	Leaves page on whatever queue it is currently on.
460 *
461 *	Odd semantics: if start == end, we clean everything.
462 *
463 *	The object must be locked.
464 */
465#if 1
466boolean_t
467vm_object_page_clean(object, start, end, syncio, de_queue)
468	register vm_object_t object;
469	register vm_offset_t start;
470	register vm_offset_t end;
471	boolean_t syncio;
472	boolean_t de_queue;
473{
474	register vm_page_t p, nextp;
475	int size;
476
477	if (object->pager == NULL)
478		return 1;
479
480	if (start != end) {
481		start = trunc_page(start);
482		end = round_page(end);
483	}
484	size = end - start;
485
486again:
487	/*
488	 * Wait until the pageout daemon is through with the object.
489	 */
490	while (object->paging_in_progress) {
491		tsleep(object, PVM, "objpcw", 0);
492	}
493
494	nextp = object->memq.tqh_first;
495	while ((p = nextp) && ((start == end) || (size != 0))) {
496		nextp = p->listq.tqe_next;
497		if (start == end || (p->offset >= start && p->offset < end)) {
498			if ((p->flags & PG_BUSY) || p->busy) {
499				int s = splhigh();
500
501				p->flags |= PG_WANTED;
502				tsleep(p, PVM, "objpcn", 0);
503				splx(s);
504				goto again;
505			}
506			size -= PAGE_SIZE;
507
508			vm_page_test_dirty(p);
509
510			if ((p->dirty & p->valid) != 0) {
511				vm_pageout_clean(p, VM_PAGEOUT_FORCE);
512				goto again;
513			}
514		}
515	}
516	wakeup((caddr_t) object);
517	return 1;
518}
519#endif
520/*
521 *	vm_object_page_clean
522 *
523 *	Clean all dirty pages in the specified range of object.
524 *	If syncio is TRUE, page cleaning is done synchronously.
525 *	If de_queue is TRUE, pages are removed from any paging queue
526 *	they were on, otherwise they are left on whatever queue they
527 *	were on before the cleaning operation began.
528 *
529 *	Odd semantics: if start == end, we clean everything.
530 *
531 *	The object must be locked.
532 *
533 *	Returns TRUE if all was well, FALSE if there was a pager error
534 *	somewhere.  We attempt to clean (and dequeue) all pages regardless
535 *	of where an error occurs.
536 */
537#if 0
538boolean_t
539vm_object_page_clean(object, start, end, syncio, de_queue)
540	register vm_object_t object;
541	register vm_offset_t start;
542	register vm_offset_t end;
543	boolean_t syncio;
544	boolean_t de_queue;
545{
546	register vm_page_t p;
547	int onqueue;
548	boolean_t noerror = TRUE;
549
550	if (object == NULL)
551		return (TRUE);
552
553	/*
554	 * If it is an internal object and there is no pager, attempt to
555	 * allocate one.  Note that vm_object_collapse may relocate one from a
556	 * collapsed object so we must recheck afterward.
557	 */
558	if ((object->flags & OBJ_INTERNAL) && object->pager == NULL) {
559		vm_object_collapse(object);
560		if (object->pager == NULL) {
561			vm_pager_t pager;
562
563			vm_object_unlock(object);
564			pager = vm_pager_allocate(PG_DFLT, (caddr_t) 0,
565			    object->size, VM_PROT_ALL,
566			    (vm_offset_t) 0);
567			if (pager)
568				vm_object_setpager(object, pager, 0, FALSE);
569			vm_object_lock(object);
570		}
571	}
572	if (object->pager == NULL)
573		return (FALSE);
574
575again:
576	/*
577	 * Wait until the pageout daemon is through with the object.
578	 */
579	while (object->paging_in_progress) {
580		vm_object_sleep((int) object, object, FALSE);
581		vm_object_lock(object);
582	}
583	/*
584	 * Loop through the object page list cleaning as necessary.
585	 */
586	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
587		onqueue = 0;
588		if ((start == end || (p->offset >= start && p->offset < end)) &&
589		    !(p->flags & PG_FICTITIOUS)) {
590			vm_page_test_dirty(p);
591			/*
592			 * Remove the page from any paging queue. This needs
593			 * to be done if either we have been explicitly asked
594			 * to do so or it is about to be cleaned (see comment
595			 * below).
596			 */
597			if (de_queue || (p->dirty & p->valid)) {
598				vm_page_lock_queues();
599				if (p->flags & PG_ACTIVE) {
600					TAILQ_REMOVE(&vm_page_queue_active,
601					    p, pageq);
602					p->flags &= ~PG_ACTIVE;
603					cnt.v_active_count--;
604					onqueue = 1;
605				} else if (p->flags & PG_INACTIVE) {
606					TAILQ_REMOVE(&vm_page_queue_inactive,
607					    p, pageq);
608					p->flags &= ~PG_INACTIVE;
609					cnt.v_inactive_count--;
610					onqueue = -1;
611				} else
612					onqueue = 0;
613				vm_page_unlock_queues();
614			}
615			/*
616			 * To ensure the state of the page doesn't change
617			 * during the clean operation we do two things. First
618			 * we set the busy bit and write-protect all mappings
619			 * to ensure that write accesses to the page block (in
620			 * vm_fault).  Second, we remove the page from any
621			 * paging queue to foil the pageout daemon
622			 * (vm_pageout_scan).
623			 */
624			pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ);
625			if (p->dirty & p->valid) {
626				p->flags |= PG_BUSY;
627				object->paging_in_progress++;
628				vm_object_unlock(object);
629				/*
630				 * XXX if put fails we mark the page as clean
631				 * to avoid an infinite loop.  We will lose
632				 * changes to the page.
633				 */
634				if (vm_pager_put(object->pager, p, syncio)) {
635					printf("%s: pager_put error\n",
636					    "vm_object_page_clean");
637					p->dirty = 0;
638					noerror = FALSE;
639				}
640				vm_object_lock(object);
641				object->paging_in_progress--;
642				if (!de_queue && onqueue) {
643					vm_page_lock_queues();
644					if (onqueue > 0)
645						vm_page_activate(p);
646					else
647						vm_page_deactivate(p);
648					vm_page_unlock_queues();
649				}
650				PAGE_WAKEUP(p);
651				goto again;
652			}
653		}
654	}
655	return (noerror);
656}
657#endif
658
659/*
660 *	vm_object_deactivate_pages
661 *
662 *	Deactivate all pages in the specified object.  (Keep its pages
663 *	in memory even though it is no longer referenced.)
664 *
665 *	The object must be locked.
666 */
667void
668vm_object_deactivate_pages(object)
669	register vm_object_t object;
670{
671	register vm_page_t p, next;
672
673	for (p = object->memq.tqh_first; p != NULL; p = next) {
674		next = p->listq.tqe_next;
675		vm_page_lock_queues();
676		vm_page_deactivate(p);
677		vm_page_unlock_queues();
678	}
679}
680
681/*
682 *	Trim the object cache to size.
683 */
684void
685vm_object_cache_trim()
686{
687	register vm_object_t object;
688
689	vm_object_cache_lock();
690	while (vm_object_cached > vm_object_cache_max) {
691		object = vm_object_cached_list.tqh_first;
692		vm_object_cache_unlock();
693
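		/*
		 * vm_object_lookup() takes the object off the cached list
		 * and gives us a reference; pager_cache(..., FALSE) then
		 * releases that reference without re-caching the object.
		 */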
694		if (object != vm_object_lookup(object->pager))
695			panic("vm_object_cache_trim: I'm sooo confused.");
696
697		pager_cache(object, FALSE);
698
699		vm_object_cache_lock();
700	}
701	vm_object_cache_unlock();
702}
703
704
705/*
706 *	vm_object_pmap_copy:
707 *
708 *	Makes all physical pages in the specified
709 *	object range copy-on-write.  No writeable
710 *	references to these pages should remain.
711 *
712 *	The object must *not* be locked.
713 */
714void
715vm_object_pmap_copy(object, start, end)
716	register vm_object_t object;
717	register vm_offset_t start;
718	register vm_offset_t end;
719{
720	register vm_page_t p;
721
722	if (object == NULL)
723		return;
724
725	vm_object_lock(object);
726	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
727		if ((start <= p->offset) && (p->offset < end)) {
728			pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ);
729			p->flags |= PG_COPYONWRITE;
730		}
731	}
732	vm_object_unlock(object);
733}
734
735/*
736 *	vm_object_pmap_remove:
737 *
738 *	Removes all physical pages in the specified
739 *	object range from all physical maps.
740 *
741 *	The object must *not* be locked.
742 */
743void
744vm_object_pmap_remove(object, start, end)
745	register vm_object_t object;
746	register vm_offset_t start;
747	register vm_offset_t end;
748{
749	register vm_page_t p;
750	int s;
751
752	if (object == NULL)
753		return;
754	++object->paging_in_progress;
755
756	vm_object_lock(object);
757again:
758	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
759		if ((start <= p->offset) && (p->offset < end)) {
760			s = splhigh();
761			if ((p->flags & PG_BUSY) || p->busy) {
762				p->flags |= PG_WANTED;
763				tsleep((caddr_t) p, PVM, "vmopmr", 0);
764				splx(s);
765				goto again;
766			}
767			splx(s);
768			pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
769		}
770	}
771	vm_object_unlock(object);
772	--object->paging_in_progress;
773	if (object->paging_in_progress == 0)
774		wakeup((caddr_t) object);
775}
776
777/*
778 *	vm_object_copy:
779 *
780 *	Create a new object which is a copy of an existing
781 *	object, and mark all of the pages in the existing
782 *	object 'copy-on-write'.  The new object has one reference.
783 *	Returns the new object.
784 *
785 *	May defer the copy until later if the object is backed only by
786 *	the default pager (or by no pager at all).
787 */
788void
789vm_object_copy(src_object, src_offset, size,
790    dst_object, dst_offset, src_needs_copy)
791	register vm_object_t src_object;
792	vm_offset_t src_offset;
793	vm_size_t size;
794	vm_object_t *dst_object;/* OUT */
795	vm_offset_t *dst_offset;/* OUT */
796	boolean_t *src_needs_copy;	/* OUT */
797{
798	register vm_object_t new_copy;
799	register vm_object_t old_copy;
800	vm_offset_t new_start, new_end;
801
802	register vm_page_t p;
803
804	if (src_object == NULL) {
805		/*
806		 * Nothing to copy
807		 */
808		*dst_object = NULL;
809		*dst_offset = 0;
810		*src_needs_copy = FALSE;
811		return;
812	}
813	/*
814	 * If the object's pager is null_pager or the default pager, we don't
815	 * have to make a copy of it.  Instead, we set the needs copy flag and
816	 * make a shadow later.
817	 */
818
819	vm_object_lock(src_object);
820
821	/*
822	 * Try to collapse the object before copying it.
823	 */
824
825	vm_object_collapse(src_object);
826
827	if (src_object->pager == NULL ||
828	    (src_object->flags & OBJ_INTERNAL)) {
829
830		/*
831		 * Make another reference to the object
832		 */
833		src_object->ref_count++;
834
835		/*
836		 * Mark all of the pages copy-on-write.
837		 */
838		for (p = src_object->memq.tqh_first; p; p = p->listq.tqe_next)
839			if (src_offset <= p->offset &&
840			    p->offset < src_offset + size)
841				p->flags |= PG_COPYONWRITE;
842		vm_object_unlock(src_object);
843
844		*dst_object = src_object;
845		*dst_offset = src_offset;
846
847		/*
848		 * Must make a shadow when write is desired
849		 */
850		*src_needs_copy = TRUE;
851		return;
852	}
853	/*
854	 * If the object has a pager, the pager wants to see all of the
855	 * changes.  We need a copy-object for the changed pages.
856	 *
857	 * If there is a copy-object, and it is empty, no changes have been made
858	 * to the object since the copy-object was made.  We can use the same
859 * copy-object.
860	 */
861
862Retry1:
863	old_copy = src_object->copy;
864	if (old_copy != NULL) {
865		/*
866		 * Try to get the locks (out of order)
867		 */
868		if (!vm_object_lock_try(old_copy)) {
869			vm_object_unlock(src_object);
870
871			/* should spin a bit here... */
872			tsleep((caddr_t) old_copy, PVM, "cpylck", 1);
873			vm_object_lock(src_object);
874			goto Retry1;
875		}
876		if (old_copy->resident_page_count == 0 &&
877		    old_copy->pager == NULL) {
878			/*
879			 * Return another reference to the existing
880			 * copy-object.
881			 */
882			old_copy->ref_count++;
883			vm_object_unlock(old_copy);
884			vm_object_unlock(src_object);
885			*dst_object = old_copy;
886			*dst_offset = src_offset;
887			*src_needs_copy = FALSE;
888			return;
889		}
890		vm_object_unlock(old_copy);
891	}
892	vm_object_unlock(src_object);
893
894	/*
895	 * If the object has a pager, the pager wants to see all of the
896	 * changes.  We must make a copy-object and put the changed pages
897	 * there.
898	 *
899	 * The copy-object is always made large enough to completely shadow the
900	 * original object, since it may have several users who want to shadow
901	 * the original object at different points.
902	 */
903
904	new_copy = vm_object_allocate(src_object->size);
905
906Retry2:
907	vm_object_lock(src_object);
908	/*
909	 * Copy object may have changed while we were unlocked
910	 */
911	old_copy = src_object->copy;
912	if (old_copy != NULL) {
913		/*
914		 * Try to get the locks (out of order)
915		 */
916		if (!vm_object_lock_try(old_copy)) {
917			vm_object_unlock(src_object);
918			tsleep((caddr_t) old_copy, PVM, "cpylck", 1);
919			goto Retry2;
920		}
921		/*
922		 * Consistency check
923		 */
924		if (old_copy->shadow != src_object ||
925		    old_copy->shadow_offset != (vm_offset_t) 0)
926			panic("vm_object_copy: copy/shadow inconsistency");
927
928		/*
929		 * Make the old copy-object shadow the new one. It will
930		 * receive no more pages from the original object.
931		 */
932
933		src_object->ref_count--;	/* remove ref. from old_copy */
934		if (old_copy->shadow)
935			TAILQ_REMOVE(&old_copy->shadow->reverse_shadow_head, old_copy, reverse_shadow_list);
936		old_copy->shadow = new_copy;
937		TAILQ_INSERT_TAIL(&old_copy->shadow->reverse_shadow_head, old_copy, reverse_shadow_list);
938		new_copy->ref_count++;	/* locking not needed - we have the
939					 * only pointer */
940		vm_object_unlock(old_copy);	/* done with old_copy */
941	}
942	new_start = (vm_offset_t) 0;	/* always shadow original at 0 */
943	new_end = (vm_offset_t) new_copy->size;	/* for the whole object */
944
945	/*
946	 * Point the new copy at the existing object.
947	 */
948
949	new_copy->shadow = src_object;
950	TAILQ_INSERT_TAIL(&new_copy->shadow->reverse_shadow_head, new_copy, reverse_shadow_list);
951	new_copy->shadow_offset = new_start;
952	src_object->ref_count++;
953	src_object->copy = new_copy;
954
955	/*
956	 * Mark all the affected pages of the existing object copy-on-write.
957	 */
958	for (p = src_object->memq.tqh_first; p != NULL; p = p->listq.tqe_next)
959		if ((new_start <= p->offset) && (p->offset < new_end))
960			p->flags |= PG_COPYONWRITE;
961
962	vm_object_unlock(src_object);
963
964	*dst_object = new_copy;
965	*dst_offset = src_offset - new_start;
966	*src_needs_copy = FALSE;
967}
968
969/*
970 *	vm_object_shadow:
971 *
972 *	Create a new object which is backed by the
973 *	specified existing object range.  The source
974 *	object reference is deallocated.
975 *
976 *	The new object and offset into that object
977 *	are returned in the source parameters.
978 */
979
980void
981vm_object_shadow(object, offset, length)
982	vm_object_t *object;	/* IN/OUT */
983	vm_offset_t *offset;	/* IN/OUT */
984	vm_size_t length;
985{
986	register vm_object_t source;
987	register vm_object_t result;
988
989	source = *object;
990
991	/*
992	 * Allocate a new object with the given length
993	 */
994
995	if ((result = vm_object_allocate(length)) == NULL)
996		panic("vm_object_shadow: no object for shadowing");
997
998	/*
999	 * The new object shadows the source object, adding a reference to it.
1000	 * Our caller changes his reference to point to the new object,
1001	 * removing a reference to the source object.  Net result: no change
1002	 * of reference count.
1003	 */
1004	result->shadow = source;
1005	if (source)
1006		TAILQ_INSERT_TAIL(&result->shadow->reverse_shadow_head, result, reverse_shadow_list);
1007
1008	/*
1009	 * Store the offset into the source object, and fix up the offset into
1010	 * the new object.
1011	 */
1012
1013	result->shadow_offset = *offset;
1014
1015	/*
1016	 * Return the new things
1017	 */
1018
1019	*offset = 0;
1020	*object = result;
1021}
1022
1023/*
1024 *	Set the specified object's pager to the specified pager.
1025 */
1026
1027void
1028vm_object_setpager(object, pager, paging_offset,
1029    read_only)
1030	vm_object_t object;
1031	vm_pager_t pager;
1032	vm_offset_t paging_offset;
1033	boolean_t read_only;
1034{
1035	vm_object_lock(object);	/* XXX ? */
1036	if (object->pager && object->pager != pager) {
1037		panic("vm_object_setpager: pager already allocated");
1038	}
1039	object->pager = pager;
1040	object->paging_offset = paging_offset;
1041	vm_object_unlock(object);	/* XXX ? */
1042}
1043
1044/*
1045 *	vm_object_hash hashes the pager/id pair.
1046 */
1047
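/*
 * The low bits of a pager address carry little information (pagers are
 * allocated structures, so their addresses are aligned); shift them out
 * before taking the modulus.
 */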
1048#define vm_object_hash(pager) \
1049	(((unsigned)pager >> 5)%VM_OBJECT_HASH_COUNT)
1050
1051/*
1052 *	vm_object_lookup looks in the object cache for an object with the
1053 *	specified pager and paging id.
1054 */
1055
1056vm_object_t
1057vm_object_lookup(pager)
1058	vm_pager_t pager;
1059{
1060	register vm_object_hash_entry_t entry;
1061	vm_object_t object;
1062
1063	cnt.v_lookups++;
1064	vm_object_cache_lock();
1065
1066	for (entry = vm_object_hashtable[vm_object_hash(pager)].tqh_first;
1067	    entry != NULL;
1068	    entry = entry->hash_links.tqe_next) {
1069		object = entry->object;
1070		if (object->pager == pager) {
1071			vm_object_lock(object);
1072			if (object->ref_count == 0) {
1073				TAILQ_REMOVE(&vm_object_cached_list, object,
1074				    cached_list);
1075				vm_object_cached--;
1076			}
1077			object->ref_count++;
1078			vm_object_unlock(object);
1079			vm_object_cache_unlock();
1080			cnt.v_hits++;
1081			return (object);
1082		}
1083	}
1084
1085	vm_object_cache_unlock();
1086	return (NULL);
1087}
1088
1089/*
1090 *	vm_object_enter enters the specified object/pager/id into
1091 *	the hash table.
1092 */
1093
1094void
1095vm_object_enter(object, pager)
1096	vm_object_t object;
1097	vm_pager_t pager;
1098{
1099	struct vm_object_hash_head *bucket;
1100	register vm_object_hash_entry_t entry;
1101
1102	/*
1103	 * We don't cache null objects, and we can't cache objects with the
1104	 * null pager.
1105	 */
1106
1107	if (object == NULL)
1108		return;
1109	if (pager == NULL)
1110		return;
1111
1112	bucket = &vm_object_hashtable[vm_object_hash(pager)];
1113	entry = (vm_object_hash_entry_t)
1114	    malloc((u_long) sizeof *entry, M_VMOBJHASH, M_WAITOK);
1115	entry->object = object;
1116	object->flags |= OBJ_CANPERSIST;
1117
1118	vm_object_cache_lock();
1119	TAILQ_INSERT_TAIL(bucket, entry, hash_links);
1120	vm_object_cache_unlock();
1121}
1122
1123/*
1124 *	vm_object_remove:
1125 *
1126 *	Remove the pager from the hash table.
1127 *	Note:  This assumes that the object cache
1128 *	is locked.  XXX this should be fixed
1129 *	by reorganizing vm_object_deallocate.
1130 */
1131void
1132vm_object_remove(pager)
1133	register vm_pager_t pager;
1134{
1135	struct vm_object_hash_head *bucket;
1136	register vm_object_hash_entry_t entry;
1137	register vm_object_t object;
1138
1139	bucket = &vm_object_hashtable[vm_object_hash(pager)];
1140
1141	for (entry = bucket->tqh_first;
1142	    entry != NULL;
1143	    entry = entry->hash_links.tqe_next) {
1144		object = entry->object;
1145		if (object->pager == pager) {
1146			TAILQ_REMOVE(bucket, entry, hash_links);
1147			free((caddr_t) entry, M_VMOBJHASH);
1148			break;
1149		}
1150	}
1151}
1152
1153/*
1154 * this version of collapse allows the operation to occur earlier and
1155 * when paging_in_progress is true for an object...  This is not a complete
1156 * operation, but should plug 99.9% of the rest of the leaks.
1157 */
1158static void
1159vm_object_qcollapse(object)
1160	register vm_object_t object;
1161{
1162	register vm_object_t backing_object;
1163	register vm_offset_t backing_offset, new_offset;
1164	register vm_page_t p, pp;
1165	register vm_size_t size;
1166
1167	backing_object = object->shadow;
1168	if (!backing_object)
1169		return;
1170	if ((backing_object->flags & OBJ_INTERNAL) == 0)
1171		return;
1172	if (backing_object->shadow != NULL &&
1173	    backing_object->shadow->copy == backing_object)
1174		return;
1175	if (backing_object->ref_count != 1)
1176		return;
1177
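	/*
	 * Take extra references so the backing object cannot be torn down
	 * while its pages are being moved; they are dropped again below.
	 */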
1178	backing_object->ref_count += 2;
1179
1180	backing_offset = object->shadow_offset;
1181	size = object->size;
1182	p = backing_object->memq.tqh_first;
1183	while (p) {
1184		vm_page_t next;
1185
1186		next = p->listq.tqe_next;
1187		if ((p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) ||
1188		    !p->valid || p->hold_count || p->wire_count || p->busy || p->bmapped) {
1189			p = next;
1190			continue;
1191		}
1192		pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
1193		new_offset = (p->offset - backing_offset);
1194		if (p->offset < backing_offset ||
1195		    new_offset >= size) {
1196			if (backing_object->pager)
1197				swap_pager_freespace(backing_object->pager,
1198				    backing_object->paging_offset + p->offset, PAGE_SIZE);
1199			vm_page_lock_queues();
1200			vm_page_free(p);
1201			vm_page_unlock_queues();
1202		} else {
1203			pp = vm_page_lookup(object, new_offset);
1204			if (pp != NULL || (object->pager && vm_pager_has_page(object->pager,
1205				    object->paging_offset + new_offset))) {
1206				if (backing_object->pager)
1207					swap_pager_freespace(backing_object->pager,
1208					    backing_object->paging_offset + p->offset, PAGE_SIZE);
1209				vm_page_lock_queues();
1210				vm_page_free(p);
1211				vm_page_unlock_queues();
1212			} else {
1213				if( backing_object->pager)
1214					swap_pager_freespace(backing_object->pager,
1215					    backing_object->paging_offset + p->offset, PAGE_SIZE);
1216				vm_page_rename(p, object, new_offset);
1217			}
1218		}
1219		p = next;
1220	}
1221	backing_object->ref_count -= 2;
1222}
1223
1224boolean_t vm_object_collapse_allowed = TRUE;
1225
1226/*
1227 *	vm_object_collapse:
1228 *
1229 *	Collapse an object with the object backing it.
1230 *	Pages in the backing object are moved into the
1231 *	parent, and the backing object is deallocated.
1232 *
1233 *	Requires that the object be locked and the page
1234 *	queues be unlocked.
1235 *
1236 *	This routine has significant changes by John S. Dyson
1237 *	to fix some swap memory leaks.  18 Dec 93
1238 *
1239 */
1240void
1241vm_object_collapse(object)
1242	register vm_object_t object;
1243
1244{
1245	register vm_object_t backing_object;
1246	register vm_offset_t backing_offset;
1247	register vm_size_t size;
1248	register vm_offset_t new_offset;
1249	register vm_page_t p, pp;
1250
1251	if (!vm_object_collapse_allowed)
1252		return;
1253
1254	while (TRUE) {
1255		/*
1256		 * Verify that the conditions are right for collapse:
1257		 *
1258		 * The object exists and no pages in it are currently being paged
1259		 * out.
1260		 */
1261		if (object == NULL)
1262			return;
1263
1264		/*
1265		 * Make sure there is a backing object.
1266		 */
1267		if ((backing_object = object->shadow) == NULL)
1268			return;
1269
1270		if (object->paging_in_progress != 0) {
1271			if (backing_object) {
1272				if (vm_object_lock_try(backing_object)) {
1273					vm_object_qcollapse(object);
1274					vm_object_unlock(backing_object);
1275				}
1276			}
1277			return;
1278		}
1279
1280		vm_object_lock(backing_object);
1281		/*
1282		 * ... The backing object is not read_only, and no pages in
1283		 * the backing object are currently being paged out. The
1284		 * backing object is internal.
1285		 */
1286
1287		if ((backing_object->flags & OBJ_INTERNAL) == 0 ||
1288		    backing_object->paging_in_progress != 0) {
1289			vm_object_qcollapse(object);
1290			vm_object_unlock(backing_object);
1291			return;
1292		}
1293		/*
1294		 * The backing object can't be a copy-object: the
1295		 * shadow_offset for the copy-object must stay as 0.
1296		 * Furthermore (for the 'we have all the pages' case), if we
1297		 * bypass backing_object and just shadow the next object in
1298		 * the chain, old pages from that object would then have to be
1299		 * copied BOTH into the (former) backing_object and into the
1300		 * parent object.
1301		 */
1302		if (backing_object->shadow != NULL &&
1303		    backing_object->shadow->copy == backing_object) {
1304			vm_object_unlock(backing_object);
1305			return;
1306		}
1307		/*
1308		 * we can deal only with the swap pager
1309		 */
1310		if ((object->pager &&
1311			object->pager->pg_type != PG_SWAP) ||
1312		    (backing_object->pager &&
1313			backing_object->pager->pg_type != PG_SWAP)) {
1314			vm_object_unlock(backing_object);
1315			return;
1316		}
1317		/*
1318		 * We know that we can either collapse the backing object (if
1319		 * the parent is the only reference to it) or (perhaps) remove
1320		 * the parent's reference to it.
1321		 */
1322
1323		backing_offset = object->shadow_offset;
1324		size = object->size;
1325
1326		/*
1327		 * If there is exactly one reference to the backing object, we
1328		 * can collapse it into the parent.
1329		 */
1330
1331		if (backing_object->ref_count == 1) {
1332
1333			backing_object->flags |= OBJ_DEAD;
1334			/*
1335			 * We can collapse the backing object.
1336			 *
1337			 * Move all in-memory pages from backing_object to the
1338			 * parent.  Pages that have been paged out will be
1339			 * overwritten by any of the parent's pages that
1340			 * shadow them.
1341			 */
1342
1343			while ((p = backing_object->memq.tqh_first) != 0) {
1344
1345				new_offset = (p->offset - backing_offset);
1346
1347				/*
1348				 * If the parent has a page here, or if this
1349				 * page falls outside the parent, dispose of
1350				 * it.
1351				 *
1352				 * Otherwise, move it as planned.
1353				 */
1354
1355				if (p->offset < backing_offset ||
1356				    new_offset >= size) {
1357					vm_page_lock_queues();
1358					pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
1359					PAGE_WAKEUP(p);
1360					vm_page_free(p);
1361					vm_page_unlock_queues();
1362				} else {
1363					pp = vm_page_lookup(object, new_offset);
1364					if (pp != NULL || (object->pager && vm_pager_has_page(object->pager,
1365					    object->paging_offset + new_offset))) {
1366						vm_page_lock_queues();
1367						pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
1368						PAGE_WAKEUP(p);
1369						vm_page_free(p);
1370						vm_page_unlock_queues();
1371					} else {
1372						vm_page_rename(p, object, new_offset);
1373					}
1374				}
1375			}
1376
1377			/*
1378			 * Move the pager from backing_object to object.
1379			 */
1380
1381			if (backing_object->pager) {
1382				backing_object->paging_in_progress++;
1383				if (object->pager) {
1384					vm_pager_t bopager;
1385
1386					object->paging_in_progress++;
1387					/*
1388					 * copy shadow object pages into ours
1389					 * and destroy unneeded pages in
1390					 * shadow object.
1391					 */
1392					bopager = backing_object->pager;
1393					backing_object->pager = NULL;
1394					vm_object_remove(backing_object->pager);
1395					swap_pager_copy(
1396					    bopager, backing_object->paging_offset,
1397					    object->pager, object->paging_offset,
1398					    object->shadow_offset);
1399					object->paging_in_progress--;
1400					if (object->paging_in_progress == 0)
1401						wakeup((caddr_t) object);
1402				} else {
1403					object->paging_in_progress++;
1404					/*
1405					 * grab the shadow objects pager
1406					 */
1407					object->pager = backing_object->pager;
1408					object->paging_offset = backing_object->paging_offset + backing_offset;
1409					vm_object_remove(backing_object->pager);
1410					backing_object->pager = NULL;
1411					/*
1412					 * free unnecessary blocks
1413					 */
1414					swap_pager_freespace(object->pager, 0, object->paging_offset);
1415					object->paging_in_progress--;
1416					if (object->paging_in_progress == 0)
1417						wakeup((caddr_t) object);
1418				}
1419				backing_object->paging_in_progress--;
1420				if (backing_object->paging_in_progress == 0)
1421					wakeup((caddr_t) backing_object);
1422			}
1423			/*
1424			 * Object now shadows whatever backing_object did.
1425			 * Note that the reference to backing_object->shadow
1426			 * moves from within backing_object to within object.
1427			 */
1428
1429			TAILQ_REMOVE(&object->shadow->reverse_shadow_head, object,
1430			    reverse_shadow_list);
1431			if (backing_object->shadow)
1432				TAILQ_REMOVE(&backing_object->shadow->reverse_shadow_head,
1433				    backing_object, reverse_shadow_list);
1434			object->shadow = backing_object->shadow;
1435			if (object->shadow)
1436				TAILQ_INSERT_TAIL(&object->shadow->reverse_shadow_head,
1437				    object, reverse_shadow_list);
1438
1439			object->shadow_offset += backing_object->shadow_offset;
1440			if (object->shadow != NULL &&
1441			    object->shadow->copy != NULL) {
1442				panic("vm_object_collapse: we collapsed a copy-object!");
1443			}
1444			/*
1445			 * Discard backing_object.
1446			 *
1447			 * Since the backing object has no pages, no pager left,
1448			 * and no object references within it, all that is
1449			 * necessary is to dispose of it.
1450			 */
1451
1452			vm_object_unlock(backing_object);
1453
1454			simple_lock(&vm_object_list_lock);
1455			TAILQ_REMOVE(&vm_object_list, backing_object,
1456			    object_list);
1457			vm_object_count--;
1458			simple_unlock(&vm_object_list_lock);
1459
1460			free((caddr_t) backing_object, M_VMOBJ);
1461
1462			object_collapses++;
1463		} else {
1464			/*
1465			 * If all of the pages in the backing object are
1466			 * shadowed by the parent object, the parent object no
1467			 * longer has to shadow the backing object; it can
1468			 * shadow the next one in the chain.
1469			 *
1470			 * The backing object must not be paged out - we'd have
1471			 * to check all of the paged-out pages, as well.
1472			 */
1473
1474			if (backing_object->pager != NULL) {
1475				vm_object_unlock(backing_object);
1476				return;
1477			}
1478			/*
1479			 * Should have a check for a 'small' number of pages
1480			 * here.
1481			 */
1482
1483			for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) {
1484				new_offset = (p->offset - backing_offset);
1485
1486				/*
1487				 * If the parent has a page here, or if this
1488				 * page falls outside the parent, keep going.
1489				 *
1490				 * Otherwise, the backing_object must be left in
1491				 * the chain.
1492				 */
1493
1494				if (p->offset >= backing_offset &&
1495				    new_offset <= size &&
1496				    ((pp = vm_page_lookup(object, new_offset)) == NULL ||
1497					!pp->valid) &&
1498				    (!object->pager || !vm_pager_has_page(object->pager, object->paging_offset + new_offset))) {
1499					/*
1500					 * Page still needed. Can't go any
1501					 * further.
1502					 */
1503					vm_object_unlock(backing_object);
1504					return;
1505				}
1506			}
1507
1508			/*
1509			 * Make the parent shadow the next object in the
1510			 * chain.  Deallocating backing_object will not remove
1511			 * it, since its reference count is at least 2.
1512			 */
1513
1514			TAILQ_REMOVE(&object->shadow->reverse_shadow_head,
1515			    object, reverse_shadow_list);
1516			vm_object_reference(object->shadow = backing_object->shadow);
1517			if (object->shadow)
1518				TAILQ_INSERT_TAIL(&object->shadow->reverse_shadow_head,
1519				    object, reverse_shadow_list);
1520			object->shadow_offset += backing_object->shadow_offset;
1521
1522			/*
1523			 * Backing object might have had a copy pointer to us.
1524			 * If it did, clear it.
1525			 */
1526			if (backing_object->copy == object) {
1527				backing_object->copy = NULL;
1528			}
1529			/*
1530			 * Drop the reference count on backing_object. Since
1531			 * its ref_count was at least 2, it will not vanish;
1532			 * so we don't need to call vm_object_deallocate.
1533			 */
1534			if (backing_object->ref_count == 1)
1535				printf("should have called obj deallocate\n");
1536			backing_object->ref_count--;
1537			vm_object_unlock(backing_object);
1538
1539			object_bypasses++;
1540
1541		}
1542
1543		/*
1544		 * Try again with this object's new backing object.
1545		 */
1546	}
1547}
1548
1549/*
1550 *	vm_object_page_remove: [internal]
1551 *
1552 *	Removes all physical pages in the specified
1553 *	object range from the object's list of pages.
1554 *
1555 *	The object must be locked.
1556 */
1557void
1558vm_object_page_remove(object, start, end)
1559	register vm_object_t object;
1560	register vm_offset_t start;
1561	register vm_offset_t end;
1562{
1563	register vm_page_t p, next;
1564	vm_offset_t size;
1565	int s;
1566
1567	if (object == NULL)
1568		return;
1569
1570	object->paging_in_progress++;
1571	start = trunc_page(start);
1572	end = round_page(end);
1573again:
1574	size = end - start;
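	/*
	 * For a large range it is cheaper to walk the object's resident
	 * page list once; for a small range, look up each page by offset.
	 */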
1575	if (size > 4 * PAGE_SIZE || size >= object->size / 4) {
1576		for (p = object->memq.tqh_first; p != NULL; p = next) {
1577			next = p->listq.tqe_next;
1578			if ((start <= p->offset) && (p->offset < end)) {
1579				s = splhigh();
1580				if (p->bmapped) {
1581					splx(s);
1582					continue;
1583				}
1584				if ((p->flags & PG_BUSY) || p->busy) {
1585					p->flags |= PG_WANTED;
1586					tsleep((caddr_t) p, PVM, "vmopar", 0);
1587					splx(s);
1588					goto again;
1589				}
1590				splx(s);
1591				pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
1592				vm_page_lock_queues();
1593				PAGE_WAKEUP(p);
1594				vm_page_free(p);
1595				vm_page_unlock_queues();
1596			}
1597		}
1598	} else {
1599		while (size > 0) {
1600			while ((p = vm_page_lookup(object, start)) != 0) {
1601				s = splhigh();
1602				if (p->bmapped) {
1603					splx(s);
1604					break;
1605				}
1606				if ((p->flags & PG_BUSY) || p->busy) {
1607					p->flags |= PG_WANTED;
1608					tsleep((caddr_t) p, PVM, "vmopar", 0);
1609					splx(s);
1610					goto again;
1611				}
1612				splx(s);
1613				pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
1614				vm_page_lock_queues();
1615				PAGE_WAKEUP(p);
1616				vm_page_free(p);
1617				vm_page_unlock_queues();
1618			}
1619			start += PAGE_SIZE;
1620			size -= PAGE_SIZE;
1621		}
1622	}
1623	--object->paging_in_progress;
1624	if (object->paging_in_progress == 0)
1625		wakeup((caddr_t) object);
1626}
1627
1628/*
1629 *	Routine:	vm_object_coalesce
1630 *	Function:	Coalesces two objects backing up adjoining
1631 *			regions of memory into a single object.
1632 *
1633 *	returns TRUE if objects were combined.
1634 *
1635 *	NOTE:	Only works at the moment if the second object is NULL -
1636 *		if it's not, which object do we lock first?
1637 *
1638 *	Parameters:
1639 *		prev_object	First object to coalesce
1640 *		prev_offset	Offset into prev_object
1641 *		next_object	Second object to coalesce
1642 *		next_offset	Offset into next_object
1643 *
1644 *		prev_size	Size of reference to prev_object
1645 *		next_size	Size of reference to next_object
1646 *
1647 *	Conditions:
1648 *	The object must *not* be locked.
1649 */
1650boolean_t
1651vm_object_coalesce(prev_object, next_object,
1652    prev_offset, next_offset,
1653    prev_size, next_size)
1654	register vm_object_t prev_object;
1655	vm_object_t next_object;
1656	vm_offset_t prev_offset, next_offset;
1657	vm_size_t prev_size, next_size;
1658{
1659	vm_size_t newsize;
1660
1661	if (next_object != NULL) {
1662		return (FALSE);
1663	}
1664	if (prev_object == NULL) {
1665		return (TRUE);
1666	}
1667	vm_object_lock(prev_object);
1668
1669	/*
1670	 * Try to collapse the object first
1671	 */
1672	vm_object_collapse(prev_object);
1673
1674	/*
1675	 * Can't coalesce if: more than one reference, paged out, shadows
1676	 * another object, or has a copy elsewhere (any of which mean that
1677	 * the pages not mapped to prev_entry may be in use anyway).
1678	 */
1679
1680	if (prev_object->ref_count > 1 ||
1681	    prev_object->pager != NULL ||
1682	    prev_object->shadow != NULL ||
1683	    prev_object->copy != NULL) {
1684		vm_object_unlock(prev_object);
1685		return (FALSE);
1686	}
1687	/*
1688	 * Remove any pages that may still be in the object from a previous
1689	 * deallocation.
1690	 */
1691
1692	vm_object_page_remove(prev_object,
1693	    prev_offset + prev_size,
1694	    prev_offset + prev_size + next_size);
1695
1696	/*
1697	 * Extend the object if necessary.
1698	 */
1699	newsize = prev_offset + prev_size + next_size;
1700	if (newsize > prev_object->size)
1701		prev_object->size = newsize;
1702
1703	vm_object_unlock(prev_object);
1704	return (TRUE);
1705}
1706
1707/*
1708 * returns page after looking up in shadow chain
1709 */
1710
1711vm_page_t
1712vm_object_page_lookup(object, offset)
1713	vm_object_t object;
1714	vm_offset_t offset;
1715{
1716	vm_page_t m;
1717
1718	if (!(m = vm_page_lookup(object, offset))) {
1719		if (!object->shadow)
1720			return 0;
1721		else
1722			return vm_object_page_lookup(object->shadow, offset + object->shadow_offset);
1723	}
1724	return m;
1725}
1726
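/*
 * Debugging helper: returns 1 if the object is reachable from the given
 * map (or from the single map entry, when one is supplied), either
 * directly or through an entry's shadow chain; 0 otherwise.
 */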
1727int
1728_vm_object_in_map(map, object, entry)
1729	vm_map_t map;
1730	vm_object_t object;
1731	vm_map_entry_t entry;
1732{
1733	vm_map_t tmpm;
1734	vm_map_entry_t tmpe;
1735	vm_object_t obj;
1736	int entcount;
1737
1738	if (map == 0)
1739		return 0;
1740
1741	if (entry == 0) {
1742		tmpe = map->header.next;
1743		entcount = map->nentries;
1744		while (entcount-- && (tmpe != &map->header)) {
1745			if( _vm_object_in_map(map, object, tmpe)) {
1746				return 1;
1747			}
1748			tmpe = tmpe->next;
1749		}
1750	} else if (entry->is_sub_map || entry->is_a_map) {
1751		tmpm = entry->object.share_map;
1752		tmpe = tmpm->header.next;
1753		entcount = tmpm->nentries;
1754		while (entcount-- && tmpe != &tmpm->header) {
1755			if( _vm_object_in_map(tmpm, object, tmpe)) {
1756				return 1;
1757			}
1758			tmpe = tmpe->next;
1759		}
1760	} else if ((obj = entry->object.vm_object) != NULL) {
1761		for(; obj; obj=obj->shadow)
1762			if( obj == object) {
1763				return 1;
1764			}
1765	}
1766	return 0;
1767}
1768
1769int
1770vm_object_in_map( object)
1771	vm_object_t object;
1772{
1773	struct proc *p;
1774	for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
1775		if( !p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
1776			continue;
1777/*
1778		if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
1779			continue;
1780		}
1781*/
1782		if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0))
1783			return 1;
1784	}
1785	if( _vm_object_in_map( kernel_map, object, 0))
1786		return 1;
1787	if( _vm_object_in_map( kmem_map, object, 0))
1788		return 1;
1789	if( _vm_object_in_map( pager_map, object, 0))
1790		return 1;
1791	if( _vm_object_in_map( buffer_map, object, 0))
1792		return 1;
1793	if( _vm_object_in_map( io_map, object, 0))
1794		return 1;
1795	if( _vm_object_in_map( phys_map, object, 0))
1796		return 1;
1797	if( _vm_object_in_map( mb_map, object, 0))
1798		return 1;
1799	if( _vm_object_in_map( u_map, object, 0))
1800		return 1;
1801	return 0;
1802}
1803
1804void
1805vm_object_check() {
1806	int i;
1807	int maxhash = 0;
1808	vm_object_t object;
1809	vm_object_hash_entry_t entry;
1810
1811	/*
1812	 * make sure that no internal objs are hashed
1813	 */
1814	for (i=0; i<VM_OBJECT_HASH_COUNT;i++) {
1815		int lsize = 0;
1816		for (entry = vm_object_hashtable[i].tqh_first;
1817		    entry != NULL;
1818		    entry = entry->hash_links.tqe_next) {
1819			if( entry->object->flags & OBJ_INTERNAL) {
1820				printf("vmochk: internal obj on hash: size: %d\n", entry->object->size);
1821			}
1822			++lsize;
1823		}
1824		if( lsize > maxhash)
1825			maxhash = lsize;
1826	}
1827
1828	printf("maximum object hash queue size: %d\n",  maxhash);
1829
1830	/*
1831	 * make sure that internal objs are in a map somewhere
1832	 * and none have zero ref counts.
1833	 */
1834	for (object = vm_object_list.tqh_first;
1835			object != NULL;
1836			object = object->object_list.tqe_next) {
1837		if( object->flags & OBJ_INTERNAL) {
1838			if( object->ref_count == 0) {
1839				printf("vmochk: internal obj has zero ref count: %d\n",
1840					object->size);
1841			}
1842			if( !vm_object_in_map(object)) {
1843				printf("vmochk: internal obj is not in a map: ref: %d, size: %d, pager: 0x%x, shadow: 0x%x\n", object->ref_count, object->size, object->pager, object->shadow);
1844			}
1845		}
1846	}
1847}
1848
1849#define DEBUG
1850#if defined(DEBUG) || defined(DDB)
1851/*
1852 *	vm_object_print:	[ debug ]
1853 */
1854void
1855vm_object_print(object, full)
1856	vm_object_t object;
1857	boolean_t full;
1858{
1859	register vm_page_t p;
1860	extern int indent;
1861
1862	register int count;
1863
1864	if (object == NULL)
1865		return;
1866
1867	iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
1868	    (int) object, (int) object->size,
1869	    object->resident_page_count, object->ref_count);
1870	printf("pager=0x%x+0x%x, shadow=(0x%x)+0x%x\n",
1871	    (int) object->pager, (int) object->paging_offset,
1872	    (int) object->shadow, (int) object->shadow_offset);
1873	printf("cache: next=%p, prev=%p\n",
1874	    object->cached_list.tqe_next, object->cached_list.tqe_prev);
1875
1876	if (!full)
1877		return;
1878
1879	indent += 2;
1880	count = 0;
1881	for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
1882		if (count == 0)
1883			iprintf("memory:=");
1884		else if (count == 6) {
1885			printf("\n");
1886			iprintf(" ...");
1887			count = 0;
1888		} else
1889			printf(",");
1890		count++;
1891
1892		printf("(off=0x%lx,page=0x%lx)",
1893		    (u_long) p->offset, (u_long) VM_PAGE_TO_PHYS(p));
1894	}
1895	if (count != 0)
1896		printf("\n");
1897	indent -= 2;
1898}
1899#endif				/* defined(DEBUG) || defined(DDB) */
1900