vm_object.c revision 12767
1/* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 37 * 38 * 39 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 * 64 * $Id: vm_object.c,v 1.58 1995/12/07 12:48:21 davidg Exp $ 65 */ 66 67/* 68 * Virtual memory object module. 69 */ 70 71#include <sys/param.h> 72#include <sys/systm.h> 73#include <sys/kernel.h> 74#include <sys/proc.h> /* for curproc, pageproc */ 75#include <sys/malloc.h> 76#include <sys/vnode.h> 77#include <sys/mount.h> 78#include <sys/vmmeter.h> 79 80#include <vm/vm.h> 81#include <vm/vm_param.h> 82#include <vm/vm_prot.h> 83#include <vm/lock.h> 84#include <vm/pmap.h> 85#include <vm/vm_map.h> 86#include <vm/vm_object.h> 87#include <vm/vm_page.h> 88#include <vm/vm_pageout.h> 89#include <vm/vm_pager.h> 90#include <vm/swap_pager.h> 91#include <vm/vm_kern.h> 92#include <vm/vm_extern.h> 93 94#ifdef DDB 95extern void vm_object_check __P((void)); 96#endif 97 98static void _vm_object_allocate __P((objtype_t, vm_size_t, vm_object_t)); 99#ifdef DDB 100static int _vm_object_in_map __P((vm_map_t map, vm_object_t object, 101 vm_map_entry_t entry)); 102static int vm_object_in_map __P((vm_object_t object)); 103#endif 104static vm_page_t 105 vm_object_page_lookup __P((vm_object_t object, 106 vm_offset_t offset)); 107static void vm_object_qcollapse __P((vm_object_t object)); 108 109/* 110 * Virtual memory objects maintain the actual data 111 * associated with allocated virtual memory. A given 112 * page of memory exists within exactly one object. 113 * 114 * An object is only deallocated when all "references" 115 * are given up. Only one "reference" to a given 116 * region of an object should be writeable. 117 * 118 * Associated with each object is a list of all resident 119 * memory pages belonging to that object; this list is 120 * maintained by the "vm_page" module, and locked by the object's 121 * lock. 122 * 123 * Each object also records a "pager" routine which is 124 * used to retrieve (and store) pages to the proper backing 125 * storage. In addition, objects may be backed by other 126 * objects from which they were virtual-copied. 127 * 128 * The only items within the object structure which are 129 * modified after time of creation are: 130 * reference count locked by object's lock 131 * pager routine locked by object's lock 132 * 133 */ 134 135int vm_object_cache_max; 136struct object_q vm_object_cached_list; 137int vm_object_cached; 138struct object_q vm_object_list; 139long vm_object_count; 140vm_object_t kernel_object; 141vm_object_t kmem_object; 142struct vm_object kernel_object_store; 143struct vm_object kmem_object_store; 144extern int vm_pageout_page_count; 145 146long object_collapses; 147long object_bypasses; 148 149static void 150_vm_object_allocate(type, size, object) 151 objtype_t type; 152 vm_size_t size; 153 register vm_object_t object; 154{ 155 TAILQ_INIT(&object->memq); 156 TAILQ_INIT(&object->shadow_head); 157 158 object->type = type; 159 object->size = size; 160 object->ref_count = 1; 161 object->flags = 0; 162 object->paging_in_progress = 0; 163 object->resident_page_count = 0; 164 object->handle = NULL; 165 object->paging_offset = (vm_ooffset_t) 0; 166 object->backing_object = NULL; 167 object->backing_object_offset = (vm_ooffset_t) 0; 168 169 object->last_read = 0; 170 171 TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); 172 vm_object_count++; 173} 174 175/* 176 * vm_object_init: 177 * 178 * Initialize the VM objects module. 179 */ 180void 181vm_object_init() 182{ 183 TAILQ_INIT(&vm_object_cached_list); 184 TAILQ_INIT(&vm_object_list); 185 vm_object_count = 0; 186 187 vm_object_cache_max = 84; 188 if (cnt.v_page_count > 1000) 189 vm_object_cache_max += (cnt.v_page_count - 1000) / 4; 190 191 kernel_object = &kernel_object_store; 192 _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), 193 kernel_object); 194 195 kmem_object = &kmem_object_store; 196 _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), 197 kmem_object); 198} 199 200/* 201 * vm_object_allocate: 202 * 203 * Returns a new object with the given size. 204 */ 205 206vm_object_t 207vm_object_allocate(type, size) 208 objtype_t type; 209 vm_size_t size; 210{ 211 register vm_object_t result; 212 213 result = (vm_object_t) 214 malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK); 215 216 217 _vm_object_allocate(type, size, result); 218 219 return (result); 220} 221 222 223/* 224 * vm_object_reference: 225 * 226 * Gets another reference to the given object. 227 */ 228inline void 229vm_object_reference(object) 230 register vm_object_t object; 231{ 232 if (object == NULL) 233 return; 234 235 if (object->ref_count == 0) { 236 if ((object->flags & OBJ_CANPERSIST) == 0) 237 panic("vm_object_reference: non-persistent object with 0 ref_count"); 238 TAILQ_REMOVE(&vm_object_cached_list, object, cached_list); 239 vm_object_cached--; 240 } 241 object->ref_count++; 242} 243 244/* 245 * vm_object_deallocate: 246 * 247 * Release a reference to the specified object, 248 * gained either through a vm_object_allocate 249 * or a vm_object_reference call. When all references 250 * are gone, storage associated with this object 251 * may be relinquished. 252 * 253 * No object may be locked. 254 */ 255void 256vm_object_deallocate(object) 257 vm_object_t object; 258{ 259 vm_object_t temp; 260 261 while (object != NULL) { 262 263 if (object->ref_count == 0) 264 panic("vm_object_deallocate: object deallocated too many times"); 265 266 /* 267 * Lose the reference 268 */ 269 object->ref_count--; 270 271 if (object->ref_count != 0) { 272 if ((object->ref_count == 1) && 273 (object->handle == NULL) && 274 (object->type == OBJT_DEFAULT || 275 object->type == OBJT_SWAP)) { 276 vm_object_t robject; 277 robject = object->shadow_head.tqh_first; 278 if ((robject != NULL) && 279 (robject->handle == NULL) && 280 (robject->type == OBJT_DEFAULT || 281 robject->type == OBJT_SWAP)) { 282 int s; 283 robject->ref_count += 2; 284 object->ref_count += 2; 285 286 do { 287 s = splhigh(); 288 while (robject->paging_in_progress) { 289 robject->flags |= OBJ_PIPWNT; 290 tsleep(robject, PVM, "objde1", 0); 291 } 292 293 while (object->paging_in_progress) { 294 object->flags |= OBJ_PIPWNT; 295 tsleep(object, PVM, "objde2", 0); 296 } 297 splx(s); 298 299 } while( object->paging_in_progress || robject->paging_in_progress); 300 301 object->ref_count -= 2; 302 robject->ref_count -= 2; 303 if( robject->ref_count == 0) { 304 robject->ref_count += 1; 305 object = robject; 306 continue; 307 } 308 vm_object_collapse(robject); 309 return; 310 } 311 } 312 /* 313 * If there are still references, then we are done. 314 */ 315 return; 316 } 317 318 if (object->type == OBJT_VNODE) { 319 struct vnode *vp = object->handle; 320 321 vp->v_flag &= ~VTEXT; 322 } 323 324 /* 325 * See if this object can persist and has some resident 326 * pages. If so, enter it in the cache. 327 */ 328 if (object->flags & OBJ_CANPERSIST) { 329 if (object->resident_page_count != 0) { 330 vm_object_page_clean(object, 0, 0 ,TRUE, TRUE); 331 TAILQ_INSERT_TAIL(&vm_object_cached_list, object, 332 cached_list); 333 vm_object_cached++; 334 335 vm_object_cache_trim(); 336 return; 337 } else { 338 object->flags &= ~OBJ_CANPERSIST; 339 } 340 } 341 342 /* 343 * Make sure no one uses us. 344 */ 345 object->flags |= OBJ_DEAD; 346 347 temp = object->backing_object; 348 if (temp) 349 TAILQ_REMOVE(&temp->shadow_head, object, shadow_list); 350 vm_object_terminate(object); 351 /* unlocks and deallocates object */ 352 object = temp; 353 } 354} 355 356/* 357 * vm_object_terminate actually destroys the specified object, freeing 358 * up all previously used resources. 359 * 360 * The object must be locked. 361 */ 362void 363vm_object_terminate(object) 364 register vm_object_t object; 365{ 366 register vm_page_t p; 367 int s; 368 369 /* 370 * wait for the pageout daemon to be done with the object 371 */ 372 s = splhigh(); 373 while (object->paging_in_progress) { 374 object->flags |= OBJ_PIPWNT; 375 tsleep(object, PVM, "objtrm", 0); 376 } 377 splx(s); 378 379 if (object->paging_in_progress != 0) 380 panic("vm_object_deallocate: pageout in progress"); 381 382 /* 383 * Clean and free the pages, as appropriate. All references to the 384 * object are gone, so we don't need to lock it. 385 */ 386 if (object->type == OBJT_VNODE) { 387 struct vnode *vp = object->handle; 388 389 VOP_LOCK(vp); 390 vm_object_page_clean(object, 0, 0, TRUE, FALSE); 391 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); 392 VOP_UNLOCK(vp); 393 } 394 395 /* 396 * Now free the pages. For internal objects, this also removes them 397 * from paging queues. 398 */ 399 while ((p = object->memq.tqh_first) != NULL) { 400 if (p->flags & PG_BUSY) 401 printf("vm_object_terminate: freeing busy page\n"); 402 PAGE_WAKEUP(p); 403 vm_page_free(p); 404 cnt.v_pfree++; 405 } 406 407 /* 408 * Let the pager know object is dead. 409 */ 410 vm_pager_deallocate(object); 411 412 TAILQ_REMOVE(&vm_object_list, object, object_list); 413 vm_object_count--; 414 415 wakeup(object); 416 417 /* 418 * Free the space for the object. 419 */ 420 free((caddr_t) object, M_VMOBJ); 421} 422 423/* 424 * vm_object_page_clean 425 * 426 * Clean all dirty pages in the specified range of object. 427 * Leaves page on whatever queue it is currently on. 428 * 429 * Odd semantics: if start == end, we clean everything. 430 * 431 * The object must be locked. 432 */ 433 434void 435vm_object_page_clean(object, start, end, syncio, lockflag) 436 vm_object_t object; 437 vm_pindex_t start; 438 vm_pindex_t end; 439 boolean_t syncio; 440 boolean_t lockflag; 441{ 442 register vm_page_t p; 443 register vm_offset_t tstart, tend; 444 int s; 445 struct vnode *vp; 446 int runlen; 447 vm_page_t ma[vm_pageout_page_count]; 448 449 if (object->type != OBJT_VNODE || 450 (object->flags & OBJ_MIGHTBEDIRTY) == 0) 451 return; 452 453 vp = object->handle; 454 455 if (lockflag) 456 VOP_LOCK(vp); 457 object->flags |= OBJ_CLEANING; 458 459 tstart = start; 460 if (end == 0) { 461 tend = object->size; 462 } else { 463 tend = end; 464 } 465 if ((tstart == 0) && (tend == object->size)) { 466 object->flags &= ~(OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); 467 } 468 469 runlen = 0; 470 for(;tstart < tend; tstart += 1) { 471relookup: 472 p = vm_page_lookup(object, tstart); 473 if (!p) { 474 if (runlen > 0) { 475 vm_pageout_flush(ma, runlen, syncio); 476 runlen = 0; 477 } 478 continue; 479 } 480 if ((p->valid == 0) || (p->flags & PG_CACHE)) { 481 if (runlen > 0) { 482 vm_pageout_flush(ma, runlen, syncio); 483 runlen = 0; 484 } 485 continue; 486 } 487 488 vm_page_protect(p, VM_PROT_READ); 489 490 s = splhigh(); 491 while ((p->flags & PG_BUSY) || p->busy) { 492 if (runlen > 0) { 493 splx(s); 494 vm_pageout_flush(ma, runlen, syncio); 495 runlen = 0; 496 goto relookup; 497 } 498 p->flags |= PG_WANTED|PG_REFERENCED; 499 tsleep(p, PVM, "vpcwai", 0); 500 splx(s); 501 goto relookup; 502 } 503 splx(s); 504 505 if (p->dirty == 0) 506 vm_page_test_dirty(p); 507 508 if ((p->valid & p->dirty) != 0) { 509 ma[runlen] = p; 510 p->flags |= PG_BUSY; 511 runlen++; 512 if (runlen >= vm_pageout_page_count) { 513 vm_pageout_flush(ma, runlen, syncio); 514 runlen = 0; 515 } 516 } else if (runlen > 0) { 517 vm_pageout_flush(ma, runlen, syncio); 518 runlen = 0; 519 } 520 521 } 522 if (runlen > 0) { 523 vm_pageout_flush(ma, runlen, syncio); 524 } 525 526 VOP_FSYNC(vp, NULL, syncio, curproc); 527 528 if (lockflag) 529 VOP_UNLOCK(vp); 530 object->flags &= ~OBJ_CLEANING; 531 return; 532} 533 534/* 535 * vm_object_deactivate_pages 536 * 537 * Deactivate all pages in the specified object. (Keep its pages 538 * in memory even though it is no longer referenced.) 539 * 540 * The object must be locked. 541 */ 542void 543vm_object_deactivate_pages(object) 544 register vm_object_t object; 545{ 546 register vm_page_t p, next; 547 548 for (p = object->memq.tqh_first; p != NULL; p = next) { 549 next = p->listq.tqe_next; 550 vm_page_deactivate(p); 551 } 552} 553 554/* 555 * Trim the object cache to size. 556 */ 557void 558vm_object_cache_trim() 559{ 560 register vm_object_t object; 561 562 while (vm_object_cached > vm_object_cache_max) { 563 object = vm_object_cached_list.tqh_first; 564 565 vm_object_reference(object); 566 pager_cache(object, FALSE); 567 } 568} 569 570 571/* 572 * vm_object_pmap_copy: 573 * 574 * Makes all physical pages in the specified 575 * object range copy-on-write. No writeable 576 * references to these pages should remain. 577 * 578 * The object must *not* be locked. 579 */ 580void 581vm_object_pmap_copy(object, start, end) 582 register vm_object_t object; 583 register vm_pindex_t start; 584 register vm_pindex_t end; 585{ 586 register vm_page_t p; 587 588 if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0) 589 return; 590 591 for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { 592 vm_page_protect(p, VM_PROT_READ); 593 } 594 595 object->flags &= ~OBJ_WRITEABLE; 596} 597 598/* 599 * vm_object_pmap_remove: 600 * 601 * Removes all physical pages in the specified 602 * object range from all physical maps. 603 * 604 * The object must *not* be locked. 605 */ 606void 607vm_object_pmap_remove(object, start, end) 608 register vm_object_t object; 609 register vm_pindex_t start; 610 register vm_pindex_t end; 611{ 612 register vm_page_t p; 613 if (object == NULL) 614 return; 615 for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { 616 vm_page_protect(p, VM_PROT_NONE); 617 } 618} 619 620/* 621 * vm_object_copy: 622 * 623 * Create a new object which is a copy of an existing 624 * object, and mark all of the pages in the existing 625 * object 'copy-on-write'. The new object has one reference. 626 * Returns the new object. 627 * 628 * May defer the copy until later if the object is not backed 629 * up by a non-default pager. 630 */ 631void 632vm_object_copy(src_object, src_offset, 633 dst_object, dst_offset, src_needs_copy) 634 register vm_object_t src_object; 635 vm_pindex_t src_offset; 636 vm_object_t *dst_object;/* OUT */ 637 vm_pindex_t *dst_offset;/* OUT */ 638 boolean_t *src_needs_copy; /* OUT */ 639{ 640 if (src_object == NULL) { 641 /* 642 * Nothing to copy 643 */ 644 *dst_object = NULL; 645 *dst_offset = 0; 646 *src_needs_copy = FALSE; 647 return; 648 } 649 650 /* 651 * Try to collapse the object before copying it. 652 */ 653 if (src_object->handle == NULL && 654 (src_object->type == OBJT_DEFAULT || 655 src_object->type == OBJT_SWAP)) 656 vm_object_collapse(src_object); 657 658 659 /* 660 * Make another reference to the object 661 */ 662 src_object->ref_count++; 663 664 *dst_object = src_object; 665 *dst_offset = src_offset; 666 667 /* 668 * Must make a shadow when write is desired 669 */ 670 *src_needs_copy = TRUE; 671 return; 672} 673 674/* 675 * vm_object_shadow: 676 * 677 * Create a new object which is backed by the 678 * specified existing object range. The source 679 * object reference is deallocated. 680 * 681 * The new object and offset into that object 682 * are returned in the source parameters. 683 */ 684 685void 686vm_object_shadow(object, offset, length) 687 vm_object_t *object; /* IN/OUT */ 688 vm_ooffset_t *offset; /* IN/OUT */ 689 vm_size_t length; 690{ 691 register vm_object_t source; 692 register vm_object_t result; 693 694 source = *object; 695 696 /* 697 * Allocate a new object with the given length 698 */ 699 700 if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL) 701 panic("vm_object_shadow: no object for shadowing"); 702 703 /* 704 * The new object shadows the source object, adding a reference to it. 705 * Our caller changes his reference to point to the new object, 706 * removing a reference to the source object. Net result: no change 707 * of reference count. 708 */ 709 result->backing_object = source; 710 if (source) 711 TAILQ_INSERT_TAIL(&result->backing_object->shadow_head, result, shadow_list); 712 713 /* 714 * Store the offset into the source object, and fix up the offset into 715 * the new object. 716 */ 717 718 result->backing_object_offset = *offset; 719 720 /* 721 * Return the new things 722 */ 723 724 *offset = 0; 725 *object = result; 726} 727 728 729/* 730 * this version of collapse allows the operation to occur earlier and 731 * when paging_in_progress is true for an object... This is not a complete 732 * operation, but should plug 99.9% of the rest of the leaks. 733 */ 734static void 735vm_object_qcollapse(object) 736 register vm_object_t object; 737{ 738 register vm_object_t backing_object; 739 register vm_pindex_t backing_offset_index, paging_offset_index; 740 vm_pindex_t backing_object_paging_offset_index; 741 vm_pindex_t new_pindex; 742 register vm_page_t p, pp; 743 register vm_size_t size; 744 745 backing_object = object->backing_object; 746 if (backing_object->ref_count != 1) 747 return; 748 749 backing_object->ref_count += 2; 750 751 backing_offset_index = OFF_TO_IDX(object->backing_object_offset); 752 backing_object_paging_offset_index = OFF_TO_IDX(backing_object->paging_offset); 753 paging_offset_index = OFF_TO_IDX(object->paging_offset); 754 size = object->size; 755 p = backing_object->memq.tqh_first; 756 while (p) { 757 vm_page_t next; 758 759 next = p->listq.tqe_next; 760 if ((p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) || 761 !p->valid || p->hold_count || p->wire_count || p->busy) { 762 p = next; 763 continue; 764 } 765 vm_page_protect(p, VM_PROT_NONE); 766 new_pindex = p->pindex - backing_offset_index; 767 if (p->pindex < backing_offset_index || 768 new_pindex >= size) { 769 if (backing_object->type == OBJT_SWAP) 770 swap_pager_freespace(backing_object, 771 backing_object_paging_offset_index+p->pindex, 772 1); 773 vm_page_free(p); 774 } else { 775 pp = vm_page_lookup(object, new_pindex); 776 if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object, 777 paging_offset_index + new_pindex, NULL, NULL))) { 778 if (backing_object->type == OBJT_SWAP) 779 swap_pager_freespace(backing_object, 780 backing_object_paging_offset_index + p->pindex, 1); 781 vm_page_free(p); 782 } else { 783 if (backing_object->type == OBJT_SWAP) 784 swap_pager_freespace(backing_object, 785 backing_object_paging_offset_index + p->pindex, 1); 786 vm_page_rename(p, object, new_pindex); 787 p->dirty = VM_PAGE_BITS_ALL; 788 } 789 } 790 p = next; 791 } 792 backing_object->ref_count -= 2; 793} 794 795/* 796 * vm_object_collapse: 797 * 798 * Collapse an object with the object backing it. 799 * Pages in the backing object are moved into the 800 * parent, and the backing object is deallocated. 801 */ 802void 803vm_object_collapse(object) 804 vm_object_t object; 805 806{ 807 vm_object_t backing_object; 808 vm_ooffset_t backing_offset; 809 vm_size_t size; 810 vm_pindex_t new_pindex, backing_offset_index; 811 vm_page_t p, pp; 812 813 while (TRUE) { 814 /* 815 * Verify that the conditions are right for collapse: 816 * 817 * The object exists and no pages in it are currently being paged 818 * out. 819 */ 820 if (object == NULL) 821 return; 822 823 /* 824 * Make sure there is a backing object. 825 */ 826 if ((backing_object = object->backing_object) == NULL) 827 return; 828 829 /* 830 * we check the backing object first, because it is most likely 831 * not collapsable. 832 */ 833 if (backing_object->handle != NULL || 834 (backing_object->type != OBJT_DEFAULT && 835 backing_object->type != OBJT_SWAP) || 836 (backing_object->flags & OBJ_DEAD) || 837 object->handle != NULL || 838 (object->type != OBJT_DEFAULT && 839 object->type != OBJT_SWAP) || 840 (object->flags & OBJ_DEAD)) { 841 return; 842 } 843 844 if (object->paging_in_progress != 0 || 845 backing_object->paging_in_progress != 0) { 846 vm_object_qcollapse(object); 847 return; 848 } 849 850 /* 851 * We know that we can either collapse the backing object (if 852 * the parent is the only reference to it) or (perhaps) remove 853 * the parent's reference to it. 854 */ 855 856 backing_offset = object->backing_object_offset; 857 backing_offset_index = OFF_TO_IDX(backing_offset); 858 size = object->size; 859 860 /* 861 * If there is exactly one reference to the backing object, we 862 * can collapse it into the parent. 863 */ 864 865 if (backing_object->ref_count == 1) { 866 867 backing_object->flags |= OBJ_DEAD; 868 /* 869 * We can collapse the backing object. 870 * 871 * Move all in-memory pages from backing_object to the 872 * parent. Pages that have been paged out will be 873 * overwritten by any of the parent's pages that 874 * shadow them. 875 */ 876 877 while ((p = backing_object->memq.tqh_first) != 0) { 878 879 new_pindex = p->pindex - backing_offset_index; 880 881 /* 882 * If the parent has a page here, or if this 883 * page falls outside the parent, dispose of 884 * it. 885 * 886 * Otherwise, move it as planned. 887 */ 888 889 if (p->pindex < backing_offset_index || 890 new_pindex >= size) { 891 vm_page_protect(p, VM_PROT_NONE); 892 PAGE_WAKEUP(p); 893 vm_page_free(p); 894 } else { 895 pp = vm_page_lookup(object, new_pindex); 896 if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object, 897 OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL))) { 898 vm_page_protect(p, VM_PROT_NONE); 899 PAGE_WAKEUP(p); 900 vm_page_free(p); 901 } else { 902 vm_page_rename(p, object, new_pindex); 903 } 904 } 905 } 906 907 /* 908 * Move the pager from backing_object to object. 909 */ 910 911 if (backing_object->type == OBJT_SWAP) { 912 backing_object->paging_in_progress++; 913 if (object->type == OBJT_SWAP) { 914 object->paging_in_progress++; 915 /* 916 * copy shadow object pages into ours 917 * and destroy unneeded pages in 918 * shadow object. 919 */ 920 swap_pager_copy( 921 backing_object, 922 OFF_TO_IDX(backing_object->paging_offset), 923 object, 924 OFF_TO_IDX(object->paging_offset), 925 OFF_TO_IDX(object->backing_object_offset)); 926 vm_object_pip_wakeup(object); 927 } else { 928 object->paging_in_progress++; 929 /* 930 * move the shadow backing_object's pager data to 931 * "object" and convert "object" type to OBJT_SWAP. 932 */ 933 object->type = OBJT_SWAP; 934 object->un_pager.swp.swp_nblocks = 935 backing_object->un_pager.swp.swp_nblocks; 936 object->un_pager.swp.swp_allocsize = 937 backing_object->un_pager.swp.swp_allocsize; 938 object->un_pager.swp.swp_blocks = 939 backing_object->un_pager.swp.swp_blocks; 940 object->un_pager.swp.swp_poip = /* XXX */ 941 backing_object->un_pager.swp.swp_poip; 942 object->paging_offset = backing_object->paging_offset + backing_offset; 943 TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list); 944 945 /* 946 * Convert backing object from OBJT_SWAP to 947 * OBJT_DEFAULT. XXX - only the TAILQ_REMOVE is 948 * actually necessary. 949 */ 950 backing_object->type = OBJT_DEFAULT; 951 TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list); 952 /* 953 * free unnecessary blocks 954 */ 955 swap_pager_freespace(object, 0, 956 OFF_TO_IDX(object->paging_offset)); 957 vm_object_pip_wakeup(object); 958 } 959 960 vm_object_pip_wakeup(backing_object); 961 } 962 /* 963 * Object now shadows whatever backing_object did. 964 * Note that the reference to backing_object->backing_object 965 * moves from within backing_object to within object. 966 */ 967 968 TAILQ_REMOVE(&object->backing_object->shadow_head, object, 969 shadow_list); 970 if (backing_object->backing_object) 971 TAILQ_REMOVE(&backing_object->backing_object->shadow_head, 972 backing_object, shadow_list); 973 object->backing_object = backing_object->backing_object; 974 if (object->backing_object) 975 TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, 976 object, shadow_list); 977 978 object->backing_object_offset += backing_object->backing_object_offset; 979 /* 980 * Discard backing_object. 981 * 982 * Since the backing object has no pages, no pager left, 983 * and no object references within it, all that is 984 * necessary is to dispose of it. 985 */ 986 987 TAILQ_REMOVE(&vm_object_list, backing_object, 988 object_list); 989 vm_object_count--; 990 991 free((caddr_t) backing_object, M_VMOBJ); 992 993 object_collapses++; 994 } else { 995 /* 996 * If all of the pages in the backing object are 997 * shadowed by the parent object, the parent object no 998 * longer has to shadow the backing object; it can 999 * shadow the next one in the chain. 1000 * 1001 * The backing object must not be paged out - we'd have 1002 * to check all of the paged-out pages, as well. 1003 */ 1004 1005 if (backing_object->type != OBJT_DEFAULT) { 1006 return; 1007 } 1008 /* 1009 * Should have a check for a 'small' number of pages 1010 * here. 1011 */ 1012 1013 for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) { 1014 new_pindex = p->pindex - backing_offset_index; 1015 1016 /* 1017 * If the parent has a page here, or if this 1018 * page falls outside the parent, keep going. 1019 * 1020 * Otherwise, the backing_object must be left in 1021 * the chain. 1022 */ 1023 1024 if (p->pindex >= backing_offset_index && 1025 new_pindex <= size) { 1026 1027 pp = vm_page_lookup(object, new_pindex); 1028 1029 if ((pp == NULL || pp->valid == 0) && 1030 !vm_pager_has_page(object, OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL)) { 1031 /* 1032 * Page still needed. Can't go any 1033 * further. 1034 */ 1035 return; 1036 } 1037 } 1038 } 1039 1040 /* 1041 * Make the parent shadow the next object in the 1042 * chain. Deallocating backing_object will not remove 1043 * it, since its reference count is at least 2. 1044 */ 1045 1046 TAILQ_REMOVE(&object->backing_object->shadow_head, 1047 object, shadow_list); 1048 vm_object_reference(object->backing_object = backing_object->backing_object); 1049 if (object->backing_object) 1050 TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, 1051 object, shadow_list); 1052 object->backing_object_offset += backing_object->backing_object_offset; 1053 1054 /* 1055 * Drop the reference count on backing_object. Since 1056 * its ref_count was at least 2, it will not vanish; 1057 * so we don't need to call vm_object_deallocate. 1058 */ 1059 if (backing_object->ref_count == 1) 1060 printf("should have called obj deallocate\n"); 1061 backing_object->ref_count--; 1062 1063 object_bypasses++; 1064 1065 } 1066 1067 /* 1068 * Try again with this object's new backing object. 1069 */ 1070 } 1071} 1072 1073/* 1074 * vm_object_page_remove: [internal] 1075 * 1076 * Removes all physical pages in the specified 1077 * object range from the object's list of pages. 1078 * 1079 * The object must be locked. 1080 */ 1081void 1082vm_object_page_remove(object, start, end, clean_only) 1083 register vm_object_t object; 1084 register vm_pindex_t start; 1085 register vm_pindex_t end; 1086 boolean_t clean_only; 1087{ 1088 register vm_page_t p, next; 1089 unsigned int size; 1090 int s; 1091 1092 if (object == NULL) 1093 return; 1094 1095 object->paging_in_progress++; 1096again: 1097 size = end - start; 1098 if (size > 4 || size >= object->size / 4) { 1099 for (p = object->memq.tqh_first; p != NULL; p = next) { 1100 next = p->listq.tqe_next; 1101 if ((start <= p->pindex) && (p->pindex < end)) { 1102 s = splhigh(); 1103 if (p->bmapped) { 1104 splx(s); 1105 continue; 1106 } 1107 if ((p->flags & PG_BUSY) || p->busy) { 1108 p->flags |= PG_WANTED; 1109 tsleep(p, PVM, "vmopar", 0); 1110 splx(s); 1111 goto again; 1112 } 1113 splx(s); 1114 if (clean_only) { 1115 vm_page_test_dirty(p); 1116 if (p->valid & p->dirty) 1117 continue; 1118 } 1119 vm_page_protect(p, VM_PROT_NONE); 1120 PAGE_WAKEUP(p); 1121 vm_page_free(p); 1122 } 1123 } 1124 } else { 1125 while (size > 0) { 1126 while ((p = vm_page_lookup(object, start)) != 0) { 1127 s = splhigh(); 1128 if (p->bmapped) { 1129 splx(s); 1130 break; 1131 } 1132 if ((p->flags & PG_BUSY) || p->busy) { 1133 p->flags |= PG_WANTED; 1134 tsleep(p, PVM, "vmopar", 0); 1135 splx(s); 1136 goto again; 1137 } 1138 splx(s); 1139 if (clean_only) { 1140 vm_page_test_dirty(p); 1141 if (p->valid & p->dirty) 1142 continue; 1143 } 1144 vm_page_protect(p, VM_PROT_NONE); 1145 PAGE_WAKEUP(p); 1146 vm_page_free(p); 1147 } 1148 start += 1; 1149 size -= 1; 1150 } 1151 } 1152 vm_object_pip_wakeup(object); 1153} 1154 1155/* 1156 * Routine: vm_object_coalesce 1157 * Function: Coalesces two objects backing up adjoining 1158 * regions of memory into a single object. 1159 * 1160 * returns TRUE if objects were combined. 1161 * 1162 * NOTE: Only works at the moment if the second object is NULL - 1163 * if it's not, which object do we lock first? 1164 * 1165 * Parameters: 1166 * prev_object First object to coalesce 1167 * prev_offset Offset into prev_object 1168 * next_object Second object into coalesce 1169 * next_offset Offset into next_object 1170 * 1171 * prev_size Size of reference to prev_object 1172 * next_size Size of reference to next_object 1173 * 1174 * Conditions: 1175 * The object must *not* be locked. 1176 */ 1177boolean_t 1178vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size) 1179 register vm_object_t prev_object; 1180 vm_pindex_t prev_pindex; 1181 vm_size_t prev_size, next_size; 1182{ 1183 vm_size_t newsize; 1184 1185 if (prev_object == NULL) { 1186 return (TRUE); 1187 } 1188 1189 /* 1190 * Try to collapse the object first 1191 */ 1192 vm_object_collapse(prev_object); 1193 1194 /* 1195 * Can't coalesce if: . more than one reference . paged out . shadows 1196 * another object . has a copy elsewhere (any of which mean that the 1197 * pages not mapped to prev_entry may be in use anyway) 1198 */ 1199 1200 if (prev_object->ref_count > 1 || 1201 prev_object->type != OBJT_DEFAULT || 1202 prev_object->backing_object != NULL) { 1203 return (FALSE); 1204 } 1205 1206 prev_size >>= PAGE_SHIFT; 1207 next_size >>= PAGE_SHIFT; 1208 /* 1209 * Remove any pages that may still be in the object from a previous 1210 * deallocation. 1211 */ 1212 1213 vm_object_page_remove(prev_object, 1214 prev_pindex + prev_size, 1215 prev_pindex + prev_size + next_size, FALSE); 1216 1217 /* 1218 * Extend the object if necessary. 1219 */ 1220 newsize = prev_pindex + prev_size + next_size; 1221 if (newsize > prev_object->size) 1222 prev_object->size = newsize; 1223 1224 return (TRUE); 1225} 1226 1227#ifdef DDB 1228 1229static int 1230_vm_object_in_map(map, object, entry) 1231 vm_map_t map; 1232 vm_object_t object; 1233 vm_map_entry_t entry; 1234{ 1235 vm_map_t tmpm; 1236 vm_map_entry_t tmpe; 1237 vm_object_t obj; 1238 int entcount; 1239 1240 if (map == 0) 1241 return 0; 1242 1243 if (entry == 0) { 1244 tmpe = map->header.next; 1245 entcount = map->nentries; 1246 while (entcount-- && (tmpe != &map->header)) { 1247 if( _vm_object_in_map(map, object, tmpe)) { 1248 return 1; 1249 } 1250 tmpe = tmpe->next; 1251 } 1252 } else if (entry->is_sub_map || entry->is_a_map) { 1253 tmpm = entry->object.share_map; 1254 tmpe = tmpm->header.next; 1255 entcount = tmpm->nentries; 1256 while (entcount-- && tmpe != &tmpm->header) { 1257 if( _vm_object_in_map(tmpm, object, tmpe)) { 1258 return 1; 1259 } 1260 tmpe = tmpe->next; 1261 } 1262 } else if (obj = entry->object.vm_object) { 1263 for(; obj; obj=obj->backing_object) 1264 if( obj == object) { 1265 return 1; 1266 } 1267 } 1268 return 0; 1269} 1270 1271static int 1272vm_object_in_map( object) 1273 vm_object_t object; 1274{ 1275 struct proc *p; 1276 for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { 1277 if( !p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) 1278 continue; 1279/* 1280 if (p->p_stat != SRUN && p->p_stat != SSLEEP) { 1281 continue; 1282 } 1283*/ 1284 if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) 1285 return 1; 1286 } 1287 if( _vm_object_in_map( kernel_map, object, 0)) 1288 return 1; 1289 if( _vm_object_in_map( kmem_map, object, 0)) 1290 return 1; 1291 if( _vm_object_in_map( pager_map, object, 0)) 1292 return 1; 1293 if( _vm_object_in_map( buffer_map, object, 0)) 1294 return 1; 1295 if( _vm_object_in_map( io_map, object, 0)) 1296 return 1; 1297 if( _vm_object_in_map( phys_map, object, 0)) 1298 return 1; 1299 if( _vm_object_in_map( mb_map, object, 0)) 1300 return 1; 1301 if( _vm_object_in_map( u_map, object, 0)) 1302 return 1; 1303 return 0; 1304} 1305 1306 1307void 1308vm_object_check() { 1309 vm_object_t object; 1310 1311 /* 1312 * make sure that internal objs are in a map somewhere 1313 * and none have zero ref counts. 1314 */ 1315 for (object = vm_object_list.tqh_first; 1316 object != NULL; 1317 object = object->object_list.tqe_next) { 1318 if (object->handle == NULL && 1319 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { 1320 if (object->ref_count == 0) { 1321 printf("vmochk: internal obj has zero ref count: %d\n", 1322 object->size); 1323 } 1324 if (!vm_object_in_map(object)) { 1325 printf("vmochk: internal obj is not in a map: " 1326 "ref: %d, size: %d: 0x%x, backing_object: 0x%x\n", 1327 object->ref_count, object->size, 1328 object->size, object->backing_object); 1329 } 1330 } 1331 } 1332} 1333 1334/* 1335 * vm_object_print: [ debug ] 1336 */ 1337void 1338vm_object_print(iobject, full, dummy3, dummy4) 1339 /* db_expr_t */ int iobject; 1340 boolean_t full; 1341 /* db_expr_t */ int dummy3; 1342 char *dummy4; 1343{ 1344 vm_object_t object = (vm_object_t)iobject; /* XXX */ 1345 register vm_page_t p; 1346 1347 register int count; 1348 1349 if (object == NULL) 1350 return; 1351 1352 iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ", 1353 (int) object, (int) object->size, 1354 object->resident_page_count, object->ref_count); 1355 printf("offset=0x%x, backing_object=(0x%x)+0x%x\n", 1356 (int) object->paging_offset, 1357 (int) object->backing_object, (int) object->backing_object_offset); 1358 printf("cache: next=%p, prev=%p\n", 1359 object->cached_list.tqe_next, object->cached_list.tqe_prev); 1360 1361 if (!full) 1362 return; 1363 1364 indent += 2; 1365 count = 0; 1366 for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { 1367 if (count == 0) 1368 iprintf("memory:="); 1369 else if (count == 6) { 1370 printf("\n"); 1371 iprintf(" ..."); 1372 count = 0; 1373 } else 1374 printf(","); 1375 count++; 1376 1377 printf("(off=0x%lx,page=0x%lx)", 1378 (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p)); 1379 } 1380 if (count != 0) 1381 printf("\n"); 1382 indent -= 2; 1383} 1384#endif /* DDB */ 1385