vm_object.c revision 17334
1/* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 37 * 38 * 39 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 * 64 * $Id: vm_object.c,v 1.77 1996/07/27 03:24:03 dyson Exp $ 65 */ 66 67/* 68 * Virtual memory object module. 69 */ 70#include "opt_ddb.h" 71 72#include <sys/param.h> 73#include <sys/systm.h> 74#include <sys/kernel.h> 75#include <sys/proc.h> /* for curproc, pageproc */ 76#include <sys/malloc.h> 77#include <sys/vnode.h> 78#include <sys/mount.h> 79#include <sys/vmmeter.h> 80#include <sys/mman.h> 81 82#include <vm/vm.h> 83#include <vm/vm_param.h> 84#include <vm/vm_prot.h> 85#include <vm/lock.h> 86#include <vm/pmap.h> 87#include <vm/vm_map.h> 88#include <vm/vm_object.h> 89#include <vm/vm_page.h> 90#include <vm/vm_pageout.h> 91#include <vm/vm_pager.h> 92#include <vm/swap_pager.h> 93#include <vm/vm_kern.h> 94#include <vm/vm_extern.h> 95 96#ifdef DDB 97static void DDB_vm_object_check __P((void)); 98#endif 99 100static void _vm_object_allocate __P((objtype_t, vm_size_t, vm_object_t)); 101#ifdef DDB 102static int _vm_object_in_map __P((vm_map_t map, vm_object_t object, 103 vm_map_entry_t entry)); 104static int vm_object_in_map __P((vm_object_t object)); 105#endif 106static void vm_object_qcollapse __P((vm_object_t object)); 107#ifdef not_used 108static void vm_object_deactivate_pages __P((vm_object_t)); 109#endif 110static void vm_object_terminate __P((vm_object_t)); 111static void vm_object_cache_trim __P((void)); 112 113/* 114 * Virtual memory objects maintain the actual data 115 * associated with allocated virtual memory. A given 116 * page of memory exists within exactly one object. 117 * 118 * An object is only deallocated when all "references" 119 * are given up. Only one "reference" to a given 120 * region of an object should be writeable. 121 * 122 * Associated with each object is a list of all resident 123 * memory pages belonging to that object; this list is 124 * maintained by the "vm_page" module, and locked by the object's 125 * lock. 126 * 127 * Each object also records a "pager" routine which is 128 * used to retrieve (and store) pages to the proper backing 129 * storage. In addition, objects may be backed by other 130 * objects from which they were virtual-copied. 131 * 132 * The only items within the object structure which are 133 * modified after time of creation are: 134 * reference count locked by object's lock 135 * pager routine locked by object's lock 136 * 137 */ 138 139int vm_object_cache_max; 140struct object_q vm_object_cached_list; 141static int vm_object_cached; 142struct object_q vm_object_list; 143static long vm_object_count; 144vm_object_t kernel_object; 145vm_object_t kmem_object; 146static struct vm_object kernel_object_store; 147static struct vm_object kmem_object_store; 148extern int vm_pageout_page_count; 149 150static long object_collapses; 151static long object_bypasses; 152 153static void 154_vm_object_allocate(type, size, object) 155 objtype_t type; 156 vm_size_t size; 157 register vm_object_t object; 158{ 159 TAILQ_INIT(&object->memq); 160 TAILQ_INIT(&object->shadow_head); 161 162 object->type = type; 163 object->size = size; 164 object->ref_count = 1; 165 object->flags = 0; 166 object->behavior = OBJ_NORMAL; 167 object->paging_in_progress = 0; 168 object->resident_page_count = 0; 169 object->shadow_count = 0; 170 object->handle = NULL; 171 object->paging_offset = (vm_ooffset_t) 0; 172 object->backing_object = NULL; 173 object->backing_object_offset = (vm_ooffset_t) 0; 174 175 object->last_read = 0; 176 177 TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); 178 vm_object_count++; 179} 180 181/* 182 * vm_object_init: 183 * 184 * Initialize the VM objects module. 185 */ 186void 187vm_object_init() 188{ 189 TAILQ_INIT(&vm_object_cached_list); 190 TAILQ_INIT(&vm_object_list); 191 vm_object_count = 0; 192 193 vm_object_cache_max = 84; 194 if (cnt.v_page_count > 1000) 195 vm_object_cache_max += (cnt.v_page_count - 1000) / 4; 196 197 kernel_object = &kernel_object_store; 198 _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), 199 kernel_object); 200 201 kmem_object = &kmem_object_store; 202 _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), 203 kmem_object); 204} 205 206/* 207 * vm_object_allocate: 208 * 209 * Returns a new object with the given size. 210 */ 211 212vm_object_t 213vm_object_allocate(type, size) 214 objtype_t type; 215 vm_size_t size; 216{ 217 register vm_object_t result; 218 219 result = (vm_object_t) 220 malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK); 221 222 223 _vm_object_allocate(type, size, result); 224 225 return (result); 226} 227 228 229/* 230 * vm_object_reference: 231 * 232 * Gets another reference to the given object. 233 */ 234inline void 235vm_object_reference(object) 236 register vm_object_t object; 237{ 238 if (object == NULL) 239 return; 240 241 if (object->ref_count == 0) { 242 if ((object->flags & OBJ_CANPERSIST) == 0) 243 panic("vm_object_reference: non-persistent object with 0 ref_count"); 244 TAILQ_REMOVE(&vm_object_cached_list, object, cached_list); 245 vm_object_cached--; 246 } 247 object->ref_count++; 248} 249 250/* 251 * vm_object_deallocate: 252 * 253 * Release a reference to the specified object, 254 * gained either through a vm_object_allocate 255 * or a vm_object_reference call. When all references 256 * are gone, storage associated with this object 257 * may be relinquished. 258 * 259 * No object may be locked. 260 */ 261void 262vm_object_deallocate(object) 263 vm_object_t object; 264{ 265 vm_object_t temp; 266 267 while (object != NULL) { 268 269 if (object->ref_count == 0) 270 panic("vm_object_deallocate: object deallocated too many times"); 271 272 /* 273 * Lose the reference 274 */ 275 object->ref_count--; 276 if (object->ref_count != 0) { 277 if ((object->ref_count == 1) && 278 (object->handle == NULL) && 279 (object->type == OBJT_DEFAULT || 280 object->type == OBJT_SWAP)) { 281 vm_object_t robject; 282 robject = TAILQ_FIRST(&object->shadow_head); 283 if ((robject != NULL) && 284 (robject->handle == NULL) && 285 (robject->type == OBJT_DEFAULT || 286 robject->type == OBJT_SWAP)) { 287 int s; 288 robject->ref_count += 2; 289 object->ref_count += 2; 290 291 do { 292 s = splvm(); 293 while (robject->paging_in_progress) { 294 robject->flags |= OBJ_PIPWNT; 295 tsleep(robject, PVM, "objde1", 0); 296 } 297 298 while (object->paging_in_progress) { 299 object->flags |= OBJ_PIPWNT; 300 tsleep(object, PVM, "objde2", 0); 301 } 302 splx(s); 303 304 } while( object->paging_in_progress || robject->paging_in_progress); 305 306 object->ref_count -= 2; 307 robject->ref_count -= 2; 308 if( robject->ref_count == 0) { 309 robject->ref_count += 1; 310 object = robject; 311 continue; 312 } 313 vm_object_collapse(robject); 314 return; 315 } 316 } 317 /* 318 * If there are still references, then we are done. 319 */ 320 return; 321 } 322 323 if (object->type == OBJT_VNODE) { 324 struct vnode *vp = object->handle; 325 326 vp->v_flag &= ~VTEXT; 327 } 328 329 /* 330 * See if this object can persist and has some resident 331 * pages. If so, enter it in the cache. 332 */ 333 if (object->flags & OBJ_CANPERSIST) { 334 if (object->resident_page_count != 0) { 335 vm_object_page_clean(object, 0, 0 ,TRUE, TRUE); 336 TAILQ_INSERT_TAIL(&vm_object_cached_list, object, 337 cached_list); 338 vm_object_cached++; 339 340 vm_object_cache_trim(); 341 return; 342 } else { 343 object->flags &= ~OBJ_CANPERSIST; 344 } 345 } 346 347 /* 348 * Make sure no one uses us. 349 */ 350 object->flags |= OBJ_DEAD; 351 352 temp = object->backing_object; 353 if (temp) { 354 TAILQ_REMOVE(&temp->shadow_head, object, shadow_list); 355 --temp->shadow_count; 356 } 357 vm_object_terminate(object); 358 /* unlocks and deallocates object */ 359 object = temp; 360 } 361} 362 363/* 364 * vm_object_terminate actually destroys the specified object, freeing 365 * up all previously used resources. 366 * 367 * The object must be locked. 368 */ 369static void 370vm_object_terminate(object) 371 register vm_object_t object; 372{ 373 register vm_page_t p; 374 int s; 375 376 /* 377 * wait for the pageout daemon to be done with the object 378 */ 379 s = splvm(); 380 while (object->paging_in_progress) { 381 object->flags |= OBJ_PIPWNT; 382 tsleep(object, PVM, "objtrm", 0); 383 } 384 splx(s); 385 386 if (object->paging_in_progress != 0) 387 panic("vm_object_deallocate: pageout in progress"); 388 389 /* 390 * Clean and free the pages, as appropriate. All references to the 391 * object are gone, so we don't need to lock it. 392 */ 393 if (object->type == OBJT_VNODE) { 394 struct vnode *vp = object->handle; 395 396 VOP_LOCK(vp); 397 vm_object_page_clean(object, 0, 0, TRUE, FALSE); 398 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); 399 VOP_UNLOCK(vp); 400 } 401 /* 402 * Now free the pages. For internal objects, this also removes them 403 * from paging queues. 404 */ 405 while ((p = TAILQ_FIRST(&object->memq)) != NULL) { 406 if (p->flags & PG_BUSY) 407 printf("vm_object_terminate: freeing busy page\n"); 408 PAGE_WAKEUP(p); 409 vm_page_free(p); 410 cnt.v_pfree++; 411 } 412 413 /* 414 * Let the pager know object is dead. 415 */ 416 vm_pager_deallocate(object); 417 418 TAILQ_REMOVE(&vm_object_list, object, object_list); 419 vm_object_count--; 420 421 wakeup(object); 422 423 /* 424 * Free the space for the object. 425 */ 426 free((caddr_t) object, M_VMOBJ); 427} 428 429/* 430 * vm_object_page_clean 431 * 432 * Clean all dirty pages in the specified range of object. 433 * Leaves page on whatever queue it is currently on. 434 * 435 * Odd semantics: if start == end, we clean everything. 436 * 437 * The object must be locked. 438 */ 439 440void 441vm_object_page_clean(object, start, end, syncio, lockflag) 442 vm_object_t object; 443 vm_pindex_t start; 444 vm_pindex_t end; 445 boolean_t syncio; 446 boolean_t lockflag; 447{ 448 register vm_page_t p, np, tp; 449 register vm_offset_t tstart, tend; 450 vm_pindex_t pi; 451 int s; 452 struct vnode *vp; 453 int runlen; 454 int maxf; 455 int chkb; 456 int maxb; 457 int i; 458 vm_page_t maf[vm_pageout_page_count]; 459 vm_page_t mab[vm_pageout_page_count]; 460 vm_page_t ma[vm_pageout_page_count]; 461 462 if (object->type != OBJT_VNODE || 463 (object->flags & OBJ_MIGHTBEDIRTY) == 0) 464 return; 465 466 vp = object->handle; 467 468 if (lockflag) 469 VOP_LOCK(vp); 470 object->flags |= OBJ_CLEANING; 471 472 tstart = start; 473 if (end == 0) { 474 tend = object->size; 475 } else { 476 tend = end; 477 } 478 if ((tstart == 0) && (tend == object->size)) { 479 object->flags &= ~(OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); 480 } 481 for(p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) 482 p->flags |= PG_CLEANCHK; 483 484rescan: 485 for(p = TAILQ_FIRST(&object->memq); p; p = np) { 486 np = TAILQ_NEXT(p, listq); 487 488 pi = p->pindex; 489 if (((p->flags & PG_CLEANCHK) == 0) || 490 (pi < tstart) || (pi >= tend) || 491 (p->valid == 0) || (p->queue == PQ_CACHE)) { 492 p->flags &= ~PG_CLEANCHK; 493 continue; 494 } 495 496 vm_page_test_dirty(p); 497 if ((p->dirty & p->valid) == 0) { 498 p->flags &= ~PG_CLEANCHK; 499 continue; 500 } 501 502 s = splvm(); 503 if ((p->flags & PG_BUSY) || p->busy) { 504 p->flags |= PG_WANTED|PG_REFERENCED; 505 tsleep(p, PVM, "vpcwai", 0); 506 splx(s); 507 goto rescan; 508 } 509 splx(s); 510 511 s = splvm(); 512 maxf = 0; 513 for(i=1;i<vm_pageout_page_count;i++) { 514 if (tp = vm_page_lookup(object, pi + i)) { 515 if ((tp->flags & PG_BUSY) || 516 (tp->flags & PG_CLEANCHK) == 0) 517 break; 518 if (tp->queue == PQ_CACHE) { 519 tp->flags &= ~PG_CLEANCHK; 520 break; 521 } 522 vm_page_test_dirty(tp); 523 if ((tp->dirty & tp->valid) == 0) { 524 tp->flags &= ~PG_CLEANCHK; 525 break; 526 } 527 maf[ i - 1 ] = tp; 528 maxf++; 529 continue; 530 } 531 break; 532 } 533 534 maxb = 0; 535 chkb = vm_pageout_page_count - maxf; 536 if (chkb) { 537 for(i = 1; i < chkb;i++) { 538 if (tp = vm_page_lookup(object, pi - i)) { 539 if ((tp->flags & PG_BUSY) || 540 (tp->flags & PG_CLEANCHK) == 0) 541 break; 542 if (tp->queue == PQ_CACHE) { 543 tp->flags &= ~PG_CLEANCHK; 544 break; 545 } 546 vm_page_test_dirty(tp); 547 if ((tp->dirty & tp->valid) == 0) { 548 tp->flags &= ~PG_CLEANCHK; 549 break; 550 } 551 mab[ i - 1 ] = tp; 552 maxb++; 553 continue; 554 } 555 break; 556 } 557 } 558 559 for(i=0;i<maxb;i++) { 560 int index = (maxb - i) - 1; 561 ma[index] = mab[i]; 562 ma[index]->flags |= PG_BUSY; 563 ma[index]->flags &= ~PG_CLEANCHK; 564 vm_page_protect(ma[index], VM_PROT_READ); 565 } 566 vm_page_protect(p, VM_PROT_READ); 567 p->flags |= PG_BUSY; 568 p->flags &= ~PG_CLEANCHK; 569 ma[maxb] = p; 570 for(i=0;i<maxf;i++) { 571 int index = (maxb + i) + 1; 572 ma[index] = maf[i]; 573 ma[index]->flags |= PG_BUSY; 574 ma[index]->flags &= ~PG_CLEANCHK; 575 vm_page_protect(ma[index], VM_PROT_READ); 576 } 577 runlen = maxb + maxf + 1; 578 splx(s); 579 vm_pageout_flush(ma, runlen, 0); 580 goto rescan; 581 } 582 583 VOP_FSYNC(vp, NULL, syncio, curproc); 584 585 if (lockflag) 586 VOP_UNLOCK(vp); 587 object->flags &= ~OBJ_CLEANING; 588 return; 589} 590 591#ifdef not_used 592/* XXX I cannot tell if this should be an exported symbol */ 593/* 594 * vm_object_deactivate_pages 595 * 596 * Deactivate all pages in the specified object. (Keep its pages 597 * in memory even though it is no longer referenced.) 598 * 599 * The object must be locked. 600 */ 601static void 602vm_object_deactivate_pages(object) 603 register vm_object_t object; 604{ 605 register vm_page_t p, next; 606 607 for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { 608 next = TAILQ_NEXT(p, listq); 609 vm_page_deactivate(p); 610 } 611} 612#endif 613 614/* 615 * Trim the object cache to size. 616 */ 617static void 618vm_object_cache_trim() 619{ 620 register vm_object_t object; 621 622 while (vm_object_cached > vm_object_cache_max) { 623 object = TAILQ_FIRST(&vm_object_cached_list); 624 625 vm_object_reference(object); 626 pager_cache(object, FALSE); 627 } 628} 629 630 631/* 632 * vm_object_pmap_copy: 633 * 634 * Makes all physical pages in the specified 635 * object range copy-on-write. No writeable 636 * references to these pages should remain. 637 * 638 * The object must *not* be locked. 639 */ 640void 641vm_object_pmap_copy(object, start, end) 642 register vm_object_t object; 643 register vm_pindex_t start; 644 register vm_pindex_t end; 645{ 646 register vm_page_t p; 647 648 if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0) 649 return; 650 651 for (p = TAILQ_FIRST(&object->memq); 652 p != NULL; 653 p = TAILQ_NEXT(p, listq)) { 654 vm_page_protect(p, VM_PROT_READ); 655 } 656 657 object->flags &= ~OBJ_WRITEABLE; 658} 659 660/* 661 * vm_object_pmap_remove: 662 * 663 * Removes all physical pages in the specified 664 * object range from all physical maps. 665 * 666 * The object must *not* be locked. 667 */ 668void 669vm_object_pmap_remove(object, start, end) 670 register vm_object_t object; 671 register vm_pindex_t start; 672 register vm_pindex_t end; 673{ 674 register vm_page_t p; 675 if (object == NULL) 676 return; 677 for (p = TAILQ_FIRST(&object->memq); 678 p != NULL; 679 p = TAILQ_NEXT(p, listq)) { 680 if (p->pindex >= start && p->pindex < end) 681 vm_page_protect(p, VM_PROT_NONE); 682 } 683} 684 685/* 686 * vm_object_madvise: 687 * 688 * Implements the madvise function at the object/page level. 689 */ 690void 691vm_object_madvise(object, pindex, count, advise) 692 vm_object_t object; 693 vm_pindex_t pindex; 694 int count; 695 int advise; 696{ 697 vm_pindex_t end; 698 vm_page_t m; 699 700 if (object == NULL) 701 return; 702 703 end = pindex + count; 704 705 for (; pindex < end; pindex += 1) { 706 m = vm_page_lookup(object, pindex); 707 708 /* 709 * If the page is busy or not in a normal active state, 710 * we skip it. Things can break if we mess with pages 711 * in any of the below states. 712 */ 713 if (m == NULL || m->busy || (m->flags & PG_BUSY) || 714 m->hold_count || m->wire_count || 715 m->valid != VM_PAGE_BITS_ALL) 716 continue; 717 718 if (advise == MADV_WILLNEED) { 719 if (m->queue != PQ_ACTIVE) 720 vm_page_activate(m); 721 } else if ((advise == MADV_DONTNEED) || 722 ((advise == MADV_FREE) && 723 ((object->type != OBJT_DEFAULT) && 724 (object->type != OBJT_SWAP)))) { 725 vm_page_deactivate(m); 726 } else if (advise == MADV_FREE) { 727 /* 728 * Force a demand-zero on next ref 729 */ 730 if (object->type == OBJT_SWAP) 731 swap_pager_dmzspace(object, m->pindex, 1); 732 vm_page_protect(m, VM_PROT_NONE); 733 vm_page_free(m); 734 } 735 } 736} 737 738/* 739 * vm_object_shadow: 740 * 741 * Create a new object which is backed by the 742 * specified existing object range. The source 743 * object reference is deallocated. 744 * 745 * The new object and offset into that object 746 * are returned in the source parameters. 747 */ 748 749void 750vm_object_shadow(object, offset, length) 751 vm_object_t *object; /* IN/OUT */ 752 vm_ooffset_t *offset; /* IN/OUT */ 753 vm_size_t length; 754{ 755 register vm_object_t source; 756 register vm_object_t result; 757 758 source = *object; 759 760 /* 761 * Allocate a new object with the given length 762 */ 763 764 if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL) 765 panic("vm_object_shadow: no object for shadowing"); 766 767 /* 768 * The new object shadows the source object, adding a reference to it. 769 * Our caller changes his reference to point to the new object, 770 * removing a reference to the source object. Net result: no change 771 * of reference count. 772 */ 773 result->backing_object = source; 774 if (source) { 775 TAILQ_INSERT_TAIL(&source->shadow_head, result, shadow_list); 776 ++source->shadow_count; 777 } 778 779 /* 780 * Store the offset into the source object, and fix up the offset into 781 * the new object. 782 */ 783 784 result->backing_object_offset = *offset; 785 786 /* 787 * Return the new things 788 */ 789 790 *offset = 0; 791 *object = result; 792} 793 794 795/* 796 * this version of collapse allows the operation to occur earlier and 797 * when paging_in_progress is true for an object... This is not a complete 798 * operation, but should plug 99.9% of the rest of the leaks. 799 */ 800static void 801vm_object_qcollapse(object) 802 register vm_object_t object; 803{ 804 register vm_object_t backing_object; 805 register vm_pindex_t backing_offset_index, paging_offset_index; 806 vm_pindex_t backing_object_paging_offset_index; 807 vm_pindex_t new_pindex; 808 register vm_page_t p, pp; 809 register vm_size_t size; 810 811 backing_object = object->backing_object; 812 if (backing_object->ref_count != 1) 813 return; 814 815 backing_object->ref_count += 2; 816 817 backing_offset_index = OFF_TO_IDX(object->backing_object_offset); 818 backing_object_paging_offset_index = OFF_TO_IDX(backing_object->paging_offset); 819 paging_offset_index = OFF_TO_IDX(object->paging_offset); 820 size = object->size; 821 p = TAILQ_FIRST(&backing_object->memq); 822 while (p) { 823 vm_page_t next; 824 825 next = TAILQ_NEXT(p, listq); 826 if ((p->flags & (PG_BUSY | PG_FICTITIOUS)) || 827 (p->queue == PQ_CACHE) || !p->valid || p->hold_count || p->wire_count || p->busy) { 828 p = next; 829 continue; 830 } 831 new_pindex = p->pindex - backing_offset_index; 832 if (p->pindex < backing_offset_index || 833 new_pindex >= size) { 834 if (backing_object->type == OBJT_SWAP) 835 swap_pager_freespace(backing_object, 836 backing_object_paging_offset_index+p->pindex, 837 1); 838 vm_page_protect(p, VM_PROT_NONE); 839 vm_page_free(p); 840 } else { 841 pp = vm_page_lookup(object, new_pindex); 842 if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object, 843 paging_offset_index + new_pindex, NULL, NULL))) { 844 if (backing_object->type == OBJT_SWAP) 845 swap_pager_freespace(backing_object, 846 backing_object_paging_offset_index + p->pindex, 1); 847 vm_page_protect(p, VM_PROT_NONE); 848 vm_page_free(p); 849 } else { 850 if (backing_object->type == OBJT_SWAP) 851 swap_pager_freespace(backing_object, 852 backing_object_paging_offset_index + p->pindex, 1); 853 vm_page_rename(p, object, new_pindex); 854 p->dirty = VM_PAGE_BITS_ALL; 855 } 856 } 857 p = next; 858 } 859 backing_object->ref_count -= 2; 860} 861 862/* 863 * vm_object_collapse: 864 * 865 * Collapse an object with the object backing it. 866 * Pages in the backing object are moved into the 867 * parent, and the backing object is deallocated. 868 */ 869void 870vm_object_collapse(object) 871 vm_object_t object; 872 873{ 874 vm_object_t backing_object; 875 vm_ooffset_t backing_offset; 876 vm_size_t size; 877 vm_pindex_t new_pindex, backing_offset_index; 878 vm_page_t p, pp; 879 880 while (TRUE) { 881 /* 882 * Verify that the conditions are right for collapse: 883 * 884 * The object exists and no pages in it are currently being paged 885 * out. 886 */ 887 if (object == NULL) 888 return; 889 890 /* 891 * Make sure there is a backing object. 892 */ 893 if ((backing_object = object->backing_object) == NULL) 894 return; 895 896 /* 897 * we check the backing object first, because it is most likely 898 * not collapsable. 899 */ 900 if (backing_object->handle != NULL || 901 (backing_object->type != OBJT_DEFAULT && 902 backing_object->type != OBJT_SWAP) || 903 (backing_object->flags & OBJ_DEAD) || 904 object->handle != NULL || 905 (object->type != OBJT_DEFAULT && 906 object->type != OBJT_SWAP) || 907 (object->flags & OBJ_DEAD)) { 908 return; 909 } 910 911 if (object->paging_in_progress != 0 || 912 backing_object->paging_in_progress != 0) { 913 vm_object_qcollapse(object); 914 return; 915 } 916 917 /* 918 * We know that we can either collapse the backing object (if 919 * the parent is the only reference to it) or (perhaps) remove 920 * the parent's reference to it. 921 */ 922 923 backing_offset = object->backing_object_offset; 924 backing_offset_index = OFF_TO_IDX(backing_offset); 925 size = object->size; 926 927 /* 928 * If there is exactly one reference to the backing object, we 929 * can collapse it into the parent. 930 */ 931 932 if (backing_object->ref_count == 1) { 933 934 backing_object->flags |= OBJ_DEAD; 935 /* 936 * We can collapse the backing object. 937 * 938 * Move all in-memory pages from backing_object to the 939 * parent. Pages that have been paged out will be 940 * overwritten by any of the parent's pages that 941 * shadow them. 942 */ 943 944 while ((p = TAILQ_FIRST(&backing_object->memq)) != 0) { 945 946 new_pindex = p->pindex - backing_offset_index; 947 948 /* 949 * If the parent has a page here, or if this 950 * page falls outside the parent, dispose of 951 * it. 952 * 953 * Otherwise, move it as planned. 954 */ 955 956 if (p->pindex < backing_offset_index || 957 new_pindex >= size) { 958 vm_page_protect(p, VM_PROT_NONE); 959 PAGE_WAKEUP(p); 960 vm_page_free(p); 961 } else { 962 pp = vm_page_lookup(object, new_pindex); 963 if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object, 964 OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL))) { 965 vm_page_protect(p, VM_PROT_NONE); 966 PAGE_WAKEUP(p); 967 vm_page_free(p); 968 } else { 969 vm_page_rename(p, object, new_pindex); 970 } 971 } 972 } 973 974 /* 975 * Move the pager from backing_object to object. 976 */ 977 978 if (backing_object->type == OBJT_SWAP) { 979 backing_object->paging_in_progress++; 980 if (object->type == OBJT_SWAP) { 981 object->paging_in_progress++; 982 /* 983 * copy shadow object pages into ours 984 * and destroy unneeded pages in 985 * shadow object. 986 */ 987 swap_pager_copy( 988 backing_object, 989 OFF_TO_IDX(backing_object->paging_offset), 990 object, 991 OFF_TO_IDX(object->paging_offset), 992 OFF_TO_IDX(object->backing_object_offset)); 993 vm_object_pip_wakeup(object); 994 } else { 995 object->paging_in_progress++; 996 /* 997 * move the shadow backing_object's pager data to 998 * "object" and convert "object" type to OBJT_SWAP. 999 */ 1000 object->type = OBJT_SWAP; 1001 object->un_pager.swp.swp_nblocks = 1002 backing_object->un_pager.swp.swp_nblocks; 1003 object->un_pager.swp.swp_allocsize = 1004 backing_object->un_pager.swp.swp_allocsize; 1005 object->un_pager.swp.swp_blocks = 1006 backing_object->un_pager.swp.swp_blocks; 1007 object->un_pager.swp.swp_poip = /* XXX */ 1008 backing_object->un_pager.swp.swp_poip; 1009 object->paging_offset = backing_object->paging_offset + backing_offset; 1010 TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list); 1011 1012 /* 1013 * Convert backing object from OBJT_SWAP to 1014 * OBJT_DEFAULT. XXX - only the TAILQ_REMOVE is 1015 * actually necessary. 1016 */ 1017 backing_object->type = OBJT_DEFAULT; 1018 TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list); 1019 /* 1020 * free unnecessary blocks 1021 */ 1022 swap_pager_freespace(object, 0, 1023 OFF_TO_IDX(object->paging_offset)); 1024 vm_object_pip_wakeup(object); 1025 } 1026 1027 vm_object_pip_wakeup(backing_object); 1028 } 1029 /* 1030 * Object now shadows whatever backing_object did. 1031 * Note that the reference to backing_object->backing_object 1032 * moves from within backing_object to within object. 1033 */ 1034 1035 TAILQ_REMOVE(&object->backing_object->shadow_head, object, 1036 shadow_list); 1037 --object->backing_object->shadow_count; 1038 if (backing_object->backing_object) { 1039 TAILQ_REMOVE(&backing_object->backing_object->shadow_head, 1040 backing_object, shadow_list); 1041 --backing_object->backing_object->shadow_count; 1042 } 1043 object->backing_object = backing_object->backing_object; 1044 if (object->backing_object) { 1045 TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, 1046 object, shadow_list); 1047 ++object->backing_object->shadow_count; 1048 } 1049 1050 object->backing_object_offset += backing_object->backing_object_offset; 1051 /* 1052 * Discard backing_object. 1053 * 1054 * Since the backing object has no pages, no pager left, 1055 * and no object references within it, all that is 1056 * necessary is to dispose of it. 1057 */ 1058 1059 TAILQ_REMOVE(&vm_object_list, backing_object, 1060 object_list); 1061 vm_object_count--; 1062 1063 free((caddr_t) backing_object, M_VMOBJ); 1064 1065 object_collapses++; 1066 } else { 1067 /* 1068 * If all of the pages in the backing object are 1069 * shadowed by the parent object, the parent object no 1070 * longer has to shadow the backing object; it can 1071 * shadow the next one in the chain. 1072 * 1073 * The backing object must not be paged out - we'd have 1074 * to check all of the paged-out pages, as well. 1075 */ 1076 1077 if (backing_object->type != OBJT_DEFAULT) { 1078 return; 1079 } 1080 /* 1081 * Should have a check for a 'small' number of pages 1082 * here. 1083 */ 1084 1085 for (p = TAILQ_FIRST(&backing_object->memq); p; p = TAILQ_NEXT(p, listq)) { 1086 new_pindex = p->pindex - backing_offset_index; 1087 1088 /* 1089 * If the parent has a page here, or if this 1090 * page falls outside the parent, keep going. 1091 * 1092 * Otherwise, the backing_object must be left in 1093 * the chain. 1094 */ 1095 1096 if (p->pindex >= backing_offset_index && 1097 new_pindex <= size) { 1098 1099 pp = vm_page_lookup(object, new_pindex); 1100 1101 if ((pp == NULL || pp->valid == 0) && 1102 !vm_pager_has_page(object, OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL)) { 1103 /* 1104 * Page still needed. Can't go any 1105 * further. 1106 */ 1107 return; 1108 } 1109 } 1110 } 1111 1112 /* 1113 * Make the parent shadow the next object in the 1114 * chain. Deallocating backing_object will not remove 1115 * it, since its reference count is at least 2. 1116 */ 1117 1118 TAILQ_REMOVE(&object->backing_object->shadow_head, 1119 object, shadow_list); 1120 --object->backing_object->shadow_count; 1121 vm_object_reference(object->backing_object = backing_object->backing_object); 1122 if (object->backing_object) { 1123 TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, 1124 object, shadow_list); 1125 ++object->backing_object->shadow_count; 1126 } 1127 object->backing_object_offset += backing_object->backing_object_offset; 1128 1129 /* 1130 * Drop the reference count on backing_object. Since 1131 * its ref_count was at least 2, it will not vanish; 1132 * so we don't need to call vm_object_deallocate. 1133 */ 1134 if (backing_object->ref_count == 1) 1135 printf("should have called obj deallocate\n"); 1136 backing_object->ref_count--; 1137 1138 object_bypasses++; 1139 1140 } 1141 1142 /* 1143 * Try again with this object's new backing object. 1144 */ 1145 } 1146} 1147 1148/* 1149 * vm_object_page_remove: [internal] 1150 * 1151 * Removes all physical pages in the specified 1152 * object range from the object's list of pages. 1153 * 1154 * The object must be locked. 1155 */ 1156void 1157vm_object_page_remove(object, start, end, clean_only) 1158 register vm_object_t object; 1159 register vm_pindex_t start; 1160 register vm_pindex_t end; 1161 boolean_t clean_only; 1162{ 1163 register vm_page_t p, next; 1164 unsigned int size; 1165 int s; 1166 1167 if (object == NULL) 1168 return; 1169 1170 object->paging_in_progress++; 1171again: 1172 size = end - start; 1173 if (size > 4 || size >= object->size / 4) { 1174 for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { 1175 next = TAILQ_NEXT(p, listq); 1176 if ((start <= p->pindex) && (p->pindex < end)) { 1177 if (p->wire_count != 0) { 1178 vm_page_protect(p, VM_PROT_NONE); 1179 p->valid = 0; 1180 continue; 1181 } 1182 1183 /* 1184 * The busy flags are only cleared at 1185 * interrupt -- minimize the spl transitions 1186 */ 1187 if ((p->flags & PG_BUSY) || p->busy) { 1188 s = splvm(); 1189 if ((p->flags & PG_BUSY) || p->busy) { 1190 p->flags |= PG_WANTED; 1191 tsleep(p, PVM, "vmopar", 0); 1192 splx(s); 1193 goto again; 1194 } 1195 splx(s); 1196 } 1197 1198 if (clean_only) { 1199 vm_page_test_dirty(p); 1200 if (p->valid & p->dirty) 1201 continue; 1202 } 1203 vm_page_protect(p, VM_PROT_NONE); 1204 PAGE_WAKEUP(p); 1205 vm_page_free(p); 1206 } 1207 } 1208 } else { 1209 while (size > 0) { 1210 if ((p = vm_page_lookup(object, start)) != 0) { 1211 if (p->wire_count != 0) { 1212 p->valid = 0; 1213 vm_page_protect(p, VM_PROT_NONE); 1214 start += 1; 1215 size -= 1; 1216 continue; 1217 } 1218 /* 1219 * The busy flags are only cleared at 1220 * interrupt -- minimize the spl transitions 1221 */ 1222 if ((p->flags & PG_BUSY) || p->busy) { 1223 s = splvm(); 1224 if ((p->flags & PG_BUSY) || p->busy) { 1225 p->flags |= PG_WANTED; 1226 tsleep(p, PVM, "vmopar", 0); 1227 splx(s); 1228 goto again; 1229 } 1230 splx(s); 1231 } 1232 if (clean_only) { 1233 vm_page_test_dirty(p); 1234 if (p->valid & p->dirty) { 1235 start += 1; 1236 size -= 1; 1237 continue; 1238 } 1239 } 1240 vm_page_protect(p, VM_PROT_NONE); 1241 PAGE_WAKEUP(p); 1242 vm_page_free(p); 1243 } 1244 start += 1; 1245 size -= 1; 1246 } 1247 } 1248 vm_object_pip_wakeup(object); 1249} 1250 1251/* 1252 * Routine: vm_object_coalesce 1253 * Function: Coalesces two objects backing up adjoining 1254 * regions of memory into a single object. 1255 * 1256 * returns TRUE if objects were combined. 1257 * 1258 * NOTE: Only works at the moment if the second object is NULL - 1259 * if it's not, which object do we lock first? 1260 * 1261 * Parameters: 1262 * prev_object First object to coalesce 1263 * prev_offset Offset into prev_object 1264 * next_object Second object into coalesce 1265 * next_offset Offset into next_object 1266 * 1267 * prev_size Size of reference to prev_object 1268 * next_size Size of reference to next_object 1269 * 1270 * Conditions: 1271 * The object must *not* be locked. 1272 */ 1273boolean_t 1274vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size) 1275 register vm_object_t prev_object; 1276 vm_pindex_t prev_pindex; 1277 vm_size_t prev_size, next_size; 1278{ 1279 vm_size_t newsize; 1280 1281 if (prev_object == NULL) { 1282 return (TRUE); 1283 } 1284 1285 if (prev_object->type != OBJT_DEFAULT) { 1286 return (FALSE); 1287 } 1288 1289 /* 1290 * Try to collapse the object first 1291 */ 1292 vm_object_collapse(prev_object); 1293 1294 /* 1295 * Can't coalesce if: . more than one reference . paged out . shadows 1296 * another object . has a copy elsewhere (any of which mean that the 1297 * pages not mapped to prev_entry may be in use anyway) 1298 */ 1299 1300 if (prev_object->ref_count > 1 || 1301 prev_object->backing_object != NULL) { 1302 return (FALSE); 1303 } 1304 1305 prev_size >>= PAGE_SHIFT; 1306 next_size >>= PAGE_SHIFT; 1307 /* 1308 * Remove any pages that may still be in the object from a previous 1309 * deallocation. 1310 */ 1311 1312 vm_object_page_remove(prev_object, 1313 prev_pindex + prev_size, 1314 prev_pindex + prev_size + next_size, FALSE); 1315 1316 /* 1317 * Extend the object if necessary. 1318 */ 1319 newsize = prev_pindex + prev_size + next_size; 1320 if (newsize > prev_object->size) 1321 prev_object->size = newsize; 1322 1323 return (TRUE); 1324} 1325 1326#ifdef DDB 1327 1328static int 1329_vm_object_in_map(map, object, entry) 1330 vm_map_t map; 1331 vm_object_t object; 1332 vm_map_entry_t entry; 1333{ 1334 vm_map_t tmpm; 1335 vm_map_entry_t tmpe; 1336 vm_object_t obj; 1337 int entcount; 1338 1339 if (map == 0) 1340 return 0; 1341 1342 if (entry == 0) { 1343 tmpe = map->header.next; 1344 entcount = map->nentries; 1345 while (entcount-- && (tmpe != &map->header)) { 1346 if( _vm_object_in_map(map, object, tmpe)) { 1347 return 1; 1348 } 1349 tmpe = tmpe->next; 1350 } 1351 } else if (entry->is_sub_map || entry->is_a_map) { 1352 tmpm = entry->object.share_map; 1353 tmpe = tmpm->header.next; 1354 entcount = tmpm->nentries; 1355 while (entcount-- && tmpe != &tmpm->header) { 1356 if( _vm_object_in_map(tmpm, object, tmpe)) { 1357 return 1; 1358 } 1359 tmpe = tmpe->next; 1360 } 1361 } else if (obj = entry->object.vm_object) { 1362 for(; obj; obj=obj->backing_object) 1363 if( obj == object) { 1364 return 1; 1365 } 1366 } 1367 return 0; 1368} 1369 1370static int 1371vm_object_in_map( object) 1372 vm_object_t object; 1373{ 1374 struct proc *p; 1375 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { 1376 if( !p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) 1377 continue; 1378 if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) 1379 return 1; 1380 } 1381 if( _vm_object_in_map( kernel_map, object, 0)) 1382 return 1; 1383 if( _vm_object_in_map( kmem_map, object, 0)) 1384 return 1; 1385 if( _vm_object_in_map( pager_map, object, 0)) 1386 return 1; 1387 if( _vm_object_in_map( buffer_map, object, 0)) 1388 return 1; 1389 if( _vm_object_in_map( io_map, object, 0)) 1390 return 1; 1391 if( _vm_object_in_map( phys_map, object, 0)) 1392 return 1; 1393 if( _vm_object_in_map( mb_map, object, 0)) 1394 return 1; 1395 if( _vm_object_in_map( u_map, object, 0)) 1396 return 1; 1397 return 0; 1398} 1399 1400 1401#ifdef DDB 1402static void 1403DDB_vm_object_check() 1404{ 1405 vm_object_t object; 1406 1407 /* 1408 * make sure that internal objs are in a map somewhere 1409 * and none have zero ref counts. 1410 */ 1411 for (object = TAILQ_FIRST(&vm_object_list); 1412 object != NULL; 1413 object = TAILQ_NEXT(object, object_list)) { 1414 if (object->handle == NULL && 1415 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { 1416 if (object->ref_count == 0) { 1417 printf("vmochk: internal obj has zero ref count: %d\n", 1418 object->size); 1419 } 1420 if (!vm_object_in_map(object)) { 1421 printf("vmochk: internal obj is not in a map: " 1422 "ref: %d, size: %d: 0x%x, backing_object: 0x%x\n", 1423 object->ref_count, object->size, 1424 object->size, object->backing_object); 1425 } 1426 } 1427 } 1428} 1429#endif /* DDB */ 1430 1431/* 1432 * vm_object_print: [ debug ] 1433 */ 1434void 1435vm_object_print(iobject, full, dummy3, dummy4) 1436 /* db_expr_t */ int iobject; 1437 boolean_t full; 1438 /* db_expr_t */ int dummy3; 1439 char *dummy4; 1440{ 1441 vm_object_t object = (vm_object_t)iobject; /* XXX */ 1442 register vm_page_t p; 1443 1444 register int count; 1445 1446 if (object == NULL) 1447 return; 1448 1449 iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ", 1450 (int) object, (int) object->size, 1451 object->resident_page_count, object->ref_count); 1452 printf("offset=0x%x, backing_object=(0x%x)+0x%x\n", 1453 (int) object->paging_offset, 1454 (int) object->backing_object, (int) object->backing_object_offset); 1455 printf("cache: next=%p, prev=%p\n", 1456 TAILQ_NEXT(object, cached_list), TAILQ_PREV(object, cached_list)); 1457 1458 if (!full) 1459 return; 1460 1461 indent += 2; 1462 count = 0; 1463 for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { 1464 if (count == 0) 1465 iprintf("memory:="); 1466 else if (count == 6) { 1467 printf("\n"); 1468 iprintf(" ..."); 1469 count = 0; 1470 } else 1471 printf(","); 1472 count++; 1473 1474 printf("(off=0x%lx,page=0x%lx)", 1475 (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p)); 1476 } 1477 if (count != 0) 1478 printf("\n"); 1479 indent -= 2; 1480} 1481#endif /* DDB */ 1482