	uma_prealloc(kmapentzone, MAX_KMAPENT);
	mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	uma_prealloc(mapentzone, MAX_MAPENT);
}

static void
vmspace_zfini(void *mem, int size)
{
	struct vmspace *vm;

	vm = (struct vmspace *)mem;

	vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
}

static void
vmspace_zinit(void *mem, int size)
{
	struct vmspace *vm;

	vm = (struct vmspace *)mem;

	vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map));
}

static void
vm_map_zfini(void *mem, int size)
{
	vm_map_t map;

	GIANT_REQUIRED;
	map = (vm_map_t)mem;

	lockdestroy(&map->lock);
}

static void
vm_map_zinit(void *mem, int size)
{
	vm_map_t map;

	GIANT_REQUIRED;

	map = (vm_map_t)mem;
	map->nentries = 0;
	map->size = 0;
	map->infork = 0;
	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
}

#ifdef INVARIANTS
static void
vmspace_zdtor(void *mem, int size, void *arg)
{
	struct vmspace *vm;

	vm = (struct vmspace *)mem;

	vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
}
static void
vm_map_zdtor(void *mem, int size, void *arg)
{
	vm_map_t map;

	map = (vm_map_t)mem;
	KASSERT(map->nentries == 0,
	    ("map %p nentries == %d on free.",
	    map, map->nentries));
	KASSERT(map->size == 0,
	    ("map %p size == %lu on free.",
	    map, (unsigned long)map->size));
	KASSERT(map->infork == 0,
	    ("map %p infork == %d on free.",
	    map, map->infork));
}
#endif	/* INVARIANTS */

/*
 * Allocate a vmspace structure, including a vm_map and pmap,
 * and initialize those structures.  The refcnt is set to 1.
 * The remaining fields must be initialized by the caller.
 */
struct vmspace *
vmspace_alloc(min, max)
	vm_offset_t min, max;
{
	struct vmspace *vm;

	GIANT_REQUIRED;
	vm = uma_zalloc(vmspace_zone, M_WAITOK);
	CTR1(KTR_VM, "vmspace_alloc: %p", vm);
	_vm_map_init(&vm->vm_map, min, max);
	pmap_pinit(vmspace_pmap(vm));
	vm->vm_map.pmap = vmspace_pmap(vm);	/* XXX */
	vm->vm_refcnt = 1;
	vm->vm_shm = NULL;
	vm->vm_freer = NULL;
	return (vm);
}

void
vm_init2(void)
{
	uma_zone_set_obj(kmapentzone, &kmapentobj, cnt.v_page_count / 4);
	vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
#ifdef INVARIANTS
	    vmspace_zdtor,
#else
	    NULL,
#endif
	    vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	pmap_init2();
	vm_object_init2();
}

static __inline void
vmspace_dofree(struct vmspace *vm)
{
	CTR1(KTR_VM, "vmspace_free: %p", vm);
	/*
	 * Lock the map, to wait out all other references to it.
	 * Delete all of the mappings and pages they hold, then call
	 * the pmap module to reclaim anything left.
	 */
	vm_map_lock(&vm->vm_map);
	(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
	    vm->vm_map.max_offset);
	vm_map_unlock(&vm->vm_map);

	pmap_release(vmspace_pmap(vm));
	uma_zfree(vmspace_zone, vm);
}

void
vmspace_free(struct vmspace *vm)
{
	GIANT_REQUIRED;

	if (vm->vm_refcnt == 0)
		panic("vmspace_free: attempt to free already freed vmspace");

	if (--vm->vm_refcnt == 0)
		vmspace_dofree(vm);
}

void
vmspace_exitfree(struct proc *p)
{
	struct vmspace *vm;

	GIANT_REQUIRED;
	if (p == p->p_vmspace->vm_freer) {
		vm = p->p_vmspace;
		p->p_vmspace = NULL;
		vmspace_dofree(vm);
	}
}

/*
 * vmspace_swap_count() - count the approximate swap usage in pages for a
 *			  vmspace.
 *
 *	Swap usage is determined by taking the proportional swap used by
 *	VM objects backing the VM map.  To make up for fractional losses,
 *	if the VM object has any swap use at all the associated map entries
 *	count for at least 1 swap page.
 */
int
vmspace_swap_count(struct vmspace *vmspace)
{
	vm_map_t map = &vmspace->vm_map;
	vm_map_entry_t cur;
	int count = 0;

	vm_map_lock_read(map);
	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
		vm_object_t object;

		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
		    (object = cur->object.vm_object) != NULL &&
		    object->type == OBJT_SWAP
		) {
			int n = (cur->end - cur->start) / PAGE_SIZE;

			if (object->un_pager.swp.swp_bcount) {
				count += object->un_pager.swp.swp_bcount *
				    SWAP_META_PAGES * n / object->size + 1;
			}
		}
	}
	vm_map_unlock_read(map);
	return (count);
}

u_char
vm_map_entry_behavior(struct vm_map_entry *entry)
{
	return entry->eflags & MAP_ENTRY_BEHAV_MASK;
}

void
vm_map_entry_set_behavior(struct vm_map_entry *entry, u_char behavior)
{
	entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
	    (behavior & MAP_ENTRY_BEHAV_MASK);
}

void
_vm_map_lock(vm_map_t map, const char *file, int line)
{
	vm_map_printf("locking map LK_EXCLUSIVE: %p\n", map);
	if (lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread) != 0)
		panic("vm_map_lock: failed to get lock");
	map->timestamp++;
}

void
_vm_map_unlock(vm_map_t map, const char *file, int line)
{
	vm_map_printf("locking map LK_RELEASE: %p\n", map);
	lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread);
}

void
_vm_map_lock_read(vm_map_t map, const char *file, int line)
{
	vm_map_printf("locking map LK_SHARED: %p\n", map);
	lockmgr(&(map)->lock, LK_SHARED, NULL, curthread);
}

void
_vm_map_unlock_read(vm_map_t map, const char *file, int line)
{
	vm_map_printf("locking map LK_RELEASE: %p\n", map);
	lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread);
}

int
_vm_map_trylock(vm_map_t map, const char *file, int line)
{

	return (lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL,
	    curthread) == 0);
}

static __inline__ int
__vm_map_lock_upgrade(vm_map_t map, struct thread *td) {
	int error;

	vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map);
	error = lockmgr(&map->lock, LK_EXCLUPGRADE, NULL, td);
	if (error == 0)
		map->timestamp++;
	return error;
}

int
_vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
{
	return (__vm_map_lock_upgrade(map, curthread));
}

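/*
 * Note on the upgrade path above: a non-zero return means the exclusive
 * lock was not obtained.  Callers in this file (vm_map_user_pageable()
 * and vm_map_pageable()) treat that case as having lost the map lock
 * entirely: they re-acquire it with vm_map_lock() and repeat their
 * vm_map_lookup_entry() call.
 */
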
void
_vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
{
	vm_map_printf("locking map LK_DOWNGRADE: %p\n", map);
	lockmgr(&map->lock, LK_DOWNGRADE, NULL, curthread);
}

void
_vm_map_set_recursive(vm_map_t map, const char *file, int line)
{
	mtx_lock((map)->lock.lk_interlock);
	map->lock.lk_flags |= LK_CANRECURSE;
	mtx_unlock((map)->lock.lk_interlock);
}

void
_vm_map_clear_recursive(vm_map_t map, const char *file, int line)
{
	mtx_lock((map)->lock.lk_interlock);
	map->lock.lk_flags &= ~LK_CANRECURSE;
	mtx_unlock((map)->lock.lk_interlock);
}

struct pmap *
vmspace_pmap(struct vmspace *vmspace)
{
	return &vmspace->vm_pmap;
}

long
vmspace_resident_count(struct vmspace *vmspace)
{
	return pmap_resident_count(vmspace_pmap(vmspace));
}

/*
 * vm_map_create:
 *
 * Creates and returns a new empty VM map with
 * the given physical map structure, and having
 * the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
{
	vm_map_t result;

	GIANT_REQUIRED;

	result = uma_zalloc(mapzone, M_WAITOK);
	CTR1(KTR_VM, "vm_map_create: %p", result);
	_vm_map_init(result, min, max);
	result->pmap = pmap;
	return (result);
}

/*
 * Initialize an existing vm_map structure
 * such as that in the vmspace structure.
 * The pmap is set elsewhere.
 */
static void
_vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
{

	map->header.next = map->header.prev = &map->header;
	map->system_map = 0;
	map->min_offset = min;
	map->max_offset = max;
	map->first_free = &map->header;
	map->hint = &map->header;
	map->timestamp = 0;
}

void
vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
{
	_vm_map_init(map, min, max);
	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
}

/*
 * vm_map_entry_dispose:	[ internal use only ]
 *
 * Inverse of vm_map_entry_create.
 */
static void
vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
{
	uma_zfree((map->system_map || !mapentzone)
	    ? kmapentzone : mapentzone, entry);
}

/*
 * vm_map_entry_create:	[ internal use only ]
 *
 * Allocates a VM map entry for insertion.
 * No entry fields are filled in.
 */
static vm_map_entry_t
vm_map_entry_create(vm_map_t map)
{
	vm_map_entry_t new_entry;

	new_entry = uma_zalloc((map->system_map || !mapentzone) ?
	    kmapentzone : mapentzone, M_WAITOK);
	if (new_entry == NULL)
		panic("vm_map_entry_create: kernel resources exhausted");
	return (new_entry);
}

/*
 * vm_map_entry_{un,}link:
 *
 * Insert/remove entries from maps.
 */
static __inline void
vm_map_entry_link(vm_map_t map,
	vm_map_entry_t after_where,
	vm_map_entry_t entry)
{

	CTR4(KTR_VM,
	    "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
	    map->nentries, entry, after_where);
	map->nentries++;
	entry->prev = after_where;
	entry->next = after_where->next;
	entry->next->prev = entry;
	after_where->next = entry;
}

static __inline void
vm_map_entry_unlink(vm_map_t map,
	vm_map_entry_t entry)
{
	vm_map_entry_t prev = entry->prev;
	vm_map_entry_t next = entry->next;

	next->prev = prev;
	prev->next = next;
	map->nentries--;
	CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
	    map->nentries, entry);
}

/*
 * SAVE_HINT:
 *
 * Saves the specified entry as the hint for
 * future lookups.
 */
#define	SAVE_HINT(map,value) \
		(map)->hint = (value);

/*
 * vm_map_lookup_entry:	[ internal use only ]
 *
 * Finds the map entry containing (or
 * immediately preceding) the specified address
 * in the given map; the entry is returned
 * in the "entry" parameter.  The boolean
 * result indicates whether the address is
 * actually contained in the map.
 */
boolean_t
vm_map_lookup_entry(
	vm_map_t map,
	vm_offset_t address,
	vm_map_entry_t *entry)	/* OUT */
{
	vm_map_entry_t cur;
	vm_map_entry_t last;

	GIANT_REQUIRED;
	/*
	 * Start looking either from the head of the list, or from the hint.
	 */
	cur = map->hint;

	if (cur == &map->header)
		cur = cur->next;

	if (address >= cur->start) {
		/*
		 * Go from hint to end of list.
		 *
		 * But first, make a quick check to see if we are already looking
		 * at the entry we want (which is usually the case). Note also
		 * that we don't need to save the hint here... it is the same
		 * hint (unless we are at the header, in which case the hint
		 * didn't buy us anything anyway).
		 */
		last = &map->header;
		if ((cur != last) && (cur->end > address)) {
			*entry = cur;
			return (TRUE);
		}
	} else {
		/*
		 * Go from start to hint, *inclusively*
		 */
		last = cur->next;
		cur = map->header.next;
	}

	/*
	 * Search linearly
	 */
	while (cur != last) {
		if (cur->end > address) {
			if (address >= cur->start) {
				/*
				 * Save this lookup for future hints, and
				 * return
				 */
				*entry = cur;
				SAVE_HINT(map, cur);
				return (TRUE);
			}
			break;
		}
		cur = cur->next;
	}
	*entry = cur->prev;
	SAVE_HINT(map, *entry);
	return (FALSE);
}

/*
 * vm_map_insert:
 *
 * Inserts the given whole VM object into the target
 * map at the specified address range.  The object's
 * size should match that of the address range.
 *
 * Requires that the map be locked, and leaves it so.
 *
 * If object is non-NULL, ref count must be bumped by caller
 * prior to making call to account for the new entry.
 */
int
vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
	vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
	int cow)
{
	vm_map_entry_t new_entry;
	vm_map_entry_t prev_entry;
	vm_map_entry_t temp_entry;
	vm_eflags_t protoeflags;

	GIANT_REQUIRED;

	/*
	 * Check that the start and end points are not bogus.
	 */
	if ((start < map->min_offset) || (end > map->max_offset) ||
	    (start >= end))
		return (KERN_INVALID_ADDRESS);

	/*
	 * Find the entry prior to the proposed starting address; if it's part
	 * of an existing entry, this range is bogus.
	 */
	if (vm_map_lookup_entry(map, start, &temp_entry))
		return (KERN_NO_SPACE);

	prev_entry = temp_entry;

	/*
	 * Assert that the next entry doesn't overlap the end point.
	 */
	if ((prev_entry->next != &map->header) &&
	    (prev_entry->next->start < end))
		return (KERN_NO_SPACE);

	protoeflags = 0;

	if (cow & MAP_COPY_ON_WRITE)
		protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;

	if (cow & MAP_NOFAULT) {
		protoeflags |= MAP_ENTRY_NOFAULT;

		KASSERT(object == NULL,
		    ("vm_map_insert: paradoxical MAP_NOFAULT request"));
	}
	if (cow & MAP_DISABLE_SYNCER)
		protoeflags |= MAP_ENTRY_NOSYNC;
	if (cow & MAP_DISABLE_COREDUMP)
		protoeflags |= MAP_ENTRY_NOCOREDUMP;

	if (object) {
		/*
		 * When object is non-NULL, it could be shared with another
		 * process.  We have to set or clear OBJ_ONEMAPPING
		 * appropriately.
		 */
		if ((object->ref_count > 1) || (object->shadow_count != 0)) {
			vm_object_clear_flag(object, OBJ_ONEMAPPING);
		}
	}
	else if ((prev_entry != &map->header) &&
	    (prev_entry->eflags == protoeflags) &&
	    (prev_entry->end == start) &&
	    (prev_entry->wired_count == 0) &&
	    ((prev_entry->object.vm_object == NULL) ||
	     vm_object_coalesce(prev_entry->object.vm_object,
	     OFF_TO_IDX(prev_entry->offset),
	     (vm_size_t)(prev_entry->end - prev_entry->start),
	     (vm_size_t)(end - prev_entry->end)))) {
		/*
		 * We were able to extend the object.  Determine if we
		 * can extend the previous map entry to include the
		 * new range as well.
		 */
		if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
		    (prev_entry->protection == prot) &&
		    (prev_entry->max_protection == max)) {
			map->size += (end - prev_entry->end);
			prev_entry->end = end;
			vm_map_simplify_entry(map, prev_entry);
			return (KERN_SUCCESS);
		}

		/*
		 * If we can extend the object but cannot extend the
		 * map entry, we have to create a new map entry.  We
		 * must bump the ref count on the extended object to
		 * account for it.  object may be NULL.
		 */
		object = prev_entry->object.vm_object;
		offset = prev_entry->offset +
		    (prev_entry->end - prev_entry->start);
		vm_object_reference(object);
	}

	/*
	 * NOTE: if conditionals fail, object can be NULL here.  This occurs
	 * in things like the buffer map where we manage kva but do not manage
	 * backing objects.
	 */

	/*
	 * Create a new entry
	 */
	new_entry = vm_map_entry_create(map);
	new_entry->start = start;
	new_entry->end = end;

	new_entry->eflags = protoeflags;
	new_entry->object.vm_object = object;
	new_entry->offset = offset;
	new_entry->avail_ssize = 0;

	new_entry->inheritance = VM_INHERIT_DEFAULT;
	new_entry->protection = prot;
	new_entry->max_protection = max;
	new_entry->wired_count = 0;

	/*
	 * Insert the new entry into the list
	 */
	vm_map_entry_link(map, prev_entry, new_entry);
	map->size += new_entry->end - new_entry->start;

	/*
	 * Update the free space hint
	 */
	if ((map->first_free == prev_entry) &&
	    (prev_entry->end >= new_entry->start)) {
		map->first_free = new_entry;
	}

#if 0
	/*
	 * Temporarily removed to avoid MAP_STACK panic, due to
	 * MAP_STACK being a huge hack.  Will be added back in
	 * when MAP_STACK (and the user stack mapping) is fixed.
	 */
	/*
	 * It may be possible to simplify the entry
	 */
	vm_map_simplify_entry(map, new_entry);
#endif

	if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
		pmap_object_init_pt(map->pmap, start,
		    object, OFF_TO_IDX(offset), end - start,
		    cow & MAP_PREFAULT_PARTIAL);
	}

	return (KERN_SUCCESS);
}

/*
 * Find sufficient space for `length' bytes in the given map, starting at
 * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
 */
int
vm_map_findspace(
	vm_map_t map,
	vm_offset_t start,
	vm_size_t length,
	vm_offset_t *addr)
{
	vm_map_entry_t entry, next;
	vm_offset_t end;

	GIANT_REQUIRED;
	if (start < map->min_offset)
		start = map->min_offset;
	if (start > map->max_offset)
		return (1);

	/*
	 * Look for the first possible address; if there's already something
	 * at this address, we have to start after it.
	 */
	if (start == map->min_offset) {
		if ((entry = map->first_free) != &map->header)
			start = entry->end;
	} else {
		vm_map_entry_t tmp;

		if (vm_map_lookup_entry(map, start, &tmp))
			start = tmp->end;
		entry = tmp;
	}

	/*
	 * Look through the rest of the map, trying to fit a new region in the
	 * gap between existing regions, or after the very last region.
	 */
	for (;; start = (entry = next)->end) {
		/*
		 * Find the end of the proposed new region.  Be sure we didn't
		 * go beyond the end of the map, or wrap around the address;
		 * if so, we lose.  Otherwise, if this is the last entry, or
		 * if the proposed new region fits before the next entry, we
		 * win.
		 */
		end = start + length;
		if (end > map->max_offset || end < start)
			return (1);
		next = entry->next;
		if (next == &map->header || next->start >= end)
			break;
	}
	SAVE_HINT(map, entry);
	*addr = start;
	if (map == kernel_map) {
		vm_offset_t ksize;
		if ((ksize = round_page(start + length)) > kernel_vm_end) {
			pmap_growkernel(ksize);
		}
	}
	return (0);
}

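/*
 * Illustrative use of vm_map_findspace(): the address it returns is only
 * meaningful while the map stays locked, so the find and the insert must
 * happen under the same lock hold.  vm_map_find() below follows exactly
 * this pattern:
 *
 *	vm_map_lock(map);
 *	if (vm_map_findspace(map, start, length, &addr) == 0)
 *		result = vm_map_insert(map, object, offset, addr,
 *		    addr + length, prot, max, cow);
 *	vm_map_unlock(map);
 */
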
/*
 * vm_map_find finds an unallocated region in the target address
 * map with the given length.  The search is defined to be
 * first-fit from the specified address; the region found is
 * returned in the same parameter.
 *
 * If object is non-NULL, ref count must be bumped by caller
 * prior to making call to account for the new entry.
 */
int
vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
	vm_offset_t *addr,	/* IN/OUT */
	vm_size_t length, boolean_t find_space, vm_prot_t prot,
	vm_prot_t max, int cow)
{
	vm_offset_t start;
	int result, s = 0;

	GIANT_REQUIRED;

	start = *addr;

	if (map == kmem_map)
		s = splvm();

	vm_map_lock(map);
	if (find_space) {
		if (vm_map_findspace(map, start, length, addr)) {
			vm_map_unlock(map);
			if (map == kmem_map)
				splx(s);
			return (KERN_NO_SPACE);
		}
		start = *addr;
	}
	result = vm_map_insert(map, object, offset,
	    start, start + length, prot, max, cow);
	vm_map_unlock(map);

	if (map == kmem_map)
		splx(s);

	return (result);
}

/*
 * vm_map_simplify_entry:
 *
 * Simplify the given map entry by merging with either neighbor.  This
 * routine also has the ability to merge with both neighbors.
 *
 * The map must be locked.
 *
 * This routine guarantees that the passed entry remains valid (though
 * possibly extended).  When merging, this routine may delete one or
 * both neighbors.
 */
void
vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
{
	vm_map_entry_t next, prev;
	vm_size_t prevsize, esize;

	GIANT_REQUIRED;

	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
		return;

	prev = entry->prev;
	if (prev != &map->header) {
		prevsize = prev->end - prev->start;
		if ( (prev->end == entry->start) &&
		     (prev->object.vm_object == entry->object.vm_object) &&
		     (!prev->object.vm_object ||
			(prev->offset + prevsize == entry->offset)) &&
		     (prev->eflags == entry->eflags) &&
		     (prev->protection == entry->protection) &&
		     (prev->max_protection == entry->max_protection) &&
		     (prev->inheritance == entry->inheritance) &&
		     (prev->wired_count == entry->wired_count)) {
			if (map->first_free == prev)
				map->first_free = entry;
			if (map->hint == prev)
				map->hint = entry;
			vm_map_entry_unlink(map, prev);
			entry->start = prev->start;
			entry->offset = prev->offset;
			if (prev->object.vm_object)
				vm_object_deallocate(prev->object.vm_object);
			vm_map_entry_dispose(map, prev);
		}
	}

	next = entry->next;
	if (next != &map->header) {
		esize = entry->end - entry->start;
		if ((entry->end == next->start) &&
		    (next->object.vm_object == entry->object.vm_object) &&
		     (!entry->object.vm_object ||
			(entry->offset + esize == next->offset)) &&
		    (next->eflags == entry->eflags) &&
		    (next->protection == entry->protection) &&
		    (next->max_protection == entry->max_protection) &&
		    (next->inheritance == entry->inheritance) &&
		    (next->wired_count == entry->wired_count)) {
			if (map->first_free == next)
				map->first_free = entry;
			if (map->hint == next)
				map->hint = entry;
			vm_map_entry_unlink(map, next);
			entry->end = next->end;
			if (next->object.vm_object)
				vm_object_deallocate(next->object.vm_object);
			vm_map_entry_dispose(map, next);
		}
	}
}
/*
 * vm_map_clip_start:	[ internal use only ]
 *
 * Asserts that the given entry begins at or after
 * the specified address; if necessary,
 * it splits the entry into two.
 */
#define vm_map_clip_start(map, entry, startaddr) \
{ \
	if (startaddr > entry->start) \
		_vm_map_clip_start(map, entry, startaddr); \
}

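/*
 * Clipping splits a map entry at a given address.  For an entry covering
 * [s, e) and an address A with s < A < e, vm_map_clip_start() leaves the
 * entry as [A, e) and links a new entry [s, A) in front of it, while
 * vm_map_clip_end() (below) leaves [s, A) and links a new entry [A, e)
 * after it.  The wrapper macros are no-ops when no split is needed.
 */
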
/*
 * This routine is called only when it is known that
 * the entry must be split.
 */
static void
_vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
{
	vm_map_entry_t new_entry;

	/*
	 * Split off the front portion -- note that we must insert the new
	 * entry BEFORE this one, so that this entry has the specified
	 * starting address.
	 */
	vm_map_simplify_entry(map, entry);

	/*
	 * If there is no object backing this entry, we might as well create
	 * one now.  If we defer it, an object can get created after the map
	 * is clipped, and individual objects will be created for the split-up
	 * map.  This is a bit of a hack, but is also about the best place to
	 * put this improvement.
	 */
	if (entry->object.vm_object == NULL && !map->system_map) {
		vm_object_t object;
		object = vm_object_allocate(OBJT_DEFAULT,
		    atop(entry->end - entry->start));
		entry->object.vm_object = object;
		entry->offset = 0;
	}

	new_entry = vm_map_entry_create(map);
	*new_entry = *entry;

	new_entry->end = start;
	entry->offset += (start - entry->start);
	entry->start = start;

	vm_map_entry_link(map, entry->prev, new_entry);

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
		vm_object_reference(new_entry->object.vm_object);
	}
}

/*
 * vm_map_clip_end:	[ internal use only ]
 *
 * Asserts that the given entry ends at or before
 * the specified address; if necessary,
 * it splits the entry into two.
 */
#define vm_map_clip_end(map, entry, endaddr) \
{ \
	if (endaddr < entry->end) \
		_vm_map_clip_end(map, entry, endaddr); \
}

/*
 * This routine is called only when it is known that
 * the entry must be split.
 */
static void
_vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
{
	vm_map_entry_t new_entry;

	/*
	 * If there is no object backing this entry, we might as well create
	 * one now.  If we defer it, an object can get created after the map
	 * is clipped, and individual objects will be created for the split-up
	 * map.  This is a bit of a hack, but is also about the best place to
	 * put this improvement.
	 */
	if (entry->object.vm_object == NULL && !map->system_map) {
		vm_object_t object;
		object = vm_object_allocate(OBJT_DEFAULT,
		    atop(entry->end - entry->start));
		entry->object.vm_object = object;
		entry->offset = 0;
	}

	/*
	 * Create a new entry and insert it AFTER the specified entry
	 */
	new_entry = vm_map_entry_create(map);
	*new_entry = *entry;

	new_entry->start = entry->end = end;
	new_entry->offset += (end - entry->start);

	vm_map_entry_link(map, entry, new_entry);

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
		vm_object_reference(new_entry->object.vm_object);
	}
}

/*
 * VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 * Asserts that the starting and ending region
 * addresses fall within the valid range of the map.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)	\
	{					\
	if (start < vm_map_min(map))		\
		start = vm_map_min(map);	\
	if (end > vm_map_max(map))		\
		end = vm_map_max(map);		\
	if (start > end)			\
		start = end;			\
	}

/*
 * vm_map_submap:		[ kernel use only ]
 *
 * Mark the given range as handled by a subordinate map.
 *
 * This range must have been created with vm_map_find,
 * and no other operations may have been performed on this
 * range prior to calling vm_map_submap.
 *
 * Only a limited number of operations can be performed
 * within this range after calling vm_map_submap:
 *	vm_fault
 * [Don't try vm_map_copy!]
 *
 * To remove a submapping, one must first remove the
 * range from the superior map, and then destroy the
 * submap (if desired).  [Better yet, don't try it.]
 */
int
vm_map_submap(
	vm_map_t map,
	vm_offset_t start,
	vm_offset_t end,
	vm_map_t submap)
{
	vm_map_entry_t entry;
	int result = KERN_INVALID_ARGUMENT;

	GIANT_REQUIRED;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);
	} else
		entry = entry->next;

	vm_map_clip_end(map, entry, end);

	if ((entry->start == start) && (entry->end == end) &&
	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
	    (entry->object.vm_object == NULL)) {
		entry->object.sub_map = submap;
		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
		result = KERN_SUCCESS;
	}
	vm_map_unlock(map);

	return (result);
}

/*
 * vm_map_protect:
 *
 * Sets the protection of the specified address
 * region in the target map.  If "set_max" is
 * specified, the maximum protection is to be set;
 * otherwise, only the current protection is affected.
 */
int
vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
	       vm_prot_t new_prot, boolean_t set_max)
{
	vm_map_entry_t current;
	vm_map_entry_t entry;

	GIANT_REQUIRED;
	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);
	} else {
		entry = entry->next;
	}

	/*
	 * Make a first pass to check for protection violations.
	 */
	current = entry;
	while ((current != &map->header) && (current->start < end)) {
		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
			vm_map_unlock(map);
			return (KERN_INVALID_ARGUMENT);
		}
		if ((new_prot & current->max_protection) != new_prot) {
			vm_map_unlock(map);
			return (KERN_PROTECTION_FAILURE);
		}
		current = current->next;
	}

	/*
	 * Go back and fix up protections. [Note that clipping is not
	 * necessary the second time.]
	 */
	current = entry;
	while ((current != &map->header) && (current->start < end)) {
		vm_prot_t old_prot;

		vm_map_clip_end(map, current, end);

		old_prot = current->protection;
		if (set_max)
			current->protection =
			    (current->max_protection = new_prot) &
			    old_prot;
		else
			current->protection = new_prot;

		/*
		 * Update physical map if necessary. Worry about copy-on-write
		 * here -- CHECK THIS XXX
		 */
		if (current->protection != old_prot) {
#define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
			    VM_PROT_ALL)
			pmap_protect(map->pmap, current->start,
			    current->end,
			    current->protection & MASK(current));
#undef MASK
		}
		vm_map_simplify_entry(map, current);
		current = current->next;
	}
	vm_map_unlock(map);
	return (KERN_SUCCESS);
}

/*
 * vm_map_madvise:
 *
 * This routine traverses a process's map handling the madvise
 * system call.  Advisories are classified as either those affecting
 * the vm_map_entry structure, or those affecting the underlying
 * objects.
 */
int
vm_map_madvise(
	vm_map_t map,
	vm_offset_t start,
	vm_offset_t end,
	int behav)
{
	vm_map_entry_t current, entry;
	int modify_map = 0;

	GIANT_REQUIRED;

	/*
	 * Some madvise calls directly modify the vm_map_entry, in which case
	 * we need to use an exclusive lock on the map and we need to perform
	 * various clipping operations.  Otherwise we only need a read-lock
	 * on the map.
	 */
	switch(behav) {
	case MADV_NORMAL:
	case MADV_SEQUENTIAL:
	case MADV_RANDOM:
	case MADV_NOSYNC:
	case MADV_AUTOSYNC:
	case MADV_NOCORE:
	case MADV_CORE:
		modify_map = 1;
		vm_map_lock(map);
		break;
	case MADV_WILLNEED:
	case MADV_DONTNEED:
	case MADV_FREE:
		vm_map_lock_read(map);
		break;
	default:
		return (KERN_INVALID_ARGUMENT);
	}

	/*
	 * Locate starting entry and clip if necessary.
	 */
	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		if (modify_map)
			vm_map_clip_start(map, entry, start);
	} else {
		entry = entry->next;
	}

	if (modify_map) {
		/*
		 * madvise behaviors that are implemented in the vm_map_entry.
		 *
		 * We clip the vm_map_entry so that behavioral changes are
		 * limited to the specified address range.
		 */
		for (current = entry;
		     (current != &map->header) && (current->start < end);
		     current = current->next
		) {
			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
				continue;

			vm_map_clip_end(map, current, end);

			switch (behav) {
			case MADV_NORMAL:
				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
				break;
			case MADV_SEQUENTIAL:
				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
				break;
			case MADV_RANDOM:
				vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
				break;
			case MADV_NOSYNC:
				current->eflags |= MAP_ENTRY_NOSYNC;
				break;
			case MADV_AUTOSYNC:
				current->eflags &= ~MAP_ENTRY_NOSYNC;
				break;
			case MADV_NOCORE:
				current->eflags |= MAP_ENTRY_NOCOREDUMP;
				break;
			case MADV_CORE:
				current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
				break;
			default:
				break;
			}
			vm_map_simplify_entry(map, current);
		}
		vm_map_unlock(map);
	} else {
		vm_pindex_t pindex;
		int count;

		/*
		 * madvise behaviors that are implemented in the underlying
		 * vm_object.
		 *
		 * Since we don't clip the vm_map_entry, we have to clip
		 * the vm_object pindex and count.
		 */
		for (current = entry;
		     (current != &map->header) && (current->start < end);
		     current = current->next
		) {
			vm_offset_t useStart;

			if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
				continue;

			pindex = OFF_TO_IDX(current->offset);
			count = atop(current->end - current->start);
			useStart = current->start;

			if (current->start < start) {
				pindex += atop(start - current->start);
				count -= atop(start - current->start);
				useStart = start;
			}
			if (current->end > end)
				count -= atop(current->end - end);

			if (count <= 0)
				continue;

			vm_object_madvise(current->object.vm_object,
			    pindex, count, behav);
			if (behav == MADV_WILLNEED) {
				pmap_object_init_pt(
				    map->pmap,
				    useStart,
				    current->object.vm_object,
				    pindex,
				    (count << PAGE_SHIFT),
				    MAP_PREFAULT_MADVISE
				);
			}
		}
		vm_map_unlock_read(map);
	}
	return (0);
}


/*
 * vm_map_inherit:
 *
 * Sets the inheritance of the specified address
 * range in the target map.  Inheritance
 * affects how the map will be shared with
 * child maps at the time of vm_map_fork.
 */
int
vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
	       vm_inherit_t new_inheritance)
{
	vm_map_entry_t entry;
	vm_map_entry_t temp_entry;

	GIANT_REQUIRED;

	switch (new_inheritance) {
	case VM_INHERIT_NONE:
	case VM_INHERIT_COPY:
	case VM_INHERIT_SHARE:
		break;
	default:
		return (KERN_INVALID_ARGUMENT);
	}

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &temp_entry)) {
		entry = temp_entry;
		vm_map_clip_start(map, entry, start);
	} else
		entry = temp_entry->next;

	while ((entry != &map->header) && (entry->start < end)) {
		vm_map_clip_end(map, entry, end);

		entry->inheritance = new_inheritance;

		vm_map_simplify_entry(map, entry);

		entry = entry->next;
	}

	vm_map_unlock(map);
	return (KERN_SUCCESS);
}

/*
 * Implement the semantics of mlock
 */
int
vm_map_user_pageable(
	vm_map_t map,
	vm_offset_t start,
	vm_offset_t end,
	boolean_t new_pageable)
{
	vm_map_entry_t entry;
	vm_map_entry_t start_entry;
	vm_offset_t estart;
	vm_offset_t eend;
	int rv;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
		vm_map_unlock(map);
		return (KERN_INVALID_ADDRESS);
	}

	if (new_pageable) {

		entry = start_entry;
		vm_map_clip_start(map, entry, start);

		/*
		 * Now decrement the wiring count for each region. If a region
		 * becomes completely unwired, unwire its physical pages and
		 * mappings.
		 */
		while ((entry != &map->header) && (entry->start < end)) {
			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
				vm_map_clip_end(map, entry, end);
				entry->eflags &= ~MAP_ENTRY_USER_WIRED;
				entry->wired_count--;
				if (entry->wired_count == 0)
					vm_fault_unwire(map, entry->start, entry->end);
			}
			vm_map_simplify_entry(map,entry);
			entry = entry->next;
		}
	} else {

		entry = start_entry;

		while ((entry != &map->header) && (entry->start < end)) {

			if (entry->eflags & MAP_ENTRY_USER_WIRED) {
				entry = entry->next;
				continue;
			}

			if (entry->wired_count != 0) {
				entry->wired_count++;
				entry->eflags |= MAP_ENTRY_USER_WIRED;
				entry = entry->next;
				continue;
			}

			/* Here on entry being newly wired */

			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {

					vm_object_shadow(&entry->object.vm_object,
					    &entry->offset,
					    atop(entry->end - entry->start));
					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;

				} else if (entry->object.vm_object == NULL &&
				    !map->system_map) {

					entry->object.vm_object =
					    vm_object_allocate(OBJT_DEFAULT,
						atop(entry->end - entry->start));
					entry->offset = (vm_offset_t) 0;

				}
			}

			vm_map_clip_start(map, entry, start);
			vm_map_clip_end(map, entry, end);

			entry->wired_count++;
			entry->eflags |= MAP_ENTRY_USER_WIRED;
			estart = entry->start;
			eend = entry->end;

			/* First we need to allow map modifications */
			vm_map_set_recursive(map);
			vm_map_lock_downgrade(map);
			map->timestamp++;

			rv = vm_fault_user_wire(map, entry->start, entry->end);
			if (rv) {

				entry->wired_count--;
				entry->eflags &= ~MAP_ENTRY_USER_WIRED;

				vm_map_clear_recursive(map);
				vm_map_unlock(map);

				/*
				 * At this point, the map is unlocked, and
				 * entry might no longer be valid.  Use copy
				 * of entry start value obtained while entry
				 * was valid.
				 */
				(void) vm_map_user_pageable(map, start, estart,
				    TRUE);
				return rv;
			}

			vm_map_clear_recursive(map);
			if (vm_map_lock_upgrade(map)) {
				vm_map_lock(map);
				if (vm_map_lookup_entry(map, estart, &entry)
				    == FALSE) {
					vm_map_unlock(map);
					/*
					 * vm_fault_user_wire succeeded, thus
					 * the area between start and eend
					 * is wired and has to be unwired
					 * here as part of the cleanup.
					 */
					(void) vm_map_user_pageable(map,
					    start,
					    eend,
					    TRUE);
					return (KERN_INVALID_ADDRESS);
				}
			}
			vm_map_simplify_entry(map,entry);
		}
	}
	map->timestamp++;
	vm_map_unlock(map);
	return KERN_SUCCESS;
}

/*
 * vm_map_pageable:
 *
 * Sets the pageability of the specified address
 * range in the target map.  Regions specified
 * as not pageable require locked-down physical
 * memory and physical page maps.
 *
 * The map must not be locked, but a reference
 * must remain to the map throughout the call.
 */
int
vm_map_pageable(
	vm_map_t map,
	vm_offset_t start,
	vm_offset_t end,
	boolean_t new_pageable)
{
	vm_map_entry_t entry;
	vm_map_entry_t start_entry;
	vm_offset_t failed = 0;
	int rv;

	GIANT_REQUIRED;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	/*
	 * Only one pageability change may take place at one time, since
	 * vm_fault assumes it will be called only once for each
	 * wiring/unwiring.  Therefore, we have to make sure we're actually
	 * changing the pageability for the entire region.  We do so before
	 * making any changes.
	 */
	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
		vm_map_unlock(map);
		return (KERN_INVALID_ADDRESS);
	}
	entry = start_entry;

	/*
	 * Actions are rather different for wiring and unwiring, so we have
	 * two separate cases.
	 */
	if (new_pageable) {
		vm_map_clip_start(map, entry, start);

		/*
		 * Unwiring.  First ensure that the range to be unwired is
		 * really wired down and that there are no holes.
		 */
		while ((entry != &map->header) && (entry->start < end)) {
			if (entry->wired_count == 0 ||
			    (entry->end < end &&
			    (entry->next == &map->header ||
			    entry->next->start > entry->end))) {
				vm_map_unlock(map);
				return (KERN_INVALID_ARGUMENT);
			}
			entry = entry->next;
		}

		/*
		 * Now decrement the wiring count for each region. If a region
		 * becomes completely unwired, unwire its physical pages and
		 * mappings.
		 */
		entry = start_entry;
		while ((entry != &map->header) && (entry->start < end)) {
			vm_map_clip_end(map, entry, end);

			entry->wired_count--;
			if (entry->wired_count == 0)
				vm_fault_unwire(map, entry->start, entry->end);

			vm_map_simplify_entry(map, entry);

			entry = entry->next;
		}
	} else {
		/*
		 * Wiring.  We must do this in two passes:
		 *
		 * 1.  Holding the write lock, we create any shadow or zero-fill
		 * objects that need to be created. Then we clip each map
		 * entry to the region to be wired and increment its wiring
		 * count.  We create objects before clipping the map entries
		 * to avoid object proliferation.
		 *
		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
		 * fault in the pages for any newly wired area (wired_count is
		 * 1).
		 *
		 * Downgrading to a read lock for vm_fault_wire avoids a possible
		 * deadlock with another process that may have faulted on one
		 * of the pages to be wired (it would mark the page busy,
		 * blocking us, then in turn block on the map lock that we
		 * hold).  Because of problems in the recursive lock package,
		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
		 * any actions that require the write lock must be done
		 * beforehand.  Because we keep the read lock on the map, the
		 * copy-on-write status of the entries we modify here cannot
		 * change.
		 */

		/*
		 * Pass 1.
		 */
		while ((entry != &map->header) && (entry->start < end)) {
			if (entry->wired_count == 0) {

				/*
				 * Perform actions of vm_map_lookup that need
				 * the write lock on the map: create a shadow
				 * object for a copy-on-write region, or an
				 * object for a zero-fill region.
				 *
				 * We don't have to do this for entries that
				 * point to sub maps, because we won't
				 * hold the lock on the sub map.
				 */
				if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
					int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
					if (copyflag &&
					    ((entry->protection & VM_PROT_WRITE) != 0)) {

						vm_object_shadow(&entry->object.vm_object,
						    &entry->offset,
						    atop(entry->end - entry->start));
						entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
					} else if (entry->object.vm_object == NULL &&
					    !map->system_map) {
						entry->object.vm_object =
						    vm_object_allocate(OBJT_DEFAULT,
							atop(entry->end - entry->start));
						entry->offset = (vm_offset_t) 0;
					}
				}
			}
			vm_map_clip_start(map, entry, start);
			vm_map_clip_end(map, entry, end);
			entry->wired_count++;

			/*
			 * Check for holes
			 */
			if (entry->end < end &&
			    (entry->next == &map->header ||
			    entry->next->start > entry->end)) {
				/*
				 * Found one.  Object creation actions do not
				 * need to be undone, but the wired counts
				 * need to be restored.
				 */
				while (entry != &map->header && entry->end > start) {
					entry->wired_count--;
					entry = entry->prev;
				}
				vm_map_unlock(map);
				return (KERN_INVALID_ARGUMENT);
			}
			entry = entry->next;
		}

		/*
		 * Pass 2.
		 */

		/*
		 * HACK HACK HACK HACK
		 *
		 * If we are wiring in the kernel map or a submap of it,
		 * unlock the map to avoid deadlocks.  We trust that the
		 * kernel is well-behaved, and therefore will not do
		 * anything destructive to this region of the map while
		 * we have it unlocked.  We cannot trust user processes
		 * to do the same.
		 *
		 * HACK HACK HACK HACK
		 */
		if (vm_map_pmap(map) == kernel_pmap) {
			vm_map_unlock(map);	/* trust me ... */
		} else {
			vm_map_lock_downgrade(map);
		}

		rv = 0;
		entry = start_entry;
		while (entry != &map->header && entry->start < end) {
			/*
			 * If vm_fault_wire fails for any page we need to undo
			 * what has been done.  We decrement the wiring count
			 * for those pages which have not yet been wired (now)
			 * and unwire those that have (later).
			 *
			 * XXX this violates the locking protocol on the map,
			 * needs to be fixed.
			 */
			if (rv)
				entry->wired_count--;
			else if (entry->wired_count == 1) {
				rv = vm_fault_wire(map, entry->start, entry->end);
				if (rv) {
					failed = entry->start;
					entry->wired_count--;
				}
			}
			entry = entry->next;
		}

		if (vm_map_pmap(map) == kernel_pmap) {
			vm_map_lock(map);
		}
		if (rv) {
			vm_map_unlock(map);
			(void) vm_map_pageable(map, start, failed, TRUE);
			return (rv);
		}
		/*
		 * An exclusive lock on the map is needed in order to call
		 * vm_map_simplify_entry().  If the current lock on the map
		 * is only a shared lock, an upgrade is needed.
		 */
		if (vm_map_pmap(map) != kernel_pmap &&
		    vm_map_lock_upgrade(map)) {
			vm_map_lock(map);
			if (vm_map_lookup_entry(map, start, &start_entry) ==
			    FALSE) {
				vm_map_unlock(map);
				return KERN_SUCCESS;
			}
		}
		vm_map_simplify_entry(map, start_entry);
	}

	vm_map_unlock(map);

	return (KERN_SUCCESS);
}

/*
 * vm_map_clean
 *
 * Push any dirty cached pages in the address range to their pager.
 * If syncio is TRUE, dirty pages are written synchronously.
 * If invalidate is TRUE, any cached pages are freed as well.
 *
 * Returns an error if any part of the specified range is not mapped.
 */
int
vm_map_clean(
	vm_map_t map,
	vm_offset_t start,
	vm_offset_t end,
	boolean_t syncio,
	boolean_t invalidate)
{
	vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_size_t size;
	vm_object_t object;
	vm_ooffset_t offset;

	GIANT_REQUIRED;

	vm_map_lock_read(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	if (!vm_map_lookup_entry(map, start, &entry)) {
		vm_map_unlock_read(map);
		return (KERN_INVALID_ADDRESS);
	}
	/*
	 * Make a first pass to check for holes.
	 */
	for (current = entry; current->start < end; current = current->next) {
		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
			vm_map_unlock_read(map);
			return (KERN_INVALID_ARGUMENT);
		}
		if (end > current->end &&
		    (current->next == &map->header ||
			current->end != current->next->start)) {
			vm_map_unlock_read(map);
			return (KERN_INVALID_ADDRESS);
		}
	}

	if (invalidate)
		pmap_remove(vm_map_pmap(map), start, end);
	/*
	 * Make a second pass, cleaning/uncaching pages from the indicated
	 * objects as we go.
	 */
	for (current = entry; current->start < end; current = current->next) {
		offset = current->offset + (start - current->start);
		size = (end <= current->end ? end : current->end) - start;
		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
			vm_map_t smap;
			vm_map_entry_t tentry;
			vm_size_t tsize;

			smap = current->object.sub_map;
			vm_map_lock_read(smap);
			(void) vm_map_lookup_entry(smap, offset, &tentry);
			tsize = tentry->end - offset;
			if (tsize < size)
				size = tsize;
			object = tentry->object.vm_object;
			offset = tentry->offset + (offset - tentry->start);
			vm_map_unlock_read(smap);
		} else {
			object = current->object.vm_object;
		}
		/*
		 * Note that there is absolutely no sense in writing out
		 * anonymous objects, so we track down the vnode object
		 * to write out.
		 * We invalidate (remove) all pages from the address space
		 * anyway, for semantic correctness.
		 *
		 * note: certain anonymous maps, such as MAP_NOSYNC maps,
		 * may start out with a NULL object.
		 */
		while (object && object->backing_object) {
			object = object->backing_object;
			offset += object->backing_object_offset;
			if (object->size < OFF_TO_IDX(offset + size))
				size = IDX_TO_OFF(object->size) - offset;
		}
		if (object && (object->type == OBJT_VNODE) &&
		    (current->protection & VM_PROT_WRITE)) {
			/*
			 * Flush pages if writing is allowed, invalidate them
			 * if invalidation requested.  Pages undergoing I/O
			 * will be ignored by vm_object_page_remove().
			 *
			 * We cannot lock the vnode and then wait for paging
			 * to complete without deadlocking against vm_fault.
			 * Instead we simply call vm_object_page_remove() and
			 * allow it to block internally on a page-by-page
			 * basis when it encounters pages undergoing async
			 * I/O.
			 */
			int flags;

			vm_object_reference(object);
			vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
			flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
			flags |= invalidate ? OBJPC_INVAL : 0;
			vm_object_page_clean(object,
			    OFF_TO_IDX(offset),
			    OFF_TO_IDX(offset + size + PAGE_MASK),
			    flags);
			if (invalidate) {
				/*vm_object_pip_wait(object, "objmcl");*/
				vm_object_page_remove(object,
				    OFF_TO_IDX(offset),
				    OFF_TO_IDX(offset + size + PAGE_MASK),
				    FALSE);
			}
			VOP_UNLOCK(object->handle, 0, curthread);
			vm_object_deallocate(object);
		}
		start += size;
	}

	vm_map_unlock_read(map);
	return (KERN_SUCCESS);
}

/*
 * vm_map_entry_unwire:	[ internal use only ]
 *
 * Make the region specified by this entry pageable.
 *
 * The map in question should be locked.
 * [This is the reason for this routine's existence.]
 */
static void
vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
{
	vm_fault_unwire(map, entry->start, entry->end);
	entry->wired_count = 0;
}

/*
 * vm_map_entry_delete:	[ internal use only ]
 *
 * Deallocate the given entry from the target map.
 */
static void
vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
{
	vm_map_entry_unlink(map, entry);
	map->size -= entry->end - entry->start;

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
		vm_object_deallocate(entry->object.vm_object);
	}

	vm_map_entry_dispose(map, entry);
}

/*
 * vm_map_delete:	[ internal use only ]
 *
 * Deallocates the given address range from the target
 * map.
 */
int
vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	vm_object_t object;
	vm_map_entry_t entry;
	vm_map_entry_t first_entry;

	GIANT_REQUIRED;

	/*
	 * Find the start of the region, and clip it
	 */
	if (!vm_map_lookup_entry(map, start, &first_entry))
		entry = first_entry->next;
	else {
		entry = first_entry;
		vm_map_clip_start(map, entry, start);
		/*
		 * Fix the lookup hint now, rather than each time through the
		 * loop.
		 */
		SAVE_HINT(map, entry->prev);
	}

	/*
	 * Save the free space hint
	 */
	if (entry == &map->header) {
		map->first_free = &map->header;
	} else if (map->first_free->start >= start) {
		map->first_free = entry->prev;
	}

	/*
	 * Step through all entries in this region
	 */
	while ((entry != &map->header) && (entry->start < end)) {
		vm_map_entry_t next;
		vm_offset_t s, e;
		vm_pindex_t offidxstart, offidxend, count;

		vm_map_clip_end(map, entry, end);

		s = entry->start;
		e = entry->end;
		next = entry->next;

		offidxstart = OFF_TO_IDX(entry->offset);
		count = OFF_TO_IDX(e - s);
		object = entry->object.vm_object;

		/*
		 * Unwire before removing addresses from the pmap; otherwise,
		 * unwiring will put the entries back in the pmap.
		 */
		if (entry->wired_count != 0) {
			vm_map_entry_unwire(map, entry);
		}

		offidxend = offidxstart + count;

		if ((object == kernel_object) || (object == kmem_object)) {
			vm_object_page_remove(object, offidxstart, offidxend, FALSE);
		} else {
			pmap_remove(map->pmap, s, e);
			if (object != NULL &&
			    object->ref_count != 1 &&
			    (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
			    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
				vm_object_collapse(object);
				vm_object_page_remove(object, offidxstart, offidxend, FALSE);
				if (object->type == OBJT_SWAP) {
					swap_pager_freespace(object, offidxstart, count);
				}
				if (offidxend >= object->size &&
				    offidxstart < object->size) {
					object->size = offidxstart;
				}
			}
		}

		/*
		 * Delete the entry (which may delete the object) only after
		 * removing all pmap entries pointing to its pages.
		 * (Otherwise, its page frames may be reallocated, and any
		 * modify bits will be set in the wrong object!)
		 */
		vm_map_entry_delete(map, entry);
		entry = next;
	}
	return (KERN_SUCCESS);
}

/*
 * vm_map_remove:
 *
 * Remove the given address range from the target map.
 * This is the exported form of vm_map_delete.
 */
int
vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
	int result, s = 0;

	GIANT_REQUIRED;

	if (map == kmem_map)
		s = splvm();

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end);
	vm_map_unlock(map);

	if (map == kmem_map)
		splx(s);

	return (result);
}

/*
 * vm_map_check_protection:
 *
 * Assert that the target map allows the specified
 * privilege on the entire address region given.
 * The entire region must be allocated.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
			vm_prot_t protection)
{
	vm_map_entry_t entry;
	vm_map_entry_t tmp_entry;

	GIANT_REQUIRED;

	vm_map_lock_read(map);
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		vm_map_unlock_read(map);
		return (FALSE);
	}
	entry = tmp_entry;

	while (start < end) {
		if (entry == &map->header) {
			vm_map_unlock_read(map);
			return (FALSE);
		}
		/*
		 * No holes allowed!
		 */
		if (start < entry->start) {
			vm_map_unlock_read(map);
			return (FALSE);
		}
		/*
		 * Check protection associated with entry.
		 */
		if ((entry->protection & protection) != protection) {
			vm_map_unlock_read(map);
			return (FALSE);
		}
		/* go to next entry */
		start = entry->end;
		entry = entry->next;
	}
	vm_map_unlock_read(map);
	return (TRUE);
}

/*
 * Split the pages in a map entry into a new object.  This affords
 * easier removal of unused pages, and keeps object inheritance from
 * being a negative impact on memory usage.
 */
static void
vm_map_split(vm_map_entry_t entry)
{
	vm_page_t m;
	vm_object_t orig_object, new_object, source;
	vm_offset_t s, e;
	vm_pindex_t offidxstart, offidxend, idx;
	vm_size_t size;
	vm_ooffset_t offset;

	GIANT_REQUIRED;

	orig_object = entry->object.vm_object;
	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
		return;
	if (orig_object->ref_count <= 1)
		return;

	offset = entry->offset;
	s = entry->start;
	e = entry->end;

	offidxstart = OFF_TO_IDX(offset);
	offidxend = offidxstart + OFF_TO_IDX(e - s);
	size = offidxend - offidxstart;

	new_object = vm_pager_allocate(orig_object->type,
	    NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
	if (new_object == NULL)
		return;

	source = orig_object->backing_object;
	if (source != NULL) {
		vm_object_reference(source);	/* Referenced by new_object */
		TAILQ_INSERT_TAIL(&source->shadow_head,
		    new_object, shadow_list);
		vm_object_clear_flag(source, OBJ_ONEMAPPING);
		new_object->backing_object_offset =
		    orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
		new_object->backing_object = source;
		source->shadow_count++;
		source->generation++;
	}

	for (idx = 0; idx < size; idx++) {
		vm_page_t m;

	retry:
		m = vm_page_lookup(orig_object, offidxstart + idx);
		if (m == NULL)
			continue;

		/*
		 * We must wait for pending I/O to complete before we can
		 * rename the page.
		 *
		 * We do not have to VM_PROT_NONE the page as mappings should
		 * not be changed by this operation.
		 */
		if (vm_page_sleep_busy(m, TRUE, "spltwt"))
			goto retry;

		vm_page_busy(m);
		vm_page_rename(m, new_object, idx);
		/* page automatically made dirty by rename and cache handled */
		vm_page_busy(m);
	}

	if (orig_object->type == OBJT_SWAP) {
		vm_object_pip_add(orig_object, 1);
		/*
		 * copy orig_object pages into new_object
		 * and destroy unneeded pages in
		 * shadow object.
		 */
		swap_pager_copy(orig_object, new_object, offidxstart, 0);
		vm_object_pip_wakeup(orig_object);
	}

	for (idx = 0; idx < size; idx++) {
		m = vm_page_lookup(new_object, idx);
		if (m) {
			vm_page_wakeup(m);
		}
	}

	entry->object.vm_object = new_object;
	entry->offset = 0LL;
	vm_object_deallocate(orig_object);
}

/*
 * vm_map_copy_entry:
 *
 * Copies the contents of the source entry to the destination
 * entry.  The entries *must* be aligned properly.
 */
static void
vm_map_copy_entry(
	vm_map_t src_map,
	vm_map_t dst_map,
	vm_map_entry_t src_entry,
	vm_map_entry_t dst_entry)
{
	vm_object_t src_object;

	if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
		return;

	if (src_entry->wired_count == 0) {

		/*
		 * If the source entry is marked needs_copy, it is already
		 * write-protected.
		 */
		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
			pmap_protect(src_map->pmap,
			    src_entry->start,
			    src_entry->end,
			    src_entry->protection & ~VM_PROT_WRITE);
		}

		/*
		 * Make a copy of the object.
2324 */ 2325 if ((src_object = src_entry->object.vm_object) != NULL) { 2326 2327 if ((src_object->handle == NULL) && 2328 (src_object->type == OBJT_DEFAULT || 2329 src_object->type == OBJT_SWAP)) { 2330 vm_object_collapse(src_object); 2331 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) { 2332 vm_map_split(src_entry); 2333 src_object = src_entry->object.vm_object; 2334 } 2335 } 2336 2337 vm_object_reference(src_object); 2338 vm_object_clear_flag(src_object, OBJ_ONEMAPPING); 2339 dst_entry->object.vm_object = src_object; 2340 src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); 2341 dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); 2342 dst_entry->offset = src_entry->offset; 2343 } else { 2344 dst_entry->object.vm_object = NULL; 2345 dst_entry->offset = 0; 2346 } 2347 2348 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, 2349 dst_entry->end - dst_entry->start, src_entry->start); 2350 } else { 2351 /* 2352 * Of course, wired down pages can't be set copy-on-write. 2353 * Cause wired pages to be copied into the new map by 2354 * simulating faults (the new pages are pageable) 2355 */ 2356 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry); 2357 } 2358} 2359 2360/* 2361 * vmspace_fork: 2362 * Create a new process vmspace structure and vm_map 2363 * based on those of an existing process. The new map 2364 * is based on the old map, according to the inheritance 2365 * values on the regions in that map. 2366 * 2367 * The source map must not be locked. 2368 */ 2369struct vmspace * 2370vmspace_fork(struct vmspace *vm1) 2371{ 2372 struct vmspace *vm2; 2373 vm_map_t old_map = &vm1->vm_map; 2374 vm_map_t new_map; 2375 vm_map_entry_t old_entry; 2376 vm_map_entry_t new_entry; 2377 vm_object_t object; 2378 2379 GIANT_REQUIRED; 2380 2381 vm_map_lock(old_map); 2382 old_map->infork = 1; 2383 2384 vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset); 2385 bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, 2386 (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy); 2387 new_map = &vm2->vm_map; /* XXX */ 2388 new_map->timestamp = 1; 2389 2390 old_entry = old_map->header.next; 2391 2392 while (old_entry != &old_map->header) { 2393 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) 2394 panic("vm_map_fork: encountered a submap"); 2395 2396 switch (old_entry->inheritance) { 2397 case VM_INHERIT_NONE: 2398 break; 2399 2400 case VM_INHERIT_SHARE: 2401 /* 2402 * Clone the entry, creating the shared object if necessary. 2403 */ 2404 object = old_entry->object.vm_object; 2405 if (object == NULL) { 2406 object = vm_object_allocate(OBJT_DEFAULT, 2407 atop(old_entry->end - old_entry->start)); 2408 old_entry->object.vm_object = object; 2409 old_entry->offset = (vm_offset_t) 0; 2410 } 2411 2412 /* 2413 * Add the reference before calling vm_object_shadow 2414 * to insure that a shadow object is created. 2415 */ 2416 vm_object_reference(object); 2417 if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) { 2418 vm_object_shadow(&old_entry->object.vm_object, 2419 &old_entry->offset, 2420 atop(old_entry->end - old_entry->start)); 2421 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 2422 /* Transfer the second reference too. */ 2423 vm_object_reference( 2424 old_entry->object.vm_object); 2425 vm_object_deallocate(object); 2426 object = old_entry->object.vm_object; 2427 } 2428 vm_object_clear_flag(object, OBJ_ONEMAPPING); 2429 2430 /* 2431 * Clone the entry, referencing the shared object. 
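 *
 * [Editor's note -- illustrative]  VM_INHERIT_SHARE means parent and
 * child keep referencing the same object, so stores made by either
 * process are visible to both -- the behavior a program requests with
 * minherit(addr, len, INHERIT_SHARE) before forking (userland call shown
 * only for illustration).  The shadow dance above handles an entry that
 * was still pending its own copy (MAP_ENTRY_NEEDS_COPY): the shadow
 * object is materialized first so that parent and child share the
 * parent's private copy rather than the object it was due to copy from.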
2432 */ 2433 new_entry = vm_map_entry_create(new_map); 2434 *new_entry = *old_entry; 2435 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED; 2436 new_entry->wired_count = 0; 2437 2438 /* 2439 * Insert the entry into the new map -- we know we're 2440 * inserting at the end of the new map. 2441 */ 2442 vm_map_entry_link(new_map, new_map->header.prev, 2443 new_entry); 2444 2445 /* 2446 * Update the physical map 2447 */ 2448 pmap_copy(new_map->pmap, old_map->pmap, 2449 new_entry->start, 2450 (old_entry->end - old_entry->start), 2451 old_entry->start); 2452 break; 2453 2454 case VM_INHERIT_COPY: 2455 /* 2456 * Clone the entry and link into the map. 2457 */ 2458 new_entry = vm_map_entry_create(new_map); 2459 *new_entry = *old_entry; 2460 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED; 2461 new_entry->wired_count = 0; 2462 new_entry->object.vm_object = NULL; 2463 vm_map_entry_link(new_map, new_map->header.prev, 2464 new_entry); 2465 vm_map_copy_entry(old_map, new_map, old_entry, 2466 new_entry); 2467 break; 2468 } 2469 old_entry = old_entry->next; 2470 } 2471 2472 new_map->size = old_map->size; 2473 old_map->infork = 0; 2474 vm_map_unlock(old_map); 2475 2476 return (vm2); 2477} 2478 2479int 2480vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, 2481 vm_prot_t prot, vm_prot_t max, int cow) 2482{ 2483 vm_map_entry_t prev_entry; 2484 vm_map_entry_t new_stack_entry; 2485 vm_size_t init_ssize; 2486 int rv; 2487 2488 GIANT_REQUIRED; 2489 2490 if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS) 2491 return (KERN_NO_SPACE); 2492 2493 if (max_ssize < sgrowsiz) 2494 init_ssize = max_ssize; 2495 else 2496 init_ssize = sgrowsiz; 2497 2498 vm_map_lock(map); 2499 2500 /* If addr is already mapped, no go */ 2501 if (vm_map_lookup_entry(map, addrbos, &prev_entry)) { 2502 vm_map_unlock(map); 2503 return (KERN_NO_SPACE); 2504 } 2505 2506 /* If we can't accomodate max_ssize in the current mapping, 2507 * no go. However, we need to be aware that subsequent user 2508 * mappings might map into the space we have reserved for 2509 * stack, and currently this space is not protected. 2510 * 2511 * Hopefully we will at least detect this condition 2512 * when we try to grow the stack. 2513 */ 2514 if ((prev_entry->next != &map->header) && 2515 (prev_entry->next->start < addrbos + max_ssize)) { 2516 vm_map_unlock(map); 2517 return (KERN_NO_SPACE); 2518 } 2519 2520 /* We initially map a stack of only init_ssize. We will 2521 * grow as needed later. Since this is to be a grow 2522 * down stack, we map at the top of the range. 2523 * 2524 * Note: we would normally expect prot and max to be 2525 * VM_PROT_ALL, and cow to be 0. Possibly we should 2526 * eliminate these as input parameters, and just 2527 * pass these values here in the insert call. 2528 */ 2529 rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize, 2530 addrbos + max_ssize, prot, max, cow); 2531 2532 /* Now set the avail_ssize amount */ 2533 if (rv == KERN_SUCCESS){ 2534 if (prev_entry != &map->header) 2535 vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize); 2536 new_stack_entry = prev_entry->next; 2537 if (new_stack_entry->end != addrbos + max_ssize || 2538 new_stack_entry->start != addrbos + max_ssize - init_ssize) 2539 panic ("Bad entry start/end for new stack entry"); 2540 else 2541 new_stack_entry->avail_ssize = max_ssize - init_ssize; 2542 } 2543 2544 vm_map_unlock(map); 2545 return (rv); 2546} 2547 2548/* Attempts to grow a vm stack entry. 
Returns KERN_SUCCESS if the 2549 * desired address is already mapped, or if we successfully grow 2550 * the stack. Also returns KERN_SUCCESS if addr is outside the 2551 * stack range (this is strange, but preserves compatibility with 2552 * the grow function in vm_machdep.c). 2553 */ 2554int 2555vm_map_growstack (struct proc *p, vm_offset_t addr) 2556{ 2557 vm_map_entry_t prev_entry; 2558 vm_map_entry_t stack_entry; 2559 vm_map_entry_t new_stack_entry; 2560 struct vmspace *vm = p->p_vmspace; 2561 vm_map_t map = &vm->vm_map; 2562 vm_offset_t end; 2563 int grow_amount; 2564 int rv; 2565 int is_procstack; 2566 2567 GIANT_REQUIRED; 2568 2569Retry: 2570 vm_map_lock_read(map); 2571 2572 /* If addr is already in the entry range, no need to grow.*/ 2573 if (vm_map_lookup_entry(map, addr, &prev_entry)) { 2574 vm_map_unlock_read(map); 2575 return (KERN_SUCCESS); 2576 } 2577 2578 if ((stack_entry = prev_entry->next) == &map->header) { 2579 vm_map_unlock_read(map); 2580 return (KERN_SUCCESS); 2581 } 2582 if (prev_entry == &map->header) 2583 end = stack_entry->start - stack_entry->avail_ssize; 2584 else 2585 end = prev_entry->end; 2586 2587 /* This next test mimics the old grow function in vm_machdep.c. 2588 * It really doesn't quite make sense, but we do it anyway 2589 * for compatibility. 2590 * 2591 * If not growable stack, return success. This signals the 2592 * caller to proceed as he would normally with normal vm. 2593 */ 2594 if (stack_entry->avail_ssize < 1 || 2595 addr >= stack_entry->start || 2596 addr < stack_entry->start - stack_entry->avail_ssize) { 2597 vm_map_unlock_read(map); 2598 return (KERN_SUCCESS); 2599 } 2600 2601 /* Find the minimum grow amount */ 2602 grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE); 2603 if (grow_amount > stack_entry->avail_ssize) { 2604 vm_map_unlock_read(map); 2605 return (KERN_NO_SPACE); 2606 } 2607 2608 /* If there is no longer enough space between the entries 2609 * nogo, and adjust the available space. Note: this 2610 * should only happen if the user has mapped into the 2611 * stack area after the stack was created, and is 2612 * probably an error. 2613 * 2614 * This also effectively destroys any guard page the user 2615 * might have intended by limiting the stack size. 2616 */ 2617 if (grow_amount > stack_entry->start - end) { 2618 if (vm_map_lock_upgrade(map)) 2619 goto Retry; 2620 2621 stack_entry->avail_ssize = stack_entry->start - end; 2622 2623 vm_map_unlock(map); 2624 return (KERN_NO_SPACE); 2625 } 2626 2627 is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr; 2628 2629 /* If this is the main process stack, see if we're over the 2630 * stack limit. 2631 */ 2632 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > 2633 p->p_rlimit[RLIMIT_STACK].rlim_cur)) { 2634 vm_map_unlock_read(map); 2635 return (KERN_NO_SPACE); 2636 } 2637 2638 /* Round up the grow amount modulo SGROWSIZ */ 2639 grow_amount = roundup (grow_amount, sgrowsiz); 2640 if (grow_amount > stack_entry->avail_ssize) { 2641 grow_amount = stack_entry->avail_ssize; 2642 } 2643 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > 2644 p->p_rlimit[RLIMIT_STACK].rlim_cur)) { 2645 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur - 2646 ctob(vm->vm_ssize); 2647 } 2648 2649 if (vm_map_lock_upgrade(map)) 2650 goto Retry; 2651 2652 /* Get the preliminary new entry start value */ 2653 addr = stack_entry->start - grow_amount; 2654 2655 /* If this puts us into the previous entry, cut back our growth 2656 * to the available space. Also, see the note above. 
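 *
 * [Editor's note -- worked example, values hypothetical]  With 4KB pages
 * and an sgrowsiz of 128KB, a fault 0x1800 bytes below stack_entry->start
 * works out above to
 *
 *	grow_amount = roundup(0x1800, PAGE_SIZE) = 0x2000
 *	grow_amount = roundup(0x2000, sgrowsiz)  = 0x20000
 *	addr        = stack_entry->start - 0x20000
 *
 * subject to the avail_ssize and RLIMIT_STACK clamps already applied;
 * the test below then trims the growth once more if it would run into
 * the previous entry.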
2657 */ 2658 if (addr < end) { 2659 stack_entry->avail_ssize = stack_entry->start - end; 2660 addr = end; 2661 } 2662 2663 rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start, 2664 VM_PROT_ALL, 2665 VM_PROT_ALL, 2666 0); 2667 2668 /* Adjust the available stack space by the amount we grew. */ 2669 if (rv == KERN_SUCCESS) { 2670 if (prev_entry != &map->header) 2671 vm_map_clip_end(map, prev_entry, addr); 2672 new_stack_entry = prev_entry->next; 2673 if (new_stack_entry->end != stack_entry->start || 2674 new_stack_entry->start != addr) 2675 panic ("Bad stack grow start/end in new stack entry"); 2676 else { 2677 new_stack_entry->avail_ssize = stack_entry->avail_ssize - 2678 (new_stack_entry->end - 2679 new_stack_entry->start); 2680 if (is_procstack) 2681 vm->vm_ssize += btoc(new_stack_entry->end - 2682 new_stack_entry->start); 2683 } 2684 } 2685 2686 vm_map_unlock(map); 2687 return (rv); 2688} 2689 2690/* 2691 * Unshare the specified VM space for exec. If other processes are 2692 * mapped to it, then create a new one. The new vmspace is null. 2693 */ 2694void 2695vmspace_exec(struct proc *p) 2696{ 2697 struct vmspace *oldvmspace = p->p_vmspace; 2698 struct vmspace *newvmspace; 2699 vm_map_t map = &p->p_vmspace->vm_map; 2700 2701 GIANT_REQUIRED; 2702 newvmspace = vmspace_alloc(map->min_offset, map->max_offset); 2703 bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy, 2704 (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy); 2705 /* 2706 * This code is written like this for prototype purposes. The 2707 * goal is to avoid running down the vmspace here, but let the 2708 * other process's that are still using the vmspace to finally 2709 * run it down. Even though there is little or no chance of blocking 2710 * here, it is a good idea to keep this form for future mods. 2711 */ 2712 p->p_vmspace = newvmspace; 2713 pmap_pinit2(vmspace_pmap(newvmspace)); 2714 vmspace_free(oldvmspace); 2715 if (p == curthread->td_proc) /* XXXKSE ? */ 2716 pmap_activate(curthread); 2717} 2718 2719/* 2720 * Unshare the specified VM space for forcing COW. This 2721 * is called by rfork, for the (RFMEM|RFPROC) == 0 case. 2722 */ 2723void 2724vmspace_unshare(struct proc *p) 2725{ 2726 struct vmspace *oldvmspace = p->p_vmspace; 2727 struct vmspace *newvmspace; 2728 2729 GIANT_REQUIRED; 2730 if (oldvmspace->vm_refcnt == 1) 2731 return; 2732 newvmspace = vmspace_fork(oldvmspace); 2733 p->p_vmspace = newvmspace; 2734 pmap_pinit2(vmspace_pmap(newvmspace)); 2735 vmspace_free(oldvmspace); 2736 if (p == curthread->td_proc) /* XXXKSE ? */ 2737 pmap_activate(curthread); 2738} 2739 2740/* 2741 * vm_map_lookup: 2742 * 2743 * Finds the VM object, offset, and 2744 * protection for a given virtual address in the 2745 * specified map, assuming a page fault of the 2746 * type specified. 2747 * 2748 * Leaves the map in question locked for read; return 2749 * values are guaranteed until a vm_map_lookup_done 2750 * call is performed. Note that the map argument 2751 * is in/out; the returned map must be used in 2752 * the call to vm_map_lookup_done. 2753 * 2754 * A handle (out_entry) is returned for use in 2755 * vm_map_lookup_done, to make that fast. 2756 * 2757 * If a lookup is requested with "write protection" 2758 * specified, the map may be changed to perform virtual 2759 * copying operations, although the data referenced will 2760 * remain the same. 
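 *
 * [Editor's note -- usage sketch, not part of the original file]  The
 * expected calling pattern (essentially what the fault handler does) is:
 *
 *	result = vm_map_lookup(&map, vaddr, VM_PROT_READ, &entry,
 *	    &object, &pindex, &prot, &wired);
 *	if (result != KERN_SUCCESS)
 *		return (result);
 *	... find or page in object/pindex ...
 *	vm_map_lookup_done(map, entry);
 *
 * "map" may be rewritten by the lookup (when a submap is entered), which
 * is why the possibly-updated value must be the one handed to
 * vm_map_lookup_done().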
2761 */ 2762int 2763vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ 2764 vm_offset_t vaddr, 2765 vm_prot_t fault_typea, 2766 vm_map_entry_t *out_entry, /* OUT */ 2767 vm_object_t *object, /* OUT */ 2768 vm_pindex_t *pindex, /* OUT */ 2769 vm_prot_t *out_prot, /* OUT */ 2770 boolean_t *wired) /* OUT */ 2771{ 2772 vm_map_entry_t entry; 2773 vm_map_t map = *var_map; 2774 vm_prot_t prot; 2775 vm_prot_t fault_type = fault_typea; 2776 2777 GIANT_REQUIRED; 2778RetryLookup:; 2779 /* 2780 * Lookup the faulting address. 2781 */ 2782 2783 vm_map_lock_read(map); 2784#define RETURN(why) \ 2785 { \ 2786 vm_map_unlock_read(map); \ 2787 return (why); \ 2788 } 2789 2790 /* 2791 * If the map has an interesting hint, try it before calling full 2792 * blown lookup routine. 2793 */ 2794 entry = map->hint; 2795 *out_entry = entry; 2796 if ((entry == &map->header) || 2797 (vaddr < entry->start) || (vaddr >= entry->end)) { 2798 vm_map_entry_t tmp_entry; 2799 2800 /* 2801 * Entry was either not a valid hint, or the vaddr was not 2802 * contained in the entry, so do a full lookup. 2803 */ 2804 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) 2805 RETURN(KERN_INVALID_ADDRESS); 2806 2807 entry = tmp_entry; 2808 *out_entry = entry; 2809 } 2810 2811 /* 2812 * Handle submaps. 2813 */ 2814 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 2815 vm_map_t old_map = map; 2816 2817 *var_map = map = entry->object.sub_map; 2818 vm_map_unlock_read(old_map); 2819 goto RetryLookup; 2820 } 2821 2822 /* 2823 * Check whether this task is allowed to have this page. 2824 * Note the special case for MAP_ENTRY_COW 2825 * pages with an override. This is to implement a forced 2826 * COW for debuggers. 2827 */ 2828 if (fault_type & VM_PROT_OVERRIDE_WRITE) 2829 prot = entry->max_protection; 2830 else 2831 prot = entry->protection; 2832 fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE); 2833 if ((fault_type & prot) != fault_type) { 2834 RETURN(KERN_PROTECTION_FAILURE); 2835 } 2836 if ((entry->eflags & MAP_ENTRY_USER_WIRED) && 2837 (entry->eflags & MAP_ENTRY_COW) && 2838 (fault_type & VM_PROT_WRITE) && 2839 (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) { 2840 RETURN(KERN_PROTECTION_FAILURE); 2841 } 2842 2843 /* 2844 * If this page is not pageable, we have to get it for all possible 2845 * accesses. 2846 */ 2847 *wired = (entry->wired_count != 0); 2848 if (*wired) 2849 prot = fault_type = entry->protection; 2850 2851 /* 2852 * If the entry was copy-on-write, we either ... 2853 */ 2854 if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { 2855 /* 2856 * If we want to write the page, we may as well handle that 2857 * now since we've got the map locked. 2858 * 2859 * If we don't need to write the page, we just demote the 2860 * permissions allowed. 2861 */ 2862 if (fault_type & VM_PROT_WRITE) { 2863 /* 2864 * Make a new object, and place it in the object 2865 * chain. Note that no new references have appeared 2866 * -- one just moved from the map to the new 2867 * object. 2868 */ 2869 if (vm_map_lock_upgrade(map)) 2870 goto RetryLookup; 2871 vm_object_shadow( 2872 &entry->object.vm_object, 2873 &entry->offset, 2874 atop(entry->end - entry->start)); 2875 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 2876 vm_map_lock_downgrade(map); 2877 } else { 2878 /* 2879 * We're attempting to read a copy-on-write page -- 2880 * don't allow writes. 2881 */ 2882 prot &= ~VM_PROT_WRITE; 2883 } 2884 } 2885 2886 /* 2887 * Create an object if necessary. 
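 *
 * [Editor's note]  Anonymous memory gets its backing OBJT_DEFAULT object
 * lazily, at the first fault that reaches this point; maps with
 * system_map set are exempt because their entries are managed
 * explicitly.  The object is sized in pages: atop(entry->end - entry->start).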
2888 */ 2889 if (entry->object.vm_object == NULL && 2890 !map->system_map) { 2891 if (vm_map_lock_upgrade(map)) 2892 goto RetryLookup; 2893 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, 2894 atop(entry->end - entry->start)); 2895 entry->offset = 0; 2896 vm_map_lock_downgrade(map); 2897 } 2898 2899 /* 2900 * Return the object/offset from this entry. If the entry was 2901 * copy-on-write or empty, it has been fixed up. 2902 */ 2903 *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); 2904 *object = entry->object.vm_object; 2905 2906 /* 2907 * Return whether this is the only map sharing this data. 2908 */ 2909 *out_prot = prot; 2910 return (KERN_SUCCESS); 2911 2912#undef RETURN 2913} 2914 2915/* 2916 * vm_map_lookup_done: 2917 * 2918 * Releases locks acquired by a vm_map_lookup 2919 * (according to the handle returned by that lookup). 2920 */ 2921void 2922vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry) 2923{ 2924 /* 2925 * Unlock the main-level map 2926 */ 2927 GIANT_REQUIRED; 2928 vm_map_unlock_read(map); 2929} 2930 2931/* 2932 * Implement uiomove with VM operations. This handles (and collateral changes) 2933 * support every combination of source object modification, and COW type 2934 * operations. 2935 */ 2936int 2937vm_uiomove( 2938 vm_map_t mapa, 2939 vm_object_t srcobject, 2940 off_t cp, 2941 int cnta, 2942 vm_offset_t uaddra, 2943 int *npages) 2944{ 2945 vm_map_t map; 2946 vm_object_t first_object, oldobject, object; 2947 vm_map_entry_t entry; 2948 vm_prot_t prot; 2949 boolean_t wired; 2950 int tcnt, rv; 2951 vm_offset_t uaddr, start, end, tend; 2952 vm_pindex_t first_pindex, osize, oindex; 2953 off_t ooffset; 2954 int cnt; 2955 2956 GIANT_REQUIRED; 2957 2958 if (npages) 2959 *npages = 0; 2960 2961 cnt = cnta; 2962 uaddr = uaddra; 2963 2964 while (cnt > 0) { 2965 map = mapa; 2966 2967 if ((vm_map_lookup(&map, uaddr, 2968 VM_PROT_READ, &entry, &first_object, 2969 &first_pindex, &prot, &wired)) != KERN_SUCCESS) { 2970 return EFAULT; 2971 } 2972 2973 vm_map_clip_start(map, entry, uaddr); 2974 2975 tcnt = cnt; 2976 tend = uaddr + tcnt; 2977 if (tend > entry->end) { 2978 tcnt = entry->end - uaddr; 2979 tend = entry->end; 2980 } 2981 2982 vm_map_clip_end(map, entry, tend); 2983 2984 start = entry->start; 2985 end = entry->end; 2986 2987 osize = atop(tcnt); 2988 2989 oindex = OFF_TO_IDX(cp); 2990 if (npages) { 2991 vm_pindex_t idx; 2992 for (idx = 0; idx < osize; idx++) { 2993 vm_page_t m; 2994 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) { 2995 vm_map_lookup_done(map, entry); 2996 return 0; 2997 } 2998 /* 2999 * disallow busy or invalid pages, but allow 3000 * m->busy pages if they are entirely valid. 3001 */ 3002 if ((m->flags & PG_BUSY) || 3003 ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) { 3004 vm_map_lookup_done(map, entry); 3005 return 0; 3006 } 3007 } 3008 } 3009 3010/* 3011 * If we are changing an existing map entry, just redirect 3012 * the object, and change mappings. 
3013 */ 3014 if ((first_object->type == OBJT_VNODE) && 3015 ((oldobject = entry->object.vm_object) == first_object)) { 3016 3017 if ((entry->offset != cp) || (oldobject != srcobject)) { 3018 /* 3019 * Remove old window into the file 3020 */ 3021 pmap_remove (map->pmap, uaddr, tend); 3022 3023 /* 3024 * Force copy on write for mmaped regions 3025 */ 3026 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3027 3028 /* 3029 * Point the object appropriately 3030 */ 3031 if (oldobject != srcobject) { 3032 3033 /* 3034 * Set the object optimization hint flag 3035 */ 3036 vm_object_set_flag(srcobject, OBJ_OPT); 3037 vm_object_reference(srcobject); 3038 entry->object.vm_object = srcobject; 3039 3040 if (oldobject) { 3041 vm_object_deallocate(oldobject); 3042 } 3043 } 3044 3045 entry->offset = cp; 3046 map->timestamp++; 3047 } else { 3048 pmap_remove (map->pmap, uaddr, tend); 3049 } 3050 3051 } else if ((first_object->ref_count == 1) && 3052 (first_object->size == osize) && 3053 ((first_object->type == OBJT_DEFAULT) || 3054 (first_object->type == OBJT_SWAP)) ) { 3055 3056 oldobject = first_object->backing_object; 3057 3058 if ((first_object->backing_object_offset != cp) || 3059 (oldobject != srcobject)) { 3060 /* 3061 * Remove old window into the file 3062 */ 3063 pmap_remove (map->pmap, uaddr, tend); 3064 3065 /* 3066 * Remove unneeded old pages 3067 */ 3068 vm_object_page_remove(first_object, 0, 0, 0); 3069 3070 /* 3071 * Invalidate swap space 3072 */ 3073 if (first_object->type == OBJT_SWAP) { 3074 swap_pager_freespace(first_object, 3075 0, 3076 first_object->size); 3077 } 3078 3079 /* 3080 * Force copy on write for mmaped regions 3081 */ 3082 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3083 3084 /* 3085 * Point the object appropriately 3086 */ 3087 if (oldobject != srcobject) { 3088 /* 3089 * Set the object optimization hint flag 3090 */ 3091 vm_object_set_flag(srcobject, OBJ_OPT); 3092 vm_object_reference(srcobject); 3093 3094 if (oldobject) { 3095 TAILQ_REMOVE(&oldobject->shadow_head, 3096 first_object, shadow_list); 3097 oldobject->shadow_count--; 3098 /* XXX bump generation? */ 3099 vm_object_deallocate(oldobject); 3100 } 3101 3102 TAILQ_INSERT_TAIL(&srcobject->shadow_head, 3103 first_object, shadow_list); 3104 srcobject->shadow_count++; 3105 /* XXX bump generation? */ 3106 3107 first_object->backing_object = srcobject; 3108 } 3109 first_object->backing_object_offset = cp; 3110 map->timestamp++; 3111 } else { 3112 pmap_remove (map->pmap, uaddr, tend); 3113 } 3114/* 3115 * Otherwise, we have to do a logical mmap. 
3116 */ 3117 } else { 3118 3119 vm_object_set_flag(srcobject, OBJ_OPT); 3120 vm_object_reference(srcobject); 3121 3122 pmap_remove (map->pmap, uaddr, tend); 3123 3124 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3125 vm_map_lock_upgrade(map); 3126 3127 if (entry == &map->header) { 3128 map->first_free = &map->header; 3129 } else if (map->first_free->start >= start) { 3130 map->first_free = entry->prev; 3131 } 3132 3133 SAVE_HINT(map, entry->prev); 3134 vm_map_entry_delete(map, entry); 3135 3136 object = srcobject; 3137 ooffset = cp; 3138 3139 rv = vm_map_insert(map, object, ooffset, start, tend, 3140 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE); 3141 3142 if (rv != KERN_SUCCESS) 3143 panic("vm_uiomove: could not insert new entry: %d", rv); 3144 } 3145 3146/* 3147 * Map the window directly, if it is already in memory 3148 */ 3149 pmap_object_init_pt(map->pmap, uaddr, 3150 srcobject, oindex, tcnt, 0); 3151 3152 map->timestamp++; 3153 vm_map_unlock(map); 3154 3155 cnt -= tcnt; 3156 uaddr += tcnt; 3157 cp += tcnt; 3158 if (npages) 3159 *npages += osize; 3160 } 3161 return 0; 3162} 3163 3164/* 3165 * Performs the copy_on_write operations necessary to allow the virtual copies 3166 * into user space to work. This has to be called for write(2) system calls 3167 * from other processes, file unlinking, and file size shrinkage. 3168 */ 3169void 3170vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa) 3171{ 3172 int rv; 3173 vm_object_t robject; 3174 vm_pindex_t idx; 3175 3176 GIANT_REQUIRED; 3177 if ((object == NULL) || 3178 ((object->flags & OBJ_OPT) == 0)) 3179 return; 3180 3181 if (object->shadow_count > object->ref_count) 3182 panic("vm_freeze_copyopts: sc > rc"); 3183 3184 while ((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) { 3185 vm_pindex_t bo_pindex; 3186 vm_page_t m_in, m_out; 3187 3188 bo_pindex = OFF_TO_IDX(robject->backing_object_offset); 3189 3190 vm_object_reference(robject); 3191 3192 vm_object_pip_wait(robject, "objfrz"); 3193 3194 if (robject->ref_count == 1) { 3195 vm_object_deallocate(robject); 3196 continue; 3197 } 3198 3199 vm_object_pip_add(robject, 1); 3200 3201 for (idx = 0; idx < robject->size; idx++) { 3202 3203 m_out = vm_page_grab(robject, idx, 3204 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 3205 3206 if (m_out->valid == 0) { 3207 m_in = vm_page_grab(object, bo_pindex + idx, 3208 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 3209 if (m_in->valid == 0) { 3210 rv = vm_pager_get_pages(object, &m_in, 1, 0); 3211 if (rv != VM_PAGER_OK) { 3212 printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex); 3213 continue; 3214 } 3215 vm_page_deactivate(m_in); 3216 } 3217 3218 vm_page_protect(m_in, VM_PROT_NONE); 3219 pmap_copy_page(m_in, m_out); 3220 m_out->valid = m_in->valid; 3221 vm_page_dirty(m_out); 3222 vm_page_activate(m_out); 3223 vm_page_wakeup(m_in); 3224 } 3225 vm_page_wakeup(m_out); 3226 } 3227 3228 object->shadow_count--; 3229 object->ref_count--; 3230 TAILQ_REMOVE(&object->shadow_head, robject, shadow_list); 3231 robject->backing_object = NULL; 3232 robject->backing_object_offset = 0; 3233 3234 vm_object_pip_wakeup(robject); 3235 vm_object_deallocate(robject); 3236 } 3237 3238 vm_object_clear_flag(object, OBJ_OPT); 3239} 3240 3241#include "opt_ddb.h" 3242#ifdef DDB 3243#include <sys/kernel.h> 3244 3245#include <ddb/ddb.h> 3246 3247/* 3248 * vm_map_print: [ debug ] 3249 */ 3250DB_SHOW_COMMAND(map, vm_map_print) 3251{ 3252 static int nlines; 3253 /* XXX convert args. 
*/ 3254 vm_map_t map = (vm_map_t)addr; 3255 boolean_t full = have_addr; 3256 3257 vm_map_entry_t entry; 3258 3259 db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n", 3260 (void *)map, 3261 (void *)map->pmap, map->nentries, map->timestamp); 3262 nlines++; 3263 3264 if (!full && db_indent) 3265 return; 3266 3267 db_indent += 2; 3268 for (entry = map->header.next; entry != &map->header; 3269 entry = entry->next) { 3270 db_iprintf("map entry %p: start=%p, end=%p\n", 3271 (void *)entry, (void *)entry->start, (void *)entry->end); 3272 nlines++; 3273 { 3274 static char *inheritance_name[4] = 3275 {"share", "copy", "none", "donate_copy"}; 3276 3277 db_iprintf(" prot=%x/%x/%s", 3278 entry->protection, 3279 entry->max_protection, 3280 inheritance_name[(int)(unsigned char)entry->inheritance]); 3281 if (entry->wired_count != 0) 3282 db_printf(", wired"); 3283 } 3284 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 3285 /* XXX no %qd in kernel. Truncate entry->offset. */ 3286 db_printf(", share=%p, offset=0x%lx\n", 3287 (void *)entry->object.sub_map, 3288 (long)entry->offset); 3289 nlines++; 3290 if ((entry->prev == &map->header) || 3291 (entry->prev->object.sub_map != 3292 entry->object.sub_map)) { 3293 db_indent += 2; 3294 vm_map_print((db_expr_t)(intptr_t) 3295 entry->object.sub_map, 3296 full, 0, (char *)0); 3297 db_indent -= 2; 3298 } 3299 } else { 3300 /* XXX no %qd in kernel. Truncate entry->offset. */ 3301 db_printf(", object=%p, offset=0x%lx", 3302 (void *)entry->object.vm_object, 3303 (long)entry->offset); 3304 if (entry->eflags & MAP_ENTRY_COW) 3305 db_printf(", copy (%s)", 3306 (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); 3307 db_printf("\n"); 3308 nlines++; 3309 3310 if ((entry->prev == &map->header) || 3311 (entry->prev->object.vm_object != 3312 entry->object.vm_object)) { 3313 db_indent += 2; 3314 vm_object_print((db_expr_t)(intptr_t) 3315 entry->object.vm_object, 3316 full, 0, (char *)0); 3317 nlines += 4; 3318 db_indent -= 2; 3319 } 3320 } 3321 } 3322 db_indent -= 2; 3323 if (db_indent == 0) 3324 nlines = 0; 3325} 3326 3327 3328DB_SHOW_COMMAND(procvm, procvm) 3329{ 3330 struct proc *p; 3331 3332 if (have_addr) { 3333 p = (struct proc *) addr; 3334 } else { 3335 p = curproc; 3336 } 3337 3338 db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n", 3339 (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map, 3340 (void *)vmspace_pmap(p->p_vmspace)); 3341 3342 vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL); 3343} 3344 3345#endif /* DDB */
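
/*
 * [Editor's note -- illustrative, not part of the original file]
 * DB_SHOW_COMMAND(map, ...) and DB_SHOW_COMMAND(procvm, ...) above hook
 * these routines into the in-kernel debugger as "show map <address>" and
 * "show procvm".  Following the db_printf format strings, a session
 * might look like this (all addresses and counts are made up):
 *
 *	db> show procvm
 *	p = 0xc16d5b00, vmspace = 0xc0a1b000, map = 0xc0a1b000, pmap = 0xc0a1b144
 *	Task map 0xc0a1b000: pmap=0xc0a1b144, nentries=24, version=63
 *	  map entry 0xc15f1234: start=0x8048000, end=0x8051000
 *	   prot=5/7/copy, object=0xc15e9000, offset=0x0, copy (done)
 *	  ...
 *
 * vm_map_print() recurses into submap entries and dumps each distinct
 * backing vm_object via vm_object_print().
 */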
428_vm_map_lock_downgrade(vm_map_t map, const char *file, int line) 429{ 430 vm_map_printf("locking map LK_DOWNGRADE: %p\n", map); 431 lockmgr(&map->lock, LK_DOWNGRADE, NULL, curthread); 432} 433 434void 435_vm_map_set_recursive(vm_map_t map, const char *file, int line) 436{ 437 mtx_lock((map)->lock.lk_interlock); 438 map->lock.lk_flags |= LK_CANRECURSE; 439 mtx_unlock((map)->lock.lk_interlock); 440} 441 442void 443_vm_map_clear_recursive(vm_map_t map, const char *file, int line) 444{ 445 mtx_lock((map)->lock.lk_interlock); 446 map->lock.lk_flags &= ~LK_CANRECURSE; 447 mtx_unlock((map)->lock.lk_interlock); 448} 449 450struct pmap * 451vmspace_pmap(struct vmspace *vmspace) 452{ 453 return &vmspace->vm_pmap; 454} 455 456long 457vmspace_resident_count(struct vmspace *vmspace) 458{ 459 return pmap_resident_count(vmspace_pmap(vmspace)); 460} 461 462/* 463 * vm_map_create: 464 * 465 * Creates and returns a new empty VM map with 466 * the given physical map structure, and having 467 * the given lower and upper address bounds. 468 */ 469vm_map_t 470vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max) 471{ 472 vm_map_t result; 473 474 GIANT_REQUIRED; 475 476 result = uma_zalloc(mapzone, M_WAITOK); 477 CTR1(KTR_VM, "vm_map_create: %p", result); 478 _vm_map_init(result, min, max); 479 result->pmap = pmap; 480 return (result); 481} 482 483/* 484 * Initialize an existing vm_map structure 485 * such as that in the vmspace structure. 486 * The pmap is set elsewhere. 487 */ 488static void 489_vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max) 490{ 491 492 map->header.next = map->header.prev = &map->header; 493 map->system_map = 0; 494 map->min_offset = min; 495 map->max_offset = max; 496 map->first_free = &map->header; 497 map->hint = &map->header; 498 map->timestamp = 0; 499} 500 501void 502vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max) 503{ 504 _vm_map_init(map, min, max); 505 lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE); 506} 507 508/* 509 * vm_map_entry_dispose: [ internal use only ] 510 * 511 * Inverse of vm_map_entry_create. 512 */ 513static void 514vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry) 515{ 516 uma_zfree((map->system_map || !mapentzone) 517 ? kmapentzone : mapentzone, entry); 518} 519 520/* 521 * vm_map_entry_create: [ internal use only ] 522 * 523 * Allocates a VM map entry for insertion. 524 * No entry fields are filled in. 525 */ 526static vm_map_entry_t 527vm_map_entry_create(vm_map_t map) 528{ 529 vm_map_entry_t new_entry; 530 531 new_entry = uma_zalloc((map->system_map || !mapentzone) ? 532 kmapentzone : mapentzone, M_WAITOK); 533 if (new_entry == NULL) 534 panic("vm_map_entry_create: kernel resources exhausted"); 535 return (new_entry); 536} 537 538/* 539 * vm_map_entry_{un,}link: 540 * 541 * Insert/remove entries from maps. 
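 *
 * [Editor's note -- illustrative]  Entries sit on a circular,
 * doubly-linked list sorted by address, with map->header as the
 * sentinel.  Linking is the usual four pointer updates; linking an entry
 * E after the sentinel of an empty map leaves
 *
 *	header.next = E,  E->prev = &header
 *	E->next = &header,  header.prev = E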
542 */ 543static __inline void 544vm_map_entry_link(vm_map_t map, 545 vm_map_entry_t after_where, 546 vm_map_entry_t entry) 547{ 548 549 CTR4(KTR_VM, 550 "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map, 551 map->nentries, entry, after_where); 552 map->nentries++; 553 entry->prev = after_where; 554 entry->next = after_where->next; 555 entry->next->prev = entry; 556 after_where->next = entry; 557} 558 559static __inline void 560vm_map_entry_unlink(vm_map_t map, 561 vm_map_entry_t entry) 562{ 563 vm_map_entry_t prev = entry->prev; 564 vm_map_entry_t next = entry->next; 565 566 next->prev = prev; 567 prev->next = next; 568 map->nentries--; 569 CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map, 570 map->nentries, entry); 571} 572 573/* 574 * SAVE_HINT: 575 * 576 * Saves the specified entry as the hint for 577 * future lookups. 578 */ 579#define SAVE_HINT(map,value) \ 580 (map)->hint = (value); 581 582/* 583 * vm_map_lookup_entry: [ internal use only ] 584 * 585 * Finds the map entry containing (or 586 * immediately preceding) the specified address 587 * in the given map; the entry is returned 588 * in the "entry" parameter. The boolean 589 * result indicates whether the address is 590 * actually contained in the map. 591 */ 592boolean_t 593vm_map_lookup_entry( 594 vm_map_t map, 595 vm_offset_t address, 596 vm_map_entry_t *entry) /* OUT */ 597{ 598 vm_map_entry_t cur; 599 vm_map_entry_t last; 600 601 GIANT_REQUIRED; 602 /* 603 * Start looking either from the head of the list, or from the hint. 604 */ 605 cur = map->hint; 606 607 if (cur == &map->header) 608 cur = cur->next; 609 610 if (address >= cur->start) { 611 /* 612 * Go from hint to end of list. 613 * 614 * But first, make a quick check to see if we are already looking 615 * at the entry we want (which is usually the case). Note also 616 * that we don't need to save the hint here... it is the same 617 * hint (unless we are at the header, in which case the hint 618 * didn't buy us anything anyway). 619 */ 620 last = &map->header; 621 if ((cur != last) && (cur->end > address)) { 622 *entry = cur; 623 return (TRUE); 624 } 625 } else { 626 /* 627 * Go from start to hint, *inclusively* 628 */ 629 last = cur->next; 630 cur = map->header.next; 631 } 632 633 /* 634 * Search linearly 635 */ 636 while (cur != last) { 637 if (cur->end > address) { 638 if (address >= cur->start) { 639 /* 640 * Save this lookup for future hints, and 641 * return 642 */ 643 *entry = cur; 644 SAVE_HINT(map, cur); 645 return (TRUE); 646 } 647 break; 648 } 649 cur = cur->next; 650 } 651 *entry = cur->prev; 652 SAVE_HINT(map, *entry); 653 return (FALSE); 654} 655 656/* 657 * vm_map_insert: 658 * 659 * Inserts the given whole VM object into the target 660 * map at the specified address range. The object's 661 * size should match that of the address range. 662 * 663 * Requires that the map be locked, and leaves it so. 664 * 665 * If object is non-NULL, ref count must be bumped by caller 666 * prior to making call to account for the new entry. 667 */ 668int 669vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 670 vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, 671 int cow) 672{ 673 vm_map_entry_t new_entry; 674 vm_map_entry_t prev_entry; 675 vm_map_entry_t temp_entry; 676 vm_eflags_t protoeflags; 677 678 GIANT_REQUIRED; 679 680 /* 681 * Check that the start and end points are not bogus. 
682 */ 683 if ((start < map->min_offset) || (end > map->max_offset) || 684 (start >= end)) 685 return (KERN_INVALID_ADDRESS); 686 687 /* 688 * Find the entry prior to the proposed starting address; if it's part 689 * of an existing entry, this range is bogus. 690 */ 691 if (vm_map_lookup_entry(map, start, &temp_entry)) 692 return (KERN_NO_SPACE); 693 694 prev_entry = temp_entry; 695 696 /* 697 * Assert that the next entry doesn't overlap the end point. 698 */ 699 if ((prev_entry->next != &map->header) && 700 (prev_entry->next->start < end)) 701 return (KERN_NO_SPACE); 702 703 protoeflags = 0; 704 705 if (cow & MAP_COPY_ON_WRITE) 706 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY; 707 708 if (cow & MAP_NOFAULT) { 709 protoeflags |= MAP_ENTRY_NOFAULT; 710 711 KASSERT(object == NULL, 712 ("vm_map_insert: paradoxical MAP_NOFAULT request")); 713 } 714 if (cow & MAP_DISABLE_SYNCER) 715 protoeflags |= MAP_ENTRY_NOSYNC; 716 if (cow & MAP_DISABLE_COREDUMP) 717 protoeflags |= MAP_ENTRY_NOCOREDUMP; 718 719 if (object) { 720 /* 721 * When object is non-NULL, it could be shared with another 722 * process. We have to set or clear OBJ_ONEMAPPING 723 * appropriately. 724 */ 725 if ((object->ref_count > 1) || (object->shadow_count != 0)) { 726 vm_object_clear_flag(object, OBJ_ONEMAPPING); 727 } 728 } 729 else if ((prev_entry != &map->header) && 730 (prev_entry->eflags == protoeflags) && 731 (prev_entry->end == start) && 732 (prev_entry->wired_count == 0) && 733 ((prev_entry->object.vm_object == NULL) || 734 vm_object_coalesce(prev_entry->object.vm_object, 735 OFF_TO_IDX(prev_entry->offset), 736 (vm_size_t)(prev_entry->end - prev_entry->start), 737 (vm_size_t)(end - prev_entry->end)))) { 738 /* 739 * We were able to extend the object. Determine if we 740 * can extend the previous map entry to include the 741 * new range as well. 742 */ 743 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) && 744 (prev_entry->protection == prot) && 745 (prev_entry->max_protection == max)) { 746 map->size += (end - prev_entry->end); 747 prev_entry->end = end; 748 vm_map_simplify_entry(map, prev_entry); 749 return (KERN_SUCCESS); 750 } 751 752 /* 753 * If we can extend the object but cannot extend the 754 * map entry, we have to create a new map entry. We 755 * must bump the ref count on the extended object to 756 * account for it. object may be NULL. 757 */ 758 object = prev_entry->object.vm_object; 759 offset = prev_entry->offset + 760 (prev_entry->end - prev_entry->start); 761 vm_object_reference(object); 762 } 763 764 /* 765 * NOTE: if conditionals fail, object can be NULL here. This occurs 766 * in things like the buffer map where we manage kva but do not manage 767 * backing objects. 
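 *
 * [Editor's note -- illustrative]  The coalesce path above means that an
 * anonymous insert landing directly after a compatible anonymous entry
 * usually creates nothing new:
 *
 *	before:  prev_entry [0x1000,0x3000), object A of 0x2000 bytes
 *	insert:  start 0x3000, end 0x5000, object == NULL
 *	after:   prev_entry [0x1000,0x5000), object A grown to 0x4000 bytes
 *
 * Only when the object can be extended but the entry attributes
 * (inheritance, protection) differ do we fall through to here and build
 * a separate entry pointing into the now-larger object.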
768 */ 769 770 /* 771 * Create a new entry 772 */ 773 new_entry = vm_map_entry_create(map); 774 new_entry->start = start; 775 new_entry->end = end; 776 777 new_entry->eflags = protoeflags; 778 new_entry->object.vm_object = object; 779 new_entry->offset = offset; 780 new_entry->avail_ssize = 0; 781 782 new_entry->inheritance = VM_INHERIT_DEFAULT; 783 new_entry->protection = prot; 784 new_entry->max_protection = max; 785 new_entry->wired_count = 0; 786 787 /* 788 * Insert the new entry into the list 789 */ 790 vm_map_entry_link(map, prev_entry, new_entry); 791 map->size += new_entry->end - new_entry->start; 792 793 /* 794 * Update the free space hint 795 */ 796 if ((map->first_free == prev_entry) && 797 (prev_entry->end >= new_entry->start)) { 798 map->first_free = new_entry; 799 } 800 801#if 0 802 /* 803 * Temporarily removed to avoid MAP_STACK panic, due to 804 * MAP_STACK being a huge hack. Will be added back in 805 * when MAP_STACK (and the user stack mapping) is fixed. 806 */ 807 /* 808 * It may be possible to simplify the entry 809 */ 810 vm_map_simplify_entry(map, new_entry); 811#endif 812 813 if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) { 814 pmap_object_init_pt(map->pmap, start, 815 object, OFF_TO_IDX(offset), end - start, 816 cow & MAP_PREFAULT_PARTIAL); 817 } 818 819 return (KERN_SUCCESS); 820} 821 822/* 823 * Find sufficient space for `length' bytes in the given map, starting at 824 * `start'. The map must be locked. Returns 0 on success, 1 on no space. 825 */ 826int 827vm_map_findspace( 828 vm_map_t map, 829 vm_offset_t start, 830 vm_size_t length, 831 vm_offset_t *addr) 832{ 833 vm_map_entry_t entry, next; 834 vm_offset_t end; 835 836 GIANT_REQUIRED; 837 if (start < map->min_offset) 838 start = map->min_offset; 839 if (start > map->max_offset) 840 return (1); 841 842 /* 843 * Look for the first possible address; if there's already something 844 * at this address, we have to start after it. 845 */ 846 if (start == map->min_offset) { 847 if ((entry = map->first_free) != &map->header) 848 start = entry->end; 849 } else { 850 vm_map_entry_t tmp; 851 852 if (vm_map_lookup_entry(map, start, &tmp)) 853 start = tmp->end; 854 entry = tmp; 855 } 856 857 /* 858 * Look through the rest of the map, trying to fit a new region in the 859 * gap between existing regions, or after the very last region. 860 */ 861 for (;; start = (entry = next)->end) { 862 /* 863 * Find the end of the proposed new region. Be sure we didn't 864 * go beyond the end of the map, or wrap around the address; 865 * if so, we lose. Otherwise, if this is the last entry, or 866 * if the proposed new region fits before the next entry, we 867 * win. 868 */ 869 end = start + length; 870 if (end > map->max_offset || end < start) 871 return (1); 872 next = entry->next; 873 if (next == &map->header || next->start >= end) 874 break; 875 } 876 SAVE_HINT(map, entry); 877 *addr = start; 878 if (map == kernel_map) { 879 vm_offset_t ksize; 880 if ((ksize = round_page(start + length)) > kernel_vm_end) { 881 pmap_growkernel(ksize); 882 } 883 } 884 return (0); 885} 886 887/* 888 * vm_map_find finds an unallocated region in the target address 889 * map with the given length. The search is defined to be 890 * first-fit from the specified address; the region found is 891 * returned in the same parameter. 892 * 893 * If object is non-NULL, ref count must be bumped by caller 894 * prior to making call to account for the new entry. 
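 *
 * [Editor's note -- usage sketch, names hypothetical]  A caller that
 * just wants space "anywhere from the hint onward" passes find_space =
 * TRUE and lets the routine rewrite *addr:
 *
 *	vm_offset_t addr = vm_map_min(map);
 *
 *	vm_object_reference(obj);		... caller-supplied ref ...
 *	if (vm_map_find(map, obj, 0, &addr, size, TRUE,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
 *		vm_object_deallocate(obj);	... drop the ref on failure ...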
895 */ 896int 897vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 898 vm_offset_t *addr, /* IN/OUT */ 899 vm_size_t length, boolean_t find_space, vm_prot_t prot, 900 vm_prot_t max, int cow) 901{ 902 vm_offset_t start; 903 int result, s = 0; 904 905 GIANT_REQUIRED; 906 907 start = *addr; 908 909 if (map == kmem_map) 910 s = splvm(); 911 912 vm_map_lock(map); 913 if (find_space) { 914 if (vm_map_findspace(map, start, length, addr)) { 915 vm_map_unlock(map); 916 if (map == kmem_map) 917 splx(s); 918 return (KERN_NO_SPACE); 919 } 920 start = *addr; 921 } 922 result = vm_map_insert(map, object, offset, 923 start, start + length, prot, max, cow); 924 vm_map_unlock(map); 925 926 if (map == kmem_map) 927 splx(s); 928 929 return (result); 930} 931 932/* 933 * vm_map_simplify_entry: 934 * 935 * Simplify the given map entry by merging with either neighbor. This 936 * routine also has the ability to merge with both neighbors. 937 * 938 * The map must be locked. 939 * 940 * This routine guarentees that the passed entry remains valid (though 941 * possibly extended). When merging, this routine may delete one or 942 * both neighbors. 943 */ 944void 945vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) 946{ 947 vm_map_entry_t next, prev; 948 vm_size_t prevsize, esize; 949 950 GIANT_REQUIRED; 951 952 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) 953 return; 954 955 prev = entry->prev; 956 if (prev != &map->header) { 957 prevsize = prev->end - prev->start; 958 if ( (prev->end == entry->start) && 959 (prev->object.vm_object == entry->object.vm_object) && 960 (!prev->object.vm_object || 961 (prev->offset + prevsize == entry->offset)) && 962 (prev->eflags == entry->eflags) && 963 (prev->protection == entry->protection) && 964 (prev->max_protection == entry->max_protection) && 965 (prev->inheritance == entry->inheritance) && 966 (prev->wired_count == entry->wired_count)) { 967 if (map->first_free == prev) 968 map->first_free = entry; 969 if (map->hint == prev) 970 map->hint = entry; 971 vm_map_entry_unlink(map, prev); 972 entry->start = prev->start; 973 entry->offset = prev->offset; 974 if (prev->object.vm_object) 975 vm_object_deallocate(prev->object.vm_object); 976 vm_map_entry_dispose(map, prev); 977 } 978 } 979 980 next = entry->next; 981 if (next != &map->header) { 982 esize = entry->end - entry->start; 983 if ((entry->end == next->start) && 984 (next->object.vm_object == entry->object.vm_object) && 985 (!entry->object.vm_object || 986 (entry->offset + esize == next->offset)) && 987 (next->eflags == entry->eflags) && 988 (next->protection == entry->protection) && 989 (next->max_protection == entry->max_protection) && 990 (next->inheritance == entry->inheritance) && 991 (next->wired_count == entry->wired_count)) { 992 if (map->first_free == next) 993 map->first_free = entry; 994 if (map->hint == next) 995 map->hint = entry; 996 vm_map_entry_unlink(map, next); 997 entry->end = next->end; 998 if (next->object.vm_object) 999 vm_object_deallocate(next->object.vm_object); 1000 vm_map_entry_dispose(map, next); 1001 } 1002 } 1003} 1004/* 1005 * vm_map_clip_start: [ internal use only ] 1006 * 1007 * Asserts that the given entry begins at or after 1008 * the specified address; if necessary, 1009 * it splits the entry into two. 1010 */ 1011#define vm_map_clip_start(map, entry, startaddr) \ 1012{ \ 1013 if (startaddr > entry->start) \ 1014 _vm_map_clip_start(map, entry, startaddr); \ 1015} 1016 1017/* 1018 * This routine is called only when it is known that 1019 * the entry must be split. 
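 *
 * [Editor's note -- worked example]  Clipping an entry covering
 * [0x2000,0x6000) with offset 0 at start = 0x3000 leaves two entries
 * over the same object:
 *
 *	new_entry: start 0x2000, end 0x3000, offset 0x0
 *	entry:     start 0x3000, end 0x6000, offset 0x1000
 *
 * new_entry is linked immediately before entry, so the caller's pointer
 * still names the piece beginning at the requested address, and the
 * backing object gains a reference for the extra entry (unless this is
 * a submap entry).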
1020 */ 1021static void 1022_vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) 1023{ 1024 vm_map_entry_t new_entry; 1025 1026 /* 1027 * Split off the front portion -- note that we must insert the new 1028 * entry BEFORE this one, so that this entry has the specified 1029 * starting address. 1030 */ 1031 vm_map_simplify_entry(map, entry); 1032 1033 /* 1034 * If there is no object backing this entry, we might as well create 1035 * one now. If we defer it, an object can get created after the map 1036 * is clipped, and individual objects will be created for the split-up 1037 * map. This is a bit of a hack, but is also about the best place to 1038 * put this improvement. 1039 */ 1040 if (entry->object.vm_object == NULL && !map->system_map) { 1041 vm_object_t object; 1042 object = vm_object_allocate(OBJT_DEFAULT, 1043 atop(entry->end - entry->start)); 1044 entry->object.vm_object = object; 1045 entry->offset = 0; 1046 } 1047 1048 new_entry = vm_map_entry_create(map); 1049 *new_entry = *entry; 1050 1051 new_entry->end = start; 1052 entry->offset += (start - entry->start); 1053 entry->start = start; 1054 1055 vm_map_entry_link(map, entry->prev, new_entry); 1056 1057 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 1058 vm_object_reference(new_entry->object.vm_object); 1059 } 1060} 1061 1062/* 1063 * vm_map_clip_end: [ internal use only ] 1064 * 1065 * Asserts that the given entry ends at or before 1066 * the specified address; if necessary, 1067 * it splits the entry into two. 1068 */ 1069#define vm_map_clip_end(map, entry, endaddr) \ 1070{ \ 1071 if (endaddr < entry->end) \ 1072 _vm_map_clip_end(map, entry, endaddr); \ 1073} 1074 1075/* 1076 * This routine is called only when it is known that 1077 * the entry must be split. 1078 */ 1079static void 1080_vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) 1081{ 1082 vm_map_entry_t new_entry; 1083 1084 /* 1085 * If there is no object backing this entry, we might as well create 1086 * one now. If we defer it, an object can get created after the map 1087 * is clipped, and individual objects will be created for the split-up 1088 * map. This is a bit of a hack, but is also about the best place to 1089 * put this improvement. 1090 */ 1091 if (entry->object.vm_object == NULL && !map->system_map) { 1092 vm_object_t object; 1093 object = vm_object_allocate(OBJT_DEFAULT, 1094 atop(entry->end - entry->start)); 1095 entry->object.vm_object = object; 1096 entry->offset = 0; 1097 } 1098 1099 /* 1100 * Create a new entry and insert it AFTER the specified entry 1101 */ 1102 new_entry = vm_map_entry_create(map); 1103 *new_entry = *entry; 1104 1105 new_entry->start = entry->end = end; 1106 new_entry->offset += (end - entry->start); 1107 1108 vm_map_entry_link(map, entry, new_entry); 1109 1110 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 1111 vm_object_reference(new_entry->object.vm_object); 1112 } 1113} 1114 1115/* 1116 * VM_MAP_RANGE_CHECK: [ internal use only ] 1117 * 1118 * Asserts that the starting and ending region 1119 * addresses fall within the valid range of the map. 1120 */ 1121#define VM_MAP_RANGE_CHECK(map, start, end) \ 1122 { \ 1123 if (start < vm_map_min(map)) \ 1124 start = vm_map_min(map); \ 1125 if (end > vm_map_max(map)) \ 1126 end = vm_map_max(map); \ 1127 if (start > end) \ 1128 start = end; \ 1129 } 1130 1131/* 1132 * vm_map_submap: [ kernel use only ] 1133 * 1134 * Mark the given range as handled by a subordinate map. 
1135 * 1136 * This range must have been created with vm_map_find, 1137 * and no other operations may have been performed on this 1138 * range prior to calling vm_map_submap. 1139 * 1140 * Only a limited number of operations can be performed 1141 * within this rage after calling vm_map_submap: 1142 * vm_fault 1143 * [Don't try vm_map_copy!] 1144 * 1145 * To remove a submapping, one must first remove the 1146 * range from the superior map, and then destroy the 1147 * submap (if desired). [Better yet, don't try it.] 1148 */ 1149int 1150vm_map_submap( 1151 vm_map_t map, 1152 vm_offset_t start, 1153 vm_offset_t end, 1154 vm_map_t submap) 1155{ 1156 vm_map_entry_t entry; 1157 int result = KERN_INVALID_ARGUMENT; 1158 1159 GIANT_REQUIRED; 1160 1161 vm_map_lock(map); 1162 1163 VM_MAP_RANGE_CHECK(map, start, end); 1164 1165 if (vm_map_lookup_entry(map, start, &entry)) { 1166 vm_map_clip_start(map, entry, start); 1167 } else 1168 entry = entry->next; 1169 1170 vm_map_clip_end(map, entry, end); 1171 1172 if ((entry->start == start) && (entry->end == end) && 1173 ((entry->eflags & MAP_ENTRY_COW) == 0) && 1174 (entry->object.vm_object == NULL)) { 1175 entry->object.sub_map = submap; 1176 entry->eflags |= MAP_ENTRY_IS_SUB_MAP; 1177 result = KERN_SUCCESS; 1178 } 1179 vm_map_unlock(map); 1180 1181 return (result); 1182} 1183 1184/* 1185 * vm_map_protect: 1186 * 1187 * Sets the protection of the specified address 1188 * region in the target map. If "set_max" is 1189 * specified, the maximum protection is to be set; 1190 * otherwise, only the current protection is affected. 1191 */ 1192int 1193vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, 1194 vm_prot_t new_prot, boolean_t set_max) 1195{ 1196 vm_map_entry_t current; 1197 vm_map_entry_t entry; 1198 1199 GIANT_REQUIRED; 1200 vm_map_lock(map); 1201 1202 VM_MAP_RANGE_CHECK(map, start, end); 1203 1204 if (vm_map_lookup_entry(map, start, &entry)) { 1205 vm_map_clip_start(map, entry, start); 1206 } else { 1207 entry = entry->next; 1208 } 1209 1210 /* 1211 * Make a first pass to check for protection violations. 1212 */ 1213 current = entry; 1214 while ((current != &map->header) && (current->start < end)) { 1215 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 1216 vm_map_unlock(map); 1217 return (KERN_INVALID_ARGUMENT); 1218 } 1219 if ((new_prot & current->max_protection) != new_prot) { 1220 vm_map_unlock(map); 1221 return (KERN_PROTECTION_FAILURE); 1222 } 1223 current = current->next; 1224 } 1225 1226 /* 1227 * Go back and fix up protections. [Note that clipping is not 1228 * necessary the second time.] 1229 */ 1230 current = entry; 1231 while ((current != &map->header) && (current->start < end)) { 1232 vm_prot_t old_prot; 1233 1234 vm_map_clip_end(map, current, end); 1235 1236 old_prot = current->protection; 1237 if (set_max) 1238 current->protection = 1239 (current->max_protection = new_prot) & 1240 old_prot; 1241 else 1242 current->protection = new_prot; 1243 1244 /* 1245 * Update physical map if necessary. Worry about copy-on-write 1246 * here -- CHECK THIS XXX 1247 */ 1248 if (current->protection != old_prot) { 1249#define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? 
~VM_PROT_WRITE : \
1250 VM_PROT_ALL)
1251 pmap_protect(map->pmap, current->start,
1252 current->end,
1253 current->protection & MASK(current));
1254#undef MASK
1255 }
1256 vm_map_simplify_entry(map, current);
1257 current = current->next;
1258 }
1259 vm_map_unlock(map);
1260 return (KERN_SUCCESS);
1261}
1262
1263/*
1264 * vm_map_madvise:
1265 *
1266 * This routine traverses a process's map handling the madvise
1267 * system call. Advisories are classified as either those affecting
1268 * the vm_map_entry structure, or those affecting the underlying
1269 * objects.
1270 */
1271int
1272vm_map_madvise(
1273 vm_map_t map,
1274 vm_offset_t start,
1275 vm_offset_t end,
1276 int behav)
1277{
1278 vm_map_entry_t current, entry;
1279 int modify_map = 0;
1280
1281 GIANT_REQUIRED;
1282
1283 /*
1284 * Some madvise calls directly modify the vm_map_entry, in which case
1285 * we need to use an exclusive lock on the map and we need to perform
1286 * various clipping operations. Otherwise we only need a read-lock
1287 * on the map.
1288 */
1289 switch(behav) {
1290 case MADV_NORMAL:
1291 case MADV_SEQUENTIAL:
1292 case MADV_RANDOM:
1293 case MADV_NOSYNC:
1294 case MADV_AUTOSYNC:
1295 case MADV_NOCORE:
1296 case MADV_CORE:
1297 modify_map = 1;
1298 vm_map_lock(map);
1299 break;
1300 case MADV_WILLNEED:
1301 case MADV_DONTNEED:
1302 case MADV_FREE:
1303 vm_map_lock_read(map);
1304 break;
1305 default:
1306 return (KERN_INVALID_ARGUMENT);
1307 }
1308
1309 /*
1310 * Locate starting entry and clip if necessary.
1311 */
1312 VM_MAP_RANGE_CHECK(map, start, end);
1313
1314 if (vm_map_lookup_entry(map, start, &entry)) {
1315 if (modify_map)
1316 vm_map_clip_start(map, entry, start);
1317 } else {
1318 entry = entry->next;
1319 }
1320
1321 if (modify_map) {
1322 /*
1323 * madvise behaviors that are implemented in the vm_map_entry.
1324 *
1325 * We clip the vm_map_entry so that behavioral changes are
1326 * limited to the specified address range.
1327 */
1328 for (current = entry;
1329 (current != &map->header) && (current->start < end);
1330 current = current->next
1331 ) {
1332 if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
1333 continue;
1334
1335 vm_map_clip_end(map, current, end);
1336
1337 switch (behav) {
1338 case MADV_NORMAL:
1339 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
1340 break;
1341 case MADV_SEQUENTIAL:
1342 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
1343 break;
1344 case MADV_RANDOM:
1345 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
1346 break;
1347 case MADV_NOSYNC:
1348 current->eflags |= MAP_ENTRY_NOSYNC;
1349 break;
1350 case MADV_AUTOSYNC:
1351 current->eflags &= ~MAP_ENTRY_NOSYNC;
1352 break;
1353 case MADV_NOCORE:
1354 current->eflags |= MAP_ENTRY_NOCOREDUMP;
1355 break;
1356 case MADV_CORE:
1357 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
1358 break;
1359 default:
1360 break;
1361 }
1362 vm_map_simplify_entry(map, current);
1363 }
1364 vm_map_unlock(map);
1365 } else {
1366 vm_pindex_t pindex;
1367 int count;
1368
1369 /*
1370 * madvise behaviors that are implemented in the underlying
1371 * vm_object.
1372 *
1373 * Since we don't clip the vm_map_entry, we have to clip
1374 * the vm_object pindex and count.
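 * (The adjustments below clip pindex, count, and useStart so that
 * only the pages within the requested [start, end) range are touched.)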
1375 */ 1376 for (current = entry; 1377 (current != &map->header) && (current->start < end); 1378 current = current->next 1379 ) { 1380 vm_offset_t useStart; 1381 1382 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 1383 continue; 1384 1385 pindex = OFF_TO_IDX(current->offset); 1386 count = atop(current->end - current->start); 1387 useStart = current->start; 1388 1389 if (current->start < start) { 1390 pindex += atop(start - current->start); 1391 count -= atop(start - current->start); 1392 useStart = start; 1393 } 1394 if (current->end > end) 1395 count -= atop(current->end - end); 1396 1397 if (count <= 0) 1398 continue; 1399 1400 vm_object_madvise(current->object.vm_object, 1401 pindex, count, behav); 1402 if (behav == MADV_WILLNEED) { 1403 pmap_object_init_pt( 1404 map->pmap, 1405 useStart, 1406 current->object.vm_object, 1407 pindex, 1408 (count << PAGE_SHIFT), 1409 MAP_PREFAULT_MADVISE 1410 ); 1411 } 1412 } 1413 vm_map_unlock_read(map); 1414 } 1415 return (0); 1416} 1417 1418 1419/* 1420 * vm_map_inherit: 1421 * 1422 * Sets the inheritance of the specified address 1423 * range in the target map. Inheritance 1424 * affects how the map will be shared with 1425 * child maps at the time of vm_map_fork. 1426 */ 1427int 1428vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, 1429 vm_inherit_t new_inheritance) 1430{ 1431 vm_map_entry_t entry; 1432 vm_map_entry_t temp_entry; 1433 1434 GIANT_REQUIRED; 1435 1436 switch (new_inheritance) { 1437 case VM_INHERIT_NONE: 1438 case VM_INHERIT_COPY: 1439 case VM_INHERIT_SHARE: 1440 break; 1441 default: 1442 return (KERN_INVALID_ARGUMENT); 1443 } 1444 1445 vm_map_lock(map); 1446 1447 VM_MAP_RANGE_CHECK(map, start, end); 1448 1449 if (vm_map_lookup_entry(map, start, &temp_entry)) { 1450 entry = temp_entry; 1451 vm_map_clip_start(map, entry, start); 1452 } else 1453 entry = temp_entry->next; 1454 1455 while ((entry != &map->header) && (entry->start < end)) { 1456 vm_map_clip_end(map, entry, end); 1457 1458 entry->inheritance = new_inheritance; 1459 1460 vm_map_simplify_entry(map, entry); 1461 1462 entry = entry->next; 1463 } 1464 1465 vm_map_unlock(map); 1466 return (KERN_SUCCESS); 1467} 1468 1469/* 1470 * Implement the semantics of mlock 1471 */ 1472int 1473vm_map_user_pageable( 1474 vm_map_t map, 1475 vm_offset_t start, 1476 vm_offset_t end, 1477 boolean_t new_pageable) 1478{ 1479 vm_map_entry_t entry; 1480 vm_map_entry_t start_entry; 1481 vm_offset_t estart; 1482 vm_offset_t eend; 1483 int rv; 1484 1485 vm_map_lock(map); 1486 VM_MAP_RANGE_CHECK(map, start, end); 1487 1488 if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) { 1489 vm_map_unlock(map); 1490 return (KERN_INVALID_ADDRESS); 1491 } 1492 1493 if (new_pageable) { 1494 1495 entry = start_entry; 1496 vm_map_clip_start(map, entry, start); 1497 1498 /* 1499 * Now decrement the wiring count for each region. If a region 1500 * becomes completely unwired, unwire its physical pages and 1501 * mappings. 
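 * (Only entries marked MAP_ENTRY_USER_WIRED are unwired here; that
 * flag is what distinguishes mlock-style wirings from kernel wirings.)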
1502 */
1503 while ((entry != &map->header) && (entry->start < end)) {
1504 if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1505 vm_map_clip_end(map, entry, end);
1506 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1507 entry->wired_count--;
1508 if (entry->wired_count == 0)
1509 vm_fault_unwire(map, entry->start, entry->end);
1510 }
1511 vm_map_simplify_entry(map,entry);
1512 entry = entry->next;
1513 }
1514 } else {
1515
1516 entry = start_entry;
1517
1518 while ((entry != &map->header) && (entry->start < end)) {
1519
1520 if (entry->eflags & MAP_ENTRY_USER_WIRED) {
1521 entry = entry->next;
1522 continue;
1523 }
1524
1525 if (entry->wired_count != 0) {
1526 entry->wired_count++;
1527 entry->eflags |= MAP_ENTRY_USER_WIRED;
1528 entry = entry->next;
1529 continue;
1530 }
1531
1532 /* Here on entry being newly wired */
1533
1534 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
1535 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
1536 if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
1537
1538 vm_object_shadow(&entry->object.vm_object,
1539 &entry->offset,
1540 atop(entry->end - entry->start));
1541 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
1542
1543 } else if (entry->object.vm_object == NULL &&
1544 !map->system_map) {
1545
1546 entry->object.vm_object =
1547 vm_object_allocate(OBJT_DEFAULT,
1548 atop(entry->end - entry->start));
1549 entry->offset = (vm_offset_t) 0;
1550
1551 }
1552 }
1553
1554 vm_map_clip_start(map, entry, start);
1555 vm_map_clip_end(map, entry, end);
1556
1557 entry->wired_count++;
1558 entry->eflags |= MAP_ENTRY_USER_WIRED;
1559 estart = entry->start;
1560 eend = entry->end;
1561
1562 /* First we need to allow map modifications */
1563 vm_map_set_recursive(map);
1564 vm_map_lock_downgrade(map);
1565 map->timestamp++;
1566
1567 rv = vm_fault_user_wire(map, entry->start, entry->end);
1568 if (rv) {
1569
1570 entry->wired_count--;
1571 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
1572
1573 vm_map_clear_recursive(map);
1574 vm_map_unlock(map);
1575
1576 /*
1577 * At this point, the map is unlocked, and
1578 * entry might no longer be valid. Use copy
1579 * of entry start value obtained while entry
1580 * was valid.
1581 */
1582 (void) vm_map_user_pageable(map, start, estart,
1583 TRUE);
1584 return rv;
1585 }
1586
1587 vm_map_clear_recursive(map);
1588 if (vm_map_lock_upgrade(map)) {
1589 vm_map_lock(map);
1590 if (vm_map_lookup_entry(map, estart, &entry)
1591 == FALSE) {
1592 vm_map_unlock(map);
1593 /*
1594 * vm_fault_user_wire succeeded, thus
1595 * the area between start and eend
1596 * is wired and has to be unwired
1597 * here as part of the cleanup.
1598 */
1599 (void) vm_map_user_pageable(map,
1600 start,
1601 eend,
1602 TRUE);
1603 return (KERN_INVALID_ADDRESS);
1604 }
1605 }
1606 vm_map_simplify_entry(map,entry);
1607 }
1608 }
1609 map->timestamp++;
1610 vm_map_unlock(map);
1611 return KERN_SUCCESS;
1612}
1613
1614/*
1615 * vm_map_pageable:
1616 *
1617 * Sets the pageability of the specified address
1618 * range in the target map. Regions specified
1619 * as not pageable require locked-down physical
1620 * memory and physical page maps.
1621 *
1622 * The map must not be locked, but a reference
1623 * must remain to the map throughout the call.
1624 */ 1625int 1626vm_map_pageable( 1627 vm_map_t map, 1628 vm_offset_t start, 1629 vm_offset_t end, 1630 boolean_t new_pageable) 1631{ 1632 vm_map_entry_t entry; 1633 vm_map_entry_t start_entry; 1634 vm_offset_t failed = 0; 1635 int rv; 1636 1637 GIANT_REQUIRED; 1638 1639 vm_map_lock(map); 1640 1641 VM_MAP_RANGE_CHECK(map, start, end); 1642 1643 /* 1644 * Only one pageability change may take place at one time, since 1645 * vm_fault assumes it will be called only once for each 1646 * wiring/unwiring. Therefore, we have to make sure we're actually 1647 * changing the pageability for the entire region. We do so before 1648 * making any changes. 1649 */ 1650 if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) { 1651 vm_map_unlock(map); 1652 return (KERN_INVALID_ADDRESS); 1653 } 1654 entry = start_entry; 1655 1656 /* 1657 * Actions are rather different for wiring and unwiring, so we have 1658 * two separate cases. 1659 */ 1660 if (new_pageable) { 1661 vm_map_clip_start(map, entry, start); 1662 1663 /* 1664 * Unwiring. First ensure that the range to be unwired is 1665 * really wired down and that there are no holes. 1666 */ 1667 while ((entry != &map->header) && (entry->start < end)) { 1668 if (entry->wired_count == 0 || 1669 (entry->end < end && 1670 (entry->next == &map->header || 1671 entry->next->start > entry->end))) { 1672 vm_map_unlock(map); 1673 return (KERN_INVALID_ARGUMENT); 1674 } 1675 entry = entry->next; 1676 } 1677 1678 /* 1679 * Now decrement the wiring count for each region. If a region 1680 * becomes completely unwired, unwire its physical pages and 1681 * mappings. 1682 */ 1683 entry = start_entry; 1684 while ((entry != &map->header) && (entry->start < end)) { 1685 vm_map_clip_end(map, entry, end); 1686 1687 entry->wired_count--; 1688 if (entry->wired_count == 0) 1689 vm_fault_unwire(map, entry->start, entry->end); 1690 1691 vm_map_simplify_entry(map, entry); 1692 1693 entry = entry->next; 1694 } 1695 } else { 1696 /* 1697 * Wiring. We must do this in two passes: 1698 * 1699 * 1. Holding the write lock, we create any shadow or zero-fill 1700 * objects that need to be created. Then we clip each map 1701 * entry to the region to be wired and increment its wiring 1702 * count. We create objects before clipping the map entries 1703 * to avoid object proliferation. 1704 * 1705 * 2. We downgrade to a read lock, and call vm_fault_wire to 1706 * fault in the pages for any newly wired area (wired_count is 1707 * 1). 1708 * 1709 * Downgrading to a read lock for vm_fault_wire avoids a possible 1710 * deadlock with another process that may have faulted on one 1711 * of the pages to be wired (it would mark the page busy, 1712 * blocking us, then in turn block on the map lock that we 1713 * hold). Because of problems in the recursive lock package, 1714 * we cannot upgrade to a write lock in vm_map_lookup. Thus, 1715 * any actions that require the write lock must be done 1716 * beforehand. Because we keep the read lock on the map, the 1717 * copy-on-write status of the entries we modify here cannot 1718 * change. 1719 */ 1720 1721 /* 1722 * Pass 1. 1723 */ 1724 while ((entry != &map->header) && (entry->start < end)) { 1725 if (entry->wired_count == 0) { 1726 1727 /* 1728 * Perform actions of vm_map_lookup that need 1729 * the write lock on the map: create a shadow 1730 * object for a copy-on-write region, or an 1731 * object for a zero-fill region. 1732 * 1733 * We don't have to do this for entries that 1734 * point to sub maps, because we won't 1735 * hold the lock on the sub map. 
1736 */ 1737 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 1738 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY; 1739 if (copyflag && 1740 ((entry->protection & VM_PROT_WRITE) != 0)) { 1741 1742 vm_object_shadow(&entry->object.vm_object, 1743 &entry->offset, 1744 atop(entry->end - entry->start)); 1745 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 1746 } else if (entry->object.vm_object == NULL && 1747 !map->system_map) { 1748 entry->object.vm_object = 1749 vm_object_allocate(OBJT_DEFAULT, 1750 atop(entry->end - entry->start)); 1751 entry->offset = (vm_offset_t) 0; 1752 } 1753 } 1754 } 1755 vm_map_clip_start(map, entry, start); 1756 vm_map_clip_end(map, entry, end); 1757 entry->wired_count++; 1758 1759 /* 1760 * Check for holes 1761 */ 1762 if (entry->end < end && 1763 (entry->next == &map->header || 1764 entry->next->start > entry->end)) { 1765 /* 1766 * Found one. Object creation actions do not 1767 * need to be undone, but the wired counts 1768 * need to be restored. 1769 */ 1770 while (entry != &map->header && entry->end > start) { 1771 entry->wired_count--; 1772 entry = entry->prev; 1773 } 1774 vm_map_unlock(map); 1775 return (KERN_INVALID_ARGUMENT); 1776 } 1777 entry = entry->next; 1778 } 1779 1780 /* 1781 * Pass 2. 1782 */ 1783 1784 /* 1785 * HACK HACK HACK HACK 1786 * 1787 * If we are wiring in the kernel map or a submap of it, 1788 * unlock the map to avoid deadlocks. We trust that the 1789 * kernel is well-behaved, and therefore will not do 1790 * anything destructive to this region of the map while 1791 * we have it unlocked. We cannot trust user processes 1792 * to do the same. 1793 * 1794 * HACK HACK HACK HACK 1795 */ 1796 if (vm_map_pmap(map) == kernel_pmap) { 1797 vm_map_unlock(map); /* trust me ... */ 1798 } else { 1799 vm_map_lock_downgrade(map); 1800 } 1801 1802 rv = 0; 1803 entry = start_entry; 1804 while (entry != &map->header && entry->start < end) { 1805 /* 1806 * If vm_fault_wire fails for any page we need to undo 1807 * what has been done. We decrement the wiring count 1808 * for those pages which have not yet been wired (now) 1809 * and unwire those that have (later). 1810 * 1811 * XXX this violates the locking protocol on the map, 1812 * needs to be fixed. 1813 */ 1814 if (rv) 1815 entry->wired_count--; 1816 else if (entry->wired_count == 1) { 1817 rv = vm_fault_wire(map, entry->start, entry->end); 1818 if (rv) { 1819 failed = entry->start; 1820 entry->wired_count--; 1821 } 1822 } 1823 entry = entry->next; 1824 } 1825 1826 if (vm_map_pmap(map) == kernel_pmap) { 1827 vm_map_lock(map); 1828 } 1829 if (rv) { 1830 vm_map_unlock(map); 1831 (void) vm_map_pageable(map, start, failed, TRUE); 1832 return (rv); 1833 } 1834 /* 1835 * An exclusive lock on the map is needed in order to call 1836 * vm_map_simplify_entry(). If the current lock on the map 1837 * is only a shared lock, an upgrade is needed. 1838 */ 1839 if (vm_map_pmap(map) != kernel_pmap && 1840 vm_map_lock_upgrade(map)) { 1841 vm_map_lock(map); 1842 if (vm_map_lookup_entry(map, start, &start_entry) == 1843 FALSE) { 1844 vm_map_unlock(map); 1845 return KERN_SUCCESS; 1846 } 1847 } 1848 vm_map_simplify_entry(map, start_entry); 1849 } 1850 1851 vm_map_unlock(map); 1852 1853 return (KERN_SUCCESS); 1854} 1855 1856/* 1857 * vm_map_clean 1858 * 1859 * Push any dirty cached pages in the address range to their pager. 1860 * If syncio is TRUE, dirty pages are written synchronously. 1861 * If invalidate is TRUE, any cached pages are freed as well. 
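 * The range is walked twice: a first pass verifies that it contains
 * no holes or submaps, and a second pass cleans the pages in the
 * backing objects.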
1862 * 1863 * Returns an error if any part of the specified range is not mapped. 1864 */ 1865int 1866vm_map_clean( 1867 vm_map_t map, 1868 vm_offset_t start, 1869 vm_offset_t end, 1870 boolean_t syncio, 1871 boolean_t invalidate) 1872{ 1873 vm_map_entry_t current; 1874 vm_map_entry_t entry; 1875 vm_size_t size; 1876 vm_object_t object; 1877 vm_ooffset_t offset; 1878 1879 GIANT_REQUIRED; 1880 1881 vm_map_lock_read(map); 1882 VM_MAP_RANGE_CHECK(map, start, end); 1883 if (!vm_map_lookup_entry(map, start, &entry)) { 1884 vm_map_unlock_read(map); 1885 return (KERN_INVALID_ADDRESS); 1886 } 1887 /* 1888 * Make a first pass to check for holes. 1889 */ 1890 for (current = entry; current->start < end; current = current->next) { 1891 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 1892 vm_map_unlock_read(map); 1893 return (KERN_INVALID_ARGUMENT); 1894 } 1895 if (end > current->end && 1896 (current->next == &map->header || 1897 current->end != current->next->start)) { 1898 vm_map_unlock_read(map); 1899 return (KERN_INVALID_ADDRESS); 1900 } 1901 } 1902 1903 if (invalidate) 1904 pmap_remove(vm_map_pmap(map), start, end); 1905 /* 1906 * Make a second pass, cleaning/uncaching pages from the indicated 1907 * objects as we go. 1908 */ 1909 for (current = entry; current->start < end; current = current->next) { 1910 offset = current->offset + (start - current->start); 1911 size = (end <= current->end ? end : current->end) - start; 1912 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 1913 vm_map_t smap; 1914 vm_map_entry_t tentry; 1915 vm_size_t tsize; 1916 1917 smap = current->object.sub_map; 1918 vm_map_lock_read(smap); 1919 (void) vm_map_lookup_entry(smap, offset, &tentry); 1920 tsize = tentry->end - offset; 1921 if (tsize < size) 1922 size = tsize; 1923 object = tentry->object.vm_object; 1924 offset = tentry->offset + (offset - tentry->start); 1925 vm_map_unlock_read(smap); 1926 } else { 1927 object = current->object.vm_object; 1928 } 1929 /* 1930 * Note that there is absolutely no sense in writing out 1931 * anonymous objects, so we track down the vnode object 1932 * to write out. 1933 * We invalidate (remove) all pages from the address space 1934 * anyway, for semantic correctness. 1935 * 1936 * note: certain anonymous maps, such as MAP_NOSYNC maps, 1937 * may start out with a NULL object. 1938 */ 1939 while (object && object->backing_object) { 1940 object = object->backing_object; 1941 offset += object->backing_object_offset; 1942 if (object->size < OFF_TO_IDX(offset + size)) 1943 size = IDX_TO_OFF(object->size) - offset; 1944 } 1945 if (object && (object->type == OBJT_VNODE) && 1946 (current->protection & VM_PROT_WRITE)) { 1947 /* 1948 * Flush pages if writing is allowed, invalidate them 1949 * if invalidation requested. Pages undergoing I/O 1950 * will be ignored by vm_object_page_remove(). 1951 * 1952 * We cannot lock the vnode and then wait for paging 1953 * to complete without deadlocking against vm_fault. 1954 * Instead we simply call vm_object_page_remove() and 1955 * allow it to block internally on a page-by-page 1956 * basis when it encounters pages undergoing async 1957 * I/O. 1958 */ 1959 int flags; 1960 1961 vm_object_reference(object); 1962 vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread); 1963 flags = (syncio || invalidate) ? OBJPC_SYNC : 0; 1964 flags |= invalidate ? 
OBJPC_INVAL : 0; 1965 vm_object_page_clean(object, 1966 OFF_TO_IDX(offset), 1967 OFF_TO_IDX(offset + size + PAGE_MASK), 1968 flags); 1969 if (invalidate) { 1970 /*vm_object_pip_wait(object, "objmcl");*/ 1971 vm_object_page_remove(object, 1972 OFF_TO_IDX(offset), 1973 OFF_TO_IDX(offset + size + PAGE_MASK), 1974 FALSE); 1975 } 1976 VOP_UNLOCK(object->handle, 0, curthread); 1977 vm_object_deallocate(object); 1978 } 1979 start += size; 1980 } 1981 1982 vm_map_unlock_read(map); 1983 return (KERN_SUCCESS); 1984} 1985 1986/* 1987 * vm_map_entry_unwire: [ internal use only ] 1988 * 1989 * Make the region specified by this entry pageable. 1990 * 1991 * The map in question should be locked. 1992 * [This is the reason for this routine's existence.] 1993 */ 1994static void 1995vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry) 1996{ 1997 vm_fault_unwire(map, entry->start, entry->end); 1998 entry->wired_count = 0; 1999} 2000 2001/* 2002 * vm_map_entry_delete: [ internal use only ] 2003 * 2004 * Deallocate the given entry from the target map. 2005 */ 2006static void 2007vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry) 2008{ 2009 vm_map_entry_unlink(map, entry); 2010 map->size -= entry->end - entry->start; 2011 2012 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 2013 vm_object_deallocate(entry->object.vm_object); 2014 } 2015 2016 vm_map_entry_dispose(map, entry); 2017} 2018 2019/* 2020 * vm_map_delete: [ internal use only ] 2021 * 2022 * Deallocates the given address range from the target 2023 * map. 2024 */ 2025int 2026vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) 2027{ 2028 vm_object_t object; 2029 vm_map_entry_t entry; 2030 vm_map_entry_t first_entry; 2031 2032 GIANT_REQUIRED; 2033 2034 /* 2035 * Find the start of the region, and clip it 2036 */ 2037 if (!vm_map_lookup_entry(map, start, &first_entry)) 2038 entry = first_entry->next; 2039 else { 2040 entry = first_entry; 2041 vm_map_clip_start(map, entry, start); 2042 /* 2043 * Fix the lookup hint now, rather than each time though the 2044 * loop. 2045 */ 2046 SAVE_HINT(map, entry->prev); 2047 } 2048 2049 /* 2050 * Save the free space hint 2051 */ 2052 if (entry == &map->header) { 2053 map->first_free = &map->header; 2054 } else if (map->first_free->start >= start) { 2055 map->first_free = entry->prev; 2056 } 2057 2058 /* 2059 * Step through all entries in this region 2060 */ 2061 while ((entry != &map->header) && (entry->start < end)) { 2062 vm_map_entry_t next; 2063 vm_offset_t s, e; 2064 vm_pindex_t offidxstart, offidxend, count; 2065 2066 vm_map_clip_end(map, entry, end); 2067 2068 s = entry->start; 2069 e = entry->end; 2070 next = entry->next; 2071 2072 offidxstart = OFF_TO_IDX(entry->offset); 2073 count = OFF_TO_IDX(e - s); 2074 object = entry->object.vm_object; 2075 2076 /* 2077 * Unwire before removing addresses from the pmap; otherwise, 2078 * unwiring will put the entries back in the pmap. 
2079 */ 2080 if (entry->wired_count != 0) { 2081 vm_map_entry_unwire(map, entry); 2082 } 2083 2084 offidxend = offidxstart + count; 2085 2086 if ((object == kernel_object) || (object == kmem_object)) { 2087 vm_object_page_remove(object, offidxstart, offidxend, FALSE); 2088 } else { 2089 pmap_remove(map->pmap, s, e); 2090 if (object != NULL && 2091 object->ref_count != 1 && 2092 (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING && 2093 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { 2094 vm_object_collapse(object); 2095 vm_object_page_remove(object, offidxstart, offidxend, FALSE); 2096 if (object->type == OBJT_SWAP) { 2097 swap_pager_freespace(object, offidxstart, count); 2098 } 2099 if (offidxend >= object->size && 2100 offidxstart < object->size) { 2101 object->size = offidxstart; 2102 } 2103 } 2104 } 2105 2106 /* 2107 * Delete the entry (which may delete the object) only after 2108 * removing all pmap entries pointing to its pages. 2109 * (Otherwise, its page frames may be reallocated, and any 2110 * modify bits will be set in the wrong object!) 2111 */ 2112 vm_map_entry_delete(map, entry); 2113 entry = next; 2114 } 2115 return (KERN_SUCCESS); 2116} 2117 2118/* 2119 * vm_map_remove: 2120 * 2121 * Remove the given address range from the target map. 2122 * This is the exported form of vm_map_delete. 2123 */ 2124int 2125vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end) 2126{ 2127 int result, s = 0; 2128 2129 GIANT_REQUIRED; 2130 2131 if (map == kmem_map) 2132 s = splvm(); 2133 2134 vm_map_lock(map); 2135 VM_MAP_RANGE_CHECK(map, start, end); 2136 result = vm_map_delete(map, start, end); 2137 vm_map_unlock(map); 2138 2139 if (map == kmem_map) 2140 splx(s); 2141 2142 return (result); 2143} 2144 2145/* 2146 * vm_map_check_protection: 2147 * 2148 * Assert that the target map allows the specified 2149 * privilege on the entire address region given. 2150 * The entire region must be allocated. 2151 */ 2152boolean_t 2153vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, 2154 vm_prot_t protection) 2155{ 2156 vm_map_entry_t entry; 2157 vm_map_entry_t tmp_entry; 2158 2159 GIANT_REQUIRED; 2160 2161 vm_map_lock_read(map); 2162 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 2163 vm_map_unlock_read(map); 2164 return (FALSE); 2165 } 2166 entry = tmp_entry; 2167 2168 while (start < end) { 2169 if (entry == &map->header) { 2170 vm_map_unlock_read(map); 2171 return (FALSE); 2172 } 2173 /* 2174 * No holes allowed! 2175 */ 2176 if (start < entry->start) { 2177 vm_map_unlock_read(map); 2178 return (FALSE); 2179 } 2180 /* 2181 * Check protection associated with entry. 2182 */ 2183 if ((entry->protection & protection) != protection) { 2184 vm_map_unlock_read(map); 2185 return (FALSE); 2186 } 2187 /* go to next entry */ 2188 start = entry->end; 2189 entry = entry->next; 2190 } 2191 vm_map_unlock_read(map); 2192 return (TRUE); 2193} 2194 2195/* 2196 * Split the pages in a map entry into a new object. This affords 2197 * easier removal of unused pages, and keeps object inheritance from 2198 * being a negative impact on memory usage. 
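 * Only OBJT_DEFAULT and OBJT_SWAP objects with more than one
 * reference are split; all other cases return without change.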
2199 */ 2200static void 2201vm_map_split(vm_map_entry_t entry) 2202{ 2203 vm_page_t m; 2204 vm_object_t orig_object, new_object, source; 2205 vm_offset_t s, e; 2206 vm_pindex_t offidxstart, offidxend, idx; 2207 vm_size_t size; 2208 vm_ooffset_t offset; 2209 2210 GIANT_REQUIRED; 2211 2212 orig_object = entry->object.vm_object; 2213 if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) 2214 return; 2215 if (orig_object->ref_count <= 1) 2216 return; 2217 2218 offset = entry->offset; 2219 s = entry->start; 2220 e = entry->end; 2221 2222 offidxstart = OFF_TO_IDX(offset); 2223 offidxend = offidxstart + OFF_TO_IDX(e - s); 2224 size = offidxend - offidxstart; 2225 2226 new_object = vm_pager_allocate(orig_object->type, 2227 NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL); 2228 if (new_object == NULL) 2229 return; 2230 2231 source = orig_object->backing_object; 2232 if (source != NULL) { 2233 vm_object_reference(source); /* Referenced by new_object */ 2234 TAILQ_INSERT_TAIL(&source->shadow_head, 2235 new_object, shadow_list); 2236 vm_object_clear_flag(source, OBJ_ONEMAPPING); 2237 new_object->backing_object_offset = 2238 orig_object->backing_object_offset + IDX_TO_OFF(offidxstart); 2239 new_object->backing_object = source; 2240 source->shadow_count++; 2241 source->generation++; 2242 } 2243 2244 for (idx = 0; idx < size; idx++) { 2245 vm_page_t m; 2246 2247 retry: 2248 m = vm_page_lookup(orig_object, offidxstart + idx); 2249 if (m == NULL) 2250 continue; 2251 2252 /* 2253 * We must wait for pending I/O to complete before we can 2254 * rename the page. 2255 * 2256 * We do not have to VM_PROT_NONE the page as mappings should 2257 * not be changed by this operation. 2258 */ 2259 if (vm_page_sleep_busy(m, TRUE, "spltwt")) 2260 goto retry; 2261 2262 vm_page_busy(m); 2263 vm_page_rename(m, new_object, idx); 2264 /* page automatically made dirty by rename and cache handled */ 2265 vm_page_busy(m); 2266 } 2267 2268 if (orig_object->type == OBJT_SWAP) { 2269 vm_object_pip_add(orig_object, 1); 2270 /* 2271 * copy orig_object pages into new_object 2272 * and destroy unneeded pages in 2273 * shadow object. 2274 */ 2275 swap_pager_copy(orig_object, new_object, offidxstart, 0); 2276 vm_object_pip_wakeup(orig_object); 2277 } 2278 2279 for (idx = 0; idx < size; idx++) { 2280 m = vm_page_lookup(new_object, idx); 2281 if (m) { 2282 vm_page_wakeup(m); 2283 } 2284 } 2285 2286 entry->object.vm_object = new_object; 2287 entry->offset = 0LL; 2288 vm_object_deallocate(orig_object); 2289} 2290 2291/* 2292 * vm_map_copy_entry: 2293 * 2294 * Copies the contents of the source entry to the destination 2295 * entry. The entries *must* be aligned properly. 2296 */ 2297static void 2298vm_map_copy_entry( 2299 vm_map_t src_map, 2300 vm_map_t dst_map, 2301 vm_map_entry_t src_entry, 2302 vm_map_entry_t dst_entry) 2303{ 2304 vm_object_t src_object; 2305 2306 if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP) 2307 return; 2308 2309 if (src_entry->wired_count == 0) { 2310 2311 /* 2312 * If the source entry is marked needs_copy, it is already 2313 * write-protected. 2314 */ 2315 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) { 2316 pmap_protect(src_map->pmap, 2317 src_entry->start, 2318 src_entry->end, 2319 src_entry->protection & ~VM_PROT_WRITE); 2320 } 2321 2322 /* 2323 * Make a copy of the object. 
2324 */ 2325 if ((src_object = src_entry->object.vm_object) != NULL) { 2326 2327 if ((src_object->handle == NULL) && 2328 (src_object->type == OBJT_DEFAULT || 2329 src_object->type == OBJT_SWAP)) { 2330 vm_object_collapse(src_object); 2331 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) { 2332 vm_map_split(src_entry); 2333 src_object = src_entry->object.vm_object; 2334 } 2335 } 2336 2337 vm_object_reference(src_object); 2338 vm_object_clear_flag(src_object, OBJ_ONEMAPPING); 2339 dst_entry->object.vm_object = src_object; 2340 src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); 2341 dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); 2342 dst_entry->offset = src_entry->offset; 2343 } else { 2344 dst_entry->object.vm_object = NULL; 2345 dst_entry->offset = 0; 2346 } 2347 2348 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, 2349 dst_entry->end - dst_entry->start, src_entry->start); 2350 } else { 2351 /* 2352 * Of course, wired down pages can't be set copy-on-write. 2353 * Cause wired pages to be copied into the new map by 2354 * simulating faults (the new pages are pageable) 2355 */ 2356 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry); 2357 } 2358} 2359 2360/* 2361 * vmspace_fork: 2362 * Create a new process vmspace structure and vm_map 2363 * based on those of an existing process. The new map 2364 * is based on the old map, according to the inheritance 2365 * values on the regions in that map. 2366 * 2367 * The source map must not be locked. 2368 */ 2369struct vmspace * 2370vmspace_fork(struct vmspace *vm1) 2371{ 2372 struct vmspace *vm2; 2373 vm_map_t old_map = &vm1->vm_map; 2374 vm_map_t new_map; 2375 vm_map_entry_t old_entry; 2376 vm_map_entry_t new_entry; 2377 vm_object_t object; 2378 2379 GIANT_REQUIRED; 2380 2381 vm_map_lock(old_map); 2382 old_map->infork = 1; 2383 2384 vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset); 2385 bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, 2386 (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy); 2387 new_map = &vm2->vm_map; /* XXX */ 2388 new_map->timestamp = 1; 2389 2390 old_entry = old_map->header.next; 2391 2392 while (old_entry != &old_map->header) { 2393 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) 2394 panic("vm_map_fork: encountered a submap"); 2395 2396 switch (old_entry->inheritance) { 2397 case VM_INHERIT_NONE: 2398 break; 2399 2400 case VM_INHERIT_SHARE: 2401 /* 2402 * Clone the entry, creating the shared object if necessary. 2403 */ 2404 object = old_entry->object.vm_object; 2405 if (object == NULL) { 2406 object = vm_object_allocate(OBJT_DEFAULT, 2407 atop(old_entry->end - old_entry->start)); 2408 old_entry->object.vm_object = object; 2409 old_entry->offset = (vm_offset_t) 0; 2410 } 2411 2412 /* 2413 * Add the reference before calling vm_object_shadow 2414 * to insure that a shadow object is created. 2415 */ 2416 vm_object_reference(object); 2417 if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) { 2418 vm_object_shadow(&old_entry->object.vm_object, 2419 &old_entry->offset, 2420 atop(old_entry->end - old_entry->start)); 2421 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 2422 /* Transfer the second reference too. */ 2423 vm_object_reference( 2424 old_entry->object.vm_object); 2425 vm_object_deallocate(object); 2426 object = old_entry->object.vm_object; 2427 } 2428 vm_object_clear_flag(object, OBJ_ONEMAPPING); 2429 2430 /* 2431 * Clone the entry, referencing the shared object. 
2432 */
2433 new_entry = vm_map_entry_create(new_map);
2434 *new_entry = *old_entry;
2435 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2436 new_entry->wired_count = 0;
2437
2438 /*
2439 * Insert the entry into the new map -- we know we're
2440 * inserting at the end of the new map.
2441 */
2442 vm_map_entry_link(new_map, new_map->header.prev,
2443 new_entry);
2444
2445 /*
2446 * Update the physical map
2447 */
2448 pmap_copy(new_map->pmap, old_map->pmap,
2449 new_entry->start,
2450 (old_entry->end - old_entry->start),
2451 old_entry->start);
2452 break;
2453
2454 case VM_INHERIT_COPY:
2455 /*
2456 * Clone the entry and link into the map.
2457 */
2458 new_entry = vm_map_entry_create(new_map);
2459 *new_entry = *old_entry;
2460 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
2461 new_entry->wired_count = 0;
2462 new_entry->object.vm_object = NULL;
2463 vm_map_entry_link(new_map, new_map->header.prev,
2464 new_entry);
2465 vm_map_copy_entry(old_map, new_map, old_entry,
2466 new_entry);
2467 break;
2468 }
2469 old_entry = old_entry->next;
2470 }
2471
2472 new_map->size = old_map->size;
2473 old_map->infork = 0;
2474 vm_map_unlock(old_map);
2475
2476 return (vm2);
2477}
2478
2479int
2480vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
2481 vm_prot_t prot, vm_prot_t max, int cow)
2482{
2483 vm_map_entry_t prev_entry;
2484 vm_map_entry_t new_stack_entry;
2485 vm_size_t init_ssize;
2486 int rv;
2487
2488 GIANT_REQUIRED;
2489
2490 if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
2491 return (KERN_NO_SPACE);
2492
2493 if (max_ssize < sgrowsiz)
2494 init_ssize = max_ssize;
2495 else
2496 init_ssize = sgrowsiz;
2497
2498 vm_map_lock(map);
2499
2500 /* If addr is already mapped, no go */
2501 if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
2502 vm_map_unlock(map);
2503 return (KERN_NO_SPACE);
2504 }
2505
2506 /* If we can't accommodate max_ssize in the current mapping,
2507 * no go. However, we need to be aware that subsequent user
2508 * mappings might map into the space we have reserved for
2509 * stack, and currently this space is not protected.
2510 *
2511 * Hopefully we will at least detect this condition
2512 * when we try to grow the stack.
2513 */
2514 if ((prev_entry->next != &map->header) &&
2515 (prev_entry->next->start < addrbos + max_ssize)) {
2516 vm_map_unlock(map);
2517 return (KERN_NO_SPACE);
2518 }
2519
2520 /* We initially map a stack of only init_ssize. We will
2521 * grow as needed later. Since this is to be a grow
2522 * down stack, we map at the top of the range.
2523 *
2524 * Note: we would normally expect prot and max to be
2525 * VM_PROT_ALL, and cow to be 0. Possibly we should
2526 * eliminate these as input parameters, and just
2527 * pass these values here in the insert call.
2528 */
2529 rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
2530 addrbos + max_ssize, prot, max, cow);
2531
2532 /* Now set the avail_ssize amount */
2533 if (rv == KERN_SUCCESS){
2534 if (prev_entry != &map->header)
2535 vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
2536 new_stack_entry = prev_entry->next;
2537 if (new_stack_entry->end != addrbos + max_ssize ||
2538 new_stack_entry->start != addrbos + max_ssize - init_ssize)
2539 panic ("Bad entry start/end for new stack entry");
2540 else
2541 new_stack_entry->avail_ssize = max_ssize - init_ssize;
2542 }
2543
2544 vm_map_unlock(map);
2545 return (rv);
2546}
2547
2548/* Attempts to grow a vm stack entry.
Returns KERN_SUCCESS if the 2549 * desired address is already mapped, or if we successfully grow 2550 * the stack. Also returns KERN_SUCCESS if addr is outside the 2551 * stack range (this is strange, but preserves compatibility with 2552 * the grow function in vm_machdep.c). 2553 */ 2554int 2555vm_map_growstack (struct proc *p, vm_offset_t addr) 2556{ 2557 vm_map_entry_t prev_entry; 2558 vm_map_entry_t stack_entry; 2559 vm_map_entry_t new_stack_entry; 2560 struct vmspace *vm = p->p_vmspace; 2561 vm_map_t map = &vm->vm_map; 2562 vm_offset_t end; 2563 int grow_amount; 2564 int rv; 2565 int is_procstack; 2566 2567 GIANT_REQUIRED; 2568 2569Retry: 2570 vm_map_lock_read(map); 2571 2572 /* If addr is already in the entry range, no need to grow.*/ 2573 if (vm_map_lookup_entry(map, addr, &prev_entry)) { 2574 vm_map_unlock_read(map); 2575 return (KERN_SUCCESS); 2576 } 2577 2578 if ((stack_entry = prev_entry->next) == &map->header) { 2579 vm_map_unlock_read(map); 2580 return (KERN_SUCCESS); 2581 } 2582 if (prev_entry == &map->header) 2583 end = stack_entry->start - stack_entry->avail_ssize; 2584 else 2585 end = prev_entry->end; 2586 2587 /* This next test mimics the old grow function in vm_machdep.c. 2588 * It really doesn't quite make sense, but we do it anyway 2589 * for compatibility. 2590 * 2591 * If not growable stack, return success. This signals the 2592 * caller to proceed as he would normally with normal vm. 2593 */ 2594 if (stack_entry->avail_ssize < 1 || 2595 addr >= stack_entry->start || 2596 addr < stack_entry->start - stack_entry->avail_ssize) { 2597 vm_map_unlock_read(map); 2598 return (KERN_SUCCESS); 2599 } 2600 2601 /* Find the minimum grow amount */ 2602 grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE); 2603 if (grow_amount > stack_entry->avail_ssize) { 2604 vm_map_unlock_read(map); 2605 return (KERN_NO_SPACE); 2606 } 2607 2608 /* If there is no longer enough space between the entries 2609 * nogo, and adjust the available space. Note: this 2610 * should only happen if the user has mapped into the 2611 * stack area after the stack was created, and is 2612 * probably an error. 2613 * 2614 * This also effectively destroys any guard page the user 2615 * might have intended by limiting the stack size. 2616 */ 2617 if (grow_amount > stack_entry->start - end) { 2618 if (vm_map_lock_upgrade(map)) 2619 goto Retry; 2620 2621 stack_entry->avail_ssize = stack_entry->start - end; 2622 2623 vm_map_unlock(map); 2624 return (KERN_NO_SPACE); 2625 } 2626 2627 is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr; 2628 2629 /* If this is the main process stack, see if we're over the 2630 * stack limit. 2631 */ 2632 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > 2633 p->p_rlimit[RLIMIT_STACK].rlim_cur)) { 2634 vm_map_unlock_read(map); 2635 return (KERN_NO_SPACE); 2636 } 2637 2638 /* Round up the grow amount modulo SGROWSIZ */ 2639 grow_amount = roundup (grow_amount, sgrowsiz); 2640 if (grow_amount > stack_entry->avail_ssize) { 2641 grow_amount = stack_entry->avail_ssize; 2642 } 2643 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > 2644 p->p_rlimit[RLIMIT_STACK].rlim_cur)) { 2645 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur - 2646 ctob(vm->vm_ssize); 2647 } 2648 2649 if (vm_map_lock_upgrade(map)) 2650 goto Retry; 2651 2652 /* Get the preliminary new entry start value */ 2653 addr = stack_entry->start - grow_amount; 2654 2655 /* If this puts us into the previous entry, cut back our growth 2656 * to the available space. Also, see the note above. 
2657 */
2658 if (addr < end) {
2659 stack_entry->avail_ssize = stack_entry->start - end;
2660 addr = end;
2661 }
2662
2663 rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
2664 VM_PROT_ALL,
2665 VM_PROT_ALL,
2666 0);
2667
2668 /* Adjust the available stack space by the amount we grew. */
2669 if (rv == KERN_SUCCESS) {
2670 if (prev_entry != &map->header)
2671 vm_map_clip_end(map, prev_entry, addr);
2672 new_stack_entry = prev_entry->next;
2673 if (new_stack_entry->end != stack_entry->start ||
2674 new_stack_entry->start != addr)
2675 panic ("Bad stack grow start/end in new stack entry");
2676 else {
2677 new_stack_entry->avail_ssize = stack_entry->avail_ssize -
2678 (new_stack_entry->end -
2679 new_stack_entry->start);
2680 if (is_procstack)
2681 vm->vm_ssize += btoc(new_stack_entry->end -
2682 new_stack_entry->start);
2683 }
2684 }
2685
2686 vm_map_unlock(map);
2687 return (rv);
2688}
2689
2690/*
2691 * Unshare the specified VM space for exec. If other processes are
2692 * mapped to it, then create a new one. The new vmspace is null.
2693 */
2694void
2695vmspace_exec(struct proc *p)
2696{
2697 struct vmspace *oldvmspace = p->p_vmspace;
2698 struct vmspace *newvmspace;
2699 vm_map_t map = &p->p_vmspace->vm_map;
2700
2701 GIANT_REQUIRED;
2702 newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
2703 bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
2704 (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
2705 /*
2706 * This code is written like this for prototype purposes. The
2707 * goal is to avoid running down the vmspace here, but let the
2708 * other processes that are still using the vmspace finally
2709 * run it down. Even though there is little or no chance of blocking
2710 * here, it is a good idea to keep this form for future mods.
2711 */
2712 p->p_vmspace = newvmspace;
2713 pmap_pinit2(vmspace_pmap(newvmspace));
2714 vmspace_free(oldvmspace);
2715 if (p == curthread->td_proc) /* XXXKSE ? */
2716 pmap_activate(curthread);
2717}
2718
2719/*
2720 * Unshare the specified VM space for forcing COW. This
2721 * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
2722 */
2723void
2724vmspace_unshare(struct proc *p)
2725{
2726 struct vmspace *oldvmspace = p->p_vmspace;
2727 struct vmspace *newvmspace;
2728
2729 GIANT_REQUIRED;
2730 if (oldvmspace->vm_refcnt == 1)
2731 return;
2732 newvmspace = vmspace_fork(oldvmspace);
2733 p->p_vmspace = newvmspace;
2734 pmap_pinit2(vmspace_pmap(newvmspace));
2735 vmspace_free(oldvmspace);
2736 if (p == curthread->td_proc) /* XXXKSE ? */
2737 pmap_activate(curthread);
2738}
2739
2740/*
2741 * vm_map_lookup:
2742 *
2743 * Finds the VM object, offset, and
2744 * protection for a given virtual address in the
2745 * specified map, assuming a page fault of the
2746 * type specified.
2747 *
2748 * Leaves the map in question locked for read; return
2749 * values are guaranteed until a vm_map_lookup_done
2750 * call is performed. Note that the map argument
2751 * is in/out; the returned map must be used in
2752 * the call to vm_map_lookup_done.
2753 *
2754 * A handle (out_entry) is returned for use in
2755 * vm_map_lookup_done, to make that fast.
2756 *
2757 * If a lookup is requested with "write protection"
2758 * specified, the map may be changed to perform virtual
2759 * copying operations, although the data referenced will
2760 * remain the same.
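 * In particular, a write fault on an entry marked
 * MAP_ENTRY_NEEDS_COPY is resolved here by shadowing the
 * entry's object before the object/offset is returned.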
2761 */ 2762int 2763vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ 2764 vm_offset_t vaddr, 2765 vm_prot_t fault_typea, 2766 vm_map_entry_t *out_entry, /* OUT */ 2767 vm_object_t *object, /* OUT */ 2768 vm_pindex_t *pindex, /* OUT */ 2769 vm_prot_t *out_prot, /* OUT */ 2770 boolean_t *wired) /* OUT */ 2771{ 2772 vm_map_entry_t entry; 2773 vm_map_t map = *var_map; 2774 vm_prot_t prot; 2775 vm_prot_t fault_type = fault_typea; 2776 2777 GIANT_REQUIRED; 2778RetryLookup:; 2779 /* 2780 * Lookup the faulting address. 2781 */ 2782 2783 vm_map_lock_read(map); 2784#define RETURN(why) \ 2785 { \ 2786 vm_map_unlock_read(map); \ 2787 return (why); \ 2788 } 2789 2790 /* 2791 * If the map has an interesting hint, try it before calling full 2792 * blown lookup routine. 2793 */ 2794 entry = map->hint; 2795 *out_entry = entry; 2796 if ((entry == &map->header) || 2797 (vaddr < entry->start) || (vaddr >= entry->end)) { 2798 vm_map_entry_t tmp_entry; 2799 2800 /* 2801 * Entry was either not a valid hint, or the vaddr was not 2802 * contained in the entry, so do a full lookup. 2803 */ 2804 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) 2805 RETURN(KERN_INVALID_ADDRESS); 2806 2807 entry = tmp_entry; 2808 *out_entry = entry; 2809 } 2810 2811 /* 2812 * Handle submaps. 2813 */ 2814 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 2815 vm_map_t old_map = map; 2816 2817 *var_map = map = entry->object.sub_map; 2818 vm_map_unlock_read(old_map); 2819 goto RetryLookup; 2820 } 2821 2822 /* 2823 * Check whether this task is allowed to have this page. 2824 * Note the special case for MAP_ENTRY_COW 2825 * pages with an override. This is to implement a forced 2826 * COW for debuggers. 2827 */ 2828 if (fault_type & VM_PROT_OVERRIDE_WRITE) 2829 prot = entry->max_protection; 2830 else 2831 prot = entry->protection; 2832 fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE); 2833 if ((fault_type & prot) != fault_type) { 2834 RETURN(KERN_PROTECTION_FAILURE); 2835 } 2836 if ((entry->eflags & MAP_ENTRY_USER_WIRED) && 2837 (entry->eflags & MAP_ENTRY_COW) && 2838 (fault_type & VM_PROT_WRITE) && 2839 (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) { 2840 RETURN(KERN_PROTECTION_FAILURE); 2841 } 2842 2843 /* 2844 * If this page is not pageable, we have to get it for all possible 2845 * accesses. 2846 */ 2847 *wired = (entry->wired_count != 0); 2848 if (*wired) 2849 prot = fault_type = entry->protection; 2850 2851 /* 2852 * If the entry was copy-on-write, we either ... 2853 */ 2854 if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { 2855 /* 2856 * If we want to write the page, we may as well handle that 2857 * now since we've got the map locked. 2858 * 2859 * If we don't need to write the page, we just demote the 2860 * permissions allowed. 2861 */ 2862 if (fault_type & VM_PROT_WRITE) { 2863 /* 2864 * Make a new object, and place it in the object 2865 * chain. Note that no new references have appeared 2866 * -- one just moved from the map to the new 2867 * object. 2868 */ 2869 if (vm_map_lock_upgrade(map)) 2870 goto RetryLookup; 2871 vm_object_shadow( 2872 &entry->object.vm_object, 2873 &entry->offset, 2874 atop(entry->end - entry->start)); 2875 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 2876 vm_map_lock_downgrade(map); 2877 } else { 2878 /* 2879 * We're attempting to read a copy-on-write page -- 2880 * don't allow writes. 2881 */ 2882 prot &= ~VM_PROT_WRITE; 2883 } 2884 } 2885 2886 /* 2887 * Create an object if necessary. 
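 * (If the entry has no backing object, an anonymous OBJT_DEFAULT
 * object is allocated for it here.)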
2888 */ 2889 if (entry->object.vm_object == NULL && 2890 !map->system_map) { 2891 if (vm_map_lock_upgrade(map)) 2892 goto RetryLookup; 2893 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, 2894 atop(entry->end - entry->start)); 2895 entry->offset = 0; 2896 vm_map_lock_downgrade(map); 2897 } 2898 2899 /* 2900 * Return the object/offset from this entry. If the entry was 2901 * copy-on-write or empty, it has been fixed up. 2902 */ 2903 *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); 2904 *object = entry->object.vm_object; 2905 2906 /* 2907 * Return whether this is the only map sharing this data. 2908 */ 2909 *out_prot = prot; 2910 return (KERN_SUCCESS); 2911 2912#undef RETURN 2913} 2914 2915/* 2916 * vm_map_lookup_done: 2917 * 2918 * Releases locks acquired by a vm_map_lookup 2919 * (according to the handle returned by that lookup). 2920 */ 2921void 2922vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry) 2923{ 2924 /* 2925 * Unlock the main-level map 2926 */ 2927 GIANT_REQUIRED; 2928 vm_map_unlock_read(map); 2929} 2930 2931/* 2932 * Implement uiomove with VM operations. This handles (and collateral changes) 2933 * support every combination of source object modification, and COW type 2934 * operations. 2935 */ 2936int 2937vm_uiomove( 2938 vm_map_t mapa, 2939 vm_object_t srcobject, 2940 off_t cp, 2941 int cnta, 2942 vm_offset_t uaddra, 2943 int *npages) 2944{ 2945 vm_map_t map; 2946 vm_object_t first_object, oldobject, object; 2947 vm_map_entry_t entry; 2948 vm_prot_t prot; 2949 boolean_t wired; 2950 int tcnt, rv; 2951 vm_offset_t uaddr, start, end, tend; 2952 vm_pindex_t first_pindex, osize, oindex; 2953 off_t ooffset; 2954 int cnt; 2955 2956 GIANT_REQUIRED; 2957 2958 if (npages) 2959 *npages = 0; 2960 2961 cnt = cnta; 2962 uaddr = uaddra; 2963 2964 while (cnt > 0) { 2965 map = mapa; 2966 2967 if ((vm_map_lookup(&map, uaddr, 2968 VM_PROT_READ, &entry, &first_object, 2969 &first_pindex, &prot, &wired)) != KERN_SUCCESS) { 2970 return EFAULT; 2971 } 2972 2973 vm_map_clip_start(map, entry, uaddr); 2974 2975 tcnt = cnt; 2976 tend = uaddr + tcnt; 2977 if (tend > entry->end) { 2978 tcnt = entry->end - uaddr; 2979 tend = entry->end; 2980 } 2981 2982 vm_map_clip_end(map, entry, tend); 2983 2984 start = entry->start; 2985 end = entry->end; 2986 2987 osize = atop(tcnt); 2988 2989 oindex = OFF_TO_IDX(cp); 2990 if (npages) { 2991 vm_pindex_t idx; 2992 for (idx = 0; idx < osize; idx++) { 2993 vm_page_t m; 2994 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) { 2995 vm_map_lookup_done(map, entry); 2996 return 0; 2997 } 2998 /* 2999 * disallow busy or invalid pages, but allow 3000 * m->busy pages if they are entirely valid. 3001 */ 3002 if ((m->flags & PG_BUSY) || 3003 ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) { 3004 vm_map_lookup_done(map, entry); 3005 return 0; 3006 } 3007 } 3008 } 3009 3010/* 3011 * If we are changing an existing map entry, just redirect 3012 * the object, and change mappings. 
3013 */ 3014 if ((first_object->type == OBJT_VNODE) && 3015 ((oldobject = entry->object.vm_object) == first_object)) { 3016 3017 if ((entry->offset != cp) || (oldobject != srcobject)) { 3018 /* 3019 * Remove old window into the file 3020 */ 3021 pmap_remove (map->pmap, uaddr, tend); 3022 3023 /* 3024 * Force copy on write for mmaped regions 3025 */ 3026 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3027 3028 /* 3029 * Point the object appropriately 3030 */ 3031 if (oldobject != srcobject) { 3032 3033 /* 3034 * Set the object optimization hint flag 3035 */ 3036 vm_object_set_flag(srcobject, OBJ_OPT); 3037 vm_object_reference(srcobject); 3038 entry->object.vm_object = srcobject; 3039 3040 if (oldobject) { 3041 vm_object_deallocate(oldobject); 3042 } 3043 } 3044 3045 entry->offset = cp; 3046 map->timestamp++; 3047 } else { 3048 pmap_remove (map->pmap, uaddr, tend); 3049 } 3050 3051 } else if ((first_object->ref_count == 1) && 3052 (first_object->size == osize) && 3053 ((first_object->type == OBJT_DEFAULT) || 3054 (first_object->type == OBJT_SWAP)) ) { 3055 3056 oldobject = first_object->backing_object; 3057 3058 if ((first_object->backing_object_offset != cp) || 3059 (oldobject != srcobject)) { 3060 /* 3061 * Remove old window into the file 3062 */ 3063 pmap_remove (map->pmap, uaddr, tend); 3064 3065 /* 3066 * Remove unneeded old pages 3067 */ 3068 vm_object_page_remove(first_object, 0, 0, 0); 3069 3070 /* 3071 * Invalidate swap space 3072 */ 3073 if (first_object->type == OBJT_SWAP) { 3074 swap_pager_freespace(first_object, 3075 0, 3076 first_object->size); 3077 } 3078 3079 /* 3080 * Force copy on write for mmaped regions 3081 */ 3082 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3083 3084 /* 3085 * Point the object appropriately 3086 */ 3087 if (oldobject != srcobject) { 3088 /* 3089 * Set the object optimization hint flag 3090 */ 3091 vm_object_set_flag(srcobject, OBJ_OPT); 3092 vm_object_reference(srcobject); 3093 3094 if (oldobject) { 3095 TAILQ_REMOVE(&oldobject->shadow_head, 3096 first_object, shadow_list); 3097 oldobject->shadow_count--; 3098 /* XXX bump generation? */ 3099 vm_object_deallocate(oldobject); 3100 } 3101 3102 TAILQ_INSERT_TAIL(&srcobject->shadow_head, 3103 first_object, shadow_list); 3104 srcobject->shadow_count++; 3105 /* XXX bump generation? */ 3106 3107 first_object->backing_object = srcobject; 3108 } 3109 first_object->backing_object_offset = cp; 3110 map->timestamp++; 3111 } else { 3112 pmap_remove (map->pmap, uaddr, tend); 3113 } 3114/* 3115 * Otherwise, we have to do a logical mmap. 
3116 */ 3117 } else { 3118 3119 vm_object_set_flag(srcobject, OBJ_OPT); 3120 vm_object_reference(srcobject); 3121 3122 pmap_remove (map->pmap, uaddr, tend); 3123 3124 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3125 vm_map_lock_upgrade(map); 3126 3127 if (entry == &map->header) { 3128 map->first_free = &map->header; 3129 } else if (map->first_free->start >= start) { 3130 map->first_free = entry->prev; 3131 } 3132 3133 SAVE_HINT(map, entry->prev); 3134 vm_map_entry_delete(map, entry); 3135 3136 object = srcobject; 3137 ooffset = cp; 3138 3139 rv = vm_map_insert(map, object, ooffset, start, tend, 3140 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE); 3141 3142 if (rv != KERN_SUCCESS) 3143 panic("vm_uiomove: could not insert new entry: %d", rv); 3144 } 3145 3146/* 3147 * Map the window directly, if it is already in memory 3148 */ 3149 pmap_object_init_pt(map->pmap, uaddr, 3150 srcobject, oindex, tcnt, 0); 3151 3152 map->timestamp++; 3153 vm_map_unlock(map); 3154 3155 cnt -= tcnt; 3156 uaddr += tcnt; 3157 cp += tcnt; 3158 if (npages) 3159 *npages += osize; 3160 } 3161 return 0; 3162} 3163 3164/* 3165 * Performs the copy_on_write operations necessary to allow the virtual copies 3166 * into user space to work. This has to be called for write(2) system calls 3167 * from other processes, file unlinking, and file size shrinkage. 3168 */ 3169void 3170vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa) 3171{ 3172 int rv; 3173 vm_object_t robject; 3174 vm_pindex_t idx; 3175 3176 GIANT_REQUIRED; 3177 if ((object == NULL) || 3178 ((object->flags & OBJ_OPT) == 0)) 3179 return; 3180 3181 if (object->shadow_count > object->ref_count) 3182 panic("vm_freeze_copyopts: sc > rc"); 3183 3184 while ((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) { 3185 vm_pindex_t bo_pindex; 3186 vm_page_t m_in, m_out; 3187 3188 bo_pindex = OFF_TO_IDX(robject->backing_object_offset); 3189 3190 vm_object_reference(robject); 3191 3192 vm_object_pip_wait(robject, "objfrz"); 3193 3194 if (robject->ref_count == 1) { 3195 vm_object_deallocate(robject); 3196 continue; 3197 } 3198 3199 vm_object_pip_add(robject, 1); 3200 3201 for (idx = 0; idx < robject->size; idx++) { 3202 3203 m_out = vm_page_grab(robject, idx, 3204 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 3205 3206 if (m_out->valid == 0) { 3207 m_in = vm_page_grab(object, bo_pindex + idx, 3208 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 3209 if (m_in->valid == 0) { 3210 rv = vm_pager_get_pages(object, &m_in, 1, 0); 3211 if (rv != VM_PAGER_OK) { 3212 printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex); 3213 continue; 3214 } 3215 vm_page_deactivate(m_in); 3216 } 3217 3218 vm_page_protect(m_in, VM_PROT_NONE); 3219 pmap_copy_page(m_in, m_out); 3220 m_out->valid = m_in->valid; 3221 vm_page_dirty(m_out); 3222 vm_page_activate(m_out); 3223 vm_page_wakeup(m_in); 3224 } 3225 vm_page_wakeup(m_out); 3226 } 3227 3228 object->shadow_count--; 3229 object->ref_count--; 3230 TAILQ_REMOVE(&object->shadow_head, robject, shadow_list); 3231 robject->backing_object = NULL; 3232 robject->backing_object_offset = 0; 3233 3234 vm_object_pip_wakeup(robject); 3235 vm_object_deallocate(robject); 3236 } 3237 3238 vm_object_clear_flag(object, OBJ_OPT); 3239} 3240 3241#include "opt_ddb.h" 3242#ifdef DDB 3243#include <sys/kernel.h> 3244 3245#include <ddb/ddb.h> 3246 3247/* 3248 * vm_map_print: [ debug ] 3249 */ 3250DB_SHOW_COMMAND(map, vm_map_print) 3251{ 3252 static int nlines; 3253 /* XXX convert args. 
*/ 3254 vm_map_t map = (vm_map_t)addr; 3255 boolean_t full = have_addr; 3256 3257 vm_map_entry_t entry; 3258 3259 db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n", 3260 (void *)map, 3261 (void *)map->pmap, map->nentries, map->timestamp); 3262 nlines++; 3263 3264 if (!full && db_indent) 3265 return; 3266 3267 db_indent += 2; 3268 for (entry = map->header.next; entry != &map->header; 3269 entry = entry->next) { 3270 db_iprintf("map entry %p: start=%p, end=%p\n", 3271 (void *)entry, (void *)entry->start, (void *)entry->end); 3272 nlines++; 3273 { 3274 static char *inheritance_name[4] = 3275 {"share", "copy", "none", "donate_copy"}; 3276 3277 db_iprintf(" prot=%x/%x/%s", 3278 entry->protection, 3279 entry->max_protection, 3280 inheritance_name[(int)(unsigned char)entry->inheritance]); 3281 if (entry->wired_count != 0) 3282 db_printf(", wired"); 3283 } 3284 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 3285 /* XXX no %qd in kernel. Truncate entry->offset. */ 3286 db_printf(", share=%p, offset=0x%lx\n", 3287 (void *)entry->object.sub_map, 3288 (long)entry->offset); 3289 nlines++; 3290 if ((entry->prev == &map->header) || 3291 (entry->prev->object.sub_map != 3292 entry->object.sub_map)) { 3293 db_indent += 2; 3294 vm_map_print((db_expr_t)(intptr_t) 3295 entry->object.sub_map, 3296 full, 0, (char *)0); 3297 db_indent -= 2; 3298 } 3299 } else { 3300 /* XXX no %qd in kernel. Truncate entry->offset. */ 3301 db_printf(", object=%p, offset=0x%lx", 3302 (void *)entry->object.vm_object, 3303 (long)entry->offset); 3304 if (entry->eflags & MAP_ENTRY_COW) 3305 db_printf(", copy (%s)", 3306 (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); 3307 db_printf("\n"); 3308 nlines++; 3309 3310 if ((entry->prev == &map->header) || 3311 (entry->prev->object.vm_object != 3312 entry->object.vm_object)) { 3313 db_indent += 2; 3314 vm_object_print((db_expr_t)(intptr_t) 3315 entry->object.vm_object, 3316 full, 0, (char *)0); 3317 nlines += 4; 3318 db_indent -= 2; 3319 } 3320 } 3321 } 3322 db_indent -= 2; 3323 if (db_indent == 0) 3324 nlines = 0; 3325} 3326 3327 3328DB_SHOW_COMMAND(procvm, procvm) 3329{ 3330 struct proc *p; 3331 3332 if (have_addr) { 3333 p = (struct proc *) addr; 3334 } else { 3335 p = curproc; 3336 } 3337 3338 db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n", 3339 (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map, 3340 (void *)vmspace_pmap(p->p_vmspace)); 3341 3342 vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL); 3343} 3344 3345#endif /* DDB */