/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Kernel memory management.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/vm/vm_kern.c 248084 2013-03-09 02:32:23Z attilio $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>		/* for ticks and hz */
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

vm_map_t kernel_map = 0;
vm_map_t kmem_map = 0;
vm_map_t exec_map = 0;
vm_map_t pipe_map;
vm_map_t buffer_map = 0;

const void *zero_region;
CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0);

SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD,
    NULL, VM_MIN_KERNEL_ADDRESS, "Min kernel address");

SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD,
#if defined(__arm__) || defined(__sparc64__)
    &vm_max_kernel_address, 0,
#else
    NULL, VM_MAX_KERNEL_ADDRESS,
#endif
    "Max kernel address");

/*
 *	kmem_alloc_nofault:
 *
 *	Allocate a virtual address range with no underlying object and
 *	no initial mapping to physical memory.  Any mapping from this
 *	range to physical memory must be explicitly created prior to
 *	its use, typically with pmap_qenter().  Any attempt to create
 *	a mapping on demand through vm_fault() will result in a panic.
 */
vm_offset_t
kmem_alloc_nofault(vm_map_t map, vm_size_t size)
{
	vm_offset_t addr;
	int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, 0, &addr, size, VMFS_ANY_SPACE,
	    VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	if (result != KERN_SUCCESS)
		return (0);
	return (addr);
}
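
/*
 * Example (hypothetical, not part of this file): a caller that manages its
 * own mappings could pair kmem_alloc_nofault() with pmap_qenter(), e.g.:
 *
 *	vm_offset_t va;
 *	vm_page_t pages[4];	(pages obtained elsewhere, e.g. vm_page_alloc())
 *
 *	va = kmem_alloc_nofault(kernel_map, 4 * PAGE_SIZE);
 *	if (va != 0)
 *		pmap_qenter(va, pages, 4);
 *
 * The mappings must later be torn down with pmap_qremove(va, 4) before the
 * range is released with kmem_free().
 */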

/*
 *	kmem_alloc_nofault_space:
 *
 *	Allocate a virtual address range with no underlying object and
 *	no initial mapping to physical memory within the specified
 *	address space.  Any mapping from this range to physical memory
 *	must be explicitly created prior to its use, typically with
 *	pmap_qenter().  Any attempt to create a mapping on demand
 *	through vm_fault() will result in a panic.
 */
vm_offset_t
kmem_alloc_nofault_space(vm_map_t map, vm_size_t size, int find_space)
{
	vm_offset_t addr;
	int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, 0, &addr, size, find_space,
	    VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	if (result != KERN_SUCCESS)
		return (0);
	return (addr);
}

/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t
kmem_alloc(vm_map_t map, vm_size_t size)
{
	vm_offset_t addr;
	vm_offset_t offset;

	size = round_page(size);

	/*
	 * Use the kernel object for wired-down kernel pages.  Assume that no
	 * region of the kernel object is referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * And finally, mark the data as non-pageable.
	 */
	(void) vm_map_wire(map, addr, addr + size,
	    VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);

	return (addr);
}
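
/*
 * Example (illustrative sketch only; "nbuckets" and "struct foo" are
 * placeholders, not names from this file): a wired, never-faulting table
 * sized at boot might be obtained with:
 *
 *	vm_offset_t table;
 *
 *	table = kmem_alloc(kernel_map, nbuckets * sizeof(struct foo));
 *	if (table == 0)
 *		panic("foo: cannot allocate table");
 *
 * kmem_alloc() rounds the size up to a page boundary itself.
 */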

/*
 *	Allocates a region from the kernel address map and physical pages
 *	within the specified address range to the kernel object.  Creates a
 *	wired mapping from this region to these pages, and returns the
 *	region's starting virtual address.  The allocated pages are not
 *	necessarily physically contiguous.  If M_ZERO is specified through the
 *	given flags, then the pages are zeroed before they are mapped.
 */
vm_offset_t
kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
    vm_paddr_t high, vm_memattr_t memattr)
{
	vm_object_t object = kernel_object;
	vm_offset_t addr;
	vm_ooffset_t end_offset, offset;
	vm_page_t m;
	int pflags, tries;

	size = round_page(size);
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(object);
	vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
	    VM_PROT_ALL, 0);
	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY;
	VM_OBJECT_WLOCK(object);
	end_offset = offset + size;
	for (; offset < end_offset; offset += PAGE_SIZE) {
		tries = 0;
retry:
		m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1,
		    low, high, PAGE_SIZE, 0, memattr);
		if (m == NULL) {
			VM_OBJECT_WUNLOCK(object);
			if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
				vm_map_unlock(map);
				vm_pageout_grow_cache(tries, low, high);
				vm_map_lock(map);
				VM_OBJECT_WLOCK(object);
				tries++;
				goto retry;
			}

			/*
			 * Since the pages that were allocated by any previous
			 * iterations of this loop are not busy, they can be
			 * freed by vm_object_page_remove(), which is called
			 * by vm_map_delete().
			 */
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return (0);
		}
		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
	}
	VM_OBJECT_WUNLOCK(object);
	vm_map_unlock(map);
	vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
	    VM_MAP_WIRE_NOHOLES);
	return (addr);
}
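
/*
 * Example (hypothetical): a driver that can only DMA to the low 4GB and, on
 * platforms that define it, wants an uncacheable mapping, but does not need
 * physical contiguity, could request:
 *
 *	vm_offset_t buf;
 *
 *	buf = kmem_alloc_attr(kmem_map, size, M_WAITOK | M_ZERO,
 *	    0, 0xffffffffUL, VM_MEMATTR_UNCACHEABLE);
 *
 * For ranges that must also be physically contiguous, see
 * kmem_alloc_contig() below.
 */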

/*
 *	Allocates a region from the kernel address map and physically
 *	contiguous pages within the specified address range to the kernel
 *	object.  Creates a wired mapping from this region to these pages, and
 *	returns the region's starting virtual address.  If M_ZERO is specified
 *	through the given flags, then the pages are zeroed before they are
 *	mapped.
 */
vm_offset_t
kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
    vm_memattr_t memattr)
{
	vm_object_t object = kernel_object;
	vm_offset_t addr;
	vm_ooffset_t offset;
	vm_page_t end_m, m;
	int pflags, tries;

	size = round_page(size);
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(object);
	vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
	    VM_PROT_ALL, 0);
	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY;
	VM_OBJECT_WLOCK(object);
	tries = 0;
retry:
	m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
	    atop(size), low, high, alignment, boundary, memattr);
	if (m == NULL) {
		VM_OBJECT_WUNLOCK(object);
		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
			vm_map_unlock(map);
			vm_pageout_grow_cache(tries, low, high);
			vm_map_lock(map);
			VM_OBJECT_WLOCK(object);
			tries++;
			goto retry;
		}
		vm_map_delete(map, addr, addr + size);
		vm_map_unlock(map);
		return (0);
	}
	end_m = m + atop(size);
	for (; m < end_m; m++) {
		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
	}
	VM_OBJECT_WUNLOCK(object);
	vm_map_unlock(map);
	vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
	    VM_MAP_WIRE_NOHOLES);
	return (addr);
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 *
 *	This routine may not block on kernel maps.
 */
void
kmem_free(vm_map_t map, vm_offset_t addr, vm_size_t size)
{

	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}
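
/*
 * Example (illustrative only): a 64KB physically contiguous, 64KB-aligned,
 * zeroed buffer anywhere in the low 4GB, released again with kmem_free():
 *
 *	vm_offset_t dmabuf;
 *
 *	dmabuf = kmem_alloc_contig(kmem_map, 64 * 1024, M_WAITOK | M_ZERO,
 *	    0, 0xffffffffUL, 64 * 1024, 0, VM_MEMATTR_DEFAULT);
 *	...
 *	kmem_free(kmem_map, dmabuf, 64 * 1024);
 *
 * A boundary argument of 0 means the allocation may cross any boundary.
 */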

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	min, max	Returned endpoints of map
 *	size		Size of range to find
 *	superpage_align	Request that min is superpage aligned
 */
vm_map_t
kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
    vm_size_t size, boolean_t superpage_align)
{
	int ret;
	vm_map_t result;

	size = round_page(size);

	*min = vm_map_min(parent);
	ret = vm_map_find(parent, NULL, 0, min, size, superpage_align ?
	    VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
	    MAP_ACC_NO_CHARGE);
	if (ret != KERN_SUCCESS)
		panic("kmem_suballoc: bad status return of %d", ret);
	*max = *min + size;
	result = vm_map_create(vm_map_pmap(parent), *min, *max);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return (result);
}

/*
 *	kmem_malloc:
 *
 *	Allocate wired-down memory in the kernel's address map for the higher
 *	level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 *	kmem_alloc() because we may need to allocate memory at interrupt
 *	level where we cannot block (canwait == FALSE).
 *
 *	This routine has its own private kernel submap (kmem_map) and object
 *	(kmem_object).  This, combined with the fact that only malloc uses
 *	this routine, ensures that we will never block in map or object waits.
 *
 *	We don't worry about expanding the map (adding entries) since entries
 *	for wired maps are statically allocated.
 *
 *	`map' is ONLY allowed to be kmem_map or one of the mbuf submaps to
 *	which we never free.
 */
vm_offset_t
kmem_malloc(vm_map_t map, vm_size_t size, int flags)
{
	vm_offset_t addr;
	int i, rv;

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		if ((flags & M_NOWAIT) == 0) {
			for (i = 0; i < 8; i++) {
				EVENTHANDLER_INVOKE(vm_lowmem, 0);
				uma_reclaim();
				vm_map_lock(map);
				if (vm_map_findspace(map, vm_map_min(map),
				    size, &addr) == 0)
					break;
				vm_map_unlock(map);
				tsleep(&i, 0, "nokva", (hz / 4) * (i + 1));
			}
			if (i == 8) {
				panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated",
				    (long)size, (long)map->size);
			}
		} else {
			return (0);
		}
	}

	rv = kmem_back(map, addr, size, flags);
	vm_map_unlock(map);
	return (rv == KERN_SUCCESS ? addr : 0);
}
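
/*
 * kmem_malloc() is normally reached through malloc(9) rather than called
 * directly.  A sketch of direct use in the style of UMA's backend page
 * allocator (hypothetical, simplified):
 *
 *	void *p;
 *
 *	p = (void *)kmem_malloc(kmem_map, PAGE_SIZE, M_NOWAIT);
 *	if (p == NULL)
 *		return (NULL);	(with M_NOWAIT the caller must handle failure)
 */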

/*
 *	kmem_back:
 *
 *	Allocate physical pages for the specified virtual address range.
 */
int
kmem_back(vm_map_t map, vm_offset_t addr, vm_size_t size, int flags)
{
	vm_offset_t offset, i;
	vm_map_entry_t entry;
	vm_page_t m;
	int pflags;
	boolean_t found;

	KASSERT(vm_map_locked(map), ("kmem_back: map %p is not locked", map));
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);

	/*
	 * Assert: vm_map_insert() will never be able to extend the
	 * previous entry so vm_map_lookup_entry() will find a new
	 * entry exactly corresponding to this address range and it
	 * will have wired_count == 0.
	 */
	found = vm_map_lookup_entry(map, addr, &entry);
	KASSERT(found && entry->start == addr && entry->end == addr + size &&
	    entry->wired_count == 0 &&
	    (entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0,
	    ("kmem_back: entry not found or misaligned"));

	pflags = malloc2vm_flags(flags) | VM_ALLOC_WIRED;

	VM_OBJECT_WLOCK(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
retry:
		m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), pflags);

		/*
		 * Ran out of space, free everything up and return.  Don't need
		 * to lock page queues here as we know that the pages we got
		 * aren't on any queues.
		 */
		if (m == NULL) {
			if ((flags & M_NOWAIT) == 0) {
				VM_OBJECT_WUNLOCK(kmem_object);
				entry->eflags |= MAP_ENTRY_IN_TRANSITION;
				vm_map_unlock(map);
				VM_WAIT;
				vm_map_lock(map);
				KASSERT((entry->eflags &
				    (MAP_ENTRY_IN_TRANSITION |
				    MAP_ENTRY_NEEDS_WAKEUP)) ==
				    MAP_ENTRY_IN_TRANSITION,
				    ("kmem_back: volatile entry"));
				entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
				VM_OBJECT_WLOCK(kmem_object);
				goto retry;
			}
			/*
			 * Free the pages before removing the map entry.
			 * They are already marked busy.  Calling
			 * vm_map_delete() before the pages have been freed or
			 * unbusied will cause a deadlock.
			 */
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object,
				    OFF_TO_IDX(offset + i));
				vm_page_unwire(m, 0);
				vm_page_free(m);
			}
			VM_OBJECT_WUNLOCK(kmem_object);
			vm_map_delete(map, addr, addr + size);
			return (KERN_NO_SPACE);
		}
		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
		    ("kmem_malloc: page %p is managed", m));
	}
	VM_OBJECT_WUNLOCK(kmem_object);

	/*
	 * Mark map entry as non-pageable.  Repeat the assert.
	 */
	KASSERT(entry->start == addr && entry->end == addr + size &&
	    entry->wired_count == 0,
	    ("kmem_back: entry not found or misaligned after allocation"));
	entry->wired_count = 1;

	/*
	 * At this point, the kmem_object must be unlocked because
	 * vm_map_simplify_entry() calls vm_object_deallocate(), which
	 * locks the kmem_object.
	 */
	vm_map_simplify_entry(map, entry);

	/*
	 * Loop through the pages, entering them in the pmap.
	 */
	VM_OBJECT_WLOCK(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
		/*
		 * Because this is kernel_pmap, this call will not block.
		 */
		pmap_enter(kernel_pmap, addr + i, VM_PROT_ALL, m, VM_PROT_ALL,
		    TRUE);
		vm_page_wakeup(m);
	}
	VM_OBJECT_WUNLOCK(kmem_object);

	return (KERN_SUCCESS);
}
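
/*
 * kmem_back() assumes the map is already locked, as asserted above; the
 * expected calling pattern (mirroring kmem_malloc()) is:
 *
 *	vm_map_lock(map);
 *	... find a free range and compute addr ...
 *	rv = kmem_back(map, addr, size, flags);
 *	vm_map_unlock(map);
 */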

/*
 *	kmem_alloc_wait:
 *
 *	Allocates pageable memory from a sub-map of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 *
 *	This routine may block.
 */
vm_offset_t
kmem_alloc_wait(vm_map_t map, vm_size_t size)
{
	vm_offset_t addr;

	size = round_page(size);
	if (!swap_reserve(size))
		return (0);

	for (;;) {
		/*
		 * To make this work for more than one map, use the map's lock
		 * to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			swap_release(size);
			return (0);
		}
		map->needs_wakeup = TRUE;
		vm_map_unlock_and_wait(map, 0);
	}
	vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL,
	    VM_PROT_ALL, MAP_ACC_CHARGED);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup:
 *
 *	Returns memory to a submap of the kernel, and wakes up any processes
 *	waiting for memory in that map.
 */
void
kmem_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size)
{

	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	if (map->needs_wakeup) {
		map->needs_wakeup = FALSE;
		vm_map_wakeup(map);
	}
	vm_map_unlock(map);
}
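
/*
 * Example (sketch): these two routines are used in pairs on pageable
 * submaps such as exec_map, e.g.:
 *
 *	vm_offset_t ka;
 *
 *	ka = kmem_alloc_wait(exec_map, round_page(len));	(may sleep)
 *	...
 *	kmem_free_wakeup(exec_map, ka, round_page(len));	(wakes sleepers)
 *
 * "len" is a placeholder for the caller's buffer size.
 */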

static void
kmem_init_zero_region(void)
{
	vm_offset_t addr, i;
	vm_page_t m;
	int error;

	/*
	 * Map a single physical page of zeros to a larger virtual range.
	 * This requires less looping in places that want large amounts of
	 * zeros, while not using much more physical resources.
	 */
	addr = kmem_alloc_nofault(kernel_map, ZERO_REGION_SIZE);
	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);
	for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE)
		pmap_qenter(addr + i, &m, 1);
	error = vm_map_protect(kernel_map, addr, addr + ZERO_REGION_SIZE,
	    VM_PROT_READ, TRUE);
	KASSERT(error == 0, ("error=%d", error));

	zero_region = (const void *)addr;
}

/*
 *	kmem_init:
 *
 *	Create the kernel map; insert a mapping covering kernel text,
 *	data, bss, and all space allocated thus far (`bootstrap' data).  The
 *	new map will thus map the range between VM_MIN_KERNEL_ADDRESS and
 *	`start' as allocated, and the range between `start' and `end' as free.
 */
void
kmem_init(vm_offset_t start, vm_offset_t end)
{
	vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
	m->system_map = 1;
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	(void) vm_map_insert(m, NULL, (vm_ooffset_t) 0,
#ifdef __amd64__
	    KERNBASE,
#else
	    VM_MIN_KERNEL_ADDRESS,
#endif
	    start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);

	kmem_init_zero_region();
}

#ifdef DIAGNOSTIC
/*
 * Allow userspace to directly trigger the VM drain routine for testing
 * purposes.
 */
static int
debug_vm_lowmem(SYSCTL_HANDLER_ARGS)
{
	int error, i;

	i = 0;
	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error)
		return (error);
	if (i)
		EVENTHANDLER_INVOKE(vm_lowmem, 0);
	return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem, CTLTYPE_INT | CTLFLAG_RW, 0, 0,
    debug_vm_lowmem, "I", "set to trigger vm_lowmem event");
#endif
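
/*
 * On DIAGNOSTIC kernels the handler above can be exercised from userspace,
 * for example:
 *
 *	# sysctl debug.vm_lowmem=1
 *
 * which fires the vm_lowmem eventhandler just as a genuine shortage would.
 */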