vm_kern.c revision 248277
1139825Simp/*- 21541Srgrimes * Copyright (c) 1991, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * This code is derived from software contributed to Berkeley by 61541Srgrimes * The Mach Operating System project at Carnegie-Mellon University. 71541Srgrimes * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 4. Neither the name of the University nor the names of its contributors 171541Srgrimes * may be used to endorse or promote products derived from this software 181541Srgrimes * without specific prior written permission. 191541Srgrimes * 201541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 211541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 221541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 231541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 241541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 251541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 261541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 271541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 281541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 291541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 301541Srgrimes * SUCH DAMAGE. 311541Srgrimes * 321817Sdg * from: @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 331541Srgrimes * 341541Srgrimes * 351541Srgrimes * Copyright (c) 1987, 1990 Carnegie-Mellon University. 361541Srgrimes * All rights reserved. 371541Srgrimes * 381541Srgrimes * Authors: Avadis Tevanian, Jr., Michael Wayne Young 395455Sdg * 401541Srgrimes * Permission to use, copy, modify and distribute this software and 411541Srgrimes * its documentation is hereby granted, provided that both the copyright 421541Srgrimes * notice and this permission notice appear in all copies of the 431541Srgrimes * software, derivative works or modified versions, and any portions 441541Srgrimes * thereof, and that both notices appear in supporting documentation. 455455Sdg * 465455Sdg * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 475455Sdg * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 481541Srgrimes * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 495455Sdg * 501541Srgrimes * Carnegie Mellon requests users of this software to return to 511541Srgrimes * 521541Srgrimes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 531541Srgrimes * School of Computer Science 541541Srgrimes * Carnegie Mellon University 551541Srgrimes * Pittsburgh PA 15213-3890 561541Srgrimes * 571541Srgrimes * any improvements or extensions that they make and grant Carnegie the 581541Srgrimes * rights to redistribute these changes. 591541Srgrimes */ 601541Srgrimes 611541Srgrimes/* 621541Srgrimes * Kernel memory management. 631541Srgrimes */ 641541Srgrimes 65116226Sobrien#include <sys/cdefs.h> 66116226Sobrien__FBSDID("$FreeBSD: head/sys/vm/vm_kern.c 248277 2013-03-14 19:50:09Z kib $"); 67116226Sobrien 681541Srgrimes#include <sys/param.h> 691541Srgrimes#include <sys/systm.h> 7087157Sluigi#include <sys/kernel.h> /* for ticks and hz */ 71168395Spjd#include <sys/eventhandler.h> 7276166Smarkm#include <sys/lock.h> 732112Swollman#include <sys/proc.h> 746129Sdg#include <sys/malloc.h> 75248084Sattilio#include <sys/rwlock.h> 76188964Srwatson#include <sys/sysctl.h> 771541Srgrimes 781541Srgrimes#include <vm/vm.h> 7912662Sdg#include <vm/vm_param.h> 8012662Sdg#include <vm/pmap.h> 8112662Sdg#include <vm/vm_map.h> 8212662Sdg#include <vm/vm_object.h> 831541Srgrimes#include <vm/vm_page.h> 841541Srgrimes#include <vm/vm_pageout.h> 8512726Sbde#include <vm/vm_extern.h> 86168395Spjd#include <vm/uma.h> 871541Srgrimes 88248277Skibvm_map_t kernel_map; 89248277Skibvm_map_t kmem_map; 90248277Skibvm_map_t exec_map; 91118764Ssilbyvm_map_t pipe_map; 92248277Skibvm_map_t buffer_map; 932112Swollman 94221853Smdfconst void *zero_region; 95221853SmdfCTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0); 96221853Smdf 97246316SmariusSYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD, 98246316Smarius NULL, VM_MIN_KERNEL_ADDRESS, "Min kernel address"); 99246316Smarius 100246316SmariusSYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD, 101246926Salc#if defined(__arm__) || defined(__sparc64__) 102246316Smarius &vm_max_kernel_address, 0, 103246316Smarius#else 104246316Smarius NULL, VM_MAX_KERNEL_ADDRESS, 105246316Smarius#endif 106246316Smarius "Max kernel address"); 107246316Smarius 1081541Srgrimes/* 10947841Sdt * kmem_alloc_nofault: 11047841Sdt * 111118317Salc * Allocate a virtual address range with no underlying object and 112118317Salc * no initial mapping to physical memory. Any mapping from this 113118317Salc * range to physical memory must be explicitly created prior to 114118317Salc * its use, typically with pmap_qenter(). Any attempt to create 115118317Salc * a mapping on demand through vm_fault() will result in a panic. 11647841Sdt */ 11747841Sdtvm_offset_t 11847841Sdtkmem_alloc_nofault(map, size) 11947841Sdt vm_map_t map; 12070480Salfred vm_size_t size; 12147841Sdt{ 12247841Sdt vm_offset_t addr; 12370480Salfred int result; 12447841Sdt 12547841Sdt size = round_page(size); 12647841Sdt addr = vm_map_min(map); 127178933Salc result = vm_map_find(map, NULL, 0, &addr, size, VMFS_ANY_SPACE, 128178933Salc VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); 12947841Sdt if (result != KERN_SUCCESS) { 13047841Sdt return (0); 13147841Sdt } 13247841Sdt return (addr); 13347841Sdt} 13447841Sdt 13547841Sdt/* 136206819Sjmallett * kmem_alloc_nofault_space: 137206819Sjmallett * 138206819Sjmallett * Allocate a virtual address range with no underlying object and 139206819Sjmallett * no initial mapping to physical memory within the specified 140206819Sjmallett * address space. Any mapping from this range to physical memory 141206819Sjmallett * must be explicitly created prior to its use, typically with 142206819Sjmallett * pmap_qenter(). Any attempt to create a mapping on demand 143206819Sjmallett * through vm_fault() will result in a panic. 144206819Sjmallett */ 145206819Sjmallettvm_offset_t 146206819Sjmallettkmem_alloc_nofault_space(map, size, find_space) 147206819Sjmallett vm_map_t map; 148206819Sjmallett vm_size_t size; 149206819Sjmallett int find_space; 150206819Sjmallett{ 151206819Sjmallett vm_offset_t addr; 152206819Sjmallett int result; 153206819Sjmallett 154206819Sjmallett size = round_page(size); 155206819Sjmallett addr = vm_map_min(map); 156206819Sjmallett result = vm_map_find(map, NULL, 0, &addr, size, find_space, 157206819Sjmallett VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); 158206819Sjmallett if (result != KERN_SUCCESS) { 159206819Sjmallett return (0); 160206819Sjmallett } 161206819Sjmallett return (addr); 162206819Sjmallett} 163206819Sjmallett 164206819Sjmallett/* 1651541Srgrimes * Allocate wired-down memory in the kernel's address map 1661541Srgrimes * or a submap. 1671541Srgrimes */ 1688876Srgrimesvm_offset_t 1695455Sdgkmem_alloc(map, size) 17070480Salfred vm_map_t map; 17170480Salfred vm_size_t size; 1721541Srgrimes{ 1735455Sdg vm_offset_t addr; 17470480Salfred vm_offset_t offset; 1751541Srgrimes 1761541Srgrimes size = round_page(size); 1771541Srgrimes 1781541Srgrimes /* 1795455Sdg * Use the kernel object for wired-down kernel pages. Assume that no 1805455Sdg * region of the kernel object is referenced more than once. 1811541Srgrimes */ 1821541Srgrimes 1831541Srgrimes /* 1845455Sdg * Locate sufficient space in the map. This will give us the final 1855455Sdg * virtual address for the new memory, and thus will tell us the 1865455Sdg * offset within the kernel map. 1871541Srgrimes */ 1881541Srgrimes vm_map_lock(map); 18933758Sdyson if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { 1901541Srgrimes vm_map_unlock(map); 1911541Srgrimes return (0); 1921541Srgrimes } 1931541Srgrimes offset = addr - VM_MIN_KERNEL_ADDRESS; 1941541Srgrimes vm_object_reference(kernel_object); 19513490Sdyson vm_map_insert(map, kernel_object, offset, addr, addr + size, 19613490Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 1971541Srgrimes vm_map_unlock(map); 1981541Srgrimes 1991541Srgrimes /* 2005455Sdg * And finally, mark the data as non-pageable. 2011541Srgrimes */ 202118771Sbms (void) vm_map_wire(map, addr, addr + size, 203118771Sbms VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES); 2041541Srgrimes 2055455Sdg return (addr); 2061541Srgrimes} 2071541Srgrimes 2081541Srgrimes/* 209238452Salc * Allocates a region from the kernel address map and physical pages 210238452Salc * within the specified address range to the kernel object. Creates a 211238452Salc * wired mapping from this region to these pages, and returns the 212238452Salc * region's starting virtual address. The allocated pages are not 213238452Salc * necessarily physically contiguous. If M_ZERO is specified through the 214238452Salc * given flags, then the pages are zeroed before they are mapped. 215238452Salc */ 216238452Salcvm_offset_t 217238452Salckmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, 218238452Salc vm_paddr_t high, vm_memattr_t memattr) 219238452Salc{ 220238452Salc vm_object_t object = kernel_object; 221238452Salc vm_offset_t addr; 222238452Salc vm_ooffset_t end_offset, offset; 223238452Salc vm_page_t m; 224238452Salc int pflags, tries; 225238452Salc 226238452Salc size = round_page(size); 227238452Salc vm_map_lock(map); 228238452Salc if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { 229238452Salc vm_map_unlock(map); 230238452Salc return (0); 231238452Salc } 232238452Salc offset = addr - VM_MIN_KERNEL_ADDRESS; 233238452Salc vm_object_reference(object); 234238452Salc vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, 235238452Salc VM_PROT_ALL, 0); 236243040Skib pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY; 237248084Sattilio VM_OBJECT_WLOCK(object); 238238452Salc end_offset = offset + size; 239238452Salc for (; offset < end_offset; offset += PAGE_SIZE) { 240238452Salc tries = 0; 241238452Salcretry: 242238452Salc m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1, 243238452Salc low, high, PAGE_SIZE, 0, memattr); 244238452Salc if (m == NULL) { 245248084Sattilio VM_OBJECT_WUNLOCK(object); 246238452Salc if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { 247238452Salc vm_map_unlock(map); 248238561Salc vm_pageout_grow_cache(tries, low, high); 249238452Salc vm_map_lock(map); 250248084Sattilio VM_OBJECT_WLOCK(object); 251238452Salc tries++; 252238452Salc goto retry; 253238452Salc } 254238452Salc 255238452Salc /* 256238452Salc * Since the pages that were allocated by any previous 257238452Salc * iterations of this loop are not busy, they can be 258238452Salc * freed by vm_object_page_remove(), which is called 259238452Salc * by vm_map_delete(). 260238452Salc */ 261238452Salc vm_map_delete(map, addr, addr + size); 262238452Salc vm_map_unlock(map); 263238452Salc return (0); 264238452Salc } 265238452Salc if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) 266238452Salc pmap_zero_page(m); 267238452Salc m->valid = VM_PAGE_BITS_ALL; 268238452Salc } 269248084Sattilio VM_OBJECT_WUNLOCK(object); 270238452Salc vm_map_unlock(map); 271238452Salc vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | 272238452Salc VM_MAP_WIRE_NOHOLES); 273238452Salc return (addr); 274238452Salc} 275238452Salc 276238452Salc/* 277238452Salc * Allocates a region from the kernel address map and physically 278238452Salc * contiguous pages within the specified address range to the kernel 279238452Salc * object. Creates a wired mapping from this region to these pages, and 280238452Salc * returns the region's starting virtual address. If M_ZERO is specified 281238452Salc * through the given flags, then the pages are zeroed before they are 282238452Salc * mapped. 283238452Salc */ 284238452Salcvm_offset_t 285238452Salckmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, 286238452Salc vm_paddr_t high, u_long alignment, vm_paddr_t boundary, 287238452Salc vm_memattr_t memattr) 288238452Salc{ 289238452Salc vm_object_t object = kernel_object; 290238452Salc vm_offset_t addr; 291238452Salc vm_ooffset_t offset; 292238452Salc vm_page_t end_m, m; 293238452Salc int pflags, tries; 294238452Salc 295238452Salc size = round_page(size); 296238452Salc vm_map_lock(map); 297238452Salc if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { 298238452Salc vm_map_unlock(map); 299238452Salc return (0); 300238452Salc } 301238452Salc offset = addr - VM_MIN_KERNEL_ADDRESS; 302238452Salc vm_object_reference(object); 303238452Salc vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, 304238452Salc VM_PROT_ALL, 0); 305243040Skib pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY; 306248084Sattilio VM_OBJECT_WLOCK(object); 307238452Salc tries = 0; 308238452Salcretry: 309238452Salc m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 310238452Salc atop(size), low, high, alignment, boundary, memattr); 311238452Salc if (m == NULL) { 312248084Sattilio VM_OBJECT_WUNLOCK(object); 313238452Salc if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { 314238452Salc vm_map_unlock(map); 315238561Salc vm_pageout_grow_cache(tries, low, high); 316238452Salc vm_map_lock(map); 317248084Sattilio VM_OBJECT_WLOCK(object); 318238452Salc tries++; 319238452Salc goto retry; 320238452Salc } 321238452Salc vm_map_delete(map, addr, addr + size); 322238452Salc vm_map_unlock(map); 323238452Salc return (0); 324238452Salc } 325238452Salc end_m = m + atop(size); 326238452Salc for (; m < end_m; m++) { 327238452Salc if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) 328238452Salc pmap_zero_page(m); 329238452Salc m->valid = VM_PAGE_BITS_ALL; 330238452Salc } 331248084Sattilio VM_OBJECT_WUNLOCK(object); 332238452Salc vm_map_unlock(map); 333238452Salc vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | 334238452Salc VM_MAP_WIRE_NOHOLES); 335238452Salc return (addr); 336238452Salc} 337238452Salc 338238452Salc/* 3391541Srgrimes * kmem_free: 3401541Srgrimes * 3411541Srgrimes * Release a region of kernel virtual memory allocated 3421541Srgrimes * with kmem_alloc, and return the physical pages 3431541Srgrimes * associated with that region. 34442957Sdillon * 34542957Sdillon * This routine may not block on kernel maps. 3461541Srgrimes */ 3478876Srgrimesvoid 3485455Sdgkmem_free(map, addr, size) 3495455Sdg vm_map_t map; 35070480Salfred vm_offset_t addr; 3515455Sdg vm_size_t size; 3521541Srgrimes{ 35371571Sjhb 3541541Srgrimes (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size)); 3551541Srgrimes} 3561541Srgrimes 3571541Srgrimes/* 3581541Srgrimes * kmem_suballoc: 3591541Srgrimes * 3601541Srgrimes * Allocates a map to manage a subrange 3611541Srgrimes * of the kernel virtual address space. 3621541Srgrimes * 3631541Srgrimes * Arguments are as follows: 3641541Srgrimes * 3651541Srgrimes * parent Map to take range from 36670480Salfred * min, max Returned endpoints of map 3671541Srgrimes * size Size of range to find 368178933Salc * superpage_align Request that min is superpage aligned 3691541Srgrimes */ 3708876Srgrimesvm_map_t 371178933Salckmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max, 372178933Salc vm_size_t size, boolean_t superpage_align) 3731541Srgrimes{ 37470478Salfred int ret; 3755455Sdg vm_map_t result; 3761541Srgrimes 3771541Srgrimes size = round_page(size); 3781541Srgrimes 379178637Salc *min = vm_map_min(parent); 380178933Salc ret = vm_map_find(parent, NULL, 0, min, size, superpage_align ? 381194766Skib VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, 382194766Skib MAP_ACC_NO_CHARGE); 383177762Salc if (ret != KERN_SUCCESS) 384177762Salc panic("kmem_suballoc: bad status return of %d", ret); 3851541Srgrimes *max = *min + size; 38632702Sdyson result = vm_map_create(vm_map_pmap(parent), *min, *max); 3871541Srgrimes if (result == NULL) 3881541Srgrimes panic("kmem_suballoc: cannot create submap"); 38970478Salfred if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS) 3901541Srgrimes panic("kmem_suballoc: unable to change range to submap"); 3915455Sdg return (result); 3921541Srgrimes} 3931541Srgrimes 3941541Srgrimes/* 39542957Sdillon * kmem_malloc: 3961541Srgrimes * 39742957Sdillon * Allocate wired-down memory in the kernel's address map for the higher 39842957Sdillon * level kernel memory allocator (kern/kern_malloc.c). We cannot use 39942957Sdillon * kmem_alloc() because we may need to allocate memory at interrupt 40042957Sdillon * level where we cannot block (canwait == FALSE). 4011541Srgrimes * 40242957Sdillon * This routine has its own private kernel submap (kmem_map) and object 40342957Sdillon * (kmem_object). This, combined with the fact that only malloc uses 40442957Sdillon * this routine, ensures that we will never block in map or object waits. 4051541Srgrimes * 40642957Sdillon * We don't worry about expanding the map (adding entries) since entries 40742957Sdillon * for wired maps are statically allocated. 40842957Sdillon * 40978592Sbmilekic * `map' is ONLY allowed to be kmem_map or one of the mbuf submaps to 41078592Sbmilekic * which we never free. 4111541Srgrimes */ 4121541Srgrimesvm_offset_t 41342957Sdillonkmem_malloc(map, size, flags) 41470480Salfred vm_map_t map; 41570480Salfred vm_size_t size; 41642957Sdillon int flags; 4171541Srgrimes{ 4185455Sdg vm_offset_t addr; 419211194Smdf int i, rv; 4201541Srgrimes 4211541Srgrimes size = round_page(size); 4221541Srgrimes addr = vm_map_min(map); 4231541Srgrimes 4241541Srgrimes /* 4255455Sdg * Locate sufficient space in the map. This will give us the final 4265455Sdg * virtual address for the new memory, and thus will tell us the 4275455Sdg * offset within the kernel map. 4281541Srgrimes */ 4291541Srgrimes vm_map_lock(map); 43033758Sdyson if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { 4311541Srgrimes vm_map_unlock(map); 432175210Spjd if ((flags & M_NOWAIT) == 0) { 433175210Spjd for (i = 0; i < 8; i++) { 434175210Spjd EVENTHANDLER_INVOKE(vm_lowmem, 0); 435175210Spjd uma_reclaim(); 436175210Spjd vm_map_lock(map); 437175210Spjd if (vm_map_findspace(map, vm_map_min(map), 438175210Spjd size, &addr) == 0) { 439175210Spjd break; 440175210Spjd } 441168395Spjd vm_map_unlock(map); 442175210Spjd tsleep(&i, 0, "nokva", (hz / 4) * (i + 1)); 443175210Spjd } 444175210Spjd if (i == 8) { 445168395Spjd panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated", 446175210Spjd (long)size, (long)map->size); 447168395Spjd } 448168395Spjd } else { 449168395Spjd return (0); 450168395Spjd } 4511541Srgrimes } 452211194Smdf 453211194Smdf rv = kmem_back(map, addr, size, flags); 454211194Smdf vm_map_unlock(map); 455211194Smdf return (rv == KERN_SUCCESS ? addr : 0); 456211194Smdf} 457211194Smdf 458211194Smdf/* 459211194Smdf * kmem_back: 460211194Smdf * 461211194Smdf * Allocate physical pages for the specified virtual address range. 462211194Smdf */ 463211194Smdfint 464211194Smdfkmem_back(vm_map_t map, vm_offset_t addr, vm_size_t size, int flags) 465211194Smdf{ 466211194Smdf vm_offset_t offset, i; 467211194Smdf vm_map_entry_t entry; 468211194Smdf vm_page_t m; 469211194Smdf int pflags; 470218701Skib boolean_t found; 471211194Smdf 472212931Smdf KASSERT(vm_map_locked(map), ("kmem_back: map %p is not locked", map)); 47315367Sdyson offset = addr - VM_MIN_KERNEL_ADDRESS; 4741541Srgrimes vm_object_reference(kmem_object); 47513490Sdyson vm_map_insert(map, kmem_object, offset, addr, addr + size, 476218701Skib VM_PROT_ALL, VM_PROT_ALL, 0); 4771541Srgrimes 478218701Skib /* 479218701Skib * Assert: vm_map_insert() will never be able to extend the 480218701Skib * previous entry so vm_map_lookup_entry() will find a new 481218701Skib * entry exactly corresponding to this address range and it 482218701Skib * will have wired_count == 0. 483218701Skib */ 484218701Skib found = vm_map_lookup_entry(map, addr, &entry); 485218701Skib KASSERT(found && entry->start == addr && entry->end == addr + size && 486218701Skib entry->wired_count == 0 && (entry->eflags & MAP_ENTRY_IN_TRANSITION) 487218701Skib == 0, ("kmem_back: entry not found or misaligned")); 488218701Skib 489243040Skib pflags = malloc2vm_flags(flags) | VM_ALLOC_WIRED; 49098455Sjeff 491248084Sattilio VM_OBJECT_WLOCK(kmem_object); 4921541Srgrimes for (i = 0; i < size; i += PAGE_SIZE) { 49315809Sdysonretry: 49498450Sjeff m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), pflags); 49598450Sjeff 4961541Srgrimes /* 4975455Sdg * Ran out of space, free everything up and return. Don't need 4985455Sdg * to lock page queues here as we know that the pages we got 4995455Sdg * aren't on any queues. 5001541Srgrimes */ 5011541Srgrimes if (m == NULL) { 50242957Sdillon if ((flags & M_NOWAIT) == 0) { 503248084Sattilio VM_OBJECT_WUNLOCK(kmem_object); 504218701Skib entry->eflags |= MAP_ENTRY_IN_TRANSITION; 50544793Salc vm_map_unlock(map); 50615809Sdyson VM_WAIT; 50744793Salc vm_map_lock(map); 508218701Skib KASSERT( 509218701Skib(entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_NEEDS_WAKEUP)) == 510218701Skib MAP_ENTRY_IN_TRANSITION, 511218701Skib ("kmem_back: volatile entry")); 512218701Skib entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; 513248084Sattilio VM_OBJECT_WLOCK(kmem_object); 51415809Sdyson goto retry; 51515809Sdyson } 51691946Stegge /* 51791946Stegge * Free the pages before removing the map entry. 51891946Stegge * They are already marked busy. Calling 51991946Stegge * vm_map_delete before the pages has been freed or 52091946Stegge * unbusied will cause a deadlock. 52191946Stegge */ 52291946Stegge while (i != 0) { 52391946Stegge i -= PAGE_SIZE; 52491946Stegge m = vm_page_lookup(kmem_object, 52591946Stegge OFF_TO_IDX(offset + i)); 526108351Salc vm_page_unwire(m, 0); 52791946Stegge vm_page_free(m); 52891946Stegge } 529248084Sattilio VM_OBJECT_WUNLOCK(kmem_object); 530189015Skib vm_map_delete(map, addr, addr + size); 531211194Smdf return (KERN_NO_SPACE); 5321541Srgrimes } 53398455Sjeff if (flags & M_ZERO && (m->flags & PG_ZERO) == 0) 534102382Salc pmap_zero_page(m); 535120761Salc m->valid = VM_PAGE_BITS_ALL; 536224746Skib KASSERT((m->oflags & VPO_UNMANAGED) != 0, 537166964Salc ("kmem_malloc: page %p is managed", m)); 5381541Srgrimes } 539248084Sattilio VM_OBJECT_WUNLOCK(kmem_object); 5401541Srgrimes 5411541Srgrimes /* 542218701Skib * Mark map entry as non-pageable. Repeat the assert. 5431541Srgrimes */ 544218701Skib KASSERT(entry->start == addr && entry->end == addr + size && 545218701Skib entry->wired_count == 0, 546218701Skib ("kmem_back: entry not found or misaligned after allocation")); 54744793Salc entry->wired_count = 1; 5481541Srgrimes 549124048Salc /* 550124048Salc * At this point, the kmem_object must be unlocked because 551124048Salc * vm_map_simplify_entry() calls vm_object_deallocate(), which 552124048Salc * locks the kmem_object. 553124048Salc */ 55420993Sdyson vm_map_simplify_entry(map, entry); 55520993Sdyson 5561541Srgrimes /* 557164234Salc * Loop thru pages, entering them in the pmap. 5581541Srgrimes */ 559248084Sattilio VM_OBJECT_WLOCK(kmem_object); 5601541Srgrimes for (i = 0; i < size; i += PAGE_SIZE) { 56112767Sdyson m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i)); 56242957Sdillon /* 56342957Sdillon * Because this is kernel_pmap, this call will not block. 56442957Sdillon */ 565175067Salc pmap_enter(kernel_pmap, addr + i, VM_PROT_ALL, m, VM_PROT_ALL, 566175067Salc TRUE); 567108351Salc vm_page_wakeup(m); 5681541Srgrimes } 569248084Sattilio VM_OBJECT_WUNLOCK(kmem_object); 5701541Srgrimes 571211194Smdf return (KERN_SUCCESS); 5721541Srgrimes} 5731541Srgrimes 5741541Srgrimes/* 57542957Sdillon * kmem_alloc_wait: 5761541Srgrimes * 5771541Srgrimes * Allocates pageable memory from a sub-map of the kernel. If the submap 5781541Srgrimes * has no room, the caller sleeps waiting for more memory in the submap. 5791541Srgrimes * 58042957Sdillon * This routine may block. 5811541Srgrimes */ 5828876Srgrimesvm_offset_t 5835455Sdgkmem_alloc_wait(map, size) 5845455Sdg vm_map_t map; 5855455Sdg vm_size_t size; 5861541Srgrimes{ 5875455Sdg vm_offset_t addr; 5881541Srgrimes 5891541Srgrimes size = round_page(size); 590194766Skib if (!swap_reserve(size)) 591194766Skib return (0); 5921541Srgrimes 5931541Srgrimes for (;;) { 5941541Srgrimes /* 5955455Sdg * To make this work for more than one map, use the map's lock 5965455Sdg * to lock out sleepers/wakers. 5971541Srgrimes */ 5981541Srgrimes vm_map_lock(map); 59933758Sdyson if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0) 6001541Srgrimes break; 6011541Srgrimes /* no space now; see if we can ever get space */ 6021541Srgrimes if (vm_map_max(map) - vm_map_min(map) < size) { 6031541Srgrimes vm_map_unlock(map); 604194766Skib swap_release(size); 6051541Srgrimes return (0); 6061541Srgrimes } 60799754Salc map->needs_wakeup = TRUE; 608173429Spjd vm_map_unlock_and_wait(map, 0); 6091541Srgrimes } 610194766Skib vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL, 611194766Skib VM_PROT_ALL, MAP_ACC_CHARGED); 6121541Srgrimes vm_map_unlock(map); 6131541Srgrimes return (addr); 6141541Srgrimes} 6151541Srgrimes 6161541Srgrimes/* 61742957Sdillon * kmem_free_wakeup: 6181541Srgrimes * 6199507Sdg * Returns memory to a submap of the kernel, and wakes up any processes 6201541Srgrimes * waiting for memory in that map. 6211541Srgrimes */ 6228876Srgrimesvoid 6235455Sdgkmem_free_wakeup(map, addr, size) 6245455Sdg vm_map_t map; 6255455Sdg vm_offset_t addr; 6265455Sdg vm_size_t size; 6271541Srgrimes{ 62876827Salfred 6291541Srgrimes vm_map_lock(map); 630189015Skib (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); 63199754Salc if (map->needs_wakeup) { 63299754Salc map->needs_wakeup = FALSE; 63399754Salc vm_map_wakeup(map); 63499754Salc } 6351541Srgrimes vm_map_unlock(map); 6361541Srgrimes} 6371541Srgrimes 638221853Smdfstatic void 639221853Smdfkmem_init_zero_region(void) 640221853Smdf{ 641221855Smdf vm_offset_t addr, i; 642221853Smdf vm_page_t m; 643221853Smdf int error; 644221853Smdf 645221855Smdf /* 646221855Smdf * Map a single physical page of zeros to a larger virtual range. 647221855Smdf * This requires less looping in places that want large amounts of 648221855Smdf * zeros, while not using much more physical resources. 649221855Smdf */ 650221853Smdf addr = kmem_alloc_nofault(kernel_map, ZERO_REGION_SIZE); 651226843Salc m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 652221853Smdf VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); 653221853Smdf if ((m->flags & PG_ZERO) == 0) 654221853Smdf pmap_zero_page(m); 655221853Smdf for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE) 656221853Smdf pmap_qenter(addr + i, &m, 1); 657221853Smdf error = vm_map_protect(kernel_map, addr, addr + ZERO_REGION_SIZE, 658221853Smdf VM_PROT_READ, TRUE); 659221853Smdf KASSERT(error == 0, ("error=%d", error)); 660221853Smdf 661221853Smdf zero_region = (const void *)addr; 662221853Smdf} 663221853Smdf 6641541Srgrimes/* 66542957Sdillon * kmem_init: 66642957Sdillon * 66742957Sdillon * Create the kernel map; insert a mapping covering kernel text, 66842957Sdillon * data, bss, and all space allocated thus far (`boostrap' data). The 66942957Sdillon * new map will thus map the range between VM_MIN_KERNEL_ADDRESS and 67042957Sdillon * `start' as allocated, and the range between `start' and `end' as free. 6711541Srgrimes */ 6728876Srgrimesvoid 6735455Sdgkmem_init(start, end) 6741541Srgrimes vm_offset_t start, end; 6751541Srgrimes{ 67670480Salfred vm_map_t m; 6771541Srgrimes 67832702Sdyson m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end); 679108426Salc m->system_map = 1; 6801541Srgrimes vm_map_lock(m); 6811541Srgrimes /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ 6821541Srgrimes kernel_map = m; 683108426Salc (void) vm_map_insert(m, NULL, (vm_ooffset_t) 0, 684179923Salc#ifdef __amd64__ 685179923Salc KERNBASE, 686179923Salc#else 687179923Salc VM_MIN_KERNEL_ADDRESS, 688179923Salc#endif 689179923Salc start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); 6901541Srgrimes /* ... and ending with the completion of the above `insert' */ 6911541Srgrimes vm_map_unlock(m); 692221853Smdf 693221853Smdf kmem_init_zero_region(); 6941541Srgrimes} 695188964Srwatson 696188967Srwatson#ifdef DIAGNOSTIC 697188964Srwatson/* 698188964Srwatson * Allow userspace to directly trigger the VM drain routine for testing 699188964Srwatson * purposes. 700188964Srwatson */ 701188964Srwatsonstatic int 702188964Srwatsondebug_vm_lowmem(SYSCTL_HANDLER_ARGS) 703188964Srwatson{ 704188964Srwatson int error, i; 705188964Srwatson 706188964Srwatson i = 0; 707188964Srwatson error = sysctl_handle_int(oidp, &i, 0, req); 708188964Srwatson if (error) 709188964Srwatson return (error); 710188964Srwatson if (i) 711188964Srwatson EVENTHANDLER_INVOKE(vm_lowmem, 0); 712188964Srwatson return (0); 713188964Srwatson} 714188964Srwatson 715188964SrwatsonSYSCTL_PROC(_debug, OID_AUTO, vm_lowmem, CTLTYPE_INT | CTLFLAG_RW, 0, 0, 716188964Srwatson debug_vm_lowmem, "I", "set to trigger vm_lowmem event"); 717188967Srwatson#endif 718