/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Kernel memory management.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/vm/vm_kern.c 226843 2011-10-27 16:39:17Z alc $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>		/* for ticks and hz */
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

vm_map_t kernel_map=0;
vm_map_t kmem_map=0;
vm_map_t exec_map=0;
vm_map_t pipe_map;
vm_map_t buffer_map=0;

const void *zero_region;
CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0);

/*
 *	kmem_alloc_nofault:
 *
 *	Allocate a virtual address range with no underlying object and
 *	no initial mapping to physical memory.  Any mapping from this
 *	range to physical memory must be explicitly created prior to
 *	its use, typically with pmap_qenter().  Any attempt to create
 *	a mapping on demand through vm_fault() will result in a panic.
 */
vm_offset_t
kmem_alloc_nofault(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;
	int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, 0, &addr, size, VMFS_ANY_SPACE,
	    VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	if (result != KERN_SUCCESS) {
		return (0);
	}
	return (addr);
}

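/*
 * Usage sketch (illustrative only, not part of this file): a caller
 * that manages its own pages pairs kmem_alloc_nofault() with
 * pmap_qenter(), as the comment above describes.  The helper name
 * and page array here are hypothetical.
 */
#if 0
static vm_offset_t
example_nofault_backing(vm_page_t *pages, int npages)
{
	vm_offset_t va;

	va = kmem_alloc_nofault(kernel_map, (vm_size_t)npages * PAGE_SIZE);
	if (va == 0)
		return (0);
	/* Create the physical mappings explicitly; vm_fault() must not. */
	pmap_qenter(va, pages, npages);
	return (va);
}
#endif
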
/*
 *	kmem_alloc_nofault_space:
 *
 *	Allocate a virtual address range with no underlying object and
 *	no initial mapping to physical memory within the specified
 *	address space.  Any mapping from this range to physical memory
 *	must be explicitly created prior to its use, typically with
 *	pmap_qenter().  Any attempt to create a mapping on demand
 *	through vm_fault() will result in a panic.
 */
vm_offset_t
kmem_alloc_nofault_space(map, size, find_space)
	vm_map_t map;
	vm_size_t size;
	int find_space;
{
	vm_offset_t addr;
	int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, 0, &addr, size, find_space,
	    VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	if (result != KERN_SUCCESS) {
		return (0);
	}
	return (addr);
}

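/*
 * Usage sketch (illustrative only): find_space is passed straight
 * through to vm_map_find(), so a caller can request a superpage-
 * aligned hole with VMFS_ALIGNED_SPACE instead of the VMFS_ANY_SPACE
 * policy that kmem_alloc_nofault() hardcodes.
 */
#if 0
static vm_offset_t
example_aligned_nofault(vm_size_t size)
{

	return (kmem_alloc_nofault_space(kernel_map, size,
	    VMFS_ALIGNED_SPACE));
}
#endif
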
/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t
kmem_alloc(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;
	vm_offset_t offset;
	vm_offset_t i;

	size = round_page(size);

	/*
	 * Use the kernel object for wired-down kernel pages. Assume that no
	 * region of the kernel object is referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Guarantee that there are pages already in this object before
	 * calling vm_map_wire.  This is to prevent the following
	 * scenario:
	 *
	 * 1) Threads have swapped out, so that there is a pager for the
	 * kernel_object. 2) The kmsg zone is empty, and so we are
	 * kmem_allocing a new page for it. 3) vm_map_wire calls vm_fault;
	 * there is no page, but there is a pager, so we call
	 * pager_data_request.  But the kmsg zone is empty, so we must
	 * kmem_alloc. 4) goto 1 5) Even if the kmsg zone is not empty: when
	 * we get the data back from the pager, it will be (very stale)
	 * non-zero data.  kmem_alloc is defined to return zero-filled memory.
	 *
	 * We're intentionally not activating the pages we allocate to prevent a
	 * race with page-out.  vm_map_wire will wire the pages.
	 */
	VM_OBJECT_LOCK(kernel_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_page_t mem;

		mem = vm_page_grab(kernel_object, OFF_TO_IDX(offset + i),
		    VM_ALLOC_NOBUSY | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
		mem->valid = VM_PAGE_BITS_ALL;
		KASSERT((mem->oflags & VPO_UNMANAGED) != 0,
		    ("kmem_alloc: page %p is managed", mem));
	}
	VM_OBJECT_UNLOCK(kernel_object);

	/*
	 * And finally, mark the data as non-pageable.
	 */
	(void) vm_map_wire(map, addr, addr + size,
	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);

	return (addr);
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 *
 *	This routine may not block on kernel maps.
 */
void
kmem_free(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{

	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	min, max	Returned endpoints of map
 *	size		Size of range to find
 *	superpage_align	Request that min be superpage aligned
 */
vm_map_t
kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
    vm_size_t size, boolean_t superpage_align)
{
	int ret;
	vm_map_t result;

	size = round_page(size);

	*min = vm_map_min(parent);
	ret = vm_map_find(parent, NULL, 0, min, size, superpage_align ?
	    VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
	    MAP_ACC_NO_CHARGE);
	if (ret != KERN_SUCCESS)
		panic("kmem_suballoc: bad status return of %d", ret);
	*max = *min + size;
	result = vm_map_create(vm_map_pmap(parent), *min, *max);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return (result);
}

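/*
 * Usage sketch (illustrative only): this is roughly how the kernel's
 * own submaps, such as exec_map above, are carved out of kernel_map
 * at startup.  The names and the 16MB size here are made up.
 */
#if 0
static vm_map_t example_submap;
static vm_offset_t example_min, example_max;

static void
example_create_submap(void)
{

	/* Carve out a 16MB submap; no superpage alignment requested. */
	example_submap = kmem_suballoc(kernel_map, &example_min,
	    &example_max, 16 * 1024 * 1024, FALSE);
}
#endif
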
/*
 *	kmem_malloc:
 *
 *	Allocate wired-down memory in the kernel's address map for the higher
 *	level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 *	kmem_alloc() because we may need to allocate memory at interrupt
 *	level where we cannot block (canwait == FALSE).
 *
 *	This routine has its own private kernel submap (kmem_map) and object
 *	(kmem_object).  This, combined with the fact that only malloc uses
 *	this routine, ensures that we will never block in map or object waits.
 *
 *	We don't worry about expanding the map (adding entries) since entries
 *	for wired maps are statically allocated.
 *
 *	`map' is ONLY allowed to be kmem_map or one of the mbuf submaps to
 *	which we never free.
 */
vm_offset_t
kmem_malloc(map, size, flags)
	vm_map_t map;
	vm_size_t size;
	int flags;
{
	vm_offset_t addr;
	int i, rv;

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		if ((flags & M_NOWAIT) == 0) {
			for (i = 0; i < 8; i++) {
				EVENTHANDLER_INVOKE(vm_lowmem, 0);
				uma_reclaim();
				vm_map_lock(map);
				if (vm_map_findspace(map, vm_map_min(map),
				    size, &addr) == 0) {
					break;
				}
				vm_map_unlock(map);
				tsleep(&i, 0, "nokva", (hz / 4) * (i + 1));
			}
			if (i == 8) {
				panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated",
				    (long)size, (long)map->size);
			}
		} else {
			return (0);
		}
	}

	rv = kmem_back(map, addr, size, flags);
	vm_map_unlock(map);
	return (rv == KERN_SUCCESS ? addr : 0);
}

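/*
 * Usage sketch (illustrative only): kmem_malloc() is normally reached
 * through malloc(9), but the flag semantics are visible here.  With
 * M_NOWAIT a failure is reported as 0; without it the routine retries
 * and ultimately panics rather than fail.
 */
#if 0
static void *
example_atomic_alloc(void)
{
	vm_offset_t va;

	va = kmem_malloc(kmem_map, PAGE_SIZE, M_NOWAIT | M_ZERO);
	return ((void *)va);	/* NULL here means the caller must cope */
}
#endif
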
/*
 *	kmem_back:
 *
 *	Allocate physical pages for the specified virtual address range.
 */
int
kmem_back(vm_map_t map, vm_offset_t addr, vm_size_t size, int flags)
{
	vm_offset_t offset, i;
	vm_map_entry_t entry;
	vm_page_t m;
	int pflags;
	boolean_t found;

	KASSERT(vm_map_locked(map), ("kmem_back: map %p is not locked", map));
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);

	/*
	 * Assert: vm_map_insert() will never be able to extend the
	 * previous entry so vm_map_lookup_entry() will find a new
	 * entry exactly corresponding to this address range and it
	 * will have wired_count == 0.
	 */
	found = vm_map_lookup_entry(map, addr, &entry);
	KASSERT(found && entry->start == addr && entry->end == addr + size &&
	    entry->wired_count == 0 && (entry->eflags & MAP_ENTRY_IN_TRANSITION)
	    == 0, ("kmem_back: entry not found or misaligned"));

	if ((flags & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
		pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED;
	else
		pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED;

	if (flags & M_ZERO)
		pflags |= VM_ALLOC_ZERO;

	VM_OBJECT_LOCK(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
retry:
		m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), pflags);

		/*
		 * Ran out of space, free everything up and return. Don't need
		 * to lock page queues here as we know that the pages we got
		 * aren't on any queues.
		 */
		if (m == NULL) {
			if ((flags & M_NOWAIT) == 0) {
				VM_OBJECT_UNLOCK(kmem_object);
				entry->eflags |= MAP_ENTRY_IN_TRANSITION;
				vm_map_unlock(map);
				VM_WAIT;
				vm_map_lock(map);
				KASSERT(
(entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_NEEDS_WAKEUP)) ==
				    MAP_ENTRY_IN_TRANSITION,
				    ("kmem_back: volatile entry"));
				entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
				VM_OBJECT_LOCK(kmem_object);
				goto retry;
			}
			/*
			 * Free the pages before removing the map entry.
			 * They are already marked busy.  Calling
			 * vm_map_delete before the pages have been freed or
			 * unbusied will cause a deadlock.
			 */
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object,
				    OFF_TO_IDX(offset + i));
				vm_page_unwire(m, 0);
				vm_page_free(m);
			}
			VM_OBJECT_UNLOCK(kmem_object);
			vm_map_delete(map, addr, addr + size);
			return (KERN_NO_SPACE);
		}
		if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
		    ("kmem_malloc: page %p is managed", m));
	}
	VM_OBJECT_UNLOCK(kmem_object);

	/*
	 * Mark map entry as non-pageable.  Repeat the assert.
	 */
	KASSERT(entry->start == addr && entry->end == addr + size &&
	    entry->wired_count == 0,
	    ("kmem_back: entry not found or misaligned after allocation"));
	entry->wired_count = 1;

	/*
	 * At this point, the kmem_object must be unlocked because
	 * vm_map_simplify_entry() calls vm_object_deallocate(), which
	 * locks the kmem_object.
	 */
	vm_map_simplify_entry(map, entry);

	/*
	 * Loop through the pages, entering them in the pmap.
	 */
	VM_OBJECT_LOCK(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
		/*
		 * Because this is kernel_pmap, this call will not block.
		 */
		pmap_enter(kernel_pmap, addr + i, VM_PROT_ALL, m, VM_PROT_ALL,
		    TRUE);
		vm_page_wakeup(m);
	}
	VM_OBJECT_UNLOCK(kmem_object);

	return (KERN_SUCCESS);
}

/*
 *	kmem_alloc_wait:
 *
 *	Allocates pageable memory from a sub-map of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 *
 *	This routine may block.
 */
vm_offset_t
kmem_alloc_wait(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;

	size = round_page(size);
	if (!swap_reserve(size))
		return (0);

	for (;;) {
		/*
		 * To make this work for more than one map, use the map's lock
		 * to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			swap_release(size);
			return (0);
		}
		map->needs_wakeup = TRUE;
		vm_map_unlock_and_wait(map, 0);
	}
	vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL,
	    VM_PROT_ALL, MAP_ACC_CHARGED);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup:
 *
 *	Returns memory to a submap of the kernel, and wakes up any processes
 *	waiting for memory in that map.
 */
void
kmem_free_wakeup(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{

	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	if (map->needs_wakeup) {
		map->needs_wakeup = FALSE;
		vm_map_wakeup(map);
	}
	vm_map_unlock(map);
}

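/*
 * Usage sketch (illustrative only): these two routines are used as a
 * pair on a pageable submap, e.g. the way exec argument buffers come
 * and go in exec_map.  The blocking allocation is matched by a wakeup
 * on free; the helper name and size are hypothetical.
 */
#if 0
static void
example_pageable_buffer(void)
{
	vm_offset_t buf;
	vm_size_t sz = round_page(64 * 1024);

	buf = kmem_alloc_wait(exec_map, sz);	/* may sleep for space */
	if (buf != 0) {
		/* ... use the pageable buffer ... */
		kmem_free_wakeup(exec_map, buf, sz);	/* wakes sleepers */
	}
}
#endif
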
static void
kmem_init_zero_region(void)
{
	vm_offset_t addr, i;
	vm_page_t m;
	int error;

	/*
	 * Map a single physical page of zeros to a larger virtual range.
	 * This requires less looping in places that want large amounts of
	 * zeros, while not using much more physical resources.
	 */
	addr = kmem_alloc_nofault(kernel_map, ZERO_REGION_SIZE);
	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);
	for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE)
		pmap_qenter(addr + i, &m, 1);
	error = vm_map_protect(kernel_map, addr, addr + ZERO_REGION_SIZE,
	    VM_PROT_READ, TRUE);
	KASSERT(error == 0, ("error=%d", error));

	zero_region = (const void *)addr;
}

/*
 *	kmem_init:
 *
 *	Create the kernel map; insert a mapping covering kernel text,
 *	data, bss, and all space allocated thus far (`bootstrap' data).  The
 *	new map will thus map the range between VM_MIN_KERNEL_ADDRESS and
 *	`start' as allocated, and the range between `start' and `end' as free.
 */
void
kmem_init(start, end)
	vm_offset_t start, end;
{
	vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
	m->system_map = 1;
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	(void) vm_map_insert(m, NULL, (vm_ooffset_t) 0,
#ifdef __amd64__
	    KERNBASE,
#else
	    VM_MIN_KERNEL_ADDRESS,
#endif
	    start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);

	kmem_init_zero_region();
}

#ifdef DIAGNOSTIC
/*
 * Allow userspace to directly trigger the VM drain routine for testing
 * purposes.
 */
static int
debug_vm_lowmem(SYSCTL_HANDLER_ARGS)
{
	int error, i;

	i = 0;
	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error)
		return (error);
	if (i)
		EVENTHANDLER_INVOKE(vm_lowmem, 0);
	return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem, CTLTYPE_INT | CTLFLAG_RW, 0, 0,
    debug_vm_lowmem, "I", "set to trigger vm_lowmem event");
#endif
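
/*
 * Usage sketch (illustrative only): a consumer such as /dev/zero can
 * hand out zeros by copying from zero_region in ZERO_REGION_SIZE
 * chunks instead of zeroing a private buffer.  The uio handling below
 * is a sketch, not the actual driver code.
 */
#if 0
static int
example_read_zeros(struct uio *uio)
{
	ssize_t len;
	int error = 0;

	while (uio->uio_resid > 0 && error == 0) {
		len = uio->uio_resid;
		if (len > ZERO_REGION_SIZE)
			len = ZERO_REGION_SIZE;
		/* zero_region is read-only, so strip const for uiomove(). */
		error = uiomove(__DECONST(void *, zero_region), len, uio);
	}
	return (error);
}
#endif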