vm_kern.c revision 243040
/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Kernel memory management.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/vm/vm_kern.c 243040 2012-11-14 20:01:40Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>		/* for ticks and hz */
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

vm_map_t kernel_map=0;
vm_map_t kmem_map=0;
vm_map_t exec_map=0;
vm_map_t pipe_map;
vm_map_t buffer_map=0;

const void *zero_region;
CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0);

/*
 *	kmem_alloc_nofault:
 *
 *	Allocate a virtual address range with no underlying object and
 *	no initial mapping to physical memory.  Any mapping from this
 *	range to physical memory must be explicitly created prior to
 *	its use, typically with pmap_qenter().  Any attempt to create
 *	a mapping on demand through vm_fault() will result in a panic.
 */
vm_offset_t
kmem_alloc_nofault(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;
	int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, 0, &addr, size, VMFS_ANY_SPACE,
	    VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	if (result != KERN_SUCCESS) {
		return (0);
	}
	return (addr);
}
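
/*
 * Example (illustrative sketch, not a consumer in this file; "pages" and
 * "npages" are hypothetical caller-supplied values): a typical user of
 * kmem_alloc_nofault() reserves KVA and later installs its own mappings,
 * e.g. with pmap_qenter():
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc_nofault(kernel_map, npages * PAGE_SIZE);
 *	if (va == 0)
 *		return (ENOMEM);
 *	pmap_qenter(va, pages, npages);
 *	...
 *	pmap_qremove(va, npages);
 *	kmem_free(kernel_map, va, npages * PAGE_SIZE);
 */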

/*
 *	kmem_alloc_nofault_space:
 *
 *	Allocate a virtual address range with no underlying object and
 *	no initial mapping to physical memory within the specified
 *	address space.  Any mapping from this range to physical memory
 *	must be explicitly created prior to its use, typically with
 *	pmap_qenter().  Any attempt to create a mapping on demand
 *	through vm_fault() will result in a panic.
 */
vm_offset_t
kmem_alloc_nofault_space(map, size, find_space)
	vm_map_t map;
	vm_size_t size;
	int find_space;
{
	vm_offset_t addr;
	int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, 0, &addr, size, find_space,
	    VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	if (result != KERN_SUCCESS) {
		return (0);
	}
	return (addr);
}

/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t
kmem_alloc(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;
	vm_offset_t offset;

	size = round_page(size);

	/*
	 * Use the kernel object for wired-down kernel pages. Assume that no
	 * region of the kernel object is referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size,
		VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * And finally, mark the data as non-pageable.
	 */
	(void) vm_map_wire(map, addr, addr + size,
	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);

	return (addr);
}
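
/*
 * Example (illustrative, not from this file): wired memory obtained with
 * kmem_alloc() is released with kmem_free() using the same map and size:
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc(kernel_map, size);
 *	if (va != 0) {
 *		... use the wired memory ...
 *		kmem_free(kernel_map, va, size);
 *	}
 */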

/*
 *	Allocates a region from the kernel address map and physical pages
 *	within the specified address range to the kernel object.  Creates a
 *	wired mapping from this region to these pages, and returns the
 *	region's starting virtual address.  The allocated pages are not
 *	necessarily physically contiguous.  If M_ZERO is specified through the
 *	given flags, then the pages are zeroed before they are mapped.
 */
vm_offset_t
kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
    vm_paddr_t high, vm_memattr_t memattr)
{
	vm_object_t object = kernel_object;
	vm_offset_t addr;
	vm_ooffset_t end_offset, offset;
	vm_page_t m;
	int pflags, tries;

	size = round_page(size);
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(object);
	vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
	    VM_PROT_ALL, 0);
	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY;
	VM_OBJECT_LOCK(object);
	end_offset = offset + size;
	for (; offset < end_offset; offset += PAGE_SIZE) {
		tries = 0;
retry:
		m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1,
		    low, high, PAGE_SIZE, 0, memattr);
		if (m == NULL) {
			VM_OBJECT_UNLOCK(object);
			if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
				vm_map_unlock(map);
				vm_pageout_grow_cache(tries, low, high);
				vm_map_lock(map);
				VM_OBJECT_LOCK(object);
				tries++;
				goto retry;
			}

			/*
			 * Since the pages that were allocated by any previous
			 * iterations of this loop are not busy, they can be
			 * freed by vm_object_page_remove(), which is called
			 * by vm_map_delete().
			 */
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return (0);
		}
		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
	}
	VM_OBJECT_UNLOCK(object);
	vm_map_unlock(map);
	vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
	    VM_MAP_WIRE_NOHOLES);
	return (addr);
}
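
/*
 * Example (illustrative sketch; the physical range and memory attribute
 * shown are hypothetical): a driver that needs wired kernel memory backed
 * by pages below 4GB, mapped uncacheable, might request:
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc_attr(kernel_map, size, M_WAITOK | M_ZERO,
 *	    0, 0xffffffffUL, VM_MEMATTR_UNCACHEABLE);
 *
 * The returned pages satisfy the [low, high] constraint but need not be
 * physically contiguous; kmem_alloc_contig() below additionally provides
 * contiguity, alignment, and boundary guarantees.
 */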

/*
 *	Allocates a region from the kernel address map and physically
 *	contiguous pages within the specified address range to the kernel
 *	object.  Creates a wired mapping from this region to these pages, and
 *	returns the region's starting virtual address.  If M_ZERO is specified
 *	through the given flags, then the pages are zeroed before they are
 *	mapped.
 */
vm_offset_t
kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
    vm_memattr_t memattr)
{
	vm_object_t object = kernel_object;
	vm_offset_t addr;
	vm_ooffset_t offset;
	vm_page_t end_m, m;
	int pflags, tries;

	size = round_page(size);
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(object);
	vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
	    VM_PROT_ALL, 0);
	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY;
	VM_OBJECT_LOCK(object);
	tries = 0;
retry:
	m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
	    atop(size), low, high, alignment, boundary, memattr);
	if (m == NULL) {
		VM_OBJECT_UNLOCK(object);
		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
			vm_map_unlock(map);
			vm_pageout_grow_cache(tries, low, high);
			vm_map_lock(map);
			VM_OBJECT_LOCK(object);
			tries++;
			goto retry;
		}
		vm_map_delete(map, addr, addr + size);
		vm_map_unlock(map);
		return (0);
	}
	end_m = m + atop(size);
	for (; m < end_m; m++) {
		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
	}
	VM_OBJECT_UNLOCK(object);
	vm_map_unlock(map);
	vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
	    VM_MAP_WIRE_NOHOLES);
	return (addr);
}
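
/*
 * Example (illustrative; the size, range, and attribute are hypothetical):
 * a DMA descriptor ring that must be physically contiguous and page aligned,
 * anywhere addressable by the device, could be allocated with:
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc_contig(kernel_map, ring_size, M_WAITOK | M_ZERO,
 *	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
 */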

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 *
 *	This routine may not block on kernel maps.
 */
void
kmem_free(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{

	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	min, max	Returned endpoints of map
 *	size		Size of range to find
 *	superpage_align	Request that min is superpage aligned
 */
vm_map_t
kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
    vm_size_t size, boolean_t superpage_align)
{
	int ret;
	vm_map_t result;

	size = round_page(size);

	*min = vm_map_min(parent);
	ret = vm_map_find(parent, NULL, 0, min, size, superpage_align ?
	    VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
	    MAP_ACC_NO_CHARGE);
	if (ret != KERN_SUCCESS)
		panic("kmem_suballoc: bad status return of %d", ret);
	*max = *min + size;
	result = vm_map_create(vm_map_pmap(parent), *min, *max);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return (result);
}
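
/*
 * Example (illustrative; the sizing variables and "minaddr"/"maxaddr"
 * locals are hypothetical): submaps such as exec_map and pipe_map are
 * carved out of kernel_map at startup with this routine, roughly:
 *
 *	vm_offset_t minaddr, maxaddr;
 *
 *	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 *	    exec_map_size, FALSE);
 *	pipe_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 *	    pipe_map_size, FALSE);
 *
 * Passing TRUE for superpage_align asks for a superpage-aligned start,
 * which benefits submaps whose mappings may be promoted to superpages.
 */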

/*
 *	kmem_malloc:
 *
 * 	Allocate wired-down memory in the kernel's address map for the higher
 * 	level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 * 	kmem_alloc() because we may need to allocate memory at interrupt
 * 	level where we cannot block (canwait == FALSE).
 *
 * 	This routine has its own private kernel submap (kmem_map) and object
 * 	(kmem_object).  This, combined with the fact that only malloc uses
 * 	this routine, ensures that we will never block in map or object waits.
 *
 * 	We don't worry about expanding the map (adding entries) since entries
 * 	for wired maps are statically allocated.
 *
 *	`map' is ONLY allowed to be kmem_map or one of the mbuf submaps to
 *	which we never free.
 */
vm_offset_t
kmem_malloc(map, size, flags)
	vm_map_t map;
	vm_size_t size;
	int flags;
{
	vm_offset_t addr;
	int i, rv;

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		if ((flags & M_NOWAIT) == 0) {
			for (i = 0; i < 8; i++) {
				EVENTHANDLER_INVOKE(vm_lowmem, 0);
				uma_reclaim();
				vm_map_lock(map);
				if (vm_map_findspace(map, vm_map_min(map),
				    size, &addr) == 0) {
					break;
				}
				vm_map_unlock(map);
				tsleep(&i, 0, "nokva", (hz / 4) * (i + 1));
			}
			if (i == 8) {
				panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated",
				    (long)size, (long)map->size);
			}
		} else {
			return (0);
		}
	}

	rv = kmem_back(map, addr, size, flags);
	vm_map_unlock(map);
	return (rv == KERN_SUCCESS ? addr : 0);
}

/*
 *	kmem_back:
 *
 *	Allocate physical pages for the specified virtual address range.
 */
int
kmem_back(vm_map_t map, vm_offset_t addr, vm_size_t size, int flags)
{
	vm_offset_t offset, i;
	vm_map_entry_t entry;
	vm_page_t m;
	int pflags;
	boolean_t found;

	KASSERT(vm_map_locked(map), ("kmem_back: map %p is not locked", map));
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);

	/*
	 * Assert: vm_map_insert() will never be able to extend the
	 * previous entry so vm_map_lookup_entry() will find a new
	 * entry exactly corresponding to this address range and it
	 * will have wired_count == 0.
	 */
	found = vm_map_lookup_entry(map, addr, &entry);
	KASSERT(found && entry->start == addr && entry->end == addr + size &&
	    entry->wired_count == 0 && (entry->eflags & MAP_ENTRY_IN_TRANSITION)
	    == 0, ("kmem_back: entry not found or misaligned"));

	pflags = malloc2vm_flags(flags) | VM_ALLOC_WIRED;

	VM_OBJECT_LOCK(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
retry:
		m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), pflags);

		/*
		 * Ran out of space, free everything up and return. Don't need
		 * to lock page queues here as we know that the pages we got
		 * aren't on any queues.
		 */
		if (m == NULL) {
			if ((flags & M_NOWAIT) == 0) {
				VM_OBJECT_UNLOCK(kmem_object);
				entry->eflags |= MAP_ENTRY_IN_TRANSITION;
				vm_map_unlock(map);
				VM_WAIT;
				vm_map_lock(map);
				KASSERT(
(entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_NEEDS_WAKEUP)) ==
				    MAP_ENTRY_IN_TRANSITION,
				    ("kmem_back: volatile entry"));
				entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
				VM_OBJECT_LOCK(kmem_object);
				goto retry;
			}
			/*
			 * Free the pages before removing the map entry.
			 * They are already marked busy.  Calling
			 * vm_map_delete() before the pages have been freed
			 * or unbusied will cause a deadlock.
			 */
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object,
						   OFF_TO_IDX(offset + i));
				vm_page_unwire(m, 0);
				vm_page_free(m);
			}
			VM_OBJECT_UNLOCK(kmem_object);
			vm_map_delete(map, addr, addr + size);
			return (KERN_NO_SPACE);
		}
		if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
		    ("kmem_malloc: page %p is managed", m));
	}
	VM_OBJECT_UNLOCK(kmem_object);

	/*
	 * Mark map entry as non-pageable.  Repeat the assert.
	 */
	KASSERT(entry->start == addr && entry->end == addr + size &&
	    entry->wired_count == 0,
	    ("kmem_back: entry not found or misaligned after allocation"));
	entry->wired_count = 1;

	/*
	 * At this point, the kmem_object must be unlocked because
	 * vm_map_simplify_entry() calls vm_object_deallocate(), which
	 * locks the kmem_object.
	 */
	vm_map_simplify_entry(map, entry);

	/*
	 * Loop thru pages, entering them in the pmap.
	 */
	VM_OBJECT_LOCK(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
		/*
		 * Because this is kernel_pmap, this call will not block.
		 */
		pmap_enter(kernel_pmap, addr + i, VM_PROT_ALL, m, VM_PROT_ALL,
		    TRUE);
		vm_page_wakeup(m);
	}
	VM_OBJECT_UNLOCK(kmem_object);

	return (KERN_SUCCESS);
}

/*
 *	kmem_alloc_wait:
 *
 *	Allocates pageable memory from a sub-map of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 *
 *	This routine may block.
 */
vm_offset_t
kmem_alloc_wait(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;

	size = round_page(size);
	if (!swap_reserve(size))
		return (0);

	for (;;) {
		/*
		 * To make this work for more than one map, use the map's lock
		 * to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			swap_release(size);
			return (0);
		}
		map->needs_wakeup = TRUE;
		vm_map_unlock_and_wait(map, 0);
	}
	vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL,
	    VM_PROT_ALL, MAP_ACC_CHARGED);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup:
 *
 *	Returns memory to a submap of the kernel, and wakes up any processes
 *	waiting for memory in that map.
 */
void
kmem_free_wakeup(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{

	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	if (map->needs_wakeup) {
		map->needs_wakeup = FALSE;
		vm_map_wakeup(map);
	}
	vm_map_unlock(map);
}
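
/*
 * Example (illustrative; "args_size" is hypothetical): kmem_alloc_wait() and
 * kmem_free_wakeup() are used as a pair on pageable submaps such as
 * exec_map, sleeping for space on allocation and waking waiters on free:
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc_wait(exec_map, args_size);
 *	if (va == 0)
 *		return (ENOMEM);
 *	... copy argument strings into the pageable buffer ...
 *	kmem_free_wakeup(exec_map, va, args_size);
 */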

static void
kmem_init_zero_region(void)
{
	vm_offset_t addr, i;
	vm_page_t m;
	int error;

	/*
	 * Map a single physical page of zeros to a larger virtual range.
	 * This requires less looping in places that want large amounts of
	 * zeros, while not using much more physical resources.
	 */
	addr = kmem_alloc_nofault(kernel_map, ZERO_REGION_SIZE);
	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);
	for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE)
		pmap_qenter(addr + i, &m, 1);
	error = vm_map_protect(kernel_map, addr, addr + ZERO_REGION_SIZE,
	    VM_PROT_READ, TRUE);
	KASSERT(error == 0, ("error=%d", error));

	zero_region = (const void *)addr;
}
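
/*
 * Example (illustrative consumer, not defined here): code that must produce
 * a stream of zero bytes, such as a read of /dev/zero, can copy out of the
 * shared region instead of zeroing a private buffer:
 *
 *	len = min(uio->uio_resid, ZERO_REGION_SIZE);
 *	error = uiomove(__DECONST(void *, zero_region), len, uio);
 */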

/*
 * 	kmem_init:
 *
 *	Create the kernel map; insert a mapping covering kernel text,
 *	data, bss, and all space allocated thus far (`bootstrap' data).  The
 *	new map will thus map the range between VM_MIN_KERNEL_ADDRESS and
 *	`start' as allocated, and the range between `start' and `end' as free.
 */
void
kmem_init(start, end)
	vm_offset_t start, end;
{
	vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
	m->system_map = 1;
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	(void) vm_map_insert(m, NULL, (vm_ooffset_t) 0,
#ifdef __amd64__
	    KERNBASE,
#else
	    VM_MIN_KERNEL_ADDRESS,
#endif
	    start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);

	kmem_init_zero_region();
}

#ifdef DIAGNOSTIC
/*
 * Allow userspace to directly trigger the VM drain routine for testing
 * purposes.
 */
static int
debug_vm_lowmem(SYSCTL_HANDLER_ARGS)
{
	int error, i;

	i = 0;
	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error)
		return (error);
	if (i)
		EVENTHANDLER_INVOKE(vm_lowmem, 0);
	return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem, CTLTYPE_INT | CTLFLAG_RW, 0, 0,
    debug_vm_lowmem, "I", "set to trigger vm_lowmem event");
#endif