vm_kern.c revision 12726
15455Sdg/*
21541Srgrimes * Copyright (c) 1991, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * This code is derived from software contributed to Berkeley by
61541Srgrimes * The Mach Operating System project at Carnegie-Mellon University.
71541Srgrimes *
81541Srgrimes * Redistribution and use in source and binary forms, with or without
91541Srgrimes * modification, are permitted provided that the following conditions
101541Srgrimes * are met:
111541Srgrimes * 1. Redistributions of source code must retain the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer.
131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer in the
151541Srgrimes *    documentation and/or other materials provided with the distribution.
161541Srgrimes * 3. All advertising materials mentioning features or use of this software
171541Srgrimes *    must display the following acknowledgement:
181541Srgrimes *	This product includes software developed by the University of
191541Srgrimes *	California, Berkeley and its contributors.
201541Srgrimes * 4. Neither the name of the University nor the names of its contributors
211541Srgrimes *    may be used to endorse or promote products derived from this software
221541Srgrimes *    without specific prior written permission.
231541Srgrimes *
241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
271541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
341541Srgrimes * SUCH DAMAGE.
351541Srgrimes *
361817Sdg *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
371541Srgrimes *
381541Srgrimes *
391541Srgrimes * Copyright (c) 1987, 1990 Carnegie-Mellon University.
401541Srgrimes * All rights reserved.
411541Srgrimes *
421541Srgrimes * Authors: Avadis Tevanian, Jr., Michael Wayne Young
435455Sdg *
441541Srgrimes * Permission to use, copy, modify and distribute this software and
451541Srgrimes * its documentation is hereby granted, provided that both the copyright
461541Srgrimes * notice and this permission notice appear in all copies of the
471541Srgrimes * software, derivative works or modified versions, and any portions
481541Srgrimes * thereof, and that both notices appear in supporting documentation.
495455Sdg *
505455Sdg * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
515455Sdg * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
521541Srgrimes * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
535455Sdg *
541541Srgrimes * Carnegie Mellon requests users of this software to return to
551541Srgrimes *
561541Srgrimes *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
571541Srgrimes *  School of Computer Science
581541Srgrimes *  Carnegie Mellon University
591541Srgrimes *  Pittsburgh PA 15213-3890
601541Srgrimes *
611541Srgrimes * any improvements or extensions that they make and grant Carnegie the
621541Srgrimes * rights to redistribute these changes.
631817Sdg *
6412726Sbde * $Id: vm_kern.c,v 1.18 1995/12/07 12:48:13 davidg Exp $
651541Srgrimes */
661541Srgrimes
671541Srgrimes/*
681541Srgrimes *	Kernel memory management.
691541Srgrimes */
701541Srgrimes
711541Srgrimes#include <sys/param.h>
721541Srgrimes#include <sys/systm.h>
732112Swollman#include <sys/kernel.h>
742112Swollman#include <sys/proc.h>
756129Sdg#include <sys/malloc.h>
767066Sdg#include <sys/syslog.h>
7712662Sdg#include <sys/queue.h>
7812662Sdg#include <sys/vmmeter.h>
791541Srgrimes
801541Srgrimes#include <vm/vm.h>
8112662Sdg#include <vm/vm_param.h>
8212662Sdg#include <vm/vm_prot.h>
8312662Sdg#include <vm/lock.h>
8412662Sdg#include <vm/pmap.h>
8512662Sdg#include <vm/vm_map.h>
8612662Sdg#include <vm/vm_object.h>
871541Srgrimes#include <vm/vm_page.h>
881541Srgrimes#include <vm/vm_pageout.h>
891541Srgrimes#include <vm/vm_kern.h>
9012726Sbde#include <vm/vm_extern.h>
911541Srgrimes
925455Sdgvm_map_t buffer_map;
935455Sdgvm_map_t kernel_map;
945455Sdgvm_map_t kmem_map;
955455Sdgvm_map_t mb_map;
969759Sbdeint mb_map_full;
975455Sdgvm_map_t io_map;
985455Sdgvm_map_t clean_map;
995455Sdgvm_map_t phys_map;
1005455Sdgvm_map_t exec_map;
1015455Sdgvm_map_t u_map;
1022112Swollman
1031541Srgrimes/*
1041541Srgrimes *	kmem_alloc_pageable:
1051541Srgrimes *
1061541Srgrimes *	Allocate pageable memory to the kernel's address map.
10712259Sdg *	"map" must be kernel_map or a submap of kernel_map.
1081541Srgrimes */
1091541Srgrimes
1108876Srgrimesvm_offset_t
1115455Sdgkmem_alloc_pageable(map, size)
1125455Sdg	vm_map_t map;
1135455Sdg	register vm_size_t size;
1141541Srgrimes{
1155455Sdg	vm_offset_t addr;
1165455Sdg	register int result;
1171541Srgrimes
1181541Srgrimes	size = round_page(size);
1191541Srgrimes	addr = vm_map_min(map);
1201541Srgrimes	result = vm_map_find(map, NULL, (vm_offset_t) 0,
1215455Sdg	    &addr, size, TRUE);
1221541Srgrimes	if (result != KERN_SUCCESS) {
1235455Sdg		return (0);
1241541Srgrimes	}
1255455Sdg	return (addr);
1261541Srgrimes}
1271541Srgrimes
1281541Srgrimes/*
1291541Srgrimes *	Allocate wired-down memory in the kernel's address map
1301541Srgrimes *	or a submap.
1311541Srgrimes */
1328876Srgrimesvm_offset_t
1335455Sdgkmem_alloc(map, size)
1345455Sdg	register vm_map_t map;
1355455Sdg	register vm_size_t size;
1361541Srgrimes{
1375455Sdg	vm_offset_t addr;
1385455Sdg	register vm_offset_t offset;
1395455Sdg	vm_offset_t i;
1401541Srgrimes
1411541Srgrimes	size = round_page(size);
1421541Srgrimes
1431541Srgrimes	/*
1445455Sdg	 * Use the kernel object for wired-down kernel pages. Assume that no
1455455Sdg	 * region of the kernel object is referenced more than once.
1461541Srgrimes	 */
1471541Srgrimes
1481541Srgrimes	/*
1495455Sdg	 * Locate sufficient space in the map.  This will give us the final
1505455Sdg	 * virtual address for the new memory, and thus will tell us the
1515455Sdg	 * offset within the kernel map.
1521541Srgrimes	 */
1531541Srgrimes	vm_map_lock(map);
1541541Srgrimes	if (vm_map_findspace(map, 0, size, &addr)) {
1551541Srgrimes		vm_map_unlock(map);
1561541Srgrimes		return (0);
1571541Srgrimes	}
1581541Srgrimes	offset = addr - VM_MIN_KERNEL_ADDRESS;
1591541Srgrimes	vm_object_reference(kernel_object);
1601541Srgrimes	vm_map_insert(map, kernel_object, offset, addr, addr + size);
1611541Srgrimes	vm_map_unlock(map);
1621541Srgrimes
1631541Srgrimes	/*
1645455Sdg	 * Guarantee that there are pages already in this object before
1655455Sdg	 * calling vm_map_pageable.  This is to prevent the following
1665455Sdg	 * scenario:
1678876Srgrimes	 *
1685455Sdg	 * 1) Threads have swapped out, so that there is a pager for the
1695455Sdg	 * kernel_object. 2) The kmsg zone is empty, and so we are
1705455Sdg	 * kmem_allocing a new page for it. 3) vm_map_pageable calls vm_fault;
1715455Sdg	 * there is no page, but there is a pager, so we call
1725455Sdg	 * pager_data_request.  But the kmsg zone is empty, so we must
1735455Sdg	 * kmem_alloc. 4) goto 1 5) Even if the kmsg zone is not empty: when
1745455Sdg	 * we get the data back from the pager, it will be (very stale)
1755455Sdg	 * non-zero data.  kmem_alloc is defined to return zero-filled memory.
1768876Srgrimes	 *
1775455Sdg	 * We're intentionally not activating the pages we allocate to prevent a
1785455Sdg	 * race with page-out.  vm_map_pageable will wire the pages.
1791541Srgrimes	 */
1801541Srgrimes
1815455Sdg	for (i = 0; i < size; i += PAGE_SIZE) {
1825455Sdg		vm_page_t mem;
1831541Srgrimes
18410548Sdyson		while ((mem = vm_page_alloc(kernel_object, offset + i, (VM_ALLOC_NORMAL|VM_ALLOC_ZERO))) == NULL) {
1851541Srgrimes			VM_WAIT;
1861541Srgrimes		}
18710548Sdyson		if ((mem->flags & PG_ZERO) == 0)
18810548Sdyson			vm_page_zero_fill(mem);
18910548Sdyson		mem->flags &= ~(PG_BUSY|PG_ZERO);
1906585Sdg		mem->valid = VM_PAGE_BITS_ALL;
1911541Srgrimes	}
1925455Sdg
1931541Srgrimes	/*
1945455Sdg	 * And finally, mark the data as non-pageable.
1951541Srgrimes	 */
1961541Srgrimes
1971541Srgrimes	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);
1981541Srgrimes
1991541Srgrimes	/*
2005455Sdg	 * Try to coalesce the map
2011541Srgrimes	 */
2021541Srgrimes	vm_map_simplify(map, addr);
2031541Srgrimes
2045455Sdg	return (addr);
2051541Srgrimes}
2061541Srgrimes
2071541Srgrimes/*
2081541Srgrimes *	kmem_free:
2091541Srgrimes *
2101541Srgrimes *	Release a region of kernel virtual memory allocated
2111541Srgrimes *	with kmem_alloc, and return the physical pages
2121541Srgrimes *	associated with that region.
2131541Srgrimes */
2148876Srgrimesvoid
2155455Sdgkmem_free(map, addr, size)
2165455Sdg	vm_map_t map;
2175455Sdg	register vm_offset_t addr;
2185455Sdg	vm_size_t size;
2191541Srgrimes{
2201541Srgrimes	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
2211541Srgrimes}
2221541Srgrimes
2231541Srgrimes/*
2241541Srgrimes *	kmem_suballoc:
2251541Srgrimes *
2261541Srgrimes *	Allocates a map to manage a subrange
2271541Srgrimes *	of the kernel virtual address space.
2281541Srgrimes *
2291541Srgrimes *	Arguments are as follows:
2301541Srgrimes *
2311541Srgrimes *	parent		Map to take range from
2321541Srgrimes *	size		Size of range to find
2331541Srgrimes *	min, max	Returned endpoints of map
2341541Srgrimes *	pageable	Can the region be paged
2351541Srgrimes */
2368876Srgrimesvm_map_t
2375455Sdgkmem_suballoc(parent, min, max, size, pageable)
2385455Sdg	register vm_map_t parent;
2395455Sdg	vm_offset_t *min, *max;
2405455Sdg	register vm_size_t size;
2415455Sdg	boolean_t pageable;
2421541Srgrimes{
2435455Sdg	register int ret;
2445455Sdg	vm_map_t result;
2451541Srgrimes
2461541Srgrimes	size = round_page(size);
2471541Srgrimes
2481541Srgrimes	*min = (vm_offset_t) vm_map_min(parent);
2491541Srgrimes	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
2505455Sdg	    min, size, TRUE);
2511541Srgrimes	if (ret != KERN_SUCCESS) {
2521541Srgrimes		printf("kmem_suballoc: bad status return of %d.\n", ret);
2531541Srgrimes		panic("kmem_suballoc");
2541541Srgrimes	}
2551541Srgrimes	*max = *min + size;
2561541Srgrimes	pmap_reference(vm_map_pmap(parent));
2571541Srgrimes	result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
2581541Srgrimes	if (result == NULL)
2591541Srgrimes		panic("kmem_suballoc: cannot create submap");
2601541Srgrimes	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
2611541Srgrimes		panic("kmem_suballoc: unable to change range to submap");
2625455Sdg	return (result);
2631541Srgrimes}
2641541Srgrimes
2651541Srgrimes/*
2661541Srgrimes * Allocate wired-down memory in the kernel's address map for the higher
2671541Srgrimes * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
2681541Srgrimes * kmem_alloc() because we may need to allocate memory at interrupt
2691541Srgrimes * level where we cannot block (canwait == FALSE).
2701541Srgrimes *
2711541Srgrimes * This routine has its own private kernel submap (kmem_map) and object
2721541Srgrimes * (kmem_object).  This, combined with the fact that only malloc uses
2731541Srgrimes * this routine, ensures that we will never block in map or object waits.
2741541Srgrimes *
2751541Srgrimes * Note that this still only works in a uni-processor environment and
2761541Srgrimes * when called at splhigh().
2771541Srgrimes *
2781541Srgrimes * We don't worry about expanding the map (adding entries) since entries
2791541Srgrimes * for wired maps are statically allocated.
2801541Srgrimes */
2811541Srgrimesvm_offset_t
2826129Sdgkmem_malloc(map, size, waitflag)
2835455Sdg	register vm_map_t map;
2845455Sdg	register vm_size_t size;
2856129Sdg	boolean_t waitflag;
2861541Srgrimes{
2875455Sdg	register vm_offset_t offset, i;
2885455Sdg	vm_map_entry_t entry;
2895455Sdg	vm_offset_t addr;
2905455Sdg	vm_page_t m;
2911541Srgrimes
2921541Srgrimes	if (map != kmem_map && map != mb_map)
2937066Sdg		panic("kmem_malloc: map != {kmem,mb}_map");
2941541Srgrimes
2951541Srgrimes	size = round_page(size);
2961541Srgrimes	addr = vm_map_min(map);
2971541Srgrimes
2981541Srgrimes	/*
2995455Sdg	 * Locate sufficient space in the map.  This will give us the final
3005455Sdg	 * virtual address for the new memory, and thus will tell us the
3015455Sdg	 * offset within the kernel map.
3021541Srgrimes	 */
3031541Srgrimes	vm_map_lock(map);
3041541Srgrimes	if (vm_map_findspace(map, 0, size, &addr)) {
3051541Srgrimes		vm_map_unlock(map);
3067066Sdg		if (map == mb_map) {
3077066Sdg			mb_map_full = TRUE;
3087066Sdg			log(LOG_ERR, "mb_map full\n");
3097066Sdg			return (0);
3107066Sdg		}
3116129Sdg		if (waitflag == M_WAITOK)
3127066Sdg			panic("kmem_malloc: kmem_map too small");
3131541Srgrimes		return (0);
3141541Srgrimes	}
3151541Srgrimes	offset = addr - vm_map_min(kmem_map);
3161541Srgrimes	vm_object_reference(kmem_object);
3171541Srgrimes	vm_map_insert(map, kmem_object, offset, addr, addr + size);
3181541Srgrimes
3191541Srgrimes	/*
3205455Sdg	 * If we can wait, just mark the range as wired (will fault pages as
3215455Sdg	 * necessary).
3221541Srgrimes	 */
3236129Sdg	if (waitflag == M_WAITOK) {
3241541Srgrimes		vm_map_unlock(map);
3251541Srgrimes		(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
3265455Sdg		    FALSE);
3271541Srgrimes		vm_map_simplify(map, addr);
3285455Sdg		return (addr);
3291541Srgrimes	}
3301541Srgrimes	/*
3311541Srgrimes	 * If we cannot wait then we must allocate all memory up front,
3321541Srgrimes	 * pulling it off the active queue to prevent pageout.
3331541Srgrimes	 */
3341541Srgrimes	for (i = 0; i < size; i += PAGE_SIZE) {
3356129Sdg		m = vm_page_alloc(kmem_object, offset + i,
3366129Sdg			(waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM);
3371541Srgrimes
3381541Srgrimes		/*
3395455Sdg		 * Ran out of space, free everything up and return. Don't need
3405455Sdg		 * to lock page queues here as we know that the pages we got
3415455Sdg		 * aren't on any queues.
3421541Srgrimes		 */
3431541Srgrimes		if (m == NULL) {
3441541Srgrimes			while (i != 0) {
3451541Srgrimes				i -= PAGE_SIZE;
3461541Srgrimes				m = vm_page_lookup(kmem_object, offset + i);
3471541Srgrimes				vm_page_free(m);
3481541Srgrimes			}
3491541Srgrimes			vm_map_delete(map, addr, addr + size);
3501541Srgrimes			vm_map_unlock(map);
3515455Sdg			return (0);
3521541Srgrimes		}
35310548Sdyson		m->flags &= ~(PG_BUSY|PG_ZERO);
3546585Sdg		m->valid = VM_PAGE_BITS_ALL;
3551541Srgrimes	}
3561541Srgrimes
3571541Srgrimes	/*
3585455Sdg	 * Mark map entry as non-pageable. Assert: vm_map_insert() will never
3595455Sdg	 * be able to extend the previous entry so there will be a new entry
3605455Sdg	 * exactly corresponding to this address range and it will have
3615455Sdg	 * wired_count == 0.
3621541Srgrimes	 */
3631541Srgrimes	if (!vm_map_lookup_entry(map, addr, &entry) ||
3641541Srgrimes	    entry->start != addr || entry->end != addr + size ||
3651541Srgrimes	    entry->wired_count)
3661541Srgrimes		panic("kmem_malloc: entry not found or misaligned");
3671541Srgrimes	entry->wired_count++;
3681541Srgrimes
3691541Srgrimes	/*
3705455Sdg	 * Loop thru pages, entering them in the pmap. (We cannot add them to
3715455Sdg	 * the wired count without wrapping the vm_page_queue_lock in
3725455Sdg	 * splimp...)
3731541Srgrimes	 */
3741541Srgrimes	for (i = 0; i < size; i += PAGE_SIZE) {
3751541Srgrimes		m = vm_page_lookup(kmem_object, offset + i);
3765455Sdg		pmap_kenter(addr + i, VM_PAGE_TO_PHYS(m));
3771541Srgrimes	}
3781541Srgrimes	vm_map_unlock(map);
3791541Srgrimes
3801541Srgrimes	vm_map_simplify(map, addr);
3815455Sdg	return (addr);
3821541Srgrimes}
3831541Srgrimes
3841541Srgrimes/*
3851541Srgrimes *	kmem_alloc_wait
3861541Srgrimes *
3871541Srgrimes *	Allocates pageable memory from a sub-map of the kernel.  If the submap
3881541Srgrimes *	has no room, the caller sleeps waiting for more memory in the submap.
3891541Srgrimes *
3901541Srgrimes */
3918876Srgrimesvm_offset_t
3925455Sdgkmem_alloc_wait(map, size)
3935455Sdg	vm_map_t map;
3945455Sdg	vm_size_t size;
3951541Srgrimes{
3965455Sdg	vm_offset_t addr;
3971541Srgrimes
3981541Srgrimes	size = round_page(size);
3991541Srgrimes
4001541Srgrimes	for (;;) {
4011541Srgrimes		/*
4025455Sdg		 * To make this work for more than one map, use the map's lock
4035455Sdg		 * to lock out sleepers/wakers.
4041541Srgrimes		 */
4051541Srgrimes		vm_map_lock(map);
4061541Srgrimes		if (vm_map_findspace(map, 0, size, &addr) == 0)
4071541Srgrimes			break;
4081541Srgrimes		/* no space now; see if we can ever get space */
4091541Srgrimes		if (vm_map_max(map) - vm_map_min(map) < size) {
4101541Srgrimes			vm_map_unlock(map);
4111541Srgrimes			return (0);
4121541Srgrimes		}
4131541Srgrimes		vm_map_unlock(map);
4149507Sdg		tsleep(map, PVM, "kmaw", 0);
4151541Srgrimes	}
4165455Sdg	vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size);
4171541Srgrimes	vm_map_unlock(map);
4181541Srgrimes	return (addr);
4191541Srgrimes}
4201541Srgrimes
4211541Srgrimes/*
4221541Srgrimes *	kmem_free_wakeup
4231541Srgrimes *
4249507Sdg *	Returns memory to a submap of the kernel, and wakes up any processes
4251541Srgrimes *	waiting for memory in that map.
4261541Srgrimes */
4278876Srgrimesvoid
4285455Sdgkmem_free_wakeup(map, addr, size)
4295455Sdg	vm_map_t map;
4305455Sdg	vm_offset_t addr;
4315455Sdg	vm_size_t size;
4321541Srgrimes{
4331541Srgrimes	vm_map_lock(map);
4341541Srgrimes	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
4359507Sdg	wakeup(map);
4361541Srgrimes	vm_map_unlock(map);
4371541Srgrimes}
4381541Srgrimes
4391541Srgrimes/*
4401541Srgrimes * Create the kernel map; insert a mapping covering kernel text, data, bss,
4411541Srgrimes * and all space allocated thus far (`boostrap' data).  The new map will thus
4421541Srgrimes * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
4431541Srgrimes * the range between `start' and `end' as free.
4441541Srgrimes */
4458876Srgrimesvoid
4465455Sdgkmem_init(start, end)
4471541Srgrimes	vm_offset_t start, end;
4481541Srgrimes{
4491541Srgrimes	register vm_map_t m;
4501541Srgrimes
4511541Srgrimes	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
4521541Srgrimes	vm_map_lock(m);
4531541Srgrimes	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
4541541Srgrimes	kernel_map = m;
4555455Sdg	(void) vm_map_insert(m, NULL, (vm_offset_t) 0,
4561541Srgrimes	    VM_MIN_KERNEL_ADDRESS, start);
4571541Srgrimes	/* ... and ending with the completion of the above `insert' */
4581541Srgrimes	vm_map_unlock(m);
4591541Srgrimes}
460