vm_kern.c revision 38799
168349Sobrien/*
2133359Sobrien * Copyright (c) 1991, 1993
3133359Sobrien *	The Regents of the University of California.  All rights reserved.
4133359Sobrien *
5133359Sobrien * This code is derived from software contributed to Berkeley by
6133359Sobrien * The Mach Operating System project at Carnegie-Mellon University.
7133359Sobrien *
8133359Sobrien * Redistribution and use in source and binary forms, with or without
9133359Sobrien * modification, are permitted provided that the following conditions
10133359Sobrien * are met:
11133359Sobrien * 1. Redistributions of source code must retain the above copyright
12133359Sobrien *    notice, this list of conditions and the following disclaimer.
13133359Sobrien * 2. Redistributions in binary form must reproduce the above copyright
14133359Sobrien *    notice, this list of conditions and the following disclaimer in the
15133359Sobrien *    documentation and/or other materials provided with the distribution.
16133359Sobrien * 3. All advertising materials mentioning features or use of this software
17133359Sobrien *    must display the following acknowledgement:
18133359Sobrien *	This product includes software developed by the University of
19133359Sobrien *	California, Berkeley and its contributors.
20133359Sobrien * 4. Neither the name of the University nor the names of its contributors
21133359Sobrien *    may be used to endorse or promote products derived from this software
22133359Sobrien *    without specific prior written permission.
23133359Sobrien *
24133359Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25133359Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26133359Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27133359Sobrien * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28133359Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2968349Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3068349Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3168349Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3268349Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3368349Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3468349Sobrien * SUCH DAMAGE.
35226048Sobrien *
3668349Sobrien *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
37103373Sobrien *
38103373Sobrien *
39103373Sobrien * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40103373Sobrien * All rights reserved.
4168349Sobrien *
4268349Sobrien * Authors: Avadis Tevanian, Jr., Michael Wayne Young
4368349Sobrien *
4468349Sobrien * Permission to use, copy, modify and distribute this software and
4568349Sobrien * its documentation is hereby granted, provided that both the copyright
4668349Sobrien * notice and this permission notice appear in all copies of the
4768349Sobrien * software, derivative works or modified versions, and any portions
4868349Sobrien * thereof, and that both notices appear in supporting documentation.
4968349Sobrien *
5068349Sobrien * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
5168349Sobrien * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
5268349Sobrien * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
5368349Sobrien *
54226048Sobrien * Carnegie Mellon requests users of this software to return to
5568349Sobrien *
5668349Sobrien *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
57330569Sgordon *  School of Computer Science
58330569Sgordon *  Carnegie Mellon University
59330569Sgordon *  Pittsburgh PA 15213-3890
60330569Sgordon *
61330569Sgordon * any improvements or extensions that they make and grant Carnegie the
62330569Sgordon * rights to redistribute these changes.
63330569Sgordon *
64330569Sgordon * $Id: vm_kern.c,v 1.49 1998/08/24 08:39:37 dfr Exp $
65330569Sgordon */
66330569Sgordon
67330569Sgordon/*
68330569Sgordon *	Kernel memory management.
69330569Sgordon */
70330569Sgordon
71330569Sgordon#include <sys/param.h>
72330569Sgordon#include <sys/systm.h>
73330569Sgordon#include <sys/proc.h>
74330569Sgordon#include <sys/malloc.h>
75330569Sgordon
76330569Sgordon#include <vm/vm.h>
77330569Sgordon#include <vm/vm_param.h>
78330569Sgordon#include <vm/vm_prot.h>
79330569Sgordon#include <sys/lock.h>
80330569Sgordon#include <vm/pmap.h>
81330569Sgordon#include <vm/vm_map.h>
82330569Sgordon#include <vm/vm_object.h>
83330569Sgordon#include <vm/vm_page.h>
84330569Sgordon#include <vm/vm_pageout.h>
85330569Sgordon#include <vm/vm_extern.h>
86330569Sgordon
87330569Sgordonvm_map_t kernel_map=0;
88330569Sgordonvm_map_t kmem_map=0;
89330569Sgordonvm_map_t exec_map=0;
90330569Sgordonvm_map_t clean_map=0;
91330569Sgordonvm_map_t u_map=0;
92330569Sgordonvm_map_t buffer_map=0;
9368349Sobrienvm_map_t mb_map=0;
9468349Sobrienint mb_map_full=0;
9568349Sobrienvm_map_t io_map=0;
9668349Sobrienvm_map_t phys_map=0;
9768349Sobrien
9868349Sobrien/*
9968349Sobrien *	kmem_alloc_pageable:
10068349Sobrien *
10168349Sobrien *	Allocate pageable memory to the kernel's address map.
10268349Sobrien *	"map" must be kernel_map or a submap of kernel_map.
10368349Sobrien */
10468349Sobrien
10568349Sobrienvm_offset_t
10668349Sobrienkmem_alloc_pageable(map, size)
10768349Sobrien	vm_map_t map;
10868349Sobrien	register vm_size_t size;
10968349Sobrien{
11068349Sobrien	vm_offset_t addr;
11168349Sobrien	register int result;
11268349Sobrien
11368349Sobrien	size = round_page(size);
11468349Sobrien	addr = vm_map_min(map);
11568349Sobrien	result = vm_map_find(map, NULL, (vm_offset_t) 0,
11668349Sobrien	    &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
11768349Sobrien	if (result != KERN_SUCCESS) {
11868349Sobrien		return (0);
11968349Sobrien	}
12068349Sobrien	return (addr);
12168349Sobrien}
12268349Sobrien
12368349Sobrien/*
12468349Sobrien *	Allocate wired-down memory in the kernel's address map
12568349Sobrien *	or a submap.
12668349Sobrien */
127226048Sobrienvm_offset_t
128226048Sobrienkmem_alloc(map, size)
129226048Sobrien	register vm_map_t map;
130226048Sobrien	register vm_size_t size;
13168349Sobrien{
132186690Sobrien	vm_offset_t addr;
133186690Sobrien	register vm_offset_t offset;
134186690Sobrien	vm_offset_t i;
135186690Sobrien
136186690Sobrien	size = round_page(size);
137186690Sobrien
138186690Sobrien	/*
139186690Sobrien	 * Use the kernel object for wired-down kernel pages. Assume that no
14068349Sobrien	 * region of the kernel object is referenced more than once.
141226048Sobrien	 */
142226048Sobrien
143226048Sobrien	/*
144330569Sgordon	 * Locate sufficient space in the map.  This will give us the final
14568349Sobrien	 * virtual address for the new memory, and thus will tell us the
14668349Sobrien	 * offset within the kernel map.
147226048Sobrien	 */
148226048Sobrien	vm_map_lock(map);
149226048Sobrien	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
15068349Sobrien		vm_map_unlock(map);
15168349Sobrien		return (0);
152226048Sobrien	}
153226048Sobrien	offset = addr - VM_MIN_KERNEL_ADDRESS;
154226048Sobrien	vm_object_reference(kernel_object);
15568349Sobrien	vm_map_insert(map, kernel_object, offset, addr, addr + size,
15668349Sobrien		VM_PROT_ALL, VM_PROT_ALL, 0);
15768349Sobrien	vm_map_unlock(map);
15868349Sobrien
15968349Sobrien	/*
16068349Sobrien	 * Guarantee that there are pages already in this object before
16168349Sobrien	 * calling vm_map_pageable.  This is to prevent the following
16268349Sobrien	 * scenario:
16368349Sobrien	 *
16468349Sobrien	 * 1) Threads have swapped out, so that there is a pager for the
16568349Sobrien	 * kernel_object. 2) The kmsg zone is empty, and so we are
16668349Sobrien	 * kmem_allocing a new page for it. 3) vm_map_pageable calls vm_fault;
16768349Sobrien	 * there is no page, but there is a pager, so we call
16868349Sobrien	 * pager_data_request.  But the kmsg zone is empty, so we must
16968349Sobrien	 * kmem_alloc. 4) goto 1 5) Even if the kmsg zone is not empty: when
17068349Sobrien	 * we get the data back from the pager, it will be (very stale)
17168349Sobrien	 * non-zero data.  kmem_alloc is defined to return zero-filled memory.
17268349Sobrien	 *
17368349Sobrien	 * We're intentionally not activating the pages we allocate to prevent a
17468349Sobrien	 * race with page-out.  vm_map_pageable will wire the pages.
17568349Sobrien	 */
17668349Sobrien
17768349Sobrien	for (i = 0; i < size; i += PAGE_SIZE) {
17868349Sobrien		vm_page_t mem;
17968349Sobrien
18068349Sobrien		mem = vm_page_grab(kernel_object, OFF_TO_IDX(offset + i),
18168349Sobrien				VM_ALLOC_ZERO | VM_ALLOC_RETRY);
18268349Sobrien		if ((mem->flags & PG_ZERO) == 0)
18368349Sobrien			vm_page_zero_fill(mem);
18468349Sobrien		vm_page_flag_clear(mem, (PG_BUSY | PG_ZERO));
18568349Sobrien		mem->valid = VM_PAGE_BITS_ALL;
18668349Sobrien	}
18768349Sobrien
18868349Sobrien	/*
18968349Sobrien	 * And finally, mark the data as non-pageable.
19068349Sobrien	 */
19168349Sobrien
19268349Sobrien	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);
19368349Sobrien
19468349Sobrien	return (addr);
19568349Sobrien}
19668349Sobrien
19768349Sobrien/*
19868349Sobrien *	kmem_free:
19968349Sobrien *
20068349Sobrien *	Release a region of kernel virtual memory allocated
20168349Sobrien *	with kmem_alloc, and return the physical pages
20268349Sobrien *	associated with that region.
20368349Sobrien */
20468349Sobrienvoid
20568349Sobrienkmem_free(map, addr, size)
20668349Sobrien	vm_map_t map;
20768349Sobrien	register vm_offset_t addr;
20868349Sobrien	vm_size_t size;
20968349Sobrien{
21068349Sobrien	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
21168349Sobrien}
21268349Sobrien
21368349Sobrien/*
21468349Sobrien *	kmem_suballoc:
21568349Sobrien *
21668349Sobrien *	Allocates a map to manage a subrange
21768349Sobrien *	of the kernel virtual address space.
21868349Sobrien *
21968349Sobrien *	Arguments are as follows:
22068349Sobrien *
22168349Sobrien *	parent		Map to take range from
22268349Sobrien *	size		Size of range to find
22368349Sobrien *	min, max	Returned endpoints of map
22468349Sobrien *	pageable	Can the region be paged
22568349Sobrien */
22668349Sobrienvm_map_t
22768349Sobrienkmem_suballoc(parent, min, max, size)
22868349Sobrien	register vm_map_t parent;
22968349Sobrien	vm_offset_t *min, *max;
23068349Sobrien	register vm_size_t size;
23168349Sobrien{
232103373Sobrien	register int ret;
233330569Sgordon	vm_map_t result;
234103373Sobrien
235330569Sgordon	size = round_page(size);
236330569Sgordon
237330569Sgordon	*min = (vm_offset_t) vm_map_min(parent);
238330569Sgordon	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
239330569Sgordon	    min, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
240330569Sgordon	if (ret != KERN_SUCCESS) {
241330569Sgordon		printf("kmem_suballoc: bad status return of %d.\n", ret);
242330569Sgordon		panic("kmem_suballoc");
243330569Sgordon	}
244330569Sgordon	*max = *min + size;
245330569Sgordon	pmap_reference(vm_map_pmap(parent));
246330569Sgordon	result = vm_map_create(vm_map_pmap(parent), *min, *max);
247330569Sgordon	if (result == NULL)
248330569Sgordon		panic("kmem_suballoc: cannot create submap");
249330569Sgordon	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
250330569Sgordon		panic("kmem_suballoc: unable to change range to submap");
251330569Sgordon	return (result);
252330569Sgordon}
253330569Sgordon
254330569Sgordon/*
255330569Sgordon * Allocate wired-down memory in the kernel's address map for the higher
256330569Sgordon * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
257330569Sgordon * kmem_alloc() because we may need to allocate memory at interrupt
258330569Sgordon * level where we cannot block (canwait == FALSE).
259330569Sgordon *
260330569Sgordon * This routine has its own private kernel submap (kmem_map) and object
26168349Sobrien * (kmem_object).  This, combined with the fact that only malloc uses
26268349Sobrien * this routine, ensures that we will never block in map or object waits.
26368349Sobrien *
26468349Sobrien * Note that this still only works in a uni-processor environment and
26568349Sobrien * when called at splhigh().
26668349Sobrien *
26768349Sobrien * We don't worry about expanding the map (adding entries) since entries
26868349Sobrien * for wired maps are statically allocated.
26968349Sobrien */
27068349Sobrienvm_offset_t
27168349Sobrienkmem_malloc(map, size, waitflag)
27268349Sobrien	register vm_map_t map;
27368349Sobrien	register vm_size_t size;
274175296Sobrien	boolean_t waitflag;
27568349Sobrien{
27668349Sobrien	register vm_offset_t offset, i;
27768349Sobrien	vm_map_entry_t entry;
27868349Sobrien	vm_offset_t addr;
279226048Sobrien	vm_page_t m;
28068349Sobrien
28168349Sobrien	if (map != kmem_map && map != mb_map)
28268349Sobrien		panic("kmem_malloc: map != {kmem,mb}_map");
283103373Sobrien
284103373Sobrien	size = round_page(size);
285226048Sobrien	addr = vm_map_min(map);
286226048Sobrien
287226048Sobrien	/*
288226048Sobrien	 * Locate sufficient space in the map.  This will give us the final
289226048Sobrien	 * virtual address for the new memory, and thus will tell us the
290226048Sobrien	 * offset within the kernel map.
291226048Sobrien	 */
292226048Sobrien	vm_map_lock(map);
293226048Sobrien	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
294226048Sobrien		vm_map_unlock(map);
295226048Sobrien		if (map == mb_map) {
296226048Sobrien			mb_map_full = TRUE;
297226048Sobrien			printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n");
298103373Sobrien			return (0);
299103373Sobrien		}
300226048Sobrien		if (waitflag == M_WAITOK)
301226048Sobrien			panic("kmem_malloc(%d): kmem_map too small: %d total allocated",
302226048Sobrien				size, map->size);
303226048Sobrien		return (0);
304226048Sobrien	}
305103373Sobrien	offset = addr - VM_MIN_KERNEL_ADDRESS;
306226048Sobrien	vm_object_reference(kmem_object);
307226048Sobrien	vm_map_insert(map, kmem_object, offset, addr, addr + size,
308226048Sobrien		VM_PROT_ALL, VM_PROT_ALL, 0);
309226048Sobrien
310226048Sobrien	for (i = 0; i < size; i += PAGE_SIZE) {
311226048Sobrienretry:
312226048Sobrien		m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i),
313226048Sobrien			(waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM);
314226048Sobrien
315226048Sobrien		/*
316226048Sobrien		 * Ran out of space, free everything up and return. Don't need
317226048Sobrien		 * to lock page queues here as we know that the pages we got
318226048Sobrien		 * aren't on any queues.
319226048Sobrien		 */
320226048Sobrien		if (m == NULL) {
321267843Sdelphij			if (waitflag == M_WAITOK) {
322267843Sdelphij				VM_WAIT;
323267843Sdelphij				goto retry;
324267843Sdelphij			}
325267843Sdelphij			while (i != 0) {
326267843Sdelphij				i -= PAGE_SIZE;
327267843Sdelphij				m = vm_page_lookup(kmem_object,
328267843Sdelphij					OFF_TO_IDX(offset + i));
329267843Sdelphij				vm_page_free(m);
330267843Sdelphij			}
331267843Sdelphij			vm_map_delete(map, addr, addr + size);
332267843Sdelphij			vm_map_unlock(map);
333267843Sdelphij			return (0);
334267843Sdelphij		}
335267843Sdelphij		vm_page_flag_clear(m, PG_ZERO);
336267843Sdelphij		m->valid = VM_PAGE_BITS_ALL;
337267843Sdelphij	}
338267843Sdelphij
339267843Sdelphij	/*
340267843Sdelphij	 * Mark map entry as non-pageable. Assert: vm_map_insert() will never
341267843Sdelphij	 * be able to extend the previous entry so there will be a new entry
342267843Sdelphij	 * exactly corresponding to this address range and it will have
343267843Sdelphij	 * wired_count == 0.
344267843Sdelphij	 */
345267843Sdelphij	if (!vm_map_lookup_entry(map, addr, &entry) ||
346267843Sdelphij	    entry->start != addr || entry->end != addr + size ||
347267843Sdelphij	    entry->wired_count)
348267843Sdelphij		panic("kmem_malloc: entry not found or misaligned");
349267843Sdelphij	entry->wired_count++;
350267843Sdelphij
351267843Sdelphij	vm_map_simplify_entry(map, entry);
352267843Sdelphij
353267843Sdelphij	/*
354267843Sdelphij	 * Loop thru pages, entering them in the pmap. (We cannot add them to
355267843Sdelphij	 * the wired count without wrapping the vm_page_queue_lock in
356267843Sdelphij	 * splimp...)
357267843Sdelphij	 */
358330569Sgordon	for (i = 0; i < size; i += PAGE_SIZE) {
359330569Sgordon		m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
360330569Sgordon		vm_page_wire(m);
361330569Sgordon		vm_page_wakeup(m);
362330569Sgordon		pmap_enter(kernel_pmap, addr + i, VM_PAGE_TO_PHYS(m),
363267843Sdelphij			VM_PROT_ALL, 1);
364267843Sdelphij		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE | PG_REFERENCED);
365267843Sdelphij	}
366186690Sobrien	vm_map_unlock(map);
367186690Sobrien
368186690Sobrien	return (addr);
369186690Sobrien}
370186690Sobrien
371186690Sobrien/*
372186690Sobrien *	kmem_alloc_wait
373186690Sobrien *
374186690Sobrien *	Allocates pageable memory from a sub-map of the kernel.  If the submap
375186690Sobrien *	has no room, the caller sleeps waiting for more memory in the submap.
376186690Sobrien *
377186690Sobrien */
378186690Sobrienvm_offset_t
379186690Sobrienkmem_alloc_wait(map, size)
380186690Sobrien	vm_map_t map;
381186690Sobrien	vm_size_t size;
382186690Sobrien{
383186690Sobrien	vm_offset_t addr;
384186690Sobrien
385186690Sobrien	size = round_page(size);
386186690Sobrien
387186690Sobrien	for (;;) {
388186690Sobrien		/*
389186690Sobrien		 * To make this work for more than one map, use the map's lock
390186690Sobrien		 * to lock out sleepers/wakers.
391186690Sobrien		 */
392186690Sobrien		vm_map_lock(map);
393186690Sobrien		if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0)
394186690Sobrien			break;
395186690Sobrien		/* no space now; see if we can ever get space */
396186690Sobrien		if (vm_map_max(map) - vm_map_min(map) < size) {
397186690Sobrien			vm_map_unlock(map);
398186690Sobrien			return (0);
399186690Sobrien		}
400186690Sobrien		vm_map_unlock(map);
401186690Sobrien		tsleep(map, PVM, "kmaw", 0);
402186690Sobrien	}
403186690Sobrien	vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
404186690Sobrien	vm_map_unlock(map);
405186690Sobrien	return (addr);
406186690Sobrien}
407186690Sobrien
408186690Sobrien/*
409186690Sobrien *	kmem_free_wakeup
410186690Sobrien *
411186690Sobrien *	Returns memory to a submap of the kernel, and wakes up any processes
412186690Sobrien *	waiting for memory in that map.
413186690Sobrien */
414186690Sobrienvoid
415186690Sobrienkmem_free_wakeup(map, addr, size)
416186690Sobrien	vm_map_t map;
417186690Sobrien	vm_offset_t addr;
418186690Sobrien	vm_size_t size;
419186690Sobrien{
420186690Sobrien	vm_map_lock(map);
421186690Sobrien	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
422186690Sobrien	wakeup(map);
423186690Sobrien	vm_map_unlock(map);
424186690Sobrien}
425186690Sobrien
426186690Sobrien/*
427186690Sobrien * Create the kernel map; insert a mapping covering kernel text, data, bss,
428186690Sobrien * and all space allocated thus far (`boostrap' data).  The new map will thus
429186690Sobrien * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
430186690Sobrien * the range between `start' and `end' as free.
431186690Sobrien */
432186690Sobrienvoid
43368349Sobrienkmem_init(start, end)
434	vm_offset_t start, end;
435{
436	register vm_map_t m;
437
438	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
439	vm_map_lock(m);
440	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
441	kernel_map = m;
442	kernel_map->system_map = 1;
443	(void) vm_map_insert(m, NULL, (vm_offset_t) 0,
444	    VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0);
445	/* ... and ending with the completion of the above `insert' */
446	vm_map_unlock(m);
447}
448