/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_kern.c,v 1.39 1997/08/05 00:01:52 dyson Exp $
 */

/*
 *	Kernel memory management.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/syslog.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

vm_map_t kernel_map=0;
vm_map_t kmem_map=0;
vm_map_t exec_map=0;
vm_map_t clean_map=0;
vm_map_t u_map=0;
vm_map_t buffer_map=0;
vm_map_t mb_map=0;
int mb_map_full=0;
vm_map_t io_map=0;
vm_map_t phys_map=0;

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory to the kernel's address map.
 *	"map" must be kernel_map or a submap of kernel_map.
 */

vm_offset_t
kmem_alloc_pageable(map, size)
	vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t addr;
	register int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, (vm_offset_t) 0,
	    &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
	if (result != KERN_SUCCESS) {
		return (0);
	}
	return (addr);
}
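
/*
 * Usage sketch (illustrative only): grabbing pageable kernel VA from a
 * submap declared above.  The choice of exec_map and the size here are
 * hypothetical, not taken from a real caller.
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc_pageable(exec_map, 4 * PAGE_SIZE);
 *	if (va == 0)
 *		panic("exec_map exhausted");
 */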

/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t
kmem_alloc(map, size)
	register vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t addr;
	register vm_offset_t offset;
	vm_offset_t i;

	size = round_page(size);

	/*
	 * Use the kernel object for wired-down kernel pages. Assume that no
	 * region of the kernel object is referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size,
		VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Guarantee that there are pages already in this object before
	 * calling vm_map_pageable.  This is to prevent the following
	 * scenario:
	 *
	 * 1) Threads have swapped out, so that there is a pager for the
	 *    kernel_object.
	 * 2) The kmsg zone is empty, and so we are kmem_allocing a new page
	 *    for it.
	 * 3) vm_map_pageable calls vm_fault; there is no page, but there is
	 *    a pager, so we call pager_data_request.  But the kmsg zone is
	 *    empty, so we must kmem_alloc.
	 * 4) goto 1
	 * 5) Even if the kmsg zone is not empty: when we get the data back
	 *    from the pager, it will be (very stale) non-zero data.
	 *    kmem_alloc is defined to return zero-filled memory.
	 *
	 * We're intentionally not activating the pages we allocate to
	 * prevent a race with page-out.  vm_map_pageable will wire the
	 * pages.
	 */

	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_page_t mem;

		while ((mem = vm_page_alloc(kernel_object,
			OFF_TO_IDX(offset + i), VM_ALLOC_ZERO)) == NULL) {
			VM_WAIT;
		}
		if ((mem->flags & PG_ZERO) == 0)
			vm_page_zero_fill(mem);
		mem->flags &= ~(PG_BUSY|PG_ZERO);
		mem->valid = VM_PAGE_BITS_ALL;
	}

	/*
	 * And finally, mark the data as non-pageable.
	 */

	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);

	return (addr);
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 */
void
kmem_free(map, addr, size)
	vm_map_t map;
	register vm_offset_t addr;
	vm_size_t size;
{
	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}
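
/*
 * Usage sketch (illustrative only): a typical kmem_alloc/kmem_free
 * pairing.  The buffer size is hypothetical; kmem_alloc returns
 * zero-filled, wired pages, or 0 if the map has no room.
 *
 *	vm_offset_t buf;
 *	vm_size_t bufsize = 8 * PAGE_SIZE;
 *
 *	buf = kmem_alloc(kernel_map, bufsize);
 *	if (buf == 0)
 *		return (ENOMEM);
 *	...
 *	kmem_free(kernel_map, buf, bufsize);
 */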

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	size		Size of range to find
 *	min, max	Returned endpoints of map
 */
vm_map_t
kmem_suballoc(parent, min, max, size)
	register vm_map_t parent;
	vm_offset_t *min, *max;
	register vm_size_t size;
{
	register int ret;
	vm_map_t result;

	size = round_page(size);

	*min = (vm_offset_t) vm_map_min(parent);
	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
	    min, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
	if (ret != KERN_SUCCESS) {
		printf("kmem_suballoc: bad status return of %d.\n", ret);
		panic("kmem_suballoc");
	}
	*max = *min + size;
	pmap_reference(vm_map_pmap(parent));
	result = vm_map_create(vm_map_pmap(parent), *min, *max);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return (result);
}
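
/*
 * Usage sketch (illustrative only): carving a submap out of kernel_map
 * at boot time, in the style of the maps declared at the top of this
 * file.  The size is hypothetical; min/max receive the submap's range.
 *
 *	vm_offset_t minaddr, maxaddr;
 *
 *	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 *	    16 * PAGE_SIZE);
 */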

/*
 * Allocate wired-down memory in the kernel's address map for the higher
 * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 * kmem_alloc() because we may need to allocate memory at interrupt
 * level where we cannot block (canwait == FALSE).
 *
 * This routine has its own private kernel submap (kmem_map) and object
 * (kmem_object).  This, combined with the fact that only malloc uses
 * this routine, ensures that we will never block in map or object waits.
 *
 * Note that this still only works in a uni-processor environment and
 * when called at splhigh().
 *
 * We don't worry about expanding the map (adding entries) since entries
 * for wired maps are statically allocated.
 */
vm_offset_t
kmem_malloc(map, size, waitflag)
	register vm_map_t map;
	register vm_size_t size;
	boolean_t waitflag;
{
	register vm_offset_t offset, i;
	vm_map_entry_t entry;
	vm_offset_t addr;
	vm_page_t m;

	if (map != kmem_map && map != mb_map)
		panic("kmem_malloc: map != {kmem,mb}_map");

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		if (map == mb_map) {
			mb_map_full = TRUE;
			log(LOG_ERR, "Out of mbuf clusters - increase maxusers!\n");
			return (0);
		}
		if (waitflag == M_WAITOK)
			panic("kmem_malloc: kmem_map too small");
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size,
		VM_PROT_ALL, VM_PROT_ALL, 0);

	for (i = 0; i < size; i += PAGE_SIZE) {
retry:
		m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i),
			(waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM);

		/*
		 * Ran out of space, free everything up and return. Don't need
		 * to lock page queues here as we know that the pages we got
		 * aren't on any queues.
		 */
		if (m == NULL) {
			if (waitflag == M_WAITOK) {
				VM_WAIT;
				goto retry;
			}
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object,
					OFF_TO_IDX(offset + i));
				PAGE_WAKEUP(m);
				vm_page_free(m);
			}
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return (0);
		}
		m->flags &= ~PG_ZERO;
		m->valid = VM_PAGE_BITS_ALL;
	}

	/*
	 * Mark map entry as non-pageable. Assert: vm_map_insert() will never
	 * be able to extend the previous entry so there will be a new entry
	 * exactly corresponding to this address range and it will have
	 * wired_count == 0.
	 */
	if (!vm_map_lookup_entry(map, addr, &entry) ||
	    entry->start != addr || entry->end != addr + size ||
	    entry->wired_count)
		panic("kmem_malloc: entry not found or misaligned");
	entry->wired_count++;

	vm_map_simplify_entry(map, entry);

	/*
	 * Loop thru pages, entering them in the pmap. (We cannot add them to
	 * the wired count without wrapping the vm_page_queue_lock in
	 * splimp...)
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
		vm_page_wire(m);
		PAGE_WAKEUP(m);
		pmap_enter(kernel_pmap, addr + i, VM_PAGE_TO_PHYS(m),
			VM_PROT_ALL, 1);
		m->flags |= PG_MAPPED|PG_WRITEABLE;
	}
	vm_map_unlock(map);

	return (addr);
}
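
/*
 * Usage sketch (illustrative only): how a malloc(9)-style caller might
 * back a request with this routine.  The variables are hypothetical.
 * M_NOWAIT selects the non-blocking path, which returns 0 rather than
 * sleeping.
 *
 *	vm_offset_t va;
 *
 *	va = kmem_malloc(kmem_map, round_page(nbytes), M_NOWAIT);
 *	if (va == 0)
 *		return (NULL);
 *
 * With M_WAITOK the routine instead sleeps in VM_WAIT until pages
 * appear, and panics if kmem_map itself is too small.
 */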

/*
 *	kmem_alloc_wait:
 *
 *	Allocates pageable memory from a sub-map of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 */
vm_offset_t
kmem_alloc_wait(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;

	size = round_page(size);

	for (;;) {
		/*
		 * To make this work for more than one map, use the map's lock
		 * to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, 0, size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			return (0);
		}
		vm_map_unlock(map);
		tsleep(map, PVM, "kmaw", 0);
	}
	vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup:
 *
 *	Returns memory to a submap of the kernel, and wakes up any processes
 *	waiting for memory in that map.
 */
void
kmem_free_wakeup(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{
	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	wakeup(map);
	vm_map_unlock(map);
}
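
/*
 * Usage sketch (illustrative only): the sleep/wakeup pairing these two
 * routines provide on a submap.  The map, va and size are hypothetical.
 *
 *	Consumer (blocks in tsleep until the submap has room):
 *		va = kmem_alloc_wait(exec_map, size);
 *
 *	Producer (releasing space wakes any sleepers on the map):
 *		kmem_free_wakeup(exec_map, va, size);
 */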

/*
 * Create the kernel map; insert a mapping covering kernel text, data, bss,
 * and all space allocated thus far (`bootstrap' data).  The new map will thus
 * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
 * the range between `start' and `end' as free.
 */
void
kmem_init(start, end)
	vm_offset_t start, end;
{
	register vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	kernel_map->system_map = 1;
	(void) vm_map_insert(m, NULL, (vm_offset_t) 0,
	    VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
}
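
/*
 * Usage sketch (illustrative only): machine-dependent startup code is
 * expected to call this once, very early, with the kernel VA range
 * still free after bootstrap allocations, e.g.:
 *
 *	kmem_init(virtual_avail, virtual_end);
 *
 * (virtual_avail/virtual_end are the usual pmap bootstrap symbols;
 * whether a given port uses exactly these names is an assumption.)
 */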