vm_kern.c revision 16892
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_kern.c,v 1.26 1996/05/23 02:24:55 dyson Exp $
 */

/*
 *	Kernel memory management.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

vm_map_t buffer_map;
vm_map_t kernel_map;
vm_map_t kmem_map;
vm_map_t mb_map;
int mb_map_full;
vm_map_t mcl_map;
int mcl_map_full;
vm_map_t io_map;
vm_map_t clean_map;
vm_map_t phys_map;
vm_map_t exec_map;
vm_map_t exech_map;
vm_map_t u_map;

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory to the kernel's address map.
 *	"map" must be kernel_map or a submap of kernel_map.
 */

vm_offset_t
kmem_alloc_pageable(map, size)
	vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t addr;
	register int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, (vm_offset_t) 0,
	    &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
	if (result != KERN_SUCCESS) {
		return (0);
	}
	return (addr);
}
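
/*
 * A minimal usage sketch (hypothetical caller, not part of the original
 * file): pageable KVA from kmem_alloc_pageable() is returned with
 * kmem_free().  Wrapped in "#ifdef notdef" so it is never compiled.
 */
#ifdef notdef
static void
kmem_pageable_example(void)
{
	vm_offset_t va;
	vm_size_t sz = 4 * PAGE_SIZE;

	va = kmem_alloc_pageable(kernel_map, sz);	/* returns 0 on failure */
	if (va == 0)
		return;
	/* ... use [va, va + sz) as pageable kernel memory ... */
	kmem_free(kernel_map, va, sz);
}
#endif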

/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t
kmem_alloc(map, size)
	register vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t addr;
	register vm_offset_t offset;
	vm_offset_t i;

	size = round_page(size);

	/*
	 * Use the kernel object for wired-down kernel pages. Assume that no
	 * region of the kernel object is referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size,
		VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Guarantee that there are pages already in this object before
	 * calling vm_map_pageable.  This is to prevent the following
	 * scenario:
	 *
	 * 1) Threads have swapped out, so that there is a pager for the
	 *    kernel_object.
	 * 2) The kmsg zone is empty, and so we are kmem_alloc'ing a new
	 *    page for it.
	 * 3) vm_map_pageable calls vm_fault; there is no page, but there
	 *    is a pager, so we call pager_data_request.  But the kmsg zone
	 *    is empty, so we must kmem_alloc.
	 * 4) goto 1
	 * 5) Even if the kmsg zone is not empty: when we get the data back
	 *    from the pager, it will be (very stale) non-zero data.
	 *    kmem_alloc is defined to return zero-filled memory.
	 *
	 * We're intentionally not activating the pages we allocate to prevent a
	 * race with page-out.  vm_map_pageable will wire the pages.
	 */

	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_page_t mem;

		while ((mem = vm_page_alloc(kernel_object,
			OFF_TO_IDX(offset + i), VM_ALLOC_ZERO)) == NULL) {
			VM_WAIT;
		}
		if ((mem->flags & PG_ZERO) == 0)
			vm_page_zero_fill(mem);
		mem->flags &= ~(PG_BUSY|PG_ZERO);
		mem->valid = VM_PAGE_BITS_ALL;
	}

	/*
	 * And finally, mark the data as non-pageable.
	 */

	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);

	/*
	 * Try to coalesce the map
	 */
	vm_map_simplify(map, addr);

	return (addr);
}
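
/*
 * Worked example (not in the original source) of the offset arithmetic
 * above, assuming 4K pages (PAGE_SHIFT == 12) and, for concreteness, an
 * i386-style VM_MIN_KERNEL_ADDRESS of 0xf0000000:
 *
 *	addr   = 0xf0123000	(returned by vm_map_findspace)
 *	offset = addr - VM_MIN_KERNEL_ADDRESS = 0x00123000
 *	OFF_TO_IDX(offset) = offset >> PAGE_SHIFT = 0x123
 *
 * i.e. the page backing kernel VA "addr" lives at page index 0x123 of
 * kernel_object, so kernel_object's pages line up 1:1 with kernel VAs.
 */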

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 */
void
kmem_free(map, addr, size)
	vm_map_t map;
	register vm_offset_t addr;
	vm_size_t size;
{
	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	min, max	Returned endpoints of map
 *	size		Size of range to find
 *	pageable	Can the region be paged
 */
vm_map_t
kmem_suballoc(parent, min, max, size, pageable)
	register vm_map_t parent;
	vm_offset_t *min, *max;
	register vm_size_t size;
	boolean_t pageable;
{
	register int ret;
	vm_map_t result;

	size = round_page(size);

	*min = (vm_offset_t) vm_map_min(parent);
	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
	    min, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
	if (ret != KERN_SUCCESS) {
		printf("kmem_suballoc: bad status return of %d.\n", ret);
		panic("kmem_suballoc");
	}
	*max = *min + size;
	pmap_reference(vm_map_pmap(parent));
	result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return (result);
}
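
/*
 * A minimal sketch (hypothetical, not from the original file; the size is
 * made up) of how a boot-time submap such as the io_map declared above
 * might be carved out of kernel_map with kmem_suballoc().
 */
#ifdef notdef
static void
submap_example(void)
{
	vm_offset_t minaddr, maxaddr;

	io_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
	    16 * PAGE_SIZE, TRUE);
	/* io_map now manages [minaddr, maxaddr) as its own submap. */
}
#endif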

/*
 * Allocate wired-down memory in the kernel's address map for the higher
 * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 * kmem_alloc() because we may need to allocate memory at interrupt
 * level where we cannot block (canwait == FALSE).
 *
 * This routine has its own private kernel submap (kmem_map) and object
 * (kmem_object).  This, combined with the fact that only malloc uses
 * this routine, ensures that we will never block in map or object waits.
 *
 * Note that this still only works in a uni-processor environment and
 * when called at splhigh().
 *
 * We don't worry about expanding the map (adding entries) since entries
 * for wired maps are statically allocated.
 */
vm_offset_t
kmem_malloc(map, size, waitflag)
	register vm_map_t map;
	register vm_size_t size;
	boolean_t waitflag;
{
	register vm_offset_t offset, i;
	vm_map_entry_t entry;
	vm_offset_t addr;
	vm_page_t m;

	if (map != kmem_map && map != mb_map && map != mcl_map)
		panic("kmem_malloc: map != {kmem,mb,mcl}_map");

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		if (map == mb_map) {
			mb_map_full = TRUE;
			log(LOG_ERR, "Out of mbufs - increase maxusers!\n");
			return (0);
		}
		if (map == mcl_map) {
			mcl_map_full = TRUE;
			log(LOG_ERR,
			    "Out of mbuf clusters - increase maxusers!\n");
			return (0);
		}
		if (waitflag == M_WAITOK)
			panic("kmem_malloc: kmem_map too small");
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size,
		VM_PROT_ALL, VM_PROT_ALL, 0);

	for (i = 0; i < size; i += PAGE_SIZE) {
retry:
		m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i),
			(waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM);

		/*
		 * Ran out of space, free everything up and return. Don't need
		 * to lock page queues here as we know that the pages we got
		 * aren't on any queues.
		 */
		if (m == NULL) {
			if (waitflag == M_WAITOK) {
				VM_WAIT;
				goto retry;
			}
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object,
					OFF_TO_IDX(offset + i));
				PAGE_WAKEUP(m);
				vm_page_free(m);
			}
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return (0);
		}
		m->flags &= ~PG_ZERO;
		m->valid = VM_PAGE_BITS_ALL;
	}

	/*
	 * Mark map entry as non-pageable. Assert: vm_map_insert() will never
	 * be able to extend the previous entry so there will be a new entry
	 * exactly corresponding to this address range and it will have
	 * wired_count == 0.
	 */
	if (!vm_map_lookup_entry(map, addr, &entry) ||
	    entry->start != addr || entry->end != addr + size ||
	    entry->wired_count)
		panic("kmem_malloc: entry not found or misaligned");
	entry->wired_count++;

	/*
	 * Loop thru pages, entering them in the pmap. (We cannot add them to
	 * the wired count without wrapping the vm_page_queue_lock in
	 * splimp...)
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
		vm_page_wire(m);
		PAGE_WAKEUP(m);
		pmap_enter(kernel_pmap, addr + i, VM_PAGE_TO_PHYS(m),
			VM_PROT_ALL, 1);
		m->flags |= PG_MAPPED|PG_WRITEABLE;
	}
	vm_map_unlock(map);

	vm_map_simplify(map, addr);
	return (addr);
}
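
/*
 * A minimal sketch (hypothetical caller, not in the original file) of the
 * kern_malloc.c-style use of kmem_malloc(): grab wired kmem_map pages
 * without sleeping, as an interrupt-level allocation would.
 */
#ifdef notdef
static vm_offset_t
intr_alloc_example(vm_size_t nbytes)
{
	/* M_NOWAIT: fail with 0 rather than block in VM_WAIT. */
	return (kmem_malloc(kmem_map, nbytes, M_NOWAIT));
}
#endif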

/*
 *	kmem_alloc_wait
 *
 *	Allocates pageable memory from a sub-map of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 */
vm_offset_t
kmem_alloc_wait(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;

	size = round_page(size);

	for (;;) {
		/*
		 * To make this work for more than one map, use the map's lock
		 * to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, 0, size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			return (0);
		}
		vm_map_unlock(map);
		tsleep(map, PVM, "kmaw", 0);
	}
	vm_map_insert(map, NULL, (vm_offset_t) 0,
	    addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup
 *
 *	Returns memory to a submap of the kernel, and wakes up any processes
 *	waiting for memory in that map.
 */
void
kmem_free_wakeup(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{
	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	wakeup(map);
	vm_map_unlock(map);
}
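
/*
 * A minimal sketch (hypothetical, not in the original file) of the
 * intended pairing: kmem_alloc_wait() sleeps until the submap has room,
 * and kmem_free_wakeup() both frees the range and wakes the sleepers.
 */
#ifdef notdef
static void
wait_map_example(vm_map_t submap)
{
	vm_offset_t va;

	va = kmem_alloc_wait(submap, PAGE_SIZE);	/* may tsleep() */
	if (va == 0)
		return;		/* request larger than the whole submap */
	/* ... short-lived use of [va, va + PAGE_SIZE) ... */
	kmem_free_wakeup(submap, va, PAGE_SIZE);	/* does wakeup(submap) */
}
#endif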

/*
 * Create the kernel map; insert a mapping covering kernel text, data, bss,
 * and all space allocated thus far (`bootstrap' data).  The new map will thus
 * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
 * the range between `start' and `end' as free.
 */
void
kmem_init(start, end)
	vm_offset_t start, end;
{
	register vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	(void) vm_map_insert(m, NULL, (vm_offset_t) 0,
	    VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
}
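
/*
 * Illustration (hypothetical values, not from the original file) of the
 * ranges kmem_init() sets up, again assuming an i386-style
 * VM_MIN_KERNEL_ADDRESS of 0xf0000000:
 *
 *	kmem_init(start, end);
 *
 *	[VM_MIN_KERNEL_ADDRESS, start)	mapped: text, data, bss, and all
 *					bootstrap allocations made so far
 *	[start, end)			free: later carved into submaps
 *					(kmem_map, mb_map, ...)
 */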
468