/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Kernel memory management.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 *	The map must be "kernel_map" below.
 */

vm_offset_t kmem_alloc_pageable(map, size)
	vm_map_t		map;
	register vm_size_t	size;
{
	vm_offset_t		addr;
	register int		result;

#if	0
	if (map != kernel_map)
		panic("kmem_alloc_pageable: not called with kernel_map");
#endif

	size = round_page(size);

	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, (vm_offset_t) 0,
				&addr, size, TRUE);
	if (result != KERN_SUCCESS) {
		return(0);
	}

	return(addr);
}
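
/*
 * Usage sketch (excluded from compilation): a typical caller requests
 * a pageable buffer and checks for address-space exhaustion.  The size
 * `nbytes' and the error handling here are hypothetical.
 */
#if 0
	vm_offset_t va;

	va = kmem_alloc_pageable(kernel_map, nbytes);
	if (va == 0)
		return (ENOMEM);	/* no space left in kernel_map */
#endif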

/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t kmem_alloc(map, size)
	register vm_map_t	map;
	register vm_size_t	size;
{
	vm_offset_t		addr;
	register vm_offset_t	offset;
	extern vm_object_t	kernel_object;
	vm_offset_t		i;

	size = round_page(size);

	/*
	 *	Use the kernel object for wired-down kernel pages.
	 *	Assume that no region of the kernel object is
	 *	referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map.  This will give us the
	 * final virtual address for the new memory, and thus will tell
	 * us the offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size);
	vm_map_unlock(map);

	/*
	 *	Guarantee that there are pages already in this object
	 *	before calling vm_map_pageable.  This is to prevent the
	 *	following scenario:
	 *
	 *		1) Threads have swapped out, so that there is a
	 *		   pager for the kernel_object.
	 *		2) The kmsg zone is empty, and so we are kmem_allocing
	 *		   a new page for it.
	 *		3) vm_map_pageable calls vm_fault; there is no page,
	 *		   but there is a pager, so we call
	 *		   pager_data_request.  But the kmsg zone is empty,
	 *		   so we must kmem_alloc.
	 *		4) goto 1
	 *		5) Even if the kmsg zone is not empty: when we get
	 *		   the data back from the pager, it will be (very
	 *		   stale) non-zero data.  kmem_alloc is defined to
	 *		   return zero-filled memory.
	 *
	 *	We're intentionally not activating the pages we allocate
	 *	to prevent a race with page-out.  vm_map_pageable will wire
	 *	the pages.
	 */

	vm_object_lock(kernel_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_page_t	mem;

		while ((mem = vm_page_alloc(kernel_object, offset + i)) == NULL) {
			vm_object_unlock(kernel_object);
			VM_WAIT;
			vm_object_lock(kernel_object);
		}
		vm_page_zero_fill(mem);
		mem->flags &= ~PG_BUSY;
	}
	vm_object_unlock(kernel_object);

	/*
	 *	And finally, mark the data as non-pageable.
	 */

	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);

	/*
	 *	Try to coalesce the map.
	 */

	vm_map_simplify(map, addr);

	return(addr);
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 */
void kmem_free(map, addr, size)
	vm_map_t		map;
	register vm_offset_t	addr;
	vm_size_t		size;
{
	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}
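
/*
 * Usage sketch (excluded from compilation): kmem_alloc and kmem_free
 * are used as a pair, with the same map and size; `nbytes' is a
 * hypothetical size, rounded up to a page multiple internally.
 */
#if 0
	vm_offset_t va;

	va = kmem_alloc(kernel_map, nbytes);
	if (va == 0)
		return (ENOMEM);
	/* ... use the zero-filled, wired pages at va ... */
	kmem_free(kernel_map, va, nbytes);
#endif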

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	min, max	Returned endpoints of map
 *	size		Size of range to find
 *	pageable	Can the region be paged
 */
vm_map_t kmem_suballoc(parent, min, max, size, pageable)
	register vm_map_t	parent;
	vm_offset_t		*min, *max;
	register vm_size_t	size;
	boolean_t		pageable;
{
	register int	ret;
	vm_map_t	result;

	size = round_page(size);

	*min = (vm_offset_t) vm_map_min(parent);
	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
				min, size, TRUE);
	if (ret != KERN_SUCCESS) {
		printf("kmem_suballoc: bad status return of %d.\n", ret);
		panic("kmem_suballoc");
	}
	*max = *min + size;
	pmap_reference(vm_map_pmap(parent));
	result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return(result);
}
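
/*
 * Usage sketch (excluded from compilation): machine-dependent startup
 * code typically carves submaps out of kernel_map this way; the names
 * `my_map', `minaddr', `maxaddr', and `MY_MAP_SIZE' are hypothetical.
 */
#if 0
	vm_offset_t minaddr, maxaddr;
	vm_map_t my_map;

	my_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
	    MY_MAP_SIZE, TRUE);
#endif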

/*
 * Allocate wired-down memory in the kernel's address map for the higher
 * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 * kmem_alloc() because we may need to allocate memory at interrupt
 * level where we cannot block (canwait == FALSE).
 *
 * This routine has its own private kernel submap (kmem_map) and object
 * (kmem_object).  This, combined with the fact that only malloc uses
 * this routine, ensures that we will never block in map or object waits.
 *
 * Note that this still only works in a uni-processor environment and
 * when called at splhigh().
 *
 * We don't worry about expanding the map (adding entries) since entries
 * for wired maps are statically allocated.
 */
vm_offset_t
kmem_malloc(map, size, canwait)
	register vm_map_t	map;
	register vm_size_t	size;
	boolean_t		canwait;
{
	register vm_offset_t	offset, i;
	vm_map_entry_t		entry;
	vm_offset_t		addr;
	vm_page_t		m;
	extern vm_object_t	kmem_object;

	if (map != kmem_map && map != mb_map)
		panic("kmem_malloc: map != {kmem,mb}_map");

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the
	 * final virtual address for the new memory, and thus will tell
	 * us the offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
#if 0
		if (canwait)		/* XXX  should wait */
			panic("kmem_malloc: %s too small",
			    map == kmem_map ? "kmem_map" : "mb_map");
#endif
		if (canwait)
			panic("kmem_malloc: map too small");
		return (0);
	}
	offset = addr - vm_map_min(kmem_map);
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size);

	/*
	 * If we can wait, just mark the range as wired
	 * (will fault pages as necessary).
	 */
	if (canwait) {
		vm_map_unlock(map);
		(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
				       FALSE);
		vm_map_simplify(map, addr);
		return(addr);
	}

	/*
	 * If we cannot wait then we must allocate all memory up front,
	 * pulling it off the active queue to prevent pageout.
	 */
	vm_object_lock(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_alloc(kmem_object, offset + i);

		/*
		 * Ran out of space, free everything up and return.
		 * Don't need to lock page queues here as we know
		 * that the pages we got aren't on any queues.
		 */
		if (m == NULL) {
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object, offset + i);
				vm_page_free(m);
			}
			vm_object_unlock(kmem_object);
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return(0);
		}
#if 0
		vm_page_zero_fill(m);
#endif
		m->flags &= ~PG_BUSY;
	}
	vm_object_unlock(kmem_object);

	/*
	 * Mark the map entry as non-pageable.
	 * Assert: vm_map_insert() will never be able to extend the previous
	 * entry so there will be a new entry exactly corresponding to this
	 * address range and it will have wired_count == 0.
	 */
	if (!vm_map_lookup_entry(map, addr, &entry) ||
	    entry->start != addr || entry->end != addr + size ||
	    entry->wired_count)
		panic("kmem_malloc: entry not found or misaligned");
	entry->wired_count++;

	/*
	 * Loop through the pages, entering them in the pmap.
	 * (We cannot add them to the wired count without
	 * wrapping the vm_page_queue_lock in splimp...)
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_object_lock(kmem_object);
		m = vm_page_lookup(kmem_object, offset + i);
		vm_object_unlock(kmem_object);
		pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
			   VM_PROT_DEFAULT, TRUE);
	}
	vm_map_unlock(map);

	vm_map_simplify(map, addr);
	return(addr);
}
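
/*
 * Usage sketch (excluded from compilation): this is roughly how the
 * higher-level allocator in kern/kern_malloc.c obtains backing pages;
 * `nbytes' and `flags' are hypothetical stand-ins for its arguments.
 */
#if 0
	vm_offset_t va;

	va = kmem_malloc(kmem_map, (vm_size_t)nbytes,
	    !(flags & M_NOWAIT));	/* canwait unless M_NOWAIT */
	if (va == 0)
		return (0);		/* kmem_map exhausted */
#endif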

/*
 *	kmem_alloc_wait
 *
 *	Allocates pageable memory from a submap of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 */
vm_offset_t kmem_alloc_wait(map, size)
	vm_map_t	map;
	vm_size_t	size;
{
	vm_offset_t	addr;

	size = round_page(size);

	for (;;) {
		/*
		 * To make this work for more than one map,
		 * use the map's lock to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, 0, size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			return (0);
		}
		assert_wait((int)map, TRUE);
		vm_map_unlock(map);
		thread_block("kmaw");
	}
	vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup
 *
 *	Returns memory to a submap of the kernel, and wakes up any threads
 *	waiting for memory in that map.
 */
void	kmem_free_wakeup(map, addr, size)
	vm_map_t	map;
	vm_offset_t	addr;
	vm_size_t	size;
{
	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	thread_wakeup((int)map);
	vm_map_unlock(map);
}
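
/*
 * Usage sketch (excluded from compilation): kmem_alloc_wait and
 * kmem_free_wakeup are used as a pair on a rendezvous submap, e.g.
 * for exec argument buffers; `arg_map' and `nbytes' are hypothetical.
 */
#if 0
	vm_offset_t va;

	va = kmem_alloc_wait(arg_map, nbytes);	/* may sleep for space */
	/* ... fill the buffer at va ... */
	kmem_free_wakeup(arg_map, va, nbytes);	/* wakes any sleepers */
#endif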

/*
 * Create the kernel map; insert a mapping covering kernel text, data, bss,
 * and all space allocated thus far (`bootstrap' data).  The new map will thus
 * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
 * the range between `start' and `end' as free.
 */
void kmem_init(start, end)
	vm_offset_t start, end;
{
	register vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	(void) vm_map_insert(m, NULL, (vm_offset_t)0,
	    VM_MIN_KERNEL_ADDRESS, start);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
}
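
/*
 * Usage sketch (excluded from compilation): the machine-dependent VM
 * startup code calls this once, passing the range of kernel virtual
 * addresses the pmap module has left available; `virtual_avail' and
 * `virtual_end' follow common pmap naming but are machine-dependent.
 */
#if 0
	kmem_init(virtual_avail, virtual_end);
#endif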