kern_malloc.c revision 92654
/*
 * Copyright (c) 1987, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_malloc.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: head/sys/kern/kern_malloc.c 92654 2002-03-19 09:11:49Z jeff $
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#if defined(INVARIANTS) && defined(__i386__)
#include <machine/cpu.h>
#endif

/*
 * When realloc() is called, if the new size is sufficiently smaller than
 * the old size, realloc() will allocate a new, smaller block to avoid
 * wasting memory. 'Sufficiently smaller' is defined as: newsize <=
 * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'.
 */
#ifndef REALLOC_FRACTION
#define	REALLOC_FRACTION	1	/* new block if <= half the size */
#endif
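
/*
 * Worked example (editorial note): with the default REALLOC_FRACTION of 1,
 * shrinking a 1024-byte allocation to 512 bytes or less (newsize <=
 * oldsize / 2) moves the data into a new, smaller block, while shrinking
 * it to, say, 600 bytes reuses the original block.
 */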

MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");

MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");

static void kmeminit __P((void *));
SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL)

static MALLOC_DEFINE(M_FREE, "free", "should be on free list");

static struct malloc_type *kmemstatistics;
static char *kmembase;
static char *kmemlimit;

#define KMEM_ZSHIFT	4
#define KMEM_ZBASE	16
#define KMEM_ZMASK	(KMEM_ZBASE - 1)

#define KMEM_ZMAX	65536
#define KMEM_ZSIZE	(KMEM_ZMAX >> KMEM_ZSHIFT)
static uma_zone_t kmemzones[KMEM_ZSIZE + 1];
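
/*
 * How the size-to-zone lookup works (editorial note): request sizes are
 * rounded up to the next multiple of KMEM_ZBASE (16), and the rounded
 * size, shifted right by KMEM_ZSHIFT, indexes kmemzones[].  Several
 * consecutive slots may point at the same zone; e.g. a 100-byte request
 * rounds up to 112, and kmemzones[112 >> 4] == kmemzones[7] is the
 * 128-byte zone.
 */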

/* These won't be powers of two for long */
struct {
	int size;
	char *name;
} kmemsizes[] = {
	{16, "16"},
	{32, "32"},
	{64, "64"},
	{128, "128"},
	{256, "256"},
	{512, "512"},
	{1024, "1024"},
	{2048, "2048"},
	{4096, "4096"},
	{8192, "8192"},
	{16384, "16384"},
	{32768, "32768"},
	{65536, "65536"},
	{0, NULL},
};

static struct mtx malloc_mtx;

u_int vm_kmem_size;

/*
 *	malloc:
 *
 *	Allocate a block of memory.
 *
 *	If M_NOWAIT is set, this routine will not block and will return NULL
 *	if the allocation fails.  If M_ZERO is set, the returned block is
 *	zeroed before it is handed back to the caller.
 */
void *
malloc(size, type, flags)
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	int s;
	long indx;
	caddr_t va;
	uma_zone_t zone;
	register struct malloc_type *ksp = type;

#if defined(INVARIANTS)
	if (flags == M_WAITOK)
		KASSERT(curthread->td_intr_nesting_level == 0,
		   ("malloc(M_WAITOK) in interrupt context"));
#endif
	s = splmem();
	/* mtx_lock(&malloc_mtx); XXX */
	while (ksp->ks_memuse >= ksp->ks_limit) {
		if (flags & M_NOWAIT) {
			splx(s);
			/* mtx_unlock(&malloc_mtx); XXX */
			return ((void *) NULL);
		}
		if (ksp->ks_limblocks < 65535)
			ksp->ks_limblocks++;
		msleep((caddr_t)ksp, /* &malloc_mtx */ NULL, PSWP+2, type->ks_shortdesc,
		    0);
	}
	/* mtx_unlock(&malloc_mtx); XXX */

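	/*
	 * Small requests are rounded up to the next KMEM_ZBASE boundary
	 * and satisfied from the matching kmemzones[] zone; anything
	 * larger than KMEM_ZMAX bypasses the zones and is allocated in
	 * whole pages via uma_large_malloc() (editorial note).
	 */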
	if (size <= KMEM_ZMAX) {
		indx = size;
		if (indx & KMEM_ZMASK)
			indx = (indx & ~KMEM_ZMASK) + KMEM_ZBASE;
		zone = kmemzones[indx >> KMEM_ZSHIFT];
		indx = zone->uz_size;
		va = uma_zalloc(zone, flags);
		if (va == NULL) {
			/* mtx_lock(&malloc_mtx); XXX */
			goto out;
		}
		ksp->ks_size |= indx;
	} else {
		/* XXX This is not the next power of two so this will break ks_size */
		indx = roundup(size, PAGE_SIZE);
		zone = NULL;
		va = uma_large_malloc(size, flags);
		if (va == NULL) {
			/* mtx_lock(&malloc_mtx); XXX */
			goto out;
		}
	}
	/* mtx_lock(&malloc_mtx); XXX */
	ksp->ks_memuse += indx;
	ksp->ks_inuse++;
out:
	ksp->ks_calls++;
	if (ksp->ks_memuse > ksp->ks_maxused)
		ksp->ks_maxused = ksp->ks_memuse;
	splx(s);
	/* mtx_unlock(&malloc_mtx); XXX */
	/* XXX: Do idle pre-zeroing.  */
	if (va != NULL && (flags & M_ZERO))
		bzero(va, size);
	return ((void *) va);
}

/*
 *	free:
 *
 *	Free a block of memory allocated by malloc.
 *
 *	This routine may not block.
 */
void
free(addr, type)
	void *addr;
	struct malloc_type *type;
{
	uma_slab_t slab;
	void *mem;
	u_long size;
	int s;
	register struct malloc_type *ksp = type;

	/* free(NULL, ...) does nothing */
	if (addr == NULL)
		return;

	size = 0;
	s = splmem();

	mem = (void *)((u_long)addr & (~UMA_SLAB_MASK));
	slab = hash_sfind(mallochash, mem);

	if (slab == NULL)
		panic("free: address %p(%p) has not been allocated.\n", addr, mem);

	if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
		size = slab->us_zone->uz_size;
		uma_zfree_arg(slab->us_zone, addr, slab);
	} else {
		size = slab->us_size;
		uma_large_free(slab);
	}
	/* mtx_lock(&malloc_mtx); XXX */

	ksp->ks_memuse -= size;
	if (ksp->ks_memuse + size >= ksp->ks_limit &&
	    ksp->ks_memuse < ksp->ks_limit)
		wakeup((caddr_t)ksp);
	ksp->ks_inuse--;
	splx(s);
	/* mtx_unlock(&malloc_mtx); XXX */
}
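
/*
 * Illustrative usage sketch (editorial note; "struct foo" and the local
 * names are hypothetical, M_TEMP is defined above):
 *
 *	struct foo *fp;
 *
 *	fp = malloc(sizeof(*fp), M_TEMP, M_WAITOK | M_ZERO);
 *	...
 *	free(fp, M_TEMP);
 *
 * The malloc_type passed to free() must match the one used to allocate
 * the block, since it is that type's statistics that are credited back.
 */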

/*
 *	realloc: change the size of a memory block
 */
void *
realloc(addr, size, type, flags)
	void *addr;
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	uma_slab_t slab;
	unsigned long alloc;
	void *newaddr;

	/* realloc(NULL, ...) is equivalent to malloc(...) */
	if (addr == NULL)
		return (malloc(size, type, flags));

	slab = hash_sfind(mallochash,
	    (void *)((u_long)addr & ~(UMA_SLAB_MASK)));

	/* Sanity check */
	KASSERT(slab != NULL,
	    ("realloc: address %p out of range", (void *)addr));

	/* Get the size of the original block */
	if (slab->us_zone)
		alloc = slab->us_zone->uz_size;
	else
		alloc = slab->us_size;

	/* Reuse the original block if appropriate */
	if (size <= alloc
	    && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE))
		return (addr);

	/* Allocate a new, bigger (or smaller) block */
	if ((newaddr = malloc(size, type, flags)) == NULL)
		return (NULL);

	/* Copy over original contents */
	bcopy(addr, newaddr, min(size, alloc));
	free(addr, type);
	return (newaddr);
}

/*
 *	reallocf: same as realloc() but free memory on failure.
 */
void *
reallocf(addr, size, type, flags)
	void *addr;
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	void *mem;

	if ((mem = realloc(addr, size, type, flags)) == NULL)
		free(addr, type);
	return (mem);
}
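
/*
 * Illustrative usage sketch (editorial note; "buf" and "len" are
 * hypothetical): reallocf() avoids the leak that occurs when a caller
 * overwrites its only copy of the pointer on a failed realloc():
 *
 *	buf = reallocf(buf, len, M_TEMP, M_NOWAIT);
 *	if (buf == NULL)
 *		return (ENOMEM);	-- the old buffer is already freed
 */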

/*
 * Initialize the kernel memory allocator
 */
/* ARGSUSED*/
static void
kmeminit(dummy)
	void *dummy;
{
	register long indx;
	u_long npg;
	u_long mem_size;
	void *hashmem;
	u_long hashsize;
	int highbit;
	int bits;
	int i;

	mtx_init(&malloc_mtx, "malloc", MTX_DEF);

	/*
	 * Try to auto-tune the kernel memory size, so that it is
	 * applicable to a wider range of machine sizes.  On an x86,
	 * a VM_KMEM_SIZE_SCALE value of 4 is good, while a VM_KMEM_SIZE
	 * of 12MB is a fair compromise.  VM_KMEM_SIZE_MAX depends on the
	 * maximum KVA space available; on an x86 with a total KVA space
	 * of 256MB, try to keep VM_KMEM_SIZE_MAX at 80MB or below.
	 *
	 * Note that the kmem_map is also used by the zone allocator,
	 * so make sure that there is enough space.
	 */
	vm_kmem_size = VM_KMEM_SIZE;
	mem_size = cnt.v_page_count * PAGE_SIZE;

#if defined(VM_KMEM_SIZE_SCALE)
	if ((mem_size / VM_KMEM_SIZE_SCALE) > vm_kmem_size)
		vm_kmem_size = mem_size / VM_KMEM_SIZE_SCALE;
#endif
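
	/*
	 * Example (editorial note): with 256MB of physical memory and a
	 * VM_KMEM_SIZE_SCALE of 4, the scaled value is 64MB, which wins
	 * over a 12MB VM_KMEM_SIZE and becomes the tentative vm_kmem_size.
	 */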

#if defined(VM_KMEM_SIZE_MAX)
	if (vm_kmem_size >= VM_KMEM_SIZE_MAX)
		vm_kmem_size = VM_KMEM_SIZE_MAX;
#endif

	/* Allow final override from the kernel environment */
	TUNABLE_INT_FETCH("kern.vm.kmem.size", &vm_kmem_size);

	/*
	 * Limit kmem virtual size to twice the physical memory.
	 * This allows for kmem map sparseness, but limits the size
	 * to something sane.  Be careful not to overflow the 32-bit
	 * ints while doing the check.
	 */
	if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE))
		vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;

	/*
	 * In mbuf_init(), we set up submaps for mbufs and clusters, in which
	 * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES),
	 * respectively. Mathematically, this means that what we do here may
	 * amount to slightly more address space than we need for the submaps,
	 * but it never hurts to have an extra page in kmem_map.
	 */
	npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt *
	    sizeof(u_int) + vm_kmem_size) / PAGE_SIZE;

	kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
		(vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE));
	kmem_map->system_map = 1;

	hashsize = npg * sizeof(void *);

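	/*
	 * Find the highest set bit in hashsize; if more than one bit is
	 * set, round hashsize down to the largest power of two below it
	 * (editorial note: e.g. 0x5000 has bits 12 and 14 set, so it
	 * becomes 1 << 14 == 0x4000).
	 */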
	highbit = 0;
	bits = 0;
	/* The hash size must be a power of two */
	for (i = 0; i < 8 * sizeof(hashsize); i++)
		if (hashsize & (1UL << i)) {
			highbit = i;
			bits++;
		}
	if (bits > 1)
		hashsize = 1UL << highbit;

	hashmem = (void *)kmem_alloc(kernel_map, (vm_size_t)hashsize);
	uma_startup2(hashmem, hashsize / sizeof(void *));

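	/*
	 * Create a zone for each size in kmemsizes[] and point every
	 * KMEM_ZBASE-sized bucket of kmemzones[] at the smallest zone
	 * that can satisfy it; i advances across the zones, so each
	 * bucket is assigned exactly once (editorial note).
	 */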
	for (i = 0, indx = 0; kmemsizes[indx].size != 0; indx++) {
		uma_zone_t zone;
		int size = kmemsizes[indx].size;
		char *name = kmemsizes[indx].name;

		zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
		for (; i <= size; i += KMEM_ZBASE)
			kmemzones[i >> KMEM_ZSHIFT] = zone;
	}
}

void
malloc_init(data)
	void *data;
{
	struct malloc_type *type = (struct malloc_type *)data;

	if (type->ks_magic != M_MAGIC)
		panic("malloc type lacks magic");

	if (type->ks_limit != 0)
		return;

	if (cnt.v_page_count == 0)
		panic("malloc_init not allowed before vm init");

	/*
	 * The default limit for each malloc type is one half of the
	 * malloc portion of the kmem map size.
	 */
	type->ks_limit = vm_kmem_size / 2;
	type->ks_next = kmemstatistics;
	kmemstatistics = type;
}

void
malloc_uninit(data)
	void *data;
{
	struct malloc_type *type = (struct malloc_type *)data;
	struct malloc_type *t;

	if (type->ks_magic != M_MAGIC)
		panic("malloc type lacks magic");

	if (cnt.v_page_count == 0)
		panic("malloc_uninit not allowed before vm init");

	if (type->ks_limit == 0)
		panic("malloc_uninit on uninitialized type");

	if (type == kmemstatistics)
		kmemstatistics = type->ks_next;
	else {
		for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) {
			if (t->ks_next == type) {
				t->ks_next = type->ks_next;
				break;
			}
		}
	}
	type->ks_next = NULL;
	type->ks_limit = 0;
}
463