kern_malloc.c revision 95923
/*
 * Copyright (c) 1987, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_malloc.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: head/sys/kern/kern_malloc.c 95923 2002-05-02 07:22:19Z jeff $
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

#if defined(INVARIANTS) && defined(__i386__)
#include <machine/cpu.h>
#endif

/*
 * When realloc() is called, if the new size is sufficiently smaller than
 * the old size, realloc() will allocate a new, smaller block to avoid
 * wasting memory.  'Sufficiently smaller' is defined as: newsize <=
 * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'.
 */
#ifndef REALLOC_FRACTION
#define	REALLOC_FRACTION	1	/* new block if <= half the size */
#endif
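/*
 * Example: with the default REALLOC_FRACTION of 1, "sufficiently
 * smaller" means newsize <= oldsize / 2.  Shrinking a 1024-byte block
 * to 512 bytes or less therefore allocates a fresh block and copies the
 * contents, while shrinking it to, say, 600 bytes simply reuses the
 * original block (see the size check in realloc() below).
 */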
MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");

MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");

static void kmeminit(void *);
SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL)

static MALLOC_DEFINE(M_FREE, "free", "should be on free list");

static struct malloc_type *kmemstatistics;
static char *kmembase;
static char *kmemlimit;

#define	KMEM_ZSHIFT	4
#define	KMEM_ZBASE	16
#define	KMEM_ZMASK	(KMEM_ZBASE - 1)

#define	KMEM_ZMAX	65536
#define	KMEM_ZSIZE	(KMEM_ZMAX >> KMEM_ZSHIFT)
static u_int8_t kmemsize[KMEM_ZSIZE + 1];

/* These won't be powers of two for long */
struct {
	int kz_size;
	char *kz_name;
	uma_zone_t kz_zone;
} kmemzones[] = {
	{16, "16", NULL},
	{32, "32", NULL},
	{64, "64", NULL},
	{128, "128", NULL},
	{256, "256", NULL},
	{512, "512", NULL},
	{1024, "1024", NULL},
	{2048, "2048", NULL},
	{4096, "4096", NULL},
	{8192, "8192", NULL},
	{16384, "16384", NULL},
	{32768, "32768", NULL},
	{65536, "65536", NULL},
	{0, NULL},
};

u_int vm_kmem_size;

/*
 * The malloc_mtx protects the kmemstatistics linked list as well as the
 * mallochash.
 */

struct mtx malloc_mtx;

#ifdef MALLOC_PROFILE
uint64_t krequests[KMEM_ZSIZE + 1];

static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS);
#endif

static int sysctl_kern_malloc(SYSCTL_HANDLER_ARGS);

/*
 *	malloc:
 *
 *	Allocate a block of memory.
 *
 *	If M_NOWAIT is set, this routine will not block and will return
 *	NULL if the allocation fails.
 */
void *
malloc(size, type, flags)
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	int indx;
	caddr_t va;
	uma_zone_t zone;
	register struct malloc_type *ksp = type;

#if 0
	if (size == 0)
		Debugger("zero size malloc");
#endif
#if defined(INVARIANTS)
	if (flags == M_WAITOK)
		KASSERT(curthread->td_intr_nesting_level == 0,
		    ("malloc(M_WAITOK) in interrupt context"));
#endif
	if (size <= KMEM_ZMAX) {
		if (size & KMEM_ZMASK)
			size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
		indx = kmemsize[size >> KMEM_ZSHIFT];
		zone = kmemzones[indx].kz_zone;
#ifdef MALLOC_PROFILE
		krequests[size >> KMEM_ZSHIFT]++;
#endif
		va = uma_zalloc(zone, flags);
		mtx_lock(&ksp->ks_mtx);
		if (va == NULL)
			goto out;

		ksp->ks_size |= 1 << indx;
		size = zone->uz_size;
	} else {
		size = roundup(size, PAGE_SIZE);
		zone = NULL;
		va = uma_large_malloc(size, flags);
		mtx_lock(&ksp->ks_mtx);
		if (va == NULL)
			goto out;
	}
	ksp->ks_memuse += size;
	ksp->ks_inuse++;
out:
	ksp->ks_calls++;
	if (ksp->ks_memuse > ksp->ks_maxused)
		ksp->ks_maxused = ksp->ks_memuse;

	mtx_unlock(&ksp->ks_mtx);
	return ((void *) va);
}
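/*
 * Worked example of the size-class lookup above: a malloc(100) request
 * has (100 & KMEM_ZMASK) != 0, so the size is rounded up to
 * (100 & ~15) + 16 = 112 bytes.  kmemsize[112 >> KMEM_ZSHIFT], i.e.
 * kmemsize[7], then yields the index of the smallest zone that fits
 * (the 128-byte zone, given the table built in kmeminit() below), so
 * the caller receives a 128-byte chunk and the malloc type's
 * statistics are charged for 128 bytes.
 */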
/*
 *	free:
 *
 *	Free a block of memory allocated by malloc.
 *
 *	This routine may not block.
 */
void
free(addr, type)
	void *addr;
	struct malloc_type *type;
{
	uma_slab_t slab;
	void *mem;
	u_long size;
	register struct malloc_type *ksp = type;

	/* free(NULL, ...) does nothing */
	if (addr == NULL)
		return;

	size = 0;

	mem = (void *)((u_long)addr & (~UMA_SLAB_MASK));
	mtx_lock(&malloc_mtx);
	slab = hash_sfind(mallochash, mem);
	mtx_unlock(&malloc_mtx);

	if (slab == NULL)
		panic("free: address %p(%p) has not been allocated.\n",
		    addr, mem);

	if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
		size = slab->us_zone->uz_size;
		uma_zfree_arg(slab->us_zone, addr, slab);
	} else {
		size = slab->us_size;
		uma_large_free(slab);
	}
	mtx_lock(&ksp->ks_mtx);
	ksp->ks_memuse -= size;
	ksp->ks_inuse--;
	mtx_unlock(&ksp->ks_mtx);
}

/*
 *	realloc: change the size of a memory block
 */
void *
realloc(addr, size, type, flags)
	void *addr;
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	uma_slab_t slab;
	unsigned long alloc;
	void *newaddr;

	/* realloc(NULL, ...) is equivalent to malloc(...) */
	if (addr == NULL)
		return (malloc(size, type, flags));

	mtx_lock(&malloc_mtx);
	slab = hash_sfind(mallochash,
	    (void *)((u_long)addr & ~(UMA_SLAB_MASK)));
	mtx_unlock(&malloc_mtx);

	/* Sanity check */
	KASSERT(slab != NULL,
	    ("realloc: address %p out of range", (void *)addr));

	/* Get the size of the original block */
	if (slab->us_zone)
		alloc = slab->us_zone->uz_size;
	else
		alloc = slab->us_size;

	/* Reuse the original block if appropriate */
	if (size <= alloc
	    && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE))
		return (addr);

	/* Allocate a new, bigger (or smaller) block */
	if ((newaddr = malloc(size, type, flags)) == NULL)
		return (NULL);

	/* Copy over original contents */
	bcopy(addr, newaddr, min(size, alloc));
	free(addr, type);
	return (newaddr);
}

/*
 *	reallocf: same as realloc(), but frees the passed memory on failure.
 */
void *
reallocf(addr, size, type, flags)
	void *addr;
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	void *mem;

	if ((mem = realloc(addr, size, type, flags)) == NULL)
		free(addr, type);
	return (mem);
}
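/*
 * Usage sketch: reallocf() exists so callers can resize a buffer
 * without leaking it on failure.  With plain realloc() the idiom
 * needs a temporary:
 *
 *	newp = realloc(p, newsize, M_TEMP, M_NOWAIT);
 *	if (newp == NULL)
 *		free(p, M_TEMP);	(then handle the error)
 *	else
 *		p = newp;
 *
 * whereas with reallocf() it collapses to:
 *
 *	if ((p = reallocf(p, newsize, M_TEMP, M_NOWAIT)) == NULL)
 *		(handle the error; the old block is already freed)
 */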
/*
 * Initialize the kernel memory allocator
 */
/* ARGSUSED*/
static void
kmeminit(dummy)
	void *dummy;
{
	u_int8_t indx;
	u_long npg;
	u_long mem_size;
	void *hashmem;
	u_long hashsize;
	int highbit;
	int bits;
	int i;

	mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);

	/*
	 * Try to auto-tune the kernel memory size, so that it is
	 * more applicable for a wider range of machine sizes.
	 * On an X86, a VM_KMEM_SIZE_SCALE value of 4 is good, while
	 * a VM_KMEM_SIZE of 12MB is a fair compromise.  The
	 * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
	 * available, and on an X86 with a total KVA space of 256MB,
	 * try to keep VM_KMEM_SIZE_MAX at 80MB or below.
	 *
	 * Note that the kmem_map is also used by the zone allocator,
	 * so make sure that there is enough space.
	 */
	vm_kmem_size = VM_KMEM_SIZE;
	mem_size = cnt.v_page_count * PAGE_SIZE;

#if defined(VM_KMEM_SIZE_SCALE)
	if ((mem_size / VM_KMEM_SIZE_SCALE) > vm_kmem_size)
		vm_kmem_size = mem_size / VM_KMEM_SIZE_SCALE;
#endif

#if defined(VM_KMEM_SIZE_MAX)
	if (vm_kmem_size >= VM_KMEM_SIZE_MAX)
		vm_kmem_size = VM_KMEM_SIZE_MAX;
#endif

	/* Allow final override from the kernel environment */
	TUNABLE_INT_FETCH("kern.vm.kmem.size", &vm_kmem_size);

	/*
	 * Limit kmem virtual size to twice the physical memory.
	 * This allows for kmem map sparseness, but limits the size
	 * to something sane.  Be careful to not overflow the 32bit
	 * ints while doing the check.
	 */
	if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE))
		vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;

	/*
	 * In mbuf_init(), we set up submaps for mbufs and clusters, in which
	 * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES),
	 * respectively.  Mathematically, this means that what we do here may
	 * amount to slightly more address space than we need for the submaps,
	 * but it never hurts to have an extra page in kmem_map.
	 */
	npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt *
	    sizeof(u_int) + vm_kmem_size) / PAGE_SIZE;

	kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
	    (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE));
	kmem_map->system_map = 1;

	hashsize = npg * sizeof(void *);

	highbit = 0;
	bits = 0;
	/* The hash size must be a power of two */
	for (i = 0; i < 8 * sizeof(hashsize); i++)
		if (hashsize & (1 << i)) {
			highbit = i;
			bits++;
		}
	if (bits > 1)
		hashsize = 1 << (highbit);

	hashmem = (void *)kmem_alloc(kernel_map, (vm_size_t)hashsize);
	uma_startup2(hashmem, hashsize / sizeof(void *));

	for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) {
		int size = kmemzones[indx].kz_size;
		char *name = kmemzones[indx].kz_name;

		kmemzones[indx].kz_zone = uma_zcreate(name, size,
#ifdef INVARIANTS
		    trash_ctor, trash_dtor, trash_init, trash_fini,
#else
		    NULL, NULL, NULL, NULL,
#endif
		    UMA_ALIGN_PTR, UMA_ZONE_MALLOC);

		for (; i <= size; i += KMEM_ZBASE)
			kmemsize[i >> KMEM_ZSHIFT] = indx;
	}
}

void
malloc_init(data)
	void *data;
{
	struct malloc_type *type = (struct malloc_type *)data;

	mtx_lock(&malloc_mtx);
	if (type->ks_magic != M_MAGIC)
		panic("malloc type lacks magic");

	if (cnt.v_page_count == 0)
		panic("malloc_init not allowed before vm init");

	/* Already registered; don't leak malloc_mtx on the early return. */
	if (type->ks_next != NULL) {
		mtx_unlock(&malloc_mtx);
		return;
	}

	type->ks_next = kmemstatistics;
	kmemstatistics = type;
	mtx_init(&type->ks_mtx, type->ks_shortdesc, "Malloc Stats", MTX_DEF);
	mtx_unlock(&malloc_mtx);
}
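/*
 * Usage sketch (M_MYDRIVER is a hypothetical type name, not defined
 * anywhere in the tree): a subsystem registers its own malloc type with
 * MALLOC_DEFINE(), which arranges for malloc_init() above to run via
 * SYSINIT, and then tags its allocations with that type so they appear
 * as a separate line in the statistics:
 *
 *	MALLOC_DEFINE(M_MYDRIVER, "mydriver", "my driver's buffers");
 *
 *	buf = malloc(len, M_MYDRIVER, M_WAITOK);
 *	...
 *	free(buf, M_MYDRIVER);
 */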
void
malloc_uninit(data)
	void *data;
{
	struct malloc_type *type = (struct malloc_type *)data;
	struct malloc_type *t;

	mtx_lock(&malloc_mtx);
	mtx_lock(&type->ks_mtx);
	if (type->ks_magic != M_MAGIC)
		panic("malloc type lacks magic");

	if (cnt.v_page_count == 0)
		panic("malloc_uninit not allowed before vm init");

	if (type == kmemstatistics)
		kmemstatistics = type->ks_next;
	else {
		for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) {
			if (t->ks_next == type) {
				t->ks_next = type->ks_next;
				break;
			}
		}
	}
	type->ks_next = NULL;
	mtx_destroy(&type->ks_mtx);
	mtx_unlock(&malloc_mtx);
}

static int
sysctl_kern_malloc(SYSCTL_HANDLER_ARGS)
{
	struct malloc_type *type;
	int linesize = 128;
	int curline;
	int bufsize;
	int first;
	int error;
	char *buf;
	char *p;
	int cnt;
	int len;
	int i;

	cnt = 0;

	mtx_lock(&malloc_mtx);
	for (type = kmemstatistics; type != NULL; type = type->ks_next)
		cnt++;

	mtx_unlock(&malloc_mtx);
	bufsize = linesize * (cnt + 1);
	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
	mtx_lock(&malloc_mtx);

	len = snprintf(p, linesize,
	    "\n        Type  InUse MemUse HighUse Requests  Size(s)\n");
	p += len;

	for (type = kmemstatistics; cnt != 0 && type != NULL;
	    type = type->ks_next, cnt--) {
		if (type->ks_calls == 0)
			continue;

		curline = linesize - 2;	/* Leave room for the \n */
		len = snprintf(p, curline, "%13s%6lu%6luK%7luK%9llu",
		    type->ks_shortdesc,
		    type->ks_inuse,
		    (type->ks_memuse + 1023) / 1024,
		    (type->ks_maxused + 1023) / 1024,
		    (long long unsigned)type->ks_calls);
		curline -= len;
		p += len;

		first = 1;
		for (i = 0; i < 8 * sizeof(type->ks_size); i++)
			if (type->ks_size & (1 << i)) {
				if (first)
					len = snprintf(p, curline, "  ");
				else
					len = snprintf(p, curline, ",");
				curline -= len;
				p += len;

				len = snprintf(p, curline,
				    "%s", kmemzones[i].kz_name);
				curline -= len;
				p += len;

				first = 0;
			}

		len = snprintf(p, 2, "\n");
		p += len;
	}

	mtx_unlock(&malloc_mtx);
	error = SYSCTL_OUT(req, buf, p - buf);

	free(buf, M_TEMP);
	return (error);
}

SYSCTL_OID(_kern, OID_AUTO, malloc, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_kern_malloc, "A", "Malloc Stats");

#ifdef MALLOC_PROFILE

static int
sysctl_kern_mprof(SYSCTL_HANDLER_ARGS)
{
	int linesize = 64;
	uint64_t count;
	uint64_t waste;
	uint64_t mem;
	int bufsize;
	int error;
	char *buf;
	int rsize;
	int size;
	char *p;
	int len;
	int i;

	bufsize = linesize * (KMEM_ZSIZE + 1);
	bufsize += 128;	/* For the stats line */
	bufsize += 128;	/* For the banner line */
	waste = 0;
	mem = 0;

	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
	len = snprintf(p, bufsize,
	    "\n  Size                    Requests  Real Size\n");
	bufsize -= len;
	p += len;

	for (i = 0; i < KMEM_ZSIZE; i++) {
		size = i << KMEM_ZSHIFT;
		rsize = kmemzones[kmemsize[i]].kz_size;
		count = (long long unsigned)krequests[i];

		len = snprintf(p, bufsize, "%6d%28llu%11d\n",
		    size, (unsigned long long)count, rsize);
		bufsize -= len;
		p += len;

		if ((rsize * count) > (size * count))
			waste += (rsize * count) - (size * count);
		mem += (rsize * count);
	}

	len = snprintf(p, bufsize,
	    "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n",
	    (unsigned long long)mem, (unsigned long long)waste);
	p += len;

	error = SYSCTL_OUT(req, buf, p - buf);

	free(buf, M_TEMP);
	return (error);
}

SYSCTL_OID(_kern, OID_AUTO, mprof, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_kern_mprof, "A", "Malloc Profiling");
#endif /* MALLOC_PROFILE */
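/*
 * Note: the two read-only string sysctls defined above are what a
 * userland "sysctl kern.malloc" (and, with MALLOC_PROFILE compiled in,
 * "sysctl kern.mprof") prints: the per-type usage table and the
 * per-size-class request/waste profile, respectively.
 */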