/*
 * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/vm/uma_core.c 95766 2002-04-30 04:26:34Z jeff $
 *
 */

/*
 * uma_core.c  Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.  This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose algorithms
 * are well known.
 *
 */
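
/*
 * A typical consumer looks something like the sketch below.  This is
 * illustrative only; "struct foo" and "foozone" are hypothetical, see
 * uma.h for the real interface:
 *
 *	uma_zone_t foozone;
 *
 *	foozone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	item = uma_zalloc(foozone, M_WAITOK);
 *	...
 *	uma_zfree(foozone, item);
 */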

/*
 * TODO:
 *	- Improve memory usage for large allocations
 *	- Improve INVARIANTS (0xdeadc0de write out)
 *	- Investigate cache size adjustments
 */

/* I should really use ktr.. */
/*
#define UMA_DEBUG 1
#define UMA_DEBUG_ALLOC 1
#define UMA_DEBUG_ALLOC_1 1
*/


#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <machine/types.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

/*
 * This is the zone from which all zones are spawned.  The idea is that even
 * the zone heads are allocated from the allocator, so we use the bss section
 * to bootstrap us.
 */
static struct uma_zone masterzone;
static uma_zone_t zones = &masterzone;

/* This is the zone from which all of uma_slab_t's are allocated. */
static uma_zone_t slabzone;

/*
 * The initial hash tables come out of this zone so they can be allocated
 * prior to malloc coming up.
 */
static uma_zone_t hashzone;

/*
 * Zone that buckets come from.
 */
static uma_zone_t bucketzone;

/*
 * Are we allowed to allocate buckets?
 */
static int bucketdisable = 1;

/* Linked list of all zones in the system */
static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);

/* This mutex protects the zone list */
static struct mtx uma_mtx;

/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
    LIST_HEAD_INITIALIZER(&uma_boot_pages);

/* Count of free boottime pages */
static int uma_boot_free = 0;

/* Is the VM done starting up? */
static int booted = 0;

/* This is the handle used to schedule our working set calculator */
static struct callout uma_callout;

/* This is mp_maxid + 1, for use while looping over each cpu */
static int maxcpu;

/*
 * This structure is passed as the zone ctor arg so that I don't have to create
 * a special allocation function just for zones.
 */
struct uma_zctor_args {
	char *name;
	int size;
	uma_ctor ctor;
	uma_dtor dtor;
	uma_init uminit;
	uma_fini fini;
	int align;
	u_int16_t flags;
};

/*
 * This is the malloc hash table which is used to find the zone that a
 * malloc allocation came from.  It is not currently resizeable.  The
 * memory for the actual hash bucket is allocated in kmeminit.
 */
struct uma_hash mhash;
struct uma_hash *mallochash = &mhash;
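
/*
 * Example (illustrative only): given a pointer "mem" returned by malloc(),
 * the owning slab, and from it the zone, can be recovered by masking the
 * address down to its slab boundary and looking it up in this hash:
 *
 *	slab = hash_sfind(mallochash,
 *	    (u_int8_t *)((unsigned long)mem & (~UMA_SLAB_MASK)));
 *	zone = slab->us_zone;
 */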

/* Prototypes.. */

static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
static void page_free(void *, int, u_int8_t);
static uma_slab_t slab_zalloc(uma_zone_t, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void zone_drain(uma_zone_t);
static void zone_ctor(void *, int, void *);
static void zone_dtor(void *, int, void *);
static void zero_init(void *, int);
static void zone_small_init(uma_zone_t zone);
static void zone_large_init(uma_zone_t zone);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
static struct slabhead *hash_alloc(int *);
static void hash_expand(struct uma_hash *, struct slabhead *, int);
static void hash_free(struct slabhead *hash, int hashsize);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t);
static void uma_zfree_internal(uma_zone_t, void *, void *, int);
static void bucket_enable(void);
void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);

SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_vm_zone, "A", "Zone Info");
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);

/*
 * This routine checks to see whether or not it's safe to enable buckets.
 */

static void
bucket_enable(void)
{
	if (cnt.v_free_count < cnt.v_free_min)
		bucketdisable = 1;
	else
		bucketdisable = 0;
}


/*
 * Routine called by timeout which is used to fire off some time interval
 * based calculations.  (working set, stats, etc.)
 *
 * Arguments:
 *	arg   Unused
 *
 * Returns:
 *	Nothing
 */
static void
uma_timeout(void *unused)
{
	bucket_enable();
	zone_foreach(zone_timeout);

	/* Reschedule this event */
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
}

/*
 * Routine to perform timeout driven calculations.  This does the working set
 * calculation as well as hash expansion and per cpu statistics aggregation.
 *
 * Arguments:
 *	zone  The zone to operate on
 *
 * Returns:
 *	Nothing
 */
static void
zone_timeout(uma_zone_t zone)
{
	uma_cache_t cache;
	u_int64_t alloc;
	int free;
	int cpu;

	alloc = 0;
	free = 0;

	/*
	 * Aggregate per cpu cache statistics back to the zone.
	 *
	 * I may rewrite this to set a flag in the per cpu cache instead of
	 * locking.  If the flag is not cleared on the next round I will have
	 * to lock and do it here instead so that the statistics don't get too
	 * far out of sync.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
		for (cpu = 0; cpu < maxcpu; cpu++) {
			if (CPU_ABSENT(cpu))
				continue;
			CPU_LOCK(zone, cpu);
			cache = &zone->uz_cpu[cpu];
			/* Add them up, and reset */
			alloc += cache->uc_allocs;
			cache->uc_allocs = 0;
			if (cache->uc_allocbucket)
				free += cache->uc_allocbucket->ub_ptr + 1;
			if (cache->uc_freebucket)
				free += cache->uc_freebucket->ub_ptr + 1;
			CPU_UNLOCK(zone, cpu);
		}
	}

	/* Now push these stats back into the zone.. */
	ZONE_LOCK(zone);
	zone->uz_allocs += alloc;

	/*
	 * cachefree is an instantaneous snapshot of what is in the per cpu
	 * caches, not an accurate counter
	 */
	zone->uz_cachefree = free;

	/*
	 * Expand the zone hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is completely eliminate collisions.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */

	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) &&
	    !(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
		if (zone->uz_pages / zone->uz_ppera
		    >= zone->uz_hash.uh_hashsize) {
			struct slabhead *newhash;
			int newsize;

			newsize = zone->uz_hash.uh_hashsize;
			ZONE_UNLOCK(zone);
			newhash = hash_alloc(&newsize);
			ZONE_LOCK(zone);
			hash_expand(&zone->uz_hash, newhash, newsize);
		}
	}

	/*
	 * Here we compute the working set size as the total number of items
	 * left outstanding since the last time interval.  This is slightly
	 * suboptimal.  What we really want is the highest number of outstanding
	 * items during the last time quantum.  This should be close enough.
	 *
	 * The working set size is used to throttle the zone_drain function.
	 * We don't want to return memory that we may need again immediately.
	 */
	alloc = zone->uz_allocs - zone->uz_oallocs;
	zone->uz_oallocs = zone->uz_allocs;
	zone->uz_wssize = alloc;

	ZONE_UNLOCK(zone);
}

/*
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	oldsize  On input it's the size we're currently at and on output
 *		it is the expanded size.
 *
 * Returns:
 *	slabhead The new hash bucket or NULL if the allocation failed.
 */
static struct slabhead *
hash_alloc(int *oldsize)
{
	struct slabhead *newhash;
	int newsize;
	int alloc;

	/* We're just going to go to a power of two greater */
	if (*oldsize) {
		newsize = (*oldsize) * 2;
		alloc = sizeof(newhash[0]) * newsize;
		/* XXX Shouldn't be abusing DEVBUF here */
		newhash = (struct slabhead *)malloc(alloc, M_DEVBUF, M_NOWAIT);
	} else {
		alloc = sizeof(newhash[0]) * UMA_HASH_SIZE_INIT;
		newhash = uma_zalloc_internal(hashzone, NULL, M_WAITOK, NULL);
		newsize = UMA_HASH_SIZE_INIT;
	}
	if (newhash)
		bzero(newhash, alloc);

	*oldsize = newsize;

	return (newhash);
}

/*
 * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation path,
 * otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	hash     The hash you want to expand by a factor of two.
 *	newhash  The storage for the new hash, from hash_alloc().
 *	newsize  The size of the new hash table.
 *
 * Returns:
 *	Nothing
 *
 * Discussion:
 */
static void
hash_expand(struct uma_hash *hash, struct slabhead *newhash, int newsize)
{
	struct slabhead *oldhash;
	uma_slab_t slab;
	int oldsize;
	int hval;
	int i;

	if (!newhash)
		return;

	oldsize = hash->uh_hashsize;
	oldhash = hash->uh_slab_hash;

	if (oldsize >= newsize) {
		hash_free(newhash, newsize);
		return;
	}

	hash->uh_hashmask = newsize - 1;

	/*
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
	 */
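
	/*
	 * Pull each slab off the head of its old chain and reinsert it at
	 * the head of its new chain; UMA_HASH() indexes with the new mask
	 * set above.
	 */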
	for (i = 0; i < oldsize; i++)
		while (!SLIST_EMPTY(&hash->uh_slab_hash[i])) {
			slab = SLIST_FIRST(&hash->uh_slab_hash[i]);
			SLIST_REMOVE_HEAD(&hash->uh_slab_hash[i], us_hlink);
			hval = UMA_HASH(hash, slab->us_data);
			SLIST_INSERT_HEAD(&newhash[hval], slab, us_hlink);
		}

	if (oldhash)
		hash_free(oldhash, oldsize);

	hash->uh_slab_hash = newhash;
	hash->uh_hashsize = newsize;

	return;
}

/*
 * Free the hash bucket to the appropriate backing store.
 *
 * Arguments:
 *	slab_hash  The hash bucket we're freeing
 *	hashsize   The number of entries in that hash bucket
 *
 * Returns:
 *	Nothing
 */
static void
hash_free(struct slabhead *slab_hash, int hashsize)
{
	if (hashsize == UMA_HASH_SIZE_INIT)
		uma_zfree_internal(hashzone,
		    slab_hash, NULL, 0);
	else
		free(slab_hash, M_DEVBUF);
}

/*
 * Frees all outstanding items in a bucket
 *
 * Arguments:
 *	zone   The zone to free to, must be unlocked.
 *	bucket The free/alloc bucket with items, cpu queue must be locked.
 *
 * Returns:
 *	Nothing
 */

static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
	uma_slab_t slab;
	int mzone;
	void *item;

	if (bucket == NULL)
		return;

	slab = NULL;
	mzone = 0;

	/* We have to look up the slab again for malloc.. */
	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
		mzone = 1;

	while (bucket->ub_ptr > -1) {
		item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
		bucket->ub_bucket[bucket->ub_ptr] = NULL;
		KASSERT(item != NULL,
		    ("bucket_drain: botched ptr, item is NULL"));
#endif
		bucket->ub_ptr--;
		/*
		 * This is extremely inefficient.  The slab pointer was passed
		 * to uma_zfree_arg, but we lost it because the buckets don't
		 * hold them.  This will go away when free() gets a size passed
		 * to it.
		 */
		if (mzone)
			slab = hash_sfind(mallochash,
			    (u_int8_t *)((unsigned long)item &
			    (~UMA_SLAB_MASK)));
		uma_zfree_internal(zone, item, slab, 1);
	}
}

/*
 * Drains the per cpu caches for a zone.
 *
 * Arguments:
 *	zone  The zone to drain, must be unlocked.
 *
 * Returns:
 *	Nothing
 *
 * This function returns with the zone locked so that the per cpu queues can
 * not be filled until zone_drain is finished.
 *
 */
static void
cache_drain(uma_zone_t zone)
{
	uma_bucket_t bucket;
	uma_cache_t cache;
	int cpu;

	/*
	 * Flush out the per cpu queues.
	 *
	 * XXX This causes unnecessary thrashing due to immediately having
	 * empty per cpu queues.  I need to improve this.
	 */

	/*
	 * We have to lock each cpu cache before locking the zone
	 */
	ZONE_UNLOCK(zone);

	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		CPU_LOCK(zone, cpu);
		cache = &zone->uz_cpu[cpu];
		bucket_drain(zone, cache->uc_allocbucket);
		bucket_drain(zone, cache->uc_freebucket);
	}

	/*
	 * Drain the bucket queues and free the buckets, we just keep two per
	 * cpu (alloc/free).
	 */
	ZONE_LOCK(zone);
	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		bucket_drain(zone, bucket);
		uma_zfree_internal(bucketzone, bucket, NULL, 0);
		ZONE_LOCK(zone);
	}

	/* Now we do the free queue.. */
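	/*
	 * Buckets on the free queue hold no items, so they can go straight
	 * back to the bucket zone without being drained first.
	 */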
	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		uma_zfree_internal(bucketzone, bucket, NULL, 0);
	}

	/* We unlock here, but they will all block until the zone is unlocked */
	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		CPU_UNLOCK(zone, cpu);
	}

	zone->uz_cachefree = 0;
}

/*
 * Frees pages from a zone back to the system.  This is done on demand from
 * the pageout daemon.
 *
 * Arguments:
 *	zone  The zone to free pages from
 *
 * Returns:
 *	Nothing.
 */
static void
zone_drain(uma_zone_t zone)
{
	uma_slab_t slab;
	uma_slab_t n;
	u_int64_t extra;
	u_int8_t flags;
	u_int8_t *mem;
	int i;

	/*
	 * We don't want to take pages from statically allocated zones at this
	 * time
	 */
	if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
		return;

	ZONE_LOCK(zone);

	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
		cache_drain(zone);

	if (zone->uz_free < zone->uz_wssize)
		goto finished;
#ifdef UMA_DEBUG
	printf("%s working set size: %llu free items: %u\n",
	    zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
#endif
	extra = zone->uz_free - zone->uz_wssize;
	extra /= zone->uz_ipers;

	/* extra is now the number of extra slabs that we can free */

	if (extra == 0)
		goto finished;

	slab = LIST_FIRST(&zone->uz_free_slab);
	while (slab && extra) {
		n = LIST_NEXT(slab, us_link);

		/* We have nowhere to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		zone->uz_pages -= zone->uz_ppera;
		zone->uz_free -= zone->uz_ipers;
		if (zone->uz_fini)
			for (i = 0; i < zone->uz_ipers; i++)
				zone->uz_fini(
				    slab->us_data + (zone->uz_rsize * i),
				    zone->uz_size);
		flags = slab->us_flags;
		mem = slab->us_data;
		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
			if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
				UMA_HASH_REMOVE(mallochash,
				    slab, slab->us_data);
			} else {
				UMA_HASH_REMOVE(&zone->uz_hash,
				    slab, slab->us_data);
			}
			uma_zfree_internal(slabzone, slab, NULL, 0);
		} else if (zone->uz_flags & UMA_ZFLAG_MALLOC)
			UMA_HASH_REMOVE(mallochash, slab, slab->us_data);
#ifdef UMA_DEBUG
		printf("%s: Returning %d bytes.\n",
		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
#endif
		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);

		slab = n;
		extra--;
	}

finished:
	ZONE_UNLOCK(zone);
}
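
/*
 * Example with illustrative numbers: if a zone has uz_free == 100 items,
 * uz_wssize == 40 and uz_ipers == 20, then extra is (100 - 40) / 20 == 3,
 * so at most three completely free slabs are returned to the system on
 * this pass; the working set of 40 items stays cached.
 */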

/*
 * Allocate a new slab for a zone.  This does not insert the slab onto a list.
 *
 * Arguments:
 *	zone  The zone to allocate slabs for
 *	wait  Shall we wait?
 *
 * Returns:
 *	The slab that was allocated or NULL if there is no memory and the
 *	caller specified M_NOWAIT.
 *
 */
static uma_slab_t
slab_zalloc(uma_zone_t zone, int wait)
{
	uma_slab_t slab;	/* Starting slab */
	u_int8_t *mem;
	u_int8_t flags;
	int i;

	slab = NULL;

#ifdef UMA_DEBUG
	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
#endif
	ZONE_UNLOCK(zone);

	if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
		slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
		if (slab == NULL) {
			ZONE_LOCK(zone);
			return (NULL);
		}
	}

	if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
		mtx_lock(&Giant);
		mem = zone->uz_allocf(zone,
		    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
		mtx_unlock(&Giant);
		if (mem == NULL) {
			ZONE_LOCK(zone);
			return (NULL);
		}
	} else {
		uma_slab_t tmps;

		if (zone->uz_ppera > 1)
			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
			panic("Mallocing before uma_startup2 has been called.\n");
		if (uma_boot_free == 0)
			panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
		tmps = LIST_FIRST(&uma_boot_pages);
		LIST_REMOVE(tmps, us_link);
		uma_boot_free--;
		mem = tmps->us_data;
	}

	ZONE_LOCK(zone);

	/* Alloc slab structure for offpage, otherwise adjust its position */
	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
		slab = (uma_slab_t)(mem + zone->uz_pgoff);
	} else {
		if (!(zone->uz_flags & UMA_ZFLAG_MALLOC))
			UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
	}
	if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
#ifdef UMA_DEBUG
		printf("Inserting %p into malloc hash from slab %p\n",
		    mem, slab);
#endif
		/* XXX Yikes!  No lock on the malloc hash! */
		UMA_HASH_INSERT(mallochash, slab, mem);
	}

	slab->us_zone = zone;
	slab->us_data = mem;

	/*
	 * This is intended to spread data out across cache lines.
	 *
	 * This code doesn't seem to work properly on x86, and on alpha
	 * it makes absolutely no performance difference.  I'm sure it could
	 * use some tuning, but Sun makes outrageous claims about its
	 * performance.
	 */
#if 0
	if (zone->uz_cachemax) {
		slab->us_data += zone->uz_cacheoff;
		zone->uz_cacheoff += UMA_CACHE_INC;
		if (zone->uz_cacheoff > zone->uz_cachemax)
			zone->uz_cacheoff = 0;
	}
#endif

	slab->us_freecount = zone->uz_ipers;
	slab->us_firstfree = 0;
	slab->us_flags = flags;
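
	/*
	 * Each byte of us_freelist holds the index of the next free item,
	 * threading a singly linked free list through the slab itself;
	 * us_firstfree above is the head of that list.
	 */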
	for (i = 0; i < zone->uz_ipers; i++)
		slab->us_freelist[i] = i+1;

	if (zone->uz_init)
		for (i = 0; i < zone->uz_ipers; i++)
			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
			    zone->uz_size);

	zone->uz_pages += zone->uz_ppera;
	zone->uz_free += zone->uz_ipers;

	return (slab);
}

/*
 * Allocates a number of pages from the system
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	void *p;	/* Returned page */

	/*
	 * XXX The original zone allocator did this, but I don't think it's
	 * necessary in current.
	 */

	if (lockstatus(&kernel_map->lock, NULL)) {
		*pflag = UMA_SLAB_KMEM;
		p = (void *) kmem_malloc(kmem_map, bytes, wait);
	} else {
		*pflag = UMA_SLAB_KMAP;
		p = (void *) kmem_alloc(kernel_map, bytes);
	}

	return (p);
}

/*
 * Allocates a number of pages from within an object
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	vm_offset_t zkva;
	vm_offset_t retkva;
	vm_page_t p;
	int pages;

	retkva = 0;
	pages = zone->uz_pages;

	/*
	 * This looks a little weird since we're getting one page at a time
	 */
	while (bytes > 0) {
		p = vm_page_alloc(zone->uz_obj, pages,
		    VM_ALLOC_INTERRUPT);
		if (p == NULL)
			return (NULL);

		zkva = zone->uz_kva + pages * PAGE_SIZE;
		if (retkva == 0)
			retkva = zkva;
		pmap_qenter(zkva, &p, 1);
		bytes -= PAGE_SIZE;
		pages += 1;
	}

	*flags = UMA_SLAB_PRIV;

	return ((void *)retkva);
}

/*
 * Frees a number of pages to the system
 *
 * Arguments:
 *	mem   A pointer to the memory to be freed
 *	size  The size of the memory being freed
 *	flags The original p->us_flags field
 *
 * Returns:
 *	Nothing
 *
 */
static void
page_free(void *mem, int size, u_int8_t flags)
{
	vm_map_t map;

	if (flags & UMA_SLAB_KMEM)
		map = kmem_map;
	else if (flags & UMA_SLAB_KMAP)
		map = kernel_map;
	else
		panic("UMA: page_free used with invalid flags %d\n", flags);

	kmem_free(map, (vm_offset_t)mem, size);
}

/*
 * Zero fill initializer
 *
 * Arguments/Returns follow uma_init specifications
 *
 */
static void
zero_init(void *mem, int size)
{
	bzero(mem, size);
}

/*
 * Finish creating a small uma zone.  This calculates ipers, and the zone size.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_small_init(uma_zone_t zone)
{
	int rsize;
	int memused;
	int ipers;

	rsize = zone->uz_size;

	if (rsize < UMA_SMALLEST_UNIT)
		rsize = UMA_SMALLEST_UNIT;

	if (rsize & zone->uz_align)
		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);

	zone->uz_rsize = rsize;

	rsize += 1;	/* Account for the byte of linkage */
	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
	zone->uz_ppera = 1;

	memused = zone->uz_ipers * zone->uz_rsize;

	/* Can we do any better? */
	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
			return;
		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
		if (ipers > zone->uz_ipers) {
			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
			zone->uz_ipers = ipers;
		}
	}

}
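
/*
 * Worked example with illustrative numbers: with 4k slabs, a 256 byte item
 * needs no alignment padding, and the byte of linkage makes rsize + 1 ==
 * 257, so (4096 - sizeof(struct uma_slab)) / 257 == 15 items fit with the
 * header kept in the page.  Should the leftover space reach UMA_MAX_WASTE,
 * the header is pushed off page so that 4096 / 256 == 16 items fit instead.
 */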

/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_large_init(uma_zone_t zone)
{
	int pages;

	pages = zone->uz_size / UMA_SLAB_SIZE;

	/* Account for remainder */
	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
		pages++;

	zone->uz_ppera = pages;
	zone->uz_ipers = 1;

	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
	zone->uz_rsize = zone->uz_size;
}
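
/*
 * Example with illustrative numbers: a 9000 byte item on 4k slabs computes
 * 9000 / 4096 == 2 with a remainder, so uz_ppera becomes 3 pages, each slab
 * holding exactly one item with its header kept off page.
 */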

/*
 * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
 * the zone onto the global zone list.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_zctor_args
 *
 */

static void
zone_ctor(void *mem, int size, void *udata)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	int privlc;
	int cplen;
	int cpu;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_size = arg->size;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_init = arg->uminit;
	zone->uz_align = arg->align;
	zone->uz_free = 0;
	zone->uz_pages = 0;
	zone->uz_flags = 0;
	zone->uz_allocf = page_alloc;
	zone->uz_freef = page_free;

	if (arg->flags & UMA_ZONE_ZINIT)
		zone->uz_init = zero_init;

	if (arg->flags & UMA_ZONE_INTERNAL)
		zone->uz_flags |= UMA_ZFLAG_INTERNAL;

	if (arg->flags & UMA_ZONE_MALLOC)
		zone->uz_flags |= UMA_ZFLAG_MALLOC;

	if (arg->flags & UMA_ZONE_NOFREE)
		zone->uz_flags |= UMA_ZFLAG_NOFREE;

	if (zone->uz_size > UMA_SLAB_SIZE)
		zone_large_init(zone);
	else
		zone_small_init(zone);

	if (arg->flags & UMA_ZONE_MTXCLASS)
		privlc = 1;
	else
		privlc = 0;

	/* We do this so that the per cpu lock name is unique for each zone */
	memcpy(zone->uz_lname, "PCPU ", 5);
	cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
	memcpy(zone->uz_lname+5, zone->uz_name, cplen);
	zone->uz_lname[LOCKNAME_LEN - 1] = '\0';

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
		int totsize;
		int waste;

		/* Size of the slab struct and free list */
		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;

		waste = zone->uz_pgoff;
		waste -= (zone->uz_ipers * zone->uz_rsize);

		/*
		 * This calculates how much space we have for cache line size
		 * optimizations.  It works by offsetting each slab slightly.
		 * Currently it breaks on x86, and so it is disabled.
		 */

		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
			zone->uz_cachemax = waste - UMA_CACHE_INC;
			zone->uz_cacheoff = 0;
		}

		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
		    + zone->uz_ipers;
		/* I don't think it's possible, but I'll make sure anyway */
		if (totsize > UMA_SLAB_SIZE) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
			    zone->uz_size);
			panic("UMA slab won't fit.\n");
		}
	} else {
		struct slabhead *newhash;
		int hashsize;

		hashsize = 0;
		newhash = hash_alloc(&hashsize);
		hash_expand(&zone->uz_hash, newhash, hashsize);
		zone->uz_pgoff = 0;
	}

#ifdef UMA_DEBUG
	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
	    zone->uz_name, zone,
	    zone->uz_size, zone->uz_ipers,
	    zone->uz_ppera, zone->uz_pgoff);
#endif
	ZONE_LOCK_INIT(zone, privlc);

	mtx_lock(&uma_mtx);
	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
	mtx_unlock(&uma_mtx);

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */

	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
		return;

	if (zone->uz_ipers < UMA_BUCKET_SIZE)
		zone->uz_count = zone->uz_ipers - 1;
	else
		zone->uz_count = UMA_BUCKET_SIZE - 1;

	for (cpu = 0; cpu < maxcpu; cpu++)
		CPU_LOCK_INIT(zone, cpu, privlc);
}
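
/*
 * Note (illustrative only): uz_count caps how full the per cpu buckets may
 * grow.  A zone whose slabs hold 10 items gets uz_count == 9, one slab's
 * worth, while zones with many items per slab cap at UMA_BUCKET_SIZE - 1.
 */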

/*
 * Zone header dtor.  This frees all data, destroys locks, frees the hash table
 * and removes the zone from the global list.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */

static void
zone_dtor(void *arg, int size, void *udata)
{
	uma_zone_t zone;
	int cpu;

	zone = (uma_zone_t)arg;

	mtx_lock(&uma_mtx);
	LIST_REMOVE(zone, uz_link);
	mtx_unlock(&uma_mtx);

	ZONE_LOCK(zone);
	zone->uz_wssize = 0;
	ZONE_UNLOCK(zone);

	zone_drain(zone);
	ZONE_LOCK(zone);
	if (zone->uz_free != 0)
		printf("Zone %s was not empty.  Lost %d pages of memory.\n",
		    zone->uz_name, zone->uz_pages);

	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
		for (cpu = 0; cpu < maxcpu; cpu++)
			CPU_LOCK_FINI(zone, cpu);

	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
		hash_free(zone->uz_hash.uh_slab_hash,
		    zone->uz_hash.uh_hashsize);

	ZONE_UNLOCK(zone);
	ZONE_LOCK_FINI(zone);
}

/*
 * Traverses every zone in the system and calls a callback
 *
 * Arguments:
 *	zfunc  A pointer to a function which accepts a zone
 *		as an argument.
 *
 * Returns:
 *	Nothing
 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_zone_t zone;

	mtx_lock(&uma_mtx);
	LIST_FOREACH(zone, &uma_zones, uz_link) {
		zfunc(zone);
	}
	mtx_unlock(&uma_mtx);
}

/* Public functions */
/* See uma.h */
void
uma_startup(void *bootmem)
{
	struct uma_zctor_args args;
	uma_slab_t slab;
	int slabsize;
	int i;

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone.\n");
#endif
#ifdef SMP
	maxcpu = mp_maxid + 1;
#else
	maxcpu = 1;
#endif
#ifdef UMA_DEBUG
	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
	Debugger("stop");
#endif
	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
	/* "manually" create the initial zone */
	args.name = "UMA Zones";
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (maxcpu - 1));
	args.ctor = zone_ctor;
	args.dtor = zone_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZONE_INTERNAL;
	/* The initial zone has no per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	for (i = 0; i < UMA_BOOT_PAGES; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (u_int8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
		uma_boot_free++;
	}

#ifdef UMA_DEBUG
	printf("Creating slab zone.\n");
#endif

	/*
	 * This is the max number of free list items we'll have with
	 * offpage slabs.
	 */

	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
	slabsize /= UMA_MAX_WASTE;
	slabsize++;			/* In case it's rounded */
	slabsize += sizeof(struct uma_slab);

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
	    slabsize,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);


#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}

/* see uma.h */
void
uma_startup2(void *hashmem, u_long elems)
{
	bzero(hashmem, elems * sizeof(void *));
	mallochash->uh_slab_hash = hashmem;
	mallochash->uh_hashsize = elems;
	mallochash->uh_hashmask = elems - 1;
	booted = 1;
	bucket_enable();
#ifdef UMA_DEBUG
	printf("UMA startup2 complete.\n");
#endif
}
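
/*
 * The three bootstrap stages above and below are expected to run in order:
 * uma_startup() first, with UMA_BOOT_PAGES of memory for the boot free
 * list, then uma_startup2() once the VM can back the malloc hash, and
 * finally uma_startup3() from SYSINIT to start the working set callout.
 */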

/*
 * Initialize our callout handle
 *
 */

static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
	printf("Starting callout.\n");
#endif
	callout_init(&uma_callout, 0);
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
	printf("UMA startup3 complete.\n");
#endif
}

/* See uma.h */
uma_zone_t
uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init uminit,
    uma_fini fini, int align, u_int16_t flags)

{
	struct uma_zctor_args args;

	/* This stuff is essential for the zone ctor */
	args.name = name;
	args.size = size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;

	return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL));
}

/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
{
	uma_zfree_internal(zones, zone, NULL, 0);
}

/* See uma.h */
void *
| 27 * 28 */ 29 30/* 31 * uma_core.c Implementation of the Universal Memory allocator 32 * 33 * This allocator is intended to replace the multitude of similar object caches 34 * in the standard FreeBSD kernel. The intent is to be flexible as well as 35 * effecient. A primary design goal is to return unused memory to the rest of 36 * the system. This will make the system as a whole more flexible due to the 37 * ability to move memory to subsystems which most need it instead of leaving 38 * pools of reserved memory unused. 39 * 40 * The basic ideas stem from similar slab/zone based allocators whose algorithms 41 * are well known. 42 * 43 */ 44 45/* 46 * TODO: 47 * - Improve memory usage for large allocations 48 * - Improve INVARIANTS (0xdeadc0de write out) 49 * - Investigate cache size adjustments 50 */ 51 52/* I should really use ktr.. */ 53/* 54#define UMA_DEBUG 1 55#define UMA_DEBUG_ALLOC 1 56#define UMA_DEBUG_ALLOC_1 1 57*/ 58 59 60#include "opt_param.h" 61#include <sys/param.h> 62#include <sys/systm.h> 63#include <sys/kernel.h> 64#include <sys/types.h> 65#include <sys/queue.h> 66#include <sys/malloc.h> 67#include <sys/lock.h> 68#include <sys/sysctl.h> 69#include <sys/mutex.h> 70#include <sys/smp.h> 71#include <sys/vmmeter.h> 72 73#include <machine/types.h> 74 75#include <vm/vm.h> 76#include <vm/vm_object.h> 77#include <vm/vm_page.h> 78#include <vm/vm_param.h> 79#include <vm/vm_map.h> 80#include <vm/vm_kern.h> 81#include <vm/vm_extern.h> 82#include <vm/uma.h> 83#include <vm/uma_int.h> 84 85/* 86 * This is the zone from which all zones are spawned. The idea is that even 87 * the zone heads are allocated from the allocator, so we use the bss section 88 * to bootstrap us. 89 */ 90static struct uma_zone masterzone; 91static uma_zone_t zones = &masterzone; 92 93/* This is the zone from which all of uma_slab_t's are allocated. */ 94static uma_zone_t slabzone; 95 96/* 97 * The initial hash tables come out of this zone so they can be allocated 98 * prior to malloc coming up. 99 */ 100static uma_zone_t hashzone; 101 102/* 103 * Zone that buckets come from. 104 */ 105static uma_zone_t bucketzone; 106 107/* 108 * Are we allowed to allocate buckets? 109 */ 110static int bucketdisable = 1; 111 112/* Linked list of all zones in the system */ 113static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones); 114 115/* This mutex protects the zone list */ 116static struct mtx uma_mtx; 117 118/* Linked list of boot time pages */ 119static LIST_HEAD(,uma_slab) uma_boot_pages = 120 LIST_HEAD_INITIALIZER(&uma_boot_pages); 121 122/* Count of free boottime pages */ 123static int uma_boot_free = 0; 124 125/* Is the VM done starting up? */ 126static int booted = 0; 127 128/* This is the handle used to schedule our working set calculator */ 129static struct callout uma_callout; 130 131/* This is mp_maxid + 1, for use while looping over each cpu */ 132static int maxcpu; 133 134/* 135 * This structure is passed as the zone ctor arg so that I don't have to create 136 * a special allocation function just for zones. 137 */ 138struct uma_zctor_args { 139 char *name; 140 int size; 141 uma_ctor ctor; 142 uma_dtor dtor; 143 uma_init uminit; 144 uma_fini fini; 145 int align; 146 u_int16_t flags; 147}; 148 149/* 150 * This is the malloc hash table which is used to find the zone that a 151 * malloc allocation came from. It is not currently resizeable. The 152 * memory for the actual hash bucket is allocated in kmeminit. 
153 */ 154struct uma_hash mhash; 155struct uma_hash *mallochash = &mhash; 156 157/* Prototypes.. */ 158 159static void *obj_alloc(uma_zone_t, int, u_int8_t *, int); 160static void *page_alloc(uma_zone_t, int, u_int8_t *, int); 161static void page_free(void *, int, u_int8_t); 162static uma_slab_t slab_zalloc(uma_zone_t, int); 163static void cache_drain(uma_zone_t); 164static void bucket_drain(uma_zone_t, uma_bucket_t); 165static void zone_drain(uma_zone_t); 166static void zone_ctor(void *, int, void *); 167static void zone_dtor(void *, int, void *); 168static void zero_init(void *, int); 169static void zone_small_init(uma_zone_t zone); 170static void zone_large_init(uma_zone_t zone); 171static void zone_foreach(void (*zfunc)(uma_zone_t)); 172static void zone_timeout(uma_zone_t zone); 173static struct slabhead *hash_alloc(int *); 174static void hash_expand(struct uma_hash *, struct slabhead *, int); 175static void hash_free(struct slabhead *hash, int hashsize); 176static void uma_timeout(void *); 177static void uma_startup3(void); 178static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t); 179static void uma_zfree_internal(uma_zone_t, void *, void *, int); 180static void bucket_enable(void); 181void uma_print_zone(uma_zone_t); 182void uma_print_stats(void); 183static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); 184 185SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, 186 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 187SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); 188 189/* 190 * This routine checks to see whether or not it's safe to enable buckets. 191 */ 192 193static void 194bucket_enable(void) 195{ 196 if (cnt.v_free_count < cnt.v_free_min) 197 bucketdisable = 1; 198 else 199 bucketdisable = 0; 200} 201 202 203/* 204 * Routine called by timeout which is used to fire off some time interval 205 * based calculations. (working set, stats, etc.) 206 * 207 * Arguments: 208 * arg Unused 209 * 210 * Returns: 211 * Nothing 212 */ 213static void 214uma_timeout(void *unused) 215{ 216 bucket_enable(); 217 zone_foreach(zone_timeout); 218 219 /* Reschedule this event */ 220 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); 221} 222 223/* 224 * Routine to perform timeout driven calculations. This does the working set 225 * as well as hash expanding, and per cpu statistics aggregation. 226 * 227 * Arguments: 228 * zone The zone to operate on 229 * 230 * Returns: 231 * Nothing 232 */ 233static void 234zone_timeout(uma_zone_t zone) 235{ 236 uma_cache_t cache; 237 u_int64_t alloc; 238 int free; 239 int cpu; 240 241 alloc = 0; 242 free = 0; 243 244 /* 245 * Aggregate per cpu cache statistics back to the zone. 246 * 247 * I may rewrite this to set a flag in the per cpu cache instead of 248 * locking. If the flag is not cleared on the next round I will have 249 * to lock and do it here instead so that the statistics don't get too 250 * far out of sync. 251 */ 252 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) { 253 for (cpu = 0; cpu < maxcpu; cpu++) { 254 if (CPU_ABSENT(cpu)) 255 continue; 256 CPU_LOCK(zone, cpu); 257 cache = &zone->uz_cpu[cpu]; 258 /* Add them up, and reset */ 259 alloc += cache->uc_allocs; 260 cache->uc_allocs = 0; 261 if (cache->uc_allocbucket) 262 free += cache->uc_allocbucket->ub_ptr + 1; 263 if (cache->uc_freebucket) 264 free += cache->uc_freebucket->ub_ptr + 1; 265 CPU_UNLOCK(zone, cpu); 266 } 267 } 268 269 /* Now push these stats back into the zone.. 
*/ 270 ZONE_LOCK(zone); 271 zone->uz_allocs += alloc; 272 273 /* 274 * cachefree is an instantanious snapshot of what is in the per cpu 275 * caches, not an accurate counter 276 */ 277 zone->uz_cachefree = free; 278 279 /* 280 * Expand the zone hash table. 281 * 282 * This is done if the number of slabs is larger than the hash size. 283 * What I'm trying to do here is completely reduce collisions. This 284 * may be a little aggressive. Should I allow for two collisions max? 285 */ 286 287 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) && 288 !(zone->uz_flags & UMA_ZFLAG_MALLOC)) { 289 if (zone->uz_pages / zone->uz_ppera 290 >= zone->uz_hash.uh_hashsize) { 291 struct slabhead *newhash; 292 int newsize; 293 294 newsize = zone->uz_hash.uh_hashsize; 295 ZONE_UNLOCK(zone); 296 newhash = hash_alloc(&newsize); 297 ZONE_LOCK(zone); 298 hash_expand(&zone->uz_hash, newhash, newsize); 299 } 300 } 301 302 /* 303 * Here we compute the working set size as the total number of items 304 * left outstanding since the last time interval. This is slightly 305 * suboptimal. What we really want is the highest number of outstanding 306 * items during the last time quantum. This should be close enough. 307 * 308 * The working set size is used to throttle the zone_drain function. 309 * We don't want to return memory that we may need again immediately. 310 */ 311 alloc = zone->uz_allocs - zone->uz_oallocs; 312 zone->uz_oallocs = zone->uz_allocs; 313 zone->uz_wssize = alloc; 314 315 ZONE_UNLOCK(zone); 316} 317 318/* 319 * Allocate and zero fill the next sized hash table from the appropriate 320 * backing store. 321 * 322 * Arguments: 323 * oldsize On input it's the size we're currently at and on output 324 * it is the expanded size. 325 * 326 * Returns: 327 * slabhead The new hash bucket or NULL if the allocation failed. 328 */ 329struct slabhead * 330hash_alloc(int *oldsize) 331{ 332 struct slabhead *newhash; 333 int newsize; 334 int alloc; 335 336 /* We're just going to go to a power of two greater */ 337 if (*oldsize) { 338 newsize = (*oldsize) * 2; 339 alloc = sizeof(newhash[0]) * newsize; 340 /* XXX Shouldn't be abusing DEVBUF here */ 341 newhash = (struct slabhead *)malloc(alloc, M_DEVBUF, M_NOWAIT); 342 } else { 343 alloc = sizeof(newhash[0]) * UMA_HASH_SIZE_INIT; 344 newhash = uma_zalloc_internal(hashzone, NULL, M_WAITOK, NULL); 345 newsize = UMA_HASH_SIZE_INIT; 346 } 347 if (newhash) 348 bzero(newhash, alloc); 349 350 *oldsize = newsize; 351 352 return (newhash); 353} 354 355/* 356 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout 357 * to reduce collisions. This must not be done in the regular allocation path, 358 * otherwise, we can recurse on the vm while allocating pages. 359 * 360 * Arguments: 361 * hash The hash you want to expand by a factor of two. 362 * 363 * Returns: 364 * Nothing 365 * 366 * Discussion: 367 */ 368static void 369hash_expand(struct uma_hash *hash, struct slabhead *newhash, int newsize) 370{ 371 struct slabhead *oldhash; 372 uma_slab_t slab; 373 int oldsize; 374 int hval; 375 int i; 376 377 if (!newhash) 378 return; 379 380 oldsize = hash->uh_hashsize; 381 oldhash = hash->uh_slab_hash; 382 383 if (oldsize >= newsize) { 384 hash_free(newhash, newsize); 385 return; 386 } 387 388 hash->uh_hashmask = newsize - 1; 389 390 /* 391 * I need to investigate hash algorithms for resizing without a 392 * full rehash. 
393 */ 394 395 for (i = 0; i < oldsize; i++) 396 while (!SLIST_EMPTY(&hash->uh_slab_hash[i])) { 397 slab = SLIST_FIRST(&hash->uh_slab_hash[i]); 398 SLIST_REMOVE_HEAD(&hash->uh_slab_hash[i], us_hlink); 399 hval = UMA_HASH(hash, slab->us_data); 400 SLIST_INSERT_HEAD(&newhash[hval], slab, us_hlink); 401 } 402 403 if (oldhash) 404 hash_free(oldhash, oldsize); 405 406 hash->uh_slab_hash = newhash; 407 hash->uh_hashsize = newsize; 408 409 return; 410} 411 412/* 413 * Free the hash bucket to the appropriate backing store. 414 * 415 * Arguments: 416 * slab_hash The hash bucket we're freeing 417 * hashsize The number of entries in that hash bucket 418 * 419 * Returns: 420 * Nothing 421 */ 422static void 423hash_free(struct slabhead *slab_hash, int hashsize) 424{ 425 if (hashsize == UMA_HASH_SIZE_INIT) 426 uma_zfree_internal(hashzone, 427 slab_hash, NULL, 0); 428 else 429 free(slab_hash, M_DEVBUF); 430} 431 432/* 433 * Frees all outstanding items in a bucket 434 * 435 * Arguments: 436 * zone The zone to free to, must be unlocked. 437 * bucket The free/alloc bucket with items, cpu queue must be locked. 438 * 439 * Returns: 440 * Nothing 441 */ 442 443static void 444bucket_drain(uma_zone_t zone, uma_bucket_t bucket) 445{ 446 uma_slab_t slab; 447 int mzone; 448 void *item; 449 450 if (bucket == NULL) 451 return; 452 453 slab = NULL; 454 mzone = 0; 455 456 /* We have to lookup the slab again for malloc.. */ 457 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 458 mzone = 1; 459 460 while (bucket->ub_ptr > -1) { 461 item = bucket->ub_bucket[bucket->ub_ptr]; 462#ifdef INVARIANTS 463 bucket->ub_bucket[bucket->ub_ptr] = NULL; 464 KASSERT(item != NULL, 465 ("bucket_drain: botched ptr, item is NULL")); 466#endif 467 bucket->ub_ptr--; 468 /* 469 * This is extremely inefficient. The slab pointer was passed 470 * to uma_zfree_arg, but we lost it because the buckets don't 471 * hold them. This will go away when free() gets a size passed 472 * to it. 473 */ 474 if (mzone) 475 slab = hash_sfind(mallochash, 476 (u_int8_t *)((unsigned long)item & 477 (~UMA_SLAB_MASK))); 478 uma_zfree_internal(zone, item, slab, 1); 479 } 480} 481 482/* 483 * Drains the per cpu caches for a zone. 484 * 485 * Arguments: 486 * zone The zone to drain, must be unlocked. 487 * 488 * Returns: 489 * Nothing 490 * 491 * This function returns with the zone locked so that the per cpu queues can 492 * not be filled until zone_drain is finished. 493 * 494 */ 495static void 496cache_drain(uma_zone_t zone) 497{ 498 uma_bucket_t bucket; 499 uma_cache_t cache; 500 int cpu; 501 502 /* 503 * Flush out the per cpu queues. 504 * 505 * XXX This causes unnecessary thrashing due to immediately having 506 * empty per cpu queues. I need to improve this. 507 */ 508 509 /* 510 * We have to lock each cpu cache before locking the zone 511 */ 512 ZONE_UNLOCK(zone); 513 514 for (cpu = 0; cpu < maxcpu; cpu++) { 515 if (CPU_ABSENT(cpu)) 516 continue; 517 CPU_LOCK(zone, cpu); 518 cache = &zone->uz_cpu[cpu]; 519 bucket_drain(zone, cache->uc_allocbucket); 520 bucket_drain(zone, cache->uc_freebucket); 521 } 522 523 /* 524 * Drain the bucket queues and free the buckets, we just keep two per 525 * cpu (alloc/free). 526 */ 527 ZONE_LOCK(zone); 528 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 529 LIST_REMOVE(bucket, ub_link); 530 ZONE_UNLOCK(zone); 531 bucket_drain(zone, bucket); 532 uma_zfree_internal(bucketzone, bucket, NULL, 0); 533 ZONE_LOCK(zone); 534 } 535 536 /* Now we do the free queue.. 
*/ 537 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 538 LIST_REMOVE(bucket, ub_link); 539 uma_zfree_internal(bucketzone, bucket, NULL, 0); 540 } 541 542 /* We unlock here, but they will all block until the zone is unlocked */ 543 for (cpu = 0; cpu < maxcpu; cpu++) { 544 if (CPU_ABSENT(cpu)) 545 continue; 546 CPU_UNLOCK(zone, cpu); 547 } 548 549 zone->uz_cachefree = 0; 550} 551 552/* 553 * Frees pages from a zone back to the system. This is done on demand from 554 * the pageout daemon. 555 * 556 * Arguments: 557 * zone The zone to free pages from 558 * all Should we drain all items? 559 * 560 * Returns: 561 * Nothing. 562 */ 563static void 564zone_drain(uma_zone_t zone) 565{ 566 uma_slab_t slab; 567 uma_slab_t n; 568 u_int64_t extra; 569 u_int8_t flags; 570 u_int8_t *mem; 571 int i; 572 573 /* 574 * We don't want to take pages from staticly allocated zones at this 575 * time 576 */ 577 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL) 578 return; 579 580 ZONE_LOCK(zone); 581 582 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) 583 cache_drain(zone); 584 585 if (zone->uz_free < zone->uz_wssize) 586 goto finished; 587#ifdef UMA_DEBUG 588 printf("%s working set size: %llu free items: %u\n", 589 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free); 590#endif 591 extra = zone->uz_free - zone->uz_wssize; 592 extra /= zone->uz_ipers; 593 594 /* extra is now the number of extra slabs that we can free */ 595 596 if (extra == 0) 597 goto finished; 598 599 slab = LIST_FIRST(&zone->uz_free_slab); 600 while (slab && extra) { 601 n = LIST_NEXT(slab, us_link); 602 603 /* We have no where to free these to */ 604 if (slab->us_flags & UMA_SLAB_BOOT) { 605 slab = n; 606 continue; 607 } 608 609 LIST_REMOVE(slab, us_link); 610 zone->uz_pages -= zone->uz_ppera; 611 zone->uz_free -= zone->uz_ipers; 612 if (zone->uz_fini) 613 for (i = 0; i < zone->uz_ipers; i++) 614 zone->uz_fini( 615 slab->us_data + (zone->uz_rsize * i), 616 zone->uz_size); 617 flags = slab->us_flags; 618 mem = slab->us_data; 619 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) { 620 if (zone->uz_flags & UMA_ZFLAG_MALLOC) { 621 UMA_HASH_REMOVE(mallochash, 622 slab, slab->us_data); 623 } else { 624 UMA_HASH_REMOVE(&zone->uz_hash, 625 slab, slab->us_data); 626 } 627 uma_zfree_internal(slabzone, slab, NULL, 0); 628 } else if (zone->uz_flags & UMA_ZFLAG_MALLOC) 629 UMA_HASH_REMOVE(mallochash, slab, slab->us_data); 630#ifdef UMA_DEBUG 631 printf("%s: Returning %d bytes.\n", 632 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera); 633#endif 634 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags); 635 636 slab = n; 637 extra--; 638 } 639 640finished: 641 ZONE_UNLOCK(zone); 642} 643 644/* 645 * Allocate a new slab for a zone. This does not insert the slab onto a list. 646 * 647 * Arguments: 648 * zone The zone to allocate slabs for 649 * wait Shall we wait? 650 * 651 * Returns: 652 * The slab that was allocated or NULL if there is no memory and the 653 * caller specified M_NOWAIT. 
654 * 655 */ 656static uma_slab_t 657slab_zalloc(uma_zone_t zone, int wait) 658{ 659 uma_slab_t slab; /* Starting slab */ 660 u_int8_t *mem; 661 u_int8_t flags; 662 int i; 663 664 slab = NULL; 665 666#ifdef UMA_DEBUG 667 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); 668#endif 669 ZONE_UNLOCK(zone); 670 671 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) { 672 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL); 673 if (slab == NULL) { 674 ZONE_LOCK(zone); 675 return NULL; 676 } 677 } 678 679 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) { 680 mtx_lock(&Giant); 681 mem = zone->uz_allocf(zone, 682 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait); 683 mtx_unlock(&Giant); 684 if (mem == NULL) { 685 ZONE_LOCK(zone); 686 return (NULL); 687 } 688 } else { 689 uma_slab_t tmps; 690 691 if (zone->uz_ppera > 1) 692 panic("UMA: Attemping to allocate multiple pages before vm has started.\n"); 693 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 694 panic("Mallocing before uma_startup2 has been called.\n"); 695 if (uma_boot_free == 0) 696 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n"); 697 tmps = LIST_FIRST(&uma_boot_pages); 698 LIST_REMOVE(tmps, us_link); 699 uma_boot_free--; 700 mem = tmps->us_data; 701 } 702 703 ZONE_LOCK(zone); 704 705 /* Alloc slab structure for offpage, otherwise adjust it's position */ 706 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) { 707 slab = (uma_slab_t )(mem + zone->uz_pgoff); 708 } else { 709 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) 710 UMA_HASH_INSERT(&zone->uz_hash, slab, mem); 711 } 712 if (zone->uz_flags & UMA_ZFLAG_MALLOC) { 713#ifdef UMA_DEBUG 714 printf("Inserting %p into malloc hash from slab %p\n", 715 mem, slab); 716#endif 717 /* XXX Yikes! No lock on the malloc hash! */ 718 UMA_HASH_INSERT(mallochash, slab, mem); 719 } 720 721 slab->us_zone = zone; 722 slab->us_data = mem; 723 724 /* 725 * This is intended to spread data out across cache lines. 726 * 727 * This code doesn't seem to work properly on x86, and on alpha 728 * it makes absolutely no performance difference. I'm sure it could 729 * use some tuning, but sun makes outrageous claims about it's 730 * performance. 731 */ 732#if 0 733 if (zone->uz_cachemax) { 734 slab->us_data += zone->uz_cacheoff; 735 zone->uz_cacheoff += UMA_CACHE_INC; 736 if (zone->uz_cacheoff > zone->uz_cachemax) 737 zone->uz_cacheoff = 0; 738 } 739#endif 740 741 slab->us_freecount = zone->uz_ipers; 742 slab->us_firstfree = 0; 743 slab->us_flags = flags; 744 for (i = 0; i < zone->uz_ipers; i++) 745 slab->us_freelist[i] = i+1; 746 747 if (zone->uz_init) 748 for (i = 0; i < zone->uz_ipers; i++) 749 zone->uz_init(slab->us_data + (zone->uz_rsize * i), 750 zone->uz_size); 751 752 zone->uz_pages += zone->uz_ppera; 753 zone->uz_free += zone->uz_ipers; 754 755 return (slab); 756} 757 758/* 759 * Allocates a number of pages from the system 760 * 761 * Arguments: 762 * zone Unused 763 * bytes The number of bytes requested 764 * wait Shall we wait? 765 * 766 * Returns: 767 * A pointer to the alloced memory or possibly 768 * NULL if M_NOWAIT is set. 769 */ 770static void * 771page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 772{ 773 void *p; /* Returned page */ 774 775 /* 776 * XXX The original zone allocator did this, but I don't think it's 777 * necessary in current. 
778 */ 779 780 if (lockstatus(&kernel_map->lock, NULL)) { 781 *pflag = UMA_SLAB_KMEM; 782 p = (void *) kmem_malloc(kmem_map, bytes, wait); 783 } else { 784 *pflag = UMA_SLAB_KMAP; 785 p = (void *) kmem_alloc(kernel_map, bytes); 786 } 787 788 return (p); 789} 790 791/* 792 * Allocates a number of pages from within an object 793 * 794 * Arguments: 795 * zone Unused 796 * bytes The number of bytes requested 797 * wait Shall we wait? 798 * 799 * Returns: 800 * A pointer to the alloced memory or possibly 801 * NULL if M_NOWAIT is set. 802 */ 803static void * 804obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 805{ 806 vm_offset_t zkva; 807 vm_offset_t retkva; 808 vm_page_t p; 809 int pages; 810 811 retkva = NULL; 812 pages = zone->uz_pages; 813 814 /* 815 * This looks a little weird since we're getting one page at a time 816 */ 817 while (bytes > 0) { 818 p = vm_page_alloc(zone->uz_obj, pages, 819 VM_ALLOC_INTERRUPT); 820 if (p == NULL) 821 return (NULL); 822 823 zkva = zone->uz_kva + pages * PAGE_SIZE; 824 if (retkva == NULL) 825 retkva = zkva; 826 pmap_qenter(zkva, &p, 1); 827 bytes -= PAGE_SIZE; 828 pages += 1; 829 } 830 831 *flags = UMA_SLAB_PRIV; 832 833 return ((void *)retkva); 834} 835 836/* 837 * Frees a number of pages to the system 838 * 839 * Arguments: 840 * mem A pointer to the memory to be freed 841 * size The size of the memory being freed 842 * flags The original p->us_flags field 843 * 844 * Returns: 845 * Nothing 846 * 847 */ 848static void 849page_free(void *mem, int size, u_int8_t flags) 850{ 851 vm_map_t map; 852 if (flags & UMA_SLAB_KMEM) 853 map = kmem_map; 854 else if (flags & UMA_SLAB_KMAP) 855 map = kernel_map; 856 else 857 panic("UMA: page_free used with invalid flags %d\n", flags); 858 859 kmem_free(map, (vm_offset_t)mem, size); 860} 861 862/* 863 * Zero fill initializer 864 * 865 * Arguments/Returns follow uma_init specifications 866 * 867 */ 868static void 869zero_init(void *mem, int size) 870{ 871 bzero(mem, size); 872} 873 874/* 875 * Finish creating a small uma zone. This calculates ipers, and the zone size. 876 * 877 * Arguments 878 * zone The zone we should initialize 879 * 880 * Returns 881 * Nothing 882 */ 883static void 884zone_small_init(uma_zone_t zone) 885{ 886 int rsize; 887 int memused; 888 int ipers; 889 890 rsize = zone->uz_size; 891 892 if (rsize < UMA_SMALLEST_UNIT) 893 rsize = UMA_SMALLEST_UNIT; 894 895 if (rsize & zone->uz_align) 896 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1); 897 898 zone->uz_rsize = rsize; 899 900 rsize += 1; /* Account for the byte of linkage */ 901 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize; 902 zone->uz_ppera = 1; 903 904 memused = zone->uz_ipers * zone->uz_rsize; 905 906 /* Can we do any better? */ 907 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) { 908 if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 909 return; 910 ipers = UMA_SLAB_SIZE / zone->uz_rsize; 911 if (ipers > zone->uz_ipers) { 912 zone->uz_flags |= UMA_ZFLAG_OFFPAGE; 913 zone->uz_ipers = ipers; 914 } 915 } 916 917} 918 919/* 920 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do 921 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be 922 * more complicated. 

/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments
 *	zone	The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_large_init(uma_zone_t zone)
{
        int pages;

        pages = zone->uz_size / UMA_SLAB_SIZE;

        /* Account for remainder */
        if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
                pages++;

        zone->uz_ppera = pages;
        zone->uz_ipers = 1;

        zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
        zone->uz_rsize = zone->uz_size;
}

/*
 * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
 * the zone onto the global zone list.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata	Actually uma_zctor_args
 */
static void
zone_ctor(void *mem, int size, void *udata)
{
        struct uma_zctor_args *arg = udata;
        uma_zone_t zone = mem;
        int privlc;
        int cplen;
        int cpu;

        bzero(zone, size);
        zone->uz_name = arg->name;
        zone->uz_size = arg->size;
        zone->uz_ctor = arg->ctor;
        zone->uz_dtor = arg->dtor;
        zone->uz_init = arg->uminit;
        zone->uz_align = arg->align;
        zone->uz_free = 0;
        zone->uz_pages = 0;
        zone->uz_flags = 0;
        zone->uz_allocf = page_alloc;
        zone->uz_freef = page_free;

        if (arg->flags & UMA_ZONE_ZINIT)
                zone->uz_init = zero_init;

        if (arg->flags & UMA_ZONE_INTERNAL)
                zone->uz_flags |= UMA_ZFLAG_INTERNAL;

        if (arg->flags & UMA_ZONE_MALLOC)
                zone->uz_flags |= UMA_ZFLAG_MALLOC;

        if (arg->flags & UMA_ZONE_NOFREE)
                zone->uz_flags |= UMA_ZFLAG_NOFREE;

        if (zone->uz_size > UMA_SLAB_SIZE)
                zone_large_init(zone);
        else
                zone_small_init(zone);

        if (arg->flags & UMA_ZONE_MTXCLASS)
                privlc = 1;
        else
                privlc = 0;

        /* We do this so that the per cpu lock name is unique for each zone */
        memcpy(zone->uz_lname, "PCPU ", 5);
        cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
        memcpy(zone->uz_lname+5, zone->uz_name, cplen);
        zone->uz_lname[LOCKNAME_LEN - 1] = '\0';

        /*
         * If we're putting the slab header in the actual page we need to
         * figure out where in each page it goes.  This calculates a
         * right-justified offset into the memory on an ALIGN_PTR boundary.
         */
        if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
                int totsize;
                int waste;

                /* Size of the slab struct and free list */
                totsize = sizeof(struct uma_slab) + zone->uz_ipers;
                if (totsize & UMA_ALIGN_PTR)
                        totsize = (totsize & ~UMA_ALIGN_PTR) +
                            (UMA_ALIGN_PTR + 1);
                zone->uz_pgoff = UMA_SLAB_SIZE - totsize;

                waste = zone->uz_pgoff;
                waste -= (zone->uz_ipers * zone->uz_rsize);

                /*
                 * This calculates how much space we have for cache line size
                 * optimizations.  It works by offsetting each slab slightly.
                 * Currently it breaks on x86, and so it is disabled.
                 */
                if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
                        zone->uz_cachemax = waste - UMA_CACHE_INC;
                        zone->uz_cacheoff = 0;
                }

                totsize = zone->uz_pgoff + sizeof(struct uma_slab)
                    + zone->uz_ipers;
                /* I don't think it's possible, but I'll make sure anyway */
                if (totsize > UMA_SLAB_SIZE) {
                        printf("zone %s ipers %d rsize %d size %d\n",
                            zone->uz_name, zone->uz_ipers, zone->uz_rsize,
                            zone->uz_size);
                        panic("UMA slab won't fit.\n");
                }
        } else {
                struct slabhead *newhash;
                int hashsize;

                hashsize = 0;
                newhash = hash_alloc(&hashsize);
                hash_expand(&zone->uz_hash, newhash, hashsize);
                zone->uz_pgoff = 0;
        }

#ifdef UMA_DEBUG
        printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
            zone->uz_name, zone,
            zone->uz_size, zone->uz_ipers,
            zone->uz_ppera, zone->uz_pgoff);
#endif
        ZONE_LOCK_INIT(zone, privlc);

        mtx_lock(&uma_mtx);
        LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
        mtx_unlock(&uma_mtx);

        /*
         * Some internal zones don't have room allocated for the per cpu
         * caches.  If we're internal, bail out here.
         */
        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
                return;

        if (zone->uz_ipers < UMA_BUCKET_SIZE)
                zone->uz_count = zone->uz_ipers - 1;
        else
                zone->uz_count = UMA_BUCKET_SIZE - 1;

        for (cpu = 0; cpu < maxcpu; cpu++)
                CPU_LOCK_INIT(zone, cpu, privlc);
}

/*
 * Zone header dtor.  This frees all data, destroys locks, frees the hash
 * table and removes the zone from the global list.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata	unused
 */
static void
zone_dtor(void *arg, int size, void *udata)
{
        uma_zone_t zone;
        int cpu;

        zone = (uma_zone_t)arg;

        mtx_lock(&uma_mtx);
        LIST_REMOVE(zone, uz_link);
        mtx_unlock(&uma_mtx);

        ZONE_LOCK(zone);
        zone->uz_wssize = 0;
        ZONE_UNLOCK(zone);

        zone_drain(zone);
        ZONE_LOCK(zone);
        if (zone->uz_free != 0)
                printf("Zone %s was not empty.  Lost %d pages of memory.\n",
                    zone->uz_name, zone->uz_pages);

        /* Internal zones never initialized per cpu locks; see zone_ctor(). */
        if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
                for (cpu = 0; cpu < maxcpu; cpu++)
                        CPU_LOCK_FINI(zone, cpu);

        if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
                hash_free(zone->uz_hash.uh_slab_hash,
                    zone->uz_hash.uh_hashsize);

        ZONE_UNLOCK(zone);
        ZONE_LOCK_FINI(zone);
}
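
/*
 * Worked example of the right-justified pgoff computation in zone_ctor()
 * above (illustrative numbers only: a 40-byte slab header, 98 items, and an
 * UMA_ALIGN_PTR mask of 7): totsize = 40 + 98 = 138, rounded up to the next
 * pointer boundary gives 144, so the slab header lands at page offset
 * 4096 - 144 = 3952 and the items themselves start at offset 0.
 */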

/*
 * Traverses every zone in the system and calls a callback
 *
 * Arguments:
 *	zfunc	A pointer to a function which accepts a zone
 *		as an argument.
 *
 * Returns:
 *	Nothing
 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
        uma_zone_t zone;

        mtx_lock(&uma_mtx);
        LIST_FOREACH(zone, &uma_zones, uz_link) {
                zfunc(zone);
        }
        mtx_unlock(&uma_mtx);
}
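
/*
 * Illustration only (hypothetical callback, not part of UMA): zone_foreach()
 * is how whole-system sweeps such as zone_drain are applied, e.g.:
 */
#if 0
static void
zone_report_pages(uma_zone_t zone)
{
        printf("%s: %d pages\n", zone->uz_name, zone->uz_pages);
}

/* ... zone_foreach(zone_report_pages); ... */
#endif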

/* Public functions */
/* See uma.h */
void
uma_startup(void *bootmem)
{
        struct uma_zctor_args args;
        uma_slab_t slab;
        int slabsize;
        int i;

#ifdef UMA_DEBUG
        printf("Creating uma zone headers zone.\n");
#endif
#ifdef SMP
        maxcpu = mp_maxid + 1;
#else
        maxcpu = 1;
#endif
#ifdef UMA_DEBUG
        printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
        Debugger("stop");
#endif
        mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
        /* "manually" create the initial zone */
        args.name = "UMA Zones";
        args.size = sizeof(struct uma_zone) +
            (sizeof(struct uma_cache) * (maxcpu - 1));
        args.ctor = zone_ctor;
        args.dtor = zone_dtor;
        args.uminit = zero_init;
        args.fini = NULL;
        args.align = 32 - 1;
        args.flags = UMA_ZONE_INTERNAL;
        /* The initial zone has no per cpu queues so it's smaller */
        zone_ctor(zones, sizeof(struct uma_zone), &args);

#ifdef UMA_DEBUG
        printf("Filling boot free list.\n");
#endif
        for (i = 0; i < UMA_BOOT_PAGES; i++) {
                slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
                slab->us_data = (u_int8_t *)slab;
                slab->us_flags = UMA_SLAB_BOOT;
                LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
                uma_boot_free++;
        }

#ifdef UMA_DEBUG
        printf("Creating slab zone.\n");
#endif

        /*
         * This is the max number of free list items we'll have with
         * offpage slabs.
         */
        slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
        slabsize /= UMA_MAX_WASTE;
        slabsize++;                     /* In case the division rounded down */
        slabsize += sizeof(struct uma_slab);

        /* Now make a zone for slab headers */
        slabzone = uma_zcreate("UMA Slabs",
            slabsize,
            NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

        hashzone = uma_zcreate("UMA Hash",
            sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
            NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

        bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
            NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

#ifdef UMA_DEBUG
        printf("UMA startup complete.\n");
#endif
}

/* See uma.h */
void
uma_startup2(void *hashmem, u_long elems)
{
        bzero(hashmem, elems * sizeof(void *));
        mallochash->uh_slab_hash = hashmem;
        mallochash->uh_hashsize = elems;
        mallochash->uh_hashmask = elems - 1;
        booted = 1;
        bucket_enable();
#ifdef UMA_DEBUG
        printf("UMA startup2 complete.\n");
#endif
}

/*
 * Initialize our callout handle
 */
static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
        printf("Starting callout.\n");
#endif
        callout_init(&uma_callout, 0);
        callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
        printf("UMA startup3 complete.\n");
#endif
}

/* See uma.h */
uma_zone_t
uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init uminit,
    uma_fini fini, int align, u_int16_t flags)
{
        struct uma_zctor_args args;

        /* This stuff is essential for the zone ctor */
        args.name = name;
        args.size = size;
        args.ctor = ctor;
        args.dtor = dtor;
        args.uminit = uminit;
        args.fini = fini;
        args.align = align;
        args.flags = flags;

        return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL));
}

/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
{
        uma_zfree_internal(zones, zone, NULL, 0);
}
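
/*
 * Usage sketch (hypothetical consumer, not from this file): a subsystem
 * would typically create its zone once at initialization time.  "struct foo"
 * and foo_zone are made-up names for illustration.
 */
#if 0
struct foo {
        int f_dummy;
};

static uma_zone_t foo_zone;

static void
foo_init(void)
{
        foo_zone = uma_zcreate("foo", sizeof(struct foo),
            NULL, NULL, NULL, NULL,     /* no ctor/dtor/init/fini */
            UMA_ALIGN_PTR, 0);
}
#endif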

/* See uma.h */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
        void *item;
        uma_cache_t cache;
        uma_bucket_t bucket;
        int cpu;

        /* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif

zalloc_restart:
        cpu = PCPU_GET(cpuid);
        CPU_LOCK(zone, cpu);
        cache = &zone->uz_cpu[cpu];

zalloc_start:
        bucket = cache->uc_allocbucket;

        if (bucket) {
                if (bucket->ub_ptr > -1) {
                        item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
                        bucket->ub_bucket[bucket->ub_ptr] = NULL;
#endif
                        bucket->ub_ptr--;
                        KASSERT(item != NULL,
                            ("uma_zalloc: Bucket pointer mangled."));
                        cache->uc_allocs++;
                        CPU_UNLOCK(zone, cpu);
                        if (zone->uz_ctor)
                                zone->uz_ctor(item, zone->uz_size, udata);
                        if (flags & M_ZERO)
                                bzero(item, zone->uz_size);
                        return (item);
                } else if (cache->uc_freebucket) {
                        /*
                         * We have run out of items in our allocbucket.
                         * See if we can switch with our free bucket.
                         */
                        if (cache->uc_freebucket->ub_ptr > -1) {
                                uma_bucket_t swap;

#ifdef UMA_DEBUG_ALLOC
                                printf("uma_zalloc: Swapping empty with alloc.\n");
#endif
                                swap = cache->uc_freebucket;
                                cache->uc_freebucket = cache->uc_allocbucket;
                                cache->uc_allocbucket = swap;

                                goto zalloc_start;
                        }
                }
        }
        ZONE_LOCK(zone);
        /* Since we have locked the zone we may as well send back our stats */
        zone->uz_allocs += cache->uc_allocs;
        cache->uc_allocs = 0;

        /* Our old one is now a free bucket */
        if (cache->uc_allocbucket) {
                KASSERT(cache->uc_allocbucket->ub_ptr == -1,
                    ("uma_zalloc_arg: Freeing a non free bucket."));
                LIST_INSERT_HEAD(&zone->uz_free_bucket,
                    cache->uc_allocbucket, ub_link);
                cache->uc_allocbucket = NULL;
        }

        /* Check the free list for a new alloc bucket */
        if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
                KASSERT(bucket->ub_ptr != -1,
                    ("uma_zalloc_arg: Returning an empty bucket."));

                LIST_REMOVE(bucket, ub_link);
                cache->uc_allocbucket = bucket;
                ZONE_UNLOCK(zone);
                goto zalloc_start;
        }
        /* Bump up our uz_count so we get here less often */
        if (zone->uz_count < UMA_BUCKET_SIZE - 1)
                zone->uz_count++;

        /* We are no longer associated with this cpu!!! */
        CPU_UNLOCK(zone, cpu);

        /*
         * Now let's just fill a bucket and put it on the free list.  If that
         * works we'll restart the allocation from the beginning.
         *
         * Try this zone's free list first so we don't allocate extra buckets.
         */

        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL)
                LIST_REMOVE(bucket, ub_link);

        /* Now we no longer need the zone lock. */
        ZONE_UNLOCK(zone);

        if (bucket == NULL)
                bucket = uma_zalloc_internal(bucketzone,
                    NULL, flags, NULL);

        if (bucket != NULL) {
#ifdef INVARIANTS
                bzero(bucket, bucketzone->uz_size);
#endif
                bucket->ub_ptr = -1;

                if (uma_zalloc_internal(zone, udata, flags, bucket))
                        goto zalloc_restart;
                else
                        uma_zfree_internal(bucketzone, bucket, NULL, 0);
        }
        /*
         * We may not get a bucket if we recurse, so
         * return an actual item.
         */
#ifdef UMA_DEBUG
        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

        return (uma_zalloc_internal(zone, udata, flags, NULL));
}
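
/*
 * Usage sketch (hypothetical, building on the made-up foo_zone above): the
 * M_ZERO flag zeroes the returned item after the constructor runs, so
 * callers can drop their own bzero.
 */
#if 0
static struct foo *
foo_alloc(void)
{
        return (uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO));
}

static void
foo_free(struct foo *fp)
{
        uma_zfree_arg(foo_zone, fp, NULL);
}
#endif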

/*
 * Allocates an item for an internal zone OR fills a bucket
 *
 * Arguments
 *	zone	The zone to alloc for.
 *	udata	The data to be passed to the constructor.
 *	flags	M_WAITOK, M_NOWAIT, M_ZERO.
 *	bucket	The bucket to fill or NULL
 *
 * Returns
 *	NULL if there is no memory and M_NOWAIT is set
 *	An item if called on an internal zone
 *	Non-NULL if called to fill a bucket and it was successful.
 *
 * Discussion:
 *	This was much cleaner before it had to do per cpu caches.  It is
 *	complicated now because it has to handle the simple internal case, and
 *	the more involved bucket filling and allocation.
 */
static void *
uma_zalloc_internal(uma_zone_t zone, void *udata, int flags, uma_bucket_t bucket)
{
        uma_slab_t slab;
        u_int8_t freei;
        void *item;

        item = NULL;

        /*
         * This is to stop us from allocating per cpu buckets while we're
         * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
         * boot pages.
         */

        if (bucketdisable && zone == bucketzone)
                return (NULL);

#ifdef UMA_DEBUG_ALLOC
        printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
        ZONE_LOCK(zone);

        /*
         * This code is here to limit the number of simultaneous bucket fills
         * for any given zone to the number of per cpu caches in this zone.
         * This is done so that we don't allocate more memory than we really
         * need.
         */

        if (bucket) {
#ifdef SMP
                if (zone->uz_fills >= mp_ncpus) {
#else
                if (zone->uz_fills > 1) {
#endif
                        ZONE_UNLOCK(zone);
                        return (NULL);
                }

                zone->uz_fills++;
        }

new_slab:

        /* Find a slab with some space */
        if (zone->uz_free) {
                if (!LIST_EMPTY(&zone->uz_part_slab)) {
                        slab = LIST_FIRST(&zone->uz_part_slab);
                } else {
                        slab = LIST_FIRST(&zone->uz_free_slab);
                        LIST_REMOVE(slab, us_link);
                        LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
                }
        } else {
                /*
                 * This is to prevent us from recursively trying to allocate
                 * buckets.  The problem is that if an allocation forces us to
                 * grab a new bucket we will call page_alloc, which will go off
                 * and cause the vm to allocate vm_map_entries.  If we need new
                 * buckets there too we will recurse in kmem_alloc and bad
                 * things happen.  So instead we return a NULL bucket, and make
                 * the code that allocates buckets smart enough to deal with it.
                 */
                if (zone == bucketzone && zone->uz_recurse != 0) {
                        ZONE_UNLOCK(zone);
                        return (NULL);
                }
                while (zone->uz_maxpages &&
                    zone->uz_pages >= zone->uz_maxpages) {
                        zone->uz_flags |= UMA_ZFLAG_FULL;

                        if (flags & M_WAITOK)
                                msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
                        else
                                goto alloc_fail;

                        goto new_slab;
                }

                zone->uz_recurse++;
                slab = slab_zalloc(zone, flags);
                zone->uz_recurse--;
                /*
                 * We might not have been able to get a slab but another cpu
                 * could have while we were unlocked.  If we did get a slab put
                 * it on the partially used slab list.  If not check the free
                 * count and restart or fail accordingly.
                 */
                if (slab)
                        LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
                else if (zone->uz_free == 0)
                        goto alloc_fail;
                else
                        goto new_slab;
        }
        /*
         * If this is our first time through, put this guy on the list.
         */
        if (bucket != NULL && bucket->ub_ptr == -1)
                LIST_INSERT_HEAD(&zone->uz_full_bucket,
                    bucket, ub_link);

        while (slab->us_freecount) {
                freei = slab->us_firstfree;
                slab->us_firstfree = slab->us_freelist[freei];
#ifdef INVARIANTS
                slab->us_freelist[freei] = 255;
#endif
                slab->us_freecount--;
                zone->uz_free--;
                item = slab->us_data + (zone->uz_rsize * freei);

                if (bucket == NULL) {
                        zone->uz_allocs++;
                        break;
                }
                bucket->ub_bucket[++bucket->ub_ptr] = item;

                /* Don't overfill the bucket! */
                if (bucket->ub_ptr == zone->uz_count)
                        break;
        }

        /* Move this slab to the full list */
        if (slab->us_freecount == 0) {
                LIST_REMOVE(slab, us_link);
                LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
        }

        if (bucket != NULL) {
                /* Try to keep the buckets totally full, but don't block */
                if (bucket->ub_ptr < zone->uz_count) {
                        flags |= M_NOWAIT;
                        flags &= ~M_WAITOK;
                        goto new_slab;
                } else
                        zone->uz_fills--;
        }

        ZONE_UNLOCK(zone);

        /* Only construct at this time if we're not filling a bucket */
        if (bucket == NULL && zone->uz_ctor != NULL) {
                zone->uz_ctor(item, zone->uz_size, udata);
                if (flags & M_ZERO)
                        bzero(item, zone->uz_size);
        }

        return (item);

alloc_fail:
        if (bucket != NULL)
                zone->uz_fills--;
        ZONE_UNLOCK(zone);

        if (bucket != NULL && bucket->ub_ptr != -1)
                return (bucket);

        return (NULL);
}

/* See uma.h */
void
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
        uma_cache_t cache;
        uma_bucket_t bucket;
        int cpu;

        /* This is the fast path free */
#ifdef UMA_DEBUG_ALLOC_1
        printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
        /*
         * The race here is acceptable.  If we miss it we'll just have to wait
         * a little longer for the limits to be reset.
         */

        if (zone->uz_flags & UMA_ZFLAG_FULL)
                goto zfree_internal;

zfree_restart:
        cpu = PCPU_GET(cpuid);
        CPU_LOCK(zone, cpu);
        cache = &zone->uz_cpu[cpu];

zfree_start:
        bucket = cache->uc_freebucket;

        if (bucket) {
                /*
                 * Do we have room in our bucket?  It is OK for this uz count
                 * check to be slightly out of sync.
                 */

                if (bucket->ub_ptr < zone->uz_count) {
                        bucket->ub_ptr++;
                        KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
                            ("uma_zfree: Freeing to non free bucket index."));
                        bucket->ub_bucket[bucket->ub_ptr] = item;
                        if (zone->uz_dtor)
                                zone->uz_dtor(item, zone->uz_size, udata);
                        CPU_UNLOCK(zone, cpu);
                        return;
                } else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
                        printf("uma_zfree: Swapping buckets.\n");
#endif
                        /*
                         * We have run out of space in our freebucket.
                         * See if we can switch with our alloc bucket.
                         */
                        if (cache->uc_allocbucket->ub_ptr <
                            cache->uc_freebucket->ub_ptr) {
                                uma_bucket_t swap;

                                swap = cache->uc_freebucket;
                                cache->uc_freebucket = cache->uc_allocbucket;
                                cache->uc_allocbucket = swap;

                                goto zfree_start;
                        }
                }
        }

        /*
         * We can get here for two reasons:
         *
         * 1) The buckets are NULL
         * 2) The alloc and free buckets are both somewhat full.
         */

        ZONE_LOCK(zone);

        bucket = cache->uc_freebucket;
        cache->uc_freebucket = NULL;

        /* Can we throw this on the zone full list? */
        if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
                printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
                /* ub_ptr is pointing to the last free item */
                KASSERT(bucket->ub_ptr != -1,
                    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
                LIST_INSERT_HEAD(&zone->uz_full_bucket,
                    bucket, ub_link);
        }
        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
                LIST_REMOVE(bucket, ub_link);
                ZONE_UNLOCK(zone);
                cache->uc_freebucket = bucket;
                goto zfree_start;
        }
        /* We're done with this CPU now */
        CPU_UNLOCK(zone, cpu);

        /* And the zone.. */
        ZONE_UNLOCK(zone);

#ifdef UMA_DEBUG_ALLOC
        printf("uma_zfree: Allocating new free bucket.\n");
#endif
        bucket = uma_zalloc_internal(bucketzone,
            NULL, M_NOWAIT, NULL);
        if (bucket) {
#ifdef INVARIANTS
                bzero(bucket, bucketzone->uz_size);
#endif
                bucket->ub_ptr = -1;
                ZONE_LOCK(zone);
                LIST_INSERT_HEAD(&zone->uz_free_bucket,
                    bucket, ub_link);
                ZONE_UNLOCK(zone);
                goto zfree_restart;
        }

        /*
         * If nothing else caught this, we'll just do an internal free.
         */

zfree_internal:

        uma_zfree_internal(zone, item, udata, 0);

        return;
}

/*
 * Frees an item to an INTERNAL zone or allocates a free bucket
 *
 * Arguments:
 *	zone	The zone to free to
 *	item	The item we're freeing
 *	udata	User supplied data for the dtor
 *	skip	Skip the dtor, it was done in uma_zfree_arg
 */
static void
uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
{
        uma_slab_t slab;
        u_int8_t *mem;
        u_int8_t freei;

        ZONE_LOCK(zone);

        if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
                mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
                if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
                        slab = hash_sfind(&zone->uz_hash, mem);
                else {
                        mem += zone->uz_pgoff;
                        slab = (uma_slab_t)mem;
                }
        } else {
                slab = (uma_slab_t)udata;
        }

        /* Do we need to remove from any lists? */
        if (slab->us_freecount+1 == zone->uz_ipers) {
                LIST_REMOVE(slab, us_link);
                LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
        } else if (slab->us_freecount == 0) {
                LIST_REMOVE(slab, us_link);
                LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
        }

        /* Slab management stuff */
        freei = ((unsigned long)item - (unsigned long)slab->us_data)
            / zone->uz_rsize;
#ifdef INVARIANTS
        if (((freei * zone->uz_rsize) + slab->us_data) != item)
                panic("zone: %s(%p) slab %p freed address %p unaligned.\n",
                    zone->uz_name, zone, slab, item);
        if (freei >= zone->uz_ipers)
                panic("zone: %s(%p) slab %p freelist %i out of range 0-%d\n",
                    zone->uz_name, zone, slab, freei, zone->uz_ipers-1);

        if (slab->us_freelist[freei] != 255) {
                printf("Slab at %p, freei %d = %d.\n",
                    slab, freei, slab->us_freelist[freei]);
                panic("Duplicate free of item %p from zone %p(%s)\n",
                    item, zone, zone->uz_name);
        }
#endif
        slab->us_freelist[freei] = slab->us_firstfree;
        slab->us_firstfree = freei;
        slab->us_freecount++;

        /* Zone statistics */
        zone->uz_free++;

        if (!skip && zone->uz_dtor)
                zone->uz_dtor(item, zone->uz_size, udata);

        if (zone->uz_flags & UMA_ZFLAG_FULL) {
                if (zone->uz_pages < zone->uz_maxpages)
                        zone->uz_flags &= ~UMA_ZFLAG_FULL;

                /*
                 * We can handle one more allocation.  Note that the wakeup
                 * channel must match the msleep() on "zonelimit" above, so
                 * pass the zone pointer itself, not its address.
                 */
                wakeup_one(zone);
        }

        ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_max(uma_zone_t zone, int nitems)
{
        ZONE_LOCK(zone);
        if (zone->uz_ppera > 1)
                zone->uz_maxpages = nitems * zone->uz_ppera;
        else
                zone->uz_maxpages = nitems / zone->uz_ipers;

        if (zone->uz_maxpages * zone->uz_ipers < nitems)
                zone->uz_maxpages++;

        ZONE_UNLOCK(zone);
}
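
/*
 * Worked example for uma_zone_set_max() above (illustrative numbers only):
 * for a single-page zone with 98 items per slab, a limit of 200 items gives
 * maxpages = 200 / 98 = 2; since 2 * 98 = 196 < 200, the rounding check
 * bumps it to 3, so the zone may actually hold up to 294 items.  The limit
 * is enforced in whole pages, not individual items.
 */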

/* See uma.h */
void
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
        ZONE_LOCK(zone);

        zone->uz_freef = freef;

        ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
{
        ZONE_LOCK(zone);

        zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
        zone->uz_allocf = allocf;

        ZONE_UNLOCK(zone);
}

/* See uma.h */
int
uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
{
        int pages;
        vm_offset_t kva;

        mtx_lock(&Giant);

        pages = count / zone->uz_ipers;

        if (pages * zone->uz_ipers < count)
                pages++;

        kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);

        if (kva == 0) {
                mtx_unlock(&Giant);
                return (0);
        }

        if (obj == NULL)
                obj = vm_object_allocate(OBJT_DEFAULT,
                    zone->uz_maxpages);
        else
                _vm_object_allocate(OBJT_DEFAULT,
                    zone->uz_maxpages, obj);

        ZONE_LOCK(zone);
        zone->uz_kva = kva;
        zone->uz_obj = obj;
        zone->uz_maxpages = pages;

        zone->uz_allocf = obj_alloc;
        zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;

        ZONE_UNLOCK(zone);
        mtx_unlock(&Giant);

        return (1);
}

/* See uma.h */
void
uma_prealloc(uma_zone_t zone, int items)
{
        int slabs;
        uma_slab_t slab;

        ZONE_LOCK(zone);
        slabs = items / zone->uz_ipers;
        if (slabs * zone->uz_ipers < items)
                slabs++;

        while (slabs > 0) {
                slab = slab_zalloc(zone, M_WAITOK);
                LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
                slabs--;
        }
        ZONE_UNLOCK(zone);
}
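
/*
 * Usage sketch (hypothetical): uma_prealloc() is useful for zones that must
 * be able to allocate later without sleeping, e.g. reserving room for 64
 * items at initialization time with uma_prealloc(foo_zone, 64).  The slabs
 * are parked on the free-slab list, so subsequent M_NOWAIT allocations can
 * be satisfied without calling into the VM.
 */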

/* See uma.h */
void
uma_reclaim(void)
{
        /*
         * You might think that the delay below would improve performance since
         * the allocator will give away memory that it may ask for immediately.
         * Really, it makes things worse, since cpu cycles are so much cheaper
         * than disk activity.
         */
#if 0
        static struct timeval tv = {0};
        struct timeval now;
        getmicrouptime(&now);
        if (now.tv_sec > tv.tv_sec + 30)
                tv = now;
        else
                return;
#endif
#ifdef UMA_DEBUG
        printf("UMA: vm asked us to release pages!\n");
#endif
        bucket_enable();
        zone_foreach(zone_drain);

        /*
         * Some slabs may have been freed but this zone will be visited early
         * in the sweep above, so we visit it again here to free pages that
         * became empty once other zones were drained.  We have to do the same
         * for buckets.
         */
        zone_drain(slabzone);
        zone_drain(bucketzone);
}

void *
uma_large_malloc(int size, int wait)
{
        void *mem;
        uma_slab_t slab;
        u_int8_t flags;

        slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
        if (slab == NULL)
                return (NULL);

        mem = page_alloc(NULL, size, &flags, wait);
        if (mem) {
                slab->us_data = mem;
                slab->us_flags = flags | UMA_SLAB_MALLOC;
                slab->us_size = size;
                UMA_HASH_INSERT(mallochash, slab, mem);
        } else {
                uma_zfree_internal(slabzone, slab, NULL, 0);
        }

        return (mem);
}

void
uma_large_free(uma_slab_t slab)
{
        UMA_HASH_REMOVE(mallochash, slab, slab->us_data);
        page_free(slab->us_data, slab->us_size, slab->us_flags);
        uma_zfree_internal(slabzone, slab, NULL, 0);
}

void
uma_print_stats(void)
{
        zone_foreach(uma_print_zone);
}

void
uma_print_zone(uma_zone_t zone)
{
        printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
            zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
            zone->uz_ipers, zone->uz_ppera,
            (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
}

/*
 * Sysctl handler for vm.zone
 *
 * stolen from vm_zone.c
 */
static int
sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
{
        int error, len, cnt;
        const int linesize = 128;       /* conservative */
        int totalfree;
        char *tmpbuf, *offset;
        uma_zone_t z;
        char *p;

        cnt = 0;
        mtx_lock(&uma_mtx);
        LIST_FOREACH(z, &uma_zones, uz_link)
                cnt++;
        mtx_unlock(&uma_mtx);
        MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
            M_TEMP, M_WAITOK);
        len = snprintf(tmpbuf, linesize,
            "\nITEM            SIZE     LIMIT    USED    FREE  REQUESTS\n\n");
        if (cnt == 0)
                tmpbuf[len - 1] = '\0';
        error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
        if (error || cnt == 0)
                goto out;
        offset = tmpbuf;
        mtx_lock(&uma_mtx);
        LIST_FOREACH(z, &uma_zones, uz_link) {
                if (cnt == 0)   /* list may have changed size */
                        break;
                ZONE_LOCK(z);
                totalfree = z->uz_free + z->uz_cachefree;
                len = snprintf(offset, linesize,
                    "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
                    z->uz_name, z->uz_size,
                    z->uz_maxpages * z->uz_ipers,
                    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
                    totalfree,
                    (unsigned long long)z->uz_allocs);
                ZONE_UNLOCK(z);
                for (p = offset + 12; p > offset && *p == ' '; --p)
                        /* nothing */ ;
                p[1] = ':';
                cnt--;
                offset += len;
        }
        mtx_unlock(&uma_mtx);
        *offset++ = '\0';
        error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
out:
        FREE(tmpbuf, M_TEMP);
        return (error);
}
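
/*
 * Usage note (assumption: this handler is registered elsewhere as the
 * vm.zone sysctl): from userland, "sysctl vm.zone" would then print one
 * line per zone in the ITEM/SIZE/LIMIT/USED/FREE/REQUESTS format built
 * above, with the zone name colon-terminated by the fixup loop.
 */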