/* I should really use ktr.. */
/*
#define UMA_DEBUG 1
#define UMA_DEBUG_ALLOC 1
#define UMA_DEBUG_ALLOC_1 1
*/

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

#include <machine/vmparam.h>

/*
 * This is the zone and keg from which all zones are spawned.  The idea is that
 * even the zone & keg heads are allocated from the allocator, so we use the
 * bss section to bootstrap us.
 */
static struct uma_keg masterkeg;
static struct uma_zone masterzone_k;
static struct uma_zone masterzone_z;
static uma_zone_t kegs = &masterzone_k;
static uma_zone_t zones = &masterzone_z;

/* This is the zone from which all of uma_slab_t's are allocated. */
static uma_zone_t slabzone;
static uma_zone_t slabrefzone;	/* With refcounters (for UMA_ZONE_REFCNT) */

/*
 * The initial hash tables come out of this zone so they can be allocated
 * prior to malloc coming up.
 */
static uma_zone_t hashzone;

static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");

/*
 * Are we allowed to allocate buckets?
 */
static int bucketdisable = 1;

/* Linked list of all kegs in the system */
static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);

/* This mutex protects the keg list */
static struct mtx uma_mtx;

/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
    LIST_HEAD_INITIALIZER(&uma_boot_pages);

/* Count of free boottime pages */
static int uma_boot_free = 0;

/* Is the VM done starting up? */
static int booted = 0;

/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
static u_int uma_max_ipers;
static u_int uma_max_ipers_ref;

/*
 * This is the handle used to schedule events that need to happen
 * outside of the allocation fast path.
 */
static struct callout uma_callout;
#define	UMA_TIMEOUT	20		/* Seconds for callout interval. */

/*
 * This structure is passed as the zone ctor arg so that I don't have to create
 * a special allocation function just for zones.
 */
struct uma_zctor_args {
	char *name;
	size_t size;
	uma_ctor ctor;
	uma_dtor dtor;
	uma_init uminit;
	uma_fini fini;
	uma_keg_t keg;
	int align;
	u_int32_t flags;
};

struct uma_kctor_args {
	uma_zone_t zone;
	size_t size;
	uma_init uminit;
	uma_fini fini;
	int align;
	u_int32_t flags;
};

struct uma_bucket_zone {
	uma_zone_t	ubz_zone;
	char		*ubz_name;
	int		ubz_entries;
};

#define	BUCKET_MAX	128

struct uma_bucket_zone bucket_zones[] = {
	{ NULL, "16 Bucket", 16 },
	{ NULL, "32 Bucket", 32 },
	{ NULL, "64 Bucket", 64 },
	{ NULL, "128 Bucket", 128 },
	{ NULL, NULL, 0}
};

#define	BUCKET_SHIFT	4
#define	BUCKET_ZONES	((BUCKET_MAX >> BUCKET_SHIFT) + 1)

/*
 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
 * of approximately the right size.
 */
static uint8_t bucket_size[BUCKET_ZONES];

/*
 * Flags and enumerations to be passed to internal functions.
 */
enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };

#define	ZFREE_STATFAIL	0x00000001	/* Update zone failure statistic. */
#define	ZFREE_STATFREE	0x00000002	/* Update zone free statistic. */

/* Prototypes.. */

static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
static void page_free(void *, int, u_int8_t);
static uma_slab_t slab_zalloc(uma_zone_t, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void bucket_cache_drain(uma_zone_t zone);
static int keg_ctor(void *, int, void *, int);
static void keg_dtor(void *, int, void *);
static int zone_ctor(void *, int, void *, int);
static void zone_dtor(void *, int, void *);
static int zero_init(void *, int, int);
static void zone_small_init(uma_zone_t zone);
static void zone_large_init(uma_zone_t zone);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
static int hash_alloc(struct uma_hash *);
static int hash_expand(struct uma_hash *, struct uma_hash *);
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *uma_zalloc_internal(uma_zone_t, void *, int);
static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
    int);
static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(int, int);
static void bucket_free(uma_bucket_t);
static void bucket_zone_drain(void);
static int uma_zalloc_bucket(uma_zone_t zone, int flags);
static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
static void zone_drain(uma_zone_t);
static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
    uma_fini fini, int align, u_int32_t flags);

void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);

#ifdef WITNESS
static int nosleepwithlocks = 1;
SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
    0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
#else
static int nosleepwithlocks = 0;
SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
    0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
#endif
SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_vm_zone, "A", "Zone Info");
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);

SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");

SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");

/*
 * This routine checks to see whether or not it's safe to enable buckets.
 */

static void
bucket_enable(void)
{
	if (cnt.v_free_count < cnt.v_free_min)
		bucketdisable = 1;
	else
		bucketdisable = 0;
}

/*
 * Initialize bucket_zones, the array of zones of buckets of various sizes.
 *
 * For each zone, calculate the memory required for each bucket, consisting
 * of the header and an array of pointers.  Initialize bucket_size[] to point
 * the range of appropriate bucket sizes at the zone.
 */
static void
bucket_init(void)
{
	struct uma_bucket_zone *ubz;
	int i;
	int j;

	for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
		int size;

		ubz = &bucket_zones[j];
		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
		size += sizeof(void *) * ubz->ubz_entries;
		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
		for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
			bucket_size[i >> BUCKET_SHIFT] = j;
	}
}

/*
 * Given a desired number of entries for a bucket, return the zone from which
 * to allocate the bucket.
 */
static struct uma_bucket_zone *
bucket_zone_lookup(int entries)
{
	int idx;

	idx = howmany(entries, 1 << BUCKET_SHIFT);
	return (&bucket_zones[bucket_size[idx]]);
}

static uma_bucket_t
bucket_alloc(int entries, int bflags)
{
	struct uma_bucket_zone *ubz;
	uma_bucket_t bucket;

	/*
	 * This is to stop us from allocating per cpu buckets while we're
	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
	 * boot pages.  This also prevents us from allocating buckets in
	 * low memory situations.
	 */
	if (bucketdisable)
		return (NULL);

	ubz = bucket_zone_lookup(entries);
	bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
	if (bucket) {
#ifdef INVARIANTS
		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
#endif
		bucket->ub_cnt = 0;
		bucket->ub_entries = ubz->ubz_entries;
	}

	return (bucket);
}

static void
bucket_free(uma_bucket_t bucket)
{
	struct uma_bucket_zone *ubz;

	ubz = bucket_zone_lookup(bucket->ub_entries);
	uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
	    ZFREE_STATFREE);
}

static void
bucket_zone_drain(void)
{
	struct uma_bucket_zone *ubz;

	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
		zone_drain(ubz->ubz_zone);
}
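
/*
 * Note on the bucket size mapping above (illustrative only, not new
 * behavior): bucket_init() fills bucket_size[] so that bucket_zone_lookup()
 * can turn a requested entry count into an index with
 * howmany(entries, 1 << BUCKET_SHIFT) and pick the smallest bucket zone
 * that fits.  For example, with BUCKET_SHIFT == 4 a request for 20 entries
 * gives howmany(20, 16) == 2; bucket_init() stored 1 at bucket_size[2], so
 * the "32 Bucket" zone is used.  A request for exactly 16 entries maps to
 * index 1 and the "16 Bucket" zone.
 */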

/*
 * Routine called by timeout which is used to fire off some time interval
 * based calculations.  (stats, hash size, etc.)
 *
 * Arguments:
 *	arg   Unused
 *
 * Returns:
 *	Nothing
 */
static void
uma_timeout(void *unused)
{
	bucket_enable();
	zone_foreach(zone_timeout);

	/* Reschedule this event */
	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
}

/*
 * Routine to perform timeout driven calculations.  This expands the
 * hashes and does per cpu statistics aggregation.
 *
 * Arguments:
 *	zone  The zone to operate on
 *
 * Returns:
 *	Nothing
 */
static void
zone_timeout(uma_zone_t zone)
{
	uma_keg_t keg;
	u_int64_t alloc;

	keg = zone->uz_keg;
	alloc = 0;

	/*
	 * Expand the zone hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is completely reduce collisions.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */
	ZONE_LOCK(zone);
	if (keg->uk_flags & UMA_ZONE_HASH &&
	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
		struct uma_hash newhash;
		struct uma_hash oldhash;
		int ret;

		/*
		 * This is so involved because allocating and freeing
		 * while the zone lock is held will lead to deadlock.
		 * I have to do everything in stages and check for
		 * races.
		 */
		newhash = keg->uk_hash;
		ZONE_UNLOCK(zone);
		ret = hash_alloc(&newhash);
		ZONE_LOCK(zone);
		if (ret) {
			if (hash_expand(&keg->uk_hash, &newhash)) {
				oldhash = keg->uk_hash;
				keg->uk_hash = newhash;
			} else
				oldhash = newhash;

			ZONE_UNLOCK(zone);
			hash_free(&oldhash);
			ZONE_LOCK(zone);
		}
	}
	ZONE_UNLOCK(zone);
}

/*
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	hash  A new hash structure with the old hash size in uh_hashsize
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
static int
hash_alloc(struct uma_hash *hash)
{
	int oldsize;
	int alloc;

	oldsize = hash->uh_hashsize;

	/* We're just going to go to a power of two greater */
	if (oldsize) {
		hash->uh_hashsize = oldsize * 2;
		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
		    M_UMAHASH, M_NOWAIT);
	} else {
		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
		    M_WAITOK);
		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
	}
	if (hash->uh_slab_hash) {
		bzero(hash->uh_slab_hash, alloc);
		hash->uh_hashmask = hash->uh_hashsize - 1;
		return (1);
	}

	return (0);
}

/*
 * Expands the hash table for HASH zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation
 * path, otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	oldhash  The hash you want to expand
 *	newhash  The hash structure for the new table
 *
 * Returns:
 *	1 if the table was expanded, 0 otherwise.
 *
 * Discussion:
 */
static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
	uma_slab_t slab;
	int hval;
	int i;

	if (!newhash->uh_slab_hash)
		return (0);

	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
		return (0);

	/*
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
	 */

	for (i = 0; i < oldhash->uh_hashsize; i++)
		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
			hval = UMA_HASH(newhash, slab->us_data);
			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
			    slab, us_hlink);
		}

	return (1);
}

/*
 * Free the hash bucket to the appropriate backing store.
 *
 * Arguments:
 *	slab_hash  The hash bucket we're freeing
 *	hashsize   The number of entries in that hash bucket
 *
 * Returns:
 *	Nothing
 */
static void
hash_free(struct uma_hash *hash)
{
	if (hash->uh_slab_hash == NULL)
		return;
	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
		uma_zfree_internal(hashzone,
		    hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
	else
		free(hash->uh_slab_hash, M_UMAHASH);
}

/*
 * Frees all outstanding items in a bucket
 *
 * Arguments:
 *	zone   The zone to free to, must be unlocked.
 *	bucket The free/alloc bucket with items, cpu queue must be locked.
 *
 * Returns:
 *	Nothing
 */

static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
	uma_slab_t slab;
	int mzone;
	void *item;

	if (bucket == NULL)
		return;

	slab = NULL;
	mzone = 0;

	/* We have to lookup the slab again for malloc.. */
	if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
		mzone = 1;

	while (bucket->ub_cnt > 0) {
		bucket->ub_cnt--;
		item = bucket->ub_bucket[bucket->ub_cnt];
#ifdef INVARIANTS
		bucket->ub_bucket[bucket->ub_cnt] = NULL;
		KASSERT(item != NULL,
		    ("bucket_drain: botched ptr, item is NULL"));
#endif
		/*
		 * This is extremely inefficient.  The slab pointer was passed
		 * to uma_zfree_arg, but we lost it because the buckets don't
		 * hold them.  This will go away when free() gets a size passed
		 * to it.
		 */
		if (mzone)
			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
		uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
	}
}

/*
 * Drains the per cpu caches for a zone.
 *
 * NOTE: This may only be called while the zone is being torn down, and not
 * during normal operation.  This is necessary in order that we do not have
 * to migrate CPUs to drain the per-CPU caches.
 *
 * Arguments:
 *	zone     The zone to drain, must be unlocked.
 *
 * Returns:
 *	Nothing
 */
static void
cache_drain(uma_zone_t zone)
{
	uma_cache_t cache;
	int cpu;

	/*
	 * XXX: It is safe to not lock the per-CPU caches, because we're
	 * tearing down the zone anyway.  I.e., there will be no further use
	 * of the caches at this point.
	 *
	 * XXX: It would be good to be able to assert that the zone is being
	 * torn down to prevent improper use of cache_drain().
	 *
	 * XXX: We lock the zone before passing into bucket_cache_drain() as
	 * it is used elsewhere.  Should the tear-down path be made special
	 * there in some form?
	 */
	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		cache = &zone->uz_cpu[cpu];
		bucket_drain(zone, cache->uc_allocbucket);
		bucket_drain(zone, cache->uc_freebucket);
		if (cache->uc_allocbucket != NULL)
			bucket_free(cache->uc_allocbucket);
		if (cache->uc_freebucket != NULL)
			bucket_free(cache->uc_freebucket);
		cache->uc_allocbucket = cache->uc_freebucket = NULL;
	}
	ZONE_LOCK(zone);
	bucket_cache_drain(zone);
	ZONE_UNLOCK(zone);
}

/*
 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
 */
static void
bucket_cache_drain(uma_zone_t zone)
{
	uma_bucket_t bucket;

	/*
	 * Drain the bucket queues and free the buckets, we just keep two per
	 * cpu (alloc/free).
	 */
	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		bucket_drain(zone, bucket);
		bucket_free(bucket);
		ZONE_LOCK(zone);
	}

	/* Now we do the free queue.. */
	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		bucket_free(bucket);
	}
}

/*
 * Frees pages from a zone back to the system.  This is done on demand from
 * the pageout daemon.
 *
 * Arguments:
 *	zone  The zone to free pages from
 *
 * Returns:
 *	Nothing.
 */
static void
zone_drain(uma_zone_t zone)
{
	struct slabhead freeslabs = { 0 };
	uma_keg_t keg;
	uma_slab_t slab;
	uma_slab_t n;
	u_int8_t flags;
	u_int8_t *mem;
	int i;

	keg = zone->uz_keg;

	/*
	 * We don't want to take pages from statically allocated zones at this
	 * time
	 */
	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
		return;

	ZONE_LOCK(zone);

#ifdef UMA_DEBUG
	printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
#endif
	bucket_cache_drain(zone);
	if (keg->uk_free == 0)
		goto finished;

	slab = LIST_FIRST(&keg->uk_free_slab);
	while (slab) {
		n = LIST_NEXT(slab, us_link);

		/* We have nowhere to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		keg->uk_pages -= keg->uk_ppera;
		keg->uk_free -= keg->uk_ipers;

		if (keg->uk_flags & UMA_ZONE_HASH)
			UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);

		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);

		slab = n;
	}
finished:
	ZONE_UNLOCK(zone);

	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
		if (keg->uk_fini)
			for (i = 0; i < keg->uk_ipers; i++)
				keg->uk_fini(
				    slab->us_data + (keg->uk_rsize * i),
				    keg->uk_size);
		flags = slab->us_flags;
		mem = slab->us_data;

		if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
		    (keg->uk_flags & UMA_ZONE_REFCNT)) {
			vm_object_t obj;

			if (flags & UMA_SLAB_KMEM)
				obj = kmem_object;
			else
				obj = NULL;
			for (i = 0; i < keg->uk_ppera; i++)
				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
				    obj);
		}
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
			    SKIP_NONE, ZFREE_STATFREE);
#ifdef UMA_DEBUG
		printf("%s: Returning %d bytes.\n",
		    zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
#endif
		keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
	}
}
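
/*
 * Reader's note (descriptive only): zone_drain() above deliberately works in
 * two phases.  Slabs are unlinked from the keg and collected on the local
 * freeslabs list while the zone lock is held; the pages themselves are only
 * handed back to the VM (via keg->uk_freef) after the lock has been dropped,
 * so the page allocator is never entered with the zone lock held.
 */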

/*
 * Allocate a new slab for a zone.  This does not insert the slab onto a list.
 *
 * Arguments:
 *	zone  The zone to allocate slabs for
 *	wait  Shall we wait?
 *
 * Returns:
 *	The slab that was allocated or NULL if there is no memory and the
 *	caller specified M_NOWAIT.
 */
static uma_slab_t
slab_zalloc(uma_zone_t zone, int wait)
{
	uma_slabrefcnt_t slabref;
	uma_slab_t slab;
	uma_keg_t keg;
	u_int8_t *mem;
	u_int8_t flags;
	int i;

	slab = NULL;
	keg = zone->uz_keg;

#ifdef UMA_DEBUG
	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
#endif
	ZONE_UNLOCK(zone);

	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
		if (slab == NULL) {
			ZONE_LOCK(zone);
			return NULL;
		}
	}

	/*
	 * This reproduces the old vm_zone behavior of zero filling pages the
	 * first time they are added to a zone.
	 *
	 * Malloced items are zeroed in uma_zalloc.
	 */

	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
		wait |= M_ZERO;
	else
		wait &= ~M_ZERO;

	mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
	    &flags, wait);
	if (mem == NULL) {
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
			    SKIP_NONE, ZFREE_STATFREE);
		ZONE_LOCK(zone);
		return (NULL);
	}

	/* Point the slab into the allocated memory */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
		slab = (uma_slab_t )(mem + keg->uk_pgoff);

	if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
	    (keg->uk_flags & UMA_ZONE_REFCNT))
		for (i = 0; i < keg->uk_ppera; i++)
			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);

	slab->us_keg = keg;
	slab->us_data = mem;
	slab->us_freecount = keg->uk_ipers;
	slab->us_firstfree = 0;
	slab->us_flags = flags;

	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		slabref = (uma_slabrefcnt_t)slab;
		for (i = 0; i < keg->uk_ipers; i++) {
			slabref->us_freelist[i].us_refcnt = 0;
			slabref->us_freelist[i].us_item = i+1;
		}
	} else {
		for (i = 0; i < keg->uk_ipers; i++)
			slab->us_freelist[i].us_item = i+1;
	}

	if (keg->uk_init != NULL) {
		for (i = 0; i < keg->uk_ipers; i++)
			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
			    keg->uk_size, wait) != 0)
				break;
		if (i != keg->uk_ipers) {
			if (keg->uk_fini != NULL) {
				for (i--; i > -1; i--)
					keg->uk_fini(slab->us_data +
					    (keg->uk_rsize * i),
					    keg->uk_size);
			}
			if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
			    (keg->uk_flags & UMA_ZONE_REFCNT)) {
				vm_object_t obj;

				if (flags & UMA_SLAB_KMEM)
					obj = kmem_object;
				else
					obj = NULL;
				for (i = 0; i < keg->uk_ppera; i++)
					vsetobj((vm_offset_t)mem +
					    (i * PAGE_SIZE), obj);
			}
			if (keg->uk_flags & UMA_ZONE_OFFPAGE)
				uma_zfree_internal(keg->uk_slabzone, slab,
				    NULL, SKIP_NONE, ZFREE_STATFREE);
			keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
			    flags);
			ZONE_LOCK(zone);
			return (NULL);
		}
	}
	ZONE_LOCK(zone);

	if (keg->uk_flags & UMA_ZONE_HASH)
		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);

	keg->uk_pages += keg->uk_ppera;
	keg->uk_free += keg->uk_ipers;

	return (slab);
}
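
/*
 * Reader's note (descriptive only): the per-slab free list built above is an
 * index-linked stack embedded in the slab header.  us_firstfree holds the
 * index of the first free item and us_freelist[i].us_item holds the index of
 * the free item that follows item i.  For a freshly built slab with, say,
 * four items the chain is us_firstfree = 0 and us_item = {1, 2, 3, 4};
 * uma_slab_alloc() below pops index 0 and advances us_firstfree to 1, and
 * so on.
 */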

/*
 * This function is intended to be used early on in place of page_alloc() so
 * that we may use the boot time page cache to satisfy allocations before
 * the VM is ready.
 */
static void *
startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	uma_keg_t keg;

	keg = zone->uz_keg;

	/*
	 * Check our small startup cache to see if it has pages remaining.
	 */
	mtx_lock(&uma_mtx);
	if (uma_boot_free != 0) {
		uma_slab_t tmps;

		tmps = LIST_FIRST(&uma_boot_pages);
		LIST_REMOVE(tmps, us_link);
		uma_boot_free--;
		mtx_unlock(&uma_mtx);
		*pflag = tmps->us_flags;
		return (tmps->us_data);
	}
	mtx_unlock(&uma_mtx);
	if (booted == 0)
		panic("UMA: Increase UMA_BOOT_PAGES");
	/*
	 * Now that we've booted reset these users to their real allocator.
	 */
#ifdef UMA_MD_SMALL_ALLOC
	keg->uk_allocf = uma_small_alloc;
#else
	keg->uk_allocf = page_alloc;
#endif
	return keg->uk_allocf(zone, bytes, pflag, wait);
}

/*
 * Allocates a number of pages from the system
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	void *p;	/* Returned page */

	*pflag = UMA_SLAB_KMEM;
	p = (void *) kmem_malloc(kmem_map, bytes, wait);

	return (p);
}

/*
 * Allocates a number of pages from within an object
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	vm_object_t object;
	vm_offset_t retkva, zkva;
	vm_page_t p;
	int pages, startpages;

	object = zone->uz_keg->uk_obj;
	retkva = 0;

	/*
	 * This looks a little weird since we're getting one page at a time.
	 */
	VM_OBJECT_LOCK(object);
	p = TAILQ_LAST(&object->memq, pglist);
	pages = p != NULL ? p->pindex + 1 : 0;
	startpages = pages;
	zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
	for (; bytes > 0; bytes -= PAGE_SIZE) {
		p = vm_page_alloc(object, pages,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
		if (p == NULL) {
			if (pages != startpages)
				pmap_qremove(retkva, pages - startpages);
			while (pages != startpages) {
				pages--;
				p = TAILQ_LAST(&object->memq, pglist);
				vm_page_lock_queues();
				vm_page_unwire(p, 0);
				vm_page_free(p);
				vm_page_unlock_queues();
			}
			retkva = 0;
			goto done;
		}
		pmap_qenter(zkva, &p, 1);
		if (retkva == 0)
			retkva = zkva;
		zkva += PAGE_SIZE;
		pages += 1;
	}
done:
	VM_OBJECT_UNLOCK(object);
	*flags = UMA_SLAB_PRIV;

	return ((void *)retkva);
}

/*
 * Frees a number of pages to the system
 *
 * Arguments:
 *	mem   A pointer to the memory to be freed
 *	size  The size of the memory being freed
 *	flags The original p->us_flags field
 *
 * Returns:
 *	Nothing
 */
static void
page_free(void *mem, int size, u_int8_t flags)
{
	vm_map_t map;

	if (flags & UMA_SLAB_KMEM)
		map = kmem_map;
	else
		panic("UMA: page_free used with invalid flags %d\n", flags);

	kmem_free(map, (vm_offset_t)mem, size);
}

/*
 * Zero fill initializer
 *
 * Arguments/Returns follow uma_init specifications
 */
static int
zero_init(void *mem, int size, int flags)
{
	bzero(mem, size);
	return (0);
}

/*
 * Finish creating a small uma zone.  This calculates ipers, and the zone size.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_small_init(uma_zone_t zone)
{
	uma_keg_t keg;
	u_int rsize;
	u_int memused;
	u_int wastedspace;
	u_int shsize;

	keg = zone->uz_keg;
	KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
	rsize = keg->uk_size;

	if (rsize < UMA_SMALLEST_UNIT)
		rsize = UMA_SMALLEST_UNIT;
	if (rsize & keg->uk_align)
		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);

	keg->uk_rsize = rsize;
	keg->uk_ppera = 1;

	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		rsize += UMA_FRITMREF_SZ;	/* linkage & refcnt */
		shsize = sizeof(struct uma_slab_refcnt);
	} else {
		rsize += UMA_FRITM_SZ;	/* Account for linkage */
		shsize = sizeof(struct uma_slab);
	}

	keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
	KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
	memused = keg->uk_ipers * rsize + shsize;
	wastedspace = UMA_SLAB_SIZE - memused;

	/*
	 * We can't do OFFPAGE if we're internal or if we've been
	 * asked to not go to the VM for buckets.  If we do this we
	 * may end up going to the VM (kmem_map) for slabs which we
	 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
	 * result of UMA_ZONE_VM, which clearly forbids it.
	 */
	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
		return;

	if ((wastedspace >= UMA_MAX_WASTE) &&
	    (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
		keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
		KASSERT(keg->uk_ipers <= 255,
		    ("zone_small_init: keg->uk_ipers too high!"));
#ifdef UMA_DEBUG
		printf("UMA decided we need offpage slab headers for "
		    "zone: %s, calculated wastedspace = %d, "
		    "maximum wasted space allowed = %d, "
		    "calculated ipers = %d, "
		    "new wasted space = %d\n", zone->uz_name, wastedspace,
		    UMA_MAX_WASTE, keg->uk_ipers,
		    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
#endif
		keg->uk_flags |= UMA_ZONE_OFFPAGE;
		if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
			keg->uk_flags |= UMA_ZONE_HASH;
	}
}
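
/*
 * Worked example for zone_small_init() (illustrative only; the real
 * constants are configuration and machine dependent): suppose UMA_SLAB_SIZE
 * is 4096, the in-page struct uma_slab takes 64 bytes, the per-item
 * free-list linkage (UMA_FRITM_SZ) is 1 byte and the item size is 256 with
 * pointer alignment.  Then rsize = 256 + 1 = 257,
 * ipers = (4096 - 64) / 257 = 15 and
 * wastedspace = 4096 - (15 * 257 + 64) = 177.  Only if wastedspace reached
 * UMA_MAX_WASTE and an off-page header would let more items fit would the
 * zone be switched to UMA_ZONE_OFFPAGE.
 */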

/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_large_init(uma_zone_t zone)
{
	uma_keg_t keg;
	int pages;

	keg = zone->uz_keg;

	KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
	    ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));

	pages = keg->uk_size / UMA_SLAB_SIZE;

	/* Account for remainder */
	if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
		pages++;

	keg->uk_ppera = pages;
	keg->uk_ipers = 1;

	keg->uk_flags |= UMA_ZONE_OFFPAGE;
	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
		keg->uk_flags |= UMA_ZONE_HASH;

	keg->uk_rsize = keg->uk_size;
}

/*
 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
 * the keg onto the global keg list.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_kctor_args
 */
static int
keg_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_kctor_args *arg = udata;
	uma_keg_t keg = mem;
	uma_zone_t zone;

	bzero(keg, size);
	keg->uk_size = arg->size;
	keg->uk_init = arg->uminit;
	keg->uk_fini = arg->fini;
	keg->uk_align = arg->align;
	keg->uk_free = 0;
	keg->uk_pages = 0;
	keg->uk_flags = arg->flags;
	keg->uk_allocf = page_alloc;
	keg->uk_freef = page_free;
	keg->uk_recurse = 0;
	keg->uk_slabzone = NULL;

	/*
	 * The master zone is passed to us at keg-creation time.
	 */
	zone = arg->zone;
	zone->uz_keg = keg;

	if (arg->flags & UMA_ZONE_VM)
		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;

	if (arg->flags & UMA_ZONE_ZINIT)
		keg->uk_init = zero_init;

	/*
	 * The +UMA_FRITM_SZ added to uk_size is to account for the
	 * linkage that is added to the size in zone_small_init().  If
	 * we don't account for this here then we may end up in
	 * zone_small_init() with a calculated 'ipers' of 0.
	 */
	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		if ((keg->uk_size+UMA_FRITMREF_SZ) >
		    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
			zone_large_init(zone);
		else
			zone_small_init(zone);
	} else {
		if ((keg->uk_size+UMA_FRITM_SZ) >
		    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
			zone_large_init(zone);
		else
			zone_small_init(zone);
	}

	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		if (keg->uk_flags & UMA_ZONE_REFCNT)
			keg->uk_slabzone = slabrefzone;
		else
			keg->uk_slabzone = slabzone;
	}

	/*
	 * If we haven't booted yet we need allocations to go through the
	 * startup cache until the vm is ready.
	 */
	if (keg->uk_ppera == 1) {
#ifdef UMA_MD_SMALL_ALLOC
		keg->uk_allocf = uma_small_alloc;
		keg->uk_freef = uma_small_free;
#endif
		if (booted == 0)
			keg->uk_allocf = startup_alloc;
	}

	/*
	 * Initialize keg's lock (shared among zones) through
	 * Master zone
	 */
	zone->uz_lock = &keg->uk_lock;
	if (arg->flags & UMA_ZONE_MTXCLASS)
		ZONE_LOCK_INIT(zone, 1);
	else
		ZONE_LOCK_INIT(zone, 0);

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
		u_int totsize;

		/* Size of the slab struct and free list */
		if (keg->uk_flags & UMA_ZONE_REFCNT)
			totsize = sizeof(struct uma_slab_refcnt) +
			    keg->uk_ipers * UMA_FRITMREF_SZ;
		else
			totsize = sizeof(struct uma_slab) +
			    keg->uk_ipers * UMA_FRITM_SZ;

		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		keg->uk_pgoff = UMA_SLAB_SIZE - totsize;

		if (keg->uk_flags & UMA_ZONE_REFCNT)
			totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
			    + keg->uk_ipers * UMA_FRITMREF_SZ;
		else
			totsize = keg->uk_pgoff + sizeof(struct uma_slab)
			    + keg->uk_ipers * UMA_FRITM_SZ;

		/*
		 * The only way the following is possible is if with our
		 * UMA_ALIGN_PTR adjustments we are now bigger than
		 * UMA_SLAB_SIZE.  I haven't checked whether this is
		 * mathematically possible for all cases, so we make
		 * sure here anyway.
		 */
		if (totsize > UMA_SLAB_SIZE) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
			    keg->uk_size);
			panic("UMA slab won't fit.\n");
		}
	}

	if (keg->uk_flags & UMA_ZONE_HASH)
		hash_alloc(&keg->uk_hash);

#ifdef UMA_DEBUG
	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
	    zone->uz_name, zone,
	    keg->uk_size, keg->uk_ipers,
	    keg->uk_ppera, keg->uk_pgoff);
#endif

	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);

	mtx_lock(&uma_mtx);
	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
	mtx_unlock(&uma_mtx);
	return (0);
}
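
/*
 * Worked example for the uk_pgoff calculation above (illustrative only;
 * the sizes are assumed, not the real ones): with 15 items per slab, a
 * 1-byte free-list entry and a 48-byte slab header on a 4096-byte slab,
 * totsize = 48 + 15 * 1 = 63, which rounds up to 64 on an 8-byte pointer
 * boundary, so uk_pgoff = 4096 - 64 = 4032 and the slab header sits right
 * justified at the end of the page with the items packed in front of it.
 */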

/*
 * Zone header ctor.  This initializes all fields, locks, etc.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_zctor_args
 */

static int
zone_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	uma_zone_t z;
	uma_keg_t keg;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_init = NULL;
	zone->uz_fini = NULL;
	zone->uz_allocs = 0;
	zone->uz_frees = 0;
	zone->uz_fails = 0;
	zone->uz_fills = zone->uz_count = 0;

	if (arg->flags & UMA_ZONE_SECONDARY) {
		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
		keg = arg->keg;
		zone->uz_keg = keg;
		zone->uz_init = arg->uminit;
		zone->uz_fini = arg->fini;
		zone->uz_lock = &keg->uk_lock;
		mtx_lock(&uma_mtx);
		ZONE_LOCK(zone);
		keg->uk_flags |= UMA_ZONE_SECONDARY;
		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
			if (LIST_NEXT(z, uz_link) == NULL) {
				LIST_INSERT_AFTER(z, zone, uz_link);
				break;
			}
		}
		ZONE_UNLOCK(zone);
		mtx_unlock(&uma_mtx);
	} else if (arg->keg == NULL) {
		if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
		    arg->align, arg->flags) == NULL)
			return (ENOMEM);
	} else {
		struct uma_kctor_args karg;
		int error;

		/* We should only be here from uma_startup() */
		karg.size = arg->size;
		karg.uminit = arg->uminit;
		karg.fini = arg->fini;
		karg.align = arg->align;
		karg.flags = arg->flags;
		karg.zone = zone;
		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
		    flags);
		if (error)
			return (error);
	}
	keg = zone->uz_keg;
	zone->uz_lock = &keg->uk_lock;

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
		KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
		return (0);
	}

	if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
		zone->uz_count = BUCKET_MAX;
	else if (keg->uk_ipers <= BUCKET_MAX)
		zone->uz_count = keg->uk_ipers;
	else
		zone->uz_count = BUCKET_MAX;
	return (0);
}

/*
 * Keg header dtor.  This frees all data, destroys locks, frees the hash
 * table and removes the keg from the global list.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */
static void
keg_dtor(void *arg, int size, void *udata)
{
	uma_keg_t keg;

	keg = (uma_keg_t)arg;
	mtx_lock(&keg->uk_lock);
	if (keg->uk_free != 0) {
		printf("Freed UMA keg was not empty (%d items). "
		    " Lost %d pages of memory.\n",
		    keg->uk_free, keg->uk_pages);
	}
	mtx_unlock(&keg->uk_lock);

	if (keg->uk_flags & UMA_ZONE_HASH)
		hash_free(&keg->uk_hash);

	mtx_destroy(&keg->uk_lock);
}
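
/*
 * Reader's note (descriptive only): a keg owns the slabs and the lock, and
 * one or more zones sit on top of it.  zone_ctor() above links a
 * UMA_ZONE_SECONDARY zone onto an existing keg's uk_zones list and shares
 * that keg's lock, while an ordinary zone gets a fresh keg via
 * uma_kcreate().  keg_dtor() and zone_dtor() undo this in the opposite
 * order.
 */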

/*
 * Zone header dtor.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */
static void
zone_dtor(void *arg, int size, void *udata)
{
	uma_zone_t zone;
	uma_keg_t keg;

	zone = (uma_zone_t)arg;
	keg = zone->uz_keg;

	if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
		cache_drain(zone);

	mtx_lock(&uma_mtx);
	zone_drain(zone);
	if (keg->uk_flags & UMA_ZONE_SECONDARY) {
		LIST_REMOVE(zone, uz_link);
		/*
		 * XXX there are some races here where
		 * the zone can be drained but zone lock
		 * released and then refilled before we
		 * remove it... we don't care for now
		 */
		ZONE_LOCK(zone);
		if (LIST_EMPTY(&keg->uk_zones))
			keg->uk_flags &= ~UMA_ZONE_SECONDARY;
		ZONE_UNLOCK(zone);
		mtx_unlock(&uma_mtx);
	} else {
		LIST_REMOVE(keg, uk_link);
		LIST_REMOVE(zone, uz_link);
		mtx_unlock(&uma_mtx);
		uma_zfree_internal(kegs, keg, NULL, SKIP_NONE,
		    ZFREE_STATFREE);
	}
	zone->uz_keg = NULL;
}

/*
 * Traverses every zone in the system and calls a callback
 *
 * Arguments:
 *	zfunc  A pointer to a function which accepts a zone
 *		as an argument.
 *
 * Returns:
 *	Nothing
 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_keg_t keg;
	uma_zone_t zone;

	mtx_lock(&uma_mtx);
	LIST_FOREACH(keg, &uma_kegs, uk_link) {
		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
			zfunc(zone);
	}
	mtx_unlock(&uma_mtx);
}

/* Public functions */
/* See uma.h */
void
uma_startup(void *bootmem)
{
	struct uma_zctor_args args;
	uma_slab_t slab;
	u_int slabsize;
	u_int objsize, totsize, wsize;
	int i;

#ifdef UMA_DEBUG
	printf("Creating uma keg headers zone and keg.\n");
#endif
	/*
	 * The general UMA lock is a recursion-allowed lock because
	 * there is a code path where, while we're still configured
	 * to use startup_alloc() for backend page allocations, we
	 * may end up in uma_reclaim() which calls zone_foreach(zone_drain),
	 * which grabs uma_mtx, only to later call into startup_alloc()
	 * because while freeing we needed to allocate a bucket.  Since
	 * startup_alloc() also takes uma_mtx, we need to be able to
	 * recurse on it.
	 */
	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE);

	/*
	 * Figure out the maximum number of items-per-slab we'll have if
	 * we're using the OFFPAGE slab header to track free items, given
	 * all possible object sizes and the maximum desired wastage
	 * (UMA_MAX_WASTE).
	 *
	 * We iterate until we find an object size for
	 * which the calculated wastage in zone_small_init() will be
	 * enough to warrant OFFPAGE.  Since wastedspace versus objsize
	 * is an overall increasing see-saw function, we find the smallest
	 * objsize such that the wastage is always acceptable for objects
	 * with that objsize or smaller.  Since a smaller objsize always
	 * generates a larger possible uma_max_ipers, we use this computed
	 * objsize to calculate the largest ipers possible.  Since the
	 * ipers calculated for OFFPAGE slab headers is always larger than
	 * the ipers initially calculated in zone_small_init(), we use
	 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
	 * obtain the maximum ipers possible for offpage slab headers.
	 *
	 * It should be noted that ipers versus objsize is an inversely
	 * proportional function which drops off rather quickly so as
	 * long as our UMA_MAX_WASTE is such that the objsize we calculate
	 * falls into the portion of the inverse relation AFTER the steep
	 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
	 *
	 * Note that we have 8-bits (1 byte) to use as a freelist index
	 * inside the actual slab header itself and this is enough to
	 * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
	 * object with offpage slab header would have ipers =
	 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
	 * 1 greater than what our byte-integer freelist index can
	 * accommodate, but we know that this situation never occurs as
	 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
	 * that we need to go to offpage slab headers.  Or, if we do,
	 * then we trap that condition below and panic in the INVARIANTS case.
	 */
	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
		    (objsize + UMA_FRITM_SZ);
		totsize *= (UMA_FRITM_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers = UMA_SLAB_SIZE / objsize;

	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
		    (objsize + UMA_FRITMREF_SZ);
		totsize *= (UMA_FRITMREF_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;

	KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
	    ("uma_startup: calculated uma_max_ipers values too large!"));

#ifdef UMA_DEBUG
	printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
	printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
	    uma_max_ipers_ref);
#endif

	/* "manually" create the initial zone */
	args.name = "UMA Kegs";
	args.size = sizeof(struct uma_keg);
	args.ctor = keg_ctor;
	args.dtor = keg_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = &masterkeg;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	for (i = 0; i < UMA_BOOT_PAGES; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (u_int8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
		uma_boot_free++;
	}

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone and keg.\n");
#endif
	args.name = "UMA Zones";
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (mp_maxid + 1));
	args.ctor = zone_ctor;
	args.dtor = zone_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Initializing pcpu cache locks.\n");
#endif
#ifdef UMA_DEBUG
	printf("Creating slab and hash zones.\n");
#endif

	/*
	 * This is the max number of free list items we'll have with
	 * offpage slabs.
	 */
	slabsize = uma_max_ipers * UMA_FRITM_SZ;
	slabsize += sizeof(struct uma_slab);

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
	    slabsize,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	/*
	 * We also create a zone for the bigger slabs with reference
	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
	 */
	slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
	slabsize += sizeof(struct uma_slab_refcnt);
	slabrefzone = uma_zcreate("UMA RCntSlabs",
	    slabsize,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR,
	    UMA_ZFLAG_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	bucket_init();

#ifdef UMA_MD_SMALL_ALLOC
	booted = 1;
#endif

#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}

/* see uma.h */
void
uma_startup2(void)
{
	booted = 1;
	bucket_enable();
#ifdef UMA_DEBUG
	printf("UMA startup2 complete.\n");
#endif
}

/*
 * Initialize our callout handle
 *
 */

static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
	printf("Starting callout.\n");
#endif
	callout_init(&uma_callout, CALLOUT_MPSAFE);
	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
	printf("UMA startup3 complete.\n");
#endif
}

static uma_zone_t
uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
    int align, u_int32_t flags)
{
	struct uma_kctor_args args;

	args.size = size;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;
	args.zone = zone;
	return (uma_zalloc_internal(kegs, &args, M_WAITOK));
}

/* See uma.h */
uma_zone_t
uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
    uma_init uminit, uma_fini fini, int align, u_int32_t flags)

{
	struct uma_zctor_args args;

	/* This stuff is essential for the zone ctor */
	args.name = name;
	args.size = size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;
	args.keg = NULL;

	return (uma_zalloc_internal(zones, &args, M_WAITOK));
}

/* See uma.h */
uma_zone_t
uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
    uma_init zinit, uma_fini zfini, uma_zone_t master)
{
	struct uma_zctor_args args;

	args.name = name;
	args.size = master->uz_keg->uk_size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = zinit;
	args.fini = zfini;
	args.align = master->uz_keg->uk_align;
	args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
	args.keg = master->uz_keg;

	return (uma_zalloc_internal(zones, &args, M_WAITOK));
}

/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
{

	uma_zfree_internal(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
}
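
/*
 * Illustrative consumer usage of the creation API above (a sketch only; the
 * "foo" names are placeholders, and uma_zalloc()/uma_zfree() are the
 * wrappers declared in uma.h that end up in uma_zalloc_arg() and
 * uma_zfree_arg() below):
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	p = uma_zalloc(foo_zone, M_WAITOK);
 *	...
 *	uma_zfree(foo_zone, p);
 *	uma_zdestroy(foo_zone);
 */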

/* See uma.h */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
	void *item;
	uma_cache_t cache;
	uma_bucket_t bucket;
	int cpu;
	int badness;

	/* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
	CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
	    zone->uz_name, flags);

	if (!(flags & M_NOWAIT)) {
		KASSERT(curthread->td_intr_nesting_level == 0,
		    ("malloc(M_WAITOK) in interrupt context"));
		if (nosleepwithlocks) {
#ifdef WITNESS
			badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
			    NULL,
			    "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
			    zone->uz_name);
#else
			badness = 1;
#endif
		} else {
			badness = 0;
#ifdef WITNESS
			WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
			    "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
#endif
		}
		if (badness) {
			flags &= ~M_WAITOK;
			flags |= M_NOWAIT;
		}
	}

	/*
	 * If possible, allocate from the per-CPU cache.  There are two
	 * requirements for safe access to the per-CPU cache: (1) the thread
	 * accessing the cache must not be preempted or yield during access,
	 * and (2) the thread must not migrate CPUs without switching which
	 * cache it accesses.  We rely on a critical section to prevent
	 * preemption and migration.  We release the critical section in
	 * order to acquire the zone mutex if we are unable to allocate from
	 * the current cache; when we re-acquire the critical section, we
	 * must detect and handle migration if it has occurred.
	 */
zalloc_restart:
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

zalloc_start:
	bucket = cache->uc_allocbucket;

	if (bucket) {
		if (bucket->ub_cnt > 0) {
			bucket->ub_cnt--;
			item = bucket->ub_bucket[bucket->ub_cnt];
#ifdef INVARIANTS
			bucket->ub_bucket[bucket->ub_cnt] = NULL;
#endif
			KASSERT(item != NULL,
			    ("uma_zalloc: Bucket pointer mangled."));
			cache->uc_allocs++;
			critical_exit();
#ifdef INVARIANTS
			ZONE_LOCK(zone);
			uma_dbg_alloc(zone, NULL, item);
			ZONE_UNLOCK(zone);
#endif
			if (zone->uz_ctor != NULL) {
				if (zone->uz_ctor(item, zone->uz_keg->uk_size,
				    udata, flags) != 0) {
					uma_zfree_internal(zone, item, udata,
					    SKIP_DTOR, ZFREE_STATFAIL |
					    ZFREE_STATFREE);
					return (NULL);
				}
			}
			if (flags & M_ZERO)
				bzero(item, zone->uz_keg->uk_size);
			return (item);
		} else if (cache->uc_freebucket) {
			/*
			 * We have run out of items in our allocbucket.
			 * See if we can switch with our free bucket.
			 */
			if (cache->uc_freebucket->ub_cnt > 0) {
#ifdef UMA_DEBUG_ALLOC
				printf("uma_zalloc: Swapping empty with"
				    " alloc.\n");
#endif
				bucket = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = bucket;

				goto zalloc_start;
			}
		}
	}
	/*
	 * Attempt to retrieve the item from the per-CPU cache has failed, so
	 * we must go back to the zone.  This requires the zone lock, so we
	 * must drop the critical section, then re-acquire it when we go back
	 * to the cache.  Since the critical section is released, we may be
	 * preempted or migrate.  As such, make sure not to maintain any
	 * thread-local state specific to the cache from prior to releasing
	 * the critical section.
	 */
	critical_exit();
	ZONE_LOCK(zone);
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];
	bucket = cache->uc_allocbucket;
	if (bucket != NULL) {
		if (bucket->ub_cnt > 0) {
			ZONE_UNLOCK(zone);
			goto zalloc_start;
		}
		bucket = cache->uc_freebucket;
		if (bucket != NULL && bucket->ub_cnt > 0) {
			ZONE_UNLOCK(zone);
			goto zalloc_start;
		}
	}

	/* Since we have locked the zone we may as well send back our stats */
	zone->uz_allocs += cache->uc_allocs;
	cache->uc_allocs = 0;
	zone->uz_frees += cache->uc_frees;
	cache->uc_frees = 0;

	/* Our old one is now a free bucket */
	if (cache->uc_allocbucket) {
		KASSERT(cache->uc_allocbucket->ub_cnt == 0,
		    ("uma_zalloc_arg: Freeing a non free bucket."));
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    cache->uc_allocbucket, ub_link);
		cache->uc_allocbucket = NULL;
	}

	/* Check the free list for a new alloc bucket */
	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		KASSERT(bucket->ub_cnt != 0,
		    ("uma_zalloc_arg: Returning an empty bucket."));

		LIST_REMOVE(bucket, ub_link);
		cache->uc_allocbucket = bucket;
		ZONE_UNLOCK(zone);
		goto zalloc_start;
	}
	/* We are no longer associated with this CPU. */
	critical_exit();

	/* Bump up our uz_count so we get here less */
	if (zone->uz_count < BUCKET_MAX)
		zone->uz_count++;

	/*
	 * Now let's just fill a bucket and put it on the free list.  If that
	 * works we'll restart the allocation from the beginning.
	 */
	if (uma_zalloc_bucket(zone, flags)) {
		ZONE_UNLOCK(zone);
		goto zalloc_restart;
	}
	ZONE_UNLOCK(zone);
	/*
	 * We may not be able to get a bucket so return an actual item.
	 */
#ifdef UMA_DEBUG
	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

	return (uma_zalloc_internal(zone, udata, flags));
}

static uma_slab_t
uma_zone_slab(uma_zone_t zone, int flags)
{
	uma_slab_t slab;
	uma_keg_t keg;

	keg = zone->uz_keg;

	/*
	 * This is to prevent us from recursively trying to allocate
	 * buckets.  The problem is that if an allocation forces us to
	 * grab a new bucket we will call page_alloc, which will go off
	 * and cause the vm to allocate vm_map_entries.  If we need new
	 * buckets there too we will recurse in kmem_alloc and bad
	 * things happen.  So instead we return a NULL bucket, and make
	 * the code that allocates buckets smart enough to deal with it
	 *
	 * XXX: While we want this protection for the bucket zones so that
	 * recursion from the VM is handled (and the calling code that
	 * allocates buckets knows how to deal with it), we do not want
	 * to prevent allocation from the slab header zones (slabzone
	 * and slabrefzone) if uk_recurse is not zero for them.  The
	 * reason is that it could lead to NULL being returned for
	 * slab header allocations even in the M_WAITOK case, and the
	 * caller can't handle that.
	 */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
		if ((zone != slabzone) && (zone != slabrefzone))
			return (NULL);

	slab = NULL;

	for (;;) {
		/*
		 * Find a slab with some space.  Prefer slabs that are partially
		 * used over those that are totally full.  This helps to reduce
		 * fragmentation.
		 */
		if (keg->uk_free != 0) {
			if (!LIST_EMPTY(&keg->uk_part_slab)) {
				slab = LIST_FIRST(&keg->uk_part_slab);
			} else {
				slab = LIST_FIRST(&keg->uk_free_slab);
				LIST_REMOVE(slab, us_link);
				LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
				    us_link);
			}
			return (slab);
		}

		/*
		 * M_NOVM means don't ask at all!
		 */
		if (flags & M_NOVM)
			break;

		if (keg->uk_maxpages &&
		    keg->uk_pages >= keg->uk_maxpages) {
			keg->uk_flags |= UMA_ZFLAG_FULL;

			if (flags & M_NOWAIT)
				break;
			else
				msleep(keg, &keg->uk_lock, PVM,
				    "zonelimit", 0);
			continue;
		}
		keg->uk_recurse++;
		slab = slab_zalloc(zone, flags);
		keg->uk_recurse--;

		/*
		 * If we got a slab here it's safe to mark it partially used
		 * and return.  We assume that the caller is going to remove
		 * at least one item.
		 */
		if (slab) {
			LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
			return (slab);
		}
		/*
		 * We might not have been able to get a slab but another cpu
		 * could have while we were unlocked.  Check again before we
		 * fail.
		 */
		if (flags & M_NOWAIT)
			flags |= M_NOVM;
	}
	return (slab);
}

static void *
uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
{
	uma_keg_t keg;
	uma_slabrefcnt_t slabref;
	void *item;
	u_int8_t freei;

	keg = zone->uz_keg;

	freei = slab->us_firstfree;
	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		slabref = (uma_slabrefcnt_t)slab;
		slab->us_firstfree = slabref->us_freelist[freei].us_item;
	} else {
		slab->us_firstfree = slab->us_freelist[freei].us_item;
	}
	item = slab->us_data + (keg->uk_rsize * freei);

	slab->us_freecount--;
	keg->uk_free--;
#ifdef INVARIANTS
	uma_dbg_alloc(zone, slab, item);
#endif
	/* Move this slab to the full list */
	if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
	}

	return (item);
}

static int
uma_zalloc_bucket(uma_zone_t zone, int flags)
{
	uma_bucket_t bucket;
	uma_slab_t slab;
	int16_t saved;
	int max, origflags = flags;

	/*
	 * Try this zone's free list first so we don't allocate extra buckets.
	 */
	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		KASSERT(bucket->ub_cnt == 0,
		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
		LIST_REMOVE(bucket, ub_link);
	} else {
		int bflags;

		bflags = (flags & ~M_ZERO);
		if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
			bflags |= M_NOVM;

		ZONE_UNLOCK(zone);
		bucket = bucket_alloc(zone->uz_count, bflags);
		ZONE_LOCK(zone);
	}

	if (bucket == NULL)
		return (0);

#ifdef SMP
	/*
	 * This code is here to limit the number of simultaneous bucket fills
	 * for any given zone to the number of per cpu caches in this zone.  This
	 * is done so that we don't allocate more memory than we really need.
2125 */ 2126 if (zone->uz_fills >= mp_ncpus) 2127 goto done; 2128 2129#endif 2130 zone->uz_fills++; 2131 2132 max = MIN(bucket->ub_entries, zone->uz_count); 2133 /* Try to keep the buckets totally full */ 2134 saved = bucket->ub_cnt; 2135 while (bucket->ub_cnt < max && 2136 (slab = uma_zone_slab(zone, flags)) != NULL) { 2137 while (slab->us_freecount && bucket->ub_cnt < max) { 2138 bucket->ub_bucket[bucket->ub_cnt++] = 2139 uma_slab_alloc(zone, slab); 2140 } 2141 2142 /* Don't block on the next fill */ 2143 flags |= M_NOWAIT; 2144 } 2145 2146 /* 2147 * We unlock here because we need to call the zone's init. 2148 * It should be safe to unlock because the slab dealt with 2149 * above is already on the appropriate list within the keg 2150 * and the bucket we filled is not yet on any list, so we 2151 * own it. 2152 */ 2153 if (zone->uz_init != NULL) { 2154 int i; 2155 2156 ZONE_UNLOCK(zone); 2157 for (i = saved; i < bucket->ub_cnt; i++) 2158 if (zone->uz_init(bucket->ub_bucket[i], 2159 zone->uz_keg->uk_size, origflags) != 0) 2160 break; 2161 /* 2162 * If we couldn't initialize the whole bucket, put the 2163 * rest back onto the freelist. 2164 */ 2165 if (i != bucket->ub_cnt) { 2166 int j; 2167 2168 for (j = i; j < bucket->ub_cnt; j++) { 2169 uma_zfree_internal(zone, bucket->ub_bucket[j], 2170 NULL, SKIP_FINI, 0); 2171#ifdef INVARIANTS 2172 bucket->ub_bucket[j] = NULL; 2173#endif 2174 } 2175 bucket->ub_cnt = i; 2176 } 2177 ZONE_LOCK(zone); 2178 } 2179 2180 zone->uz_fills--; 2181 if (bucket->ub_cnt != 0) { 2182 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2183 bucket, ub_link); 2184 return (1); 2185 } 2186#ifdef SMP 2187done: 2188#endif 2189 bucket_free(bucket); 2190 2191 return (0); 2192} 2193/* 2194 * Allocates an item for an internal zone 2195 * 2196 * Arguments 2197 * zone The zone to alloc for. 2198 * udata The data to be passed to the constructor. 2199 * flags M_WAITOK, M_NOWAIT, M_ZERO. 2200 * 2201 * Returns 2202 * NULL if there is no memory and M_NOWAIT is set 2203 * An item if successful 2204 */ 2205 2206static void * 2207uma_zalloc_internal(uma_zone_t zone, void *udata, int flags) 2208{ 2209 uma_keg_t keg; 2210 uma_slab_t slab; 2211 void *item; 2212 2213 item = NULL; 2214 keg = zone->uz_keg; 2215 2216#ifdef UMA_DEBUG_ALLOC 2217 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); 2218#endif 2219 ZONE_LOCK(zone); 2220 2221 slab = uma_zone_slab(zone, flags); 2222 if (slab == NULL) { 2223 zone->uz_fails++; 2224 ZONE_UNLOCK(zone); 2225 return (NULL); 2226 } 2227 2228 item = uma_slab_alloc(zone, slab); 2229 2230 zone->uz_allocs++; 2231 2232 ZONE_UNLOCK(zone); 2233 2234 /* 2235 * We have to call both the zone's init (not the keg's init) 2236 * and the zone's ctor. This is because the item is going from 2237 * a keg slab directly to the user, and the user is expecting it 2238 * to be both zone-init'd as well as zone-ctor'd. 
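 *
 * Hedged example (the zone and function names are hypothetical): for a
 * zone that registered a zone-level init/fini pair with
 *
 *	uma_zone_set_zinit(foo_zone, foo_zinit);
 *	uma_zone_set_zfini(foo_zone, foo_zfini);
 *
 * the keg-level uk_init already ran when the backing slab was created,
 * and foo_zinit() followed by the zone's ctor (if any) run below, in
 * that order.  If the init fails we free with SKIP_FINI since there is
 * nothing to undo; if the ctor fails we free with SKIP_DTOR so that
 * the fini still undoes the init that succeeded.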
2239 */ 2240 if (zone->uz_init != NULL) { 2241 if (zone->uz_init(item, keg->uk_size, flags) != 0) { 2242 uma_zfree_internal(zone, item, udata, SKIP_FINI, 2243 ZFREE_STATFAIL | ZFREE_STATFREE); 2244 return (NULL); 2245 } 2246 } 2247 if (zone->uz_ctor != NULL) { 2248 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) { 2249 uma_zfree_internal(zone, item, udata, SKIP_DTOR, 2250 ZFREE_STATFAIL | ZFREE_STATFREE); 2251 return (NULL); 2252 } 2253 } 2254 if (flags & M_ZERO) 2255 bzero(item, keg->uk_size); 2256 2257 return (item); 2258} 2259 2260/* See uma.h */ 2261void 2262uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 2263{ 2264 uma_keg_t keg; 2265 uma_cache_t cache; 2266 uma_bucket_t bucket; 2267 int bflags; 2268 int cpu; 2269 2270 keg = zone->uz_keg; 2271 2272#ifdef UMA_DEBUG_ALLOC_1 2273 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); 2274#endif 2275 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread, 2276 zone->uz_name); 2277 2278 if (zone->uz_dtor) 2279 zone->uz_dtor(item, keg->uk_size, udata); 2280#ifdef INVARIANTS 2281 ZONE_LOCK(zone); 2282 if (keg->uk_flags & UMA_ZONE_MALLOC) 2283 uma_dbg_free(zone, udata, item); 2284 else 2285 uma_dbg_free(zone, NULL, item); 2286 ZONE_UNLOCK(zone); 2287#endif 2288 /* 2289 * The race here is acceptable. If we miss it we'll just have to wait 2290 * a little longer for the limits to be reset. 2291 */ 2292 if (keg->uk_flags & UMA_ZFLAG_FULL) 2293 goto zfree_internal; 2294 2295 /* 2296 * If possible, free to the per-CPU cache. There are two 2297 * requirements for safe access to the per-CPU cache: (1) the thread 2298 * accessing the cache must not be preempted or yield during access, 2299 * and (2) the thread must not migrate CPUs without switching which 2300 * cache it accesses. We rely on a critical section to prevent 2301 * preemption and migration. We release the critical section in 2302 * order to acquire the zone mutex if we are unable to free to the 2303 * current cache; when we re-acquire the critical section, we must 2304 * detect and handle migration if it has occurred. 2305 */ 2306zfree_restart: 2307 critical_enter(); 2308 cpu = curcpu; 2309 cache = &zone->uz_cpu[cpu]; 2310 2311zfree_start: 2312 bucket = cache->uc_freebucket; 2313 2314 if (bucket) { 2315 /* 2316 * Do we have room in our bucket? It is OK for this uz count 2317 * check to be slightly out of sync. 2318 */ 2319 2320 if (bucket->ub_cnt < bucket->ub_entries) { 2321 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL, 2322 ("uma_zfree: Freeing to non free bucket index.")); 2323 bucket->ub_bucket[bucket->ub_cnt] = item; 2324 bucket->ub_cnt++; 2325 cache->uc_frees++; 2326 critical_exit(); 2327 return; 2328 } else if (cache->uc_allocbucket) { 2329#ifdef UMA_DEBUG_ALLOC 2330 printf("uma_zfree: Swapping buckets.\n"); 2331#endif 2332 /* 2333 * We have run out of space in our freebucket. 2334 * See if we can switch with our alloc bucket. 2335 */ 2336 if (cache->uc_allocbucket->ub_cnt < 2337 cache->uc_freebucket->ub_cnt) { 2338 bucket = cache->uc_freebucket; 2339 cache->uc_freebucket = cache->uc_allocbucket; 2340 cache->uc_allocbucket = bucket; 2341 goto zfree_start; 2342 } 2343 } 2344 } 2345 /* 2346 * We can get here for two reasons: 2347 * 2348 * 1) The buckets are NULL 2349 * 2) The alloc and free buckets are both somewhat full. 2350 * 2351 * We must go back to the zone, which requires acquiring the zone lock, 2352 * which in turn means we must release and re-acquire the critical 2353 * section.
Since the critical section is released, we may be 2354 * preempted or migrate. As such, make sure not to maintain any 2355 * thread-local state specific to the cache from prior to releasing 2356 * the critical section. 2357 */ 2358 critical_exit(); 2359 ZONE_LOCK(zone); 2360 critical_enter(); 2361 cpu = curcpu; 2362 cache = &zone->uz_cpu[cpu]; 2363 if (cache->uc_freebucket != NULL) { 2364 if (cache->uc_freebucket->ub_cnt < 2365 cache->uc_freebucket->ub_entries) { 2366 ZONE_UNLOCK(zone); 2367 goto zfree_start; 2368 } 2369 if (cache->uc_allocbucket != NULL && 2370 (cache->uc_allocbucket->ub_cnt < 2371 cache->uc_freebucket->ub_cnt)) { 2372 ZONE_UNLOCK(zone); 2373 goto zfree_start; 2374 } 2375 } 2376 2377 /* Since we have locked the zone we may as well send back our stats */ 2378 zone->uz_allocs += cache->uc_allocs; 2379 cache->uc_allocs = 0; 2380 zone->uz_frees += cache->uc_frees; 2381 cache->uc_frees = 0; 2382 2383 bucket = cache->uc_freebucket; 2384 cache->uc_freebucket = NULL; 2385 2386 /* Can we throw this on the zone full list? */ 2387 if (bucket != NULL) { 2388#ifdef UMA_DEBUG_ALLOC 2389 printf("uma_zfree: Putting old bucket on the free list.\n"); 2390#endif 2391 /* ub_cnt is pointing to the last free item */ 2392 KASSERT(bucket->ub_cnt != 0, 2393 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); 2394 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2395 bucket, ub_link); 2396 } 2397 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 2398 LIST_REMOVE(bucket, ub_link); 2399 ZONE_UNLOCK(zone); 2400 cache->uc_freebucket = bucket; 2401 goto zfree_start; 2402 } 2403 /* We are no longer associated with this CPU. */ 2404 critical_exit(); 2405 2406 /* And the zone.. */ 2407 ZONE_UNLOCK(zone); 2408 2409#ifdef UMA_DEBUG_ALLOC 2410 printf("uma_zfree: Allocating new free bucket.\n"); 2411#endif 2412 bflags = M_NOWAIT; 2413 2414 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY) 2415 bflags |= M_NOVM; 2416 bucket = bucket_alloc(zone->uz_count, bflags); 2417 if (bucket) { 2418 ZONE_LOCK(zone); 2419 LIST_INSERT_HEAD(&zone->uz_free_bucket, 2420 bucket, ub_link); 2421 ZONE_UNLOCK(zone); 2422 goto zfree_restart; 2423 } 2424 2425 /* 2426 * If nothing else caught this, we'll just do an internal free. 
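 *
 * The zone's dtor already ran at the top of this function, so
 * uma_zfree_internal() is told to skip it (SKIP_DTOR); the zone's
 * fini, if one is set, still runs there before the item is linked
 * back into its slab.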
2427 */ 2428zfree_internal: 2429 uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFAIL | 2430 ZFREE_STATFREE); 2431 2432 return; 2433} 2434 2435/* 2436 * Frees an item to an INTERNAL zone or allocates a free bucket 2437 * 2438 * Arguments: 2439 * zone The zone to free to 2440 * item The item we're freeing 2441 * udata User supplied data for the dtor 2442 * skip Skip dtors and finis 2443 */ 2444static void 2445uma_zfree_internal(uma_zone_t zone, void *item, void *udata, 2446 enum zfreeskip skip, int flags) 2447{ 2448 uma_slab_t slab; 2449 uma_slabrefcnt_t slabref; 2450 uma_keg_t keg; 2451 u_int8_t *mem; 2452 u_int8_t freei; 2453 2454 keg = zone->uz_keg; 2455 2456 if (skip < SKIP_DTOR && zone->uz_dtor) 2457 zone->uz_dtor(item, keg->uk_size, udata); 2458 if (skip < SKIP_FINI && zone->uz_fini) 2459 zone->uz_fini(item, keg->uk_size); 2460 2461 ZONE_LOCK(zone); 2462 2463 if (flags & ZFREE_STATFAIL) 2464 zone->uz_fails++; 2465 if (flags & ZFREE_STATFREE) 2466 zone->uz_frees++; 2467 2468 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) { 2469 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); 2470 if (keg->uk_flags & UMA_ZONE_HASH) 2471 slab = hash_sfind(&keg->uk_hash, mem); 2472 else { 2473 mem += keg->uk_pgoff; 2474 slab = (uma_slab_t)mem; 2475 } 2476 } else { 2477 slab = (uma_slab_t)udata; 2478 } 2479 2480 /* Do we need to remove from any lists? */ 2481 if (slab->us_freecount+1 == keg->uk_ipers) { 2482 LIST_REMOVE(slab, us_link); 2483 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); 2484 } else if (slab->us_freecount == 0) { 2485 LIST_REMOVE(slab, us_link); 2486 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); 2487 } 2488 2489 /* Slab management stuff */ 2490 freei = ((unsigned long)item - (unsigned long)slab->us_data) 2491 / keg->uk_rsize; 2492 2493#ifdef INVARIANTS 2494 if (!skip) 2495 uma_dbg_free(zone, slab, item); 2496#endif 2497 2498 if (keg->uk_flags & UMA_ZONE_REFCNT) { 2499 slabref = (uma_slabrefcnt_t)slab; 2500 slabref->us_freelist[freei].us_item = slab->us_firstfree; 2501 } else { 2502 slab->us_freelist[freei].us_item = slab->us_firstfree; 2503 } 2504 slab->us_firstfree = freei; 2505 slab->us_freecount++; 2506 2507 /* Zone statistics */ 2508 keg->uk_free++; 2509 2510 if (keg->uk_flags & UMA_ZFLAG_FULL) { 2511 if (keg->uk_pages < keg->uk_maxpages) 2512 keg->uk_flags &= ~UMA_ZFLAG_FULL; 2513 2514 /* We can handle one more allocation */ 2515 wakeup_one(keg); 2516 } 2517 2518 ZONE_UNLOCK(zone); 2519} 2520 2521/* See uma.h */ 2522void 2523uma_zone_set_max(uma_zone_t zone, int nitems) 2524{ 2525 uma_keg_t keg; 2526 2527 keg = zone->uz_keg; 2528 ZONE_LOCK(zone); 2529 if (keg->uk_ppera > 1) 2530 keg->uk_maxpages = nitems * keg->uk_ppera; 2531 else 2532 keg->uk_maxpages = nitems / keg->uk_ipers; 2533 2534 if (keg->uk_maxpages * keg->uk_ipers < nitems) 2535 keg->uk_maxpages++; 2536 2537 ZONE_UNLOCK(zone); 2538} 2539 2540/* See uma.h */ 2541void 2542uma_zone_set_init(uma_zone_t zone, uma_init uminit) 2543{ 2544 ZONE_LOCK(zone); 2545 KASSERT(zone->uz_keg->uk_pages == 0, 2546 ("uma_zone_set_init on non-empty keg")); 2547 zone->uz_keg->uk_init = uminit; 2548 ZONE_UNLOCK(zone); 2549} 2550 2551/* See uma.h */ 2552void 2553uma_zone_set_fini(uma_zone_t zone, uma_fini fini) 2554{ 2555 ZONE_LOCK(zone); 2556 KASSERT(zone->uz_keg->uk_pages == 0, 2557 ("uma_zone_set_fini on non-empty keg")); 2558 zone->uz_keg->uk_fini = fini; 2559 ZONE_UNLOCK(zone); 2560} 2561 2562/* See uma.h */ 2563void 2564uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) 2565{ 2566 ZONE_LOCK(zone); 2567 
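	/* Zone-level init (uz_init); the keg-level init is set by uma_zone_set_init() above. */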
KASSERT(zone->uz_keg->uk_pages == 0, 2568 ("uma_zone_set_zinit on non-empty keg")); 2569 zone->uz_init = zinit; 2570 ZONE_UNLOCK(zone); 2571} 2572 2573/* See uma.h */ 2574void 2575uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini) 2576{ 2577 ZONE_LOCK(zone); 2578 KASSERT(zone->uz_keg->uk_pages == 0, 2579 ("uma_zone_set_zfini on non-empty keg")); 2580 zone->uz_fini = zfini; 2581 ZONE_UNLOCK(zone); 2582} 2583 2584/* See uma.h */ 2585/* XXX uk_freef is not actually used with the zone locked */ 2586void 2587uma_zone_set_freef(uma_zone_t zone, uma_free freef) 2588{ 2589 ZONE_LOCK(zone); 2590 zone->uz_keg->uk_freef = freef; 2591 ZONE_UNLOCK(zone); 2592} 2593 2594/* See uma.h */ 2595/* XXX uk_allocf is not actually used with the zone locked */ 2596void 2597uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) 2598{ 2599 ZONE_LOCK(zone); 2600 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC; 2601 zone->uz_keg->uk_allocf = allocf; 2602 ZONE_UNLOCK(zone); 2603} 2604 2605/* See uma.h */ 2606int 2607uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) 2608{ 2609 uma_keg_t keg; 2610 vm_offset_t kva; 2611 int pages; 2612 2613 keg = zone->uz_keg; 2614 pages = count / keg->uk_ipers; 2615 2616 if (pages * keg->uk_ipers < count) 2617 pages++; 2618 2619 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE); 2620 2621 if (kva == 0) 2622 return (0); 2623 if (obj == NULL) { 2624 obj = vm_object_allocate(OBJT_DEFAULT, 2625 pages); 2626 } else { 2627 VM_OBJECT_LOCK_INIT(obj, "uma object"); 2628 _vm_object_allocate(OBJT_DEFAULT, 2629 pages, obj); 2630 } 2631 ZONE_LOCK(zone); 2632 keg->uk_kva = kva; 2633 keg->uk_obj = obj; 2634 keg->uk_maxpages = pages; 2635 keg->uk_allocf = obj_alloc; 2636 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC; 2637 ZONE_UNLOCK(zone); 2638 return (1); 2639} 2640 2641/* See uma.h */ 2642void 2643uma_prealloc(uma_zone_t zone, int items) 2644{ 2645 int slabs; 2646 uma_slab_t slab; 2647 uma_keg_t keg; 2648 2649 keg = zone->uz_keg; 2650 ZONE_LOCK(zone); 2651 slabs = items / keg->uk_ipers; 2652 if (slabs * keg->uk_ipers < items) 2653 slabs++; 2654 while (slabs > 0) { 2655 slab = slab_zalloc(zone, M_WAITOK); 2656 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); 2657 slabs--; 2658 } 2659 ZONE_UNLOCK(zone); 2660} 2661 2662/* See uma.h */ 2663u_int32_t * 2664uma_find_refcnt(uma_zone_t zone, void *item) 2665{ 2666 uma_slabrefcnt_t slabref; 2667 uma_keg_t keg; 2668 u_int32_t *refcnt; 2669 int idx; 2670 2671 keg = zone->uz_keg; 2672 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & 2673 (~UMA_SLAB_MASK)); 2674 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT, 2675 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT")); 2676 idx = ((unsigned long)item - (unsigned long)slabref->us_data) 2677 / keg->uk_rsize; 2678 refcnt = &slabref->us_freelist[idx].us_refcnt; 2679 return refcnt; 2680} 2681 2682/* See uma.h */ 2683void 2684uma_reclaim(void) 2685{ 2686#ifdef UMA_DEBUG 2687 printf("UMA: vm asked us to release pages!\n"); 2688#endif 2689 bucket_enable(); 2690 zone_foreach(zone_drain); 2691 /* 2692 * Some slabs may have been freed to these zones after they were visited 2693 * early above, so we visit them again to free pages that became empty 2694 * once the other zones were drained. We have to do the same for buckets.
2695 */ 2696 zone_drain(slabzone); 2697 zone_drain(slabrefzone); 2698 bucket_zone_drain(); 2699} 2700 2701void * 2702uma_large_malloc(int size, int wait) 2703{ 2704 void *mem; 2705 uma_slab_t slab; 2706 u_int8_t flags; 2707 2708 slab = uma_zalloc_internal(slabzone, NULL, wait); 2709 if (slab == NULL) 2710 return (NULL); 2711 mem = page_alloc(NULL, size, &flags, wait); 2712 if (mem) { 2713 vsetslab((vm_offset_t)mem, slab); 2714 slab->us_data = mem; 2715 slab->us_flags = flags | UMA_SLAB_MALLOC; 2716 slab->us_size = size; 2717 } else { 2718 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, 2719 ZFREE_STATFAIL | ZFREE_STATFREE); 2720 } 2721 2722 return (mem); 2723} 2724 2725void 2726uma_large_free(uma_slab_t slab) 2727{ 2728 vsetobj((vm_offset_t)slab->us_data, kmem_object); 2729 page_free(slab->us_data, slab->us_size, slab->us_flags); 2730 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE); 2731} 2732 2733void 2734uma_print_stats(void) 2735{ 2736 zone_foreach(uma_print_zone); 2737} 2738 2739static void 2740slab_print(uma_slab_t slab) 2741{ 2742 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n", 2743 slab->us_keg, slab->us_data, slab->us_freecount, 2744 slab->us_firstfree); 2745} 2746 2747static void 2748cache_print(uma_cache_t cache) 2749{ 2750 printf("alloc: %p(%d), free: %p(%d)\n", 2751 cache->uc_allocbucket, 2752 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0, 2753 cache->uc_freebucket, 2754 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0); 2755} 2756 2757void 2758uma_print_zone(uma_zone_t zone) 2759{ 2760 uma_cache_t cache; 2761 uma_keg_t keg; 2762 uma_slab_t slab; 2763 int i; 2764 2765 keg = zone->uz_keg; 2766 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", 2767 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags, 2768 keg->uk_ipers, keg->uk_ppera, 2769 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free); 2770 printf("Part slabs:\n"); 2771 LIST_FOREACH(slab, &keg->uk_part_slab, us_link) 2772 slab_print(slab); 2773 printf("Free slabs:\n"); 2774 LIST_FOREACH(slab, &keg->uk_free_slab, us_link) 2775 slab_print(slab); 2776 printf("Full slabs:\n"); 2777 LIST_FOREACH(slab, &keg->uk_full_slab, us_link) 2778 slab_print(slab); 2779 for (i = 0; i <= mp_maxid; i++) { 2780 if (CPU_ABSENT(i)) 2781 continue; 2782 cache = &zone->uz_cpu[i]; 2783 printf("CPU %d Cache:\n", i); 2784 cache_print(cache); 2785 } 2786} 2787 2788/* 2789 * Generate statistics across both the zone and its per-CPU caches. Return 2790 * a given statistic only if its pointer argument is non-NULL. 2791 * 2792 * Note: does not update the zone statistics, as it can't safely clear the 2793 * per-CPU cache statistic. 2794 * 2795 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't 2796 * safe from off-CPU; we should modify the caches to track this information 2797 * directly so that we don't have to.
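 *
 * Illustrative use (this mirrors how sysctl_vm_zone() below calls it;
 * any other caller is hypothetical):
 *
 *	int cachefree;
 *	u_int64_t allocs, frees;
 *
 *	uma_zone_sumstat(zone, &cachefree, &allocs, &frees);
 *
 * Any of the three output pointers may be NULL to skip that statistic.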
2798 */ 2799static void 2800uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp, 2801 u_int64_t *freesp) 2802{ 2803 uma_cache_t cache; 2804 u_int64_t allocs, frees; 2805 int cachefree, cpu; 2806 2807 allocs = frees = 0; 2808 cachefree = 0; 2809 for (cpu = 0; cpu <= mp_maxid; cpu++) { 2810 if (CPU_ABSENT(cpu)) 2811 continue; 2812 cache = &z->uz_cpu[cpu]; 2813 if (cache->uc_allocbucket != NULL) 2814 cachefree += cache->uc_allocbucket->ub_cnt; 2815 if (cache->uc_freebucket != NULL) 2816 cachefree += cache->uc_freebucket->ub_cnt; 2817 allocs += cache->uc_allocs; 2818 frees += cache->uc_frees; 2819 } 2820 allocs += z->uz_allocs; 2821 frees += z->uz_frees; 2822 if (cachefreep != NULL) 2823 *cachefreep = cachefree; 2824 if (allocsp != NULL) 2825 *allocsp = allocs; 2826 if (freesp != NULL) 2827 *freesp = frees; 2828} 2829 2830/* 2831 * Sysctl handler for vm.zone 2832 * 2833 * stolen from vm_zone.c 2834 */ 2835static int 2836sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 2837{ 2838 int error, len, cnt; 2839 const int linesize = 128; /* conservative */ 2840 int totalfree; 2841 char *tmpbuf, *offset; 2842 uma_zone_t z; 2843 uma_keg_t zk; 2844 char *p; 2845 int cachefree; 2846 uma_bucket_t bucket; 2847 u_int64_t allocs, frees; 2848 2849 cnt = 0; 2850 mtx_lock(&uma_mtx); 2851 LIST_FOREACH(zk, &uma_kegs, uk_link) { 2852 LIST_FOREACH(z, &zk->uk_zones, uz_link) 2853 cnt++; 2854 } 2855 mtx_unlock(&uma_mtx); 2856 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize, 2857 M_TEMP, M_WAITOK); 2858 len = snprintf(tmpbuf, linesize, 2859 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n"); 2860 if (cnt == 0) 2861 tmpbuf[len - 1] = '\0'; 2862 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len); 2863 if (error || cnt == 0) 2864 goto out; 2865 offset = tmpbuf; 2866 mtx_lock(&uma_mtx); 2867 LIST_FOREACH(zk, &uma_kegs, uk_link) { 2868 LIST_FOREACH(z, &zk->uk_zones, uz_link) { 2869 if (cnt == 0) /* list may have changed size */ 2870 break; 2871 ZONE_LOCK(z); 2872 cachefree = 0; 2873 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { 2874 uma_zone_sumstat(z, &cachefree, &allocs, &frees); 2875 } else { 2876 allocs = z->uz_allocs; 2877 frees = z->uz_frees; 2878 } 2879 2880 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) { 2881 cachefree += bucket->ub_cnt; 2882 } 2883 totalfree = zk->uk_free + cachefree; 2884 len = snprintf(offset, linesize, 2885 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", 2886 z->uz_name, zk->uk_size, 2887 zk->uk_maxpages * zk->uk_ipers, 2888 (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree, 2889 totalfree, 2890 (unsigned long long)allocs); 2891 ZONE_UNLOCK(z); 2892 for (p = offset + 12; p > offset && *p == ' '; --p) 2893 /* nothing */ ; 2894 p[1] = ':'; 2895 cnt--; 2896 offset += len; 2897 } 2898 } 2899 mtx_unlock(&uma_mtx); 2900 *offset++ = '\0'; 2901 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); 2902out: 2903 FREE(tmpbuf, M_TEMP); 2904 return (error); 2905} 2906 2907static int 2908sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS) 2909{ 2910 uma_keg_t kz; 2911 uma_zone_t z; 2912 int count; 2913 2914 count = 0; 2915 mtx_lock(&uma_mtx); 2916 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2917 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2918 count++; 2919 } 2920 mtx_unlock(&uma_mtx); 2921 return (sysctl_handle_int(oidp, &count, 0, req)); 2922} 2923 2924static int 2925sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) 2926{ 2927 struct uma_stream_header ush; 2928 struct uma_type_header uth; 2929 struct uma_percpu_stat ups; 2930 uma_bucket_t bucket; 2931 struct sbuf sbuf; 2932 uma_cache_t cache; 2933 uma_keg_t kz; 2934 
uma_zone_t z; 2935 char *buffer; 2936 int buflen, count, error, i; 2937 2938 mtx_lock(&uma_mtx); 2939restart: 2940 mtx_assert(&uma_mtx, MA_OWNED); 2941 count = 0; 2942 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2943 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2944 count++; 2945 } 2946 mtx_unlock(&uma_mtx); 2947 2948 buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) * 2949 (mp_maxid + 1)) + 1; 2950 buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); 2951 2952 mtx_lock(&uma_mtx); 2953 i = 0; 2954 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2955 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2956 i++; 2957 } 2958 if (i > count) { 2959 free(buffer, M_TEMP); 2960 goto restart; 2961 } 2962 count = i; 2963 2964 sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN); 2965 2966 /* 2967 * Insert stream header. 2968 */ 2969 bzero(&ush, sizeof(ush)); 2970 ush.ush_version = UMA_STREAM_VERSION; 2971 ush.ush_maxcpus = (mp_maxid + 1); 2972 ush.ush_count = count; 2973 if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) { 2974 mtx_unlock(&uma_mtx); 2975 error = ENOMEM; 2976 goto out; 2977 } 2978 2979 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2980 LIST_FOREACH(z, &kz->uk_zones, uz_link) { 2981 bzero(&uth, sizeof(uth)); 2982 ZONE_LOCK(z);
252static int nosleepwithlocks = 0; 253SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks, 254 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths"); 255#endif 256SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, 257 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 258SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); 259 260SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT, 261 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones"); 262 263SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT, 264 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats"); 265 266/* 267 * This routine checks to see whether or not it's safe to enable buckets. 268 */ 269 270static void 271bucket_enable(void) 272{ 273 if (cnt.v_free_count < cnt.v_free_min) 274 bucketdisable = 1; 275 else 276 bucketdisable = 0; 277} 278 279/* 280 * Initialize bucket_zones, the array of zones of buckets of various sizes. 281 * 282 * For each zone, calculate the memory required for each bucket, consisting 283 * of the header and an array of pointers. Initialize bucket_size[] to point 284 * the range of appropriate bucket sizes at the zone. 285 */ 286static void 287bucket_init(void) 288{ 289 struct uma_bucket_zone *ubz; 290 int i; 291 int j; 292 293 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) { 294 int size; 295 296 ubz = &bucket_zones[j]; 297 size = roundup(sizeof(struct uma_bucket), sizeof(void *)); 298 size += sizeof(void *) * ubz->ubz_entries; 299 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, 300 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 301 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT)) 302 bucket_size[i >> BUCKET_SHIFT] = j; 303 } 304} 305 306/* 307 * Given a desired number of entries for a bucket, return the zone from which 308 * to allocate the bucket. 309 */ 310static struct uma_bucket_zone * 311bucket_zone_lookup(int entries) 312{ 313 int idx; 314 315 idx = howmany(entries, 1 << BUCKET_SHIFT); 316 return (&bucket_zones[bucket_size[idx]]); 317} 318 319static uma_bucket_t 320bucket_alloc(int entries, int bflags) 321{ 322 struct uma_bucket_zone *ubz; 323 uma_bucket_t bucket; 324 325 /* 326 * This is to stop us from allocating per cpu buckets while we're 327 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the 328 * boot pages. This also prevents us from allocating buckets in 329 * low memory situations. 330 */ 331 if (bucketdisable) 332 return (NULL); 333 334 ubz = bucket_zone_lookup(entries); 335 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags); 336 if (bucket) { 337#ifdef INVARIANTS 338 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries); 339#endif 340 bucket->ub_cnt = 0; 341 bucket->ub_entries = ubz->ubz_entries; 342 } 343 344 return (bucket); 345} 346 347static void 348bucket_free(uma_bucket_t bucket) 349{ 350 struct uma_bucket_zone *ubz; 351 352 ubz = bucket_zone_lookup(bucket->ub_entries); 353 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE, 354 ZFREE_STATFREE); 355} 356 357static void 358bucket_zone_drain(void) 359{ 360 struct uma_bucket_zone *ubz; 361 362 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) 363 zone_drain(ubz->ubz_zone); 364} 365 366 367/* 368 * Routine called by timeout which is used to fire off some time interval 369 * based calculations. (stats, hash size, etc.) 
370 * 371 * Arguments: 372 * arg Unused 373 * 374 * Returns: 375 * Nothing 376 */ 377static void 378uma_timeout(void *unused) 379{ 380 bucket_enable(); 381 zone_foreach(zone_timeout); 382 383 /* Reschedule this event */ 384 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 385} 386 387/* 388 * Routine to perform timeout driven calculations. This expands the 389 * hashes and does per cpu statistics aggregation. 390 * 391 * Arguments: 392 * zone The zone to operate on 393 * 394 * Returns: 395 * Nothing 396 */ 397static void 398zone_timeout(uma_zone_t zone) 399{ 400 uma_keg_t keg; 401 u_int64_t alloc; 402 403 keg = zone->uz_keg; 404 alloc = 0; 405 406 /* 407 * Expand the zone hash table. 408 * 409 * This is done if the number of slabs is larger than the hash size. 410 * What I'm trying to do here is completely reduce collisions. This 411 * may be a little aggressive. Should I allow for two collisions max? 412 */ 413 ZONE_LOCK(zone); 414 if (keg->uk_flags & UMA_ZONE_HASH && 415 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) { 416 struct uma_hash newhash; 417 struct uma_hash oldhash; 418 int ret; 419 420 /* 421 * This is so involved because allocating and freeing 422 * while the zone lock is held will lead to deadlock. 423 * I have to do everything in stages and check for 424 * races. 425 */ 426 newhash = keg->uk_hash; 427 ZONE_UNLOCK(zone); 428 ret = hash_alloc(&newhash); 429 ZONE_LOCK(zone); 430 if (ret) { 431 if (hash_expand(&keg->uk_hash, &newhash)) { 432 oldhash = keg->uk_hash; 433 keg->uk_hash = newhash; 434 } else 435 oldhash = newhash; 436 437 ZONE_UNLOCK(zone); 438 hash_free(&oldhash); 439 ZONE_LOCK(zone); 440 } 441 } 442 ZONE_UNLOCK(zone); 443} 444 445/* 446 * Allocate and zero fill the next sized hash table from the appropriate 447 * backing store. 448 * 449 * Arguments: 450 * hash A new hash structure with the old hash size in uh_hashsize 451 * 452 * Returns: 453 * 1 on sucess and 0 on failure. 454 */ 455static int 456hash_alloc(struct uma_hash *hash) 457{ 458 int oldsize; 459 int alloc; 460 461 oldsize = hash->uh_hashsize; 462 463 /* We're just going to go to a power of two greater */ 464 if (oldsize) { 465 hash->uh_hashsize = oldsize * 2; 466 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize; 467 hash->uh_slab_hash = (struct slabhead *)malloc(alloc, 468 M_UMAHASH, M_NOWAIT); 469 } else { 470 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; 471 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL, 472 M_WAITOK); 473 hash->uh_hashsize = UMA_HASH_SIZE_INIT; 474 } 475 if (hash->uh_slab_hash) { 476 bzero(hash->uh_slab_hash, alloc); 477 hash->uh_hashmask = hash->uh_hashsize - 1; 478 return (1); 479 } 480 481 return (0); 482} 483 484/* 485 * Expands the hash table for HASH zones. This is done from zone_timeout 486 * to reduce collisions. This must not be done in the regular allocation 487 * path, otherwise, we can recurse on the vm while allocating pages. 488 * 489 * Arguments: 490 * oldhash The hash you want to expand 491 * newhash The hash structure for the new table 492 * 493 * Returns: 494 * Nothing 495 * 496 * Discussion: 497 */ 498static int 499hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash) 500{ 501 uma_slab_t slab; 502 int hval; 503 int i; 504 505 if (!newhash->uh_slab_hash) 506 return (0); 507 508 if (oldhash->uh_hashsize >= newhash->uh_hashsize) 509 return (0); 510 511 /* 512 * I need to investigate hash algorithms for resizing without a 513 * full rehash. 
514 */ 515 516 for (i = 0; i < oldhash->uh_hashsize; i++) 517 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) { 518 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]); 519 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink); 520 hval = UMA_HASH(newhash, slab->us_data); 521 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval], 522 slab, us_hlink); 523 } 524 525 return (1); 526} 527 528/* 529 * Free the hash bucket to the appropriate backing store. 530 * 531 * Arguments: 532 * slab_hash The hash bucket we're freeing 533 * hashsize The number of entries in that hash bucket 534 * 535 * Returns: 536 * Nothing 537 */ 538static void 539hash_free(struct uma_hash *hash) 540{ 541 if (hash->uh_slab_hash == NULL) 542 return; 543 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT) 544 uma_zfree_internal(hashzone, 545 hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE); 546 else 547 free(hash->uh_slab_hash, M_UMAHASH); 548} 549 550/* 551 * Frees all outstanding items in a bucket 552 * 553 * Arguments: 554 * zone The zone to free to, must be unlocked. 555 * bucket The free/alloc bucket with items, cpu queue must be locked. 556 * 557 * Returns: 558 * Nothing 559 */ 560 561static void 562bucket_drain(uma_zone_t zone, uma_bucket_t bucket) 563{ 564 uma_slab_t slab; 565 int mzone; 566 void *item; 567 568 if (bucket == NULL) 569 return; 570 571 slab = NULL; 572 mzone = 0; 573 574 /* We have to lookup the slab again for malloc.. */ 575 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC) 576 mzone = 1; 577 578 while (bucket->ub_cnt > 0) { 579 bucket->ub_cnt--; 580 item = bucket->ub_bucket[bucket->ub_cnt]; 581#ifdef INVARIANTS 582 bucket->ub_bucket[bucket->ub_cnt] = NULL; 583 KASSERT(item != NULL, 584 ("bucket_drain: botched ptr, item is NULL")); 585#endif 586 /* 587 * This is extremely inefficient. The slab pointer was passed 588 * to uma_zfree_arg, but we lost it because the buckets don't 589 * hold them. This will go away when free() gets a size passed 590 * to it. 591 */ 592 if (mzone) 593 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK)); 594 uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0); 595 } 596} 597 598/* 599 * Drains the per cpu caches for a zone. 600 * 601 * NOTE: This may only be called while the zone is being turn down, and not 602 * during normal operation. This is necessary in order that we do not have 603 * to migrate CPUs to drain the per-CPU caches. 604 * 605 * Arguments: 606 * zone The zone to drain, must be unlocked. 607 * 608 * Returns: 609 * Nothing 610 */ 611static void 612cache_drain(uma_zone_t zone) 613{ 614 uma_cache_t cache; 615 int cpu; 616 617 /* 618 * XXX: It is safe to not lock the per-CPU caches, because we're 619 * tearing down the zone anyway. I.e., there will be no further use 620 * of the caches at this point. 621 * 622 * XXX: It would good to be able to assert that the zone is being 623 * torn down to prevent improper use of cache_drain(). 624 * 625 * XXX: We lock the zone before passing into bucket_cache_drain() as 626 * it is used elsewhere. Should the tear-down path be made special 627 * there in some form? 
628 */ 629 for (cpu = 0; cpu <= mp_maxid; cpu++) { 630 if (CPU_ABSENT(cpu)) 631 continue; 632 cache = &zone->uz_cpu[cpu]; 633 bucket_drain(zone, cache->uc_allocbucket); 634 bucket_drain(zone, cache->uc_freebucket); 635 if (cache->uc_allocbucket != NULL) 636 bucket_free(cache->uc_allocbucket); 637 if (cache->uc_freebucket != NULL) 638 bucket_free(cache->uc_freebucket); 639 cache->uc_allocbucket = cache->uc_freebucket = NULL; 640 } 641 ZONE_LOCK(zone); 642 bucket_cache_drain(zone); 643 ZONE_UNLOCK(zone); 644} 645 646/* 647 * Drain the cached buckets from a zone. Expects a locked zone on entry. 648 */ 649static void 650bucket_cache_drain(uma_zone_t zone) 651{ 652 uma_bucket_t bucket; 653 654 /* 655 * Drain the bucket queues and free the buckets, we just keep two per 656 * cpu (alloc/free). 657 */ 658 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 659 LIST_REMOVE(bucket, ub_link); 660 ZONE_UNLOCK(zone); 661 bucket_drain(zone, bucket); 662 bucket_free(bucket); 663 ZONE_LOCK(zone); 664 } 665 666 /* Now we do the free queue.. */ 667 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 668 LIST_REMOVE(bucket, ub_link); 669 bucket_free(bucket); 670 } 671} 672 673/* 674 * Frees pages from a zone back to the system. This is done on demand from 675 * the pageout daemon. 676 * 677 * Arguments: 678 * zone The zone to free pages from 679 * all Should we drain all items? 680 * 681 * Returns: 682 * Nothing. 683 */ 684static void 685zone_drain(uma_zone_t zone) 686{ 687 struct slabhead freeslabs = { 0 }; 688 uma_keg_t keg; 689 uma_slab_t slab; 690 uma_slab_t n; 691 u_int8_t flags; 692 u_int8_t *mem; 693 int i; 694 695 keg = zone->uz_keg; 696 697 /* 698 * We don't want to take pages from statically allocated zones at this 699 * time 700 */ 701 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL) 702 return; 703 704 ZONE_LOCK(zone); 705 706#ifdef UMA_DEBUG 707 printf("%s free items: %u\n", zone->uz_name, keg->uk_free); 708#endif 709 bucket_cache_drain(zone); 710 if (keg->uk_free == 0) 711 goto finished; 712 713 slab = LIST_FIRST(&keg->uk_free_slab); 714 while (slab) { 715 n = LIST_NEXT(slab, us_link); 716 717 /* We have no where to free these to */ 718 if (slab->us_flags & UMA_SLAB_BOOT) { 719 slab = n; 720 continue; 721 } 722 723 LIST_REMOVE(slab, us_link); 724 keg->uk_pages -= keg->uk_ppera; 725 keg->uk_free -= keg->uk_ipers; 726 727 if (keg->uk_flags & UMA_ZONE_HASH) 728 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data); 729 730 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink); 731 732 slab = n; 733 } 734finished: 735 ZONE_UNLOCK(zone); 736 737 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) { 738 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink); 739 if (keg->uk_fini) 740 for (i = 0; i < keg->uk_ipers; i++) 741 keg->uk_fini( 742 slab->us_data + (keg->uk_rsize * i), 743 keg->uk_size); 744 flags = slab->us_flags; 745 mem = slab->us_data; 746 747 if ((keg->uk_flags & UMA_ZONE_MALLOC) || 748 (keg->uk_flags & UMA_ZONE_REFCNT)) { 749 vm_object_t obj; 750 751 if (flags & UMA_SLAB_KMEM) 752 obj = kmem_object; 753 else 754 obj = NULL; 755 for (i = 0; i < keg->uk_ppera; i++) 756 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE), 757 obj); 758 } 759 if (keg->uk_flags & UMA_ZONE_OFFPAGE) 760 uma_zfree_internal(keg->uk_slabzone, slab, NULL, 761 SKIP_NONE, ZFREE_STATFREE); 762#ifdef UMA_DEBUG 763 printf("%s: Returning %d bytes.\n", 764 zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera); 765#endif 766 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags); 767 } 768} 769 770/* 771 * Allocate a 
new slab for a zone. This does not insert the slab onto a list. 772 * 773 * Arguments: 774 * zone The zone to allocate slabs for 775 * wait Shall we wait? 776 * 777 * Returns: 778 * The slab that was allocated or NULL if there is no memory and the 779 * caller specified M_NOWAIT. 780 */ 781static uma_slab_t 782slab_zalloc(uma_zone_t zone, int wait) 783{ 784 uma_slabrefcnt_t slabref; 785 uma_slab_t slab; 786 uma_keg_t keg; 787 u_int8_t *mem; 788 u_int8_t flags; 789 int i; 790 791 slab = NULL; 792 keg = zone->uz_keg; 793 794#ifdef UMA_DEBUG 795 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); 796#endif 797 ZONE_UNLOCK(zone); 798 799 if (keg->uk_flags & UMA_ZONE_OFFPAGE) { 800 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait); 801 if (slab == NULL) { 802 ZONE_LOCK(zone); 803 return NULL; 804 } 805 } 806 807 /* 808 * This reproduces the old vm_zone behavior of zero filling pages the 809 * first time they are added to a zone. 810 * 811 * Malloced items are zeroed in uma_zalloc. 812 */ 813 814 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 815 wait |= M_ZERO; 816 else 817 wait &= ~M_ZERO; 818 819 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, 820 &flags, wait); 821 if (mem == NULL) { 822 if (keg->uk_flags & UMA_ZONE_OFFPAGE) 823 uma_zfree_internal(keg->uk_slabzone, slab, NULL, 824 SKIP_NONE, ZFREE_STATFREE); 825 ZONE_LOCK(zone); 826 return (NULL); 827 } 828 829 /* Point the slab into the allocated memory */ 830 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) 831 slab = (uma_slab_t )(mem + keg->uk_pgoff); 832 833 if ((keg->uk_flags & UMA_ZONE_MALLOC) || 834 (keg->uk_flags & UMA_ZONE_REFCNT)) 835 for (i = 0; i < keg->uk_ppera; i++) 836 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab); 837 838 slab->us_keg = keg; 839 slab->us_data = mem; 840 slab->us_freecount = keg->uk_ipers; 841 slab->us_firstfree = 0; 842 slab->us_flags = flags; 843 844 if (keg->uk_flags & UMA_ZONE_REFCNT) { 845 slabref = (uma_slabrefcnt_t)slab; 846 for (i = 0; i < keg->uk_ipers; i++) { 847 slabref->us_freelist[i].us_refcnt = 0; 848 slabref->us_freelist[i].us_item = i+1; 849 } 850 } else { 851 for (i = 0; i < keg->uk_ipers; i++) 852 slab->us_freelist[i].us_item = i+1; 853 } 854 855 if (keg->uk_init != NULL) { 856 for (i = 0; i < keg->uk_ipers; i++) 857 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i), 858 keg->uk_size, wait) != 0) 859 break; 860 if (i != keg->uk_ipers) { 861 if (keg->uk_fini != NULL) { 862 for (i--; i > -1; i--) 863 keg->uk_fini(slab->us_data + 864 (keg->uk_rsize * i), 865 keg->uk_size); 866 } 867 if ((keg->uk_flags & UMA_ZONE_MALLOC) || 868 (keg->uk_flags & UMA_ZONE_REFCNT)) { 869 vm_object_t obj; 870 871 if (flags & UMA_SLAB_KMEM) 872 obj = kmem_object; 873 else 874 obj = NULL; 875 for (i = 0; i < keg->uk_ppera; i++) 876 vsetobj((vm_offset_t)mem + 877 (i * PAGE_SIZE), obj); 878 } 879 if (keg->uk_flags & UMA_ZONE_OFFPAGE) 880 uma_zfree_internal(keg->uk_slabzone, slab, 881 NULL, SKIP_NONE, ZFREE_STATFREE); 882 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, 883 flags); 884 ZONE_LOCK(zone); 885 return (NULL); 886 } 887 } 888 ZONE_LOCK(zone); 889 890 if (keg->uk_flags & UMA_ZONE_HASH) 891 UMA_HASH_INSERT(&keg->uk_hash, slab, mem); 892 893 keg->uk_pages += keg->uk_ppera; 894 keg->uk_free += keg->uk_ipers; 895 896 return (slab); 897} 898 899/* 900 * This function is intended to be used early on in place of page_alloc() so 901 * that we may use the boot time page cache to satisfy allocations before 902 * the VM is ready. 
903 */ 904static void * 905startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 906{ 907 uma_keg_t keg; 908 909 keg = zone->uz_keg; 910 911 /* 912 * Check our small startup cache to see if it has pages remaining. 913 */ 914 mtx_lock(&uma_mtx); 915 if (uma_boot_free != 0) { 916 uma_slab_t tmps; 917 918 tmps = LIST_FIRST(&uma_boot_pages); 919 LIST_REMOVE(tmps, us_link); 920 uma_boot_free--; 921 mtx_unlock(&uma_mtx); 922 *pflag = tmps->us_flags; 923 return (tmps->us_data); 924 } 925 mtx_unlock(&uma_mtx); 926 if (booted == 0) 927 panic("UMA: Increase UMA_BOOT_PAGES"); 928 /* 929 * Now that we've booted reset these users to their real allocator. 930 */ 931#ifdef UMA_MD_SMALL_ALLOC 932 keg->uk_allocf = uma_small_alloc; 933#else 934 keg->uk_allocf = page_alloc; 935#endif 936 return keg->uk_allocf(zone, bytes, pflag, wait); 937} 938 939/* 940 * Allocates a number of pages from the system 941 * 942 * Arguments: 943 * zone Unused 944 * bytes The number of bytes requested 945 * wait Shall we wait? 946 * 947 * Returns: 948 * A pointer to the alloced memory or possibly 949 * NULL if M_NOWAIT is set. 950 */ 951static void * 952page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 953{ 954 void *p; /* Returned page */ 955 956 *pflag = UMA_SLAB_KMEM; 957 p = (void *) kmem_malloc(kmem_map, bytes, wait); 958 959 return (p); 960} 961 962/* 963 * Allocates a number of pages from within an object 964 * 965 * Arguments: 966 * zone Unused 967 * bytes The number of bytes requested 968 * wait Shall we wait? 969 * 970 * Returns: 971 * A pointer to the alloced memory or possibly 972 * NULL if M_NOWAIT is set. 973 */ 974static void * 975obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 976{ 977 vm_object_t object; 978 vm_offset_t retkva, zkva; 979 vm_page_t p; 980 int pages, startpages; 981 982 object = zone->uz_keg->uk_obj; 983 retkva = 0; 984 985 /* 986 * This looks a little weird since we're getting one page at a time. 987 */ 988 VM_OBJECT_LOCK(object); 989 p = TAILQ_LAST(&object->memq, pglist); 990 pages = p != NULL ? 
p->pindex + 1 : 0; 991 startpages = pages; 992 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE; 993 for (; bytes > 0; bytes -= PAGE_SIZE) { 994 p = vm_page_alloc(object, pages, 995 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED); 996 if (p == NULL) { 997 if (pages != startpages) 998 pmap_qremove(retkva, pages - startpages); 999 while (pages != startpages) { 1000 pages--; 1001 p = TAILQ_LAST(&object->memq, pglist); 1002 vm_page_lock_queues(); 1003 vm_page_unwire(p, 0); 1004 vm_page_free(p); 1005 vm_page_unlock_queues(); 1006 } 1007 retkva = 0; 1008 goto done; 1009 } 1010 pmap_qenter(zkva, &p, 1); 1011 if (retkva == 0) 1012 retkva = zkva; 1013 zkva += PAGE_SIZE; 1014 pages += 1; 1015 } 1016done: 1017 VM_OBJECT_UNLOCK(object); 1018 *flags = UMA_SLAB_PRIV; 1019 1020 return ((void *)retkva); 1021} 1022 1023/* 1024 * Frees a number of pages to the system 1025 * 1026 * Arguments: 1027 * mem A pointer to the memory to be freed 1028 * size The size of the memory being freed 1029 * flags The original p->us_flags field 1030 * 1031 * Returns: 1032 * Nothing 1033 */ 1034static void 1035page_free(void *mem, int size, u_int8_t flags) 1036{ 1037 vm_map_t map; 1038 1039 if (flags & UMA_SLAB_KMEM) 1040 map = kmem_map; 1041 else 1042 panic("UMA: page_free used with invalid flags %d\n", flags); 1043 1044 kmem_free(map, (vm_offset_t)mem, size); 1045} 1046 1047/* 1048 * Zero fill initializer 1049 * 1050 * Arguments/Returns follow uma_init specifications 1051 */ 1052static int 1053zero_init(void *mem, int size, int flags) 1054{ 1055 bzero(mem, size); 1056 return (0); 1057} 1058 1059/* 1060 * Finish creating a small uma zone. This calculates ipers, and the zone size. 1061 * 1062 * Arguments 1063 * zone The zone we should initialize 1064 * 1065 * Returns 1066 * Nothing 1067 */ 1068static void 1069zone_small_init(uma_zone_t zone) 1070{ 1071 uma_keg_t keg; 1072 u_int rsize; 1073 u_int memused; 1074 u_int wastedspace; 1075 u_int shsize; 1076 1077 keg = zone->uz_keg; 1078 KASSERT(keg != NULL, ("Keg is null in zone_small_init")); 1079 rsize = keg->uk_size; 1080 1081 if (rsize < UMA_SMALLEST_UNIT) 1082 rsize = UMA_SMALLEST_UNIT; 1083 if (rsize & keg->uk_align) 1084 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); 1085 1086 keg->uk_rsize = rsize; 1087 keg->uk_ppera = 1; 1088 1089 if (keg->uk_flags & UMA_ZONE_REFCNT) { 1090 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */ 1091 shsize = sizeof(struct uma_slab_refcnt); 1092 } else { 1093 rsize += UMA_FRITM_SZ; /* Account for linkage */ 1094 shsize = sizeof(struct uma_slab); 1095 } 1096 1097 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize; 1098 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0")); 1099 memused = keg->uk_ipers * rsize + shsize; 1100 wastedspace = UMA_SLAB_SIZE - memused; 1101 1102 /* 1103 * We can't do OFFPAGE if we're internal or if we've been 1104 * asked to not go to the VM for buckets. If we do this we 1105 * may end up going to the VM (kmem_map) for slabs which we 1106 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a 1107 * result of UMA_ZONE_VM, which clearly forbids it. 
1108 */ 1109 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) || 1110 (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) 1111 return; 1112 1113 if ((wastedspace >= UMA_MAX_WASTE) && 1114 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) { 1115 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize; 1116 KASSERT(keg->uk_ipers <= 255, 1117 ("zone_small_init: keg->uk_ipers too high!")); 1118#ifdef UMA_DEBUG 1119 printf("UMA decided we need offpage slab headers for " 1120 "zone: %s, calculated wastedspace = %d, " 1121 "maximum wasted space allowed = %d, " 1122 "calculated ipers = %d, " 1123 "new wasted space = %d\n", zone->uz_name, wastedspace, 1124 UMA_MAX_WASTE, keg->uk_ipers, 1125 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize); 1126#endif 1127 keg->uk_flags |= UMA_ZONE_OFFPAGE; 1128 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 1129 keg->uk_flags |= UMA_ZONE_HASH; 1130 } 1131} 1132 1133/* 1134 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do 1135 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be 1136 * more complicated. 1137 * 1138 * Arguments 1139 * zone The zone we should initialize 1140 * 1141 * Returns 1142 * Nothing 1143 */ 1144static void 1145zone_large_init(uma_zone_t zone) 1146{ 1147 uma_keg_t keg; 1148 int pages; 1149 1150 keg = zone->uz_keg; 1151 1152 KASSERT(keg != NULL, ("Keg is null in zone_large_init")); 1153 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0, 1154 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone")); 1155 1156 pages = keg->uk_size / UMA_SLAB_SIZE; 1157 1158 /* Account for remainder */ 1159 if ((pages * UMA_SLAB_SIZE) < keg->uk_size) 1160 pages++; 1161 1162 keg->uk_ppera = pages; 1163 keg->uk_ipers = 1; 1164 1165 keg->uk_flags |= UMA_ZONE_OFFPAGE; 1166 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 1167 keg->uk_flags |= UMA_ZONE_HASH; 1168 1169 keg->uk_rsize = keg->uk_size; 1170} 1171 1172/* 1173 * Keg header ctor. This initializes all fields, locks, etc. And inserts 1174 * the keg onto the global keg list. 1175 * 1176 * Arguments/Returns follow uma_ctor specifications 1177 * udata Actually uma_kctor_args 1178 */ 1179static int 1180keg_ctor(void *mem, int size, void *udata, int flags) 1181{ 1182 struct uma_kctor_args *arg = udata; 1183 uma_keg_t keg = mem; 1184 uma_zone_t zone; 1185 1186 bzero(keg, size); 1187 keg->uk_size = arg->size; 1188 keg->uk_init = arg->uminit; 1189 keg->uk_fini = arg->fini; 1190 keg->uk_align = arg->align; 1191 keg->uk_free = 0; 1192 keg->uk_pages = 0; 1193 keg->uk_flags = arg->flags; 1194 keg->uk_allocf = page_alloc; 1195 keg->uk_freef = page_free; 1196 keg->uk_recurse = 0; 1197 keg->uk_slabzone = NULL; 1198 1199 /* 1200 * The master zone is passed to us at keg-creation time. 1201 */ 1202 zone = arg->zone; 1203 zone->uz_keg = keg; 1204 1205 if (arg->flags & UMA_ZONE_VM) 1206 keg->uk_flags |= UMA_ZFLAG_CACHEONLY; 1207 1208 if (arg->flags & UMA_ZONE_ZINIT) 1209 keg->uk_init = zero_init; 1210 1211 /* 1212 * The +UMA_FRITM_SZ added to uk_size is to account for the 1213 * linkage that is added to the size in zone_small_init(). If 1214 * we don't account for this here then we may end up in 1215 * zone_small_init() with a calculated 'ipers' of 0. 
1216 */ 1217 if (keg->uk_flags & UMA_ZONE_REFCNT) { 1218 if ((keg->uk_size+UMA_FRITMREF_SZ) > 1219 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt))) 1220 zone_large_init(zone); 1221 else 1222 zone_small_init(zone); 1223 } else { 1224 if ((keg->uk_size+UMA_FRITM_SZ) > 1225 (UMA_SLAB_SIZE - sizeof(struct uma_slab))) 1226 zone_large_init(zone); 1227 else 1228 zone_small_init(zone); 1229 } 1230 1231 if (keg->uk_flags & UMA_ZONE_OFFPAGE) { 1232 if (keg->uk_flags & UMA_ZONE_REFCNT) 1233 keg->uk_slabzone = slabrefzone; 1234 else 1235 keg->uk_slabzone = slabzone; 1236 } 1237 1238 /* 1239 * If we haven't booted yet we need allocations to go through the 1240 * startup cache until the vm is ready. 1241 */ 1242 if (keg->uk_ppera == 1) { 1243#ifdef UMA_MD_SMALL_ALLOC 1244 keg->uk_allocf = uma_small_alloc; 1245 keg->uk_freef = uma_small_free; 1246#endif 1247 if (booted == 0) 1248 keg->uk_allocf = startup_alloc; 1249 } 1250 1251 /* 1252 * Initialize keg's lock (shared among zones) through 1253 * Master zone 1254 */ 1255 zone->uz_lock = &keg->uk_lock; 1256 if (arg->flags & UMA_ZONE_MTXCLASS) 1257 ZONE_LOCK_INIT(zone, 1); 1258 else 1259 ZONE_LOCK_INIT(zone, 0); 1260 1261 /* 1262 * If we're putting the slab header in the actual page we need to 1263 * figure out where in each page it goes. This calculates a right 1264 * justified offset into the memory on an ALIGN_PTR boundary. 1265 */ 1266 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) { 1267 u_int totsize; 1268 1269 /* Size of the slab struct and free list */ 1270 if (keg->uk_flags & UMA_ZONE_REFCNT) 1271 totsize = sizeof(struct uma_slab_refcnt) + 1272 keg->uk_ipers * UMA_FRITMREF_SZ; 1273 else 1274 totsize = sizeof(struct uma_slab) + 1275 keg->uk_ipers * UMA_FRITM_SZ; 1276 1277 if (totsize & UMA_ALIGN_PTR) 1278 totsize = (totsize & ~UMA_ALIGN_PTR) + 1279 (UMA_ALIGN_PTR + 1); 1280 keg->uk_pgoff = UMA_SLAB_SIZE - totsize; 1281 1282 if (keg->uk_flags & UMA_ZONE_REFCNT) 1283 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt) 1284 + keg->uk_ipers * UMA_FRITMREF_SZ; 1285 else 1286 totsize = keg->uk_pgoff + sizeof(struct uma_slab) 1287 + keg->uk_ipers * UMA_FRITM_SZ; 1288 1289 /* 1290 * The only way the following is possible is if with our 1291 * UMA_ALIGN_PTR adjustments we are now bigger than 1292 * UMA_SLAB_SIZE. I haven't checked whether this is 1293 * mathematically possible for all cases, so we make 1294 * sure here anyway. 1295 */ 1296 if (totsize > UMA_SLAB_SIZE) { 1297 printf("zone %s ipers %d rsize %d size %d\n", 1298 zone->uz_name, keg->uk_ipers, keg->uk_rsize, 1299 keg->uk_size); 1300 panic("UMA slab won't fit.\n"); 1301 } 1302 } 1303 1304 if (keg->uk_flags & UMA_ZONE_HASH) 1305 hash_alloc(&keg->uk_hash); 1306 1307#ifdef UMA_DEBUG 1308 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n", 1309 zone->uz_name, zone, 1310 keg->uk_size, keg->uk_ipers, 1311 keg->uk_ppera, keg->uk_pgoff); 1312#endif 1313 1314 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link); 1315 1316 mtx_lock(&uma_mtx); 1317 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link); 1318 mtx_unlock(&uma_mtx); 1319 return (0); 1320} 1321 1322/* 1323 * Zone header ctor. This initializes all fields, locks, etc. 
1324 * 1325 * Arguments/Returns follow uma_ctor specifications 1326 * udata Actually uma_zctor_args 1327 */ 1328 1329static int 1330zone_ctor(void *mem, int size, void *udata, int flags) 1331{ 1332 struct uma_zctor_args *arg = udata; 1333 uma_zone_t zone = mem; 1334 uma_zone_t z; 1335 uma_keg_t keg; 1336 1337 bzero(zone, size); 1338 zone->uz_name = arg->name; 1339 zone->uz_ctor = arg->ctor; 1340 zone->uz_dtor = arg->dtor; 1341 zone->uz_init = NULL; 1342 zone->uz_fini = NULL; 1343 zone->uz_allocs = 0; 1344 zone->uz_frees = 0; 1345 zone->uz_fails = 0; 1346 zone->uz_fills = zone->uz_count = 0; 1347 1348 if (arg->flags & UMA_ZONE_SECONDARY) { 1349 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg")); 1350 keg = arg->keg; 1351 zone->uz_keg = keg; 1352 zone->uz_init = arg->uminit; 1353 zone->uz_fini = arg->fini; 1354 zone->uz_lock = &keg->uk_lock; 1355 mtx_lock(&uma_mtx); 1356 ZONE_LOCK(zone); 1357 keg->uk_flags |= UMA_ZONE_SECONDARY; 1358 LIST_FOREACH(z, &keg->uk_zones, uz_link) { 1359 if (LIST_NEXT(z, uz_link) == NULL) { 1360 LIST_INSERT_AFTER(z, zone, uz_link); 1361 break; 1362 } 1363 } 1364 ZONE_UNLOCK(zone); 1365 mtx_unlock(&uma_mtx); 1366 } else if (arg->keg == NULL) { 1367 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini, 1368 arg->align, arg->flags) == NULL) 1369 return (ENOMEM); 1370 } else { 1371 struct uma_kctor_args karg; 1372 int error; 1373 1374 /* We should only be here from uma_startup() */ 1375 karg.size = arg->size; 1376 karg.uminit = arg->uminit; 1377 karg.fini = arg->fini; 1378 karg.align = arg->align; 1379 karg.flags = arg->flags; 1380 karg.zone = zone; 1381 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg, 1382 flags); 1383 if (error) 1384 return (error); 1385 } 1386 keg = zone->uz_keg; 1387 zone->uz_lock = &keg->uk_lock; 1388 1389 /* 1390 * Some internal zones don't have room allocated for the per cpu 1391 * caches. If we're internal, bail out here. 1392 */ 1393 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) { 1394 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0, 1395 ("Secondary zone requested UMA_ZFLAG_INTERNAL")); 1396 return (0); 1397 } 1398 1399 if (keg->uk_flags & UMA_ZONE_MAXBUCKET) 1400 zone->uz_count = BUCKET_MAX; 1401 else if (keg->uk_ipers <= BUCKET_MAX) 1402 zone->uz_count = keg->uk_ipers; 1403 else 1404 zone->uz_count = BUCKET_MAX; 1405 return (0); 1406} 1407 1408/* 1409 * Keg header dtor. This frees all data, destroys locks, frees the hash 1410 * table and removes the keg from the global list. 1411 * 1412 * Arguments/Returns follow uma_dtor specifications 1413 * udata unused 1414 */ 1415static void 1416keg_dtor(void *arg, int size, void *udata) 1417{ 1418 uma_keg_t keg; 1419 1420 keg = (uma_keg_t)arg; 1421 mtx_lock(&keg->uk_lock); 1422 if (keg->uk_free != 0) { 1423 printf("Freed UMA keg was not empty (%d items). " 1424 " Lost %d pages of memory.\n", 1425 keg->uk_free, keg->uk_pages); 1426 } 1427 mtx_unlock(&keg->uk_lock); 1428 1429 if (keg->uk_flags & UMA_ZONE_HASH) 1430 hash_free(&keg->uk_hash); 1431 1432 mtx_destroy(&keg->uk_lock); 1433} 1434 1435/* 1436 * Zone header dtor. 
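 * Drains the per-CPU caches and the keg, removes the zone from its keg's
 * zone list, and frees the keg itself when this was not a secondary zone.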
1437 * 1438 * Arguments/Returns follow uma_dtor specifications 1439 * udata unused 1440 */ 1441static void 1442zone_dtor(void *arg, int size, void *udata) 1443{ 1444 uma_zone_t zone; 1445 uma_keg_t keg; 1446 1447 zone = (uma_zone_t)arg; 1448 keg = zone->uz_keg; 1449 1450 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) 1451 cache_drain(zone); 1452 1453 mtx_lock(&uma_mtx); 1454 zone_drain(zone); 1455 if (keg->uk_flags & UMA_ZONE_SECONDARY) { 1456 LIST_REMOVE(zone, uz_link); 1457 /* 1458 * XXX there are some races here where 1459 * the zone can be drained but zone lock 1460 * released and then refilled before we 1461 * remove it... we dont care for now 1462 */ 1463 ZONE_LOCK(zone); 1464 if (LIST_EMPTY(&keg->uk_zones)) 1465 keg->uk_flags &= ~UMA_ZONE_SECONDARY; 1466 ZONE_UNLOCK(zone); 1467 mtx_unlock(&uma_mtx); 1468 } else { 1469 LIST_REMOVE(keg, uk_link); 1470 LIST_REMOVE(zone, uz_link); 1471 mtx_unlock(&uma_mtx); 1472 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE, 1473 ZFREE_STATFREE); 1474 } 1475 zone->uz_keg = NULL; 1476} 1477 1478/* 1479 * Traverses every zone in the system and calls a callback 1480 * 1481 * Arguments: 1482 * zfunc A pointer to a function which accepts a zone 1483 * as an argument. 1484 * 1485 * Returns: 1486 * Nothing 1487 */ 1488static void 1489zone_foreach(void (*zfunc)(uma_zone_t)) 1490{ 1491 uma_keg_t keg; 1492 uma_zone_t zone; 1493 1494 mtx_lock(&uma_mtx); 1495 LIST_FOREACH(keg, &uma_kegs, uk_link) { 1496 LIST_FOREACH(zone, &keg->uk_zones, uz_link) 1497 zfunc(zone); 1498 } 1499 mtx_unlock(&uma_mtx); 1500} 1501 1502/* Public functions */ 1503/* See uma.h */ 1504void 1505uma_startup(void *bootmem) 1506{ 1507 struct uma_zctor_args args; 1508 uma_slab_t slab; 1509 u_int slabsize; 1510 u_int objsize, totsize, wsize; 1511 int i; 1512 1513#ifdef UMA_DEBUG 1514 printf("Creating uma keg headers zone and keg.\n"); 1515#endif 1516 /* 1517 * The general UMA lock is a recursion-allowed lock because 1518 * there is a code path where, while we're still configured 1519 * to use startup_alloc() for backend page allocations, we 1520 * may end up in uma_reclaim() which calls zone_foreach(zone_drain), 1521 * which grabs uma_mtx, only to later call into startup_alloc() 1522 * because while freeing we needed to allocate a bucket. Since 1523 * startup_alloc() also takes uma_mtx, we need to be able to 1524 * recurse on it. 1525 */ 1526 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE); 1527 1528 /* 1529 * Figure out the maximum number of items-per-slab we'll have if 1530 * we're using the OFFPAGE slab header to track free items, given 1531 * all possible object sizes and the maximum desired wastage 1532 * (UMA_MAX_WASTE). 1533 * 1534 * We iterate until we find an object size for 1535 * which the calculated wastage in zone_small_init() will be 1536 * enough to warrant OFFPAGE. Since wastedspace versus objsize 1537 * is an overall increasing see-saw function, we find the smallest 1538 * objsize such that the wastage is always acceptable for objects 1539 * with that objsize or smaller. Since a smaller objsize always 1540 * generates a larger possible uma_max_ipers, we use this computed 1541 * objsize to calculate the largest ipers possible. Since the 1542 * ipers calculated for OFFPAGE slab headers is always larger than 1543 * the ipers initially calculated in zone_small_init(), we use 1544 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to 1545 * obtain the maximum ipers possible for offpage slab headers. 
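 * Concretely, 'wsize' below is the usable space in a slab minus the most
 * waste we are willing to tolerate, and 'totsize' is the space actually
 * consumed by whole items plus their free-list linkage; each loop stops
 * at the first objsize whose consumed space falls below that threshold,
 * i.e. the smallest object size that would be pushed to an offpage
 * header.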
1546 * 1547 * It should be noted that ipers versus objsize is an inversely 1548 * proportional function which drops off rather quickly so as 1549 * long as our UMA_MAX_WASTE is such that the objsize we calculate 1550 * falls into the portion of the inverse relation AFTER the steep 1551 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386). 1552 * 1553 * Note that we have 8-bits (1 byte) to use as a freelist index 1554 * inside the actual slab header itself and this is enough to 1555 * accommodate us. In the worst case, a UMA_SMALLEST_UNIT sized 1556 * object with offpage slab header would have ipers = 1557 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is 1558 * 1 greater than what our byte-integer freelist index can 1559 * accommodate, but we know that this situation never occurs as 1560 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate 1561 * that we need to go to offpage slab headers. Or, if we do, 1562 * then we trap that condition below and panic in the INVARIANTS case. 1563 */ 1564 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE; 1565 totsize = wsize; 1566 objsize = UMA_SMALLEST_UNIT; 1567 while (totsize >= wsize) { 1568 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / 1569 (objsize + UMA_FRITM_SZ); 1570 totsize *= (UMA_FRITM_SZ + objsize); 1571 objsize++; 1572 } 1573 if (objsize > UMA_SMALLEST_UNIT) 1574 objsize--; 1575 uma_max_ipers = UMA_SLAB_SIZE / objsize; 1576 1577 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE; 1578 totsize = wsize; 1579 objsize = UMA_SMALLEST_UNIT; 1580 while (totsize >= wsize) { 1581 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) / 1582 (objsize + UMA_FRITMREF_SZ); 1583 totsize *= (UMA_FRITMREF_SZ + objsize); 1584 objsize++; 1585 } 1586 if (objsize > UMA_SMALLEST_UNIT) 1587 objsize--; 1588 uma_max_ipers_ref = UMA_SLAB_SIZE / objsize; 1589 1590 KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255), 1591 ("uma_startup: calculated uma_max_ipers values too large!")); 1592 1593#ifdef UMA_DEBUG 1594 printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers); 1595 printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n", 1596 uma_max_ipers_ref); 1597#endif 1598 1599 /* "manually" create the initial zone */ 1600 args.name = "UMA Kegs"; 1601 args.size = sizeof(struct uma_keg); 1602 args.ctor = keg_ctor; 1603 args.dtor = keg_dtor; 1604 args.uminit = zero_init; 1605 args.fini = NULL; 1606 args.keg = &masterkeg; 1607 args.align = 32 - 1; 1608 args.flags = UMA_ZFLAG_INTERNAL; 1609 /* The initial zone has no per-CPU queues so it's smaller */ 1610 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK); 1611 1612#ifdef UMA_DEBUG 1613 printf("Filling boot free list.\n"); 1614#endif 1615 for (i = 0; i < UMA_BOOT_PAGES; i++) { 1616 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE)); 1617 slab->us_data = (u_int8_t *)slab; 1618 slab->us_flags = UMA_SLAB_BOOT; 1619 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link); 1620 uma_boot_free++; 1621 } 1622 1623#ifdef UMA_DEBUG 1624 printf("Creating uma zone headers zone and keg.\n"); 1625#endif 1626 args.name = "UMA Zones"; 1627 args.size = sizeof(struct uma_zone) + 1628 (sizeof(struct uma_cache) * (mp_maxid + 1)); 1629 args.ctor = zone_ctor; 1630 args.dtor = zone_dtor; 1631 args.uminit = zero_init; 1632 args.fini = NULL; 1633 args.keg = NULL; 1634 args.align = 32 - 1; 1635 args.flags = UMA_ZFLAG_INTERNAL; 1636 /* The initial zone has no per-CPU queues so it's smaller */ 1637 zone_ctor(zones, sizeof(struct
uma_zone), &args, M_WAITOK); 1638 1639#ifdef UMA_DEBUG 1640 printf("Initializing pcpu cache locks.\n"); 1641#endif 1642#ifdef UMA_DEBUG 1643 printf("Creating slab and hash zones.\n"); 1644#endif 1645 1646 /* 1647 * This is the max number of free list items we'll have with 1648 * offpage slabs. 1649 */ 1650 slabsize = uma_max_ipers * UMA_FRITM_SZ; 1651 slabsize += sizeof(struct uma_slab); 1652 1653 /* Now make a zone for slab headers */ 1654 slabzone = uma_zcreate("UMA Slabs", 1655 slabsize, 1656 NULL, NULL, NULL, NULL, 1657 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 1658 1659 /* 1660 * We also create a zone for the bigger slabs with reference 1661 * counts in them, to accomodate UMA_ZONE_REFCNT zones. 1662 */ 1663 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ; 1664 slabsize += sizeof(struct uma_slab_refcnt); 1665 slabrefzone = uma_zcreate("UMA RCntSlabs", 1666 slabsize, 1667 NULL, NULL, NULL, NULL, 1668 UMA_ALIGN_PTR, 1669 UMA_ZFLAG_INTERNAL); 1670 1671 hashzone = uma_zcreate("UMA Hash", 1672 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, 1673 NULL, NULL, NULL, NULL, 1674 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 1675 1676 bucket_init(); 1677 1678#ifdef UMA_MD_SMALL_ALLOC 1679 booted = 1; 1680#endif 1681 1682#ifdef UMA_DEBUG 1683 printf("UMA startup complete.\n"); 1684#endif 1685} 1686 1687/* see uma.h */ 1688void 1689uma_startup2(void) 1690{ 1691 booted = 1; 1692 bucket_enable(); 1693#ifdef UMA_DEBUG 1694 printf("UMA startup2 complete.\n"); 1695#endif 1696} 1697 1698/* 1699 * Initialize our callout handle 1700 * 1701 */ 1702 1703static void 1704uma_startup3(void) 1705{ 1706#ifdef UMA_DEBUG 1707 printf("Starting callout.\n"); 1708#endif 1709 callout_init(&uma_callout, CALLOUT_MPSAFE); 1710 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 1711#ifdef UMA_DEBUG 1712 printf("UMA startup3 complete.\n"); 1713#endif 1714} 1715 1716static uma_zone_t 1717uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, 1718 int align, u_int32_t flags) 1719{ 1720 struct uma_kctor_args args; 1721 1722 args.size = size; 1723 args.uminit = uminit; 1724 args.fini = fini; 1725 args.align = align; 1726 args.flags = flags; 1727 args.zone = zone; 1728 return (uma_zalloc_internal(kegs, &args, M_WAITOK)); 1729} 1730 1731/* See uma.h */ 1732uma_zone_t 1733uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor, 1734 uma_init uminit, uma_fini fini, int align, u_int32_t flags) 1735 1736{ 1737 struct uma_zctor_args args; 1738 1739 /* This stuff is essential for the zone ctor */ 1740 args.name = name; 1741 args.size = size; 1742 args.ctor = ctor; 1743 args.dtor = dtor; 1744 args.uminit = uminit; 1745 args.fini = fini; 1746 args.align = align; 1747 args.flags = flags; 1748 args.keg = NULL; 1749 1750 return (uma_zalloc_internal(zones, &args, M_WAITOK)); 1751} 1752 1753/* See uma.h */ 1754uma_zone_t 1755uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, 1756 uma_init zinit, uma_fini zfini, uma_zone_t master) 1757{ 1758 struct uma_zctor_args args; 1759 1760 args.name = name; 1761 args.size = master->uz_keg->uk_size; 1762 args.ctor = ctor; 1763 args.dtor = dtor; 1764 args.uminit = zinit; 1765 args.fini = zfini; 1766 args.align = master->uz_keg->uk_align; 1767 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY; 1768 args.keg = master->uz_keg; 1769 1770 return (uma_zalloc_internal(zones, &args, M_WAITOK)); 1771} 1772 1773/* See uma.h */ 1774void 1775uma_zdestroy(uma_zone_t zone) 1776{ 1777 1778 uma_zfree_internal(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE); 1779} 1780 1781/* 
See uma.h */ 1782void * 1783uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) 1784{ 1785 void *item; 1786 uma_cache_t cache; 1787 uma_bucket_t bucket; 1788 int cpu; 1789 int badness; 1790 1791 /* This is the fast path allocation */ 1792#ifdef UMA_DEBUG_ALLOC_1 1793 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); 1794#endif 1795 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread, 1796 zone->uz_name, flags); 1797 1798 if (!(flags & M_NOWAIT)) { 1799 KASSERT(curthread->td_intr_nesting_level == 0, 1800 ("malloc(M_WAITOK) in interrupt context")); 1801 if (nosleepwithlocks) { 1802#ifdef WITNESS 1803 badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, 1804 NULL, 1805 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT", 1806 zone->uz_name); 1807#else 1808 badness = 1; 1809#endif 1810 } else { 1811 badness = 0; 1812#ifdef WITNESS 1813 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1814 "malloc(M_WAITOK) of \"%s\"", zone->uz_name); 1815#endif 1816 } 1817 if (badness) { 1818 flags &= ~M_WAITOK; 1819 flags |= M_NOWAIT; 1820 } 1821 } 1822 1823 /* 1824 * If possible, allocate from the per-CPU cache. There are two 1825 * requirements for safe access to the per-CPU cache: (1) the thread 1826 * accessing the cache must not be preempted or yield during access, 1827 * and (2) the thread must not migrate CPUs without switching which 1828 * cache it accesses. We rely on a critical section to prevent 1829 * preemption and migration. We release the critical section in 1830 * order to acquire the zone mutex if we are unable to allocate from 1831 * the current cache; when we re-acquire the critical section, we 1832 * must detect and handle migration if it has occurred. 1833 */ 1834zalloc_restart: 1835 critical_enter(); 1836 cpu = curcpu; 1837 cache = &zone->uz_cpu[cpu]; 1838 1839zalloc_start: 1840 bucket = cache->uc_allocbucket; 1841 1842 if (bucket) { 1843 if (bucket->ub_cnt > 0) { 1844 bucket->ub_cnt--; 1845 item = bucket->ub_bucket[bucket->ub_cnt]; 1846#ifdef INVARIANTS 1847 bucket->ub_bucket[bucket->ub_cnt] = NULL; 1848#endif 1849 KASSERT(item != NULL, 1850 ("uma_zalloc: Bucket pointer mangled.")); 1851 cache->uc_allocs++; 1852 critical_exit(); 1853#ifdef INVARIANTS 1854 ZONE_LOCK(zone); 1855 uma_dbg_alloc(zone, NULL, item); 1856 ZONE_UNLOCK(zone); 1857#endif 1858 if (zone->uz_ctor != NULL) { 1859 if (zone->uz_ctor(item, zone->uz_keg->uk_size, 1860 udata, flags) != 0) { 1861 uma_zfree_internal(zone, item, udata, 1862 SKIP_DTOR, ZFREE_STATFAIL | 1863 ZFREE_STATFREE); 1864 return (NULL); 1865 } 1866 } 1867 if (flags & M_ZERO) 1868 bzero(item, zone->uz_keg->uk_size); 1869 return (item); 1870 } else if (cache->uc_freebucket) { 1871 /* 1872 * We have run out of items in our allocbucket. 1873 * See if we can switch with our free bucket. 1874 */ 1875 if (cache->uc_freebucket->ub_cnt > 0) { 1876#ifdef UMA_DEBUG_ALLOC 1877 printf("uma_zalloc: Swapping empty with" 1878 " alloc.\n"); 1879#endif 1880 bucket = cache->uc_freebucket; 1881 cache->uc_freebucket = cache->uc_allocbucket; 1882 cache->uc_allocbucket = bucket; 1883 1884 goto zalloc_start; 1885 } 1886 } 1887 } 1888 /* 1889 * Attempt to retrieve the item from the per-CPU cache has failed, so 1890 * we must go back to the zone. This requires the zone lock, so we 1891 * must drop the critical section, then re-acquire it when we go back 1892 * to the cache. Since the critical section is released, we may be 1893 * preempted or migrate. 
As such, make sure not to maintain any 1894 * thread-local state specific to the cache from prior to releasing 1895 * the critical section. 1896 */ 1897 critical_exit(); 1898 ZONE_LOCK(zone); 1899 critical_enter(); 1900 cpu = curcpu; 1901 cache = &zone->uz_cpu[cpu]; 1902 bucket = cache->uc_allocbucket; 1903 if (bucket != NULL) { 1904 if (bucket->ub_cnt > 0) { 1905 ZONE_UNLOCK(zone); 1906 goto zalloc_start; 1907 } 1908 bucket = cache->uc_freebucket; 1909 if (bucket != NULL && bucket->ub_cnt > 0) { 1910 ZONE_UNLOCK(zone); 1911 goto zalloc_start; 1912 } 1913 } 1914 1915 /* Since we have locked the zone we may as well send back our stats */ 1916 zone->uz_allocs += cache->uc_allocs; 1917 cache->uc_allocs = 0; 1918 zone->uz_frees += cache->uc_frees; 1919 cache->uc_frees = 0; 1920 1921 /* Our old one is now a free bucket */ 1922 if (cache->uc_allocbucket) { 1923 KASSERT(cache->uc_allocbucket->ub_cnt == 0, 1924 ("uma_zalloc_arg: Freeing a non free bucket.")); 1925 LIST_INSERT_HEAD(&zone->uz_free_bucket, 1926 cache->uc_allocbucket, ub_link); 1927 cache->uc_allocbucket = NULL; 1928 } 1929 1930 /* Check the free list for a new alloc bucket */ 1931 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 1932 KASSERT(bucket->ub_cnt != 0, 1933 ("uma_zalloc_arg: Returning an empty bucket.")); 1934 1935 LIST_REMOVE(bucket, ub_link); 1936 cache->uc_allocbucket = bucket; 1937 ZONE_UNLOCK(zone); 1938 goto zalloc_start; 1939 } 1940 /* We are no longer associated with this CPU. */ 1941 critical_exit(); 1942 1943 /* Bump up our uz_count so we get here less often */ 1944 if (zone->uz_count < BUCKET_MAX) 1945 zone->uz_count++; 1946 1947 /* 1948 * Now let's just fill a bucket and put it on the free list. If that 1949 * works we'll restart the allocation from the beginning. 1950 */ 1951 if (uma_zalloc_bucket(zone, flags)) { 1952 ZONE_UNLOCK(zone); 1953 goto zalloc_restart; 1954 } 1955 ZONE_UNLOCK(zone); 1956 /* 1957 * We may not be able to get a bucket so return an actual item. 1958 */ 1959#ifdef UMA_DEBUG 1960 printf("uma_zalloc_arg: Bucketzone returned NULL\n"); 1961#endif 1962 1963 return (uma_zalloc_internal(zone, udata, flags)); 1964} 1965 1966static uma_slab_t 1967uma_zone_slab(uma_zone_t zone, int flags) 1968{ 1969 uma_slab_t slab; 1970 uma_keg_t keg; 1971 1972 keg = zone->uz_keg; 1973 1974 /* 1975 * This is to prevent us from recursively trying to allocate 1976 * buckets. The problem is that if an allocation forces us to 1977 * grab a new bucket we will call page_alloc, which will go off 1978 * and cause the vm to allocate vm_map_entries. If we need new 1979 * buckets there too we will recurse in kmem_alloc and bad 1980 * things happen. So instead we return a NULL bucket, and make 1981 * the code that allocates buckets smart enough to deal with it. 1982 * 1983 * XXX: While we want this protection for the bucket zones so that 1984 * recursion from the VM is handled (and the calling code that 1985 * allocates buckets knows how to deal with it), we do not want 1986 * to prevent allocation from the slab header zones (slabzone 1987 * and slabrefzone) if uk_recurse is not zero for them. The 1988 * reason is that it could lead to NULL being returned for 1989 * slab header allocations even in the M_WAITOK case, and the 1990 * caller can't handle that. 1991 */ 1992 if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0) 1993 if ((zone != slabzone) && (zone != slabrefzone)) 1994 return (NULL); 1995 1996 slab = NULL; 1997 1998 for (;;) { 1999 /* 2000 * Find a slab with some space.
Prefer slabs that are partially 2001 * used over those that are totally full. This helps to reduce 2002 * fragmentation. 2003 */ 2004 if (keg->uk_free != 0) { 2005 if (!LIST_EMPTY(&keg->uk_part_slab)) { 2006 slab = LIST_FIRST(&keg->uk_part_slab); 2007 } else { 2008 slab = LIST_FIRST(&keg->uk_free_slab); 2009 LIST_REMOVE(slab, us_link); 2010 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, 2011 us_link); 2012 } 2013 return (slab); 2014 } 2015 2016 /* 2017 * M_NOVM means don't ask at all! 2018 */ 2019 if (flags & M_NOVM) 2020 break; 2021 2022 if (keg->uk_maxpages && 2023 keg->uk_pages >= keg->uk_maxpages) { 2024 keg->uk_flags |= UMA_ZFLAG_FULL; 2025 2026 if (flags & M_NOWAIT) 2027 break; 2028 else 2029 msleep(keg, &keg->uk_lock, PVM, 2030 "zonelimit", 0); 2031 continue; 2032 } 2033 keg->uk_recurse++; 2034 slab = slab_zalloc(zone, flags); 2035 keg->uk_recurse--; 2036 2037 /* 2038 * If we got a slab here it's safe to mark it partially used 2039 * and return. We assume that the caller is going to remove 2040 * at least one item. 2041 */ 2042 if (slab) { 2043 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); 2044 return (slab); 2045 } 2046 /* 2047 * We might not have been able to get a slab but another cpu 2048 * could have while we were unlocked. Check again before we 2049 * fail. 2050 */ 2051 if (flags & M_NOWAIT) 2052 flags |= M_NOVM; 2053 } 2054 return (slab); 2055} 2056 2057static void * 2058uma_slab_alloc(uma_zone_t zone, uma_slab_t slab) 2059{ 2060 uma_keg_t keg; 2061 uma_slabrefcnt_t slabref; 2062 void *item; 2063 u_int8_t freei; 2064 2065 keg = zone->uz_keg; 2066 2067 freei = slab->us_firstfree; 2068 if (keg->uk_flags & UMA_ZONE_REFCNT) { 2069 slabref = (uma_slabrefcnt_t)slab; 2070 slab->us_firstfree = slabref->us_freelist[freei].us_item; 2071 } else { 2072 slab->us_firstfree = slab->us_freelist[freei].us_item; 2073 } 2074 item = slab->us_data + (keg->uk_rsize * freei); 2075 2076 slab->us_freecount--; 2077 keg->uk_free--; 2078#ifdef INVARIANTS 2079 uma_dbg_alloc(zone, slab, item); 2080#endif 2081 /* Move this slab to the full list */ 2082 if (slab->us_freecount == 0) { 2083 LIST_REMOVE(slab, us_link); 2084 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link); 2085 } 2086 2087 return (item); 2088} 2089 2090static int 2091uma_zalloc_bucket(uma_zone_t zone, int flags) 2092{ 2093 uma_bucket_t bucket; 2094 uma_slab_t slab; 2095 int16_t saved; 2096 int max, origflags = flags; 2097 2098 /* 2099 * Try this zone's free list first so we don't allocate extra buckets. 2100 */ 2101 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 2102 KASSERT(bucket->ub_cnt == 0, 2103 ("uma_zalloc_bucket: Bucket on free list is not empty.")); 2104 LIST_REMOVE(bucket, ub_link); 2105 } else { 2106 int bflags; 2107 2108 bflags = (flags & ~M_ZERO); 2109 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY) 2110 bflags |= M_NOVM; 2111 2112 ZONE_UNLOCK(zone); 2113 bucket = bucket_alloc(zone->uz_count, bflags); 2114 ZONE_LOCK(zone); 2115 } 2116 2117 if (bucket == NULL) 2118 return (0); 2119 2120#ifdef SMP 2121 /* 2122 * This code is here to limit the number of simultaneous bucket fills 2123 * for any given zone to the number of per cpu caches in this zone. This 2124 * is done so that we don't allocate more memory than we really need. 
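 * uz_fills is incremented under the zone lock just below and decremented
 * once the fill completes, so at most mp_ncpus fills can be in progress
 * for a zone at any time.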
2125 */ 2126 if (zone->uz_fills >= mp_ncpus) 2127 goto done; 2128 2129#endif 2130 zone->uz_fills++; 2131 2132 max = MIN(bucket->ub_entries, zone->uz_count); 2133 /* Try to keep the buckets totally full */ 2134 saved = bucket->ub_cnt; 2135 while (bucket->ub_cnt < max && 2136 (slab = uma_zone_slab(zone, flags)) != NULL) { 2137 while (slab->us_freecount && bucket->ub_cnt < max) { 2138 bucket->ub_bucket[bucket->ub_cnt++] = 2139 uma_slab_alloc(zone, slab); 2140 } 2141 2142 /* Don't block on the next fill */ 2143 flags |= M_NOWAIT; 2144 } 2145 2146 /* 2147 * We unlock here because we need to call the zone's init. 2148 * It should be safe to unlock because the slab dealt with 2149 * above is already on the appropriate list within the keg 2150 * and the bucket we filled is not yet on any list, so we 2151 * own it. 2152 */ 2153 if (zone->uz_init != NULL) { 2154 int i; 2155 2156 ZONE_UNLOCK(zone); 2157 for (i = saved; i < bucket->ub_cnt; i++) 2158 if (zone->uz_init(bucket->ub_bucket[i], 2159 zone->uz_keg->uk_size, origflags) != 0) 2160 break; 2161 /* 2162 * If we couldn't initialize the whole bucket, put the 2163 * rest back onto the freelist. 2164 */ 2165 if (i != bucket->ub_cnt) { 2166 int j; 2167 2168 for (j = i; j < bucket->ub_cnt; j++) { 2169 uma_zfree_internal(zone, bucket->ub_bucket[j], 2170 NULL, SKIP_FINI, 0); 2171#ifdef INVARIANTS 2172 bucket->ub_bucket[j] = NULL; 2173#endif 2174 } 2175 bucket->ub_cnt = i; 2176 } 2177 ZONE_LOCK(zone); 2178 } 2179 2180 zone->uz_fills--; 2181 if (bucket->ub_cnt != 0) { 2182 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2183 bucket, ub_link); 2184 return (1); 2185 } 2186#ifdef SMP 2187done: 2188#endif 2189 bucket_free(bucket); 2190 2191 return (0); 2192} 2193/* 2194 * Allocates an item for an internal zone 2195 * 2196 * Arguments 2197 * zone The zone to alloc for. 2198 * udata The data to be passed to the constructor. 2199 * flags M_WAITOK, M_NOWAIT, M_ZERO. 2200 * 2201 * Returns 2202 * NULL if there is no memory and M_NOWAIT is set 2203 * An item if successful 2204 */ 2205 2206static void * 2207uma_zalloc_internal(uma_zone_t zone, void *udata, int flags) 2208{ 2209 uma_keg_t keg; 2210 uma_slab_t slab; 2211 void *item; 2212 2213 item = NULL; 2214 keg = zone->uz_keg; 2215 2216#ifdef UMA_DEBUG_ALLOC 2217 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); 2218#endif 2219 ZONE_LOCK(zone); 2220 2221 slab = uma_zone_slab(zone, flags); 2222 if (slab == NULL) { 2223 zone->uz_fails++; 2224 ZONE_UNLOCK(zone); 2225 return (NULL); 2226 } 2227 2228 item = uma_slab_alloc(zone, slab); 2229 2230 zone->uz_allocs++; 2231 2232 ZONE_UNLOCK(zone); 2233 2234 /* 2235 * We have to call both the zone's init (not the keg's init) 2236 * and the zone's ctor. This is because the item is going from 2237 * a keg slab directly to the user, and the user is expecting it 2238 * to be both zone-init'd as well as zone-ctor'd. 
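 * (Items sitting in keg slabs are already keg-init'd; only the zone-level
 * init and ctor still need to be applied before handing the item out.)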
2239 */ 2240 if (zone->uz_init != NULL) { 2241 if (zone->uz_init(item, keg->uk_size, flags) != 0) { 2242 uma_zfree_internal(zone, item, udata, SKIP_FINI, 2243 ZFREE_STATFAIL | ZFREE_STATFREE); 2244 return (NULL); 2245 } 2246 } 2247 if (zone->uz_ctor != NULL) { 2248 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) { 2249 uma_zfree_internal(zone, item, udata, SKIP_DTOR, 2250 ZFREE_STATFAIL | ZFREE_STATFREE); 2251 return (NULL); 2252 } 2253 } 2254 if (flags & M_ZERO) 2255 bzero(item, keg->uk_size); 2256 2257 return (item); 2258} 2259 2260/* See uma.h */ 2261void 2262uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 2263{ 2264 uma_keg_t keg; 2265 uma_cache_t cache; 2266 uma_bucket_t bucket; 2267 int bflags; 2268 int cpu; 2269 2270 keg = zone->uz_keg; 2271 2272#ifdef UMA_DEBUG_ALLOC_1 2273 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); 2274#endif 2275 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread, 2276 zone->uz_name); 2277 2278 if (zone->uz_dtor) 2279 zone->uz_dtor(item, keg->uk_size, udata); 2280#ifdef INVARIANTS 2281 ZONE_LOCK(zone); 2282 if (keg->uk_flags & UMA_ZONE_MALLOC) 2283 uma_dbg_free(zone, udata, item); 2284 else 2285 uma_dbg_free(zone, NULL, item); 2286 ZONE_UNLOCK(zone); 2287#endif 2288 /* 2289 * The race here is acceptable. If we miss it we'll just have to wait 2290 * a little longer for the limits to be reset. 2291 */ 2292 if (keg->uk_flags & UMA_ZFLAG_FULL) 2293 goto zfree_internal; 2294 2295 /* 2296 * If possible, free to the per-CPU cache. There are two 2297 * requirements for safe access to the per-CPU cache: (1) the thread 2298 * accessing the cache must not be preempted or yield during access, 2299 * and (2) the thread must not migrate CPUs without switching which 2300 * cache it accesses. We rely on a critical section to prevent 2301 * preemption and migration. We release the critical section in 2302 * order to acquire the zone mutex if we are unable to free to the 2303 * current cache; when we re-acquire the critical section, we must 2304 * detect and handle migration if it has occurred. 2305 */ 2306zfree_restart: 2307 critical_enter(); 2308 cpu = curcpu; 2309 cache = &zone->uz_cpu[cpu]; 2310 2311zfree_start: 2312 bucket = cache->uc_freebucket; 2313 2314 if (bucket) { 2315 /* 2316 * Do we have room in our bucket? It is OK for this uz count 2317 * check to be slightly out of sync. 2318 */ 2319 2320 if (bucket->ub_cnt < bucket->ub_entries) { 2321 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL, 2322 ("uma_zfree: Freeing to non free bucket index.")); 2323 bucket->ub_bucket[bucket->ub_cnt] = item; 2324 bucket->ub_cnt++; 2325 cache->uc_frees++; 2326 critical_exit(); 2327 return; 2328 } else if (cache->uc_allocbucket) { 2329#ifdef UMA_DEBUG_ALLOC 2330 printf("uma_zfree: Swapping buckets.\n"); 2331#endif 2332 /* 2333 * We have run out of space in our freebucket. 2334 * See if we can switch with our alloc bucket. 2335 */ 2336 if (cache->uc_allocbucket->ub_cnt < 2337 cache->uc_freebucket->ub_cnt) { 2338 bucket = cache->uc_freebucket; 2339 cache->uc_freebucket = cache->uc_allocbucket; 2340 cache->uc_allocbucket = bucket; 2341 goto zfree_start; 2342 } 2343 } 2344 } 2345 /* 2346 * We can get here for two reasons: 2347 * 2348 * 1) The buckets are NULL 2349 * 2) The alloc and free buckets are both somewhat full. 2350 * 2351 * We must go back the zone, which requires acquiring the zone lock, 2352 * which in turn means we must release and re-acquire the critical 2353 * section. 
Since the critical section is released, we may be 2354 * preempted or migrate. As such, make sure not to maintain any 2355 * thread-local state specific to the cache from prior to releasing 2356 * the critical section. 2357 */ 2358 critical_exit(); 2359 ZONE_LOCK(zone); 2360 critical_enter(); 2361 cpu = curcpu; 2362 cache = &zone->uz_cpu[cpu]; 2363 if (cache->uc_freebucket != NULL) { 2364 if (cache->uc_freebucket->ub_cnt < 2365 cache->uc_freebucket->ub_entries) { 2366 ZONE_UNLOCK(zone); 2367 goto zfree_start; 2368 } 2369 if (cache->uc_allocbucket != NULL && 2370 (cache->uc_allocbucket->ub_cnt < 2371 cache->uc_freebucket->ub_cnt)) { 2372 ZONE_UNLOCK(zone); 2373 goto zfree_start; 2374 } 2375 } 2376 2377 /* Since we have locked the zone we may as well send back our stats */ 2378 zone->uz_allocs += cache->uc_allocs; 2379 cache->uc_allocs = 0; 2380 zone->uz_frees += cache->uc_frees; 2381 cache->uc_frees = 0; 2382 2383 bucket = cache->uc_freebucket; 2384 cache->uc_freebucket = NULL; 2385 2386 /* Can we throw this on the zone full list? */ 2387 if (bucket != NULL) { 2388#ifdef UMA_DEBUG_ALLOC 2389 printf("uma_zfree: Putting old bucket on the free list.\n"); 2390#endif 2391 /* ub_cnt is pointing to the last free item */ 2392 KASSERT(bucket->ub_cnt != 0, 2393 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); 2394 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2395 bucket, ub_link); 2396 } 2397 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 2398 LIST_REMOVE(bucket, ub_link); 2399 ZONE_UNLOCK(zone); 2400 cache->uc_freebucket = bucket; 2401 goto zfree_start; 2402 } 2403 /* We are no longer associated with this CPU. */ 2404 critical_exit(); 2405 2406 /* And the zone.. */ 2407 ZONE_UNLOCK(zone); 2408 2409#ifdef UMA_DEBUG_ALLOC 2410 printf("uma_zfree: Allocating new free bucket.\n"); 2411#endif 2412 bflags = M_NOWAIT; 2413 2414 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY) 2415 bflags |= M_NOVM; 2416 bucket = bucket_alloc(zone->uz_count, bflags); 2417 if (bucket) { 2418 ZONE_LOCK(zone); 2419 LIST_INSERT_HEAD(&zone->uz_free_bucket, 2420 bucket, ub_link); 2421 ZONE_UNLOCK(zone); 2422 goto zfree_restart; 2423 } 2424 2425 /* 2426 * If nothing else caught this, we'll just do an internal free. 
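 * Note that the destructor has already run at the top of this function,
 * which is why SKIP_DTOR is passed, and that this is also the path taken
 * when the keg is at its limit (UMA_ZFLAG_FULL), letting
 * uma_zfree_internal() wake up any threads sleeping on the keg.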
2427 */ 2428zfree_internal: 2429 uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFAIL | 2430 ZFREE_STATFREE); 2431 2432 return; 2433} 2434 2435/* 2436 * Frees an item to an INTERNAL zone or allocates a free bucket 2437 * 2438 * Arguments: 2439 * zone The zone to free to 2440 * item The item we're freeing 2441 * udata User supplied data for the dtor 2442 * skip Skip dtors and finis 2443 */ 2444static void 2445uma_zfree_internal(uma_zone_t zone, void *item, void *udata, 2446 enum zfreeskip skip, int flags) 2447{ 2448 uma_slab_t slab; 2449 uma_slabrefcnt_t slabref; 2450 uma_keg_t keg; 2451 u_int8_t *mem; 2452 u_int8_t freei; 2453 2454 keg = zone->uz_keg; 2455 2456 if (skip < SKIP_DTOR && zone->uz_dtor) 2457 zone->uz_dtor(item, keg->uk_size, udata); 2458 if (skip < SKIP_FINI && zone->uz_fini) 2459 zone->uz_fini(item, keg->uk_size); 2460 2461 ZONE_LOCK(zone); 2462 2463 if (flags & ZFREE_STATFAIL) 2464 zone->uz_fails++; 2465 if (flags & ZFREE_STATFREE) 2466 zone->uz_frees++; 2467 2468 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) { 2469 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); 2470 if (keg->uk_flags & UMA_ZONE_HASH) 2471 slab = hash_sfind(&keg->uk_hash, mem); 2472 else { 2473 mem += keg->uk_pgoff; 2474 slab = (uma_slab_t)mem; 2475 } 2476 } else { 2477 slab = (uma_slab_t)udata; 2478 } 2479 2480 /* Do we need to remove from any lists? */ 2481 if (slab->us_freecount+1 == keg->uk_ipers) { 2482 LIST_REMOVE(slab, us_link); 2483 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); 2484 } else if (slab->us_freecount == 0) { 2485 LIST_REMOVE(slab, us_link); 2486 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); 2487 } 2488 2489 /* Slab management stuff */ 2490 freei = ((unsigned long)item - (unsigned long)slab->us_data) 2491 / keg->uk_rsize; 2492 2493#ifdef INVARIANTS 2494 if (!skip) 2495 uma_dbg_free(zone, slab, item); 2496#endif 2497 2498 if (keg->uk_flags & UMA_ZONE_REFCNT) { 2499 slabref = (uma_slabrefcnt_t)slab; 2500 slabref->us_freelist[freei].us_item = slab->us_firstfree; 2501 } else { 2502 slab->us_freelist[freei].us_item = slab->us_firstfree; 2503 } 2504 slab->us_firstfree = freei; 2505 slab->us_freecount++; 2506 2507 /* Zone statistics */ 2508 keg->uk_free++; 2509 2510 if (keg->uk_flags & UMA_ZFLAG_FULL) { 2511 if (keg->uk_pages < keg->uk_maxpages) 2512 keg->uk_flags &= ~UMA_ZFLAG_FULL; 2513 2514 /* We can handle one more allocation */ 2515 wakeup_one(keg); 2516 } 2517 2518 ZONE_UNLOCK(zone); 2519} 2520 2521/* See uma.h */ 2522void 2523uma_zone_set_max(uma_zone_t zone, int nitems) 2524{ 2525 uma_keg_t keg; 2526 2527 keg = zone->uz_keg; 2528 ZONE_LOCK(zone); 2529 if (keg->uk_ppera > 1) 2530 keg->uk_maxpages = nitems * keg->uk_ppera; 2531 else 2532 keg->uk_maxpages = nitems / keg->uk_ipers; 2533 2534 if (keg->uk_maxpages * keg->uk_ipers < nitems) 2535 keg->uk_maxpages++; 2536 2537 ZONE_UNLOCK(zone); 2538} 2539 2540/* See uma.h */ 2541void 2542uma_zone_set_init(uma_zone_t zone, uma_init uminit) 2543{ 2544 ZONE_LOCK(zone); 2545 KASSERT(zone->uz_keg->uk_pages == 0, 2546 ("uma_zone_set_init on non-empty keg")); 2547 zone->uz_keg->uk_init = uminit; 2548 ZONE_UNLOCK(zone); 2549} 2550 2551/* See uma.h */ 2552void 2553uma_zone_set_fini(uma_zone_t zone, uma_fini fini) 2554{ 2555 ZONE_LOCK(zone); 2556 KASSERT(zone->uz_keg->uk_pages == 0, 2557 ("uma_zone_set_fini on non-empty keg")); 2558 zone->uz_keg->uk_fini = fini; 2559 ZONE_UNLOCK(zone); 2560} 2561 2562/* See uma.h */ 2563void 2564uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) 2565{ 2566 ZONE_LOCK(zone); 2567 
KASSERT(zone->uz_keg->uk_pages == 0, 2568 ("uma_zone_set_zinit on non-empty keg")); 2569 zone->uz_init = zinit; 2570 ZONE_UNLOCK(zone); 2571} 2572 2573/* See uma.h */ 2574void 2575uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini) 2576{ 2577 ZONE_LOCK(zone); 2578 KASSERT(zone->uz_keg->uk_pages == 0, 2579 ("uma_zone_set_zfini on non-empty keg")); 2580 zone->uz_fini = zfini; 2581 ZONE_UNLOCK(zone); 2582} 2583 2584/* See uma.h */ 2585/* XXX uk_freef is not actually used with the zone locked */ 2586void 2587uma_zone_set_freef(uma_zone_t zone, uma_free freef) 2588{ 2589 ZONE_LOCK(zone); 2590 zone->uz_keg->uk_freef = freef; 2591 ZONE_UNLOCK(zone); 2592} 2593 2594/* See uma.h */ 2595/* XXX uk_allocf is not actually used with the zone locked */ 2596void 2597uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) 2598{ 2599 ZONE_LOCK(zone); 2600 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC; 2601 zone->uz_keg->uk_allocf = allocf; 2602 ZONE_UNLOCK(zone); 2603} 2604 2605/* See uma.h */ 2606int 2607uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) 2608{ 2609 uma_keg_t keg; 2610 vm_offset_t kva; 2611 int pages; 2612 2613 keg = zone->uz_keg; 2614 pages = count / keg->uk_ipers; 2615 2616 if (pages * keg->uk_ipers < count) 2617 pages++; 2618 2619 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE); 2620 2621 if (kva == 0) 2622 return (0); 2623 if (obj == NULL) { 2624 obj = vm_object_allocate(OBJT_DEFAULT, 2625 pages); 2626 } else { 2627 VM_OBJECT_LOCK_INIT(obj, "uma object"); 2628 _vm_object_allocate(OBJT_DEFAULT, 2629 pages, obj); 2630 } 2631 ZONE_LOCK(zone); 2632 keg->uk_kva = kva; 2633 keg->uk_obj = obj; 2634 keg->uk_maxpages = pages; 2635 keg->uk_allocf = obj_alloc; 2636 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC; 2637 ZONE_UNLOCK(zone); 2638 return (1); 2639} 2640 2641/* See uma.h */ 2642void 2643uma_prealloc(uma_zone_t zone, int items) 2644{ 2645 int slabs; 2646 uma_slab_t slab; 2647 uma_keg_t keg; 2648 2649 keg = zone->uz_keg; 2650 ZONE_LOCK(zone); 2651 slabs = items / keg->uk_ipers; 2652 if (slabs * keg->uk_ipers < items) 2653 slabs++; 2654 while (slabs > 0) { 2655 slab = slab_zalloc(zone, M_WAITOK); 2656 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); 2657 slabs--; 2658 } 2659 ZONE_UNLOCK(zone); 2660} 2661 2662/* See uma.h */ 2663u_int32_t * 2664uma_find_refcnt(uma_zone_t zone, void *item) 2665{ 2666 uma_slabrefcnt_t slabref; 2667 uma_keg_t keg; 2668 u_int32_t *refcnt; 2669 int idx; 2670 2671 keg = zone->uz_keg; 2672 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & 2673 (~UMA_SLAB_MASK)); 2674 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT, 2675 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT")); 2676 idx = ((unsigned long)item - (unsigned long)slabref->us_data) 2677 / keg->uk_rsize; 2678 refcnt = &slabref->us_freelist[idx].us_refcnt; 2679 return (refcnt); 2680} 2681 2682/* See uma.h */ 2683void 2684uma_reclaim(void) 2685{ 2686#ifdef UMA_DEBUG 2687 printf("UMA: vm asked us to release pages!\n"); 2688#endif 2689 bucket_enable(); 2690 zone_foreach(zone_drain); 2691 /* 2692 * Some slabs may have been freed during the pass above, but the slab 2693 * header zones are visited early in that pass, so we drain them again 2694 * here to release pages that became empty only after the other zones were drained. We have to do the same for buckets.
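 * (Draining a zone with offpage slab headers returns those headers to
 * slabzone/slabrefzone, which is why those two zones only have extra
 * pages to give back after everything else has been drained.)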
2695 */ 2696 zone_drain(slabzone); 2697 zone_drain(slabrefzone); 2698 bucket_zone_drain(); 2699} 2700 2701void * 2702uma_large_malloc(int size, int wait) 2703{ 2704 void *mem; 2705 uma_slab_t slab; 2706 u_int8_t flags; 2707 2708 slab = uma_zalloc_internal(slabzone, NULL, wait); 2709 if (slab == NULL) 2710 return (NULL); 2711 mem = page_alloc(NULL, size, &flags, wait); 2712 if (mem) { 2713 vsetslab((vm_offset_t)mem, slab); 2714 slab->us_data = mem; 2715 slab->us_flags = flags | UMA_SLAB_MALLOC; 2716 slab->us_size = size; 2717 } else { 2718 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, 2719 ZFREE_STATFAIL | ZFREE_STATFREE); 2720 } 2721 2722 return (mem); 2723} 2724 2725void 2726uma_large_free(uma_slab_t slab) 2727{ 2728 vsetobj((vm_offset_t)slab->us_data, kmem_object); 2729 page_free(slab->us_data, slab->us_size, slab->us_flags); 2730 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE); 2731} 2732 2733void 2734uma_print_stats(void) 2735{ 2736 zone_foreach(uma_print_zone); 2737} 2738 2739static void 2740slab_print(uma_slab_t slab) 2741{ 2742 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n", 2743 slab->us_keg, slab->us_data, slab->us_freecount, 2744 slab->us_firstfree); 2745} 2746 2747static void 2748cache_print(uma_cache_t cache) 2749{ 2750 printf("alloc: %p(%d), free: %p(%d)\n", 2751 cache->uc_allocbucket, 2752 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0, 2753 cache->uc_freebucket, 2754 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0); 2755} 2756 2757void 2758uma_print_zone(uma_zone_t zone) 2759{ 2760 uma_cache_t cache; 2761 uma_keg_t keg; 2762 uma_slab_t slab; 2763 int i; 2764 2765 keg = zone->uz_keg; 2766 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", 2767 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags, 2768 keg->uk_ipers, keg->uk_ppera, 2769 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free); 2770 printf("Part slabs:\n"); 2771 LIST_FOREACH(slab, &keg->uk_part_slab, us_link) 2772 slab_print(slab); 2773 printf("Free slabs:\n"); 2774 LIST_FOREACH(slab, &keg->uk_free_slab, us_link) 2775 slab_print(slab); 2776 printf("Full slabs:\n"); 2777 LIST_FOREACH(slab, &keg->uk_full_slab, us_link) 2778 slab_print(slab); 2779 for (i = 0; i <= mp_maxid; i++) { 2780 if (CPU_ABSENT(i)) 2781 continue; 2782 cache = &zone->uz_cpu[i]; 2783 printf("CPU %d Cache:\n", i); 2784 cache_print(cache); 2785 } 2786} 2787 2788/* 2789 * Generate statistics across both the zone and its per-cpu cache's. Return 2790 * desired statistics if the pointer is non-NULL for that statistic. 2791 * 2792 * Note: does not update the zone statistics, as it can't safely clear the 2793 * per-CPU cache statistic. 2794 * 2795 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't 2796 * safe from off-CPU; we should modify the caches to track this information 2797 * directly so that we don't have to. 
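 * The counts gathered here only feed statistics reporting (the vm.zone
 * sysctls below), so an occasionally stale per-CPU value is tolerated.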
2798 */ 2799static void 2800uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp, 2801 u_int64_t *freesp) 2802{ 2803 uma_cache_t cache; 2804 u_int64_t allocs, frees; 2805 int cachefree, cpu; 2806 2807 allocs = frees = 0; 2808 cachefree = 0; 2809 for (cpu = 0; cpu <= mp_maxid; cpu++) { 2810 if (CPU_ABSENT(cpu)) 2811 continue; 2812 cache = &z->uz_cpu[cpu]; 2813 if (cache->uc_allocbucket != NULL) 2814 cachefree += cache->uc_allocbucket->ub_cnt; 2815 if (cache->uc_freebucket != NULL) 2816 cachefree += cache->uc_freebucket->ub_cnt; 2817 allocs += cache->uc_allocs; 2818 frees += cache->uc_frees; 2819 } 2820 allocs += z->uz_allocs; 2821 frees += z->uz_frees; 2822 if (cachefreep != NULL) 2823 *cachefreep = cachefree; 2824 if (allocsp != NULL) 2825 *allocsp = allocs; 2826 if (freesp != NULL) 2827 *freesp = frees; 2828} 2829 2830/* 2831 * Sysctl handler for vm.zone 2832 * 2833 * stolen from vm_zone.c 2834 */ 2835static int 2836sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 2837{ 2838 int error, len, cnt; 2839 const int linesize = 128; /* conservative */ 2840 int totalfree; 2841 char *tmpbuf, *offset; 2842 uma_zone_t z; 2843 uma_keg_t zk; 2844 char *p; 2845 int cachefree; 2846 uma_bucket_t bucket; 2847 u_int64_t allocs, frees; 2848 2849 cnt = 0; 2850 mtx_lock(&uma_mtx); 2851 LIST_FOREACH(zk, &uma_kegs, uk_link) { 2852 LIST_FOREACH(z, &zk->uk_zones, uz_link) 2853 cnt++; 2854 } 2855 mtx_unlock(&uma_mtx); 2856 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize, 2857 M_TEMP, M_WAITOK); 2858 len = snprintf(tmpbuf, linesize, 2859 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n"); 2860 if (cnt == 0) 2861 tmpbuf[len - 1] = '\0'; 2862 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len); 2863 if (error || cnt == 0) 2864 goto out; 2865 offset = tmpbuf; 2866 mtx_lock(&uma_mtx); 2867 LIST_FOREACH(zk, &uma_kegs, uk_link) { 2868 LIST_FOREACH(z, &zk->uk_zones, uz_link) { 2869 if (cnt == 0) /* list may have changed size */ 2870 break; 2871 ZONE_LOCK(z); 2872 cachefree = 0; 2873 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { 2874 uma_zone_sumstat(z, &cachefree, &allocs, &frees); 2875 } else { 2876 allocs = z->uz_allocs; 2877 frees = z->uz_frees; 2878 } 2879 2880 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) { 2881 cachefree += bucket->ub_cnt; 2882 } 2883 totalfree = zk->uk_free + cachefree; 2884 len = snprintf(offset, linesize, 2885 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", 2886 z->uz_name, zk->uk_size, 2887 zk->uk_maxpages * zk->uk_ipers, 2888 (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree, 2889 totalfree, 2890 (unsigned long long)allocs); 2891 ZONE_UNLOCK(z); 2892 for (p = offset + 12; p > offset && *p == ' '; --p) 2893 /* nothing */ ; 2894 p[1] = ':'; 2895 cnt--; 2896 offset += len; 2897 } 2898 } 2899 mtx_unlock(&uma_mtx); 2900 *offset++ = '\0'; 2901 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); 2902out: 2903 FREE(tmpbuf, M_TEMP); 2904 return (error); 2905} 2906 2907static int 2908sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS) 2909{ 2910 uma_keg_t kz; 2911 uma_zone_t z; 2912 int count; 2913 2914 count = 0; 2915 mtx_lock(&uma_mtx); 2916 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2917 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2918 count++; 2919 } 2920 mtx_unlock(&uma_mtx); 2921 return (sysctl_handle_int(oidp, &count, 0, req)); 2922} 2923 2924static int 2925sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) 2926{ 2927 struct uma_stream_header ush; 2928 struct uma_type_header uth; 2929 struct uma_percpu_stat ups; 2930 uma_bucket_t bucket; 2931 struct sbuf sbuf; 2932 uma_cache_t cache; 2933 uma_keg_t kz; 2934 
uma_zone_t z; 2935 char *buffer; 2936 int buflen, count, error, i; 2937 2938 mtx_lock(&uma_mtx); 2939restart: 2940 mtx_assert(&uma_mtx, MA_OWNED); 2941 count = 0; 2942 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2943 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2944 count++; 2945 } 2946 mtx_unlock(&uma_mtx); 2947 2948 buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) * 2949 (mp_maxid + 1)) + 1; 2950 buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); 2951 2952 mtx_lock(&uma_mtx); 2953 i = 0; 2954 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2955 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2956 i++; 2957 } 2958 if (i > count) { 2959 free(buffer, M_TEMP); 2960 goto restart; 2961 } 2962 count = i; 2963 2964 sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN); 2965 2966 /* 2967 * Insert stream header. 2968 */ 2969 bzero(&ush, sizeof(ush)); 2970 ush.ush_version = UMA_STREAM_VERSION; 2971 ush.ush_maxcpus = (mp_maxid + 1); 2972 ush.ush_count = count; 2973 if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) { 2974 mtx_unlock(&uma_mtx); 2975 error = ENOMEM; 2976 goto out; 2977 } 2978 2979 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2980 LIST_FOREACH(z, &kz->uk_zones, uz_link) { 2981 bzero(&uth, sizeof(uth)); 2982 ZONE_LOCK(z);