uma_core.c: revision 103531 vs. revision 103623
1/*
1/*
2 * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net>
2 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions, and the following
10 * disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/vm/uma_core.c 103531 2002-09-18 08:26:30Z jeff $
26 * $FreeBSD: head/sys/vm/uma_core.c 103623 2002-09-19 06:05:32Z jeff $
27 *
28 */
29
30/*
31 * uma_core.c Implementation of the Universal Memory allocator
32 *
33 * This allocator is intended to replace the multitude of similar object caches
34 * in the standard FreeBSD kernel. The intent is to be flexible as well as
 35 * efficient. A primary design goal is to return unused memory to the rest of
36 * the system. This will make the system as a whole more flexible due to the
37 * ability to move memory to subsystems which most need it instead of leaving
38 * pools of reserved memory unused.
39 *
40 * The basic ideas stem from similar slab/zone based allocators whose algorithms
41 * are well known.
42 *
43 */
44
45/*
46 * TODO:
47 * - Improve memory usage for large allocations
48 * - Investigate cache size adjustments
49 */
50
51/* I should really use ktr.. */
52/*
53#define UMA_DEBUG 1
54#define UMA_DEBUG_ALLOC 1
55#define UMA_DEBUG_ALLOC_1 1
56*/
57
58
59#include "opt_param.h"
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/kernel.h>
63#include <sys/types.h>
64#include <sys/queue.h>
65#include <sys/malloc.h>
66#include <sys/lock.h>
67#include <sys/sysctl.h>
68#include <sys/mutex.h>
69#include <sys/proc.h>
70#include <sys/smp.h>
71#include <sys/vmmeter.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/vm_param.h>
77#include <vm/vm_map.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_extern.h>
80#include <vm/uma.h>
81#include <vm/uma_int.h>
82#include <vm/uma_dbg.h>
83
84/*
85 * This is the zone from which all zones are spawned. The idea is that even
86 * the zone heads are allocated from the allocator, so we use the bss section
87 * to bootstrap us.
88 */
89static struct uma_zone masterzone;
90static uma_zone_t zones = &masterzone;
91
92/* This is the zone from which all of uma_slab_t's are allocated. */
93static uma_zone_t slabzone;
94
95/*
96 * The initial hash tables come out of this zone so they can be allocated
97 * prior to malloc coming up.
98 */
99static uma_zone_t hashzone;
100
101/*
102 * Zone that buckets come from.
103 */
104static uma_zone_t bucketzone;
105
106/*
107 * Are we allowed to allocate buckets?
108 */
109static int bucketdisable = 1;
110
111/* Linked list of all zones in the system */
112static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
113
114/* This mutex protects the zone list */
115static struct mtx uma_mtx;
116
117/* Linked list of boot time pages */
118static LIST_HEAD(,uma_slab) uma_boot_pages =
119 LIST_HEAD_INITIALIZER(&uma_boot_pages);
120
121/* Count of free boottime pages */
122static int uma_boot_free = 0;
123
124/* Is the VM done starting up? */
125static int booted = 0;
126
127/* This is the handle used to schedule our working set calculator */
128static struct callout uma_callout;
129
130/* This is mp_maxid + 1, for use while looping over each cpu */
131static int maxcpu;
132
133/*
134 * This structure is passed as the zone ctor arg so that I don't have to create
135 * a special allocation function just for zones.
136 */
137struct uma_zctor_args {
138 char *name;
139 size_t size;
140 uma_ctor ctor;
141 uma_dtor dtor;
142 uma_init uminit;
143 uma_fini fini;
144 int align;
145 u_int16_t flags;
146};
147
148/* Prototypes.. */
149
150static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
151static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
152static void page_free(void *, int, u_int8_t);
153static uma_slab_t slab_zalloc(uma_zone_t, int);
154static void cache_drain(uma_zone_t);
155static void bucket_drain(uma_zone_t, uma_bucket_t);
156static void zone_drain(uma_zone_t);
157static void zone_ctor(void *, int, void *);
158static void zone_dtor(void *, int, void *);
159static void zero_init(void *, int);
160static void zone_small_init(uma_zone_t zone);
161static void zone_large_init(uma_zone_t zone);
162static void zone_foreach(void (*zfunc)(uma_zone_t));
163static void zone_timeout(uma_zone_t zone);
164static int hash_alloc(struct uma_hash *);
165static int hash_expand(struct uma_hash *, struct uma_hash *);
166static void hash_free(struct uma_hash *hash);
167static void uma_timeout(void *);
168static void uma_startup3(void);
169static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t);
170static void uma_zfree_internal(uma_zone_t, void *, void *, int);
171static void bucket_enable(void);
172void uma_print_zone(uma_zone_t);
173void uma_print_stats(void);
174static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
175
176SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
177 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
178SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
179
180/*
181 * This routine checks to see whether or not it's safe to enable buckets.
182 */
183
184static void
185bucket_enable(void)
186{
187 if (cnt.v_free_count < cnt.v_free_min)
188 bucketdisable = 1;
189 else
190 bucketdisable = 0;
191}
192
193
194/*
195 * Routine called by timeout which is used to fire off some time interval
196 * based calculations. (working set, stats, etc.)
197 *
198 * Arguments:
199 * arg Unused
200 *
201 * Returns:
202 * Nothing
203 */
204static void
205uma_timeout(void *unused)
206{
207 bucket_enable();
208 zone_foreach(zone_timeout);
209
210 /* Reschedule this event */
211 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
212}
213
214/*
215 * Routine to perform timeout driven calculations. This does the working set
216 * as well as hash expanding, and per cpu statistics aggregation.
217 *
218 * Arguments:
219 * zone The zone to operate on
220 *
221 * Returns:
222 * Nothing
223 */
224static void
225zone_timeout(uma_zone_t zone)
226{
227 uma_cache_t cache;
228 u_int64_t alloc;
229 int free;
230 int cpu;
231
232 alloc = 0;
233 free = 0;
234
235 /*
236 * Aggregate per cpu cache statistics back to the zone.
237 *
238 * I may rewrite this to set a flag in the per cpu cache instead of
239 * locking. If the flag is not cleared on the next round I will have
240 * to lock and do it here instead so that the statistics don't get too
241 * far out of sync.
242 */
243 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
244 for (cpu = 0; cpu < maxcpu; cpu++) {
245 if (CPU_ABSENT(cpu))
246 continue;
247 CPU_LOCK(zone, cpu);
248 cache = &zone->uz_cpu[cpu];
249 /* Add them up, and reset */
250 alloc += cache->uc_allocs;
251 cache->uc_allocs = 0;
252 if (cache->uc_allocbucket)
253 free += cache->uc_allocbucket->ub_ptr + 1;
254 if (cache->uc_freebucket)
255 free += cache->uc_freebucket->ub_ptr + 1;
256 CPU_UNLOCK(zone, cpu);
257 }
258 }
259
260 /* Now push these stats back into the zone.. */
261 ZONE_LOCK(zone);
262 zone->uz_allocs += alloc;
263
264 /*
 265 * cachefree is an instantaneous snapshot of what is in the per cpu
266 * caches, not an accurate counter
267 */
268 zone->uz_cachefree = free;
269
270 /*
271 * Expand the zone hash table.
272 *
273 * This is done if the number of slabs is larger than the hash size.
 274 * What I'm trying to do here is eliminate collisions entirely. This
275 * may be a little aggressive. Should I allow for two collisions max?
276 */
277
278 if (zone->uz_flags & UMA_ZFLAG_HASH &&
279 zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
280 struct uma_hash newhash;
281 struct uma_hash oldhash;
282 int ret;
283
284 /*
285 * This is so involved because allocating and freeing
286 * while the zone lock is held will lead to deadlock.
287 * I have to do everything in stages and check for
288 * races.
289 */
290 newhash = zone->uz_hash;
291 ZONE_UNLOCK(zone);
292 ret = hash_alloc(&newhash);
293 ZONE_LOCK(zone);
294 if (ret) {
295 if (hash_expand(&zone->uz_hash, &newhash)) {
296 oldhash = zone->uz_hash;
297 zone->uz_hash = newhash;
298 } else
299 oldhash = newhash;
300
301 ZONE_UNLOCK(zone);
302 hash_free(&oldhash);
303 ZONE_LOCK(zone);
304 }
305 }
306
307 /*
308 * Here we compute the working set size as the total number of items
309 * left outstanding since the last time interval. This is slightly
310 * suboptimal. What we really want is the highest number of outstanding
311 * items during the last time quantum. This should be close enough.
312 *
313 * The working set size is used to throttle the zone_drain function.
314 * We don't want to return memory that we may need again immediately.
315 */
316 alloc = zone->uz_allocs - zone->uz_oallocs;
317 zone->uz_oallocs = zone->uz_allocs;
318 zone->uz_wssize = alloc;
319
320 ZONE_UNLOCK(zone);
321}
322
323/*
324 * Allocate and zero fill the next sized hash table from the appropriate
325 * backing store.
326 *
327 * Arguments:
328 * hash A new hash structure with the old hash size in uh_hashsize
329 *
330 * Returns:
 331 * 1 on success and 0 on failure.
332 */
333int
334hash_alloc(struct uma_hash *hash)
335{
336 int oldsize;
337 int alloc;
338
339 oldsize = hash->uh_hashsize;
340
341 /* We're just going to go to a power of two greater */
342 if (oldsize) {
343 hash->uh_hashsize = oldsize * 2;
344 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
345 /* XXX Shouldn't be abusing DEVBUF here */
346 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
347 M_DEVBUF, M_NOWAIT);
348 } else {
349 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
350 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
351 M_WAITOK, NULL);
352 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
353 }
354 if (hash->uh_slab_hash) {
355 bzero(hash->uh_slab_hash, alloc);
356 hash->uh_hashmask = hash->uh_hashsize - 1;
357 return (1);
358 }
359
360 return (0);
361}
362
363/*
364 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout
365 * to reduce collisions. This must not be done in the regular allocation path,
366 * otherwise, we can recurse on the vm while allocating pages.
367 *
368 * Arguments:
369 * oldhash The hash you want to expand
370 * newhash The hash structure for the new table
371 *
372 * Returns:
373 * Nothing
374 *
375 * Discussion:
376 */
377static int
378hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
379{
380 uma_slab_t slab;
381 int hval;
382 int i;
383
384 if (!newhash->uh_slab_hash)
385 return (0);
386
387 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
388 return (0);
389
390 /*
391 * I need to investigate hash algorithms for resizing without a
392 * full rehash.
393 */
394
395 for (i = 0; i < oldhash->uh_hashsize; i++)
396 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
397 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
398 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
399 hval = UMA_HASH(newhash, slab->us_data);
400 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
401 slab, us_hlink);
402 }
403
404 return (1);
405}
406
407/*
408 * Free the hash bucket to the appropriate backing store.
409 *
410 * Arguments:
 411 * hash The hash structure whose slab_hash storage we're freeing;
 412 * its uh_hashsize tells us which backing store it came from
413 *
414 * Returns:
415 * Nothing
416 */
417static void
418hash_free(struct uma_hash *hash)
419{
420 if (hash->uh_slab_hash == NULL)
421 return;
422 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
423 uma_zfree_internal(hashzone,
424 hash->uh_slab_hash, NULL, 0);
425 else
426 free(hash->uh_slab_hash, M_DEVBUF);
427}
428
429/*
430 * Frees all outstanding items in a bucket
431 *
432 * Arguments:
433 * zone The zone to free to, must be unlocked.
434 * bucket The free/alloc bucket with items, cpu queue must be locked.
435 *
436 * Returns:
437 * Nothing
438 */
439
440static void
441bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
442{
443 uma_slab_t slab;
444 int mzone;
445 void *item;
446
447 if (bucket == NULL)
448 return;
449
450 slab = NULL;
451 mzone = 0;
452
453 /* We have to lookup the slab again for malloc.. */
454 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
455 mzone = 1;
456
457 while (bucket->ub_ptr > -1) {
458 item = bucket->ub_bucket[bucket->ub_ptr];
459#ifdef INVARIANTS
460 bucket->ub_bucket[bucket->ub_ptr] = NULL;
461 KASSERT(item != NULL,
462 ("bucket_drain: botched ptr, item is NULL"));
463#endif
464 bucket->ub_ptr--;
465 /*
466 * This is extremely inefficient. The slab pointer was passed
467 * to uma_zfree_arg, but we lost it because the buckets don't
468 * hold them. This will go away when free() gets a size passed
469 * to it.
470 */
471 if (mzone)
472 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
473 uma_zfree_internal(zone, item, slab, 1);
474 }
475}
476
477/*
478 * Drains the per cpu caches for a zone.
479 *
480 * Arguments:
481 * zone The zone to drain, must be unlocked.
482 *
483 * Returns:
484 * Nothing
485 *
486 * This function returns with the zone locked so that the per cpu queues can
487 * not be filled until zone_drain is finished.
488 *
489 */
490static void
491cache_drain(uma_zone_t zone)
492{
493 uma_bucket_t bucket;
494 uma_cache_t cache;
495 int cpu;
496
497 /*
498 * Flush out the per cpu queues.
499 *
500 * XXX This causes unnecessary thrashing due to immediately having
501 * empty per cpu queues. I need to improve this.
502 */
503
504 /*
505 * We have to lock each cpu cache before locking the zone
506 */
507 ZONE_UNLOCK(zone);
508
509 for (cpu = 0; cpu < maxcpu; cpu++) {
510 if (CPU_ABSENT(cpu))
511 continue;
512 CPU_LOCK(zone, cpu);
513 cache = &zone->uz_cpu[cpu];
514 bucket_drain(zone, cache->uc_allocbucket);
515 bucket_drain(zone, cache->uc_freebucket);
516 }
517
518 /*
519 * Drain the bucket queues and free the buckets, we just keep two per
520 * cpu (alloc/free).
521 */
522 ZONE_LOCK(zone);
523 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
524 LIST_REMOVE(bucket, ub_link);
525 ZONE_UNLOCK(zone);
526 bucket_drain(zone, bucket);
527 uma_zfree_internal(bucketzone, bucket, NULL, 0);
528 ZONE_LOCK(zone);
529 }
530
531 /* Now we do the free queue.. */
532 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
533 LIST_REMOVE(bucket, ub_link);
534 uma_zfree_internal(bucketzone, bucket, NULL, 0);
535 }
536
537 /* We unlock here, but they will all block until the zone is unlocked */
538 for (cpu = 0; cpu < maxcpu; cpu++) {
539 if (CPU_ABSENT(cpu))
540 continue;
541 CPU_UNLOCK(zone, cpu);
542 }
543
544 zone->uz_cachefree = 0;
545}
546
547/*
548 * Frees pages from a zone back to the system. This is done on demand from
549 * the pageout daemon.
550 *
551 * Arguments:
552 * zone The zone to free pages from
 553 *
554 *
555 * Returns:
556 * Nothing.
557 */
558static void
559zone_drain(uma_zone_t zone)
560{
561 struct slabhead freeslabs = {};
562 uma_slab_t slab;
563 uma_slab_t n;
564 u_int64_t extra;
565 u_int8_t flags;
566 u_int8_t *mem;
567 int i;
568
569 /*
 570 * We don't want to take pages from statically allocated zones at this
571 * time
572 */
573 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
574 return;
575
576 ZONE_LOCK(zone);
577
578 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
579 cache_drain(zone);
580
581 if (zone->uz_free < zone->uz_wssize)
582 goto finished;
583#ifdef UMA_DEBUG
584 printf("%s working set size: %llu free items: %u\n",
585 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
586#endif
587 extra = zone->uz_free - zone->uz_wssize;
588 extra /= zone->uz_ipers;
589
590 /* extra is now the number of extra slabs that we can free */
591
592 if (extra == 0)
593 goto finished;
594
595 slab = LIST_FIRST(&zone->uz_free_slab);
596 while (slab && extra) {
597 n = LIST_NEXT(slab, us_link);
598
 599 /* We have nowhere to free these to */
600 if (slab->us_flags & UMA_SLAB_BOOT) {
601 slab = n;
602 continue;
603 }
604
605 LIST_REMOVE(slab, us_link);
606 zone->uz_pages -= zone->uz_ppera;
607 zone->uz_free -= zone->uz_ipers;
608
609 if (zone->uz_flags & UMA_ZFLAG_HASH)
610 UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
611
612 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
613
614 slab = n;
615 extra--;
616 }
617finished:
618 ZONE_UNLOCK(zone);
619
620 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
621 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
622 if (zone->uz_fini)
623 for (i = 0; i < zone->uz_ipers; i++)
624 zone->uz_fini(
625 slab->us_data + (zone->uz_rsize * i),
626 zone->uz_size);
627 flags = slab->us_flags;
628 mem = slab->us_data;
629
630 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
631 uma_zfree_internal(slabzone, slab, NULL, 0);
632 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
633 for (i = 0; i < zone->uz_ppera; i++)
634 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
635 kmem_object);
636#ifdef UMA_DEBUG
637 printf("%s: Returning %d bytes.\n",
638 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
639#endif
640 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
641 }
642
643}
644
645/*
646 * Allocate a new slab for a zone. This does not insert the slab onto a list.
647 *
648 * Arguments:
649 * zone The zone to allocate slabs for
650 * wait Shall we wait?
651 *
652 * Returns:
653 * The slab that was allocated or NULL if there is no memory and the
654 * caller specified M_NOWAIT.
655 *
656 */
657static uma_slab_t
658slab_zalloc(uma_zone_t zone, int wait)
659{
660 uma_slab_t slab; /* Starting slab */
661 u_int8_t *mem;
662 u_int8_t flags;
663 int i;
664
665 slab = NULL;
666
667#ifdef UMA_DEBUG
668 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
669#endif
670 ZONE_UNLOCK(zone);
671
672 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
673 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
674 if (slab == NULL) {
675 ZONE_LOCK(zone);
676 return NULL;
677 }
678 }
679
680 /*
681 * This reproduces the old vm_zone behavior of zero filling pages the
682 * first time they are added to a zone.
683 *
684 * Malloced items are zeroed in uma_zalloc.
685 */
686
687 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
688 wait |= M_ZERO;
689 else
690 wait &= ~M_ZERO;
691
692 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
693 mtx_lock(&Giant);
694 mem = zone->uz_allocf(zone,
695 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
696 mtx_unlock(&Giant);
697 if (mem == NULL) {
698 ZONE_LOCK(zone);
699 return (NULL);
700 }
701 } else {
702 uma_slab_t tmps;
703
704 if (zone->uz_ppera > 1)
705 panic("UMA: Attemping to allocate multiple pages before vm has started.\n");
706 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
707 panic("Mallocing before uma_startup2 has been called.\n");
708 if (uma_boot_free == 0)
709 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
710 tmps = LIST_FIRST(&uma_boot_pages);
711 LIST_REMOVE(tmps, us_link);
712 uma_boot_free--;
713 mem = tmps->us_data;
714 }
715
716 /* Point the slab into the allocated memory */
717 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
718 slab = (uma_slab_t )(mem + zone->uz_pgoff);
719
720 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
721 for (i = 0; i < zone->uz_ppera; i++)
722 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
723
724 slab->us_zone = zone;
725 slab->us_data = mem;
726
727 /*
728 * This is intended to spread data out across cache lines.
729 *
730 * This code doesn't seem to work properly on x86, and on alpha
731 * it makes absolutely no performance difference. I'm sure it could
 732 * use some tuning, but Sun makes outrageous claims about its
733 * performance.
734 */
735#if 0
736 if (zone->uz_cachemax) {
737 slab->us_data += zone->uz_cacheoff;
738 zone->uz_cacheoff += UMA_CACHE_INC;
739 if (zone->uz_cacheoff > zone->uz_cachemax)
740 zone->uz_cacheoff = 0;
741 }
742#endif
743
744 slab->us_freecount = zone->uz_ipers;
745 slab->us_firstfree = 0;
746 slab->us_flags = flags;
747 for (i = 0; i < zone->uz_ipers; i++)
748 slab->us_freelist[i] = i+1;
749
750 if (zone->uz_init)
751 for (i = 0; i < zone->uz_ipers; i++)
752 zone->uz_init(slab->us_data + (zone->uz_rsize * i),
753 zone->uz_size);
754 ZONE_LOCK(zone);
755
756 if (zone->uz_flags & UMA_ZFLAG_HASH)
757 UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
758
759 zone->uz_pages += zone->uz_ppera;
760 zone->uz_free += zone->uz_ipers;
761
762
763 return (slab);
764}
765
766/*
767 * Allocates a number of pages from the system
768 *
769 * Arguments:
770 * zone Unused
771 * bytes The number of bytes requested
772 * wait Shall we wait?
773 *
774 * Returns:
775 * A pointer to the alloced memory or possibly
776 * NULL if M_NOWAIT is set.
777 */
778static void *
779page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
780{
781 void *p; /* Returned page */
782
783 *pflag = UMA_SLAB_KMEM;
784 p = (void *) kmem_malloc(kmem_map, bytes, wait);
785
786 return (p);
787}
788
789/*
790 * Allocates a number of pages from within an object
791 *
792 * Arguments:
793 * zone Unused
794 * bytes The number of bytes requested
795 * wait Shall we wait?
796 *
797 * Returns:
798 * A pointer to the alloced memory or possibly
799 * NULL if M_NOWAIT is set.
800 *
801 * TODO: If we fail during a multi-page allocation release the pages that have
802 * already been allocated.
803 */
804static void *
805obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
806{
807 vm_offset_t zkva;
808 vm_offset_t retkva;
809 vm_page_t p;
810 int pages;
811
812 retkva = 0;
813 pages = zone->uz_pages;
814
815 /*
816 * This looks a little weird since we're getting one page at a time
817 */
818 while (bytes > 0) {
819 p = vm_page_alloc(zone->uz_obj, pages,
820 VM_ALLOC_INTERRUPT);
821 if (p == NULL)
822 return (NULL);
823
824 zkva = zone->uz_kva + pages * PAGE_SIZE;
825 if (retkva == 0)
826 retkva = zkva;
827 pmap_qenter(zkva, &p, 1);
828 bytes -= PAGE_SIZE;
829 pages += 1;
830 }
831
832 *flags = UMA_SLAB_PRIV;
833
834 return ((void *)retkva);
835}
836
837/*
838 * Frees a number of pages to the system
839 *
840 * Arguments:
841 * mem A pointer to the memory to be freed
842 * size The size of the memory being freed
843 * flags The original p->us_flags field
844 *
845 * Returns:
846 * Nothing
847 *
848 */
849static void
850page_free(void *mem, int size, u_int8_t flags)
851{
852 vm_map_t map;
853
854 if (flags & UMA_SLAB_KMEM)
855 map = kmem_map;
856 else
857 panic("UMA: page_free used with invalid flags %d\n", flags);
858
859 kmem_free(map, (vm_offset_t)mem, size);
860}
861
862/*
863 * Zero fill initializer
864 *
865 * Arguments/Returns follow uma_init specifications
866 *
867 */
868static void
869zero_init(void *mem, int size)
870{
871 bzero(mem, size);
872}
873
874/*
875 * Finish creating a small uma zone. This calculates ipers, and the zone size.
876 *
877 * Arguments
878 * zone The zone we should initialize
879 *
880 * Returns
881 * Nothing
882 */
883static void
884zone_small_init(uma_zone_t zone)
885{
886 int rsize;
887 int memused;
888 int ipers;
889
890 rsize = zone->uz_size;
891
892 if (rsize < UMA_SMALLEST_UNIT)
893 rsize = UMA_SMALLEST_UNIT;
894
895 if (rsize & zone->uz_align)
896 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
897
898 zone->uz_rsize = rsize;
899
900 rsize += 1; /* Account for the byte of linkage */
901 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
902 zone->uz_ppera = 1;
903
904 memused = zone->uz_ipers * zone->uz_rsize;
905
906 /* Can we do any better? */
907 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
908 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
909 return;
910 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
911 if (ipers > zone->uz_ipers) {
912 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
913 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
914 zone->uz_flags |= UMA_ZFLAG_HASH;
915 zone->uz_ipers = ipers;
916 }
917 }
918
919}
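/*
 * Worked example of the sizing above (an illustrative sketch with assumed
 * numbers: UMA_SLAB_SIZE is taken to be a 4096 byte page, and the item size
 * and alignment are hypothetical).  For uz_size = 100 and uz_align = 15,
 * rsize = (100 & ~15) + 16 = 112, so uz_rsize = 112 and each item costs
 * 113 bytes once the byte of free-list linkage is added.  That gives
 * ipers = (4096 - sizeof(struct uma_slab)) / 113 and memused = ipers * 112;
 * only if the leftover 4096 - memused is at least UMA_MAX_WASTE, the zone is
 * not INTERNAL, and a bare page would hold more items (4096 / 112 = 36) does
 * the zone switch to OFFPAGE slab headers.
 */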
920
921/*
922 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
923 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
924 * more complicated.
925 *
926 * Arguments
927 * zone The zone we should initialize
928 *
929 * Returns
930 * Nothing
931 */
932static void
933zone_large_init(uma_zone_t zone)
934{
935 int pages;
936
937 pages = zone->uz_size / UMA_SLAB_SIZE;
938
939 /* Account for remainder */
940 if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
941 pages++;
942
943 zone->uz_ppera = pages;
944 zone->uz_ipers = 1;
945
946 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
947 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
948 zone->uz_flags |= UMA_ZFLAG_HASH;
949
950 zone->uz_rsize = zone->uz_size;
951}
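/*
 * Example of the large zone math above (hypothetical size, again assuming a
 * 4096 byte UMA_SLAB_SIZE): a 9000 byte item gives pages = 9000 / 4096 = 2,
 * which is bumped to 3 because 2 * 4096 < 9000.  The zone then keeps exactly
 * one item per three page slab, with the slab header always kept OFFPAGE.
 */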
952
953/*
954 * Zone header ctor. This initializes all fields, locks, etc. And inserts
955 * the zone onto the global zone list.
956 *
957 * Arguments/Returns follow uma_ctor specifications
958 * udata Actually uma_zcreat_args
959 *
960 */
961
962static void
963zone_ctor(void *mem, int size, void *udata)
964{
965 struct uma_zctor_args *arg = udata;
966 uma_zone_t zone = mem;
967 int privlc;
968 int cplen;
969 int cpu;
970
971 bzero(zone, size);
972 zone->uz_name = arg->name;
973 zone->uz_size = arg->size;
974 zone->uz_ctor = arg->ctor;
975 zone->uz_dtor = arg->dtor;
976 zone->uz_init = arg->uminit;
977 zone->uz_fini = arg->fini;
978 zone->uz_align = arg->align;
979 zone->uz_free = 0;
980 zone->uz_pages = 0;
981 zone->uz_flags = 0;
982 zone->uz_allocf = page_alloc;
983 zone->uz_freef = page_free;
984
985 if (arg->flags & UMA_ZONE_ZINIT)
986 zone->uz_init = zero_init;
987
988 if (arg->flags & UMA_ZONE_INTERNAL)
989 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
990
991 if (arg->flags & UMA_ZONE_MALLOC)
992 zone->uz_flags |= UMA_ZFLAG_MALLOC;
993
994 if (arg->flags & UMA_ZONE_NOFREE)
995 zone->uz_flags |= UMA_ZFLAG_NOFREE;
996
997 if (arg->flags & UMA_ZONE_VM)
998 zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
999
1000 if (zone->uz_size > UMA_SLAB_SIZE)
1001 zone_large_init(zone);
1002 else
1003 zone_small_init(zone);
1004
1005 if (arg->flags & UMA_ZONE_MTXCLASS)
1006 privlc = 1;
1007 else
1008 privlc = 0;
1009
1010 /* We do this so that the per cpu lock name is unique for each zone */
1011 memcpy(zone->uz_lname, "PCPU ", 5);
1012 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1013 memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1014 zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1015
1016 /*
1017 * If we're putting the slab header in the actual page we need to
1018 * figure out where in each page it goes. This calculates a right
1019 * justified offset into the memory on a ALIGN_PTR boundary.
1020 */
1021 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1022 int totsize;
1023 int waste;
1024
1025 /* Size of the slab struct and free list */
1026 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1027 if (totsize & UMA_ALIGN_PTR)
1028 totsize = (totsize & ~UMA_ALIGN_PTR) +
1029 (UMA_ALIGN_PTR + 1);
1030 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1031
1032 waste = zone->uz_pgoff;
1033 waste -= (zone->uz_ipers * zone->uz_rsize);
1034
1035 /*
1036 * This calculates how much space we have for cache line size
 1037 * optimizations. It works by offsetting each slab slightly.
1038 * Currently it breaks on x86, and so it is disabled.
1039 */
1040
1041 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1042 zone->uz_cachemax = waste - UMA_CACHE_INC;
1043 zone->uz_cacheoff = 0;
1044 }
1045
1046 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1047 + zone->uz_ipers;
1048 /* I don't think it's possible, but I'll make sure anyway */
1049 if (totsize > UMA_SLAB_SIZE) {
1050 printf("zone %s ipers %d rsize %d size %d\n",
1051 zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1052 zone->uz_size);
1053 panic("UMA slab won't fit.\n");
1054 }
1055 }
1056
1057 if (zone->uz_flags & UMA_ZFLAG_HASH)
1058 hash_alloc(&zone->uz_hash);
1059
1060#ifdef UMA_DEBUG
1061 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1062 zone->uz_name, zone,
1063 zone->uz_size, zone->uz_ipers,
1064 zone->uz_ppera, zone->uz_pgoff);
1065#endif
1066 ZONE_LOCK_INIT(zone, privlc);
1067
1068 mtx_lock(&uma_mtx);
1069 LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1070 mtx_unlock(&uma_mtx);
1071
1072 /*
1073 * Some internal zones don't have room allocated for the per cpu
1074 * caches. If we're internal, bail out here.
1075 */
1076
1077 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1078 return;
1079
1080 if (zone->uz_ipers < UMA_BUCKET_SIZE)
1081 zone->uz_count = zone->uz_ipers - 1;
1082 else
1083 zone->uz_count = UMA_BUCKET_SIZE - 1;
1084
1085 for (cpu = 0; cpu < maxcpu; cpu++)
1086 CPU_LOCK_INIT(zone, cpu, privlc);
1087}
1088
1089/*
1090 * Zone header dtor. This frees all data, destroys locks, frees the hash table
1091 * and removes the zone from the global list.
1092 *
1093 * Arguments/Returns follow uma_dtor specifications
1094 * udata unused
1095 */
1096
1097static void
1098zone_dtor(void *arg, int size, void *udata)
1099{
1100 uma_zone_t zone;
1101 int cpu;
1102
1103 zone = (uma_zone_t)arg;
1104
1105 ZONE_LOCK(zone);
1106 zone->uz_wssize = 0;
1107 ZONE_UNLOCK(zone);
1108
1109 mtx_lock(&uma_mtx);
1110 LIST_REMOVE(zone, uz_link);
1111 zone_drain(zone);
1112 mtx_unlock(&uma_mtx);
1113
1114 ZONE_LOCK(zone);
1115 if (zone->uz_free != 0)
1116 printf("Zone %s was not empty. Lost %d pages of memory.\n",
1117 zone->uz_name, zone->uz_pages);
1118
1119 if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
1120 for (cpu = 0; cpu < maxcpu; cpu++)
1121 CPU_LOCK_FINI(zone, cpu);
1122
1123 ZONE_UNLOCK(zone);
1124 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1125 hash_free(&zone->uz_hash);
1126
1127 ZONE_LOCK_FINI(zone);
1128}
1129/*
1130 * Traverses every zone in the system and calls a callback
1131 *
1132 * Arguments:
1133 * zfunc A pointer to a function which accepts a zone
1134 * as an argument.
1135 *
1136 * Returns:
1137 * Nothing
1138 */
1139static void
1140zone_foreach(void (*zfunc)(uma_zone_t))
1141{
1142 uma_zone_t zone;
1143
1144 mtx_lock(&uma_mtx);
1145 LIST_FOREACH(zone, &uma_zones, uz_link) {
1146 zfunc(zone);
1147 }
1148 mtx_unlock(&uma_mtx);
1149}
1150
1151/* Public functions */
1152/* See uma.h */
1153void
1154uma_startup(void *bootmem)
1155{
1156 struct uma_zctor_args args;
1157 uma_slab_t slab;
1158 int slabsize;
1159 int i;
1160
1161#ifdef UMA_DEBUG
1162 printf("Creating uma zone headers zone.\n");
1163#endif
1164#ifdef SMP
1165 maxcpu = mp_maxid + 1;
1166#else
1167 maxcpu = 1;
1168#endif
1169#ifdef UMA_DEBUG
1170 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1171 Debugger("stop");
1172#endif
1173 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1174 /* "manually" Create the initial zone */
1175 args.name = "UMA Zones";
1176 args.size = sizeof(struct uma_zone) +
1177 (sizeof(struct uma_cache) * (maxcpu - 1));
1178 args.ctor = zone_ctor;
1179 args.dtor = zone_dtor;
1180 args.uminit = zero_init;
1181 args.fini = NULL;
1182 args.align = 32 - 1;
1183 args.flags = UMA_ZONE_INTERNAL;
1184 /* The initial zone has no Per cpu queues so it's smaller */
1185 zone_ctor(zones, sizeof(struct uma_zone), &args);
1186
1187#ifdef UMA_DEBUG
1188 printf("Filling boot free list.\n");
1189#endif
1190 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1191 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1192 slab->us_data = (u_int8_t *)slab;
1193 slab->us_flags = UMA_SLAB_BOOT;
1194 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1195 uma_boot_free++;
1196 }
1197
1198#ifdef UMA_DEBUG
1199 printf("Creating slab zone.\n");
1200#endif
1201
1202 /*
1203 * This is the max number of free list items we'll have with
1204 * offpage slabs.
1205 */
1206
1207 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1208 slabsize /= UMA_MAX_WASTE;
 1209 slabsize++; /* In case the division rounded down */
1210 slabsize += sizeof(struct uma_slab);
1211
1212 /* Now make a zone for slab headers */
1213 slabzone = uma_zcreate("UMA Slabs",
1214 slabsize,
1215 NULL, NULL, NULL, NULL,
1216 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1217
1218 hashzone = uma_zcreate("UMA Hash",
1219 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1220 NULL, NULL, NULL, NULL,
1221 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1222
1223 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1224 NULL, NULL, NULL, NULL,
1225 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1226
1227
1228#ifdef UMA_DEBUG
1229 printf("UMA startup complete.\n");
1230#endif
1231}
1232
1233/* see uma.h */
1234void
1235uma_startup2(void)
1236{
1237 booted = 1;
1238 bucket_enable();
1239#ifdef UMA_DEBUG
1240 printf("UMA startup2 complete.\n");
1241#endif
1242}
1243
1244/*
1245 * Initialize our callout handle
1246 *
1247 */
1248
1249static void
1250uma_startup3(void)
1251{
1252#ifdef UMA_DEBUG
1253 printf("Starting callout.\n");
1254#endif
1255 callout_init(&uma_callout, 0);
1256 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1257#ifdef UMA_DEBUG
1258 printf("UMA startup3 complete.\n");
1259#endif
1260}
1261
1262/* See uma.h */
1263uma_zone_t
1264uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1265 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1266
1267{
1268 struct uma_zctor_args args;
1269
1270 /* This stuff is essential for the zone ctor */
1271 args.name = name;
1272 args.size = size;
1273 args.ctor = ctor;
1274 args.dtor = dtor;
1275 args.uminit = uminit;
1276 args.fini = fini;
1277 args.align = align;
1278 args.flags = flags;
1279
1280 return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL));
1281}
1282
1283/* See uma.h */
1284void
1285uma_zdestroy(uma_zone_t zone)
1286{
1287 uma_zfree_internal(zones, zone, NULL, 0);
1288}
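/*
 * Illustrative use of the public interface exported by this file (a sketch,
 * not code from this revision; "struct foo" and the flag choices are
 * hypothetical, and uma.h remains the canonical reference):
 */
#if 0
struct foo {
        int     f_bar;
};

static uma_zone_t foo_zone;

static void
foo_zone_example(void)
{
        struct foo *fp;

        foo_zone = uma_zcreate("foo", sizeof(struct foo),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        fp = uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO);
        /* ... work with fp ... */
        uma_zfree_arg(foo_zone, fp, NULL);
        uma_zdestroy(foo_zone);
}
#endif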
1289
1290/* See uma.h */
1291void *
1292uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1293{
1294 void *item;
1295 uma_cache_t cache;
1296 uma_bucket_t bucket;
1297 int cpu;
1298
1299 /* This is the fast path allocation */
1300#ifdef UMA_DEBUG_ALLOC_1
1301 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1302#endif
1303
1304 if (!(flags & M_NOWAIT)) {
1305 KASSERT(curthread->td_intr_nesting_level == 0,
1306 ("malloc(M_WAITOK) in interrupt context"));
1307 WITNESS_SLEEP(1, NULL);
1308 }
1309
1310zalloc_restart:
1311 cpu = PCPU_GET(cpuid);
1312 CPU_LOCK(zone, cpu);
1313 cache = &zone->uz_cpu[cpu];
1314
1315zalloc_start:
1316 bucket = cache->uc_allocbucket;
1317
1318 if (bucket) {
1319 if (bucket->ub_ptr > -1) {
1320 item = bucket->ub_bucket[bucket->ub_ptr];
1321#ifdef INVARIANTS
1322 bucket->ub_bucket[bucket->ub_ptr] = NULL;
1323#endif
1324 bucket->ub_ptr--;
1325 KASSERT(item != NULL,
1326 ("uma_zalloc: Bucket pointer mangled."));
1327 cache->uc_allocs++;
1328#ifdef INVARIANTS
1329 uma_dbg_alloc(zone, NULL, item);
1330#endif
1331 CPU_UNLOCK(zone, cpu);
1332 if (zone->uz_ctor)
1333 zone->uz_ctor(item, zone->uz_size, udata);
1334 if (flags & M_ZERO)
1335 bzero(item, zone->uz_size);
1336 return (item);
1337 } else if (cache->uc_freebucket) {
1338 /*
1339 * We have run out of items in our allocbucket.
1340 * See if we can switch with our free bucket.
1341 */
1342 if (cache->uc_freebucket->ub_ptr > -1) {
1343 uma_bucket_t swap;
1344
1345#ifdef UMA_DEBUG_ALLOC
1346 printf("uma_zalloc: Swapping empty with alloc.\n");
1347#endif
1348 swap = cache->uc_freebucket;
1349 cache->uc_freebucket = cache->uc_allocbucket;
1350 cache->uc_allocbucket = swap;
1351
1352 goto zalloc_start;
1353 }
1354 }
1355 }
1356 ZONE_LOCK(zone);
1357 /* Since we have locked the zone we may as well send back our stats */
1358 zone->uz_allocs += cache->uc_allocs;
1359 cache->uc_allocs = 0;
1360
1361 /* Our old one is now a free bucket */
1362 if (cache->uc_allocbucket) {
1363 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1364 ("uma_zalloc_arg: Freeing a non free bucket."));
1365 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1366 cache->uc_allocbucket, ub_link);
1367 cache->uc_allocbucket = NULL;
1368 }
1369
1370 /* Check the free list for a new alloc bucket */
1371 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1372 KASSERT(bucket->ub_ptr != -1,
1373 ("uma_zalloc_arg: Returning an empty bucket."));
1374
1375 LIST_REMOVE(bucket, ub_link);
1376 cache->uc_allocbucket = bucket;
1377 ZONE_UNLOCK(zone);
1378 goto zalloc_start;
1379 }
1380 /* Bump up our uz_count so we get here less */
1381 if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1382 zone->uz_count++;
1383
1384 /* We are no longer associated with this cpu!!! */
1385 CPU_UNLOCK(zone, cpu);
1386
1387 /*
1388 * Now lets just fill a bucket and put it on the free list. If that
 1389 * works we'll restart the allocation from the beginning.
1390 *
1391 * Try this zone's free list first so we don't allocate extra buckets.
1392 */
1393
1394 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL)
1395 LIST_REMOVE(bucket, ub_link);
1396
1397 /* Now we no longer need the zone lock. */
1398 ZONE_UNLOCK(zone);
1399
1400 if (bucket == NULL) {
1401 int bflags;
1402
1403 bflags = flags;
1404 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1405 bflags |= M_NOVM;
1406
1407 bucket = uma_zalloc_internal(bucketzone,
1408 NULL, bflags, NULL);
1409 }
1410
1411 if (bucket != NULL) {
1412#ifdef INVARIANTS
1413 bzero(bucket, bucketzone->uz_size);
1414#endif
1415 bucket->ub_ptr = -1;
1416
1417 if (uma_zalloc_internal(zone, udata, flags, bucket))
1418 goto zalloc_restart;
1419 else
1420 uma_zfree_internal(bucketzone, bucket, NULL, 0);
1421 }
1422 /*
1423 * We may not get a bucket if we recurse, so
1424 * return an actual item.
1425 */
1426#ifdef UMA_DEBUG
1427 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1428#endif
1429
1430 return (uma_zalloc_internal(zone, udata, flags, NULL));
1431}
1432
1433/*
1434 * Allocates an item for an internal zone OR fills a bucket
1435 *
1436 * Arguments
1437 * zone The zone to alloc for.
1438 * udata The data to be passed to the constructor.
1439 * flags M_WAITOK, M_NOWAIT, M_ZERO.
1440 * bucket The bucket to fill or NULL
1441 *
1442 * Returns
1443 * NULL if there is no memory and M_NOWAIT is set
 1444 * An item if called on an internal zone
1445 * Non NULL if called to fill a bucket and it was successful.
1446 *
1447 * Discussion:
1448 * This was much cleaner before it had to do per cpu caches. It is
1449 * complicated now because it has to handle the simple internal case, and
1450 * the more involved bucket filling and allocation.
1451 */
1452
1453static void *
1454uma_zalloc_internal(uma_zone_t zone, void *udata, int flags, uma_bucket_t bucket)
1455{
1456 uma_slab_t slab;
1457 u_int8_t freei;
1458 void *item;
1459
1460 item = NULL;
1461
1462 /*
1463 * This is to stop us from allocating per cpu buckets while we're
1464 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
1465 * boot pages.
1466 */
1467
1468 if (bucketdisable && zone == bucketzone)
1469 return (NULL);
1470
1471#ifdef UMA_DEBUG_ALLOC
1472 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1473#endif
1474 ZONE_LOCK(zone);
1475
1476 /*
1477 * This code is here to limit the number of simultaneous bucket fills
1478 * for any given zone to the number of per cpu caches in this zone. This
1479 * is done so that we don't allocate more memory than we really need.
1480 */
1481
1482 if (bucket) {
1483#ifdef SMP
1484 if (zone->uz_fills >= mp_ncpus) {
1485#else
1486 if (zone->uz_fills > 1) {
1487#endif
1488 ZONE_UNLOCK(zone);
1489 return (NULL);
1490 }
1491
1492 zone->uz_fills++;
1493 }
1494
1495new_slab:
1496
1497 /* Find a slab with some space */
1498 if (zone->uz_free) {
1499 if (!LIST_EMPTY(&zone->uz_part_slab)) {
1500 slab = LIST_FIRST(&zone->uz_part_slab);
1501 } else {
1502 slab = LIST_FIRST(&zone->uz_free_slab);
1503 LIST_REMOVE(slab, us_link);
1504 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1505 }
1506 } else {
1507 /*
1508 * This is to prevent us from recursively trying to allocate
1509 * buckets. The problem is that if an allocation forces us to
1510 * grab a new bucket we will call page_alloc, which will go off
1511 * and cause the vm to allocate vm_map_entries. If we need new
1512 * buckets there too we will recurse in kmem_alloc and bad
1513 * things happen. So instead we return a NULL bucket, and make
1514 * the code that allocates buckets smart enough to deal with it
1515 */
1516 if (zone == bucketzone && zone->uz_recurse != 0) {
1517 ZONE_UNLOCK(zone);
1518 return (NULL);
1519 }
1520 while (zone->uz_maxpages &&
1521 zone->uz_pages >= zone->uz_maxpages) {
1522 zone->uz_flags |= UMA_ZFLAG_FULL;
1523
1524 if (flags & M_WAITOK)
1525 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1526 else
1527 goto alloc_fail;
1528
1529 goto new_slab;
1530 }
1531
1532 if (flags & M_NOVM)
1533 goto alloc_fail;
1534
1535 zone->uz_recurse++;
1536 slab = slab_zalloc(zone, flags);
1537 zone->uz_recurse--;
1538 /*
1539 * We might not have been able to get a slab but another cpu
1540 * could have while we were unlocked. If we did get a slab put
1541 * it on the partially used slab list. If not check the free
1542 * count and restart or fail accordingly.
1543 */
1544 if (slab)
1545 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1546 else if (zone->uz_free == 0)
1547 goto alloc_fail;
1548 else
1549 goto new_slab;
1550 }
1551 /*
 1552 * If this is our first time through, put this bucket on the list.
1553 */
1554 if (bucket != NULL && bucket->ub_ptr == -1)
1555 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1556 bucket, ub_link);
1557
1558
1559 while (slab->us_freecount) {
1560 freei = slab->us_firstfree;
1561 slab->us_firstfree = slab->us_freelist[freei];
1562
1563 item = slab->us_data + (zone->uz_rsize * freei);
1564
1565 slab->us_freecount--;
1566 zone->uz_free--;
1567#ifdef INVARIANTS
1568 uma_dbg_alloc(zone, slab, item);
1569#endif
1570 if (bucket == NULL) {
1571 zone->uz_allocs++;
1572 break;
1573 }
1574 bucket->ub_bucket[++bucket->ub_ptr] = item;
1575
1576 /* Don't overfill the bucket! */
1577 if (bucket->ub_ptr == zone->uz_count)
1578 break;
1579 }
1580
1581 /* Move this slab to the full list */
1582 if (slab->us_freecount == 0) {
1583 LIST_REMOVE(slab, us_link);
1584 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1585 }
1586
1587 if (bucket != NULL) {
1588 /* Try to keep the buckets totally full, but don't block */
1589 if (bucket->ub_ptr < zone->uz_count) {
1590 flags |= M_NOWAIT;
1591 flags &= ~M_WAITOK;
1592 goto new_slab;
1593 } else
1594 zone->uz_fills--;
1595 }
1596
1597 ZONE_UNLOCK(zone);
1598
1599 /* Only construct at this time if we're not filling a bucket */
1600 if (bucket == NULL) {
1601 if (zone->uz_ctor != NULL)
1602 zone->uz_ctor(item, zone->uz_size, udata);
1603 if (flags & M_ZERO)
1604 bzero(item, zone->uz_size);
1605 }
1606
1607 return (item);
1608
1609alloc_fail:
1610 if (bucket != NULL)
1611 zone->uz_fills--;
1612 ZONE_UNLOCK(zone);
1613
1614 if (bucket != NULL && bucket->ub_ptr != -1)
1615 return (bucket);
1616
1617 return (NULL);
1618}
1619
1620/* See uma.h */
1621void
1622uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1623{
1624 uma_cache_t cache;
1625 uma_bucket_t bucket;
1626 int bflags;
1627 int cpu;
1628
1629 /* This is the fast path free */
1630#ifdef UMA_DEBUG_ALLOC_1
1631 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1632#endif
1633 /*
1634 * The race here is acceptable. If we miss it we'll just have to wait
1635 * a little longer for the limits to be reset.
1636 */
1637
1638 if (zone->uz_flags & UMA_ZFLAG_FULL)
1639 goto zfree_internal;
1640
1641zfree_restart:
1642 cpu = PCPU_GET(cpuid);
1643 CPU_LOCK(zone, cpu);
1644 cache = &zone->uz_cpu[cpu];
1645
1646zfree_start:
1647 bucket = cache->uc_freebucket;
1648
1649 if (bucket) {
1650 /*
1651 * Do we have room in our bucket? It is OK for this uz count
1652 * check to be slightly out of sync.
1653 */
1654
1655 if (bucket->ub_ptr < zone->uz_count) {
1656 bucket->ub_ptr++;
1657 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1658 ("uma_zfree: Freeing to non free bucket index."));
1659 bucket->ub_bucket[bucket->ub_ptr] = item;
1660 if (zone->uz_dtor)
1661 zone->uz_dtor(item, zone->uz_size, udata);
1662#ifdef INVARIANTS
1663 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1664 uma_dbg_free(zone, udata, item);
1665 else
1666 uma_dbg_free(zone, NULL, item);
1667#endif
1668 CPU_UNLOCK(zone, cpu);
1669 return;
1670 } else if (cache->uc_allocbucket) {
1671#ifdef UMA_DEBUG_ALLOC
1672 printf("uma_zfree: Swapping buckets.\n");
1673#endif
1674 /*
1675 * We have run out of space in our freebucket.
1676 * See if we can switch with our alloc bucket.
1677 */
1678 if (cache->uc_allocbucket->ub_ptr <
1679 cache->uc_freebucket->ub_ptr) {
1680 uma_bucket_t swap;
1681
1682 swap = cache->uc_freebucket;
1683 cache->uc_freebucket = cache->uc_allocbucket;
1684 cache->uc_allocbucket = swap;
1685
1686 goto zfree_start;
1687 }
1688 }
1689 }
1690
1691 /*
1692 * We can get here for two reasons:
1693 *
1694 * 1) The buckets are NULL
1695 * 2) The alloc and free buckets are both somewhat full.
1696 *
1697 */
1698
1699 ZONE_LOCK(zone);
1700
1701 bucket = cache->uc_freebucket;
1702 cache->uc_freebucket = NULL;
1703
1704 /* Can we throw this on the zone full list? */
1705 if (bucket != NULL) {
1706#ifdef UMA_DEBUG_ALLOC
1707 printf("uma_zfree: Putting old bucket on the free list.\n");
1708#endif
1709 /* ub_ptr is pointing to the last free item */
1710 KASSERT(bucket->ub_ptr != -1,
1711 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1712 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1713 bucket, ub_link);
1714 }
1715 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1716 LIST_REMOVE(bucket, ub_link);
1717 ZONE_UNLOCK(zone);
1718 cache->uc_freebucket = bucket;
1719 goto zfree_start;
1720 }
1721 /* We're done with this CPU now */
1722 CPU_UNLOCK(zone, cpu);
1723
1724 /* And the zone.. */
1725 ZONE_UNLOCK(zone);
1726
1727#ifdef UMA_DEBUG_ALLOC
1728 printf("uma_zfree: Allocating new free bucket.\n");
1729#endif
1730 bflags = M_NOWAIT;
1731
1732 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1733 bflags |= M_NOVM;
1734#ifdef INVARIANTS
1735 bflags |= M_ZERO;
1736#endif
1737 bucket = uma_zalloc_internal(bucketzone,
1738 NULL, bflags, NULL);
1739 if (bucket) {
1740 bucket->ub_ptr = -1;
1741 ZONE_LOCK(zone);
1742 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1743 bucket, ub_link);
1744 ZONE_UNLOCK(zone);
1745 goto zfree_restart;
1746 }
1747
1748 /*
1749 * If nothing else caught this, we'll just do an internal free.
1750 */
1751
1752zfree_internal:
1753
1754 uma_zfree_internal(zone, item, udata, 0);
1755
1756 return;
1757
1758}
1759
1760/*
1761 * Frees an item to an INTERNAL zone or allocates a free bucket
1762 *
1763 * Arguments:
1764 * zone The zone to free to
1765 * item The item we're freeing
1766 * udata User supplied data for the dtor
1767 * skip Skip the dtor, it was done in uma_zfree_arg
1768 */
1769
1770static void
1771uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1772{
1773 uma_slab_t slab;
1774 u_int8_t *mem;
1775 u_int8_t freei;
1776
1777 ZONE_LOCK(zone);
1778
1779 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1780 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1781 if (zone->uz_flags & UMA_ZFLAG_HASH)
1782 slab = hash_sfind(&zone->uz_hash, mem);
1783 else {
1784 mem += zone->uz_pgoff;
1785 slab = (uma_slab_t)mem;
1786 }
1787 } else {
1788 slab = (uma_slab_t)udata;
1789 }
1790
1791 /* Do we need to remove from any lists? */
1792 if (slab->us_freecount+1 == zone->uz_ipers) {
1793 LIST_REMOVE(slab, us_link);
1794 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1795 } else if (slab->us_freecount == 0) {
1796 LIST_REMOVE(slab, us_link);
1797 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1798 }
1799
1800 /* Slab management stuff */
1801 freei = ((unsigned long)item - (unsigned long)slab->us_data)
1802 / zone->uz_rsize;
1803
1804#ifdef INVARIANTS
1805 if (!skip)
1806 uma_dbg_free(zone, slab, item);
1807#endif
1808
1809 slab->us_freelist[freei] = slab->us_firstfree;
1810 slab->us_firstfree = freei;
1811 slab->us_freecount++;
1812
1813 /* Zone statistics */
1814 zone->uz_free++;
1815
1816 if (!skip && zone->uz_dtor)
1817 zone->uz_dtor(item, zone->uz_size, udata);
1818
1819 if (zone->uz_flags & UMA_ZFLAG_FULL) {
1820 if (zone->uz_pages < zone->uz_maxpages)
1821 zone->uz_flags &= ~UMA_ZFLAG_FULL;
1822
1823 /* We can handle one more allocation */
1824 wakeup_one(&zone);
1825 }
1826
1827 ZONE_UNLOCK(zone);
1828}
1829
1830/* See uma.h */
1831void
1832uma_zone_set_max(uma_zone_t zone, int nitems)
1833{
1834 ZONE_LOCK(zone);
1835 if (zone->uz_ppera > 1)
1836 zone->uz_maxpages = nitems * zone->uz_ppera;
1837 else
1838 zone->uz_maxpages = nitems / zone->uz_ipers;
1839
1840 if (zone->uz_maxpages * zone->uz_ipers < nitems)
1841 zone->uz_maxpages++;
1842
1843 ZONE_UNLOCK(zone);
1844}
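/*
 * Example of the limit math above (hypothetical numbers): for a one page per
 * slab zone with uz_ipers = 30, uma_zone_set_max(zone, 1000) yields
 * uz_maxpages = 1000 / 30 = 33, rounded up to 34 because 33 * 30 = 990 falls
 * short of 1000.  Since the limit is enforced on pages, the zone can actually
 * grow to 34 * 30 = 1020 items before allocations block (or fail for
 * M_NOWAIT callers).
 */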
1845
1846/* See uma.h */
1847void
1848uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1849{
1850 ZONE_LOCK(zone);
1851
1852 zone->uz_freef = freef;
1853
1854 ZONE_UNLOCK(zone);
1855}
1856
1857/* See uma.h */
1858void
1859uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1860{
1861 ZONE_LOCK(zone);
1862
1863 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1864 zone->uz_allocf = allocf;
1865
1866 ZONE_UNLOCK(zone);
1867}
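/*
 * Sketch of a private backing allocator (an illustration only; the function
 * names are hypothetical and the signatures are assumed to match the
 * uma_alloc/uma_free typedefs in uma.h, mirroring page_alloc/page_free
 * above).  A subsystem with its own page supply could plug in like this:
 */
#if 0
static void *
my_page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
        /* Mark the pages so only my_page_free() is expected to take them. */
        *pflag = UMA_SLAB_PRIV;
        return ((void *)kmem_malloc(kmem_map, bytes, wait));
}

static void
my_page_free(void *mem, int size, u_int8_t flags)
{
        kmem_free(kmem_map, (vm_offset_t)mem, size);
}

static void
my_zone_setup(uma_zone_t zone)
{
        uma_zone_set_allocf(zone, my_page_alloc);
        uma_zone_set_freef(zone, my_page_free);
}
#endif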
1868
1869/* See uma.h */
1870int
1871uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1872{
1873 int pages;
1874 vm_offset_t kva;
1875
1876 mtx_lock(&Giant);
1877
1878 pages = count / zone->uz_ipers;
1879
1880 if (pages * zone->uz_ipers < count)
1881 pages++;
1882
1883 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1884
1885 if (kva == 0) {
1886 mtx_unlock(&Giant);
1887 return (0);
1888 }
1889
1890
1891 if (obj == NULL)
1892 obj = vm_object_allocate(OBJT_DEFAULT,
1893 pages);
1894 else
1895 _vm_object_allocate(OBJT_DEFAULT,
1896 pages, obj);
1897
1898 ZONE_LOCK(zone);
1899 zone->uz_kva = kva;
1900 zone->uz_obj = obj;
1901 zone->uz_maxpages = pages;
1902
1903 zone->uz_allocf = obj_alloc;
1904 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1905
1906 ZONE_UNLOCK(zone);
1907 mtx_unlock(&Giant);
1908
1909 return (1);
1910}
1911
1912/* See uma.h */
1913void
1914uma_prealloc(uma_zone_t zone, int items)
1915{
1916 int slabs;
1917 uma_slab_t slab;
1918
1919 ZONE_LOCK(zone);
1920 slabs = items / zone->uz_ipers;
1921 if (slabs * zone->uz_ipers < items)
1922 slabs++;
1923
1924 while (slabs > 0) {
1925 slab = slab_zalloc(zone, M_WAITOK);
1926 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1927 slabs--;
1928 }
1929 ZONE_UNLOCK(zone);
1930}
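/*
 * Example of the rounding above (hypothetical numbers): with uz_ipers = 30,
 * uma_prealloc(zone, 100) computes slabs = 100 / 30 = 3 and bumps it to 4
 * because 3 * 30 = 90 < 100, so 120 items end up preallocated.
 */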
1931
1932/* See uma.h */
1933void
1934uma_reclaim(void)
1935{
1936 /*
1937 * You might think that the delay below would improve performance since
 1938 * the allocator will give away memory that it may ask for again immediately.
1939 * Really, it makes things worse, since cpu cycles are so much cheaper
1940 * than disk activity.
1941 */
1942#if 0
1943 static struct timeval tv = {0};
1944 struct timeval now;
1945 getmicrouptime(&now);
1946 if (now.tv_sec > tv.tv_sec + 30)
1947 tv = now;
1948 else
1949 return;
1950#endif
1951#ifdef UMA_DEBUG
1952 printf("UMA: vm asked us to release pages!\n");
1953#endif
1954 bucket_enable();
1955 zone_foreach(zone_drain);
1956
1957 /*
 1958 * Some slabs may have been freed, but this zone was visited early in
 1959 * the pass above, so we visit it again to free pages that only became
 1960 * empty once the other zones were drained. The same goes for buckets.
1961 */
1962 zone_drain(slabzone);
1963 zone_drain(bucketzone);
1964}
1965
1966void *
1967uma_large_malloc(int size, int wait)
1968{
1969 void *mem;
1970 uma_slab_t slab;
1971 u_int8_t flags;
1972
1973 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
1974 if (slab == NULL)
1975 return (NULL);
1976
1977 mem = page_alloc(NULL, size, &flags, wait);
1978 if (mem) {
1979 vsetslab((vm_offset_t)mem, slab);
1980 slab->us_data = mem;
1981 slab->us_flags = flags | UMA_SLAB_MALLOC;
1982 slab->us_size = size;
1983 } else {
1984 uma_zfree_internal(slabzone, slab, NULL, 0);
1985 }
1986
1987
1988 return (mem);
1989}
1990
1991void
1992uma_large_free(uma_slab_t slab)
1993{
1994 vsetobj((vm_offset_t)slab->us_data, kmem_object);
1995 page_free(slab->us_data, slab->us_size, slab->us_flags);
1996 uma_zfree_internal(slabzone, slab, NULL, 0);
1997}
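/*
 * How a caller such as the kernel malloc layer might pair the two routines
 * above (an illustrative sketch, not code from this file): the slab behind
 * an address is recovered with vtoslab(), the same way bucket_drain() does
 * it earlier in this file.
 */
#if 0
static void
large_example(void)
{
        uma_slab_t slab;
        void *mem;

        mem = uma_large_malloc(5 * PAGE_SIZE, M_WAITOK);
        if (mem == NULL)
                return;
        /* ... use mem ... */
        slab = vtoslab((vm_offset_t)mem & (~UMA_SLAB_MASK));
        uma_large_free(slab);
}
#endif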
1998
1999void
2000uma_print_stats(void)
2001{
2002 zone_foreach(uma_print_zone);
2003}
2004
2005void
2006uma_print_zone(uma_zone_t zone)
2007{
2008 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2009 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2010 zone->uz_ipers, zone->uz_ppera,
2011 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2012}
2013
2014/*
2015 * Sysctl handler for vm.zone
2016 *
2017 * stolen from vm_zone.c
2018 */
2019static int
2020sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2021{
2022 int error, len, cnt;
2023 const int linesize = 128; /* conservative */
2024 int totalfree;
2025 char *tmpbuf, *offset;
2026 uma_zone_t z;
2027 char *p;
2028
2029 cnt = 0;
2030 mtx_lock(&uma_mtx);
2031 LIST_FOREACH(z, &uma_zones, uz_link)
2032 cnt++;
2033 mtx_unlock(&uma_mtx);
2034 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2035 M_TEMP, M_WAITOK);
2036 len = snprintf(tmpbuf, linesize,
2037 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2038 if (cnt == 0)
2039 tmpbuf[len - 1] = '\0';
2040 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2041 if (error || cnt == 0)
2042 goto out;
2043 offset = tmpbuf;
2044 mtx_lock(&uma_mtx);
2045 LIST_FOREACH(z, &uma_zones, uz_link) {
2046 if (cnt == 0) /* list may have changed size */
2047 break;
2048 ZONE_LOCK(z);
2049 totalfree = z->uz_free + z->uz_cachefree;
2050 len = snprintf(offset, linesize,
2051 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2052 z->uz_name, z->uz_size,
2053 z->uz_maxpages * z->uz_ipers,
2054 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2055 totalfree,
2056 (unsigned long long)z->uz_allocs);
2057 ZONE_UNLOCK(z);
2058 for (p = offset + 12; p > offset && *p == ' '; --p)
2059 /* nothing */ ;
2060 p[1] = ':';
2061 cnt--;
2062 offset += len;
2063 }
2064 mtx_unlock(&uma_mtx);
2065 *offset++ = '\0';
2066 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2067out:
2068 FREE(tmpbuf, M_TEMP);
2069 return (error);
2070}
167static void uma_timeout(void *);
168static void uma_startup3(void);
169static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t);
170static void uma_zfree_internal(uma_zone_t, void *, void *, int);
171static void bucket_enable(void);
172void uma_print_zone(uma_zone_t);
173void uma_print_stats(void);
174static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
175
176SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
177 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
178SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
179
180/*
181 * This routine checks to see whether or not it's safe to enable buckets.
182 */
183
184static void
185bucket_enable(void)
186{
187 if (cnt.v_free_count < cnt.v_free_min)
188 bucketdisable = 1;
189 else
190 bucketdisable = 0;
191}
192
193
194/*
195 * Routine called by timeout which is used to fire off some time interval
196 * based calculations. (working set, stats, etc.)
197 *
198 * Arguments:
199 * arg Unused
200 *
201 * Returns:
202 * Nothing
203 */
204static void
205uma_timeout(void *unused)
206{
207 bucket_enable();
208 zone_foreach(zone_timeout);
209
210 /* Reschedule this event */
211 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
212}
213
214/*
215 * Routine to perform timeout driven calculations. This does the working set
216 * calculation as well as hash expansion and per cpu statistics aggregation.
217 *
218 * Arguments:
219 * zone The zone to operate on
220 *
221 * Returns:
222 * Nothing
223 */
224static void
225zone_timeout(uma_zone_t zone)
226{
227 uma_cache_t cache;
228 u_int64_t alloc;
229 int free;
230 int cpu;
231
232 alloc = 0;
233 free = 0;
234
235 /*
236 * Aggregate per cpu cache statistics back to the zone.
237 *
238 * I may rewrite this to set a flag in the per cpu cache instead of
239 * locking. If the flag is not cleared on the next round I will have
240 * to lock and do it here instead so that the statistics don't get too
241 * far out of sync.
242 */
243 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
244 for (cpu = 0; cpu < maxcpu; cpu++) {
245 if (CPU_ABSENT(cpu))
246 continue;
247 CPU_LOCK(zone, cpu);
248 cache = &zone->uz_cpu[cpu];
249 /* Add them up, and reset */
250 alloc += cache->uc_allocs;
251 cache->uc_allocs = 0;
252 if (cache->uc_allocbucket)
253 free += cache->uc_allocbucket->ub_ptr + 1;
254 if (cache->uc_freebucket)
255 free += cache->uc_freebucket->ub_ptr + 1;
256 CPU_UNLOCK(zone, cpu);
257 }
258 }
259
260 /* Now push these stats back into the zone.. */
261 ZONE_LOCK(zone);
262 zone->uz_allocs += alloc;
263
264 /*
265	 * cachefree is an instantaneous snapshot of what is in the per cpu
266 * caches, not an accurate counter
267 */
268 zone->uz_cachefree = free;
269
270 /*
271 * Expand the zone hash table.
272 *
273 * This is done if the number of slabs is larger than the hash size.
274 * What I'm trying to do here is eliminate collisions entirely. This
275 * may be a little aggressive. Should I allow for two collisions max?
276 */
277
278 if (zone->uz_flags & UMA_ZFLAG_HASH &&
279 zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
280 struct uma_hash newhash;
281 struct uma_hash oldhash;
282 int ret;
283
284 /*
285 * This is so involved because allocating and freeing
286 * while the zone lock is held will lead to deadlock.
287 * I have to do everything in stages and check for
288 * races.
289 */
290 newhash = zone->uz_hash;
291 ZONE_UNLOCK(zone);
292 ret = hash_alloc(&newhash);
293 ZONE_LOCK(zone);
294 if (ret) {
295 if (hash_expand(&zone->uz_hash, &newhash)) {
296 oldhash = zone->uz_hash;
297 zone->uz_hash = newhash;
298 } else
299 oldhash = newhash;
300
301 ZONE_UNLOCK(zone);
302 hash_free(&oldhash);
303 ZONE_LOCK(zone);
304 }
305 }
306
307 /*
308 * Here we compute the working set size as the total number of items
309 * left outstanding since the last time interval. This is slightly
310 * suboptimal. What we really want is the highest number of outstanding
311 * items during the last time quantum. This should be close enough.
312 *
313 * The working set size is used to throttle the zone_drain function.
314 * We don't want to return memory that we may need again immediately.
315 */
316 alloc = zone->uz_allocs - zone->uz_oallocs;
317 zone->uz_oallocs = zone->uz_allocs;
318 zone->uz_wssize = alloc;
319
320 ZONE_UNLOCK(zone);
321}
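/*
 * Worked example for the working set bookkeeping above (the numbers are
 * hypothetical): if uz_oallocs was 10000 at the previous tick and
 * uz_allocs is 10600 now, then
 *
 *	uz_wssize = 10600 - 10000 = 600
 *
 * and zone_drain() below will decline to return slabs while the zone has
 * fewer than 600 free items, since the last interval suggests that much
 * memory is about to be reused.
 */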
322
323/*
324 * Allocate and zero fill the next sized hash table from the appropriate
325 * backing store.
326 *
327 * Arguments:
328 * hash A new hash structure with the old hash size in uh_hashsize
329 *
330 * Returns:
331 *	1 on success and 0 on failure.
332 */
333int
334hash_alloc(struct uma_hash *hash)
335{
336 int oldsize;
337 int alloc;
338
339 oldsize = hash->uh_hashsize;
340
341 /* We're just going to go to a power of two greater */
342 if (oldsize) {
343 hash->uh_hashsize = oldsize * 2;
344 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
345 /* XXX Shouldn't be abusing DEVBUF here */
346 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
347 M_DEVBUF, M_NOWAIT);
348 } else {
349 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
350 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
351 M_WAITOK, NULL);
352 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
353 }
354 if (hash->uh_slab_hash) {
355 bzero(hash->uh_slab_hash, alloc);
356 hash->uh_hashmask = hash->uh_hashsize - 1;
357 return (1);
358 }
359
360 return (0);
361}
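/*
 * Illustrative sketch of the growth pattern (no specific sizes are
 * implied): successive expansions double the bucket array,
 *
 *	uh_hashsize:	N -> 2N -> 4N -> ...
 *	allocation:	sizeof(hash->uh_slab_hash[0]) * uh_hashsize bytes
 *	uh_hashmask:	uh_hashsize - 1	(always a power of two minus one)
 *
 * The very first table comes from hashzone so it can be set up before
 * malloc(9) is usable; later, larger tables come from malloc(9) with
 * M_NOWAIT and may fail, in which case the caller simply keeps the old
 * table.
 */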
362
363/*
364 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout
365 * to reduce collisions. This must not be done in the regular allocation path,
366 * otherwise, we can recurse on the vm while allocating pages.
367 *
368 * Arguments:
369 * oldhash The hash you want to expand
370 * newhash The hash structure for the new table
371 *
372 * Returns:
373 *	1 on success and 0 on failure.
374 *
375 * Discussion:
376 */
377static int
378hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
379{
380 uma_slab_t slab;
381 int hval;
382 int i;
383
384 if (!newhash->uh_slab_hash)
385 return (0);
386
387 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
388 return (0);
389
390 /*
391 * I need to investigate hash algorithms for resizing without a
392 * full rehash.
393 */
394
395 for (i = 0; i < oldhash->uh_hashsize; i++)
396 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
397 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
398 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
399 hval = UMA_HASH(newhash, slab->us_data);
400 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
401 slab, us_hlink);
402 }
403
404 return (1);
405}
406
407/*
408 * Free the hash bucket to the appropriate backing store.
409 *
410 * Arguments:
411 *	hash  The hash structure whose slab_hash array we're freeing;
412 *	      uh_hashsize tells us which backing store it came from
413 *
414 * Returns:
415 * Nothing
416 */
417static void
418hash_free(struct uma_hash *hash)
419{
420 if (hash->uh_slab_hash == NULL)
421 return;
422 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
423 uma_zfree_internal(hashzone,
424 hash->uh_slab_hash, NULL, 0);
425 else
426 free(hash->uh_slab_hash, M_DEVBUF);
427}
428
429/*
430 * Frees all outstanding items in a bucket
431 *
432 * Arguments:
433 * zone The zone to free to, must be unlocked.
434 * bucket The free/alloc bucket with items, cpu queue must be locked.
435 *
436 * Returns:
437 * Nothing
438 */
439
440static void
441bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
442{
443 uma_slab_t slab;
444 int mzone;
445 void *item;
446
447 if (bucket == NULL)
448 return;
449
450 slab = NULL;
451 mzone = 0;
452
453 /* We have to lookup the slab again for malloc.. */
454 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
455 mzone = 1;
456
457 while (bucket->ub_ptr > -1) {
458 item = bucket->ub_bucket[bucket->ub_ptr];
459#ifdef INVARIANTS
460 bucket->ub_bucket[bucket->ub_ptr] = NULL;
461 KASSERT(item != NULL,
462 ("bucket_drain: botched ptr, item is NULL"));
463#endif
464 bucket->ub_ptr--;
465 /*
466 * This is extremely inefficient. The slab pointer was passed
467 * to uma_zfree_arg, but we lost it because the buckets don't
468 * hold them. This will go away when free() gets a size passed
469 * to it.
470 */
471 if (mzone)
472 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
473 uma_zfree_internal(zone, item, slab, 1);
474 }
475}
476
477/*
478 * Drains the per cpu caches for a zone.
479 *
480 * Arguments:
481 * zone The zone to drain, must be unlocked.
482 *
483 * Returns:
484 * Nothing
485 *
486 * This function returns with the zone locked so that the per cpu queues can
487 * not be filled until zone_drain is finished.
488 *
489 */
490static void
491cache_drain(uma_zone_t zone)
492{
493 uma_bucket_t bucket;
494 uma_cache_t cache;
495 int cpu;
496
497 /*
498 * Flush out the per cpu queues.
499 *
500 * XXX This causes unnecessary thrashing due to immediately having
501 * empty per cpu queues. I need to improve this.
502 */
503
504 /*
505 * We have to lock each cpu cache before locking the zone
506 */
507 ZONE_UNLOCK(zone);
508
509 for (cpu = 0; cpu < maxcpu; cpu++) {
510 if (CPU_ABSENT(cpu))
511 continue;
512 CPU_LOCK(zone, cpu);
513 cache = &zone->uz_cpu[cpu];
514 bucket_drain(zone, cache->uc_allocbucket);
515 bucket_drain(zone, cache->uc_freebucket);
516 }
517
518 /*
519 * Drain the bucket queues and free the buckets, we just keep two per
520 * cpu (alloc/free).
521 */
522 ZONE_LOCK(zone);
523 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
524 LIST_REMOVE(bucket, ub_link);
525 ZONE_UNLOCK(zone);
526 bucket_drain(zone, bucket);
527 uma_zfree_internal(bucketzone, bucket, NULL, 0);
528 ZONE_LOCK(zone);
529 }
530
531 /* Now we do the free queue.. */
532 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
533 LIST_REMOVE(bucket, ub_link);
534 uma_zfree_internal(bucketzone, bucket, NULL, 0);
535 }
536
537 /* We unlock here, but they will all block until the zone is unlocked */
538 for (cpu = 0; cpu < maxcpu; cpu++) {
539 if (CPU_ABSENT(cpu))
540 continue;
541 CPU_UNLOCK(zone, cpu);
542 }
543
544 zone->uz_cachefree = 0;
545}
546
547/*
548 * Frees pages from a zone back to the system. This is done on demand from
549 * the pageout daemon.
550 *
551 * Arguments:
552 * zone The zone to free pages from
554 *
555 * Returns:
556 * Nothing.
557 */
558static void
559zone_drain(uma_zone_t zone)
560{
561 struct slabhead freeslabs = {};
562 uma_slab_t slab;
563 uma_slab_t n;
564 u_int64_t extra;
565 u_int8_t flags;
566 u_int8_t *mem;
567 int i;
568
569 /*
570	 * We don't want to take pages from statically allocated zones at this
571 * time
572 */
573 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
574 return;
575
576 ZONE_LOCK(zone);
577
578 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
579 cache_drain(zone);
580
581 if (zone->uz_free < zone->uz_wssize)
582 goto finished;
583#ifdef UMA_DEBUG
584 printf("%s working set size: %llu free items: %u\n",
585 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
586#endif
587 extra = zone->uz_free - zone->uz_wssize;
588 extra /= zone->uz_ipers;
589
590 /* extra is now the number of extra slabs that we can free */
591
592 if (extra == 0)
593 goto finished;
594
595 slab = LIST_FIRST(&zone->uz_free_slab);
596 while (slab && extra) {
597 n = LIST_NEXT(slab, us_link);
598
599		/* We have nowhere to free these to */
600 if (slab->us_flags & UMA_SLAB_BOOT) {
601 slab = n;
602 continue;
603 }
604
605 LIST_REMOVE(slab, us_link);
606 zone->uz_pages -= zone->uz_ppera;
607 zone->uz_free -= zone->uz_ipers;
608
609 if (zone->uz_flags & UMA_ZFLAG_HASH)
610 UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
611
612 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
613
614 slab = n;
615 extra--;
616 }
617finished:
618 ZONE_UNLOCK(zone);
619
620 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
621 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
622 if (zone->uz_fini)
623 for (i = 0; i < zone->uz_ipers; i++)
624 zone->uz_fini(
625 slab->us_data + (zone->uz_rsize * i),
626 zone->uz_size);
627 flags = slab->us_flags;
628 mem = slab->us_data;
629
630 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
631 uma_zfree_internal(slabzone, slab, NULL, 0);
632 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
633 for (i = 0; i < zone->uz_ppera; i++)
634 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
635 kmem_object);
636#ifdef UMA_DEBUG
637 printf("%s: Returning %d bytes.\n",
638 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
639#endif
640 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
641 }
642
643}
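/*
 * Worked example (hypothetical numbers): a zone with uz_ipers == 32,
 * uz_free == 200 and a working set size of 50 computes
 *
 *	extra = (200 - 50) / 32 = 4
 *
 * and hands at most four completely free slabs back to the page allocator,
 * keeping the rest because the working set suggests they will be needed
 * again soon.  Boot-time slabs are skipped since they have no backing map
 * to return to.
 */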
644
645/*
646 * Allocate a new slab for a zone. This does not insert the slab onto a list.
647 *
648 * Arguments:
649 * zone The zone to allocate slabs for
650 * wait Shall we wait?
651 *
652 * Returns:
653 * The slab that was allocated or NULL if there is no memory and the
654 * caller specified M_NOWAIT.
655 *
656 */
657static uma_slab_t
658slab_zalloc(uma_zone_t zone, int wait)
659{
660 uma_slab_t slab; /* Starting slab */
661 u_int8_t *mem;
662 u_int8_t flags;
663 int i;
664
665 slab = NULL;
666
667#ifdef UMA_DEBUG
668 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
669#endif
670 ZONE_UNLOCK(zone);
671
672 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
673 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
674 if (slab == NULL) {
675 ZONE_LOCK(zone);
676 return NULL;
677 }
678 }
679
680 /*
681 * This reproduces the old vm_zone behavior of zero filling pages the
682 * first time they are added to a zone.
683 *
684 * Malloced items are zeroed in uma_zalloc.
685 */
686
687 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
688 wait |= M_ZERO;
689 else
690 wait &= ~M_ZERO;
691
692 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
693 mtx_lock(&Giant);
694 mem = zone->uz_allocf(zone,
695 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
696 mtx_unlock(&Giant);
697 if (mem == NULL) {
698 ZONE_LOCK(zone);
699 return (NULL);
700 }
701 } else {
702 uma_slab_t tmps;
703
704 if (zone->uz_ppera > 1)
705			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
706 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
707 panic("Mallocing before uma_startup2 has been called.\n");
708 if (uma_boot_free == 0)
709 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
710 tmps = LIST_FIRST(&uma_boot_pages);
711 LIST_REMOVE(tmps, us_link);
712 uma_boot_free--;
713 mem = tmps->us_data;
714 }
715
716 /* Point the slab into the allocated memory */
717 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
718 slab = (uma_slab_t )(mem + zone->uz_pgoff);
719
720 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
721 for (i = 0; i < zone->uz_ppera; i++)
722 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
723
724 slab->us_zone = zone;
725 slab->us_data = mem;
726
727 /*
728 * This is intended to spread data out across cache lines.
729 *
730 * This code doesn't seem to work properly on x86, and on alpha
731 * it makes absolutely no performance difference. I'm sure it could
732	 * use some tuning, but Sun makes outrageous claims about its
733 * performance.
734 */
735#if 0
736 if (zone->uz_cachemax) {
737 slab->us_data += zone->uz_cacheoff;
738 zone->uz_cacheoff += UMA_CACHE_INC;
739 if (zone->uz_cacheoff > zone->uz_cachemax)
740 zone->uz_cacheoff = 0;
741 }
742#endif
743
744 slab->us_freecount = zone->uz_ipers;
745 slab->us_firstfree = 0;
746 slab->us_flags = flags;
747 for (i = 0; i < zone->uz_ipers; i++)
748 slab->us_freelist[i] = i+1;
749
750 if (zone->uz_init)
751 for (i = 0; i < zone->uz_ipers; i++)
752 zone->uz_init(slab->us_data + (zone->uz_rsize * i),
753 zone->uz_size);
754 ZONE_LOCK(zone);
755
756 if (zone->uz_flags & UMA_ZFLAG_HASH)
757 UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
758
759 zone->uz_pages += zone->uz_ppera;
760 zone->uz_free += zone->uz_ipers;
761
762
763 return (slab);
764}
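/*
 * The free list built above is just an array of indices threaded through
 * the slab header.  As a sketch, a fresh slab for a zone with
 * uz_ipers == 4 looks like
 *
 *	us_firstfree = 0
 *	us_freelist  = { 1, 2, 3, 4 }
 *
 * (the final link is never followed; us_freecount runs out first), and an
 * allocation pops index us_firstfree and follows the link:
 *
 *	freei = slab->us_firstfree;
 *	slab->us_firstfree = slab->us_freelist[freei];
 *	item = slab->us_data + (zone->uz_rsize * freei);
 *
 * which is exactly what uma_zalloc_internal() does further down.
 */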
765
766/*
767 * Allocates a number of pages from the system
768 *
769 * Arguments:
770 * zone Unused
771 * bytes The number of bytes requested
772 * wait Shall we wait?
773 *
774 * Returns:
775 * A pointer to the alloced memory or possibly
776 * NULL if M_NOWAIT is set.
777 */
778static void *
779page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
780{
781 void *p; /* Returned page */
782
783 *pflag = UMA_SLAB_KMEM;
784 p = (void *) kmem_malloc(kmem_map, bytes, wait);
785
786 return (p);
787}
788
789/*
790 * Allocates a number of pages from within an object
791 *
792 * Arguments:
793 * zone Unused
794 * bytes The number of bytes requested
795 * wait Shall we wait?
796 *
797 * Returns:
798 * A pointer to the alloced memory or possibly
799 * NULL if M_NOWAIT is set.
800 *
801 * TODO: If we fail during a multi-page allocation, release the pages that have
802 * already been allocated.
803 */
804static void *
805obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
806{
807 vm_offset_t zkva;
808 vm_offset_t retkva;
809 vm_page_t p;
810 int pages;
811
812 retkva = 0;
813 pages = zone->uz_pages;
814
815 /*
816 * This looks a little weird since we're getting one page at a time
817 */
818 while (bytes > 0) {
819 p = vm_page_alloc(zone->uz_obj, pages,
820 VM_ALLOC_INTERRUPT);
821 if (p == NULL)
822 return (NULL);
823
824 zkva = zone->uz_kva + pages * PAGE_SIZE;
825 if (retkva == 0)
826 retkva = zkva;
827 pmap_qenter(zkva, &p, 1);
828 bytes -= PAGE_SIZE;
829 pages += 1;
830 }
831
832 *flags = UMA_SLAB_PRIV;
833
834 return ((void *)retkva);
835}
836
837/*
838 * Frees a number of pages to the system
839 *
840 * Arguments:
841 * mem A pointer to the memory to be freed
842 * size The size of the memory being freed
843 * flags The original p->us_flags field
844 *
845 * Returns:
846 * Nothing
847 *
848 */
849static void
850page_free(void *mem, int size, u_int8_t flags)
851{
852 vm_map_t map;
853
854 if (flags & UMA_SLAB_KMEM)
855 map = kmem_map;
856 else
857 panic("UMA: page_free used with invalid flags %d\n", flags);
858
859 kmem_free(map, (vm_offset_t)mem, size);
860}
861
862/*
863 * Zero fill initializer
864 *
865 * Arguments/Returns follow uma_init specifications
866 *
867 */
868static void
869zero_init(void *mem, int size)
870{
871 bzero(mem, size);
872}
873
874/*
875 * Finish creating a small uma zone. This calculates ipers and the real item size (rsize).
876 *
877 * Arguments
878 * zone The zone we should initialize
879 *
880 * Returns
881 * Nothing
882 */
883static void
884zone_small_init(uma_zone_t zone)
885{
886 int rsize;
887 int memused;
888 int ipers;
889
890 rsize = zone->uz_size;
891
892 if (rsize < UMA_SMALLEST_UNIT)
893 rsize = UMA_SMALLEST_UNIT;
894
895 if (rsize & zone->uz_align)
896 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
897
898 zone->uz_rsize = rsize;
899
900 rsize += 1; /* Account for the byte of linkage */
901 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
902 zone->uz_ppera = 1;
903
904 memused = zone->uz_ipers * zone->uz_rsize;
905
906 /* Can we do any better? */
907 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
908 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
909 return;
910 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
911 if (ipers > zone->uz_ipers) {
912 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
913 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
914 zone->uz_flags |= UMA_ZFLAG_HASH;
915 zone->uz_ipers = ipers;
916 }
917 }
918
919}
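/*
 * Worked example (assuming UMA_SLAB_SIZE is one 4K page; the exact header
 * size is left symbolic): a 100 byte item with 4 byte alignment is already
 * aligned, so uz_rsize == 100.  Each item then consumes 101 bytes of slab
 * space, the extra byte being the free list link, so
 *
 *	uz_ipers = (4096 - sizeof(struct uma_slab)) / 101	(roughly 40)
 *
 * If the leftover space is at least UMA_MAX_WASTE and an off-page header
 * would let more items fit, the zone is marked OFFPAGE (and HASH, unless
 * it is a malloc zone) instead; internal zones skip this and always keep
 * their header in the page.
 */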
920
921/*
922 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
923 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
924 * more complicated.
925 *
926 * Arguments
927 * zone The zone we should initialize
928 *
929 * Returns
930 * Nothing
931 */
932static void
933zone_large_init(uma_zone_t zone)
934{
935 int pages;
936
937 pages = zone->uz_size / UMA_SLAB_SIZE;
938
939 /* Account for remainder */
940 if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
941 pages++;
942
943 zone->uz_ppera = pages;
944 zone->uz_ipers = 1;
945
946 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
947 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
948 zone->uz_flags |= UMA_ZFLAG_HASH;
949
950 zone->uz_rsize = zone->uz_size;
951}
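/*
 * Example, again assuming 4K slabs: a 10K item gives
 *
 *	pages = 10240 / 4096 = 2, with a remainder, so uz_ppera = 3
 *	uz_ipers = 1, uz_rsize = uz_size = 10240
 *
 * i.e. exactly one item per three-page slab, with the slab header kept
 * OFFPAGE and, unless this is a malloc zone, found through the hash.
 */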
952
953/*
954 * Zone header ctor. This initializes all fields, locks, etc. And inserts
955 * the zone onto the global zone list.
956 *
957 * Arguments/Returns follow uma_ctor specifications
958 *	udata  Actually uma_zctor_args
959 *
960 */
961
962static void
963zone_ctor(void *mem, int size, void *udata)
964{
965 struct uma_zctor_args *arg = udata;
966 uma_zone_t zone = mem;
967 int privlc;
968 int cplen;
969 int cpu;
970
971 bzero(zone, size);
972 zone->uz_name = arg->name;
973 zone->uz_size = arg->size;
974 zone->uz_ctor = arg->ctor;
975 zone->uz_dtor = arg->dtor;
976 zone->uz_init = arg->uminit;
977 zone->uz_fini = arg->fini;
978 zone->uz_align = arg->align;
979 zone->uz_free = 0;
980 zone->uz_pages = 0;
981 zone->uz_flags = 0;
982 zone->uz_allocf = page_alloc;
983 zone->uz_freef = page_free;
984
985 if (arg->flags & UMA_ZONE_ZINIT)
986 zone->uz_init = zero_init;
987
988 if (arg->flags & UMA_ZONE_INTERNAL)
989 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
990
991 if (arg->flags & UMA_ZONE_MALLOC)
992 zone->uz_flags |= UMA_ZFLAG_MALLOC;
993
994 if (arg->flags & UMA_ZONE_NOFREE)
995 zone->uz_flags |= UMA_ZFLAG_NOFREE;
996
997 if (arg->flags & UMA_ZONE_VM)
998 zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
999
1000 if (zone->uz_size > UMA_SLAB_SIZE)
1001 zone_large_init(zone);
1002 else
1003 zone_small_init(zone);
1004
1005 if (arg->flags & UMA_ZONE_MTXCLASS)
1006 privlc = 1;
1007 else
1008 privlc = 0;
1009
1010 /* We do this so that the per cpu lock name is unique for each zone */
1011 memcpy(zone->uz_lname, "PCPU ", 5);
1012 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1013 memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1014 zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1015
1016 /*
1017 * If we're putting the slab header in the actual page we need to
1018 * figure out where in each page it goes. This calculates a right
1019	 * justified offset into the memory on a UMA_ALIGN_PTR boundary.
1020 */
1021 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1022 int totsize;
1023 int waste;
1024
1025 /* Size of the slab struct and free list */
1026 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1027 if (totsize & UMA_ALIGN_PTR)
1028 totsize = (totsize & ~UMA_ALIGN_PTR) +
1029 (UMA_ALIGN_PTR + 1);
1030 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1031
1032 waste = zone->uz_pgoff;
1033 waste -= (zone->uz_ipers * zone->uz_rsize);
1034
1035 /*
1036 * This calculates how much space we have for cache line size
1037		 * optimizations. It works by offsetting each slab slightly.
1038 * Currently it breaks on x86, and so it is disabled.
1039 */
1040
1041 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1042 zone->uz_cachemax = waste - UMA_CACHE_INC;
1043 zone->uz_cacheoff = 0;
1044 }
1045
1046 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1047 + zone->uz_ipers;
1048 /* I don't think it's possible, but I'll make sure anyway */
1049 if (totsize > UMA_SLAB_SIZE) {
1050 printf("zone %s ipers %d rsize %d size %d\n",
1051 zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1052 zone->uz_size);
1053 panic("UMA slab won't fit.\n");
1054 }
1055 }
1056
1057 if (zone->uz_flags & UMA_ZFLAG_HASH)
1058 hash_alloc(&zone->uz_hash);
1059
1060#ifdef UMA_DEBUG
1061 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1062 zone->uz_name, zone,
1063 zone->uz_size, zone->uz_ipers,
1064 zone->uz_ppera, zone->uz_pgoff);
1065#endif
1066 ZONE_LOCK_INIT(zone, privlc);
1067
1068 mtx_lock(&uma_mtx);
1069 LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1070 mtx_unlock(&uma_mtx);
1071
1072 /*
1073 * Some internal zones don't have room allocated for the per cpu
1074 * caches. If we're internal, bail out here.
1075 */
1076
1077 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1078 return;
1079
1080 if (zone->uz_ipers < UMA_BUCKET_SIZE)
1081 zone->uz_count = zone->uz_ipers - 1;
1082 else
1083 zone->uz_count = UMA_BUCKET_SIZE - 1;
1084
1085 for (cpu = 0; cpu < maxcpu; cpu++)
1086 CPU_LOCK_INIT(zone, cpu, privlc);
1087}
1088
1089/*
1090 * Zone header dtor. This frees all data, destroys locks, frees the hash table
1091 * and removes the zone from the global list.
1092 *
1093 * Arguments/Returns follow uma_dtor specifications
1094 * udata unused
1095 */
1096
1097static void
1098zone_dtor(void *arg, int size, void *udata)
1099{
1100 uma_zone_t zone;
1101 int cpu;
1102
1103 zone = (uma_zone_t)arg;
1104
1105 ZONE_LOCK(zone);
1106 zone->uz_wssize = 0;
1107 ZONE_UNLOCK(zone);
1108
1109 mtx_lock(&uma_mtx);
1110 LIST_REMOVE(zone, uz_link);
1111 zone_drain(zone);
1112 mtx_unlock(&uma_mtx);
1113
1114 ZONE_LOCK(zone);
1115 if (zone->uz_free != 0)
1116 printf("Zone %s was not empty. Lost %d pages of memory.\n",
1117 zone->uz_name, zone->uz_pages);
1118
1119 if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
1120 for (cpu = 0; cpu < maxcpu; cpu++)
1121 CPU_LOCK_FINI(zone, cpu);
1122
1123 ZONE_UNLOCK(zone);
1124 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1125 hash_free(&zone->uz_hash);
1126
1127 ZONE_LOCK_FINI(zone);
1128}
1129/*
1130 * Traverses every zone in the system and calls a callback
1131 *
1132 * Arguments:
1133 * zfunc A pointer to a function which accepts a zone
1134 * as an argument.
1135 *
1136 * Returns:
1137 * Nothing
1138 */
1139static void
1140zone_foreach(void (*zfunc)(uma_zone_t))
1141{
1142 uma_zone_t zone;
1143
1144 mtx_lock(&uma_mtx);
1145 LIST_FOREACH(zone, &uma_zones, uz_link) {
1146 zfunc(zone);
1147 }
1148 mtx_unlock(&uma_mtx);
1149}
1150
1151/* Public functions */
1152/* See uma.h */
1153void
1154uma_startup(void *bootmem)
1155{
1156 struct uma_zctor_args args;
1157 uma_slab_t slab;
1158 int slabsize;
1159 int i;
1160
1161#ifdef UMA_DEBUG
1162 printf("Creating uma zone headers zone.\n");
1163#endif
1164#ifdef SMP
1165 maxcpu = mp_maxid + 1;
1166#else
1167 maxcpu = 1;
1168#endif
1169#ifdef UMA_DEBUG
1170 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1171 Debugger("stop");
1172#endif
1173 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1174	/* "Manually" create the initial zone */
1175 args.name = "UMA Zones";
1176 args.size = sizeof(struct uma_zone) +
1177 (sizeof(struct uma_cache) * (maxcpu - 1));
1178 args.ctor = zone_ctor;
1179 args.dtor = zone_dtor;
1180 args.uminit = zero_init;
1181 args.fini = NULL;
1182 args.align = 32 - 1;
1183 args.flags = UMA_ZONE_INTERNAL;
1184 /* The initial zone has no Per cpu queues so it's smaller */
1185 zone_ctor(zones, sizeof(struct uma_zone), &args);
1186
1187#ifdef UMA_DEBUG
1188 printf("Filling boot free list.\n");
1189#endif
1190 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1191 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1192 slab->us_data = (u_int8_t *)slab;
1193 slab->us_flags = UMA_SLAB_BOOT;
1194 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1195 uma_boot_free++;
1196 }
1197
1198#ifdef UMA_DEBUG
1199 printf("Creating slab zone.\n");
1200#endif
1201
1202 /*
1203 * This is the max number of free list items we'll have with
1204 * offpage slabs.
1205 */
1206
1207 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1208 slabsize /= UMA_MAX_WASTE;
1209	slabsize++;	/* In case the division truncated */
1210 slabsize += sizeof(struct uma_slab);
1211
1212 /* Now make a zone for slab headers */
1213 slabzone = uma_zcreate("UMA Slabs",
1214 slabsize,
1215 NULL, NULL, NULL, NULL,
1216 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1217
1218 hashzone = uma_zcreate("UMA Hash",
1219 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1220 NULL, NULL, NULL, NULL,
1221 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1222
1223 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1224 NULL, NULL, NULL, NULL,
1225 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1226
1227
1228#ifdef UMA_DEBUG
1229 printf("UMA startup complete.\n");
1230#endif
1231}
1232
1233/* see uma.h */
1234void
1235uma_startup2(void)
1236{
1237 booted = 1;
1238 bucket_enable();
1239#ifdef UMA_DEBUG
1240 printf("UMA startup2 complete.\n");
1241#endif
1242}
1243
1244/*
1245 * Initialize our callout handle
1246 *
1247 */
1248
1249static void
1250uma_startup3(void)
1251{
1252#ifdef UMA_DEBUG
1253 printf("Starting callout.\n");
1254#endif
1255 callout_init(&uma_callout, 0);
1256 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1257#ifdef UMA_DEBUG
1258 printf("UMA startup3 complete.\n");
1259#endif
1260}
1261
1262/* See uma.h */
1263uma_zone_t
1264uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1265 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1266
1267{
1268 struct uma_zctor_args args;
1269
1270 /* This stuff is essential for the zone ctor */
1271 args.name = name;
1272 args.size = size;
1273 args.ctor = ctor;
1274 args.dtor = dtor;
1275 args.uminit = uminit;
1276 args.fini = fini;
1277 args.align = align;
1278 args.flags = flags;
1279
1280 return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL));
1281}
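/*
 * Sketch of typical use (the names here are placeholders, not an actual
 * consumer): a subsystem creates its zone once at initialization time,
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	uma_zone_set_max(foo_zone, FOO_MAX);	(optional item limit)
 *
 * Ctor, dtor, init and fini are all optional and may be NULL, exactly as
 * the internal zones created in uma_startup() above pass them.
 */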
1282
1283/* See uma.h */
1284void
1285uma_zdestroy(uma_zone_t zone)
1286{
1287 uma_zfree_internal(zones, zone, NULL, 0);
1288}
1289
1290/* See uma.h */
1291void *
1292uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1293{
1294 void *item;
1295 uma_cache_t cache;
1296 uma_bucket_t bucket;
1297 int cpu;
1298
1299 /* This is the fast path allocation */
1300#ifdef UMA_DEBUG_ALLOC_1
1301 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1302#endif
1303
1304 if (!(flags & M_NOWAIT)) {
1305 KASSERT(curthread->td_intr_nesting_level == 0,
1306 ("malloc(M_WAITOK) in interrupt context"));
1307 WITNESS_SLEEP(1, NULL);
1308 }
1309
1310zalloc_restart:
1311 cpu = PCPU_GET(cpuid);
1312 CPU_LOCK(zone, cpu);
1313 cache = &zone->uz_cpu[cpu];
1314
1315zalloc_start:
1316 bucket = cache->uc_allocbucket;
1317
1318 if (bucket) {
1319 if (bucket->ub_ptr > -1) {
1320 item = bucket->ub_bucket[bucket->ub_ptr];
1321#ifdef INVARIANTS
1322 bucket->ub_bucket[bucket->ub_ptr] = NULL;
1323#endif
1324 bucket->ub_ptr--;
1325 KASSERT(item != NULL,
1326 ("uma_zalloc: Bucket pointer mangled."));
1327 cache->uc_allocs++;
1328#ifdef INVARIANTS
1329 uma_dbg_alloc(zone, NULL, item);
1330#endif
1331 CPU_UNLOCK(zone, cpu);
1332 if (zone->uz_ctor)
1333 zone->uz_ctor(item, zone->uz_size, udata);
1334 if (flags & M_ZERO)
1335 bzero(item, zone->uz_size);
1336 return (item);
1337 } else if (cache->uc_freebucket) {
1338 /*
1339 * We have run out of items in our allocbucket.
1340 * See if we can switch with our free bucket.
1341 */
1342 if (cache->uc_freebucket->ub_ptr > -1) {
1343 uma_bucket_t swap;
1344
1345#ifdef UMA_DEBUG_ALLOC
1346 printf("uma_zalloc: Swapping empty with alloc.\n");
1347#endif
1348 swap = cache->uc_freebucket;
1349 cache->uc_freebucket = cache->uc_allocbucket;
1350 cache->uc_allocbucket = swap;
1351
1352 goto zalloc_start;
1353 }
1354 }
1355 }
1356 ZONE_LOCK(zone);
1357 /* Since we have locked the zone we may as well send back our stats */
1358 zone->uz_allocs += cache->uc_allocs;
1359 cache->uc_allocs = 0;
1360
1361 /* Our old one is now a free bucket */
1362 if (cache->uc_allocbucket) {
1363 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1364 ("uma_zalloc_arg: Freeing a non free bucket."));
1365 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1366 cache->uc_allocbucket, ub_link);
1367 cache->uc_allocbucket = NULL;
1368 }
1369
1370 /* Check the free list for a new alloc bucket */
1371 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1372 KASSERT(bucket->ub_ptr != -1,
1373 ("uma_zalloc_arg: Returning an empty bucket."));
1374
1375 LIST_REMOVE(bucket, ub_link);
1376 cache->uc_allocbucket = bucket;
1377 ZONE_UNLOCK(zone);
1378 goto zalloc_start;
1379 }
1380 /* Bump up our uz_count so we get here less */
1381 if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1382 zone->uz_count++;
1383
1384 /* We are no longer associated with this cpu!!! */
1385 CPU_UNLOCK(zone, cpu);
1386
1387 /*
1388 * Now lets just fill a bucket and put it on the free list. If that
1389	 * works we'll restart the allocation from the beginning.
1390 *
1391 * Try this zone's free list first so we don't allocate extra buckets.
1392 */
1393
1394 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL)
1395 LIST_REMOVE(bucket, ub_link);
1396
1397 /* Now we no longer need the zone lock. */
1398 ZONE_UNLOCK(zone);
1399
1400 if (bucket == NULL) {
1401 int bflags;
1402
1403 bflags = flags;
1404 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1405 bflags |= M_NOVM;
1406
1407 bucket = uma_zalloc_internal(bucketzone,
1408 NULL, bflags, NULL);
1409 }
1410
1411 if (bucket != NULL) {
1412#ifdef INVARIANTS
1413 bzero(bucket, bucketzone->uz_size);
1414#endif
1415 bucket->ub_ptr = -1;
1416
1417 if (uma_zalloc_internal(zone, udata, flags, bucket))
1418 goto zalloc_restart;
1419 else
1420 uma_zfree_internal(bucketzone, bucket, NULL, 0);
1421 }
1422 /*
1423 * We may not get a bucket if we recurse, so
1424 * return an actual item.
1425 */
1426#ifdef UMA_DEBUG
1427 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1428#endif
1429
1430 return (uma_zalloc_internal(zone, udata, flags, NULL));
1431}
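/*
 * Consumer-side sketch (using the hypothetical foo_zone from the comment
 * after uma_zcreate() above):
 *
 *	p = uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree_arg(foo_zone, p, NULL);
 *
 * M_WAITOK may sleep and so is only legal outside interrupt context, as
 * the KASSERT at the top of this function insists; M_NOWAIT callers must
 * be prepared for a NULL return.  udata is passed through untouched to
 * the zone's ctor here and to its dtor on free.
 */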
1432
1433/*
1434 * Allocates an item for an internal zone OR fills a bucket
1435 *
1436 * Arguments
1437 * zone The zone to alloc for.
1438 * udata The data to be passed to the constructor.
1439 * flags M_WAITOK, M_NOWAIT, M_ZERO.
1440 * bucket The bucket to fill or NULL
1441 *
1442 * Returns
1443 * NULL if there is no memory and M_NOWAIT is set
1444 *	An item if called on an internal zone
1445 * Non NULL if called to fill a bucket and it was successful.
1446 *
1447 * Discussion:
1448 * This was much cleaner before it had to do per cpu caches. It is
1449 * complicated now because it has to handle the simple internal case, and
1450 * the more involved bucket filling and allocation.
1451 */
1452
1453static void *
1454uma_zalloc_internal(uma_zone_t zone, void *udata, int flags, uma_bucket_t bucket)
1455{
1456 uma_slab_t slab;
1457 u_int8_t freei;
1458 void *item;
1459
1460 item = NULL;
1461
1462 /*
1463 * This is to stop us from allocating per cpu buckets while we're
1464 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
1465 * boot pages.
1466 */
1467
1468 if (bucketdisable && zone == bucketzone)
1469 return (NULL);
1470
1471#ifdef UMA_DEBUG_ALLOC
1472 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1473#endif
1474 ZONE_LOCK(zone);
1475
1476 /*
1477 * This code is here to limit the number of simultaneous bucket fills
1478 * for any given zone to the number of per cpu caches in this zone. This
1479 * is done so that we don't allocate more memory than we really need.
1480 */
1481
1482 if (bucket) {
1483#ifdef SMP
1484 if (zone->uz_fills >= mp_ncpus) {
1485#else
1486 if (zone->uz_fills > 1) {
1487#endif
1488 ZONE_UNLOCK(zone);
1489 return (NULL);
1490 }
1491
1492 zone->uz_fills++;
1493 }
1494
1495new_slab:
1496
1497 /* Find a slab with some space */
1498 if (zone->uz_free) {
1499 if (!LIST_EMPTY(&zone->uz_part_slab)) {
1500 slab = LIST_FIRST(&zone->uz_part_slab);
1501 } else {
1502 slab = LIST_FIRST(&zone->uz_free_slab);
1503 LIST_REMOVE(slab, us_link);
1504 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1505 }
1506 } else {
1507 /*
1508 * This is to prevent us from recursively trying to allocate
1509 * buckets. The problem is that if an allocation forces us to
1510 * grab a new bucket we will call page_alloc, which will go off
1511 * and cause the vm to allocate vm_map_entries. If we need new
1512 * buckets there too we will recurse in kmem_alloc and bad
1513 * things happen. So instead we return a NULL bucket, and make
1514 * the code that allocates buckets smart enough to deal with it
1515 */
1516 if (zone == bucketzone && zone->uz_recurse != 0) {
1517 ZONE_UNLOCK(zone);
1518 return (NULL);
1519 }
1520 while (zone->uz_maxpages &&
1521 zone->uz_pages >= zone->uz_maxpages) {
1522 zone->uz_flags |= UMA_ZFLAG_FULL;
1523
1524 if (flags & M_WAITOK)
1525 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1526 else
1527 goto alloc_fail;
1528
1529 goto new_slab;
1530 }
1531
1532 if (flags & M_NOVM)
1533 goto alloc_fail;
1534
1535 zone->uz_recurse++;
1536 slab = slab_zalloc(zone, flags);
1537 zone->uz_recurse--;
1538 /*
1539 * We might not have been able to get a slab but another cpu
1540 * could have while we were unlocked. If we did get a slab put
1541 * it on the partially used slab list. If not check the free
1542 * count and restart or fail accordingly.
1543 */
1544 if (slab)
1545 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1546 else if (zone->uz_free == 0)
1547 goto alloc_fail;
1548 else
1549 goto new_slab;
1550 }
1551 /*
1552	 * If this is our first time through, put this guy on the list.
1553 */
1554 if (bucket != NULL && bucket->ub_ptr == -1)
1555 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1556 bucket, ub_link);
1557
1558
1559 while (slab->us_freecount) {
1560 freei = slab->us_firstfree;
1561 slab->us_firstfree = slab->us_freelist[freei];
1562
1563 item = slab->us_data + (zone->uz_rsize * freei);
1564
1565 slab->us_freecount--;
1566 zone->uz_free--;
1567#ifdef INVARIANTS
1568 uma_dbg_alloc(zone, slab, item);
1569#endif
1570 if (bucket == NULL) {
1571 zone->uz_allocs++;
1572 break;
1573 }
1574 bucket->ub_bucket[++bucket->ub_ptr] = item;
1575
1576 /* Don't overfill the bucket! */
1577 if (bucket->ub_ptr == zone->uz_count)
1578 break;
1579 }
1580
1581 /* Move this slab to the full list */
1582 if (slab->us_freecount == 0) {
1583 LIST_REMOVE(slab, us_link);
1584 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1585 }
1586
1587 if (bucket != NULL) {
1588 /* Try to keep the buckets totally full, but don't block */
1589 if (bucket->ub_ptr < zone->uz_count) {
1590 flags |= M_NOWAIT;
1591 flags &= ~M_WAITOK;
1592 goto new_slab;
1593 } else
1594 zone->uz_fills--;
1595 }
1596
1597 ZONE_UNLOCK(zone);
1598
1599 /* Only construct at this time if we're not filling a bucket */
1600 if (bucket == NULL) {
1601 if (zone->uz_ctor != NULL)
1602 zone->uz_ctor(item, zone->uz_size, udata);
1603 if (flags & M_ZERO)
1604 bzero(item, zone->uz_size);
1605 }
1606
1607 return (item);
1608
1609alloc_fail:
1610 if (bucket != NULL)
1611 zone->uz_fills--;
1612 ZONE_UNLOCK(zone);
1613
1614 if (bucket != NULL && bucket->ub_ptr != -1)
1615 return (bucket);
1616
1617 return (NULL);
1618}
1619
1620/* See uma.h */
1621void
1622uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1623{
1624 uma_cache_t cache;
1625 uma_bucket_t bucket;
1626 int bflags;
1627 int cpu;
1628
1629 /* This is the fast path free */
1630#ifdef UMA_DEBUG_ALLOC_1
1631 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1632#endif
1633 /*
1634 * The race here is acceptable. If we miss it we'll just have to wait
1635 * a little longer for the limits to be reset.
1636 */
1637
1638 if (zone->uz_flags & UMA_ZFLAG_FULL)
1639 goto zfree_internal;
1640
1641zfree_restart:
1642 cpu = PCPU_GET(cpuid);
1643 CPU_LOCK(zone, cpu);
1644 cache = &zone->uz_cpu[cpu];
1645
1646zfree_start:
1647 bucket = cache->uc_freebucket;
1648
1649 if (bucket) {
1650 /*
1651 * Do we have room in our bucket? It is OK for this uz count
1652 * check to be slightly out of sync.
1653 */
1654
1655 if (bucket->ub_ptr < zone->uz_count) {
1656 bucket->ub_ptr++;
1657 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1658 ("uma_zfree: Freeing to non free bucket index."));
1659 bucket->ub_bucket[bucket->ub_ptr] = item;
1660 if (zone->uz_dtor)
1661 zone->uz_dtor(item, zone->uz_size, udata);
1662#ifdef INVARIANTS
1663 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1664 uma_dbg_free(zone, udata, item);
1665 else
1666 uma_dbg_free(zone, NULL, item);
1667#endif
1668 CPU_UNLOCK(zone, cpu);
1669 return;
1670 } else if (cache->uc_allocbucket) {
1671#ifdef UMA_DEBUG_ALLOC
1672 printf("uma_zfree: Swapping buckets.\n");
1673#endif
1674 /*
1675 * We have run out of space in our freebucket.
1676 * See if we can switch with our alloc bucket.
1677 */
1678 if (cache->uc_allocbucket->ub_ptr <
1679 cache->uc_freebucket->ub_ptr) {
1680 uma_bucket_t swap;
1681
1682 swap = cache->uc_freebucket;
1683 cache->uc_freebucket = cache->uc_allocbucket;
1684 cache->uc_allocbucket = swap;
1685
1686 goto zfree_start;
1687 }
1688 }
1689 }
1690
1691 /*
1692 * We can get here for two reasons:
1693 *
1694 * 1) The buckets are NULL
1695 * 2) The alloc and free buckets are both somewhat full.
1696 *
1697 */
1698
1699 ZONE_LOCK(zone);
1700
1701 bucket = cache->uc_freebucket;
1702 cache->uc_freebucket = NULL;
1703
1704 /* Can we throw this on the zone full list? */
1705 if (bucket != NULL) {
1706#ifdef UMA_DEBUG_ALLOC
1707 printf("uma_zfree: Putting old bucket on the free list.\n");
1708#endif
1709 /* ub_ptr is pointing to the last free item */
1710 KASSERT(bucket->ub_ptr != -1,
1711 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1712 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1713 bucket, ub_link);
1714 }
1715 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1716 LIST_REMOVE(bucket, ub_link);
1717 ZONE_UNLOCK(zone);
1718 cache->uc_freebucket = bucket;
1719 goto zfree_start;
1720 }
1721 /* We're done with this CPU now */
1722 CPU_UNLOCK(zone, cpu);
1723
1724 /* And the zone.. */
1725 ZONE_UNLOCK(zone);
1726
1727#ifdef UMA_DEBUG_ALLOC
1728 printf("uma_zfree: Allocating new free bucket.\n");
1729#endif
1730 bflags = M_NOWAIT;
1731
1732 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1733 bflags |= M_NOVM;
1734#ifdef INVARIANTS
1735 bflags |= M_ZERO;
1736#endif
1737 bucket = uma_zalloc_internal(bucketzone,
1738 NULL, bflags, NULL);
1739 if (bucket) {
1740 bucket->ub_ptr = -1;
1741 ZONE_LOCK(zone);
1742 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1743 bucket, ub_link);
1744 ZONE_UNLOCK(zone);
1745 goto zfree_restart;
1746 }
1747
1748 /*
1749 * If nothing else caught this, we'll just do an internal free.
1750 */
1751
1752zfree_internal:
1753
1754 uma_zfree_internal(zone, item, udata, 0);
1755
1756 return;
1757
1758}
1759
1760/*
1761 * Frees an item to an INTERNAL zone or allocates a free bucket
1762 *
1763 * Arguments:
1764 * zone The zone to free to
1765 * item The item we're freeing
1766 * udata User supplied data for the dtor
1767 * skip Skip the dtor, it was done in uma_zfree_arg
1768 */
1769
1770static void
1771uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1772{
1773 uma_slab_t slab;
1774 u_int8_t *mem;
1775 u_int8_t freei;
1776
1777 ZONE_LOCK(zone);
1778
1779 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1780 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1781 if (zone->uz_flags & UMA_ZFLAG_HASH)
1782 slab = hash_sfind(&zone->uz_hash, mem);
1783 else {
1784 mem += zone->uz_pgoff;
1785 slab = (uma_slab_t)mem;
1786 }
1787 } else {
1788 slab = (uma_slab_t)udata;
1789 }
1790
1791 /* Do we need to remove from any lists? */
1792 if (slab->us_freecount+1 == zone->uz_ipers) {
1793 LIST_REMOVE(slab, us_link);
1794 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1795 } else if (slab->us_freecount == 0) {
1796 LIST_REMOVE(slab, us_link);
1797 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1798 }
1799
1800 /* Slab management stuff */
1801 freei = ((unsigned long)item - (unsigned long)slab->us_data)
1802 / zone->uz_rsize;
1803
1804#ifdef INVARIANTS
1805 if (!skip)
1806 uma_dbg_free(zone, slab, item);
1807#endif
1808
1809 slab->us_freelist[freei] = slab->us_firstfree;
1810 slab->us_firstfree = freei;
1811 slab->us_freecount++;
1812
1813 /* Zone statistics */
1814 zone->uz_free++;
1815
1816 if (!skip && zone->uz_dtor)
1817 zone->uz_dtor(item, zone->uz_size, udata);
1818
1819 if (zone->uz_flags & UMA_ZFLAG_FULL) {
1820 if (zone->uz_pages < zone->uz_maxpages)
1821 zone->uz_flags &= ~UMA_ZFLAG_FULL;
1822
1823 /* We can handle one more allocation */
1824		wakeup_one(zone);
1825 }
1826
1827 ZONE_UNLOCK(zone);
1828}
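/*
 * The index recovered above is plain pointer arithmetic.  Sketch, with
 * hypothetical numbers: if us_data is 0xc1000000, uz_rsize is 128 and the
 * item is 0xc1000300, then
 *
 *	freei = (0xc1000300 - 0xc1000000) / 128 = 6
 *
 * and slot 6 is pushed onto the slab's embedded free list, becoming the
 * next index handed out by uma_zalloc_internal().
 */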
1829
1830/* See uma.h */
1831void
1832uma_zone_set_max(uma_zone_t zone, int nitems)
1833{
1834 ZONE_LOCK(zone);
1835 if (zone->uz_ppera > 1)
1836 zone->uz_maxpages = nitems * zone->uz_ppera;
1837 else
1838 zone->uz_maxpages = nitems / zone->uz_ipers;
1839
1840 if (zone->uz_maxpages * zone->uz_ipers < nitems)
1841 zone->uz_maxpages++;
1842
1843 ZONE_UNLOCK(zone);
1844}
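/*
 * Example with hypothetical numbers: for a zone with uz_ipers == 40 and
 * uz_ppera == 1, uma_zone_set_max(zone, 1000) gives
 *
 *	uz_maxpages = 1000 / 40 = 25
 *
 * i.e. pages for 25 * 40 == 1000 items.  Because the limit is kept in
 * whole pages, an item count that does not divide evenly is rounded up
 * to the next slab boundary.
 */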
1845
1846/* See uma.h */
1847void
1848uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1849{
1850 ZONE_LOCK(zone);
1851
1852 zone->uz_freef = freef;
1853
1854 ZONE_UNLOCK(zone);
1855}
1856
1857/* See uma.h */
1858void
1859uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1860{
1861 ZONE_LOCK(zone);
1862
1863 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1864 zone->uz_allocf = allocf;
1865
1866 ZONE_UNLOCK(zone);
1867}
1868
1869/* See uma.h */
1870int
1871uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1872{
1873 int pages;
1874 vm_offset_t kva;
1875
1876 mtx_lock(&Giant);
1877
1878 pages = count / zone->uz_ipers;
1879
1880 if (pages * zone->uz_ipers < count)
1881 pages++;
1882
1883 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1884
1885 if (kva == 0) {
1886 mtx_unlock(&Giant);
1887 return (0);
1888 }
1889
1890
1891 if (obj == NULL)
1892 obj = vm_object_allocate(OBJT_DEFAULT,
1893 pages);
1894 else
1895 _vm_object_allocate(OBJT_DEFAULT,
1896 pages, obj);
1897
1898 ZONE_LOCK(zone);
1899 zone->uz_kva = kva;
1900 zone->uz_obj = obj;
1901 zone->uz_maxpages = pages;
1902
1903 zone->uz_allocf = obj_alloc;
1904 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1905
1906 ZONE_UNLOCK(zone);
1907 mtx_unlock(&Giant);
1908
1909 return (1);
1910}
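/*
 * Hypothetical usage sketch: a zone whose allocations must keep working
 * while kmem_map itself is under pressure can be backed by its own object
 * and the private piece of KVA reserved here, e.g.
 *
 *	static struct vm_object foo_obj;
 *	...
 *	foo_zone = uma_zcreate("FOO", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
 *	uma_zone_set_obj(foo_zone, &foo_obj, FOO_MAX);
 *
 * after which slabs come from obj_alloc() above, one page at a time with
 * VM_ALLOC_INTERRUPT, instead of kmem_malloc(), and the zone is capped at
 * roughly FOO_MAX items.  The names are placeholders, not real consumers
 * of this interface.
 */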
1911
1912/* See uma.h */
1913void
1914uma_prealloc(uma_zone_t zone, int items)
1915{
1916 int slabs;
1917 uma_slab_t slab;
1918
1919 ZONE_LOCK(zone);
1920 slabs = items / zone->uz_ipers;
1921 if (slabs * zone->uz_ipers < items)
1922 slabs++;
1923
1924 while (slabs > 0) {
1925 slab = slab_zalloc(zone, M_WAITOK);
1926 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1927 slabs--;
1928 }
1929 ZONE_UNLOCK(zone);
1930}
1931
1932/* See uma.h */
1933void
1934uma_reclaim(void)
1935{
1936 /*
1937 * You might think that the delay below would improve performance since
1938 * the allocator will give away memory that it may ask for immediately.
1939 * Really, it makes things worse, since cpu cycles are so much cheaper
1940 * than disk activity.
1941 */
1942#if 0
1943 static struct timeval tv = {0};
1944 struct timeval now;
1945 getmicrouptime(&now);
1946 if (now.tv_sec > tv.tv_sec + 30)
1947 tv = now;
1948 else
1949 return;
1950#endif
1951#ifdef UMA_DEBUG
1952 printf("UMA: vm asked us to release pages!\n");
1953#endif
1954 bucket_enable();
1955 zone_foreach(zone_drain);
1956
1957 /*
1958	 * Some slabs may have been freed, but this zone was visited early in the
1959	 * pass, so we visit it again to free pages that become empty once the other
1960	 * zones are drained. We have to do the same for buckets.
1961 */
1962 zone_drain(slabzone);
1963 zone_drain(bucketzone);
1964}
1965
1966void *
1967uma_large_malloc(int size, int wait)
1968{
1969 void *mem;
1970 uma_slab_t slab;
1971 u_int8_t flags;
1972
1973 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
1974 if (slab == NULL)
1975 return (NULL);
1976
1977 mem = page_alloc(NULL, size, &flags, wait);
1978 if (mem) {
1979 vsetslab((vm_offset_t)mem, slab);
1980 slab->us_data = mem;
1981 slab->us_flags = flags | UMA_SLAB_MALLOC;
1982 slab->us_size = size;
1983 } else {
1984 uma_zfree_internal(slabzone, slab, NULL, 0);
1985 }
1986
1987
1988 return (mem);
1989}
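/*
 * uma_large_malloc() and uma_large_free() back allocations larger than a
 * slab: the pages come straight from page_alloc() and a separate slab
 * header from slabzone records the size and the UMA_SLAB_MALLOC flag so
 * the memory can be returned later.  A minimal sketch of the calling
 * pattern (assumed, not taken from a real caller):
 *
 *	mem = uma_large_malloc(size, M_WAITOK);
 *	...
 *	slab = vtoslab((vm_offset_t)mem & (~UMA_SLAB_MASK));
 *	uma_large_free(slab);
 */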
1990
1991void
1992uma_large_free(uma_slab_t slab)
1993{
1994 vsetobj((vm_offset_t)slab->us_data, kmem_object);
1995 page_free(slab->us_data, slab->us_size, slab->us_flags);
1996 uma_zfree_internal(slabzone, slab, NULL, 0);
1997}
1998
1999void
2000uma_print_stats(void)
2001{
2002 zone_foreach(uma_print_zone);
2003}
2004
2005void
2006uma_print_zone(uma_zone_t zone)
2007{
2008 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2009 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2010 zone->uz_ipers, zone->uz_ppera,
2011 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2012}
2013
2014/*
2015 * Sysctl handler for vm.zone
2016 *
2017 * stolen from vm_zone.c
2018 */
2019static int
2020sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2021{
2022 int error, len, cnt;
2023 const int linesize = 128; /* conservative */
2024 int totalfree;
2025 char *tmpbuf, *offset;
2026 uma_zone_t z;
2027 char *p;
2028
2029 cnt = 0;
2030 mtx_lock(&uma_mtx);
2031 LIST_FOREACH(z, &uma_zones, uz_link)
2032 cnt++;
2033 mtx_unlock(&uma_mtx);
2034 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2035 M_TEMP, M_WAITOK);
2036 len = snprintf(tmpbuf, linesize,
2037 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2038 if (cnt == 0)
2039 tmpbuf[len - 1] = '\0';
2040 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2041 if (error || cnt == 0)
2042 goto out;
2043 offset = tmpbuf;
2044 mtx_lock(&uma_mtx);
2045 LIST_FOREACH(z, &uma_zones, uz_link) {
2046 if (cnt == 0) /* list may have changed size */
2047 break;
2048 ZONE_LOCK(z);
2049 totalfree = z->uz_free + z->uz_cachefree;
2050 len = snprintf(offset, linesize,
2051 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2052 z->uz_name, z->uz_size,
2053 z->uz_maxpages * z->uz_ipers,
2054 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2055 totalfree,
2056 (unsigned long long)z->uz_allocs);
2057 ZONE_UNLOCK(z);
2058 for (p = offset + 12; p > offset && *p == ' '; --p)
2059 /* nothing */ ;
2060 p[1] = ':';
2061 cnt--;
2062 offset += len;
2063 }
2064 mtx_unlock(&uma_mtx);
2065 *offset++ = '\0';
2066 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2067out:
2068 FREE(tmpbuf, M_TEMP);
2069 return (error);
2070}
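/*
 * For illustration, one line of "sysctl vm.zone" output produced by the
 * handler above looks roughly like this (the zone name and every number
 * below are invented purely to show the layout):
 *
 *	ITEM            SIZE     LIMIT     USED    FREE  REQUESTS
 *
 *	FOO:            000128, 00000000, 001024, 000064, 00099999
 *
 * The name is padded to twelve columns with a ':' patched in behind it;
 * the numbers are the item size, the limit (uz_maxpages * uz_ipers, 0 when
 * the zone is unlimited), items in use, items free including the per cpu
 * caches, and the cumulative allocation count, zero padded by the "%6.6u"
 * style conversions in the format string.
 */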