/*
 * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/vm/uma_core.c 92758 2002-03-20 05:28:34Z jeff $
 *
 */

/*
 * uma_core.c  Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.  This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose algorithms
 * are well known.
 *
 */

/*
 * TODO:
 *	- Improve memory usage for large allocations
 *	- Improve INVARIANTS (0xdeadc0de write out)
 *	- Investigate cache size adjustments
 */

/* I should really use ktr.. */
/*
#define UMA_DEBUG 1
#define UMA_DEBUG_ALLOC 1
#define UMA_DEBUG_ALLOC_1 1
*/


#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <machine/types.h>
#include <sys/mutex.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

/*
 * This is the zone from which all zones are spawned.  The idea is that even
 * the zone heads are allocated from the allocator, so we use the bss section
 * to bootstrap us.
 */
static struct uma_zone master_zone;
static uma_zone_t zones = &master_zone;

/* This is the zone from which all of uma_slab_t's are allocated. */
static uma_zone_t slabzone;

/*
 * The initial hash tables come out of this zone so they can be allocated
 * prior to malloc coming up.
 */
static uma_zone_t hashzone;

/*
 * Zone that buckets come from.
 */
static uma_zone_t bucketzone;

/* Linked list of all zones in the system */
static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);

/* This mutex protects the zone list */
static struct mtx uma_mtx;

/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
    LIST_HEAD_INITIALIZER(&uma_boot_pages);

/* Count of free boot-time pages */
static int uma_boot_free = 0;

/* Is the VM done starting up? */
static int booted = 0;

/* This is the handle used to schedule our working set calculator */
static struct callout uma_callout;

/* This is mp_maxid + 1, for use while looping over each cpu */
static int maxcpu;

/*
 * This structure is passed as the zone ctor arg so that I don't have to create
 * a special allocation function just for zones.
 */
struct uma_zctor_args {
	char *name;
	int size;
	uma_ctor ctor;
	uma_dtor dtor;
	uma_init uminit;
	uma_fini fini;
	int align;
	u_int16_t flags;
};

/*
 * This is the malloc hash table which is used to find the zone that a
 * malloc allocation came from.  It is not currently resizeable.  The
 * memory for the actual hash bucket is allocated in kmeminit.
 */
struct uma_hash mhash;
struct uma_hash *mallochash = &mhash;

/* Prototypes.. */

static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
static void page_free(void *, int, u_int8_t);
static uma_slab_t slab_zalloc(uma_zone_t, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void zone_drain(uma_zone_t);
static void zone_ctor(void *, int, void *);
static void zero_init(void *, int);
static void zone_small_init(uma_zone_t zone);
static void zone_large_init(uma_zone_t zone);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
static void hash_expand(struct uma_hash *);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *uma_zalloc_internal(uma_zone_t, void *, int, int *, int);
static void uma_zfree_internal(uma_zone_t, void *, void *, int);
void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);

SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_vm_zone, "A", "Zone Info");
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);


/*
 * Routine called by timeout which is used to fire off some time interval
 * based calculations.  (working set, stats, etc.)
 *
 * Arguments:
 *	arg   Unused
 *
 * Returns:
 *	Nothing
 */
static void
uma_timeout(void *unused)
{
	zone_foreach(zone_timeout);

	/* Reschedule this event */
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
}

/*
 * Routine to perform timeout driven calculations.  This does the working set
 * as well as hash expanding, and per cpu statistics aggregation.
 *
 * Arguments:
 *	zone  The zone to operate on
 *
 * Returns:
 *	Nothing
 */
static void
zone_timeout(uma_zone_t zone)
{
	uma_cache_t cache;
	u_int64_t alloc;
	int free;
	int cpu;

	alloc = 0;
	free = 0;

	/*
	 * Aggregate per cpu cache statistics back to the zone.
	 *
	 * I may rewrite this to set a flag in the per cpu cache instead of
	 * locking.  If the flag is not cleared on the next round I will have
	 * to lock and do it here instead so that the statistics don't get too
	 * far out of sync.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
		for (cpu = 0; cpu < maxcpu; cpu++) {
			if (CPU_ABSENT(cpu))
				continue;
			CPU_LOCK(zone, cpu);
			cache = &zone->uz_cpu[cpu];
			/* Add them up, and reset */
			alloc += cache->uc_allocs;
			cache->uc_allocs = 0;
			if (cache->uc_allocbucket)
				free += cache->uc_allocbucket->ub_ptr + 1;
			if (cache->uc_freebucket)
				free += cache->uc_freebucket->ub_ptr + 1;
			CPU_UNLOCK(zone, cpu);
		}
	}

	/* Now push these stats back into the zone.. */
	ZONE_LOCK(zone);
	zone->uz_allocs += alloc;

	/*
	 * cachefree is an instantaneous snapshot of what is in the per cpu
	 * caches, not an accurate counter
	 */
	zone->uz_cachefree = free;

	/*
	 * Expand the zone hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is avoid collisions entirely.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */

	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) &&
	    !(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
		if (zone->uz_pages / zone->uz_ppera
		    >= zone->uz_hash.uh_hashsize)
			hash_expand(&zone->uz_hash);
	}

	/*
	 * Here we compute the working set size as the total number of items
	 * left outstanding since the last time interval.  This is slightly
	 * suboptimal.  What we really want is the highest number of outstanding
	 * items during the last time quantum.  This should be close enough.
	 *
	 * The working set size is used to throttle the zone_drain function.
	 * We don't want to return memory that we may need again immediately.
	 */
	alloc = zone->uz_allocs - zone->uz_oallocs;
	zone->uz_oallocs = zone->uz_allocs;
	zone->uz_wssize = alloc;

	ZONE_UNLOCK(zone);
}
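
/*
 * Worked example of the working set computation above, with assumed
 * counter values: if uz_allocs (lifetime allocations, after the per cpu
 * aggregation) is 10500 and uz_oallocs (the lifetime count recorded at
 * the previous timeout) is 10000, then alloc = 10500 - 10000 = 500.
 * Roughly 500 items were handed out during the last UMA_WORKING_TIME
 * interval, so zone_drain will keep about that many items cached rather
 * than returning their slabs to the VM.
 */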

/*
 * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation path,
 * otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	hash  The hash you want to expand by a factor of two.
 *
 * Returns:
 *	Nothing
 *
 * Discussion:
 */
static void
hash_expand(struct uma_hash *hash)
{
	struct slabhead *newhash;
	struct slabhead *oldhash;
	uma_slab_t slab;
	int hzonefree;
	int hashsize;
	int alloc;
	int hval;
	int i;


	/*
	 * Remember the old hash size and see if it has to go back to the
	 * hash zone, or malloc.  The hash zone is used for the initial hash.
	 */

	hashsize = hash->uh_hashsize;
	oldhash = hash->uh_slab_hash;

	if (hashsize == UMA_HASH_SIZE_INIT)
		hzonefree = 1;
	else
		hzonefree = 0;


	/* We're just going to go to a power of two greater */
	if (hash->uh_hashsize) {
		alloc = sizeof(hash->uh_slab_hash[0]) * (hash->uh_hashsize * 2);
		/* XXX Shouldn't be abusing DEVBUF here */
		newhash = (struct slabhead *)malloc(alloc, M_DEVBUF, M_NOWAIT);
		if (newhash == NULL) {
			return;
		}
		hash->uh_hashsize *= 2;
	} else {
		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
		newhash = uma_zalloc_internal(hashzone, NULL, M_WAITOK, NULL, -1);
		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
	}

	bzero(newhash, alloc);

	hash->uh_hashmask = hash->uh_hashsize - 1;

	/*
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
	 */

	for (i = 0; i < hashsize; i++)
		while (!SLIST_EMPTY(&hash->uh_slab_hash[i])) {
			slab = SLIST_FIRST(&hash->uh_slab_hash[i]);
			SLIST_REMOVE_HEAD(&hash->uh_slab_hash[i], us_hlink);
			hval = UMA_HASH(hash, slab->us_data);
			SLIST_INSERT_HEAD(&newhash[hval], slab, us_hlink);
		}

	if (hash->uh_slab_hash) {
		if (hzonefree)
			uma_zfree_internal(hashzone,
			    hash->uh_slab_hash, NULL, 0);
		else
			free(hash->uh_slab_hash, M_DEVBUF);
	}
	hash->uh_slab_hash = newhash;

	return;
}
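
/*
 * Illustrative note, with an assumed UMA_HASH_SIZE_INIT of 32 (the real
 * value lives in uma_int.h): successive expansions move a zone's table
 * through 32 -> 64 -> 128 ... buckets, and uh_hashmask is always
 * uh_hashsize - 1, so UMA_HASH() can reduce a slab's data address to a
 * bucket index with a mask.  Every slab must be rehashed on expansion
 * because that mask changes.
 */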

/*
 * Frees all outstanding items in a bucket
 *
 * Arguments:
 *	zone    The zone to free to, must be unlocked.
 *	bucket  The free/alloc bucket with items, cpu queue must be locked.
 *
 * Returns:
 *	Nothing
 */

static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
	uma_slab_t slab;
	int mzone;
	void *item;

	if (bucket == NULL)
		return;

	slab = NULL;
	mzone = 0;

	/* We have to look up the slab again for malloc.. */
	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
		mzone = 1;

	while (bucket->ub_ptr > -1) {
		item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
		bucket->ub_bucket[bucket->ub_ptr] = NULL;
		KASSERT(item != NULL,
		    ("bucket_drain: botched ptr, item is NULL"));
#endif
		bucket->ub_ptr--;
		/*
		 * This is extremely inefficient.  The slab pointer was passed
		 * to uma_zfree_arg, but we lost it because the buckets don't
		 * hold them.  This will go away when free() gets a size passed
		 * to it.
		 */
		if (mzone)
			slab = hash_sfind(mallochash,
			    (u_int8_t *)((unsigned long)item &
			    (~UMA_SLAB_MASK)));
		uma_zfree_internal(zone, item, slab, 1);
	}
}
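
/*
 * Worked example of the mask above, assuming a 4K UMA_SLAB_SIZE (so
 * UMA_SLAB_MASK == 0xfff): an item at 0xc2345678 masks down to
 * 0xc2345000, the slab base address, which is what the slab was hashed
 * under when it was inserted into mallochash.
 */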

/*
 * Drains the per cpu caches for a zone.
 *
 * Arguments:
 *	zone  The zone to drain, must be unlocked.
 *
 * Returns:
 *	Nothing
 *
 * This function returns with the zone locked so that the per cpu queues can
 * not be filled until zone_drain is finished.
 *
 */
static void
cache_drain(uma_zone_t zone)
{
	uma_bucket_t bucket;
	uma_cache_t cache;
	int cpu;

	/*
	 * Flush out the per cpu queues.
	 *
	 * XXX This causes unnecessary thrashing due to immediately having
	 * empty per cpu queues.  I need to improve this.
	 */

	/*
	 * We have to lock each cpu cache before locking the zone
	 */
	ZONE_UNLOCK(zone);

	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		CPU_LOCK(zone, cpu);
		cache = &zone->uz_cpu[cpu];
		bucket_drain(zone, cache->uc_allocbucket);
		bucket_drain(zone, cache->uc_freebucket);
	}

	/*
	 * Drain the bucket queues and free the buckets, we just keep two per
	 * cpu (alloc/free).
	 */
	ZONE_LOCK(zone);
	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		bucket_drain(zone, bucket);
		uma_zfree_internal(bucketzone, bucket, NULL, 0);
		ZONE_LOCK(zone);
	}

	/* Now we do the free queue.. */
	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		uma_zfree_internal(bucketzone, bucket, NULL, 0);
	}

	/* We unlock here, but they will all block until the zone is unlocked */
	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		CPU_UNLOCK(zone, cpu);
	}
}

/*
 * Frees pages from a zone back to the system.  This is done on demand from
 * the pageout daemon.
 *
 * Arguments:
 *	zone  The zone to free pages from
 *
 * Returns:
 *	Nothing.
 */
static void
zone_drain(uma_zone_t zone)
{
	uma_slab_t slab;
	uma_slab_t n;
	u_int64_t extra;
	u_int8_t flags;
	u_int8_t *mem;
	int i;

	/*
	 * We don't want to take pages from statically allocated zones at this
	 * time
	 */
	if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
		return;

	ZONE_LOCK(zone);

	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
		cache_drain(zone);

	if (zone->uz_free < zone->uz_wssize)
		goto finished;
#ifdef UMA_DEBUG
	printf("%s working set size: %llu free items: %u\n",
	    zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
#endif
	extra = zone->uz_free - zone->uz_wssize;
	extra /= zone->uz_ipers;

	/* extra is now the number of extra slabs that we can free */

	if (extra == 0)
		goto finished;

	slab = LIST_FIRST(&zone->uz_free_slab);
	while (slab && extra) {
		n = LIST_NEXT(slab, us_link);

		/* We have nowhere to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		zone->uz_pages -= zone->uz_ppera;
		zone->uz_free -= zone->uz_ipers;
		if (zone->uz_fini)
			for (i = 0; i < zone->uz_ipers; i++)
				zone->uz_fini(
				    slab->us_data + (zone->uz_rsize * i),
				    zone->uz_size);
		flags = slab->us_flags;
		mem = slab->us_data;
		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
			if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
				UMA_HASH_REMOVE(mallochash,
				    slab, slab->us_data);
			} else {
				UMA_HASH_REMOVE(&zone->uz_hash,
				    slab, slab->us_data);
			}
			uma_zfree_internal(slabzone, slab, NULL, 0);
		} else if (zone->uz_flags & UMA_ZFLAG_MALLOC)
			UMA_HASH_REMOVE(mallochash, slab, slab->us_data);
#ifdef UMA_DEBUG
		printf("%s: Returning %d bytes.\n",
		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
#endif
		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);

		slab = n;
		extra--;
	}

finished:
	ZONE_UNLOCK(zone);
}
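
/*
 * Worked example of the drain arithmetic above, with assumed numbers:
 * if a zone has uz_free = 1000 free items, a working set of
 * uz_wssize = 400, and uz_ipers = 50 items per slab, the surplus is
 * 600 items, so extra = 600 / 50 = 12 slabs may be handed back to the
 * VM while the working set's worth of items stays cached for the next
 * interval.
 */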

/*
 * Allocate a new slab for a zone.  This does not insert the slab onto a list.
 *
 * Arguments:
 *	zone  The zone to allocate slabs for
 *	wait  Shall we wait?
 *
 * Returns:
 *	The slab that was allocated or NULL if there is no memory and the
 *	caller specified M_NOWAIT.
 *
 */
static uma_slab_t
slab_zalloc(uma_zone_t zone, int wait)
{
	uma_slab_t slab;	/* Starting slab */
	u_int8_t *mem;
	u_int8_t flags;
	int i;

#ifdef UMA_DEBUG
	printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
#endif
	if (zone->uz_maxpages &&
	    zone->uz_pages + zone->uz_ppera > zone->uz_maxpages)
		return (NULL);

	if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
		ZONE_UNLOCK(zone);
		mtx_lock(&Giant);
		slab = (uma_slab_t)zone->uz_allocf(zone,
		    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
		mtx_unlock(&Giant);
		ZONE_LOCK(zone);
		if (slab != NULL)
			slab->us_data = (u_int8_t *)slab;
		else
			return (NULL);
	} else {

		if (zone->uz_ppera > 1)
			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
			panic("Mallocing before uma_startup2 has been called.\n");
		if (uma_boot_free == 0)
			panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
		slab = LIST_FIRST(&uma_boot_pages);
		LIST_REMOVE(slab, us_link);
		uma_boot_free--;
	}

	mem = slab->us_data;

	/* Alloc slab structure for offpage, otherwise adjust its position */
	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
		slab = (uma_slab_t)(mem + zone->uz_pgoff);
	} else {
		slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1);
		if (slab == NULL) /* XXX This should go away */
			panic("UMA: No free slab structures");
		if (!(zone->uz_flags & UMA_ZFLAG_MALLOC))
			UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
	}
	if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
#ifdef UMA_DEBUG
		printf("Inserting %p into malloc hash from slab %p\n",
		    mem, slab);
#endif
		UMA_HASH_INSERT(mallochash, slab, mem);
	}

	slab->us_zone = zone;
	slab->us_data = mem;

	/*
	 * This is intended to spread data out across cache lines.
	 *
	 * This code doesn't seem to work properly on x86, and on alpha
	 * it makes absolutely no performance difference.  I'm sure it could
	 * use some tuning, but sun makes outrageous claims about its
	 * performance.
	 */
#if 0
	if (zone->uz_cachemax) {
		slab->us_data += zone->uz_cacheoff;
		zone->uz_cacheoff += UMA_CACHE_INC;
		if (zone->uz_cacheoff > zone->uz_cachemax)
			zone->uz_cacheoff = 0;
	}
#endif

	slab->us_freecount = zone->uz_ipers;
	slab->us_firstfree = 0;
	slab->us_flags = flags;
	for (i = 0; i < zone->uz_ipers; i++)
		slab->us_freelist[i] = i+1;

	if (zone->uz_init)
		for (i = 0; i < zone->uz_ipers; i++)
			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
			    zone->uz_size);

	zone->uz_pages += zone->uz_ppera;
	zone->uz_free += zone->uz_ipers;

	return (slab);
}
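
/*
 * Illustrative note on the us_freelist initialization above: it builds
 * what is effectively a singly linked free list by index.  For a zone
 * with uz_ipers = 4 the array becomes {1, 2, 3, 4} with us_firstfree = 0,
 * so allocation walks 0 -> 1 -> 2 -> 3, and the last entry's link
 * (4 == uz_ipers) marks the end once all four items are handed out.
 */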

/*
 * Allocates a number of pages from the system
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	void *p;	/* Returned page */

	/*
	 * XXX The original zone allocator did this, but I don't think it's
	 * necessary in current.
	 */

	if (lockstatus(&kernel_map->lock, NULL)) {
		*pflag = UMA_SLAB_KMEM;
		p = (void *) kmem_malloc(kmem_map, bytes, wait);
	} else {
		*pflag = UMA_SLAB_KMAP;
		p = (void *) kmem_alloc(kernel_map, bytes);
	}

	return (p);
}

/*
 * Allocates a number of pages from within an object
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	vm_offset_t zkva;
	vm_offset_t retkva;
	vm_page_t p;
	int pages;

	retkva = NULL;
	pages = zone->uz_pages;

	/*
	 * This looks a little weird since we're getting one page at a time
	 */
	while (bytes > 0) {
		p = vm_page_alloc(zone->uz_obj, pages,
		    VM_ALLOC_INTERRUPT);
		if (p == NULL)
			return (NULL);

		zkva = zone->uz_kva + pages * PAGE_SIZE;
		if (retkva == NULL)
			retkva = zkva;
		pmap_qenter(zkva, &p, 1);
		bytes -= PAGE_SIZE;
		pages += 1;
	}

	*flags = UMA_SLAB_PRIV;

	return ((void *)retkva);
}

/*
 * Frees a number of pages to the system
 *
 * Arguments:
 *	mem    A pointer to the memory to be freed
 *	size   The size of the memory being freed
 *	flags  The original p->us_flags field
 *
 * Returns:
 *	Nothing
 *
 */
static void
page_free(void *mem, int size, u_int8_t flags)
{
	vm_map_t map;

	if (flags & UMA_SLAB_KMEM)
		map = kmem_map;
	else if (flags & UMA_SLAB_KMAP)
		map = kernel_map;
	else
		panic("UMA: page_free used with invalid flags %d\n", flags);

	kmem_free(map, (vm_offset_t)mem, size);
}

/*
 * Zero fill initializer
 *
 * Arguments/Returns follow uma_init specifications
 *
 */
static void
zero_init(void *mem, int size)
{
	bzero(mem, size);
}

/*
 * Finish creating a small uma zone.  This calculates ipers, and the zone size.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_small_init(uma_zone_t zone)
{
	int rsize;
	int memused;
	int ipers;

	rsize = zone->uz_size;

	if (rsize < UMA_SMALLEST_UNIT)
		rsize = UMA_SMALLEST_UNIT;

	if (rsize & zone->uz_align)
		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);

	zone->uz_rsize = rsize;

	rsize += 1;	/* Account for the byte of linkage */
	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
	zone->uz_ppera = 1;

	memused = zone->uz_ipers * zone->uz_rsize;

	/* Can we do any better? */
	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
			return;
		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
		if (ipers > zone->uz_ipers) {
			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
			zone->uz_ipers = ipers;
		}
	}

}

/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_large_init(uma_zone_t zone)
{
	int pages;

	pages = zone->uz_size / UMA_SLAB_SIZE;

	/* Account for remainder */
	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
		pages++;

	zone->uz_ppera = pages;
	zone->uz_ipers = 1;

	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
	zone->uz_rsize = zone->uz_size;
}
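
/*
 * Worked example of zone_small_init, assuming UMA_SLAB_SIZE = 4096, a
 * 32-byte struct uma_slab, and a zone with uz_size = 100 and an
 * alignment mask of 15: the item is rounded to rsize = 112 (the next
 * multiple of 16); with one linkage byte each, ipers = (4096 - 32) / 113
 * = 35 items, using 35 * 112 = 3920 bytes.  If the leftover space
 * exceeded UMA_MAX_WASTE, the zone would instead go OFFPAGE and fit
 * 4096 / 112 = 36 items per slab.
 */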

/*
 * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
 * the zone onto the global zone list.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_zcreat_args
 *
 */

static void
zone_ctor(void *mem, int size, void *udata)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	int cplen;
	int cpu;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_size = arg->size;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_init = arg->uminit;
	zone->uz_align = arg->align;
	zone->uz_free = 0;
	zone->uz_pages = 0;
	zone->uz_flags = 0;
	zone->uz_allocf = page_alloc;
	zone->uz_freef = page_free;

	if (arg->flags & UMA_ZONE_ZINIT)
		zone->uz_init = zero_init;

	if (arg->flags & UMA_ZONE_INTERNAL)
		zone->uz_flags |= UMA_ZFLAG_INTERNAL;

	if (arg->flags & UMA_ZONE_MALLOC)
		zone->uz_flags |= UMA_ZFLAG_MALLOC;

	if (arg->flags & UMA_ZONE_NOFREE)
		zone->uz_flags |= UMA_ZFLAG_NOFREE;

	if (zone->uz_size > UMA_SLAB_SIZE)
		zone_large_init(zone);
	else
		zone_small_init(zone);

	/* We do this so that the per cpu lock name is unique for each zone */
	memcpy(zone->uz_lname, "PCPU ", 5);
	cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
	memcpy(zone->uz_lname+5, zone->uz_name, cplen);
	zone->uz_lname[LOCKNAME_LEN - 1] = '\0';

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
		int totsize;
		int waste;

		/* Size of the slab struct and free list */
		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;

		waste = zone->uz_pgoff;
		waste -= (zone->uz_ipers * zone->uz_rsize);

		/*
		 * This calculates how much space we have for cache line size
		 * optimizations.  It works by offsetting each slab slightly.
		 * Currently it breaks on x86, and so it is disabled.
		 */

		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
			zone->uz_cachemax = waste - UMA_CACHE_INC;
			zone->uz_cacheoff = 0;
		}

		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
		    + zone->uz_ipers;
		/* I don't think it's possible, but I'll make sure anyway */
		if (totsize > UMA_SLAB_SIZE) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
			    zone->uz_size);
			panic("UMA slab won't fit.\n");
		}
	} else {
		/* hash_expand here to allocate the initial hash table */
		hash_expand(&zone->uz_hash);
		zone->uz_pgoff = 0;
	}

#ifdef UMA_DEBUG
	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
	    zone->uz_name, zone,
	    zone->uz_size, zone->uz_ipers,
	    zone->uz_ppera, zone->uz_pgoff);
#endif
	ZONE_LOCK_INIT(zone);

	mtx_lock(&uma_mtx);
	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
	mtx_unlock(&uma_mtx);

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */

	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
		return;

	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (zone->uz_ipers < UMA_BUCKET_SIZE)
			zone->uz_cpu[cpu].uc_count = zone->uz_ipers - 1;
		else
			zone->uz_cpu[cpu].uc_count = UMA_BUCKET_SIZE - 1;
		CPU_LOCK_INIT(zone, cpu);
	}
}
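
/*
 * Worked example of the right-justified pgoff computation above,
 * assuming a 4096-byte slab, a 32-byte struct uma_slab, 35 items, and
 * UMA_ALIGN_PTR = 7: the header plus free list need 32 + 35 = 67 bytes,
 * rounded up to 72 for pointer alignment, so uz_pgoff = 4096 - 72 =
 * 4024.  Items grow up from the page base while the slab header sits
 * flush against the end of the page.
 */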

/*
 * Traverses every zone in the system and calls a callback
 *
 * Arguments:
 *	zfunc  A pointer to a function which accepts a zone
 *	       as an argument.
 *
 * Returns:
 *	Nothing
 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_zone_t zone;

	mtx_lock(&uma_mtx);
	LIST_FOREACH(zone, &uma_zones, uz_link) {
		zfunc(zone);
	}
	mtx_unlock(&uma_mtx);
}

/* Public functions */
/* See uma.h */
void
uma_startup(void *bootmem)
{
	struct uma_zctor_args args;
	uma_slab_t slab;
	int slabsize;
	int i;

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone.\n");
#endif
#ifdef SMP
	maxcpu = mp_maxid + 1;
#else
	maxcpu = 1;
#endif
#ifdef UMA_DEBUG
	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
	Debugger("stop");
#endif
	mtx_init(&uma_mtx, "UMA lock", MTX_DEF);
	/* "Manually" create the initial zone */
	args.name = "UMA Zones";
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (maxcpu - 1));
	args.ctor = zone_ctor;
	args.dtor = NULL;
	args.uminit = zero_init;
	args.fini = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZONE_INTERNAL;
	/* The initial zone has no per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	for (i = 0; i < UMA_BOOT_PAGES; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (u_int8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
		uma_boot_free++;
	}

#ifdef UMA_DEBUG
	printf("Creating slab zone.\n");
#endif

	/*
	 * This is the max number of free list items we'll have with
	 * offpage slabs.
	 */

	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
	slabsize /= UMA_MAX_WASTE;
	slabsize++;	/* In case it was rounded down */
	slabsize += sizeof(struct uma_slab);

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
	    slabsize,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);


#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}
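
/*
 * Illustrative note on the slabzone item size above: it is the slab
 * header plus the largest free list an offpage slab should need.
 * Assuming UMA_SLAB_SIZE = 4096, a 32-byte header, and UMA_MAX_WASTE =
 * 256 (the real values live in uma_int.h), that is (4096 - 32) / 256 =
 * 15, plus one for the truncated division, plus 32 for the header:
 * 48-byte slabzone items.
 */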

/* See uma.h */
void
uma_startup2(void *hashmem, u_long elems)
{
	bzero(hashmem, elems * sizeof(void *));
	mallochash->uh_slab_hash = hashmem;
	mallochash->uh_hashsize = elems;
	mallochash->uh_hashmask = elems - 1;
	booted = 1;
#ifdef UMA_DEBUG
	printf("UMA startup2 complete.\n");
#endif
}

/*
 * Initialize our callout handle
 *
 */

static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
	printf("Starting callout.\n");
#endif
	/* We'll be mpsafe once the vm is locked. */
	callout_init(&uma_callout, 0);
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
	printf("UMA startup3 complete.\n");
#endif
}

/* See uma.h */
uma_zone_t
uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init uminit,
    uma_fini fini, int align, u_int16_t flags)

{
	struct uma_zctor_args args;

	/* This stuff is essential for the zone ctor */
	args.name = name;
	args.size = size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;

	return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL, -1));
}
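
/*
 * A minimal sketch of creating and using a zone through this interface.
 * The zone name and struct foo are made up for illustration; uma_zalloc
 * and uma_zfree are the uma.h wrappers around the _arg variants below.
 *
 *	static uma_zone_t foo_zone;
 *	struct foo *f;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	f = uma_zalloc(foo_zone, M_WAITOK);
 *	...
 *	uma_zfree(foo_zone, f);
 */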

/* See uma.h */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int wait)
{
	void *item;
	uma_cache_t cache;
	uma_bucket_t bucket;
	int isitem;
	int cpu;

	/* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
	cpu = PCPU_GET(cpuid);
	CPU_LOCK(zone, cpu);
	cache = &zone->uz_cpu[cpu];
	cache->uc_allocs++;

zalloc_start:
	bucket = cache->uc_allocbucket;

	if (bucket) {
		if (bucket->ub_ptr > -1) {
			item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
			bucket->ub_bucket[bucket->ub_ptr] = NULL;
#endif
			bucket->ub_ptr--;
			KASSERT(item != NULL,
			    ("uma_zalloc: Bucket pointer mangled."));
			cache->uc_allocs++;
			CPU_UNLOCK(zone, cpu);
			if (zone->uz_ctor)
				zone->uz_ctor(item, zone->uz_size, udata);
			return (item);
		} else if (cache->uc_freebucket) {
			/*
			 * We have run out of items in our allocbucket.
			 * See if we can switch with our free bucket.
			 */
			if (cache->uc_freebucket->ub_ptr > -1) {
				uma_bucket_t swap;

#ifdef UMA_DEBUG_ALLOC
				printf("uma_zalloc: Swapping empty with alloc.\n");
#endif
				swap = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = swap;

				goto zalloc_start;
			}
		}
	}
	/*
	 * We can get here for three reasons:
	 *
	 * 1) The buckets are NULL
	 * 2) The zone is INTERNAL, and so it has no buckets.
	 * 3) The alloc and free buckets are both empty.
	 *
	 * Just hand off to uma_zalloc_internal to do the hard stuff
	 *
	 */
#ifdef UMA_DEBUG_ALLOC
	printf("uma_zalloc: Falling back to zalloc_internal.\n");
#endif

	item = uma_zalloc_internal(zone, udata, wait, &isitem, cpu);

#ifdef UMA_DEBUG
	printf("uma_zalloc: zalloc_internal completed.\n");
#endif

	if (item && isitem == 0)
		goto zalloc_start;

	/*
	 * If isitem is set then we should just return it.  The cpu lock
	 * was unlocked when we couldn't get a bucket.
	 */
| 740 retkva = NULL; 741 pages = zone->uz_pages; 742 743 /* 744 * This looks a little weird since we're getting one page at a time 745 */ 746 while (bytes > 0) { 747 p = vm_page_alloc(zone->uz_obj, pages, 748 VM_ALLOC_INTERRUPT); 749 if (p == NULL) 750 return (NULL); 751 752 zkva = zone->uz_kva + pages * PAGE_SIZE; 753 if (retkva == NULL) 754 retkva = zkva; 755 pmap_qenter(zkva, &p, 1); 756 bytes -= PAGE_SIZE; 757 pages += 1; 758 } 759 760 *flags = UMA_SLAB_PRIV; 761 762 return ((void *)retkva); 763} 764 765/* 766 * Frees a number of pages to the system 767 * 768 * Arguments: 769 * mem A pointer to the memory to be freed 770 * size The size of the memory being freed 771 * flags The original p->us_flags field 772 * 773 * Returns: 774 * Nothing 775 * 776 */ 777static void 778page_free(void *mem, int size, u_int8_t flags) 779{ 780 vm_map_t map; 781 if (flags & UMA_SLAB_KMEM) 782 map = kmem_map; 783 else if (flags & UMA_SLAB_KMAP) 784 map = kernel_map; 785 else 786 panic("UMA: page_free used with invalid flags %d\n", flags); 787 788 kmem_free(map, (vm_offset_t)mem, size); 789} 790 791/* 792 * Zero fill initializer 793 * 794 * Arguments/Returns follow uma_init specifications 795 * 796 */ 797static void 798zero_init(void *mem, int size) 799{ 800 bzero(mem, size); 801} 802 803/* 804 * Finish creating a small uma zone. This calculates ipers, and the zone size. 805 * 806 * Arguments 807 * zone The zone we should initialize 808 * 809 * Returns 810 * Nothing 811 */ 812static void 813zone_small_init(uma_zone_t zone) 814{ 815 int rsize; 816 int memused; 817 int ipers; 818 819 rsize = zone->uz_size; 820 821 if (rsize < UMA_SMALLEST_UNIT) 822 rsize = UMA_SMALLEST_UNIT; 823 824 if (rsize & zone->uz_align) 825 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1); 826 827 zone->uz_rsize = rsize; 828 829 rsize += 1; /* Account for the byte of linkage */ 830 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize; 831 zone->uz_ppera = 1; 832 833 memused = zone->uz_ipers * zone->uz_rsize; 834 835 /* Can we do any better? */ 836 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) { 837 if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 838 return; 839 ipers = UMA_SLAB_SIZE / zone->uz_rsize; 840 if (ipers > zone->uz_ipers) { 841 zone->uz_flags |= UMA_ZFLAG_OFFPAGE; 842 zone->uz_ipers = ipers; 843 } 844 } 845 846} 847 848/* 849 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do 850 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be 851 * more complicated. 852 * 853 * Arguments 854 * zone The zone we should initialize 855 * 856 * Returns 857 * Nothing 858 */ 859static void 860zone_large_init(uma_zone_t zone) 861{ 862 int pages; 863 864 pages = zone->uz_size / UMA_SLAB_SIZE; 865 866 /* Account for remainder */ 867 if ((pages * UMA_SLAB_SIZE) < zone->uz_size) 868 pages++; 869 870 zone->uz_ppera = pages; 871 zone->uz_ipers = 1; 872 873 zone->uz_flags |= UMA_ZFLAG_OFFPAGE; 874 zone->uz_rsize = zone->uz_size; 875} 876 877/* 878 * Zone header ctor. This initializes all fields, locks, etc. And inserts 879 * the zone onto the global zone list. 

/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments
 *	zone	The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_large_init(uma_zone_t zone)
{
	int pages;

	pages = zone->uz_size / UMA_SLAB_SIZE;

	/* Account for remainder */
	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
		pages++;

	zone->uz_ppera = pages;
	zone->uz_ipers = 1;

	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
	zone->uz_rsize = zone->uz_size;
}

/*
 * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
 * the zone onto the global zone list.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata	Actually a struct uma_zctor_args
 *
 */
static void
zone_ctor(void *mem, int size, void *udata)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	int cplen;
	int cpu;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_size = arg->size;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_init = arg->uminit;
	zone->uz_align = arg->align;
	zone->uz_free = 0;
	zone->uz_pages = 0;
	zone->uz_flags = 0;
	zone->uz_allocf = page_alloc;
	zone->uz_freef = page_free;

	if (arg->flags & UMA_ZONE_ZINIT)
		zone->uz_init = zero_init;

	if (arg->flags & UMA_ZONE_INTERNAL)
		zone->uz_flags |= UMA_ZFLAG_INTERNAL;

	if (arg->flags & UMA_ZONE_MALLOC)
		zone->uz_flags |= UMA_ZFLAG_MALLOC;

	if (arg->flags & UMA_ZONE_NOFREE)
		zone->uz_flags |= UMA_ZFLAG_NOFREE;

	if (zone->uz_size > UMA_SLAB_SIZE)
		zone_large_init(zone);
	else
		zone_small_init(zone);

	/* We do this so that the per cpu lock name is unique for each zone */
	memcpy(zone->uz_lname, "PCPU ", 5);
	cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
	memcpy(zone->uz_lname + 5, zone->uz_name, cplen);
	zone->uz_lname[LOCKNAME_LEN - 1] = '\0';

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
		int totsize;
		int waste;

		/* Size of the slab struct and free list */
		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;

		waste = zone->uz_pgoff;
		waste -= (zone->uz_ipers * zone->uz_rsize);

		/*
		 * This calculates how much space we have for cache line size
		 * optimizations.  It works by offsetting each slab slightly.
		 * Currently it breaks on x86, and so it is disabled.
		 */
		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
			zone->uz_cachemax = waste - UMA_CACHE_INC;
			zone->uz_cacheoff = 0;
		}

		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
		    + zone->uz_ipers;
		/* I don't think it's possible, but I'll make sure anyway */
		if (totsize > UMA_SLAB_SIZE) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
			    zone->uz_size);
			panic("UMA slab won't fit.\n");
		}
	} else {
		/* hash_expand here to allocate the initial hash table */
		hash_expand(&zone->uz_hash);
		zone->uz_pgoff = 0;
	}

#ifdef UMA_DEBUG
	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
	    zone->uz_name, zone,
	    zone->uz_size, zone->uz_ipers,
	    zone->uz_ppera, zone->uz_pgoff);
#endif
	ZONE_LOCK_INIT(zone);

	mtx_lock(&uma_mtx);
	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
	mtx_unlock(&uma_mtx);

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */
	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
		return;

	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (zone->uz_ipers < UMA_BUCKET_SIZE)
			zone->uz_cpu[cpu].uc_count = zone->uz_ipers - 1;
		else
			zone->uz_cpu[cpu].uc_count = UMA_BUCKET_SIZE - 1;
		CPU_LOCK_INIT(zone, cpu);
	}
}
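
/*
 * Sketch of the resulting in-page layout for the !OFFPAGE case (widths
 * are illustrative, not to scale):
 *
 *	0                 ipers * rsize        uz_pgoff      UMA_SLAB_SIZE
 *	|    items ...         |     waste         | slab header + links |
 *
 * uz_pgoff is the right justified, pointer aligned offset of the
 * struct uma_slab (plus the one linkage byte per item) within the page,
 * so a slab can be found from any item address by masking and adding
 * uz_pgoff.
 */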

/*
 * Traverses every zone in the system and calls a callback
 *
 * Arguments:
 *	zfunc	A pointer to a function which accepts a zone
 *		as an argument.
 *
 * Returns:
 *	Nothing
 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_zone_t zone;

	mtx_lock(&uma_mtx);
	LIST_FOREACH(zone, &uma_zones, uz_link) {
		zfunc(zone);
	}
	mtx_unlock(&uma_mtx);
}

/* Public functions */
/* See uma.h */
void
uma_startup(void *bootmem)
{
	struct uma_zctor_args args;
	uma_slab_t slab;
	int slabsize;
	int i;

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone.\n");
#endif
#ifdef SMP
	maxcpu = mp_maxid + 1;
#else
	maxcpu = 1;
#endif
#ifdef UMA_DEBUG
	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
	Debugger("stop");
#endif
	mtx_init(&uma_mtx, "UMA lock", MTX_DEF);

	/* "manually" create the initial zone */
	args.name = "UMA Zones";
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (maxcpu - 1));
	args.ctor = zone_ctor;
	args.dtor = NULL;
	args.uminit = zero_init;
	args.fini = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZONE_INTERNAL;
	/* The initial zone has no per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	for (i = 0; i < UMA_BOOT_PAGES; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (u_int8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
		uma_boot_free++;
	}

#ifdef UMA_DEBUG
	printf("Creating slab zone.\n");
#endif

	/*
	 * This is the max number of free list items we'll have with
	 * offpage slabs.
	 */
	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
	slabsize /= UMA_MAX_WASTE;
	slabsize++;			/* In case the division rounded down */
	slabsize += sizeof(struct uma_slab);

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
	    slabsize,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}
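
/*
 * Bootstrap ordering, as reconstructed from the code above and below:
 * uma_startup() runs first and carves UMA_BOOT_PAGES slabs directly out
 * of the bootmem it is handed, so zones can be created before the VM is
 * up; uma_startup2() installs the malloc hash and sets `booted', which
 * re-enables bucket allocation (see the !booted check in
 * uma_zalloc_internal()); uma_startup3() finally arms the working set
 * callout once timers are available.
 */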

/* see uma.h */
void
uma_startup2(void *hashmem, u_long elems)
{
	bzero(hashmem, elems * sizeof(void *));
	mallochash->uh_slab_hash = hashmem;
	mallochash->uh_hashsize = elems;
	mallochash->uh_hashmask = elems - 1;
	booted = 1;
#ifdef UMA_DEBUG
	printf("UMA startup2 complete.\n");
#endif
}

/*
 * Initialize our callout handle
 *
 */
static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
	printf("Starting callout.\n");
#endif
	/* We'll be mpsafe once the vm is locked. */
	callout_init(&uma_callout, 0);
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
	printf("UMA startup3 complete.\n");
#endif
}

/* See uma.h */
uma_zone_t
uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor,
    uma_init uminit, uma_fini fini, int align, u_int16_t flags)
{
	struct uma_zctor_args args;

	/* This stuff is essential for the zone ctor */
	args.name = name;
	args.size = size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;

	return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL, -1));
}

/* See uma.h */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int wait)
{
	void *item;
	uma_cache_t cache;
	uma_bucket_t bucket;
	int isitem;
	int cpu;

	/* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
	cpu = PCPU_GET(cpuid);
	CPU_LOCK(zone, cpu);
	cache = &zone->uz_cpu[cpu];
	cache->uc_allocs++;

zalloc_start:
	bucket = cache->uc_allocbucket;

	if (bucket) {
		if (bucket->ub_ptr > -1) {
			item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
			bucket->ub_bucket[bucket->ub_ptr] = NULL;
#endif
			bucket->ub_ptr--;
			KASSERT(item != NULL,
			    ("uma_zalloc: Bucket pointer mangled."));
			cache->uc_allocs++;
			CPU_UNLOCK(zone, cpu);
			if (zone->uz_ctor)
				zone->uz_ctor(item, zone->uz_size, udata);
			return (item);
		} else if (cache->uc_freebucket) {
			/*
			 * We have run out of items in our allocbucket.
			 * See if we can switch with our free bucket.
			 */
			if (cache->uc_freebucket->ub_ptr > -1) {
				uma_bucket_t swap;

#ifdef UMA_DEBUG_ALLOC
				printf("uma_zalloc: Swapping empty with alloc.\n");
#endif
				swap = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = swap;

				goto zalloc_start;
			}
		}
	}
	/*
	 * We can get here for three reasons:
	 *
	 * 1) The buckets are NULL
	 * 2) The zone is INTERNAL, and so it has no buckets.
	 * 3) The alloc and free buckets are both empty.
	 *
	 * Just hand off to uma_zalloc_internal to do the hard stuff.
	 */
#ifdef UMA_DEBUG_ALLOC
	printf("uma_zalloc: Falling back to zalloc_internal.\n");
#endif
	item = uma_zalloc_internal(zone, udata, wait, &isitem, cpu);

#ifdef UMA_DEBUG
	printf("uma_zalloc: zalloc_internal completed.\n");
#endif

	if (item && isitem == 0)
		goto zalloc_start;

	/*
	 * If isitem is set then we should just return it.  The cpu lock
	 * was unlocked when we couldn't get a bucket.
	 */
	return (item);
}
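
/*
 * Illustrative use of the exported API above (a sketch, not code from
 * this file; "foozone" and struct foo are hypothetical):
 *
 *	static uma_zone_t foozone;
 *
 *	foozone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	...
 *	struct foo *f = uma_zalloc_arg(foozone, NULL, M_WAITOK);
 *	...
 *	uma_zfree_arg(foozone, f, NULL);
 */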

/*
 * Allocates an item for an internal zone OR fills a bucket
 *
 * Arguments
 *	zone	The zone to alloc for.
 *	udata	The data to be passed to the constructor.
 *	wait	M_WAITOK or M_NOWAIT.
 *	isitem	The returned value is an item if this is true.
 *	cpu	The cpu # of the cache that we should use, or -1.
 *
 * Returns
 *	NULL if there is no memory and M_NOWAIT is set
 *	An item if called on an internal zone
 *	Non NULL if called to fill a bucket and it was successful.
 *
 * Discussion:
 *	This was much cleaner before it had to do per cpu caches.  It is
 *	complicated now because it has to handle the simple internal case, and
 *	the more involved bucket filling and allocation.  The isitem is there
 *	to remove a failure case.  You shouldn't fail on allocating from a zone
 *	because there were no buckets.  This allows the exported zalloc to just
 *	return the item.
 *
 */
static void *
uma_zalloc_internal(uma_zone_t zone, void *udata, int wait, int *isitem,
    int cpu)
{
	uma_bucket_t bucket;
	uma_cache_t cache;
	uma_slab_t slab;
	u_int8_t freei;
	void *item;

	bucket = NULL;
	cache = NULL;
	item = NULL;

	/*
	 * This is to stop us from allocating per cpu buckets while we're
	 * still running on the UMA_BOOT_PAGES reserve.  Otherwise we would
	 * exhaust the boot pages.
	 */
	if (!booted && zone == bucketzone)
		return (NULL);

#ifdef UMA_DEBUG_ALLOC
	printf("INTERNAL: Allocating one item from %s(%p)\n",
	    zone->uz_name, zone);
#endif
	if (isitem != NULL)
		*isitem = 0;

	ZONE_LOCK(zone);

	/* We got here because we need to fill some buckets */
	if (cpu != -1) {
		cache = &zone->uz_cpu[cpu];

		zone->uz_allocs += cache->uc_allocs;
		/* Check the free list */
		bucket = LIST_FIRST(&zone->uz_full_bucket);
		if (bucket) {
			LIST_REMOVE(bucket, ub_link);
			/* Our old one is now a free bucket */
			if (cache->uc_allocbucket) {
				KASSERT(cache->uc_allocbucket->ub_ptr == -1,
				    ("uma_zalloc_internal: Freeing a non free bucket."));
				LIST_INSERT_HEAD(&zone->uz_free_bucket,
				    cache->uc_allocbucket, ub_link);
			}
			KASSERT(bucket->ub_ptr != -1,
			    ("uma_zalloc_internal: Returning an empty bucket."));
			/*zone->uz_free -= bucket->ub_ptr + 1;*/
			cache->uc_allocbucket = bucket;
			ZONE_UNLOCK(zone);
			return (bucket);
		}
		/* Bump up our uc_count so we get here less */
		if (cache->uc_count < UMA_BUCKET_SIZE - 1)
			cache->uc_count++;
		/* Nothing on the free list, try to re-use the old one */
		bucket = cache->uc_allocbucket;
		if (bucket == NULL) {
			/* Nope, we need a new one */
			CPU_UNLOCK(zone, cpu);
			ZONE_UNLOCK(zone);
			bucket = uma_zalloc_internal(bucketzone,
			    NULL, wait, NULL, -1);
			CPU_LOCK(zone, cpu);
			ZONE_LOCK(zone);
			/* Did we lose the race? */
			if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG
				printf("uma_zalloc_internal: Lost race with another CPU.\n");
#endif
				if (bucket)
					uma_zfree_internal(bucketzone,
					    bucket, NULL, 0);
				ZONE_UNLOCK(zone);
				return (cache->uc_allocbucket);
			}
			cache->uc_allocbucket = bucket;

			if (bucket) {
#ifdef INVARIANTS
				bzero(bucket, bucketzone->uz_size);
#endif
				bucket->ub_ptr = -1;
			} else {
				/*
				 * We may not get a bucket if we recurse, so
				 * return an actual item.  The rest of this
				 * code does the right thing if the cache is
				 * NULL.
				 */
#ifdef UMA_DEBUG
				printf("uma_zalloc_internal: Bucketzone returned NULL\n");
#endif
				CPU_UNLOCK(zone, cpu);
				cache = NULL;
				cpu = -1;
			}
		}
	}

new_slab:
	/* Find a slab with some space */
	if (zone->uz_free) {
		if (!LIST_EMPTY(&zone->uz_part_slab)) {
			slab = LIST_FIRST(&zone->uz_part_slab);
		} else {
			slab = LIST_FIRST(&zone->uz_free_slab);
			LIST_REMOVE(slab, us_link);
			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
		}
	} else {
		/*
		 * This is to prevent us from recursively trying to allocate
		 * buckets.  The problem is that if an allocation forces us to
		 * grab a new bucket we will call page_alloc, which will go off
		 * and cause the vm to allocate vm_map_entries.  If we need new
		 * buckets there too we will recurse in kmem_alloc and bad
		 * things happen.  So instead we return a NULL bucket, and make
		 * the code that allocates buckets smart enough to deal with it.
		 */
		if (zone == bucketzone && zone->uz_recurse != 0) {
			ZONE_UNLOCK(zone);
			return (NULL);
		}
		zone->uz_recurse++;
		slab = slab_zalloc(zone, wait);
		zone->uz_recurse--;
		if (slab) {
			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
		/*
		 * We might not have been able to get a page, but another cpu
		 * could have while we were unlocked.
		 */
		} else if (zone->uz_free == 0) {
			ZONE_UNLOCK(zone);
			/* If we're filling a bucket return what we have */
			if (bucket != NULL && bucket->ub_ptr != -1)
				return (bucket);
			else
				return (NULL);
		} else {
			/* Another cpu must have succeeded */
			if ((slab = LIST_FIRST(&zone->uz_part_slab)) == NULL) {
				slab = LIST_FIRST(&zone->uz_free_slab);
				LIST_REMOVE(slab, us_link);
				LIST_INSERT_HEAD(&zone->uz_part_slab,
				    slab, us_link);
			}
		}
	}

	while (slab->us_freecount) {
		freei = slab->us_firstfree;
		slab->us_firstfree = slab->us_freelist[freei];
#ifdef INVARIANTS
		slab->us_freelist[freei] = 255;
#endif
		slab->us_freecount--;
		zone->uz_free--;
		item = slab->us_data + (zone->uz_rsize * freei);

		if (cache == NULL) {
			zone->uz_allocs++;
			break;
		}

		bucket->ub_bucket[++bucket->ub_ptr] = item;

		/* Don't overfill the bucket! */
		if (bucket->ub_ptr == cache->uc_count)
			break;
	}

	/* Move this slab to the full list */
	if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
	}

	if (cache != NULL) {
		/* Try to keep the buckets totally full, but don't block */
		if (bucket->ub_ptr < cache->uc_count) {
			wait = M_NOWAIT;
			goto new_slab;
		}
	}

	ZONE_UNLOCK(zone);

	/* Only construct at this time if we're not filling a bucket */
	if (cache == NULL) {
		if (zone->uz_ctor)
			zone->uz_ctor(item, zone->uz_size, udata);

		if (isitem != NULL)
			*isitem = 1;
	}

	return (item);
}
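
/*
 * Sketch of the embedded freelist the allocation loop above walks (the
 * indices are illustrative): us_firstfree holds the index of the first
 * free item and us_freelist[i] holds the index of the next free item
 * after item i, so with items 2, 0 and 3 free one might have:
 *
 *	us_firstfree = 2, us_freelist[2] = 0, us_freelist[0] = 3
 *
 * Allocation pops the head (freei = 2) and promotes us_freelist[2] to
 * us_firstfree; uma_zfree_internal() below pushes a freed index back the
 * same way, which is why one linkage byte per item is reserved in the
 * slab header.
 */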

/* See uma.h */
void
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
	uma_cache_t cache;
	uma_bucket_t bucket;
	int cpu;

	/* This is the fast path free */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
	cpu = PCPU_GET(cpuid);
	CPU_LOCK(zone, cpu);
	cache = &zone->uz_cpu[cpu];

zfree_start:
	bucket = cache->uc_freebucket;

	if (bucket) {
		/* Do we have room in our bucket? */
		if (bucket->ub_ptr < cache->uc_count) {
			bucket->ub_ptr++;
			KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
			    ("uma_zfree: Freeing to non free bucket index."));
			bucket->ub_bucket[bucket->ub_ptr] = item;
			CPU_UNLOCK(zone, cpu);
			if (zone->uz_dtor)
				zone->uz_dtor(item, zone->uz_size, udata);
			return;
		} else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
			printf("uma_zfree: Swapping buckets.\n");
#endif
			/*
			 * We have run out of space in our freebucket.
			 * See if we can switch with our alloc bucket.
			 */
			if (cache->uc_allocbucket->ub_ptr <
			    cache->uc_freebucket->ub_ptr) {
				uma_bucket_t swap;

				swap = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = swap;

				goto zfree_start;
			}
		}
	}

	/*
	 * We can get here for three reasons:
	 *
	 * 1) The buckets are NULL
	 * 2) The zone is INTERNAL, and so it has no buckets.
	 * 3) The alloc and free buckets are both somewhat full.
	 *
	 */
	ZONE_LOCK(zone);

	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
		bucket = cache->uc_freebucket;
		cache->uc_freebucket = NULL;

		/* Can we throw this on the zone full list? */
		if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
			printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
			/* ub_ptr is pointing to the last free item */
			KASSERT(bucket->ub_ptr != -1,
			    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
			/*zone->uz_free += bucket->ub_ptr + 1;*/
			LIST_INSERT_HEAD(&zone->uz_full_bucket,
			    bucket, ub_link);
			bucket = LIST_FIRST(&zone->uz_free_bucket);
			if (bucket)
				LIST_REMOVE(bucket, ub_link);
		}
		/*
		 * Do we need to alloc one?  Either the freebucket was NULL
		 * or the free_bucket list was empty.
		 */
		if (bucket == NULL) {
#ifdef UMA_DEBUG_ALLOC
			printf("uma_zfree: Allocating new free bucket.\n");
#endif
			/* This has to be done so we don't recurse on a lock */
			ZONE_UNLOCK(zone);
			CPU_UNLOCK(zone, cpu);
			bucket = uma_zalloc_internal(bucketzone,
			    NULL, M_NOWAIT, NULL, -1);
			CPU_LOCK(zone, cpu);
			ZONE_LOCK(zone);
			if (bucket) {
#ifdef INVARIANTS
				bzero(bucket, bucketzone->uz_size);
#endif
				bucket->ub_ptr = -1;
			}
			/* Did we lose the race? */
			if (cache->uc_freebucket != NULL) {
				if (bucket)
					uma_zfree_internal(bucketzone,
					    bucket, NULL, 0);
				ZONE_UNLOCK(zone);
				goto zfree_start;
			}
			/* If we couldn't get one just free directly */
			if (bucket == NULL)
				goto zfree_internal;
		}
		cache->uc_freebucket = bucket;
		ZONE_UNLOCK(zone);
		goto zfree_start;
	}

zfree_internal:
	CPU_UNLOCK(zone, cpu);
	ZONE_UNLOCK(zone);
	uma_zfree_internal(zone, item, udata, 0);

	return;
}
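
/*
 * The free path above mirrors the allocation path: each per cpu cache
 * keeps an allocbucket to pop items from and a freebucket to push items
 * into, and either side swaps the two when its own bucket runs dry (or
 * fills up) while the other still has room.  Only when both buckets are
 * unusable does the code fall through, take the zone lock and touch the
 * shared full/free bucket lists.
 */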

/*
 * Frees an item to an INTERNAL zone or allocates a free bucket
 *
 * Arguments:
 *	zone	The zone to free to
 *	item	The item we're freeing
 *	udata	User supplied data for the dtor
 *	skip	Skip the dtor, it was done in uma_zfree_arg
 */
static void
uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
{
	uma_slab_t slab;
	u_int8_t *mem;
	u_int8_t freei;

	ZONE_LOCK(zone);

	if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
			slab = hash_sfind(&zone->uz_hash, mem);
		else {
			mem += zone->uz_pgoff;
			slab = (uma_slab_t)mem;
		}
	} else {
		slab = (uma_slab_t)udata;
	}

	/* Do we need to remove from any lists? */
	if (slab->us_freecount + 1 == zone->uz_ipers) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
	} else if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
	}

	/* Slab management stuff */
	freei = ((unsigned long)item - (unsigned long)slab->us_data)
	    / zone->uz_rsize;
#ifdef INVARIANTS
	if (((freei * zone->uz_rsize) + slab->us_data) != item)
		panic("zone: %s(%p) slab %p freed address %p unaligned.\n",
		    zone->uz_name, zone, slab, item);
	if (freei >= zone->uz_ipers)
		panic("zone: %s(%p) slab %p freelist %i out of range 0-%d\n",
		    zone->uz_name, zone, slab, freei, zone->uz_ipers - 1);

	if (slab->us_freelist[freei] != 255) {
		printf("Slab at %p, freei %d = %d.\n",
		    slab, freei, slab->us_freelist[freei]);
		panic("Duplicate free of item %p from zone %p(%s)\n",
		    item, zone, zone->uz_name);
	}
#endif
	slab->us_freelist[freei] = slab->us_firstfree;
	slab->us_firstfree = freei;
	slab->us_freecount++;

	/* Zone statistics */
	zone->uz_free++;

	ZONE_UNLOCK(zone);

	if (!skip && zone->uz_dtor)
		zone->uz_dtor(item, zone->uz_size, udata);
}
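
/*
 * Worked example of the index math above (the addresses are
 * illustrative): with UMA_SLAB_MASK covering a 4096 byte slab, an item
 * at 0xc1234500 belongs to the slab page at 0xc1234000.  If us_data is
 * 0xc1234000 and uz_rsize is 256, then
 * freei = (0xc1234500 - 0xc1234000) / 256 = 5, and the INVARIANTS
 * checks verify that 5 * 256 lands exactly back on the item and that
 * 5 < uz_ipers before the index is pushed onto the freelist.
 */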

/* See uma.h */
void
uma_zone_set_max(uma_zone_t zone, int nitems)
{
	ZONE_LOCK(zone);
	if (zone->uz_ppera > 1)
		zone->uz_maxpages = nitems / zone->uz_ppera;
	else
		zone->uz_maxpages = nitems / zone->uz_ipers;
	ZONE_UNLOCK(zone);
}
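
/*
 * Example of the conversion above (illustrative numbers): for a single
 * page zone holding 15 items per slab, uma_zone_set_max(zone, 150)
 * yields uz_maxpages = 150 / 15 = 10 slabs' worth of pages, so the
 * effective item limit is rounded down to a whole number of slabs.
 */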

/* See uma.h */
void
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
	ZONE_LOCK(zone);

	zone->uz_freef = freef;

	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
{
	ZONE_LOCK(zone);

	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
	zone->uz_allocf = allocf;

	ZONE_UNLOCK(zone);
}

/* See uma.h */
int
uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
{
	int pages;
	vm_offset_t kva;

	ZONE_LOCK(zone);
	mtx_lock(&Giant);

	zone->uz_obj = obj;
	pages = count / zone->uz_ipers;

	if (pages * zone->uz_ipers < count)
		pages++;
	zone->uz_kva = NULL;
	ZONE_UNLOCK(zone);
	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
	ZONE_LOCK(zone);

	zone->uz_kva = kva;

	if (zone->uz_kva == 0) {
		ZONE_UNLOCK(zone);
		return (0);
	}

	zone->uz_maxpages = pages;

	if (zone->uz_obj == NULL)
		zone->uz_obj = vm_object_allocate(OBJT_DEFAULT,
		    zone->uz_maxpages);
	else
		_vm_object_allocate(OBJT_DEFAULT,
		    zone->uz_maxpages, zone->uz_obj);

	zone->uz_allocf = obj_alloc;
	zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;

	mtx_unlock(&Giant);
	ZONE_UNLOCK(zone);

	return (1);
}

/* See uma.h */
void
uma_prealloc(uma_zone_t zone, int items)
{
	int slabs;
	uma_slab_t slab;

	ZONE_LOCK(zone);
	slabs = items / zone->uz_ipers;
	if (slabs * zone->uz_ipers < items)
		slabs++;

	while (slabs > 0) {
		slab = slab_zalloc(zone, M_WAITOK);
		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
		slabs--;
	}
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_reclaim(void)
{
	/*
	 * You might think that the delay below would improve performance since
	 * the allocator will give away memory that it may ask for immediately.
	 * Really, it makes things worse, since cpu cycles are so much cheaper
	 * than disk activity.
	 */
#if 0
	static struct timeval tv = {0};
	struct timeval now;
	getmicrouptime(&now);
	if (now.tv_sec > tv.tv_sec + 30)
		tv = now;
	else
		return;
#endif
#ifdef UMA_DEBUG
	printf("UMA: vm asked us to release pages!\n");
#endif
	zone_foreach(zone_drain);

	/*
	 * The slab and bucket zones are visited early in the pass above,
	 * and draining the other zones frees slab headers and buckets back
	 * into them, so drain both again to release any pages that only
	 * became empty afterwards.
	 */
	zone_drain(slabzone);
	zone_drain(bucketzone);
}
1800 */ 1801 zone_drain(slabzone); 1802 zone_drain(bucketzone); 1803} 1804 1805void * 1806uma_large_malloc(int size, int wait) 1807{ 1808 void *mem; 1809 uma_slab_t slab; 1810 u_int8_t flags; 1811 1812 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1); 1813 if (slab == NULL) 1814 return (NULL); 1815 1816 mem = page_alloc(NULL, size, &flags, wait); 1817 if (mem) { 1818 slab->us_data = mem; 1819 slab->us_flags = flags | UMA_SLAB_MALLOC; 1820 slab->us_size = size; 1821 UMA_HASH_INSERT(mallochash, slab, mem); 1822 } else { 1823 uma_zfree_internal(slabzone, slab, NULL, 0); 1824 } 1825 1826 1827 return (mem); 1828} 1829 1830void 1831uma_large_free(uma_slab_t slab) 1832{ 1833 UMA_HASH_REMOVE(mallochash, slab, slab->us_data); 1834 page_free(slab->us_data, slab->us_size, slab->us_flags); 1835 uma_zfree_internal(slabzone, slab, NULL, 0); 1836} 1837 1838void 1839uma_print_stats(void) 1840{ 1841 zone_foreach(uma_print_zone); 1842} 1843 1844void 1845uma_print_zone(uma_zone_t zone) 1846{ 1847 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", 1848 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags, 1849 zone->uz_ipers, zone->uz_ppera, 1850 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free); 1851} 1852 1853/* 1854 * Sysctl handler for vm.zone 1855 * 1856 * stolen from vm_zone.c 1857 */ 1858static int 1859sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 1860{ 1861 int error, len, cnt; 1862 const int linesize = 128; /* conservative */ 1863 int totalfree; 1864 char *tmpbuf, *offset; 1865 uma_zone_t z; 1866 char *p; 1867 1868 cnt = 0; 1869 LIST_FOREACH(z, &uma_zones, uz_link) 1870 cnt++; 1871 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize, 1872 M_TEMP, M_WAITOK); 1873 len = snprintf(tmpbuf, linesize, 1874 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n"); 1875 if (cnt == 0) 1876 tmpbuf[len - 1] = '\0'; 1877 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len); 1878 if (error || cnt == 0) 1879 goto out; 1880 offset = tmpbuf; 1881 LIST_FOREACH(z, &uma_zones, uz_link) { 1882 if (cnt == 0) /* list may have changed size */ 1883 break; 1884 ZONE_LOCK(z); 1885 totalfree = z->uz_free + z->uz_cachefree; 1886 len = snprintf(offset, linesize, 1887 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", 1888 z->uz_name, z->uz_size, 1889 z->uz_maxpages * z->uz_ipers, 1890 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree, 1891 totalfree, 1892 (unsigned long long)z->uz_allocs); 1893 ZONE_UNLOCK(z); 1894 for (p = offset + 12; p > offset && *p == ' '; --p) 1895 /* nothing */ ; 1896 p[1] = ':'; 1897 cnt--; 1898 offset += len; 1899 } 1900 *offset++ = '\0'; 1901 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); 1902out: 1903 FREE(tmpbuf, M_TEMP); 1904 return (error); 1905}