uma_core.c (163702) uma_core.c (165809)
1/*-
2 * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff@FreeBSD.org>
3 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
4 * Copyright (c) 2004-2006 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * uma_core.c Implementation of the Universal Memory allocator
31 *
32 * This allocator is intended to replace the multitude of similar object caches
33 * in the standard FreeBSD kernel. The intent is to be flexible as well as
34 * efficient. A primary design goal is to return unused memory to the rest of
35 * the system. This will make the system as a whole more flexible due to the
36 * ability to move memory to subsystems which most need it instead of leaving
37 * pools of reserved memory unused.
38 *
39 * The basic ideas stem from similar slab/zone based allocators whose algorithms
40 * are well known.
41 *
42 */
43
44/*
45 * TODO:
46 * - Improve memory usage for large allocations
47 * - Investigate cache size adjustments
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/vm/uma_core.c 163702 2006-10-26 12:55:32Z rwatson $");
51__FBSDID("$FreeBSD: head/sys/vm/uma_core.c 165809 2007-01-05 19:09:01Z jhb $");
52
53/* I should really use ktr.. */
54/*
55#define UMA_DEBUG 1
56#define UMA_DEBUG_ALLOC 1
57#define UMA_DEBUG_ALLOC_1 1
58*/
59
60#include "opt_ddb.h"
61#include "opt_param.h"
62
63#include <sys/param.h>
64#include <sys/systm.h>
65#include <sys/kernel.h>
66#include <sys/types.h>
67#include <sys/queue.h>
68#include <sys/malloc.h>
69#include <sys/ktr.h>
70#include <sys/lock.h>
71#include <sys/sysctl.h>
72#include <sys/mutex.h>
73#include <sys/proc.h>
74#include <sys/sbuf.h>
75#include <sys/smp.h>
76#include <sys/vmmeter.h>
77
78#include <vm/vm.h>
79#include <vm/vm_object.h>
80#include <vm/vm_page.h>
81#include <vm/vm_param.h>
82#include <vm/vm_map.h>
83#include <vm/vm_kern.h>
84#include <vm/vm_extern.h>
85#include <vm/uma.h>
86#include <vm/uma_int.h>
87#include <vm/uma_dbg.h>
88
89#include <machine/vmparam.h>
90
91#include <ddb/ddb.h>
92
93/*
94 * This is the zone and keg from which all zones are spawned. The idea is that
95 * even the zone & keg heads are allocated from the allocator, so we use the
96 * bss section to bootstrap us.
97 */
98static struct uma_keg masterkeg;
99static struct uma_zone masterzone_k;
100static struct uma_zone masterzone_z;
101static uma_zone_t kegs = &masterzone_k;
102static uma_zone_t zones = &masterzone_z;
103
104/* This is the zone from which all of uma_slab_t's are allocated. */
105static uma_zone_t slabzone;
106static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
107
108/*
109 * The initial hash tables come out of this zone so they can be allocated
110 * prior to malloc coming up.
111 */
112static uma_zone_t hashzone;
113
114static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
115
116/*
117 * Are we allowed to allocate buckets?
118 */
119static int bucketdisable = 1;
120
121/* Linked list of all kegs in the system */
122static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
123
124/* This mutex protects the keg list */
125static struct mtx uma_mtx;
126
127/* Linked list of boot time pages */
128static LIST_HEAD(,uma_slab) uma_boot_pages =
129 LIST_HEAD_INITIALIZER(&uma_boot_pages);
130
131/* This mutex protects the boot time pages list */
132static struct mtx uma_boot_pages_mtx;
133
134/* Is the VM done starting up? */
135static int booted = 0;
136
137/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
138static u_int uma_max_ipers;
139static u_int uma_max_ipers_ref;
140
141/*
142 * This is the handle used to schedule events that need to happen
143 * outside of the allocation fast path.
144 */
145static struct callout uma_callout;
146#define UMA_TIMEOUT 20 /* Seconds for callout interval. */
147
148/*
149 * This structure is passed as the zone ctor arg so that I don't have to create
150 * a special allocation function just for zones.
151 */
152struct uma_zctor_args {
153 char *name;
154 size_t size;
155 uma_ctor ctor;
156 uma_dtor dtor;
157 uma_init uminit;
158 uma_fini fini;
159 uma_keg_t keg;
160 int align;
161 u_int32_t flags;
162};
163
164struct uma_kctor_args {
165 uma_zone_t zone;
166 size_t size;
167 uma_init uminit;
168 uma_fini fini;
169 int align;
170 u_int32_t flags;
171};
172
173struct uma_bucket_zone {
174 uma_zone_t ubz_zone;
175 char *ubz_name;
176 int ubz_entries;
177};
178
179#define BUCKET_MAX 128
180
181struct uma_bucket_zone bucket_zones[] = {
182 { NULL, "16 Bucket", 16 },
183 { NULL, "32 Bucket", 32 },
184 { NULL, "64 Bucket", 64 },
185 { NULL, "128 Bucket", 128 },
186 { NULL, NULL, 0}
187};
188
189#define BUCKET_SHIFT 4
190#define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
191
192/*
193 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
194 * of approximately the right size.
195 */
196static uint8_t bucket_size[BUCKET_ZONES];
197
198/*
199 * Flags and enumerations to be passed to internal functions.
200 */
201enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
202
203#define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */
204#define ZFREE_STATFREE 0x00000002 /* Update zone free statistic. */
205
206/* Prototypes.. */
207
208static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
209static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
210static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
211static void page_free(void *, int, u_int8_t);
212static uma_slab_t slab_zalloc(uma_zone_t, int);
213static void cache_drain(uma_zone_t);
214static void bucket_drain(uma_zone_t, uma_bucket_t);
215static void bucket_cache_drain(uma_zone_t zone);
216static int keg_ctor(void *, int, void *, int);
217static void keg_dtor(void *, int, void *);
218static int zone_ctor(void *, int, void *, int);
219static void zone_dtor(void *, int, void *);
220static int zero_init(void *, int, int);
221static void zone_small_init(uma_zone_t zone);
222static void zone_large_init(uma_zone_t zone);
223static void zone_foreach(void (*zfunc)(uma_zone_t));
224static void zone_timeout(uma_zone_t zone);
225static int hash_alloc(struct uma_hash *);
226static int hash_expand(struct uma_hash *, struct uma_hash *);
227static void hash_free(struct uma_hash *hash);
228static void uma_timeout(void *);
229static void uma_startup3(void);
230static void *uma_zalloc_internal(uma_zone_t, void *, int);
231static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
232 int);
233static void bucket_enable(void);
234static void bucket_init(void);
235static uma_bucket_t bucket_alloc(int, int);
236static void bucket_free(uma_bucket_t);
237static void bucket_zone_drain(void);
238static int uma_zalloc_bucket(uma_zone_t zone, int flags);
239static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
240static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
241static void zone_drain(uma_zone_t);
242static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
243 uma_fini fini, int align, u_int32_t flags);
244
245void uma_print_zone(uma_zone_t);
246void uma_print_stats(void);
247static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
248static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
249
250#ifdef WITNESS
251static int nosleepwithlocks = 1;
252#else
253static int nosleepwithlocks = 0;
254#endif
255SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
256 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
257SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
258
259SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
260 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
261
262SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
263 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
264
265/*
266 * This routine checks to see whether or not it's safe to enable buckets.
267 */
268
269static void
270bucket_enable(void)
271{
272 if (cnt.v_free_count < cnt.v_free_min)
273 bucketdisable = 1;
274 else
275 bucketdisable = 0;
276}
277
278/*
279 * Initialize bucket_zones, the array of zones of buckets of various sizes.
280 *
281 * For each zone, calculate the memory required for each bucket, consisting
282 * of the header and an array of pointers. Initialize bucket_size[] to point
283 * the range of appropriate bucket sizes at the zone.
284 */
285static void
286bucket_init(void)
287{
288 struct uma_bucket_zone *ubz;
289 int i;
290 int j;
291
292 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
293 int size;
294
295 ubz = &bucket_zones[j];
296 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
297 size += sizeof(void *) * ubz->ubz_entries;
298 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
299 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
300 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
301 bucket_size[i >> BUCKET_SHIFT] = j;
302 }
303}
304
305/*
306 * Given a desired number of entries for a bucket, return the zone from which
307 * to allocate the bucket.
308 */
309static struct uma_bucket_zone *
310bucket_zone_lookup(int entries)
311{
312 int idx;
313
314 idx = howmany(entries, 1 << BUCKET_SHIFT);
315 return (&bucket_zones[bucket_size[idx]]);
316}
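
/*
 * Editorial illustration (not part of the original source): with
 * BUCKET_SHIFT == 4, bucket_init() above fills bucket_size[] in steps of
 * 16 entries, so the table works out to { 0, 0, 1, 2, 2, 3, 3, 3, 3 },
 * i.e. indices into bucket_zones[] for the "16", "32", "64" and
 * "128 Bucket" zones.  A lookup then rounds a request up to the next
 * zone, e.g.:
 *
 *	idx = howmany(20, 1 << BUCKET_SHIFT);		(idx == 2)
 *	ubz = &bucket_zones[bucket_size[idx]];		("32 Bucket" zone)
 *
 * The table contents are just the result of running bucket_init() with
 * the constants defined above; nothing new is introduced here.
 */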
317
318static uma_bucket_t
319bucket_alloc(int entries, int bflags)
320{
321 struct uma_bucket_zone *ubz;
322 uma_bucket_t bucket;
323
324 /*
325 * This is to stop us from allocating per cpu buckets while we're
326 * running out of vm.boot_pages. Otherwise, we would exhaust the
327 * boot pages. This also prevents us from allocating buckets in
328 * low memory situations.
329 */
330 if (bucketdisable)
331 return (NULL);
332
333 ubz = bucket_zone_lookup(entries);
334 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
335 if (bucket) {
336#ifdef INVARIANTS
337 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
338#endif
339 bucket->ub_cnt = 0;
340 bucket->ub_entries = ubz->ubz_entries;
341 }
342
343 return (bucket);
344}
345
346static void
347bucket_free(uma_bucket_t bucket)
348{
349 struct uma_bucket_zone *ubz;
350
351 ubz = bucket_zone_lookup(bucket->ub_entries);
352 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
353 ZFREE_STATFREE);
354}
355
356static void
357bucket_zone_drain(void)
358{
359 struct uma_bucket_zone *ubz;
360
361 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
362 zone_drain(ubz->ubz_zone);
363}
364
365
366/*
367 * Routine called by timeout which is used to fire off some time interval
368 * based calculations. (stats, hash size, etc.)
369 *
370 * Arguments:
371 * arg Unused
372 *
373 * Returns:
374 * Nothing
375 */
376static void
377uma_timeout(void *unused)
378{
379 bucket_enable();
380 zone_foreach(zone_timeout);
381
382 /* Reschedule this event */
383 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
384}
385
386/*
387 * Routine to perform timeout driven calculations. This expands the
388 * hashes when the number of slabs outgrows the current hash size.
389 *
390 * Arguments:
391 * zone The zone to operate on
392 *
393 * Returns:
394 * Nothing
395 */
396static void
397zone_timeout(uma_zone_t zone)
398{
399 uma_keg_t keg;
400 u_int64_t alloc;
401
402 keg = zone->uz_keg;
403 alloc = 0;
404
405 /*
406 * Expand the zone hash table.
407 *
408 * This is done if the number of slabs is larger than the hash size.
409 * What I'm trying to do here is completely reduce collisions. This
410 * may be a little aggressive. Should I allow for two collisions max?
411 */
412 ZONE_LOCK(zone);
413 if (keg->uk_flags & UMA_ZONE_HASH &&
414 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
415 struct uma_hash newhash;
416 struct uma_hash oldhash;
417 int ret;
418
419 /*
420 * This is so involved because allocating and freeing
421 * while the zone lock is held will lead to deadlock.
422 * I have to do everything in stages and check for
423 * races.
424 */
425 newhash = keg->uk_hash;
426 ZONE_UNLOCK(zone);
427 ret = hash_alloc(&newhash);
428 ZONE_LOCK(zone);
429 if (ret) {
430 if (hash_expand(&keg->uk_hash, &newhash)) {
431 oldhash = keg->uk_hash;
432 keg->uk_hash = newhash;
433 } else
434 oldhash = newhash;
435
436 ZONE_UNLOCK(zone);
437 hash_free(&oldhash);
438 ZONE_LOCK(zone);
439 }
440 }
441 ZONE_UNLOCK(zone);
442}
443
444/*
445 * Allocate and zero fill the next sized hash table from the appropriate
446 * backing store.
447 *
448 * Arguments:
449 * hash A new hash structure with the old hash size in uh_hashsize
450 *
451 * Returns:
452 * 1 on success and 0 on failure.
453 */
454static int
455hash_alloc(struct uma_hash *hash)
456{
457 int oldsize;
458 int alloc;
459
460 oldsize = hash->uh_hashsize;
461
462 /* We're just going to go to a power of two greater */
463 if (oldsize) {
464 hash->uh_hashsize = oldsize * 2;
465 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
466 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
467 M_UMAHASH, M_NOWAIT);
468 } else {
469 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
470 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
471 M_WAITOK);
472 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
473 }
474 if (hash->uh_slab_hash) {
475 bzero(hash->uh_slab_hash, alloc);
476 hash->uh_hashmask = hash->uh_hashsize - 1;
477 return (1);
478 }
479
480 return (0);
481}
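
/*
 * Editorial note (hedged sketch, not in the original source): the very
 * first table for a keg comes from hashzone so that it can be allocated
 * before malloc(9) is available; every later call doubles the table.
 * Assuming UMA_HASH_SIZE_INIT were 32, a keg that keeps growing would see
 * table sizes of 32 -> 64 -> 128 -> ..., with uh_hashmask kept at
 * uh_hashsize - 1 so the hash macro can mask rather than divide.
 */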
482
483/*
484 * Expands the hash table for HASH zones. This is done from zone_timeout
485 * to reduce collisions. This must not be done in the regular allocation
486 * path, otherwise, we can recurse on the vm while allocating pages.
487 *
488 * Arguments:
489 * oldhash The hash you want to expand
490 * newhash The hash structure for the new table
491 *
492 * Returns:
493 * 1 if the table was expanded, 0 otherwise.
494 *
495 * Discussion:
496 */
497static int
498hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
499{
500 uma_slab_t slab;
501 int hval;
502 int i;
503
504 if (!newhash->uh_slab_hash)
505 return (0);
506
507 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
508 return (0);
509
510 /*
511 * I need to investigate hash algorithms for resizing without a
512 * full rehash.
513 */
514
515 for (i = 0; i < oldhash->uh_hashsize; i++)
516 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
517 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
518 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
519 hval = UMA_HASH(newhash, slab->us_data);
520 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
521 slab, us_hlink);
522 }
523
524 return (1);
525}
526
527/*
528 * Free the hash bucket to the appropriate backing store.
529 *
530 * Arguments:
531 * hash The hash structure whose bucket array is being freed
532 * (uh_hashsize selects the backing store it came from)
533 *
534 * Returns:
535 * Nothing
536 */
537static void
538hash_free(struct uma_hash *hash)
539{
540 if (hash->uh_slab_hash == NULL)
541 return;
542 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
543 uma_zfree_internal(hashzone,
544 hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
545 else
546 free(hash->uh_slab_hash, M_UMAHASH);
547}
548
549/*
550 * Frees all outstanding items in a bucket
551 *
552 * Arguments:
553 * zone The zone to free to, must be unlocked.
554 * bucket The free/alloc bucket with items, cpu queue must be locked.
555 *
556 * Returns:
557 * Nothing
558 */
559
560static void
561bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
562{
563 uma_slab_t slab;
564 int mzone;
565 void *item;
566
567 if (bucket == NULL)
568 return;
569
570 slab = NULL;
571 mzone = 0;
572
573 /* We have to lookup the slab again for malloc.. */
574 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
575 mzone = 1;
576
577 while (bucket->ub_cnt > 0) {
578 bucket->ub_cnt--;
579 item = bucket->ub_bucket[bucket->ub_cnt];
580#ifdef INVARIANTS
581 bucket->ub_bucket[bucket->ub_cnt] = NULL;
582 KASSERT(item != NULL,
583 ("bucket_drain: botched ptr, item is NULL"));
584#endif
585 /*
586 * This is extremely inefficient. The slab pointer was passed
587 * to uma_zfree_arg, but we lost it because the buckets don't
588 * hold them. This will go away when free() gets a size passed
589 * to it.
590 */
591 if (mzone)
592 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
593 uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
594 }
595}
596
597/*
598 * Drains the per cpu caches for a zone.
599 *
600 * NOTE: This may only be called while the zone is being torn down, and not
601 * during normal operation. This is necessary in order that we do not have
602 * to migrate CPUs to drain the per-CPU caches.
603 *
604 * Arguments:
605 * zone The zone to drain, must be unlocked.
606 *
607 * Returns:
608 * Nothing
609 */
610static void
611cache_drain(uma_zone_t zone)
612{
613 uma_cache_t cache;
614 int cpu;
615
616 /*
617 * XXX: It is safe to not lock the per-CPU caches, because we're
618 * tearing down the zone anyway. I.e., there will be no further use
619 * of the caches at this point.
620 *
621 * XXX: It would be good to be able to assert that the zone is being
622 * torn down to prevent improper use of cache_drain().
623 *
624 * XXX: We lock the zone before passing into bucket_cache_drain() as
625 * it is used elsewhere. Should the tear-down path be made special
626 * there in some form?
627 */
628 for (cpu = 0; cpu <= mp_maxid; cpu++) {
629 if (CPU_ABSENT(cpu))
630 continue;
631 cache = &zone->uz_cpu[cpu];
632 bucket_drain(zone, cache->uc_allocbucket);
633 bucket_drain(zone, cache->uc_freebucket);
634 if (cache->uc_allocbucket != NULL)
635 bucket_free(cache->uc_allocbucket);
636 if (cache->uc_freebucket != NULL)
637 bucket_free(cache->uc_freebucket);
638 cache->uc_allocbucket = cache->uc_freebucket = NULL;
639 }
640 ZONE_LOCK(zone);
641 bucket_cache_drain(zone);
642 ZONE_UNLOCK(zone);
643}
644
645/*
646 * Drain the cached buckets from a zone. Expects a locked zone on entry.
647 */
648static void
649bucket_cache_drain(uma_zone_t zone)
650{
651 uma_bucket_t bucket;
652
653 /*
654 * Drain the bucket queues and free the buckets, we just keep two per
655 * cpu (alloc/free).
656 */
657 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
658 LIST_REMOVE(bucket, ub_link);
659 ZONE_UNLOCK(zone);
660 bucket_drain(zone, bucket);
661 bucket_free(bucket);
662 ZONE_LOCK(zone);
663 }
664
665 /* Now we do the free queue.. */
666 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
667 LIST_REMOVE(bucket, ub_link);
668 bucket_free(bucket);
669 }
670}
671
672/*
673 * Frees pages from a zone back to the system. This is done on demand from
674 * the pageout daemon.
675 *
676 * Arguments:
677 * zone The zone to free pages from
679 *
680 * Returns:
681 * Nothing.
682 */
683static void
684zone_drain(uma_zone_t zone)
685{
686 struct slabhead freeslabs = { 0 };
687 uma_keg_t keg;
688 uma_slab_t slab;
689 uma_slab_t n;
690 u_int8_t flags;
691 u_int8_t *mem;
692 int i;
693
694 keg = zone->uz_keg;
695
696 /*
697 * We don't want to take pages from statically allocated zones at this
698 * time
699 */
700 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
701 return;
702
703 ZONE_LOCK(zone);
704
705#ifdef UMA_DEBUG
706 printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
707#endif
708 bucket_cache_drain(zone);
709 if (keg->uk_free == 0)
710 goto finished;
711
712 slab = LIST_FIRST(&keg->uk_free_slab);
713 while (slab) {
714 n = LIST_NEXT(slab, us_link);
715
716 /* We have nowhere to free these to */
717 if (slab->us_flags & UMA_SLAB_BOOT) {
718 slab = n;
719 continue;
720 }
721
722 LIST_REMOVE(slab, us_link);
723 keg->uk_pages -= keg->uk_ppera;
724 keg->uk_free -= keg->uk_ipers;
725
726 if (keg->uk_flags & UMA_ZONE_HASH)
727 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
728
729 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
730
731 slab = n;
732 }
733finished:
734 ZONE_UNLOCK(zone);
735
736 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
737 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
738 if (keg->uk_fini)
739 for (i = 0; i < keg->uk_ipers; i++)
740 keg->uk_fini(
741 slab->us_data + (keg->uk_rsize * i),
742 keg->uk_size);
743 flags = slab->us_flags;
744 mem = slab->us_data;
745
746 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
747 (keg->uk_flags & UMA_ZONE_REFCNT)) {
748 vm_object_t obj;
749
750 if (flags & UMA_SLAB_KMEM)
751 obj = kmem_object;
752 else
753 obj = NULL;
754 for (i = 0; i < keg->uk_ppera; i++)
755 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
756 obj);
757 }
758 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
759 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
760 SKIP_NONE, ZFREE_STATFREE);
761#ifdef UMA_DEBUG
762 printf("%s: Returning %d bytes.\n",
763 zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
764#endif
765 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
766 }
767}
768
769/*
770 * Allocate a new slab for a zone. This does not insert the slab onto a list.
771 *
772 * Arguments:
773 * zone The zone to allocate slabs for
774 * wait Shall we wait?
775 *
776 * Returns:
777 * The slab that was allocated or NULL if there is no memory and the
778 * caller specified M_NOWAIT.
779 */
780static uma_slab_t
781slab_zalloc(uma_zone_t zone, int wait)
782{
783 uma_slabrefcnt_t slabref;
784 uma_slab_t slab;
785 uma_keg_t keg;
786 u_int8_t *mem;
787 u_int8_t flags;
788 int i;
789
790 slab = NULL;
791 keg = zone->uz_keg;
792
793#ifdef UMA_DEBUG
794 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
795#endif
796 ZONE_UNLOCK(zone);
797
798 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
799 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
800 if (slab == NULL) {
801 ZONE_LOCK(zone);
802 return NULL;
803 }
804 }
805
806 /*
807 * This reproduces the old vm_zone behavior of zero filling pages the
808 * first time they are added to a zone.
809 *
810 * Malloced items are zeroed in uma_zalloc.
811 */
812
813 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
814 wait |= M_ZERO;
815 else
816 wait &= ~M_ZERO;
817
818 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
819 &flags, wait);
820 if (mem == NULL) {
821 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
822 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
823 SKIP_NONE, ZFREE_STATFREE);
824 ZONE_LOCK(zone);
825 return (NULL);
826 }
827
828 /* Point the slab into the allocated memory */
829 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
830 slab = (uma_slab_t )(mem + keg->uk_pgoff);
831
832 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
833 (keg->uk_flags & UMA_ZONE_REFCNT))
834 for (i = 0; i < keg->uk_ppera; i++)
835 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
836
837 slab->us_keg = keg;
838 slab->us_data = mem;
839 slab->us_freecount = keg->uk_ipers;
840 slab->us_firstfree = 0;
841 slab->us_flags = flags;
842
843 if (keg->uk_flags & UMA_ZONE_REFCNT) {
844 slabref = (uma_slabrefcnt_t)slab;
845 for (i = 0; i < keg->uk_ipers; i++) {
846 slabref->us_freelist[i].us_refcnt = 0;
847 slabref->us_freelist[i].us_item = i+1;
848 }
849 } else {
850 for (i = 0; i < keg->uk_ipers; i++)
851 slab->us_freelist[i].us_item = i+1;
852 }
853
854 if (keg->uk_init != NULL) {
855 for (i = 0; i < keg->uk_ipers; i++)
856 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
857 keg->uk_size, wait) != 0)
858 break;
859 if (i != keg->uk_ipers) {
860 if (keg->uk_fini != NULL) {
861 for (i--; i > -1; i--)
862 keg->uk_fini(slab->us_data +
863 (keg->uk_rsize * i),
864 keg->uk_size);
865 }
866 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
867 (keg->uk_flags & UMA_ZONE_REFCNT)) {
868 vm_object_t obj;
869
870 if (flags & UMA_SLAB_KMEM)
871 obj = kmem_object;
872 else
873 obj = NULL;
874 for (i = 0; i < keg->uk_ppera; i++)
875 vsetobj((vm_offset_t)mem +
876 (i * PAGE_SIZE), obj);
877 }
878 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
879 uma_zfree_internal(keg->uk_slabzone, slab,
880 NULL, SKIP_NONE, ZFREE_STATFREE);
881 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
882 flags);
883 ZONE_LOCK(zone);
884 return (NULL);
885 }
886 }
887 ZONE_LOCK(zone);
888
889 if (keg->uk_flags & UMA_ZONE_HASH)
890 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
891
892 keg->uk_pages += keg->uk_ppera;
893 keg->uk_free += keg->uk_ipers;
894
895 return (slab);
896}
897
898/*
899 * This function is intended to be used early on in place of page_alloc() so
900 * that we may use the boot time page cache to satisfy allocations before
901 * the VM is ready.
902 */
903static void *
904startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
905{
906 uma_keg_t keg;
907 uma_slab_t tmps;
908
909 keg = zone->uz_keg;
910
911 /*
912 * Check our small startup cache to see if it has pages remaining.
913 */
914 mtx_lock(&uma_boot_pages_mtx);
915 if ((tmps = LIST_FIRST(&uma_boot_pages)) != NULL) {
916 LIST_REMOVE(tmps, us_link);
917 mtx_unlock(&uma_boot_pages_mtx);
918 *pflag = tmps->us_flags;
919 return (tmps->us_data);
920 }
921 mtx_unlock(&uma_boot_pages_mtx);
922 if (booted == 0)
923 panic("UMA: Increase vm.boot_pages");
924 /*
925 * Now that we've booted, reset these users to their real allocator.
926 */
927#ifdef UMA_MD_SMALL_ALLOC
928 keg->uk_allocf = uma_small_alloc;
929#else
930 keg->uk_allocf = page_alloc;
931#endif
932 return keg->uk_allocf(zone, bytes, pflag, wait);
933}
934
935/*
936 * Allocates a number of pages from the system
937 *
938 * Arguments:
939 * zone Unused
940 * bytes The number of bytes requested
941 * wait Shall we wait?
942 *
943 * Returns:
944 * A pointer to the alloced memory or possibly
945 * NULL if M_NOWAIT is set.
946 */
947static void *
948page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
949{
950 void *p; /* Returned page */
951
952 *pflag = UMA_SLAB_KMEM;
953 p = (void *) kmem_malloc(kmem_map, bytes, wait);
954
955 return (p);
956}
957
958/*
959 * Allocates a number of pages from within an object
960 *
961 * Arguments:
962 * zone Unused
963 * bytes The number of bytes requested
964 * wait Shall we wait?
965 *
966 * Returns:
967 * A pointer to the alloced memory or possibly
968 * NULL if M_NOWAIT is set.
969 */
970static void *
971obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
972{
973 vm_object_t object;
974 vm_offset_t retkva, zkva;
975 vm_page_t p;
976 int pages, startpages;
977
978 object = zone->uz_keg->uk_obj;
979 retkva = 0;
980
981 /*
982 * This looks a little weird since we're getting one page at a time.
983 */
984 VM_OBJECT_LOCK(object);
985 p = TAILQ_LAST(&object->memq, pglist);
986 pages = p != NULL ? p->pindex + 1 : 0;
987 startpages = pages;
988 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
989 for (; bytes > 0; bytes -= PAGE_SIZE) {
990 p = vm_page_alloc(object, pages,
991 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
992 if (p == NULL) {
993 if (pages != startpages)
994 pmap_qremove(retkva, pages - startpages);
995 while (pages != startpages) {
996 pages--;
997 p = TAILQ_LAST(&object->memq, pglist);
998 vm_page_lock_queues();
999 vm_page_unwire(p, 0);
1000 vm_page_free(p);
1001 vm_page_unlock_queues();
1002 }
1003 retkva = 0;
1004 goto done;
1005 }
1006 pmap_qenter(zkva, &p, 1);
1007 if (retkva == 0)
1008 retkva = zkva;
1009 zkva += PAGE_SIZE;
1010 pages += 1;
1011 }
1012done:
1013 VM_OBJECT_UNLOCK(object);
1014 *flags = UMA_SLAB_PRIV;
1015
1016 return ((void *)retkva);
1017}
1018
1019/*
1020 * Frees a number of pages to the system
1021 *
1022 * Arguments:
1023 * mem A pointer to the memory to be freed
1024 * size The size of the memory being freed
1025 * flags The original p->us_flags field
1026 *
1027 * Returns:
1028 * Nothing
1029 */
1030static void
1031page_free(void *mem, int size, u_int8_t flags)
1032{
1033 vm_map_t map;
1034
1035 if (flags & UMA_SLAB_KMEM)
1036 map = kmem_map;
1037 else
1038 panic("UMA: page_free used with invalid flags %d\n", flags);
1039
1040 kmem_free(map, (vm_offset_t)mem, size);
1041}
1042
1043/*
1044 * Zero fill initializer
1045 *
1046 * Arguments/Returns follow uma_init specifications
1047 */
1048static int
1049zero_init(void *mem, int size, int flags)
1050{
1051 bzero(mem, size);
1052 return (0);
1053}
1054
1055/*
1056 * Finish creating a small uma zone. This calculates ipers, and the zone size.
1057 *
1058 * Arguments
1059 * zone The zone we should initialize
1060 *
1061 * Returns
1062 * Nothing
1063 */
1064static void
1065zone_small_init(uma_zone_t zone)
1066{
1067 uma_keg_t keg;
1068 u_int rsize;
1069 u_int memused;
1070 u_int wastedspace;
1071 u_int shsize;
1072
1073 keg = zone->uz_keg;
1074 KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
1075 rsize = keg->uk_size;
1076
1077 if (rsize < UMA_SMALLEST_UNIT)
1078 rsize = UMA_SMALLEST_UNIT;
1079 if (rsize & keg->uk_align)
1080 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1081
1082 keg->uk_rsize = rsize;
1083 keg->uk_ppera = 1;
1084
1085 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1086 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
1087 shsize = sizeof(struct uma_slab_refcnt);
1088 } else {
1089 rsize += UMA_FRITM_SZ; /* Account for linkage */
1090 shsize = sizeof(struct uma_slab);
1091 }
1092
1093 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1094 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
1095 memused = keg->uk_ipers * rsize + shsize;
1096 wastedspace = UMA_SLAB_SIZE - memused;
1097
1098 /*
1099 * We can't do OFFPAGE if we're internal or if we've been
1100 * asked to not go to the VM for buckets. If we do this we
1101 * may end up going to the VM (kmem_map) for slabs which we
1102 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1103 * result of UMA_ZONE_VM, which clearly forbids it.
1104 */
1105 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1106 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1107 return;
1108
1109 if ((wastedspace >= UMA_MAX_WASTE) &&
1110 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1111 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1112 KASSERT(keg->uk_ipers <= 255,
1113 ("zone_small_init: keg->uk_ipers too high!"));
1114#ifdef UMA_DEBUG
1115 printf("UMA decided we need offpage slab headers for "
1116 "zone: %s, calculated wastedspace = %d, "
1117 "maximum wasted space allowed = %d, "
1118 "calculated ipers = %d, "
1119 "new wasted space = %d\n", zone->uz_name, wastedspace,
1120 UMA_MAX_WASTE, keg->uk_ipers,
1121 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1122#endif
1123 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1124 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1125 keg->uk_flags |= UMA_ZONE_HASH;
1126 }
1127}
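
/*
 * Worked example (editorial sketch using hypothetical constants): take a
 * 256-byte, pointer-aligned item, a 4096-byte slab, a 32-byte
 * struct uma_slab and a 4-byte free-list linkage.  Then uk_rsize = 256,
 * the per-item cost is 260, uk_ipers = (4096 - 32) / 260 = 15,
 * memused = 15 * 260 + 32 = 3932 and wastedspace = 164.  If 164 exceeded
 * UMA_MAX_WASTE, then since 4096 / 256 = 16 > 15 the keg would switch to
 * an OFFPAGE slab header and pack 16 items per slab instead.  The real
 * constants live in the UMA and VM headers; the numbers above only
 * illustrate the arithmetic.
 */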
1128
1129/*
1130 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
1131 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
1132 * more complicated.
1133 *
1134 * Arguments
1135 * zone The zone we should initialize
1136 *
1137 * Returns
1138 * Nothing
1139 */
1140static void
1141zone_large_init(uma_zone_t zone)
1142{
1143 uma_keg_t keg;
1144 int pages;
1145
1146 keg = zone->uz_keg;
1147
1148 KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
1149 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1150 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1151
1152 pages = keg->uk_size / UMA_SLAB_SIZE;
1153
1154 /* Account for remainder */
1155 if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1156 pages++;
1157
1158 keg->uk_ppera = pages;
1159 keg->uk_ipers = 1;
1160
1161 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1162 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1163 keg->uk_flags |= UMA_ZONE_HASH;
1164
1165 keg->uk_rsize = keg->uk_size;
1166}
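
/*
 * Editorial example (assuming UMA_SLAB_SIZE == PAGE_SIZE == 4096): a
 * 5000-byte item gives pages = 5000 / 4096 = 1, the remainder bumps it
 * to 2, so uk_ppera = 2, uk_ipers = 1 and uk_rsize = 5000, with the slab
 * header always kept OFFPAGE as noted above.
 */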
1167
1168/*
1169 * Keg header ctor. This initializes all fields, locks, etc. And inserts
1170 * the keg onto the global keg list.
1171 *
1172 * Arguments/Returns follow uma_ctor specifications
1173 * udata Actually uma_kctor_args
1174 */
1175static int
1176keg_ctor(void *mem, int size, void *udata, int flags)
1177{
1178 struct uma_kctor_args *arg = udata;
1179 uma_keg_t keg = mem;
1180 uma_zone_t zone;
1181
1182 bzero(keg, size);
1183 keg->uk_size = arg->size;
1184 keg->uk_init = arg->uminit;
1185 keg->uk_fini = arg->fini;
1186 keg->uk_align = arg->align;
1187 keg->uk_free = 0;
1188 keg->uk_pages = 0;
1189 keg->uk_flags = arg->flags;
1190 keg->uk_allocf = page_alloc;
1191 keg->uk_freef = page_free;
1192 keg->uk_recurse = 0;
1193 keg->uk_slabzone = NULL;
1194
1195 /*
1196 * The master zone is passed to us at keg-creation time.
1197 */
1198 zone = arg->zone;
1199 zone->uz_keg = keg;
1200
1201 if (arg->flags & UMA_ZONE_VM)
1202 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1203
1204 if (arg->flags & UMA_ZONE_ZINIT)
1205 keg->uk_init = zero_init;
1206
1207 /*
1208 * The +UMA_FRITM_SZ added to uk_size is to account for the
1209 * linkage that is added to the size in zone_small_init(). If
1210 * we don't account for this here then we may end up in
1211 * zone_small_init() with a calculated 'ipers' of 0.
1212 */
1213 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1214 if ((keg->uk_size+UMA_FRITMREF_SZ) >
1215 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1216 zone_large_init(zone);
1217 else
1218 zone_small_init(zone);
1219 } else {
1220 if ((keg->uk_size+UMA_FRITM_SZ) >
1221 (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1222 zone_large_init(zone);
1223 else
1224 zone_small_init(zone);
1225 }
1226
1227 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1228 if (keg->uk_flags & UMA_ZONE_REFCNT)
1229 keg->uk_slabzone = slabrefzone;
1230 else
1231 keg->uk_slabzone = slabzone;
1232 }
1233
1234 /*
1235 * If we haven't booted yet we need allocations to go through the
1236 * startup cache until the vm is ready.
1237 */
1238 if (keg->uk_ppera == 1) {
1239#ifdef UMA_MD_SMALL_ALLOC
1240 keg->uk_allocf = uma_small_alloc;
1241 keg->uk_freef = uma_small_free;
1242#endif
1243 if (booted == 0)
1244 keg->uk_allocf = startup_alloc;
1245 }
1246
1247 /*
1248 * Initialize keg's lock (shared among zones) through
1249 * Master zone
1250 */
1251 zone->uz_lock = &keg->uk_lock;
1252 if (arg->flags & UMA_ZONE_MTXCLASS)
1253 ZONE_LOCK_INIT(zone, 1);
1254 else
1255 ZONE_LOCK_INIT(zone, 0);
1256
1257 /*
1258 * If we're putting the slab header in the actual page we need to
1259 * figure out where in each page it goes. This calculates a right
1260 * justified offset into the memory on an ALIGN_PTR boundary.
1261 */
1262 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1263 u_int totsize;
1264
1265 /* Size of the slab struct and free list */
1266 if (keg->uk_flags & UMA_ZONE_REFCNT)
1267 totsize = sizeof(struct uma_slab_refcnt) +
1268 keg->uk_ipers * UMA_FRITMREF_SZ;
1269 else
1270 totsize = sizeof(struct uma_slab) +
1271 keg->uk_ipers * UMA_FRITM_SZ;
1272
1273 if (totsize & UMA_ALIGN_PTR)
1274 totsize = (totsize & ~UMA_ALIGN_PTR) +
1275 (UMA_ALIGN_PTR + 1);
1276 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
1277
1278 if (keg->uk_flags & UMA_ZONE_REFCNT)
1279 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1280 + keg->uk_ipers * UMA_FRITMREF_SZ;
1281 else
1282 totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1283 + keg->uk_ipers * UMA_FRITM_SZ;
1284
1285 /*
1286 * The only way the following is possible is if with our
1287 * UMA_ALIGN_PTR adjustments we are now bigger than
1288 * UMA_SLAB_SIZE. I haven't checked whether this is
1289 * mathematically possible for all cases, so we make
1290 * sure here anyway.
1291 */
1292 if (totsize > UMA_SLAB_SIZE) {
1293 printf("zone %s ipers %d rsize %d size %d\n",
1294 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1295 keg->uk_size);
1296 panic("UMA slab won't fit.\n");
1297 }
1298 }
1299
1300 if (keg->uk_flags & UMA_ZONE_HASH)
1301 hash_alloc(&keg->uk_hash);
1302
1303#ifdef UMA_DEBUG
1304 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1305 zone->uz_name, zone,
1306 keg->uk_size, keg->uk_ipers,
1307 keg->uk_ppera, keg->uk_pgoff);
1308#endif
1309
1310 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1311
1312 mtx_lock(&uma_mtx);
1313 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1314 mtx_unlock(&uma_mtx);
1315 return (0);
1316}
1317
1318/*
1319 * Zone header ctor. This initializes all fields, locks, etc.
1320 *
1321 * Arguments/Returns follow uma_ctor specifications
1322 * udata Actually uma_zctor_args
1323 */
1324
1325static int
1326zone_ctor(void *mem, int size, void *udata, int flags)
1327{
1328 struct uma_zctor_args *arg = udata;
1329 uma_zone_t zone = mem;
1330 uma_zone_t z;
1331 uma_keg_t keg;
1332
1333 bzero(zone, size);
1334 zone->uz_name = arg->name;
1335 zone->uz_ctor = arg->ctor;
1336 zone->uz_dtor = arg->dtor;
1337 zone->uz_init = NULL;
1338 zone->uz_fini = NULL;
1339 zone->uz_allocs = 0;
1340 zone->uz_frees = 0;
1341 zone->uz_fails = 0;
1342 zone->uz_fills = zone->uz_count = 0;
1343
1344 if (arg->flags & UMA_ZONE_SECONDARY) {
1345 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1346 keg = arg->keg;
1347 zone->uz_keg = keg;
1348 zone->uz_init = arg->uminit;
1349 zone->uz_fini = arg->fini;
1350 zone->uz_lock = &keg->uk_lock;
1351 mtx_lock(&uma_mtx);
1352 ZONE_LOCK(zone);
1353 keg->uk_flags |= UMA_ZONE_SECONDARY;
1354 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1355 if (LIST_NEXT(z, uz_link) == NULL) {
1356 LIST_INSERT_AFTER(z, zone, uz_link);
1357 break;
1358 }
1359 }
1360 ZONE_UNLOCK(zone);
1361 mtx_unlock(&uma_mtx);
1362 } else if (arg->keg == NULL) {
1363 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1364 arg->align, arg->flags) == NULL)
1365 return (ENOMEM);
1366 } else {
1367 struct uma_kctor_args karg;
1368 int error;
1369
1370 /* We should only be here from uma_startup() */
1371 karg.size = arg->size;
1372 karg.uminit = arg->uminit;
1373 karg.fini = arg->fini;
1374 karg.align = arg->align;
1375 karg.flags = arg->flags;
1376 karg.zone = zone;
1377 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1378 flags);
1379 if (error)
1380 return (error);
1381 }
1382 keg = zone->uz_keg;
1383 zone->uz_lock = &keg->uk_lock;
1384
1385 /*
1386 * Some internal zones don't have room allocated for the per cpu
1387 * caches. If we're internal, bail out here.
1388 */
1389 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1390 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
1391 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1392 return (0);
1393 }
1394
1395 if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1396 zone->uz_count = BUCKET_MAX;
1397 else if (keg->uk_ipers <= BUCKET_MAX)
1398 zone->uz_count = keg->uk_ipers;
1399 else
1400 zone->uz_count = BUCKET_MAX;
1401 return (0);
1402}
1403
1404/*
1405 * Keg header dtor. This frees all data, destroys locks, frees the hash
1406 * table and removes the keg from the global list.
1407 *
1408 * Arguments/Returns follow uma_dtor specifications
1409 * udata unused
1410 */
1411static void
1412keg_dtor(void *arg, int size, void *udata)
1413{
1414 uma_keg_t keg;
1415
1416 keg = (uma_keg_t)arg;
1417 mtx_lock(&keg->uk_lock);
1418 if (keg->uk_free != 0) {
1419 printf("Freed UMA keg was not empty (%d items). "
1420 " Lost %d pages of memory.\n",
1421 keg->uk_free, keg->uk_pages);
1422 }
1423 mtx_unlock(&keg->uk_lock);
1424
1425 if (keg->uk_flags & UMA_ZONE_HASH)
1426 hash_free(&keg->uk_hash);
1427
1428 mtx_destroy(&keg->uk_lock);
1429}
1430
1431/*
1432 * Zone header dtor.
1433 *
1434 * Arguments/Returns follow uma_dtor specifications
1435 * udata unused
1436 */
1437static void
1438zone_dtor(void *arg, int size, void *udata)
1439{
1440 uma_zone_t zone;
1441 uma_keg_t keg;
1442
1443 zone = (uma_zone_t)arg;
1444 keg = zone->uz_keg;
1445
1446 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
1447 cache_drain(zone);
1448
1449 mtx_lock(&uma_mtx);
1450 zone_drain(zone);
1451 if (keg->uk_flags & UMA_ZONE_SECONDARY) {
1452 LIST_REMOVE(zone, uz_link);
1453 /*
1454 * XXX there are some races here where
1455 * the zone can be drained but zone lock
1456 * released and then refilled before we
1457 * remove it... we don't care for now
1458 */
1459 ZONE_LOCK(zone);
1460 if (LIST_EMPTY(&keg->uk_zones))
1461 keg->uk_flags &= ~UMA_ZONE_SECONDARY;
1462 ZONE_UNLOCK(zone);
1463 mtx_unlock(&uma_mtx);
1464 } else {
1465 LIST_REMOVE(keg, uk_link);
1466 LIST_REMOVE(zone, uz_link);
1467 mtx_unlock(&uma_mtx);
1468 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE,
1469 ZFREE_STATFREE);
1470 }
1471 zone->uz_keg = NULL;
1472}
1473
1474/*
1475 * Traverses every zone in the system and calls a callback
1476 *
1477 * Arguments:
1478 * zfunc A pointer to a function which accepts a zone
1479 * as an argument.
1480 *
1481 * Returns:
1482 * Nothing
1483 */
1484static void
1485zone_foreach(void (*zfunc)(uma_zone_t))
1486{
1487 uma_keg_t keg;
1488 uma_zone_t zone;
1489
1490 mtx_lock(&uma_mtx);
1491 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1492 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1493 zfunc(zone);
1494 }
1495 mtx_unlock(&uma_mtx);
1496}
1497
1498/* Public functions */
1499/* See uma.h */
1500void
1501uma_startup(void *bootmem, int boot_pages)
1502{
1503 struct uma_zctor_args args;
1504 uma_slab_t slab;
1505 u_int slabsize;
1506 u_int objsize, totsize, wsize;
1507 int i;
1508
1509#ifdef UMA_DEBUG
1510 printf("Creating uma keg headers zone and keg.\n");
1511#endif
1512 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1513
1514 /*
1515 * Figure out the maximum number of items-per-slab we'll have if
1516 * we're using the OFFPAGE slab header to track free items, given
1517 * all possible object sizes and the maximum desired wastage
1518 * (UMA_MAX_WASTE).
1519 *
1520 * We iterate until we find an object size for
1521 * which the calculated wastage in zone_small_init() will be
1522 * enough to warrant OFFPAGE. Since wastedspace versus objsize
1523 * is an overall increasing see-saw function, we find the smallest
1524 * objsize such that the wastage is always acceptable for objects
1525 * with that objsize or smaller. Since a smaller objsize always
1526 * generates a larger possible uma_max_ipers, we use this computed
1527 * objsize to calculate the largest ipers possible. Since the
1528 * ipers calculated for OFFPAGE slab headers is always larger than
1529 * the ipers initially calculated in zone_small_init(), we use
1530 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1531 * obtain the maximum ipers possible for offpage slab headers.
1532 *
1533 * It should be noted that ipers versus objsize is an inversely
1534 * proportional function which drops off rather quickly so as
1535 * long as our UMA_MAX_WASTE is such that the objsize we calculate
1536 * falls into the portion of the inverse relation AFTER the steep
1537 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1538 *
1539 * Note that we have 8-bits (1 byte) to use as a freelist index
1540 * inside the actual slab header itself and this is enough to
1541 * accommodate us. In the worst case, a UMA_SMALLEST_UNIT sized
1542 * object with offpage slab header would have ipers =
1543 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1544 * 1 greater than what our byte-integer freelist index can
1545 * accommodate, but we know that this situation never occurs as
1546 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1547 * that we need to go to offpage slab headers. Or, if we do,
1548 * then we trap that condition below and panic in the INVARIANTS case.
1549 */
1550 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1551 totsize = wsize;
1552 objsize = UMA_SMALLEST_UNIT;
1553 while (totsize >= wsize) {
1554 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1555 (objsize + UMA_FRITM_SZ);
1556 totsize *= (UMA_FRITM_SZ + objsize);
1557 objsize++;
1558 }
1559 if (objsize > UMA_SMALLEST_UNIT)
1560 objsize--;
1561 uma_max_ipers = UMA_SLAB_SIZE / objsize;
1562
1563 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1564 totsize = wsize;
1565 objsize = UMA_SMALLEST_UNIT;
1566 while (totsize >= wsize) {
1567 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1568 (objsize + UMA_FRITMREF_SZ);
1569 totsize *= (UMA_FRITMREF_SZ + objsize);
1570 objsize++;
1571 }
1572 if (objsize > UMA_SMALLEST_UNIT)
1573 objsize--;
1574 uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
1575
1576 KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1577 ("uma_startup: calculated uma_max_ipers values too large!"));
1578
1579#ifdef UMA_DEBUG
1580 printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1581 printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
1582 uma_max_ipers_ref);
1583#endif
1584
1585 /* "manually" create the initial zone */
1586 args.name = "UMA Kegs";
1587 args.size = sizeof(struct uma_keg);
1588 args.ctor = keg_ctor;
1589 args.dtor = keg_dtor;
1590 args.uminit = zero_init;
1591 args.fini = NULL;
1592 args.keg = &masterkeg;
1593 args.align = 32 - 1;
1594 args.flags = UMA_ZFLAG_INTERNAL;
1595 /* The initial zone has no Per cpu queues so it's smaller */
1596 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1597
1598#ifdef UMA_DEBUG
1599 printf("Filling boot free list.\n");
1600#endif
1601 for (i = 0; i < boot_pages; i++) {
1602 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1603 slab->us_data = (u_int8_t *)slab;
1604 slab->us_flags = UMA_SLAB_BOOT;
1605 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1606 }
1607 mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1608
1609#ifdef UMA_DEBUG
1610 printf("Creating uma zone headers zone and keg.\n");
1611#endif
1612 args.name = "UMA Zones";
1613 args.size = sizeof(struct uma_zone) +
1614 (sizeof(struct uma_cache) * (mp_maxid + 1));
1615 args.ctor = zone_ctor;
1616 args.dtor = zone_dtor;
1617 args.uminit = zero_init;
1618 args.fini = NULL;
1619 args.keg = NULL;
1620 args.align = 32 - 1;
1621 args.flags = UMA_ZFLAG_INTERNAL;
1622 /* The initial zone has no Per cpu queues so it's smaller */
1623 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1624
1625#ifdef UMA_DEBUG
1626 printf("Initializing pcpu cache locks.\n");
1627#endif
1628#ifdef UMA_DEBUG
1629 printf("Creating slab and hash zones.\n");
1630#endif
1631
1632 /*
1633 * This is the max number of free list items we'll have with
1634 * offpage slabs.
1635 */
1636 slabsize = uma_max_ipers * UMA_FRITM_SZ;
1637 slabsize += sizeof(struct uma_slab);
1638
1639 /* Now make a zone for slab headers */
1640 slabzone = uma_zcreate("UMA Slabs",
1641 slabsize,
1642 NULL, NULL, NULL, NULL,
1643 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1644
1645 /*
1646 * We also create a zone for the bigger slabs with reference
1647 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1648 */
1649 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1650 slabsize += sizeof(struct uma_slab_refcnt);
1651 slabrefzone = uma_zcreate("UMA RCntSlabs",
1652 slabsize,
1653 NULL, NULL, NULL, NULL,
1654 UMA_ALIGN_PTR,
1655 UMA_ZFLAG_INTERNAL);
1656
1657 hashzone = uma_zcreate("UMA Hash",
1658 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1659 NULL, NULL, NULL, NULL,
1660 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1661
1662 bucket_init();
1663
1664#ifdef UMA_MD_SMALL_ALLOC
1665 booted = 1;
1666#endif
1667
1668#ifdef UMA_DEBUG
1669 printf("UMA startup complete.\n");
1670#endif
1671}
1672
1673/* see uma.h */
1674void
1675uma_startup2(void)
1676{
1677 booted = 1;
1678 bucket_enable();
1679#ifdef UMA_DEBUG
1680 printf("UMA startup2 complete.\n");
1681#endif
1682}
1683
1684/*
1685 * Initialize our callout handle
1686 *
1687 */
1688
1689static void
1690uma_startup3(void)
1691{
1692#ifdef UMA_DEBUG
1693 printf("Starting callout.\n");
1694#endif
1695 callout_init(&uma_callout, CALLOUT_MPSAFE);
1696 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1697#ifdef UMA_DEBUG
1698 printf("UMA startup3 complete.\n");
1699#endif
1700}
1701
1702static uma_zone_t
1703uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1704 int align, u_int32_t flags)
1705{
1706 struct uma_kctor_args args;
1707
1708 args.size = size;
1709 args.uminit = uminit;
1710 args.fini = fini;
1711 args.align = align;
1712 args.flags = flags;
1713 args.zone = zone;
1714 return (uma_zalloc_internal(kegs, &args, M_WAITOK));
1715}
1716
1717/* See uma.h */
1718uma_zone_t
1719uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1720 uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1721
1722{
1723 struct uma_zctor_args args;
1724
1725 /* This stuff is essential for the zone ctor */
1726 args.name = name;
1727 args.size = size;
1728 args.ctor = ctor;
1729 args.dtor = dtor;
1730 args.uminit = uminit;
1731 args.fini = fini;
1732 args.align = align;
1733 args.flags = flags;
1734 args.keg = NULL;
1735
1736 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1737}
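
/*
 * Editorial usage sketch (consumer side, not part of this file): callers
 * normally pair uma_zcreate() with the uma_zalloc()/uma_zfree() wrappers
 * declared in uma.h.  "foo" and struct foo below are placeholders:
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, fp);
 *	uma_zdestroy(foo_zone);
 *
 * ctor/dtor/uminit/fini may be NULL as shown when no per-item setup or
 * teardown is needed.
 */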
1738
1739/* See uma.h */
1740uma_zone_t
1741uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1742 uma_init zinit, uma_fini zfini, uma_zone_t master)
1743{
1744 struct uma_zctor_args args;
1745
1746 args.name = name;
1747 args.size = master->uz_keg->uk_size;
1748 args.ctor = ctor;
1749 args.dtor = dtor;
1750 args.uminit = zinit;
1751 args.fini = zfini;
1752 args.align = master->uz_keg->uk_align;
1753 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
1754 args.keg = master->uz_keg;
1755
1756 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1757}
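
/*
 * Editorial sketch (hypothetical names): a secondary zone shares its
 * master's keg, and therefore its slabs and item size, but layers its own
 * ctor/dtor and zinit/zfini on top:
 *
 *	rawzone = uma_zcreate("foo raw", sizeof(struct foo),
 *	    NULL, NULL, foo_init, foo_fini, UMA_ALIGN_PTR, 0);
 *	cookedzone = uma_zsecond_create("foo cooked", foo_ctor, foo_dtor,
 *	    NULL, NULL, rawzone);
 *
 * Items allocated from either zone come from the same keg; only the
 * per-allocation hooks differ (the mbuf packet zone is built this way on
 * top of the mbuf keg).
 */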
1758
1759/* See uma.h */
1760void
1761uma_zdestroy(uma_zone_t zone)
1762{
1763
1764 uma_zfree_internal(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
1765}
1766
1767/* See uma.h */
1768void *
1769uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1770{
1771 void *item;
1772 uma_cache_t cache;
1773 uma_bucket_t bucket;
1774 int cpu;
1775 int badness;
1776
1777 /* This is the fast path allocation */
1778#ifdef UMA_DEBUG_ALLOC_1
1779 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1780#endif
1781 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1782 zone->uz_name, flags);
1783
1784 if (!(flags & M_NOWAIT)) {
1785 KASSERT(curthread->td_intr_nesting_level == 0,
1786 ("malloc(M_WAITOK) in interrupt context"));
1787 if (nosleepwithlocks) {
1788#ifdef WITNESS
1789 badness = WITNESS_CHECK(WARN_GIANTOK | WARN_SLEEPOK,
1790 NULL,
1791 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
1792 zone->uz_name);
1793#else
1794 badness = 1;
1795#endif
1796 } else {
1797 badness = 0;
1798 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1799 "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
1800 }
1801 if (badness) {
1802 flags &= ~M_WAITOK;
1803 flags |= M_NOWAIT;
1804 }
1805 }
1806
1807 /*
1808 * If possible, allocate from the per-CPU cache. There are two
1809 * requirements for safe access to the per-CPU cache: (1) the thread
1810 * accessing the cache must not be preempted or yield during access,
1811 * and (2) the thread must not migrate CPUs without switching which
1812 * cache it accesses. We rely on a critical section to prevent
1813 * preemption and migration. We release the critical section in
1814 * order to acquire the zone mutex if we are unable to allocate from
1815 * the current cache; when we re-acquire the critical section, we
1816 * must detect and handle migration if it has occurred.
1817 */
1818zalloc_restart:
1819 critical_enter();
1820 cpu = curcpu;
1821 cache = &zone->uz_cpu[cpu];
1822
1823zalloc_start:
1824 bucket = cache->uc_allocbucket;
1825
1826 if (bucket) {
1827 if (bucket->ub_cnt > 0) {
1828 bucket->ub_cnt--;
1829 item = bucket->ub_bucket[bucket->ub_cnt];
1830#ifdef INVARIANTS
1831 bucket->ub_bucket[bucket->ub_cnt] = NULL;
1832#endif
1833 KASSERT(item != NULL,
1834 ("uma_zalloc: Bucket pointer mangled."));
1835 cache->uc_allocs++;
1836 critical_exit();
1837#ifdef INVARIANTS
1838 ZONE_LOCK(zone);
1839 uma_dbg_alloc(zone, NULL, item);
1840 ZONE_UNLOCK(zone);
1841#endif
1842 if (zone->uz_ctor != NULL) {
1843 if (zone->uz_ctor(item, zone->uz_keg->uk_size,
1844 udata, flags) != 0) {
1845 uma_zfree_internal(zone, item, udata,
1846 SKIP_DTOR, ZFREE_STATFAIL |
1847 ZFREE_STATFREE);
1848 return (NULL);
1849 }
1850 }
1851 if (flags & M_ZERO)
1852 bzero(item, zone->uz_keg->uk_size);
1853 return (item);
1854 } else if (cache->uc_freebucket) {
1855 /*
1856 * We have run out of items in our allocbucket.
1857 * See if we can switch with our free bucket.
1858 */
1859 if (cache->uc_freebucket->ub_cnt > 0) {
1860#ifdef UMA_DEBUG_ALLOC
1861 printf("uma_zalloc: Swapping empty with"
1862 " alloc.\n");
1863#endif
1864 bucket = cache->uc_freebucket;
1865 cache->uc_freebucket = cache->uc_allocbucket;
1866 cache->uc_allocbucket = bucket;
1867
1868 goto zalloc_start;
1869 }
1870 }
1871 }
1872 /*
1873 * The attempt to retrieve the item from the per-CPU cache has failed, so
1874 * we must go back to the zone. This requires the zone lock, so we
1875 * must drop the critical section, then re-acquire it when we go back
1876 * to the cache. Since the critical section is released, we may be
1877 * preempted or migrate. As such, make sure not to maintain any
1878 * thread-local state specific to the cache from prior to releasing
1879 * the critical section.
1880 */
1881 critical_exit();
1882 ZONE_LOCK(zone);
1883 critical_enter();
1884 cpu = curcpu;
1885 cache = &zone->uz_cpu[cpu];
1886 bucket = cache->uc_allocbucket;
1887 if (bucket != NULL) {
1888 if (bucket->ub_cnt > 0) {
1889 ZONE_UNLOCK(zone);
1890 goto zalloc_start;
1891 }
1892 bucket = cache->uc_freebucket;
1893 if (bucket != NULL && bucket->ub_cnt > 0) {
1894 ZONE_UNLOCK(zone);
1895 goto zalloc_start;
1896 }
1897 }
1898
1899 /* Since we have locked the zone we may as well send back our stats */
1900 zone->uz_allocs += cache->uc_allocs;
1901 cache->uc_allocs = 0;
1902 zone->uz_frees += cache->uc_frees;
1903 cache->uc_frees = 0;
1904
1905 /* Our old one is now a free bucket */
1906 if (cache->uc_allocbucket) {
1907 KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1908 ("uma_zalloc_arg: Freeing a non free bucket."));
1909 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1910 cache->uc_allocbucket, ub_link);
1911 cache->uc_allocbucket = NULL;
1912 }
1913
1914 /* Check the free list for a new alloc bucket */
1915 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1916 KASSERT(bucket->ub_cnt != 0,
1917 ("uma_zalloc_arg: Returning an empty bucket."));
1918
1919 LIST_REMOVE(bucket, ub_link);
1920 cache->uc_allocbucket = bucket;
1921 ZONE_UNLOCK(zone);
1922 goto zalloc_start;
1923 }
1924 /* We are no longer associated with this CPU. */
1925 critical_exit();
1926
1927		/* Bump up our uz_count so we get here less often */
1928 if (zone->uz_count < BUCKET_MAX)
1929 zone->uz_count++;
1930
1931 /*
1932	 * Now let's just fill a bucket and put it on the free list.  If that
1933	 * works we'll restart the allocation from the beginning.
1934 */
1935 if (uma_zalloc_bucket(zone, flags)) {
1936 ZONE_UNLOCK(zone);
1937 goto zalloc_restart;
1938 }
1939 ZONE_UNLOCK(zone);
1940 /*
1941 * We may not be able to get a bucket so return an actual item.
1942 */
1943#ifdef UMA_DEBUG
1944 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1945#endif
1946
1947 return (uma_zalloc_internal(zone, udata, flags));
1948}
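
/*
 * Example (illustrative; "foo_zone" and "struct foo" are hypothetical
 * consumer names): callers normally go through the uma_zalloc() wrapper,
 * which passes NULL udata to uma_zalloc_arg(), and must tolerate NULL
 * when M_NOWAIT is used:
 *
 *	struct foo *fp;
 *
 *	fp = uma_zalloc(foo_zone, M_NOWAIT | M_ZERO);
 *	if (fp == NULL)
 *		return (ENOMEM);
 *	...
 *	uma_zfree(foo_zone, fp);
 */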
1949
1950static uma_slab_t
1951uma_zone_slab(uma_zone_t zone, int flags)
1952{
1953 uma_slab_t slab;
1954 uma_keg_t keg;
1955
1956 keg = zone->uz_keg;
1957
1958 /*
1959 * This is to prevent us from recursively trying to allocate
1960 * buckets. The problem is that if an allocation forces us to
1961 * grab a new bucket we will call page_alloc, which will go off
1962 * and cause the vm to allocate vm_map_entries. If we need new
1963 * buckets there too we will recurse in kmem_alloc and bad
1964 * things happen. So instead we return a NULL bucket, and make
1965 * the code that allocates buckets smart enough to deal with it
1966 *
1967 * XXX: While we want this protection for the bucket zones so that
1968 * recursion from the VM is handled (and the calling code that
1969 * allocates buckets knows how to deal with it), we do not want
1970 * to prevent allocation from the slab header zones (slabzone
1971 * and slabrefzone) if uk_recurse is not zero for them. The
1972 * reason is that it could lead to NULL being returned for
1973 * slab header allocations even in the M_WAITOK case, and the
1974 * caller can't handle that.
1975 */
1976 if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
1977 if ((zone != slabzone) && (zone != slabrefzone))
1978 return (NULL);
1979
1980 slab = NULL;
1981
1982 for (;;) {
1983 /*
1984 * Find a slab with some space. Prefer slabs that are partially
1985 * used over those that are totally full. This helps to reduce
1986 * fragmentation.
1987 */
1988 if (keg->uk_free != 0) {
1989 if (!LIST_EMPTY(&keg->uk_part_slab)) {
1990 slab = LIST_FIRST(&keg->uk_part_slab);
1991 } else {
1992 slab = LIST_FIRST(&keg->uk_free_slab);
1993 LIST_REMOVE(slab, us_link);
1994 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
1995 us_link);
1996 }
1997 return (slab);
1998 }
1999
2000 /*
2001 * M_NOVM means don't ask at all!
2002 */
2003 if (flags & M_NOVM)
2004 break;
2005
2006 if (keg->uk_maxpages &&
2007 keg->uk_pages >= keg->uk_maxpages) {
2008 keg->uk_flags |= UMA_ZFLAG_FULL;
2009
2010 if (flags & M_NOWAIT)
2011 break;
2012 else
2013 msleep(keg, &keg->uk_lock, PVM,
2014 "zonelimit", 0);
2015 continue;
2016 }
2017 keg->uk_recurse++;
2018 slab = slab_zalloc(zone, flags);
2019 keg->uk_recurse--;
2020
2021 /*
2022 * If we got a slab here it's safe to mark it partially used
2023 * and return. We assume that the caller is going to remove
2024 * at least one item.
2025 */
2026 if (slab) {
2027 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2028 return (slab);
2029 }
2030 /*
2031 * We might not have been able to get a slab but another cpu
2032 * could have while we were unlocked. Check again before we
2033 * fail.
2034 */
2035 if (flags & M_NOWAIT)
2036 flags |= M_NOVM;
2037 }
2038 return (slab);
2039}
2040
2041static void *
2042uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
2043{
2044 uma_keg_t keg;
2045 uma_slabrefcnt_t slabref;
2046 void *item;
2047 u_int8_t freei;
2048
2049 keg = zone->uz_keg;
2050
2051 freei = slab->us_firstfree;
2052 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2053 slabref = (uma_slabrefcnt_t)slab;
2054 slab->us_firstfree = slabref->us_freelist[freei].us_item;
2055 } else {
2056 slab->us_firstfree = slab->us_freelist[freei].us_item;
2057 }
2058 item = slab->us_data + (keg->uk_rsize * freei);
2059
2060 slab->us_freecount--;
2061 keg->uk_free--;
2062#ifdef INVARIANTS
2063 uma_dbg_alloc(zone, slab, item);
2064#endif
2065 /* Move this slab to the full list */
2066 if (slab->us_freecount == 0) {
2067 LIST_REMOVE(slab, us_link);
2068 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2069 }
2070
2071 return (item);
2072}
2073
2074static int
2075uma_zalloc_bucket(uma_zone_t zone, int flags)
2076{
2077 uma_bucket_t bucket;
2078 uma_slab_t slab;
2079 int16_t saved;
2080 int max, origflags = flags;
2081
2082 /*
2083 * Try this zone's free list first so we don't allocate extra buckets.
2084 */
2085 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2086 KASSERT(bucket->ub_cnt == 0,
2087 ("uma_zalloc_bucket: Bucket on free list is not empty."));
2088 LIST_REMOVE(bucket, ub_link);
2089 } else {
2090 int bflags;
2091
2092 bflags = (flags & ~M_ZERO);
2093 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2094 bflags |= M_NOVM;
2095
2096 ZONE_UNLOCK(zone);
2097 bucket = bucket_alloc(zone->uz_count, bflags);
2098 ZONE_LOCK(zone);
2099 }
2100
2101 if (bucket == NULL)
2102 return (0);
2103
2104#ifdef SMP
2105 /*
2106 * This code is here to limit the number of simultaneous bucket fills
2107 * for any given zone to the number of per cpu caches in this zone. This
2108 * is done so that we don't allocate more memory than we really need.
2109 */
2110 if (zone->uz_fills >= mp_ncpus)
2111 goto done;
2112
2113#endif
2114 zone->uz_fills++;
2115
2116 max = MIN(bucket->ub_entries, zone->uz_count);
2117 /* Try to keep the buckets totally full */
2118 saved = bucket->ub_cnt;
2119 while (bucket->ub_cnt < max &&
2120 (slab = uma_zone_slab(zone, flags)) != NULL) {
2121 while (slab->us_freecount && bucket->ub_cnt < max) {
2122 bucket->ub_bucket[bucket->ub_cnt++] =
2123 uma_slab_alloc(zone, slab);
2124 }
2125
2126 /* Don't block on the next fill */
2127 flags |= M_NOWAIT;
2128 }
2129
2130 /*
2131 * We unlock here because we need to call the zone's init.
2132 * It should be safe to unlock because the slab dealt with
2133 * above is already on the appropriate list within the keg
2134 * and the bucket we filled is not yet on any list, so we
2135 * own it.
2136 */
2137 if (zone->uz_init != NULL) {
2138 int i;
2139
2140 ZONE_UNLOCK(zone);
2141 for (i = saved; i < bucket->ub_cnt; i++)
2142 if (zone->uz_init(bucket->ub_bucket[i],
2143 zone->uz_keg->uk_size, origflags) != 0)
2144 break;
2145 /*
2146 * If we couldn't initialize the whole bucket, put the
2147 * rest back onto the freelist.
2148 */
2149 if (i != bucket->ub_cnt) {
2150 int j;
2151
2152 for (j = i; j < bucket->ub_cnt; j++) {
2153 uma_zfree_internal(zone, bucket->ub_bucket[j],
2154 NULL, SKIP_FINI, 0);
2155#ifdef INVARIANTS
2156 bucket->ub_bucket[j] = NULL;
2157#endif
2158 }
2159 bucket->ub_cnt = i;
2160 }
2161 ZONE_LOCK(zone);
2162 }
2163
2164 zone->uz_fills--;
2165 if (bucket->ub_cnt != 0) {
2166 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2167 bucket, ub_link);
2168 return (1);
2169 }
2170#ifdef SMP
2171done:
2172#endif
2173 bucket_free(bucket);
2174
2175 return (0);
2176}
2177/*
2178 * Allocates an item for an internal zone
2179 *
2180 * Arguments
2181 * zone The zone to alloc for.
2182 * udata The data to be passed to the constructor.
2183 * flags M_WAITOK, M_NOWAIT, M_ZERO.
2184 *
2185 * Returns
2186 * NULL if there is no memory and M_NOWAIT is set
2187 * An item if successful
2188 */
2189
2190static void *
2191uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
2192{
2193 uma_keg_t keg;
2194 uma_slab_t slab;
2195 void *item;
2196
2197 item = NULL;
2198 keg = zone->uz_keg;
2199
2200#ifdef UMA_DEBUG_ALLOC
2201 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2202#endif
2203 ZONE_LOCK(zone);
2204
2205 slab = uma_zone_slab(zone, flags);
2206 if (slab == NULL) {
2207 zone->uz_fails++;
2208 ZONE_UNLOCK(zone);
2209 return (NULL);
2210 }
2211
2212 item = uma_slab_alloc(zone, slab);
2213
2214 zone->uz_allocs++;
2215
2216 ZONE_UNLOCK(zone);
2217
2218 /*
2219 * We have to call both the zone's init (not the keg's init)
2220 * and the zone's ctor. This is because the item is going from
2221 * a keg slab directly to the user, and the user is expecting it
2222 * to be both zone-init'd as well as zone-ctor'd.
2223 */
2224 if (zone->uz_init != NULL) {
2225 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
2226 uma_zfree_internal(zone, item, udata, SKIP_FINI,
2227 ZFREE_STATFAIL | ZFREE_STATFREE);
2228 return (NULL);
2229 }
2230 }
2231 if (zone->uz_ctor != NULL) {
2232 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
2233 uma_zfree_internal(zone, item, udata, SKIP_DTOR,
2234 ZFREE_STATFAIL | ZFREE_STATFREE);
2235 return (NULL);
2236 }
2237 }
2238 if (flags & M_ZERO)
2239 bzero(item, keg->uk_size);
2240
2241 return (item);
2242}
2243
2244/* See uma.h */
2245void
2246uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2247{
2248 uma_keg_t keg;
2249 uma_cache_t cache;
2250 uma_bucket_t bucket;
2251 int bflags;
2252 int cpu;
2253
2254 keg = zone->uz_keg;
2255
2256#ifdef UMA_DEBUG_ALLOC_1
2257 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2258#endif
2259 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2260 zone->uz_name);
2261
2262 if (zone->uz_dtor)
2263 zone->uz_dtor(item, keg->uk_size, udata);
2264#ifdef INVARIANTS
2265 ZONE_LOCK(zone);
2266 if (keg->uk_flags & UMA_ZONE_MALLOC)
2267 uma_dbg_free(zone, udata, item);
2268 else
2269 uma_dbg_free(zone, NULL, item);
2270 ZONE_UNLOCK(zone);
2271#endif
2272 /*
2273 * The race here is acceptable. If we miss it we'll just have to wait
2274 * a little longer for the limits to be reset.
2275 */
2276 if (keg->uk_flags & UMA_ZFLAG_FULL)
2277 goto zfree_internal;
2278
2279 /*
2280 * If possible, free to the per-CPU cache. There are two
2281 * requirements for safe access to the per-CPU cache: (1) the thread
2282 * accessing the cache must not be preempted or yield during access,
2283 * and (2) the thread must not migrate CPUs without switching which
2284 * cache it accesses. We rely on a critical section to prevent
2285 * preemption and migration. We release the critical section in
2286 * order to acquire the zone mutex if we are unable to free to the
2287 * current cache; when we re-acquire the critical section, we must
2288 * detect and handle migration if it has occurred.
2289 */
2290zfree_restart:
2291 critical_enter();
2292 cpu = curcpu;
2293 cache = &zone->uz_cpu[cpu];
2294
2295zfree_start:
2296 bucket = cache->uc_freebucket;
2297
2298 if (bucket) {
2299 /*
2300 * Do we have room in our bucket? It is OK for this uz count
2301 * check to be slightly out of sync.
2302 */
2303
2304 if (bucket->ub_cnt < bucket->ub_entries) {
2305 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2306 ("uma_zfree: Freeing to non free bucket index."));
2307 bucket->ub_bucket[bucket->ub_cnt] = item;
2308 bucket->ub_cnt++;
2309 cache->uc_frees++;
2310 critical_exit();
2311 return;
2312 } else if (cache->uc_allocbucket) {
2313#ifdef UMA_DEBUG_ALLOC
2314 printf("uma_zfree: Swapping buckets.\n");
2315#endif
2316 /*
2317 * We have run out of space in our freebucket.
2318 * See if we can switch with our alloc bucket.
2319 */
2320 if (cache->uc_allocbucket->ub_cnt <
2321 cache->uc_freebucket->ub_cnt) {
2322 bucket = cache->uc_freebucket;
2323 cache->uc_freebucket = cache->uc_allocbucket;
2324 cache->uc_allocbucket = bucket;
2325 goto zfree_start;
2326 }
2327 }
2328 }
2329 /*
2330 * We can get here for two reasons:
2331 *
2332 * 1) The buckets are NULL
2333 * 2) The alloc and free buckets are both somewhat full.
2334 *
2335	 * We must go back to the zone, which requires acquiring the zone lock,
2336 * which in turn means we must release and re-acquire the critical
2337 * section. Since the critical section is released, we may be
2338 * preempted or migrate. As such, make sure not to maintain any
2339 * thread-local state specific to the cache from prior to releasing
2340 * the critical section.
2341 */
2342 critical_exit();
2343 ZONE_LOCK(zone);
2344 critical_enter();
2345 cpu = curcpu;
2346 cache = &zone->uz_cpu[cpu];
2347 if (cache->uc_freebucket != NULL) {
2348 if (cache->uc_freebucket->ub_cnt <
2349 cache->uc_freebucket->ub_entries) {
2350 ZONE_UNLOCK(zone);
2351 goto zfree_start;
2352 }
2353 if (cache->uc_allocbucket != NULL &&
2354 (cache->uc_allocbucket->ub_cnt <
2355 cache->uc_freebucket->ub_cnt)) {
2356 ZONE_UNLOCK(zone);
2357 goto zfree_start;
2358 }
2359 }
2360
2361 /* Since we have locked the zone we may as well send back our stats */
2362 zone->uz_allocs += cache->uc_allocs;
2363 cache->uc_allocs = 0;
2364 zone->uz_frees += cache->uc_frees;
2365 cache->uc_frees = 0;
2366
2367 bucket = cache->uc_freebucket;
2368 cache->uc_freebucket = NULL;
2369
2370 /* Can we throw this on the zone full list? */
2371 if (bucket != NULL) {
2372#ifdef UMA_DEBUG_ALLOC
2373 printf("uma_zfree: Putting old bucket on the free list.\n");
2374#endif
2375 /* ub_cnt is pointing to the last free item */
2376 KASSERT(bucket->ub_cnt != 0,
2377 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2378 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2379 bucket, ub_link);
2380 }
2381 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2382 LIST_REMOVE(bucket, ub_link);
2383 ZONE_UNLOCK(zone);
2384 cache->uc_freebucket = bucket;
2385 goto zfree_start;
2386 }
2387 /* We are no longer associated with this CPU. */
2388 critical_exit();
2389
2390 /* And the zone.. */
2391 ZONE_UNLOCK(zone);
2392
2393#ifdef UMA_DEBUG_ALLOC
2394 printf("uma_zfree: Allocating new free bucket.\n");
2395#endif
2396 bflags = M_NOWAIT;
2397
2398 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2399 bflags |= M_NOVM;
2400 bucket = bucket_alloc(zone->uz_count, bflags);
2401 if (bucket) {
2402 ZONE_LOCK(zone);
2403 LIST_INSERT_HEAD(&zone->uz_free_bucket,
2404 bucket, ub_link);
2405 ZONE_UNLOCK(zone);
2406 goto zfree_restart;
2407 }
2408
2409 /*
2410 * If nothing else caught this, we'll just do an internal free.
2411 */
2412zfree_internal:
2413 uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
2414
2415 return;
2416}
2417
2418/*
2419 * Frees an item to an INTERNAL zone or allocates a free bucket
2420 *
2421 * Arguments:
2422 * zone The zone to free to
2423 * item The item we're freeing
2424 * udata User supplied data for the dtor
2425 * skip Skip dtors and finis
2426 */
2427static void
2428uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
2429 enum zfreeskip skip, int flags)
2430{
2431 uma_slab_t slab;
2432 uma_slabrefcnt_t slabref;
2433 uma_keg_t keg;
2434 u_int8_t *mem;
2435 u_int8_t freei;
2436
2437 keg = zone->uz_keg;
2438
2439 if (skip < SKIP_DTOR && zone->uz_dtor)
2440 zone->uz_dtor(item, keg->uk_size, udata);
2441 if (skip < SKIP_FINI && zone->uz_fini)
2442 zone->uz_fini(item, keg->uk_size);
2443
2444 ZONE_LOCK(zone);
2445
2446 if (flags & ZFREE_STATFAIL)
2447 zone->uz_fails++;
2448 if (flags & ZFREE_STATFREE)
2449 zone->uz_frees++;
2450
2451 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
2452 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2453 if (keg->uk_flags & UMA_ZONE_HASH)
2454 slab = hash_sfind(&keg->uk_hash, mem);
2455 else {
2456 mem += keg->uk_pgoff;
2457 slab = (uma_slab_t)mem;
2458 }
2459 } else {
2460 slab = (uma_slab_t)udata;
2461 }
2462
2463 /* Do we need to remove from any lists? */
2464 if (slab->us_freecount+1 == keg->uk_ipers) {
2465 LIST_REMOVE(slab, us_link);
2466 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2467 } else if (slab->us_freecount == 0) {
2468 LIST_REMOVE(slab, us_link);
2469 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2470 }
2471
2472 /* Slab management stuff */
2473 freei = ((unsigned long)item - (unsigned long)slab->us_data)
2474 / keg->uk_rsize;
2475
2476#ifdef INVARIANTS
2477 if (!skip)
2478 uma_dbg_free(zone, slab, item);
2479#endif
2480
2481 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2482 slabref = (uma_slabrefcnt_t)slab;
2483 slabref->us_freelist[freei].us_item = slab->us_firstfree;
2484 } else {
2485 slab->us_freelist[freei].us_item = slab->us_firstfree;
2486 }
2487 slab->us_firstfree = freei;
2488 slab->us_freecount++;
2489
2490 /* Zone statistics */
2491 keg->uk_free++;
2492
2493 if (keg->uk_flags & UMA_ZFLAG_FULL) {
2494 if (keg->uk_pages < keg->uk_maxpages)
2495 keg->uk_flags &= ~UMA_ZFLAG_FULL;
2496
2497 /* We can handle one more allocation */
2498 wakeup_one(keg);
2499 }
2500
2501 ZONE_UNLOCK(zone);
2502}
2503
2504/* See uma.h */
2505void
2506uma_zone_set_max(uma_zone_t zone, int nitems)
2507{
2508 uma_keg_t keg;
2509
2510 keg = zone->uz_keg;
2511 ZONE_LOCK(zone);
2512 if (keg->uk_ppera > 1)
2513 keg->uk_maxpages = nitems * keg->uk_ppera;
2514 else
2515 keg->uk_maxpages = nitems / keg->uk_ipers;
2516
2517 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2518 keg->uk_maxpages++;
2519
2520 ZONE_UNLOCK(zone);
2521}
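
/*
 * Worked example of the rounding above (hypothetical numbers): with
 * uk_ppera == 1 and uk_ipers == 100, a request for nitems == 250 first
 * yields uk_maxpages = 250 / 100 = 2; since 2 * 100 < 250 the page count
 * is bumped to 3, so the effective cap is 300 items rather than 250.
 */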
2522
2523/* See uma.h */
2524void
2525uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2526{
2527 ZONE_LOCK(zone);
2528 KASSERT(zone->uz_keg->uk_pages == 0,
2529 ("uma_zone_set_init on non-empty keg"));
2530 zone->uz_keg->uk_init = uminit;
2531 ZONE_UNLOCK(zone);
2532}
2533
2534/* See uma.h */
2535void
2536uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2537{
2538 ZONE_LOCK(zone);
2539 KASSERT(zone->uz_keg->uk_pages == 0,
2540 ("uma_zone_set_fini on non-empty keg"));
2541 zone->uz_keg->uk_fini = fini;
2542 ZONE_UNLOCK(zone);
2543}
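
/*
 * Example (hypothetical names): attaching keg-level init/fini routines is
 * only legal while the keg is still empty, as the assertions above enforce,
 * so it is done immediately after zone creation.  foo_init must match
 * uma_init (int (*)(void *, int, int)) and foo_fini must match uma_fini
 * (void (*)(void *, int)):
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	uma_zone_set_init(foo_zone, foo_init);
 *	uma_zone_set_fini(foo_zone, foo_fini);
 */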
2544
2545/* See uma.h */
2546void
2547uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2548{
2549 ZONE_LOCK(zone);
2550 KASSERT(zone->uz_keg->uk_pages == 0,
2551 ("uma_zone_set_zinit on non-empty keg"));
2552 zone->uz_init = zinit;
2553 ZONE_UNLOCK(zone);
2554}
2555
2556/* See uma.h */
2557void
2558uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2559{
2560 ZONE_LOCK(zone);
2561 KASSERT(zone->uz_keg->uk_pages == 0,
2562 ("uma_zone_set_zfini on non-empty keg"));
2563 zone->uz_fini = zfini;
2564 ZONE_UNLOCK(zone);
2565}
2566
2567/* See uma.h */
2568/* XXX uk_freef is not actually used with the zone locked */
2569void
2570uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2571{
2572 ZONE_LOCK(zone);
2573 zone->uz_keg->uk_freef = freef;
2574 ZONE_UNLOCK(zone);
2575}
2576
2577/* See uma.h */
2578/* XXX uk_allocf is not actually used with the zone locked */
2579void
2580uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2581{
2582 ZONE_LOCK(zone);
2583 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2584 zone->uz_keg->uk_allocf = allocf;
2585 ZONE_UNLOCK(zone);
2586}
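
/*
 * Example (hypothetical; foo_pool_get() is not a real interface): a zone
 * that wants its backing pages from a private pool supplies a function
 * with the uma_alloc signature used by the built-in page allocators, and
 * would normally pair it with uma_zone_set_freef():
 *
 *	static void *
 *	foo_page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
 *	{
 *		*pflag = UMA_SLAB_PRIV;
 *		return (foo_pool_get(bytes, wait));
 *	}
 *
 *	uma_zone_set_allocf(foo_zone, foo_page_alloc);
 */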
2587
2588/* See uma.h */
2589int
2590uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2591{
2592 uma_keg_t keg;
2593 vm_offset_t kva;
2594 int pages;
2595
2596 keg = zone->uz_keg;
2597 pages = count / keg->uk_ipers;
2598
2599 if (pages * keg->uk_ipers < count)
2600 pages++;
2601
2602 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2603
2604 if (kva == 0)
2605 return (0);
2606 if (obj == NULL) {
2607 obj = vm_object_allocate(OBJT_DEFAULT,
2608 pages);
2609 } else {
2610 VM_OBJECT_LOCK_INIT(obj, "uma object");
2611 _vm_object_allocate(OBJT_DEFAULT,
2612 pages, obj);
2613 }
2614 ZONE_LOCK(zone);
2615 keg->uk_kva = kva;
2616 keg->uk_obj = obj;
2617 keg->uk_maxpages = pages;
2618 keg->uk_allocf = obj_alloc;
2619 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2620 ZONE_UNLOCK(zone);
2621 return (1);
2622}
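
/*
 * Example (hypothetical zone and count): backing a zone with its own VM
 * object sized for at most 1024 items; passing obj == NULL lets UMA
 * allocate the object itself, and a zero return means the KVA reservation
 * failed:
 *
 *	if (uma_zone_set_obj(foo_zone, NULL, 1024) == 0)
 *		panic("foo_zone: could not reserve KVA");
 */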
2623
2624/* See uma.h */
2625void
2626uma_prealloc(uma_zone_t zone, int items)
2627{
2628 int slabs;
2629 uma_slab_t slab;
2630 uma_keg_t keg;
2631
2632 keg = zone->uz_keg;
2633 ZONE_LOCK(zone);
2634 slabs = items / keg->uk_ipers;
2635 if (slabs * keg->uk_ipers < items)
2636 slabs++;
2637 while (slabs > 0) {
2638 slab = slab_zalloc(zone, M_WAITOK);
2639 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2640 slabs--;
2641 }
2642 ZONE_UNLOCK(zone);
2643}
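
/*
 * Example (hypothetical count): reserving slabs for roughly 1000 items up
 * front so that early M_NOWAIT allocations do not have to go to the VM:
 *
 *	uma_prealloc(foo_zone, 1000);
 */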
2644
2645/* See uma.h */
2646u_int32_t *
2647uma_find_refcnt(uma_zone_t zone, void *item)
2648{
2649 uma_slabrefcnt_t slabref;
2650 uma_keg_t keg;
2651 u_int32_t *refcnt;
2652 int idx;
2653
2654 keg = zone->uz_keg;
2655 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2656 (~UMA_SLAB_MASK));
2657 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2658 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2659 idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2660 / keg->uk_rsize;
2661 refcnt = &slabref->us_freelist[idx].us_refcnt;
2662 return refcnt;
2663}
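
/*
 * Example (hypothetical zone created with UMA_ZONE_REFCNT): the caller
 * owns the returned counter and is responsible for initializing it:
 *
 *	u_int32_t *refcnt;
 *
 *	refcnt = uma_find_refcnt(foo_refcnt_zone, mem);
 *	*refcnt = 1;
 */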
2664
2665/* See uma.h */
2666void
2667uma_reclaim(void)
2668{
2669#ifdef UMA_DEBUG
2670 printf("UMA: vm asked us to release pages!\n");
2671#endif
2672 bucket_enable();
2673 zone_foreach(zone_drain);
2674 /*
2675	 * Some slabs may have been freed, but this zone is visited early in the
2676	 * pass above, so we visit it again in order to free pages that become
2677	 * empty only once the other zones are drained.  The same goes for buckets.
2678 */
2679 zone_drain(slabzone);
2680 zone_drain(slabrefzone);
2681 bucket_zone_drain();
2682}
2683
52
53/* I should really use ktr.. */
54/*
55#define UMA_DEBUG 1
56#define UMA_DEBUG_ALLOC 1
57#define UMA_DEBUG_ALLOC_1 1
58*/
59
60#include "opt_ddb.h"
61#include "opt_param.h"
62
63#include <sys/param.h>
64#include <sys/systm.h>
65#include <sys/kernel.h>
66#include <sys/types.h>
67#include <sys/queue.h>
68#include <sys/malloc.h>
69#include <sys/ktr.h>
70#include <sys/lock.h>
71#include <sys/sysctl.h>
72#include <sys/mutex.h>
73#include <sys/proc.h>
74#include <sys/sbuf.h>
75#include <sys/smp.h>
76#include <sys/vmmeter.h>
77
78#include <vm/vm.h>
79#include <vm/vm_object.h>
80#include <vm/vm_page.h>
81#include <vm/vm_param.h>
82#include <vm/vm_map.h>
83#include <vm/vm_kern.h>
84#include <vm/vm_extern.h>
85#include <vm/uma.h>
86#include <vm/uma_int.h>
87#include <vm/uma_dbg.h>
88
89#include <machine/vmparam.h>
90
91#include <ddb/ddb.h>
92
93/*
94 * This is the zone and keg from which all zones are spawned. The idea is that
95 * even the zone & keg heads are allocated from the allocator, so we use the
96 * bss section to bootstrap us.
97 */
98static struct uma_keg masterkeg;
99static struct uma_zone masterzone_k;
100static struct uma_zone masterzone_z;
101static uma_zone_t kegs = &masterzone_k;
102static uma_zone_t zones = &masterzone_z;
103
104/* This is the zone from which all of uma_slab_t's are allocated. */
105static uma_zone_t slabzone;
106static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
107
108/*
109 * The initial hash tables come out of this zone so they can be allocated
110 * prior to malloc coming up.
111 */
112static uma_zone_t hashzone;
113
114static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
115
116/*
117 * Are we allowed to allocate buckets?
118 */
119static int bucketdisable = 1;
120
121/* Linked list of all kegs in the system */
122static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
123
124/* This mutex protects the keg list */
125static struct mtx uma_mtx;
126
127/* Linked list of boot time pages */
128static LIST_HEAD(,uma_slab) uma_boot_pages =
129 LIST_HEAD_INITIALIZER(&uma_boot_pages);
130
131/* This mutex protects the boot time pages list */
132static struct mtx uma_boot_pages_mtx;
133
134/* Is the VM done starting up? */
135static int booted = 0;
136
137/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
138static u_int uma_max_ipers;
139static u_int uma_max_ipers_ref;
140
141/*
142 * This is the handle used to schedule events that need to happen
143 * outside of the allocation fast path.
144 */
145static struct callout uma_callout;
146#define UMA_TIMEOUT 20 /* Seconds for callout interval. */
147
148/*
149 * This structure is passed as the zone ctor arg so that I don't have to create
150 * a special allocation function just for zones.
151 */
152struct uma_zctor_args {
153 char *name;
154 size_t size;
155 uma_ctor ctor;
156 uma_dtor dtor;
157 uma_init uminit;
158 uma_fini fini;
159 uma_keg_t keg;
160 int align;
161 u_int32_t flags;
162};
163
164struct uma_kctor_args {
165 uma_zone_t zone;
166 size_t size;
167 uma_init uminit;
168 uma_fini fini;
169 int align;
170 u_int32_t flags;
171};
172
173struct uma_bucket_zone {
174 uma_zone_t ubz_zone;
175 char *ubz_name;
176 int ubz_entries;
177};
178
179#define BUCKET_MAX 128
180
181struct uma_bucket_zone bucket_zones[] = {
182 { NULL, "16 Bucket", 16 },
183 { NULL, "32 Bucket", 32 },
184 { NULL, "64 Bucket", 64 },
185 { NULL, "128 Bucket", 128 },
186 { NULL, NULL, 0}
187};
188
189#define BUCKET_SHIFT 4
190#define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
191
192/*
193 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
194 * of approximately the right size.
195 */
196static uint8_t bucket_size[BUCKET_ZONES];
197
198/*
199 * Flags and enumerations to be passed to internal functions.
200 */
201enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
202
203#define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */
204#define ZFREE_STATFREE 0x00000002 /* Update zone free statistic. */
205
206/* Prototypes.. */
207
208static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
209static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
210static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
211static void page_free(void *, int, u_int8_t);
212static uma_slab_t slab_zalloc(uma_zone_t, int);
213static void cache_drain(uma_zone_t);
214static void bucket_drain(uma_zone_t, uma_bucket_t);
215static void bucket_cache_drain(uma_zone_t zone);
216static int keg_ctor(void *, int, void *, int);
217static void keg_dtor(void *, int, void *);
218static int zone_ctor(void *, int, void *, int);
219static void zone_dtor(void *, int, void *);
220static int zero_init(void *, int, int);
221static void zone_small_init(uma_zone_t zone);
222static void zone_large_init(uma_zone_t zone);
223static void zone_foreach(void (*zfunc)(uma_zone_t));
224static void zone_timeout(uma_zone_t zone);
225static int hash_alloc(struct uma_hash *);
226static int hash_expand(struct uma_hash *, struct uma_hash *);
227static void hash_free(struct uma_hash *hash);
228static void uma_timeout(void *);
229static void uma_startup3(void);
230static void *uma_zalloc_internal(uma_zone_t, void *, int);
231static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
232 int);
233static void bucket_enable(void);
234static void bucket_init(void);
235static uma_bucket_t bucket_alloc(int, int);
236static void bucket_free(uma_bucket_t);
237static void bucket_zone_drain(void);
238static int uma_zalloc_bucket(uma_zone_t zone, int flags);
239static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
240static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
241static void zone_drain(uma_zone_t);
242static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
243 uma_fini fini, int align, u_int32_t flags);
244
245void uma_print_zone(uma_zone_t);
246void uma_print_stats(void);
247static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
248static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
249
250#ifdef WITNESS
251static int nosleepwithlocks = 1;
252#else
253static int nosleepwithlocks = 0;
254#endif
255SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
256 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
257SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
258
259SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
260 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
261
262SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
263 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
264
265/*
266 * This routine checks to see whether or not it's safe to enable buckets.
267 */
268
269static void
270bucket_enable(void)
271{
272 if (cnt.v_free_count < cnt.v_free_min)
273 bucketdisable = 1;
274 else
275 bucketdisable = 0;
276}
277
278/*
279 * Initialize bucket_zones, the array of zones of buckets of various sizes.
280 *
281 * For each zone, calculate the memory required for each bucket, consisting
282	 * of the header and an array of pointers.  Initialize bucket_size[] so that
283	 * each range of requested bucket sizes maps to the appropriate zone.
284 */
285static void
286bucket_init(void)
287{
288 struct uma_bucket_zone *ubz;
289 int i;
290 int j;
291
292 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
293 int size;
294
295 ubz = &bucket_zones[j];
296 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
297 size += sizeof(void *) * ubz->ubz_entries;
298 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
299 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
300 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
301 bucket_size[i >> BUCKET_SHIFT] = j;
302 }
303}
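
/*
 * For comparison with the internal bucket zones created above, an ordinary
 * consumer zone is created through the same interface; the name, size and
 * callbacks here are hypothetical:
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    foo_ctor, foo_dtor, NULL, NULL, UMA_ALIGN_PTR, 0);
 */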
304
305/*
306 * Given a desired number of entries for a bucket, return the zone from which
307 * to allocate the bucket.
308 */
309static struct uma_bucket_zone *
310bucket_zone_lookup(int entries)
311{
312 int idx;
313
314 idx = howmany(entries, 1 << BUCKET_SHIFT);
315 return (&bucket_zones[bucket_size[idx]]);
316}
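
/*
 * Example of the mapping built by bucket_init(): a request for 20 entries
 * gives idx = howmany(20, 16) = 2 and bucket_size[2] selects the
 * "32 Bucket" zone; a request for 48 entries gives idx = 3 and the
 * "64 Bucket" zone.  Requests are always rounded up to the smallest zone
 * that can hold them.
 */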
317
318static uma_bucket_t
319bucket_alloc(int entries, int bflags)
320{
321 struct uma_bucket_zone *ubz;
322 uma_bucket_t bucket;
323
324 /*
325 * This is to stop us from allocating per cpu buckets while we're
326 * running out of vm.boot_pages. Otherwise, we would exhaust the
327 * boot pages. This also prevents us from allocating buckets in
328 * low memory situations.
329 */
330 if (bucketdisable)
331 return (NULL);
332
333 ubz = bucket_zone_lookup(entries);
334 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
335 if (bucket) {
336#ifdef INVARIANTS
337 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
338#endif
339 bucket->ub_cnt = 0;
340 bucket->ub_entries = ubz->ubz_entries;
341 }
342
343 return (bucket);
344}
345
346static void
347bucket_free(uma_bucket_t bucket)
348{
349 struct uma_bucket_zone *ubz;
350
351 ubz = bucket_zone_lookup(bucket->ub_entries);
352 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
353 ZFREE_STATFREE);
354}
355
356static void
357bucket_zone_drain(void)
358{
359 struct uma_bucket_zone *ubz;
360
361 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
362 zone_drain(ubz->ubz_zone);
363}
364
365
366/*
367 * Routine called by timeout which is used to fire off some time interval
368 * based calculations. (stats, hash size, etc.)
369 *
370 * Arguments:
371 * arg Unused
372 *
373 * Returns:
374 * Nothing
375 */
376static void
377uma_timeout(void *unused)
378{
379 bucket_enable();
380 zone_foreach(zone_timeout);
381
382 /* Reschedule this event */
383 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
384}
385
386/*
387 * Routine to perform timeout driven calculations. This expands the
388 * hashes and does per cpu statistics aggregation.
389 *
390 * Arguments:
391 * zone The zone to operate on
392 *
393 * Returns:
394 * Nothing
395 */
396static void
397zone_timeout(uma_zone_t zone)
398{
399 uma_keg_t keg;
400 u_int64_t alloc;
401
402 keg = zone->uz_keg;
403 alloc = 0;
404
405 /*
406 * Expand the zone hash table.
407 *
408 * This is done if the number of slabs is larger than the hash size.
409	 * What I'm trying to do here is eliminate collisions entirely.  This
410 * may be a little aggressive. Should I allow for two collisions max?
411 */
412 ZONE_LOCK(zone);
413 if (keg->uk_flags & UMA_ZONE_HASH &&
414 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
415 struct uma_hash newhash;
416 struct uma_hash oldhash;
417 int ret;
418
419 /*
420 * This is so involved because allocating and freeing
421 * while the zone lock is held will lead to deadlock.
422 * I have to do everything in stages and check for
423 * races.
424 */
425 newhash = keg->uk_hash;
426 ZONE_UNLOCK(zone);
427 ret = hash_alloc(&newhash);
428 ZONE_LOCK(zone);
429 if (ret) {
430 if (hash_expand(&keg->uk_hash, &newhash)) {
431 oldhash = keg->uk_hash;
432 keg->uk_hash = newhash;
433 } else
434 oldhash = newhash;
435
436 ZONE_UNLOCK(zone);
437 hash_free(&oldhash);
438 ZONE_LOCK(zone);
439 }
440 }
441 ZONE_UNLOCK(zone);
442}
443
444/*
445 * Allocate and zero fill the next sized hash table from the appropriate
446 * backing store.
447 *
448 * Arguments:
449 * hash A new hash structure with the old hash size in uh_hashsize
450 *
451 * Returns:
452	 * 1 on success and 0 on failure.
453 */
454static int
455hash_alloc(struct uma_hash *hash)
456{
457 int oldsize;
458 int alloc;
459
460 oldsize = hash->uh_hashsize;
461
462 /* We're just going to go to a power of two greater */
463 if (oldsize) {
464 hash->uh_hashsize = oldsize * 2;
465 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
466 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
467 M_UMAHASH, M_NOWAIT);
468 } else {
469 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
470 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
471 M_WAITOK);
472 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
473 }
474 if (hash->uh_slab_hash) {
475 bzero(hash->uh_slab_hash, alloc);
476 hash->uh_hashmask = hash->uh_hashsize - 1;
477 return (1);
478 }
479
480 return (0);
481}
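
/*
 * Growth example: the first call (uh_hashsize == 0) takes the hashzone
 * path and produces a table of UMA_HASH_SIZE_INIT entries; every later
 * call doubles the current size (64 entries become 128, 128 become 256,
 * and so on), with the new table coming from malloc(9) under M_UMAHASH.
 */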
482
483/*
484 * Expands the hash table for HASH zones. This is done from zone_timeout
485 * to reduce collisions. This must not be done in the regular allocation
486 * path, otherwise, we can recurse on the vm while allocating pages.
487 *
488 * Arguments:
489 * oldhash The hash you want to expand
490 * newhash The hash structure for the new table
491 *
492 * Returns:
493 * Nothing
494 *
495 * Discussion:
496 */
497static int
498hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
499{
500 uma_slab_t slab;
501 int hval;
502 int i;
503
504 if (!newhash->uh_slab_hash)
505 return (0);
506
507 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
508 return (0);
509
510 /*
511 * I need to investigate hash algorithms for resizing without a
512 * full rehash.
513 */
514
515 for (i = 0; i < oldhash->uh_hashsize; i++)
516 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
517 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
518 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
519 hval = UMA_HASH(newhash, slab->us_data);
520 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
521 slab, us_hlink);
522 }
523
524 return (1);
525}
526
527/*
528 * Free the hash bucket to the appropriate backing store.
529 *
530 * Arguments:
531 * slab_hash The hash bucket we're freeing
532 * hashsize The number of entries in that hash bucket
533 *
534 * Returns:
535 * Nothing
536 */
537static void
538hash_free(struct uma_hash *hash)
539{
540 if (hash->uh_slab_hash == NULL)
541 return;
542 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
543 uma_zfree_internal(hashzone,
544 hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
545 else
546 free(hash->uh_slab_hash, M_UMAHASH);
547}
548
549/*
550 * Frees all outstanding items in a bucket
551 *
552 * Arguments:
553 * zone The zone to free to, must be unlocked.
554 * bucket The free/alloc bucket with items, cpu queue must be locked.
555 *
556 * Returns:
557 * Nothing
558 */
559
560static void
561bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
562{
563 uma_slab_t slab;
564 int mzone;
565 void *item;
566
567 if (bucket == NULL)
568 return;
569
570 slab = NULL;
571 mzone = 0;
572
573 /* We have to lookup the slab again for malloc.. */
574 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
575 mzone = 1;
576
577 while (bucket->ub_cnt > 0) {
578 bucket->ub_cnt--;
579 item = bucket->ub_bucket[bucket->ub_cnt];
580#ifdef INVARIANTS
581 bucket->ub_bucket[bucket->ub_cnt] = NULL;
582 KASSERT(item != NULL,
583 ("bucket_drain: botched ptr, item is NULL"));
584#endif
585 /*
586 * This is extremely inefficient. The slab pointer was passed
587 * to uma_zfree_arg, but we lost it because the buckets don't
588 * hold them. This will go away when free() gets a size passed
589 * to it.
590 */
591 if (mzone)
592 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
593 uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
594 }
595}
596
597/*
598 * Drains the per cpu caches for a zone.
599 *
600	 * NOTE: This may only be called while the zone is being torn down, and not
601 * during normal operation. This is necessary in order that we do not have
602 * to migrate CPUs to drain the per-CPU caches.
603 *
604 * Arguments:
605 * zone The zone to drain, must be unlocked.
606 *
607 * Returns:
608 * Nothing
609 */
610static void
611cache_drain(uma_zone_t zone)
612{
613 uma_cache_t cache;
614 int cpu;
615
616 /*
617 * XXX: It is safe to not lock the per-CPU caches, because we're
618 * tearing down the zone anyway. I.e., there will be no further use
619 * of the caches at this point.
620 *
621	 * XXX: It would be good to be able to assert that the zone is being
622 * torn down to prevent improper use of cache_drain().
623 *
624 * XXX: We lock the zone before passing into bucket_cache_drain() as
625 * it is used elsewhere. Should the tear-down path be made special
626 * there in some form?
627 */
628 for (cpu = 0; cpu <= mp_maxid; cpu++) {
629 if (CPU_ABSENT(cpu))
630 continue;
631 cache = &zone->uz_cpu[cpu];
632 bucket_drain(zone, cache->uc_allocbucket);
633 bucket_drain(zone, cache->uc_freebucket);
634 if (cache->uc_allocbucket != NULL)
635 bucket_free(cache->uc_allocbucket);
636 if (cache->uc_freebucket != NULL)
637 bucket_free(cache->uc_freebucket);
638 cache->uc_allocbucket = cache->uc_freebucket = NULL;
639 }
640 ZONE_LOCK(zone);
641 bucket_cache_drain(zone);
642 ZONE_UNLOCK(zone);
643}
644
645/*
646 * Drain the cached buckets from a zone. Expects a locked zone on entry.
647 */
648static void
649bucket_cache_drain(uma_zone_t zone)
650{
651 uma_bucket_t bucket;
652
653 /*
654	 * Drain the bucket queues and free the buckets; we just keep two per
655 * cpu (alloc/free).
656 */
657 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
658 LIST_REMOVE(bucket, ub_link);
659 ZONE_UNLOCK(zone);
660 bucket_drain(zone, bucket);
661 bucket_free(bucket);
662 ZONE_LOCK(zone);
663 }
664
665 /* Now we do the free queue.. */
666 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
667 LIST_REMOVE(bucket, ub_link);
668 bucket_free(bucket);
669 }
670}
671
672/*
673 * Frees pages from a zone back to the system. This is done on demand from
674 * the pageout daemon.
675 *
676 * Arguments:
677 * zone The zone to free pages from
679 *
680 * Returns:
681 * Nothing.
682 */
683static void
684zone_drain(uma_zone_t zone)
685{
686 struct slabhead freeslabs = { 0 };
687 uma_keg_t keg;
688 uma_slab_t slab;
689 uma_slab_t n;
690 u_int8_t flags;
691 u_int8_t *mem;
692 int i;
693
694 keg = zone->uz_keg;
695
696 /*
697 * We don't want to take pages from statically allocated zones at this
698 * time
699 */
700 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
701 return;
702
703 ZONE_LOCK(zone);
704
705#ifdef UMA_DEBUG
706 printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
707#endif
708 bucket_cache_drain(zone);
709 if (keg->uk_free == 0)
710 goto finished;
711
712 slab = LIST_FIRST(&keg->uk_free_slab);
713 while (slab) {
714 n = LIST_NEXT(slab, us_link);
715
716			/* We have nowhere to free these to */
717 if (slab->us_flags & UMA_SLAB_BOOT) {
718 slab = n;
719 continue;
720 }
721
722 LIST_REMOVE(slab, us_link);
723 keg->uk_pages -= keg->uk_ppera;
724 keg->uk_free -= keg->uk_ipers;
725
726 if (keg->uk_flags & UMA_ZONE_HASH)
727 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
728
729 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
730
731 slab = n;
732 }
733finished:
734 ZONE_UNLOCK(zone);
735
736 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
737 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
738 if (keg->uk_fini)
739 for (i = 0; i < keg->uk_ipers; i++)
740 keg->uk_fini(
741 slab->us_data + (keg->uk_rsize * i),
742 keg->uk_size);
743 flags = slab->us_flags;
744 mem = slab->us_data;
745
746 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
747 (keg->uk_flags & UMA_ZONE_REFCNT)) {
748 vm_object_t obj;
749
750 if (flags & UMA_SLAB_KMEM)
751 obj = kmem_object;
752 else
753 obj = NULL;
754 for (i = 0; i < keg->uk_ppera; i++)
755 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
756 obj);
757 }
758 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
759 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
760 SKIP_NONE, ZFREE_STATFREE);
761#ifdef UMA_DEBUG
762 printf("%s: Returning %d bytes.\n",
763 zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
764#endif
765 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
766 }
767}
768
769/*
770 * Allocate a new slab for a zone. This does not insert the slab onto a list.
771 *
772 * Arguments:
773 * zone The zone to allocate slabs for
774 * wait Shall we wait?
775 *
776 * Returns:
777 * The slab that was allocated or NULL if there is no memory and the
778 * caller specified M_NOWAIT.
779 */
780static uma_slab_t
781slab_zalloc(uma_zone_t zone, int wait)
782{
783 uma_slabrefcnt_t slabref;
784 uma_slab_t slab;
785 uma_keg_t keg;
786 u_int8_t *mem;
787 u_int8_t flags;
788 int i;
789
790 slab = NULL;
791 keg = zone->uz_keg;
792
793#ifdef UMA_DEBUG
794 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
795#endif
796 ZONE_UNLOCK(zone);
797
798 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
799 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
800 if (slab == NULL) {
801 ZONE_LOCK(zone);
802 return NULL;
803 }
804 }
805
806 /*
807 * This reproduces the old vm_zone behavior of zero filling pages the
808 * first time they are added to a zone.
809 *
810 * Malloced items are zeroed in uma_zalloc.
811 */
812
813 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
814 wait |= M_ZERO;
815 else
816 wait &= ~M_ZERO;
817
818 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
819 &flags, wait);
820 if (mem == NULL) {
821 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
822 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
823 SKIP_NONE, ZFREE_STATFREE);
824 ZONE_LOCK(zone);
825 return (NULL);
826 }
827
828 /* Point the slab into the allocated memory */
829 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
830 slab = (uma_slab_t )(mem + keg->uk_pgoff);
831
832 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
833 (keg->uk_flags & UMA_ZONE_REFCNT))
834 for (i = 0; i < keg->uk_ppera; i++)
835 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
836
837 slab->us_keg = keg;
838 slab->us_data = mem;
839 slab->us_freecount = keg->uk_ipers;
840 slab->us_firstfree = 0;
841 slab->us_flags = flags;
842
843 if (keg->uk_flags & UMA_ZONE_REFCNT) {
844 slabref = (uma_slabrefcnt_t)slab;
845 for (i = 0; i < keg->uk_ipers; i++) {
846 slabref->us_freelist[i].us_refcnt = 0;
847 slabref->us_freelist[i].us_item = i+1;
848 }
849 } else {
850 for (i = 0; i < keg->uk_ipers; i++)
851 slab->us_freelist[i].us_item = i+1;
852 }
853
854 if (keg->uk_init != NULL) {
855 for (i = 0; i < keg->uk_ipers; i++)
856 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
857 keg->uk_size, wait) != 0)
858 break;
859 if (i != keg->uk_ipers) {
860 if (keg->uk_fini != NULL) {
861 for (i--; i > -1; i--)
862 keg->uk_fini(slab->us_data +
863 (keg->uk_rsize * i),
864 keg->uk_size);
865 }
866 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
867 (keg->uk_flags & UMA_ZONE_REFCNT)) {
868 vm_object_t obj;
869
870 if (flags & UMA_SLAB_KMEM)
871 obj = kmem_object;
872 else
873 obj = NULL;
874 for (i = 0; i < keg->uk_ppera; i++)
875 vsetobj((vm_offset_t)mem +
876 (i * PAGE_SIZE), obj);
877 }
878 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
879 uma_zfree_internal(keg->uk_slabzone, slab,
880 NULL, SKIP_NONE, ZFREE_STATFREE);
881 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
882 flags);
883 ZONE_LOCK(zone);
884 return (NULL);
885 }
886 }
887 ZONE_LOCK(zone);
888
889 if (keg->uk_flags & UMA_ZONE_HASH)
890 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
891
892 keg->uk_pages += keg->uk_ppera;
893 keg->uk_free += keg->uk_ipers;
894
895 return (slab);
896}
897
898/*
899 * This function is intended to be used early on in place of page_alloc() so
900 * that we may use the boot time page cache to satisfy allocations before
901 * the VM is ready.
902 */
903static void *
904startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
905{
906 uma_keg_t keg;
907 uma_slab_t tmps;
908
909 keg = zone->uz_keg;
910
911 /*
912 * Check our small startup cache to see if it has pages remaining.
913 */
914 mtx_lock(&uma_boot_pages_mtx);
915 if ((tmps = LIST_FIRST(&uma_boot_pages)) != NULL) {
916 LIST_REMOVE(tmps, us_link);
917 mtx_unlock(&uma_boot_pages_mtx);
918 *pflag = tmps->us_flags;
919 return (tmps->us_data);
920 }
921 mtx_unlock(&uma_boot_pages_mtx);
922 if (booted == 0)
923 panic("UMA: Increase vm.boot_pages");
924 /*
925	 * Now that we've booted, reset these users to their real allocator.
926 */
927#ifdef UMA_MD_SMALL_ALLOC
928 keg->uk_allocf = uma_small_alloc;
929#else
930 keg->uk_allocf = page_alloc;
931#endif
932 return keg->uk_allocf(zone, bytes, pflag, wait);
933}
934
935/*
936 * Allocates a number of pages from the system
937 *
938 * Arguments:
939 * zone Unused
940 * bytes The number of bytes requested
941 * wait Shall we wait?
942 *
943 * Returns:
944	 *	A pointer to the allocated memory or possibly
945 * NULL if M_NOWAIT is set.
946 */
947static void *
948page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
949{
950 void *p; /* Returned page */
951
952 *pflag = UMA_SLAB_KMEM;
953 p = (void *) kmem_malloc(kmem_map, bytes, wait);
954
955 return (p);
956}
957
958/*
959 * Allocates a number of pages from within an object
960 *
961 * Arguments:
962 * zone Unused
963 * bytes The number of bytes requested
964 * wait Shall we wait?
965 *
966 * Returns:
967	 *	A pointer to the allocated memory or possibly
968 * NULL if M_NOWAIT is set.
969 */
970static void *
971obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
972{
973 vm_object_t object;
974 vm_offset_t retkva, zkva;
975 vm_page_t p;
976 int pages, startpages;
977
978 object = zone->uz_keg->uk_obj;
979 retkva = 0;
980
981 /*
982 * This looks a little weird since we're getting one page at a time.
983 */
984 VM_OBJECT_LOCK(object);
985 p = TAILQ_LAST(&object->memq, pglist);
986 pages = p != NULL ? p->pindex + 1 : 0;
987 startpages = pages;
988 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
989 for (; bytes > 0; bytes -= PAGE_SIZE) {
990 p = vm_page_alloc(object, pages,
991 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
992 if (p == NULL) {
993 if (pages != startpages)
994 pmap_qremove(retkva, pages - startpages);
995 while (pages != startpages) {
996 pages--;
997 p = TAILQ_LAST(&object->memq, pglist);
998 vm_page_lock_queues();
999 vm_page_unwire(p, 0);
1000 vm_page_free(p);
1001 vm_page_unlock_queues();
1002 }
1003 retkva = 0;
1004 goto done;
1005 }
1006 pmap_qenter(zkva, &p, 1);
1007 if (retkva == 0)
1008 retkva = zkva;
1009 zkva += PAGE_SIZE;
1010 pages += 1;
1011 }
1012done:
1013 VM_OBJECT_UNLOCK(object);
1014 *flags = UMA_SLAB_PRIV;
1015
1016 return ((void *)retkva);
1017}
1018
1019/*
1020 * Frees a number of pages to the system
1021 *
1022 * Arguments:
1023 * mem A pointer to the memory to be freed
1024 * size The size of the memory being freed
1025 * flags The original p->us_flags field
1026 *
1027 * Returns:
1028 * Nothing
1029 */
1030static void
1031page_free(void *mem, int size, u_int8_t flags)
1032{
1033 vm_map_t map;
1034
1035 if (flags & UMA_SLAB_KMEM)
1036 map = kmem_map;
1037 else
1038 panic("UMA: page_free used with invalid flags %d\n", flags);
1039
1040 kmem_free(map, (vm_offset_t)mem, size);
1041}
1042
1043/*
1044 * Zero fill initializer
1045 *
1046 * Arguments/Returns follow uma_init specifications
1047 */
1048static int
1049zero_init(void *mem, int size, int flags)
1050{
1051 bzero(mem, size);
1052 return (0);
1053}
1054
1055/*
1056 * Finish creating a small uma zone. This calculates ipers, and the zone size.
1057 *
1058 * Arguments
1059 * zone The zone we should initialize
1060 *
1061 * Returns
1062 * Nothing
1063 */
1064static void
1065zone_small_init(uma_zone_t zone)
1066{
1067 uma_keg_t keg;
1068 u_int rsize;
1069 u_int memused;
1070 u_int wastedspace;
1071 u_int shsize;
1072
1073 keg = zone->uz_keg;
1074 KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
1075 rsize = keg->uk_size;
1076
1077 if (rsize < UMA_SMALLEST_UNIT)
1078 rsize = UMA_SMALLEST_UNIT;
1079 if (rsize & keg->uk_align)
1080 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1081
1082 keg->uk_rsize = rsize;
1083 keg->uk_ppera = 1;
1084
1085 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1086 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
1087 shsize = sizeof(struct uma_slab_refcnt);
1088 } else {
1089 rsize += UMA_FRITM_SZ; /* Account for linkage */
1090 shsize = sizeof(struct uma_slab);
1091 }
1092
1093 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1094 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
1095 memused = keg->uk_ipers * rsize + shsize;
1096 wastedspace = UMA_SLAB_SIZE - memused;
1097
1098 /*
1099 * We can't do OFFPAGE if we're internal or if we've been
1100 * asked to not go to the VM for buckets. If we do this we
1101 * may end up going to the VM (kmem_map) for slabs which we
1102 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1103 * result of UMA_ZONE_VM, which clearly forbids it.
1104 */
1105 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1106 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1107 return;
1108
1109 if ((wastedspace >= UMA_MAX_WASTE) &&
1110 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1111 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1112 KASSERT(keg->uk_ipers <= 255,
1113 ("zone_small_init: keg->uk_ipers too high!"));
1114#ifdef UMA_DEBUG
1115 printf("UMA decided we need offpage slab headers for "
1116 "zone: %s, calculated wastedspace = %d, "
1117 "maximum wasted space allowed = %d, "
1118 "calculated ipers = %d, "
1119 "new wasted space = %d\n", zone->uz_name, wastedspace,
1120 UMA_MAX_WASTE, keg->uk_ipers,
1121 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1122#endif
1123 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1124 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1125 keg->uk_flags |= UMA_ZONE_HASH;
1126 }
1127}
1128
1129/*
1130 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
1131 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
1132 * more complicated.
1133 *
1134 * Arguments
1135 * zone The zone we should initialize
1136 *
1137 * Returns
1138 * Nothing
1139 */
1140static void
1141zone_large_init(uma_zone_t zone)
1142{
1143 uma_keg_t keg;
1144 int pages;
1145
1146 keg = zone->uz_keg;
1147
1148 KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
1149 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1150 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1151
1152 pages = keg->uk_size / UMA_SLAB_SIZE;
1153
1154 /* Account for remainder */
1155 if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1156 pages++;
1157
1158 keg->uk_ppera = pages;
1159 keg->uk_ipers = 1;
1160
1161 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1162 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1163 keg->uk_flags |= UMA_ZONE_HASH;
1164
1165 keg->uk_rsize = keg->uk_size;
1166}
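
/*
 * Worked example (assuming the common UMA_SLAB_SIZE == PAGE_SIZE == 4096
 * case): a 10000-byte item gives pages = 2 with a remainder, so uk_ppera
 * becomes 3 and each three-page slab holds exactly one item.
 */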
1167
1168/*
1169 * Keg header ctor. This initializes all fields, locks, etc. And inserts
1170 * the keg onto the global keg list.
1171 *
1172 * Arguments/Returns follow uma_ctor specifications
1173 * udata Actually uma_kctor_args
1174 */
1175static int
1176keg_ctor(void *mem, int size, void *udata, int flags)
1177{
1178 struct uma_kctor_args *arg = udata;
1179 uma_keg_t keg = mem;
1180 uma_zone_t zone;
1181
1182 bzero(keg, size);
1183 keg->uk_size = arg->size;
1184 keg->uk_init = arg->uminit;
1185 keg->uk_fini = arg->fini;
1186 keg->uk_align = arg->align;
1187 keg->uk_free = 0;
1188 keg->uk_pages = 0;
1189 keg->uk_flags = arg->flags;
1190 keg->uk_allocf = page_alloc;
1191 keg->uk_freef = page_free;
1192 keg->uk_recurse = 0;
1193 keg->uk_slabzone = NULL;
1194
1195 /*
1196 * The master zone is passed to us at keg-creation time.
1197 */
1198 zone = arg->zone;
1199 zone->uz_keg = keg;
1200
1201 if (arg->flags & UMA_ZONE_VM)
1202 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1203
1204 if (arg->flags & UMA_ZONE_ZINIT)
1205 keg->uk_init = zero_init;
1206
1207 /*
1208 * The +UMA_FRITM_SZ added to uk_size is to account for the
1209 * linkage that is added to the size in zone_small_init(). If
1210 * we don't account for this here then we may end up in
1211 * zone_small_init() with a calculated 'ipers' of 0.
1212 */
1213 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1214 if ((keg->uk_size+UMA_FRITMREF_SZ) >
1215 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1216 zone_large_init(zone);
1217 else
1218 zone_small_init(zone);
1219 } else {
1220 if ((keg->uk_size+UMA_FRITM_SZ) >
1221 (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1222 zone_large_init(zone);
1223 else
1224 zone_small_init(zone);
1225 }
1226
1227 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1228 if (keg->uk_flags & UMA_ZONE_REFCNT)
1229 keg->uk_slabzone = slabrefzone;
1230 else
1231 keg->uk_slabzone = slabzone;
1232 }
1233
1234 /*
1235 * If we haven't booted yet we need allocations to go through the
1236 * startup cache until the vm is ready.
1237 */
1238 if (keg->uk_ppera == 1) {
1239#ifdef UMA_MD_SMALL_ALLOC
1240 keg->uk_allocf = uma_small_alloc;
1241 keg->uk_freef = uma_small_free;
1242#endif
1243 if (booted == 0)
1244 keg->uk_allocf = startup_alloc;
1245 }
1246
1247 /*
1248 * Initialize keg's lock (shared among zones) through
1249 * Master zone
1250 */
1251 zone->uz_lock = &keg->uk_lock;
1252 if (arg->flags & UMA_ZONE_MTXCLASS)
1253 ZONE_LOCK_INIT(zone, 1);
1254 else
1255 ZONE_LOCK_INIT(zone, 0);
1256
1257 /*
1258 * If we're putting the slab header in the actual page we need to
1259 * figure out where in each page it goes. This calculates a right
1260 * justified offset into the memory on an ALIGN_PTR boundary.
1261 */
1262 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1263 u_int totsize;
1264
1265 /* Size of the slab struct and free list */
1266 if (keg->uk_flags & UMA_ZONE_REFCNT)
1267 totsize = sizeof(struct uma_slab_refcnt) +
1268 keg->uk_ipers * UMA_FRITMREF_SZ;
1269 else
1270 totsize = sizeof(struct uma_slab) +
1271 keg->uk_ipers * UMA_FRITM_SZ;
1272
1273 if (totsize & UMA_ALIGN_PTR)
1274 totsize = (totsize & ~UMA_ALIGN_PTR) +
1275 (UMA_ALIGN_PTR + 1);
1276 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
1277
1278 if (keg->uk_flags & UMA_ZONE_REFCNT)
1279 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1280 + keg->uk_ipers * UMA_FRITMREF_SZ;
1281 else
1282 totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1283 + keg->uk_ipers * UMA_FRITM_SZ;
1284
1285 /*
1286 * The only way the following is possible is if with our
1287 * UMA_ALIGN_PTR adjustments we are now bigger than
1288 * UMA_SLAB_SIZE. I haven't checked whether this is
1289 * mathematically possible for all cases, so we make
1290 * sure here anyway.
1291 */
1292 if (totsize > UMA_SLAB_SIZE) {
1293 printf("zone %s ipers %d rsize %d size %d\n",
1294 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1295 keg->uk_size);
1296 panic("UMA slab won't fit.\n");
1297 }
1298 }
1299
1300 if (keg->uk_flags & UMA_ZONE_HASH)
1301 hash_alloc(&keg->uk_hash);
1302
1303#ifdef UMA_DEBUG
1304 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1305 zone->uz_name, zone,
1306 keg->uk_size, keg->uk_ipers,
1307 keg->uk_ppera, keg->uk_pgoff);
1308#endif
1309
1310 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1311
1312 mtx_lock(&uma_mtx);
1313 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1314 mtx_unlock(&uma_mtx);
1315 return (0);
1316}
1317
1318/*
1319 * Zone header ctor. This initializes all fields, locks, etc.
1320 *
1321 * Arguments/Returns follow uma_ctor specifications
1322 * udata Actually uma_zctor_args
1323 */
1324
1325static int
1326zone_ctor(void *mem, int size, void *udata, int flags)
1327{
1328 struct uma_zctor_args *arg = udata;
1329 uma_zone_t zone = mem;
1330 uma_zone_t z;
1331 uma_keg_t keg;
1332
1333 bzero(zone, size);
1334 zone->uz_name = arg->name;
1335 zone->uz_ctor = arg->ctor;
1336 zone->uz_dtor = arg->dtor;
1337 zone->uz_init = NULL;
1338 zone->uz_fini = NULL;
1339 zone->uz_allocs = 0;
1340 zone->uz_frees = 0;
1341 zone->uz_fails = 0;
1342 zone->uz_fills = zone->uz_count = 0;
1343
1344 if (arg->flags & UMA_ZONE_SECONDARY) {
1345 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1346 keg = arg->keg;
1347 zone->uz_keg = keg;
1348 zone->uz_init = arg->uminit;
1349 zone->uz_fini = arg->fini;
1350 zone->uz_lock = &keg->uk_lock;
1351 mtx_lock(&uma_mtx);
1352 ZONE_LOCK(zone);
1353 keg->uk_flags |= UMA_ZONE_SECONDARY;
1354 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1355 if (LIST_NEXT(z, uz_link) == NULL) {
1356 LIST_INSERT_AFTER(z, zone, uz_link);
1357 break;
1358 }
1359 }
1360 ZONE_UNLOCK(zone);
1361 mtx_unlock(&uma_mtx);
1362 } else if (arg->keg == NULL) {
1363 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1364 arg->align, arg->flags) == NULL)
1365 return (ENOMEM);
1366 } else {
1367 struct uma_kctor_args karg;
1368 int error;
1369
1370 /* We should only be here from uma_startup() */
1371 karg.size = arg->size;
1372 karg.uminit = arg->uminit;
1373 karg.fini = arg->fini;
1374 karg.align = arg->align;
1375 karg.flags = arg->flags;
1376 karg.zone = zone;
1377 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1378 flags);
1379 if (error)
1380 return (error);
1381 }
1382 keg = zone->uz_keg;
1383 zone->uz_lock = &keg->uk_lock;
1384
1385 /*
1386 * Some internal zones don't have room allocated for the per cpu
1387 * caches. If we're internal, bail out here.
1388 */
1389 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1390 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
1391 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1392 return (0);
1393 }
1394
1395 if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1396 zone->uz_count = BUCKET_MAX;
1397 else if (keg->uk_ipers <= BUCKET_MAX)
1398 zone->uz_count = keg->uk_ipers;
1399 else
1400 zone->uz_count = BUCKET_MAX;
1401 return (0);
1402}
1403
1404/*
1405 * Keg header dtor. This frees all data, destroys locks, frees the hash
1406 * table and removes the keg from the global list.
1407 *
1408 * Arguments/Returns follow uma_dtor specifications
1409 * udata unused
1410 */
1411static void
1412keg_dtor(void *arg, int size, void *udata)
1413{
1414 uma_keg_t keg;
1415
1416 keg = (uma_keg_t)arg;
1417 mtx_lock(&keg->uk_lock);
1418 if (keg->uk_free != 0) {
1419 printf("Freed UMA keg was not empty (%d items). "
1420 " Lost %d pages of memory.\n",
1421 keg->uk_free, keg->uk_pages);
1422 }
1423 mtx_unlock(&keg->uk_lock);
1424
1425 if (keg->uk_flags & UMA_ZONE_HASH)
1426 hash_free(&keg->uk_hash);
1427
1428 mtx_destroy(&keg->uk_lock);
1429}
1430
1431/*
1432 * Zone header dtor.
1433 *
1434 * Arguments/Returns follow uma_dtor specifications
1435 * udata unused
1436 */
1437static void
1438zone_dtor(void *arg, int size, void *udata)
1439{
1440 uma_zone_t zone;
1441 uma_keg_t keg;
1442
1443 zone = (uma_zone_t)arg;
1444 keg = zone->uz_keg;
1445
1446 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
1447 cache_drain(zone);
1448
1449 mtx_lock(&uma_mtx);
1450 zone_drain(zone);
1451 if (keg->uk_flags & UMA_ZONE_SECONDARY) {
1452 LIST_REMOVE(zone, uz_link);
1453 /*
1454	 * XXX there are some races here where
1455	 * the zone can be drained but the zone lock
1456	 * released and the zone refilled before we
1457	 * remove it... we don't care for now.
1458 */
1459 ZONE_LOCK(zone);
1460 if (LIST_EMPTY(&keg->uk_zones))
1461 keg->uk_flags &= ~UMA_ZONE_SECONDARY;
1462 ZONE_UNLOCK(zone);
1463 mtx_unlock(&uma_mtx);
1464 } else {
1465 LIST_REMOVE(keg, uk_link);
1466 LIST_REMOVE(zone, uz_link);
1467 mtx_unlock(&uma_mtx);
1468 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE,
1469 ZFREE_STATFREE);
1470 }
1471 zone->uz_keg = NULL;
1472}
1473
1474/*
1475 * Traverses every zone in the system and calls a callback
1476 *
1477 * Arguments:
1478 * zfunc A pointer to a function which accepts a zone
1479 * as an argument.
1480 *
1481 * Returns:
1482 * Nothing
1483 */
1484static void
1485zone_foreach(void (*zfunc)(uma_zone_t))
1486{
1487 uma_keg_t keg;
1488 uma_zone_t zone;
1489
1490 mtx_lock(&uma_mtx);
1491 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1492 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1493 zfunc(zone);
1494 }
1495 mtx_unlock(&uma_mtx);
1496}
1497
1498/* Public functions */
1499/* See uma.h */
1500void
1501uma_startup(void *bootmem, int boot_pages)
1502{
1503 struct uma_zctor_args args;
1504 uma_slab_t slab;
1505 u_int slabsize;
1506 u_int objsize, totsize, wsize;
1507 int i;
1508
1509#ifdef UMA_DEBUG
1510 printf("Creating uma keg headers zone and keg.\n");
1511#endif
1512 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1513
1514 /*
1515 * Figure out the maximum number of items-per-slab we'll have if
1516 * we're using the OFFPAGE slab header to track free items, given
1517 * all possible object sizes and the maximum desired wastage
1518 * (UMA_MAX_WASTE).
1519 *
1520 * We iterate until we find an object size for
1521 * which the calculated wastage in zone_small_init() will be
1522 * enough to warrant OFFPAGE. Since wastedspace versus objsize
1523 * is an overall increasing see-saw function, we find the smallest
1524 * objsize such that the wastage is always acceptable for objects
1525 * with that objsize or smaller. Since a smaller objsize always
1526 * generates a larger possible uma_max_ipers, we use this computed
1527 * objsize to calculate the largest ipers possible. Since the
1528 * ipers calculated for OFFPAGE slab headers is always larger than
1529 * the ipers initially calculated in zone_small_init(), we use
1530 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1531 * obtain the maximum ipers possible for offpage slab headers.
1532 *
1533 * It should be noted that ipers versus objsize is an inversely
1534 * proportional function which drops off rather quickly, so as
1535 * long as our UMA_MAX_WASTE is such that the objsize we calculate
1536 * falls into the portion of the inverse relation AFTER the steep
1537 * falloff, uma_max_ipers shouldn't be too high (~10 on i386).
1538 *
1539 * Note that we have 8 bits (1 byte) to use as a freelist index
1540 * inside the actual slab header itself and this is enough to
1541 * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT-sized
1542 * object with an offpage slab header would have ipers =
1543 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1544 * 1 greater than what our byte-integer freelist index can
1545 * accommodate, but we know that this situation never occurs as
1546 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1547 * that we need to go to offpage slab headers. Or, if we do,
1548 * then we trap that condition below and panic in the INVARIANTS case.
1549 */
1550 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1551 totsize = wsize;
1552 objsize = UMA_SMALLEST_UNIT;
1553 while (totsize >= wsize) {
1554 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1555 (objsize + UMA_FRITM_SZ);
1556 totsize *= (UMA_FRITM_SZ + objsize);
1557 objsize++;
1558 }
1559 if (objsize > UMA_SMALLEST_UNIT)
1560 objsize--;
1561 uma_max_ipers = UMA_SLAB_SIZE / objsize;
1562
1563 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1564 totsize = wsize;
1565 objsize = UMA_SMALLEST_UNIT;
1566 while (totsize >= wsize) {
1567 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1568 (objsize + UMA_FRITMREF_SZ);
1569 totsize *= (UMA_FRITMREF_SZ + objsize);
1570 objsize++;
1571 }
1572 if (objsize > UMA_SMALLEST_UNIT)
1573 objsize--;
1574 uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
1575
1576 KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1577 ("uma_startup: calculated uma_max_ipers values too large!"));
1578
1579#ifdef UMA_DEBUG
1580 printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1581	printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n",
1582 uma_max_ipers_ref);
1583#endif
1584
1585 /* "manually" create the initial zone */
1586 args.name = "UMA Kegs";
1587 args.size = sizeof(struct uma_keg);
1588 args.ctor = keg_ctor;
1589 args.dtor = keg_dtor;
1590 args.uminit = zero_init;
1591 args.fini = NULL;
1592 args.keg = &masterkeg;
1593 args.align = 32 - 1;
1594 args.flags = UMA_ZFLAG_INTERNAL;
1595	/* The initial zone has no per-CPU queues so it's smaller */
1596 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1597
1598#ifdef UMA_DEBUG
1599 printf("Filling boot free list.\n");
1600#endif
1601 for (i = 0; i < boot_pages; i++) {
1602 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1603 slab->us_data = (u_int8_t *)slab;
1604 slab->us_flags = UMA_SLAB_BOOT;
1605 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1606 }
1607 mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1608
1609#ifdef UMA_DEBUG
1610 printf("Creating uma zone headers zone and keg.\n");
1611#endif
1612 args.name = "UMA Zones";
1613 args.size = sizeof(struct uma_zone) +
1614 (sizeof(struct uma_cache) * (mp_maxid + 1));
1615 args.ctor = zone_ctor;
1616 args.dtor = zone_dtor;
1617 args.uminit = zero_init;
1618 args.fini = NULL;
1619 args.keg = NULL;
1620 args.align = 32 - 1;
1621 args.flags = UMA_ZFLAG_INTERNAL;
1622	/* This zone also has no per-CPU queues, so it's smaller */
1623 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1624
1625#ifdef UMA_DEBUG
1626 printf("Initializing pcpu cache locks.\n");
1627#endif
1628#ifdef UMA_DEBUG
1629 printf("Creating slab and hash zones.\n");
1630#endif
1631
1632 /*
1633 * This is the max number of free list items we'll have with
1634 * offpage slabs.
1635 */
1636 slabsize = uma_max_ipers * UMA_FRITM_SZ;
1637 slabsize += sizeof(struct uma_slab);
1638
1639 /* Now make a zone for slab headers */
1640 slabzone = uma_zcreate("UMA Slabs",
1641 slabsize,
1642 NULL, NULL, NULL, NULL,
1643 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1644
1645 /*
1646 * We also create a zone for the bigger slabs with reference
1647	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1648 */
1649 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1650 slabsize += sizeof(struct uma_slab_refcnt);
1651 slabrefzone = uma_zcreate("UMA RCntSlabs",
1652 slabsize,
1653 NULL, NULL, NULL, NULL,
1654 UMA_ALIGN_PTR,
1655 UMA_ZFLAG_INTERNAL);
1656
1657 hashzone = uma_zcreate("UMA Hash",
1658 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1659 NULL, NULL, NULL, NULL,
1660 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1661
1662 bucket_init();
1663
1664#ifdef UMA_MD_SMALL_ALLOC
1665 booted = 1;
1666#endif
1667
1668#ifdef UMA_DEBUG
1669 printf("UMA startup complete.\n");
1670#endif
1671}
1672
1673/* see uma.h */
1674void
1675uma_startup2(void)
1676{
1677 booted = 1;
1678 bucket_enable();
1679#ifdef UMA_DEBUG
1680 printf("UMA startup2 complete.\n");
1681#endif
1682}
1683
1684/*
1685 * Initialize our callout handle
1686 *
1687 */
1688
1689static void
1690uma_startup3(void)
1691{
1692#ifdef UMA_DEBUG
1693 printf("Starting callout.\n");
1694#endif
1695 callout_init(&uma_callout, CALLOUT_MPSAFE);
1696 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1697#ifdef UMA_DEBUG
1698 printf("UMA startup3 complete.\n");
1699#endif
1700}
1701
1702static uma_zone_t
1703uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1704 int align, u_int32_t flags)
1705{
1706 struct uma_kctor_args args;
1707
1708 args.size = size;
1709 args.uminit = uminit;
1710 args.fini = fini;
1711 args.align = align;
1712 args.flags = flags;
1713 args.zone = zone;
1714 return (uma_zalloc_internal(kegs, &args, M_WAITOK));
1715}
1716
1717/* See uma.h */
1718uma_zone_t
1719uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1720 uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1721
1722{
1723 struct uma_zctor_args args;
1724
1725 /* This stuff is essential for the zone ctor */
1726 args.name = name;
1727 args.size = size;
1728 args.ctor = ctor;
1729 args.dtor = dtor;
1730 args.uminit = uminit;
1731 args.fini = fini;
1732 args.align = align;
1733 args.flags = flags;
1734 args.keg = NULL;
1735
1736 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1737}
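
/*
 * A minimal usage sketch for the create/alloc/free path implemented above.
 * The zone name, "struct foo" and the variable names are hypothetical;
 * uma_zalloc()/uma_zfree() are the convenience wrappers declared in uma.h.
 *
 *	static uma_zone_t foo_zone;
 *	struct foo *fp;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, fp);
 *	uma_zdestroy(foo_zone);
 */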
1738
1739/* See uma.h */
1740uma_zone_t
1741uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1742 uma_init zinit, uma_fini zfini, uma_zone_t master)
1743{
1744 struct uma_zctor_args args;
1745
1746 args.name = name;
1747 args.size = master->uz_keg->uk_size;
1748 args.ctor = ctor;
1749 args.dtor = dtor;
1750 args.uminit = zinit;
1751 args.fini = zfini;
1752 args.align = master->uz_keg->uk_align;
1753 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
1754 args.keg = master->uz_keg;
1755
1756 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1757}
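
/*
 * Sketch of layering a secondary zone on an existing master zone's keg,
 * continuing the hypothetical foo_zone example above.  The secondary zone
 * shares the master keg's slabs but supplies its own ctor/dtor and
 * zinit/zfini; the "bar_*" names are placeholders.
 *
 *	bar_zone = uma_zsecond_create("bar", bar_ctor, bar_dtor,
 *	    bar_zinit, bar_zfini, foo_zone);
 */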
1758
1759/* See uma.h */
1760void
1761uma_zdestroy(uma_zone_t zone)
1762{
1763
1764 uma_zfree_internal(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
1765}
1766
1767/* See uma.h */
1768void *
1769uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1770{
1771 void *item;
1772 uma_cache_t cache;
1773 uma_bucket_t bucket;
1774 int cpu;
1775 int badness;
1776
1777 /* This is the fast path allocation */
1778#ifdef UMA_DEBUG_ALLOC_1
1779 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1780#endif
1781 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1782 zone->uz_name, flags);
1783
1784 if (!(flags & M_NOWAIT)) {
1785 KASSERT(curthread->td_intr_nesting_level == 0,
1786 ("malloc(M_WAITOK) in interrupt context"));
1787 if (nosleepwithlocks) {
1788#ifdef WITNESS
1789 badness = WITNESS_CHECK(WARN_GIANTOK | WARN_SLEEPOK,
1790 NULL,
1791 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
1792 zone->uz_name);
1793#else
1794 badness = 1;
1795#endif
1796 } else {
1797 badness = 0;
1798 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1799 "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
1800 }
1801 if (badness) {
1802 flags &= ~M_WAITOK;
1803 flags |= M_NOWAIT;
1804 }
1805 }
1806
1807 /*
1808 * If possible, allocate from the per-CPU cache. There are two
1809 * requirements for safe access to the per-CPU cache: (1) the thread
1810 * accessing the cache must not be preempted or yield during access,
1811 * and (2) the thread must not migrate CPUs without switching which
1812 * cache it accesses. We rely on a critical section to prevent
1813 * preemption and migration. We release the critical section in
1814 * order to acquire the zone mutex if we are unable to allocate from
1815 * the current cache; when we re-acquire the critical section, we
1816 * must detect and handle migration if it has occurred.
1817 */
1818zalloc_restart:
1819 critical_enter();
1820 cpu = curcpu;
1821 cache = &zone->uz_cpu[cpu];
1822
1823zalloc_start:
1824 bucket = cache->uc_allocbucket;
1825
1826 if (bucket) {
1827 if (bucket->ub_cnt > 0) {
1828 bucket->ub_cnt--;
1829 item = bucket->ub_bucket[bucket->ub_cnt];
1830#ifdef INVARIANTS
1831 bucket->ub_bucket[bucket->ub_cnt] = NULL;
1832#endif
1833 KASSERT(item != NULL,
1834 ("uma_zalloc: Bucket pointer mangled."));
1835 cache->uc_allocs++;
1836 critical_exit();
1837#ifdef INVARIANTS
1838 ZONE_LOCK(zone);
1839 uma_dbg_alloc(zone, NULL, item);
1840 ZONE_UNLOCK(zone);
1841#endif
1842 if (zone->uz_ctor != NULL) {
1843 if (zone->uz_ctor(item, zone->uz_keg->uk_size,
1844 udata, flags) != 0) {
1845 uma_zfree_internal(zone, item, udata,
1846 SKIP_DTOR, ZFREE_STATFAIL |
1847 ZFREE_STATFREE);
1848 return (NULL);
1849 }
1850 }
1851 if (flags & M_ZERO)
1852 bzero(item, zone->uz_keg->uk_size);
1853 return (item);
1854 } else if (cache->uc_freebucket) {
1855 /*
1856 * We have run out of items in our allocbucket.
1857 * See if we can switch with our free bucket.
1858 */
1859 if (cache->uc_freebucket->ub_cnt > 0) {
1860#ifdef UMA_DEBUG_ALLOC
1861 printf("uma_zalloc: Swapping empty with"
1862 " alloc.\n");
1863#endif
1864 bucket = cache->uc_freebucket;
1865 cache->uc_freebucket = cache->uc_allocbucket;
1866 cache->uc_allocbucket = bucket;
1867
1868 goto zalloc_start;
1869 }
1870 }
1871 }
1872 /*
1873	 * The attempt to retrieve the item from the per-CPU cache has failed,
1874	 * so we must go back to the zone.  This requires the zone lock, so we
1875	 * must drop the critical section, then re-acquire it when we go back
1876	 * to the cache.  Since the critical section is released, we may be
1877	 * preempted or migrate.  As such, make sure not to rely on any
1878	 * cache-specific thread-local state obtained prior to releasing
1879	 * the critical section.
1880 */
1881 critical_exit();
1882 ZONE_LOCK(zone);
1883 critical_enter();
1884 cpu = curcpu;
1885 cache = &zone->uz_cpu[cpu];
1886 bucket = cache->uc_allocbucket;
1887 if (bucket != NULL) {
1888 if (bucket->ub_cnt > 0) {
1889 ZONE_UNLOCK(zone);
1890 goto zalloc_start;
1891 }
1892 bucket = cache->uc_freebucket;
1893 if (bucket != NULL && bucket->ub_cnt > 0) {
1894 ZONE_UNLOCK(zone);
1895 goto zalloc_start;
1896 }
1897 }
1898
1899 /* Since we have locked the zone we may as well send back our stats */
1900 zone->uz_allocs += cache->uc_allocs;
1901 cache->uc_allocs = 0;
1902 zone->uz_frees += cache->uc_frees;
1903 cache->uc_frees = 0;
1904
1905 /* Our old one is now a free bucket */
1906 if (cache->uc_allocbucket) {
1907 KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1908 ("uma_zalloc_arg: Freeing a non free bucket."));
1909 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1910 cache->uc_allocbucket, ub_link);
1911 cache->uc_allocbucket = NULL;
1912 }
1913
1914 /* Check the free list for a new alloc bucket */
1915 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1916 KASSERT(bucket->ub_cnt != 0,
1917 ("uma_zalloc_arg: Returning an empty bucket."));
1918
1919 LIST_REMOVE(bucket, ub_link);
1920 cache->uc_allocbucket = bucket;
1921 ZONE_UNLOCK(zone);
1922 goto zalloc_start;
1923 }
1924 /* We are no longer associated with this CPU. */
1925 critical_exit();
1926
1927 /* Bump up our uz_count so we get here less */
1928 if (zone->uz_count < BUCKET_MAX)
1929 zone->uz_count++;
1930
1931 /*
1932	 * Now let's just fill a bucket and put it on the free list.  If that
1933	 * works we'll restart the allocation from the beginning.
1934 */
1935 if (uma_zalloc_bucket(zone, flags)) {
1936 ZONE_UNLOCK(zone);
1937 goto zalloc_restart;
1938 }
1939 ZONE_UNLOCK(zone);
1940 /*
1941 * We may not be able to get a bucket so return an actual item.
1942 */
1943#ifdef UMA_DEBUG
1944 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1945#endif
1946
1947 return (uma_zalloc_internal(zone, udata, flags));
1948}
1949
1950static uma_slab_t
1951uma_zone_slab(uma_zone_t zone, int flags)
1952{
1953 uma_slab_t slab;
1954 uma_keg_t keg;
1955
1956 keg = zone->uz_keg;
1957
1958 /*
1959 * This is to prevent us from recursively trying to allocate
1960 * buckets. The problem is that if an allocation forces us to
1961 * grab a new bucket we will call page_alloc, which will go off
1962 * and cause the vm to allocate vm_map_entries. If we need new
1963 * buckets there too we will recurse in kmem_alloc and bad
1964 * things happen. So instead we return a NULL bucket, and make
1965 * the code that allocates buckets smart enough to deal with it
1966 *
1967 * XXX: While we want this protection for the bucket zones so that
1968 * recursion from the VM is handled (and the calling code that
1969 * allocates buckets knows how to deal with it), we do not want
1970 * to prevent allocation from the slab header zones (slabzone
1971 * and slabrefzone) if uk_recurse is not zero for them. The
1972 * reason is that it could lead to NULL being returned for
1973 * slab header allocations even in the M_WAITOK case, and the
1974 * caller can't handle that.
1975 */
1976 if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
1977 if ((zone != slabzone) && (zone != slabrefzone))
1978 return (NULL);
1979
1980 slab = NULL;
1981
1982 for (;;) {
1983 /*
1984 * Find a slab with some space. Prefer slabs that are partially
1985 * used over those that are totally full. This helps to reduce
1986 * fragmentation.
1987 */
1988 if (keg->uk_free != 0) {
1989 if (!LIST_EMPTY(&keg->uk_part_slab)) {
1990 slab = LIST_FIRST(&keg->uk_part_slab);
1991 } else {
1992 slab = LIST_FIRST(&keg->uk_free_slab);
1993 LIST_REMOVE(slab, us_link);
1994 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
1995 us_link);
1996 }
1997 return (slab);
1998 }
1999
2000 /*
2001 * M_NOVM means don't ask at all!
2002 */
2003 if (flags & M_NOVM)
2004 break;
2005
2006 if (keg->uk_maxpages &&
2007 keg->uk_pages >= keg->uk_maxpages) {
2008 keg->uk_flags |= UMA_ZFLAG_FULL;
2009
2010 if (flags & M_NOWAIT)
2011 break;
2012 else
2013 msleep(keg, &keg->uk_lock, PVM,
2014 "zonelimit", 0);
2015 continue;
2016 }
2017 keg->uk_recurse++;
2018 slab = slab_zalloc(zone, flags);
2019 keg->uk_recurse--;
2020
2021 /*
2022 * If we got a slab here it's safe to mark it partially used
2023 * and return. We assume that the caller is going to remove
2024 * at least one item.
2025 */
2026 if (slab) {
2027 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2028 return (slab);
2029 }
2030 /*
2031 * We might not have been able to get a slab but another cpu
2032 * could have while we were unlocked. Check again before we
2033 * fail.
2034 */
2035 if (flags & M_NOWAIT)
2036 flags |= M_NOVM;
2037 }
2038 return (slab);
2039}
2040
2041static void *
2042uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
2043{
2044 uma_keg_t keg;
2045 uma_slabrefcnt_t slabref;
2046 void *item;
2047 u_int8_t freei;
2048
2049 keg = zone->uz_keg;
2050
2051 freei = slab->us_firstfree;
2052 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2053 slabref = (uma_slabrefcnt_t)slab;
2054 slab->us_firstfree = slabref->us_freelist[freei].us_item;
2055 } else {
2056 slab->us_firstfree = slab->us_freelist[freei].us_item;
2057 }
2058 item = slab->us_data + (keg->uk_rsize * freei);
2059
2060 slab->us_freecount--;
2061 keg->uk_free--;
2062#ifdef INVARIANTS
2063 uma_dbg_alloc(zone, slab, item);
2064#endif
2065 /* Move this slab to the full list */
2066 if (slab->us_freecount == 0) {
2067 LIST_REMOVE(slab, us_link);
2068 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2069 }
2070
2071 return (item);
2072}
2073
2074static int
2075uma_zalloc_bucket(uma_zone_t zone, int flags)
2076{
2077 uma_bucket_t bucket;
2078 uma_slab_t slab;
2079 int16_t saved;
2080 int max, origflags = flags;
2081
2082 /*
2083 * Try this zone's free list first so we don't allocate extra buckets.
2084 */
2085 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2086 KASSERT(bucket->ub_cnt == 0,
2087 ("uma_zalloc_bucket: Bucket on free list is not empty."));
2088 LIST_REMOVE(bucket, ub_link);
2089 } else {
2090 int bflags;
2091
2092 bflags = (flags & ~M_ZERO);
2093 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2094 bflags |= M_NOVM;
2095
2096 ZONE_UNLOCK(zone);
2097 bucket = bucket_alloc(zone->uz_count, bflags);
2098 ZONE_LOCK(zone);
2099 }
2100
2101 if (bucket == NULL)
2102 return (0);
2103
2104#ifdef SMP
2105 /*
2106 * This code is here to limit the number of simultaneous bucket fills
2107 * for any given zone to the number of per cpu caches in this zone. This
2108 * is done so that we don't allocate more memory than we really need.
2109 */
2110 if (zone->uz_fills >= mp_ncpus)
2111 goto done;
2112
2113#endif
2114 zone->uz_fills++;
2115
2116 max = MIN(bucket->ub_entries, zone->uz_count);
2117 /* Try to keep the buckets totally full */
2118 saved = bucket->ub_cnt;
2119 while (bucket->ub_cnt < max &&
2120 (slab = uma_zone_slab(zone, flags)) != NULL) {
2121 while (slab->us_freecount && bucket->ub_cnt < max) {
2122 bucket->ub_bucket[bucket->ub_cnt++] =
2123 uma_slab_alloc(zone, slab);
2124 }
2125
2126 /* Don't block on the next fill */
2127 flags |= M_NOWAIT;
2128 }
2129
2130 /*
2131 * We unlock here because we need to call the zone's init.
2132 * It should be safe to unlock because the slab dealt with
2133 * above is already on the appropriate list within the keg
2134 * and the bucket we filled is not yet on any list, so we
2135 * own it.
2136 */
2137 if (zone->uz_init != NULL) {
2138 int i;
2139
2140 ZONE_UNLOCK(zone);
2141 for (i = saved; i < bucket->ub_cnt; i++)
2142 if (zone->uz_init(bucket->ub_bucket[i],
2143 zone->uz_keg->uk_size, origflags) != 0)
2144 break;
2145 /*
2146 * If we couldn't initialize the whole bucket, put the
2147 * rest back onto the freelist.
2148 */
2149 if (i != bucket->ub_cnt) {
2150 int j;
2151
2152 for (j = i; j < bucket->ub_cnt; j++) {
2153 uma_zfree_internal(zone, bucket->ub_bucket[j],
2154 NULL, SKIP_FINI, 0);
2155#ifdef INVARIANTS
2156 bucket->ub_bucket[j] = NULL;
2157#endif
2158 }
2159 bucket->ub_cnt = i;
2160 }
2161 ZONE_LOCK(zone);
2162 }
2163
2164 zone->uz_fills--;
2165 if (bucket->ub_cnt != 0) {
2166 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2167 bucket, ub_link);
2168 return (1);
2169 }
2170#ifdef SMP
2171done:
2172#endif
2173 bucket_free(bucket);
2174
2175 return (0);
2176}
2177/*
2178 * Allocates an item for an internal zone
2179 *
2180 * Arguments
2181 * zone The zone to alloc for.
2182 * udata The data to be passed to the constructor.
2183 * flags M_WAITOK, M_NOWAIT, M_ZERO.
2184 *
2185 * Returns
2186 * NULL if there is no memory and M_NOWAIT is set
2187 * An item if successful
2188 */
2189
2190static void *
2191uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
2192{
2193 uma_keg_t keg;
2194 uma_slab_t slab;
2195 void *item;
2196
2197 item = NULL;
2198 keg = zone->uz_keg;
2199
2200#ifdef UMA_DEBUG_ALLOC
2201 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2202#endif
2203 ZONE_LOCK(zone);
2204
2205 slab = uma_zone_slab(zone, flags);
2206 if (slab == NULL) {
2207 zone->uz_fails++;
2208 ZONE_UNLOCK(zone);
2209 return (NULL);
2210 }
2211
2212 item = uma_slab_alloc(zone, slab);
2213
2214 zone->uz_allocs++;
2215
2216 ZONE_UNLOCK(zone);
2217
2218 /*
2219 * We have to call both the zone's init (not the keg's init)
2220 * and the zone's ctor. This is because the item is going from
2221 * a keg slab directly to the user, and the user is expecting it
2222 * to be both zone-init'd as well as zone-ctor'd.
2223 */
2224 if (zone->uz_init != NULL) {
2225 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
2226 uma_zfree_internal(zone, item, udata, SKIP_FINI,
2227 ZFREE_STATFAIL | ZFREE_STATFREE);
2228 return (NULL);
2229 }
2230 }
2231 if (zone->uz_ctor != NULL) {
2232 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
2233 uma_zfree_internal(zone, item, udata, SKIP_DTOR,
2234 ZFREE_STATFAIL | ZFREE_STATFREE);
2235 return (NULL);
2236 }
2237 }
2238 if (flags & M_ZERO)
2239 bzero(item, keg->uk_size);
2240
2241 return (item);
2242}
2243
2244/* See uma.h */
2245void
2246uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2247{
2248 uma_keg_t keg;
2249 uma_cache_t cache;
2250 uma_bucket_t bucket;
2251 int bflags;
2252 int cpu;
2253
2254 keg = zone->uz_keg;
2255
2256#ifdef UMA_DEBUG_ALLOC_1
2257 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2258#endif
2259 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2260 zone->uz_name);
2261
2262 if (zone->uz_dtor)
2263 zone->uz_dtor(item, keg->uk_size, udata);
2264#ifdef INVARIANTS
2265 ZONE_LOCK(zone);
2266 if (keg->uk_flags & UMA_ZONE_MALLOC)
2267 uma_dbg_free(zone, udata, item);
2268 else
2269 uma_dbg_free(zone, NULL, item);
2270 ZONE_UNLOCK(zone);
2271#endif
2272 /*
2273 * The race here is acceptable. If we miss it we'll just have to wait
2274 * a little longer for the limits to be reset.
2275 */
2276 if (keg->uk_flags & UMA_ZFLAG_FULL)
2277 goto zfree_internal;
2278
2279 /*
2280 * If possible, free to the per-CPU cache. There are two
2281 * requirements for safe access to the per-CPU cache: (1) the thread
2282 * accessing the cache must not be preempted or yield during access,
2283 * and (2) the thread must not migrate CPUs without switching which
2284 * cache it accesses. We rely on a critical section to prevent
2285 * preemption and migration. We release the critical section in
2286 * order to acquire the zone mutex if we are unable to free to the
2287 * current cache; when we re-acquire the critical section, we must
2288 * detect and handle migration if it has occurred.
2289 */
2290zfree_restart:
2291 critical_enter();
2292 cpu = curcpu;
2293 cache = &zone->uz_cpu[cpu];
2294
2295zfree_start:
2296 bucket = cache->uc_freebucket;
2297
2298 if (bucket) {
2299 /*
2300 * Do we have room in our bucket? It is OK for this uz count
2301 * check to be slightly out of sync.
2302 */
2303
2304 if (bucket->ub_cnt < bucket->ub_entries) {
2305 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2306 ("uma_zfree: Freeing to non free bucket index."));
2307 bucket->ub_bucket[bucket->ub_cnt] = item;
2308 bucket->ub_cnt++;
2309 cache->uc_frees++;
2310 critical_exit();
2311 return;
2312 } else if (cache->uc_allocbucket) {
2313#ifdef UMA_DEBUG_ALLOC
2314 printf("uma_zfree: Swapping buckets.\n");
2315#endif
2316 /*
2317 * We have run out of space in our freebucket.
2318 * See if we can switch with our alloc bucket.
2319 */
2320 if (cache->uc_allocbucket->ub_cnt <
2321 cache->uc_freebucket->ub_cnt) {
2322 bucket = cache->uc_freebucket;
2323 cache->uc_freebucket = cache->uc_allocbucket;
2324 cache->uc_allocbucket = bucket;
2325 goto zfree_start;
2326 }
2327 }
2328 }
2329 /*
2330 * We can get here for two reasons:
2331 *
2332 * 1) The buckets are NULL
2333 * 2) The alloc and free buckets are both somewhat full.
2334 *
2335	 * We must go back to the zone, which requires acquiring the zone lock,
2336	 * which in turn means we must release and re-acquire the critical
2337	 * section.  Since the critical section is released, we may be
2338	 * preempted or migrate.  As such, make sure not to rely on any
2339	 * cache-specific thread-local state obtained prior to releasing
2340	 * the critical section.
2341 */
2342 critical_exit();
2343 ZONE_LOCK(zone);
2344 critical_enter();
2345 cpu = curcpu;
2346 cache = &zone->uz_cpu[cpu];
2347 if (cache->uc_freebucket != NULL) {
2348 if (cache->uc_freebucket->ub_cnt <
2349 cache->uc_freebucket->ub_entries) {
2350 ZONE_UNLOCK(zone);
2351 goto zfree_start;
2352 }
2353 if (cache->uc_allocbucket != NULL &&
2354 (cache->uc_allocbucket->ub_cnt <
2355 cache->uc_freebucket->ub_cnt)) {
2356 ZONE_UNLOCK(zone);
2357 goto zfree_start;
2358 }
2359 }
2360
2361 /* Since we have locked the zone we may as well send back our stats */
2362 zone->uz_allocs += cache->uc_allocs;
2363 cache->uc_allocs = 0;
2364 zone->uz_frees += cache->uc_frees;
2365 cache->uc_frees = 0;
2366
2367 bucket = cache->uc_freebucket;
2368 cache->uc_freebucket = NULL;
2369
2370 /* Can we throw this on the zone full list? */
2371 if (bucket != NULL) {
2372#ifdef UMA_DEBUG_ALLOC
2373 printf("uma_zfree: Putting old bucket on the free list.\n");
2374#endif
2375 /* ub_cnt is pointing to the last free item */
2376 KASSERT(bucket->ub_cnt != 0,
2377 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2378 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2379 bucket, ub_link);
2380 }
2381 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2382 LIST_REMOVE(bucket, ub_link);
2383 ZONE_UNLOCK(zone);
2384 cache->uc_freebucket = bucket;
2385 goto zfree_start;
2386 }
2387 /* We are no longer associated with this CPU. */
2388 critical_exit();
2389
2390 /* And the zone.. */
2391 ZONE_UNLOCK(zone);
2392
2393#ifdef UMA_DEBUG_ALLOC
2394 printf("uma_zfree: Allocating new free bucket.\n");
2395#endif
2396 bflags = M_NOWAIT;
2397
2398 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2399 bflags |= M_NOVM;
2400 bucket = bucket_alloc(zone->uz_count, bflags);
2401 if (bucket) {
2402 ZONE_LOCK(zone);
2403 LIST_INSERT_HEAD(&zone->uz_free_bucket,
2404 bucket, ub_link);
2405 ZONE_UNLOCK(zone);
2406 goto zfree_restart;
2407 }
2408
2409 /*
2410 * If nothing else caught this, we'll just do an internal free.
2411 */
2412zfree_internal:
2413 uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
2414
2415 return;
2416}
2417
2418/*
2419 * Frees an item to an INTERNAL zone or allocates a free bucket
2420 *
2421 * Arguments:
2422 * zone The zone to free to
2423 * item The item we're freeing
2424 * udata User supplied data for the dtor
2425 * skip Skip dtors and finis
2426 */
2427static void
2428uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
2429 enum zfreeskip skip, int flags)
2430{
2431 uma_slab_t slab;
2432 uma_slabrefcnt_t slabref;
2433 uma_keg_t keg;
2434 u_int8_t *mem;
2435 u_int8_t freei;
2436
2437 keg = zone->uz_keg;
2438
2439 if (skip < SKIP_DTOR && zone->uz_dtor)
2440 zone->uz_dtor(item, keg->uk_size, udata);
2441 if (skip < SKIP_FINI && zone->uz_fini)
2442 zone->uz_fini(item, keg->uk_size);
2443
2444 ZONE_LOCK(zone);
2445
2446 if (flags & ZFREE_STATFAIL)
2447 zone->uz_fails++;
2448 if (flags & ZFREE_STATFREE)
2449 zone->uz_frees++;
2450
2451 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
2452 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2453 if (keg->uk_flags & UMA_ZONE_HASH)
2454 slab = hash_sfind(&keg->uk_hash, mem);
2455 else {
2456 mem += keg->uk_pgoff;
2457 slab = (uma_slab_t)mem;
2458 }
2459 } else {
2460 slab = (uma_slab_t)udata;
2461 }
2462
2463 /* Do we need to remove from any lists? */
2464 if (slab->us_freecount+1 == keg->uk_ipers) {
2465 LIST_REMOVE(slab, us_link);
2466 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2467 } else if (slab->us_freecount == 0) {
2468 LIST_REMOVE(slab, us_link);
2469 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2470 }
2471
2472 /* Slab management stuff */
2473 freei = ((unsigned long)item - (unsigned long)slab->us_data)
2474 / keg->uk_rsize;
2475
2476#ifdef INVARIANTS
2477 if (!skip)
2478 uma_dbg_free(zone, slab, item);
2479#endif
2480
2481 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2482 slabref = (uma_slabrefcnt_t)slab;
2483 slabref->us_freelist[freei].us_item = slab->us_firstfree;
2484 } else {
2485 slab->us_freelist[freei].us_item = slab->us_firstfree;
2486 }
2487 slab->us_firstfree = freei;
2488 slab->us_freecount++;
2489
2490 /* Zone statistics */
2491 keg->uk_free++;
2492
2493 if (keg->uk_flags & UMA_ZFLAG_FULL) {
2494 if (keg->uk_pages < keg->uk_maxpages)
2495 keg->uk_flags &= ~UMA_ZFLAG_FULL;
2496
2497 /* We can handle one more allocation */
2498 wakeup_one(keg);
2499 }
2500
2501 ZONE_UNLOCK(zone);
2502}
2503
2504/* See uma.h */
2505void
2506uma_zone_set_max(uma_zone_t zone, int nitems)
2507{
2508 uma_keg_t keg;
2509
2510 keg = zone->uz_keg;
2511 ZONE_LOCK(zone);
2512 if (keg->uk_ppera > 1)
2513 keg->uk_maxpages = nitems * keg->uk_ppera;
2514 else
2515 keg->uk_maxpages = nitems / keg->uk_ipers;
2516
2517 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2518 keg->uk_maxpages++;
2519
2520 ZONE_UNLOCK(zone);
2521}
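
/*
 * Illustrative call, again using the hypothetical foo_zone: cap the zone
 * at roughly 1024 items.  As the code above shows, the limit is stored as
 * a page count and rounded up to whole slabs, so the effective item limit
 * may be slightly higher than requested.
 *
 *	uma_zone_set_max(foo_zone, 1024);
 */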
2522
2523/* See uma.h */
2524void
2525uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2526{
2527 ZONE_LOCK(zone);
2528 KASSERT(zone->uz_keg->uk_pages == 0,
2529 ("uma_zone_set_init on non-empty keg"));
2530 zone->uz_keg->uk_init = uminit;
2531 ZONE_UNLOCK(zone);
2532}
2533
2534/* See uma.h */
2535void
2536uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2537{
2538 ZONE_LOCK(zone);
2539 KASSERT(zone->uz_keg->uk_pages == 0,
2540 ("uma_zone_set_fini on non-empty keg"));
2541 zone->uz_keg->uk_fini = fini;
2542 ZONE_UNLOCK(zone);
2543}
2544
2545/* See uma.h */
2546void
2547uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2548{
2549 ZONE_LOCK(zone);
2550 KASSERT(zone->uz_keg->uk_pages == 0,
2551 ("uma_zone_set_zinit on non-empty keg"));
2552 zone->uz_init = zinit;
2553 ZONE_UNLOCK(zone);
2554}
2555
2556/* See uma.h */
2557void
2558uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2559{
2560 ZONE_LOCK(zone);
2561 KASSERT(zone->uz_keg->uk_pages == 0,
2562 ("uma_zone_set_zfini on non-empty keg"));
2563 zone->uz_fini = zfini;
2564 ZONE_UNLOCK(zone);
2565}
2566
2567/* See uma.h */
2568/* XXX uk_freef is not actually used with the zone locked */
2569void
2570uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2571{
2572 ZONE_LOCK(zone);
2573 zone->uz_keg->uk_freef = freef;
2574 ZONE_UNLOCK(zone);
2575}
2576
2577/* See uma.h */
2578/* XXX uk_allocf is not actually used with the zone locked */
2579void
2580uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2581{
2582 ZONE_LOCK(zone);
2583 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2584 zone->uz_keg->uk_allocf = allocf;
2585 ZONE_UNLOCK(zone);
2586}
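
/*
 * Sketch of overriding the backing allocator for a zone, assuming
 * hypothetical foo_page_alloc()/foo_page_free() routines that match the
 * uma_alloc/uma_free types from uma.h (page_alloc()/page_free() elsewhere
 * in this file are the stock page-backed implementations).
 *
 *	uma_zone_set_allocf(foo_zone, foo_page_alloc);
 *	uma_zone_set_freef(foo_zone, foo_page_free);
 */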
2587
2588/* See uma.h */
2589int
2590uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2591{
2592 uma_keg_t keg;
2593 vm_offset_t kva;
2594 int pages;
2595
2596 keg = zone->uz_keg;
2597 pages = count / keg->uk_ipers;
2598
2599 if (pages * keg->uk_ipers < count)
2600 pages++;
2601
2602 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2603
2604 if (kva == 0)
2605 return (0);
2606 if (obj == NULL) {
2607 obj = vm_object_allocate(OBJT_DEFAULT,
2608 pages);
2609 } else {
2610 VM_OBJECT_LOCK_INIT(obj, "uma object");
2611 _vm_object_allocate(OBJT_DEFAULT,
2612 pages, obj);
2613 }
2614 ZONE_LOCK(zone);
2615 keg->uk_kva = kva;
2616 keg->uk_obj = obj;
2617 keg->uk_maxpages = pages;
2618 keg->uk_allocf = obj_alloc;
2619 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2620 ZONE_UNLOCK(zone);
2621 return (1);
2622}
2623
2624/* See uma.h */
2625void
2626uma_prealloc(uma_zone_t zone, int items)
2627{
2628 int slabs;
2629 uma_slab_t slab;
2630 uma_keg_t keg;
2631
2632 keg = zone->uz_keg;
2633 ZONE_LOCK(zone);
2634 slabs = items / keg->uk_ipers;
2635 if (slabs * keg->uk_ipers < items)
2636 slabs++;
2637 while (slabs > 0) {
2638 slab = slab_zalloc(zone, M_WAITOK);
2639 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2640 slabs--;
2641 }
2642 ZONE_UNLOCK(zone);
2643}
2644
2645/* See uma.h */
2646u_int32_t *
2647uma_find_refcnt(uma_zone_t zone, void *item)
2648{
2649 uma_slabrefcnt_t slabref;
2650 uma_keg_t keg;
2651 u_int32_t *refcnt;
2652 int idx;
2653
2654 keg = zone->uz_keg;
2655 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2656 (~UMA_SLAB_MASK));
2657 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2658 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2659 idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2660 / keg->uk_rsize;
2661 refcnt = &slabref->us_freelist[idx].us_refcnt;
2662 return refcnt;
2663}
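
/*
 * Sketch of looking up the per-item reference counter for an item from a
 * zone created with UMA_ZONE_REFCNT; "refzone" and "item" are placeholders.
 * The returned pointer addresses the counter kept in the slab's freelist
 * entry, which the caller manages itself.
 *
 *	u_int32_t *cnt;
 *
 *	cnt = uma_find_refcnt(refzone, item);
 *	*cnt = 1;
 */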
2664
2665/* See uma.h */
2666void
2667uma_reclaim(void)
2668{
2669#ifdef UMA_DEBUG
2670 printf("UMA: vm asked us to release pages!\n");
2671#endif
2672 bucket_enable();
2673 zone_foreach(zone_drain);
2674 /*
2675	 * Some slabs may have been freed but this zone will be visited early;
2676	 * we visit it again so that we can free pages that became empty once
2677	 * the other zones were drained.  We have to do the same for buckets.
2678 */
2679 zone_drain(slabzone);
2680 zone_drain(slabrefzone);
2681 bucket_zone_drain();
2682}
2683
2684/* See uma.h */
2685int
2686uma_zone_exhausted(uma_zone_t zone)
2687{
2688 int full;
2689
2690 ZONE_LOCK(zone);
2691 full = (zone->uz_keg->uk_flags & UMA_ZFLAG_FULL);
2692 ZONE_UNLOCK(zone);
2693 return (full);
2694}
2695
2696void *
2697uma_large_malloc(int size, int wait)
2698{
2699 void *mem;
2700 uma_slab_t slab;
2701 u_int8_t flags;
2702
2703 slab = uma_zalloc_internal(slabzone, NULL, wait);
2704 if (slab == NULL)
2705 return (NULL);
2706 mem = page_alloc(NULL, size, &flags, wait);
2707 if (mem) {
2708 vsetslab((vm_offset_t)mem, slab);
2709 slab->us_data = mem;
2710 slab->us_flags = flags | UMA_SLAB_MALLOC;
2711 slab->us_size = size;
2712 } else {
2713 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE,
2714 ZFREE_STATFAIL | ZFREE_STATFREE);
2715 }
2716
2717 return (mem);
2718}
2719
2720void
2721uma_large_free(uma_slab_t slab)
2722{
2723 vsetobj((vm_offset_t)slab->us_data, kmem_object);
2724 page_free(slab->us_data, slab->us_size, slab->us_flags);
2725 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
2726}
2727
2728void
2729uma_print_stats(void)
2730{
2731 zone_foreach(uma_print_zone);
2732}
2733
2734static void
2735slab_print(uma_slab_t slab)
2736{
2737 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2738 slab->us_keg, slab->us_data, slab->us_freecount,
2739 slab->us_firstfree);
2740}
2741
2742static void
2743cache_print(uma_cache_t cache)
2744{
2745 printf("alloc: %p(%d), free: %p(%d)\n",
2746 cache->uc_allocbucket,
2747 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2748 cache->uc_freebucket,
2749 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2750}
2751
2752void
2753uma_print_zone(uma_zone_t zone)
2754{
2755 uma_cache_t cache;
2756 uma_keg_t keg;
2757 uma_slab_t slab;
2758 int i;
2759
2760 keg = zone->uz_keg;
2761 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2762 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2763 keg->uk_ipers, keg->uk_ppera,
2764 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
2765 printf("Part slabs:\n");
2766 LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2767 slab_print(slab);
2768 printf("Free slabs:\n");
2769 LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2770 slab_print(slab);
2771 printf("Full slabs:\n");
2772 LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2773 slab_print(slab);
2774 for (i = 0; i <= mp_maxid; i++) {
2775 if (CPU_ABSENT(i))
2776 continue;
2777 cache = &zone->uz_cpu[i];
2778 printf("CPU %d Cache:\n", i);
2779 cache_print(cache);
2780 }
2781}
2782
2783#ifdef DDB
2784/*
2785 * Generate statistics across both the zone and its per-CPU caches.  Return
2786 * the desired statistics if the pointer is non-NULL for that statistic.
2787 *
2788 * Note: does not update the zone statistics, as it can't safely clear the
2789 * per-CPU cache statistic.
2790 *
2791 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
2792 * safe from off-CPU; we should modify the caches to track this information
2793 * directly so that we don't have to.
2794 */
2795static void
2796uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
2797 u_int64_t *freesp)
2798{
2799 uma_cache_t cache;
2800 u_int64_t allocs, frees;
2801 int cachefree, cpu;
2802
2803 allocs = frees = 0;
2804 cachefree = 0;
2805 for (cpu = 0; cpu <= mp_maxid; cpu++) {
2806 if (CPU_ABSENT(cpu))
2807 continue;
2808 cache = &z->uz_cpu[cpu];
2809 if (cache->uc_allocbucket != NULL)
2810 cachefree += cache->uc_allocbucket->ub_cnt;
2811 if (cache->uc_freebucket != NULL)
2812 cachefree += cache->uc_freebucket->ub_cnt;
2813 allocs += cache->uc_allocs;
2814 frees += cache->uc_frees;
2815 }
2816 allocs += z->uz_allocs;
2817 frees += z->uz_frees;
2818 if (cachefreep != NULL)
2819 *cachefreep = cachefree;
2820 if (allocsp != NULL)
2821 *allocsp = allocs;
2822 if (freesp != NULL)
2823 *freesp = frees;
2824}
2825#endif /* DDB */
2826
2827static int
2828sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
2829{
2830 uma_keg_t kz;
2831 uma_zone_t z;
2832 int count;
2833
2834 count = 0;
2835 mtx_lock(&uma_mtx);
2836 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2837 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2838 count++;
2839 }
2840 mtx_unlock(&uma_mtx);
2841 return (sysctl_handle_int(oidp, &count, 0, req));
2842}
2843
2844static int
2845sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
2846{
2847 struct uma_stream_header ush;
2848 struct uma_type_header uth;
2849 struct uma_percpu_stat ups;
2850 uma_bucket_t bucket;
2851 struct sbuf sbuf;
2852 uma_cache_t cache;
2853 uma_keg_t kz;
2854 uma_zone_t z;
2855 char *buffer;
2856 int buflen, count, error, i;
2857
2858 mtx_lock(&uma_mtx);
2859restart:
2860 mtx_assert(&uma_mtx, MA_OWNED);
2861 count = 0;
2862 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2863 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2864 count++;
2865 }
2866 mtx_unlock(&uma_mtx);
2867
2868 buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) *
2869 (mp_maxid + 1)) + 1;
2870 buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
2871
2872 mtx_lock(&uma_mtx);
2873 i = 0;
2874 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2875 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2876 i++;
2877 }
2878 if (i > count) {
2879 free(buffer, M_TEMP);
2880 goto restart;
2881 }
2882 count = i;
2883
2884 sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN);
2885
2886 /*
2887 * Insert stream header.
2888 */
2889 bzero(&ush, sizeof(ush));
2890 ush.ush_version = UMA_STREAM_VERSION;
2891 ush.ush_maxcpus = (mp_maxid + 1);
2892 ush.ush_count = count;
2893 if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) {
2894 mtx_unlock(&uma_mtx);
2895 error = ENOMEM;
2896 goto out;
2897 }
2898
2899 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2900 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
2901 bzero(&uth, sizeof(uth));
2902 ZONE_LOCK(z);
2903 strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
2904 uth.uth_align = kz->uk_align;
2905 uth.uth_pages = kz->uk_pages;
2906 uth.uth_keg_free = kz->uk_free;
2907 uth.uth_size = kz->uk_size;
2908 uth.uth_rsize = kz->uk_rsize;
2909 uth.uth_maxpages = kz->uk_maxpages;
2910 if (kz->uk_ppera > 1)
2911 uth.uth_limit = kz->uk_maxpages /
2912 kz->uk_ppera;
2913 else
2914 uth.uth_limit = kz->uk_maxpages *
2915 kz->uk_ipers;
2916
2917			 * A zone is secondary if it is not the first entry
2918 * A zone is secondary is it is not the first entry
2919 * on the keg's zone list.
2920 */
2921 if ((kz->uk_flags & UMA_ZONE_SECONDARY) &&
2922 (LIST_FIRST(&kz->uk_zones) != z))
2923 uth.uth_zone_flags = UTH_ZONE_SECONDARY;
2924
2925 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
2926 uth.uth_zone_free += bucket->ub_cnt;
2927 uth.uth_allocs = z->uz_allocs;
2928 uth.uth_frees = z->uz_frees;
2929 uth.uth_fails = z->uz_fails;
2930 if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
2931 ZONE_UNLOCK(z);
2932 mtx_unlock(&uma_mtx);
2933 error = ENOMEM;
2934 goto out;
2935 }
2936 /*
2937 * While it is not normally safe to access the cache
2938 * bucket pointers while not on the CPU that owns the
2939 * cache, we only allow the pointers to be exchanged
2940 * without the zone lock held, not invalidated, so
2941 * accept the possible race associated with bucket
2942 * exchange during monitoring.
2943 */
2944 for (i = 0; i < (mp_maxid + 1); i++) {
2945 bzero(&ups, sizeof(ups));
2946 if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
2947 goto skip;
2948 if (CPU_ABSENT(i))
2949 goto skip;
2950 cache = &z->uz_cpu[i];
2951 if (cache->uc_allocbucket != NULL)
2952 ups.ups_cache_free +=
2953 cache->uc_allocbucket->ub_cnt;
2954 if (cache->uc_freebucket != NULL)
2955 ups.ups_cache_free +=
2956 cache->uc_freebucket->ub_cnt;
2957 ups.ups_allocs = cache->uc_allocs;
2958 ups.ups_frees = cache->uc_frees;
2959skip:
2960 if (sbuf_bcat(&sbuf, &ups, sizeof(ups)) < 0) {
2961 ZONE_UNLOCK(z);
2962 mtx_unlock(&uma_mtx);
2963 error = ENOMEM;
2964 goto out;
2965 }
2966 }
2967 ZONE_UNLOCK(z);
2968 }
2969 }
2970 mtx_unlock(&uma_mtx);
2971 sbuf_finish(&sbuf);
2972 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
2973out:
2974 free(buffer, M_TEMP);
2975 return (error);
2976}
2977
2978#ifdef DDB
2979DB_SHOW_COMMAND(uma, db_show_uma)
2980{
2981 u_int64_t allocs, frees;
2982 uma_bucket_t bucket;
2983 uma_keg_t kz;
2984 uma_zone_t z;
2985 int cachefree;
2986
2987 db_printf("%18s %8s %8s %8s %12s\n", "Zone", "Size", "Used", "Free",
2988 "Requests");
2989 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2990 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
2991 if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
2992 allocs = z->uz_allocs;
2993 frees = z->uz_frees;
2994 cachefree = 0;
2995 } else
2996 uma_zone_sumstat(z, &cachefree, &allocs,
2997 &frees);
2998 if (!((kz->uk_flags & UMA_ZONE_SECONDARY) &&
2999 (LIST_FIRST(&kz->uk_zones) != z)))
3000 cachefree += kz->uk_free;
3001 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
3002 cachefree += bucket->ub_cnt;
3003 db_printf("%18s %8ju %8jd %8d %12ju\n", z->uz_name,
3004 (uintmax_t)kz->uk_size,
3005 (intmax_t)(allocs - frees), cachefree,
3006 (uintmax_t)allocs);
3007 }
3008 }
3009}
3010#endif