uma_core.c (148077) uma_core.c (148078)
1/*-
2 * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff@FreeBSD.org>
3 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
2 * Copyright (c) 2004-2005 Robert N. M. Watson
4 * Copyright (c) 2004-2005 Robert N. M. Watson
3 * Copyright (c) 2004, 2005,
4 * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved.
5 * Copyright (c) 2002, 2003, 2004, 2005,
6 * Jeffrey Roberson <jeff@FreeBSD.org>. All rights reserved.
5 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice unmodified, this list of conditions, and the following
13 * disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/*
 31 * uma_core.c  Implementation of the Universal Memory Allocator
32 *
33 * This allocator is intended to replace the multitude of similar object caches
34 * in the standard FreeBSD kernel. The intent is to be flexible as well as
 35 * efficient. A primary design goal is to return unused memory to the rest of
36 * the system. This will make the system as a whole more flexible due to the
37 * ability to move memory to subsystems which most need it instead of leaving
38 * pools of reserved memory unused.
39 *
40 * The basic ideas stem from similar slab/zone based allocators whose algorithms
41 * are well known.
42 *
43 */
44
45/*
46 * TODO:
47 * - Improve memory usage for large allocations
48 * - Investigate cache size adjustments
49 */
50
51#include <sys/cdefs.h>
52__FBSDID("$FreeBSD: head/sys/vm/uma_core.c 148077 2005-07-16 09:40:34Z rwatson $");
51__FBSDID("$FreeBSD: head/sys/vm/uma_core.c 148078 2005-07-16 09:51:52Z rwatson $");
53
54/* I should really use ktr.. */
55/*
56#define UMA_DEBUG 1
57#define UMA_DEBUG_ALLOC 1
58#define UMA_DEBUG_ALLOC_1 1
59*/
60
61#include "opt_param.h"
62#include <sys/param.h>
63#include <sys/systm.h>
64#include <sys/kernel.h>
65#include <sys/types.h>
66#include <sys/queue.h>
67#include <sys/malloc.h>
68#include <sys/ktr.h>
69#include <sys/lock.h>
70#include <sys/sysctl.h>
71#include <sys/mutex.h>
72#include <sys/proc.h>
73#include <sys/sbuf.h>
74#include <sys/smp.h>
75#include <sys/vmmeter.h>
76
77#include <vm/vm.h>
78#include <vm/vm_object.h>
79#include <vm/vm_page.h>
80#include <vm/vm_param.h>
81#include <vm/vm_map.h>
82#include <vm/vm_kern.h>
83#include <vm/vm_extern.h>
84#include <vm/uma.h>
85#include <vm/uma_int.h>
86#include <vm/uma_dbg.h>
87
88#include <machine/vmparam.h>
89
90/*
91 * This is the zone and keg from which all zones are spawned. The idea is that
92 * even the zone & keg heads are allocated from the allocator, so we use the
93 * bss section to bootstrap us.
94 */
95static struct uma_keg masterkeg;
96static struct uma_zone masterzone_k;
97static struct uma_zone masterzone_z;
98static uma_zone_t kegs = &masterzone_k;
99static uma_zone_t zones = &masterzone_z;
100
101/* This is the zone from which all of uma_slab_t's are allocated. */
102static uma_zone_t slabzone;
103static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
104
105/*
106 * The initial hash tables come out of this zone so they can be allocated
107 * prior to malloc coming up.
108 */
109static uma_zone_t hashzone;
110
111static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
112
113/*
114 * Are we allowed to allocate buckets?
115 */
116static int bucketdisable = 1;
117
118/* Linked list of all kegs in the system */
119static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
120
121/* This mutex protects the keg list */
122static struct mtx uma_mtx;
123
124/* Linked list of boot time pages */
125static LIST_HEAD(,uma_slab) uma_boot_pages =
126 LIST_HEAD_INITIALIZER(&uma_boot_pages);
127
128/* Count of free boottime pages */
129static int uma_boot_free = 0;
130
131/* Is the VM done starting up? */
132static int booted = 0;
133
134/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
135static u_int uma_max_ipers;
136static u_int uma_max_ipers_ref;
137
138/*
139 * This is the handle used to schedule events that need to happen
140 * outside of the allocation fast path.
141 */
142static struct callout uma_callout;
143#define UMA_TIMEOUT 20 /* Seconds for callout interval. */
144
145/*
146 * This structure is passed as the zone ctor arg so that I don't have to create
147 * a special allocation function just for zones.
148 */
149struct uma_zctor_args {
150 char *name;
151 size_t size;
152 uma_ctor ctor;
153 uma_dtor dtor;
154 uma_init uminit;
155 uma_fini fini;
156 uma_keg_t keg;
157 int align;
158 u_int32_t flags;
159};
160
161struct uma_kctor_args {
162 uma_zone_t zone;
163 size_t size;
164 uma_init uminit;
165 uma_fini fini;
166 int align;
167 u_int32_t flags;
168};
169
170struct uma_bucket_zone {
171 uma_zone_t ubz_zone;
172 char *ubz_name;
173 int ubz_entries;
174};
175
176#define BUCKET_MAX 128
177
178struct uma_bucket_zone bucket_zones[] = {
179 { NULL, "16 Bucket", 16 },
180 { NULL, "32 Bucket", 32 },
181 { NULL, "64 Bucket", 64 },
182 { NULL, "128 Bucket", 128 },
183 { NULL, NULL, 0}
184};
185
186#define BUCKET_SHIFT 4
187#define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
188
189/*
190 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
191 * of approximately the right size.
192 */
193static uint8_t bucket_size[BUCKET_ZONES];
194
195/*
196 * Flags and enumerations to be passed to internal functions.
197 */
198enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
199
200#define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */
201
202/* Prototypes.. */
203
204static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
205static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
206static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
207static void page_free(void *, int, u_int8_t);
208static uma_slab_t slab_zalloc(uma_zone_t, int);
209static void cache_drain(uma_zone_t);
210static void bucket_drain(uma_zone_t, uma_bucket_t);
211static void bucket_cache_drain(uma_zone_t zone);
212static int keg_ctor(void *, int, void *, int);
213static void keg_dtor(void *, int, void *);
214static int zone_ctor(void *, int, void *, int);
215static void zone_dtor(void *, int, void *);
216static int zero_init(void *, int, int);
217static void zone_small_init(uma_zone_t zone);
218static void zone_large_init(uma_zone_t zone);
219static void zone_foreach(void (*zfunc)(uma_zone_t));
220static void zone_timeout(uma_zone_t zone);
221static int hash_alloc(struct uma_hash *);
222static int hash_expand(struct uma_hash *, struct uma_hash *);
223static void hash_free(struct uma_hash *hash);
224static void uma_timeout(void *);
225static void uma_startup3(void);
226static void *uma_zalloc_internal(uma_zone_t, void *, int);
227static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
228 int);
229static void bucket_enable(void);
230static void bucket_init(void);
231static uma_bucket_t bucket_alloc(int, int);
232static void bucket_free(uma_bucket_t);
233static void bucket_zone_drain(void);
234static int uma_zalloc_bucket(uma_zone_t zone, int flags);
235static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
236static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
237static void zone_drain(uma_zone_t);
238static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
239 uma_fini fini, int align, u_int32_t flags);
240
241void uma_print_zone(uma_zone_t);
242void uma_print_stats(void);
243static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
244static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
245static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
246
247#ifdef WITNESS
248static int nosleepwithlocks = 1;
249SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
250 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
251#else
252static int nosleepwithlocks = 0;
253SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
254 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
255#endif
256SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
257 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
258SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
259
260SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
261 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
262
263SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
264 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
265
266/*
267 * This routine checks to see whether or not it's safe to enable buckets.
268 */
269
270static void
271bucket_enable(void)
272{
273 if (cnt.v_free_count < cnt.v_free_min)
274 bucketdisable = 1;
275 else
276 bucketdisable = 0;
277}
278
279/*
280 * Initialize bucket_zones, the array of zones of buckets of various sizes.
281 *
282 * For each zone, calculate the memory required for each bucket, consisting
283 * of the header and an array of pointers. Initialize bucket_size[] to point
284 * the range of appropriate bucket sizes at the zone.
285 */
286static void
287bucket_init(void)
288{
289 struct uma_bucket_zone *ubz;
290 int i;
291 int j;
292
293 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
294 int size;
295
296 ubz = &bucket_zones[j];
297 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
298 size += sizeof(void *) * ubz->ubz_entries;
299 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
300 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
301 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
302 bucket_size[i >> BUCKET_SHIFT] = j;
303 }
304}
305
306/*
307 * Given a desired number of entries for a bucket, return the zone from which
308 * to allocate the bucket.
309 */
310static struct uma_bucket_zone *
311bucket_zone_lookup(int entries)
312{
313 int idx;
314
315 idx = howmany(entries, 1 << BUCKET_SHIFT);
316 return (&bucket_zones[bucket_size[idx]]);
317}
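/*
 * Worked example (illustrative): with BUCKET_SHIFT == 4, bucket_init()
 * leaves bucket_size[] as { 0, 0, 1, 2, 2, 3, 3, 3, 3 }.  A request for
 * 20 entries gives howmany(20, 16) == 2, and bucket_size[2] == 1 selects
 * the "32 Bucket" zone; a request for 16 entries maps through
 * bucket_size[1] == 0 to the "16 Bucket" zone.
 */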
318
319static uma_bucket_t
320bucket_alloc(int entries, int bflags)
321{
322 struct uma_bucket_zone *ubz;
323 uma_bucket_t bucket;
324
325 /*
326 * This is to stop us from allocating per cpu buckets while we're
327 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
328 * boot pages. This also prevents us from allocating buckets in
329 * low memory situations.
330 */
331 if (bucketdisable)
332 return (NULL);
333
334 ubz = bucket_zone_lookup(entries);
335 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
336 if (bucket) {
337#ifdef INVARIANTS
338 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
339#endif
340 bucket->ub_cnt = 0;
341 bucket->ub_entries = ubz->ubz_entries;
342 }
343
344 return (bucket);
345}
346
347static void
348bucket_free(uma_bucket_t bucket)
349{
350 struct uma_bucket_zone *ubz;
351
352 ubz = bucket_zone_lookup(bucket->ub_entries);
353 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE, 0);
354}
355
356static void
357bucket_zone_drain(void)
358{
359 struct uma_bucket_zone *ubz;
360
361 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
362 zone_drain(ubz->ubz_zone);
363}
364
365
366/*
367 * Routine called by timeout which is used to fire off some time interval
368 * based calculations. (stats, hash size, etc.)
369 *
370 * Arguments:
371 * arg Unused
372 *
373 * Returns:
374 * Nothing
375 */
376static void
377uma_timeout(void *unused)
378{
379 bucket_enable();
380 zone_foreach(zone_timeout);
381
382 /* Reschedule this event */
383 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
384}
385
386/*
 387 * Routine to perform timeout driven calculations. This expands the
 388 * zone's hash table when the slab count warrants it.
389 *
390 * Arguments:
391 * zone The zone to operate on
392 *
393 * Returns:
394 * Nothing
395 */
396static void
397zone_timeout(uma_zone_t zone)
398{
399 uma_keg_t keg;
401
402 keg = zone->uz_keg;
404
405 /*
406 * Expand the zone hash table.
407 *
408 * This is done if the number of slabs is larger than the hash size.
 409 * What I'm trying to do here is completely eliminate collisions. This
410 * may be a little aggressive. Should I allow for two collisions max?
411 */
412 ZONE_LOCK(zone);
413 if (keg->uk_flags & UMA_ZONE_HASH &&
414 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
415 struct uma_hash newhash;
416 struct uma_hash oldhash;
417 int ret;
418
419 /*
420 * This is so involved because allocating and freeing
421 * while the zone lock is held will lead to deadlock.
422 * I have to do everything in stages and check for
423 * races.
424 */
425 newhash = keg->uk_hash;
426 ZONE_UNLOCK(zone);
427 ret = hash_alloc(&newhash);
428 ZONE_LOCK(zone);
429 if (ret) {
430 if (hash_expand(&keg->uk_hash, &newhash)) {
431 oldhash = keg->uk_hash;
432 keg->uk_hash = newhash;
433 } else
434 oldhash = newhash;
435
436 ZONE_UNLOCK(zone);
437 hash_free(&oldhash);
438 ZONE_LOCK(zone);
439 }
440 }
441 ZONE_UNLOCK(zone);
442}
443
444/*
445 * Allocate and zero fill the next sized hash table from the appropriate
446 * backing store.
447 *
448 * Arguments:
449 * hash A new hash structure with the old hash size in uh_hashsize
450 *
451 * Returns:
 452 * 1 on success and 0 on failure.
453 */
454static int
455hash_alloc(struct uma_hash *hash)
456{
457 int oldsize;
458 int alloc;
459
460 oldsize = hash->uh_hashsize;
461
462 /* We're just going to go to a power of two greater */
463 if (oldsize) {
464 hash->uh_hashsize = oldsize * 2;
465 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
466 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
467 M_UMAHASH, M_NOWAIT);
468 } else {
469 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
470 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
471 M_WAITOK);
472 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
473 }
474 if (hash->uh_slab_hash) {
475 bzero(hash->uh_slab_hash, alloc);
476 hash->uh_hashmask = hash->uh_hashsize - 1;
477 return (1);
478 }
479
480 return (0);
481}
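/*
 * Sizing note (illustrative): the table size is always a power of two,
 * so uh_hashmask = uh_hashsize - 1 lets UMA_HASH() reduce a slab's data
 * address to a bucket index with a single mask.  Each expansion simply
 * doubles the table, e.g. if UMA_HASH_SIZE_INIT were 32 the table would
 * grow 32 -> 64 -> 128 as the keg's slab count crosses each threshold
 * checked in zone_timeout().
 */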
482
483/*
484 * Expands the hash table for HASH zones. This is done from zone_timeout
485 * to reduce collisions. This must not be done in the regular allocation
486 * path, otherwise, we can recurse on the vm while allocating pages.
487 *
488 * Arguments:
489 * oldhash The hash you want to expand
490 * newhash The hash structure for the new table
491 *
492 * Returns:
 493 * 1 if the entries were moved into newhash, 0 otherwise.
496 */
497static int
498hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
499{
500 uma_slab_t slab;
501 int hval;
502 int i;
503
504 if (!newhash->uh_slab_hash)
505 return (0);
506
507 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
508 return (0);
509
510 /*
511 * I need to investigate hash algorithms for resizing without a
512 * full rehash.
513 */
514
515 for (i = 0; i < oldhash->uh_hashsize; i++)
516 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
517 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
518 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
519 hval = UMA_HASH(newhash, slab->us_data);
520 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
521 slab, us_hlink);
522 }
523
524 return (1);
525}
526
527/*
528 * Free the hash bucket to the appropriate backing store.
529 *
530 * Arguments:
 531 * hash The uma_hash whose slab hash table should be freed
533 *
534 * Returns:
535 * Nothing
536 */
537static void
538hash_free(struct uma_hash *hash)
539{
540 if (hash->uh_slab_hash == NULL)
541 return;
542 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
543 uma_zfree_internal(hashzone,
544 hash->uh_slab_hash, NULL, SKIP_NONE, 0);
545 else
546 free(hash->uh_slab_hash, M_UMAHASH);
547}
548
549/*
550 * Frees all outstanding items in a bucket
551 *
552 * Arguments:
553 * zone The zone to free to, must be unlocked.
554 * bucket The free/alloc bucket with items, cpu queue must be locked.
555 *
556 * Returns:
557 * Nothing
558 */
559
560static void
561bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
562{
563 uma_slab_t slab;
564 int mzone;
565 void *item;
566
567 if (bucket == NULL)
568 return;
569
570 slab = NULL;
571 mzone = 0;
572
573 /* We have to lookup the slab again for malloc.. */
574 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
575 mzone = 1;
576
577 while (bucket->ub_cnt > 0) {
578 bucket->ub_cnt--;
579 item = bucket->ub_bucket[bucket->ub_cnt];
580#ifdef INVARIANTS
581 bucket->ub_bucket[bucket->ub_cnt] = NULL;
582 KASSERT(item != NULL,
583 ("bucket_drain: botched ptr, item is NULL"));
584#endif
585 /*
586 * This is extremely inefficient. The slab pointer was passed
587 * to uma_zfree_arg, but we lost it because the buckets don't
588 * hold them. This will go away when free() gets a size passed
589 * to it.
590 */
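		/*
		 * Masking the item address with ~UMA_SLAB_MASK rounds it down
		 * to its slab (page) boundary so vtoslab() can recover the
		 * slab header for this malloc-backed item.
		 */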
591 if (mzone)
592 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
593 uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
594 }
595}
596
597/*
598 * Drains the per cpu caches for a zone.
599 *
 600 * NOTE: This may only be called while the zone is being torn down, and not
601 * during normal operation. This is necessary in order that we do not have
602 * to migrate CPUs to drain the per-CPU caches.
603 *
604 * Arguments:
605 * zone The zone to drain, must be unlocked.
606 *
607 * Returns:
608 * Nothing
609 */
610static void
611cache_drain(uma_zone_t zone)
612{
613 uma_cache_t cache;
614 int cpu;
615
616 /*
617 * XXX: It is safe to not lock the per-CPU caches, because we're
618 * tearing down the zone anyway. I.e., there will be no further use
619 * of the caches at this point.
620 *
 621 * XXX: It would be good to be able to assert that the zone is being
622 * torn down to prevent improper use of cache_drain().
623 *
624 * XXX: We lock the zone before passing into bucket_cache_drain() as
625 * it is used elsewhere. Should the tear-down path be made special
626 * there in some form?
627 */
628 for (cpu = 0; cpu <= mp_maxid; cpu++) {
629 if (CPU_ABSENT(cpu))
630 continue;
631 cache = &zone->uz_cpu[cpu];
632 bucket_drain(zone, cache->uc_allocbucket);
633 bucket_drain(zone, cache->uc_freebucket);
634 if (cache->uc_allocbucket != NULL)
635 bucket_free(cache->uc_allocbucket);
636 if (cache->uc_freebucket != NULL)
637 bucket_free(cache->uc_freebucket);
638 cache->uc_allocbucket = cache->uc_freebucket = NULL;
639 }
640 ZONE_LOCK(zone);
641 bucket_cache_drain(zone);
642 ZONE_UNLOCK(zone);
643}
644
645/*
646 * Drain the cached buckets from a zone. Expects a locked zone on entry.
647 */
648static void
649bucket_cache_drain(uma_zone_t zone)
650{
651 uma_bucket_t bucket;
652
653 /*
654 * Drain the bucket queues and free the buckets, we just keep two per
655 * cpu (alloc/free).
656 */
657 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
658 LIST_REMOVE(bucket, ub_link);
659 ZONE_UNLOCK(zone);
660 bucket_drain(zone, bucket);
661 bucket_free(bucket);
662 ZONE_LOCK(zone);
663 }
664
665 /* Now we do the free queue.. */
666 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
667 LIST_REMOVE(bucket, ub_link);
668 bucket_free(bucket);
669 }
670}
671
672/*
673 * Frees pages from a zone back to the system. This is done on demand from
674 * the pageout daemon.
675 *
676 * Arguments:
677 * zone The zone to free pages from
679 *
680 * Returns:
681 * Nothing.
682 */
683static void
684zone_drain(uma_zone_t zone)
685{
686 struct slabhead freeslabs = { 0 };
687 uma_keg_t keg;
688 uma_slab_t slab;
689 uma_slab_t n;
690 u_int8_t flags;
691 u_int8_t *mem;
692 int i;
693
694 keg = zone->uz_keg;
695
696 /*
697 * We don't want to take pages from statically allocated zones at this
698 * time
699 */
700 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
701 return;
702
703 ZONE_LOCK(zone);
704
705#ifdef UMA_DEBUG
706 printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
707#endif
708 bucket_cache_drain(zone);
709 if (keg->uk_free == 0)
710 goto finished;
711
712 slab = LIST_FIRST(&keg->uk_free_slab);
713 while (slab) {
714 n = LIST_NEXT(slab, us_link);
715
 716 /* We have nowhere to free these to */
717 if (slab->us_flags & UMA_SLAB_BOOT) {
718 slab = n;
719 continue;
720 }
721
722 LIST_REMOVE(slab, us_link);
723 keg->uk_pages -= keg->uk_ppera;
724 keg->uk_free -= keg->uk_ipers;
725
726 if (keg->uk_flags & UMA_ZONE_HASH)
727 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
728
729 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
730
731 slab = n;
732 }
733finished:
734 ZONE_UNLOCK(zone);
735
736 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
737 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
738 if (keg->uk_fini)
739 for (i = 0; i < keg->uk_ipers; i++)
740 keg->uk_fini(
741 slab->us_data + (keg->uk_rsize * i),
742 keg->uk_size);
743 flags = slab->us_flags;
744 mem = slab->us_data;
745
746 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
747 (keg->uk_flags & UMA_ZONE_REFCNT)) {
748 vm_object_t obj;
749
750 if (flags & UMA_SLAB_KMEM)
751 obj = kmem_object;
752 else
753 obj = NULL;
754 for (i = 0; i < keg->uk_ppera; i++)
755 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
756 obj);
757 }
758 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
759 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
760 SKIP_NONE, 0);
761#ifdef UMA_DEBUG
762 printf("%s: Returning %d bytes.\n",
763 zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
764#endif
765 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
766 }
767}
768
769/*
770 * Allocate a new slab for a zone. This does not insert the slab onto a list.
771 *
772 * Arguments:
773 * zone The zone to allocate slabs for
774 * wait Shall we wait?
775 *
776 * Returns:
777 * The slab that was allocated or NULL if there is no memory and the
778 * caller specified M_NOWAIT.
779 */
780static uma_slab_t
781slab_zalloc(uma_zone_t zone, int wait)
782{
783 uma_slabrefcnt_t slabref;
784 uma_slab_t slab;
785 uma_keg_t keg;
786 u_int8_t *mem;
787 u_int8_t flags;
788 int i;
789
790 slab = NULL;
791 keg = zone->uz_keg;
792
793#ifdef UMA_DEBUG
794 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
795#endif
796 ZONE_UNLOCK(zone);
797
798 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
799 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
800 if (slab == NULL) {
801 ZONE_LOCK(zone);
802 return NULL;
803 }
804 }
805
806 /*
807 * This reproduces the old vm_zone behavior of zero filling pages the
808 * first time they are added to a zone.
809 *
810 * Malloced items are zeroed in uma_zalloc.
811 */
812
813 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
814 wait |= M_ZERO;
815 else
816 wait &= ~M_ZERO;
817
818 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
819 &flags, wait);
820 if (mem == NULL) {
821 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
822 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
823 SKIP_NONE, 0);
824 ZONE_LOCK(zone);
825 return (NULL);
826 }
827
828 /* Point the slab into the allocated memory */
829 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
830 slab = (uma_slab_t )(mem + keg->uk_pgoff);
831
832 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
833 (keg->uk_flags & UMA_ZONE_REFCNT))
834 for (i = 0; i < keg->uk_ppera; i++)
835 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
836
837 slab->us_keg = keg;
838 slab->us_data = mem;
839 slab->us_freecount = keg->uk_ipers;
840 slab->us_firstfree = 0;
841 slab->us_flags = flags;
842
843 if (keg->uk_flags & UMA_ZONE_REFCNT) {
844 slabref = (uma_slabrefcnt_t)slab;
845 for (i = 0; i < keg->uk_ipers; i++) {
846 slabref->us_freelist[i].us_refcnt = 0;
847 slabref->us_freelist[i].us_item = i+1;
848 }
849 } else {
850 for (i = 0; i < keg->uk_ipers; i++)
851 slab->us_freelist[i].us_item = i+1;
852 }
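	/*
	 * The free list is linked by item index: with us_firstfree == 0 and
	 * us_item == i + 1, a slab holding, say, four items hands them out
	 * in the order 0, 1, 2, 3, each entry naming the next free index.
	 */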
853
854 if (keg->uk_init != NULL) {
855 for (i = 0; i < keg->uk_ipers; i++)
856 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
857 keg->uk_size, wait) != 0)
858 break;
859 if (i != keg->uk_ipers) {
860 if (keg->uk_fini != NULL) {
861 for (i--; i > -1; i--)
862 keg->uk_fini(slab->us_data +
863 (keg->uk_rsize * i),
864 keg->uk_size);
865 }
866 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
867 (keg->uk_flags & UMA_ZONE_REFCNT)) {
868 vm_object_t obj;
869
870 if (flags & UMA_SLAB_KMEM)
871 obj = kmem_object;
872 else
873 obj = NULL;
874 for (i = 0; i < keg->uk_ppera; i++)
875 vsetobj((vm_offset_t)mem +
876 (i * PAGE_SIZE), obj);
877 }
878 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
879 uma_zfree_internal(keg->uk_slabzone, slab,
880 NULL, SKIP_NONE, 0);
881 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
882 flags);
883 ZONE_LOCK(zone);
884 return (NULL);
885 }
886 }
887 ZONE_LOCK(zone);
888
889 if (keg->uk_flags & UMA_ZONE_HASH)
890 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
891
892 keg->uk_pages += keg->uk_ppera;
893 keg->uk_free += keg->uk_ipers;
894
895 return (slab);
896}
897
898/*
899 * This function is intended to be used early on in place of page_alloc() so
900 * that we may use the boot time page cache to satisfy allocations before
901 * the VM is ready.
902 */
903static void *
904startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
905{
906 uma_keg_t keg;
907
908 keg = zone->uz_keg;
909
910 /*
911 * Check our small startup cache to see if it has pages remaining.
912 */
913 mtx_lock(&uma_mtx);
914 if (uma_boot_free != 0) {
915 uma_slab_t tmps;
916
917 tmps = LIST_FIRST(&uma_boot_pages);
918 LIST_REMOVE(tmps, us_link);
919 uma_boot_free--;
920 mtx_unlock(&uma_mtx);
921 *pflag = tmps->us_flags;
922 return (tmps->us_data);
923 }
924 mtx_unlock(&uma_mtx);
925 if (booted == 0)
926 panic("UMA: Increase UMA_BOOT_PAGES");
927 /*
928 * Now that we've booted reset these users to their real allocator.
929 */
930#ifdef UMA_MD_SMALL_ALLOC
931 keg->uk_allocf = uma_small_alloc;
932#else
933 keg->uk_allocf = page_alloc;
934#endif
935 return keg->uk_allocf(zone, bytes, pflag, wait);
936}
937
938/*
939 * Allocates a number of pages from the system
940 *
941 * Arguments:
942 * zone Unused
943 * bytes The number of bytes requested
944 * wait Shall we wait?
945 *
946 * Returns:
 947 * A pointer to the allocated memory or possibly
948 * NULL if M_NOWAIT is set.
949 */
950static void *
951page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
952{
953 void *p; /* Returned page */
954
955 *pflag = UMA_SLAB_KMEM;
956 p = (void *) kmem_malloc(kmem_map, bytes, wait);
957
958 return (p);
959}
960
961/*
962 * Allocates a number of pages from within an object
963 *
964 * Arguments:
965 * zone Unused
966 * bytes The number of bytes requested
967 * wait Shall we wait?
968 *
969 * Returns:
 970 * A pointer to the allocated memory or possibly
971 * NULL if M_NOWAIT is set.
972 */
973static void *
974obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
975{
976 vm_object_t object;
977 vm_offset_t retkva, zkva;
978 vm_page_t p;
979 int pages, startpages;
980
981 object = zone->uz_keg->uk_obj;
982 retkva = 0;
983
984 /*
985 * This looks a little weird since we're getting one page at a time.
986 */
987 VM_OBJECT_LOCK(object);
988 p = TAILQ_LAST(&object->memq, pglist);
989 pages = p != NULL ? p->pindex + 1 : 0;
990 startpages = pages;
991 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
992 for (; bytes > 0; bytes -= PAGE_SIZE) {
993 p = vm_page_alloc(object, pages,
994 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
995 if (p == NULL) {
996 if (pages != startpages)
997 pmap_qremove(retkva, pages - startpages);
998 while (pages != startpages) {
999 pages--;
1000 p = TAILQ_LAST(&object->memq, pglist);
1001 vm_page_lock_queues();
1002 vm_page_unwire(p, 0);
1003 vm_page_free(p);
1004 vm_page_unlock_queues();
1005 }
1006 retkva = 0;
1007 goto done;
1008 }
1009 pmap_qenter(zkva, &p, 1);
1010 if (retkva == 0)
1011 retkva = zkva;
1012 zkva += PAGE_SIZE;
1013 pages += 1;
1014 }
1015done:
1016 VM_OBJECT_UNLOCK(object);
1017 *flags = UMA_SLAB_PRIV;
1018
1019 return ((void *)retkva);
1020}
1021
1022/*
1023 * Frees a number of pages to the system
1024 *
1025 * Arguments:
1026 * mem A pointer to the memory to be freed
1027 * size The size of the memory being freed
1028 * flags The original p->us_flags field
1029 *
1030 * Returns:
1031 * Nothing
1032 */
1033static void
1034page_free(void *mem, int size, u_int8_t flags)
1035{
1036 vm_map_t map;
1037
1038 if (flags & UMA_SLAB_KMEM)
1039 map = kmem_map;
1040 else
1041 panic("UMA: page_free used with invalid flags %d\n", flags);
1042
1043 kmem_free(map, (vm_offset_t)mem, size);
1044}
1045
1046/*
1047 * Zero fill initializer
1048 *
1049 * Arguments/Returns follow uma_init specifications
1050 */
1051static int
1052zero_init(void *mem, int size, int flags)
1053{
1054 bzero(mem, size);
1055 return (0);
1056}
1057
1058/*
1059 * Finish creating a small uma zone. This calculates ipers, and the zone size.
1060 *
1061 * Arguments
1062 * zone The zone we should initialize
1063 *
1064 * Returns
1065 * Nothing
1066 */
1067static void
1068zone_small_init(uma_zone_t zone)
1069{
1070 uma_keg_t keg;
1071 u_int rsize;
1072 u_int memused;
1073 u_int wastedspace;
1074 u_int shsize;
1075
1076 keg = zone->uz_keg;
1077 KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
1078 rsize = keg->uk_size;
1079
1080 if (rsize < UMA_SMALLEST_UNIT)
1081 rsize = UMA_SMALLEST_UNIT;
1082 if (rsize & keg->uk_align)
1083 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1084
1085 keg->uk_rsize = rsize;
1086 keg->uk_ppera = 1;
1087
1088 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1089 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
1090 shsize = sizeof(struct uma_slab_refcnt);
1091 } else {
1092 rsize += UMA_FRITM_SZ; /* Account for linkage */
1093 shsize = sizeof(struct uma_slab);
1094 }
1095
1096 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1097 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
1098 memused = keg->uk_ipers * rsize + shsize;
1099 wastedspace = UMA_SLAB_SIZE - memused;
1100
1101 /*
1102 * We can't do OFFPAGE if we're internal or if we've been
1103 * asked to not go to the VM for buckets. If we do this we
1104 * may end up going to the VM (kmem_map) for slabs which we
1105 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1106 * result of UMA_ZONE_VM, which clearly forbids it.
1107 */
1108 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1109 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1110 return;
1111
1112 if ((wastedspace >= UMA_MAX_WASTE) &&
1113 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1114 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1115 KASSERT(keg->uk_ipers <= 255,
1116 ("zone_small_init: keg->uk_ipers too high!"));
1117#ifdef UMA_DEBUG
1118 printf("UMA decided we need offpage slab headers for "
1119 "zone: %s, calculated wastedspace = %d, "
1120 "maximum wasted space allowed = %d, "
1121 "calculated ipers = %d, "
1122 "new wasted space = %d\n", zone->uz_name, wastedspace,
1123 UMA_MAX_WASTE, keg->uk_ipers,
1124 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1125#endif
1126 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1127 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1128 keg->uk_flags |= UMA_ZONE_HASH;
1129 }
1130}
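/*
 * Worked example for zone_small_init() (illustrative numbers only):
 * assume a 4KB UMA_SLAB_SIZE, a slab header of roughly 100 bytes and a
 * 200-byte pointer-aligned item with an 8-byte free-list entry.  Then
 * ipers = (4096 - 100) / 208 = 19, memused = 19 * 208 + 100 = 4052 and
 * wastedspace = 44 bytes; as long as that stays below UMA_MAX_WASTE the
 * slab header remains on-page and no OFFPAGE/HASH flags are set.
 */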
1131
1132/*
1133 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
1134 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
1135 * more complicated.
1136 *
1137 * Arguments
1138 * zone The zone we should initialize
1139 *
1140 * Returns
1141 * Nothing
1142 */
1143static void
1144zone_large_init(uma_zone_t zone)
1145{
1146 uma_keg_t keg;
1147 int pages;
1148
1149 keg = zone->uz_keg;
1150
1151 KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
1152 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1153 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1154
1155 pages = keg->uk_size / UMA_SLAB_SIZE;
1156
1157 /* Account for remainder */
1158 if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1159 pages++;
1160
1161 keg->uk_ppera = pages;
1162 keg->uk_ipers = 1;
1163
1164 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1165 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1166 keg->uk_flags |= UMA_ZONE_HASH;
1167
1168 keg->uk_rsize = keg->uk_size;
1169}
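/*
 * Example (illustrative): with a 4KB UMA_SLAB_SIZE, a 10000-byte item
 * gives pages = 10000 / 4096 = 2 with a remainder, so uk_ppera becomes 3
 * pages backing a single item (uk_ipers == 1), and the slab header is
 * always kept OFFPAGE.
 */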
1170
1171/*
1172 * Keg header ctor. This initializes all fields, locks, etc. And inserts
1173 * the keg onto the global keg list.
1174 *
1175 * Arguments/Returns follow uma_ctor specifications
1176 * udata Actually uma_kctor_args
1177 */
1178static int
1179keg_ctor(void *mem, int size, void *udata, int flags)
1180{
1181 struct uma_kctor_args *arg = udata;
1182 uma_keg_t keg = mem;
1183 uma_zone_t zone;
1184
1185 bzero(keg, size);
1186 keg->uk_size = arg->size;
1187 keg->uk_init = arg->uminit;
1188 keg->uk_fini = arg->fini;
1189 keg->uk_align = arg->align;
1190 keg->uk_free = 0;
1191 keg->uk_pages = 0;
1192 keg->uk_flags = arg->flags;
1193 keg->uk_allocf = page_alloc;
1194 keg->uk_freef = page_free;
1195 keg->uk_recurse = 0;
1196 keg->uk_slabzone = NULL;
1197
1198 /*
1199 * The master zone is passed to us at keg-creation time.
1200 */
1201 zone = arg->zone;
1202 zone->uz_keg = keg;
1203
1204 if (arg->flags & UMA_ZONE_VM)
1205 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1206
1207 if (arg->flags & UMA_ZONE_ZINIT)
1208 keg->uk_init = zero_init;
1209
1210 /*
1211 * The +UMA_FRITM_SZ added to uk_size is to account for the
1212 * linkage that is added to the size in zone_small_init(). If
1213 * we don't account for this here then we may end up in
1214 * zone_small_init() with a calculated 'ipers' of 0.
1215 */
1216 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1217 if ((keg->uk_size+UMA_FRITMREF_SZ) >
1218 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1219 zone_large_init(zone);
1220 else
1221 zone_small_init(zone);
1222 } else {
1223 if ((keg->uk_size+UMA_FRITM_SZ) >
1224 (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1225 zone_large_init(zone);
1226 else
1227 zone_small_init(zone);
1228 }
1229
1230 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1231 if (keg->uk_flags & UMA_ZONE_REFCNT)
1232 keg->uk_slabzone = slabrefzone;
1233 else
1234 keg->uk_slabzone = slabzone;
1235 }
1236
1237 /*
1238 * If we haven't booted yet we need allocations to go through the
1239 * startup cache until the vm is ready.
1240 */
1241 if (keg->uk_ppera == 1) {
1242#ifdef UMA_MD_SMALL_ALLOC
1243 keg->uk_allocf = uma_small_alloc;
1244 keg->uk_freef = uma_small_free;
1245#endif
1246 if (booted == 0)
1247 keg->uk_allocf = startup_alloc;
1248 }
1249
1250 /*
1251 * Initialize keg's lock (shared among zones) through
1252 * Master zone
1253 */
1254 zone->uz_lock = &keg->uk_lock;
1255 if (arg->flags & UMA_ZONE_MTXCLASS)
1256 ZONE_LOCK_INIT(zone, 1);
1257 else
1258 ZONE_LOCK_INIT(zone, 0);
1259
1260 /*
1261 * If we're putting the slab header in the actual page we need to
1262 * figure out where in each page it goes. This calculates a right
1263 * justified offset into the memory on an ALIGN_PTR boundary.
1264 */
1265 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1266 u_int totsize;
1267
1268 /* Size of the slab struct and free list */
1269 if (keg->uk_flags & UMA_ZONE_REFCNT)
1270 totsize = sizeof(struct uma_slab_refcnt) +
1271 keg->uk_ipers * UMA_FRITMREF_SZ;
1272 else
1273 totsize = sizeof(struct uma_slab) +
1274 keg->uk_ipers * UMA_FRITM_SZ;
1275
1276 if (totsize & UMA_ALIGN_PTR)
1277 totsize = (totsize & ~UMA_ALIGN_PTR) +
1278 (UMA_ALIGN_PTR + 1);
1279 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
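		/*
		 * For illustration: if the slab header plus its free-list
		 * entries rounded up to, say, 200 bytes on a pointer
		 * boundary, uk_pgoff would be UMA_SLAB_SIZE - 200, i.e. the
		 * header is right-justified into the last 200 bytes of the
		 * slab while items fill the page from the front.
		 */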
1280
1281 if (keg->uk_flags & UMA_ZONE_REFCNT)
1282 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1283 + keg->uk_ipers * UMA_FRITMREF_SZ;
1284 else
1285 totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1286 + keg->uk_ipers * UMA_FRITM_SZ;
1287
1288 /*
 1289 * The only way the following is possible is if, with our
 1290 * UMA_ALIGN_PTR adjustments, we are now bigger than
1291 * UMA_SLAB_SIZE. I haven't checked whether this is
1292 * mathematically possible for all cases, so we make
1293 * sure here anyway.
1294 */
1295 if (totsize > UMA_SLAB_SIZE) {
1296 printf("zone %s ipers %d rsize %d size %d\n",
1297 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1298 keg->uk_size);
1299 panic("UMA slab won't fit.\n");
1300 }
1301 }
1302
1303 if (keg->uk_flags & UMA_ZONE_HASH)
1304 hash_alloc(&keg->uk_hash);
1305
1306#ifdef UMA_DEBUG
1307 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1308 zone->uz_name, zone,
1309 keg->uk_size, keg->uk_ipers,
1310 keg->uk_ppera, keg->uk_pgoff);
1311#endif
1312
1313 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1314
1315 mtx_lock(&uma_mtx);
1316 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1317 mtx_unlock(&uma_mtx);
1318 return (0);
1319}
1320
1321/*
1322 * Zone header ctor. This initializes all fields, locks, etc.
1323 *
1324 * Arguments/Returns follow uma_ctor specifications
1325 * udata Actually uma_zctor_args
1326 */
1327
1328static int
1329zone_ctor(void *mem, int size, void *udata, int flags)
1330{
1331 struct uma_zctor_args *arg = udata;
1332 uma_zone_t zone = mem;
1333 uma_zone_t z;
1334 uma_keg_t keg;
1335
1336 bzero(zone, size);
1337 zone->uz_name = arg->name;
1338 zone->uz_ctor = arg->ctor;
1339 zone->uz_dtor = arg->dtor;
1340 zone->uz_init = NULL;
1341 zone->uz_fini = NULL;
1342 zone->uz_allocs = 0;
1343 zone->uz_frees = 0;
1344 zone->uz_fails = 0;
1345 zone->uz_fills = zone->uz_count = 0;
1346
1347 if (arg->flags & UMA_ZONE_SECONDARY) {
1348 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1349 keg = arg->keg;
1350 zone->uz_keg = keg;
1351 zone->uz_init = arg->uminit;
1352 zone->uz_fini = arg->fini;
1353 zone->uz_lock = &keg->uk_lock;
1354 mtx_lock(&uma_mtx);
1355 ZONE_LOCK(zone);
1356 keg->uk_flags |= UMA_ZONE_SECONDARY;
1357 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1358 if (LIST_NEXT(z, uz_link) == NULL) {
1359 LIST_INSERT_AFTER(z, zone, uz_link);
1360 break;
1361 }
1362 }
1363 ZONE_UNLOCK(zone);
1364 mtx_unlock(&uma_mtx);
1365 } else if (arg->keg == NULL) {
1366 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1367 arg->align, arg->flags) == NULL)
1368 return (ENOMEM);
1369 } else {
1370 struct uma_kctor_args karg;
1371 int error;
1372
1373 /* We should only be here from uma_startup() */
1374 karg.size = arg->size;
1375 karg.uminit = arg->uminit;
1376 karg.fini = arg->fini;
1377 karg.align = arg->align;
1378 karg.flags = arg->flags;
1379 karg.zone = zone;
1380 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1381 flags);
1382 if (error)
1383 return (error);
1384 }
1385 keg = zone->uz_keg;
1386 zone->uz_lock = &keg->uk_lock;
1387
1388 /*
1389 * Some internal zones don't have room allocated for the per cpu
1390 * caches. If we're internal, bail out here.
1391 */
1392 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1393 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
1394 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1395 return (0);
1396 }
1397
1398 if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1399 zone->uz_count = BUCKET_MAX;
1400 else if (keg->uk_ipers <= BUCKET_MAX)
1401 zone->uz_count = keg->uk_ipers;
1402 else
1403 zone->uz_count = BUCKET_MAX;
1404 return (0);
1405}
1406
1407/*
1408 * Keg header dtor. This frees all data, destroys locks, frees the hash
1409 * table and removes the keg from the global list.
1410 *
1411 * Arguments/Returns follow uma_dtor specifications
1412 * udata unused
1413 */
1414static void
1415keg_dtor(void *arg, int size, void *udata)
1416{
1417 uma_keg_t keg;
1418
1419 keg = (uma_keg_t)arg;
1420 mtx_lock(&keg->uk_lock);
1421 if (keg->uk_free != 0) {
1422 printf("Freed UMA keg was not empty (%d items). "
 1423 "Lost %d pages of memory.\n",
1424 keg->uk_free, keg->uk_pages);
1425 }
1426 mtx_unlock(&keg->uk_lock);
1427
1428 if (keg->uk_flags & UMA_ZONE_HASH)
1429 hash_free(&keg->uk_hash);
1430
1431 mtx_destroy(&keg->uk_lock);
1432}
1433
1434/*
1435 * Zone header dtor.
1436 *
1437 * Arguments/Returns follow uma_dtor specifications
1438 * udata unused
1439 */
1440static void
1441zone_dtor(void *arg, int size, void *udata)
1442{
1443 uma_zone_t zone;
1444 uma_keg_t keg;
1445
1446 zone = (uma_zone_t)arg;
1447 keg = zone->uz_keg;
1448
1449 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
1450 cache_drain(zone);
1451
1452 mtx_lock(&uma_mtx);
1453 zone_drain(zone);
1454 if (keg->uk_flags & UMA_ZONE_SECONDARY) {
1455 LIST_REMOVE(zone, uz_link);
1456 /*
1457 * XXX there are some races here where
1458 * the zone can be drained but zone lock
1459 * released and then refilled before we
 1460 * remove it... we don't care for now
1461 */
1462 ZONE_LOCK(zone);
1463 if (LIST_EMPTY(&keg->uk_zones))
1464 keg->uk_flags &= ~UMA_ZONE_SECONDARY;
1465 ZONE_UNLOCK(zone);
1466 mtx_unlock(&uma_mtx);
1467 } else {
1468 LIST_REMOVE(keg, uk_link);
1469 LIST_REMOVE(zone, uz_link);
1470 mtx_unlock(&uma_mtx);
1471 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE, 0);
1472 }
1473 zone->uz_keg = NULL;
1474}
1475
1476/*
1477 * Traverses every zone in the system and calls a callback
1478 *
1479 * Arguments:
1480 * zfunc A pointer to a function which accepts a zone
1481 * as an argument.
1482 *
1483 * Returns:
1484 * Nothing
1485 */
1486static void
1487zone_foreach(void (*zfunc)(uma_zone_t))
1488{
1489 uma_keg_t keg;
1490 uma_zone_t zone;
1491
1492 mtx_lock(&uma_mtx);
1493 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1494 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1495 zfunc(zone);
1496 }
1497 mtx_unlock(&uma_mtx);
1498}
1499
1500/* Public functions */
1501/* See uma.h */
1502void
1503uma_startup(void *bootmem)
1504{
1505 struct uma_zctor_args args;
1506 uma_slab_t slab;
1507 u_int slabsize;
1508 u_int objsize, totsize, wsize;
1509 int i;
1510
1511#ifdef UMA_DEBUG
1512 printf("Creating uma keg headers zone and keg.\n");
1513#endif
1514 /*
1515 * The general UMA lock is a recursion-allowed lock because
1516 * there is a code path where, while we're still configured
1517 * to use startup_alloc() for backend page allocations, we
1518 * may end up in uma_reclaim() which calls zone_foreach(zone_drain),
1519 * which grabs uma_mtx, only to later call into startup_alloc()
1520 * because while freeing we needed to allocate a bucket. Since
1521 * startup_alloc() also takes uma_mtx, we need to be able to
1522 * recurse on it.
1523 */
1524 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE);
1525
1526 /*
1527 * Figure out the maximum number of items-per-slab we'll have if
1528 * we're using the OFFPAGE slab header to track free items, given
1529 * all possible object sizes and the maximum desired wastage
1530 * (UMA_MAX_WASTE).
1531 *
1532 * We iterate until we find an object size for
1533 * which the calculated wastage in zone_small_init() will be
1534 * enough to warrant OFFPAGE. Since wastedspace versus objsize
1535 * is an overall increasing see-saw function, we find the smallest
1536 * objsize such that the wastage is always acceptable for objects
1537 * with that objsize or smaller. Since a smaller objsize always
1538 * generates a larger possible uma_max_ipers, we use this computed
1539 * objsize to calculate the largest ipers possible. Since the
1540 * ipers calculated for OFFPAGE slab headers is always larger than
1541 * the ipers initially calculated in zone_small_init(), we use
1542 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1543 * obtain the maximum ipers possible for offpage slab headers.
1544 *
 1545 * It should be noted that ipers versus objsize is an inversely
1546 * proportional function which drops off rather quickly so as
1547 * long as our UMA_MAX_WASTE is such that the objsize we calculate
1548 * falls into the portion of the inverse relation AFTER the steep
1549 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1550 *
1551 * Note that we have 8-bits (1 byte) to use as a freelist index
1552 * inside the actual slab header itself and this is enough to
 1553 * accommodate us. In the worst case, a UMA_SMALLEST_UNIT sized
1554 * object with offpage slab header would have ipers =
1555 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1556 * 1 greater than what our byte-integer freelist index can
 1557 * accommodate, but we know that this situation never occurs as
1558 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1559 * that we need to go to offpage slab headers. Or, if we do,
1560 * then we trap that condition below and panic in the INVARIANTS case.
1561 */
1562 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1563 totsize = wsize;
1564 objsize = UMA_SMALLEST_UNIT;
1565 while (totsize >= wsize) {
1566 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1567 (objsize + UMA_FRITM_SZ);
1568 totsize *= (UMA_FRITM_SZ + objsize);
1569 objsize++;
1570 }
1571 if (objsize > UMA_SMALLEST_UNIT)
1572 objsize--;
1573 uma_max_ipers = UMA_SLAB_SIZE / objsize;
1574
1575 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1576 totsize = wsize;
1577 objsize = UMA_SMALLEST_UNIT;
1578 while (totsize >= wsize) {
1579 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1580 (objsize + UMA_FRITMREF_SZ);
1581 totsize *= (UMA_FRITMREF_SZ + objsize);
1582 objsize++;
1583 }
1584 if (objsize > UMA_SMALLEST_UNIT)
1585 objsize--;
1586 uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
1587
1588 KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1589 ("uma_startup: calculated uma_max_ipers values too large!"));
1590
1591#ifdef UMA_DEBUG
1592 printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1593 printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
1594 uma_max_ipers_ref);
1595#endif
1596
1597 /* "manually" create the initial zone */
1598 args.name = "UMA Kegs";
1599 args.size = sizeof(struct uma_keg);
1600 args.ctor = keg_ctor;
1601 args.dtor = keg_dtor;
1602 args.uminit = zero_init;
1603 args.fini = NULL;
1604 args.keg = &masterkeg;
1605 args.align = 32 - 1;
1606 args.flags = UMA_ZFLAG_INTERNAL;
 1607 /* The initial zone has no per-CPU queues so it's smaller */
1608 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1609
1610#ifdef UMA_DEBUG
1611 printf("Filling boot free list.\n");
1612#endif
1613 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1614 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1615 slab->us_data = (u_int8_t *)slab;
1616 slab->us_flags = UMA_SLAB_BOOT;
1617 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1618 uma_boot_free++;
1619 }
1620
1621#ifdef UMA_DEBUG
1622 printf("Creating uma zone headers zone and keg.\n");
1623#endif
1624 args.name = "UMA Zones";
1625 args.size = sizeof(struct uma_zone) +
1626 (sizeof(struct uma_cache) * (mp_maxid + 1));
1627 args.ctor = zone_ctor;
1628 args.dtor = zone_dtor;
1629 args.uminit = zero_init;
1630 args.fini = NULL;
1631 args.keg = NULL;
1632 args.align = 32 - 1;
1633 args.flags = UMA_ZFLAG_INTERNAL;
 1634 /* The initial zone has no per-CPU queues so it's smaller */
1635 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1636
1637#ifdef UMA_DEBUG
1638 printf("Initializing pcpu cache locks.\n");
1639#endif
1640#ifdef UMA_DEBUG
1641 printf("Creating slab and hash zones.\n");
1642#endif
1643
1644 /*
1645 * This is the max number of free list items we'll have with
1646 * offpage slabs.
1647 */
1648 slabsize = uma_max_ipers * UMA_FRITM_SZ;
1649 slabsize += sizeof(struct uma_slab);
1650
1651 /* Now make a zone for slab headers */
1652 slabzone = uma_zcreate("UMA Slabs",
1653 slabsize,
1654 NULL, NULL, NULL, NULL,
1655 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1656
1657 /*
1658 * We also create a zone for the bigger slabs with reference
 1659 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1660 */
1661 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1662 slabsize += sizeof(struct uma_slab_refcnt);
1663 slabrefzone = uma_zcreate("UMA RCntSlabs",
1664 slabsize,
1665 NULL, NULL, NULL, NULL,
1666 UMA_ALIGN_PTR,
1667 UMA_ZFLAG_INTERNAL);
1668
1669 hashzone = uma_zcreate("UMA Hash",
1670 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1671 NULL, NULL, NULL, NULL,
1672 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1673
1674 bucket_init();
1675
1676#ifdef UMA_MD_SMALL_ALLOC
1677 booted = 1;
1678#endif
1679
1680#ifdef UMA_DEBUG
1681 printf("UMA startup complete.\n");
1682#endif
1683}
1684
1685/* see uma.h */
1686void
1687uma_startup2(void)
1688{
1689 booted = 1;
1690 bucket_enable();
1691#ifdef UMA_DEBUG
1692 printf("UMA startup2 complete.\n");
1693#endif
1694}
1695
1696/*
1697 * Initialize our callout handle
1698 *
1699 */
1700
1701static void
1702uma_startup3(void)
1703{
1704#ifdef UMA_DEBUG
1705 printf("Starting callout.\n");
1706#endif
1707 callout_init(&uma_callout, CALLOUT_MPSAFE);
1708 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1709#ifdef UMA_DEBUG
1710 printf("UMA startup3 complete.\n");
1711#endif
1712}
1713
1714static uma_zone_t
1715uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1716 int align, u_int32_t flags)
1717{
1718 struct uma_kctor_args args;
1719
1720 args.size = size;
1721 args.uminit = uminit;
1722 args.fini = fini;
1723 args.align = align;
1724 args.flags = flags;
1725 args.zone = zone;
1726 return (uma_zalloc_internal(kegs, &args, M_WAITOK));
1727}
1728
1729/* See uma.h */
1730uma_zone_t
1731uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1732 uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1733
1734{
1735 struct uma_zctor_args args;
1736
1737 /* This stuff is essential for the zone ctor */
1738 args.name = name;
1739 args.size = size;
1740 args.ctor = ctor;
1741 args.dtor = dtor;
1742 args.uminit = uminit;
1743 args.fini = fini;
1744 args.align = align;
1745 args.flags = flags;
1746 args.keg = NULL;
1747
1748 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1749}
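/*
 * Usage sketch (illustrative): a typical caller creates a zone once at
 * initialization time and then allocates and frees items through it,
 * e.g. for a hypothetical "struct foo":
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, 0);
 *	fp = uma_zalloc(foo_zone, M_WAITOK);
 *	...
 *	uma_zfree(foo_zone, fp);
 */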
1750
1751/* See uma.h */
1752uma_zone_t
1753uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1754 uma_init zinit, uma_fini zfini, uma_zone_t master)
1755{
1756 struct uma_zctor_args args;
1757
1758 args.name = name;
1759 args.size = master->uz_keg->uk_size;
1760 args.ctor = ctor;
1761 args.dtor = dtor;
1762 args.uminit = zinit;
1763 args.fini = zfini;
1764 args.align = master->uz_keg->uk_align;
1765 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
1766 args.keg = master->uz_keg;
1767
1768 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1769}
1770
1771/* See uma.h */
1772void
1773uma_zdestroy(uma_zone_t zone)
1774{
1775 uma_zfree_internal(zones, zone, NULL, SKIP_NONE, 0);
1776}
1777
1778/* See uma.h */
1779void *
1780uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1781{
1782 void *item;
1783 uma_cache_t cache;
1784 uma_bucket_t bucket;
1785 int cpu;
1786 int badness;
1787
1788 /* This is the fast path allocation */
1789#ifdef UMA_DEBUG_ALLOC_1
1790 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1791#endif
1792 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1793 zone->uz_name, flags);
1794
1795 if (!(flags & M_NOWAIT)) {
1796 KASSERT(curthread->td_intr_nesting_level == 0,
1797 ("malloc(M_WAITOK) in interrupt context"));
1798 if (nosleepwithlocks) {
1799#ifdef WITNESS
1800 badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
1801 NULL,
1802 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
1803 zone->uz_name);
1804#else
1805 badness = 1;
1806#endif
1807 } else {
1808 badness = 0;
1809#ifdef WITNESS
1810 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1811 "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
1812#endif
1813 }
1814 if (badness) {
1815 flags &= ~M_WAITOK;
1816 flags |= M_NOWAIT;
1817 }
1818 }
1819
1820 /*
1821 * If possible, allocate from the per-CPU cache. There are two
1822 * requirements for safe access to the per-CPU cache: (1) the thread
1823 * accessing the cache must not be preempted or yield during access,
1824 * and (2) the thread must not migrate CPUs without switching which
1825 * cache it accesses. We rely on a critical section to prevent
1826 * preemption and migration. We release the critical section in
1827 * order to acquire the zone mutex if we are unable to allocate from
1828 * the current cache; when we re-acquire the critical section, we
1829 * must detect and handle migration if it has occurred.
1830 */
1831zalloc_restart:
1832 critical_enter();
1833 cpu = curcpu;
1834 cache = &zone->uz_cpu[cpu];
1835
1836zalloc_start:
1837 bucket = cache->uc_allocbucket;
1838
1839 if (bucket) {
1840 if (bucket->ub_cnt > 0) {
1841 bucket->ub_cnt--;
1842 item = bucket->ub_bucket[bucket->ub_cnt];
1843#ifdef INVARIANTS
1844 bucket->ub_bucket[bucket->ub_cnt] = NULL;
1845#endif
1846 KASSERT(item != NULL,
1847 ("uma_zalloc: Bucket pointer mangled."));
1848 cache->uc_allocs++;
1849 critical_exit();
1850#ifdef INVARIANTS
1851 ZONE_LOCK(zone);
1852 uma_dbg_alloc(zone, NULL, item);
1853 ZONE_UNLOCK(zone);
1854#endif
1855 if (zone->uz_ctor != NULL) {
1856 if (zone->uz_ctor(item, zone->uz_keg->uk_size,
1857 udata, flags) != 0) {
1858 uma_zfree_internal(zone, item, udata,
1859 SKIP_DTOR, ZFREE_STATFAIL);
1860 return (NULL);
1861 }
1862 }
1863 if (flags & M_ZERO)
1864 bzero(item, zone->uz_keg->uk_size);
1865 return (item);
1866 } else if (cache->uc_freebucket) {
1867 /*
1868 * We have run out of items in our allocbucket.
1869 * See if we can switch with our free bucket.
1870 */
1871 if (cache->uc_freebucket->ub_cnt > 0) {
1872#ifdef UMA_DEBUG_ALLOC
1873 printf("uma_zalloc: Swapping empty with"
1874 " alloc.\n");
1875#endif
1876 bucket = cache->uc_freebucket;
1877 cache->uc_freebucket = cache->uc_allocbucket;
1878 cache->uc_allocbucket = bucket;
1879
1880 goto zalloc_start;
1881 }
1882 }
1883 }
1884 /*
 1885 * The attempt to retrieve the item from the per-CPU cache has failed, so
1886 * we must go back to the zone. This requires the zone lock, so we
1887 * must drop the critical section, then re-acquire it when we go back
1888 * to the cache. Since the critical section is released, we may be
1889 * preempted or migrate. As such, make sure not to maintain any
1890 * thread-local state specific to the cache from prior to releasing
1891 * the critical section.
1892 */
1893 critical_exit();
1894 ZONE_LOCK(zone);
1895 critical_enter();
1896 cpu = curcpu;
1897 cache = &zone->uz_cpu[cpu];
1898 bucket = cache->uc_allocbucket;
1899 if (bucket != NULL) {
1900 if (bucket->ub_cnt > 0) {
1901 ZONE_UNLOCK(zone);
1902 goto zalloc_start;
1903 }
1904 bucket = cache->uc_freebucket;
1905 if (bucket != NULL && bucket->ub_cnt > 0) {
1906 ZONE_UNLOCK(zone);
1907 goto zalloc_start;
1908 }
1909 }
1910
1911 /* Since we have locked the zone we may as well send back our stats */
1912 zone->uz_allocs += cache->uc_allocs;
1913 cache->uc_allocs = 0;
1914 zone->uz_frees += cache->uc_frees;
1915 cache->uc_frees = 0;
1916
1917 /* Our old one is now a free bucket */
1918 if (cache->uc_allocbucket) {
1919 KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1920 ("uma_zalloc_arg: Freeing a non free bucket."));
1921 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1922 cache->uc_allocbucket, ub_link);
1923 cache->uc_allocbucket = NULL;
1924 }
1925
1926 /* Check the free list for a new alloc bucket */
1927 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1928 KASSERT(bucket->ub_cnt != 0,
1929 ("uma_zalloc_arg: Returning an empty bucket."));
1930
1931 LIST_REMOVE(bucket, ub_link);
1932 cache->uc_allocbucket = bucket;
1933 ZONE_UNLOCK(zone);
1934 goto zalloc_start;
1935 }
1936 /* We are no longer associated with this CPU. */
1937 critical_exit();
1938
1939	/* Bump up our uz_count so we get here less often. */
1940 if (zone->uz_count < BUCKET_MAX)
1941 zone->uz_count++;
1942
1943 /*
1944	 * Now let's just fill a bucket and put it on the free list.  If that
1945	 * works we'll restart the allocation from the beginning.
1946 */
1947 if (uma_zalloc_bucket(zone, flags)) {
1948 ZONE_UNLOCK(zone);
1949 goto zalloc_restart;
1950 }
1951 ZONE_UNLOCK(zone);
1952 /*
1953 * We may not be able to get a bucket so return an actual item.
1954 */
1955#ifdef UMA_DEBUG
1956 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1957#endif
1958
1959 return (uma_zalloc_internal(zone, udata, flags));
1960}
1961
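/*
 * Fetch a slab with at least one free item for the given zone, allocating
 * a new slab via slab_zalloc() when the keg has none available.
 *
 * Arguments:
 *	zone   The zone to fetch a slab for; the zone must be locked.
 *	flags  M_WAITOK, M_NOWAIT, M_NOVM.
 *
 * Returns:
 *	A slab on the keg's partial slab list, or NULL if no memory was
 *	available and sleeping was not permitted.
 */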
1962static uma_slab_t
1963uma_zone_slab(uma_zone_t zone, int flags)
1964{
1965 uma_slab_t slab;
1966 uma_keg_t keg;
1967
1968 keg = zone->uz_keg;
1969
1970 /*
1971 * This is to prevent us from recursively trying to allocate
1972 * buckets. The problem is that if an allocation forces us to
1973 * grab a new bucket we will call page_alloc, which will go off
1974 * and cause the vm to allocate vm_map_entries. If we need new
1975 * buckets there too we will recurse in kmem_alloc and bad
1976 * things happen. So instead we return a NULL bucket, and make
1977	 * the code that allocates buckets smart enough to deal with it.
1978 *
1979 * XXX: While we want this protection for the bucket zones so that
1980 * recursion from the VM is handled (and the calling code that
1981 * allocates buckets knows how to deal with it), we do not want
1982 * to prevent allocation from the slab header zones (slabzone
1983 * and slabrefzone) if uk_recurse is not zero for them. The
1984 * reason is that it could lead to NULL being returned for
1985 * slab header allocations even in the M_WAITOK case, and the
1986 * caller can't handle that.
1987 */
1988 if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
1989 if ((zone != slabzone) && (zone != slabrefzone))
1990 return (NULL);
1991
1992 slab = NULL;
1993
1994 for (;;) {
1995 /*
1996 * Find a slab with some space. Prefer slabs that are partially
1997 * used over those that are totally full. This helps to reduce
1998 * fragmentation.
1999 */
2000 if (keg->uk_free != 0) {
2001 if (!LIST_EMPTY(&keg->uk_part_slab)) {
2002 slab = LIST_FIRST(&keg->uk_part_slab);
2003 } else {
2004 slab = LIST_FIRST(&keg->uk_free_slab);
2005 LIST_REMOVE(slab, us_link);
2006 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2007 us_link);
2008 }
2009 return (slab);
2010 }
2011
2012 /*
2013 * M_NOVM means don't ask at all!
2014 */
2015 if (flags & M_NOVM)
2016 break;
2017
2018 if (keg->uk_maxpages &&
2019 keg->uk_pages >= keg->uk_maxpages) {
2020 keg->uk_flags |= UMA_ZFLAG_FULL;
2021
2022 if (flags & M_NOWAIT)
2023 break;
2024 else
2025 msleep(keg, &keg->uk_lock, PVM,
2026 "zonelimit", 0);
2027 continue;
2028 }
2029 keg->uk_recurse++;
2030 slab = slab_zalloc(zone, flags);
2031 keg->uk_recurse--;
2032
2033 /*
2034 * If we got a slab here it's safe to mark it partially used
2035 * and return. We assume that the caller is going to remove
2036 * at least one item.
2037 */
2038 if (slab) {
2039 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2040 return (slab);
2041 }
2042 /*
2043 * We might not have been able to get a slab but another cpu
2044 * could have while we were unlocked. Check again before we
2045 * fail.
2046 */
2047 if (flags & M_NOWAIT)
2048 flags |= M_NOVM;
2049 }
2050 return (slab);
2051}
2052
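/*
 * Remove the next free item from a slab and return it.
 *
 * Arguments:
 *	zone  The zone the slab belongs to; the zone must be locked.
 *	slab  A slab with at least one free item.
 *
 * Returns:
 *	A pointer to the item.  The slab is moved to the keg's full slab
 *	list once its last free item has been handed out.
 */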
2053static void *
2054uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
2055{
2056 uma_keg_t keg;
2057 uma_slabrefcnt_t slabref;
2058 void *item;
2059 u_int8_t freei;
2060
2061 keg = zone->uz_keg;
2062
2063 freei = slab->us_firstfree;
2064 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2065 slabref = (uma_slabrefcnt_t)slab;
2066 slab->us_firstfree = slabref->us_freelist[freei].us_item;
2067 } else {
2068 slab->us_firstfree = slab->us_freelist[freei].us_item;
2069 }
2070 item = slab->us_data + (keg->uk_rsize * freei);
2071
2072 slab->us_freecount--;
2073 keg->uk_free--;
2074#ifdef INVARIANTS
2075 uma_dbg_alloc(zone, slab, item);
2076#endif
2077 /* Move this slab to the full list */
2078 if (slab->us_freecount == 0) {
2079 LIST_REMOVE(slab, us_link);
2080 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2081 }
2082
2083 return (item);
2084}
2085
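/*
 * Fill a bucket with items from the zone's slabs and queue it on the
 * zone's full bucket list.  The zone must be locked on entry; the lock is
 * dropped and re-acquired while allocating the bucket and running the
 * zone's init on the new items.
 *
 * Returns:
 *	1 if a non-empty bucket was placed on uz_full_bucket, 0 otherwise.
 */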
2086static int
2087uma_zalloc_bucket(uma_zone_t zone, int flags)
2088{
2089 uma_bucket_t bucket;
2090 uma_slab_t slab;
2091 int16_t saved;
2092 int max, origflags = flags;
2093
2094 /*
2095 * Try this zone's free list first so we don't allocate extra buckets.
2096 */
2097 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2098 KASSERT(bucket->ub_cnt == 0,
2099 ("uma_zalloc_bucket: Bucket on free list is not empty."));
2100 LIST_REMOVE(bucket, ub_link);
2101 } else {
2102 int bflags;
2103
2104 bflags = (flags & ~M_ZERO);
2105 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2106 bflags |= M_NOVM;
2107
2108 ZONE_UNLOCK(zone);
2109 bucket = bucket_alloc(zone->uz_count, bflags);
2110 ZONE_LOCK(zone);
2111 }
2112
2113 if (bucket == NULL)
2114 return (0);
2115
2116#ifdef SMP
2117 /*
2118 * This code is here to limit the number of simultaneous bucket fills
2119 * for any given zone to the number of per cpu caches in this zone. This
2120 * is done so that we don't allocate more memory than we really need.
2121 */
2122 if (zone->uz_fills >= mp_ncpus)
2123 goto done;
2124
2125#endif
2126 zone->uz_fills++;
2127
2128 max = MIN(bucket->ub_entries, zone->uz_count);
2129 /* Try to keep the buckets totally full */
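	/*
	 * Record the starting count so that uz_init below is applied only
	 * to the items added by this fill.
	 */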
2130 saved = bucket->ub_cnt;
2131 while (bucket->ub_cnt < max &&
2132 (slab = uma_zone_slab(zone, flags)) != NULL) {
2133 while (slab->us_freecount && bucket->ub_cnt < max) {
2134 bucket->ub_bucket[bucket->ub_cnt++] =
2135 uma_slab_alloc(zone, slab);
2136 }
2137
2138 /* Don't block on the next fill */
2139 flags |= M_NOWAIT;
2140 }
2141
2142 /*
2143 * We unlock here because we need to call the zone's init.
2144 * It should be safe to unlock because the slab dealt with
2145 * above is already on the appropriate list within the keg
2146 * and the bucket we filled is not yet on any list, so we
2147 * own it.
2148 */
2149 if (zone->uz_init != NULL) {
2150 int i;
2151
2152 ZONE_UNLOCK(zone);
2153 for (i = saved; i < bucket->ub_cnt; i++)
2154 if (zone->uz_init(bucket->ub_bucket[i],
2155 zone->uz_keg->uk_size, origflags) != 0)
2156 break;
2157 /*
2158 * If we couldn't initialize the whole bucket, put the
2159 * rest back onto the freelist.
2160 */
2161 if (i != bucket->ub_cnt) {
2162 int j;
2163
2164 for (j = i; j < bucket->ub_cnt; j++) {
2165 uma_zfree_internal(zone, bucket->ub_bucket[j],
2166 NULL, SKIP_FINI, 0);
2167#ifdef INVARIANTS
2168 bucket->ub_bucket[j] = NULL;
2169#endif
2170 }
2171 bucket->ub_cnt = i;
2172 }
2173 ZONE_LOCK(zone);
2174 }
2175
2176 zone->uz_fills--;
2177 if (bucket->ub_cnt != 0) {
2178 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2179 bucket, ub_link);
2180 return (1);
2181 }
2182#ifdef SMP
2183done:
2184#endif
2185 bucket_free(bucket);
2186
2187 return (0);
2188}
2189/*
2190 * Allocates an item for an internal zone
2191 *
2192 * Arguments:
2193 *	zone   The zone to alloc for.
2194 *	udata  The data to be passed to the constructor.
2195 *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
2196 *
2197 * Returns:
2198 *	NULL if there is no memory and M_NOWAIT is set
2199 *	An item if successful
2200 */
2201
2202static void *
2203uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
2204{
2205 uma_keg_t keg;
2206 uma_slab_t slab;
2207 void *item;
2208
2209 item = NULL;
2210 keg = zone->uz_keg;
2211
2212#ifdef UMA_DEBUG_ALLOC
2213 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2214#endif
2215 ZONE_LOCK(zone);
2216
2217 slab = uma_zone_slab(zone, flags);
2218 if (slab == NULL) {
2219 zone->uz_fails++;
2220 ZONE_UNLOCK(zone);
2221 return (NULL);
2222 }
2223
2224 item = uma_slab_alloc(zone, slab);
2225
2226 zone->uz_allocs++;
2227
2228 ZONE_UNLOCK(zone);
2229
2230 /*
2231 * We have to call both the zone's init (not the keg's init)
2232 * and the zone's ctor. This is because the item is going from
2233 * a keg slab directly to the user, and the user is expecting it
2234 * to be both zone-init'd as well as zone-ctor'd.
2235 */
2236 if (zone->uz_init != NULL) {
2237 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
2238 uma_zfree_internal(zone, item, udata, SKIP_FINI,
2239 ZFREE_STATFAIL);
2240 return (NULL);
2241 }
2242 }
2243 if (zone->uz_ctor != NULL) {
2244 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
2245 uma_zfree_internal(zone, item, udata, SKIP_DTOR,
2246 ZFREE_STATFAIL);
2247 return (NULL);
2248 }
2249 }
2250 if (flags & M_ZERO)
2251 bzero(item, keg->uk_size);
2252
2253 return (item);
2254}
2255
2256/* See uma.h */
2257void
2258uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2259{
2260 uma_keg_t keg;
2261 uma_cache_t cache;
2262 uma_bucket_t bucket;
2263 int bflags;
2264 int cpu;
2265
2266 keg = zone->uz_keg;
2267
2268#ifdef UMA_DEBUG_ALLOC_1
2269 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2270#endif
2271 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2272 zone->uz_name);
2273
2274 if (zone->uz_dtor)
2275 zone->uz_dtor(item, keg->uk_size, udata);
2276#ifdef INVARIANTS
2277 ZONE_LOCK(zone);
2278 if (keg->uk_flags & UMA_ZONE_MALLOC)
2279 uma_dbg_free(zone, udata, item);
2280 else
2281 uma_dbg_free(zone, NULL, item);
2282 ZONE_UNLOCK(zone);
2283#endif
2284 /*
2285 * The race here is acceptable. If we miss it we'll just have to wait
2286 * a little longer for the limits to be reset.
2287 */
2288 if (keg->uk_flags & UMA_ZFLAG_FULL)
2289 goto zfree_internal;
2290
2291 /*
2292 * If possible, free to the per-CPU cache. There are two
2293 * requirements for safe access to the per-CPU cache: (1) the thread
2294 * accessing the cache must not be preempted or yield during access,
2295 * and (2) the thread must not migrate CPUs without switching which
2296 * cache it accesses. We rely on a critical section to prevent
2297 * preemption and migration. We release the critical section in
2298 * order to acquire the zone mutex if we are unable to free to the
2299 * current cache; when we re-acquire the critical section, we must
2300 * detect and handle migration if it has occurred.
2301 */
2302zfree_restart:
2303 critical_enter();
2304 cpu = curcpu;
2305 cache = &zone->uz_cpu[cpu];
2306
2307zfree_start:
2308 bucket = cache->uc_freebucket;
2309
2310 if (bucket) {
2311 /*
2312 * Do we have room in our bucket? It is OK for this uz count
2313 * check to be slightly out of sync.
2314 */
2315
2316 if (bucket->ub_cnt < bucket->ub_entries) {
2317 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2318 ("uma_zfree: Freeing to non free bucket index."));
2319 bucket->ub_bucket[bucket->ub_cnt] = item;
2320 bucket->ub_cnt++;
2321 cache->uc_frees++;
2322 critical_exit();
2323 return;
2324 } else if (cache->uc_allocbucket) {
2325#ifdef UMA_DEBUG_ALLOC
2326 printf("uma_zfree: Swapping buckets.\n");
2327#endif
2328 /*
2329 * We have run out of space in our freebucket.
2330 * See if we can switch with our alloc bucket.
2331 */
2332 if (cache->uc_allocbucket->ub_cnt <
2333 cache->uc_freebucket->ub_cnt) {
2334 bucket = cache->uc_freebucket;
2335 cache->uc_freebucket = cache->uc_allocbucket;
2336 cache->uc_allocbucket = bucket;
2337 goto zfree_start;
2338 }
2339 }
2340 }
2341 /*
2342 * We can get here for two reasons:
2343 *
2344 * 1) The buckets are NULL
2345 * 2) The alloc and free buckets are both somewhat full.
2346 *
2347	 * We must go back to the zone, which requires acquiring the zone lock,
2348 * which in turn means we must release and re-acquire the critical
2349 * section. Since the critical section is released, we may be
2350 * preempted or migrate. As such, make sure not to maintain any
2351 * thread-local state specific to the cache from prior to releasing
2352 * the critical section.
2353 */
2354 critical_exit();
2355 ZONE_LOCK(zone);
2356 critical_enter();
2357 cpu = curcpu;
2358 cache = &zone->uz_cpu[cpu];
2359 if (cache->uc_freebucket != NULL) {
2360 if (cache->uc_freebucket->ub_cnt <
2361 cache->uc_freebucket->ub_entries) {
2362 ZONE_UNLOCK(zone);
2363 goto zfree_start;
2364 }
2365 if (cache->uc_allocbucket != NULL &&
2366 (cache->uc_allocbucket->ub_cnt <
2367 cache->uc_freebucket->ub_cnt)) {
2368 ZONE_UNLOCK(zone);
2369 goto zfree_start;
2370 }
2371 }
2372
2373 bucket = cache->uc_freebucket;
2374 cache->uc_freebucket = NULL;
2375
2376 /* Can we throw this on the zone full list? */
2377 if (bucket != NULL) {
2378#ifdef UMA_DEBUG_ALLOC
2379 printf("uma_zfree: Putting old bucket on the free list.\n");
2380#endif
2381 /* ub_cnt is pointing to the last free item */
2382 KASSERT(bucket->ub_cnt != 0,
2383 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2384 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2385 bucket, ub_link);
2386 }
2387 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2388 LIST_REMOVE(bucket, ub_link);
2389 ZONE_UNLOCK(zone);
2390 cache->uc_freebucket = bucket;
2391 goto zfree_start;
2392 }
2393 /* We are no longer associated with this CPU. */
2394 critical_exit();
2395
2396 /* And the zone.. */
2397 ZONE_UNLOCK(zone);
2398
2399#ifdef UMA_DEBUG_ALLOC
2400 printf("uma_zfree: Allocating new free bucket.\n");
2401#endif
2402 bflags = M_NOWAIT;
2403
2404 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2405 bflags |= M_NOVM;
2406 bucket = bucket_alloc(zone->uz_count, bflags);
2407 if (bucket) {
2408 ZONE_LOCK(zone);
2409 LIST_INSERT_HEAD(&zone->uz_free_bucket,
2410 bucket, ub_link);
2411 ZONE_UNLOCK(zone);
2412 goto zfree_restart;
2413 }
2414
2415 /*
2416 * If nothing else caught this, we'll just do an internal free.
2417 */
2418zfree_internal:
2419 uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFAIL);
2420
2421 return;
2422}
2423
2424/*
2425 * Frees an item to an INTERNAL zone or allocates a free bucket
2426 *
2427 * Arguments:
2428 * zone The zone to free to
2429 * item The item we're freeing
2430 * udata User supplied data for the dtor
2431 *	skip   Skip dtors and finis
 *	flags  ZFREE_STATFAIL to update the zone's failure statistic
2432 */
2433static void
2434uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
2435 enum zfreeskip skip, int flags)
2436{
2437 uma_slab_t slab;
2438 uma_slabrefcnt_t slabref;
2439 uma_keg_t keg;
2440 u_int8_t *mem;
2441 u_int8_t freei;
2442
2443 keg = zone->uz_keg;
2444
2445 if (skip < SKIP_DTOR && zone->uz_dtor)
2446 zone->uz_dtor(item, keg->uk_size, udata);
2447 if (skip < SKIP_FINI && zone->uz_fini)
2448 zone->uz_fini(item, keg->uk_size);
2449
2450 ZONE_LOCK(zone);
2451
2452 if (flags & ZFREE_STATFAIL)
2453 zone->uz_fails++;
2454
2455 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
2456 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2457 if (keg->uk_flags & UMA_ZONE_HASH)
2458 slab = hash_sfind(&keg->uk_hash, mem);
2459 else {
2460 mem += keg->uk_pgoff;
2461 slab = (uma_slab_t)mem;
2462 }
2463 } else {
2464 slab = (uma_slab_t)udata;
2465 }
2466
2467 /* Do we need to remove from any lists? */
2468 if (slab->us_freecount+1 == keg->uk_ipers) {
2469 LIST_REMOVE(slab, us_link);
2470 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2471 } else if (slab->us_freecount == 0) {
2472 LIST_REMOVE(slab, us_link);
2473 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2474 }
2475
2476	/* Slab management: return the item to the slab's free list. */
2477 freei = ((unsigned long)item - (unsigned long)slab->us_data)
2478 / keg->uk_rsize;
2479
2480#ifdef INVARIANTS
2481 if (!skip)
2482 uma_dbg_free(zone, slab, item);
2483#endif
2484
2485 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2486 slabref = (uma_slabrefcnt_t)slab;
2487 slabref->us_freelist[freei].us_item = slab->us_firstfree;
2488 } else {
2489 slab->us_freelist[freei].us_item = slab->us_firstfree;
2490 }
2491 slab->us_firstfree = freei;
2492 slab->us_freecount++;
2493
2494 /* Zone statistics */
2495 keg->uk_free++;
2496 zone->uz_frees++;
2497
2498 if (keg->uk_flags & UMA_ZFLAG_FULL) {
2499 if (keg->uk_pages < keg->uk_maxpages)
2500 keg->uk_flags &= ~UMA_ZFLAG_FULL;
2501
2502 /* We can handle one more allocation */
2503 wakeup_one(keg);
2504 }
2505
2506 ZONE_UNLOCK(zone);
2507}
2508
2509/* See uma.h */
2510void
2511uma_zone_set_max(uma_zone_t zone, int nitems)
2512{
2513 uma_keg_t keg;
2514
2515 keg = zone->uz_keg;
2516 ZONE_LOCK(zone);
2517 if (keg->uk_ppera > 1)
2518 keg->uk_maxpages = nitems * keg->uk_ppera;
2519 else
2520 keg->uk_maxpages = nitems / keg->uk_ipers;
2521
2522 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2523 keg->uk_maxpages++;
2524
2525 ZONE_UNLOCK(zone);
2526}
2527
2528/* See uma.h */
2529void
2530uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2531{
2532 ZONE_LOCK(zone);
2533 KASSERT(zone->uz_keg->uk_pages == 0,
2534 ("uma_zone_set_init on non-empty keg"));
2535 zone->uz_keg->uk_init = uminit;
2536 ZONE_UNLOCK(zone);
2537}
2538
2539/* See uma.h */
2540void
2541uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2542{
2543 ZONE_LOCK(zone);
2544 KASSERT(zone->uz_keg->uk_pages == 0,
2545 ("uma_zone_set_fini on non-empty keg"));
2546 zone->uz_keg->uk_fini = fini;
2547 ZONE_UNLOCK(zone);
2548}
2549
2550/* See uma.h */
2551void
2552uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2553{
2554 ZONE_LOCK(zone);
2555 KASSERT(zone->uz_keg->uk_pages == 0,
2556 ("uma_zone_set_zinit on non-empty keg"));
2557 zone->uz_init = zinit;
2558 ZONE_UNLOCK(zone);
2559}
2560
2561/* See uma.h */
2562void
2563uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2564{
2565 ZONE_LOCK(zone);
2566 KASSERT(zone->uz_keg->uk_pages == 0,
2567 ("uma_zone_set_zfini on non-empty keg"));
2568 zone->uz_fini = zfini;
2569 ZONE_UNLOCK(zone);
2570}
2571
2572/* See uma.h */
2573/* XXX uk_freef is not actually used with the zone locked */
2574void
2575uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2576{
2577 ZONE_LOCK(zone);
2578 zone->uz_keg->uk_freef = freef;
2579 ZONE_UNLOCK(zone);
2580}
2581
2582/* See uma.h */
2583/* XXX uk_allocf is not actually used with the zone locked */
2584void
2585uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2586{
2587 ZONE_LOCK(zone);
2588 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2589 zone->uz_keg->uk_allocf = allocf;
2590 ZONE_UNLOCK(zone);
2591}
2592
2593/* See uma.h */
2594int
2595uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2596{
2597 uma_keg_t keg;
2598 vm_offset_t kva;
2599 int pages;
2600
2601 keg = zone->uz_keg;
2602 pages = count / keg->uk_ipers;
2603
2604 if (pages * keg->uk_ipers < count)
2605 pages++;
2606
2607 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2608
2609 if (kva == 0)
2610 return (0);
2611 if (obj == NULL) {
2612 obj = vm_object_allocate(OBJT_DEFAULT,
2613 pages);
2614 } else {
2615 VM_OBJECT_LOCK_INIT(obj, "uma object");
2616 _vm_object_allocate(OBJT_DEFAULT,
2617 pages, obj);
2618 }
2619 ZONE_LOCK(zone);
2620 keg->uk_kva = kva;
2621 keg->uk_obj = obj;
2622 keg->uk_maxpages = pages;
2623 keg->uk_allocf = obj_alloc;
2624 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2625 ZONE_UNLOCK(zone);
2626 return (1);
2627}
2628
2629/* See uma.h */
2630void
2631uma_prealloc(uma_zone_t zone, int items)
2632{
2633 int slabs;
2634 uma_slab_t slab;
2635 uma_keg_t keg;
2636
2637 keg = zone->uz_keg;
2638 ZONE_LOCK(zone);
2639 slabs = items / keg->uk_ipers;
2640 if (slabs * keg->uk_ipers < items)
2641 slabs++;
2642 while (slabs > 0) {
2643 slab = slab_zalloc(zone, M_WAITOK);
2644 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2645 slabs--;
2646 }
2647 ZONE_UNLOCK(zone);
2648}
2649
2650/* See uma.h */
2651u_int32_t *
2652uma_find_refcnt(uma_zone_t zone, void *item)
2653{
2654 uma_slabrefcnt_t slabref;
2655 uma_keg_t keg;
2656 u_int32_t *refcnt;
2657 int idx;
2658
2659 keg = zone->uz_keg;
2660 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2661 (~UMA_SLAB_MASK));
2662 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2663 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2664 idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2665 / keg->uk_rsize;
2666 refcnt = &slabref->us_freelist[idx].us_refcnt;
2667 return refcnt;
2668}
2669
2670/* See uma.h */
2671void
2672uma_reclaim(void)
2673{
2674#ifdef UMA_DEBUG
2675 printf("UMA: vm asked us to release pages!\n");
2676#endif
2677 bucket_enable();
2678 zone_foreach(zone_drain);
2679 /*
2680	 * Some slabs may have been freed, but the slab zones are visited early
2681	 * in the loop above, so drain them again here to free pages that became
2682	 * empty once the other zones were drained.  We have to do the same for buckets.
2683 */
2684 zone_drain(slabzone);
2685 zone_drain(slabrefzone);
2686 bucket_zone_drain();
2687}
2688
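/*
 * Allocate a large region of memory, tracked by a slab header allocated
 * from slabzone so that uma_large_free() can later locate and release it.
 *
 * Arguments:
 *	size  The number of bytes to allocate.
 *	wait  M_WAITOK or M_NOWAIT.
 *
 * Returns:
 *	A pointer to the memory, or NULL on failure.
 */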
2689void *
2690uma_large_malloc(int size, int wait)
2691{
2692 void *mem;
2693 uma_slab_t slab;
2694 u_int8_t flags;
2695
2696 slab = uma_zalloc_internal(slabzone, NULL, wait);
2697 if (slab == NULL)
2698 return (NULL);
2699 mem = page_alloc(NULL, size, &flags, wait);
2700 if (mem) {
2701 vsetslab((vm_offset_t)mem, slab);
2702 slab->us_data = mem;
2703 slab->us_flags = flags | UMA_SLAB_MALLOC;
2704 slab->us_size = size;
2705 } else {
2706 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE,
2707 ZFREE_STATFAIL);
2708 }
2709
2710 return (mem);
2711}
2712
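/*
 * Release a region obtained from uma_large_malloc(), freeing both the
 * pages and the tracking slab header.
 */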
2713void
2714uma_large_free(uma_slab_t slab)
2715{
2716 vsetobj((vm_offset_t)slab->us_data, kmem_object);
2717 page_free(slab->us_data, slab->us_size, slab->us_flags);
2718 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, 0);
2719}
2720
2721void
2722uma_print_stats(void)
2723{
2724 zone_foreach(uma_print_zone);
2725}
2726
2727static void
2728slab_print(uma_slab_t slab)
2729{
2730 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2731 slab->us_keg, slab->us_data, slab->us_freecount,
2732 slab->us_firstfree);
2733}
2734
2735static void
2736cache_print(uma_cache_t cache)
2737{
2738 printf("alloc: %p(%d), free: %p(%d)\n",
2739 cache->uc_allocbucket,
2740 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2741 cache->uc_freebucket,
2742 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2743}
2744
2745void
2746uma_print_zone(uma_zone_t zone)
2747{
2748 uma_cache_t cache;
2749 uma_keg_t keg;
2750 uma_slab_t slab;
2751 int i;
2752
2753 keg = zone->uz_keg;
2754 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2755 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2756 keg->uk_ipers, keg->uk_ppera,
2757 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
2758 printf("Part slabs:\n");
2759 LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2760 slab_print(slab);
2761 printf("Free slabs:\n");
2762 LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2763 slab_print(slab);
2764 printf("Full slabs:\n");
2765 LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2766 slab_print(slab);
2767 for (i = 0; i <= mp_maxid; i++) {
2768 if (CPU_ABSENT(i))
2769 continue;
2770 cache = &zone->uz_cpu[i];
2771 printf("CPU %d Cache:\n", i);
2772 cache_print(cache);
2773 }
2774}
2775
2776/*
2777 * Generate statistics across both the zone and its per-cpu caches.  Return
2778 * each statistic through its pointer argument when that pointer is non-NULL.
2779 *
2780 * Note: does not update the zone statistics, as it can't safely clear the
2781 * per-CPU cache statistic.
2782 *
2783 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
2784 * safe from off-CPU; we should modify the caches to track this information
2785 * directly so that we don't have to.
2786 */
2787static void
2788uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
2789 u_int64_t *freesp)
2790{
2791 uma_cache_t cache;
2792 u_int64_t allocs, frees;
2793 int cachefree, cpu;
2794
2795 allocs = frees = 0;
2796 cachefree = 0;
2797 for (cpu = 0; cpu <= mp_maxid; cpu++) {
2798 if (CPU_ABSENT(cpu))
2799 continue;
2800 cache = &z->uz_cpu[cpu];
2801 if (cache->uc_allocbucket != NULL)
2802 cachefree += cache->uc_allocbucket->ub_cnt;
2803 if (cache->uc_freebucket != NULL)
2804 cachefree += cache->uc_freebucket->ub_cnt;
2805 allocs += cache->uc_allocs;
2806 frees += cache->uc_frees;
2807 }
2808 allocs += z->uz_allocs;
2809 frees += z->uz_frees;
2810 if (cachefreep != NULL)
2811 *cachefreep = cachefree;
2812 if (allocsp != NULL)
2813 *allocsp = allocs;
2814 if (freesp != NULL)
2815 *freesp = frees;
2816}
2817
2818/*
2819 * Sysctl handler for vm.zone
2820 *
2821 * stolen from vm_zone.c
2822 */
2823static int
2824sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2825{
2826 int error, len, cnt;
2827 const int linesize = 128; /* conservative */
2828 int totalfree;
2829 char *tmpbuf, *offset;
2830 uma_zone_t z;
2831 uma_keg_t zk;
2832 char *p;
2833 int cachefree;
2834 uma_bucket_t bucket;
2835 u_int64_t allocs, frees;
2836
2837 cnt = 0;
2838 mtx_lock(&uma_mtx);
2839 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2840 LIST_FOREACH(z, &zk->uk_zones, uz_link)
2841 cnt++;
2842 }
2843 mtx_unlock(&uma_mtx);
2844 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2845 M_TEMP, M_WAITOK);
2846 len = snprintf(tmpbuf, linesize,
2847 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2848 if (cnt == 0)
2849 tmpbuf[len - 1] = '\0';
2850 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2851 if (error || cnt == 0)
2852 goto out;
2853 offset = tmpbuf;
2854 mtx_lock(&uma_mtx);
2855 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2856 LIST_FOREACH(z, &zk->uk_zones, uz_link) {
2857 if (cnt == 0) /* list may have changed size */
2858 break;
2859 ZONE_LOCK(z);
2860 cachefree = 0;
2861 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2862 uma_zone_sumstat(z, &cachefree, &allocs, &frees);
2863 } else {
2864 allocs = z->uz_allocs;
2865 frees = z->uz_frees;
2866 }
2867
2868 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
2869 cachefree += bucket->ub_cnt;
2870 }
2871 totalfree = zk->uk_free + cachefree;
2872 len = snprintf(offset, linesize,
2873 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2874 z->uz_name, zk->uk_size,
2875 zk->uk_maxpages * zk->uk_ipers,
2876 (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
2877 totalfree,
2878 (unsigned long long)allocs);
2879 ZONE_UNLOCK(z);
2880 for (p = offset + 12; p > offset && *p == ' '; --p)
2881 /* nothing */ ;
2882 p[1] = ':';
2883 cnt--;
2884 offset += len;
2885 }
2886 }
2887 mtx_unlock(&uma_mtx);
2888 *offset++ = '\0';
2889 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2890out:
2891 FREE(tmpbuf, M_TEMP);
2892 return (error);
2893}
2894
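/*
 * Sysctl handler for vm.zone_count: report the current number of UMA zones.
 */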
2895static int
2896sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
2897{
2898 uma_keg_t kz;
2899 uma_zone_t z;
2900 int count;
2901
2902 count = 0;
2903 mtx_lock(&uma_mtx);
2904 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2905 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2906 count++;
2907 }
2908 mtx_unlock(&uma_mtx);
2909 return (sysctl_handle_int(oidp, &count, 0, req));
2910}
2911
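/*
 * Sysctl handler for vm.zone_stats: export a binary stream consisting of a
 * stream header followed by, for each zone, a type header and MAXCPU
 * per-CPU stat records.
 */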
2912static int
2913sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
2914{
2915 struct uma_stream_header ush;
2916 struct uma_type_header uth;
2917 struct uma_percpu_stat ups;
2918 uma_bucket_t bucket;
2919 struct sbuf sbuf;
2920 uma_cache_t cache;
2921 uma_keg_t kz;
2922 uma_zone_t z;
2923 char *buffer;
2924 int buflen, count, error, i;
2925
2926 mtx_lock(&uma_mtx);
2927restart:
2928 mtx_assert(&uma_mtx, MA_OWNED);
2929 count = 0;
2930 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2931 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2932 count++;
2933 }
2934 mtx_unlock(&uma_mtx);
2935
2936 buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) *
2937 MAXCPU) + 1;
2938 buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
2939
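	/*
	 * Re-count the zones now that the lock is held again; if new zones
	 * were created while we slept in malloc(), the buffer may be too
	 * small, so free it and start over.
	 */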
2940 mtx_lock(&uma_mtx);
2941 i = 0;
2942 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2943 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2944 i++;
2945 }
2946 if (i > count) {
2947 free(buffer, M_TEMP);
2948 goto restart;
2949 }
2950 count = i;
2951
2952 sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN);
2953
2954 /*
2955 * Insert stream header.
2956 */
2957 bzero(&ush, sizeof(ush));
2958 ush.ush_version = UMA_STREAM_VERSION;
2959 ush.ush_maxcpus = MAXCPU;
2960 ush.ush_count = count;
2961 if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) {
2962 mtx_unlock(&uma_mtx);
2963 error = ENOMEM;
2964 goto out;
2965 }
2966
2967 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2968 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
2969 bzero(&uth, sizeof(uth));
2970 ZONE_LOCK(z);
2971 strlcpy(uth.uth_name, z->uz_name, UMA_MAX_NAME);
2972 uth.uth_align = kz->uk_align;
2973 uth.uth_pages = kz->uk_pages;
2974 uth.uth_keg_free = kz->uk_free;
2975 uth.uth_size = kz->uk_size;
2976 uth.uth_rsize = kz->uk_rsize;
2977 uth.uth_maxpages = kz->uk_maxpages;
2978 if (kz->uk_ppera > 1)
2979 uth.uth_limit = kz->uk_maxpages /
2980 kz->uk_ppera;
2981 else
2982 uth.uth_limit = kz->uk_maxpages *
2983 kz->uk_ipers;
2984 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
2985 uth.uth_zone_free += bucket->ub_cnt;
2986 uth.uth_allocs = z->uz_allocs;
2987 uth.uth_frees = z->uz_frees;
2988 uth.uth_fails = z->uz_fails;
2989 if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
2990 ZONE_UNLOCK(z);
2991 mtx_unlock(&uma_mtx);
2992 error = ENOMEM;
2993 goto out;
2994 }
2995 /*
2996 * While it is not normally safe to access the cache
2997 * bucket pointers while not on the CPU that owns the
2998 * cache, we only allow the pointers to be exchanged
2999 * without the zone lock held, not invalidated, so
3000 * accept the possible race associated with bucket
3001 * exchange during monitoring.
3002 */
3003 for (i = 0; i < MAXCPU; i++) {
3004 bzero(&ups, sizeof(ups));
3005 if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
3006 goto skip;
3007 cache = &z->uz_cpu[i];
3008 if (cache->uc_allocbucket != NULL)
3009 ups.ups_cache_free +=
3010 cache->uc_allocbucket->ub_cnt;
3011 if (cache->uc_freebucket != NULL)
3012 ups.ups_cache_free +=
3013 cache->uc_freebucket->ub_cnt;
3014 ups.ups_allocs = cache->uc_allocs;
3015 ups.ups_frees = cache->uc_frees;
3016skip:
3017 if (sbuf_bcat(&sbuf, &ups, sizeof(ups)) < 0) {
3018 ZONE_UNLOCK(z);
3019 mtx_unlock(&uma_mtx);
3020 error = ENOMEM;
3021 goto out;
3022 }
3023 }
3024 ZONE_UNLOCK(z);
3025 }
3026 }
3027 mtx_unlock(&uma_mtx);
3028 sbuf_finish(&sbuf);
3029 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
3030out:
3031 free(buffer, M_TEMP);
3032 return (error);
3033}
52
53/* I should really use ktr.. */
54/*
55#define UMA_DEBUG 1
56#define UMA_DEBUG_ALLOC 1
57#define UMA_DEBUG_ALLOC_1 1
58*/
59
60#include "opt_param.h"
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/kernel.h>
64#include <sys/types.h>
65#include <sys/queue.h>
66#include <sys/malloc.h>
67#include <sys/ktr.h>
68#include <sys/lock.h>
69#include <sys/sysctl.h>
70#include <sys/mutex.h>
71#include <sys/proc.h>
72#include <sys/sbuf.h>
73#include <sys/smp.h>
74#include <sys/vmmeter.h>
75
76#include <vm/vm.h>
77#include <vm/vm_object.h>
78#include <vm/vm_page.h>
79#include <vm/vm_param.h>
80#include <vm/vm_map.h>
81#include <vm/vm_kern.h>
82#include <vm/vm_extern.h>
83#include <vm/uma.h>
84#include <vm/uma_int.h>
85#include <vm/uma_dbg.h>
86
87#include <machine/vmparam.h>
88
89/*
90 * This is the zone and keg from which all zones are spawned. The idea is that
91 * even the zone & keg heads are allocated from the allocator, so we use the
92 * bss section to bootstrap us.
93 */
94static struct uma_keg masterkeg;
95static struct uma_zone masterzone_k;
96static struct uma_zone masterzone_z;
97static uma_zone_t kegs = &masterzone_k;
98static uma_zone_t zones = &masterzone_z;
99
100/* This is the zone from which all of uma_slab_t's are allocated. */
101static uma_zone_t slabzone;
102static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
103
104/*
105 * The initial hash tables come out of this zone so they can be allocated
106 * prior to malloc coming up.
107 */
108static uma_zone_t hashzone;
109
110static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
111
112/*
113 * Are we allowed to allocate buckets?
114 */
115static int bucketdisable = 1;
116
117/* Linked list of all kegs in the system */
118static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
119
120/* This mutex protects the keg list */
121static struct mtx uma_mtx;
122
123/* Linked list of boot time pages */
124static LIST_HEAD(,uma_slab) uma_boot_pages =
125 LIST_HEAD_INITIALIZER(&uma_boot_pages);
126
127/* Count of free boottime pages */
128static int uma_boot_free = 0;
129
130/* Is the VM done starting up? */
131static int booted = 0;
132
133/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
134static u_int uma_max_ipers;
135static u_int uma_max_ipers_ref;
136
137/*
138 * This is the handle used to schedule events that need to happen
139 * outside of the allocation fast path.
140 */
141static struct callout uma_callout;
142#define UMA_TIMEOUT 20 /* Seconds for callout interval. */
143
144/*
145 * This structure is passed as the zone ctor arg so that I don't have to create
146 * a special allocation function just for zones.
147 */
148struct uma_zctor_args {
149 char *name;
150 size_t size;
151 uma_ctor ctor;
152 uma_dtor dtor;
153 uma_init uminit;
154 uma_fini fini;
155 uma_keg_t keg;
156 int align;
157 u_int32_t flags;
158};
159
160struct uma_kctor_args {
161 uma_zone_t zone;
162 size_t size;
163 uma_init uminit;
164 uma_fini fini;
165 int align;
166 u_int32_t flags;
167};
168
169struct uma_bucket_zone {
170 uma_zone_t ubz_zone;
171 char *ubz_name;
172 int ubz_entries;
173};
174
175#define BUCKET_MAX 128
176
177struct uma_bucket_zone bucket_zones[] = {
178 { NULL, "16 Bucket", 16 },
179 { NULL, "32 Bucket", 32 },
180 { NULL, "64 Bucket", 64 },
181 { NULL, "128 Bucket", 128 },
182 { NULL, NULL, 0}
183};
184
185#define BUCKET_SHIFT 4
186#define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
187
188/*
189 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
190 * of approximately the right size.
191 */
192static uint8_t bucket_size[BUCKET_ZONES];
193
194/*
195 * Flags and enumerations to be passed to internal functions.
196 */
197enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
198
199#define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */
200
201/* Prototypes.. */
202
203static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
204static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
205static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
206static void page_free(void *, int, u_int8_t);
207static uma_slab_t slab_zalloc(uma_zone_t, int);
208static void cache_drain(uma_zone_t);
209static void bucket_drain(uma_zone_t, uma_bucket_t);
210static void bucket_cache_drain(uma_zone_t zone);
211static int keg_ctor(void *, int, void *, int);
212static void keg_dtor(void *, int, void *);
213static int zone_ctor(void *, int, void *, int);
214static void zone_dtor(void *, int, void *);
215static int zero_init(void *, int, int);
216static void zone_small_init(uma_zone_t zone);
217static void zone_large_init(uma_zone_t zone);
218static void zone_foreach(void (*zfunc)(uma_zone_t));
219static void zone_timeout(uma_zone_t zone);
220static int hash_alloc(struct uma_hash *);
221static int hash_expand(struct uma_hash *, struct uma_hash *);
222static void hash_free(struct uma_hash *hash);
223static void uma_timeout(void *);
224static void uma_startup3(void);
225static void *uma_zalloc_internal(uma_zone_t, void *, int);
226static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
227 int);
228static void bucket_enable(void);
229static void bucket_init(void);
230static uma_bucket_t bucket_alloc(int, int);
231static void bucket_free(uma_bucket_t);
232static void bucket_zone_drain(void);
233static int uma_zalloc_bucket(uma_zone_t zone, int flags);
234static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
235static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
236static void zone_drain(uma_zone_t);
237static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
238 uma_fini fini, int align, u_int32_t flags);
239
240void uma_print_zone(uma_zone_t);
241void uma_print_stats(void);
242static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
243static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
244static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
245
246#ifdef WITNESS
247static int nosleepwithlocks = 1;
248SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
249 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
250#else
251static int nosleepwithlocks = 0;
252SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
253 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
254#endif
255SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
256 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
257SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
258
259SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
260 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
261
262SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
263 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
264
265/*
266 * This routine checks to see whether or not it's safe to enable buckets.
267 */
268
269static void
270bucket_enable(void)
271{
272 if (cnt.v_free_count < cnt.v_free_min)
273 bucketdisable = 1;
274 else
275 bucketdisable = 0;
276}
277
278/*
279 * Initialize bucket_zones, the array of zones of buckets of various sizes.
280 *
281 * For each zone, calculate the memory required for each bucket, consisting
282 * of the header and an array of pointers. Initialize bucket_size[] to point
283 * the range of appropriate bucket sizes at the zone.
284 */
285static void
286bucket_init(void)
287{
288 struct uma_bucket_zone *ubz;
289 int i;
290 int j;
291
292 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
293 int size;
294
295 ubz = &bucket_zones[j];
296 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
297 size += sizeof(void *) * ubz->ubz_entries;
298 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
299 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
300 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
301 bucket_size[i >> BUCKET_SHIFT] = j;
302 }
303}
304
305/*
306 * Given a desired number of entries for a bucket, return the zone from which
307 * to allocate the bucket.
308 */
309static struct uma_bucket_zone *
310bucket_zone_lookup(int entries)
311{
312 int idx;
313
314 idx = howmany(entries, 1 << BUCKET_SHIFT);
315 return (&bucket_zones[bucket_size[idx]]);
316}
317
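/*
 * Allocate an empty bucket sized for the requested number of entries from
 * the matching bucket zone.  Returns NULL if buckets are currently
 * disabled or the allocation fails.
 */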
318static uma_bucket_t
319bucket_alloc(int entries, int bflags)
320{
321 struct uma_bucket_zone *ubz;
322 uma_bucket_t bucket;
323
324 /*
325 * This is to stop us from allocating per cpu buckets while we're
326 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
327 * boot pages. This also prevents us from allocating buckets in
328 * low memory situations.
329 */
330 if (bucketdisable)
331 return (NULL);
332
333 ubz = bucket_zone_lookup(entries);
334 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
335 if (bucket) {
336#ifdef INVARIANTS
337 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
338#endif
339 bucket->ub_cnt = 0;
340 bucket->ub_entries = ubz->ubz_entries;
341 }
342
343 return (bucket);
344}
345
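/*
 * Release a bucket back to the bucket zone it was allocated from, selected
 * by its ub_entries size.
 */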
346static void
347bucket_free(uma_bucket_t bucket)
348{
349 struct uma_bucket_zone *ubz;
350
351 ubz = bucket_zone_lookup(bucket->ub_entries);
352 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE, 0);
353}
354
355static void
356bucket_zone_drain(void)
357{
358 struct uma_bucket_zone *ubz;
359
360 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
361 zone_drain(ubz->ubz_zone);
362}
363
364
365/*
366 * Routine called from a timeout to fire off time-interval based
367 * calculations (stats, hash size adjustments, etc.).
368 *
369 * Arguments:
370 * arg Unused
371 *
372 * Returns:
373 * Nothing
374 */
375static void
376uma_timeout(void *unused)
377{
378 bucket_enable();
379 zone_foreach(zone_timeout);
380
381 /* Reschedule this event */
382 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
383}
384
385/*
386 * Routine to perform timeout driven calculations.  This currently only
387 * expands the per-zone hash tables as the zones grow.
388 *
389 * Arguments:
390 * zone The zone to operate on
391 *
392 * Returns:
393 * Nothing
394 */
395static void
396zone_timeout(uma_zone_t zone)
397{
398 uma_keg_t keg;
399 u_int64_t alloc;
400
401 keg = zone->uz_keg;
402 alloc = 0;
403
404 /*
405 * Expand the zone hash table.
406 *
407 * This is done if the number of slabs is larger than the hash size.
408 * What I'm trying to do here is eliminate collisions entirely.  This
409 * may be a little aggressive. Should I allow for two collisions max?
410 */
411 ZONE_LOCK(zone);
412 if (keg->uk_flags & UMA_ZONE_HASH &&
413 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
414 struct uma_hash newhash;
415 struct uma_hash oldhash;
416 int ret;
417
418 /*
419 * This is so involved because allocating and freeing
420 * while the zone lock is held will lead to deadlock.
421 * I have to do everything in stages and check for
422 * races.
423 */
424 newhash = keg->uk_hash;
425 ZONE_UNLOCK(zone);
426 ret = hash_alloc(&newhash);
427 ZONE_LOCK(zone);
428 if (ret) {
429 if (hash_expand(&keg->uk_hash, &newhash)) {
430 oldhash = keg->uk_hash;
431 keg->uk_hash = newhash;
432 } else
433 oldhash = newhash;
434
435 ZONE_UNLOCK(zone);
436 hash_free(&oldhash);
437 ZONE_LOCK(zone);
438 }
439 }
440 ZONE_UNLOCK(zone);
441}
442
443/*
444 * Allocate and zero fill the next sized hash table from the appropriate
445 * backing store.
446 *
447 * Arguments:
448 * hash A new hash structure with the old hash size in uh_hashsize
449 *
450 * Returns:
451 *	1 on success and 0 on failure.
452 */
453static int
454hash_alloc(struct uma_hash *hash)
455{
456 int oldsize;
457 int alloc;
458
459 oldsize = hash->uh_hashsize;
460
461 /* We're just going to go to a power of two greater */
462 if (oldsize) {
463 hash->uh_hashsize = oldsize * 2;
464 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
465 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
466 M_UMAHASH, M_NOWAIT);
467 } else {
468 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
469 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
470 M_WAITOK);
471 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
472 }
473 if (hash->uh_slab_hash) {
474 bzero(hash->uh_slab_hash, alloc);
475 hash->uh_hashmask = hash->uh_hashsize - 1;
476 return (1);
477 }
478
479 return (0);
480}
481
482/*
483 * Expands the hash table for HASH zones. This is done from zone_timeout
484 * to reduce collisions. This must not be done in the regular allocation
485 * path, otherwise, we can recurse on the vm while allocating pages.
486 *
487 * Arguments:
488 * oldhash The hash you want to expand
489 * newhash The hash structure for the new table
490 *
491 * Returns:
492 *	1 if the entries were rehashed into the new table, 0 otherwise.
493 *
494 * Discussion:
495 */
496static int
497hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
498{
499 uma_slab_t slab;
500 int hval;
501 int i;
502
503 if (!newhash->uh_slab_hash)
504 return (0);
505
506 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
507 return (0);
508
509 /*
510 * I need to investigate hash algorithms for resizing without a
511 * full rehash.
512 */
513
514 for (i = 0; i < oldhash->uh_hashsize; i++)
515 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
516 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
517 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
518 hval = UMA_HASH(newhash, slab->us_data);
519 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
520 slab, us_hlink);
521 }
522
523 return (1);
524}
525
526/*
527 * Free the hash bucket to the appropriate backing store.
528 *
529 * Arguments:
530 *	hash  The hash structure whose uh_slab_hash table is being freed
532 *
533 * Returns:
534 * Nothing
535 */
536static void
537hash_free(struct uma_hash *hash)
538{
539 if (hash->uh_slab_hash == NULL)
540 return;
541 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
542 uma_zfree_internal(hashzone,
543 hash->uh_slab_hash, NULL, SKIP_NONE, 0);
544 else
545 free(hash->uh_slab_hash, M_UMAHASH);
546}
547
548/*
549 * Frees all outstanding items in a bucket
550 *
551 * Arguments:
552 * zone The zone to free to, must be unlocked.
553 *	bucket The free/alloc bucket with items.
554 *
555 * Returns:
556 * Nothing
557 */
558
559static void
560bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
561{
562 uma_slab_t slab;
563 int mzone;
564 void *item;
565
566 if (bucket == NULL)
567 return;
568
569 slab = NULL;
570 mzone = 0;
571
572 /* We have to lookup the slab again for malloc.. */
573 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
574 mzone = 1;
575
576 while (bucket->ub_cnt > 0) {
577 bucket->ub_cnt--;
578 item = bucket->ub_bucket[bucket->ub_cnt];
579#ifdef INVARIANTS
580 bucket->ub_bucket[bucket->ub_cnt] = NULL;
581 KASSERT(item != NULL,
582 ("bucket_drain: botched ptr, item is NULL"));
583#endif
584 /*
585 * This is extremely inefficient. The slab pointer was passed
586 * to uma_zfree_arg, but we lost it because the buckets don't
587 * hold them. This will go away when free() gets a size passed
588 * to it.
589 */
590 if (mzone)
591 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
592 uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
593 }
594}
595
596/*
597 * Drains the per cpu caches for a zone.
598 *
599 * NOTE: This may only be called while the zone is being torn down, and not
600 * during normal operation. This is necessary in order that we do not have
601 * to migrate CPUs to drain the per-CPU caches.
602 *
603 * Arguments:
604 * zone The zone to drain, must be unlocked.
605 *
606 * Returns:
607 * Nothing
608 */
609static void
610cache_drain(uma_zone_t zone)
611{
612 uma_cache_t cache;
613 int cpu;
614
615 /*
616 * XXX: It is safe to not lock the per-CPU caches, because we're
617 * tearing down the zone anyway. I.e., there will be no further use
618 * of the caches at this point.
619 *
620 * XXX: It would be good to be able to assert that the zone is being
621 * torn down to prevent improper use of cache_drain().
622 *
623 * XXX: We lock the zone before passing into bucket_cache_drain() as
624 * it is used elsewhere. Should the tear-down path be made special
625 * there in some form?
626 */
627 for (cpu = 0; cpu <= mp_maxid; cpu++) {
628 if (CPU_ABSENT(cpu))
629 continue;
630 cache = &zone->uz_cpu[cpu];
631 bucket_drain(zone, cache->uc_allocbucket);
632 bucket_drain(zone, cache->uc_freebucket);
633 if (cache->uc_allocbucket != NULL)
634 bucket_free(cache->uc_allocbucket);
635 if (cache->uc_freebucket != NULL)
636 bucket_free(cache->uc_freebucket);
637 cache->uc_allocbucket = cache->uc_freebucket = NULL;
638 }
639 ZONE_LOCK(zone);
640 bucket_cache_drain(zone);
641 ZONE_UNLOCK(zone);
642}
643
644/*
645 * Drain the cached buckets from a zone. Expects a locked zone on entry.
646 */
647static void
648bucket_cache_drain(uma_zone_t zone)
649{
650 uma_bucket_t bucket;
651
652 /*
653	 * Drain the bucket queues and free the buckets; we just keep two per
654	 * cpu (alloc/free).
655 */
656 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
657 LIST_REMOVE(bucket, ub_link);
658 ZONE_UNLOCK(zone);
659 bucket_drain(zone, bucket);
660 bucket_free(bucket);
661 ZONE_LOCK(zone);
662 }
663
664 /* Now we do the free queue.. */
665 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
666 LIST_REMOVE(bucket, ub_link);
667 bucket_free(bucket);
668 }
669}
670
671/*
672 * Frees pages from a zone back to the system. This is done on demand from
673 * the pageout daemon.
674 *
675 * Arguments:
676 *	zone  The zone to free pages from
678 *
679 * Returns:
680 * Nothing.
681 */
682static void
683zone_drain(uma_zone_t zone)
684{
685 struct slabhead freeslabs = { 0 };
686 uma_keg_t keg;
687 uma_slab_t slab;
688 uma_slab_t n;
689 u_int8_t flags;
690 u_int8_t *mem;
691 int i;
692
693 keg = zone->uz_keg;
694
695 /*
696 * We don't want to take pages from statically allocated zones at this
697 * time
698 */
699 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
700 return;
701
702 ZONE_LOCK(zone);
703
704#ifdef UMA_DEBUG
705 printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
706#endif
707 bucket_cache_drain(zone);
708 if (keg->uk_free == 0)
709 goto finished;
710
711 slab = LIST_FIRST(&keg->uk_free_slab);
712 while (slab) {
713 n = LIST_NEXT(slab, us_link);
714
715		/* We have nowhere to free these to */
716 if (slab->us_flags & UMA_SLAB_BOOT) {
717 slab = n;
718 continue;
719 }
720
721 LIST_REMOVE(slab, us_link);
722 keg->uk_pages -= keg->uk_ppera;
723 keg->uk_free -= keg->uk_ipers;
724
725 if (keg->uk_flags & UMA_ZONE_HASH)
726 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
727
728 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
729
730 slab = n;
731 }
732finished:
733 ZONE_UNLOCK(zone);
734
735 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
736 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
737 if (keg->uk_fini)
738 for (i = 0; i < keg->uk_ipers; i++)
739 keg->uk_fini(
740 slab->us_data + (keg->uk_rsize * i),
741 keg->uk_size);
742 flags = slab->us_flags;
743 mem = slab->us_data;
744
745 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
746 (keg->uk_flags & UMA_ZONE_REFCNT)) {
747 vm_object_t obj;
748
749 if (flags & UMA_SLAB_KMEM)
750 obj = kmem_object;
751 else
752 obj = NULL;
753 for (i = 0; i < keg->uk_ppera; i++)
754 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
755 obj);
756 }
757 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
758 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
759 SKIP_NONE, 0);
760#ifdef UMA_DEBUG
761 printf("%s: Returning %d bytes.\n",
762 zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
763#endif
764 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
765 }
766}
767
768/*
769 * Allocate a new slab for a zone. This does not insert the slab onto a list.
770 *
771 * Arguments:
772 * zone The zone to allocate slabs for
773 * wait Shall we wait?
774 *
775 * Returns:
776 * The slab that was allocated or NULL if there is no memory and the
777 * caller specified M_NOWAIT.
778 */
779static uma_slab_t
780slab_zalloc(uma_zone_t zone, int wait)
781{
782 uma_slabrefcnt_t slabref;
783 uma_slab_t slab;
784 uma_keg_t keg;
785 u_int8_t *mem;
786 u_int8_t flags;
787 int i;
788
789 slab = NULL;
790 keg = zone->uz_keg;
791
792#ifdef UMA_DEBUG
793 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
794#endif
795 ZONE_UNLOCK(zone);
796
797 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
798 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
799 if (slab == NULL) {
800 ZONE_LOCK(zone);
801 return NULL;
802 }
803 }
804
805 /*
806 * This reproduces the old vm_zone behavior of zero filling pages the
807 * first time they are added to a zone.
808 *
809 * Malloced items are zeroed in uma_zalloc.
810 */
811
812 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
813 wait |= M_ZERO;
814 else
815 wait &= ~M_ZERO;
816
817 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
818 &flags, wait);
819 if (mem == NULL) {
820 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
821 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
822 SKIP_NONE, 0);
823 ZONE_LOCK(zone);
824 return (NULL);
825 }
826
827 /* Point the slab into the allocated memory */
828 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
829 slab = (uma_slab_t )(mem + keg->uk_pgoff);
830
831 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
832 (keg->uk_flags & UMA_ZONE_REFCNT))
833 for (i = 0; i < keg->uk_ppera; i++)
834 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
835
836 slab->us_keg = keg;
837 slab->us_data = mem;
838 slab->us_freecount = keg->uk_ipers;
839 slab->us_firstfree = 0;
840 slab->us_flags = flags;
841
842 if (keg->uk_flags & UMA_ZONE_REFCNT) {
843 slabref = (uma_slabrefcnt_t)slab;
844 for (i = 0; i < keg->uk_ipers; i++) {
845 slabref->us_freelist[i].us_refcnt = 0;
846 slabref->us_freelist[i].us_item = i+1;
847 }
848 } else {
849 for (i = 0; i < keg->uk_ipers; i++)
850 slab->us_freelist[i].us_item = i+1;
851 }
852
853 if (keg->uk_init != NULL) {
854 for (i = 0; i < keg->uk_ipers; i++)
855 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
856 keg->uk_size, wait) != 0)
857 break;
858 if (i != keg->uk_ipers) {
859 if (keg->uk_fini != NULL) {
860 for (i--; i > -1; i--)
861 keg->uk_fini(slab->us_data +
862 (keg->uk_rsize * i),
863 keg->uk_size);
864 }
865 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
866 (keg->uk_flags & UMA_ZONE_REFCNT)) {
867 vm_object_t obj;
868
869 if (flags & UMA_SLAB_KMEM)
870 obj = kmem_object;
871 else
872 obj = NULL;
873 for (i = 0; i < keg->uk_ppera; i++)
874 vsetobj((vm_offset_t)mem +
875 (i * PAGE_SIZE), obj);
876 }
877 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
878 uma_zfree_internal(keg->uk_slabzone, slab,
879 NULL, SKIP_NONE, 0);
880 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
881 flags);
882 ZONE_LOCK(zone);
883 return (NULL);
884 }
885 }
886 ZONE_LOCK(zone);
887
888 if (keg->uk_flags & UMA_ZONE_HASH)
889 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
890
891 keg->uk_pages += keg->uk_ppera;
892 keg->uk_free += keg->uk_ipers;
893
894 return (slab);
895}
896
897/*
898 * This function is intended to be used early on in place of page_alloc() so
899 * that we may use the boot time page cache to satisfy allocations before
900 * the VM is ready.
901 */
902static void *
903startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
904{
905 uma_keg_t keg;
906
907 keg = zone->uz_keg;
908
909 /*
910 * Check our small startup cache to see if it has pages remaining.
911 */
912 mtx_lock(&uma_mtx);
913 if (uma_boot_free != 0) {
914 uma_slab_t tmps;
915
916 tmps = LIST_FIRST(&uma_boot_pages);
917 LIST_REMOVE(tmps, us_link);
918 uma_boot_free--;
919 mtx_unlock(&uma_mtx);
920 *pflag = tmps->us_flags;
921 return (tmps->us_data);
922 }
923 mtx_unlock(&uma_mtx);
924 if (booted == 0)
925 panic("UMA: Increase UMA_BOOT_PAGES");
926 /*
927 * Now that we've booted reset these users to their real allocator.
928 */
929#ifdef UMA_MD_SMALL_ALLOC
930 keg->uk_allocf = uma_small_alloc;
931#else
932 keg->uk_allocf = page_alloc;
933#endif
934 return keg->uk_allocf(zone, bytes, pflag, wait);
935}
936
937/*
938 * Allocates a number of pages from the system
939 *
940 * Arguments:
941 * zone Unused
942 * bytes The number of bytes requested
943 * wait Shall we wait?
944 *
945 * Returns:
946 * A pointer to the alloced memory or possibly
947 * NULL if M_NOWAIT is set.
948 */
949static void *
950page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
951{
952 void *p; /* Returned page */
953
954 *pflag = UMA_SLAB_KMEM;
955 p = (void *) kmem_malloc(kmem_map, bytes, wait);
956
957 return (p);
958}
959
960/*
961 * Allocates a number of pages from within an object
962 *
963 * Arguments:
964 * zone Unused
965 * bytes The number of bytes requested
966 * wait Shall we wait?
967 *
968 * Returns:
969 * A pointer to the alloced memory or possibly
970 * NULL if M_NOWAIT is set.
971 */
972static void *
973obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
974{
975 vm_object_t object;
976 vm_offset_t retkva, zkva;
977 vm_page_t p;
978 int pages, startpages;
979
980 object = zone->uz_keg->uk_obj;
981 retkva = 0;
982
983 /*
984 * This looks a little weird since we're getting one page at a time.
985 */
986 VM_OBJECT_LOCK(object);
987 p = TAILQ_LAST(&object->memq, pglist);
988 pages = p != NULL ? p->pindex + 1 : 0;
989 startpages = pages;
990 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
991 for (; bytes > 0; bytes -= PAGE_SIZE) {
992 p = vm_page_alloc(object, pages,
993 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
994 if (p == NULL) {
995 if (pages != startpages)
996 pmap_qremove(retkva, pages - startpages);
997 while (pages != startpages) {
998 pages--;
999 p = TAILQ_LAST(&object->memq, pglist);
1000 vm_page_lock_queues();
1001 vm_page_unwire(p, 0);
1002 vm_page_free(p);
1003 vm_page_unlock_queues();
1004 }
1005 retkva = 0;
1006 goto done;
1007 }
1008 pmap_qenter(zkva, &p, 1);
1009 if (retkva == 0)
1010 retkva = zkva;
1011 zkva += PAGE_SIZE;
1012 pages += 1;
1013 }
1014done:
1015 VM_OBJECT_UNLOCK(object);
1016 *flags = UMA_SLAB_PRIV;
1017
1018 return ((void *)retkva);
1019}
1020
1021/*
1022 * Frees a number of pages to the system
1023 *
1024 * Arguments:
1025 * mem A pointer to the memory to be freed
1026 * size The size of the memory being freed
1027 * flags The original p->us_flags field
1028 *
1029 * Returns:
1030 * Nothing
1031 */
1032static void
1033page_free(void *mem, int size, u_int8_t flags)
1034{
1035 vm_map_t map;
1036
1037 if (flags & UMA_SLAB_KMEM)
1038 map = kmem_map;
1039 else
1040 panic("UMA: page_free used with invalid flags %d\n", flags);
1041
1042 kmem_free(map, (vm_offset_t)mem, size);
1043}
1044
1045/*
1046 * Zero fill initializer
1047 *
1048 * Arguments/Returns follow uma_init specifications
1049 */
1050static int
1051zero_init(void *mem, int size, int flags)
1052{
1053 bzero(mem, size);
1054 return (0);
1055}
1056
1057/*
1058 * Finish creating a small uma zone.  This calculates ipers and the zone size.
1059 *
1060 * Arguments
1061 * zone The zone we should initialize
1062 *
1063 * Returns
1064 * Nothing
1065 */
1066static void
1067zone_small_init(uma_zone_t zone)
1068{
1069 uma_keg_t keg;
1070 u_int rsize;
1071 u_int memused;
1072 u_int wastedspace;
1073 u_int shsize;
1074
1075 keg = zone->uz_keg;
1076 KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
1077 rsize = keg->uk_size;
1078
1079 if (rsize < UMA_SMALLEST_UNIT)
1080 rsize = UMA_SMALLEST_UNIT;
1081 if (rsize & keg->uk_align)
1082 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1083
1084 keg->uk_rsize = rsize;
1085 keg->uk_ppera = 1;
1086
1087 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1088 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
1089 shsize = sizeof(struct uma_slab_refcnt);
1090 } else {
1091 rsize += UMA_FRITM_SZ; /* Account for linkage */
1092 shsize = sizeof(struct uma_slab);
1093 }
1094
1095 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1096 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
1097 memused = keg->uk_ipers * rsize + shsize;
1098 wastedspace = UMA_SLAB_SIZE - memused;
1099
1100 /*
1101 * We can't do OFFPAGE if we're internal or if we've been
1102 * asked to not go to the VM for buckets. If we do this we
1103 * may end up going to the VM (kmem_map) for slabs which we
1104 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1105 * result of UMA_ZONE_VM, which clearly forbids it.
1106 */
1107 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1108 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1109 return;
1110
1111 if ((wastedspace >= UMA_MAX_WASTE) &&
1112 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1113 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1114 KASSERT(keg->uk_ipers <= 255,
1115 ("zone_small_init: keg->uk_ipers too high!"));
1116#ifdef UMA_DEBUG
1117 printf("UMA decided we need offpage slab headers for "
1118 "zone: %s, calculated wastedspace = %d, "
1119 "maximum wasted space allowed = %d, "
1120 "calculated ipers = %d, "
1121 "new wasted space = %d\n", zone->uz_name, wastedspace,
1122 UMA_MAX_WASTE, keg->uk_ipers,
1123 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1124#endif
1125 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1126 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1127 keg->uk_flags |= UMA_ZONE_HASH;
1128 }
1129}
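
/*
 * Worked example of the sizing above, using hypothetical numbers not tied
 * to any particular configuration: a 50-byte item with uk_align = 7 is
 * rounded to rsize = (50 & ~7) + 8 = 56, so uk_rsize = 56.  If the
 * per-item linkage (UMA_FRITM_SZ) were 4 bytes and the in-page header
 * (shsize) 64 bytes on a 4096-byte slab, then
 * ipers = (4096 - 64) / 60 = 67, memused = 67 * 60 + 64 = 4084 and
 * wastedspace = 12; only if that waste reached UMA_MAX_WASTE (and more
 * items would fit once the header and free list moved off the page)
 * would the slab header be pushed OFFPAGE.
 */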
1130
1131/*
1132 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
1133 * OFFPAGE for now.  When I can allow for more dynamic slab sizes, this will be
1134 * more complicated.
1135 *
1136 * Arguments
1137 * zone The zone we should initialize
1138 *
1139 * Returns
1140 * Nothing
1141 */
1142static void
1143zone_large_init(uma_zone_t zone)
1144{
1145 uma_keg_t keg;
1146 int pages;
1147
1148 keg = zone->uz_keg;
1149
1150 KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
1151 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1152 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1153
1154 pages = keg->uk_size / UMA_SLAB_SIZE;
1155
1156 /* Account for remainder */
1157 if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1158 pages++;
1159
1160 keg->uk_ppera = pages;
1161 keg->uk_ipers = 1;
1162
1163 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1164 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1165 keg->uk_flags |= UMA_ZONE_HASH;
1166
1167 keg->uk_rsize = keg->uk_size;
1168}
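
/*
 * For illustration, with hypothetical sizes: a 9000-byte item on a
 * 4096-byte slab gives pages = 9000 / 4096 = 2, and since 2 * 4096 is
 * still short of 9000 the remainder check bumps it to uk_ppera = 3, with
 * a single item (uk_ipers = 1) spanning those pages and an OFFPAGE slab
 * header tracking it.
 */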
1169
1170/*
1171 * Keg header ctor.  This initializes all fields, locks, etc., and inserts
1172 * the keg onto the global keg list.
1173 *
1174 * Arguments/Returns follow uma_ctor specifications
1175 * udata Actually uma_kctor_args
1176 */
1177static int
1178keg_ctor(void *mem, int size, void *udata, int flags)
1179{
1180 struct uma_kctor_args *arg = udata;
1181 uma_keg_t keg = mem;
1182 uma_zone_t zone;
1183
1184 bzero(keg, size);
1185 keg->uk_size = arg->size;
1186 keg->uk_init = arg->uminit;
1187 keg->uk_fini = arg->fini;
1188 keg->uk_align = arg->align;
1189 keg->uk_free = 0;
1190 keg->uk_pages = 0;
1191 keg->uk_flags = arg->flags;
1192 keg->uk_allocf = page_alloc;
1193 keg->uk_freef = page_free;
1194 keg->uk_recurse = 0;
1195 keg->uk_slabzone = NULL;
1196
1197 /*
1198 * The master zone is passed to us at keg-creation time.
1199 */
1200 zone = arg->zone;
1201 zone->uz_keg = keg;
1202
1203 if (arg->flags & UMA_ZONE_VM)
1204 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1205
1206 if (arg->flags & UMA_ZONE_ZINIT)
1207 keg->uk_init = zero_init;
1208
1209 /*
1210 * The +UMA_FRITM_SZ added to uk_size is to account for the
1211 * linkage that is added to the size in zone_small_init(). If
1212 * we don't account for this here then we may end up in
1213 * zone_small_init() with a calculated 'ipers' of 0.
1214 */
1215 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1216 if ((keg->uk_size+UMA_FRITMREF_SZ) >
1217 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1218 zone_large_init(zone);
1219 else
1220 zone_small_init(zone);
1221 } else {
1222 if ((keg->uk_size+UMA_FRITM_SZ) >
1223 (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1224 zone_large_init(zone);
1225 else
1226 zone_small_init(zone);
1227 }
1228
1229 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1230 if (keg->uk_flags & UMA_ZONE_REFCNT)
1231 keg->uk_slabzone = slabrefzone;
1232 else
1233 keg->uk_slabzone = slabzone;
1234 }
1235
1236 /*
1237 * If we haven't booted yet we need allocations to go through the
1238 * startup cache until the vm is ready.
1239 */
1240 if (keg->uk_ppera == 1) {
1241#ifdef UMA_MD_SMALL_ALLOC
1242 keg->uk_allocf = uma_small_alloc;
1243 keg->uk_freef = uma_small_free;
1244#endif
1245 if (booted == 0)
1246 keg->uk_allocf = startup_alloc;
1247 }
1248
1249	/*
1250	 * Initialize the keg's lock (shared among zones) through
1251	 * the master zone.
1252	 */
1253 zone->uz_lock = &keg->uk_lock;
1254 if (arg->flags & UMA_ZONE_MTXCLASS)
1255 ZONE_LOCK_INIT(zone, 1);
1256 else
1257 ZONE_LOCK_INIT(zone, 0);
1258
1259 /*
1260	 * If we're putting the slab header in the actual page, we need to
1261	 * figure out where in each page it goes.  This calculates a
1262	 * right-justified offset into the memory on an ALIGN_PTR boundary.
1263 */
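	/*
	 * Illustration with hypothetical sizes: if the slab header plus
	 * uk_ipers free-list entries came to 70 bytes and UMA_ALIGN_PTR
	 * were 7, totsize below would round up to (70 & ~7) + 8 = 72 and
	 * uk_pgoff would be UMA_SLAB_SIZE - 72, leaving the header flush
	 * against the end of the slab with the items packed in front.
	 */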
1264 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1265 u_int totsize;
1266
1267 /* Size of the slab struct and free list */
1268 if (keg->uk_flags & UMA_ZONE_REFCNT)
1269 totsize = sizeof(struct uma_slab_refcnt) +
1270 keg->uk_ipers * UMA_FRITMREF_SZ;
1271 else
1272 totsize = sizeof(struct uma_slab) +
1273 keg->uk_ipers * UMA_FRITM_SZ;
1274
1275 if (totsize & UMA_ALIGN_PTR)
1276 totsize = (totsize & ~UMA_ALIGN_PTR) +
1277 (UMA_ALIGN_PTR + 1);
1278 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
1279
1280 if (keg->uk_flags & UMA_ZONE_REFCNT)
1281 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1282 + keg->uk_ipers * UMA_FRITMREF_SZ;
1283 else
1284 totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1285 + keg->uk_ipers * UMA_FRITM_SZ;
1286
1287		/*
1288		 * The only way the following can happen is if our
1289		 * UMA_ALIGN_PTR adjustments have made us bigger than
1290		 * UMA_SLAB_SIZE.  I haven't checked whether this is
1291		 * mathematically possible for all cases, so we make
1292		 * sure here anyway.
1293		 */
1294 if (totsize > UMA_SLAB_SIZE) {
1295 printf("zone %s ipers %d rsize %d size %d\n",
1296 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1297 keg->uk_size);
1298 panic("UMA slab won't fit.\n");
1299 }
1300 }
1301
1302 if (keg->uk_flags & UMA_ZONE_HASH)
1303 hash_alloc(&keg->uk_hash);
1304
1305#ifdef UMA_DEBUG
1306 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1307 zone->uz_name, zone,
1308 keg->uk_size, keg->uk_ipers,
1309 keg->uk_ppera, keg->uk_pgoff);
1310#endif
1311
1312 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1313
1314 mtx_lock(&uma_mtx);
1315 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1316 mtx_unlock(&uma_mtx);
1317 return (0);
1318}
1319
1320/*
1321 * Zone header ctor. This initializes all fields, locks, etc.
1322 *
1323 * Arguments/Returns follow uma_ctor specifications
1324 * udata Actually uma_zctor_args
1325 */
1326
1327static int
1328zone_ctor(void *mem, int size, void *udata, int flags)
1329{
1330 struct uma_zctor_args *arg = udata;
1331 uma_zone_t zone = mem;
1332 uma_zone_t z;
1333 uma_keg_t keg;
1334
1335 bzero(zone, size);
1336 zone->uz_name = arg->name;
1337 zone->uz_ctor = arg->ctor;
1338 zone->uz_dtor = arg->dtor;
1339 zone->uz_init = NULL;
1340 zone->uz_fini = NULL;
1341 zone->uz_allocs = 0;
1342 zone->uz_frees = 0;
1343 zone->uz_fails = 0;
1344 zone->uz_fills = zone->uz_count = 0;
1345
1346 if (arg->flags & UMA_ZONE_SECONDARY) {
1347 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1348 keg = arg->keg;
1349 zone->uz_keg = keg;
1350 zone->uz_init = arg->uminit;
1351 zone->uz_fini = arg->fini;
1352 zone->uz_lock = &keg->uk_lock;
1353 mtx_lock(&uma_mtx);
1354 ZONE_LOCK(zone);
1355 keg->uk_flags |= UMA_ZONE_SECONDARY;
1356 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1357 if (LIST_NEXT(z, uz_link) == NULL) {
1358 LIST_INSERT_AFTER(z, zone, uz_link);
1359 break;
1360 }
1361 }
1362 ZONE_UNLOCK(zone);
1363 mtx_unlock(&uma_mtx);
1364 } else if (arg->keg == NULL) {
1365 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1366 arg->align, arg->flags) == NULL)
1367 return (ENOMEM);
1368 } else {
1369 struct uma_kctor_args karg;
1370 int error;
1371
1372 /* We should only be here from uma_startup() */
1373 karg.size = arg->size;
1374 karg.uminit = arg->uminit;
1375 karg.fini = arg->fini;
1376 karg.align = arg->align;
1377 karg.flags = arg->flags;
1378 karg.zone = zone;
1379 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1380 flags);
1381 if (error)
1382 return (error);
1383 }
1384 keg = zone->uz_keg;
1385 zone->uz_lock = &keg->uk_lock;
1386
1387 /*
1388	 * Some internal zones don't have room allocated for the per-CPU
1389 * caches. If we're internal, bail out here.
1390 */
1391 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1392 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
1393 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1394 return (0);
1395 }
1396
1397 if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1398 zone->uz_count = BUCKET_MAX;
1399 else if (keg->uk_ipers <= BUCKET_MAX)
1400 zone->uz_count = keg->uk_ipers;
1401 else
1402 zone->uz_count = BUCKET_MAX;
1403 return (0);
1404}
1405
1406/*
1407 * Keg header dtor. This frees all data, destroys locks, frees the hash
1408 * table and removes the keg from the global list.
1409 *
1410 * Arguments/Returns follow uma_dtor specifications
1411 * udata unused
1412 */
1413static void
1414keg_dtor(void *arg, int size, void *udata)
1415{
1416 uma_keg_t keg;
1417
1418 keg = (uma_keg_t)arg;
1419 mtx_lock(&keg->uk_lock);
1420 if (keg->uk_free != 0) {
1421 printf("Freed UMA keg was not empty (%d items). "
1422		    "Lost %d pages of memory.\n",
1423 keg->uk_free, keg->uk_pages);
1424 }
1425 mtx_unlock(&keg->uk_lock);
1426
1427 if (keg->uk_flags & UMA_ZONE_HASH)
1428 hash_free(&keg->uk_hash);
1429
1430 mtx_destroy(&keg->uk_lock);
1431}
1432
1433/*
1434 * Zone header dtor.
1435 *
1436 * Arguments/Returns follow uma_dtor specifications
1437 * udata unused
1438 */
1439static void
1440zone_dtor(void *arg, int size, void *udata)
1441{
1442 uma_zone_t zone;
1443 uma_keg_t keg;
1444
1445 zone = (uma_zone_t)arg;
1446 keg = zone->uz_keg;
1447
1448 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
1449 cache_drain(zone);
1450
1451 mtx_lock(&uma_mtx);
1452 zone_drain(zone);
1453 if (keg->uk_flags & UMA_ZONE_SECONDARY) {
1454 LIST_REMOVE(zone, uz_link);
1455		/*
1456		 * XXX there are some races here where
1457		 * the zone can be drained but the zone lock
1458		 * released and then the zone refilled before we
1459		 * remove it... we don't care for now.
1460		 */
1461 ZONE_LOCK(zone);
1462 if (LIST_EMPTY(&keg->uk_zones))
1463 keg->uk_flags &= ~UMA_ZONE_SECONDARY;
1464 ZONE_UNLOCK(zone);
1465 mtx_unlock(&uma_mtx);
1466 } else {
1467 LIST_REMOVE(keg, uk_link);
1468 LIST_REMOVE(zone, uz_link);
1469 mtx_unlock(&uma_mtx);
1470 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE, 0);
1471 }
1472 zone->uz_keg = NULL;
1473}
1474
1475/*
1476 * Traverses every zone in the system and calls a callback
1477 *
1478 * Arguments:
1479 * zfunc A pointer to a function which accepts a zone
1480 * as an argument.
1481 *
1482 * Returns:
1483 * Nothing
1484 */
1485static void
1486zone_foreach(void (*zfunc)(uma_zone_t))
1487{
1488 uma_keg_t keg;
1489 uma_zone_t zone;
1490
1491 mtx_lock(&uma_mtx);
1492 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1493 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1494 zfunc(zone);
1495 }
1496 mtx_unlock(&uma_mtx);
1497}
1498
1499/* Public functions */
1500/* See uma.h */
1501void
1502uma_startup(void *bootmem)
1503{
1504 struct uma_zctor_args args;
1505 uma_slab_t slab;
1506 u_int slabsize;
1507 u_int objsize, totsize, wsize;
1508 int i;
1509
1510#ifdef UMA_DEBUG
1511 printf("Creating uma keg headers zone and keg.\n");
1512#endif
1513 /*
1514 * The general UMA lock is a recursion-allowed lock because
1515 * there is a code path where, while we're still configured
1516 * to use startup_alloc() for backend page allocations, we
1517 * may end up in uma_reclaim() which calls zone_foreach(zone_drain),
1518 * which grabs uma_mtx, only to later call into startup_alloc()
1519 * because while freeing we needed to allocate a bucket. Since
1520 * startup_alloc() also takes uma_mtx, we need to be able to
1521 * recurse on it.
1522 */
1523 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE);
1524
1525 /*
1526 * Figure out the maximum number of items-per-slab we'll have if
1527 * we're using the OFFPAGE slab header to track free items, given
1528 * all possible object sizes and the maximum desired wastage
1529 * (UMA_MAX_WASTE).
1530 *
1531 * We iterate until we find an object size for
1532 * which the calculated wastage in zone_small_init() will be
1533 * enough to warrant OFFPAGE. Since wastedspace versus objsize
1534 * is an overall increasing see-saw function, we find the smallest
1535 * objsize such that the wastage is always acceptable for objects
1536 * with that objsize or smaller. Since a smaller objsize always
1537 * generates a larger possible uma_max_ipers, we use this computed
1538 * objsize to calculate the largest ipers possible. Since the
1539 * ipers calculated for OFFPAGE slab headers is always larger than
1540 * the ipers initially calculated in zone_small_init(), we use
1541 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1542 * obtain the maximum ipers possible for offpage slab headers.
1543 *
1544	 * It should be noted that ipers versus objsize is an inversely
1545	 * proportional function which drops off rather quickly, so as
1546 * long as our UMA_MAX_WASTE is such that the objsize we calculate
1547 * falls into the portion of the inverse relation AFTER the steep
1548 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1549 *
1550	 * Note that we have 8 bits (1 byte) to use as a freelist index
1551	 * inside the actual slab header itself, and this is enough to
1552	 * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT-sized
1553	 * object with offpage slab header would have ipers =
1554	 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1555	 * 1 greater than what our byte-integer freelist index can
1556	 * accommodate, but we know that this situation never occurs, as
1557 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1558 * that we need to go to offpage slab headers. Or, if we do,
1559 * then we trap that condition below and panic in the INVARIANTS case.
1560 */
1561 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1562 totsize = wsize;
1563 objsize = UMA_SMALLEST_UNIT;
1564 while (totsize >= wsize) {
1565 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1566 (objsize + UMA_FRITM_SZ);
1567 totsize *= (UMA_FRITM_SZ + objsize);
1568 objsize++;
1569 }
1570 if (objsize > UMA_SMALLEST_UNIT)
1571 objsize--;
1572 uma_max_ipers = UMA_SLAB_SIZE / objsize;
1573
1574 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1575 totsize = wsize;
1576 objsize = UMA_SMALLEST_UNIT;
1577 while (totsize >= wsize) {
1578 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1579 (objsize + UMA_FRITMREF_SZ);
1580 totsize *= (UMA_FRITMREF_SZ + objsize);
1581 objsize++;
1582 }
1583 if (objsize > UMA_SMALLEST_UNIT)
1584 objsize--;
1585 uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
1586
1587 KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1588 ("uma_startup: calculated uma_max_ipers values too large!"));
1589
1590#ifdef UMA_DEBUG
1591 printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1592	printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n",
1593 uma_max_ipers_ref);
1594#endif
1595
1596 /* "manually" create the initial zone */
1597 args.name = "UMA Kegs";
1598 args.size = sizeof(struct uma_keg);
1599 args.ctor = keg_ctor;
1600 args.dtor = keg_dtor;
1601 args.uminit = zero_init;
1602 args.fini = NULL;
1603 args.keg = &masterkeg;
1604 args.align = 32 - 1;
1605 args.flags = UMA_ZFLAG_INTERNAL;
1606	/* The initial zone has no per-CPU queues so it's smaller */
1607 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1608
1609#ifdef UMA_DEBUG
1610 printf("Filling boot free list.\n");
1611#endif
1612 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1613 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1614 slab->us_data = (u_int8_t *)slab;
1615 slab->us_flags = UMA_SLAB_BOOT;
1616 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1617 uma_boot_free++;
1618 }
1619
1620#ifdef UMA_DEBUG
1621 printf("Creating uma zone headers zone and keg.\n");
1622#endif
1623 args.name = "UMA Zones";
1624 args.size = sizeof(struct uma_zone) +
1625 (sizeof(struct uma_cache) * (mp_maxid + 1));
1626 args.ctor = zone_ctor;
1627 args.dtor = zone_dtor;
1628 args.uminit = zero_init;
1629 args.fini = NULL;
1630 args.keg = NULL;
1631 args.align = 32 - 1;
1632 args.flags = UMA_ZFLAG_INTERNAL;
1633	/* The initial zone has no per-CPU queues so it's smaller */
1634 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1635
1639#ifdef UMA_DEBUG
1640	printf("Creating slab and hash zones.\n");
1641#endif
1642
1643 /*
1644 * This is the max number of free list items we'll have with
1645 * offpage slabs.
1646 */
1647 slabsize = uma_max_ipers * UMA_FRITM_SZ;
1648 slabsize += sizeof(struct uma_slab);
1649
1650 /* Now make a zone for slab headers */
1651 slabzone = uma_zcreate("UMA Slabs",
1652 slabsize,
1653 NULL, NULL, NULL, NULL,
1654 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1655
1656 /*
1657 * We also create a zone for the bigger slabs with reference
1658	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1659 */
1660 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1661 slabsize += sizeof(struct uma_slab_refcnt);
1662 slabrefzone = uma_zcreate("UMA RCntSlabs",
1663 slabsize,
1664 NULL, NULL, NULL, NULL,
1665 UMA_ALIGN_PTR,
1666 UMA_ZFLAG_INTERNAL);
1667
1668 hashzone = uma_zcreate("UMA Hash",
1669 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1670 NULL, NULL, NULL, NULL,
1671 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1672
1673 bucket_init();
1674
1675#ifdef UMA_MD_SMALL_ALLOC
1676 booted = 1;
1677#endif
1678
1679#ifdef UMA_DEBUG
1680 printf("UMA startup complete.\n");
1681#endif
1682}
1683
1684/* see uma.h */
1685void
1686uma_startup2(void)
1687{
1688 booted = 1;
1689 bucket_enable();
1690#ifdef UMA_DEBUG
1691 printf("UMA startup2 complete.\n");
1692#endif
1693}
1694
1695/*
1696 * Initialize our callout handle.
1698 */
1699
1700static void
1701uma_startup3(void)
1702{
1703#ifdef UMA_DEBUG
1704 printf("Starting callout.\n");
1705#endif
1706 callout_init(&uma_callout, CALLOUT_MPSAFE);
1707 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1708#ifdef UMA_DEBUG
1709 printf("UMA startup3 complete.\n");
1710#endif
1711}
1712
1713static uma_zone_t
1714uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1715 int align, u_int32_t flags)
1716{
1717 struct uma_kctor_args args;
1718
1719 args.size = size;
1720 args.uminit = uminit;
1721 args.fini = fini;
1722 args.align = align;
1723 args.flags = flags;
1724 args.zone = zone;
1725 return (uma_zalloc_internal(kegs, &args, M_WAITOK));
1726}
1727
1728/* See uma.h */
1729uma_zone_t
1730uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1731 uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1732
1733{
1734 struct uma_zctor_args args;
1735
1736 /* This stuff is essential for the zone ctor */
1737 args.name = name;
1738 args.size = size;
1739 args.ctor = ctor;
1740 args.dtor = dtor;
1741 args.uminit = uminit;
1742 args.fini = fini;
1743 args.align = align;
1744 args.flags = flags;
1745 args.keg = NULL;
1746
1747 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1748}
1749
1750/* See uma.h */
1751uma_zone_t
1752uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1753 uma_init zinit, uma_fini zfini, uma_zone_t master)
1754{
1755 struct uma_zctor_args args;
1756
1757 args.name = name;
1758 args.size = master->uz_keg->uk_size;
1759 args.ctor = ctor;
1760 args.dtor = dtor;
1761 args.uminit = zinit;
1762 args.fini = zfini;
1763 args.align = master->uz_keg->uk_align;
1764 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
1765 args.keg = master->uz_keg;
1766
1767 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1768}
1769
1770/* See uma.h */
1771void
1772uma_zdestroy(uma_zone_t zone)
1773{
1774 uma_zfree_internal(zones, zone, NULL, SKIP_NONE, 0);
1775}
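
/*
 * Illustrative consumer of the public interface above -- a hedged sketch,
 * compiled out; "struct foo", foo_zone and foo_zone_example() are
 * hypothetical names, not part of this file.
 */
#if 0
struct foo {
	int	foo_field;
};

static uma_zone_t foo_zone;

static void
foo_zone_example(void)
{
	struct foo *fp;

	/* One keg/zone pair, pointer-aligned items, no ctor/dtor/init/fini. */
	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);

	/* Zeroed allocation; NULL is only expected with M_NOWAIT. */
	fp = uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO);

	uma_zfree_arg(foo_zone, fp, NULL);
	uma_zdestroy(foo_zone);
}
#endif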
1776
1777/* See uma.h */
1778void *
1779uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1780{
1781 void *item;
1782 uma_cache_t cache;
1783 uma_bucket_t bucket;
1784 int cpu;
1785 int badness;
1786
1787 /* This is the fast path allocation */
1788#ifdef UMA_DEBUG_ALLOC_1
1789 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1790#endif
1791 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1792 zone->uz_name, flags);
1793
1794 if (!(flags & M_NOWAIT)) {
1795 KASSERT(curthread->td_intr_nesting_level == 0,
1796 ("malloc(M_WAITOK) in interrupt context"));
1797 if (nosleepwithlocks) {
1798#ifdef WITNESS
1799 badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
1800 NULL,
1801 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
1802 zone->uz_name);
1803#else
1804 badness = 1;
1805#endif
1806 } else {
1807 badness = 0;
1808#ifdef WITNESS
1809 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1810 "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
1811#endif
1812 }
1813 if (badness) {
1814 flags &= ~M_WAITOK;
1815 flags |= M_NOWAIT;
1816 }
1817 }
1818
1819 /*
1820 * If possible, allocate from the per-CPU cache. There are two
1821 * requirements for safe access to the per-CPU cache: (1) the thread
1822 * accessing the cache must not be preempted or yield during access,
1823 * and (2) the thread must not migrate CPUs without switching which
1824 * cache it accesses. We rely on a critical section to prevent
1825 * preemption and migration. We release the critical section in
1826 * order to acquire the zone mutex if we are unable to allocate from
1827 * the current cache; when we re-acquire the critical section, we
1828 * must detect and handle migration if it has occurred.
1829 */
1830zalloc_restart:
1831 critical_enter();
1832 cpu = curcpu;
1833 cache = &zone->uz_cpu[cpu];
1834
1835zalloc_start:
1836 bucket = cache->uc_allocbucket;
1837
1838 if (bucket) {
1839 if (bucket->ub_cnt > 0) {
1840 bucket->ub_cnt--;
1841 item = bucket->ub_bucket[bucket->ub_cnt];
1842#ifdef INVARIANTS
1843 bucket->ub_bucket[bucket->ub_cnt] = NULL;
1844#endif
1845 KASSERT(item != NULL,
1846 ("uma_zalloc: Bucket pointer mangled."));
1847 cache->uc_allocs++;
1848 critical_exit();
1849#ifdef INVARIANTS
1850 ZONE_LOCK(zone);
1851 uma_dbg_alloc(zone, NULL, item);
1852 ZONE_UNLOCK(zone);
1853#endif
1854 if (zone->uz_ctor != NULL) {
1855 if (zone->uz_ctor(item, zone->uz_keg->uk_size,
1856 udata, flags) != 0) {
1857 uma_zfree_internal(zone, item, udata,
1858 SKIP_DTOR, ZFREE_STATFAIL);
1859 return (NULL);
1860 }
1861 }
1862 if (flags & M_ZERO)
1863 bzero(item, zone->uz_keg->uk_size);
1864 return (item);
1865 } else if (cache->uc_freebucket) {
1866 /*
1867 * We have run out of items in our allocbucket.
1868 * See if we can switch with our free bucket.
1869 */
1870 if (cache->uc_freebucket->ub_cnt > 0) {
1871#ifdef UMA_DEBUG_ALLOC
1872 printf("uma_zalloc: Swapping empty with"
1873 " alloc.\n");
1874#endif
1875 bucket = cache->uc_freebucket;
1876 cache->uc_freebucket = cache->uc_allocbucket;
1877 cache->uc_allocbucket = bucket;
1878
1879 goto zalloc_start;
1880 }
1881 }
1882 }
1883 /*
1884	 * The attempt to retrieve the item from the per-CPU cache has failed, so
1885 * we must go back to the zone. This requires the zone lock, so we
1886 * must drop the critical section, then re-acquire it when we go back
1887 * to the cache. Since the critical section is released, we may be
1888 * preempted or migrate. As such, make sure not to maintain any
1889 * thread-local state specific to the cache from prior to releasing
1890 * the critical section.
1891 */
1892 critical_exit();
1893 ZONE_LOCK(zone);
1894 critical_enter();
1895 cpu = curcpu;
1896 cache = &zone->uz_cpu[cpu];
1897 bucket = cache->uc_allocbucket;
1898 if (bucket != NULL) {
1899 if (bucket->ub_cnt > 0) {
1900 ZONE_UNLOCK(zone);
1901 goto zalloc_start;
1902 }
1903 bucket = cache->uc_freebucket;
1904 if (bucket != NULL && bucket->ub_cnt > 0) {
1905 ZONE_UNLOCK(zone);
1906 goto zalloc_start;
1907 }
1908 }
1909
1910 /* Since we have locked the zone we may as well send back our stats */
1911 zone->uz_allocs += cache->uc_allocs;
1912 cache->uc_allocs = 0;
1913 zone->uz_frees += cache->uc_frees;
1914 cache->uc_frees = 0;
1915
1916 /* Our old one is now a free bucket */
1917 if (cache->uc_allocbucket) {
1918 KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1919 ("uma_zalloc_arg: Freeing a non free bucket."));
1920 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1921 cache->uc_allocbucket, ub_link);
1922 cache->uc_allocbucket = NULL;
1923 }
1924
1925 /* Check the free list for a new alloc bucket */
1926 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1927 KASSERT(bucket->ub_cnt != 0,
1928 ("uma_zalloc_arg: Returning an empty bucket."));
1929
1930 LIST_REMOVE(bucket, ub_link);
1931 cache->uc_allocbucket = bucket;
1932 ZONE_UNLOCK(zone);
1933 goto zalloc_start;
1934 }
1935 /* We are no longer associated with this CPU. */
1936 critical_exit();
1937
1938	/* Bump up our uz_count so we get here less often */
1939 if (zone->uz_count < BUCKET_MAX)
1940 zone->uz_count++;
1941
1942 /*
1943	 * Now let's just fill a bucket and put it on the free list.  If that
1944	 * works we'll restart the allocation from the beginning.
1945 */
1946 if (uma_zalloc_bucket(zone, flags)) {
1947 ZONE_UNLOCK(zone);
1948 goto zalloc_restart;
1949 }
1950 ZONE_UNLOCK(zone);
1951 /*
1952 * We may not be able to get a bucket so return an actual item.
1953 */
1954#ifdef UMA_DEBUG
1955 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1956#endif
1957
1958 return (uma_zalloc_internal(zone, udata, flags));
1959}
1960
1961static uma_slab_t
1962uma_zone_slab(uma_zone_t zone, int flags)
1963{
1964 uma_slab_t slab;
1965 uma_keg_t keg;
1966
1967 keg = zone->uz_keg;
1968
1969 /*
1970 * This is to prevent us from recursively trying to allocate
1971 * buckets. The problem is that if an allocation forces us to
1972 * grab a new bucket we will call page_alloc, which will go off
1973 * and cause the vm to allocate vm_map_entries. If we need new
1974 * buckets there too we will recurse in kmem_alloc and bad
1975 * things happen. So instead we return a NULL bucket, and make
1976	 * the code that allocates buckets smart enough to deal with it.
1977 *
1978 * XXX: While we want this protection for the bucket zones so that
1979 * recursion from the VM is handled (and the calling code that
1980 * allocates buckets knows how to deal with it), we do not want
1981 * to prevent allocation from the slab header zones (slabzone
1982 * and slabrefzone) if uk_recurse is not zero for them. The
1983 * reason is that it could lead to NULL being returned for
1984 * slab header allocations even in the M_WAITOK case, and the
1985 * caller can't handle that.
1986 */
1987 if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
1988 if ((zone != slabzone) && (zone != slabrefzone))
1989 return (NULL);
1990
1991 slab = NULL;
1992
1993 for (;;) {
1994 /*
1995 * Find a slab with some space. Prefer slabs that are partially
1996 * used over those that are totally full. This helps to reduce
1997 * fragmentation.
1998 */
1999 if (keg->uk_free != 0) {
2000 if (!LIST_EMPTY(&keg->uk_part_slab)) {
2001 slab = LIST_FIRST(&keg->uk_part_slab);
2002 } else {
2003 slab = LIST_FIRST(&keg->uk_free_slab);
2004 LIST_REMOVE(slab, us_link);
2005 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2006 us_link);
2007 }
2008 return (slab);
2009 }
2010
2011 /*
2012 * M_NOVM means don't ask at all!
2013 */
2014 if (flags & M_NOVM)
2015 break;
2016
2017 if (keg->uk_maxpages &&
2018 keg->uk_pages >= keg->uk_maxpages) {
2019 keg->uk_flags |= UMA_ZFLAG_FULL;
2020
2021 if (flags & M_NOWAIT)
2022 break;
2023 else
2024 msleep(keg, &keg->uk_lock, PVM,
2025 "zonelimit", 0);
2026 continue;
2027 }
2028 keg->uk_recurse++;
2029 slab = slab_zalloc(zone, flags);
2030 keg->uk_recurse--;
2031
2032 /*
2033 * If we got a slab here it's safe to mark it partially used
2034 * and return. We assume that the caller is going to remove
2035 * at least one item.
2036 */
2037 if (slab) {
2038 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2039 return (slab);
2040 }
2041 /*
2042 * We might not have been able to get a slab but another cpu
2043 * could have while we were unlocked. Check again before we
2044 * fail.
2045 */
2046 if (flags & M_NOWAIT)
2047 flags |= M_NOVM;
2048 }
2049 return (slab);
2050}
2051
2052static void *
2053uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
2054{
2055 uma_keg_t keg;
2056 uma_slabrefcnt_t slabref;
2057 void *item;
2058 u_int8_t freei;
2059
2060 keg = zone->uz_keg;
2061
2062 freei = slab->us_firstfree;
2063 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2064 slabref = (uma_slabrefcnt_t)slab;
2065 slab->us_firstfree = slabref->us_freelist[freei].us_item;
2066 } else {
2067 slab->us_firstfree = slab->us_freelist[freei].us_item;
2068 }
2069 item = slab->us_data + (keg->uk_rsize * freei);
2070
2071 slab->us_freecount--;
2072 keg->uk_free--;
2073#ifdef INVARIANTS
2074 uma_dbg_alloc(zone, slab, item);
2075#endif
2076 /* Move this slab to the full list */
2077 if (slab->us_freecount == 0) {
2078 LIST_REMOVE(slab, us_link);
2079 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2080 }
2081
2082 return (item);
2083}
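
/*
 * Illustration of the embedded free list consumed above, with hypothetical
 * indices: if us_firstfree = 5 and us_freelist[5].us_item = 2, taking an
 * item hands out slot 5 (us_data + 5 * uk_rsize) and leaves
 * us_firstfree = 2; uma_zfree_internal() later pushes a freed index back
 * onto the head of the same singly linked chain.
 */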
2084
2085static int
2086uma_zalloc_bucket(uma_zone_t zone, int flags)
2087{
2088 uma_bucket_t bucket;
2089 uma_slab_t slab;
2090 int16_t saved;
2091 int max, origflags = flags;
2092
2093 /*
2094 * Try this zone's free list first so we don't allocate extra buckets.
2095 */
2096 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2097 KASSERT(bucket->ub_cnt == 0,
2098 ("uma_zalloc_bucket: Bucket on free list is not empty."));
2099 LIST_REMOVE(bucket, ub_link);
2100 } else {
2101 int bflags;
2102
2103 bflags = (flags & ~M_ZERO);
2104 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2105 bflags |= M_NOVM;
2106
2107 ZONE_UNLOCK(zone);
2108 bucket = bucket_alloc(zone->uz_count, bflags);
2109 ZONE_LOCK(zone);
2110 }
2111
2112 if (bucket == NULL)
2113 return (0);
2114
2115#ifdef SMP
2116 /*
2117 * This code is here to limit the number of simultaneous bucket fills
2118	 * for any given zone to the number of per-CPU caches in this zone.  This
2119 * is done so that we don't allocate more memory than we really need.
2120 */
2121 if (zone->uz_fills >= mp_ncpus)
2122 goto done;
2123
2124#endif
2125 zone->uz_fills++;
2126
2127 max = MIN(bucket->ub_entries, zone->uz_count);
2128 /* Try to keep the buckets totally full */
2129 saved = bucket->ub_cnt;
2130 while (bucket->ub_cnt < max &&
2131 (slab = uma_zone_slab(zone, flags)) != NULL) {
2132 while (slab->us_freecount && bucket->ub_cnt < max) {
2133 bucket->ub_bucket[bucket->ub_cnt++] =
2134 uma_slab_alloc(zone, slab);
2135 }
2136
2137 /* Don't block on the next fill */
2138 flags |= M_NOWAIT;
2139 }
2140
2141 /*
2142 * We unlock here because we need to call the zone's init.
2143 * It should be safe to unlock because the slab dealt with
2144 * above is already on the appropriate list within the keg
2145 * and the bucket we filled is not yet on any list, so we
2146 * own it.
2147 */
2148 if (zone->uz_init != NULL) {
2149 int i;
2150
2151 ZONE_UNLOCK(zone);
2152 for (i = saved; i < bucket->ub_cnt; i++)
2153 if (zone->uz_init(bucket->ub_bucket[i],
2154 zone->uz_keg->uk_size, origflags) != 0)
2155 break;
2156 /*
2157 * If we couldn't initialize the whole bucket, put the
2158 * rest back onto the freelist.
2159 */
2160 if (i != bucket->ub_cnt) {
2161 int j;
2162
2163 for (j = i; j < bucket->ub_cnt; j++) {
2164 uma_zfree_internal(zone, bucket->ub_bucket[j],
2165 NULL, SKIP_FINI, 0);
2166#ifdef INVARIANTS
2167 bucket->ub_bucket[j] = NULL;
2168#endif
2169 }
2170 bucket->ub_cnt = i;
2171 }
2172 ZONE_LOCK(zone);
2173 }
2174
2175 zone->uz_fills--;
2176 if (bucket->ub_cnt != 0) {
2177 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2178 bucket, ub_link);
2179 return (1);
2180 }
2181#ifdef SMP
2182done:
2183#endif
2184 bucket_free(bucket);
2185
2186 return (0);
2187}
2188/*
2189 * Allocates an item for an internal zone
2190 *
2191 * Arguments
2192 * zone The zone to alloc for.
2193 * udata The data to be passed to the constructor.
2194 * flags M_WAITOK, M_NOWAIT, M_ZERO.
2195 *
2196 * Returns
2197 * NULL if there is no memory and M_NOWAIT is set
2198 * An item if successful
2199 */
2200
2201static void *
2202uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
2203{
2204 uma_keg_t keg;
2205 uma_slab_t slab;
2206 void *item;
2207
2208 item = NULL;
2209 keg = zone->uz_keg;
2210
2211#ifdef UMA_DEBUG_ALLOC
2212 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2213#endif
2214 ZONE_LOCK(zone);
2215
2216 slab = uma_zone_slab(zone, flags);
2217 if (slab == NULL) {
2218 zone->uz_fails++;
2219 ZONE_UNLOCK(zone);
2220 return (NULL);
2221 }
2222
2223 item = uma_slab_alloc(zone, slab);
2224
2225 zone->uz_allocs++;
2226
2227 ZONE_UNLOCK(zone);
2228
2229 /*
2230 * We have to call both the zone's init (not the keg's init)
2231 * and the zone's ctor. This is because the item is going from
2232 * a keg slab directly to the user, and the user is expecting it
2233 * to be both zone-init'd as well as zone-ctor'd.
2234 */
2235 if (zone->uz_init != NULL) {
2236 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
2237 uma_zfree_internal(zone, item, udata, SKIP_FINI,
2238 ZFREE_STATFAIL);
2239 return (NULL);
2240 }
2241 }
2242 if (zone->uz_ctor != NULL) {
2243 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
2244 uma_zfree_internal(zone, item, udata, SKIP_DTOR,
2245 ZFREE_STATFAIL);
2246 return (NULL);
2247 }
2248 }
2249 if (flags & M_ZERO)
2250 bzero(item, keg->uk_size);
2251
2252 return (item);
2253}
2254
2255/* See uma.h */
2256void
2257uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2258{
2259 uma_keg_t keg;
2260 uma_cache_t cache;
2261 uma_bucket_t bucket;
2262 int bflags;
2263 int cpu;
2264
2265 keg = zone->uz_keg;
2266
2267#ifdef UMA_DEBUG_ALLOC_1
2268 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2269#endif
2270 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2271 zone->uz_name);
2272
2273 if (zone->uz_dtor)
2274 zone->uz_dtor(item, keg->uk_size, udata);
2275#ifdef INVARIANTS
2276 ZONE_LOCK(zone);
2277 if (keg->uk_flags & UMA_ZONE_MALLOC)
2278 uma_dbg_free(zone, udata, item);
2279 else
2280 uma_dbg_free(zone, NULL, item);
2281 ZONE_UNLOCK(zone);
2282#endif
2283 /*
2284 * The race here is acceptable. If we miss it we'll just have to wait
2285 * a little longer for the limits to be reset.
2286 */
2287 if (keg->uk_flags & UMA_ZFLAG_FULL)
2288 goto zfree_internal;
2289
2290 /*
2291 * If possible, free to the per-CPU cache. There are two
2292 * requirements for safe access to the per-CPU cache: (1) the thread
2293 * accessing the cache must not be preempted or yield during access,
2294 * and (2) the thread must not migrate CPUs without switching which
2295 * cache it accesses. We rely on a critical section to prevent
2296 * preemption and migration. We release the critical section in
2297 * order to acquire the zone mutex if we are unable to free to the
2298 * current cache; when we re-acquire the critical section, we must
2299 * detect and handle migration if it has occurred.
2300 */
2301zfree_restart:
2302 critical_enter();
2303 cpu = curcpu;
2304 cache = &zone->uz_cpu[cpu];
2305
2306zfree_start:
2307 bucket = cache->uc_freebucket;
2308
2309 if (bucket) {
2310 /*
2311 * Do we have room in our bucket? It is OK for this uz count
2312 * check to be slightly out of sync.
2313 */
2314
2315 if (bucket->ub_cnt < bucket->ub_entries) {
2316 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2317 ("uma_zfree: Freeing to non free bucket index."));
2318 bucket->ub_bucket[bucket->ub_cnt] = item;
2319 bucket->ub_cnt++;
2320 cache->uc_frees++;
2321 critical_exit();
2322 return;
2323 } else if (cache->uc_allocbucket) {
2324#ifdef UMA_DEBUG_ALLOC
2325 printf("uma_zfree: Swapping buckets.\n");
2326#endif
2327 /*
2328 * We have run out of space in our freebucket.
2329 * See if we can switch with our alloc bucket.
2330 */
2331 if (cache->uc_allocbucket->ub_cnt <
2332 cache->uc_freebucket->ub_cnt) {
2333 bucket = cache->uc_freebucket;
2334 cache->uc_freebucket = cache->uc_allocbucket;
2335 cache->uc_allocbucket = bucket;
2336 goto zfree_start;
2337 }
2338 }
2339 }
2340 /*
2341 * We can get here for two reasons:
2342 *
2343 * 1) The buckets are NULL
2344 * 2) The alloc and free buckets are both somewhat full.
2345 *
2346	 * We must go back to the zone, which requires acquiring the zone lock,
2347 * which in turn means we must release and re-acquire the critical
2348 * section. Since the critical section is released, we may be
2349 * preempted or migrate. As such, make sure not to maintain any
2350 * thread-local state specific to the cache from prior to releasing
2351 * the critical section.
2352 */
2353 critical_exit();
2354 ZONE_LOCK(zone);
2355 critical_enter();
2356 cpu = curcpu;
2357 cache = &zone->uz_cpu[cpu];
2358 if (cache->uc_freebucket != NULL) {
2359 if (cache->uc_freebucket->ub_cnt <
2360 cache->uc_freebucket->ub_entries) {
2361 ZONE_UNLOCK(zone);
2362 goto zfree_start;
2363 }
2364 if (cache->uc_allocbucket != NULL &&
2365 (cache->uc_allocbucket->ub_cnt <
2366 cache->uc_freebucket->ub_cnt)) {
2367 ZONE_UNLOCK(zone);
2368 goto zfree_start;
2369 }
2370 }
2371
2372 bucket = cache->uc_freebucket;
2373 cache->uc_freebucket = NULL;
2374
2375 /* Can we throw this on the zone full list? */
2376 if (bucket != NULL) {
2377#ifdef UMA_DEBUG_ALLOC
2378 printf("uma_zfree: Putting old bucket on the free list.\n");
2379#endif
2380 /* ub_cnt is pointing to the last free item */
2381 KASSERT(bucket->ub_cnt != 0,
2382 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2383 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2384 bucket, ub_link);
2385 }
2386 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2387 LIST_REMOVE(bucket, ub_link);
2388 ZONE_UNLOCK(zone);
2389 cache->uc_freebucket = bucket;
2390 goto zfree_start;
2391 }
2392 /* We are no longer associated with this CPU. */
2393 critical_exit();
2394
2395 /* And the zone.. */
2396 ZONE_UNLOCK(zone);
2397
2398#ifdef UMA_DEBUG_ALLOC
2399 printf("uma_zfree: Allocating new free bucket.\n");
2400#endif
2401 bflags = M_NOWAIT;
2402
2403 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2404 bflags |= M_NOVM;
2405 bucket = bucket_alloc(zone->uz_count, bflags);
2406 if (bucket) {
2407 ZONE_LOCK(zone);
2408 LIST_INSERT_HEAD(&zone->uz_free_bucket,
2409 bucket, ub_link);
2410 ZONE_UNLOCK(zone);
2411 goto zfree_restart;
2412 }
2413
2414 /*
2415 * If nothing else caught this, we'll just do an internal free.
2416 */
2417zfree_internal:
2418 uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFAIL);
2419
2420 return;
2421}
2422
2423/*
2424 * Frees an item to an INTERNAL zone or allocates a free bucket
2425 *
2426 * Arguments:
2427 * zone The zone to free to
2428 * item The item we're freeing
2429 * udata User supplied data for the dtor
2430 * skip Skip dtors and finis
2431 */
2432static void
2433uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
2434 enum zfreeskip skip, int flags)
2435{
2436 uma_slab_t slab;
2437 uma_slabrefcnt_t slabref;
2438 uma_keg_t keg;
2439 u_int8_t *mem;
2440 u_int8_t freei;
2441
2442 keg = zone->uz_keg;
2443
2444 if (skip < SKIP_DTOR && zone->uz_dtor)
2445 zone->uz_dtor(item, keg->uk_size, udata);
2446 if (skip < SKIP_FINI && zone->uz_fini)
2447 zone->uz_fini(item, keg->uk_size);
2448
2449 ZONE_LOCK(zone);
2450
2451 if (flags & ZFREE_STATFAIL)
2452 zone->uz_fails++;
2453
2454 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
2455 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2456 if (keg->uk_flags & UMA_ZONE_HASH)
2457 slab = hash_sfind(&keg->uk_hash, mem);
2458 else {
2459 mem += keg->uk_pgoff;
2460 slab = (uma_slab_t)mem;
2461 }
2462 } else {
2463 slab = (uma_slab_t)udata;
2464 }
2465
2466 /* Do we need to remove from any lists? */
2467 if (slab->us_freecount+1 == keg->uk_ipers) {
2468 LIST_REMOVE(slab, us_link);
2469 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2470 } else if (slab->us_freecount == 0) {
2471 LIST_REMOVE(slab, us_link);
2472 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2473 }
2474
2475 /* Slab management stuff */
2476 freei = ((unsigned long)item - (unsigned long)slab->us_data)
2477 / keg->uk_rsize;
2478
2479#ifdef INVARIANTS
2480 if (!skip)
2481 uma_dbg_free(zone, slab, item);
2482#endif
2483
2484 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2485 slabref = (uma_slabrefcnt_t)slab;
2486 slabref->us_freelist[freei].us_item = slab->us_firstfree;
2487 } else {
2488 slab->us_freelist[freei].us_item = slab->us_firstfree;
2489 }
2490 slab->us_firstfree = freei;
2491 slab->us_freecount++;
2492
2493 /* Zone statistics */
2494 keg->uk_free++;
2495 zone->uz_frees++;
2496
2497 if (keg->uk_flags & UMA_ZFLAG_FULL) {
2498 if (keg->uk_pages < keg->uk_maxpages)
2499 keg->uk_flags &= ~UMA_ZFLAG_FULL;
2500
2501 /* We can handle one more allocation */
2502 wakeup_one(keg);
2503 }
2504
2505 ZONE_UNLOCK(zone);
2506}
2507
2508/* See uma.h */
2509void
2510uma_zone_set_max(uma_zone_t zone, int nitems)
2511{
2512 uma_keg_t keg;
2513
2514 keg = zone->uz_keg;
2515 ZONE_LOCK(zone);
2516 if (keg->uk_ppera > 1)
2517 keg->uk_maxpages = nitems * keg->uk_ppera;
2518 else
2519 keg->uk_maxpages = nitems / keg->uk_ipers;
2520
2521 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2522 keg->uk_maxpages++;
2523
2524 ZONE_UNLOCK(zone);
2525}
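
/*
 * Example of the rounding above, with a hypothetical keg geometry:
 * for uk_ipers = 72 and uk_ppera = 1, uma_zone_set_max(zone, 1000) gives
 * uk_maxpages = 1000 / 72 = 13, and since 13 * 72 < 1000 it is bumped to
 * 14, so the effective cap is 14 * 72 = 1008 items -- the limit is kept
 * in whole slabs and never rounds below the requested count.
 */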
2526
2527/* See uma.h */
2528void
2529uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2530{
2531 ZONE_LOCK(zone);
2532 KASSERT(zone->uz_keg->uk_pages == 0,
2533 ("uma_zone_set_init on non-empty keg"));
2534 zone->uz_keg->uk_init = uminit;
2535 ZONE_UNLOCK(zone);
2536}
2537
2538/* See uma.h */
2539void
2540uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2541{
2542 ZONE_LOCK(zone);
2543 KASSERT(zone->uz_keg->uk_pages == 0,
2544 ("uma_zone_set_fini on non-empty keg"));
2545 zone->uz_keg->uk_fini = fini;
2546 ZONE_UNLOCK(zone);
2547}
2548
2549/* See uma.h */
2550void
2551uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2552{
2553 ZONE_LOCK(zone);
2554 KASSERT(zone->uz_keg->uk_pages == 0,
2555 ("uma_zone_set_zinit on non-empty keg"));
2556 zone->uz_init = zinit;
2557 ZONE_UNLOCK(zone);
2558}
2559
2560/* See uma.h */
2561void
2562uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2563{
2564 ZONE_LOCK(zone);
2565 KASSERT(zone->uz_keg->uk_pages == 0,
2566 ("uma_zone_set_zfini on non-empty keg"));
2567 zone->uz_fini = zfini;
2568 ZONE_UNLOCK(zone);
2569}
2570
2571/* See uma.h */
2572/* XXX uk_freef is not actually used with the zone locked */
2573void
2574uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2575{
2576 ZONE_LOCK(zone);
2577 zone->uz_keg->uk_freef = freef;
2578 ZONE_UNLOCK(zone);
2579}
2580
2581/* See uma.h */
2582/* XXX uk_allocf is not actually used with the zone locked */
2583void
2584uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2585{
2586 ZONE_LOCK(zone);
2587 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2588 zone->uz_keg->uk_allocf = allocf;
2589 ZONE_UNLOCK(zone);
2590}
2591
2592/* See uma.h */
2593int
2594uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2595{
2596 uma_keg_t keg;
2597 vm_offset_t kva;
2598 int pages;
2599
2600 keg = zone->uz_keg;
2601 pages = count / keg->uk_ipers;
2602
2603 if (pages * keg->uk_ipers < count)
2604 pages++;
2605
2606 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2607
2608 if (kva == 0)
2609 return (0);
2610 if (obj == NULL) {
2611 obj = vm_object_allocate(OBJT_DEFAULT,
2612 pages);
2613 } else {
2614 VM_OBJECT_LOCK_INIT(obj, "uma object");
2615 _vm_object_allocate(OBJT_DEFAULT,
2616 pages, obj);
2617 }
2618 ZONE_LOCK(zone);
2619 keg->uk_kva = kva;
2620 keg->uk_obj = obj;
2621 keg->uk_maxpages = pages;
2622 keg->uk_allocf = obj_alloc;
2623 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2624 ZONE_UNLOCK(zone);
2625 return (1);
2626}
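
/*
 * Typical use, sketched: uma_zone_set_obj(zone, NULL, count) reserves
 * enough kernel VA for 'count' items, lets UMA allocate the backing VM
 * object itself, and switches the keg to obj_alloc() above, so slabs for
 * this zone come from that private object rather than kmem_map.
 */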
2627
2628/* See uma.h */
2629void
2630uma_prealloc(uma_zone_t zone, int items)
2631{
2632 int slabs;
2633 uma_slab_t slab;
2634 uma_keg_t keg;
2635
2636 keg = zone->uz_keg;
2637 ZONE_LOCK(zone);
2638 slabs = items / keg->uk_ipers;
2639 if (slabs * keg->uk_ipers < items)
2640 slabs++;
2641 while (slabs > 0) {
2642 slab = slab_zalloc(zone, M_WAITOK);
2643 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2644 slabs--;
2645 }
2646 ZONE_UNLOCK(zone);
2647}
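
/*
 * For example, with a hypothetical keg of uk_ipers = 50:
 * uma_prealloc(zone, 256) computes slabs = 256 / 50 = 5, sees that
 * 5 * 50 < 256 and rounds up to 6, so six full slabs (300 item slots)
 * are allocated up front with M_WAITOK and parked on the free-slab list.
 */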
2648
2649/* See uma.h */
2650u_int32_t *
2651uma_find_refcnt(uma_zone_t zone, void *item)
2652{
2653 uma_slabrefcnt_t slabref;
2654 uma_keg_t keg;
2655 u_int32_t *refcnt;
2656 int idx;
2657
2658 keg = zone->uz_keg;
2659 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2660 (~UMA_SLAB_MASK));
2661 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2662 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2663 idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2664 / keg->uk_rsize;
2665 refcnt = &slabref->us_freelist[idx].us_refcnt;
2666	return (refcnt);
2667}
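
/*
 * Hedged usage sketch for the reference-count interface above (compiled
 * out; "refzone", the 128-byte size and refcnt_example() are hypothetical):
 */
#if 0
static void
refcnt_example(void)
{
	uma_zone_t refzone;
	u_int32_t *cnt;
	void *item;

	/* UMA_ZONE_REFCNT selects the larger slab header carrying refcounts. */
	refzone = uma_zcreate("ref example", 128, NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	item = uma_zalloc_arg(refzone, NULL, M_WAITOK);

	/* The per-item counter lives in the slab header, not in the item. */
	cnt = uma_find_refcnt(refzone, item);
	*cnt = 1;

	uma_zfree_arg(refzone, item, NULL);
	uma_zdestroy(refzone);
}
#endif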
2668
2669/* See uma.h */
2670void
2671uma_reclaim(void)
2672{
2673#ifdef UMA_DEBUG
2674 printf("UMA: vm asked us to release pages!\n");
2675#endif
2676 bucket_enable();
2677 zone_foreach(zone_drain);
2678 /*
2679	 * Some slabs may have been freed, but these zones were visited early
2680	 * in the pass above; visit them again so we can free pages that became
2681	 * empty once the other zones were drained.  The same goes for buckets.
2682 */
2683 zone_drain(slabzone);
2684 zone_drain(slabrefzone);
2685 bucket_zone_drain();
2686}
2687
2688void *
2689uma_large_malloc(int size, int wait)
2690{
2691 void *mem;
2692 uma_slab_t slab;
2693 u_int8_t flags;
2694
2695 slab = uma_zalloc_internal(slabzone, NULL, wait);
2696 if (slab == NULL)
2697 return (NULL);
2698 mem = page_alloc(NULL, size, &flags, wait);
2699 if (mem) {
2700 vsetslab((vm_offset_t)mem, slab);
2701 slab->us_data = mem;
2702 slab->us_flags = flags | UMA_SLAB_MALLOC;
2703 slab->us_size = size;
2704 } else {
2705 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE,
2706 ZFREE_STATFAIL);
2707 }
2708
2709 return (mem);
2710}
2711
2712void
2713uma_large_free(uma_slab_t slab)
2714{
2715 vsetobj((vm_offset_t)slab->us_data, kmem_object);
2716 page_free(slab->us_data, slab->us_size, slab->us_flags);
2717 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, 0);
2718}
2719
2720void
2721uma_print_stats(void)
2722{
2723 zone_foreach(uma_print_zone);
2724}
2725
2726static void
2727slab_print(uma_slab_t slab)
2728{
2729 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2730 slab->us_keg, slab->us_data, slab->us_freecount,
2731 slab->us_firstfree);
2732}
2733
2734static void
2735cache_print(uma_cache_t cache)
2736{
2737 printf("alloc: %p(%d), free: %p(%d)\n",
2738 cache->uc_allocbucket,
2739 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2740 cache->uc_freebucket,
2741 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2742}
2743
2744void
2745uma_print_zone(uma_zone_t zone)
2746{
2747 uma_cache_t cache;
2748 uma_keg_t keg;
2749 uma_slab_t slab;
2750 int i;
2751
2752 keg = zone->uz_keg;
2753 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2754 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2755 keg->uk_ipers, keg->uk_ppera,
2756 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
2757 printf("Part slabs:\n");
2758 LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2759 slab_print(slab);
2760 printf("Free slabs:\n");
2761 LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2762 slab_print(slab);
2763 printf("Full slabs:\n");
2764 LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2765 slab_print(slab);
2766 for (i = 0; i <= mp_maxid; i++) {
2767 if (CPU_ABSENT(i))
2768 continue;
2769 cache = &zone->uz_cpu[i];
2770 printf("CPU %d Cache:\n", i);
2771 cache_print(cache);
2772 }
2773}
2774
2775/*
2776 * Generate statistics across both the zone and its per-CPU caches.  Return
2777 * the desired statistics if the pointer is non-NULL for that statistic.
2778 *
2779 * Note: does not update the zone statistics, as it can't safely clear the
2780 * per-CPU cache statistic.
2781 *
2782 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
2783 * safe from off-CPU; we should modify the caches to track this information
2784 * directly so that we don't have to.
2785 */
2786static void
2787uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
2788 u_int64_t *freesp)
2789{
2790 uma_cache_t cache;
2791 u_int64_t allocs, frees;
2792 int cachefree, cpu;
2793
2794 allocs = frees = 0;
2795 cachefree = 0;
2796 for (cpu = 0; cpu <= mp_maxid; cpu++) {
2797 if (CPU_ABSENT(cpu))
2798 continue;
2799 cache = &z->uz_cpu[cpu];
2800 if (cache->uc_allocbucket != NULL)
2801 cachefree += cache->uc_allocbucket->ub_cnt;
2802 if (cache->uc_freebucket != NULL)
2803 cachefree += cache->uc_freebucket->ub_cnt;
2804 allocs += cache->uc_allocs;
2805 frees += cache->uc_frees;
2806 }
2807 allocs += z->uz_allocs;
2808 frees += z->uz_frees;
2809 if (cachefreep != NULL)
2810 *cachefreep = cachefree;
2811 if (allocsp != NULL)
2812 *allocsp = allocs;
2813 if (freesp != NULL)
2814 *freesp = frees;
2815}
2816
2817/*
2818 * Sysctl handler for vm.zone
2819 *
2820 * stolen from vm_zone.c
2821 */
2822static int
2823sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2824{
2825 int error, len, cnt;
2826 const int linesize = 128; /* conservative */
2827 int totalfree;
2828 char *tmpbuf, *offset;
2829 uma_zone_t z;
2830 uma_keg_t zk;
2831 char *p;
2832 int cachefree;
2833 uma_bucket_t bucket;
2834 u_int64_t allocs, frees;
2835
2836 cnt = 0;
2837 mtx_lock(&uma_mtx);
2838 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2839 LIST_FOREACH(z, &zk->uk_zones, uz_link)
2840 cnt++;
2841 }
2842 mtx_unlock(&uma_mtx);
2843 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2844 M_TEMP, M_WAITOK);
2845 len = snprintf(tmpbuf, linesize,
2846 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2847 if (cnt == 0)
2848 tmpbuf[len - 1] = '\0';
2849 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2850 if (error || cnt == 0)
2851 goto out;
2852 offset = tmpbuf;
2853 mtx_lock(&uma_mtx);
2854 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2855 LIST_FOREACH(z, &zk->uk_zones, uz_link) {
2856 if (cnt == 0) /* list may have changed size */
2857 break;
2858 ZONE_LOCK(z);
2859 cachefree = 0;
2860 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2861 uma_zone_sumstat(z, &cachefree, &allocs, &frees);
2862 } else {
2863 allocs = z->uz_allocs;
2864 frees = z->uz_frees;
2865 }
2866
2867 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
2868 cachefree += bucket->ub_cnt;
2869 }
2870 totalfree = zk->uk_free + cachefree;
2871 len = snprintf(offset, linesize,
2872 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2873 z->uz_name, zk->uk_size,
2874 zk->uk_maxpages * zk->uk_ipers,
2875 (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
2876 totalfree,
2877 (unsigned long long)allocs);
2878 ZONE_UNLOCK(z);
2879 for (p = offset + 12; p > offset && *p == ' '; --p)
2880 /* nothing */ ;
2881 p[1] = ':';
2882 cnt--;
2883 offset += len;
2884 }
2885 }
2886 mtx_unlock(&uma_mtx);
2887 *offset++ = '\0';
2888 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2889out:
2890 FREE(tmpbuf, M_TEMP);
2891 return (error);
2892}
2893
2894static int
2895sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
2896{
2897 uma_keg_t kz;
2898 uma_zone_t z;
2899 int count;
2900
2901 count = 0;
2902 mtx_lock(&uma_mtx);
2903 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2904 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2905 count++;
2906 }
2907 mtx_unlock(&uma_mtx);
2908 return (sysctl_handle_int(oidp, &count, 0, req));
2909}
2910
2911static int
2912sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
2913{
2914 struct uma_stream_header ush;
2915 struct uma_type_header uth;
2916 struct uma_percpu_stat ups;
2917 uma_bucket_t bucket;
2918 struct sbuf sbuf;
2919 uma_cache_t cache;
2920 uma_keg_t kz;
2921 uma_zone_t z;
2922 char *buffer;
2923 int buflen, count, error, i;
2924
2925 mtx_lock(&uma_mtx);
2926restart:
2927 mtx_assert(&uma_mtx, MA_OWNED);
2928 count = 0;
2929 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2930 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2931 count++;
2932 }
2933 mtx_unlock(&uma_mtx);
2934
2935 buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) *
2936 MAXCPU) + 1;
2937 buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
2938
2939 mtx_lock(&uma_mtx);
2940 i = 0;
2941 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2942 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2943 i++;
2944 }
2945 if (i > count) {
2946 free(buffer, M_TEMP);
2947 goto restart;
2948 }
2949 count = i;
2950
2951 sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN);
2952
2953 /*
2954 * Insert stream header.
2955 */
2956 bzero(&ush, sizeof(ush));
2957 ush.ush_version = UMA_STREAM_VERSION;
2958 ush.ush_maxcpus = MAXCPU;
2959 ush.ush_count = count;
2960 if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) {
2961 mtx_unlock(&uma_mtx);
2962 error = ENOMEM;
2963 goto out;
2964 }
2965
2966 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2967 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
2968 bzero(&uth, sizeof(uth));
2969 ZONE_LOCK(z);
2970 strlcpy(uth.uth_name, z->uz_name, UMA_MAX_NAME);
2971 uth.uth_align = kz->uk_align;
2972 uth.uth_pages = kz->uk_pages;
2973 uth.uth_keg_free = kz->uk_free;
2974 uth.uth_size = kz->uk_size;
2975 uth.uth_rsize = kz->uk_rsize;
2976 uth.uth_maxpages = kz->uk_maxpages;
2977 if (kz->uk_ppera > 1)
2978 uth.uth_limit = kz->uk_maxpages /
2979 kz->uk_ppera;
2980 else
2981 uth.uth_limit = kz->uk_maxpages *
2982 kz->uk_ipers;
2983 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
2984 uth.uth_zone_free += bucket->ub_cnt;
2985 uth.uth_allocs = z->uz_allocs;
2986 uth.uth_frees = z->uz_frees;
2987 uth.uth_fails = z->uz_fails;
2988 if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
2989 ZONE_UNLOCK(z);
2990 mtx_unlock(&uma_mtx);
2991 error = ENOMEM;
2992 goto out;
2993 }
2994 /*
2995 * While it is not normally safe to access the cache
2996 * bucket pointers while not on the CPU that owns the
2997 * cache, we only allow the pointers to be exchanged
2998 * without the zone lock held, not invalidated, so
2999 * accept the possible race associated with bucket
3000 * exchange during monitoring.
3001 */
3002 for (i = 0; i < MAXCPU; i++) {
3003 bzero(&ups, sizeof(ups));
3004 if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
3005 goto skip;
3006 cache = &z->uz_cpu[i];
3007 if (cache->uc_allocbucket != NULL)
3008 ups.ups_cache_free +=
3009 cache->uc_allocbucket->ub_cnt;
3010 if (cache->uc_freebucket != NULL)
3011 ups.ups_cache_free +=
3012 cache->uc_freebucket->ub_cnt;
3013 ups.ups_allocs = cache->uc_allocs;
3014 ups.ups_frees = cache->uc_frees;
3015skip:
3016 if (sbuf_bcat(&sbuf, &ups, sizeof(ups)) < 0) {
3017 ZONE_UNLOCK(z);
3018 mtx_unlock(&uma_mtx);
3019 error = ENOMEM;
3020 goto out;
3021 }
3022 }
3023 ZONE_UNLOCK(z);
3024 }
3025 }
3026 mtx_unlock(&uma_mtx);
3027 sbuf_finish(&sbuf);
3028 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
3029out:
3030 free(buffer, M_TEMP);
3031 return (error);
3032}