uma_core.c (141983) → uma_core.c (141991)
 1/*-
 2 * Copyright (c) 2004, 2005,
 3 *     Bosko Milekic <bmilekic@freebsd.org>
 3 *     Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 4 * Copyright (c) 2002, 2003, 2004, 2005,
 5 *     Jeffrey Roberson <jeff@freebsd.org>
 5 *     Jeffrey Roberson <jeff@FreeBSD.org>.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * uma_core.c Implementation of the Universal Memory allocator
31 *
32 * This allocator is intended to replace the multitude of similar object caches
33 * in the standard FreeBSD kernel. The intent is to be flexible as well as
 34 * efficient.  A primary design goal is to return unused memory to the rest of
35 * the system. This will make the system as a whole more flexible due to the
36 * ability to move memory to subsystems which most need it instead of leaving
37 * pools of reserved memory unused.
38 *
39 * The basic ideas stem from similar slab/zone based allocators whose algorithms
40 * are well known.
41 *
42 */
43
44/*
45 * TODO:
46 * - Improve memory usage for large allocations
47 * - Investigate cache size adjustments
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/vm/uma_core.c 141983 2005-02-16 20:06:11Z bmilekic $");
51__FBSDID("$FreeBSD: head/sys/vm/uma_core.c 141991 2005-02-16 21:45:59Z bmilekic $");
52
53/* I should really use ktr.. */
54/*
55#define UMA_DEBUG 1
56#define UMA_DEBUG_ALLOC 1
57#define UMA_DEBUG_ALLOC_1 1
58*/
59
60#include "opt_param.h"
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/kernel.h>
64#include <sys/types.h>
65#include <sys/queue.h>
66#include <sys/malloc.h>
67#include <sys/ktr.h>
68#include <sys/lock.h>
69#include <sys/sysctl.h>
70#include <sys/mutex.h>
71#include <sys/proc.h>
72#include <sys/smp.h>
73#include <sys/vmmeter.h>
74
75#include <vm/vm.h>
76#include <vm/vm_object.h>
77#include <vm/vm_page.h>
78#include <vm/vm_param.h>
79#include <vm/vm_map.h>
80#include <vm/vm_kern.h>
81#include <vm/vm_extern.h>
82#include <vm/uma.h>
83#include <vm/uma_int.h>
84#include <vm/uma_dbg.h>
85
86#include <machine/vmparam.h>
87
88/*
89 * This is the zone and keg from which all zones are spawned. The idea is that
90 * even the zone & keg heads are allocated from the allocator, so we use the
91 * bss section to bootstrap us.
92 */
93static struct uma_keg masterkeg;
94static struct uma_zone masterzone_k;
95static struct uma_zone masterzone_z;
96static uma_zone_t kegs = &masterzone_k;
97static uma_zone_t zones = &masterzone_z;
98
99/* This is the zone from which all of uma_slab_t's are allocated. */
100static uma_zone_t slabzone;
101static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
102
103/*
104 * The initial hash tables come out of this zone so they can be allocated
105 * prior to malloc coming up.
106 */
107static uma_zone_t hashzone;
108
109static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
110
111/*
112 * Are we allowed to allocate buckets?
113 */
114static int bucketdisable = 1;
115
116/* Linked list of all kegs in the system */
117static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
118
119/* This mutex protects the keg list */
120static struct mtx uma_mtx;
121
122/* These are the pcpu cache locks */
123static struct mtx uma_pcpu_mtx[MAXCPU];
124
125/* Linked list of boot time pages */
126static LIST_HEAD(,uma_slab) uma_boot_pages =
127 LIST_HEAD_INITIALIZER(&uma_boot_pages);
128
129/* Count of free boottime pages */
130static int uma_boot_free = 0;
131
132/* Is the VM done starting up? */
133static int booted = 0;
134
135/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
136static u_int uma_max_ipers;
137static u_int uma_max_ipers_ref;
138
139/*
140 * This is the handle used to schedule events that need to happen
141 * outside of the allocation fast path.
142 */
143static struct callout uma_callout;
144#define UMA_TIMEOUT 20 /* Seconds for callout interval. */
145
146/*
147 * This structure is passed as the zone ctor arg so that I don't have to create
148 * a special allocation function just for zones.
149 */
150struct uma_zctor_args {
151 char *name;
152 size_t size;
153 uma_ctor ctor;
154 uma_dtor dtor;
155 uma_init uminit;
156 uma_fini fini;
157 uma_keg_t keg;
158 int align;
159 u_int16_t flags;
160};
161
162struct uma_kctor_args {
163 uma_zone_t zone;
164 size_t size;
165 uma_init uminit;
166 uma_fini fini;
167 int align;
168 u_int16_t flags;
169};
170
171struct uma_bucket_zone {
172 uma_zone_t ubz_zone;
173 char *ubz_name;
174 int ubz_entries;
175};
176
177#define BUCKET_MAX 128
178
179struct uma_bucket_zone bucket_zones[] = {
180 { NULL, "16 Bucket", 16 },
181 { NULL, "32 Bucket", 32 },
182 { NULL, "64 Bucket", 64 },
183 { NULL, "128 Bucket", 128 },
184 { NULL, NULL, 0}
185};
186
187#define BUCKET_SHIFT 4
188#define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
189
190/*
191 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
192 * of approximately the right size.
193 */
194static uint8_t bucket_size[BUCKET_ZONES];
195
196enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
197
198/* Prototypes.. */
199
200static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
201static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
202static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
203static void page_free(void *, int, u_int8_t);
204static uma_slab_t slab_zalloc(uma_zone_t, int);
205static void cache_drain(uma_zone_t);
206static void bucket_drain(uma_zone_t, uma_bucket_t);
207static void bucket_cache_drain(uma_zone_t zone);
208static int keg_ctor(void *, int, void *, int);
209static void keg_dtor(void *, int, void *);
210static int zone_ctor(void *, int, void *, int);
211static void zone_dtor(void *, int, void *);
212static int zero_init(void *, int, int);
213static void zone_small_init(uma_zone_t zone);
214static void zone_large_init(uma_zone_t zone);
215static void zone_foreach(void (*zfunc)(uma_zone_t));
216static void zone_timeout(uma_zone_t zone);
217static int hash_alloc(struct uma_hash *);
218static int hash_expand(struct uma_hash *, struct uma_hash *);
219static void hash_free(struct uma_hash *hash);
220static void uma_timeout(void *);
221static void uma_startup3(void);
222static void *uma_zalloc_internal(uma_zone_t, void *, int);
223static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip);
224static void bucket_enable(void);
225static void bucket_init(void);
226static uma_bucket_t bucket_alloc(int, int);
227static void bucket_free(uma_bucket_t);
228static void bucket_zone_drain(void);
229static int uma_zalloc_bucket(uma_zone_t zone, int flags);
230static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
231static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
232static void zone_drain(uma_zone_t);
233static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
234 uma_fini fini, int align, u_int16_t flags);
235
236void uma_print_zone(uma_zone_t);
237void uma_print_stats(void);
238static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
239
240#ifdef WITNESS
241static int nosleepwithlocks = 1;
242SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
243 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
244#else
245static int nosleepwithlocks = 0;
246SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
247 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
248#endif
249SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
250 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
251SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
252
253/*
254 * This routine checks to see whether or not it's safe to enable buckets.
255 */
256
257static void
258bucket_enable(void)
259{
260 if (cnt.v_free_count < cnt.v_free_min)
261 bucketdisable = 1;
262 else
263 bucketdisable = 0;
264}
265
266/*
267 * Initialize bucket_zones, the array of zones of buckets of various sizes.
268 *
269 * For each zone, calculate the memory required for each bucket, consisting
270 * of the header and an array of pointers. Initialize bucket_size[] to point
271 * the range of appropriate bucket sizes at the zone.
272 */
273static void
274bucket_init(void)
275{
276 struct uma_bucket_zone *ubz;
277 int i;
278 int j;
279
280 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
281 int size;
282
283 ubz = &bucket_zones[j];
284 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
285 size += sizeof(void *) * ubz->ubz_entries;
286 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
287 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
288 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
289 bucket_size[i >> BUCKET_SHIFT] = j;
290 }
291}
292
293/*
294 * Given a desired number of entries for a bucket, return the zone from which
295 * to allocate the bucket.
296 */
297static struct uma_bucket_zone *
298bucket_zone_lookup(int entries)
299{
300 int idx;
301
302 idx = howmany(entries, 1 << BUCKET_SHIFT);
303 return (&bucket_zones[bucket_size[idx]]);
304}
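
/*
 * A worked example, assuming the default bucket_zones[] table and
 * BUCKET_SHIFT above: bucket_init() fills bucket_size[] with
 * { 0, 0, 1, 2, 2, 3, 3, 3, 3 }, so requests for 1-16 entries map to the
 * "16 Bucket" zone, 17-32 to "32 Bucket", 33-64 to "64 Bucket" and 65-128
 * to "128 Bucket".  bucket_zone_lookup(20), for instance, computes
 * idx = howmany(20, 16) = 2 and returns the "32 Bucket" zone, so callers
 * always get a bucket with at least as many entries as they asked for.
 */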
305
306static uma_bucket_t
307bucket_alloc(int entries, int bflags)
308{
309 struct uma_bucket_zone *ubz;
310 uma_bucket_t bucket;
311
312 /*
313 * This is to stop us from allocating per cpu buckets while we're
314 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
315 * boot pages. This also prevents us from allocating buckets in
316 * low memory situations.
317 */
318 if (bucketdisable)
319 return (NULL);
320
321 ubz = bucket_zone_lookup(entries);
322 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
323 if (bucket) {
324#ifdef INVARIANTS
325 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
326#endif
327 bucket->ub_cnt = 0;
328 bucket->ub_entries = ubz->ubz_entries;
329 }
330
331 return (bucket);
332}
333
334static void
335bucket_free(uma_bucket_t bucket)
336{
337 struct uma_bucket_zone *ubz;
338
339 ubz = bucket_zone_lookup(bucket->ub_entries);
340 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE);
341}
342
343static void
344bucket_zone_drain(void)
345{
346 struct uma_bucket_zone *ubz;
347
348 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
349 zone_drain(ubz->ubz_zone);
350}
351
352
353/*
354 * Routine called by timeout which is used to fire off some time interval
355 * based calculations. (stats, hash size, etc.)
356 *
357 * Arguments:
358 * arg Unused
359 *
360 * Returns:
361 * Nothing
362 */
363static void
364uma_timeout(void *unused)
365{
366 bucket_enable();
367 zone_foreach(zone_timeout);
368
369 /* Reschedule this event */
370 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
371}
372
373/*
374 * Routine to perform timeout driven calculations. This expands the
375 * hashes and does per cpu statistics aggregation.
376 *
377 * Arguments:
378 * zone The zone to operate on
379 *
380 * Returns:
381 * Nothing
382 */
383static void
384zone_timeout(uma_zone_t zone)
385{
386 uma_keg_t keg;
387 uma_cache_t cache;
388 u_int64_t alloc;
389 int cpu;
390
391 keg = zone->uz_keg;
392 alloc = 0;
393
394 /*
395 * Aggregate per cpu cache statistics back to the zone.
396 *
397 * XXX This should be done in the sysctl handler.
398 *
399 * I may rewrite this to set a flag in the per cpu cache instead of
400 * locking. If the flag is not cleared on the next round I will have
401 * to lock and do it here instead so that the statistics don't get too
402 * far out of sync.
403 */
404 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) {
405 for (cpu = 0; cpu <= mp_maxid; cpu++) {
406 if (CPU_ABSENT(cpu))
407 continue;
408 CPU_LOCK(cpu);
409 cache = &zone->uz_cpu[cpu];
410 /* Add them up, and reset */
411 alloc += cache->uc_allocs;
412 cache->uc_allocs = 0;
413 CPU_UNLOCK(cpu);
414 }
415 }
416
417 /* Now push these stats back into the zone.. */
418 ZONE_LOCK(zone);
419 zone->uz_allocs += alloc;
420
421 /*
422 * Expand the zone hash table.
423 *
424 * This is done if the number of slabs is larger than the hash size.
425 * What I'm trying to do here is completely reduce collisions. This
426 * may be a little aggressive. Should I allow for two collisions max?
427 */
428
429 if (keg->uk_flags & UMA_ZONE_HASH &&
430 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
431 struct uma_hash newhash;
432 struct uma_hash oldhash;
433 int ret;
434
435 /*
436 * This is so involved because allocating and freeing
437 * while the zone lock is held will lead to deadlock.
438 * I have to do everything in stages and check for
439 * races.
440 */
441 newhash = keg->uk_hash;
442 ZONE_UNLOCK(zone);
443 ret = hash_alloc(&newhash);
444 ZONE_LOCK(zone);
445 if (ret) {
446 if (hash_expand(&keg->uk_hash, &newhash)) {
447 oldhash = keg->uk_hash;
448 keg->uk_hash = newhash;
449 } else
450 oldhash = newhash;
451
452 ZONE_UNLOCK(zone);
453 hash_free(&oldhash);
454 ZONE_LOCK(zone);
455 }
456 }
457 ZONE_UNLOCK(zone);
458}
459
460/*
461 * Allocate and zero fill the next sized hash table from the appropriate
462 * backing store.
463 *
464 * Arguments:
465 * hash A new hash structure with the old hash size in uh_hashsize
466 *
467 * Returns:
 468 *	1 on success and 0 on failure.
469 */
470static int
471hash_alloc(struct uma_hash *hash)
472{
473 int oldsize;
474 int alloc;
475
476 oldsize = hash->uh_hashsize;
477
478 /* We're just going to go to a power of two greater */
479 if (oldsize) {
480 hash->uh_hashsize = oldsize * 2;
481 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
482 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
483 M_UMAHASH, M_NOWAIT);
484 } else {
485 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
486 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
487 M_WAITOK);
488 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
489 }
490 if (hash->uh_slab_hash) {
491 bzero(hash->uh_slab_hash, alloc);
492 hash->uh_hashmask = hash->uh_hashsize - 1;
493 return (1);
494 }
495
496 return (0);
497}
498
499/*
500 * Expands the hash table for HASH zones. This is done from zone_timeout
501 * to reduce collisions. This must not be done in the regular allocation
502 * path, otherwise, we can recurse on the vm while allocating pages.
503 *
504 * Arguments:
505 * oldhash The hash you want to expand
506 * newhash The hash structure for the new table
507 *
508 * Returns:
 509 *	1 if the table was expanded, 0 otherwise.
510 *
511 * Discussion:
512 */
513static int
514hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
515{
516 uma_slab_t slab;
517 int hval;
518 int i;
519
520 if (!newhash->uh_slab_hash)
521 return (0);
522
523 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
524 return (0);
525
526 /*
527 * I need to investigate hash algorithms for resizing without a
528 * full rehash.
529 */
530
531 for (i = 0; i < oldhash->uh_hashsize; i++)
532 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
533 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
534 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
535 hval = UMA_HASH(newhash, slab->us_data);
536 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
537 slab, us_hlink);
538 }
539
540 return (1);
541}
542
543/*
544 * Free the hash bucket to the appropriate backing store.
545 *
546 * Arguments:
 547 *	hash  The hash structure whose slab_hash bucket array we're freeing;
 548 *	      its uh_hashsize selects the backing store it came from
549 *
550 * Returns:
551 * Nothing
552 */
553static void
554hash_free(struct uma_hash *hash)
555{
556 if (hash->uh_slab_hash == NULL)
557 return;
558 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
559 uma_zfree_internal(hashzone,
560 hash->uh_slab_hash, NULL, SKIP_NONE);
561 else
562 free(hash->uh_slab_hash, M_UMAHASH);
563}
564
565/*
566 * Frees all outstanding items in a bucket
567 *
568 * Arguments:
569 * zone The zone to free to, must be unlocked.
570 * bucket The free/alloc bucket with items, cpu queue must be locked.
571 *
572 * Returns:
573 * Nothing
574 */
575
576static void
577bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
578{
579 uma_slab_t slab;
580 int mzone;
581 void *item;
582
583 if (bucket == NULL)
584 return;
585
586 slab = NULL;
587 mzone = 0;
588
589 /* We have to lookup the slab again for malloc.. */
590 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
591 mzone = 1;
592
593 while (bucket->ub_cnt > 0) {
594 bucket->ub_cnt--;
595 item = bucket->ub_bucket[bucket->ub_cnt];
596#ifdef INVARIANTS
597 bucket->ub_bucket[bucket->ub_cnt] = NULL;
598 KASSERT(item != NULL,
599 ("bucket_drain: botched ptr, item is NULL"));
600#endif
601 /*
602 * This is extremely inefficient. The slab pointer was passed
603 * to uma_zfree_arg, but we lost it because the buckets don't
604 * hold them. This will go away when free() gets a size passed
605 * to it.
606 */
607 if (mzone)
608 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
609 uma_zfree_internal(zone, item, slab, SKIP_DTOR);
610 }
611}
612
613/*
614 * Drains the per cpu caches for a zone.
615 *
616 * Arguments:
617 * zone The zone to drain, must be unlocked.
618 *
619 * Returns:
620 * Nothing
621 */
622static void
623cache_drain(uma_zone_t zone)
624{
625 uma_cache_t cache;
626 int cpu;
627
628 /*
629 * We have to lock each cpu cache before locking the zone
630 */
631 for (cpu = 0; cpu <= mp_maxid; cpu++) {
632 if (CPU_ABSENT(cpu))
633 continue;
634 CPU_LOCK(cpu);
635 cache = &zone->uz_cpu[cpu];
636 bucket_drain(zone, cache->uc_allocbucket);
637 bucket_drain(zone, cache->uc_freebucket);
638 if (cache->uc_allocbucket != NULL)
639 bucket_free(cache->uc_allocbucket);
640 if (cache->uc_freebucket != NULL)
641 bucket_free(cache->uc_freebucket);
642 cache->uc_allocbucket = cache->uc_freebucket = NULL;
643 }
644 ZONE_LOCK(zone);
645 bucket_cache_drain(zone);
646 ZONE_UNLOCK(zone);
647 for (cpu = 0; cpu <= mp_maxid; cpu++) {
648 if (CPU_ABSENT(cpu))
649 continue;
650 CPU_UNLOCK(cpu);
651 }
652}
653
654/*
655 * Drain the cached buckets from a zone. Expects a locked zone on entry.
656 */
657static void
658bucket_cache_drain(uma_zone_t zone)
659{
660 uma_bucket_t bucket;
661
662 /*
663 * Drain the bucket queues and free the buckets, we just keep two per
664 * cpu (alloc/free).
665 */
666 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
667 LIST_REMOVE(bucket, ub_link);
668 ZONE_UNLOCK(zone);
669 bucket_drain(zone, bucket);
670 bucket_free(bucket);
671 ZONE_LOCK(zone);
672 }
673
674 /* Now we do the free queue.. */
675 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
676 LIST_REMOVE(bucket, ub_link);
677 bucket_free(bucket);
678 }
679}
680
681/*
682 * Frees pages from a zone back to the system. This is done on demand from
683 * the pageout daemon.
684 *
685 * Arguments:
 686 *	zone  The zone to free pages from
688 *
689 * Returns:
690 * Nothing.
691 */
692static void
693zone_drain(uma_zone_t zone)
694{
695 struct slabhead freeslabs = { 0 };
696 uma_keg_t keg;
697 uma_slab_t slab;
698 uma_slab_t n;
699 u_int8_t flags;
700 u_int8_t *mem;
701 int i;
702
703 keg = zone->uz_keg;
704
705 /*
706 * We don't want to take pages from statically allocated zones at this
707 * time
708 */
709 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
710 return;
711
712 ZONE_LOCK(zone);
713
714#ifdef UMA_DEBUG
715 printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
716#endif
717 bucket_cache_drain(zone);
718 if (keg->uk_free == 0)
719 goto finished;
720
721 slab = LIST_FIRST(&keg->uk_free_slab);
722 while (slab) {
723 n = LIST_NEXT(slab, us_link);
724
 725		/* We have nowhere to free these to */
726 if (slab->us_flags & UMA_SLAB_BOOT) {
727 slab = n;
728 continue;
729 }
730
731 LIST_REMOVE(slab, us_link);
732 keg->uk_pages -= keg->uk_ppera;
733 keg->uk_free -= keg->uk_ipers;
734
735 if (keg->uk_flags & UMA_ZONE_HASH)
736 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
737
738 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
739
740 slab = n;
741 }
742finished:
743 ZONE_UNLOCK(zone);
744
745 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
746 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
747 if (keg->uk_fini)
748 for (i = 0; i < keg->uk_ipers; i++)
749 keg->uk_fini(
750 slab->us_data + (keg->uk_rsize * i),
751 keg->uk_size);
752 flags = slab->us_flags;
753 mem = slab->us_data;
754
755 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
756 (keg->uk_flags & UMA_ZONE_REFCNT)) {
757 for (i = 0; i < keg->uk_ppera; i++)
758 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
759 kmem_object);
760 }
761 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
762 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
763 SKIP_NONE);
764#ifdef UMA_DEBUG
765 printf("%s: Returning %d bytes.\n",
766 zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
767#endif
768 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
769 }
770}
771
772/*
773 * Allocate a new slab for a zone. This does not insert the slab onto a list.
774 *
775 * Arguments:
776 * zone The zone to allocate slabs for
777 * wait Shall we wait?
778 *
779 * Returns:
780 * The slab that was allocated or NULL if there is no memory and the
781 * caller specified M_NOWAIT.
782 */
783static uma_slab_t
784slab_zalloc(uma_zone_t zone, int wait)
785{
786 uma_slabrefcnt_t slabref;
787 uma_slab_t slab;
788 uma_keg_t keg;
789 u_int8_t *mem;
790 u_int8_t flags;
791 int i;
792
793 slab = NULL;
794 keg = zone->uz_keg;
795
796#ifdef UMA_DEBUG
797 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
798#endif
799 ZONE_UNLOCK(zone);
800
801 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
802 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
803 if (slab == NULL) {
804 ZONE_LOCK(zone);
805 return NULL;
806 }
807 }
808
809 /*
810 * This reproduces the old vm_zone behavior of zero filling pages the
811 * first time they are added to a zone.
812 *
813 * Malloced items are zeroed in uma_zalloc.
814 */
815
816 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
817 wait |= M_ZERO;
818 else
819 wait &= ~M_ZERO;
820
821 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
822 &flags, wait);
823 if (mem == NULL) {
824 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
825 uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0);
826 ZONE_LOCK(zone);
827 return (NULL);
828 }
829
830 /* Point the slab into the allocated memory */
831 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
832 slab = (uma_slab_t )(mem + keg->uk_pgoff);
833
834 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
835 (keg->uk_flags & UMA_ZONE_REFCNT))
836 for (i = 0; i < keg->uk_ppera; i++)
837 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
838
839 slab->us_keg = keg;
840 slab->us_data = mem;
841 slab->us_freecount = keg->uk_ipers;
842 slab->us_firstfree = 0;
843 slab->us_flags = flags;
844
845 if (keg->uk_flags & UMA_ZONE_REFCNT) {
846 slabref = (uma_slabrefcnt_t)slab;
847 for (i = 0; i < keg->uk_ipers; i++) {
848 slabref->us_freelist[i].us_refcnt = 0;
849 slabref->us_freelist[i].us_item = i+1;
850 }
851 } else {
852 for (i = 0; i < keg->uk_ipers; i++)
853 slab->us_freelist[i].us_item = i+1;
854 }
855
856 if (keg->uk_init != NULL) {
857 for (i = 0; i < keg->uk_ipers; i++)
858 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
859 keg->uk_size, wait) != 0)
860 break;
861 if (i != keg->uk_ipers) {
862 if (keg->uk_fini != NULL) {
863 for (i--; i > -1; i--)
864 keg->uk_fini(slab->us_data +
865 (keg->uk_rsize * i),
866 keg->uk_size);
867 }
868 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
869 (keg->uk_flags & UMA_ZONE_REFCNT))
870 for (i = 0; i < keg->uk_ppera; i++)
871 vsetobj((vm_offset_t)mem +
872 (i * PAGE_SIZE), kmem_object);
873 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
874 uma_zfree_internal(keg->uk_slabzone, slab,
875 NULL, SKIP_NONE);
876 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
877 flags);
878 ZONE_LOCK(zone);
879 return (NULL);
880 }
881 }
882 ZONE_LOCK(zone);
883
884 if (keg->uk_flags & UMA_ZONE_HASH)
885 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
886
887 keg->uk_pages += keg->uk_ppera;
888 keg->uk_free += keg->uk_ipers;
889
890 return (slab);
891}
892
893/*
894 * This function is intended to be used early on in place of page_alloc() so
895 * that we may use the boot time page cache to satisfy allocations before
896 * the VM is ready.
897 */
898static void *
899startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
900{
901 uma_keg_t keg;
902
903 keg = zone->uz_keg;
904
905 /*
906 * Check our small startup cache to see if it has pages remaining.
907 */
908 mtx_lock(&uma_mtx);
909 if (uma_boot_free != 0) {
910 uma_slab_t tmps;
911
912 tmps = LIST_FIRST(&uma_boot_pages);
913 LIST_REMOVE(tmps, us_link);
914 uma_boot_free--;
915 mtx_unlock(&uma_mtx);
916 *pflag = tmps->us_flags;
917 return (tmps->us_data);
918 }
919 mtx_unlock(&uma_mtx);
920 if (booted == 0)
921 panic("UMA: Increase UMA_BOOT_PAGES");
922 /*
923 * Now that we've booted reset these users to their real allocator.
924 */
925#ifdef UMA_MD_SMALL_ALLOC
926 keg->uk_allocf = uma_small_alloc;
927#else
928 keg->uk_allocf = page_alloc;
929#endif
930 return keg->uk_allocf(zone, bytes, pflag, wait);
931}
932
933/*
934 * Allocates a number of pages from the system
935 *
936 * Arguments:
937 * zone Unused
938 * bytes The number of bytes requested
939 * wait Shall we wait?
940 *
941 * Returns:
942 * A pointer to the alloced memory or possibly
943 * NULL if M_NOWAIT is set.
944 */
945static void *
946page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
947{
948 void *p; /* Returned page */
949
950 *pflag = UMA_SLAB_KMEM;
951 p = (void *) kmem_malloc(kmem_map, bytes, wait);
952
953 return (p);
954}
955
956/*
957 * Allocates a number of pages from within an object
958 *
959 * Arguments:
960 * zone Unused
961 * bytes The number of bytes requested
962 * wait Shall we wait?
963 *
964 * Returns:
965 * A pointer to the alloced memory or possibly
966 * NULL if M_NOWAIT is set.
967 */
968static void *
969obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
970{
971 vm_object_t object;
972 vm_offset_t retkva, zkva;
973 vm_page_t p;
974 int pages, startpages;
975
976 object = zone->uz_keg->uk_obj;
977 retkva = 0;
978
979 /*
980 * This looks a little weird since we're getting one page at a time.
981 */
982 VM_OBJECT_LOCK(object);
983 p = TAILQ_LAST(&object->memq, pglist);
984 pages = p != NULL ? p->pindex + 1 : 0;
985 startpages = pages;
986 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
987 for (; bytes > 0; bytes -= PAGE_SIZE) {
988 p = vm_page_alloc(object, pages,
989 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
990 if (p == NULL) {
991 if (pages != startpages)
992 pmap_qremove(retkva, pages - startpages);
993 while (pages != startpages) {
994 pages--;
995 p = TAILQ_LAST(&object->memq, pglist);
996 vm_page_lock_queues();
997 vm_page_unwire(p, 0);
998 vm_page_free(p);
999 vm_page_unlock_queues();
1000 }
1001 retkva = 0;
1002 goto done;
1003 }
1004 pmap_qenter(zkva, &p, 1);
1005 if (retkva == 0)
1006 retkva = zkva;
1007 zkva += PAGE_SIZE;
1008 pages += 1;
1009 }
1010done:
1011 VM_OBJECT_UNLOCK(object);
1012 *flags = UMA_SLAB_PRIV;
1013
1014 return ((void *)retkva);
1015}
1016
1017/*
1018 * Frees a number of pages to the system
1019 *
1020 * Arguments:
1021 * mem A pointer to the memory to be freed
1022 * size The size of the memory being freed
1023 * flags The original p->us_flags field
1024 *
1025 * Returns:
1026 * Nothing
1027 */
1028static void
1029page_free(void *mem, int size, u_int8_t flags)
1030{
1031 vm_map_t map;
1032
1033 if (flags & UMA_SLAB_KMEM)
1034 map = kmem_map;
1035 else
1036 panic("UMA: page_free used with invalid flags %d\n", flags);
1037
1038 kmem_free(map, (vm_offset_t)mem, size);
1039}
1040
1041/*
1042 * Zero fill initializer
1043 *
1044 * Arguments/Returns follow uma_init specifications
1045 */
1046static int
1047zero_init(void *mem, int size, int flags)
1048{
1049 bzero(mem, size);
1050 return (0);
1051}
1052
1053/*
1054 * Finish creating a small uma zone. This calculates ipers, and the zone size.
1055 *
1056 * Arguments
1057 * zone The zone we should initialize
1058 *
1059 * Returns
1060 * Nothing
1061 */
1062static void
1063zone_small_init(uma_zone_t zone)
1064{
1065 uma_keg_t keg;
1066 u_int rsize;
1067 u_int memused;
1068 u_int wastedspace;
1069 u_int shsize;
1070
1071 keg = zone->uz_keg;
1072 KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
1073 rsize = keg->uk_size;
1074
1075 if (rsize < UMA_SMALLEST_UNIT)
1076 rsize = UMA_SMALLEST_UNIT;
1077 if (rsize & keg->uk_align)
1078 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1079
1080 keg->uk_rsize = rsize;
1081 keg->uk_ppera = 1;
1082
1083 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1084 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
1085 shsize = sizeof(struct uma_slab_refcnt);
1086 } else {
1087 rsize += UMA_FRITM_SZ; /* Account for linkage */
1088 shsize = sizeof(struct uma_slab);
1089 }
1090
1091 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1092 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
1093 memused = keg->uk_ipers * rsize + shsize;
1094 wastedspace = UMA_SLAB_SIZE - memused;
1095
1096 /*
1097 * We can't do OFFPAGE if we're internal or if we've been
1098 * asked to not go to the VM for buckets. If we do this we
1099 * may end up going to the VM (kmem_map) for slabs which we
1100 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1101 * result of UMA_ZONE_VM, which clearly forbids it.
1102 */
1103 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1104 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1105 return;
1106
1107 if ((wastedspace >= UMA_MAX_WASTE) &&
1108 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1109 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1110 KASSERT(keg->uk_ipers <= 255,
1111 ("zone_small_init: keg->uk_ipers too high!"));
1112#ifdef UMA_DEBUG
1113 printf("UMA decided we need offpage slab headers for "
1114 "zone: %s, calculated wastedspace = %d, "
1115 "maximum wasted space allowed = %d, "
1116 "calculated ipers = %d, "
1117 "new wasted space = %d\n", zone->uz_name, wastedspace,
1118 UMA_MAX_WASTE, keg->uk_ipers,
1119 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1120#endif
1121 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1122 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1123 keg->uk_flags |= UMA_ZONE_HASH;
1124 }
1125}
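
/*
 * A rough worked example (assuming UMA_SLAB_SIZE is a single 4 KB page):
 * for a pointer-aligned 256 byte item, rsize stays 256 and
 * ipers = (4096 - slab header size) / (256 + UMA_FRITM_SZ), roughly 15
 * items per slab with a couple hundred bytes left over.  Only if that
 * leftover exceeded UMA_MAX_WASTE would the block above switch the keg to
 * UMA_ZONE_OFFPAGE (and UMA_ZONE_HASH, unless it is a malloc zone) in
 * order to pack more items into the page.
 */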
1126
1127/*
1128 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
1129 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
1130 * more complicated.
1131 *
1132 * Arguments
1133 * zone The zone we should initialize
1134 *
1135 * Returns
1136 * Nothing
1137 */
1138static void
1139zone_large_init(uma_zone_t zone)
1140{
1141 uma_keg_t keg;
1142 int pages;
1143
1144 keg = zone->uz_keg;
1145
1146 KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
1147 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1148 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1149
1150 pages = keg->uk_size / UMA_SLAB_SIZE;
1151
1152 /* Account for remainder */
1153 if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1154 pages++;
1155
1156 keg->uk_ppera = pages;
1157 keg->uk_ipers = 1;
1158
1159 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1160 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1161 keg->uk_flags |= UMA_ZONE_HASH;
1162
1163 keg->uk_rsize = keg->uk_size;
1164}
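
/*
 * A worked example, assuming UMA_SLAB_SIZE is one 4 KB page: a 9000 byte
 * item gives pages = 9000 / 4096 = 2, the remainder check bumps it to 3,
 * so the keg ends up with uk_ppera = 3, uk_ipers = 1, an offpage slab
 * header and (unless this is a malloc zone, which finds its slab through
 * the vm page) a hash table for slab lookup.
 */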
1165
1166/*
1167 * Keg header ctor. This initializes all fields, locks, etc. And inserts
1168 * the keg onto the global keg list.
1169 *
1170 * Arguments/Returns follow uma_ctor specifications
1171 * udata Actually uma_kctor_args
1172 */
1173static int
1174keg_ctor(void *mem, int size, void *udata, int flags)
1175{
1176 struct uma_kctor_args *arg = udata;
1177 uma_keg_t keg = mem;
1178 uma_zone_t zone;
1179
1180 bzero(keg, size);
1181 keg->uk_size = arg->size;
1182 keg->uk_init = arg->uminit;
1183 keg->uk_fini = arg->fini;
1184 keg->uk_align = arg->align;
1185 keg->uk_free = 0;
1186 keg->uk_pages = 0;
1187 keg->uk_flags = arg->flags;
1188 keg->uk_allocf = page_alloc;
1189 keg->uk_freef = page_free;
1190 keg->uk_recurse = 0;
1191 keg->uk_slabzone = NULL;
1192
1193 /*
1194 * The master zone is passed to us at keg-creation time.
1195 */
1196 zone = arg->zone;
1197 zone->uz_keg = keg;
1198
1199 if (arg->flags & UMA_ZONE_VM)
1200 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1201
1202 if (arg->flags & UMA_ZONE_ZINIT)
1203 keg->uk_init = zero_init;
1204
1205 /*
1206 * The +UMA_FRITM_SZ added to uk_size is to account for the
1207 * linkage that is added to the size in zone_small_init(). If
1208 * we don't account for this here then we may end up in
1209 * zone_small_init() with a calculated 'ipers' of 0.
1210 */
1211 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1212 if ((keg->uk_size+UMA_FRITMREF_SZ) >
1213 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1214 zone_large_init(zone);
1215 else
1216 zone_small_init(zone);
1217 } else {
1218 if ((keg->uk_size+UMA_FRITM_SZ) >
1219 (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1220 zone_large_init(zone);
1221 else
1222 zone_small_init(zone);
1223 }
1224
1225 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1226 if (keg->uk_flags & UMA_ZONE_REFCNT)
1227 keg->uk_slabzone = slabrefzone;
1228 else
1229 keg->uk_slabzone = slabzone;
1230 }
1231
1232 /*
1233 * If we haven't booted yet we need allocations to go through the
1234 * startup cache until the vm is ready.
1235 */
1236 if (keg->uk_ppera == 1) {
1237#ifdef UMA_MD_SMALL_ALLOC
1238 keg->uk_allocf = uma_small_alloc;
1239 keg->uk_freef = uma_small_free;
1240#endif
1241 if (booted == 0)
1242 keg->uk_allocf = startup_alloc;
1243 }
1244
1245 /*
1246 * Initialize keg's lock (shared among zones) through
1247 * Master zone
1248 */
1249 zone->uz_lock = &keg->uk_lock;
1250 if (arg->flags & UMA_ZONE_MTXCLASS)
1251 ZONE_LOCK_INIT(zone, 1);
1252 else
1253 ZONE_LOCK_INIT(zone, 0);
1254
1255 /*
1256 * If we're putting the slab header in the actual page we need to
1257 * figure out where in each page it goes. This calculates a right
1258 * justified offset into the memory on an ALIGN_PTR boundary.
1259 */
1260 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1261 u_int totsize;
1262
1263 /* Size of the slab struct and free list */
1264 if (keg->uk_flags & UMA_ZONE_REFCNT)
1265 totsize = sizeof(struct uma_slab_refcnt) +
1266 keg->uk_ipers * UMA_FRITMREF_SZ;
1267 else
1268 totsize = sizeof(struct uma_slab) +
1269 keg->uk_ipers * UMA_FRITM_SZ;
1270
1271 if (totsize & UMA_ALIGN_PTR)
1272 totsize = (totsize & ~UMA_ALIGN_PTR) +
1273 (UMA_ALIGN_PTR + 1);
1274 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
1275
1276 if (keg->uk_flags & UMA_ZONE_REFCNT)
1277 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1278 + keg->uk_ipers * UMA_FRITMREF_SZ;
1279 else
1280 totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1281 + keg->uk_ipers * UMA_FRITM_SZ;
1282
1283 /*
1284 * The only way the following is possible is if with our
1285 * UMA_ALIGN_PTR adjustments we are now bigger than
1286 * UMA_SLAB_SIZE. I haven't checked whether this is
1287 * mathematically possible for all cases, so we make
1288 * sure here anyway.
1289 */
1290 if (totsize > UMA_SLAB_SIZE) {
1291 printf("zone %s ipers %d rsize %d size %d\n",
1292 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1293 keg->uk_size);
1294 panic("UMA slab won't fit.\n");
1295 }
1296 }
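
	/*
	 * For illustration (hypothetical numbers): a keg with 15 in-page
	 * items has totsize = sizeof(struct uma_slab) + 15 * UMA_FRITM_SZ,
	 * rounded up to the next UMA_ALIGN_PTR + 1 boundary, and
	 * uk_pgoff = UMA_SLAB_SIZE - totsize places the slab header flush
	 * against the end of the page, leaving the front of the page for
	 * the items themselves.
	 */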
1297
1298 if (keg->uk_flags & UMA_ZONE_HASH)
1299 hash_alloc(&keg->uk_hash);
1300
1301#ifdef UMA_DEBUG
1302 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1303 zone->uz_name, zone,
1304 keg->uk_size, keg->uk_ipers,
1305 keg->uk_ppera, keg->uk_pgoff);
1306#endif
1307
1308 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1309
1310 mtx_lock(&uma_mtx);
1311 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1312 mtx_unlock(&uma_mtx);
1313 return (0);
1314}
1315
1316/*
1317 * Zone header ctor. This initializes all fields, locks, etc.
1318 *
1319 * Arguments/Returns follow uma_ctor specifications
1320 * udata Actually uma_zctor_args
1321 */
1322
1323static int
1324zone_ctor(void *mem, int size, void *udata, int flags)
1325{
1326 struct uma_zctor_args *arg = udata;
1327 uma_zone_t zone = mem;
1328 uma_zone_t z;
1329 uma_keg_t keg;
1330
1331 bzero(zone, size);
1332 zone->uz_name = arg->name;
1333 zone->uz_ctor = arg->ctor;
1334 zone->uz_dtor = arg->dtor;
1335 zone->uz_init = NULL;
1336 zone->uz_fini = NULL;
1337 zone->uz_allocs = 0;
1338 zone->uz_fills = zone->uz_count = 0;
1339
1340 if (arg->flags & UMA_ZONE_SECONDARY) {
1341 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1342 keg = arg->keg;
1343 zone->uz_keg = keg;
1344 zone->uz_init = arg->uminit;
1345 zone->uz_fini = arg->fini;
1346 zone->uz_lock = &keg->uk_lock;
1347 mtx_lock(&uma_mtx);
1348 ZONE_LOCK(zone);
1349 keg->uk_flags |= UMA_ZONE_SECONDARY;
1350 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1351 if (LIST_NEXT(z, uz_link) == NULL) {
1352 LIST_INSERT_AFTER(z, zone, uz_link);
1353 break;
1354 }
1355 }
1356 ZONE_UNLOCK(zone);
1357 mtx_unlock(&uma_mtx);
1358 } else if (arg->keg == NULL) {
1359 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1360 arg->align, arg->flags) == NULL)
1361 return (ENOMEM);
1362 } else {
1363 struct uma_kctor_args karg;
1364 int error;
1365
1366 /* We should only be here from uma_startup() */
1367 karg.size = arg->size;
1368 karg.uminit = arg->uminit;
1369 karg.fini = arg->fini;
1370 karg.align = arg->align;
1371 karg.flags = arg->flags;
1372 karg.zone = zone;
1373 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1374 flags);
1375 if (error)
1376 return (error);
1377 }
1378 keg = zone->uz_keg;
1379 zone->uz_lock = &keg->uk_lock;
1380
1381 /*
1382 * Some internal zones don't have room allocated for the per cpu
1383 * caches. If we're internal, bail out here.
1384 */
1385 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1386 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
1387 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1388 return (0);
1389 }
1390
1391 if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1392 zone->uz_count = BUCKET_MAX;
1393 else if (keg->uk_ipers <= BUCKET_MAX)
1394 zone->uz_count = keg->uk_ipers;
1395 else
1396 zone->uz_count = BUCKET_MAX;
1397 return (0);
1398}
1399
1400/*
1401 * Keg header dtor. This frees all data, destroys locks, frees the hash
1402 * table and removes the keg from the global list.
1403 *
1404 * Arguments/Returns follow uma_dtor specifications
1405 * udata unused
1406 */
1407static void
1408keg_dtor(void *arg, int size, void *udata)
1409{
1410 uma_keg_t keg;
1411
1412 keg = (uma_keg_t)arg;
1413 mtx_lock(&keg->uk_lock);
1414 if (keg->uk_free != 0) {
1415 printf("Freed UMA keg was not empty (%d items). "
1416 " Lost %d pages of memory.\n",
1417 keg->uk_free, keg->uk_pages);
1418 }
1419 mtx_unlock(&keg->uk_lock);
1420
1421 if (keg->uk_flags & UMA_ZONE_HASH)
1422 hash_free(&keg->uk_hash);
1423
1424 mtx_destroy(&keg->uk_lock);
1425}
1426
1427/*
1428 * Zone header dtor.
1429 *
1430 * Arguments/Returns follow uma_dtor specifications
1431 * udata unused
1432 */
1433static void
1434zone_dtor(void *arg, int size, void *udata)
1435{
1436 uma_zone_t zone;
1437 uma_keg_t keg;
1438
1439 zone = (uma_zone_t)arg;
1440 keg = zone->uz_keg;
1441
1442 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
1443 cache_drain(zone);
1444
1445 mtx_lock(&uma_mtx);
1446 zone_drain(zone);
1447 if (keg->uk_flags & UMA_ZONE_SECONDARY) {
1448 LIST_REMOVE(zone, uz_link);
1449 /*
1450 * XXX there are some races here where
1451 * the zone can be drained but zone lock
1452 * released and then refilled before we
 1453		 * remove it... we don't care for now
1454 */
1455 ZONE_LOCK(zone);
1456 if (LIST_EMPTY(&keg->uk_zones))
1457 keg->uk_flags &= ~UMA_ZONE_SECONDARY;
1458 ZONE_UNLOCK(zone);
1459 mtx_unlock(&uma_mtx);
1460 } else {
1461 LIST_REMOVE(keg, uk_link);
1462 LIST_REMOVE(zone, uz_link);
1463 mtx_unlock(&uma_mtx);
1464 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE);
1465 }
1466 zone->uz_keg = NULL;
1467}
1468
1469/*
1470 * Traverses every zone in the system and calls a callback
1471 *
1472 * Arguments:
1473 * zfunc A pointer to a function which accepts a zone
1474 * as an argument.
1475 *
1476 * Returns:
1477 * Nothing
1478 */
1479static void
1480zone_foreach(void (*zfunc)(uma_zone_t))
1481{
1482 uma_keg_t keg;
1483 uma_zone_t zone;
1484
1485 mtx_lock(&uma_mtx);
1486 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1487 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1488 zfunc(zone);
1489 }
1490 mtx_unlock(&uma_mtx);
1491}
1492
1493/* Public functions */
1494/* See uma.h */
1495void
1496uma_startup(void *bootmem)
1497{
1498 struct uma_zctor_args args;
1499 uma_slab_t slab;
1500 u_int slabsize;
1501 u_int objsize, totsize, wsize;
1502 int i;
1503
1504#ifdef UMA_DEBUG
1505 printf("Creating uma keg headers zone and keg.\n");
1506#endif
1507 /*
1508 * The general UMA lock is a recursion-allowed lock because
1509 * there is a code path where, while we're still configured
1510 * to use startup_alloc() for backend page allocations, we
1511 * may end up in uma_reclaim() which calls zone_foreach(zone_drain),
1512 * which grabs uma_mtx, only to later call into startup_alloc()
1513 * because while freeing we needed to allocate a bucket. Since
1514 * startup_alloc() also takes uma_mtx, we need to be able to
1515 * recurse on it.
1516 */
1517 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE);
1518
1519 /*
1520 * Figure out the maximum number of items-per-slab we'll have if
1521 * we're using the OFFPAGE slab header to track free items, given
1522 * all possible object sizes and the maximum desired wastage
1523 * (UMA_MAX_WASTE).
1524 *
1525 * We iterate until we find an object size for
1526 * which the calculated wastage in zone_small_init() will be
1527 * enough to warrant OFFPAGE. Since wastedspace versus objsize
1528 * is an overall increasing see-saw function, we find the smallest
1529 * objsize such that the wastage is always acceptable for objects
1530 * with that objsize or smaller. Since a smaller objsize always
1531 * generates a larger possible uma_max_ipers, we use this computed
1532 * objsize to calculate the largest ipers possible. Since the
1533 * ipers calculated for OFFPAGE slab headers is always larger than
1534 * the ipers initially calculated in zone_small_init(), we use
1535 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1536 * obtain the maximum ipers possible for offpage slab headers.
1537 *
 1538	 * It should be noted that ipers versus objsize is an inversely
1539 * proportional function which drops off rather quickly so as
1540 * long as our UMA_MAX_WASTE is such that the objsize we calculate
1541 * falls into the portion of the inverse relation AFTER the steep
1542 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1543 *
1544 * Note that we have 8-bits (1 byte) to use as a freelist index
1545 * inside the actual slab header itself and this is enough to
 1546	 * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
1547 * object with offpage slab header would have ipers =
1548 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1549 * 1 greater than what our byte-integer freelist index can
 1550	 * accommodate, but we know that this situation never occurs as
1551 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1552 * that we need to go to offpage slab headers. Or, if we do,
1553 * then we trap that condition below and panic in the INVARIANTS case.
1554 */
1555 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1556 totsize = wsize;
1557 objsize = UMA_SMALLEST_UNIT;
1558 while (totsize >= wsize) {
1559 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1560 (objsize + UMA_FRITM_SZ);
1561 totsize *= (UMA_FRITM_SZ + objsize);
1562 objsize++;
1563 }
1564 if (objsize > UMA_SMALLEST_UNIT)
1565 objsize--;
1566 uma_max_ipers = UMA_SLAB_SIZE / objsize;
1567
1568 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1569 totsize = wsize;
1570 objsize = UMA_SMALLEST_UNIT;
1571 while (totsize >= wsize) {
1572 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1573 (objsize + UMA_FRITMREF_SZ);
1574 totsize *= (UMA_FRITMREF_SZ + objsize);
1575 objsize++;
1576 }
1577 if (objsize > UMA_SMALLEST_UNIT)
1578 objsize--;
1579 uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
1580
1581 KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1582 ("uma_startup: calculated uma_max_ipers values too large!"));
1583
1584#ifdef UMA_DEBUG
1585 printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
 1586	printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n",
1587 uma_max_ipers_ref);
1588#endif
1589
1590 /* "manually" create the initial zone */
1591 args.name = "UMA Kegs";
1592 args.size = sizeof(struct uma_keg);
1593 args.ctor = keg_ctor;
1594 args.dtor = keg_dtor;
1595 args.uminit = zero_init;
1596 args.fini = NULL;
1597 args.keg = &masterkeg;
1598 args.align = 32 - 1;
1599 args.flags = UMA_ZFLAG_INTERNAL;
1600 /* The initial zone has no Per cpu queues so it's smaller */
1601 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1602
1603#ifdef UMA_DEBUG
1604 printf("Filling boot free list.\n");
1605#endif
1606 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1607 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1608 slab->us_data = (u_int8_t *)slab;
1609 slab->us_flags = UMA_SLAB_BOOT;
1610 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1611 uma_boot_free++;
1612 }
1613
1614#ifdef UMA_DEBUG
1615 printf("Creating uma zone headers zone and keg.\n");
1616#endif
1617 args.name = "UMA Zones";
1618 args.size = sizeof(struct uma_zone) +
1619 (sizeof(struct uma_cache) * (mp_maxid + 1));
1620 args.ctor = zone_ctor;
1621 args.dtor = zone_dtor;
1622 args.uminit = zero_init;
1623 args.fini = NULL;
1624 args.keg = NULL;
1625 args.align = 32 - 1;
1626 args.flags = UMA_ZFLAG_INTERNAL;
1627 /* The initial zone has no Per cpu queues so it's smaller */
1628 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1629
1630#ifdef UMA_DEBUG
1631 printf("Initializing pcpu cache locks.\n");
1632#endif
1633 /* Initialize the pcpu cache lock set once and for all */
1634 for (i = 0; i <= mp_maxid; i++)
1635 CPU_LOCK_INIT(i);
1636
1637#ifdef UMA_DEBUG
1638 printf("Creating slab and hash zones.\n");
1639#endif
1640
1641 /*
1642 * This is the max number of free list items we'll have with
1643 * offpage slabs.
1644 */
1645 slabsize = uma_max_ipers * UMA_FRITM_SZ;
1646 slabsize += sizeof(struct uma_slab);
1647
1648 /* Now make a zone for slab headers */
1649 slabzone = uma_zcreate("UMA Slabs",
1650 slabsize,
1651 NULL, NULL, NULL, NULL,
1652 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1653
1654 /*
1655 * We also create a zone for the bigger slabs with reference
 1656	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1657 */
1658 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1659 slabsize += sizeof(struct uma_slab_refcnt);
1660 slabrefzone = uma_zcreate("UMA RCntSlabs",
1661 slabsize,
1662 NULL, NULL, NULL, NULL,
1663 UMA_ALIGN_PTR,
1664 UMA_ZFLAG_INTERNAL);
1665
1666 hashzone = uma_zcreate("UMA Hash",
1667 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1668 NULL, NULL, NULL, NULL,
1669 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1670
1671 bucket_init();
1672
1673#ifdef UMA_MD_SMALL_ALLOC
1674 booted = 1;
1675#endif
1676
1677#ifdef UMA_DEBUG
1678 printf("UMA startup complete.\n");
1679#endif
1680}
1681
1682/* see uma.h */
1683void
1684uma_startup2(void)
1685{
1686 booted = 1;
1687 bucket_enable();
1688#ifdef UMA_DEBUG
1689 printf("UMA startup2 complete.\n");
1690#endif
1691}
1692
1693/*
1694 * Initialize our callout handle
1695 *
1696 */
1697
1698static void
1699uma_startup3(void)
1700{
1701#ifdef UMA_DEBUG
1702 printf("Starting callout.\n");
1703#endif
1704 callout_init(&uma_callout, CALLOUT_MPSAFE);
1705 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1706#ifdef UMA_DEBUG
1707 printf("UMA startup3 complete.\n");
1708#endif
1709}
1710
1711static uma_zone_t
1712uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1713 int align, u_int16_t flags)
1714{
1715 struct uma_kctor_args args;
1716
1717 args.size = size;
1718 args.uminit = uminit;
1719 args.fini = fini;
1720 args.align = align;
1721 args.flags = flags;
1722 args.zone = zone;
1723 return (uma_zalloc_internal(kegs, &args, M_WAITOK));
1724}
1725
1726/* See uma.h */
1727uma_zone_t
1728uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1729 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1730
1731{
1732 struct uma_zctor_args args;
1733
1734 /* This stuff is essential for the zone ctor */
1735 args.name = name;
1736 args.size = size;
1737 args.ctor = ctor;
1738 args.dtor = dtor;
1739 args.uminit = uminit;
1740 args.fini = fini;
1741 args.align = align;
1742 args.flags = flags;
1743 args.keg = NULL;
1744
1745 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1746}
1747
1748/* See uma.h */
1749uma_zone_t
1750uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1751 uma_init zinit, uma_fini zfini, uma_zone_t master)
1752{
1753 struct uma_zctor_args args;
1754
1755 args.name = name;
1756 args.size = master->uz_keg->uk_size;
1757 args.ctor = ctor;
1758 args.dtor = dtor;
1759 args.uminit = zinit;
1760 args.fini = zfini;
1761 args.align = master->uz_keg->uk_align;
1762 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
1763 args.keg = master->uz_keg;
1764
1765 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1766}
1767
1768/* See uma.h */
1769void
1770uma_zdestroy(uma_zone_t zone)
1771{
1772 uma_zfree_internal(zones, zone, NULL, SKIP_NONE);
1773}
1774
1775/* See uma.h */
1776void *
1777uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1778{
1779 void *item;
1780 uma_cache_t cache;
1781 uma_bucket_t bucket;
1782 int cpu;
1783 int badness;
1784
1785 /* This is the fast path allocation */
1786#ifdef UMA_DEBUG_ALLOC_1
1787 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1788#endif
1789 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1790 zone->uz_name, flags);
1791
1792 if (!(flags & M_NOWAIT)) {
1793 KASSERT(curthread->td_intr_nesting_level == 0,
1794 ("malloc(M_WAITOK) in interrupt context"));
1795 if (nosleepwithlocks) {
1796#ifdef WITNESS
1797 badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
1798 NULL,
1799 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
1800 zone->uz_name);
1801#else
1802 badness = 1;
1803#endif
1804 } else {
1805 badness = 0;
1806#ifdef WITNESS
1807 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1808 "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
1809#endif
1810 }
1811 if (badness) {
1812 flags &= ~M_WAITOK;
1813 flags |= M_NOWAIT;
1814 }
1815 }
1816
1817zalloc_restart:
1818 cpu = PCPU_GET(cpuid);
1819 CPU_LOCK(cpu);
1820 cache = &zone->uz_cpu[cpu];
1821
1822zalloc_start:
1823 bucket = cache->uc_allocbucket;
1824
1825 if (bucket) {
1826 if (bucket->ub_cnt > 0) {
1827 bucket->ub_cnt--;
1828 item = bucket->ub_bucket[bucket->ub_cnt];
1829#ifdef INVARIANTS
1830 bucket->ub_bucket[bucket->ub_cnt] = NULL;
1831#endif
1832 KASSERT(item != NULL,
1833 ("uma_zalloc: Bucket pointer mangled."));
1834 cache->uc_allocs++;
1835#ifdef INVARIANTS
1836 ZONE_LOCK(zone);
1837 uma_dbg_alloc(zone, NULL, item);
1838 ZONE_UNLOCK(zone);
1839#endif
1840 CPU_UNLOCK(cpu);
1841 if (zone->uz_ctor != NULL) {
1842 if (zone->uz_ctor(item, zone->uz_keg->uk_size,
1843 udata, flags) != 0) {
1844 uma_zfree_internal(zone, item, udata,
1845 SKIP_DTOR);
1846 return (NULL);
1847 }
1848 }
1849 if (flags & M_ZERO)
1850 bzero(item, zone->uz_keg->uk_size);
1851 return (item);
1852 } else if (cache->uc_freebucket) {
1853 /*
1854 * We have run out of items in our allocbucket.
1855 * See if we can switch with our free bucket.
1856 */
1857 if (cache->uc_freebucket->ub_cnt > 0) {
1858#ifdef UMA_DEBUG_ALLOC
1859 printf("uma_zalloc: Swapping empty with"
1860 " alloc.\n");
1861#endif
1862 bucket = cache->uc_freebucket;
1863 cache->uc_freebucket = cache->uc_allocbucket;
1864 cache->uc_allocbucket = bucket;
1865
1866 goto zalloc_start;
1867 }
1868 }
1869 }
1870 ZONE_LOCK(zone);
1871 /* Since we have locked the zone we may as well send back our stats */
1872 zone->uz_allocs += cache->uc_allocs;
1873 cache->uc_allocs = 0;
1874
1875 /* Our old one is now a free bucket */
1876 if (cache->uc_allocbucket) {
1877 KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1878 ("uma_zalloc_arg: Freeing a non free bucket."));
1879 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1880 cache->uc_allocbucket, ub_link);
1881 cache->uc_allocbucket = NULL;
1882 }
1883
1884 /* Check the free list for a new alloc bucket */
1885 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1886 KASSERT(bucket->ub_cnt != 0,
1887 ("uma_zalloc_arg: Returning an empty bucket."));
1888
1889 LIST_REMOVE(bucket, ub_link);
1890 cache->uc_allocbucket = bucket;
1891 ZONE_UNLOCK(zone);
1892 goto zalloc_start;
1893 }
1894 /* We are no longer associated with this cpu!!! */
1895 CPU_UNLOCK(cpu);
1896
1897 /* Bump up our uz_count so we get here less */
1898 if (zone->uz_count < BUCKET_MAX)
1899 zone->uz_count++;
1900
1901 /*
1902 * Now lets just fill a bucket and put it on the free list. If that
 1903	 * works we'll restart the allocation from the beginning.
1904 */
1905 if (uma_zalloc_bucket(zone, flags)) {
1906 ZONE_UNLOCK(zone);
1907 goto zalloc_restart;
1908 }
1909 ZONE_UNLOCK(zone);
1910 /*
1911 * We may not be able to get a bucket so return an actual item.
1912 */
1913#ifdef UMA_DEBUG
1914 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1915#endif
1916
1917 return (uma_zalloc_internal(zone, udata, flags));
1918}
1919
1920static uma_slab_t
1921uma_zone_slab(uma_zone_t zone, int flags)
1922{
1923 uma_slab_t slab;
1924 uma_keg_t keg;
1925
1926 keg = zone->uz_keg;
1927
1928 /*
1929 * This is to prevent us from recursively trying to allocate
1930 * buckets. The problem is that if an allocation forces us to
1931 * grab a new bucket we will call page_alloc, which will go off
1932 * and cause the vm to allocate vm_map_entries. If we need new
1933 * buckets there too we will recurse in kmem_alloc and bad
1934 * things happen. So instead we return a NULL bucket, and make
1935	 * the code that allocates buckets smart enough to deal with it.
1936 *
1937 * XXX: While we want this protection for the bucket zones so that
1938 * recursion from the VM is handled (and the calling code that
1939 * allocates buckets knows how to deal with it), we do not want
1940 * to prevent allocation from the slab header zones (slabzone
1941 * and slabrefzone) if uk_recurse is not zero for them. The
1942 * reason is that it could lead to NULL being returned for
1943 * slab header allocations even in the M_WAITOK case, and the
1944 * caller can't handle that.
1945 */
1946 if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
1947 if ((zone != slabzone) && (zone != slabrefzone))
1948 return (NULL);
1949
1950 slab = NULL;
1951
1952 for (;;) {
1953 /*
1954 * Find a slab with some space. Prefer slabs that are partially
1955 * used over those that are totally full. This helps to reduce
1956 * fragmentation.
1957 */
1958 if (keg->uk_free != 0) {
1959 if (!LIST_EMPTY(&keg->uk_part_slab)) {
1960 slab = LIST_FIRST(&keg->uk_part_slab);
1961 } else {
1962 slab = LIST_FIRST(&keg->uk_free_slab);
1963 LIST_REMOVE(slab, us_link);
1964 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
1965 us_link);
1966 }
1967 return (slab);
1968 }
1969
1970 /*
1971 * M_NOVM means don't ask at all!
1972 */
1973 if (flags & M_NOVM)
1974 break;
1975
1976 if (keg->uk_maxpages &&
1977 keg->uk_pages >= keg->uk_maxpages) {
1978 keg->uk_flags |= UMA_ZFLAG_FULL;
1979
1980 if (flags & M_NOWAIT)
1981 break;
1982 else
1983 msleep(keg, &keg->uk_lock, PVM,
1984 "zonelimit", 0);
1985 continue;
1986 }
1987 keg->uk_recurse++;
1988 slab = slab_zalloc(zone, flags);
1989 keg->uk_recurse--;
1990
1991 /*
1992 * If we got a slab here it's safe to mark it partially used
1993 * and return. We assume that the caller is going to remove
1994 * at least one item.
1995 */
1996 if (slab) {
1997 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
1998 return (slab);
1999 }
2000 /*
2001 * We might not have been able to get a slab but another cpu
2002 * could have while we were unlocked. Check again before we
2003 * fail.
2004 */
2005 if (flags & M_NOWAIT)
2006 flags |= M_NOVM;
2007 }
2008 return (slab);
2009}
2010
2011static void *
2012uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
2013{
2014 uma_keg_t keg;
2015 uma_slabrefcnt_t slabref;
2016 void *item;
2017 u_int8_t freei;
2018
2019 keg = zone->uz_keg;
2020
2021 freei = slab->us_firstfree;
2022 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2023 slabref = (uma_slabrefcnt_t)slab;
2024 slab->us_firstfree = slabref->us_freelist[freei].us_item;
2025 } else {
2026 slab->us_firstfree = slab->us_freelist[freei].us_item;
2027 }
2028 item = slab->us_data + (keg->uk_rsize * freei);
2029
2030 slab->us_freecount--;
2031 keg->uk_free--;
2032#ifdef INVARIANTS
2033 uma_dbg_alloc(zone, slab, item);
2034#endif
2035 /* Move this slab to the full list */
2036 if (slab->us_freecount == 0) {
2037 LIST_REMOVE(slab, us_link);
2038 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2039 }
2040
2041 return (item);
2042}
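
/*
 * Note on the index arithmetic above: the item handed out lives at
 * us_data + uk_rsize * freei, and us_firstfree is advanced to the next
 * link in the embedded free list.  With hypothetical numbers, uk_rsize
 * == 64 and us_firstfree == 3 would yield the item at us_data + 192,
 * with us_firstfree becoming us_freelist[3].us_item.
 */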
2043
2044static int
2045uma_zalloc_bucket(uma_zone_t zone, int flags)
2046{
2047 uma_bucket_t bucket;
2048 uma_slab_t slab;
2049 int16_t saved;
2050 int max, origflags = flags;
2051
2052 /*
2053 * Try this zone's free list first so we don't allocate extra buckets.
2054 */
2055 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2056 KASSERT(bucket->ub_cnt == 0,
2057 ("uma_zalloc_bucket: Bucket on free list is not empty."));
2058 LIST_REMOVE(bucket, ub_link);
2059 } else {
2060 int bflags;
2061
2062 bflags = (flags & ~M_ZERO);
2063 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2064 bflags |= M_NOVM;
2065
2066 ZONE_UNLOCK(zone);
2067 bucket = bucket_alloc(zone->uz_count, bflags);
2068 ZONE_LOCK(zone);
2069 }
2070
2071 if (bucket == NULL)
2072 return (0);
2073
2074#ifdef SMP
2075 /*
2076 * This code is here to limit the number of simultaneous bucket fills
2077 * for any given zone to the number of per cpu caches in this zone. This
2078 * is done so that we don't allocate more memory than we really need.
2079 */
2080 if (zone->uz_fills >= mp_ncpus)
2081 goto done;
2082
2083#endif
2084 zone->uz_fills++;
2085
2086 max = MIN(bucket->ub_entries, zone->uz_count);
2087 /* Try to keep the buckets totally full */
2088 saved = bucket->ub_cnt;
2089 while (bucket->ub_cnt < max &&
2090 (slab = uma_zone_slab(zone, flags)) != NULL) {
2091 while (slab->us_freecount && bucket->ub_cnt < max) {
2092 bucket->ub_bucket[bucket->ub_cnt++] =
2093 uma_slab_alloc(zone, slab);
2094 }
2095
2096 /* Don't block on the next fill */
2097 flags |= M_NOWAIT;
2098 }
2099
2100 /*
2101 * We unlock here because we need to call the zone's init.
2102 * It should be safe to unlock because the slab dealt with
2103 * above is already on the appropriate list within the keg
2104 * and the bucket we filled is not yet on any list, so we
2105 * own it.
2106 */
2107 if (zone->uz_init != NULL) {
2108 int i;
2109
2110 ZONE_UNLOCK(zone);
2111 for (i = saved; i < bucket->ub_cnt; i++)
2112 if (zone->uz_init(bucket->ub_bucket[i],
2113 zone->uz_keg->uk_size, origflags) != 0)
2114 break;
2115 /*
2116 * If we couldn't initialize the whole bucket, put the
2117 * rest back onto the freelist.
2118 */
2119 if (i != bucket->ub_cnt) {
2120 int j;
2121
2122 for (j = i; j < bucket->ub_cnt; j++) {
2123 uma_zfree_internal(zone, bucket->ub_bucket[j],
2124 NULL, SKIP_FINI);
2125#ifdef INVARIANTS
2126 bucket->ub_bucket[j] = NULL;
2127#endif
2128 }
2129 bucket->ub_cnt = i;
2130 }
2131 ZONE_LOCK(zone);
2132 }
2133
2134 zone->uz_fills--;
2135 if (bucket->ub_cnt != 0) {
2136 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2137 bucket, ub_link);
2138 return (1);
2139 }
2140#ifdef SMP
2141done:
2142#endif
2143 bucket_free(bucket);
2144
2145 return (0);
2146}
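
/*
 * To summarize the bucket fill above: reuse an empty bucket from
 * uz_free_bucket if one exists, otherwise allocate one with the zone
 * lock dropped; limit concurrent fills to mp_ncpus on SMP; pull items
 * from slabs until the bucket holds min(ub_entries, uz_count) items;
 * run the zone init (if any) on the new items with the lock dropped;
 * and finally hang any non-empty bucket on uz_full_bucket.
 */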
2147/*
2148 * Allocates an item for an internal zone
2149 *
2150 * Arguments
2151 * zone The zone to alloc for.
2152 * udata The data to be passed to the constructor.
2153 * flags M_WAITOK, M_NOWAIT, M_ZERO.
2154 *
2155 * Returns
2156 * NULL if there is no memory and M_NOWAIT is set
2157 * An item if successful
2158 */
2159
2160static void *
2161uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
2162{
2163 uma_keg_t keg;
2164 uma_slab_t slab;
2165 void *item;
2166
2167 item = NULL;
2168 keg = zone->uz_keg;
2169
2170#ifdef UMA_DEBUG_ALLOC
2171 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2172#endif
2173 ZONE_LOCK(zone);
2174
2175 slab = uma_zone_slab(zone, flags);
2176 if (slab == NULL) {
2177 ZONE_UNLOCK(zone);
2178 return (NULL);
2179 }
2180
2181 item = uma_slab_alloc(zone, slab);
2182
2183 ZONE_UNLOCK(zone);
2184
2185 /*
2186 * We have to call both the zone's init (not the keg's init)
2187 * and the zone's ctor. This is because the item is going from
2188 * a keg slab directly to the user, and the user is expecting it
2189 * to be both zone-init'd as well as zone-ctor'd.
2190 */
2191 if (zone->uz_init != NULL) {
2192 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
2193 uma_zfree_internal(zone, item, udata, SKIP_FINI);
2194 return (NULL);
2195 }
2196 }
2197 if (zone->uz_ctor != NULL) {
2198 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
2199 uma_zfree_internal(zone, item, udata, SKIP_DTOR);
2200 return (NULL);
2201 }
2202 }
2203 if (flags & M_ZERO)
2204 bzero(item, keg->uk_size);
2205
2206 return (item);
2207}
2208
2209/* See uma.h */
2210void
2211uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2212{
2213 uma_keg_t keg;
2214 uma_cache_t cache;
2215 uma_bucket_t bucket;
2216 int bflags;
2217 int cpu;
2218 enum zfreeskip skip;
2219
2220 /* This is the fast path free */
2221 skip = SKIP_NONE;
2222 keg = zone->uz_keg;
2223
2224#ifdef UMA_DEBUG_ALLOC_1
2225 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2226#endif
2227 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2228 zone->uz_name);
2229
2230 /*
2231 * The race here is acceptable. If we miss it we'll just have to wait
2232 * a little longer for the limits to be reset.
2233 */
2234
2235 if (keg->uk_flags & UMA_ZFLAG_FULL)
2236 goto zfree_internal;
2237
2238 if (zone->uz_dtor) {
2239 zone->uz_dtor(item, keg->uk_size, udata);
2240 skip = SKIP_DTOR;
2241 }
2242
2243zfree_restart:
2244 cpu = PCPU_GET(cpuid);
2245 CPU_LOCK(cpu);
2246 cache = &zone->uz_cpu[cpu];
2247
2248zfree_start:
2249 bucket = cache->uc_freebucket;
2250
2251 if (bucket) {
2252 /*
2253 * Do we have room in our bucket? It is OK for this uz count
2254 * check to be slightly out of sync.
2255 */
2256
2257 if (bucket->ub_cnt < bucket->ub_entries) {
2258 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2259 ("uma_zfree: Freeing to non free bucket index."));
2260 bucket->ub_bucket[bucket->ub_cnt] = item;
2261 bucket->ub_cnt++;
2262#ifdef INVARIANTS
2263 ZONE_LOCK(zone);
2264 if (keg->uk_flags & UMA_ZONE_MALLOC)
2265 uma_dbg_free(zone, udata, item);
2266 else
2267 uma_dbg_free(zone, NULL, item);
2268 ZONE_UNLOCK(zone);
2269#endif
2270 CPU_UNLOCK(cpu);
2271 return;
2272 } else if (cache->uc_allocbucket) {
2273#ifdef UMA_DEBUG_ALLOC
2274 printf("uma_zfree: Swapping buckets.\n");
2275#endif
2276 /*
2277 * We have run out of space in our freebucket.
2278 * See if we can switch with our alloc bucket.
2279 */
2280 if (cache->uc_allocbucket->ub_cnt <
2281 cache->uc_freebucket->ub_cnt) {
2282 bucket = cache->uc_freebucket;
2283 cache->uc_freebucket = cache->uc_allocbucket;
2284 cache->uc_allocbucket = bucket;
2285 goto zfree_start;
2286 }
2287 }
2288 }
2289 /*
2290 * We can get here for two reasons:
2291 *
2292 * 1) The buckets are NULL
2293 * 2) The alloc and free buckets are both somewhat full.
2294 */
2295
2296 ZONE_LOCK(zone);
2297
2298 bucket = cache->uc_freebucket;
2299 cache->uc_freebucket = NULL;
2300
2301 /* Can we throw this on the zone full list? */
2302 if (bucket != NULL) {
2303#ifdef UMA_DEBUG_ALLOC
2304 printf("uma_zfree: Putting old bucket on the free list.\n");
2305#endif
2306 /* ub_cnt is pointing to the last free item */
2307 KASSERT(bucket->ub_cnt != 0,
2308 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2309 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2310 bucket, ub_link);
2311 }
2312 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2313 LIST_REMOVE(bucket, ub_link);
2314 ZONE_UNLOCK(zone);
2315 cache->uc_freebucket = bucket;
2316 goto zfree_start;
2317 }
2318 /* We're done with this CPU now */
2319 CPU_UNLOCK(cpu);
2320
2321 /* And the zone.. */
2322 ZONE_UNLOCK(zone);
2323
2324#ifdef UMA_DEBUG_ALLOC
2325 printf("uma_zfree: Allocating new free bucket.\n");
2326#endif
2327 bflags = M_NOWAIT;
2328
2329 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2330 bflags |= M_NOVM;
2331 bucket = bucket_alloc(zone->uz_count, bflags);
2332 if (bucket) {
2333 ZONE_LOCK(zone);
2334 LIST_INSERT_HEAD(&zone->uz_free_bucket,
2335 bucket, ub_link);
2336 ZONE_UNLOCK(zone);
2337 goto zfree_restart;
2338 }
2339
2340 /*
2341 * If nothing else caught this, we'll just do an internal free.
2342 */
2343
2344zfree_internal:
2345
2346#ifdef INVARIANTS
2347 /*
2348 * If we need to skip the dtor and the uma_dbg_free in
2349 * uma_zfree_internal because we've already called the dtor
2350 * above, but we ended up here, then we need to make sure
2351 * that we take care of the uma_dbg_free immediately.
2352 */
2353 if (skip) {
2354 ZONE_LOCK(zone);
2355 if (keg->uk_flags & UMA_ZONE_MALLOC)
2356 uma_dbg_free(zone, udata, item);
2357 else
2358 uma_dbg_free(zone, NULL, item);
2359 ZONE_UNLOCK(zone);
2360 }
2361#endif
2362 uma_zfree_internal(zone, item, udata, skip);
2363
2364 return;
2365}
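
/*
 * The free path above mirrors the allocator: run the dtor, then try to
 * push the item onto the per-cpu free bucket; if that bucket is full,
 * swap it with the alloc bucket when the latter has more room;
 * otherwise move the full bucket to uz_full_bucket, pick up an empty
 * bucket from uz_free_bucket (or allocate one with M_NOWAIT) and
 * retry.  Zones whose keg is at its limit (UMA_ZFLAG_FULL) bypass the
 * caches entirely so that uma_zfree_internal() can wake up waiters.
 */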
2366
2367/*
2368 * Frees an item to an INTERNAL zone or allocates a free bucket
2369 *
2370 * Arguments:
2371 * zone The zone to free to
2372 * item The item we're freeing
2373 * udata User supplied data for the dtor
2374 * skip Skip dtors and finis
2375 */
2376static void
2377uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
2378 enum zfreeskip skip)
2379{
2380 uma_slab_t slab;
2381 uma_slabrefcnt_t slabref;
2382 uma_keg_t keg;
2383 u_int8_t *mem;
2384 u_int8_t freei;
2385
2386 keg = zone->uz_keg;
2387
2388 if (skip < SKIP_DTOR && zone->uz_dtor)
2389 zone->uz_dtor(item, keg->uk_size, udata);
2390 if (skip < SKIP_FINI && zone->uz_fini)
2391 zone->uz_fini(item, keg->uk_size);
2392
2393 ZONE_LOCK(zone);
2394
2395 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
2396 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2397 if (keg->uk_flags & UMA_ZONE_HASH)
2398 slab = hash_sfind(&keg->uk_hash, mem);
2399 else {
2400 mem += keg->uk_pgoff;
2401 slab = (uma_slab_t)mem;
2402 }
2403 } else {
2404 slab = (uma_slab_t)udata;
2405 }
2406
2407 /* Do we need to remove from any lists? */
2408 if (slab->us_freecount+1 == keg->uk_ipers) {
2409 LIST_REMOVE(slab, us_link);
2410 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2411 } else if (slab->us_freecount == 0) {
2412 LIST_REMOVE(slab, us_link);
2413 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2414 }
2415
2416 /* Slab management stuff */
2417 freei = ((unsigned long)item - (unsigned long)slab->us_data)
2418 / keg->uk_rsize;
2419
2420#ifdef INVARIANTS
2421 if (!skip)
2422 uma_dbg_free(zone, slab, item);
2423#endif
2424
2425 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2426 slabref = (uma_slabrefcnt_t)slab;
2427 slabref->us_freelist[freei].us_item = slab->us_firstfree;
2428 } else {
2429 slab->us_freelist[freei].us_item = slab->us_firstfree;
2430 }
2431 slab->us_firstfree = freei;
2432 slab->us_freecount++;
2433
2434 /* Zone statistics */
2435 keg->uk_free++;
2436
2437 if (keg->uk_flags & UMA_ZFLAG_FULL) {
2438 if (keg->uk_pages < keg->uk_maxpages)
2439 keg->uk_flags &= ~UMA_ZFLAG_FULL;
2440
2441 /* We can handle one more allocation */
2442 wakeup_one(keg);
2443 }
2444
2445 ZONE_UNLOCK(zone);
2446}
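
/*
 * Slab lookup above: for ordinary zones the slab header is found by
 * masking the item address down to its slab (UMA_SLAB_MASK) and then
 * either consulting the keg hash or adding uk_pgoff; UMA_ZONE_MALLOC
 * zones pass the slab in via udata instead.  The freed item's index is
 * recovered as (item - us_data) / uk_rsize and pushed back onto the
 * head of the embedded free list.
 */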
2447
2448/* See uma.h */
2449void
2450uma_zone_set_max(uma_zone_t zone, int nitems)
2451{
2452 uma_keg_t keg;
2453
2454 keg = zone->uz_keg;
2455 ZONE_LOCK(zone);
2456 if (keg->uk_ppera > 1)
2457 keg->uk_maxpages = nitems * keg->uk_ppera;
2458 else
2459 keg->uk_maxpages = nitems / keg->uk_ipers;
2460
2461 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2462 keg->uk_maxpages++;
2463
2464 ZONE_UNLOCK(zone);
2465}
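
/*
 * Because the limit is kept in pages, the item count is effectively
 * rounded up to a whole number of slabs.  With hypothetical numbers,
 * uk_ipers == 100, uk_ppera == 1 and nitems == 250 gives
 * uk_maxpages = 250 / 100 = 2, which is then bumped to 3 because
 * 2 * 100 < 250, for an effective cap of 300 items.
 */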
2466
2467/* See uma.h */
2468void
2469uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2470{
2471 ZONE_LOCK(zone);
2472 KASSERT(zone->uz_keg->uk_pages == 0,
2473 ("uma_zone_set_init on non-empty keg"));
2474 zone->uz_keg->uk_init = uminit;
2475 ZONE_UNLOCK(zone);
2476}
2477
2478/* See uma.h */
2479void
2480uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2481{
2482 ZONE_LOCK(zone);
2483 KASSERT(zone->uz_keg->uk_pages == 0,
2484 ("uma_zone_set_fini on non-empty keg"));
2485 zone->uz_keg->uk_fini = fini;
2486 ZONE_UNLOCK(zone);
2487}
2488
2489/* See uma.h */
2490void
2491uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2492{
2493 ZONE_LOCK(zone);
2494 KASSERT(zone->uz_keg->uk_pages == 0,
2495 ("uma_zone_set_zinit on non-empty keg"));
2496 zone->uz_init = zinit;
2497 ZONE_UNLOCK(zone);
2498}
2499
2500/* See uma.h */
2501void
2502uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2503{
2504 ZONE_LOCK(zone);
2505 KASSERT(zone->uz_keg->uk_pages == 0,
2506 ("uma_zone_set_zfini on non-empty keg"));
2507 zone->uz_fini = zfini;
2508 ZONE_UNLOCK(zone);
2509}
2510
2511/* See uma.h */
2512/* XXX uk_freef is not actually used with the zone locked */
2513void
2514uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2515{
2516 ZONE_LOCK(zone);
2517 zone->uz_keg->uk_freef = freef;
2518 ZONE_UNLOCK(zone);
2519}
2520
2521/* See uma.h */
2522/* XXX uk_allocf is not actually used with the zone locked */
2523void
2524uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2525{
2526 ZONE_LOCK(zone);
2527 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2528 zone->uz_keg->uk_allocf = allocf;
2529 ZONE_UNLOCK(zone);
2530}
2531
2532/* See uma.h */
2533int
2534uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2535{
2536 uma_keg_t keg;
2537 vm_offset_t kva;
2538 int pages;
2539
2540 keg = zone->uz_keg;
2541 pages = count / keg->uk_ipers;
2542
2543 if (pages * keg->uk_ipers < count)
2544 pages++;
2545
2546 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2547
2548 if (kva == 0)
2549 return (0);
2550 if (obj == NULL) {
2551 obj = vm_object_allocate(OBJT_DEFAULT,
2552 pages);
2553 } else {
2554 VM_OBJECT_LOCK_INIT(obj, "uma object");
2555 _vm_object_allocate(OBJT_DEFAULT,
2556 pages, obj);
2557 }
2558 ZONE_LOCK(zone);
2559 keg->uk_kva = kva;
2560 keg->uk_obj = obj;
2561 keg->uk_maxpages = pages;
2562 keg->uk_allocf = obj_alloc;
2563 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2564 ZONE_UNLOCK(zone);
2565 return (1);
2566}
2567
2568/* See uma.h */
2569void
2570uma_prealloc(uma_zone_t zone, int items)
2571{
2572 int slabs;
2573 uma_slab_t slab;
2574 uma_keg_t keg;
2575
2576 keg = zone->uz_keg;
2577 ZONE_LOCK(zone);
2578 slabs = items / keg->uk_ipers;
2579 if (slabs * keg->uk_ipers < items)
2580 slabs++;
2581 while (slabs > 0) {
2582 slab = slab_zalloc(zone, M_WAITOK);
2583 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2584 slabs--;
2585 }
2586 ZONE_UNLOCK(zone);
2587}
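
/*
 * The slab count above is just items / uk_ipers rounded up; e.g.
 * (hypothetically) items == 250 with uk_ipers == 100 preallocates
 * 3 slabs.
 */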
2588
2589/* See uma.h */
2590u_int32_t *
2591uma_find_refcnt(uma_zone_t zone, void *item)
2592{
2593 uma_slabrefcnt_t slabref;
2594 uma_keg_t keg;
2595 u_int32_t *refcnt;
2596 int idx;
2597
2598 keg = zone->uz_keg;
2599 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2600 (~UMA_SLAB_MASK));
2601 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2602 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2603 idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2604 / keg->uk_rsize;
2605 refcnt = &slabref->us_freelist[idx].us_refcnt;
2606 return refcnt;
2607}
2608
2609/* See uma.h */
2610void
2611uma_reclaim(void)
2612{
2613#ifdef UMA_DEBUG
2614 printf("UMA: vm asked us to release pages!\n");
2615#endif
2616 bucket_enable();
2617 zone_foreach(zone_drain);
2618 /*
2619	 * Some slabs may have been freed, but these zones are visited early in
2620	 * the walk above, so visit them again here to free pages that only became
2621	 * empty once the other zones were drained. We have to do the same for buckets.
2622 */
2623 zone_drain(slabzone);
2624 zone_drain(slabrefzone);
2625 bucket_zone_drain();
2626}
2627
2628void *
2629uma_large_malloc(int size, int wait)
2630{
2631 void *mem;
2632 uma_slab_t slab;
2633 u_int8_t flags;
2634
2635 slab = uma_zalloc_internal(slabzone, NULL, wait);
2636 if (slab == NULL)
2637 return (NULL);
2638 mem = page_alloc(NULL, size, &flags, wait);
2639 if (mem) {
2640 vsetslab((vm_offset_t)mem, slab);
2641 slab->us_data = mem;
2642 slab->us_flags = flags | UMA_SLAB_MALLOC;
2643 slab->us_size = size;
2644 } else {
2645 uma_zfree_internal(slabzone, slab, NULL, 0);
2646 }
2647
2648 return (mem);
2649}
2650
2651void
2652uma_large_free(uma_slab_t slab)
2653{
2654 vsetobj((vm_offset_t)slab->us_data, kmem_object);
2655 page_free(slab->us_data, slab->us_size, slab->us_flags);
2656 uma_zfree_internal(slabzone, slab, NULL, 0);
2657}
2658
2659void
2660uma_print_stats(void)
2661{
2662 zone_foreach(uma_print_zone);
2663}
2664
2665static void
2666slab_print(uma_slab_t slab)
2667{
2668 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2669 slab->us_keg, slab->us_data, slab->us_freecount,
2670 slab->us_firstfree);
2671}
2672
2673static void
2674cache_print(uma_cache_t cache)
2675{
2676 printf("alloc: %p(%d), free: %p(%d)\n",
2677 cache->uc_allocbucket,
2678 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2679 cache->uc_freebucket,
2680 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2681}
2682
2683void
2684uma_print_zone(uma_zone_t zone)
2685{
2686 uma_cache_t cache;
2687 uma_keg_t keg;
2688 uma_slab_t slab;
2689 int i;
2690
2691 keg = zone->uz_keg;
2692 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2693 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2694 keg->uk_ipers, keg->uk_ppera,
2695 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
2696 printf("Part slabs:\n");
2697 LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2698 slab_print(slab);
2699 printf("Free slabs:\n");
2700 LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2701 slab_print(slab);
2702 printf("Full slabs:\n");
2703 LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2704 slab_print(slab);
2705 for (i = 0; i <= mp_maxid; i++) {
2706 if (CPU_ABSENT(i))
2707 continue;
2708 cache = &zone->uz_cpu[i];
2709 printf("CPU %d Cache:\n", i);
2710 cache_print(cache);
2711 }
2712}
2713
2714/*
2715 * Sysctl handler for vm.zone
2716 *
2717 * stolen from vm_zone.c
2718 */
2719static int
2720sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2721{
2722 int error, len, cnt;
2723 const int linesize = 128; /* conservative */
2724 int totalfree;
2725 char *tmpbuf, *offset;
2726 uma_zone_t z;
2727 uma_keg_t zk;
2728 char *p;
2729 int cpu;
2730 int cachefree;
2731 uma_bucket_t bucket;
2732 uma_cache_t cache;
2733
2734 cnt = 0;
2735 mtx_lock(&uma_mtx);
2736 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2737 LIST_FOREACH(z, &zk->uk_zones, uz_link)
2738 cnt++;
2739 }
2740 mtx_unlock(&uma_mtx);
2741 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2742 M_TEMP, M_WAITOK);
2743 len = snprintf(tmpbuf, linesize,
2744 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2745 if (cnt == 0)
2746 tmpbuf[len - 1] = '\0';
2747 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2748 if (error || cnt == 0)
2749 goto out;
2750 offset = tmpbuf;
2751 mtx_lock(&uma_mtx);
2752 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2753 LIST_FOREACH(z, &zk->uk_zones, uz_link) {
2754 if (cnt == 0) /* list may have changed size */
2755 break;
2756 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2757 for (cpu = 0; cpu <= mp_maxid; cpu++) {
2758 if (CPU_ABSENT(cpu))
2759 continue;
2760 CPU_LOCK(cpu);
2761 }
2762 }
2763 ZONE_LOCK(z);
2764 cachefree = 0;
2765 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2766 for (cpu = 0; cpu <= mp_maxid; cpu++) {
2767 if (CPU_ABSENT(cpu))
2768 continue;
2769 cache = &z->uz_cpu[cpu];
2770 if (cache->uc_allocbucket != NULL)
2771 cachefree += cache->uc_allocbucket->ub_cnt;
2772 if (cache->uc_freebucket != NULL)
2773 cachefree += cache->uc_freebucket->ub_cnt;
2774 CPU_UNLOCK(cpu);
2775 }
2776 }
2777 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
2778 cachefree += bucket->ub_cnt;
2779 }
2780 totalfree = zk->uk_free + cachefree;
2781 len = snprintf(offset, linesize,
2782 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2783 z->uz_name, zk->uk_size,
2784 zk->uk_maxpages * zk->uk_ipers,
2785 (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
2786 totalfree,
2787 (unsigned long long)z->uz_allocs);
2788 ZONE_UNLOCK(z);
2789 for (p = offset + 12; p > offset && *p == ' '; --p)
2790 /* nothing */ ;
2791 p[1] = ':';
2792 cnt--;
2793 offset += len;
2794 }
2795 }
2796 mtx_unlock(&uma_mtx);
2797 *offset++ = '\0';
2798 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2799out:
2800 FREE(tmpbuf, M_TEMP);
2801 return (error);
2802}
52
53/* I should really use ktr.. */
54/*
55#define UMA_DEBUG 1
56#define UMA_DEBUG_ALLOC 1
57#define UMA_DEBUG_ALLOC_1 1
58*/
59
60#include "opt_param.h"
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/kernel.h>
64#include <sys/types.h>
65#include <sys/queue.h>
66#include <sys/malloc.h>
67#include <sys/ktr.h>
68#include <sys/lock.h>
69#include <sys/sysctl.h>
70#include <sys/mutex.h>
71#include <sys/proc.h>
72#include <sys/smp.h>
73#include <sys/vmmeter.h>
74
75#include <vm/vm.h>
76#include <vm/vm_object.h>
77#include <vm/vm_page.h>
78#include <vm/vm_param.h>
79#include <vm/vm_map.h>
80#include <vm/vm_kern.h>
81#include <vm/vm_extern.h>
82#include <vm/uma.h>
83#include <vm/uma_int.h>
84#include <vm/uma_dbg.h>
85
86#include <machine/vmparam.h>
87
88/*
89 * This is the zone and keg from which all zones are spawned. The idea is that
90 * even the zone & keg heads are allocated from the allocator, so we use the
91 * bss section to bootstrap us.
92 */
93static struct uma_keg masterkeg;
94static struct uma_zone masterzone_k;
95static struct uma_zone masterzone_z;
96static uma_zone_t kegs = &masterzone_k;
97static uma_zone_t zones = &masterzone_z;
98
99/* This is the zone from which all of uma_slab_t's are allocated. */
100static uma_zone_t slabzone;
101static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
102
103/*
104 * The initial hash tables come out of this zone so they can be allocated
105 * prior to malloc coming up.
106 */
107static uma_zone_t hashzone;
108
109static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
110
111/*
112 * Are we allowed to allocate buckets?
113 */
114static int bucketdisable = 1;
115
116/* Linked list of all kegs in the system */
117static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
118
119/* This mutex protects the keg list */
120static struct mtx uma_mtx;
121
122/* These are the pcpu cache locks */
123static struct mtx uma_pcpu_mtx[MAXCPU];
124
125/* Linked list of boot time pages */
126static LIST_HEAD(,uma_slab) uma_boot_pages =
127 LIST_HEAD_INITIALIZER(&uma_boot_pages);
128
129/* Count of free boottime pages */
130static int uma_boot_free = 0;
131
132/* Is the VM done starting up? */
133static int booted = 0;
134
135/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
136static u_int uma_max_ipers;
137static u_int uma_max_ipers_ref;
138
139/*
140 * This is the handle used to schedule events that need to happen
141 * outside of the allocation fast path.
142 */
143static struct callout uma_callout;
144#define UMA_TIMEOUT 20 /* Seconds for callout interval. */
145
146/*
147 * This structure is passed as the zone ctor arg so that I don't have to create
148 * a special allocation function just for zones.
149 */
150struct uma_zctor_args {
151 char *name;
152 size_t size;
153 uma_ctor ctor;
154 uma_dtor dtor;
155 uma_init uminit;
156 uma_fini fini;
157 uma_keg_t keg;
158 int align;
159 u_int16_t flags;
160};
161
162struct uma_kctor_args {
163 uma_zone_t zone;
164 size_t size;
165 uma_init uminit;
166 uma_fini fini;
167 int align;
168 u_int16_t flags;
169};
170
171struct uma_bucket_zone {
172 uma_zone_t ubz_zone;
173 char *ubz_name;
174 int ubz_entries;
175};
176
177#define BUCKET_MAX 128
178
179struct uma_bucket_zone bucket_zones[] = {
180 { NULL, "16 Bucket", 16 },
181 { NULL, "32 Bucket", 32 },
182 { NULL, "64 Bucket", 64 },
183 { NULL, "128 Bucket", 128 },
184 { NULL, NULL, 0}
185};
186
187#define BUCKET_SHIFT 4
188#define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
189
190/*
191 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
192 * of approximately the right size.
193 */
194static uint8_t bucket_size[BUCKET_ZONES];
195
196enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
197
198/* Prototypes.. */
199
200static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
201static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
202static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
203static void page_free(void *, int, u_int8_t);
204static uma_slab_t slab_zalloc(uma_zone_t, int);
205static void cache_drain(uma_zone_t);
206static void bucket_drain(uma_zone_t, uma_bucket_t);
207static void bucket_cache_drain(uma_zone_t zone);
208static int keg_ctor(void *, int, void *, int);
209static void keg_dtor(void *, int, void *);
210static int zone_ctor(void *, int, void *, int);
211static void zone_dtor(void *, int, void *);
212static int zero_init(void *, int, int);
213static void zone_small_init(uma_zone_t zone);
214static void zone_large_init(uma_zone_t zone);
215static void zone_foreach(void (*zfunc)(uma_zone_t));
216static void zone_timeout(uma_zone_t zone);
217static int hash_alloc(struct uma_hash *);
218static int hash_expand(struct uma_hash *, struct uma_hash *);
219static void hash_free(struct uma_hash *hash);
220static void uma_timeout(void *);
221static void uma_startup3(void);
222static void *uma_zalloc_internal(uma_zone_t, void *, int);
223static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip);
224static void bucket_enable(void);
225static void bucket_init(void);
226static uma_bucket_t bucket_alloc(int, int);
227static void bucket_free(uma_bucket_t);
228static void bucket_zone_drain(void);
229static int uma_zalloc_bucket(uma_zone_t zone, int flags);
230static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
231static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
232static void zone_drain(uma_zone_t);
233static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
234 uma_fini fini, int align, u_int16_t flags);
235
236void uma_print_zone(uma_zone_t);
237void uma_print_stats(void);
238static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
239
240#ifdef WITNESS
241static int nosleepwithlocks = 1;
242SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
243 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
244#else
245static int nosleepwithlocks = 0;
246SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
247 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
248#endif
249SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
250 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
251SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
252
253/*
254 * This routine checks to see whether or not it's safe to enable buckets.
255 */
256
257static void
258bucket_enable(void)
259{
260 if (cnt.v_free_count < cnt.v_free_min)
261 bucketdisable = 1;
262 else
263 bucketdisable = 0;
264}
265
266/*
267 * Initialize bucket_zones, the array of zones of buckets of various sizes.
268 *
269 * For each zone, calculate the memory required for each bucket, consisting
270 * of the header and an array of pointers. Initialize bucket_size[] to point
271 * the range of appropriate bucket sizes at the zone.
272 */
273static void
274bucket_init(void)
275{
276 struct uma_bucket_zone *ubz;
277 int i;
278 int j;
279
280 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
281 int size;
282
283 ubz = &bucket_zones[j];
284 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
285 size += sizeof(void *) * ubz->ubz_entries;
286 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
287 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
288 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
289 bucket_size[i >> BUCKET_SHIFT] = j;
290 }
291}
292
293/*
294 * Given a desired number of entries for a bucket, return the zone from which
295 * to allocate the bucket.
296 */
297static struct uma_bucket_zone *
298bucket_zone_lookup(int entries)
299{
300 int idx;
301
302 idx = howmany(entries, 1 << BUCKET_SHIFT);
303 return (&bucket_zones[bucket_size[idx]]);
304}
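
/*
 * With BUCKET_SHIFT == 4 and the four bucket zones above, bucket_init()
 * leaves bucket_size[] as { 0, 0, 1, 2, 2, 3, 3, 3, 3 }, so a request
 * for e.g. 20 entries computes idx = howmany(20, 16) = 2 and returns
 * the "32 Bucket" zone.
 */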
305
306static uma_bucket_t
307bucket_alloc(int entries, int bflags)
308{
309 struct uma_bucket_zone *ubz;
310 uma_bucket_t bucket;
311
312 /*
313 * This is to stop us from allocating per cpu buckets while we're
314 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
315 * boot pages. This also prevents us from allocating buckets in
316 * low memory situations.
317 */
318 if (bucketdisable)
319 return (NULL);
320
321 ubz = bucket_zone_lookup(entries);
322 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
323 if (bucket) {
324#ifdef INVARIANTS
325 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
326#endif
327 bucket->ub_cnt = 0;
328 bucket->ub_entries = ubz->ubz_entries;
329 }
330
331 return (bucket);
332}
333
334static void
335bucket_free(uma_bucket_t bucket)
336{
337 struct uma_bucket_zone *ubz;
338
339 ubz = bucket_zone_lookup(bucket->ub_entries);
340 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE);
341}
342
343static void
344bucket_zone_drain(void)
345{
346 struct uma_bucket_zone *ubz;
347
348 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
349 zone_drain(ubz->ubz_zone);
350}
351
352
353/*
354	 * Routine called by the timeout callout to fire off some time-interval
355	 * based calculations (stats, hash size, etc.).
356 *
357 * Arguments:
358 * arg Unused
359 *
360 * Returns:
361 * Nothing
362 */
363static void
364uma_timeout(void *unused)
365{
366 bucket_enable();
367 zone_foreach(zone_timeout);
368
369 /* Reschedule this event */
370 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
371}
372
373/*
374 * Routine to perform timeout driven calculations. This expands the
375 * hashes and does per cpu statistics aggregation.
376 *
377 * Arguments:
378 * zone The zone to operate on
379 *
380 * Returns:
381 * Nothing
382 */
383static void
384zone_timeout(uma_zone_t zone)
385{
386 uma_keg_t keg;
387 uma_cache_t cache;
388 u_int64_t alloc;
389 int cpu;
390
391 keg = zone->uz_keg;
392 alloc = 0;
393
394 /*
395 * Aggregate per cpu cache statistics back to the zone.
396 *
397 * XXX This should be done in the sysctl handler.
398 *
399 * I may rewrite this to set a flag in the per cpu cache instead of
400 * locking. If the flag is not cleared on the next round I will have
401 * to lock and do it here instead so that the statistics don't get too
402 * far out of sync.
403 */
404 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) {
405 for (cpu = 0; cpu <= mp_maxid; cpu++) {
406 if (CPU_ABSENT(cpu))
407 continue;
408 CPU_LOCK(cpu);
409 cache = &zone->uz_cpu[cpu];
410 /* Add them up, and reset */
411 alloc += cache->uc_allocs;
412 cache->uc_allocs = 0;
413 CPU_UNLOCK(cpu);
414 }
415 }
416
417 /* Now push these stats back into the zone.. */
418 ZONE_LOCK(zone);
419 zone->uz_allocs += alloc;
420
421 /*
422 * Expand the zone hash table.
423 *
424 * This is done if the number of slabs is larger than the hash size.
425	 * What I'm trying to do here is to eliminate collisions entirely. This
426 * may be a little aggressive. Should I allow for two collisions max?
427 */
428
429 if (keg->uk_flags & UMA_ZONE_HASH &&
430 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
431 struct uma_hash newhash;
432 struct uma_hash oldhash;
433 int ret;
434
435 /*
436 * This is so involved because allocating and freeing
437 * while the zone lock is held will lead to deadlock.
438 * I have to do everything in stages and check for
439 * races.
440 */
441 newhash = keg->uk_hash;
442 ZONE_UNLOCK(zone);
443 ret = hash_alloc(&newhash);
444 ZONE_LOCK(zone);
445 if (ret) {
446 if (hash_expand(&keg->uk_hash, &newhash)) {
447 oldhash = keg->uk_hash;
448 keg->uk_hash = newhash;
449 } else
450 oldhash = newhash;
451
452 ZONE_UNLOCK(zone);
453 hash_free(&oldhash);
454 ZONE_LOCK(zone);
455 }
456 }
457 ZONE_UNLOCK(zone);
458}
459
460/*
461 * Allocate and zero fill the next sized hash table from the appropriate
462 * backing store.
463 *
464 * Arguments:
465 * hash A new hash structure with the old hash size in uh_hashsize
466 *
467 * Returns:
468	 * 1 on success and 0 on failure.
469 */
470static int
471hash_alloc(struct uma_hash *hash)
472{
473 int oldsize;
474 int alloc;
475
476 oldsize = hash->uh_hashsize;
477
478	/* We're just going to double the size to the next power of two */
479 if (oldsize) {
480 hash->uh_hashsize = oldsize * 2;
481 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
482 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
483 M_UMAHASH, M_NOWAIT);
484 } else {
485 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
486 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
487 M_WAITOK);
488 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
489 }
490 if (hash->uh_slab_hash) {
491 bzero(hash->uh_slab_hash, alloc);
492 hash->uh_hashmask = hash->uh_hashsize - 1;
493 return (1);
494 }
495
496 return (0);
497}
498
499/*
500 * Expands the hash table for HASH zones. This is done from zone_timeout
501 * to reduce collisions. This must not be done in the regular allocation
502 * path, otherwise, we can recurse on the vm while allocating pages.
503 *
504 * Arguments:
505 * oldhash The hash you want to expand
506 * newhash The hash structure for the new table
507 *
508 * Returns:
509 * Nothing
510 *
511 * Discussion:
512 */
513static int
514hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
515{
516 uma_slab_t slab;
517 int hval;
518 int i;
519
520 if (!newhash->uh_slab_hash)
521 return (0);
522
523 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
524 return (0);
525
526 /*
527 * I need to investigate hash algorithms for resizing without a
528 * full rehash.
529 */
530
531 for (i = 0; i < oldhash->uh_hashsize; i++)
532 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
533 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
534 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
535 hval = UMA_HASH(newhash, slab->us_data);
536 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
537 slab, us_hlink);
538 }
539
540 return (1);
541}
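
/*
 * In other words, every slab is unhooked from its old chain and
 * rehashed by its us_data address into the new table; if the new table
 * was never allocated, or is not actually larger, the old table is
 * left untouched and 0 is returned so the caller keeps it.
 */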
542
543/*
544 * Free the hash bucket to the appropriate backing store.
545 *
546 * Arguments:
547	 * hash The hash structure whose uh_slab_hash array we're freeing;
548	 *	 the backing store is chosen based on uh_hashsize
549 *
550 * Returns:
551 * Nothing
552 */
553static void
554hash_free(struct uma_hash *hash)
555{
556 if (hash->uh_slab_hash == NULL)
557 return;
558 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
559 uma_zfree_internal(hashzone,
560 hash->uh_slab_hash, NULL, SKIP_NONE);
561 else
562 free(hash->uh_slab_hash, M_UMAHASH);
563}
564
565/*
566 * Frees all outstanding items in a bucket
567 *
568 * Arguments:
569 * zone The zone to free to, must be unlocked.
570 * bucket The free/alloc bucket with items, cpu queue must be locked.
571 *
572 * Returns:
573 * Nothing
574 */
575
576static void
577bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
578{
579 uma_slab_t slab;
580 int mzone;
581 void *item;
582
583 if (bucket == NULL)
584 return;
585
586 slab = NULL;
587 mzone = 0;
588
589 /* We have to lookup the slab again for malloc.. */
590 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
591 mzone = 1;
592
593 while (bucket->ub_cnt > 0) {
594 bucket->ub_cnt--;
595 item = bucket->ub_bucket[bucket->ub_cnt];
596#ifdef INVARIANTS
597 bucket->ub_bucket[bucket->ub_cnt] = NULL;
598 KASSERT(item != NULL,
599 ("bucket_drain: botched ptr, item is NULL"));
600#endif
601 /*
602 * This is extremely inefficient. The slab pointer was passed
603 * to uma_zfree_arg, but we lost it because the buckets don't
604 * hold them. This will go away when free() gets a size passed
605 * to it.
606 */
607 if (mzone)
608 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
609 uma_zfree_internal(zone, item, slab, SKIP_DTOR);
610 }
611}
612
613/*
614 * Drains the per cpu caches for a zone.
615 *
616 * Arguments:
617 * zone The zone to drain, must be unlocked.
618 *
619 * Returns:
620 * Nothing
621 */
622static void
623cache_drain(uma_zone_t zone)
624{
625 uma_cache_t cache;
626 int cpu;
627
628 /*
629 * We have to lock each cpu cache before locking the zone
630 */
631 for (cpu = 0; cpu <= mp_maxid; cpu++) {
632 if (CPU_ABSENT(cpu))
633 continue;
634 CPU_LOCK(cpu);
635 cache = &zone->uz_cpu[cpu];
636 bucket_drain(zone, cache->uc_allocbucket);
637 bucket_drain(zone, cache->uc_freebucket);
638 if (cache->uc_allocbucket != NULL)
639 bucket_free(cache->uc_allocbucket);
640 if (cache->uc_freebucket != NULL)
641 bucket_free(cache->uc_freebucket);
642 cache->uc_allocbucket = cache->uc_freebucket = NULL;
643 }
644 ZONE_LOCK(zone);
645 bucket_cache_drain(zone);
646 ZONE_UNLOCK(zone);
647 for (cpu = 0; cpu <= mp_maxid; cpu++) {
648 if (CPU_ABSENT(cpu))
649 continue;
650 CPU_UNLOCK(cpu);
651 }
652}
653
654/*
655 * Drain the cached buckets from a zone. Expects a locked zone on entry.
656 */
657static void
658bucket_cache_drain(uma_zone_t zone)
659{
660 uma_bucket_t bucket;
661
662 /*
663 * Drain the bucket queues and free the buckets, we just keep two per
664 * cpu (alloc/free).
665 */
666 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
667 LIST_REMOVE(bucket, ub_link);
668 ZONE_UNLOCK(zone);
669 bucket_drain(zone, bucket);
670 bucket_free(bucket);
671 ZONE_LOCK(zone);
672 }
673
674 /* Now we do the free queue.. */
675 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
676 LIST_REMOVE(bucket, ub_link);
677 bucket_free(bucket);
678 }
679}
680
681/*
682 * Frees pages from a zone back to the system. This is done on demand from
683 * the pageout daemon.
684 *
685 * Arguments:
686	 * zone The zone to free pages from; only cached buckets and
687	 *	 completely free slabs are released
688 *
689 * Returns:
690 * Nothing.
691 */
692static void
693zone_drain(uma_zone_t zone)
694{
695 struct slabhead freeslabs = { 0 };
696 uma_keg_t keg;
697 uma_slab_t slab;
698 uma_slab_t n;
699 u_int8_t flags;
700 u_int8_t *mem;
701 int i;
702
703 keg = zone->uz_keg;
704
705 /*
706 * We don't want to take pages from statically allocated zones at this
707 * time
708 */
709 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
710 return;
711
712 ZONE_LOCK(zone);
713
714#ifdef UMA_DEBUG
715 printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
716#endif
717 bucket_cache_drain(zone);
718 if (keg->uk_free == 0)
719 goto finished;
720
721 slab = LIST_FIRST(&keg->uk_free_slab);
722 while (slab) {
723 n = LIST_NEXT(slab, us_link);
724
725		/* We have nowhere to free these to */
726 if (slab->us_flags & UMA_SLAB_BOOT) {
727 slab = n;
728 continue;
729 }
730
731 LIST_REMOVE(slab, us_link);
732 keg->uk_pages -= keg->uk_ppera;
733 keg->uk_free -= keg->uk_ipers;
734
735 if (keg->uk_flags & UMA_ZONE_HASH)
736 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
737
738 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
739
740 slab = n;
741 }
742finished:
743 ZONE_UNLOCK(zone);
744
745 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
746 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
747 if (keg->uk_fini)
748 for (i = 0; i < keg->uk_ipers; i++)
749 keg->uk_fini(
750 slab->us_data + (keg->uk_rsize * i),
751 keg->uk_size);
752 flags = slab->us_flags;
753 mem = slab->us_data;
754
755 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
756 (keg->uk_flags & UMA_ZONE_REFCNT)) {
757 for (i = 0; i < keg->uk_ppera; i++)
758 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
759 kmem_object);
760 }
761 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
762 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
763 SKIP_NONE);
764#ifdef UMA_DEBUG
765 printf("%s: Returning %d bytes.\n",
766 zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
767#endif
768 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
769 }
770}
771
772/*
773 * Allocate a new slab for a zone. This does not insert the slab onto a list.
774 *
775 * Arguments:
776 * zone The zone to allocate slabs for
777 * wait Shall we wait?
778 *
779 * Returns:
780 * The slab that was allocated or NULL if there is no memory and the
781 * caller specified M_NOWAIT.
782 */
783static uma_slab_t
784slab_zalloc(uma_zone_t zone, int wait)
785{
786 uma_slabrefcnt_t slabref;
787 uma_slab_t slab;
788 uma_keg_t keg;
789 u_int8_t *mem;
790 u_int8_t flags;
791 int i;
792
793 slab = NULL;
794 keg = zone->uz_keg;
795
796#ifdef UMA_DEBUG
797 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
798#endif
799 ZONE_UNLOCK(zone);
800
801 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
802 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
803 if (slab == NULL) {
804 ZONE_LOCK(zone);
805 return NULL;
806 }
807 }
808
809 /*
810 * This reproduces the old vm_zone behavior of zero filling pages the
811 * first time they are added to a zone.
812 *
813 * Malloced items are zeroed in uma_zalloc.
814 */
815
816 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
817 wait |= M_ZERO;
818 else
819 wait &= ~M_ZERO;
820
821 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
822 &flags, wait);
823 if (mem == NULL) {
824 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
825 uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0);
826 ZONE_LOCK(zone);
827 return (NULL);
828 }
829
830 /* Point the slab into the allocated memory */
831 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
832 slab = (uma_slab_t )(mem + keg->uk_pgoff);
833
834 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
835 (keg->uk_flags & UMA_ZONE_REFCNT))
836 for (i = 0; i < keg->uk_ppera; i++)
837 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
838
839 slab->us_keg = keg;
840 slab->us_data = mem;
841 slab->us_freecount = keg->uk_ipers;
842 slab->us_firstfree = 0;
843 slab->us_flags = flags;
844
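	/*
	 * Build the embedded free list: entry i links to i + 1 and
	 * us_firstfree starts at 0, so with (hypothetically) uk_ipers == 3
	 * the chain is 0 -> 1 -> 2 -> 3.  The final link is never followed
	 * because us_freecount reaches zero first.
	 */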
845 if (keg->uk_flags & UMA_ZONE_REFCNT) {
846 slabref = (uma_slabrefcnt_t)slab;
847 for (i = 0; i < keg->uk_ipers; i++) {
848 slabref->us_freelist[i].us_refcnt = 0;
849 slabref->us_freelist[i].us_item = i+1;
850 }
851 } else {
852 for (i = 0; i < keg->uk_ipers; i++)
853 slab->us_freelist[i].us_item = i+1;
854 }
855
856 if (keg->uk_init != NULL) {
857 for (i = 0; i < keg->uk_ipers; i++)
858 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
859 keg->uk_size, wait) != 0)
860 break;
861 if (i != keg->uk_ipers) {
862 if (keg->uk_fini != NULL) {
863 for (i--; i > -1; i--)
864 keg->uk_fini(slab->us_data +
865 (keg->uk_rsize * i),
866 keg->uk_size);
867 }
868 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
869 (keg->uk_flags & UMA_ZONE_REFCNT))
870 for (i = 0; i < keg->uk_ppera; i++)
871 vsetobj((vm_offset_t)mem +
872 (i * PAGE_SIZE), kmem_object);
873 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
874 uma_zfree_internal(keg->uk_slabzone, slab,
875 NULL, SKIP_NONE);
876 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
877 flags);
878 ZONE_LOCK(zone);
879 return (NULL);
880 }
881 }
882 ZONE_LOCK(zone);
883
884 if (keg->uk_flags & UMA_ZONE_HASH)
885 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
886
887 keg->uk_pages += keg->uk_ppera;
888 keg->uk_free += keg->uk_ipers;
889
890 return (slab);
891}
892
893/*
894 * This function is intended to be used early on in place of page_alloc() so
895 * that we may use the boot time page cache to satisfy allocations before
896 * the VM is ready.
897 */
898static void *
899startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
900{
901 uma_keg_t keg;
902
903 keg = zone->uz_keg;
904
905 /*
906 * Check our small startup cache to see if it has pages remaining.
907 */
908 mtx_lock(&uma_mtx);
909 if (uma_boot_free != 0) {
910 uma_slab_t tmps;
911
912 tmps = LIST_FIRST(&uma_boot_pages);
913 LIST_REMOVE(tmps, us_link);
914 uma_boot_free--;
915 mtx_unlock(&uma_mtx);
916 *pflag = tmps->us_flags;
917 return (tmps->us_data);
918 }
919 mtx_unlock(&uma_mtx);
920 if (booted == 0)
921 panic("UMA: Increase UMA_BOOT_PAGES");
922 /*
923 * Now that we've booted reset these users to their real allocator.
924 */
925#ifdef UMA_MD_SMALL_ALLOC
926 keg->uk_allocf = uma_small_alloc;
927#else
928 keg->uk_allocf = page_alloc;
929#endif
930 return keg->uk_allocf(zone, bytes, pflag, wait);
931}
932
933/*
934 * Allocates a number of pages from the system
935 *
936 * Arguments:
937 * zone Unused
938 * bytes The number of bytes requested
939 * wait Shall we wait?
940 *
941 * Returns:
942 * A pointer to the alloced memory or possibly
943 * NULL if M_NOWAIT is set.
944 */
945static void *
946page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
947{
948 void *p; /* Returned page */
949
950 *pflag = UMA_SLAB_KMEM;
951 p = (void *) kmem_malloc(kmem_map, bytes, wait);
952
953 return (p);
954}
955
956/*
957 * Allocates a number of pages from within an object
958 *
959 * Arguments:
960 * zone Unused
961 * bytes The number of bytes requested
962 * wait Shall we wait?
963 *
964 * Returns:
965 * A pointer to the alloced memory or possibly
966 * NULL if M_NOWAIT is set.
967 */
968static void *
969obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
970{
971 vm_object_t object;
972 vm_offset_t retkva, zkva;
973 vm_page_t p;
974 int pages, startpages;
975
976 object = zone->uz_keg->uk_obj;
977 retkva = 0;
978
979 /*
980 * This looks a little weird since we're getting one page at a time.
981 */
982 VM_OBJECT_LOCK(object);
983 p = TAILQ_LAST(&object->memq, pglist);
984 pages = p != NULL ? p->pindex + 1 : 0;
985 startpages = pages;
986 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
987 for (; bytes > 0; bytes -= PAGE_SIZE) {
988 p = vm_page_alloc(object, pages,
989 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
990 if (p == NULL) {
991 if (pages != startpages)
992 pmap_qremove(retkva, pages - startpages);
993 while (pages != startpages) {
994 pages--;
995 p = TAILQ_LAST(&object->memq, pglist);
996 vm_page_lock_queues();
997 vm_page_unwire(p, 0);
998 vm_page_free(p);
999 vm_page_unlock_queues();
1000 }
1001 retkva = 0;
1002 goto done;
1003 }
1004 pmap_qenter(zkva, &p, 1);
1005 if (retkva == 0)
1006 retkva = zkva;
1007 zkva += PAGE_SIZE;
1008 pages += 1;
1009 }
1010done:
1011 VM_OBJECT_UNLOCK(object);
1012 *flags = UMA_SLAB_PRIV;
1013
1014 return ((void *)retkva);
1015}
1016
1017/*
1018 * Frees a number of pages to the system
1019 *
1020 * Arguments:
1021 * mem A pointer to the memory to be freed
1022 * size The size of the memory being freed
1023 * flags The original p->us_flags field
1024 *
1025 * Returns:
1026 * Nothing
1027 */
1028static void
1029page_free(void *mem, int size, u_int8_t flags)
1030{
1031 vm_map_t map;
1032
1033 if (flags & UMA_SLAB_KMEM)
1034 map = kmem_map;
1035 else
1036 panic("UMA: page_free used with invalid flags %d\n", flags);
1037
1038 kmem_free(map, (vm_offset_t)mem, size);
1039}
1040
1041/*
1042 * Zero fill initializer
1043 *
1044 * Arguments/Returns follow uma_init specifications
1045 */
1046static int
1047zero_init(void *mem, int size, int flags)
1048{
1049 bzero(mem, size);
1050 return (0);
1051}
1052
1053/*
1054	 * Finish creating a small uma zone. This calculates ipers and the zone size.
1055 *
1056 * Arguments
1057 * zone The zone we should initialize
1058 *
1059 * Returns
1060 * Nothing
1061 */
1062static void
1063zone_small_init(uma_zone_t zone)
1064{
1065 uma_keg_t keg;
1066 u_int rsize;
1067 u_int memused;
1068 u_int wastedspace;
1069 u_int shsize;
1070
1071 keg = zone->uz_keg;
1072 KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
1073 rsize = keg->uk_size;
1074
1075 if (rsize < UMA_SMALLEST_UNIT)
1076 rsize = UMA_SMALLEST_UNIT;
1077 if (rsize & keg->uk_align)
1078 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1079
1080 keg->uk_rsize = rsize;
1081 keg->uk_ppera = 1;
1082
1083 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1084 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
1085 shsize = sizeof(struct uma_slab_refcnt);
1086 } else {
1087 rsize += UMA_FRITM_SZ; /* Account for linkage */
1088 shsize = sizeof(struct uma_slab);
1089 }
1090
1091 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1092 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
1093 memused = keg->uk_ipers * rsize + shsize;
1094 wastedspace = UMA_SLAB_SIZE - memused;
1095
1096 /*
1097 * We can't do OFFPAGE if we're internal or if we've been
1098 * asked to not go to the VM for buckets. If we do this we
1099	 * may end up going to the VM (kmem_map) for slabs, which we
1100	 * must not do if we're UMA_ZFLAG_CACHEONLY as a result of
1101	 * UMA_ZONE_VM, since that flag clearly forbids it.
1102 */
1103 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1104 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1105 return;
1106
1107 if ((wastedspace >= UMA_MAX_WASTE) &&
1108 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1109 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1110 KASSERT(keg->uk_ipers <= 255,
1111 ("zone_small_init: keg->uk_ipers too high!"));
1112#ifdef UMA_DEBUG
1113 printf("UMA decided we need offpage slab headers for "
1114 "zone: %s, calculated wastedspace = %d, "
1115 "maximum wasted space allowed = %d, "
1116 "calculated ipers = %d, "
1117 "new wasted space = %d\n", zone->uz_name, wastedspace,
1118 UMA_MAX_WASTE, keg->uk_ipers,
1119 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1120#endif
1121 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1122 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1123 keg->uk_flags |= UMA_ZONE_HASH;
1124 }
1125}
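
/*
 * A rough sketch of the math above, assuming UMA_SLAB_SIZE is a single
 * 4KB page and 8-byte pointer alignment (both hypothetical here): a
 * 100-byte item is rounded up to uk_rsize = 104, and
 * uk_ipers = (4096 - shsize) / (104 + UMA_FRITM_SZ), where shsize is
 * the in-slab header size.  Only if the leftover space reaches
 * UMA_MAX_WASTE and more items would fit without the in-slab header
 * does the keg switch to OFFPAGE slab headers.
 */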
1126
1127/*
1128 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
1129 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
1130 * more complicated.
1131 *
1132 * Arguments
1133 * zone The zone we should initialize
1134 *
1135 * Returns
1136 * Nothing
1137 */
1138static void
1139zone_large_init(uma_zone_t zone)
1140{
1141 uma_keg_t keg;
1142 int pages;
1143
1144 keg = zone->uz_keg;
1145
1146 KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
1147 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1148 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1149
1150 pages = keg->uk_size / UMA_SLAB_SIZE;
1151
1152 /* Account for remainder */
1153 if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1154 pages++;
1155
1156 keg->uk_ppera = pages;
1157 keg->uk_ipers = 1;
1158
1159 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1160 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1161 keg->uk_flags |= UMA_ZONE_HASH;
1162
1163 keg->uk_rsize = keg->uk_size;
1164}
1165
1166/*
1167 * Keg header ctor. This initializes all fields, locks, etc. And inserts
1168 * the keg onto the global keg list.
1169 *
1170 * Arguments/Returns follow uma_ctor specifications
1171 * udata Actually uma_kctor_args
1172 */
1173static int
1174keg_ctor(void *mem, int size, void *udata, int flags)
1175{
1176 struct uma_kctor_args *arg = udata;
1177 uma_keg_t keg = mem;
1178 uma_zone_t zone;
1179
1180 bzero(keg, size);
1181 keg->uk_size = arg->size;
1182 keg->uk_init = arg->uminit;
1183 keg->uk_fini = arg->fini;
1184 keg->uk_align = arg->align;
1185 keg->uk_free = 0;
1186 keg->uk_pages = 0;
1187 keg->uk_flags = arg->flags;
1188 keg->uk_allocf = page_alloc;
1189 keg->uk_freef = page_free;
1190 keg->uk_recurse = 0;
1191 keg->uk_slabzone = NULL;
1192
1193 /*
1194 * The master zone is passed to us at keg-creation time.
1195 */
1196 zone = arg->zone;
1197 zone->uz_keg = keg;
1198
1199 if (arg->flags & UMA_ZONE_VM)
1200 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1201
1202 if (arg->flags & UMA_ZONE_ZINIT)
1203 keg->uk_init = zero_init;
1204
1205 /*
1206 * The +UMA_FRITM_SZ added to uk_size is to account for the
1207 * linkage that is added to the size in zone_small_init(). If
1208 * we don't account for this here then we may end up in
1209 * zone_small_init() with a calculated 'ipers' of 0.
1210 */
1211 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1212 if ((keg->uk_size+UMA_FRITMREF_SZ) >
1213 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1214 zone_large_init(zone);
1215 else
1216 zone_small_init(zone);
1217 } else {
1218 if ((keg->uk_size+UMA_FRITM_SZ) >
1219 (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1220 zone_large_init(zone);
1221 else
1222 zone_small_init(zone);
1223 }
1224
1225 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1226 if (keg->uk_flags & UMA_ZONE_REFCNT)
1227 keg->uk_slabzone = slabrefzone;
1228 else
1229 keg->uk_slabzone = slabzone;
1230 }
1231
1232 /*
1233 * If we haven't booted yet we need allocations to go through the
1234 * startup cache until the vm is ready.
1235 */
1236 if (keg->uk_ppera == 1) {
1237#ifdef UMA_MD_SMALL_ALLOC
1238 keg->uk_allocf = uma_small_alloc;
1239 keg->uk_freef = uma_small_free;
1240#endif
1241 if (booted == 0)
1242 keg->uk_allocf = startup_alloc;
1243 }
1244
1245 /*
1246 * Initialize keg's lock (shared among zones) through
1247 * Master zone
1248 */
1249 zone->uz_lock = &keg->uk_lock;
1250 if (arg->flags & UMA_ZONE_MTXCLASS)
1251 ZONE_LOCK_INIT(zone, 1);
1252 else
1253 ZONE_LOCK_INIT(zone, 0);
1254
1255 /*
1256 * If we're putting the slab header in the actual page we need to
1257 * figure out where in each page it goes. This calculates a right
1258 * justified offset into the memory on an ALIGN_PTR boundary.
1259 */
1260 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1261 u_int totsize;
1262
1263 /* Size of the slab struct and free list */
1264 if (keg->uk_flags & UMA_ZONE_REFCNT)
1265 totsize = sizeof(struct uma_slab_refcnt) +
1266 keg->uk_ipers * UMA_FRITMREF_SZ;
1267 else
1268 totsize = sizeof(struct uma_slab) +
1269 keg->uk_ipers * UMA_FRITM_SZ;
1270
1271 if (totsize & UMA_ALIGN_PTR)
1272 totsize = (totsize & ~UMA_ALIGN_PTR) +
1273 (UMA_ALIGN_PTR + 1);
1274 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
1275
1276 if (keg->uk_flags & UMA_ZONE_REFCNT)
1277 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1278 + keg->uk_ipers * UMA_FRITMREF_SZ;
1279 else
1280 totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1281 + keg->uk_ipers * UMA_FRITM_SZ;
1282
1283	 * The only way the following is possible is if our
1284	 * UMA_ALIGN_PTR adjustments have made us bigger than
1285 * UMA_ALIGN_PTR adjustments we are now bigger than
1286 * UMA_SLAB_SIZE. I haven't checked whether this is
1287 * mathematically possible for all cases, so we make
1288 * sure here anyway.
1289 */
1290 if (totsize > UMA_SLAB_SIZE) {
1291 printf("zone %s ipers %d rsize %d size %d\n",
1292 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1293 keg->uk_size);
1294 panic("UMA slab won't fit.\n");
1295 }
1296 }
1297
1298 if (keg->uk_flags & UMA_ZONE_HASH)
1299 hash_alloc(&keg->uk_hash);
1300
1301#ifdef UMA_DEBUG
1302 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1303 zone->uz_name, zone,
1304 keg->uk_size, keg->uk_ipers,
1305 keg->uk_ppera, keg->uk_pgoff);
1306#endif
1307
1308 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1309
1310 mtx_lock(&uma_mtx);
1311 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1312 mtx_unlock(&uma_mtx);
1313 return (0);
1314}
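/*
 * A worked example of the in-page header placement above, with purely
 * illustrative numbers (a 4096-byte UMA_SLAB_SIZE and a header plus
 * freelist totalling 100 bytes are assumptions, not measured values):
 *
 *	totsize = 100;
 *	totsize = (100 & ~UMA_ALIGN_PTR) + (UMA_ALIGN_PTR + 1);
 *		-> 104 with an 8-byte pointer alignment
 *	uk_pgoff = 4096 - 104 = 3992
 *
 * so the header sits right-justified in the last 104 bytes of the slab
 * and items are laid out from us_data at the start of the page.
 */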
1315
1316/*
1317 * Zone header ctor. This initializes all fields, locks, etc.
1318 *
1319 * Arguments/Returns follow uma_ctor specifications
1320 * udata Actually uma_zctor_args
1321 */
1322
1323static int
1324zone_ctor(void *mem, int size, void *udata, int flags)
1325{
1326 struct uma_zctor_args *arg = udata;
1327 uma_zone_t zone = mem;
1328 uma_zone_t z;
1329 uma_keg_t keg;
1330
1331 bzero(zone, size);
1332 zone->uz_name = arg->name;
1333 zone->uz_ctor = arg->ctor;
1334 zone->uz_dtor = arg->dtor;
1335 zone->uz_init = NULL;
1336 zone->uz_fini = NULL;
1337 zone->uz_allocs = 0;
1338 zone->uz_fills = zone->uz_count = 0;
1339
1340 if (arg->flags & UMA_ZONE_SECONDARY) {
1341 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1342 keg = arg->keg;
1343 zone->uz_keg = keg;
1344 zone->uz_init = arg->uminit;
1345 zone->uz_fini = arg->fini;
1346 zone->uz_lock = &keg->uk_lock;
1347 mtx_lock(&uma_mtx);
1348 ZONE_LOCK(zone);
1349 keg->uk_flags |= UMA_ZONE_SECONDARY;
1350 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1351 if (LIST_NEXT(z, uz_link) == NULL) {
1352 LIST_INSERT_AFTER(z, zone, uz_link);
1353 break;
1354 }
1355 }
1356 ZONE_UNLOCK(zone);
1357 mtx_unlock(&uma_mtx);
1358 } else if (arg->keg == NULL) {
1359 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1360 arg->align, arg->flags) == NULL)
1361 return (ENOMEM);
1362 } else {
1363 struct uma_kctor_args karg;
1364 int error;
1365
1366 /* We should only be here from uma_startup() */
1367 karg.size = arg->size;
1368 karg.uminit = arg->uminit;
1369 karg.fini = arg->fini;
1370 karg.align = arg->align;
1371 karg.flags = arg->flags;
1372 karg.zone = zone;
1373 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1374 flags);
1375 if (error)
1376 return (error);
1377 }
1378 keg = zone->uz_keg;
1379 zone->uz_lock = &keg->uk_lock;
1380
1381 /*
1382 * Some internal zones don't have room allocated for the per cpu
1383 * caches. If we're internal, bail out here.
1384 */
1385 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1386 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
1387 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1388 return (0);
1389 }
1390
1391 if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1392 zone->uz_count = BUCKET_MAX;
1393 else if (keg->uk_ipers <= BUCKET_MAX)
1394 zone->uz_count = keg->uk_ipers;
1395 else
1396 zone->uz_count = BUCKET_MAX;
1397 return (0);
1398}
1399
1400/*
1401 * Keg header dtor. This frees all data, destroys locks, frees the hash
1402 * table and removes the keg from the global list.
1403 *
1404 * Arguments/Returns follow uma_dtor specifications
1405 * udata unused
1406 */
1407static void
1408keg_dtor(void *arg, int size, void *udata)
1409{
1410 uma_keg_t keg;
1411
1412 keg = (uma_keg_t)arg;
1413 mtx_lock(&keg->uk_lock);
1414 if (keg->uk_free != 0) {
1415 printf("Freed UMA keg was not empty (%d items). "
1416 " Lost %d pages of memory.\n",
1417 keg->uk_free, keg->uk_pages);
1418 }
1419 mtx_unlock(&keg->uk_lock);
1420
1421 if (keg->uk_flags & UMA_ZONE_HASH)
1422 hash_free(&keg->uk_hash);
1423
1424 mtx_destroy(&keg->uk_lock);
1425}
1426
1427/*
1428 * Zone header dtor.
1429 *
1430 * Arguments/Returns follow uma_dtor specifications
1431 * udata unused
1432 */
1433static void
1434zone_dtor(void *arg, int size, void *udata)
1435{
1436 uma_zone_t zone;
1437 uma_keg_t keg;
1438
1439 zone = (uma_zone_t)arg;
1440 keg = zone->uz_keg;
1441
1442 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
1443 cache_drain(zone);
1444
1445 mtx_lock(&uma_mtx);
1446 zone_drain(zone);
1447 if (keg->uk_flags & UMA_ZONE_SECONDARY) {
1448 LIST_REMOVE(zone, uz_link);
1449 /*
1450 * XXX there are some races here where the
1451 * zone can be drained, the zone lock released,
1452 * and the zone refilled before we
1453 * remove it... we don't care for now.
1454 */
1455 ZONE_LOCK(zone);
1456 if (LIST_EMPTY(&keg->uk_zones))
1457 keg->uk_flags &= ~UMA_ZONE_SECONDARY;
1458 ZONE_UNLOCK(zone);
1459 mtx_unlock(&uma_mtx);
1460 } else {
1461 LIST_REMOVE(keg, uk_link);
1462 LIST_REMOVE(zone, uz_link);
1463 mtx_unlock(&uma_mtx);
1464 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE);
1465 }
1466 zone->uz_keg = NULL;
1467}
1468
1469/*
1470 * Traverses every zone in the system and calls a callback
1471 *
1472 * Arguments:
1473 * zfunc A pointer to a function which accepts a zone
1474 * as an argument.
1475 *
1476 * Returns:
1477 * Nothing
1478 */
1479static void
1480zone_foreach(void (*zfunc)(uma_zone_t))
1481{
1482 uma_keg_t keg;
1483 uma_zone_t zone;
1484
1485 mtx_lock(&uma_mtx);
1486 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1487 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1488 zfunc(zone);
1489 }
1490 mtx_unlock(&uma_mtx);
1491}
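/*
 * zone_foreach() is how, for example, uma_reclaim() below walks every
 * zone via zone_foreach(zone_drain). A hypothetical callback
 * (my_zone_cb is not part of this file) would look like:
 *
 *	static void
 *	my_zone_cb(uma_zone_t zone)
 *	{
 *		printf("%s: %u items free\n",
 *		    zone->uz_name, zone->uz_keg->uk_free);
 *	}
 *
 *	zone_foreach(my_zone_cb);
 *
 * The callback runs with uma_mtx held.
 */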
1492
1493/* Public functions */
1494/* See uma.h */
1495void
1496uma_startup(void *bootmem)
1497{
1498 struct uma_zctor_args args;
1499 uma_slab_t slab;
1500 u_int slabsize;
1501 u_int objsize, totsize, wsize;
1502 int i;
1503
1504#ifdef UMA_DEBUG
1505 printf("Creating uma keg headers zone and keg.\n");
1506#endif
1507 /*
1508 * The general UMA lock is a recursion-allowed lock because
1509 * there is a code path where, while we're still configured
1510 * to use startup_alloc() for backend page allocations, we
1511 * may end up in uma_reclaim() which calls zone_foreach(zone_drain),
1512 * which grabs uma_mtx, only to later call into startup_alloc()
1513 * because while freeing we needed to allocate a bucket. Since
1514 * startup_alloc() also takes uma_mtx, we need to be able to
1515 * recurse on it.
1516 */
1517 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE);
1518
1519 /*
1520 * Figure out the maximum number of items-per-slab we'll have if
1521 * we're using the OFFPAGE slab header to track free items, given
1522 * all possible object sizes and the maximum desired wastage
1523 * (UMA_MAX_WASTE).
1524 *
1525 * We iterate until we find an object size for
1526 * which the calculated wastage in zone_small_init() will be
1527 * enough to warrant OFFPAGE. Since wastedspace versus objsize
1528 * is an overall increasing see-saw function, we find the smallest
1529 * objsize such that the wastage is always acceptable for objects
1530 * with that objsize or smaller. Since a smaller objsize always
1531 * generates a larger possible uma_max_ipers, we use this computed
1532 * objsize to calculate the largest ipers possible. Since the
1533 * ipers calculated for OFFPAGE slab headers is always larger than
1534 * the ipers initially calculated in zone_small_init(), we use
1535 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1536 * obtain the maximum ipers possible for offpage slab headers.
1537 *
1538 * It should be noted that ipers versus objsize is an inversely
1539 * proportional function which drops off rather quickly so as
1540 * long as our UMA_MAX_WASTE is such that the objsize we calculate
1541 * falls into the portion of the inverse relation AFTER the steep
1542 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1543 *
1544 * Note that we have 8-bits (1 byte) to use as a freelist index
1545 * inside the actual slab header itself and this is enough to
1546 * accommodate us. In the worst case, a UMA_SMALLEST_UNIT-sized
1547 * object with offpage slab header would have ipers =
1548 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1549 * 1 greater than what our byte-integer freelist index can
1550 * accommodate, but we know that this situation never occurs, as
1551 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1552 * that we need to go to offpage slab headers. Or, if we do,
1553 * then we trap that condition below and panic in the INVARIANTS case.
1554 */
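/*
 * In other words, the loop below looks for the smallest objsize at
 * which the per-slab wastage
 *
 *	(UMA_SLAB_SIZE - sizeof(struct uma_slab)) % (objsize + UMA_FRITM_SZ)
 *
 * first exceeds UMA_MAX_WASTE. Every smaller object size tested wasted
 * an acceptable amount, and since smaller objects always fit more items
 * per slab, UMA_SLAB_SIZE / objsize at that point bounds the number of
 * freelist entries an offpage slab header will ever need. The second
 * loop repeats the computation for the larger refcnt slab header.
 */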
1555 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1556 totsize = wsize;
1557 objsize = UMA_SMALLEST_UNIT;
1558 while (totsize >= wsize) {
1559 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1560 (objsize + UMA_FRITM_SZ);
1561 totsize *= (UMA_FRITM_SZ + objsize);
1562 objsize++;
1563 }
1564 if (objsize > UMA_SMALLEST_UNIT)
1565 objsize--;
1566 uma_max_ipers = UMA_SLAB_SIZE / objsize;
1567
1568 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1569 totsize = wsize;
1570 objsize = UMA_SMALLEST_UNIT;
1571 while (totsize >= wsize) {
1572 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1573 (objsize + UMA_FRITMREF_SZ);
1574 totsize *= (UMA_FRITMREF_SZ + objsize);
1575 objsize++;
1576 }
1577 if (objsize > UMA_SMALLEST_UNIT)
1578 objsize--;
1579 uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
1580
1581 KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1582 ("uma_startup: calculated uma_max_ipers values too large!"));
1583
1584#ifdef UMA_DEBUG
1585 printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1586 printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n",
1587 uma_max_ipers_ref);
1588#endif
1589
1590 /* "manually" create the initial zone */
1591 args.name = "UMA Kegs";
1592 args.size = sizeof(struct uma_keg);
1593 args.ctor = keg_ctor;
1594 args.dtor = keg_dtor;
1595 args.uminit = zero_init;
1596 args.fini = NULL;
1597 args.keg = &masterkeg;
1598 args.align = 32 - 1;
1599 args.flags = UMA_ZFLAG_INTERNAL;
1600 /* The initial zone has no per-CPU queues, so it's smaller */
1601 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1602
1603#ifdef UMA_DEBUG
1604 printf("Filling boot free list.\n");
1605#endif
1606 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1607 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1608 slab->us_data = (u_int8_t *)slab;
1609 slab->us_flags = UMA_SLAB_BOOT;
1610 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1611 uma_boot_free++;
1612 }
1613
1614#ifdef UMA_DEBUG
1615 printf("Creating uma zone headers zone and keg.\n");
1616#endif
1617 args.name = "UMA Zones";
1618 args.size = sizeof(struct uma_zone) +
1619 (sizeof(struct uma_cache) * (mp_maxid + 1));
1620 args.ctor = zone_ctor;
1621 args.dtor = zone_dtor;
1622 args.uminit = zero_init;
1623 args.fini = NULL;
1624 args.keg = NULL;
1625 args.align = 32 - 1;
1626 args.flags = UMA_ZFLAG_INTERNAL;
1627 /* The initial zone has no per-CPU queues, so it's smaller */
1628 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1629
1630#ifdef UMA_DEBUG
1631 printf("Initializing pcpu cache locks.\n");
1632#endif
1633 /* Initialize the pcpu cache lock set once and for all */
1634 for (i = 0; i <= mp_maxid; i++)
1635 CPU_LOCK_INIT(i);
1636
1637#ifdef UMA_DEBUG
1638 printf("Creating slab and hash zones.\n");
1639#endif
1640
1641 /*
1642 * This is the max number of free list items we'll have with
1643 * offpage slabs.
1644 */
1645 slabsize = uma_max_ipers * UMA_FRITM_SZ;
1646 slabsize += sizeof(struct uma_slab);
1647
1648 /* Now make a zone for slab headers */
1649 slabzone = uma_zcreate("UMA Slabs",
1650 slabsize,
1651 NULL, NULL, NULL, NULL,
1652 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1653
1654 /*
1655 * We also create a zone for the bigger slabs with reference
1656 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1657 */
1658 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1659 slabsize += sizeof(struct uma_slab_refcnt);
1660 slabrefzone = uma_zcreate("UMA RCntSlabs",
1661 slabsize,
1662 NULL, NULL, NULL, NULL,
1663 UMA_ALIGN_PTR,
1664 UMA_ZFLAG_INTERNAL);
1665
1666 hashzone = uma_zcreate("UMA Hash",
1667 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1668 NULL, NULL, NULL, NULL,
1669 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1670
1671 bucket_init();
1672
1673#ifdef UMA_MD_SMALL_ALLOC
1674 booted = 1;
1675#endif
1676
1677#ifdef UMA_DEBUG
1678 printf("UMA startup complete.\n");
1679#endif
1680}
1681
1682/* see uma.h */
1683void
1684uma_startup2(void)
1685{
1686 booted = 1;
1687 bucket_enable();
1688#ifdef UMA_DEBUG
1689 printf("UMA startup2 complete.\n");
1690#endif
1691}
1692
1693/*
1694 * Initialize our callout handle
1695 *
1696 */
1697
1698static void
1699uma_startup3(void)
1700{
1701#ifdef UMA_DEBUG
1702 printf("Starting callout.\n");
1703#endif
1704 callout_init(&uma_callout, CALLOUT_MPSAFE);
1705 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1706#ifdef UMA_DEBUG
1707 printf("UMA startup3 complete.\n");
1708#endif
1709}
1710
1711static uma_zone_t
1712uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1713 int align, u_int16_t flags)
1714{
1715 struct uma_kctor_args args;
1716
1717 args.size = size;
1718 args.uminit = uminit;
1719 args.fini = fini;
1720 args.align = align;
1721 args.flags = flags;
1722 args.zone = zone;
1723 return (uma_zalloc_internal(kegs, &args, M_WAITOK));
1724}
1725
1726/* See uma.h */
1727uma_zone_t
1728uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1729 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1730
1731{
1732 struct uma_zctor_args args;
1733
1734 /* This stuff is essential for the zone ctor */
1735 args.name = name;
1736 args.size = size;
1737 args.ctor = ctor;
1738 args.dtor = dtor;
1739 args.uminit = uminit;
1740 args.fini = fini;
1741 args.align = align;
1742 args.flags = flags;
1743 args.keg = NULL;
1744
1745 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1746}
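/*
 * A sketch of typical uma_zcreate() use by a subsystem. The names
 * (foo_zone, struct foo, foo_ctor, foo_dtor) are invented for
 * illustration, and uma_zalloc()/uma_zfree() are assumed to be the
 * usual uma.h wrappers that pass a NULL udata to the _arg variants:
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    foo_ctor, foo_dtor, NULL, NULL, UMA_ALIGN_PTR, 0);
 *
 *	struct foo *fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, fp);
 *	uma_zdestroy(foo_zone);
 */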
1747
1748/* See uma.h */
1749uma_zone_t
1750uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1751 uma_init zinit, uma_fini zfini, uma_zone_t master)
1752{
1753 struct uma_zctor_args args;
1754
1755 args.name = name;
1756 args.size = master->uz_keg->uk_size;
1757 args.ctor = ctor;
1758 args.dtor = dtor;
1759 args.uminit = zinit;
1760 args.fini = zfini;
1761 args.align = master->uz_keg->uk_align;
1762 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
1763 args.keg = master->uz_keg;
1764
1765 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1766}
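/*
 * A sketch of a secondary zone. Both zones share the master's keg
 * (and thus its slabs and item size); the secondary zone only layers
 * its own ctor/dtor and zinit/zfini on top. The names below are
 * invented for illustration:
 *
 *	master = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, foo_init, foo_fini, UMA_ALIGN_PTR, 0);
 *	cooked = uma_zsecond_create("foo cooked",
 *	    foo_cook_ctor, foo_cook_dtor, NULL, NULL, master);
 *
 * Allocations from either zone draw on the same backing pages, which
 * is how, for example, a "raw" and a "pre-initialized" flavor of the
 * same object can coexist without doubling the memory reserved.
 */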
1767
1768/* See uma.h */
1769void
1770uma_zdestroy(uma_zone_t zone)
1771{
1772 uma_zfree_internal(zones, zone, NULL, SKIP_NONE);
1773}
1774
1775/* See uma.h */
1776void *
1777uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1778{
1779 void *item;
1780 uma_cache_t cache;
1781 uma_bucket_t bucket;
1782 int cpu;
1783 int badness;
1784
1785 /* This is the fast path allocation */
1786#ifdef UMA_DEBUG_ALLOC_1
1787 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1788#endif
1789 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1790 zone->uz_name, flags);
1791
1792 if (!(flags & M_NOWAIT)) {
1793 KASSERT(curthread->td_intr_nesting_level == 0,
1794 ("malloc(M_WAITOK) in interrupt context"));
1795 if (nosleepwithlocks) {
1796#ifdef WITNESS
1797 badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
1798 NULL,
1799 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
1800 zone->uz_name);
1801#else
1802 badness = 1;
1803#endif
1804 } else {
1805 badness = 0;
1806#ifdef WITNESS
1807 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1808 "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
1809#endif
1810 }
1811 if (badness) {
1812 flags &= ~M_WAITOK;
1813 flags |= M_NOWAIT;
1814 }
1815 }
1816
1817zalloc_restart:
1818 cpu = PCPU_GET(cpuid);
1819 CPU_LOCK(cpu);
1820 cache = &zone->uz_cpu[cpu];
1821
1822zalloc_start:
1823 bucket = cache->uc_allocbucket;
1824
1825 if (bucket) {
1826 if (bucket->ub_cnt > 0) {
1827 bucket->ub_cnt--;
1828 item = bucket->ub_bucket[bucket->ub_cnt];
1829#ifdef INVARIANTS
1830 bucket->ub_bucket[bucket->ub_cnt] = NULL;
1831#endif
1832 KASSERT(item != NULL,
1833 ("uma_zalloc: Bucket pointer mangled."));
1834 cache->uc_allocs++;
1835#ifdef INVARIANTS
1836 ZONE_LOCK(zone);
1837 uma_dbg_alloc(zone, NULL, item);
1838 ZONE_UNLOCK(zone);
1839#endif
1840 CPU_UNLOCK(cpu);
1841 if (zone->uz_ctor != NULL) {
1842 if (zone->uz_ctor(item, zone->uz_keg->uk_size,
1843 udata, flags) != 0) {
1844 uma_zfree_internal(zone, item, udata,
1845 SKIP_DTOR);
1846 return (NULL);
1847 }
1848 }
1849 if (flags & M_ZERO)
1850 bzero(item, zone->uz_keg->uk_size);
1851 return (item);
1852 } else if (cache->uc_freebucket) {
1853 /*
1854 * We have run out of items in our allocbucket.
1855 * See if we can switch with our free bucket.
1856 */
1857 if (cache->uc_freebucket->ub_cnt > 0) {
1858#ifdef UMA_DEBUG_ALLOC
1859 printf("uma_zalloc: Swapping empty with"
1860 " alloc.\n");
1861#endif
1862 bucket = cache->uc_freebucket;
1863 cache->uc_freebucket = cache->uc_allocbucket;
1864 cache->uc_allocbucket = bucket;
1865
1866 goto zalloc_start;
1867 }
1868 }
1869 }
1870 ZONE_LOCK(zone);
1871 /* Since we have locked the zone we may as well send back our stats */
1872 zone->uz_allocs += cache->uc_allocs;
1873 cache->uc_allocs = 0;
1874
1875 /* Our old one is now a free bucket */
1876 if (cache->uc_allocbucket) {
1877 KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1878 ("uma_zalloc_arg: Freeing a non free bucket."));
1879 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1880 cache->uc_allocbucket, ub_link);
1881 cache->uc_allocbucket = NULL;
1882 }
1883
1884 /* Check the free list for a new alloc bucket */
1885 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1886 KASSERT(bucket->ub_cnt != 0,
1887 ("uma_zalloc_arg: Returning an empty bucket."));
1888
1889 LIST_REMOVE(bucket, ub_link);
1890 cache->uc_allocbucket = bucket;
1891 ZONE_UNLOCK(zone);
1892 goto zalloc_start;
1893 }
1894 /* We are no longer associated with this cpu!!! */
1895 CPU_UNLOCK(cpu);
1896
1897 /* Bump up our uz_count so we get here less */
1898 if (zone->uz_count < BUCKET_MAX)
1899 zone->uz_count++;
1900
1901 /*
1902 * Now let's just fill a bucket and put it on the free list. If that
1903 * works we'll restart the allocation from the beginning.
1904 */
1905 if (uma_zalloc_bucket(zone, flags)) {
1906 ZONE_UNLOCK(zone);
1907 goto zalloc_restart;
1908 }
1909 ZONE_UNLOCK(zone);
1910 /*
1911 * We may not be able to get a bucket so return an actual item.
1912 */
1913#ifdef UMA_DEBUG
1914 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1915#endif
1916
1917 return (uma_zalloc_internal(zone, udata, flags));
1918}
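/*
 * Note that a caller passing M_NOWAIT (directly, or implicitly when
 * the WITNESS check above downgrades an unsafe M_WAITOK) must be
 * prepared for a NULL return, e.g. (the error handling is hypothetical):
 *
 *	item = uma_zalloc_arg(zone, NULL, M_NOWAIT);
 *	if (item == NULL)
 *		return (ENOBUFS);
 */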
1919
1920static uma_slab_t
1921uma_zone_slab(uma_zone_t zone, int flags)
1922{
1923 uma_slab_t slab;
1924 uma_keg_t keg;
1925
1926 keg = zone->uz_keg;
1927
1928 /*
1929 * This is to prevent us from recursively trying to allocate
1930 * buckets. The problem is that if an allocation forces us to
1931 * grab a new bucket we will call page_alloc, which will go off
1932 * and cause the vm to allocate vm_map_entries. If we need new
1933 * buckets there too we will recurse in kmem_alloc and bad
1934 * things happen. So instead we return a NULL bucket, and make
1935 * the code that allocates buckets smart enough to deal with it
1936 *
1937 * XXX: While we want this protection for the bucket zones so that
1938 * recursion from the VM is handled (and the calling code that
1939 * allocates buckets knows how to deal with it), we do not want
1940 * to prevent allocation from the slab header zones (slabzone
1941 * and slabrefzone) if uk_recurse is not zero for them. The
1942 * reason is that it could lead to NULL being returned for
1943 * slab header allocations even in the M_WAITOK case, and the
1944 * caller can't handle that.
1945 */
1946 if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
1947 if ((zone != slabzone) && (zone != slabrefzone))
1948 return (NULL);
1949
1950 slab = NULL;
1951
1952 for (;;) {
1953 /*
1954 * Find a slab with some space. Prefer slabs that are partially
1955 * used over those that are totally full. This helps to reduce
1956 * fragmentation.
1957 */
1958 if (keg->uk_free != 0) {
1959 if (!LIST_EMPTY(&keg->uk_part_slab)) {
1960 slab = LIST_FIRST(&keg->uk_part_slab);
1961 } else {
1962 slab = LIST_FIRST(&keg->uk_free_slab);
1963 LIST_REMOVE(slab, us_link);
1964 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
1965 us_link);
1966 }
1967 return (slab);
1968 }
1969
1970 /*
1971 * M_NOVM means don't ask at all!
1972 */
1973 if (flags & M_NOVM)
1974 break;
1975
1976 if (keg->uk_maxpages &&
1977 keg->uk_pages >= keg->uk_maxpages) {
1978 keg->uk_flags |= UMA_ZFLAG_FULL;
1979
1980 if (flags & M_NOWAIT)
1981 break;
1982 else
1983 msleep(keg, &keg->uk_lock, PVM,
1984 "zonelimit", 0);
1985 continue;
1986 }
1987 keg->uk_recurse++;
1988 slab = slab_zalloc(zone, flags);
1989 keg->uk_recurse--;
1990
1991 /*
1992 * If we got a slab here it's safe to mark it partially used
1993 * and return. We assume that the caller is going to remove
1994 * at least one item.
1995 */
1996 if (slab) {
1997 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
1998 return (slab);
1999 }
2000 /*
2001 * We might not have been able to get a slab but another cpu
2002 * could have while we were unlocked. Check again before we
2003 * fail.
2004 */
2005 if (flags & M_NOWAIT)
2006 flags |= M_NOVM;
2007 }
2008 return (slab);
2009}
2010
2011static void *
2012uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
2013{
2014 uma_keg_t keg;
2015 uma_slabrefcnt_t slabref;
2016 void *item;
2017 u_int8_t freei;
2018
2019 keg = zone->uz_keg;
2020
2021 freei = slab->us_firstfree;
2022 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2023 slabref = (uma_slabrefcnt_t)slab;
2024 slab->us_firstfree = slabref->us_freelist[freei].us_item;
2025 } else {
2026 slab->us_firstfree = slab->us_freelist[freei].us_item;
2027 }
2028 item = slab->us_data + (keg->uk_rsize * freei);
2029
2030 slab->us_freecount--;
2031 keg->uk_free--;
2032#ifdef INVARIANTS
2033 uma_dbg_alloc(zone, slab, item);
2034#endif
2035 /* Move this slab to the full list */
2036 if (slab->us_freecount == 0) {
2037 LIST_REMOVE(slab, us_link);
2038 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2039 }
2040
2041 return (item);
2042}
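/*
 * The free list walked above is embedded in the slab: us_firstfree is
 * the index of the first free item and each us_freelist[] entry stores
 * the index of the next one. With uk_rsize == 128 and freei == 3
 * (arbitrary example values), the returned item starts
 * 128 * 3 = 384 bytes into us_data.
 */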
2043
2044static int
2045uma_zalloc_bucket(uma_zone_t zone, int flags)
2046{
2047 uma_bucket_t bucket;
2048 uma_slab_t slab;
2049 int16_t saved;
2050 int max, origflags = flags;
2051
2052 /*
2053 * Try this zone's free list first so we don't allocate extra buckets.
2054 */
2055 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2056 KASSERT(bucket->ub_cnt == 0,
2057 ("uma_zalloc_bucket: Bucket on free list is not empty."));
2058 LIST_REMOVE(bucket, ub_link);
2059 } else {
2060 int bflags;
2061
2062 bflags = (flags & ~M_ZERO);
2063 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2064 bflags |= M_NOVM;
2065
2066 ZONE_UNLOCK(zone);
2067 bucket = bucket_alloc(zone->uz_count, bflags);
2068 ZONE_LOCK(zone);
2069 }
2070
2071 if (bucket == NULL)
2072 return (0);
2073
2074#ifdef SMP
2075 /*
2076 * This code is here to limit the number of simultaneous bucket fills
2077 * for any given zone to the number of per cpu caches in this zone. This
2078 * is done so that we don't allocate more memory than we really need.
2079 */
2080 if (zone->uz_fills >= mp_ncpus)
2081 goto done;
2082
2083#endif
2084 zone->uz_fills++;
2085
2086 max = MIN(bucket->ub_entries, zone->uz_count);
2087 /* Try to keep the buckets totally full */
2088 saved = bucket->ub_cnt;
2089 while (bucket->ub_cnt < max &&
2090 (slab = uma_zone_slab(zone, flags)) != NULL) {
2091 while (slab->us_freecount && bucket->ub_cnt < max) {
2092 bucket->ub_bucket[bucket->ub_cnt++] =
2093 uma_slab_alloc(zone, slab);
2094 }
2095
2096 /* Don't block on the next fill */
2097 flags |= M_NOWAIT;
2098 }
2099
2100 /*
2101 * We unlock here because we need to call the zone's init.
2102 * It should be safe to unlock because the slab dealt with
2103 * above is already on the appropriate list within the keg
2104 * and the bucket we filled is not yet on any list, so we
2105 * own it.
2106 */
2107 if (zone->uz_init != NULL) {
2108 int i;
2109
2110 ZONE_UNLOCK(zone);
2111 for (i = saved; i < bucket->ub_cnt; i++)
2112 if (zone->uz_init(bucket->ub_bucket[i],
2113 zone->uz_keg->uk_size, origflags) != 0)
2114 break;
2115 /*
2116 * If we couldn't initialize the whole bucket, put the
2117 * rest back onto the freelist.
2118 */
2119 if (i != bucket->ub_cnt) {
2120 int j;
2121
2122 for (j = i; j < bucket->ub_cnt; j++) {
2123 uma_zfree_internal(zone, bucket->ub_bucket[j],
2124 NULL, SKIP_FINI);
2125#ifdef INVARIANTS
2126 bucket->ub_bucket[j] = NULL;
2127#endif
2128 }
2129 bucket->ub_cnt = i;
2130 }
2131 ZONE_LOCK(zone);
2132 }
2133
2134 zone->uz_fills--;
2135 if (bucket->ub_cnt != 0) {
2136 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2137 bucket, ub_link);
2138 return (1);
2139 }
2140#ifdef SMP
2141done:
2142#endif
2143 bucket_free(bucket);
2144
2145 return (0);
2146}
2147/*
2148 * Allocates an item for an internal zone
2149 *
2150 * Arguments
2151 * zone The zone to alloc for.
2152 * udata The data to be passed to the constructor.
2153 * flags M_WAITOK, M_NOWAIT, M_ZERO.
2154 *
2155 * Returns
2156 * NULL if there is no memory and M_NOWAIT is set
2157 * An item if successful
2158 */
2159
2160static void *
2161uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
2162{
2163 uma_keg_t keg;
2164 uma_slab_t slab;
2165 void *item;
2166
2167 item = NULL;
2168 keg = zone->uz_keg;
2169
2170#ifdef UMA_DEBUG_ALLOC
2171 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2172#endif
2173 ZONE_LOCK(zone);
2174
2175 slab = uma_zone_slab(zone, flags);
2176 if (slab == NULL) {
2177 ZONE_UNLOCK(zone);
2178 return (NULL);
2179 }
2180
2181 item = uma_slab_alloc(zone, slab);
2182
2183 ZONE_UNLOCK(zone);
2184
2185 /*
2186 * We have to call both the zone's init (not the keg's init)
2187 * and the zone's ctor. This is because the item is going from
2188 * a keg slab directly to the user, and the user is expecting it
2189 * to be both zone-init'd as well as zone-ctor'd.
2190 */
2191 if (zone->uz_init != NULL) {
2192 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
2193 uma_zfree_internal(zone, item, udata, SKIP_FINI);
2194 return (NULL);
2195 }
2196 }
2197 if (zone->uz_ctor != NULL) {
2198 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
2199 uma_zfree_internal(zone, item, udata, SKIP_DTOR);
2200 return (NULL);
2201 }
2202 }
2203 if (flags & M_ZERO)
2204 bzero(item, keg->uk_size);
2205
2206 return (item);
2207}
2208
2209/* See uma.h */
2210void
2211uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2212{
2213 uma_keg_t keg;
2214 uma_cache_t cache;
2215 uma_bucket_t bucket;
2216 int bflags;
2217 int cpu;
2218 enum zfreeskip skip;
2219
2220 /* This is the fast path free */
2221 skip = SKIP_NONE;
2222 keg = zone->uz_keg;
2223
2224#ifdef UMA_DEBUG_ALLOC_1
2225 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2226#endif
2227 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2228 zone->uz_name);
2229
2230 /*
2231 * The race here is acceptable. If we miss it we'll just have to wait
2232 * a little longer for the limits to be reset.
2233 */
2234
2235 if (keg->uk_flags & UMA_ZFLAG_FULL)
2236 goto zfree_internal;
2237
2238 if (zone->uz_dtor) {
2239 zone->uz_dtor(item, keg->uk_size, udata);
2240 skip = SKIP_DTOR;
2241 }
2242
2243zfree_restart:
2244 cpu = PCPU_GET(cpuid);
2245 CPU_LOCK(cpu);
2246 cache = &zone->uz_cpu[cpu];
2247
2248zfree_start:
2249 bucket = cache->uc_freebucket;
2250
2251 if (bucket) {
2252 /*
2253 * Do we have room in our bucket? It is OK for this uz count
2254 * check to be slightly out of sync.
2255 */
2256
2257 if (bucket->ub_cnt < bucket->ub_entries) {
2258 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2259 ("uma_zfree: Freeing to non free bucket index."));
2260 bucket->ub_bucket[bucket->ub_cnt] = item;
2261 bucket->ub_cnt++;
2262#ifdef INVARIANTS
2263 ZONE_LOCK(zone);
2264 if (keg->uk_flags & UMA_ZONE_MALLOC)
2265 uma_dbg_free(zone, udata, item);
2266 else
2267 uma_dbg_free(zone, NULL, item);
2268 ZONE_UNLOCK(zone);
2269#endif
2270 CPU_UNLOCK(cpu);
2271 return;
2272 } else if (cache->uc_allocbucket) {
2273#ifdef UMA_DEBUG_ALLOC
2274 printf("uma_zfree: Swapping buckets.\n");
2275#endif
2276 /*
2277 * We have run out of space in our freebucket.
2278 * See if we can switch with our alloc bucket.
2279 */
2280 if (cache->uc_allocbucket->ub_cnt <
2281 cache->uc_freebucket->ub_cnt) {
2282 bucket = cache->uc_freebucket;
2283 cache->uc_freebucket = cache->uc_allocbucket;
2284 cache->uc_allocbucket = bucket;
2285 goto zfree_start;
2286 }
2287 }
2288 }
2289 /*
2290 * We can get here for two reasons:
2291 *
2292 * 1) The buckets are NULL
2293 * 2) The alloc and free buckets are both somewhat full.
2294 */
2295
2296 ZONE_LOCK(zone);
2297
2298 bucket = cache->uc_freebucket;
2299 cache->uc_freebucket = NULL;
2300
2301 /* Can we throw this on the zone full list? */
2302 if (bucket != NULL) {
2303#ifdef UMA_DEBUG_ALLOC
2304 printf("uma_zfree: Putting old bucket on the free list.\n");
2305#endif
2306 /* ub_cnt is pointing to the last free item */
2307 KASSERT(bucket->ub_cnt != 0,
2308 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2309 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2310 bucket, ub_link);
2311 }
2312 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2313 LIST_REMOVE(bucket, ub_link);
2314 ZONE_UNLOCK(zone);
2315 cache->uc_freebucket = bucket;
2316 goto zfree_start;
2317 }
2318 /* We're done with this CPU now */
2319 CPU_UNLOCK(cpu);
2320
2321 /* And the zone.. */
2322 ZONE_UNLOCK(zone);
2323
2324#ifdef UMA_DEBUG_ALLOC
2325 printf("uma_zfree: Allocating new free bucket.\n");
2326#endif
2327 bflags = M_NOWAIT;
2328
2329 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2330 bflags |= M_NOVM;
2331 bucket = bucket_alloc(zone->uz_count, bflags);
2332 if (bucket) {
2333 ZONE_LOCK(zone);
2334 LIST_INSERT_HEAD(&zone->uz_free_bucket,
2335 bucket, ub_link);
2336 ZONE_UNLOCK(zone);
2337 goto zfree_restart;
2338 }
2339
2340 /*
2341 * If nothing else caught this, we'll just do an internal free.
2342 */
2343
2344zfree_internal:
2345
2346#ifdef INVARIANTS
2347 /*
2348 * If we need to skip the dtor and the uma_dbg_free in
2349 * uma_zfree_internal because we've already called the dtor
2350 * above, but we ended up here, then we need to make sure
2351 * that we take care of the uma_dbg_free immediately.
2352 */
2353 if (skip) {
2354 ZONE_LOCK(zone);
2355 if (keg->uk_flags & UMA_ZONE_MALLOC)
2356 uma_dbg_free(zone, udata, item);
2357 else
2358 uma_dbg_free(zone, NULL, item);
2359 ZONE_UNLOCK(zone);
2360 }
2361#endif
2362 uma_zfree_internal(zone, item, udata, skip);
2363
2364 return;
2365}
2366
2367/*
2368 * Frees an item to an INTERNAL zone or allocates a free bucket
2369 *
2370 * Arguments:
2371 * zone The zone to free to
2372 * item The item we're freeing
2373 * udata User supplied data for the dtor
2374 * skip Skip dtors and finis
2375 */
2376static void
2377uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
2378 enum zfreeskip skip)
2379{
2380 uma_slab_t slab;
2381 uma_slabrefcnt_t slabref;
2382 uma_keg_t keg;
2383 u_int8_t *mem;
2384 u_int8_t freei;
2385
2386 keg = zone->uz_keg;
2387
2388 if (skip < SKIP_DTOR && zone->uz_dtor)
2389 zone->uz_dtor(item, keg->uk_size, udata);
2390 if (skip < SKIP_FINI && zone->uz_fini)
2391 zone->uz_fini(item, keg->uk_size);
2392
2393 ZONE_LOCK(zone);
2394
2395 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
2396 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2397 if (keg->uk_flags & UMA_ZONE_HASH)
2398 slab = hash_sfind(&keg->uk_hash, mem);
2399 else {
2400 mem += keg->uk_pgoff;
2401 slab = (uma_slab_t)mem;
2402 }
2403 } else {
2404 slab = (uma_slab_t)udata;
2405 }
2406
2407 /* Do we need to remove from any lists? */
2408 if (slab->us_freecount+1 == keg->uk_ipers) {
2409 LIST_REMOVE(slab, us_link);
2410 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2411 } else if (slab->us_freecount == 0) {
2412 LIST_REMOVE(slab, us_link);
2413 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2414 }
2415
2416 /* Slab management stuff */
2417 freei = ((unsigned long)item - (unsigned long)slab->us_data)
2418 / keg->uk_rsize;
2419
2420#ifdef INVARIANTS
2421 if (!skip)
2422 uma_dbg_free(zone, slab, item);
2423#endif
2424
2425 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2426 slabref = (uma_slabrefcnt_t)slab;
2427 slabref->us_freelist[freei].us_item = slab->us_firstfree;
2428 } else {
2429 slab->us_freelist[freei].us_item = slab->us_firstfree;
2430 }
2431 slab->us_firstfree = freei;
2432 slab->us_freecount++;
2433
2434 /* Zone statistics */
2435 keg->uk_free++;
2436
2437 if (keg->uk_flags & UMA_ZFLAG_FULL) {
2438 if (keg->uk_pages < keg->uk_maxpages)
2439 keg->uk_flags &= ~UMA_ZFLAG_FULL;
2440
2441 /* We can handle one more allocation */
2442 wakeup_one(keg);
2443 }
2444
2445 ZONE_UNLOCK(zone);
2446}
2447
2448/* See uma.h */
2449void
2450uma_zone_set_max(uma_zone_t zone, int nitems)
2451{
2452 uma_keg_t keg;
2453
2454 keg = zone->uz_keg;
2455 ZONE_LOCK(zone);
2456 if (keg->uk_ppera > 1)
2457 keg->uk_maxpages = nitems * keg->uk_ppera;
2458 else
2459 keg->uk_maxpages = nitems / keg->uk_ipers;
2460
2461 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2462 keg->uk_maxpages++;
2463
2464 ZONE_UNLOCK(zone);
2465}
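/*
 * A worked example of the conversion above, with illustrative values:
 * for a keg with uk_ipers == 30 and uk_ppera == 1,
 * uma_zone_set_max(zone, 100) computes
 *
 *	uk_maxpages = 100 / 30 = 3, and since 3 * 30 = 90 < 100,
 *	uk_maxpages is bumped to 4,
 *
 * i.e. the limit is rounded up to a whole number of slabs (here
 * 4 * 30 = 120 items), never down.
 */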
2466
2467/* See uma.h */
2468void
2469uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2470{
2471 ZONE_LOCK(zone);
2472 KASSERT(zone->uz_keg->uk_pages == 0,
2473 ("uma_zone_set_init on non-empty keg"));
2474 zone->uz_keg->uk_init = uminit;
2475 ZONE_UNLOCK(zone);
2476}
2477
2478/* See uma.h */
2479void
2480uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2481{
2482 ZONE_LOCK(zone);
2483 KASSERT(zone->uz_keg->uk_pages == 0,
2484 ("uma_zone_set_fini on non-empty keg"));
2485 zone->uz_keg->uk_fini = fini;
2486 ZONE_UNLOCK(zone);
2487}
2488
2489/* See uma.h */
2490void
2491uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2492{
2493 ZONE_LOCK(zone);
2494 KASSERT(zone->uz_keg->uk_pages == 0,
2495 ("uma_zone_set_zinit on non-empty keg"));
2496 zone->uz_init = zinit;
2497 ZONE_UNLOCK(zone);
2498}
2499
2500/* See uma.h */
2501void
2502uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2503{
2504 ZONE_LOCK(zone);
2505 KASSERT(zone->uz_keg->uk_pages == 0,
2506 ("uma_zone_set_zfini on non-empty keg"));
2507 zone->uz_fini = zfini;
2508 ZONE_UNLOCK(zone);
2509}
2510
2511/* See uma.h */
2512/* XXX uk_freef is not actually used with the zone locked */
2513void
2514uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2515{
2516 ZONE_LOCK(zone);
2517 zone->uz_keg->uk_freef = freef;
2518 ZONE_UNLOCK(zone);
2519}
2520
2521/* See uma.h */
2522/* XXX uk_allocf is not actually used with the zone locked */
2523void
2524uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2525{
2526 ZONE_LOCK(zone);
2527 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2528 zone->uz_keg->uk_allocf = allocf;
2529 ZONE_UNLOCK(zone);
2530}
2531
2532/* See uma.h */
2533int
2534uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2535{
2536 uma_keg_t keg;
2537 vm_offset_t kva;
2538 int pages;
2539
2540 keg = zone->uz_keg;
2541 pages = count / keg->uk_ipers;
2542
2543 if (pages * keg->uk_ipers < count)
2544 pages++;
2545
2546 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2547
2548 if (kva == 0)
2549 return (0);
2550 if (obj == NULL) {
2551 obj = vm_object_allocate(OBJT_DEFAULT,
2552 pages);
2553 } else {
2554 VM_OBJECT_LOCK_INIT(obj, "uma object");
2555 _vm_object_allocate(OBJT_DEFAULT,
2556 pages, obj);
2557 }
2558 ZONE_LOCK(zone);
2559 keg->uk_kva = kva;
2560 keg->uk_obj = obj;
2561 keg->uk_maxpages = pages;
2562 keg->uk_allocf = obj_alloc;
2563 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2564 ZONE_UNLOCK(zone);
2565 return (1);
2566}
2567
2568/* See uma.h */
2569void
2570uma_prealloc(uma_zone_t zone, int items)
2571{
2572 int slabs;
2573 uma_slab_t slab;
2574 uma_keg_t keg;
2575
2576 keg = zone->uz_keg;
2577 ZONE_LOCK(zone);
2578 slabs = items / keg->uk_ipers;
2579 if (slabs * keg->uk_ipers < items)
2580 slabs++;
2581 while (slabs > 0) {
2582 slab = slab_zalloc(zone, M_WAITOK);
2583 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2584 slabs--;
2585 }
2586 ZONE_UNLOCK(zone);
2587}
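/*
 * A sketch of uma_prealloc() use, for a zone that wants a reserve of
 * items populated before it is ever under memory pressure (the zone
 * name is invented):
 *
 *	uma_prealloc(foo_zone, 256);
 *
 * which allocates 256 / uk_ipers slabs, rounded up, straight onto the
 * keg's free-slab list.
 */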
2588
2589/* See uma.h */
2590u_int32_t *
2591uma_find_refcnt(uma_zone_t zone, void *item)
2592{
2593 uma_slabrefcnt_t slabref;
2594 uma_keg_t keg;
2595 u_int32_t *refcnt;
2596 int idx;
2597
2598 keg = zone->uz_keg;
2599 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2600 (~UMA_SLAB_MASK));
2601 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2602 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2603 idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2604 / keg->uk_rsize;
2605 refcnt = &slabref->us_freelist[idx].us_refcnt;
2606 return refcnt;
2607}
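/*
 * A sketch of uma_find_refcnt() use with a UMA_ZONE_REFCNT zone (the
 * zone and item names are invented). UMA only provides the 32-bit
 * counter storage alongside each item; the consumer interprets it:
 *
 *	u_int32_t *refcnt;
 *
 *	refcnt = uma_find_refcnt(refzone, item);
 *	*refcnt = 1;
 */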
2608
2609/* See uma.h */
2610void
2611uma_reclaim(void)
2612{
2613#ifdef UMA_DEBUG
2614 printf("UMA: vm asked us to release pages!\n");
2615#endif
2616 bucket_enable();
2617 zone_foreach(zone_drain);
2618 /*
2619 * Some slabs may have been freed but this zone will be visited early
2620 * in the walk above, so we visit it again to free pages that only
2621 * became empty once other zones were drained. The same goes for buckets.
2622 */
2623 zone_drain(slabzone);
2624 zone_drain(slabrefzone);
2625 bucket_zone_drain();
2626}
2627
2628void *
2629uma_large_malloc(int size, int wait)
2630{
2631 void *mem;
2632 uma_slab_t slab;
2633 u_int8_t flags;
2634
2635 slab = uma_zalloc_internal(slabzone, NULL, wait);
2636 if (slab == NULL)
2637 return (NULL);
2638 mem = page_alloc(NULL, size, &flags, wait);
2639 if (mem) {
2640 vsetslab((vm_offset_t)mem, slab);
2641 slab->us_data = mem;
2642 slab->us_flags = flags | UMA_SLAB_MALLOC;
2643 slab->us_size = size;
2644 } else {
2645 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE);
2646 }
2647
2648 return (mem);
2649}
2650
2651void
2652uma_large_free(uma_slab_t slab)
2653{
2654 vsetobj((vm_offset_t)slab->us_data, kmem_object);
2655 page_free(slab->us_data, slab->us_size, slab->us_flags);
2656 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE);
2657}
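/*
 * uma_large_malloc()/uma_large_free() back allocations too big for the
 * regular zones; the slab header taken from slabzone is what remembers
 * the size and flags for the later free. Roughly:
 *
 *	mem = uma_large_malloc(len, M_WAITOK);
 *	...
 *	slab = vtoslab((vm_offset_t)mem & (~UMA_SLAB_MASK));
 *	uma_large_free(slab);
 *
 * The vtoslab() lookup sketched here is the caller's job (it is the
 * sort of thing free(9) does to recover the slab); it is not done for
 * you by uma_large_free().
 */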
2658
2659void
2660uma_print_stats(void)
2661{
2662 zone_foreach(uma_print_zone);
2663}
2664
2665static void
2666slab_print(uma_slab_t slab)
2667{
2668 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2669 slab->us_keg, slab->us_data, slab->us_freecount,
2670 slab->us_firstfree);
2671}
2672
2673static void
2674cache_print(uma_cache_t cache)
2675{
2676 printf("alloc: %p(%d), free: %p(%d)\n",
2677 cache->uc_allocbucket,
2678 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2679 cache->uc_freebucket,
2680 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2681}
2682
2683void
2684uma_print_zone(uma_zone_t zone)
2685{
2686 uma_cache_t cache;
2687 uma_keg_t keg;
2688 uma_slab_t slab;
2689 int i;
2690
2691 keg = zone->uz_keg;
2692 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2693 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2694 keg->uk_ipers, keg->uk_ppera,
2695 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
2696 printf("Part slabs:\n");
2697 LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2698 slab_print(slab);
2699 printf("Free slabs:\n");
2700 LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2701 slab_print(slab);
2702 printf("Full slabs:\n");
2703 LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2704 slab_print(slab);
2705 for (i = 0; i <= mp_maxid; i++) {
2706 if (CPU_ABSENT(i))
2707 continue;
2708 cache = &zone->uz_cpu[i];
2709 printf("CPU %d Cache:\n", i);
2710 cache_print(cache);
2711 }
2712}
2713
2714/*
2715 * Sysctl handler for vm.zone
2716 *
2717 * stolen from vm_zone.c
2718 */
2719static int
2720sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2721{
2722 int error, len, cnt;
2723 const int linesize = 128; /* conservative */
2724 int totalfree;
2725 char *tmpbuf, *offset;
2726 uma_zone_t z;
2727 uma_keg_t zk;
2728 char *p;
2729 int cpu;
2730 int cachefree;
2731 uma_bucket_t bucket;
2732 uma_cache_t cache;
2733
2734 cnt = 0;
2735 mtx_lock(&uma_mtx);
2736 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2737 LIST_FOREACH(z, &zk->uk_zones, uz_link)
2738 cnt++;
2739 }
2740 mtx_unlock(&uma_mtx);
2741 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2742 M_TEMP, M_WAITOK);
2743 len = snprintf(tmpbuf, linesize,
2744 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2745 if (cnt == 0)
2746 tmpbuf[len - 1] = '\0';
2747 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2748 if (error || cnt == 0)
2749 goto out;
2750 offset = tmpbuf;
2751 mtx_lock(&uma_mtx);
2752 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2753 LIST_FOREACH(z, &zk->uk_zones, uz_link) {
2754 if (cnt == 0) /* list may have changed size */
2755 break;
2756 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2757 for (cpu = 0; cpu <= mp_maxid; cpu++) {
2758 if (CPU_ABSENT(cpu))
2759 continue;
2760 CPU_LOCK(cpu);
2761 }
2762 }
2763 ZONE_LOCK(z);
2764 cachefree = 0;
2765 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2766 for (cpu = 0; cpu <= mp_maxid; cpu++) {
2767 if (CPU_ABSENT(cpu))
2768 continue;
2769 cache = &z->uz_cpu[cpu];
2770 if (cache->uc_allocbucket != NULL)
2771 cachefree += cache->uc_allocbucket->ub_cnt;
2772 if (cache->uc_freebucket != NULL)
2773 cachefree += cache->uc_freebucket->ub_cnt;
2774 CPU_UNLOCK(cpu);
2775 }
2776 }
2777 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
2778 cachefree += bucket->ub_cnt;
2779 }
2780 totalfree = zk->uk_free + cachefree;
2781 len = snprintf(offset, linesize,
2782 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2783 z->uz_name, zk->uk_size,
2784 zk->uk_maxpages * zk->uk_ipers,
2785 (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
2786 totalfree,
2787 (unsigned long long)z->uz_allocs);
2788 ZONE_UNLOCK(z);
2789 for (p = offset + 12; p > offset && *p == ' '; --p)
2790 /* nothing */ ;
2791 p[1] = ':';
2792 cnt--;
2793 offset += len;
2794 }
2795 }
2796 mtx_unlock(&uma_mtx);
2797 *offset++ = '\0';
2798 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2799out:
2800 FREE(tmpbuf, M_TEMP);
2801 return (error);
2802}
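/*
 * This handler backs the "vm.zone" sysctl. Per zone, the line printed
 * above reports
 *
 *	LIMIT = uk_maxpages * uk_ipers	(0 if no limit was set)
 *	USED  = uk_ipers * (uk_pages / uk_ppera) - (uk_free + cachefree)
 *	FREE  = uk_free + cachefree
 *
 * where cachefree folds in the items sitting in per-CPU buckets and in
 * full buckets on the zone, which is why the per-CPU cache locks are
 * held while the buckets are counted.
 */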