uma_core.c (251983) -> uma_core.c (252040)
1/*-
2 * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
3 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
4 * Copyright (c) 2004-2006 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * uma_core.c Implementation of the Universal Memory allocator
31 *
32 * This allocator is intended to replace the multitude of similar object caches
33 * in the standard FreeBSD kernel. The intent is to be flexible as well as
34 * efficient. A primary design goal is to return unused memory to the rest of
35 * the system. This will make the system as a whole more flexible due to the
36 * ability to move memory to subsystems which most need it instead of leaving
37 * pools of reserved memory unused.
38 *
39 * The basic ideas stem from similar slab/zone based allocators whose algorithms
40 * are well known.
41 *
42 */
43
44/*
45 * TODO:
46 * - Improve memory usage for large allocations
47 * - Investigate cache size adjustments
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/vm/uma_core.c 251983 2013-06-19 02:30:32Z jeff $");
51__FBSDID("$FreeBSD: head/sys/vm/uma_core.c 252040 2013-06-20 19:08:12Z jeff $");
52
53/* I should really use ktr.. */
54/*
55#define UMA_DEBUG 1
56#define UMA_DEBUG_ALLOC 1
57#define UMA_DEBUG_ALLOC_1 1
58*/
59
60#include "opt_ddb.h"
61#include "opt_param.h"
62#include "opt_vm.h"
63
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/bitset.h>
67#include <sys/kernel.h>
68#include <sys/types.h>
69#include <sys/queue.h>
70#include <sys/malloc.h>
71#include <sys/ktr.h>
72#include <sys/lock.h>
73#include <sys/sysctl.h>
74#include <sys/mutex.h>
75#include <sys/proc.h>
76#include <sys/rwlock.h>
77#include <sys/sbuf.h>
78#include <sys/smp.h>
79#include <sys/vmmeter.h>
80
81#include <vm/vm.h>
82#include <vm/vm_object.h>
83#include <vm/vm_page.h>
84#include <vm/vm_pageout.h>
85#include <vm/vm_param.h>
86#include <vm/vm_map.h>
87#include <vm/vm_kern.h>
88#include <vm/vm_extern.h>
89#include <vm/uma.h>
90#include <vm/uma_int.h>
91#include <vm/uma_dbg.h>
92
93#include <ddb/ddb.h>
94
95#ifdef DEBUG_MEMGUARD
96#include <vm/memguard.h>
97#endif
98
99/*
100 * This is the zone and keg from which all zones are spawned. The idea is that
101 * even the zone & keg heads are allocated from the allocator, so we use the
102 * bss section to bootstrap us.
103 */
104static struct uma_keg masterkeg;
105static struct uma_zone masterzone_k;
106static struct uma_zone masterzone_z;
107static uma_zone_t kegs = &masterzone_k;
108static uma_zone_t zones = &masterzone_z;
109
110/* This is the zone from which all of uma_slab_t's are allocated. */
111static uma_zone_t slabzone;
112static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
113
114/*
115 * The initial hash tables come out of this zone so they can be allocated
116 * prior to malloc coming up.
117 */
118static uma_zone_t hashzone;
119
120/* The boot-time adjusted value for cache line alignment. */
121int uma_align_cache = 64 - 1;
122
123static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
124
125/*
126 * Are we allowed to allocate buckets?
127 */
128static int bucketdisable = 1;
129
130/* Linked list of all kegs in the system */
131static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
132
133/* This mutex protects the keg list */
134static struct mtx_padalign uma_mtx;
135
136/* Linked list of boot time pages */
137static LIST_HEAD(,uma_slab) uma_boot_pages =
138 LIST_HEAD_INITIALIZER(uma_boot_pages);
139
140/* This mutex protects the boot time pages list */
141static struct mtx_padalign uma_boot_pages_mtx;
142
143/* Is the VM done starting up? */
144static int booted = 0;
145#define UMA_STARTUP 1
146#define UMA_STARTUP2 2
147
148/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
149static const u_int uma_max_ipers = SLAB_SETSIZE;
150
151/*
152 * Only mbuf clusters use ref zones. Just provide enough references
153 * to support the one user. New code should not use the ref facility.
154 */
155static const u_int uma_max_ipers_ref = PAGE_SIZE / MCLBYTES;
156
157/*
158 * This is the handle used to schedule events that need to happen
159 * outside of the allocation fast path.
160 */
161static struct callout uma_callout;
162#define UMA_TIMEOUT 20 /* Seconds for callout interval. */
163
164/*
165 * This structure is passed as the zone ctor arg so that I don't have to create
166 * a special allocation function just for zones.
167 */
168struct uma_zctor_args {
169 const char *name;
170 size_t size;
171 uma_ctor ctor;
172 uma_dtor dtor;
173 uma_init uminit;
174 uma_fini fini;
175 uma_import import;
176 uma_release release;
177 void *arg;
178 uma_keg_t keg;
179 int align;
180 uint32_t flags;
181};
182
183struct uma_kctor_args {
184 uma_zone_t zone;
185 size_t size;
186 uma_init uminit;
187 uma_fini fini;
188 int align;
189 uint32_t flags;
190};
191
192struct uma_bucket_zone {
193 uma_zone_t ubz_zone;
194 char *ubz_name;
195 int ubz_entries; /* Number of items it can hold. */
196 int ubz_maxsize; /* Maximum allocation size per-item. */
197};
198
199/*
200 * Compute the actual number of bucket entries to pack them in power
201 * of two sizes for more efficient space utilization.
202 */
203#define BUCKET_SIZE(n) \
204 (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
205
206#define BUCKET_MAX BUCKET_SIZE(128)
207
208struct uma_bucket_zone bucket_zones[] = {
209 { NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
210 { NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
211 { NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
212 { NULL, NULL, 0}
213};
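The BUCKET_SIZE() macro and bucket_zones[] table above size each bucket so that the header plus its pointer array consume exactly n pointer-sized slots, keeping every bucket allocation at a power of two. A minimal standalone sketch of that arithmetic follows; the stand-in header struct is an assumption for illustration, not the real struct uma_bucket from uma_int.h.

#include <stdio.h>

/* Stand-in for struct uma_bucket; the real layout lives in uma_int.h. */
struct sketch_bucket {
	void	*sb_link[2];	/* list linkage */
	short	 sb_cnt;	/* filled entries */
	short	 sb_entries;	/* capacity */
};

#define	SKETCH_BUCKET_SIZE(n)						\
	(((sizeof(void *) * (n)) - sizeof(struct sketch_bucket)) /	\
	    sizeof(void *))

int
main(void)
{
	size_t entries = SKETCH_BUCKET_SIZE(64);

	/*
	 * With 8-byte pointers the "64 Bucket" zone spends 64 * 8 = 512
	 * bytes per bucket; the header is carved out of that budget, so
	 * the usable capacity is 64 minus a few pointer-sized slots.
	 */
	printf("entries %zu, total %zu bytes\n", entries,
	    sizeof(struct sketch_bucket) + entries * sizeof(void *));
	return (0);
}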
214static uma_zone_t largebucket;
215
216/*
217 * Flags and enumerations to be passed to internal functions.
218 */
219enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
220
221/* Prototypes.. */
222
223static void *noobj_alloc(uma_zone_t, int, uint8_t *, int);
224static void *page_alloc(uma_zone_t, int, uint8_t *, int);
225static void *startup_alloc(uma_zone_t, int, uint8_t *, int);
226static void page_free(void *, int, uint8_t);
227static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
228static void cache_drain(uma_zone_t);
229static void bucket_drain(uma_zone_t, uma_bucket_t);
230static void bucket_cache_drain(uma_zone_t zone);
231static int keg_ctor(void *, int, void *, int);
232static void keg_dtor(void *, int, void *);
233static int zone_ctor(void *, int, void *, int);
234static void zone_dtor(void *, int, void *);
235static int zero_init(void *, int, int);
236static void keg_small_init(uma_keg_t keg);
237static void keg_large_init(uma_keg_t keg);
238static void zone_foreach(void (*zfunc)(uma_zone_t));
239static void zone_timeout(uma_zone_t zone);
240static int hash_alloc(struct uma_hash *);
241static int hash_expand(struct uma_hash *, struct uma_hash *);
242static void hash_free(struct uma_hash *hash);
243static void uma_timeout(void *);
244static void uma_startup3(void);
245static void *zone_alloc_item(uma_zone_t, void *, int);
246static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
247static void bucket_enable(void);
248static void bucket_init(void);
249static uma_bucket_t bucket_alloc(int, int);
250static void bucket_free(uma_bucket_t);
249static uma_bucket_t bucket_alloc(uma_zone_t zone, int);
250static void bucket_free(uma_zone_t zone, uma_bucket_t);
251static void bucket_zone_drain(void);
252static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, int flags);
253static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
254static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
255static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
256static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
257static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
258 uma_fini fini, int align, uint32_t flags);
259static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
260static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
261static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
262static void zone_release(uma_zone_t zone, void **bucket, int cnt);
263
264void uma_print_zone(uma_zone_t);
265void uma_print_stats(void);
266static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
267static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
268
269SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
270
271SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
272 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
273
274SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
275 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
276
277static int zone_warnings = 1;
278TUNABLE_INT("vm.zone_warnings", &zone_warnings);
279SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RW, &zone_warnings, 0,
280 "Warn when UMA zones becomes full");
281
282/*
283 * This routine checks to see whether or not it's safe to enable buckets.
284 */
285static void
286bucket_enable(void)
287{
288 bucketdisable = vm_page_count_min();
289}
290
291/*
292 * Initialize bucket_zones, the array of zones of buckets of various sizes.
293 *
294 * For each zone, calculate the memory required for each bucket, consisting
295 * of the header and an array of pointers.
296 */
297static void
298bucket_init(void)
299{
300 struct uma_bucket_zone *ubz;
301 int size;
302 int i;
303
304 for (i = 0, ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
305 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
306 size += sizeof(void *) * ubz->ubz_entries;
307 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
308 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
309 UMA_ZONE_MAXBUCKET | UMA_ZONE_MTXCLASS);
310 }
311 /*
312 * To avoid recursive bucket allocation loops we disable buckets
313 * on the smallest bucket zone and use it for the largest zone.
314 * The remainder of the zones all use the largest zone.
315 */
316 ubz--;
317 ubz->ubz_zone->uz_count = bucket_zones[0].ubz_entries;
318 bucket_zones[0].ubz_zone->uz_count = 0;
319 largebucket = ubz->ubz_zone;
320}
321
322/*
323 * Given a desired number of entries for a bucket, return the zone from which
324 * to allocate the bucket.
325 */
326static struct uma_bucket_zone *
327bucket_zone_lookup(int entries)
328{
329 struct uma_bucket_zone *ubz;
330
331 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
332 if (ubz->ubz_entries >= entries)
333 return (ubz);
334 ubz--;
335 return (ubz);
336}
337
338static int
339bucket_select(int size)
340{
341 struct uma_bucket_zone *ubz;
342
343 ubz = &bucket_zones[0];
344 if (size > ubz->ubz_maxsize)
345 return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
346
347 for (; ubz->ubz_entries != 0; ubz++)
348 if (ubz->ubz_maxsize < size)
349 break;
350 ubz--;
351 return (ubz->ubz_entries);
352}
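As a quick trace of bucket_select() against the bucket_zones[] table above: 64-byte items map to the 128-entry bucket zone, 200-byte items to the 64-entry zone, 400-byte items to the 32-entry zone, and oversized items get a proportionally reduced count. The sketch below reproduces that logic with approximate entry counts; only the maxsize values are taken from this file.

#include <stdio.h>

struct sel { int entries, maxsize; };

/* Entry counts approximated; the maxsize column matches bucket_zones[]. */
static struct sel zones[] = { { 32, 512 }, { 64, 256 }, { 128, 128 }, { 0, 0 } };

static int
sketch_select(int size)
{
	struct sel *z = &zones[0];
	int n;

	if (size > z->maxsize) {	/* huge items: scale the count down */
		n = (z->maxsize * z->entries) / size;
		return (n > 1 ? n : 1);
	}
	for (; z->entries != 0; z++)
		if (z->maxsize < size)
			break;
	z--;
	return (z->entries);
}

int
main(void)
{
	printf("%d %d %d %d\n", sketch_select(64), sketch_select(200),
	    sketch_select(400), sketch_select(2048));	/* 128 64 32 8 */
	return (0);
}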
353
354static uma_bucket_t
355bucket_alloc(int entries, int bflags)
353bucket_alloc(uma_zone_t zone, int flags)
356{
357 struct uma_bucket_zone *ubz;
358 uma_bucket_t bucket;
359
360 /*
361 * This is to stop us from allocating per cpu buckets while we're
362 * running out of vm.boot_pages. Otherwise, we would exhaust the
363 * boot pages. This also prevents us from allocating buckets in
364 * low memory situations.
365 */
366 if (bucketdisable)
367 return (NULL);
368
369 ubz = bucket_zone_lookup(entries);
370 bucket = uma_zalloc(ubz->ubz_zone, bflags);
367 if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
368 flags |= M_NOVM;
369 ubz = bucket_zone_lookup(zone->uz_count);
370 bucket = uma_zalloc(ubz->ubz_zone, flags);
371 if (bucket) {
372#ifdef INVARIANTS
373 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
374#endif
375 bucket->ub_cnt = 0;
376 bucket->ub_entries = ubz->ubz_entries;
377 }
378
379 return (bucket);
380}
381
382static void
383bucket_free(uma_bucket_t bucket)
383bucket_free(uma_zone_t zone, uma_bucket_t bucket)
384{
385 struct uma_bucket_zone *ubz;
386
387 KASSERT(bucket->ub_cnt == 0,
388 ("bucket_free: Freeing a non free bucket."));
389 ubz = bucket_zone_lookup(bucket->ub_entries);
390 uma_zfree(ubz->ubz_zone, bucket);
391}
392
393static void
394bucket_zone_drain(void)
395{
396 struct uma_bucket_zone *ubz;
397
398 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
399 zone_drain(ubz->ubz_zone);
400}
401
402static void
403zone_log_warning(uma_zone_t zone)
404{
405 static const struct timeval warninterval = { 300, 0 };
406
407 if (!zone_warnings || zone->uz_warning == NULL)
408 return;
409
410 if (ratecheck(&zone->uz_ratecheck, &warninterval))
411 printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
412}
413
414static void
415zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
416{
417 uma_klink_t klink;
418
419 LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
420 kegfn(klink->kl_keg);
421}
422
423/*
424 * Routine called by timeout which is used to fire off some time interval
425 * based calculations. (stats, hash size, etc.)
426 *
427 * Arguments:
428 * arg Unused
429 *
430 * Returns:
431 * Nothing
432 */
433static void
434uma_timeout(void *unused)
435{
436 bucket_enable();
437 zone_foreach(zone_timeout);
438
439 /* Reschedule this event */
440 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
441}
442
443/*
444 * Routine to perform timeout driven calculations. This expands the
445 * hashes and does per cpu statistics aggregation.
446 *
447 * Returns nothing.
448 */
449static void
450keg_timeout(uma_keg_t keg)
451{
452
453 KEG_LOCK(keg);
454 /*
455 * Expand the keg hash table.
456 *
457 * This is done if the number of slabs is larger than the hash size.
458 * What I'm trying to do here is completely reduce collisions. This
459 * may be a little aggressive. Should I allow for two collisions max?
460 */
461 if (keg->uk_flags & UMA_ZONE_HASH &&
462 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
463 struct uma_hash newhash;
464 struct uma_hash oldhash;
465 int ret;
466
467 /*
468 * This is so involved because allocating and freeing
469 * while the keg lock is held will lead to deadlock.
470 * I have to do everything in stages and check for
471 * races.
472 */
473 newhash = keg->uk_hash;
474 KEG_UNLOCK(keg);
475 ret = hash_alloc(&newhash);
476 KEG_LOCK(keg);
477 if (ret) {
478 if (hash_expand(&keg->uk_hash, &newhash)) {
479 oldhash = keg->uk_hash;
480 keg->uk_hash = newhash;
481 } else
482 oldhash = newhash;
483
484 KEG_UNLOCK(keg);
485 hash_free(&oldhash);
486 KEG_LOCK(keg);
487 }
488 }
489 KEG_UNLOCK(keg);
490}
491
492static void
493zone_timeout(uma_zone_t zone)
494{
495
496 zone_foreach_keg(zone, &keg_timeout);
497}
498
499/*
500 * Allocate and zero fill the next sized hash table from the appropriate
501 * backing store.
502 *
503 * Arguments:
504 * hash A new hash structure with the old hash size in uh_hashsize
505 *
506 * Returns:
507 * 1 on success and 0 on failure.
508 */
509static int
510hash_alloc(struct uma_hash *hash)
511{
512 int oldsize;
513 int alloc;
514
515 oldsize = hash->uh_hashsize;
516
517 /* We're just going to go to a power of two greater */
518 if (oldsize) {
519 hash->uh_hashsize = oldsize * 2;
520 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
521 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
522 M_UMAHASH, M_NOWAIT);
523 } else {
524 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
525 hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
526 M_WAITOK);
527 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
528 }
529 if (hash->uh_slab_hash) {
530 bzero(hash->uh_slab_hash, alloc);
531 hash->uh_hashmask = hash->uh_hashsize - 1;
532 return (1);
533 }
534
535 return (0);
536}
537
538/*
539 * Expands the hash table for HASH zones. This is done from zone_timeout
540 * to reduce collisions. This must not be done in the regular allocation
541 * path, otherwise, we can recurse on the vm while allocating pages.
542 *
543 * Arguments:
544 * oldhash The hash you want to expand
545 * newhash The hash structure for the new table
546 *
547 * Returns:
548 * Nothing
549 *
550 * Discussion:
551 */
552static int
553hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
554{
555 uma_slab_t slab;
556 int hval;
557 int i;
558
559 if (!newhash->uh_slab_hash)
560 return (0);
561
562 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
563 return (0);
564
565 /*
566 * I need to investigate hash algorithms for resizing without a
567 * full rehash.
568 */
569
570 for (i = 0; i < oldhash->uh_hashsize; i++)
571 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
572 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
573 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
574 hval = UMA_HASH(newhash, slab->us_data);
575 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
576 slab, us_hlink);
577 }
578
579 return (1);
580}
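hash_alloc() always grows the table to the next power of two so that uh_hashmask = uh_hashsize - 1 can stand in for a modulo on the lookup path. The sketch below shows that masking trick; the hash function itself is an assumption here, since the real UMA_HASH() macro lives in uma_int.h and may differ.

#include <stdint.h>
#include <stdio.h>

static unsigned
sketch_hash(uintptr_t data, unsigned hashmask)
{
	/*
	 * Slab data addresses are page aligned (4 KB pages assumed), so
	 * drop the low bits and mask; with a power-of-two table size,
	 * "& hashmask" is equivalent to "% hashsize" but cheaper.
	 */
	return ((unsigned)((data >> 12) & hashmask));
}

int
main(void)
{
	unsigned hashsize = 32;		/* always a power of two */
	unsigned hashmask = hashsize - 1;

	printf("bucket %u of %u\n",
	    sketch_hash((uintptr_t)0x7f3a82c47000UL, hashmask), hashsize);
	return (0);
}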
581
582/*
583 * Free the hash bucket to the appropriate backing store.
584 *
585 * Arguments:
586 * slab_hash The hash bucket we're freeing
587 * hashsize The number of entries in that hash bucket
588 *
589 * Returns:
590 * Nothing
591 */
592static void
593hash_free(struct uma_hash *hash)
594{
595 if (hash->uh_slab_hash == NULL)
596 return;
597 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
598 zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
599 else
600 free(hash->uh_slab_hash, M_UMAHASH);
601}
602
603/*
604 * Frees all outstanding items in a bucket
605 *
606 * Arguments:
607 * zone The zone to free to, must be unlocked.
608 * bucket The free/alloc bucket with items, cpu queue must be locked.
609 *
610 * Returns:
611 * Nothing
612 */
613
614static void
615bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
616{
617 int i;
618
619 if (bucket == NULL)
620 return;
621
622 if (zone->uz_fini)
623 for (i = 0; i < bucket->ub_cnt; i++)
624 zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
625 zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
626 bucket->ub_cnt = 0;
627}
628
629/*
630 * Drains the per cpu caches for a zone.
631 *
632 * NOTE: This may only be called while the zone is being torn down, and not
633 * during normal operation. This is necessary in order that we do not have
634 * to migrate CPUs to drain the per-CPU caches.
635 *
636 * Arguments:
637 * zone The zone to drain, must be unlocked.
638 *
639 * Returns:
640 * Nothing
641 */
642static void
643cache_drain(uma_zone_t zone)
644{
645 uma_cache_t cache;
646 int cpu;
647
648 /*
649 * XXX: It is safe to not lock the per-CPU caches, because we're
650 * tearing down the zone anyway. I.e., there will be no further use
651 * of the caches at this point.
652 *
653 * XXX: It would be good to be able to assert that the zone is being
654 * torn down to prevent improper use of cache_drain().
655 *
656 * XXX: We lock the zone before passing into bucket_cache_drain() as
657 * it is used elsewhere. Should the tear-down path be made special
658 * there in some form?
659 */
660 CPU_FOREACH(cpu) {
661 cache = &zone->uz_cpu[cpu];
662 bucket_drain(zone, cache->uc_allocbucket);
663 bucket_drain(zone, cache->uc_freebucket);
664 if (cache->uc_allocbucket != NULL)
665 bucket_free(cache->uc_allocbucket);
665 bucket_free(zone, cache->uc_allocbucket);
666 if (cache->uc_freebucket != NULL)
667 bucket_free(cache->uc_freebucket);
667 bucket_free(zone, cache->uc_freebucket);
668 cache->uc_allocbucket = cache->uc_freebucket = NULL;
669 }
670 ZONE_LOCK(zone);
671 bucket_cache_drain(zone);
672 ZONE_UNLOCK(zone);
673}
674
675/*
676 * Drain the cached buckets from a zone. Expects a locked zone on entry.
677 */
678static void
679bucket_cache_drain(uma_zone_t zone)
680{
681 uma_bucket_t bucket;
682
683 /*
684 * Drain the bucket queues and free the buckets, we just keep two per
685 * cpu (alloc/free).
686 */
687 while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
688 LIST_REMOVE(bucket, ub_link);
689 ZONE_UNLOCK(zone);
690 bucket_drain(zone, bucket);
691 bucket_free(bucket);
691 bucket_free(zone, bucket);
692 ZONE_LOCK(zone);
693 }
694}
695
696static void
697keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
698{
699 uint8_t *mem;
700 int i;
701 uint8_t flags;
702
703 mem = slab->us_data;
704 flags = slab->us_flags;
705 i = start;
706 if (keg->uk_fini != NULL) {
707 for (i--; i > -1; i--)
708 keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
709 keg->uk_size);
710 }
711 if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
712 vm_object_t obj;
713
714 if (flags & UMA_SLAB_KMEM)
715 obj = kmem_object;
716 else if (flags & UMA_SLAB_KERNEL)
717 obj = kernel_object;
718 else
719 obj = NULL;
720 for (i = 0; i < keg->uk_ppera; i++)
721 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE), obj);
722 }
723 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
724 zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
725#ifdef UMA_DEBUG
726 printf("%s: Returning %d bytes.\n", keg->uk_name,
727 PAGE_SIZE * keg->uk_ppera);
728#endif
729 keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
730}
731
732/*
733 * Frees pages from a keg back to the system. This is done on demand from
734 * the pageout daemon.
735 *
736 * Returns nothing.
737 */
738static void
739keg_drain(uma_keg_t keg)
740{
741 struct slabhead freeslabs = { 0 };
742 uma_slab_t slab;
743 uma_slab_t n;
744
745 /*
746 * We don't want to take pages from statically allocated kegs at this
747 * time
748 */
749 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
750 return;
751
752#ifdef UMA_DEBUG
753 printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
754#endif
755 KEG_LOCK(keg);
756 if (keg->uk_free == 0)
757 goto finished;
758
759 slab = LIST_FIRST(&keg->uk_free_slab);
760 while (slab) {
761 n = LIST_NEXT(slab, us_link);
762
763 /* We have nowhere to free these to */
764 if (slab->us_flags & UMA_SLAB_BOOT) {
765 slab = n;
766 continue;
767 }
768
769 LIST_REMOVE(slab, us_link);
770 keg->uk_pages -= keg->uk_ppera;
771 keg->uk_free -= keg->uk_ipers;
772
773 if (keg->uk_flags & UMA_ZONE_HASH)
774 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
775
776 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
777
778 slab = n;
779 }
780finished:
781 KEG_UNLOCK(keg);
782
783 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
784 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
785 keg_free_slab(keg, slab, 0);
786 }
787}
788
789static void
790zone_drain_wait(uma_zone_t zone, int waitok)
791{
792
793 /*
794 * Set draining to interlock with zone_dtor() so we can release our
795 * locks as we go. Only dtor() should do a WAITOK call since it
796 * is the only call that knows the structure will still be available
797 * when it wakes up.
798 */
799 ZONE_LOCK(zone);
800 while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
801 if (waitok == M_NOWAIT)
802 goto out;
803 mtx_unlock(&uma_mtx);
804 msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
804 msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
805 mtx_lock(&uma_mtx);
806 }
807 zone->uz_flags |= UMA_ZFLAG_DRAINING;
808 bucket_cache_drain(zone);
809 ZONE_UNLOCK(zone);
810 /*
811 * The DRAINING flag protects us from being freed while
812 * we're running. Normally the uma_mtx would protect us but we
813 * must be able to release and acquire the right lock for each keg.
814 */
815 zone_foreach_keg(zone, &keg_drain);
816 ZONE_LOCK(zone);
817 zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
818 wakeup(zone);
819out:
820 ZONE_UNLOCK(zone);
821}
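The DRAINING interlock above lets a drainer publish "I am working" under the zone lock, drop the lock for the slow keg_drain() pass, and then wake any waiters. A rough userland analogue of that pattern, using a pthread mutex and condition variable in place of the kernel's msleep()/wakeup() (an assumption for illustration, not how the kernel implements it):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static bool draining = false;

static void
drain(void)
{
	pthread_mutex_lock(&lock);
	while (draining)		/* another drainer is active */
		pthread_cond_wait(&cv, &lock);
	draining = true;		/* counterpart of UMA_ZFLAG_DRAINING */
	pthread_mutex_unlock(&lock);

	puts("doing the slow work with the lock dropped");

	pthread_mutex_lock(&lock);
	draining = false;
	pthread_cond_broadcast(&cv);	/* counterpart of wakeup(zone) */
	pthread_mutex_unlock(&lock);
}

int
main(void)
{
	drain();
	return (0);
}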
822
823void
824zone_drain(uma_zone_t zone)
825{
826
827 zone_drain_wait(zone, M_NOWAIT);
828}
829
830/*
831 * Allocate a new slab for a keg. This does not insert the slab onto a list.
832 *
833 * Arguments:
834 * wait Shall we wait?
835 *
836 * Returns:
837 * The slab that was allocated or NULL if there is no memory and the
838 * caller specified M_NOWAIT.
839 */
840static uma_slab_t
841keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
842{
843 uma_slabrefcnt_t slabref;
844 uma_alloc allocf;
845 uma_slab_t slab;
846 uint8_t *mem;
847 uint8_t flags;
848 int i;
849
850 mtx_assert(&keg->uk_lock, MA_OWNED);
851 slab = NULL;
852 mem = NULL;
853
854#ifdef UMA_DEBUG
855 printf("alloc_slab: Allocating a new slab for %s\n", keg->uk_name);
856#endif
857 allocf = keg->uk_allocf;
858 KEG_UNLOCK(keg);
859
860 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
861 slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
862 if (slab == NULL)
863 goto out;
864 }
865
866 /*
867 * This reproduces the old vm_zone behavior of zero filling pages the
868 * first time they are added to a zone.
869 *
870 * Malloced items are zeroed in uma_zalloc.
871 */
872
873 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
874 wait |= M_ZERO;
875 else
876 wait &= ~M_ZERO;
877
878 if (keg->uk_flags & UMA_ZONE_NODUMP)
879 wait |= M_NODUMP;
880
881 /* zone is passed for legacy reasons. */
882 mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
883 if (mem == NULL) {
884 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
885 zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
886 slab = NULL;
887 goto out;
888 }
889
890 /* Point the slab into the allocated memory */
891 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
892 slab = (uma_slab_t )(mem + keg->uk_pgoff);
893
894 if (keg->uk_flags & UMA_ZONE_VTOSLAB)
895 for (i = 0; i < keg->uk_ppera; i++)
896 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
897
898 slab->us_keg = keg;
899 slab->us_data = mem;
900 slab->us_freecount = keg->uk_ipers;
901 slab->us_flags = flags;
902 BIT_FILL(SLAB_SETSIZE, &slab->us_free);
903#ifdef INVARIANTS
904 BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
905#endif
906 if (keg->uk_flags & UMA_ZONE_REFCNT) {
907 slabref = (uma_slabrefcnt_t)slab;
908 for (i = 0; i < keg->uk_ipers; i++)
909 slabref->us_refcnt[i] = 0;
910 }
911
912 if (keg->uk_init != NULL) {
913 for (i = 0; i < keg->uk_ipers; i++)
914 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
915 keg->uk_size, wait) != 0)
916 break;
917 if (i != keg->uk_ipers) {
918 keg_free_slab(keg, slab, i);
919 slab = NULL;
920 goto out;
921 }
922 }
923out:
924 KEG_LOCK(keg);
925
926 if (slab != NULL) {
927 if (keg->uk_flags & UMA_ZONE_HASH)
928 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
929
930 keg->uk_pages += keg->uk_ppera;
931 keg->uk_free += keg->uk_ipers;
932 }
933
934 return (slab);
935}
936
937/*
938 * This function is intended to be used early on in place of page_alloc() so
939 * that we may use the boot time page cache to satisfy allocations before
940 * the VM is ready.
941 */
942static void *
943startup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
944{
945 uma_keg_t keg;
946 uma_slab_t tmps;
947 int pages, check_pages;
948
949 keg = zone_first_keg(zone);
950 pages = howmany(bytes, PAGE_SIZE);
951 check_pages = pages - 1;
952 KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
953
954 /*
955 * Check our small startup cache to see if it has pages remaining.
956 */
957 mtx_lock(&uma_boot_pages_mtx);
958
959 /* First check if we have enough room. */
960 tmps = LIST_FIRST(&uma_boot_pages);
961 while (tmps != NULL && check_pages-- > 0)
962 tmps = LIST_NEXT(tmps, us_link);
963 if (tmps != NULL) {
964 /*
965 * It's ok to lose tmps references. The last one will
966 * have tmps->us_data pointing to the start address of
967 * "pages" contiguous pages of memory.
968 */
969 while (pages-- > 0) {
970 tmps = LIST_FIRST(&uma_boot_pages);
971 LIST_REMOVE(tmps, us_link);
972 }
973 mtx_unlock(&uma_boot_pages_mtx);
974 *pflag = tmps->us_flags;
975 return (tmps->us_data);
976 }
977 mtx_unlock(&uma_boot_pages_mtx);
978 if (booted < UMA_STARTUP2)
979 panic("UMA: Increase vm.boot_pages");
980 /*
981 * Now that we've booted reset these users to their real allocator.
982 */
983#ifdef UMA_MD_SMALL_ALLOC
984 keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
985#else
986 keg->uk_allocf = page_alloc;
987#endif
988 return keg->uk_allocf(zone, bytes, pflag, wait);
989}
990
991/*
992 * Allocates a number of pages from the system
993 *
994 * Arguments:
995 * bytes The number of bytes requested
996 * wait Shall we wait?
997 *
998 * Returns:
999 * A pointer to the alloced memory or possibly
1000 * NULL if M_NOWAIT is set.
1001 */
1002static void *
1003page_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
1004{
1005 void *p; /* Returned page */
1006
1007 *pflag = UMA_SLAB_KMEM;
1008 p = (void *) kmem_malloc(kmem_map, bytes, wait);
1009
1010 return (p);
1011}
1012
1013/*
1014 * Allocates a number of pages from within an object
1015 *
1016 * Arguments:
1017 * bytes The number of bytes requested
1018 * wait Shall we wait?
1019 *
1020 * Returns:
1021 * A pointer to the alloced memory or possibly
1022 * NULL if M_NOWAIT is set.
1023 */
1024static void *
1025noobj_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
1026{
1027 TAILQ_HEAD(, vm_page) alloctail;
1028 u_long npages;
1029 vm_offset_t retkva, zkva;
1030 vm_page_t p, p_next;
1031 uma_keg_t keg;
1032
1033 TAILQ_INIT(&alloctail);
1034 keg = zone_first_keg(zone);
1035
1036 npages = howmany(bytes, PAGE_SIZE);
1037 while (npages > 0) {
1038 p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
1039 VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
1040 if (p != NULL) {
1041 /*
1042 * Since the page does not belong to an object, its
1043 * listq is unused.
1044 */
1045 TAILQ_INSERT_TAIL(&alloctail, p, listq);
1046 npages--;
1047 continue;
1048 }
1049 if (wait & M_WAITOK) {
1050 VM_WAIT;
1051 continue;
1052 }
1053
1054 /*
1055 * Page allocation failed, free intermediate pages and
1056 * exit.
1057 */
1058 TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
1059 vm_page_unwire(p, 0);
1060 vm_page_free(p);
1061 }
1062 return (NULL);
1063 }
1064 *flags = UMA_SLAB_PRIV;
1065 zkva = keg->uk_kva +
1066 atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1067 retkva = zkva;
1068 TAILQ_FOREACH(p, &alloctail, listq) {
1069 pmap_qenter(zkva, &p, 1);
1070 zkva += PAGE_SIZE;
1071 }
1072
1073 return ((void *)retkva);
1074}
1075
1076/*
1077 * Frees a number of pages to the system
1078 *
1079 * Arguments:
1080 * mem A pointer to the memory to be freed
1081 * size The size of the memory being freed
1082 * flags The original p->us_flags field
1083 *
1084 * Returns:
1085 * Nothing
1086 */
1087static void
1088page_free(void *mem, int size, uint8_t flags)
1089{
1090 vm_map_t map;
1091
1092 if (flags & UMA_SLAB_KMEM)
1093 map = kmem_map;
1094 else if (flags & UMA_SLAB_KERNEL)
1095 map = kernel_map;
1096 else
1097 panic("UMA: page_free used with invalid flags %d", flags);
1098
1099 kmem_free(map, (vm_offset_t)mem, size);
1100}
1101
1102/*
1103 * Zero fill initializer
1104 *
1105 * Arguments/Returns follow uma_init specifications
1106 */
1107static int
1108zero_init(void *mem, int size, int flags)
1109{
1110 bzero(mem, size);
1111 return (0);
1112}
1113
1114/*
1115 * Finish creating a small uma keg. This calculates ipers, and the keg size.
1116 *
1117 * Arguments
1118 * keg The zone we should initialize
1119 *
1120 * Returns
1121 * Nothing
1122 */
1123static void
1124keg_small_init(uma_keg_t keg)
1125{
1126 u_int rsize;
1127 u_int memused;
1128 u_int wastedspace;
1129 u_int shsize;
1130
1131 if (keg->uk_flags & UMA_ZONE_PCPU) {
1132 KASSERT(mp_ncpus > 0, ("%s: ncpus %d\n", __func__, mp_ncpus));
1133 keg->uk_slabsize = sizeof(struct pcpu);
1134 keg->uk_ppera = howmany(mp_ncpus * sizeof(struct pcpu),
1135 PAGE_SIZE);
1136 } else {
1137 keg->uk_slabsize = UMA_SLAB_SIZE;
1138 keg->uk_ppera = 1;
1139 }
1140
1141 /*
1142 * Calculate the size of each allocation (rsize) according to
1143 * alignment. If the requested size is smaller than we have
1144 * allocation bits for we round it up.
1145 */
1146 rsize = keg->uk_size;
1147 if (rsize < keg->uk_slabsize / SLAB_SETSIZE)
1148 rsize = keg->uk_slabsize / SLAB_SETSIZE;
1149 if (rsize & keg->uk_align)
1150 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1151 keg->uk_rsize = rsize;
1152
1153 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1154 keg->uk_rsize < sizeof(struct pcpu),
1155 ("%s: size %u too large", __func__, keg->uk_rsize));
1156
1157 if (keg->uk_flags & UMA_ZONE_REFCNT)
1158 rsize += sizeof(uint32_t);
1159
1160 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1161 shsize = 0;
1162 else
1163 shsize = sizeof(struct uma_slab);
1164
1165 keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
1166 KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1167 ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1168
1169 memused = keg->uk_ipers * rsize + shsize;
1170 wastedspace = keg->uk_slabsize - memused;
1171
1172 /*
1173 * We can't do OFFPAGE if we're internal or if we've been
1174 * asked to not go to the VM for buckets. If we do this we
1175 * may end up going to the VM (kmem_map) for slabs which we
1176 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1177 * result of UMA_ZONE_VM, which clearly forbids it.
1178 */
1179 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1180 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1181 return;
1182
1183 /*
1184 * See if using an OFFPAGE slab will limit our waste. Only do
1185 * this if it permits more items per-slab.
1186 *
1187 * XXX We could try growing slabsize to limit max waste as well.
1188 * Historically this was not done because the VM could not
1189 * efficiently handle contiguous allocations.
1190 */
1191 if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
1192 (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
1193 keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
1194 KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1195 ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1196#ifdef UMA_DEBUG
1197 printf("UMA decided we need offpage slab headers for "
1198 "keg: %s, calculated wastedspace = %d, "
1199 "maximum wasted space allowed = %d, "
1200 "calculated ipers = %d, "
1201 "new wasted space = %d\n", keg->uk_name, wastedspace,
1202 keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1203 keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
1204#endif
1205 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1206 }
1207
1208 if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1209 (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1210 keg->uk_flags |= UMA_ZONE_HASH;
1211}
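/*
 * Editor's sketch, not part of uma_core.c: uk_align is stored as a mask
 * (alignment - 1), so the rounding above works out as follows for a
 * hypothetical 40-byte item with 16-byte alignment (uk_align = 15):
 *
 *	rsize = (40 & ~15) + (15 + 1) = 32 + 16 = 48
 *
 * and, assuming a 4096-byte slab with an in-page header of 80 bytes
 * (an illustrative size only), ipers = (4096 - 80) / 48 = 83.
 */
static inline unsigned int
round_to_align_mask(unsigned int size, unsigned int align_mask)
{
	if (size & align_mask)
		size = (size & ~align_mask) + (align_mask + 1);
	return (size);
}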
1212
1213/*
1214 * Finish creating a large (> UMA_SLAB_SIZE) uma keg. Just give in and do
1215 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
1216 * more complicated.
1217 *
1218 * Arguments
1219 * keg The keg we should initialize
1220 *
1221 * Returns
1222 * Nothing
1223 */
1224static void
1225keg_large_init(uma_keg_t keg)
1226{
1227
1228 KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1229 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1230 ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1231 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1232 ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1233
1234 keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1235 keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
1236 keg->uk_ipers = 1;
1237 keg->uk_rsize = keg->uk_size;
1238
1239 /* We can't do OFFPAGE if we're internal, bail out here. */
1240 if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1241 return;
1242
1243 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1244 if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1245 keg->uk_flags |= UMA_ZONE_HASH;
1246}
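/*
 * Editor's sketch, not part of uma_core.c: howmany(x, y) is round-up
 * division, so a hypothetical 9000-byte item on 4096-byte pages gives
 * uk_ppera = howmany(9000, 4096) = 3 and a 12288-byte slab that still
 * holds exactly one item (uk_ipers = 1).
 */
static inline unsigned long
div_round_up(unsigned long x, unsigned long y)
{
	return ((x + y - 1) / y);	/* equivalent to howmany(x, y) */
}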
1247
1248static void
1249keg_cachespread_init(uma_keg_t keg)
1250{
1251 int alignsize;
1252 int trailer;
1253 int pages;
1254 int rsize;
1255
1256 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1257 ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1258
1259 alignsize = keg->uk_align + 1;
1260 rsize = keg->uk_size;
1261 /*
1262 * We want one item to start on every align boundary in a page. To
1263 * do this we will span pages. We will also extend the item by the
1264 * size of align if it is an even multiple of align. Otherwise, it
1265 * would fall on the same boundary every time.
1266 */
1267 if (rsize & keg->uk_align)
1268 rsize = (rsize & ~keg->uk_align) + alignsize;
1269 if ((rsize & alignsize) == 0)
1270 rsize += alignsize;
1271 trailer = rsize - keg->uk_size;
1272 pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1273 pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1274 keg->uk_rsize = rsize;
1275 keg->uk_ppera = pages;
1276 keg->uk_slabsize = UMA_SLAB_SIZE;
1277 keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1278 keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1279 KASSERT(keg->uk_ipers <= uma_max_ipers,
1280 ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1281 keg->uk_ipers));
1282}
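/*
 * Editor's note, not part of uma_core.c: a worked example of the sizing
 * above, with a hypothetical 256-byte item and 64-byte cache lines
 * (uk_align = 63, alignsize = 64):
 *
 *	rsize = 256 + 64 = 320	(256 is an even multiple of 64, so the
 *				 item is extended by one align unit)
 *	pages = (320 * (4096 / 64)) / 4096 = 5
 *	ipers = (5 * 4096 + 64) / 320 = 64
 *
 * so successive items begin on successive cache-line offsets within a page
 * instead of all starting on the same line.
 */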
1283
1284/*
1285 * Keg header ctor. This initializes all fields, locks, etc., and inserts
1286 * the keg onto the global keg list.
1287 *
1288 * Arguments/Returns follow uma_ctor specifications
1289 * udata Actually uma_kctor_args
1290 */
1291static int
1292keg_ctor(void *mem, int size, void *udata, int flags)
1293{
1294 struct uma_kctor_args *arg = udata;
1295 uma_keg_t keg = mem;
1296 uma_zone_t zone;
1297
1298 bzero(keg, size);
1299 keg->uk_size = arg->size;
1300 keg->uk_init = arg->uminit;
1301 keg->uk_fini = arg->fini;
1302 keg->uk_align = arg->align;
1303 keg->uk_free = 0;
1304 keg->uk_pages = 0;
1305 keg->uk_flags = arg->flags;
1306 keg->uk_allocf = page_alloc;
1307 keg->uk_freef = page_free;
1308 keg->uk_slabzone = NULL;
1309
1310 /*
1311 * The master zone is passed to us at keg-creation time.
1312 */
1313 zone = arg->zone;
1314 keg->uk_name = zone->uz_name;
1315
1316 if (arg->flags & UMA_ZONE_VM)
1317 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1318
1319 if (arg->flags & UMA_ZONE_ZINIT)
1320 keg->uk_init = zero_init;
1321
1322 if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
1323 keg->uk_flags |= UMA_ZONE_VTOSLAB;
1324
1325 if (arg->flags & UMA_ZONE_PCPU)
1326#ifdef SMP
1327 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1328#else
1329 keg->uk_flags &= ~UMA_ZONE_PCPU;
1330#endif
1331
1332 if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1333 keg_cachespread_init(keg);
1334 } else if (keg->uk_flags & UMA_ZONE_REFCNT) {
1335 if (keg->uk_size >
1336 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
1337 sizeof(uint32_t)))
1338 keg_large_init(keg);
1339 else
1340 keg_small_init(keg);
1341 } else {
1342 if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1343 keg_large_init(keg);
1344 else
1345 keg_small_init(keg);
1346 }
1347
1348 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1349 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1350 if (keg->uk_ipers > uma_max_ipers_ref)
1351 panic("Too many ref items per zone: %d > %d\n",
1352 keg->uk_ipers, uma_max_ipers_ref);
1353 keg->uk_slabzone = slabrefzone;
1354 } else
1355 keg->uk_slabzone = slabzone;
1356 }
1357
1358 /*
1359 * If we haven't booted yet we need allocations to go through the
1360 * startup cache until the vm is ready.
1361 */
1362 if (keg->uk_ppera == 1) {
1363#ifdef UMA_MD_SMALL_ALLOC
1364 keg->uk_allocf = uma_small_alloc;
1365 keg->uk_freef = uma_small_free;
1366
1367 if (booted < UMA_STARTUP)
1368 keg->uk_allocf = startup_alloc;
1369#else
1370 if (booted < UMA_STARTUP2)
1371 keg->uk_allocf = startup_alloc;
1372#endif
1373 } else if (booted < UMA_STARTUP2 &&
1374 (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1375 keg->uk_allocf = startup_alloc;
1376
1377 /*
805 mtx_lock(&uma_mtx);
806 }
807 zone->uz_flags |= UMA_ZFLAG_DRAINING;
808 bucket_cache_drain(zone);
809 ZONE_UNLOCK(zone);
810 /*
811 * The DRAINING flag protects us from being freed while
812 * we're running. Normally the uma_mtx would protect us but we
813 * must be able to release and acquire the right lock for each keg.
814 */
815 zone_foreach_keg(zone, &keg_drain);
816 ZONE_LOCK(zone);
817 zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
818 wakeup(zone);
819out:
820 ZONE_UNLOCK(zone);
821}
822
823void
824zone_drain(uma_zone_t zone)
825{
826
827 zone_drain_wait(zone, M_NOWAIT);
828}
829
830/*
831 * Allocate a new slab for a keg. This does not insert the slab onto a list.
832 *
833 * Arguments:
834 * wait Shall we wait?
835 *
836 * Returns:
837 * The slab that was allocated or NULL if there is no memory and the
838 * caller specified M_NOWAIT.
839 */
840static uma_slab_t
841keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
842{
843 uma_slabrefcnt_t slabref;
844 uma_alloc allocf;
845 uma_slab_t slab;
846 uint8_t *mem;
847 uint8_t flags;
848 int i;
849
850 mtx_assert(&keg->uk_lock, MA_OWNED);
851 slab = NULL;
852 mem = NULL;
853
854#ifdef UMA_DEBUG
855 printf("alloc_slab: Allocating a new slab for %s\n", keg->uk_name);
856#endif
857 allocf = keg->uk_allocf;
858 KEG_UNLOCK(keg);
859
860 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
861 slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
862 if (slab == NULL)
863 goto out;
864 }
865
866 /*
867 * This reproduces the old vm_zone behavior of zero filling pages the
868 * first time they are added to a zone.
869 *
870 * Malloced items are zeroed in uma_zalloc.
871 */
872
873 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
874 wait |= M_ZERO;
875 else
876 wait &= ~M_ZERO;
877
878 if (keg->uk_flags & UMA_ZONE_NODUMP)
879 wait |= M_NODUMP;
880
881 /* zone is passed for legacy reasons. */
882 mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
883 if (mem == NULL) {
884 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
885 zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
886 slab = NULL;
887 goto out;
888 }
889
890 /* Point the slab into the allocated memory */
891 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
892 slab = (uma_slab_t )(mem + keg->uk_pgoff);
893
894 if (keg->uk_flags & UMA_ZONE_VTOSLAB)
895 for (i = 0; i < keg->uk_ppera; i++)
896 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
897
898 slab->us_keg = keg;
899 slab->us_data = mem;
900 slab->us_freecount = keg->uk_ipers;
901 slab->us_flags = flags;
902 BIT_FILL(SLAB_SETSIZE, &slab->us_free);
903#ifdef INVARIANTS
904 BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
905#endif
906 if (keg->uk_flags & UMA_ZONE_REFCNT) {
907 slabref = (uma_slabrefcnt_t)slab;
908 for (i = 0; i < keg->uk_ipers; i++)
909 slabref->us_refcnt[i] = 0;
910 }
911
912 if (keg->uk_init != NULL) {
913 for (i = 0; i < keg->uk_ipers; i++)
914 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
915 keg->uk_size, wait) != 0)
916 break;
917 if (i != keg->uk_ipers) {
918 keg_free_slab(keg, slab, i);
919 slab = NULL;
920 goto out;
921 }
922 }
923out:
924 KEG_LOCK(keg);
925
926 if (slab != NULL) {
927 if (keg->uk_flags & UMA_ZONE_HASH)
928 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
929
930 keg->uk_pages += keg->uk_ppera;
931 keg->uk_free += keg->uk_ipers;
932 }
933
934 return (slab);
935}
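/*
 * Editor's sketch, not part of uma_core.c: the uk_init loop above stops at
 * the first item whose initializer fails and then frees the slab with the
 * count of items that did succeed, so only those are torn down.  The bare
 * shape of that pattern, with hypothetical init/fini callbacks:
 */
static int
init_items_or_rollback(char *base, int itemsize, int nitems,
    int (*init)(void *), void (*fini)(void *))
{
	int i;

	for (i = 0; i < nitems; i++)
		if (init(base + i * itemsize) != 0)
			break;
	if (i == nitems)
		return (0);
	while (i-- > 0)			/* undo the ones that succeeded */
		fini(base + i * itemsize);
	return (-1);
}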
936
937/*
938 * This function is intended to be used early on in place of page_alloc() so
939 * that we may use the boot time page cache to satisfy allocations before
940 * the VM is ready.
941 */
942static void *
943startup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
944{
945 uma_keg_t keg;
946 uma_slab_t tmps;
947 int pages, check_pages;
948
949 keg = zone_first_keg(zone);
950 pages = howmany(bytes, PAGE_SIZE);
951 check_pages = pages - 1;
952 KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
953
954 /*
955 * Check our small startup cache to see if it has pages remaining.
956 */
957 mtx_lock(&uma_boot_pages_mtx);
958
959 /* First check if we have enough room. */
960 tmps = LIST_FIRST(&uma_boot_pages);
961 while (tmps != NULL && check_pages-- > 0)
962 tmps = LIST_NEXT(tmps, us_link);
963 if (tmps != NULL) {
964 /*
965 * It's ok to lose tmps references. The last one will
966 * have tmps->us_data pointing to the start address of
967 * "pages" contiguous pages of memory.
968 */
969 while (pages-- > 0) {
970 tmps = LIST_FIRST(&uma_boot_pages);
971 LIST_REMOVE(tmps, us_link);
972 }
973 mtx_unlock(&uma_boot_pages_mtx);
974 *pflag = tmps->us_flags;
975 return (tmps->us_data);
976 }
977 mtx_unlock(&uma_boot_pages_mtx);
978 if (booted < UMA_STARTUP2)
979 panic("UMA: Increase vm.boot_pages");
980 /*
981 * Now that we've booted reset these users to their real allocator.
982 */
983#ifdef UMA_MD_SMALL_ALLOC
984 keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
985#else
986 keg->uk_allocf = page_alloc;
987#endif
988 return keg->uk_allocf(zone, bytes, pflag, wait);
989}
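/*
 * Editor's sketch, not part of uma_core.c: the boot-cache logic above first
 * confirms that at least "pages" entries remain on the list and only then
 * unlinks that many head entries.  A stripped-down analogue using the
 * queue(3) LIST macros and a hypothetical element type:
 */
struct bootent {
	LIST_ENTRY(bootent) b_link;
};
LIST_HEAD(boothead, bootent);

static struct bootent *
boot_take(struct boothead *head, int want)
{
	struct bootent *b;
	int avail;

	if (want <= 0)
		return (NULL);
	avail = 0;
	LIST_FOREACH(b, head, b_link)
		if (++avail == want)
			break;
	if (avail < want)
		return (NULL);
	while (want-- > 0) {		/* pop "want" entries off the head */
		b = LIST_FIRST(head);
		LIST_REMOVE(b, b_link);
	}
	return (b);			/* last entry unlinked */
}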
990
991/*
992 * Allocates a number of pages from the system
993 *
994 * Arguments:
995 * bytes The number of bytes requested
996 * wait Shall we wait?
997 *
998 * Returns:
999 * A pointer to the allocated memory or possibly
1000 * NULL if M_NOWAIT is set.
1001 */
1002static void *
1003page_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
1004{
1005 void *p; /* Returned page */
1006
1007 *pflag = UMA_SLAB_KMEM;
1008 p = (void *) kmem_malloc(kmem_map, bytes, wait);
1009
1010 return (p);
1011}
1012
1013/*
1014 * Allocates a number of pages that do not belong to a VM object
1015 *
1016 * Arguments:
1017 * bytes The number of bytes requested
1018 * wait Shall we wait?
1019 *
1020 * Returns:
1021 * A pointer to the allocated memory or possibly
1022 * NULL if M_NOWAIT is set.
1023 */
1024static void *
1025noobj_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
1026{
1027 TAILQ_HEAD(, vm_page) alloctail;
1028 u_long npages;
1029 vm_offset_t retkva, zkva;
1030 vm_page_t p, p_next;
1031 uma_keg_t keg;
1032
1033 TAILQ_INIT(&alloctail);
1034 keg = zone_first_keg(zone);
1035
1036 npages = howmany(bytes, PAGE_SIZE);
1037 while (npages > 0) {
1038 p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
1039 VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
1040 if (p != NULL) {
1041 /*
1042 * Since the page does not belong to an object, its
1043 * listq is unused.
1044 */
1045 TAILQ_INSERT_TAIL(&alloctail, p, listq);
1046 npages--;
1047 continue;
1048 }
1049 if (wait & M_WAITOK) {
1050 VM_WAIT;
1051 continue;
1052 }
1053
1054 /*
1055 * Page allocation failed, free intermediate pages and
1056 * exit.
1057 */
1058 TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
1059 vm_page_unwire(p, 0);
1060 vm_page_free(p);
1061 }
1062 return (NULL);
1063 }
1064 *flags = UMA_SLAB_PRIV;
1065 zkva = keg->uk_kva +
1066 atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1067 retkva = zkva;
1068 TAILQ_FOREACH(p, &alloctail, listq) {
1069 pmap_qenter(zkva, &p, 1);
1070 zkva += PAGE_SIZE;
1071 }
1072
1073 return ((void *)retkva);
1074}
1075
1076/*
1077 * Frees a number of pages to the system
1078 *
1079 * Arguments:
1080 * mem A pointer to the memory to be freed
1081 * size The size of the memory being freed
1082 * flags The original p->us_flags field
1083 *
1084 * Returns:
1085 * Nothing
1086 */
1087static void
1088page_free(void *mem, int size, uint8_t flags)
1089{
1090 vm_map_t map;
1091
1092 if (flags & UMA_SLAB_KMEM)
1093 map = kmem_map;
1094 else if (flags & UMA_SLAB_KERNEL)
1095 map = kernel_map;
1096 else
1097 panic("UMA: page_free used with invalid flags %d", flags);
1098
1099 kmem_free(map, (vm_offset_t)mem, size);
1100}
1101
1102/*
1103 * Zero fill initializer
1104 *
1105 * Arguments/Returns follow uma_init specifications
1106 */
1107static int
1108zero_init(void *mem, int size, int flags)
1109{
1110 bzero(mem, size);
1111 return (0);
1112}
1113
1114/*
1115 * Finish creating a small uma keg. This calculates ipers, and the keg size.
1116 *
1117 * Arguments
1118 * keg The keg we should initialize
1119 *
1120 * Returns
1121 * Nothing
1122 */
1123static void
1124keg_small_init(uma_keg_t keg)
1125{
1126 u_int rsize;
1127 u_int memused;
1128 u_int wastedspace;
1129 u_int shsize;
1130
1131 if (keg->uk_flags & UMA_ZONE_PCPU) {
1132 KASSERT(mp_ncpus > 0, ("%s: ncpus %d\n", __func__, mp_ncpus));
1133 keg->uk_slabsize = sizeof(struct pcpu);
1134 keg->uk_ppera = howmany(mp_ncpus * sizeof(struct pcpu),
1135 PAGE_SIZE);
1136 } else {
1137 keg->uk_slabsize = UMA_SLAB_SIZE;
1138 keg->uk_ppera = 1;
1139 }
1140
1141 /*
1142 * Calculate the size of each allocation (rsize) according to
1143 * alignment. If the requested size is smaller than we have
1144 * allocation bits for, we round it up.
1145 */
1146 rsize = keg->uk_size;
1147 if (rsize < keg->uk_slabsize / SLAB_SETSIZE)
1148 rsize = keg->uk_slabsize / SLAB_SETSIZE;
1149 if (rsize & keg->uk_align)
1150 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1151 keg->uk_rsize = rsize;
1152
1153 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1154 keg->uk_rsize < sizeof(struct pcpu),
1155 ("%s: size %u too large", __func__, keg->uk_rsize));
1156
1157 if (keg->uk_flags & UMA_ZONE_REFCNT)
1158 rsize += sizeof(uint32_t);
1159
1160 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1161 shsize = 0;
1162 else
1163 shsize = sizeof(struct uma_slab);
1164
1165 keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
1166 KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1167 ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1168
1169 memused = keg->uk_ipers * rsize + shsize;
1170 wastedspace = keg->uk_slabsize - memused;
1171
1172 /*
1173 * We can't do OFFPAGE if we're internal or if we've been
1174 * asked to not go to the VM for buckets. If we do this we
1175 * may end up going to the VM (kmem_map) for slabs which we
1176 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1177 * result of UMA_ZONE_VM, which clearly forbids it.
1178 */
1179 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1180 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1181 return;
1182
1183 /*
1184 * See if using an OFFPAGE slab will limit our waste. Only do
1185 * this if it permits more items per-slab.
1186 *
1187 * XXX We could try growing slabsize to limit max waste as well.
1188 * Historically this was not done because the VM could not
1189 * efficiently handle contiguous allocations.
1190 */
1191 if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
1192 (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
1193 keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
1194 KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1195 ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1196#ifdef UMA_DEBUG
1197 printf("UMA decided we need offpage slab headers for "
1198 "keg: %s, calculated wastedspace = %d, "
1199 "maximum wasted space allowed = %d, "
1200 "calculated ipers = %d, "
1201 "new wasted space = %d\n", keg->uk_name, wastedspace,
1202 keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1203 keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
1204#endif
1205 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1206 }
1207
1208 if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1209 (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1210 keg->uk_flags |= UMA_ZONE_HASH;
1211}
1212
1213/*
1214 * Finish creating a large (> UMA_SLAB_SIZE) uma keg. Just give in and do
1215 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
1216 * more complicated.
1217 *
1218 * Arguments
1219 * keg The keg we should initialize
1220 *
1221 * Returns
1222 * Nothing
1223 */
1224static void
1225keg_large_init(uma_keg_t keg)
1226{
1227
1228 KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1229 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1230 ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1231 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1232 ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1233
1234 keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1235 keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
1236 keg->uk_ipers = 1;
1237 keg->uk_rsize = keg->uk_size;
1238
1239 /* We can't do OFFPAGE if we're internal, bail out here. */
1240 if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1241 return;
1242
1243 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1244 if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1245 keg->uk_flags |= UMA_ZONE_HASH;
1246}
1247
1248static void
1249keg_cachespread_init(uma_keg_t keg)
1250{
1251 int alignsize;
1252 int trailer;
1253 int pages;
1254 int rsize;
1255
1256 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1257 ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1258
1259 alignsize = keg->uk_align + 1;
1260 rsize = keg->uk_size;
1261 /*
1262 * We want one item to start on every align boundary in a page. To
1263 * do this we will span pages. We will also extend the item by the
1264 * size of align if it is an even multiple of align. Otherwise, it
1265 * would fall on the same boundary every time.
1266 */
1267 if (rsize & keg->uk_align)
1268 rsize = (rsize & ~keg->uk_align) + alignsize;
1269 if ((rsize & alignsize) == 0)
1270 rsize += alignsize;
1271 trailer = rsize - keg->uk_size;
1272 pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1273 pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1274 keg->uk_rsize = rsize;
1275 keg->uk_ppera = pages;
1276 keg->uk_slabsize = UMA_SLAB_SIZE;
1277 keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1278 keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1279 KASSERT(keg->uk_ipers <= uma_max_ipers,
1280 ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1281 keg->uk_ipers));
1282}
1283
1284/*
1285 * Keg header ctor. This initializes all fields, locks, etc., and inserts
1286 * the keg onto the global keg list.
1287 *
1288 * Arguments/Returns follow uma_ctor specifications
1289 * udata Actually uma_kctor_args
1290 */
1291static int
1292keg_ctor(void *mem, int size, void *udata, int flags)
1293{
1294 struct uma_kctor_args *arg = udata;
1295 uma_keg_t keg = mem;
1296 uma_zone_t zone;
1297
1298 bzero(keg, size);
1299 keg->uk_size = arg->size;
1300 keg->uk_init = arg->uminit;
1301 keg->uk_fini = arg->fini;
1302 keg->uk_align = arg->align;
1303 keg->uk_free = 0;
1304 keg->uk_pages = 0;
1305 keg->uk_flags = arg->flags;
1306 keg->uk_allocf = page_alloc;
1307 keg->uk_freef = page_free;
1308 keg->uk_slabzone = NULL;
1309
1310 /*
1311 * The master zone is passed to us at keg-creation time.
1312 */
1313 zone = arg->zone;
1314 keg->uk_name = zone->uz_name;
1315
1316 if (arg->flags & UMA_ZONE_VM)
1317 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1318
1319 if (arg->flags & UMA_ZONE_ZINIT)
1320 keg->uk_init = zero_init;
1321
1322 if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
1323 keg->uk_flags |= UMA_ZONE_VTOSLAB;
1324
1325 if (arg->flags & UMA_ZONE_PCPU)
1326#ifdef SMP
1327 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1328#else
1329 keg->uk_flags &= ~UMA_ZONE_PCPU;
1330#endif
1331
1332 if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1333 keg_cachespread_init(keg);
1334 } else if (keg->uk_flags & UMA_ZONE_REFCNT) {
1335 if (keg->uk_size >
1336 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
1337 sizeof(uint32_t)))
1338 keg_large_init(keg);
1339 else
1340 keg_small_init(keg);
1341 } else {
1342 if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1343 keg_large_init(keg);
1344 else
1345 keg_small_init(keg);
1346 }
1347
1348 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1349 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1350 if (keg->uk_ipers > uma_max_ipers_ref)
1351 panic("Too many ref items per zone: %d > %d\n",
1352 keg->uk_ipers, uma_max_ipers_ref);
1353 keg->uk_slabzone = slabrefzone;
1354 } else
1355 keg->uk_slabzone = slabzone;
1356 }
1357
1358 /*
1359 * If we haven't booted yet we need allocations to go through the
1360 * startup cache until the vm is ready.
1361 */
1362 if (keg->uk_ppera == 1) {
1363#ifdef UMA_MD_SMALL_ALLOC
1364 keg->uk_allocf = uma_small_alloc;
1365 keg->uk_freef = uma_small_free;
1366
1367 if (booted < UMA_STARTUP)
1368 keg->uk_allocf = startup_alloc;
1369#else
1370 if (booted < UMA_STARTUP2)
1371 keg->uk_allocf = startup_alloc;
1372#endif
1373 } else if (booted < UMA_STARTUP2 &&
1374 (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1375 keg->uk_allocf = startup_alloc;
1376
1377 /*
1378 * Initialize keg's lock (shared among zones).
1378 * Initialize keg's lock
1379 */
1379 */
1380 if (arg->flags & UMA_ZONE_MTXCLASS)
1381 KEG_LOCK_INIT(keg, 1);
1382 else
1383 KEG_LOCK_INIT(keg, 0);
1380 KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
1384
1385 /*
1386 * If we're putting the slab header in the actual page we need to
1387 * figure out where in each page it goes. This calculates a right
1388 * justified offset into the memory on an ALIGN_PTR boundary.
1389 */
1390 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1391 u_int totsize;
1392
1393 /* Size of the slab struct and free list */
1394 totsize = sizeof(struct uma_slab);
1395
1396 /* Size of the reference counts. */
1397 if (keg->uk_flags & UMA_ZONE_REFCNT)
1398 totsize += keg->uk_ipers * sizeof(uint32_t);
1399
1400 if (totsize & UMA_ALIGN_PTR)
1401 totsize = (totsize & ~UMA_ALIGN_PTR) +
1402 (UMA_ALIGN_PTR + 1);
1403 keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
1404
1405 /*
1406 * The only way the following is possible is if, with our
1407 * UMA_ALIGN_PTR adjustments, we are now bigger than
1408 * UMA_SLAB_SIZE. I haven't checked whether this is
1409 * mathematically possible for all cases, so we make
1410 * sure here anyway.
1411 */
1412 totsize = keg->uk_pgoff + sizeof(struct uma_slab);
1413 if (keg->uk_flags & UMA_ZONE_REFCNT)
1414 totsize += keg->uk_ipers * sizeof(uint32_t);
1415 if (totsize > PAGE_SIZE * keg->uk_ppera) {
1416 printf("zone %s ipers %d rsize %d size %d\n",
1417 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1418 keg->uk_size);
1419 panic("UMA slab won't fit.");
1420 }
1421 }
1422
1423 if (keg->uk_flags & UMA_ZONE_HASH)
1424 hash_alloc(&keg->uk_hash);
1425
1426#ifdef UMA_DEBUG
1427 printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1428 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1429 keg->uk_ipers, keg->uk_ppera,
1430 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
1431#endif
1432
1433 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1434
1435 mtx_lock(&uma_mtx);
1436 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1437 mtx_unlock(&uma_mtx);
1438 return (0);
1439}
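/*
 * Editor's sketch, not part of uma_core.c: the uk_pgoff computation above
 * right-justifies the in-page slab header.  With a hypothetical one-page
 * slab, a 96-byte header (illustrative size only) and UMA_ALIGN_PTR = 7:
 *
 *	totsize  = 96			(already pointer-aligned)
 *	uk_pgoff = 4096 * 1 - 96 = 4000
 *
 * so items occupy [0, 4000) and the header sits flush against the end of
 * the page; the re-check that follows simply adds the header back and
 * panics if the sum ever exceeds the slab.
 */
static inline unsigned int
slab_header_offset(unsigned int slabbytes, unsigned int hdrsize,
    unsigned int ptr_align_mask)
{
	if (hdrsize & ptr_align_mask)
		hdrsize = (hdrsize & ~ptr_align_mask) + (ptr_align_mask + 1);
	return (slabbytes - hdrsize);
}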
1440
1441/*
1442 * Zone header ctor. This initializes all fields, locks, etc.
1443 *
1444 * Arguments/Returns follow uma_ctor specifications
1445 * udata Actually uma_zctor_args
1446 */
1447static int
1448zone_ctor(void *mem, int size, void *udata, int flags)
1449{
1450 struct uma_zctor_args *arg = udata;
1451 uma_zone_t zone = mem;
1452 uma_zone_t z;
1453 uma_keg_t keg;
1454
1455 bzero(zone, size);
1456 zone->uz_name = arg->name;
1457 zone->uz_ctor = arg->ctor;
1458 zone->uz_dtor = arg->dtor;
1459 zone->uz_slab = zone_fetch_slab;
1460 zone->uz_init = NULL;
1461 zone->uz_fini = NULL;
1462 zone->uz_allocs = 0;
1463 zone->uz_frees = 0;
1464 zone->uz_fails = 0;
1465 zone->uz_sleeps = 0;
1466 zone->uz_count = 0;
1467 zone->uz_flags = 0;
1468 zone->uz_warning = NULL;
1469 timevalclear(&zone->uz_ratecheck);
1470 keg = arg->keg;
1471
1381
1382 /*
1383 * If we're putting the slab header in the actual page we need to
1384 * figure out where in each page it goes. This calculates a right
1385 * justified offset into the memory on an ALIGN_PTR boundary.
1386 */
1387 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1388 u_int totsize;
1389
1390 /* Size of the slab struct and free list */
1391 totsize = sizeof(struct uma_slab);
1392
1393 /* Size of the reference counts. */
1394 if (keg->uk_flags & UMA_ZONE_REFCNT)
1395 totsize += keg->uk_ipers * sizeof(uint32_t);
1396
1397 if (totsize & UMA_ALIGN_PTR)
1398 totsize = (totsize & ~UMA_ALIGN_PTR) +
1399 (UMA_ALIGN_PTR + 1);
1400 keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
1401
1402 /*
1403 * The only way the following is possible is if, with our
1404 * UMA_ALIGN_PTR adjustments, we are now bigger than
1405 * UMA_SLAB_SIZE. I haven't checked whether this is
1406 * mathematically possible for all cases, so we make
1407 * sure here anyway.
1408 */
1409 totsize = keg->uk_pgoff + sizeof(struct uma_slab);
1410 if (keg->uk_flags & UMA_ZONE_REFCNT)
1411 totsize += keg->uk_ipers * sizeof(uint32_t);
1412 if (totsize > PAGE_SIZE * keg->uk_ppera) {
1413 printf("zone %s ipers %d rsize %d size %d\n",
1414 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1415 keg->uk_size);
1416 panic("UMA slab won't fit.");
1417 }
1418 }
1419
1420 if (keg->uk_flags & UMA_ZONE_HASH)
1421 hash_alloc(&keg->uk_hash);
1422
1423#ifdef UMA_DEBUG
1424 printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1425 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1426 keg->uk_ipers, keg->uk_ppera,
1427 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
1428#endif
1429
1430 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1431
1432 mtx_lock(&uma_mtx);
1433 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1434 mtx_unlock(&uma_mtx);
1435 return (0);
1436}
1437
1438/*
1439 * Zone header ctor. This initializes all fields, locks, etc.
1440 *
1441 * Arguments/Returns follow uma_ctor specifications
1442 * udata Actually uma_zctor_args
1443 */
1444static int
1445zone_ctor(void *mem, int size, void *udata, int flags)
1446{
1447 struct uma_zctor_args *arg = udata;
1448 uma_zone_t zone = mem;
1449 uma_zone_t z;
1450 uma_keg_t keg;
1451
1452 bzero(zone, size);
1453 zone->uz_name = arg->name;
1454 zone->uz_ctor = arg->ctor;
1455 zone->uz_dtor = arg->dtor;
1456 zone->uz_slab = zone_fetch_slab;
1457 zone->uz_init = NULL;
1458 zone->uz_fini = NULL;
1459 zone->uz_allocs = 0;
1460 zone->uz_frees = 0;
1461 zone->uz_fails = 0;
1462 zone->uz_sleeps = 0;
1463 zone->uz_count = 0;
1464 zone->uz_flags = 0;
1465 zone->uz_warning = NULL;
1466 timevalclear(&zone->uz_ratecheck);
1467 keg = arg->keg;
1468
1469 ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
1470
1472 /*
1473 * This is a pure cache zone, no kegs.
1474 */
1475 if (arg->import) {
1471 /*
1472 * This is a pure cache zone, no kegs.
1473 */
1474 if (arg->import) {
1475 zone->uz_size = arg->size;
1476 zone->uz_import = arg->import;
1477 zone->uz_release = arg->release;
1478 zone->uz_arg = arg->arg;
1476 zone->uz_import = arg->import;
1477 zone->uz_release = arg->release;
1478 zone->uz_arg = arg->arg;
1479 zone->uz_count = BUCKET_MAX;
1480 return (0);
1479 zone->uz_lockptr = &zone->uz_lock;
1480 goto out;
1481 }
1482
1483 /*
1484 * Use the regular zone/keg/slab allocator.
1485 */
1486 zone->uz_import = (uma_import)zone_import;
1487 zone->uz_release = (uma_release)zone_release;
1488 zone->uz_arg = zone;
1489
1490 if (arg->flags & UMA_ZONE_SECONDARY) {
1491 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1492 zone->uz_init = arg->uminit;
1493 zone->uz_fini = arg->fini;
1481 }
1482
1483 /*
1484 * Use the regular zone/keg/slab allocator.
1485 */
1486 zone->uz_import = (uma_import)zone_import;
1487 zone->uz_release = (uma_release)zone_release;
1488 zone->uz_arg = zone;
1489
1490 if (arg->flags & UMA_ZONE_SECONDARY) {
1491 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1492 zone->uz_init = arg->uminit;
1493 zone->uz_fini = arg->fini;
1494 zone->uz_lock = &keg->uk_lock;
1494 zone->uz_lockptr = &keg->uk_lock;
1495 zone->uz_flags |= UMA_ZONE_SECONDARY;
1496 mtx_lock(&uma_mtx);
1497 ZONE_LOCK(zone);
1498 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1499 if (LIST_NEXT(z, uz_link) == NULL) {
1500 LIST_INSERT_AFTER(z, zone, uz_link);
1501 break;
1502 }
1503 }
1504 ZONE_UNLOCK(zone);
1505 mtx_unlock(&uma_mtx);
1506 } else if (keg == NULL) {
1507 if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1508 arg->align, arg->flags)) == NULL)
1509 return (ENOMEM);
1510 } else {
1511 struct uma_kctor_args karg;
1512 int error;
1513
1514 /* We should only be here from uma_startup() */
1515 karg.size = arg->size;
1516 karg.uminit = arg->uminit;
1517 karg.fini = arg->fini;
1518 karg.align = arg->align;
1519 karg.flags = arg->flags;
1520 karg.zone = zone;
1521 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1522 flags);
1523 if (error)
1524 return (error);
1525 }
1526
1527 /*
1528 * Link in the first keg.
1529 */
1530 zone->uz_klink.kl_keg = keg;
1531 LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1495 zone->uz_flags |= UMA_ZONE_SECONDARY;
1496 mtx_lock(&uma_mtx);
1497 ZONE_LOCK(zone);
1498 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1499 if (LIST_NEXT(z, uz_link) == NULL) {
1500 LIST_INSERT_AFTER(z, zone, uz_link);
1501 break;
1502 }
1503 }
1504 ZONE_UNLOCK(zone);
1505 mtx_unlock(&uma_mtx);
1506 } else if (keg == NULL) {
1507 if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1508 arg->align, arg->flags)) == NULL)
1509 return (ENOMEM);
1510 } else {
1511 struct uma_kctor_args karg;
1512 int error;
1513
1514 /* We should only be here from uma_startup() */
1515 karg.size = arg->size;
1516 karg.uminit = arg->uminit;
1517 karg.fini = arg->fini;
1518 karg.align = arg->align;
1519 karg.flags = arg->flags;
1520 karg.zone = zone;
1521 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1522 flags);
1523 if (error)
1524 return (error);
1525 }
1526
1527 /*
1528 * Link in the first keg.
1529 */
1530 zone->uz_klink.kl_keg = keg;
1531 LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1532 zone->uz_lock = &keg->uk_lock;
1532 zone->uz_lockptr = &keg->uk_lock;
1533 zone->uz_size = keg->uk_size;
1534 zone->uz_flags |= (keg->uk_flags &
1535 (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1536
1537 /*
1538 * Some internal zones don't have room allocated for the per cpu
1539 * caches. If we're internal, bail out here.
1540 */
1541 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1542 KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1543 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1544 return (0);
1545 }
1546
1533 zone->uz_size = keg->uk_size;
1534 zone->uz_flags |= (keg->uk_flags &
1535 (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1536
1537 /*
1538 * Some internal zones don't have room allocated for the per cpu
1539 * caches. If we're internal, bail out here.
1540 */
1541 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1542 KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1543 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1544 return (0);
1545 }
1546
1547 if ((keg->uk_flags & UMA_ZONE_MAXBUCKET) == 0)
1548 zone->uz_count = bucket_select(keg->uk_rsize);
1547out:
1548 if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
1549 zone->uz_count = bucket_select(zone->uz_size);
1549 else
1550 zone->uz_count = BUCKET_MAX;
1551
1552 return (0);
1553}
1554
1555/*
1556 * Keg header dtor. This frees all data, destroys locks, frees the hash
1557 * table and removes the keg from the global list.
1558 *
1559 * Arguments/Returns follow uma_dtor specifications
1560 * udata unused
1561 */
1562static void
1563keg_dtor(void *arg, int size, void *udata)
1564{
1565 uma_keg_t keg;
1566
1567 keg = (uma_keg_t)arg;
1568 KEG_LOCK(keg);
1569 if (keg->uk_free != 0) {
1570 printf("Freed UMA keg was not empty (%d items). "
1571 "Lost %d pages of memory.\n",
1572 keg->uk_free, keg->uk_pages);
1573 }
1574 KEG_UNLOCK(keg);
1575
1576 hash_free(&keg->uk_hash);
1577
1578 KEG_LOCK_FINI(keg);
1579}
1580
1581/*
1582 * Zone header dtor.
1583 *
1584 * Arguments/Returns follow uma_dtor specifications
1585 * udata unused
1586 */
1587static void
1588zone_dtor(void *arg, int size, void *udata)
1589{
1590 uma_klink_t klink;
1591 uma_zone_t zone;
1592 uma_keg_t keg;
1593
1594 zone = (uma_zone_t)arg;
1595 keg = zone_first_keg(zone);
1596
1597 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1598 cache_drain(zone);
1599
1600 mtx_lock(&uma_mtx);
1601 LIST_REMOVE(zone, uz_link);
1602 mtx_unlock(&uma_mtx);
1603 /*
1604 * XXX there are some races here where
1605 * the zone can be drained but the zone lock
1606 * released and the zone refilled before we
1607 * remove it... we don't care for now
1608 */
1609 zone_drain_wait(zone, M_WAITOK);
1610 /*
1611 * Unlink all of our kegs.
1612 */
1613 while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1614 klink->kl_keg = NULL;
1615 LIST_REMOVE(klink, kl_link);
1616 if (klink == &zone->uz_klink)
1617 continue;
1618 free(klink, M_TEMP);
1619 }
1620 /*
1621 * We only destroy kegs from non secondary zones.
1622 */
1623 if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
1624 mtx_lock(&uma_mtx);
1625 LIST_REMOVE(keg, uk_link);
1626 mtx_unlock(&uma_mtx);
1627 zone_free_item(kegs, keg, NULL, SKIP_NONE);
1628 }
1550 else
1551 zone->uz_count = BUCKET_MAX;
1552
1553 return (0);
1554}
1555
1556/*
1557 * Keg header dtor. This frees all data, destroys locks, frees the hash
1558 * table and removes the keg from the global list.
1559 *
1560 * Arguments/Returns follow uma_dtor specifications
1561 * udata unused
1562 */
1563static void
1564keg_dtor(void *arg, int size, void *udata)
1565{
1566 uma_keg_t keg;
1567
1568 keg = (uma_keg_t)arg;
1569 KEG_LOCK(keg);
1570 if (keg->uk_free != 0) {
1571 printf("Freed UMA keg was not empty (%d items). "
1572 "Lost %d pages of memory.\n",
1573 keg->uk_free, keg->uk_pages);
1574 }
1575 KEG_UNLOCK(keg);
1576
1577 hash_free(&keg->uk_hash);
1578
1579 KEG_LOCK_FINI(keg);
1580}
1581
1582/*
1583 * Zone header dtor.
1584 *
1585 * Arguments/Returns follow uma_dtor specifications
1586 * udata unused
1587 */
1588static void
1589zone_dtor(void *arg, int size, void *udata)
1590{
1591 uma_klink_t klink;
1592 uma_zone_t zone;
1593 uma_keg_t keg;
1594
1595 zone = (uma_zone_t)arg;
1596 keg = zone_first_keg(zone);
1597
1598 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1599 cache_drain(zone);
1600
1601 mtx_lock(&uma_mtx);
1602 LIST_REMOVE(zone, uz_link);
1603 mtx_unlock(&uma_mtx);
1604 /*
1605 * XXX there are some races here where
1606 * the zone can be drained but the zone lock
1607 * released and the zone refilled before we
1608 * remove it... we don't care for now
1609 */
1610 zone_drain_wait(zone, M_WAITOK);
1611 /*
1612 * Unlink all of our kegs.
1613 */
1614 while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1615 klink->kl_keg = NULL;
1616 LIST_REMOVE(klink, kl_link);
1617 if (klink == &zone->uz_klink)
1618 continue;
1619 free(klink, M_TEMP);
1620 }
1621 /*
1622 * We only destroy kegs from non secondary zones.
1623 */
1624 if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
1625 mtx_lock(&uma_mtx);
1626 LIST_REMOVE(keg, uk_link);
1627 mtx_unlock(&uma_mtx);
1628 zone_free_item(kegs, keg, NULL, SKIP_NONE);
1629 }
1630 ZONE_LOCK_FINI(zone);
1629}
1630
1631/*
1632 * Traverses every zone in the system and calls a callback
1633 *
1634 * Arguments:
1635 * zfunc A pointer to a function which accepts a zone
1636 * as an argument.
1637 *
1638 * Returns:
1639 * Nothing
1640 */
1641static void
1642zone_foreach(void (*zfunc)(uma_zone_t))
1643{
1644 uma_keg_t keg;
1645 uma_zone_t zone;
1646
1647 mtx_lock(&uma_mtx);
1648 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1649 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1650 zfunc(zone);
1651 }
1652 mtx_unlock(&uma_mtx);
1653}
1654
1655/* Public functions */
1656/* See uma.h */
1657void
1658uma_startup(void *bootmem, int boot_pages)
1659{
1660 struct uma_zctor_args args;
1661 uma_slab_t slab;
1662 u_int slabsize;
1663 int i;
1664
1665#ifdef UMA_DEBUG
1666 printf("Creating uma keg headers zone and keg.\n");
1667#endif
1668 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1669
1670 /* "manually" create the initial zone */
1671 memset(&args, 0, sizeof(args));
1672 args.name = "UMA Kegs";
1673 args.size = sizeof(struct uma_keg);
1674 args.ctor = keg_ctor;
1675 args.dtor = keg_dtor;
1676 args.uminit = zero_init;
1677 args.fini = NULL;
1678 args.keg = &masterkeg;
1679 args.align = 32 - 1;
1680 args.flags = UMA_ZFLAG_INTERNAL;
1681 /* The initial zone has no Per cpu queues so it's smaller */
1682 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1683
1684#ifdef UMA_DEBUG
1685 printf("Filling boot free list.\n");
1686#endif
1687 for (i = 0; i < boot_pages; i++) {
1688 slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
1689 slab->us_data = (uint8_t *)slab;
1690 slab->us_flags = UMA_SLAB_BOOT;
1691 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1692 }
1693 mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1694
1695#ifdef UMA_DEBUG
1696 printf("Creating uma zone headers zone and keg.\n");
1697#endif
1698 args.name = "UMA Zones";
1699 args.size = sizeof(struct uma_zone) +
1700 (sizeof(struct uma_cache) * (mp_maxid + 1));
1701 args.ctor = zone_ctor;
1702 args.dtor = zone_dtor;
1703 args.uminit = zero_init;
1704 args.fini = NULL;
1705 args.keg = NULL;
1706 args.align = 32 - 1;
1707 args.flags = UMA_ZFLAG_INTERNAL;
1708 /* The initial zone has no Per cpu queues so it's smaller */
1709 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1710
1711#ifdef UMA_DEBUG
1712 printf("Initializing pcpu cache locks.\n");
1713#endif
1714#ifdef UMA_DEBUG
1715 printf("Creating slab and hash zones.\n");
1716#endif
1717
1718 /* Now make a zone for slab headers */
1719 slabzone = uma_zcreate("UMA Slabs",
1720 sizeof(struct uma_slab),
1721 NULL, NULL, NULL, NULL,
1722 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1723
1724 /*
1725 * We also create a zone for the bigger slabs with reference
1726 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1727 */
1728 slabsize = sizeof(struct uma_slab_refcnt);
1729 slabsize += uma_max_ipers_ref * sizeof(uint32_t);
1730 slabrefzone = uma_zcreate("UMA RCntSlabs",
1731 slabsize,
1732 NULL, NULL, NULL, NULL,
1733 UMA_ALIGN_PTR,
1734 UMA_ZFLAG_INTERNAL);
1735
1736 hashzone = uma_zcreate("UMA Hash",
1737 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1738 NULL, NULL, NULL, NULL,
1739 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1740
1741 bucket_init();
1742
1743 booted = UMA_STARTUP;
1744
1745#ifdef UMA_DEBUG
1746 printf("UMA startup complete.\n");
1747#endif
1748}
1749
1750/* see uma.h */
1751void
1752uma_startup2(void)
1753{
1754 booted = UMA_STARTUP2;
1755 bucket_enable();
1756#ifdef UMA_DEBUG
1757 printf("UMA startup2 complete.\n");
1758#endif
1759}
1760
1761/*
1762 * Initialize our callout handle
1763 *
1764 */
1765
1766static void
1767uma_startup3(void)
1768{
1769#ifdef UMA_DEBUG
1770 printf("Starting callout.\n");
1771#endif
1772 callout_init(&uma_callout, CALLOUT_MPSAFE);
1773 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1774#ifdef UMA_DEBUG
1775 printf("UMA startup3 complete.\n");
1776#endif
1777}
1778
1779static uma_keg_t
1780uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1781 int align, uint32_t flags)
1782{
1783 struct uma_kctor_args args;
1784
1785 args.size = size;
1786 args.uminit = uminit;
1787 args.fini = fini;
1788 args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1789 args.flags = flags;
1790 args.zone = zone;
1791 return (zone_alloc_item(kegs, &args, M_WAITOK));
1792}
1793
1794/* See uma.h */
1795void
1796uma_set_align(int align)
1797{
1798
1799 if (align != UMA_ALIGN_CACHE)
1800 uma_align_cache = align;
1801}
1802
1803/* See uma.h */
1804uma_zone_t
1805uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1806 uma_init uminit, uma_fini fini, int align, uint32_t flags)
1807
1808{
1809 struct uma_zctor_args args;
1810
1811 /* This stuff is essential for the zone ctor */
1812 memset(&args, 0, sizeof(args));
1813 args.name = name;
1814 args.size = size;
1815 args.ctor = ctor;
1816 args.dtor = dtor;
1817 args.uminit = uminit;
1818 args.fini = fini;
1819 args.align = align;
1820 args.flags = flags;
1821 args.keg = NULL;
1822
1823 return (zone_alloc_item(zones, &args, M_WAITOK));
1824}
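/*
 * Editor's sketch, not part of uma_core.c: one plausible way a consumer
 * uses the interface above (see uma.h).  The item type, zone name and flag
 * choices here are assumptions for illustration only.
 */
struct foo_item {
	int	fi_state;
};

static uma_zone_t foo_zone;

static void
foo_zone_demo(void)
{
	struct foo_item *fi;

	foo_zone = uma_zcreate("foo items", sizeof(struct foo_item),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	fi = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
	/* ... use fi ... */
	uma_zfree(foo_zone, fi);
	uma_zdestroy(foo_zone);
}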
1825
1826/* See uma.h */
1827uma_zone_t
1828uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1829 uma_init zinit, uma_fini zfini, uma_zone_t master)
1830{
1831 struct uma_zctor_args args;
1832 uma_keg_t keg;
1833
1834 keg = zone_first_keg(master);
1835 memset(&args, 0, sizeof(args));
1836 args.name = name;
1837 args.size = keg->uk_size;
1838 args.ctor = ctor;
1839 args.dtor = dtor;
1840 args.uminit = zinit;
1841 args.fini = zfini;
1842 args.align = keg->uk_align;
1843 args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
1844 args.keg = keg;
1845
1846 /* XXX Attaches only one keg of potentially many. */
1847 return (zone_alloc_item(zones, &args, M_WAITOK));
1848}
1849
1850/* See uma.h */
1851uma_zone_t
1631}
1632
1633/*
1634 * Traverses every zone in the system and calls a callback
1635 *
1636 * Arguments:
1637 * zfunc A pointer to a function which accepts a zone
1638 * as an argument.
1639 *
1640 * Returns:
1641 * Nothing
1642 */
1643static void
1644zone_foreach(void (*zfunc)(uma_zone_t))
1645{
1646 uma_keg_t keg;
1647 uma_zone_t zone;
1648
1649 mtx_lock(&uma_mtx);
1650 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1651 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1652 zfunc(zone);
1653 }
1654 mtx_unlock(&uma_mtx);
1655}
1656
1657/* Public functions */
1658/* See uma.h */
1659void
1660uma_startup(void *bootmem, int boot_pages)
1661{
1662 struct uma_zctor_args args;
1663 uma_slab_t slab;
1664 u_int slabsize;
1665 int i;
1666
1667#ifdef UMA_DEBUG
1668 printf("Creating uma keg headers zone and keg.\n");
1669#endif
1670 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1671
1672 /* "manually" create the initial zone */
1673 memset(&args, 0, sizeof(args));
1674 args.name = "UMA Kegs";
1675 args.size = sizeof(struct uma_keg);
1676 args.ctor = keg_ctor;
1677 args.dtor = keg_dtor;
1678 args.uminit = zero_init;
1679 args.fini = NULL;
1680 args.keg = &masterkeg;
1681 args.align = 32 - 1;
1682 args.flags = UMA_ZFLAG_INTERNAL;
1683 /* The initial zone has no Per cpu queues so it's smaller */
1684 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1685
1686#ifdef UMA_DEBUG
1687 printf("Filling boot free list.\n");
1688#endif
1689 for (i = 0; i < boot_pages; i++) {
1690 slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
1691 slab->us_data = (uint8_t *)slab;
1692 slab->us_flags = UMA_SLAB_BOOT;
1693 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1694 }
1695 mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1696
1697#ifdef UMA_DEBUG
1698 printf("Creating uma zone headers zone and keg.\n");
1699#endif
1700 args.name = "UMA Zones";
1701 args.size = sizeof(struct uma_zone) +
1702 (sizeof(struct uma_cache) * (mp_maxid + 1));
1703 args.ctor = zone_ctor;
1704 args.dtor = zone_dtor;
1705 args.uminit = zero_init;
1706 args.fini = NULL;
1707 args.keg = NULL;
1708 args.align = 32 - 1;
1709 args.flags = UMA_ZFLAG_INTERNAL;
1710 /* The initial zone has no Per cpu queues so it's smaller */
1711 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1712
1713#ifdef UMA_DEBUG
1714 printf("Initializing pcpu cache locks.\n");
1715#endif
1716#ifdef UMA_DEBUG
1717 printf("Creating slab and hash zones.\n");
1718#endif
1719
1720 /* Now make a zone for slab headers */
1721 slabzone = uma_zcreate("UMA Slabs",
1722 sizeof(struct uma_slab),
1723 NULL, NULL, NULL, NULL,
1724 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1725
1726 /*
1727 * We also create a zone for the bigger slabs with reference
1728 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1729 */
1730 slabsize = sizeof(struct uma_slab_refcnt);
1731 slabsize += uma_max_ipers_ref * sizeof(uint32_t);
1732 slabrefzone = uma_zcreate("UMA RCntSlabs",
1733 slabsize,
1734 NULL, NULL, NULL, NULL,
1735 UMA_ALIGN_PTR,
1736 UMA_ZFLAG_INTERNAL);
1737
1738 hashzone = uma_zcreate("UMA Hash",
1739 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1740 NULL, NULL, NULL, NULL,
1741 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1742
1743 bucket_init();
1744
1745 booted = UMA_STARTUP;
1746
1747#ifdef UMA_DEBUG
1748 printf("UMA startup complete.\n");
1749#endif
1750}
1751
1752/* see uma.h */
1753void
1754uma_startup2(void)
1755{
1756 booted = UMA_STARTUP2;
1757 bucket_enable();
1758#ifdef UMA_DEBUG
1759 printf("UMA startup2 complete.\n");
1760#endif
1761}
1762
1763/*
1764 * Initialize our callout handle
1765 *
1766 */
1767
1768static void
1769uma_startup3(void)
1770{
1771#ifdef UMA_DEBUG
1772 printf("Starting callout.\n");
1773#endif
1774 callout_init(&uma_callout, CALLOUT_MPSAFE);
1775 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1776#ifdef UMA_DEBUG
1777 printf("UMA startup3 complete.\n");
1778#endif
1779}
1780
1781static uma_keg_t
1782uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1783 int align, uint32_t flags)
1784{
1785 struct uma_kctor_args args;
1786
1787 args.size = size;
1788 args.uminit = uminit;
1789 args.fini = fini;
1790 args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1791 args.flags = flags;
1792 args.zone = zone;
1793 return (zone_alloc_item(kegs, &args, M_WAITOK));
1794}
1795
1796/* See uma.h */
1797void
1798uma_set_align(int align)
1799{
1800
1801 if (align != UMA_ALIGN_CACHE)
1802 uma_align_cache = align;
1803}
1804
1805/* See uma.h */
1806uma_zone_t
1807uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1808 uma_init uminit, uma_fini fini, int align, uint32_t flags)
1809
1810{
1811 struct uma_zctor_args args;
1812
1813 /* This stuff is essential for the zone ctor */
1814 memset(&args, 0, sizeof(args));
1815 args.name = name;
1816 args.size = size;
1817 args.ctor = ctor;
1818 args.dtor = dtor;
1819 args.uminit = uminit;
1820 args.fini = fini;
1821 args.align = align;
1822 args.flags = flags;
1823 args.keg = NULL;
1824
1825 return (zone_alloc_item(zones, &args, M_WAITOK));
1826}
1827
1828/* See uma.h */
1829uma_zone_t
1830uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1831 uma_init zinit, uma_fini zfini, uma_zone_t master)
1832{
1833 struct uma_zctor_args args;
1834 uma_keg_t keg;
1835
1836 keg = zone_first_keg(master);
1837 memset(&args, 0, sizeof(args));
1838 args.name = name;
1839 args.size = keg->uk_size;
1840 args.ctor = ctor;
1841 args.dtor = dtor;
1842 args.uminit = zinit;
1843 args.fini = zfini;
1844 args.align = keg->uk_align;
1845 args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
1846 args.keg = keg;
1847
1848 /* XXX Attaches only one keg of potentially many. */
1849 return (zone_alloc_item(zones, &args, M_WAITOK));
1850}
1851
1852/* See uma.h */
1853uma_zone_t
1852uma_zcache_create(char *name, uma_ctor ctor, uma_dtor dtor, uma_init zinit,
1853 uma_fini zfini, uma_import zimport, uma_release zrelease,
1854 void *arg, int flags)
1854uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
1855 uma_init zinit, uma_fini zfini, uma_import zimport,
1856 uma_release zrelease, void *arg, int flags)
1855{
1856 struct uma_zctor_args args;
1857
1858 memset(&args, 0, sizeof(args));
1859 args.name = name;
1857{
1858 struct uma_zctor_args args;
1859
1860 memset(&args, 0, sizeof(args));
1861 args.name = name;
1860 args.size = 0;
1862 args.size = size;
1861 args.ctor = ctor;
1862 args.dtor = dtor;
1863 args.uminit = zinit;
1864 args.fini = zfini;
1865 args.import = zimport;
1866 args.release = zrelease;
1867 args.arg = arg;
1868 args.align = 0;
1869 args.flags = flags;
1870
1871 return (zone_alloc_item(zones, &args, M_WAITOK));
1872}
1873
1874static void
1875zone_lock_pair(uma_zone_t a, uma_zone_t b)
1876{
1877 if (a < b) {
1878 ZONE_LOCK(a);
1863 args.ctor = ctor;
1864 args.dtor = dtor;
1865 args.uminit = zinit;
1866 args.fini = zfini;
1867 args.import = zimport;
1868 args.release = zrelease;
1869 args.arg = arg;
1870 args.align = 0;
1871 args.flags = flags;
1872
1873 return (zone_alloc_item(zones, &args, M_WAITOK));
1874}
1875
1876static void
1877zone_lock_pair(uma_zone_t a, uma_zone_t b)
1878{
1879 if (a < b) {
1880 ZONE_LOCK(a);
1879 mtx_lock_flags(b->uz_lock, MTX_DUPOK);
1881 mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
1880 } else {
1881 ZONE_LOCK(b);
1882 } else {
1883 ZONE_LOCK(b);
1882 mtx_lock_flags(a->uz_lock, MTX_DUPOK);
1884 mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
1883 }
1884}
1885
1886static void
1887zone_unlock_pair(uma_zone_t a, uma_zone_t b)
1888{
1889
1890 ZONE_UNLOCK(a);
1891 ZONE_UNLOCK(b);
1892}
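/*
 * Editor's sketch, not part of uma_core.c: zone_lock_pair() above prevents
 * deadlock by always taking the lower-addressed zone's lock first, so any
 * two threads locking the same pair agree on the order; MTX_DUPOK is there
 * so WITNESS does not complain about holding two locks of the same class.
 * The bare idiom, with a hypothetical lock type and acquire routine:
 */
struct resource {
	struct reslock *r_lock;
};

static void
resource_lock_pair(struct resource *a, struct resource *b,
    void (*acquire)(struct reslock *))
{
	if (a < b) {			/* order the pair by address */
		acquire(a->r_lock);
		acquire(b->r_lock);
	} else {
		acquire(b->r_lock);
		acquire(a->r_lock);
	}
}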
1893
1894int
1895uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
1896{
1897 uma_klink_t klink;
1898 uma_klink_t kl;
1899 int error;
1900
1901 error = 0;
1902 klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
1903
1904 zone_lock_pair(zone, master);
1905 /*
1906 * zone must use vtoslab() to resolve objects and must already be
1907 * a secondary.
1908 */
1909 if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
1910 != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
1911 error = EINVAL;
1912 goto out;
1913 }
1914 /*
1915 * The new master must also use vtoslab().
1916 */
1917 if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
1918 error = EINVAL;
1919 goto out;
1920 }
1921 /*
1922 * Both must either be refcnt, or not be refcnt.
1923 */
1924 if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
1925 (master->uz_flags & UMA_ZONE_REFCNT)) {
1926 error = EINVAL;
1927 goto out;
1928 }
1929 /*
1930 * The underlying object must be the same size. rsize
1931 * may be different.
1932 */
1933 if (master->uz_size != zone->uz_size) {
1934 error = E2BIG;
1935 goto out;
1936 }
1937 /*
1938 * Put it at the end of the list.
1939 */
1940 klink->kl_keg = zone_first_keg(master);
1941 LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
1942 if (LIST_NEXT(kl, kl_link) == NULL) {
1943 LIST_INSERT_AFTER(kl, klink, kl_link);
1944 break;
1945 }
1946 }
1947 klink = NULL;
1948 zone->uz_flags |= UMA_ZFLAG_MULTI;
1949 zone->uz_slab = zone_fetch_slab_multi;
1950
1951out:
1952 zone_unlock_pair(zone, master);
1953 if (klink != NULL)
1954 free(klink, M_TEMP);
1955
1956 return (error);
1957}
1958
1959
1960/* See uma.h */
1961void
1962uma_zdestroy(uma_zone_t zone)
1963{
1964
1965 zone_free_item(zones, zone, NULL, SKIP_NONE);
1966}
1967
1968/* See uma.h */
1969void *
1970uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1971{
1972 void *item;
1973 uma_cache_t cache;
1974 uma_bucket_t bucket;
1975 int lockfail;
1976 int cpu;
1977
1978 /* This is the fast path allocation */
1979#ifdef UMA_DEBUG_ALLOC_1
1980 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1981#endif
1982 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1983 zone->uz_name, flags);
1984
1985 if (flags & M_WAITOK) {
1986 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1987 "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
1988 }
1989#ifdef DEBUG_MEMGUARD
1990 if (memguard_cmp_zone(zone)) {
1991 item = memguard_alloc(zone->uz_size, flags);
1992 if (item != NULL) {
1993 /*
1994 * Avoid conflict with the use-after-free
1995 * protecting infrastructure from INVARIANTS.
1996 */
1997 if (zone->uz_init != NULL &&
1998 zone->uz_init != mtrash_init &&
1999 zone->uz_init(item, zone->uz_size, flags) != 0)
2000 return (NULL);
2001 if (zone->uz_ctor != NULL &&
2002 zone->uz_ctor != mtrash_ctor &&
2003 zone->uz_ctor(item, zone->uz_size, udata,
2004 flags) != 0) {
2005 zone->uz_fini(item, zone->uz_size);
2006 return (NULL);
2007 }
2008 return (item);
2009 }
2010 /* This is unfortunate but should not be fatal. */
2011 }
2012#endif
2013 /*
2014 * If possible, allocate from the per-CPU cache. There are two
2015 * requirements for safe access to the per-CPU cache: (1) the thread
2016 * accessing the cache must not be preempted or yield during access,
2017 * and (2) the thread must not migrate CPUs without switching which
2018 * cache it accesses. We rely on a critical section to prevent
2019 * preemption and migration. We release the critical section in
2020 * order to acquire the zone mutex if we are unable to allocate from
2021 * the current cache; when we re-acquire the critical section, we
2022 * must detect and handle migration if it has occurred.
2023 */
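	/*
	 * Editor's note, not part of uma_core.c: the discipline described
	 * above reduces to the pattern
	 *
	 *	critical_enter();
	 *	cache = &zone->uz_cpu[curcpu];
	 *	... fast path touches only this CPU's cache ...
	 *	critical_exit();
	 *
	 * and, any time the critical section is dropped to take the zone
	 * lock, curcpu is re-read afterwards because the thread may have
	 * migrated to another CPU in the meantime.
	 */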
2024 critical_enter();
2025 cpu = curcpu;
2026 cache = &zone->uz_cpu[cpu];
2027
2028zalloc_start:
2029 bucket = cache->uc_allocbucket;
2030 if (bucket != NULL && bucket->ub_cnt > 0) {
2031 bucket->ub_cnt--;
2032 item = bucket->ub_bucket[bucket->ub_cnt];
2033#ifdef INVARIANTS
2034 bucket->ub_bucket[bucket->ub_cnt] = NULL;
2035#endif
2036 KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
2037 cache->uc_allocs++;
2038 critical_exit();
2039 if (zone->uz_ctor != NULL &&
2040 zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2041 atomic_add_long(&zone->uz_fails, 1);
2042 zone_free_item(zone, item, udata, SKIP_DTOR);
2043 return (NULL);
2044 }
2045#ifdef INVARIANTS
2046 uma_dbg_alloc(zone, NULL, item);
2047#endif
2048 if (flags & M_ZERO)
2049 bzero(item, zone->uz_size);
2050 return (item);
2051 }
2052
2053 /*
2054 * We have run out of items in our alloc bucket.
2055 * See if we can switch with our free bucket.
2056 */
2057 bucket = cache->uc_freebucket;
2058 if (bucket != NULL && bucket->ub_cnt > 0) {
2059#ifdef UMA_DEBUG_ALLOC
2060 printf("uma_zalloc: Swapping empty with alloc.\n");
2061#endif
2062 cache->uc_freebucket = cache->uc_allocbucket;
2063 cache->uc_allocbucket = bucket;
2064 goto zalloc_start;
2065 }
2066
2067 /*
2068 * Discard any empty allocation bucket while we hold no locks.
2069 */
2070 bucket = cache->uc_allocbucket;
2071 cache->uc_allocbucket = NULL;
2072 critical_exit();
2073 if (bucket != NULL)
1885 }
1886}
1887
1888static void
1889zone_unlock_pair(uma_zone_t a, uma_zone_t b)
1890{
1891
1892 ZONE_UNLOCK(a);
1893 ZONE_UNLOCK(b);
1894}
1895
1896int
1897uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
1898{
1899 uma_klink_t klink;
1900 uma_klink_t kl;
1901 int error;
1902
1903 error = 0;
1904 klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
1905
1906 zone_lock_pair(zone, master);
1907 /*
1908 * zone must use vtoslab() to resolve objects and must already be
1909 * a secondary.
1910 */
1911 if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
1912 != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
1913 error = EINVAL;
1914 goto out;
1915 }
1916 /*
1917 * The new master must also use vtoslab().
1918 */
1919 if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
1920 error = EINVAL;
1921 goto out;
1922 }
1923 /*
1924 * Both must either be refcnt, or not be refcnt.
1925 */
1926 if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
1927 (master->uz_flags & UMA_ZONE_REFCNT)) {
1928 error = EINVAL;
1929 goto out;
1930 }
1931 /*
1932 * The underlying object must be the same size. rsize
1933 * may be different.
1934 */
1935 if (master->uz_size != zone->uz_size) {
1936 error = E2BIG;
1937 goto out;
1938 }
1939 /*
1940 * Put it at the end of the list.
1941 */
1942 klink->kl_keg = zone_first_keg(master);
1943 LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
1944 if (LIST_NEXT(kl, kl_link) == NULL) {
1945 LIST_INSERT_AFTER(kl, klink, kl_link);
1946 break;
1947 }
1948 }
1949 klink = NULL;
1950 zone->uz_flags |= UMA_ZFLAG_MULTI;
1951 zone->uz_slab = zone_fetch_slab_multi;
1952
1953out:
1954 zone_unlock_pair(zone, master);
1955 if (klink != NULL)
1956 free(klink, M_TEMP);
1957
1958 return (error);
1959}
1960
1961
1962/* See uma.h */
1963void
1964uma_zdestroy(uma_zone_t zone)
1965{
1966
1967 zone_free_item(zones, zone, NULL, SKIP_NONE);
1968}
1969
1970/* See uma.h */
1971void *
1972uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1973{
1974 void *item;
1975 uma_cache_t cache;
1976 uma_bucket_t bucket;
1977 int lockfail;
1978 int cpu;
1979
1980 /* This is the fast path allocation */
1981#ifdef UMA_DEBUG_ALLOC_1
1982 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1983#endif
1984 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1985 zone->uz_name, flags);
1986
1987 if (flags & M_WAITOK) {
1988 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1989 "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
1990 }
1991#ifdef DEBUG_MEMGUARD
1992 if (memguard_cmp_zone(zone)) {
1993 item = memguard_alloc(zone->uz_size, flags);
1994 if (item != NULL) {
1995 /*
1996 * Avoid conflict with the use-after-free
1997 * protecting infrastructure from INVARIANTS.
1998 */
1999 if (zone->uz_init != NULL &&
2000 zone->uz_init != mtrash_init &&
2001 zone->uz_init(item, zone->uz_size, flags) != 0)
2002 return (NULL);
2003 if (zone->uz_ctor != NULL &&
2004 zone->uz_ctor != mtrash_ctor &&
2005 zone->uz_ctor(item, zone->uz_size, udata,
2006 flags) != 0) {
2007 zone->uz_fini(item, zone->uz_size);
2008 return (NULL);
2009 }
2010 return (item);
2011 }
2012 /* This is unfortunate but should not be fatal. */
2013 }
2014#endif
2015 /*
2016 * If possible, allocate from the per-CPU cache. There are two
2017 * requirements for safe access to the per-CPU cache: (1) the thread
2018 * accessing the cache must not be preempted or yield during access,
2019 * and (2) the thread must not migrate CPUs without switching which
2020 * cache it accesses. We rely on a critical section to prevent
2021 * preemption and migration. We release the critical section in
2022 * order to acquire the zone mutex if we are unable to allocate from
2023 * the current cache; when we re-acquire the critical section, we
2024 * must detect and handle migration if it has occurred.
2025 */
2026 critical_enter();
2027 cpu = curcpu;
2028 cache = &zone->uz_cpu[cpu];
2029
2030zalloc_start:
2031 bucket = cache->uc_allocbucket;
2032 if (bucket != NULL && bucket->ub_cnt > 0) {
2033 bucket->ub_cnt--;
2034 item = bucket->ub_bucket[bucket->ub_cnt];
2035#ifdef INVARIANTS
2036 bucket->ub_bucket[bucket->ub_cnt] = NULL;
2037#endif
2038 KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
2039 cache->uc_allocs++;
2040 critical_exit();
2041 if (zone->uz_ctor != NULL &&
2042 zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2043 atomic_add_long(&zone->uz_fails, 1);
2044 zone_free_item(zone, item, udata, SKIP_DTOR);
2045 return (NULL);
2046 }
2047#ifdef INVARIANTS
2048 uma_dbg_alloc(zone, NULL, item);
2049#endif
2050 if (flags & M_ZERO)
2051 bzero(item, zone->uz_size);
2052 return (item);
2053 }
2054
2055 /*
2056 * We have run out of items in our alloc bucket.
2057 * See if we can switch with our free bucket.
2058 */
2059 bucket = cache->uc_freebucket;
2060 if (bucket != NULL && bucket->ub_cnt > 0) {
2061#ifdef UMA_DEBUG_ALLOC
2062 printf("uma_zalloc: Swapping empty with alloc.\n");
2063#endif
2064 cache->uc_freebucket = cache->uc_allocbucket;
2065 cache->uc_allocbucket = bucket;
2066 goto zalloc_start;
2067 }
2068
2069 /*
2070 * Discard any empty allocation bucket while we hold no locks.
2071 */
2072 bucket = cache->uc_allocbucket;
2073 cache->uc_allocbucket = NULL;
2074 critical_exit();
2075 if (bucket != NULL)
2074 bucket_free(bucket);
2076 bucket_free(zone, bucket);
2075
2076 /* Short-circuit for zones without buckets and low memory. */
2077 if (zone->uz_count == 0 || bucketdisable)
2078 goto zalloc_item;
2079
2080 /*
2081	 * The attempt to retrieve the item from the per-CPU cache has failed, so
2082 * we must go back to the zone. This requires the zone lock, so we
2083 * must drop the critical section, then re-acquire it when we go back
2084 * to the cache. Since the critical section is released, we may be
2085 * preempted or migrate. As such, make sure not to maintain any
2086 * thread-local state specific to the cache from prior to releasing
2087 * the critical section.
2088 */
2089 lockfail = 0;
2090 if (ZONE_TRYLOCK(zone) == 0) {
2091 /* Record contention to size the buckets. */
2092 ZONE_LOCK(zone);
2093 lockfail = 1;
2094 }
2095 critical_enter();
2096 cpu = curcpu;
2097 cache = &zone->uz_cpu[cpu];
2098
2099 /*
2100 * Since we have locked the zone we may as well send back our stats.
2101 */
2102 atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2103 atomic_add_long(&zone->uz_frees, cache->uc_frees);
2104 cache->uc_allocs = 0;
2105 cache->uc_frees = 0;
2106
2107 /* See if we lost the race to fill the cache. */
2108 if (cache->uc_allocbucket != NULL) {
2109 ZONE_UNLOCK(zone);
2110 goto zalloc_start;
2111 }
2112
2113 /*
2114 * Check the zone's cache of buckets.
2115 */
2116 if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
2117 KASSERT(bucket->ub_cnt != 0,
2118 ("uma_zalloc_arg: Returning an empty bucket."));
2119
2120 LIST_REMOVE(bucket, ub_link);
2121 cache->uc_allocbucket = bucket;
2122 ZONE_UNLOCK(zone);
2123 goto zalloc_start;
2124 }
2125 /* We are no longer associated with this CPU. */
2126 critical_exit();
2127
2128 /*
2129 * We bump the uz count when the cache size is insufficient to
2130 * handle the working set.
2131 */
2132 if (lockfail && zone->uz_count < BUCKET_MAX && zone->uz_count != 0 &&
2133 zone != largebucket)
2134 zone->uz_count++;
2135 ZONE_UNLOCK(zone);
2136
2137 /*
2138	 * Now let's just fill a bucket and put it on the free list. If that
2139	 * works we'll restart the allocation from the beginning and it
2140	 * will use the just-filled bucket.
2141 */
2142 bucket = zone_alloc_bucket(zone, flags);
2143 if (bucket != NULL) {
2144 ZONE_LOCK(zone);
2145 critical_enter();
2146 cpu = curcpu;
2147 cache = &zone->uz_cpu[cpu];
2148 /*
2149 * See if we lost the race or were migrated. Cache the
2150 * initialized bucket to make this less likely or claim
2151 * the memory directly.
2152 */
2153 if (cache->uc_allocbucket == NULL)
2154 cache->uc_allocbucket = bucket;
2155 else
2156 LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2157 ZONE_UNLOCK(zone);
2158 goto zalloc_start;
2159 }
2160
2161 /*
2162 * We may not be able to get a bucket so return an actual item.
2163 */
2164#ifdef UMA_DEBUG
2165 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2166#endif
2167
2168zalloc_item:
2169 item = zone_alloc_item(zone, udata, flags);
2170
2171 return (item);
2172}
2173
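/*
 * Illustrative usage sketch (not taken from this file; "foo_zone" and
 * "struct foo" are hypothetical): callers normally allocate through the
 * uma_zalloc() wrapper declared in uma.h, which passes NULL udata to
 * uma_zalloc_arg():
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, 0);
 *	p = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 */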
2174static uma_slab_t
2175keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2176{
2177 uma_slab_t slab;
2178
2179 mtx_assert(&keg->uk_lock, MA_OWNED);
2180 slab = NULL;
2181
2182 for (;;) {
2183 /*
2184 * Find a slab with some space. Prefer slabs that are partially
2185 * used over those that are totally full. This helps to reduce
2186 * fragmentation.
2187 */
2188 if (keg->uk_free != 0) {
2189 if (!LIST_EMPTY(&keg->uk_part_slab)) {
2190 slab = LIST_FIRST(&keg->uk_part_slab);
2191 } else {
2192 slab = LIST_FIRST(&keg->uk_free_slab);
2193 LIST_REMOVE(slab, us_link);
2194 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2195 us_link);
2196 }
2197 MPASS(slab->us_keg == keg);
2198 return (slab);
2199 }
2200
2201 /*
2202 * M_NOVM means don't ask at all!
2203 */
2204 if (flags & M_NOVM)
2205 break;
2206
2207 if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2208 keg->uk_flags |= UMA_ZFLAG_FULL;
2209 /*
2210 * If this is not a multi-zone, set the FULL bit.
2211 * Otherwise slab_multi() takes care of it.
2212 */
2213 if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
2214 zone->uz_flags |= UMA_ZFLAG_FULL;
2215 zone_log_warning(zone);
2216 }
2217 if (flags & M_NOWAIT)
2218 break;
2219 zone->uz_sleeps++;
2220 msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2221 continue;
2222 }
2223 slab = keg_alloc_slab(keg, zone, flags);
2224 /*
2225 * If we got a slab here it's safe to mark it partially used
2226 * and return. We assume that the caller is going to remove
2227 * at least one item.
2228 */
2229 if (slab) {
2230 MPASS(slab->us_keg == keg);
2231 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2232 return (slab);
2233 }
2234 /*
2235 * We might not have been able to get a slab but another cpu
2236 * could have while we were unlocked. Check again before we
2237 * fail.
2238 */
2239 flags |= M_NOVM;
2240 }
2241 return (slab);
2242}
2243
2244static inline void
2245zone_relock(uma_zone_t zone, uma_keg_t keg)
2246{
2247 if (zone->uz_lock != &keg->uk_lock) {
2248 KEG_UNLOCK(keg);
2249 ZONE_LOCK(zone);
2250 }
2251}
2252
2253static inline void
2254keg_relock(uma_keg_t keg, uma_zone_t zone)
2255{
2256 if (zone->uz_lock != &keg->uk_lock) {
2257 ZONE_UNLOCK(zone);
2258 KEG_LOCK(keg);
2259 }
2260}
2261
2262static uma_slab_t
2263zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2264{
2265 uma_slab_t slab;
2266
2267 if (keg == NULL)
2251 if (keg == NULL) {
2268 keg = zone_first_keg(zone);
2252 keg = zone_first_keg(zone);
2253 KEG_LOCK(keg);
2254 }
2269
2270 for (;;) {
2271 slab = keg_fetch_slab(keg, zone, flags);
2272 if (slab)
2273 return (slab);
2274 if (flags & (M_NOWAIT | M_NOVM))
2275 break;
2276 }
2263 KEG_UNLOCK(keg);
2277 return (NULL);
2278}
2279
2280/*
2281 * uma_zone_fetch_slab_multi: Fetches a slab from one available keg. Returns
2282 * with the keg locked. Caller must call zone_relock() afterwards if the
2283 * zone lock is required. On NULL the zone lock is held.
2269 * with the keg locked. On NULL no lock is held.
2284 *
2285 * The last pointer is used to seed the search. It is not required.
2286 */
2287static uma_slab_t
2288zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2289{
2290 uma_klink_t klink;
2291 uma_slab_t slab;
2292 uma_keg_t keg;
2293 int flags;
2294 int empty;
2295 int full;
2296
2297 /*
2298 * Don't wait on the first pass. This will skip limit tests
2299 * as well. We don't want to block if we can find a provider
2300 * without blocking.
2301 */
2302 flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2303 /*
2304 * Use the last slab allocated as a hint for where to start
2305 * the search.
2306 */
2307 if (last) {
2293 if (last != NULL) {
2308 slab = keg_fetch_slab(last, zone, flags);
2309 if (slab)
2310 return (slab);
2311 zone_relock(zone, last);
2312 last = NULL;
2297 KEG_UNLOCK(last);
2313 }
2314 /*
2315	 * Loop until we have a slab in case of transient failures
2316 * while M_WAITOK is specified. I'm not sure this is 100%
2317 * required but we've done it for so long now.
2318 */
2319 for (;;) {
2320 empty = 0;
2321 full = 0;
2322 /*
2323 * Search the available kegs for slabs. Be careful to hold the
2324 * correct lock while calling into the keg layer.
2325 */
2326 LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2327 keg = klink->kl_keg;
2328 keg_relock(keg, zone);
2313 KEG_LOCK(keg);
2329 if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2330 slab = keg_fetch_slab(keg, zone, flags);
2331 if (slab)
2332 return (slab);
2333 }
2334 if (keg->uk_flags & UMA_ZFLAG_FULL)
2335 full++;
2336 else
2337 empty++;
2338 zone_relock(zone, keg);
2323 KEG_UNLOCK(keg);
2339 }
2340 if (rflags & (M_NOWAIT | M_NOVM))
2341 break;
2342 flags = rflags;
2343 /*
2344 * All kegs are full. XXX We can't atomically check all kegs
2345 * and sleep so just sleep for a short period and retry.
2346 */
2347 if (full && !empty) {
2333 ZONE_LOCK(zone);
2348 zone->uz_flags |= UMA_ZFLAG_FULL;
2349 zone->uz_sleeps++;
2350 zone_log_warning(zone);
2351 msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
2337 msleep(zone, zone->uz_lockptr, PVM,
2338 "zonelimit", hz/100);
2352 zone->uz_flags &= ~UMA_ZFLAG_FULL;
2339 zone->uz_flags &= ~UMA_ZFLAG_FULL;
2340 ZONE_UNLOCK(zone);
2353 continue;
2354 }
2355 }
2356 return (NULL);
2357}
2358
2359static void *
2360slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
2361{
2362 void *item;
2363 uint8_t freei;
2364
2365 MPASS(keg == slab->us_keg);
2366 mtx_assert(&keg->uk_lock, MA_OWNED);
2367
2368 freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
2369 BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
2370 item = slab->us_data + (keg->uk_rsize * freei);
2371 slab->us_freecount--;
2372 keg->uk_free--;
2373
2374 /* Move this slab to the full list */
2375 if (slab->us_freecount == 0) {
2376 LIST_REMOVE(slab, us_link);
2377 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2378 }
2379
2380 return (item);
2381}
2382
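/*
 * Worked example for the index arithmetic above (illustrative numbers
 * only): if BIT_FFS() reports the first set bit at position 4, then
 * freei == 3, and with uk_rsize == 256 the returned item starts at
 * us_data + 3 * 256 == us_data + 768.
 */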
2383static int
2384zone_import(uma_zone_t zone, void **bucket, int max, int flags)
2385{
2386 uma_slab_t slab;
2387 uma_keg_t keg;
2388 int i;
2389
2390 ZONE_LOCK(zone);
2391 /* Try to keep the buckets totally full */
2392 slab = NULL;
2393 keg = NULL;
2378 slab = NULL;
2379 keg = NULL;
2380 /* Try to keep the buckets totally full */
2394 for (i = 0; i < max; ) {
2395 if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
2396 break;
2397 keg = slab->us_keg;
2381 for (i = 0; i < max; ) {
2382 if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
2383 break;
2384 keg = slab->us_keg;
2398 while (slab->us_freecount && i < max)
2385 while (slab->us_freecount && i < max)
2399 bucket[i++] = slab_alloc_item(keg, slab);
2400
2401 /* Don't block on the next fill */
2402 flags &= ~M_WAITOK;
2403 flags |= M_NOWAIT;
2404 }
2405 if (slab != NULL)
2406 KEG_UNLOCK(keg);
2386 bucket[i++] = slab_alloc_item(keg, slab);
2387
2388 /* Don't block on the next fill */
2389 flags &= ~M_WAITOK;
2390 flags |= M_NOWAIT;
2391 }
2392 if (slab != NULL)
2393 KEG_UNLOCK(keg);
2407 else
2408 ZONE_UNLOCK(zone);
2409
2410 return i;
2411}
2412
2413static uma_bucket_t
2414zone_alloc_bucket(uma_zone_t zone, int flags)
2415{
2416 uma_bucket_t bucket;
2417 int bflags;
2418 int max;
2419
2402 int max;
2403
2420 max = zone->uz_count;
2421 bflags = (flags & ~M_WAITOK) | M_NOWAIT;
2422 if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2423 bflags |= M_NOVM;
2424 bucket = bucket_alloc(zone->uz_count, bflags);
2404 bucket = bucket_alloc(zone, M_NOWAIT | (flags & M_NOVM));
2425 if (bucket == NULL)
2426 goto out;
2427
2428 max = MIN(bucket->ub_entries, max);
2408 max = MIN(bucket->ub_entries, zone->uz_count);
2429 bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
2430 max, flags);
2431
2432 /*
2433 * Initialize the memory if necessary.
2434 */
2435 if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
2436 int i;
2437
2438 for (i = 0; i < bucket->ub_cnt; i++)
2439 if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2440 flags) != 0)
2441 break;
2442 /*
2443 * If we couldn't initialize the whole bucket, put the
2444 * rest back onto the freelist.
2445 */
2446 if (i != bucket->ub_cnt) {
2447 zone->uz_release(zone->uz_arg, bucket->ub_bucket[i],
2427 zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
2448 bucket->ub_cnt - i);
2449#ifdef INVARIANTS
2450 bzero(&bucket->ub_bucket[i],
2451 sizeof(void *) * (bucket->ub_cnt - i));
2452#endif
2453 bucket->ub_cnt = i;
2454 }
2455 }
2456
2457out:
2458 if (bucket == NULL || bucket->ub_cnt == 0) {
2459 if (bucket != NULL)
2460 bucket_free(bucket);
2440 bucket_free(zone, bucket);
2461 atomic_add_long(&zone->uz_fails, 1);
2462 return (NULL);
2463 }
2464
2465 return (bucket);
2466}
2467
2468/*
2469 * Allocates a single item from a zone.
2470 *
2471 * Arguments
2472 * zone The zone to alloc for.
2473 * udata The data to be passed to the constructor.
2474 * flags M_WAITOK, M_NOWAIT, M_ZERO.
2475 *
2476 * Returns
2477 * NULL if there is no memory and M_NOWAIT is set
2478 * An item if successful
2479 */
2480
2481static void *
2482zone_alloc_item(uma_zone_t zone, void *udata, int flags)
2483{
2484 void *item;
2485
2486 item = NULL;
2487
2488#ifdef UMA_DEBUG_ALLOC
2489 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2490#endif
2491 if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
2492 goto fail;
2493 atomic_add_long(&zone->uz_allocs, 1);
2494
2495 /*
2496 * We have to call both the zone's init (not the keg's init)
2497 * and the zone's ctor. This is because the item is going from
2498 * a keg slab directly to the user, and the user is expecting it
2499 * to be both zone-init'd as well as zone-ctor'd.
2500 */
2501 if (zone->uz_init != NULL) {
2502 if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2503 zone_free_item(zone, item, udata, SKIP_FINI);
2504 goto fail;
2505 }
2506 }
2507 if (zone->uz_ctor != NULL) {
2508 if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2509 zone_free_item(zone, item, udata, SKIP_DTOR);
2510 goto fail;
2511 }
2512 }
2513#ifdef INVARIANTS
2514 uma_dbg_alloc(zone, NULL, item);
2515#endif
2516 if (flags & M_ZERO)
2517 bzero(item, zone->uz_size);
2518
2519 return (item);
2520
2521fail:
2522 atomic_add_long(&zone->uz_fails, 1);
2523 return (NULL);
2524}
2525
2526/* See uma.h */
2527void
2528uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2529{
2530 uma_cache_t cache;
2531 uma_bucket_t bucket;
2532 int bflags;
2533 int cpu;
2534
2535#ifdef UMA_DEBUG_ALLOC_1
2536 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2537#endif
2538 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2539 zone->uz_name);
2540
2541 /* uma_zfree(..., NULL) does nothing, to match free(9). */
2542 if (item == NULL)
2543 return;
2544#ifdef DEBUG_MEMGUARD
2545 if (is_memguard_addr(item)) {
2546 if (zone->uz_dtor != NULL && zone->uz_dtor != mtrash_dtor)
2547 zone->uz_dtor(item, zone->uz_size, udata);
2548 if (zone->uz_fini != NULL && zone->uz_fini != mtrash_fini)
2549 zone->uz_fini(item, zone->uz_size);
2550 memguard_free(item);
2551 return;
2552 }
2553#endif
2554#ifdef INVARIANTS
2555 if (zone->uz_flags & UMA_ZONE_MALLOC)
2556 uma_dbg_free(zone, udata, item);
2557 else
2558 uma_dbg_free(zone, NULL, item);
2559#endif
2560 if (zone->uz_dtor != NULL)
2561 zone->uz_dtor(item, zone->uz_size, udata);
2562
2563 /*
2564 * The race here is acceptable. If we miss it we'll just have to wait
2565 * a little longer for the limits to be reset.
2566 */
2567 if (zone->uz_flags & UMA_ZFLAG_FULL)
2568 goto zfree_item;
2569
2570 /*
2571 * If possible, free to the per-CPU cache. There are two
2572 * requirements for safe access to the per-CPU cache: (1) the thread
2573 * accessing the cache must not be preempted or yield during access,
2574 * and (2) the thread must not migrate CPUs without switching which
2575 * cache it accesses. We rely on a critical section to prevent
2576 * preemption and migration. We release the critical section in
2577 * order to acquire the zone mutex if we are unable to free to the
2578 * current cache; when we re-acquire the critical section, we must
2579 * detect and handle migration if it has occurred.
2580 */
2581zfree_restart:
2582 critical_enter();
2583 cpu = curcpu;
2584 cache = &zone->uz_cpu[cpu];
2585
2586zfree_start:
2587 /*
2588 * Try to free into the allocbucket first to give LIFO ordering
2589	 * for cache-hot data structures. Spill over into the freebucket
2590 * if necessary. Alloc will swap them if one runs dry.
2591 */
2592 bucket = cache->uc_allocbucket;
2593 if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
2594 bucket = cache->uc_freebucket;
2595 if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2596 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2597 ("uma_zfree: Freeing to non free bucket index."));
2598 bucket->ub_bucket[bucket->ub_cnt] = item;
2599 bucket->ub_cnt++;
2600 cache->uc_frees++;
2601 critical_exit();
2602 return;
2603 }
2604
2605 /*
2606	 * We must go back to the zone, which requires acquiring the zone lock,
2607 * which in turn means we must release and re-acquire the critical
2608 * section. Since the critical section is released, we may be
2609 * preempted or migrate. As such, make sure not to maintain any
2610 * thread-local state specific to the cache from prior to releasing
2611 * the critical section.
2612 */
2613 critical_exit();
2614 if (zone->uz_count == 0 || bucketdisable)
2615 goto zfree_item;
2616
2617 ZONE_LOCK(zone);
2618 critical_enter();
2619 cpu = curcpu;
2620 cache = &zone->uz_cpu[cpu];
2621
2622 /*
2623 * Since we have locked the zone we may as well send back our stats.
2624 */
2625 atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2626 atomic_add_long(&zone->uz_frees, cache->uc_frees);
2627 cache->uc_allocs = 0;
2628 cache->uc_frees = 0;
2629
2630 bucket = cache->uc_freebucket;
2631 if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2632 ZONE_UNLOCK(zone);
2633 goto zfree_start;
2634 }
2635 cache->uc_freebucket = NULL;
2636
2637 /* Can we throw this on the zone full list? */
2638 if (bucket != NULL) {
2639#ifdef UMA_DEBUG_ALLOC
2640 printf("uma_zfree: Putting old bucket on the free list.\n");
2641#endif
2642 /* ub_cnt is pointing to the last free item */
2643 KASSERT(bucket->ub_cnt != 0,
2644 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2645 LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2646 }
2647
2648 /* We are no longer associated with this CPU. */
2649 critical_exit();
2650
2651 /* And the zone.. */
2652 ZONE_UNLOCK(zone);
2653
2654#ifdef UMA_DEBUG_ALLOC
2655 printf("uma_zfree: Allocating new free bucket.\n");
2656#endif
2657 bflags = M_NOWAIT;
2658 if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2659 bflags |= M_NOVM;
2660 bucket = bucket_alloc(zone->uz_count, bflags);
2636 bucket = bucket_alloc(zone, M_NOWAIT);
2661 if (bucket) {
2662 critical_enter();
2663 cpu = curcpu;
2664 cache = &zone->uz_cpu[cpu];
2665 if (cache->uc_freebucket == NULL) {
2666 cache->uc_freebucket = bucket;
2667 goto zfree_start;
2668 }
2669 /*
2670 * We lost the race, start over. We have to drop our
2671 * critical section to free the bucket.
2672 */
2673 critical_exit();
2674 bucket_free(bucket);
2650 bucket_free(zone, bucket);
2675 goto zfree_restart;
2676 }
2677
2678 /*
2679 * If nothing else caught this, we'll just do an internal free.
2680 */
2681zfree_item:
2682 zone_free_item(zone, item, udata, SKIP_DTOR);
2683
2684 return;
2685}
2686
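/*
 * Illustrative usage sketch ("foo_zone" and "p" are hypothetical): most
 * callers free through the uma_zfree() wrapper in uma.h, which passes
 * NULL udata to uma_zfree_arg(); freeing NULL is a no-op:
 *
 *	uma_zfree(foo_zone, p);
 */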
2687static void
2688slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
2689{
2690 uint8_t freei;
2691
2692 mtx_assert(&keg->uk_lock, MA_OWNED);
2693 MPASS(keg == slab->us_keg);
2694
2695 /* Do we need to remove from any lists? */
2696 if (slab->us_freecount+1 == keg->uk_ipers) {
2697 LIST_REMOVE(slab, us_link);
2698 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2699 } else if (slab->us_freecount == 0) {
2700 LIST_REMOVE(slab, us_link);
2701 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2702 }
2703
2704 /* Slab management. */
2705 freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
2706 BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
2707 slab->us_freecount++;
2708
2709 /* Keg statistics. */
2710 keg->uk_free++;
2711}
2712
2713static void
2714zone_release(uma_zone_t zone, void **bucket, int cnt)
2715{
2716 void *item;
2717 uma_slab_t slab;
2718 uma_keg_t keg;
2719 uint8_t *mem;
2720 int clearfull;
2721 int i;
2722
2723 clearfull = 0;
2724 ZONE_LOCK(zone);
2725 keg = zone_first_keg(zone);
2700 keg = zone_first_keg(zone);
2701 KEG_LOCK(keg);
2726 for (i = 0; i < cnt; i++) {
2727 item = bucket[i];
2728 if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2729 mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
2730 if (zone->uz_flags & UMA_ZONE_HASH) {
2731 slab = hash_sfind(&keg->uk_hash, mem);
2732 } else {
2733 mem += keg->uk_pgoff;
2734 slab = (uma_slab_t)mem;
2735 }
2736 } else {
2737 slab = vtoslab((vm_offset_t)item);
2738 if (slab->us_keg != keg) {
2739 KEG_UNLOCK(keg);
2740 keg = slab->us_keg;
2741 KEG_LOCK(keg);
2742 }
2743 }
2744 slab_free_item(keg, slab, item);
2745 if (keg->uk_flags & UMA_ZFLAG_FULL) {
2746 if (keg->uk_pages < keg->uk_maxpages) {
2747 keg->uk_flags &= ~UMA_ZFLAG_FULL;
2748 clearfull = 1;
2749 }
2750
2751 /*
2752 * We can handle one more allocation. Since we're
2753 * clearing ZFLAG_FULL, wake up all procs blocked
2754 * on pages. This should be uncommon, so keeping this
2755 * simple for now (rather than adding count of blocked
2756 * threads etc).
2757 */
2758 wakeup(keg);
2759 }
2760 }
2761 zone_relock(zone, keg);
2737 KEG_UNLOCK(keg);
2762 if (clearfull) {
2738 if (clearfull) {
2739 ZONE_LOCK(zone);
2763 zone->uz_flags &= ~UMA_ZFLAG_FULL;
2764 wakeup(zone);
2740 zone->uz_flags &= ~UMA_ZFLAG_FULL;
2741 wakeup(zone);
2742 ZONE_UNLOCK(zone);
2765 }
2743 }
2766 ZONE_UNLOCK(zone);
2767
2768}
2769
2770/*
2771 * Frees a single item to any zone.
2772 *
2773 * Arguments:
2774 * zone The zone to free to
2775 * item The item we're freeing
2776 * udata User supplied data for the dtor
2777 * skip Skip dtors and finis
2778 */
2779static void
2780zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2781{
2782
2783#ifdef INVARIANTS
2784 if (skip == SKIP_NONE) {
2785 if (zone->uz_flags & UMA_ZONE_MALLOC)
2786 uma_dbg_free(zone, udata, item);
2787 else
2788 uma_dbg_free(zone, NULL, item);
2789 }
2790#endif
2791 if (skip < SKIP_DTOR && zone->uz_dtor)
2792 zone->uz_dtor(item, zone->uz_size, udata);
2793
2794 if (skip < SKIP_FINI && zone->uz_fini)
2795 zone->uz_fini(item, zone->uz_size);
2796
2797 atomic_add_long(&zone->uz_frees, 1);
2798 zone->uz_release(zone->uz_arg, &item, 1);
2799}
2800
2801/* See uma.h */
2802int
2803uma_zone_set_max(uma_zone_t zone, int nitems)
2804{
2805 uma_keg_t keg;
2806
2807 keg = zone_first_keg(zone);
2808 if (keg == NULL)
2809 return (0);
2810 ZONE_LOCK(zone);
2787 KEG_LOCK(keg);
2811 keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2812 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2813 keg->uk_maxpages += keg->uk_ppera;
2814 nitems = keg->uk_maxpages * keg->uk_ipers;
2788 keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2789 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2790 keg->uk_maxpages += keg->uk_ppera;
2791 nitems = keg->uk_maxpages * keg->uk_ipers;
2815 ZONE_UNLOCK(zone);
2792 KEG_UNLOCK(keg);
2816
2817 return (nitems);
2818}
2819
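/*
 * Illustrative usage sketch ("foo_zone" is hypothetical): the requested
 * limit is rounded to whole slabs, so callers interested in the exact
 * cap should use the value returned rather than the value passed in:
 *
 *	effective = uma_zone_set_max(foo_zone, 1024);
 */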
2820/* See uma.h */
2821int
2822uma_zone_get_max(uma_zone_t zone)
2823{
2824 int nitems;
2825 uma_keg_t keg;
2826
2827 keg = zone_first_keg(zone);
2828 if (keg == NULL)
2829 return (0);
2830 ZONE_LOCK(zone);
2807 KEG_LOCK(keg);
2831 nitems = keg->uk_maxpages * keg->uk_ipers;
2808 nitems = keg->uk_maxpages * keg->uk_ipers;
2832 ZONE_UNLOCK(zone);
2809 KEG_UNLOCK(keg);
2833
2834 return (nitems);
2835}
2836
2837/* See uma.h */
2838void
2839uma_zone_set_warning(uma_zone_t zone, const char *warning)
2840{
2841
2842 ZONE_LOCK(zone);
2843 zone->uz_warning = warning;
2844 ZONE_UNLOCK(zone);
2845}
2846
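/*
 * Illustrative usage sketch (hypothetical zone and message): the string
 * set here is printed by zone_log_warning() when the zone or its keg
 * hits its item limit:
 *
 *	uma_zone_set_warning(foo_zone, "foo zone limit reached");
 */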
2847/* See uma.h */
2848int
2849uma_zone_get_cur(uma_zone_t zone)
2850{
2851 int64_t nitems;
2852 u_int i;
2853
2854 ZONE_LOCK(zone);
2855 nitems = zone->uz_allocs - zone->uz_frees;
2856 CPU_FOREACH(i) {
2857 /*
2858 * See the comment in sysctl_vm_zone_stats() regarding the
2859 * safety of accessing the per-cpu caches. With the zone lock
2860 * held, it is safe, but can potentially result in stale data.
2861 */
2862 nitems += zone->uz_cpu[i].uc_allocs -
2863 zone->uz_cpu[i].uc_frees;
2864 }
2865 ZONE_UNLOCK(zone);
2866
2867 return (nitems < 0 ? 0 : nitems);
2868}
2869
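/*
 * Illustrative usage sketch ("foo_zone" is hypothetical): the count is a
 * point-in-time estimate; the per-CPU counters summed above may already
 * be stale by the time the caller looks at the result:
 *
 *	items_in_use = uma_zone_get_cur(foo_zone);
 */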
2870/* See uma.h */
2871void
2872uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2873{
2874 uma_keg_t keg;
2875
2876 ZONE_LOCK(zone);
2877 keg = zone_first_keg(zone);
2878 KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
2853 keg = zone_first_keg(zone);
2854 KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
2855 KEG_LOCK(keg);
2879 KASSERT(keg->uk_pages == 0,
2880 ("uma_zone_set_init on non-empty keg"));
2881 keg->uk_init = uminit;
2856 KASSERT(keg->uk_pages == 0,
2857 ("uma_zone_set_init on non-empty keg"));
2858 keg->uk_init = uminit;
2882 ZONE_UNLOCK(zone);
2859 KEG_UNLOCK(keg);
2883}
2884
2885/* See uma.h */
2886void
2887uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2888{
2889 uma_keg_t keg;
2890
2891 ZONE_LOCK(zone);
2892 keg = zone_first_keg(zone);
2893	KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
2868	keg = zone_first_keg(zone);
2869	KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
2870 KEG_LOCK(keg);
2894 KASSERT(keg->uk_pages == 0,
2895 ("uma_zone_set_fini on non-empty keg"));
2896 keg->uk_fini = fini;
2871 KASSERT(keg->uk_pages == 0,
2872 ("uma_zone_set_fini on non-empty keg"));
2873 keg->uk_fini = fini;
2897 ZONE_UNLOCK(zone);
2874 KEG_UNLOCK(keg);
2898}
2899
2900/* See uma.h */
2901void
2902uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2903{
2875}
2876
2877/* See uma.h */
2878void
2879uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2880{
2881
2904 ZONE_LOCK(zone);
2905 KASSERT(zone_first_keg(zone)->uk_pages == 0,
2906 ("uma_zone_set_zinit on non-empty keg"));
2907 zone->uz_init = zinit;
2908 ZONE_UNLOCK(zone);
2909}
2910
2911/* See uma.h */
2912void
2913uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2914{
2882 ZONE_LOCK(zone);
2883 KASSERT(zone_first_keg(zone)->uk_pages == 0,
2884 ("uma_zone_set_zinit on non-empty keg"));
2885 zone->uz_init = zinit;
2886 ZONE_UNLOCK(zone);
2887}
2888
2889/* See uma.h */
2890void
2891uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2892{
2893
2915 ZONE_LOCK(zone);
2916 KASSERT(zone_first_keg(zone)->uk_pages == 0,
2917 ("uma_zone_set_zfini on non-empty keg"));
2918 zone->uz_fini = zfini;
2919 ZONE_UNLOCK(zone);
2920}
2921
2922/* See uma.h */
2923/* XXX uk_freef is not actually used with the zone locked */
2924void
2925uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2926{
2927 uma_keg_t keg;
2928
2894 ZONE_LOCK(zone);
2895 KASSERT(zone_first_keg(zone)->uk_pages == 0,
2896 ("uma_zone_set_zfini on non-empty keg"));
2897 zone->uz_fini = zfini;
2898 ZONE_UNLOCK(zone);
2899}
2900
2901/* See uma.h */
2902/* XXX uk_freef is not actually used with the zone locked */
2903void
2904uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2905{
2906 uma_keg_t keg;
2907
2929 ZONE_LOCK(zone);
2930 keg = zone_first_keg(zone);
2931	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
2908	keg = zone_first_keg(zone);
2909	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
2910 KEG_LOCK(keg);
2932 keg->uk_freef = freef;
2911 keg->uk_freef = freef;
2933 ZONE_UNLOCK(zone);
2912 KEG_UNLOCK(keg);
2934}
2935
2936/* See uma.h */
2937/* XXX uk_allocf is not actually used with the zone locked */
2938void
2939uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2940{
2941 uma_keg_t keg;
2942
2943 ZONE_LOCK(zone);
2944 keg = zone_first_keg(zone);
2922 keg = zone_first_keg(zone);
2923 KEG_LOCK(keg);
2945 keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2946 keg->uk_allocf = allocf;
2924 keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2925 keg->uk_allocf = allocf;
2947 ZONE_UNLOCK(zone);
2926 KEG_UNLOCK(keg);
2948}
2949
2950/* See uma.h */
2951int
2952uma_zone_reserve_kva(uma_zone_t zone, int count)
2953{
2954 uma_keg_t keg;
2955 vm_offset_t kva;
2956 int pages;
2957
2958 keg = zone_first_keg(zone);
2959 if (keg == NULL)
2960 return (0);
2961 pages = count / keg->uk_ipers;
2962
2963 if (pages * keg->uk_ipers < count)
2964 pages++;
2965
2966#ifdef UMA_MD_SMALL_ALLOC
2967 if (keg->uk_ppera > 1) {
2968#else
2969 if (1) {
2970#endif
2971 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2972 if (kva == 0)
2973 return (0);
2974 } else
2975 kva = 0;
2976 ZONE_LOCK(zone);
2955 KEG_LOCK(keg);
2977 keg->uk_kva = kva;
2978 keg->uk_offset = 0;
2979 keg->uk_maxpages = pages;
2980#ifdef UMA_MD_SMALL_ALLOC
2981 keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
2982#else
2983 keg->uk_allocf = noobj_alloc;
2984#endif
2985 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2986 ZONE_UNLOCK(zone);
2965 KEG_UNLOCK(keg);
2966
2987 return (1);
2988}
2989
2990/* See uma.h */
2991void
2992uma_prealloc(uma_zone_t zone, int items)
2993{
2994 int slabs;
2995 uma_slab_t slab;
2996 uma_keg_t keg;
2997
2998 keg = zone_first_keg(zone);
2999 if (keg == NULL)
3000 return;
3001 ZONE_LOCK(zone);
2981 KEG_LOCK(keg);
3002 slabs = items / keg->uk_ipers;
3003 if (slabs * keg->uk_ipers < items)
3004 slabs++;
3005 while (slabs > 0) {
3006 slab = keg_alloc_slab(keg, zone, M_WAITOK);
3007 if (slab == NULL)
3008 break;
3009 MPASS(slab->us_keg == keg);
3010 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
3011 slabs--;
3012 }
3013 ZONE_UNLOCK(zone);
2993 KEG_UNLOCK(keg);
3014}
3015
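/*
 * Illustrative usage sketch ("foo_zone" is hypothetical): pre-populate
 * the keg's free slab list so that early allocations can be satisfied
 * without having to allocate fresh slabs:
 *
 *	uma_prealloc(foo_zone, 256);
 */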
3016/* See uma.h */
3017uint32_t *
3018uma_find_refcnt(uma_zone_t zone, void *item)
3019{
3020 uma_slabrefcnt_t slabref;
3021 uma_slab_t slab;
3022 uma_keg_t keg;
3023 uint32_t *refcnt;
3024 int idx;
3025
3026 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
3027 slabref = (uma_slabrefcnt_t)slab;
3028 keg = slab->us_keg;
3029 KASSERT(keg->uk_flags & UMA_ZONE_REFCNT,
3030 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
3031 idx = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3032 refcnt = &slabref->us_refcnt[idx];
3033 return refcnt;
3034}
3035
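/*
 * Illustrative usage sketch (hypothetical zone and item): only valid for
 * zones created with UMA_ZONE_REFCNT; the caller gets a pointer to the
 * per-item counter kept with the slab:
 *
 *	uint32_t *ref = uma_find_refcnt(refcnt_zone, item);
 *	*ref = 1;
 */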
3036/* See uma.h */
3037void
3038uma_reclaim(void)
3039{
3040#ifdef UMA_DEBUG
3041 printf("UMA: vm asked us to release pages!\n");
3042#endif
3043 bucket_enable();
3044 zone_foreach(zone_drain);
3045 /*
3046	 * Some slabs may have been freed but this zone will be visited early,
3047	 * so we visit it again so that we can free pages that are empty once
3048	 * other zones are drained. We have to do the same for buckets.
3049 */
3050 zone_drain(slabzone);
3051 zone_drain(slabrefzone);
3052 bucket_zone_drain();
3053}
3054
3055/* See uma.h */
3056int
3057uma_zone_exhausted(uma_zone_t zone)
3058{
3059 int full;
3060
3061 ZONE_LOCK(zone);
3062 full = (zone->uz_flags & UMA_ZFLAG_FULL);
3063 ZONE_UNLOCK(zone);
3064 return (full);
3065}
3066
3067int
3068uma_zone_exhausted_nolock(uma_zone_t zone)
3069{
3070 return (zone->uz_flags & UMA_ZFLAG_FULL);
3071}
3072
3073void *
3074uma_large_malloc(int size, int wait)
3075{
3076 void *mem;
3077 uma_slab_t slab;
3078 uint8_t flags;
3079
3080 slab = zone_alloc_item(slabzone, NULL, wait);
3081 if (slab == NULL)
3082 return (NULL);
3083 mem = page_alloc(NULL, size, &flags, wait);
3084 if (mem) {
3085 vsetslab((vm_offset_t)mem, slab);
3086 slab->us_data = mem;
3087 slab->us_flags = flags | UMA_SLAB_MALLOC;
3088 slab->us_size = size;
3089 } else {
3090 zone_free_item(slabzone, slab, NULL, SKIP_NONE);
3091 }
3092
3093 return (mem);
3094}
3095
3096void
3097uma_large_free(uma_slab_t slab)
3098{
3099 vsetobj((vm_offset_t)slab->us_data, kmem_object);
3100 page_free(slab->us_data, slab->us_size, slab->us_flags);
3101 zone_free_item(slabzone, slab, NULL, SKIP_NONE);
3102}
3103
3104void
3105uma_print_stats(void)
3106{
3107 zone_foreach(uma_print_zone);
3108}
3109
3110static void
3111slab_print(uma_slab_t slab)
3112{
3113 printf("slab: keg %p, data %p, freecount %d\n",
3114 slab->us_keg, slab->us_data, slab->us_freecount);
3115}
3116
3117static void
3118cache_print(uma_cache_t cache)
3119{
3120 printf("alloc: %p(%d), free: %p(%d)\n",
3121 cache->uc_allocbucket,
3122 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
3123 cache->uc_freebucket,
3124 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
3125}
3126
3127static void
3128uma_print_keg(uma_keg_t keg)
3129{
3130 uma_slab_t slab;
3131
3132 printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
3133 "out %d free %d limit %d\n",
3134 keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
3135 keg->uk_ipers, keg->uk_ppera,
3136 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
3137 (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
3138 printf("Part slabs:\n");
3139 LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
3140 slab_print(slab);
3141 printf("Free slabs:\n");
3142 LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
3143 slab_print(slab);
3144 printf("Full slabs:\n");
3145 LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
3146 slab_print(slab);
3147}
3148
3149void
3150uma_print_zone(uma_zone_t zone)
3151{
3152 uma_cache_t cache;
3153 uma_klink_t kl;
3154 int i;
3155
3156 printf("zone: %s(%p) size %d flags %#x\n",
3157 zone->uz_name, zone, zone->uz_size, zone->uz_flags);
3158 LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
3159 uma_print_keg(kl->kl_keg);
3160 CPU_FOREACH(i) {
3161 cache = &zone->uz_cpu[i];
3162 printf("CPU %d Cache:\n", i);
3163 cache_print(cache);
3164 }
3165}
3166
3167#ifdef DDB
3168/*
3169 * Generate statistics across both the zone and its per-CPU caches.  Return
3170 * each requested statistic through its pointer when that pointer is non-NULL.
3171 *
3172 * Note: does not update the zone statistics, as it can't safely clear the
3173 * per-CPU cache statistic.
3174 *
3175 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
3176 * safe from off-CPU; we should modify the caches to track this information
3177 * directly so that we don't have to.
3178 */
3179static void
3180uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
3181 uint64_t *freesp, uint64_t *sleepsp)
3182{
3183 uma_cache_t cache;
3184 uint64_t allocs, frees, sleeps;
3185 int cachefree, cpu;
3186
3187 allocs = frees = sleeps = 0;
3188 cachefree = 0;
3189 CPU_FOREACH(cpu) {
3190 cache = &z->uz_cpu[cpu];
3191 if (cache->uc_allocbucket != NULL)
3192 cachefree += cache->uc_allocbucket->ub_cnt;
3193 if (cache->uc_freebucket != NULL)
3194 cachefree += cache->uc_freebucket->ub_cnt;
3195 allocs += cache->uc_allocs;
3196 frees += cache->uc_frees;
3197 }
3198 allocs += z->uz_allocs;
3199 frees += z->uz_frees;
3200 sleeps += z->uz_sleeps;
3201 if (cachefreep != NULL)
3202 *cachefreep = cachefree;
3203 if (allocsp != NULL)
3204 *allocsp = allocs;
3205 if (freesp != NULL)
3206 *freesp = frees;
3207 if (sleepsp != NULL)
3208 *sleepsp = sleeps;
3209}
3210#endif /* DDB */
3211
3212static int
3213sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
3214{
3215 uma_keg_t kz;
3216 uma_zone_t z;
3217 int count;
3218
3219 count = 0;
3220 mtx_lock(&uma_mtx);
3221 LIST_FOREACH(kz, &uma_kegs, uk_link) {
3222 LIST_FOREACH(z, &kz->uk_zones, uz_link)
3223 count++;
3224 }
3225 mtx_unlock(&uma_mtx);
3226 return (sysctl_handle_int(oidp, &count, 0, req));
3227}
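
/*
 * Editor's note: illustrative only.  Assuming this handler is exported
 * under the vm tree as "zone_count" (the SYSCTL_PROC registration sits
 * elsewhere in this file and is not shown here), a userland reader
 * could fetch the zone count with:
 *
 *	int count;
 *	size_t len = sizeof(count);
 *
 *	if (sysctlbyname("vm.zone_count", &count, &len, NULL, 0) == 0)
 *		printf("%d zones\n", count);
 */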
3228
3229static int
3230sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
3231{
3232 struct uma_stream_header ush;
3233 struct uma_type_header uth;
3234 struct uma_percpu_stat ups;
3235 uma_bucket_t bucket;
3236 struct sbuf sbuf;
3237 uma_cache_t cache;
3238 uma_klink_t kl;
3239 uma_keg_t kz;
3240 uma_zone_t z;
3241 uma_keg_t k;
3242 int count, error, i;
3243
3244 error = sysctl_wire_old_buffer(req, 0);
3245 if (error != 0)
3246 return (error);
3247 sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
3248
3249 count = 0;
3250 mtx_lock(&uma_mtx);
3251 LIST_FOREACH(kz, &uma_kegs, uk_link) {
3252 LIST_FOREACH(z, &kz->uk_zones, uz_link)
3253 count++;
3254 }
3255
3256 /*
3257 * Insert stream header.
3258 */
3259 bzero(&ush, sizeof(ush));
3260 ush.ush_version = UMA_STREAM_VERSION;
3261 ush.ush_maxcpus = (mp_maxid + 1);
3262 ush.ush_count = count;
3263 (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
3264
3265 LIST_FOREACH(kz, &uma_kegs, uk_link) {
3266 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3267 bzero(&uth, sizeof(uth));
3268 ZONE_LOCK(z);
3269 strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
3270 uth.uth_align = kz->uk_align;
3271 uth.uth_size = kz->uk_size;
3272 uth.uth_rsize = kz->uk_rsize;
3273 LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
3274 k = kl->kl_keg;
3275 uth.uth_maxpages += k->uk_maxpages;
3276 uth.uth_pages += k->uk_pages;
3277 uth.uth_keg_free += k->uk_free;
3278 uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
3279 * k->uk_ipers;
3280 }
3281
3282 /*
3283			 * A zone is secondary if it is not the first entry
3284 * on the keg's zone list.
3285 */
3286 if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
3287 (LIST_FIRST(&kz->uk_zones) != z))
3288 uth.uth_zone_flags = UTH_ZONE_SECONDARY;
3289
3290 LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3291 uth.uth_zone_free += bucket->ub_cnt;
3292 uth.uth_allocs = z->uz_allocs;
3293 uth.uth_frees = z->uz_frees;
3294 uth.uth_fails = z->uz_fails;
3295 uth.uth_sleeps = z->uz_sleeps;
3296 (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
3297 /*
3298			 * It is not normally safe to read a cache's bucket
3299			 * pointers from a CPU other than the one that owns
3300			 * the cache.  However, without the zone lock held the
3301			 * pointers may only be exchanged, never invalidated,
3302			 * so we accept the possible race with a bucket
3303			 * exchange during monitoring.
3304 */
3305 for (i = 0; i < (mp_maxid + 1); i++) {
3306 bzero(&ups, sizeof(ups));
3307 if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
3308 goto skip;
3309 if (CPU_ABSENT(i))
3310 goto skip;
3311 cache = &z->uz_cpu[i];
3312 if (cache->uc_allocbucket != NULL)
3313 ups.ups_cache_free +=
3314 cache->uc_allocbucket->ub_cnt;
3315 if (cache->uc_freebucket != NULL)
3316 ups.ups_cache_free +=
3317 cache->uc_freebucket->ub_cnt;
3318 ups.ups_allocs = cache->uc_allocs;
3319 ups.ups_frees = cache->uc_frees;
3320skip:
3321 (void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
3322 }
3323 ZONE_UNLOCK(z);
3324 }
3325 }
3326 mtx_unlock(&uma_mtx);
3327 error = sbuf_finish(&sbuf);
3328 sbuf_delete(&sbuf);
3329 return (error);
3330}
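
/*
 * Editor's note: a sketch of the stream emitted above, for reference.
 * The buffer laid out by this handler is, in effect:
 *
 *	struct uma_stream_header ush;
 *	struct {
 *		struct uma_type_header	uth;
 *		struct uma_percpu_stat	ups[ush.ush_maxcpus];
 *	} zone[ush.ush_count];
 *
 * One ups record is written per possible CPU slot, zero-filled for
 * absent CPUs and for internal zones; libmemstat(3) is the usual
 * userland consumer of this stream.
 */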
3331
3332#ifdef DDB
3333DB_SHOW_COMMAND(uma, db_show_uma)
3334{
3335 uint64_t allocs, frees, sleeps;
3336 uma_bucket_t bucket;
3337 uma_keg_t kz;
3338 uma_zone_t z;
3339 int cachefree;
3340
3341 db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
3342 "Requests", "Sleeps");
3343 LIST_FOREACH(kz, &uma_kegs, uk_link) {
3344 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3345 if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
3346 allocs = z->uz_allocs;
3347 frees = z->uz_frees;
3348 sleeps = z->uz_sleeps;
3349 cachefree = 0;
3350 } else
3351 uma_zone_sumstat(z, &cachefree, &allocs,
3352 &frees, &sleeps);
3353 if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
3354 (LIST_FIRST(&kz->uk_zones) != z)))
3355 cachefree += kz->uk_free;
3356 LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3357 cachefree += bucket->ub_cnt;
3358 db_printf("%18s %8ju %8jd %8d %12ju %8ju\n", z->uz_name,
3359 (uintmax_t)kz->uk_size,
3360 (intmax_t)(allocs - frees), cachefree,
3361 (uintmax_t)allocs, sleeps);
3362 if (db_pager_quit)
3363 return;
3364 }
3365 }
3366}
3367#endif
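
/*
 * Editor's note: DB_SHOW_COMMAND(uma, ...) above registers the report
 * as a "show" command, so from the kernel debugger prompt it is run as
 *
 *	db> show uma
 *
 * which prints one line per zone using the figures gathered by
 * uma_zone_sumstat().
 */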