uma_core.c: diff of revision 92654 (deleted lines) against revision 92758 (added lines)
1/*
2 * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions, and the following
10 * disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/vm/uma_core.c 92654 2002-03-19 09:11:49Z jeff $
26 * $FreeBSD: head/sys/vm/uma_core.c 92758 2002-03-20 05:28:34Z jeff $
27 *
28 */
29
30/*
31 * uma_core.c Implementation of the Universal Memory allocator
32 *
33 * This allocator is intended to replace the multitude of similar object caches
34 * in the standard FreeBSD kernel. The intent is to be flexible as well as
 35 * efficient. A primary design goal is to return unused memory to the rest of
36 * the system. This will make the system as a whole more flexible due to the
37 * ability to move memory to subsystems which most need it instead of leaving
38 * pools of reserved memory unused.
39 *
40 * The basic ideas stem from similar slab/zone based allocators whose algorithms
41 * are well known.
42 *
43 */
44
45/*
46 * TODO:
47 * - Improve memory usage for large allocations
48 * - Improve INVARIANTS (0xdeadc0de write out)
49 * - Investigate cache size adjustments
50 */
51
52/* I should really use ktr.. */
53/*
54#define UMA_DEBUG 1
55#define UMA_DEBUG_ALLOC 1
56#define UMA_DEBUG_ALLOC_1 1
57*/
58
59
60#include "opt_param.h"
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/kernel.h>
64#include <sys/types.h>
65#include <sys/queue.h>
66#include <sys/malloc.h>
67#include <sys/lock.h>
68#include <sys/sysctl.h>
69#include <machine/types.h>
70#include <sys/mutex.h>
71#include <sys/smp.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/vm_param.h>
77#include <vm/vm_map.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_extern.h>
80#include <vm/uma.h>
81#include <vm/uma_int.h>
82
83/*
84 * This is the zone from which all zones are spawned. The idea is that even
85 * the zone heads are allocated from the allocator, so we use the bss section
86 * to bootstrap us.
87 */
88static struct uma_zone master_zone;
89static uma_zone_t zones = &master_zone;
90
91/* This is the zone from which all of uma_slab_t's are allocated. */
92static uma_zone_t slabzone;
93
94/*
95 * The initial hash tables come out of this zone so they can be allocated
96 * prior to malloc coming up.
97 */
98static uma_zone_t hashzone;
99
100/*
101 * Zone that buckets come from.
102 */
103static uma_zone_t bucketzone;
104
105/* Linked list of all zones in the system */
106static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
107
108/* This mutex protects the zone list */
109static struct mtx uma_mtx;
110
111/* Linked list of boot time pages */
112static LIST_HEAD(,uma_slab) uma_boot_pages =
113 LIST_HEAD_INITIALIZER(&uma_boot_pages);
114
115/* Count of free boottime pages */
116static int uma_boot_free = 0;
117
118/* Is the VM done starting up? */
119static int booted = 0;
120
121/* This is the handle used to schedule our working set calculator */
122static struct callout uma_callout;
123
124/* This is mp_maxid + 1, for use while looping over each cpu */
125static int maxcpu;
126
127/*
128 * This structure is passed as the zone ctor arg so that I don't have to create
129 * a special allocation function just for zones.
130 */
131struct uma_zctor_args {
132 char *name;
133 int size;
134 uma_ctor ctor;
135 uma_dtor dtor;
136 uma_init uminit;
137 uma_fini fini;
138 int align;
139 u_int16_t flags;
140};
141
142/*
143 * This is the malloc hash table which is used to find the zone that a
144 * malloc allocation came from. It is not currently resizeable. The
145 * memory for the actual hash bucket is allocated in kmeminit.
146 */
147struct uma_hash mhash;
148struct uma_hash *mallochash = &mhash;
149
150/* Prototypes.. */
151
152static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
153static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
154static void page_free(void *, int, u_int8_t);
155static uma_slab_t slab_zalloc(uma_zone_t, int);
156static void cache_drain(uma_zone_t);
157static void bucket_drain(uma_zone_t, uma_bucket_t);
158static void zone_drain(uma_zone_t);
159static void zone_ctor(void *, int, void *);
160static void zero_init(void *, int);
161static void zone_small_init(uma_zone_t zone);
162static void zone_large_init(uma_zone_t zone);
163static void zone_foreach(void (*zfunc)(uma_zone_t));
164static void zone_timeout(uma_zone_t zone);
165static void hash_expand(struct uma_hash *);
166static void uma_timeout(void *);
167static void uma_startup3(void);
168static void *uma_zalloc_internal(uma_zone_t, void *, int, int *, int);
169static void uma_zfree_internal(uma_zone_t,
170 void *, void *, int);
171void uma_print_zone(uma_zone_t);
172void uma_print_stats(void);
173static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
174
175SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
176 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
177SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
178
179
180/*
181 * Routine called by timeout which is used to fire off some time interval
182 * based calculations. (working set, stats, etc.)
183 *
184 * Arguments:
185 * arg Unused
186 *
187 * Returns:
188 * Nothing
189 */
190static void
191uma_timeout(void *unused)
192{
193 zone_foreach(zone_timeout);
194
195 /* Reschedule this event */
196 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
197}
198
199/*
200 * Routine to perform timeout driven calculations. This does the working set
201 * as well as hash expanding, and per cpu statistics aggregation.
202 *
203 * Arguments:
204 * zone The zone to operate on
205 *
206 * Returns:
207 * Nothing
208 */
209static void
210zone_timeout(uma_zone_t zone)
211{
212 uma_cache_t cache;
213 u_int64_t alloc;
214 int free;
215 int cpu;
216
217 alloc = 0;
218 free = 0;
219
220 /*
221 * Aggregate per cpu cache statistics back to the zone.
222 *
223 * I may rewrite this to set a flag in the per cpu cache instead of
224 * locking. If the flag is not cleared on the next round I will have
225 * to lock and do it here instead so that the statistics don't get too
226 * far out of sync.
227 */
228 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
229 for (cpu = 0; cpu < maxcpu; cpu++) {
230 if (CPU_ABSENT(cpu))
231 continue;
232 CPU_LOCK(zone, cpu);
233 cache = &zone->uz_cpu[cpu];
234 /* Add them up, and reset */
235 alloc += cache->uc_allocs;
236 cache->uc_allocs = 0;
237 if (cache->uc_allocbucket)
238 free += cache->uc_allocbucket->ub_ptr + 1;
239 if (cache->uc_freebucket)
240 free += cache->uc_freebucket->ub_ptr + 1;
241 CPU_UNLOCK(zone, cpu);
242 }
243 }
244
245 /* Now push these stats back into the zone.. */
246 ZONE_LOCK(zone);
247 zone->uz_allocs += alloc;
248
249 /*
 250 * cachefree is an instantaneous snapshot of what is in the per cpu
251 * caches, not an accurate counter
252 */
253 zone->uz_cachefree = free;
254
255 /*
256 * Expand the zone hash table.
257 *
258 * This is done if the number of slabs is larger than the hash size.
 259 * What I'm trying to do here is completely eliminate collisions. This
260 * may be a little aggressive. Should I allow for two collisions max?
261 */
262
263 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) &&
264 !(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
265 if (zone->uz_pages / zone->uz_ppera
266 >= zone->uz_hash.uh_hashsize)
267 hash_expand(&zone->uz_hash);
268 }
269
270 /*
271 * Here we compute the working set size as the total number of items
272 * left outstanding since the last time interval. This is slightly
273 * suboptimal. What we really want is the highest number of outstanding
274 * items during the last time quantum. This should be close enough.
275 *
276 * The working set size is used to throttle the zone_drain function.
277 * We don't want to return memory that we may need again immediately.
278 */
279 alloc = zone->uz_allocs - zone->uz_oallocs;
280 zone->uz_oallocs = zone->uz_allocs;
281 zone->uz_wssize = alloc;
282
283 ZONE_UNLOCK(zone);
284}
285
286/*
287 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout
288 * to reduce collisions. This must not be done in the regular allocation path,
289 * otherwise, we can recurse on the vm while allocating pages.
290 *
291 * Arguments:
292 * hash The hash you want to expand by a factor of two.
293 *
294 * Returns:
295 * Nothing
296 *
297 * Discussion:
298 */
299static void
300hash_expand(struct uma_hash *hash)
301{
302 struct slabhead *newhash;
303 struct slabhead *oldhash;
304 uma_slab_t slab;
305 int hzonefree;
306 int hashsize;
307 int alloc;
308 int hval;
309 int i;
310
311
312 /*
313 * Remember the old hash size and see if it has to go back to the
314 * hash zone, or malloc. The hash zone is used for the initial hash
315 */
316
317 hashsize = hash->uh_hashsize;
318 oldhash = hash->uh_slab_hash;
319
320 if (hashsize == UMA_HASH_SIZE_INIT)
321 hzonefree = 1;
322 else
323 hzonefree = 0;
324
325
326 /* We're just going to go to a power of two greater */
327 if (hash->uh_hashsize) {
328 alloc = sizeof(hash->uh_slab_hash[0]) * (hash->uh_hashsize * 2);
329 /* XXX Shouldn't be abusing DEVBUF here */
330 newhash = (struct slabhead *)malloc(alloc, M_DEVBUF, M_NOWAIT);
331 if (newhash == NULL) {
332 return;
333 }
334 hash->uh_hashsize *= 2;
335 } else {
336 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
337 newhash = uma_zalloc_internal(hashzone, NULL, M_WAITOK, NULL, -1);
338 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
339 }
340
341 bzero(newhash, alloc);
342
343 hash->uh_hashmask = hash->uh_hashsize - 1;
344
345 /*
346 * I need to investigate hash algorithms for resizing without a
347 * full rehash.
348 */
349
350 for (i = 0; i < hashsize; i++)
351 while (!SLIST_EMPTY(&hash->uh_slab_hash[i])) {
352 slab = SLIST_FIRST(&hash->uh_slab_hash[i]);
353 SLIST_REMOVE_HEAD(&hash->uh_slab_hash[i], us_hlink);
354 hval = UMA_HASH(hash, slab->us_data);
355 SLIST_INSERT_HEAD(&newhash[hval], slab, us_hlink);
356 }
357
358 if (hash->uh_slab_hash) {
359 if (hzonefree)
360 uma_zfree_internal(hashzone,
361 hash->uh_slab_hash, NULL, 0);
362 else
363 free(hash->uh_slab_hash, M_DEVBUF);
364 }
365 hash->uh_slab_hash = newhash;
366
367 return;
368}
369
370/*
371 * Frees all outstanding items in a bucket
372 *
373 * Arguments:
374 * zone The zone to free to, must be unlocked.
375 * bucket The free/alloc bucket with items, cpu queue must be locked.
376 *
377 * Returns:
378 * Nothing
379 */
380
381static void
382bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
383{
384 uma_slab_t slab;
385 int mzone;
386 void *item;
387
388 if (bucket == NULL)
389 return;
390
391 slab = NULL;
392 mzone = 0;
393
394 /* We have to lookup the slab again for malloc.. */
395 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
396 mzone = 1;
397
398 while (bucket->ub_ptr > -1) {
399 item = bucket->ub_bucket[bucket->ub_ptr];
400#ifdef INVARIANTS
401 bucket->ub_bucket[bucket->ub_ptr] = NULL;
402 KASSERT(item != NULL,
403 ("bucket_drain: botched ptr, item is NULL"));
404#endif
405 bucket->ub_ptr--;
406 /*
407 * This is extremely inefficient. The slab pointer was passed
408 * to uma_zfree_arg, but we lost it because the buckets don't
409 * hold them. This will go away when free() gets a size passed
410 * to it.
411 */
412 if (mzone)
413 slab = hash_sfind(mallochash,
414 (u_int8_t *)((unsigned long)item &
415 (~UMA_SLAB_MASK)));
416 uma_zfree_internal(zone, item, slab, 1);
417 }
418}
419
420/*
421 * Drains the per cpu caches for a zone.
422 *
423 * Arguments:
424 * zone The zone to drain, must be unlocked.
425 *
426 * Returns:
427 * Nothing
428 *
429 * This function returns with the zone locked so that the per cpu queues can
430 * not be filled until zone_drain is finished.
431 *
432 */
433static void
434cache_drain(uma_zone_t zone)
435{
436 uma_bucket_t bucket;
437 uma_cache_t cache;
438 int cpu;
439
440 /*
441 * Flush out the per cpu queues.
442 *
 443 * XXX This causes unnecessary thrashing due to immediately having
444 * empty per cpu queues. I need to improve this.
445 */
446
447 /*
448 * We have to lock each cpu cache before locking the zone
449 */
450 ZONE_UNLOCK(zone);
451
452 for (cpu = 0; cpu < maxcpu; cpu++) {
453 if (CPU_ABSENT(cpu))
454 continue;
455 CPU_LOCK(zone, cpu);
456 cache = &zone->uz_cpu[cpu];
457 bucket_drain(zone, cache->uc_allocbucket);
458 bucket_drain(zone, cache->uc_freebucket);
459 }
460
461 /*
462 * Drain the bucket queues and free the buckets, we just keep two per
463 * cpu (alloc/free).
464 */
465 ZONE_LOCK(zone);
466 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
467 LIST_REMOVE(bucket, ub_link);
468 ZONE_UNLOCK(zone);
469 bucket_drain(zone, bucket);
470 uma_zfree_internal(bucketzone, bucket, NULL, 0);
471 ZONE_LOCK(zone);
472 }
473
474 /* Now we do the free queue.. */
475 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
476 LIST_REMOVE(bucket, ub_link);
477 uma_zfree_internal(bucketzone, bucket, NULL, 0);
478 }
479
480 /* We unlock here, but they will all block until the zone is unlocked */
481 for (cpu = 0; cpu < maxcpu; cpu++) {
482 if (CPU_ABSENT(cpu))
483 continue;
484 CPU_UNLOCK(zone, cpu);
485 }
486}
487
488/*
489 * Frees pages from a zone back to the system. This is done on demand from
490 * the pageout daemon.
491 *
492 * Arguments:
493 * zone The zone to free pages from
494 *
495 * Returns:
496 * Nothing.
497 */
498static void
499zone_drain(uma_zone_t zone)
500{
501 uma_slab_t slab;
502 uma_slab_t n;
503 u_int64_t extra;
504 u_int8_t flags;
505 u_int8_t *mem;
506 int i;
507
508 /*
 509 * We don't want to take pages from statically allocated zones at this
510 * time
511 */
512 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
513 return;
514
515 ZONE_LOCK(zone);
516
517 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
518 cache_drain(zone);
519
520 if (zone->uz_free < zone->uz_wssize)
521 goto finished;
522#ifdef UMA_DEBUG
523 printf("%s working set size: %llu free items: %u\n",
524 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
525#endif
526 extra = zone->uz_wssize - zone->uz_free;
527 extra /= zone->uz_ipers;
528
529 /* extra is now the number of extra slabs that we can free */
530
531 if (extra == 0)
532 goto finished;
533
534 slab = LIST_FIRST(&zone->uz_free_slab);
535 while (slab && extra) {
536 n = LIST_NEXT(slab, us_link);
537
 538 /* We have nowhere to free these to */
539 if (slab->us_flags & UMA_SLAB_BOOT) {
540 slab = n;
541 continue;
542 }
543
544 LIST_REMOVE(slab, us_link);
545 zone->uz_pages -= zone->uz_ppera;
546 zone->uz_free -= zone->uz_ipers;
547 if (zone->uz_fini)
548 for (i = 0; i < zone->uz_ipers; i++)
549 zone->uz_fini(
550 slab->us_data + (zone->uz_rsize * i),
551 zone->uz_size);
552 flags = slab->us_flags;
553 mem = slab->us_data;
554 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
555 if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
556 UMA_HASH_REMOVE(mallochash,
557 slab, slab->us_data);
558 } else {
559 UMA_HASH_REMOVE(&zone->uz_hash,
560 slab, slab->us_data);
561 }
562 uma_zfree_internal(slabzone, slab, NULL, 0);
563 } else if (zone->uz_flags & UMA_ZFLAG_MALLOC)
564 UMA_HASH_REMOVE(mallochash, slab, slab->us_data);
565#ifdef UMA_DEBUG
566 printf("%s: Returning %d bytes.\n",
567 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
568#endif
569 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
570
571 slab = n;
572 extra--;
573 }
574
575finished:
576 ZONE_UNLOCK(zone);
577}
578
579/*
580 * Allocate a new slab for a zone. This does not insert the slab onto a list.
581 *
582 * Arguments:
583 * zone The zone to allocate slabs for
584 * wait Shall we wait?
585 *
586 * Returns:
587 * The slab that was allocated or NULL if there is no memory and the
588 * caller specified M_NOWAIT.
589 *
590 */
591static uma_slab_t
592slab_zalloc(uma_zone_t zone, int wait)
593{
594 uma_slab_t slab; /* Starting slab */
595 u_int8_t *mem;
596 u_int8_t flags;
597 int i;
598
599#ifdef UMA_DEBUG
600 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
601#endif
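	/* The zone has a page limit (uz_maxpages); refuse to grow past it. */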
602 if (zone->uz_maxpages &&
603 zone->uz_pages + zone->uz_ppera > zone->uz_maxpages)
604 return (NULL);
602
603 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
604 ZONE_UNLOCK(zone);
605 mtx_lock(&Giant);
606 slab = (uma_slab_t )zone->uz_allocf(zone,
607 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
608 mtx_unlock(&Giant);
609 ZONE_LOCK(zone);
610 if (slab != NULL)
611 slab->us_data = (u_int8_t *)slab;
612 else
613 return (NULL);
614 } else {
615
616 if (zone->uz_ppera > 1)
617 panic("UMA: Attemping to allocate multiple pages before vm has started.\n");
618 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
619 panic("Mallocing before uma_startup2 has been called.\n");
620 if (uma_boot_free == 0)
621 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
622 slab = LIST_FIRST(&uma_boot_pages);
623 LIST_REMOVE(slab, us_link);
624 uma_boot_free--;
625 }
626
627 mem = slab->us_data;
628
 629 /* Alloc slab structure for offpage, otherwise adjust its position */
630 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
631 slab = (uma_slab_t )(mem + zone->uz_pgoff);
632 } else {
633 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1);
634 if (slab == NULL) /* XXX This should go away */
635 panic("UMA: No free slab structures");
636 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC))
637 UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
638 }
639 if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
640#ifdef UMA_DEBUG
641 printf("Inserting %p into malloc hash from slab %p\n",
642 mem, slab);
643#endif
644 UMA_HASH_INSERT(mallochash, slab, mem);
645 }
646
647 slab->us_zone = zone;
648 slab->us_data = mem;
649
650 /*
651 * This is intended to spread data out across cache lines.
652 *
653 * This code doesn't seem to work properly on x86, and on alpha
654 * it makes absolutely no performance difference. I'm sure it could
 655 * use some tuning, but sun makes outrageous claims about its
656 * performance.
657 */
658#if 0
659 if (zone->uz_cachemax) {
660 slab->us_data += zone->uz_cacheoff;
661 zone->uz_cacheoff += UMA_CACHE_INC;
662 if (zone->uz_cacheoff > zone->uz_cachemax)
663 zone->uz_cacheoff = 0;
664 }
665#endif
666
667 slab->us_freecount = zone->uz_ipers;
668 slab->us_firstfree = 0;
669 slab->us_flags = flags;
670 for (i = 0; i < zone->uz_ipers; i++)
671 slab->us_freelist[i] = i+1;
672
673 if (zone->uz_init)
674 for (i = 0; i < zone->uz_ipers; i++)
675 zone->uz_init(slab->us_data + (zone->uz_rsize * i),
676 zone->uz_size);
677
678 zone->uz_pages += zone->uz_ppera;
679 zone->uz_free += zone->uz_ipers;
680
681 return (slab);
682}
683
684/*
685 * Allocates a number of pages from the system
686 *
687 * Arguments:
688 * zone Unused
689 * bytes The number of bytes requested
690 * wait Shall we wait?
691 *
692 * Returns:
693 * A pointer to the alloced memory or possibly
694 * NULL if M_NOWAIT is set.
695 */
696static void *
697page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
698{
699 void *p; /* Returned page */
700
701 /*
702 * XXX The original zone allocator did this, but I don't think it's
 703 * necessary in current.
704 */
705
706 if (lockstatus(&kernel_map->lock, NULL)) {
707 *pflag = UMA_SLAB_KMEM;
708 p = (void *) kmem_malloc(kmem_map, bytes, wait);
709 } else {
710 *pflag = UMA_SLAB_KMAP;
711 p = (void *) kmem_alloc(kernel_map, bytes);
712 }
713
714 return (p);
715}
716
717/*
718 * Allocates a number of pages from within an object
719 *
720 * Arguments:
721 * zone Unused
722 * bytes The number of bytes requested
723 * wait Shall we wait?
724 *
725 * Returns:
726 * A pointer to the alloced memory or possibly
727 * NULL if M_NOWAIT is set.
728 */
729static void *
730obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
731{
732 vm_offset_t zkva;
733 vm_offset_t retkva;
734 vm_page_t p;
735 int pages;
736
737
738 if (zone->uz_pages + zone->uz_ppera > zone->uz_maxpages)
739 return (NULL);
740
741 retkva = NULL;
742 pages = zone->uz_pages;
743
744 /*
745 * This looks a little weird since we're getting one page at a time
746 */
747 while (bytes > 0) {
748 p = vm_page_alloc(zone->uz_obj, pages,
749 VM_ALLOC_INTERRUPT);
750 if (p == NULL)
751 return (NULL);
752
753 zkva = zone->uz_kva + pages * PAGE_SIZE;
754 if (retkva == NULL)
755 retkva = zkva;
756 pmap_qenter(zkva, &p, 1);
757 bytes -= PAGE_SIZE;
758 pages += 1;
759 }
760
761 *flags = UMA_SLAB_PRIV;
762
763 return ((void *)retkva);
764}
765
766/*
767 * Frees a number of pages to the system
768 *
769 * Arguments:
770 * mem A pointer to the memory to be freed
771 * size The size of the memory being freed
772 * flags The original p->us_flags field
773 *
774 * Returns:
775 * Nothing
776 *
777 */
778static void
779page_free(void *mem, int size, u_int8_t flags)
780{
781 vm_map_t map;
782 if (flags & UMA_SLAB_KMEM)
783 map = kmem_map;
784 else if (flags & UMA_SLAB_KMAP)
785 map = kernel_map;
786 else
787 panic("UMA: page_free used with invalid flags %d\n", flags);
788
789 kmem_free(map, (vm_offset_t)mem, size);
790}
791
792/*
793 * Zero fill initializer
794 *
795 * Arguments/Returns follow uma_init specifications
796 *
797 */
798static void
799zero_init(void *mem, int size)
800{
801 bzero(mem, size);
802}
803
804/*
805 * Finish creating a small uma zone. This calculates ipers, and the zone size.
806 *
807 * Arguments
808 * zone The zone we should initialize
809 *
810 * Returns
811 * Nothing
812 */
813static void
814zone_small_init(uma_zone_t zone)
815{
816 int rsize;
817 int memused;
818 int ipers;
819
820 rsize = zone->uz_size;
821
822 if (rsize < UMA_SMALLEST_UNIT)
823 rsize = UMA_SMALLEST_UNIT;
824
825 if (rsize & zone->uz_align)
826 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
827
828 zone->uz_rsize = rsize;
829
830 rsize += 1; /* Account for the byte of linkage */
831 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
832 zone->uz_ppera = 1;
833
834 memused = zone->uz_ipers * zone->uz_rsize;
835
836 /* Can we do any better? */
837 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
838 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
839 return;
840 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
841 if (ipers > zone->uz_ipers) {
842 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
843 zone->uz_ipers = ipers;
844 }
845 }
846
847}
848
849/*
850 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
851 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
852 * more complicated.
853 *
854 * Arguments
855 * zone The zone we should initialize
856 *
857 * Returns
858 * Nothing
859 */
860static void
861zone_large_init(uma_zone_t zone)
862{
863 int pages;
864
865 pages = zone->uz_size / UMA_SLAB_SIZE;
866
867 /* Account for remainder */
868 if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
869 pages++;
870
871 zone->uz_ppera = pages;
872 zone->uz_ipers = 1;
873
874 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
875 zone->uz_rsize = zone->uz_size;
876}
877
878/*
879 * Zone header ctor. This initializes all fields, locks, etc. And inserts
880 * the zone onto the global zone list.
881 *
882 * Arguments/Returns follow uma_ctor specifications
883 * udata Actually uma_zcreat_args
884 *
885 */
886
887static void
888zone_ctor(void *mem, int size, void *udata)
889{
890 struct uma_zctor_args *arg = udata;
891 uma_zone_t zone = mem;
892 int cplen;
893 int cpu;
894
895 bzero(zone, size);
896 zone->uz_name = arg->name;
897 zone->uz_size = arg->size;
898 zone->uz_ctor = arg->ctor;
899 zone->uz_dtor = arg->dtor;
900 zone->uz_init = arg->uminit;
901 zone->uz_align = arg->align;
902 zone->uz_free = 0;
903 zone->uz_pages = 0;
904 zone->uz_flags = 0;
905 zone->uz_allocf = page_alloc;
906 zone->uz_freef = page_free;
907
908 if (arg->flags & UMA_ZONE_ZINIT)
909 zone->uz_init = zero_init;
910
911 if (arg->flags & UMA_ZONE_INTERNAL)
912 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
913
914 if (arg->flags & UMA_ZONE_MALLOC)
915 zone->uz_flags |= UMA_ZFLAG_MALLOC;
916
917 if (arg->flags & UMA_ZONE_NOFREE)
918 zone->uz_flags |= UMA_ZFLAG_NOFREE;
919
920 if (zone->uz_size > UMA_SLAB_SIZE)
921 zone_large_init(zone);
922 else
923 zone_small_init(zone);
924
925 /* We do this so that the per cpu lock name is unique for each zone */
926 memcpy(zone->uz_lname, "PCPU ", 5);
927 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
928 memcpy(zone->uz_lname+5, zone->uz_name, cplen);
929 zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
930
931 /*
932 * If we're putting the slab header in the actual page we need to
933 * figure out where in each page it goes. This calculates a right
 934 * justified offset into the memory on an ALIGN_PTR boundary.
935 */
936 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
937 int totsize;
938 int waste;
939
940 /* Size of the slab struct and free list */
941 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
942 if (totsize & UMA_ALIGN_PTR)
943 totsize = (totsize & ~UMA_ALIGN_PTR) +
944 (UMA_ALIGN_PTR + 1);
945 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
946
947 waste = zone->uz_pgoff;
948 waste -= (zone->uz_ipers * zone->uz_rsize);
949
950 /*
951 * This calculates how much space we have for cache line size
 952 * optimizations. It works by offsetting each slab slightly.
953 * Currently it breaks on x86, and so it is disabled.
954 */
955
956 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
957 zone->uz_cachemax = waste - UMA_CACHE_INC;
958 zone->uz_cacheoff = 0;
959 }
960
961 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
962 + zone->uz_ipers;
963 /* I don't think it's possible, but I'll make sure anyway */
964 if (totsize > UMA_SLAB_SIZE) {
965 printf("zone %s ipers %d rsize %d size %d\n",
966 zone->uz_name, zone->uz_ipers, zone->uz_rsize,
967 zone->uz_size);
968 panic("UMA slab won't fit.\n");
969 }
970 } else {
971 /* hash_expand here to allocate the initial hash table */
972 hash_expand(&zone->uz_hash);
973 zone->uz_pgoff = 0;
974 }
975
976#ifdef UMA_DEBUG
977 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
978 zone->uz_name, zone,
979 zone->uz_size, zone->uz_ipers,
980 zone->uz_ppera, zone->uz_pgoff);
981#endif
982 ZONE_LOCK_INIT(zone);
983
984 mtx_lock(&uma_mtx);
985 LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
986 mtx_unlock(&uma_mtx);
987
988 /*
989 * Some internal zones don't have room allocated for the per cpu
990 * caches. If we're internal, bail out here.
991 */
992
993 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
994 return;
995
996 for (cpu = 0; cpu < maxcpu; cpu++) {
997 if (zone->uz_ipers < UMA_BUCKET_SIZE)
998 zone->uz_cpu[cpu].uc_count = zone->uz_ipers - 1;
999 else
1000 zone->uz_cpu[cpu].uc_count = UMA_BUCKET_SIZE - 1;
1001 CPU_LOCK_INIT(zone, cpu);
1002 }
1003}
1004
1005/*
1006 * Traverses every zone in the system and calls a callback
1007 *
1008 * Arguments:
1009 * zfunc A pointer to a function which accepts a zone
1010 * as an argument.
1011 *
1012 * Returns:
1013 * Nothing
1014 */
1015static void
1016zone_foreach(void (*zfunc)(uma_zone_t))
1017{
1018 uma_zone_t zone;
1019
1020 mtx_lock(&uma_mtx);
1021 LIST_FOREACH(zone, &uma_zones, uz_link) {
1022 zfunc(zone);
1023 }
1024 mtx_unlock(&uma_mtx);
1025}
1026
1027/* Public functions */
1028/* See uma.h */
1029void
1030uma_startup(void *bootmem)
1031{
1032 struct uma_zctor_args args;
1033 uma_slab_t slab;
1034 int slabsize;
1035 int i;
1036
1037#ifdef UMA_DEBUG
1038 printf("Creating uma zone headers zone.\n");
1039#endif
1040#ifdef SMP
1041 maxcpu = mp_maxid + 1;
1042#else
1043 maxcpu = 1;
1044#endif
1045#ifdef UMA_DEBUG
1046 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1047 Debugger("stop");
1048#endif
1049 mtx_init(&uma_mtx, "UMA lock", MTX_DEF);
1050 /* "manually" Create the initial zone */
1051 args.name = "UMA Zones";
1052 args.size = sizeof(struct uma_zone) +
1053 (sizeof(struct uma_cache) * (maxcpu - 1));
1054 args.ctor = zone_ctor;
1055 args.dtor = NULL;
1056 args.uminit = zero_init;
1057 args.fini = NULL;
1058 args.align = 32 - 1;
1059 args.flags = UMA_ZONE_INTERNAL;
1060 /* The initial zone has no Per cpu queues so it's smaller */
1061 zone_ctor(zones, sizeof(struct uma_zone), &args);
1062
1063#ifdef UMA_DEBUG
1064 printf("Filling boot free list.\n");
1065#endif
1066 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1067 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1068 slab->us_data = (u_int8_t *)slab;
1069 slab->us_flags = UMA_SLAB_BOOT;
1070 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1071 uma_boot_free++;
1072 }
1073
1074#ifdef UMA_DEBUG
1075 printf("Creating slab zone.\n");
1076#endif
1077
1078 /*
1079 * This is the max number of free list items we'll have with
1080 * offpage slabs.
1081 */
1082
1083 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1084 slabsize /= UMA_MAX_WASTE;
 1085 slabsize++; /* In case it was rounded down */
1086 slabsize += sizeof(struct uma_slab);
1087
1088 /* Now make a zone for slab headers */
1089 slabzone = uma_zcreate("UMA Slabs",
1090 slabsize,
1091 NULL, NULL, NULL, NULL,
1092 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1093
1094 hashzone = uma_zcreate("UMA Hash",
1095 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1096 NULL, NULL, NULL, NULL,
1097 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1098
1099 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1100 NULL, NULL, NULL, NULL,
1101 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1102
1103
1104#ifdef UMA_DEBUG
1105 printf("UMA startup complete.\n");
1106#endif
1107}
1108
1109/* see uma.h */
1110void
1111uma_startup2(void *hashmem, u_long elems)
1112{
1113 bzero(hashmem, elems * sizeof(void *));
1114 mallochash->uh_slab_hash = hashmem;
1115 mallochash->uh_hashsize = elems;
1116 mallochash->uh_hashmask = elems - 1;
1117 booted = 1;
1118#ifdef UMA_DEBUG
1119 printf("UMA startup2 complete.\n");
1120#endif
1121}
1122
1123/*
1124 * Initialize our callout handle
1125 *
1126 */
1127
1128static void
1129uma_startup3(void)
1130{
1131#ifdef UMA_DEBUG
1132 printf("Starting callout.\n");
1133#endif
1134 /* We'll be mpsafe once the vm is locked. */
1135 callout_init(&uma_callout, 0);
1136 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1137#ifdef UMA_DEBUG
1138 printf("UMA startup3 complete.\n");
1139#endif
1140}
1141
1142/* See uma.h */
1143uma_zone_t
1144uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init uminit,
1145 uma_fini fini, int align, u_int16_t flags)
1146
1147{
1148 struct uma_zctor_args args;
1149
1150 /* This stuff is essential for the zone ctor */
1151 args.name = name;
1152 args.size = size;
1153 args.ctor = ctor;
1154 args.dtor = dtor;
1155 args.uminit = uminit;
1156 args.fini = fini;
1157 args.align = align;
1158 args.flags = flags;
1159
1160 return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL, -1));
1161}
1162
1163/* See uma.h */
1164void *
1165uma_zalloc_arg(uma_zone_t zone, void *udata, int wait)
1166{
1167 void *item;
1168 uma_cache_t cache;
1169 uma_bucket_t bucket;
1170 int isitem;
1171 int cpu;
1172
1173 /* This is the fast path allocation */
1174#ifdef UMA_DEBUG_ALLOC_1
1175 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1176#endif
1177 cpu = PCPU_GET(cpuid);
1178 CPU_LOCK(zone, cpu);
1179 cache = &zone->uz_cpu[cpu];
1180 cache->uc_allocs++;
1181
1182zalloc_start:
1183 bucket = cache->uc_allocbucket;
1184
1185 if (bucket) {
1186 if (bucket->ub_ptr > -1) {
1187 item = bucket->ub_bucket[bucket->ub_ptr];
1188#ifdef INVARIANTS
1189 bucket->ub_bucket[bucket->ub_ptr] = NULL;
1190#endif
1191 bucket->ub_ptr--;
1192 KASSERT(item != NULL,
1193 ("uma_zalloc: Bucket pointer mangled."));
1194 cache->uc_allocs++;
1195 CPU_UNLOCK(zone, cpu);
1196 if (zone->uz_ctor)
1197 zone->uz_ctor(item, zone->uz_size, udata);
1198 return (item);
1199 } else if (cache->uc_freebucket) {
1200 /*
1201 * We have run out of items in our allocbucket.
1202 * See if we can switch with our free bucket.
1203 */
1204 if (cache->uc_freebucket->ub_ptr > -1) {
1205 uma_bucket_t swap;
1206
1207#ifdef UMA_DEBUG_ALLOC
1208 printf("uma_zalloc: Swapping empty with alloc.\n");
1209#endif
1210 swap = cache->uc_freebucket;
1211 cache->uc_freebucket = cache->uc_allocbucket;
1212 cache->uc_allocbucket = swap;
1213
1214 goto zalloc_start;
1215 }
1216 }
1217 }
1218 /*
1219 * We can get here for three reasons:
1220 *
1221 * 1) The buckets are NULL
1222 * 2) The zone is INTERNAL, and so it has no buckets.
1223 * 3) The alloc and free buckets are both empty.
1224 *
1225 * Just handoff to uma_zalloc_internal to do the hard stuff
1226 *
1227 */
1228#ifdef UMA_DEBUG_ALLOC
1229 printf("uma_zalloc: Falling back to zalloc_internal.\n");
1230#endif
1231
1232 item = uma_zalloc_internal(zone, udata, wait, &isitem, cpu);
1233
1234#ifdef UMA_DEBUG
1235 printf("uma_zalloc: zalloc_internal completed.\n");
1236#endif
1237
1238 if (item && isitem == 0)
1239 goto zalloc_start;
1240
1241 /*
1242 * If isitem is set then we should just return it. The cpu lock
1243 * was unlocked when we couldn't get a bucket.
1244 */
740 retkva = NULL;
741 pages = zone->uz_pages;
742
743 /*
744 * This looks a little weird since we're getting one page at a time
745 */
746 while (bytes > 0) {
747 p = vm_page_alloc(zone->uz_obj, pages,
748 VM_ALLOC_INTERRUPT);
749 if (p == NULL)
750 return (NULL);
751
752 zkva = zone->uz_kva + pages * PAGE_SIZE;
753 if (retkva == NULL)
754 retkva = zkva;
755 pmap_qenter(zkva, &p, 1);
756 bytes -= PAGE_SIZE;
757 pages += 1;
758 }
759
760 *flags = UMA_SLAB_PRIV;
761
762 return ((void *)retkva);
763}
764
765/*
766 * Frees a number of pages to the system
767 *
768 * Arguments:
769 * mem A pointer to the memory to be freed
770 * size The size of the memory being freed
771 * flags The original p->us_flags field
772 *
773 * Returns:
774 * Nothing
775 *
776 */
777static void
778page_free(void *mem, int size, u_int8_t flags)
779{
780 vm_map_t map;
781 if (flags & UMA_SLAB_KMEM)
782 map = kmem_map;
783 else if (flags & UMA_SLAB_KMAP)
784 map = kernel_map;
785 else
786 panic("UMA: page_free used with invalid flags %d\n", flags);
787
788 kmem_free(map, (vm_offset_t)mem, size);
789}
790
791/*
792 * Zero fill initializer
793 *
794 * Arguments/Returns follow uma_init specifications
795 *
796 */
797static void
798zero_init(void *mem, int size)
799{
800 bzero(mem, size);
801}
802
803/*
804 * Finish creating a small uma zone. This calculates ipers, and the zone size.
805 *
806 * Arguments
807 * zone The zone we should initialize
808 *
809 * Returns
810 * Nothing
811 */
812static void
813zone_small_init(uma_zone_t zone)
814{
815 int rsize;
816 int memused;
817 int ipers;
818
819 rsize = zone->uz_size;
820
821 if (rsize < UMA_SMALLEST_UNIT)
822 rsize = UMA_SMALLEST_UNIT;
823
824 if (rsize & zone->uz_align)
825 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
826
827 zone->uz_rsize = rsize;
828
829 rsize += 1; /* Account for the byte of linkage */
830 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
831 zone->uz_ppera = 1;
832
833 memused = zone->uz_ipers * zone->uz_rsize;
834
835 /* Can we do any better? */
836 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
837 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
838 return;
839 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
840 if (ipers > zone->uz_ipers) {
841 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
842 zone->uz_ipers = ipers;
843 }
844 }
845
846}
847
848/*
849 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
850 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
851 * more complicated.
852 *
853 * Arguments
854 * zone The zone we should initialize
855 *
856 * Returns
857 * Nothing
858 */
859static void
860zone_large_init(uma_zone_t zone)
861{
862 int pages;
863
864 pages = zone->uz_size / UMA_SLAB_SIZE;
865
866 /* Account for remainder */
867 if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
868 pages++;
869
870 zone->uz_ppera = pages;
871 zone->uz_ipers = 1;
872
873 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
874 zone->uz_rsize = zone->uz_size;
875}
876
877/*
878 * Zone header ctor. This initializes all fields, locks, etc. And inserts
879 * the zone onto the global zone list.
880 *
881 * Arguments/Returns follow uma_ctor specifications
882 *	udata  Actually uma_zctor_args
883 *
884 */
885
886static void
887zone_ctor(void *mem, int size, void *udata)
888{
889 struct uma_zctor_args *arg = udata;
890 uma_zone_t zone = mem;
891 int cplen;
892 int cpu;
893
894 bzero(zone, size);
895 zone->uz_name = arg->name;
896 zone->uz_size = arg->size;
897 zone->uz_ctor = arg->ctor;
898 zone->uz_dtor = arg->dtor;
899 zone->uz_init = arg->uminit;
900 zone->uz_align = arg->align;
901 zone->uz_free = 0;
902 zone->uz_pages = 0;
903 zone->uz_flags = 0;
904 zone->uz_allocf = page_alloc;
905 zone->uz_freef = page_free;
906
907 if (arg->flags & UMA_ZONE_ZINIT)
908 zone->uz_init = zero_init;
909
910 if (arg->flags & UMA_ZONE_INTERNAL)
911 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
912
913 if (arg->flags & UMA_ZONE_MALLOC)
914 zone->uz_flags |= UMA_ZFLAG_MALLOC;
915
916 if (arg->flags & UMA_ZONE_NOFREE)
917 zone->uz_flags |= UMA_ZFLAG_NOFREE;
918
919 if (zone->uz_size > UMA_SLAB_SIZE)
920 zone_large_init(zone);
921 else
922 zone_small_init(zone);
923
924 /* We do this so that the per cpu lock name is unique for each zone */
925 memcpy(zone->uz_lname, "PCPU ", 5);
926 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
927 memcpy(zone->uz_lname+5, zone->uz_name, cplen);
928 zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
929
930 /*
931 * If we're putting the slab header in the actual page we need to
932 * figure out where in each page it goes. This calculates a
933 * right-justified offset into the memory on a UMA_ALIGN_PTR boundary.
934 */
935 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
936 int totsize;
937 int waste;
938
939 /* Size of the slab struct and free list */
940 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
941 if (totsize & UMA_ALIGN_PTR)
942 totsize = (totsize & ~UMA_ALIGN_PTR) +
943 (UMA_ALIGN_PTR + 1);
944 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
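		/*
		 * The slab header and its free list now occupy the last
		 * totsize bytes of the slab; items are laid out from the
		 * start of the slab, below uz_pgoff.
		 */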
945
946 waste = zone->uz_pgoff;
947 waste -= (zone->uz_ipers * zone->uz_rsize);
948
949 /*
950 * This calculates how much space we have for cache line size
951 * optimizations. It works by offsetting each slab slightly.
952 * Currently it breaks on x86, and so it is disabled.
953 */
954
955 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
956 zone->uz_cachemax = waste - UMA_CACHE_INC;
957 zone->uz_cacheoff = 0;
958 }
959
960 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
961 + zone->uz_ipers;
962 /* I don't think it's possible, but I'll make sure anyway */
963 if (totsize > UMA_SLAB_SIZE) {
964 printf("zone %s ipers %d rsize %d size %d\n",
965 zone->uz_name, zone->uz_ipers, zone->uz_rsize,
966 zone->uz_size);
967 panic("UMA slab won't fit.\n");
968 }
969 } else {
970 /* hash_expand here to allocate the initial hash table */
971 hash_expand(&zone->uz_hash);
972 zone->uz_pgoff = 0;
973 }
974
975#ifdef UMA_DEBUG
976 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
977 zone->uz_name, zone,
978 zone->uz_size, zone->uz_ipers,
979 zone->uz_ppera, zone->uz_pgoff);
980#endif
981 ZONE_LOCK_INIT(zone);
982
983 mtx_lock(&uma_mtx);
984 LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
985 mtx_unlock(&uma_mtx);
986
987 /*
988 * Some internal zones don't have room allocated for the per cpu
989 * caches. If we're internal, bail out here.
990 */
991
992 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
993 return;
994
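	/*
	 * uc_count is the highest bucket index the per cpu caches will
	 * fill to; it starts at min(ipers, UMA_BUCKET_SIZE) - 1 and may
	 * be bumped toward UMA_BUCKET_SIZE - 1 in uma_zalloc_internal()
	 * as the caches keep missing.
	 */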
995 for (cpu = 0; cpu < maxcpu; cpu++) {
996 if (zone->uz_ipers < UMA_BUCKET_SIZE)
997 zone->uz_cpu[cpu].uc_count = zone->uz_ipers - 1;
998 else
999 zone->uz_cpu[cpu].uc_count = UMA_BUCKET_SIZE - 1;
1000 CPU_LOCK_INIT(zone, cpu);
1001 }
1002}
1003
1004/*
1005 * Traverses every zone in the system and calls a callback
1006 *
1007 * Arguments:
1008 * zfunc A pointer to a function which accepts a zone
1009 * as an argument.
1010 *
1011 * Returns:
1012 * Nothing
1013 */
1014static void
1015zone_foreach(void (*zfunc)(uma_zone_t))
1016{
1017 uma_zone_t zone;
1018
1019 mtx_lock(&uma_mtx);
1020 LIST_FOREACH(zone, &uma_zones, uz_link) {
1021 zfunc(zone);
1022 }
1023 mtx_unlock(&uma_mtx);
1024}
1025
1026/* Public functions */
1027/* See uma.h */
1028void
1029uma_startup(void *bootmem)
1030{
1031 struct uma_zctor_args args;
1032 uma_slab_t slab;
1033 int slabsize;
1034 int i;
1035
1036#ifdef UMA_DEBUG
1037 printf("Creating uma zone headers zone.\n");
1038#endif
1039#ifdef SMP
1040 maxcpu = mp_maxid + 1;
1041#else
1042 maxcpu = 1;
1043#endif
1044#ifdef UMA_DEBUG
1045 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1046 Debugger("stop");
1047#endif
1048 mtx_init(&uma_mtx, "UMA lock", MTX_DEF);
1049	/* "Manually" create the initial zone */
1050 args.name = "UMA Zones";
1051 args.size = sizeof(struct uma_zone) +
1052 (sizeof(struct uma_cache) * (maxcpu - 1));
1053 args.ctor = zone_ctor;
1054 args.dtor = NULL;
1055 args.uminit = zero_init;
1056 args.fini = NULL;
1057 args.align = 32 - 1;
1058 args.flags = UMA_ZONE_INTERNAL;
1059	/* The initial zone has no per cpu queues, so it's smaller */
1060 zone_ctor(zones, sizeof(struct uma_zone), &args);
1061
1062#ifdef UMA_DEBUG
1063 printf("Filling boot free list.\n");
1064#endif
1065 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1066 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1067 slab->us_data = (u_int8_t *)slab;
1068 slab->us_flags = UMA_SLAB_BOOT;
1069 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1070 uma_boot_free++;
1071 }
1072
1073#ifdef UMA_DEBUG
1074 printf("Creating slab zone.\n");
1075#endif
1076
1077 /*
1078 * This is the max number of free list items we'll have with
1079 * offpage slabs.
1080 */
1081
1082 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1083 slabsize /= UMA_MAX_WASTE;
1084	slabsize++; /* In case the division rounded down */
1085 slabsize += sizeof(struct uma_slab);
1086
1087 /* Now make a zone for slab headers */
1088 slabzone = uma_zcreate("UMA Slabs",
1089 slabsize,
1090 NULL, NULL, NULL, NULL,
1091 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1092
1093 hashzone = uma_zcreate("UMA Hash",
1094 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1095 NULL, NULL, NULL, NULL,
1096 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1097
1098 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1099 NULL, NULL, NULL, NULL,
1100 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1101
1102
1103#ifdef UMA_DEBUG
1104 printf("UMA startup complete.\n");
1105#endif
1106}
1107
1108/* see uma.h */
1109void
1110uma_startup2(void *hashmem, u_long elems)
1111{
1112 bzero(hashmem, elems * sizeof(void *));
1113 mallochash->uh_slab_hash = hashmem;
1114 mallochash->uh_hashsize = elems;
1115 mallochash->uh_hashmask = elems - 1;
1116 booted = 1;
1117#ifdef UMA_DEBUG
1118 printf("UMA startup2 complete.\n");
1119#endif
1120}
1121
1122/*
1123 * Initialize our callout handle
1124 *
1125 */
1126
1127static void
1128uma_startup3(void)
1129{
1130#ifdef UMA_DEBUG
1131 printf("Starting callout.\n");
1132#endif
1133 /* We'll be mpsafe once the vm is locked. */
1134 callout_init(&uma_callout, 0);
1135 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1136#ifdef UMA_DEBUG
1137 printf("UMA startup3 complete.\n");
1138#endif
1139}
1140
1141/* See uma.h */
1142uma_zone_t
1143uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init uminit,
1144 uma_fini fini, int align, u_int16_t flags)
1145
1146{
1147 struct uma_zctor_args args;
1148
1149 /* This stuff is essential for the zone ctor */
1150 args.name = name;
1151 args.size = size;
1152 args.ctor = ctor;
1153 args.dtor = dtor;
1154 args.uminit = uminit;
1155 args.fini = fini;
1156 args.align = align;
1157 args.flags = flags;
1158
1159 return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL, -1));
1160}
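/*
 * A rough usage sketch, for illustration only; "foo" and the flag choices
 * are made-up values, and most consumers go through the uma_zalloc()/
 * uma_zfree() wrappers from uma.h rather than the *_arg functions used
 * here:
 *
 *	zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	item = uma_zalloc_arg(zone, NULL, M_WAITOK);
 *	...
 *	uma_zfree_arg(zone, item, NULL);
 */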
1161
1162/* See uma.h */
1163void *
1164uma_zalloc_arg(uma_zone_t zone, void *udata, int wait)
1165{
1166 void *item;
1167 uma_cache_t cache;
1168 uma_bucket_t bucket;
1169 int isitem;
1170 int cpu;
1171
1172 /* This is the fast path allocation */
1173#ifdef UMA_DEBUG_ALLOC_1
1174 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1175#endif
1176 cpu = PCPU_GET(cpuid);
1177 CPU_LOCK(zone, cpu);
1178 cache = &zone->uz_cpu[cpu];
1179 cache->uc_allocs++;
1180
1181zalloc_start:
1182 bucket = cache->uc_allocbucket;
1183
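	/*
	 * ub_ptr indexes the last filled slot in the bucket; -1 means the
	 * bucket is empty.
	 */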
1184 if (bucket) {
1185 if (bucket->ub_ptr > -1) {
1186 item = bucket->ub_bucket[bucket->ub_ptr];
1187#ifdef INVARIANTS
1188 bucket->ub_bucket[bucket->ub_ptr] = NULL;
1189#endif
1190 bucket->ub_ptr--;
1191 KASSERT(item != NULL,
1192 ("uma_zalloc: Bucket pointer mangled."));
1193 cache->uc_allocs++;
1194 CPU_UNLOCK(zone, cpu);
1195 if (zone->uz_ctor)
1196 zone->uz_ctor(item, zone->uz_size, udata);
1197 return (item);
1198 } else if (cache->uc_freebucket) {
1199 /*
1200 * We have run out of items in our allocbucket.
1201 * See if we can switch with our free bucket.
1202 */
1203 if (cache->uc_freebucket->ub_ptr > -1) {
1204 uma_bucket_t swap;
1205
1206#ifdef UMA_DEBUG_ALLOC
1207 printf("uma_zalloc: Swapping empty with alloc.\n");
1208#endif
1209 swap = cache->uc_freebucket;
1210 cache->uc_freebucket = cache->uc_allocbucket;
1211 cache->uc_allocbucket = swap;
1212
1213 goto zalloc_start;
1214 }
1215 }
1216 }
1217 /*
1218 * We can get here for three reasons:
1219 *
1220 * 1) The buckets are NULL
1221 * 2) The zone is INTERNAL, and so it has no buckets.
1222 * 3) The alloc and free buckets are both empty.
1223 *
1224 * Just hand off to uma_zalloc_internal to do the hard stuff
1225 *
1226 */
1227#ifdef UMA_DEBUG_ALLOC
1228 printf("uma_zalloc: Falling back to zalloc_internal.\n");
1229#endif
1230
1231 item = uma_zalloc_internal(zone, udata, wait, &isitem, cpu);
1232
1233#ifdef UMA_DEBUG
1234 printf("uma_zalloc: zalloc_internal completed.\n");
1235#endif
1236
1237 if (item && isitem == 0)
1238 goto zalloc_start;
1239
1240 /*
1241 * If isitem is set then we should just return it. The cpu lock
1242 * was unlocked when we couldn't get a bucket.
1243 */
1245
1246#ifdef INVARIANTS
1247 if (wait == M_WAITOK)
1248 KASSERT(item != NULL,
1249 ("uma_zalloc: WAITOK set but we're returning NULL"));
1250#endif
1251 return item;
1252}
1253
1254/*
1255 * Allocates an item for an internal zone OR fills a bucket
1256 *
1257 * Arguments
1258 * zone The zone to alloc for.
1259 * udata The data to be passed to the constructor.
1260 * wait M_WAITOK or M_NOWAIT.
1261 * isitem The returned value is an item if this is true.
1262 * cpu The cpu # of the cache that we should use, or -1.
1263 *
1264 * Returns
1265 * NULL if there is no memory and M_NOWAIT is set
1266 *	An item if called on an internal zone
1267 * Non NULL if called to fill a bucket and it was successful.
1268 *
1269 * Discussion:
1270 * This was much cleaner before it had to do per cpu caches. It is
1271 * complicated now because it has to handle the simple internal case, and
1272 * the more involved bucket filling and allocation. The isitem is there
1273 * to remove a failure case. You shouldn't fail on allocating from a zone
1274 * because there were no buckets. This allows the exported zalloc to just
1275 * return the item.
1276 *
1277 */
1278
1279static void *
1280uma_zalloc_internal(uma_zone_t zone, void *udata, int wait, int *isitem, int cpu)
1281{
1282 uma_bucket_t bucket;
1283 uma_cache_t cache;
1284 uma_slab_t slab;
1285 u_int8_t freei;
1286 void *item;
1287
1288 bucket = NULL;
1289 cache = NULL;
1290 item = NULL;
1291
1292 /*
1293 * This is to stop us from allocating per cpu buckets while we're still
1294 * allocating out of UMA_BOOT_PAGES; otherwise we would exhaust the boot pages.
1295 */
1296
1297 if (!booted && zone == bucketzone)
1298 return (NULL);
1299
1300#ifdef UMA_DEBUG_ALLOC
1301 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1302#endif
1303 if (isitem != NULL)
1304 *isitem = 0;
1305
1306 ZONE_LOCK(zone);
1307
1308 /* We got here because we need to fill some buckets */
1309 if (cpu != -1) {
1310 cache = &zone->uz_cpu[cpu];
1311
1312 zone->uz_allocs += cache->uc_allocs;
1313 /* Check the free list */
1314 bucket = LIST_FIRST(&zone->uz_full_bucket);
1315 if (bucket) {
1316 LIST_REMOVE(bucket, ub_link);
1317 /* Our old one is now a free bucket */
1318 if (cache->uc_allocbucket) {
1319 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1320 ("uma_zalloc_internal: Freeing a non free bucket."));
1321 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1322 cache->uc_allocbucket, ub_link);
1323 }
1324 KASSERT(bucket->ub_ptr != -1,
1325 ("uma_zalloc_internal: Returning an empty bucket."));
1326 /*zone->uz_free -= bucket->ub_ptr + 1;*/
1327 cache->uc_allocbucket = bucket;
1328 ZONE_UNLOCK(zone);
1329 return (bucket);
1330 }
1331		/* Bump up our uc_count so we get here less often */
1332 if (cache->uc_count < UMA_BUCKET_SIZE - 1)
1333 cache->uc_count++;
1334 /* Nothing on the free list, try to re-use the old one */
1335 bucket = cache->uc_allocbucket;
1336 if (bucket == NULL) {
1337 /* Nope, we need a new one */
1338 CPU_UNLOCK(zone, cpu);
1339 ZONE_UNLOCK(zone);
1340 bucket = uma_zalloc_internal(bucketzone,
1341 NULL, wait, NULL, -1);
1342 CPU_LOCK(zone, cpu);
1343 ZONE_LOCK(zone);
1344 /* Did we lose the race? */
1345 if (cache->uc_allocbucket) {
1346#ifdef UMA_DEBUG
1347 printf("uma_zalloc_internal: Lost race with another CPU.\n");
1348#endif
1349 if (bucket)
1350 uma_zfree_internal(bucketzone,
1351 bucket, NULL, 0);
1352 ZONE_UNLOCK(zone);
1353 return (cache->uc_allocbucket);
1354 }
1355 cache->uc_allocbucket = bucket;
1356
1357 if (bucket) {
1358#ifdef INVARIANTS
1359 bzero(bucket, bucketzone->uz_size);
1360#endif
1361 bucket->ub_ptr = -1;
1362 } else {
1363 /*
1364 * We may not get a bucket if we recurse, so
1365 * return an actual item. The rest of this code
1366 * does the right thing if the cache is NULL.
1367 */
1368#ifdef UMA_DEBUG
1369 printf("uma_zalloc_internal: Bucketzone returned NULL\n");
1370#endif
1371 CPU_UNLOCK(zone, cpu);
1372 cache = NULL;
1373 cpu = -1;
1374 }
1375 }
1376 }
1377
1378new_slab:
1379
1380 /* Find a slab with some space */
1381 if (zone->uz_free) {
1382 if (!LIST_EMPTY(&zone->uz_part_slab)) {
1383 slab = LIST_FIRST(&zone->uz_part_slab);
1384 } else {
1385 slab = LIST_FIRST(&zone->uz_free_slab);
1386 LIST_REMOVE(slab, us_link);
1387 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1388 }
1389 } else {
1390 /*
1391 * This is to prevent us from recursively trying to allocate
1392 * buckets. The problem is that if an allocation forces us to
1393 * grab a new bucket we will call page_alloc, which will go off
1394 * and cause the vm to allocate vm_map_entries. If we need new
1395 * buckets there too we will recurse in kmem_alloc and bad
1396 * things happen. So instead we return a NULL bucket, and make
1397 * the code that allocates buckets smart enough to deal with it */
1398 if (zone == bucketzone && zone->uz_recurse != 0) {
1399 ZONE_UNLOCK(zone);
1400 return (NULL);
1401 }
1402 zone->uz_recurse++;
1403 slab = slab_zalloc(zone, wait);
1404 zone->uz_recurse--;
1405 if (slab) {
1406 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1407 /*
1408 * We might not have been able to get a page, but another cpu
1409 * could have while we were unlocked.
1410 */
1411 } else if (zone->uz_free == 0) {
1412 ZONE_UNLOCK(zone);
1413 /* If we're filling a bucket return what we have */
1414 if (bucket != NULL && bucket->ub_ptr != -1) {
1415 return (bucket);
1416 } else
1417 return (NULL);
1418 } else {
1419 /* Another cpu must have succeeded */
1420 if ((slab = LIST_FIRST(&zone->uz_part_slab)) == NULL) {
1421 slab = LIST_FIRST(&zone->uz_free_slab);
1422 LIST_REMOVE(slab, us_link);
1423 LIST_INSERT_HEAD(&zone->uz_part_slab,
1424 slab, us_link);
1425 }
1426 }
1427 }
1428
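	/*
	 * Pull items off the slab's free list, which is threaded through
	 * us_freelist[] by item index and headed by us_firstfree.  With no
	 * per cpu cache we take a single item and stop; otherwise keep
	 * filling the bucket until ub_ptr reaches uc_count or the slab
	 * runs dry.
	 */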
1429 while (slab->us_freecount) {
1430 freei = slab->us_firstfree;
1431 slab->us_firstfree = slab->us_freelist[freei];
1432#ifdef INVARIANTS
1433 slab->us_freelist[freei] = 255;
1434#endif
1435 slab->us_freecount--;
1436 zone->uz_free--;
1437 item = slab->us_data + (zone->uz_rsize * freei);
1438
1439 if (cache == NULL) {
1440 zone->uz_allocs++;
1441 break;
1442 }
1443
1444 bucket->ub_bucket[++bucket->ub_ptr] = item;
1445
1446 /* Don't overfill the bucket! */
1447 if (bucket->ub_ptr == cache->uc_count)
1448 break;
1449 }
1450
1451 /* Move this slab to the full list */
1452 if (slab->us_freecount == 0) {
1453 LIST_REMOVE(slab, us_link);
1454 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1455 }
1456
1457 if (cache != NULL) {
1458 /* Try to keep the buckets totally full, but don't block */
1459 if (bucket->ub_ptr < cache->uc_count) {
1460 wait = M_NOWAIT;
1461 goto new_slab;
1462 }
1463 }
1464
1465 ZONE_UNLOCK(zone);
1466
1467 /* Only construct at this time if we're not filling a bucket */
1468 if (cache == NULL) {
1469 if (zone->uz_ctor)
1470 zone->uz_ctor(item, zone->uz_size, udata);
1471
1472 if (isitem != NULL)
1473 *isitem = 1;
1474 }
1475
1476 return (item);
1477}
1478
1479/* See uma.h */
1480void
1481uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1482{
1483 uma_cache_t cache;
1484 uma_bucket_t bucket;
1485 int cpu;
1486
1487 /* This is the fast path free */
1488#ifdef UMA_DEBUG_ALLOC_1
1489 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1490#endif
1491 cpu = PCPU_GET(cpuid);
1492 CPU_LOCK(zone, cpu);
1493 cache = &zone->uz_cpu[cpu];
1494
1495zfree_start:
1496 bucket = cache->uc_freebucket;
1497
1498 if (bucket) {
1499 /* Do we have room in our bucket? */
1500 if (bucket->ub_ptr < cache->uc_count) {
1501 bucket->ub_ptr++;
1502 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1503 ("uma_zfree: Freeing to non free bucket index."));
1504 bucket->ub_bucket[bucket->ub_ptr] = item;
1505 CPU_UNLOCK(zone, cpu);
1506 if (zone->uz_dtor)
1507 zone->uz_dtor(item, zone->uz_size, udata);
1508 return;
1509 } else if (cache->uc_allocbucket) {
1510#ifdef UMA_DEBUG_ALLOC
1511 printf("uma_zfree: Swapping buckets.\n");
1512#endif
1513 /*
1514 * We have run out of space in our freebucket.
1515 * See if we can switch with our alloc bucket.
1516 */
1517 if (cache->uc_allocbucket->ub_ptr <
1518 cache->uc_freebucket->ub_ptr) {
1519 uma_bucket_t swap;
1520
1521 swap = cache->uc_freebucket;
1522 cache->uc_freebucket = cache->uc_allocbucket;
1523 cache->uc_allocbucket = swap;
1524
1525 goto zfree_start;
1526 }
1527 }
1528 }
1529
1530 /*
1531 * We can get here for three reasons:
1532 *
1533 * 1) The buckets are NULL
1534 * 2) The zone is INTERNAL, and so it has no buckets.
1535 * 3) The alloc and free buckets are both somewhat full.
1536 *
1537 */
1538
1539 ZONE_LOCK(zone);
1540
1541 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
1542 bucket = cache->uc_freebucket;
1543 cache->uc_freebucket = NULL;
1544
1545 /* Can we throw this on the zone full list? */
1546 if (bucket != NULL) {
1547#ifdef UMA_DEBUG_ALLOC
1548 printf("uma_zfree: Putting old bucket on the free list.\n");
1549#endif
1550 /* ub_ptr is pointing to the last free item */
1551 KASSERT(bucket->ub_ptr != -1,
1552 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1553 /*zone->uz_free += bucket->ub_ptr + 1;*/
1554 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1555 bucket, ub_link);
1556 bucket = LIST_FIRST(&zone->uz_free_bucket);
1557 if (bucket)
1558 LIST_REMOVE(bucket, ub_link);
1559 }
1560 /*
1561 * Do we need to alloc one? Either the freebucket was NULL
1562 * or the free_bucket list was empty.
1563 */
1564 if (bucket == NULL) {
1565#ifdef UMA_DEBUG_ALLOC
1566 printf("uma_zfree: Allocating new free bucket.\n");
1567#endif
1568 /* This has to be done so we don't recurse on a lock */
1569 ZONE_UNLOCK(zone);
1570 CPU_UNLOCK(zone, cpu);
1571 bucket = uma_zalloc_internal(bucketzone,
1572 NULL, M_NOWAIT, NULL, -1);
1573 CPU_LOCK(zone, cpu);
1574 ZONE_LOCK(zone);
1575 if (bucket) {
1576#ifdef INVARIANTS
1577 bzero(bucket, bucketzone->uz_size);
1578#endif
1579 bucket->ub_ptr = -1;
1580 }
1581 /* Did we lose the race? */
1582 if (cache->uc_freebucket != NULL) {
1583 if (bucket)
1584 uma_zfree_internal(bucketzone,
1585 bucket, NULL, 0);
1586 ZONE_UNLOCK(zone);
1587 goto zfree_start;
1588 }
1589 /* If we couldn't get one just free directly */
1590 if (bucket == NULL)
1591 goto zfree_internal;
1592 }
1593 cache->uc_freebucket = bucket;
1594 ZONE_UNLOCK(zone);
1595 goto zfree_start;
1596 }
1597
1598zfree_internal:
1599
1600 CPU_UNLOCK(zone, cpu);
1601 ZONE_UNLOCK(zone);
1602 uma_zfree_internal(zone, item, udata, 0);
1603
1604 return;
1605
1606}
1607
1608/*
1609 * Frees an item to an INTERNAL zone or allocates a free bucket
1610 *
1611 * Arguments:
1612 * zone The zone to free to
1613 * item The item we're freeing
1614 * udata User supplied data for the dtor
1615 * skip Skip the dtor, it was done in uma_zfree_arg
1616 */
1617
1618static void
1619uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1620{
1621 uma_slab_t slab;
1622 u_int8_t *mem;
1623 u_int8_t freei;
1624
1625 ZONE_LOCK(zone);
1626
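	/*
	 * Find the slab header: mask the item address down to the start of
	 * its slab, then either look the header up in the zone hash (for
	 * offpage slabs) or find it at uz_pgoff inside the slab.  Zones
	 * with UMA_ZFLAG_MALLOC set have the slab passed in through udata.
	 */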
1627 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1628 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1629 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
1630 slab = hash_sfind(&zone->uz_hash, mem);
1631 else {
1632 mem += zone->uz_pgoff;
1633 slab = (uma_slab_t)mem;
1634 }
1635 } else {
1636 slab = (uma_slab_t)udata;
1637 }
1638
1639 /* Do we need to remove from any lists? */
1640 if (slab->us_freecount+1 == zone->uz_ipers) {
1641 LIST_REMOVE(slab, us_link);
1642 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1643 } else if (slab->us_freecount == 0) {
1644 LIST_REMOVE(slab, us_link);
1645 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1646 }
1647
1648 /* Slab management stuff */
1649 freei = ((unsigned long)item - (unsigned long)slab->us_data)
1650 / zone->uz_rsize;
1651#ifdef INVARIANTS
1652 if (((freei * zone->uz_rsize) + slab->us_data) != item)
1653 panic("zone: %s(%p) slab %p freed address %p unaligned.\n",
1654 zone->uz_name, zone, slab, item);
1655 if (freei >= zone->uz_ipers)
1656 panic("zone: %s(%p) slab %p freelist %i out of range 0-%d\n",
1657 zone->uz_name, zone, slab, freei, zone->uz_ipers-1);
1658
1659 if (slab->us_freelist[freei] != 255) {
1660 printf("Slab at %p, freei %d = %d.\n",
1661 slab, freei, slab->us_freelist[freei]);
1662 panic("Duplicate free of item %p from zone %p(%s)\n",
1663 item, zone, zone->uz_name);
1664 }
1665#endif
1666 slab->us_freelist[freei] = slab->us_firstfree;
1667 slab->us_firstfree = freei;
1668 slab->us_freecount++;
1669
1670 /* Zone statistics */
1671 zone->uz_free++;
1672
1673 ZONE_UNLOCK(zone);
1674
1675 if (!skip && zone->uz_dtor)
1676 zone->uz_dtor(item, zone->uz_size, udata);
1677}
1678
1679/* See uma.h */
1680void
1244 return item;
1245}
1246
1247/*
1248 * Allocates an item for an internal zone OR fills a bucket
1249 *
1250 * Arguments
1251 * zone The zone to alloc for.
1252 * udata The data to be passed to the constructor.
1253 * wait M_WAITOK or M_NOWAIT.
1254 * isitem The returned value is an item if this is true.
1255 * cpu The cpu # of the cache that we should use, or -1.
1256 *
1257 * Returns
1258 * NULL if there is no memory and M_NOWAIT is set
1259 *	An item if called on an internal zone
1260 * Non NULL if called to fill a bucket and it was successful.
1261 *
1262 * Discussion:
1263 * This was much cleaner before it had to do per cpu caches. It is
1264 * complicated now because it has to handle the simple internal case, and
1265 * the more involved bucket filling and allocation. The isitem is there
1266 * to remove a failure case. You shouldn't fail on allocating from a zone
1267 * because there were no buckets. This allows the exported zalloc to just
1268 * return the item.
1269 *
1270 */
1271
1272static void *
1273uma_zalloc_internal(uma_zone_t zone, void *udata, int wait, int *isitem, int cpu)
1274{
1275 uma_bucket_t bucket;
1276 uma_cache_t cache;
1277 uma_slab_t slab;
1278 u_int8_t freei;
1279 void *item;
1280
1281 bucket = NULL;
1282 cache = NULL;
1283 item = NULL;
1284
1285 /*
1286 * This is to stop us from allocating per cpu buckets while we're still
1287 * allocating out of UMA_BOOT_PAGES; otherwise we would exhaust the boot pages.
1288 */
1289
1290 if (!booted && zone == bucketzone)
1291 return (NULL);
1292
1293#ifdef UMA_DEBUG_ALLOC
1294 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1295#endif
1296 if (isitem != NULL)
1297 *isitem = 0;
1298
1299 ZONE_LOCK(zone);
1300
1301 /* We got here because we need to fill some buckets */
1302 if (cpu != -1) {
1303 cache = &zone->uz_cpu[cpu];
1304
1305 zone->uz_allocs += cache->uc_allocs;
1306 /* Check the free list */
1307 bucket = LIST_FIRST(&zone->uz_full_bucket);
1308 if (bucket) {
1309 LIST_REMOVE(bucket, ub_link);
1310 /* Our old one is now a free bucket */
1311 if (cache->uc_allocbucket) {
1312 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1313 ("uma_zalloc_internal: Freeing a non free bucket."));
1314 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1315 cache->uc_allocbucket, ub_link);
1316 }
1317 KASSERT(bucket->ub_ptr != -1,
1318 ("uma_zalloc_internal: Returning an empty bucket."));
1319 /*zone->uz_free -= bucket->ub_ptr + 1;*/
1320 cache->uc_allocbucket = bucket;
1321 ZONE_UNLOCK(zone);
1322 return (bucket);
1323 }
1324		/* Bump up our uc_count so we get here less often */
1325 if (cache->uc_count < UMA_BUCKET_SIZE - 1)
1326 cache->uc_count++;
1327 /* Nothing on the free list, try to re-use the old one */
1328 bucket = cache->uc_allocbucket;
1329 if (bucket == NULL) {
1330 /* Nope, we need a new one */
1331 CPU_UNLOCK(zone, cpu);
1332 ZONE_UNLOCK(zone);
1333 bucket = uma_zalloc_internal(bucketzone,
1334 NULL, wait, NULL, -1);
1335 CPU_LOCK(zone, cpu);
1336 ZONE_LOCK(zone);
1337 /* Did we lose the race? */
1338 if (cache->uc_allocbucket) {
1339#ifdef UMA_DEBUG
1340 printf("uma_zalloc_internal: Lost race with another CPU.\n");
1341#endif
1342 if (bucket)
1343 uma_zfree_internal(bucketzone,
1344 bucket, NULL, 0);
1345 ZONE_UNLOCK(zone);
1346 return (cache->uc_allocbucket);
1347 }
1348 cache->uc_allocbucket = bucket;
1349
1350 if (bucket) {
1351#ifdef INVARIANTS
1352 bzero(bucket, bucketzone->uz_size);
1353#endif
1354 bucket->ub_ptr = -1;
1355 } else {
1356 /*
1357 * We may not get a bucket if we recurse, so
1358 * return an actual item. The rest of this code
1359 * does the right thing if the cache is NULL.
1360 */
1361#ifdef UMA_DEBUG
1362 printf("uma_zalloc_internal: Bucketzone returned NULL\n");
1363#endif
1364 CPU_UNLOCK(zone, cpu);
1365 cache = NULL;
1366 cpu = -1;
1367 }
1368 }
1369 }
1370
1371new_slab:
1372
1373 /* Find a slab with some space */
1374 if (zone->uz_free) {
1375 if (!LIST_EMPTY(&zone->uz_part_slab)) {
1376 slab = LIST_FIRST(&zone->uz_part_slab);
1377 } else {
1378 slab = LIST_FIRST(&zone->uz_free_slab);
1379 LIST_REMOVE(slab, us_link);
1380 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1381 }
1382 } else {
1383 /*
1384 * This is to prevent us from recursively trying to allocate
1385 * buckets. The problem is that if an allocation forces us to
1386 * grab a new bucket we will call page_alloc, which will go off
1387 * and cause the vm to allocate vm_map_entries. If we need new
1388 * buckets there too we will recurse in kmem_alloc and bad
1389 * things happen. So instead we return a NULL bucket, and make
1390 * the code that allocates buckets smart enough to deal with it */
1391 if (zone == bucketzone && zone->uz_recurse != 0) {
1392 ZONE_UNLOCK(zone);
1393 return (NULL);
1394 }
1395 zone->uz_recurse++;
1396 slab = slab_zalloc(zone, wait);
1397 zone->uz_recurse--;
1398 if (slab) {
1399 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1400 /*
1401 * We might not have been able to get a page, but another cpu
1402 * could have while we were unlocked.
1403 */
1404 } else if (zone->uz_free == 0) {
1405 ZONE_UNLOCK(zone);
1406 /* If we're filling a bucket return what we have */
1407 if (bucket != NULL && bucket->ub_ptr != -1) {
1408 return (bucket);
1409 } else
1410 return (NULL);
1411 } else {
1412 /* Another cpu must have succeeded */
1413 if ((slab = LIST_FIRST(&zone->uz_part_slab)) == NULL) {
1414 slab = LIST_FIRST(&zone->uz_free_slab);
1415 LIST_REMOVE(slab, us_link);
1416 LIST_INSERT_HEAD(&zone->uz_part_slab,
1417 slab, us_link);
1418 }
1419 }
1420 }
1421
1422 while (slab->us_freecount) {
1423 freei = slab->us_firstfree;
1424 slab->us_firstfree = slab->us_freelist[freei];
1425#ifdef INVARIANTS
1426 slab->us_freelist[freei] = 255;
1427#endif
1428 slab->us_freecount--;
1429 zone->uz_free--;
1430 item = slab->us_data + (zone->uz_rsize * freei);
1431
1432 if (cache == NULL) {
1433 zone->uz_allocs++;
1434 break;
1435 }
1436
1437 bucket->ub_bucket[++bucket->ub_ptr] = item;
1438
1439 /* Don't overfill the bucket! */
1440 if (bucket->ub_ptr == cache->uc_count)
1441 break;
1442 }
1443
1444 /* Move this slab to the full list */
1445 if (slab->us_freecount == 0) {
1446 LIST_REMOVE(slab, us_link);
1447 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1448 }
1449
1450 if (cache != NULL) {
1451 /* Try to keep the buckets totally full, but don't block */
1452 if (bucket->ub_ptr < cache->uc_count) {
1453 wait = M_NOWAIT;
1454 goto new_slab;
1455 }
1456 }
1457
1458 ZONE_UNLOCK(zone);
1459
1460 /* Only construct at this time if we're not filling a bucket */
1461 if (cache == NULL) {
1462 if (zone->uz_ctor)
1463 zone->uz_ctor(item, zone->uz_size, udata);
1464
1465 if (isitem != NULL)
1466 *isitem = 1;
1467 }
1468
1469 return (item);
1470}
1471
1472/* See uma.h */
1473void
1474uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1475{
1476 uma_cache_t cache;
1477 uma_bucket_t bucket;
1478 int cpu;
1479
1480 /* This is the fast path free */
1481#ifdef UMA_DEBUG_ALLOC_1
1482 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1483#endif
1484 cpu = PCPU_GET(cpuid);
1485 CPU_LOCK(zone, cpu);
1486 cache = &zone->uz_cpu[cpu];
1487
1488zfree_start:
1489 bucket = cache->uc_freebucket;
1490
1491 if (bucket) {
1492 /* Do we have room in our bucket? */
1493 if (bucket->ub_ptr < cache->uc_count) {
1494 bucket->ub_ptr++;
1495 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1496 ("uma_zfree: Freeing to non free bucket index."));
1497 bucket->ub_bucket[bucket->ub_ptr] = item;
1498 CPU_UNLOCK(zone, cpu);
1499 if (zone->uz_dtor)
1500 zone->uz_dtor(item, zone->uz_size, udata);
1501 return;
1502 } else if (cache->uc_allocbucket) {
1503#ifdef UMA_DEBUG_ALLOC
1504 printf("uma_zfree: Swapping buckets.\n");
1505#endif
1506 /*
1507 * We have run out of space in our freebucket.
1508 * See if we can switch with our alloc bucket.
1509 */
1510 if (cache->uc_allocbucket->ub_ptr <
1511 cache->uc_freebucket->ub_ptr) {
1512 uma_bucket_t swap;
1513
1514 swap = cache->uc_freebucket;
1515 cache->uc_freebucket = cache->uc_allocbucket;
1516 cache->uc_allocbucket = swap;
1517
1518 goto zfree_start;
1519 }
1520 }
1521 }
1522
1523 /*
1524 * We can get here for three reasons:
1525 *
1526 * 1) The buckets are NULL
1527 * 2) The zone is INTERNAL, and so it has no buckets.
1528 * 3) The alloc and free buckets are both somewhat full.
1529 *
1530 */
1531
1532 ZONE_LOCK(zone);
1533
1534 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
1535 bucket = cache->uc_freebucket;
1536 cache->uc_freebucket = NULL;
1537
1538 /* Can we throw this on the zone full list? */
1539 if (bucket != NULL) {
1540#ifdef UMA_DEBUG_ALLOC
1541 printf("uma_zfree: Putting old bucket on the free list.\n");
1542#endif
1543 /* ub_ptr is pointing to the last free item */
1544 KASSERT(bucket->ub_ptr != -1,
1545 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1546 /*zone->uz_free += bucket->ub_ptr + 1;*/
1547 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1548 bucket, ub_link);
1549 bucket = LIST_FIRST(&zone->uz_free_bucket);
1550 if (bucket)
1551 LIST_REMOVE(bucket, ub_link);
1552 }
1553 /*
1554 * Do we need to alloc one? Either the freebucket was NULL
1555 * or the free_bucket list was empty.
1556 */
1557 if (bucket == NULL) {
1558#ifdef UMA_DEBUG_ALLOC
1559 printf("uma_zfree: Allocating new free bucket.\n");
1560#endif
1561 /* This has to be done so we don't recurse on a lock */
1562 ZONE_UNLOCK(zone);
1563 CPU_UNLOCK(zone, cpu);
1564 bucket = uma_zalloc_internal(bucketzone,
1565 NULL, M_NOWAIT, NULL, -1);
1566 CPU_LOCK(zone, cpu);
1567 ZONE_LOCK(zone);
1568 if (bucket) {
1569#ifdef INVARIANTS
1570 bzero(bucket, bucketzone->uz_size);
1571#endif
1572 bucket->ub_ptr = -1;
1573 }
1574 /* Did we lose the race? */
1575 if (cache->uc_freebucket != NULL) {
1576 if (bucket)
1577 uma_zfree_internal(bucketzone,
1578 bucket, NULL, 0);
1579 ZONE_UNLOCK(zone);
1580 goto zfree_start;
1581 }
1582 /* If we couldn't get one just free directly */
1583 if (bucket == NULL)
1584 goto zfree_internal;
1585 }
1586 cache->uc_freebucket = bucket;
1587 ZONE_UNLOCK(zone);
1588 goto zfree_start;
1589 }
1590
1591zfree_internal:
1592
1593 CPU_UNLOCK(zone, cpu);
1594 ZONE_UNLOCK(zone);
1595 uma_zfree_internal(zone, item, udata, 0);
1596
1597 return;
1598
1599}
1600
1601/*
1602 * Frees an item to an INTERNAL zone or allocates a free bucket
1603 *
1604 * Arguments:
1605 * zone The zone to free to
1606 * item The item we're freeing
1607 * udata User supplied data for the dtor
1608 * skip Skip the dtor, it was done in uma_zfree_arg
1609 */
1610
1611static void
1612uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1613{
1614 uma_slab_t slab;
1615 u_int8_t *mem;
1616 u_int8_t freei;
1617
1618 ZONE_LOCK(zone);
1619
1620 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1621 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1622 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
1623 slab = hash_sfind(&zone->uz_hash, mem);
1624 else {
1625 mem += zone->uz_pgoff;
1626 slab = (uma_slab_t)mem;
1627 }
1628 } else {
1629 slab = (uma_slab_t)udata;
1630 }
1631
1632 /* Do we need to remove from any lists? */
1633 if (slab->us_freecount+1 == zone->uz_ipers) {
1634 LIST_REMOVE(slab, us_link);
1635 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1636 } else if (slab->us_freecount == 0) {
1637 LIST_REMOVE(slab, us_link);
1638 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1639 }
1640
1641 /* Slab management stuff */
1642 freei = ((unsigned long)item - (unsigned long)slab->us_data)
1643 / zone->uz_rsize;
1644#ifdef INVARIANTS
1645 if (((freei * zone->uz_rsize) + slab->us_data) != item)
1646 panic("zone: %s(%p) slab %p freed address %p unaligned.\n",
1647 zone->uz_name, zone, slab, item);
1648 if (freei >= zone->uz_ipers)
1649 panic("zone: %s(%p) slab %p freelist %i out of range 0-%d\n",
1650 zone->uz_name, zone, slab, freei, zone->uz_ipers-1);
1651
1652 if (slab->us_freelist[freei] != 255) {
1653 printf("Slab at %p, freei %d = %d.\n",
1654 slab, freei, slab->us_freelist[freei]);
1655 panic("Duplicate free of item %p from zone %p(%s)\n",
1656 item, zone, zone->uz_name);
1657 }
1658#endif
1659 slab->us_freelist[freei] = slab->us_firstfree;
1660 slab->us_firstfree = freei;
1661 slab->us_freecount++;
1662
1663 /* Zone statistics */
1664 zone->uz_free++;
1665
1666 ZONE_UNLOCK(zone);
1667
1668 if (!skip && zone->uz_dtor)
1669 zone->uz_dtor(item, zone->uz_size, udata);
1670}
1671
1672/* See uma.h */
1673void
1674uma_zone_set_max(uma_zone_t zone, int nitems)
1675{
1676 ZONE_LOCK(zone);
1677 if (zone->uz_ppera > 1)
1678 zone->uz_maxpages = nitems / zone->uz_ppera;
1679 else
1680 zone->uz_maxpages = nitems / zone->uz_ipers;
1681 ZONE_UNLOCK(zone);
1682}
1683
1684/* See uma.h */
1685void
1681uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1682{
1683 ZONE_LOCK(zone);
1684
1685 zone->uz_freef = freef;
1686
1687 ZONE_UNLOCK(zone);
1688}
1689
1690/* See uma.h */
1691void
1692uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1693{
1694 ZONE_LOCK(zone);
1695
1696 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1697 zone->uz_allocf = allocf;
1698
1699 ZONE_UNLOCK(zone);
1700}
1701
1702/* See uma.h */
1703int
1704uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1705{
1706 int pages;
1707 vm_offset_t kva;
1708
1709 ZONE_LOCK(zone);
1710 mtx_lock(&Giant);
1711
1712 zone->uz_obj = obj;
1713 pages = count / zone->uz_ipers;
1714
1715 if (pages * zone->uz_ipers < count)
1716 pages++;
1717 zone->uz_kva = NULL;
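	/*
	 * The zone lock is dropped across the KVA reservation below.  The
	 * reservation is sized to cover every slab the zone can ever hold;
	 * obj_alloc() backs it with pages from uz_obj as slabs are created.
	 */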
1718 ZONE_UNLOCK(zone);
1719 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1720 ZONE_LOCK(zone);
1721
1722 zone->uz_kva = kva;
1723
1724 if (zone->uz_kva == 0) {
1725 ZONE_UNLOCK(zone);
1726 return (0);
1727 }
1728
1729 zone->uz_maxpages = pages;
1730
1731 if (zone->uz_obj == NULL)
1732 zone->uz_obj = vm_object_allocate(OBJT_DEFAULT,
1733 zone->uz_maxpages);
1734 else
1735 _vm_object_allocate(OBJT_DEFAULT,
1736 zone->uz_maxpages, zone->uz_obj);
1737
1738 zone->uz_allocf = obj_alloc;
1739 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1740
1741 mtx_unlock(&Giant);
1742 ZONE_UNLOCK(zone);
1743
1744 return (1);
1745}
1746
1747/* See uma.h */
1748void
1749uma_prealloc(uma_zone_t zone, int items)
1750{
1751 int slabs;
1752 uma_slab_t slab;
1753
1754 ZONE_LOCK(zone);
1755 slabs = items / zone->uz_ipers;
1756 if (slabs * zone->uz_ipers < items)
1757 slabs++;
1758
1759 while (slabs > 0) {
1760 slab = slab_zalloc(zone, M_WAITOK);
1761 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1762 slabs--;
1763 }
1764 ZONE_UNLOCK(zone);
1765}
1766
1767/* See uma.h */
1768void
1769uma_reclaim(void)
1770{
1771 /*
1772 * You might think that the delay below would improve performance since
1773 * the allocator will give away memory that it may ask for again immediately.
1774 * Really, it makes things worse, since cpu cycles are so much cheaper
1775 * than disk activity.
1776 */
1777#if 0
1778 static struct timeval tv = {0};
1779 struct timeval now;
1780 getmicrouptime(&now);
1781 if (now.tv_sec > tv.tv_sec + 30)
1782 tv = now;
1783 else
1784 return;
1785#endif
1786#ifdef UMA_DEBUG
1787 printf("UMA: vm asked us to release pages!\n");
1788#endif
1789 zone_foreach(zone_drain);
1790
1791 /*
1792 * The slab and bucket zones are visited early in the loop above, so
1793 * drain them again here to free any pages that only became empty once
1794 * the other zones were drained.
1795 */
1796 zone_drain(slabzone);
1797 zone_drain(bucketzone);
1798}
1799
1800void *
1801uma_large_malloc(int size, int wait)
1802{
1803 void *mem;
1804 uma_slab_t slab;
1805 u_int8_t flags;
1806
1807 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1);
1808 if (slab == NULL)
1809 return (NULL);
1810
1811 mem = page_alloc(NULL, size, &flags, wait);
1812 if (mem) {
1813 slab->us_data = mem;
1814 slab->us_flags = flags | UMA_SLAB_MALLOC;
1815 slab->us_size = size;
1816 UMA_HASH_INSERT(mallochash, slab, mem);
1817 } else {
1818 uma_zfree_internal(slabzone, slab, NULL, 0);
1819 }
1820
1821
1822 return (mem);
1823}
1824
1825void
1826uma_large_free(uma_slab_t slab)
1827{
1828 UMA_HASH_REMOVE(mallochash, slab, slab->us_data);
1829 page_free(slab->us_data, slab->us_size, slab->us_flags);
1830 uma_zfree_internal(slabzone, slab, NULL, 0);
1831}
1832
1833void
1834uma_print_stats(void)
1835{
1836 zone_foreach(uma_print_zone);
1837}
1838
1839void
1840uma_print_zone(uma_zone_t zone)
1841{
1842 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
1843 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
1844 zone->uz_ipers, zone->uz_ppera,
1845 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
1846}
1847
1848/*
1849 * Sysctl handler for vm.zone
1850 *
1851 * stolen from vm_zone.c
1852 */
1853static int
1854sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
1855{
1856 int error, len, cnt;
1857 const int linesize = 128; /* conservative */
1858 int totalfree;
1859 char *tmpbuf, *offset;
1860 uma_zone_t z;
1861 char *p;
1862
1863 cnt = 0;
1864 LIST_FOREACH(z, &uma_zones, uz_link)
1865 cnt++;
1866 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
1867 M_TEMP, M_WAITOK);
1868 len = snprintf(tmpbuf, linesize,
1869 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
1870 if (cnt == 0)
1871 tmpbuf[len - 1] = '\0';
1872 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
1873 if (error || cnt == 0)
1874 goto out;
1875 offset = tmpbuf;
1876 LIST_FOREACH(z, &uma_zones, uz_link) {
1877 if (cnt == 0) /* list may have changed size */
1878 break;
1879 ZONE_LOCK(z);
1880 totalfree = z->uz_free + z->uz_cachefree;
1881 len = snprintf(offset, linesize,
1882 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
1883 z->uz_name, z->uz_size,
1884 z->uz_maxpages * z->uz_ipers,
1885 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
1886 totalfree,
1887 (unsigned long long)z->uz_allocs);
1888 ZONE_UNLOCK(z);
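		/*
		 * Back up over the blank padding of the %-12.12s name field
		 * and drop a ':' right after the zone name.
		 */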
1889 for (p = offset + 12; p > offset && *p == ' '; --p)
1890 /* nothing */ ;
1891 p[1] = ':';
1892 cnt--;
1893 offset += len;
1894 }
1895 *offset++ = '\0';
1896 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
1897out:
1898 FREE(tmpbuf, M_TEMP);
1899 return (error);
1900}
1686uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1687{
1688 ZONE_LOCK(zone);
1689
1690 zone->uz_freef = freef;
1691
1692 ZONE_UNLOCK(zone);
1693}
1694
1695/* See uma.h */
1696void
1697uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1698{
1699 ZONE_LOCK(zone);
1700
1701 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1702 zone->uz_allocf = allocf;
1703
1704 ZONE_UNLOCK(zone);
1705}
1706
1707/* See uma.h */
1708int
1709uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1710{
1711 int pages;
1712 vm_offset_t kva;
1713
1714 ZONE_LOCK(zone);
1715 mtx_lock(&Giant);
1716
1717 zone->uz_obj = obj;
1718 pages = count / zone->uz_ipers;
1719
1720 if (pages * zone->uz_ipers < count)
1721 pages++;
1722 zone->uz_kva = NULL;
1723 ZONE_UNLOCK(zone);
1724 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1725 ZONE_LOCK(zone);
1726
1727 zone->uz_kva = kva;
1728
1729 if (zone->uz_kva == 0) {
1730 ZONE_UNLOCK(zone);
1731 return (0);
1732 }
1733
1734 zone->uz_maxpages = pages;
1735
1736 if (zone->uz_obj == NULL)
1737 zone->uz_obj = vm_object_allocate(OBJT_DEFAULT,
1738 zone->uz_maxpages);
1739 else
1740 _vm_object_allocate(OBJT_DEFAULT,
1741 zone->uz_maxpages, zone->uz_obj);
1742
1743 zone->uz_allocf = obj_alloc;
1744 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1745
1746 mtx_unlock(&Giant);
1747 ZONE_UNLOCK(zone);
1748
1749 return (1);
1750}
1751
1752/* See uma.h */
1753void
1754uma_prealloc(uma_zone_t zone, int items)
1755{
1756 int slabs;
1757 uma_slab_t slab;
1758
1759 ZONE_LOCK(zone);
1760 slabs = items / zone->uz_ipers;
1761 if (slabs * zone->uz_ipers < items)
1762 slabs++;
1763
1764 while (slabs > 0) {
1765 slab = slab_zalloc(zone, M_WAITOK);
1766 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1767 slabs--;
1768 }
1769 ZONE_UNLOCK(zone);
1770}
1771
1772/* See uma.h */
1773void
1774uma_reclaim(void)
1775{
1776 /*
1777 * You might think that the delay below would improve performance since
1778 * the allocator will give away memory that it may ask for again immediately.
1779 * Really, it makes things worse, since cpu cycles are so much cheaper
1780 * than disk activity.
1781 */
1782#if 0
1783 static struct timeval tv = {0};
1784 struct timeval now;
1785 getmicrouptime(&now);
1786 if (now.tv_sec > tv.tv_sec + 30)
1787 tv = now;
1788 else
1789 return;
1790#endif
1791#ifdef UMA_DEBUG
1792 printf("UMA: vm asked us to release pages!\n");
1793#endif
1794 zone_foreach(zone_drain);
1795
1796 /*
1797 * The slab and bucket zones are visited early in the loop above, so
1798 * drain them again here to free any pages that only became empty once
1799 * the other zones were drained.
1800 */
1801 zone_drain(slabzone);
1802 zone_drain(bucketzone);
1803}
1804
1805void *
1806uma_large_malloc(int size, int wait)
1807{
1808 void *mem;
1809 uma_slab_t slab;
1810 u_int8_t flags;
1811
1812 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1);
1813 if (slab == NULL)
1814 return (NULL);
1815
1816 mem = page_alloc(NULL, size, &flags, wait);
1817 if (mem) {
1818 slab->us_data = mem;
1819 slab->us_flags = flags | UMA_SLAB_MALLOC;
1820 slab->us_size = size;
1821 UMA_HASH_INSERT(mallochash, slab, mem);
1822 } else {
1823 uma_zfree_internal(slabzone, slab, NULL, 0);
1824 }
1825
1826
1827 return (mem);
1828}
1829
1830void
1831uma_large_free(uma_slab_t slab)
1832{
1833 UMA_HASH_REMOVE(mallochash, slab, slab->us_data);
1834 page_free(slab->us_data, slab->us_size, slab->us_flags);
1835 uma_zfree_internal(slabzone, slab, NULL, 0);
1836}
1837
1838void
1839uma_print_stats(void)
1840{
1841 zone_foreach(uma_print_zone);
1842}
1843
1844void
1845uma_print_zone(uma_zone_t zone)
1846{
1847 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
1848 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
1849 zone->uz_ipers, zone->uz_ppera,
1850 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
1851}
1852
1853/*
1854 * Sysctl handler for vm.zone
1855 *
1856 * stolen from vm_zone.c
1857 */
1858static int
1859sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
1860{
1861 int error, len, cnt;
1862 const int linesize = 128; /* conservative */
1863 int totalfree;
1864 char *tmpbuf, *offset;
1865 uma_zone_t z;
1866 char *p;
1867
1868 cnt = 0;
1869 LIST_FOREACH(z, &uma_zones, uz_link)
1870 cnt++;
1871 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
1872 M_TEMP, M_WAITOK);
1873 len = snprintf(tmpbuf, linesize,
1874 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
1875 if (cnt == 0)
1876 tmpbuf[len - 1] = '\0';
1877 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
1878 if (error || cnt == 0)
1879 goto out;
1880 offset = tmpbuf;
1881 LIST_FOREACH(z, &uma_zones, uz_link) {
1882 if (cnt == 0) /* list may have changed size */
1883 break;
1884 ZONE_LOCK(z);
1885 totalfree = z->uz_free + z->uz_cachefree;
1886 len = snprintf(offset, linesize,
1887 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
1888 z->uz_name, z->uz_size,
1889 z->uz_maxpages * z->uz_ipers,
1890 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
1891 totalfree,
1892 (unsigned long long)z->uz_allocs);
1893 ZONE_UNLOCK(z);
1894 for (p = offset + 12; p > offset && *p == ' '; --p)
1895 /* nothing */ ;
1896 p[1] = ':';
1897 cnt--;
1898 offset += len;
1899 }
1900 *offset++ = '\0';
1901 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
1902out:
1903 FREE(tmpbuf, M_TEMP);
1904 return (error);
1905}