uma_core.c: diff of revisions 95758 and 95766
1/*
2 * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions, and the following
10 * disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/vm/uma_core.c 95758 2002-04-29 23:45:41Z jeff $
26 * $FreeBSD: head/sys/vm/uma_core.c 95766 2002-04-30 04:26:34Z jeff $
27 *
28 */
29
30/*
31 * uma_core.c Implementation of the Universal Memory allocator
32 *
33 * This allocator is intended to replace the multitude of similar object caches
34 * in the standard FreeBSD kernel. The intent is to be flexible as well as
 35 * efficient. A primary design goal is to return unused memory to the rest of
36 * the system. This will make the system as a whole more flexible due to the
37 * ability to move memory to subsystems which most need it instead of leaving
38 * pools of reserved memory unused.
39 *
40 * The basic ideas stem from similar slab/zone based allocators whose algorithms
41 * are well known.
42 *
43 */
44
45/*
46 * TODO:
47 * - Improve memory usage for large allocations
48 * - Improve INVARIANTS (0xdeadc0de write out)
49 * - Investigate cache size adjustments
50 */
51
52/* I should really use ktr.. */
53/*
54#define UMA_DEBUG 1
55#define UMA_DEBUG_ALLOC 1
56#define UMA_DEBUG_ALLOC_1 1
57*/
58
59
60#include "opt_param.h"
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/kernel.h>
64#include <sys/types.h>
65#include <sys/queue.h>
66#include <sys/malloc.h>
67#include <sys/lock.h>
68#include <sys/sysctl.h>
69#include <sys/mutex.h>
70#include <sys/smp.h>
71#include <sys/vmmeter.h>
72
73#include <machine/types.h>
74
75#include <vm/vm.h>
76#include <vm/vm_object.h>
77#include <vm/vm_page.h>
78#include <vm/vm_param.h>
79#include <vm/vm_map.h>
80#include <vm/vm_kern.h>
81#include <vm/vm_extern.h>
82#include <vm/uma.h>
83#include <vm/uma_int.h>
84
85/*
86 * This is the zone from which all zones are spawned. The idea is that even
87 * the zone heads are allocated from the allocator, so we use the bss section
88 * to bootstrap us.
89 */
90static struct uma_zone masterzone;
91static uma_zone_t zones = &masterzone;
92
93/* This is the zone from which all of uma_slab_t's are allocated. */
94static uma_zone_t slabzone;
95
96/*
97 * The initial hash tables come out of this zone so they can be allocated
98 * prior to malloc coming up.
99 */
100static uma_zone_t hashzone;
101
102/*
103 * Zone that buckets come from.
104 */
105static uma_zone_t bucketzone;
106
107/*
108 * Are we allowed to allocate buckets?
109 */
110static int bucketdisable = 1;
111
112/* Linked list of all zones in the system */
113static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
114
115/* This mutex protects the zone list */
116static struct mtx uma_mtx;
117
118/* Linked list of boot time pages */
119static LIST_HEAD(,uma_slab) uma_boot_pages =
120 LIST_HEAD_INITIALIZER(&uma_boot_pages);
121
122/* Count of free boottime pages */
123static int uma_boot_free = 0;
124
125/* Is the VM done starting up? */
126static int booted = 0;
127
128/* This is the handle used to schedule our working set calculator */
129static struct callout uma_callout;
130
131/* This is mp_maxid + 1, for use while looping over each cpu */
132static int maxcpu;
133
134/*
135 * This structure is passed as the zone ctor arg so that I don't have to create
136 * a special allocation function just for zones.
137 */
138struct uma_zctor_args {
139 char *name;
140 int size;
141 uma_ctor ctor;
142 uma_dtor dtor;
143 uma_init uminit;
144 uma_fini fini;
145 int align;
146 u_int16_t flags;
147};
148
149/*
150 * This is the malloc hash table which is used to find the zone that a
151 * malloc allocation came from. It is not currently resizeable. The
152 * memory for the actual hash bucket is allocated in kmeminit.
153 */
154struct uma_hash mhash;
155struct uma_hash *mallochash = &mhash;
156
157/* Prototypes.. */
158
159static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
160static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
161static void page_free(void *, int, u_int8_t);
162static uma_slab_t slab_zalloc(uma_zone_t, int);
163static void cache_drain(uma_zone_t);
164static void bucket_drain(uma_zone_t, uma_bucket_t);
165static void zone_drain(uma_zone_t);
166static void zone_ctor(void *, int, void *);
167static void zone_dtor(void *, int, void *);
168static void zero_init(void *, int);
169static void zone_small_init(uma_zone_t zone);
170static void zone_large_init(uma_zone_t zone);
171static void zone_foreach(void (*zfunc)(uma_zone_t));
172static void zone_timeout(uma_zone_t zone);
173static struct slabhead *hash_alloc(int *);
174static void hash_expand(struct uma_hash *, struct slabhead *, int);
175static void hash_free(struct slabhead *hash, int hashsize);
176static void uma_timeout(void *);
177static void uma_startup3(void);
178static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t);
179static void uma_zfree_internal(uma_zone_t, void *, void *, int);
180static void bucket_enable(void);
181void uma_print_zone(uma_zone_t);
182void uma_print_stats(void);
183static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
184
185SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
186 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
187SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
188
189/*
190 * This routine checks to see whether or not it's safe to enable buckets.
191 */
192
193static void
194bucket_enable(void)
195{
196 if (cnt.v_free_count < cnt.v_free_min)
197 bucketdisable = 1;
198 else
199 bucketdisable = 0;
200}
201
202
203/*
204 * Routine called by timeout which is used to fire off some time interval
205 * based calculations. (working set, stats, etc.)
206 *
207 * Arguments:
208 * arg Unused
209 *
210 * Returns:
211 * Nothing
212 */
213static void
214uma_timeout(void *unused)
215{
216 bucket_enable();
217 zone_foreach(zone_timeout);
218
219 /* Reschedule this event */
220 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
221}
222
223/*
224 * Routine to perform timeout driven calculations. This does the working set
225 * as well as hash expanding, and per cpu statistics aggregation.
226 *
227 * Arguments:
228 * zone The zone to operate on
229 *
230 * Returns:
231 * Nothing
232 */
233static void
234zone_timeout(uma_zone_t zone)
235{
236 uma_cache_t cache;
237 u_int64_t alloc;
238 int free;
239 int cpu;
240
241 alloc = 0;
242 free = 0;
243
244 /*
245 * Aggregate per cpu cache statistics back to the zone.
246 *
247 * I may rewrite this to set a flag in the per cpu cache instead of
248 * locking. If the flag is not cleared on the next round I will have
249 * to lock and do it here instead so that the statistics don't get too
250 * far out of sync.
251 */
252 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
253 for (cpu = 0; cpu < maxcpu; cpu++) {
254 if (CPU_ABSENT(cpu))
255 continue;
256 CPU_LOCK(zone, cpu);
257 cache = &zone->uz_cpu[cpu];
258 /* Add them up, and reset */
259 alloc += cache->uc_allocs;
260 cache->uc_allocs = 0;
261 if (cache->uc_allocbucket)
262 free += cache->uc_allocbucket->ub_ptr + 1;
263 if (cache->uc_freebucket)
264 free += cache->uc_freebucket->ub_ptr + 1;
265 CPU_UNLOCK(zone, cpu);
266 }
267 }
268
269 /* Now push these stats back into the zone.. */
270 ZONE_LOCK(zone);
271 zone->uz_allocs += alloc;
272
273 /*
 274 * cachefree is an instantaneous snapshot of what is in the per cpu
275 * caches, not an accurate counter
276 */
277 zone->uz_cachefree = free;
278
279 /*
280 * Expand the zone hash table.
281 *
282 * This is done if the number of slabs is larger than the hash size.
 283 * What I'm trying to do here is completely eliminate collisions. This
284 * may be a little aggressive. Should I allow for two collisions max?
285 */
286
287 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) &&
288 !(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
289 if (zone->uz_pages / zone->uz_ppera
290 >= zone->uz_hash.uh_hashsize) {
291 struct slabhead *newhash;
292 int newsize;
293
294 newsize = zone->uz_hash.uh_hashsize;
295 ZONE_UNLOCK(zone);
296 newhash = hash_alloc(&newsize);
297 ZONE_LOCK(zone);
298 hash_expand(&zone->uz_hash, newhash, newsize);
299 }
300 }
301
302 /*
303 * Here we compute the working set size as the total number of items
304 * left outstanding since the last time interval. This is slightly
305 * suboptimal. What we really want is the highest number of outstanding
306 * items during the last time quantum. This should be close enough.
307 *
308 * The working set size is used to throttle the zone_drain function.
309 * We don't want to return memory that we may need again immediately.
310 */
311 alloc = zone->uz_allocs - zone->uz_oallocs;
312 zone->uz_oallocs = zone->uz_allocs;
313 zone->uz_wssize = alloc;
314
315 ZONE_UNLOCK(zone);
316}
317
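/*
 * A worked example of the working set throttle (the numbers are
 * illustrative, not measured): if a zone saw 600 allocations since the
 * previous interval, the code above leaves uz_wssize at 600.  A later
 * zone_drain() that finds 1000 free items and 50 items per slab keeps
 * the first 600 free items and releases (1000 - 600) / 50 = 8 slabs to
 * the system; with only 400 free items it returns without freeing
 * anything.
 */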
318/*
319 * Allocate and zero fill the next sized hash table from the appropriate
320 * backing store.
321 *
322 * Arguments:
323 * oldsize On input it's the size we're currently at and on output
324 * it is the expanded size.
325 *
326 * Returns:
327 * slabhead The new hash bucket or NULL if the allocation failed.
328 */
329struct slabhead *
330hash_alloc(int *oldsize)
331{
332 struct slabhead *newhash;
333 int newsize;
334 int alloc;
335
 336 /* We're just going to double to the next power of two */
337 if (*oldsize) {
338 newsize = (*oldsize) * 2;
339 alloc = sizeof(newhash[0]) * newsize;
340 /* XXX Shouldn't be abusing DEVBUF here */
341 newhash = (struct slabhead *)malloc(alloc, M_DEVBUF, M_NOWAIT);
342 } else {
343 alloc = sizeof(newhash[0]) * UMA_HASH_SIZE_INIT;
344 newhash = uma_zalloc_internal(hashzone, NULL, M_WAITOK, NULL);
345 newsize = UMA_HASH_SIZE_INIT;
346 }
347 if (newhash)
348 bzero(newhash, alloc);
349
350 *oldsize = newsize;
351
352 return (newhash);
353}
354
355/*
356 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout
357 * to reduce collisions. This must not be done in the regular allocation path,
358 * otherwise, we can recurse on the vm while allocating pages.
359 *
360 * Arguments:
361 * hash The hash you want to expand by a factor of two.
362 *
363 * Returns:
364 * Nothing
365 *
366 * Discussion:
367 */
368static void
369hash_expand(struct uma_hash *hash, struct slabhead *newhash, int newsize)
370{
371 struct slabhead *oldhash;
372 uma_slab_t slab;
373 int oldsize;
374 int hval;
375 int i;
376
377 if (!newhash)
378 return;
379
380 oldsize = hash->uh_hashsize;
381 oldhash = hash->uh_slab_hash;
382
383 if (oldsize >= newsize) {
384 hash_free(newhash, newsize);
385 return;
386 }
387
388 hash->uh_hashmask = newsize - 1;
389
390 /*
391 * I need to investigate hash algorithms for resizing without a
392 * full rehash.
393 */
394
395 for (i = 0; i < oldsize; i++)
396 while (!SLIST_EMPTY(&hash->uh_slab_hash[i])) {
397 slab = SLIST_FIRST(&hash->uh_slab_hash[i]);
398 SLIST_REMOVE_HEAD(&hash->uh_slab_hash[i], us_hlink);
399 hval = UMA_HASH(hash, slab->us_data);
400 SLIST_INSERT_HEAD(&newhash[hval], slab, us_hlink);
401 }
402
403 if (oldhash)
404 hash_free(oldhash, oldsize);
405
406 hash->uh_slab_hash = newhash;
407 hash->uh_hashsize = newsize;
408
409 return;
410}
411
412/*
413 * Free the hash bucket to the appropriate backing store.
414 *
415 * Arguments:
416 * slab_hash The hash bucket we're freeing
417 * hashsize The number of entries in that hash bucket
418 *
419 * Returns:
420 * Nothing
421 */
422static void
423hash_free(struct slabhead *slab_hash, int hashsize)
424{
425 if (hashsize == UMA_HASH_SIZE_INIT)
426 uma_zfree_internal(hashzone,
427 slab_hash, NULL, 0);
428 else
429 free(slab_hash, M_DEVBUF);
430}
431
432/*
433 * Frees all outstanding items in a bucket
434 *
435 * Arguments:
436 * zone The zone to free to, must be unlocked.
437 * bucket The free/alloc bucket with items, cpu queue must be locked.
438 *
439 * Returns:
440 * Nothing
441 */
442
443static void
444bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
445{
446 uma_slab_t slab;
447 int mzone;
448 void *item;
449
450 if (bucket == NULL)
451 return;
452
453 slab = NULL;
454 mzone = 0;
455
456 /* We have to lookup the slab again for malloc.. */
457 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
458 mzone = 1;
459
460 while (bucket->ub_ptr > -1) {
461 item = bucket->ub_bucket[bucket->ub_ptr];
462#ifdef INVARIANTS
463 bucket->ub_bucket[bucket->ub_ptr] = NULL;
464 KASSERT(item != NULL,
465 ("bucket_drain: botched ptr, item is NULL"));
466#endif
467 bucket->ub_ptr--;
468 /*
469 * This is extremely inefficient. The slab pointer was passed
470 * to uma_zfree_arg, but we lost it because the buckets don't
471 * hold them. This will go away when free() gets a size passed
472 * to it.
473 */
474 if (mzone)
475 slab = hash_sfind(mallochash,
476 (u_int8_t *)((unsigned long)item &
477 (~UMA_SLAB_MASK)));
478 uma_zfree_internal(zone, item, slab, 1);
479 }
480}
481
482/*
483 * Drains the per cpu caches for a zone.
484 *
485 * Arguments:
486 * zone The zone to drain, must be unlocked.
487 *
488 * Returns:
489 * Nothing
490 *
491 * This function returns with the zone locked so that the per cpu queues can
492 * not be filled until zone_drain is finished.
493 *
494 */
495static void
496cache_drain(uma_zone_t zone)
497{
498 uma_bucket_t bucket;
499 uma_cache_t cache;
500 int cpu;
501
502 /*
503 * Flush out the per cpu queues.
504 *
505 * XXX This causes unnecessary thrashing due to immediately having
506 * empty per cpu queues. I need to improve this.
507 */
508
509 /*
510 * We have to lock each cpu cache before locking the zone
511 */
512 ZONE_UNLOCK(zone);
513
514 for (cpu = 0; cpu < maxcpu; cpu++) {
515 if (CPU_ABSENT(cpu))
516 continue;
517 CPU_LOCK(zone, cpu);
518 cache = &zone->uz_cpu[cpu];
519 bucket_drain(zone, cache->uc_allocbucket);
520 bucket_drain(zone, cache->uc_freebucket);
521 }
522
523 /*
524 * Drain the bucket queues and free the buckets, we just keep two per
525 * cpu (alloc/free).
526 */
527 ZONE_LOCK(zone);
528 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
529 LIST_REMOVE(bucket, ub_link);
530 ZONE_UNLOCK(zone);
531 bucket_drain(zone, bucket);
532 uma_zfree_internal(bucketzone, bucket, NULL, 0);
533 ZONE_LOCK(zone);
534 }
535
536 /* Now we do the free queue.. */
537 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
538 LIST_REMOVE(bucket, ub_link);
539 uma_zfree_internal(bucketzone, bucket, NULL, 0);
540 }
541
542 /* We unlock here, but they will all block until the zone is unlocked */
543 for (cpu = 0; cpu < maxcpu; cpu++) {
544 if (CPU_ABSENT(cpu))
545 continue;
546 CPU_UNLOCK(zone, cpu);
547 }
548
549 zone->uz_cachefree = 0;
550}
551
552/*
553 * Frees pages from a zone back to the system. This is done on demand from
554 * the pageout daemon.
555 *
556 * Arguments:
557 * zone The zone to free pages from
558 * all Should we drain all items?
559 *
560 * Returns:
561 * Nothing.
562 */
563static void
564zone_drain(uma_zone_t zone)
565{
566 uma_slab_t slab;
567 uma_slab_t n;
568 u_int64_t extra;
569 u_int8_t flags;
570 u_int8_t *mem;
571 int i;
572
573 /*
 574 * We don't want to take pages from statically allocated zones at this
575 * time
576 */
577 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
578 return;
579
580 ZONE_LOCK(zone);
581
582 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
583 cache_drain(zone);
584
585 if (zone->uz_free < zone->uz_wssize)
586 goto finished;
587#ifdef UMA_DEBUG
588 printf("%s working set size: %llu free items: %u\n",
589 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
590#endif
591 extra = zone->uz_free - zone->uz_wssize;
592 extra /= zone->uz_ipers;
593
594 /* extra is now the number of extra slabs that we can free */
595
596 if (extra == 0)
597 goto finished;
598
599 slab = LIST_FIRST(&zone->uz_free_slab);
600 while (slab && extra) {
601 n = LIST_NEXT(slab, us_link);
602
 603 /* We have nowhere to free these to */
604 if (slab->us_flags & UMA_SLAB_BOOT) {
605 slab = n;
606 continue;
607 }
608
609 LIST_REMOVE(slab, us_link);
610 zone->uz_pages -= zone->uz_ppera;
611 zone->uz_free -= zone->uz_ipers;
612 if (zone->uz_fini)
613 for (i = 0; i < zone->uz_ipers; i++)
614 zone->uz_fini(
615 slab->us_data + (zone->uz_rsize * i),
616 zone->uz_size);
617 flags = slab->us_flags;
618 mem = slab->us_data;
619 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
620 if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
621 UMA_HASH_REMOVE(mallochash,
622 slab, slab->us_data);
623 } else {
624 UMA_HASH_REMOVE(&zone->uz_hash,
625 slab, slab->us_data);
626 }
627 uma_zfree_internal(slabzone, slab, NULL, 0);
628 } else if (zone->uz_flags & UMA_ZFLAG_MALLOC)
629 UMA_HASH_REMOVE(mallochash, slab, slab->us_data);
630#ifdef UMA_DEBUG
631 printf("%s: Returning %d bytes.\n",
632 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
633#endif
634 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
635
636 slab = n;
637 extra--;
638 }
639
640finished:
641 ZONE_UNLOCK(zone);
642}
643
644/*
645 * Allocate a new slab for a zone. This does not insert the slab onto a list.
646 *
647 * Arguments:
648 * zone The zone to allocate slabs for
649 * wait Shall we wait?
650 *
651 * Returns:
652 * The slab that was allocated or NULL if there is no memory and the
653 * caller specified M_NOWAIT.
654 *
655 */
656static uma_slab_t
657slab_zalloc(uma_zone_t zone, int wait)
658{
659 uma_slab_t slab; /* Starting slab */
660 u_int8_t *mem;
661 u_int8_t flags;
662 int i;
663
664 slab = NULL;
665
666#ifdef UMA_DEBUG
667 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
668#endif
669 ZONE_UNLOCK(zone);
670
671 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
672 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
673 if (slab == NULL) {
674 ZONE_LOCK(zone);
675 return NULL;
676 }
677 }
678
679 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
680 mtx_lock(&Giant);
681 mem = zone->uz_allocf(zone,
682 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
683 mtx_unlock(&Giant);
684 if (mem == NULL) {
685 ZONE_LOCK(zone);
686 return (NULL);
687 }
688 } else {
689 uma_slab_t tmps;
690
691 if (zone->uz_ppera > 1)
692 panic("UMA: Attemping to allocate multiple pages before vm has started.\n");
693 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
694 panic("Mallocing before uma_startup2 has been called.\n");
695 if (uma_boot_free == 0)
696 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
697 tmps = LIST_FIRST(&uma_boot_pages);
698 LIST_REMOVE(tmps, us_link);
699 uma_boot_free--;
700 mem = tmps->us_data;
701 }
702
703 ZONE_LOCK(zone);
704
 705 /* Alloc slab structure for offpage, otherwise adjust its position */
706 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
707 slab = (uma_slab_t )(mem + zone->uz_pgoff);
708 } else {
709 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC))
710 UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
711 }
712 if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
713#ifdef UMA_DEBUG
714 printf("Inserting %p into malloc hash from slab %p\n",
715 mem, slab);
716#endif
717 /* XXX Yikes! No lock on the malloc hash! */
718 UMA_HASH_INSERT(mallochash, slab, mem);
719 }
720
721 slab->us_zone = zone;
722 slab->us_data = mem;
723
724 /*
725 * This is intended to spread data out across cache lines.
726 *
727 * This code doesn't seem to work properly on x86, and on alpha
728 * it makes absolutely no performance difference. I'm sure it could
 729 * use some tuning, but Sun makes outrageous claims about its
730 * performance.
731 */
732#if 0
733 if (zone->uz_cachemax) {
734 slab->us_data += zone->uz_cacheoff;
735 zone->uz_cacheoff += UMA_CACHE_INC;
736 if (zone->uz_cacheoff > zone->uz_cachemax)
737 zone->uz_cacheoff = 0;
738 }
739#endif
740
741 slab->us_freecount = zone->uz_ipers;
742 slab->us_firstfree = 0;
743 slab->us_flags = flags;
744 for (i = 0; i < zone->uz_ipers; i++)
745 slab->us_freelist[i] = i+1;
746
747 if (zone->uz_init)
748 for (i = 0; i < zone->uz_ipers; i++)
749 zone->uz_init(slab->us_data + (zone->uz_rsize * i),
750 zone->uz_size);
751
752 zone->uz_pages += zone->uz_ppera;
753 zone->uz_free += zone->uz_ipers;
754
755 return (slab);
756}
757
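/*
 * To illustrate the free list built just above (the item count is
 * chosen for the example): a fresh four-item slab has us_firstfree = 0
 * and us_freelist = {1, 2, 3, 4}, so the allocator can hand out indices
 * 0, 1, 2, 3 in order by following each entry to its successor, with
 * us_freecount telling it when the chain is exhausted.
 */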
758/*
759 * Allocates a number of pages from the system
760 *
761 * Arguments:
762 * zone Unused
763 * bytes The number of bytes requested
764 * wait Shall we wait?
765 *
766 * Returns:
767 * A pointer to the alloced memory or possibly
768 * NULL if M_NOWAIT is set.
769 */
770static void *
771page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
772{
773 void *p; /* Returned page */
774
775 /*
776 * XXX The original zone allocator did this, but I don't think it's
777 * necessary in current.
778 */
779
780 if (lockstatus(&kernel_map->lock, NULL)) {
781 *pflag = UMA_SLAB_KMEM;
782 p = (void *) kmem_malloc(kmem_map, bytes, wait);
783 } else {
784 *pflag = UMA_SLAB_KMAP;
785 p = (void *) kmem_alloc(kernel_map, bytes);
786 }
787
788 return (p);
789}
790
791/*
792 * Allocates a number of pages from within an object
793 *
794 * Arguments:
795 * zone Unused
796 * bytes The number of bytes requested
797 * wait Shall we wait?
798 *
799 * Returns:
800 * A pointer to the alloced memory or possibly
801 * NULL if M_NOWAIT is set.
802 */
803static void *
804obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
805{
806 vm_offset_t zkva;
807 vm_offset_t retkva;
808 vm_page_t p;
809 int pages;
810
811 retkva = NULL;
812 pages = zone->uz_pages;
813
814 /*
815 * This looks a little weird since we're getting one page at a time
816 */
817 while (bytes > 0) {
818 p = vm_page_alloc(zone->uz_obj, pages,
819 VM_ALLOC_INTERRUPT);
820 if (p == NULL)
821 return (NULL);
822
823 zkva = zone->uz_kva + pages * PAGE_SIZE;
824 if (retkva == NULL)
825 retkva = zkva;
826 pmap_qenter(zkva, &p, 1);
827 bytes -= PAGE_SIZE;
828 pages += 1;
829 }
830
831 *flags = UMA_SLAB_PRIV;
832
833 return ((void *)retkva);
834}
835
836/*
837 * Frees a number of pages to the system
838 *
839 * Arguments:
840 * mem A pointer to the memory to be freed
841 * size The size of the memory being freed
842 * flags The original p->us_flags field
843 *
844 * Returns:
845 * Nothing
846 *
847 */
848static void
849page_free(void *mem, int size, u_int8_t flags)
850{
851 vm_map_t map;
852 if (flags & UMA_SLAB_KMEM)
853 map = kmem_map;
854 else if (flags & UMA_SLAB_KMAP)
855 map = kernel_map;
856 else
857 panic("UMA: page_free used with invalid flags %d\n", flags);
858
859 kmem_free(map, (vm_offset_t)mem, size);
860}
861
862/*
863 * Zero fill initializer
864 *
865 * Arguments/Returns follow uma_init specifications
866 *
867 */
868static void
869zero_init(void *mem, int size)
870{
871 bzero(mem, size);
872}
873
874/*
875 * Finish creating a small uma zone. This calculates ipers, and the zone size.
876 *
877 * Arguments
878 * zone The zone we should initialize
879 *
880 * Returns
881 * Nothing
882 */
883static void
884zone_small_init(uma_zone_t zone)
885{
886 int rsize;
887 int memused;
888 int ipers;
889
890 rsize = zone->uz_size;
891
892 if (rsize < UMA_SMALLEST_UNIT)
893 rsize = UMA_SMALLEST_UNIT;
894
895 if (rsize & zone->uz_align)
896 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
897
898 zone->uz_rsize = rsize;
899
900 rsize += 1; /* Account for the byte of linkage */
901 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
902 zone->uz_ppera = 1;
903
904 memused = zone->uz_ipers * zone->uz_rsize;
905
906 /* Can we do any better? */
907 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
908 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
909 return;
910 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
911 if (ipers > zone->uz_ipers) {
912 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
913 zone->uz_ipers = ipers;
914 }
915 }
916
917}
918
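/*
 * An illustrative sizing pass (the 4096-byte slab and 64-byte header
 * are assumed, not the real constants): a 256-byte, already aligned
 * item keeps uz_rsize = 256 and costs 257 bytes with its byte of
 * linkage, giving uz_ipers = (4096 - 64) / 257 = 15 and memused =
 * 15 * 256 = 3840.  If the 256 leftover bytes exceed UMA_MAX_WASTE,
 * the header moves off page and ipers rises to 4096 / 256 = 16.
 */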
919/*
920 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
921 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
922 * more complicated.
923 *
924 * Arguments
925 * zone The zone we should initialize
926 *
927 * Returns
928 * Nothing
929 */
930static void
931zone_large_init(uma_zone_t zone)
932{
933 int pages;
934
935 pages = zone->uz_size / UMA_SLAB_SIZE;
936
937 /* Account for remainder */
938 if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
939 pages++;
940
941 zone->uz_ppera = pages;
942 zone->uz_ipers = 1;
943
944 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
945 zone->uz_rsize = zone->uz_size;
946}
947
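/*
 * For example (sizes assumed for illustration): a 9000-byte item on
 * 4096-byte slabs gives pages = 9000 / 4096 = 2, and since 2 * 4096 is
 * less than 9000 the remainder check bumps uz_ppera to 3 pages holding
 * a single item, with the slab header kept off page.
 */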
948/*
949 * Zone header ctor. This initializes all fields, locks, etc. And inserts
950 * the zone onto the global zone list.
951 *
952 * Arguments/Returns follow uma_ctor specifications
953 * udata Actually uma_zcreat_args
954 *
955 */
956
957static void
958zone_ctor(void *mem, int size, void *udata)
959{
960 struct uma_zctor_args *arg = udata;
961 uma_zone_t zone = mem;
962 int privlc;
963 int cplen;
964 int cpu;
965
966 bzero(zone, size);
967 zone->uz_name = arg->name;
968 zone->uz_size = arg->size;
969 zone->uz_ctor = arg->ctor;
970 zone->uz_dtor = arg->dtor;
971 zone->uz_init = arg->uminit;
972 zone->uz_align = arg->align;
973 zone->uz_free = 0;
974 zone->uz_pages = 0;
975 zone->uz_flags = 0;
976 zone->uz_allocf = page_alloc;
977 zone->uz_freef = page_free;
978
979 if (arg->flags & UMA_ZONE_ZINIT)
980 zone->uz_init = zero_init;
981
982 if (arg->flags & UMA_ZONE_INTERNAL)
983 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
984
985 if (arg->flags & UMA_ZONE_MALLOC)
986 zone->uz_flags |= UMA_ZFLAG_MALLOC;
987
988 if (arg->flags & UMA_ZONE_NOFREE)
989 zone->uz_flags |= UMA_ZFLAG_NOFREE;
990
991 if (zone->uz_size > UMA_SLAB_SIZE)
992 zone_large_init(zone);
993 else
994 zone_small_init(zone);
995
996 if (arg->flags & UMA_ZONE_MTXCLASS)
997 privlc = 1;
998 else
999 privlc = 0;
1000
1001 /* We do this so that the per cpu lock name is unique for each zone */
1002 memcpy(zone->uz_lname, "PCPU ", 5);
1003 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1004 memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1005 zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1006
1007 /*
1008 * If we're putting the slab header in the actual page we need to
1009 * figure out where in each page it goes. This calculates a right
 1010 * justified offset into the memory on an ALIGN_PTR boundary.
1011 */
1012 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1013 int totsize;
1014 int waste;
1015
1016 /* Size of the slab struct and free list */
1017 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1018 if (totsize & UMA_ALIGN_PTR)
1019 totsize = (totsize & ~UMA_ALIGN_PTR) +
1020 (UMA_ALIGN_PTR + 1);
1021 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1022
1023 waste = zone->uz_pgoff;
1024 waste -= (zone->uz_ipers * zone->uz_rsize);
1025
1026 /*
1027 * This calculates how much space we have for cache line size
 1028 * optimizations. It works by offsetting each slab slightly.
1029 * Currently it breaks on x86, and so it is disabled.
1030 */
1031
1032 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1033 zone->uz_cachemax = waste - UMA_CACHE_INC;
1034 zone->uz_cacheoff = 0;
1035 }
1036
1037 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1038 + zone->uz_ipers;
1039 /* I don't think it's possible, but I'll make sure anyway */
1040 if (totsize > UMA_SLAB_SIZE) {
1041 printf("zone %s ipers %d rsize %d size %d\n",
1042 zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1043 zone->uz_size);
1044 panic("UMA slab won't fit.\n");
1045 }
1046 } else {
1047 struct slabhead *newhash;
1048 int hashsize;
1049
1050 hashsize = 0;
1051 newhash = hash_alloc(&hashsize);
1052 hash_expand(&zone->uz_hash, newhash, hashsize);
1053 zone->uz_pgoff = 0;
1054 }
1055
1056#ifdef UMA_DEBUG
1057 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1058 zone->uz_name, zone,
1059 zone->uz_size, zone->uz_ipers,
1060 zone->uz_ppera, zone->uz_pgoff);
1061#endif
1062 ZONE_LOCK_INIT(zone, privlc);
1063
1064 mtx_lock(&uma_mtx);
1065 LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1066 mtx_unlock(&uma_mtx);
1067
1068 /*
1069 * Some internal zones don't have room allocated for the per cpu
1070 * caches. If we're internal, bail out here.
1071 */
1072
1073 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1074 return;
1075
1076 if (zone->uz_ipers < UMA_BUCKET_SIZE)
1077 zone->uz_count = zone->uz_ipers - 1;
1078 else
1079 zone->uz_count = UMA_BUCKET_SIZE - 1;
1080
1081 for (cpu = 0; cpu < maxcpu; cpu++)
1082 CPU_LOCK_INIT(zone, cpu, privlc);
1083}
1084
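/*
 * Illustrative numbers for the in-page header placement computed in
 * zone_ctor above (the 64-byte header, 4096-byte slab, and 8-byte
 * pointer alignment are assumptions): with 15 items, totsize = 64 + 15
 * = 79 rounds up to 80, so uz_pgoff = 4096 - 80 = 4016 and the slab
 * header sits right-justified at the end of the page.
 */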
1085/*
1086 * Zone header dtor. This frees all data, destroys locks, frees the hash table
1087 * and removes the zone from the global list.
1088 *
1089 * Arguments/Returns follow uma_dtor specifications
1090 * udata unused
1091 */
1092
1093static void
1094zone_dtor(void *arg, int size, void *udata)
1095{
1096 uma_zone_t zone;
1097 int cpu;
1098
1099 zone = (uma_zone_t)arg;
1100
1101 mtx_lock(&uma_mtx);
1102 LIST_REMOVE(zone, uz_link);
1103 mtx_unlock(&uma_mtx);
1104
1105 ZONE_LOCK(zone);
1106 zone->uz_wssize = 0;
1107 ZONE_UNLOCK(zone);
1108
1109 zone_drain(zone);
1110 ZONE_LOCK(zone);
1111 if (zone->uz_free != 0)
1112 printf("Zone %s was not empty. Lost %d pages of memory.\n",
1113 zone->uz_name, zone->uz_pages);
1114
 1115 if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
1116 for (cpu = 0; cpu < maxcpu; cpu++)
1117 CPU_LOCK_FINI(zone, cpu);
1118
1119 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1120 hash_free(zone->uz_hash.uh_slab_hash,
1121 zone->uz_hash.uh_hashsize);
1122
1123 ZONE_UNLOCK(zone);
1124 ZONE_LOCK_FINI(zone);
1125}
1126/*
1127 * Traverses every zone in the system and calls a callback
1128 *
1129 * Arguments:
1130 * zfunc A pointer to a function which accepts a zone
1131 * as an argument.
1132 *
1133 * Returns:
1134 * Nothing
1135 */
1136static void
1137zone_foreach(void (*zfunc)(uma_zone_t))
1138{
1139 uma_zone_t zone;
1140
1141 mtx_lock(&uma_mtx);
1142 LIST_FOREACH(zone, &uma_zones, uz_link) {
1143 zfunc(zone);
1144 }
1145 mtx_unlock(&uma_mtx);
1146}
1147
1148/* Public functions */
1149/* See uma.h */
1150void
1151uma_startup(void *bootmem)
1152{
1153 struct uma_zctor_args args;
1154 uma_slab_t slab;
1155 int slabsize;
1156 int i;
1157
1158#ifdef UMA_DEBUG
1159 printf("Creating uma zone headers zone.\n");
1160#endif
1161#ifdef SMP
1162 maxcpu = mp_maxid + 1;
1163#else
1164 maxcpu = 1;
1165#endif
1166#ifdef UMA_DEBUG
1167 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1168 Debugger("stop");
1169#endif
1170 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1171 /* "manually" Create the initial zone */
1172 args.name = "UMA Zones";
1173 args.size = sizeof(struct uma_zone) +
1174 (sizeof(struct uma_cache) * (maxcpu - 1));
1175 args.ctor = zone_ctor;
1176 args.dtor = zone_dtor;
1177 args.uminit = zero_init;
1178 args.fini = NULL;
1179 args.align = 32 - 1;
1180 args.flags = UMA_ZONE_INTERNAL;
1181 /* The initial zone has no Per cpu queues so it's smaller */
1182 zone_ctor(zones, sizeof(struct uma_zone), &args);
1183
1184#ifdef UMA_DEBUG
1185 printf("Filling boot free list.\n");
1186#endif
1187 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1188 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1189 slab->us_data = (u_int8_t *)slab;
1190 slab->us_flags = UMA_SLAB_BOOT;
1191 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1192 uma_boot_free++;
1193 }
1194
1195#ifdef UMA_DEBUG
1196 printf("Creating slab zone.\n");
1197#endif
1198
1199 /*
1200 * This is the max number of free list items we'll have with
1201 * offpage slabs.
1202 */
1203
1204 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1205 slabsize /= UMA_MAX_WASTE;
 1206 slabsize++; /* In case the division rounded down */
1207 slabsize += sizeof(struct uma_slab);
1208
1209 /* Now make a zone for slab headers */
1210 slabzone = uma_zcreate("UMA Slabs",
1211 slabsize,
1212 NULL, NULL, NULL, NULL,
1213 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1214
1215 hashzone = uma_zcreate("UMA Hash",
1216 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1217 NULL, NULL, NULL, NULL,
1218 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1219
1220 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1221 NULL, NULL, NULL, NULL,
1222 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1223
1224
1225#ifdef UMA_DEBUG
1226 printf("UMA startup complete.\n");
1227#endif
1228}
1229
1230/* see uma.h */
1231void
1232uma_startup2(void *hashmem, u_long elems)
1233{
1234 bzero(hashmem, elems * sizeof(void *));
1235 mallochash->uh_slab_hash = hashmem;
1236 mallochash->uh_hashsize = elems;
1237 mallochash->uh_hashmask = elems - 1;
1238 booted = 1;
1239 bucket_enable();
1240#ifdef UMA_DEBUG
1241 printf("UMA startup2 complete.\n");
1242#endif
1243}
1244
1245/*
1246 * Initialize our callout handle
1247 *
1248 */
1249
1250static void
1251uma_startup3(void)
1252{
1253#ifdef UMA_DEBUG
1254 printf("Starting callout.\n");
1255#endif
1256 callout_init(&uma_callout, 0);
1257 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1258#ifdef UMA_DEBUG
1259 printf("UMA startup3 complete.\n");
1260#endif
1261}
1262
1263/* See uma.h */
1264uma_zone_t
1265uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init uminit,
1266 uma_fini fini, int align, u_int16_t flags)
1267
1268{
1269 struct uma_zctor_args args;
1270
1271 /* This stuff is essential for the zone ctor */
1272 args.name = name;
1273 args.size = size;
1274 args.ctor = ctor;
1275 args.dtor = dtor;
1276 args.uminit = uminit;
1277 args.fini = fini;
1278 args.align = align;
1279 args.flags = flags;
1280
1281 return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL));
1282}
1283
1284/* See uma.h */
1285void
1286uma_zdestroy(uma_zone_t zone)
1287{
1288 uma_zfree_internal(zones, zone, NULL, 0);
1289}
1290
1291/* See uma.h */
1292void *
27 *
28 */
29
30/*
31 * uma_core.c Implementation of the Universal Memory allocator
32 *
33 * This allocator is intended to replace the multitude of similar object caches
34 * in the standard FreeBSD kernel. The intent is to be flexible as well as
35 * effecient. A primary design goal is to return unused memory to the rest of
36 * the system. This will make the system as a whole more flexible due to the
37 * ability to move memory to subsystems which most need it instead of leaving
38 * pools of reserved memory unused.
39 *
40 * The basic ideas stem from similar slab/zone based allocators whose algorithms
41 * are well known.
42 *
43 */
44
45/*
46 * TODO:
47 * - Improve memory usage for large allocations
48 * - Improve INVARIANTS (0xdeadc0de write out)
49 * - Investigate cache size adjustments
50 */
51
52/* I should really use ktr.. */
53/*
54#define UMA_DEBUG 1
55#define UMA_DEBUG_ALLOC 1
56#define UMA_DEBUG_ALLOC_1 1
57*/
58
59
60#include "opt_param.h"
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/kernel.h>
64#include <sys/types.h>
65#include <sys/queue.h>
66#include <sys/malloc.h>
67#include <sys/lock.h>
68#include <sys/sysctl.h>
69#include <sys/mutex.h>
70#include <sys/smp.h>
71#include <sys/vmmeter.h>
72
73#include <machine/types.h>
74
75#include <vm/vm.h>
76#include <vm/vm_object.h>
77#include <vm/vm_page.h>
78#include <vm/vm_param.h>
79#include <vm/vm_map.h>
80#include <vm/vm_kern.h>
81#include <vm/vm_extern.h>
82#include <vm/uma.h>
83#include <vm/uma_int.h>
84
85/*
86 * This is the zone from which all zones are spawned. The idea is that even
87 * the zone heads are allocated from the allocator, so we use the bss section
88 * to bootstrap us.
89 */
90static struct uma_zone masterzone;
91static uma_zone_t zones = &masterzone;
92
93/* This is the zone from which all of uma_slab_t's are allocated. */
94static uma_zone_t slabzone;
95
96/*
97 * The initial hash tables come out of this zone so they can be allocated
98 * prior to malloc coming up.
99 */
100static uma_zone_t hashzone;
101
102/*
103 * Zone that buckets come from.
104 */
105static uma_zone_t bucketzone;
106
107/*
108 * Are we allowed to allocate buckets?
109 */
110static int bucketdisable = 1;
111
112/* Linked list of all zones in the system */
113static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
114
115/* This mutex protects the zone list */
116static struct mtx uma_mtx;
117
118/* Linked list of boot time pages */
119static LIST_HEAD(,uma_slab) uma_boot_pages =
120 LIST_HEAD_INITIALIZER(&uma_boot_pages);
121
122/* Count of free boottime pages */
123static int uma_boot_free = 0;
124
125/* Is the VM done starting up? */
126static int booted = 0;
127
128/* This is the handle used to schedule our working set calculator */
129static struct callout uma_callout;
130
131/* This is mp_maxid + 1, for use while looping over each cpu */
132static int maxcpu;
133
134/*
135 * This structure is passed as the zone ctor arg so that I don't have to create
136 * a special allocation function just for zones.
137 */
138struct uma_zctor_args {
139 char *name;
140 int size;
141 uma_ctor ctor;
142 uma_dtor dtor;
143 uma_init uminit;
144 uma_fini fini;
145 int align;
146 u_int16_t flags;
147};
148
149/*
150 * This is the malloc hash table which is used to find the zone that a
151 * malloc allocation came from. It is not currently resizeable. The
152 * memory for the actual hash bucket is allocated in kmeminit.
153 */
154struct uma_hash mhash;
155struct uma_hash *mallochash = &mhash;
156
157/* Prototypes.. */
158
159static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
160static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
161static void page_free(void *, int, u_int8_t);
162static uma_slab_t slab_zalloc(uma_zone_t, int);
163static void cache_drain(uma_zone_t);
164static void bucket_drain(uma_zone_t, uma_bucket_t);
165static void zone_drain(uma_zone_t);
166static void zone_ctor(void *, int, void *);
167static void zone_dtor(void *, int, void *);
168static void zero_init(void *, int);
169static void zone_small_init(uma_zone_t zone);
170static void zone_large_init(uma_zone_t zone);
171static void zone_foreach(void (*zfunc)(uma_zone_t));
172static void zone_timeout(uma_zone_t zone);
173static struct slabhead *hash_alloc(int *);
174static void hash_expand(struct uma_hash *, struct slabhead *, int);
175static void hash_free(struct slabhead *hash, int hashsize);
176static void uma_timeout(void *);
177static void uma_startup3(void);
178static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t);
179static void uma_zfree_internal(uma_zone_t, void *, void *, int);
180static void bucket_enable(void);
181void uma_print_zone(uma_zone_t);
182void uma_print_stats(void);
183static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
184
185SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
186 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
187SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
188
189/*
190 * This routine checks to see whether or not it's safe to enable buckets.
191 */
192
193static void
194bucket_enable(void)
195{
196 if (cnt.v_free_count < cnt.v_free_min)
197 bucketdisable = 1;
198 else
199 bucketdisable = 0;
200}
201
202
203/*
204 * Routine called by timeout which is used to fire off some time interval
205 * based calculations. (working set, stats, etc.)
206 *
207 * Arguments:
208 * arg Unused
209 *
210 * Returns:
211 * Nothing
212 */
213static void
214uma_timeout(void *unused)
215{
216 bucket_enable();
217 zone_foreach(zone_timeout);
218
219 /* Reschedule this event */
220 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
221}
222
223/*
224 * Routine to perform timeout driven calculations. This does the working set
225 * as well as hash expanding, and per cpu statistics aggregation.
226 *
227 * Arguments:
228 * zone The zone to operate on
229 *
230 * Returns:
231 * Nothing
232 */
233static void
234zone_timeout(uma_zone_t zone)
235{
236 uma_cache_t cache;
237 u_int64_t alloc;
238 int free;
239 int cpu;
240
241 alloc = 0;
242 free = 0;
243
244 /*
245 * Aggregate per cpu cache statistics back to the zone.
246 *
247 * I may rewrite this to set a flag in the per cpu cache instead of
248 * locking. If the flag is not cleared on the next round I will have
249 * to lock and do it here instead so that the statistics don't get too
250 * far out of sync.
251 */
252 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
253 for (cpu = 0; cpu < maxcpu; cpu++) {
254 if (CPU_ABSENT(cpu))
255 continue;
256 CPU_LOCK(zone, cpu);
257 cache = &zone->uz_cpu[cpu];
258 /* Add them up, and reset */
259 alloc += cache->uc_allocs;
260 cache->uc_allocs = 0;
261 if (cache->uc_allocbucket)
262 free += cache->uc_allocbucket->ub_ptr + 1;
263 if (cache->uc_freebucket)
264 free += cache->uc_freebucket->ub_ptr + 1;
265 CPU_UNLOCK(zone, cpu);
266 }
267 }
268
269 /* Now push these stats back into the zone.. */
270 ZONE_LOCK(zone);
271 zone->uz_allocs += alloc;
272
273 /*
274 * cachefree is an instantanious snapshot of what is in the per cpu
275 * caches, not an accurate counter
276 */
277 zone->uz_cachefree = free;
278
279 /*
280 * Expand the zone hash table.
281 *
282 * This is done if the number of slabs is larger than the hash size.
283 * What I'm trying to do here is completely reduce collisions. This
284 * may be a little aggressive. Should I allow for two collisions max?
285 */
286
287 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) &&
288 !(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
289 if (zone->uz_pages / zone->uz_ppera
290 >= zone->uz_hash.uh_hashsize) {
291 struct slabhead *newhash;
292 int newsize;
293
294 newsize = zone->uz_hash.uh_hashsize;
295 ZONE_UNLOCK(zone);
296 newhash = hash_alloc(&newsize);
297 ZONE_LOCK(zone);
298 hash_expand(&zone->uz_hash, newhash, newsize);
299 }
300 }
301
302 /*
303 * Here we compute the working set size as the total number of items
304 * left outstanding since the last time interval. This is slightly
305 * suboptimal. What we really want is the highest number of outstanding
306 * items during the last time quantum. This should be close enough.
307 *
308 * The working set size is used to throttle the zone_drain function.
309 * We don't want to return memory that we may need again immediately.
310 */
311 alloc = zone->uz_allocs - zone->uz_oallocs;
312 zone->uz_oallocs = zone->uz_allocs;
313 zone->uz_wssize = alloc;
314
315 ZONE_UNLOCK(zone);
316}
317
318/*
319 * Allocate and zero fill the next sized hash table from the appropriate
320 * backing store.
321 *
322 * Arguments:
323 * oldsize On input it's the size we're currently at and on output
324 * it is the expanded size.
325 *
326 * Returns:
327 * slabhead The new hash bucket or NULL if the allocation failed.
328 */
329struct slabhead *
330hash_alloc(int *oldsize)
331{
332 struct slabhead *newhash;
333 int newsize;
334 int alloc;
335
336 /* We're just going to go to a power of two greater */
337 if (*oldsize) {
338 newsize = (*oldsize) * 2;
339 alloc = sizeof(newhash[0]) * newsize;
340 /* XXX Shouldn't be abusing DEVBUF here */
341 newhash = (struct slabhead *)malloc(alloc, M_DEVBUF, M_NOWAIT);
342 } else {
343 alloc = sizeof(newhash[0]) * UMA_HASH_SIZE_INIT;
344 newhash = uma_zalloc_internal(hashzone, NULL, M_WAITOK, NULL);
345 newsize = UMA_HASH_SIZE_INIT;
346 }
347 if (newhash)
348 bzero(newhash, alloc);
349
350 *oldsize = newsize;
351
352 return (newhash);
353}
354
355/*
356 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout
357 * to reduce collisions. This must not be done in the regular allocation path,
358 * otherwise, we can recurse on the vm while allocating pages.
359 *
360 * Arguments:
361 * hash The hash you want to expand by a factor of two.
362 *
363 * Returns:
364 * Nothing
365 *
366 * Discussion:
367 */
368static void
369hash_expand(struct uma_hash *hash, struct slabhead *newhash, int newsize)
370{
371 struct slabhead *oldhash;
372 uma_slab_t slab;
373 int oldsize;
374 int hval;
375 int i;
376
377 if (!newhash)
378 return;
379
380 oldsize = hash->uh_hashsize;
381 oldhash = hash->uh_slab_hash;
382
383 if (oldsize >= newsize) {
384 hash_free(newhash, newsize);
385 return;
386 }
387
388 hash->uh_hashmask = newsize - 1;
389
390 /*
391 * I need to investigate hash algorithms for resizing without a
392 * full rehash.
393 */
394
395 for (i = 0; i < oldsize; i++)
396 while (!SLIST_EMPTY(&hash->uh_slab_hash[i])) {
397 slab = SLIST_FIRST(&hash->uh_slab_hash[i]);
398 SLIST_REMOVE_HEAD(&hash->uh_slab_hash[i], us_hlink);
399 hval = UMA_HASH(hash, slab->us_data);
400 SLIST_INSERT_HEAD(&newhash[hval], slab, us_hlink);
401 }
402
403 if (oldhash)
404 hash_free(oldhash, oldsize);
405
406 hash->uh_slab_hash = newhash;
407 hash->uh_hashsize = newsize;
408
409 return;
410}
411
412/*
413 * Free the hash bucket to the appropriate backing store.
414 *
415 * Arguments:
416 * slab_hash The hash bucket we're freeing
417 * hashsize The number of entries in that hash bucket
418 *
419 * Returns:
420 * Nothing
421 */
422static void
423hash_free(struct slabhead *slab_hash, int hashsize)
424{
425 if (hashsize == UMA_HASH_SIZE_INIT)
426 uma_zfree_internal(hashzone,
427 slab_hash, NULL, 0);
428 else
429 free(slab_hash, M_DEVBUF);
430}
431
432/*
433 * Frees all outstanding items in a bucket
434 *
435 * Arguments:
436 * zone The zone to free to, must be unlocked.
437 * bucket The free/alloc bucket with items, cpu queue must be locked.
438 *
439 * Returns:
440 * Nothing
441 */
442
443static void
444bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
445{
446 uma_slab_t slab;
447 int mzone;
448 void *item;
449
450 if (bucket == NULL)
451 return;
452
453 slab = NULL;
454 mzone = 0;
455
456 /* We have to lookup the slab again for malloc.. */
457 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
458 mzone = 1;
459
460 while (bucket->ub_ptr > -1) {
461 item = bucket->ub_bucket[bucket->ub_ptr];
462#ifdef INVARIANTS
463 bucket->ub_bucket[bucket->ub_ptr] = NULL;
464 KASSERT(item != NULL,
465 ("bucket_drain: botched ptr, item is NULL"));
466#endif
467 bucket->ub_ptr--;
468 /*
469 * This is extremely inefficient. The slab pointer was passed
470 * to uma_zfree_arg, but we lost it because the buckets don't
471 * hold them. This will go away when free() gets a size passed
472 * to it.
473 */
474 if (mzone)
475 slab = hash_sfind(mallochash,
476 (u_int8_t *)((unsigned long)item &
477 (~UMA_SLAB_MASK)));
478 uma_zfree_internal(zone, item, slab, 1);
479 }
480}
481
482/*
483 * Drains the per cpu caches for a zone.
484 *
485 * Arguments:
486 * zone The zone to drain, must be unlocked.
487 *
488 * Returns:
489 * Nothing
490 *
491 * This function returns with the zone locked so that the per cpu queues can
492 * not be filled until zone_drain is finished.
493 *
494 */
495static void
496cache_drain(uma_zone_t zone)
497{
498 uma_bucket_t bucket;
499 uma_cache_t cache;
500 int cpu;
501
502 /*
503 * Flush out the per cpu queues.
504 *
505 * XXX This causes unnecessary thrashing due to immediately having
506 * empty per cpu queues. I need to improve this.
507 */
508
509 /*
510 * We have to lock each cpu cache before locking the zone
511 */
512 ZONE_UNLOCK(zone);
513
514 for (cpu = 0; cpu < maxcpu; cpu++) {
515 if (CPU_ABSENT(cpu))
516 continue;
517 CPU_LOCK(zone, cpu);
518 cache = &zone->uz_cpu[cpu];
519 bucket_drain(zone, cache->uc_allocbucket);
520 bucket_drain(zone, cache->uc_freebucket);
521 }
522
523 /*
524 * Drain the bucket queues and free the buckets, we just keep two per
525 * cpu (alloc/free).
526 */
527 ZONE_LOCK(zone);
528 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
529 LIST_REMOVE(bucket, ub_link);
530 ZONE_UNLOCK(zone);
531 bucket_drain(zone, bucket);
532 uma_zfree_internal(bucketzone, bucket, NULL, 0);
533 ZONE_LOCK(zone);
534 }
535
536 /* Now we do the free queue.. */
537 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
538 LIST_REMOVE(bucket, ub_link);
539 uma_zfree_internal(bucketzone, bucket, NULL, 0);
540 }
541
542 /* We unlock here, but they will all block until the zone is unlocked */
543 for (cpu = 0; cpu < maxcpu; cpu++) {
544 if (CPU_ABSENT(cpu))
545 continue;
546 CPU_UNLOCK(zone, cpu);
547 }
548
549 zone->uz_cachefree = 0;
550}
551
552/*
553 * Frees pages from a zone back to the system. This is done on demand from
554 * the pageout daemon.
555 *
556 * Arguments:
557 * zone The zone to free pages from
558 * all Should we drain all items?
559 *
560 * Returns:
561 * Nothing.
562 */
563static void
564zone_drain(uma_zone_t zone)
565{
566 uma_slab_t slab;
567 uma_slab_t n;
568 u_int64_t extra;
569 u_int8_t flags;
570 u_int8_t *mem;
571 int i;
572
573 /*
574 * We don't want to take pages from staticly allocated zones at this
575 * time
576 */
577 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
578 return;
579
580 ZONE_LOCK(zone);
581
582 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
583 cache_drain(zone);
584
585 if (zone->uz_free < zone->uz_wssize)
586 goto finished;
587#ifdef UMA_DEBUG
588 printf("%s working set size: %llu free items: %u\n",
589 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
590#endif
591 extra = zone->uz_free - zone->uz_wssize;
592 extra /= zone->uz_ipers;
593
594 /* extra is now the number of extra slabs that we can free */
595
596 if (extra == 0)
597 goto finished;
598
599 slab = LIST_FIRST(&zone->uz_free_slab);
600 while (slab && extra) {
601 n = LIST_NEXT(slab, us_link);
602
603 /* We have no where to free these to */
604 if (slab->us_flags & UMA_SLAB_BOOT) {
605 slab = n;
606 continue;
607 }
608
609 LIST_REMOVE(slab, us_link);
610 zone->uz_pages -= zone->uz_ppera;
611 zone->uz_free -= zone->uz_ipers;
612 if (zone->uz_fini)
613 for (i = 0; i < zone->uz_ipers; i++)
614 zone->uz_fini(
615 slab->us_data + (zone->uz_rsize * i),
616 zone->uz_size);
617 flags = slab->us_flags;
618 mem = slab->us_data;
619 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
620 if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
621 UMA_HASH_REMOVE(mallochash,
622 slab, slab->us_data);
623 } else {
624 UMA_HASH_REMOVE(&zone->uz_hash,
625 slab, slab->us_data);
626 }
627 uma_zfree_internal(slabzone, slab, NULL, 0);
628 } else if (zone->uz_flags & UMA_ZFLAG_MALLOC)
629 UMA_HASH_REMOVE(mallochash, slab, slab->us_data);
630#ifdef UMA_DEBUG
631 printf("%s: Returning %d bytes.\n",
632 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
633#endif
634 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
635
636 slab = n;
637 extra--;
638 }
639
640finished:
641 ZONE_UNLOCK(zone);
642}
643
644/*
645 * Allocate a new slab for a zone. This does not insert the slab onto a list.
646 *
647 * Arguments:
648 * zone The zone to allocate slabs for
649 * wait Shall we wait?
650 *
651 * Returns:
652 * The slab that was allocated or NULL if there is no memory and the
653 * caller specified M_NOWAIT.
654 *
655 */
656static uma_slab_t
657slab_zalloc(uma_zone_t zone, int wait)
658{
659 uma_slab_t slab; /* Starting slab */
660 u_int8_t *mem;
661 u_int8_t flags;
662 int i;
663
664 slab = NULL;
665
666#ifdef UMA_DEBUG
667 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
668#endif
669 ZONE_UNLOCK(zone);
670
671 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
672 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
673 if (slab == NULL) {
674 ZONE_LOCK(zone);
675 return NULL;
676 }
677 }
678
679 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
680 mtx_lock(&Giant);
681 mem = zone->uz_allocf(zone,
682 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
683 mtx_unlock(&Giant);
684 if (mem == NULL) {
685 ZONE_LOCK(zone);
686 return (NULL);
687 }
688 } else {
689 uma_slab_t tmps;
690
691 if (zone->uz_ppera > 1)
692 panic("UMA: Attemping to allocate multiple pages before vm has started.\n");
693 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
694 panic("Mallocing before uma_startup2 has been called.\n");
695 if (uma_boot_free == 0)
696 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
697 tmps = LIST_FIRST(&uma_boot_pages);
698 LIST_REMOVE(tmps, us_link);
699 uma_boot_free--;
700 mem = tmps->us_data;
701 }
702
703 ZONE_LOCK(zone);
704
705 /* Alloc slab structure for offpage, otherwise adjust it's position */
706 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
707 slab = (uma_slab_t )(mem + zone->uz_pgoff);
708 } else {
709 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC))
710 UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
711 }
712 if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
713#ifdef UMA_DEBUG
714 printf("Inserting %p into malloc hash from slab %p\n",
715 mem, slab);
716#endif
717 /* XXX Yikes! No lock on the malloc hash! */
718 UMA_HASH_INSERT(mallochash, slab, mem);
719 }
720
721 slab->us_zone = zone;
722 slab->us_data = mem;
723
724 /*
725 * This is intended to spread data out across cache lines.
726 *
727 * This code doesn't seem to work properly on x86, and on alpha
728 * it makes absolutely no performance difference. I'm sure it could
729 * use some tuning, but sun makes outrageous claims about it's
730 * performance.
731 */
732#if 0
733 if (zone->uz_cachemax) {
734 slab->us_data += zone->uz_cacheoff;
735 zone->uz_cacheoff += UMA_CACHE_INC;
736 if (zone->uz_cacheoff > zone->uz_cachemax)
737 zone->uz_cacheoff = 0;
738 }
739#endif
740
741 slab->us_freecount = zone->uz_ipers;
742 slab->us_firstfree = 0;
743 slab->us_flags = flags;
744 for (i = 0; i < zone->uz_ipers; i++)
745 slab->us_freelist[i] = i+1;
746
747 if (zone->uz_init)
748 for (i = 0; i < zone->uz_ipers; i++)
749 zone->uz_init(slab->us_data + (zone->uz_rsize * i),
750 zone->uz_size);
751
752 zone->uz_pages += zone->uz_ppera;
753 zone->uz_free += zone->uz_ipers;
754
755 return (slab);
756}
757
758/*
759 * Allocates a number of pages from the system
760 *
761 * Arguments:
762 * zone Unused
763 * bytes The number of bytes requested
764 * wait Shall we wait?
765 *
766 * Returns:
767 * A pointer to the alloced memory or possibly
768 * NULL if M_NOWAIT is set.
769 */
770static void *
771page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
772{
773 void *p; /* Returned page */
774
775 /*
776 * XXX The original zone allocator did this, but I don't think it's
777 * necessary in current.
778 */
779
780 if (lockstatus(&kernel_map->lock, NULL)) {
781 *pflag = UMA_SLAB_KMEM;
782 p = (void *) kmem_malloc(kmem_map, bytes, wait);
783 } else {
784 *pflag = UMA_SLAB_KMAP;
785 p = (void *) kmem_alloc(kernel_map, bytes);
786 }
787
788 return (p);
789}
790
791/*
792 * Allocates a number of pages from within an object
793 *
794 * Arguments:
795 * zone Unused
796 * bytes The number of bytes requested
797 * wait Shall we wait?
798 *
799 * Returns:
800 * A pointer to the alloced memory or possibly
801 * NULL if M_NOWAIT is set.
802 */
803static void *
804obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
805{
806 vm_offset_t zkva;
807 vm_offset_t retkva;
808 vm_page_t p;
809 int pages;
810
811 retkva = NULL;
812 pages = zone->uz_pages;
813
814 /*
815 * This looks a little weird since we're getting one page at a time
816 */
817 while (bytes > 0) {
818 p = vm_page_alloc(zone->uz_obj, pages,
819 VM_ALLOC_INTERRUPT);
820 if (p == NULL)
821 return (NULL);
822
823 zkva = zone->uz_kva + pages * PAGE_SIZE;
824 if (retkva == NULL)
825 retkva = zkva;
826 pmap_qenter(zkva, &p, 1);
827 bytes -= PAGE_SIZE;
828 pages += 1;
829 }
830
831 *flags = UMA_SLAB_PRIV;
832
833 return ((void *)retkva);
834}
835
836/*
837 * Frees a number of pages to the system
838 *
839 * Arguments:
840 * mem A pointer to the memory to be freed
841 * size The size of the memory being freed
842 * flags The original p->us_flags field
843 *
844 * Returns:
845 * Nothing
846 *
847 */
848static void
849page_free(void *mem, int size, u_int8_t flags)
850{
851 vm_map_t map;
852 if (flags & UMA_SLAB_KMEM)
853 map = kmem_map;
854 else if (flags & UMA_SLAB_KMAP)
855 map = kernel_map;
856 else
857 panic("UMA: page_free used with invalid flags %d\n", flags);
858
859 kmem_free(map, (vm_offset_t)mem, size);
860}
861
862/*
863 * Zero fill initializer
864 *
865 * Arguments/Returns follow uma_init specifications
866 *
867 */
868static void
869zero_init(void *mem, int size)
870{
871 bzero(mem, size);
872}
873
874/*
875 * Finish creating a small uma zone. This calculates ipers, and the zone size.
876 *
877 * Arguments
878 * zone The zone we should initialize
879 *
880 * Returns
881 * Nothing
882 */
883static void
884zone_small_init(uma_zone_t zone)
885{
886 int rsize;
887 int memused;
888 int ipers;
889
890 rsize = zone->uz_size;
891
892 if (rsize < UMA_SMALLEST_UNIT)
893 rsize = UMA_SMALLEST_UNIT;
894
895 if (rsize & zone->uz_align)
896 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
897
898 zone->uz_rsize = rsize;
899
900 rsize += 1; /* Account for the byte of linkage */
901 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
902 zone->uz_ppera = 1;
903
904 memused = zone->uz_ipers * zone->uz_rsize;
905
906 /* Can we do any better? */
907 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
908 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
909 return;
910 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
911 if (ipers > zone->uz_ipers) {
912 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
913 zone->uz_ipers = ipers;
914 }
915 }
916
917}
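/*
 * Worked example (editor's addition; the numbers are illustrative
 * assumptions, not constants taken from this file).  Suppose UMA_SLAB_SIZE
 * is 4096, sizeof(struct uma_slab) is 64 and the item size after alignment
 * is 256:
 *
 *	rsize + 1 linkage byte        = 257
 *	ipers = (4096 - 64) / 257     = 15
 *	memused = 15 * 256            = 3840, leaving 256 bytes unused
 *
 * If UMA_MAX_WASTE were smaller than 256 and the zone is not INTERNAL,
 * the off-page check above would raise ipers to 4096 / 256 = 16 and set
 * UMA_ZFLAG_OFFPAGE so the header no longer competes for slab space.
 */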
918
919/*
920 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
921 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
922 * more complicated.
923 *
924 * Arguments
925 * zone The zone we should initialize
926 *
927 * Returns
928 * Nothing
929 */
930static void
931zone_large_init(uma_zone_t zone)
932{
933 int pages;
934
935 pages = zone->uz_size / UMA_SLAB_SIZE;
936
937 /* Account for remainder */
938 if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
939 pages++;
940
941 zone->uz_ppera = pages;
942 zone->uz_ipers = 1;
943
944 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
945 zone->uz_rsize = zone->uz_size;
946}
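/*
 * Worked example (editor's addition, assuming UMA_SLAB_SIZE is 4096):
 * a 9000 byte zone computes pages = 9000 / 4096 = 2, the remainder check
 * bumps it to 3, and the zone ends up with uz_ppera = 3, uz_ipers = 1 and
 * an off-page slab header, i.e. one three-page allocation per item.
 */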
947
948/*
949 * Zone header ctor. This initializes all fields, locks, etc. And inserts
950 * the zone onto the global zone list.
951 *
952 * Arguments/Returns follow uma_ctor specifications
953	 * udata  Actually uma_zctor_args
954 *
955 */
956
957static void
958zone_ctor(void *mem, int size, void *udata)
959{
960 struct uma_zctor_args *arg = udata;
961 uma_zone_t zone = mem;
962 int privlc;
963 int cplen;
964 int cpu;
965
966 bzero(zone, size);
967 zone->uz_name = arg->name;
968 zone->uz_size = arg->size;
969 zone->uz_ctor = arg->ctor;
970 zone->uz_dtor = arg->dtor;
971 zone->uz_init = arg->uminit;
972 zone->uz_align = arg->align;
973 zone->uz_free = 0;
974 zone->uz_pages = 0;
975 zone->uz_flags = 0;
976 zone->uz_allocf = page_alloc;
977 zone->uz_freef = page_free;
978
979 if (arg->flags & UMA_ZONE_ZINIT)
980 zone->uz_init = zero_init;
981
982 if (arg->flags & UMA_ZONE_INTERNAL)
983 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
984
985 if (arg->flags & UMA_ZONE_MALLOC)
986 zone->uz_flags |= UMA_ZFLAG_MALLOC;
987
988 if (arg->flags & UMA_ZONE_NOFREE)
989 zone->uz_flags |= UMA_ZFLAG_NOFREE;
990
991 if (zone->uz_size > UMA_SLAB_SIZE)
992 zone_large_init(zone);
993 else
994 zone_small_init(zone);
995
996 if (arg->flags & UMA_ZONE_MTXCLASS)
997 privlc = 1;
998 else
999 privlc = 0;
1000
1001 /* We do this so that the per cpu lock name is unique for each zone */
1002 memcpy(zone->uz_lname, "PCPU ", 5);
1003 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1004 memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1005 zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1006
1007 /*
1008 * If we're putting the slab header in the actual page we need to
1009	 * figure out where in each page it goes. This calculates a right-
1010	 * justified offset into the memory on an ALIGN_PTR boundary.
1011 */
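	/*
	 * Editor's example (illustrative numbers only, continuing the
	 * comment above): with an assumed 4096 byte slab, 15 items, a
	 * 64 byte struct uma_slab and 8 byte pointer alignment, totsize
	 * starts at 64 + 15 = 79, is rounded up to 80, and uz_pgoff
	 * becomes 4096 - 80 = 4016, so the header sits flush against the
	 * end of the slab.
	 */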
1012 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1013 int totsize;
1014 int waste;
1015
1016 /* Size of the slab struct and free list */
1017 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1018 if (totsize & UMA_ALIGN_PTR)
1019 totsize = (totsize & ~UMA_ALIGN_PTR) +
1020 (UMA_ALIGN_PTR + 1);
1021 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1022
1023 waste = zone->uz_pgoff;
1024 waste -= (zone->uz_ipers * zone->uz_rsize);
1025
1026 /*
1027 * This calculates how much space we have for cache line size
1028	 * optimizations. It works by offsetting each slab slightly.
1029 * Currently it breaks on x86, and so it is disabled.
1030 */
1031
1032 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1033 zone->uz_cachemax = waste - UMA_CACHE_INC;
1034 zone->uz_cacheoff = 0;
1035 }
1036
1037 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1038 + zone->uz_ipers;
1039 /* I don't think it's possible, but I'll make sure anyway */
1040 if (totsize > UMA_SLAB_SIZE) {
1041 printf("zone %s ipers %d rsize %d size %d\n",
1042 zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1043 zone->uz_size);
1044 panic("UMA slab won't fit.\n");
1045 }
1046 } else {
1047 struct slabhead *newhash;
1048 int hashsize;
1049
1050 hashsize = 0;
1051 newhash = hash_alloc(&hashsize);
1052 hash_expand(&zone->uz_hash, newhash, hashsize);
1053 zone->uz_pgoff = 0;
1054 }
1055
1056#ifdef UMA_DEBUG
1057 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1058 zone->uz_name, zone,
1059 zone->uz_size, zone->uz_ipers,
1060 zone->uz_ppera, zone->uz_pgoff);
1061#endif
1062 ZONE_LOCK_INIT(zone, privlc);
1063
1064 mtx_lock(&uma_mtx);
1065 LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1066 mtx_unlock(&uma_mtx);
1067
1068 /*
1069 * Some internal zones don't have room allocated for the per cpu
1070 * caches. If we're internal, bail out here.
1071 */
1072
1073 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1074 return;
1075
1076 if (zone->uz_ipers < UMA_BUCKET_SIZE)
1077 zone->uz_count = zone->uz_ipers - 1;
1078 else
1079 zone->uz_count = UMA_BUCKET_SIZE - 1;
1080
1081 for (cpu = 0; cpu < maxcpu; cpu++)
1082 CPU_LOCK_INIT(zone, cpu, privlc);
1083}
1084
1085/*
1086 * Zone header dtor. This frees all data, destroys locks, frees the hash table
1087 * and removes the zone from the global list.
1088 *
1089 * Arguments/Returns follow uma_dtor specifications
1090 * udata unused
1091 */
1092
1093static void
1094zone_dtor(void *arg, int size, void *udata)
1095{
1096 uma_zone_t zone;
1097 int cpu;
1098
1099 zone = (uma_zone_t)arg;
1100
1101 mtx_lock(&uma_mtx);
1102 LIST_REMOVE(zone, uz_link);
1103 mtx_unlock(&uma_mtx);
1104
1105 ZONE_LOCK(zone);
1106 zone->uz_wssize = 0;
1107 ZONE_UNLOCK(zone);
1108
1109 zone_drain(zone);
1110 ZONE_LOCK(zone);
1111 if (zone->uz_free != 0)
1112 printf("Zone %s was not empty. Lost %d pages of memory.\n",
1113 zone->uz_name, zone->uz_pages);
1114
1115 if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) != 0)
1116 for (cpu = 0; cpu < maxcpu; cpu++)
1117 CPU_LOCK_FINI(zone, cpu);
1118
1119 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1120 hash_free(zone->uz_hash.uh_slab_hash,
1121 zone->uz_hash.uh_hashsize);
1122
1123 ZONE_UNLOCK(zone);
1124 ZONE_LOCK_FINI(zone);
1125}
1126/*
1127 * Traverses every zone in the system and calls a callback
1128 *
1129 * Arguments:
1130 * zfunc A pointer to a function which accepts a zone
1131 * as an argument.
1132 *
1133 * Returns:
1134 * Nothing
1135 */
1136static void
1137zone_foreach(void (*zfunc)(uma_zone_t))
1138{
1139 uma_zone_t zone;
1140
1141 mtx_lock(&uma_mtx);
1142 LIST_FOREACH(zone, &uma_zones, uz_link) {
1143 zfunc(zone);
1144 }
1145 mtx_unlock(&uma_mtx);
1146}
1147
1148/* Public functions */
1149/* See uma.h */
1150void
1151uma_startup(void *bootmem)
1152{
1153 struct uma_zctor_args args;
1154 uma_slab_t slab;
1155 int slabsize;
1156 int i;
1157
1158#ifdef UMA_DEBUG
1159 printf("Creating uma zone headers zone.\n");
1160#endif
1161#ifdef SMP
1162 maxcpu = mp_maxid + 1;
1163#else
1164 maxcpu = 1;
1165#endif
1166#ifdef UMA_DEBUG
1167 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1168 Debugger("stop");
1169#endif
1170 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1171	/* "Manually" create the initial zone */
1172 args.name = "UMA Zones";
1173 args.size = sizeof(struct uma_zone) +
1174 (sizeof(struct uma_cache) * (maxcpu - 1));
1175 args.ctor = zone_ctor;
1176 args.dtor = zone_dtor;
1177 args.uminit = zero_init;
1178 args.fini = NULL;
1179 args.align = 32 - 1;
1180 args.flags = UMA_ZONE_INTERNAL;
1181	/* The initial zone has no per cpu queues so it's smaller */
1182 zone_ctor(zones, sizeof(struct uma_zone), &args);
1183
1184#ifdef UMA_DEBUG
1185 printf("Filling boot free list.\n");
1186#endif
1187 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1188 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1189 slab->us_data = (u_int8_t *)slab;
1190 slab->us_flags = UMA_SLAB_BOOT;
1191 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1192 uma_boot_free++;
1193 }
1194
1195#ifdef UMA_DEBUG
1196 printf("Creating slab zone.\n");
1197#endif
1198
1199 /*
1200 * This is the max number of free list items we'll have with
1201 * offpage slabs.
1202 */
1203
1204 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1205 slabsize /= UMA_MAX_WASTE;
1206	slabsize++;	/* In case it was rounded down */
1207 slabsize += sizeof(struct uma_slab);
1208
1209 /* Now make a zone for slab headers */
1210 slabzone = uma_zcreate("UMA Slabs",
1211 slabsize,
1212 NULL, NULL, NULL, NULL,
1213 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1214
1215 hashzone = uma_zcreate("UMA Hash",
1216 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1217 NULL, NULL, NULL, NULL,
1218 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1219
1220 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1221 NULL, NULL, NULL, NULL,
1222 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1223
1224
1225#ifdef UMA_DEBUG
1226 printf("UMA startup complete.\n");
1227#endif
1228}
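/*
 * Editor's note on the slabsize computation above (illustrative numbers
 * only): with an assumed 4096 byte slab, a 64 byte struct uma_slab and a
 * UMA_MAX_WASTE of 256, the slab header zone item is sized as
 * (4096 - 64) / 256 + 1 = 16 free-list bytes plus the 64 byte header,
 * i.e. 80 bytes, which per the comment above covers the largest free list
 * an off-page slab should ever need.
 */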
1229
1230/* see uma.h */
1231void
1232uma_startup2(void *hashmem, u_long elems)
1233{
1234 bzero(hashmem, elems * sizeof(void *));
1235 mallochash->uh_slab_hash = hashmem;
1236 mallochash->uh_hashsize = elems;
1237 mallochash->uh_hashmask = elems - 1;
1238 booted = 1;
1239 bucket_enable();
1240#ifdef UMA_DEBUG
1241 printf("UMA startup2 complete.\n");
1242#endif
1243}
1244
1245/*
1246 * Initialize our callout handle
1247 *
1248 */
1249
1250static void
1251uma_startup3(void)
1252{
1253#ifdef UMA_DEBUG
1254 printf("Starting callout.\n");
1255#endif
1256 callout_init(&uma_callout, 0);
1257 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1258#ifdef UMA_DEBUG
1259 printf("UMA startup3 complete.\n");
1260#endif
1261}
1262
1263/* See uma.h */
1264uma_zone_t
1265uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init uminit,
1266 uma_fini fini, int align, u_int16_t flags)
1267
1268{
1269 struct uma_zctor_args args;
1270
1271 /* This stuff is essential for the zone ctor */
1272 args.name = name;
1273 args.size = size;
1274 args.ctor = ctor;
1275 args.dtor = dtor;
1276 args.uminit = uminit;
1277 args.fini = fini;
1278 args.align = align;
1279 args.flags = flags;
1280
1281 return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL));
1282}
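/*
 * Editor's usage sketch (not part of the original file; "foo" and
 * struct foo are made-up names).  A typical consumer of the public API
 * implemented here would look roughly like:
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	p = uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree_arg(foo_zone, p, NULL);
 *	uma_zdestroy(foo_zone);
 *
 * M_ZERO is honored by the allocation paths as of this revision.
 */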
1283
1284/* See uma.h */
1285void
1286uma_zdestroy(uma_zone_t zone)
1287{
1288 uma_zfree_internal(zones, zone, NULL, 0);
1289}
1290
1291/* See uma.h */
1292void *
1293uma_zalloc_arg(uma_zone_t zone, void *udata, int wait)
1293uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1294{
1295 void *item;
1296 uma_cache_t cache;
1297 uma_bucket_t bucket;
1298 int cpu;
1299
1300 /* This is the fast path allocation */
1301#ifdef UMA_DEBUG_ALLOC_1
1302 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1303#endif
1304
1305zalloc_restart:
1306 cpu = PCPU_GET(cpuid);
1307 CPU_LOCK(zone, cpu);
1308 cache = &zone->uz_cpu[cpu];
1309
1310zalloc_start:
1311 bucket = cache->uc_allocbucket;
1312
1313 if (bucket) {
1314 if (bucket->ub_ptr > -1) {
1315 item = bucket->ub_bucket[bucket->ub_ptr];
1316#ifdef INVARIANTS
1317 bucket->ub_bucket[bucket->ub_ptr] = NULL;
1318#endif
1319 bucket->ub_ptr--;
1320 KASSERT(item != NULL,
1321 ("uma_zalloc: Bucket pointer mangled."));
1322 cache->uc_allocs++;
1323 CPU_UNLOCK(zone, cpu);
1324 if (zone->uz_ctor)
1325 zone->uz_ctor(item, zone->uz_size, udata);
1294{
1295 void *item;
1296 uma_cache_t cache;
1297 uma_bucket_t bucket;
1298 int cpu;
1299
1300 /* This is the fast path allocation */
1301#ifdef UMA_DEBUG_ALLOC_1
1302 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1303#endif
1304
1305zalloc_restart:
1306 cpu = PCPU_GET(cpuid);
1307 CPU_LOCK(zone, cpu);
1308 cache = &zone->uz_cpu[cpu];
1309
1310zalloc_start:
1311 bucket = cache->uc_allocbucket;
1312
1313 if (bucket) {
1314 if (bucket->ub_ptr > -1) {
1315 item = bucket->ub_bucket[bucket->ub_ptr];
1316#ifdef INVARIANTS
1317 bucket->ub_bucket[bucket->ub_ptr] = NULL;
1318#endif
1319 bucket->ub_ptr--;
1320 KASSERT(item != NULL,
1321 ("uma_zalloc: Bucket pointer mangled."));
1322 cache->uc_allocs++;
1323 CPU_UNLOCK(zone, cpu);
1324 if (zone->uz_ctor)
1325 zone->uz_ctor(item, zone->uz_size, udata);
1326 if (flags & M_ZERO)
1327 bzero(item, zone->uz_size);
1326 return (item);
1327 } else if (cache->uc_freebucket) {
1328 /*
1329 * We have run out of items in our allocbucket.
1330 * See if we can switch with our free bucket.
1331 */
1332 if (cache->uc_freebucket->ub_ptr > -1) {
1333 uma_bucket_t swap;
1334
1335#ifdef UMA_DEBUG_ALLOC
1336 printf("uma_zalloc: Swapping empty with alloc.\n");
1337#endif
1338 swap = cache->uc_freebucket;
1339 cache->uc_freebucket = cache->uc_allocbucket;
1340 cache->uc_allocbucket = swap;
1341
1342 goto zalloc_start;
1343 }
1344 }
1345 }
1346 ZONE_LOCK(zone);
1347 /* Since we have locked the zone we may as well send back our stats */
1348 zone->uz_allocs += cache->uc_allocs;
1349 cache->uc_allocs = 0;
1350
1351 /* Our old one is now a free bucket */
1352 if (cache->uc_allocbucket) {
1353 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1354 ("uma_zalloc_arg: Freeing a non free bucket."));
1355 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1356 cache->uc_allocbucket, ub_link);
1357 cache->uc_allocbucket = NULL;
1358 }
1359
1360 /* Check the free list for a new alloc bucket */
1361 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1362 KASSERT(bucket->ub_ptr != -1,
1363 ("uma_zalloc_arg: Returning an empty bucket."));
1364
1365 LIST_REMOVE(bucket, ub_link);
1366 cache->uc_allocbucket = bucket;
1367 ZONE_UNLOCK(zone);
1368 goto zalloc_start;
1369 }
1370 /* Bump up our uz_count so we get here less */
1371 if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1372 zone->uz_count++;
1373
1374 /* We are no longer associated with this cpu!!! */
1375 CPU_UNLOCK(zone, cpu);
1376
1377 /*
1378	 * Now let's just fill a bucket and put it on the free list. If that
1379	 * works we'll restart the allocation from the beginning.
1380 *
1381 * Try this zone's free list first so we don't allocate extra buckets.
1382 */
1383
1384 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL)
1385 LIST_REMOVE(bucket, ub_link);
1386
1387 /* Now we no longer need the zone lock. */
1388 ZONE_UNLOCK(zone);
1389
1390 if (bucket == NULL)
1391 bucket = uma_zalloc_internal(bucketzone,
1328 return (item);
1329 } else if (cache->uc_freebucket) {
1330 /*
1331 * We have run out of items in our allocbucket.
1332 * See if we can switch with our free bucket.
1333 */
1334 if (cache->uc_freebucket->ub_ptr > -1) {
1335 uma_bucket_t swap;
1336
1337#ifdef UMA_DEBUG_ALLOC
1338 printf("uma_zalloc: Swapping empty with alloc.\n");
1339#endif
1340 swap = cache->uc_freebucket;
1341 cache->uc_freebucket = cache->uc_allocbucket;
1342 cache->uc_allocbucket = swap;
1343
1344 goto zalloc_start;
1345 }
1346 }
1347 }
1348 ZONE_LOCK(zone);
1349 /* Since we have locked the zone we may as well send back our stats */
1350 zone->uz_allocs += cache->uc_allocs;
1351 cache->uc_allocs = 0;
1352
1353 /* Our old one is now a free bucket */
1354 if (cache->uc_allocbucket) {
1355 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1356 ("uma_zalloc_arg: Freeing a non free bucket."));
1357 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1358 cache->uc_allocbucket, ub_link);
1359 cache->uc_allocbucket = NULL;
1360 }
1361
1362 /* Check the free list for a new alloc bucket */
1363 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1364 KASSERT(bucket->ub_ptr != -1,
1365 ("uma_zalloc_arg: Returning an empty bucket."));
1366
1367 LIST_REMOVE(bucket, ub_link);
1368 cache->uc_allocbucket = bucket;
1369 ZONE_UNLOCK(zone);
1370 goto zalloc_start;
1371 }
1372 /* Bump up our uz_count so we get here less */
1373 if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1374 zone->uz_count++;
1375
1376 /* We are no longer associated with this cpu!!! */
1377 CPU_UNLOCK(zone, cpu);
1378
1379 /*
1380	 * Now let's just fill a bucket and put it on the free list. If that
1381	 * works we'll restart the allocation from the beginning.
1382 *
1383 * Try this zone's free list first so we don't allocate extra buckets.
1384 */
1385
1386 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL)
1387 LIST_REMOVE(bucket, ub_link);
1388
1389 /* Now we no longer need the zone lock. */
1390 ZONE_UNLOCK(zone);
1391
1392 if (bucket == NULL)
1393 bucket = uma_zalloc_internal(bucketzone,
1392 NULL, wait, NULL);
1394 NULL, flags, NULL);
1393
1394 if (bucket != NULL) {
1395#ifdef INVARIANTS
1396 bzero(bucket, bucketzone->uz_size);
1397#endif
1398 bucket->ub_ptr = -1;
1399
1395
1396 if (bucket != NULL) {
1397#ifdef INVARIANTS
1398 bzero(bucket, bucketzone->uz_size);
1399#endif
1400 bucket->ub_ptr = -1;
1401
1400 if (uma_zalloc_internal(zone, udata, wait, bucket))
1402 if (uma_zalloc_internal(zone, udata, flags, bucket))
1401 goto zalloc_restart;
1402 else
1403 uma_zfree_internal(bucketzone, bucket, NULL, 0);
1404 }
1405 /*
1406 * We may not get a bucket if we recurse, so
1407 * return an actual item.
1408 */
1409#ifdef UMA_DEBUG
1410 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1411#endif
1412
1403 goto zalloc_restart;
1404 else
1405 uma_zfree_internal(bucketzone, bucket, NULL, 0);
1406 }
1407 /*
1408 * We may not get a bucket if we recurse, so
1409 * return an actual item.
1410 */
1411#ifdef UMA_DEBUG
1412 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1413#endif
1414
1413 return (uma_zalloc_internal(zone, udata, wait, NULL));
1415 return (uma_zalloc_internal(zone, udata, flags, NULL));
1414}
1415
1416/*
1417 * Allocates an item for an internal zone OR fills a bucket
1418 *
1419 * Arguments
1420 * zone The zone to alloc for.
1421 * udata The data to be passed to the constructor.
1416}
1417
1418/*
1419 * Allocates an item for an internal zone OR fills a bucket
1420 *
1421 * Arguments
1422 * zone The zone to alloc for.
1423 * udata The data to be passed to the constructor.
1422 * wait M_WAITOK or M_NOWAIT.
1424 * flags M_WAITOK, M_NOWAIT, M_ZERO.
1423 * bucket The bucket to fill or NULL
1424 *
1425 * Returns
1426 * NULL if there is no memory and M_NOWAIT is set
1427	 * An item if called on an internal zone
1428 * Non NULL if called to fill a bucket and it was successful.
1429 *
1430 * Discussion:
1431 * This was much cleaner before it had to do per cpu caches. It is
1432 * complicated now because it has to handle the simple internal case, and
1433 * the more involved bucket filling and allocation.
1434 */
1435
1436static void *
1425 * bucket The bucket to fill or NULL
1426 *
1427 * Returns
1428 * NULL if there is no memory and M_NOWAIT is set
1429	 * An item if called on an internal zone
1430 * Non NULL if called to fill a bucket and it was successful.
1431 *
1432 * Discussion:
1433 * This was much cleaner before it had to do per cpu caches. It is
1434 * complicated now because it has to handle the simple internal case, and
1435 * the more involved bucket filling and allocation.
1436 */
1437
1438static void *
1437uma_zalloc_internal(uma_zone_t zone, void *udata, int wait, uma_bucket_t bucket)
1439uma_zalloc_internal(uma_zone_t zone, void *udata, int flags, uma_bucket_t bucket)
1438{
1439 uma_slab_t slab;
1440 u_int8_t freei;
1441 void *item;
1442
1443 item = NULL;
1444
1445 /*
1446 * This is to stop us from allocating per cpu buckets while we're
1447 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
1448 * boot pages.
1449 */
1450
1451 if (bucketdisable && zone == bucketzone)
1452 return (NULL);
1453
1454#ifdef UMA_DEBUG_ALLOC
1455 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1456#endif
1457 ZONE_LOCK(zone);
1458
1459 /*
1460 * This code is here to limit the number of simultaneous bucket fills
1461 * for any given zone to the number of per cpu caches in this zone. This
1462 * is done so that we don't allocate more memory than we really need.
1463 */
1464
1465 if (bucket) {
1466#ifdef SMP
1467 if (zone->uz_fills >= mp_ncpus) {
1468#else
1469 if (zone->uz_fills > 1) {
1470#endif
1471 ZONE_UNLOCK(zone);
1472 return (NULL);
1473 }
1474
1475 zone->uz_fills++;
1476 }
1477
1478new_slab:
1479
1480 /* Find a slab with some space */
1481 if (zone->uz_free) {
1482 if (!LIST_EMPTY(&zone->uz_part_slab)) {
1483 slab = LIST_FIRST(&zone->uz_part_slab);
1484 } else {
1485 slab = LIST_FIRST(&zone->uz_free_slab);
1486 LIST_REMOVE(slab, us_link);
1487 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1488 }
1489 } else {
1490 /*
1491 * This is to prevent us from recursively trying to allocate
1492 * buckets. The problem is that if an allocation forces us to
1493 * grab a new bucket we will call page_alloc, which will go off
1494 * and cause the vm to allocate vm_map_entries. If we need new
1495 * buckets there too we will recurse in kmem_alloc and bad
1496 * things happen. So instead we return a NULL bucket, and make
1497 * the code that allocates buckets smart enough to deal with it */
1498 if (zone == bucketzone && zone->uz_recurse != 0) {
1499 ZONE_UNLOCK(zone);
1500 return (NULL);
1501 }
1502 while (zone->uz_maxpages &&
1503 zone->uz_pages >= zone->uz_maxpages) {
1504 zone->uz_flags |= UMA_ZFLAG_FULL;
1505
1440{
1441 uma_slab_t slab;
1442 u_int8_t freei;
1443 void *item;
1444
1445 item = NULL;
1446
1447 /*
1448 * This is to stop us from allocating per cpu buckets while we're
1449 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
1450 * boot pages.
1451 */
1452
1453 if (bucketdisable && zone == bucketzone)
1454 return (NULL);
1455
1456#ifdef UMA_DEBUG_ALLOC
1457 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1458#endif
1459 ZONE_LOCK(zone);
1460
1461 /*
1462 * This code is here to limit the number of simultaneous bucket fills
1463 * for any given zone to the number of per cpu caches in this zone. This
1464 * is done so that we don't allocate more memory than we really need.
1465 */
1466
1467 if (bucket) {
1468#ifdef SMP
1469 if (zone->uz_fills >= mp_ncpus) {
1470#else
1471 if (zone->uz_fills > 1) {
1472#endif
1473 ZONE_UNLOCK(zone);
1474 return (NULL);
1475 }
1476
1477 zone->uz_fills++;
1478 }
1479
1480new_slab:
1481
1482 /* Find a slab with some space */
1483 if (zone->uz_free) {
1484 if (!LIST_EMPTY(&zone->uz_part_slab)) {
1485 slab = LIST_FIRST(&zone->uz_part_slab);
1486 } else {
1487 slab = LIST_FIRST(&zone->uz_free_slab);
1488 LIST_REMOVE(slab, us_link);
1489 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1490 }
1491 } else {
1492 /*
1493 * This is to prevent us from recursively trying to allocate
1494 * buckets. The problem is that if an allocation forces us to
1495 * grab a new bucket we will call page_alloc, which will go off
1496 * and cause the vm to allocate vm_map_entries. If we need new
1497 * buckets there too we will recurse in kmem_alloc and bad
1498 * things happen. So instead we return a NULL bucket, and make
1499 * the code that allocates buckets smart enough to deal with it */
1500 if (zone == bucketzone && zone->uz_recurse != 0) {
1501 ZONE_UNLOCK(zone);
1502 return (NULL);
1503 }
1504 while (zone->uz_maxpages &&
1505 zone->uz_pages >= zone->uz_maxpages) {
1506 zone->uz_flags |= UMA_ZFLAG_FULL;
1507
1506 if (wait & M_WAITOK)
1508 if (flags & M_WAITOK)
1507 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1508 else
1509 goto alloc_fail;
1510
1511 goto new_slab;
1512 }
1513
1514 zone->uz_recurse++;
1509 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1510 else
1511 goto alloc_fail;
1512
1513 goto new_slab;
1514 }
1515
1516 zone->uz_recurse++;
1515 slab = slab_zalloc(zone, wait);
1517 slab = slab_zalloc(zone, flags);
1516 zone->uz_recurse--;
1517 /*
1518 * We might not have been able to get a slab but another cpu
1519 * could have while we were unlocked. If we did get a slab put
1520 * it on the partially used slab list. If not check the free
1521 * count and restart or fail accordingly.
1522 */
1523 if (slab)
1524 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1525 else if (zone->uz_free == 0)
1526 goto alloc_fail;
1527 else
1528 goto new_slab;
1529 }
1530 /*
1531	 * If this is our first time through, put this guy on the list.
1532 */
1533 if (bucket != NULL && bucket->ub_ptr == -1)
1534 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1535 bucket, ub_link);
1536
1537
1538 while (slab->us_freecount) {
1539 freei = slab->us_firstfree;
1540 slab->us_firstfree = slab->us_freelist[freei];
1541#ifdef INVARIANTS
1542 slab->us_freelist[freei] = 255;
1543#endif
1544 slab->us_freecount--;
1545 zone->uz_free--;
1546 item = slab->us_data + (zone->uz_rsize * freei);
1547
1548 if (bucket == NULL) {
1549 zone->uz_allocs++;
1550 break;
1551 }
1552 bucket->ub_bucket[++bucket->ub_ptr] = item;
1553
1554 /* Don't overfill the bucket! */
1555 if (bucket->ub_ptr == zone->uz_count)
1556 break;
1557 }
1558
1559 /* Move this slab to the full list */
1560 if (slab->us_freecount == 0) {
1561 LIST_REMOVE(slab, us_link);
1562 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1563 }
1564
1565 if (bucket != NULL) {
1566 /* Try to keep the buckets totally full, but don't block */
1567 if (bucket->ub_ptr < zone->uz_count) {
1518 zone->uz_recurse--;
1519 /*
1520 * We might not have been able to get a slab but another cpu
1521 * could have while we were unlocked. If we did get a slab put
1522 * it on the partially used slab list. If not check the free
1523 * count and restart or fail accordingly.
1524 */
1525 if (slab)
1526 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1527 else if (zone->uz_free == 0)
1528 goto alloc_fail;
1529 else
1530 goto new_slab;
1531 }
1532 /*
1533	 * If this is our first time through, put this guy on the list.
1534 */
1535 if (bucket != NULL && bucket->ub_ptr == -1)
1536 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1537 bucket, ub_link);
1538
1539
1540 while (slab->us_freecount) {
1541 freei = slab->us_firstfree;
1542 slab->us_firstfree = slab->us_freelist[freei];
1543#ifdef INVARIANTS
1544 slab->us_freelist[freei] = 255;
1545#endif
1546 slab->us_freecount--;
1547 zone->uz_free--;
1548 item = slab->us_data + (zone->uz_rsize * freei);
1549
1550 if (bucket == NULL) {
1551 zone->uz_allocs++;
1552 break;
1553 }
1554 bucket->ub_bucket[++bucket->ub_ptr] = item;
1555
1556 /* Don't overfill the bucket! */
1557 if (bucket->ub_ptr == zone->uz_count)
1558 break;
1559 }
1560
1561 /* Move this slab to the full list */
1562 if (slab->us_freecount == 0) {
1563 LIST_REMOVE(slab, us_link);
1564 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1565 }
1566
1567 if (bucket != NULL) {
1568 /* Try to keep the buckets totally full, but don't block */
1569 if (bucket->ub_ptr < zone->uz_count) {
1568 wait = M_NOWAIT;
1570 flags |= M_NOWAIT;
1571 flags &= ~M_WAITOK;
1569 goto new_slab;
1570 } else
1571 zone->uz_fills--;
1572 }
1573
1574 ZONE_UNLOCK(zone);
1575
1576 /* Only construct at this time if we're not filling a bucket */
1572 goto new_slab;
1573 } else
1574 zone->uz_fills--;
1575 }
1576
1577 ZONE_UNLOCK(zone);
1578
1579 /* Only construct at this time if we're not filling a bucket */
1577 if (bucket == NULL && zone->uz_ctor != NULL)
1580 if (bucket == NULL && zone->uz_ctor != NULL) {
1578 zone->uz_ctor(item, zone->uz_size, udata);
1581 zone->uz_ctor(item, zone->uz_size, udata);
1582 if (flags & M_ZERO)
1583 bzero(item, zone->uz_size);
1584 }
1579
1580 return (item);
1581
1582alloc_fail:
1583 if (bucket != NULL)
1584 zone->uz_fills--;
1585 ZONE_UNLOCK(zone);
1586
1587 if (bucket != NULL && bucket->ub_ptr != -1)
1588 return (bucket);
1589
1590 return (NULL);
1591}
1592
1593/* See uma.h */
1594void
1595uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1596{
1597 uma_cache_t cache;
1598 uma_bucket_t bucket;
1599 int cpu;
1600
1601 /* This is the fast path free */
1602#ifdef UMA_DEBUG_ALLOC_1
1603 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1604#endif
1605 /*
1606 * The race here is acceptable. If we miss it we'll just have to wait
1607 * a little longer for the limits to be reset.
1608 */
1609
1610 if (zone->uz_flags & UMA_ZFLAG_FULL)
1611 goto zfree_internal;
1612
1613zfree_restart:
1614 cpu = PCPU_GET(cpuid);
1615 CPU_LOCK(zone, cpu);
1616 cache = &zone->uz_cpu[cpu];
1617
1618zfree_start:
1619 bucket = cache->uc_freebucket;
1620
1621 if (bucket) {
1622 /*
1623 * Do we have room in our bucket? It is OK for this uz count
1624 * check to be slightly out of sync.
1625 */
1626
1627 if (bucket->ub_ptr < zone->uz_count) {
1628 bucket->ub_ptr++;
1629 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1630 ("uma_zfree: Freeing to non free bucket index."));
1631 bucket->ub_bucket[bucket->ub_ptr] = item;
1632 if (zone->uz_dtor)
1633 zone->uz_dtor(item, zone->uz_size, udata);
1634 CPU_UNLOCK(zone, cpu);
1635 return;
1636 } else if (cache->uc_allocbucket) {
1637#ifdef UMA_DEBUG_ALLOC
1638 printf("uma_zfree: Swapping buckets.\n");
1639#endif
1640 /*
1641 * We have run out of space in our freebucket.
1642 * See if we can switch with our alloc bucket.
1643 */
1644 if (cache->uc_allocbucket->ub_ptr <
1645 cache->uc_freebucket->ub_ptr) {
1646 uma_bucket_t swap;
1647
1648 swap = cache->uc_freebucket;
1649 cache->uc_freebucket = cache->uc_allocbucket;
1650 cache->uc_allocbucket = swap;
1651
1652 goto zfree_start;
1653 }
1654 }
1655 }
1656
1657 /*
1658 * We can get here for two reasons:
1659 *
1660 * 1) The buckets are NULL
1661 * 2) The alloc and free buckets are both somewhat full.
1662 *
1663 */
1664
1665 ZONE_LOCK(zone);
1666
1667 bucket = cache->uc_freebucket;
1668 cache->uc_freebucket = NULL;
1669
1670 /* Can we throw this on the zone full list? */
1671 if (bucket != NULL) {
1672#ifdef UMA_DEBUG_ALLOC
1673 printf("uma_zfree: Putting old bucket on the free list.\n");
1674#endif
1675 /* ub_ptr is pointing to the last free item */
1676 KASSERT(bucket->ub_ptr != -1,
1677 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1678 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1679 bucket, ub_link);
1680 }
1681 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1682 LIST_REMOVE(bucket, ub_link);
1683 ZONE_UNLOCK(zone);
1684 cache->uc_freebucket = bucket;
1685 goto zfree_start;
1686 }
1687 /* We're done with this CPU now */
1688 CPU_UNLOCK(zone, cpu);
1689
1690 /* And the zone.. */
1691 ZONE_UNLOCK(zone);
1692
1693#ifdef UMA_DEBUG_ALLOC
1694 printf("uma_zfree: Allocating new free bucket.\n");
1695#endif
1696 bucket = uma_zalloc_internal(bucketzone,
1697 NULL, M_NOWAIT, NULL);
1698 if (bucket) {
1699#ifdef INVARIANTS
1700 bzero(bucket, bucketzone->uz_size);
1701#endif
1702 bucket->ub_ptr = -1;
1703 ZONE_LOCK(zone);
1704 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1705 bucket, ub_link);
1706 ZONE_UNLOCK(zone);
1707 goto zfree_restart;
1708 }
1709
1710 /*
1711 * If nothing else caught this, we'll just do an internal free.
1712 */
1713
1714zfree_internal:
1715
1716 uma_zfree_internal(zone, item, udata, 0);
1717
1718 return;
1719
1720}
1721
1722/*
1723 * Frees an item to an INTERNAL zone or allocates a free bucket
1724 *
1725 * Arguments:
1726 * zone The zone to free to
1727 * item The item we're freeing
1728 * udata User supplied data for the dtor
1729 * skip Skip the dtor, it was done in uma_zfree_arg
1730 */
1731
1732static void
1733uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1734{
1735 uma_slab_t slab;
1736 u_int8_t *mem;
1737 u_int8_t freei;
1738
1739 ZONE_LOCK(zone);
1740
1741 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1742 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1743 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
1744 slab = hash_sfind(&zone->uz_hash, mem);
1745 else {
1746 mem += zone->uz_pgoff;
1747 slab = (uma_slab_t)mem;
1748 }
1749 } else {
1750 slab = (uma_slab_t)udata;
1751 }
1752
1753 /* Do we need to remove from any lists? */
1754 if (slab->us_freecount+1 == zone->uz_ipers) {
1755 LIST_REMOVE(slab, us_link);
1756 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1757 } else if (slab->us_freecount == 0) {
1758 LIST_REMOVE(slab, us_link);
1759 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1760 }
1761
1762 /* Slab management stuff */
1763 freei = ((unsigned long)item - (unsigned long)slab->us_data)
1764 / zone->uz_rsize;
1765#ifdef INVARIANTS
1766 if (((freei * zone->uz_rsize) + slab->us_data) != item)
1767 panic("zone: %s(%p) slab %p freed address %p unaligned.\n",
1768 zone->uz_name, zone, slab, item);
1769 if (freei >= zone->uz_ipers)
1770 panic("zone: %s(%p) slab %p freelist %i out of range 0-%d\n",
1771 zone->uz_name, zone, slab, freei, zone->uz_ipers-1);
1772
1773 if (slab->us_freelist[freei] != 255) {
1774 printf("Slab at %p, freei %d = %d.\n",
1775 slab, freei, slab->us_freelist[freei]);
1776 panic("Duplicate free of item %p from zone %p(%s)\n",
1777 item, zone, zone->uz_name);
1778 }
1779#endif
1780 slab->us_freelist[freei] = slab->us_firstfree;
1781 slab->us_firstfree = freei;
1782 slab->us_freecount++;
1783
1784 /* Zone statistics */
1785 zone->uz_free++;
1786
1787 if (!skip && zone->uz_dtor)
1788 zone->uz_dtor(item, zone->uz_size, udata);
1789
1790 if (zone->uz_flags & UMA_ZFLAG_FULL) {
1791 if (zone->uz_pages < zone->uz_maxpages)
1792 zone->uz_flags &= ~UMA_ZFLAG_FULL;
1793
1794 /* We can handle one more allocation */
1795 wakeup_one(&zone);
1796 }
1797
1798 ZONE_UNLOCK(zone);
1799}
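/*
 * Editor's note (illustrative numbers only): the freei computation above
 * is the inverse of the allocation-side address math.  For a zone with
 * uz_rsize of 256 and a slab whose us_data starts at 0x1000, an item at
 * 0x1a00 gives freei = (0x1a00 - 0x1000) / 256 = 10; the INVARIANTS block
 * then multiplies back and panics if the result is not the freed address,
 * catching misaligned or foreign pointers.
 */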
1800
1801/* See uma.h */
1802void
1803uma_zone_set_max(uma_zone_t zone, int nitems)
1804{
1805 ZONE_LOCK(zone);
1806 if (zone->uz_ppera > 1)
1807 zone->uz_maxpages = nitems * zone->uz_ppera;
1808 else
1809 zone->uz_maxpages = nitems / zone->uz_ipers;
1810
1811 if (zone->uz_maxpages * zone->uz_ipers < nitems)
1812 zone->uz_maxpages++;
1813
1814 ZONE_UNLOCK(zone);
1815}
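/*
 * Editor's example (illustrative numbers only): for a single-page zone
 * with uz_ipers of 15, uma_zone_set_max(zone, 1000) computes
 * 1000 / 15 = 66 pages, notices 66 * 15 = 990 < 1000, and settles on 67
 * pages, an effective cap of 1005 items; the limit is rounded up to a
 * whole number of slabs rather than down.
 */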
1816
1817/* See uma.h */
1818void
1819uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1820{
1821 ZONE_LOCK(zone);
1822
1823 zone->uz_freef = freef;
1824
1825 ZONE_UNLOCK(zone);
1826}
1827
1828/* See uma.h */
1829void
1830uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1831{
1832 ZONE_LOCK(zone);
1833
1834 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1835 zone->uz_allocf = allocf;
1836
1837 ZONE_UNLOCK(zone);
1838}
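/*
 * Editor's sketch (hypothetical code, not part of the original file):
 * a private backend installed with uma_zone_set_allocf() and
 * uma_zone_set_freef() only has to match the shapes used by page_alloc()
 * and page_free() above, e.g.:
 *
 *	static void *
 *	my_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
 *	{
 *		*pflag = UMA_SLAB_PRIV;
 *		return (my_backend_alloc(bytes, wait));
 *	}
 *
 * where my_backend_alloc() is a placeholder for whatever the subsystem
 * actually uses to get pages.
 */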
1839
1840/* See uma.h */
1841int
1842uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1843{
1844 int pages;
1845 vm_offset_t kva;
1846
1847 mtx_lock(&Giant);
1848
1849 pages = count / zone->uz_ipers;
1850
1851 if (pages * zone->uz_ipers < count)
1852 pages++;
1853
1854 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1855
1856 if (kva == 0) {
1857 mtx_unlock(&Giant);
1858 return (0);
1859 }
1860
1861
1862 if (obj == NULL)
1863 obj = vm_object_allocate(OBJT_DEFAULT,
1864 zone->uz_maxpages);
1865 else
1866 _vm_object_allocate(OBJT_DEFAULT,
1867 zone->uz_maxpages, obj);
1868
1869 ZONE_LOCK(zone);
1870 zone->uz_kva = kva;
1871 zone->uz_obj = obj;
1872 zone->uz_maxpages = pages;
1873
1874 zone->uz_allocf = obj_alloc;
1875 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1876
1877 ZONE_UNLOCK(zone);
1878 mtx_unlock(&Giant);
1879
1880 return (1);
1881}
1882
1883/* See uma.h */
1884void
1885uma_prealloc(uma_zone_t zone, int items)
1886{
1887 int slabs;
1888 uma_slab_t slab;
1889
1890 ZONE_LOCK(zone);
1891 slabs = items / zone->uz_ipers;
1892 if (slabs * zone->uz_ipers < items)
1893 slabs++;
1894
1895 while (slabs > 0) {
1896 slab = slab_zalloc(zone, M_WAITOK);
1897 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1898 slabs--;
1899 }
1900 ZONE_UNLOCK(zone);
1901}
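/*
 * Editor's example (illustrative numbers only): uma_prealloc(zone, 100)
 * on a zone with uz_ipers of 15 computes 100 / 15 = 6 slabs, sees
 * 6 * 15 = 90 < 100, and allocates 7 slabs, leaving 105 items on the
 * free slab list before the zone sees its first request.
 */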
1902
1903/* See uma.h */
1904void
1905uma_reclaim(void)
1906{
1907 /*
1908 * You might think that the delay below would improve performance since
1909 * the allocator will give away memory that it may ask for immediately.
1910 * Really, it makes things worse, since cpu cycles are so much cheaper
1911 * than disk activity.
1912 */
1913#if 0
1914 static struct timeval tv = {0};
1915 struct timeval now;
1916 getmicrouptime(&now);
1917 if (now.tv_sec > tv.tv_sec + 30)
1918 tv = now;
1919 else
1920 return;
1921#endif
1922#ifdef UMA_DEBUG
1923 printf("UMA: vm asked us to release pages!\n");
1924#endif
1925 bucket_enable();
1926 zone_foreach(zone_drain);
1927
1928 /*
1929 * Some slabs may have been freed but this zone will be visited early
1930 * we visit again so that we can free pages that are empty once other
1931 * zones are drained. We have to do the same for buckets.
1932 */
1933 zone_drain(slabzone);
1934 zone_drain(bucketzone);
1935}
1936
1937void *
1938uma_large_malloc(int size, int wait)
1939{
1940 void *mem;
1941 uma_slab_t slab;
1942 u_int8_t flags;
1943
1944 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
1945 if (slab == NULL)
1946 return (NULL);
1947
1948 mem = page_alloc(NULL, size, &flags, wait);
1949 if (mem) {
1950 slab->us_data = mem;
1951 slab->us_flags = flags | UMA_SLAB_MALLOC;
1952 slab->us_size = size;
1953 UMA_HASH_INSERT(mallochash, slab, mem);
1954 } else {
1955 uma_zfree_internal(slabzone, slab, NULL, 0);
1956 }
1957
1958
1959 return (mem);
1960}
1961
1962void
1963uma_large_free(uma_slab_t slab)
1964{
1965 UMA_HASH_REMOVE(mallochash, slab, slab->us_data);
1966 page_free(slab->us_data, slab->us_size, slab->us_flags);
1967 uma_zfree_internal(slabzone, slab, NULL, 0);
1968}
1969
1970void
1971uma_print_stats(void)
1972{
1973 zone_foreach(uma_print_zone);
1974}
1975
1976void
1977uma_print_zone(uma_zone_t zone)
1978{
1979 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
1980 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
1981 zone->uz_ipers, zone->uz_ppera,
1982 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
1983}
1984
1985/*
1986 * Sysctl handler for vm.zone
1987 *
1988 * stolen from vm_zone.c
1989 */
1990static int
1991sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
1992{
1993 int error, len, cnt;
1994 const int linesize = 128; /* conservative */
1995 int totalfree;
1996 char *tmpbuf, *offset;
1997 uma_zone_t z;
1998 char *p;
1999
2000 cnt = 0;
2001 mtx_lock(&uma_mtx);
2002 LIST_FOREACH(z, &uma_zones, uz_link)
2003 cnt++;
2004 mtx_unlock(&uma_mtx);
2005 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2006 M_TEMP, M_WAITOK);
2007 len = snprintf(tmpbuf, linesize,
2008 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2009 if (cnt == 0)
2010 tmpbuf[len - 1] = '\0';
2011 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2012 if (error || cnt == 0)
2013 goto out;
2014 offset = tmpbuf;
2015 mtx_lock(&uma_mtx);
2016 LIST_FOREACH(z, &uma_zones, uz_link) {
2017 if (cnt == 0) /* list may have changed size */
2018 break;
2019 ZONE_LOCK(z);
2020 totalfree = z->uz_free + z->uz_cachefree;
2021 len = snprintf(offset, linesize,
2022 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2023 z->uz_name, z->uz_size,
2024 z->uz_maxpages * z->uz_ipers,
2025 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2026 totalfree,
2027 (unsigned long long)z->uz_allocs);
2028 ZONE_UNLOCK(z);
2029 for (p = offset + 12; p > offset && *p == ' '; --p)
2030 /* nothing */ ;
2031 p[1] = ':';
2032 cnt--;
2033 offset += len;
2034 }
2035 mtx_unlock(&uma_mtx);
2036 *offset++ = '\0';
2037 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2038out:
2039 FREE(tmpbuf, M_TEMP);
2040 return (error);
2041}