uma_core.c: revision 103531 vs. revision 103623
1/*
1/*
2 * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net>
2 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions, and the following
10 * disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * $FreeBSD: head/sys/vm/uma_core.c 103531 2002-09-18 08:26:30Z jeff $
26 * $FreeBSD: head/sys/vm/uma_core.c 103623 2002-09-19 06:05:32Z jeff $
27 *
28 */
29
30/*
31 * uma_core.c Implementation of the Universal Memory allocator
32 *
33 * This allocator is intended to replace the multitude of similar object caches
34 * in the standard FreeBSD kernel. The intent is to be flexible as well as
 35 * efficient. A primary design goal is to return unused memory to the rest of
36 * the system. This will make the system as a whole more flexible due to the
37 * ability to move memory to subsystems which most need it instead of leaving
38 * pools of reserved memory unused.
39 *
40 * The basic ideas stem from similar slab/zone based allocators whose algorithms
41 * are well known.
42 *
43 */
44
45/*
46 * TODO:
47 * - Improve memory usage for large allocations
48 * - Investigate cache size adjustments
49 */
50
51/* I should really use ktr.. */
52/*
53#define UMA_DEBUG 1
54#define UMA_DEBUG_ALLOC 1
55#define UMA_DEBUG_ALLOC_1 1
56*/
57
58
59#include "opt_param.h"
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/kernel.h>
63#include <sys/types.h>
64#include <sys/queue.h>
65#include <sys/malloc.h>
66#include <sys/lock.h>
67#include <sys/sysctl.h>
68#include <sys/mutex.h>
69#include <sys/proc.h>
70#include <sys/smp.h>
71#include <sys/vmmeter.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/vm_param.h>
77#include <vm/vm_map.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_extern.h>
80#include <vm/uma.h>
81#include <vm/uma_int.h>
82#include <vm/uma_dbg.h>
83
84/*
85 * This is the zone from which all zones are spawned. The idea is that even
86 * the zone heads are allocated from the allocator, so we use the bss section
87 * to bootstrap us.
88 */
89static struct uma_zone masterzone;
90static uma_zone_t zones = &masterzone;
91
92/* This is the zone from which all of uma_slab_t's are allocated. */
93static uma_zone_t slabzone;
94
95/*
96 * The initial hash tables come out of this zone so they can be allocated
97 * prior to malloc coming up.
98 */
99static uma_zone_t hashzone;
100
101/*
102 * Zone that buckets come from.
103 */
104static uma_zone_t bucketzone;
105
106/*
107 * Are we allowed to allocate buckets?
108 */
109static int bucketdisable = 1;
110
111/* Linked list of all zones in the system */
112static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
113
114/* This mutex protects the zone list */
115static struct mtx uma_mtx;
116
117/* Linked list of boot time pages */
118static LIST_HEAD(,uma_slab) uma_boot_pages =
119 LIST_HEAD_INITIALIZER(&uma_boot_pages);
120
121/* Count of free boottime pages */
122static int uma_boot_free = 0;
123
124/* Is the VM done starting up? */
125static int booted = 0;
126
127/* This is the handle used to schedule our working set calculator */
128static struct callout uma_callout;
129
130/* This is mp_maxid + 1, for use while looping over each cpu */
131static int maxcpu;
132
133/*
134 * This structure is passed as the zone ctor arg so that I don't have to create
135 * a special allocation function just for zones.
136 */
137struct uma_zctor_args {
138 char *name;
139 size_t size;
140 uma_ctor ctor;
141 uma_dtor dtor;
142 uma_init uminit;
143 uma_fini fini;
144 int align;
145 u_int16_t flags;
146};
147
148/* Prototypes.. */
149
150static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
151static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
152static void page_free(void *, int, u_int8_t);
153static uma_slab_t slab_zalloc(uma_zone_t, int);
154static void cache_drain(uma_zone_t);
155static void bucket_drain(uma_zone_t, uma_bucket_t);
156static void zone_drain(uma_zone_t);
157static void zone_ctor(void *, int, void *);
158static void zone_dtor(void *, int, void *);
159static void zero_init(void *, int);
160static void zone_small_init(uma_zone_t zone);
161static void zone_large_init(uma_zone_t zone);
162static void zone_foreach(void (*zfunc)(uma_zone_t));
163static void zone_timeout(uma_zone_t zone);
164static int hash_alloc(struct uma_hash *);
165static int hash_expand(struct uma_hash *, struct uma_hash *);
166static void hash_free(struct uma_hash *hash);
167static void uma_timeout(void *);
168static void uma_startup3(void);
169static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t);
170static void uma_zfree_internal(uma_zone_t, void *, void *, int);
171static void bucket_enable(void);
172void uma_print_zone(uma_zone_t);
173void uma_print_stats(void);
174static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
175
176SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
177 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
178SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
179
180/*
181 * This routine checks to see whether or not it's safe to enable buckets.
182 */
183
184static void
185bucket_enable(void)
186{
187 if (cnt.v_free_count < cnt.v_free_min)
188 bucketdisable = 1;
189 else
190 bucketdisable = 0;
191}
192
193
194/*
195 * Routine called by timeout which is used to fire off some time interval
196 * based calculations. (working set, stats, etc.)
197 *
198 * Arguments:
199 * arg Unused
200 *
201 * Returns:
202 * Nothing
203 */
204static void
205uma_timeout(void *unused)
206{
207 bucket_enable();
208 zone_foreach(zone_timeout);
209
210 /* Reschedule this event */
211 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
212}
213
214/*
215 * Routine to perform timeout driven calculations. This does the working set
216 * as well as hash expanding, and per cpu statistics aggregation.
217 *
218 * Arguments:
219 * zone The zone to operate on
220 *
221 * Returns:
222 * Nothing
223 */
224static void
225zone_timeout(uma_zone_t zone)
226{
227 uma_cache_t cache;
228 u_int64_t alloc;
229 int free;
230 int cpu;
231
232 alloc = 0;
233 free = 0;
234
235 /*
236 * Aggregate per cpu cache statistics back to the zone.
237 *
238 * I may rewrite this to set a flag in the per cpu cache instead of
239 * locking. If the flag is not cleared on the next round I will have
240 * to lock and do it here instead so that the statistics don't get too
241 * far out of sync.
242 */
243 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
244 for (cpu = 0; cpu < maxcpu; cpu++) {
245 if (CPU_ABSENT(cpu))
246 continue;
247 CPU_LOCK(zone, cpu);
248 cache = &zone->uz_cpu[cpu];
249 /* Add them up, and reset */
250 alloc += cache->uc_allocs;
251 cache->uc_allocs = 0;
252 if (cache->uc_allocbucket)
253 free += cache->uc_allocbucket->ub_ptr + 1;
254 if (cache->uc_freebucket)
255 free += cache->uc_freebucket->ub_ptr + 1;
256 CPU_UNLOCK(zone, cpu);
257 }
258 }
259
260 /* Now push these stats back into the zone.. */
261 ZONE_LOCK(zone);
262 zone->uz_allocs += alloc;
263
264 /*
 265 * cachefree is an instantaneous snapshot of what is in the per cpu
266 * caches, not an accurate counter
267 */
268 zone->uz_cachefree = free;
269
270 /*
271 * Expand the zone hash table.
272 *
273 * This is done if the number of slabs is larger than the hash size.
 274 * What I'm trying to do here is eliminate collisions entirely. This
275 * may be a little aggressive. Should I allow for two collisions max?
276 */
277
278 if (zone->uz_flags & UMA_ZFLAG_HASH &&
279 zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
280 struct uma_hash newhash;
281 struct uma_hash oldhash;
282 int ret;
283
284 /*
285 * This is so involved because allocating and freeing
286 * while the zone lock is held will lead to deadlock.
287 * I have to do everything in stages and check for
288 * races.
289 */
290 newhash = zone->uz_hash;
291 ZONE_UNLOCK(zone);
292 ret = hash_alloc(&newhash);
293 ZONE_LOCK(zone);
294 if (ret) {
295 if (hash_expand(&zone->uz_hash, &newhash)) {
296 oldhash = zone->uz_hash;
297 zone->uz_hash = newhash;
298 } else
299 oldhash = newhash;
300
301 ZONE_UNLOCK(zone);
302 hash_free(&oldhash);
303 ZONE_LOCK(zone);
304 }
305 }
306
307 /*
308 * Here we compute the working set size as the total number of items
309 * left outstanding since the last time interval. This is slightly
310 * suboptimal. What we really want is the highest number of outstanding
311 * items during the last time quantum. This should be close enough.
312 *
313 * The working set size is used to throttle the zone_drain function.
314 * We don't want to return memory that we may need again immediately.
315 */
316 alloc = zone->uz_allocs - zone->uz_oallocs;
317 zone->uz_oallocs = zone->uz_allocs;
318 zone->uz_wssize = alloc;
319
320 ZONE_UNLOCK(zone);
321}
322
323/*
324 * Allocate and zero fill the next sized hash table from the appropriate
325 * backing store.
326 *
327 * Arguments:
328 * hash A new hash structure with the old hash size in uh_hashsize
329 *
330 * Returns:
 331 * 1 on success and 0 on failure.
332 */
333int
334hash_alloc(struct uma_hash *hash)
335{
336 int oldsize;
337 int alloc;
338
339 oldsize = hash->uh_hashsize;
340
341 /* We're just going to go to a power of two greater */
342 if (oldsize) {
343 hash->uh_hashsize = oldsize * 2;
344 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
345 /* XXX Shouldn't be abusing DEVBUF here */
346 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
347 M_DEVBUF, M_NOWAIT);
348 } else {
349 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
350 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
351 M_WAITOK, NULL);
352 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
353 }
354 if (hash->uh_slab_hash) {
355 bzero(hash->uh_slab_hash, alloc);
356 hash->uh_hashmask = hash->uh_hashsize - 1;
357 return (1);
358 }
359
360 return (0);
361}
362
363/*
364 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout
365 * to reduce collisions. This must not be done in the regular allocation path,
366 * otherwise, we can recurse on the vm while allocating pages.
367 *
368 * Arguments:
369 * oldhash The hash you want to expand
370 * newhash The hash structure for the new table
371 *
372 * Returns:
373 * Nothing
374 *
375 * Discussion:
376 */
377static int
378hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
379{
380 uma_slab_t slab;
381 int hval;
382 int i;
383
384 if (!newhash->uh_slab_hash)
385 return (0);
386
387 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
388 return (0);
389
390 /*
391 * I need to investigate hash algorithms for resizing without a
392 * full rehash.
393 */
394
395 for (i = 0; i < oldhash->uh_hashsize; i++)
396 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
397 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
398 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
399 hval = UMA_HASH(newhash, slab->us_data);
400 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
401 slab, us_hlink);
402 }
403
404 return (1);
405}
406
407/*
408 * Free the hash bucket to the appropriate backing store.
409 *
410 * Arguments:
 411 * hash The hash structure whose slab_hash storage we're freeing;
 412 * its uh_hashsize tells us which backing store it came from
413 *
414 * Returns:
415 * Nothing
416 */
417static void
418hash_free(struct uma_hash *hash)
419{
420 if (hash->uh_slab_hash == NULL)
421 return;
422 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
423 uma_zfree_internal(hashzone,
424 hash->uh_slab_hash, NULL, 0);
425 else
426 free(hash->uh_slab_hash, M_DEVBUF);
427}
428
429/*
430 * Frees all outstanding items in a bucket
431 *
432 * Arguments:
433 * zone The zone to free to, must be unlocked.
434 * bucket The free/alloc bucket with items, cpu queue must be locked.
435 *
436 * Returns:
437 * Nothing
438 */
439
440static void
441bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
442{
443 uma_slab_t slab;
444 int mzone;
445 void *item;
446
447 if (bucket == NULL)
448 return;
449
450 slab = NULL;
451 mzone = 0;
452
453 /* We have to lookup the slab again for malloc.. */
454 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
455 mzone = 1;
456
457 while (bucket->ub_ptr > -1) {
458 item = bucket->ub_bucket[bucket->ub_ptr];
459#ifdef INVARIANTS
460 bucket->ub_bucket[bucket->ub_ptr] = NULL;
461 KASSERT(item != NULL,
462 ("bucket_drain: botched ptr, item is NULL"));
463#endif
464 bucket->ub_ptr--;
465 /*
466 * This is extremely inefficient. The slab pointer was passed
467 * to uma_zfree_arg, but we lost it because the buckets don't
468 * hold them. This will go away when free() gets a size passed
469 * to it.
470 */
471 if (mzone)
472 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
473 uma_zfree_internal(zone, item, slab, 1);
474 }
475}
476
477/*
478 * Drains the per cpu caches for a zone.
479 *
480 * Arguments:
481 * zone The zone to drain, must be unlocked.
482 *
483 * Returns:
484 * Nothing
485 *
486 * This function returns with the zone locked so that the per cpu queues can
487 * not be filled until zone_drain is finished.
488 *
489 */
490static void
491cache_drain(uma_zone_t zone)
492{
493 uma_bucket_t bucket;
494 uma_cache_t cache;
495 int cpu;
496
497 /*
498 * Flush out the per cpu queues.
499 *
500 * XXX This causes unnecessary thrashing due to immediately having
501 * empty per cpu queues. I need to improve this.
502 */
503
504 /*
505 * We have to lock each cpu cache before locking the zone
506 */
507 ZONE_UNLOCK(zone);
508
509 for (cpu = 0; cpu < maxcpu; cpu++) {
510 if (CPU_ABSENT(cpu))
511 continue;
512 CPU_LOCK(zone, cpu);
513 cache = &zone->uz_cpu[cpu];
514 bucket_drain(zone, cache->uc_allocbucket);
515 bucket_drain(zone, cache->uc_freebucket);
516 }
517
518 /*
519 * Drain the bucket queues and free the buckets, we just keep two per
520 * cpu (alloc/free).
521 */
522 ZONE_LOCK(zone);
523 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
524 LIST_REMOVE(bucket, ub_link);
525 ZONE_UNLOCK(zone);
526 bucket_drain(zone, bucket);
527 uma_zfree_internal(bucketzone, bucket, NULL, 0);
528 ZONE_LOCK(zone);
529 }
530
531 /* Now we do the free queue.. */
532 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
533 LIST_REMOVE(bucket, ub_link);
534 uma_zfree_internal(bucketzone, bucket, NULL, 0);
535 }
536
537 /* We unlock here, but they will all block until the zone is unlocked */
538 for (cpu = 0; cpu < maxcpu; cpu++) {
539 if (CPU_ABSENT(cpu))
540 continue;
541 CPU_UNLOCK(zone, cpu);
542 }
543
544 zone->uz_cachefree = 0;
545}
546
547/*
548 * Frees pages from a zone back to the system. This is done on demand from
549 * the pageout daemon.
550 *
551 * Arguments:
552 * zone The zone to free pages from
 553 *
554 *
555 * Returns:
556 * Nothing.
557 */
558static void
559zone_drain(uma_zone_t zone)
560{
561 struct slabhead freeslabs = {};
562 uma_slab_t slab;
563 uma_slab_t n;
564 u_int64_t extra;
565 u_int8_t flags;
566 u_int8_t *mem;
567 int i;
568
569 /*
 570 * We don't want to take pages from statically allocated zones at this
571 * time
572 */
573 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
574 return;
575
576 ZONE_LOCK(zone);
577
578 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
579 cache_drain(zone);
580
581 if (zone->uz_free < zone->uz_wssize)
582 goto finished;
583#ifdef UMA_DEBUG
584 printf("%s working set size: %llu free items: %u\n",
585 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
586#endif
587 extra = zone->uz_free - zone->uz_wssize;
588 extra /= zone->uz_ipers;
589
590 /* extra is now the number of extra slabs that we can free */
591
592 if (extra == 0)
593 goto finished;
594
595 slab = LIST_FIRST(&zone->uz_free_slab);
596 while (slab && extra) {
597 n = LIST_NEXT(slab, us_link);
598
 599 /* We have nowhere to free these to */
600 if (slab->us_flags & UMA_SLAB_BOOT) {
601 slab = n;
602 continue;
603 }
604
605 LIST_REMOVE(slab, us_link);
606 zone->uz_pages -= zone->uz_ppera;
607 zone->uz_free -= zone->uz_ipers;
608
609 if (zone->uz_flags & UMA_ZFLAG_HASH)
610 UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
611
612 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
613
614 slab = n;
615 extra--;
616 }
617finished:
618 ZONE_UNLOCK(zone);
619
620 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
621 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
622 if (zone->uz_fini)
623 for (i = 0; i < zone->uz_ipers; i++)
624 zone->uz_fini(
625 slab->us_data + (zone->uz_rsize * i),
626 zone->uz_size);
627 flags = slab->us_flags;
628 mem = slab->us_data;
629
630 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
631 uma_zfree_internal(slabzone, slab, NULL, 0);
632 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
633 for (i = 0; i < zone->uz_ppera; i++)
634 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
635 kmem_object);
636#ifdef UMA_DEBUG
637 printf("%s: Returning %d bytes.\n",
638 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
639#endif
640 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
641 }
642
643}
644
645/*
646 * Allocate a new slab for a zone. This does not insert the slab onto a list.
647 *
648 * Arguments:
649 * zone The zone to allocate slabs for
650 * wait Shall we wait?
651 *
652 * Returns:
653 * The slab that was allocated or NULL if there is no memory and the
654 * caller specified M_NOWAIT.
655 *
656 */
657static uma_slab_t
658slab_zalloc(uma_zone_t zone, int wait)
659{
660 uma_slab_t slab; /* Starting slab */
661 u_int8_t *mem;
662 u_int8_t flags;
663 int i;
664
665 slab = NULL;
666
667#ifdef UMA_DEBUG
668 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
669#endif
670 ZONE_UNLOCK(zone);
671
672 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
673 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
674 if (slab == NULL) {
675 ZONE_LOCK(zone);
676 return NULL;
677 }
678 }
679
680 /*
681 * This reproduces the old vm_zone behavior of zero filling pages the
682 * first time they are added to a zone.
683 *
684 * Malloced items are zeroed in uma_zalloc.
685 */
686
687 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
688 wait |= M_ZERO;
689 else
690 wait &= ~M_ZERO;
691
692 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
693 mtx_lock(&Giant);
694 mem = zone->uz_allocf(zone,
695 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
696 mtx_unlock(&Giant);
697 if (mem == NULL) {
698 ZONE_LOCK(zone);
699 return (NULL);
700 }
701 } else {
702 uma_slab_t tmps;
703
704 if (zone->uz_ppera > 1)
705 panic("UMA: Attemping to allocate multiple pages before vm has started.\n");
706 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
707 panic("Mallocing before uma_startup2 has been called.\n");
708 if (uma_boot_free == 0)
709 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
710 tmps = LIST_FIRST(&uma_boot_pages);
711 LIST_REMOVE(tmps, us_link);
712 uma_boot_free--;
713 mem = tmps->us_data;
714 }
715
716 /* Point the slab into the allocated memory */
717 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
718 slab = (uma_slab_t )(mem + zone->uz_pgoff);
719
720 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
721 for (i = 0; i < zone->uz_ppera; i++)
722 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
723
724 slab->us_zone = zone;
725 slab->us_data = mem;
726
727 /*
728 * This is intended to spread data out across cache lines.
729 *
730 * This code doesn't seem to work properly on x86, and on alpha
731 * it makes absolutely no performance difference. I'm sure it could
 732 * use some tuning, but Sun makes outrageous claims about its
733 * performance.
734 */
735#if 0
736 if (zone->uz_cachemax) {
737 slab->us_data += zone->uz_cacheoff;
738 zone->uz_cacheoff += UMA_CACHE_INC;
739 if (zone->uz_cacheoff > zone->uz_cachemax)
740 zone->uz_cacheoff = 0;
741 }
742#endif
743
744 slab->us_freecount = zone->uz_ipers;
745 slab->us_firstfree = 0;
746 slab->us_flags = flags;
747 for (i = 0; i < zone->uz_ipers; i++)
748 slab->us_freelist[i] = i+1;
749
750 if (zone->uz_init)
751 for (i = 0; i < zone->uz_ipers; i++)
752 zone->uz_init(slab->us_data + (zone->uz_rsize * i),
753 zone->uz_size);
754 ZONE_LOCK(zone);
755
756 if (zone->uz_flags & UMA_ZFLAG_HASH)
757 UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
758
759 zone->uz_pages += zone->uz_ppera;
760 zone->uz_free += zone->uz_ipers;
761
762
763 return (slab);
764}
765
766/*
767 * Allocates a number of pages from the system
768 *
769 * Arguments:
770 * zone Unused
771 * bytes The number of bytes requested
772 * wait Shall we wait?
773 *
774 * Returns:
775 * A pointer to the alloced memory or possibly
776 * NULL if M_NOWAIT is set.
777 */
778static void *
779page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
780{
781 void *p; /* Returned page */
782
783 *pflag = UMA_SLAB_KMEM;
784 p = (void *) kmem_malloc(kmem_map, bytes, wait);
785
786 return (p);
787}
788
789/*
790 * Allocates a number of pages from within an object
791 *
792 * Arguments:
793 * zone Unused
794 * bytes The number of bytes requested
795 * wait Shall we wait?
796 *
797 * Returns:
798 * A pointer to the alloced memory or possibly
799 * NULL if M_NOWAIT is set.
800 *
801 * TODO: If we fail during a multi-page allocation release the pages that have
802 * already been allocated.
803 */
804static void *
805obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
806{
807 vm_offset_t zkva;
808 vm_offset_t retkva;
809 vm_page_t p;
810 int pages;
811
812 retkva = 0;
813 pages = zone->uz_pages;
814
815 /*
816 * This looks a little weird since we're getting one page at a time
817 */
818 while (bytes > 0) {
819 p = vm_page_alloc(zone->uz_obj, pages,
820 VM_ALLOC_INTERRUPT);
821 if (p == NULL)
822 return (NULL);
823
824 zkva = zone->uz_kva + pages * PAGE_SIZE;
825 if (retkva == 0)
826 retkva = zkva;
827 pmap_qenter(zkva, &p, 1);
828 bytes -= PAGE_SIZE;
829 pages += 1;
830 }
831
832 *flags = UMA_SLAB_PRIV;
833
834 return ((void *)retkva);
835}
836
837/*
838 * Frees a number of pages to the system
839 *
840 * Arguments:
841 * mem A pointer to the memory to be freed
842 * size The size of the memory being freed
843 * flags The original p->us_flags field
844 *
845 * Returns:
846 * Nothing
847 *
848 */
849static void
850page_free(void *mem, int size, u_int8_t flags)
851{
852 vm_map_t map;
853
854 if (flags & UMA_SLAB_KMEM)
855 map = kmem_map;
856 else
857 panic("UMA: page_free used with invalid flags %d\n", flags);
858
859 kmem_free(map, (vm_offset_t)mem, size);
860}
861
862/*
863 * Zero fill initializer
864 *
865 * Arguments/Returns follow uma_init specifications
866 *
867 */
868static void
869zero_init(void *mem, int size)
870{
871 bzero(mem, size);
872}
873
874/*
875 * Finish creating a small uma zone. This calculates ipers, and the zone size.
876 *
877 * Arguments
878 * zone The zone we should initialize
879 *
880 * Returns
881 * Nothing
882 */
883static void
884zone_small_init(uma_zone_t zone)
885{
886 int rsize;
887 int memused;
888 int ipers;
889
890 rsize = zone->uz_size;
891
892 if (rsize < UMA_SMALLEST_UNIT)
893 rsize = UMA_SMALLEST_UNIT;
894
895 if (rsize & zone->uz_align)
896 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
897
898 zone->uz_rsize = rsize;
899
900 rsize += 1; /* Account for the byte of linkage */
901 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
902 zone->uz_ppera = 1;
903
904 memused = zone->uz_ipers * zone->uz_rsize;
905
906 /* Can we do any better? */
907 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
908 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
909 return;
910 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
911 if (ipers > zone->uz_ipers) {
912 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
913 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
914 zone->uz_flags |= UMA_ZFLAG_HASH;
915 zone->uz_ipers = ipers;
916 }
917 }
918
919}
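/*
 * Worked example of the sizing above (an illustrative sketch with assumed
 * numbers: UMA_SLAB_SIZE is taken to be a 4096 byte page, and the item size
 * and alignment are hypothetical).  For uz_size = 100 and uz_align = 15,
 * rsize = (100 & ~15) + 16 = 112, so uz_rsize = 112 and each item costs
 * 113 bytes once the byte of free-list linkage is added.  That gives
 * ipers = (4096 - sizeof(struct uma_slab)) / 113 and memused = ipers * 112;
 * only if the leftover 4096 - memused is at least UMA_MAX_WASTE, the zone is
 * not INTERNAL, and a bare page would hold more items (4096 / 112 = 36) does
 * the zone switch to OFFPAGE slab headers.
 */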
920
921/*
922 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
923 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
924 * more complicated.
925 *
926 * Arguments
927 * zone The zone we should initialize
928 *
929 * Returns
930 * Nothing
931 */
932static void
933zone_large_init(uma_zone_t zone)
934{
935 int pages;
936
937 pages = zone->uz_size / UMA_SLAB_SIZE;
938
939 /* Account for remainder */
940 if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
941 pages++;
942
943 zone->uz_ppera = pages;
944 zone->uz_ipers = 1;
945
946 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
947 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
948 zone->uz_flags |= UMA_ZFLAG_HASH;
949
950 zone->uz_rsize = zone->uz_size;
951}
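/*
 * Example of the large zone math above (hypothetical size, again assuming a
 * 4096 byte UMA_SLAB_SIZE): a 9000 byte item gives pages = 9000 / 4096 = 2,
 * which is bumped to 3 because 2 * 4096 < 9000.  The zone then keeps exactly
 * one item per three page slab, with the slab header always kept OFFPAGE.
 */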
952
953/*
954 * Zone header ctor. This initializes all fields, locks, etc. And inserts
955 * the zone onto the global zone list.
956 *
957 * Arguments/Returns follow uma_ctor specifications
958 * udata Actually uma_zcreat_args
959 *
960 */
961
962static void
963zone_ctor(void *mem, int size, void *udata)
964{
965 struct uma_zctor_args *arg = udata;
966 uma_zone_t zone = mem;
967 int privlc;
968 int cplen;
969 int cpu;
970
971 bzero(zone, size);
972 zone->uz_name = arg->name;
973 zone->uz_size = arg->size;
974 zone->uz_ctor = arg->ctor;
975 zone->uz_dtor = arg->dtor;
976 zone->uz_init = arg->uminit;
977 zone->uz_fini = arg->fini;
978 zone->uz_align = arg->align;
979 zone->uz_free = 0;
980 zone->uz_pages = 0;
981 zone->uz_flags = 0;
982 zone->uz_allocf = page_alloc;
983 zone->uz_freef = page_free;
984
985 if (arg->flags & UMA_ZONE_ZINIT)
986 zone->uz_init = zero_init;
987
988 if (arg->flags & UMA_ZONE_INTERNAL)
989 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
990
991 if (arg->flags & UMA_ZONE_MALLOC)
992 zone->uz_flags |= UMA_ZFLAG_MALLOC;
993
994 if (arg->flags & UMA_ZONE_NOFREE)
995 zone->uz_flags |= UMA_ZFLAG_NOFREE;
996
997 if (arg->flags & UMA_ZONE_VM)
998 zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
999
1000 if (zone->uz_size > UMA_SLAB_SIZE)
1001 zone_large_init(zone);
1002 else
1003 zone_small_init(zone);
1004
1005 if (arg->flags & UMA_ZONE_MTXCLASS)
1006 privlc = 1;
1007 else
1008 privlc = 0;
1009
1010 /* We do this so that the per cpu lock name is unique for each zone */
1011 memcpy(zone->uz_lname, "PCPU ", 5);
1012 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1013 memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1014 zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1015
1016 /*
1017 * If we're putting the slab header in the actual page we need to
1018 * figure out where in each page it goes. This calculates a right
1019 * justified offset into the memory on a ALIGN_PTR boundary.
1020 */
1021 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1022 int totsize;
1023 int waste;
1024
1025 /* Size of the slab struct and free list */
1026 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1027 if (totsize & UMA_ALIGN_PTR)
1028 totsize = (totsize & ~UMA_ALIGN_PTR) +
1029 (UMA_ALIGN_PTR + 1);
1030 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1031
1032 waste = zone->uz_pgoff;
1033 waste -= (zone->uz_ipers * zone->uz_rsize);
1034
1035 /*
1036 * This calculates how much space we have for cache line size
 1037 * optimizations. It works by offsetting each slab slightly.
1038 * Currently it breaks on x86, and so it is disabled.
1039 */
1040
1041 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1042 zone->uz_cachemax = waste - UMA_CACHE_INC;
1043 zone->uz_cacheoff = 0;
1044 }
1045
1046 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1047 + zone->uz_ipers;
1048 /* I don't think it's possible, but I'll make sure anyway */
1049 if (totsize > UMA_SLAB_SIZE) {
1050 printf("zone %s ipers %d rsize %d size %d\n",
1051 zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1052 zone->uz_size);
1053 panic("UMA slab won't fit.\n");
1054 }
1055 }
1056
1057 if (zone->uz_flags & UMA_ZFLAG_HASH)
1058 hash_alloc(&zone->uz_hash);
1059
1060#ifdef UMA_DEBUG
1061 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1062 zone->uz_name, zone,
1063 zone->uz_size, zone->uz_ipers,
1064 zone->uz_ppera, zone->uz_pgoff);
1065#endif
1066 ZONE_LOCK_INIT(zone, privlc);
1067
1068 mtx_lock(&uma_mtx);
1069 LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1070 mtx_unlock(&uma_mtx);
1071
1072 /*
1073 * Some internal zones don't have room allocated for the per cpu
1074 * caches. If we're internal, bail out here.
1075 */
1076
1077 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1078 return;
1079
1080 if (zone->uz_ipers < UMA_BUCKET_SIZE)
1081 zone->uz_count = zone->uz_ipers - 1;
1082 else
1083 zone->uz_count = UMA_BUCKET_SIZE - 1;
1084
1085 for (cpu = 0; cpu < maxcpu; cpu++)
1086 CPU_LOCK_INIT(zone, cpu, privlc);
1087}
1088
1089/*
1090 * Zone header dtor. This frees all data, destroys locks, frees the hash table
1091 * and removes the zone from the global list.
1092 *
1093 * Arguments/Returns follow uma_dtor specifications
1094 * udata unused
1095 */
1096
1097static void
1098zone_dtor(void *arg, int size, void *udata)
1099{
1100 uma_zone_t zone;
1101 int cpu;
1102
1103 zone = (uma_zone_t)arg;
1104
1105 ZONE_LOCK(zone);
1106 zone->uz_wssize = 0;
1107 ZONE_UNLOCK(zone);
1108
1109 mtx_lock(&uma_mtx);
1110 LIST_REMOVE(zone, uz_link);
1111 zone_drain(zone);
1112 mtx_unlock(&uma_mtx);
1113
1114 ZONE_LOCK(zone);
1115 if (zone->uz_free != 0)
1116 printf("Zone %s was not empty. Lost %d pages of memory.\n",
1117 zone->uz_name, zone->uz_pages);
1118
1119 if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
1120 for (cpu = 0; cpu < maxcpu; cpu++)
1121 CPU_LOCK_FINI(zone, cpu);
1122
1123 ZONE_UNLOCK(zone);
1124 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1125 hash_free(&zone->uz_hash);
1126
1127 ZONE_LOCK_FINI(zone);
1128}
1129/*
1130 * Traverses every zone in the system and calls a callback
1131 *
1132 * Arguments:
1133 * zfunc A pointer to a function which accepts a zone
1134 * as an argument.
1135 *
1136 * Returns:
1137 * Nothing
1138 */
1139static void
1140zone_foreach(void (*zfunc)(uma_zone_t))
1141{
1142 uma_zone_t zone;
1143
1144 mtx_lock(&uma_mtx);
1145 LIST_FOREACH(zone, &uma_zones, uz_link) {
1146 zfunc(zone);
1147 }
1148 mtx_unlock(&uma_mtx);
1149}
1150
1151/* Public functions */
1152/* See uma.h */
1153void
1154uma_startup(void *bootmem)
1155{
1156 struct uma_zctor_args args;
1157 uma_slab_t slab;
1158 int slabsize;
1159 int i;
1160
1161#ifdef UMA_DEBUG
1162 printf("Creating uma zone headers zone.\n");
1163#endif
1164#ifdef SMP
1165 maxcpu = mp_maxid + 1;
1166#else
1167 maxcpu = 1;
1168#endif
1169#ifdef UMA_DEBUG
1170 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1171 Debugger("stop");
1172#endif
1173 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1174 /* "manually" Create the initial zone */
1175 args.name = "UMA Zones";
1176 args.size = sizeof(struct uma_zone) +
1177 (sizeof(struct uma_cache) * (maxcpu - 1));
1178 args.ctor = zone_ctor;
1179 args.dtor = zone_dtor;
1180 args.uminit = zero_init;
1181 args.fini = NULL;
1182 args.align = 32 - 1;
1183 args.flags = UMA_ZONE_INTERNAL;
1184 /* The initial zone has no Per cpu queues so it's smaller */
1185 zone_ctor(zones, sizeof(struct uma_zone), &args);
1186
1187#ifdef UMA_DEBUG
1188 printf("Filling boot free list.\n");
1189#endif
1190 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1191 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1192 slab->us_data = (u_int8_t *)slab;
1193 slab->us_flags = UMA_SLAB_BOOT;
1194 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1195 uma_boot_free++;
1196 }
1197
1198#ifdef UMA_DEBUG
1199 printf("Creating slab zone.\n");
1200#endif
1201
1202 /*
1203 * This is the max number of free list items we'll have with
1204 * offpage slabs.
1205 */
1206
1207 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1208 slabsize /= UMA_MAX_WASTE;
 1209 slabsize++; /* In case the division rounded down */
1210 slabsize += sizeof(struct uma_slab);
1211
1212 /* Now make a zone for slab headers */
1213 slabzone = uma_zcreate("UMA Slabs",
1214 slabsize,
1215 NULL, NULL, NULL, NULL,
1216 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1217
1218 hashzone = uma_zcreate("UMA Hash",
1219 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1220 NULL, NULL, NULL, NULL,
1221 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1222
1223 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1224 NULL, NULL, NULL, NULL,
1225 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1226
1227
1228#ifdef UMA_DEBUG
1229 printf("UMA startup complete.\n");
1230#endif
1231}
1232
1233/* see uma.h */
1234void
1235uma_startup2(void)
1236{
1237 booted = 1;
1238 bucket_enable();
1239#ifdef UMA_DEBUG
1240 printf("UMA startup2 complete.\n");
1241#endif
1242}
1243
1244/*
1245 * Initialize our callout handle
1246 *
1247 */
1248
1249static void
1250uma_startup3(void)
1251{
1252#ifdef UMA_DEBUG
1253 printf("Starting callout.\n");
1254#endif
1255 callout_init(&uma_callout, 0);
1256 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1257#ifdef UMA_DEBUG
1258 printf("UMA startup3 complete.\n");
1259#endif
1260}
1261
1262/* See uma.h */
1263uma_zone_t
1264uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1265 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1266
1267{
1268 struct uma_zctor_args args;
1269
1270 /* This stuff is essential for the zone ctor */
1271 args.name = name;
1272 args.size = size;
1273 args.ctor = ctor;
1274 args.dtor = dtor;
1275 args.uminit = uminit;
1276 args.fini = fini;
1277 args.align = align;
1278 args.flags = flags;
1279
1280 return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL));
1281}
1282
1283/* See uma.h */
1284void
1285uma_zdestroy(uma_zone_t zone)
1286{
1287 uma_zfree_internal(zones, zone, NULL, 0);
1288}
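/*
 * Illustrative use of the public interface exported by this file (a sketch,
 * not code from this revision; "struct foo" and the flag choices are
 * hypothetical, and uma.h remains the canonical reference):
 */
#if 0
struct foo {
        int     f_bar;
};

static uma_zone_t foo_zone;

static void
foo_zone_example(void)
{
        struct foo *fp;

        foo_zone = uma_zcreate("foo", sizeof(struct foo),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        fp = uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO);
        /* ... work with fp ... */
        uma_zfree_arg(foo_zone, fp, NULL);
        uma_zdestroy(foo_zone);
}
#endif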
1289
1290/* See uma.h */
1291void *
1292uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1293{
1294 void *item;
1295 uma_cache_t cache;
1296 uma_bucket_t bucket;
1297 int cpu;
1298
1299 /* This is the fast path allocation */
1300#ifdef UMA_DEBUG_ALLOC_1
1301 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1302#endif
1303
1304 if (!(flags & M_NOWAIT)) {
1305 KASSERT(curthread->td_intr_nesting_level == 0,
1306 ("malloc(M_WAITOK) in interrupt context"));
1307 WITNESS_SLEEP(1, NULL);
1308 }
1309
1310zalloc_restart:
1311 cpu = PCPU_GET(cpuid);
1312 CPU_LOCK(zone, cpu);
1313 cache = &zone->uz_cpu[cpu];
1314
1315zalloc_start:
1316 bucket = cache->uc_allocbucket;
1317
1318 if (bucket) {
1319 if (bucket->ub_ptr > -1) {
1320 item = bucket->ub_bucket[bucket->ub_ptr];
1321#ifdef INVARIANTS
1322 bucket->ub_bucket[bucket->ub_ptr] = NULL;
1323#endif
1324 bucket->ub_ptr--;
1325 KASSERT(item != NULL,
1326 ("uma_zalloc: Bucket pointer mangled."));
1327 cache->uc_allocs++;
1328#ifdef INVARIANTS
1329 uma_dbg_alloc(zone, NULL, item);
1330#endif
1331 CPU_UNLOCK(zone, cpu);
1332 if (zone->uz_ctor)
1333 zone->uz_ctor(item, zone->uz_size, udata);
1334 if (flags & M_ZERO)
1335 bzero(item, zone->uz_size);
1336 return (item);
1337 } else if (cache->uc_freebucket) {
1338 /*
1339 * We have run out of items in our allocbucket.
1340 * See if we can switch with our free bucket.
1341 */
1342 if (cache->uc_freebucket->ub_ptr > -1) {
1343 uma_bucket_t swap;
1344
1345#ifdef UMA_DEBUG_ALLOC
1346 printf("uma_zalloc: Swapping empty with alloc.\n");
1347#endif
1348 swap = cache->uc_freebucket;
1349 cache->uc_freebucket = cache->uc_allocbucket;
1350 cache->uc_allocbucket = swap;
1351
1352 goto zalloc_start;
1353 }
1354 }
1355 }
1356 ZONE_LOCK(zone);
1357 /* Since we have locked the zone we may as well send back our stats */
1358 zone->uz_allocs += cache->uc_allocs;
1359 cache->uc_allocs = 0;
1360
1361 /* Our old one is now a free bucket */
1362 if (cache->uc_allocbucket) {
1363 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1364 ("uma_zalloc_arg: Freeing a non free bucket."));
1365 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1366 cache->uc_allocbucket, ub_link);
1367 cache->uc_allocbucket = NULL;
1368 }
1369
1370 /* Check the free list for a new alloc bucket */
1371 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1372 KASSERT(bucket->ub_ptr != -1,
1373 ("uma_zalloc_arg: Returning an empty bucket."));
1374
1375 LIST_REMOVE(bucket, ub_link);
1376 cache->uc_allocbucket = bucket;
1377 ZONE_UNLOCK(zone);
1378 goto zalloc_start;
1379 }
1380 /* Bump up our uz_count so we get here less */
1381 if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1382 zone->uz_count++;
1383
1384 /* We are no longer associated with this cpu!!! */
1385 CPU_UNLOCK(zone, cpu);
1386
1387 /*
1388 * Now lets just fill a bucket and put it on the free list. If that
 1389 * works we'll restart the allocation from the beginning.
1390 *
1391 * Try this zone's free list first so we don't allocate extra buckets.
1392 */
1393
1394 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL)
1395 LIST_REMOVE(bucket, ub_link);
1396
1397 /* Now we no longer need the zone lock. */
1398 ZONE_UNLOCK(zone);
1399
1400 if (bucket == NULL) {
1401 int bflags;
1402
1403 bflags = flags;
1404 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1405 bflags |= M_NOVM;
1406
1407 bucket = uma_zalloc_internal(bucketzone,
1408 NULL, bflags, NULL);
1409 }
1410
1411 if (bucket != NULL) {
1412#ifdef INVARIANTS
1413 bzero(bucket, bucketzone->uz_size);
1414#endif
1415 bucket->ub_ptr = -1;
1416
1417 if (uma_zalloc_internal(zone, udata, flags, bucket))
1418 goto zalloc_restart;
1419 else
1420 uma_zfree_internal(bucketzone, bucket, NULL, 0);
1421 }
1422 /*
1423 * We may not get a bucket if we recurse, so
1424 * return an actual item.
1425 */
1426#ifdef UMA_DEBUG
1427 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1428#endif
1429
1430 return (uma_zalloc_internal(zone, udata, flags, NULL));
1431}
1432
1433/*
1434 * Allocates an item for an internal zone OR fills a bucket
1435 *
1436 * Arguments
1437 * zone The zone to alloc for.
1438 * udata The data to be passed to the constructor.
1439 * flags M_WAITOK, M_NOWAIT, M_ZERO.
1440 * bucket The bucket to fill or NULL
1441 *
1442 * Returns
1443 * NULL if there is no memory and M_NOWAIT is set
 1444 * An item if called on an internal zone
1445 * Non NULL if called to fill a bucket and it was successful.
1446 *
1447 * Discussion:
1448 * This was much cleaner before it had to do per cpu caches. It is
1449 * complicated now because it has to handle the simple internal case, and
1450 * the more involved bucket filling and allocation.
1451 */
1452
1453static void *
1454uma_zalloc_internal(uma_zone_t zone, void *udata, int flags, uma_bucket_t bucket)
1455{
1456 uma_slab_t slab;
1457 u_int8_t freei;
1458 void *item;
1459
1460 item = NULL;
1461
1462 /*
1463 * This is to stop us from allocating per cpu buckets while we're
1464 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
1465 * boot pages.
1466 */
1467
1468 if (bucketdisable && zone == bucketzone)
1469 return (NULL);
1470
1471#ifdef UMA_DEBUG_ALLOC
1472 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1473#endif
1474 ZONE_LOCK(zone);
1475
1476 /*
1477 * This code is here to limit the number of simultaneous bucket fills
1478 * for any given zone to the number of per cpu caches in this zone. This
1479 * is done so that we don't allocate more memory than we really need.
1480 */
1481
1482 if (bucket) {
1483#ifdef SMP
1484 if (zone->uz_fills >= mp_ncpus) {
1485#else
1486 if (zone->uz_fills > 1) {
1487#endif
1488 ZONE_UNLOCK(zone);
1489 return (NULL);
1490 }
1491
1492 zone->uz_fills++;
1493 }
1494
1495new_slab:
1496
1497 /* Find a slab with some space */
1498 if (zone->uz_free) {
1499 if (!LIST_EMPTY(&zone->uz_part_slab)) {
1500 slab = LIST_FIRST(&zone->uz_part_slab);
1501 } else {
1502 slab = LIST_FIRST(&zone->uz_free_slab);
1503 LIST_REMOVE(slab, us_link);
1504 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1505 }
1506 } else {
1507 /*
1508 * This is to prevent us from recursively trying to allocate
1509 * buckets. The problem is that if an allocation forces us to
1510 * grab a new bucket we will call page_alloc, which will go off
1511 * and cause the vm to allocate vm_map_entries. If we need new
1512 * buckets there too we will recurse in kmem_alloc and bad
1513 * things happen. So instead we return a NULL bucket, and make
1514 * the code that allocates buckets smart enough to deal with it
1515 */
1516 if (zone == bucketzone && zone->uz_recurse != 0) {
1517 ZONE_UNLOCK(zone);
1518 return (NULL);
1519 }
1520 while (zone->uz_maxpages &&
1521 zone->uz_pages >= zone->uz_maxpages) {
1522 zone->uz_flags |= UMA_ZFLAG_FULL;
1523
1524 if (flags & M_WAITOK)
1525 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1526 else
1527 goto alloc_fail;
1528
1529 goto new_slab;
1530 }
1531
1532 if (flags & M_NOVM)
1533 goto alloc_fail;
1534
1535 zone->uz_recurse++;
1536 slab = slab_zalloc(zone, flags);
1537 zone->uz_recurse--;
1538 /*
1539 * We might not have been able to get a slab but another cpu
1540 * could have while we were unlocked. If we did get a slab put
1541 * it on the partially used slab list. If not check the free
1542 * count and restart or fail accordingly.
1543 */
1544 if (slab)
1545 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1546 else if (zone->uz_free == 0)
1547 goto alloc_fail;
1548 else
1549 goto new_slab;
1550 }
1551 /*
 1552 * If this is our first time through, put this bucket on the list.
1553 */
1554 if (bucket != NULL && bucket->ub_ptr == -1)
1555 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1556 bucket, ub_link);
1557
1558
1559 while (slab->us_freecount) {
1560 freei = slab->us_firstfree;
1561 slab->us_firstfree = slab->us_freelist[freei];
1562
1563 item = slab->us_data + (zone->uz_rsize * freei);
1564
1565 slab->us_freecount--;
1566 zone->uz_free--;
1567#ifdef INVARIANTS
1568 uma_dbg_alloc(zone, slab, item);
1569#endif
1570 if (bucket == NULL) {
1571 zone->uz_allocs++;
1572 break;
1573 }
1574 bucket->ub_bucket[++bucket->ub_ptr] = item;
1575
1576 /* Don't overfill the bucket! */
1577 if (bucket->ub_ptr == zone->uz_count)
1578 break;
1579 }
1580
1581 /* Move this slab to the full list */
1582 if (slab->us_freecount == 0) {
1583 LIST_REMOVE(slab, us_link);
1584 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1585 }
1586
1587 if (bucket != NULL) {
1588 /* Try to keep the buckets totally full, but don't block */
1589 if (bucket->ub_ptr < zone->uz_count) {
1590 flags |= M_NOWAIT;
1591 flags &= ~M_WAITOK;
1592 goto new_slab;
1593 } else
1594 zone->uz_fills--;
1595 }
1596
1597 ZONE_UNLOCK(zone);
1598
1599 /* Only construct at this time if we're not filling a bucket */
1600 if (bucket == NULL) {
1601 if (zone->uz_ctor != NULL)
1602 zone->uz_ctor(item, zone->uz_size, udata);
1603 if (flags & M_ZERO)
1604 bzero(item, zone->uz_size);
1605 }
1606
1607 return (item);
1608
1609alloc_fail:
1610 if (bucket != NULL)
1611 zone->uz_fills--;
1612 ZONE_UNLOCK(zone);
1613
1614 if (bucket != NULL && bucket->ub_ptr != -1)
1615 return (bucket);
1616
1617 return (NULL);
1618}
1619
1620/* See uma.h */
1621void
1622uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1623{
1624 uma_cache_t cache;
1625 uma_bucket_t bucket;
1626 int bflags;
1627 int cpu;
1628
1629 /* This is the fast path free */
1630#ifdef UMA_DEBUG_ALLOC_1
1631 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1632#endif
1633 /*
1634 * The race here is acceptable. If we miss it we'll just have to wait
1635 * a little longer for the limits to be reset.
1636 */
1637
1638 if (zone->uz_flags & UMA_ZFLAG_FULL)
1639 goto zfree_internal;
1640
1641zfree_restart:
1642 cpu = PCPU_GET(cpuid);
1643 CPU_LOCK(zone, cpu);
1644 cache = &zone->uz_cpu[cpu];
1645
1646zfree_start:
1647 bucket = cache->uc_freebucket;
1648
1649 if (bucket) {
1650 /*
1651 * Do we have room in our bucket? It is OK for this uz count
1652 * check to be slightly out of sync.
1653 */
1654
1655 if (bucket->ub_ptr < zone->uz_count) {
1656 bucket->ub_ptr++;
1657 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1658 ("uma_zfree: Freeing to non free bucket index."));
1659 bucket->ub_bucket[bucket->ub_ptr] = item;
1660 if (zone->uz_dtor)
1661 zone->uz_dtor(item, zone->uz_size, udata);
1662#ifdef INVARIANTS
1663 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1664 uma_dbg_free(zone, udata, item);
1665 else
1666 uma_dbg_free(zone, NULL, item);
1667#endif
1668 CPU_UNLOCK(zone, cpu);
1669 return;
1670 } else if (cache->uc_allocbucket) {
1671#ifdef UMA_DEBUG_ALLOC
1672 printf("uma_zfree: Swapping buckets.\n");
1673#endif
1674 /*
1675 * We have run out of space in our freebucket.
1676 * See if we can switch with our alloc bucket.
1677 */
1678 if (cache->uc_allocbucket->ub_ptr <
1679 cache->uc_freebucket->ub_ptr) {
1680 uma_bucket_t swap;
1681
1682 swap = cache->uc_freebucket;
1683 cache->uc_freebucket = cache->uc_allocbucket;
1684 cache->uc_allocbucket = swap;
1685
1686 goto zfree_start;
1687 }
1688 }
1689 }
1690
1691 /*
1692 * We can get here for two reasons:
1693 *
1694 * 1) The buckets are NULL
1695 * 2) The alloc and free buckets are both somewhat full.
1696 *
1697 */
1698
1699 ZONE_LOCK(zone);
1700
1701 bucket = cache->uc_freebucket;
1702 cache->uc_freebucket = NULL;
1703
1704 /* Can we throw this on the zone full list? */
1705 if (bucket != NULL) {
1706#ifdef UMA_DEBUG_ALLOC
1707 printf("uma_zfree: Putting old bucket on the free list.\n");
1708#endif
1709 /* ub_ptr is pointing to the last free item */
1710 KASSERT(bucket->ub_ptr != -1,
1711 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1712 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1713 bucket, ub_link);
1714 }
1715 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1716 LIST_REMOVE(bucket, ub_link);
1717 ZONE_UNLOCK(zone);
1718 cache->uc_freebucket = bucket;
1719 goto zfree_start;
1720 }
1721 /* We're done with this CPU now */
1722 CPU_UNLOCK(zone, cpu);
1723
1724 /* And the zone.. */
1725 ZONE_UNLOCK(zone);
1726
1727#ifdef UMA_DEBUG_ALLOC
1728 printf("uma_zfree: Allocating new free bucket.\n");
1729#endif
1730 bflags = M_NOWAIT;
1731
1732 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1733 bflags |= M_NOVM;
1734#ifdef INVARIANTS
1735 bflags |= M_ZERO;
1736#endif
1737 bucket = uma_zalloc_internal(bucketzone,
1738 NULL, bflags, NULL);
1739 if (bucket) {
1740 bucket->ub_ptr = -1;
1741 ZONE_LOCK(zone);
1742 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1743 bucket, ub_link);
1744 ZONE_UNLOCK(zone);
1745 goto zfree_restart;
1746 }
1747
1748 /*
1749 * If nothing else caught this, we'll just do an internal free.
1750 */
1751
1752zfree_internal:
1753
1754 uma_zfree_internal(zone, item, udata, 0);
1755
1756 return;
1757
1758}
1759
1760/*
1761 * Frees an item to an INTERNAL zone or allocates a free bucket
1762 *
1763 * Arguments:
1764 * zone The zone to free to
1765 * item The item we're freeing
1766 * udata User supplied data for the dtor
1767 * skip Skip the dtor, it was done in uma_zfree_arg
1768 */
1769
1770static void
1771uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1772{
1773 uma_slab_t slab;
1774 u_int8_t *mem;
1775 u_int8_t freei;
1776
1777 ZONE_LOCK(zone);
1778
1779 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1780 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1781 if (zone->uz_flags & UMA_ZFLAG_HASH)
1782 slab = hash_sfind(&zone->uz_hash, mem);
1783 else {
1784 mem += zone->uz_pgoff;
1785 slab = (uma_slab_t)mem;
1786 }
1787 } else {
1788 slab = (uma_slab_t)udata;
1789 }
1790
1791 /* Do we need to remove from any lists? */
1792 if (slab->us_freecount+1 == zone->uz_ipers) {
1793 LIST_REMOVE(slab, us_link);
1794 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1795 } else if (slab->us_freecount == 0) {
1796 LIST_REMOVE(slab, us_link);
1797 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1798 }
1799
1800 /* Slab management stuff */
1801 freei = ((unsigned long)item - (unsigned long)slab->us_data)
1802 / zone->uz_rsize;
1803
1804#ifdef INVARIANTS
1805 if (!skip)
1806 uma_dbg_free(zone, slab, item);
1807#endif
1808
1809 slab->us_freelist[freei] = slab->us_firstfree;
1810 slab->us_firstfree = freei;
1811 slab->us_freecount++;
1812
1813 /* Zone statistics */
1814 zone->uz_free++;
1815
1816 if (!skip && zone->uz_dtor)
1817 zone->uz_dtor(item, zone->uz_size, udata);
1818
1819 if (zone->uz_flags & UMA_ZFLAG_FULL) {
1820 if (zone->uz_pages < zone->uz_maxpages)
1821 zone->uz_flags &= ~UMA_ZFLAG_FULL;
1822
1823 /* We can handle one more allocation */
1824 wakeup_one(&zone);
1825 }
1826
1827 ZONE_UNLOCK(zone);
1828}
1829
1830/* See uma.h */
1831void
1832uma_zone_set_max(uma_zone_t zone, int nitems)
1833{
1834 ZONE_LOCK(zone);
1835 if (zone->uz_ppera > 1)
1836 zone->uz_maxpages = nitems * zone->uz_ppera;
1837 else
1838 zone->uz_maxpages = nitems / zone->uz_ipers;
1839
1840 if (zone->uz_maxpages * zone->uz_ipers < nitems)
1841 zone->uz_maxpages++;
1842
1843 ZONE_UNLOCK(zone);
1844}
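/*
 * Example of the limit math above (hypothetical numbers): for a one page per
 * slab zone with uz_ipers = 30, uma_zone_set_max(zone, 1000) yields
 * uz_maxpages = 1000 / 30 = 33, rounded up to 34 because 33 * 30 = 990 falls
 * short of 1000.  Since the limit is enforced on pages, the zone can actually
 * grow to 34 * 30 = 1020 items before allocations block (or fail for
 * M_NOWAIT callers).
 */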
1845
1846/* See uma.h */
1847void
1848uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1849{
1850 ZONE_LOCK(zone);
1851
1852 zone->uz_freef = freef;
1853
1854 ZONE_UNLOCK(zone);
1855}
1856
1857/* See uma.h */
1858void
1859uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1860{
1861 ZONE_LOCK(zone);
1862
1863 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1864 zone->uz_allocf = allocf;
1865
1866 ZONE_UNLOCK(zone);
1867}
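/*
 * Sketch of a private backing allocator (an illustration only; the function
 * names are hypothetical and the signatures are assumed to match the
 * uma_alloc/uma_free typedefs in uma.h, mirroring page_alloc/page_free
 * above).  A subsystem with its own page supply could plug in like this:
 */
#if 0
static void *
my_page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
        /* Mark the pages so only my_page_free() is expected to take them. */
        *pflag = UMA_SLAB_PRIV;
        return ((void *)kmem_malloc(kmem_map, bytes, wait));
}

static void
my_page_free(void *mem, int size, u_int8_t flags)
{
        kmem_free(kmem_map, (vm_offset_t)mem, size);
}

static void
my_zone_setup(uma_zone_t zone)
{
        uma_zone_set_allocf(zone, my_page_alloc);
        uma_zone_set_freef(zone, my_page_free);
}
#endif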
1868
1869/* See uma.h */
1870int
1871uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1872{
1873 int pages;
1874 vm_offset_t kva;
1875
1876 mtx_lock(&Giant);
1877
1878 pages = count / zone->uz_ipers;
1879
1880 if (pages * zone->uz_ipers < count)
1881 pages++;
1882
1883 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1884
1885 if (kva == 0) {
1886 mtx_unlock(&Giant);
1887 return (0);
1888 }
1889
1890
1891 if (obj == NULL)
1892 obj = vm_object_allocate(OBJT_DEFAULT,
1893 pages);
1894 else
1895 _vm_object_allocate(OBJT_DEFAULT,
1896 pages, obj);
1897
1898 ZONE_LOCK(zone);
1899 zone->uz_kva = kva;
1900 zone->uz_obj = obj;
1901 zone->uz_maxpages = pages;
1902
1903 zone->uz_allocf = obj_alloc;
1904 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1905
1906 ZONE_UNLOCK(zone);
1907 mtx_unlock(&Giant);
1908
1909 return (1);
1910}
1911
1912/* See uma.h */
1913void
1914uma_prealloc(uma_zone_t zone, int items)
1915{
1916 int slabs;
1917 uma_slab_t slab;
1918
1919 ZONE_LOCK(zone);
1920 slabs = items / zone->uz_ipers;
1921 if (slabs * zone->uz_ipers < items)
1922 slabs++;
1923
1924 while (slabs > 0) {
1925 slab = slab_zalloc(zone, M_WAITOK);
1926 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1927 slabs--;
1928 }
1929 ZONE_UNLOCK(zone);
1930}
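/*
 * Example of the rounding above (hypothetical numbers): with uz_ipers = 30,
 * uma_prealloc(zone, 100) computes slabs = 100 / 30 = 3 and bumps it to 4
 * because 3 * 30 = 90 < 100, so 120 items end up preallocated.
 */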
1931
1932/* See uma.h */
1933void
1934uma_reclaim(void)
1935{
1936 /*
1937 * You might think that the delay below would improve performance since
 1938 * the allocator will give away memory that it may ask for again immediately.
1939 * Really, it makes things worse, since cpu cycles are so much cheaper
1940 * than disk activity.
1941 */
1942#if 0
1943 static struct timeval tv = {0};
1944 struct timeval now;
1945 getmicrouptime(&now);
1946 if (now.tv_sec > tv.tv_sec + 30)
1947 tv = now;
1948 else
1949 return;
1950#endif
1951#ifdef UMA_DEBUG
1952 printf("UMA: vm asked us to release pages!\n");
1953#endif
1954 bucket_enable();
1955 zone_foreach(zone_drain);
1956
1957 /*
 1958 * Some slabs may have been freed, but this zone was visited early in
 1959 * the pass above, so we visit it again to free pages that only became
 1960 * empty once the other zones were drained. The same goes for buckets.
1961 */
1962 zone_drain(slabzone);
1963 zone_drain(bucketzone);
1964}
1965
1966void *
1967uma_large_malloc(int size, int wait)
1968{
1969 void *mem;
1970 uma_slab_t slab;
1971 u_int8_t flags;
1972
1973 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
1974 if (slab == NULL)
1975 return (NULL);
1976
1977 mem = page_alloc(NULL, size, &flags, wait);
1978 if (mem) {
1979 vsetslab((vm_offset_t)mem, slab);
1980 slab->us_data = mem;
1981 slab->us_flags = flags | UMA_SLAB_MALLOC;
1982 slab->us_size = size;
1983 } else {
1984 uma_zfree_internal(slabzone, slab, NULL, 0);
1985 }
1986
1987
1988 return (mem);
1989}
1990
1991void
1992uma_large_free(uma_slab_t slab)
1993{
1994 vsetobj((vm_offset_t)slab->us_data, kmem_object);
1995 page_free(slab->us_data, slab->us_size, slab->us_flags);
1996 uma_zfree_internal(slabzone, slab, NULL, 0);
1997}
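/*
 * How a caller such as the kernel malloc layer might pair the two routines
 * above (an illustrative sketch, not code from this file): the slab behind
 * an address is recovered with vtoslab(), the same way bucket_drain() does
 * it earlier in this file.
 */
#if 0
static void
large_example(void)
{
        uma_slab_t slab;
        void *mem;

        mem = uma_large_malloc(5 * PAGE_SIZE, M_WAITOK);
        if (mem == NULL)
                return;
        /* ... use mem ... */
        slab = vtoslab((vm_offset_t)mem & (~UMA_SLAB_MASK));
        uma_large_free(slab);
}
#endif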
1998
1999void
2000uma_print_stats(void)
2001{
2002 zone_foreach(uma_print_zone);
2003}
2004
2005void
2006uma_print_zone(uma_zone_t zone)
2007{
2008 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2009 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2010 zone->uz_ipers, zone->uz_ppera,
2011 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2012}
2013
2014/*
2015 * Sysctl handler for vm.zone
2016 *
2017 * stolen from vm_zone.c
2018 */
2019static int
2020sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2021{
2022 int error, len, cnt;
2023 const int linesize = 128; /* conservative */
2024 int totalfree;
2025 char *tmpbuf, *offset;
2026 uma_zone_t z;
2027 char *p;
2028
2029 cnt = 0;
2030 mtx_lock(&uma_mtx);
2031 LIST_FOREACH(z, &uma_zones, uz_link)
2032 cnt++;
2033 mtx_unlock(&uma_mtx);
2034 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2035 M_TEMP, M_WAITOK);
2036 len = snprintf(tmpbuf, linesize,
2037 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2038 if (cnt == 0)
2039 tmpbuf[len - 1] = '\0';
2040 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2041 if (error || cnt == 0)
2042 goto out;
2043 offset = tmpbuf;
2044 mtx_lock(&uma_mtx);
2045 LIST_FOREACH(z, &uma_zones, uz_link) {
2046 if (cnt == 0) /* list may have changed size */
2047 break;
2048 ZONE_LOCK(z);
2049 totalfree = z->uz_free + z->uz_cachefree;
2050 len = snprintf(offset, linesize,
2051 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2052 z->uz_name, z->uz_size,
2053 z->uz_maxpages * z->uz_ipers,
2054 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2055 totalfree,
2056 (unsigned long long)z->uz_allocs);
2057 ZONE_UNLOCK(z);
2058 for (p = offset + 12; p > offset && *p == ' '; --p)
2059 /* nothing */ ;
2060 p[1] = ':';
2061 cnt--;
2062 offset += len;
2063 }
2064 mtx_unlock(&uma_mtx);
2065 *offset++ = '\0';
2066 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2067out:
2068 FREE(tmpbuf, M_TEMP);
2069 return (error);
2070}
167static void uma_timeout(void *);
168static void uma_startup3(void);
169static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t);
170static void uma_zfree_internal(uma_zone_t, void *, void *, int);
171static void bucket_enable(void);
172void uma_print_zone(uma_zone_t);
173void uma_print_stats(void);
174static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
175
176SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
177 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
178SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
179
180/*
181 * This routine checks to see whether or not it's safe to enable buckets.
182 */
183
184static void
185bucket_enable(void)
186{
187 if (cnt.v_free_count < cnt.v_free_min)
188 bucketdisable = 1;
189 else
190 bucketdisable = 0;
191}
192
193
194/*
195 * Routine called by timeout which is used to fire off some time interval
196 * based calculations. (working set, stats, etc.)
197 *
198 * Arguments:
199 * arg Unused
200 *
201 * Returns:
202 * Nothing
203 */
204static void
205uma_timeout(void *unused)
206{
207 bucket_enable();
208 zone_foreach(zone_timeout);
209
210 /* Reschedule this event */
211 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
212}
213
214/*
215 * Routine to perform timeout driven calculations. This does the working set
216 * calculation as well as hash expansion and per cpu statistics aggregation.
217 *
218 * Arguments:
219 * zone The zone to operate on
220 *
221 * Returns:
222 * Nothing
223 */
224static void
225zone_timeout(uma_zone_t zone)
226{
227 uma_cache_t cache;
228 u_int64_t alloc;
229 int free;
230 int cpu;
231
232 alloc = 0;
233 free = 0;
234
235 /*
236 * Aggregate per cpu cache statistics back to the zone.
237 *
238 * I may rewrite this to set a flag in the per cpu cache instead of
239 * locking. If the flag is not cleared on the next round I will have
240 * to lock and do it here instead so that the statistics don't get too
241 * far out of sync.
242 */
243 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
244 for (cpu = 0; cpu < maxcpu; cpu++) {
245 if (CPU_ABSENT(cpu))
246 continue;
247 CPU_LOCK(zone, cpu);
248 cache = &zone->uz_cpu[cpu];
249 /* Add them up, and reset */
250 alloc += cache->uc_allocs;
251 cache->uc_allocs = 0;
252 if (cache->uc_allocbucket)
253 free += cache->uc_allocbucket->ub_ptr + 1;
254 if (cache->uc_freebucket)
255 free += cache->uc_freebucket->ub_ptr + 1;
256 CPU_UNLOCK(zone, cpu);
257 }
258 }
259
260 /* Now push these stats back into the zone.. */
261 ZONE_LOCK(zone);
262 zone->uz_allocs += alloc;
263
264 /*
265	 * cachefree is an instantaneous snapshot of what is in the per cpu
266 * caches, not an accurate counter
267 */
268 zone->uz_cachefree = free;
269
270 /*
271 * Expand the zone hash table.
272 *
273 * This is done if the number of slabs is larger than the hash size.
274 * What I'm trying to do here is eliminate collisions entirely. This
275 * may be a little aggressive. Should I allow for two collisions max?
276 */
277
278 if (zone->uz_flags & UMA_ZFLAG_HASH &&
279 zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
280 struct uma_hash newhash;
281 struct uma_hash oldhash;
282 int ret;
283
284 /*
285 * This is so involved because allocating and freeing
286 * while the zone lock is held will lead to deadlock.
287 * I have to do everything in stages and check for
288 * races.
289 */
290 newhash = zone->uz_hash;
291 ZONE_UNLOCK(zone);
292 ret = hash_alloc(&newhash);
293 ZONE_LOCK(zone);
294 if (ret) {
295 if (hash_expand(&zone->uz_hash, &newhash)) {
296 oldhash = zone->uz_hash;
297 zone->uz_hash = newhash;
298 } else
299 oldhash = newhash;
300
301 ZONE_UNLOCK(zone);
302 hash_free(&oldhash);
303 ZONE_LOCK(zone);
304 }
305 }
306
307 /*
308 * Here we compute the working set size as the total number of items
309 * left outstanding since the last time interval. This is slightly
310 * suboptimal. What we really want is the highest number of outstanding
311 * items during the last time quantum. This should be close enough.
312 *
313 * The working set size is used to throttle the zone_drain function.
314 * We don't want to return memory that we may need again immediately.
315 */
316 alloc = zone->uz_allocs - zone->uz_oallocs;
317 zone->uz_oallocs = zone->uz_allocs;
318 zone->uz_wssize = alloc;
319
320 ZONE_UNLOCK(zone);
321}
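/*
 * Worked example for the working set bookkeeping above (the numbers are
 * hypothetical): if uz_oallocs was 10000 at the previous tick and
 * uz_allocs is 10600 now, then
 *
 *	uz_wssize = 10600 - 10000 = 600
 *
 * and zone_drain() below will decline to return slabs while the zone has
 * fewer than 600 free items, since the last interval suggests that much
 * memory is about to be reused.
 */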
322
323/*
324 * Allocate and zero fill the next sized hash table from the appropriate
325 * backing store.
326 *
327 * Arguments:
328 * hash A new hash structure with the old hash size in uh_hashsize
329 *
330 * Returns:
331 *	1 on success and 0 on failure.
332 */
333int
334hash_alloc(struct uma_hash *hash)
335{
336 int oldsize;
337 int alloc;
338
339 oldsize = hash->uh_hashsize;
340
341 /* We're just going to go to a power of two greater */
342 if (oldsize) {
343 hash->uh_hashsize = oldsize * 2;
344 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
345 /* XXX Shouldn't be abusing DEVBUF here */
346 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
347 M_DEVBUF, M_NOWAIT);
348 } else {
349 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
350 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
351 M_WAITOK, NULL);
352 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
353 }
354 if (hash->uh_slab_hash) {
355 bzero(hash->uh_slab_hash, alloc);
356 hash->uh_hashmask = hash->uh_hashsize - 1;
357 return (1);
358 }
359
360 return (0);
361}
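/*
 * Illustrative sketch of the growth pattern (no specific sizes are
 * implied): successive expansions double the bucket array,
 *
 *	uh_hashsize:	N -> 2N -> 4N -> ...
 *	allocation:	sizeof(hash->uh_slab_hash[0]) * uh_hashsize bytes
 *	uh_hashmask:	uh_hashsize - 1	(always a power of two minus one)
 *
 * The very first table comes from hashzone so it can be set up before
 * malloc(9) is usable; later, larger tables come from malloc(9) with
 * M_NOWAIT and may fail, in which case the caller simply keeps the old
 * table.
 */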
362
363/*
364 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout
365 * to reduce collisions. This must not be done in the regular allocation path,
366 * otherwise, we can recurse on the vm while allocating pages.
367 *
368 * Arguments:
369 * oldhash The hash you want to expand
370 * newhash The hash structure for the new table
371 *
372 * Returns:
373 *	1 on success and 0 on failure.
374 *
375 * Discussion:
376 */
377static int
378hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
379{
380 uma_slab_t slab;
381 int hval;
382 int i;
383
384 if (!newhash->uh_slab_hash)
385 return (0);
386
387 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
388 return (0);
389
390 /*
391 * I need to investigate hash algorithms for resizing without a
392 * full rehash.
393 */
394
395 for (i = 0; i < oldhash->uh_hashsize; i++)
396 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
397 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
398 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
399 hval = UMA_HASH(newhash, slab->us_data);
400 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
401 slab, us_hlink);
402 }
403
404 return (1);
405}
406
407/*
408 * Free the hash bucket to the appropriate backing store.
409 *
410 * Arguments:
411 *	hash  The hash structure whose slab_hash array we're freeing;
412 *	      uh_hashsize tells us which backing store it came from
413 *
414 * Returns:
415 * Nothing
416 */
417static void
418hash_free(struct uma_hash *hash)
419{
420 if (hash->uh_slab_hash == NULL)
421 return;
422 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
423 uma_zfree_internal(hashzone,
424 hash->uh_slab_hash, NULL, 0);
425 else
426 free(hash->uh_slab_hash, M_DEVBUF);
427}
428
429/*
430 * Frees all outstanding items in a bucket
431 *
432 * Arguments:
433 * zone The zone to free to, must be unlocked.
434 * bucket The free/alloc bucket with items, cpu queue must be locked.
435 *
436 * Returns:
437 * Nothing
438 */
439
440static void
441bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
442{
443 uma_slab_t slab;
444 int mzone;
445 void *item;
446
447 if (bucket == NULL)
448 return;
449
450 slab = NULL;
451 mzone = 0;
452
453 /* We have to lookup the slab again for malloc.. */
454 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
455 mzone = 1;
456
457 while (bucket->ub_ptr > -1) {
458 item = bucket->ub_bucket[bucket->ub_ptr];
459#ifdef INVARIANTS
460 bucket->ub_bucket[bucket->ub_ptr] = NULL;
461 KASSERT(item != NULL,
462 ("bucket_drain: botched ptr, item is NULL"));
463#endif
464 bucket->ub_ptr--;
465 /*
466 * This is extremely inefficient. The slab pointer was passed
467 * to uma_zfree_arg, but we lost it because the buckets don't
468 * hold them. This will go away when free() gets a size passed
469 * to it.
470 */
471 if (mzone)
472 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
473 uma_zfree_internal(zone, item, slab, 1);
474 }
475}
476
477/*
478 * Drains the per cpu caches for a zone.
479 *
480 * Arguments:
481 * zone The zone to drain, must be unlocked.
482 *
483 * Returns:
484 * Nothing
485 *
486 * This function returns with the zone locked so that the per cpu queues can
487 * not be filled until zone_drain is finished.
488 *
489 */
490static void
491cache_drain(uma_zone_t zone)
492{
493 uma_bucket_t bucket;
494 uma_cache_t cache;
495 int cpu;
496
497 /*
498 * Flush out the per cpu queues.
499 *
500 * XXX This causes unnecessary thrashing due to immediately having
501 * empty per cpu queues. I need to improve this.
502 */
503
504 /*
505 * We have to lock each cpu cache before locking the zone
506 */
507 ZONE_UNLOCK(zone);
508
509 for (cpu = 0; cpu < maxcpu; cpu++) {
510 if (CPU_ABSENT(cpu))
511 continue;
512 CPU_LOCK(zone, cpu);
513 cache = &zone->uz_cpu[cpu];
514 bucket_drain(zone, cache->uc_allocbucket);
515 bucket_drain(zone, cache->uc_freebucket);
516 }
517
518 /*
519 * Drain the bucket queues and free the buckets, we just keep two per
520 * cpu (alloc/free).
521 */
522 ZONE_LOCK(zone);
523 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
524 LIST_REMOVE(bucket, ub_link);
525 ZONE_UNLOCK(zone);
526 bucket_drain(zone, bucket);
527 uma_zfree_internal(bucketzone, bucket, NULL, 0);
528 ZONE_LOCK(zone);
529 }
530
531 /* Now we do the free queue.. */
532 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
533 LIST_REMOVE(bucket, ub_link);
534 uma_zfree_internal(bucketzone, bucket, NULL, 0);
535 }
536
537 /* We unlock here, but they will all block until the zone is unlocked */
538 for (cpu = 0; cpu < maxcpu; cpu++) {
539 if (CPU_ABSENT(cpu))
540 continue;
541 CPU_UNLOCK(zone, cpu);
542 }
543
544 zone->uz_cachefree = 0;
545}
546
547/*
548 * Frees pages from a zone back to the system. This is done on demand from
549 * the pageout daemon.
550 *
551 * Arguments:
552 * zone The zone to free pages from
554 *
555 * Returns:
556 * Nothing.
557 */
558static void
559zone_drain(uma_zone_t zone)
560{
561 struct slabhead freeslabs = {};
562 uma_slab_t slab;
563 uma_slab_t n;
564 u_int64_t extra;
565 u_int8_t flags;
566 u_int8_t *mem;
567 int i;
568
569 /*
570	 * We don't want to take pages from statically allocated zones at this
571 * time
572 */
573 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
574 return;
575
576 ZONE_LOCK(zone);
577
578 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
579 cache_drain(zone);
580
581 if (zone->uz_free < zone->uz_wssize)
582 goto finished;
583#ifdef UMA_DEBUG
584 printf("%s working set size: %llu free items: %u\n",
585 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
586#endif
587 extra = zone->uz_free - zone->uz_wssize;
588 extra /= zone->uz_ipers;
589
590 /* extra is now the number of extra slabs that we can free */
591
592 if (extra == 0)
593 goto finished;
594
595 slab = LIST_FIRST(&zone->uz_free_slab);
596 while (slab && extra) {
597 n = LIST_NEXT(slab, us_link);
598
599		/* We have nowhere to free these to */
600 if (slab->us_flags & UMA_SLAB_BOOT) {
601 slab = n;
602 continue;
603 }
604
605 LIST_REMOVE(slab, us_link);
606 zone->uz_pages -= zone->uz_ppera;
607 zone->uz_free -= zone->uz_ipers;
608
609 if (zone->uz_flags & UMA_ZFLAG_HASH)
610 UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
611
612 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
613
614 slab = n;
615 extra--;
616 }
617finished:
618 ZONE_UNLOCK(zone);
619
620 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
621 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
622 if (zone->uz_fini)
623 for (i = 0; i < zone->uz_ipers; i++)
624 zone->uz_fini(
625 slab->us_data + (zone->uz_rsize * i),
626 zone->uz_size);
627 flags = slab->us_flags;
628 mem = slab->us_data;
629
630 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
631 uma_zfree_internal(slabzone, slab, NULL, 0);
632 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
633 for (i = 0; i < zone->uz_ppera; i++)
634 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
635 kmem_object);
636#ifdef UMA_DEBUG
637 printf("%s: Returning %d bytes.\n",
638 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
639#endif
640 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
641 }
642
643}
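/*
 * Worked example (hypothetical numbers): a zone with uz_ipers == 32,
 * uz_free == 200 and a working set size of 50 computes
 *
 *	extra = (200 - 50) / 32 = 4
 *
 * and hands at most four completely free slabs back to the page allocator,
 * keeping the rest because the working set suggests they will be needed
 * again soon.  Boot-time slabs are skipped since they have no backing map
 * to return to.
 */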
644
645/*
646 * Allocate a new slab for a zone. This does not insert the slab onto a list.
647 *
648 * Arguments:
649 * zone The zone to allocate slabs for
650 * wait Shall we wait?
651 *
652 * Returns:
653 * The slab that was allocated or NULL if there is no memory and the
654 * caller specified M_NOWAIT.
655 *
656 */
657static uma_slab_t
658slab_zalloc(uma_zone_t zone, int wait)
659{
660 uma_slab_t slab; /* Starting slab */
661 u_int8_t *mem;
662 u_int8_t flags;
663 int i;
664
665 slab = NULL;
666
667#ifdef UMA_DEBUG
668 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
669#endif
670 ZONE_UNLOCK(zone);
671
672 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
673 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
674 if (slab == NULL) {
675 ZONE_LOCK(zone);
676 return NULL;
677 }
678 }
679
680 /*
681 * This reproduces the old vm_zone behavior of zero filling pages the
682 * first time they are added to a zone.
683 *
684 * Malloced items are zeroed in uma_zalloc.
685 */
686
687 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
688 wait |= M_ZERO;
689 else
690 wait &= ~M_ZERO;
691
692 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
693 mtx_lock(&Giant);
694 mem = zone->uz_allocf(zone,
695 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
696 mtx_unlock(&Giant);
697 if (mem == NULL) {
698 ZONE_LOCK(zone);
699 return (NULL);
700 }
701 } else {
702 uma_slab_t tmps;
703
704 if (zone->uz_ppera > 1)
705			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
706 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
707 panic("Mallocing before uma_startup2 has been called.\n");
708 if (uma_boot_free == 0)
709 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
710 tmps = LIST_FIRST(&uma_boot_pages);
711 LIST_REMOVE(tmps, us_link);
712 uma_boot_free--;
713 mem = tmps->us_data;
714 }
715
716 /* Point the slab into the allocated memory */
717 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
718 slab = (uma_slab_t )(mem + zone->uz_pgoff);
719
720 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
721 for (i = 0; i < zone->uz_ppera; i++)
722 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
723
724 slab->us_zone = zone;
725 slab->us_data = mem;
726
727 /*
728 * This is intended to spread data out across cache lines.
729 *
730 * This code doesn't seem to work properly on x86, and on alpha
731 * it makes absolutely no performance difference. I'm sure it could
732	 * use some tuning, but Sun makes outrageous claims about its
733 * performance.
734 */
735#if 0
736 if (zone->uz_cachemax) {
737 slab->us_data += zone->uz_cacheoff;
738 zone->uz_cacheoff += UMA_CACHE_INC;
739 if (zone->uz_cacheoff > zone->uz_cachemax)
740 zone->uz_cacheoff = 0;
741 }
742#endif
743
744 slab->us_freecount = zone->uz_ipers;
745 slab->us_firstfree = 0;
746 slab->us_flags = flags;
747 for (i = 0; i < zone->uz_ipers; i++)
748 slab->us_freelist[i] = i+1;
749
750 if (zone->uz_init)
751 for (i = 0; i < zone->uz_ipers; i++)
752 zone->uz_init(slab->us_data + (zone->uz_rsize * i),
753 zone->uz_size);
754 ZONE_LOCK(zone);
755
756 if (zone->uz_flags & UMA_ZFLAG_HASH)
757 UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
758
759 zone->uz_pages += zone->uz_ppera;
760 zone->uz_free += zone->uz_ipers;
761
762
763 return (slab);
764}
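/*
 * The free list built above is just an array of indices threaded through
 * the slab header.  As a sketch, a fresh slab for a zone with
 * uz_ipers == 4 looks like
 *
 *	us_firstfree = 0
 *	us_freelist  = { 1, 2, 3, 4 }
 *
 * (the final link is never followed; us_freecount runs out first), and an
 * allocation pops index us_firstfree and follows the link:
 *
 *	freei = slab->us_firstfree;
 *	slab->us_firstfree = slab->us_freelist[freei];
 *	item = slab->us_data + (zone->uz_rsize * freei);
 *
 * which is exactly what uma_zalloc_internal() does further down.
 */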
765
766/*
767 * Allocates a number of pages from the system
768 *
769 * Arguments:
770 * zone Unused
771 * bytes The number of bytes requested
772 * wait Shall we wait?
773 *
774 * Returns:
775 * A pointer to the alloced memory or possibly
776 * NULL if M_NOWAIT is set.
777 */
778static void *
779page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
780{
781 void *p; /* Returned page */
782
783 *pflag = UMA_SLAB_KMEM;
784 p = (void *) kmem_malloc(kmem_map, bytes, wait);
785
786 return (p);
787}
788
789/*
790 * Allocates a number of pages from within an object
791 *
792 * Arguments:
793 * zone Unused
794 * bytes The number of bytes requested
795 * wait Shall we wait?
796 *
797 * Returns:
798 * A pointer to the alloced memory or possibly
799 * NULL if M_NOWAIT is set.
800 *
801 * TODO: If we fail during a multi-page allocation, release the pages that have
802 * already been allocated.
803 */
804static void *
805obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
806{
807 vm_offset_t zkva;
808 vm_offset_t retkva;
809 vm_page_t p;
810 int pages;
811
812 retkva = 0;
813 pages = zone->uz_pages;
814
815 /*
816 * This looks a little weird since we're getting one page at a time
817 */
818 while (bytes > 0) {
819 p = vm_page_alloc(zone->uz_obj, pages,
820 VM_ALLOC_INTERRUPT);
821 if (p == NULL)
822 return (NULL);
823
824 zkva = zone->uz_kva + pages * PAGE_SIZE;
825 if (retkva == 0)
826 retkva = zkva;
827 pmap_qenter(zkva, &p, 1);
828 bytes -= PAGE_SIZE;
829 pages += 1;
830 }
831
832 *flags = UMA_SLAB_PRIV;
833
834 return ((void *)retkva);
835}
836
837/*
838 * Frees a number of pages to the system
839 *
840 * Arguments:
841 * mem A pointer to the memory to be freed
842 * size The size of the memory being freed
843 * flags The original p->us_flags field
844 *
845 * Returns:
846 * Nothing
847 *
848 */
849static void
850page_free(void *mem, int size, u_int8_t flags)
851{
852 vm_map_t map;
853
854 if (flags & UMA_SLAB_KMEM)
855 map = kmem_map;
856 else
857 panic("UMA: page_free used with invalid flags %d\n", flags);
858
859 kmem_free(map, (vm_offset_t)mem, size);
860}
861
862/*
863 * Zero fill initializer
864 *
865 * Arguments/Returns follow uma_init specifications
866 *
867 */
868static void
869zero_init(void *mem, int size)
870{
871 bzero(mem, size);
872}
873
874/*
875 * Finish creating a small uma zone. This calculates ipers and the real item size (rsize).
876 *
877 * Arguments
878 * zone The zone we should initialize
879 *
880 * Returns
881 * Nothing
882 */
883static void
884zone_small_init(uma_zone_t zone)
885{
886 int rsize;
887 int memused;
888 int ipers;
889
890 rsize = zone->uz_size;
891
892 if (rsize < UMA_SMALLEST_UNIT)
893 rsize = UMA_SMALLEST_UNIT;
894
895 if (rsize & zone->uz_align)
896 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
897
898 zone->uz_rsize = rsize;
899
900 rsize += 1; /* Account for the byte of linkage */
901 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
902 zone->uz_ppera = 1;
903
904 memused = zone->uz_ipers * zone->uz_rsize;
905
906 /* Can we do any better? */
907 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
908 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
909 return;
910 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
911 if (ipers > zone->uz_ipers) {
912 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
913 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
914 zone->uz_flags |= UMA_ZFLAG_HASH;
915 zone->uz_ipers = ipers;
916 }
917 }
918
919}
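/*
 * Worked example (assuming UMA_SLAB_SIZE is one 4K page; the exact header
 * size is left symbolic): a 100 byte item with 4 byte alignment is already
 * aligned, so uz_rsize == 100.  Each item then consumes 101 bytes of slab
 * space, the extra byte being the free list link, so
 *
 *	uz_ipers = (4096 - sizeof(struct uma_slab)) / 101	(roughly 40)
 *
 * If the leftover space is at least UMA_MAX_WASTE and an off-page header
 * would let more items fit, the zone is marked OFFPAGE (and HASH, unless
 * it is a malloc zone) instead; internal zones skip this and always keep
 * their header in the page.
 */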
920
921/*
922 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
923 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
924 * more complicated.
925 *
926 * Arguments
927 * zone The zone we should initialize
928 *
929 * Returns
930 * Nothing
931 */
932static void
933zone_large_init(uma_zone_t zone)
934{
935 int pages;
936
937 pages = zone->uz_size / UMA_SLAB_SIZE;
938
939 /* Account for remainder */
940 if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
941 pages++;
942
943 zone->uz_ppera = pages;
944 zone->uz_ipers = 1;
945
946 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
947 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
948 zone->uz_flags |= UMA_ZFLAG_HASH;
949
950 zone->uz_rsize = zone->uz_size;
951}
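/*
 * Example, again assuming 4K slabs: a 10K item gives
 *
 *	pages = 10240 / 4096 = 2, with a remainder, so uz_ppera = 3
 *	uz_ipers = 1, uz_rsize = uz_size = 10240
 *
 * i.e. exactly one item per three-page slab, with the slab header kept
 * OFFPAGE and, unless this is a malloc zone, found through the hash.
 */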
952
953/*
954 * Zone header ctor. This initializes all fields, locks, etc. And inserts
955 * the zone onto the global zone list.
956 *
957 * Arguments/Returns follow uma_ctor specifications
958 *	udata  Actually uma_zctor_args
959 *
960 */
961
962static void
963zone_ctor(void *mem, int size, void *udata)
964{
965 struct uma_zctor_args *arg = udata;
966 uma_zone_t zone = mem;
967 int privlc;
968 int cplen;
969 int cpu;
970
971 bzero(zone, size);
972 zone->uz_name = arg->name;
973 zone->uz_size = arg->size;
974 zone->uz_ctor = arg->ctor;
975 zone->uz_dtor = arg->dtor;
976 zone->uz_init = arg->uminit;
977 zone->uz_fini = arg->fini;
978 zone->uz_align = arg->align;
979 zone->uz_free = 0;
980 zone->uz_pages = 0;
981 zone->uz_flags = 0;
982 zone->uz_allocf = page_alloc;
983 zone->uz_freef = page_free;
984
985 if (arg->flags & UMA_ZONE_ZINIT)
986 zone->uz_init = zero_init;
987
988 if (arg->flags & UMA_ZONE_INTERNAL)
989 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
990
991 if (arg->flags & UMA_ZONE_MALLOC)
992 zone->uz_flags |= UMA_ZFLAG_MALLOC;
993
994 if (arg->flags & UMA_ZONE_NOFREE)
995 zone->uz_flags |= UMA_ZFLAG_NOFREE;
996
997 if (arg->flags & UMA_ZONE_VM)
998 zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
999
1000 if (zone->uz_size > UMA_SLAB_SIZE)
1001 zone_large_init(zone);
1002 else
1003 zone_small_init(zone);
1004
1005 if (arg->flags & UMA_ZONE_MTXCLASS)
1006 privlc = 1;
1007 else
1008 privlc = 0;
1009
1010 /* We do this so that the per cpu lock name is unique for each zone */
1011 memcpy(zone->uz_lname, "PCPU ", 5);
1012 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1013 memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1014 zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1015
1016 /*
1017 * If we're putting the slab header in the actual page we need to
1018 * figure out where in each page it goes. This calculates a right
1019	 * justified offset into the memory on a UMA_ALIGN_PTR boundary.
1020 */
1021 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1022 int totsize;
1023 int waste;
1024
1025 /* Size of the slab struct and free list */
1026 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1027 if (totsize & UMA_ALIGN_PTR)
1028 totsize = (totsize & ~UMA_ALIGN_PTR) +
1029 (UMA_ALIGN_PTR + 1);
1030 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1031
1032 waste = zone->uz_pgoff;
1033 waste -= (zone->uz_ipers * zone->uz_rsize);
1034
1035 /*
1036 * This calculates how much space we have for cache line size
1037		 * optimizations. It works by offsetting each slab slightly.
1038 * Currently it breaks on x86, and so it is disabled.
1039 */
1040
1041 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1042 zone->uz_cachemax = waste - UMA_CACHE_INC;
1043 zone->uz_cacheoff = 0;
1044 }
1045
1046 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1047 + zone->uz_ipers;
1048 /* I don't think it's possible, but I'll make sure anyway */
1049 if (totsize > UMA_SLAB_SIZE) {
1050 printf("zone %s ipers %d rsize %d size %d\n",
1051 zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1052 zone->uz_size);
1053 panic("UMA slab won't fit.\n");
1054 }
1055 }
1056
1057 if (zone->uz_flags & UMA_ZFLAG_HASH)
1058 hash_alloc(&zone->uz_hash);
1059
1060#ifdef UMA_DEBUG
1061 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1062 zone->uz_name, zone,
1063 zone->uz_size, zone->uz_ipers,
1064 zone->uz_ppera, zone->uz_pgoff);
1065#endif
1066 ZONE_LOCK_INIT(zone, privlc);
1067
1068 mtx_lock(&uma_mtx);
1069 LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1070 mtx_unlock(&uma_mtx);
1071
1072 /*
1073 * Some internal zones don't have room allocated for the per cpu
1074 * caches. If we're internal, bail out here.
1075 */
1076
1077 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1078 return;
1079
1080 if (zone->uz_ipers < UMA_BUCKET_SIZE)
1081 zone->uz_count = zone->uz_ipers - 1;
1082 else
1083 zone->uz_count = UMA_BUCKET_SIZE - 1;
1084
1085 for (cpu = 0; cpu < maxcpu; cpu++)
1086 CPU_LOCK_INIT(zone, cpu, privlc);
1087}
1088
1089/*
1090 * Zone header dtor. This frees all data, destroys locks, frees the hash table
1091 * and removes the zone from the global list.
1092 *
1093 * Arguments/Returns follow uma_dtor specifications
1094 * udata unused
1095 */
1096
1097static void
1098zone_dtor(void *arg, int size, void *udata)
1099{
1100 uma_zone_t zone;
1101 int cpu;
1102
1103 zone = (uma_zone_t)arg;
1104
1105 ZONE_LOCK(zone);
1106 zone->uz_wssize = 0;
1107 ZONE_UNLOCK(zone);
1108
1109 mtx_lock(&uma_mtx);
1110 LIST_REMOVE(zone, uz_link);
1111 zone_drain(zone);
1112 mtx_unlock(&uma_mtx);
1113
1114 ZONE_LOCK(zone);
1115 if (zone->uz_free != 0)
1116 printf("Zone %s was not empty. Lost %d pages of memory.\n",
1117 zone->uz_name, zone->uz_pages);
1118
1119 if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
1120 for (cpu = 0; cpu < maxcpu; cpu++)
1121 CPU_LOCK_FINI(zone, cpu);
1122
1123 ZONE_UNLOCK(zone);
1124 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1125 hash_free(&zone->uz_hash);
1126
1127 ZONE_LOCK_FINI(zone);
1128}
1129/*
1130 * Traverses every zone in the system and calls a callback
1131 *
1132 * Arguments:
1133 * zfunc A pointer to a function which accepts a zone
1134 * as an argument.
1135 *
1136 * Returns:
1137 * Nothing
1138 */
1139static void
1140zone_foreach(void (*zfunc)(uma_zone_t))
1141{
1142 uma_zone_t zone;
1143
1144 mtx_lock(&uma_mtx);
1145 LIST_FOREACH(zone, &uma_zones, uz_link) {
1146 zfunc(zone);
1147 }
1148 mtx_unlock(&uma_mtx);
1149}
1150
1151/* Public functions */
1152/* See uma.h */
1153void
1154uma_startup(void *bootmem)
1155{
1156 struct uma_zctor_args args;
1157 uma_slab_t slab;
1158 int slabsize;
1159 int i;
1160
1161#ifdef UMA_DEBUG
1162 printf("Creating uma zone headers zone.\n");
1163#endif
1164#ifdef SMP
1165 maxcpu = mp_maxid + 1;
1166#else
1167 maxcpu = 1;
1168#endif
1169#ifdef UMA_DEBUG
1170 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1171 Debugger("stop");
1172#endif
1173 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1174	/* "Manually" create the initial zone */
1175 args.name = "UMA Zones";
1176 args.size = sizeof(struct uma_zone) +
1177 (sizeof(struct uma_cache) * (maxcpu - 1));
1178 args.ctor = zone_ctor;
1179 args.dtor = zone_dtor;
1180 args.uminit = zero_init;
1181 args.fini = NULL;
1182 args.align = 32 - 1;
1183 args.flags = UMA_ZONE_INTERNAL;
1184 /* The initial zone has no Per cpu queues so it's smaller */
1185 zone_ctor(zones, sizeof(struct uma_zone), &args);
1186
1187#ifdef UMA_DEBUG
1188 printf("Filling boot free list.\n");
1189#endif
1190 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1191 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1192 slab->us_data = (u_int8_t *)slab;
1193 slab->us_flags = UMA_SLAB_BOOT;
1194 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1195 uma_boot_free++;
1196 }
1197
1198#ifdef UMA_DEBUG
1199 printf("Creating slab zone.\n");
1200#endif
1201
1202 /*
1203 * This is the max number of free list items we'll have with
1204 * offpage slabs.
1205 */
1206
1207 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1208 slabsize /= UMA_MAX_WASTE;
1209	slabsize++;	/* In case the division truncated */
1210 slabsize += sizeof(struct uma_slab);
1211
1212 /* Now make a zone for slab headers */
1213 slabzone = uma_zcreate("UMA Slabs",
1214 slabsize,
1215 NULL, NULL, NULL, NULL,
1216 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1217
1218 hashzone = uma_zcreate("UMA Hash",
1219 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1220 NULL, NULL, NULL, NULL,
1221 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1222
1223 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1224 NULL, NULL, NULL, NULL,
1225 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1226
1227
1228#ifdef UMA_DEBUG
1229 printf("UMA startup complete.\n");
1230#endif
1231}
1232
1233/* see uma.h */
1234void
1235uma_startup2(void)
1236{
1237 booted = 1;
1238 bucket_enable();
1239#ifdef UMA_DEBUG
1240 printf("UMA startup2 complete.\n");
1241#endif
1242}
1243
1244/*
1245 * Initialize our callout handle
1246 *
1247 */
1248
1249static void
1250uma_startup3(void)
1251{
1252#ifdef UMA_DEBUG
1253 printf("Starting callout.\n");
1254#endif
1255 callout_init(&uma_callout, 0);
1256 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1257#ifdef UMA_DEBUG
1258 printf("UMA startup3 complete.\n");
1259#endif
1260}
1261
1262/* See uma.h */
1263uma_zone_t
1264uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1265 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1266
1267{
1268 struct uma_zctor_args args;
1269
1270 /* This stuff is essential for the zone ctor */
1271 args.name = name;
1272 args.size = size;
1273 args.ctor = ctor;
1274 args.dtor = dtor;
1275 args.uminit = uminit;
1276 args.fini = fini;
1277 args.align = align;
1278 args.flags = flags;
1279
1280 return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL));
1281}
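/*
 * Sketch of typical use (the names here are placeholders, not an actual
 * consumer): a subsystem creates its zone once at initialization time,
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	uma_zone_set_max(foo_zone, FOO_MAX);	(optional item limit)
 *
 * Ctor, dtor, init and fini are all optional and may be NULL, exactly as
 * the internal zones created in uma_startup() above pass them.
 */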
1282
1283/* See uma.h */
1284void
1285uma_zdestroy(uma_zone_t zone)
1286{
1287 uma_zfree_internal(zones, zone, NULL, 0);
1288}
1289
1290/* See uma.h */
1291void *
1292uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1293{
1294 void *item;
1295 uma_cache_t cache;
1296 uma_bucket_t bucket;
1297 int cpu;
1298
1299 /* This is the fast path allocation */
1300#ifdef UMA_DEBUG_ALLOC_1
1301 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1302#endif
1303
1304 if (!(flags & M_NOWAIT)) {
1305 KASSERT(curthread->td_intr_nesting_level == 0,
1306 ("malloc(M_WAITOK) in interrupt context"));
1307 WITNESS_SLEEP(1, NULL);
1308 }
1309
1310zalloc_restart:
1311 cpu = PCPU_GET(cpuid);
1312 CPU_LOCK(zone, cpu);
1313 cache = &zone->uz_cpu[cpu];
1314
1315zalloc_start:
1316 bucket = cache->uc_allocbucket;
1317
1318 if (bucket) {
1319 if (bucket->ub_ptr > -1) {
1320 item = bucket->ub_bucket[bucket->ub_ptr];
1321#ifdef INVARIANTS
1322 bucket->ub_bucket[bucket->ub_ptr] = NULL;
1323#endif
1324 bucket->ub_ptr--;
1325 KASSERT(item != NULL,
1326 ("uma_zalloc: Bucket pointer mangled."));
1327 cache->uc_allocs++;
1328#ifdef INVARIANTS
1329 uma_dbg_alloc(zone, NULL, item);
1330#endif
1331 CPU_UNLOCK(zone, cpu);
1332 if (zone->uz_ctor)
1333 zone->uz_ctor(item, zone->uz_size, udata);
1334 if (flags & M_ZERO)
1335 bzero(item, zone->uz_size);
1336 return (item);
1337 } else if (cache->uc_freebucket) {
1338 /*
1339 * We have run out of items in our allocbucket.
1340 * See if we can switch with our free bucket.
1341 */
1342 if (cache->uc_freebucket->ub_ptr > -1) {
1343 uma_bucket_t swap;
1344
1345#ifdef UMA_DEBUG_ALLOC
1346 printf("uma_zalloc: Swapping empty with alloc.\n");
1347#endif
1348 swap = cache->uc_freebucket;
1349 cache->uc_freebucket = cache->uc_allocbucket;
1350 cache->uc_allocbucket = swap;
1351
1352 goto zalloc_start;
1353 }
1354 }
1355 }
1356 ZONE_LOCK(zone);
1357 /* Since we have locked the zone we may as well send back our stats */
1358 zone->uz_allocs += cache->uc_allocs;
1359 cache->uc_allocs = 0;
1360
1361 /* Our old one is now a free bucket */
1362 if (cache->uc_allocbucket) {
1363 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1364 ("uma_zalloc_arg: Freeing a non free bucket."));
1365 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1366 cache->uc_allocbucket, ub_link);
1367 cache->uc_allocbucket = NULL;
1368 }
1369
1370 /* Check the free list for a new alloc bucket */
1371 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1372 KASSERT(bucket->ub_ptr != -1,
1373 ("uma_zalloc_arg: Returning an empty bucket."));
1374
1375 LIST_REMOVE(bucket, ub_link);
1376 cache->uc_allocbucket = bucket;
1377 ZONE_UNLOCK(zone);
1378 goto zalloc_start;
1379 }
1380 /* Bump up our uz_count so we get here less */
1381 if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1382 zone->uz_count++;
1383
1384 /* We are no longer associated with this cpu!!! */
1385 CPU_UNLOCK(zone, cpu);
1386
1387 /*
1388 * Now lets just fill a bucket and put it on the free list. If that
1389	 * works we'll restart the allocation from the beginning.
1390 *
1391 * Try this zone's free list first so we don't allocate extra buckets.
1392 */
1393
1394 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL)
1395 LIST_REMOVE(bucket, ub_link);
1396
1397 /* Now we no longer need the zone lock. */
1398 ZONE_UNLOCK(zone);
1399
1400 if (bucket == NULL) {
1401 int bflags;
1402
1403 bflags = flags;
1404 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1405 bflags |= M_NOVM;
1406
1407 bucket = uma_zalloc_internal(bucketzone,
1408 NULL, bflags, NULL);
1409 }
1410
1411 if (bucket != NULL) {
1412#ifdef INVARIANTS
1413 bzero(bucket, bucketzone->uz_size);
1414#endif
1415 bucket->ub_ptr = -1;
1416
1417 if (uma_zalloc_internal(zone, udata, flags, bucket))
1418 goto zalloc_restart;
1419 else
1420 uma_zfree_internal(bucketzone, bucket, NULL, 0);
1421 }
1422 /*
1423 * We may not get a bucket if we recurse, so
1424 * return an actual item.
1425 */
1426#ifdef UMA_DEBUG
1427 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1428#endif
1429
1430 return (uma_zalloc_internal(zone, udata, flags, NULL));
1431}
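/*
 * Consumer-side sketch (using the hypothetical foo_zone from the comment
 * after uma_zcreate() above):
 *
 *	p = uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree_arg(foo_zone, p, NULL);
 *
 * M_WAITOK may sleep and so is only legal outside interrupt context, as
 * the KASSERT at the top of this function insists; M_NOWAIT callers must
 * be prepared for a NULL return.  udata is passed through untouched to
 * the zone's ctor here and to its dtor on free.
 */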
1432
1433/*
1434 * Allocates an item for an internal zone OR fills a bucket
1435 *
1436 * Arguments
1437 * zone The zone to alloc for.
1438 * udata The data to be passed to the constructor.
1439 * flags M_WAITOK, M_NOWAIT, M_ZERO.
1440 * bucket The bucket to fill or NULL
1441 *
1442 * Returns
1443 * NULL if there is no memory and M_NOWAIT is set
1444 *	An item if called on an internal zone
1445 * Non NULL if called to fill a bucket and it was successful.
1446 *
1447 * Discussion:
1448 * This was much cleaner before it had to do per cpu caches. It is
1449 * complicated now because it has to handle the simple internal case, and
1450 * the more involved bucket filling and allocation.
1451 */
1452
1453static void *
1454uma_zalloc_internal(uma_zone_t zone, void *udata, int flags, uma_bucket_t bucket)
1455{
1456 uma_slab_t slab;
1457 u_int8_t freei;
1458 void *item;
1459
1460 item = NULL;
1461
1462 /*
1463 * This is to stop us from allocating per cpu buckets while we're
1464 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
1465 * boot pages.
1466 */
1467
1468 if (bucketdisable && zone == bucketzone)
1469 return (NULL);
1470
1471#ifdef UMA_DEBUG_ALLOC
1472 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1473#endif
1474 ZONE_LOCK(zone);
1475
1476 /*
1477 * This code is here to limit the number of simultaneous bucket fills
1478 * for any given zone to the number of per cpu caches in this zone. This
1479 * is done so that we don't allocate more memory than we really need.
1480 */
1481
1482 if (bucket) {
1483#ifdef SMP
1484 if (zone->uz_fills >= mp_ncpus) {
1485#else
1486 if (zone->uz_fills > 1) {
1487#endif
1488 ZONE_UNLOCK(zone);
1489 return (NULL);
1490 }
1491
1492 zone->uz_fills++;
1493 }
1494
1495new_slab:
1496
1497 /* Find a slab with some space */
1498 if (zone->uz_free) {
1499 if (!LIST_EMPTY(&zone->uz_part_slab)) {
1500 slab = LIST_FIRST(&zone->uz_part_slab);
1501 } else {
1502 slab = LIST_FIRST(&zone->uz_free_slab);
1503 LIST_REMOVE(slab, us_link);
1504 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1505 }
1506 } else {
1507 /*
1508 * This is to prevent us from recursively trying to allocate
1509 * buckets. The problem is that if an allocation forces us to
1510 * grab a new bucket we will call page_alloc, which will go off
1511 * and cause the vm to allocate vm_map_entries. If we need new
1512 * buckets there too we will recurse in kmem_alloc and bad
1513 * things happen. So instead we return a NULL bucket, and make
1514 * the code that allocates buckets smart enough to deal with it
1515 */
1516 if (zone == bucketzone && zone->uz_recurse != 0) {
1517 ZONE_UNLOCK(zone);
1518 return (NULL);
1519 }
1520 while (zone->uz_maxpages &&
1521 zone->uz_pages >= zone->uz_maxpages) {
1522 zone->uz_flags |= UMA_ZFLAG_FULL;
1523
1524 if (flags & M_WAITOK)
1525 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1526 else
1527 goto alloc_fail;
1528
1529 goto new_slab;
1530 }
1531
1532 if (flags & M_NOVM)
1533 goto alloc_fail;
1534
1535 zone->uz_recurse++;
1536 slab = slab_zalloc(zone, flags);
1537 zone->uz_recurse--;
1538 /*
1539 * We might not have been able to get a slab but another cpu
1540 * could have while we were unlocked. If we did get a slab put
1541 * it on the partially used slab list. If not check the free
1542 * count and restart or fail accordingly.
1543 */
1544 if (slab)
1545 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1546 else if (zone->uz_free == 0)
1547 goto alloc_fail;
1548 else
1549 goto new_slab;
1550 }
1551 /*
1552	 * If this is our first time through, put this guy on the list.
1553 */
1554 if (bucket != NULL && bucket->ub_ptr == -1)
1555 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1556 bucket, ub_link);
1557
1558
1559 while (slab->us_freecount) {
1560 freei = slab->us_firstfree;
1561 slab->us_firstfree = slab->us_freelist[freei];
1562
1563 item = slab->us_data + (zone->uz_rsize * freei);
1564
1565 slab->us_freecount--;
1566 zone->uz_free--;
1567#ifdef INVARIANTS
1568 uma_dbg_alloc(zone, slab, item);
1569#endif
1570 if (bucket == NULL) {
1571 zone->uz_allocs++;
1572 break;
1573 }
1574 bucket->ub_bucket[++bucket->ub_ptr] = item;
1575
1576 /* Don't overfill the bucket! */
1577 if (bucket->ub_ptr == zone->uz_count)
1578 break;
1579 }
1580
1581 /* Move this slab to the full list */
1582 if (slab->us_freecount == 0) {
1583 LIST_REMOVE(slab, us_link);
1584 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1585 }
1586
1587 if (bucket != NULL) {
1588 /* Try to keep the buckets totally full, but don't block */
1589 if (bucket->ub_ptr < zone->uz_count) {
1590 flags |= M_NOWAIT;
1591 flags &= ~M_WAITOK;
1592 goto new_slab;
1593 } else
1594 zone->uz_fills--;
1595 }
1596
1597 ZONE_UNLOCK(zone);
1598
1599 /* Only construct at this time if we're not filling a bucket */
1600 if (bucket == NULL) {
1601 if (zone->uz_ctor != NULL)
1602 zone->uz_ctor(item, zone->uz_size, udata);
1603 if (flags & M_ZERO)
1604 bzero(item, zone->uz_size);
1605 }
1606
1607 return (item);
1608
1609alloc_fail:
1610 if (bucket != NULL)
1611 zone->uz_fills--;
1612 ZONE_UNLOCK(zone);
1613
1614 if (bucket != NULL && bucket->ub_ptr != -1)
1615 return (bucket);
1616
1617 return (NULL);
1618}
1619
1620/* See uma.h */
1621void
1622uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1623{
1624 uma_cache_t cache;
1625 uma_bucket_t bucket;
1626 int bflags;
1627 int cpu;
1628
1629 /* This is the fast path free */
1630#ifdef UMA_DEBUG_ALLOC_1
1631 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1632#endif
1633 /*
1634 * The race here is acceptable. If we miss it we'll just have to wait
1635 * a little longer for the limits to be reset.
1636 */
1637
1638 if (zone->uz_flags & UMA_ZFLAG_FULL)
1639 goto zfree_internal;
1640
1641zfree_restart:
1642 cpu = PCPU_GET(cpuid);
1643 CPU_LOCK(zone, cpu);
1644 cache = &zone->uz_cpu[cpu];
1645
1646zfree_start:
1647 bucket = cache->uc_freebucket;
1648
1649 if (bucket) {
1650 /*
1651 * Do we have room in our bucket? It is OK for this uz count
1652 * check to be slightly out of sync.
1653 */
1654
1655 if (bucket->ub_ptr < zone->uz_count) {
1656 bucket->ub_ptr++;
1657 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1658 ("uma_zfree: Freeing to non free bucket index."));
1659 bucket->ub_bucket[bucket->ub_ptr] = item;
1660 if (zone->uz_dtor)
1661 zone->uz_dtor(item, zone->uz_size, udata);
1662#ifdef INVARIANTS
1663 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1664 uma_dbg_free(zone, udata, item);
1665 else
1666 uma_dbg_free(zone, NULL, item);
1667#endif
1668 CPU_UNLOCK(zone, cpu);
1669 return;
1670 } else if (cache->uc_allocbucket) {
1671#ifdef UMA_DEBUG_ALLOC
1672 printf("uma_zfree: Swapping buckets.\n");
1673#endif
1674 /*
1675 * We have run out of space in our freebucket.
1676 * See if we can switch with our alloc bucket.
1677 */
1678 if (cache->uc_allocbucket->ub_ptr <
1679 cache->uc_freebucket->ub_ptr) {
1680 uma_bucket_t swap;
1681
1682 swap = cache->uc_freebucket;
1683 cache->uc_freebucket = cache->uc_allocbucket;
1684 cache->uc_allocbucket = swap;
1685
1686 goto zfree_start;
1687 }
1688 }
1689 }
1690
1691 /*
1692 * We can get here for two reasons:
1693 *
1694 * 1) The buckets are NULL
1695 * 2) The alloc and free buckets are both somewhat full.
1696 *
1697 */
1698
1699 ZONE_LOCK(zone);
1700
1701 bucket = cache->uc_freebucket;
1702 cache->uc_freebucket = NULL;
1703
1704 /* Can we throw this on the zone full list? */
1705 if (bucket != NULL) {
1706#ifdef UMA_DEBUG_ALLOC
1707 printf("uma_zfree: Putting old bucket on the free list.\n");
1708#endif
1709 /* ub_ptr is pointing to the last free item */
1710 KASSERT(bucket->ub_ptr != -1,
1711 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1712 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1713 bucket, ub_link);
1714 }
1715 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1716 LIST_REMOVE(bucket, ub_link);
1717 ZONE_UNLOCK(zone);
1718 cache->uc_freebucket = bucket;
1719 goto zfree_start;
1720 }
1721 /* We're done with this CPU now */
1722 CPU_UNLOCK(zone, cpu);
1723
1724 /* And the zone.. */
1725 ZONE_UNLOCK(zone);
1726
1727#ifdef UMA_DEBUG_ALLOC
1728 printf("uma_zfree: Allocating new free bucket.\n");
1729#endif
1730 bflags = M_NOWAIT;
1731
1732 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1733 bflags |= M_NOVM;
1734#ifdef INVARIANTS
1735 bflags |= M_ZERO;
1736#endif
1737 bucket = uma_zalloc_internal(bucketzone,
1738 NULL, bflags, NULL);
1739 if (bucket) {
1740 bucket->ub_ptr = -1;
1741 ZONE_LOCK(zone);
1742 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1743 bucket, ub_link);
1744 ZONE_UNLOCK(zone);
1745 goto zfree_restart;
1746 }
1747
1748 /*
1749 * If nothing else caught this, we'll just do an internal free.
1750 */
1751
1752zfree_internal:
1753
1754 uma_zfree_internal(zone, item, udata, 0);
1755
1756 return;
1757
1758}
1759
1760/*
1761 * Frees an item to an INTERNAL zone or allocates a free bucket
1762 *
1763 * Arguments:
1764 * zone The zone to free to
1765 * item The item we're freeing
1766 * udata User supplied data for the dtor
1767 * skip Skip the dtor, it was done in uma_zfree_arg
1768 */
1769
1770static void
1771uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1772{
1773 uma_slab_t slab;
1774 u_int8_t *mem;
1775 u_int8_t freei;
1776
1777 ZONE_LOCK(zone);
1778
1779 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1780 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1781 if (zone->uz_flags & UMA_ZFLAG_HASH)
1782 slab = hash_sfind(&zone->uz_hash, mem);
1783 else {
1784 mem += zone->uz_pgoff;
1785 slab = (uma_slab_t)mem;
1786 }
1787 } else {
1788 slab = (uma_slab_t)udata;
1789 }
1790
1791 /* Do we need to remove from any lists? */
1792 if (slab->us_freecount+1 == zone->uz_ipers) {
1793 LIST_REMOVE(slab, us_link);
1794 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1795 } else if (slab->us_freecount == 0) {
1796 LIST_REMOVE(slab, us_link);
1797 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1798 }
1799
1800 /* Slab management stuff */
1801 freei = ((unsigned long)item - (unsigned long)slab->us_data)
1802 / zone->uz_rsize;
1803
1804#ifdef INVARIANTS
1805 if (!skip)
1806 uma_dbg_free(zone, slab, item);
1807#endif
1808
1809 slab->us_freelist[freei] = slab->us_firstfree;
1810 slab->us_firstfree = freei;
1811 slab->us_freecount++;
1812
1813 /* Zone statistics */
1814 zone->uz_free++;
1815
1816 if (!skip && zone->uz_dtor)
1817 zone->uz_dtor(item, zone->uz_size, udata);
1818
1819 if (zone->uz_flags & UMA_ZFLAG_FULL) {
1820 if (zone->uz_pages < zone->uz_maxpages)
1821 zone->uz_flags &= ~UMA_ZFLAG_FULL;
1822
1823 /* We can handle one more allocation */
1824		wakeup_one(zone);
1825 }
1826
1827 ZONE_UNLOCK(zone);
1828}
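/*
 * The index recovered above is plain pointer arithmetic.  Sketch, with
 * hypothetical numbers: if us_data is 0xc1000000, uz_rsize is 128 and the
 * item is 0xc1000300, then
 *
 *	freei = (0xc1000300 - 0xc1000000) / 128 = 6
 *
 * and slot 6 is pushed onto the slab's embedded free list, becoming the
 * next index handed out by uma_zalloc_internal().
 */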
1829
1830/* See uma.h */
1831void
1832uma_zone_set_max(uma_zone_t zone, int nitems)
1833{
1834 ZONE_LOCK(zone);
1835 if (zone->uz_ppera > 1)
1836 zone->uz_maxpages = nitems * zone->uz_ppera;
1837 else
1838 zone->uz_maxpages = nitems / zone->uz_ipers;
1839
1840 if (zone->uz_maxpages * zone->uz_ipers < nitems)
1841 zone->uz_maxpages++;
1842
1843 ZONE_UNLOCK(zone);
1844}
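/*
 * Example with hypothetical numbers: for a zone with uz_ipers == 40 and
 * uz_ppera == 1, uma_zone_set_max(zone, 1000) gives
 *
 *	uz_maxpages = 1000 / 40 = 25
 *
 * i.e. pages for 25 * 40 == 1000 items.  Because the limit is kept in
 * whole pages, an item count that does not divide evenly is rounded up
 * to the next slab boundary.
 */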
1845
1846/* See uma.h */
1847void
1848uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1849{
1850 ZONE_LOCK(zone);
1851
1852 zone->uz_freef = freef;
1853
1854 ZONE_UNLOCK(zone);
1855}
1856
1857/* See uma.h */
1858void
1859uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1860{
1861 ZONE_LOCK(zone);
1862
1863 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1864 zone->uz_allocf = allocf;
1865
1866 ZONE_UNLOCK(zone);
1867}
1868
1869/* See uma.h */
1870int
1871uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1872{
1873 int pages;
1874 vm_offset_t kva;
1875
1876 mtx_lock(&Giant);
1877
1878 pages = count / zone->uz_ipers;
1879
1880 if (pages * zone->uz_ipers < count)
1881 pages++;
1882
1883 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1884
1885 if (kva == 0) {
1886 mtx_unlock(&Giant);
1887 return (0);
1888 }
1889
1890
1891 if (obj == NULL)
1892 obj = vm_object_allocate(OBJT_DEFAULT,
1893 pages);
1894 else
1895 _vm_object_allocate(OBJT_DEFAULT,
1896 pages, obj);
1897
1898 ZONE_LOCK(zone);
1899 zone->uz_kva = kva;
1900 zone->uz_obj = obj;
1901 zone->uz_maxpages = pages;
1902
1903 zone->uz_allocf = obj_alloc;
1904 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1905
1906 ZONE_UNLOCK(zone);
1907 mtx_unlock(&Giant);
1908
1909 return (1);
1910}
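/*
 * Hypothetical usage sketch: a zone whose allocations must keep working
 * while kmem_map itself is under pressure can be backed by its own object
 * and the private piece of KVA reserved here, e.g.
 *
 *	static struct vm_object foo_obj;
 *	...
 *	foo_zone = uma_zcreate("FOO", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
 *	uma_zone_set_obj(foo_zone, &foo_obj, FOO_MAX);
 *
 * after which slabs come from obj_alloc() above, one page at a time with
 * VM_ALLOC_INTERRUPT, instead of kmem_malloc(), and the zone is capped at
 * roughly FOO_MAX items.  The names are placeholders, not real consumers
 * of this interface.
 */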
1911
1912/* See uma.h */
1913void
1914uma_prealloc(uma_zone_t zone, int items)
1915{
1916 int slabs;
1917 uma_slab_t slab;
1918
1919 ZONE_LOCK(zone);
1920 slabs = items / zone->uz_ipers;
1921 if (slabs * zone->uz_ipers < items)
1922 slabs++;
1923
1924 while (slabs > 0) {
1925 slab = slab_zalloc(zone, M_WAITOK);
1926 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1927 slabs--;
1928 }
1929 ZONE_UNLOCK(zone);
1930}
1931
1932/* See uma.h */
1933void
1934uma_reclaim(void)
1935{
1936 /*
1937 * You might think that the delay below would improve performance since
1938 * the allocator will give away memory that it may ask for immediately.
1939 * Really, it makes things worse, since cpu cycles are so much cheaper
1940 * than disk activity.
1941 */
1942#if 0
1943 static struct timeval tv = {0};
1944 struct timeval now;
1945 getmicrouptime(&now);
1946 if (now.tv_sec > tv.tv_sec + 30)
1947 tv = now;
1948 else
1949 return;
1950#endif
1951#ifdef UMA_DEBUG
1952 printf("UMA: vm asked us to release pages!\n");
1953#endif
1954 bucket_enable();
1955 zone_foreach(zone_drain);
1956
1957 /*
1958	 * Some slabs may have been freed, but this zone was visited early in the
1959	 * pass, so we visit it again to free pages that become empty once the other
1960	 * zones are drained. We have to do the same for buckets.
1961 */
1962 zone_drain(slabzone);
1963 zone_drain(bucketzone);
1964}
1965
1966void *
1967uma_large_malloc(int size, int wait)
1968{
1969 void *mem;
1970 uma_slab_t slab;
1971 u_int8_t flags;
1972
1973 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
1974 if (slab == NULL)
1975 return (NULL);
1976
1977 mem = page_alloc(NULL, size, &flags, wait);
1978 if (mem) {
1979 vsetslab((vm_offset_t)mem, slab);
1980 slab->us_data = mem;
1981 slab->us_flags = flags | UMA_SLAB_MALLOC;
1982 slab->us_size = size;
1983 } else {
1984 uma_zfree_internal(slabzone, slab, NULL, 0);
1985 }
1986
1987
1988 return (mem);
1989}
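/*
 * uma_large_malloc() and uma_large_free() back allocations larger than a
 * slab: the pages come straight from page_alloc() and a separate slab
 * header from slabzone records the size and the UMA_SLAB_MALLOC flag so
 * the memory can be returned later.  A minimal sketch of the calling
 * pattern (assumed, not taken from a real caller):
 *
 *	mem = uma_large_malloc(size, M_WAITOK);
 *	...
 *	slab = vtoslab((vm_offset_t)mem & (~UMA_SLAB_MASK));
 *	uma_large_free(slab);
 */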
1990
1991void
1992uma_large_free(uma_slab_t slab)
1993{
1994 vsetobj((vm_offset_t)slab->us_data, kmem_object);
1995 page_free(slab->us_data, slab->us_size, slab->us_flags);
1996 uma_zfree_internal(slabzone, slab, NULL, 0);
1997}
1998
1999void
2000uma_print_stats(void)
2001{
2002 zone_foreach(uma_print_zone);
2003}
2004
2005void
2006uma_print_zone(uma_zone_t zone)
2007{
2008 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2009 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2010 zone->uz_ipers, zone->uz_ppera,
2011 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2012}
2013
2014/*
2015 * Sysctl handler for vm.zone
2016 *
2017 * stolen from vm_zone.c
2018 */
2019static int
2020sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2021{
2022 int error, len, cnt;
2023 const int linesize = 128; /* conservative */
2024 int totalfree;
2025 char *tmpbuf, *offset;
2026 uma_zone_t z;
2027 char *p;
2028
2029 cnt = 0;
2030 mtx_lock(&uma_mtx);
2031 LIST_FOREACH(z, &uma_zones, uz_link)
2032 cnt++;
2033 mtx_unlock(&uma_mtx);
2034 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2035 M_TEMP, M_WAITOK);
2036 len = snprintf(tmpbuf, linesize,
2037 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2038 if (cnt == 0)
2039 tmpbuf[len - 1] = '\0';
2040 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2041 if (error || cnt == 0)
2042 goto out;
2043 offset = tmpbuf;
2044 mtx_lock(&uma_mtx);
2045 LIST_FOREACH(z, &uma_zones, uz_link) {
2046 if (cnt == 0) /* list may have changed size */
2047 break;
2048 ZONE_LOCK(z);
2049 totalfree = z->uz_free + z->uz_cachefree;
2050 len = snprintf(offset, linesize,
2051 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2052 z->uz_name, z->uz_size,
2053 z->uz_maxpages * z->uz_ipers,
2054 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2055 totalfree,
2056 (unsigned long long)z->uz_allocs);
2057 ZONE_UNLOCK(z);
2058 for (p = offset + 12; p > offset && *p == ' '; --p)
2059 /* nothing */ ;
2060 p[1] = ':';
2061 cnt--;
2062 offset += len;
2063 }
2064 mtx_unlock(&uma_mtx);
2065 *offset++ = '\0';
2066 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2067out:
2068 FREE(tmpbuf, M_TEMP);
2069 return (error);
2070}
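/*
 * For illustration, one line of "sysctl vm.zone" output produced by the
 * handler above looks roughly like this (the zone name and every number
 * below are invented purely to show the layout):
 *
 *	ITEM            SIZE     LIMIT     USED    FREE  REQUESTS
 *
 *	FOO:            000128, 00000000, 001024, 000064, 00099999
 *
 * The name is padded to twelve columns with a ':' patched in behind it;
 * the numbers are the item size, the limit (uz_maxpages * uz_ipers, 0 when
 * the zone is unlimited), items in use, items free including the per cpu
 * caches, and the cumulative allocation count, zero padded by the "%6.6u"
 * style conversions in the format string.
 */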