subr_vmem.c revision 280805
181634Sbrian/*- 281634Sbrian * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi, 381634Sbrian * Copyright (c) 2013 EMC Corp. 481634Sbrian * All rights reserved. 581634Sbrian * 681634Sbrian * Redistribution and use in source and binary forms, with or without 781634Sbrian * modification, are permitted provided that the following conditions 881634Sbrian * are met: 981634Sbrian * 1. Redistributions of source code must retain the above copyright 1081634Sbrian * notice, this list of conditions and the following disclaimer. 1181634Sbrian * 2. Redistributions in binary form must reproduce the above copyright 1281634Sbrian * notice, this list of conditions and the following disclaimer in the 1381634Sbrian * documentation and/or other materials provided with the distribution. 1481634Sbrian * 1581634Sbrian * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1681634Sbrian * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1781634Sbrian * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1881634Sbrian * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1981634Sbrian * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2081634Sbrian * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2181634Sbrian * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2281634Sbrian * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2381634Sbrian * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2481634Sbrian * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2581634Sbrian * SUCH DAMAGE. 
2681634Sbrian */ 2781634Sbrian 2881634Sbrian/* 2981634Sbrian * From: 3081634Sbrian * $NetBSD: vmem_impl.h,v 1.2 2013/01/29 21:26:24 para Exp $ 3181634Sbrian * $NetBSD: subr_vmem.c,v 1.83 2013/03/06 11:20:10 yamt Exp $ 3281634Sbrian */ 3381634Sbrian 3481634Sbrian/* 3581634Sbrian * reference: 36102558Sbrian * - Magazines and Vmem: Extending the Slab Allocator 37102558Sbrian * to Many CPUs and Arbitrary Resources 3881634Sbrian * http://www.usenix.org/event/usenix01/bonwick.html 3981634Sbrian */ 40102500Sbrian 4181634Sbrian#include <sys/cdefs.h> 4281634Sbrian__FBSDID("$FreeBSD: head/sys/kern/subr_vmem.c 280805 2015-03-29 10:02:29Z mav $"); 4381634Sbrian 4481634Sbrian#include "opt_ddb.h" 45102558Sbrian 4681634Sbrian#include <sys/param.h> 4781634Sbrian#include <sys/systm.h> 4881634Sbrian#include <sys/kernel.h> 4981634Sbrian#include <sys/queue.h> 5081634Sbrian#include <sys/callout.h> 5181634Sbrian#include <sys/hash.h> 5281634Sbrian#include <sys/lock.h> 5381634Sbrian#include <sys/malloc.h> 5481634Sbrian#include <sys/mutex.h> 5581634Sbrian#include <sys/smp.h> 5681634Sbrian#include <sys/condvar.h> 5781634Sbrian#include <sys/sysctl.h> 5881634Sbrian#include <sys/taskqueue.h> 5981634Sbrian#include <sys/vmem.h> 6081634Sbrian 6181634Sbrian#include "opt_vm.h" 6281634Sbrian 6381634Sbrian#include <vm/uma.h> 6481634Sbrian#include <vm/vm.h> 6581634Sbrian#include <vm/pmap.h> 6681634Sbrian#include <vm/vm_map.h> 6781634Sbrian#include <vm/vm_object.h> 6881634Sbrian#include <vm/vm_kern.h> 6981634Sbrian#include <vm/vm_extern.h> 7081634Sbrian#include <vm/vm_param.h> 7181634Sbrian#include <vm/vm_pageout.h> 7281634Sbrian 7381634Sbrian#define VMEM_OPTORDER 5 7481634Sbrian#define VMEM_OPTVALUE (1 << VMEM_OPTORDER) 7581634Sbrian#define VMEM_MAXORDER \ 7681634Sbrian (VMEM_OPTVALUE - 1 + sizeof(vmem_size_t) * NBBY - VMEM_OPTORDER) 7781634Sbrian 7881634Sbrian#define VMEM_HASHSIZE_MIN 16 7981634Sbrian#define VMEM_HASHSIZE_MAX 131072 8081888Sbrian 81116251Skris#define VMEM_QCACHE_IDX_MAX 16 
#define	VMEM_FITMASK	(M_BESTFIT | M_FIRSTFIT)

#define	VMEM_FLAGS						\
    (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | M_BESTFIT | M_FIRSTFIT)

/* Flags that may be passed through to the boundary-tag zone allocator. */
#define	BT_FLAGS	(M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM)

#define	QC_NAME_MAX	16

/*
 * Data structures private to vmem.
 */
MALLOC_DEFINE(M_VMEM, "vmem", "vmem internal structures");

typedef struct vmem_btag bt_t;

TAILQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);

/* Per-size quantum cache; fronts the arena with a UMA cache zone. */
struct qcache {
	uma_zone_t	qc_cache;	/* backing UMA cache zone */
	vmem_t 		*qc_vmem;	/* arena this cache imports from */
	vmem_size_t	qc_size;	/* fixed allocation size served */
	char		qc_name[QC_NAME_MAX];
};
typedef struct qcache qcache_t;
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool->pr_qcache))

#define	VMEM_NAME_MAX	16

/* vmem arena */
struct vmem {
	struct mtx_padalign	vm_lock;
	struct cv		vm_cv;
	char			vm_name[VMEM_NAME_MAX+1];
	LIST_ENTRY(vmem)	vm_alllist;
	/* Static hash used until/unless vmem_rehash() allocates a bigger one. */
	struct vmem_hashlist	vm_hash0[VMEM_HASHSIZE_MIN];
	struct vmem_freelist	vm_freelist[VMEM_MAXORDER];
	struct vmem_seglist	vm_seglist;
	struct vmem_hashlist	*vm_hashlist;
	vmem_size_t		vm_hashsize;

	/* Constant after init */
	vmem_size_t		vm_qcache_max;
	vmem_size_t		vm_quantum_mask;
	vmem_size_t		vm_import_quantum;
	int			vm_quantum_shift;

	/* Written on alloc/free */
	LIST_HEAD(, vmem_btag)	vm_freetags;	/* cached boundary tags */
	int			vm_nfreetags;
	int			vm_nbusytag;
	vmem_size_t		vm_inuse;
	vmem_size_t		vm_size;

	/* Used on import. */
	vmem_import_t		*vm_importfn;
	vmem_release_t		*vm_releasefn;
	void			*vm_arg;

	/* Space exhaustion callback. */
	vmem_reclaim_t		*vm_reclaimfn;

	/* quantum cache */
	qcache_t		vm_qcache[VMEM_QCACHE_IDX_MAX];
};

/* boundary tag */
struct vmem_btag {
	TAILQ_ENTRY(vmem_btag) bt_seglist;	/* address-ordered segment list */
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define	bt_hashlist	bt_u.u_hashlist
#define	bt_freelist	bt_u.u_freelist
	vmem_addr_t	bt_start;
	vmem_size_t	bt_size;
	int		bt_type;
};

#define	BT_TYPE_SPAN		1	/* Allocated from importfn */
#define	BT_TYPE_SPAN_STATIC	2	/* vmem_add() or create. */
#define	BT_TYPE_FREE		3	/* Available space. */
#define	BT_TYPE_BUSY		4	/* Used space. */
#define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

/* Last address covered by a tag (inclusive). */
#define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size - 1)

#if defined(DIAGNOSTIC)
static int enable_vmem_check = 1;
SYSCTL_INT(_debug, OID_AUTO, vmem_check, CTLFLAG_RWTUN,
    &enable_vmem_check, 0, "Enable vmem check");
static void vmem_check(vmem_t *);
#endif

static struct callout	vmem_periodic_ch;
static int		vmem_periodic_interval;
static struct task	vmem_periodic_wk;

static struct mtx_padalign vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);

/* ---- misc */
#define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
#define	VMEM_CONDVAR_DESTROY(vm)	cv_destroy(&vm->vm_cv)
#define	VMEM_CONDVAR_WAIT(vm)		cv_wait(&vm->vm_cv, &vm->vm_lock)
#define	VMEM_CONDVAR_BROADCAST(vm)	cv_broadcast(&vm->vm_cv)


#define	VMEM_LOCK(vm)		mtx_lock(&vm->vm_lock)
#define	VMEM_TRYLOCK(vm)	mtx_trylock(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)		mtx_unlock(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm, name) mtx_init(&vm->vm_lock, (name), NULL, MTX_DEF)
#define	VMEM_LOCK_DESTROY(vm)	mtx_destroy(&vm->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm)	mtx_assert(&vm->vm_lock, MA_OWNED);

/* Round addr up to a multiple of align; align must be a power of two. */
#define	VMEM_ALIGNUP(addr, align)	(-(-(addr) & -(align)))

/* True if [addr1, addr2] crosses a boundary that is a multiple of boundary. */
#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

/*
 * Map between a freelist index ("order") and a size in quantums.  The
 * first VMEM_OPTVALUE lists are exact-size; larger lists each cover a
 * power-of-two range of sizes.
 */
#define	ORDER2SIZE(order)	((order) < VMEM_OPTVALUE ? ((order) + 1) : \
	(vmem_size_t)1 << ((order) - (VMEM_OPTVALUE - VMEM_OPTORDER - 1)))
#define	SIZE2ORDER(size)	((size) <= VMEM_OPTVALUE ? ((size) - 1) : \
	(flsl(size) + (VMEM_OPTVALUE - VMEM_OPTORDER - 2)))

/*
 * Maximum number of boundary tags that may be required to satisfy an
 * allocation.  Two may be required to import.  Another two may be
 * required to clip edges.
 */
#define	BT_MAXALLOC	4

/*
 * Max free limits the number of locally cached boundary tags.  We
 * just want to avoid hitting the zone allocator for every call.
 */
#define BT_MAXFREE	(BT_MAXALLOC * 8)

/* Allocator for boundary tags. */
static uma_zone_t vmem_bt_zone;

/* boot time arena storage. */
static struct vmem kernel_arena_storage;
static struct vmem kmem_arena_storage;
static struct vmem buffer_arena_storage;
static struct vmem transient_arena_storage;
vmem_t *kernel_arena = &kernel_arena_storage;
vmem_t *kmem_arena = &kmem_arena_storage;
vmem_t *buffer_arena = &buffer_arena_storage;
vmem_t *transient_arena = &transient_arena_storage;

#ifdef DEBUG_MEMGUARD
static struct vmem memguard_arena_storage;
vmem_t *memguard_arena = &memguard_arena_storage;
#endif

/*
 * Fill the vmem's boundary tag cache.  We guarantee that boundary tag
 * allocation will not fail once bt_fill() passes.  To do so we cache
 * at least the maximum possible tag allocations in the arena.
 */
static int
bt_fill(vmem_t *vm, int flags)
{
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);

	/*
	 * Only allow the kmem arena to dip into reserve tags.  It is the
	 * vmem where new tags come from.
	 */
	flags &= BT_FLAGS;
	if (vm != kmem_arena)
		flags &= ~M_USE_RESERVE;

	/*
	 * Loop until we meet the reserve.  To minimize the lock shuffle
	 * and prevent simultaneous fills we first try a NOWAIT regardless
	 * of the caller's flags.  Specify M_NOVM so we don't recurse while
	 * holding a vmem lock.
	 */
	while (vm->vm_nfreetags < BT_MAXALLOC) {
		bt = uma_zalloc(vmem_bt_zone,
		    (flags & M_USE_RESERVE) | M_NOWAIT | M_NOVM);
		if (bt == NULL) {
			/* Retry without NOVM, dropping the lock to block. */
			VMEM_UNLOCK(vm);
			bt = uma_zalloc(vmem_bt_zone, flags);
			VMEM_LOCK(vm);
			if (bt == NULL && (flags & M_NOWAIT) != 0)
				break;
		}
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
	}

	if (vm->vm_nfreetags < BT_MAXALLOC)
		return ENOMEM;

	return 0;
}

/*
 * Pop a tag off of the freetag stack.  bt_fill() must have been called
 * beforehand so the cache is guaranteed to be non-empty.
 */
static bt_t *
bt_alloc(vmem_t *vm)
{
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);
	bt = LIST_FIRST(&vm->vm_freetags);
	MPASS(bt != NULL);
	LIST_REMOVE(bt, bt_freelist);
	vm->vm_nfreetags--;

	return bt;
}

/*
 * Trim the per-vmem free list.  Returns with the lock released to
 * avoid allocator recursions.
 */
static void
bt_freetrim(vmem_t *vm, int freelimit)
{
	LIST_HEAD(, vmem_btag) freetags;
	bt_t *bt;

	LIST_INIT(&freetags);
	VMEM_ASSERT_LOCKED(vm);
	/* Move the excess to a local list while still locked... */
	while (vm->vm_nfreetags > freelimit) {
		bt = LIST_FIRST(&vm->vm_freetags);
		LIST_REMOVE(bt, bt_freelist);
		vm->vm_nfreetags--;
		LIST_INSERT_HEAD(&freetags, bt, bt_freelist);
	}
	VMEM_UNLOCK(vm);
	/* ...then return them to the zone only after dropping the lock. */
	while ((bt = LIST_FIRST(&freetags)) != NULL) {
		LIST_REMOVE(bt, bt_freelist);
		uma_zfree(vmem_bt_zone, bt);
	}
}

/* Push a tag back onto the per-vmem freetag stack. */
static inline void
bt_free(vmem_t *vm, bt_t *bt)
{

	VMEM_ASSERT_LOCKED(vm);
	MPASS(LIST_FIRST(&vm->vm_freetags) != bt);
	LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
	vm->vm_nfreetags++;
}

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 2]
 *  :
 * freelist[29] ... [30, 30]
 * freelist[30] ... [31, 31]
 * freelist[31] ... [32, 63]
 * freelist[32] ... [64, 127]
 *  :
 * freelist[n] ... [(1 << (n - 26)), (1 << (n - 25)) - 1]
 *  :
 */

/* Return the freelist a free tag of the given size belongs on. */
static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	const int idx = SIZE2ORDER(qsize);

	MPASS(size != 0 && qsize != 0);
	MPASS((size & vm->vm_quantum_mask) == 0);
	MPASS(idx >= 0);
	MPASS(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/*
 * bt_freehead_toalloc: return the freelist for the given size and allocation
 * strategy.
 *
 * For M_FIRSTFIT, return the list in which any blocks are large enough
 * for the requested size.  otherwise, return the list which can have blocks
 * large enough for the requested size.
 */
static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, int strat)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx = SIZE2ORDER(qsize);

	MPASS(size != 0 && qsize != 0);
	MPASS((size & vm->vm_quantum_mask) == 0);

	if (strat == M_FIRSTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	MPASS(idx >= 0);
	MPASS(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/* ---- boundary tag hash */

/* Hash a busy tag's start address to its hash chain. */
static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	unsigned int hash;

	hash = hash32_buf(&addr, sizeof(addr), 0);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];

	return list;
}

/*
 * Find the busy tag starting exactly at addr, or NULL if none is
 * recorded in the hash.
 */
static bt_t *
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);
	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
			break;
		}
	}

	return bt;
}

/* Remove a busy tag from the hash and update usage accounting. */
static void
bt_rembusy(vmem_t *vm, bt_t *bt)
{

	VMEM_ASSERT_LOCKED(vm);
	MPASS(vm->vm_nbusytag > 0);
	vm->vm_inuse -= bt->bt_size;
	vm->vm_nbusytag--;
	LIST_REMOVE(bt, bt_hashlist);
}

/* Insert a busy tag into the hash and update usage accounting. */
static void
bt_insbusy(vmem_t *vm, bt_t *bt)
{
	struct vmem_hashlist *list;

	VMEM_ASSERT_LOCKED(vm);
	MPASS(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
	vm->vm_nbusytag++;
	vm->vm_inuse += bt->bt_size;
}

/* ---- boundary tag list */

/* Unlink a tag from the address-ordered segment list and recycle it. */
static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
	bt_free(vm, bt);
}

/* Insert bt into the segment list immediately after prev. */
static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}

/* Append bt to the tail of the segment list. */
static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}

/* Remove a free tag from its size-class freelist. */
static void
bt_remfree(vmem_t *vm, bt_t *bt)
{

	MPASS(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}

/* Insert a free tag onto the freelist matching its size. */
static void
bt_insfree(vmem_t *vm, bt_t *bt)
{
	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
}

/* ---- vmem internal functions */

/*
 * Import from the arena into the quantum cache in UMA.
 *
 * Returns the number of items actually imported, which may be fewer
 * than cnt if the arena cannot satisfy the request.
 */
static int
qc_import(void *arg, void **store, int cnt, int flags)
{
	qcache_t *qc;
	vmem_addr_t addr;
	int i;

	qc = arg;
	if ((flags & VMEM_FITMASK) == 0)
		flags |= M_BESTFIT;
	for (i = 0; i < cnt; i++) {
		if (vmem_xalloc(qc->qc_vmem, qc->qc_size, 0, 0, 0,
		    VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags, &addr) != 0)
			break;
		store[i] = (void *)addr;
		/* Only guarantee one allocation. */
		flags &= ~M_WAITOK;
		flags |= M_NOWAIT;
	}
	return i;
}

/*
 * Release memory from the UMA cache to the arena.
 */
static void
qc_release(void *arg, void **store, int cnt)
{
	qcache_t *qc;
	int i;

	qc = arg;
	for (i = 0; i < cnt; i++)
		vmem_xfree(qc->qc_vmem, (vmem_addr_t)store[i], qc->qc_size);
}

/*
 * Create one UMA cache zone per quantum multiple up to qcache_max,
 * so small allocations bypass the arena lock.
 */
static void
qc_init(vmem_t *vm, vmem_size_t qcache_max)
{
	qcache_t *qc;
	vmem_size_t size;
	int qcache_idx_max;
	int i;

	MPASS((qcache_max & vm->vm_quantum_mask) == 0);
	qcache_idx_max = MIN(qcache_max >> vm->vm_quantum_shift,
	    VMEM_QCACHE_IDX_MAX);
	vm->vm_qcache_max = qcache_idx_max << vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++) {
		qc = &vm->vm_qcache[i];
		size = (i + 1) << vm->vm_quantum_shift;
		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);
		qc->qc_vmem = vm;
		qc->qc_size = size;
		qc->qc_cache = uma_zcache_create(qc->qc_name, size,
		    NULL, NULL, NULL, NULL, qc_import, qc_release, qc,
		    UMA_ZONE_VM);
		MPASS(qc->qc_cache);
	}
}

/* Tear down the quantum cache zones created by qc_init(). */
static void
qc_destroy(vmem_t *vm)
{
	int qcache_idx_max;
	int i;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++)
		uma_zdestroy(vm->vm_qcache[i].qc_cache);
}

static void
qc_drain(vmem_t *vm)
{
	int qcache_idx_max;
	int i;

	/* Push cached items in every quantum-cache zone back to the arena. */
	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++)
		zone_drain(vm->vm_qcache[i].qc_cache);
}

#ifndef UMA_MD_SMALL_ALLOC

static struct mtx_padalign vmem_bt_lock;

/*
 * vmem_bt_alloc:  Allocate a new page of boundary tags.
 *
 * On architectures with uma_small_alloc there is no recursion; no address
 * space need be allocated to allocate boundary tags.  For the others, we
 * must handle recursion.  Boundary tags are necessary to allocate new
 * boundary tags.
 *
 * UMA guarantees that enough tags are held in reserve to allocate a new
 * page of kva.  We dip into this reserve by specifying M_USE_RESERVE only
 * when allocating the page to hold new boundary tags.  In this way the
 * reserve is automatically filled by the allocation that uses the reserve.
 *
 * We still have to guarantee that the new tags are allocated atomically since
 * many threads may try concurrently.  The bt_lock provides this guarantee.
 * We convert WAITOK allocations to NOWAIT and then handle the blocking here
 * on failure.  It's ok to return NULL for a WAITOK allocation as UMA will
 * loop again after checking to see if we lost the race to allocate.
 *
 * There is a small race between vmem_bt_alloc() returning the page and the
 * zone lock being acquired to add the page to the zone.  For WAITOK
 * allocations we just pause briefly.  NOWAIT may experience a transient
 * failure.  To alleviate this we permit a small number of simultaneous
 * fills to proceed concurrently so NOWAIT is less likely to fail unless
 * we are really out of KVA.
 */
static void *
vmem_bt_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
{
	vmem_addr_t addr;

	*pflag = UMA_SLAB_KMEM;

	/*
	 * Single thread boundary tag allocation so that the address space
	 * and memory are added in one atomic operation.
	 */
	mtx_lock(&vmem_bt_lock);
	if (vmem_xalloc(kmem_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
	    VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT,
	    &addr) == 0) {
		if (kmem_back(kmem_object, addr, bytes,
		    M_NOWAIT | M_USE_RESERVE) == 0) {
			mtx_unlock(&vmem_bt_lock);
			return ((void *)addr);
		}
		vmem_xfree(kmem_arena, addr, bytes);
		mtx_unlock(&vmem_bt_lock);
		/*
		 * Out of memory, not address space.  This may not even be
		 * possible due to M_USE_RESERVE page allocation.
		 */
		if (wait & M_WAITOK)
			VM_WAIT;
		return (NULL);
	}
	mtx_unlock(&vmem_bt_lock);
	/*
	 * We're either out of address space or lost a fill race.
	 */
	if (wait & M_WAITOK)
		pause("btalloc", 1);

	return (NULL);
}
#endif

/* One-time initialization of the vmem subsystem at boot. */
void
vmem_startup(void)
{

	mtx_init(&vmem_list_lock, "vmem list lock", NULL, MTX_DEF);
	vmem_bt_zone = uma_zcreate("vmem btag",
	    sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_VM);
#ifndef UMA_MD_SMALL_ALLOC
	mtx_init(&vmem_bt_lock, "btag lock", NULL, MTX_DEF);
	uma_prealloc(vmem_bt_zone, BT_MAXALLOC);
	/*
	 * Reserve enough tags to allocate new tags.  We allow multiple
	 * CPUs to attempt to allocate new tags concurrently to limit
	 * false restarts in UMA.
	 */
	uma_zone_reserve(vmem_bt_zone, BT_MAXALLOC * (mp_ncpus + 1) / 2);
	uma_zone_set_allocf(vmem_bt_zone, vmem_bt_alloc);
#endif
}

/* ---- rehash */

/*
 * Resize the busy-tag hash table to newhashsize buckets, migrating all
 * existing busy tags.  The new table is allocated before taking the
 * arena lock; M_NOWAIT failure leaves the old table in place.
 */
static int
vmem_rehash(vmem_t *vm, vmem_size_t newhashsize)
{
	bt_t *bt;
	int i;
	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;
	vmem_size_t oldhashsize;

	MPASS(newhashsize > 0);

	newhashlist = malloc(sizeof(struct vmem_hashlist) * newhashsize,
	    M_VMEM, M_NOWAIT);
	if (newhashlist == NULL)
		return ENOMEM;
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	VMEM_LOCK(vm);
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	/* Re-insert every busy tag; bt_insbusy() hashes into the new table. */
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt);
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	/* vm_hash0 is embedded in the vmem and must never be freed. */
	if (oldhashlist != vm->vm_hash0) {
		free(oldhashlist, M_VMEM);
	}

	return 0;
}

/* Callout handler: defer periodic maintenance to a taskqueue thread. */
static void
vmem_periodic_kick(void *dummy)
{

	taskqueue_enqueue(taskqueue_thread, &vmem_periodic_wk);
}

/*
 * Periodic task: optionally sanity-check every arena, then resize its
 * busy-tag hash toward one bucket per busy tag.
 */
static void
vmem_periodic(void *unused, int pending)
{
	vmem_t *vm;
	vmem_size_t desired;
	vmem_size_t current;

	mtx_lock(&vmem_list_lock);
	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
#ifdef DIAGNOSTIC
		/* Convenient time to verify vmem state. */
		if (enable_vmem_check == 1) {
			VMEM_LOCK(vm);
			vmem_check(vm);
			VMEM_UNLOCK(vm);
		}
#endif
		desired = 1 << flsl(vm->vm_nbusytag);
		desired = MIN(MAX(desired, VMEM_HASHSIZE_MIN),
		    VMEM_HASHSIZE_MAX);
		current = vm->vm_hashsize;

		/* Grow in powers of two.  Shrink less aggressively.
		 */
		if (desired >= current * 2 || desired * 4 <= current)
			vmem_rehash(vm, desired);
	}
	mtx_unlock(&vmem_list_lock);

	callout_reset(&vmem_periodic_ch, vmem_periodic_interval,
	    vmem_periodic_kick, NULL);
}

/* Arm the periodic maintenance callout; runs once at boot via SYSINIT. */
static void
vmem_start_callout(void *unused)
{

	TASK_INIT(&vmem_periodic_wk, 0, vmem_periodic, NULL);
	vmem_periodic_interval = hz * 10;
	callout_init(&vmem_periodic_ch, CALLOUT_MPSAFE);
	callout_reset(&vmem_periodic_ch, vmem_periodic_interval,
	    vmem_periodic_kick, NULL);
}
SYSINIT(vfs, SI_SUB_CONFIGURE, SI_ORDER_ANY, vmem_start_callout, NULL);

/*
 * Add a span of the given type to the arena: one span tag followed by
 * one free tag covering the whole range.  Caller holds the arena lock
 * and must have prefilled the tag cache (bt_alloc() cannot fail here).
 */
static void
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type)
{
	bt_t *btspan;
	bt_t *btfree;

	MPASS(type == BT_TYPE_SPAN || type == BT_TYPE_SPAN_STATIC);
	MPASS((size & vm->vm_quantum_mask) == 0);

	btspan = bt_alloc(vm);
	btspan->bt_type = type;
	btspan->bt_start = addr;
	btspan->bt_size = size;
	bt_insseg_tail(vm, btspan);

	btfree = bt_alloc(vm);
	btfree->bt_type = BT_TYPE_FREE;
	btfree->bt_start = addr;
	btfree->bt_size = size;
	bt_insseg(vm, btfree, btspan);
	bt_insfree(vm, btfree);

	vm->vm_size += size;
}

/* Release all resources held by an arena and free the vmem itself. */
static void
vmem_destroy1(vmem_t *vm)
{
	bt_t *bt;

	/*
	 * Drain per-cpu quantum caches.
	 */
	qc_destroy(vm);

	/*
	 * The vmem should now only contain empty segments.
	 */
	VMEM_LOCK(vm);
	MPASS(vm->vm_nbusytag == 0);

	while ((bt = TAILQ_FIRST(&vm->vm_seglist)) != NULL)
		bt_remseg(vm, bt);

	if (vm->vm_hashlist != NULL && vm->vm_hashlist != vm->vm_hash0)
		free(vm->vm_hashlist, M_VMEM);

	/* bt_freetrim() drops the arena lock. */
	bt_freetrim(vm, 0);

	VMEM_CONDVAR_DESTROY(vm);
	VMEM_LOCK_DESTROY(vm);
	free(vm, M_VMEM);
}

/*
 * Pull a new span from the arena's import function and add it as a
 * BT_TYPE_SPAN.  Called with the arena lock held; the lock is dropped
 * around the importfn callback.
 */
static int
vmem_import(vmem_t *vm, vmem_size_t size, vmem_size_t align, int flags)
{
	vmem_addr_t addr;
	int error;

	if (vm->vm_importfn == NULL)
		return EINVAL;

	/*
	 * To make sure we get a span that meets the alignment we double it
	 * and add the size to the tail.  This slightly overestimates.
	 */
	if (align != vm->vm_quantum_mask + 1)
		size = (align * 2) + size;
	size = roundup(size, vm->vm_import_quantum);

	/*
	 * Hide MAXALLOC tags so we're guaranteed to be able to add this
	 * span and the tag we want to allocate from it.
	 */
	MPASS(vm->vm_nfreetags >= BT_MAXALLOC);
	vm->vm_nfreetags -= BT_MAXALLOC;
	VMEM_UNLOCK(vm);
	error = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
	VMEM_LOCK(vm);
	vm->vm_nfreetags += BT_MAXALLOC;
	if (error)
		return ENOMEM;

	vmem_add1(vm, addr, size, BT_TYPE_SPAN);

	return 0;
}

/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 *
 * it's a caller's responsibility to ensure the region is big enough
 * before calling us.
 */
static int
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
    vmem_size_t phase, vmem_size_t nocross, vmem_addr_t minaddr,
    vmem_addr_t maxaddr, vmem_addr_t *addrp)
{
	vmem_addr_t start;
	vmem_addr_t end;

	MPASS(size > 0);
	MPASS(bt->bt_size >= size); /* caller's responsibility */

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integer of the same size.
	 */

	/* Clamp the candidate window to [minaddr, maxaddr]. */
	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr)
		end = maxaddr;
	if (start > end)
		return (ENOMEM);

	/* Round up to alignment with the requested phase offset. */
	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start)
		start += align;
	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		MPASS(align < nocross);
		/* Skip forward to the next nocross boundary. */
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
	}
	if (start <= end && end - start >= size - 1) {
		MPASS((start & (align - 1)) == phase);
		MPASS(!VMEM_CROSS_P(start, start + size - 1, nocross));
		MPASS(minaddr <= start);
		MPASS(maxaddr == 0 || start + size - 1 <= maxaddr);
		MPASS(bt->bt_start <= start);
		MPASS(BT_END(bt) - start >= size - 1);
		*addrp = start;

		return (0);
	}
	return (ENOMEM);
}

/*
 * vmem_clip:  Trim the boundary tag edges to the requested start and size.
 */
static void
vmem_clip(vmem_t *vm, bt_t *bt, vmem_addr_t start, vmem_size_t size)
{
	bt_t *btnew;
	bt_t *btprev;

	VMEM_ASSERT_LOCKED(vm);
	MPASS(bt->bt_type == BT_TYPE_FREE);
	MPASS(bt->bt_size >= size);
	bt_remfree(vm, bt);
	if (bt->bt_start != start) {
		/* Split off the leading free fragment before start. */
		btprev = bt_alloc(vm);
		btprev->bt_type = BT_TYPE_FREE;
		btprev->bt_start = bt->bt_start;
		btprev->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btprev->bt_size;
		bt_insfree(vm, btprev);
		bt_insseg(vm, btprev,
		    TAILQ_PREV(bt, vmem_seglist, bt_seglist));
	}
	MPASS(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		/* split */
		btnew = bt_alloc(vm);
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insfree(vm, bt);
		bt_insseg(vm, btnew,
		    TAILQ_PREV(bt, vmem_seglist, bt_seglist));
		bt_insbusy(vm, btnew);
		bt = btnew;
	}
else { 950 bt->bt_type = BT_TYPE_BUSY; 951 bt_insbusy(vm, bt); 952 } 953 MPASS(bt->bt_size >= size); 954 bt->bt_type = BT_TYPE_BUSY; 955} 956 957/* ---- vmem API */ 958 959void 960vmem_set_import(vmem_t *vm, vmem_import_t *importfn, 961 vmem_release_t *releasefn, void *arg, vmem_size_t import_quantum) 962{ 963 964 VMEM_LOCK(vm); 965 vm->vm_importfn = importfn; 966 vm->vm_releasefn = releasefn; 967 vm->vm_arg = arg; 968 vm->vm_import_quantum = import_quantum; 969 VMEM_UNLOCK(vm); 970} 971 972void 973vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn) 974{ 975 976 VMEM_LOCK(vm); 977 vm->vm_reclaimfn = reclaimfn; 978 VMEM_UNLOCK(vm); 979} 980 981/* 982 * vmem_init: Initializes vmem arena. 983 */ 984vmem_t * 985vmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size, 986 vmem_size_t quantum, vmem_size_t qcache_max, int flags) 987{ 988 int i; 989 990 MPASS(quantum > 0); 991 MPASS((quantum & (quantum - 1)) == 0); 992 993 bzero(vm, sizeof(*vm)); 994 995 VMEM_CONDVAR_INIT(vm, name); 996 VMEM_LOCK_INIT(vm, name); 997 vm->vm_nfreetags = 0; 998 LIST_INIT(&vm->vm_freetags); 999 strlcpy(vm->vm_name, name, sizeof(vm->vm_name)); 1000 vm->vm_quantum_mask = quantum - 1; 1001 vm->vm_quantum_shift = flsl(quantum) - 1; 1002 vm->vm_nbusytag = 0; 1003 vm->vm_size = 0; 1004 vm->vm_inuse = 0; 1005 qc_init(vm, qcache_max); 1006 1007 TAILQ_INIT(&vm->vm_seglist); 1008 for (i = 0; i < VMEM_MAXORDER; i++) { 1009 LIST_INIT(&vm->vm_freelist[i]); 1010 } 1011 memset(&vm->vm_hash0, 0, sizeof(vm->vm_hash0)); 1012 vm->vm_hashsize = VMEM_HASHSIZE_MIN; 1013 vm->vm_hashlist = vm->vm_hash0; 1014 1015 if (size != 0) { 1016 if (vmem_add(vm, base, size, flags) != 0) { 1017 vmem_destroy1(vm); 1018 return NULL; 1019 } 1020 } 1021 1022 mtx_lock(&vmem_list_lock); 1023 LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist); 1024 mtx_unlock(&vmem_list_lock); 1025 1026 return vm; 1027} 1028 1029/* 1030 * vmem_create: create an arena. 
 */
vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_size_t qcache_max, int flags)
{

	vmem_t *vm;

	/* Only the wait/nowait bits are meaningful to malloc() here. */
	vm = malloc(sizeof(*vm), M_VMEM, flags & (M_WAITOK|M_NOWAIT));
	if (vm == NULL)
		return (NULL);
	if (vmem_init(vm, name, base, size, quantum, qcache_max,
	    flags) == NULL) {
		free(vm, M_VMEM);
		return (NULL);
	}
	return (vm);
}

/*
 * vmem_destroy: unlink the arena from the global list and tear it down.
 */
void
vmem_destroy(vmem_t *vm)
{

	mtx_lock(&vmem_list_lock);
	LIST_REMOVE(vm, vm_alllist);
	mtx_unlock(&vmem_list_lock);

	vmem_destroy1(vm);
}

/*
 * vmem_roundup_size: round 'size' up to the arena's quantum.
 */
vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}

/*
 * vmem_alloc: allocate resource from the arena.
 */
int
vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp)
{
	const int strat __unused = flags & VMEM_FITMASK;
	qcache_t *qc;

	flags &= VMEM_FLAGS;
	MPASS(size > 0);
	MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT);
	if ((flags & M_NOWAIT) == 0)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_alloc");

	/* Fast path: small requests are served from the quantum caches. */
	if (size <= vm->vm_qcache_max) {
		qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
		*addrp = (vmem_addr_t)uma_zalloc(qc->qc_cache, flags);
		if (*addrp == 0)
			return (ENOMEM);
		return (0);
	}

	/* Otherwise fall through to the general, unconstrained allocator. */
	return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
	    flags, addrp);
}

/*
 * vmem_xalloc: constrained allocation.
 *
 * Allocates 'size0' bytes (rounded up to the quantum) subject to the
 * alignment, phase, nocross and [minaddr, maxaddr] restrictions.  On
 * success returns 0 with the address in *addrp.  M_NOWAIT requests may
 * fail with ENOMEM; M_WAITOK requests sleep until they succeed.
 */
int
vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
    const vmem_size_t phase, const vmem_size_t nocross,
    const vmem_addr_t minaddr, const vmem_addr_t maxaddr, int flags,
    vmem_addr_t *addrp)
{
	const vmem_size_t size = vmem_roundup_size(vm, size0);
	struct vmem_freelist *list;
	struct vmem_freelist *first;
	struct vmem_freelist *end;
	vmem_size_t avail;
	bt_t *bt;
	int error;
	int strat;

	flags &= VMEM_FLAGS;
	strat = flags & VMEM_FITMASK;
	MPASS(size0 > 0);
	MPASS(size > 0);
	MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT);
	MPASS((flags & (M_NOWAIT|M_WAITOK)) != (M_NOWAIT|M_WAITOK));
	if ((flags & M_NOWAIT) == 0)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_xalloc");
	MPASS((align & vm->vm_quantum_mask) == 0);
	MPASS((align & (align - 1)) == 0);
	MPASS((phase & vm->vm_quantum_mask) == 0);
	MPASS((nocross & vm->vm_quantum_mask) == 0);
	MPASS((nocross & (nocross - 1)) == 0);
	MPASS((align == 0 && phase == 0) || phase < align);
	MPASS(nocross == 0 || nocross >= size);
	MPASS(minaddr <= maxaddr);
	MPASS(!VMEM_CROSS_P(phase, phase + size - 1, nocross));

	/* align == 0 means the arena's natural (quantum) alignment. */
	if (align == 0)
		align = vm->vm_quantum_mask + 1;

	*addrp = 0;
	end = &vm->vm_freelist[VMEM_MAXORDER];
	/*
	 * choose a free block from which we allocate.
	 */
	first = bt_freehead_toalloc(vm, size, strat);
	VMEM_LOCK(vm);
	for (;;) {
		/*
		 * Make sure we have enough tags to complete the
		 * operation.
		 */
		if (vm->vm_nfreetags < BT_MAXALLOC &&
		    bt_fill(vm, flags) != 0) {
			error = ENOMEM;
			break;
		}
		/*
		 * Scan freelists looking for a tag that satisfies the
		 * allocation.  If we're doing BESTFIT we may encounter
		 * sizes below the request.  If we're doing FIRSTFIT we
		 * inspect only the first element from each list.
		 */
		for (list = first; list < end; list++) {
			LIST_FOREACH(bt, list, bt_freelist) {
				if (bt->bt_size >= size) {
					error = vmem_fit(bt, size, align, phase,
					    nocross, minaddr, maxaddr, addrp);
					if (error == 0) {
						vmem_clip(vm, bt, *addrp, size);
						goto out;
					}
				}
				/* FIRST skips to the next list. */
				if (strat == M_FIRSTFIT)
					break;
			}
		}
		/*
		 * Retry if the fast algorithm failed.
		 */
		if (strat == M_FIRSTFIT) {
			strat = M_BESTFIT;
			first = bt_freehead_toalloc(vm, size, strat);
			continue;
		}
		/*
		 * XXX it is possible to fail to meet restrictions with the
		 * imported region.  It is up to the user to specify the
		 * import quantum such that it can satisfy any allocation.
		 */
		if (vmem_import(vm, size, align, flags) == 0)
			continue;

		/*
		 * Try to free some space from the quantum cache or reclaim
		 * functions if available.
		 */
		if (vm->vm_qcache_max != 0 || vm->vm_reclaimfn != NULL) {
			avail = vm->vm_size - vm->vm_inuse;
			VMEM_UNLOCK(vm);
			if (vm->vm_qcache_max != 0)
				qc_drain(vm);
			if (vm->vm_reclaimfn != NULL)
				vm->vm_reclaimfn(vm, flags);
			VMEM_LOCK(vm);
			/* If we were successful retry even NOWAIT. */
			if (vm->vm_size - vm->vm_inuse > avail)
				continue;
		}
		if ((flags & M_NOWAIT) != 0) {
			error = ENOMEM;
			break;
		}
		/* Wait for a free to signal progress, then retry. */
		VMEM_CONDVAR_WAIT(vm);
	}
out:
	VMEM_UNLOCK(vm);
	/*
	 * A sleeping (M_WAITOK) request can only leave the loop on success;
	 * any other exit with an error indicates a bug.
	 */
	if (error != 0 && (flags & M_NOWAIT) == 0)
		panic("failed to allocate waiting allocation\n");

	return (error);
}

/*
 * vmem_free: free the resource to the arena.
 */
void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	qcache_t *qc;
	MPASS(size > 0);

	/* Small sizes were served by the quantum cache; return them there. */
	if (size <= vm->vm_qcache_max) {
		qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
		uma_zfree(qc->qc_cache, (void *)addr);
	} else
		vmem_xfree(vm, addr, size);
}

/*
 * vmem_xfree: free a region previously allocated by vmem_xalloc(), with
 * immediate coalescing of adjacent free tags and, when possible, release
 * of an entirely-free span back to its source.
 */
void
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	bt_t *bt;
	bt_t *t;

	MPASS(size > 0);

	VMEM_LOCK(vm);
	/* The busy tag for this address must exist and match the request. */
	bt = bt_lookupbusy(vm, addr);
	MPASS(bt != NULL);
	MPASS(bt->bt_start == addr);
	MPASS(bt->bt_size == vmem_roundup_size(vm, size) ||
	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
	MPASS(bt->bt_type == BT_TYPE_BUSY);
	bt_rembusy(vm, bt);
	bt->bt_type = BT_TYPE_FREE;

	/* coalesce */
	t = TAILQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		MPASS(BT_END(bt) < t->bt_start);	/* YYY */
		bt->bt_size += t->bt_size;
		bt_remfree(vm, t);
		bt_remseg(vm, t);
	}
	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		MPASS(BT_END(t) < bt->bt_start);	/* YYY */
		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;
		bt_remfree(vm, t);
		bt_remseg(vm, t);
	}

	/*
	 * If the coalesced tag now covers its whole imported span and a
	 * release function is configured, give the span back to its source
	 * rather than keeping it cached in the arena.
	 */
	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	MPASS(t != NULL);
	MPASS(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
	if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		MPASS(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;
		bt_remseg(vm, bt);
		bt_remseg(vm, t);
		vm->vm_size -= spansize;
		/* Wake sleepers in vmem_xalloc() before releasing the span. */
		VMEM_CONDVAR_BROADCAST(vm);
		bt_freetrim(vm, BT_MAXFREE);
		(*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
	} else {
		bt_insfree(vm, bt);
		VMEM_CONDVAR_BROADCAST(vm);
		bt_freetrim(vm, BT_MAXFREE);
	}
}

/*
 * vmem_add: add the region [addr, addr + size) to the arena as a static
 * span.  Returns 0 on success, ENOMEM if boundary tags could not be
 * allocated.
 */
int
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int flags)
{
	int error;

	error = 0;
	flags &= VMEM_FLAGS;
	VMEM_LOCK(vm);
	if (vm->vm_nfreetags >= BT_MAXALLOC || bt_fill(vm, flags) == 0)
		vmem_add1(vm, addr, size, BT_TYPE_SPAN_STATIC);
	else
		error = ENOMEM;
	VMEM_UNLOCK(vm);

	return (error);
}

/*
 * vmem_size: information about arenas size
 *
 * 'typemask' selects allocated, free, or total size; any other value
 * panics.
 */
vmem_size_t
vmem_size(vmem_t *vm, int typemask)
{

	switch (typemask) {
	case VMEM_ALLOC:
		return vm->vm_inuse;
	case VMEM_FREE:
		return vm->vm_size - vm->vm_inuse;
	case VMEM_FREE|VMEM_ALLOC:
		return vm->vm_size;
	default:
		panic("vmem_size");
	}
}

/* ---- debug */

#if defined(DDB) || defined(DIAGNOSTIC)

static void bt_dump(const bt_t *, int (*)(const char *, ...)
    __printflike(1, 2));

/*
 * bt_type_string: printable name for a boundary-tag type.
 */
static const char *
bt_type_string(int type)
{

	switch (type) {
	case BT_TYPE_BUSY:
		return "busy";
	case BT_TYPE_FREE:
		return "free";
	case BT_TYPE_SPAN:
		return "span";
	case BT_TYPE_SPAN_STATIC:
		return "static span";
	default:
		break;
	}
	return "BOGUS";
}

/*
 * bt_dump: print one boundary tag via the supplied printf-like function.
 */
static void
bt_dump(const bt_t *bt, int (*pr)(const char *, ...))
{

	(*pr)("\t%p: %jx %jx, %d(%s)\n",
	    bt, (intmax_t)bt->bt_start, (intmax_t)bt->bt_size,
	    bt->bt_type, bt_type_string(bt->bt_type));
}

/*
 * vmem_dump: dump the full segment list and the non-empty freelists of
 * an arena.
 */
static void
vmem_dump(const vmem_t *vm, int (*pr)(const char *, ...)
    __printflike(1, 2))
{
	const bt_t *bt;
	int i;

	(*pr)("vmem %p '%s'\n", vm, vm->vm_name);
	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		bt_dump(bt, pr);
	}

	for (i = 0; i < VMEM_MAXORDER; i++) {
		const struct vmem_freelist *fl = &vm->vm_freelist[i];

		if (LIST_EMPTY(fl)) {
			continue;
		}

		(*pr)("freelist[%d]\n", i);
		LIST_FOREACH(bt, fl, bt_freelist) {
			bt_dump(bt, pr);
		}
	}
}

#endif /* defined(DDB) || defined(DIAGNOSTIC) */

#if defined(DDB)
#include <ddb/ddb.h>

/*
 * vmem_whatis_lookup: find the non-span tag whose range contains 'addr',
 * or NULL if none does.
 */
static bt_t *
vmem_whatis_lookup(vmem_t *vm, vmem_addr_t addr)
{
	bt_t *bt;

	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (BT_ISSPAN_P(bt)) {
			continue;
		}
		if (bt->bt_start <= addr && addr <= BT_END(bt)) {
			return bt;
		}
	}

	return NULL;
}

/*
 * vmem_whatis: report which arena and tag (if any) contain 'addr'.
 */
void
vmem_whatis(vmem_addr_t addr, int (*pr)(const char *, ...))
{
	vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		bt_t *bt;

		bt = vmem_whatis_lookup(vm, addr);
		if (bt == NULL) {
			continue;
		}
		(*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
		    (void *)addr, (void *)bt->bt_start,
		    (vmem_size_t)(addr - bt->bt_start), vm->vm_name,
		    (bt->bt_type == BT_TYPE_BUSY) ?
		    "allocated" : "free");
	}
}

/*
 * vmem_printall: dump every arena on the global list.
 */
void
vmem_printall(const char *modif, int (*pr)(const char *, ...))
{
	const vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		vmem_dump(vm, pr);
	}
}

/*
 * vmem_print: dump the arena at address 'addr'.
 */
void
vmem_print(vmem_addr_t addr, const char *modif, int (*pr)(const char *, ...))
{
	const vmem_t *vm = (const void *)addr;

	vmem_dump(vm, pr);
}

/* DDB "show vmemdump <addr>": dump one arena. */
DB_SHOW_COMMAND(vmemdump, vmemdump)
{

	if (!have_addr) {
		db_printf("usage: show vmemdump <addr>\n");
		return;
	}

	vmem_dump((const vmem_t *)addr, db_printf);
}

/* DDB "show all vmemdump": dump every arena. */
DB_SHOW_ALL_COMMAND(vmemdump, vmemdumpall)
{
	const vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist)
		vmem_dump(vm, db_printf);
}

/*
 * DDB "show vmem <addr>": summarize one arena, including per-order
 * counts and sizes of busy and free tags.
 */
DB_SHOW_COMMAND(vmem, vmem_summ)
{
	const vmem_t *vm = (const void *)addr;
	const bt_t *bt;
	size_t ft[VMEM_MAXORDER], ut[VMEM_MAXORDER];
	size_t fs[VMEM_MAXORDER], us[VMEM_MAXORDER];
	int ord;

	if (!have_addr) {
		db_printf("usage: show vmem <addr>\n");
		return;
	}

	db_printf("vmem %p '%s'\n", vm, vm->vm_name);
	db_printf("\tquantum:\t%zu\n", vm->vm_quantum_mask + 1);
	db_printf("\tsize:\t%zu\n", vm->vm_size);
	db_printf("\tinuse:\t%zu\n", vm->vm_inuse);
	db_printf("\tfree:\t%zu\n", vm->vm_size - vm->vm_inuse);
	db_printf("\tbusy tags:\t%d\n", vm->vm_nbusytag);
	db_printf("\tfree tags:\t%d\n", vm->vm_nfreetags);

	/* Accumulate tag counts (ut/ft) and byte totals (us/fs) per order. */
	memset(&ft, 0, sizeof(ft));
	memset(&ut, 0, sizeof(ut));
	memset(&fs, 0, sizeof(fs));
	memset(&us, 0, sizeof(us));
	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		ord = SIZE2ORDER(bt->bt_size >> vm->vm_quantum_shift);
		if (bt->bt_type == BT_TYPE_BUSY) {
			ut[ord]++;
			us[ord] += bt->bt_size;
		} else if (bt->bt_type == BT_TYPE_FREE) {
			ft[ord]++;
			fs[ord] += bt->bt_size;
		}
	}
	db_printf("\t\t\tinuse\tsize\t\tfree\tsize\n");
	for (ord = 0; ord < VMEM_MAXORDER; ord++) {
		if (ut[ord] == 0 && ft[ord] == 0)
			continue;
		db_printf("\t%-15zu %zu\t%-15zu %zu\t%-16zu\n",
		    ORDER2SIZE(ord) << vm->vm_quantum_shift,
		    ut[ord], us[ord], ft[ord], fs[ord]);
	}
}

/* DDB "show all vmem": summarize every arena. */
DB_SHOW_ALL_COMMAND(vmem, vmem_summall)
{
	const vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist)
		vmem_summ((db_expr_t)vm, TRUE, count, modif);
}
#endif /* defined(DDB) */

#define vmem_printf printf

#if defined(DIAGNOSTIC)

/*
 * vmem_check_sanity: verify that every tag is well-formed and that no
 * two tags of the same kind (span vs. non-span) overlap.  Returns false
 * and dumps the offending tags on failure.
 */
static bool
vmem_check_sanity(vmem_t *vm)
{
	const bt_t *bt, *bt2;

	MPASS(vm != NULL);

	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (bt->bt_start > BT_END(bt)) {
			printf("corrupted tag\n");
			bt_dump(bt, vmem_printf);
			return false;
		}
	}
	/* O(n^2) pairwise overlap check; diagnostic-only code. */
	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) {
			if (bt == bt2) {
				continue;
			}
			if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {
				continue;
			}
			if (bt->bt_start <= BT_END(bt2) &&
			    bt2->bt_start <= BT_END(bt)) {
				printf("overwrapped tags\n");
				bt_dump(bt, vmem_printf);
				bt_dump(bt2, vmem_printf);
				return false;
			}
		}
	}

	return true;
}

/*
 * vmem_check: panic if the arena fails the sanity check.
 */
static void
vmem_check(vmem_t *vm)
{

	if (!vmem_check_sanity(vm)) {
		panic("insanity vmem %p", vm);
	}
}

#endif /* defined(DIAGNOSTIC) */
