/*-
 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
 * Copyright (c) 2013 EMC Corp.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * From:
 *	$NetBSD: vmem_impl.h,v 1.2 2013/01/29 21:26:24 para Exp $
 *	$NetBSD: subr_vmem.c,v 1.83 2013/03/06 11:20:10 yamt Exp $
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/subr_vmem.c 252330 2013-06-28 03:51:20Z jeff $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/callout.h>
#include <sys/hash.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/smp.h>
#include <sys/condvar.h>
#include <sys/taskqueue.h>
#include <sys/vmem.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_pageout.h>

#define	VMEM_MAXORDER		(sizeof(vmem_size_t) * NBBY)

#define	VMEM_HASHSIZE_MIN	16
#define	VMEM_HASHSIZE_MAX	131072

#define	VMEM_QCACHE_IDX_MAX	16

#define	VMEM_FITMASK	(M_BESTFIT | M_FIRSTFIT)
#define	VMEM_FLAGS						\
	(M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | M_BESTFIT | M_FIRSTFIT)

#define	BT_FLAGS	(M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM)

#define	QC_NAME_MAX	16

/*
 * Data structures private to vmem.
 */
MALLOC_DEFINE(M_VMEM, "vmem", "vmem internal structures");

typedef struct vmem_btag bt_t;

TAILQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);

struct qcache {
	uma_zone_t	qc_cache;
	vmem_t		*qc_vmem;
	vmem_size_t	qc_size;
	char		qc_name[QC_NAME_MAX];
};
typedef struct qcache qcache_t;
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool->pr_qcache))

#define	VMEM_NAME_MAX	16

/* vmem arena */
struct vmem {
	struct mtx_padalign	vm_lock;
	struct cv		vm_cv;
	char			vm_name[VMEM_NAME_MAX+1];
	LIST_ENTRY(vmem)	vm_alllist;
	struct vmem_hashlist	vm_hash0[VMEM_HASHSIZE_MIN];
	struct vmem_freelist	vm_freelist[VMEM_MAXORDER];
	struct vmem_seglist	vm_seglist;
	struct vmem_hashlist	*vm_hashlist;
	vmem_size_t		vm_hashsize;

	/* Constant after init */
	vmem_size_t		vm_qcache_max;
	vmem_size_t		vm_quantum_mask;
	vmem_size_t		vm_import_quantum;
	int			vm_quantum_shift;

	/* Written on alloc/free */
	LIST_HEAD(, vmem_btag)	vm_freetags;
	int			vm_nfreetags;
	int			vm_nbusytag;
	vmem_size_t		vm_inuse;
	vmem_size_t		vm_size;

	/* Used on import. */
	vmem_import_t		*vm_importfn;
	vmem_release_t		*vm_releasefn;
	void			*vm_arg;

	/* Space exhaustion callback. */
	vmem_reclaim_t		*vm_reclaimfn;

	/* quantum cache */
	qcache_t		vm_qcache[VMEM_QCACHE_IDX_MAX];
};

/* boundary tag */
struct vmem_btag {
	TAILQ_ENTRY(vmem_btag) bt_seglist;
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define	bt_hashlist	bt_u.u_hashlist
#define	bt_freelist	bt_u.u_freelist
	vmem_addr_t	bt_start;
	vmem_size_t	bt_size;
	int		bt_type;
};

#define	BT_TYPE_SPAN		1	/* Allocated from importfn */
#define	BT_TYPE_SPAN_STATIC	2	/* vmem_add() or create. */
#define	BT_TYPE_FREE		3	/* Available space. */
#define	BT_TYPE_BUSY		4	/* Used space. */

#define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

#define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size - 1)
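
/*
 * For illustration (editor's sketch, not from the original source): a span
 * imported into the arena is described by a BT_TYPE_SPAN tag followed on the
 * segment list by tags covering its contents.  After one allocation carved
 * from the head of a fresh span, the list reads:
 *
 *	[SPAN start, size] -> [BUSY start, a] -> [FREE start + a, size - a]
 *
 * BT_ISSPAN_P() is true only for the first tag, and BT_END() of the last
 * tag is the span's final byte.
 */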

#if defined(DIAGNOSTIC)
static void vmem_check(vmem_t *);
#endif

static struct callout	vmem_periodic_ch;
static int		vmem_periodic_interval;
static struct task	vmem_periodic_wk;

static struct mtx_padalign vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);

/* ---- misc */
#define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
#define	VMEM_CONDVAR_DESTROY(vm)	cv_destroy(&vm->vm_cv)
#define	VMEM_CONDVAR_WAIT(vm)		cv_wait(&vm->vm_cv, &vm->vm_lock)
#define	VMEM_CONDVAR_BROADCAST(vm)	cv_broadcast(&vm->vm_cv)


#define	VMEM_LOCK(vm)		mtx_lock(&vm->vm_lock)
#define	VMEM_TRYLOCK(vm)	mtx_trylock(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)		mtx_unlock(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm, name) mtx_init(&vm->vm_lock, (name), NULL, MTX_DEF)
#define	VMEM_LOCK_DESTROY(vm)	mtx_destroy(&vm->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm)	mtx_assert(&vm->vm_lock, MA_OWNED);

#define	VMEM_ALIGNUP(addr, align)	(-(-(addr) & -(align)))

#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))
#define	SIZE2ORDER(size)	((int)flsl(size) - 1)

/*
 * Maximum number of boundary tags that may be required to satisfy an
 * allocation.  Two may be required to import.  Another two may be
 * required to clip edges.
 */
#define	BT_MAXALLOC	4

/*
 * Max free limits the number of locally cached boundary tags.  We
 * just want to avoid hitting the zone allocator for every call.
 */
#define	BT_MAXFREE	(BT_MAXALLOC * 8)
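
/*
 * For illustration, the worst case behind BT_MAXALLOC: vmem_add1() uses
 * one tag for the BT_TYPE_SPAN header and one for the initial BT_TYPE_FREE
 * segment, and vmem_clip() may then allocate one more tag per clipped
 * edge of that segment, for a total of four.
 */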

/* Allocator for boundary tags. */
static uma_zone_t vmem_bt_zone;

/* boot time arena storage. */
static struct vmem buffer_arena_storage;
static struct vmem transient_arena_storage;
vmem_t *buffer_arena = &buffer_arena_storage;
vmem_t *transient_arena = &transient_arena_storage;

/*
 * Fill the vmem's boundary tag cache.  We guarantee that boundary tag
 * allocation will not fail once bt_fill() passes.  To do so we cache
 * at least the maximum possible tag allocations in the arena.
 */
static int
bt_fill(vmem_t *vm, int flags)
{
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);

	/*
	 * Loop until we meet the reserve.  To minimize the lock shuffle
	 * and prevent simultaneous fills we first try a NOWAIT regardless
	 * of the caller's flags.  Specify M_NOVM so we don't recurse while
	 * holding a vmem lock.
	 */
	while (vm->vm_nfreetags < BT_MAXALLOC) {
		bt = uma_zalloc(vmem_bt_zone,
		    (flags & M_USE_RESERVE) | M_NOWAIT | M_NOVM);
		if (bt == NULL) {
			VMEM_UNLOCK(vm);
			bt = uma_zalloc(vmem_bt_zone, flags);
			VMEM_LOCK(vm);
			if (bt == NULL && (flags & M_NOWAIT) != 0)
				break;
		}
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
	}

	if (vm->vm_nfreetags < BT_MAXALLOC)
		return ENOMEM;

	return 0;
}

/*
 * Pop a tag off of the freetag stack.
 */
static bt_t *
bt_alloc(vmem_t *vm)
{
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);
	bt = LIST_FIRST(&vm->vm_freetags);
	MPASS(bt != NULL);
	LIST_REMOVE(bt, bt_freelist);
	vm->vm_nfreetags--;

	return bt;
}

/*
 * Trim the per-vmem free list.  Returns with the lock released to
 * avoid allocator recursions.
 */
static void
bt_freetrim(vmem_t *vm, int freelimit)
{
	LIST_HEAD(, vmem_btag) freetags;
	bt_t *bt;

	LIST_INIT(&freetags);
	VMEM_ASSERT_LOCKED(vm);
	while (vm->vm_nfreetags > freelimit) {
		bt = LIST_FIRST(&vm->vm_freetags);
		LIST_REMOVE(bt, bt_freelist);
		vm->vm_nfreetags--;
		LIST_INSERT_HEAD(&freetags, bt, bt_freelist);
	}
	VMEM_UNLOCK(vm);
	while ((bt = LIST_FIRST(&freetags)) != NULL) {
		LIST_REMOVE(bt, bt_freelist);
		uma_zfree(vmem_bt_zone, bt);
	}
}

static inline void
bt_free(vmem_t *vm, bt_t *bt)
{

	VMEM_ASSERT_LOCKED(vm);
	MPASS(LIST_FIRST(&vm->vm_freetags) != bt);
	LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
	vm->vm_nfreetags++;
}

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 3]
 * freelist[2] ... [4, 7]
 * freelist[3] ... [8, 15]
 *  :
 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
 *  :
 */

static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	const int idx = SIZE2ORDER(qsize);

	MPASS(size != 0 && qsize != 0);
	MPASS((size & vm->vm_quantum_mask) == 0);
	MPASS(idx >= 0);
	MPASS(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}
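
/*
 * Worked example (editor's illustration): with a 4KB quantum
 * (vm_quantum_shift == 12), a 24KB free segment has qsize == 6 and
 * SIZE2ORDER(6) == 2, so bt_freehead_tofree() files it on freelist[2],
 * which holds segments of [4, 7] quanta.
 */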

/*
 * bt_freehead_toalloc: return the freelist for the given size and allocation
 * strategy.
 *
 * For M_FIRSTFIT, return the list on which every block is guaranteed to be
 * large enough for the requested size.  Otherwise, return the smallest list
 * that may contain blocks large enough for the requested size.
 */
static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, int strat)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx = SIZE2ORDER(qsize);

	MPASS(size != 0 && qsize != 0);
	MPASS((size & vm->vm_quantum_mask) == 0);

	if (strat == M_FIRSTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	MPASS(idx >= 0);
	MPASS(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}
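
/*
 * Continuing the example above: for the same 24KB (6-quanta) request,
 * M_BESTFIT starts scanning at freelist[2], whose blocks may still be too
 * small, while M_FIRSTFIT bumps the index to freelist[3], where every
 * block holds at least 8 quanta and therefore fits.
 */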

/* ---- boundary tag hash */

static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	unsigned int hash;

	hash = hash32_buf(&addr, sizeof(addr), 0);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];

	return list;
}

static bt_t *
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);
	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
			break;
		}
	}

	return bt;
}

static void
bt_rembusy(vmem_t *vm, bt_t *bt)
{

	VMEM_ASSERT_LOCKED(vm);
	MPASS(vm->vm_nbusytag > 0);
	vm->vm_inuse -= bt->bt_size;
	vm->vm_nbusytag--;
	LIST_REMOVE(bt, bt_hashlist);
}

static void
bt_insbusy(vmem_t *vm, bt_t *bt)
{
	struct vmem_hashlist *list;

	VMEM_ASSERT_LOCKED(vm);
	MPASS(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
	vm->vm_nbusytag++;
	vm->vm_inuse += bt->bt_size;
}

/* ---- boundary tag list */

static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
	bt_free(vm, bt);
}

static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}

static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_remfree(vmem_t *vm, bt_t *bt)
{

	MPASS(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}

static void
bt_insfree(vmem_t *vm, bt_t *bt)
{
	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
}

/* ---- vmem internal functions */

/*
 * Import from the arena into the quantum cache in UMA.
 */
static int
qc_import(void *arg, void **store, int cnt, int flags)
{
	qcache_t *qc;
	vmem_addr_t addr;
	int i;

	qc = arg;
	flags |= M_BESTFIT;
	for (i = 0; i < cnt; i++) {
		if (vmem_xalloc(qc->qc_vmem, qc->qc_size, 0, 0, 0,
		    VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags, &addr) != 0)
			break;
		store[i] = (void *)addr;
		/* Only guarantee one allocation. */
		flags &= ~M_WAITOK;
		flags |= M_NOWAIT;
	}
	return i;
}

/*
 * Release memory from the UMA cache to the arena.
 */
static void
qc_release(void *arg, void **store, int cnt)
{
	qcache_t *qc;
	int i;

	qc = arg;
	for (i = 0; i < cnt; i++)
		vmem_xfree(qc->qc_vmem, (vmem_addr_t)store[i], qc->qc_size);
}

static void
qc_init(vmem_t *vm, vmem_size_t qcache_max)
{
	qcache_t *qc;
	vmem_size_t size;
	int qcache_idx_max;
	int i;

	MPASS((qcache_max & vm->vm_quantum_mask) == 0);
	qcache_idx_max = MIN(qcache_max >> vm->vm_quantum_shift,
	    VMEM_QCACHE_IDX_MAX);
	vm->vm_qcache_max = qcache_idx_max << vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++) {
		qc = &vm->vm_qcache[i];
		size = (i + 1) << vm->vm_quantum_shift;
		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);
		qc->qc_vmem = vm;
		qc->qc_size = size;
		qc->qc_cache = uma_zcache_create(qc->qc_name, size,
		    NULL, NULL, NULL, NULL, qc_import, qc_release, qc,
		    UMA_ZONE_VM);
		MPASS(qc->qc_cache);
	}
}

static void
qc_destroy(vmem_t *vm)
{
	int qcache_idx_max;
	int i;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++)
		uma_zdestroy(vm->vm_qcache[i].qc_cache);
}

static void
qc_drain(vmem_t *vm)
{
	int qcache_idx_max;
	int i;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++)
		zone_drain(vm->vm_qcache[i].qc_cache);
}
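
/*
 * For illustration (editor's sketch): with a 4KB quantum and a qcache_max
 * of 32KB, qc_init() creates eight UMA caches named "<arena>-4096" through
 * "<arena>-32768", one per multiple of the quantum.  Allocations up to
 * vm_qcache_max are then served from per-CPU buckets and only fall back
 * to qc_import(), and hence the arena lock, on a cache miss.
 */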
void
vmem_startup(void)
{

	mtx_init(&vmem_list_lock, "vmem list lock", NULL, MTX_DEF);
	vmem_bt_zone = uma_zcreate("vmem btag",
	    sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_VM);
}

/* ---- rehash */

static int
vmem_rehash(vmem_t *vm, vmem_size_t newhashsize)
{
	bt_t *bt;
	int i;
	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;
	vmem_size_t oldhashsize;

	MPASS(newhashsize > 0);

	newhashlist = malloc(sizeof(struct vmem_hashlist) * newhashsize,
	    M_VMEM, M_NOWAIT);
	if (newhashlist == NULL)
		return ENOMEM;
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	VMEM_LOCK(vm);
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt);
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	if (oldhashlist != vm->vm_hash0) {
		free(oldhashlist, M_VMEM);
	}

	return 0;
}

static void
vmem_periodic_kick(void *dummy)
{

	taskqueue_enqueue(taskqueue_thread, &vmem_periodic_wk);
}

static void
vmem_periodic(void *unused, int pending)
{
	vmem_t *vm;
	vmem_size_t desired;
	vmem_size_t current;

	mtx_lock(&vmem_list_lock);
	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
#ifdef DIAGNOSTIC
		/* Convenient time to verify vmem state. */
		VMEM_LOCK(vm);
		vmem_check(vm);
		VMEM_UNLOCK(vm);
#endif
		desired = 1 << flsl(vm->vm_nbusytag);
		desired = MIN(MAX(desired, VMEM_HASHSIZE_MIN),
		    VMEM_HASHSIZE_MAX);
		current = vm->vm_hashsize;

		/* Grow in powers of two.  Shrink less aggressively. */
		if (desired >= current * 2 || desired * 4 <= current)
			vmem_rehash(vm, desired);
	}
	mtx_unlock(&vmem_list_lock);

	callout_reset(&vmem_periodic_ch, vmem_periodic_interval,
	    vmem_periodic_kick, NULL);
}
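
/*
 * Worked example of the resize policy above (editor's illustration): an
 * arena with vm_hashsize 16 and 40 busy tags yields
 * desired == 1 << flsl(40) == 64; since 64 >= 16 * 2 the table grows to
 * 64 buckets.  Shrinking requires desired * 4 <= current, so the table is
 * not resized again until the busy tag count falls below 16.
 */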

static void
vmem_start_callout(void *unused)
{

	TASK_INIT(&vmem_periodic_wk, 0, vmem_periodic, NULL);
	vmem_periodic_interval = hz * 10;
	callout_init(&vmem_periodic_ch, CALLOUT_MPSAFE);
	callout_reset(&vmem_periodic_ch, vmem_periodic_interval,
	    vmem_periodic_kick, NULL);
}
SYSINIT(vfs, SI_SUB_CONFIGURE, SI_ORDER_ANY, vmem_start_callout, NULL);

static void
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int flags, int type)
{
	bt_t *btspan;
	bt_t *btfree;

	MPASS(type == BT_TYPE_SPAN || type == BT_TYPE_SPAN_STATIC);

	btspan = bt_alloc(vm);
	btspan->bt_type = type;
	btspan->bt_start = addr;
	btspan->bt_size = size;

	btfree = bt_alloc(vm);
	btfree->bt_type = BT_TYPE_FREE;
	btfree->bt_start = addr;
	btfree->bt_size = size;

	bt_insseg_tail(vm, btspan);
	bt_insseg(vm, btfree, btspan);
	bt_insfree(vm, btfree);
	vm->vm_size += size;
}

static void
vmem_destroy1(vmem_t *vm)
{
	bt_t *bt;

	/*
	 * Drain per-cpu quantum caches.
	 */
	qc_destroy(vm);

	/*
	 * The vmem should now only contain empty segments.
	 */
	VMEM_LOCK(vm);
	MPASS(vm->vm_nbusytag == 0);

	while ((bt = TAILQ_FIRST(&vm->vm_seglist)) != NULL)
		bt_remseg(vm, bt);

	if (vm->vm_hashlist != NULL && vm->vm_hashlist != vm->vm_hash0)
		free(vm->vm_hashlist, M_VMEM);

	bt_freetrim(vm, 0);

	VMEM_CONDVAR_DESTROY(vm);
	VMEM_LOCK_DESTROY(vm);
	free(vm, M_VMEM);
}

static int
vmem_import(vmem_t *vm, vmem_size_t size, int flags)
{
	vmem_addr_t addr;
	int error;

	if (vm->vm_importfn == NULL)
		return EINVAL;

	size = roundup(size, vm->vm_import_quantum);

	/*
	 * Hide MAXALLOC tags so we're guaranteed to be able to add this
	 * span and the tag we want to allocate from it.
	 */
	MPASS(vm->vm_nfreetags >= BT_MAXALLOC);
	vm->vm_nfreetags -= BT_MAXALLOC;
	VMEM_UNLOCK(vm);
	error = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
	VMEM_LOCK(vm);
	vm->vm_nfreetags += BT_MAXALLOC;
	if (error)
		return ENOMEM;

	vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN);

	return 0;
}

/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 *
 * It is the caller's responsibility to ensure the region is big enough
 * before calling us.
 */
static int
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
    vmem_size_t phase, vmem_size_t nocross, vmem_addr_t minaddr,
    vmem_addr_t maxaddr, vmem_addr_t *addrp)
{
	vmem_addr_t start;
	vmem_addr_t end;

	MPASS(size > 0);
	MPASS(bt->bt_size >= size); /* caller's responsibility */

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integers of the same size.
	 */

	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr)
		end = maxaddr;
	if (start > end)
		return (ENOMEM);

	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start)
		start += align;
	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		MPASS(align < nocross);
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
	}
	if (start <= end && end - start >= size - 1) {
		MPASS((start & (align - 1)) == phase);
		MPASS(!VMEM_CROSS_P(start, start + size - 1, nocross));
		MPASS(minaddr <= start);
		MPASS(maxaddr == 0 || start + size - 1 <= maxaddr);
		MPASS(bt->bt_start <= start);
		MPASS(BT_END(bt) - start >= size - 1);
		*addrp = start;

		return (0);
	}
	return (ENOMEM);
}
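
/*
 * Worked example (editor's illustration): align == 16 and phase == 4 ask
 * for addresses congruent to 4 (mod 16).  For a candidate start of 0x39,
 * VMEM_ALIGNUP(0x39 - 4, 16) + 4 == 0x40 + 4 == 0x44, the first in-range
 * address satisfying the constraint.
 */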

/*
 * vmem_clip: Trim the boundary tag edges to the requested start and size.
 */
static void
vmem_clip(vmem_t *vm, bt_t *bt, vmem_addr_t start, vmem_size_t size)
{
	bt_t *btnew;
	bt_t *btprev;

	VMEM_ASSERT_LOCKED(vm);
	MPASS(bt->bt_type == BT_TYPE_FREE);
	MPASS(bt->bt_size >= size);
	bt_remfree(vm, bt);
	if (bt->bt_start != start) {
		btprev = bt_alloc(vm);
		btprev->bt_type = BT_TYPE_FREE;
		btprev->bt_start = bt->bt_start;
		btprev->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btprev->bt_size;
		bt_insfree(vm, btprev);
		bt_insseg(vm, btprev,
		    TAILQ_PREV(bt, vmem_seglist, bt_seglist));
	}
	MPASS(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		/* split */
		btnew = bt_alloc(vm);
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insfree(vm, bt);
		bt_insseg(vm, btnew,
		    TAILQ_PREV(bt, vmem_seglist, bt_seglist));
		bt_insbusy(vm, btnew);
		bt = btnew;
	} else {
		bt->bt_type = BT_TYPE_BUSY;
		bt_insbusy(vm, bt);
	}
	MPASS(bt->bt_size >= size);
	bt->bt_type = BT_TYPE_BUSY;
}

/* ---- vmem API */

void
vmem_set_import(vmem_t *vm, vmem_import_t *importfn,
    vmem_release_t *releasefn, void *arg, vmem_size_t import_quantum)
{

	VMEM_LOCK(vm);
	vm->vm_importfn = importfn;
	vm->vm_releasefn = releasefn;
	vm->vm_arg = arg;
	vm->vm_import_quantum = import_quantum;
	VMEM_UNLOCK(vm);
}

void
vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn)
{

	VMEM_LOCK(vm);
	vm->vm_reclaimfn = reclaimfn;
	VMEM_UNLOCK(vm);
}
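
/*
 * Example (editor's sketch, not part of the original file): wiring a child
 * arena to a parent arena via vmem_set_import().  The "foo_*" and
 * "*_arena" names are hypothetical.
 */
#if 0
static int
foo_import(void *arg, vmem_size_t size, int flags, vmem_addr_t *addrp)
{
	vmem_t *parent = arg;

	/* Take a span from the parent; the fit strategy is our choice. */
	return (vmem_alloc(parent, size,
	    (flags & (M_NOWAIT | M_WAITOK)) | M_BESTFIT, addrp));
}

static void
foo_release(void *arg, vmem_addr_t addr, vmem_size_t size)
{

	vmem_free((vmem_t *)arg, addr, size);
}

	/* Import 1MB spans on demand; release whole spans as they free up. */
	vmem_set_import(child_arena, foo_import, foo_release, parent_arena,
	    1024 * 1024);
#endif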

/*
 * vmem_init: Initializes vmem arena.
 */
vmem_t *
vmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_size_t qcache_max, int flags)
{
	int i;

	MPASS(quantum > 0);

	bzero(vm, sizeof(*vm));

	VMEM_CONDVAR_INIT(vm, name);
	VMEM_LOCK_INIT(vm, name);
	vm->vm_nfreetags = 0;
	LIST_INIT(&vm->vm_freetags);
	strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
	vm->vm_quantum_mask = quantum - 1;
	vm->vm_quantum_shift = SIZE2ORDER(quantum);
	MPASS(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
	vm->vm_nbusytag = 0;
	vm->vm_size = 0;
	vm->vm_inuse = 0;
	qc_init(vm, qcache_max);

	TAILQ_INIT(&vm->vm_seglist);
	for (i = 0; i < VMEM_MAXORDER; i++) {
		LIST_INIT(&vm->vm_freelist[i]);
	}
	memset(&vm->vm_hash0, 0, sizeof(vm->vm_hash0));
	vm->vm_hashsize = VMEM_HASHSIZE_MIN;
	vm->vm_hashlist = vm->vm_hash0;

	if (size != 0) {
		if (vmem_add(vm, base, size, flags) != 0) {
			vmem_destroy1(vm);
			return NULL;
		}
	}

	mtx_lock(&vmem_list_lock);
	LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
	mtx_unlock(&vmem_list_lock);

	return vm;
}

/*
 * vmem_create: create an arena.
 */
vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_size_t qcache_max, int flags)
{

	vmem_t *vm;

	vm = malloc(sizeof(*vm), M_VMEM, flags & (M_WAITOK|M_NOWAIT));
	if (vm == NULL)
		return (NULL);
	if (vmem_init(vm, name, base, size, quantum, qcache_max,
	    flags) == NULL) {
		free(vm, M_VMEM);
		return (NULL);
	}
	return (vm);
}

void
vmem_destroy(vmem_t *vm)
{

	mtx_lock(&vmem_list_lock);
	LIST_REMOVE(vm, vm_alllist);
	mtx_unlock(&vmem_list_lock);

	vmem_destroy1(vm);
}

vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}
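
/*
 * Example (editor's sketch, not part of the original file): creating a
 * standalone arena over a hypothetical address range with a 4KB quantum
 * and quantum caching of allocations up to 32KB.
 */
#if 0
	vmem_t *foo_arena;

	foo_arena = vmem_create("foo", 0x100000, 16 * 1024 * 1024, 4096,
	    32 * 1024, M_WAITOK);
#endif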

/*
 * vmem_alloc: allocate resource from the arena.
 */
int
vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp)
{
	const int strat __unused = flags & VMEM_FITMASK;
	qcache_t *qc;

	flags &= VMEM_FLAGS;
	MPASS(size > 0);
	MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT);
	if ((flags & M_NOWAIT) == 0)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_alloc");

	if (size <= vm->vm_qcache_max) {
		qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
		*addrp = (vmem_addr_t)uma_zalloc(qc->qc_cache, flags);
		if (*addrp == 0)
			return (ENOMEM);
		return (0);
	}

	return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
	    flags, addrp);
}

int
vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
    const vmem_size_t phase, const vmem_size_t nocross,
    const vmem_addr_t minaddr, const vmem_addr_t maxaddr, int flags,
    vmem_addr_t *addrp)
{
	const vmem_size_t size = vmem_roundup_size(vm, size0);
	struct vmem_freelist *list;
	struct vmem_freelist *first;
	struct vmem_freelist *end;
	vmem_size_t avail;
	bt_t *bt;
	int error;
	int strat;

	flags &= VMEM_FLAGS;
	strat = flags & VMEM_FITMASK;
	MPASS(size0 > 0);
	MPASS(size > 0);
	MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT);
	MPASS((flags & (M_NOWAIT|M_WAITOK)) != (M_NOWAIT|M_WAITOK));
	if ((flags & M_NOWAIT) == 0)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_xalloc");
	MPASS((align & vm->vm_quantum_mask) == 0);
	MPASS((align & (align - 1)) == 0);
	MPASS((phase & vm->vm_quantum_mask) == 0);
	MPASS((nocross & vm->vm_quantum_mask) == 0);
	MPASS((nocross & (nocross - 1)) == 0);
	MPASS((align == 0 && phase == 0) || phase < align);
	MPASS(nocross == 0 || nocross >= size);
	MPASS(minaddr <= maxaddr);
	MPASS(!VMEM_CROSS_P(phase, phase + size - 1, nocross));

	if (align == 0)
		align = vm->vm_quantum_mask + 1;

	*addrp = 0;
	end = &vm->vm_freelist[VMEM_MAXORDER];
	/*
	 * choose a free block from which we allocate.
	 */
	first = bt_freehead_toalloc(vm, size, strat);
	VMEM_LOCK(vm);
	for (;;) {
		/*
		 * Make sure we have enough tags to complete the
		 * operation.
		 */
		if (vm->vm_nfreetags < BT_MAXALLOC &&
		    bt_fill(vm, flags) != 0) {
			error = ENOMEM;
			break;
		}
		/*
		 * Scan freelists looking for a tag that satisfies the
		 * allocation.  If we're doing BESTFIT we may encounter
		 * sizes below the request.  If we're doing FIRSTFIT we
		 * inspect only the first element from each list.
		 */
		for (list = first; list < end; list++) {
			LIST_FOREACH(bt, list, bt_freelist) {
				if (bt->bt_size >= size) {
					error = vmem_fit(bt, size, align, phase,
					    nocross, minaddr, maxaddr, addrp);
					if (error == 0) {
						vmem_clip(vm, bt, *addrp, size);
						goto out;
					}
				}
				/* FIRST skips to the next list. */
				if (strat == M_FIRSTFIT)
					break;
			}
		}
		/*
		 * Retry if the fast algorithm failed.
		 */
		if (strat == M_FIRSTFIT) {
			strat = M_BESTFIT;
			first = bt_freehead_toalloc(vm, size, strat);
			continue;
		}
		/*
		 * XXX it is possible to fail to meet restrictions with the
		 * imported region.  It is up to the user to specify the
		 * import quantum such that it can satisfy any allocation.
		 */
		if (vmem_import(vm, size, flags) == 0)
			continue;

		/*
		 * Try to free some space from the quantum cache or reclaim
		 * functions if available.
		 */
		if (vm->vm_qcache_max != 0 || vm->vm_reclaimfn != NULL) {
			avail = vm->vm_size - vm->vm_inuse;
			VMEM_UNLOCK(vm);
			if (vm->vm_qcache_max != 0)
				qc_drain(vm);
			if (vm->vm_reclaimfn != NULL)
				vm->vm_reclaimfn(vm, flags);
			VMEM_LOCK(vm);
			/* If we were successful retry even NOWAIT. */
			if (vm->vm_size - vm->vm_inuse > avail)
				continue;
		}
		if ((flags & M_NOWAIT) != 0) {
			error = ENOMEM;
			break;
		}
		VMEM_CONDVAR_WAIT(vm);
	}
out:
	VMEM_UNLOCK(vm);
	if (error != 0 && (flags & M_NOWAIT) == 0)
		panic("failed to allocate waiting allocation\n");

	return (error);
}
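
/*
 * Example (editor's sketch, not part of the original file): a constrained
 * allocation such as a DMA region: 8KB, 64KB-aligned, not crossing a 1MB
 * boundary.  "foo_arena" is hypothetical.
 */
#if 0
	vmem_addr_t addr;
	int error;

	error = vmem_xalloc(foo_arena, 8 * 1024, 64 * 1024, 0, 1024 * 1024,
	    VMEM_ADDR_MIN, VMEM_ADDR_MAX, M_BESTFIT | M_WAITOK, &addr);
#endif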

/*
 * vmem_free: free the resource to the arena.
 */
void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	qcache_t *qc;
	MPASS(size > 0);

	if (size <= vm->vm_qcache_max) {
		qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
		uma_zfree(qc->qc_cache, (void *)addr);
	} else
		vmem_xfree(vm, addr, size);
}

void
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	bt_t *bt;
	bt_t *t;

	MPASS(size > 0);

	VMEM_LOCK(vm);
	bt = bt_lookupbusy(vm, addr);
	MPASS(bt != NULL);
	MPASS(bt->bt_start == addr);
	MPASS(bt->bt_size == vmem_roundup_size(vm, size) ||
	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
	MPASS(bt->bt_type == BT_TYPE_BUSY);
	bt_rembusy(vm, bt);
	bt->bt_type = BT_TYPE_FREE;

	/* coalesce */
	t = TAILQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		MPASS(BT_END(bt) < t->bt_start);	/* YYY */
		bt->bt_size += t->bt_size;
		bt_remfree(vm, t);
		bt_remseg(vm, t);
	}
	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		MPASS(BT_END(t) < bt->bt_start);	/* YYY */
		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;
		bt_remfree(vm, t);
		bt_remseg(vm, t);
	}

	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	MPASS(t != NULL);
	MPASS(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
	if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		MPASS(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;
		bt_remseg(vm, bt);
		bt_remseg(vm, t);
		vm->vm_size -= spansize;
		VMEM_CONDVAR_BROADCAST(vm);
		bt_freetrim(vm, BT_MAXFREE);
		(*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
	} else {
		bt_insfree(vm, bt);
		VMEM_CONDVAR_BROADCAST(vm);
		bt_freetrim(vm, BT_MAXFREE);
	}
}
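
/*
 * Note for illustration: the release path above fires only when the
 * coalesced free segment once again covers its entire imported
 * (BT_TYPE_SPAN) span; static spans added with vmem_add() are never
 * returned to a release function.
 */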

/*
 * vmem_add: add a static span to the arena.
 */
int
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int flags)
{
	int error;

	error = 0;
	flags &= VMEM_FLAGS;
	VMEM_LOCK(vm);
	if (vm->vm_nfreetags >= BT_MAXALLOC || bt_fill(vm, flags) == 0)
		vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
	else
		error = ENOMEM;
	VMEM_UNLOCK(vm);

	return (error);
}

/*
 * vmem_size: report size information about the arena.
 */
vmem_size_t
vmem_size(vmem_t *vm, int typemask)
{

	switch (typemask) {
	case VMEM_ALLOC:
		return vm->vm_inuse;
	case VMEM_FREE:
		return vm->vm_size - vm->vm_inuse;
	case VMEM_FREE|VMEM_ALLOC:
		return vm->vm_size;
	default:
		panic("vmem_size");
	}
}

/* ---- debug */

#if defined(DDB) || defined(DIAGNOSTIC)

static void bt_dump(const bt_t *, int (*)(const char *, ...)
    __printflike(1, 2));

static const char *
bt_type_string(int type)
{

	switch (type) {
	case BT_TYPE_BUSY:
		return "busy";
	case BT_TYPE_FREE:
		return "free";
	case BT_TYPE_SPAN:
		return "span";
	case BT_TYPE_SPAN_STATIC:
		return "static span";
	default:
		break;
	}
	return "BOGUS";
}

static void
bt_dump(const bt_t *bt, int (*pr)(const char *, ...))
{

	(*pr)("\t%p: %jx %jx, %d(%s)\n",
	    bt, (intmax_t)bt->bt_start, (intmax_t)bt->bt_size,
	    bt->bt_type, bt_type_string(bt->bt_type));
}

static void
vmem_dump(const vmem_t *vm, int (*pr)(const char *, ...) __printflike(1, 2))
{
	const bt_t *bt;
	int i;

	(*pr)("vmem %p '%s'\n", vm, vm->vm_name);
	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		bt_dump(bt, pr);
	}

	for (i = 0; i < VMEM_MAXORDER; i++) {
		const struct vmem_freelist *fl = &vm->vm_freelist[i];

		if (LIST_EMPTY(fl)) {
			continue;
		}

		(*pr)("freelist[%d]\n", i);
		LIST_FOREACH(bt, fl, bt_freelist) {
			bt_dump(bt, pr);
		}
	}
}

#endif /* defined(DDB) || defined(DIAGNOSTIC) */

#if defined(DDB)
static bt_t *
vmem_whatis_lookup(vmem_t *vm, vmem_addr_t addr)
{
	bt_t *bt;

	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (BT_ISSPAN_P(bt)) {
			continue;
		}
		if (bt->bt_start <= addr && addr <= BT_END(bt)) {
			return bt;
		}
	}

	return NULL;
}

void
vmem_whatis(vmem_addr_t addr, int (*pr)(const char *, ...))
{
	vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		bt_t *bt;

		bt = vmem_whatis_lookup(vm, addr);
		if (bt == NULL) {
			continue;
		}
		(*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
		    (void *)addr, (void *)bt->bt_start,
		    (vmem_size_t)(addr - bt->bt_start), vm->vm_name,
		    (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free");
	}
}
"allocated" : "free"); 1302252330Sjeff } 1303252330Sjeff} 1304252330Sjeff 1305252330Sjeffvoid 1306252330Sjeffvmem_printall(const char *modif, int (*pr)(const char *, ...)) 1307252330Sjeff{ 1308252330Sjeff const vmem_t *vm; 1309252330Sjeff 1310252330Sjeff LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1311252330Sjeff vmem_dump(vm, pr); 1312252330Sjeff } 1313252330Sjeff} 1314252330Sjeff 1315252330Sjeffvoid 1316252330Sjeffvmem_print(vmem_addr_t addr, const char *modif, int (*pr)(const char *, ...)) 1317252330Sjeff{ 1318252330Sjeff const vmem_t *vm = (const void *)addr; 1319252330Sjeff 1320252330Sjeff vmem_dump(vm, pr); 1321252330Sjeff} 1322252330Sjeff#endif /* defined(DDB) */ 1323252330Sjeff 1324252330Sjeff#define vmem_printf printf 1325252330Sjeff 1326252330Sjeff#if defined(DIAGNOSTIC) 1327252330Sjeff 1328252330Sjeffstatic bool 1329252330Sjeffvmem_check_sanity(vmem_t *vm) 1330252330Sjeff{ 1331252330Sjeff const bt_t *bt, *bt2; 1332252330Sjeff 1333252330Sjeff MPASS(vm != NULL); 1334252330Sjeff 1335252330Sjeff TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1336252330Sjeff if (bt->bt_start > BT_END(bt)) { 1337252330Sjeff printf("corrupted tag\n"); 1338252330Sjeff bt_dump(bt, vmem_printf); 1339252330Sjeff return false; 1340252330Sjeff } 1341252330Sjeff } 1342252330Sjeff TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1343252330Sjeff TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) { 1344252330Sjeff if (bt == bt2) { 1345252330Sjeff continue; 1346252330Sjeff } 1347252330Sjeff if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) { 1348252330Sjeff continue; 1349252330Sjeff } 1350252330Sjeff if (bt->bt_start <= BT_END(bt2) && 1351252330Sjeff bt2->bt_start <= BT_END(bt)) { 1352252330Sjeff printf("overwrapped tags\n"); 1353252330Sjeff bt_dump(bt, vmem_printf); 1354252330Sjeff bt_dump(bt2, vmem_printf); 1355252330Sjeff return false; 1356252330Sjeff } 1357252330Sjeff } 1358252330Sjeff } 1359252330Sjeff 1360252330Sjeff return true; 1361252330Sjeff} 1362252330Sjeff 1363252330Sjeffstatic void 1364252330Sjeffvmem_check(vmem_t *vm) 1365252330Sjeff{ 1366252330Sjeff 1367252330Sjeff if (!vmem_check_sanity(vm)) { 1368252330Sjeff panic("insanity vmem %p", vm); 1369252330Sjeff } 1370252330Sjeff} 1371252330Sjeff 1372252330Sjeff#endif /* defined(DIAGNOSTIC) */ 1373