1252330Sjeff/*-
2252330Sjeff * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
3252330Sjeff * Copyright (c) 2013 EMC Corp.
4252330Sjeff * All rights reserved.
5252330Sjeff *
6252330Sjeff * Redistribution and use in source and binary forms, with or without
7252330Sjeff * modification, are permitted provided that the following conditions
8252330Sjeff * are met:
9252330Sjeff * 1. Redistributions of source code must retain the above copyright
10252330Sjeff *    notice, this list of conditions and the following disclaimer.
11252330Sjeff * 2. Redistributions in binary form must reproduce the above copyright
12252330Sjeff *    notice, this list of conditions and the following disclaimer in the
13252330Sjeff *    documentation and/or other materials provided with the distribution.
14252330Sjeff *
15252330Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16252330Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17252330Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18252330Sjeff * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19252330Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20252330Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21252330Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22252330Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23252330Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24252330Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25252330Sjeff * SUCH DAMAGE.
26252330Sjeff */
27252330Sjeff
28252330Sjeff/*
29252330Sjeff * From:
30252330Sjeff *	$NetBSD: vmem_impl.h,v 1.2 2013/01/29 21:26:24 para Exp $
31252330Sjeff *	$NetBSD: subr_vmem.c,v 1.83 2013/03/06 11:20:10 yamt Exp $
32252330Sjeff */
33252330Sjeff
34252330Sjeff/*
35252330Sjeff * reference:
36252330Sjeff * -	Magazines and Vmem: Extending the Slab Allocator
37252330Sjeff *	to Many CPUs and Arbitrary Resources
38252330Sjeff *	http://www.usenix.org/event/usenix01/bonwick.html
39252330Sjeff */
40252330Sjeff
41252330Sjeff#include <sys/cdefs.h>
42252330Sjeff__FBSDID("$FreeBSD: stable/10/sys/kern/subr_vmem.c 314667 2017-03-04 13:03:31Z avg $");
43252330Sjeff
44252330Sjeff#include "opt_ddb.h"
45252330Sjeff
46252330Sjeff#include <sys/param.h>
47252330Sjeff#include <sys/systm.h>
48252330Sjeff#include <sys/kernel.h>
49252330Sjeff#include <sys/queue.h>
50252330Sjeff#include <sys/callout.h>
51252330Sjeff#include <sys/hash.h>
52252330Sjeff#include <sys/lock.h>
53252330Sjeff#include <sys/malloc.h>
54252330Sjeff#include <sys/mutex.h>
55252330Sjeff#include <sys/smp.h>
56252330Sjeff#include <sys/condvar.h>
57254558Spho#include <sys/sysctl.h>
58252330Sjeff#include <sys/taskqueue.h>
59252330Sjeff#include <sys/vmem.h>
60252330Sjeff
61254307Sjeff#include "opt_vm.h"
62254307Sjeff
63252330Sjeff#include <vm/uma.h>
64252330Sjeff#include <vm/vm.h>
65252330Sjeff#include <vm/pmap.h>
66252330Sjeff#include <vm/vm_map.h>
67254025Sjeff#include <vm/vm_object.h>
68252330Sjeff#include <vm/vm_kern.h>
69252330Sjeff#include <vm/vm_extern.h>
70252330Sjeff#include <vm/vm_param.h>
71252330Sjeff#include <vm/vm_pageout.h>
72252330Sjeff
/*
 * Freelist sizing.  Sizes of up to VMEM_OPTVALUE quanta each get a
 * dedicated freelist (see SIZE2ORDER()); larger sizes share one list per
 * power of two.  VMEM_MAXORDER is the resulting number of freelists and
 * dimensions vm_freelist[].
 */
#define	VMEM_OPTORDER		5
#define	VMEM_OPTVALUE		(1 << VMEM_OPTORDER)
#define	VMEM_MAXORDER						\
    (VMEM_OPTVALUE - 1 + sizeof(vmem_size_t) * NBBY - VMEM_OPTORDER)

/*
 * Bounds for the busy-tag hash table size.  VMEM_HASHSIZE_MIN is also the
 * size of the table embedded in each arena (vm_hash0).
 */
#define	VMEM_HASHSIZE_MIN	16
#define	VMEM_HASHSIZE_MAX	131072

/* Upper bound on the number of quantum caches per arena (vm_qcache[]). */
#define	VMEM_QCACHE_IDX_MAX	16

/* Allocation strategy bits. */
#define	VMEM_FITMASK	(M_BESTFIT | M_FIRSTFIT)

/* NOTE(review): presumably the full set of flags vmem entry points accept. */
#define	VMEM_FLAGS						\
    (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | M_BESTFIT | M_FIRSTFIT)

/* Flags that bt_fill() forwards to the boundary tag zone allocator. */
#define	BT_FLAGS	(M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM)

/* Size of the qc_name buffer, including the terminating NUL. */
#define	QC_NAME_MAX	16
91252330Sjeff
/*
 * Data structures private to vmem.
 */
/* malloc(9) type used for arenas and dynamically sized hash tables. */
MALLOC_DEFINE(M_VMEM, "vmem", "vmem internal structures");

/* Short name for a boundary tag. */
typedef struct vmem_btag bt_t;

/* List head types for the three kinds of lists boundary tags live on. */
TAILQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);
102252330Sjeff
/*
 * Quantum cache: a UMA cache zone that hands out fixed-size allocations
 * from one arena.  Instances are set up by qc_init().
 */
struct qcache {
	/* UMA cache zone created with uma_zcache_create(). */
	uma_zone_t	qc_cache;
	/* Arena that qc_import()/qc_release() allocate from and free to. */
	vmem_t 		*qc_vmem;
	/* Size in bytes of every allocation served by this cache. */
	vmem_size_t	qc_size;
	/* Zone name, formatted as "<arena name>-<size>". */
	char		qc_name[QC_NAME_MAX];
};
typedef struct qcache qcache_t;
/* NOTE(review): no user of this macro is visible here; confirm it is live. */
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool->pr_qcache))
111252330Sjeff
/* Maximum arena name length, not counting the terminating NUL. */
#define	VMEM_NAME_MAX	16

/* vmem arena */
struct vmem {
	/* Protects the arena; taken through VMEM_LOCK()/VMEM_UNLOCK(). */
	struct mtx_padalign	vm_lock;
	/* Waited on with vm_lock held; broadcast from vmem_periodic(). */
	struct cv		vm_cv;
	char			vm_name[VMEM_NAME_MAX+1];
	/* Linkage on the global vmem_list. */
	LIST_ENTRY(vmem)	vm_alllist;
	/* Embedded hash table; vmem_rehash() never frees this one. */
	struct vmem_hashlist	vm_hash0[VMEM_HASHSIZE_MIN];
	/* Free segments, indexed by SIZE2ORDER() of their size in quanta. */
	struct vmem_freelist	vm_freelist[VMEM_MAXORDER];
	/* Every boundary tag of the arena (spans and segments). */
	struct vmem_seglist	vm_seglist;
	/* Current busy-tag hash table and its number of buckets. */
	struct vmem_hashlist	*vm_hashlist;
	vmem_size_t		vm_hashsize;

	/* Constant after init */
	/* Number of quantum caches times the quantum; set by qc_init(). */
	vmem_size_t		vm_qcache_max;
	/* Quantum minus one; all segment sizes are multiples of the quantum. */
	vmem_size_t		vm_quantum_mask;
	/* Import request sizes are rounded up to a multiple of this. */
	vmem_size_t		vm_import_quantum;
	/* Shift that converts a byte size into a count of quanta. */
	int			vm_quantum_shift;

	/* Written on alloc/free */
	/* Cache of unused boundary tags and how many it holds. */
	LIST_HEAD(, vmem_btag)	vm_freetags;
	int			vm_nfreetags;
	/* Number of tags currently on the busy hash. */
	int			vm_nbusytag;
	/* Sum of the sizes of all busy tags. */
	vmem_size_t		vm_inuse;
	/* Sum of the sizes of all spans added by vmem_add1(). */
	vmem_size_t		vm_size;

	/* Used on import. */
	vmem_import_t		*vm_importfn;
	vmem_release_t		*vm_releasefn;
	/* Opaque argument handed to vm_importfn. */
	void			*vm_arg;

	/* Space exhaustion callback. */
	vmem_reclaim_t		*vm_reclaimfn;

	/* quantum cache */
	qcache_t		vm_qcache[VMEM_QCACHE_IDX_MAX];
};
150252330Sjeff
/* boundary tag */
struct vmem_btag {
	/* Linkage on the arena's vm_seglist. */
	TAILQ_ENTRY(vmem_btag) bt_seglist;
	/*
	 * A tag is on a freelist or on the busy hash, never both, so the
	 * two linkages share storage.  Unused tags cached in vm_freetags
	 * are also linked through bt_freelist.
	 */
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define	bt_hashlist	bt_u.u_hashlist
#define	bt_freelist	bt_u.u_freelist
	/* First address covered by the tag. */
	vmem_addr_t	bt_start;
	/* Number of bytes covered by the tag. */
	vmem_size_t	bt_size;
	/* One of the BT_TYPE_* values below. */
	int		bt_type;
};

#define	BT_TYPE_SPAN		1	/* Allocated from importfn */
#define	BT_TYPE_SPAN_STATIC	2	/* vmem_add() or create. */
#define	BT_TYPE_FREE		3	/* Available space. */
#define	BT_TYPE_BUSY		4	/* Used space. */
/* True for either span type; relies on the span types being the two lowest. */
#define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

/* Last address covered by the tag (inclusive). */
#define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size - 1)
172252330Sjeff
#if defined(DIAGNOSTIC)
/*
 * debug.vmem_check: when 1, vmem_periodic() runs vmem_check() on every
 * arena.
 */
static int enable_vmem_check = 1;
SYSCTL_INT(_debug, OID_AUTO, vmem_check, CTLFLAG_RWTUN,
    &enable_vmem_check, 0, "Enable vmem check");
static void vmem_check(vmem_t *);
#endif

/*
 * Periodic maintenance: the callout fires vmem_periodic_kick(), which
 * queues vmem_periodic_wk; vmem_periodic_interval is the period in ticks.
 */
static struct callout	vmem_periodic_ch;
static int		vmem_periodic_interval;
static struct task	vmem_periodic_wk;

/* List of arenas walked by vmem_periodic(), and the lock that guards it. */
static struct mtx_padalign vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
186252330Sjeff
/* ---- misc */
/*
 * Wrappers around the arena's condition variable and mutex.  Macro
 * arguments are parenthesized so that any pointer-valued expression can be
 * passed, not just a plain identifier.
 */
#define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&(vm)->vm_cv, (wchan))
#define	VMEM_CONDVAR_DESTROY(vm)	cv_destroy(&(vm)->vm_cv)
#define	VMEM_CONDVAR_WAIT(vm)		cv_wait(&(vm)->vm_cv, &(vm)->vm_lock)
#define	VMEM_CONDVAR_BROADCAST(vm)	cv_broadcast(&(vm)->vm_cv)


#define	VMEM_LOCK(vm)		mtx_lock(&(vm)->vm_lock)
#define	VMEM_TRYLOCK(vm)	mtx_trylock(&(vm)->vm_lock)
#define	VMEM_UNLOCK(vm)		mtx_unlock(&(vm)->vm_lock)
#define	VMEM_LOCK_INIT(vm, name) mtx_init(&(vm)->vm_lock, (name), NULL, MTX_DEF)
#define	VMEM_LOCK_DESTROY(vm)	mtx_destroy(&(vm)->vm_lock)
/*
 * NOTE(review): the expansion ends in a semicolon, so this macro cannot be
 * used as the body of an unbraced if/else.  The semicolon is kept because
 * callers outside this view may depend on it.
 */
#define	VMEM_ASSERT_LOCKED(vm)	mtx_assert(&(vm)->vm_lock, MA_OWNED);

/* Round addr up to a multiple of align; align must be a power of two. */
#define	VMEM_ALIGNUP(addr, align)	(-(-(addr) & -(align)))

/*
 * True when addr1 and addr2 differ in any bit at or above boundary, i.e.
 * they lie in different boundary-sized windows.  boundary must be a power
 * of two.
 */
#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

/*
 * Freelist index <-> size in quanta.  Orders below VMEM_OPTVALUE map to
 * the exact size (order + 1); each higher order covers one power-of-two
 * range, and ORDER2SIZE() yields the smallest size in that range.
 */
#define	ORDER2SIZE(order)	((order) < VMEM_OPTVALUE ? ((order) + 1) : \
    (vmem_size_t)1 << ((order) - (VMEM_OPTVALUE - VMEM_OPTORDER - 1)))
#define	SIZE2ORDER(size)	((size) <= VMEM_OPTVALUE ? ((size) - 1) : \
    (flsl(size) + (VMEM_OPTVALUE - VMEM_OPTORDER - 2)))
210252330Sjeff
/*
 * Maximum number of boundary tags that may be required to satisfy an
 * allocation.  Two may be required to import.  Another two may be
 * required to clip edges.
 */
#define	BT_MAXALLOC	4

/*
 * Max free limits the number of locally cached boundary tags.  We
 * just want to avoid hitting the zone allocator for every call.
 */
#define BT_MAXFREE	(BT_MAXALLOC * 8)

/* Allocator for boundary tags. */
static uma_zone_t vmem_bt_zone;

/*
 * boot time arena storage.  The arenas below live in static storage and
 * are exported through the global pointers that follow.
 */
static struct vmem kernel_arena_storage;
static struct vmem kmem_arena_storage;
static struct vmem buffer_arena_storage;
static struct vmem transient_arena_storage;
vmem_t *kernel_arena = &kernel_arena_storage;
vmem_t *kmem_arena = &kmem_arena_storage;
vmem_t *buffer_arena = &buffer_arena_storage;
vmem_t *transient_arena = &transient_arena_storage;

/* Extra arena that exists only in DEBUG_MEMGUARD kernels. */
#ifdef DEBUG_MEMGUARD
static struct vmem memguard_arena_storage;
vmem_t *memguard_arena = &memguard_arena_storage;
#endif
241254307Sjeff
242252330Sjeff/*
243252330Sjeff * Fill the vmem's boundary tag cache.  We guarantee that boundary tag
244252330Sjeff * allocation will not fail once bt_fill() passes.  To do so we cache
245252330Sjeff * at least the maximum possible tag allocations in the arena.
246252330Sjeff */
247252330Sjeffstatic int
248252330Sjeffbt_fill(vmem_t *vm, int flags)
249252330Sjeff{
250252330Sjeff	bt_t *bt;
251252330Sjeff
252252330Sjeff	VMEM_ASSERT_LOCKED(vm);
253252330Sjeff
254252330Sjeff	/*
255254025Sjeff	 * Only allow the kmem arena to dip into reserve tags.  It is the
256254025Sjeff	 * vmem where new tags come from.
257254025Sjeff	 */
258254025Sjeff	flags &= BT_FLAGS;
259254025Sjeff	if (vm != kmem_arena)
260254025Sjeff		flags &= ~M_USE_RESERVE;
261254025Sjeff
262254025Sjeff	/*
263252330Sjeff	 * Loop until we meet the reserve.  To minimize the lock shuffle
264252330Sjeff	 * and prevent simultaneous fills we first try a NOWAIT regardless
265252330Sjeff	 * of the caller's flags.  Specify M_NOVM so we don't recurse while
266252330Sjeff	 * holding a vmem lock.
267252330Sjeff	 */
268252330Sjeff	while (vm->vm_nfreetags < BT_MAXALLOC) {
269252330Sjeff		bt = uma_zalloc(vmem_bt_zone,
270252330Sjeff		    (flags & M_USE_RESERVE) | M_NOWAIT | M_NOVM);
271252330Sjeff		if (bt == NULL) {
272252330Sjeff			VMEM_UNLOCK(vm);
273252330Sjeff			bt = uma_zalloc(vmem_bt_zone, flags);
274252330Sjeff			VMEM_LOCK(vm);
275252330Sjeff			if (bt == NULL && (flags & M_NOWAIT) != 0)
276252330Sjeff				break;
277252330Sjeff		}
278252330Sjeff		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
279252330Sjeff		vm->vm_nfreetags++;
280252330Sjeff	}
281252330Sjeff
282252330Sjeff	if (vm->vm_nfreetags < BT_MAXALLOC)
283252330Sjeff		return ENOMEM;
284252330Sjeff
285252330Sjeff	return 0;
286252330Sjeff}
287252330Sjeff
288252330Sjeff/*
289252330Sjeff * Pop a tag off of the freetag stack.
290252330Sjeff */
291252330Sjeffstatic bt_t *
292252330Sjeffbt_alloc(vmem_t *vm)
293252330Sjeff{
294252330Sjeff	bt_t *bt;
295252330Sjeff
296252330Sjeff	VMEM_ASSERT_LOCKED(vm);
297252330Sjeff	bt = LIST_FIRST(&vm->vm_freetags);
298252330Sjeff	MPASS(bt != NULL);
299252330Sjeff	LIST_REMOVE(bt, bt_freelist);
300252330Sjeff	vm->vm_nfreetags--;
301252330Sjeff
302252330Sjeff	return bt;
303252330Sjeff}
304252330Sjeff
305252330Sjeff/*
306252330Sjeff * Trim the per-vmem free list.  Returns with the lock released to
307252330Sjeff * avoid allocator recursions.
308252330Sjeff */
309252330Sjeffstatic void
310252330Sjeffbt_freetrim(vmem_t *vm, int freelimit)
311252330Sjeff{
312252330Sjeff	LIST_HEAD(, vmem_btag) freetags;
313252330Sjeff	bt_t *bt;
314252330Sjeff
315252330Sjeff	LIST_INIT(&freetags);
316252330Sjeff	VMEM_ASSERT_LOCKED(vm);
317252330Sjeff	while (vm->vm_nfreetags > freelimit) {
318252330Sjeff		bt = LIST_FIRST(&vm->vm_freetags);
319252330Sjeff		LIST_REMOVE(bt, bt_freelist);
320252330Sjeff		vm->vm_nfreetags--;
321252330Sjeff		LIST_INSERT_HEAD(&freetags, bt, bt_freelist);
322252330Sjeff	}
323252330Sjeff	VMEM_UNLOCK(vm);
324252330Sjeff	while ((bt = LIST_FIRST(&freetags)) != NULL) {
325252330Sjeff		LIST_REMOVE(bt, bt_freelist);
326252330Sjeff		uma_zfree(vmem_bt_zone, bt);
327252330Sjeff	}
328252330Sjeff}
329252330Sjeff
330252330Sjeffstatic inline void
331252330Sjeffbt_free(vmem_t *vm, bt_t *bt)
332252330Sjeff{
333252330Sjeff
334252330Sjeff	VMEM_ASSERT_LOCKED(vm);
335252330Sjeff	MPASS(LIST_FIRST(&vm->vm_freetags) != bt);
336252330Sjeff	LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
337252330Sjeff	vm->vm_nfreetags++;
338252330Sjeff}
339252330Sjeff
/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 2]
 *  :
 * freelist[29] ... [30, 30]
 * freelist[30] ... [31, 31]
 * freelist[31] ... [32, 63]
 * freelist[32] ... [64, 127]
 *  :
 * freelist[n] ... [(1 << (n - 26)), (1 << (n - 25)) - 1]
 *  :
 */
352252330Sjeff
353252330Sjeffstatic struct vmem_freelist *
354252330Sjeffbt_freehead_tofree(vmem_t *vm, vmem_size_t size)
355252330Sjeff{
356252330Sjeff	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
357252330Sjeff	const int idx = SIZE2ORDER(qsize);
358252330Sjeff
359252330Sjeff	MPASS(size != 0 && qsize != 0);
360252330Sjeff	MPASS((size & vm->vm_quantum_mask) == 0);
361252330Sjeff	MPASS(idx >= 0);
362252330Sjeff	MPASS(idx < VMEM_MAXORDER);
363252330Sjeff
364252330Sjeff	return &vm->vm_freelist[idx];
365252330Sjeff}
366252330Sjeff
367252330Sjeff/*
368252330Sjeff * bt_freehead_toalloc: return the freelist for the given size and allocation
369252330Sjeff * strategy.
370252330Sjeff *
371252330Sjeff * For M_FIRSTFIT, return the list in which any blocks are large enough
372252330Sjeff * for the requested size.  otherwise, return the list which can have blocks
373252330Sjeff * large enough for the requested size.
374252330Sjeff */
375252330Sjeffstatic struct vmem_freelist *
376252330Sjeffbt_freehead_toalloc(vmem_t *vm, vmem_size_t size, int strat)
377252330Sjeff{
378252330Sjeff	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
379252330Sjeff	int idx = SIZE2ORDER(qsize);
380252330Sjeff
381252330Sjeff	MPASS(size != 0 && qsize != 0);
382252330Sjeff	MPASS((size & vm->vm_quantum_mask) == 0);
383252330Sjeff
384252330Sjeff	if (strat == M_FIRSTFIT && ORDER2SIZE(idx) != qsize) {
385252330Sjeff		idx++;
386252330Sjeff		/* check too large request? */
387252330Sjeff	}
388252330Sjeff	MPASS(idx >= 0);
389252330Sjeff	MPASS(idx < VMEM_MAXORDER);
390252330Sjeff
391252330Sjeff	return &vm->vm_freelist[idx];
392252330Sjeff}
393252330Sjeff
394252330Sjeff/* ---- boundary tag hash */
395252330Sjeff
396252330Sjeffstatic struct vmem_hashlist *
397252330Sjeffbt_hashhead(vmem_t *vm, vmem_addr_t addr)
398252330Sjeff{
399252330Sjeff	struct vmem_hashlist *list;
400252330Sjeff	unsigned int hash;
401252330Sjeff
402252330Sjeff	hash = hash32_buf(&addr, sizeof(addr), 0);
403252330Sjeff	list = &vm->vm_hashlist[hash % vm->vm_hashsize];
404252330Sjeff
405252330Sjeff	return list;
406252330Sjeff}
407252330Sjeff
408252330Sjeffstatic bt_t *
409252330Sjeffbt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
410252330Sjeff{
411252330Sjeff	struct vmem_hashlist *list;
412252330Sjeff	bt_t *bt;
413252330Sjeff
414252330Sjeff	VMEM_ASSERT_LOCKED(vm);
415252330Sjeff	list = bt_hashhead(vm, addr);
416252330Sjeff	LIST_FOREACH(bt, list, bt_hashlist) {
417252330Sjeff		if (bt->bt_start == addr) {
418252330Sjeff			break;
419252330Sjeff		}
420252330Sjeff	}
421252330Sjeff
422252330Sjeff	return bt;
423252330Sjeff}
424252330Sjeff
425252330Sjeffstatic void
426252330Sjeffbt_rembusy(vmem_t *vm, bt_t *bt)
427252330Sjeff{
428252330Sjeff
429252330Sjeff	VMEM_ASSERT_LOCKED(vm);
430252330Sjeff	MPASS(vm->vm_nbusytag > 0);
431252330Sjeff	vm->vm_inuse -= bt->bt_size;
432252330Sjeff	vm->vm_nbusytag--;
433252330Sjeff	LIST_REMOVE(bt, bt_hashlist);
434252330Sjeff}
435252330Sjeff
436252330Sjeffstatic void
437252330Sjeffbt_insbusy(vmem_t *vm, bt_t *bt)
438252330Sjeff{
439252330Sjeff	struct vmem_hashlist *list;
440252330Sjeff
441252330Sjeff	VMEM_ASSERT_LOCKED(vm);
442252330Sjeff	MPASS(bt->bt_type == BT_TYPE_BUSY);
443252330Sjeff
444252330Sjeff	list = bt_hashhead(vm, bt->bt_start);
445252330Sjeff	LIST_INSERT_HEAD(list, bt, bt_hashlist);
446252330Sjeff	vm->vm_nbusytag++;
447252330Sjeff	vm->vm_inuse += bt->bt_size;
448252330Sjeff}
449252330Sjeff
450252330Sjeff/* ---- boundary tag list */
451252330Sjeff
/*
 * Unlink a tag from the arena's segment list and return it to the
 * arena's cache of unused tags.
 */
static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
	bt_free(vm, bt);
}
459252330Sjeff
/*
 * Link bt into the arena's segment list immediately after prev.
 */
static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}
466252330Sjeff
/*
 * Append bt to the end of the arena's segment list.
 */
static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}
473252330Sjeff
/*
 * Unlink a free segment's tag from whichever freelist it is on.  The vm
 * argument is not used by the body.
 */
static void
bt_remfree(vmem_t *vm, bt_t *bt)
{

	MPASS(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}
482252330Sjeff
483252330Sjeffstatic void
484252330Sjeffbt_insfree(vmem_t *vm, bt_t *bt)
485252330Sjeff{
486252330Sjeff	struct vmem_freelist *list;
487252330Sjeff
488252330Sjeff	list = bt_freehead_tofree(vm, bt->bt_size);
489252330Sjeff	LIST_INSERT_HEAD(list, bt, bt_freelist);
490252330Sjeff}
491252330Sjeff
492252330Sjeff/* ---- vmem internal functions */
493252330Sjeff
494252330Sjeff/*
495252330Sjeff * Import from the arena into the quantum cache in UMA.
496252330Sjeff */
497252330Sjeffstatic int
498252330Sjeffqc_import(void *arg, void **store, int cnt, int flags)
499252330Sjeff{
500252330Sjeff	qcache_t *qc;
501252330Sjeff	vmem_addr_t addr;
502252330Sjeff	int i;
503252330Sjeff
504252330Sjeff	qc = arg;
505295870Smarius	if ((flags & VMEM_FITMASK) == 0)
506295870Smarius		flags |= M_BESTFIT;
507252330Sjeff	for (i = 0; i < cnt; i++) {
508252330Sjeff		if (vmem_xalloc(qc->qc_vmem, qc->qc_size, 0, 0, 0,
509252330Sjeff		    VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags, &addr) != 0)
510252330Sjeff			break;
511252330Sjeff		store[i] = (void *)addr;
512252330Sjeff		/* Only guarantee one allocation. */
513252330Sjeff		flags &= ~M_WAITOK;
514252330Sjeff		flags |= M_NOWAIT;
515252330Sjeff	}
516252330Sjeff	return i;
517252330Sjeff}
518252330Sjeff
519252330Sjeff/*
520252330Sjeff * Release memory from the UMA cache to the arena.
521252330Sjeff */
522252330Sjeffstatic void
523252330Sjeffqc_release(void *arg, void **store, int cnt)
524252330Sjeff{
525252330Sjeff	qcache_t *qc;
526252330Sjeff	int i;
527252330Sjeff
528252330Sjeff	qc = arg;
529252330Sjeff	for (i = 0; i < cnt; i++)
530252330Sjeff		vmem_xfree(qc->qc_vmem, (vmem_addr_t)store[i], qc->qc_size);
531252330Sjeff}
532252330Sjeff
533252330Sjeffstatic void
534252330Sjeffqc_init(vmem_t *vm, vmem_size_t qcache_max)
535252330Sjeff{
536252330Sjeff	qcache_t *qc;
537252330Sjeff	vmem_size_t size;
538252330Sjeff	int qcache_idx_max;
539252330Sjeff	int i;
540252330Sjeff
541252330Sjeff	MPASS((qcache_max & vm->vm_quantum_mask) == 0);
542252330Sjeff	qcache_idx_max = MIN(qcache_max >> vm->vm_quantum_shift,
543252330Sjeff	    VMEM_QCACHE_IDX_MAX);
544252330Sjeff	vm->vm_qcache_max = qcache_idx_max << vm->vm_quantum_shift;
545252330Sjeff	for (i = 0; i < qcache_idx_max; i++) {
546252330Sjeff		qc = &vm->vm_qcache[i];
547252330Sjeff		size = (i + 1) << vm->vm_quantum_shift;
548252330Sjeff		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
549252330Sjeff		    vm->vm_name, size);
550252330Sjeff		qc->qc_vmem = vm;
551252330Sjeff		qc->qc_size = size;
552252330Sjeff		qc->qc_cache = uma_zcache_create(qc->qc_name, size,
553252330Sjeff		    NULL, NULL, NULL, NULL, qc_import, qc_release, qc,
554252330Sjeff		    UMA_ZONE_VM);
555252330Sjeff		MPASS(qc->qc_cache);
556252330Sjeff	}
557252330Sjeff}
558252330Sjeff
559252330Sjeffstatic void
560252330Sjeffqc_destroy(vmem_t *vm)
561252330Sjeff{
562252330Sjeff	int qcache_idx_max;
563252330Sjeff	int i;
564252330Sjeff
565252330Sjeff	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
566252330Sjeff	for (i = 0; i < qcache_idx_max; i++)
567252330Sjeff		uma_zdestroy(vm->vm_qcache[i].qc_cache);
568252330Sjeff}
569252330Sjeff
570252330Sjeffstatic void
571252330Sjeffqc_drain(vmem_t *vm)
572252330Sjeff{
573252330Sjeff	int qcache_idx_max;
574252330Sjeff	int i;
575252330Sjeff
576252330Sjeff	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
577252330Sjeff	for (i = 0; i < qcache_idx_max; i++)
578252330Sjeff		zone_drain(vm->vm_qcache[i].qc_cache);
579252330Sjeff}
580252330Sjeff
581254025Sjeff#ifndef UMA_MD_SMALL_ALLOC
582254025Sjeff
583254025Sjeffstatic struct mtx_padalign vmem_bt_lock;
584254025Sjeff
585254025Sjeff/*
586254025Sjeff * vmem_bt_alloc:  Allocate a new page of boundary tags.
587254025Sjeff *
588254025Sjeff * On architectures with uma_small_alloc there is no recursion; no address
589254025Sjeff * space need be allocated to allocate boundary tags.  For the others, we
590254025Sjeff * must handle recursion.  Boundary tags are necessary to allocate new
591254025Sjeff * boundary tags.
592254025Sjeff *
593254025Sjeff * UMA guarantees that enough tags are held in reserve to allocate a new
594254025Sjeff * page of kva.  We dip into this reserve by specifying M_USE_RESERVE only
595254025Sjeff * when allocating the page to hold new boundary tags.  In this way the
596254025Sjeff * reserve is automatically filled by the allocation that uses the reserve.
597254025Sjeff *
598254025Sjeff * We still have to guarantee that the new tags are allocated atomically since
599254025Sjeff * many threads may try concurrently.  The bt_lock provides this guarantee.
600254025Sjeff * We convert WAITOK allocations to NOWAIT and then handle the blocking here
601254025Sjeff * on failure.  It's ok to return NULL for a WAITOK allocation as UMA will
602254025Sjeff * loop again after checking to see if we lost the race to allocate.
603254025Sjeff *
604254025Sjeff * There is a small race between vmem_bt_alloc() returning the page and the
605254025Sjeff * zone lock being acquired to add the page to the zone.  For WAITOK
606254025Sjeff * allocations we just pause briefly.  NOWAIT may experience a transient
607254025Sjeff * failure.  To alleviate this we permit a small number of simultaneous
608254025Sjeff * fills to proceed concurrently so NOWAIT is less likely to fail unless
609254025Sjeff * we are really out of KVA.
610254025Sjeff */
611254025Sjeffstatic void *
612287945Srstonevmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
613254025Sjeff{
614254025Sjeff	vmem_addr_t addr;
615254025Sjeff
616254025Sjeff	*pflag = UMA_SLAB_KMEM;
617254025Sjeff
618254025Sjeff	/*
619254025Sjeff	 * Single thread boundary tag allocation so that the address space
620254025Sjeff	 * and memory are added in one atomic operation.
621254025Sjeff	 */
622254025Sjeff	mtx_lock(&vmem_bt_lock);
623254025Sjeff	if (vmem_xalloc(kmem_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
624254025Sjeff	    VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT,
625254025Sjeff	    &addr) == 0) {
626254025Sjeff		if (kmem_back(kmem_object, addr, bytes,
627254025Sjeff		    M_NOWAIT | M_USE_RESERVE) == 0) {
628254025Sjeff			mtx_unlock(&vmem_bt_lock);
629254025Sjeff			return ((void *)addr);
630254025Sjeff		}
631254025Sjeff		vmem_xfree(kmem_arena, addr, bytes);
632254025Sjeff		mtx_unlock(&vmem_bt_lock);
633254025Sjeff		/*
634254025Sjeff		 * Out of memory, not address space.  This may not even be
635254025Sjeff		 * possible due to M_USE_RESERVE page allocation.
636254025Sjeff		 */
637254025Sjeff		if (wait & M_WAITOK)
638254025Sjeff			VM_WAIT;
639254025Sjeff		return (NULL);
640254025Sjeff	}
641254025Sjeff	mtx_unlock(&vmem_bt_lock);
642254025Sjeff	/*
643254025Sjeff	 * We're either out of address space or lost a fill race.
644254025Sjeff	 */
645254025Sjeff	if (wait & M_WAITOK)
646254025Sjeff		pause("btalloc", 1);
647254025Sjeff
648254025Sjeff	return (NULL);
649254025Sjeff}
650254025Sjeff#endif
651254025Sjeff
/*
 * vmem_startup: one-time initialization of vmem's global state.
 *
 * Initializes the arena list lock and creates the UMA zone that boundary
 * tags are allocated from.  When UMA_MD_SMALL_ALLOC is not defined it
 * also initializes the tag allocation lock, preallocates tags, sets the
 * zone's reserve and installs vmem_bt_alloc() as the zone's allocator.
 */
void
vmem_startup(void)
{

	mtx_init(&vmem_list_lock, "vmem list lock", NULL, MTX_DEF);
	vmem_bt_zone = uma_zcreate("vmem btag",
	    sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_VM);
#ifndef UMA_MD_SMALL_ALLOC
	mtx_init(&vmem_bt_lock, "btag lock", NULL, MTX_DEF);
	uma_prealloc(vmem_bt_zone, BT_MAXALLOC);
	/*
	 * Reserve enough tags to allocate new tags.  We allow multiple
	 * CPUs to attempt to allocate new tags concurrently to limit
	 * false restarts in UMA.
	 */
	uma_zone_reserve(vmem_bt_zone, BT_MAXALLOC * (mp_ncpus + 1) / 2);
	uma_zone_set_allocf(vmem_bt_zone, vmem_bt_alloc);
#endif
}
672252330Sjeff
673252330Sjeff/* ---- rehash */
674252330Sjeff
675252330Sjeffstatic int
676252330Sjeffvmem_rehash(vmem_t *vm, vmem_size_t newhashsize)
677252330Sjeff{
678252330Sjeff	bt_t *bt;
679252330Sjeff	int i;
680252330Sjeff	struct vmem_hashlist *newhashlist;
681252330Sjeff	struct vmem_hashlist *oldhashlist;
682252330Sjeff	vmem_size_t oldhashsize;
683252330Sjeff
684252330Sjeff	MPASS(newhashsize > 0);
685252330Sjeff
686252330Sjeff	newhashlist = malloc(sizeof(struct vmem_hashlist) * newhashsize,
687252330Sjeff	    M_VMEM, M_NOWAIT);
688252330Sjeff	if (newhashlist == NULL)
689252330Sjeff		return ENOMEM;
690252330Sjeff	for (i = 0; i < newhashsize; i++) {
691252330Sjeff		LIST_INIT(&newhashlist[i]);
692252330Sjeff	}
693252330Sjeff
694252330Sjeff	VMEM_LOCK(vm);
695252330Sjeff	oldhashlist = vm->vm_hashlist;
696252330Sjeff	oldhashsize = vm->vm_hashsize;
697252330Sjeff	vm->vm_hashlist = newhashlist;
698252330Sjeff	vm->vm_hashsize = newhashsize;
699252330Sjeff	if (oldhashlist == NULL) {
700252330Sjeff		VMEM_UNLOCK(vm);
701252330Sjeff		return 0;
702252330Sjeff	}
703252330Sjeff	for (i = 0; i < oldhashsize; i++) {
704252330Sjeff		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
705252330Sjeff			bt_rembusy(vm, bt);
706252330Sjeff			bt_insbusy(vm, bt);
707252330Sjeff		}
708252330Sjeff	}
709252330Sjeff	VMEM_UNLOCK(vm);
710252330Sjeff
711252330Sjeff	if (oldhashlist != vm->vm_hash0) {
712252330Sjeff		free(oldhashlist, M_VMEM);
713252330Sjeff	}
714252330Sjeff
715252330Sjeff	return 0;
716252330Sjeff}
717252330Sjeff
/*
 * Callout handler: queue the periodic maintenance task on
 * taskqueue_thread.  The argument is unused.
 */
static void
vmem_periodic_kick(void *dummy)
{

	taskqueue_enqueue(taskqueue_thread, &vmem_periodic_wk);
}
724252330Sjeff
725252330Sjeffstatic void
726252330Sjeffvmem_periodic(void *unused, int pending)
727252330Sjeff{
728252330Sjeff	vmem_t *vm;
729252330Sjeff	vmem_size_t desired;
730252330Sjeff	vmem_size_t current;
731252330Sjeff
732252330Sjeff	mtx_lock(&vmem_list_lock);
733252330Sjeff	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
734252330Sjeff#ifdef DIAGNOSTIC
735252330Sjeff		/* Convenient time to verify vmem state. */
736254558Spho		if (enable_vmem_check == 1) {
737254558Spho			VMEM_LOCK(vm);
738254558Spho			vmem_check(vm);
739254558Spho			VMEM_UNLOCK(vm);
740254558Spho		}
741252330Sjeff#endif
742252330Sjeff		desired = 1 << flsl(vm->vm_nbusytag);
743252330Sjeff		desired = MIN(MAX(desired, VMEM_HASHSIZE_MIN),
744252330Sjeff		    VMEM_HASHSIZE_MAX);
745252330Sjeff		current = vm->vm_hashsize;
746252330Sjeff
747252330Sjeff		/* Grow in powers of two.  Shrink less aggressively. */
748252330Sjeff		if (desired >= current * 2 || desired * 4 <= current)
749252330Sjeff			vmem_rehash(vm, desired);
750281487Smav
751281487Smav		/*
752281487Smav		 * Periodically wake up threads waiting for resources,
753281487Smav		 * so they could ask for reclamation again.
754281487Smav		 */
755281487Smav		VMEM_CONDVAR_BROADCAST(vm);
756252330Sjeff	}
757252330Sjeff	mtx_unlock(&vmem_list_lock);
758252330Sjeff
759252330Sjeff	callout_reset(&vmem_periodic_ch, vmem_periodic_interval,
760252330Sjeff	    vmem_periodic_kick, NULL);
761252330Sjeff}
762252330Sjeff
/*
 * Set up periodic maintenance: initialize the task that runs
 * vmem_periodic(), set the period to hz * 10 ticks, and arm the callout
 * that queues the task.  The argument is unused.
 */
static void
vmem_start_callout(void *unused)
{

	TASK_INIT(&vmem_periodic_wk, 0, vmem_periodic, NULL);
	vmem_periodic_interval = hz * 10;
	callout_init(&vmem_periodic_ch, 1);
	callout_reset(&vmem_periodic_ch, vmem_periodic_interval,
	    vmem_periodic_kick, NULL);
}
/* Run vmem_start_callout() at SI_SUB_CONFIGURE during boot. */
SYSINIT(vfs, SI_SUB_CONFIGURE, SI_ORDER_ANY, vmem_start_callout, NULL);
774252330Sjeff
775252330Sjeffstatic void
776253596Sglebiusvmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type)
777252330Sjeff{
778252330Sjeff	bt_t *btspan;
779252330Sjeff	bt_t *btfree;
780252330Sjeff
781252330Sjeff	MPASS(type == BT_TYPE_SPAN || type == BT_TYPE_SPAN_STATIC);
782254543Sjeff	MPASS((size & vm->vm_quantum_mask) == 0);
783252330Sjeff
784252330Sjeff	btspan = bt_alloc(vm);
785252330Sjeff	btspan->bt_type = type;
786252330Sjeff	btspan->bt_start = addr;
787252330Sjeff	btspan->bt_size = size;
788254025Sjeff	bt_insseg_tail(vm, btspan);
789252330Sjeff
790252330Sjeff	btfree = bt_alloc(vm);
791252330Sjeff	btfree->bt_type = BT_TYPE_FREE;
792252330Sjeff	btfree->bt_start = addr;
793252330Sjeff	btfree->bt_size = size;
794252330Sjeff	bt_insseg(vm, btfree, btspan);
795252330Sjeff	bt_insfree(vm, btfree);
796254025Sjeff
797252330Sjeff	vm->vm_size += size;
798252330Sjeff}
799252330Sjeff
800252330Sjeffstatic void
801252330Sjeffvmem_destroy1(vmem_t *vm)
802252330Sjeff{
803252330Sjeff	bt_t *bt;
804252330Sjeff
805252330Sjeff	/*
806252330Sjeff	 * Drain per-cpu quantum caches.
807252330Sjeff	 */
808252330Sjeff	qc_destroy(vm);
809252330Sjeff
810252330Sjeff	/*
811252330Sjeff	 * The vmem should now only contain empty segments.
812252330Sjeff	 */
813252330Sjeff	VMEM_LOCK(vm);
814252330Sjeff	MPASS(vm->vm_nbusytag == 0);
815252330Sjeff
816252330Sjeff	while ((bt = TAILQ_FIRST(&vm->vm_seglist)) != NULL)
817252330Sjeff		bt_remseg(vm, bt);
818252330Sjeff
819252330Sjeff	if (vm->vm_hashlist != NULL && vm->vm_hashlist != vm->vm_hash0)
820252330Sjeff		free(vm->vm_hashlist, M_VMEM);
821252330Sjeff
822252330Sjeff	bt_freetrim(vm, 0);
823252330Sjeff
824252330Sjeff	VMEM_CONDVAR_DESTROY(vm);
825252330Sjeff	VMEM_LOCK_DESTROY(vm);
826252330Sjeff	free(vm, M_VMEM);
827252330Sjeff}
828252330Sjeff
829252330Sjeffstatic int
830254543Sjeffvmem_import(vmem_t *vm, vmem_size_t size, vmem_size_t align, int flags)
831252330Sjeff{
832252330Sjeff	vmem_addr_t addr;
833252330Sjeff	int error;
834252330Sjeff
835252330Sjeff	if (vm->vm_importfn == NULL)
836252330Sjeff		return EINVAL;
837252330Sjeff
838254543Sjeff	/*
839254543Sjeff	 * To make sure we get a span that meets the alignment we double it
840254543Sjeff	 * and add the size to the tail.  This slightly overestimates.
841254543Sjeff	 */
842254543Sjeff	if (align != vm->vm_quantum_mask + 1)
843254543Sjeff		size = (align * 2) + size;
844252330Sjeff	size = roundup(size, vm->vm_import_quantum);
845252330Sjeff
846252330Sjeff	/*
847252330Sjeff	 * Hide MAXALLOC tags so we're guaranteed to be able to add this
848252330Sjeff	 * span and the tag we want to allocate from it.
849252330Sjeff	 */
850252330Sjeff	MPASS(vm->vm_nfreetags >= BT_MAXALLOC);
851252330Sjeff	vm->vm_nfreetags -= BT_MAXALLOC;
852252330Sjeff	VMEM_UNLOCK(vm);
853252330Sjeff	error = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
854252330Sjeff	VMEM_LOCK(vm);
855252330Sjeff	vm->vm_nfreetags += BT_MAXALLOC;
856252330Sjeff	if (error)
857252330Sjeff		return ENOMEM;
858252330Sjeff
859253596Sglebius	vmem_add1(vm, addr, size, BT_TYPE_SPAN);
860252330Sjeff
861252330Sjeff	return 0;
862252330Sjeff}
863252330Sjeff
864252330Sjeff/*
865252330Sjeff * vmem_fit: check if a bt can satisfy the given restrictions.
866252330Sjeff *
867252330Sjeff * it's a caller's responsibility to ensure the region is big enough
868252330Sjeff * before calling us.
869252330Sjeff */
870252330Sjeffstatic int
871252330Sjeffvmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
872252330Sjeff    vmem_size_t phase, vmem_size_t nocross, vmem_addr_t minaddr,
873252330Sjeff    vmem_addr_t maxaddr, vmem_addr_t *addrp)
874252330Sjeff{
875252330Sjeff	vmem_addr_t start;
876252330Sjeff	vmem_addr_t end;
877252330Sjeff
878252330Sjeff	MPASS(size > 0);
879252330Sjeff	MPASS(bt->bt_size >= size); /* caller's responsibility */
880252330Sjeff
881252330Sjeff	/*
882252330Sjeff	 * XXX assumption: vmem_addr_t and vmem_size_t are
883252330Sjeff	 * unsigned integer of the same size.
884252330Sjeff	 */
885252330Sjeff
886252330Sjeff	start = bt->bt_start;
887252330Sjeff	if (start < minaddr) {
888252330Sjeff		start = minaddr;
889252330Sjeff	}
890252330Sjeff	end = BT_END(bt);
891252330Sjeff	if (end > maxaddr)
892252330Sjeff		end = maxaddr;
893252330Sjeff	if (start > end)
894252330Sjeff		return (ENOMEM);
895252330Sjeff
896252330Sjeff	start = VMEM_ALIGNUP(start - phase, align) + phase;
897252330Sjeff	if (start < bt->bt_start)
898252330Sjeff		start += align;
899252330Sjeff	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
900252330Sjeff		MPASS(align < nocross);
901252330Sjeff		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
902252330Sjeff	}
903252330Sjeff	if (start <= end && end - start >= size - 1) {
904252330Sjeff		MPASS((start & (align - 1)) == phase);
905252330Sjeff		MPASS(!VMEM_CROSS_P(start, start + size - 1, nocross));
906252330Sjeff		MPASS(minaddr <= start);
907252330Sjeff		MPASS(maxaddr == 0 || start + size - 1 <= maxaddr);
908252330Sjeff		MPASS(bt->bt_start <= start);
909252330Sjeff		MPASS(BT_END(bt) - start >= size - 1);
910252330Sjeff		*addrp = start;
911252330Sjeff
912252330Sjeff		return (0);
913252330Sjeff	}
914252330Sjeff	return (ENOMEM);
915252330Sjeff}
916252330Sjeff
917252330Sjeff/*
918252330Sjeff * vmem_clip:  Trim the boundary tag edges to the requested start and size.
919252330Sjeff */
920252330Sjeffstatic void
921252330Sjeffvmem_clip(vmem_t *vm, bt_t *bt, vmem_addr_t start, vmem_size_t size)
922252330Sjeff{
923252330Sjeff	bt_t *btnew;
924252330Sjeff	bt_t *btprev;
925252330Sjeff
926252330Sjeff	VMEM_ASSERT_LOCKED(vm);
927252330Sjeff	MPASS(bt->bt_type == BT_TYPE_FREE);
928252330Sjeff	MPASS(bt->bt_size >= size);
929252330Sjeff	bt_remfree(vm, bt);
930252330Sjeff	if (bt->bt_start != start) {
931252330Sjeff		btprev = bt_alloc(vm);
932252330Sjeff		btprev->bt_type = BT_TYPE_FREE;
933252330Sjeff		btprev->bt_start = bt->bt_start;
934252330Sjeff		btprev->bt_size = start - bt->bt_start;
935252330Sjeff		bt->bt_start = start;
936252330Sjeff		bt->bt_size -= btprev->bt_size;
937252330Sjeff		bt_insfree(vm, btprev);
938252330Sjeff		bt_insseg(vm, btprev,
939252330Sjeff		    TAILQ_PREV(bt, vmem_seglist, bt_seglist));
940252330Sjeff	}
941252330Sjeff	MPASS(bt->bt_start == start);
942252330Sjeff	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
943252330Sjeff		/* split */
944252330Sjeff		btnew = bt_alloc(vm);
945252330Sjeff		btnew->bt_type = BT_TYPE_BUSY;
946252330Sjeff		btnew->bt_start = bt->bt_start;
947252330Sjeff		btnew->bt_size = size;
948252330Sjeff		bt->bt_start = bt->bt_start + size;
949252330Sjeff		bt->bt_size -= size;
950252330Sjeff		bt_insfree(vm, bt);
951252330Sjeff		bt_insseg(vm, btnew,
952252330Sjeff		    TAILQ_PREV(bt, vmem_seglist, bt_seglist));
953252330Sjeff		bt_insbusy(vm, btnew);
954252330Sjeff		bt = btnew;
955252330Sjeff	} else {
956252330Sjeff		bt->bt_type = BT_TYPE_BUSY;
957252330Sjeff		bt_insbusy(vm, bt);
958252330Sjeff	}
959252330Sjeff	MPASS(bt->bt_size >= size);
960252330Sjeff	bt->bt_type = BT_TYPE_BUSY;
961252330Sjeff}
962252330Sjeff
963252330Sjeff/* ---- vmem API */
964252330Sjeff
965252330Sjeffvoid
966252330Sjeffvmem_set_import(vmem_t *vm, vmem_import_t *importfn,
967252330Sjeff     vmem_release_t *releasefn, void *arg, vmem_size_t import_quantum)
968252330Sjeff{
969252330Sjeff
970252330Sjeff	VMEM_LOCK(vm);
971252330Sjeff	vm->vm_importfn = importfn;
972252330Sjeff	vm->vm_releasefn = releasefn;
973252330Sjeff	vm->vm_arg = arg;
974252330Sjeff	vm->vm_import_quantum = import_quantum;
975252330Sjeff	VMEM_UNLOCK(vm);
976252330Sjeff}
977252330Sjeff
978252330Sjeffvoid
979252330Sjeffvmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn)
980252330Sjeff{
981252330Sjeff
982252330Sjeff	VMEM_LOCK(vm);
983252330Sjeff	vm->vm_reclaimfn = reclaimfn;
984252330Sjeff	VMEM_UNLOCK(vm);
985252330Sjeff}
986252330Sjeff
987252330Sjeff/*
988252330Sjeff * vmem_init: Initializes vmem arena.
989252330Sjeff */
990252330Sjeffvmem_t *
991252330Sjeffvmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size,
992252330Sjeff    vmem_size_t quantum, vmem_size_t qcache_max, int flags)
993252330Sjeff{
994252330Sjeff	int i;
995252330Sjeff
996252330Sjeff	MPASS(quantum > 0);
997260299Smav	MPASS((quantum & (quantum - 1)) == 0);
998252330Sjeff
999252330Sjeff	bzero(vm, sizeof(*vm));
1000252330Sjeff
1001252330Sjeff	VMEM_CONDVAR_INIT(vm, name);
1002252330Sjeff	VMEM_LOCK_INIT(vm, name);
1003252330Sjeff	vm->vm_nfreetags = 0;
1004252330Sjeff	LIST_INIT(&vm->vm_freetags);
1005252330Sjeff	strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
1006252330Sjeff	vm->vm_quantum_mask = quantum - 1;
1007260299Smav	vm->vm_quantum_shift = flsl(quantum) - 1;
1008252330Sjeff	vm->vm_nbusytag = 0;
1009252330Sjeff	vm->vm_size = 0;
1010252330Sjeff	vm->vm_inuse = 0;
1011252330Sjeff	qc_init(vm, qcache_max);
1012252330Sjeff
1013252330Sjeff	TAILQ_INIT(&vm->vm_seglist);
1014252330Sjeff	for (i = 0; i < VMEM_MAXORDER; i++) {
1015252330Sjeff		LIST_INIT(&vm->vm_freelist[i]);
1016252330Sjeff	}
1017252330Sjeff	memset(&vm->vm_hash0, 0, sizeof(vm->vm_hash0));
1018252330Sjeff	vm->vm_hashsize = VMEM_HASHSIZE_MIN;
1019252330Sjeff	vm->vm_hashlist = vm->vm_hash0;
1020252330Sjeff
1021252330Sjeff	if (size != 0) {
1022252330Sjeff		if (vmem_add(vm, base, size, flags) != 0) {
1023252330Sjeff			vmem_destroy1(vm);
1024252330Sjeff			return NULL;
1025252330Sjeff		}
1026252330Sjeff	}
1027252330Sjeff
1028252330Sjeff	mtx_lock(&vmem_list_lock);
1029252330Sjeff	LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
1030252330Sjeff	mtx_unlock(&vmem_list_lock);
1031252330Sjeff
1032252330Sjeff	return vm;
1033252330Sjeff}
1034252330Sjeff
1035252330Sjeff/*
1036252330Sjeff * vmem_create: create an arena.
1037252330Sjeff */
1038252330Sjeffvmem_t *
1039252330Sjeffvmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
1040252330Sjeff    vmem_size_t quantum, vmem_size_t qcache_max, int flags)
1041252330Sjeff{
1042252330Sjeff
1043252330Sjeff	vmem_t *vm;
1044252330Sjeff
1045252330Sjeff	vm = malloc(sizeof(*vm), M_VMEM, flags & (M_WAITOK|M_NOWAIT));
1046252330Sjeff	if (vm == NULL)
1047252330Sjeff		return (NULL);
1048252330Sjeff	if (vmem_init(vm, name, base, size, quantum, qcache_max,
1049301791Sngie	    flags) == NULL)
1050252330Sjeff		return (NULL);
1051252330Sjeff	return (vm);
1052252330Sjeff}
1053252330Sjeff
1054252330Sjeffvoid
1055252330Sjeffvmem_destroy(vmem_t *vm)
1056252330Sjeff{
1057252330Sjeff
1058252330Sjeff	mtx_lock(&vmem_list_lock);
1059252330Sjeff	LIST_REMOVE(vm, vm_alllist);
1060252330Sjeff	mtx_unlock(&vmem_list_lock);
1061252330Sjeff
1062252330Sjeff	vmem_destroy1(vm);
1063252330Sjeff}
1064252330Sjeff
1065252330Sjeffvmem_size_t
1066252330Sjeffvmem_roundup_size(vmem_t *vm, vmem_size_t size)
1067252330Sjeff{
1068252330Sjeff
1069252330Sjeff	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
1070252330Sjeff}
1071252330Sjeff
1072252330Sjeff/*
1073252330Sjeff * vmem_alloc: allocate resource from the arena.
1074252330Sjeff */
1075252330Sjeffint
1076252330Sjeffvmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp)
1077252330Sjeff{
1078252330Sjeff	const int strat __unused = flags & VMEM_FITMASK;
1079252330Sjeff	qcache_t *qc;
1080252330Sjeff
1081252330Sjeff	flags &= VMEM_FLAGS;
1082252330Sjeff	MPASS(size > 0);
1083252330Sjeff	MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT);
1084252330Sjeff	if ((flags & M_NOWAIT) == 0)
1085252330Sjeff		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_alloc");
1086252330Sjeff
1087252330Sjeff	if (size <= vm->vm_qcache_max) {
1088252330Sjeff		qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
1089252330Sjeff		*addrp = (vmem_addr_t)uma_zalloc(qc->qc_cache, flags);
1090252330Sjeff		if (*addrp == 0)
1091252330Sjeff			return (ENOMEM);
1092252330Sjeff		return (0);
1093252330Sjeff	}
1094252330Sjeff
1095252330Sjeff	return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
1096252330Sjeff	    flags, addrp);
1097252330Sjeff}
1098252330Sjeff
1099252330Sjeffint
1100252330Sjeffvmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
1101252330Sjeff    const vmem_size_t phase, const vmem_size_t nocross,
1102252330Sjeff    const vmem_addr_t minaddr, const vmem_addr_t maxaddr, int flags,
1103252330Sjeff    vmem_addr_t *addrp)
1104252330Sjeff{
1105252330Sjeff	const vmem_size_t size = vmem_roundup_size(vm, size0);
1106252330Sjeff	struct vmem_freelist *list;
1107252330Sjeff	struct vmem_freelist *first;
1108252330Sjeff	struct vmem_freelist *end;
1109252330Sjeff	vmem_size_t avail;
1110252330Sjeff	bt_t *bt;
1111252330Sjeff	int error;
1112252330Sjeff	int strat;
1113252330Sjeff
1114252330Sjeff	flags &= VMEM_FLAGS;
1115252330Sjeff	strat = flags & VMEM_FITMASK;
1116252330Sjeff	MPASS(size0 > 0);
1117252330Sjeff	MPASS(size > 0);
1118252330Sjeff	MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT);
1119252330Sjeff	MPASS((flags & (M_NOWAIT|M_WAITOK)) != (M_NOWAIT|M_WAITOK));
1120252330Sjeff	if ((flags & M_NOWAIT) == 0)
1121252330Sjeff		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_xalloc");
1122252330Sjeff	MPASS((align & vm->vm_quantum_mask) == 0);
1123252330Sjeff	MPASS((align & (align - 1)) == 0);
1124252330Sjeff	MPASS((phase & vm->vm_quantum_mask) == 0);
1125252330Sjeff	MPASS((nocross & vm->vm_quantum_mask) == 0);
1126252330Sjeff	MPASS((nocross & (nocross - 1)) == 0);
1127252330Sjeff	MPASS((align == 0 && phase == 0) || phase < align);
1128252330Sjeff	MPASS(nocross == 0 || nocross >= size);
1129252330Sjeff	MPASS(minaddr <= maxaddr);
1130252330Sjeff	MPASS(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
1131252330Sjeff
1132252330Sjeff	if (align == 0)
1133252330Sjeff		align = vm->vm_quantum_mask + 1;
1134252330Sjeff
1135252330Sjeff	*addrp = 0;
1136252330Sjeff	end = &vm->vm_freelist[VMEM_MAXORDER];
1137252330Sjeff	/*
1138252330Sjeff	 * choose a free block from which we allocate.
1139252330Sjeff	 */
1140252330Sjeff	first = bt_freehead_toalloc(vm, size, strat);
1141252330Sjeff	VMEM_LOCK(vm);
1142252330Sjeff	for (;;) {
1143252330Sjeff		/*
1144252330Sjeff		 * Make sure we have enough tags to complete the
1145252330Sjeff		 * operation.
1146252330Sjeff		 */
1147252330Sjeff		if (vm->vm_nfreetags < BT_MAXALLOC &&
1148252330Sjeff		    bt_fill(vm, flags) != 0) {
1149252330Sjeff			error = ENOMEM;
1150252330Sjeff			break;
1151252330Sjeff		}
1152252330Sjeff		/*
1153252330Sjeff	 	 * Scan freelists looking for a tag that satisfies the
1154252330Sjeff		 * allocation.  If we're doing BESTFIT we may encounter
1155252330Sjeff		 * sizes below the request.  If we're doing FIRSTFIT we
1156252330Sjeff		 * inspect only the first element from each list.
1157252330Sjeff		 */
1158252330Sjeff		for (list = first; list < end; list++) {
1159252330Sjeff			LIST_FOREACH(bt, list, bt_freelist) {
1160252330Sjeff				if (bt->bt_size >= size) {
1161252330Sjeff					error = vmem_fit(bt, size, align, phase,
1162252330Sjeff					    nocross, minaddr, maxaddr, addrp);
1163252330Sjeff					if (error == 0) {
1164252330Sjeff						vmem_clip(vm, bt, *addrp, size);
1165252330Sjeff						goto out;
1166252330Sjeff					}
1167252330Sjeff				}
1168252330Sjeff				/* FIRST skips to the next list. */
1169252330Sjeff				if (strat == M_FIRSTFIT)
1170252330Sjeff					break;
1171252330Sjeff			}
1172252330Sjeff		}
1173252330Sjeff		/*
1174252330Sjeff		 * Retry if the fast algorithm failed.
1175252330Sjeff		 */
1176252330Sjeff		if (strat == M_FIRSTFIT) {
1177252330Sjeff			strat = M_BESTFIT;
1178252330Sjeff			first = bt_freehead_toalloc(vm, size, strat);
1179252330Sjeff			continue;
1180252330Sjeff		}
1181252330Sjeff		/*
1182252330Sjeff		 * XXX it is possible to fail to meet restrictions with the
1183252330Sjeff		 * imported region.  It is up to the user to specify the
1184252330Sjeff		 * import quantum such that it can satisfy any allocation.
1185252330Sjeff		 */
1186254543Sjeff		if (vmem_import(vm, size, align, flags) == 0)
1187252330Sjeff			continue;
1188252330Sjeff
1189252330Sjeff		/*
1190252330Sjeff		 * Try to free some space from the quantum cache or reclaim
1191252330Sjeff		 * functions if available.
1192252330Sjeff		 */
1193252330Sjeff		if (vm->vm_qcache_max != 0 || vm->vm_reclaimfn != NULL) {
1194252330Sjeff			avail = vm->vm_size - vm->vm_inuse;
1195252330Sjeff			VMEM_UNLOCK(vm);
1196252330Sjeff			if (vm->vm_qcache_max != 0)
1197252330Sjeff				qc_drain(vm);
1198252330Sjeff			if (vm->vm_reclaimfn != NULL)
1199252330Sjeff				vm->vm_reclaimfn(vm, flags);
1200252330Sjeff			VMEM_LOCK(vm);
1201252330Sjeff			/* If we were successful retry even NOWAIT. */
1202252330Sjeff			if (vm->vm_size - vm->vm_inuse > avail)
1203252330Sjeff				continue;
1204252330Sjeff		}
1205252330Sjeff		if ((flags & M_NOWAIT) != 0) {
1206252330Sjeff			error = ENOMEM;
1207252330Sjeff			break;
1208252330Sjeff		}
1209252330Sjeff		VMEM_CONDVAR_WAIT(vm);
1210252330Sjeff	}
1211252330Sjeffout:
1212252330Sjeff	VMEM_UNLOCK(vm);
1213252330Sjeff	if (error != 0 && (flags & M_NOWAIT) == 0)
1214252330Sjeff		panic("failed to allocate waiting allocation\n");
1215252330Sjeff
1216252330Sjeff	return (error);
1217252330Sjeff}
1218252330Sjeff
1219252330Sjeff/*
1220252330Sjeff * vmem_free: free the resource to the arena.
1221252330Sjeff */
1222252330Sjeffvoid
1223252330Sjeffvmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
1224252330Sjeff{
1225252330Sjeff	qcache_t *qc;
1226252330Sjeff	MPASS(size > 0);
1227252330Sjeff
1228252330Sjeff	if (size <= vm->vm_qcache_max) {
1229252330Sjeff		qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
1230252330Sjeff		uma_zfree(qc->qc_cache, (void *)addr);
1231252330Sjeff	} else
1232252330Sjeff		vmem_xfree(vm, addr, size);
1233252330Sjeff}
1234252330Sjeff
1235252330Sjeffvoid
1236252330Sjeffvmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
1237252330Sjeff{
1238252330Sjeff	bt_t *bt;
1239252330Sjeff	bt_t *t;
1240252330Sjeff
1241252330Sjeff	MPASS(size > 0);
1242252330Sjeff
1243252330Sjeff	VMEM_LOCK(vm);
1244252330Sjeff	bt = bt_lookupbusy(vm, addr);
1245252330Sjeff	MPASS(bt != NULL);
1246252330Sjeff	MPASS(bt->bt_start == addr);
1247252330Sjeff	MPASS(bt->bt_size == vmem_roundup_size(vm, size) ||
1248252330Sjeff	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
1249252330Sjeff	MPASS(bt->bt_type == BT_TYPE_BUSY);
1250252330Sjeff	bt_rembusy(vm, bt);
1251252330Sjeff	bt->bt_type = BT_TYPE_FREE;
1252252330Sjeff
1253252330Sjeff	/* coalesce */
1254252330Sjeff	t = TAILQ_NEXT(bt, bt_seglist);
1255252330Sjeff	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1256252330Sjeff		MPASS(BT_END(bt) < t->bt_start);	/* YYY */
1257252330Sjeff		bt->bt_size += t->bt_size;
1258252330Sjeff		bt_remfree(vm, t);
1259252330Sjeff		bt_remseg(vm, t);
1260252330Sjeff	}
1261252330Sjeff	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
1262252330Sjeff	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
1263252330Sjeff		MPASS(BT_END(t) < bt->bt_start);	/* YYY */
1264252330Sjeff		bt->bt_size += t->bt_size;
1265252330Sjeff		bt->bt_start = t->bt_start;
1266252330Sjeff		bt_remfree(vm, t);
1267252330Sjeff		bt_remseg(vm, t);
1268252330Sjeff	}
1269252330Sjeff
1270252330Sjeff	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
1271252330Sjeff	MPASS(t != NULL);
1272252330Sjeff	MPASS(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
1273252330Sjeff	if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
1274252330Sjeff	    t->bt_size == bt->bt_size) {
1275252330Sjeff		vmem_addr_t spanaddr;
1276252330Sjeff		vmem_size_t spansize;
1277252330Sjeff
1278252330Sjeff		MPASS(t->bt_start == bt->bt_start);
1279252330Sjeff		spanaddr = bt->bt_start;
1280252330Sjeff		spansize = bt->bt_size;
1281252330Sjeff		bt_remseg(vm, bt);
1282252330Sjeff		bt_remseg(vm, t);
1283252330Sjeff		vm->vm_size -= spansize;
1284252330Sjeff		VMEM_CONDVAR_BROADCAST(vm);
1285252330Sjeff		bt_freetrim(vm, BT_MAXFREE);
1286252330Sjeff		(*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
1287252330Sjeff	} else {
1288252330Sjeff		bt_insfree(vm, bt);
1289252330Sjeff		VMEM_CONDVAR_BROADCAST(vm);
1290252330Sjeff		bt_freetrim(vm, BT_MAXFREE);
1291252330Sjeff	}
1292252330Sjeff}
1293252330Sjeff
1294252330Sjeff/*
1295252330Sjeff * vmem_add:
1296252330Sjeff *
1297252330Sjeff */
1298252330Sjeffint
1299252330Sjeffvmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int flags)
1300252330Sjeff{
1301252330Sjeff	int error;
1302252330Sjeff
1303252330Sjeff	error = 0;
1304252330Sjeff	flags &= VMEM_FLAGS;
1305252330Sjeff	VMEM_LOCK(vm);
1306252330Sjeff	if (vm->vm_nfreetags >= BT_MAXALLOC || bt_fill(vm, flags) == 0)
1307253596Sglebius		vmem_add1(vm, addr, size, BT_TYPE_SPAN_STATIC);
1308252330Sjeff	else
1309252330Sjeff		error = ENOMEM;
1310252330Sjeff	VMEM_UNLOCK(vm);
1311252330Sjeff
1312252330Sjeff	return (error);
1313252330Sjeff}
1314252330Sjeff
1315252330Sjeff/*
1316252330Sjeff * vmem_size: information about arenas size
1317252330Sjeff */
1318252330Sjeffvmem_size_t
1319252330Sjeffvmem_size(vmem_t *vm, int typemask)
1320252330Sjeff{
1321282361Smav	int i;
1322252330Sjeff
1323252330Sjeff	switch (typemask) {
1324252330Sjeff	case VMEM_ALLOC:
1325252330Sjeff		return vm->vm_inuse;
1326252330Sjeff	case VMEM_FREE:
1327252330Sjeff		return vm->vm_size - vm->vm_inuse;
1328252330Sjeff	case VMEM_FREE|VMEM_ALLOC:
1329252330Sjeff		return vm->vm_size;
1330282361Smav	case VMEM_MAXFREE:
1331282361Smav		VMEM_LOCK(vm);
1332282361Smav		for (i = VMEM_MAXORDER - 1; i >= 0; i--) {
1333282361Smav			if (LIST_EMPTY(&vm->vm_freelist[i]))
1334282361Smav				continue;
1335282361Smav			VMEM_UNLOCK(vm);
1336282361Smav			return ((vmem_size_t)ORDER2SIZE(i) <<
1337282361Smav			    vm->vm_quantum_shift);
1338282361Smav		}
1339282361Smav		VMEM_UNLOCK(vm);
1340282361Smav		return (0);
1341252330Sjeff	default:
1342252330Sjeff		panic("vmem_size");
1343252330Sjeff	}
1344252330Sjeff}
1345252330Sjeff
1346252330Sjeff/* ---- debug */
1347252330Sjeff
1348252330Sjeff#if defined(DDB) || defined(DIAGNOSTIC)
1349252330Sjeff
1350252330Sjeffstatic void bt_dump(const bt_t *, int (*)(const char *, ...)
1351252330Sjeff    __printflike(1, 2));
1352252330Sjeff
1353252330Sjeffstatic const char *
1354252330Sjeffbt_type_string(int type)
1355252330Sjeff{
1356252330Sjeff
1357252330Sjeff	switch (type) {
1358252330Sjeff	case BT_TYPE_BUSY:
1359252330Sjeff		return "busy";
1360252330Sjeff	case BT_TYPE_FREE:
1361252330Sjeff		return "free";
1362252330Sjeff	case BT_TYPE_SPAN:
1363252330Sjeff		return "span";
1364252330Sjeff	case BT_TYPE_SPAN_STATIC:
1365252330Sjeff		return "static span";
1366252330Sjeff	default:
1367252330Sjeff		break;
1368252330Sjeff	}
1369252330Sjeff	return "BOGUS";
1370252330Sjeff}
1371252330Sjeff
1372252330Sjeffstatic void
1373252330Sjeffbt_dump(const bt_t *bt, int (*pr)(const char *, ...))
1374252330Sjeff{
1375252330Sjeff
1376252330Sjeff	(*pr)("\t%p: %jx %jx, %d(%s)\n",
1377252330Sjeff	    bt, (intmax_t)bt->bt_start, (intmax_t)bt->bt_size,
1378252330Sjeff	    bt->bt_type, bt_type_string(bt->bt_type));
1379252330Sjeff}
1380252330Sjeff
1381252330Sjeffstatic void
1382252330Sjeffvmem_dump(const vmem_t *vm , int (*pr)(const char *, ...) __printflike(1, 2))
1383252330Sjeff{
1384252330Sjeff	const bt_t *bt;
1385252330Sjeff	int i;
1386252330Sjeff
1387252330Sjeff	(*pr)("vmem %p '%s'\n", vm, vm->vm_name);
1388252330Sjeff	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1389252330Sjeff		bt_dump(bt, pr);
1390252330Sjeff	}
1391252330Sjeff
1392252330Sjeff	for (i = 0; i < VMEM_MAXORDER; i++) {
1393252330Sjeff		const struct vmem_freelist *fl = &vm->vm_freelist[i];
1394252330Sjeff
1395252330Sjeff		if (LIST_EMPTY(fl)) {
1396252330Sjeff			continue;
1397252330Sjeff		}
1398252330Sjeff
1399252330Sjeff		(*pr)("freelist[%d]\n", i);
1400252330Sjeff		LIST_FOREACH(bt, fl, bt_freelist) {
1401252330Sjeff			bt_dump(bt, pr);
1402252330Sjeff		}
1403252330Sjeff	}
1404252330Sjeff}
1405252330Sjeff
1406252330Sjeff#endif /* defined(DDB) || defined(DIAGNOSTIC) */
1407252330Sjeff
1408252330Sjeff#if defined(DDB)
1409295870Smarius#include <ddb/ddb.h>
1410295870Smarius
1411252330Sjeffstatic bt_t *
1412252330Sjeffvmem_whatis_lookup(vmem_t *vm, vmem_addr_t addr)
1413252330Sjeff{
1414252330Sjeff	bt_t *bt;
1415252330Sjeff
1416252330Sjeff	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1417252330Sjeff		if (BT_ISSPAN_P(bt)) {
1418252330Sjeff			continue;
1419252330Sjeff		}
1420252330Sjeff		if (bt->bt_start <= addr && addr <= BT_END(bt)) {
1421252330Sjeff			return bt;
1422252330Sjeff		}
1423252330Sjeff	}
1424252330Sjeff
1425252330Sjeff	return NULL;
1426252330Sjeff}
1427252330Sjeff
1428252330Sjeffvoid
1429252330Sjeffvmem_whatis(vmem_addr_t addr, int (*pr)(const char *, ...))
1430252330Sjeff{
1431252330Sjeff	vmem_t *vm;
1432252330Sjeff
1433252330Sjeff	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
1434252330Sjeff		bt_t *bt;
1435252330Sjeff
1436252330Sjeff		bt = vmem_whatis_lookup(vm, addr);
1437252330Sjeff		if (bt == NULL) {
1438252330Sjeff			continue;
1439252330Sjeff		}
1440252330Sjeff		(*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
1441252330Sjeff		    (void *)addr, (void *)bt->bt_start,
1442252330Sjeff		    (vmem_size_t)(addr - bt->bt_start), vm->vm_name,
1443252330Sjeff		    (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free");
1444252330Sjeff	}
1445252330Sjeff}
1446252330Sjeff
1447252330Sjeffvoid
1448252330Sjeffvmem_printall(const char *modif, int (*pr)(const char *, ...))
1449252330Sjeff{
1450252330Sjeff	const vmem_t *vm;
1451252330Sjeff
1452252330Sjeff	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
1453252330Sjeff		vmem_dump(vm, pr);
1454252330Sjeff	}
1455252330Sjeff}
1456252330Sjeff
1457252330Sjeffvoid
1458252330Sjeffvmem_print(vmem_addr_t addr, const char *modif, int (*pr)(const char *, ...))
1459252330Sjeff{
1460252330Sjeff	const vmem_t *vm = (const void *)addr;
1461252330Sjeff
1462252330Sjeff	vmem_dump(vm, pr);
1463252330Sjeff}
1464295870Smarius
1465295870SmariusDB_SHOW_COMMAND(vmemdump, vmemdump)
1466295870Smarius{
1467295870Smarius
1468295870Smarius	if (!have_addr) {
1469295870Smarius		db_printf("usage: show vmemdump <addr>\n");
1470295870Smarius		return;
1471295870Smarius	}
1472295870Smarius
1473295870Smarius	vmem_dump((const vmem_t *)addr, db_printf);
1474295870Smarius}
1475295870Smarius
1476295870SmariusDB_SHOW_ALL_COMMAND(vmemdump, vmemdumpall)
1477295870Smarius{
1478295870Smarius	const vmem_t *vm;
1479295870Smarius
1480295870Smarius	LIST_FOREACH(vm, &vmem_list, vm_alllist)
1481295870Smarius		vmem_dump(vm, db_printf);
1482295870Smarius}
1483295870Smarius
1484295870SmariusDB_SHOW_COMMAND(vmem, vmem_summ)
1485295870Smarius{
1486295870Smarius	const vmem_t *vm = (const void *)addr;
1487295870Smarius	const bt_t *bt;
1488295870Smarius	size_t ft[VMEM_MAXORDER], ut[VMEM_MAXORDER];
1489295870Smarius	size_t fs[VMEM_MAXORDER], us[VMEM_MAXORDER];
1490295870Smarius	int ord;
1491295870Smarius
1492295870Smarius	if (!have_addr) {
1493295870Smarius		db_printf("usage: show vmem <addr>\n");
1494295870Smarius		return;
1495295870Smarius	}
1496295870Smarius
1497295870Smarius	db_printf("vmem %p '%s'\n", vm, vm->vm_name);
1498295870Smarius	db_printf("\tquantum:\t%zu\n", vm->vm_quantum_mask + 1);
1499295870Smarius	db_printf("\tsize:\t%zu\n", vm->vm_size);
1500295870Smarius	db_printf("\tinuse:\t%zu\n", vm->vm_inuse);
1501295870Smarius	db_printf("\tfree:\t%zu\n", vm->vm_size - vm->vm_inuse);
1502295870Smarius	db_printf("\tbusy tags:\t%d\n", vm->vm_nbusytag);
1503295870Smarius	db_printf("\tfree tags:\t%d\n", vm->vm_nfreetags);
1504295870Smarius
1505295870Smarius	memset(&ft, 0, sizeof(ft));
1506295870Smarius	memset(&ut, 0, sizeof(ut));
1507295870Smarius	memset(&fs, 0, sizeof(fs));
1508295870Smarius	memset(&us, 0, sizeof(us));
1509295870Smarius	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1510295870Smarius		ord = SIZE2ORDER(bt->bt_size >> vm->vm_quantum_shift);
1511295870Smarius		if (bt->bt_type == BT_TYPE_BUSY) {
1512295870Smarius			ut[ord]++;
1513295870Smarius			us[ord] += bt->bt_size;
1514295870Smarius		} else if (bt->bt_type == BT_TYPE_FREE) {
1515295870Smarius			ft[ord]++;
1516295870Smarius			fs[ord] += bt->bt_size;
1517295870Smarius		}
1518295870Smarius	}
1519295870Smarius	db_printf("\t\t\tinuse\tsize\t\tfree\tsize\n");
1520295870Smarius	for (ord = 0; ord < VMEM_MAXORDER; ord++) {
1521295870Smarius		if (ut[ord] == 0 && ft[ord] == 0)
1522295870Smarius			continue;
1523295870Smarius		db_printf("\t%-15zu %zu\t%-15zu %zu\t%-16zu\n",
1524295870Smarius		    ORDER2SIZE(ord) << vm->vm_quantum_shift,
1525295870Smarius		    ut[ord], us[ord], ft[ord], fs[ord]);
1526295870Smarius	}
1527295870Smarius}
1528295870Smarius
1529295870SmariusDB_SHOW_ALL_COMMAND(vmem, vmem_summall)
1530295870Smarius{
1531295870Smarius	const vmem_t *vm;
1532295870Smarius
1533295870Smarius	LIST_FOREACH(vm, &vmem_list, vm_alllist)
1534295870Smarius		vmem_summ((db_expr_t)vm, TRUE, count, modif);
1535295870Smarius}
1536252330Sjeff#endif /* defined(DDB) */
1537252330Sjeff
1538252330Sjeff#define vmem_printf printf
1539252330Sjeff
1540252330Sjeff#if defined(DIAGNOSTIC)
1541252330Sjeff
1542252330Sjeffstatic bool
1543252330Sjeffvmem_check_sanity(vmem_t *vm)
1544252330Sjeff{
1545252330Sjeff	const bt_t *bt, *bt2;
1546252330Sjeff
1547252330Sjeff	MPASS(vm != NULL);
1548252330Sjeff
1549252330Sjeff	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1550252330Sjeff		if (bt->bt_start > BT_END(bt)) {
1551252330Sjeff			printf("corrupted tag\n");
1552252330Sjeff			bt_dump(bt, vmem_printf);
1553252330Sjeff			return false;
1554252330Sjeff		}
1555252330Sjeff	}
1556252330Sjeff	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
1557252330Sjeff		TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) {
1558252330Sjeff			if (bt == bt2) {
1559252330Sjeff				continue;
1560252330Sjeff			}
1561252330Sjeff			if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {
1562252330Sjeff				continue;
1563252330Sjeff			}
1564252330Sjeff			if (bt->bt_start <= BT_END(bt2) &&
1565252330Sjeff			    bt2->bt_start <= BT_END(bt)) {
1566252330Sjeff				printf("overwrapped tags\n");
1567252330Sjeff				bt_dump(bt, vmem_printf);
1568252330Sjeff				bt_dump(bt2, vmem_printf);
1569252330Sjeff				return false;
1570252330Sjeff			}
1571252330Sjeff		}
1572252330Sjeff	}
1573252330Sjeff
1574252330Sjeff	return true;
1575252330Sjeff}
1576252330Sjeff
1577252330Sjeffstatic void
1578252330Sjeffvmem_check(vmem_t *vm)
1579252330Sjeff{
1580252330Sjeff
1581252330Sjeff	if (!vmem_check_sanity(vm)) {
1582252330Sjeff		panic("insanity vmem %p", vm);
1583252330Sjeff	}
1584252330Sjeff}
1585252330Sjeff
1586252330Sjeff#endif /* defined(DIAGNOSTIC) */
1587