/*	$OpenBSD: vfs_biomem.c,v 1.51 2021/10/24 00:02:25 jsg Exp $ */

/*
 * Copyright (c) 2007 Artur Grabowski <art@openbsd.org>
 * Copyright (c) 2012-2016,2019 Bob Beck <beck@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */


#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/pool.h>
#include <sys/proc.h>		/* XXX for atomic */
#include <sys/mount.h>

#include <uvm/uvm_extern.h>

vaddr_t buf_kva_start, buf_kva_end;
int buf_needva;
TAILQ_HEAD(,buf) buf_valist;

extern struct bcachestats bcstats;

vaddr_t buf_unmap(struct buf *);

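/*
 * Reserve a contiguous range of kernel virtual address space for
 * buffer mappings and size the kva slot accounting to match.
 */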
void
buf_mem_init(vsize_t size)
{
	TAILQ_INIT(&buf_valist);

	buf_kva_start = vm_map_min(kernel_map);
	if (uvm_map(kernel_map, &buf_kva_start, size, NULL,
	    UVM_UNKNOWN_OFFSET, PAGE_SIZE, UVM_MAPFLAG(PROT_NONE,
	    PROT_NONE, MAP_INHERIT_NONE, MADV_NORMAL, 0)))
		panic("%s: can't reserve VM for buffers", __func__);
	buf_kva_end = buf_kva_start + size;

	/* Contiguous mapping */
	bcstats.kvaslots = bcstats.kvaslots_avail = size / MAXPHYS;
}

/*
 * buf_acquire and buf_release manage the kvm mappings of buffers.
 */
void
buf_acquire(struct buf *bp)
{
	KASSERT((bp->b_flags & B_BUSY) == 0);
	splassert(IPL_BIO);
	/*
	 * Busy before waiting for kvm.
	 */
	SET(bp->b_flags, B_BUSY);
	buf_map(bp);
}

/*
 * Acquire a buf but do not map it. Preserve any mapping it did have.
 */
void
buf_acquire_nomap(struct buf *bp)
{
	splassert(IPL_BIO);
	SET(bp->b_flags, B_BUSY);
	if (bp->b_data != NULL) {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
		bcstats.busymapped++;
	}
}

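/*
 * Map the pages backing bp into kernel virtual address space, taking a
 * fresh slot from the reserved range or, once that is exhausted, stealing
 * the mapping of an unbusy buffer on buf_valist. Sleeps when kva slots
 * are scarce, unless called by the syncer or the cleaner.
 */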
void
buf_map(struct buf *bp)
{
	vaddr_t va;

	splassert(IPL_BIO);

	if (bp->b_data == NULL) {
		unsigned long i;

		/*
		 * First, just use the pre-allocated space until we run out.
		 */
		if (buf_kva_start < buf_kva_end) {
			va = buf_kva_start;
			buf_kva_start += MAXPHYS;
			bcstats.kvaslots_avail--;
		} else {
			struct buf *vbp;

			/*
			 * Find some buffer we can steal the space from.
			 */
			vbp = TAILQ_FIRST(&buf_valist);
			while ((curproc != syncerproc &&
			    curproc != cleanerproc &&
			    bcstats.kvaslots_avail <= RESERVE_SLOTS) ||
			    vbp == NULL) {
				buf_needva++;
				tsleep_nsec(&buf_needva, PRIBIO, "buf_needva",
				    INFSLP);
				vbp = TAILQ_FIRST(&buf_valist);
			}
			va = buf_unmap(vbp);
		}

		for (i = 0; i < atop(bp->b_bufsize); i++) {
			struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
			    bp->b_poffs + ptoa(i));

			KASSERT(pg != NULL);

			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    PROT_READ | PROT_WRITE);
		}
		pmap_update(pmap_kernel());
		bp->b_data = (caddr_t)va;
	} else {
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	}

	bcstats.busymapped++;
}

void
buf_release(struct buf *bp)
{
	KASSERT(bp->b_flags & B_BUSY);
	splassert(IPL_BIO);

	if (bp->b_data) {
		bcstats.busymapped--;
		TAILQ_INSERT_TAIL(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail++;
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	CLR(bp->b_flags, B_BUSY);
}

/*
 * Deallocate all memory resources for this buffer. We need to be careful
 * to not drop kvm since we have no way to reclaim it. So, if the buffer
 * has kvm, we need to free it later. We put it on the front of the
 * freelist just so it gets picked up faster.
 *
 * Also, lots of assertions count on bp->b_data being NULL, so we
 * set it temporarily to NULL.
 *
 * Return non-zero if we take care of the freeing later.
 */
int
buf_dealloc_mem(struct buf *bp)
{
	caddr_t data;

	splassert(IPL_BIO);

	data = bp->b_data;
	bp->b_data = NULL;

	if (data) {
		if (bp->b_flags & B_BUSY)
			bcstats.busymapped--;
		pmap_kremove((vaddr_t)data, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	if (bp->b_pobj)
		buf_free_pages(bp);

	if (data == NULL)
		return (0);

	bp->b_data = data;
	if (!(bp->b_flags & B_BUSY)) {		/* XXX - need better test */
		TAILQ_REMOVE(&buf_valist, bp, b_valist);
		bcstats.kvaslots_avail--;
	} else {
		CLR(bp->b_flags, B_BUSY);
		if (buf_needva) {
			buf_needva = 0;
			wakeup(&buf_needva);
		}
	}
	SET(bp->b_flags, B_RELEASED);
	TAILQ_INSERT_HEAD(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail++;

	return (1);
}

/*
 * Only used by bread_cluster.
 */
void
buf_fix_mapping(struct buf *bp, vsize_t newsize)
{
	vaddr_t va = (vaddr_t)bp->b_data;

	if (newsize < bp->b_bufsize) {
		pmap_kremove(va + newsize, bp->b_bufsize - newsize);
		pmap_update(pmap_kernel());
		/*
		 * Note: the size we lost is actually with the other
		 * buffers read in by bread_cluster.
		 */
		bp->b_bufsize = newsize;
	}
}

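/*
 * Strip the kernel mapping from an unbusy buffer and return the virtual
 * address so it can be reused for another buffer. If the buffer was
 * already marked B_RELEASED, free the buf itself as well.
 */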
vaddr_t
buf_unmap(struct buf *bp)
{
	vaddr_t va;

	KASSERT((bp->b_flags & B_BUSY) == 0);
	KASSERT(bp->b_data != NULL);
	splassert(IPL_BIO);

	TAILQ_REMOVE(&buf_valist, bp, b_valist);
	bcstats.kvaslots_avail--;
	va = (vaddr_t)bp->b_data;
	bp->b_data = NULL;
	pmap_kremove(va, bp->b_bufsize);
	pmap_update(pmap_kernel());

	if (bp->b_flags & B_RELEASED)
		pool_put(&bufpool, bp);

	return (va);
}

/* Always allocates in dma-reachable memory */
void
buf_alloc_pages(struct buf *bp, vsize_t size)
{
	int i;

	KASSERT(size == round_page(size));
	KASSERT(bp->b_pobj == NULL);
	KASSERT(bp->b_data == NULL);
	splassert(IPL_BIO);

	uvm_obj_init(&bp->b_uobj, &bufcache_pager, 1);

	/*
	 * Attempt to allocate with NOWAIT. If we can't, then throw
	 * away some clean pages and try again. Finally, if that
	 * fails, do a WAITOK allocation so the page daemon can find
	 * memory for us.
	 */
	do {
		i = uvm_pagealloc_multi(&bp->b_uobj, 0, size,
		    UVM_PLA_NOWAIT | UVM_PLA_NOWAKE);
		if (i == 0)
			break;
	} while (bufbackoff(&dma_constraint, size) == 0);
	if (i != 0)
		i = uvm_pagealloc_multi(&bp->b_uobj, 0, size,
		    UVM_PLA_WAITOK);
	/* should not happen */
	if (i != 0)
		panic("uvm_pagealloc_multi unable to allocate a buf_object "
		    "of size %lu", size);

	bcstats.numbufpages += atop(size);
	bcstats.dmapages += atop(size);
	SET(bp->b_flags, B_DMA);
	bp->b_pobj = &bp->b_uobj;
	bp->b_poffs = 0;
	bp->b_bufsize = size;
}

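/*
 * Unwire and free the pages backing a buffer, drop the backing uvm
 * object, and update the buffer cache page accounting.
 */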
void
buf_free_pages(struct buf *bp)
{
	struct uvm_object *uobj = bp->b_pobj;
	struct vm_page *pg;
	voff_t off, i;

	KASSERT(bp->b_data == NULL);
	KASSERT(uobj != NULL);
	splassert(IPL_BIO);

	off = bp->b_poffs;
	bp->b_pobj = NULL;
	bp->b_poffs = 0;

	for (i = 0; i < atop(bp->b_bufsize); i++) {
		pg = uvm_pagelookup(uobj, off + ptoa(i));
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count == 1);
		pg->wire_count = 0;
		bcstats.numbufpages--;
		if (ISSET(bp->b_flags, B_DMA))
			bcstats.dmapages--;
	}
	CLR(bp->b_flags, B_DMA);

	/* XXX refactor to do this without splbio later */
	uvm_obj_free(uobj);
}

/* Reallocate a buf into a particular pmem range specified by "where". */
int
buf_realloc_pages(struct buf *bp, struct uvm_constraint_range *where,
    int flags)
{
	vaddr_t va;
	int dma;
	int i, r;

	KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));

	splassert(IPL_BIO);
	KASSERT(ISSET(bp->b_flags, B_BUSY));
	dma = ISSET(bp->b_flags, B_DMA);

	/* if the original buf is mapped, unmap it */
	if (bp->b_data != NULL) {
		va = (vaddr_t)bp->b_data;
		pmap_kremove(va, bp->b_bufsize);
		pmap_update(pmap_kernel());
	}

	do {
		r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
		    bp->b_bufsize, UVM_PLA_NOWAIT | UVM_PLA_NOWAKE, where);
		if (r == 0)
			break;
	} while (bufbackoff(where, atop(bp->b_bufsize)) == 0);

	/*
	 * bufbackoff() failed, so there's no more we can do without
	 * waiting.  If allowed to, make that attempt.
	 */
	if (r != 0 && (flags & UVM_PLA_WAITOK))
		r = uvm_pagerealloc_multi(bp->b_pobj, bp->b_poffs,
		    bp->b_bufsize, flags, where);

	/*
	 * If the allocation has succeeded, we may be somewhere different.
	 * If the allocation has failed, we are in the same place.
	 *
	 * We still have to re-map the buffer before returning.
	 */

	/* take it out of dma stats until we know where we are */
	if (dma)
		bcstats.dmapages -= atop(bp->b_bufsize);

	dma = 1;
	/* check DMA reachability and, if the original buf was mapped, re-map it */
	for (i = 0; i < atop(bp->b_bufsize); i++) {
		struct vm_page *pg = uvm_pagelookup(bp->b_pobj,
		    bp->b_poffs + ptoa(i));
		KASSERT(pg != NULL);
		if (!PADDR_IS_DMA_REACHABLE(VM_PAGE_TO_PHYS(pg)))
			dma = 0;
		if (bp->b_data != NULL) {
			pmap_kenter_pa(va + ptoa(i), VM_PAGE_TO_PHYS(pg),
			    PROT_READ | PROT_WRITE);
			pmap_update(pmap_kernel());
		}
	}
	if (dma) {
		SET(bp->b_flags, B_DMA);
		bcstats.dmapages += atop(bp->b_bufsize);
	} else
		CLR(bp->b_flags, B_DMA);
	return (r);
}