/*	$OpenBSD: kern_bufq.c,v 1.35 2022/12/05 23:18:37 deraadt Exp $	*/
/*
 * Copyright (c) 2010 Thordur I. Bjornsson <thib@openbsd.org>
 * Copyright (c) 2010 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/queue.h>

SLIST_HEAD(, bufq)	bufqs = SLIST_HEAD_INITIALIZER(bufqs);
struct mutex		bufqs_mtx = MUTEX_INITIALIZER(IPL_NONE);
int			bufqs_stop;

struct bufq_impl {
	void		*(*impl_create)(void);
	void		 (*impl_destroy)(void *);

	void		 (*impl_queue)(void *, struct buf *);
	struct buf	*(*impl_dequeue)(void *);
	void		 (*impl_requeue)(void *, struct buf *);
	int		 (*impl_peek)(void *);
};

void		*bufq_fifo_create(void);
void		 bufq_fifo_destroy(void *);
void		 bufq_fifo_queue(void *, struct buf *);
struct buf	*bufq_fifo_dequeue(void *);
void		 bufq_fifo_requeue(void *, struct buf *);
int		 bufq_fifo_peek(void *);

void		*bufq_nscan_create(void);
void		 bufq_nscan_destroy(void *);
void		 bufq_nscan_queue(void *, struct buf *);
struct buf	*bufq_nscan_dequeue(void *);
void		 bufq_nscan_requeue(void *, struct buf *);
int		 bufq_nscan_peek(void *);

const struct bufq_impl bufq_impls[BUFQ_HOWMANY] = {
	{
		bufq_fifo_create,
		bufq_fifo_destroy,
		bufq_fifo_queue,
		bufq_fifo_dequeue,
		bufq_fifo_requeue,
		bufq_fifo_peek
	},
	{
		bufq_nscan_create,
		bufq_nscan_destroy,
		bufq_nscan_queue,
		bufq_nscan_dequeue,
		bufq_nscan_requeue,
		bufq_nscan_peek
	}
};
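
/*
 * Method dispatch on a bufq is a plain array lookup on the queue type.
 * A sketch, assuming the BUFQ_FIFO and BUFQ_NSCAN constants from
 * <sys/buf.h> index this table in the order of its entries:
 *
 *	const struct bufq_impl *impl = &bufq_impls[BUFQ_NSCAN];
 *	void *data = impl->impl_create();
 *	impl->impl_queue(data, bp);
 */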

int
bufq_init(struct bufq *bq, int type)
{
	u_int hi = BUFQ_HI, low = BUFQ_LOW;

	if (type >= BUFQ_HOWMANY)
		panic("bufq_init: type %i unknown", type);

	/*
	 * Ensure that writes can't consume the entire amount of kva
	 * available to the buffer cache if we only have a limited
	 * amount of kva available to us.
	 */
	if (hi >= (bcstats.kvaslots / 16)) {
		hi = bcstats.kvaslots / 16;
		if (hi < 2)
			hi = 2;
		low = hi / 2;
	}
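
	/*
	 * Worked example (hypothetical numbers): with bcstats.kvaslots
	 * == 2048, hi is clamped to 2048 / 16 = 128 and low to 64, so
	 * bufq_wait() sleeps writers at 128 outstanding bufs and
	 * bufq_done() wakes them once completions bring the count
	 * below 64.
	 */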

	mtx_init(&bq->bufq_mtx, IPL_BIO);
	bq->bufq_hi = hi;
	bq->bufq_low = low;
	bq->bufq_type = type;
	bq->bufq_impl = &bufq_impls[type];
	bq->bufq_data = bq->bufq_impl->impl_create();
	if (bq->bufq_data == NULL) {
		/*
		 * We should actually return failure here so that disks
		 * attaching after boot in low-memory situations don't
		 * panic the system.
		 */
		panic("bufq init fail");
	}

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep_nsec(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqinit", INFSLP);
	}
	SLIST_INSERT_HEAD(&bufqs, bq, bufq_entries);
	mtx_leave(&bufqs_mtx);

	return (0);
}
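
/*
 * Typical use, as a sketch only ("xd" is a hypothetical driver, not a
 * real one): a disk driver embeds a struct bufq in its softc and
 * initializes it once at attach time, with BUFQ_DEFAULT or an explicit
 * queue type:
 *
 *	struct xd_softc {
 *		struct device	sc_dev;
 *		struct bufq	sc_bufq;
 *	};
 *
 *	bufq_init(&sc->sc_bufq, BUFQ_DEFAULT);
 */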

int
bufq_switch(struct bufq *bq, int type)
{
	void		*data;
	void		*odata;
	int		otype;
	struct buf	*bp;
	int		ret;

	mtx_enter(&bq->bufq_mtx);
	ret = (bq->bufq_type == type);
	mtx_leave(&bq->bufq_mtx);
	if (ret)
		return (0);

	data = bufq_impls[type].impl_create();
	if (data == NULL)
		return (ENOMEM);

	mtx_enter(&bq->bufq_mtx);
	if (bq->bufq_type != type) { /* might have changed during create */
		odata = bq->bufq_data;
		otype = bq->bufq_type;

		while ((bp = bufq_impls[otype].impl_dequeue(odata)) != NULL)
			bufq_impls[type].impl_queue(data, bp);

		bq->bufq_data = data;
		bq->bufq_type = type;
		bq->bufq_impl = &bufq_impls[type];
	} else {
		otype = type;
		odata = data;
	}
	mtx_leave(&bq->bufq_mtx);

	bufq_impls[otype].impl_destroy(odata);

	return (0);
}
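
/*
 * A sketch of switching disciplines on a live queue; pending bufs are
 * migrated above, so the caller needs no drain-and-requeue dance of
 * its own ("sc" is the hypothetical softc from the earlier sketch):
 *
 *	if (bufq_switch(&sc->sc_bufq, BUFQ_FIFO) == ENOMEM)
 *		printf("bufq switch failed\n");
 */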

void
bufq_destroy(struct bufq *bq)
{
	bufq_drain(bq);

	bq->bufq_impl->impl_destroy(bq->bufq_data);
	bq->bufq_data = NULL;

	mtx_enter(&bufqs_mtx);
	while (bufqs_stop) {
		msleep_nsec(&bufqs_stop, &bufqs_mtx, PRIBIO, "bqdest", INFSLP);
	}
	SLIST_REMOVE(&bufqs, bq, bufq, bufq_entries);
	mtx_leave(&bufqs_mtx);
}

void
bufq_queue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	while (bq->bufq_stop) {
		msleep_nsec(&bq->bufq_stop, &bq->bufq_mtx, PRIBIO, "bqqueue",
		    INFSLP);
	}

	bp->b_bq = bq;
	bq->bufq_outstanding++;
	bq->bufq_impl->impl_queue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

struct buf *
bufq_dequeue(struct bufq *bq)
{
	struct buf	*bp;

	mtx_enter(&bq->bufq_mtx);
	bp = bq->bufq_impl->impl_dequeue(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (bp);
}
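
/*
 * The queue/dequeue pair gives the usual strategy/start split.  A
 * sketch with hypothetical xdstrategy()/xdstart() routines:
 *
 *	void
 *	xdstrategy(struct buf *bp)
 *	{
 *		struct xd_softc *sc = ...;
 *
 *		bufq_queue(&sc->sc_bufq, bp);
 *		xdstart(sc);
 *	}
 *
 *	void
 *	xdstart(struct xd_softc *sc)
 *	{
 *		struct buf *bp;
 *
 *		while ((bp = bufq_dequeue(&sc->sc_bufq)) != NULL)
 *			xd_submit_io(sc, bp);
 *	}
 */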

void
bufq_requeue(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	bq->bufq_impl->impl_requeue(bq->bufq_data, bp);
	mtx_leave(&bq->bufq_mtx);
}

int
bufq_peek(struct bufq *bq)
{
	int		rv;

	mtx_enter(&bq->bufq_mtx);
	rv = bq->bufq_impl->impl_peek(bq->bufq_data);
	mtx_leave(&bq->bufq_mtx);

	return (rv);
}

void
bufq_drain(struct bufq *bq)
{
	struct buf	*bp;
	int		 s;

	while ((bp = bufq_dequeue(bq)) != NULL) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		s = splbio();
		biodone(bp);
		splx(s);
	}
}

void
bufq_wait(struct bufq *bq)
{
	if (bq->bufq_hi) {
		assertwaitok();
		mtx_enter(&bq->bufq_mtx);
		while (bq->bufq_outstanding >= bq->bufq_hi) {
			bq->bufq_waiting++;
			msleep_nsec(&bq->bufq_waiting, &bq->bufq_mtx,
			    PRIBIO, "bqwait", INFSLP);
			bq->bufq_waiting--;
		}
		mtx_leave(&bq->bufq_mtx);
	}
}
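
/*
 * bufq_wait() is the producer-side half of the high/low-water throttle
 * set up in bufq_init(): a writer that may sleep calls it before
 * queueing, and bufq_done() wakes it once outstanding I/O drains below
 * bufq_low.  A sketch:
 *
 *	bufq_wait(&sc->sc_bufq);
 *	bufq_queue(&sc->sc_bufq, bp);
 */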

void
bufq_done(struct bufq *bq, struct buf *bp)
{
	mtx_enter(&bq->bufq_mtx);
	KASSERT(bq->bufq_outstanding > 0);
	bq->bufq_outstanding--;
	if (bq->bufq_stop && bq->bufq_outstanding == 0)
		wakeup(&bq->bufq_outstanding);
	if (bq->bufq_waiting && bq->bufq_outstanding < bq->bufq_low)
		wakeup(&bq->bufq_waiting);
	mtx_leave(&bq->bufq_mtx);
	bp->b_bq = NULL;
}
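
/*
 * Drivers do not normally call bufq_done() themselves: bufq_queue()
 * stored the owning queue in bp->b_bq, and the expectation (an
 * assumption about the completion path, see biodone() in
 * kern/vfs_bio.c) is that it is found again at I/O completion:
 *
 *	if (bp->b_bq)
 *		bufq_done(bp->b_bq, bp);
 */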

void
bufq_quiesce(void)
{
	struct bufq		*bq;

	mtx_enter(&bufqs_mtx);
	bufqs_stop = 1;
	mtx_leave(&bufqs_mtx);
	/*
	 * We can safely walk the list since it can't be modified as
	 * long as bufqs_stop is non-zero.
	 */
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 1;
		while (bq->bufq_outstanding) {
			msleep_nsec(&bq->bufq_outstanding, &bq->bufq_mtx,
			    PRIBIO, "bqquies", INFSLP);
		}
		mtx_leave(&bq->bufq_mtx);
	}
}

void
bufq_restart(void)
{
	struct bufq		*bq;

	mtx_enter(&bufqs_mtx);
	SLIST_FOREACH(bq, &bufqs, bufq_entries) {
		mtx_enter(&bq->bufq_mtx);
		bq->bufq_stop = 0;
		wakeup(&bq->bufq_stop);
		mtx_leave(&bq->bufq_mtx);
	}
	bufqs_stop = 0;
	wakeup(&bufqs_stop);
	mtx_leave(&bufqs_mtx);
}
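
/*
 * The quiesce/restart pair brackets code that needs all disk I/O
 * parked, e.g. a suspend path (a sketch; the exact caller is an
 * assumption):
 *
 *	bufq_quiesce();
 *	... machine sleeps; no bufq accepts or issues I/O ...
 *	bufq_restart();
 */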
/*
 * fifo implementation
 */

void *
bufq_fifo_create(void)
{
	struct bufq_fifo_head	*head;

	head = malloc(sizeof(*head), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (head == NULL)
		return (NULL);

	SIMPLEQ_INIT(head);

	return (head);
}

void
bufq_fifo_destroy(void *data)
{
	struct bufq_fifo_head	*head = data;

	free(head, M_DEVBUF, sizeof(*head));
}

void
bufq_fifo_queue(void *data, struct buf *bp)
{
	struct bufq_fifo_head	*head = data;

	SIMPLEQ_INSERT_TAIL(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

struct buf *
bufq_fifo_dequeue(void *data)
{
	struct bufq_fifo_head	*head = data;
	struct buf		*bp;

	bp = SIMPLEQ_FIRST(head);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(head, b_bufq.bufq_data_fifo.bqf_entries);

	return (bp);
}

void
bufq_fifo_requeue(void *data, struct buf *bp)
{
	struct bufq_fifo_head	*head = data;

	SIMPLEQ_INSERT_HEAD(head, bp, b_bufq.bufq_data_fifo.bqf_entries);
}

int
bufq_fifo_peek(void *data)
{
	struct bufq_fifo_head	*head = data;

	return (SIMPLEQ_FIRST(head) != NULL);
}

/*
 * nscan implementation
 */

#define BUF_INORDER(ba, bb) ((ba)->b_blkno < (bb)->b_blkno)

#define dsentries b_bufq.bufq_data_nscan.bqf_entries

struct bufq_nscan_data {
	struct bufq_nscan_head sorted;
	struct bufq_nscan_head fifo;
	int leftoverroom; /* Remaining number of buffer inserts allowed */
};

void bufq_nscan_resort(struct bufq_nscan_data *data);
void bufq_simple_nscan(struct bufq_nscan_head *, struct buf *);

void
bufq_simple_nscan(struct bufq_nscan_head *head, struct buf *bp)
{
	struct buf *cur, *prev;

	prev = NULL;
	/*
	 * We look for the first slot where we would fit, then insert
	 * after the element we just passed.
	 */
	SIMPLEQ_FOREACH(cur, head, dsentries) {
		if (BUF_INORDER(bp, cur))
			break;
		prev = cur;
	}
	if (prev)
		SIMPLEQ_INSERT_AFTER(head, prev, bp, dsentries);
	else
		SIMPLEQ_INSERT_HEAD(head, bp, dsentries);
}

/*
 * Take up to N elements off the fifo queue and insertion-sort them
 * into the sorted list.
 */
void
bufq_nscan_resort(struct bufq_nscan_data *data)
{
	struct bufq_nscan_head *fifo = &data->fifo;
	struct bufq_nscan_head *sorted = &data->sorted;
	int count, segmentsize = BUFQ_NSCAN_N;
	struct buf *bp;

	for (count = 0; count < segmentsize; count++) {
		bp = SIMPLEQ_FIRST(fifo);
		if (!bp)
			break;
		SIMPLEQ_REMOVE_HEAD(fifo, dsentries);
		bufq_simple_nscan(sorted, bp);
	}
	data->leftoverroom = segmentsize - count;
}
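
/*
 * Worked example with hypothetical block numbers: bufs arrive with
 * b_blkno 40, 10, 30, 20 and sit on the fifo in arrival order.  The
 * first dequeue finds the sorted list empty, resorts the batch, and
 * service proceeds as 10, 20, 30, 40 -- one sweep in ascending block
 * order, the "N-step SCAN" this discipline is named for.  Because new
 * arrivals go to the fifo rather than the batch being served, a buf
 * waits at most one full batch and cannot be starved by a stream of
 * writes to nearer block numbers.
 */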

void *
bufq_nscan_create(void)
{
	struct bufq_nscan_data *data;

	data = malloc(sizeof(*data), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (!data)
		return NULL;
	SIMPLEQ_INIT(&data->sorted);
	SIMPLEQ_INIT(&data->fifo);

	return data;
}

void
bufq_nscan_destroy(void *vdata)
{
	struct bufq_nscan_data *data = vdata;

	free(data, M_DEVBUF, sizeof(*data));
}

void
bufq_nscan_queue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data *data = vdata;

	/*
	 * If the previous sorted segment was small, we will continue
	 * packing in bufs as long as they're in order.
	 */
	if (data->leftoverroom) {
		struct buf *next = SIMPLEQ_FIRST(&data->sorted);
		if (next && BUF_INORDER(next, bp)) {
			bufq_simple_nscan(&data->sorted, bp);
			data->leftoverroom--;
			return;
		}
	}

	SIMPLEQ_INSERT_TAIL(&data->fifo, bp, dsentries);
}

struct buf *
bufq_nscan_dequeue(void *vdata)
{
	struct bufq_nscan_data *data = vdata;
	struct bufq_nscan_head *sorted = &data->sorted;
	struct buf	*bp;

	if (SIMPLEQ_FIRST(sorted) == NULL)
		bufq_nscan_resort(data);

	bp = SIMPLEQ_FIRST(sorted);
	if (bp != NULL)
		SIMPLEQ_REMOVE_HEAD(sorted, dsentries);

	return (bp);
}

void
bufq_nscan_requeue(void *vdata, struct buf *bp)
{
	struct bufq_nscan_data *data = vdata;

	SIMPLEQ_INSERT_HEAD(&data->fifo, bp, dsentries);
}

int
bufq_nscan_peek(void *vdata)
{
	struct bufq_nscan_data *data = vdata;

	return (SIMPLEQ_FIRST(&data->sorted) != NULL) ||
	    (SIMPLEQ_FIRST(&data->fifo) != NULL);
}