/*	$NetBSD: bufq_priocscan.c,v 1.15 2011/11/02 15:14:49 yamt Exp $	*/

/*-
 * Copyright (c)2004,2005,2006,2008,2009 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: bufq_priocscan.c,v 1.15 2011/11/02 15:14:49 yamt Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/bufq_impl.h>
#include <sys/kmem.h>

/*
 * Cyclical scan (CSCAN)
 */
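/*
 * The queue is kept as two sorted lists.  cq_head[cq_idx] holds the
 * requests at or beyond the last served position; the other list holds
 * requests behind it.  When the active list drains, cscan_get switches
 * to the other one, which implements the cyclical sweep.
 */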
TAILQ_HEAD(bqhead, buf);
struct cscan_queue {
	struct bqhead cq_head[2];	/* actual lists of buffers */
	unsigned int cq_idx;		/* current list index */
	int cq_lastcylinder;		/* b_cylinder of the last request */
	daddr_t cq_lastrawblkno;	/* b_rawblkno of the last request */
};

static inline int cscan_empty(const struct cscan_queue *);
static void cscan_put(struct cscan_queue *, struct buf *, int);
static struct buf *cscan_get(struct cscan_queue *, int);
static void cscan_init(struct cscan_queue *);

static inline int
cscan_empty(const struct cscan_queue *q)
{

	return TAILQ_EMPTY(&q->cq_head[0]) && TAILQ_EMPTY(&q->cq_head[1]);
}

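/*
 * cscan_put: insert bp into the sorted list it belongs to.  A request
 * that sorts before the last served position goes onto the inactive
 * list, to be picked up on the next sweep.
 */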
static void
cscan_put(struct cscan_queue *q, struct buf *bp, int sortby)
{
	struct buf tmp;
	struct buf *it;
	struct bqhead *bqh;
	unsigned int idx;

	tmp.b_cylinder = q->cq_lastcylinder;
	tmp.b_rawblkno = q->cq_lastrawblkno;

	if (buf_inorder(bp, &tmp, sortby))
		idx = 1 - q->cq_idx;
	else
		idx = q->cq_idx;

	bqh = &q->cq_head[idx];

	TAILQ_FOREACH(it, bqh, b_actq)
		if (buf_inorder(bp, it, sortby))
			break;

	if (it != NULL)
		TAILQ_INSERT_BEFORE(it, bp, b_actq);
	else
		TAILQ_INSERT_TAIL(bqh, bp, b_actq);
}

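/*
 * cscan_get: return the next request in scan order, or NULL if both
 * lists are empty.  If "remove" is set, dequeue the request and record
 * the head position just past its end.
 */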
static struct buf *
cscan_get(struct cscan_queue *q, int remove)
{
	unsigned int idx = q->cq_idx;
	struct bqhead *bqh;
	struct buf *bp;

	bqh = &q->cq_head[idx];
	bp = TAILQ_FIRST(bqh);

	if (bp == NULL) {
		/* switch queue */
		idx = 1 - idx;
		bqh = &q->cq_head[idx];
		bp = TAILQ_FIRST(bqh);
	}

	KDASSERT((bp != NULL && !cscan_empty(q)) ||
	         (bp == NULL && cscan_empty(q)));

	if (bp != NULL && remove) {
		q->cq_idx = idx;
		TAILQ_REMOVE(bqh, bp, b_actq);

		q->cq_lastcylinder = bp->b_cylinder;
		q->cq_lastrawblkno =
		    bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
	}

	return (bp);
}

static void
cscan_init(struct cscan_queue *q)
{

	TAILQ_INIT(&q->cq_head[0]);
	TAILQ_INIT(&q->cq_head[1]);
}


/*
 * Per-priority CSCAN.
 *
 * XXX we should probably have a way to raise the
 * priority of on-queue requests.
 */
#define	PRIOCSCAN_NQUEUE	3

struct priocscan_queue {
	struct cscan_queue q_queue;
	unsigned int q_burst;
};

struct bufq_priocscan {
	struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE];

#if 0
	/*
	 * XXX using a "global" head position can reduce positioning time
	 * when switching between queues, although it might affect fairness.
	 */
	daddr_t bq_lastrawblkno;
	int bq_lastcylinder;
#endif
};

/*
 * how many requests to serve from a queue while other queues
 * have pending requests.
 *
 * XXX tune
 * be careful: making these values larger likely increases total
 * throughput, but it can also increase latencies for some workloads.
 */
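/* index 0 is the highest (time-critical) priority; see priocscan_priomap */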
const int priocscan_burst[] = {
	64, 16, 4
};

static void bufq_priocscan_init(struct bufq_state *);
static void bufq_priocscan_put(struct bufq_state *, struct buf *);
static struct buf *bufq_priocscan_get(struct bufq_state *, int);

BUFQ_DEFINE(priocscan, 40, bufq_priocscan_init);

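/*
 * A driver selects this strategy by name through the generic bufq(9)
 * interface; a minimal sketch (sc_bufq is a hypothetical softc member):
 *
 *	error = bufq_alloc(&sc->sc_bufq, "priocscan", BUFQ_SORT_RAWBLOCK);
 *	...
 *	bufq_put(sc->sc_bufq, bp);
 *	bp = bufq_get(sc->sc_bufq);
 */
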
static inline struct cscan_queue *bufq_priocscan_selectqueue(
    struct bufq_priocscan *, const struct buf *);

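/*
 * bufq_priocscan_selectqueue: map the I/O priority of bp to the
 * corresponding per-priority queue.
 */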
static inline struct cscan_queue *
bufq_priocscan_selectqueue(struct bufq_priocscan *q, const struct buf *bp)
{
	static const int priocscan_priomap[] = {
		[BPRIO_TIMENONCRITICAL] = 2,
		[BPRIO_TIMELIMITED] = 1,
		[BPRIO_TIMECRITICAL] = 0
	};

	return &q->bq_queue[priocscan_priomap[BIO_GETPRIO(bp)]].q_queue;
}

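/*
 * bufq_priocscan_put: enqueue bp on the CSCAN queue that matches its
 * priority.
 */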
static void
bufq_priocscan_put(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_priocscan *q = bufq->bq_private;
	struct cscan_queue *cq;
	const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;

	cq = bufq_priocscan_selectqueue(q, bp);
	cscan_put(cq, bp, sortby);
}

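/*
 * bufq_priocscan_get: pick the queue to serve and return its next
 * request.  Higher priority queues are preferred, but only while they
 * have burst count left, which bounds how long lower priority requests
 * can be starved.
 */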
static struct buf *
bufq_priocscan_get(struct bufq_state *bufq, int remove)
{
	struct bufq_priocscan *q = bufq->bq_private;
	struct priocscan_queue *pq, *npq;
	struct priocscan_queue *first; /* highest priority non-empty queue */
	const struct priocscan_queue *epq;
	struct buf *bp;
	bool single; /* true if there's only one non-empty queue */

	/*
	 * find the highest priority non-empty queue.
	 */
	pq = &q->bq_queue[0];
	epq = pq + PRIOCSCAN_NQUEUE;
	for (; pq < epq; pq++) {
		if (!cscan_empty(&pq->q_queue)) {
			break;
		}
	}
	if (pq == epq) {
		/*
		 * all our queues are empty.  there's nothing to serve.
		 */
		return NULL;
	}
	first = pq;

	/*
	 * scan the rest of the queues.
	 *
	 * if we have two or more non-empty queues, we serve the highest
	 * priority one with a non-zero burst count.
	 */
	single = true;
	for (npq = pq + 1; npq < epq; npq++) {
		if (!cscan_empty(&npq->q_queue)) {
			/*
			 * we found another non-empty queue.
			 * that means a queue must consume its burst
			 * count to be served.
			 */
			single = false;

			/*
			 * check if our current candidate queue has already
			 * exhausted its burst count.
			 */
			if (pq->q_burst > 0) {
				break;
			}
			pq = npq;
		}
	}
	if (single) {
		/*
		 * there's only one non-empty queue.
		 * serve it without consuming its burst count.
		 */
		KASSERT(pq == first);
	} else {
		/*
		 * there are two or more non-empty queues.
		 */
		if (pq->q_burst == 0) {
			/*
			 * no queue can be served because they have all
			 * exhausted their burst counts.
			 */
			unsigned int i;
#ifdef DEBUG
			for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
				pq = &q->bq_queue[i];
				if (!cscan_empty(&pq->q_queue) && pq->q_burst) {
					panic("%s: inconsistent", __func__);
				}
			}
#endif /* DEBUG */
			/*
			 * reset burst counts.
			 */
			if (remove) {
				for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
					pq = &q->bq_queue[i];
					pq->q_burst = priocscan_burst[i];
				}
			}

			/*
			 * serve the highest priority non-empty queue.
			 */
			pq = first;
		}
		/*
		 * consume the burst count.
		 *
		 * XXX we account only by number of requests.  is that
		 * good enough?
		 */
		if (remove) {
			KASSERT(pq->q_burst > 0);
			pq->q_burst--;
		}
	}

	/*
	 * finally, get a request from the selected queue.
	 */
	KDASSERT(!cscan_empty(&pq->q_queue));
	bp = cscan_get(&pq->q_queue, remove);
	KDASSERT(bp != NULL);
	KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp));

	return bp;
}

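/*
 * bufq_priocscan_cancel: remove bp from whichever list holds it;
 * return bp if found, or NULL if it was not on this bufq.
 */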
static struct buf *
bufq_priocscan_cancel(struct bufq_state *bufq, struct buf *bp)
{
	struct bufq_priocscan * const q = bufq->bq_private;
	unsigned int i, j;

	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
		struct cscan_queue * const cq = &q->bq_queue[i].q_queue;
		for (j = 0; j < 2; j++) {
			struct bqhead * const bqh = &cq->cq_head[j];
			struct buf *bq;

			TAILQ_FOREACH(bq, bqh, b_actq) {
				if (bq == bp) {
					TAILQ_REMOVE(bqh, bp, b_actq);
					return bp;
				}
			}
		}
	}
	return NULL;
}

static void
bufq_priocscan_fini(struct bufq_state *bufq)
{

	KASSERT(bufq->bq_private != NULL);
	kmem_free(bufq->bq_private, sizeof(struct bufq_priocscan));
}

static void
bufq_priocscan_init(struct bufq_state *bufq)
{
	struct bufq_priocscan *q;
	unsigned int i;

	bufq->bq_get = bufq_priocscan_get;
	bufq->bq_put = bufq_priocscan_put;
	bufq->bq_cancel = bufq_priocscan_cancel;
	bufq->bq_fini = bufq_priocscan_fini;
	bufq->bq_private = kmem_zalloc(sizeof(struct bufq_priocscan), KM_SLEEP);

	q = bufq->bq_private;
	for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
		struct cscan_queue *cq = &q->bq_queue[i].q_queue;

		cscan_init(cq);
	}
}