1224135Sdim/*-
2224135Sdim * ----------------------------------------------------------------------------
3224135Sdim * "THE BEER-WARE LICENSE" (Revision 42):
4224135Sdim * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5224135Sdim * can do whatever you want with this stuff. If we meet some day, and you think
6224135Sdim * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7224135Sdim * ----------------------------------------------------------------------------
8224135Sdim *
9224135Sdim * The bioq_disksort() (and the specification of the bioq API)
10224135Sdim * have been written by Luigi Rizzo and Fabio Checconi under the same
11249423Sdim * license as above.
12249423Sdim */
13243830Sdim
14249423Sdim#include <sys/cdefs.h>
15224135Sdim__FBSDID("$FreeBSD$");
16224135Sdim
17224135Sdim//#include "opt_geom.h"
18224135Sdim
19224135Sdim#include <sys/param.h>
20224135Sdim#include <sys/systm.h>
21224135Sdim#include <sys/bio.h>
22224135Sdim#include <sys/conf.h>
23224135Sdim#include <sys/disk.h>
24224135Sdim#include <geom/geom_disk.h>
25224135Sdim#include "g_sched.h"
26224135Sdim
27224135Sdim/*
28224135Sdim * BIO queue implementation
29224135Sdim *
30224135Sdim * Please read carefully the description below before making any change
31226633Sdim * to the code, or you might change the behaviour of the data structure
32224135Sdim * in undesirable ways.
33224135Sdim *
34224135Sdim * A bioq stores disk I/O request (bio), normally sorted according to
35224135Sdim * the distance of the requested position (bio->bio_offset) from the
36224135Sdim * current head position (bioq->last_offset) in the scan direction, i.e.
37224135Sdim *
38224135Sdim * 	(uoff_t)(bio_offset - last_offset)
39224135Sdim *
40224135Sdim * Note that the cast to unsigned (uoff_t) is fundamental to insure
41224135Sdim * that the distance is computed in the scan direction.
42224135Sdim *
43224135Sdim * The main methods for manipulating the bioq are:
44226633Sdim *
45224135Sdim *   bioq_disksort()	performs an ordered insertion;
46263508Sdim *
47263508Sdim *   bioq_first()	return the head of the queue, without removing;
48263508Sdim *
49224135Sdim *   bioq_takefirst()	return and remove the head of the queue,
50224135Sdim *		updating the 'current head position' as
51226633Sdim *		bioq->last_offset = bio->bio_offset + bio->bio_length;
52224135Sdim *
53234353Sdim * When updating the 'current head position', we assume that the result of
54234353Sdim * bioq_takefirst() is dispatched to the device, so bioq->last_offset
55234353Sdim * represents the head position once the request is complete.
56234353Sdim *
57234353Sdim * If the bioq is manipulated using only the above calls, it starts
58234353Sdim * with a sorted sequence of requests with bio_offset >= last_offset,
59224135Sdim * possibly followed by another sorted sequence of requests with
60224135Sdim * 0 <= bio_offset < bioq->last_offset
61234353Sdim *
62224135Sdim * NOTE: historical behaviour was to ignore bio->bio_length in the
63224135Sdim *	update, but its use tracks the head position in a better way.
64224135Sdim *	Historical behaviour was also to update the head position when
65224135Sdim *	the request under service is complete, rather than when the
66224135Sdim *	request is extracted from the queue. However, the current API
67224135Sdim *	has no method to update the head position; secondly, once
68226633Sdim *	a request has been submitted to the disk, we have no idea of
69234353Sdim *	the actual head position, so the final one is our best guess.
70234353Sdim *
71226633Sdim * --- Direct queue manipulation ---
72226633Sdim *
73226633Sdim * A bioq uses an underlying TAILQ to store requests, so we also
74226633Sdim * export methods to manipulate the TAILQ, in particular:
75224135Sdim *
76226633Sdim * bioq_insert_tail()	insert an entry at the end.
77226633Sdim *		It also creates a 'barrier' so all subsequent
78226633Sdim *		insertions through bioq_disksort() will end up
79239462Sdim *		after this entry;
80239462Sdim *
81239462Sdim * bioq_insert_head()	insert an entry at the head, update
82226633Sdim *		bioq->last_offset = bio->bio_offset so that
83226633Sdim *		all subsequent insertions through bioq_disksort()
84224135Sdim *		will end up after this entry;
85224135Sdim *
86224135Sdim * bioq_remove()	remove a generic element from the queue, act as
87224135Sdim *		bioq_takefirst() if invoked on the head of the queue.
88226633Sdim *
89224135Sdim * The semantic of these methods is the same as the operations
90224135Sdim * on the underlying TAILQ, but with additional guarantees on
91224135Sdim * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
92224135Sdim * can be useful for making sure that all previous ops are flushed
93224135Sdim * to disk before continuing.
94226633Sdim *
95224135Sdim * Updating bioq->last_offset on a bioq_insert_head() guarantees
96224135Sdim * that the bio inserted with the last bioq_insert_head() will stay
97224135Sdim * at the head of the queue even after subsequent bioq_disksort().
98224135Sdim *
99224135Sdim * Note that when the direct queue manipulation functions are used,
100226633Sdim * the queue may contain multiple inversion points (i.e. more than
101224135Sdim * two sorted sequences of requests).
102224135Sdim *
103224135Sdim */
104224135Sdim
105224135Sdimvoid
106224135Sdimgs_bioq_init(struct bio_queue_head *head)
107224135Sdim{
108224135Sdim
109224135Sdim	TAILQ_INIT(&head->queue);
110224135Sdim	head->last_offset = 0;
111224135Sdim	head->insert_point = NULL;
112226633Sdim}
113224135Sdim
114224135Sdimvoid
115224135Sdimgs_bioq_remove(struct bio_queue_head *head, struct bio *bp)
116224135Sdim{
117226633Sdim
118224135Sdim	if (head->insert_point == NULL) {
119234353Sdim		if (bp == TAILQ_FIRST(&head->queue))
120234353Sdim			head->last_offset = bp->bio_offset + bp->bio_length;
121234353Sdim	} else if (bp == head->insert_point)
122234353Sdim		head->insert_point = NULL;
123234353Sdim
124234353Sdim	TAILQ_REMOVE(&head->queue, bp, bio_queue);
125234353Sdim}
126224135Sdim
127234353Sdimvoid
128224135Sdimgs_bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error)
129263508Sdim{
130224135Sdim	struct bio *bp;
131224135Sdim
132224135Sdim	while ((bp = gs_bioq_takefirst(head)) != NULL)
133224135Sdim		biofinish(bp, stp, error);
134224135Sdim}
135224135Sdim
136224135Sdimvoid
137234353Sdimgs_bioq_insert_head(struct bio_queue_head *head, struct bio *bp)
138224135Sdim{
139224135Sdim
140224135Sdim	if (head->insert_point == NULL)
141224135Sdim		head->last_offset = bp->bio_offset;
142224135Sdim	TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
143234353Sdim}
144224135Sdim
145224135Sdimvoid
146224135Sdimgs_bioq_insert_tail(struct bio_queue_head *head, struct bio *bp)
147224135Sdim{
148234353Sdim
149224135Sdim	TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue);
150263508Sdim	head->insert_point = bp;
151263508Sdim	head->last_offset = bp->bio_offset;
152263508Sdim}
153226633Sdim
154224135Sdimstruct bio *
155224135Sdimgs_bioq_first(struct bio_queue_head *head)
156224135Sdim{
157224135Sdim
158224135Sdim	return (TAILQ_FIRST(&head->queue));
159224135Sdim}
160224135Sdim
161224135Sdimstruct bio *
162224135Sdimgs_bioq_takefirst(struct bio_queue_head *head)
163224135Sdim{
164224135Sdim	struct bio *bp;
165224135Sdim
166224135Sdim	bp = TAILQ_FIRST(&head->queue);
167224135Sdim	if (bp != NULL)
168224135Sdim		gs_bioq_remove(head, bp);
169224135Sdim	return (bp);
170226633Sdim}
171226633Sdim
172224135Sdim/*
173224135Sdim * Compute the sorting key. The cast to unsigned is
174224135Sdim * fundamental for correctness, see the description
175224135Sdim * near the beginning of the file.
176224135Sdim */
177263508Sdimstatic inline uoff_t
178263508Sdimgs_bioq_bio_key(struct bio_queue_head *head, struct bio *bp)
179263508Sdim{
180263508Sdim
181263508Sdim	return ((uoff_t)(bp->bio_offset - head->last_offset));
182263508Sdim}
183224135Sdim
184263508Sdim/*
185263508Sdim * Seek sort for disks.
186263508Sdim *
187263508Sdim * Sort all requests in a single queue while keeping
188263508Sdim * track of the current position of the disk with last_offset.
189224135Sdim * See above for details.
190263508Sdim */
191263508Sdimvoid
192263508Sdimgs_bioq_disksort(struct bio_queue_head *head, struct bio *bp)
193224135Sdim{
194224135Sdim	struct bio *cur, *prev;
195224135Sdim	uoff_t key;
196224135Sdim
197224135Sdim	if ((bp->bio_flags & BIO_ORDERED) != 0) {
198224135Sdim		/*
199234353Sdim		 * Ordered transactions can only be dispatched
200224135Sdim		 * after any currently queued transactions.  They
201224135Sdim		 * also have barrier semantics - no transactions
202224135Sdim		 * queued in the future can pass them.
203224135Sdim		 */
204224135Sdim		gs_bioq_insert_tail(head, bp);
205224135Sdim		return;
206224135Sdim	}
207224135Sdim
208224135Sdim	prev = NULL;
209224135Sdim	key = gs_bioq_bio_key(head, bp);
210224135Sdim	cur = TAILQ_FIRST(&head->queue);
211224135Sdim
212224135Sdim	if (head->insert_point) {
213234353Sdim		prev = head->insert_point;
214224135Sdim		cur = TAILQ_NEXT(head->insert_point, bio_queue);
215224135Sdim	}
216224135Sdim
217224135Sdim	while (cur != NULL && key >= gs_bioq_bio_key(head, cur)) {
218224135Sdim		prev = cur;
219224135Sdim		cur = TAILQ_NEXT(cur, bio_queue);
220224135Sdim	}
221224135Sdim
222224135Sdim	if (prev == NULL)
223224135Sdim		TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
224224135Sdim	else
225224135Sdim		TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue);
226224135Sdim}
227224135Sdim