1206497Sluigi/*-
2206497Sluigi * ----------------------------------------------------------------------------
3206497Sluigi * "THE BEER-WARE LICENSE" (Revision 42):
4206497Sluigi * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5206497Sluigi * can do whatever you want with this stuff. If we meet some day, and you think
6206497Sluigi * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7206497Sluigi * ----------------------------------------------------------------------------
8206497Sluigi *
9206497Sluigi * The bioq_disksort() (and the specification of the bioq API)
10206497Sluigi * have been written by Luigi Rizzo and Fabio Checconi under the same
11206497Sluigi * license as above.
12206497Sluigi */
13206497Sluigi
14206497Sluigi#include <sys/cdefs.h>
15206497Sluigi__FBSDID("$FreeBSD: releng/10.3/sys/geom/sched/subr_disk.c 212160 2010-09-02 19:40:28Z gibbs $");
16206497Sluigi
17206497Sluigi//#include "opt_geom.h"
18206497Sluigi
19206497Sluigi#include <sys/param.h>
20206497Sluigi#include <sys/systm.h>
21206497Sluigi#include <sys/bio.h>
22206497Sluigi#include <sys/conf.h>
23206497Sluigi#include <sys/disk.h>
24206497Sluigi#include <geom/geom_disk.h>
25206497Sluigi#include "g_sched.h"
26206497Sluigi
27206497Sluigi/*
28206497Sluigi * BIO queue implementation
29206497Sluigi *
30206497Sluigi * Please read carefully the description below before making any change
31206497Sluigi * to the code, or you might change the behaviour of the data structure
32206497Sluigi * in undesirable ways.
33206497Sluigi *
34206497Sluigi * A bioq stores disk I/O request (bio), normally sorted according to
35206497Sluigi * the distance of the requested position (bio->bio_offset) from the
36206497Sluigi * current head position (bioq->last_offset) in the scan direction, i.e.
37206497Sluigi *
38206497Sluigi * 	(uoff_t)(bio_offset - last_offset)
39206497Sluigi *
40206497Sluigi * Note that the cast to unsigned (uoff_t) is fundamental to insure
41206497Sluigi * that the distance is computed in the scan direction.
42206497Sluigi *
43206497Sluigi * The main methods for manipulating the bioq are:
44206497Sluigi *
45206497Sluigi *   bioq_disksort()	performs an ordered insertion;
46206497Sluigi *
47206497Sluigi *   bioq_first()	return the head of the queue, without removing;
48206497Sluigi *
49206497Sluigi *   bioq_takefirst()	return and remove the head of the queue,
50206497Sluigi *		updating the 'current head position' as
51206497Sluigi *		bioq->last_offset = bio->bio_offset + bio->bio_length;
52206497Sluigi *
53206497Sluigi * When updating the 'current head position', we assume that the result of
54206497Sluigi * bioq_takefirst() is dispatched to the device, so bioq->last_offset
55206497Sluigi * represents the head position once the request is complete.
56206497Sluigi *
57206497Sluigi * If the bioq is manipulated using only the above calls, it starts
58206497Sluigi * with a sorted sequence of requests with bio_offset >= last_offset,
59206497Sluigi * possibly followed by another sorted sequence of requests with
60206497Sluigi * 0 <= bio_offset < bioq->last_offset
61206497Sluigi *
62206497Sluigi * NOTE: historical behaviour was to ignore bio->bio_length in the
63206497Sluigi *	update, but its use tracks the head position in a better way.
64206497Sluigi *	Historical behaviour was also to update the head position when
65206497Sluigi *	the request under service is complete, rather than when the
66206497Sluigi *	request is extracted from the queue. However, the current API
67206497Sluigi *	has no method to update the head position; secondly, once
68206497Sluigi *	a request has been submitted to the disk, we have no idea of
69206497Sluigi *	the actual head position, so the final one is our best guess.
70206497Sluigi *
71206497Sluigi * --- Direct queue manipulation ---
72206497Sluigi *
73206497Sluigi * A bioq uses an underlying TAILQ to store requests, so we also
74206497Sluigi * export methods to manipulate the TAILQ, in particular:
75206497Sluigi *
76206497Sluigi * bioq_insert_tail()	insert an entry at the end.
77206497Sluigi *		It also creates a 'barrier' so all subsequent
78206497Sluigi *		insertions through bioq_disksort() will end up
79206497Sluigi *		after this entry;
80206497Sluigi *
81206497Sluigi * bioq_insert_head()	insert an entry at the head, update
82206497Sluigi *		bioq->last_offset = bio->bio_offset so that
83206497Sluigi *		all subsequent insertions through bioq_disksort()
84206497Sluigi *		will end up after this entry;
85206497Sluigi *
86206497Sluigi * bioq_remove()	remove a generic element from the queue, act as
87206497Sluigi *		bioq_takefirst() if invoked on the head of the queue.
88206497Sluigi *
89212160Sgibbs * The semantic of these methods is the same as the operations
90206497Sluigi * on the underlying TAILQ, but with additional guarantees on
91206497Sluigi * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
92206497Sluigi * can be useful for making sure that all previous ops are flushed
93206497Sluigi * to disk before continuing.
94206497Sluigi *
95206497Sluigi * Updating bioq->last_offset on a bioq_insert_head() guarantees
96206497Sluigi * that the bio inserted with the last bioq_insert_head() will stay
97206497Sluigi * at the head of the queue even after subsequent bioq_disksort().
98206497Sluigi *
99206497Sluigi * Note that when the direct queue manipulation functions are used,
100206497Sluigi * the queue may contain multiple inversion points (i.e. more than
101206497Sluigi * two sorted sequences of requests).
102206497Sluigi *
103206497Sluigi */
104206497Sluigi
105206497Sluigivoid
106206497Sluigigs_bioq_init(struct bio_queue_head *head)
107206497Sluigi{
108206497Sluigi
109206497Sluigi	TAILQ_INIT(&head->queue);
110206497Sluigi	head->last_offset = 0;
111206497Sluigi	head->insert_point = NULL;
112206497Sluigi}
113206497Sluigi
114206497Sluigivoid
115206497Sluigigs_bioq_remove(struct bio_queue_head *head, struct bio *bp)
116206497Sluigi{
117206497Sluigi
118212160Sgibbs	if (head->insert_point == NULL) {
119212160Sgibbs		if (bp == TAILQ_FIRST(&head->queue))
120212160Sgibbs			head->last_offset = bp->bio_offset + bp->bio_length;
121212160Sgibbs	} else if (bp == head->insert_point)
122206497Sluigi		head->insert_point = NULL;
123206497Sluigi
124206497Sluigi	TAILQ_REMOVE(&head->queue, bp, bio_queue);
125206497Sluigi}
126206497Sluigi
127206497Sluigivoid
128206497Sluigigs_bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error)
129206497Sluigi{
130206497Sluigi	struct bio *bp;
131206497Sluigi
132206497Sluigi	while ((bp = gs_bioq_takefirst(head)) != NULL)
133206497Sluigi		biofinish(bp, stp, error);
134206497Sluigi}
135206497Sluigi
136206497Sluigivoid
137206497Sluigigs_bioq_insert_head(struct bio_queue_head *head, struct bio *bp)
138206497Sluigi{
139206497Sluigi
140212160Sgibbs	if (head->insert_point == NULL)
141212160Sgibbs		head->last_offset = bp->bio_offset;
142206497Sluigi	TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
143206497Sluigi}
144206497Sluigi
145206497Sluigivoid
146206497Sluigigs_bioq_insert_tail(struct bio_queue_head *head, struct bio *bp)
147206497Sluigi{
148206497Sluigi
149206497Sluigi	TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue);
150206497Sluigi	head->insert_point = bp;
151212160Sgibbs	head->last_offset = bp->bio_offset;
152206497Sluigi}
153206497Sluigi
154206497Sluigistruct bio *
155206497Sluigigs_bioq_first(struct bio_queue_head *head)
156206497Sluigi{
157206497Sluigi
158206497Sluigi	return (TAILQ_FIRST(&head->queue));
159206497Sluigi}
160206497Sluigi
161206497Sluigistruct bio *
162206497Sluigigs_bioq_takefirst(struct bio_queue_head *head)
163206497Sluigi{
164206497Sluigi	struct bio *bp;
165206497Sluigi
166206497Sluigi	bp = TAILQ_FIRST(&head->queue);
167206497Sluigi	if (bp != NULL)
168206497Sluigi		gs_bioq_remove(head, bp);
169206497Sluigi	return (bp);
170206497Sluigi}
171206497Sluigi
172206497Sluigi/*
173206497Sluigi * Compute the sorting key. The cast to unsigned is
174206497Sluigi * fundamental for correctness, see the description
175206497Sluigi * near the beginning of the file.
176206497Sluigi */
177206497Sluigistatic inline uoff_t
178206497Sluigigs_bioq_bio_key(struct bio_queue_head *head, struct bio *bp)
179206497Sluigi{
180206497Sluigi
181206497Sluigi	return ((uoff_t)(bp->bio_offset - head->last_offset));
182206497Sluigi}
183206497Sluigi
184206497Sluigi/*
185206497Sluigi * Seek sort for disks.
186206497Sluigi *
187206497Sluigi * Sort all requests in a single queue while keeping
188206497Sluigi * track of the current position of the disk with last_offset.
189206497Sluigi * See above for details.
190206497Sluigi */
191206497Sluigivoid
192206497Sluigigs_bioq_disksort(struct bio_queue_head *head, struct bio *bp)
193206497Sluigi{
194212160Sgibbs	struct bio *cur, *prev;
195212160Sgibbs	uoff_t key;
196206497Sluigi
197212160Sgibbs	if ((bp->bio_flags & BIO_ORDERED) != 0) {
198212160Sgibbs		/*
199212160Sgibbs		 * Ordered transactions can only be dispatched
200212160Sgibbs		 * after any currently queued transactions.  They
201212160Sgibbs		 * also have barrier semantics - no transactions
202212160Sgibbs		 * queued in the future can pass them.
203212160Sgibbs		 */
204212160Sgibbs		gs_bioq_insert_tail(head, bp);
205212160Sgibbs		return;
206212160Sgibbs	}
207212160Sgibbs
208212160Sgibbs	prev = NULL;
209212160Sgibbs	key = gs_bioq_bio_key(head, bp);
210206497Sluigi	cur = TAILQ_FIRST(&head->queue);
211206497Sluigi
212212160Sgibbs	if (head->insert_point) {
213212160Sgibbs		prev = head->insert_point;
214212160Sgibbs		cur = TAILQ_NEXT(head->insert_point, bio_queue);
215212160Sgibbs	}
216206497Sluigi
217206497Sluigi	while (cur != NULL && key >= gs_bioq_bio_key(head, cur)) {
218206497Sluigi		prev = cur;
219206497Sluigi		cur = TAILQ_NEXT(cur, bio_queue);
220206497Sluigi	}
221206497Sluigi
222206497Sluigi	if (prev == NULL)
223206497Sluigi		TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
224206497Sluigi	else
225206497Sluigi		TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue);
226206497Sluigi}
227