1/*-
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
8 *
9 * The bioq_disksort() (and the specification of the bioq API)
10 * have been written by Luigi Rizzo and Fabio Checconi under the same
11 * license as above.
12 */
13
14#include <sys/cdefs.h>
15__FBSDID("$FreeBSD: stable/11/sys/kern/subr_disk.c 344072 2019-02-13 00:35:09Z mav $");
16
17#include "opt_geom.h"
18
19#include <sys/param.h>
20#include <sys/systm.h>
21#include <sys/bio.h>
22#include <sys/conf.h>
23#include <sys/disk.h>
24#include <geom/geom_disk.h>
25
26/*-
27 * Disk error is the preface to plaintive error messages
28 * about failing disk transfers.  It prints messages of the form
29 * 	"hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347"
30 * blkdone should be -1 if the position of the error is unknown.
31 * The message is printed with printf.
32 */
33void
34disk_err(struct bio *bp, const char *what, int blkdone, int nl)
35{
36	daddr_t sn;
37
38	if (bp->bio_dev != NULL)
39		printf("%s: %s ", devtoname(bp->bio_dev), what);
40	else if (bp->bio_disk != NULL)
41		printf("%s%d: %s ",
42		    bp->bio_disk->d_name, bp->bio_disk->d_unit, what);
43	else
44		printf("disk??: %s ", what);
45	switch(bp->bio_cmd) {
46	case BIO_READ:		printf("cmd=read "); break;
47	case BIO_WRITE:		printf("cmd=write "); break;
48	case BIO_DELETE:	printf("cmd=delete "); break;
49	case BIO_GETATTR:	printf("cmd=getattr "); break;
50	case BIO_FLUSH:		printf("cmd=flush "); break;
51	default:		printf("cmd=%x ", bp->bio_cmd); break;
52	}
53	sn = bp->bio_pblkno;
54	if (bp->bio_bcount <= DEV_BSIZE) {
55		printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : "");
56		return;
57	}
58	if (blkdone >= 0) {
59		sn += blkdone;
60		printf("fsbn %jd of ", (intmax_t)sn);
61	}
62	printf("%jd-%jd", (intmax_t)bp->bio_pblkno,
63	    (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE));
64	if (nl)
65		printf("\n");
66}
67
68/*
69 * BIO queue implementation
70 *
71 * Please read carefully the description below before making any change
72 * to the code, or you might change the behaviour of the data structure
73 * in undesirable ways.
74 *
 * A bioq stores disk I/O requests (bios), normally sorted according to
76 * the distance of the requested position (bio->bio_offset) from the
77 * current head position (bioq->last_offset) in the scan direction, i.e.
78 *
79 * 	(uoff_t)(bio_offset - last_offset)
80 *
 * Note that the cast to unsigned (uoff_t) is fundamental to ensure
 * that the distance is computed in the scan direction.
83 *
84 * The main methods for manipulating the bioq are:
85 *
86 *   bioq_disksort()	performs an ordered insertion;
87 *
 *   bioq_first()	returns the head of the queue, without removing it;
 *
 *   bioq_takefirst()	returns and removes the head of the queue,
 *		updating the 'current head position' as
 *		bioq->last_offset = bio->bio_offset + bio->bio_length;
93 *
94 * When updating the 'current head position', we assume that the result of
95 * bioq_takefirst() is dispatched to the device, so bioq->last_offset
96 * represents the head position once the request is complete.
97 *
98 * If the bioq is manipulated using only the above calls, it starts
99 * with a sorted sequence of requests with bio_offset >= last_offset,
100 * possibly followed by another sorted sequence of requests with
101 * 0 <= bio_offset < bioq->last_offset
102 *
103 * NOTE: historical behaviour was to ignore bio->bio_length in the
104 *	update, but its use tracks the head position in a better way.
105 *	Historical behaviour was also to update the head position when
106 *	the request under service is complete, rather than when the
107 *	request is extracted from the queue. However, the current API
108 *	has no method to update the head position; secondly, once
109 *	a request has been submitted to the disk, we have no idea of
110 *	the actual head position, so the final one is our best guess.
111 *
112 * --- Direct queue manipulation ---
113 *
114 * A bioq uses an underlying TAILQ to store requests, so we also
115 * export methods to manipulate the TAILQ, in particular:
116 *
117 * bioq_insert_tail()	insert an entry at the end.
118 *		It also creates a 'barrier' so all subsequent
119 *		insertions through bioq_disksort() will end up
120 *		after this entry;
121 *
122 * bioq_insert_head()	insert an entry at the head, update
123 *		bioq->last_offset = bio->bio_offset so that
124 *		all subsequent insertions through bioq_disksort()
125 *		will end up after this entry;
126 *
127 * bioq_remove()	remove a generic element from the queue, act as
128 *		bioq_takefirst() if invoked on the head of the queue.
129 *
 * The semantics of these methods are the same as the operations
 * on the underlying TAILQ, but with additional guarantees on
 * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
 * can be useful for making sure that all previous ops are flushed
 * to disk before continuing.
135 *
136 * Updating bioq->last_offset on a bioq_insert_head() guarantees
137 * that the bio inserted with the last bioq_insert_head() will stay
138 * at the head of the queue even after subsequent bioq_disksort().
139 *
140 * Note that when the direct queue manipulation functions are used,
141 * the queue may contain multiple inversion points (i.e. more than
142 * two sorted sequences of requests).
143 *
144 */
145
146void
147bioq_init(struct bio_queue_head *head)
148{
149
150	TAILQ_INIT(&head->queue);
151	head->last_offset = 0;
152	head->insert_point = NULL;
153}
154
155void
156bioq_remove(struct bio_queue_head *head, struct bio *bp)
157{
158
159	if (head->insert_point == NULL) {
160		if (bp == TAILQ_FIRST(&head->queue))
161			head->last_offset = bp->bio_offset + bp->bio_length;
162	} else if (bp == head->insert_point)
163		head->insert_point = NULL;
164
165	TAILQ_REMOVE(&head->queue, bp, bio_queue);
166}
167
168void
169bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error)
170{
171	struct bio *bp;
172
173	while ((bp = bioq_takefirst(head)) != NULL)
174		biofinish(bp, stp, error);
175}
176
177void
178bioq_insert_head(struct bio_queue_head *head, struct bio *bp)
179{
180
181	if (head->insert_point == NULL)
182		head->last_offset = bp->bio_offset;
183	TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
184}
185
186void
187bioq_insert_tail(struct bio_queue_head *head, struct bio *bp)
188{
189
190	TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue);
191	head->insert_point = bp;
192	head->last_offset = bp->bio_offset;
193}
194
195struct bio *
196bioq_first(struct bio_queue_head *head)
197{
198
199	return (TAILQ_FIRST(&head->queue));
200}
201
202struct bio *
203bioq_takefirst(struct bio_queue_head *head)
204{
205	struct bio *bp;
206
207	bp = TAILQ_FIRST(&head->queue);
208	if (bp != NULL)
209		bioq_remove(head, bp);
210	return (bp);
211}
212
213/*
214 * Compute the sorting key. The cast to unsigned is
215 * fundamental for correctness, see the description
216 * near the beginning of the file.
217 */
218static inline uoff_t
219bioq_bio_key(struct bio_queue_head *head, struct bio *bp)
220{
221
222	return ((uoff_t)(bp->bio_offset - head->last_offset));
223}
224
225/*
226 * Seek sort for disks.
227 *
228 * Sort all requests in a single queue while keeping
229 * track of the current position of the disk with last_offset.
230 * See above for details.
231 */
232void
233bioq_disksort(struct bio_queue_head *head, struct bio *bp)
234{
235	struct bio *cur, *prev;
236	uoff_t key;
237
238	if ((bp->bio_flags & BIO_ORDERED) != 0) {
239		/*
240		 * Ordered transactions can only be dispatched
241		 * after any currently queued transactions.  They
242		 * also have barrier semantics - no transactions
243		 * queued in the future can pass them.
244		 */
245		bioq_insert_tail(head, bp);
246		return;
247	}
248
249	/*
250	 * We should only sort requests of types that have concept of offset.
251	 * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree
252	 * of ordering even if strict ordering is not requested explicitly.
253	 */
254	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
255	    bp->bio_cmd != BIO_DELETE) {
256		bioq_insert_tail(head, bp);
257		return;
258	}
259
260	prev = NULL;
261	key = bioq_bio_key(head, bp);
262	cur = TAILQ_FIRST(&head->queue);
263
264	if (head->insert_point) {
265		prev = head->insert_point;
266		cur = TAILQ_NEXT(head->insert_point, bio_queue);
267	}
268
269	while (cur != NULL && key >= bioq_bio_key(head, cur)) {
270		prev = cur;
271		cur = TAILQ_NEXT(cur, bio_queue);
272	}
273
274	if (prev == NULL)
275		TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
276	else
277		TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue);
278}
279