1139804Simp/*-
250565Sphk * ----------------------------------------------------------------------------
350565Sphk * "THE BEER-WARE LICENSE" (Revision 42):
450565Sphk * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
550565Sphk * can do whatever you want with this stuff. If we meet some day, and you think
650565Sphk * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
750565Sphk * ----------------------------------------------------------------------------
8188571Sluigi *
9188571Sluigi * The bioq_disksort() (and the specification of the bioq API)
10188571Sluigi * have been written by Luigi Rizzo and Fabio Checconi under the same
11188571Sluigi * license as above.
1250565Sphk */
1350565Sphk
14116182Sobrien#include <sys/cdefs.h>
15116182Sobrien__FBSDID("$FreeBSD: stable/11/sys/kern/subr_disk.c 344072 2019-02-13 00:35:09Z mav $");
16116182Sobrien
1792074Sphk#include "opt_geom.h"
1892074Sphk
1950565Sphk#include <sys/param.h>
2050565Sphk#include <sys/systm.h>
2160041Sphk#include <sys/bio.h>
2250565Sphk#include <sys/conf.h>
2350565Sphk#include <sys/disk.h>
24112953Sphk#include <geom/geom_disk.h>
2564880Sphk
26210226Strasz/*-
27103675Sphk * Disk error is the preface to plaintive error messages
28103675Sphk * about failing disk transfers.  It prints messages of the form
29103675Sphk * 	"hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347"
30103675Sphk * blkdone should be -1 if the position of the error is unknown.
31103675Sphk * The message is printed with printf.
32103675Sphk */
33103675Sphkvoid
34103675Sphkdisk_err(struct bio *bp, const char *what, int blkdone, int nl)
35103675Sphk{
36103675Sphk	daddr_t sn;
37103675Sphk
38111808Sphk	if (bp->bio_dev != NULL)
39111808Sphk		printf("%s: %s ", devtoname(bp->bio_dev), what);
40111808Sphk	else if (bp->bio_disk != NULL)
41111808Sphk		printf("%s%d: %s ",
42111808Sphk		    bp->bio_disk->d_name, bp->bio_disk->d_unit, what);
43111808Sphk	else
44111808Sphk		printf("disk??: %s ", what);
45103675Sphk	switch(bp->bio_cmd) {
46105365Ssobomax	case BIO_READ:		printf("cmd=read "); break;
47105365Ssobomax	case BIO_WRITE:		printf("cmd=write "); break;
48105365Ssobomax	case BIO_DELETE:	printf("cmd=delete "); break;
49105365Ssobomax	case BIO_GETATTR:	printf("cmd=getattr "); break;
50163832Spjd	case BIO_FLUSH:		printf("cmd=flush "); break;
51105365Ssobomax	default:		printf("cmd=%x ", bp->bio_cmd); break;
52103675Sphk	}
53121210Sphk	sn = bp->bio_pblkno;
54103675Sphk	if (bp->bio_bcount <= DEV_BSIZE) {
55103675Sphk		printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : "");
56103675Sphk		return;
57103675Sphk	}
58103675Sphk	if (blkdone >= 0) {
59103675Sphk		sn += blkdone;
60103675Sphk		printf("fsbn %jd of ", (intmax_t)sn);
61103675Sphk	}
62121210Sphk	printf("%jd-%jd", (intmax_t)bp->bio_pblkno,
63121210Sphk	    (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE));
64103675Sphk	if (nl)
65103675Sphk		printf("\n");
66103675Sphk}
67103683Sphk
68103683Sphk/*
69112846Sphk * BIO queue implementation
70188571Sluigi *
71188571Sluigi * Please read carefully the description below before making any change
72188571Sluigi * to the code, or you might change the behaviour of the data structure
73188571Sluigi * in undesirable ways.
74188571Sluigi *
75188571Sluigi * A bioq stores disk I/O request (bio), normally sorted according to
76188571Sluigi * the distance of the requested position (bio->bio_offset) from the
77188571Sluigi * current head position (bioq->last_offset) in the scan direction, i.e.
78188571Sluigi *
79188571Sluigi * 	(uoff_t)(bio_offset - last_offset)
80188571Sluigi *
81188571Sluigi * Note that the cast to unsigned (uoff_t) is fundamental to insure
82188571Sluigi * that the distance is computed in the scan direction.
83188571Sluigi *
84188571Sluigi * The main methods for manipulating the bioq are:
85188571Sluigi *
86188571Sluigi *   bioq_disksort()	performs an ordered insertion;
87188571Sluigi *
88188571Sluigi *   bioq_first()	return the head of the queue, without removing;
89188571Sluigi *
90188571Sluigi *   bioq_takefirst()	return and remove the head of the queue,
91188571Sluigi *		updating the 'current head position' as
92188571Sluigi *		bioq->last_offset = bio->bio_offset + bio->bio_length;
93188571Sluigi *
94188571Sluigi * When updating the 'current head position', we assume that the result of
95188571Sluigi * bioq_takefirst() is dispatched to the device, so bioq->last_offset
96188571Sluigi * represents the head position once the request is complete.
97188571Sluigi *
98188571Sluigi * If the bioq is manipulated using only the above calls, it starts
99188571Sluigi * with a sorted sequence of requests with bio_offset >= last_offset,
100188571Sluigi * possibly followed by another sorted sequence of requests with
101188571Sluigi * 0 <= bio_offset < bioq->last_offset
102188571Sluigi *
103188571Sluigi * NOTE: historical behaviour was to ignore bio->bio_length in the
104188571Sluigi *	update, but its use tracks the head position in a better way.
105188571Sluigi *	Historical behaviour was also to update the head position when
106188571Sluigi *	the request under service is complete, rather than when the
107188571Sluigi *	request is extracted from the queue. However, the current API
108188571Sluigi *	has no method to update the head position; secondly, once
109188571Sluigi *	a request has been submitted to the disk, we have no idea of
110188571Sluigi *	the actual head position, so the final one is our best guess.
111188571Sluigi *
112188571Sluigi * --- Direct queue manipulation ---
113188571Sluigi *
114188571Sluigi * A bioq uses an underlying TAILQ to store requests, so we also
115188571Sluigi * export methods to manipulate the TAILQ, in particular:
116188571Sluigi *
117188571Sluigi * bioq_insert_tail()	insert an entry at the end.
118188571Sluigi *		It also creates a 'barrier' so all subsequent
119188571Sluigi *		insertions through bioq_disksort() will end up
120188571Sluigi *		after this entry;
121188571Sluigi *
122188571Sluigi * bioq_insert_head()	insert an entry at the head, update
123188571Sluigi *		bioq->last_offset = bio->bio_offset so that
124188571Sluigi *		all subsequent insertions through bioq_disksort()
125188571Sluigi *		will end up after this entry;
126188571Sluigi *
127188571Sluigi * bioq_remove()	remove a generic element from the queue, act as
128188571Sluigi *		bioq_takefirst() if invoked on the head of the queue.
129188571Sluigi *
130212160Sgibbs * The semantic of these methods is the same as the operations
131188571Sluigi * on the underlying TAILQ, but with additional guarantees on
132188571Sluigi * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
133188571Sluigi * can be useful for making sure that all previous ops are flushed
134188571Sluigi * to disk before continuing.
135188571Sluigi *
136188571Sluigi * Updating bioq->last_offset on a bioq_insert_head() guarantees
137188571Sluigi * that the bio inserted with the last bioq_insert_head() will stay
138188571Sluigi * at the head of the queue even after subsequent bioq_disksort().
139188571Sluigi *
140188571Sluigi * Note that when the direct queue manipulation functions are used,
141188571Sluigi * the queue may contain multiple inversion points (i.e. more than
142188571Sluigi * two sorted sequences of requests).
143188571Sluigi *
144112846Sphk */
145112846Sphk
146112846Sphkvoid
147112846Sphkbioq_init(struct bio_queue_head *head)
148112846Sphk{
149188571Sluigi
150112846Sphk	TAILQ_INIT(&head->queue);
151121207Sphk	head->last_offset = 0;
152112846Sphk	head->insert_point = NULL;
153112846Sphk}
154112846Sphk
155112846Sphkvoid
156112846Sphkbioq_remove(struct bio_queue_head *head, struct bio *bp)
157112846Sphk{
158188571Sluigi
159212160Sgibbs	if (head->insert_point == NULL) {
160212160Sgibbs		if (bp == TAILQ_FIRST(&head->queue))
161212160Sgibbs			head->last_offset = bp->bio_offset + bp->bio_length;
162212160Sgibbs	} else if (bp == head->insert_point)
163188571Sluigi		head->insert_point = NULL;
164188571Sluigi
165112846Sphk	TAILQ_REMOVE(&head->queue, bp, bio_queue);
166112846Sphk}
167112941Sphk
168112846Sphkvoid
169112941Sphkbioq_flush(struct bio_queue_head *head, struct devstat *stp, int error)
170112941Sphk{
171112941Sphk	struct bio *bp;
172112941Sphk
173134038Sphk	while ((bp = bioq_takefirst(head)) != NULL)
174121083Sphk		biofinish(bp, stp, error);
175112941Sphk}
176112941Sphk
177112941Sphkvoid
178138800Spjdbioq_insert_head(struct bio_queue_head *head, struct bio *bp)
179138800Spjd{
180138800Spjd
181212160Sgibbs	if (head->insert_point == NULL)
182212160Sgibbs		head->last_offset = bp->bio_offset;
183138800Spjd	TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
184138800Spjd}
185138800Spjd
186138800Spjdvoid
187112846Sphkbioq_insert_tail(struct bio_queue_head *head, struct bio *bp)
188112846Sphk{
189112846Sphk
190112846Sphk	TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue);
191188571Sluigi	head->insert_point = bp;
192212160Sgibbs	head->last_offset = bp->bio_offset;
193112846Sphk}
194112846Sphk
195112846Sphkstruct bio *
196112846Sphkbioq_first(struct bio_queue_head *head)
197112846Sphk{
198112846Sphk
199112846Sphk	return (TAILQ_FIRST(&head->queue));
200112846Sphk}
201112846Sphk
202134038Sphkstruct bio *
203134038Sphkbioq_takefirst(struct bio_queue_head *head)
204134038Sphk{
205134038Sphk	struct bio *bp;
206112846Sphk
207134038Sphk	bp = TAILQ_FIRST(&head->queue);
208134038Sphk	if (bp != NULL)
209134038Sphk		bioq_remove(head, bp);
210134038Sphk	return (bp);
211134038Sphk}
212134038Sphk
213112846Sphk/*
214188571Sluigi * Compute the sorting key. The cast to unsigned is
215188571Sluigi * fundamental for correctness, see the description
216188571Sluigi * near the beginning of the file.
217188571Sluigi */
218188571Sluigistatic inline uoff_t
219188571Sluigibioq_bio_key(struct bio_queue_head *head, struct bio *bp)
220188571Sluigi{
221188571Sluigi
222188571Sluigi	return ((uoff_t)(bp->bio_offset - head->last_offset));
223188571Sluigi}
224188571Sluigi
225188571Sluigi/*
226103683Sphk * Seek sort for disks.
227103683Sphk *
228188571Sluigi * Sort all requests in a single queue while keeping
229188571Sluigi * track of the current position of the disk with last_offset.
230188571Sluigi * See above for details.
231103683Sphk */
232103683Sphkvoid
233188571Sluigibioq_disksort(struct bio_queue_head *head, struct bio *bp)
234103683Sphk{
235212160Sgibbs	struct bio *cur, *prev;
236212160Sgibbs	uoff_t key;
237103683Sphk
238212160Sgibbs	if ((bp->bio_flags & BIO_ORDERED) != 0) {
239212160Sgibbs		/*
240212160Sgibbs		 * Ordered transactions can only be dispatched
241212160Sgibbs		 * after any currently queued transactions.  They
242212160Sgibbs		 * also have barrier semantics - no transactions
243212160Sgibbs		 * queued in the future can pass them.
244212160Sgibbs		 */
245212160Sgibbs		bioq_insert_tail(head, bp);
246212160Sgibbs		return;
247212160Sgibbs	}
248212160Sgibbs
249344072Smav	/*
250344072Smav	 * We should only sort requests of types that have concept of offset.
251344072Smav	 * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree
252344072Smav	 * of ordering even if strict ordering is not requested explicitly.
253344072Smav	 */
254344072Smav	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
255344072Smav	    bp->bio_cmd != BIO_DELETE) {
256344072Smav		bioq_insert_tail(head, bp);
257344072Smav		return;
258344072Smav	}
259344072Smav
260212160Sgibbs	prev = NULL;
261212160Sgibbs	key = bioq_bio_key(head, bp);
262188571Sluigi	cur = TAILQ_FIRST(&head->queue);
263188571Sluigi
264212160Sgibbs	if (head->insert_point) {
265212160Sgibbs		prev = head->insert_point;
266212160Sgibbs		cur = TAILQ_NEXT(head->insert_point, bio_queue);
267212160Sgibbs	}
268188571Sluigi
269188571Sluigi	while (cur != NULL && key >= bioq_bio_key(head, cur)) {
270188571Sluigi		prev = cur;
271188571Sluigi		cur = TAILQ_NEXT(cur, bio_queue);
272103683Sphk	}
273188571Sluigi
274188571Sluigi	if (prev == NULL)
275188571Sluigi		TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
276188571Sluigi	else
277188571Sluigi		TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue);
278103683Sphk}
279