1139804Simp/*- 250565Sphk * ---------------------------------------------------------------------------- 350565Sphk * "THE BEER-WARE LICENSE" (Revision 42): 450565Sphk * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 550565Sphk * can do whatever you want with this stuff. If we meet some day, and you think 650565Sphk * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 750565Sphk * ---------------------------------------------------------------------------- 8188571Sluigi * 9188571Sluigi * The bioq_disksort() (and the specification of the bioq API) 10188571Sluigi * have been written by Luigi Rizzo and Fabio Checconi under the same 11188571Sluigi * license as above. 1250565Sphk */ 1350565Sphk 14116182Sobrien#include <sys/cdefs.h> 15116182Sobrien__FBSDID("$FreeBSD: stable/11/sys/kern/subr_disk.c 344072 2019-02-13 00:35:09Z mav $"); 16116182Sobrien 1792074Sphk#include "opt_geom.h" 1892074Sphk 1950565Sphk#include <sys/param.h> 2050565Sphk#include <sys/systm.h> 2160041Sphk#include <sys/bio.h> 2250565Sphk#include <sys/conf.h> 2350565Sphk#include <sys/disk.h> 24112953Sphk#include <geom/geom_disk.h> 2564880Sphk 26210226Strasz/*- 27103675Sphk * Disk error is the preface to plaintive error messages 28103675Sphk * about failing disk transfers. It prints messages of the form 29103675Sphk * "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347" 30103675Sphk * blkdone should be -1 if the position of the error is unknown. 31103675Sphk * The message is printed with printf. 32103675Sphk */ 33103675Sphkvoid 34103675Sphkdisk_err(struct bio *bp, const char *what, int blkdone, int nl) 35103675Sphk{ 36103675Sphk daddr_t sn; 37103675Sphk 38111808Sphk if (bp->bio_dev != NULL) 39111808Sphk printf("%s: %s ", devtoname(bp->bio_dev), what); 40111808Sphk else if (bp->bio_disk != NULL) 41111808Sphk printf("%s%d: %s ", 42111808Sphk bp->bio_disk->d_name, bp->bio_disk->d_unit, what); 43111808Sphk else 44111808Sphk printf("disk??: %s ", what); 45103675Sphk switch(bp->bio_cmd) { 46105365Ssobomax case BIO_READ: printf("cmd=read "); break; 47105365Ssobomax case BIO_WRITE: printf("cmd=write "); break; 48105365Ssobomax case BIO_DELETE: printf("cmd=delete "); break; 49105365Ssobomax case BIO_GETATTR: printf("cmd=getattr "); break; 50163832Spjd case BIO_FLUSH: printf("cmd=flush "); break; 51105365Ssobomax default: printf("cmd=%x ", bp->bio_cmd); break; 52103675Sphk } 53121210Sphk sn = bp->bio_pblkno; 54103675Sphk if (bp->bio_bcount <= DEV_BSIZE) { 55103675Sphk printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : ""); 56103675Sphk return; 57103675Sphk } 58103675Sphk if (blkdone >= 0) { 59103675Sphk sn += blkdone; 60103675Sphk printf("fsbn %jd of ", (intmax_t)sn); 61103675Sphk } 62121210Sphk printf("%jd-%jd", (intmax_t)bp->bio_pblkno, 63121210Sphk (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE)); 64103675Sphk if (nl) 65103675Sphk printf("\n"); 66103675Sphk} 67103683Sphk 68103683Sphk/* 69112846Sphk * BIO queue implementation 70188571Sluigi * 71188571Sluigi * Please read carefully the description below before making any change 72188571Sluigi * to the code, or you might change the behaviour of the data structure 73188571Sluigi * in undesirable ways. 74188571Sluigi * 75188571Sluigi * A bioq stores disk I/O request (bio), normally sorted according to 76188571Sluigi * the distance of the requested position (bio->bio_offset) from the 77188571Sluigi * current head position (bioq->last_offset) in the scan direction, i.e. 78188571Sluigi * 79188571Sluigi * (uoff_t)(bio_offset - last_offset) 80188571Sluigi * 81188571Sluigi * Note that the cast to unsigned (uoff_t) is fundamental to insure 82188571Sluigi * that the distance is computed in the scan direction. 83188571Sluigi * 84188571Sluigi * The main methods for manipulating the bioq are: 85188571Sluigi * 86188571Sluigi * bioq_disksort() performs an ordered insertion; 87188571Sluigi * 88188571Sluigi * bioq_first() return the head of the queue, without removing; 89188571Sluigi * 90188571Sluigi * bioq_takefirst() return and remove the head of the queue, 91188571Sluigi * updating the 'current head position' as 92188571Sluigi * bioq->last_offset = bio->bio_offset + bio->bio_length; 93188571Sluigi * 94188571Sluigi * When updating the 'current head position', we assume that the result of 95188571Sluigi * bioq_takefirst() is dispatched to the device, so bioq->last_offset 96188571Sluigi * represents the head position once the request is complete. 97188571Sluigi * 98188571Sluigi * If the bioq is manipulated using only the above calls, it starts 99188571Sluigi * with a sorted sequence of requests with bio_offset >= last_offset, 100188571Sluigi * possibly followed by another sorted sequence of requests with 101188571Sluigi * 0 <= bio_offset < bioq->last_offset 102188571Sluigi * 103188571Sluigi * NOTE: historical behaviour was to ignore bio->bio_length in the 104188571Sluigi * update, but its use tracks the head position in a better way. 105188571Sluigi * Historical behaviour was also to update the head position when 106188571Sluigi * the request under service is complete, rather than when the 107188571Sluigi * request is extracted from the queue. However, the current API 108188571Sluigi * has no method to update the head position; secondly, once 109188571Sluigi * a request has been submitted to the disk, we have no idea of 110188571Sluigi * the actual head position, so the final one is our best guess. 111188571Sluigi * 112188571Sluigi * --- Direct queue manipulation --- 113188571Sluigi * 114188571Sluigi * A bioq uses an underlying TAILQ to store requests, so we also 115188571Sluigi * export methods to manipulate the TAILQ, in particular: 116188571Sluigi * 117188571Sluigi * bioq_insert_tail() insert an entry at the end. 118188571Sluigi * It also creates a 'barrier' so all subsequent 119188571Sluigi * insertions through bioq_disksort() will end up 120188571Sluigi * after this entry; 121188571Sluigi * 122188571Sluigi * bioq_insert_head() insert an entry at the head, update 123188571Sluigi * bioq->last_offset = bio->bio_offset so that 124188571Sluigi * all subsequent insertions through bioq_disksort() 125188571Sluigi * will end up after this entry; 126188571Sluigi * 127188571Sluigi * bioq_remove() remove a generic element from the queue, act as 128188571Sluigi * bioq_takefirst() if invoked on the head of the queue. 129188571Sluigi * 130212160Sgibbs * The semantic of these methods is the same as the operations 131188571Sluigi * on the underlying TAILQ, but with additional guarantees on 132188571Sluigi * subsequent bioq_disksort() calls. E.g. bioq_insert_tail() 133188571Sluigi * can be useful for making sure that all previous ops are flushed 134188571Sluigi * to disk before continuing. 135188571Sluigi * 136188571Sluigi * Updating bioq->last_offset on a bioq_insert_head() guarantees 137188571Sluigi * that the bio inserted with the last bioq_insert_head() will stay 138188571Sluigi * at the head of the queue even after subsequent bioq_disksort(). 139188571Sluigi * 140188571Sluigi * Note that when the direct queue manipulation functions are used, 141188571Sluigi * the queue may contain multiple inversion points (i.e. more than 142188571Sluigi * two sorted sequences of requests). 143188571Sluigi * 144112846Sphk */ 145112846Sphk 146112846Sphkvoid 147112846Sphkbioq_init(struct bio_queue_head *head) 148112846Sphk{ 149188571Sluigi 150112846Sphk TAILQ_INIT(&head->queue); 151121207Sphk head->last_offset = 0; 152112846Sphk head->insert_point = NULL; 153112846Sphk} 154112846Sphk 155112846Sphkvoid 156112846Sphkbioq_remove(struct bio_queue_head *head, struct bio *bp) 157112846Sphk{ 158188571Sluigi 159212160Sgibbs if (head->insert_point == NULL) { 160212160Sgibbs if (bp == TAILQ_FIRST(&head->queue)) 161212160Sgibbs head->last_offset = bp->bio_offset + bp->bio_length; 162212160Sgibbs } else if (bp == head->insert_point) 163188571Sluigi head->insert_point = NULL; 164188571Sluigi 165112846Sphk TAILQ_REMOVE(&head->queue, bp, bio_queue); 166112846Sphk} 167112941Sphk 168112846Sphkvoid 169112941Sphkbioq_flush(struct bio_queue_head *head, struct devstat *stp, int error) 170112941Sphk{ 171112941Sphk struct bio *bp; 172112941Sphk 173134038Sphk while ((bp = bioq_takefirst(head)) != NULL) 174121083Sphk biofinish(bp, stp, error); 175112941Sphk} 176112941Sphk 177112941Sphkvoid 178138800Spjdbioq_insert_head(struct bio_queue_head *head, struct bio *bp) 179138800Spjd{ 180138800Spjd 181212160Sgibbs if (head->insert_point == NULL) 182212160Sgibbs head->last_offset = bp->bio_offset; 183138800Spjd TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 184138800Spjd} 185138800Spjd 186138800Spjdvoid 187112846Sphkbioq_insert_tail(struct bio_queue_head *head, struct bio *bp) 188112846Sphk{ 189112846Sphk 190112846Sphk TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); 191188571Sluigi head->insert_point = bp; 192212160Sgibbs head->last_offset = bp->bio_offset; 193112846Sphk} 194112846Sphk 195112846Sphkstruct bio * 196112846Sphkbioq_first(struct bio_queue_head *head) 197112846Sphk{ 198112846Sphk 199112846Sphk return (TAILQ_FIRST(&head->queue)); 200112846Sphk} 201112846Sphk 202134038Sphkstruct bio * 203134038Sphkbioq_takefirst(struct bio_queue_head *head) 204134038Sphk{ 205134038Sphk struct bio *bp; 206112846Sphk 207134038Sphk bp = TAILQ_FIRST(&head->queue); 208134038Sphk if (bp != NULL) 209134038Sphk bioq_remove(head, bp); 210134038Sphk return (bp); 211134038Sphk} 212134038Sphk 213112846Sphk/* 214188571Sluigi * Compute the sorting key. The cast to unsigned is 215188571Sluigi * fundamental for correctness, see the description 216188571Sluigi * near the beginning of the file. 217188571Sluigi */ 218188571Sluigistatic inline uoff_t 219188571Sluigibioq_bio_key(struct bio_queue_head *head, struct bio *bp) 220188571Sluigi{ 221188571Sluigi 222188571Sluigi return ((uoff_t)(bp->bio_offset - head->last_offset)); 223188571Sluigi} 224188571Sluigi 225188571Sluigi/* 226103683Sphk * Seek sort for disks. 227103683Sphk * 228188571Sluigi * Sort all requests in a single queue while keeping 229188571Sluigi * track of the current position of the disk with last_offset. 230188571Sluigi * See above for details. 231103683Sphk */ 232103683Sphkvoid 233188571Sluigibioq_disksort(struct bio_queue_head *head, struct bio *bp) 234103683Sphk{ 235212160Sgibbs struct bio *cur, *prev; 236212160Sgibbs uoff_t key; 237103683Sphk 238212160Sgibbs if ((bp->bio_flags & BIO_ORDERED) != 0) { 239212160Sgibbs /* 240212160Sgibbs * Ordered transactions can only be dispatched 241212160Sgibbs * after any currently queued transactions. They 242212160Sgibbs * also have barrier semantics - no transactions 243212160Sgibbs * queued in the future can pass them. 244212160Sgibbs */ 245212160Sgibbs bioq_insert_tail(head, bp); 246212160Sgibbs return; 247212160Sgibbs } 248212160Sgibbs 249344072Smav /* 250344072Smav * We should only sort requests of types that have concept of offset. 251344072Smav * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree 252344072Smav * of ordering even if strict ordering is not requested explicitly. 253344072Smav */ 254344072Smav if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE && 255344072Smav bp->bio_cmd != BIO_DELETE) { 256344072Smav bioq_insert_tail(head, bp); 257344072Smav return; 258344072Smav } 259344072Smav 260212160Sgibbs prev = NULL; 261212160Sgibbs key = bioq_bio_key(head, bp); 262188571Sluigi cur = TAILQ_FIRST(&head->queue); 263188571Sluigi 264212160Sgibbs if (head->insert_point) { 265212160Sgibbs prev = head->insert_point; 266212160Sgibbs cur = TAILQ_NEXT(head->insert_point, bio_queue); 267212160Sgibbs } 268188571Sluigi 269188571Sluigi while (cur != NULL && key >= bioq_bio_key(head, cur)) { 270188571Sluigi prev = cur; 271188571Sluigi cur = TAILQ_NEXT(cur, bio_queue); 272103683Sphk } 273188571Sluigi 274188571Sluigi if (prev == NULL) 275188571Sluigi TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 276188571Sluigi else 277188571Sluigi TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue); 278103683Sphk} 279