1224135Sdim/*- 2224135Sdim * ---------------------------------------------------------------------------- 3224135Sdim * "THE BEER-WARE LICENSE" (Revision 42): 4224135Sdim * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5224135Sdim * can do whatever you want with this stuff. If we meet some day, and you think 6224135Sdim * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7224135Sdim * ---------------------------------------------------------------------------- 8224135Sdim * 9224135Sdim * The bioq_disksort() (and the specification of the bioq API) 10224135Sdim * have been written by Luigi Rizzo and Fabio Checconi under the same 11249423Sdim * license as above. 12249423Sdim */ 13243830Sdim 14249423Sdim#include <sys/cdefs.h> 15224135Sdim__FBSDID("$FreeBSD$"); 16224135Sdim 17224135Sdim//#include "opt_geom.h" 18224135Sdim 19224135Sdim#include <sys/param.h> 20224135Sdim#include <sys/systm.h> 21224135Sdim#include <sys/bio.h> 22224135Sdim#include <sys/conf.h> 23224135Sdim#include <sys/disk.h> 24224135Sdim#include <geom/geom_disk.h> 25224135Sdim#include "g_sched.h" 26224135Sdim 27224135Sdim/* 28224135Sdim * BIO queue implementation 29224135Sdim * 30224135Sdim * Please read carefully the description below before making any change 31226633Sdim * to the code, or you might change the behaviour of the data structure 32224135Sdim * in undesirable ways. 33224135Sdim * 34224135Sdim * A bioq stores disk I/O request (bio), normally sorted according to 35224135Sdim * the distance of the requested position (bio->bio_offset) from the 36224135Sdim * current head position (bioq->last_offset) in the scan direction, i.e. 37224135Sdim * 38224135Sdim * (uoff_t)(bio_offset - last_offset) 39224135Sdim * 40224135Sdim * Note that the cast to unsigned (uoff_t) is fundamental to insure 41224135Sdim * that the distance is computed in the scan direction. 42224135Sdim * 43224135Sdim * The main methods for manipulating the bioq are: 44226633Sdim * 45224135Sdim * bioq_disksort() performs an ordered insertion; 46263508Sdim * 47263508Sdim * bioq_first() return the head of the queue, without removing; 48263508Sdim * 49224135Sdim * bioq_takefirst() return and remove the head of the queue, 50224135Sdim * updating the 'current head position' as 51226633Sdim * bioq->last_offset = bio->bio_offset + bio->bio_length; 52224135Sdim * 53234353Sdim * When updating the 'current head position', we assume that the result of 54234353Sdim * bioq_takefirst() is dispatched to the device, so bioq->last_offset 55234353Sdim * represents the head position once the request is complete. 56234353Sdim * 57234353Sdim * If the bioq is manipulated using only the above calls, it starts 58234353Sdim * with a sorted sequence of requests with bio_offset >= last_offset, 59224135Sdim * possibly followed by another sorted sequence of requests with 60224135Sdim * 0 <= bio_offset < bioq->last_offset 61234353Sdim * 62224135Sdim * NOTE: historical behaviour was to ignore bio->bio_length in the 63224135Sdim * update, but its use tracks the head position in a better way. 64224135Sdim * Historical behaviour was also to update the head position when 65224135Sdim * the request under service is complete, rather than when the 66224135Sdim * request is extracted from the queue. However, the current API 67224135Sdim * has no method to update the head position; secondly, once 68226633Sdim * a request has been submitted to the disk, we have no idea of 69234353Sdim * the actual head position, so the final one is our best guess. 70234353Sdim * 71226633Sdim * --- Direct queue manipulation --- 72226633Sdim * 73226633Sdim * A bioq uses an underlying TAILQ to store requests, so we also 74226633Sdim * export methods to manipulate the TAILQ, in particular: 75224135Sdim * 76226633Sdim * bioq_insert_tail() insert an entry at the end. 77226633Sdim * It also creates a 'barrier' so all subsequent 78226633Sdim * insertions through bioq_disksort() will end up 79239462Sdim * after this entry; 80239462Sdim * 81239462Sdim * bioq_insert_head() insert an entry at the head, update 82226633Sdim * bioq->last_offset = bio->bio_offset so that 83226633Sdim * all subsequent insertions through bioq_disksort() 84224135Sdim * will end up after this entry; 85224135Sdim * 86224135Sdim * bioq_remove() remove a generic element from the queue, act as 87224135Sdim * bioq_takefirst() if invoked on the head of the queue. 88226633Sdim * 89224135Sdim * The semantic of these methods is the same as the operations 90224135Sdim * on the underlying TAILQ, but with additional guarantees on 91224135Sdim * subsequent bioq_disksort() calls. E.g. bioq_insert_tail() 92224135Sdim * can be useful for making sure that all previous ops are flushed 93224135Sdim * to disk before continuing. 94226633Sdim * 95224135Sdim * Updating bioq->last_offset on a bioq_insert_head() guarantees 96224135Sdim * that the bio inserted with the last bioq_insert_head() will stay 97224135Sdim * at the head of the queue even after subsequent bioq_disksort(). 98224135Sdim * 99224135Sdim * Note that when the direct queue manipulation functions are used, 100226633Sdim * the queue may contain multiple inversion points (i.e. more than 101224135Sdim * two sorted sequences of requests). 102224135Sdim * 103224135Sdim */ 104224135Sdim 105224135Sdimvoid 106224135Sdimgs_bioq_init(struct bio_queue_head *head) 107224135Sdim{ 108224135Sdim 109224135Sdim TAILQ_INIT(&head->queue); 110224135Sdim head->last_offset = 0; 111224135Sdim head->insert_point = NULL; 112226633Sdim} 113224135Sdim 114224135Sdimvoid 115224135Sdimgs_bioq_remove(struct bio_queue_head *head, struct bio *bp) 116224135Sdim{ 117226633Sdim 118224135Sdim if (head->insert_point == NULL) { 119234353Sdim if (bp == TAILQ_FIRST(&head->queue)) 120234353Sdim head->last_offset = bp->bio_offset + bp->bio_length; 121234353Sdim } else if (bp == head->insert_point) 122234353Sdim head->insert_point = NULL; 123234353Sdim 124234353Sdim TAILQ_REMOVE(&head->queue, bp, bio_queue); 125234353Sdim} 126224135Sdim 127234353Sdimvoid 128224135Sdimgs_bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error) 129263508Sdim{ 130224135Sdim struct bio *bp; 131224135Sdim 132224135Sdim while ((bp = gs_bioq_takefirst(head)) != NULL) 133224135Sdim biofinish(bp, stp, error); 134224135Sdim} 135224135Sdim 136224135Sdimvoid 137234353Sdimgs_bioq_insert_head(struct bio_queue_head *head, struct bio *bp) 138224135Sdim{ 139224135Sdim 140224135Sdim if (head->insert_point == NULL) 141224135Sdim head->last_offset = bp->bio_offset; 142224135Sdim TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 143234353Sdim} 144224135Sdim 145224135Sdimvoid 146224135Sdimgs_bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) 147224135Sdim{ 148234353Sdim 149224135Sdim TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); 150263508Sdim head->insert_point = bp; 151263508Sdim head->last_offset = bp->bio_offset; 152263508Sdim} 153226633Sdim 154224135Sdimstruct bio * 155224135Sdimgs_bioq_first(struct bio_queue_head *head) 156224135Sdim{ 157224135Sdim 158224135Sdim return (TAILQ_FIRST(&head->queue)); 159224135Sdim} 160224135Sdim 161224135Sdimstruct bio * 162224135Sdimgs_bioq_takefirst(struct bio_queue_head *head) 163224135Sdim{ 164224135Sdim struct bio *bp; 165224135Sdim 166224135Sdim bp = TAILQ_FIRST(&head->queue); 167224135Sdim if (bp != NULL) 168224135Sdim gs_bioq_remove(head, bp); 169224135Sdim return (bp); 170226633Sdim} 171226633Sdim 172224135Sdim/* 173224135Sdim * Compute the sorting key. The cast to unsigned is 174224135Sdim * fundamental for correctness, see the description 175224135Sdim * near the beginning of the file. 176224135Sdim */ 177263508Sdimstatic inline uoff_t 178263508Sdimgs_bioq_bio_key(struct bio_queue_head *head, struct bio *bp) 179263508Sdim{ 180263508Sdim 181263508Sdim return ((uoff_t)(bp->bio_offset - head->last_offset)); 182263508Sdim} 183224135Sdim 184263508Sdim/* 185263508Sdim * Seek sort for disks. 186263508Sdim * 187263508Sdim * Sort all requests in a single queue while keeping 188263508Sdim * track of the current position of the disk with last_offset. 189224135Sdim * See above for details. 190263508Sdim */ 191263508Sdimvoid 192263508Sdimgs_bioq_disksort(struct bio_queue_head *head, struct bio *bp) 193224135Sdim{ 194224135Sdim struct bio *cur, *prev; 195224135Sdim uoff_t key; 196224135Sdim 197224135Sdim if ((bp->bio_flags & BIO_ORDERED) != 0) { 198224135Sdim /* 199234353Sdim * Ordered transactions can only be dispatched 200224135Sdim * after any currently queued transactions. They 201224135Sdim * also have barrier semantics - no transactions 202224135Sdim * queued in the future can pass them. 203224135Sdim */ 204224135Sdim gs_bioq_insert_tail(head, bp); 205224135Sdim return; 206224135Sdim } 207224135Sdim 208224135Sdim prev = NULL; 209224135Sdim key = gs_bioq_bio_key(head, bp); 210224135Sdim cur = TAILQ_FIRST(&head->queue); 211224135Sdim 212224135Sdim if (head->insert_point) { 213234353Sdim prev = head->insert_point; 214224135Sdim cur = TAILQ_NEXT(head->insert_point, bio_queue); 215224135Sdim } 216224135Sdim 217224135Sdim while (cur != NULL && key >= gs_bioq_bio_key(head, cur)) { 218224135Sdim prev = cur; 219224135Sdim cur = TAILQ_NEXT(cur, bio_queue); 220224135Sdim } 221224135Sdim 222224135Sdim if (prev == NULL) 223224135Sdim TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 224224135Sdim else 225224135Sdim TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue); 226224135Sdim} 227224135Sdim