1206497Sluigi/*- 2206497Sluigi * ---------------------------------------------------------------------------- 3206497Sluigi * "THE BEER-WARE LICENSE" (Revision 42): 4206497Sluigi * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5206497Sluigi * can do whatever you want with this stuff. If we meet some day, and you think 6206497Sluigi * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7206497Sluigi * ---------------------------------------------------------------------------- 8206497Sluigi * 9206497Sluigi * The bioq_disksort() (and the specification of the bioq API) 10206497Sluigi * have been written by Luigi Rizzo and Fabio Checconi under the same 11206497Sluigi * license as above. 12206497Sluigi */ 13206497Sluigi 14206497Sluigi#include <sys/cdefs.h> 15206497Sluigi__FBSDID("$FreeBSD: releng/10.3/sys/geom/sched/subr_disk.c 212160 2010-09-02 19:40:28Z gibbs $"); 16206497Sluigi 17206497Sluigi//#include "opt_geom.h" 18206497Sluigi 19206497Sluigi#include <sys/param.h> 20206497Sluigi#include <sys/systm.h> 21206497Sluigi#include <sys/bio.h> 22206497Sluigi#include <sys/conf.h> 23206497Sluigi#include <sys/disk.h> 24206497Sluigi#include <geom/geom_disk.h> 25206497Sluigi#include "g_sched.h" 26206497Sluigi 27206497Sluigi/* 28206497Sluigi * BIO queue implementation 29206497Sluigi * 30206497Sluigi * Please read carefully the description below before making any change 31206497Sluigi * to the code, or you might change the behaviour of the data structure 32206497Sluigi * in undesirable ways. 33206497Sluigi * 34206497Sluigi * A bioq stores disk I/O request (bio), normally sorted according to 35206497Sluigi * the distance of the requested position (bio->bio_offset) from the 36206497Sluigi * current head position (bioq->last_offset) in the scan direction, i.e. 37206497Sluigi * 38206497Sluigi * (uoff_t)(bio_offset - last_offset) 39206497Sluigi * 40206497Sluigi * Note that the cast to unsigned (uoff_t) is fundamental to insure 41206497Sluigi * that the distance is computed in the scan direction. 42206497Sluigi * 43206497Sluigi * The main methods for manipulating the bioq are: 44206497Sluigi * 45206497Sluigi * bioq_disksort() performs an ordered insertion; 46206497Sluigi * 47206497Sluigi * bioq_first() return the head of the queue, without removing; 48206497Sluigi * 49206497Sluigi * bioq_takefirst() return and remove the head of the queue, 50206497Sluigi * updating the 'current head position' as 51206497Sluigi * bioq->last_offset = bio->bio_offset + bio->bio_length; 52206497Sluigi * 53206497Sluigi * When updating the 'current head position', we assume that the result of 54206497Sluigi * bioq_takefirst() is dispatched to the device, so bioq->last_offset 55206497Sluigi * represents the head position once the request is complete. 56206497Sluigi * 57206497Sluigi * If the bioq is manipulated using only the above calls, it starts 58206497Sluigi * with a sorted sequence of requests with bio_offset >= last_offset, 59206497Sluigi * possibly followed by another sorted sequence of requests with 60206497Sluigi * 0 <= bio_offset < bioq->last_offset 61206497Sluigi * 62206497Sluigi * NOTE: historical behaviour was to ignore bio->bio_length in the 63206497Sluigi * update, but its use tracks the head position in a better way. 64206497Sluigi * Historical behaviour was also to update the head position when 65206497Sluigi * the request under service is complete, rather than when the 66206497Sluigi * request is extracted from the queue. However, the current API 67206497Sluigi * has no method to update the head position; secondly, once 68206497Sluigi * a request has been submitted to the disk, we have no idea of 69206497Sluigi * the actual head position, so the final one is our best guess. 70206497Sluigi * 71206497Sluigi * --- Direct queue manipulation --- 72206497Sluigi * 73206497Sluigi * A bioq uses an underlying TAILQ to store requests, so we also 74206497Sluigi * export methods to manipulate the TAILQ, in particular: 75206497Sluigi * 76206497Sluigi * bioq_insert_tail() insert an entry at the end. 77206497Sluigi * It also creates a 'barrier' so all subsequent 78206497Sluigi * insertions through bioq_disksort() will end up 79206497Sluigi * after this entry; 80206497Sluigi * 81206497Sluigi * bioq_insert_head() insert an entry at the head, update 82206497Sluigi * bioq->last_offset = bio->bio_offset so that 83206497Sluigi * all subsequent insertions through bioq_disksort() 84206497Sluigi * will end up after this entry; 85206497Sluigi * 86206497Sluigi * bioq_remove() remove a generic element from the queue, act as 87206497Sluigi * bioq_takefirst() if invoked on the head of the queue. 88206497Sluigi * 89212160Sgibbs * The semantic of these methods is the same as the operations 90206497Sluigi * on the underlying TAILQ, but with additional guarantees on 91206497Sluigi * subsequent bioq_disksort() calls. E.g. bioq_insert_tail() 92206497Sluigi * can be useful for making sure that all previous ops are flushed 93206497Sluigi * to disk before continuing. 94206497Sluigi * 95206497Sluigi * Updating bioq->last_offset on a bioq_insert_head() guarantees 96206497Sluigi * that the bio inserted with the last bioq_insert_head() will stay 97206497Sluigi * at the head of the queue even after subsequent bioq_disksort(). 98206497Sluigi * 99206497Sluigi * Note that when the direct queue manipulation functions are used, 100206497Sluigi * the queue may contain multiple inversion points (i.e. more than 101206497Sluigi * two sorted sequences of requests). 102206497Sluigi * 103206497Sluigi */ 104206497Sluigi 105206497Sluigivoid 106206497Sluigigs_bioq_init(struct bio_queue_head *head) 107206497Sluigi{ 108206497Sluigi 109206497Sluigi TAILQ_INIT(&head->queue); 110206497Sluigi head->last_offset = 0; 111206497Sluigi head->insert_point = NULL; 112206497Sluigi} 113206497Sluigi 114206497Sluigivoid 115206497Sluigigs_bioq_remove(struct bio_queue_head *head, struct bio *bp) 116206497Sluigi{ 117206497Sluigi 118212160Sgibbs if (head->insert_point == NULL) { 119212160Sgibbs if (bp == TAILQ_FIRST(&head->queue)) 120212160Sgibbs head->last_offset = bp->bio_offset + bp->bio_length; 121212160Sgibbs } else if (bp == head->insert_point) 122206497Sluigi head->insert_point = NULL; 123206497Sluigi 124206497Sluigi TAILQ_REMOVE(&head->queue, bp, bio_queue); 125206497Sluigi} 126206497Sluigi 127206497Sluigivoid 128206497Sluigigs_bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error) 129206497Sluigi{ 130206497Sluigi struct bio *bp; 131206497Sluigi 132206497Sluigi while ((bp = gs_bioq_takefirst(head)) != NULL) 133206497Sluigi biofinish(bp, stp, error); 134206497Sluigi} 135206497Sluigi 136206497Sluigivoid 137206497Sluigigs_bioq_insert_head(struct bio_queue_head *head, struct bio *bp) 138206497Sluigi{ 139206497Sluigi 140212160Sgibbs if (head->insert_point == NULL) 141212160Sgibbs head->last_offset = bp->bio_offset; 142206497Sluigi TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 143206497Sluigi} 144206497Sluigi 145206497Sluigivoid 146206497Sluigigs_bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) 147206497Sluigi{ 148206497Sluigi 149206497Sluigi TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); 150206497Sluigi head->insert_point = bp; 151212160Sgibbs head->last_offset = bp->bio_offset; 152206497Sluigi} 153206497Sluigi 154206497Sluigistruct bio * 155206497Sluigigs_bioq_first(struct bio_queue_head *head) 156206497Sluigi{ 157206497Sluigi 158206497Sluigi return (TAILQ_FIRST(&head->queue)); 159206497Sluigi} 160206497Sluigi 161206497Sluigistruct bio * 162206497Sluigigs_bioq_takefirst(struct bio_queue_head *head) 163206497Sluigi{ 164206497Sluigi struct bio *bp; 165206497Sluigi 166206497Sluigi bp = TAILQ_FIRST(&head->queue); 167206497Sluigi if (bp != NULL) 168206497Sluigi gs_bioq_remove(head, bp); 169206497Sluigi return (bp); 170206497Sluigi} 171206497Sluigi 172206497Sluigi/* 173206497Sluigi * Compute the sorting key. The cast to unsigned is 174206497Sluigi * fundamental for correctness, see the description 175206497Sluigi * near the beginning of the file. 176206497Sluigi */ 177206497Sluigistatic inline uoff_t 178206497Sluigigs_bioq_bio_key(struct bio_queue_head *head, struct bio *bp) 179206497Sluigi{ 180206497Sluigi 181206497Sluigi return ((uoff_t)(bp->bio_offset - head->last_offset)); 182206497Sluigi} 183206497Sluigi 184206497Sluigi/* 185206497Sluigi * Seek sort for disks. 186206497Sluigi * 187206497Sluigi * Sort all requests in a single queue while keeping 188206497Sluigi * track of the current position of the disk with last_offset. 189206497Sluigi * See above for details. 190206497Sluigi */ 191206497Sluigivoid 192206497Sluigigs_bioq_disksort(struct bio_queue_head *head, struct bio *bp) 193206497Sluigi{ 194212160Sgibbs struct bio *cur, *prev; 195212160Sgibbs uoff_t key; 196206497Sluigi 197212160Sgibbs if ((bp->bio_flags & BIO_ORDERED) != 0) { 198212160Sgibbs /* 199212160Sgibbs * Ordered transactions can only be dispatched 200212160Sgibbs * after any currently queued transactions. They 201212160Sgibbs * also have barrier semantics - no transactions 202212160Sgibbs * queued in the future can pass them. 203212160Sgibbs */ 204212160Sgibbs gs_bioq_insert_tail(head, bp); 205212160Sgibbs return; 206212160Sgibbs } 207212160Sgibbs 208212160Sgibbs prev = NULL; 209212160Sgibbs key = gs_bioq_bio_key(head, bp); 210206497Sluigi cur = TAILQ_FIRST(&head->queue); 211206497Sluigi 212212160Sgibbs if (head->insert_point) { 213212160Sgibbs prev = head->insert_point; 214212160Sgibbs cur = TAILQ_NEXT(head->insert_point, bio_queue); 215212160Sgibbs } 216206497Sluigi 217206497Sluigi while (cur != NULL && key >= gs_bioq_bio_key(head, cur)) { 218206497Sluigi prev = cur; 219206497Sluigi cur = TAILQ_NEXT(cur, bio_queue); 220206497Sluigi } 221206497Sluigi 222206497Sluigi if (prev == NULL) 223206497Sluigi TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 224206497Sluigi else 225206497Sluigi TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue); 226206497Sluigi} 227