/*
 *  Deadline i/o scheduler.
 *
 *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>

/*
 * See Documentation/block/deadline-iosched.txt
 */
static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;    /* max times reads can starve a write */
static const int fifo_batch = 16;       /* # of sequential requests treated as one
				     by the above parameters. For throughput. */
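
/*
 * When this scheduler is active for a device, the tunables above are
 * exported through sysfs (see Documentation/block/deadline-iosched.txt).
 * A rough usage sketch, assuming the device is sda and deadline is its
 * current scheduler:
 *
 *	# cat /sys/block/sda/queue/iosched/read_expire
 *	# echo 250 > /sys/block/sda/queue/iosched/read_expire
 *
 * read_expire and write_expire are exported in milliseconds; the sysfs
 * store functions below convert them to jiffies.
 */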

struct deadline_data {
	/*
	 * run time data
	 */

	/*
	 * requests are present on both sort_list and fifo_list
	 */
	struct rb_root sort_list[2];
	struct list_head fifo_list[2];

	/*
	 * next request in sort order; either or both may be NULL
	 */
	struct request *next_rq[2];
	unsigned int batching;		/* number of sequential requests made */
	sector_t last_sector;		/* head position */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[2];
	int fifo_batch;
	int writes_starved;
	int front_merges;
};
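
/*
 * Each pending request is tracked twice per data direction: on fifo_list,
 * in arrival order (and thus in expire-time order, since every request in
 * a direction gets the same fifo_expire offset), and on the sort_list
 * rbtree, in sector order.  Dispatch walks the rbtree for sequential
 * batches and falls back to the fifo head once a deadline has passed.
 */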

static void deadline_move_request(struct deadline_data *, struct request *);

#define RQ_RB_ROOT(dd, rq)	(&(dd)->sort_list[rq_data_dir((rq))])

static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
	struct rb_root *root = RQ_RB_ROOT(dd, rq);
	struct request *__alias;

retry:
	__alias = elv_rb_add(root, rq);
	if (unlikely(__alias)) {
		deadline_move_request(dd, __alias);
		goto retry;
	}
}
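
/*
 * Note on the retry loop above: elv_rb_add() does not insert rq if another
 * request already sits at the same sector; it returns that existing
 * request (the "alias") instead.  The alias is pushed to the dispatch
 * queue via deadline_move_request() and the insertion is retried until rq
 * finds a free slot in the tree.
 */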

static inline void
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
{
	const int data_dir = rq_data_dir(rq);

	if (dd->next_rq[data_dir] == rq) {
		struct rb_node *rbnext = rb_next(&rq->rb_node);

		dd->next_rq[data_dir] = NULL;
		if (rbnext)
			dd->next_rq[data_dir] = rb_entry_rq(rbnext);
	}

	elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
}

/*
 * add rq to rbtree and fifo
 */
static void
deadline_add_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const int data_dir = rq_data_dir(rq);

	deadline_add_rq_rb(dd, rq);

	/*
	 * set request expire time and add to fifo list
	 */
	rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
}

/*
 * remove rq from rbtree and fifo.
 */
static void deadline_remove_request(request_queue_t *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	rq_fifo_clear(rq);
	deadline_del_rq_rb(dd, rq);
}

static int
deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct request *__rq;
	int ret;

	/*
	 * check for front merge
	 */
	if (dd->front_merges) {
		sector_t sector = bio->bi_sector + bio_sectors(bio);

		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
		if (__rq) {
			BUG_ON(sector != __rq->sector);

			if (elv_rq_merge_ok(__rq, bio)) {
				ret = ELEVATOR_FRONT_MERGE;
				goto out;
			}
		}
	}

	return ELEVATOR_NO_MERGE;
out:
	*req = __rq;
	return ret;
}

static void deadline_merged_request(request_queue_t *q, struct request *req,
				    int type)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	/*
	 * if the merge was a front merge, we need to reposition the request
	 */
	if (type == ELEVATOR_FRONT_MERGE) {
		elv_rb_del(RQ_RB_ROOT(dd, req), req);
		deadline_add_rq_rb(dd, req);
	}
}

static void
deadline_merged_requests(request_queue_t *q, struct request *req,
			 struct request *next)
{
	/*
	 * if next expires before req, assign its expire time to req and
	 * move req into next's position in the fifo (next will be deleted)
	 */
	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
			list_move(&req->queuelist, &next->queuelist);
			rq_set_fifo_time(req, rq_fifo_time(next));
		}
	}

	/*
	 * kill knowledge of next, this one is a goner
	 */
	deadline_remove_request(q, next);
}

/*
 * move request from sort list to dispatch queue.
 */
static inline void
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
{
	request_queue_t *q = rq->q;

	deadline_remove_request(q, rq);
	elv_dispatch_add_tail(q, rq);
}

/*
 * move an entry to dispatch queue
 */
static void
deadline_move_request(struct deadline_data *dd, struct request *rq)
{
	const int data_dir = rq_data_dir(rq);
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	dd->next_rq[READ] = NULL;
	dd->next_rq[WRITE] = NULL;

	if (rbnext)
		dd->next_rq[data_dir] = rb_entry_rq(rbnext);

	dd->last_sector = rq->sector + rq->nr_sectors;

	/*
	 * take it off the sort and fifo list, move
	 * to dispatch queue
	 */
	deadline_move_to_dispatch(dd, rq);
}

/*
 * deadline_check_fifo returns 1 if the first request on the given fifo
 * has expired, 0 otherwise. Requires !list_empty(&dd->fifo_list[ddir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);

	/*
	 * rq is expired!
	 */
	if (time_after(jiffies, rq_fifo_time(rq)))
		return 1;

	return 0;
}
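
/*
 * time_after() is used rather than a plain ">" comparison so the expiry
 * check above remains correct when jiffies wraps around.
 */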

/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch and writes_starved
 */
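/*
 * In outline (a summary of the logic below, not a separate contract):
 *
 *   1. if a batch is in progress and a sequential "next" request exists,
 *      keep dispatching from it until fifo_batch requests have been issued;
 *   2. otherwise pick a direction: reads are preferred, but once reads
 *      have been chosen writes_starved times while writes are waiting,
 *      a write batch is started instead;
 *   3. within the chosen direction, serve the fifo head if its deadline
 *      has expired, else continue in sector order from the rbtree.
 */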
static int deadline_dispatch_requests(request_queue_t *q, int force)
{
	struct deadline_data *dd = q->elevator->elevator_data;
	const int reads = !list_empty(&dd->fifo_list[READ]);
	const int writes = !list_empty(&dd->fifo_list[WRITE]);
	struct request *rq;
	int data_dir;

	/*
	 * batches are currently reads XOR writes
	 */
	if (dd->next_rq[WRITE])
		rq = dd->next_rq[WRITE];
	else
		rq = dd->next_rq[READ];

	if (rq) {
		/* we have a "next request" */

		if (dd->last_sector != rq->sector)
			/* end the batch on a non sequential request */
			dd->batching += dd->fifo_batch;

		if (dd->batching < dd->fifo_batch)
			/* we are still entitled to batch */
			goto dispatch_request;
	}

	/*
	 * at this point we are not running a batch. select the appropriate
	 * data direction (read / write)
	 */

	if (reads) {
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));

		if (writes && (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		data_dir = READ;

		goto dispatch_find_request;
	}

	/*
	 * either there are no reads, or writes have been starved long enough
	 */

	if (writes) {
dispatch_writes:
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));

		dd->starved = 0;

		data_dir = WRITE;

		goto dispatch_find_request;
	}

	return 0;

dispatch_find_request:
	/*
	 * we are not running a batch, find best request for selected data_dir
	 */
	if (deadline_check_fifo(dd, data_dir)) {
		/* An expired request exists - satisfy it */
		dd->batching = 0;
		rq = rq_entry_fifo(dd->fifo_list[data_dir].next);

	} else if (dd->next_rq[data_dir]) {
		/*
		 * The last req was the same dir and we have a next request in
		 * sort order. No expired requests so continue on from here.
		 */
		rq = dd->next_rq[data_dir];
	} else {
		struct rb_node *node;
		/*
		 * The last req was the other direction or we have run out of
		 * higher-sectored requests. Go back to the lowest sectored
		 * request (1 way elevator) and start a new batch.
		 */
		dd->batching = 0;
		node = rb_first(&dd->sort_list[data_dir]);
		if (node)
			rq = rb_entry_rq(node);
	}

dispatch_request:
	/*
	 * rq is the selected request; move it to the dispatch queue
	 */
	dd->batching++;
	deadline_move_request(dd, rq);

	return 1;
}

static int deadline_queue_empty(request_queue_t *q)
{
	struct deadline_data *dd = q->elevator->elevator_data;

	return list_empty(&dd->fifo_list[WRITE])
		&& list_empty(&dd->fifo_list[READ]);
}

static void deadline_exit_queue(elevator_t *e)
{
	struct deadline_data *dd = e->elevator_data;

	BUG_ON(!list_empty(&dd->fifo_list[READ]));
	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

	kfree(dd);
}

/*
 * initialize elevator private data (deadline_data).
 */
static void *deadline_init_queue(request_queue_t *q)
{
	struct deadline_data *dd;

	dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
	if (!dd)
		return NULL;
	memset(dd, 0, sizeof(*dd));

	INIT_LIST_HEAD(&dd->fifo_list[READ]);
	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
	dd->sort_list[READ] = RB_ROOT;
	dd->sort_list[WRITE] = RB_ROOT;
	dd->fifo_expire[READ] = read_expire;
	dd->fifo_expire[WRITE] = write_expire;
	dd->writes_starved = writes_starved;
	dd->front_merges = 1;
	dd->fifo_batch = fifo_batch;
	return dd;
}

/*
 * sysfs parts below
 */

static ssize_t
deadline_var_show(int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static ssize_t
deadline_var_store(int *var, const char *page, size_t count)
{
	char *p = (char *) page;

	*var = simple_strtol(p, &p, 10);
	return count;
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(elevator_t *e, char *page)			\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data = __VAR;						\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return deadline_var_show(__data, (page));			\
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION
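
/*
 * For reference, SHOW_FUNCTION(deadline_read_expire_show,
 * dd->fifo_expire[READ], 1) above expands to roughly the following
 * (with the constant __CONV branch folded away):
 *
 *	static ssize_t deadline_read_expire_show(elevator_t *e, char *page)
 *	{
 *		struct deadline_data *dd = e->elevator_data;
 *		int __data = jiffies_to_msecs(dd->fifo_expire[READ]);
 *		return deadline_var_show(__data, page);
 *	}
 */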

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data;							\
	int ret = deadline_var_store(&__data, (page), count);		\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return ret;							\
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION
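
/*
 * The store variants mirror the show functions: a value written through
 * sysfs is parsed with deadline_var_store(), clamped to [MIN, MAX] and,
 * where __CONV is set, converted from milliseconds to jiffies before
 * being stored in struct deadline_data.
 */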

#define DD_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
				      deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
	DD_ATTR(write_expire),
	DD_ATTR(writes_starved),
	DD_ATTR(front_merges),
	DD_ATTR(fifo_batch),
	__ATTR_NULL
};

static struct elevator_type iosched_deadline = {
	.ops = {
		.elevator_merge_fn = 		deadline_merge,
		.elevator_merged_fn =		deadline_merged_request,
		.elevator_merge_req_fn =	deadline_merged_requests,
		.elevator_dispatch_fn =		deadline_dispatch_requests,
		.elevator_add_req_fn =		deadline_add_request,
		.elevator_queue_empty_fn =	deadline_queue_empty,
		.elevator_former_req_fn =	elv_rb_former_request,
		.elevator_latter_req_fn =	elv_rb_latter_request,
		.elevator_init_fn =		deadline_init_queue,
		.elevator_exit_fn =		deadline_exit_queue,
	},

	.elevator_attrs = deadline_attrs,
	.elevator_name = "deadline",
	.elevator_owner = THIS_MODULE,
};

static int __init deadline_init(void)
{
	return elv_register(&iosched_deadline);
}

static void __exit deadline_exit(void)
{
	elv_unregister(&iosched_deadline);
}

module_init(deadline_init);
module_exit(deadline_exit);
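
/*
 * Usage sketch, assuming this scheduler is built in or loaded as a module:
 * it can be selected per device at runtime, e.g.
 *
 *	# echo deadline > /sys/block/sda/queue/scheduler
 *
 * or made the system-wide default by booting with "elevator=deadline".
 */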

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deadline IO scheduler");