gs_scheduler.h revision 206552
119304Speter/*-
219304Speter * Copyright (c) 2009-2010 Fabio Checconi
319304Speter * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
419304Speter * All rights reserved.
519304Speter *
619304Speter * Redistribution and use in source and binary forms, with or without
719304Speter * modification, are permitted provided that the following conditions
819304Speter * are met:
919304Speter * 1. Redistributions of source code must retain the above copyright
1019304Speter *    notice, this list of conditions and the following disclaimer.
1119304Speter * 2. Redistributions in binary form must reproduce the above copyright
1219304Speter *    notice, this list of conditions and the following disclaimer in the
1319304Speter *    documentation and/or other materials provided with the distribution.
1419304Speter *
1519304Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
1619304Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1719304Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1819304Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
1919304Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2019304Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2119304Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2219304Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2319304Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2419304Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2519304Speter * SUCH DAMAGE.
2619304Speter */
2719304Speter
2819304Speter/*
2919304Speter * $Id$
3019304Speter * $FreeBSD: head/sys/geom/sched/gs_scheduler.h 206552 2010-04-13 09:56:17Z luigi $
3119304Speter *
3219304Speter * Prototypes for GEOM-based disk scheduling algorithms.
3319304Speter * See g_sched.c for generic documentation.
3419304Speter *
3519304Speter * This file is used by the kernel modules implementing the various
3619304Speter * scheduling algorithms. They should provide all the methods
37 * defined in struct g_gsched, and also invoke the macro
38 *	DECLARE_GSCHED_MODULE
39 * which registers the scheduling algorithm with the geom_sched module.
40 *
41 * The various scheduling algorithms do not need to know anything
42 * about geom, they only need to handle the 'bio' requests they
43 * receive, pass them down when needed, and use the locking interface
44 * defined below.
45 */
46
47#ifndef	_G_GSCHED_H_
48#define	_G_GSCHED_H_
49
50#ifdef _KERNEL
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/ktr.h>
54#include <sys/module.h>
55#include <sys/queue.h>
56#include <geom/geom.h>
57#include "g_sched.h"
58
59/*
60 * This is the interface exported to scheduling modules.
61 *
62 * gs_init() is called when our scheduling algorithm
63 *    starts being used by a geom 'sched'
64 *
65 * gs_fini() is called when the algorithm is released.
66 *
 * gs_start() is called when a new request comes in. It should
 *    enqueue the request and return 0 on success, or return non-zero
 *    on failure (in which case the request is passed down).
 *    The scheduler can use bio->bio_caller1 to store a non-null
 *    pointer meaning the request is under its control.
72 *
73 * gs_next() is called in a loop by g_sched_dispatch(), right after
74 *    gs_start(), or on timeouts or 'done' events. It should return
75 *    immediately, either a pointer to the bio to be served or NULL
76 *    if no bio should be served now.  If force is specified, a
77 *    work-conserving behavior is expected.
78 *
79 * gs_done() is called when a request under service completes.
80 *    In turn the scheduler may decide to call the dispatch loop
81 *    to serve other pending requests (or make sure there is a pending
82 *    timeout to avoid stalls).
83 *
84 * gs_init_class() is called when a new client (as determined by
85 *    the classifier) starts being used.
86 *
87 * gs_hash_unref() is called right before the class hashtable is
88 *    destroyed; after this call, the scheduler is supposed to hold no
89 *    more references to the elements in the table.
90 */
91
/* Forward declarations so the prototypes below can name these types. */
struct g_geom;
struct g_sched_class;

/*
 * Function types for the methods a scheduling algorithm exports through
 * struct g_gsched.  The 'data' argument is opaque to this header.
 * NOTE(review): presumably it is the pointer returned by the gs_init
 * method -- confirm against g_sched.c.
 */

/* Called when the algorithm starts being used by a geom 'sched'. */
typedef void *gs_init_t (struct g_geom *geom);
/* Called when the algorithm is released. */
typedef void gs_fini_t (void *data);
/* New request: return 0 if it was enqueued, non-zero on failure. */
typedef int gs_start_t (void *data, struct bio *bio);
/* A request that was under service has completed. */
typedef void gs_done_t (void *data, struct bio *bio);
/* Return the next bio to serve, or NULL if none should be served now. */
typedef struct bio *gs_next_t (void *data, int force);
/* A new client (as determined by the classifier) starts being used. */
typedef int gs_init_class_t (void *data, void *priv);
/* NOTE(review): presumably the counterpart of gs_init_class_t -- confirm. */
typedef void gs_fini_class_t (void *data, void *priv);
/* Called right before the class hashtable is destroyed. */
typedef void gs_hash_unref_t (void *data);
104
105struct g_gsched {
106	const char	*gs_name;
107	int		gs_refs;
108	int		gs_priv_size;
109
110	gs_init_t	*gs_init;
111	gs_fini_t	*gs_fini;
112	gs_start_t	*gs_start;
113	gs_done_t	*gs_done;
114	gs_next_t	*gs_next;
115	g_dumpconf_t	*gs_dumpconf;
116
117	gs_init_class_t	*gs_init_class;
118	gs_fini_class_t	*gs_fini_class;
119	gs_hash_unref_t *gs_hash_unref;
120
121	LIST_ENTRY(g_gsched) glist;
122};
123
/* KTR class name for this subsystem, mapped onto KTR_SPARE4. */
#define	KTR_GSCHED	KTR_SPARE4

/* Declaration of the M_GEOM_SCHED malloc type. */
MALLOC_DECLARE(M_GEOM_SCHED);
127
128/*
129 * Basic classification mechanism.  Each request is associated to
130 * a g_sched_class, and each scheduler has the opportunity to set
131 * its own private data for the given (class, geom) pair.  The
132 * private data have a base type of g_sched_private, and are
133 * extended at the end with the actual private fields of each
134 * scheduler.
135 */
136struct g_sched_class {
137	int	gsc_refs;
138	int	gsc_expire;
139	u_long	gsc_key;
140	LIST_ENTRY(g_sched_class) gsc_clist;
141
142	void	*gsc_priv[0];
143};
144
/*
 * Accessors for the classifier's data.
 *
 * g_sched_get_class() takes a reference to the class that bp maps to
 * within gp, allocating and initializing that class first if necessary.
 * The value it returns points to the private data for the class.
 * g_sched_put_class() releases the reference.
 */
void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
void g_sched_put_class(struct g_geom *gp, void *priv);
153
154static inline struct g_sched_class *
155g_sched_priv2class(void *priv)
156{
157
158	return ((struct g_sched_class *)((u_long)priv -
159	    offsetof(struct g_sched_class, gsc_priv)));
160}
161
162static inline void
163g_sched_priv_ref(void *priv)
164{
165	struct g_sched_class *gsc;
166
167	gsc = g_sched_priv2class(priv);
168	gsc->gsc_refs++;
169}
170
/*
 * Locking interface.  A per-instance lock is taken around every call
 * to an operation registered with the scheduler, protecting the data
 * associated with that instance.  Anything else that needs to access
 * the same data (e.g., a callout) must use the two functions below.
 */
void g_sched_lock(struct g_geom *gp);
void g_sched_unlock(struct g_geom *gp);
180
/*
 * Resume dispatching of requests.  The caller must hold the
 * per-instance mutex.
 */
void g_sched_dispatch(struct g_geom *geom);
186
187/*
188 * Simple gathering of statistical data, used by schedulers to collect
189 * info on process history.  Just keep an exponential average of the
190 * samples, with some extra bits of precision.
191 */
/*
 * Exponential-average accumulator.  gs_avg is the decayed sum of the
 * samples and gs_smpl the identically decayed number of samples; the
 * average is their ratio.  A zeroed structure is an empty accumulator.
 */
struct g_savg {
	uint64_t	gs_avg;
	unsigned int	gs_smpl;
};

/*
 * Fold one sample into the accumulator.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Return non-zero once enough samples have been collected for the
 * average to be considered meaningful.
 */
static inline int
g_savg_valid(struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average, or 0 if no sample has been added yet.
 */
static inline uint64_t
g_savg_read(struct g_savg *ss)
{

	/* An empty accumulator has gs_smpl == 0: do not divide by it. */
	if (ss->gs_smpl == 0)
		return (0);
	return (ss->gs_avg / ss->gs_smpl);
}
220
221/*
222 * Declaration of a scheduler module.
223 */
224int g_gsched_modevent(module_t mod, int cmd, void *arg);
225
226#define	DECLARE_GSCHED_MODULE(name, gsched)			\
227	static moduledata_t name##_mod = {			\
228		#name,						\
229		g_gsched_modevent,				\
230		gsched,						\
231	};							\
232	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
233	MODULE_DEPEND(name, geom_sched, 0, 0, 0);
234
235#endif	/* _KERNEL */
236
237#endif	/* _G_GSCHED_H_ */
238