gs_scheduler.h revision 303975
1178431Sscf/*-
2178431Sscf * Copyright (c) 2009-2010 Fabio Checconi
3178431Sscf * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
4178431Sscf * All rights reserved.
5178431Sscf *
6178431Sscf * Redistribution and use in source and binary forms, with or without
7178431Sscf * modification, are permitted provided that the following conditions
8178431Sscf * are met:
9178431Sscf * 1. Redistributions of source code must retain the above copyright
10178431Sscf *    notice, this list of conditions and the following disclaimer.
11178431Sscf * 2. Redistributions in binary form must reproduce the above copyright
12178431Sscf *    notice, this list of conditions and the following disclaimer in the
13178431Sscf *    documentation and/or other materials provided with the distribution.
14178431Sscf *
15178431Sscf * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16178431Sscf * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17178431Sscf * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18178431Sscf * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19178431Sscf * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20178431Sscf * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21178431Sscf * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22178431Sscf * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23178431Sscf * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24178431Sscf * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25178431Sscf * SUCH DAMAGE.
26178431Sscf */
27178431Sscf
28178431Sscf/*
29178431Sscf * $Id$
30178431Sscf * $FreeBSD: releng/11.0/sys/geom/sched/gs_scheduler.h 218909 2011-02-21 09:01:34Z brucec $
31184831Sscf *
32178431Sscf * Prototypes for GEOM-based disk scheduling algorithms.
33178431Sscf * See g_sched.c for generic documentation.
34184831Sscf *
35178431Sscf * This file is used by the kernel modules implementing the various
36178431Sscf * scheduling algorithms. They should provide all the methods
37178431Sscf * defined in struct g_gsched, and also invoke the macro
38178431Sscf *	DECLARE_GSCHED_MODULE
39178431Sscf * which registers the scheduling algorithm with the geom_sched module.
40185237Sscf *
41185237Sscf * The various scheduling algorithms do not need to know anything
42185237Sscf * about geom, they only need to handle the 'bio' requests they
43185237Sscf * receive, pass them down when needed, and use the locking interface
44178431Sscf * defined below.
45185237Sscf */
46185237Sscf
47178431Sscf#ifndef	_G_GSCHED_H_
48178431Sscf#define	_G_GSCHED_H_
49178431Sscf
50178431Sscf#ifdef _KERNEL
51178431Sscf#include <sys/param.h>
52178431Sscf#include <sys/kernel.h>
53185237Sscf#include <sys/ktr.h>
54185237Sscf#include <sys/module.h>
55184831Sscf#include <sys/queue.h>
56178431Sscf#include <geom/geom.h>
57178431Sscf#include "g_sched.h"
58185237Sscf
/*
 * This is the interface exported to scheduling modules.
 *
 * gs_init() is called when our scheduling algorithm
 *    starts being used by a geom 'sched'.
 *
 * gs_fini() is called when the algorithm is released.
 *
 * gs_start() is called when a new request comes in. It should
 *    enqueue the request and return 0 on success, or return non-zero
 *    in case of failure (meaning the request is passed down).
 *    The scheduler can use bio->bio_caller1 to store a non-null
 *    pointer meaning the request is under its control.
 *
 * gs_next() is called in a loop by g_sched_dispatch(), right after
 *    gs_start(), or on timeouts or 'done' events. It should return
 *    immediately, either a pointer to the bio to be served or NULL
 *    if no bio should be served now.  If force is specified, a
 *    work-conserving behavior is expected.
 *
 * gs_done() is called when a request under service completes.
 *    In turn the scheduler may decide to call the dispatch loop
 *    to serve other pending requests (or make sure there is a pending
 *    timeout to avoid stalls).
 *
 * gs_init_class() is called when a new client (as determined by
 *    the classifier) starts being used.
 *
 * gs_hash_unref() is called right before the class hashtable is
 *    destroyed; after this call, the scheduler is supposed to hold no
 *    more references to the elements in the table.
 */
91185237Sscf
/* Forward declarations for prototypes. */
struct g_geom;
struct g_sched_class;

/*
 * Function types of the methods a scheduling algorithm plugs into
 * struct g_gsched.  'data' is an opaque pointer owned by the algorithm
 * (NOTE(review): presumably the value returned by its gs_init method --
 * confirm in g_sched.c); 'priv' is the algorithm's per-class private area.
 */

/* Called when the algorithm starts being used by a geom 'sched'. */
typedef void *gs_init_t (struct g_geom *geom);
/* Called when the algorithm is released. */
typedef void gs_fini_t (void *data);
/* New request: enqueue it and return 0, or return non-zero on failure. */
typedef int gs_start_t (void *data, struct bio *bio);
/* A request that was under service has completed. */
typedef void gs_done_t (void *data, struct bio *bio);
/* Return the next bio to serve, or NULL if none should be served now. */
typedef struct bio *gs_next_t (void *data, int force);
/* A new client (as determined by the classifier) starts being used. */
typedef int gs_init_class_t (void *data, void *priv);
/* NOTE(review): presumably the teardown counterpart of gs_init_class_t. */
typedef void gs_fini_class_t (void *data, void *priv);
/* Called right before the class hashtable is destroyed. */
typedef void gs_hash_unref_t (void *data);
104185237Sscf
105178431Sscfstruct g_gsched {
106178431Sscf	const char	*gs_name;
107178431Sscf	int		gs_refs;
108185237Sscf	int		gs_priv_size;
109178431Sscf
110185237Sscf	gs_init_t	*gs_init;
111185237Sscf	gs_fini_t	*gs_fini;
112185237Sscf	gs_start_t	*gs_start;
113185237Sscf	gs_done_t	*gs_done;
114185237Sscf	gs_next_t	*gs_next;
115185237Sscf	g_dumpconf_t	*gs_dumpconf;
116178431Sscf
117178431Sscf	gs_init_class_t	*gs_init_class;
118185237Sscf	gs_fini_class_t	*gs_fini_class;
119178431Sscf	gs_hash_unref_t *gs_hash_unref;
120185237Sscf
121178431Sscf	LIST_ENTRY(g_gsched) glist;
122185237Sscf};
123185237Sscf
/* KTR trace class used by the schedulers; an alias for KTR_SPARE4. */
#define	KTR_GSCHED	KTR_SPARE4

/*
 * Declaration of the M_GEOM_SCHED malloc type.
 * NOTE(review): the matching definition is not in this header --
 * presumably it lives in g_sched.c; confirm.
 */
MALLOC_DECLARE(M_GEOM_SCHED);
127185237Sscf
128178431Sscf/*
129178431Sscf * Basic classification mechanism.  Each request is associated to
130178431Sscf * a g_sched_class, and each scheduler has the opportunity to set
131178431Sscf * its own private data for the given (class, geom) pair.  The
132178431Sscf * private data have a base type of g_sched_private, and are
133178431Sscf * extended at the end with the actual private fields of each
134178431Sscf * scheduler.
135178431Sscf */
136178431Sscfstruct g_sched_class {
137178431Sscf	int	gsc_refs;
138185237Sscf	int	gsc_expire;
139184831Sscf	u_long	gsc_key;
140185237Sscf	LIST_ENTRY(g_sched_class) gsc_clist;
141178431Sscf
142185237Sscf	void	*gsc_priv[0];
143178431Sscf};
144185237Sscf
/*
 * Manipulate the classifier's data.  g_sched_get_class() gets a reference
 * to the class corresponding to bp in gp, allocating and initializing
 * it if necessary.  g_sched_put_class() releases the reference.
 * The value returned by g_sched_get_class() points to the private data
 * for the class, and that same pointer is what g_sched_put_class()
 * takes as 'priv'.
 */
void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
void g_sched_put_class(struct g_geom *gp, void *priv);
153185237Sscf
154185237Sscfstatic inline struct g_sched_class *
155185237Sscfg_sched_priv2class(void *priv)
156178431Sscf{
157178431Sscf
158185237Sscf	return ((struct g_sched_class *)((u_long)priv -
159178431Sscf	    offsetof(struct g_sched_class, gsc_priv)));
160185237Sscf}
161178431Sscf
162185237Sscfstatic inline void
163185237Sscfg_sched_priv_ref(void *priv)
164178431Sscf{
165178431Sscf	struct g_sched_class *gsc;
166185237Sscf
167185237Sscf	gsc = g_sched_priv2class(priv);
168178431Sscf	gsc->gsc_refs++;
169185237Sscf}
170178431Sscf
/*
 * Locking interface.  When each operation registered with the
 * scheduler is invoked, a per-instance lock is taken to protect
 * the data associated with it.  If the scheduler needs something
 * else to access the same data (e.g., a callout) it must use
 * these functions to take and release that lock for the given geom.
 */
void g_sched_lock(struct g_geom *gp);
void g_sched_unlock(struct g_geom *gp);
180178431Sscf
/*
 * Restart request dispatching for the given geom.  Must be called with
 * the per-instance mutex held (see g_sched_lock()).
 */
void g_sched_dispatch(struct g_geom *geom);
186178431Sscf
/*
 * Simple gathering of statistical data, used by schedulers to collect
 * info on process history.  Just keep an exponential average of the
 * samples, with some extra bits of precision.
 *
 * A zero-filled struct g_savg is an empty average.  gs_avg is the
 * decayed sum of the samples and gs_smpl the equally decayed count of
 * samples; their ratio is the average reported by g_savg_read().
 */
struct g_savg {
	uint64_t	gs_avg;		/* decayed sum of the samples */
	unsigned int	gs_smpl;	/* decayed sample count, at most 8 */
};

/*
 * Fold a new sample into the average.  Both accumulators lose 1/8 of
 * their value and gain the new contribution, so starting from zero
 * gs_smpl grows by one per sample until it settles at 8.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Return non-zero when enough samples were collected for the average
 * to be considered meaningful.
 */
static inline int
g_savg_valid(const struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average, i.e. gs_avg / gs_smpl.  An average with
 * no samples yet reads as 0 instead of dividing by zero.  Callers that
 * need a trustworthy value should check g_savg_valid() first.
 */
static inline uint64_t
g_savg_read(const struct g_savg *ss)
{

	if (ss->gs_smpl == 0)
		return (0);
	return (ss->gs_avg / ss->gs_smpl);
}
220178431Sscf
221178431Sscf/*
222185237Sscf * Declaration of a scheduler module.
223185237Sscf */
224178431Sscfint g_gsched_modevent(module_t mod, int cmd, void *arg);
225178431Sscf
226178431Sscf#define	DECLARE_GSCHED_MODULE(name, gsched)			\
227178431Sscf	static moduledata_t name##_mod = {			\
228178431Sscf		#name,						\
229178431Sscf		g_gsched_modevent,				\
230178431Sscf		gsched,						\
231178431Sscf	};							\
232178431Sscf	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
233178431Sscf	MODULE_DEPEND(name, geom_sched, 0, 0, 0);
234184831Sscf
235185237Sscf#endif	/* _KERNEL */
236185237Sscf
237178431Sscf#endif	/* _G_GSCHED_H_ */
238185237Sscf