1206497Sluigi/*-
2206552Sluigi * Copyright (c) 2009-2010 Fabio Checconi
3206552Sluigi * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
4206497Sluigi * All rights reserved.
5206497Sluigi *
6206497Sluigi * Redistribution and use in source and binary forms, with or without
7206497Sluigi * modification, are permitted provided that the following conditions
8206497Sluigi * are met:
9206497Sluigi * 1. Redistributions of source code must retain the above copyright
10206497Sluigi *    notice, this list of conditions and the following disclaimer.
11206497Sluigi * 2. Redistributions in binary form must reproduce the above copyright
12206497Sluigi *    notice, this list of conditions and the following disclaimer in the
13206497Sluigi *    documentation and/or other materials provided with the distribution.
14206497Sluigi *
15206497Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16206497Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17206497Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18206497Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19206497Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20206497Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21206497Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22206497Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23206497Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24206497Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25206497Sluigi * SUCH DAMAGE.
26206497Sluigi */
27206497Sluigi
28206497Sluigi/*
29206497Sluigi * $Id$
30206497Sluigi * $FreeBSD$
31206497Sluigi *
32206497Sluigi * Prototypes for GEOM-based disk scheduling algorithms.
33206497Sluigi * See g_sched.c for generic documentation.
34206497Sluigi *
35206497Sluigi * This file is used by the kernel modules implementing the various
36206497Sluigi * scheduling algorithms. They should provide all the methods
37206497Sluigi * defined in struct g_gsched, and also invoke the macro
38206497Sluigi *	DECLARE_GSCHED_MODULE
39206497Sluigi * which registers the scheduling algorithm with the geom_sched module.
40206497Sluigi *
41206497Sluigi * The various scheduling algorithms do not need to know anything
42206497Sluigi * about geom, they only need to handle the 'bio' requests they
43206497Sluigi * receive, pass them down when needed, and use the locking interface
44206497Sluigi * defined below.
45206497Sluigi */
46206497Sluigi
47206497Sluigi#ifndef	_G_GSCHED_H_
48206497Sluigi#define	_G_GSCHED_H_
49206497Sluigi
50206497Sluigi#ifdef _KERNEL
51206497Sluigi#include <sys/param.h>
52206497Sluigi#include <sys/kernel.h>
53206497Sluigi#include <sys/ktr.h>
54206497Sluigi#include <sys/module.h>
55206497Sluigi#include <sys/queue.h>
56206497Sluigi#include <geom/geom.h>
57206497Sluigi#include "g_sched.h"
58206497Sluigi
59206497Sluigi/*
60206497Sluigi * This is the interface exported to scheduling modules.
61206497Sluigi *
62206497Sluigi * gs_init() is called when our scheduling algorithm
63206497Sluigi *    starts being used by a geom 'sched'
64206497Sluigi *
65206497Sluigi * gs_fini() is called when the algorithm is released.
66206497Sluigi *
67206497Sluigi * gs_start() is called when a new request comes in. It should
68206497Sluigi *    enqueue the request and return 0 if success, or return non-zero
69206497Sluigi *    in case of failure (meaning the request is passed down).
70206497Sluigi *    The scheduler can use bio->bio_caller1 to store a non-null
71206497Sluigi *    pointer meaning the request is under its control.
72206497Sluigi *
73206497Sluigi * gs_next() is called in a loop by g_sched_dispatch(), right after
74206497Sluigi *    gs_start(), or on timeouts or 'done' events. It should return
75206497Sluigi *    immediately, either a pointer to the bio to be served or NULL
76206497Sluigi *    if no bio should be served now.  If force is specified, a
77206497Sluigi *    work-conserving behavior is expected.
78206497Sluigi *
79206497Sluigi * gs_done() is called when a request under service completes.
80206497Sluigi *    In turn the scheduler may decide to call the dispatch loop
81206497Sluigi *    to serve other pending requests (or make sure there is a pending
82206497Sluigi *    timeout to avoid stalls).
83206497Sluigi *
84206497Sluigi * gs_init_class() is called when a new client (as determined by
85206497Sluigi *    the classifier) starts being used.
86206497Sluigi *
87206497Sluigi * gs_hash_unref() is called right before the class hashtable is
88206497Sluigi *    destroyed; after this call, the scheduler is supposed to hold no
89206497Sluigi *    more references to the elements in the table.
90206497Sluigi */
91206497Sluigi
92206497Sluigi/* Forward declarations for prototypes. */
struct g_geom;
struct g_sched_class;

/*
 * Method types implemented by each scheduling algorithm.  Every method
 * except gs_init_t takes an opaque 'data' pointer as its first argument.
 * NOTE(review): 'data' is presumably the value returned by gs_init() for
 * the instance -- confirm against g_sched.c.
 */

/* Called when the algorithm starts being used by a geom 'sched'. */
typedef void *gs_init_t (struct g_geom *geom);
/* Called when the algorithm is released. */
typedef void gs_fini_t (void *data);
/*
 * Called when a new request comes in.  Returns 0 if the request was
 * enqueued, non-zero on failure (the request is then passed down).
 */
typedef int gs_start_t (void *data, struct bio *bio);
/* Called when a request under service completes. */
typedef void gs_done_t (void *data, struct bio *bio);
/*
 * Returns the next bio to be served, or NULL if none should be served
 * now.  A non-zero 'force' asks for work-conserving behavior.
 */
typedef struct bio *gs_next_t (void *data, int force);
/* Called when a new client (as determined by the classifier) starts being used. */
typedef int gs_init_class_t (void *data, void *priv);
/*
 * NOTE(review): presumably the counterpart of gs_init_class_t, called when
 * a client is released -- not described in this file, confirm in g_sched.c.
 */
typedef void gs_fini_class_t (void *data, void *priv);
/* Called right before the class hashtable is destroyed. */
typedef void gs_hash_unref_t (void *data);
104206497Sluigi
/*
 * Descriptor of a scheduling algorithm: its name, bookkeeping fields and
 * the table of methods it implements.  A pointer to one of these is what
 * a scheduler module hands to DECLARE_GSCHED_MODULE.
 */
struct g_gsched {
	const char	*gs_name;	/* name of the algorithm */
	int		gs_refs;	/* NOTE(review): presumably a use count -- confirm */
	int		gs_priv_size;	/* NOTE(review): presumably the size of the
					 * per-class private data -- confirm */

	/* Per-instance methods; see the gs_*_t typedefs. */
	gs_init_t	*gs_init;
	gs_fini_t	*gs_fini;
	gs_start_t	*gs_start;
	gs_done_t	*gs_done;
	gs_next_t	*gs_next;
	g_dumpconf_t	*gs_dumpconf;	/* type comes from <geom/geom.h> */

	/* Per-class (classifier client) methods. */
	gs_init_class_t	*gs_init_class;
	gs_fini_class_t	*gs_fini_class;
	gs_hash_unref_t *gs_hash_unref;

	LIST_ENTRY(g_gsched) glist;	/* linkage in a list of g_gsched */
};
123206497Sluigi
/* KTR mask used by the schedulers; aliased to the spare KTR_SPARE4 class. */
#define	KTR_GSCHED	KTR_SPARE4

/*
 * Declare the M_GEOM_SCHED malloc type.
 * NOTE(review): the matching definition is presumably in g_sched.c -- confirm.
 */
MALLOC_DECLARE(M_GEOM_SCHED);
127206497Sluigi
128206497Sluigi/*
129206497Sluigi * Basic classification mechanism.  Each request is associated to
130206497Sluigi * a g_sched_class, and each scheduler has the opportunity to set
131206497Sluigi * its own private data for the given (class, geom) pair.  The
132206497Sluigi * private data have a base type of g_sched_private, and are
133206497Sluigi * extended at the end with the actual private fields of each
134206497Sluigi * scheduler.
135206497Sluigi */
136206497Sluigistruct g_sched_class {
137206497Sluigi	int	gsc_refs;
138206497Sluigi	int	gsc_expire;
139206497Sluigi	u_long	gsc_key;
140206497Sluigi	LIST_ENTRY(g_sched_class) gsc_clist;
141206497Sluigi
142206497Sluigi	void	*gsc_priv[0];
143206497Sluigi};
144206497Sluigi
145206497Sluigi/*
146206497Sluigi * Manipulate the classifier's data.  g_sched_get_class() gets a reference
147218909Sbrucec * to the class corresponding to bp in gp, allocating and initializing
148206497Sluigi * it if necessary.  g_sched_put_class() releases the reference.
149206497Sluigi * The returned value points to the private data for the class.
150206497Sluigi */
/*
 * Get a reference to the class corresponding to bp in gp, allocating and
 * initializing it if necessary.  Returns a pointer to the private data
 * for the class.
 */
void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
/*
 * Release a reference obtained with g_sched_get_class(); priv is the
 * private-data pointer that call returned.
 */
void g_sched_put_class(struct g_geom *gp, void *priv);
153206497Sluigi
154206497Sluigistatic inline struct g_sched_class *
155206497Sluigig_sched_priv2class(void *priv)
156206497Sluigi{
157206497Sluigi
158206497Sluigi	return ((struct g_sched_class *)((u_long)priv -
159206497Sluigi	    offsetof(struct g_sched_class, gsc_priv)));
160206497Sluigi}
161206497Sluigi
162206497Sluigistatic inline void
163206497Sluigig_sched_priv_ref(void *priv)
164206497Sluigi{
165206497Sluigi	struct g_sched_class *gsc;
166206497Sluigi
167206497Sluigi	gsc = g_sched_priv2class(priv);
168206497Sluigi	gsc->gsc_refs++;
169206497Sluigi}
170206497Sluigi
171206497Sluigi/*
172206497Sluigi * Locking interface.  When each operation registered with the
173206497Sluigi * scheduler is invoked, a per-instance lock is taken to protect
174206497Sluigi * the data associated with it.  If the scheduler needs something
175206497Sluigi * else to access the same data (e.g., a callout) it must use
176206497Sluigi * these functions.
177206497Sluigi */
/* Acquire the per-instance lock protecting the scheduler data of gp. */
void g_sched_lock(struct g_geom *gp);
/* Release the per-instance lock taken with g_sched_lock(). */
void g_sched_unlock(struct g_geom *gp);

/*
 * Restart request dispatching.  Must be called with the per-instance
 * mutex held.
 */
void g_sched_dispatch(struct g_geom *geom);
186206497Sluigi
187206497Sluigi/*
188206497Sluigi * Simple gathering of statistical data, used by schedulers to collect
189206497Sluigi * info on process history.  Just keep an exponential average of the
190206497Sluigi * samples, with some extra bits of precision.
191206497Sluigi */
/*
 * Accumulator for an exponentially decayed average.  A zero-filled
 * structure is an empty accumulator.
 */
struct g_savg {
	uint64_t	gs_avg;		/* decayed sum of the samples */
	unsigned int	gs_smpl;	/* decayed count of the samples */
};

/*
 * Fold a new sample into the accumulator.
 *
 * Both the sum and the count lose 1/8 of their value (integer shift) and
 * then gain the new contribution.  Starting from zero, gs_smpl therefore
 * behaves as a plain counter until it reaches 8, and stays at 8 afterwards.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Return non-zero once enough samples have been collected for the
 * average to be considered meaningful.
 */
static inline int
g_savg_valid(struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average, i.e. the decayed sum divided by the decayed
 * sample count.  An accumulator that has not received any sample yet
 * reads as 0.
 */
static inline uint64_t
g_savg_read(struct g_savg *ss)
{

	/* No samples yet: report 0 instead of dividing by zero. */
	if (ss->gs_smpl == 0)
		return (0);

	return (ss->gs_avg / ss->gs_smpl);
}
220206497Sluigi
221206497Sluigi/*
222206497Sluigi * Declaration of a scheduler module.
223206497Sluigi */
/*
 * Module event handler shared by all scheduler modules; it is the handler
 * installed in the moduledata_t built by DECLARE_GSCHED_MODULE below.
 */
int g_gsched_modevent(module_t mod, int cmd, void *arg);

/*
 * Emit the kernel-module glue for a scheduling algorithm.
 *
 *   name	bare identifier; it is stringified to form the module name
 *		and pasted into the name of the static moduledata_t.
 *   gsched	third initializer of the moduledata_t.
 *		NOTE(review): presumably a pointer to the algorithm's
 *		struct g_gsched, handed to g_gsched_modevent() -- confirm.
 *
 * The module is declared at SI_SUB_DRIVERS / SI_ORDER_MIDDLE and depends on
 * geom_sched.  The expansion already ends with a semicolon.
 */
#define	DECLARE_GSCHED_MODULE(name, gsched)			\
	static moduledata_t name##_mod = {			\
		#name,						\
		g_gsched_modevent,				\
		gsched,						\
	};							\
	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
	MODULE_DEPEND(name, geom_sched, 0, 0, 0);
234206497Sluigi
235206497Sluigi#endif	/* _KERNEL */
236206497Sluigi
237206497Sluigi#endif	/* _G_GSCHED_H_ */
238