1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2009-2010 Fabio Checconi
5 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30/*
31 * $Id$
32 * $FreeBSD$
33 *
34 * Prototypes for GEOM-based disk scheduling algorithms.
35 * See g_sched.c for generic documentation.
36 *
37 * This file is used by the kernel modules implementing the various
38 * scheduling algorithms. They should provide all the methods
39 * defined in struct g_gsched, and also invoke the macro
40 *	DECLARE_GSCHED_MODULE
41 * which registers the scheduling algorithm with the geom_sched module.
42 *
43 * The various scheduling algorithms do not need to know anything
44 * about geom, they only need to handle the 'bio' requests they
45 * receive, pass them down when needed, and use the locking interface
46 * defined below.
47 */
48
49#ifndef	_G_GSCHED_H_
50#define	_G_GSCHED_H_
51
52#ifdef _KERNEL
53#include <sys/param.h>
54#include <sys/kernel.h>
55#include <sys/ktr.h>
56#include <sys/module.h>
57#include <sys/queue.h>
58#include <geom/geom.h>
59#include "g_sched.h"
60
61/*
62 * This is the interface exported to scheduling modules.
63 *
64 * gs_init() is called when our scheduling algorithm
65 *    starts being used by a geom 'sched'
66 *
67 * gs_fini() is called when the algorithm is released.
68 *
 * gs_start() is called when a new request comes in. It should
 *    enqueue the request and return 0 on success, or return non-zero
 *    on failure (in which case the request is passed down).
 *    The scheduler can use bio->bio_caller1 to store a non-null
 *    pointer meaning the request is under its control.
74 *
75 * gs_next() is called in a loop by g_sched_dispatch(), right after
76 *    gs_start(), or on timeouts or 'done' events. It should return
77 *    immediately, either a pointer to the bio to be served or NULL
78 *    if no bio should be served now.  If force is specified, a
79 *    work-conserving behavior is expected.
80 *
81 * gs_done() is called when a request under service completes.
82 *    In turn the scheduler may decide to call the dispatch loop
83 *    to serve other pending requests (or make sure there is a pending
84 *    timeout to avoid stalls).
85 *
86 * gs_init_class() is called when a new client (as determined by
87 *    the classifier) starts being used.
88 *
89 * gs_hash_unref() is called right before the class hashtable is
90 *    destroyed; after this call, the scheduler is supposed to hold no
91 *    more references to the elements in the table.
92 */
93
/*
 * Types that the prototypes in this header only ever reference
 * through pointers.
 */
struct g_sched_class;
struct g_geom;

/*
 * Function types for the methods that a scheduling algorithm plugs
 * into struct g_gsched.
 */

/* Called when the algorithm starts being used by a geom 'sched'. */
typedef void		*gs_init_t(struct g_geom *geom);

/* Called when the algorithm is released. */
typedef void		 gs_fini_t(void *data);

/*
 * Called in a loop by g_sched_dispatch(); returns the bio to be served
 * or NULL if none should be served now.  A non-zero 'force' asks for a
 * work-conserving behavior.
 */
typedef struct bio	*gs_next_t(void *data, int force);

/*
 * Called when a new request comes in; returns 0 if the request was
 * enqueued, non-zero otherwise.
 */
typedef int		 gs_start_t(void *data, struct bio *bio);

/* Called when a request under service completes. */
typedef void		 gs_done_t(void *data, struct bio *bio);

/*
 * Called when a new client (as determined by the classifier) starts
 * being used.
 */
typedef int		 gs_init_class_t(void *data, void *priv);

/*
 * NOTE(review): presumably the counterpart of gs_init_class_t, invoked
 * when a client goes away -- confirm against g_sched.c.
 */
typedef void		 gs_fini_class_t(void *data, void *priv);

/*
 * Called right before the class hashtable is destroyed; afterwards the
 * scheduler must hold no more references to the elements in the table.
 */
typedef void		 gs_hash_unref_t(void *data);
106
107struct g_gsched {
108	const char	*gs_name;
109	int		gs_refs;
110	int		gs_priv_size;
111
112	gs_init_t	*gs_init;
113	gs_fini_t	*gs_fini;
114	gs_start_t	*gs_start;
115	gs_done_t	*gs_done;
116	gs_next_t	*gs_next;
117	g_dumpconf_t	*gs_dumpconf;
118
119	gs_init_class_t	*gs_init_class;
120	gs_fini_class_t	*gs_fini_class;
121	gs_hash_unref_t *gs_hash_unref;
122
123	LIST_ENTRY(g_gsched) glist;
124};
125
/*
 * KTR_GSCHED is an alias for KTR_SPARE4 (presumably the KTR trace class
 * used by the scheduling code -- confirm against the users).
 */
#define	KTR_GSCHED		KTR_SPARE4

/* Declaration of the M_GEOM_SCHED malloc type for users of this header. */
MALLOC_DECLARE(M_GEOM_SCHED);
129
130/*
131 * Basic classification mechanism.  Each request is associated to
132 * a g_sched_class, and each scheduler has the opportunity to set
133 * its own private data for the given (class, geom) pair.  The
134 * private data have a base type of g_sched_private, and are
135 * extended at the end with the actual private fields of each
136 * scheduler.
137 */
138struct g_sched_class {
139	int	gsc_refs;
140	int	gsc_expire;
141	u_long	gsc_key;
142	LIST_ENTRY(g_sched_class) gsc_clist;
143
144	void	*gsc_priv[0];
145};
146
/*
 * Accessors for the classifier's data.
 *
 * g_sched_get_class() takes a reference to the class that bp belongs
 * to within gp, allocating and initializing the class if necessary,
 * and returns a pointer to the private data of that class.
 *
 * g_sched_put_class() drops the reference; priv is the private data
 * pointer of the class.
 */
void	*g_sched_get_class(struct g_geom *gp, struct bio *bp);
void	 g_sched_put_class(struct g_geom *gp, void *priv);
155
156static inline struct g_sched_class *
157g_sched_priv2class(void *priv)
158{
159
160	return ((struct g_sched_class *)((u_long)priv -
161	    offsetof(struct g_sched_class, gsc_priv)));
162}
163
164static inline void
165g_sched_priv_ref(void *priv)
166{
167	struct g_sched_class *gsc;
168
169	gsc = g_sched_priv2class(priv);
170	gsc->gsc_refs++;
171}
172
/*
 * Locking interface.  A per-instance lock is held around every
 * operation registered with the scheduler, protecting the data
 * associated with the instance.  Anything else that needs to touch the
 * same data (e.g., a callout) must bracket the access with
 * g_sched_lock() and g_sched_unlock().
 */
void	g_sched_lock(struct g_geom *gp);
void	g_sched_unlock(struct g_geom *gp);

/*
 * Restart request dispatching.  The caller must hold the per-instance
 * mutex.
 */
void	g_sched_dispatch(struct g_geom *geom);
188
/*
 * Simple gathering of statistical data, used by schedulers to collect
 * info on process history.  Just keep an exponential average of the
 * samples, with some extra bits of precision.
 *
 * A zero-filled struct g_savg is an empty accumulator.
 */
struct g_savg {
	uint64_t	gs_avg;		/* scaled running sum of the samples */
	unsigned int	gs_smpl;	/* scaled sample count, levels off at 8 */
};

/*
 * Fold a new sample into the average.  Both the running sum and the
 * sample counter decay by 1/8 of their value on each update, so their
 * ratio tracks the recent samples.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Return non-zero if enough samples have been collected for the
 * average to be meaningful, zero otherwise.
 */
static inline int
g_savg_valid(const struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average.  An accumulator that has not received
 * any sample yet reads as 0 rather than triggering a division by zero.
 */
static inline uint64_t
g_savg_read(const struct g_savg *ss)
{

	if (ss->gs_smpl == 0)
		return (0);
	return (ss->gs_avg / ss->gs_smpl);
}
222
223/*
224 * Declaration of a scheduler module.
225 */
226int g_gsched_modevent(module_t mod, int cmd, void *arg);
227
228#define	DECLARE_GSCHED_MODULE(name, gsched)			\
229	static moduledata_t name##_mod = {			\
230		#name,						\
231		g_gsched_modevent,				\
232		gsched,						\
233	};							\
234	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
235	MODULE_DEPEND(name, geom_sched, 0, 0, 0);
236
237#endif	/* _KERNEL */
238
239#endif	/* _G_GSCHED_H_ */
240