Deleted Added
full compact
nfs_fha.c (259765) nfs_fha.c (260097)
1/*-
2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26#include <sys/cdefs.h>
27__FBSDID("$FreeBSD: head/sys/nfs/nfs_fha.c 259765 2013-12-23 08:43:16Z mav $");
27__FBSDID("$FreeBSD: head/sys/nfs/nfs_fha.c 260097 2013-12-30 20:23:15Z mav $");
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/sysproto.h>
32#include <sys/kernel.h>
33#include <sys/sysctl.h>
34#include <sys/vnode.h>
35#include <sys/malloc.h>
36#include <sys/mount.h>
37#include <sys/mbuf.h>
38#include <sys/sbuf.h>
39
40#include <rpc/rpc.h>
41#include <nfs/nfs_fha.h>
42
43static MALLOC_DEFINE(M_NFS_FHA, "NFS FHA", "NFS FHA");
44
45/*
46 * XXX need to commonize definitions between old and new NFS code. Define
47 * this here so we don't include one nfsproto.h over the other.
48 */
49#define NFS_PROG 100003
50
51void
52fha_init(struct fha_params *softc)
53{
54 char tmpstr[128];
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/sysproto.h>
32#include <sys/kernel.h>
33#include <sys/sysctl.h>
34#include <sys/vnode.h>
35#include <sys/malloc.h>
36#include <sys/mount.h>
37#include <sys/mbuf.h>
38#include <sys/sbuf.h>
39
40#include <rpc/rpc.h>
41#include <nfs/nfs_fha.h>
42
43static MALLOC_DEFINE(M_NFS_FHA, "NFS FHA", "NFS FHA");
44
45/*
46 * XXX need to commonize definitions between old and new NFS code. Define
47 * this here so we don't include one nfsproto.h over the other.
48 */
49#define NFS_PROG 100003
50
51void
52fha_init(struct fha_params *softc)
53{
54 char tmpstr[128];
55 int i;
55
56
56 /*
57 * A small hash table to map filehandles to fha_hash_entry
58 * structures.
59 */
60 softc->g_fha.hashtable = hashinit(256, M_NFS_FHA,
61 &softc->g_fha.hashmask);
57 for (i = 0; i < FHA_HASH_SIZE; i++)
58 mtx_init(&softc->fha_hash[i].mtx, "fhalock", NULL, MTX_DEF);
62
63 /*
64 * Set the default tuning parameters.
65 */
66 softc->ctls.enable = FHA_DEF_ENABLE;
67 softc->ctls.bin_shift = FHA_DEF_BIN_SHIFT;
68 softc->ctls.max_nfsds_per_fh = FHA_DEF_MAX_NFSDS_PER_FH;
69 softc->ctls.max_reqs_per_nfsd = FHA_DEF_MAX_REQS_PER_NFSD;
70
71 /*
72 * Allow the user to override the defaults at boot time with
73 * tunables.
74 */
75 snprintf(tmpstr, sizeof(tmpstr), "vfs.%s.fha.enable",
76 softc->server_name);
77 TUNABLE_INT_FETCH(tmpstr, &softc->ctls.enable);
78 snprintf(tmpstr, sizeof(tmpstr), "vfs.%s.fha.bin_shift",
79 softc->server_name);
80 TUNABLE_INT_FETCH(tmpstr, &softc->ctls.bin_shift);
81 snprintf(tmpstr, sizeof(tmpstr), "vfs.%s.fha.max_nfsds_per_fh",
82 softc->server_name);
83 TUNABLE_INT_FETCH(tmpstr, &softc->ctls.max_nfsds_per_fh);
84 snprintf(tmpstr, sizeof(tmpstr), "vfs.%s.fha.max_reqs_per_nfsd",
85 softc->server_name);
86 TUNABLE_INT_FETCH(tmpstr, &softc->ctls.max_reqs_per_nfsd);
87
88 /*
89 * Add sysctls so the user can change the tuning parameters at
90 * runtime.
91 */
92 SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
93 OID_AUTO, "enable", CTLFLAG_RW,
94 &softc->ctls.enable, 0, "Enable NFS File Handle Affinity (FHA)");
95
96 SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
97 OID_AUTO, "bin_shift", CTLFLAG_RW,
98 &softc->ctls.bin_shift, 0, "For FHA reads, no two requests will "
99 "contend if they're 2^(bin_shift) bytes apart");
100
101 SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
102 OID_AUTO, "max_nfsds_per_fh", CTLFLAG_RW,
103 &softc->ctls.max_nfsds_per_fh, 0, "Maximum nfsd threads that "
104 "should be working on requests for the same file handle");
105
106 SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
107 OID_AUTO, "max_reqs_per_nfsd", CTLFLAG_RW,
108 &softc->ctls.max_reqs_per_nfsd, 0, "Maximum requests that "
109 "single nfsd thread should be working on at any time");
110
111 SYSCTL_ADD_OID(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
112 OID_AUTO, "fhe_stats", CTLTYPE_STRING | CTLFLAG_RD, 0, 0,
113 softc->callbacks.fhe_stats_sysctl, "A", "");
114
115}
116
117void
118fha_uninit(struct fha_params *softc)
119{
59
60 /*
61 * Set the default tuning parameters.
62 */
63 softc->ctls.enable = FHA_DEF_ENABLE;
64 softc->ctls.bin_shift = FHA_DEF_BIN_SHIFT;
65 softc->ctls.max_nfsds_per_fh = FHA_DEF_MAX_NFSDS_PER_FH;
66 softc->ctls.max_reqs_per_nfsd = FHA_DEF_MAX_REQS_PER_NFSD;
67
68 /*
69 * Allow the user to override the defaults at boot time with
70 * tunables.
71 */
72 snprintf(tmpstr, sizeof(tmpstr), "vfs.%s.fha.enable",
73 softc->server_name);
74 TUNABLE_INT_FETCH(tmpstr, &softc->ctls.enable);
75 snprintf(tmpstr, sizeof(tmpstr), "vfs.%s.fha.bin_shift",
76 softc->server_name);
77 TUNABLE_INT_FETCH(tmpstr, &softc->ctls.bin_shift);
78 snprintf(tmpstr, sizeof(tmpstr), "vfs.%s.fha.max_nfsds_per_fh",
79 softc->server_name);
80 TUNABLE_INT_FETCH(tmpstr, &softc->ctls.max_nfsds_per_fh);
81 snprintf(tmpstr, sizeof(tmpstr), "vfs.%s.fha.max_reqs_per_nfsd",
82 softc->server_name);
83 TUNABLE_INT_FETCH(tmpstr, &softc->ctls.max_reqs_per_nfsd);
84
85 /*
86 * Add sysctls so the user can change the tuning parameters at
87 * runtime.
88 */
89 SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
90 OID_AUTO, "enable", CTLFLAG_RW,
91 &softc->ctls.enable, 0, "Enable NFS File Handle Affinity (FHA)");
92
93 SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
94 OID_AUTO, "bin_shift", CTLFLAG_RW,
95 &softc->ctls.bin_shift, 0, "For FHA reads, no two requests will "
96 "contend if they're 2^(bin_shift) bytes apart");
97
98 SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
99 OID_AUTO, "max_nfsds_per_fh", CTLFLAG_RW,
100 &softc->ctls.max_nfsds_per_fh, 0, "Maximum nfsd threads that "
101 "should be working on requests for the same file handle");
102
103 SYSCTL_ADD_UINT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
104 OID_AUTO, "max_reqs_per_nfsd", CTLFLAG_RW,
105 &softc->ctls.max_reqs_per_nfsd, 0, "Maximum requests that "
106 "single nfsd thread should be working on at any time");
107
108 SYSCTL_ADD_OID(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
109 OID_AUTO, "fhe_stats", CTLTYPE_STRING | CTLFLAG_RD, 0, 0,
110 softc->callbacks.fhe_stats_sysctl, "A", "");
111
112}
113
114void
115fha_uninit(struct fha_params *softc)
116{
117 int i;
118
120 sysctl_ctx_free(&softc->sysctl_ctx);
119 sysctl_ctx_free(&softc->sysctl_ctx);
121 hashdestroy(softc->g_fha.hashtable, M_NFS_FHA, softc->g_fha.hashmask);
120 for (i = 0; i < FHA_HASH_SIZE; i++)
121 mtx_destroy(&softc->fha_hash[i].mtx);
122}
123
124/*
125 * This just specifies that offsets should obey affinity when within
126 * the same 1Mbyte (1<<20) chunk for the file (reads only for now).
127 */
128static void
129fha_extract_info(struct svc_req *req, struct fha_info *i,
130 struct fha_callbacks *cb)
131{
132 struct mbuf *md;
133 caddr_t dpos;
134 static u_int64_t random_fh = 0;
135 int error;
136 int v3 = (req->rq_vers == 3);
137 rpcproc_t procnum;
138
139 /*
140 * We start off with a random fh. If we get a reasonable
141 * procnum, we set the fh. If there's a concept of offset
142 * that we're interested in, we set that.
143 */
144 i->fh = ++random_fh;
145 i->offset = 0;
146 i->locktype = LK_EXCLUSIVE;
147
148 /*
149 * Extract the procnum and convert to v3 form if necessary,
150 * taking care to deal with out-of-range procnums. Caller will
151 * ensure that rq_vers is either 2 or 3.
152 */
153 procnum = req->rq_proc;
154 if (!v3) {
155 rpcproc_t tmp_procnum;
156
157 tmp_procnum = cb->get_procnum(procnum);
158 if (tmp_procnum == -1)
159 goto out;
160 procnum = tmp_procnum;
161 }
162
163 /*
164 * We do affinity for most. However, we divide a realm of affinity
165 * by file offset so as to allow for concurrent random access. We
166 * only do this for reads today, but this may change when IFS supports
167 * efficient concurrent writes.
168 */
169 if (cb->no_offset(procnum))
170 goto out;
171
172 error = cb->realign(&req->rq_args, M_NOWAIT);
173 if (error)
174 goto out;
175 md = req->rq_args;
176 dpos = mtod(md, caddr_t);
177
178 /* Grab the filehandle. */
179 error = cb->get_fh(&i->fh, v3, &md, &dpos);
180 if (error)
181 goto out;
182
183 /* Content ourselves with zero offset for all but reads and writes. */
184 if (cb->is_read(procnum) || cb->is_write(procnum))
185 cb->get_offset(&md, &dpos, v3, i);
186
187out:
188 cb->set_locktype(procnum, i);
189}
190
191static struct fha_hash_entry *
192fha_hash_entry_new(u_int64_t fh)
193{
194 struct fha_hash_entry *e;
195
196 e = malloc(sizeof(*e), M_NFS_FHA, M_WAITOK);
197 e->fh = fh;
198 e->num_rw = 0;
199 e->num_exclusive = 0;
200 e->num_threads = 0;
201 LIST_INIT(&e->threads);
202
203 return (e);
204}
205
206static void
207fha_hash_entry_destroy(struct fha_hash_entry *e)
208{
209
122}
123
124/*
125 * This just specifies that offsets should obey affinity when within
126 * the same 1Mbyte (1<<20) chunk for the file (reads only for now).
127 */
128static void
129fha_extract_info(struct svc_req *req, struct fha_info *i,
130 struct fha_callbacks *cb)
131{
132 struct mbuf *md;
133 caddr_t dpos;
134 static u_int64_t random_fh = 0;
135 int error;
136 int v3 = (req->rq_vers == 3);
137 rpcproc_t procnum;
138
139 /*
140 * We start off with a random fh. If we get a reasonable
141 * procnum, we set the fh. If there's a concept of offset
142 * that we're interested in, we set that.
143 */
144 i->fh = ++random_fh;
145 i->offset = 0;
146 i->locktype = LK_EXCLUSIVE;
147
148 /*
149 * Extract the procnum and convert to v3 form if necessary,
150 * taking care to deal with out-of-range procnums. Caller will
151 * ensure that rq_vers is either 2 or 3.
152 */
153 procnum = req->rq_proc;
154 if (!v3) {
155 rpcproc_t tmp_procnum;
156
157 tmp_procnum = cb->get_procnum(procnum);
158 if (tmp_procnum == -1)
159 goto out;
160 procnum = tmp_procnum;
161 }
162
163 /*
164 * We do affinity for most. However, we divide a realm of affinity
165 * by file offset so as to allow for concurrent random access. We
166 * only do this for reads today, but this may change when IFS supports
167 * efficient concurrent writes.
168 */
169 if (cb->no_offset(procnum))
170 goto out;
171
172 error = cb->realign(&req->rq_args, M_NOWAIT);
173 if (error)
174 goto out;
175 md = req->rq_args;
176 dpos = mtod(md, caddr_t);
177
178 /* Grab the filehandle. */
179 error = cb->get_fh(&i->fh, v3, &md, &dpos);
180 if (error)
181 goto out;
182
183 /* Content ourselves with zero offset for all but reads and writes. */
184 if (cb->is_read(procnum) || cb->is_write(procnum))
185 cb->get_offset(&md, &dpos, v3, i);
186
187out:
188 cb->set_locktype(procnum, i);
189}
190
191static struct fha_hash_entry *
192fha_hash_entry_new(u_int64_t fh)
193{
194 struct fha_hash_entry *e;
195
196 e = malloc(sizeof(*e), M_NFS_FHA, M_WAITOK);
197 e->fh = fh;
198 e->num_rw = 0;
199 e->num_exclusive = 0;
200 e->num_threads = 0;
201 LIST_INIT(&e->threads);
202
203 return (e);
204}
205
206static void
207fha_hash_entry_destroy(struct fha_hash_entry *e)
208{
209
210 if (e->num_rw + e->num_exclusive)
211 panic("nonempty fhe");
210 mtx_assert(e->mtx, MA_OWNED);
211 KASSERT(e->num_rw == 0,
212 ("%d reqs on destroyed fhe %p", e->num_rw, e));
213 KASSERT(e->num_exclusive == 0,
214 ("%d exclusive reqs on destroyed fhe %p", e->num_exclusive, e));
215 KASSERT(e->num_threads == 0,
216 ("%d threads on destroyed fhe %p", e->num_threads, e));
212 free(e, M_NFS_FHA);
213}
214
215static void
216fha_hash_entry_remove(struct fha_hash_entry *e)
217{
218
217 free(e, M_NFS_FHA);
218}
219
220static void
221fha_hash_entry_remove(struct fha_hash_entry *e)
222{
223
224 mtx_assert(e->mtx, MA_OWNED);
219 LIST_REMOVE(e, link);
220 fha_hash_entry_destroy(e);
221}
222
223static struct fha_hash_entry *
224fha_hash_entry_lookup(struct fha_params *softc, u_int64_t fh)
225{
226 SVCPOOL *pool;
225 LIST_REMOVE(e, link);
226 fha_hash_entry_destroy(e);
227}
228
229static struct fha_hash_entry *
230fha_hash_entry_lookup(struct fha_params *softc, u_int64_t fh)
231{
232 SVCPOOL *pool;
227
228 pool = *softc->pool;
229
233 struct fha_hash_slot *fhs;
230 struct fha_hash_entry *fhe, *new_fhe;
231
234 struct fha_hash_entry *fhe, *new_fhe;
235
232 LIST_FOREACH(fhe, &softc->g_fha.hashtable[fh % softc->g_fha.hashmask],
233 link)
236 pool = *softc->pool;
237 fhs = &softc->fha_hash[fh % FHA_HASH_SIZE];
238 new_fhe = fha_hash_entry_new(fh);
239 new_fhe->mtx = &fhs->mtx;
240 mtx_lock(&fhs->mtx);
241 LIST_FOREACH(fhe, &fhs->list, link)
234 if (fhe->fh == fh)
235 break;
242 if (fhe->fh == fh)
243 break;
236
237 if (!fhe) {
244 if (!fhe) {
238 /* Allocate a new entry. */
239 mtx_unlock(&pool->sp_lock);
240 new_fhe = fha_hash_entry_new(fh);
241 mtx_lock(&pool->sp_lock);
242
243 /* Double-check to make sure we still need the new entry. */
244 LIST_FOREACH(fhe,
245 &softc->g_fha.hashtable[fh % softc->g_fha.hashmask], link)
246 if (fhe->fh == fh)
247 break;
248 if (!fhe) {
249 fhe = new_fhe;
250 LIST_INSERT_HEAD(
251 &softc->g_fha.hashtable[fh % softc->g_fha.hashmask],
252 fhe, link);
253 } else
254 fha_hash_entry_destroy(new_fhe);
255 }
256
245 fhe = new_fhe;
246 LIST_INSERT_HEAD(&fhs->list, fhe, link);
247 } else
248 fha_hash_entry_destroy(new_fhe);
257 return (fhe);
258}
259
260static void
261fha_hash_entry_add_thread(struct fha_hash_entry *fhe, SVCTHREAD *thread)
262{
263
249 return (fhe);
250}
251
252static void
253fha_hash_entry_add_thread(struct fha_hash_entry *fhe, SVCTHREAD *thread)
254{
255
256 mtx_assert(fhe->mtx, MA_OWNED);
257 thread->st_p2 = 0;
264 LIST_INSERT_HEAD(&fhe->threads, thread, st_alink);
265 fhe->num_threads++;
266}
267
268static void
269fha_hash_entry_remove_thread(struct fha_hash_entry *fhe, SVCTHREAD *thread)
270{
271
258 LIST_INSERT_HEAD(&fhe->threads, thread, st_alink);
259 fhe->num_threads++;
260}
261
262static void
263fha_hash_entry_remove_thread(struct fha_hash_entry *fhe, SVCTHREAD *thread)
264{
265
266 mtx_assert(fhe->mtx, MA_OWNED);
267 KASSERT(thread->st_p2 == 0,
268 ("%d reqs on removed thread %p", thread->st_p2, thread));
272 LIST_REMOVE(thread, st_alink);
273 fhe->num_threads--;
274}
275
276/*
277 * Account for an ongoing operation associated with this file.
278 */
279static void
280fha_hash_entry_add_op(struct fha_hash_entry *fhe, int locktype, int count)
281{
282
269 LIST_REMOVE(thread, st_alink);
270 fhe->num_threads--;
271}
272
273/*
274 * Account for an ongoing operation associated with this file.
275 */
276static void
277fha_hash_entry_add_op(struct fha_hash_entry *fhe, int locktype, int count)
278{
279
280 mtx_assert(fhe->mtx, MA_OWNED);
283 if (LK_EXCLUSIVE == locktype)
284 fhe->num_exclusive += count;
285 else
286 fhe->num_rw += count;
287}
288
289/*
290 * Get the service thread currently associated with the fhe that is
291 * appropriate to handle this operation.
292 */
293SVCTHREAD *
294fha_hash_entry_choose_thread(struct fha_params *softc,
295 struct fha_hash_entry *fhe, struct fha_info *i, SVCTHREAD *this_thread);
296
297SVCTHREAD *
298fha_hash_entry_choose_thread(struct fha_params *softc,
299 struct fha_hash_entry *fhe, struct fha_info *i, SVCTHREAD *this_thread)
300{
301 SVCTHREAD *thread, *min_thread = NULL;
302 SVCPOOL *pool;
303 int req_count, min_count = 0;
304 off_t offset1, offset2;
305
306 pool = *softc->pool;
307
308 LIST_FOREACH(thread, &fhe->threads, st_alink) {
281 if (LK_EXCLUSIVE == locktype)
282 fhe->num_exclusive += count;
283 else
284 fhe->num_rw += count;
285}
286
287/*
288 * Get the service thread currently associated with the fhe that is
289 * appropriate to handle this operation.
290 */
291SVCTHREAD *
292fha_hash_entry_choose_thread(struct fha_params *softc,
293 struct fha_hash_entry *fhe, struct fha_info *i, SVCTHREAD *this_thread);
294
295SVCTHREAD *
296fha_hash_entry_choose_thread(struct fha_params *softc,
297 struct fha_hash_entry *fhe, struct fha_info *i, SVCTHREAD *this_thread)
298{
299 SVCTHREAD *thread, *min_thread = NULL;
300 SVCPOOL *pool;
301 int req_count, min_count = 0;
302 off_t offset1, offset2;
303
304 pool = *softc->pool;
305
306 LIST_FOREACH(thread, &fhe->threads, st_alink) {
309 req_count = thread->st_reqcount;
307 req_count = thread->st_p2;
310
311 /* If there are any writes in progress, use the first thread. */
312 if (fhe->num_exclusive) {
313#if 0
314 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
315 "fha: %p(%d)w", thread, req_count);
316#endif
317 return (thread);
318 }
319
320 /*
321 * Check for read locality, making sure that we won't
322 * exceed our per-thread load limit in the process.
323 */
324 offset1 = i->offset;
308
309 /* If there are any writes in progress, use the first thread. */
310 if (fhe->num_exclusive) {
311#if 0
312 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
313 "fha: %p(%d)w", thread, req_count);
314#endif
315 return (thread);
316 }
317
318 /*
319 * Check for read locality, making sure that we won't
320 * exceed our per-thread load limit in the process.
321 */
322 offset1 = i->offset;
325 offset2 = STAILQ_FIRST(&thread->st_reqs)->rq_p3;
323 offset2 = thread->st_p3;
326
327 if (((offset1 >= offset2)
328 && ((offset1 - offset2) < (1 << softc->ctls.bin_shift)))
329 || ((offset2 > offset1)
330 && ((offset2 - offset1) < (1 << softc->ctls.bin_shift)))) {
331 if ((softc->ctls.max_reqs_per_nfsd == 0) ||
332 (req_count < softc->ctls.max_reqs_per_nfsd)) {
333#if 0
334 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
335 "fha: %p(%d)r", thread, req_count);
336#endif
337 return (thread);
338 }
339 }
340
341 /*
342 * We don't have a locality match, so skip this thread,
343 * but keep track of the most attractive thread in case
344 * we need to come back to it later.
345 */
346#if 0
347 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
348 "fha: %p(%d)s off1 %llu off2 %llu", thread,
349 req_count, offset1, offset2);
350#endif
351 if ((min_thread == NULL) || (req_count < min_count)) {
352 min_count = req_count;
353 min_thread = thread;
354 }
355 }
356
357 /*
358 * We didn't find a good match yet. See if we can add
359 * a new thread to this file handle entry's thread list.
360 */
361 if ((softc->ctls.max_nfsds_per_fh == 0) ||
362 (fhe->num_threads < softc->ctls.max_nfsds_per_fh)) {
324
325 if (((offset1 >= offset2)
326 && ((offset1 - offset2) < (1 << softc->ctls.bin_shift)))
327 || ((offset2 > offset1)
328 && ((offset2 - offset1) < (1 << softc->ctls.bin_shift)))) {
329 if ((softc->ctls.max_reqs_per_nfsd == 0) ||
330 (req_count < softc->ctls.max_reqs_per_nfsd)) {
331#if 0
332 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
333 "fha: %p(%d)r", thread, req_count);
334#endif
335 return (thread);
336 }
337 }
338
339 /*
340 * We don't have a locality match, so skip this thread,
341 * but keep track of the most attractive thread in case
342 * we need to come back to it later.
343 */
344#if 0
345 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
346 "fha: %p(%d)s off1 %llu off2 %llu", thread,
347 req_count, offset1, offset2);
348#endif
349 if ((min_thread == NULL) || (req_count < min_count)) {
350 min_count = req_count;
351 min_thread = thread;
352 }
353 }
354
355 /*
356 * We didn't find a good match yet. See if we can add
357 * a new thread to this file handle entry's thread list.
358 */
359 if ((softc->ctls.max_nfsds_per_fh == 0) ||
360 (fhe->num_threads < softc->ctls.max_nfsds_per_fh)) {
363 /*
364 * We can add a new thread, so try for an idle thread
365 * first, and fall back to this_thread if none are idle.
366 */
367 if (STAILQ_EMPTY(&this_thread->st_reqs)) {
368 thread = this_thread;
361 thread = this_thread;
369#if 0
362#if 0
370 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
371 "fha: %p(%d)t", thread, thread->st_reqcount);
363 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
364 "fha: %p(%d)t", thread, thread->st_p2);
372#endif
365#endif
373 } else if ((thread = LIST_FIRST(&pool->sp_idlethreads))) {
374#if 0
375 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
376 "fha: %p(%d)i", thread, thread->st_reqcount);
377#endif
378 } else {
379 thread = this_thread;
380#if 0
381 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
382 "fha: %p(%d)b", thread, thread->st_reqcount);
383#endif
384 }
385 fha_hash_entry_add_thread(fhe, thread);
386 } else {
387 /*
388 * We don't want to use any more threads for this file, so
389 * go back to the most attractive nfsd we're already using.
390 */
391 thread = min_thread;
392 }
393
394 return (thread);
395}
396
397/*
398 * After getting a request, try to assign it to some thread. Usually we
399 * handle it ourselves.
400 */
401SVCTHREAD *
402fha_assign(SVCTHREAD *this_thread, struct svc_req *req,
403 struct fha_params *softc)
404{
405 SVCTHREAD *thread;
406 struct fha_info i;
407 struct fha_hash_entry *fhe;
408 struct fha_callbacks *cb;
409
410 cb = &softc->callbacks;
411
412 /* Check to see whether we're enabled. */
413 if (softc->ctls.enable == 0)
366 fha_hash_entry_add_thread(fhe, thread);
367 } else {
368 /*
369 * We don't want to use any more threads for this file, so
370 * go back to the most attractive nfsd we're already using.
371 */
372 thread = min_thread;
373 }
374
375 return (thread);
376}
377
378/*
379 * After getting a request, try to assign it to some thread. Usually we
380 * handle it ourselves.
381 */
382SVCTHREAD *
383fha_assign(SVCTHREAD *this_thread, struct svc_req *req,
384 struct fha_params *softc)
385{
386 SVCTHREAD *thread;
387 struct fha_info i;
388 struct fha_hash_entry *fhe;
389 struct fha_callbacks *cb;
390
391 cb = &softc->callbacks;
392
393 /* Check to see whether we're enabled. */
394 if (softc->ctls.enable == 0)
414 return (this_thread);
395 goto thist;
415
416 /*
417 * Only do placement if this is an NFS request.
418 */
419 if (req->rq_prog != NFS_PROG)
396
397 /*
398 * Only do placement if this is an NFS request.
399 */
400 if (req->rq_prog != NFS_PROG)
420 return (this_thread);
401 goto thist;
421
422 if (req->rq_vers != 2 && req->rq_vers != 3)
402
403 if (req->rq_vers != 2 && req->rq_vers != 3)
423 return (this_thread);
404 goto thist;
424
425 fha_extract_info(req, &i, cb);
426
427 /*
428 * We save the offset associated with this request for later
429 * nfsd matching.
430 */
431 fhe = fha_hash_entry_lookup(softc, i.fh);
432 req->rq_p1 = fhe;
433 req->rq_p2 = i.locktype;
434 req->rq_p3 = i.offset;
435
436 /*
437 * Choose a thread, taking into consideration locality, thread load,
438 * and the number of threads already working on this file.
439 */
440 thread = fha_hash_entry_choose_thread(softc, fhe, &i, this_thread);
441 KASSERT(thread, ("fha_assign: NULL thread!"));
442 fha_hash_entry_add_op(fhe, i.locktype, 1);
405
406 fha_extract_info(req, &i, cb);
407
408 /*
409 * We save the offset associated with this request for later
410 * nfsd matching.
411 */
412 fhe = fha_hash_entry_lookup(softc, i.fh);
413 req->rq_p1 = fhe;
414 req->rq_p2 = i.locktype;
415 req->rq_p3 = i.offset;
416
417 /*
418 * Choose a thread, taking into consideration locality, thread load,
419 * and the number of threads already working on this file.
420 */
421 thread = fha_hash_entry_choose_thread(softc, fhe, &i, this_thread);
422 KASSERT(thread, ("fha_assign: NULL thread!"));
423 fha_hash_entry_add_op(fhe, i.locktype, 1);
424 thread->st_p2++;
425 thread->st_p3 = i.offset;
443
426
427 /*
428 * Grab the pool lock here so that the chosen thread cannot go away
429 * before the new request is inserted into its queue while we drop
429 * the fhe lock.
430 */
431 mtx_lock(&(*softc->pool)->sp_lock);
432 mtx_unlock(fhe->mtx);
433
444 return (thread);
434 return (thread);
435thist:
436 req->rq_p1 = NULL;
437 mtx_lock(&(*softc->pool)->sp_lock);
438 return (this_thread);
445}
446
447/*
448 * Called when we're done with an operation. The request has already
449 * been de-queued.
450 */
451void
452fha_nd_complete(SVCTHREAD *thread, struct svc_req *req)
453{
454 struct fha_hash_entry *fhe = req->rq_p1;
439}
440
441/*
442 * Called when we're done with an operation. The request has already
443 * been de-queued.
444 */
445void
446fha_nd_complete(SVCTHREAD *thread, struct svc_req *req)
447{
448 struct fha_hash_entry *fhe = req->rq_p1;
449 struct mtx *mtx;
455
456 /*
457 * This may be called for reqs that didn't go through
458 * fha_assign (e.g. extra NULL ops used for RPCSEC_GSS).
459 */
460 if (!fhe)
461 return;
462
450
451 /*
452 * This may be called for reqs that didn't go through
453 * fha_assign (e.g. extra NULL ops used for RPCSEC_GSS).
454 */
455 if (!fhe)
456 return;
457
458 mtx = fhe->mtx;
459 mtx_lock(mtx);
463 fha_hash_entry_add_op(fhe, req->rq_p2, -1);
460 fha_hash_entry_add_op(fhe, req->rq_p2, -1);
464
465 if (thread->st_reqcount == 0) {
461 thread->st_p2--;
462 KASSERT(thread->st_p2 >= 0, ("Negative request count %d on %p",
463 thread->st_p2, thread));
464 if (thread->st_p2 == 0) {
466 fha_hash_entry_remove_thread(fhe, thread);
467 if (0 == fhe->num_rw + fhe->num_exclusive)
468 fha_hash_entry_remove(fhe);
469 }
465 fha_hash_entry_remove_thread(fhe, thread);
466 if (0 == fhe->num_rw + fhe->num_exclusive)
467 fha_hash_entry_remove(fhe);
468 }
469 mtx_unlock(mtx);
470}
471
472int
473fhe_stats_sysctl(SYSCTL_HANDLER_ARGS, struct fha_params *softc)
474{
475 int error, count, i;
476 struct sbuf sb;
477 struct fha_hash_entry *fhe;
478 bool_t first = TRUE;
479 SVCTHREAD *thread;
480 SVCPOOL *pool;
481
482 sbuf_new(&sb, NULL, 4096, SBUF_FIXEDLEN);
483
484 pool = NULL;
485
486 if (!*softc->pool) {
487 sbuf_printf(&sb, "NFSD not running\n");
488 goto out;
489 }
490 pool = *softc->pool;
491
470}
471
472int
473fhe_stats_sysctl(SYSCTL_HANDLER_ARGS, struct fha_params *softc)
474{
475 int error, count, i;
476 struct sbuf sb;
477 struct fha_hash_entry *fhe;
478 bool_t first = TRUE;
479 SVCTHREAD *thread;
480 SVCPOOL *pool;
481
482 sbuf_new(&sb, NULL, 4096, SBUF_FIXEDLEN);
483
484 pool = NULL;
485
486 if (!*softc->pool) {
487 sbuf_printf(&sb, "NFSD not running\n");
488 goto out;
489 }
490 pool = *softc->pool;
491
492 mtx_lock(&pool->sp_lock);
493 count = 0;
492 count = 0;
494 for (i = 0; i <= softc->g_fha.hashmask; i++)
495 if (!LIST_EMPTY(&softc->g_fha.hashtable[i]))
493 for (i = 0; i < FHA_HASH_SIZE; i++)
494 if (!LIST_EMPTY(&softc->fha_hash[i].list))
496 count++;
497
498 if (count == 0) {
499 sbuf_printf(&sb, "No file handle entries.\n");
500 goto out;
501 }
502
495 count++;
496
497 if (count == 0) {
498 sbuf_printf(&sb, "No file handle entries.\n");
499 goto out;
500 }
501
503 for (i = 0; i <= softc->g_fha.hashmask; i++) {
504 LIST_FOREACH(fhe, &softc->g_fha.hashtable[i], link) {
502 for (i = 0; i < FHA_HASH_SIZE; i++) {
503 mtx_lock(&softc->fha_hash[i].mtx);
504 LIST_FOREACH(fhe, &softc->fha_hash[i].list, link) {
505 sbuf_printf(&sb, "%sfhe %p: {\n", first ? "" : ", ", fhe);
506
507 sbuf_printf(&sb, " fh: %ju\n", (uintmax_t) fhe->fh);
508 sbuf_printf(&sb, " num_rw: %d\n", fhe->num_rw);
509 sbuf_printf(&sb, " num_exclusive: %d\n", fhe->num_exclusive);
510 sbuf_printf(&sb, " num_threads: %d\n", fhe->num_threads);
511
512 LIST_FOREACH(thread, &fhe->threads, st_alink) {
513 sbuf_printf(&sb, " thread %p offset %ju "
514 "(count %d)\n", thread,
505 sbuf_printf(&sb, "%sfhe %p: {\n", first ? "" : ", ", fhe);
506
507 sbuf_printf(&sb, " fh: %ju\n", (uintmax_t) fhe->fh);
508 sbuf_printf(&sb, " num_rw: %d\n", fhe->num_rw);
509 sbuf_printf(&sb, " num_exclusive: %d\n", fhe->num_exclusive);
510 sbuf_printf(&sb, " num_threads: %d\n", fhe->num_threads);
511
512 LIST_FOREACH(thread, &fhe->threads, st_alink) {
513 sbuf_printf(&sb, " thread %p offset %ju "
514 "(count %d)\n", thread,
515 STAILQ_FIRST(&thread->st_reqs)->rq_p3,
516 thread->st_reqcount);
515 thread->st_p3, thread->st_p2);
517 }
518
519 sbuf_printf(&sb, "}");
520 first = FALSE;
521
522 /* Limit the output. */
523 if (++count > 128) {
524 sbuf_printf(&sb, "...");
525 break;
526 }
527 }
516 }
517
518 sbuf_printf(&sb, "}");
519 first = FALSE;
520
521 /* Limit the output. */
522 if (++count > 128) {
523 sbuf_printf(&sb, "...");
524 break;
525 }
526 }
527 mtx_unlock(&softc->fha_hash[i].mtx);
528 }
529
530 out:
528 }
529
530 out:
531 if (pool)
532 mtx_unlock(&pool->sp_lock);
533 sbuf_trim(&sb);
534 sbuf_finish(&sb);
535 error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
536 sbuf_delete(&sb);
537 return (error);
538}
531 sbuf_trim(&sb);
532 sbuf_finish(&sb);
533 error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
534 sbuf_delete(&sb);
535 return (error);
536}