1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	from nfs_syscalls.c	8.5 (Berkeley) 3/30/95
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD$");
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/sysproto.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45#include <sys/file.h>
46#include <sys/vnode.h>
47#include <sys/malloc.h>
48#include <sys/mount.h>
49#include <sys/proc.h>
50#include <sys/bio.h>
51#include <sys/buf.h>
52#include <sys/mbuf.h>
53#include <sys/socket.h>
54#include <sys/socketvar.h>
55#include <sys/domain.h>
56#include <sys/protosw.h>
57#include <sys/namei.h>
58#include <sys/unistd.h>
59#include <sys/kthread.h>
60#include <sys/fcntl.h>
61#include <sys/lockf.h>
62#include <sys/mutex.h>
63#include <sys/taskqueue.h>
64
65#include <netinet/in.h>
66#include <netinet/tcp.h>
67
68#include <fs/nfs/nfsport.h>
69#include <fs/nfsclient/nfsmount.h>
70#include <fs/nfsclient/nfs.h>
71#include <fs/nfsclient/nfsnode.h>
72
73extern struct mtx	ncl_iod_mutex;
74extern struct task	ncl_nfsiodnew_task;
75
76int ncl_numasync;
77enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
78struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
79
80static void	nfssvc_iod(void *);
81
82static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
83
84SYSCTL_DECL(_vfs_nfs);
85
86/* Maximum number of seconds a nfsiod kthread will sleep before exiting */
87static unsigned int nfs_iodmaxidle = 120;
88SYSCTL_UINT(_vfs_nfs, OID_AUTO, iodmaxidle, CTLFLAG_RW, &nfs_iodmaxidle, 0,
89    "Max number of seconds an nfsiod kthread will sleep before exiting");
90
91/* Maximum number of nfsiod kthreads */
92unsigned int ncl_iodmax = 20;
93
94/* Minimum number of nfsiod kthreads to keep as spares */
95static unsigned int nfs_iodmin = 0;
96
97static int nfs_nfsiodnew_sync(void);
98
99static int
100sysctl_iodmin(SYSCTL_HANDLER_ARGS)
101{
102	int error, i;
103	int newmin;
104
105	newmin = nfs_iodmin;
106	error = sysctl_handle_int(oidp, &newmin, 0, req);
107	if (error || (req->newptr == NULL))
108		return (error);
109	NFSLOCKIOD();
110	if (newmin > ncl_iodmax) {
111		error = EINVAL;
112		goto out;
113	}
114	nfs_iodmin = newmin;
115	if (ncl_numasync >= nfs_iodmin)
116		goto out;
117	/*
118	 * If the current number of nfsiod is lower
119	 * than the new minimum, create some more.
120	 */
121	for (i = nfs_iodmin - ncl_numasync; i > 0; i--)
122		nfs_nfsiodnew_sync();
123out:
124	NFSUNLOCKIOD();
125	return (0);
126}
127SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmin,
128    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof (nfs_iodmin),
129    sysctl_iodmin, "IU",
130    "Min number of nfsiod kthreads to keep as spares");
131
132static int
133sysctl_iodmax(SYSCTL_HANDLER_ARGS)
134{
135	int error, i;
136	int iod, newmax;
137
138	newmax = ncl_iodmax;
139	error = sysctl_handle_int(oidp, &newmax, 0, req);
140	if (error || (req->newptr == NULL))
141		return (error);
142	if (newmax > NFS_MAXASYNCDAEMON)
143		return (EINVAL);
144	NFSLOCKIOD();
145	ncl_iodmax = newmax;
146	if (ncl_numasync <= ncl_iodmax)
147		goto out;
148	/*
149	 * If there are some asleep nfsiods that should
150	 * exit, wakeup() them so that they check ncl_iodmax
151	 * and exit.  Those who are active will exit as
152	 * soon as they finish I/O.
153	 */
154	iod = ncl_numasync - 1;
155	for (i = 0; i < ncl_numasync - ncl_iodmax; i++) {
156		if (ncl_iodwant[iod] == NFSIOD_AVAILABLE)
157			wakeup(&ncl_iodwant[iod]);
158		iod--;
159	}
160out:
161	NFSUNLOCKIOD();
162	return (0);
163}
164SYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmax,
165    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof (ncl_iodmax),
166    sysctl_iodmax, "IU",
167    "Max number of nfsiod kthreads");
168
169static int
170nfs_nfsiodnew_sync(void)
171{
172	int error, i;
173
174	NFSASSERTIOD();
175	for (i = 0; i < ncl_iodmax; i++) {
176		if (nfs_asyncdaemon[i] == 0) {
177			nfs_asyncdaemon[i] = 1;
178			break;
179		}
180	}
181	if (i == ncl_iodmax)
182		return (0);
183	NFSUNLOCKIOD();
184	error = kproc_create(nfssvc_iod, nfs_asyncdaemon + i, NULL,
185	    RFHIGHPID, 0, "newnfs %d", i);
186	NFSLOCKIOD();
187	if (error == 0) {
188		ncl_numasync++;
189		ncl_iodwant[i] = NFSIOD_AVAILABLE;
190	} else
191		nfs_asyncdaemon[i] = 0;
192	return (error);
193}
194
195void
196ncl_nfsiodnew_tq(__unused void *arg, int pending)
197{
198
199	NFSLOCKIOD();
200	while (pending > 0) {
201		pending--;
202		nfs_nfsiodnew_sync();
203	}
204	NFSUNLOCKIOD();
205}
206
207void
208ncl_nfsiodnew(void)
209{
210
211	NFSASSERTIOD();
212	taskqueue_enqueue(taskqueue_thread, &ncl_nfsiodnew_task);
213}
214
215static void
216nfsiod_setup(void *dummy)
217{
218	int error;
219
220	TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin);
221	nfscl_init();
222	NFSLOCKIOD();
223	/* Silently limit the start number of nfsiod's */
224	if (nfs_iodmin > NFS_MAXASYNCDAEMON)
225		nfs_iodmin = NFS_MAXASYNCDAEMON;
226
227	while (ncl_numasync < nfs_iodmin) {
228		error = nfs_nfsiodnew_sync();
229		if (error == -1)
230			panic("nfsiod_setup: nfs_nfsiodnew failed");
231	}
232	NFSUNLOCKIOD();
233}
234SYSINIT(newnfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL);
235
236static int nfs_defect = 0;
237SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0,
238    "Allow nfsiods to migrate serving different mounts");
239
240/*
241 * Asynchronous I/O daemons for client nfs.
242 * They do read-ahead and write-behind operations on the block I/O cache.
243 * Returns if we hit the timeout defined by the iodmaxidle sysctl.
244 */
245static void
246nfssvc_iod(void *instance)
247{
248	struct buf *bp;
249	struct nfsmount *nmp;
250	int myiod, timo;
251	int error = 0;
252
253	NFSLOCKIOD();
254	myiod = (int *)instance - nfs_asyncdaemon;
255	/*
256	 * Main loop
257	 */
258	for (;;) {
259	    while (((nmp = ncl_iodmount[myiod]) == NULL)
260		   || !TAILQ_FIRST(&nmp->nm_bufq)) {
261		if (myiod >= ncl_iodmax)
262			goto finish;
263		if (nmp)
264			nmp->nm_bufqiods--;
265		if (ncl_iodwant[myiod] == NFSIOD_NOT_AVAILABLE)
266			ncl_iodwant[myiod] = NFSIOD_AVAILABLE;
267		ncl_iodmount[myiod] = NULL;
268		/*
269		 * Always keep at least nfs_iodmin kthreads.
270		 */
271		timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz;
272		error = msleep(&ncl_iodwant[myiod], &ncl_iod_mutex, PWAIT | PCATCH,
273		    "-", timo);
274		if (error) {
275			nmp = ncl_iodmount[myiod];
276			/*
277			 * Rechecking the nm_bufq closes a rare race where the
278			 * nfsiod is woken up at the exact time the idle timeout
279			 * fires
280			 */
281			if (nmp && TAILQ_FIRST(&nmp->nm_bufq))
282				error = 0;
283			break;
284		}
285	    }
286	    if (error)
287		    break;
288	    while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) {
289		/* Take one off the front of the list */
290		TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
291		nmp->nm_bufqlen--;
292		if (nmp->nm_bufqwant && nmp->nm_bufqlen <= ncl_numasync) {
293		    nmp->nm_bufqwant = 0;
294		    wakeup(&nmp->nm_bufq);
295		}
296		NFSUNLOCKIOD();
297		if (bp->b_flags & B_DIRECT) {
298			KASSERT((bp->b_iocmd == BIO_WRITE), ("nfscvs_iod: BIO_WRITE not set"));
299			(void)ncl_doio_directwrite(bp);
300		} else {
301			if (bp->b_iocmd == BIO_READ)
302				(void) ncl_doio(bp->b_vp, bp, bp->b_rcred,
303				    NULL, 0);
304			else
305				(void) ncl_doio(bp->b_vp, bp, bp->b_wcred,
306				    NULL, 0);
307		}
308		NFSLOCKIOD();
309		/*
310		 * Make sure the nmp hasn't been dismounted as soon as
311		 * ncl_doio() completes for the last buffer.
312		 */
313		nmp = ncl_iodmount[myiod];
314		if (nmp == NULL)
315			break;
316
317		/*
318		 * If there are more than one iod on this mount, then defect
319		 * so that the iods can be shared out fairly between the mounts
320		 */
321		if (nfs_defect && nmp->nm_bufqiods > 1) {
322		    NFS_DPF(ASYNCIO,
323			    ("nfssvc_iod: iod %d defecting from mount %p\n",
324			     myiod, nmp));
325		    ncl_iodmount[myiod] = NULL;
326		    nmp->nm_bufqiods--;
327		    break;
328		}
329	    }
330	}
331finish:
332	nfs_asyncdaemon[myiod] = 0;
333	if (nmp)
334	    nmp->nm_bufqiods--;
335	ncl_iodwant[myiod] = NFSIOD_NOT_AVAILABLE;
336	ncl_iodmount[myiod] = NULL;
337	/* Someone may be waiting for the last nfsiod to terminate. */
338	if (--ncl_numasync == 0)
339		wakeup(&ncl_numasync);
340	NFSUNLOCKIOD();
341	if ((error == 0) || (error == EWOULDBLOCK))
342		kproc_exit(0);
343	/* Abnormal termination */
344	kproc_exit(1);
345}
346