1191783Srmacklem/*- 2191783Srmacklem * Copyright (c) 1989, 1993 3191783Srmacklem * The Regents of the University of California. All rights reserved. 4191783Srmacklem * 5191783Srmacklem * This code is derived from software contributed to Berkeley by 6191783Srmacklem * Rick Macklem at The University of Guelph. 7191783Srmacklem * 8191783Srmacklem * Redistribution and use in source and binary forms, with or without 9191783Srmacklem * modification, are permitted provided that the following conditions 10191783Srmacklem * are met: 11191783Srmacklem * 1. Redistributions of source code must retain the above copyright 12191783Srmacklem * notice, this list of conditions and the following disclaimer. 13191783Srmacklem * 2. Redistributions in binary form must reproduce the above copyright 14191783Srmacklem * notice, this list of conditions and the following disclaimer in the 15191783Srmacklem * documentation and/or other materials provided with the distribution. 16191783Srmacklem * 4. Neither the name of the University nor the names of its contributors 17191783Srmacklem * may be used to endorse or promote products derived from this software 18191783Srmacklem * without specific prior written permission. 19191783Srmacklem * 20191783Srmacklem * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21191783Srmacklem * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22191783Srmacklem * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23191783Srmacklem * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24191783Srmacklem * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25191783Srmacklem * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26191783Srmacklem * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27191783Srmacklem * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28191783Srmacklem * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29191783Srmacklem * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30191783Srmacklem * SUCH DAMAGE. 31191783Srmacklem * 32191783Srmacklem * from nfs_syscalls.c 8.5 (Berkeley) 3/30/95 33191783Srmacklem */ 34191783Srmacklem 35191783Srmacklem#include <sys/cdefs.h> 36191783Srmacklem__FBSDID("$FreeBSD$"); 37191783Srmacklem 38191783Srmacklem#include <sys/param.h> 39191783Srmacklem#include <sys/systm.h> 40191783Srmacklem#include <sys/sysproto.h> 41191783Srmacklem#include <sys/kernel.h> 42191783Srmacklem#include <sys/sysctl.h> 43191783Srmacklem#include <sys/file.h> 44191783Srmacklem#include <sys/filedesc.h> 45191783Srmacklem#include <sys/vnode.h> 46191783Srmacklem#include <sys/malloc.h> 47191783Srmacklem#include <sys/mount.h> 48191783Srmacklem#include <sys/proc.h> 49191783Srmacklem#include <sys/bio.h> 50191783Srmacklem#include <sys/buf.h> 51191783Srmacklem#include <sys/mbuf.h> 52191783Srmacklem#include <sys/socket.h> 53191783Srmacklem#include <sys/socketvar.h> 54191783Srmacklem#include <sys/domain.h> 55191783Srmacklem#include <sys/protosw.h> 56191783Srmacklem#include <sys/namei.h> 57191783Srmacklem#include <sys/unistd.h> 58191783Srmacklem#include <sys/kthread.h> 59191783Srmacklem#include <sys/fcntl.h> 60191783Srmacklem#include <sys/lockf.h> 61191783Srmacklem#include <sys/mutex.h> 62220683Srmacklem#include <sys/taskqueue.h> 63191783Srmacklem 64191783Srmacklem#include <netinet/in.h> 65191783Srmacklem#include <netinet/tcp.h> 66191783Srmacklem 67191783Srmacklem#include <fs/nfs/nfsport.h> 68191783Srmacklem#include <fs/nfsclient/nfsmount.h> 69191783Srmacklem#include <fs/nfsclient/nfs.h> 70191783Srmacklem#include <fs/nfsclient/nfsnode.h> 71191783Srmacklem 72220683Srmacklemextern struct mtx ncl_iod_mutex; 73220683Srmacklemextern struct task ncl_nfsiodnew_task; 74191783Srmacklem 75191783Srmacklemint ncl_numasync; 76220683Srmacklemenum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON]; 77220683Srmacklemstruct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON]; 78191783Srmacklem 79191783Srmacklemstatic void nfssvc_iod(void *); 80191783Srmacklem 81220683Srmacklemstatic int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; 82191783Srmacklem 83221973SrmacklemSYSCTL_DECL(_vfs_nfs); 84191783Srmacklem 85191783Srmacklem/* Maximum number of seconds a nfsiod kthread will sleep before exiting */ 86220683Srmacklemstatic unsigned int nfs_iodmaxidle = 120; 87221973SrmacklemSYSCTL_UINT(_vfs_nfs, OID_AUTO, iodmaxidle, CTLFLAG_RW, &nfs_iodmaxidle, 0, 88220683Srmacklem "Max number of seconds an nfsiod kthread will sleep before exiting"); 89191783Srmacklem 90191783Srmacklem/* Maximum number of nfsiod kthreads */ 91220683Srmacklemunsigned int ncl_iodmax = 20; 92191783Srmacklem 93191783Srmacklem/* Minimum number of nfsiod kthreads to keep as spares */ 94191783Srmacklemstatic unsigned int nfs_iodmin = 0; 95191783Srmacklem 96220683Srmacklemstatic int nfs_nfsiodnew_sync(void); 97220683Srmacklem 98191783Srmacklemstatic int 99191783Srmacklemsysctl_iodmin(SYSCTL_HANDLER_ARGS) 100191783Srmacklem{ 101191783Srmacklem int error, i; 102191783Srmacklem int newmin; 103191783Srmacklem 104191783Srmacklem newmin = nfs_iodmin; 105191783Srmacklem error = sysctl_handle_int(oidp, &newmin, 0, req); 106191783Srmacklem if (error || (req->newptr == NULL)) 107191783Srmacklem return (error); 108191783Srmacklem mtx_lock(&ncl_iod_mutex); 109191783Srmacklem if (newmin > ncl_iodmax) { 110191783Srmacklem error = EINVAL; 111191783Srmacklem goto out; 112191783Srmacklem } 113191783Srmacklem nfs_iodmin = newmin; 114191783Srmacklem if (ncl_numasync >= nfs_iodmin) 115191783Srmacklem goto out; 116191783Srmacklem /* 117191783Srmacklem * If the current number of nfsiod is lower 118191783Srmacklem * than the new minimum, create some more. 119191783Srmacklem */ 120191783Srmacklem for (i = nfs_iodmin - ncl_numasync; i > 0; i--) 121220683Srmacklem nfs_nfsiodnew_sync(); 122191783Srmacklemout: 123191783Srmacklem mtx_unlock(&ncl_iod_mutex); 124191783Srmacklem return (0); 125191783Srmacklem} 126221973SrmacklemSYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmin, CTLTYPE_UINT | CTLFLAG_RW, 0, 127220683Srmacklem sizeof (nfs_iodmin), sysctl_iodmin, "IU", 128220683Srmacklem "Min number of nfsiod kthreads to keep as spares"); 129191783Srmacklem 130191783Srmacklemstatic int 131191783Srmacklemsysctl_iodmax(SYSCTL_HANDLER_ARGS) 132191783Srmacklem{ 133191783Srmacklem int error, i; 134191783Srmacklem int iod, newmax; 135191783Srmacklem 136191783Srmacklem newmax = ncl_iodmax; 137191783Srmacklem error = sysctl_handle_int(oidp, &newmax, 0, req); 138191783Srmacklem if (error || (req->newptr == NULL)) 139191783Srmacklem return (error); 140220683Srmacklem if (newmax > NFS_MAXASYNCDAEMON) 141191783Srmacklem return (EINVAL); 142191783Srmacklem mtx_lock(&ncl_iod_mutex); 143191783Srmacklem ncl_iodmax = newmax; 144191783Srmacklem if (ncl_numasync <= ncl_iodmax) 145191783Srmacklem goto out; 146191783Srmacklem /* 147191783Srmacklem * If there are some asleep nfsiods that should 148191783Srmacklem * exit, wakeup() them so that they check ncl_iodmax 149191783Srmacklem * and exit. Those who are active will exit as 150191783Srmacklem * soon as they finish I/O. 151191783Srmacklem */ 152191783Srmacklem iod = ncl_numasync - 1; 153191783Srmacklem for (i = 0; i < ncl_numasync - ncl_iodmax; i++) { 154203119Srmacklem if (ncl_iodwant[iod] == NFSIOD_AVAILABLE) 155191783Srmacklem wakeup(&ncl_iodwant[iod]); 156191783Srmacklem iod--; 157191783Srmacklem } 158191783Srmacklemout: 159191783Srmacklem mtx_unlock(&ncl_iod_mutex); 160191783Srmacklem return (0); 161191783Srmacklem} 162221973SrmacklemSYSCTL_PROC(_vfs_nfs, OID_AUTO, iodmax, CTLTYPE_UINT | CTLFLAG_RW, 0, 163220683Srmacklem sizeof (ncl_iodmax), sysctl_iodmax, "IU", 164220683Srmacklem "Max number of nfsiod kthreads"); 165191783Srmacklem 166220683Srmacklemstatic int 167220683Srmacklemnfs_nfsiodnew_sync(void) 168191783Srmacklem{ 169191783Srmacklem int error, i; 170191783Srmacklem 171220683Srmacklem mtx_assert(&ncl_iod_mutex, MA_OWNED); 172220683Srmacklem for (i = 0; i < ncl_iodmax; i++) { 173191783Srmacklem if (nfs_asyncdaemon[i] == 0) { 174220683Srmacklem nfs_asyncdaemon[i] = 1; 175191783Srmacklem break; 176191783Srmacklem } 177220683Srmacklem } 178220683Srmacklem if (i == ncl_iodmax) 179220683Srmacklem return (0); 180191783Srmacklem mtx_unlock(&ncl_iod_mutex); 181220683Srmacklem error = kproc_create(nfssvc_iod, nfs_asyncdaemon + i, NULL, 182220683Srmacklem RFHIGHPID, 0, "newnfs %d", i); 183191783Srmacklem mtx_lock(&ncl_iod_mutex); 184220683Srmacklem if (error == 0) { 185220683Srmacklem ncl_numasync++; 186220683Srmacklem ncl_iodwant[i] = NFSIOD_AVAILABLE; 187220683Srmacklem } else 188220683Srmacklem nfs_asyncdaemon[i] = 0; 189220683Srmacklem return (error); 190220683Srmacklem} 191220683Srmacklem 192220683Srmacklemvoid 193220683Srmacklemncl_nfsiodnew_tq(__unused void *arg, int pending) 194220683Srmacklem{ 195220683Srmacklem 196220683Srmacklem mtx_lock(&ncl_iod_mutex); 197220683Srmacklem while (pending > 0) { 198220683Srmacklem pending--; 199220683Srmacklem nfs_nfsiodnew_sync(); 200203119Srmacklem } 201220683Srmacklem mtx_unlock(&ncl_iod_mutex); 202191783Srmacklem} 203191783Srmacklem 204220683Srmacklemvoid 205220683Srmacklemncl_nfsiodnew(void) 206220683Srmacklem{ 207220683Srmacklem 208220683Srmacklem mtx_assert(&ncl_iod_mutex, MA_OWNED); 209220683Srmacklem taskqueue_enqueue(taskqueue_thread, &ncl_nfsiodnew_task); 210220683Srmacklem} 211220683Srmacklem 212191783Srmacklemstatic void 213191783Srmacklemnfsiod_setup(void *dummy) 214191783Srmacklem{ 215191783Srmacklem int error; 216191783Srmacklem 217221973Srmacklem TUNABLE_INT_FETCH("vfs.nfs.iodmin", &nfs_iodmin); 218191783Srmacklem nfscl_init(); 219191783Srmacklem mtx_lock(&ncl_iod_mutex); 220191783Srmacklem /* Silently limit the start number of nfsiod's */ 221220683Srmacklem if (nfs_iodmin > NFS_MAXASYNCDAEMON) 222220683Srmacklem nfs_iodmin = NFS_MAXASYNCDAEMON; 223191783Srmacklem 224220683Srmacklem while (ncl_numasync < nfs_iodmin) { 225220683Srmacklem error = nfs_nfsiodnew_sync(); 226191783Srmacklem if (error == -1) 227220683Srmacklem panic("nfsiod_setup: nfs_nfsiodnew failed"); 228191783Srmacklem } 229191783Srmacklem mtx_unlock(&ncl_iod_mutex); 230191783Srmacklem} 231191783SrmacklemSYSINIT(newnfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL); 232191783Srmacklem 233191783Srmacklemstatic int nfs_defect = 0; 234221973SrmacklemSYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, 235220683Srmacklem "Allow nfsiods to migrate serving different mounts"); 236191783Srmacklem 237191783Srmacklem/* 238191783Srmacklem * Asynchronous I/O daemons for client nfs. 239191783Srmacklem * They do read-ahead and write-behind operations on the block I/O cache. 240191783Srmacklem * Returns if we hit the timeout defined by the iodmaxidle sysctl. 241191783Srmacklem */ 242191783Srmacklemstatic void 243191783Srmacklemnfssvc_iod(void *instance) 244191783Srmacklem{ 245191783Srmacklem struct buf *bp; 246191783Srmacklem struct nfsmount *nmp; 247191783Srmacklem int myiod, timo; 248191783Srmacklem int error = 0; 249191783Srmacklem 250191783Srmacklem mtx_lock(&ncl_iod_mutex); 251191783Srmacklem myiod = (int *)instance - nfs_asyncdaemon; 252191783Srmacklem /* 253191783Srmacklem * Main loop 254191783Srmacklem */ 255191783Srmacklem for (;;) { 256191783Srmacklem while (((nmp = ncl_iodmount[myiod]) == NULL) 257191783Srmacklem || !TAILQ_FIRST(&nmp->nm_bufq)) { 258191783Srmacklem if (myiod >= ncl_iodmax) 259191783Srmacklem goto finish; 260191783Srmacklem if (nmp) 261191783Srmacklem nmp->nm_bufqiods--; 262203119Srmacklem if (ncl_iodwant[myiod] == NFSIOD_NOT_AVAILABLE) 263203119Srmacklem ncl_iodwant[myiod] = NFSIOD_AVAILABLE; 264191783Srmacklem ncl_iodmount[myiod] = NULL; 265191783Srmacklem /* 266191783Srmacklem * Always keep at least nfs_iodmin kthreads. 267191783Srmacklem */ 268220683Srmacklem timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz; 269191783Srmacklem error = msleep(&ncl_iodwant[myiod], &ncl_iod_mutex, PWAIT | PCATCH, 270191783Srmacklem "-", timo); 271191783Srmacklem if (error) { 272191783Srmacklem nmp = ncl_iodmount[myiod]; 273191783Srmacklem /* 274191783Srmacklem * Rechecking the nm_bufq closes a rare race where the 275191783Srmacklem * nfsiod is woken up at the exact time the idle timeout 276191783Srmacklem * fires 277191783Srmacklem */ 278191783Srmacklem if (nmp && TAILQ_FIRST(&nmp->nm_bufq)) 279191783Srmacklem error = 0; 280191783Srmacklem break; 281191783Srmacklem } 282191783Srmacklem } 283191783Srmacklem if (error) 284191783Srmacklem break; 285191783Srmacklem while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) { 286191783Srmacklem /* Take one off the front of the list */ 287191783Srmacklem TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist); 288191783Srmacklem nmp->nm_bufqlen--; 289191783Srmacklem if (nmp->nm_bufqwant && nmp->nm_bufqlen <= ncl_numasync) { 290191783Srmacklem nmp->nm_bufqwant = 0; 291191783Srmacklem wakeup(&nmp->nm_bufq); 292191783Srmacklem } 293191783Srmacklem mtx_unlock(&ncl_iod_mutex); 294191783Srmacklem if (bp->b_flags & B_DIRECT) { 295191783Srmacklem KASSERT((bp->b_iocmd == BIO_WRITE), ("nfscvs_iod: BIO_WRITE not set")); 296191783Srmacklem (void)ncl_doio_directwrite(bp); 297191783Srmacklem } else { 298191783Srmacklem if (bp->b_iocmd == BIO_READ) 299207082Srmacklem (void) ncl_doio(bp->b_vp, bp, bp->b_rcred, 300207082Srmacklem NULL, 0); 301191783Srmacklem else 302207082Srmacklem (void) ncl_doio(bp->b_vp, bp, bp->b_wcred, 303207082Srmacklem NULL, 0); 304191783Srmacklem } 305191783Srmacklem mtx_lock(&ncl_iod_mutex); 306191783Srmacklem /* 307249630Srmacklem * Make sure the nmp hasn't been dismounted as soon as 308249630Srmacklem * ncl_doio() completes for the last buffer. 309249630Srmacklem */ 310249630Srmacklem nmp = ncl_iodmount[myiod]; 311249630Srmacklem if (nmp == NULL) 312249630Srmacklem break; 313249630Srmacklem 314249630Srmacklem /* 315191783Srmacklem * If there are more than one iod on this mount, then defect 316191783Srmacklem * so that the iods can be shared out fairly between the mounts 317191783Srmacklem */ 318191783Srmacklem if (nfs_defect && nmp->nm_bufqiods > 1) { 319191783Srmacklem NFS_DPF(ASYNCIO, 320191783Srmacklem ("nfssvc_iod: iod %d defecting from mount %p\n", 321191783Srmacklem myiod, nmp)); 322191783Srmacklem ncl_iodmount[myiod] = NULL; 323191783Srmacklem nmp->nm_bufqiods--; 324191783Srmacklem break; 325191783Srmacklem } 326191783Srmacklem } 327191783Srmacklem } 328191783Srmacklemfinish: 329191783Srmacklem nfs_asyncdaemon[myiod] = 0; 330191783Srmacklem if (nmp) 331191783Srmacklem nmp->nm_bufqiods--; 332203119Srmacklem ncl_iodwant[myiod] = NFSIOD_NOT_AVAILABLE; 333191783Srmacklem ncl_iodmount[myiod] = NULL; 334191783Srmacklem /* Someone may be waiting for the last nfsiod to terminate. */ 335191783Srmacklem if (--ncl_numasync == 0) 336191783Srmacklem wakeup(&ncl_numasync); 337191783Srmacklem mtx_unlock(&ncl_iod_mutex); 338191783Srmacklem if ((error == 0) || (error == EWOULDBLOCK)) 339191783Srmacklem kproc_exit(0); 340191783Srmacklem /* Abnormal termination */ 341191783Srmacklem kproc_exit(1); 342191783Srmacklem} 343