1/* 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Rick Macklem at The University of Guelph. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $ 66 */ 67/* 68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 69 * support for mandatory and extensible security protections. This notice 70 * is included in support of clause 2.2 (b) of the Apple Public License, 71 * Version 2.0. 
72 */ 73 74#include <sys/param.h> 75#include <sys/systm.h> 76#include <sys/kernel.h> 77#include <sys/file_internal.h> 78#include <sys/filedesc.h> 79#include <sys/stat.h> 80#include <sys/vnode_internal.h> 81#include <sys/mount_internal.h> 82#include <sys/proc_internal.h> /* for fdflags */ 83#include <sys/kauth.h> 84#include <sys/sysctl.h> 85#include <sys/ubc.h> 86#include <sys/uio.h> 87#include <sys/malloc.h> 88#include <sys/kpi_mbuf.h> 89#include <sys/socket.h> 90#include <sys/socketvar.h> 91#include <sys/domain.h> 92#include <sys/protosw.h> 93#include <sys/fcntl.h> 94#include <sys/lockf.h> 95#include <sys/syslog.h> 96#include <sys/user.h> 97#include <sys/sysproto.h> 98#include <sys/kpi_socket.h> 99#include <sys/fsevents.h> 100#include <libkern/OSAtomic.h> 101#include <kern/thread_call.h> 102#include <kern/task.h> 103 104#include <security/audit/audit.h> 105 106#include <netinet/in.h> 107#include <netinet/tcp.h> 108#include <nfs/xdr_subs.h> 109#include <nfs/rpcv2.h> 110#include <nfs/nfsproto.h> 111#include <nfs/nfs.h> 112#include <nfs/nfsm_subs.h> 113#include <nfs/nfsrvcache.h> 114#include <nfs/nfs_gss.h> 115#include <nfs/nfsmount.h> 116#include <nfs/nfsnode.h> 117#include <nfs/nfs_lock.h> 118#if CONFIG_MACF 119#include <security/mac_framework.h> 120#endif 121 122kern_return_t thread_terminate(thread_t); /* XXX */ 123 124#if NFSSERVER 125 126extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd, 127 struct nfsrv_sock *slp, 128 vfs_context_t ctx, 129 mbuf_t *mrepp); 130extern int nfsrv_wg_delay; 131extern int nfsrv_wg_delay_v3; 132 133static int nfsrv_require_resv_port = 0; 134static int nfsrv_deadsock_timer_on = 0; 135 136int nfssvc_export(user_addr_t argp); 137int nfssvc_nfsd(void); 138int nfssvc_addsock(socket_t, mbuf_t); 139void nfsrv_zapsock(struct nfsrv_sock *); 140void nfsrv_slpderef(struct nfsrv_sock *); 141void nfsrv_slpfree(struct nfsrv_sock *); 142 143#endif /* NFSSERVER */ 144 145/* 146 * sysctl stuff 147 */ 148SYSCTL_DECL(_vfs_generic); 
/* vfs.generic.nfs sysctl tree: NFS client and server tunables/counters. */
SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");

#if NFSCLIENT
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");


#endif /* NFSCLIENT */

#if NFSSERVER
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
#if CONFIG_FSE
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
#ifdef NFS_UC_Q_DEBUG
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)&nfsrv_uc_queue_count, 0, "");
#endif
#endif /* NFSSERVER */


#if NFSCLIENT

/*
 * NFS client pseudo system call: dispatch on uap->flag.
 * NFSCLNT_LOCKDANS copies in a lockd answer and hands it to nfslockdans();
 * NFSCLNT_LOCKDNOTIFY passes the user buffer to nfslockdnotify().
 * Returns 0 or an errno; EINVAL for unknown flags.
 */
int
nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
{
	struct lockd_ans la;
	int error;

	switch (uap->flag) {
	case NFSCLNT_LOCKDANS:
		error = copyin(uap->argp, &la, sizeof(la));
		if (!error)
			error = nfslockdans(p, &la);
		break;
	case NFSCLNT_LOCKDNOTIFY:
		error = nfslockdnotify(p, uap->argp);
		break;
	default:
		error = EINVAL;
	}
	return (error);
}

/*
 * Asynchronous I/O threads for client NFS.
 * They do read-ahead and write-behind operations on the block I/O cache.
 *
 * The pool of up to nfsiod_thread_max threads is launched on demand and exit
 * when unused for a while.  There are as many nfsiod structs as there are
 * nfsiod threads; however there's no strict tie between a thread and a struct.
 * Each thread puts an nfsiod on the free list and sleeps on it.  When it wakes
 * up, it removes the next struct nfsiod from the queue and services it.  Then
 * it will put the struct at the head of free list and sleep on it.
 */

/*
 * nfsiod thread exit routine
 *
 * Must be called with nfsiod_mutex held so that the
 * decision to terminate is atomic with the termination.
 *
 * Drops the thread count, releases nfsiod_mutex (held by the caller),
 * frees the nfsiod struct if one was handed in, and terminates the
 * calling thread.  Does not return.
 */
void
nfsiod_terminate(struct nfsiod *niod)
{
	nfsiod_thread_count--;
	lck_mtx_unlock(nfsiod_mutex);
	if (niod)
		FREE(niod, M_TEMP);
	else
		printf("nfsiod: terminating without niod\n");
	thread_terminate(current_thread());
	/*NOTREACHED*/
}

/*
 * nfsiod thread startup routine
 *
 * Allocates and zeroes an nfsiod struct, queues it on the free list, wakes
 * the creator (nfsiod_start() sleeps on this thread's handle until startup
 * completes), then idles in msleep0() with nfsiod_continue as the
 * continuation.  msleep0() is expected not to return; if it does (error),
 * an idle nfsiod struct is reclaimed and the thread terminates.
 */
void
nfsiod_thread(void)
{
	struct nfsiod *niod;
	int error;

	MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
	if (!niod) {
		/* allocation failed: undo the count bump and wake our creator */
		lck_mtx_lock(nfsiod_mutex);
		nfsiod_thread_count--;
		wakeup(current_thread());
		lck_mtx_unlock(nfsiod_mutex);
		thread_terminate(current_thread());
		/*NOTREACHED*/
	}
	bzero(niod, sizeof(*niod));
	lck_mtx_lock(nfsiod_mutex);
	TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
	wakeup(current_thread());
	error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
	/* shouldn't return... so we have an error */
	/* remove an old nfsiod struct and terminate */
	lck_mtx_lock(nfsiod_mutex);
	if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
		TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
	nfsiod_terminate(niod);
	/*NOTREACHED*/
}

/*
 * Start up another nfsiod thread.
 * (unless we're already maxed out and there are nfsiods running)
 *
 * Bumps nfsiod_thread_count, spawns a kernel thread running
 * nfsiod_thread(), and sleeps (dropping nfsiod_mutex) until the new
 * thread signals that startup is complete.  Returns 0 or EBUSY.
 */
int
nfsiod_start(void)
{
	thread_t thd = THREAD_NULL;

	lck_mtx_lock(nfsiod_mutex);
	if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
		lck_mtx_unlock(nfsiod_mutex);
		return (EBUSY);
	}
	nfsiod_thread_count++;
	/*
	 * NOTE(review): on kernel_thread_start() failure the count bumped
	 * above is not rolled back here — the new thread that would have
	 * decremented it never runs.  Verify against nfsiod_terminate()
	 * accounting.
	 */
	if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
		lck_mtx_unlock(nfsiod_mutex);
		return (EBUSY);
	}
	/* wait for the thread to complete startup */
	msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
	thread_deallocate(thd);
	return (0);
}

/*
 * Continuation for Asynchronous I/O threads for NFS client.
 *
 * Grab an nfsiod struct to work on, do some work, then drop it
 *
 * Entered as the msleep0() continuation with nfsiod_mutex NOT held
 * (PDROP released it); takes the mutex, services mounts' async queues,
 * and either re-queues itself on the free list and sleeps again or
 * terminates via nfsiod_terminate() (which consumes the mutex).
 */
int
nfsiod_continue(int error)
{
	struct nfsiod *niod;
	struct nfsmount *nmp;
	struct nfsreq *req, *treq;
	struct nfs_reqqhead iodq;
	int morework;

	lck_mtx_lock(nfsiod_mutex);
	niod = TAILQ_FIRST(&nfsiodwork);
	if (!niod) {
		/* there's no work queued up */
		/* remove an old nfsiod struct and terminate */
		if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
			TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
		nfsiod_terminate(niod);
		/*NOTREACHED*/
	}
	TAILQ_REMOVE(&nfsiodwork, niod, niod_link);

worktodo:
	while ((nmp = niod->niod_nmp)) {
		/*
		 * Service this mount's async I/O queue.
		 *
		 * In order to ensure some level of fairness between mounts,
		 * we grab all the work up front before processing it so any
		 * new work that arrives will be serviced on a subsequent
		 * iteration - and we have a chance to see if other work needs
		 * to be done (e.g. the delayed write queue needs to be pushed
		 * or other mounts are waiting for an nfsiod).
		 */
		/* grab the current contents of the queue */
		TAILQ_INIT(&iodq);
		TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
		/* callbacks are invoked without holding nfsiod_mutex */
		lck_mtx_unlock(nfsiod_mutex);

		/* process the queue */
		TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
			TAILQ_REMOVE(&iodq, req, r_achain);
			req->r_achain.tqe_next = NFSREQNOLIST;
			req->r_callback.rcb_func(req);
		}

		/* now check if there's more/other work to be done */
		lck_mtx_lock(nfsiod_mutex);
		morework = !TAILQ_EMPTY(&nmp->nm_iodq);
		if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
			/* we're going to stop working on this mount */
			if (morework) /* mount still needs more work so queue it up */
				TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
			nmp->nm_niod = NULL;
			niod->niod_nmp = NULL;
		}
	}

	/* loop if there's still a mount to work on */
	if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
		niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
		TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
	}
	if (niod->niod_nmp)
		goto worktodo;

	/* queue ourselves back up - if there aren't too many threads running */
	if (nfsiod_thread_count <= NFSIOD_MAX) {
		TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
		error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
		/* shouldn't return... so we have an error */
		/* remove an old nfsiod struct and terminate */
		lck_mtx_lock(nfsiod_mutex);
		if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
			TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
	}
	nfsiod_terminate(niod);
	/*NOTREACHED*/
	return (0);
}

#endif /* NFSCLIENT */


#if NFSSERVER

/*
 * NFS server system calls
 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
 */

/*
 * Get file handle system call
 *
 * Superuser only.  Looks up uap->fname, finds the matching exported
 * filesystem and export under the nfsrv_export_rwlock (shared), builds an
 * nfs_filehandle (export header + VFS_VPTOFH file id, bounded by the
 * caller-specified v2/v3 handle size), and copies it out to uap->fhp.
 */
int
getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	int error, fhlen, fidlen;
	struct nameidata nd;
	char path[MAXPATHLEN], *ptr;
	size_t pathlen;
	struct nfs_exportfs *nxfs;
	struct nfs_export *nx;

	/*
	 * Must be super user
	 */
	error = proc_suser(p);
	if (error)
		return (error);

	error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
	if (!error)
		error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
	if (error)
		return (error);
	/* limit fh size to length specified (or v3 size by default) */
	if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE))
		fhlen = NFSV3_MAX_FH_SIZE;
	fidlen = fhlen - sizeof(struct nfs_exphandle);

	if (!nfsrv_is_initialized())
		return (EINVAL);

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
	error = namei(&nd);
	if (error)
		return (error);
	nameidone(&nd);

	vp = nd.ni_vp;

	// find exportfs that matches f_mntonname
	lck_rw_lock_shared(&nfsrv_export_rwlock);
	ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
	LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
		if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN))
			break;
	}
	if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
		error = EINVAL;
		goto out;
	}
	// find export that best matches remainder of path
	ptr = path + strlen(nxfs->nxfs_path);
	while (*ptr && (*ptr == '/'))
		ptr++;
	LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
		int len = strlen(nx->nx_path);
		if (len == 0)  // we've hit the export entry for the root directory
			break;
		if (!strncmp(nx->nx_path, ptr, len))
			break;
	}
	if (!nx) {
		error = EINVAL;
		goto out;
	}

	/* build the file handle: export header first, then the fs file id */
	bzero(&nfh, sizeof(nfh));
	nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
	nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
	nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
	nfh.nfh_xh.nxh_flags = 0;
	nfh.nfh_xh.nxh_reserved = 0;
	nfh.nfh_len = fidlen;
	error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
	if (nfh.nfh_len > (uint32_t)fidlen)
		error = EOVERFLOW;
	nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
	nfh.nfh_len += sizeof(nfh.nfh_xh);
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

out:
	lck_rw_done(&nfsrv_export_rwlock);
	vnode_put(vp);
	if (error)
		return (error);
	error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
	return (error);
}

extern const struct fileops vnops;

/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 */
/*
 * Superuser only.  Copies in a file handle, translates it to a vnode via
 * nfsrv_fhtovp(), performs vn_open-style checks/authorization, and wires
 * the vnode into a new file descriptor (returned via *retval).  Optionally
 * takes an advisory flock-style lock for O_EXLOCK/O_SHLOCK.
 */
int
fhopen( proc_t p,
	struct fhopen_args *uap,
	int32_t *retval)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;
	struct flock lf;
	struct fileproc *fp, *nfp;
	int fmode, error, type;
	int indx;
	vfs_context_t ctx = vfs_context_current();
	kauth_action_t action;

	/*
	 * Must be super user
	 */
	error = suser(vfs_context_ucred(ctx), 0);
	if (error) {
		return (error);
	}

	if (!nfsrv_is_initialized()) {
		return (EINVAL);
	}

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);

	/* read the handle length first, then the handle itself */
	error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
	if (error)
		return (error);
	if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
	    (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
		return (EINVAL);
	error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
	if (error)
		return (error);
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

	lck_rw_lock_shared(&nfsrv_export_rwlock);
	/* now give me my vnode, it gets returned to me with a reference */
	error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
	lck_rw_done(&nfsrv_export_rwlock);
	if (error) {
		if (error == NFSERR_TRYLATER)
			error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
		return (error);
	}

	/*
	 * From now on we have to make sure not
	 * to forget about the vnode.
	 * Any error that causes an abort must vnode_put(vp).
	 * Just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vnode_vtype(vp) == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}

	/* disallow write operations on directories */
	if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
		error = EISDIR;
		goto bad;
	}

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD)
		action |= KAUTH_VNODE_READ_DATA;
	if (fmode & (FWRITE | O_TRUNC))
		action |= KAUTH_VNODE_WRITE_DATA;
	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
		goto bad;

	if ((error = VNOP_OPEN(vp, fmode, ctx)))
		goto bad;
	if ((error = vnode_ref_ext(vp, fmode, 0)))
		goto bad;

	/*
	 * end of vn_open code
	 */

	// starting here... error paths should call vn_close/vnode_put
	if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
		vn_close(vp, fmode & FMASK, ctx);
		goto bad;
	}
	fp = nfp;

	fp->f_fglob->fg_flag = fmode & FMASK;
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

	// XXX do we really need to support this with fhopen()?
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
			struct vfs_context context = *vfs_context_current();
			/* Modify local copy (to not damage thread copy) */
			context.vc_ucred = fp->f_fglob->fg_cred;

			/*
			 * NOTE(review): unlike every other error path after
			 * nfsrv_fhtovp(), this one returns without a
			 * vnode_put(vp) — verify the iocount isn't leaked
			 * here (compare the falloc-failure path which goes
			 * through 'bad').
			 */
			vn_close(vp, fp->f_fglob->fg_flag, &context);
			fp_free(p, indx, fp);
			return (error);
		}
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

	vnode_put(vp);

	proc_fdlock(p);
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;
	return (0);

bad:
	vnode_put(vp);
	return (error);
}

/*
 * NFS server pseudo system call
 *
 * Dispatches on uap->flag: NFSSVC_ADDSOCK hands a socket (and optional
 * client address) to nfssvc_addsock(); NFSSVC_NFSD turns the calling
 * thread into an nfsd via nfssvc_nfsd(); NFSSVC_EXPORT updates the export
 * list.  Superuser required except for export operations (checked later).
 */
int
nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
{
	mbuf_t nam;
	struct user_nfsd_args user_nfsdarg;
	socket_t so;
	int error;

	AUDIT_ARG(cmd, uap->flag);

	/*
	 * Must be super user for most operations (export ops checked later).
	 */
	if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p))))
		return (error);
#if CONFIG_MACF
	error = mac_system_check_nfsd(kauth_cred_get());
	if (error)
		return (error);
#endif

	/* make sure NFS server data structures have been initialized */
	nfsrv_init();

	if (uap->flag & NFSSVC_ADDSOCK) {
		/* copy in the args, converting from the 32-bit layout if needed */
		if (IS_64BIT_PROCESS(p)) {
			error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
		} else {
			struct nfsd_args tmp_args;
			error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
			if (error == 0) {
				user_nfsdarg.sock = tmp_args.sock;
				user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
				user_nfsdarg.namelen = tmp_args.namelen;
			}
		}
		if (error)
			return (error);
		/* get the socket */
		error = file_socket(user_nfsdarg.sock, &so);
		if (error)
			return (error);
		/* Get the client address for connected sockets. */
		if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
			nam = NULL;
		} else {
			error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
			if (error) {
				/* drop the iocount file_socket() grabbed on the file descriptor */
				file_drop(user_nfsdarg.sock);
				return (error);
			}
		}
		/*
		 * nfssvc_addsock() will grab a retain count on the socket
		 * to keep the socket from being closed when nfsd closes its
		 * file descriptor for it.
		 */
		error = nfssvc_addsock(so, nam);
		/* drop the iocount file_socket() grabbed on the file descriptor */
		file_drop(user_nfsdarg.sock);
	} else if (uap->flag & NFSSVC_NFSD) {
		error = nfssvc_nfsd();
	} else if (uap->flag & NFSSVC_EXPORT) {
		error = nfssvc_export(uap->argp);
	} else {
		error = EINVAL;
	}
	/* signal-driven exits from nfsd loops are not user-visible errors */
	if (error == EINTR || error == ERESTART)
		error = 0;
	return (error);
}

/*
 * Adds a socket to the list for servicing by nfsds.
 *
 * Takes ownership of 'mynam' (the client address mbuf): it is either
 * stored in the new nfsrv_sock or freed on every error path.  Grabs a
 * sock_retain() reference on 'so' so the socket survives nfsd closing
 * its descriptor.  Returns 0, EEXIST (duplicate UDP socket), or ENOMEM.
 */
int
nfssvc_addsock(socket_t so, mbuf_t mynam)
{
	struct nfsrv_sock *slp;
	int error = 0, sodomain, sotype, soprotocol, on = 1;
	int first;
	struct timeval timeo;

	/* make sure mbuf constants are set up */
	if (!nfs_mbuf_mhlen)
		nfs_mbuf_init();

	sock_gettype(so, &sodomain, &sotype, &soprotocol);

	/* There should be only one UDP socket for each of IPv4 and IPv6 */
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
		mbuf_freem(mynam);
		return (EEXIST);
	}
	if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
		mbuf_freem(mynam);
		return (EEXIST);
	}

	/* Set protocol options and reserve some space (for UDP). */
	if (sotype == SOCK_STREAM)
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
		sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	if (sotype == SOCK_DGRAM) {	/* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			/* buffer sizing is best-effort; log and carry on */
			log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
			error = 0;
		}
	}
	sock_nointerrupt(so, 0);

	/*
	 * Set socket send/receive timeouts.
	 * Receive timeout shouldn't matter, but setting the send timeout
	 * will make sure that an unresponsive client can't hang the server.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = 1;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	timeo.tv_sec = 30;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		/* timeouts are also best-effort */
		log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
		error = 0;
	}

	MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
	if (!slp) {
		mbuf_freem(mynam);
		return (ENOMEM);
	}
	bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
	lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
	lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);

	lck_mtx_lock(nfsd_mutex);

	/* re-check the UDP singletons now that we hold nfsd_mutex */
	if (soprotocol == IPPROTO_UDP) {
		if (sodomain == AF_INET) {
			/* There should be only one UDP/IPv4 socket */
			if (nfsrv_udpsock) {
				lck_mtx_unlock(nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return (EEXIST);
			}
			nfsrv_udpsock = slp;
		}
		if (sodomain == AF_INET6) {
			/* There should be only one UDP/IPv6 socket */
			if (nfsrv_udp6sock) {
				lck_mtx_unlock(nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return (EEXIST);
			}
			nfsrv_udp6sock = slp;
		}
	}

	/* add the socket to the list */
	first = TAILQ_EMPTY(&nfsrv_socklist);
	TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);

	sock_retain(so); /* grab a retain count on the socket */
	slp->ns_so = so;
	slp->ns_sotype = sotype;
	slp->ns_nam = mynam;

	/* set up the socket up-call */
	nfsrv_uc_addsock(slp, first);

	/* mark that the socket is not in the nfsrv_sockwg list */
	slp->ns_wgq.tqe_next = SLPNOLIST;

	slp->ns_flag = SLP_VALID | SLP_NEEDQ;

	nfsrv_wakenfsd(slp);
	lck_mtx_unlock(nfsd_mutex);

	return (0);
}

/*
 * nfssvc_nfsd()
 *
 * nfsd theory of operation:
 *
 * The first nfsd thread stays in user mode
accepting new TCP connections 850 * which are then added via the "addsock" call. The rest of the nfsd threads 851 * simply call into the kernel and remain there in a loop handling NFS 852 * requests until killed by a signal. 853 * 854 * There's a list of nfsd threads (nfsd_head). 855 * There's an nfsd queue that contains only those nfsds that are 856 * waiting for work to do (nfsd_queue). 857 * 858 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for 859 * managing the work on the sockets: 860 * nfsrv_sockwait - sockets w/new data waiting to be worked on 861 * nfsrv_sockwork - sockets being worked on which may have more work to do 862 * nfsrv_sockwg -- sockets which have pending write gather data 863 * When a socket receives data, if it is not currently queued, it 864 * will be placed at the end of the "wait" queue. 865 * Whenever a socket needs servicing we make sure it is queued and 866 * wake up a waiting nfsd (if there is one). 867 * 868 * nfsds will service at most 8 requests from the same socket before 869 * defecting to work on another socket. 870 * nfsds will defect immediately if there are any sockets in the "wait" queue 871 * nfsds looking for a socket to work on check the "wait" queue first and 872 * then check the "work" queue. 873 * When an nfsd starts working on a socket, it removes it from the head of 874 * the queue it's currently on and moves it to the end of the "work" queue. 875 * When nfsds are checking the queues for work, any sockets found not to 876 * have any work are simply dropped from the queue. 
877 * 878 */ 879int 880nfssvc_nfsd(void) 881{ 882 mbuf_t m, mrep; 883 struct nfsrv_sock *slp; 884 struct nfsd *nfsd; 885 struct nfsrv_descript *nd = NULL; 886 int error = 0, cacherep, writes_todo; 887 int siz, procrastinate, opcnt = 0; 888 u_quad_t cur_usec; 889 struct timeval now; 890 struct vfs_context context; 891 struct timespec to; 892 893#ifndef nolint 894 cacherep = RC_DOIT; 895 writes_todo = 0; 896#endif 897 898 MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK); 899 if (!nfsd) 900 return (ENOMEM); 901 bzero(nfsd, sizeof(struct nfsd)); 902 lck_mtx_lock(nfsd_mutex); 903 if (nfsd_thread_count++ == 0) 904 nfsrv_initcache(); /* Init the server request cache */ 905 906 TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); 907 lck_mtx_unlock(nfsd_mutex); 908 909 context.vc_thread = current_thread(); 910 911 /* Set time out so that nfsd threads can wake up a see if they are still needed. */ 912 to.tv_sec = 5; 913 to.tv_nsec = 0; 914 915 /* 916 * Loop getting rpc requests until SIGKILL. 917 */ 918 for (;;) { 919 if (nfsd_thread_max <= 0) { 920 /* NFS server shutting down, get out ASAP */ 921 error = EINTR; 922 slp = nfsd->nfsd_slp; 923 } else if (nfsd->nfsd_flag & NFSD_REQINPROG) { 924 /* already have some work to do */ 925 error = 0; 926 slp = nfsd->nfsd_slp; 927 } else { 928 /* need to find work to do */ 929 error = 0; 930 lck_mtx_lock(nfsd_mutex); 931 while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) { 932 if (nfsd_thread_count > nfsd_thread_max) { 933 /* 934 * If we have no socket and there are more 935 * nfsd threads than configured, let's exit. 
936 */ 937 error = 0; 938 goto done; 939 } 940 nfsd->nfsd_flag |= NFSD_WAITING; 941 TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue); 942 error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to); 943 if (error) { 944 if (nfsd->nfsd_flag & NFSD_WAITING) { 945 TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue); 946 nfsd->nfsd_flag &= ~NFSD_WAITING; 947 } 948 if (error == EWOULDBLOCK) 949 continue; 950 goto done; 951 } 952 } 953 slp = nfsd->nfsd_slp; 954 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) { 955 /* look for a socket to work on in the wait queue */ 956 while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) { 957 lck_rw_lock_exclusive(&slp->ns_rwlock); 958 /* remove from the head of the queue */ 959 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); 960 slp->ns_flag &= ~SLP_WAITQ; 961 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) 962 break; 963 /* nothing to do, so skip this socket */ 964 lck_rw_done(&slp->ns_rwlock); 965 } 966 } 967 if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) { 968 /* look for a socket to work on in the work queue */ 969 while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) { 970 lck_rw_lock_exclusive(&slp->ns_rwlock); 971 /* remove from the head of the queue */ 972 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); 973 slp->ns_flag &= ~SLP_WORKQ; 974 if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) 975 break; 976 /* nothing to do, so skip this socket */ 977 lck_rw_done(&slp->ns_rwlock); 978 } 979 } 980 if (!nfsd->nfsd_slp && slp) { 981 /* we found a socket to work on, grab a reference */ 982 slp->ns_sref++; 983 nfsd->nfsd_slp = slp; 984 opcnt = 0; 985 /* and put it at the back of the work queue */ 986 TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq); 987 slp->ns_flag |= SLP_WORKQ; 988 lck_rw_done(&slp->ns_rwlock); 989 } 990 lck_mtx_unlock(nfsd_mutex); 991 if (!slp) 992 continue; 993 lck_rw_lock_exclusive(&slp->ns_rwlock); 994 if (slp->ns_flag & SLP_VALID) { 995 if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) { 996 slp->ns_flag &= 
~SLP_NEEDQ; 997 nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK); 998 } 999 if (slp->ns_flag & SLP_DISCONN) 1000 nfsrv_zapsock(slp); 1001 error = nfsrv_dorec(slp, nfsd, &nd); 1002 if (error == EINVAL) { // RPCSEC_GSS drop 1003 if (slp->ns_sotype == SOCK_STREAM) 1004 nfsrv_zapsock(slp); // drop connection 1005 } 1006 writes_todo = 0; 1007 if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) { 1008 microuptime(&now); 1009 cur_usec = (u_quad_t)now.tv_sec * 1000000 + 1010 (u_quad_t)now.tv_usec; 1011 if (slp->ns_wgtime <= cur_usec) { 1012 error = 0; 1013 cacherep = RC_DOIT; 1014 writes_todo = 1; 1015 } 1016 slp->ns_flag &= ~SLP_DOWRITES; 1017 } 1018 nfsd->nfsd_flag |= NFSD_REQINPROG; 1019 } 1020 lck_rw_done(&slp->ns_rwlock); 1021 } 1022 if (error || (slp && !(slp->ns_flag & SLP_VALID))) { 1023 if (nd) { 1024 nfsm_chain_cleanup(&nd->nd_nmreq); 1025 if (nd->nd_nam2) 1026 mbuf_freem(nd->nd_nam2); 1027 if (IS_VALID_CRED(nd->nd_cr)) 1028 kauth_cred_unref(&nd->nd_cr); 1029 if (nd->nd_gss_context) 1030 nfs_gss_svc_ctx_deref(nd->nd_gss_context); 1031 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); 1032 nd = NULL; 1033 } 1034 nfsd->nfsd_slp = NULL; 1035 nfsd->nfsd_flag &= ~NFSD_REQINPROG; 1036 if (slp) 1037 nfsrv_slpderef(slp); 1038 if (nfsd_thread_max <= 0) 1039 break; 1040 continue; 1041 } 1042 if (nd) { 1043 microuptime(&nd->nd_starttime); 1044 if (nd->nd_nam2) 1045 nd->nd_nam = nd->nd_nam2; 1046 else 1047 nd->nd_nam = slp->ns_nam; 1048 1049 cacherep = nfsrv_getcache(nd, slp, &mrep); 1050 1051 if (nfsrv_require_resv_port) { 1052 /* Check if source port is a reserved port */ 1053 in_port_t port = 0; 1054 struct sockaddr *saddr = mbuf_data(nd->nd_nam); 1055 1056 if (saddr->sa_family == AF_INET) 1057 port = ntohs(((struct sockaddr_in*)saddr)->sin_port); 1058 else if (saddr->sa_family == AF_INET6) 1059 port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port); 1060 if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) { 1061 nd->nd_procnum = NFSPROC_NOOP; 1062 
nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); 1063 cacherep = RC_DOIT; 1064 } 1065 } 1066 1067 } 1068 1069 /* 1070 * Loop to get all the write RPC replies that have been 1071 * gathered together. 1072 */ 1073 do { 1074 switch (cacherep) { 1075 case RC_DOIT: 1076 if (nd && (nd->nd_vers == NFS_VER3)) 1077 procrastinate = nfsrv_wg_delay_v3; 1078 else 1079 procrastinate = nfsrv_wg_delay; 1080 lck_rw_lock_shared(&nfsrv_export_rwlock); 1081 context.vc_ucred = NULL; 1082 if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) 1083 error = nfsrv_writegather(&nd, slp, &context, &mrep); 1084 else 1085 error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep); 1086 lck_rw_done(&nfsrv_export_rwlock); 1087 if (mrep == NULL) { 1088 /* 1089 * If this is a stream socket and we are not going 1090 * to send a reply we better close the connection 1091 * so the client doesn't hang. 1092 */ 1093 if (error && slp->ns_sotype == SOCK_STREAM) { 1094 lck_rw_lock_exclusive(&slp->ns_rwlock); 1095 nfsrv_zapsock(slp); 1096 lck_rw_done(&slp->ns_rwlock); 1097 printf("NFS server: NULL reply from proc = %d error = %d\n", 1098 nd->nd_procnum, error); 1099 } 1100 break; 1101 1102 } 1103 if (error) { 1104 OSAddAtomic64(1, &nfsstats.srv_errs); 1105 nfsrv_updatecache(nd, FALSE, mrep); 1106 if (nd->nd_nam2) { 1107 mbuf_freem(nd->nd_nam2); 1108 nd->nd_nam2 = NULL; 1109 } 1110 break; 1111 } 1112 OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]); 1113 nfsrv_updatecache(nd, TRUE, mrep); 1114 /* FALLTHRU */ 1115 1116 case RC_REPLY: 1117 if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS 1118 /* 1119 * Need to checksum or encrypt the reply 1120 */ 1121 error = nfs_gss_svc_protect_reply(nd, mrep); 1122 if (error) { 1123 mbuf_freem(mrep); 1124 break; 1125 } 1126 } 1127 1128 /* 1129 * Get the total size of the reply 1130 */ 1131 m = mrep; 1132 siz = 0; 1133 while (m) { 1134 siz += mbuf_len(m); 1135 m = mbuf_next(m); 1136 } 1137 if (siz <= 0 || siz > NFS_MAXPACKET) { 1138 
printf("mbuf siz=%d\n",siz); 1139 panic("Bad nfs svc reply"); 1140 } 1141 m = mrep; 1142 mbuf_pkthdr_setlen(m, siz); 1143 error = mbuf_pkthdr_setrcvif(m, NULL); 1144 if (error) 1145 panic("nfsd setrcvif failed: %d", error); 1146 /* 1147 * For stream protocols, prepend a Sun RPC 1148 * Record Mark. 1149 */ 1150 if (slp->ns_sotype == SOCK_STREAM) { 1151 error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK); 1152 if (!error) 1153 *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz); 1154 } 1155 if (!error) { 1156 if (slp->ns_flag & SLP_VALID) { 1157 error = nfsrv_send(slp, nd->nd_nam2, m); 1158 } else { 1159 error = EPIPE; 1160 mbuf_freem(m); 1161 } 1162 } else { 1163 mbuf_freem(m); 1164 } 1165 mrep = NULL; 1166 if (nd->nd_nam2) { 1167 mbuf_freem(nd->nd_nam2); 1168 nd->nd_nam2 = NULL; 1169 } 1170 if (error == EPIPE) { 1171 lck_rw_lock_exclusive(&slp->ns_rwlock); 1172 nfsrv_zapsock(slp); 1173 lck_rw_done(&slp->ns_rwlock); 1174 } 1175 if (error == EINTR || error == ERESTART) { 1176 nfsm_chain_cleanup(&nd->nd_nmreq); 1177 if (IS_VALID_CRED(nd->nd_cr)) 1178 kauth_cred_unref(&nd->nd_cr); 1179 if (nd->nd_gss_context) 1180 nfs_gss_svc_ctx_deref(nd->nd_gss_context); 1181 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); 1182 nfsrv_slpderef(slp); 1183 lck_mtx_lock(nfsd_mutex); 1184 goto done; 1185 } 1186 break; 1187 case RC_DROPIT: 1188 mbuf_freem(nd->nd_nam2); 1189 nd->nd_nam2 = NULL; 1190 break; 1191 }; 1192 opcnt++; 1193 if (nd) { 1194 nfsm_chain_cleanup(&nd->nd_nmreq); 1195 if (nd->nd_nam2) 1196 mbuf_freem(nd->nd_nam2); 1197 if (IS_VALID_CRED(nd->nd_cr)) 1198 kauth_cred_unref(&nd->nd_cr); 1199 if (nd->nd_gss_context) 1200 nfs_gss_svc_ctx_deref(nd->nd_gss_context); 1201 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); 1202 nd = NULL; 1203 } 1204 1205 /* 1206 * Check to see if there are outstanding writes that 1207 * need to be serviced. 
1208 */ 1209 writes_todo = 0; 1210 if (slp->ns_wgtime) { 1211 microuptime(&now); 1212 cur_usec = (u_quad_t)now.tv_sec * 1000000 + 1213 (u_quad_t)now.tv_usec; 1214 if (slp->ns_wgtime <= cur_usec) { 1215 cacherep = RC_DOIT; 1216 writes_todo = 1; 1217 } 1218 } 1219 } while (writes_todo); 1220 1221 nd = NULL; 1222 if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) { 1223 lck_rw_lock_exclusive(&slp->ns_rwlock); 1224 error = nfsrv_dorec(slp, nfsd, &nd); 1225 if (error == EINVAL) { // RPCSEC_GSS drop 1226 if (slp->ns_sotype == SOCK_STREAM) 1227 nfsrv_zapsock(slp); // drop connection 1228 } 1229 lck_rw_done(&slp->ns_rwlock); 1230 } 1231 if (!nd) { 1232 /* drop our reference on the socket */ 1233 nfsd->nfsd_flag &= ~NFSD_REQINPROG; 1234 nfsd->nfsd_slp = NULL; 1235 nfsrv_slpderef(slp); 1236 } 1237 } 1238 lck_mtx_lock(nfsd_mutex); 1239done: 1240 TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain); 1241 FREE(nfsd, M_NFSD); 1242 if (--nfsd_thread_count == 0) 1243 nfsrv_cleanup(); 1244 lck_mtx_unlock(nfsd_mutex); 1245 return (error); 1246} 1247 1248int 1249nfssvc_export(user_addr_t argp) 1250{ 1251 int error = 0, is_64bit; 1252 struct user_nfs_export_args unxa; 1253 vfs_context_t ctx = vfs_context_current(); 1254 1255 is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx)); 1256 1257 /* copy in pointers to path and export args */ 1258 if (is_64bit) { 1259 error = copyin(argp, (caddr_t)&unxa, sizeof(unxa)); 1260 } else { 1261 struct nfs_export_args tnxa; 1262 error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa)); 1263 if (error == 0) { 1264 /* munge into LP64 version of nfs_export_args structure */ 1265 unxa.nxa_fsid = tnxa.nxa_fsid; 1266 unxa.nxa_expid = tnxa.nxa_expid; 1267 unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath); 1268 unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath); 1269 unxa.nxa_flags = tnxa.nxa_flags; 1270 unxa.nxa_netcount = tnxa.nxa_netcount; 1271 unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets); 1272 } 1273 } 1274 if (error) 1275 return (error); 1276 1277 error = 
nfsrv_export(&unxa, ctx); 1278 1279 return (error); 1280} 1281 1282/* 1283 * Shut down a socket associated with an nfsrv_sock structure. 1284 * Should be called with the send lock set, if required. 1285 * The trick here is to increment the sref at the start, so that the nfsds 1286 * will stop using it and clear ns_flag at the end so that it will not be 1287 * reassigned during cleanup. 1288 */ 1289void 1290nfsrv_zapsock(struct nfsrv_sock *slp) 1291{ 1292 socket_t so; 1293 1294 if ((slp->ns_flag & SLP_VALID) == 0) 1295 return; 1296 slp->ns_flag &= ~SLP_ALLFLAGS; 1297 1298 so = slp->ns_so; 1299 if (so == NULL) 1300 return; 1301 1302 /* 1303 * Attempt to deter future up-calls, but leave the 1304 * up-call info in place to avoid a race with the 1305 * networking code. 1306 */ 1307 socket_lock(so, 1); 1308 so->so_rcv.sb_flags &= ~SB_UPCALL; 1309 socket_unlock(so, 1); 1310 1311 sock_shutdown(so, SHUT_RDWR); 1312 1313 /* 1314 * Remove from the up-call queue 1315 */ 1316 nfsrv_uc_dequeue(slp); 1317} 1318 1319/* 1320 * cleanup and release a server socket structure. 
1321 */ 1322void 1323nfsrv_slpfree(struct nfsrv_sock *slp) 1324{ 1325 struct nfsrv_descript *nwp, *nnwp; 1326 1327 if (slp->ns_so) { 1328 sock_release(slp->ns_so); 1329 slp->ns_so = NULL; 1330 } 1331 if (slp->ns_nam) 1332 mbuf_free(slp->ns_nam); 1333 if (slp->ns_raw) 1334 mbuf_freem(slp->ns_raw); 1335 if (slp->ns_rec) 1336 mbuf_freem(slp->ns_rec); 1337 if (slp->ns_frag) 1338 mbuf_freem(slp->ns_frag); 1339 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL; 1340 slp->ns_reccnt = 0; 1341 1342 if (slp->ns_ua) 1343 FREE(slp->ns_ua, M_NFSSVC); 1344 1345 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { 1346 nnwp = nwp->nd_tq.le_next; 1347 LIST_REMOVE(nwp, nd_tq); 1348 nfsm_chain_cleanup(&nwp->nd_nmreq); 1349 if (nwp->nd_mrep) 1350 mbuf_freem(nwp->nd_mrep); 1351 if (nwp->nd_nam2) 1352 mbuf_freem(nwp->nd_nam2); 1353 if (IS_VALID_CRED(nwp->nd_cr)) 1354 kauth_cred_unref(&nwp->nd_cr); 1355 if (nwp->nd_gss_context) 1356 nfs_gss_svc_ctx_deref(nwp->nd_gss_context); 1357 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC); 1358 } 1359 LIST_INIT(&slp->ns_tq); 1360 1361 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group); 1362 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group); 1363 FREE(slp, M_NFSSVC); 1364} 1365 1366/* 1367 * Derefence a server socket structure. If it has no more references and 1368 * is no longer valid, you can throw it away. 
1369 */ 1370void 1371nfsrv_slpderef(struct nfsrv_sock *slp) 1372{ 1373 struct timeval now; 1374 1375 lck_mtx_lock(nfsd_mutex); 1376 lck_rw_lock_exclusive(&slp->ns_rwlock); 1377 slp->ns_sref--; 1378 1379 if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) { 1380 if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) { 1381 /* remove socket from queue since there's no work */ 1382 if (slp->ns_flag & SLP_WAITQ) 1383 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); 1384 else 1385 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); 1386 slp->ns_flag &= ~SLP_QUEUED; 1387 } 1388 lck_rw_done(&slp->ns_rwlock); 1389 lck_mtx_unlock(nfsd_mutex); 1390 return; 1391 } 1392 1393 /* This socket is no longer valid, so we'll get rid of it */ 1394 1395 if (slp->ns_flag & SLP_QUEUED) { 1396 if (slp->ns_flag & SLP_WAITQ) 1397 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); 1398 else 1399 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); 1400 slp->ns_flag &= ~SLP_QUEUED; 1401 } 1402 1403 /* 1404 * Queue the socket up for deletion 1405 * and start the timer to delete it 1406 * after it has been in limbo for 1407 * a while. 1408 */ 1409 microuptime(&now); 1410 slp->ns_timestamp = now.tv_sec; 1411 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); 1412 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain); 1413 if (!nfsrv_deadsock_timer_on) { 1414 nfsrv_deadsock_timer_on = 1; 1415 nfs_interval_timer_start(nfsrv_deadsock_timer_call, 1416 NFSRV_DEADSOCKDELAY * 1000); 1417 } 1418 1419 lck_rw_done(&slp->ns_rwlock); 1420 /* now remove from the write gather socket list */ 1421 if (slp->ns_wgq.tqe_next != SLPNOLIST) { 1422 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); 1423 slp->ns_wgq.tqe_next = SLPNOLIST; 1424 } 1425 lck_mtx_unlock(nfsd_mutex); 1426} 1427 1428/* 1429 * Check periodically for dead sockets pending delete. 1430 * If a socket has been dead for more than NFSRV_DEADSOCKDELAY 1431 * seconds then we assume it's safe to free. 
1432 */ 1433void 1434nfsrv_deadsock_timer(__unused void *param0, __unused void *param1) 1435{ 1436 struct nfsrv_sock *slp; 1437 struct timeval now; 1438 time_t time_to_wait; 1439 1440 microuptime(&now); 1441 lck_mtx_lock(nfsd_mutex); 1442 1443 while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) { 1444 if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec) 1445 break; 1446 TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain); 1447 nfsrv_slpfree(slp); 1448 } 1449 if (TAILQ_EMPTY(&nfsrv_deadsocklist)) { 1450 nfsrv_deadsock_timer_on = 0; 1451 lck_mtx_unlock(nfsd_mutex); 1452 return; 1453 } 1454 time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec; 1455 if (time_to_wait < 1) 1456 time_to_wait = 1; 1457 1458 lck_mtx_unlock(nfsd_mutex); 1459 1460 nfs_interval_timer_start(nfsrv_deadsock_timer_call, 1461 time_to_wait * 1000); 1462} 1463 1464/* 1465 * Clean up the data structures for the server. 1466 */ 1467void 1468nfsrv_cleanup(void) 1469{ 1470 struct nfsrv_sock *slp, *nslp; 1471 struct timeval now; 1472#if CONFIG_FSE 1473 struct nfsrv_fmod *fp, *nfp; 1474 int i; 1475#endif 1476 1477 microuptime(&now); 1478 for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) { 1479 nslp = TAILQ_NEXT(slp, ns_chain); 1480 if (slp->ns_flag & SLP_VALID) { 1481 lck_rw_lock_exclusive(&slp->ns_rwlock); 1482 nfsrv_zapsock(slp); 1483 lck_rw_done(&slp->ns_rwlock); 1484 } 1485 if (slp->ns_flag & SLP_QUEUED) { 1486 if (slp->ns_flag & SLP_WAITQ) 1487 TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); 1488 else 1489 TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); 1490 slp->ns_flag &= ~SLP_QUEUED; 1491 } 1492 if (slp->ns_wgq.tqe_next != SLPNOLIST) { 1493 TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); 1494 slp->ns_wgq.tqe_next = SLPNOLIST; 1495 } 1496 /* queue the socket up for deletion */ 1497 slp->ns_timestamp = now.tv_sec; 1498 TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); 1499 TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain); 1500 if (!nfsrv_deadsock_timer_on) { 1501 
nfsrv_deadsock_timer_on = 1; 1502 nfs_interval_timer_start(nfsrv_deadsock_timer_call, 1503 NFSRV_DEADSOCKDELAY * 1000); 1504 } 1505 } 1506 1507#if CONFIG_FSE 1508 /* 1509 * Flush pending file write fsevents 1510 */ 1511 lck_mtx_lock(nfsrv_fmod_mutex); 1512 for (i = 0; i < NFSRVFMODHASHSZ; i++) { 1513 for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) { 1514 /* 1515 * Fire off the content modified fsevent for each 1516 * entry, remove it from the list, and free it. 1517 */ 1518 if (nfsrv_fsevents_enabled) { 1519 fp->fm_context.vc_thread = current_thread(); 1520 add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context, 1521 FSE_ARG_VNODE, fp->fm_vp, 1522 FSE_ARG_DONE); 1523 } 1524 vnode_put(fp->fm_vp); 1525 kauth_cred_unref(&fp->fm_context.vc_ucred); 1526 nfp = LIST_NEXT(fp, fm_link); 1527 LIST_REMOVE(fp, fm_link); 1528 FREE(fp, M_TEMP); 1529 } 1530 } 1531 nfsrv_fmod_pending = 0; 1532 lck_mtx_unlock(nfsrv_fmod_mutex); 1533#endif 1534 1535 nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */ 1536 1537 nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */ 1538 1539 nfsrv_cleancache(); /* And clear out server cache */ 1540 1541 nfsrv_udpsock = NULL; 1542 nfsrv_udp6sock = NULL; 1543} 1544 1545#endif /* NFS_NOSERVER */ 1546