1/* 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Rick Macklem at The University of Guelph. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $ 66 */ 67/* 68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 69 * support for mandatory and extensible security protections. This notice 70 * is included in support of clause 2.2 (b) of the Apple Public License, 71 * Version 2.0. 
72 */ 73 74#include <sys/param.h> 75#include <sys/systm.h> 76#include <sys/kernel.h> 77#include <sys/file_internal.h> 78#include <sys/filedesc.h> 79#include <sys/stat.h> 80#include <sys/vnode_internal.h> 81#include <sys/mount_internal.h> 82#include <sys/proc_internal.h> /* for fdflags */ 83#include <sys/kauth.h> 84#include <sys/sysctl.h> 85#include <sys/ubc.h> 86#include <sys/uio.h> 87#include <sys/malloc.h> 88#include <sys/kpi_mbuf.h> 89#include <sys/socket.h> 90#include <sys/socketvar.h> 91#include <sys/domain.h> 92#include <sys/protosw.h> 93#include <sys/fcntl.h> 94#include <sys/lockf.h> 95#include <sys/syslog.h> 96#include <sys/user.h> 97#include <sys/sysproto.h> 98#include <sys/kpi_socket.h> 99#include <sys/fsevents.h> 100#include <libkern/OSAtomic.h> 101#include <kern/thread_call.h> 102#include <kern/task.h> 103 104#include <security/audit/audit.h> 105 106#include <netinet/in.h> 107#include <netinet/tcp.h> 108#include <nfs/xdr_subs.h> 109#include <nfs/rpcv2.h> 110#include <nfs/nfsproto.h> 111#include <nfs/nfs.h> 112#include <nfs/nfsm_subs.h> 113#include <nfs/nfsrvcache.h> 114#include <nfs/nfs_gss.h> 115#include <nfs/nfsmount.h> 116#include <nfs/nfsnode.h> 117#include <nfs/nfs_lock.h> 118#if CONFIG_MACF 119#include <security/mac_framework.h> 120#endif 121 122kern_return_t thread_terminate(thread_t); /* XXX */ 123 124#if NFSSERVER 125 126extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd, 127 struct nfsrv_sock *slp, 128 vfs_context_t ctx, 129 mbuf_t *mrepp); 130extern int nfsrv_wg_delay; 131extern int nfsrv_wg_delay_v3; 132 133static int nfsrv_require_resv_port = 0; 134static int nfsrv_deadsock_timer_on = 0; 135 136int nfssvc_export(user_addr_t argp); 137int nfssvc_nfsd(void); 138int nfssvc_addsock(socket_t, mbuf_t); 139void nfsrv_zapsock(struct nfsrv_sock *); 140void nfsrv_slpderef(struct nfsrv_sock *); 141void nfsrv_slpfree(struct nfsrv_sock *); 142 143#endif /* NFSSERVER */ 144 145/* 146 * sysctl stuff 147 */ 148SYSCTL_DECL(_vfs_generic); 
149SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge"); 150 151#if NFSCLIENT 152SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge"); 153SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, ""); 154SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, ""); 155SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, ""); 156SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, ""); 157SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, ""); 158SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, ""); 159SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, ""); 160SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, ""); 161SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, ""); 162SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, ""); 163SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, ""); 164SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, ""); 165SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, ""); 166SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, ""); 167SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, 
idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, ""); 168SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, ""); 169SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, ""); 170SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, ""); 171 172#endif /* NFSCLIENT */ 173 174#if NFSSERVER 175SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge"); 176SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, ""); 177SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, ""); 178SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, ""); 179SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, ""); 180SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, ""); 181SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, ""); 182SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, ""); 183SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, ""); 184SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, ""); 185#if CONFIG_FSE 186SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, ""); 187#endif 188SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, ""); 189SYSCTL_INT(_vfs_generic_nfs_server, 
OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
#ifdef NFS_UC_Q_DEBUG
/* Debug knobs for the server socket up-call proxy queue. */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)&nfsrv_uc_queue_count, 0, "");
#endif
#endif /* NFSSERVER */


#if NFSCLIENT

/*
 * nfsclnt() - NFS client pseudo system call.
 *
 * Used by rpc.lockd to deliver lock daemon results/notifications to the
 * kernel NFS client.
 *
 * p      - calling process
 * uap    - uap->flag selects the operation, uap->argp is the user argument
 * retval - unused
 *
 * Returns 0 on success, EINVAL for unknown flags, or an error from
 * copyin()/nfslockdans()/nfslockdnotify().
 */
int
nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
{
	struct lockd_ans la;
	int error;

	switch (uap->flag) {
	case NFSCLNT_LOCKDANS:
		/* lockd answer: copy in the lockd_ans and hand it to the client */
		error = copyin(uap->argp, &la, sizeof(la));
		if (!error)
			error = nfslockdans(p, &la);
		break;
	case NFSCLNT_LOCKDNOTIFY:
		/* lockd notify: argument is parsed by nfslockdnotify() itself */
		error = nfslockdnotify(p, uap->argp);
		break;
	default:
		error = EINVAL;
	}
	return (error);
}

/*
 * Asynchronous I/O threads for client NFS.
 * They do read-ahead and write-behind operations on the block I/O cache.
 *
 * The pool of up to nfsiod_thread_max threads is launched on demand and exit
 * when unused for a while. There are as many nfsiod structs as there are
 * nfsiod threads; however there's no strict tie between a thread and a struct.
 * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
 * up, it removes the next struct nfsiod from the queue and services it. Then
 * it will put the struct at the head of free list and sleep on it.
 * Async requests will pull the next struct nfsiod from the head of the free list,
 * put it on the work queue, and wake whatever thread is waiting on that struct.
 */

/*
 * nfsiod thread exit routine
 *
 * Must be called with nfsiod_mutex held so that the
 * decision to terminate is atomic with the termination.
 *
 * Drops nfsiod_mutex, frees the (possibly NULL) nfsiod struct, and
 * terminates the calling thread.  Does not return.
 */
void
nfsiod_terminate(struct nfsiod *niod)
{
	/* caller holds nfsiod_mutex: count update is atomic with the decision */
	nfsiod_thread_count--;
	lck_mtx_unlock(nfsiod_mutex);
	if (niod)
		FREE(niod, M_TEMP);
	else
		printf("nfsiod: terminating without niod\n");
	thread_terminate(current_thread());
	/*NOTREACHED*/
}

/* nfsiod thread startup routine */
void
nfsiod_thread(void)
{
	struct nfsiod *niod;
	int error;

	MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
	if (!niod) {
		/*
		 * Allocation failed: undo the thread-count bump done by
		 * nfsiod_start(), wake the starter (it sleeps on our thread
		 * pointer), and exit.
		 */
		lck_mtx_lock(nfsiod_mutex);
		nfsiod_thread_count--;
		wakeup(current_thread());
		lck_mtx_unlock(nfsiod_mutex);
		thread_terminate(current_thread());
		/*NOTREACHED*/
	}
	bzero(niod, sizeof(*niod));
	lck_mtx_lock(nfsiod_mutex);
	TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
	/* tell nfsiod_start() we're up before going to sleep */
	wakeup(current_thread());
	/*
	 * Sleep on the nfsiod struct waiting for work; PDROP releases
	 * nfsiod_mutex, and nfsiod_continue is the wakeup continuation.
	 * A normal wakeup resumes in nfsiod_continue(), not here.
	 */
	error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
	/* shouldn't return... so we have an error */
	/* remove an old nfsiod struct and terminate */
	lck_mtx_lock(nfsiod_mutex);
	if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
		TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
	nfsiod_terminate(niod);
	/*NOTREACHED*/
}

/*
 * Start up another nfsiod thread.
287 * (unless we're already maxed out and there are nfsiods running) 288 */ 289int 290nfsiod_start(void) 291{ 292 thread_t thd = THREAD_NULL; 293 294 lck_mtx_lock(nfsiod_mutex); 295 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) { 296 lck_mtx_unlock(nfsiod_mutex); 297 return (EBUSY); 298 } 299 nfsiod_thread_count++; 300 if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) { 301 lck_mtx_unlock(nfsiod_mutex); 302 return (EBUSY); 303 } 304 /* wait for the thread to complete startup */ 305 msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL); 306 thread_deallocate(thd); 307 return (0); 308} 309 310/* 311 * Continuation for Asynchronous I/O threads for NFS client. 312 * 313 * Grab an nfsiod struct to work on, do some work, then drop it 314 */ 315int 316nfsiod_continue(int error) 317{ 318 struct nfsiod *niod; 319 struct nfsmount *nmp; 320 struct nfsreq *req, *treq; 321 struct nfs_reqqhead iodq; 322 int morework; 323 324 lck_mtx_lock(nfsiod_mutex); 325 niod = TAILQ_FIRST(&nfsiodwork); 326 if (!niod) { 327 /* there's no work queued up */ 328 /* remove an old nfsiod struct and terminate */ 329 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) 330 TAILQ_REMOVE(&nfsiodfree, niod, niod_link); 331 nfsiod_terminate(niod); 332 /*NOTREACHED*/ 333 } 334 TAILQ_REMOVE(&nfsiodwork, niod, niod_link); 335 336worktodo: 337 while ((nmp = niod->niod_nmp)) { 338 /* 339 * Service this mount's async I/O queue. 340 * 341 * In order to ensure some level of fairness between mounts, 342 * we grab all the work up front before processing it so any 343 * new work that arrives will be serviced on a subsequent 344 * iteration - and we have a chance to see if other work needs 345 * to be done (e.g. the delayed write queue needs to be pushed 346 * or other mounts are waiting for an nfsiod). 
347 */ 348 /* grab the current contents of the queue */ 349 TAILQ_INIT(&iodq); 350 TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); 351 lck_mtx_unlock(nfsiod_mutex); 352 353 /* process the queue */ 354 TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { 355 TAILQ_REMOVE(&iodq, req, r_achain); 356 req->r_achain.tqe_next = NFSREQNOLIST; 357 req->r_callback.rcb_func(req); 358 } 359 360 /* now check if there's more/other work to be done */ 361 lck_mtx_lock(nfsiod_mutex); 362 morework = !TAILQ_EMPTY(&nmp->nm_iodq); 363 if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) { 364 /* we're going to stop working on this mount */ 365 if (morework) /* mount still needs more work so queue it up */ 366 TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); 367 nmp->nm_niod = NULL; 368 niod->niod_nmp = NULL; 369 } 370 } 371 372 /* loop if there's still a mount to work on */ 373 if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) { 374 niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts); 375 TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink); 376 } 377 if (niod->niod_nmp) 378 goto worktodo; 379 380 /* queue ourselves back up - if there aren't too many threads running */ 381 if (nfsiod_thread_count <= NFSIOD_MAX) { 382 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); 383 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); 384 /* shouldn't return... 
so we have an error */ 385 /* remove an old nfsiod struct and terminate */ 386 lck_mtx_lock(nfsiod_mutex); 387 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) 388 TAILQ_REMOVE(&nfsiodfree, niod, niod_link); 389 } 390 nfsiod_terminate(niod); 391 /*NOTREACHED*/ 392 return (0); 393} 394 395#endif /* NFSCLIENT */ 396 397 398#if NFSSERVER 399 400/* 401 * NFS server system calls 402 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c 403 */ 404 405/* 406 * Get file handle system call 407 */ 408int 409getfh(proc_t p, struct getfh_args *uap, __unused int *retval) 410{ 411 vnode_t vp; 412 struct nfs_filehandle nfh; 413 int error, fhlen, fidlen; 414 struct nameidata nd; 415 char path[MAXPATHLEN], *ptr; 416 size_t pathlen; 417 struct nfs_exportfs *nxfs; 418 struct nfs_export *nx; 419 420 /* 421 * Must be super user 422 */ 423 error = proc_suser(p); 424 if (error) 425 return (error); 426 427 error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen); 428 if (!error) 429 error = copyin(uap->fhp, &fhlen, sizeof(fhlen)); 430 if (error) 431 return (error); 432 /* limit fh size to length specified (or v3 size by default) */ 433 if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) 434 fhlen = NFSV3_MAX_FH_SIZE; 435 fidlen = fhlen - sizeof(struct nfs_exphandle); 436 437 if (!nfsrv_is_initialized()) 438 return (EINVAL); 439 440 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, 441 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current()); 442 error = namei(&nd); 443 if (error) 444 return (error); 445 nameidone(&nd); 446 447 vp = nd.ni_vp; 448 449 // find exportfs that matches f_mntonname 450 lck_rw_lock_shared(&nfsrv_export_rwlock); 451 ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; 452 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { 453 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) 454 break; 455 } 456 if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { 457 error = EINVAL; 458 goto out; 459 } 460 // find export 
that best matches remainder of path 461 ptr = path + strlen(nxfs->nxfs_path); 462 while (*ptr && (*ptr == '/')) 463 ptr++; 464 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { 465 int len = strlen(nx->nx_path); 466 if (len == 0) // we've hit the export entry for the root directory 467 break; 468 if (!strncmp(nx->nx_path, ptr, len)) 469 break; 470 } 471 if (!nx) { 472 error = EINVAL; 473 goto out; 474 } 475 476 bzero(&nfh, sizeof(nfh)); 477 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); 478 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id); 479 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id); 480 nfh.nfh_xh.nxh_flags = 0; 481 nfh.nfh_xh.nxh_reserved = 0; 482 nfh.nfh_len = fidlen; 483 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); 484 if (nfh.nfh_len > (uint32_t)fidlen) 485 error = EOVERFLOW; 486 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; 487 nfh.nfh_len += sizeof(nfh.nfh_xh); 488 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; 489 490out: 491 lck_rw_done(&nfsrv_export_rwlock); 492 vnode_put(vp); 493 if (error) 494 return (error); 495 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t)); 496 return (error); 497} 498 499extern struct fileops vnops; 500 501/* 502 * syscall for the rpc.lockd to use to translate a NFS file handle into 503 * an open descriptor. 504 * 505 * warning: do not remove the suser() call or this becomes one giant 506 * security hole. 
 */
/*
 * fhopen() - open a file by NFS file handle.
 *
 * Restricted to superuser (see warning above).  Translates the handle in
 * uap->u_fhp into a vnode via the export tables, performs the vn_open-
 * equivalent checks/open, and returns a new file descriptor in *retval.
 * Every abort path before the fd is installed must vnode_put(vp).
 */
int
fhopen( proc_t p,
	struct fhopen_args *uap,
	int32_t *retval)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;
	struct flock lf;
	struct fileproc *fp, *nfp;
	int fmode, error, type;
	int indx;
	vfs_context_t ctx = vfs_context_current();
	kauth_action_t action;

	/*
	 * Must be super user
	 */
	error = suser(vfs_context_ucred(ctx), 0);
	if (error) {
		return (error);
	}

	if (!nfsrv_is_initialized()) {
		return (EINVAL);
	}

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);

	/* copy in the length first so we can validate before reading the handle */
	error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
	if (error)
		return (error);
	if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
	    (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
		return (EINVAL);
	error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
	if (error)
		return (error);
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

	lck_rw_lock_shared(&nfsrv_export_rwlock);
	/* now give me my vnode, it gets returned to me with a reference */
	error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
	lck_rw_done(&nfsrv_export_rwlock);
	if (error) {
		if (error == NFSERR_TRYLATER)
			error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
		return (error);
	}

	/*
	 * From now on we have to make sure not
	 * to forget about the vnode.
	 * Any error that causes an abort must vnode_put(vp).
	 * Just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vnode_vtype(vp) == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}

	/* disallow write operations on directories */
	if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
		error = EISDIR;
		goto bad;
	}

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD)
		action |= KAUTH_VNODE_READ_DATA;
	if (fmode & (FWRITE | O_TRUNC))
		action |= KAUTH_VNODE_WRITE_DATA;
	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
		goto bad;

	if ((error = VNOP_OPEN(vp, fmode, ctx)))
		goto bad;
	if ((error = vnode_ref_ext(vp, fmode, 0)))
		goto bad;

	/*
	 * end of vn_open code
	 */

	// starting here... error paths should call vn_close/vnode_put
	if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
		vn_close(vp, fmode & FMASK, ctx);
		goto bad;
	}
	fp = nfp;

	/* wire the new fileproc's fileglob to the vnode */
	fp->f_fglob->fg_flag = fmode & FMASK;
	fp->f_fglob->fg_type = DTYPE_VNODE;
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

	// XXX do we really need to support this with fhopen()?
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) {
			struct vfs_context context = *vfs_context_current();
			/* Modify local copy (to not damage thread copy) */
			context.vc_ucred = fp->f_fglob->fg_cred;

			vn_close(vp, fp->f_fglob->fg_flag, &context);
			fp_free(p, indx, fp);
			return (error);
		}
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

	/* the fileglob now holds its own reference (vnode_ref_ext above) */
	vnode_put(vp);

	/* publish the descriptor in the fd table */
	proc_fdlock(p);
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;
	return (0);

bad:
	vnode_put(vp);
	return (error);
}

/*
 * NFS server pseudo system call
 *
 * Dispatches on uap->flag:
 *   NFSSVC_ADDSOCK - hand a server socket (and optional peer address)
 *                    to the NFS server via nfssvc_addsock()
 *   NFSSVC_NFSD    - become an nfsd service thread (nfssvc_nfsd())
 *   NFSSVC_EXPORT  - update the export list (nfssvc_export())
 * EINTR/ERESTART from the handlers are mapped to 0 (normal shutdown).
 */
int
nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
{
	mbuf_t nam;
	struct user_nfsd_args user_nfsdarg;
	socket_t so;
	int error;

	AUDIT_ARG(cmd, uap->flag);

	/*
	 * Must be super user for most operations (export ops checked later).
	 */
	if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p))))
		return (error);
#if CONFIG_MACF
	error = mac_system_check_nfsd(kauth_cred_get());
	if (error)
		return (error);
#endif

	/* make sure NFS server data structures have been initialized */
	nfsrv_init();

	if (uap->flag & NFSSVC_ADDSOCK) {
		/* 32-bit processes pass a struct nfsd_args that must be widened */
		if (IS_64BIT_PROCESS(p)) {
			error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
		} else {
			struct nfsd_args tmp_args;
			error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
			if (error == 0) {
				user_nfsdarg.sock = tmp_args.sock;
				user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
				user_nfsdarg.namelen = tmp_args.namelen;
			}
		}
		if (error)
			return (error);
		/* get the socket */
		error = file_socket(user_nfsdarg.sock, &so);
		if (error)
			return (error);
		/* Get the client address for connected sockets. */
		if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
			nam = NULL;
		} else {
			error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
			if (error) {
				/* drop the iocount file_socket() grabbed on the file descriptor */
				file_drop(user_nfsdarg.sock);
				return (error);
			}
		}
		/*
		 * nfssvc_addsock() will grab a retain count on the socket
		 * to keep the socket from being closed when nfsd closes its
		 * file descriptor for it.
		 */
		error = nfssvc_addsock(so, nam);
		/* drop the iocount file_socket() grabbed on the file descriptor */
		file_drop(user_nfsdarg.sock);
	} else if (uap->flag & NFSSVC_NFSD) {
		error = nfssvc_nfsd();
	} else if (uap->flag & NFSSVC_EXPORT) {
		error = nfssvc_export(uap->argp);
	} else {
		error = EINVAL;
	}
	if (error == EINTR || error == ERESTART)
		error = 0;
	return (error);
}

/*
 * Adds a socket to the list for servicing by nfsds.
 */
/*
 * nfssvc_addsock() - register a server socket with the NFS server.
 *
 * so    - socket handed in by nfsd (caller holds the fd iocount)
 * mynam - peer address mbuf for connected sockets, or NULL; ownership
 *         transfers here (freed on every error path, kept in ns_nam on
 *         success)
 *
 * Sets socket options/timeouts, allocates and initializes an nfsrv_sock,
 * links it on nfsrv_socklist under nfsd_mutex, retains the socket, and
 * wakes an nfsd to service it.  Returns 0, EEXIST (duplicate UDP socket),
 * or ENOMEM.
 */
int
nfssvc_addsock(socket_t so, mbuf_t mynam)
{
	struct nfsrv_sock *slp;
	int error = 0, sodomain, sotype, soprotocol, on = 1;
	int first;
	struct timeval timeo;

	/* make sure mbuf constants are set up */
	if (!nfs_mbuf_mhlen)
		nfs_mbuf_init();

	sock_gettype(so, &sodomain, &sotype, &soprotocol);

	/* There should be only one UDP socket for each of IPv4 and IPv6 */
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
		mbuf_freem(mynam);
		return (EEXIST);
	}
	if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
		mbuf_freem(mynam);
		return (EEXIST);
	}

	/* Set protocol options and reserve some space (for UDP). */
	if (sotype == SOCK_STREAM)
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
		sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	if (sotype == SOCK_DGRAM) {	/* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			/* buffer sizing is best-effort; log and carry on */
			log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
			error = 0;
		}
	}
	sock_nointerrupt(so, 0);

	/*
	 * Set socket send/receive timeouts.
	 * Receive timeout shouldn't matter, but setting the send timeout
	 * will make sure that an unresponsive client can't hang the server.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = 1;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	timeo.tv_sec = 30;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		/* timeouts are also best-effort */
		log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
		error = 0;
	}

	MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
	if (!slp) {
		mbuf_freem(mynam);
		return (ENOMEM);
	}
	bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
	lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
	lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);

	lck_mtx_lock(nfsd_mutex);

	if (soprotocol == IPPROTO_UDP) {
		if (sodomain == AF_INET) {
			/* There should be only one UDP/IPv4 socket */
			/* (re-checked under nfsd_mutex to close the race with the check above) */
			if (nfsrv_udpsock) {
				lck_mtx_unlock(nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return (EEXIST);
			}
			nfsrv_udpsock = slp;
		}
		if (sodomain == AF_INET6) {
			/* There should be only one UDP/IPv6 socket */
			if (nfsrv_udp6sock) {
				lck_mtx_unlock(nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return (EEXIST);
			}
			nfsrv_udp6sock = slp;
		}
	}

	/* add the socket to the list */
	first = TAILQ_EMPTY(&nfsrv_socklist);
	TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);

	sock_retain(so); /* grab a retain count on the socket */
	slp->ns_so = so;
	slp->ns_sotype = sotype;
	slp->ns_nam = mynam;

	/* set up the socket up-call */
	nfsrv_uc_addsock(slp, first);

	/* mark that the socket is not in the nfsrv_sockwg list */
	slp->ns_wgq.tqe_next = SLPNOLIST;

	slp->ns_flag = SLP_VALID | SLP_NEEDQ;

	nfsrv_wakenfsd(slp);
	lck_mtx_unlock(nfsd_mutex);

	return (0);
}

/*
 * nfssvc_nfsd()
 *
 * nfsd theory of operation:
 *
 * The first nfsd thread stays in user mode
accepting new TCP connections 849 * which are then added via the "addsock" call. The rest of the nfsd threads 850 * simply call into the kernel and remain there in a loop handling NFS 851 * requests until killed by a signal. 852 * 853 * There's a list of nfsd threads (nfsd_head). 854 * There's an nfsd queue that contains only those nfsds that are 855 * waiting for work to do (nfsd_queue). 856 * 857 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for 858 * managing the work on the sockets: 859 * nfsrv_sockwait - sockets w/new data waiting to be worked on 860 * nfsrv_sockwork - sockets being worked on which may have more work to do 861 * nfsrv_sockwg -- sockets which have pending write gather data 862 * When a socket receives data, if it is not currently queued, it 863 * will be placed at the end of the "wait" queue. 864 * Whenever a socket needs servicing we make sure it is queued and 865 * wake up a waiting nfsd (if there is one). 866 * 867 * nfsds will service at most 8 requests from the same socket before 868 * defecting to work on another socket. 869 * nfsds will defect immediately if there are any sockets in the "wait" queue 870 * nfsds looking for a socket to work on check the "wait" queue first and 871 * then check the "work" queue. 872 * When an nfsd starts working on a socket, it removes it from the head of 873 * the queue it's currently on and moves it to the end of the "work" queue. 874 * When nfsds are checking the queues for work, any sockets found not to 875 * have any work are simply dropped from the queue. 
 *
 */
/*
 * Body of an nfsd server thread: register this thread, then loop pulling
 * RPC requests off the server sockets and executing them until signalled
 * (or until there are more threads than nfsd_thread_max).
 * Returns 0 on a normal exit, or the error that terminated the loop.
 */
int
nfssvc_nfsd(void)
{
	mbuf_t m, mrep;
	struct nfsrv_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript *nd = NULL;
	int error = 0, cacherep, writes_todo;
	int siz, procrastinate, opcnt = 0;
	u_quad_t cur_usec;
	struct timeval now;
	struct vfs_context context;
	struct timespec to;

#ifndef nolint
	/* quiet "may be used uninitialized" warnings; both are set before use */
	cacherep = RC_DOIT;
	writes_todo = 0;
#endif

	/* Allocate and register this thread's nfsd record. */
	MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
	if (!nfsd)
		return (ENOMEM);
	bzero(nfsd, sizeof(struct nfsd));
	lck_mtx_lock(nfsd_mutex);
	if (nfsd_thread_count++ == 0)
		nfsrv_initcache();		/* Init the server request cache */

	TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
	lck_mtx_unlock(nfsd_mutex);

	context.vc_thread = current_thread();

	/* Set time out so that nfsd threads can wake up and see if they are still needed. */
	to.tv_sec = 5;
	to.tv_nsec = 0;

	/*
	 * Loop getting rpc requests until SIGKILL.
	 */
	for (;;) {
		if (nfsd_thread_max <= 0) {
			/* NFS server shutting down, get out ASAP */
			error = EINTR;
			slp = nfsd->nfsd_slp;
		} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
			/* already have some work to do */
			error = 0;
			slp = nfsd->nfsd_slp;
		} else {
			/* need to find work to do */
			error = 0;
			lck_mtx_lock(nfsd_mutex);
			while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
				if (nfsd_thread_count > nfsd_thread_max) {
					/*
					 * If we have no socket and there are more
					 * nfsd threads than configured, let's exit.
					 */
					error = 0;
					goto done;
				}
				nfsd->nfsd_flag |= NFSD_WAITING;
				TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
				/* sleep (with the 5 second timeout above) until a socket has work */
				error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
				if (error) {
					if (nfsd->nfsd_flag & NFSD_WAITING) {
						TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
						nfsd->nfsd_flag &= ~NFSD_WAITING;
					}
					if (error == EWOULDBLOCK)
						continue;	/* timed out: recheck thread count and queues */
					goto done;	/* interrupted by a signal */
				}
			}
			slp = nfsd->nfsd_slp;
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
				/* look for a socket to work on in the wait queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WAITQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
						break;
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
				/* look for a socket to work on in the work queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WORKQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
						break;
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!nfsd->nfsd_slp && slp) {
				/* we found a socket to work on, grab a reference */
				slp->ns_sref++;
				nfsd->nfsd_slp = slp;
				opcnt = 0;
				/* and put it at the back of the work queue */
				TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
				slp->ns_flag |= SLP_WORKQ;
				lck_rw_done(&slp->ns_rwlock);
			}
			lck_mtx_unlock(nfsd_mutex);
			if (!slp)
				continue;
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			if (slp->ns_flag & SLP_VALID) {
				if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) {
					/* pull any pending data off the socket before looking for requests */
					slp->ns_flag &= ~SLP_NEEDQ;
					nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
				}
				if (slp->ns_flag & SLP_DISCONN)
					nfsrv_zapsock(slp);
				/* dequeue the next complete RPC request, if any */
				error = nfsrv_dorec(slp, nfsd, &nd);
				if (error == EINVAL) { // RPCSEC_GSS drop
					if (slp->ns_sotype == SOCK_STREAM)
						nfsrv_zapsock(slp); // drop connection
				}
				writes_todo = 0;
				if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
					/* no request available; check for gathered writes that are due */
					microuptime(&now);
					cur_usec = (u_quad_t)now.tv_sec * 1000000 +
						(u_quad_t)now.tv_usec;
					if (slp->ns_wgtime <= cur_usec) {
						error = 0;
						cacherep = RC_DOIT;
						writes_todo = 1;
					}
					slp->ns_flag &= ~SLP_DOWRITES;
				}
				nfsd->nfsd_flag |= NFSD_REQINPROG;
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
			/* shutting down, or the socket died: release any request and our socket ref */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2)
					mbuf_freem(nd->nd_nam2);
				if (IS_VALID_CRED(nd->nd_cr))
					kauth_cred_unref(&nd->nd_cr);
				if (nd->nd_gss_context)
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
				nd = NULL;
			}
			nfsd->nfsd_slp = NULL;
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			if (slp)
				nfsrv_slpderef(slp);
			if (nfsd_thread_max <= 0)
				break;
			continue;
		}
		if (nd) {
			microuptime(&nd->nd_starttime);
			if (nd->nd_nam2)
				nd->nd_nam = nd->nd_nam2;
			else
				nd->nd_nam = slp->ns_nam;

			/* check the duplicate request cache; may yield a cached reply */
			cacherep = nfsrv_getcache(nd, slp, &mrep);

			if (nfsrv_require_resv_port) {
				/* Check if source port is a reserved port */
				in_port_t port = 0;
				struct sockaddr *saddr = mbuf_data(nd->nd_nam);

				if (saddr->sa_family == AF_INET)
					port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
				else if (saddr->sa_family == AF_INET6)
					port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
				if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
					/* non-reserved source port: reject with a weak-auth error */
					nd->nd_procnum = NFSPROC_NOOP;
					nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
					cacherep = RC_DOIT;
				}
			}

		}

		/*
		 * Loop to get all the write RPC replies that have been
		 * gathered together.
		 */
		do {
			switch (cacherep) {
			case RC_DOIT:
				/* not in the cache: execute the RPC (or flush gathered writes) */
				if (nd && (nd->nd_vers == NFS_VER3))
					procrastinate = nfsrv_wg_delay_v3;
				else
					procrastinate = nfsrv_wg_delay;
				lck_rw_lock_shared(&nfsrv_export_rwlock);
				context.vc_ucred = NULL;
				/* nd is non-NULL here unless writes_todo is set (loop entry condition) */
				if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
					error = nfsrv_writegather(&nd, slp, &context, &mrep);
				else
					error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
				lck_rw_done(&nfsrv_export_rwlock);
				if (mrep == NULL) {
					/*
					 * If this is a stream socket and we are not going
					 * to send a reply we better close the connection
					 * so the client doesn't hang.
					 */
					if (error && slp->ns_sotype == SOCK_STREAM) {
						lck_rw_lock_exclusive(&slp->ns_rwlock);
						nfsrv_zapsock(slp);
						lck_rw_done(&slp->ns_rwlock);
						/*
						 * NOTE(review): nd may be NULL here on the writes_todo
						 * path (nfsrv_writegather takes &nd) — confirm it can't
						 * return an error with nd cleared before this printf.
						 */
						printf("NFS server: NULL reply from proc = %d error = %d\n",
							nd->nd_procnum, error);
					}
					break;

				}
				if (error) {
					OSAddAtomic64(1, &nfsstats.srv_errs);
					nfsrv_updatecache(nd, FALSE, mrep);
					if (nd->nd_nam2) {
						mbuf_freem(nd->nd_nam2);
						nd->nd_nam2 = NULL;
					}
					break;
				}
				OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
				nfsrv_updatecache(nd, TRUE, mrep);
				/* FALLTHRU */

			case RC_REPLY:
				/* send the reply in mrep back to the client */
				if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
					/*
					 * Need to checksum or encrypt the reply
					 */
					error = nfs_gss_svc_protect_reply(nd, mrep);
					if (error) {
						mbuf_freem(mrep);
						break;
					}
				}

				/*
				 * Get the total size of the reply
				 */
				m = mrep;
				siz = 0;
				while (m) {
					siz += mbuf_len(m);
					m = mbuf_next(m);
				}
				if (siz <= 0 || siz > NFS_MAXPACKET) {
					printf("mbuf siz=%d\n",siz);
					panic("Bad nfs svc reply");
				}
				m = mrep;
				mbuf_pkthdr_setlen(m, siz);
				error = mbuf_pkthdr_setrcvif(m, NULL);
				if (error)
					panic("nfsd setrcvif failed: %d", error);
				/*
				 * For stream protocols, prepend a Sun RPC
				 * Record Mark.
				 */
				if (slp->ns_sotype == SOCK_STREAM) {
					error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
					if (!error)
						*(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);	/* "last fragment" bit + length */
				}
				if (!error) {
					if (slp->ns_flag & SLP_VALID) {
						error = nfsrv_send(slp, nd->nd_nam2, m);
					} else {
						error = EPIPE;
						mbuf_freem(m);
					}
				} else {
					mbuf_freem(m);
				}
				mrep = NULL;
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
					nd->nd_nam2 = NULL;
				}
				if (error == EPIPE) {
					/* connection is gone; shut the socket down */
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					nfsrv_zapsock(slp);
					lck_rw_done(&slp->ns_rwlock);
				}
				if (error == EINTR || error == ERESTART) {
					/* this thread is being signalled: clean up the request and exit */
					nfsm_chain_cleanup(&nd->nd_nmreq);
					if (IS_VALID_CRED(nd->nd_cr))
						kauth_cred_unref(&nd->nd_cr);
					if (nd->nd_gss_context)
						nfs_gss_svc_ctx_deref(nd->nd_gss_context);
					FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
					nfsrv_slpderef(slp);
					lck_mtx_lock(nfsd_mutex);
					goto done;
				}
				break;
			case RC_DROPIT:
				/* cached in-progress request: silently drop it */
				mbuf_freem(nd->nd_nam2);
				nd->nd_nam2 = NULL;
				break;
			};
			opcnt++;
			/* release the request descriptor's resources */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2)
					mbuf_freem(nd->nd_nam2);
				if (IS_VALID_CRED(nd->nd_cr))
					kauth_cred_unref(&nd->nd_cr);
				if (nd->nd_gss_context)
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
				nd = NULL;
			}

			/*
			 * Check to see if there are outstanding writes that
			 * need to be serviced.
			 */
			writes_todo = 0;
			if (slp->ns_wgtime) {
				microuptime(&now);
				cur_usec = (u_quad_t)now.tv_sec * 1000000 +
					(u_quad_t)now.tv_usec;
				if (slp->ns_wgtime <= cur_usec) {
					cacherep = RC_DOIT;
					writes_todo = 1;
				}
			}
		} while (writes_todo);

		nd = NULL;
		if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
			/* no sockets waiting and we haven't hogged this one (< 8 ops): try another request */
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			error = nfsrv_dorec(slp, nfsd, &nd);
			if (error == EINVAL) { // RPCSEC_GSS drop
				if (slp->ns_sotype == SOCK_STREAM)
					nfsrv_zapsock(slp); // drop connection
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (!nd) {
			/* drop our reference on the socket */
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			nfsd->nfsd_slp = NULL;
			nfsrv_slpderef(slp);
		}
	}
	lck_mtx_lock(nfsd_mutex);
done:
	/* unregister this thread; the last one out tears down shared server state */
	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
	FREE(nfsd, M_NFSD);
	if (--nfsd_thread_count == 0)
		nfsrv_cleanup();
	lck_mtx_unlock(nfsd_mutex);
	return (error);
}

/*
 * Copy in the export arguments from user space — handling both the 32-bit
 * and 64-bit process layouts — and pass them to nfsrv_export().
 * Returns 0 on success or an errno from copyin()/nfsrv_export().
 */
int
nfssvc_export(user_addr_t argp)
{
	int error = 0, is_64bit;
	struct user_nfs_export_args unxa;
	vfs_context_t ctx = vfs_context_current();

	is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));

	/* copy in pointers to path and export args */
	if (is_64bit) {
		error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
	} else {
		struct nfs_export_args tnxa;
		error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
		if (error == 0) {
			/* munge into LP64 version of nfs_export_args structure */
			unxa.nxa_fsid = tnxa.nxa_fsid;
			unxa.nxa_expid = tnxa.nxa_expid;
			unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
			unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
			unxa.nxa_flags = tnxa.nxa_flags;
			unxa.nxa_netcount = tnxa.nxa_netcount;
			unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
		}
	}
	if (error)
		return (error);

	error = nfsrv_export(&unxa, ctx);

	return (error);
}

/*
 * Shut down a socket associated with an nfsrv_sock structure.
 * Should be called with the send lock set, if required.
 * The trick here is to increment the sref at the start, so that the nfsds
 * will stop using it and clear ns_flag at the end so that it will not be
 * reassigned during cleanup.
 * NOTE(review): this function does not touch ns_sref itself — the sref
 * claim above appears to describe the callers' protocol; verify.
 */
void
nfsrv_zapsock(struct nfsrv_sock *slp)
{
	socket_t so;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
	/* clearing SLP_VALID (via SLP_ALLFLAGS) keeps nfsds from reusing this socket */
	slp->ns_flag &= ~SLP_ALLFLAGS;

	so = slp->ns_so;
	if (so == NULL)
		return;

	/*
	 * Attempt to deter future up-calls, but leave the
	 * up-call info in place to avoid a race with the
	 * networking code.
	 */
	socket_lock(so, 1);
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	socket_unlock(so, 1);

	sock_shutdown(so, SHUT_RDWR);

	/*
	 * Remove from the up-call queue
	 */
	nfsrv_uc_dequeue(slp);
}

/*
 * cleanup and release a server socket structure.
1320 */ 1321void 1322nfsrv_slpfree(struct nfsrv_sock *slp) 1323{ 1324 struct nfsrv_descript *nwp, *nnwp; 1325 1326 if (slp->ns_so) { 1327 sock_release(slp->ns_so); 1328 slp->ns_so = NULL; 1329 } 1330 if (slp->ns_nam) 1331 mbuf_free(slp->ns_nam); 1332 if (slp->ns_raw) 1333 mbuf_freem(slp->ns_raw); 1334 if (slp->ns_rec) 1335 mbuf_freem(slp->ns_rec); 1336 if (slp->ns_frag) 1337 mbuf_freem(slp->ns_frag); 1338 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL; 1339 slp->ns_reccnt = 0; 1340 1341 if (slp->ns_ua) 1342 FREE(slp->ns_ua, M_NFSSVC); 1343 1344 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { 1345 nnwp = nwp->nd_tq.le_next; 1346 LIST_REMOVE(nwp, nd_tq); 1347 nfsm_chain_cleanup(&nwp->nd_nmreq); 1348 if (nwp->nd_mrep) 1349 mbuf_freem(nwp->nd_mrep); 1350 if (nwp->nd_nam2) 1351 mbuf_freem(nwp->nd_nam2); 1352 if (IS_VALID_CRED(nwp->nd_cr)) 1353 kauth_cred_unref(&nwp->nd_cr); 1354 if (nwp->nd_gss_context) 1355 nfs_gss_svc_ctx_deref(nwp->nd_gss_context); 1356 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC); 1357 } 1358 LIST_INIT(&slp->ns_tq); 1359 1360 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group); 1361 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group); 1362 FREE(slp, M_NFSSVC); 1363} 1364 1365/* 1366 * Derefence a server socket structure. If it has no more references and 1367 * is no longer valid, you can throw it away. 
 */
void
nfsrv_slpderef(struct nfsrv_sock *slp)
{
	struct timeval now;

	/* lock order: nfsd_mutex before the socket's rwlock */
	lck_mtx_lock(nfsd_mutex);
	lck_rw_lock_exclusive(&slp->ns_rwlock);
	slp->ns_sref--;

	if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
		/* still referenced or still valid: just dequeue it if it's idle */
		if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
			/* remove socket from queue since there's no work */
			if (slp->ns_flag & SLP_WAITQ)
				TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
			else
				TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
			slp->ns_flag &= ~SLP_QUEUED;
		}
		lck_rw_done(&slp->ns_rwlock);
		lck_mtx_unlock(nfsd_mutex);
		return;
	}

	/* This socket is no longer valid, so we'll get rid of it */

	if (slp->ns_flag & SLP_QUEUED) {
		if (slp->ns_flag & SLP_WAITQ)
			TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
		else
			TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
		slp->ns_flag &= ~SLP_QUEUED;
	}

	/*
	 * Queue the socket up for deletion
	 * and start the timer to delete it
	 * after it has been in limbo for
	 * a while.
	 */
	microuptime(&now);
	slp->ns_timestamp = now.tv_sec;
	TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
	TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
	if (!nfsrv_deadsock_timer_on) {
		nfsrv_deadsock_timer_on = 1;
		nfs_interval_timer_start(nfsrv_deadsock_timer_call,
			NFSRV_DEADSOCKDELAY * 1000);
	}

	lck_rw_done(&slp->ns_rwlock);
	/* now remove from the write gather socket list */
	if (slp->ns_wgq.tqe_next != SLPNOLIST) {
		TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
		slp->ns_wgq.tqe_next = SLPNOLIST;
	}
	lck_mtx_unlock(nfsd_mutex);
}

/*
 * Check periodically for dead sockets pending delete.
 * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
 * seconds then we assume it's safe to free.
 */
void
nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
{
	struct nfsrv_sock *slp;
	struct timeval now;
	time_t time_to_wait;

	microuptime(&now);
	lck_mtx_lock(nfsd_mutex);

	/* free sockets that have been dead long enough (list is in timestamp order) */
	while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
		if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
			break;
		TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
		nfsrv_slpfree(slp);
	}
	if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
		/* nothing left pending; let the timer lapse */
		nfsrv_deadsock_timer_on = 0;
		lck_mtx_unlock(nfsd_mutex);
		return;
	}
	/* re-arm for when the oldest remaining socket becomes freeable */
	time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
	if (time_to_wait < 1)
		time_to_wait = 1;

	lck_mtx_unlock(nfsd_mutex);

	nfs_interval_timer_start(nfsrv_deadsock_timer_call,
		time_to_wait * 1000);
}

/*
 * Clean up the data structures for the server.
 * Called with nfsd_mutex held, when the last nfsd thread exits.
 */
void
nfsrv_cleanup(void)
{
	struct nfsrv_sock *slp, *nslp;
	struct timeval now;
#if CONFIG_FSE
	struct nfsrv_fmod *fp, *nfp;
	int i;
#endif

	microuptime(&now);
	/* shut down every server socket and move it to the dead-socket list */
	for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
		nslp = TAILQ_NEXT(slp, ns_chain);
		if (slp->ns_flag & SLP_VALID) {
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			nfsrv_zapsock(slp);
			lck_rw_done(&slp->ns_rwlock);
		}
		if (slp->ns_flag & SLP_QUEUED) {
			if (slp->ns_flag & SLP_WAITQ)
				TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
			else
				TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
			slp->ns_flag &= ~SLP_QUEUED;
		}
		if (slp->ns_wgq.tqe_next != SLPNOLIST) {
			TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
			slp->ns_wgq.tqe_next = SLPNOLIST;
		}
		/* queue the socket up for deletion */
		slp->ns_timestamp = now.tv_sec;
		TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
		TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
		if (!nfsrv_deadsock_timer_on) {
			nfsrv_deadsock_timer_on = 1;
			nfs_interval_timer_start(nfsrv_deadsock_timer_call,
				NFSRV_DEADSOCKDELAY * 1000);
		}
	}

#if CONFIG_FSE
	/*
	 * Flush pending file write fsevents
	 */
	lck_mtx_lock(nfsrv_fmod_mutex);
	for (i = 0; i < NFSRVFMODHASHSZ; i++) {
		for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
			/*
			 * Fire off the content modified fsevent for each
			 * entry, remove it from the list, and free it.
			 */
			if (nfsrv_fsevents_enabled) {
				fp->fm_context.vc_thread = current_thread();
				add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
					FSE_ARG_VNODE, fp->fm_vp,
					FSE_ARG_DONE);
			}
			vnode_put(fp->fm_vp);
			kauth_cred_unref(&fp->fm_context.vc_ucred);
			nfp = LIST_NEXT(fp, fm_link);
			LIST_REMOVE(fp, fm_link);
			FREE(fp, M_TEMP);
		}
	}
	nfsrv_fmod_pending = 0;
	lck_mtx_unlock(nfsrv_fmod_mutex);
#endif

	nfsrv_uc_cleanup();	/* Stop nfs socket up-call threads */

	nfs_gss_svc_cleanup();	/* Remove any RPCSEC_GSS contexts */

	nfsrv_cleancache();	/* And clear out server cache */

	nfsrv_udpsock = NULL;
	nfsrv_udp6sock = NULL;
}

#endif /* NFS_NOSERVER */