1/* 2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Rick Macklem at The University of Guelph. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 65 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $ 66 */ 67/* 68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 69 * support for mandatory and extensible security protections. This notice 70 * is included in support of clause 2.2 (b) of the Apple Public License, 71 * Version 2.0. 
72 */ 73 74#include <sys/param.h> 75#include <sys/systm.h> 76#include <sys/kernel.h> 77#include <sys/file_internal.h> 78#include <sys/filedesc.h> 79#include <sys/stat.h> 80#include <sys/vnode_internal.h> 81#include <sys/mount_internal.h> 82#include <sys/proc_internal.h> /* for fdflags */ 83#include <sys/kauth.h> 84#include <sys/sysctl.h> 85#include <sys/ubc.h> 86#include <sys/uio.h> 87#include <sys/malloc.h> 88#include <sys/kpi_mbuf.h> 89#include <sys/socket.h> 90#include <sys/socketvar.h> 91#include <sys/domain.h> 92#include <sys/protosw.h> 93#include <sys/fcntl.h> 94#include <sys/lockf.h> 95#include <sys/syslog.h> 96#include <sys/user.h> 97#include <sys/sysproto.h> 98#include <sys/kpi_socket.h> 99#include <sys/fsevents.h> 100#include <libkern/OSAtomic.h> 101#include <kern/thread_call.h> 102#include <kern/task.h> 103 104#include <security/audit/audit.h> 105 106#include <netinet/in.h> 107#include <netinet/tcp.h> 108#include <nfs/xdr_subs.h> 109#include <nfs/rpcv2.h> 110#include <nfs/nfsproto.h> 111#include <nfs/nfs.h> 112#include <nfs/nfsm_subs.h> 113#include <nfs/nfsrvcache.h> 114#include <nfs/nfs_gss.h> 115#include <nfs/nfsmount.h> 116#include <nfs/nfsnode.h> 117#include <nfs/nfs_lock.h> 118#if CONFIG_MACF 119#include <security/mac_framework.h> 120#endif 121 122kern_return_t thread_terminate(thread_t); /* XXX */ 123 124#if NFSSERVER 125 126extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd, 127 struct nfsrv_sock *slp, 128 vfs_context_t ctx, 129 mbuf_t *mrepp); 130extern int nfsrv_wg_delay; 131extern int nfsrv_wg_delay_v3; 132 133static int nfsrv_require_resv_port = 0; 134static int nfsrv_deadsock_timer_on = 0; 135 136int nfssvc_export(user_addr_t argp); 137int nfssvc_nfsd(void); 138int nfssvc_addsock(socket_t, mbuf_t); 139void nfsrv_zapsock(struct nfsrv_sock *); 140void nfsrv_slpderef(struct nfsrv_sock *); 141void nfsrv_slpfree(struct nfsrv_sock *); 142 143#endif /* NFSSERVER */ 144 145/* 146 * sysctl stuff 147 */ 148SYSCTL_DECL(_vfs_generic); 
/* Root of the vfs.generic.nfs sysctl tree. */
SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");

#if NFSCLIENT
/* NFS client tunables (vfs.generic.nfs.client.*). */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
/* read-only: current number of nfsiod threads */
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO,
    idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");

#endif /* NFSCLIENT */

#if NFSSERVER
/* NFS server tunables (vfs.generic.nfs.server.*). */
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
#if CONFIG_FSE
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO,
    fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
/* read-only: current number of nfsd threads */
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
#ifdef NFS_UC_Q_DEBUG
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)&nfsrv_uc_queue_count, 0, "");
#endif
#endif /* NFSSERVER */


#if NFSCLIENT

/*
 * Map a user/group name to a numeric id.
 *
 * Resolves map->ntm_name to a GUID via nfs4_id2guid(), then converts the
 * GUID to a gid or uid (selected by map->ntm_grpflag) via kauth, storing
 * the result in map->ntm_id.  Returns 0 or an errno-style error.
 */
static int
mapname2id(struct nfs_testmapid *map)
{
	int error;

	error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
	if (error)
		return (error);

	if (map->ntm_grpflag)
		error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id);
	else
		error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id);

	return (error);
}

/*
 * Map a numeric id to a user/group name.
 *
 * Converts map->ntm_id (gid or uid depending on map->ntm_grpflag) to a
 * GUID via kauth, then resolves the GUID to a name with nfs4_guid2id(),
 * storing the result in map->ntm_name.  Returns 0 or an errno-style error.
 */
static int
mapid2name(struct nfs_testmapid *map)
{
	int error;
	int len = sizeof(map->ntm_name);	/* in/out buffer length for nfs4_guid2id() */

	if (map->ntm_grpflag)
		error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid);
	else
		error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);

	if (error)
		return (error);

	error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag);

	return (error);

}


/*
 * Handle the NFSCLNT_TESTIDMAP request: copy a mapping request in from
 * user space at argp, perform name->id or id->name translation as
 * requested, and copy the result back out.  Root only.
 */
static int
nfsclnt_testidmap(proc_t p, user_addr_t argp)
{
	struct nfs_testmapid mapid;
	int error, coerror;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error)
		return (error);

	error = copyin(argp, &mapid, sizeof(mapid));
	if (error)
		return (error);
	if (mapid.ntm_name2id)
		error = mapname2id(&mapid);
	else
		error = mapid2name(&mapid);

	/* copy the result back even if the mapping failed */
	coerror = copyout(&mapid, argp, sizeof(mapid));

	/* the mapping error takes precedence over a copyout error */
	return (error ? error : coerror);
}

/*
 * NFS client pseudo system call: dispatch on uap->flag to the lockd
 * answer/notify paths or the id-mapping test hook.  Returns EINVAL for
 * unrecognized flags.
 */
int
nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
{
	struct lockd_ans la;
	int error;

	switch (uap->flag) {
	case NFSCLNT_LOCKDANS:
		error = copyin(uap->argp, &la, sizeof(la));
		if (!error)
			error = nfslockdans(p, &la);
		break;
	case NFSCLNT_LOCKDNOTIFY:
		error = nfslockdnotify(p, uap->argp);
		break;
	case NFSCLNT_TESTIDMAP:
		error = nfsclnt_testidmap(p, uap->argp);
		break;
	default:
		error = EINVAL;
	}
	return (error);
}


/*
 * Asynchronous I/O threads for client NFS.
 * They do read-ahead and write-behind operations on the block I/O cache.
 *
 * The pool of up to nfsiod_thread_max threads is launched on demand and exit
 * when unused for a while. There are as many nfsiod structs as there are
 * nfsiod threads; however there's no strict tie between a thread and a struct.
 * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
 * up, it removes the next struct nfsiod from the queue and services it. Then
 * it will put the struct at the head of free list and sleep on it.
 * Async requests will pull the next struct nfsiod from the head of the free list,
 * put it on the work queue, and wake whatever thread is waiting on that struct.
 */

/*
 * nfsiod thread exit routine
 *
 * Must be called with nfsiod_mutex held so that the
 * decision to terminate is atomic with the termination.
 */
void
nfsiod_terminate(struct nfsiod *niod)
{
	/* caller holds nfsiod_mutex; drop the count under the lock */
	nfsiod_thread_count--;
	lck_mtx_unlock(nfsiod_mutex);
	if (niod)
		FREE(niod, M_TEMP);
	else
		printf("nfsiod: terminating without niod\n");
	/* never returns */
	thread_terminate(current_thread());
	/*NOTREACHED*/
}

/*
 * nfsiod thread startup routine
 *
 * Allocates and zeroes an nfsiod struct, places it on the free list,
 * wakes the thread that started us (nfsiod_start() sleeps on our thread
 * pointer), and parks in msleep0() with nfsiod_continue as the
 * continuation.  If msleep0() ever returns (timeout/error), an idle
 * nfsiod struct is reclaimed and the thread terminates.
 */
void
nfsiod_thread(void)
{
	struct nfsiod *niod;
	int error;

	MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
	if (!niod) {
		/* allocation failed: undo the count bump and let the starter proceed */
		lck_mtx_lock(nfsiod_mutex);
		nfsiod_thread_count--;
		wakeup(current_thread());
		lck_mtx_unlock(nfsiod_mutex);
		thread_terminate(current_thread());
		/*NOTREACHED*/
	}
	bzero(niod, sizeof(*niod));
	lck_mtx_lock(nfsiod_mutex);
	TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
	/* unblock nfsiod_start(), which is sleeping on our thread pointer */
	wakeup(current_thread());
	/* PDROP releases nfsiod_mutex; nfsiod_continue services the work queue */
	error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
	/* shouldn't return... so we have an error */
	/* remove an old nfsiod struct and terminate */
	lck_mtx_lock(nfsiod_mutex);
	if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
		TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
	nfsiod_terminate(niod);
	/*NOTREACHED*/
}

/*
 * Start up another nfsiod thread.
355 * (unless we're already maxed out and there are nfsiods running) 356 */ 357int 358nfsiod_start(void) 359{ 360 thread_t thd = THREAD_NULL; 361 362 lck_mtx_lock(nfsiod_mutex); 363 if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) { 364 lck_mtx_unlock(nfsiod_mutex); 365 return (EBUSY); 366 } 367 nfsiod_thread_count++; 368 if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) { 369 lck_mtx_unlock(nfsiod_mutex); 370 return (EBUSY); 371 } 372 /* wait for the thread to complete startup */ 373 msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL); 374 thread_deallocate(thd); 375 return (0); 376} 377 378/* 379 * Continuation for Asynchronous I/O threads for NFS client. 380 * 381 * Grab an nfsiod struct to work on, do some work, then drop it 382 */ 383int 384nfsiod_continue(int error) 385{ 386 struct nfsiod *niod; 387 struct nfsmount *nmp; 388 struct nfsreq *req, *treq; 389 struct nfs_reqqhead iodq; 390 int morework; 391 392 lck_mtx_lock(nfsiod_mutex); 393 niod = TAILQ_FIRST(&nfsiodwork); 394 if (!niod) { 395 /* there's no work queued up */ 396 /* remove an old nfsiod struct and terminate */ 397 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) 398 TAILQ_REMOVE(&nfsiodfree, niod, niod_link); 399 nfsiod_terminate(niod); 400 /*NOTREACHED*/ 401 } 402 TAILQ_REMOVE(&nfsiodwork, niod, niod_link); 403 404worktodo: 405 while ((nmp = niod->niod_nmp)) { 406 if (nmp == NULL){ 407 niod->niod_nmp = NULL; 408 break; 409 } 410 411 /* 412 * Service this mount's async I/O queue. 413 * 414 * In order to ensure some level of fairness between mounts, 415 * we grab all the work up front before processing it so any 416 * new work that arrives will be serviced on a subsequent 417 * iteration - and we have a chance to see if other work needs 418 * to be done (e.g. the delayed write queue needs to be pushed 419 * or other mounts are waiting for an nfsiod). 
420 */ 421 /* grab the current contents of the queue */ 422 TAILQ_INIT(&iodq); 423 TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); 424 lck_mtx_unlock(nfsiod_mutex); 425 426 /* process the queue */ 427 TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { 428 TAILQ_REMOVE(&iodq, req, r_achain); 429 lck_mtx_lock(nfsiod_mutex); 430 req->r_achain.tqe_next = NFSIODCOMPLETING; 431 lck_mtx_unlock(nfsiod_mutex); 432 req->r_callback.rcb_func(req); 433 } 434 435 /* now check if there's more/other work to be done */ 436 lck_mtx_lock(nfsiod_mutex); 437 morework = !TAILQ_EMPTY(&nmp->nm_iodq); 438 if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) { 439 /* 440 * we're going to stop working on this mount but if the 441 * mount still needs more work so queue it up 442 */ 443 if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) 444 TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); 445 nmp->nm_niod = NULL; 446 niod->niod_nmp = NULL; 447 } 448 } 449 450 /* loop if there's still a mount to work on */ 451 if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) { 452 niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts); 453 TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink); 454 niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST; 455 } 456 if (niod->niod_nmp) 457 goto worktodo; 458 459 /* queue ourselves back up - if there aren't too many threads running */ 460 if (nfsiod_thread_count <= NFSIOD_MAX) { 461 TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); 462 error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); 463 /* shouldn't return... 
so we have an error */ 464 /* remove an old nfsiod struct and terminate */ 465 lck_mtx_lock(nfsiod_mutex); 466 if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) 467 TAILQ_REMOVE(&nfsiodfree, niod, niod_link); 468 } 469 nfsiod_terminate(niod); 470 /*NOTREACHED*/ 471 return (0); 472} 473 474#endif /* NFSCLIENT */ 475 476 477#if NFSSERVER 478 479/* 480 * NFS server system calls 481 * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c 482 */ 483 484/* 485 * Get file handle system call 486 */ 487int 488getfh(proc_t p, struct getfh_args *uap, __unused int *retval) 489{ 490 vnode_t vp; 491 struct nfs_filehandle nfh; 492 int error, fhlen, fidlen; 493 struct nameidata nd; 494 char path[MAXPATHLEN], *ptr; 495 size_t pathlen; 496 struct nfs_exportfs *nxfs; 497 struct nfs_export *nx; 498 499 /* 500 * Must be super user 501 */ 502 error = proc_suser(p); 503 if (error) 504 return (error); 505 506 error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen); 507 if (!error) 508 error = copyin(uap->fhp, &fhlen, sizeof(fhlen)); 509 if (error) 510 return (error); 511 /* limit fh size to length specified (or v3 size by default) */ 512 if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) 513 fhlen = NFSV3_MAX_FH_SIZE; 514 fidlen = fhlen - sizeof(struct nfs_exphandle); 515 516 if (!nfsrv_is_initialized()) 517 return (EINVAL); 518 519 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, 520 UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current()); 521 error = namei(&nd); 522 if (error) 523 return (error); 524 nameidone(&nd); 525 526 vp = nd.ni_vp; 527 528 // find exportfs that matches f_mntonname 529 lck_rw_lock_shared(&nfsrv_export_rwlock); 530 ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; 531 LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { 532 if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) 533 break; 534 } 535 if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { 536 error = EINVAL; 537 goto out; 538 } 539 // find export 
that best matches remainder of path 540 ptr = path + strlen(nxfs->nxfs_path); 541 while (*ptr && (*ptr == '/')) 542 ptr++; 543 LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { 544 int len = strlen(nx->nx_path); 545 if (len == 0) // we've hit the export entry for the root directory 546 break; 547 if (!strncmp(nx->nx_path, ptr, len)) 548 break; 549 } 550 if (!nx) { 551 error = EINVAL; 552 goto out; 553 } 554 555 bzero(&nfh, sizeof(nfh)); 556 nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); 557 nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id); 558 nfh.nfh_xh.nxh_expid = htonl(nx->nx_id); 559 nfh.nfh_xh.nxh_flags = 0; 560 nfh.nfh_xh.nxh_reserved = 0; 561 nfh.nfh_len = fidlen; 562 error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); 563 if (nfh.nfh_len > (uint32_t)fidlen) 564 error = EOVERFLOW; 565 nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; 566 nfh.nfh_len += sizeof(nfh.nfh_xh); 567 nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; 568 569out: 570 lck_rw_done(&nfsrv_export_rwlock); 571 vnode_put(vp); 572 if (error) 573 return (error); 574 error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t)); 575 return (error); 576} 577 578extern const struct fileops vnops; 579 580/* 581 * syscall for the rpc.lockd to use to translate a NFS file handle into 582 * an open descriptor. 583 * 584 * warning: do not remove the suser() call or this becomes one giant 585 * security hole. 
 */
int
fhopen( proc_t p,
	struct fhopen_args *uap,
	int32_t *retval)
{
	vnode_t vp;
	struct nfs_filehandle nfh;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;
	struct flock lf;
	struct fileproc *fp, *nfp;
	int fmode, error, type;
	int indx;
	vfs_context_t ctx = vfs_context_current();
	kauth_action_t action;

	/*
	 * Must be super user
	 */
	error = suser(vfs_context_ucred(ctx), 0);
	if (error) {
		return (error);
	}

	if (!nfsrv_is_initialized()) {
		return (EINVAL);
	}

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);

	/* copy in the length first so we can validate it before reading the handle */
	error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len));
	if (error)
		return (error);
	if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
	    (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
		return (EINVAL);
	error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
	if (error)
		return (error);
	nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;

	lck_rw_lock_shared(&nfsrv_export_rwlock);
	/* now give me my vnode, it gets returned to me with a reference */
	error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
	lck_rw_done(&nfsrv_export_rwlock);
	if (error) {
		if (error == NFSERR_TRYLATER)
			error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
		return (error);
	}

	/*
	 * From now on we have to make sure not
	 * to forget about the vnode.
	 * Any error that causes an abort must vnode_put(vp).
	 * Just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vnode_vtype(vp) == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}

	/* disallow write operations on directories */
	if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
		error = EISDIR;
		goto bad;
	}

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD)
		action |= KAUTH_VNODE_READ_DATA;
	if (fmode & (FWRITE | O_TRUNC))
		action |= KAUTH_VNODE_WRITE_DATA;
	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
		goto bad;

	if ((error = VNOP_OPEN(vp, fmode, ctx)))
		goto bad;
	if ((error = vnode_ref_ext(vp, fmode, 0)))
		goto bad;

	/*
	 * end of vn_open code
	 */

	// starting here... error paths should call vn_close/vnode_put
	if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
		vn_close(vp, fmode & FMASK, ctx);
		goto bad;
	}
	fp = nfp;

	fp->f_fglob->fg_flag = fmode & FMASK;
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

	// XXX do we really need to support this with fhopen()?
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
			struct vfs_context context = *vfs_context_current();
			/* Modify local copy (to not damage thread copy) */
			context.vc_ucred = fp->f_fglob->fg_cred;

			/*
			 * NOTE(review): this error path returns without
			 * vnode_put(vp) — presumably the fd teardown drops
			 * the reference; verify against vn_close semantics.
			 */
			vn_close(vp, fp->f_fglob->fg_flag, &context);
			fp_free(p, indx, fp);
			return (error);
		}
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

	/* drop our iocount; fp holds the usecount from vnode_ref_ext() */
	vnode_put(vp);

	proc_fdlock(p);
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;
	return (0);

bad:
	vnode_put(vp);
	return (error);
}

/*
 * NFS server pseudo system call
 *
 * Dispatches on uap->flag: NFSSVC_ADDSOCK hands a socket to the server,
 * NFSSVC_NFSD turns the calling thread into an nfsd, NFSSVC_EXPORT
 * updates the export list.  EINTR/ERESTART are suppressed so a killed
 * nfsd returns cleanly to user space.
 */
int
nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
{
	mbuf_t nam;
	struct user_nfsd_args user_nfsdarg;
	socket_t so;
	int error;

	AUDIT_ARG(cmd, uap->flag);

	/*
	 * Must be super user for most operations (export ops checked later).
	 */
	if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p))))
		return (error);
#if CONFIG_MACF
	error = mac_system_check_nfsd(kauth_cred_get());
	if (error)
		return (error);
#endif

	/* make sure NFS server data structures have been initialized */
	nfsrv_init();

	if (uap->flag & NFSSVC_ADDSOCK) {
		if (IS_64BIT_PROCESS(p)) {
			error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
		} else {
			/* 32-bit process: widen the args into the user (64-bit) layout */
			struct nfsd_args tmp_args;
			error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args));
			if (error == 0) {
				user_nfsdarg.sock = tmp_args.sock;
				user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name);
				user_nfsdarg.namelen = tmp_args.namelen;
			}
		}
		if (error)
			return (error);
		/* get the socket */
		error = file_socket(user_nfsdarg.sock, &so);
		if (error)
			return (error);
		/* Get the client address for connected sockets. */
		if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) {
			nam = NULL;
		} else {
			error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME);
			if (error) {
				/* drop the iocount file_socket() grabbed on the file descriptor */
				file_drop(user_nfsdarg.sock);
				return (error);
			}
		}
		/*
		 * nfssvc_addsock() will grab a retain count on the socket
		 * to keep the socket from being closed when nfsd closes its
		 * file descriptor for it.
		 */
		error = nfssvc_addsock(so, nam);
		/* drop the iocount file_socket() grabbed on the file descriptor */
		file_drop(user_nfsdarg.sock);
	} else if (uap->flag & NFSSVC_NFSD) {
		error = nfssvc_nfsd();
	} else if (uap->flag & NFSSVC_EXPORT) {
		error = nfssvc_export(uap->argp);
	} else {
		error = EINVAL;
	}
	if (error == EINTR || error == ERESTART)
		error = 0;
	return (error);
}

/*
 * Adds a socket to the list for servicing by nfsds.
 */
int
nfssvc_addsock(socket_t so, mbuf_t mynam)
{
	struct nfsrv_sock *slp;
	int error = 0, sodomain, sotype, soprotocol, on = 1;
	int first;
	struct timeval timeo;

	/* make sure mbuf constants are set up */
	if (!nfs_mbuf_mhlen)
		nfs_mbuf_init();

	sock_gettype(so, &sodomain, &sotype, &soprotocol);

	/* There should be only one UDP socket for each of IPv4 and IPv6 */
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
		mbuf_freem(mynam);
		return (EEXIST);
	}
	if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
		mbuf_freem(mynam);
		return (EEXIST);
	}

	/* Set protocol options and reserve some space (for UDP). */
	if (sotype == SOCK_STREAM)
		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
		sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	if (sotype == SOCK_DGRAM) {	/* set socket buffer sizes for UDP */
		int reserve = NFS_UDPSOCKBUF;
		/* buffer-size failures are logged but deliberately non-fatal */
		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
		if (error) {
			log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
			error = 0;
		}
	}
	sock_nointerrupt(so, 0);

	/*
	 * Set socket send/receive timeouts.
	 * Receive timeout shouldn't matter, but setting the send timeout
	 * will make sure that an unresponsive client can't hang the server.
	 */
	timeo.tv_usec = 0;
	timeo.tv_sec = 1;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	timeo.tv_sec = 30;
	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	if (error) {
		/* timeout failures are also logged but non-fatal */
		log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
		error = 0;
	}

	MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
	if (!slp) {
		mbuf_freem(mynam);
		return (ENOMEM);
	}
	bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
	lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
	lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);

	lck_mtx_lock(nfsd_mutex);

	if (soprotocol == IPPROTO_UDP) {
		if (sodomain == AF_INET) {
			/* There should be only one UDP/IPv4 socket */
			/* re-check under nfsd_mutex: another thread may have raced us */
			if (nfsrv_udpsock) {
				lck_mtx_unlock(nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return (EEXIST);
			}
			nfsrv_udpsock = slp;
		}
		if (sodomain == AF_INET6) {
			/* There should be only one UDP/IPv6 socket */
			if (nfsrv_udp6sock) {
				lck_mtx_unlock(nfsd_mutex);
				nfsrv_slpfree(slp);
				mbuf_freem(mynam);
				return (EEXIST);
			}
			nfsrv_udp6sock = slp;
		}
	}

	/* add the socket to the list */
	first = TAILQ_EMPTY(&nfsrv_socklist);
	TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);

	sock_retain(so); /* grab a retain count on the socket */
	slp->ns_so = so;
	slp->ns_sotype = sotype;
	slp->ns_nam = mynam;	/* ownership of mynam transfers to slp */

	/* set up the socket up-call */
	nfsrv_uc_addsock(slp, first);

	/* mark that the socket is not in the nfsrv_sockwg list */
	slp->ns_wgq.tqe_next = SLPNOLIST;

	slp->ns_flag = SLP_VALID | SLP_NEEDQ;

	nfsrv_wakenfsd(slp);
	lck_mtx_unlock(nfsd_mutex);

	return (0);
}

/*
 * nfssvc_nfsd()
 *
 * nfsd theory of operation:
 *
 * The first nfsd thread stays in user mode
accepting new TCP connections 927 * which are then added via the "addsock" call. The rest of the nfsd threads 928 * simply call into the kernel and remain there in a loop handling NFS 929 * requests until killed by a signal. 930 * 931 * There's a list of nfsd threads (nfsd_head). 932 * There's an nfsd queue that contains only those nfsds that are 933 * waiting for work to do (nfsd_queue). 934 * 935 * There's a list of all NFS sockets (nfsrv_socklist) and two queues for 936 * managing the work on the sockets: 937 * nfsrv_sockwait - sockets w/new data waiting to be worked on 938 * nfsrv_sockwork - sockets being worked on which may have more work to do 939 * nfsrv_sockwg -- sockets which have pending write gather data 940 * When a socket receives data, if it is not currently queued, it 941 * will be placed at the end of the "wait" queue. 942 * Whenever a socket needs servicing we make sure it is queued and 943 * wake up a waiting nfsd (if there is one). 944 * 945 * nfsds will service at most 8 requests from the same socket before 946 * defecting to work on another socket. 947 * nfsds will defect immediately if there are any sockets in the "wait" queue 948 * nfsds looking for a socket to work on check the "wait" queue first and 949 * then check the "work" queue. 950 * When an nfsd starts working on a socket, it removes it from the head of 951 * the queue it's currently on and moves it to the end of the "work" queue. 952 * When nfsds are checking the queues for work, any sockets found not to 953 * have any work are simply dropped from the queue. 
 *
 */
int
nfssvc_nfsd(void)
{
	mbuf_t m, mrep;
	struct nfsrv_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript *nd = NULL;	/* current request descriptor */
	int error = 0, cacherep, writes_todo;
	int siz, procrastinate, opcnt = 0;	/* opcnt: requests served on current socket */
	u_quad_t cur_usec;
	struct timeval now;
	struct vfs_context context;
	struct timespec to;

#ifndef nolint
	cacherep = RC_DOIT;
	writes_todo = 0;
#endif

	/* One struct nfsd per kernel-resident server thread. */
	MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
	if (!nfsd)
		return (ENOMEM);
	bzero(nfsd, sizeof(struct nfsd));
	lck_mtx_lock(nfsd_mutex);
	if (nfsd_thread_count++ == 0)
		nfsrv_initcache();		/* Init the server request cache */

	TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
	lck_mtx_unlock(nfsd_mutex);

	context.vc_thread = current_thread();

	/* Set time out so that nfsd threads can wake up and see if they are still needed. */
	to.tv_sec = 5;
	to.tv_nsec = 0;

	/*
	 * Loop getting rpc requests until SIGKILL.
	 */
	for (;;) {
		if (nfsd_thread_max <= 0) {
			/* NFS server shutting down, get out ASAP */
			error = EINTR;
			slp = nfsd->nfsd_slp;
		} else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
			/* already have some work to do */
			error = 0;
			slp = nfsd->nfsd_slp;
		} else {
			/* need to find work to do */
			error = 0;
			lck_mtx_lock(nfsd_mutex);
			/* Sleep until a socket with work shows up (or we're surplus). */
			while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
				if (nfsd_thread_count > nfsd_thread_max) {
					/*
					 * If we have no socket and there are more
					 * nfsd threads than configured, let's exit.
					 */
					error = 0;
					goto done;
				}
				nfsd->nfsd_flag |= NFSD_WAITING;
				TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
				error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
				if (error) {
					/* A waker removes us from nfsd_queue; on timeout/signal we must do it ourselves. */
					if (nfsd->nfsd_flag & NFSD_WAITING) {
						TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
						nfsd->nfsd_flag &= ~NFSD_WAITING;
					}
					if (error == EWOULDBLOCK)
						continue;	/* periodic timeout: re-check thread count */
					goto done;
				}
			}
			slp = nfsd->nfsd_slp;
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
				/* look for a socket to work on in the wait queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WAITQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
						break;
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
				/* look for a socket to work on in the work queue */
				while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					/* remove from the head of the queue */
					TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
					slp->ns_flag &= ~SLP_WORKQ;
					if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
						break;
					/* nothing to do, so skip this socket */
					lck_rw_done(&slp->ns_rwlock);
				}
			}
			if (!nfsd->nfsd_slp && slp) {
				/* we found a socket to work on, grab a reference */
				slp->ns_sref++;
				nfsd->nfsd_slp = slp;
				opcnt = 0;
				/* and put it at the back of the work queue */
				TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
				slp->ns_flag |= SLP_WORKQ;
				lck_rw_done(&slp->ns_rwlock);
			}
			lck_mtx_unlock(nfsd_mutex);
			if (!slp)
				continue;
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			if (slp->ns_flag & SLP_VALID) {
				/* Pull pending data off the socket if needed (but not if disconnecting). */
				if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) {
					slp->ns_flag &= ~SLP_NEEDQ;
					nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK);
				}
				if (slp->ns_flag & SLP_DISCONN)
					nfsrv_zapsock(slp);
				error = nfsrv_dorec(slp, nfsd, &nd);
				if (error == EINVAL) {	// RPCSEC_GSS drop
					if (slp->ns_sotype == SOCK_STREAM)
						nfsrv_zapsock(slp);	// drop connection
				}
				writes_todo = 0;
				/* No record available: check for expired write-gather work instead. */
				if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
					microuptime(&now);
					cur_usec = (u_quad_t)now.tv_sec * 1000000 +
						(u_quad_t)now.tv_usec;
					if (slp->ns_wgtime <= cur_usec) {
						error = 0;
						cacherep = RC_DOIT;
						writes_todo = 1;
					}
					slp->ns_flag &= ~SLP_DOWRITES;
				}
				nfsd->nfsd_flag |= NFSD_REQINPROG;
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
			/* Shutdown or dead socket: release the descriptor and our socket reference. */
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2)
					mbuf_freem(nd->nd_nam2);
				if (IS_VALID_CRED(nd->nd_cr))
					kauth_cred_unref(&nd->nd_cr);
				if (nd->nd_gss_context)
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
				nd = NULL;
			}
			nfsd->nfsd_slp = NULL;
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			if (slp)
				nfsrv_slpderef(slp);
			if (nfsd_thread_max <= 0)
				break;
			continue;
		}
		if (nd) {
			microuptime(&nd->nd_starttime);
			/* For UDP, nd_nam2 holds the sender; for TCP use the socket's peer address. */
			if (nd->nd_nam2)
				nd->nd_nam = nd->nd_nam2;
			else
				nd->nd_nam = slp->ns_nam;

			cacherep = nfsrv_getcache(nd, slp, &mrep);

			if (nfsrv_require_resv_port) {
				/* Check if source port is a reserved port */
				in_port_t port = 0;
				struct sockaddr *saddr = mbuf_data(nd->nd_nam);

				if (saddr->sa_family == AF_INET)
					port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
				else if (saddr->sa_family == AF_INET6)
					port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
				/* Non-reserved source port: reject with a weak-auth error (NULL proc exempt). */
				if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
					nd->nd_procnum = NFSPROC_NOOP;
					nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
					cacherep = RC_DOIT;
				}
			}

		}

		/*
		 * Loop to get all the write RPC replies that have been
		 * gathered together.
		 */
		do {
			switch (cacherep) {
			case RC_DOIT:
				if (nd && (nd->nd_vers == NFS_VER3))
					procrastinate = nfsrv_wg_delay_v3;
				else
					procrastinate = nfsrv_wg_delay;
				lck_rw_lock_shared(&nfsrv_export_rwlock);
				context.vc_ucred = NULL;
				/* writes_todo short-circuits, so nd may be NULL only on the writegather path */
				if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
					error = nfsrv_writegather(&nd, slp, &context, &mrep);
				else
					error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
				lck_rw_done(&nfsrv_export_rwlock);
				if (mrep == NULL) {
					/*
					 * If this is a stream socket and we are not going
					 * to send a reply we better close the connection
					 * so the client doesn't hang.
					 *
					 * NOTE(review): nd can apparently be NULL here after
					 * nfsrv_writegather(&nd, ...); the printf below would
					 * then dereference NULL — confirm writegather's contract.
					 */
					if (error && slp->ns_sotype == SOCK_STREAM) {
						lck_rw_lock_exclusive(&slp->ns_rwlock);
						nfsrv_zapsock(slp);
						lck_rw_done(&slp->ns_rwlock);
						printf("NFS server: NULL reply from proc = %d error = %d\n",
							nd->nd_procnum, error);
					}
					break;

				}
				if (error) {
					OSAddAtomic64(1, &nfsstats.srv_errs);
					nfsrv_updatecache(nd, FALSE, mrep);
					if (nd->nd_nam2) {
						mbuf_freem(nd->nd_nam2);
						nd->nd_nam2 = NULL;
					}
					break;
				}
				OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
				nfsrv_updatecache(nd, TRUE, mrep);
				/* FALLTHRU */

			case RC_REPLY:
				if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
					/*
					 * Need to checksum or encrypt the reply
					 */
					error = nfs_gss_svc_protect_reply(nd, mrep);
					if (error) {
						mbuf_freem(mrep);
						break;
					}
				}

				/*
				 * Get the total size of the reply
				 */
				m = mrep;
				siz = 0;
				while (m) {
					siz += mbuf_len(m);
					m = mbuf_next(m);
				}
				if (siz <= 0 || siz > NFS_MAXPACKET) {
					printf("mbuf siz=%d\n",siz);
					panic("Bad nfs svc reply");
				}
				m = mrep;
				mbuf_pkthdr_setlen(m, siz);
				error = mbuf_pkthdr_setrcvif(m, NULL);
				if (error)
					panic("nfsd setrcvif failed: %d", error);
				/*
				 * For stream protocols, prepend a Sun RPC
				 * Record Mark.
				 */
				if (slp->ns_sotype == SOCK_STREAM) {
					error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
					if (!error)
						/* 0x80000000 marks this as the last fragment of the record */
						*(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
				}
				if (!error) {
					if (slp->ns_flag & SLP_VALID) {
						error = nfsrv_send(slp, nd->nd_nam2, m);
					} else {
						error = EPIPE;
						mbuf_freem(m);
					}
				} else {
					mbuf_freem(m);
				}
				mrep = NULL;
				if (nd->nd_nam2) {
					mbuf_freem(nd->nd_nam2);
					nd->nd_nam2 = NULL;
				}
				if (error == EPIPE) {
					lck_rw_lock_exclusive(&slp->ns_rwlock);
					nfsrv_zapsock(slp);
					lck_rw_done(&slp->ns_rwlock);
				}
				/* Interrupted/restarted send: tear down and exit this nfsd thread. */
				if (error == EINTR || error == ERESTART) {
					nfsm_chain_cleanup(&nd->nd_nmreq);
					if (IS_VALID_CRED(nd->nd_cr))
						kauth_cred_unref(&nd->nd_cr);
					if (nd->nd_gss_context)
						nfs_gss_svc_ctx_deref(nd->nd_gss_context);
					FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
					nfsrv_slpderef(slp);
					lck_mtx_lock(nfsd_mutex);
					goto done;
				}
				break;
			case RC_DROPIT:
				/* Duplicate request still in progress: silently drop it. */
				mbuf_freem(nd->nd_nam2);
				nd->nd_nam2 = NULL;
				break;
			};
			opcnt++;
			if (nd) {
				nfsm_chain_cleanup(&nd->nd_nmreq);
				if (nd->nd_nam2)
					mbuf_freem(nd->nd_nam2);
				if (IS_VALID_CRED(nd->nd_cr))
					kauth_cred_unref(&nd->nd_cr);
				if (nd->nd_gss_context)
					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
				nd = NULL;
			}

			/*
			 * Check to see if there are outstanding writes that
			 * need to be serviced.
			 */
			writes_todo = 0;
			if (slp->ns_wgtime) {
				microuptime(&now);
				cur_usec = (u_quad_t)now.tv_sec * 1000000 +
					(u_quad_t)now.tv_usec;
				if (slp->ns_wgtime <= cur_usec) {
					cacherep = RC_DOIT;
					writes_todo = 1;
				}
			}
		} while (writes_todo);

		nd = NULL;
		/* Stay on this socket for up to 8 requests unless others are waiting. */
		if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			error = nfsrv_dorec(slp, nfsd, &nd);
			if (error == EINVAL) {	// RPCSEC_GSS drop
				if (slp->ns_sotype == SOCK_STREAM)
					nfsrv_zapsock(slp);	// drop connection
			}
			lck_rw_done(&slp->ns_rwlock);
		}
		if (!nd) {
			/* drop our reference on the socket */
			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
			nfsd->nfsd_slp = NULL;
			nfsrv_slpderef(slp);
		}
	}
	lck_mtx_lock(nfsd_mutex);
done:
	/* Exit path: entered with nfsd_mutex held. Last thread out tears the server down. */
	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
	FREE(nfsd, M_NFSD);
	if (--nfsd_thread_count == 0)
		nfsrv_cleanup();
	lck_mtx_unlock(nfsd_mutex);
	return (error);
}

/*
 * nfssvc_export()
 *
 * Copy in the export arguments from user space (munging the 32-bit
 * layout into the LP64 form if necessary) and hand them to nfsrv_export().
 */
int
nfssvc_export(user_addr_t argp)
{
	int error = 0, is_64bit;
	struct user_nfs_export_args unxa;
	vfs_context_t ctx = vfs_context_current();

	is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));

	/* copy in pointers to path and export args */
	if (is_64bit) {
		error = copyin(argp, (caddr_t)&unxa, sizeof(unxa));
	} else {
		struct nfs_export_args tnxa;
		error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa));
		if (error == 0) {
			/* munge into LP64 version of nfs_export_args structure */
			unxa.nxa_fsid = tnxa.nxa_fsid;
			unxa.nxa_expid = tnxa.nxa_expid;
			unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath);
			unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath);
			unxa.nxa_flags = tnxa.nxa_flags;
			unxa.nxa_netcount = tnxa.nxa_netcount;
			unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets);
		}
	}
	if (error)
		return (error);

	error = nfsrv_export(&unxa, ctx);

	return (error);
}

/*
 * Shut down a socket associated with an nfsrv_sock structure.
 * Should be called with the send lock set, if required.
 * Clearing ns_flag (including SLP_VALID) up front stops the nfsds
 * from queueing new work on this socket, so it will not be
 * reassigned during cleanup.
 */
void
nfsrv_zapsock(struct nfsrv_sock *slp)
{
	socket_t so;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
	slp->ns_flag &= ~SLP_ALLFLAGS;

	so = slp->ns_so;
	if (so == NULL)
		return;

	/*
	 * Attempt to deter future up-calls, but leave the
	 * up-call info in place to avoid a race with the
	 * networking code.
	 */
	socket_lock(so, 1);
	so->so_rcv.sb_flags &= ~SB_UPCALL;
	socket_unlock(so, 1);

	sock_shutdown(so, SHUT_RDWR);

	/*
	 * Remove from the up-call queue
	 */
	nfsrv_uc_dequeue(slp);
}

/*
 * cleanup and release a server socket structure.
1398 */ 1399void 1400nfsrv_slpfree(struct nfsrv_sock *slp) 1401{ 1402 struct nfsrv_descript *nwp, *nnwp; 1403 1404 if (slp->ns_so) { 1405 sock_release(slp->ns_so); 1406 slp->ns_so = NULL; 1407 } 1408 if (slp->ns_nam) 1409 mbuf_free(slp->ns_nam); 1410 if (slp->ns_raw) 1411 mbuf_freem(slp->ns_raw); 1412 if (slp->ns_rec) 1413 mbuf_freem(slp->ns_rec); 1414 if (slp->ns_frag) 1415 mbuf_freem(slp->ns_frag); 1416 slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL; 1417 slp->ns_reccnt = 0; 1418 1419 if (slp->ns_ua) 1420 FREE(slp->ns_ua, M_NFSSVC); 1421 1422 for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { 1423 nnwp = nwp->nd_tq.le_next; 1424 LIST_REMOVE(nwp, nd_tq); 1425 nfsm_chain_cleanup(&nwp->nd_nmreq); 1426 if (nwp->nd_mrep) 1427 mbuf_freem(nwp->nd_mrep); 1428 if (nwp->nd_nam2) 1429 mbuf_freem(nwp->nd_nam2); 1430 if (IS_VALID_CRED(nwp->nd_cr)) 1431 kauth_cred_unref(&nwp->nd_cr); 1432 if (nwp->nd_gss_context) 1433 nfs_gss_svc_ctx_deref(nwp->nd_gss_context); 1434 FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC); 1435 } 1436 LIST_INIT(&slp->ns_tq); 1437 1438 lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group); 1439 lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group); 1440 FREE(slp, M_NFSSVC); 1441} 1442 1443/* 1444 * Derefence a server socket structure. If it has no more references and 1445 * is no longer valid, you can throw it away. 
 */
void
nfsrv_slpderef(struct nfsrv_sock *slp)
{
	struct timeval now;

	/* Lock order: nfsd_mutex before the per-socket ns_rwlock. */
	lck_mtx_lock(nfsd_mutex);
	lck_rw_lock_exclusive(&slp->ns_rwlock);
	slp->ns_sref--;

	if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
		/* Still referenced or still valid: keep the socket alive. */
		if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
			/* remove socket from queue since there's no work */
			if (slp->ns_flag & SLP_WAITQ)
				TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
			else
				TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
			slp->ns_flag &= ~SLP_QUEUED;
		}
		lck_rw_done(&slp->ns_rwlock);
		lck_mtx_unlock(nfsd_mutex);
		return;
	}

	/* This socket is no longer valid, so we'll get rid of it */

	if (slp->ns_flag & SLP_QUEUED) {
		if (slp->ns_flag & SLP_WAITQ)
			TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
		else
			TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
		slp->ns_flag &= ~SLP_QUEUED;
	}

	/*
	 * Queue the socket up for deletion
	 * and start the timer to delete it
	 * after it has been in limbo for
	 * a while.
	 */
	microuptime(&now);
	slp->ns_timestamp = now.tv_sec;
	TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
	TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
	if (!nfsrv_deadsock_timer_on) {
		nfsrv_deadsock_timer_on = 1;
		nfs_interval_timer_start(nfsrv_deadsock_timer_call,
			NFSRV_DEADSOCKDELAY * 1000);
	}

	lck_rw_done(&slp->ns_rwlock);
	/* now remove from the write gather socket list */
	/* (ns_wgq is protected by nfsd_mutex, which we still hold) */
	if (slp->ns_wgq.tqe_next != SLPNOLIST) {
		TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
		slp->ns_wgq.tqe_next = SLPNOLIST;
	}
	lck_mtx_unlock(nfsd_mutex);
}

/*
 * Check periodically for dead sockets pending delete.
 * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
 * seconds then we assume it's safe to free.
 */
void
nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
{
	struct nfsrv_sock *slp;
	struct timeval now;
	time_t time_to_wait;

	microuptime(&now);
	lck_mtx_lock(nfsd_mutex);

	/* Free every socket that has been dead at least NFSRV_DEADSOCKDELAY seconds. */
	while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
		if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
			break;
		TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
		nfsrv_slpfree(slp);
	}
	if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
		/* Nothing left in limbo: stop re-arming the timer. */
		nfsrv_deadsock_timer_on = 0;
		lck_mtx_unlock(nfsd_mutex);
		return;
	}
	/* slp is the oldest remaining socket (loop broke before removing it). */
	time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
	if (time_to_wait < 1)
		time_to_wait = 1;

	lck_mtx_unlock(nfsd_mutex);

	/* Re-arm for when the oldest remaining socket becomes freeable. */
	nfs_interval_timer_start(nfsrv_deadsock_timer_call,
		time_to_wait * 1000);
}

/*
 * Clean up the data structures for the server.
 * Called from nfssvc_nfsd() when the last nfsd thread exits,
 * with nfsd_mutex held.
 */
void
nfsrv_cleanup(void)
{
	struct nfsrv_sock *slp, *nslp;
	struct timeval now;
#if CONFIG_FSE
	struct nfsrv_fmod *fp, *nfp;
	int i;
#endif

	microuptime(&now);
	/* Zap every server socket and move it to the dead-socket limbo list. */
	for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
		nslp = TAILQ_NEXT(slp, ns_chain);
		if (slp->ns_flag & SLP_VALID) {
			lck_rw_lock_exclusive(&slp->ns_rwlock);
			nfsrv_zapsock(slp);
			lck_rw_done(&slp->ns_rwlock);
		}
		if (slp->ns_flag & SLP_QUEUED) {
			if (slp->ns_flag & SLP_WAITQ)
				TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
			else
				TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
			slp->ns_flag &= ~SLP_QUEUED;
		}
		if (slp->ns_wgq.tqe_next != SLPNOLIST) {
			TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
			slp->ns_wgq.tqe_next = SLPNOLIST;
		}
		/* queue the socket up for deletion */
		slp->ns_timestamp = now.tv_sec;
		TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
		TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
		if (!nfsrv_deadsock_timer_on) {
			nfsrv_deadsock_timer_on = 1;
			nfs_interval_timer_start(nfsrv_deadsock_timer_call,
				NFSRV_DEADSOCKDELAY * 1000);
		}
	}

#if CONFIG_FSE
	/*
	 * Flush pending file write fsevents
	 */
	lck_mtx_lock(nfsrv_fmod_mutex);
	for (i = 0; i < NFSRVFMODHASHSZ; i++) {
		for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
			/*
			 * Fire off the content modified fsevent for each
			 * entry, remove it from the list, and free it.
			 */
			if (nfsrv_fsevents_enabled) {
				fp->fm_context.vc_thread = current_thread();
				add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
					FSE_ARG_VNODE, fp->fm_vp,
					FSE_ARG_DONE);
			}
			vnode_put(fp->fm_vp);
			kauth_cred_unref(&fp->fm_context.vc_ucred);
			nfp = LIST_NEXT(fp, fm_link);
			LIST_REMOVE(fp, fm_link);
			FREE(fp, M_TEMP);
		}
	}
	nfsrv_fmod_pending = 0;
	lck_mtx_unlock(nfsrv_fmod_mutex);
#endif

	nfsrv_uc_cleanup();	/* Stop nfs socket up-call threads */

	nfs_gss_svc_cleanup();	/* Remove any RPCSEC_GSS contexts */

	nfsrv_cleancache();	/* And clear out server cache */

	nfsrv_udpsock = NULL;
	nfsrv_udp6sock = NULL;
}

#endif /* NFS_NOSERVER */