1/* 2 * Copyright (c) 2002-2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/*- 29 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. 30 * 31 * Redistribution and use in source and binary forms, with or without 32 * modification, are permitted provided that the following conditions 33 * are met: 34 * 1. Redistributions of source code must retain the above copyright 35 * notice, this list of conditions and the following disclaimer. 36 * 2. Redistributions in binary form must reproduce the above copyright 37 * notice, this list of conditions and the following disclaimer in the 38 * documentation and/or other materials provided with the distribution. 39 * 3. 
Berkeley Software Design Inc's name may not be used to endorse or 40 * promote products derived from this software without specific prior 41 * written permission. 42 * 43 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 46 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 53 * SUCH DAMAGE. 54 * 55 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp 56 */ 57 58#include <sys/cdefs.h> 59#include <sys/param.h> 60#include <sys/systm.h> 61#include <sys/fcntl.h> 62#include <sys/kernel.h> /* for hz */ 63#include <sys/file_internal.h> 64#include <sys/malloc.h> 65#include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */ 66#include <sys/kpi_mbuf.h> 67#include <sys/mount_internal.h> 68#include <sys/proc_internal.h> /* for p_start */ 69#include <sys/kauth.h> 70#include <sys/resourcevar.h> 71#include <sys/socket.h> 72#include <sys/unistd.h> 73#include <sys/user.h> 74#include <sys/vnode_internal.h> 75 76#include <kern/thread.h> 77#include <kern/host.h> 78 79#include <machine/limits.h> 80 81#include <net/if.h> 82 83#include <nfs/rpcv2.h> 84#include <nfs/nfsproto.h> 85#include <nfs/nfs.h> 86#include <nfs/nfs_gss.h> 87#include <nfs/nfsmount.h> 88#include <nfs/nfsnode.h> 89#include <nfs/nfs_lock.h> 90 91#include <mach/host_priv.h> 92#include <mach/mig_errors.h> 93#include <mach/host_special_ports.h> 
#include <lockd/lockd_mach.h>

extern void ipc_port_release_send(ipc_port_t);

#define OFF_MAX QUAD_MAX

/*
 * pending lock request messages are kept in this queue which is
 * kept sorted by transaction ID (xid).
 * (All access is serialized by nfs_lock_mutex.)
 */
static uint64_t nfs_lockxid = 0;		/* last transaction ID handed out */
static LOCKD_MSG_QUEUE nfs_pendlockq;		/* requests awaiting a lockd answer */

/*
 * This structure is used to identify processes which have acquired NFS locks.
 * Knowing which processes have ever acquired locks allows us to short-circuit
 * unlock requests for processes that have never had an NFS file lock.  Thus
 * avoiding a costly and unnecessary lockd request.
 */
struct nfs_lock_pid {
	TAILQ_ENTRY(nfs_lock_pid) lp_lru;	/* LRU list */
	LIST_ENTRY(nfs_lock_pid) lp_hash;	/* hash chain */
	int lp_valid;				/* valid entry? */
	int lp_time;				/* last time seen valid */
	pid_t lp_pid;				/* The process ID. */
	struct timeval lp_pid_start;		/* Start time of process id
						 * (distinguishes pid reuse) */
};

#define NFS_LOCK_PID_HASH_SIZE 64 // XXX tune me
#define NFS_LOCK_PID_HASH(pid)	\
	(&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash])
static LIST_HEAD(, nfs_lock_pid) *nfs_lock_pid_hash_tbl;	/* pid lookup table */
static TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_lru;		/* eviction order */
/* nfs_lock_pid_hash_trusted: nonzero while the table is known complete,
 * so "pid not in table" reliably means "pid holds no NFS locks". */
static u_long nfs_lock_pid_hash, nfs_lock_pid_hash_trusted;

static lck_grp_t *nfs_lock_lck_grp;
static lck_mtx_t *nfs_lock_mutex;	/* guards all of the state above */


/*
 * initialize global nfs lock state
 *
 * Called once at NFS client startup; sets up the pending-request queue,
 * the pid hash table/LRU, and the mutex that serializes them.
 */
void
nfs_lockinit(void)
{
	TAILQ_INIT(&nfs_pendlockq);
	nfs_lock_pid_hash_trusted = 1;
	nfs_lock_pid_hash_tbl = hashinit(NFS_LOCK_PID_HASH_SIZE,
			M_TEMP, &nfs_lock_pid_hash);
	TAILQ_INIT(&nfs_lock_pid_lru);

	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
}

/*
 * change the count of NFS mounts that may need to make lockd requests
 *
 * If the mount count drops to zero, then send a shutdown request to
 * lockd if we've sent
any requests to it. 154 */ 155void 156nfs_lockd_mount_change(int i) 157{ 158 mach_port_t lockd_port = IPC_PORT_NULL; 159 kern_return_t kr; 160 int send_shutdown; 161 162 lck_mtx_lock(nfs_lock_mutex); 163 164 nfs_lockd_mounts += i; 165 166 /* send a shutdown request if there are no more lockd mounts */ 167 send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent); 168 if (send_shutdown) 169 nfs_lockd_request_sent = 0; 170 171 lck_mtx_unlock(nfs_lock_mutex); 172 173 if (!send_shutdown) 174 return; 175 176 /* 177 * Let lockd know that it is no longer need for any NFS mounts 178 */ 179 kr = host_get_lockd_port(host_priv_self(), &lockd_port); 180 if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) { 181 printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n", 182 kr, (lockd_port == IPC_PORT_NULL) ? "NULL" : 183 (lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID"); 184 return; 185 } 186 187 kr = lockd_shutdown(lockd_port); 188 if (kr != KERN_SUCCESS) 189 printf("nfs_lockd_mount_change: shutdown %d\n", kr); 190 191 ipc_port_release_send(lockd_port); 192} 193 194/* 195 * insert a lock request message into the pending queue 196 * (nfs_lock_mutex must be held) 197 */ 198static inline void 199nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq) 200{ 201 LOCKD_MSG_REQUEST *mr; 202 203 mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue); 204 if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) { 205 /* fast path: empty queue or new largest xid */ 206 TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next); 207 return; 208 } 209 /* slow path: need to walk list to find insertion point */ 210 while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) { 211 mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next); 212 } 213 if (mr) { 214 TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next); 215 } else { 216 TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next); 217 } 218} 219 220/* 221 * remove a lock request message from the pending queue 222 * 
(nfs_lock_mutex must be held) 223 */ 224static inline void 225nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq) 226{ 227 TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next); 228} 229 230/* 231 * find a pending lock request message by xid 232 * 233 * We search from the head of the list assuming that the message we're 234 * looking for is for an older request (because we have an answer to it). 235 * This assumes that lock request will be answered primarily in FIFO order. 236 * However, this may not be the case if there are blocked requests. We may 237 * want to move blocked requests to a separate queue (but that'll complicate 238 * duplicate xid checking). 239 * 240 * (nfs_lock_mutex must be held) 241 */ 242static inline LOCKD_MSG_REQUEST * 243nfs_lockdmsg_find_by_xid(uint64_t lockxid) 244{ 245 LOCKD_MSG_REQUEST *mr; 246 247 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) { 248 if (mr->lmr_msg.lm_xid == lockxid) 249 return mr; 250 if (mr->lmr_msg.lm_xid > lockxid) 251 return NULL; 252 } 253 return mr; 254} 255 256/* 257 * Because we can't depend on nlm_granted messages containing the same 258 * cookie we sent with the original lock request, we need code test if 259 * an nlm_granted answer matches the lock request. We also need code 260 * that can find a lockd message based solely on the nlm_granted answer. 
261 */ 262 263/* 264 * compare lockd message to answer 265 * 266 * returns 0 on equality and 1 if different 267 */ 268static inline int 269nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp) 270{ 271 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) 272 return 1; 273 if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid) 274 return 1; 275 if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start) 276 return 1; 277 if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len) 278 return 1; 279 if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len) 280 return 1; 281 if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len)) 282 return 1; 283 return 0; 284} 285 286/* 287 * find a pending lock request message based on the lock info provided 288 * in the lockd_ans/nlm_granted data. We need this because we can't 289 * depend on nlm_granted messages containing the same cookie we sent 290 * with the original lock request. 291 * 292 * We search from the head of the list assuming that the message we're 293 * looking for is for an older request (because we have an answer to it). 294 * This assumes that lock request will be answered primarily in FIFO order. 295 * However, this may not be the case if there are blocked requests. We may 296 * want to move blocked requests to a separate queue (but that'll complicate 297 * duplicate xid checking). 
298 * 299 * (nfs_lock_mutex must be held) 300 */ 301static inline LOCKD_MSG_REQUEST * 302nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp) 303{ 304 LOCKD_MSG_REQUEST *mr; 305 306 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) 307 return NULL; 308 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) { 309 if (!nfs_lockdmsg_compare_to_answer(mr, ansp)) 310 break; 311 } 312 return mr; 313} 314 315/* 316 * return the next unique lock request transaction ID 317 * (nfs_lock_mutex must be held) 318 */ 319static inline uint64_t 320nfs_lockxid_get(void) 321{ 322 LOCKD_MSG_REQUEST *mr; 323 324 /* derive initial lock xid from system time */ 325 if (!nfs_lockxid) { 326 /* 327 * Note: it's OK if this code inits nfs_lockxid to 0 (for example, 328 * due to a broken clock) because we immediately increment it 329 * and we guarantee to never use xid 0. So, nfs_lockxid should only 330 * ever be 0 the first time this function is called. 331 */ 332 struct timeval tv; 333 microtime(&tv); 334 nfs_lockxid = (uint64_t)tv.tv_sec << 12; 335 } 336 337 /* make sure we get a unique xid */ 338 do { 339 /* Skip zero xid if it should ever happen. */ 340 if (++nfs_lockxid == 0) 341 nfs_lockxid++; 342 if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) || 343 (mr->lmr_msg.lm_xid < nfs_lockxid)) { 344 /* fast path: empty queue or new largest xid */ 345 break; 346 } 347 /* check if xid is already in use */ 348 } while (nfs_lockdmsg_find_by_xid(nfs_lockxid)); 349 350 return nfs_lockxid; 351} 352 353 354/* 355 * Check the nfs_lock_pid hash table for an entry and, if requested, 356 * add the entry if it is not found. 357 * 358 * (Also, if adding, try to clean up some stale entries.) 
359 * (nfs_lock_mutex must be held) 360 */ 361static int 362nfs_lock_pid_check(proc_t p, int addflag) 363{ 364 struct nfs_lock_pid *lp, *lplru, *lplru_next, *mlp; 365 TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_free; 366 proc_t plru = PROC_NULL; 367 pid_t pid; 368 int error = 0; 369 struct timeval now; 370 371 TAILQ_INIT(&nfs_lock_pid_free); 372 mlp = NULL; 373 374loop: 375 /* Search hash chain */ 376 pid = proc_pid(p); 377 error = ENOENT; 378 lp = NFS_LOCK_PID_HASH(pid)->lh_first; 379 for (; lp != NULL; lp = lp->lp_hash.le_next) 380 if (lp->lp_pid == pid) { 381 /* found pid... */ 382 if (timevalcmp(&lp->lp_pid_start, &p->p_start, ==)) { 383 /* ...and it's valid */ 384 /* move to tail of LRU */ 385 TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru); 386 microuptime(&now); 387 lp->lp_time = now.tv_sec; 388 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru); 389 error = 0; 390 break; 391 } 392 /* ...but it's no longer valid */ 393 /* remove from hash, invalidate, and move to lru head */ 394 LIST_REMOVE(lp, lp_hash); 395 lp->lp_valid = 0; 396 TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru); 397 TAILQ_INSERT_HEAD(&nfs_lock_pid_lru, lp, lp_lru); 398 lp = NULL; 399 break; 400 } 401 402 /* if we didn't find it (valid), use any newly allocated one */ 403 if (!lp) 404 lp = mlp; 405 406 /* if we don't have an lp and we've been asked to add it */ 407 if ((error == ENOENT) && addflag && !lp) { 408 /* scan lru list for invalid, stale entries to reuse/free */ 409 int lrucnt = 0; 410 microuptime(&now); 411 for (lplru = TAILQ_FIRST(&nfs_lock_pid_lru); lplru; lplru = lplru_next) { 412 lplru_next = TAILQ_NEXT(lplru, lp_lru); 413 if (lplru->lp_valid && (lplru->lp_time >= (now.tv_sec - 2))) { 414 /* 415 * If the oldest LRU entry is relatively new, then don't 416 * bother scanning any further. 
417 */ 418 break; 419 } 420 /* remove entry from LRU, and check if it's still in use */ 421 TAILQ_REMOVE(&nfs_lock_pid_lru, lplru, lp_lru); 422 if (!lplru->lp_valid || !(plru = proc_find(lplru->lp_pid)) || 423 timevalcmp(&lplru->lp_pid_start, &plru->p_start, !=)) { 424 if (plru != PROC_NULL) { 425 proc_rele(plru); 426 plru = PROC_NULL; 427 } 428 /* no longer in use */ 429 LIST_REMOVE(lplru, lp_hash); 430 if (!lp) { 431 /* we'll reuse this one */ 432 lp = lplru; 433 } else { 434 /* queue it up for freeing */ 435 TAILQ_INSERT_HEAD(&nfs_lock_pid_free, lplru, lp_lru); 436 } 437 } else { 438 /* still in use */ 439 if (plru != PROC_NULL) { 440 proc_rele(plru); 441 plru = PROC_NULL; 442 } 443 lplru->lp_time = now.tv_sec; 444 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lplru, lp_lru); 445 } 446 /* don't check too many entries at once */ 447 if (++lrucnt > 8) 448 break; 449 } 450 if (!lp) { 451 /* we need to allocate a new one */ 452 lck_mtx_unlock(nfs_lock_mutex); 453 MALLOC(mlp, struct nfs_lock_pid *, sizeof(struct nfs_lock_pid), 454 M_TEMP, M_WAITOK | M_ZERO); 455 lck_mtx_lock(nfs_lock_mutex); 456 if (mlp) /* make sure somebody hasn't already added this guy */ 457 goto loop; 458 error = ENOMEM; 459 } 460 } 461 if ((error == ENOENT) && addflag && lp) { 462 /* (re)initialize nfs_lock_pid info */ 463 lp->lp_pid = pid; 464 lp->lp_pid_start = p->p_start; 465 /* insert pid in hash */ 466 LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash); 467 lp->lp_valid = 1; 468 lp->lp_time = now.tv_sec; 469 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru); 470 error = 0; 471 } 472 473 if ((mlp && (lp != mlp)) || TAILQ_FIRST(&nfs_lock_pid_free)) { 474 lck_mtx_unlock(nfs_lock_mutex); 475 if (mlp && (lp != mlp)) { 476 /* we didn't need this one, so we can free it */ 477 FREE(mlp, M_TEMP); 478 } 479 /* free up any stale entries */ 480 while ((lp = TAILQ_FIRST(&nfs_lock_pid_free))) { 481 TAILQ_REMOVE(&nfs_lock_pid_free, lp, lp_lru); 482 FREE(lp, M_TEMP); 483 } 484 
lck_mtx_lock(nfs_lock_mutex); 485 } 486 487 return (error); 488} 489 490#define MACH_MAX_TRIES 3 491 492static int 493send_request(LOCKD_MSG *msg, int interruptable) 494{ 495 kern_return_t kr; 496 int retries = 0; 497 mach_port_t lockd_port = IPC_PORT_NULL; 498 499 kr = host_get_lockd_port(host_priv_self(), &lockd_port); 500 if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port)) 501 return (ENOTSUP); 502 503 do { 504 /* In the kernel all mach messaging is interruptable */ 505 do { 506 kr = lockd_request( 507 lockd_port, 508 msg->lm_version, 509 msg->lm_flags, 510 msg->lm_xid, 511 msg->lm_fl.l_start, 512 msg->lm_fl.l_len, 513 msg->lm_fl.l_pid, 514 msg->lm_fl.l_type, 515 msg->lm_fl.l_whence, 516 (uint32_t *)&msg->lm_addr, 517 (uint32_t *)&msg->lm_cred, 518 msg->lm_fh_len, 519 msg->lm_fh); 520 if (kr != KERN_SUCCESS) 521 printf("lockd_request received %d!\n", kr); 522 } while (!interruptable && kr == MACH_SEND_INTERRUPTED); 523 } while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES); 524 525 ipc_port_release_send(lockd_port); 526 switch (kr) { 527 case MACH_SEND_INTERRUPTED: 528 return (EINTR); 529 default: 530 /* 531 * Other MACH or MIG errors we will retry. Eventually 532 * we will call nfs_down and allow the user to disable 533 * locking. 
		 */
		return (EAGAIN);
	}
	return (kr);
}


/*
 * NFS advisory byte-level locks (client)
 *
 * Translate a vnode advlock operation into a LOCKD_MSG, hand it to lockd
 * via send_request(), and wait on the pending queue for the matching
 * answer (delivered by nfslockdans()).  Handles timeouts with resend,
 * NLM_BLOCKED waits, and cancellation of abandoned blocked requests.
 */
int
nfs3_vnop_advlock(
	struct vnop_advlock_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		caddr_t a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx;
	proc_t p;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;
	vnode_t vp;
	nfsnode_t np;
	int error, error2;
	int interruptable;
	struct flock *fl;
	struct nfsmount *nmp;
	struct nfs_vattr nvattr;
	off_t start, end;
	struct timeval now;
	int timeo, endtime, lastmsg, wentdown = 0;
	int lockpidcheck, nfsvers;
	struct sockaddr *saddr;
	struct timespec ts;

	ctx = ap->a_context;
	p = vfs_context_proc(ctx);
	vp = ap->a_vp;
	fl = ap->a_fl;
	np = VTONFS(vp);

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_flag & NFSMNT_NOLOCKS) {
		/* locking has been disabled for this mount */
		lck_mtx_unlock(&nmp->nm_lock);
		return (ENOTSUP);
	}
	nfsvers = nmp->nm_vers;
	lck_mtx_unlock(&nmp->nm_lock);

	/*
	 * The NLM protocol doesn't allow the server to return an error
	 * on ranges, so we do it.  Pre LFS (Large File Summit)
	 * standards required EINVAL for the range errors.  More recent
	 * standards use EOVERFLOW, but their EINVAL wording still
	 * encompasses these errors.
	 * Any code sensitive to this is either:
	 *   1) written pre-LFS and so can handle only EINVAL, or
	 *   2) written post-LFS and thus ought to be tolerant of pre-LFS
	 *      implementations.
	 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
	 */
	if (fl->l_whence != SEEK_END) {
		if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
		    fl->l_start < 0 ||
		    (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) ||
		    (fl->l_len < 0 && fl->l_start + fl->l_len < 0))
			return (EINVAL);
	}

	lck_mtx_lock(nfs_lock_mutex);

	/*
	 * Need to check if this process has successfully acquired an NFS lock before.
	 * If not, and this is an unlock request we can simply return success here.
	 */
	lockpidcheck = nfs_lock_pid_check(p, 0);
	lck_mtx_unlock(nfs_lock_mutex);
	if (lockpidcheck) {
		if (lockpidcheck != ENOENT)
			return (lockpidcheck);
		/* pid has never locked anything: an unlock is trivially done */
		if ((ap->a_op == F_UNLCK) && nfs_lock_pid_hash_trusted)
			return (0);
	}

	/*
	 * The NFS Lock Manager protocol doesn't directly handle
	 * negative lengths or SEEK_END, so we need to normalize
	 * things here where we have all the info.
	 * (Note: SEEK_CUR is already adjusted for at this point)
	 */
	/* Convert the flock structure into a start and end. */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * to fl->l_start when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;
	case SEEK_END:
		/* need to flush, and refetch attributes to make */
		/* sure we have the correct end of file offset */
		error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
		if (error)
			return (error);
		NATTRINVALIDATE(np);
		if (np->n_flag & NMODIFIED) {
			nfs_unlock(np);
			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
			if (error)
				return (error);
		} else
			nfs_unlock(np);

		error = nfs_getattr(np, &nvattr, ctx, 0);
		nfs_data_lock(np, NFS_NODE_LOCK_SHARED);
		if (!error)
			error = nfs_lock(np, NFS_NODE_LOCK_SHARED);
		if (error) {
			nfs_data_unlock(np);
			return (error);
		}
		start = np->n_size + fl->l_start;
		nfs_unlock(np);
		nfs_data_unlock(np);
		break;
	default:
		return (EINVAL);
	}
	if (fl->l_len == 0)
		end = -1;	/* "to end of file" */
	else if (fl->l_len > 0)
		end = start + fl->l_len - 1;
	else { /* l_len is negative */
		end = start - 1;
		start += fl->l_len;
	}
	if (start < 0)
		return (EINVAL);

	/* NFSv2 offsets must fit in 32 bits */
	if ((nfsvers == NFS_VER2) &&
	    ((start >= 0x80000000) || (end >= 0x80000000)))
		return (EINVAL);

	/*
	 * Fill in the information structure.
	 * We set all values to zero with bzero to clear
	 * out any information in the sockaddr_storage
	 * and nfs_filehandle contained in msgreq so that
	 * we will not leak extraneous information out of
	 * the kernel when calling up to lockd via our mig
	 * generated routine.
	 */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_flags = 0;

	msg->lm_fl = *fl;
	msg->lm_fl.l_start = start;
	if (end != -1)
		msg->lm_fl.l_len = end - start + 1;
	msg->lm_fl.l_pid = vfs_context_pid(ctx);

	if (ap->a_flags & F_WAIT)
		msg->lm_flags |= LOCKD_MSG_BLOCK;
	if (ap->a_op == F_GETLK)
		msg->lm_flags |= LOCKD_MSG_TEST;

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);

	/* snapshot server address, file handle, and credentials for lockd */
	lck_mtx_lock(&nmp->nm_lock);
	saddr = mbuf_data(nmp->nm_nam);
	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	msg->lm_fh_len = (nfsvers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	if (nfsvers == NFS_VER3)
		msg->lm_flags |= LOCKD_MSG_NFSV3;
	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);

	/* prime lastmsg so the first "not responding" message isn't premature */
	microuptime(&now);
	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	interruptable = nmp->nm_flag & NFSMNT_INT;
	lck_mtx_unlock(&nmp->nm_lock);

	lck_mtx_lock(nfs_lock_mutex);

	/* allocate unique xid */
	msg->lm_xid = nfs_lockxid_get();
	nfs_lockdmsg_enqueue(&msgreq);

	timeo = 2;

	for (;;) {
		nfs_lockd_request_sent = 1;

		/* need to drop nfs_lock_mutex while calling send_request() */
		lck_mtx_unlock(nfs_lock_mutex);
		error = send_request(msg, interruptable);
		lck_mtx_lock(nfs_lock_mutex);
		if (error && error != EAGAIN)
			break;

		/*
		 * Always wait for an answer.  Not waiting for unlocks could
		 * cause a lock to be left if the unlock request gets dropped.
		 */

		/*
		 * Retry if it takes too long to get a response.
		 *
		 * The timeout numbers were picked out of thin air... they start
		 * at 2 and double each timeout with a max of 60 seconds.
		 *
		 * In order to maintain responsiveness, we pass a small timeout
		 * to msleep and calculate the timeouts ourselves.  This allows
		 * us to pick up on mount changes quicker.
		 */
wait_for_granted:
		error = EWOULDBLOCK;
		ts.tv_sec = 2;
		ts.tv_nsec = 0;
		microuptime(&now);
		endtime = now.tv_sec + timeo;
		while (now.tv_sec < endtime) {
			error = error2 = 0;
			if (!msgreq.lmr_answered)
				error = msleep(&msgreq, nfs_lock_mutex, PCATCH | PUSER, "lockd", &ts);
			if (msgreq.lmr_answered) {
				/*
				 * Note: it's possible to have a lock granted at
				 * essentially the same time that we get interrupted.
				 * Since the lock may be granted, we can't return an
				 * error from this request or we might not unlock the
				 * lock that's been granted.
				 */
				nmp = VTONMP(vp);
				if ((msgreq.lmr_errno == ENOTSUP) && nmp &&
				    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
					/*
					 * We have evidence that locks work, yet lockd
					 * returned ENOTSUP.  This is probably because
					 * it was unable to contact the server's lockd
					 * to send it the request.
					 *
					 * Because we know locks work, we'll consider
					 * this failure to be a timeout.
					 */
					error = EWOULDBLOCK;
				} else {
					error = 0;
				}
				break;
			}
			if (error != EWOULDBLOCK)
				break;
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			nmp = VTONMP(vp);
			if ((error2 = nfs_sigintr(nmp, NULL, vfs_context_thread(ctx), 0))) {
				error = error2;
				if (fl->l_type == F_UNLCK)
					printf("nfs_vnop_advlock: aborting unlock request, error %d\n", error);
				break;
			}
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_flag & NFSMNT_NOLOCKS) {
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = nmp->nm_flag & NFSMNT_INT;
			lck_mtx_unlock(&nmp->nm_lock);
			microuptime(&now);
		}
		if (error) {
			/* check that we still have our mount... */
			nmp = VTONMP(vp);
			if ((error2 = nfs_sigintr(nmp, NULL, vfs_context_thread(ctx), 0))) {
				error = error2;
				if (error2 != EINTR) {
					if (fl->l_type == F_UNLCK)
						printf("nfs_vnop_advlock: aborting unlock request, error %d\n", error);
					break;
				}
			}
			/* ...and that we still support locks */
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_flag & NFSMNT_NOLOCKS) {
				if (error == EWOULDBLOCK)
					error = ENOTSUP;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = nmp->nm_flag & NFSMNT_INT;
			if (error != EWOULDBLOCK) {
				lck_mtx_unlock(&nmp->nm_lock);
				/*
				 * We're going to bail on this request.
				 * If we were a blocked lock request, send a cancel.
				 */
				if ((msgreq.lmr_errno == EINPROGRESS) &&
				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
					/* set this request up as a cancel */
					msg->lm_flags |= LOCKD_MSG_CANCEL;
					/* a cancel needs a fresh xid; requeue in sorted order */
					nfs_lockdmsg_dequeue(&msgreq);
					msg->lm_xid = nfs_lockxid_get();
					nfs_lockdmsg_enqueue(&msgreq);
					msgreq.lmr_saved_errno = error;
					msgreq.lmr_errno = 0;
					msgreq.lmr_answered = 0;
					/* reset timeout */
					timeo = 2;
					/* send cancel request */
					continue;
				}
				break;
			}

			/* warn if we're not getting any response */
			microuptime(&now);
			if ((msgreq.lmr_errno != EINPROGRESS) &&
			    (nmp->nm_tprintf_initial_delay != 0) &&
			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
				lck_mtx_unlock(&nmp->nm_lock);
				lastmsg = now.tv_sec;
				nfs_down(nmp, vfs_context_thread(ctx), 0, NFSSTA_LOCKTIMEO, "lockd not responding");
				wentdown = 1;
			} else
				lck_mtx_unlock(&nmp->nm_lock);

			if (msgreq.lmr_errno == EINPROGRESS) {
				/*
				 * We've got a blocked lock request that we are
				 * going to retry.  First, we'll want to try to
				 * send a cancel for the previous request.
				 *
				 * Clear errno so if we don't get a response
				 * to the resend we'll call nfs_down().
				 * Also reset timeout because we'll expect a
				 * quick response to the cancel/resend (even if
				 * it is NLM_BLOCKED).
				 */
				msg->lm_flags |= LOCKD_MSG_CANCEL;
				nfs_lockdmsg_dequeue(&msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(&msgreq);
				msgreq.lmr_saved_errno = msgreq.lmr_errno;
				msgreq.lmr_errno = 0;
				msgreq.lmr_answered = 0;
				timeo = 2;
				/* send cancel then resend request */
				continue;
			}
			/*
			 * We timed out, so we will resend the request.
			 */
			timeo *= 2;
			if (timeo > 60)
				timeo = 60;
			/* resend request */
			continue;
		}

		/* we got a response, so the server's lockd is OK */
		nfs_up(VTONMP(vp), vfs_context_thread(ctx), NFSSTA_LOCKTIMEO,
			wentdown ? "lockd alive again" : NULL);
		wentdown = 0;

		if (msgreq.lmr_errno == EINPROGRESS) {
			/* got NLM_BLOCKED response */
			/* need to wait for NLM_GRANTED */
			timeo = 60;
			msgreq.lmr_answered = 0;
			goto wait_for_granted;
		}

		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
		    (msgreq.lmr_saved_errno == EINPROGRESS)) {
			/*
			 * We just got a successful reply to the
			 * cancel of the previous blocked lock request.
			 * Now, go ahead and resend the request.
			 */
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			nfs_lockdmsg_dequeue(&msgreq);
			msg->lm_xid = nfs_lockxid_get();
			nfs_lockdmsg_enqueue(&msgreq);
			msgreq.lmr_saved_errno = 0;
			msgreq.lmr_errno = 0;
			msgreq.lmr_answered = 0;
			timeo = 2;
			/* resend request */
			continue;
		}

		/* F_GETLK: copy any conflicting lock info back to the caller */
		if ((msg->lm_flags & LOCKD_MSG_TEST) && msgreq.lmr_errno == 0) {
			if (msg->lm_fl.l_type != F_UNLCK) {
				fl->l_type = msg->lm_fl.l_type;
				fl->l_pid = msg->lm_fl.l_pid;
				fl->l_start = msg->lm_fl.l_start;
				fl->l_len = msg->lm_fl.l_len;
				fl->l_whence = SEEK_SET;
			} else
				fl->l_type = F_UNLCK;
		}

		/*
		 * If the blocked lock request was cancelled,
		 * restore the error condition from when we
		 * originally bailed on the request.
		 */
		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = msgreq.lmr_saved_errno;
		} else
			error = msgreq.lmr_errno;

		nmp = VTONMP(vp);
		if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
			/*
			 * We have NO evidence that locks work and lockd
			 * returned ENOTSUP.  Let's take this as a hint
			 * that locks aren't supported and disable them
			 * for this mount.
			 */
			lck_mtx_lock(&nmp->nm_lock);
			nmp->nm_flag |= NFSMNT_NOLOCKS;
			nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
			lck_mtx_unlock(&nmp->nm_lock);
			printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
		}
		if (!error) {
			/* record that NFS file locking has worked on this mount */
			if (nmp) {
				lck_mtx_lock(&nmp->nm_lock);
				if (!(nmp->nm_state & NFSSTA_LOCKSWORK))
					nmp->nm_state |= NFSSTA_LOCKSWORK;
				lck_mtx_unlock(&nmp->nm_lock);
			}
			/*
			 * If we successfully acquired a lock, make sure this pid
			 * is in the nfs_lock_pid hash table so we know we can't
			 * short-circuit unlock requests.
			 */
			if ((lockpidcheck == ENOENT) &&
			    ((ap->a_op == F_SETLK) || (ap->a_op == F_SETLKW))) {
				error = nfs_lock_pid_check(p, 1);
				if (error) {
					/*
					 * We couldn't add the pid to the table,
					 * so we can no longer trust that a pid
					 * not in the table has no locks.
					 */
					nfs_lock_pid_hash_trusted = 0;
					printf("nfs_vnop_advlock: pid add failed - no longer trusted\n");
				}
			}
		}
		break;
	}

	nfs_lockdmsg_dequeue(&msgreq);

	lck_mtx_unlock(nfs_lock_mutex);

	return (error);
}

/*
 * nfslockdans --
 *	NFS advisory byte-level locks answer from the lock daemon.
1017 */ 1018int 1019nfslockdans(proc_t p, struct lockd_ans *ansp) 1020{ 1021 LOCKD_MSG_REQUEST *msgreq; 1022 int error; 1023 1024 /* Let root make this call. */ 1025 error = proc_suser(p); 1026 if (error) 1027 return (error); 1028 1029 /* the version should match, or we're out of sync */ 1030 if (ansp->la_version != LOCKD_ANS_VERSION) 1031 return (EINVAL); 1032 1033 lck_mtx_lock(nfs_lock_mutex); 1034 1035 /* try to find the lockd message by transaction id (cookie) */ 1036 msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid); 1037 if (ansp->la_flags & LOCKD_ANS_GRANTED) { 1038 /* 1039 * We can't depend on the granted message having our cookie, 1040 * so we check the answer against the lockd message found. 1041 * If no message was found or it doesn't match the answer, 1042 * we look for the lockd message by the answer's lock info. 1043 */ 1044 if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp)) 1045 msgreq = nfs_lockdmsg_find_by_answer(ansp); 1046 /* 1047 * We need to make sure this request isn't being cancelled 1048 * If it is, we don't want to accept the granted message. 1049 */ 1050 if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL)) 1051 msgreq = NULL; 1052 } 1053 if (!msgreq) { 1054 lck_mtx_unlock(nfs_lock_mutex); 1055 return (EPIPE); 1056 } 1057 1058 msgreq->lmr_errno = ansp->la_errno; 1059 if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) { 1060 if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) { 1061 if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL) 1062 msgreq->lmr_msg.lm_fl.l_type = F_WRLCK; 1063 else 1064 msgreq->lmr_msg.lm_fl.l_type = F_RDLCK; 1065 msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid; 1066 msgreq->lmr_msg.lm_fl.l_start = ansp->la_start; 1067 msgreq->lmr_msg.lm_fl.l_len = ansp->la_len; 1068 } else { 1069 msgreq->lmr_msg.lm_fl.l_type = F_UNLCK; 1070 } 1071 } 1072 1073 msgreq->lmr_answered = 1; 1074 lck_mtx_unlock(nfs_lock_mutex); 1075 wakeup(msgreq); 1076 1077 return (0); 1078} 1079 1080