1/* 2 * Copyright (c) 2002-2010 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/*- 29 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. 30 * 31 * Redistribution and use in source and binary forms, with or without 32 * modification, are permitted provided that the following conditions 33 * are met: 34 * 1. Redistributions of source code must retain the above copyright 35 * notice, this list of conditions and the following disclaimer. 36 * 2. Redistributions in binary form must reproduce the above copyright 37 * notice, this list of conditions and the following disclaimer in the 38 * documentation and/or other materials provided with the distribution. 39 * 3. Berkeley Software Design Inc's name may not be used to endorse or 40 * promote products derived from this software without specific prior 41 * written permission. 42 * 43 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 46 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 53 * SUCH DAMAGE. 54 * 55 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp 56 */ 57 58#include <sys/cdefs.h> 59#include <sys/param.h> 60#include <sys/systm.h> 61#include <sys/fcntl.h> 62#include <sys/kernel.h> /* for hz */ 63#include <sys/file_internal.h> 64#include <sys/malloc.h> 65#include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */ 66#include <sys/kpi_mbuf.h> 67#include <sys/mount_internal.h> 68#include <sys/proc_internal.h> /* for p_start */ 69#include <sys/kauth.h> 70#include <sys/resourcevar.h> 71#include <sys/socket.h> 72#include <sys/unistd.h> 73#include <sys/user.h> 74#include <sys/vnode_internal.h> 75 76#include <kern/thread.h> 77#include <kern/host.h> 78 79#include <machine/limits.h> 80 81#include <net/if.h> 82 83#include <nfs/rpcv2.h> 84#include <nfs/nfsproto.h> 85#include <nfs/nfs.h> 86#include <nfs/nfs_gss.h> 87#include <nfs/nfsmount.h> 88#include <nfs/nfsnode.h> 89#include <nfs/nfs_lock.h> 90 91#include <mach/host_priv.h> 92#include <mach/mig_errors.h> 93#include <mach/host_special_ports.h> 94#include <lockd/lockd_mach.h> 95 96extern void ipc_port_release_send(ipc_port_t); 97 98/* 99 * pending lock request messages are kept in this queue which is 100 * kept sorted by transaction ID (xid). 101 */ 102static uint64_t nfs_lockxid = 0; 103static LOCKD_MSG_QUEUE nfs_pendlockq; 104 105/* list of mounts that are (potentially) making lockd requests */ 106TAILQ_HEAD(nfs_lockd_mount_list,nfsmount) nfs_lockd_mount_list; 107 108static lck_grp_t *nfs_lock_lck_grp; 109static lck_mtx_t *nfs_lock_mutex; 110 111void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *); 112void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *); 113int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *); 114LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *); 115LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t); 116uint64_t nfs_lockxid_get(void); 117int nfs_lockd_send_request(LOCKD_MSG *, int); 118 119/* 120 * initialize global nfs lock state 121 */ 122void 123nfs_lockinit(void) 124{ 125 TAILQ_INIT(&nfs_pendlockq); 126 TAILQ_INIT(&nfs_lockd_mount_list); 127 128 nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL); 129 nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL); 130} 131 132/* 133 * Register a mount as (potentially) making lockd requests. 134 */ 135void 136nfs_lockd_mount_register(struct nfsmount *nmp) 137{ 138 lck_mtx_lock(nfs_lock_mutex); 139 TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink); 140 nfs_lockd_mounts++; 141 lck_mtx_unlock(nfs_lock_mutex); 142} 143 144/* 145 * Unregister a mount as (potentially) making lockd requests. 146 * 147 * When the lockd mount count drops to zero, then send a shutdown request to 148 * lockd if we've sent any requests to it. 149 */ 150void 151nfs_lockd_mount_unregister(struct nfsmount *nmp) 152{ 153 int send_shutdown; 154 mach_port_t lockd_port = IPC_PORT_NULL; 155 kern_return_t kr; 156 157 lck_mtx_lock(nfs_lock_mutex); 158 TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink); 159 nfs_lockd_mounts--; 160 161 /* send a shutdown request if there are no more lockd mounts */ 162 send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent); 163 if (send_shutdown) 164 nfs_lockd_request_sent = 0; 165 166 lck_mtx_unlock(nfs_lock_mutex); 167 168 if (!send_shutdown) 169 return; 170 171 /* 172 * Let lockd know that it is no longer needed for any NFS mounts 173 */ 174 kr = host_get_lockd_port(host_priv_self(), &lockd_port); 175 if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) { 176 printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n", 177 kr, (lockd_port == IPC_PORT_NULL) ? "NULL" : 178 (lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID"); 179 return; 180 } 181 182 kr = lockd_shutdown(lockd_port); 183 if (kr != KERN_SUCCESS) 184 printf("nfs_lockd_mount_change: shutdown %d\n", kr); 185 186 ipc_port_release_send(lockd_port); 187} 188 189/* 190 * insert a lock request message into the pending queue 191 * (nfs_lock_mutex must be held) 192 */ 193void 194nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq) 195{ 196 LOCKD_MSG_REQUEST *mr; 197 198 mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue); 199 if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) { 200 /* fast path: empty queue or new largest xid */ 201 TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next); 202 return; 203 } 204 /* slow path: need to walk list to find insertion point */ 205 while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) { 206 mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next); 207 } 208 if (mr) { 209 TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next); 210 } else { 211 TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next); 212 } 213} 214 215/* 216 * remove a lock request message from the pending queue 217 * (nfs_lock_mutex must be held) 218 */ 219void 220nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq) 221{ 222 TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next); 223} 224 225/* 226 * find a pending lock request message by xid 227 * 228 * We search from the head of the list assuming that the message we're 229 * looking for is for an older request (because we have an answer to it). 230 * This assumes that lock request will be answered primarily in FIFO order. 231 * However, this may not be the case if there are blocked requests. We may 232 * want to move blocked requests to a separate queue (but that'll complicate 233 * duplicate xid checking). 234 * 235 * (nfs_lock_mutex must be held) 236 */ 237LOCKD_MSG_REQUEST * 238nfs_lockdmsg_find_by_xid(uint64_t lockxid) 239{ 240 LOCKD_MSG_REQUEST *mr; 241 242 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) { 243 if (mr->lmr_msg.lm_xid == lockxid) 244 return mr; 245 if (mr->lmr_msg.lm_xid > lockxid) 246 return NULL; 247 } 248 return mr; 249} 250 251/* 252 * Because we can't depend on nlm_granted messages containing the same 253 * cookie we sent with the original lock request, we need code to test 254 * if an nlm_granted answer matches the lock request. We also need code 255 * that can find a lockd message based solely on the nlm_granted answer. 256 */ 257 258/* 259 * compare lockd message to answer 260 * 261 * returns 0 on equality and 1 if different 262 */ 263int 264nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp) 265{ 266 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) 267 return 1; 268 if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid) 269 return 1; 270 if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start) 271 return 1; 272 if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len) 273 return 1; 274 if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len) 275 return 1; 276 if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len)) 277 return 1; 278 return 0; 279} 280 281/* 282 * find a pending lock request message based on the lock info provided 283 * in the lockd_ans/nlm_granted data. We need this because we can't 284 * depend on nlm_granted messages containing the same cookie we sent 285 * with the original lock request. 286 * 287 * We search from the head of the list assuming that the message we're 288 * looking for is for an older request (because we have an answer to it). 289 * This assumes that lock request will be answered primarily in FIFO order. 290 * However, this may not be the case if there are blocked requests. We may 291 * want to move blocked requests to a separate queue (but that'll complicate 292 * duplicate xid checking). 293 * 294 * (nfs_lock_mutex must be held) 295 */ 296LOCKD_MSG_REQUEST * 297nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp) 298{ 299 LOCKD_MSG_REQUEST *mr; 300 301 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) 302 return NULL; 303 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) { 304 if (!nfs_lockdmsg_compare_to_answer(mr, ansp)) 305 break; 306 } 307 return mr; 308} 309 310/* 311 * return the next unique lock request transaction ID 312 * (nfs_lock_mutex must be held) 313 */ 314uint64_t 315nfs_lockxid_get(void) 316{ 317 LOCKD_MSG_REQUEST *mr; 318 319 /* derive initial lock xid from system time */ 320 if (!nfs_lockxid) { 321 /* 322 * Note: it's OK if this code inits nfs_lockxid to 0 (for example, 323 * due to a broken clock) because we immediately increment it 324 * and we guarantee to never use xid 0. So, nfs_lockxid should only 325 * ever be 0 the first time this function is called. 326 */ 327 struct timeval tv; 328 microtime(&tv); 329 nfs_lockxid = (uint64_t)tv.tv_sec << 12; 330 } 331 332 /* make sure we get a unique xid */ 333 do { 334 /* Skip zero xid if it should ever happen. */ 335 if (++nfs_lockxid == 0) 336 nfs_lockxid++; 337 if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) || 338 (mr->lmr_msg.lm_xid < nfs_lockxid)) { 339 /* fast path: empty queue or new largest xid */ 340 break; 341 } 342 /* check if xid is already in use */ 343 } while (nfs_lockdmsg_find_by_xid(nfs_lockxid)); 344 345 return nfs_lockxid; 346} 347 348#define MACH_MAX_TRIES 3 349 350int 351nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable) 352{ 353 kern_return_t kr; 354 int retries = 0; 355 mach_port_t lockd_port = IPC_PORT_NULL; 356 357 kr = host_get_lockd_port(host_priv_self(), &lockd_port); 358 if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port)) 359 return (ENOTSUP); 360 361 do { 362 /* In the kernel all mach messaging is interruptable */ 363 do { 364 kr = lockd_request( 365 lockd_port, 366 msg->lm_version, 367 msg->lm_flags, 368 msg->lm_xid, 369 msg->lm_fl.l_start, 370 msg->lm_fl.l_len, 371 msg->lm_fl.l_pid, 372 msg->lm_fl.l_type, 373 msg->lm_fl.l_whence, 374 (uint32_t *)&msg->lm_addr, 375 (uint32_t *)&msg->lm_cred, 376 msg->lm_fh_len, 377 msg->lm_fh); 378 if (kr != KERN_SUCCESS) 379 printf("lockd_request received %d!\n", kr); 380 } while (!interruptable && kr == MACH_SEND_INTERRUPTED); 381 } while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES); 382 383 ipc_port_release_send(lockd_port); 384 switch (kr) { 385 case MACH_SEND_INTERRUPTED: 386 return (EINTR); 387 default: 388 /* 389 * Other MACH or MIG errors we will retry. Eventually 390 * we will call nfs_down and allow the user to disable 391 * locking. 392 */ 393 return (EAGAIN); 394 } 395 return (kr); 396} 397 398 399/* 400 * NFS advisory byte-level locks (client) 401 */ 402int 403nfs3_lockd_request( 404 nfsnode_t np, 405 int type, 406 LOCKD_MSG_REQUEST *msgreq, 407 int flags, 408 thread_t thd) 409{ 410 LOCKD_MSG *msg = &msgreq->lmr_msg; 411 int error, error2; 412 int interruptable, slpflag; 413 struct nfsmount *nmp; 414 struct timeval now; 415 int timeo, starttime, endtime, lastmsg, wentdown = 0; 416 struct timespec ts; 417 struct sockaddr *saddr; 418 419 nmp = NFSTONMP(np); 420 if (!nmp || !nmp->nm_saddr) 421 return (ENXIO); 422 423 lck_mtx_lock(&nmp->nm_lock); 424 saddr = nmp->nm_saddr; 425 bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len)); 426 if (nmp->nm_vers == NFS_VER3) 427 msg->lm_flags |= LOCKD_MSG_NFSV3; 428 429 if (nmp->nm_sotype != SOCK_DGRAM) 430 msg->lm_flags |= LOCKD_MSG_TCP; 431 432 microuptime(&now); 433 starttime = now.tv_sec; 434 lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); 435 interruptable = NMFLAG(nmp, INTR); 436 lck_mtx_unlock(&nmp->nm_lock); 437 438 lck_mtx_lock(nfs_lock_mutex); 439 440 /* allocate unique xid */ 441 msg->lm_xid = nfs_lockxid_get(); 442 nfs_lockdmsg_enqueue(msgreq); 443 444 timeo = 4; 445 446 for (;;) { 447 nfs_lockd_request_sent = 1; 448 449 /* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */ 450 lck_mtx_unlock(nfs_lock_mutex); 451 error = nfs_lockd_send_request(msg, interruptable); 452 lck_mtx_lock(nfs_lock_mutex); 453 if (error && error != EAGAIN) 454 break; 455 456 /* 457 * Always wait for an answer. Not waiting for unlocks could 458 * cause a lock to be left if the unlock request gets dropped. 459 */ 460 461 /* 462 * Retry if it takes too long to get a response. 463 * 464 * The timeout numbers were picked out of thin air... they start 465 * at 4 and double each timeout with a max of 30 seconds. 466 * 467 * In order to maintain responsiveness, we pass a small timeout 468 * to msleep and calculate the timeouts ourselves. This allows 469 * us to pick up on mount changes quicker. 470 */ 471wait_for_granted: 472 error = EWOULDBLOCK; 473 slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0; 474 ts.tv_sec = 2; 475 ts.tv_nsec = 0; 476 microuptime(&now); 477 endtime = now.tv_sec + timeo; 478 while (now.tv_sec < endtime) { 479 error = error2 = 0; 480 if (!msgreq->lmr_answered) { 481 error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd", &ts); 482 slpflag = 0; 483 } 484 if (msgreq->lmr_answered) { 485 /* 486 * Note: it's possible to have a lock granted at 487 * essentially the same time that we get interrupted. 488 * Since the lock may be granted, we can't return an 489 * error from this request or we might not unlock the 490 * lock that's been granted. 491 */ 492 nmp = NFSTONMP(np); 493 if ((msgreq->lmr_errno == ENOTSUP) && nmp && 494 (nmp->nm_state & NFSSTA_LOCKSWORK)) { 495 /* 496 * We have evidence that locks work, yet lockd 497 * returned ENOTSUP. This is probably because 498 * it was unable to contact the server's lockd 499 * to send it the request. 500 * 501 * Because we know locks work, we'll consider 502 * this failure to be a timeout. 503 */ 504 error = EWOULDBLOCK; 505 } else { 506 error = 0; 507 } 508 break; 509 } 510 if (error != EWOULDBLOCK) 511 break; 512 /* check that we still have our mount... */ 513 /* ...and that we still support locks */ 514 /* ...and that there isn't a recovery pending */ 515 nmp = NFSTONMP(np); 516 if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) { 517 error = error2; 518 if (type == F_UNLCK) 519 printf("nfs3_lockd_request: aborting unlock request, error %d\n", error); 520 break; 521 } 522 lck_mtx_lock(&nmp->nm_lock); 523 if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) { 524 lck_mtx_unlock(&nmp->nm_lock); 525 break; 526 } 527 if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) { 528 /* recovery pending... return an error that'll get this operation restarted */ 529 error = NFSERR_GRACE; 530 lck_mtx_unlock(&nmp->nm_lock); 531 break; 532 } 533 interruptable = NMFLAG(nmp, INTR); 534 lck_mtx_unlock(&nmp->nm_lock); 535 microuptime(&now); 536 } 537 if (error) { 538 /* check that we still have our mount... */ 539 nmp = NFSTONMP(np); 540 if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) { 541 error = error2; 542 if (error2 != EINTR) { 543 if (type == F_UNLCK) 544 printf("nfs3_lockd_request: aborting unlock request, error %d\n", error); 545 break; 546 } 547 } 548 /* ...and that we still support locks */ 549 lck_mtx_lock(&nmp->nm_lock); 550 if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) { 551 if (error == EWOULDBLOCK) 552 error = ENOTSUP; 553 lck_mtx_unlock(&nmp->nm_lock); 554 break; 555 } 556 /* ...and that there isn't a recovery pending */ 557 if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) { 558 /* recovery pending... return to allow recovery to occur */ 559 error = NFSERR_DENIED; 560 lck_mtx_unlock(&nmp->nm_lock); 561 break; 562 } 563 interruptable = NMFLAG(nmp, INTR); 564 if ((error != EWOULDBLOCK) || 565 ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) || 566 ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) { 567 if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) { 568 /* give up if this is for recovery and taking too long */ 569 error = ETIMEDOUT; 570 } else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) { 571 /* recovery pending... return an error that'll get this operation restarted */ 572 error = NFSERR_GRACE; 573 } 574 lck_mtx_unlock(&nmp->nm_lock); 575 /* 576 * We're going to bail on this request. 577 * If we were a blocked lock request, send a cancel. 578 */ 579 if ((msgreq->lmr_errno == EINPROGRESS) && 580 !(msg->lm_flags & LOCKD_MSG_CANCEL)) { 581 /* set this request up as a cancel */ 582 msg->lm_flags |= LOCKD_MSG_CANCEL; 583 nfs_lockdmsg_dequeue(msgreq); 584 msg->lm_xid = nfs_lockxid_get(); 585 nfs_lockdmsg_enqueue(msgreq); 586 msgreq->lmr_saved_errno = error; 587 msgreq->lmr_errno = 0; 588 msgreq->lmr_answered = 0; 589 /* reset timeout */ 590 timeo = 2; 591 /* send cancel request */ 592 continue; 593 } 594 break; 595 } 596 597 /* warn if we're not getting any response */ 598 microuptime(&now); 599 if ((msgreq->lmr_errno != EINPROGRESS) && 600 !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) && 601 (nmp->nm_tprintf_initial_delay != 0) && 602 ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) { 603 lck_mtx_unlock(&nmp->nm_lock); 604 lastmsg = now.tv_sec; 605 nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding"); 606 wentdown = 1; 607 } else 608 lck_mtx_unlock(&nmp->nm_lock); 609 610 if (msgreq->lmr_errno == EINPROGRESS) { 611 /* 612 * We've got a blocked lock request that we are 613 * going to retry. First, we'll want to try to 614 * send a cancel for the previous request. 615 * 616 * Clear errno so if we don't get a response 617 * to the resend we'll call nfs_down(). 618 * Also reset timeout because we'll expect a 619 * quick response to the cancel/resend (even if 620 * it is NLM_BLOCKED). 621 */ 622 msg->lm_flags |= LOCKD_MSG_CANCEL; 623 nfs_lockdmsg_dequeue(msgreq); 624 msg->lm_xid = nfs_lockxid_get(); 625 nfs_lockdmsg_enqueue(msgreq); 626 msgreq->lmr_saved_errno = msgreq->lmr_errno; 627 msgreq->lmr_errno = 0; 628 msgreq->lmr_answered = 0; 629 timeo = 2; 630 /* send cancel then resend request */ 631 continue; 632 } 633 634 /* 635 * We timed out, so we will resend the request. 636 */ 637 if (!(flags & R_RECOVER)) 638 timeo *= 2; 639 if (timeo > 30) 640 timeo = 30; 641 /* resend request */ 642 continue; 643 } 644 645 /* we got a reponse, so the server's lockd is OK */ 646 nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO, 647 wentdown ? "lockd alive again" : NULL); 648 wentdown = 0; 649 650 if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) { 651 /* 652 * The lock request was denied because the server lockd is 653 * still in its grace period. So, we need to try the 654 * request again in a little bit. Return the GRACE error so 655 * the higher levels can perform the retry. 656 */ 657 msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE; 658 } 659 660 if (msgreq->lmr_errno == EINPROGRESS) { 661 /* got NLM_BLOCKED response */ 662 /* need to wait for NLM_GRANTED */ 663 timeo = 30; 664 msgreq->lmr_answered = 0; 665 goto wait_for_granted; 666 } 667 668 if ((msg->lm_flags & LOCKD_MSG_CANCEL) && 669 (msgreq->lmr_saved_errno == EINPROGRESS)) { 670 /* 671 * We just got a successful reply to the 672 * cancel of the previous blocked lock request. 673 * Now, go ahead and return a DENIED error so the 674 * higher levels can resend the request. 675 */ 676 msg->lm_flags &= ~LOCKD_MSG_CANCEL; 677 nfs_lockdmsg_dequeue(msgreq); 678 error = NFSERR_DENIED; 679 break; 680 } 681 682 /* 683 * If the blocked lock request was cancelled. 684 * Restore the error condition from when we 685 * originally bailed on the request. 686 */ 687 if (msg->lm_flags & LOCKD_MSG_CANCEL) { 688 msg->lm_flags &= ~LOCKD_MSG_CANCEL; 689 error = msgreq->lmr_saved_errno; 690 } else { 691 error = msgreq->lmr_errno; 692 } 693 694 nmp = NFSTONMP(np); 695 if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) { 696 /* 697 * We have NO evidence that locks work and lockd 698 * returned ENOTSUP. Let's take this as a hint 699 * that locks aren't supported and disable them 700 * for this mount. 701 */ 702 nfs_lockdmsg_dequeue(msgreq); 703 lck_mtx_unlock(nfs_lock_mutex); 704 lck_mtx_lock(&nmp->nm_lock); 705 if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) { 706 nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED; 707 nfs_lockd_mount_unregister(nmp); 708 } 709 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 710 lck_mtx_unlock(&nmp->nm_lock); 711 printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n", 712 vfs_statfs(nmp->nm_mountp)->f_mntfromname); 713 return (error); 714 } 715 if (!error) { 716 /* record that NFS file locking has worked on this mount */ 717 if (nmp) { 718 lck_mtx_lock(&nmp->nm_lock); 719 if (!(nmp->nm_state & NFSSTA_LOCKSWORK)) 720 nmp->nm_state |= NFSSTA_LOCKSWORK; 721 lck_mtx_unlock(&nmp->nm_lock); 722 } 723 } 724 break; 725 } 726 727 nfs_lockdmsg_dequeue(msgreq); 728 729 lck_mtx_unlock(nfs_lock_mutex); 730 731 return (error); 732} 733 734/* 735 * Send an NLM LOCK message to the server 736 */ 737int 738nfs3_setlock_rpc( 739 nfsnode_t np, 740 struct nfs_open_file *nofp, 741 struct nfs_file_lock *nflp, 742 int reclaim, 743 int flags, 744 thread_t thd, 745 kauth_cred_t cred) 746{ 747 struct nfs_lock_owner *nlop = nflp->nfl_owner; 748 struct nfsmount *nmp; 749 int error; 750 LOCKD_MSG_REQUEST msgreq; 751 LOCKD_MSG *msg; 752 753 nmp = NFSTONMP(np); 754 if (!nmp) 755 return (ENXIO); 756 757 if (!nlop->nlo_open_owner) { 758 nfs_open_owner_ref(nofp->nof_owner); 759 nlop->nlo_open_owner = nofp->nof_owner; 760 } 761 if ((error = nfs_lock_owner_set_busy(nlop, thd))) 762 return (error); 763 764 /* set up lock message request structure */ 765 bzero(&msgreq, sizeof(msgreq)); 766 msg = &msgreq.lmr_msg; 767 msg->lm_version = LOCKD_MSG_VERSION; 768 if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim) 769 msg->lm_flags |= LOCKD_MSG_BLOCK; 770 if (reclaim) 771 msg->lm_flags |= LOCKD_MSG_RECLAIM; 772 msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize; 773 bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len); 774 cru2x(cred, &msg->lm_cred); 775 776 msg->lm_fl.l_whence = SEEK_SET; 777 msg->lm_fl.l_start = nflp->nfl_start; 778 msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end); 779 msg->lm_fl.l_type = nflp->nfl_type; 780 msg->lm_fl.l_pid = nlop->nlo_pid; 781 782 error = nfs3_lockd_request(np, 0, &msgreq, flags, thd); 783 784 nfs_lock_owner_clear_busy(nlop); 785 return (error); 786} 787 788/* 789 * Send an NLM UNLOCK message to the server 790 */ 791int 792nfs3_unlock_rpc( 793 nfsnode_t np, 794 struct nfs_lock_owner *nlop, 795 __unused int type, 796 uint64_t start, 797 uint64_t end, 798 int flags, 799 thread_t thd, 800 kauth_cred_t cred) 801{ 802 struct nfsmount *nmp; 803 LOCKD_MSG_REQUEST msgreq; 804 LOCKD_MSG *msg; 805 806 nmp = NFSTONMP(np); 807 if (!nmp) 808 return (ENXIO); 809 810 /* set up lock message request structure */ 811 bzero(&msgreq, sizeof(msgreq)); 812 msg = &msgreq.lmr_msg; 813 msg->lm_version = LOCKD_MSG_VERSION; 814 msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize; 815 bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len); 816 cru2x(cred, &msg->lm_cred); 817 818 msg->lm_fl.l_whence = SEEK_SET; 819 msg->lm_fl.l_start = start; 820 msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end); 821 msg->lm_fl.l_type = F_UNLCK; 822 msg->lm_fl.l_pid = nlop->nlo_pid; 823 824 return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd)); 825} 826 827/* 828 * Send an NLM LOCK TEST message to the server 829 */ 830int 831nfs3_getlock_rpc( 832 nfsnode_t np, 833 struct nfs_lock_owner *nlop, 834 struct flock *fl, 835 uint64_t start, 836 uint64_t end, 837 vfs_context_t ctx) 838{ 839 struct nfsmount *nmp; 840 int error; 841 LOCKD_MSG_REQUEST msgreq; 842 LOCKD_MSG *msg; 843 844 nmp = NFSTONMP(np); 845 if (!nmp) 846 return (ENXIO); 847 848 /* set up lock message request structure */ 849 bzero(&msgreq, sizeof(msgreq)); 850 msg = &msgreq.lmr_msg; 851 msg->lm_version = LOCKD_MSG_VERSION; 852 msg->lm_flags |= LOCKD_MSG_TEST; 853 msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize; 854 bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len); 855 cru2x(vfs_context_ucred(ctx), &msg->lm_cred); 856 857 msg->lm_fl.l_whence = SEEK_SET; 858 msg->lm_fl.l_start = start; 859 msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end); 860 msg->lm_fl.l_type = fl->l_type; 861 msg->lm_fl.l_pid = nlop->nlo_pid; 862 863 error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx)); 864 865 if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) { 866 if (msg->lm_fl.l_type != F_UNLCK) { 867 fl->l_type = msg->lm_fl.l_type; 868 fl->l_pid = msg->lm_fl.l_pid; 869 fl->l_start = msg->lm_fl.l_start; 870 fl->l_len = msg->lm_fl.l_len; 871 fl->l_whence = SEEK_SET; 872 } else 873 fl->l_type = F_UNLCK; 874 } 875 876 return (error); 877} 878 879/* 880 * nfslockdans -- 881 * NFS advisory byte-level locks answer from the lock daemon. 882 */ 883int 884nfslockdans(proc_t p, struct lockd_ans *ansp) 885{ 886 LOCKD_MSG_REQUEST *msgreq; 887 int error; 888 889 /* Let root make this call. */ 890 error = proc_suser(p); 891 if (error) 892 return (error); 893 894 /* the version should match, or we're out of sync */ 895 if (ansp->la_version != LOCKD_ANS_VERSION) 896 return (EINVAL); 897 898 lck_mtx_lock(nfs_lock_mutex); 899 900 /* try to find the lockd message by transaction id (cookie) */ 901 msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid); 902 if (ansp->la_flags & LOCKD_ANS_GRANTED) { 903 /* 904 * We can't depend on the granted message having our cookie, 905 * so we check the answer against the lockd message found. 906 * If no message was found or it doesn't match the answer, 907 * we look for the lockd message by the answer's lock info. 908 */ 909 if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp)) 910 msgreq = nfs_lockdmsg_find_by_answer(ansp); 911 /* 912 * We need to make sure this request isn't being cancelled 913 * If it is, we don't want to accept the granted message. 914 */ 915 if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL)) 916 msgreq = NULL; 917 } 918 if (!msgreq) { 919 lck_mtx_unlock(nfs_lock_mutex); 920 return (EPIPE); 921 } 922 923 msgreq->lmr_errno = ansp->la_errno; 924 if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) { 925 if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) { 926 if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL) 927 msgreq->lmr_msg.lm_fl.l_type = F_WRLCK; 928 else 929 msgreq->lmr_msg.lm_fl.l_type = F_RDLCK; 930 msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid; 931 msgreq->lmr_msg.lm_fl.l_start = ansp->la_start; 932 msgreq->lmr_msg.lm_fl.l_len = ansp->la_len; 933 } else { 934 msgreq->lmr_msg.lm_fl.l_type = F_UNLCK; 935 } 936 } 937 if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE) 938 msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE; 939 940 msgreq->lmr_answered = 1; 941 lck_mtx_unlock(nfs_lock_mutex); 942 wakeup(msgreq); 943 944 return (0); 945} 946 947/* 948 * nfslockdnotify -- 949 * NFS host restart notification from the lock daemon. 950 * 951 * Used to initiate reclaiming of held locks when a server we 952 * have mounted reboots. 953 */ 954int 955nfslockdnotify(proc_t p, user_addr_t argp) 956{ 957 int error, i, headsize; 958 struct lockd_notify ln; 959 struct nfsmount *nmp; 960 struct sockaddr *saddr; 961 962 /* Let root make this call. */ 963 error = proc_suser(p); 964 if (error) 965 return (error); 966 967 headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version; 968 error = copyin(argp, &ln, headsize); 969 if (error) 970 return (error); 971 if (ln.ln_version != LOCKD_NOTIFY_VERSION) 972 return (EINVAL); 973 if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128)) 974 return (EINVAL); 975 argp += headsize; 976 saddr = (struct sockaddr *)&ln.ln_addr[0]; 977 978 lck_mtx_lock(nfs_lock_mutex); 979 980 for (i=0; i < ln.ln_addrcount; i++) { 981 error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0])); 982 if (error) 983 break; 984 argp += sizeof(ln.ln_addr[0]); 985 /* scan lockd mount list for match to this address */ 986 TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) { 987 /* check if address matches this mount's server address */ 988 if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr)) 989 continue; 990 /* We have a match! Mark it as needing recovery. */ 991 lck_mtx_lock(&nmp->nm_lock); 992 nfs_need_recover(nmp, 0); 993 lck_mtx_unlock(&nmp->nm_lock); 994 } 995 } 996 997 lck_mtx_unlock(nfs_lock_mutex); 998 999 return (error); 1000} 1001 1002