nlm_advlock.c revision 302192
1/*- 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/10/sys/nlm/nlm_advlock.c 302192 2016-06-25 09:32:35Z kib $"); 30 31#include <sys/param.h> 32#include <sys/fcntl.h> 33#include <sys/jail.h> 34#include <sys/kernel.h> 35#include <sys/limits.h> 36#include <sys/lock.h> 37#include <sys/lockf.h> 38#include <sys/malloc.h> 39#include <sys/mbuf.h> 40#include <sys/mount.h> 41#include <sys/mutex.h> 42#include <sys/proc.h> 43#include <sys/socket.h> 44#include <sys/syslog.h> 45#include <sys/systm.h> 46#include <sys/unistd.h> 47#include <sys/vnode.h> 48 49#include <nfs/nfsproto.h> 50#include <nfsclient/nfs.h> 51#include <nfsclient/nfsmount.h> 52 53#include <nlm/nlm_prot.h> 54#include <nlm/nlm.h> 55 56/* 57 * We need to keep track of the svid values used for F_FLOCK locks. 58 */ 59struct nlm_file_svid { 60 int ns_refs; /* thread count + 1 if active */ 61 int ns_svid; /* on-the-wire SVID for this file */ 62 struct ucred *ns_ucred; /* creds to use for lock recovery */ 63 void *ns_id; /* local struct file pointer */ 64 bool_t ns_active; /* TRUE if we own a lock */ 65 LIST_ENTRY(nlm_file_svid) ns_link; 66}; 67LIST_HEAD(nlm_file_svid_list, nlm_file_svid); 68 69#define NLM_SVID_HASH_SIZE 256 70struct nlm_file_svid_list nlm_file_svids[NLM_SVID_HASH_SIZE]; 71 72struct mtx nlm_svid_lock; 73static struct unrhdr *nlm_svid_allocator; 74static volatile u_int nlm_xid = 1; 75 76static int nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext, 77 rpcvers_t vers, struct timeval *timo, int retries, 78 struct vnode *vp, int op, struct flock *fl, int flags, 79 int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim); 80static int nlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext, 81 rpcvers_t vers, struct timeval *timo, int retries, 82 struct vnode *vp, int op, struct flock *fl, int flags, 83 int svid, size_t fhlen, void *fh, off_t size); 84static int nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext, 85 rpcvers_t vers, struct timeval *timo, int retries, 86 struct vnode *vp, int op, struct flock *fl, int flags, 87 int svid, size_t fhlen, void *fh, off_t size); 88static int nlm_map_status(nlm4_stats stat); 89static struct nlm_file_svid *nlm_find_svid(void *id); 90static void nlm_free_svid(struct nlm_file_svid *nf); 91static int nlm_init_lock(struct flock *fl, int flags, int svid, 92 rpcvers_t vers, size_t fhlen, void *fh, off_t size, 93 struct nlm4_lock *lock, char oh_space[32]); 94 95static void 96nlm_client_init(void *dummy) 97{ 98 int i; 99 100 mtx_init(&nlm_svid_lock, "NLM svid lock", NULL, MTX_DEF); 101 /* pid_max cannot be greater than PID_MAX */ 102 nlm_svid_allocator = new_unrhdr(PID_MAX + 2, INT_MAX, &nlm_svid_lock); 103 for (i = 0; i < NLM_SVID_HASH_SIZE; i++) 104 LIST_INIT(&nlm_file_svids[i]); 105} 106SYSINIT(nlm_client_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_client_init, NULL); 107 108static int 109nlm_msg(struct thread *td, const char *server, const char *msg, int error) 110{ 111 struct proc *p; 112 113 p = td ? td->td_proc : NULL; 114 if (error) { 115 tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, 116 msg, error); 117 } else { 118 tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); 119 } 120 return (0); 121} 122 123struct nlm_feedback_arg { 124 bool_t nf_printed; 125 struct nfsmount *nf_nmp; 126}; 127 128static void 129nlm_down(struct nlm_feedback_arg *nf, struct thread *td, 130 const char *msg, int error) 131{ 132 struct nfsmount *nmp = nf->nf_nmp; 133 134 if (nmp == NULL) 135 return; 136 mtx_lock(&nmp->nm_mtx); 137 if (!(nmp->nm_state & NFSSTA_LOCKTIMEO)) { 138 nmp->nm_state |= NFSSTA_LOCKTIMEO; 139 mtx_unlock(&nmp->nm_mtx); 140 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 141 VQ_NOTRESPLOCK, 0); 142 } else { 143 mtx_unlock(&nmp->nm_mtx); 144 } 145 146 nf->nf_printed = TRUE; 147 nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); 148} 149 150static void 151nlm_up(struct nlm_feedback_arg *nf, struct thread *td, 152 const char *msg) 153{ 154 struct nfsmount *nmp = nf->nf_nmp; 155 156 if (!nf->nf_printed) 157 return; 158 159 nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); 160 161 mtx_lock(&nmp->nm_mtx); 162 if (nmp->nm_state & NFSSTA_LOCKTIMEO) { 163 nmp->nm_state &= ~NFSSTA_LOCKTIMEO; 164 mtx_unlock(&nmp->nm_mtx); 165 vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, 166 VQ_NOTRESPLOCK, 1); 167 } else { 168 mtx_unlock(&nmp->nm_mtx); 169 } 170} 171 172static void 173nlm_feedback(int type, int proc, void *arg) 174{ 175 struct thread *td = curthread; 176 struct nlm_feedback_arg *nf = (struct nlm_feedback_arg *) arg; 177 178 switch (type) { 179 case FEEDBACK_REXMIT2: 180 case FEEDBACK_RECONNECT: 181 nlm_down(nf, td, "lockd not responding", 0); 182 break; 183 184 case FEEDBACK_OK: 185 nlm_up(nf, td, "lockd is alive again"); 186 break; 187 } 188} 189 190/* 191 * nlm_advlock -- 192 * NFS advisory byte-level locks. 193 */ 194static int 195nlm_advlock_internal(struct vnode *vp, void *id, int op, struct flock *fl, 196 int flags, bool_t reclaim, bool_t unlock_vp) 197{ 198 struct thread *td = curthread; 199 struct nfsmount *nmp; 200 off_t size; 201 size_t fhlen; 202 union nfsfh fh; 203 struct sockaddr *sa; 204 struct sockaddr_storage ss; 205 char servername[MNAMELEN]; 206 struct timeval timo; 207 int retries; 208 rpcvers_t vers; 209 struct nlm_host *host; 210 struct rpc_callextra ext; 211 struct nlm_feedback_arg nf; 212 AUTH *auth; 213 struct ucred *cred, *cred1; 214 struct nlm_file_svid *ns; 215 int svid; 216 int error; 217 int is_v3; 218 219 ASSERT_VOP_LOCKED(vp, "nlm_advlock_1"); 220 221 nmp = VFSTONFS(vp->v_mount); 222 /* 223 * Push any pending writes to the server and flush our cache 224 * so that if we are contending with another machine for a 225 * file, we get whatever they wrote and vice-versa. 226 */ 227 if (op == F_SETLK || op == F_UNLCK) 228 nmp->nm_vinvalbuf(vp, V_SAVE, td, 1); 229 230 strcpy(servername, nmp->nm_hostname); 231 nmp->nm_getinfo(vp, fh.fh_bytes, &fhlen, &ss, &is_v3, &size, &timo); 232 sa = (struct sockaddr *) &ss; 233 if (is_v3 != 0) 234 vers = NLM_VERS4; 235 else 236 vers = NLM_VERS; 237 238 if (nmp->nm_flag & NFSMNT_SOFT) 239 retries = nmp->nm_retry; 240 else 241 retries = INT_MAX; 242 243 /* 244 * We need to switch to mount-point creds so that we can send 245 * packets from a privileged port. Reference mnt_cred and 246 * switch to them before unlocking the vnode, since mount 247 * point could be unmounted right after unlock. 248 */ 249 cred = td->td_ucred; 250 td->td_ucred = vp->v_mount->mnt_cred; 251 crhold(td->td_ucred); 252 if (unlock_vp) 253 VOP_UNLOCK(vp, 0); 254 255 host = nlm_find_host_by_name(servername, sa, vers); 256 auth = authunix_create(cred); 257 memset(&ext, 0, sizeof(ext)); 258 259 nf.nf_printed = FALSE; 260 nf.nf_nmp = nmp; 261 ext.rc_auth = auth; 262 263 ext.rc_feedback = nlm_feedback; 264 ext.rc_feedback_arg = &nf; 265 ext.rc_timers = NULL; 266 267 ns = NULL; 268 if (flags & F_FLOCK) { 269 ns = nlm_find_svid(id); 270 KASSERT(fl->l_start == 0 && fl->l_len == 0, 271 ("F_FLOCK lock requests must be whole-file locks")); 272 if (!ns->ns_ucred) { 273 /* 274 * Remember the creds used for locking in case 275 * we need to recover the lock later. 276 */ 277 ns->ns_ucred = crdup(cred); 278 } 279 svid = ns->ns_svid; 280 } else if (flags & F_REMOTE) { 281 /* 282 * If we are recovering after a server restart or 283 * trashing locks on a force unmount, use the same 284 * svid as last time. 285 */ 286 svid = fl->l_pid; 287 } else { 288 svid = ((struct proc *) id)->p_pid; 289 } 290 291 switch(op) { 292 case F_SETLK: 293 if ((flags & (F_FLOCK|F_WAIT)) == (F_FLOCK|F_WAIT) 294 && fl->l_type == F_WRLCK) { 295 /* 296 * The semantics for flock(2) require that any 297 * shared lock on the file must be released 298 * before an exclusive lock is granted. The 299 * local locking code interprets this by 300 * unlocking the file before sleeping on a 301 * blocked exclusive lock request. We 302 * approximate this by first attempting 303 * non-blocking and if that fails, we unlock 304 * the file and block. 305 */ 306 error = nlm_setlock(host, &ext, vers, &timo, retries, 307 vp, F_SETLK, fl, flags & ~F_WAIT, 308 svid, fhlen, &fh.fh_bytes, size, reclaim); 309 if (error == EAGAIN) { 310 fl->l_type = F_UNLCK; 311 error = nlm_clearlock(host, &ext, vers, &timo, 312 retries, vp, F_UNLCK, fl, flags, 313 svid, fhlen, &fh.fh_bytes, size); 314 fl->l_type = F_WRLCK; 315 if (!error) { 316 mtx_lock(&nlm_svid_lock); 317 if (ns->ns_active) { 318 ns->ns_refs--; 319 ns->ns_active = FALSE; 320 } 321 mtx_unlock(&nlm_svid_lock); 322 flags |= F_WAIT; 323 error = nlm_setlock(host, &ext, vers, 324 &timo, retries, vp, F_SETLK, fl, 325 flags, svid, fhlen, &fh.fh_bytes, 326 size, reclaim); 327 } 328 } 329 } else { 330 error = nlm_setlock(host, &ext, vers, &timo, retries, 331 vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, 332 size, reclaim); 333 } 334 if (!error && ns) { 335 mtx_lock(&nlm_svid_lock); 336 if (!ns->ns_active) { 337 /* 338 * Add one to the reference count to 339 * hold onto the SVID for the lifetime 340 * of the lock. Note that since 341 * F_FLOCK only supports whole-file 342 * locks, there can only be one active 343 * lock for this SVID. 344 */ 345 ns->ns_refs++; 346 ns->ns_active = TRUE; 347 } 348 mtx_unlock(&nlm_svid_lock); 349 } 350 break; 351 352 case F_UNLCK: 353 error = nlm_clearlock(host, &ext, vers, &timo, retries, 354 vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size); 355 if (!error && ns) { 356 mtx_lock(&nlm_svid_lock); 357 if (ns->ns_active) { 358 ns->ns_refs--; 359 ns->ns_active = FALSE; 360 } 361 mtx_unlock(&nlm_svid_lock); 362 } 363 break; 364 365 case F_GETLK: 366 error = nlm_getlock(host, &ext, vers, &timo, retries, 367 vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size); 368 break; 369 370 default: 371 error = EINVAL; 372 break; 373 } 374 375 if (ns) 376 nlm_free_svid(ns); 377 378 cred1 = td->td_ucred; 379 td->td_ucred = cred; 380 crfree(cred1); 381 AUTH_DESTROY(auth); 382 383 nlm_host_release(host); 384 385 return (error); 386} 387 388int 389nlm_advlock(struct vop_advlock_args *ap) 390{ 391 392 return (nlm_advlock_internal(ap->a_vp, ap->a_id, ap->a_op, ap->a_fl, 393 ap->a_flags, FALSE, TRUE)); 394} 395 396/* 397 * Set the creds of td to the creds of the given lock's owner. The new 398 * creds reference count will be incremented via crhold. The caller is 399 * responsible for calling crfree and restoring td's original creds. 400 */ 401static void 402nlm_set_creds_for_lock(struct thread *td, struct flock *fl) 403{ 404 int i; 405 struct nlm_file_svid *ns; 406 struct proc *p; 407 struct ucred *cred; 408 409 cred = NULL; 410 if (fl->l_pid > PID_MAX) { 411 /* 412 * If this was originally a F_FLOCK-style lock, we 413 * recorded the creds used when it was originally 414 * locked in the nlm_file_svid structure. 415 */ 416 mtx_lock(&nlm_svid_lock); 417 for (i = 0; i < NLM_SVID_HASH_SIZE; i++) { 418 for (ns = LIST_FIRST(&nlm_file_svids[i]); ns; 419 ns = LIST_NEXT(ns, ns_link)) { 420 if (ns->ns_svid == fl->l_pid) { 421 cred = crhold(ns->ns_ucred); 422 break; 423 } 424 } 425 } 426 mtx_unlock(&nlm_svid_lock); 427 } else { 428 /* 429 * This lock is owned by a process. Get a reference to 430 * the process creds. 431 */ 432 p = pfind(fl->l_pid); 433 if (p) { 434 cred = crhold(p->p_ucred); 435 PROC_UNLOCK(p); 436 } 437 } 438 439 /* 440 * If we can't find a cred, fall back on the recovery 441 * thread's cred. 442 */ 443 if (!cred) { 444 cred = crhold(td->td_ucred); 445 } 446 447 td->td_ucred = cred; 448} 449 450static int 451nlm_reclaim_free_lock(struct vnode *vp, struct flock *fl, void *arg) 452{ 453 struct flock newfl; 454 struct thread *td = curthread; 455 struct ucred *oldcred; 456 int error; 457 458 newfl = *fl; 459 newfl.l_type = F_UNLCK; 460 461 oldcred = td->td_ucred; 462 nlm_set_creds_for_lock(td, &newfl); 463 464 error = nlm_advlock_internal(vp, NULL, F_UNLCK, &newfl, F_REMOTE, 465 FALSE, FALSE); 466 467 crfree(td->td_ucred); 468 td->td_ucred = oldcred; 469 470 return (error); 471} 472 473int 474nlm_reclaim(struct vop_reclaim_args *ap) 475{ 476 477 nlm_cancel_wait(ap->a_vp); 478 lf_iteratelocks_vnode(ap->a_vp, nlm_reclaim_free_lock, NULL); 479 return (0); 480} 481 482struct nlm_recovery_context { 483 struct nlm_host *nr_host; /* host we are recovering */ 484 int nr_state; /* remote NSM state for recovery */ 485}; 486 487static int 488nlm_client_recover_lock(struct vnode *vp, struct flock *fl, void *arg) 489{ 490 struct nlm_recovery_context *nr = (struct nlm_recovery_context *) arg; 491 struct thread *td = curthread; 492 struct ucred *oldcred; 493 int state, error; 494 495 /* 496 * If the remote NSM state changes during recovery, the host 497 * must have rebooted a second time. In that case, we must 498 * restart the recovery. 499 */ 500 state = nlm_host_get_state(nr->nr_host); 501 if (nr->nr_state != state) 502 return (ERESTART); 503 504 error = vn_lock(vp, LK_SHARED); 505 if (error) 506 return (error); 507 508 oldcred = td->td_ucred; 509 nlm_set_creds_for_lock(td, fl); 510 511 error = nlm_advlock_internal(vp, NULL, F_SETLK, fl, F_REMOTE, 512 TRUE, TRUE); 513 514 crfree(td->td_ucred); 515 td->td_ucred = oldcred; 516 517 return (error); 518} 519 520void 521nlm_client_recovery(struct nlm_host *host) 522{ 523 struct nlm_recovery_context nr; 524 int sysid, error; 525 526 sysid = NLM_SYSID_CLIENT | nlm_host_get_sysid(host); 527 do { 528 nr.nr_host = host; 529 nr.nr_state = nlm_host_get_state(host); 530 error = lf_iteratelocks_sysid(sysid, 531 nlm_client_recover_lock, &nr); 532 } while (error == ERESTART); 533} 534 535static void 536nlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src) 537{ 538 539 dst->caller_name = src->caller_name; 540 dst->fh = src->fh; 541 dst->oh = src->oh; 542 dst->svid = src->svid; 543 dst->l_offset = src->l_offset; 544 dst->l_len = src->l_len; 545} 546 547static void 548nlm_convert_to_nlm4_holder(struct nlm4_holder *dst, struct nlm_holder *src) 549{ 550 551 dst->exclusive = src->exclusive; 552 dst->svid = src->svid; 553 dst->oh = src->oh; 554 dst->l_offset = src->l_offset; 555 dst->l_len = src->l_len; 556} 557 558static void 559nlm_convert_to_nlm4_res(struct nlm4_res *dst, struct nlm_res *src) 560{ 561 dst->cookie = src->cookie; 562 dst->stat.stat = (enum nlm4_stats) src->stat.stat; 563} 564 565static enum clnt_stat 566nlm_test_rpc(rpcvers_t vers, nlm4_testargs *args, nlm4_testres *res, CLIENT *client, 567 struct rpc_callextra *ext, struct timeval timo) 568{ 569 if (vers == NLM_VERS4) { 570 return nlm4_test_4(args, res, client, ext, timo); 571 } else { 572 nlm_testargs args1; 573 nlm_testres res1; 574 enum clnt_stat stat; 575 576 args1.cookie = args->cookie; 577 args1.exclusive = args->exclusive; 578 nlm_convert_to_nlm_lock(&args1.alock, &args->alock); 579 memset(&res1, 0, sizeof(res1)); 580 581 stat = nlm_test_1(&args1, &res1, client, ext, timo); 582 583 if (stat == RPC_SUCCESS) { 584 res->cookie = res1.cookie; 585 res->stat.stat = (enum nlm4_stats) res1.stat.stat; 586 if (res1.stat.stat == nlm_denied) 587 nlm_convert_to_nlm4_holder( 588 &res->stat.nlm4_testrply_u.holder, 589 &res1.stat.nlm_testrply_u.holder); 590 } 591 592 return (stat); 593 } 594} 595 596static enum clnt_stat 597nlm_lock_rpc(rpcvers_t vers, nlm4_lockargs *args, nlm4_res *res, CLIENT *client, 598 struct rpc_callextra *ext, struct timeval timo) 599{ 600 if (vers == NLM_VERS4) { 601 return nlm4_lock_4(args, res, client, ext, timo); 602 } else { 603 nlm_lockargs args1; 604 nlm_res res1; 605 enum clnt_stat stat; 606 607 args1.cookie = args->cookie; 608 args1.block = args->block; 609 args1.exclusive = args->exclusive; 610 nlm_convert_to_nlm_lock(&args1.alock, &args->alock); 611 args1.reclaim = args->reclaim; 612 args1.state = args->state; 613 memset(&res1, 0, sizeof(res1)); 614 615 stat = nlm_lock_1(&args1, &res1, client, ext, timo); 616 617 if (stat == RPC_SUCCESS) { 618 nlm_convert_to_nlm4_res(res, &res1); 619 } 620 621 return (stat); 622 } 623} 624 625static enum clnt_stat 626nlm_cancel_rpc(rpcvers_t vers, nlm4_cancargs *args, nlm4_res *res, CLIENT *client, 627 struct rpc_callextra *ext, struct timeval timo) 628{ 629 if (vers == NLM_VERS4) { 630 return nlm4_cancel_4(args, res, client, ext, timo); 631 } else { 632 nlm_cancargs args1; 633 nlm_res res1; 634 enum clnt_stat stat; 635 636 args1.cookie = args->cookie; 637 args1.block = args->block; 638 args1.exclusive = args->exclusive; 639 nlm_convert_to_nlm_lock(&args1.alock, &args->alock); 640 memset(&res1, 0, sizeof(res1)); 641 642 stat = nlm_cancel_1(&args1, &res1, client, ext, timo); 643 644 if (stat == RPC_SUCCESS) { 645 nlm_convert_to_nlm4_res(res, &res1); 646 } 647 648 return (stat); 649 } 650} 651 652static enum clnt_stat 653nlm_unlock_rpc(rpcvers_t vers, nlm4_unlockargs *args, nlm4_res *res, CLIENT *client, 654 struct rpc_callextra *ext, struct timeval timo) 655{ 656 if (vers == NLM_VERS4) { 657 return nlm4_unlock_4(args, res, client, ext, timo); 658 } else { 659 nlm_unlockargs args1; 660 nlm_res res1; 661 enum clnt_stat stat; 662 663 args1.cookie = args->cookie; 664 nlm_convert_to_nlm_lock(&args1.alock, &args->alock); 665 memset(&res1, 0, sizeof(res1)); 666 667 stat = nlm_unlock_1(&args1, &res1, client, ext, timo); 668 669 if (stat == RPC_SUCCESS) { 670 nlm_convert_to_nlm4_res(res, &res1); 671 } 672 673 return (stat); 674 } 675} 676 677/* 678 * Called after a lock request (set or clear) succeeded. We record the 679 * details in the local lock manager. Note that since the remote 680 * server has granted the lock, we can be sure that it doesn't 681 * conflict with any other locks we have in the local lock manager. 682 * 683 * Since it is possible that host may also make NLM client requests to 684 * our NLM server, we use a different sysid value to record our own 685 * client locks. 686 * 687 * Note that since it is possible for us to receive replies from the 688 * server in a different order than the locks were granted (e.g. if 689 * many local threads are contending for the same lock), we must use a 690 * blocking operation when registering with the local lock manager. 691 * We expect that any actual wait will be rare and short hence we 692 * ignore signals for this. 693 */ 694static void 695nlm_record_lock(struct vnode *vp, int op, struct flock *fl, 696 int svid, int sysid, off_t size) 697{ 698 struct vop_advlockasync_args a; 699 struct flock newfl; 700 int error; 701 702 a.a_vp = vp; 703 a.a_id = NULL; 704 a.a_op = op; 705 a.a_fl = &newfl; 706 a.a_flags = F_REMOTE|F_WAIT|F_NOINTR; 707 a.a_task = NULL; 708 a.a_cookiep = NULL; 709 newfl.l_start = fl->l_start; 710 newfl.l_len = fl->l_len; 711 newfl.l_type = fl->l_type; 712 newfl.l_whence = fl->l_whence; 713 newfl.l_pid = svid; 714 newfl.l_sysid = NLM_SYSID_CLIENT | sysid; 715 716 error = lf_advlockasync(&a, &vp->v_lockf, size); 717 KASSERT(error == 0 || error == ENOENT, 718 ("Failed to register NFS lock locally - error=%d", error)); 719} 720 721static int 722nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext, 723 rpcvers_t vers, struct timeval *timo, int retries, 724 struct vnode *vp, int op, struct flock *fl, int flags, 725 int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim) 726{ 727 struct nlm4_lockargs args; 728 char oh_space[32]; 729 struct nlm4_res res; 730 u_int xid; 731 CLIENT *client; 732 enum clnt_stat stat; 733 int retry, block, exclusive; 734 void *wait_handle = NULL; 735 int error; 736 737 memset(&args, 0, sizeof(args)); 738 memset(&res, 0, sizeof(res)); 739 740 block = (flags & F_WAIT) ? TRUE : FALSE; 741 exclusive = (fl->l_type == F_WRLCK); 742 743 error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, 744 &args.alock, oh_space); 745 if (error) 746 return (error); 747 args.block = block; 748 args.exclusive = exclusive; 749 args.reclaim = reclaim; 750 args.state = nlm_nsm_state; 751 752 retry = 5*hz; 753 for (;;) { 754 client = nlm_host_get_rpc(host, FALSE); 755 if (!client) 756 return (ENOLCK); /* XXX retry? */ 757 758 if (block) 759 wait_handle = nlm_register_wait_lock(&args.alock, vp); 760 761 xid = atomic_fetchadd_int(&nlm_xid, 1); 762 args.cookie.n_len = sizeof(xid); 763 args.cookie.n_bytes = (char*) &xid; 764 765 stat = nlm_lock_rpc(vers, &args, &res, client, ext, *timo); 766 767 CLNT_RELEASE(client); 768 769 if (stat != RPC_SUCCESS) { 770 if (block) 771 nlm_deregister_wait_lock(wait_handle); 772 if (retries) { 773 retries--; 774 continue; 775 } 776 return (EINVAL); 777 } 778 779 /* 780 * Free res.cookie. 781 */ 782 xdr_free((xdrproc_t) xdr_nlm4_res, &res); 783 784 if (block && res.stat.stat != nlm4_blocked) 785 nlm_deregister_wait_lock(wait_handle); 786 787 if (res.stat.stat == nlm4_denied_grace_period) { 788 /* 789 * The server has recently rebooted and is 790 * giving old clients a change to reclaim 791 * their locks. Wait for a few seconds and try 792 * again. 793 */ 794 error = tsleep(&args, PCATCH, "nlmgrace", retry); 795 if (error && error != EWOULDBLOCK) 796 return (error); 797 retry = 2*retry; 798 if (retry > 30*hz) 799 retry = 30*hz; 800 continue; 801 } 802 803 if (block && res.stat.stat == nlm4_blocked) { 804 /* 805 * The server should call us back with a 806 * granted message when the lock succeeds. In 807 * order to deal with broken servers, lost 808 * granted messages and server reboots, we 809 * will also re-try every few seconds. 810 */ 811 error = nlm_wait_lock(wait_handle, retry); 812 if (error == EWOULDBLOCK) { 813 retry = 2*retry; 814 if (retry > 30*hz) 815 retry = 30*hz; 816 continue; 817 } 818 if (error) { 819 /* 820 * We need to call the server to 821 * cancel our lock request. 822 */ 823 nlm4_cancargs cancel; 824 825 memset(&cancel, 0, sizeof(cancel)); 826 827 xid = atomic_fetchadd_int(&nlm_xid, 1); 828 cancel.cookie.n_len = sizeof(xid); 829 cancel.cookie.n_bytes = (char*) &xid; 830 cancel.block = block; 831 cancel.exclusive = exclusive; 832 cancel.alock = args.alock; 833 834 do { 835 client = nlm_host_get_rpc(host, FALSE); 836 if (!client) 837 /* XXX retry? */ 838 return (ENOLCK); 839 840 stat = nlm_cancel_rpc(vers, &cancel, 841 &res, client, ext, *timo); 842 843 CLNT_RELEASE(client); 844 845 if (stat != RPC_SUCCESS) { 846 /* 847 * We need to cope 848 * with temporary 849 * network partitions 850 * as well as server 851 * reboots. This means 852 * we have to keep 853 * trying to cancel 854 * until the server 855 * wakes up again. 856 */ 857 pause("nlmcancel", 10*hz); 858 } 859 } while (stat != RPC_SUCCESS); 860 861 /* 862 * Free res.cookie. 863 */ 864 xdr_free((xdrproc_t) xdr_nlm4_res, &res); 865 866 switch (res.stat.stat) { 867 case nlm_denied: 868 /* 869 * There was nothing 870 * to cancel. We are 871 * going to go ahead 872 * and assume we got 873 * the lock. 874 */ 875 error = 0; 876 break; 877 878 case nlm4_denied_grace_period: 879 /* 880 * The server has 881 * recently rebooted - 882 * treat this as a 883 * successful 884 * cancellation. 885 */ 886 break; 887 888 case nlm4_granted: 889 /* 890 * We managed to 891 * cancel. 892 */ 893 break; 894 895 default: 896 /* 897 * Broken server 898 * implementation - 899 * can't really do 900 * anything here. 901 */ 902 break; 903 } 904 905 } 906 } else { 907 error = nlm_map_status(res.stat.stat); 908 } 909 910 if (!error && !reclaim) { 911 nlm_record_lock(vp, op, fl, args.alock.svid, 912 nlm_host_get_sysid(host), size); 913 nlm_host_monitor(host, 0); 914 } 915 916 return (error); 917 } 918} 919 920static int 921nlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext, 922 rpcvers_t vers, struct timeval *timo, int retries, 923 struct vnode *vp, int op, struct flock *fl, int flags, 924 int svid, size_t fhlen, void *fh, off_t size) 925{ 926 struct nlm4_unlockargs args; 927 char oh_space[32]; 928 struct nlm4_res res; 929 u_int xid; 930 CLIENT *client; 931 enum clnt_stat stat; 932 int error; 933 934 memset(&args, 0, sizeof(args)); 935 memset(&res, 0, sizeof(res)); 936 937 error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, 938 &args.alock, oh_space); 939 if (error) 940 return (error); 941 942 for (;;) { 943 client = nlm_host_get_rpc(host, FALSE); 944 if (!client) 945 return (ENOLCK); /* XXX retry? */ 946 947 xid = atomic_fetchadd_int(&nlm_xid, 1); 948 args.cookie.n_len = sizeof(xid); 949 args.cookie.n_bytes = (char*) &xid; 950 951 stat = nlm_unlock_rpc(vers, &args, &res, client, ext, *timo); 952 953 CLNT_RELEASE(client); 954 955 if (stat != RPC_SUCCESS) { 956 if (retries) { 957 retries--; 958 continue; 959 } 960 return (EINVAL); 961 } 962 963 /* 964 * Free res.cookie. 965 */ 966 xdr_free((xdrproc_t) xdr_nlm4_res, &res); 967 968 if (res.stat.stat == nlm4_denied_grace_period) { 969 /* 970 * The server has recently rebooted and is 971 * giving old clients a change to reclaim 972 * their locks. Wait for a few seconds and try 973 * again. 974 */ 975 error = tsleep(&args, PCATCH, "nlmgrace", 5*hz); 976 if (error && error != EWOULDBLOCK) 977 return (error); 978 continue; 979 } 980 981 /* 982 * If we are being called via nlm_reclaim (which will 983 * use the F_REMOTE flag), don't record the lock 984 * operation in the local lock manager since the vnode 985 * is going away. 986 */ 987 if (!(flags & F_REMOTE)) 988 nlm_record_lock(vp, op, fl, args.alock.svid, 989 nlm_host_get_sysid(host), size); 990 991 return (0); 992 } 993} 994 995static int 996nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext, 997 rpcvers_t vers, struct timeval *timo, int retries, 998 struct vnode *vp, int op, struct flock *fl, int flags, 999 int svid, size_t fhlen, void *fh, off_t size) 1000{ 1001 struct nlm4_testargs args; 1002 char oh_space[32]; 1003 struct nlm4_testres res; 1004 u_int xid; 1005 CLIENT *client; 1006 enum clnt_stat stat; 1007 int exclusive; 1008 int error; 1009 1010 KASSERT(!(flags & F_FLOCK), ("unexpected F_FLOCK for F_GETLK")); 1011 1012 memset(&args, 0, sizeof(args)); 1013 memset(&res, 0, sizeof(res)); 1014 1015 exclusive = (fl->l_type == F_WRLCK); 1016 1017 error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, 1018 &args.alock, oh_space); 1019 if (error) 1020 return (error); 1021 args.exclusive = exclusive; 1022 1023 for (;;) { 1024 client = nlm_host_get_rpc(host, FALSE); 1025 if (!client) 1026 return (ENOLCK); /* XXX retry? */ 1027 1028 xid = atomic_fetchadd_int(&nlm_xid, 1); 1029 args.cookie.n_len = sizeof(xid); 1030 args.cookie.n_bytes = (char*) &xid; 1031 1032 stat = nlm_test_rpc(vers, &args, &res, client, ext, *timo); 1033 1034 CLNT_RELEASE(client); 1035 1036 if (stat != RPC_SUCCESS) { 1037 if (retries) { 1038 retries--; 1039 continue; 1040 } 1041 return (EINVAL); 1042 } 1043 1044 if (res.stat.stat == nlm4_denied_grace_period) { 1045 /* 1046 * The server has recently rebooted and is 1047 * giving old clients a change to reclaim 1048 * their locks. Wait for a few seconds and try 1049 * again. 1050 */ 1051 xdr_free((xdrproc_t) xdr_nlm4_testres, &res); 1052 error = tsleep(&args, PCATCH, "nlmgrace", 5*hz); 1053 if (error && error != EWOULDBLOCK) 1054 return (error); 1055 continue; 1056 } 1057 1058 if (res.stat.stat == nlm4_denied) { 1059 struct nlm4_holder *h = 1060 &res.stat.nlm4_testrply_u.holder; 1061 fl->l_start = h->l_offset; 1062 fl->l_len = h->l_len; 1063 fl->l_pid = h->svid; 1064 if (h->exclusive) 1065 fl->l_type = F_WRLCK; 1066 else 1067 fl->l_type = F_RDLCK; 1068 fl->l_whence = SEEK_SET; 1069 fl->l_sysid = 0; 1070 } else { 1071 fl->l_type = F_UNLCK; 1072 } 1073 1074 xdr_free((xdrproc_t) xdr_nlm4_testres, &res); 1075 1076 return (0); 1077 } 1078} 1079 1080static int 1081nlm_map_status(nlm4_stats stat) 1082{ 1083 switch (stat) { 1084 case nlm4_granted: 1085 return (0); 1086 1087 case nlm4_denied: 1088 return (EAGAIN); 1089 1090 case nlm4_denied_nolocks: 1091 return (ENOLCK); 1092 1093 case nlm4_deadlck: 1094 return (EDEADLK); 1095 1096 case nlm4_rofs: 1097 return (EROFS); 1098 1099 case nlm4_stale_fh: 1100 return (ESTALE); 1101 1102 case nlm4_fbig: 1103 return (EFBIG); 1104 1105 case nlm4_failed: 1106 return (EACCES); 1107 1108 default: 1109 return (EINVAL); 1110 } 1111} 1112 1113static struct nlm_file_svid * 1114nlm_find_svid(void *id) 1115{ 1116 struct nlm_file_svid *ns, *newns; 1117 int h; 1118 1119 h = (((uintptr_t) id) >> 7) % NLM_SVID_HASH_SIZE; 1120 1121 mtx_lock(&nlm_svid_lock); 1122 LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) { 1123 if (ns->ns_id == id) { 1124 ns->ns_refs++; 1125 break; 1126 } 1127 } 1128 mtx_unlock(&nlm_svid_lock); 1129 if (!ns) { 1130 int svid = alloc_unr(nlm_svid_allocator); 1131 newns = malloc(sizeof(struct nlm_file_svid), M_NLM, 1132 M_WAITOK); 1133 newns->ns_refs = 1; 1134 newns->ns_id = id; 1135 newns->ns_svid = svid; 1136 newns->ns_ucred = NULL; 1137 newns->ns_active = FALSE; 1138 1139 /* 1140 * We need to check for a race with some other 1141 * thread allocating a svid for this file. 1142 */ 1143 mtx_lock(&nlm_svid_lock); 1144 LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) { 1145 if (ns->ns_id == id) { 1146 ns->ns_refs++; 1147 break; 1148 } 1149 } 1150 if (ns) { 1151 mtx_unlock(&nlm_svid_lock); 1152 free_unr(nlm_svid_allocator, newns->ns_svid); 1153 free(newns, M_NLM); 1154 } else { 1155 LIST_INSERT_HEAD(&nlm_file_svids[h], newns, 1156 ns_link); 1157 ns = newns; 1158 mtx_unlock(&nlm_svid_lock); 1159 } 1160 } 1161 1162 return (ns); 1163} 1164 1165static void 1166nlm_free_svid(struct nlm_file_svid *ns) 1167{ 1168 1169 mtx_lock(&nlm_svid_lock); 1170 ns->ns_refs--; 1171 if (!ns->ns_refs) { 1172 KASSERT(!ns->ns_active, ("Freeing active SVID")); 1173 LIST_REMOVE(ns, ns_link); 1174 mtx_unlock(&nlm_svid_lock); 1175 free_unr(nlm_svid_allocator, ns->ns_svid); 1176 if (ns->ns_ucred) 1177 crfree(ns->ns_ucred); 1178 free(ns, M_NLM); 1179 } else { 1180 mtx_unlock(&nlm_svid_lock); 1181 } 1182} 1183 1184static int 1185nlm_init_lock(struct flock *fl, int flags, int svid, 1186 rpcvers_t vers, size_t fhlen, void *fh, off_t size, 1187 struct nlm4_lock *lock, char oh_space[32]) 1188{ 1189 size_t oh_len; 1190 off_t start, len; 1191 1192 if (fl->l_whence == SEEK_END) { 1193 if (size > OFF_MAX 1194 || (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) 1195 return (EOVERFLOW); 1196 start = size + fl->l_start; 1197 } else if (fl->l_whence == SEEK_SET || fl->l_whence == SEEK_CUR) { 1198 start = fl->l_start; 1199 } else { 1200 return (EINVAL); 1201 } 1202 if (start < 0) 1203 return (EINVAL); 1204 if (fl->l_len < 0) { 1205 len = -fl->l_len; 1206 start -= len; 1207 if (start < 0) 1208 return (EINVAL); 1209 } else { 1210 len = fl->l_len; 1211 } 1212 1213 if (vers == NLM_VERS) { 1214 /* 1215 * Enforce range limits on V1 locks 1216 */ 1217 if (start > 0xffffffffLL || len > 0xffffffffLL) 1218 return (EOVERFLOW); 1219 } 1220 1221 snprintf(oh_space, 32, "%d@", svid); 1222 oh_len = strlen(oh_space); 1223 getcredhostname(NULL, oh_space + oh_len, 32 - oh_len); 1224 oh_len = strlen(oh_space); 1225 1226 memset(lock, 0, sizeof(*lock)); 1227 lock->caller_name = prison0.pr_hostname; 1228 lock->fh.n_len = fhlen; 1229 lock->fh.n_bytes = fh; 1230 lock->oh.n_len = oh_len; 1231 lock->oh.n_bytes = oh_space; 1232 lock->svid = svid; 1233 lock->l_offset = start; 1234 lock->l_len = len; 1235 1236 return (0); 1237} 1238