nlm_prot_impl.c revision 179488
1/*- 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
 */

#include "opt_inet6.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/nlm/nlm_prot_impl.c 179488 2008-06-02 15:59:10Z dfr $");

#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#if __FreeBSD_version >= 700000
#include <sys/priv.h>
#endif
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

#include <nlm/nlm_prot.h>
#include <nlm/sm_inter.h>
#include <nlm/nlm.h>
#include <rpc/rpc_com.h>
#include <rpc/rpcb_prot.h>

/* Malloc type used for all NLM allocations (hosts, async locks, names). */
MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");

/*
 * If a host is inactive (and holds no locks) for this amount of
 * seconds, we consider it idle and stop tracking it.
 */
#define NLM_IDLE_TIMEOUT	30

/*
 * We check the host list for idle every few seconds.
 */
#define NLM_IDLE_PERIOD		5

/*
 * Support for sysctl vfs.nlm.sysid - a per-host subtree is added
 * under vfs.nlm.sysid by nlm_create_host().
 */
SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL, "Network Lock Manager");
SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, "");

/*
 * Syscall hooks.  We save the previous sysent so that the slot can be
 * restored when the module is unloaded.
 */
static int nlm_syscall_offset = SYS_nlm_syscall;
static struct sysent nlm_syscall_prev_sysent;
#if __FreeBSD_version < 700000
static struct sysent nlm_syscall_sysent = {
	(sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE,
	(sy_call_t *) nlm_syscall
};
#else
MAKE_SYSENT(nlm_syscall);
#endif
static bool_t nlm_syscall_registered = FALSE;

/*
 * Debug level passed in from userland. We also support a sysctl hook
 * so that it can be changed on a live system.
 */
static int nlm_debug_level;
SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");

/*
 * Grace period handling. The value of nlm_grace_threshold is the
 * value of time_uptime after which we are serving requests normally.
 */
static time_t nlm_grace_threshold;

/*
 * We check for idle hosts if time_uptime is greater than
 * nlm_next_idle_check.
 */
static time_t nlm_next_idle_check;

/*
 * A socket to use for RPC - shared by all IPv4 RPC clients.
 */
static struct socket *nlm_socket;

#ifdef INET6

/*
 * A socket to use for RPC - shared by all IPv6 RPC clients.
 */
static struct socket *nlm_socket6;

#endif

/*
 * An RPC client handle that can be used to communicate with the local
 * NSM (rpc.statd).
 */
static CLIENT *nlm_nsm;

/*
 * An RPC client handle that can be used to communicate with the
 * userland part of lockd.
 */
static CLIENT *nlm_lockd;

/*
 * Locks:
 * (l) locked by nh_lock
 * (s) only accessed via server RPC which is single threaded
 * (c) const until freeing
 */

/*
 * A pending asynchronous lock request, stored on the nh_pending list
 * of the NLM host.
 */
struct nlm_async_lock {
	TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */
	struct task af_task;		/* (c) async callback details */
	void	*af_cookie;		/* (l) lock manager cancel token */
	struct vnode *af_vp;		/* (l) vnode to lock */
	struct flock af_fl;		/* (c) lock details */
	struct nlm_host *af_host;	/* (c) host which is locking */
	nlm4_testargs af_granted;	/* (c) notification details */
};
TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);

/*
 * NLM host.  One of these is tracked for every remote system we have
 * exchanged lock traffic with, keyed by caller_name / address.
 */
enum nlm_host_state {
	NLM_UNMONITORED,	/* not registered with the local NSM */
	NLM_MONITORED,		/* local NSM accepted an SM_MON for us */
	NLM_MONITOR_FAILED	/* local NSM refused to monitor this host */
};
struct nlm_host {
	struct mtx	nh_lock;
	TAILQ_ENTRY(nlm_host) nh_link;	/* (s) global list of hosts */
	char		*nh_caller_name; /* (c) printable name of host */
	uint32_t	nh_sysid;	/* (c) our allocated system ID */
	char		nh_sysid_string[10]; /* (c) string rep. of sysid */
	struct sockaddr_storage	nh_addr; /* (s) remote address of host */
	CLIENT		*nh_rpc;	/* (s) RPC handle to send to host */
	rpcvers_t	nh_vers;	/* (s) NLM version of host */
	int		nh_state;	/* (s) last seen NSM state of host */
	enum nlm_host_state nh_monstate; /* (s) local NSM monitoring state */
	time_t		nh_idle_timeout; /* (s) Time at which host is idle */
	time_t		nh_rpc_create_time; /* (s) Time we create RPC client */
	struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
	struct nlm_async_lock_list nh_pending; /* (l) pending async locks */
	struct nlm_async_lock_list nh_finished; /* (l) finished async locks */
};
TAILQ_HEAD(nlm_host_list, nlm_host);

/*
 * Global host list and the next system ID to hand out.  Per the (s)
 * annotation above, these are only touched from the single-threaded
 * server RPC context.
 */
static struct nlm_host_list nlm_hosts;
static uint32_t nlm_next_sysid = 1;

static void nlm_host_unmonitor(struct nlm_host *);

/**********************************************************************/

/*
 * Initialise NLM globals.
 */
static void
nlm_init(void *dummy)
{
	int error;

	TAILQ_INIT(&nlm_hosts);

	/*
	 * Install the nlm_syscall entry point, remembering the
	 * previous sysent so nlm_uninit() can restore it.
	 */
	error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent,
	    &nlm_syscall_prev_sysent);
	if (error)
		printf("Can't register NLM syscall\n");
	else
		nlm_syscall_registered = TRUE;
}
SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL);

static void
nlm_uninit(void *dummy)
{

	/* Restore the previous sysent captured in nlm_init(). */
	if (nlm_syscall_registered)
		syscall_deregister(&nlm_syscall_offset,
		    &nlm_syscall_prev_sysent);
}
SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL);

/*
 * Copy a struct netobj.  The destination's buffer is freshly
 * allocated from 'type' (M_WAITOK, so this can sleep); the caller
 * owns it and must free it.
 */
void
nlm_copy_netobj(struct netobj *dst, struct netobj *src,
    struct malloc_type *type)
{

	dst->n_len = src->n_len;
	dst->n_bytes = malloc(src->n_len, type, M_WAITOK);
	memcpy(dst->n_bytes, src->n_bytes, src->n_len);
}

/*
 * Create an RPC client handle for the given (address,prog,vers)
 * triple using UDP.  Returns NULL on any failure.  The remote port is
 * discovered via the remote rpcbind/portmap service, trying RPCBIND
 * v4, then v3, then the old portmap protocol.
 */
static CLIENT *
nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers)
{
	const char *wchan = "nlmrcv";
	const char* protofmly;	/* set below but not otherwise used here */
	struct sockaddr_storage ss;
	struct socket *so;
	CLIENT *rpcb;
	struct timeval timo;
	RPCB parms;
	char *uaddr;
	enum clnt_stat stat;
	int rpcvers;

	/*
	 * First we need to contact the remote RPCBIND service to find
	 * the right port.  Port 111 is the well-known rpcbind port.
	 */
	memcpy(&ss, sa, sa->sa_len);
	switch (ss.ss_family) {
	case AF_INET:
		((struct sockaddr_in *)&ss)->sin_port = htons(111);
		protofmly = "inet";
		so = nlm_socket;
		break;

#ifdef INET6
	case AF_INET6:
		((struct sockaddr_in6 *)&ss)->sin6_port = htons(111);
		protofmly = "inet6";
		so = nlm_socket6;
		break;
#endif

	default:
		/*
		 * Unsupported address family - fail.
		 */
		return (NULL);
	}

	rpcb = clnt_dg_create(so, (struct sockaddr *)&ss,
	    RPCBPROG, RPCBVERS4, 0, 0);
	if (!rpcb)
		return (NULL);

	parms.r_prog = prog;
	parms.r_vers = vers;
	parms.r_netid = "udp";
	parms.r_addr = "";
	parms.r_owner = "";

	/*
	 * Use the default timeout.
	 */
	timo.tv_sec = 25;
	timo.tv_usec = 0;
again:
	uaddr = NULL;
	stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR,
	    (xdrproc_t) xdr_rpcb, &parms,
	    (xdrproc_t) xdr_wrapstring, &uaddr, timo);
	if (stat == RPC_PROGVERSMISMATCH) {
		/*
		 * Try RPCBIND version 3 if we haven't already.
		 *
		 * XXX fall back to portmap?
		 */
		CLNT_CONTROL(rpcb, CLGET_VERS, &rpcvers);
		if (rpcvers == RPCBVERS4) {
			rpcvers = RPCBVERS;
			CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);
			goto again;
		}
	}

	if (stat == RPC_SUCCESS) {
		/*
		 * We have a reply from the remote RPCBIND - turn it into an
		 * appropriate address and make a new client that can talk to
		 * the remote NLM.
		 *
		 * XXX fixup IPv6 scope ID.
		 */
		struct netbuf *a;
		a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr);
		if (!a) {
			CLNT_DESTROY(rpcb);
			return (NULL);
		}
		memcpy(&ss, a->buf, a->len);
		free(a->buf, M_RPC);
		free(a, M_RPC);
		xdr_free((xdrproc_t) xdr_wrapstring, &uaddr);
	} else if (stat == RPC_PROGVERSMISMATCH) {
		/*
		 * Both RPCBIND versions failed - try the old portmap
		 * protocol (v2) as a last resort.
		 */
		struct pmap mapping;
		u_short port;

		rpcvers = PMAPVERS;
		CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);

		mapping.pm_prog = parms.r_prog;
		mapping.pm_vers = parms.r_vers;
		mapping.pm_prot = IPPROTO_UDP;
		mapping.pm_port = 0;

		stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT,
		    (xdrproc_t) xdr_pmap, &mapping,
		    (xdrproc_t) xdr_u_short, &port, timo);

		if (stat == RPC_SUCCESS) {
			switch (ss.ss_family) {
			case AF_INET:
				((struct sockaddr_in *)&ss)->sin_port =
					htons(port);
				break;

#ifdef INET6
			case AF_INET6:
				((struct sockaddr_in6 *)&ss)->sin6_port =
					htons(port);
				break;
#endif
			}
		}
	}
	if (stat != RPC_SUCCESS) {
		printf("NLM: failed to contact remote rpcbind, stat = %d\n",
		    (int) stat);
		CLNT_DESTROY(rpcb);
		return (NULL);
	}

	/*
	 * Re-use the client we used to speak to rpcbind, re-targeting
	 * it at the service address and program we actually want.
	 */
	CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss);
	CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
	CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
	CLNT_CONTROL(rpcb, CLSET_WAITCHAN, &wchan);
	rpcb->cl_auth = authunix_create(curthread->td_ucred);

	return (rpcb);
}

/*
 * This callback runs after an async lock request has been granted by
 * the local lock manager.  We notify the host which initiated the
 * request.
 */
static void
nlm_lock_callback(void *arg, int pending)
{
	struct nlm_async_lock *af = (struct nlm_async_lock *) arg;

	if (nlm_debug_level >= 2)
		printf("NLM: async lock %p for %s (sysid %d) granted\n",
		    af, af->af_host->nh_caller_name,
		    af->af_host->nh_sysid);

	/*
	 * Send the results back to the host.
	 *
	 * Note: there is a possible race here with nlm_host_notify
	 * destroying the RPC client. To avoid problems, the first
	 * thing nlm_host_notify does is to cancel pending async lock
	 * requests.
	 */
	if (af->af_host->nh_vers == NLM_VERS4) {
		nlm4_granted_msg_4(&af->af_granted,
		    NULL, af->af_host->nh_rpc);
	} else {
		/*
		 * Back-convert to legacy protocol
		 */
		nlm_testargs granted;
		granted.cookie = af->af_granted.cookie;
		granted.exclusive = af->af_granted.exclusive;
		granted.alock.caller_name =
			af->af_granted.alock.caller_name;
		granted.alock.fh = af->af_granted.alock.fh;
		granted.alock.oh = af->af_granted.alock.oh;
		granted.alock.svid = af->af_granted.alock.svid;
		granted.alock.l_offset =
			af->af_granted.alock.l_offset;
		granted.alock.l_len =
			af->af_granted.alock.l_len;

		nlm_granted_msg_1(&granted,
		    NULL, af->af_host->nh_rpc);
	}

	/*
	 * Move this entry to the nh_finished list. Someone else will
	 * free it later - its too hard to do it here safely without
	 * racing with cancel.
	 *
	 * XXX possibly we should have a third "granted sent but not
	 * ack'ed" list so that we can re-send the granted message.
	 */
	mtx_lock(&af->af_host->nh_lock);
	TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link);
	TAILQ_INSERT_TAIL(&af->af_host->nh_finished, af, af_link);
	mtx_unlock(&af->af_host->nh_lock);
}

/*
 * Free an async lock request. The request must have been removed from
 * any list.
 */
static void
nlm_free_async_lock(struct nlm_async_lock *af)
{
	/*
	 * Free an async lock.  Drops our vnode reference, if any.
	 */
	xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
	if (af->af_vp)
		vrele(af->af_vp);
	free(af, M_NLM);
}

/*
 * Cancel our async request - this must be called with
 * af->nh_host->nh_lock held. This is slightly complicated by a
 * potential race with our own callback. If we fail to cancel the
 * lock, it must already have been granted - we make sure our async
 * task has completed by calling taskqueue_drain in this case.
 */
static int
nlm_cancel_async_lock(struct nlm_async_lock *af)
{
	struct nlm_host *host = af->af_host;
	int error;

	mtx_assert(&host->nh_lock, MA_OWNED);

	/*
	 * Drop nh_lock while we talk to the lock manager - the cancel
	 * may need to sleep.  The callback can run in this window.
	 */
	mtx_unlock(&host->nh_lock);

	error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl,
	    F_REMOTE, NULL, &af->af_cookie);

	if (error) {
		/*
		 * We failed to cancel - make sure our callback has
		 * completed before we continue.  After the drain, the
		 * callback has moved 'af' to the nh_finished list.
		 */
		taskqueue_drain(taskqueue_thread, &af->af_task);
	}

	mtx_lock(&host->nh_lock);

	if (!error) {
		if (nlm_debug_level >= 2)
			printf("NLM: async lock %p for %s (sysid %d) "
			    "cancelled\n",
			    af, host->nh_caller_name, host->nh_sysid);

		/*
		 * Remove from the nh_pending list and free now that
		 * we are safe from the callback.  nh_lock is dropped
		 * around the free since nlm_free_async_lock may sleep
		 * (vrele).
		 */
		TAILQ_REMOVE(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}

	/* Returns 0 if cancelled and freed, non-zero if already granted. */
	return (error);
}

/*
 * Free everything on the host's nh_finished list.  nh_lock is dropped
 * around each free since nlm_free_async_lock may sleep.
 */
static void
nlm_free_finished_locks(struct nlm_host *host)
{
	struct nlm_async_lock *af;

	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) {
		TAILQ_REMOVE(&host->nh_finished, af, af_link);
		mtx_unlock(&host->nh_lock);
		nlm_free_async_lock(af);
		mtx_lock(&host->nh_lock);
	}
	mtx_unlock(&host->nh_lock);
}

/*
 * This is called when we receive a host state change
 * notification. We unlock any active locks owned by the host.
 * If 'destroy' is true the host structure is freed as well.
 */
static void
nlm_host_notify(struct nlm_host *host, int newstate, bool_t destroy)
{
	struct nlm_async_lock *af;

	if (newstate) {
		if (nlm_debug_level >= 1)
			printf("NLM: host %s (sysid %d) rebooted, new "
			    "state is %d\n",
			    host->nh_caller_name, host->nh_sysid, newstate);
	}

	/*
	 * Cancel any pending async locks for this host.
	 */
	mtx_lock(&host->nh_lock);
	while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) {
		/*
		 * nlm_cancel_async_lock will remove the entry from
		 * nh_pending and free it.  (If the cancel races with
		 * the grant callback, the entry moves to nh_finished
		 * instead and is collected below.)
		 */
		nlm_cancel_async_lock(af);
	}
	mtx_unlock(&host->nh_lock);
	nlm_free_finished_locks(host);

	/*
	 * The host just rebooted - trash its locks and forget any
	 * RPC client handle that we may have for it.
	 */
	lf_clearremotesys(host->nh_sysid);
	if (host->nh_rpc) {
		AUTH_DESTROY(host->nh_rpc->cl_auth);
		CLNT_DESTROY(host->nh_rpc);
		host->nh_rpc = NULL;
	}
	host->nh_state = newstate;

	/*
	 * Destroy the host if the caller believes that it won't be
	 * used again. This is safe enough - if we see the same name
	 * again, we will just create a new host.
	 */
	if (destroy) {
		TAILQ_REMOVE(&nlm_hosts, host, nh_link);
		mtx_destroy(&host->nh_lock);
		sysctl_ctx_free(&host->nh_sysctl);
		free(host->nh_caller_name, M_NLM);
		free(host, M_NLM);
	}
}

/*
 * Sysctl handler to count the number of locks for a sysid.
 * oid_arg1 is the struct nlm_host set up in nlm_create_host().
 */
static int
nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct nlm_host *host;
	int count;

	host = oidp->oid_arg1;
	count = lf_countlocks(host->nh_sysid);
	return sysctl_handle_int(oidp, &count, 0, req);
}

/*
 * Create a new NLM host.
 */
static struct nlm_host *
nlm_create_host(const char* caller_name)
{
	struct nlm_host *host;
	struct sysctl_oid *oid;

	if (nlm_debug_level >= 1)
		printf("NLM: new host %s (sysid %d)\n",
		    caller_name, nlm_next_sysid);
	host = malloc(sizeof(struct nlm_host), M_NLM, M_WAITOK|M_ZERO);
	mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
	host->nh_caller_name = strdup(caller_name, M_NLM);
	host->nh_sysid = nlm_next_sysid++;
	snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
	    "%d", host->nh_sysid);
	host->nh_rpc = NULL;
	host->nh_vers = 0;
	host->nh_state = 0;
	host->nh_monstate = NLM_UNMONITORED;
	TAILQ_INIT(&host->nh_pending);
	TAILQ_INIT(&host->nh_finished);
	TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);

	/*
	 * Export per-host details under vfs.nlm.sysid.<sysid>; the
	 * whole subtree is torn down via sysctl_ctx_free() in
	 * nlm_host_notify() when the host is destroyed.
	 */
	sysctl_ctx_init(&host->nh_sysctl);
	oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
	    SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
	    OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, "");
	SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "hostname", CTLFLAG_RD, host->nh_caller_name, 0, "");
	SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "version", CTLFLAG_RD, &host->nh_vers, 0, "");
	SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "monitored", CTLFLAG_RD, &host->nh_monstate, 0, "");
	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
	    "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
	    nlm_host_lock_count_sysctl, "I", "");

	/* Caller owns the host; nh_addr/nh_vers are filled in later. */
	return (host);
}

/*
 * Return non-zero if the address parts of the two sockaddrs are the
 * same.
645 */ 646static int 647nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 648{ 649 const struct sockaddr_in *a4, *b4; 650#ifdef INET6 651 const struct sockaddr_in6 *a6, *b6; 652#endif 653 654 if (a->sa_family != b->sa_family) 655 return (FALSE); 656 657 switch (a->sa_family) { 658 case AF_INET: 659 a4 = (const struct sockaddr_in *) a; 660 b4 = (const struct sockaddr_in *) b; 661 return !memcmp(&a4->sin_addr, &b4->sin_addr, 662 sizeof(a4->sin_addr)); 663#ifdef INET6 664 case AF_INET6: 665 a6 = (const struct sockaddr_in6 *) a; 666 b6 = (const struct sockaddr_in6 *) b; 667 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 668 sizeof(a6->sin6_addr)); 669#endif 670 } 671 672 return (0); 673} 674 675/* 676 * Check for idle hosts and stop monitoring them. We could also free 677 * the host structure here, possibly after a larger timeout but that 678 * would require some care to avoid races with 679 * e.g. nlm_host_lock_count_sysctl. 680 */ 681static void 682nlm_check_idle(void) 683{ 684 struct nlm_host *host; 685 686 if (time_uptime <= nlm_next_idle_check) 687 return; 688 689 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 690 691 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 692 if (host->nh_monstate == NLM_MONITORED 693 && time_uptime > host->nh_idle_timeout) { 694 if (lf_countlocks(host->nh_sysid) > 0) { 695 host->nh_idle_timeout = 696 time_uptime + NLM_IDLE_TIMEOUT; 697 mtx_lock(&nlm_global_lock); 698 continue; 699 } 700 nlm_host_unmonitor(host); 701 } 702 } 703} 704 705/* 706 * Search for an existing NLM host that matches the given name 707 * (typically the caller_name element of an nlm4_lock). If none is 708 * found, create a new host. If 'rqstp' is non-NULL, record the remote 709 * address of the host so that we can call it back for async 710 * responses. 
711 */ 712struct nlm_host * 713nlm_find_host_by_name(const char *name, struct svc_req *rqstp) 714{ 715 struct nlm_host *host; 716 717 nlm_check_idle(); 718 719 /* 720 * The remote host is determined by caller_name. 721 */ 722 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 723 if (!strcmp(host->nh_caller_name, name)) 724 break; 725 } 726 727 if (!host) 728 host = nlm_create_host(name); 729 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 730 731 /* 732 * If we have an RPC request, record the remote address so 733 * that can send async replies etc. 734 */ 735 if (rqstp) { 736 struct netbuf *addr = &rqstp->rq_xprt->xp_rtaddr; 737 738 KASSERT(addr->len < sizeof(struct sockaddr_storage), 739 ("Strange remote transport address length")); 740 741 /* 742 * If we have seen an address before and we currently 743 * have an RPC client handle, make sure the address is 744 * the same, otherwise discard the client handle. 745 */ 746 if (host->nh_addr.ss_len && host->nh_rpc) { 747 if (!nlm_compare_addr( 748 (struct sockaddr *) &host->nh_addr, 749 (struct sockaddr *) addr->buf) 750 || host->nh_vers != rqstp->rq_vers) { 751 AUTH_DESTROY(host->nh_rpc->cl_auth); 752 CLNT_DESTROY(host->nh_rpc); 753 host->nh_rpc = NULL; 754 } 755 } 756 memcpy(&host->nh_addr, addr->buf, addr->len); 757 host->nh_vers = rqstp->rq_vers; 758 } 759 760 return (host); 761} 762 763/* 764 * Search for an existing NLM host that matches the given remote 765 * address. If none is found, create a new host with the requested 766 * address and remember 'vers' as the NLM protocol version to use for 767 * that host. 768 */ 769struct nlm_host * 770nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 771{ 772 struct nlm_host *host; 773 774 nlm_check_idle(); 775 776 /* 777 * The remote host is determined by caller_name. 
778 */ 779 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 780 if (nlm_compare_addr(addr, 781 (const struct sockaddr *) &host->nh_addr)) 782 break; 783 } 784 785 if (!host) { 786 /* 787 * Fake up a name using inet_ntop. This buffer is 788 * large enough for an IPv6 address. 789 */ 790 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 791 switch (addr->sa_family) { 792 case AF_INET: 793 __rpc_inet_ntop(AF_INET, 794 &((const struct sockaddr_in *) addr)->sin_addr, 795 tmp, sizeof tmp); 796 break; 797#ifdef INET6 798 case AF_INET6: 799 __rpc_inet_ntop(AF_INET6, 800 &((const struct sockaddr_in6 *) addr)->sin6_addr, 801 tmp, sizeof tmp); 802 break; 803#endif 804 default: 805 strcmp(tmp, "<unknown>"); 806 } 807 host = nlm_create_host(tmp); 808 memcpy(&host->nh_addr, addr, addr->sa_len); 809 host->nh_vers = vers; 810 } 811 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 812 813 return (host); 814} 815 816/* 817 * Find the NLM host that matches the value of 'sysid'. If none 818 * exists, return NULL. 819 */ 820static struct nlm_host * 821nlm_find_host_by_sysid(int sysid) 822{ 823 struct nlm_host *host; 824 825 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 826 if (host->nh_sysid == sysid) 827 return (host); 828 } 829 830 return (NULL); 831} 832 833/* 834 * Unregister this NLM host with the local NSM due to idleness. 835 */ 836static void 837nlm_host_unmonitor(struct nlm_host *host) 838{ 839 mon_id smmonid; 840 sm_stat_res smstat; 841 struct timeval timo; 842 enum clnt_stat stat; 843 844 if (nlm_debug_level >= 1) 845 printf("NLM: unmonitoring %s (sysid %d)\n", 846 host->nh_caller_name, host->nh_sysid); 847 848 /* 849 * We put our assigned system ID value in the priv field to 850 * make it simpler to find the host if we are notified of a 851 * host restart. 
852 */ 853 smmonid.mon_name = host->nh_caller_name; 854 smmonid.my_id.my_name = "localhost"; 855 smmonid.my_id.my_prog = NLM_PROG; 856 smmonid.my_id.my_vers = NLM_SM; 857 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 858 859 timo.tv_sec = 25; 860 timo.tv_usec = 0; 861 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 862 (xdrproc_t) xdr_mon, &smmonid, 863 (xdrproc_t) xdr_sm_stat, &smstat, timo); 864 865 if (stat != RPC_SUCCESS) { 866 printf("Failed to contact local NSM - rpc error %d\n", stat); 867 return; 868 } 869 if (smstat.res_stat == stat_fail) { 870 printf("Local NSM refuses to unmonitor %s\n", 871 host->nh_caller_name); 872 return; 873 } 874 875 host->nh_monstate = NLM_UNMONITORED; 876} 877 878/* 879 * Register this NLM host with the local NSM so that we can be 880 * notified if it reboots. 881 */ 882static void 883nlm_host_monitor(struct nlm_host *host, int state) 884{ 885 mon smmon; 886 sm_stat_res smstat; 887 struct timeval timo; 888 enum clnt_stat stat; 889 890 if (state && !host->nh_state) { 891 /* 892 * This is the first time we have seen an NSM state 893 * value for this host. We record it here to help 894 * detect host reboots. 895 */ 896 host->nh_state = state; 897 if (nlm_debug_level >= 1) 898 printf("NLM: host %s (sysid %d) has NSM state %d\n", 899 host->nh_caller_name, host->nh_sysid, state); 900 } 901 902 if (host->nh_monstate != NLM_UNMONITORED) 903 return; 904 905 if (nlm_debug_level >= 1) 906 printf("NLM: monitoring %s (sysid %d)\n", 907 host->nh_caller_name, host->nh_sysid); 908 909 /* 910 * We put our assigned system ID value in the priv field to 911 * make it simpler to find the host if we are notified of a 912 * host restart. 
913 */ 914 smmon.mon_id.mon_name = host->nh_caller_name; 915 smmon.mon_id.my_id.my_name = "localhost"; 916 smmon.mon_id.my_id.my_prog = NLM_PROG; 917 smmon.mon_id.my_id.my_vers = NLM_SM; 918 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 919 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 920 921 timo.tv_sec = 25; 922 timo.tv_usec = 0; 923 stat = CLNT_CALL(nlm_nsm, SM_MON, 924 (xdrproc_t) xdr_mon, &smmon, 925 (xdrproc_t) xdr_sm_stat, &smstat, timo); 926 927 if (stat != RPC_SUCCESS) { 928 printf("Failed to contact local NSM - rpc error %d\n", stat); 929 return; 930 } 931 if (smstat.res_stat == stat_fail) { 932 printf("Local NSM refuses to monitor %s\n", 933 host->nh_caller_name); 934 host->nh_monstate = NLM_MONITOR_FAILED; 935 return; 936 } 937 938 host->nh_monstate = NLM_MONITORED; 939} 940 941/* 942 * Return an RPC client handle that can be used to talk to the NLM 943 * running on the given host. 944 */ 945CLIENT * 946nlm_host_get_rpc(struct nlm_host *host) 947{ 948 struct timeval zero; 949 950 /* 951 * We can't hold onto RPC handles for too long - the async 952 * call/reply protocol used by some NLM clients makes it hard 953 * to tell when they change port numbers (e.g. after a 954 * reboot). Note that if a client reboots while it isn't 955 * holding any locks, it won't bother to notify us. We 956 * expire the RPC handles after two minutes. 957 */ 958 if (host->nh_rpc && time_uptime > host->nh_rpc_create_time + 2*60) { 959 CLIENT *client; 960 client = host->nh_rpc; 961 host->nh_rpc = NULL; 962 CLNT_DESTROY(client); 963 } 964 965 if (host->nh_rpc) 966 return (host->nh_rpc); 967 968 /* 969 * Set the send timeout to zero - we only use this rpc handle 970 * for sending async replies which have no return value. 
971 */ 972 host->nh_rpc = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 973 NLM_PROG, host->nh_vers); 974 975 if (host->nh_rpc) { 976 zero.tv_sec = 0; 977 zero.tv_usec = 0; 978 CLNT_CONTROL(host->nh_rpc, CLSET_TIMEOUT, &zero); 979 980 host->nh_rpc_create_time = time_uptime; 981 } 982 983 return (host->nh_rpc); 984} 985 986/**********************************************************************/ 987 988/* 989 * Syscall interface with userland. 990 */ 991 992extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 993extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 994extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 995extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 996 997static int 998nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 999{ 1000 static rpcvers_t versions[] = { 1001 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1002 }; 1003 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1004 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1005 }; 1006 static const int version_count = sizeof(versions) / sizeof(versions[0]); 1007 1008 SVCXPRT **xprts; 1009 char netid[16]; 1010 char uaddr[128]; 1011 struct netconfig *nconf; 1012 int i, j, error; 1013 1014 if (!addr_count) { 1015 printf("NLM: no service addresses given - can't start server"); 1016 return (EINVAL); 1017 } 1018 1019 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK); 1020 for (i = 0; i < version_count; i++) { 1021 for (j = 0; j < addr_count; j++) { 1022 /* 1023 * Create transports for the first version and 1024 * then just register everything else to the 1025 * same transports. 
1026 */ 1027 if (i == 0) { 1028 char *up; 1029 1030 error = copyin(&addrs[2*j], &up, 1031 sizeof(char*)); 1032 if (error) 1033 goto out; 1034 error = copyinstr(up, netid, sizeof(netid), 1035 NULL); 1036 if (error) 1037 goto out; 1038 error = copyin(&addrs[2*j+1], &up, 1039 sizeof(char*)); 1040 if (error) 1041 goto out; 1042 error = copyinstr(up, uaddr, sizeof(uaddr), 1043 NULL); 1044 if (error) 1045 goto out; 1046 nconf = getnetconfigent(netid); 1047 if (!nconf) { 1048 printf("Can't lookup netid %s\n", 1049 netid); 1050 error = EINVAL; 1051 goto out; 1052 } 1053 xprts[j] = svc_tp_create(pool, dispatchers[i], 1054 NLM_PROG, versions[i], uaddr, nconf); 1055 if (!xprts[j]) { 1056 printf("NLM: unable to create " 1057 "(NLM_PROG, %d).\n", versions[i]); 1058 error = EINVAL; 1059 goto out; 1060 } 1061 freenetconfigent(nconf); 1062 } else { 1063 nconf = getnetconfigent(xprts[j]->xp_netid); 1064 rpcb_unset(NLM_PROG, versions[i], nconf); 1065 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1066 dispatchers[i], nconf)) { 1067 printf("NLM: can't register " 1068 "(NLM_PROG, %d)\n", versions[i]); 1069 error = EINVAL; 1070 goto out; 1071 } 1072 } 1073 } 1074 } 1075 error = 0; 1076out: 1077 free(xprts, M_NLM); 1078 return (error); 1079} 1080 1081/* 1082 * Main server entry point. Contacts the local NSM to get its current 1083 * state and send SM_UNMON_ALL. Registers the NLM services and then 1084 * services requests. Does not return until the server is interrupted 1085 * by a signal. 
 */
static int
nlm_server_main(int addr_count, char **addrs)
{
	struct thread *td = curthread;
	int error;
	SVCPOOL *pool = NULL;
	struct sockopt opt;
	int portlow;
#ifdef INET6
	struct sockaddr_in6 sin6;
#endif
	struct sockaddr_in sin;
	my_id id;
	sm_stat smstat;
	struct timeval timo;
	enum clnt_stat stat;
	struct nlm_host *host;

	/*
	 * A non-NULL nlm_socket is used as the "server already
	 * running" flag; it is cleared again on the way out below.
	 */
	if (nlm_socket) {
		printf("NLM: can't start server - it appears to be running already\n");
		return (EPERM);
	}

	memset(&opt, 0, sizeof(opt));

	/*
	 * Create the IPv4 datagram socket and request a low
	 * (reserved) local port via IP_PORTRANGE — presumably so
	 * peers that insist on privileged source ports will accept
	 * our traffic; TODO confirm against the RPC client code.
	 */
	nlm_socket = NULL;
	error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
	    td->td_ucred, td);
	if (error) {
		printf("NLM: can't create IPv4 socket - error %d\n", error);
		return (error);
	}
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_IP;
	opt.sopt_name = IP_PORTRANGE;
	portlow = IP_PORTRANGE_LOW;
	opt.sopt_val = &portlow;
	opt.sopt_valsize = sizeof(portlow);
	sosetopt(nlm_socket, &opt);

#ifdef INET6
	/*
	 * Same again for IPv6.
	 *
	 * NOTE(review): if this socreate() fails we return with the
	 * IPv4 nlm_socket still open and non-NULL, so the socket is
	 * leaked and any later start attempt fails with EPERM —
	 * confirm and fix upstream.
	 */
	nlm_socket6 = NULL;
	error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
	    td->td_ucred, td);
	if (error) {
		printf("NLM: can't create IPv6 socket - error %d\n", error);
		return (error);
	}
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_IPV6;
	opt.sopt_name = IPV6_PORTRANGE;
	portlow = IPV6_PORTRANGE_LOW;
	opt.sopt_val = &portlow;
	opt.sopt_valsize = sizeof(portlow);
	sosetopt(nlm_socket6, &opt);
#endif

	/*
	 * Contact the local NSM (statd) over loopback, trying IPv6
	 * first when compiled in and falling back to IPv4.
	 */
#ifdef INET6
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_loopback;
	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
	if (!nlm_nsm) {
#endif
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
		    SM_VERS);
#ifdef INET6
	}
#endif

	if (!nlm_nsm) {
		printf("Can't start NLM - unable to contact NSM\n");
		error = EINVAL;
		goto out;
	}

	pool = svcpool_create();

	/* Register the NLM service endpoints requested by userland. */
	error = nlm_register_services(pool, addr_count, addrs);
	if (error)
		goto out;

	memset(&id, 0, sizeof(id));
	id.my_name = "NFS NLM";

	/*
	 * Ask NSM to forget any monitoring state left over from a
	 * previous incarnation of this server (SM_UNMON_ALL).
	 */
	timo.tv_sec = 25;
	timo.tv_usec = 0;
	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
	    (xdrproc_t) xdr_my_id, &id,
	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

	if (stat != RPC_SUCCESS) {
		struct rpc_err err;

		CLNT_GETERR(nlm_nsm, &err);
		printf("NLM: unexpected error contacting NSM, stat=%d, errno=%d\n",
		    stat, err.re_errno);
		error = EINVAL;
		goto out;
	}

	if (nlm_debug_level >= 1)
		printf("NLM: local NSM state is %d\n", smstat.state);

	/* Serve requests; svc_run() blocks until interrupted. */
	svc_run(pool);
	error = 0;

out:
	if (pool)
		svcpool_destroy(pool);

	/*
	 * Trash all the existing state so that if the server
	 * restarts, it gets a clean slate.
	 */
	while ((host = TAILQ_FIRST(&nlm_hosts)) != NULL) {
		nlm_host_notify(host, 0, TRUE);
	}
	if (nlm_nsm) {
		AUTH_DESTROY(nlm_nsm->cl_auth);
		CLNT_DESTROY(nlm_nsm);
		nlm_nsm = NULL;
	}
	if (nlm_lockd) {
		AUTH_DESTROY(nlm_lockd->cl_auth);
		CLNT_DESTROY(nlm_lockd);
		nlm_lockd = NULL;
	}

	soclose(nlm_socket);
	nlm_socket = NULL;
#ifdef INET6
	soclose(nlm_socket6);
	nlm_socket6 = NULL;
#endif

	return (error);
}

/*
 * System-call entry point: after a privilege check, copy the
 * userland-supplied debug level and grace period into globals and run
 * the server main loop (does not return until the server stops).
 */
int
nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
{
	int error;

#if __FreeBSD_version >= 700000
	error = priv_check(td, PRIV_NFS_LOCKD);
#else
	error = suser(td);
#endif
	if (error)
		return (error);

	nlm_debug_level = uap->debug_level;
	nlm_grace_threshold = time_uptime + uap->grace_period;
	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;

	return nlm_server_main(uap->addr_count, uap->addrs);
}

/**********************************************************************/

/*
 * NLM implementation details, called from the RPC stubs.
 */

/*
 * Status notification from the local NSM: the four bytes of private
 * data carried in the notification encode a host sysid (presumably
 * stored there when the host was set up for monitoring — confirm
 * against nlm_host_monitor); look the host up and trash its locks.
 */
void
nlm_sm_notify(struct nlm_sm_status *argp)
{
	uint32_t sysid;
	struct nlm_host *host;

	if (nlm_debug_level >= 3)
		printf("nlm_sm_notify(): mon_name = %s\n", argp->mon_name);
	memcpy(&sysid, &argp->priv, sizeof(sysid));
	host = nlm_find_host_by_sysid(sysid);
	if (host)
		nlm_host_notify(host, argp->state, FALSE);
}

/*
 * Extract a local filehandle from the opaque NLM netobj.
 * NOTE(review): assumes the netobj holds at least sizeof(fhandle_t)
 * bytes; the length field is not checked — confirm callers guarantee
 * this.
 */
static void
nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p)
{
	memcpy(fhp, p->n_bytes, sizeof(fhandle_t));
}

/*
 * Bundle of VFS references acquired by nlm_get_vfs_state() and
 * released by nlm_release_vfs_state().
 */
struct vfs_state {
	struct mount	*vs_mp;		/* referenced mount point, or NULL */
	struct vnode	*vs_vp;		/* referenced vnode, or NULL */
	int		vs_vfslocked;	/* token from VFS_LOCK_GIANT */
	int		vs_vnlocked;	/* TRUE if vs_vp is still locked */
};

/*
 * Resolve the RPC filehandle to a mount point and vnode, verifying
 * that the calling host may access it: the filesystem must be
 * exported to the host, must not be read-only, and the request's
 * credentials (or the anonymous export credentials for root / anon
 * exports) must grant write access.  On success the vnode is
 * returned unlocked but referenced in *vs; on failure all acquired
 * references are dropped via the out: path and an errno is returned.
 */
static int
nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp,
    fhandle_t *fhp, struct vfs_state *vs)
{
	int error, exflags, freecred;
	struct ucred *cred = NULL, *credanon;

	memset(vs, 0, sizeof(*vs));
	freecred = FALSE;

	/* vfs_getvfs() returns a referenced mount point. */
	vs->vs_mp = vfs_getvfs(&fhp->fh_fsid);
	if (!vs->vs_mp) {
		return (ESTALE);
	}
	vs->vs_vfslocked = VFS_LOCK_GIANT(vs->vs_mp);

	/* Is the filesystem exported to this client at all? */
	error = VFS_CHECKEXP(vs->vs_mp, (struct sockaddr *)&host->nh_addr,
	    &exflags, &credanon);
	if (error)
		goto out;

	/* Locks on read-only exports/mounts are refused with EROFS. */
	if (exflags & MNT_EXRDONLY || (vs->vs_mp->mnt_flag & MNT_RDONLY)) {
		error = EROFS;
		goto out;
	}

	error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, &vs->vs_vp);
	if (error)
		goto out;
	vs->vs_vnlocked = TRUE;

	cred = crget();
	freecred = TRUE;
	if (!svc_getcred(rqstp, cred, NULL)) {
		error = EINVAL;
		goto out;
	}
	/* Root and anon exports use the export's anonymous creds. */
	if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
		crfree(cred);
		cred = credanon;
		freecred = FALSE;
	}

	/*
	 * Check cred.
	 */
	error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread);
	if (error)
		goto out;

#if __FreeBSD_version < 800011
	VOP_UNLOCK(vs->vs_vp, 0, curthread);
#else
	VOP_UNLOCK(vs->vs_vp, 0);
#endif
	vs->vs_vnlocked = FALSE;

out:
	if (freecred)
		crfree(cred);

	return (error);
}

/*
 * Release whatever subset of the references in *vs was acquired.
 */
static void
nlm_release_vfs_state(struct vfs_state *vs)
{

	if (vs->vs_vp) {
		if (vs->vs_vnlocked)
			vput(vs->vs_vp);
		else
			vrele(vs->vs_vp);
	}
	if (vs->vs_mp)
		vfs_rel(vs->vs_mp);
	VFS_UNLOCK_GIANT(vs->vs_vfslocked);
}

/*
 * Map a local errno from nlm_get_vfs_state() to the closest NLMv4
 * status code.
 */
static nlm4_stats
nlm_convert_error(int error)
{

	if (error == ESTALE)
		return nlm4_stale_fh;
	else if (error == EROFS)
		return nlm4_rofs;
	else
		return nlm4_failed;
}

/*
 * NLM_TEST: report whether the described lock could be granted,
 * returning details of one conflicting holder if not.  Returns the
 * calling host (refcounted by nlm_find_host_by_name — confirm) so
 * the RPC stub can release it, or NULL if no host could be set up.
 */
struct nlm_host *
nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host, *bhost;
	int error, sysid;
	struct flock fl;

	memset(result, 0, sizeof(*result));

	host = nlm_find_host_by_name(argp->alock.caller_name, rqstp);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (NULL);
	}

	if (nlm_debug_level >= 3)
		printf("nlm_do_test(): caller_name = %s (sysid = %d)\n",
		    host->nh_caller_name, host->nh_sysid);

	nlm_free_finished_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* All requests are refused during the post-restart grace period. */
	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		return (host);
	}

	error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	/* Build a struct flock tagged with the remote host's sysid. */
	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE);
	if (error) {
		result->stat.stat = nlm4_failed;
		goto out;
	}

	/* F_GETLK leaves F_UNLCK in l_type when nothing conflicts. */
	if (fl.l_type == F_UNLCK) {
		result->stat.stat = nlm4_granted;
	} else {
		result->stat.stat = nlm4_denied;
		result->stat.nlm4_testrply_u.holder.exclusive =
			(fl.l_type == F_WRLCK);
		result->stat.nlm4_testrply_u.holder.svid = fl.l_pid;
		bhost = nlm_find_host_by_sysid(fl.l_sysid);
		if (bhost) {
			/*
			 * We don't have any useful way of recording
			 * the value of oh used in the original lock
			 * request. Ideally, the test reply would have
			 * a space for the owning host's name allowing
			 * our caller's NLM to keep track.
			 *
			 * As far as I can see, Solaris uses an eight
			 * byte structure for oh which contains a four
			 * byte pid encoded in local byte order and
			 * the first four bytes of the host
			 * name. Linux uses a variable length string
			 * 'pid@hostname' in ascii but doesn't even
			 * return that in test replies.
			 *
			 * For the moment, return nothing in oh
			 * (already zero'ed above).
			 */
		}
		result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start;
		result->stat.nlm4_testrply_u.holder.l_len = fl.l_len;
	}

out:
	nlm_release_vfs_state(&vs);
	return (host);
}

/*
 * NLM_LOCK / NLM_LOCK_MSG: attempt to take the described lock.  For
 * blocking requests (argp->block) the lock is queued asynchronously
 * via VOP_ADVLOCKASYNC and an nlm_async_lock tracking entry; the
 * client is later sent an NLM_GRANTED callback (handled elsewhere).
 * 'monitor' selects whether the caller should be monitored by NSM.
 */
struct nlm_host *
nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
    bool_t monitor)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;

	memset(result, 0, sizeof(*result));

	host = nlm_find_host_by_name(argp->alock.caller_name, rqstp);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (NULL);
	}

	if (nlm_debug_level >= 3)
		printf("nlm_do_lock(): caller_name = %s (sysid = %d)\n",
		    host->nh_caller_name, host->nh_sysid);

	if (monitor && host->nh_state && argp->state
	    && host->nh_state != argp->state) {
		/*
		 * The host rebooted without telling us. Trash its
		 * locks.
		 */
		nlm_host_notify(host, argp->state, FALSE);
	}

	nlm_free_finished_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	/* During grace, only reclaims of pre-restart locks are allowed. */
	if (time_uptime < nlm_grace_threshold && !argp->reclaim) {
		result->stat.stat = nlm4_denied_grace_period;
		return (host);
	}

	error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;
	if (argp->block) {
		struct nlm_async_lock *af;

		/*
		 * First, make sure we can contact the host's NLM.
		 */
		if (!nlm_host_get_rpc(host)) {
			result->stat.stat = nlm4_failed;
			goto out;
		}

		/*
		 * First we need to check and see if there is an
		 * existing blocked lock that matches. This could be a
		 * badly behaved client or an RPC re-send. If we find
		 * one, just return nlm4_blocked.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_FOREACH(af, &host->nh_pending, af_link) {
			if (af->af_fl.l_start == fl.l_start
			    && af->af_fl.l_len == fl.l_len
			    && af->af_fl.l_pid == fl.l_pid
			    && af->af_fl.l_type == fl.l_type) {
				break;
			}
		}
		mtx_unlock(&host->nh_lock);
		if (af) {
			result->stat.stat = nlm4_blocked;
			goto out;
		}

		af = malloc(sizeof(struct nlm_async_lock), M_NLM,
		    M_WAITOK|M_ZERO);
		TASK_INIT(&af->af_task, 0, nlm_lock_callback, af);
		af->af_vp = vs.vs_vp;
		af->af_fl = fl;
		af->af_host = host;
		/*
		 * We use M_RPC here so that we can xdr_free the thing
		 * later.
		 */
		af->af_granted.exclusive = argp->exclusive;
		af->af_granted.alock.caller_name =
			strdup(argp->alock.caller_name, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.fh,
		    &argp->alock.fh, M_RPC);
		nlm_copy_netobj(&af->af_granted.alock.oh,
		    &argp->alock.oh, M_RPC);
		af->af_granted.alock.svid = argp->alock.svid;
		af->af_granted.alock.l_offset = argp->alock.l_offset;
		af->af_granted.alock.l_len = argp->alock.l_len;

		/*
		 * Put the entry on the pending list before calling
		 * VOP_ADVLOCKASYNC. We do this in case the lock
		 * request was blocked (returning EINPROGRESS) but
		 * then granted before we manage to run again. The
		 * client may receive the granted message before we
		 * send our blocked reply but that's their problem.
		 */
		mtx_lock(&host->nh_lock);
		TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link);
		mtx_unlock(&host->nh_lock);

		error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE,
		    &af->af_task, &af->af_cookie);

		/*
		 * If the lock completed synchronously, just free the
		 * tracking structure now.
		 */
		if (error != EINPROGRESS) {
			mtx_lock(&host->nh_lock);
			TAILQ_REMOVE(&host->nh_pending, af, af_link);
			mtx_unlock(&host->nh_lock);
			xdr_free((xdrproc_t) xdr_nlm4_testargs,
			    &af->af_granted);
			free(af, M_NLM);
		} else {
			if (nlm_debug_level >= 2)
				printf("NLM: pending async lock %p for %s "
				    "(sysid %d)\n",
				    af, host->nh_caller_name, sysid);
			/*
			 * Don't vrele the vnode just yet - this must
			 * wait until either the async callback
			 * happens or the lock is cancelled.
			 */
			vs.vs_vp = NULL;
		}
	} else {
		error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE);
	}

	if (error) {
		if (error == EINPROGRESS) {
			result->stat.stat = nlm4_blocked;
		} else if (error == EDEADLK) {
			result->stat.stat = nlm4_deadlck;
		} else if (error == EAGAIN) {
			result->stat.stat = nlm4_denied;
		} else {
			result->stat.stat = nlm4_failed;
		}
	} else {
		if (monitor)
			nlm_host_monitor(host, argp->state);
		result->stat.stat = nlm4_granted;
	}

out:
	nlm_release_vfs_state(&vs);

	return (host);
}

/*
 * NLM_CANCEL: cancel a pending blocked lock request.  Only async
 * requests still on the host's pending list can be cancelled; if no
 * matching entry is found, nlm4_denied is returned.
 */
struct nlm_host *
nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;
	struct nlm_async_lock *af;

	memset(result, 0, sizeof(*result));

	host = nlm_find_host_by_name(argp->alock.caller_name, rqstp);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (NULL);
	}

	if (nlm_debug_level >= 3)
		printf("nlm_do_cancel(): caller_name = %s (sysid = %d)\n",
		    host->nh_caller_name, host->nh_sysid);

	nlm_free_finished_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		return (host);
	}

	error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	if (argp->exclusive)
		fl.l_type = F_WRLCK;
	else
		fl.l_type = F_RDLCK;

	/*
	 * First we need to try and find the async lock request - if
	 * there isn't one, we give up and return nlm4_denied.
	 */
	mtx_lock(&host->nh_lock);

	TAILQ_FOREACH(af, &host->nh_pending, af_link) {
		if (af->af_fl.l_start == fl.l_start
		    && af->af_fl.l_len == fl.l_len
		    && af->af_fl.l_pid == fl.l_pid
		    && af->af_fl.l_type == fl.l_type) {
			break;
		}
	}

	if (!af) {
		mtx_unlock(&host->nh_lock);
		result->stat.stat = nlm4_denied;
		goto out;
	}

	/* nh_lock is held across the cancel call. */
	error = nlm_cancel_async_lock(af);

	if (error) {
		result->stat.stat = nlm4_denied;
	} else {
		result->stat.stat = nlm4_granted;
	}

	mtx_unlock(&host->nh_lock);

out:
	nlm_release_vfs_state(&vs);

	return (host);
}

/*
 * NLM_UNLOCK: release the described lock.  Per protocol there is no
 * failure status other than the grace period, so VOP_ADVLOCK errors
 * are deliberately ignored and nlm4_granted is always returned once
 * the filehandle resolves.
 */
struct nlm_host *
nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp)
{
	fhandle_t fh;
	struct vfs_state vs;
	struct nlm_host *host;
	int error, sysid;
	struct flock fl;

	memset(result, 0, sizeof(*result));

	host = nlm_find_host_by_name(argp->alock.caller_name, rqstp);
	if (!host) {
		result->stat.stat = nlm4_denied_nolocks;
		return (NULL);
	}

	if (nlm_debug_level >= 3)
		printf("nlm_do_unlock(): caller_name = %s (sysid = %d)\n",
		    host->nh_caller_name, host->nh_sysid);

	nlm_free_finished_locks(host);
	sysid = host->nh_sysid;

	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);

	if (time_uptime < nlm_grace_threshold) {
		result->stat.stat = nlm4_denied_grace_period;
		return (host);
	}

	error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
	if (error) {
		result->stat.stat = nlm_convert_error(error);
		goto out;
	}

	fl.l_start = argp->alock.l_offset;
	fl.l_len = argp->alock.l_len;
	fl.l_pid = argp->alock.svid;
	fl.l_sysid = sysid;
	fl.l_whence = SEEK_SET;
	fl.l_type = F_UNLCK;
	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE);

	/*
	 * Ignore the error - there is no result code for failure,
	 * only for grace period.
	 */
	result->stat.stat = nlm4_granted;

out:
	nlm_release_vfs_state(&vs);

	return (host);
}

/*
 * NLM_FREE_ALL: drop every lock held on behalf of the named client,
 * notifying each matching host entry with the client's new state.
 */
void
nlm_do_free_all(nlm4_notify *argp)
{
	struct nlm_host *host, *thost;

	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) {
		if (!strcmp(host->nh_caller_name, argp->name))
			nlm_host_notify(host, argp->state, FALSE);
	}
}

#define _PATH_RPCLOCKDSOCK	"/var/run/rpclockd.sock"

/*
 * Make a connection to the userland lockd - we push anything we can't
 * handle out to userland.
1800 */ 1801CLIENT * 1802nlm_user_lockd(void) 1803{ 1804 struct sockaddr_un sun; 1805 struct netconfig *nconf; 1806 struct timeval zero; 1807 1808 if (nlm_lockd) 1809 return (nlm_lockd); 1810 1811 sun.sun_family = AF_LOCAL; 1812 strcpy(sun.sun_path, _PATH_RPCLOCKDSOCK); 1813 sun.sun_len = SUN_LEN(&sun); 1814 1815 nconf = getnetconfigent("local"); 1816 nlm_lockd = clnt_reconnect_create(nconf, (struct sockaddr *) &sun, 1817 NLM_PROG, NLM_VERS4, RPC_MAXDATASIZE, RPC_MAXDATASIZE); 1818 1819 /* 1820 * Set the send timeout to zero - we only use this rpc handle 1821 * for sending async replies which have no return value. 1822 */ 1823 zero.tv_sec = 0; 1824 zero.tv_usec = 0; 1825 CLNT_CONTROL(nlm_lockd, CLSET_TIMEOUT, &zero); 1826 1827 return (nlm_lockd); 1828} 1829 1830/* 1831 * Kernel module glue 1832 */ 1833static int 1834nfslockd_modevent(module_t mod, int type, void *data) 1835{ 1836 1837 return (0); 1838} 1839static moduledata_t nfslockd_mod = { 1840 "nfslockd", 1841 nfslockd_modevent, 1842 NULL, 1843}; 1844DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 1845 1846/* So that loader and kldload(2) can find us, wherever we are.. */ 1847MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 1848MODULE_VERSION(nfslockd, 1); 1849