/* nlm_prot_impl.c revision 179425 */
1/*- 2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ 3 * Authors: Doug Rabson <dfr@rabson.org> 4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28#include "opt_inet6.h" 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/nlm/nlm_prot_impl.c 179425 2008-05-30 09:34:08Z dfr $"); 32 33#include <sys/param.h> 34#include <sys/fcntl.h> 35#include <sys/kernel.h> 36#include <sys/lockf.h> 37#include <sys/malloc.h> 38#include <sys/mount.h> 39#if __FreeBSD_version >= 700000 40#include <sys/priv.h> 41#endif 42#include <sys/proc.h> 43#include <sys/socket.h> 44#include <sys/socketvar.h> 45#include <sys/syscall.h> 46#include <sys/sysctl.h> 47#include <sys/sysent.h> 48#include <sys/sysproto.h> 49#include <sys/systm.h> 50#include <sys/taskqueue.h> 51#include <sys/unistd.h> 52#include <sys/vnode.h> 53 54#include <nlm/nlm_prot.h> 55#include <nlm/sm_inter.h> 56#include <nlm/nlm.h> 57#include <rpc/rpc_com.h> 58#include <rpc/rpcb_prot.h> 59 60MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager"); 61 62/* 63 * If a host is inactive (and holds no locks) for this amount of 64 * seconds, we consider it idle and stop tracking it. 65 */ 66#define NLM_IDLE_TIMEOUT 30 67 68/* 69 * We check the host list for idle every few seconds. 70 */ 71#define NLM_IDLE_PERIOD 5 72 73/* 74 * Support for sysctl vfs.nlm.sysid 75 */ 76SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL, "Network Lock Manager"); 77SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, ""); 78 79/* 80 * Syscall hooks 81 */ 82static int nlm_syscall_offset = SYS_nlm_syscall; 83static struct sysent nlm_syscall_prev_sysent; 84#if __FreeBSD_version < 700000 85static struct sysent nlm_syscall_sysent = { 86 (sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE, 87 (sy_call_t *) nlm_syscall 88}; 89#else 90MAKE_SYSENT(nlm_syscall); 91#endif 92static bool_t nlm_syscall_registered = FALSE; 93 94/* 95 * Debug level passed in from userland. We also support a sysctl hook 96 * so that it can be changed on a live system. 
97 */ 98static int nlm_debug_level; 99SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, ""); 100 101/* 102 * Grace period handling. The value of nlm_grace_threshold is the 103 * value of time_uptime after which we are serving requests normally. 104 */ 105static time_t nlm_grace_threshold; 106 107/* 108 * We check for idle hosts if time_uptime is greater than 109 * nlm_next_idle_check, 110 */ 111static time_t nlm_next_idle_check; 112 113/* 114 * A socket to use for RPC - shared by all IPv4 RPC clients. 115 */ 116static struct socket *nlm_socket; 117 118#ifdef INET6 119 120/* 121 * A socket to use for RPC - shared by all IPv6 RPC clients. 122 */ 123static struct socket *nlm_socket6; 124 125#endif 126 127/* 128 * An RPC client handle that can be used to communicate with the local 129 * NSM. 130 */ 131static CLIENT *nlm_nsm; 132 133/* 134 * An RPC client handle that can be used to communicate with the 135 * userland part of lockd. 136 */ 137static CLIENT *nlm_lockd; 138 139/* 140 * Locks: 141 * (l) locked by nh_lock 142 * (s) only accessed via server RPC which is single threaded 143 * (c) const until freeing 144 */ 145 146/* 147 * A pending asynchronous lock request, stored on the nh_pending list 148 * of the NLM host. 149 */ 150struct nlm_async_lock { 151 TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */ 152 struct task af_task; /* (c) async callback details */ 153 void *af_cookie; /* (l) lock manager cancel token */ 154 struct vnode *af_vp; /* (l) vnode to lock */ 155 struct flock af_fl; /* (c) lock details */ 156 struct nlm_host *af_host; /* (c) host which is locking */ 157 nlm4_testargs af_granted; /* (c) notification details */ 158}; 159TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock); 160 161/* 162 * NLM host. 
163 */ 164enum nlm_host_state { 165 NLM_UNMONITORED, 166 NLM_MONITORED, 167 NLM_MONITOR_FAILED 168}; 169struct nlm_host { 170 struct mtx nh_lock; 171 TAILQ_ENTRY(nlm_host) nh_link; /* (s) global list of hosts */ 172 char *nh_caller_name; /* (c) printable name of host */ 173 uint32_t nh_sysid; /* (c) our allocaed system ID */ 174 char nh_sysid_string[10]; /* (c) string rep. of sysid */ 175 struct sockaddr_storage nh_addr; /* (s) remote address of host */ 176 CLIENT *nh_rpc; /* (s) RPC handle to send to host */ 177 rpcvers_t nh_vers; /* (s) NLM version of host */ 178 int nh_state; /* (s) last seen NSM state of host */ 179 enum nlm_host_state nh_monstate; /* (s) local NSM monitoring state */ 180 time_t nh_idle_timeout; /* (s) Time at which host is idle */ 181 time_t nh_rpc_create_time; /* (s) Time we create RPC client */ 182 struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */ 183 struct nlm_async_lock_list nh_pending; /* (l) pending async locks */ 184 struct nlm_async_lock_list nh_finished; /* (l) finished async locks */ 185}; 186TAILQ_HEAD(nlm_host_list, nlm_host); 187 188static struct nlm_host_list nlm_hosts; 189static uint32_t nlm_next_sysid = 1; 190 191static void nlm_host_unmonitor(struct nlm_host *); 192 193/**********************************************************************/ 194 195/* 196 * Initialise NLM globals. 
197 */ 198static void 199nlm_init(void *dummy) 200{ 201 int error; 202 203 TAILQ_INIT(&nlm_hosts); 204 205 error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent, 206 &nlm_syscall_prev_sysent); 207 if (error) 208 printf("Can't register NLM syscall\n"); 209 else 210 nlm_syscall_registered = TRUE; 211} 212SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL); 213 214static void 215nlm_uninit(void *dummy) 216{ 217 218 if (nlm_syscall_registered) 219 syscall_deregister(&nlm_syscall_offset, 220 &nlm_syscall_prev_sysent); 221} 222SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL); 223 224/* 225 * Copy a struct netobj. 226 */ 227void 228nlm_copy_netobj(struct netobj *dst, struct netobj *src, 229 struct malloc_type *type) 230{ 231 232 dst->n_len = src->n_len; 233 dst->n_bytes = malloc(src->n_len, type, M_WAITOK); 234 memcpy(dst->n_bytes, src->n_bytes, src->n_len); 235} 236 237/* 238 * Create an RPC client handle for the given (address,prog,vers) 239 * triple using UDP. 240 */ 241static CLIENT * 242nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 243{ 244 const char *wchan = "nlmrcv"; 245 const char* protofmly; 246 struct sockaddr_storage ss; 247 struct socket *so; 248 CLIENT *rpcb; 249 struct timeval timo; 250 RPCB parms; 251 char *uaddr; 252 enum clnt_stat stat; 253 int rpcvers; 254 255 /* 256 * First we need to contact the remote RPCBIND service to find 257 * the right port. 258 */ 259 memcpy(&ss, sa, sa->sa_len); 260 switch (ss.ss_family) { 261 case AF_INET: 262 ((struct sockaddr_in *)&ss)->sin_port = htons(111); 263 protofmly = "inet"; 264 so = nlm_socket; 265 break; 266 267#ifdef INET6 268 case AF_INET6: 269 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 270 protofmly = "inet6"; 271 so = nlm_socket6; 272 break; 273#endif 274 275 default: 276 /* 277 * Unsupported address family - fail. 
278 */ 279 return (NULL); 280 } 281 282 rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 283 RPCBPROG, RPCBVERS4, 0, 0); 284 if (!rpcb) 285 return (NULL); 286 287 parms.r_prog = prog; 288 parms.r_vers = vers; 289 parms.r_netid = "udp"; 290 parms.r_addr = ""; 291 parms.r_owner = ""; 292 293 /* 294 * Use the default timeout. 295 */ 296 timo.tv_sec = 25; 297 timo.tv_usec = 0; 298again: 299 uaddr = NULL; 300 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 301 (xdrproc_t) xdr_rpcb, &parms, 302 (xdrproc_t) xdr_wrapstring, &uaddr, timo); 303 if (stat == RPC_PROGVERSMISMATCH) { 304 /* 305 * Try RPCBIND version 3 if we haven't already. 306 * 307 * XXX fall back to portmap? 308 */ 309 CLNT_CONTROL(rpcb, CLGET_VERS, &rpcvers); 310 if (rpcvers == RPCBVERS4) { 311 rpcvers = RPCBVERS; 312 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 313 goto again; 314 } 315 } 316 317 if (stat == RPC_SUCCESS) { 318 /* 319 * We have a reply from the remote RPCBIND - turn it into an 320 * appropriate address and make a new client that can talk to 321 * the remote NLM. 322 * 323 * XXX fixup IPv6 scope ID. 324 */ 325 struct netbuf *a; 326 a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 327 if (!a) { 328 CLNT_DESTROY(rpcb); 329 return (NULL); 330 } 331 memcpy(&ss, a->buf, a->len); 332 free(a->buf, M_RPC); 333 free(a, M_RPC); 334 xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 335 } else if (stat == RPC_PROGVERSMISMATCH) { 336 /* 337 * Try portmap. 
338 */ 339 struct pmap mapping; 340 u_short port; 341 342 rpcvers = PMAPVERS; 343 CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 344 345 mapping.pm_prog = parms.r_prog; 346 mapping.pm_vers = parms.r_vers; 347 mapping.pm_prot = IPPROTO_UDP; 348 mapping.pm_port = 0; 349 350 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 351 (xdrproc_t) xdr_pmap, &mapping, 352 (xdrproc_t) xdr_u_short, &port, timo); 353 354 if (stat == RPC_SUCCESS) { 355 switch (ss.ss_family) { 356 case AF_INET: 357 ((struct sockaddr_in *)&ss)->sin_port = 358 htons(port); 359 break; 360 361#ifdef INET6 362 case AF_INET6: 363 ((struct sockaddr_in6 *)&ss)->sin6_port = 364 htons(port); 365 break; 366#endif 367 } 368 } 369 } 370 if (stat != RPC_SUCCESS) { 371 printf("NLM: failed to contact remote rpcbind, stat = %d\n", 372 (int) stat); 373 CLNT_DESTROY(rpcb); 374 return (NULL); 375 } 376 377 /* 378 * Re-use the client we used to speak to rpcbind. 379 */ 380 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 381 CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 382 CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 383 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, &wchan); 384 rpcb->cl_auth = authunix_create(curthread->td_ucred); 385 386 return (rpcb); 387} 388 389/* 390 * This async callback after when an async lock request has been 391 * granted. We notify the host which initiated the request. 392 */ 393static void 394nlm_lock_callback(void *arg, int pending) 395{ 396 struct nlm_async_lock *af = (struct nlm_async_lock *) arg; 397 398 if (nlm_debug_level >= 2) 399 printf("NLM: async lock %p for %s (sysid %d) granted\n", 400 af, af->af_host->nh_caller_name, 401 af->af_host->nh_sysid); 402 403 /* 404 * Send the results back to the host. 405 * 406 * Note: there is a possible race here with nlm_host_notify 407 * destroying the RPC client. To avoid problems, the first 408 * thing nlm_host_notify does is to cancel pending async lock 409 * requests. 
410 */ 411 if (af->af_host->nh_vers == NLM_VERS4) { 412 nlm4_granted_msg_4(&af->af_granted, 413 NULL, af->af_host->nh_rpc); 414 } else { 415 /* 416 * Back-convert to legacy protocol 417 */ 418 nlm_testargs granted; 419 granted.cookie = af->af_granted.cookie; 420 granted.exclusive = af->af_granted.exclusive; 421 granted.alock.caller_name = 422 af->af_granted.alock.caller_name; 423 granted.alock.fh = af->af_granted.alock.fh; 424 granted.alock.oh = af->af_granted.alock.oh; 425 granted.alock.svid = af->af_granted.alock.svid; 426 granted.alock.l_offset = 427 af->af_granted.alock.l_offset; 428 granted.alock.l_len = 429 af->af_granted.alock.l_len; 430 431 nlm_granted_msg_1(&granted, 432 NULL, af->af_host->nh_rpc); 433 } 434 435 /* 436 * Move this entry to the nh_finished list. Someone else will 437 * free it later - its too hard to do it here safely without 438 * racing with cancel. 439 * 440 * XXX possibly we should have a third "granted sent but not 441 * ack'ed" list so that we can re-send the granted message. 442 */ 443 mtx_lock(&af->af_host->nh_lock); 444 TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link); 445 TAILQ_INSERT_TAIL(&af->af_host->nh_finished, af, af_link); 446 mtx_unlock(&af->af_host->nh_lock); 447} 448 449/* 450 * Free an async lock request. The request must have been removed from 451 * any list. 452 */ 453static void 454nlm_free_async_lock(struct nlm_async_lock *af) 455{ 456 /* 457 * Free an async lock. 458 */ 459 xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted); 460 if (af->af_vp) 461 vrele(af->af_vp); 462 free(af, M_NLM); 463} 464 465/* 466 * Cancel our async request - this must be called with 467 * af->nh_host->nh_lock held. This is slightly complicated by a 468 * potential race with our own callback. If we fail to cancel the 469 * lock, it must already have been granted - we make sure our async 470 * task has completed by calling taskqueue_drain in this case. 
471 */ 472static int 473nlm_cancel_async_lock(struct nlm_async_lock *af) 474{ 475 struct nlm_host *host = af->af_host; 476 int error; 477 478 mtx_assert(&host->nh_lock, MA_OWNED); 479 480 mtx_unlock(&host->nh_lock); 481 482 error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl, 483 F_REMOTE, NULL, &af->af_cookie); 484 485 if (error) { 486 /* 487 * We failed to cancel - make sure our callback has 488 * completed before we continue. 489 */ 490 taskqueue_drain(taskqueue_thread, &af->af_task); 491 } 492 493 mtx_lock(&host->nh_lock); 494 495 if (!error) { 496 if (nlm_debug_level >= 2) 497 printf("NLM: async lock %p for %s (sysid %d) " 498 "cancelled\n", 499 af, host->nh_caller_name, host->nh_sysid); 500 501 /* 502 * Remove from the nh_pending list and free now that 503 * we are safe from the callback. 504 */ 505 TAILQ_REMOVE(&host->nh_pending, af, af_link); 506 mtx_unlock(&host->nh_lock); 507 nlm_free_async_lock(af); 508 mtx_lock(&host->nh_lock); 509 } 510 511 return (error); 512} 513 514static void 515nlm_free_finished_locks(struct nlm_host *host) 516{ 517 struct nlm_async_lock *af; 518 519 mtx_lock(&host->nh_lock); 520 while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) { 521 TAILQ_REMOVE(&host->nh_finished, af, af_link); 522 mtx_unlock(&host->nh_lock); 523 nlm_free_async_lock(af); 524 mtx_lock(&host->nh_lock); 525 } 526 mtx_unlock(&host->nh_lock); 527} 528 529/* 530 * This is called when we receive a host state change 531 * notification. We unlock any active locks owned by the host. 532 */ 533static void 534nlm_host_notify(struct nlm_host *host, int newstate, bool_t destroy) 535{ 536 struct nlm_async_lock *af; 537 538 if (newstate) { 539 if (nlm_debug_level >= 1) 540 printf("NLM: host %s (sysid %d) rebooted, new " 541 "state is %d\n", 542 host->nh_caller_name, host->nh_sysid, newstate); 543 } 544 545 /* 546 * Cancel any pending async locks for this host. 
547 */ 548 mtx_lock(&host->nh_lock); 549 while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) { 550 /* 551 * nlm_cancel_async_lock will remove the entry from 552 * nh_pending and free it. 553 */ 554 nlm_cancel_async_lock(af); 555 } 556 mtx_unlock(&host->nh_lock); 557 nlm_free_finished_locks(host); 558 559 /* 560 * The host just rebooted - trash its locks and forget any 561 * RPC client handle that we may have for it. 562 */ 563 lf_clearremotesys(host->nh_sysid); 564 if (host->nh_rpc) { 565 AUTH_DESTROY(host->nh_rpc->cl_auth); 566 CLNT_DESTROY(host->nh_rpc); 567 host->nh_rpc = NULL; 568 } 569 host->nh_state = newstate; 570 571 /* 572 * Destroy the host if the caller believes that it won't be 573 * used again. This is safe enough - if we see the same name 574 * again, we will just create a new host. 575 */ 576 if (destroy) { 577 TAILQ_REMOVE(&nlm_hosts, host, nh_link); 578 mtx_destroy(&host->nh_lock); 579 sysctl_ctx_free(&host->nh_sysctl); 580 free(host->nh_caller_name, M_NLM); 581 free(host, M_NLM); 582 } 583} 584 585/* 586 * Sysctl handler to count the number of locks for a sysid. 587 */ 588static int 589nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 590{ 591 struct nlm_host *host; 592 int count; 593 594 host = oidp->oid_arg1; 595 count = lf_countlocks(host->nh_sysid); 596 return sysctl_handle_int(oidp, &count, 0, req); 597} 598 599/* 600 * Create a new NLM host. 
601 */ 602static struct nlm_host * 603nlm_create_host(const char* caller_name) 604{ 605 struct nlm_host *host; 606 struct sysctl_oid *oid; 607 608 if (nlm_debug_level >= 1) 609 printf("NLM: new host %s (sysid %d)\n", 610 caller_name, nlm_next_sysid); 611 host = malloc(sizeof(struct nlm_host), M_NLM, M_WAITOK|M_ZERO); 612 mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF); 613 host->nh_caller_name = strdup(caller_name, M_NLM); 614 host->nh_sysid = nlm_next_sysid++; 615 snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string), 616 "%d", host->nh_sysid); 617 host->nh_rpc = NULL; 618 host->nh_vers = 0; 619 host->nh_state = 0; 620 host->nh_monstate = NLM_UNMONITORED; 621 TAILQ_INIT(&host->nh_pending); 622 TAILQ_INIT(&host->nh_finished); 623 TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link); 624 625 sysctl_ctx_init(&host->nh_sysctl); 626 oid = SYSCTL_ADD_NODE(&host->nh_sysctl, 627 SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid), 628 OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, ""); 629 SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 630 "hostname", CTLFLAG_RD, host->nh_caller_name, 0, ""); 631 SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 632 "version", CTLFLAG_RD, &host->nh_vers, 0, ""); 633 SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 634 "monitored", CTLFLAG_RD, &host->nh_monstate, 0, ""); 635 SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 636 "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0, 637 nlm_host_lock_count_sysctl, "I", ""); 638 639 return (host); 640} 641 642/* 643 * Return non-zero if the address parts of the two sockaddrs are the 644 * same. 
645 */ 646static int 647nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 648{ 649 const struct sockaddr_in *a4, *b4; 650#ifdef INET6 651 const struct sockaddr_in6 *a6, *b6; 652#endif 653 654 if (a->sa_family != b->sa_family) 655 return (FALSE); 656 657 switch (a->sa_family) { 658 case AF_INET: 659 a4 = (const struct sockaddr_in *) a; 660 b4 = (const struct sockaddr_in *) b; 661 return !memcmp(&a4->sin_addr, &b4->sin_addr, 662 sizeof(a4->sin_addr)); 663#ifdef INET6 664 case AF_INET6: 665 a6 = (const struct sockaddr_in6 *) a; 666 b6 = (const struct sockaddr_in6 *) b; 667 return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 668 sizeof(a6->sin6_addr)); 669#endif 670 } 671 672 return (0); 673} 674 675/* 676 * Check for idle hosts and stop monitoring them. We could also free 677 * the host structure here, possibly after a larger timeout but that 678 * would require some care to avoid races with 679 * e.g. nlm_host_lock_count_sysctl. 680 */ 681static void 682nlm_check_idle(void) 683{ 684 struct nlm_host *host; 685 686 if (time_uptime <= nlm_next_idle_check) 687 return; 688 689 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 690 691 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 692 if (host->nh_monstate == NLM_MONITORED 693 && time_uptime > host->nh_idle_timeout) { 694 if (lf_countlocks(host->nh_sysid) > 0) { 695 host->nh_idle_timeout = 696 time_uptime + NLM_IDLE_TIMEOUT; 697 continue; 698 } 699 nlm_host_unmonitor(host); 700 } 701 } 702} 703 704/* 705 * Search for an existing NLM host that matches the given name 706 * (typically the caller_name element of an nlm4_lock). If none is 707 * found, create a new host. If 'rqstp' is non-NULL, record the remote 708 * address of the host so that we can call it back for async 709 * responses. 710 */ 711struct nlm_host * 712nlm_find_host_by_name(const char *name, struct svc_req *rqstp) 713{ 714 struct nlm_host *host; 715 716 nlm_check_idle(); 717 718 /* 719 * The remote host is determined by caller_name. 
720 */ 721 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 722 if (!strcmp(host->nh_caller_name, name)) 723 break; 724 } 725 726 if (!host) 727 host = nlm_create_host(name); 728 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 729 730 /* 731 * If we have an RPC request, record the remote address so 732 * that can send async replies etc. 733 */ 734 if (rqstp) { 735 struct netbuf *addr = &rqstp->rq_xprt->xp_rtaddr; 736 737 KASSERT(addr->len < sizeof(struct sockaddr_storage), 738 ("Strange remote transport address length")); 739 740 /* 741 * If we have seen an address before and we currently 742 * have an RPC client handle, make sure the address is 743 * the same, otherwise discard the client handle. 744 */ 745 if (host->nh_addr.ss_len && host->nh_rpc) { 746 if (!nlm_compare_addr( 747 (struct sockaddr *) &host->nh_addr, 748 (struct sockaddr *) addr->buf) 749 || host->nh_vers != rqstp->rq_vers) { 750 AUTH_DESTROY(host->nh_rpc->cl_auth); 751 CLNT_DESTROY(host->nh_rpc); 752 host->nh_rpc = NULL; 753 } 754 } 755 memcpy(&host->nh_addr, addr->buf, addr->len); 756 host->nh_vers = rqstp->rq_vers; 757 } 758 759 return (host); 760} 761 762/* 763 * Search for an existing NLM host that matches the given remote 764 * address. If none is found, create a new host with the requested 765 * address and remember 'vers' as the NLM protocol version to use for 766 * that host. 767 */ 768struct nlm_host * 769nlm_find_host_by_addr(const struct sockaddr *addr, int vers) 770{ 771 struct nlm_host *host; 772 773 nlm_check_idle(); 774 775 /* 776 * The remote host is determined by caller_name. 777 */ 778 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 779 if (nlm_compare_addr(addr, 780 (const struct sockaddr *) &host->nh_addr)) 781 break; 782 } 783 784 if (!host) { 785 /* 786 * Fake up a name using inet_ntop. This buffer is 787 * large enough for an IPv6 address. 
788 */ 789 char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 790 switch (addr->sa_family) { 791 case AF_INET: 792 __rpc_inet_ntop(AF_INET, 793 &((const struct sockaddr_in *) addr)->sin_addr, 794 tmp, sizeof tmp); 795 break; 796#ifdef INET6 797 case AF_INET6: 798 __rpc_inet_ntop(AF_INET6, 799 &((const struct sockaddr_in6 *) addr)->sin6_addr, 800 tmp, sizeof tmp); 801 break; 802#endif 803 default: 804 strcmp(tmp, "<unknown>"); 805 } 806 host = nlm_create_host(tmp); 807 memcpy(&host->nh_addr, addr, addr->sa_len); 808 host->nh_vers = vers; 809 } 810 host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 811 812 return (host); 813} 814 815/* 816 * Find the NLM host that matches the value of 'sysid'. If none 817 * exists, return NULL. 818 */ 819static struct nlm_host * 820nlm_find_host_by_sysid(int sysid) 821{ 822 struct nlm_host *host; 823 824 TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 825 if (host->nh_sysid == sysid) 826 return (host); 827 } 828 829 return (NULL); 830} 831 832/* 833 * Unregister this NLM host with the local NSM due to idleness. 834 */ 835static void 836nlm_host_unmonitor(struct nlm_host *host) 837{ 838 mon_id smmonid; 839 sm_stat_res smstat; 840 struct timeval timo; 841 enum clnt_stat stat; 842 843 if (nlm_debug_level >= 1) 844 printf("NLM: unmonitoring %s (sysid %d)\n", 845 host->nh_caller_name, host->nh_sysid); 846 847 /* 848 * We put our assigned system ID value in the priv field to 849 * make it simpler to find the host if we are notified of a 850 * host restart. 
851 */ 852 smmonid.mon_name = host->nh_caller_name; 853 smmonid.my_id.my_name = "localhost"; 854 smmonid.my_id.my_prog = NLM_PROG; 855 smmonid.my_id.my_vers = NLM_SM; 856 smmonid.my_id.my_proc = NLM_SM_NOTIFY; 857 858 timo.tv_sec = 25; 859 timo.tv_usec = 0; 860 stat = CLNT_CALL(nlm_nsm, SM_UNMON, 861 (xdrproc_t) xdr_mon, &smmonid, 862 (xdrproc_t) xdr_sm_stat, &smstat, timo); 863 864 if (stat != RPC_SUCCESS) { 865 printf("Failed to contact local NSM - rpc error %d\n", stat); 866 return; 867 } 868 if (smstat.res_stat == stat_fail) { 869 printf("Local NSM refuses to unmonitor %s\n", 870 host->nh_caller_name); 871 return; 872 } 873 874 host->nh_monstate = NLM_UNMONITORED; 875} 876 877/* 878 * Register this NLM host with the local NSM so that we can be 879 * notified if it reboots. 880 */ 881static void 882nlm_host_monitor(struct nlm_host *host, int state) 883{ 884 mon smmon; 885 sm_stat_res smstat; 886 struct timeval timo; 887 enum clnt_stat stat; 888 889 if (host->nh_state && state && host->nh_state != state) { 890 /* 891 * The host rebooted without telling us. Trash its 892 * locks. 893 */ 894 nlm_host_notify(host, state, FALSE); 895 } 896 897 if (state && !host->nh_state) { 898 /* 899 * This is the first time we have seen an NSM state 900 * value for this host. We record it here to help 901 * detect host reboots. 902 */ 903 host->nh_state = state; 904 if (nlm_debug_level >= 1) 905 printf("NLM: host %s (sysid %d) has NSM state %d\n", 906 host->nh_caller_name, host->nh_sysid, state); 907 } 908 909 if (host->nh_monstate != NLM_UNMONITORED) 910 return; 911 912 if (nlm_debug_level >= 1) 913 printf("NLM: monitoring %s (sysid %d)\n", 914 host->nh_caller_name, host->nh_sysid); 915 916 /* 917 * We put our assigned system ID value in the priv field to 918 * make it simpler to find the host if we are notified of a 919 * host restart. 
920 */ 921 smmon.mon_id.mon_name = host->nh_caller_name; 922 smmon.mon_id.my_id.my_name = "localhost"; 923 smmon.mon_id.my_id.my_prog = NLM_PROG; 924 smmon.mon_id.my_id.my_vers = NLM_SM; 925 smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 926 memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 927 928 timo.tv_sec = 25; 929 timo.tv_usec = 0; 930 stat = CLNT_CALL(nlm_nsm, SM_MON, 931 (xdrproc_t) xdr_mon, &smmon, 932 (xdrproc_t) xdr_sm_stat, &smstat, timo); 933 934 if (stat != RPC_SUCCESS) { 935 printf("Failed to contact local NSM - rpc error %d\n", stat); 936 return; 937 } 938 if (smstat.res_stat == stat_fail) { 939 printf("Local NSM refuses to monitor %s\n", 940 host->nh_caller_name); 941 host->nh_monstate = NLM_MONITOR_FAILED; 942 return; 943 } 944 945 host->nh_monstate = NLM_MONITORED; 946} 947 948/* 949 * Return an RPC client handle that can be used to talk to the NLM 950 * running on the given host. 951 */ 952CLIENT * 953nlm_host_get_rpc(struct nlm_host *host) 954{ 955 struct timeval zero; 956 957 /* 958 * We can't hold onto RPC handles for too long - the async 959 * call/reply protocol used by some NLM clients makes it hard 960 * to tell when they change port numbers (e.g. after a 961 * reboot). Note that if a client reboots while it isn't 962 * holding any locks, it won't bother to notify us. We 963 * expire the RPC handles after two minutes. 964 */ 965 if (host->nh_rpc && time_uptime > host->nh_rpc_create_time + 2*60) { 966 CLIENT *client; 967 client = host->nh_rpc; 968 host->nh_rpc = NULL; 969 CLNT_DESTROY(client); 970 } 971 972 if (host->nh_rpc) 973 return (host->nh_rpc); 974 975 /* 976 * Set the send timeout to zero - we only use this rpc handle 977 * for sending async replies which have no return value. 
978 */ 979 host->nh_rpc = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 980 NLM_PROG, host->nh_vers); 981 982 if (host->nh_rpc) { 983 zero.tv_sec = 0; 984 zero.tv_usec = 0; 985 CLNT_CONTROL(host->nh_rpc, CLSET_TIMEOUT, &zero); 986 987 host->nh_rpc_create_time = time_uptime; 988 } 989 990 return (host->nh_rpc); 991} 992 993/**********************************************************************/ 994 995/* 996 * Syscall interface with userland. 997 */ 998 999extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1000extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1001extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1002extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1003 1004static int 1005nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1006{ 1007 static rpcvers_t versions[] = { 1008 NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1009 }; 1010 static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1011 nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1012 }; 1013 static const int version_count = sizeof(versions) / sizeof(versions[0]); 1014 1015 SVCXPRT **xprts; 1016 char netid[16]; 1017 char uaddr[128]; 1018 struct netconfig *nconf; 1019 int i, j, error; 1020 1021 if (!addr_count) { 1022 printf("NLM: no service addresses given - can't start server"); 1023 return (EINVAL); 1024 } 1025 1026 xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK); 1027 for (i = 0; i < version_count; i++) { 1028 for (j = 0; j < addr_count; j++) { 1029 /* 1030 * Create transports for the first version and 1031 * then just register everything else to the 1032 * same transports. 
1033 */ 1034 if (i == 0) { 1035 char *up; 1036 1037 error = copyin(&addrs[2*j], &up, 1038 sizeof(char*)); 1039 if (error) 1040 goto out; 1041 error = copyinstr(up, netid, sizeof(netid), 1042 NULL); 1043 if (error) 1044 goto out; 1045 error = copyin(&addrs[2*j+1], &up, 1046 sizeof(char*)); 1047 if (error) 1048 goto out; 1049 error = copyinstr(up, uaddr, sizeof(uaddr), 1050 NULL); 1051 if (error) 1052 goto out; 1053 nconf = getnetconfigent(netid); 1054 if (!nconf) { 1055 printf("Can't lookup netid %s\n", 1056 netid); 1057 error = EINVAL; 1058 goto out; 1059 } 1060 xprts[j] = svc_tp_create(pool, dispatchers[i], 1061 NLM_PROG, versions[i], uaddr, nconf); 1062 if (!xprts[j]) { 1063 printf("NLM: unable to create " 1064 "(NLM_PROG, %d).\n", versions[i]); 1065 error = EINVAL; 1066 goto out; 1067 } 1068 freenetconfigent(nconf); 1069 } else { 1070 nconf = getnetconfigent(xprts[j]->xp_netid); 1071 rpcb_unset(NLM_PROG, versions[i], nconf); 1072 if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1073 dispatchers[i], nconf)) { 1074 printf("NLM: can't register " 1075 "(NLM_PROG, %d)\n", versions[i]); 1076 error = EINVAL; 1077 goto out; 1078 } 1079 } 1080 } 1081 } 1082 error = 0; 1083out: 1084 free(xprts, M_NLM); 1085 return (error); 1086} 1087 1088/* 1089 * Main server entry point. Contacts the local NSM to get its current 1090 * state and send SM_UNMON_ALL. Registers the NLM services and then 1091 * services requests. Does not return until the server is interrupted 1092 * by a signal. 
1093 */ 1094static int 1095nlm_server_main(int addr_count, char **addrs) 1096{ 1097 struct thread *td = curthread; 1098 int error; 1099 SVCPOOL *pool = NULL; 1100 struct sockopt opt; 1101 int portlow; 1102#ifdef INET6 1103 struct sockaddr_in6 sin6; 1104#endif 1105 struct sockaddr_in sin; 1106 my_id id; 1107 sm_stat smstat; 1108 struct timeval timo; 1109 enum clnt_stat stat; 1110 struct nlm_host *host; 1111 1112 if (nlm_socket) { 1113 printf("NLM: can't start server - it appears to be running already\n"); 1114 return (EPERM); 1115 } 1116 1117 memset(&opt, 0, sizeof(opt)); 1118 1119 nlm_socket = NULL; 1120 error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0, 1121 td->td_ucred, td); 1122 if (error) { 1123 printf("NLM: can't create IPv4 socket - error %d\n", error); 1124 return (error); 1125 } 1126 opt.sopt_dir = SOPT_SET; 1127 opt.sopt_level = IPPROTO_IP; 1128 opt.sopt_name = IP_PORTRANGE; 1129 portlow = IP_PORTRANGE_LOW; 1130 opt.sopt_val = &portlow; 1131 opt.sopt_valsize = sizeof(portlow); 1132 sosetopt(nlm_socket, &opt); 1133 1134#ifdef INET6 1135 nlm_socket6 = NULL; 1136 error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0, 1137 td->td_ucred, td); 1138 if (error) { 1139 printf("NLM: can't create IPv6 socket - error %d\n", error); 1140 return (error); 1141 } 1142 opt.sopt_dir = SOPT_SET; 1143 opt.sopt_level = IPPROTO_IPV6; 1144 opt.sopt_name = IPV6_PORTRANGE; 1145 portlow = IPV6_PORTRANGE_LOW; 1146 opt.sopt_val = &portlow; 1147 opt.sopt_valsize = sizeof(portlow); 1148 sosetopt(nlm_socket6, &opt); 1149#endif 1150 1151#ifdef INET6 1152 memset(&sin6, 0, sizeof(sin6)); 1153 sin6.sin6_len = sizeof(sin6); 1154 sin6.sin6_family = AF_INET6; 1155 sin6.sin6_addr = in6addr_loopback; 1156 nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS); 1157 if (!nlm_nsm) { 1158#endif 1159 memset(&sin, 0, sizeof(sin)); 1160 sin.sin_len = sizeof(sin); 1161 sin.sin_family = AF_INET; 1162 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 1163 nlm_nsm = nlm_get_rpc((struct 
sockaddr *) &sin, SM_PROG, 1164 SM_VERS); 1165#ifdef INET6 1166 } 1167#endif 1168 1169 if (!nlm_nsm) { 1170 printf("Can't start NLM - unable to contact NSM\n"); 1171 error = EINVAL; 1172 goto out; 1173 } 1174 1175 pool = svcpool_create(); 1176 1177 error = nlm_register_services(pool, addr_count, addrs); 1178 if (error) 1179 goto out; 1180 1181 memset(&id, 0, sizeof(id)); 1182 id.my_name = "NFS NLM"; 1183 1184 timo.tv_sec = 25; 1185 timo.tv_usec = 0; 1186 stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL, 1187 (xdrproc_t) xdr_my_id, &id, 1188 (xdrproc_t) xdr_sm_stat, &smstat, timo); 1189 1190 if (stat != RPC_SUCCESS) { 1191 struct rpc_err err; 1192 1193 CLNT_GETERR(nlm_nsm, &err); 1194 printf("NLM: unexpected error contacting NSM, stat=%d, errno=%d\n", 1195 stat, err.re_errno); 1196 error = EINVAL; 1197 goto out; 1198 } 1199 1200 if (nlm_debug_level >= 1) 1201 printf("NLM: local NSM state is %d\n", smstat.state); 1202 1203 svc_run(pool); 1204 error = 0; 1205 1206out: 1207 if (pool) 1208 svcpool_destroy(pool); 1209 1210 /* 1211 * Trash all the existing state so that if the server 1212 * restarts, it gets a clean slate. 
1213 */ 1214 while ((host = TAILQ_FIRST(&nlm_hosts)) != NULL) { 1215 nlm_host_notify(host, 0, TRUE); 1216 } 1217 if (nlm_nsm) { 1218 AUTH_DESTROY(nlm_nsm->cl_auth); 1219 CLNT_DESTROY(nlm_nsm); 1220 nlm_nsm = NULL; 1221 } 1222 if (nlm_lockd) { 1223 AUTH_DESTROY(nlm_lockd->cl_auth); 1224 CLNT_DESTROY(nlm_lockd); 1225 nlm_lockd = NULL; 1226 } 1227 1228 soclose(nlm_socket); 1229 nlm_socket = NULL; 1230#ifdef INET6 1231 soclose(nlm_socket6); 1232 nlm_socket6 = NULL; 1233#endif 1234 1235 return (error); 1236} 1237 1238int 1239nlm_syscall(struct thread *td, struct nlm_syscall_args *uap) 1240{ 1241 int error; 1242 1243#if __FreeBSD_version >= 700000 1244 error = priv_check(td, PRIV_NFS_LOCKD); 1245#else 1246 error = suser(td); 1247#endif 1248 if (error) 1249 return (error); 1250 1251 nlm_debug_level = uap->debug_level; 1252 nlm_grace_threshold = time_uptime + uap->grace_period; 1253 nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 1254 1255 return nlm_server_main(uap->addr_count, uap->addrs); 1256} 1257 1258/**********************************************************************/ 1259 1260/* 1261 * NLM implementation details, called from the RPC stubs. 
1262 */ 1263 1264 1265void 1266nlm_sm_notify(struct nlm_sm_status *argp) 1267{ 1268 uint32_t sysid; 1269 struct nlm_host *host; 1270 1271 if (nlm_debug_level >= 3) 1272 printf("nlm_sm_notify(): mon_name = %s\n", argp->mon_name); 1273 memcpy(&sysid, &argp->priv, sizeof(sysid)); 1274 host = nlm_find_host_by_sysid(sysid); 1275 if (host) 1276 nlm_host_notify(host, argp->state, FALSE); 1277} 1278 1279static void 1280nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p) 1281{ 1282 memcpy(fhp, p->n_bytes, sizeof(fhandle_t)); 1283} 1284 1285struct vfs_state { 1286 struct mount *vs_mp; 1287 struct vnode *vs_vp; 1288 int vs_vfslocked; 1289 int vs_vnlocked; 1290}; 1291 1292static int 1293nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp, 1294 fhandle_t *fhp, struct vfs_state *vs) 1295{ 1296 int error, exflags, freecred; 1297 struct ucred *cred = NULL, *credanon; 1298 1299 memset(vs, 0, sizeof(*vs)); 1300 freecred = FALSE; 1301 1302 vs->vs_mp = vfs_getvfs(&fhp->fh_fsid); 1303 if (!vs->vs_mp) { 1304 return (ESTALE); 1305 } 1306 vs->vs_vfslocked = VFS_LOCK_GIANT(vs->vs_mp); 1307 1308 error = VFS_CHECKEXP(vs->vs_mp, (struct sockaddr *)&host->nh_addr, 1309 &exflags, &credanon); 1310 if (error) 1311 goto out; 1312 1313 if (exflags & MNT_EXRDONLY || (vs->vs_mp->mnt_flag & MNT_RDONLY)) { 1314 error = EROFS; 1315 goto out; 1316 } 1317 1318 error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, &vs->vs_vp); 1319 if (error) 1320 goto out; 1321 vs->vs_vnlocked = TRUE; 1322 1323 cred = crget(); 1324 freecred = TRUE; 1325 if (!svc_getcred(rqstp, cred, NULL)) { 1326 error = EINVAL; 1327 goto out; 1328 } 1329 if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { 1330 crfree(cred); 1331 cred = credanon; 1332 freecred = FALSE; 1333 } 1334 1335 /* 1336 * Check cred. 
1337 */ 1338 error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread); 1339 if (error) 1340 goto out; 1341 1342#if __FreeBSD_version < 800011 1343 VOP_UNLOCK(vs->vs_vp, 0, curthread); 1344#else 1345 VOP_UNLOCK(vs->vs_vp, 0); 1346#endif 1347 vs->vs_vnlocked = FALSE; 1348 1349out: 1350 if (freecred) 1351 crfree(cred); 1352 1353 return (error); 1354} 1355 1356static void 1357nlm_release_vfs_state(struct vfs_state *vs) 1358{ 1359 1360 if (vs->vs_vp) { 1361 if (vs->vs_vnlocked) 1362 vput(vs->vs_vp); 1363 else 1364 vrele(vs->vs_vp); 1365 } 1366 if (vs->vs_mp) 1367 vfs_rel(vs->vs_mp); 1368 VFS_UNLOCK_GIANT(vs->vs_vfslocked); 1369} 1370 1371static nlm4_stats 1372nlm_convert_error(int error) 1373{ 1374 1375 if (error == ESTALE) 1376 return nlm4_stale_fh; 1377 else if (error == EROFS) 1378 return nlm4_rofs; 1379 else 1380 return nlm4_failed; 1381} 1382 1383struct nlm_host * 1384nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp) 1385{ 1386 fhandle_t fh; 1387 struct vfs_state vs; 1388 struct nlm_host *host, *bhost; 1389 int error, sysid; 1390 struct flock fl; 1391 1392 memset(result, 0, sizeof(*result)); 1393 1394 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1395 if (!host) { 1396 result->stat.stat = nlm4_denied_nolocks; 1397 return (NULL); 1398 } 1399 1400 if (nlm_debug_level >= 3) 1401 printf("nlm_do_test(): caller_name = %s (sysid = %d)\n", 1402 host->nh_caller_name, host->nh_sysid); 1403 1404 nlm_free_finished_locks(host); 1405 sysid = host->nh_sysid; 1406 1407 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1408 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1409 1410 if (time_uptime < nlm_grace_threshold) { 1411 result->stat.stat = nlm4_denied_grace_period; 1412 return (host); 1413 } 1414 1415 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1416 if (error) { 1417 result->stat.stat = nlm_convert_error(error); 1418 goto out; 1419 } 1420 1421 fl.l_start = argp->alock.l_offset; 1422 fl.l_len = argp->alock.l_len; 1423 
fl.l_pid = argp->alock.svid; 1424 fl.l_sysid = sysid; 1425 fl.l_whence = SEEK_SET; 1426 if (argp->exclusive) 1427 fl.l_type = F_WRLCK; 1428 else 1429 fl.l_type = F_RDLCK; 1430 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE); 1431 if (error) { 1432 result->stat.stat = nlm4_failed; 1433 goto out; 1434 } 1435 1436 if (fl.l_type == F_UNLCK) { 1437 result->stat.stat = nlm4_granted; 1438 } else { 1439 result->stat.stat = nlm4_denied; 1440 result->stat.nlm4_testrply_u.holder.exclusive = 1441 (fl.l_type == F_WRLCK); 1442 result->stat.nlm4_testrply_u.holder.svid = fl.l_pid; 1443 bhost = nlm_find_host_by_sysid(fl.l_sysid); 1444 if (bhost) { 1445 /* 1446 * We don't have any useful way of recording 1447 * the value of oh used in the original lock 1448 * request. Ideally, the test reply would have 1449 * a space for the owning host's name allowing 1450 * our caller's NLM to keep track. 1451 * 1452 * As far as I can see, Solaris uses an eight 1453 * byte structure for oh which contains a four 1454 * byte pid encoded in local byte order and 1455 * the first four bytes of the host 1456 * name. Linux uses a variable length string 1457 * 'pid@hostname' in ascii but doesn't even 1458 * return that in test replies. 1459 * 1460 * For the moment, return nothing in oh 1461 * (already zero'ed above). 
1462 */ 1463 } 1464 result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1465 result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1466 } 1467 1468out: 1469 nlm_release_vfs_state(&vs); 1470 return (host); 1471} 1472 1473struct nlm_host * 1474nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1475 bool_t monitor) 1476{ 1477 fhandle_t fh; 1478 struct vfs_state vs; 1479 struct nlm_host *host; 1480 int error, sysid; 1481 struct flock fl; 1482 1483 memset(result, 0, sizeof(*result)); 1484 1485 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1486 if (!host) { 1487 result->stat.stat = nlm4_denied_nolocks; 1488 return (NULL); 1489 } 1490 1491 if (nlm_debug_level >= 3) 1492 printf("nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1493 host->nh_caller_name, host->nh_sysid); 1494 1495 nlm_free_finished_locks(host); 1496 sysid = host->nh_sysid; 1497 1498 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1499 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1500 1501 if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1502 result->stat.stat = nlm4_denied_grace_period; 1503 return (host); 1504 } 1505 1506 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1507 if (error) { 1508 result->stat.stat = nlm_convert_error(error); 1509 goto out; 1510 } 1511 1512 fl.l_start = argp->alock.l_offset; 1513 fl.l_len = argp->alock.l_len; 1514 fl.l_pid = argp->alock.svid; 1515 fl.l_sysid = sysid; 1516 fl.l_whence = SEEK_SET; 1517 if (argp->exclusive) 1518 fl.l_type = F_WRLCK; 1519 else 1520 fl.l_type = F_RDLCK; 1521 if (argp->block) { 1522 struct nlm_async_lock *af; 1523 1524 /* 1525 * First, make sure we can contact the host's NLM. 1526 */ 1527 if (!nlm_host_get_rpc(host)) { 1528 result->stat.stat = nlm4_failed; 1529 goto out; 1530 } 1531 1532 /* 1533 * First we need to check and see if there is an 1534 * existing blocked lock that matches. This could be a 1535 * badly behaved client or an RPC re-send. 
If we find 1536 * one, just return nlm4_blocked. 1537 */ 1538 mtx_lock(&host->nh_lock); 1539 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 1540 if (af->af_fl.l_start == fl.l_start 1541 && af->af_fl.l_len == fl.l_len 1542 && af->af_fl.l_pid == fl.l_pid 1543 && af->af_fl.l_type == fl.l_type) { 1544 break; 1545 } 1546 } 1547 mtx_unlock(&host->nh_lock); 1548 if (af) { 1549 result->stat.stat = nlm4_blocked; 1550 goto out; 1551 } 1552 1553 af = malloc(sizeof(struct nlm_async_lock), M_NLM, 1554 M_WAITOK|M_ZERO); 1555 TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 1556 af->af_vp = vs.vs_vp; 1557 af->af_fl = fl; 1558 af->af_host = host; 1559 /* 1560 * We use M_RPC here so that we can xdr_free the thing 1561 * later. 1562 */ 1563 af->af_granted.exclusive = argp->exclusive; 1564 af->af_granted.alock.caller_name = 1565 strdup(argp->alock.caller_name, M_RPC); 1566 nlm_copy_netobj(&af->af_granted.alock.fh, 1567 &argp->alock.fh, M_RPC); 1568 nlm_copy_netobj(&af->af_granted.alock.oh, 1569 &argp->alock.oh, M_RPC); 1570 af->af_granted.alock.svid = argp->alock.svid; 1571 af->af_granted.alock.l_offset = argp->alock.l_offset; 1572 af->af_granted.alock.l_len = argp->alock.l_len; 1573 1574 /* 1575 * Put the entry on the pending list before calling 1576 * VOP_ADVLOCKASYNC. We do this in case the lock 1577 * request was blocked (returning EINPROGRESS) but 1578 * then granted before we manage to run again. The 1579 * client may receive the granted message before we 1580 * send our blocked reply but thats their problem. 1581 */ 1582 mtx_lock(&host->nh_lock); 1583 TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 1584 mtx_unlock(&host->nh_lock); 1585 1586 error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 1587 &af->af_task, &af->af_cookie); 1588 1589 /* 1590 * If the lock completed synchronously, just free the 1591 * tracking structure now. 
1592 */ 1593 if (error != EINPROGRESS) { 1594 mtx_lock(&host->nh_lock); 1595 TAILQ_REMOVE(&host->nh_pending, af, af_link); 1596 mtx_unlock(&host->nh_lock); 1597 xdr_free((xdrproc_t) xdr_nlm4_testargs, 1598 &af->af_granted); 1599 free(af, M_NLM); 1600 } else { 1601 if (nlm_debug_level >= 2) 1602 printf("NLM: pending async lock %p for %s " 1603 "(sysid %d)\n", 1604 af, host->nh_caller_name, sysid); 1605 /* 1606 * Don't vrele the vnode just yet - this must 1607 * wait until either the async callback 1608 * happens or the lock is cancelled. 1609 */ 1610 vs.vs_vp = NULL; 1611 } 1612 } else { 1613 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 1614 } 1615 1616 if (error) { 1617 if (error == EINPROGRESS) { 1618 result->stat.stat = nlm4_blocked; 1619 } else if (error == EDEADLK) { 1620 result->stat.stat = nlm4_deadlck; 1621 } else if (error == EAGAIN) { 1622 result->stat.stat = nlm4_denied; 1623 } else { 1624 result->stat.stat = nlm4_failed; 1625 } 1626 } else { 1627 if (monitor) 1628 nlm_host_monitor(host, argp->state); 1629 result->stat.stat = nlm4_granted; 1630 } 1631 1632out: 1633 nlm_release_vfs_state(&vs); 1634 1635 return (host); 1636} 1637 1638struct nlm_host * 1639nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp) 1640{ 1641 fhandle_t fh; 1642 struct vfs_state vs; 1643 struct nlm_host *host; 1644 int error, sysid; 1645 struct flock fl; 1646 struct nlm_async_lock *af; 1647 1648 memset(result, 0, sizeof(*result)); 1649 1650 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1651 if (!host) { 1652 result->stat.stat = nlm4_denied_nolocks; 1653 return (NULL); 1654 } 1655 1656 if (nlm_debug_level >= 3) 1657 printf("nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 1658 host->nh_caller_name, host->nh_sysid); 1659 1660 nlm_free_finished_locks(host); 1661 sysid = host->nh_sysid; 1662 1663 nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1664 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1665 1666 if (time_uptime < 
nlm_grace_threshold) { 1667 result->stat.stat = nlm4_denied_grace_period; 1668 return (host); 1669 } 1670 1671 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1672 if (error) { 1673 result->stat.stat = nlm_convert_error(error); 1674 goto out; 1675 } 1676 1677 fl.l_start = argp->alock.l_offset; 1678 fl.l_len = argp->alock.l_len; 1679 fl.l_pid = argp->alock.svid; 1680 fl.l_sysid = sysid; 1681 fl.l_whence = SEEK_SET; 1682 if (argp->exclusive) 1683 fl.l_type = F_WRLCK; 1684 else 1685 fl.l_type = F_RDLCK; 1686 1687 /* 1688 * First we need to try and find the async lock request - if 1689 * there isn't one, we give up and return nlm4_denied. 1690 */ 1691 mtx_lock(&host->nh_lock); 1692 1693 TAILQ_FOREACH(af, &host->nh_pending, af_link) { 1694 if (af->af_fl.l_start == fl.l_start 1695 && af->af_fl.l_len == fl.l_len 1696 && af->af_fl.l_pid == fl.l_pid 1697 && af->af_fl.l_type == fl.l_type) { 1698 break; 1699 } 1700 } 1701 1702 if (!af) { 1703 mtx_unlock(&host->nh_lock); 1704 result->stat.stat = nlm4_denied; 1705 goto out; 1706 } 1707 1708 error = nlm_cancel_async_lock(af); 1709 1710 if (error) { 1711 result->stat.stat = nlm4_denied; 1712 } else { 1713 result->stat.stat = nlm4_granted; 1714 } 1715 1716 mtx_unlock(&host->nh_lock); 1717 1718out: 1719 nlm_release_vfs_state(&vs); 1720 1721 return (host); 1722} 1723 1724struct nlm_host * 1725nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp) 1726{ 1727 fhandle_t fh; 1728 struct vfs_state vs; 1729 struct nlm_host *host; 1730 int error, sysid; 1731 struct flock fl; 1732 1733 memset(result, 0, sizeof(*result)); 1734 1735 host = nlm_find_host_by_name(argp->alock.caller_name, rqstp); 1736 if (!host) { 1737 result->stat.stat = nlm4_denied_nolocks; 1738 return (NULL); 1739 } 1740 1741 if (nlm_debug_level >= 3) 1742 printf("nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 1743 host->nh_caller_name, host->nh_sysid); 1744 1745 nlm_free_finished_locks(host); 1746 sysid = host->nh_sysid; 1747 1748 
nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1749 nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1750 1751 if (time_uptime < nlm_grace_threshold) { 1752 result->stat.stat = nlm4_denied_grace_period; 1753 return (host); 1754 } 1755 1756 error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1757 if (error) { 1758 result->stat.stat = nlm_convert_error(error); 1759 goto out; 1760 } 1761 1762 fl.l_start = argp->alock.l_offset; 1763 fl.l_len = argp->alock.l_len; 1764 fl.l_pid = argp->alock.svid; 1765 fl.l_sysid = sysid; 1766 fl.l_whence = SEEK_SET; 1767 fl.l_type = F_UNLCK; 1768 error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 1769 1770 /* 1771 * Ignore the error - there is no result code for failure, 1772 * only for grace period. 1773 */ 1774 result->stat.stat = nlm4_granted; 1775 1776out: 1777 nlm_release_vfs_state(&vs); 1778 1779 return (host); 1780} 1781 1782void 1783nlm_do_free_all(nlm4_notify *argp) 1784{ 1785 struct nlm_host *host, *thost; 1786 1787 TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) { 1788 if (!strcmp(host->nh_caller_name, argp->name)) 1789 nlm_host_notify(host, argp->state, FALSE); 1790 } 1791} 1792 1793#define _PATH_RPCLOCKDSOCK "/var/run/rpclockd.sock" 1794 1795/* 1796 * Make a connection to the userland lockd - we push anything we can't 1797 * handle out to userland. 1798 */ 1799CLIENT * 1800nlm_user_lockd(void) 1801{ 1802 struct sockaddr_un sun; 1803 struct netconfig *nconf; 1804 struct timeval zero; 1805 1806 if (nlm_lockd) 1807 return (nlm_lockd); 1808 1809 sun.sun_family = AF_LOCAL; 1810 strcpy(sun.sun_path, _PATH_RPCLOCKDSOCK); 1811 sun.sun_len = SUN_LEN(&sun); 1812 1813 nconf = getnetconfigent("local"); 1814 nlm_lockd = clnt_reconnect_create(nconf, (struct sockaddr *) &sun, 1815 NLM_PROG, NLM_VERS4, RPC_MAXDATASIZE, RPC_MAXDATASIZE); 1816 1817 /* 1818 * Set the send timeout to zero - we only use this rpc handle 1819 * for sending async replies which have no return value. 
1820 */ 1821 zero.tv_sec = 0; 1822 zero.tv_usec = 0; 1823 CLNT_CONTROL(nlm_lockd, CLSET_TIMEOUT, &zero); 1824 1825 return (nlm_lockd); 1826} 1827 1828/* 1829 * Kernel module glue 1830 */ 1831static int 1832nfslockd_modevent(module_t mod, int type, void *data) 1833{ 1834 1835 return (0); 1836} 1837static moduledata_t nfslockd_mod = { 1838 "nfslockd", 1839 nfslockd_modevent, 1840 NULL, 1841}; 1842DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 1843 1844/* So that loader and kldload(2) can find us, wherever we are.. */ 1845MODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 1846MODULE_VERSION(nfslockd, 1); 1847