nlm_prot_impl.c revision 194407
/*-
 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
 * Authors: Doug Rabson <dfr@rabson.org>
 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
26177633Sdfr */ 27177633Sdfr 28177633Sdfr#include "opt_inet6.h" 29184588Sdfr#include "opt_nfs.h" 30177633Sdfr 31177633Sdfr#include <sys/cdefs.h> 32177633Sdfr__FBSDID("$FreeBSD: head/sys/nlm/nlm_prot_impl.c 194407 2009-06-17 22:50:26Z rmacklem $"); 33177633Sdfr 34177633Sdfr#include <sys/param.h> 35177633Sdfr#include <sys/fcntl.h> 36177633Sdfr#include <sys/kernel.h> 37180025Sdfr#include <sys/kthread.h> 38177633Sdfr#include <sys/lockf.h> 39177633Sdfr#include <sys/malloc.h> 40177633Sdfr#include <sys/mount.h> 41177685Sdfr#if __FreeBSD_version >= 700000 42177633Sdfr#include <sys/priv.h> 43177685Sdfr#endif 44177633Sdfr#include <sys/proc.h> 45177633Sdfr#include <sys/socket.h> 46177633Sdfr#include <sys/socketvar.h> 47177633Sdfr#include <sys/syscall.h> 48177633Sdfr#include <sys/sysctl.h> 49177633Sdfr#include <sys/sysent.h> 50191918Sdfr#include <sys/syslog.h> 51177633Sdfr#include <sys/sysproto.h> 52177633Sdfr#include <sys/systm.h> 53177633Sdfr#include <sys/taskqueue.h> 54177633Sdfr#include <sys/unistd.h> 55177633Sdfr#include <sys/vnode.h> 56177633Sdfr 57180025Sdfr#include <nfs/nfsproto.h> 58180025Sdfr#include <nfsclient/nfs.h> 59180025Sdfr#include <nfsclient/nfsnode.h> 60180025Sdfr 61177685Sdfr#include <nlm/nlm_prot.h> 62177685Sdfr#include <nlm/sm_inter.h> 63177685Sdfr#include <nlm/nlm.h> 64177633Sdfr#include <rpc/rpc_com.h> 65177633Sdfr#include <rpc/rpcb_prot.h> 66177633Sdfr 67177633SdfrMALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager"); 68177633Sdfr 69177633Sdfr/* 70177633Sdfr * If a host is inactive (and holds no locks) for this amount of 71177633Sdfr * seconds, we consider it idle and stop tracking it. 72177633Sdfr */ 73177633Sdfr#define NLM_IDLE_TIMEOUT 30 74177633Sdfr 75177633Sdfr/* 76177633Sdfr * We check the host list for idle every few seconds. 
77177633Sdfr */ 78177633Sdfr#define NLM_IDLE_PERIOD 5 79177633Sdfr 80177633Sdfr/* 81177633Sdfr * Support for sysctl vfs.nlm.sysid 82177633Sdfr */ 83177633SdfrSYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL, "Network Lock Manager"); 84177633SdfrSYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, ""); 85177633Sdfr 86177633Sdfr/* 87177633Sdfr * Syscall hooks 88177633Sdfr */ 89177633Sdfrstatic int nlm_syscall_offset = SYS_nlm_syscall; 90177633Sdfrstatic struct sysent nlm_syscall_prev_sysent; 91177685Sdfr#if __FreeBSD_version < 700000 92177685Sdfrstatic struct sysent nlm_syscall_sysent = { 93177685Sdfr (sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE, 94177685Sdfr (sy_call_t *) nlm_syscall 95177685Sdfr}; 96177685Sdfr#else 97177633SdfrMAKE_SYSENT(nlm_syscall); 98177685Sdfr#endif 99177633Sdfrstatic bool_t nlm_syscall_registered = FALSE; 100177633Sdfr 101177633Sdfr/* 102177633Sdfr * Debug level passed in from userland. We also support a sysctl hook 103177633Sdfr * so that it can be changed on a live system. 104177633Sdfr */ 105177633Sdfrstatic int nlm_debug_level; 106177633SdfrSYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, ""); 107177633Sdfr 108191918Sdfr#define NLM_DEBUG(_level, args...) \ 109191918Sdfr do { \ 110191918Sdfr if (nlm_debug_level >= (_level)) \ 111191918Sdfr log(LOG_DEBUG, args); \ 112191918Sdfr } while(0) 113191918Sdfr#define NLM_ERR(args...) \ 114191918Sdfr do { \ 115191918Sdfr log(LOG_ERR, args); \ 116191918Sdfr } while(0) 117191918Sdfr 118177633Sdfr/* 119177633Sdfr * Grace period handling. The value of nlm_grace_threshold is the 120177633Sdfr * value of time_uptime after which we are serving requests normally. 
121177633Sdfr */ 122177633Sdfrstatic time_t nlm_grace_threshold; 123177633Sdfr 124177633Sdfr/* 125177633Sdfr * We check for idle hosts if time_uptime is greater than 126177633Sdfr * nlm_next_idle_check, 127177633Sdfr */ 128177633Sdfrstatic time_t nlm_next_idle_check; 129177633Sdfr 130177633Sdfr/* 131177633Sdfr * A socket to use for RPC - shared by all IPv4 RPC clients. 132177633Sdfr */ 133177633Sdfrstatic struct socket *nlm_socket; 134177633Sdfr 135177633Sdfr#ifdef INET6 136177633Sdfr 137177633Sdfr/* 138177633Sdfr * A socket to use for RPC - shared by all IPv6 RPC clients. 139177633Sdfr */ 140177633Sdfrstatic struct socket *nlm_socket6; 141177633Sdfr 142177633Sdfr#endif 143177633Sdfr 144177633Sdfr/* 145177633Sdfr * An RPC client handle that can be used to communicate with the local 146177633Sdfr * NSM. 147177633Sdfr */ 148177633Sdfrstatic CLIENT *nlm_nsm; 149177633Sdfr 150177633Sdfr/* 151180025Sdfr * An AUTH handle for the server's creds. 152177633Sdfr */ 153180025Sdfrstatic AUTH *nlm_auth; 154177633Sdfr 155177633Sdfr/* 156180025Sdfr * A zero timeval for sending async RPC messages. 157180025Sdfr */ 158180025Sdfrstruct timeval nlm_zero_tv = { 0, 0 }; 159180025Sdfr 160180025Sdfr/* 161180025Sdfr * The local NSM state number 162180025Sdfr */ 163180025Sdfrint nlm_nsm_state; 164180025Sdfr 165180025Sdfr 166180025Sdfr/* 167180025Sdfr * A lock to protect the host list and waiting lock list. 168180025Sdfr */ 169180025Sdfrstatic struct mtx nlm_global_lock; 170180025Sdfr 171180025Sdfr/* 172177633Sdfr * Locks: 173177633Sdfr * (l) locked by nh_lock 174177633Sdfr * (s) only accessed via server RPC which is single threaded 175180025Sdfr * (g) locked by nlm_global_lock 176177633Sdfr * (c) const until freeing 177180025Sdfr * (a) modified using atomic ops 178177633Sdfr */ 179177633Sdfr 180177633Sdfr/* 181180025Sdfr * A pending client-side lock request, stored on the nlm_waiting_locks 182180025Sdfr * list. 
183177633Sdfr */ 184180025Sdfrstruct nlm_waiting_lock { 185180025Sdfr TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */ 186180025Sdfr bool_t nw_waiting; /* (g) */ 187180025Sdfr nlm4_lock nw_lock; /* (c) */ 188180025Sdfr union nfsfh nw_fh; /* (c) */ 189180025Sdfr struct vnode *nw_vp; /* (c) */ 190180025Sdfr}; 191180025SdfrTAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock); 192180025Sdfr 193180025Sdfrstruct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */ 194180025Sdfr 195180025Sdfr/* 196180025Sdfr * A pending server-side asynchronous lock request, stored on the 197180025Sdfr * nh_pending list of the NLM host. 198180025Sdfr */ 199177633Sdfrstruct nlm_async_lock { 200177633Sdfr TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */ 201177633Sdfr struct task af_task; /* (c) async callback details */ 202177633Sdfr void *af_cookie; /* (l) lock manager cancel token */ 203177633Sdfr struct vnode *af_vp; /* (l) vnode to lock */ 204177633Sdfr struct flock af_fl; /* (c) lock details */ 205177633Sdfr struct nlm_host *af_host; /* (c) host which is locking */ 206180025Sdfr CLIENT *af_rpc; /* (c) rpc client to send message */ 207177633Sdfr nlm4_testargs af_granted; /* (c) notification details */ 208177633Sdfr}; 209177633SdfrTAILQ_HEAD(nlm_async_lock_list, nlm_async_lock); 210177633Sdfr 211177633Sdfr/* 212177633Sdfr * NLM host. 
213177633Sdfr */ 214178112Sdfrenum nlm_host_state { 215178112Sdfr NLM_UNMONITORED, 216178112Sdfr NLM_MONITORED, 217180025Sdfr NLM_MONITOR_FAILED, 218180025Sdfr NLM_RECOVERING 219178112Sdfr}; 220184588Sdfr 221184588Sdfrstruct nlm_rpc { 222184588Sdfr CLIENT *nr_client; /* (l) RPC client handle */ 223184588Sdfr time_t nr_create_time; /* (l) when client was created */ 224184588Sdfr}; 225184588Sdfr 226177633Sdfrstruct nlm_host { 227177633Sdfr struct mtx nh_lock; 228180025Sdfr volatile u_int nh_refs; /* (a) reference count */ 229180025Sdfr TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */ 230180025Sdfr char nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */ 231177633Sdfr uint32_t nh_sysid; /* (c) our allocaed system ID */ 232177633Sdfr char nh_sysid_string[10]; /* (c) string rep. of sysid */ 233177633Sdfr struct sockaddr_storage nh_addr; /* (s) remote address of host */ 234184588Sdfr struct nlm_rpc nh_srvrpc; /* (l) RPC for server replies */ 235184588Sdfr struct nlm_rpc nh_clntrpc; /* (l) RPC for client requests */ 236177633Sdfr rpcvers_t nh_vers; /* (s) NLM version of host */ 237177633Sdfr int nh_state; /* (s) last seen NSM state of host */ 238180025Sdfr enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */ 239177633Sdfr time_t nh_idle_timeout; /* (s) Time at which host is idle */ 240177633Sdfr struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */ 241177633Sdfr struct nlm_async_lock_list nh_pending; /* (l) pending async locks */ 242177633Sdfr struct nlm_async_lock_list nh_finished; /* (l) finished async locks */ 243177633Sdfr}; 244177633SdfrTAILQ_HEAD(nlm_host_list, nlm_host); 245177633Sdfr 246180025Sdfrstatic struct nlm_host_list nlm_hosts; /* (g) */ 247180025Sdfrstatic uint32_t nlm_next_sysid = 1; /* (g) */ 248177633Sdfr 249177633Sdfrstatic void nlm_host_unmonitor(struct nlm_host *); 250177633Sdfr 251177633Sdfr/**********************************************************************/ 252177633Sdfr 253177633Sdfr/* 
254177633Sdfr * Initialise NLM globals. 255177633Sdfr */ 256177633Sdfrstatic void 257177633Sdfrnlm_init(void *dummy) 258177633Sdfr{ 259177633Sdfr int error; 260177633Sdfr 261180025Sdfr mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF); 262180025Sdfr TAILQ_INIT(&nlm_waiting_locks); 263177633Sdfr TAILQ_INIT(&nlm_hosts); 264177633Sdfr 265177633Sdfr error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent, 266177633Sdfr &nlm_syscall_prev_sysent); 267177633Sdfr if (error) 268191918Sdfr NLM_ERR("Can't register NLM syscall\n"); 269177633Sdfr else 270177633Sdfr nlm_syscall_registered = TRUE; 271177633Sdfr} 272177633SdfrSYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL); 273177633Sdfr 274177633Sdfrstatic void 275177633Sdfrnlm_uninit(void *dummy) 276177633Sdfr{ 277177633Sdfr 278177633Sdfr if (nlm_syscall_registered) 279177633Sdfr syscall_deregister(&nlm_syscall_offset, 280177633Sdfr &nlm_syscall_prev_sysent); 281177633Sdfr} 282177633SdfrSYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL); 283177633Sdfr 284177633Sdfr/* 285177633Sdfr * Copy a struct netobj. 286177633Sdfr */ 287177633Sdfrvoid 288177633Sdfrnlm_copy_netobj(struct netobj *dst, struct netobj *src, 289177633Sdfr struct malloc_type *type) 290177633Sdfr{ 291177633Sdfr 292177633Sdfr dst->n_len = src->n_len; 293177633Sdfr dst->n_bytes = malloc(src->n_len, type, M_WAITOK); 294177633Sdfr memcpy(dst->n_bytes, src->n_bytes, src->n_len); 295177633Sdfr} 296177633Sdfr 297177633Sdfr/* 298177633Sdfr * Create an RPC client handle for the given (address,prog,vers) 299177633Sdfr * triple using UDP. 
300177633Sdfr */ 301177633Sdfrstatic CLIENT * 302177633Sdfrnlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers) 303177633Sdfr{ 304184588Sdfr char *wchan = "nlmrcv"; 305177633Sdfr const char* protofmly; 306177633Sdfr struct sockaddr_storage ss; 307177633Sdfr struct socket *so; 308177633Sdfr CLIENT *rpcb; 309177633Sdfr struct timeval timo; 310177633Sdfr RPCB parms; 311177633Sdfr char *uaddr; 312181683Sdfr enum clnt_stat stat = RPC_SUCCESS; 313181683Sdfr int rpcvers = RPCBVERS4; 314181683Sdfr bool_t do_tcp = FALSE; 315191937Sdfr bool_t tryagain = FALSE; 316182154Sdfr struct portmap mapping; 317181683Sdfr u_short port = 0; 318177633Sdfr 319177633Sdfr /* 320177633Sdfr * First we need to contact the remote RPCBIND service to find 321177633Sdfr * the right port. 322177633Sdfr */ 323177633Sdfr memcpy(&ss, sa, sa->sa_len); 324177633Sdfr switch (ss.ss_family) { 325177633Sdfr case AF_INET: 326177633Sdfr ((struct sockaddr_in *)&ss)->sin_port = htons(111); 327177633Sdfr protofmly = "inet"; 328177633Sdfr so = nlm_socket; 329177633Sdfr break; 330177633Sdfr 331177633Sdfr#ifdef INET6 332177633Sdfr case AF_INET6: 333177633Sdfr ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111); 334177633Sdfr protofmly = "inet6"; 335177633Sdfr so = nlm_socket6; 336177633Sdfr break; 337177633Sdfr#endif 338177633Sdfr 339177633Sdfr default: 340177633Sdfr /* 341177633Sdfr * Unsupported address family - fail. 
342177633Sdfr */ 343177633Sdfr return (NULL); 344177633Sdfr } 345177633Sdfr 346177633Sdfr rpcb = clnt_dg_create(so, (struct sockaddr *)&ss, 347181683Sdfr RPCBPROG, rpcvers, 0, 0); 348177633Sdfr if (!rpcb) 349177633Sdfr return (NULL); 350177633Sdfr 351181683Sdfrtry_tcp: 352177633Sdfr parms.r_prog = prog; 353177633Sdfr parms.r_vers = vers; 354181683Sdfr if (do_tcp) 355181683Sdfr parms.r_netid = "tcp"; 356181683Sdfr else 357181683Sdfr parms.r_netid = "udp"; 358177633Sdfr parms.r_addr = ""; 359177633Sdfr parms.r_owner = ""; 360177633Sdfr 361177633Sdfr /* 362177633Sdfr * Use the default timeout. 363177633Sdfr */ 364177633Sdfr timo.tv_sec = 25; 365177633Sdfr timo.tv_usec = 0; 366177633Sdfragain: 367181683Sdfr switch (rpcvers) { 368181683Sdfr case RPCBVERS4: 369181683Sdfr case RPCBVERS: 370177633Sdfr /* 371181683Sdfr * Try RPCBIND 4 then 3. 372177633Sdfr */ 373181683Sdfr uaddr = NULL; 374181683Sdfr stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR, 375181683Sdfr (xdrproc_t) xdr_rpcb, &parms, 376181683Sdfr (xdrproc_t) xdr_wrapstring, &uaddr, timo); 377191937Sdfr if (stat == RPC_SUCCESS) { 378181683Sdfr /* 379181683Sdfr * We have a reply from the remote RPCBIND - turn it 380181683Sdfr * into an appropriate address and make a new client 381181683Sdfr * that can talk to the remote NLM. 382181683Sdfr * 383181683Sdfr * XXX fixup IPv6 scope ID. 
384181683Sdfr */ 385181683Sdfr struct netbuf *a; 386181683Sdfr a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr); 387181683Sdfr if (!a) { 388191937Sdfr tryagain = TRUE; 389191937Sdfr } else { 390191937Sdfr tryagain = FALSE; 391191937Sdfr memcpy(&ss, a->buf, a->len); 392191937Sdfr free(a->buf, M_RPC); 393191937Sdfr free(a, M_RPC); 394191937Sdfr xdr_free((xdrproc_t) xdr_wrapstring, &uaddr); 395181683Sdfr } 396177633Sdfr } 397191937Sdfr if (tryagain || stat == RPC_PROGVERSMISMATCH) { 398191937Sdfr if (rpcvers == RPCBVERS4) 399191937Sdfr rpcvers = RPCBVERS; 400191937Sdfr else if (rpcvers == RPCBVERS) 401191937Sdfr rpcvers = PMAPVERS; 402191937Sdfr CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers); 403191937Sdfr goto again; 404191937Sdfr } 405181683Sdfr break; 406181683Sdfr case PMAPVERS: 407177633Sdfr /* 408177633Sdfr * Try portmap. 409177633Sdfr */ 410177633Sdfr mapping.pm_prog = parms.r_prog; 411177633Sdfr mapping.pm_vers = parms.r_vers; 412181683Sdfr mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP; 413177633Sdfr mapping.pm_port = 0; 414177633Sdfr 415177633Sdfr stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT, 416182154Sdfr (xdrproc_t) xdr_portmap, &mapping, 417177633Sdfr (xdrproc_t) xdr_u_short, &port, timo); 418177633Sdfr 419177633Sdfr if (stat == RPC_SUCCESS) { 420177633Sdfr switch (ss.ss_family) { 421177633Sdfr case AF_INET: 422177633Sdfr ((struct sockaddr_in *)&ss)->sin_port = 423177633Sdfr htons(port); 424177633Sdfr break; 425177633Sdfr 426177633Sdfr#ifdef INET6 427177633Sdfr case AF_INET6: 428177633Sdfr ((struct sockaddr_in6 *)&ss)->sin6_port = 429177633Sdfr htons(port); 430177633Sdfr break; 431177633Sdfr#endif 432177633Sdfr } 433177633Sdfr } 434181683Sdfr break; 435181683Sdfr default: 436181683Sdfr panic("invalid rpcvers %d", rpcvers); 437177633Sdfr } 438181683Sdfr /* 439181683Sdfr * We may have a positive response from the portmapper, but the NLM 440181683Sdfr * service was not found. Make sure we received a valid port. 
441181683Sdfr */ 442181683Sdfr switch (ss.ss_family) { 443181683Sdfr case AF_INET: 444181683Sdfr port = ((struct sockaddr_in *)&ss)->sin_port; 445181683Sdfr break; 446181683Sdfr#ifdef INET6 447181683Sdfr case AF_INET6: 448181683Sdfr port = ((struct sockaddr_in6 *)&ss)->sin6_port; 449181683Sdfr break; 450181683Sdfr#endif 451181683Sdfr } 452181683Sdfr if (stat != RPC_SUCCESS || !port) { 453181683Sdfr /* 454181683Sdfr * If we were able to talk to rpcbind or portmap, but the udp 455181683Sdfr * variant wasn't available, ask about tcp. 456181683Sdfr * 457181683Sdfr * XXX - We could also check for a TCP portmapper, but 458181683Sdfr * if the host is running a portmapper at all, we should be able 459181683Sdfr * to hail it over UDP. 460181683Sdfr */ 461181683Sdfr if (stat == RPC_SUCCESS && !do_tcp) { 462181683Sdfr do_tcp = TRUE; 463181683Sdfr goto try_tcp; 464181683Sdfr } 465181683Sdfr 466181683Sdfr /* Otherwise, bad news. */ 467191918Sdfr NLM_ERR("NLM: failed to contact remote rpcbind, " 468191918Sdfr "stat = %d, port = %d\n", (int) stat, port); 469178241Sdfr CLNT_DESTROY(rpcb); 470177633Sdfr return (NULL); 471177633Sdfr } 472177633Sdfr 473181683Sdfr if (do_tcp) { 474181683Sdfr /* 475181683Sdfr * Destroy the UDP client we used to speak to rpcbind and 476181683Sdfr * recreate as a TCP client. 
477181683Sdfr */ 478181683Sdfr struct netconfig *nconf = NULL; 479177633Sdfr 480181683Sdfr CLNT_DESTROY(rpcb); 481181683Sdfr 482181683Sdfr switch (ss.ss_family) { 483181683Sdfr case AF_INET: 484181683Sdfr nconf = getnetconfigent("tcp"); 485181683Sdfr break; 486181683Sdfr#ifdef INET6 487181683Sdfr case AF_INET6: 488181683Sdfr nconf = getnetconfigent("tcp6"); 489181683Sdfr break; 490181683Sdfr#endif 491181683Sdfr } 492181683Sdfr 493181683Sdfr rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss, 494181683Sdfr prog, vers, 0, 0); 495184588Sdfr CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 496181683Sdfr rpcb->cl_auth = nlm_auth; 497181683Sdfr 498181683Sdfr } else { 499181683Sdfr /* 500181683Sdfr * Re-use the client we used to speak to rpcbind. 501181683Sdfr */ 502181683Sdfr CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss); 503181683Sdfr CLNT_CONTROL(rpcb, CLSET_PROG, &prog); 504181683Sdfr CLNT_CONTROL(rpcb, CLSET_VERS, &vers); 505184588Sdfr CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan); 506181683Sdfr rpcb->cl_auth = nlm_auth; 507181683Sdfr } 508181683Sdfr 509177633Sdfr return (rpcb); 510177633Sdfr} 511177633Sdfr 512177633Sdfr/* 513177633Sdfr * This async callback after when an async lock request has been 514177633Sdfr * granted. We notify the host which initiated the request. 515177633Sdfr */ 516177633Sdfrstatic void 517177633Sdfrnlm_lock_callback(void *arg, int pending) 518177633Sdfr{ 519177633Sdfr struct nlm_async_lock *af = (struct nlm_async_lock *) arg; 520180025Sdfr struct rpc_callextra ext; 521177633Sdfr 522191918Sdfr NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted\n", 523191918Sdfr af, af->af_host->nh_caller_name, af->af_host->nh_sysid); 524177633Sdfr 525177633Sdfr /* 526177633Sdfr * Send the results back to the host. 527177633Sdfr * 528177633Sdfr * Note: there is a possible race here with nlm_host_notify 529178241Sdfr * destroying the RPC client. 
To avoid problems, the first 530177633Sdfr * thing nlm_host_notify does is to cancel pending async lock 531177633Sdfr * requests. 532177633Sdfr */ 533180025Sdfr memset(&ext, 0, sizeof(ext)); 534180025Sdfr ext.rc_auth = nlm_auth; 535177633Sdfr if (af->af_host->nh_vers == NLM_VERS4) { 536177633Sdfr nlm4_granted_msg_4(&af->af_granted, 537180025Sdfr NULL, af->af_rpc, &ext, nlm_zero_tv); 538177633Sdfr } else { 539177633Sdfr /* 540177633Sdfr * Back-convert to legacy protocol 541177633Sdfr */ 542177633Sdfr nlm_testargs granted; 543177633Sdfr granted.cookie = af->af_granted.cookie; 544177633Sdfr granted.exclusive = af->af_granted.exclusive; 545177633Sdfr granted.alock.caller_name = 546177633Sdfr af->af_granted.alock.caller_name; 547177633Sdfr granted.alock.fh = af->af_granted.alock.fh; 548177633Sdfr granted.alock.oh = af->af_granted.alock.oh; 549177633Sdfr granted.alock.svid = af->af_granted.alock.svid; 550177633Sdfr granted.alock.l_offset = 551177633Sdfr af->af_granted.alock.l_offset; 552177633Sdfr granted.alock.l_len = 553177633Sdfr af->af_granted.alock.l_len; 554177633Sdfr 555177633Sdfr nlm_granted_msg_1(&granted, 556180025Sdfr NULL, af->af_rpc, &ext, nlm_zero_tv); 557177633Sdfr } 558177633Sdfr 559177633Sdfr /* 560177633Sdfr * Move this entry to the nh_finished list. Someone else will 561177633Sdfr * free it later - its too hard to do it here safely without 562177633Sdfr * racing with cancel. 563177633Sdfr * 564177633Sdfr * XXX possibly we should have a third "granted sent but not 565177633Sdfr * ack'ed" list so that we can re-send the granted message. 566177633Sdfr */ 567177633Sdfr mtx_lock(&af->af_host->nh_lock); 568177633Sdfr TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link); 569177633Sdfr TAILQ_INSERT_TAIL(&af->af_host->nh_finished, af, af_link); 570177633Sdfr mtx_unlock(&af->af_host->nh_lock); 571177633Sdfr} 572177633Sdfr 573177633Sdfr/* 574177633Sdfr * Free an async lock request. The request must have been removed from 575177633Sdfr * any list. 
576177633Sdfr */ 577177633Sdfrstatic void 578177633Sdfrnlm_free_async_lock(struct nlm_async_lock *af) 579177633Sdfr{ 580177633Sdfr /* 581177633Sdfr * Free an async lock. 582177633Sdfr */ 583180025Sdfr if (af->af_rpc) 584180025Sdfr CLNT_RELEASE(af->af_rpc); 585177633Sdfr xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted); 586177633Sdfr if (af->af_vp) 587177633Sdfr vrele(af->af_vp); 588177633Sdfr free(af, M_NLM); 589177633Sdfr} 590177633Sdfr 591177633Sdfr/* 592177633Sdfr * Cancel our async request - this must be called with 593177633Sdfr * af->nh_host->nh_lock held. This is slightly complicated by a 594177633Sdfr * potential race with our own callback. If we fail to cancel the 595177633Sdfr * lock, it must already have been granted - we make sure our async 596177633Sdfr * task has completed by calling taskqueue_drain in this case. 597177633Sdfr */ 598177633Sdfrstatic int 599177633Sdfrnlm_cancel_async_lock(struct nlm_async_lock *af) 600177633Sdfr{ 601177633Sdfr struct nlm_host *host = af->af_host; 602177633Sdfr int error; 603177633Sdfr 604177633Sdfr mtx_assert(&host->nh_lock, MA_OWNED); 605177633Sdfr 606177633Sdfr mtx_unlock(&host->nh_lock); 607177633Sdfr 608177633Sdfr error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl, 609177633Sdfr F_REMOTE, NULL, &af->af_cookie); 610177633Sdfr 611177633Sdfr if (error) { 612177633Sdfr /* 613177633Sdfr * We failed to cancel - make sure our callback has 614177633Sdfr * completed before we continue. 615177633Sdfr */ 616177633Sdfr taskqueue_drain(taskqueue_thread, &af->af_task); 617177633Sdfr } 618177633Sdfr 619177633Sdfr mtx_lock(&host->nh_lock); 620177633Sdfr 621177633Sdfr if (!error) { 622191918Sdfr NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) " 623191918Sdfr "cancelled\n", af, host->nh_caller_name, host->nh_sysid); 624177633Sdfr 625177633Sdfr /* 626177633Sdfr * Remove from the nh_pending list and free now that 627177633Sdfr * we are safe from the callback. 
628177633Sdfr */ 629177633Sdfr TAILQ_REMOVE(&host->nh_pending, af, af_link); 630177633Sdfr mtx_unlock(&host->nh_lock); 631177633Sdfr nlm_free_async_lock(af); 632177633Sdfr mtx_lock(&host->nh_lock); 633177633Sdfr } 634177633Sdfr 635177633Sdfr return (error); 636177633Sdfr} 637177633Sdfr 638177633Sdfrstatic void 639177633Sdfrnlm_free_finished_locks(struct nlm_host *host) 640177633Sdfr{ 641177633Sdfr struct nlm_async_lock *af; 642177633Sdfr 643177633Sdfr mtx_lock(&host->nh_lock); 644177633Sdfr while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) { 645177633Sdfr TAILQ_REMOVE(&host->nh_finished, af, af_link); 646177633Sdfr mtx_unlock(&host->nh_lock); 647177633Sdfr nlm_free_async_lock(af); 648177633Sdfr mtx_lock(&host->nh_lock); 649177633Sdfr } 650177633Sdfr mtx_unlock(&host->nh_lock); 651177633Sdfr} 652177633Sdfr 653177633Sdfr/* 654180025Sdfr * Free resources used by a host. This is called after the reference 655180025Sdfr * count has reached zero so it doesn't need to worry about locks. 656177633Sdfr */ 657177633Sdfrstatic void 658180025Sdfrnlm_host_destroy(struct nlm_host *host) 659177633Sdfr{ 660180025Sdfr 661180025Sdfr mtx_lock(&nlm_global_lock); 662180025Sdfr TAILQ_REMOVE(&nlm_hosts, host, nh_link); 663180025Sdfr mtx_unlock(&nlm_global_lock); 664180025Sdfr 665184588Sdfr if (host->nh_srvrpc.nr_client) 666184588Sdfr CLNT_RELEASE(host->nh_srvrpc.nr_client); 667184588Sdfr if (host->nh_clntrpc.nr_client) 668184588Sdfr CLNT_RELEASE(host->nh_clntrpc.nr_client); 669180025Sdfr mtx_destroy(&host->nh_lock); 670180025Sdfr sysctl_ctx_free(&host->nh_sysctl); 671180025Sdfr free(host, M_NLM); 672180025Sdfr} 673180025Sdfr 674184588Sdfr#ifdef NFSCLIENT 675184588Sdfr 676180025Sdfr/* 677180025Sdfr * Thread start callback for client lock recovery 678180025Sdfr */ 679180025Sdfrstatic void 680180025Sdfrnlm_client_recovery_start(void *arg) 681180025Sdfr{ 682180025Sdfr struct nlm_host *host = (struct nlm_host *) arg; 683180025Sdfr 684191918Sdfr NLM_DEBUG(1, "NLM: client lock recovery for 
%s started\n", 685191918Sdfr host->nh_caller_name); 686180025Sdfr 687180025Sdfr nlm_client_recovery(host); 688180025Sdfr 689191918Sdfr NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n", 690191918Sdfr host->nh_caller_name); 691180025Sdfr 692180025Sdfr host->nh_monstate = NLM_MONITORED; 693180025Sdfr nlm_host_release(host); 694180025Sdfr 695180025Sdfr kthread_exit(); 696180025Sdfr} 697180025Sdfr 698184588Sdfr#endif 699184588Sdfr 700180025Sdfr/* 701180025Sdfr * This is called when we receive a host state change notification. We 702180025Sdfr * unlock any active locks owned by the host. When rpc.lockd is 703180025Sdfr * shutting down, this function is called with newstate set to zero 704180025Sdfr * which allows us to cancel any pending async locks and clear the 705180025Sdfr * locking state. 706180025Sdfr */ 707180025Sdfrstatic void 708180025Sdfrnlm_host_notify(struct nlm_host *host, int newstate) 709180025Sdfr{ 710177633Sdfr struct nlm_async_lock *af; 711177633Sdfr 712177633Sdfr if (newstate) { 713191918Sdfr NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new " 714191918Sdfr "state is %d\n", host->nh_caller_name, 715191918Sdfr host->nh_sysid, newstate); 716177633Sdfr } 717177633Sdfr 718177633Sdfr /* 719177633Sdfr * Cancel any pending async locks for this host. 720177633Sdfr */ 721177633Sdfr mtx_lock(&host->nh_lock); 722177633Sdfr while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) { 723177633Sdfr /* 724177633Sdfr * nlm_cancel_async_lock will remove the entry from 725177633Sdfr * nh_pending and free it. 726177633Sdfr */ 727177633Sdfr nlm_cancel_async_lock(af); 728177633Sdfr } 729177633Sdfr mtx_unlock(&host->nh_lock); 730177633Sdfr nlm_free_finished_locks(host); 731177633Sdfr 732177633Sdfr /* 733180025Sdfr * The host just rebooted - trash its locks. 
734177633Sdfr */ 735177633Sdfr lf_clearremotesys(host->nh_sysid); 736177633Sdfr host->nh_state = newstate; 737177633Sdfr 738184588Sdfr#ifdef NFSCLIENT 739177633Sdfr /* 740180025Sdfr * If we have any remote locks for this host (i.e. it 741180025Sdfr * represents a remote NFS server that our local NFS client 742180025Sdfr * has locks for), start a recovery thread. 743177633Sdfr */ 744180025Sdfr if (newstate != 0 745180025Sdfr && host->nh_monstate != NLM_RECOVERING 746180025Sdfr && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) { 747180025Sdfr struct thread *td; 748180025Sdfr host->nh_monstate = NLM_RECOVERING; 749180025Sdfr refcount_acquire(&host->nh_refs); 750180025Sdfr kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0, 751180025Sdfr "NFS lock recovery for %s", host->nh_caller_name); 752177633Sdfr } 753184588Sdfr#endif 754177633Sdfr} 755177633Sdfr 756177633Sdfr/* 757177633Sdfr * Sysctl handler to count the number of locks for a sysid. 758177633Sdfr */ 759177633Sdfrstatic int 760177633Sdfrnlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 761177633Sdfr{ 762177633Sdfr struct nlm_host *host; 763177633Sdfr int count; 764177633Sdfr 765177633Sdfr host = oidp->oid_arg1; 766177633Sdfr count = lf_countlocks(host->nh_sysid); 767177633Sdfr return sysctl_handle_int(oidp, &count, 0, req); 768177633Sdfr} 769177633Sdfr 770177633Sdfr/* 771180025Sdfr * Sysctl handler to count the number of client locks for a sysid. 772180025Sdfr */ 773180025Sdfrstatic int 774180025Sdfrnlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS) 775180025Sdfr{ 776180025Sdfr struct nlm_host *host; 777180025Sdfr int count; 778180025Sdfr 779180025Sdfr host = oidp->oid_arg1; 780180025Sdfr count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid); 781180025Sdfr return sysctl_handle_int(oidp, &count, 0, req); 782180025Sdfr} 783180025Sdfr 784180025Sdfr/* 785177633Sdfr * Create a new NLM host. 
786177633Sdfr */ 787177633Sdfrstatic struct nlm_host * 788177633Sdfrnlm_create_host(const char* caller_name) 789177633Sdfr{ 790177633Sdfr struct nlm_host *host; 791177633Sdfr struct sysctl_oid *oid; 792177633Sdfr 793180025Sdfr mtx_assert(&nlm_global_lock, MA_OWNED); 794180025Sdfr 795191918Sdfr NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n", 796191918Sdfr caller_name, nlm_next_sysid); 797180025Sdfr host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO); 798180025Sdfr if (!host) 799180025Sdfr return (NULL); 800177633Sdfr mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF); 801180025Sdfr host->nh_refs = 1; 802180025Sdfr strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN); 803177633Sdfr host->nh_sysid = nlm_next_sysid++; 804177633Sdfr snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string), 805177633Sdfr "%d", host->nh_sysid); 806177633Sdfr host->nh_vers = 0; 807177633Sdfr host->nh_state = 0; 808178112Sdfr host->nh_monstate = NLM_UNMONITORED; 809177633Sdfr TAILQ_INIT(&host->nh_pending); 810177633Sdfr TAILQ_INIT(&host->nh_finished); 811177633Sdfr TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link); 812177633Sdfr 813180025Sdfr mtx_unlock(&nlm_global_lock); 814180025Sdfr 815177633Sdfr sysctl_ctx_init(&host->nh_sysctl); 816177633Sdfr oid = SYSCTL_ADD_NODE(&host->nh_sysctl, 817177633Sdfr SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid), 818177633Sdfr OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, ""); 819177633Sdfr SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 820177633Sdfr "hostname", CTLFLAG_RD, host->nh_caller_name, 0, ""); 821177633Sdfr SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 822177633Sdfr "version", CTLFLAG_RD, &host->nh_vers, 0, ""); 823177633Sdfr SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 824178112Sdfr "monitored", CTLFLAG_RD, &host->nh_monstate, 0, ""); 825177633Sdfr SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 826177633Sdfr "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0, 
827177633Sdfr nlm_host_lock_count_sysctl, "I", ""); 828180025Sdfr SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO, 829180025Sdfr "client_lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0, 830180025Sdfr nlm_host_client_lock_count_sysctl, "I", ""); 831177633Sdfr 832180025Sdfr mtx_lock(&nlm_global_lock); 833180025Sdfr 834177633Sdfr return (host); 835177633Sdfr} 836177633Sdfr 837177633Sdfr/* 838192501Srmacklem * Acquire the next sysid for remote locks not handled by the NLM. 839192501Srmacklem */ 840192501Srmacklemuint32_t 841192501Srmacklemnlm_acquire_next_sysid(void) 842192501Srmacklem{ 843192501Srmacklem uint32_t next_sysid; 844192501Srmacklem 845192501Srmacklem mtx_lock(&nlm_global_lock); 846192501Srmacklem next_sysid = nlm_next_sysid++; 847192501Srmacklem mtx_unlock(&nlm_global_lock); 848192501Srmacklem return (next_sysid); 849192501Srmacklem} 850192501Srmacklem 851192501Srmacklem/* 852177633Sdfr * Return non-zero if the address parts of the two sockaddrs are the 853177633Sdfr * same. 
854177633Sdfr */ 855177633Sdfrstatic int 856177633Sdfrnlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b) 857177633Sdfr{ 858177633Sdfr const struct sockaddr_in *a4, *b4; 859177633Sdfr#ifdef INET6 860177633Sdfr const struct sockaddr_in6 *a6, *b6; 861177633Sdfr#endif 862177633Sdfr 863177633Sdfr if (a->sa_family != b->sa_family) 864177633Sdfr return (FALSE); 865177633Sdfr 866177633Sdfr switch (a->sa_family) { 867177633Sdfr case AF_INET: 868177633Sdfr a4 = (const struct sockaddr_in *) a; 869177633Sdfr b4 = (const struct sockaddr_in *) b; 870177633Sdfr return !memcmp(&a4->sin_addr, &b4->sin_addr, 871177633Sdfr sizeof(a4->sin_addr)); 872177633Sdfr#ifdef INET6 873177633Sdfr case AF_INET6: 874177633Sdfr a6 = (const struct sockaddr_in6 *) a; 875177633Sdfr b6 = (const struct sockaddr_in6 *) b; 876177633Sdfr return !memcmp(&a6->sin6_addr, &b6->sin6_addr, 877177633Sdfr sizeof(a6->sin6_addr)); 878177633Sdfr#endif 879177633Sdfr } 880177633Sdfr 881177633Sdfr return (0); 882177633Sdfr} 883177633Sdfr 884177633Sdfr/* 885177633Sdfr * Check for idle hosts and stop monitoring them. We could also free 886177633Sdfr * the host structure here, possibly after a larger timeout but that 887177633Sdfr * would require some care to avoid races with 888177633Sdfr * e.g. nlm_host_lock_count_sysctl. 
889177633Sdfr */ 890177633Sdfrstatic void 891177633Sdfrnlm_check_idle(void) 892177633Sdfr{ 893177633Sdfr struct nlm_host *host; 894177633Sdfr 895180025Sdfr mtx_assert(&nlm_global_lock, MA_OWNED); 896180025Sdfr 897177633Sdfr if (time_uptime <= nlm_next_idle_check) 898177633Sdfr return; 899177633Sdfr 900177633Sdfr nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 901177633Sdfr 902177633Sdfr TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 903178112Sdfr if (host->nh_monstate == NLM_MONITORED 904177633Sdfr && time_uptime > host->nh_idle_timeout) { 905180025Sdfr mtx_unlock(&nlm_global_lock); 906180025Sdfr if (lf_countlocks(host->nh_sysid) > 0 907180025Sdfr || lf_countlocks(NLM_SYSID_CLIENT 908180025Sdfr + host->nh_sysid)) { 909177633Sdfr host->nh_idle_timeout = 910177633Sdfr time_uptime + NLM_IDLE_TIMEOUT; 911180025Sdfr mtx_lock(&nlm_global_lock); 912177633Sdfr continue; 913177633Sdfr } 914177633Sdfr nlm_host_unmonitor(host); 915180025Sdfr mtx_lock(&nlm_global_lock); 916177633Sdfr } 917177633Sdfr } 918177633Sdfr} 919177633Sdfr 920177633Sdfr/* 921177633Sdfr * Search for an existing NLM host that matches the given name 922177633Sdfr * (typically the caller_name element of an nlm4_lock). If none is 923180025Sdfr * found, create a new host. If 'addr' is non-NULL, record the remote 924177633Sdfr * address of the host so that we can call it back for async 925180025Sdfr * responses. If 'vers' is greater than zero then record the NLM 926180025Sdfr * program version to use to communicate with this client. 927177633Sdfr */ 928177633Sdfrstruct nlm_host * 929180025Sdfrnlm_find_host_by_name(const char *name, const struct sockaddr *addr, 930180025Sdfr rpcvers_t vers) 931177633Sdfr{ 932177633Sdfr struct nlm_host *host; 933177633Sdfr 934180025Sdfr mtx_lock(&nlm_global_lock); 935177633Sdfr 936177633Sdfr /* 937177633Sdfr * The remote host is determined by caller_name. 
938177633Sdfr */ 939177633Sdfr TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 940177633Sdfr if (!strcmp(host->nh_caller_name, name)) 941177633Sdfr break; 942177633Sdfr } 943177633Sdfr 944180025Sdfr if (!host) { 945177633Sdfr host = nlm_create_host(name); 946180025Sdfr if (!host) { 947180025Sdfr mtx_unlock(&nlm_global_lock); 948180025Sdfr return (NULL); 949180025Sdfr } 950180025Sdfr } 951180025Sdfr refcount_acquire(&host->nh_refs); 952180025Sdfr 953177633Sdfr host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 954177633Sdfr 955177633Sdfr /* 956180025Sdfr * If we have an address for the host, record it so that we 957180025Sdfr * can send async replies etc. 958177633Sdfr */ 959180025Sdfr if (addr) { 960177633Sdfr 961180025Sdfr KASSERT(addr->sa_len < sizeof(struct sockaddr_storage), 962177633Sdfr ("Strange remote transport address length")); 963177633Sdfr 964177633Sdfr /* 965177633Sdfr * If we have seen an address before and we currently 966177633Sdfr * have an RPC client handle, make sure the address is 967177633Sdfr * the same, otherwise discard the client handle. 
968177633Sdfr */ 969184588Sdfr if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) { 970177633Sdfr if (!nlm_compare_addr( 971177633Sdfr (struct sockaddr *) &host->nh_addr, 972180025Sdfr addr) 973180025Sdfr || host->nh_vers != vers) { 974180025Sdfr CLIENT *client; 975180025Sdfr mtx_lock(&host->nh_lock); 976184588Sdfr client = host->nh_srvrpc.nr_client; 977184588Sdfr host->nh_srvrpc.nr_client = NULL; 978180025Sdfr mtx_unlock(&host->nh_lock); 979180025Sdfr if (client) { 980180025Sdfr CLNT_RELEASE(client); 981180025Sdfr } 982177633Sdfr } 983177633Sdfr } 984180025Sdfr memcpy(&host->nh_addr, addr, addr->sa_len); 985180025Sdfr host->nh_vers = vers; 986177633Sdfr } 987177633Sdfr 988180025Sdfr nlm_check_idle(); 989180025Sdfr 990180025Sdfr mtx_unlock(&nlm_global_lock); 991180025Sdfr 992177633Sdfr return (host); 993177633Sdfr} 994177633Sdfr 995177633Sdfr/* 996177633Sdfr * Search for an existing NLM host that matches the given remote 997177633Sdfr * address. If none is found, create a new host with the requested 998177633Sdfr * address and remember 'vers' as the NLM protocol version to use for 999177633Sdfr * that host. 1000177633Sdfr */ 1001177633Sdfrstruct nlm_host * 1002177633Sdfrnlm_find_host_by_addr(const struct sockaddr *addr, int vers) 1003177633Sdfr{ 1004180025Sdfr /* 1005180025Sdfr * Fake up a name using inet_ntop. This buffer is 1006180025Sdfr * large enough for an IPv6 address. 
1007180025Sdfr */ 1008180025Sdfr char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; 1009177633Sdfr struct nlm_host *host; 1010177633Sdfr 1011180025Sdfr switch (addr->sa_family) { 1012180025Sdfr case AF_INET: 1013180025Sdfr __rpc_inet_ntop(AF_INET, 1014180025Sdfr &((const struct sockaddr_in *) addr)->sin_addr, 1015180025Sdfr tmp, sizeof tmp); 1016180025Sdfr break; 1017180025Sdfr#ifdef INET6 1018180025Sdfr case AF_INET6: 1019180025Sdfr __rpc_inet_ntop(AF_INET6, 1020180025Sdfr &((const struct sockaddr_in6 *) addr)->sin6_addr, 1021180025Sdfr tmp, sizeof tmp); 1022180025Sdfr break; 1023180025Sdfr#endif 1024180025Sdfr default: 1025180025Sdfr strcmp(tmp, "<unknown>"); 1026180025Sdfr } 1027177633Sdfr 1028180025Sdfr 1029180025Sdfr mtx_lock(&nlm_global_lock); 1030180025Sdfr 1031177633Sdfr /* 1032177633Sdfr * The remote host is determined by caller_name. 1033177633Sdfr */ 1034177633Sdfr TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1035177633Sdfr if (nlm_compare_addr(addr, 1036177633Sdfr (const struct sockaddr *) &host->nh_addr)) 1037177633Sdfr break; 1038177633Sdfr } 1039177633Sdfr 1040177633Sdfr if (!host) { 1041180025Sdfr host = nlm_create_host(tmp); 1042180025Sdfr if (!host) { 1043180025Sdfr mtx_unlock(&nlm_global_lock); 1044180025Sdfr return (NULL); 1045177633Sdfr } 1046177633Sdfr memcpy(&host->nh_addr, addr, addr->sa_len); 1047177633Sdfr host->nh_vers = vers; 1048177633Sdfr } 1049180025Sdfr refcount_acquire(&host->nh_refs); 1050180025Sdfr 1051177633Sdfr host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT; 1052177633Sdfr 1053180025Sdfr nlm_check_idle(); 1054180025Sdfr 1055180025Sdfr mtx_unlock(&nlm_global_lock); 1056180025Sdfr 1057177633Sdfr return (host); 1058177633Sdfr} 1059177633Sdfr 1060177633Sdfr/* 1061177633Sdfr * Find the NLM host that matches the value of 'sysid'. If none 1062177633Sdfr * exists, return NULL. 
1063177633Sdfr */ 1064177633Sdfrstatic struct nlm_host * 1065177633Sdfrnlm_find_host_by_sysid(int sysid) 1066177633Sdfr{ 1067177633Sdfr struct nlm_host *host; 1068177633Sdfr 1069177633Sdfr TAILQ_FOREACH(host, &nlm_hosts, nh_link) { 1070180025Sdfr if (host->nh_sysid == sysid) { 1071180025Sdfr refcount_acquire(&host->nh_refs); 1072177633Sdfr return (host); 1073180025Sdfr } 1074177633Sdfr } 1075177633Sdfr 1076177633Sdfr return (NULL); 1077177633Sdfr} 1078177633Sdfr 1079180025Sdfrvoid nlm_host_release(struct nlm_host *host) 1080180025Sdfr{ 1081180025Sdfr if (refcount_release(&host->nh_refs)) { 1082180025Sdfr /* 1083180025Sdfr * Free the host 1084180025Sdfr */ 1085180025Sdfr nlm_host_destroy(host); 1086180025Sdfr } 1087180025Sdfr} 1088180025Sdfr 1089177633Sdfr/* 1090177633Sdfr * Unregister this NLM host with the local NSM due to idleness. 1091177633Sdfr */ 1092177633Sdfrstatic void 1093177633Sdfrnlm_host_unmonitor(struct nlm_host *host) 1094177633Sdfr{ 1095177633Sdfr mon_id smmonid; 1096177633Sdfr sm_stat_res smstat; 1097177633Sdfr struct timeval timo; 1098177633Sdfr enum clnt_stat stat; 1099177633Sdfr 1100191918Sdfr NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n", 1101191918Sdfr host->nh_caller_name, host->nh_sysid); 1102177633Sdfr 1103177633Sdfr /* 1104177633Sdfr * We put our assigned system ID value in the priv field to 1105177633Sdfr * make it simpler to find the host if we are notified of a 1106177633Sdfr * host restart. 
1107177633Sdfr */ 1108177633Sdfr smmonid.mon_name = host->nh_caller_name; 1109177633Sdfr smmonid.my_id.my_name = "localhost"; 1110177633Sdfr smmonid.my_id.my_prog = NLM_PROG; 1111177633Sdfr smmonid.my_id.my_vers = NLM_SM; 1112177633Sdfr smmonid.my_id.my_proc = NLM_SM_NOTIFY; 1113177633Sdfr 1114177633Sdfr timo.tv_sec = 25; 1115177633Sdfr timo.tv_usec = 0; 1116177633Sdfr stat = CLNT_CALL(nlm_nsm, SM_UNMON, 1117177633Sdfr (xdrproc_t) xdr_mon, &smmonid, 1118177633Sdfr (xdrproc_t) xdr_sm_stat, &smstat, timo); 1119177633Sdfr 1120177633Sdfr if (stat != RPC_SUCCESS) { 1121191918Sdfr NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1122177633Sdfr return; 1123177633Sdfr } 1124177633Sdfr if (smstat.res_stat == stat_fail) { 1125191918Sdfr NLM_ERR("Local NSM refuses to unmonitor %s\n", 1126177633Sdfr host->nh_caller_name); 1127177633Sdfr return; 1128177633Sdfr } 1129177633Sdfr 1130178112Sdfr host->nh_monstate = NLM_UNMONITORED; 1131177633Sdfr} 1132177633Sdfr 1133177633Sdfr/* 1134177633Sdfr * Register this NLM host with the local NSM so that we can be 1135177633Sdfr * notified if it reboots. 1136177633Sdfr */ 1137180025Sdfrvoid 1138177633Sdfrnlm_host_monitor(struct nlm_host *host, int state) 1139177633Sdfr{ 1140177633Sdfr mon smmon; 1141177633Sdfr sm_stat_res smstat; 1142177633Sdfr struct timeval timo; 1143177633Sdfr enum clnt_stat stat; 1144177633Sdfr 1145177633Sdfr if (state && !host->nh_state) { 1146177633Sdfr /* 1147177633Sdfr * This is the first time we have seen an NSM state 1148177633Sdfr * value for this host. We record it here to help 1149177633Sdfr * detect host reboots. 
1150177633Sdfr */ 1151177633Sdfr host->nh_state = state; 1152191918Sdfr NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n", 1153191918Sdfr host->nh_caller_name, host->nh_sysid, state); 1154177633Sdfr } 1155177633Sdfr 1156180025Sdfr mtx_lock(&host->nh_lock); 1157180025Sdfr if (host->nh_monstate != NLM_UNMONITORED) { 1158180025Sdfr mtx_unlock(&host->nh_lock); 1159177633Sdfr return; 1160180025Sdfr } 1161180025Sdfr host->nh_monstate = NLM_MONITORED; 1162180025Sdfr mtx_unlock(&host->nh_lock); 1163177633Sdfr 1164191918Sdfr NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n", 1165191918Sdfr host->nh_caller_name, host->nh_sysid); 1166177633Sdfr 1167177633Sdfr /* 1168177633Sdfr * We put our assigned system ID value in the priv field to 1169177633Sdfr * make it simpler to find the host if we are notified of a 1170177633Sdfr * host restart. 1171177633Sdfr */ 1172177633Sdfr smmon.mon_id.mon_name = host->nh_caller_name; 1173177633Sdfr smmon.mon_id.my_id.my_name = "localhost"; 1174177633Sdfr smmon.mon_id.my_id.my_prog = NLM_PROG; 1175177633Sdfr smmon.mon_id.my_id.my_vers = NLM_SM; 1176177633Sdfr smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY; 1177177633Sdfr memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid)); 1178177633Sdfr 1179177633Sdfr timo.tv_sec = 25; 1180177633Sdfr timo.tv_usec = 0; 1181177633Sdfr stat = CLNT_CALL(nlm_nsm, SM_MON, 1182177633Sdfr (xdrproc_t) xdr_mon, &smmon, 1183177633Sdfr (xdrproc_t) xdr_sm_stat, &smstat, timo); 1184177633Sdfr 1185177633Sdfr if (stat != RPC_SUCCESS) { 1186191918Sdfr NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat); 1187177633Sdfr return; 1188177633Sdfr } 1189177633Sdfr if (smstat.res_stat == stat_fail) { 1190191918Sdfr NLM_ERR("Local NSM refuses to monitor %s\n", 1191177633Sdfr host->nh_caller_name); 1192180025Sdfr mtx_lock(&host->nh_lock); 1193178112Sdfr host->nh_monstate = NLM_MONITOR_FAILED; 1194180025Sdfr mtx_unlock(&host->nh_lock); 1195177633Sdfr return; 1196177633Sdfr } 1197177633Sdfr 1198178112Sdfr 
host->nh_monstate = NLM_MONITORED; 1199177633Sdfr} 1200177633Sdfr 1201177633Sdfr/* 1202177633Sdfr * Return an RPC client handle that can be used to talk to the NLM 1203177633Sdfr * running on the given host. 1204177633Sdfr */ 1205177633SdfrCLIENT * 1206184588Sdfrnlm_host_get_rpc(struct nlm_host *host, bool_t isserver) 1207177633Sdfr{ 1208184588Sdfr struct nlm_rpc *rpc; 1209180025Sdfr CLIENT *client; 1210177633Sdfr 1211180025Sdfr mtx_lock(&host->nh_lock); 1212180025Sdfr 1213184588Sdfr if (isserver) 1214184588Sdfr rpc = &host->nh_srvrpc; 1215184588Sdfr else 1216184588Sdfr rpc = &host->nh_clntrpc; 1217184588Sdfr 1218179425Sdfr /* 1219180025Sdfr * We can't hold onto RPC handles for too long - the async 1220179425Sdfr * call/reply protocol used by some NLM clients makes it hard 1221179425Sdfr * to tell when they change port numbers (e.g. after a 1222179425Sdfr * reboot). Note that if a client reboots while it isn't 1223179425Sdfr * holding any locks, it won't bother to notify us. We 1224179425Sdfr * expire the RPC handles after two minutes. 
1225179425Sdfr */ 1226184588Sdfr if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) { 1227184588Sdfr client = rpc->nr_client; 1228184588Sdfr rpc->nr_client = NULL; 1229180025Sdfr mtx_unlock(&host->nh_lock); 1230180025Sdfr CLNT_RELEASE(client); 1231180025Sdfr mtx_lock(&host->nh_lock); 1232179425Sdfr } 1233179425Sdfr 1234184588Sdfr if (!rpc->nr_client) { 1235180025Sdfr mtx_unlock(&host->nh_lock); 1236180025Sdfr client = nlm_get_rpc((struct sockaddr *)&host->nh_addr, 1237180025Sdfr NLM_PROG, host->nh_vers); 1238180025Sdfr mtx_lock(&host->nh_lock); 1239177633Sdfr 1240180025Sdfr if (client) { 1241184588Sdfr if (rpc->nr_client) { 1242180025Sdfr mtx_unlock(&host->nh_lock); 1243180025Sdfr CLNT_DESTROY(client); 1244180025Sdfr mtx_lock(&host->nh_lock); 1245180025Sdfr } else { 1246184588Sdfr rpc->nr_client = client; 1247184588Sdfr rpc->nr_create_time = time_uptime; 1248180025Sdfr } 1249180025Sdfr } 1250180025Sdfr } 1251180025Sdfr 1252184588Sdfr client = rpc->nr_client; 1253180025Sdfr if (client) 1254180025Sdfr CLNT_ACQUIRE(client); 1255180025Sdfr mtx_unlock(&host->nh_lock); 1256180025Sdfr 1257180025Sdfr return (client); 1258180025Sdfr 1259180025Sdfr} 1260180025Sdfr 1261180025Sdfrint nlm_host_get_sysid(struct nlm_host *host) 1262180025Sdfr{ 1263180025Sdfr 1264180025Sdfr return (host->nh_sysid); 1265180025Sdfr} 1266180025Sdfr 1267180025Sdfrint 1268180025Sdfrnlm_host_get_state(struct nlm_host *host) 1269180025Sdfr{ 1270180025Sdfr 1271180025Sdfr return (host->nh_state); 1272180025Sdfr} 1273180025Sdfr 1274180025Sdfrvoid * 1275180025Sdfrnlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp) 1276180025Sdfr{ 1277180025Sdfr struct nlm_waiting_lock *nw; 1278180025Sdfr 1279180025Sdfr nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK); 1280180025Sdfr nw->nw_lock = *lock; 1281180025Sdfr memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes, 1282180025Sdfr nw->nw_lock.fh.n_len); 1283180025Sdfr nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes; 1284180025Sdfr 
nw->nw_waiting = TRUE; 1285180025Sdfr nw->nw_vp = vp; 1286180025Sdfr mtx_lock(&nlm_global_lock); 1287180025Sdfr TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link); 1288180025Sdfr mtx_unlock(&nlm_global_lock); 1289180025Sdfr 1290180025Sdfr return nw; 1291180025Sdfr} 1292180025Sdfr 1293180025Sdfrvoid 1294180025Sdfrnlm_deregister_wait_lock(void *handle) 1295180025Sdfr{ 1296180025Sdfr struct nlm_waiting_lock *nw = handle; 1297180025Sdfr 1298180025Sdfr mtx_lock(&nlm_global_lock); 1299180025Sdfr TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1300180025Sdfr mtx_unlock(&nlm_global_lock); 1301180025Sdfr 1302180025Sdfr free(nw, M_NLM); 1303180025Sdfr} 1304180025Sdfr 1305180025Sdfrint 1306180025Sdfrnlm_wait_lock(void *handle, int timo) 1307180025Sdfr{ 1308180025Sdfr struct nlm_waiting_lock *nw = handle; 1309180025Sdfr int error; 1310180025Sdfr 1311177633Sdfr /* 1312180025Sdfr * If the granted message arrived before we got here, 1313180025Sdfr * nw->nw_waiting will be FALSE - in that case, don't sleep. 1314177633Sdfr */ 1315180025Sdfr mtx_lock(&nlm_global_lock); 1316180025Sdfr error = 0; 1317180025Sdfr if (nw->nw_waiting) 1318180025Sdfr error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo); 1319180025Sdfr TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link); 1320180025Sdfr if (error) { 1321180025Sdfr /* 1322180025Sdfr * The granted message may arrive after the 1323180025Sdfr * interrupt/timeout but before we manage to lock the 1324180025Sdfr * mutex. Detect this by examining nw_lock. 1325180025Sdfr */ 1326180025Sdfr if (!nw->nw_waiting) 1327180025Sdfr error = 0; 1328180025Sdfr } else { 1329180025Sdfr /* 1330180025Sdfr * If nlm_cancel_wait is called, then error will be 1331180025Sdfr * zero but nw_waiting will still be TRUE. We 1332180025Sdfr * translate this into EINTR. 
1333180025Sdfr */ 1334180025Sdfr if (nw->nw_waiting) 1335180025Sdfr error = EINTR; 1336180025Sdfr } 1337180025Sdfr mtx_unlock(&nlm_global_lock); 1338177633Sdfr 1339180025Sdfr free(nw, M_NLM); 1340177633Sdfr 1341180025Sdfr return (error); 1342180025Sdfr} 1343180025Sdfr 1344180025Sdfrvoid 1345180025Sdfrnlm_cancel_wait(struct vnode *vp) 1346180025Sdfr{ 1347180025Sdfr struct nlm_waiting_lock *nw; 1348180025Sdfr 1349180025Sdfr mtx_lock(&nlm_global_lock); 1350180025Sdfr TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1351180025Sdfr if (nw->nw_vp == vp) { 1352180025Sdfr wakeup(nw); 1353180025Sdfr } 1354177633Sdfr } 1355180025Sdfr mtx_unlock(&nlm_global_lock); 1356177633Sdfr} 1357177633Sdfr 1358180025Sdfr 1359177633Sdfr/**********************************************************************/ 1360177633Sdfr 1361177633Sdfr/* 1362177633Sdfr * Syscall interface with userland. 1363177633Sdfr */ 1364177633Sdfr 1365177633Sdfrextern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp); 1366177633Sdfrextern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp); 1367177633Sdfrextern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); 1368177633Sdfrextern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); 1369177633Sdfr 1370177633Sdfrstatic int 1371177633Sdfrnlm_register_services(SVCPOOL *pool, int addr_count, char **addrs) 1372177633Sdfr{ 1373177633Sdfr static rpcvers_t versions[] = { 1374177633Sdfr NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4 1375177633Sdfr }; 1376177633Sdfr static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = { 1377177633Sdfr nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4 1378177633Sdfr }; 1379177633Sdfr static const int version_count = sizeof(versions) / sizeof(versions[0]); 1380177633Sdfr 1381177633Sdfr SVCXPRT **xprts; 1382177633Sdfr char netid[16]; 1383177633Sdfr char uaddr[128]; 1384177633Sdfr struct netconfig *nconf; 1385177633Sdfr int i, j, error; 1386177633Sdfr 1387177633Sdfr if (!addr_count) { 1388191918Sdfr NLM_ERR("NLM: no service 
addresses given - can't start server"); 1389177633Sdfr return (EINVAL); 1390177633Sdfr } 1391177633Sdfr 1392194407Srmacklem xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO); 1393177633Sdfr for (i = 0; i < version_count; i++) { 1394177633Sdfr for (j = 0; j < addr_count; j++) { 1395177633Sdfr /* 1396177633Sdfr * Create transports for the first version and 1397177633Sdfr * then just register everything else to the 1398177633Sdfr * same transports. 1399177633Sdfr */ 1400177633Sdfr if (i == 0) { 1401177633Sdfr char *up; 1402177633Sdfr 1403177633Sdfr error = copyin(&addrs[2*j], &up, 1404177633Sdfr sizeof(char*)); 1405177633Sdfr if (error) 1406177633Sdfr goto out; 1407177633Sdfr error = copyinstr(up, netid, sizeof(netid), 1408177633Sdfr NULL); 1409177633Sdfr if (error) 1410177633Sdfr goto out; 1411177633Sdfr error = copyin(&addrs[2*j+1], &up, 1412177633Sdfr sizeof(char*)); 1413177633Sdfr if (error) 1414177633Sdfr goto out; 1415177633Sdfr error = copyinstr(up, uaddr, sizeof(uaddr), 1416177633Sdfr NULL); 1417177633Sdfr if (error) 1418177633Sdfr goto out; 1419177633Sdfr nconf = getnetconfigent(netid); 1420177633Sdfr if (!nconf) { 1421191918Sdfr NLM_ERR("Can't lookup netid %s\n", 1422177633Sdfr netid); 1423177633Sdfr error = EINVAL; 1424177633Sdfr goto out; 1425177633Sdfr } 1426177633Sdfr xprts[j] = svc_tp_create(pool, dispatchers[i], 1427177633Sdfr NLM_PROG, versions[i], uaddr, nconf); 1428177633Sdfr if (!xprts[j]) { 1429191918Sdfr NLM_ERR("NLM: unable to create " 1430177633Sdfr "(NLM_PROG, %d).\n", versions[i]); 1431177633Sdfr error = EINVAL; 1432177633Sdfr goto out; 1433177633Sdfr } 1434177633Sdfr freenetconfigent(nconf); 1435177633Sdfr } else { 1436177633Sdfr nconf = getnetconfigent(xprts[j]->xp_netid); 1437177633Sdfr rpcb_unset(NLM_PROG, versions[i], nconf); 1438177633Sdfr if (!svc_reg(xprts[j], NLM_PROG, versions[i], 1439177633Sdfr dispatchers[i], nconf)) { 1440191918Sdfr NLM_ERR("NLM: can't register " 1441177633Sdfr "(NLM_PROG, %d)\n", 
versions[i]); 1442177633Sdfr error = EINVAL; 1443177633Sdfr goto out; 1444177633Sdfr } 1445177633Sdfr } 1446177633Sdfr } 1447177633Sdfr } 1448177633Sdfr error = 0; 1449177633Sdfrout: 1450194407Srmacklem for (j = 0; j < addr_count; j++) { 1451194407Srmacklem if (xprts[j]) 1452194407Srmacklem SVC_RELEASE(xprts[j]); 1453194407Srmacklem } 1454177633Sdfr free(xprts, M_NLM); 1455177633Sdfr return (error); 1456177633Sdfr} 1457177633Sdfr 1458177633Sdfr/* 1459177633Sdfr * Main server entry point. Contacts the local NSM to get its current 1460177633Sdfr * state and send SM_UNMON_ALL. Registers the NLM services and then 1461177633Sdfr * services requests. Does not return until the server is interrupted 1462177633Sdfr * by a signal. 1463177633Sdfr */ 1464177633Sdfrstatic int 1465177633Sdfrnlm_server_main(int addr_count, char **addrs) 1466177633Sdfr{ 1467177633Sdfr struct thread *td = curthread; 1468177633Sdfr int error; 1469178033Sdfr SVCPOOL *pool = NULL; 1470177633Sdfr struct sockopt opt; 1471177633Sdfr int portlow; 1472177633Sdfr#ifdef INET6 1473177633Sdfr struct sockaddr_in6 sin6; 1474177633Sdfr#endif 1475177633Sdfr struct sockaddr_in sin; 1476177633Sdfr my_id id; 1477177633Sdfr sm_stat smstat; 1478177633Sdfr struct timeval timo; 1479177633Sdfr enum clnt_stat stat; 1480180025Sdfr struct nlm_host *host, *nhost; 1481180025Sdfr struct nlm_waiting_lock *nw; 1482184588Sdfr#ifdef NFSCLIENT 1483180025Sdfr vop_advlock_t *old_nfs_advlock; 1484180025Sdfr vop_reclaim_t *old_nfs_reclaim; 1485184588Sdfr#endif 1486180069Savatar int v4_used; 1487180069Savatar#ifdef INET6 1488180069Savatar int v6_used; 1489180069Savatar#endif 1490177633Sdfr 1491177633Sdfr if (nlm_socket) { 1492191918Sdfr NLM_ERR("NLM: can't start server - " 1493191918Sdfr "it appears to be running already\n"); 1494177633Sdfr return (EPERM); 1495177633Sdfr } 1496177633Sdfr 1497177633Sdfr memset(&opt, 0, sizeof(opt)); 1498177633Sdfr 1499177633Sdfr nlm_socket = NULL; 1500177633Sdfr error = socreate(AF_INET, &nlm_socket, 
SOCK_DGRAM, 0, 1501177633Sdfr td->td_ucred, td); 1502177633Sdfr if (error) { 1503191918Sdfr NLM_ERR("NLM: can't create IPv4 socket - error %d\n", error); 1504177633Sdfr return (error); 1505177633Sdfr } 1506177633Sdfr opt.sopt_dir = SOPT_SET; 1507177633Sdfr opt.sopt_level = IPPROTO_IP; 1508177633Sdfr opt.sopt_name = IP_PORTRANGE; 1509177633Sdfr portlow = IP_PORTRANGE_LOW; 1510177633Sdfr opt.sopt_val = &portlow; 1511177633Sdfr opt.sopt_valsize = sizeof(portlow); 1512177633Sdfr sosetopt(nlm_socket, &opt); 1513177633Sdfr 1514177633Sdfr#ifdef INET6 1515177633Sdfr nlm_socket6 = NULL; 1516177633Sdfr error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0, 1517177633Sdfr td->td_ucred, td); 1518177633Sdfr if (error) { 1519191918Sdfr NLM_ERR("NLM: can't create IPv6 socket - error %d\n", error); 1520180025Sdfr goto out; 1521177633Sdfr return (error); 1522177633Sdfr } 1523177633Sdfr opt.sopt_dir = SOPT_SET; 1524177633Sdfr opt.sopt_level = IPPROTO_IPV6; 1525177633Sdfr opt.sopt_name = IPV6_PORTRANGE; 1526177633Sdfr portlow = IPV6_PORTRANGE_LOW; 1527177633Sdfr opt.sopt_val = &portlow; 1528177633Sdfr opt.sopt_valsize = sizeof(portlow); 1529177633Sdfr sosetopt(nlm_socket6, &opt); 1530177633Sdfr#endif 1531177633Sdfr 1532180025Sdfr nlm_auth = authunix_create(curthread->td_ucred); 1533180025Sdfr 1534177633Sdfr#ifdef INET6 1535177633Sdfr memset(&sin6, 0, sizeof(sin6)); 1536177633Sdfr sin6.sin6_len = sizeof(sin6); 1537177633Sdfr sin6.sin6_family = AF_INET6; 1538177633Sdfr sin6.sin6_addr = in6addr_loopback; 1539177633Sdfr nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS); 1540177633Sdfr if (!nlm_nsm) { 1541177633Sdfr#endif 1542177633Sdfr memset(&sin, 0, sizeof(sin)); 1543177633Sdfr sin.sin_len = sizeof(sin); 1544178033Sdfr sin.sin_family = AF_INET; 1545177633Sdfr sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 1546177633Sdfr nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG, 1547177633Sdfr SM_VERS); 1548177633Sdfr#ifdef INET6 1549177633Sdfr } 1550177633Sdfr#endif 
1551177633Sdfr 1552177633Sdfr if (!nlm_nsm) { 1553191918Sdfr NLM_ERR("Can't start NLM - unable to contact NSM\n"); 1554178033Sdfr error = EINVAL; 1555178033Sdfr goto out; 1556177633Sdfr } 1557177633Sdfr 1558184588Sdfr pool = svcpool_create("NLM", NULL); 1559177633Sdfr 1560177633Sdfr error = nlm_register_services(pool, addr_count, addrs); 1561177633Sdfr if (error) 1562177633Sdfr goto out; 1563177633Sdfr 1564177633Sdfr memset(&id, 0, sizeof(id)); 1565177633Sdfr id.my_name = "NFS NLM"; 1566177633Sdfr 1567177633Sdfr timo.tv_sec = 25; 1568177633Sdfr timo.tv_usec = 0; 1569177633Sdfr stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL, 1570177633Sdfr (xdrproc_t) xdr_my_id, &id, 1571177633Sdfr (xdrproc_t) xdr_sm_stat, &smstat, timo); 1572177633Sdfr 1573177633Sdfr if (stat != RPC_SUCCESS) { 1574177633Sdfr struct rpc_err err; 1575177633Sdfr 1576177633Sdfr CLNT_GETERR(nlm_nsm, &err); 1577191918Sdfr NLM_ERR("NLM: unexpected error contacting NSM, " 1578191918Sdfr "stat=%d, errno=%d\n", stat, err.re_errno); 1579177633Sdfr error = EINVAL; 1580177633Sdfr goto out; 1581177633Sdfr } 1582177633Sdfr 1583191918Sdfr NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state); 1584180025Sdfr nlm_nsm_state = smstat.state; 1585177633Sdfr 1586184588Sdfr#ifdef NFSCLIENT 1587180025Sdfr old_nfs_advlock = nfs_advlock_p; 1588180025Sdfr nfs_advlock_p = nlm_advlock; 1589180025Sdfr old_nfs_reclaim = nfs_reclaim_p; 1590180025Sdfr nfs_reclaim_p = nlm_reclaim; 1591184588Sdfr#endif 1592180025Sdfr 1593177633Sdfr svc_run(pool); 1594177633Sdfr error = 0; 1595177633Sdfr 1596184588Sdfr#ifdef NFSCLIENT 1597180025Sdfr nfs_advlock_p = old_nfs_advlock; 1598180025Sdfr nfs_reclaim_p = old_nfs_reclaim; 1599184588Sdfr#endif 1600180025Sdfr 1601177633Sdfrout: 1602177633Sdfr if (pool) 1603177633Sdfr svcpool_destroy(pool); 1604177633Sdfr 1605177633Sdfr /* 1606180025Sdfr * We are finished communicating with the NSM. 
1607177633Sdfr */ 1608177633Sdfr if (nlm_nsm) { 1609180025Sdfr CLNT_RELEASE(nlm_nsm); 1610177633Sdfr nlm_nsm = NULL; 1611177633Sdfr } 1612180025Sdfr 1613180025Sdfr /* 1614180025Sdfr * Trash all the existing state so that if the server 1615180025Sdfr * restarts, it gets a clean slate. This is complicated by the 1616180025Sdfr * possibility that there may be other threads trying to make 1617180025Sdfr * client locking requests. 1618180025Sdfr * 1619180025Sdfr * First we fake a client reboot notification which will 1620180025Sdfr * cancel any pending async locks and purge remote lock state 1621180025Sdfr * from the local lock manager. We release the reference from 1622180025Sdfr * nlm_hosts to the host (which may remove it from the list 1623180025Sdfr * and free it). After this phase, the only entries in the 1624180025Sdfr * nlm_host list should be from other threads performing 1625180025Sdfr * client lock requests. We arrange to defer closing the 1626180025Sdfr * sockets until the last RPC client handle is released. 
1627180025Sdfr */ 1628180025Sdfr v4_used = 0; 1629180025Sdfr#ifdef INET6 1630180025Sdfr v6_used = 0; 1631180025Sdfr#endif 1632180025Sdfr mtx_lock(&nlm_global_lock); 1633180025Sdfr TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 1634180025Sdfr wakeup(nw); 1635177633Sdfr } 1636180025Sdfr TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) { 1637180025Sdfr mtx_unlock(&nlm_global_lock); 1638180025Sdfr nlm_host_notify(host, 0); 1639180025Sdfr nlm_host_release(host); 1640180025Sdfr mtx_lock(&nlm_global_lock); 1641180025Sdfr } 1642180025Sdfr TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) { 1643180025Sdfr mtx_lock(&host->nh_lock); 1644184588Sdfr if (host->nh_srvrpc.nr_client 1645184588Sdfr || host->nh_clntrpc.nr_client) { 1646180025Sdfr if (host->nh_addr.ss_family == AF_INET) 1647180025Sdfr v4_used++; 1648180025Sdfr#ifdef INET6 1649180025Sdfr if (host->nh_addr.ss_family == AF_INET6) 1650180025Sdfr v6_used++; 1651180025Sdfr#endif 1652180025Sdfr /* 1653180025Sdfr * Note that the rpc over udp code copes 1654180025Sdfr * correctly with the fact that a socket may 1655180025Sdfr * be used by many rpc handles. 
1656180025Sdfr */ 1657184588Sdfr if (host->nh_srvrpc.nr_client) 1658184588Sdfr CLNT_CONTROL(host->nh_srvrpc.nr_client, 1659184588Sdfr CLSET_FD_CLOSE, 0); 1660184588Sdfr if (host->nh_clntrpc.nr_client) 1661184588Sdfr CLNT_CONTROL(host->nh_clntrpc.nr_client, 1662184588Sdfr CLSET_FD_CLOSE, 0); 1663180025Sdfr } 1664180025Sdfr mtx_unlock(&host->nh_lock); 1665180025Sdfr } 1666180025Sdfr mtx_unlock(&nlm_global_lock); 1667177633Sdfr 1668180025Sdfr AUTH_DESTROY(nlm_auth); 1669180025Sdfr 1670180025Sdfr if (!v4_used) 1671180025Sdfr soclose(nlm_socket); 1672177633Sdfr nlm_socket = NULL; 1673177633Sdfr#ifdef INET6 1674180025Sdfr if (!v6_used) 1675180025Sdfr soclose(nlm_socket6); 1676177633Sdfr nlm_socket6 = NULL; 1677177633Sdfr#endif 1678177633Sdfr 1679177633Sdfr return (error); 1680177633Sdfr} 1681177633Sdfr 1682177633Sdfrint 1683177633Sdfrnlm_syscall(struct thread *td, struct nlm_syscall_args *uap) 1684177633Sdfr{ 1685177633Sdfr int error; 1686177633Sdfr 1687177685Sdfr#if __FreeBSD_version >= 700000 1688177633Sdfr error = priv_check(td, PRIV_NFS_LOCKD); 1689177685Sdfr#else 1690177685Sdfr error = suser(td); 1691177685Sdfr#endif 1692177633Sdfr if (error) 1693177633Sdfr return (error); 1694177633Sdfr 1695177633Sdfr nlm_debug_level = uap->debug_level; 1696177633Sdfr nlm_grace_threshold = time_uptime + uap->grace_period; 1697177633Sdfr nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD; 1698177633Sdfr 1699177633Sdfr return nlm_server_main(uap->addr_count, uap->addrs); 1700177633Sdfr} 1701177633Sdfr 1702177633Sdfr/**********************************************************************/ 1703177633Sdfr 1704177633Sdfr/* 1705177633Sdfr * NLM implementation details, called from the RPC stubs. 
1706177633Sdfr */ 1707177633Sdfr 1708177633Sdfr 1709177633Sdfrvoid 1710177633Sdfrnlm_sm_notify(struct nlm_sm_status *argp) 1711177633Sdfr{ 1712177633Sdfr uint32_t sysid; 1713177633Sdfr struct nlm_host *host; 1714177633Sdfr 1715191918Sdfr NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name); 1716177633Sdfr memcpy(&sysid, &argp->priv, sizeof(sysid)); 1717177633Sdfr host = nlm_find_host_by_sysid(sysid); 1718180025Sdfr if (host) { 1719180025Sdfr nlm_host_notify(host, argp->state); 1720180025Sdfr nlm_host_release(host); 1721180025Sdfr } 1722177633Sdfr} 1723177633Sdfr 1724177633Sdfrstatic void 1725177633Sdfrnlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p) 1726177633Sdfr{ 1727177633Sdfr memcpy(fhp, p->n_bytes, sizeof(fhandle_t)); 1728177633Sdfr} 1729177633Sdfr 1730177633Sdfrstruct vfs_state { 1731177633Sdfr struct mount *vs_mp; 1732177633Sdfr struct vnode *vs_vp; 1733177633Sdfr int vs_vfslocked; 1734178112Sdfr int vs_vnlocked; 1735177633Sdfr}; 1736177633Sdfr 1737177633Sdfrstatic int 1738177633Sdfrnlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp, 1739177633Sdfr fhandle_t *fhp, struct vfs_state *vs) 1740177633Sdfr{ 1741184588Sdfr int error, exflags; 1742177633Sdfr struct ucred *cred = NULL, *credanon; 1743177633Sdfr 1744177633Sdfr memset(vs, 0, sizeof(*vs)); 1745177633Sdfr 1746177633Sdfr vs->vs_mp = vfs_getvfs(&fhp->fh_fsid); 1747177633Sdfr if (!vs->vs_mp) { 1748177633Sdfr return (ESTALE); 1749177633Sdfr } 1750177633Sdfr vs->vs_vfslocked = VFS_LOCK_GIANT(vs->vs_mp); 1751177633Sdfr 1752177633Sdfr error = VFS_CHECKEXP(vs->vs_mp, (struct sockaddr *)&host->nh_addr, 1753184588Sdfr &exflags, &credanon, NULL, NULL); 1754177633Sdfr if (error) 1755177633Sdfr goto out; 1756177633Sdfr 1757177633Sdfr if (exflags & MNT_EXRDONLY || (vs->vs_mp->mnt_flag & MNT_RDONLY)) { 1758177633Sdfr error = EROFS; 1759177633Sdfr goto out; 1760177633Sdfr } 1761177633Sdfr 1762177633Sdfr error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, &vs->vs_vp); 1763177633Sdfr if (error) 
1764177633Sdfr goto out; 1765178112Sdfr vs->vs_vnlocked = TRUE; 1766177633Sdfr 1767184588Sdfr if (!svc_getcred(rqstp, &cred, NULL)) { 1768177633Sdfr error = EINVAL; 1769177633Sdfr goto out; 1770177633Sdfr } 1771177633Sdfr if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { 1772177633Sdfr crfree(cred); 1773191940Skan cred = credanon; 1774191940Skan credanon = NULL; 1775177633Sdfr } 1776177633Sdfr 1777177633Sdfr /* 1778177633Sdfr * Check cred. 1779177633Sdfr */ 1780177633Sdfr error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread); 1781177633Sdfr if (error) 1782177633Sdfr goto out; 1783177633Sdfr 1784178112Sdfr#if __FreeBSD_version < 800011 1785178112Sdfr VOP_UNLOCK(vs->vs_vp, 0, curthread); 1786178112Sdfr#else 1787178112Sdfr VOP_UNLOCK(vs->vs_vp, 0); 1788178112Sdfr#endif 1789178112Sdfr vs->vs_vnlocked = FALSE; 1790178112Sdfr 1791177633Sdfrout: 1792184588Sdfr if (cred) 1793177633Sdfr crfree(cred); 1794191940Skan if (credanon) 1795191940Skan crfree(credanon); 1796177633Sdfr 1797177633Sdfr return (error); 1798177633Sdfr} 1799177633Sdfr 1800177633Sdfrstatic void 1801177633Sdfrnlm_release_vfs_state(struct vfs_state *vs) 1802177633Sdfr{ 1803177633Sdfr 1804178112Sdfr if (vs->vs_vp) { 1805178112Sdfr if (vs->vs_vnlocked) 1806178112Sdfr vput(vs->vs_vp); 1807178112Sdfr else 1808178112Sdfr vrele(vs->vs_vp); 1809178112Sdfr } 1810177633Sdfr if (vs->vs_mp) 1811177633Sdfr vfs_rel(vs->vs_mp); 1812177633Sdfr VFS_UNLOCK_GIANT(vs->vs_vfslocked); 1813177633Sdfr} 1814177633Sdfr 1815177633Sdfrstatic nlm4_stats 1816177633Sdfrnlm_convert_error(int error) 1817177633Sdfr{ 1818177633Sdfr 1819177633Sdfr if (error == ESTALE) 1820177633Sdfr return nlm4_stale_fh; 1821177633Sdfr else if (error == EROFS) 1822177633Sdfr return nlm4_rofs; 1823177633Sdfr else 1824177633Sdfr return nlm4_failed; 1825177633Sdfr} 1826177633Sdfr 1827180025Sdfrint 1828180025Sdfrnlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp, 1829180025Sdfr CLIENT **rpcp) 1830177633Sdfr{ 1831177633Sdfr 
fhandle_t fh; 1832177633Sdfr struct vfs_state vs; 1833177633Sdfr struct nlm_host *host, *bhost; 1834177633Sdfr int error, sysid; 1835177633Sdfr struct flock fl; 1836177633Sdfr 1837177633Sdfr memset(result, 0, sizeof(*result)); 1838180025Sdfr memset(&vs, 0, sizeof(vs)); 1839177633Sdfr 1840180025Sdfr host = nlm_find_host_by_name(argp->alock.caller_name, 1841184588Sdfr svc_getrpccaller(rqstp), rqstp->rq_vers); 1842177633Sdfr if (!host) { 1843177633Sdfr result->stat.stat = nlm4_denied_nolocks; 1844180025Sdfr return (ENOMEM); 1845177633Sdfr } 1846177633Sdfr 1847191918Sdfr NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n", 1848191918Sdfr host->nh_caller_name, host->nh_sysid); 1849177633Sdfr 1850177633Sdfr nlm_free_finished_locks(host); 1851177633Sdfr sysid = host->nh_sysid; 1852177633Sdfr 1853177633Sdfr nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1854177633Sdfr nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1855177633Sdfr 1856177633Sdfr if (time_uptime < nlm_grace_threshold) { 1857177633Sdfr result->stat.stat = nlm4_denied_grace_period; 1858180025Sdfr goto out; 1859177633Sdfr } 1860177633Sdfr 1861177633Sdfr error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1862177633Sdfr if (error) { 1863177633Sdfr result->stat.stat = nlm_convert_error(error); 1864177633Sdfr goto out; 1865177633Sdfr } 1866177633Sdfr 1867177633Sdfr fl.l_start = argp->alock.l_offset; 1868177633Sdfr fl.l_len = argp->alock.l_len; 1869177633Sdfr fl.l_pid = argp->alock.svid; 1870177633Sdfr fl.l_sysid = sysid; 1871177633Sdfr fl.l_whence = SEEK_SET; 1872177633Sdfr if (argp->exclusive) 1873177633Sdfr fl.l_type = F_WRLCK; 1874177633Sdfr else 1875177633Sdfr fl.l_type = F_RDLCK; 1876177633Sdfr error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE); 1877177633Sdfr if (error) { 1878177633Sdfr result->stat.stat = nlm4_failed; 1879177633Sdfr goto out; 1880177633Sdfr } 1881177633Sdfr 1882177633Sdfr if (fl.l_type == F_UNLCK) { 1883177633Sdfr result->stat.stat = nlm4_granted; 1884177633Sdfr } 
else { 1885177633Sdfr result->stat.stat = nlm4_denied; 1886177633Sdfr result->stat.nlm4_testrply_u.holder.exclusive = 1887177633Sdfr (fl.l_type == F_WRLCK); 1888177633Sdfr result->stat.nlm4_testrply_u.holder.svid = fl.l_pid; 1889177633Sdfr bhost = nlm_find_host_by_sysid(fl.l_sysid); 1890177633Sdfr if (bhost) { 1891177633Sdfr /* 1892177633Sdfr * We don't have any useful way of recording 1893177633Sdfr * the value of oh used in the original lock 1894177633Sdfr * request. Ideally, the test reply would have 1895177633Sdfr * a space for the owning host's name allowing 1896177633Sdfr * our caller's NLM to keep track. 1897177633Sdfr * 1898177633Sdfr * As far as I can see, Solaris uses an eight 1899177633Sdfr * byte structure for oh which contains a four 1900177633Sdfr * byte pid encoded in local byte order and 1901177633Sdfr * the first four bytes of the host 1902177633Sdfr * name. Linux uses a variable length string 1903177633Sdfr * 'pid@hostname' in ascii but doesn't even 1904177633Sdfr * return that in test replies. 1905177633Sdfr * 1906177633Sdfr * For the moment, return nothing in oh 1907177633Sdfr * (already zero'ed above). 
1908177633Sdfr */ 1909180025Sdfr nlm_host_release(bhost); 1910177633Sdfr } 1911177633Sdfr result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start; 1912177633Sdfr result->stat.nlm4_testrply_u.holder.l_len = fl.l_len; 1913177633Sdfr } 1914177633Sdfr 1915177633Sdfrout: 1916177633Sdfr nlm_release_vfs_state(&vs); 1917180025Sdfr if (rpcp) 1918184588Sdfr *rpcp = nlm_host_get_rpc(host, TRUE); 1919180025Sdfr nlm_host_release(host); 1920180025Sdfr return (0); 1921177633Sdfr} 1922177633Sdfr 1923180025Sdfrint 1924177633Sdfrnlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp, 1925180025Sdfr bool_t monitor, CLIENT **rpcp) 1926177633Sdfr{ 1927177633Sdfr fhandle_t fh; 1928177633Sdfr struct vfs_state vs; 1929177633Sdfr struct nlm_host *host; 1930177633Sdfr int error, sysid; 1931177633Sdfr struct flock fl; 1932177633Sdfr 1933177633Sdfr memset(result, 0, sizeof(*result)); 1934180025Sdfr memset(&vs, 0, sizeof(vs)); 1935177633Sdfr 1936180025Sdfr host = nlm_find_host_by_name(argp->alock.caller_name, 1937184588Sdfr svc_getrpccaller(rqstp), rqstp->rq_vers); 1938177633Sdfr if (!host) { 1939177633Sdfr result->stat.stat = nlm4_denied_nolocks; 1940180025Sdfr return (ENOMEM); 1941177633Sdfr } 1942177633Sdfr 1943191918Sdfr NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n", 1944191918Sdfr host->nh_caller_name, host->nh_sysid); 1945177633Sdfr 1946179488Sdfr if (monitor && host->nh_state && argp->state 1947179488Sdfr && host->nh_state != argp->state) { 1948179488Sdfr /* 1949179488Sdfr * The host rebooted without telling us. Trash its 1950179488Sdfr * locks. 
1951179488Sdfr */ 1952180025Sdfr nlm_host_notify(host, argp->state); 1953179488Sdfr } 1954179488Sdfr 1955177633Sdfr nlm_free_finished_locks(host); 1956177633Sdfr sysid = host->nh_sysid; 1957177633Sdfr 1958177633Sdfr nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 1959177633Sdfr nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 1960177633Sdfr 1961177633Sdfr if (time_uptime < nlm_grace_threshold && !argp->reclaim) { 1962177633Sdfr result->stat.stat = nlm4_denied_grace_period; 1963180025Sdfr goto out; 1964177633Sdfr } 1965177633Sdfr 1966177633Sdfr error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 1967177633Sdfr if (error) { 1968177633Sdfr result->stat.stat = nlm_convert_error(error); 1969177633Sdfr goto out; 1970177633Sdfr } 1971177633Sdfr 1972177633Sdfr fl.l_start = argp->alock.l_offset; 1973177633Sdfr fl.l_len = argp->alock.l_len; 1974177633Sdfr fl.l_pid = argp->alock.svid; 1975177633Sdfr fl.l_sysid = sysid; 1976177633Sdfr fl.l_whence = SEEK_SET; 1977177633Sdfr if (argp->exclusive) 1978177633Sdfr fl.l_type = F_WRLCK; 1979177633Sdfr else 1980177633Sdfr fl.l_type = F_RDLCK; 1981177633Sdfr if (argp->block) { 1982177633Sdfr struct nlm_async_lock *af; 1983180025Sdfr CLIENT *client; 1984177633Sdfr 1985177633Sdfr /* 1986177633Sdfr * First, make sure we can contact the host's NLM. 1987177633Sdfr */ 1988184588Sdfr client = nlm_host_get_rpc(host, TRUE); 1989180025Sdfr if (!client) { 1990177633Sdfr result->stat.stat = nlm4_failed; 1991177633Sdfr goto out; 1992177633Sdfr } 1993177633Sdfr 1994177633Sdfr /* 1995177633Sdfr * First we need to check and see if there is an 1996177633Sdfr * existing blocked lock that matches. This could be a 1997177633Sdfr * badly behaved client or an RPC re-send. If we find 1998177633Sdfr * one, just return nlm4_blocked. 
1999177633Sdfr */ 2000177633Sdfr mtx_lock(&host->nh_lock); 2001177633Sdfr TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2002177633Sdfr if (af->af_fl.l_start == fl.l_start 2003177633Sdfr && af->af_fl.l_len == fl.l_len 2004177633Sdfr && af->af_fl.l_pid == fl.l_pid 2005177633Sdfr && af->af_fl.l_type == fl.l_type) { 2006177633Sdfr break; 2007177633Sdfr } 2008177633Sdfr } 2009177633Sdfr mtx_unlock(&host->nh_lock); 2010177633Sdfr if (af) { 2011180025Sdfr CLNT_RELEASE(client); 2012177633Sdfr result->stat.stat = nlm4_blocked; 2013177633Sdfr goto out; 2014177633Sdfr } 2015177633Sdfr 2016177633Sdfr af = malloc(sizeof(struct nlm_async_lock), M_NLM, 2017177633Sdfr M_WAITOK|M_ZERO); 2018177633Sdfr TASK_INIT(&af->af_task, 0, nlm_lock_callback, af); 2019177633Sdfr af->af_vp = vs.vs_vp; 2020177633Sdfr af->af_fl = fl; 2021177633Sdfr af->af_host = host; 2022180025Sdfr af->af_rpc = client; 2023177633Sdfr /* 2024177633Sdfr * We use M_RPC here so that we can xdr_free the thing 2025177633Sdfr * later. 2026177633Sdfr */ 2027177633Sdfr af->af_granted.exclusive = argp->exclusive; 2028177633Sdfr af->af_granted.alock.caller_name = 2029177633Sdfr strdup(argp->alock.caller_name, M_RPC); 2030177633Sdfr nlm_copy_netobj(&af->af_granted.alock.fh, 2031177633Sdfr &argp->alock.fh, M_RPC); 2032177633Sdfr nlm_copy_netobj(&af->af_granted.alock.oh, 2033177633Sdfr &argp->alock.oh, M_RPC); 2034177633Sdfr af->af_granted.alock.svid = argp->alock.svid; 2035177633Sdfr af->af_granted.alock.l_offset = argp->alock.l_offset; 2036177633Sdfr af->af_granted.alock.l_len = argp->alock.l_len; 2037177633Sdfr 2038177633Sdfr /* 2039177633Sdfr * Put the entry on the pending list before calling 2040177633Sdfr * VOP_ADVLOCKASYNC. We do this in case the lock 2041177633Sdfr * request was blocked (returning EINPROGRESS) but 2042177633Sdfr * then granted before we manage to run again. The 2043177633Sdfr * client may receive the granted message before we 2044177633Sdfr * send our blocked reply but thats their problem. 
2045177633Sdfr */ 2046177633Sdfr mtx_lock(&host->nh_lock); 2047177633Sdfr TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link); 2048177633Sdfr mtx_unlock(&host->nh_lock); 2049177633Sdfr 2050177633Sdfr error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE, 2051177633Sdfr &af->af_task, &af->af_cookie); 2052177633Sdfr 2053177633Sdfr /* 2054177633Sdfr * If the lock completed synchronously, just free the 2055177633Sdfr * tracking structure now. 2056177633Sdfr */ 2057177633Sdfr if (error != EINPROGRESS) { 2058180025Sdfr CLNT_RELEASE(af->af_rpc); 2059177633Sdfr mtx_lock(&host->nh_lock); 2060177633Sdfr TAILQ_REMOVE(&host->nh_pending, af, af_link); 2061177633Sdfr mtx_unlock(&host->nh_lock); 2062177633Sdfr xdr_free((xdrproc_t) xdr_nlm4_testargs, 2063177633Sdfr &af->af_granted); 2064177633Sdfr free(af, M_NLM); 2065177633Sdfr } else { 2066191918Sdfr NLM_DEBUG(2, "NLM: pending async lock %p for %s " 2067191918Sdfr "(sysid %d)\n", af, host->nh_caller_name, sysid); 2068177633Sdfr /* 2069177633Sdfr * Don't vrele the vnode just yet - this must 2070177633Sdfr * wait until either the async callback 2071177633Sdfr * happens or the lock is cancelled. 
2072177633Sdfr */ 2073177633Sdfr vs.vs_vp = NULL; 2074177633Sdfr } 2075177633Sdfr } else { 2076177633Sdfr error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE); 2077177633Sdfr } 2078177633Sdfr 2079177633Sdfr if (error) { 2080177633Sdfr if (error == EINPROGRESS) { 2081177633Sdfr result->stat.stat = nlm4_blocked; 2082177633Sdfr } else if (error == EDEADLK) { 2083177633Sdfr result->stat.stat = nlm4_deadlck; 2084177633Sdfr } else if (error == EAGAIN) { 2085177633Sdfr result->stat.stat = nlm4_denied; 2086177633Sdfr } else { 2087177633Sdfr result->stat.stat = nlm4_failed; 2088177633Sdfr } 2089177633Sdfr } else { 2090177633Sdfr if (monitor) 2091177633Sdfr nlm_host_monitor(host, argp->state); 2092177633Sdfr result->stat.stat = nlm4_granted; 2093177633Sdfr } 2094177633Sdfr 2095177633Sdfrout: 2096177633Sdfr nlm_release_vfs_state(&vs); 2097180025Sdfr if (rpcp) 2098184588Sdfr *rpcp = nlm_host_get_rpc(host, TRUE); 2099180025Sdfr nlm_host_release(host); 2100180025Sdfr return (0); 2101177633Sdfr} 2102177633Sdfr 2103180025Sdfrint 2104180025Sdfrnlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp, 2105180025Sdfr CLIENT **rpcp) 2106177633Sdfr{ 2107177633Sdfr fhandle_t fh; 2108177633Sdfr struct vfs_state vs; 2109177633Sdfr struct nlm_host *host; 2110177633Sdfr int error, sysid; 2111177633Sdfr struct flock fl; 2112177633Sdfr struct nlm_async_lock *af; 2113177633Sdfr 2114177633Sdfr memset(result, 0, sizeof(*result)); 2115180025Sdfr memset(&vs, 0, sizeof(vs)); 2116177633Sdfr 2117180025Sdfr host = nlm_find_host_by_name(argp->alock.caller_name, 2118184588Sdfr svc_getrpccaller(rqstp), rqstp->rq_vers); 2119177633Sdfr if (!host) { 2120177633Sdfr result->stat.stat = nlm4_denied_nolocks; 2121180025Sdfr return (ENOMEM); 2122177633Sdfr } 2123177633Sdfr 2124191918Sdfr NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n", 2125191918Sdfr host->nh_caller_name, host->nh_sysid); 2126177633Sdfr 2127177633Sdfr nlm_free_finished_locks(host); 2128177633Sdfr sysid = 
host->nh_sysid; 2129177633Sdfr 2130177633Sdfr nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2131177633Sdfr nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2132177633Sdfr 2133177633Sdfr if (time_uptime < nlm_grace_threshold) { 2134177633Sdfr result->stat.stat = nlm4_denied_grace_period; 2135180025Sdfr goto out; 2136177633Sdfr } 2137177633Sdfr 2138177633Sdfr error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 2139177633Sdfr if (error) { 2140177633Sdfr result->stat.stat = nlm_convert_error(error); 2141177633Sdfr goto out; 2142177633Sdfr } 2143177633Sdfr 2144177633Sdfr fl.l_start = argp->alock.l_offset; 2145177633Sdfr fl.l_len = argp->alock.l_len; 2146177633Sdfr fl.l_pid = argp->alock.svid; 2147177633Sdfr fl.l_sysid = sysid; 2148177633Sdfr fl.l_whence = SEEK_SET; 2149177633Sdfr if (argp->exclusive) 2150177633Sdfr fl.l_type = F_WRLCK; 2151177633Sdfr else 2152177633Sdfr fl.l_type = F_RDLCK; 2153177633Sdfr 2154177633Sdfr /* 2155177633Sdfr * First we need to try and find the async lock request - if 2156177633Sdfr * there isn't one, we give up and return nlm4_denied. 
2157177633Sdfr */ 2158177633Sdfr mtx_lock(&host->nh_lock); 2159177633Sdfr 2160177633Sdfr TAILQ_FOREACH(af, &host->nh_pending, af_link) { 2161177633Sdfr if (af->af_fl.l_start == fl.l_start 2162177633Sdfr && af->af_fl.l_len == fl.l_len 2163177633Sdfr && af->af_fl.l_pid == fl.l_pid 2164177633Sdfr && af->af_fl.l_type == fl.l_type) { 2165177633Sdfr break; 2166177633Sdfr } 2167177633Sdfr } 2168177633Sdfr 2169177633Sdfr if (!af) { 2170177633Sdfr mtx_unlock(&host->nh_lock); 2171177633Sdfr result->stat.stat = nlm4_denied; 2172177633Sdfr goto out; 2173177633Sdfr } 2174177633Sdfr 2175177633Sdfr error = nlm_cancel_async_lock(af); 2176177633Sdfr 2177177633Sdfr if (error) { 2178177633Sdfr result->stat.stat = nlm4_denied; 2179177633Sdfr } else { 2180177633Sdfr result->stat.stat = nlm4_granted; 2181177633Sdfr } 2182177633Sdfr 2183177633Sdfr mtx_unlock(&host->nh_lock); 2184177633Sdfr 2185177633Sdfrout: 2186177633Sdfr nlm_release_vfs_state(&vs); 2187180025Sdfr if (rpcp) 2188184588Sdfr *rpcp = nlm_host_get_rpc(host, TRUE); 2189180025Sdfr nlm_host_release(host); 2190180025Sdfr return (0); 2191177633Sdfr} 2192177633Sdfr 2193180025Sdfrint 2194180025Sdfrnlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp, 2195180025Sdfr CLIENT **rpcp) 2196177633Sdfr{ 2197177633Sdfr fhandle_t fh; 2198177633Sdfr struct vfs_state vs; 2199177633Sdfr struct nlm_host *host; 2200177633Sdfr int error, sysid; 2201177633Sdfr struct flock fl; 2202177633Sdfr 2203177633Sdfr memset(result, 0, sizeof(*result)); 2204180025Sdfr memset(&vs, 0, sizeof(vs)); 2205177633Sdfr 2206180025Sdfr host = nlm_find_host_by_name(argp->alock.caller_name, 2207184588Sdfr svc_getrpccaller(rqstp), rqstp->rq_vers); 2208177633Sdfr if (!host) { 2209177633Sdfr result->stat.stat = nlm4_denied_nolocks; 2210180025Sdfr return (ENOMEM); 2211177633Sdfr } 2212177633Sdfr 2213191918Sdfr NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n", 2214191918Sdfr host->nh_caller_name, host->nh_sysid); 2215177633Sdfr 
2216177633Sdfr nlm_free_finished_locks(host); 2217177633Sdfr sysid = host->nh_sysid; 2218177633Sdfr 2219177633Sdfr nlm_convert_to_fhandle_t(&fh, &argp->alock.fh); 2220177633Sdfr nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2221177633Sdfr 2222177633Sdfr if (time_uptime < nlm_grace_threshold) { 2223177633Sdfr result->stat.stat = nlm4_denied_grace_period; 2224180025Sdfr goto out; 2225177633Sdfr } 2226177633Sdfr 2227177633Sdfr error = nlm_get_vfs_state(host, rqstp, &fh, &vs); 2228177633Sdfr if (error) { 2229177633Sdfr result->stat.stat = nlm_convert_error(error); 2230177633Sdfr goto out; 2231177633Sdfr } 2232177633Sdfr 2233177633Sdfr fl.l_start = argp->alock.l_offset; 2234177633Sdfr fl.l_len = argp->alock.l_len; 2235177633Sdfr fl.l_pid = argp->alock.svid; 2236177633Sdfr fl.l_sysid = sysid; 2237177633Sdfr fl.l_whence = SEEK_SET; 2238177633Sdfr fl.l_type = F_UNLCK; 2239177633Sdfr error = VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE); 2240177633Sdfr 2241177633Sdfr /* 2242177633Sdfr * Ignore the error - there is no result code for failure, 2243177633Sdfr * only for grace period. 
2244177633Sdfr */ 2245177633Sdfr result->stat.stat = nlm4_granted; 2246177633Sdfr 2247177633Sdfrout: 2248177633Sdfr nlm_release_vfs_state(&vs); 2249180025Sdfr if (rpcp) 2250184588Sdfr *rpcp = nlm_host_get_rpc(host, TRUE); 2251180025Sdfr nlm_host_release(host); 2252180025Sdfr return (0); 2253180025Sdfr} 2254177633Sdfr 2255180025Sdfrint 2256180025Sdfrnlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp, 2257180025Sdfr 2258180025Sdfr CLIENT **rpcp) 2259180025Sdfr{ 2260180025Sdfr struct nlm_host *host; 2261180025Sdfr struct nlm_waiting_lock *nw; 2262180025Sdfr 2263180025Sdfr memset(result, 0, sizeof(*result)); 2264180025Sdfr 2265184588Sdfr host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers); 2266180025Sdfr if (!host) { 2267180025Sdfr result->stat.stat = nlm4_denied_nolocks; 2268180025Sdfr return (ENOMEM); 2269180025Sdfr } 2270180025Sdfr 2271180025Sdfr nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC); 2272180025Sdfr result->stat.stat = nlm4_denied; 2273180025Sdfr 2274180025Sdfr mtx_lock(&nlm_global_lock); 2275180025Sdfr TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) { 2276180025Sdfr if (!nw->nw_waiting) 2277180025Sdfr continue; 2278180025Sdfr if (argp->alock.svid == nw->nw_lock.svid 2279180025Sdfr && argp->alock.l_offset == nw->nw_lock.l_offset 2280180025Sdfr && argp->alock.l_len == nw->nw_lock.l_len 2281180025Sdfr && argp->alock.fh.n_len == nw->nw_lock.fh.n_len 2282180025Sdfr && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes, 2283180025Sdfr nw->nw_lock.fh.n_len)) { 2284180025Sdfr nw->nw_waiting = FALSE; 2285180025Sdfr wakeup(nw); 2286180025Sdfr result->stat.stat = nlm4_granted; 2287180025Sdfr break; 2288180025Sdfr } 2289180025Sdfr } 2290180025Sdfr mtx_unlock(&nlm_global_lock); 2291180025Sdfr if (rpcp) 2292184588Sdfr *rpcp = nlm_host_get_rpc(host, TRUE); 2293180025Sdfr nlm_host_release(host); 2294180025Sdfr return (0); 2295177633Sdfr} 2296177633Sdfr 2297177633Sdfrvoid 2298177633Sdfrnlm_do_free_all(nlm4_notify *argp) 
2299177633Sdfr{ 2300177633Sdfr struct nlm_host *host, *thost; 2301177633Sdfr 2302177633Sdfr TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) { 2303177633Sdfr if (!strcmp(host->nh_caller_name, argp->name)) 2304180025Sdfr nlm_host_notify(host, argp->state); 2305177633Sdfr } 2306177633Sdfr} 2307177633Sdfr 2308177633Sdfr/* 2309177662Sdfr * Kernel module glue 2310177662Sdfr */ 2311177662Sdfrstatic int 2312177662Sdfrnfslockd_modevent(module_t mod, int type, void *data) 2313177662Sdfr{ 2314177662Sdfr 2315177662Sdfr return (0); 2316177662Sdfr} 2317177662Sdfrstatic moduledata_t nfslockd_mod = { 2318177662Sdfr "nfslockd", 2319177662Sdfr nfslockd_modevent, 2320177662Sdfr NULL, 2321177662Sdfr}; 2322177662SdfrDECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY); 2323177662Sdfr 2324177662Sdfr/* So that loader and kldload(2) can find us, wherever we are.. */ 2325177662SdfrMODULE_DEPEND(nfslockd, krpc, 1, 1, 1); 2326180217SdfrMODULE_DEPEND(nfslockd, nfs, 1, 1, 1); 2327177662SdfrMODULE_VERSION(nfslockd, 1); 2328