1180025Sdfr/*-
2180025Sdfr * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3180025Sdfr * Authors: Doug Rabson <dfr@rabson.org>
4180025Sdfr * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
5180025Sdfr *
6180025Sdfr * Redistribution and use in source and binary forms, with or without
7180025Sdfr * modification, are permitted provided that the following conditions
8180025Sdfr * are met:
9180025Sdfr * 1. Redistributions of source code must retain the above copyright
10180025Sdfr *    notice, this list of conditions and the following disclaimer.
11180025Sdfr * 2. Redistributions in binary form must reproduce the above copyright
12180025Sdfr *    notice, this list of conditions and the following disclaimer in the
13180025Sdfr *    documentation and/or other materials provided with the distribution.
14180025Sdfr *
15180025Sdfr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16180025Sdfr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17180025Sdfr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18180025Sdfr * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19180025Sdfr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20180025Sdfr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21180025Sdfr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22180025Sdfr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23180025Sdfr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24180025Sdfr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25180025Sdfr * SUCH DAMAGE.
26180025Sdfr */
27180025Sdfr
28180025Sdfr#include <sys/cdefs.h>
29180025Sdfr__FBSDID("$FreeBSD$");
30180025Sdfr
31180025Sdfr#include <sys/param.h>
32180025Sdfr#include <sys/fcntl.h>
33193066Sjamie#include <sys/jail.h>
34180025Sdfr#include <sys/kernel.h>
35180025Sdfr#include <sys/limits.h>
36180025Sdfr#include <sys/lock.h>
37180025Sdfr#include <sys/lockf.h>
38180025Sdfr#include <sys/malloc.h>
39192578Srwatson#include <sys/mbuf.h>
40180025Sdfr#include <sys/mount.h>
41180025Sdfr#include <sys/mutex.h>
42180025Sdfr#include <sys/proc.h>
43214048Srmacklem#include <sys/socket.h>
44180025Sdfr#include <sys/syslog.h>
45180025Sdfr#include <sys/systm.h>
46180025Sdfr#include <sys/unistd.h>
47180025Sdfr#include <sys/vnode.h>
48180025Sdfr
49180025Sdfr#include <nfs/nfsproto.h>
50180025Sdfr#include <nfsclient/nfs.h>
51180025Sdfr#include <nfsclient/nfsmount.h>
52180025Sdfr
53180025Sdfr#include <nlm/nlm_prot.h>
54180025Sdfr#include <nlm/nlm.h>
55180025Sdfr
56180025Sdfr/*
57180025Sdfr * We need to keep track of the svid values used for F_FLOCK locks.
58180025Sdfr */
59180025Sdfrstruct nlm_file_svid {
60180025Sdfr	int		ns_refs;	/* thread count + 1 if active */
61180025Sdfr	int		ns_svid;	/* on-the-wire SVID for this file */
62180025Sdfr	struct ucred	*ns_ucred;	/* creds to use for lock recovery */
63180025Sdfr	void		*ns_id;		/* local struct file pointer */
64180025Sdfr	bool_t		ns_active;	/* TRUE if we own a lock */
65180025Sdfr	LIST_ENTRY(nlm_file_svid) ns_link;
66180025Sdfr};
67180025SdfrLIST_HEAD(nlm_file_svid_list, nlm_file_svid);
68180025Sdfr
69180025Sdfr#define NLM_SVID_HASH_SIZE	256
70180025Sdfrstruct nlm_file_svid_list nlm_file_svids[NLM_SVID_HASH_SIZE];
71180025Sdfr
72180025Sdfrstruct mtx nlm_svid_lock;
73180025Sdfrstatic struct unrhdr *nlm_svid_allocator;
74180025Sdfrstatic volatile u_int nlm_xid = 1;
75180025Sdfr
76180025Sdfrstatic int nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext,
77180025Sdfr    rpcvers_t vers, struct timeval *timo, int retries,
78180025Sdfr    struct vnode *vp, int op, struct flock *fl, int flags,
79180025Sdfr    int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim);
80180025Sdfrstatic int nlm_clearlock(struct nlm_host *host,  struct rpc_callextra *ext,
81180025Sdfr    rpcvers_t vers, struct timeval *timo, int retries,
82180025Sdfr    struct vnode *vp, int op, struct flock *fl, int flags,
83180025Sdfr    int svid, size_t fhlen, void *fh, off_t size);
84180025Sdfrstatic int nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext,
85180025Sdfr    rpcvers_t vers, struct timeval *timo, int retries,
86180025Sdfr    struct vnode *vp, int op, struct flock *fl, int flags,
87180025Sdfr    int svid, size_t fhlen, void *fh, off_t size);
88180025Sdfrstatic int nlm_map_status(nlm4_stats stat);
89180025Sdfrstatic struct nlm_file_svid *nlm_find_svid(void *id);
90180025Sdfrstatic void nlm_free_svid(struct nlm_file_svid *nf);
91180025Sdfrstatic int nlm_init_lock(struct flock *fl, int flags, int svid,
92180025Sdfr    rpcvers_t vers, size_t fhlen, void *fh, off_t size,
93180025Sdfr    struct nlm4_lock *lock, char oh_space[32]);
94180025Sdfr
95180025Sdfrstatic void
96180025Sdfrnlm_client_init(void *dummy)
97180025Sdfr{
98180025Sdfr	int i;
99180025Sdfr
100180025Sdfr	mtx_init(&nlm_svid_lock, "NLM svid lock", NULL, MTX_DEF);
101239582Skib	/* pid_max cannot be greater than PID_MAX */
102180025Sdfr	nlm_svid_allocator = new_unrhdr(PID_MAX + 2, INT_MAX, &nlm_svid_lock);
103180025Sdfr	for (i = 0; i < NLM_SVID_HASH_SIZE; i++)
104180025Sdfr		LIST_INIT(&nlm_file_svids[i]);
105180025Sdfr}
106180025SdfrSYSINIT(nlm_client_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_client_init, NULL);
107180025Sdfr
108180025Sdfrstatic int
109180025Sdfrnlm_msg(struct thread *td, const char *server, const char *msg, int error)
110180025Sdfr{
111180025Sdfr	struct proc *p;
112180025Sdfr
113180025Sdfr	p = td ? td->td_proc : NULL;
114180025Sdfr	if (error) {
115180025Sdfr		tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server,
116180025Sdfr		    msg, error);
117180025Sdfr	} else {
118180025Sdfr		tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
119180025Sdfr	}
120180025Sdfr	return (0);
121180025Sdfr}
122180025Sdfr
123180025Sdfrstruct nlm_feedback_arg {
124180025Sdfr	bool_t	nf_printed;
125180025Sdfr	struct nfsmount *nf_nmp;
126180025Sdfr};
127180025Sdfr
128180025Sdfrstatic void
129180025Sdfrnlm_down(struct nlm_feedback_arg *nf, struct thread *td,
130180025Sdfr    const char *msg, int error)
131180025Sdfr{
132180025Sdfr	struct nfsmount *nmp = nf->nf_nmp;
133180025Sdfr
134180025Sdfr	if (nmp == NULL)
135180025Sdfr		return;
136180025Sdfr	mtx_lock(&nmp->nm_mtx);
137180025Sdfr	if (!(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
138180025Sdfr		nmp->nm_state |= NFSSTA_LOCKTIMEO;
139180025Sdfr		mtx_unlock(&nmp->nm_mtx);
140180025Sdfr		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
141180025Sdfr		    VQ_NOTRESPLOCK, 0);
142180025Sdfr	} else {
143180025Sdfr		mtx_unlock(&nmp->nm_mtx);
144180025Sdfr	}
145180025Sdfr
146180025Sdfr	nf->nf_printed = TRUE;
147180025Sdfr	nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
148180025Sdfr}
149180025Sdfr
150180025Sdfrstatic void
151180025Sdfrnlm_up(struct nlm_feedback_arg *nf, struct thread *td,
152180025Sdfr    const char *msg)
153180025Sdfr{
154180025Sdfr	struct nfsmount *nmp = nf->nf_nmp;
155180025Sdfr
156180025Sdfr	if (!nf->nf_printed)
157180025Sdfr		return;
158180025Sdfr
159180025Sdfr	nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
160180025Sdfr
161180025Sdfr	mtx_lock(&nmp->nm_mtx);
162180025Sdfr	if (nmp->nm_state & NFSSTA_LOCKTIMEO) {
163180025Sdfr		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
164180025Sdfr		mtx_unlock(&nmp->nm_mtx);
165180025Sdfr		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
166180025Sdfr		    VQ_NOTRESPLOCK, 1);
167180025Sdfr	} else {
168180025Sdfr		mtx_unlock(&nmp->nm_mtx);
169180025Sdfr	}
170180025Sdfr}
171180025Sdfr
172180025Sdfrstatic void
173180025Sdfrnlm_feedback(int type, int proc, void *arg)
174180025Sdfr{
175180025Sdfr	struct thread *td = curthread;
176180025Sdfr	struct nlm_feedback_arg *nf = (struct nlm_feedback_arg *) arg;
177180025Sdfr
178180025Sdfr	switch (type) {
179180025Sdfr	case FEEDBACK_REXMIT2:
180180025Sdfr	case FEEDBACK_RECONNECT:
181180025Sdfr		nlm_down(nf, td, "lockd not responding", 0);
182180025Sdfr		break;
183180025Sdfr
184180025Sdfr	case FEEDBACK_OK:
185180025Sdfr		nlm_up(nf, td, "lockd is alive again");
186180025Sdfr		break;
187180025Sdfr	}
188180025Sdfr}
189180025Sdfr
190180025Sdfr/*
191180025Sdfr * nlm_advlock --
192180025Sdfr *      NFS advisory byte-level locks.
193180025Sdfr */
194180025Sdfrstatic int
195180025Sdfrnlm_advlock_internal(struct vnode *vp, void *id, int op, struct flock *fl,
196180025Sdfr    int flags, bool_t reclaim, bool_t unlock_vp)
197180025Sdfr{
198180025Sdfr	struct thread *td = curthread;
199180025Sdfr	struct nfsmount *nmp;
200180025Sdfr	off_t size;
201180025Sdfr	size_t fhlen;
202180025Sdfr	union nfsfh fh;
203180025Sdfr	struct sockaddr *sa;
204180025Sdfr	struct sockaddr_storage ss;
205180025Sdfr	char servername[MNAMELEN];
206180025Sdfr	struct timeval timo;
207180025Sdfr	int retries;
208180025Sdfr	rpcvers_t vers;
209180025Sdfr	struct nlm_host *host;
210180025Sdfr	struct rpc_callextra ext;
211180025Sdfr	struct nlm_feedback_arg nf;
212180025Sdfr	AUTH *auth;
213180025Sdfr	struct ucred *cred;
214180025Sdfr	struct nlm_file_svid *ns;
215180025Sdfr	int svid;
216180025Sdfr	int error;
217214048Srmacklem	int is_v3;
218180025Sdfr
219180025Sdfr	ASSERT_VOP_LOCKED(vp, "nlm_advlock_1");
220180025Sdfr
221216931Srmacklem	nmp = VFSTONFS(vp->v_mount);
222180025Sdfr	/*
223180025Sdfr	 * Push any pending writes to the server and flush our cache
224180025Sdfr	 * so that if we are contending with another machine for a
225180025Sdfr	 * file, we get whatever they wrote and vice-versa.
226180025Sdfr	 */
227180025Sdfr	if (op == F_SETLK || op == F_UNLCK)
228216931Srmacklem		nmp->nm_vinvalbuf(vp, V_SAVE, td, 1);
229180025Sdfr
230214048Srmacklem	strcpy(servername, nmp->nm_hostname);
231216931Srmacklem	nmp->nm_getinfo(vp, fh.fh_bytes, &fhlen, &ss, &is_v3, &size, &timo);
232180025Sdfr	sa = (struct sockaddr *) &ss;
233214048Srmacklem	if (is_v3 != 0)
234180025Sdfr		vers = NLM_VERS4;
235180025Sdfr	else
236180025Sdfr		vers = NLM_VERS;
237180025Sdfr
238180025Sdfr	if (nmp->nm_flag & NFSMNT_SOFT)
239180025Sdfr		retries = nmp->nm_retry;
240180025Sdfr	else
241180025Sdfr		retries = INT_MAX;
242180025Sdfr
243180025Sdfr	if (unlock_vp)
244180025Sdfr		VOP_UNLOCK(vp, 0);
245180025Sdfr
246180025Sdfr	/*
247180025Sdfr	 * We need to switch to mount-point creds so that we can send
248180025Sdfr	 * packets from a privileged port.
249180025Sdfr	 */
250180025Sdfr	cred = td->td_ucred;
251180025Sdfr	td->td_ucred = vp->v_mount->mnt_cred;
252180025Sdfr
253180025Sdfr	host = nlm_find_host_by_name(servername, sa, vers);
254180025Sdfr	auth = authunix_create(cred);
255180025Sdfr	memset(&ext, 0, sizeof(ext));
256180025Sdfr
257180025Sdfr	nf.nf_printed = FALSE;
258180025Sdfr	nf.nf_nmp = nmp;
259180025Sdfr	ext.rc_auth = auth;
260180025Sdfr
261180025Sdfr	ext.rc_feedback = nlm_feedback;
262180025Sdfr	ext.rc_feedback_arg = &nf;
263184588Sdfr	ext.rc_timers = NULL;
264180025Sdfr
265180025Sdfr	ns = NULL;
266180025Sdfr	if (flags & F_FLOCK) {
267180025Sdfr		ns = nlm_find_svid(id);
268180025Sdfr		KASSERT(fl->l_start == 0 && fl->l_len == 0,
269180025Sdfr		    ("F_FLOCK lock requests must be whole-file locks"));
270180025Sdfr		if (!ns->ns_ucred) {
271180025Sdfr			/*
272180025Sdfr			 * Remember the creds used for locking in case
273180025Sdfr			 * we need to recover the lock later.
274180025Sdfr			 */
275180025Sdfr			ns->ns_ucred = crdup(cred);
276180025Sdfr		}
277180025Sdfr		svid = ns->ns_svid;
278180025Sdfr	} else if (flags & F_REMOTE) {
279180025Sdfr		/*
280180025Sdfr		 * If we are recovering after a server restart or
281180025Sdfr		 * trashing locks on a force unmount, use the same
282180025Sdfr		 * svid as last time.
283180025Sdfr		 */
284180025Sdfr		svid = fl->l_pid;
285180025Sdfr	} else {
286180025Sdfr		svid = ((struct proc *) id)->p_pid;
287180025Sdfr	}
288180025Sdfr
289180025Sdfr	switch(op) {
290180025Sdfr	case F_SETLK:
291180025Sdfr		if ((flags & (F_FLOCK|F_WAIT)) == (F_FLOCK|F_WAIT)
292180025Sdfr		    && fl->l_type == F_WRLCK) {
293180025Sdfr			/*
294180025Sdfr			 * The semantics for flock(2) require that any
295180025Sdfr			 * shared lock on the file must be released
296180025Sdfr			 * before an exclusive lock is granted. The
297180025Sdfr			 * local locking code interprets this by
298180025Sdfr			 * unlocking the file before sleeping on a
299180025Sdfr			 * blocked exclusive lock request. We
300180025Sdfr			 * approximate this by first attempting
301180025Sdfr			 * non-blocking and if that fails, we unlock
302180025Sdfr			 * the file and block.
303180025Sdfr			 */
304180025Sdfr			error = nlm_setlock(host, &ext, vers, &timo, retries,
305180025Sdfr			    vp, F_SETLK, fl, flags & ~F_WAIT,
306180025Sdfr			    svid, fhlen, &fh.fh_bytes, size, reclaim);
307180025Sdfr			if (error == EAGAIN) {
308180025Sdfr				fl->l_type = F_UNLCK;
309180025Sdfr				error = nlm_clearlock(host, &ext, vers, &timo,
310180025Sdfr				    retries, vp, F_UNLCK, fl, flags,
311180025Sdfr				    svid, fhlen, &fh.fh_bytes, size);
312180025Sdfr				fl->l_type = F_WRLCK;
313180025Sdfr				if (!error) {
314180025Sdfr					mtx_lock(&nlm_svid_lock);
315180025Sdfr					if (ns->ns_active) {
316180025Sdfr						ns->ns_refs--;
317180025Sdfr						ns->ns_active = FALSE;
318180025Sdfr					}
319180025Sdfr					mtx_unlock(&nlm_svid_lock);
320180025Sdfr					flags |= F_WAIT;
321180025Sdfr					error = nlm_setlock(host, &ext, vers,
322180025Sdfr					    &timo, retries, vp, F_SETLK, fl,
323180025Sdfr					    flags, svid, fhlen, &fh.fh_bytes,
324180025Sdfr					    size, reclaim);
325180025Sdfr				}
326180025Sdfr			}
327180025Sdfr		} else {
328180025Sdfr			error = nlm_setlock(host, &ext, vers, &timo, retries,
329180025Sdfr			    vp, op, fl, flags, svid, fhlen, &fh.fh_bytes,
330180025Sdfr			    size, reclaim);
331180025Sdfr		}
332180025Sdfr		if (!error && ns) {
333180025Sdfr			mtx_lock(&nlm_svid_lock);
334180025Sdfr			if (!ns->ns_active) {
335180025Sdfr				/*
336180025Sdfr				 * Add one to the reference count to
337180025Sdfr				 * hold onto the SVID for the lifetime
338180025Sdfr				 * of the lock. Note that since
339180025Sdfr				 * F_FLOCK only supports whole-file
340180025Sdfr				 * locks, there can only be one active
341180025Sdfr				 * lock for this SVID.
342180025Sdfr				 */
343180025Sdfr				ns->ns_refs++;
344180025Sdfr				ns->ns_active = TRUE;
345180025Sdfr			}
346180025Sdfr			mtx_unlock(&nlm_svid_lock);
347180025Sdfr		}
348180025Sdfr		break;
349180025Sdfr
350180025Sdfr	case F_UNLCK:
351180025Sdfr		error = nlm_clearlock(host, &ext, vers, &timo, retries,
352180025Sdfr		    vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size);
353180025Sdfr		if (!error && ns) {
354180025Sdfr			mtx_lock(&nlm_svid_lock);
355180025Sdfr			if (ns->ns_active) {
356180025Sdfr				ns->ns_refs--;
357180025Sdfr				ns->ns_active = FALSE;
358180025Sdfr			}
359180025Sdfr			mtx_unlock(&nlm_svid_lock);
360180025Sdfr		}
361180025Sdfr		break;
362180025Sdfr
363180025Sdfr	case F_GETLK:
364180025Sdfr		error = nlm_getlock(host, &ext, vers, &timo, retries,
365180025Sdfr		    vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size);
366180025Sdfr		break;
367180025Sdfr
368180025Sdfr	default:
369180025Sdfr		error = EINVAL;
370180025Sdfr		break;
371180025Sdfr	}
372180025Sdfr
373180025Sdfr	if (ns)
374180025Sdfr		nlm_free_svid(ns);
375180025Sdfr
376180025Sdfr	td->td_ucred = cred;
377180025Sdfr	AUTH_DESTROY(auth);
378180025Sdfr
379180025Sdfr	nlm_host_release(host);
380180025Sdfr
381180025Sdfr	return (error);
382180025Sdfr}
383180025Sdfr
384180025Sdfrint
385180025Sdfrnlm_advlock(struct vop_advlock_args *ap)
386180025Sdfr{
387180025Sdfr
388180025Sdfr	return (nlm_advlock_internal(ap->a_vp, ap->a_id, ap->a_op, ap->a_fl,
389180025Sdfr		ap->a_flags, FALSE, TRUE));
390180025Sdfr}
391180025Sdfr
392180025Sdfr/*
393180025Sdfr * Set the creds of td to the creds of the given lock's owner. The new
394180025Sdfr * creds reference count will be incremented via crhold. The caller is
395180025Sdfr * responsible for calling crfree and restoring td's original creds.
396180025Sdfr */
397180025Sdfrstatic void
398180025Sdfrnlm_set_creds_for_lock(struct thread *td, struct flock *fl)
399180025Sdfr{
400180025Sdfr	int i;
401180025Sdfr	struct nlm_file_svid *ns;
402180025Sdfr	struct proc *p;
403180025Sdfr	struct ucred *cred;
404180025Sdfr
405180025Sdfr	cred = NULL;
406180025Sdfr	if (fl->l_pid > PID_MAX) {
407180025Sdfr		/*
408180025Sdfr		 * If this was originally a F_FLOCK-style lock, we
409180025Sdfr		 * recorded the creds used when it was originally
410180025Sdfr		 * locked in the nlm_file_svid structure.
411180025Sdfr		 */
412180025Sdfr		mtx_lock(&nlm_svid_lock);
413180025Sdfr		for (i = 0; i < NLM_SVID_HASH_SIZE; i++) {
414180025Sdfr			for (ns = LIST_FIRST(&nlm_file_svids[i]); ns;
415180025Sdfr			     ns = LIST_NEXT(ns, ns_link)) {
416180025Sdfr				if (ns->ns_svid == fl->l_pid) {
417180025Sdfr					cred = crhold(ns->ns_ucred);
418180025Sdfr					break;
419180025Sdfr				}
420180025Sdfr			}
421180025Sdfr		}
422180025Sdfr		mtx_unlock(&nlm_svid_lock);
423180025Sdfr	} else {
424180025Sdfr		/*
425180025Sdfr		 * This lock is owned by a process. Get a reference to
426180025Sdfr		 * the process creds.
427180025Sdfr		 */
428180025Sdfr		p = pfind(fl->l_pid);
429180025Sdfr		if (p) {
430180025Sdfr			cred = crhold(p->p_ucred);
431180025Sdfr			PROC_UNLOCK(p);
432180025Sdfr		}
433180025Sdfr	}
434180025Sdfr
435180025Sdfr	/*
436180025Sdfr	 * If we can't find a cred, fall back on the recovery
437180025Sdfr	 * thread's cred.
438180025Sdfr	 */
439180025Sdfr	if (!cred) {
440180025Sdfr		cred = crhold(td->td_ucred);
441180025Sdfr	}
442180025Sdfr
443180025Sdfr	td->td_ucred = cred;
444180025Sdfr}
445180025Sdfr
446180025Sdfrstatic int
447180025Sdfrnlm_reclaim_free_lock(struct vnode *vp, struct flock *fl, void *arg)
448180025Sdfr{
449180025Sdfr	struct flock newfl;
450180025Sdfr	struct thread *td = curthread;
451180025Sdfr	struct ucred *oldcred;
452180025Sdfr	int error;
453180025Sdfr
454180025Sdfr	newfl = *fl;
455180025Sdfr	newfl.l_type = F_UNLCK;
456180025Sdfr
457180025Sdfr	oldcred = td->td_ucred;
458180025Sdfr	nlm_set_creds_for_lock(td, &newfl);
459180025Sdfr
460180025Sdfr	error = nlm_advlock_internal(vp, NULL, F_UNLCK, &newfl, F_REMOTE,
461180025Sdfr	    FALSE, FALSE);
462180025Sdfr
463180025Sdfr	crfree(td->td_ucred);
464180025Sdfr	td->td_ucred = oldcred;
465180025Sdfr
466180025Sdfr	return (error);
467180025Sdfr}
468180025Sdfr
469180025Sdfrint
470180025Sdfrnlm_reclaim(struct vop_reclaim_args *ap)
471180025Sdfr{
472180025Sdfr
473180025Sdfr	nlm_cancel_wait(ap->a_vp);
474180025Sdfr	lf_iteratelocks_vnode(ap->a_vp, nlm_reclaim_free_lock, NULL);
475180025Sdfr	return (0);
476180025Sdfr}
477180025Sdfr
/*
 * State shared between nlm_client_recovery() and its per-lock callback.
 */
struct nlm_recovery_context {
	struct nlm_host	*nr_host;	/* host being recovered */
	int		nr_state;	/* NSM state sampled when the
					   recovery pass began */
};
482180025Sdfr
483180025Sdfrstatic int
484180025Sdfrnlm_client_recover_lock(struct vnode *vp, struct flock *fl, void *arg)
485180025Sdfr{
486180025Sdfr	struct nlm_recovery_context *nr = (struct nlm_recovery_context *) arg;
487180025Sdfr	struct thread *td = curthread;
488180025Sdfr	struct ucred *oldcred;
489180025Sdfr	int state, error;
490180025Sdfr
491180025Sdfr	/*
492180025Sdfr	 * If the remote NSM state changes during recovery, the host
493180025Sdfr	 * must have rebooted a second time. In that case, we must
494180025Sdfr	 * restart the recovery.
495180025Sdfr	 */
496180025Sdfr	state = nlm_host_get_state(nr->nr_host);
497180025Sdfr	if (nr->nr_state != state)
498180025Sdfr		return (ERESTART);
499180025Sdfr
500180025Sdfr	error = vn_lock(vp, LK_SHARED);
501180025Sdfr	if (error)
502180025Sdfr		return (error);
503180025Sdfr
504180025Sdfr	oldcred = td->td_ucred;
505180025Sdfr	nlm_set_creds_for_lock(td, fl);
506180025Sdfr
507180025Sdfr	error = nlm_advlock_internal(vp, NULL, F_SETLK, fl, F_REMOTE,
508180025Sdfr	    TRUE, TRUE);
509180025Sdfr
510180025Sdfr	crfree(td->td_ucred);
511180025Sdfr	td->td_ucred = oldcred;
512180025Sdfr
513180025Sdfr	return (error);
514180025Sdfr}
515180025Sdfr
516180025Sdfrvoid
517180025Sdfrnlm_client_recovery(struct nlm_host *host)
518180025Sdfr{
519180025Sdfr	struct nlm_recovery_context nr;
520180025Sdfr	int sysid, error;
521180025Sdfr
522180025Sdfr	sysid = NLM_SYSID_CLIENT | nlm_host_get_sysid(host);
523180025Sdfr	do {
524180025Sdfr		nr.nr_host = host;
525180025Sdfr		nr.nr_state = nlm_host_get_state(host);
526180025Sdfr		error = lf_iteratelocks_sysid(sysid,
527180025Sdfr		    nlm_client_recover_lock, &nr);
528180025Sdfr	} while (error == ERESTART);
529180025Sdfr}
530180025Sdfr
531180025Sdfrstatic void
532180025Sdfrnlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src)
533180025Sdfr{
534180025Sdfr
535180025Sdfr	dst->caller_name = src->caller_name;
536180025Sdfr	dst->fh = src->fh;
537180025Sdfr	dst->oh = src->oh;
538180025Sdfr	dst->svid = src->svid;
539180025Sdfr	dst->l_offset = src->l_offset;
540180025Sdfr	dst->l_len = src->l_len;
541180025Sdfr}
542180025Sdfr
543180025Sdfrstatic void
544180025Sdfrnlm_convert_to_nlm4_holder(struct nlm4_holder *dst, struct nlm_holder *src)
545180025Sdfr{
546180025Sdfr
547180025Sdfr	dst->exclusive = src->exclusive;
548180025Sdfr	dst->svid = src->svid;
549180025Sdfr	dst->oh = src->oh;
550180025Sdfr	dst->l_offset = src->l_offset;
551180025Sdfr	dst->l_len = src->l_len;
552180025Sdfr}
553180025Sdfr
554180025Sdfrstatic void
555180025Sdfrnlm_convert_to_nlm4_res(struct nlm4_res *dst, struct nlm_res *src)
556180025Sdfr{
557180025Sdfr	dst->cookie = src->cookie;
558180025Sdfr	dst->stat.stat = (enum nlm4_stats) src->stat.stat;
559180025Sdfr}
560180025Sdfr
561180025Sdfrstatic enum clnt_stat
562180025Sdfrnlm_test_rpc(rpcvers_t vers, nlm4_testargs *args, nlm4_testres *res, CLIENT *client,
563180025Sdfr    struct rpc_callextra *ext, struct timeval timo)
564180025Sdfr{
565180025Sdfr	if (vers == NLM_VERS4) {
566180025Sdfr		return nlm4_test_4(args, res, client, ext, timo);
567180025Sdfr	} else {
568180025Sdfr		nlm_testargs args1;
569180025Sdfr		nlm_testres res1;
570180025Sdfr		enum clnt_stat stat;
571180025Sdfr
572180025Sdfr		args1.cookie = args->cookie;
573180025Sdfr		args1.exclusive = args->exclusive;
574180025Sdfr		nlm_convert_to_nlm_lock(&args1.alock, &args->alock);
575180025Sdfr		memset(&res1, 0, sizeof(res1));
576180025Sdfr
577180025Sdfr		stat = nlm_test_1(&args1, &res1, client, ext, timo);
578180025Sdfr
579180025Sdfr		if (stat == RPC_SUCCESS) {
580180025Sdfr			res->cookie = res1.cookie;
581180025Sdfr			res->stat.stat = (enum nlm4_stats) res1.stat.stat;
582180025Sdfr			if (res1.stat.stat == nlm_denied)
583180025Sdfr				nlm_convert_to_nlm4_holder(
584180025Sdfr					&res->stat.nlm4_testrply_u.holder,
585180025Sdfr					&res1.stat.nlm_testrply_u.holder);
586180025Sdfr		}
587180025Sdfr
588180025Sdfr		return (stat);
589180025Sdfr	}
590180025Sdfr}
591180025Sdfr
592180025Sdfrstatic enum clnt_stat
593180025Sdfrnlm_lock_rpc(rpcvers_t vers, nlm4_lockargs *args, nlm4_res *res, CLIENT *client,
594180025Sdfr    struct rpc_callextra *ext, struct timeval timo)
595180025Sdfr{
596180025Sdfr	if (vers == NLM_VERS4) {
597180025Sdfr		return nlm4_lock_4(args, res, client, ext, timo);
598180025Sdfr	} else {
599180025Sdfr		nlm_lockargs args1;
600180025Sdfr		nlm_res res1;
601180025Sdfr		enum clnt_stat stat;
602180025Sdfr
603180025Sdfr		args1.cookie = args->cookie;
604180025Sdfr		args1.block = args->block;
605180025Sdfr		args1.exclusive = args->exclusive;
606180025Sdfr		nlm_convert_to_nlm_lock(&args1.alock, &args->alock);
607180025Sdfr		args1.reclaim = args->reclaim;
608180025Sdfr		args1.state = args->state;
609180025Sdfr		memset(&res1, 0, sizeof(res1));
610180025Sdfr
611180025Sdfr		stat = nlm_lock_1(&args1, &res1, client, ext, timo);
612180025Sdfr
613180025Sdfr		if (stat == RPC_SUCCESS) {
614180025Sdfr			nlm_convert_to_nlm4_res(res, &res1);
615180025Sdfr		}
616180025Sdfr
617180025Sdfr		return (stat);
618180025Sdfr	}
619180025Sdfr}
620180025Sdfr
621180025Sdfrstatic enum clnt_stat
622180025Sdfrnlm_cancel_rpc(rpcvers_t vers, nlm4_cancargs *args, nlm4_res *res, CLIENT *client,
623180025Sdfr    struct rpc_callextra *ext, struct timeval timo)
624180025Sdfr{
625180025Sdfr	if (vers == NLM_VERS4) {
626180025Sdfr		return nlm4_cancel_4(args, res, client, ext, timo);
627180025Sdfr	} else {
628180025Sdfr		nlm_cancargs args1;
629180025Sdfr		nlm_res res1;
630180025Sdfr		enum clnt_stat stat;
631180025Sdfr
632180025Sdfr		args1.cookie = args->cookie;
633180025Sdfr		args1.block = args->block;
634180025Sdfr		args1.exclusive = args->exclusive;
635180025Sdfr		nlm_convert_to_nlm_lock(&args1.alock, &args->alock);
636180025Sdfr		memset(&res1, 0, sizeof(res1));
637180025Sdfr
638180025Sdfr		stat = nlm_cancel_1(&args1, &res1, client, ext, timo);
639180025Sdfr
640180025Sdfr		if (stat == RPC_SUCCESS) {
641180025Sdfr			nlm_convert_to_nlm4_res(res, &res1);
642180025Sdfr		}
643180025Sdfr
644180025Sdfr		return (stat);
645180025Sdfr	}
646180025Sdfr}
647180025Sdfr
648180025Sdfrstatic enum clnt_stat
649180025Sdfrnlm_unlock_rpc(rpcvers_t vers, nlm4_unlockargs *args, nlm4_res *res, CLIENT *client,
650180025Sdfr    struct rpc_callextra *ext, struct timeval timo)
651180025Sdfr{
652180025Sdfr	if (vers == NLM_VERS4) {
653180025Sdfr		return nlm4_unlock_4(args, res, client, ext, timo);
654180025Sdfr	} else {
655180025Sdfr		nlm_unlockargs args1;
656180025Sdfr		nlm_res res1;
657180025Sdfr		enum clnt_stat stat;
658180025Sdfr
659180025Sdfr		args1.cookie = args->cookie;
660180025Sdfr		nlm_convert_to_nlm_lock(&args1.alock, &args->alock);
661180025Sdfr		memset(&res1, 0, sizeof(res1));
662180025Sdfr
663180025Sdfr		stat = nlm_unlock_1(&args1, &res1, client, ext, timo);
664180025Sdfr
665180025Sdfr		if (stat == RPC_SUCCESS) {
666180025Sdfr			nlm_convert_to_nlm4_res(res, &res1);
667180025Sdfr		}
668180025Sdfr
669180025Sdfr		return (stat);
670180025Sdfr	}
671180025Sdfr}
672180025Sdfr
673180025Sdfr/*
674180025Sdfr * Called after a lock request (set or clear) succeeded. We record the
675180025Sdfr * details in the local lock manager. Note that since the remote
676180025Sdfr * server has granted the lock, we can be sure that it doesn't
677180025Sdfr * conflict with any other locks we have in the local lock manager.
678180025Sdfr *
679180025Sdfr * Since it is possible that host may also make NLM client requests to
680180025Sdfr * our NLM server, we use a different sysid value to record our own
681180025Sdfr * client locks.
682180025Sdfr *
683180025Sdfr * Note that since it is possible for us to receive replies from the
684180025Sdfr * server in a different order than the locks were granted (e.g. if
685180025Sdfr * many local threads are contending for the same lock), we must use a
686180025Sdfr * blocking operation when registering with the local lock manager.
687180025Sdfr * We expect that any actual wait will be rare and short hence we
688180025Sdfr * ignore signals for this.
689180025Sdfr */
690180025Sdfrstatic void
691180025Sdfrnlm_record_lock(struct vnode *vp, int op, struct flock *fl,
692180025Sdfr    int svid, int sysid, off_t size)
693180025Sdfr{
694180025Sdfr	struct vop_advlockasync_args a;
695180025Sdfr	struct flock newfl;
696180025Sdfr	int error;
697180025Sdfr
698180025Sdfr	a.a_vp = vp;
699180025Sdfr	a.a_id = NULL;
700180025Sdfr	a.a_op = op;
701180025Sdfr	a.a_fl = &newfl;
702180025Sdfr	a.a_flags = F_REMOTE|F_WAIT|F_NOINTR;
703180025Sdfr	a.a_task = NULL;
704180025Sdfr	a.a_cookiep = NULL;
705180025Sdfr	newfl.l_start = fl->l_start;
706180025Sdfr	newfl.l_len = fl->l_len;
707180025Sdfr	newfl.l_type = fl->l_type;
708180025Sdfr	newfl.l_whence = fl->l_whence;
709180025Sdfr	newfl.l_pid = svid;
710180025Sdfr	newfl.l_sysid = NLM_SYSID_CLIENT | sysid;
711180025Sdfr
712180025Sdfr	error = lf_advlockasync(&a, &vp->v_lockf, size);
713193434Sed	KASSERT(error == 0 || error == ENOENT,
714193432Sdfr	    ("Failed to register NFS lock locally - error=%d", error));
715180025Sdfr}
716180025Sdfr
717180025Sdfrstatic int
718180025Sdfrnlm_setlock(struct nlm_host *host, struct rpc_callextra *ext,
719180025Sdfr    rpcvers_t vers, struct timeval *timo, int retries,
720180025Sdfr    struct vnode *vp, int op, struct flock *fl, int flags,
721180025Sdfr    int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim)
722180025Sdfr{
723180025Sdfr	struct nlm4_lockargs args;
724180025Sdfr	char oh_space[32];
725180025Sdfr	struct nlm4_res res;
726180025Sdfr	u_int xid;
727180025Sdfr	CLIENT *client;
728180025Sdfr	enum clnt_stat stat;
729180025Sdfr	int retry, block, exclusive;
730180025Sdfr	void *wait_handle = NULL;
731180025Sdfr	int error;
732180025Sdfr
733180025Sdfr	memset(&args, 0, sizeof(args));
734180025Sdfr	memset(&res, 0, sizeof(res));
735180025Sdfr
736180025Sdfr	block = (flags & F_WAIT) ? TRUE : FALSE;
737180025Sdfr	exclusive = (fl->l_type == F_WRLCK);
738180025Sdfr
739180025Sdfr	error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size,
740180025Sdfr	    &args.alock, oh_space);
741180025Sdfr	if (error)
742180025Sdfr		return (error);
743180025Sdfr	args.block = block;
744180025Sdfr	args.exclusive = exclusive;
745180025Sdfr	args.reclaim = reclaim;
746180025Sdfr	args.state = nlm_nsm_state;
747180025Sdfr
748180025Sdfr	retry = 5*hz;
749180025Sdfr	for (;;) {
750184588Sdfr		client = nlm_host_get_rpc(host, FALSE);
751180025Sdfr		if (!client)
752180025Sdfr			return (ENOLCK); /* XXX retry? */
753180025Sdfr
754180025Sdfr		if (block)
755180025Sdfr			wait_handle = nlm_register_wait_lock(&args.alock, vp);
756180025Sdfr
757180025Sdfr		xid = atomic_fetchadd_int(&nlm_xid, 1);
758180025Sdfr		args.cookie.n_len = sizeof(xid);
759180025Sdfr		args.cookie.n_bytes = (char*) &xid;
760180025Sdfr
761180025Sdfr		stat = nlm_lock_rpc(vers, &args, &res, client, ext, *timo);
762180025Sdfr
763180025Sdfr		CLNT_RELEASE(client);
764180025Sdfr
765180025Sdfr		if (stat != RPC_SUCCESS) {
766180025Sdfr			if (block)
767180025Sdfr				nlm_deregister_wait_lock(wait_handle);
768180025Sdfr			if (retries) {
769180025Sdfr				retries--;
770180025Sdfr				continue;
771180025Sdfr			}
772180025Sdfr			return (EINVAL);
773180025Sdfr		}
774180025Sdfr
775180025Sdfr		/*
776180025Sdfr		 * Free res.cookie.
777180025Sdfr		 */
778180025Sdfr		xdr_free((xdrproc_t) xdr_nlm4_res, &res);
779180025Sdfr
780180025Sdfr		if (block && res.stat.stat != nlm4_blocked)
781180025Sdfr			nlm_deregister_wait_lock(wait_handle);
782180025Sdfr
783180025Sdfr		if (res.stat.stat == nlm4_denied_grace_period) {
784180025Sdfr			/*
785180025Sdfr			 * The server has recently rebooted and is
786180025Sdfr			 * giving old clients a chance to reclaim
787180025Sdfr			 * their locks. Wait for a few seconds and try
788180025Sdfr			 * again.
789180025Sdfr			 */
790180025Sdfr			error = tsleep(&args, PCATCH, "nlmgrace", retry);
791180025Sdfr			if (error && error != EWOULDBLOCK)
792180025Sdfr				return (error);
793180025Sdfr			retry = 2*retry;
794180025Sdfr			if (retry > 30*hz)
795180025Sdfr				retry = 30*hz;
796180025Sdfr			continue;
797180025Sdfr		}
798180025Sdfr
799180025Sdfr		if (block && res.stat.stat == nlm4_blocked) {
800180025Sdfr			/*
801180025Sdfr			 * The server should call us back with a
802180025Sdfr			 * granted message when the lock succeeds. In
803180025Sdfr			 * order to deal with broken servers, lost
804180025Sdfr			 * granted messages and server reboots, we
805180025Sdfr			 * will also re-try every few seconds.
806180025Sdfr			 */
807180025Sdfr			error = nlm_wait_lock(wait_handle, retry);
808180025Sdfr			if (error == EWOULDBLOCK) {
809180025Sdfr				retry = 2*retry;
810180025Sdfr				if (retry > 30*hz)
811180025Sdfr					retry = 30*hz;
812180025Sdfr				continue;
813180025Sdfr			}
814180025Sdfr			if (error) {
815180025Sdfr				/*
816180025Sdfr				 * We need to call the server to
817180025Sdfr				 * cancel our lock request.
818180025Sdfr				 */
819180025Sdfr				nlm4_cancargs cancel;
820180025Sdfr
821180025Sdfr				memset(&cancel, 0, sizeof(cancel));
822180025Sdfr
823180025Sdfr				xid = atomic_fetchadd_int(&nlm_xid, 1);
824180025Sdfr				cancel.cookie.n_len = sizeof(xid);
825180025Sdfr				cancel.cookie.n_bytes = (char*) &xid;
826180025Sdfr				cancel.block = block;
827180025Sdfr				cancel.exclusive = exclusive;
828180025Sdfr				cancel.alock = args.alock;
829180025Sdfr
830180025Sdfr				do {
831184588Sdfr					client = nlm_host_get_rpc(host, FALSE);
832180025Sdfr					if (!client)
833180025Sdfr						/* XXX retry? */
834180025Sdfr						return (ENOLCK);
835180025Sdfr
836180025Sdfr					stat = nlm_cancel_rpc(vers, &cancel,
837180025Sdfr					    &res, client, ext, *timo);
838180025Sdfr
839180025Sdfr					CLNT_RELEASE(client);
840180025Sdfr
841180025Sdfr					if (stat != RPC_SUCCESS) {
842180025Sdfr						/*
843180025Sdfr						 * We need to cope
844180025Sdfr						 * with temporary
845180025Sdfr						 * network partitions
846180025Sdfr						 * as well as server
847180025Sdfr						 * reboots. This means
848180025Sdfr						 * we have to keep
849180025Sdfr						 * trying to cancel
850180025Sdfr						 * until the server
851180025Sdfr						 * wakes up again.
852180025Sdfr						 */
853180025Sdfr						pause("nlmcancel", 10*hz);
854180025Sdfr					}
855180025Sdfr				} while (stat != RPC_SUCCESS);
856180025Sdfr
857180025Sdfr				/*
858180025Sdfr				 * Free res.cookie.
859180025Sdfr				 */
860180025Sdfr				xdr_free((xdrproc_t) xdr_nlm4_res, &res);
861180025Sdfr
862180025Sdfr				switch (res.stat.stat) {
863180025Sdfr				case nlm_denied:
864180025Sdfr					/*
865180025Sdfr					 * There was nothing
866180025Sdfr					 * to cancel. We are
867180025Sdfr					 * going to go ahead
868180025Sdfr					 * and assume we got
869180025Sdfr					 * the lock.
870180025Sdfr					 */
871180025Sdfr					error = 0;
872180025Sdfr					break;
873180025Sdfr
874180025Sdfr				case nlm4_denied_grace_period:
875180025Sdfr					/*
876180025Sdfr					 * The server has
877180025Sdfr					 * recently rebooted -
878180025Sdfr					 * treat this as a
879180025Sdfr					 * successful
880180025Sdfr					 * cancellation.
881180025Sdfr					 */
882180025Sdfr					break;
883180025Sdfr
884180025Sdfr				case nlm4_granted:
885180025Sdfr					/*
886180025Sdfr					 * We managed to
887180025Sdfr					 * cancel.
888180025Sdfr					 */
889180025Sdfr					break;
890180025Sdfr
891180025Sdfr				default:
892180025Sdfr					/*
893180025Sdfr					 * Broken server
894180025Sdfr					 * implementation -
895180025Sdfr					 * can't really do
896180025Sdfr					 * anything here.
897180025Sdfr					 */
898180025Sdfr					break;
899180025Sdfr				}
900180025Sdfr
901180025Sdfr			}
902180025Sdfr		} else {
903180025Sdfr			error = nlm_map_status(res.stat.stat);
904180025Sdfr		}
905180025Sdfr
906180025Sdfr		if (!error && !reclaim) {
907180025Sdfr			nlm_record_lock(vp, op, fl, args.alock.svid,
908180025Sdfr			    nlm_host_get_sysid(host), size);
909180025Sdfr			nlm_host_monitor(host, 0);
910180025Sdfr		}
911180025Sdfr
912180025Sdfr		return (error);
913180025Sdfr	}
914180025Sdfr}
915180025Sdfr
916180025Sdfrstatic int
917180025Sdfrnlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext,
918180025Sdfr    rpcvers_t vers, struct timeval *timo, int retries,
919180025Sdfr    struct vnode *vp, int op, struct flock *fl, int flags,
920180025Sdfr    int svid, size_t fhlen, void *fh, off_t size)
921180025Sdfr{
922180025Sdfr	struct nlm4_unlockargs args;
923180025Sdfr	char oh_space[32];
924180025Sdfr	struct nlm4_res res;
925180025Sdfr	u_int xid;
926180025Sdfr	CLIENT *client;
927180025Sdfr	enum clnt_stat stat;
928180025Sdfr	int error;
929180025Sdfr
930180025Sdfr	memset(&args, 0, sizeof(args));
931180025Sdfr	memset(&res, 0, sizeof(res));
932180025Sdfr
933180025Sdfr	error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size,
934180025Sdfr	    &args.alock, oh_space);
935180025Sdfr	if (error)
936180025Sdfr		return (error);
937180025Sdfr
938180025Sdfr	for (;;) {
939184588Sdfr		client = nlm_host_get_rpc(host, FALSE);
940180025Sdfr		if (!client)
941180025Sdfr			return (ENOLCK); /* XXX retry? */
942180025Sdfr
943180025Sdfr		xid = atomic_fetchadd_int(&nlm_xid, 1);
944180025Sdfr		args.cookie.n_len = sizeof(xid);
945180025Sdfr		args.cookie.n_bytes = (char*) &xid;
946180025Sdfr
947180025Sdfr		stat = nlm_unlock_rpc(vers, &args, &res, client, ext, *timo);
948180025Sdfr
949180025Sdfr		CLNT_RELEASE(client);
950180025Sdfr
951180025Sdfr		if (stat != RPC_SUCCESS) {
952180025Sdfr			if (retries) {
953180025Sdfr				retries--;
954180025Sdfr				continue;
955180025Sdfr			}
956180025Sdfr			return (EINVAL);
957180025Sdfr		}
958180025Sdfr
959180025Sdfr		/*
960180025Sdfr		 * Free res.cookie.
961180025Sdfr		 */
962180025Sdfr		xdr_free((xdrproc_t) xdr_nlm4_res, &res);
963180025Sdfr
964180025Sdfr		if (res.stat.stat == nlm4_denied_grace_period) {
965180025Sdfr			/*
966180025Sdfr			 * The server has recently rebooted and is
967180025Sdfr			 * giving old clients a change to reclaim
968180025Sdfr			 * their locks. Wait for a few seconds and try
969180025Sdfr			 * again.
970180025Sdfr			 */
971180025Sdfr			error = tsleep(&args, PCATCH, "nlmgrace", 5*hz);
972180025Sdfr			if (error && error != EWOULDBLOCK)
973180025Sdfr				return (error);
974180025Sdfr			continue;
975180025Sdfr		}
976180025Sdfr
977180025Sdfr		/*
978180025Sdfr		 * If we are being called via nlm_reclaim (which will
979180025Sdfr		 * use the F_REMOTE flag), don't record the lock
980180025Sdfr		 * operation in the local lock manager since the vnode
981180025Sdfr		 * is going away.
982180025Sdfr		 */
983180025Sdfr		if (!(flags & F_REMOTE))
984180025Sdfr			nlm_record_lock(vp, op, fl, args.alock.svid,
985180025Sdfr			    nlm_host_get_sysid(host), size);
986180025Sdfr
987180025Sdfr		return (0);
988180025Sdfr	}
989180025Sdfr}
990180025Sdfr
991180025Sdfrstatic int
992180025Sdfrnlm_getlock(struct nlm_host *host, struct rpc_callextra *ext,
993180025Sdfr    rpcvers_t vers, struct timeval *timo, int retries,
994180025Sdfr    struct vnode *vp, int op, struct flock *fl, int flags,
995180025Sdfr    int svid, size_t fhlen, void *fh, off_t size)
996180025Sdfr{
997180025Sdfr	struct nlm4_testargs args;
998180025Sdfr	char oh_space[32];
999180025Sdfr	struct nlm4_testres res;
1000180025Sdfr	u_int xid;
1001180025Sdfr	CLIENT *client;
1002180025Sdfr	enum clnt_stat stat;
1003180025Sdfr	int exclusive;
1004180025Sdfr	int error;
1005180025Sdfr
1006180025Sdfr	KASSERT(!(flags & F_FLOCK), ("unexpected F_FLOCK for F_GETLK"));
1007180025Sdfr
1008180025Sdfr	memset(&args, 0, sizeof(args));
1009180025Sdfr	memset(&res, 0, sizeof(res));
1010180025Sdfr
1011180025Sdfr	exclusive = (fl->l_type == F_WRLCK);
1012180025Sdfr
1013180025Sdfr	error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size,
1014180025Sdfr	    &args.alock, oh_space);
1015180025Sdfr	if (error)
1016180025Sdfr		return (error);
1017180025Sdfr	args.exclusive = exclusive;
1018180025Sdfr
1019180025Sdfr	for (;;) {
1020184588Sdfr		client = nlm_host_get_rpc(host, FALSE);
1021180025Sdfr		if (!client)
1022180025Sdfr			return (ENOLCK); /* XXX retry? */
1023180025Sdfr
1024180025Sdfr		xid = atomic_fetchadd_int(&nlm_xid, 1);
1025180025Sdfr		args.cookie.n_len = sizeof(xid);
1026180025Sdfr		args.cookie.n_bytes = (char*) &xid;
1027180025Sdfr
1028180025Sdfr		stat = nlm_test_rpc(vers, &args, &res, client, ext, *timo);
1029180025Sdfr
1030180025Sdfr		CLNT_RELEASE(client);
1031180025Sdfr
1032180025Sdfr		if (stat != RPC_SUCCESS) {
1033180025Sdfr			if (retries) {
1034180025Sdfr				retries--;
1035180025Sdfr				continue;
1036180025Sdfr			}
1037180025Sdfr			return (EINVAL);
1038180025Sdfr		}
1039180025Sdfr
1040180025Sdfr		if (res.stat.stat == nlm4_denied_grace_period) {
1041180025Sdfr			/*
1042180025Sdfr			 * The server has recently rebooted and is
1043180025Sdfr			 * giving old clients a change to reclaim
1044180025Sdfr			 * their locks. Wait for a few seconds and try
1045180025Sdfr			 * again.
1046180025Sdfr			 */
1047180025Sdfr			xdr_free((xdrproc_t) xdr_nlm4_testres, &res);
1048180025Sdfr			error = tsleep(&args, PCATCH, "nlmgrace", 5*hz);
1049180025Sdfr			if (error && error != EWOULDBLOCK)
1050180025Sdfr				return (error);
1051180025Sdfr			continue;
1052180025Sdfr		}
1053180025Sdfr
1054180025Sdfr		if (res.stat.stat == nlm4_denied) {
1055180025Sdfr			struct nlm4_holder *h =
1056180025Sdfr				&res.stat.nlm4_testrply_u.holder;
1057180025Sdfr			fl->l_start = h->l_offset;
1058180025Sdfr			fl->l_len = h->l_len;
1059180025Sdfr			fl->l_pid = h->svid;
1060180025Sdfr			if (h->exclusive)
1061180025Sdfr				fl->l_type = F_WRLCK;
1062180025Sdfr			else
1063180025Sdfr				fl->l_type = F_RDLCK;
1064180025Sdfr			fl->l_whence = SEEK_SET;
1065180025Sdfr			fl->l_sysid = 0;
1066180025Sdfr		} else {
1067180025Sdfr			fl->l_type = F_UNLCK;
1068180025Sdfr		}
1069180025Sdfr
1070180025Sdfr		xdr_free((xdrproc_t) xdr_nlm4_testres, &res);
1071180025Sdfr
1072180025Sdfr		return (0);
1073180025Sdfr	}
1074180025Sdfr}
1075180025Sdfr
1076180025Sdfrstatic int
1077180025Sdfrnlm_map_status(nlm4_stats stat)
1078180025Sdfr{
1079180025Sdfr	switch (stat) {
1080180025Sdfr	case nlm4_granted:
1081180025Sdfr		return (0);
1082180025Sdfr
1083180025Sdfr	case nlm4_denied:
1084180025Sdfr		return (EAGAIN);
1085180025Sdfr
1086180025Sdfr	case nlm4_denied_nolocks:
1087180025Sdfr		return (ENOLCK);
1088180025Sdfr
1089180025Sdfr	case nlm4_deadlck:
1090180025Sdfr		return (EDEADLK);
1091180025Sdfr
1092180025Sdfr	case nlm4_rofs:
1093180025Sdfr		return (EROFS);
1094180025Sdfr
1095180025Sdfr	case nlm4_stale_fh:
1096180025Sdfr		return (ESTALE);
1097180025Sdfr
1098180025Sdfr	case nlm4_fbig:
1099180025Sdfr		return (EFBIG);
1100180025Sdfr
1101180025Sdfr	case nlm4_failed:
1102180025Sdfr		return (EACCES);
1103180025Sdfr
1104180025Sdfr	default:
1105180025Sdfr		return (EINVAL);
1106180025Sdfr	}
1107180025Sdfr}
1108180025Sdfr
1109180025Sdfrstatic struct nlm_file_svid *
1110180025Sdfrnlm_find_svid(void *id)
1111180025Sdfr{
1112180025Sdfr	struct nlm_file_svid *ns, *newns;
1113180025Sdfr	int h;
1114180025Sdfr
1115180025Sdfr	h = (((uintptr_t) id) >> 7) % NLM_SVID_HASH_SIZE;
1116180025Sdfr
1117180025Sdfr	mtx_lock(&nlm_svid_lock);
1118180025Sdfr	LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) {
1119180025Sdfr		if (ns->ns_id == id) {
1120180025Sdfr			ns->ns_refs++;
1121180025Sdfr			break;
1122180025Sdfr		}
1123180025Sdfr	}
1124180025Sdfr	mtx_unlock(&nlm_svid_lock);
1125180025Sdfr	if (!ns) {
1126180025Sdfr		int svid = alloc_unr(nlm_svid_allocator);
1127180025Sdfr		newns = malloc(sizeof(struct nlm_file_svid), M_NLM,
1128180025Sdfr		    M_WAITOK);
1129180025Sdfr		newns->ns_refs = 1;
1130180025Sdfr		newns->ns_id = id;
1131180025Sdfr		newns->ns_svid = svid;
1132180025Sdfr		newns->ns_ucred = NULL;
1133180025Sdfr		newns->ns_active = FALSE;
1134180025Sdfr
1135180025Sdfr		/*
1136180025Sdfr		 * We need to check for a race with some other
1137180025Sdfr		 * thread allocating a svid for this file.
1138180025Sdfr		 */
1139180025Sdfr		mtx_lock(&nlm_svid_lock);
1140180025Sdfr		LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) {
1141180025Sdfr			if (ns->ns_id == id) {
1142180025Sdfr				ns->ns_refs++;
1143180025Sdfr				break;
1144180025Sdfr			}
1145180025Sdfr		}
1146180025Sdfr		if (ns) {
1147180025Sdfr			mtx_unlock(&nlm_svid_lock);
1148180025Sdfr			free_unr(nlm_svid_allocator, newns->ns_svid);
1149180025Sdfr			free(newns, M_NLM);
1150180025Sdfr		} else {
1151180025Sdfr			LIST_INSERT_HEAD(&nlm_file_svids[h], newns,
1152180025Sdfr			    ns_link);
1153180025Sdfr			ns = newns;
1154180025Sdfr			mtx_unlock(&nlm_svid_lock);
1155180025Sdfr		}
1156180025Sdfr	}
1157180025Sdfr
1158180025Sdfr	return (ns);
1159180025Sdfr}
1160180025Sdfr
1161180025Sdfrstatic void
1162180025Sdfrnlm_free_svid(struct nlm_file_svid *ns)
1163180025Sdfr{
1164180025Sdfr
1165180025Sdfr	mtx_lock(&nlm_svid_lock);
1166180025Sdfr	ns->ns_refs--;
1167180025Sdfr	if (!ns->ns_refs) {
1168180025Sdfr		KASSERT(!ns->ns_active, ("Freeing active SVID"));
1169180025Sdfr		LIST_REMOVE(ns, ns_link);
1170180025Sdfr		mtx_unlock(&nlm_svid_lock);
1171180025Sdfr		free_unr(nlm_svid_allocator, ns->ns_svid);
1172180025Sdfr		if (ns->ns_ucred)
1173180025Sdfr			crfree(ns->ns_ucred);
1174180025Sdfr		free(ns, M_NLM);
1175180025Sdfr	} else {
1176180025Sdfr		mtx_unlock(&nlm_svid_lock);
1177180025Sdfr	}
1178180025Sdfr}
1179180025Sdfr
1180180025Sdfrstatic int
1181180025Sdfrnlm_init_lock(struct flock *fl, int flags, int svid,
1182180025Sdfr    rpcvers_t vers, size_t fhlen, void *fh, off_t size,
1183180025Sdfr    struct nlm4_lock *lock, char oh_space[32])
1184180025Sdfr{
1185180025Sdfr	size_t oh_len;
1186180025Sdfr	off_t start, len;
1187180025Sdfr
1188180025Sdfr	if (fl->l_whence == SEEK_END) {
1189180025Sdfr		if (size > OFF_MAX
1190180025Sdfr		    || (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
1191180025Sdfr			return (EOVERFLOW);
1192180025Sdfr		start = size + fl->l_start;
1193180025Sdfr	} else if (fl->l_whence == SEEK_SET || fl->l_whence == SEEK_CUR) {
1194180025Sdfr		start = fl->l_start;
1195180025Sdfr	} else {
1196180025Sdfr		return (EINVAL);
1197180025Sdfr	}
1198180025Sdfr	if (start < 0)
1199180025Sdfr		return (EINVAL);
1200180025Sdfr	if (fl->l_len < 0) {
1201180025Sdfr		len = -fl->l_len;
1202180025Sdfr		start -= len;
1203180025Sdfr		if (start < 0)
1204180025Sdfr			return (EINVAL);
1205180025Sdfr	} else {
1206180025Sdfr		len = fl->l_len;
1207180025Sdfr	}
1208180025Sdfr
1209180025Sdfr	if (vers == NLM_VERS) {
1210180025Sdfr		/*
1211180025Sdfr		 * Enforce range limits on V1 locks
1212180025Sdfr		 */
1213180025Sdfr		if (start > 0xffffffffLL || len > 0xffffffffLL)
1214180025Sdfr			return (EOVERFLOW);
1215180025Sdfr	}
1216180025Sdfr
1217193066Sjamie	snprintf(oh_space, 32, "%d@", svid);
1218180025Sdfr	oh_len = strlen(oh_space);
1219193066Sjamie	getcredhostname(NULL, oh_space + oh_len, 32 - oh_len);
1220193066Sjamie	oh_len = strlen(oh_space);
1221180025Sdfr
1222180025Sdfr	memset(lock, 0, sizeof(*lock));
1223194118Sjamie	lock->caller_name = prison0.pr_hostname;
1224180025Sdfr	lock->fh.n_len = fhlen;
1225180025Sdfr	lock->fh.n_bytes = fh;
1226180025Sdfr	lock->oh.n_len = oh_len;
1227180025Sdfr	lock->oh.n_bytes = oh_space;
1228180025Sdfr	lock->svid = svid;
1229180025Sdfr	lock->l_offset = start;
1230180025Sdfr	lock->l_len = len;
1231180025Sdfr
1232180025Sdfr	return (0);
1233180025Sdfr}
1234