175631Salfred/*-
275631Salfred * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
375631Salfred *
475631Salfred * Redistribution and use in source and binary forms, with or without
575631Salfred * modification, are permitted provided that the following conditions
675631Salfred * are met:
775631Salfred * 1. Redistributions of source code must retain the above copyright
875631Salfred *    notice, this list of conditions and the following disclaimer.
975631Salfred * 2. Redistributions in binary form must reproduce the above copyright
1075631Salfred *    notice, this list of conditions and the following disclaimer in the
1175631Salfred *    documentation and/or other materials provided with the distribution.
1275631Salfred * 3. Berkeley Software Design Inc's name may not be used to endorse or
1375631Salfred *    promote products derived from this software without specific prior
1475631Salfred *    written permission.
1575631Salfred *
1675631Salfred * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
1775631Salfred * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1875631Salfred * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1975631Salfred * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
2075631Salfred * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2175631Salfred * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2275631Salfred * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2375631Salfred * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2475631Salfred * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2575631Salfred * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2675631Salfred * SUCH DAMAGE.
2775631Salfred *
2875631Salfred *      from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
2975631Salfred */
3075631Salfred
3183651Speter#include <sys/cdefs.h>
3283651Speter__FBSDID("$FreeBSD$");
3383651Speter
3475631Salfred#include <sys/param.h>
3575631Salfred#include <sys/systm.h>
36138430Sphk#include <sys/conf.h>
3775631Salfred#include <sys/fcntl.h>
3876166Smarkm#include <sys/kernel.h>		/* for hz */
39114216Skan#include <sys/limits.h>
4076166Smarkm#include <sys/lock.h>
4175631Salfred#include <sys/malloc.h>
4276166Smarkm#include <sys/lockf.h>		/* for hz */ /* Must come after sys/malloc.h */
4375631Salfred#include <sys/mbuf.h>
4475631Salfred#include <sys/mount.h>
4575631Salfred#include <sys/namei.h>
46168931Srwatson#include <sys/priv.h>
4775631Salfred#include <sys/proc.h>
4876166Smarkm#include <sys/resourcevar.h>
4975631Salfred#include <sys/socket.h>
5076166Smarkm#include <sys/socket.h>
5175631Salfred#include <sys/unistd.h>
5275631Salfred#include <sys/vnode.h>
5375631Salfred
5475631Salfred#include <net/if.h>
5575631Salfred
5675631Salfred#include <nfs/nfsproto.h>
57210455Srmacklem#include <nfs/nfs_lock.h>
5883651Speter#include <nfsclient/nfs.h>
5983651Speter#include <nfsclient/nfsmount.h>
6083651Speter#include <nfsclient/nfsnode.h>
6183651Speter#include <nfsclient/nlminfo.h>
6275631Salfred
63151695Sglebiusextern void (*nlminfo_release_p)(struct proc *p);
64151695Sglebius
65214048Srmacklemvop_advlock_t	*nfs_advlock_p = nfs_dolock;
66214048Srmacklemvop_reclaim_t	*nfs_reclaim_p = NULL;
67214048Srmacklem
68227293Sedstatic MALLOC_DEFINE(M_NFSLOCK, "nfsclient_lock", "NFS lock request");
69227293Sedstatic MALLOC_DEFINE(M_NLMINFO, "nfsclient_nlminfo",
70227293Sed    "NFS lock process structure");
71138430Sphk
72138430Sphkstatic int nfslockdans(struct thread *td, struct lockd_ans *ansp);
73151695Sglebiusstatic void nlminfo_release(struct proc *p);
7475631Salfred/*
75138430Sphk * --------------------------------------------------------------------
76138430Sphk * A miniature device driver which the userland uses to talk to us.
77138430Sphk *
78138430Sphk */
79138430Sphk
80138430Sphkstatic struct cdev *nfslock_dev;
81138430Sphkstatic struct mtx nfslock_mtx;
82138430Sphkstatic int nfslock_isopen;
83138430Sphkstatic TAILQ_HEAD(,__lock_msg)	nfslock_list;
84138430Sphk
85138430Sphkstatic int
86138430Sphknfslock_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
87138430Sphk{
88138430Sphk	int error;
89138430Sphk
90168931Srwatson	error = priv_check(td, PRIV_NFS_LOCKD);
91168931Srwatson	if (error)
92168931Srwatson		return (error);
93168931Srwatson
94138430Sphk	mtx_lock(&nfslock_mtx);
95138430Sphk	if (!nfslock_isopen) {
96138430Sphk		error = 0;
97138430Sphk		nfslock_isopen = 1;
98138430Sphk	} else {
99138430Sphk		error = EOPNOTSUPP;
100138430Sphk	}
101138430Sphk	mtx_unlock(&nfslock_mtx);
102138430Sphk
103138430Sphk	return (error);
104138430Sphk}
105138430Sphk
106138430Sphkstatic int
107138430Sphknfslock_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
108138430Sphk{
109138430Sphk	struct __lock_msg *lm;
110138430Sphk
111138430Sphk	mtx_lock(&nfslock_mtx);
112138430Sphk	nfslock_isopen = 0;
113138430Sphk	while (!TAILQ_EMPTY(&nfslock_list)) {
114138430Sphk		lm = TAILQ_FIRST(&nfslock_list);
115138430Sphk		/* XXX: answer request */
116138430Sphk		TAILQ_REMOVE(&nfslock_list, lm, lm_link);
117138430Sphk		free(lm, M_NFSLOCK);
118138430Sphk	}
119138430Sphk	mtx_unlock(&nfslock_mtx);
120138430Sphk	return (0);
121138430Sphk}
122138430Sphk
123138430Sphkstatic int
124138430Sphknfslock_read(struct cdev *dev, struct uio *uio, int ioflag)
125138430Sphk{
126138430Sphk	int error;
127138430Sphk	struct __lock_msg *lm;
128138430Sphk
129138430Sphk	if (uio->uio_resid != sizeof *lm)
130138430Sphk		return (EOPNOTSUPP);
131138430Sphk	lm = NULL;
132138430Sphk	error = 0;
133138430Sphk	mtx_lock(&nfslock_mtx);
134138430Sphk	while (TAILQ_EMPTY(&nfslock_list)) {
135138430Sphk		error = msleep(&nfslock_list, &nfslock_mtx, PSOCK | PCATCH,
136138430Sphk		    "nfslockd", 0);
137138430Sphk		if (error)
138138430Sphk			break;
139138430Sphk	}
140138430Sphk	if (!error) {
141138430Sphk		lm = TAILQ_FIRST(&nfslock_list);
142138430Sphk		TAILQ_REMOVE(&nfslock_list, lm, lm_link);
143138430Sphk	}
144138430Sphk	mtx_unlock(&nfslock_mtx);
145138430Sphk	if (!error) {
146138430Sphk		error = uiomove(lm, sizeof *lm, uio);
147138430Sphk		free(lm, M_NFSLOCK);
148138430Sphk	}
149138430Sphk	return (error);
150138430Sphk}
151138430Sphk
152138430Sphkstatic int
153138430Sphknfslock_write(struct cdev *dev, struct uio *uio, int ioflag)
154138430Sphk{
155138430Sphk	struct lockd_ans la;
156138430Sphk	int error;
157138430Sphk
158138430Sphk	if (uio->uio_resid != sizeof la)
159138430Sphk		return (EOPNOTSUPP);
160138430Sphk	error = uiomove(&la, sizeof la, uio);
161138430Sphk	if (!error)
162138430Sphk		error = nfslockdans(curthread, &la);
163138430Sphk	return (error);
164138430Sphk}
165138430Sphk
166138430Sphkstatic int
167138430Sphknfslock_send(struct __lock_msg *lm)
168138430Sphk{
169138430Sphk	struct __lock_msg *lm2;
170138430Sphk	int error;
171138430Sphk
172138430Sphk	error = 0;
173138430Sphk	lm2 = malloc(sizeof *lm2, M_NFSLOCK, M_WAITOK);
174138430Sphk	mtx_lock(&nfslock_mtx);
175138430Sphk	if (nfslock_isopen) {
176138430Sphk		memcpy(lm2, lm, sizeof *lm2);
177138430Sphk		TAILQ_INSERT_TAIL(&nfslock_list, lm2, lm_link);
178138430Sphk		wakeup(&nfslock_list);
179138430Sphk	} else {
180138430Sphk		error = EOPNOTSUPP;
181138430Sphk	}
182138430Sphk	mtx_unlock(&nfslock_mtx);
183138430Sphk	if (error)
184138430Sphk		free(lm2, M_NFSLOCK);
185138430Sphk	return (error);
186138430Sphk}
187138430Sphk
188138430Sphkstatic struct cdevsw nfslock_cdevsw = {
189138430Sphk	.d_version =	D_VERSION,
190138430Sphk	.d_open =	nfslock_open,
191138430Sphk	.d_close =	nfslock_close,
192138430Sphk	.d_read =	nfslock_read,
193138430Sphk	.d_write =	nfslock_write,
194138430Sphk	.d_name =	"nfslock"
195138430Sphk};
196138430Sphk
197138430Sphkstatic int
198138430Sphknfslock_modevent(module_t mod __unused, int type, void *data __unused)
199138430Sphk{
200138430Sphk
201138430Sphk	switch (type) {
202138430Sphk	case MOD_LOAD:
203138430Sphk		if (bootverbose)
204138430Sphk			printf("nfslock: pseudo-device\n");
205138430Sphk		mtx_init(&nfslock_mtx, "nfslock", NULL, MTX_DEF);
206138430Sphk		TAILQ_INIT(&nfslock_list);
207151695Sglebius		nlminfo_release_p = nlminfo_release;
208138430Sphk		nfslock_dev = make_dev(&nfslock_cdevsw, 0,
209138430Sphk		    UID_ROOT, GID_KMEM, 0600, _PATH_NFSLCKDEV);
210138430Sphk		return (0);
211138430Sphk	default:
212138430Sphk		return (EOPNOTSUPP);
213138430Sphk	}
214138430Sphk}
215138430Sphk
216138430SphkDEV_MODULE(nfslock, nfslock_modevent, NULL);
217138430SphkMODULE_VERSION(nfslock, 1);
218138430Sphk
219138430Sphk
/*
 * XXX
 * The waiting process has to be told whether its lock request succeeded.
 * The result is passed back through extra fields in the nlminfo structure
 * that hangs off p_nlminfo in the proc structure, since that structure
 * already exists for lockd bookkeeping.
 */
22675631Salfred
22775631Salfred/*
22875631Salfred * nfs_advlock --
22975631Salfred *      NFS advisory byte-level locks.
230178243Skib *
231178243Skib * The vnode shall be (shared) locked on the entry, it is
232178243Skib * unconditionally unlocked after.
23375631Salfred */
23475631Salfredint
23583651Speternfs_dolock(struct vop_advlock_args *ap)
23675631Salfred{
23775631Salfred	LOCKD_MSG msg;
23883366Sjulian	struct thread *td;
239138430Sphk	struct vnode *vp;
240138430Sphk	int error;
24175631Salfred	struct flock *fl;
24283366Sjulian	struct proc *p;
243214048Srmacklem	struct nfsmount *nmp;
244304843Skib	struct timeval boottime;
24575631Salfred
24683366Sjulian	td = curthread;
24783366Sjulian	p = td->td_proc;
24883366Sjulian
24975631Salfred	vp = ap->a_vp;
25075631Salfred	fl = ap->a_fl;
251214048Srmacklem	nmp = VFSTONFS(vp->v_mount);
25275631Salfred
253178243Skib	ASSERT_VOP_LOCKED(vp, "nfs_dolock");
254178243Skib
255214048Srmacklem	nmp->nm_getinfo(vp, msg.lm_fh, &msg.lm_fh_len, &msg.lm_addr,
256216931Srmacklem	    &msg.lm_nfsv3, NULL, NULL);
257178243Skib	VOP_UNLOCK(vp, 0);
258178243Skib
25975631Salfred	/*
26075631Salfred	 * the NLM protocol doesn't allow the server to return an error
26182174Sache	 * on ranges, so we do it.
26275631Salfred	 */
26382194Sache	if (fl->l_whence != SEEK_END) {
26482213Sache		if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
26582204Sache		    fl->l_start < 0 ||
26682204Sache		    (fl->l_len < 0 &&
26782204Sache		     (fl->l_start == 0 || fl->l_start + fl->l_len < 0)))
26882194Sache			return (EINVAL);
26982204Sache		if (fl->l_len > 0 &&
27082204Sache			 (fl->l_len - 1 > OFF_MAX - fl->l_start))
27182194Sache			return (EOVERFLOW);
27282194Sache	}
27375631Salfred
27475631Salfred	/*
27575631Salfred	 * Fill in the information structure.
27675631Salfred	 */
27775631Salfred	msg.lm_version = LOCKD_MSG_VERSION;
27875631Salfred	msg.lm_msg_ident.pid = p->p_pid;
279178243Skib
280178243Skib	mtx_lock(&Giant);
28175631Salfred	/*
28275631Salfred	 * if there is no nfsowner table yet, allocate one.
28375631Salfred	 */
28475631Salfred	if (p->p_nlminfo == NULL) {
285184214Sdes		p->p_nlminfo = malloc(sizeof(struct nlminfo),
286184214Sdes		    M_NLMINFO, M_WAITOK | M_ZERO);
28775631Salfred		p->p_nlminfo->pid_start = p->p_stats->p_start;
288304843Skib		getboottime(&boottime);
289114434Sdes		timevaladd(&p->p_nlminfo->pid_start, &boottime);
29075631Salfred	}
29175631Salfred	msg.lm_msg_ident.pid_start = p->p_nlminfo->pid_start;
29275631Salfred	msg.lm_msg_ident.msg_seq = ++(p->p_nlminfo->msg_seq);
29375631Salfred
29475631Salfred	msg.lm_fl = *fl;
29575631Salfred	msg.lm_wait = ap->a_flags & F_WAIT;
29675631Salfred	msg.lm_getlk = ap->a_op == F_GETLK;
297101947Salfred	cru2x(td->td_ucred, &msg.lm_cred);
29875631Salfred
29975631Salfred	for (;;) {
300138430Sphk		error = nfslock_send(&msg);
301138430Sphk		if (error)
302178243Skib			goto out;
30375631Salfred
304138430Sphk		/* Unlocks succeed immediately.  */
30575631Salfred		if (fl->l_type == F_UNLCK)
306178243Skib			goto out;
30775631Salfred
30875631Salfred		/*
309161371Sthomas		 * Retry after 20 seconds if we haven't gotten a response yet.
31075631Salfred		 * This number was picked out of thin air... but is longer
31175631Salfred		 * then even a reasonably loaded system should take (at least
31283651Speter		 * on a local network).  XXX Probably should use a back-off
31375631Salfred		 * scheme.
314116185Srwatson		 *
315116185Srwatson		 * XXX: No PCATCH here since we currently have no useful
316116185Srwatson		 * way to signal to the userland rpc.lockd that the request
317116185Srwatson		 * has been aborted.  Once the rpc.lockd implementation
318116185Srwatson		 * can handle aborts, and we report them properly,
319116185Srwatson		 * PCATCH can be put back.  In the mean time, if we did
320116185Srwatson		 * permit aborting, the lock attempt would "get lost"
321116185Srwatson		 * and the lock would get stuck in the locked state.
32275631Salfred		 */
323115415Srwatson		error = tsleep(p->p_nlminfo, PUSER, "lockd", 20*hz);
324107104Salfred		if (error != 0) {
32575631Salfred			if (error == EWOULDBLOCK) {
32675631Salfred				/*
32775631Salfred				 * We timed out, so we rewrite the request
328154316Srwatson				 * to the fifo.
32975631Salfred				 */
33075631Salfred				continue;
33175631Salfred			}
33275631Salfred
33375631Salfred			break;
33475631Salfred		}
33575631Salfred
33675631Salfred		if (msg.lm_getlk && p->p_nlminfo->retcode == 0) {
33775631Salfred			if (p->p_nlminfo->set_getlk_pid) {
338177633Sdfr				fl->l_sysid = 0; /* XXX */
33975631Salfred				fl->l_pid = p->p_nlminfo->getlk_pid;
34075631Salfred			} else {
34175631Salfred				fl->l_type = F_UNLCK;
34275631Salfred			}
34375631Salfred		}
34475631Salfred		error = p->p_nlminfo->retcode;
34575631Salfred		break;
34675631Salfred	}
347178243Skib out:
348178243Skib	mtx_unlock(&Giant);
349138430Sphk	return (error);
35075631Salfred}
35175631Salfred
35275631Salfred/*
35375631Salfred * nfslockdans --
35475631Salfred *      NFS advisory byte-level locks answer from the lock daemon.
35575631Salfred */
356138430Sphkstatic int
35786363Srwatsonnfslockdans(struct thread *td, struct lockd_ans *ansp)
35875631Salfred{
35986363Srwatson	struct proc *targetp;
36075631Salfred
36175631Salfred	/* the version should match, or we're out of sync */
36275631Salfred	if (ansp->la_vers != LOCKD_ANS_VERSION)
36375631Salfred		return (EINVAL);
36475631Salfred
36575631Salfred	/* Find the process, set its return errno and wake it up. */
36686363Srwatson	if ((targetp = pfind(ansp->la_msg_ident.pid)) == NULL)
36775631Salfred		return (ESRCH);
36875631Salfred
36983651Speter	/* verify the pid hasn't been reused (if we can), and it isn't waiting
37075631Salfred	 * for an answer from a more recent request.  We return an EPIPE if
37175631Salfred	 * the match fails, because we've already used ESRCH above, and this
37275631Salfred	 * is sort of like writing on a pipe after the reader has closed it.
37375631Salfred	 */
37486363Srwatson	if (targetp->p_nlminfo == NULL ||
37575631Salfred	    ((ansp->la_msg_ident.msg_seq != -1) &&
37686363Srwatson	      (timevalcmp(&targetp->p_nlminfo->pid_start,
37775631Salfred			&ansp->la_msg_ident.pid_start, !=) ||
37886363Srwatson	       targetp->p_nlminfo->msg_seq != ansp->la_msg_ident.msg_seq))) {
37986363Srwatson		PROC_UNLOCK(targetp);
38075631Salfred		return (EPIPE);
38177563Sjake	}
38275631Salfred
38386363Srwatson	targetp->p_nlminfo->retcode = ansp->la_errno;
38486363Srwatson	targetp->p_nlminfo->set_getlk_pid = ansp->la_set_getlk_pid;
38586363Srwatson	targetp->p_nlminfo->getlk_pid = ansp->la_getlk_pid;
38675631Salfred
387107104Salfred	wakeup(targetp->p_nlminfo);
38875631Salfred
38986363Srwatson	PROC_UNLOCK(targetp);
39075631Salfred	return (0);
39175631Salfred}
392138430Sphk
393151695Sglebius/*
394151695Sglebius * Free nlminfo attached to process.
395151695Sglebius */
396151695Sglebiusvoid
397151695Sglebiusnlminfo_release(struct proc *p)
398151695Sglebius{
399151695Sglebius	free(p->p_nlminfo, M_NLMINFO);
400151695Sglebius	p->p_nlminfo = NULL;
401151695Sglebius}
402