1/*-
2 * Copyright (c) 2009 Rick Macklem, University of Guelph
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/11/sys/fs/nfsserver/nfs_nfsdstate.c 366241 2020-09-29 02:03:24Z rmacklem $");
30
31#include "opt_inet.h"
32#include "opt_inet6.h"
33#include <fs/nfs/nfsport.h>
34
35struct nfsrv_stablefirst nfsrv_stablefirst;
36int nfsrv_issuedelegs = 0;
37int nfsrv_dolocallocks = 0;
38struct nfsv4lock nfsv4rootfs_lock;
39
40extern int newnfs_numnfsd;
41extern struct nfsstatsv1 nfsstatsv1;
42extern int nfsrv_lease;
43extern struct timeval nfsboottime;
44extern u_int32_t newnfs_true, newnfs_false;
45extern int nfsd_debuglevel;
46NFSV4ROOTLOCKMUTEX;
47NFSSTATESPINLOCK;
48
49SYSCTL_DECL(_vfs_nfsd);
50int	nfsrv_statehashsize = NFSSTATEHASHSIZE;
51SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
52    &nfsrv_statehashsize, 0,
53    "Size of state hash table set via loader.conf");
54
55int	nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
56SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
57    &nfsrv_clienthashsize, 0,
58    "Size of client hash table set via loader.conf");
59
60int	nfsrv_lockhashsize = NFSLOCKHASHSIZE;
61SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
62    &nfsrv_lockhashsize, 0,
63    "Size of file handle hash table set via loader.conf");
64
65int	nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
66SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
67    &nfsrv_sessionhashsize, 0,
68    "Size of session hash table set via loader.conf");
69
70static int	nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
71SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
72    &nfsrv_v4statelimit, 0,
73    "High water limit for NFSv4 opens+locks+delegations");
74
75static int	nfsrv_writedelegifpos = 0;
76SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
77    &nfsrv_writedelegifpos, 0,
78    "Issue a write delegation for read opens if possible");
79
80static int	nfsrv_allowreadforwriteopen = 1;
81SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
82    &nfsrv_allowreadforwriteopen, 0,
83    "Allow Reads to be done with Write Access StateIDs");
84
85/*
86 * Hash lists for nfs V4.
87 */
88struct nfsclienthashhead	*nfsclienthash;
89struct nfslockhashhead		*nfslockhash;
90struct nfssessionhash		*nfssessionhash;
91
92static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
93static time_t nfsrvboottime;
94static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
95static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
96static int nfsrv_nogsscallback = 0;
97static volatile int nfsrv_writedelegcnt = 0;
98
99/* local functions */
100static void nfsrv_dumpaclient(struct nfsclient *clp,
101    struct nfsd_dumpclients *dumpp);
102static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
103    NFSPROC_T *p);
104static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
105    NFSPROC_T *p);
106static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
107    NFSPROC_T *p);
108static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
109    int cansleep, NFSPROC_T *p);
110static void nfsrv_freenfslock(struct nfslock *lop);
111static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
112static void nfsrv_freedeleg(struct nfsstate *);
113static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
114    u_int32_t flags, struct nfsstate **stpp);
115static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
116    struct nfsstate **stpp);
117static int nfsrv_getlockfh(vnode_t vp, u_short flags,
118    struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
119static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
120    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
121static void nfsrv_insertlock(struct nfslock *new_lop,
122    struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
123static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
124    struct nfslock **other_lopp, struct nfslockfile *lfp);
125static int nfsrv_getipnumber(u_char *cp);
126static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
127    nfsv4stateid_t *stateidp, int specialid);
128static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
129    u_int32_t flags);
130static int nfsrv_docallback(struct nfsclient *clp, int procnum,
131    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
132    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p);
133static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
134    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp);
135static u_int32_t nfsrv_nextclientindex(void);
136static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
137static void nfsrv_markstable(struct nfsclient *clp);
138static void nfsrv_markreclaim(struct nfsclient *clp);
139static int nfsrv_checkstable(struct nfsclient *clp);
140static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
141    vnode *vp, NFSPROC_T *p);
142static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
143    NFSPROC_T *p, vnode_t vp);
144static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
145    struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
146static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
147    struct nfsclient *clp);
148static time_t nfsrv_leaseexpiry(void);
149static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
150static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
151    struct nfsstate *stp, struct nfsrvcache *op);
152static int nfsrv_nootherstate(struct nfsstate *stp);
153static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
154    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
155static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
156    uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
157static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
158    int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
159    NFSPROC_T *p);
160static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
161    NFSPROC_T *p);
162static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
163    uint64_t first, uint64_t end);
164static void nfsrv_locklf(struct nfslockfile *lfp);
165static void nfsrv_unlocklf(struct nfslockfile *lfp);
166static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
167static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
168static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
169    int dont_replycache, struct nfsdsession **sepp);
170static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
171
172/*
173 * Scan the client list for a match and either return the current one,
174 * create a new entry or return an error.
175 * If returning a non-error, the clp structure must either be linked into
176 * the client list or free'd.
177 */
178int
179nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
180    nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
181{
182	struct nfsclient *clp = NULL, *new_clp = *new_clpp;
183	int i, error = 0, ret;
184	struct nfsstate *stp, *tstp;
185#ifdef INET
186	struct sockaddr_in *sin, *rin;
187#endif
188#ifdef INET6
189	struct sockaddr_in6 *sin6, *rin6;
190#endif
191	struct nfsdsession *sep, *nsep;
192	int zapit = 0, gotit, hasstate = 0, igotlock;
193	static u_int64_t confirm_index = 0;
194
195	/*
196	 * Check for state resource limit exceeded.
197	 */
198	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
199		error = NFSERR_RESOURCE;
200		goto out;
201	}
202
203	if (nfsrv_issuedelegs == 0 ||
204	    ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
205		/*
206		 * Don't do callbacks when delegations are disabled or
207		 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
208		 * If establishing a callback connection is attempted
209		 * when a firewall is blocking the callback path, the
210		 * server may wait too long for the connect attempt to
211		 * succeed during the Open. Some clients, such as Linux,
212		 * may timeout and give up on the Open before the server
213		 * replies. Also, since AUTH_GSS callbacks are not
214		 * yet interoperability tested, they might cause the
215		 * server to crap out, if they get past the Init call to
216		 * the client.
217		 */
218		new_clp->lc_program = 0;
219
220	/* Lock out other nfsd threads */
221	NFSLOCKV4ROOTMUTEX();
222	nfsv4_relref(&nfsv4rootfs_lock);
223	do {
224		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
225		    NFSV4ROOTLOCKMUTEXPTR, NULL);
226	} while (!igotlock);
227	NFSUNLOCKV4ROOTMUTEX();
228
229	/*
230	 * Search for a match in the client list.
231	 */
232	gotit = i = 0;
233	while (i < nfsrv_clienthashsize && !gotit) {
234	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
235		if (new_clp->lc_idlen == clp->lc_idlen &&
236		    !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
237			gotit = 1;
238			break;
239		}
240	    }
241	    if (gotit == 0)
242		i++;
243	}
244	if (!gotit ||
245	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
246		if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
247			/*
248			 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
249			 * client is trying to update a confirmed clientid.
250			 */
251			NFSLOCKV4ROOTMUTEX();
252			nfsv4_unlock(&nfsv4rootfs_lock, 1);
253			NFSUNLOCKV4ROOTMUTEX();
254			confirmp->lval[1] = 0;
255			error = NFSERR_NOENT;
256			goto out;
257		}
258		/*
259		 * Get rid of the old one.
260		 */
261		if (i != nfsrv_clienthashsize) {
262			LIST_REMOVE(clp, lc_hash);
263			nfsrv_cleanclient(clp, p);
264			nfsrv_freedeleglist(&clp->lc_deleg);
265			nfsrv_freedeleglist(&clp->lc_olddeleg);
266			zapit = 1;
267		}
268		/*
269		 * Add it after assigning a client id to it.
270		 */
271		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
272		if ((nd->nd_flag & ND_NFSV41) != 0)
273			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
274			    ++confirm_index;
275		else
276			confirmp->qval = new_clp->lc_confirm.qval =
277			    ++confirm_index;
278		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
279		    (u_int32_t)nfsrvboottime;
280		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
281		    nfsrv_nextclientindex();
282		new_clp->lc_stateindex = 0;
283		new_clp->lc_statemaxindex = 0;
284		new_clp->lc_cbref = 0;
285		new_clp->lc_expiry = nfsrv_leaseexpiry();
286		LIST_INIT(&new_clp->lc_open);
287		LIST_INIT(&new_clp->lc_deleg);
288		LIST_INIT(&new_clp->lc_olddeleg);
289		LIST_INIT(&new_clp->lc_session);
290		for (i = 0; i < nfsrv_statehashsize; i++)
291			LIST_INIT(&new_clp->lc_stateid[i]);
292		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
293		    lc_hash);
294		nfsstatsv1.srvclients++;
295		nfsrv_openpluslock++;
296		nfsrv_clients++;
297		NFSLOCKV4ROOTMUTEX();
298		nfsv4_unlock(&nfsv4rootfs_lock, 1);
299		NFSUNLOCKV4ROOTMUTEX();
300		if (zapit)
301			nfsrv_zapclient(clp, p);
302		*new_clpp = NULL;
303		goto out;
304	}
305
306	/*
307	 * Now, handle the cases where the id is already issued.
308	 */
309	if (nfsrv_notsamecredname(nd, clp)) {
310	    /*
311	     * Check to see if there is expired state that should go away.
312	     */
313	    if (clp->lc_expiry < NFSD_MONOSEC &&
314	        (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
315		nfsrv_cleanclient(clp, p);
316		nfsrv_freedeleglist(&clp->lc_deleg);
317	    }
318
319	    /*
320	     * If there is outstanding state, then reply NFSERR_CLIDINUSE per
321	     * RFC3530 Sec. 8.1.2 last para.
322	     */
323	    if (!LIST_EMPTY(&clp->lc_deleg)) {
324		hasstate = 1;
325	    } else if (LIST_EMPTY(&clp->lc_open)) {
326		hasstate = 0;
327	    } else {
328		hasstate = 0;
329		/* Look for an Open on the OpenOwner */
330		LIST_FOREACH(stp, &clp->lc_open, ls_list) {
331		    if (!LIST_EMPTY(&stp->ls_open)) {
332			hasstate = 1;
333			break;
334		    }
335		}
336	    }
337	    if (hasstate) {
338		/*
339		 * If the uid doesn't match, return NFSERR_CLIDINUSE after
340		 * filling out the correct ipaddr and portnum.
341		 */
342		switch (clp->lc_req.nr_nam->sa_family) {
343#ifdef INET
344		case AF_INET:
345			sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam;
346			rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
347			sin->sin_addr.s_addr = rin->sin_addr.s_addr;
348			sin->sin_port = rin->sin_port;
349			break;
350#endif
351#ifdef INET6
352		case AF_INET6:
353			sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam;
354			rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
355			sin6->sin6_addr = rin6->sin6_addr;
356			sin6->sin6_port = rin6->sin6_port;
357			break;
358#endif
359		}
360		NFSLOCKV4ROOTMUTEX();
361		nfsv4_unlock(&nfsv4rootfs_lock, 1);
362		NFSUNLOCKV4ROOTMUTEX();
363		error = NFSERR_CLIDINUSE;
364		goto out;
365	    }
366	}
367
368	if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
369		/*
370		 * If the verifier has changed, the client has rebooted
371		 * and a new client id is issued. The old state info
372		 * can be thrown away once the SETCLIENTID_CONFIRM occurs.
373		 */
374		LIST_REMOVE(clp, lc_hash);
375
376		/* Get rid of all sessions on this clientid. */
377		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) {
378			ret = nfsrv_freesession(sep, NULL);
379			if (ret != 0)
380				printf("nfsrv_setclient: verifier changed free"
381				    " session failed=%d\n", ret);
382		}
383
384		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
385		if ((nd->nd_flag & ND_NFSV41) != 0)
386			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
387			    ++confirm_index;
388		else
389			confirmp->qval = new_clp->lc_confirm.qval =
390			    ++confirm_index;
391		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
392		    nfsrvboottime;
393		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
394		    nfsrv_nextclientindex();
395		new_clp->lc_stateindex = 0;
396		new_clp->lc_statemaxindex = 0;
397		new_clp->lc_cbref = 0;
398		new_clp->lc_expiry = nfsrv_leaseexpiry();
399
400		/*
401		 * Save the state until confirmed.
402		 */
403		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
404		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
405			tstp->ls_clp = new_clp;
406		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
407		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
408			tstp->ls_clp = new_clp;
409		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
410		    ls_list);
411		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
412			tstp->ls_clp = new_clp;
413		for (i = 0; i < nfsrv_statehashsize; i++) {
414			LIST_NEWHEAD(&new_clp->lc_stateid[i],
415			    &clp->lc_stateid[i], ls_hash);
416			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
417				tstp->ls_clp = new_clp;
418		}
419		LIST_INIT(&new_clp->lc_session);
420		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
421		    lc_hash);
422		nfsstatsv1.srvclients++;
423		nfsrv_openpluslock++;
424		nfsrv_clients++;
425		NFSLOCKV4ROOTMUTEX();
426		nfsv4_unlock(&nfsv4rootfs_lock, 1);
427		NFSUNLOCKV4ROOTMUTEX();
428
429		/*
430		 * Must wait until any outstanding callback on the old clp
431		 * completes.
432		 */
433		NFSLOCKSTATE();
434		while (clp->lc_cbref) {
435			clp->lc_flags |= LCL_WAKEUPWANTED;
436			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
437			    "nfsd clp", 10 * hz);
438		}
439		NFSUNLOCKSTATE();
440		nfsrv_zapclient(clp, p);
441		*new_clpp = NULL;
442		goto out;
443	}
444
445	/* For NFSv4.1, mark that we found a confirmed clientid. */
446	if ((nd->nd_flag & ND_NFSV41) != 0) {
447		clientidp->lval[0] = clp->lc_clientid.lval[0];
448		clientidp->lval[1] = clp->lc_clientid.lval[1];
449		confirmp->lval[0] = 0;	/* Ignored by client */
450		confirmp->lval[1] = 1;
451	} else {
452		/*
453		 * id and verifier match, so update the net address info
454		 * and get rid of any existing callback authentication
455		 * handle, so a new one will be acquired.
456		 */
457		LIST_REMOVE(clp, lc_hash);
458		new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
459		new_clp->lc_expiry = nfsrv_leaseexpiry();
460		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
461		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
462		    clp->lc_clientid.lval[0];
463		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
464		    clp->lc_clientid.lval[1];
465		new_clp->lc_delegtime = clp->lc_delegtime;
466		new_clp->lc_stateindex = clp->lc_stateindex;
467		new_clp->lc_statemaxindex = clp->lc_statemaxindex;
468		new_clp->lc_cbref = 0;
469		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
470		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
471			tstp->ls_clp = new_clp;
472		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
473		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
474			tstp->ls_clp = new_clp;
475		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
476		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
477			tstp->ls_clp = new_clp;
478		for (i = 0; i < nfsrv_statehashsize; i++) {
479			LIST_NEWHEAD(&new_clp->lc_stateid[i],
480			    &clp->lc_stateid[i], ls_hash);
481			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
482				tstp->ls_clp = new_clp;
483		}
484		LIST_INIT(&new_clp->lc_session);
485		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
486		    lc_hash);
487		nfsstatsv1.srvclients++;
488		nfsrv_openpluslock++;
489		nfsrv_clients++;
490	}
491	NFSLOCKV4ROOTMUTEX();
492	nfsv4_unlock(&nfsv4rootfs_lock, 1);
493	NFSUNLOCKV4ROOTMUTEX();
494
495	if ((nd->nd_flag & ND_NFSV41) == 0) {
496		/*
497		 * Must wait until any outstanding callback on the old clp
498		 * completes.
499		 */
500		NFSLOCKSTATE();
501		while (clp->lc_cbref) {
502			clp->lc_flags |= LCL_WAKEUPWANTED;
503			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
504			    "nfsdclp", 10 * hz);
505		}
506		NFSUNLOCKSTATE();
507		nfsrv_zapclient(clp, p);
508		*new_clpp = NULL;
509	}
510
511out:
512	NFSEXITCODE2(error, nd);
513	return (error);
514}
515
516/*
517 * Check to see if the client id exists and optionally confirm it.
518 */
519int
520nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
521    struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
522    struct nfsrv_descript *nd, NFSPROC_T *p)
523{
524	struct nfsclient *clp;
525	struct nfsstate *stp;
526	int i;
527	struct nfsclienthashhead *hp;
528	int error = 0, igotlock, doneok;
529	struct nfssessionhash *shp;
530	struct nfsdsession *sep;
531	uint64_t sessid[2];
532	static uint64_t next_sess = 0;
533
534	if (clpp)
535		*clpp = NULL;
536	if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
537	    opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
538		error = NFSERR_STALECLIENTID;
539		goto out;
540	}
541
542	/*
543	 * If called with opflags == CLOPS_RENEW, the State Lock is
544	 * already held. Otherwise, we need to get either that or,
545	 * for the case of Confirm, lock out the nfsd threads.
546	 */
547	if (opflags & CLOPS_CONFIRM) {
548		NFSLOCKV4ROOTMUTEX();
549		nfsv4_relref(&nfsv4rootfs_lock);
550		do {
551			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
552			    NFSV4ROOTLOCKMUTEXPTR, NULL);
553		} while (!igotlock);
554		/*
555		 * Create a new sessionid here, since we need to do it where
556		 * there is a mutex held to serialize update of next_sess.
557		 */
558		if ((nd->nd_flag & ND_NFSV41) != 0) {
559			sessid[0] = ++next_sess;
560			sessid[1] = clientid.qval;
561		}
562		NFSUNLOCKV4ROOTMUTEX();
563	} else if (opflags != CLOPS_RENEW) {
564		NFSLOCKSTATE();
565	}
566
567	/* For NFSv4.1, the clp is acquired from the associated session. */
568	if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
569	    opflags == CLOPS_RENEW) {
570		clp = NULL;
571		if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
572			shp = NFSSESSIONHASH(nd->nd_sessionid);
573			NFSLOCKSESSION(shp);
574			sep = nfsrv_findsession(nd->nd_sessionid);
575			if (sep != NULL)
576				clp = sep->sess_clp;
577			NFSUNLOCKSESSION(shp);
578		}
579	} else {
580		hp = NFSCLIENTHASH(clientid);
581		LIST_FOREACH(clp, hp, lc_hash) {
582			if (clp->lc_clientid.lval[1] == clientid.lval[1])
583				break;
584		}
585	}
586	if (clp == NULL) {
587		if (opflags & CLOPS_CONFIRM)
588			error = NFSERR_STALECLIENTID;
589		else
590			error = NFSERR_EXPIRED;
591	} else if (clp->lc_flags & LCL_ADMINREVOKED) {
592		/*
593		 * If marked admin revoked, just return the error.
594		 */
595		error = NFSERR_ADMINREVOKED;
596	}
597	if (error) {
598		if (opflags & CLOPS_CONFIRM) {
599			NFSLOCKV4ROOTMUTEX();
600			nfsv4_unlock(&nfsv4rootfs_lock, 1);
601			NFSUNLOCKV4ROOTMUTEX();
602		} else if (opflags != CLOPS_RENEW) {
603			NFSUNLOCKSTATE();
604		}
605		goto out;
606	}
607
608	/*
609	 * Perform any operations specified by the opflags.
610	 */
611	if (opflags & CLOPS_CONFIRM) {
612		if (((nd->nd_flag & ND_NFSV41) != 0 &&
613		     clp->lc_confirm.lval[0] != confirm.lval[0]) ||
614		    ((nd->nd_flag & ND_NFSV41) == 0 &&
615		     clp->lc_confirm.qval != confirm.qval))
616			error = NFSERR_STALECLIENTID;
617		else if (nfsrv_notsamecredname(nd, clp))
618			error = NFSERR_CLIDINUSE;
619
620		if (!error) {
621		    if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
622			LCL_NEEDSCONFIRM) {
623			/*
624			 * Hang onto the delegations (as old delegations)
625			 * for an Open with CLAIM_DELEGATE_PREV unless in
626			 * grace, but get rid of the rest of the state.
627			 */
628			nfsrv_cleanclient(clp, p);
629			nfsrv_freedeleglist(&clp->lc_olddeleg);
630			if (nfsrv_checkgrace(nd, clp, 0)) {
631			    /* In grace, so just delete delegations */
632			    nfsrv_freedeleglist(&clp->lc_deleg);
633			} else {
634			    LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
635				stp->ls_flags |= NFSLCK_OLDDELEG;
636			    clp->lc_delegtime = NFSD_MONOSEC +
637				nfsrv_lease + NFSRV_LEASEDELTA;
638			    LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
639				ls_list);
640			}
641			if ((nd->nd_flag & ND_NFSV41) != 0)
642			    clp->lc_program = cbprogram;
643		    }
644		    clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
645		    if (clp->lc_program)
646			clp->lc_flags |= LCL_NEEDSCBNULL;
647		    /* For NFSv4.1, link the session onto the client. */
648		    if (nsep != NULL) {
649			/* Hold a reference on the xprt for a backchannel. */
650			if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
651			    != 0) {
652			    if (clp->lc_req.nr_client == NULL)
653				clp->lc_req.nr_client = (struct __rpc_client *)
654				    clnt_bck_create(nd->nd_xprt->xp_socket,
655				    cbprogram, NFSV4_CBVERS);
656			    if (clp->lc_req.nr_client != NULL) {
657				SVC_ACQUIRE(nd->nd_xprt);
658				nd->nd_xprt->xp_p2 =
659				    clp->lc_req.nr_client->cl_private;
660				/* Disable idle timeout. */
661				nd->nd_xprt->xp_idletimeout = 0;
662				nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
663			    } else
664				nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
665			}
666			NFSBCOPY(sessid, nsep->sess_sessionid,
667			    NFSX_V4SESSIONID);
668			NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
669			    NFSX_V4SESSIONID);
670			shp = NFSSESSIONHASH(nsep->sess_sessionid);
671			NFSLOCKSTATE();
672			NFSLOCKSESSION(shp);
673			LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
674			LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
675			nsep->sess_clp = clp;
676			NFSUNLOCKSESSION(shp);
677			NFSUNLOCKSTATE();
678		    }
679		}
680	} else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
681		error = NFSERR_EXPIRED;
682	}
683
684	/*
685	 * If called by the Renew Op, we must check the principal.
686	 */
687	if (!error && (opflags & CLOPS_RENEWOP)) {
688	    if (nfsrv_notsamecredname(nd, clp)) {
689		doneok = 0;
690		for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
691		    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
692			if ((stp->ls_flags & NFSLCK_OPEN) &&
693			    stp->ls_uid == nd->nd_cred->cr_uid) {
694				doneok = 1;
695				break;
696			}
697		    }
698		}
699		if (!doneok)
700			error = NFSERR_ACCES;
701	    }
702	    if (!error && (clp->lc_flags & LCL_CBDOWN))
703		error = NFSERR_CBPATHDOWN;
704	}
705	if ((!error || error == NFSERR_CBPATHDOWN) &&
706	     (opflags & CLOPS_RENEW)) {
707		clp->lc_expiry = nfsrv_leaseexpiry();
708	}
709	if (opflags & CLOPS_CONFIRM) {
710		NFSLOCKV4ROOTMUTEX();
711		nfsv4_unlock(&nfsv4rootfs_lock, 1);
712		NFSUNLOCKV4ROOTMUTEX();
713	} else if (opflags != CLOPS_RENEW) {
714		NFSUNLOCKSTATE();
715	}
716	if (clpp)
717		*clpp = clp;
718
719out:
720	NFSEXITCODE2(error, nd);
721	return (error);
722}
723
724/*
725 * Perform the NFSv4.1 destroy clientid.
726 */
727int
728nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
729{
730	struct nfsclient *clp;
731	struct nfsclienthashhead *hp;
732	int error = 0, i, igotlock;
733
734	if (nfsrvboottime != clientid.lval[0]) {
735		error = NFSERR_STALECLIENTID;
736		goto out;
737	}
738
739	/* Lock out other nfsd threads */
740	NFSLOCKV4ROOTMUTEX();
741	nfsv4_relref(&nfsv4rootfs_lock);
742	do {
743		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
744		    NFSV4ROOTLOCKMUTEXPTR, NULL);
745	} while (igotlock == 0);
746	NFSUNLOCKV4ROOTMUTEX();
747
748	hp = NFSCLIENTHASH(clientid);
749	LIST_FOREACH(clp, hp, lc_hash) {
750		if (clp->lc_clientid.lval[1] == clientid.lval[1])
751			break;
752	}
753	if (clp == NULL) {
754		NFSLOCKV4ROOTMUTEX();
755		nfsv4_unlock(&nfsv4rootfs_lock, 1);
756		NFSUNLOCKV4ROOTMUTEX();
757		/* Just return ok, since it is gone. */
758		goto out;
759	}
760
761	/* Scan for state on the clientid. */
762	for (i = 0; i < nfsrv_statehashsize; i++)
763		if (!LIST_EMPTY(&clp->lc_stateid[i])) {
764			NFSLOCKV4ROOTMUTEX();
765			nfsv4_unlock(&nfsv4rootfs_lock, 1);
766			NFSUNLOCKV4ROOTMUTEX();
767			error = NFSERR_CLIENTIDBUSY;
768			goto out;
769		}
770	if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
771		NFSLOCKV4ROOTMUTEX();
772		nfsv4_unlock(&nfsv4rootfs_lock, 1);
773		NFSUNLOCKV4ROOTMUTEX();
774		error = NFSERR_CLIENTIDBUSY;
775		goto out;
776	}
777
778	/* Destroy the clientid and return ok. */
779	nfsrv_cleanclient(clp, p);
780	nfsrv_freedeleglist(&clp->lc_deleg);
781	nfsrv_freedeleglist(&clp->lc_olddeleg);
782	LIST_REMOVE(clp, lc_hash);
783	NFSLOCKV4ROOTMUTEX();
784	nfsv4_unlock(&nfsv4rootfs_lock, 1);
785	NFSUNLOCKV4ROOTMUTEX();
786	nfsrv_zapclient(clp, p);
787out:
788	NFSEXITCODE2(error, nd);
789	return (error);
790}
791
792/*
793 * Called from the new nfssvc syscall to admin revoke a clientid.
794 * Returns 0 for success, error otherwise.
795 */
796int
797nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
798{
799	struct nfsclient *clp = NULL;
800	int i, error = 0;
801	int gotit, igotlock;
802
803	/*
804	 * First, lock out the nfsd so that state won't change while the
805	 * revocation record is being written to the stable storage restart
806	 * file.
807	 */
808	NFSLOCKV4ROOTMUTEX();
809	do {
810		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
811		    NFSV4ROOTLOCKMUTEXPTR, NULL);
812	} while (!igotlock);
813	NFSUNLOCKV4ROOTMUTEX();
814
815	/*
816	 * Search for a match in the client list.
817	 */
818	gotit = i = 0;
819	while (i < nfsrv_clienthashsize && !gotit) {
820	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
821		if (revokep->nclid_idlen == clp->lc_idlen &&
822		    !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
823			gotit = 1;
824			break;
825		}
826	    }
827	    i++;
828	}
829	if (!gotit) {
830		NFSLOCKV4ROOTMUTEX();
831		nfsv4_unlock(&nfsv4rootfs_lock, 0);
832		NFSUNLOCKV4ROOTMUTEX();
833		error = EPERM;
834		goto out;
835	}
836
837	/*
838	 * Now, write out the revocation record
839	 */
840	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
841	nfsrv_backupstable();
842
843	/*
844	 * and clear out the state, marking the clientid revoked.
845	 */
846	clp->lc_flags &= ~LCL_CALLBACKSON;
847	clp->lc_flags |= LCL_ADMINREVOKED;
848	nfsrv_cleanclient(clp, p);
849	nfsrv_freedeleglist(&clp->lc_deleg);
850	nfsrv_freedeleglist(&clp->lc_olddeleg);
851	NFSLOCKV4ROOTMUTEX();
852	nfsv4_unlock(&nfsv4rootfs_lock, 0);
853	NFSUNLOCKV4ROOTMUTEX();
854
855out:
856	NFSEXITCODE(error);
857	return (error);
858}
859
860/*
861 * Dump out stats for all clients. Called from nfssvc(2), that is used
862 * nfsstatsv1.
863 */
864void
865nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
866{
867	struct nfsclient *clp;
868	int i = 0, cnt = 0;
869
870	/*
871	 * First, get a reference on the nfsv4rootfs_lock so that an
872	 * exclusive lock cannot be acquired while dumping the clients.
873	 */
874	NFSLOCKV4ROOTMUTEX();
875	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
876	NFSUNLOCKV4ROOTMUTEX();
877	NFSLOCKSTATE();
878	/*
879	 * Rattle through the client lists until done.
880	 */
881	while (i < nfsrv_clienthashsize && cnt < maxcnt) {
882	    clp = LIST_FIRST(&nfsclienthash[i]);
883	    while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
884		nfsrv_dumpaclient(clp, &dumpp[cnt]);
885		cnt++;
886		clp = LIST_NEXT(clp, lc_hash);
887	    }
888	    i++;
889	}
890	if (cnt < maxcnt)
891	    dumpp[cnt].ndcl_clid.nclid_idlen = 0;
892	NFSUNLOCKSTATE();
893	NFSLOCKV4ROOTMUTEX();
894	nfsv4_relref(&nfsv4rootfs_lock);
895	NFSUNLOCKV4ROOTMUTEX();
896}
897
898/*
899 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
900 */
901static void
902nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
903{
904	struct nfsstate *stp, *openstp, *lckownstp;
905	struct nfslock *lop;
906	sa_family_t af;
907#ifdef INET
908	struct sockaddr_in *rin;
909#endif
910#ifdef INET6
911	struct sockaddr_in6 *rin6;
912#endif
913
914	dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
915	dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
916	dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
917	dumpp->ndcl_flags = clp->lc_flags;
918	dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
919	NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
920	af = clp->lc_req.nr_nam->sa_family;
921	dumpp->ndcl_addrfam = af;
922	switch (af) {
923#ifdef INET
924	case AF_INET:
925		rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
926		dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr;
927		break;
928#endif
929#ifdef INET6
930	case AF_INET6:
931		rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
932		dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr;
933		break;
934#endif
935	}
936
937	/*
938	 * Now, scan the state lists and total up the opens and locks.
939	 */
940	LIST_FOREACH(stp, &clp->lc_open, ls_list) {
941	    dumpp->ndcl_nopenowners++;
942	    LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
943		dumpp->ndcl_nopens++;
944		LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
945		    dumpp->ndcl_nlockowners++;
946		    LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
947			dumpp->ndcl_nlocks++;
948		    }
949		}
950	    }
951	}
952
953	/*
954	 * and the delegation lists.
955	 */
956	LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
957	    dumpp->ndcl_ndelegs++;
958	}
959	LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
960	    dumpp->ndcl_nolddelegs++;
961	}
962}
963
964/*
965 * Dump out lock stats for a file.
966 */
967void
968nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
969    NFSPROC_T *p)
970{
971	struct nfsstate *stp;
972	struct nfslock *lop;
973	int cnt = 0;
974	struct nfslockfile *lfp;
975	sa_family_t af;
976#ifdef INET
977	struct sockaddr_in *rin;
978#endif
979#ifdef INET6
980	struct sockaddr_in6 *rin6;
981#endif
982	int ret;
983	fhandle_t nfh;
984
985	ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
986	/*
987	 * First, get a reference on the nfsv4rootfs_lock so that an
988	 * exclusive lock on it cannot be acquired while dumping the locks.
989	 */
990	NFSLOCKV4ROOTMUTEX();
991	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
992	NFSUNLOCKV4ROOTMUTEX();
993	NFSLOCKSTATE();
994	if (!ret)
995		ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
996	if (ret) {
997		ldumpp[0].ndlck_clid.nclid_idlen = 0;
998		NFSUNLOCKSTATE();
999		NFSLOCKV4ROOTMUTEX();
1000		nfsv4_relref(&nfsv4rootfs_lock);
1001		NFSUNLOCKV4ROOTMUTEX();
1002		return;
1003	}
1004
1005	/*
1006	 * For each open share on file, dump it out.
1007	 */
1008	stp = LIST_FIRST(&lfp->lf_open);
1009	while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
1010		ldumpp[cnt].ndlck_flags = stp->ls_flags;
1011		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1012		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1013		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1014		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1015		ldumpp[cnt].ndlck_owner.nclid_idlen =
1016		    stp->ls_openowner->ls_ownerlen;
1017		NFSBCOPY(stp->ls_openowner->ls_owner,
1018		    ldumpp[cnt].ndlck_owner.nclid_id,
1019		    stp->ls_openowner->ls_ownerlen);
1020		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1021		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1022		    stp->ls_clp->lc_idlen);
1023		af = stp->ls_clp->lc_req.nr_nam->sa_family;
1024		ldumpp[cnt].ndlck_addrfam = af;
1025		switch (af) {
1026#ifdef INET
1027		case AF_INET:
1028			rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1029			ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1030			break;
1031#endif
1032#ifdef INET6
1033		case AF_INET6:
1034			rin6 = (struct sockaddr_in6 *)
1035			    stp->ls_clp->lc_req.nr_nam;
1036			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1037			break;
1038#endif
1039		}
1040		stp = LIST_NEXT(stp, ls_file);
1041		cnt++;
1042	}
1043
1044	/*
1045	 * and all locks.
1046	 */
1047	lop = LIST_FIRST(&lfp->lf_lock);
1048	while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
1049		stp = lop->lo_stp;
1050		ldumpp[cnt].ndlck_flags = lop->lo_flags;
1051		ldumpp[cnt].ndlck_first = lop->lo_first;
1052		ldumpp[cnt].ndlck_end = lop->lo_end;
1053		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1054		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1055		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1056		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1057		ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
1058		NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
1059		    stp->ls_ownerlen);
1060		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1061		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1062		    stp->ls_clp->lc_idlen);
1063		af = stp->ls_clp->lc_req.nr_nam->sa_family;
1064		ldumpp[cnt].ndlck_addrfam = af;
1065		switch (af) {
1066#ifdef INET
1067		case AF_INET:
1068			rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1069			ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1070			break;
1071#endif
1072#ifdef INET6
1073		case AF_INET6:
1074			rin6 = (struct sockaddr_in6 *)
1075			    stp->ls_clp->lc_req.nr_nam;
1076			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1077			break;
1078#endif
1079		}
1080		lop = LIST_NEXT(lop, lo_lckfile);
1081		cnt++;
1082	}
1083
1084	/*
1085	 * and the delegations.
1086	 */
1087	stp = LIST_FIRST(&lfp->lf_deleg);
1088	while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
1089		ldumpp[cnt].ndlck_flags = stp->ls_flags;
1090		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1091		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1092		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1093		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1094		ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
1095		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1096		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1097		    stp->ls_clp->lc_idlen);
1098		af = stp->ls_clp->lc_req.nr_nam->sa_family;
1099		ldumpp[cnt].ndlck_addrfam = af;
1100		switch (af) {
1101#ifdef INET
1102		case AF_INET:
1103			rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1104			ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1105			break;
1106#endif
1107#ifdef INET6
1108		case AF_INET6:
1109			rin6 = (struct sockaddr_in6 *)
1110			    stp->ls_clp->lc_req.nr_nam;
1111			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1112			break;
1113#endif
1114		}
1115		stp = LIST_NEXT(stp, ls_file);
1116		cnt++;
1117	}
1118
1119	/*
1120	 * If list isn't full, mark end of list by setting the client name
1121	 * to zero length.
1122	 */
1123	if (cnt < maxcnt)
1124		ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1125	NFSUNLOCKSTATE();
1126	NFSLOCKV4ROOTMUTEX();
1127	nfsv4_relref(&nfsv4rootfs_lock);
1128	NFSUNLOCKV4ROOTMUTEX();
1129}
1130
1131/*
1132 * Server timer routine. It can scan any linked list, so long
1133 * as it holds the spin/mutex lock and there is no exclusive lock on
1134 * nfsv4rootfs_lock.
1135 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1136 *  to do this from a callout, since the spin locks work. For
1137 *  Darwin, I'm not sure what will work correctly yet.)
1138 * Should be called once per second.
1139 */
1140void
1141nfsrv_servertimer(void)
1142{
1143	struct nfsclient *clp, *nclp;
1144	struct nfsstate *stp, *nstp;
1145	int got_ref, i;
1146
1147	/*
1148	 * Make sure nfsboottime is set. This is used by V3 as well
1149	 * as V4. Note that nfsboottime is not nfsrvboottime, which is
1150	 * only used by the V4 server for leases.
1151	 */
1152	if (nfsboottime.tv_sec == 0)
1153		NFSSETBOOTTIME(nfsboottime);
1154
1155	/*
1156	 * If server hasn't started yet, just return.
1157	 */
1158	NFSLOCKSTATE();
1159	if (nfsrv_stablefirst.nsf_eograce == 0) {
1160		NFSUNLOCKSTATE();
1161		return;
1162	}
1163	if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
1164		if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
1165		    NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
1166			nfsrv_stablefirst.nsf_flags |=
1167			    (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1168		NFSUNLOCKSTATE();
1169		return;
1170	}
1171
1172	/*
1173	 * Try and get a reference count on the nfsv4rootfs_lock so that
1174	 * no nfsd thread can acquire an exclusive lock on it before this
1175	 * call is done. If it is already exclusively locked, just return.
1176	 */
1177	NFSLOCKV4ROOTMUTEX();
1178	got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1179	NFSUNLOCKV4ROOTMUTEX();
1180	if (got_ref == 0) {
1181		NFSUNLOCKSTATE();
1182		return;
1183	}
1184
1185	/*
1186	 * For each client...
1187	 */
1188	for (i = 0; i < nfsrv_clienthashsize; i++) {
1189	    clp = LIST_FIRST(&nfsclienthash[i]);
1190	    while (clp != LIST_END(&nfsclienthash[i])) {
1191		nclp = LIST_NEXT(clp, lc_hash);
1192		if (!(clp->lc_flags & LCL_EXPIREIT)) {
1193		    if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1194			 && ((LIST_EMPTY(&clp->lc_deleg)
1195			      && LIST_EMPTY(&clp->lc_open)) ||
1196			     nfsrv_clients > nfsrv_clienthighwater)) ||
1197			(clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1198			(clp->lc_expiry < NFSD_MONOSEC &&
1199			 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
1200			/*
1201			 * Lease has expired several nfsrv_lease times ago:
1202			 * PLUS
1203			 *    - no state is associated with it
1204			 *    OR
1205			 *    - above high water mark for number of clients
1206			 *      (nfsrv_clienthighwater should be large enough
1207			 *       that this only occurs when clients fail to
1208			 *       use the same nfs_client_id4.id. Maybe somewhat
1209			 *       higher that the maximum number of clients that
1210			 *       will mount this server?)
1211			 * OR
1212			 * Lease has expired a very long time ago
1213			 * OR
1214			 * Lease has expired PLUS the number of opens + locks
1215			 * has exceeded 90% of capacity
1216			 *
1217			 * --> Mark for expiry. The actual expiry will be done
1218			 *     by an nfsd sometime soon.
1219			 */
1220			clp->lc_flags |= LCL_EXPIREIT;
1221			nfsrv_stablefirst.nsf_flags |=
1222			    (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1223		    } else {
1224			/*
1225			 * If there are no opens, increment no open tick cnt
1226			 * If time exceeds NFSNOOPEN, mark it to be thrown away
1227			 * otherwise, if there is an open, reset no open time
1228			 * Hopefully, this will avoid excessive re-creation
1229			 * of open owners and subsequent open confirms.
1230			 */
1231			stp = LIST_FIRST(&clp->lc_open);
1232			while (stp != LIST_END(&clp->lc_open)) {
1233				nstp = LIST_NEXT(stp, ls_list);
1234				if (LIST_EMPTY(&stp->ls_open)) {
1235					stp->ls_noopens++;
1236					if (stp->ls_noopens > NFSNOOPEN ||
1237					    (nfsrv_openpluslock * 2) >
1238					    nfsrv_v4statelimit)
1239						nfsrv_stablefirst.nsf_flags |=
1240							NFSNSF_NOOPENS;
1241				} else {
1242					stp->ls_noopens = 0;
1243				}
1244				stp = nstp;
1245			}
1246		    }
1247		}
1248		clp = nclp;
1249	    }
1250	}
1251	NFSUNLOCKSTATE();
1252	NFSLOCKV4ROOTMUTEX();
1253	nfsv4_relref(&nfsv4rootfs_lock);
1254	NFSUNLOCKV4ROOTMUTEX();
1255}
1256
1257/*
1258 * The following set of functions free up the various data structures.
1259 */
1260/*
1261 * Clear out all open/lock state related to this nfsclient.
1262 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1263 * there are no other active nfsd threads.
1264 */
1265void
1266nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
1267{
1268	struct nfsstate *stp, *nstp;
1269	struct nfsdsession *sep, *nsep;
1270
1271	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
1272		nfsrv_freeopenowner(stp, 1, p);
1273	if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1274		LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1275			(void)nfsrv_freesession(sep, NULL);
1276}
1277
1278/*
1279 * Free a client that has been cleaned. It should also already have been
1280 * removed from the lists.
1281 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
1282 *  softclock interrupts are enabled.)
1283 */
1284void
1285nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
1286{
1287
1288#ifdef notyet
1289	if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1290	     (LCL_GSS | LCL_CALLBACKSON) &&
1291	    (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1292	    clp->lc_handlelen > 0) {
1293		clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1294		clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1295		(void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1296			NULL, 0, NULL, NULL, NULL, p);
1297	}
1298#endif
1299	newnfs_disconnect(&clp->lc_req);
1300	NFSSOCKADDRFREE(clp->lc_req.nr_nam);
1301	NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1302	free(clp->lc_stateid, M_NFSDCLIENT);
1303	free(clp, M_NFSDCLIENT);
1304	NFSLOCKSTATE();
1305	nfsstatsv1.srvclients--;
1306	nfsrv_openpluslock--;
1307	nfsrv_clients--;
1308	NFSUNLOCKSTATE();
1309}
1310
1311/*
1312 * Free a list of delegation state structures.
1313 * (This function will also free all nfslockfile structures that no
1314 *  longer have associated state.)
1315 */
1316void
1317nfsrv_freedeleglist(struct nfsstatehead *sthp)
1318{
1319	struct nfsstate *stp, *nstp;
1320
1321	LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1322		nfsrv_freedeleg(stp);
1323	}
1324	LIST_INIT(sthp);
1325}
1326
1327/*
1328 * Free up a delegation.
1329 */
1330static void
1331nfsrv_freedeleg(struct nfsstate *stp)
1332{
1333	struct nfslockfile *lfp;
1334
1335	LIST_REMOVE(stp, ls_hash);
1336	LIST_REMOVE(stp, ls_list);
1337	LIST_REMOVE(stp, ls_file);
1338	if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
1339		nfsrv_writedelegcnt--;
1340	lfp = stp->ls_lfp;
1341	if (LIST_EMPTY(&lfp->lf_open) &&
1342	    LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1343	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1344	    lfp->lf_usecount == 0 &&
1345	    nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1346		nfsrv_freenfslockfile(lfp);
1347	FREE((caddr_t)stp, M_NFSDSTATE);
1348	nfsstatsv1.srvdelegates--;
1349	nfsrv_openpluslock--;
1350	nfsrv_delegatecnt--;
1351}
1352
1353/*
1354 * This function frees an open owner and all associated opens.
1355 */
1356static void
1357nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1358{
1359	struct nfsstate *nstp, *tstp;
1360
1361	LIST_REMOVE(stp, ls_list);
1362	/*
1363	 * Now, free all associated opens.
1364	 */
1365	nstp = LIST_FIRST(&stp->ls_open);
1366	while (nstp != LIST_END(&stp->ls_open)) {
1367		tstp = nstp;
1368		nstp = LIST_NEXT(nstp, ls_list);
1369		(void) nfsrv_freeopen(tstp, NULL, cansleep, p);
1370	}
1371	if (stp->ls_op)
1372		nfsrvd_derefcache(stp->ls_op);
1373	FREE((caddr_t)stp, M_NFSDSTATE);
1374	nfsstatsv1.srvopenowners--;
1375	nfsrv_openpluslock--;
1376}
1377
1378/*
1379 * This function frees an open (nfsstate open structure) with all associated
1380 * lock_owners and locks. It also frees the nfslockfile structure iff there
1381 * are no other opens on the file.
1382 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1383 */
1384static int
1385nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1386{
1387	struct nfsstate *nstp, *tstp;
1388	struct nfslockfile *lfp;
1389	int ret;
1390
1391	LIST_REMOVE(stp, ls_hash);
1392	LIST_REMOVE(stp, ls_list);
1393	LIST_REMOVE(stp, ls_file);
1394
1395	lfp = stp->ls_lfp;
1396	/*
1397	 * Now, free all lockowners associated with this open.
1398	 */
1399	LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1400		nfsrv_freelockowner(tstp, vp, cansleep, p);
1401
1402	/*
1403	 * The nfslockfile is freed here if there are no locks
1404	 * associated with the open.
1405	 * If there are locks associated with the open, the
1406	 * nfslockfile structure can be freed via nfsrv_freelockowner().
1407	 * Acquire the state mutex to avoid races with calls to
1408	 * nfsrv_getlockfile().
1409	 */
1410	if (cansleep != 0)
1411		NFSLOCKSTATE();
1412	if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1413	    LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1414	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1415	    lfp->lf_usecount == 0 &&
1416	    (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
1417		nfsrv_freenfslockfile(lfp);
1418		ret = 1;
1419	} else
1420		ret = 0;
1421	if (cansleep != 0)
1422		NFSUNLOCKSTATE();
1423	FREE((caddr_t)stp, M_NFSDSTATE);
1424	nfsstatsv1.srvopens--;
1425	nfsrv_openpluslock--;
1426	return (ret);
1427}
1428
1429/*
1430 * Frees a lockowner and all associated locks.
1431 */
1432static void
1433nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1434    NFSPROC_T *p)
1435{
1436
1437	LIST_REMOVE(stp, ls_hash);
1438	LIST_REMOVE(stp, ls_list);
1439	nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1440	if (stp->ls_op)
1441		nfsrvd_derefcache(stp->ls_op);
1442	FREE((caddr_t)stp, M_NFSDSTATE);
1443	nfsstatsv1.srvlockowners--;
1444	nfsrv_openpluslock--;
1445}
1446
1447/*
1448 * Free all the nfs locks on a lockowner.
1449 */
1450static void
1451nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1452    NFSPROC_T *p)
1453{
1454	struct nfslock *lop, *nlop;
1455	struct nfsrollback *rlp, *nrlp;
1456	struct nfslockfile *lfp = NULL;
1457	int gottvp = 0;
1458	vnode_t tvp = NULL;
1459	uint64_t first, end;
1460
1461	if (vp != NULL)
1462		ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1463	lop = LIST_FIRST(&stp->ls_lock);
1464	while (lop != LIST_END(&stp->ls_lock)) {
1465		nlop = LIST_NEXT(lop, lo_lckowner);
1466		/*
1467		 * Since all locks should be for the same file, lfp should
1468		 * not change.
1469		 */
1470		if (lfp == NULL)
1471			lfp = lop->lo_lfp;
1472		else if (lfp != lop->lo_lfp)
1473			panic("allnfslocks");
1474		/*
1475		 * If vp is NULL and cansleep != 0, a vnode must be acquired
1476		 * from the file handle. This only occurs when called from
1477		 * nfsrv_cleanclient().
1478		 */
1479		if (gottvp == 0) {
1480			if (nfsrv_dolocallocks == 0)
1481				tvp = NULL;
1482			else if (vp == NULL && cansleep != 0) {
1483				tvp = nfsvno_getvp(&lfp->lf_fh);
1484				if (tvp != NULL)
1485					NFSVOPUNLOCK(tvp, 0);
1486			} else
1487				tvp = vp;
1488			gottvp = 1;
1489		}
1490
1491		if (tvp != NULL) {
1492			if (cansleep == 0)
1493				panic("allnfs2");
1494			first = lop->lo_first;
1495			end = lop->lo_end;
1496			nfsrv_freenfslock(lop);
1497			nfsrv_localunlock(tvp, lfp, first, end, p);
1498			LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1499			    nrlp)
1500				free(rlp, M_NFSDROLLBACK);
1501			LIST_INIT(&lfp->lf_rollback);
1502		} else
1503			nfsrv_freenfslock(lop);
1504		lop = nlop;
1505	}
1506	if (vp == NULL && tvp != NULL)
1507		vrele(tvp);
1508}
1509
1510/*
1511 * Free an nfslock structure.
1512 */
1513static void
1514nfsrv_freenfslock(struct nfslock *lop)
1515{
1516
1517	if (lop->lo_lckfile.le_prev != NULL) {
1518		LIST_REMOVE(lop, lo_lckfile);
1519		nfsstatsv1.srvlocks--;
1520		nfsrv_openpluslock--;
1521	}
1522	LIST_REMOVE(lop, lo_lckowner);
1523	FREE((caddr_t)lop, M_NFSDLOCK);
1524}
1525
1526/*
1527 * This function frees an nfslockfile structure.
1528 */
1529static void
1530nfsrv_freenfslockfile(struct nfslockfile *lfp)
1531{
1532
1533	LIST_REMOVE(lfp, lf_hash);
1534	FREE((caddr_t)lfp, M_NFSDLOCKFILE);
1535}
1536
1537/*
1538 * This function looks up an nfsstate structure via stateid.
1539 */
1540static int
1541nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1542    struct nfsstate **stpp)
1543{
1544	struct nfsstate *stp;
1545	struct nfsstatehead *hp;
1546	int error = 0;
1547
1548	*stpp = NULL;
1549	hp = NFSSTATEHASH(clp, *stateidp);
1550	LIST_FOREACH(stp, hp, ls_hash) {
1551		if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1552			NFSX_STATEIDOTHER))
1553			break;
1554	}
1555
1556	/*
1557	 * If no state id in list, return NFSERR_BADSTATEID.
1558	 */
1559	if (stp == LIST_END(hp)) {
1560		error = NFSERR_BADSTATEID;
1561		goto out;
1562	}
1563	*stpp = stp;
1564
1565out:
1566	NFSEXITCODE(error);
1567	return (error);
1568}
1569
1570/*
1571 * This function gets an nfsstate structure via owner string.
1572 */
1573static void
1574nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1575    struct nfsstate **stpp)
1576{
1577	struct nfsstate *stp;
1578
1579	*stpp = NULL;
1580	LIST_FOREACH(stp, hp, ls_list) {
1581		if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1582		  !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1583			*stpp = stp;
1584			return;
1585		}
1586	}
1587}
1588
1589/*
1590 * Lock control function called to update lock status.
1591 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1592 * that one isn't to be created and an NFSERR_xxx for other errors.
1593 * The structures new_stp and new_lop are passed in as pointers that should
1594 * be set to NULL if the structure is used and shouldn't be free'd.
1595 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1596 * never used and can safely be allocated on the stack. For all other
1597 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1598 * in case they are used.
1599 */
1600int
1601nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1602    struct nfslock **new_lopp, struct nfslockconflict *cfp,
1603    nfsquad_t clientid, nfsv4stateid_t *stateidp,
1604    __unused struct nfsexstuff *exp,
1605    struct nfsrv_descript *nd, NFSPROC_T *p)
1606{
1607	struct nfslock *lop;
1608	struct nfsstate *new_stp = *new_stpp;
1609	struct nfslock *new_lop = *new_lopp;
1610	struct nfsstate *tstp, *mystp, *nstp;
1611	int specialid = 0;
1612	struct nfslockfile *lfp;
1613	struct nfslock *other_lop = NULL;
1614	struct nfsstate *stp, *lckstp = NULL;
1615	struct nfsclient *clp = NULL;
1616	u_int32_t bits;
1617	int error = 0, haslock = 0, ret, reterr;
1618	int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1619	fhandle_t nfh;
1620	uint64_t first, end;
1621	uint32_t lock_flags;
1622
1623	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1624		/*
1625		 * Note the special cases of "all 1s" or "all 0s" stateids and
1626		 * let reads with all 1s go ahead.
1627		 */
1628		if (new_stp->ls_stateid.seqid == 0x0 &&
1629		    new_stp->ls_stateid.other[0] == 0x0 &&
1630		    new_stp->ls_stateid.other[1] == 0x0 &&
1631		    new_stp->ls_stateid.other[2] == 0x0)
1632			specialid = 1;
1633		else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1634		    new_stp->ls_stateid.other[0] == 0xffffffff &&
1635		    new_stp->ls_stateid.other[1] == 0xffffffff &&
1636		    new_stp->ls_stateid.other[2] == 0xffffffff)
1637			specialid = 2;
1638	}
1639
1640	/*
1641	 * Check for restart conditions (client and server).
1642	 */
1643	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1644	    &new_stp->ls_stateid, specialid);
1645	if (error)
1646		goto out;
1647
1648	/*
1649	 * Check for state resource limit exceeded.
1650	 */
1651	if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1652	    nfsrv_openpluslock > nfsrv_v4statelimit) {
1653		error = NFSERR_RESOURCE;
1654		goto out;
1655	}
1656
1657	/*
1658	 * For the lock case, get another nfslock structure,
1659	 * just in case we need it.
1660	 * Malloc now, before we start sifting through the linked lists,
1661	 * in case we have to wait for memory.
1662	 */
1663tryagain:
1664	if (new_stp->ls_flags & NFSLCK_LOCK)
1665		MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock),
1666		    M_NFSDLOCK, M_WAITOK);
1667	filestruct_locked = 0;
1668	reterr = 0;
1669	lfp = NULL;
1670
1671	/*
1672	 * Get the lockfile structure for CFH now, so we can do a sanity
1673	 * check against the stateid, before incrementing the seqid#, since
1674	 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1675	 * shouldn't be incremented for this case.
1676	 * If nfsrv_getlockfile() returns -1, it means "not found", which
1677	 * will be handled later.
1678	 * If we are doing Lock/LockU and local locking is enabled, sleep
1679	 * lock the nfslockfile structure.
1680	 */
1681	getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1682	NFSLOCKSTATE();
1683	if (getlckret == 0) {
1684		if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1685		    nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1686			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1687			    &lfp, &nfh, 1);
1688			if (getlckret == 0)
1689				filestruct_locked = 1;
1690		} else
1691			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1692			    &lfp, &nfh, 0);
1693	}
1694	if (getlckret != 0 && getlckret != -1)
1695		reterr = getlckret;
1696
1697	if (filestruct_locked != 0) {
1698		LIST_INIT(&lfp->lf_rollback);
1699		if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1700			/*
1701			 * For local locking, do the advisory locking now, so
1702			 * that any conflict can be detected. A failure later
1703			 * can be rolled back locally. If an error is returned,
1704			 * struct nfslockfile has been unlocked and any local
1705			 * locking rolled back.
1706			 */
1707			NFSUNLOCKSTATE();
1708			if (vnode_unlocked == 0) {
1709				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1710				vnode_unlocked = 1;
1711				NFSVOPUNLOCK(vp, 0);
1712			}
1713			reterr = nfsrv_locallock(vp, lfp,
1714			    (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1715			    new_lop->lo_first, new_lop->lo_end, cfp, p);
1716			NFSLOCKSTATE();
1717		}
1718	}
1719
1720	if (specialid == 0) {
1721	    if (new_stp->ls_flags & NFSLCK_TEST) {
1722		/*
1723		 * RFC 3530 does not list LockT as an op that renews a
1724		 * lease, but the consensus seems to be that it is ok
1725		 * for a server to do so.
1726		 */
1727		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1728		    (nfsquad_t)((u_quad_t)0), 0, nd, p);
1729
1730		/*
1731		 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1732		 * error returns for LockT, just go ahead and test for a lock,
1733		 * since there are no locks for this client, but other locks
1734		 * can conflict. (ie. same client will always be false)
1735		 */
1736		if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1737		    error = 0;
1738		lckstp = new_stp;
1739	    } else {
1740	      error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1741		(nfsquad_t)((u_quad_t)0), 0, nd, p);
1742	      if (error == 0)
1743		/*
1744		 * Look up the stateid
1745		 */
1746		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1747		  new_stp->ls_flags, &stp);
1748	      /*
1749	       * do some sanity checks for an unconfirmed open or a
1750	       * stateid that refers to the wrong file, for an open stateid
1751	       */
1752	      if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1753		  ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1754		   (getlckret == 0 && stp->ls_lfp != lfp))){
1755		      /*
1756		       * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID
1757		       * The only exception is using SETATTR with SIZE.
1758		       * */
1759                    if ((new_stp->ls_flags &
1760                         (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
1761			     error = NFSERR_BADSTATEID;
1762	      }
1763
1764		if (error == 0 &&
1765		  (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1766		  getlckret == 0 && stp->ls_lfp != lfp)
1767			error = NFSERR_BADSTATEID;
1768
1769	      /*
1770	       * If the lockowner stateid doesn't refer to the same file,
1771	       * I believe that is considered ok, since some clients will
1772	       * only create a single lockowner and use that for all locks
1773	       * on all files.
1774	       * For now, log it as a diagnostic, instead of considering it
1775	       * a BadStateid.
1776	       */
1777	      if (error == 0 && (stp->ls_flags &
1778		  (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1779		  getlckret == 0 && stp->ls_lfp != lfp) {
1780#ifdef DIAGNOSTIC
1781		  printf("Got a lock statid for different file open\n");
1782#endif
1783		  /*
1784		  error = NFSERR_BADSTATEID;
1785		  */
1786	      }
1787
1788	      if (error == 0) {
1789		    if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
1790			/*
1791			 * If haslock set, we've already checked the seqid.
1792			 */
1793			if (!haslock) {
1794			    if (stp->ls_flags & NFSLCK_OPEN)
1795				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1796				    stp->ls_openowner, new_stp->ls_op);
1797			    else
1798				error = NFSERR_BADSTATEID;
1799			}
1800			if (!error)
1801			    nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
1802			if (lckstp) {
1803			    /*
1804			     * For NFSv4.1 and NFSv4.2 allow an
1805			     * open_to_lock_owner when the lock_owner already
1806			     * exists.  Just clear NFSLCK_OPENTOLOCK so that
1807			     * a new lock_owner will not be created.
1808			     * RFC7530 states that the error for NFSv4.0
1809			     * is NFS4ERR_BAD_SEQID.
1810			     */
1811			    if ((nd->nd_flag & ND_NFSV41) != 0)
1812				new_stp->ls_flags &= ~NFSLCK_OPENTOLOCK;
1813			    else
1814				error = NFSERR_BADSEQID;
1815			} else
1816			    lckstp = new_stp;
1817		    } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
1818			/*
1819			 * If haslock set, ditto above.
1820			 */
1821			if (!haslock) {
1822			    if (stp->ls_flags & NFSLCK_OPEN)
1823				error = NFSERR_BADSTATEID;
1824			    else
1825				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1826				    stp, new_stp->ls_op);
1827			}
1828			lckstp = stp;
1829		    } else {
1830			lckstp = stp;
1831		    }
1832	      }
1833	      /*
1834	       * If the seqid part of the stateid isn't the same, return
1835	       * NFSERR_OLDSTATEID for cases other than I/O Ops.
1836	       * For I/O Ops, only return NFSERR_OLDSTATEID if
1837	       * nfsrv_returnoldstateid is set. (The consensus on the email
1838	       * list was that most clients would prefer to not receive
1839	       * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
1840	       * is what will happen, so I use the nfsrv_returnoldstateid to
1841	       * allow for either server configuration.)
1842	       */
1843	      if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
1844		  (((nd->nd_flag & ND_NFSV41) == 0 &&
1845		   (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1846		    nfsrv_returnoldstateid)) ||
1847		   ((nd->nd_flag & ND_NFSV41) != 0 &&
1848		    new_stp->ls_stateid.seqid != 0)))
1849		    error = NFSERR_OLDSTATEID;
1850	    }
1851	}
1852
1853	/*
1854	 * Now we can check for grace.
1855	 */
1856	if (!error)
1857		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
1858	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
1859		nfsrv_checkstable(clp))
1860		error = NFSERR_NOGRACE;
1861	/*
1862	 * If we successfully Reclaimed state, note that.
1863	 */
1864	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
1865		nfsrv_markstable(clp);
1866
1867	/*
1868	 * At this point, either error == NFSERR_BADSTATEID or the
1869	 * seqid# has been updated, so we can return any error.
1870	 * If error == 0, there may be an error in:
1871	 *    nd_repstat - Set by the calling function.
1872	 *    reterr - Set above, if getting the nfslockfile structure
1873	 *       or acquiring the local lock failed.
1874	 *    (If both of these are set, nd_repstat should probably be
1875	 *     returned, since that error was detected before this
1876	 *     function call.)
1877	 */
1878	if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
1879		if (error == 0) {
1880			if (nd->nd_repstat != 0)
1881				error = nd->nd_repstat;
1882			else
1883				error = reterr;
1884		}
1885		if (filestruct_locked != 0) {
1886			/* Roll back local locks. */
1887			NFSUNLOCKSTATE();
1888			if (vnode_unlocked == 0) {
1889				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
1890				vnode_unlocked = 1;
1891				NFSVOPUNLOCK(vp, 0);
1892			}
1893			nfsrv_locallock_rollback(vp, lfp, p);
1894			NFSLOCKSTATE();
1895			nfsrv_unlocklf(lfp);
1896		}
1897		NFSUNLOCKSTATE();
1898		goto out;
1899	}
1900
1901	/*
1902	 * Check the nfsrv_getlockfile return.
1903	 * Returned -1 if no structure found.
1904	 */
1905	if (getlckret == -1) {
1906		error = NFSERR_EXPIRED;
1907		/*
1908		 * Called from lockt, so no lock is OK.
1909		 */
1910		if (new_stp->ls_flags & NFSLCK_TEST) {
1911			error = 0;
1912		} else if (new_stp->ls_flags &
1913		    (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1914			/*
1915			 * Called to check for a lock, OK if the stateid is all
1916			 * 1s or all 0s, but there should be an nfsstate
1917			 * otherwise.
1918			 * (ie. If there is no open, I'll assume no share
1919			 *  deny bits.)
1920			 */
1921			if (specialid)
1922				error = 0;
1923			else
1924				error = NFSERR_BADSTATEID;
1925		}
1926		NFSUNLOCKSTATE();
1927		goto out;
1928	}
1929
1930	/*
1931	 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
1932	 * For NFSLCK_CHECK, allow a read if write access is granted,
1933	 * but check for a deny. For NFSLCK_LOCK, require correct access,
1934	 * which implies a conflicting deny can't exist.
1935	 */
1936	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
1937	    /*
1938	     * Four kinds of state id:
1939	     * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
1940	     * - stateid for an open
1941	     * - stateid for a delegation
1942	     * - stateid for a lock owner
1943	     */
1944	    if (!specialid) {
1945		if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
1946		    delegation = 1;
1947		    mystp = stp;
1948		    nfsrv_delaydelegtimeout(stp);
1949	        } else if (stp->ls_flags & NFSLCK_OPEN) {
1950		    mystp = stp;
1951		} else {
1952		    mystp = stp->ls_openstp;
1953		}
1954		/*
1955		 * If locking or checking, require correct access
1956		 * bit set.
1957		 */
1958		if (((new_stp->ls_flags & NFSLCK_LOCK) &&
1959		     !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
1960		       mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
1961		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
1962		      (NFSLCK_CHECK | NFSLCK_READACCESS) &&
1963		     !(mystp->ls_flags & NFSLCK_READACCESS) &&
1964		     nfsrv_allowreadforwriteopen == 0) ||
1965		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
1966		      (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
1967		     !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
1968			if (filestruct_locked != 0) {
1969				/* Roll back local locks. */
1970				NFSUNLOCKSTATE();
1971				if (vnode_unlocked == 0) {
1972					ASSERT_VOP_ELOCKED(vp,
1973					    "nfsrv_lockctrl3");
1974					vnode_unlocked = 1;
1975					NFSVOPUNLOCK(vp, 0);
1976				}
1977				nfsrv_locallock_rollback(vp, lfp, p);
1978				NFSLOCKSTATE();
1979				nfsrv_unlocklf(lfp);
1980			}
1981			NFSUNLOCKSTATE();
1982			error = NFSERR_OPENMODE;
1983			goto out;
1984		}
1985	    } else
1986		mystp = NULL;
1987	    if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
1988		/*
1989		 * Check for a conflicting deny bit.
1990		 */
1991		LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
1992		    if (tstp != mystp) {
1993			bits = tstp->ls_flags;
1994			bits >>= NFSLCK_SHIFT;
1995			if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
1996			    KASSERT(vnode_unlocked == 0,
1997				("nfsrv_lockctrl: vnode unlocked1"));
1998			    ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
1999				vp, p);
2000			    if (ret == 1) {
2001				/*
2002				* nfsrv_clientconflict unlocks state
2003				 * when it returns non-zero.
2004				 */
2005				lckstp = NULL;
2006				goto tryagain;
2007			    }
2008			    if (ret == 0)
2009				NFSUNLOCKSTATE();
2010			    if (ret == 2)
2011				error = NFSERR_PERM;
2012			    else
2013				error = NFSERR_OPENMODE;
2014			    goto out;
2015			}
2016		    }
2017		}
2018
2019		/* We're outta here */
2020		NFSUNLOCKSTATE();
2021		goto out;
2022	    }
2023	}
2024
2025	/*
2026	 * For setattr, just get rid of all the Delegations for other clients.
2027	 */
2028	if (new_stp->ls_flags & NFSLCK_SETATTR) {
2029		KASSERT(vnode_unlocked == 0,
2030		    ("nfsrv_lockctrl: vnode unlocked2"));
2031		ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
2032		if (ret) {
2033			/*
2034			 * nfsrv_cleandeleg() unlocks state when it
2035			 * returns non-zero.
2036			 */
2037			if (ret == -1) {
2038				lckstp = NULL;
2039				goto tryagain;
2040			}
2041			error = ret;
2042			goto out;
2043		}
2044		if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
2045		    (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
2046		     LIST_EMPTY(&lfp->lf_deleg))) {
2047			NFSUNLOCKSTATE();
2048			goto out;
2049		}
2050	}
2051
2052	/*
2053	 * Check for a conflicting delegation. If one is found, call
2054	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2055	 * been set yet, it will get the lock. Otherwise, it will recall
2056	 * the delegation. Then, we try try again...
2057	 * I currently believe the conflict algorithm to be:
2058	 * For Lock Ops (Lock/LockT/LockU)
2059	 * - there is a conflict iff a different client has a write delegation
2060	 * For Reading (Read Op)
2061	 * - there is a conflict iff a different client has a write delegation
2062	 *   (the specialids are always a different client)
2063	 * For Writing (Write/Setattr of size)
2064	 * - there is a conflict if a different client has any delegation
2065	 * - there is a conflict if the same client has a read delegation
2066	 *   (I don't understand why this isn't allowed, but that seems to be
2067	 *    the current consensus?)
2068	 */
2069	tstp = LIST_FIRST(&lfp->lf_deleg);
2070	while (tstp != LIST_END(&lfp->lf_deleg)) {
2071	    nstp = LIST_NEXT(tstp, ls_file);
2072	    if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
2073		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2074		  (new_lop->lo_flags & NFSLCK_READ))) &&
2075		  clp != tstp->ls_clp &&
2076		 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
2077		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2078		   (new_lop->lo_flags & NFSLCK_WRITE) &&
2079		  (clp != tstp->ls_clp ||
2080		   (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
2081		ret = 0;
2082		if (filestruct_locked != 0) {
2083			/* Roll back local locks. */
2084			NFSUNLOCKSTATE();
2085			if (vnode_unlocked == 0) {
2086				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
2087				NFSVOPUNLOCK(vp, 0);
2088			}
2089			nfsrv_locallock_rollback(vp, lfp, p);
2090			NFSLOCKSTATE();
2091			nfsrv_unlocklf(lfp);
2092			NFSUNLOCKSTATE();
2093			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2094			vnode_unlocked = 0;
2095			if ((vp->v_iflag & VI_DOOMED) != 0)
2096				ret = NFSERR_SERVERFAULT;
2097			NFSLOCKSTATE();
2098		}
2099		if (ret == 0)
2100			ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
2101		if (ret) {
2102		    /*
2103		     * nfsrv_delegconflict unlocks state when it
2104		     * returns non-zero, which it always does.
2105		     */
2106		    if (other_lop) {
2107			FREE((caddr_t)other_lop, M_NFSDLOCK);
2108			other_lop = NULL;
2109		    }
2110		    if (ret == -1) {
2111			lckstp = NULL;
2112			goto tryagain;
2113		    }
2114		    error = ret;
2115		    goto out;
2116		}
2117		/* Never gets here. */
2118	    }
2119	    tstp = nstp;
2120	}
2121
2122	/*
2123	 * Handle the unlock case by calling nfsrv_updatelock().
2124	 * (Should I have done some access checking above for unlock? For now,
2125	 *  just let it happen.)
2126	 */
2127	if (new_stp->ls_flags & NFSLCK_UNLOCK) {
2128		first = new_lop->lo_first;
2129		end = new_lop->lo_end;
2130		nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2131		stateidp->seqid = ++(stp->ls_stateid.seqid);
2132		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2133			stateidp->seqid = stp->ls_stateid.seqid = 1;
2134		stateidp->other[0] = stp->ls_stateid.other[0];
2135		stateidp->other[1] = stp->ls_stateid.other[1];
2136		stateidp->other[2] = stp->ls_stateid.other[2];
2137		if (filestruct_locked != 0) {
2138			NFSUNLOCKSTATE();
2139			if (vnode_unlocked == 0) {
2140				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2141				vnode_unlocked = 1;
2142				NFSVOPUNLOCK(vp, 0);
2143			}
2144			/* Update the local locks. */
2145			nfsrv_localunlock(vp, lfp, first, end, p);
2146			NFSLOCKSTATE();
2147			nfsrv_unlocklf(lfp);
2148		}
2149		NFSUNLOCKSTATE();
2150		goto out;
2151	}
2152
2153	/*
2154	 * Search for a conflicting lock. A lock conflicts if:
2155	 * - the lock range overlaps and
2156	 * - at least one lock is a write lock and
2157	 * - it is not owned by the same lock owner
2158	 */
2159	if (!delegation) {
2160	  LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2161	    if (new_lop->lo_end > lop->lo_first &&
2162		new_lop->lo_first < lop->lo_end &&
2163		(new_lop->lo_flags == NFSLCK_WRITE ||
2164		 lop->lo_flags == NFSLCK_WRITE) &&
2165		lckstp != lop->lo_stp &&
2166		(clp != lop->lo_stp->ls_clp ||
2167		 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2168		 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2169		    lckstp->ls_ownerlen))) {
2170		if (other_lop) {
2171		    FREE((caddr_t)other_lop, M_NFSDLOCK);
2172		    other_lop = NULL;
2173		}
2174		if (vnode_unlocked != 0)
2175		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2176			NULL, p);
2177		else
2178		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2179			vp, p);
2180		if (ret == 1) {
2181		    if (filestruct_locked != 0) {
2182			if (vnode_unlocked == 0) {
2183				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2184				NFSVOPUNLOCK(vp, 0);
2185			}
2186			/* Roll back local locks. */
2187			nfsrv_locallock_rollback(vp, lfp, p);
2188			NFSLOCKSTATE();
2189			nfsrv_unlocklf(lfp);
2190			NFSUNLOCKSTATE();
2191			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2192			vnode_unlocked = 0;
2193			if ((vp->v_iflag & VI_DOOMED) != 0) {
2194				error = NFSERR_SERVERFAULT;
2195				goto out;
2196			}
2197		    }
2198		    /*
2199		     * nfsrv_clientconflict() unlocks state when it
2200		     * returns non-zero.
2201		     */
2202		    lckstp = NULL;
2203		    goto tryagain;
2204		}
2205		/*
2206		 * Found a conflicting lock, so record the conflict and
2207		 * return the error.
2208		 */
2209		if (cfp != NULL && ret == 0) {
2210		    cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2211		    cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2212		    cfp->cl_first = lop->lo_first;
2213		    cfp->cl_end = lop->lo_end;
2214		    cfp->cl_flags = lop->lo_flags;
2215		    cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2216		    NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2217			cfp->cl_ownerlen);
2218		}
2219		if (ret == 2)
2220		    error = NFSERR_PERM;
2221		else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2222		    error = NFSERR_RECLAIMCONFLICT;
2223		else if (new_stp->ls_flags & NFSLCK_CHECK)
2224		    error = NFSERR_LOCKED;
2225		else
2226		    error = NFSERR_DENIED;
2227		if (filestruct_locked != 0 && ret == 0) {
2228			/* Roll back local locks. */
2229			NFSUNLOCKSTATE();
2230			if (vnode_unlocked == 0) {
2231				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2232				vnode_unlocked = 1;
2233				NFSVOPUNLOCK(vp, 0);
2234			}
2235			nfsrv_locallock_rollback(vp, lfp, p);
2236			NFSLOCKSTATE();
2237			nfsrv_unlocklf(lfp);
2238		}
2239		if (ret == 0)
2240			NFSUNLOCKSTATE();
2241		goto out;
2242	    }
2243	  }
2244	}
2245
2246	/*
2247	 * We only get here if there was no lock that conflicted.
2248	 */
2249	if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2250		NFSUNLOCKSTATE();
2251		goto out;
2252	}
2253
2254	/*
2255	 * We only get here when we are creating or modifying a lock.
2256	 * There are two variants:
2257	 * - exist_lock_owner where lock_owner exists
2258	 * - open_to_lock_owner with new lock_owner
2259	 */
2260	first = new_lop->lo_first;
2261	end = new_lop->lo_end;
2262	lock_flags = new_lop->lo_flags;
2263	if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2264		nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2265		stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2266		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2267			stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2268		stateidp->other[0] = lckstp->ls_stateid.other[0];
2269		stateidp->other[1] = lckstp->ls_stateid.other[1];
2270		stateidp->other[2] = lckstp->ls_stateid.other[2];
2271	} else {
2272		/*
2273		 * The new open_to_lock_owner case.
2274		 * Link the new nfsstate into the lists.
2275		 */
2276		new_stp->ls_seq = new_stp->ls_opentolockseq;
2277		nfsrvd_refcache(new_stp->ls_op);
2278		stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2279		stateidp->other[0] = new_stp->ls_stateid.other[0] =
2280		    clp->lc_clientid.lval[0];
2281		stateidp->other[1] = new_stp->ls_stateid.other[1] =
2282		    clp->lc_clientid.lval[1];
2283		stateidp->other[2] = new_stp->ls_stateid.other[2] =
2284		    nfsrv_nextstateindex(clp);
2285		new_stp->ls_clp = clp;
2286		LIST_INIT(&new_stp->ls_lock);
2287		new_stp->ls_openstp = stp;
2288		new_stp->ls_lfp = lfp;
2289		nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2290		    lfp);
2291		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2292		    new_stp, ls_hash);
2293		LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2294		*new_lopp = NULL;
2295		*new_stpp = NULL;
2296		nfsstatsv1.srvlockowners++;
2297		nfsrv_openpluslock++;
2298	}
2299	if (filestruct_locked != 0) {
2300		NFSUNLOCKSTATE();
2301		nfsrv_locallock_commit(lfp, lock_flags, first, end);
2302		NFSLOCKSTATE();
2303		nfsrv_unlocklf(lfp);
2304	}
2305	NFSUNLOCKSTATE();
2306
2307out:
2308	if (haslock) {
2309		NFSLOCKV4ROOTMUTEX();
2310		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2311		NFSUNLOCKV4ROOTMUTEX();
2312	}
2313	if (vnode_unlocked != 0) {
2314		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2315		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
2316			error = NFSERR_SERVERFAULT;
2317	}
2318	if (other_lop)
2319		FREE((caddr_t)other_lop, M_NFSDLOCK);
2320	NFSEXITCODE2(error, nd);
2321	return (error);
2322}
2323
2324/*
2325 * Check for state errors for Open.
2326 * repstat is passed back out as an error if more critical errors
2327 * are not detected.
2328 */
2329int
2330nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
2331    struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
2332    NFSPROC_T *p, int repstat)
2333{
2334	struct nfsstate *stp, *nstp;
2335	struct nfsclient *clp;
2336	struct nfsstate *ownerstp;
2337	struct nfslockfile *lfp, *new_lfp;
2338	int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
2339
2340	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2341		readonly = 1;
2342	/*
2343	 * Check for restart conditions (client and server).
2344	 */
2345	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2346		&new_stp->ls_stateid, 0);
2347	if (error)
2348		goto out;
2349
2350	/*
2351	 * Check for state resource limit exceeded.
2352	 * Technically this should be SMP protected, but the worst
2353	 * case error is "out by one or two" on the count when it
2354	 * returns NFSERR_RESOURCE and the limit is just a rather
2355	 * arbitrary high water mark, so no harm is done.
2356	 */
2357	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
2358		error = NFSERR_RESOURCE;
2359		goto out;
2360	}
2361
2362tryagain:
2363	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2364	    M_NFSDLOCKFILE, M_WAITOK);
2365	if (vp)
2366		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2367		    NULL, p);
2368	NFSLOCKSTATE();
2369	/*
2370	 * Get the nfsclient structure.
2371	 */
2372	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2373	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2374
2375	/*
2376	 * Look up the open owner. See if it needs confirmation and
2377	 * check the seq#, as required.
2378	 */
2379	if (!error)
2380		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2381
2382	if (!error && ownerstp) {
2383		error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2384		    new_stp->ls_op);
2385		/*
2386		 * If the OpenOwner hasn't been confirmed, assume the
2387		 * old one was a replay and this one is ok.
2388		 * See: RFC3530 Sec. 14.2.18.
2389		 */
2390		if (error == NFSERR_BADSEQID &&
2391		    (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2392			error = 0;
2393	}
2394
2395	/*
2396	 * Check for grace.
2397	 */
2398	if (!error)
2399		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2400	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2401		nfsrv_checkstable(clp))
2402		error = NFSERR_NOGRACE;
2403
2404	/*
2405	 * If none of the above errors occurred, let repstat be
2406	 * returned.
2407	 */
2408	if (repstat && !error)
2409		error = repstat;
2410	if (error) {
2411		NFSUNLOCKSTATE();
2412		if (haslock) {
2413			NFSLOCKV4ROOTMUTEX();
2414			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2415			NFSUNLOCKV4ROOTMUTEX();
2416		}
2417		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2418		goto out;
2419	}
2420
2421	/*
2422	 * If vp == NULL, the file doesn't exist yet, so return ok.
2423	 * (This always happens on the first pass, so haslock must be 0.)
2424	 */
2425	if (vp == NULL) {
2426		NFSUNLOCKSTATE();
2427		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2428		goto out;
2429	}
2430
2431	/*
2432	 * Get the structure for the underlying file.
2433	 */
2434	if (getfhret)
2435		error = getfhret;
2436	else
2437		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2438		    NULL, 0);
2439	if (new_lfp)
2440		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2441	if (error) {
2442		NFSUNLOCKSTATE();
2443		if (haslock) {
2444			NFSLOCKV4ROOTMUTEX();
2445			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2446			NFSUNLOCKV4ROOTMUTEX();
2447		}
2448		goto out;
2449	}
2450
2451	/*
2452	 * Search for a conflicting open/share.
2453	 */
2454	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2455	    /*
2456	     * For Delegate_Cur, search for the matching Delegation,
2457	     * which indicates no conflict.
2458	     * An old delegation should have been recovered by the
2459	     * client doing a Claim_DELEGATE_Prev, so I won't let
2460	     * it match and return NFSERR_EXPIRED. Should I let it
2461	     * match?
2462	     */
2463	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2464		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2465		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2466		    stateidp->seqid == 0) ||
2467		    stateidp->seqid == stp->ls_stateid.seqid) &&
2468		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2469			  NFSX_STATEIDOTHER))
2470			break;
2471	    }
2472	    if (stp == LIST_END(&lfp->lf_deleg) ||
2473		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2474		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2475		NFSUNLOCKSTATE();
2476		if (haslock) {
2477			NFSLOCKV4ROOTMUTEX();
2478			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2479			NFSUNLOCKV4ROOTMUTEX();
2480		}
2481		error = NFSERR_EXPIRED;
2482		goto out;
2483	    }
2484	}
2485
2486	/*
2487	 * Check for access/deny bit conflicts. I check for the same
2488	 * owner as well, in case the client didn't bother.
2489	 */
2490	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2491		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2492		    (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2493		      ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2494		     ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2495		      ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2496			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2497			if (ret == 1) {
2498				/*
2499				 * nfsrv_clientconflict() unlocks
2500				 * state when it returns non-zero.
2501				 */
2502				goto tryagain;
2503			}
2504			if (ret == 2)
2505				error = NFSERR_PERM;
2506			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2507				error = NFSERR_RECLAIMCONFLICT;
2508			else
2509				error = NFSERR_SHAREDENIED;
2510			if (ret == 0)
2511				NFSUNLOCKSTATE();
2512			if (haslock) {
2513				NFSLOCKV4ROOTMUTEX();
2514				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2515				NFSUNLOCKV4ROOTMUTEX();
2516			}
2517			goto out;
2518		}
2519	}
2520
2521	/*
2522	 * Check for a conflicting delegation. If one is found, call
2523	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2524	 * been set yet, it will get the lock. Otherwise, it will recall
2525	 * the delegation. Then, we try try again...
2526	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2527	 *  isn't a conflict.)
2528	 * I currently believe the conflict algorithm to be:
2529	 * For Open with Read Access and Deny None
2530	 * - there is a conflict iff a different client has a write delegation
2531	 * For Open with other Write Access or any Deny except None
2532	 * - there is a conflict if a different client has any delegation
2533	 * - there is a conflict if the same client has a read delegation
2534	 *   (The current consensus is that this last case should be
2535	 *    considered a conflict since the client with a read delegation
2536	 *    could have done an Open with ReadAccess and WriteDeny
2537	 *    locally and then not have checked for the WriteDeny.)
2538	 * Don't check for a Reclaim, since that will be dealt with
2539	 * by nfsrv_openctrl().
2540	 */
2541	if (!(new_stp->ls_flags &
2542		(NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
2543	    stp = LIST_FIRST(&lfp->lf_deleg);
2544	    while (stp != LIST_END(&lfp->lf_deleg)) {
2545		nstp = LIST_NEXT(stp, ls_file);
2546		if ((readonly && stp->ls_clp != clp &&
2547		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2548		    (!readonly && (stp->ls_clp != clp ||
2549		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2550			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2551			if (ret) {
2552			    /*
2553			     * nfsrv_delegconflict() unlocks state
2554			     * when it returns non-zero.
2555			     */
2556			    if (ret == -1)
2557				goto tryagain;
2558			    error = ret;
2559			    goto out;
2560			}
2561		}
2562		stp = nstp;
2563	    }
2564	}
2565	NFSUNLOCKSTATE();
2566	if (haslock) {
2567		NFSLOCKV4ROOTMUTEX();
2568		nfsv4_unlock(&nfsv4rootfs_lock, 1);
2569		NFSUNLOCKV4ROOTMUTEX();
2570	}
2571
2572out:
2573	NFSEXITCODE2(error, nd);
2574	return (error);
2575}
2576
2577/*
2578 * Open control function to create/update open state for an open.
2579 */
2580int
2581nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2582    struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2583    nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2584    NFSPROC_T *p, u_quad_t filerev)
2585{
2586	struct nfsstate *new_stp = *new_stpp;
2587	struct nfsstate *stp, *nstp;
2588	struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2589	struct nfslockfile *lfp, *new_lfp;
2590	struct nfsclient *clp;
2591	int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2592	int readonly = 0, cbret = 1, getfhret = 0;
2593	int gotstate = 0, len = 0;
2594	u_char *clidp = NULL;
2595
2596	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2597		readonly = 1;
2598	/*
2599	 * Check for restart conditions (client and server).
2600	 * (Paranoia, should have been detected by nfsrv_opencheck().)
2601	 * If an error does show up, return NFSERR_EXPIRED, since the
	 * seqid# has already been incremented.
2603	 */
2604	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2605	    &new_stp->ls_stateid, 0);
2606	if (error) {
2607		printf("Nfsd: openctrl unexpected restart err=%d\n",
2608		    error);
2609		error = NFSERR_EXPIRED;
2610		goto out;
2611	}
2612
2613	clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
2614tryagain:
2615	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
2616	    M_NFSDLOCKFILE, M_WAITOK);
2617	MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate),
2618	    M_NFSDSTATE, M_WAITOK);
2619	MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate),
2620	    M_NFSDSTATE, M_WAITOK);
2621	getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2622	    NULL, p);
2623	NFSLOCKSTATE();
2624	/*
2625	 * Get the client structure. Since the linked lists could be changed
2626	 * by other nfsd processes if this process does a tsleep(), one of
2627	 * two things must be done.
2628	 * 1 - don't tsleep()
2629	 * or
2630	 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2631	 *     before using the lists, since this lock stops the other
2632	 *     nfsd. This should only be used for rare cases, since it
2633	 *     essentially single threads the nfsd.
2634	 *     At this time, it is only done for cases where the stable
2635	 *     storage file must be written prior to completion of state
2636	 *     expiration.
2637	 */
2638	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2639	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
2640	if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2641	    clp->lc_program) {
2642		/*
2643		 * This happens on the first open for a client
2644		 * that supports callbacks.
2645		 */
2646		NFSUNLOCKSTATE();
2647		/*
2648		 * Although nfsrv_docallback() will sleep, clp won't
2649		 * go away, since they are only removed when the
2650		 * nfsv4_lock() has blocked the nfsd threads. The
2651		 * fields in clp can change, but having multiple
2652		 * threads do this Null callback RPC should be
2653		 * harmless.
2654		 */
2655		cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2656		    NULL, 0, NULL, NULL, NULL, p);
2657		NFSLOCKSTATE();
2658		clp->lc_flags &= ~LCL_NEEDSCBNULL;
2659		if (!cbret)
2660			clp->lc_flags |= LCL_CALLBACKSON;
2661	}
2662
2663	/*
2664	 * Look up the open owner. See if it needs confirmation and
2665	 * check the seq#, as required.
2666	 */
2667	if (!error)
2668		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2669
2670	if (error) {
2671		NFSUNLOCKSTATE();
2672		printf("Nfsd: openctrl unexpected state err=%d\n",
2673			error);
2674		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
2675		free((caddr_t)new_open, M_NFSDSTATE);
2676		free((caddr_t)new_deleg, M_NFSDSTATE);
2677		if (haslock) {
2678			NFSLOCKV4ROOTMUTEX();
2679			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2680			NFSUNLOCKV4ROOTMUTEX();
2681		}
2682		error = NFSERR_EXPIRED;
2683		goto out;
2684	}
2685
2686	if (new_stp->ls_flags & NFSLCK_RECLAIM)
2687		nfsrv_markstable(clp);
2688
2689	/*
2690	 * Get the structure for the underlying file.
2691	 */
2692	if (getfhret)
2693		error = getfhret;
2694	else
2695		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2696		    NULL, 0);
2697	if (new_lfp)
2698		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
2699	if (error) {
2700		NFSUNLOCKSTATE();
2701		printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2702		    error);
2703		free((caddr_t)new_open, M_NFSDSTATE);
2704		free((caddr_t)new_deleg, M_NFSDSTATE);
2705		if (haslock) {
2706			NFSLOCKV4ROOTMUTEX();
2707			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2708			NFSUNLOCKV4ROOTMUTEX();
2709		}
2710		goto out;
2711	}
2712
2713	/*
2714	 * Search for a conflicting open/share.
2715	 */
2716	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2717	    /*
2718	     * For Delegate_Cur, search for the matching Delegation,
2719	     * which indicates no conflict.
2720	     * An old delegation should have been recovered by the
2721	     * client doing a Claim_DELEGATE_Prev, so I won't let
2722	     * it match and return NFSERR_EXPIRED. Should I let it
2723	     * match?
2724	     */
2725	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2726		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2727		    (((nd->nd_flag & ND_NFSV41) != 0 &&
2728		    stateidp->seqid == 0) ||
2729		    stateidp->seqid == stp->ls_stateid.seqid) &&
2730		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2731			NFSX_STATEIDOTHER))
2732			break;
2733	    }
2734	    if (stp == LIST_END(&lfp->lf_deleg) ||
2735		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2736		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2737		NFSUNLOCKSTATE();
2738		printf("Nfsd openctrl unexpected expiry\n");
2739		free((caddr_t)new_open, M_NFSDSTATE);
2740		free((caddr_t)new_deleg, M_NFSDSTATE);
2741		if (haslock) {
2742			NFSLOCKV4ROOTMUTEX();
2743			nfsv4_unlock(&nfsv4rootfs_lock, 1);
2744			NFSUNLOCKV4ROOTMUTEX();
2745		}
2746		error = NFSERR_EXPIRED;
2747		goto out;
2748	    }
2749
2750	    /*
2751	     * Don't issue a Delegation, since one already exists and
2752	     * delay delegation timeout, as required.
2753	     */
2754	    delegate = 0;
2755	    nfsrv_delaydelegtimeout(stp);
2756	}
2757
2758	/*
2759	 * Check for access/deny bit conflicts. I also check for the
2760	 * same owner, since the client might not have bothered to check.
2761	 * Also, note an open for the same file and owner, if found,
2762	 * which is all we do here for Delegate_Cur, since conflict
2763	 * checking is already done.
2764	 */
2765	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2766		if (ownerstp && stp->ls_openowner == ownerstp)
2767			openstp = stp;
2768		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2769		    /*
2770		     * If another client has the file open, the only
2771		     * delegation that can be issued is a Read delegation
2772		     * and only if it is a Read open with Deny none.
2773		     */
2774		    if (clp != stp->ls_clp) {
2775			if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2776			    NFSLCK_READACCESS)
2777			    writedeleg = 0;
2778			else
2779			    delegate = 0;
2780		    }
2781		    if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2782		        ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2783		       ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2784		        ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
2785			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2786			if (ret == 1) {
2787				/*
2788				 * nfsrv_clientconflict() unlocks state
2789				 * when it returns non-zero.
2790				 */
2791				free((caddr_t)new_open, M_NFSDSTATE);
2792				free((caddr_t)new_deleg, M_NFSDSTATE);
2793				openstp = NULL;
2794				goto tryagain;
2795			}
2796			if (ret == 2)
2797				error = NFSERR_PERM;
2798			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2799				error = NFSERR_RECLAIMCONFLICT;
2800			else
2801				error = NFSERR_SHAREDENIED;
2802			if (ret == 0)
2803				NFSUNLOCKSTATE();
2804			if (haslock) {
2805				NFSLOCKV4ROOTMUTEX();
2806				nfsv4_unlock(&nfsv4rootfs_lock, 1);
2807				NFSUNLOCKV4ROOTMUTEX();
2808			}
2809			free((caddr_t)new_open, M_NFSDSTATE);
2810			free((caddr_t)new_deleg, M_NFSDSTATE);
2811			printf("nfsd openctrl unexpected client cnfl\n");
2812			goto out;
2813		    }
2814		}
2815	}
2816
2817	/*
2818	 * Check for a conflicting delegation. If one is found, call
2819	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2820	 * been set yet, it will get the lock. Otherwise, it will recall
2821	 * the delegation. Then, we try try again...
2822	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2823	 *  isn't a conflict.)
2824	 * I currently believe the conflict algorithm to be:
2825	 * For Open with Read Access and Deny None
2826	 * - there is a conflict iff a different client has a write delegation
2827	 * For Open with other Write Access or any Deny except None
2828	 * - there is a conflict if a different client has any delegation
2829	 * - there is a conflict if the same client has a read delegation
2830	 *   (The current consensus is that this last case should be
2831	 *    considered a conflict since the client with a read delegation
2832	 *    could have done an Open with ReadAccess and WriteDeny
2833	 *    locally and then not have checked for the WriteDeny.)
2834	 */
2835	if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
2836	    stp = LIST_FIRST(&lfp->lf_deleg);
2837	    while (stp != LIST_END(&lfp->lf_deleg)) {
2838		nstp = LIST_NEXT(stp, ls_file);
2839		if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
2840			writedeleg = 0;
2841		else
2842			delegate = 0;
2843		if ((readonly && stp->ls_clp != clp &&
2844		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
2845		    (!readonly && (stp->ls_clp != clp ||
2846		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
2847		    if (new_stp->ls_flags & NFSLCK_RECLAIM) {
2848			delegate = 2;
2849		    } else {
2850			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2851			if (ret) {
2852			    /*
2853			     * nfsrv_delegconflict() unlocks state
2854			     * when it returns non-zero.
2855			     */
2856			    printf("Nfsd openctrl unexpected deleg cnfl\n");
2857			    free((caddr_t)new_open, M_NFSDSTATE);
2858			    free((caddr_t)new_deleg, M_NFSDSTATE);
2859			    if (ret == -1) {
2860				openstp = NULL;
2861				goto tryagain;
2862			    }
2863			    error = ret;
2864			    goto out;
2865			}
2866		    }
2867		}
2868		stp = nstp;
2869	    }
2870	}
2871
2872	/*
2873	 * We only get here if there was no open that conflicted.
2874	 * If an open for the owner exists, or in the access/deny bits.
2875	 * Otherwise it is a new open. If the open_owner hasn't been
2876	 * confirmed, replace the open with the new one needing confirmation,
2877	 * otherwise add the open.
2878	 */
2879	if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
2880	    /*
2881	     * Handle NFSLCK_DELEGPREV by searching the old delegations for
2882	     * a match. If found, just move the old delegation to the current
2883	     * delegation list and issue open. If not found, return
2884	     * NFSERR_EXPIRED.
2885	     */
2886	    LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
2887		if (stp->ls_lfp == lfp) {
2888		    /* Found it */
2889		    if (stp->ls_clp != clp)
2890			panic("olddeleg clp");
2891		    LIST_REMOVE(stp, ls_list);
2892		    LIST_REMOVE(stp, ls_hash);
2893		    stp->ls_flags &= ~NFSLCK_OLDDELEG;
2894		    stp->ls_stateid.seqid = delegstateidp->seqid = 1;
2895		    stp->ls_stateid.other[0] = delegstateidp->other[0] =
2896			clp->lc_clientid.lval[0];
2897		    stp->ls_stateid.other[1] = delegstateidp->other[1] =
2898			clp->lc_clientid.lval[1];
2899		    stp->ls_stateid.other[2] = delegstateidp->other[2] =
2900			nfsrv_nextstateindex(clp);
2901		    stp->ls_compref = nd->nd_compref;
2902		    LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
2903		    LIST_INSERT_HEAD(NFSSTATEHASH(clp,
2904			stp->ls_stateid), stp, ls_hash);
2905		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2906			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2907		    else
2908			*rflagsp |= NFSV4OPEN_READDELEGATE;
2909		    clp->lc_delegtime = NFSD_MONOSEC +
2910			nfsrv_lease + NFSRV_LEASEDELTA;
2911
2912		    /*
2913		     * Now, do the associated open.
2914		     */
2915		    new_open->ls_stateid.seqid = 1;
2916		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
2917		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
2918		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
2919		    new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
2920			NFSLCK_OPEN;
2921		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
2922			new_open->ls_flags |= (NFSLCK_READACCESS |
2923			    NFSLCK_WRITEACCESS);
2924		    else
2925			new_open->ls_flags |= NFSLCK_READACCESS;
2926		    new_open->ls_uid = new_stp->ls_uid;
2927		    new_open->ls_lfp = lfp;
2928		    new_open->ls_clp = clp;
2929		    LIST_INIT(&new_open->ls_open);
2930		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
2931		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
2932			new_open, ls_hash);
2933		    /*
2934		     * and handle the open owner
2935		     */
2936		    if (ownerstp) {
2937			new_open->ls_openowner = ownerstp;
2938			LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
2939		    } else {
2940			new_open->ls_openowner = new_stp;
2941			new_stp->ls_flags = 0;
2942			nfsrvd_refcache(new_stp->ls_op);
2943			new_stp->ls_noopens = 0;
2944			LIST_INIT(&new_stp->ls_open);
2945			LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
2946			LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
2947			*new_stpp = NULL;
2948			nfsstatsv1.srvopenowners++;
2949			nfsrv_openpluslock++;
2950		    }
2951		    openstp = new_open;
2952		    new_open = NULL;
2953		    nfsstatsv1.srvopens++;
2954		    nfsrv_openpluslock++;
2955		    break;
2956		}
2957	    }
2958	    if (stp == LIST_END(&clp->lc_olddeleg))
2959		error = NFSERR_EXPIRED;
2960	} else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2961	    /*
2962	     * Scan to see that no delegation for this client and file
2963	     * doesn't already exist.
2964	     * There also shouldn't yet be an Open for this file and
2965	     * openowner.
2966	     */
2967	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2968		if (stp->ls_clp == clp)
2969		    break;
2970	    }
2971	    if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
2972		/*
2973		 * This is the Claim_Previous case with a delegation
2974		 * type != Delegate_None.
2975		 */
2976		/*
2977		 * First, add the delegation. (Although we must issue the
2978		 * delegation, we can also ask for an immediate return.)
2979		 */
2980		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
2981		new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
2982		    clp->lc_clientid.lval[0];
2983		new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
2984		    clp->lc_clientid.lval[1];
2985		new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
2986		    nfsrv_nextstateindex(clp);
2987		if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
2988		    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
2989			NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
2990		    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
2991		    nfsrv_writedelegcnt++;
2992		} else {
2993		    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
2994			NFSLCK_READACCESS);
2995		    *rflagsp |= NFSV4OPEN_READDELEGATE;
2996		}
2997		new_deleg->ls_uid = new_stp->ls_uid;
2998		new_deleg->ls_lfp = lfp;
2999		new_deleg->ls_clp = clp;
3000		new_deleg->ls_filerev = filerev;
3001		new_deleg->ls_compref = nd->nd_compref;
3002		LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3003		LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3004		    new_deleg->ls_stateid), new_deleg, ls_hash);
3005		LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3006		new_deleg = NULL;
3007		if (delegate == 2 || nfsrv_issuedelegs == 0 ||
3008		    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3009		     LCL_CALLBACKSON ||
3010		    NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
3011		    !NFSVNO_DELEGOK(vp))
3012		    *rflagsp |= NFSV4OPEN_RECALL;
3013		nfsstatsv1.srvdelegates++;
3014		nfsrv_openpluslock++;
3015		nfsrv_delegatecnt++;
3016
3017		/*
3018		 * Now, do the associated open.
3019		 */
3020		new_open->ls_stateid.seqid = 1;
3021		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3022		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3023		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3024		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
3025		    NFSLCK_OPEN;
3026		if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
3027			new_open->ls_flags |= (NFSLCK_READACCESS |
3028			    NFSLCK_WRITEACCESS);
3029		else
3030			new_open->ls_flags |= NFSLCK_READACCESS;
3031		new_open->ls_uid = new_stp->ls_uid;
3032		new_open->ls_lfp = lfp;
3033		new_open->ls_clp = clp;
3034		LIST_INIT(&new_open->ls_open);
3035		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3036		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3037		   new_open, ls_hash);
3038		/*
3039		 * and handle the open owner
3040		 */
3041		if (ownerstp) {
3042		    new_open->ls_openowner = ownerstp;
3043		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3044		} else {
3045		    new_open->ls_openowner = new_stp;
3046		    new_stp->ls_flags = 0;
3047		    nfsrvd_refcache(new_stp->ls_op);
3048		    new_stp->ls_noopens = 0;
3049		    LIST_INIT(&new_stp->ls_open);
3050		    LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3051		    LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3052		    *new_stpp = NULL;
3053		    nfsstatsv1.srvopenowners++;
3054		    nfsrv_openpluslock++;
3055		}
3056		openstp = new_open;
3057		new_open = NULL;
3058		nfsstatsv1.srvopens++;
3059		nfsrv_openpluslock++;
3060	    } else {
3061		error = NFSERR_RECLAIMCONFLICT;
3062	    }
3063	} else if (ownerstp) {
3064		if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
3065		    /* Replace the open */
3066		    if (ownerstp->ls_op)
3067			nfsrvd_derefcache(ownerstp->ls_op);
3068		    ownerstp->ls_op = new_stp->ls_op;
3069		    nfsrvd_refcache(ownerstp->ls_op);
3070		    ownerstp->ls_seq = new_stp->ls_seq;
3071		    *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3072		    stp = LIST_FIRST(&ownerstp->ls_open);
3073		    stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3074			NFSLCK_OPEN;
3075		    stp->ls_stateid.seqid = 1;
3076		    stp->ls_uid = new_stp->ls_uid;
3077		    if (lfp != stp->ls_lfp) {
3078			LIST_REMOVE(stp, ls_file);
3079			LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
3080			stp->ls_lfp = lfp;
3081		    }
3082		    openstp = stp;
3083		} else if (openstp) {
3084		    openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
3085		    openstp->ls_stateid.seqid++;
3086		    if ((nd->nd_flag & ND_NFSV41) != 0 &&
3087			openstp->ls_stateid.seqid == 0)
3088			openstp->ls_stateid.seqid = 1;
3089
3090		    /*
3091		     * This is where we can choose to issue a delegation.
3092		     */
3093		    if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3094			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3095		    else if (nfsrv_issuedelegs == 0)
3096			*rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
3097		    else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3098			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3099		    else if (delegate == 0 || writedeleg == 0 ||
3100			NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
3101			nfsrv_writedelegifpos == 0) ||
3102			!NFSVNO_DELEGOK(vp) ||
3103			(new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
3104			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3105			 LCL_CALLBACKSON)
3106			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3107		    else {
3108			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3109			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3110			    = clp->lc_clientid.lval[0];
3111			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3112			    = clp->lc_clientid.lval[1];
3113			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3114			    = nfsrv_nextstateindex(clp);
3115			new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3116			    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3117			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3118			new_deleg->ls_uid = new_stp->ls_uid;
3119			new_deleg->ls_lfp = lfp;
3120			new_deleg->ls_clp = clp;
3121			new_deleg->ls_filerev = filerev;
3122			new_deleg->ls_compref = nd->nd_compref;
3123			nfsrv_writedelegcnt++;
3124			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3125			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3126			    new_deleg->ls_stateid), new_deleg, ls_hash);
3127			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3128			new_deleg = NULL;
3129			nfsstatsv1.srvdelegates++;
3130			nfsrv_openpluslock++;
3131			nfsrv_delegatecnt++;
3132		    }
3133		} else {
3134		    new_open->ls_stateid.seqid = 1;
3135		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3136		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3137		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3138		    new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3139			NFSLCK_OPEN;
3140		    new_open->ls_uid = new_stp->ls_uid;
3141		    new_open->ls_openowner = ownerstp;
3142		    new_open->ls_lfp = lfp;
3143		    new_open->ls_clp = clp;
3144		    LIST_INIT(&new_open->ls_open);
3145		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3146		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3147		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3148			new_open, ls_hash);
3149		    openstp = new_open;
3150		    new_open = NULL;
3151		    nfsstatsv1.srvopens++;
3152		    nfsrv_openpluslock++;
3153
3154		    /*
3155		     * This is where we can choose to issue a delegation.
3156		     */
3157		    if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
3158			*rflagsp |= NFSV4OPEN_WDNOTWANTED;
3159		    else if (nfsrv_issuedelegs == 0)
3160			*rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
3161		    else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
3162			*rflagsp |= NFSV4OPEN_WDRESOURCE;
3163		    else if (delegate == 0 || (writedeleg == 0 &&
3164			readonly == 0) || !NFSVNO_DELEGOK(vp) ||
3165			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3166			 LCL_CALLBACKSON)
3167			*rflagsp |= NFSV4OPEN_WDCONTENTION;
3168		    else {
3169			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3170			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
3171			    = clp->lc_clientid.lval[0];
3172			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
3173			    = clp->lc_clientid.lval[1];
3174			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
3175			    = nfsrv_nextstateindex(clp);
3176			if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3177			    (nfsrv_writedelegifpos || !readonly) &&
3178			    (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
3179			    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3180				NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3181			    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3182			    nfsrv_writedelegcnt++;
3183			} else {
3184			    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3185				NFSLCK_READACCESS);
3186			    *rflagsp |= NFSV4OPEN_READDELEGATE;
3187			}
3188			new_deleg->ls_uid = new_stp->ls_uid;
3189			new_deleg->ls_lfp = lfp;
3190			new_deleg->ls_clp = clp;
3191			new_deleg->ls_filerev = filerev;
3192			new_deleg->ls_compref = nd->nd_compref;
3193			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3194			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3195			    new_deleg->ls_stateid), new_deleg, ls_hash);
3196			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3197			new_deleg = NULL;
3198			nfsstatsv1.srvdelegates++;
3199			nfsrv_openpluslock++;
3200			nfsrv_delegatecnt++;
3201		    }
3202		}
3203	} else {
3204		/*
3205		 * New owner case. Start the open_owner sequence with a
3206		 * Needs confirmation (unless a reclaim) and hang the
3207		 * new open off it.
3208		 */
3209		new_open->ls_stateid.seqid = 1;
3210		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3211		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3212		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3213		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3214		    NFSLCK_OPEN;
3215		new_open->ls_uid = new_stp->ls_uid;
3216		LIST_INIT(&new_open->ls_open);
3217		new_open->ls_openowner = new_stp;
3218		new_open->ls_lfp = lfp;
3219		new_open->ls_clp = clp;
3220		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3221		if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3222			new_stp->ls_flags = 0;
3223		} else if ((nd->nd_flag & ND_NFSV41) != 0) {
3224			/* NFSv4.1 never needs confirmation. */
3225			new_stp->ls_flags = 0;
3226
3227			/*
3228			 * This is where we can choose to issue a delegation.
3229			 */
3230			if (delegate && nfsrv_issuedelegs &&
3231			    (writedeleg || readonly) &&
3232			    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
3233			     LCL_CALLBACKSON &&
3234			    !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
3235			    NFSVNO_DELEGOK(vp) &&
3236			    ((nd->nd_flag & ND_NFSV41) == 0 ||
3237			     (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
3238				new_deleg->ls_stateid.seqid =
3239				    delegstateidp->seqid = 1;
3240				new_deleg->ls_stateid.other[0] =
3241				    delegstateidp->other[0]
3242				    = clp->lc_clientid.lval[0];
3243				new_deleg->ls_stateid.other[1] =
3244				    delegstateidp->other[1]
3245				    = clp->lc_clientid.lval[1];
3246				new_deleg->ls_stateid.other[2] =
3247				    delegstateidp->other[2]
3248				    = nfsrv_nextstateindex(clp);
3249				if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
3250				    (nfsrv_writedelegifpos || !readonly) &&
3251				    ((nd->nd_flag & ND_NFSV41) == 0 ||
3252				     (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
3253				     0)) {
3254					new_deleg->ls_flags =
3255					    (NFSLCK_DELEGWRITE |
3256					     NFSLCK_READACCESS |
3257					     NFSLCK_WRITEACCESS);
3258					*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3259					nfsrv_writedelegcnt++;
3260				} else {
3261					new_deleg->ls_flags =
3262					    (NFSLCK_DELEGREAD |
3263					     NFSLCK_READACCESS);
3264					*rflagsp |= NFSV4OPEN_READDELEGATE;
3265				}
3266				new_deleg->ls_uid = new_stp->ls_uid;
3267				new_deleg->ls_lfp = lfp;
3268				new_deleg->ls_clp = clp;
3269				new_deleg->ls_filerev = filerev;
3270				new_deleg->ls_compref = nd->nd_compref;
3271				LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
3272				    ls_file);
3273				LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3274				    new_deleg->ls_stateid), new_deleg, ls_hash);
3275				LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
3276				    ls_list);
3277				new_deleg = NULL;
3278				nfsstatsv1.srvdelegates++;
3279				nfsrv_openpluslock++;
3280				nfsrv_delegatecnt++;
3281			}
3282			/*
3283			 * Since NFSv4.1 never does an OpenConfirm, the first
3284			 * open state will be acquired here.
3285			 */
3286			if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3287				clp->lc_flags |= LCL_STAMPEDSTABLE;
3288				len = clp->lc_idlen;
3289				NFSBCOPY(clp->lc_id, clidp, len);
3290				gotstate = 1;
3291			}
3292		} else {
3293			*rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3294			new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3295		}
3296		nfsrvd_refcache(new_stp->ls_op);
3297		new_stp->ls_noopens = 0;
3298		LIST_INIT(&new_stp->ls_open);
3299		LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3300		LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3301		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3302		    new_open, ls_hash);
3303		openstp = new_open;
3304		new_open = NULL;
3305		*new_stpp = NULL;
3306		nfsstatsv1.srvopens++;
3307		nfsrv_openpluslock++;
3308		nfsstatsv1.srvopenowners++;
3309		nfsrv_openpluslock++;
3310	}
3311	if (!error) {
3312		stateidp->seqid = openstp->ls_stateid.seqid;
3313		stateidp->other[0] = openstp->ls_stateid.other[0];
3314		stateidp->other[1] = openstp->ls_stateid.other[1];
3315		stateidp->other[2] = openstp->ls_stateid.other[2];
3316	}
3317	NFSUNLOCKSTATE();
3318	if (haslock) {
3319		NFSLOCKV4ROOTMUTEX();
3320		nfsv4_unlock(&nfsv4rootfs_lock, 1);
3321		NFSUNLOCKV4ROOTMUTEX();
3322	}
3323	if (new_open)
3324		FREE((caddr_t)new_open, M_NFSDSTATE);
3325	if (new_deleg)
3326		FREE((caddr_t)new_deleg, M_NFSDSTATE);
3327
3328	/*
3329	 * If the NFSv4.1 client just acquired its first open, write a timestamp
3330	 * to the stable storage file.
3331	 */
3332	if (gotstate != 0) {
3333		nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3334		nfsrv_backupstable();
3335	}
3336
3337out:
3338	free(clidp, M_TEMP);
3339	NFSEXITCODE2(error, nd);
3340	return (error);
3341}
3342
3343/*
3344 * Open update. Does the confirm, downgrade and close.
3345 */
3346int
3347nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
3348    nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p)
3349{
3350	struct nfsstate *stp, *ownerstp;
3351	struct nfsclient *clp;
3352	struct nfslockfile *lfp;
3353	u_int32_t bits;
3354	int error = 0, gotstate = 0, len = 0;
3355	u_char *clidp = NULL;
3356
3357	/*
3358	 * Check for restart conditions (client and server).
3359	 */
3360	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3361	    &new_stp->ls_stateid, 0);
3362	if (error)
3363		goto out;
3364
3365	clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
3366	NFSLOCKSTATE();
3367	/*
3368	 * Get the open structure via clientid and stateid.
3369	 */
3370	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3371	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
3372	if (!error)
3373		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
3374		    new_stp->ls_flags, &stp);
3375
3376	/*
3377	 * Sanity check the open.
3378	 */
3379	if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
3380		(!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
3381		 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
3382		((new_stp->ls_flags & NFSLCK_CONFIRM) &&
3383		 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
3384		error = NFSERR_BADSTATEID;
3385
3386	if (!error)
3387		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
3388		    stp->ls_openowner, new_stp->ls_op);
3389	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
3390	    (((nd->nd_flag & ND_NFSV41) == 0 &&
3391	      !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
3392	     ((nd->nd_flag & ND_NFSV41) != 0 &&
3393	      new_stp->ls_stateid.seqid != 0)))
3394		error = NFSERR_OLDSTATEID;
3395	if (!error && vnode_vtype(vp) != VREG) {
3396		if (vnode_vtype(vp) == VDIR)
3397			error = NFSERR_ISDIR;
3398		else
3399			error = NFSERR_INVAL;
3400	}
3401
3402	if (error) {
3403		/*
3404		 * If a client tries to confirm an Open with a bad
3405		 * seqid# and there are no byte range locks or other Opens
3406		 * on the openowner, just throw it away, so the next use of the
3407		 * openowner will start a fresh seq#.
3408		 */
3409		if (error == NFSERR_BADSEQID &&
3410		    (new_stp->ls_flags & NFSLCK_CONFIRM) &&
3411		    nfsrv_nootherstate(stp))
3412			nfsrv_freeopenowner(stp->ls_openowner, 0, p);
3413		NFSUNLOCKSTATE();
3414		goto out;
3415	}
3416
3417	/*
3418	 * Set the return stateid.
3419	 */
3420	stateidp->seqid = stp->ls_stateid.seqid + 1;
3421	if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
3422		stateidp->seqid = 1;
3423	stateidp->other[0] = stp->ls_stateid.other[0];
3424	stateidp->other[1] = stp->ls_stateid.other[1];
3425	stateidp->other[2] = stp->ls_stateid.other[2];
3426	/*
3427	 * Now, handle the three cases.
3428	 */
3429	if (new_stp->ls_flags & NFSLCK_CONFIRM) {
3430		/*
3431		 * If the open doesn't need confirmation, it seems to me that
3432		 * there is a client error, but I'll just log it and keep going?
3433		 */
3434		if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
3435			printf("Nfsv4d: stray open confirm\n");
3436		stp->ls_openowner->ls_flags = 0;
3437		stp->ls_stateid.seqid++;
3438		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3439		    stp->ls_stateid.seqid == 0)
3440			stp->ls_stateid.seqid = 1;
3441		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3442			clp->lc_flags |= LCL_STAMPEDSTABLE;
3443			len = clp->lc_idlen;
3444			NFSBCOPY(clp->lc_id, clidp, len);
3445			gotstate = 1;
3446		}
3447		NFSUNLOCKSTATE();
3448	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
3449		ownerstp = stp->ls_openowner;
3450		lfp = stp->ls_lfp;
3451		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
3452			/* Get the lf lock */
3453			nfsrv_locklf(lfp);
3454			NFSUNLOCKSTATE();
3455			ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
3456			NFSVOPUNLOCK(vp, 0);
3457			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
3458				NFSLOCKSTATE();
3459				nfsrv_unlocklf(lfp);
3460				NFSUNLOCKSTATE();
3461			}
3462			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
3463		} else {
3464			(void) nfsrv_freeopen(stp, NULL, 0, p);
3465			NFSUNLOCKSTATE();
3466		}
3467	} else {
3468		/*
3469		 * Update the share bits, making sure that the new set are a
3470		 * subset of the old ones.
3471		 */
3472		bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
3473		if (~(stp->ls_flags) & bits) {
3474			NFSUNLOCKSTATE();
3475			error = NFSERR_INVAL;
3476			goto out;
3477		}
3478		stp->ls_flags = (bits | NFSLCK_OPEN);
3479		stp->ls_stateid.seqid++;
3480		if ((nd->nd_flag & ND_NFSV41) != 0 &&
3481		    stp->ls_stateid.seqid == 0)
3482			stp->ls_stateid.seqid = 1;
3483		NFSUNLOCKSTATE();
3484	}
3485
3486	/*
3487	 * If the client just confirmed its first open, write a timestamp
3488	 * to the stable storage file.
3489	 */
3490	if (gotstate != 0) {
3491		nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3492		nfsrv_backupstable();
3493	}
3494
3495out:
3496	free(clidp, M_TEMP);
3497	NFSEXITCODE2(error, nd);
3498	return (error);
3499}
3500
3501/*
3502 * Delegation update. Does the purge and return.
3503 */
3504int
3505nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3506    nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3507    NFSPROC_T *p)
3508{
3509	struct nfsstate *stp;
3510	struct nfsclient *clp;
3511	int error = 0;
3512	fhandle_t fh;
3513
3514	/*
3515	 * Do a sanity check against the file handle for DelegReturn.
3516	 */
3517	if (vp) {
3518		error = nfsvno_getfh(vp, &fh, p);
3519		if (error)
3520			goto out;
3521	}
3522	/*
3523	 * Check for restart conditions (client and server).
3524	 */
3525	if (op == NFSV4OP_DELEGRETURN)
3526		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3527			stateidp, 0);
3528	else
3529		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3530			stateidp, 0);
3531
3532	NFSLOCKSTATE();
3533	/*
3534	 * Get the open structure via clientid and stateid.
3535	 */
3536	if (!error)
3537	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3538		(nfsquad_t)((u_quad_t)0), 0, nd, p);
3539	if (error) {
3540		if (error == NFSERR_CBPATHDOWN)
3541			error = 0;
3542		if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3543			error = NFSERR_STALESTATEID;
3544	}
3545	if (!error && op == NFSV4OP_DELEGRETURN) {
3546	    error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3547	    if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3548		((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3549		error = NFSERR_OLDSTATEID;
3550	}
3551	/*
3552	 * NFSERR_EXPIRED means that the state has gone away,
3553	 * so Delegations have been purged. Just return ok.
3554	 */
3555	if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3556		NFSUNLOCKSTATE();
3557		error = 0;
3558		goto out;
3559	}
3560	if (error) {
3561		NFSUNLOCKSTATE();
3562		goto out;
3563	}
3564
3565	if (op == NFSV4OP_DELEGRETURN) {
3566		if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3567		    sizeof (fhandle_t))) {
3568			NFSUNLOCKSTATE();
3569			error = NFSERR_BADSTATEID;
3570			goto out;
3571		}
3572		nfsrv_freedeleg(stp);
3573	} else {
3574		nfsrv_freedeleglist(&clp->lc_olddeleg);
3575	}
3576	NFSUNLOCKSTATE();
3577	error = 0;
3578
3579out:
3580	NFSEXITCODE(error);
3581	return (error);
3582}
3583
3584/*
3585 * Release lock owner.
3586 */
3587int
3588nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3589    NFSPROC_T *p)
3590{
3591	struct nfsstate *stp, *nstp, *openstp, *ownstp;
3592	struct nfsclient *clp;
3593	int error = 0;
3594
3595	/*
3596	 * Check for restart conditions (client and server).
3597	 */
3598	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3599	    &new_stp->ls_stateid, 0);
3600	if (error)
3601		goto out;
3602
3603	NFSLOCKSTATE();
3604	/*
3605	 * Get the lock owner by name.
3606	 */
3607	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3608	    (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3609	if (error) {
3610		NFSUNLOCKSTATE();
3611		goto out;
3612	}
3613	LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3614	    LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3615		stp = LIST_FIRST(&openstp->ls_open);
3616		while (stp != LIST_END(&openstp->ls_open)) {
3617		    nstp = LIST_NEXT(stp, ls_list);
3618		    /*
3619		     * If the owner matches, check for locks and
3620		     * then free or return an error.
3621		     */
3622		    if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3623			!NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3624			 stp->ls_ownerlen)){
3625			if (LIST_EMPTY(&stp->ls_lock)) {
3626			    nfsrv_freelockowner(stp, NULL, 0, p);
3627			} else {
3628			    NFSUNLOCKSTATE();
3629			    error = NFSERR_LOCKSHELD;
3630			    goto out;
3631			}
3632		    }
3633		    stp = nstp;
3634		}
3635	    }
3636	}
3637	NFSUNLOCKSTATE();
3638
3639out:
3640	NFSEXITCODE(error);
3641	return (error);
3642}
3643
3644/*
3645 * Get the file handle for a lock structure.
3646 */
3647static int
3648nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3649    fhandle_t *nfhp, NFSPROC_T *p)
3650{
3651	fhandle_t *fhp = NULL;
3652	int error;
3653
3654	/*
3655	 * For lock, use the new nfslock structure, otherwise just
3656	 * a fhandle_t on the stack.
3657	 */
3658	if (flags & NFSLCK_OPEN) {
3659		KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3660		fhp = &new_lfp->lf_fh;
3661	} else if (nfhp) {
3662		fhp = nfhp;
3663	} else {
3664		panic("nfsrv_getlockfh");
3665	}
3666	error = nfsvno_getfh(vp, fhp, p);
3667	NFSEXITCODE(error);
3668	return (error);
3669}
3670
3671/*
3672 * Get an nfs lock structure. Allocate one, as required, and return a
3673 * pointer to it.
3674 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3675 */
3676static int
3677nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3678    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3679{
3680	struct nfslockfile *lfp;
3681	fhandle_t *fhp = NULL, *tfhp;
3682	struct nfslockhashhead *hp;
3683	struct nfslockfile *new_lfp = NULL;
3684
3685	/*
3686	 * For lock, use the new nfslock structure, otherwise just
3687	 * a fhandle_t on the stack.
3688	 */
3689	if (flags & NFSLCK_OPEN) {
3690		new_lfp = *new_lfpp;
3691		fhp = &new_lfp->lf_fh;
3692	} else if (nfhp) {
3693		fhp = nfhp;
3694	} else {
3695		panic("nfsrv_getlockfile");
3696	}
3697
3698	hp = NFSLOCKHASH(fhp);
3699	LIST_FOREACH(lfp, hp, lf_hash) {
3700		tfhp = &lfp->lf_fh;
3701		if (NFSVNO_CMPFH(fhp, tfhp)) {
3702			if (lockit)
3703				nfsrv_locklf(lfp);
3704			*lfpp = lfp;
3705			return (0);
3706		}
3707	}
3708	if (!(flags & NFSLCK_OPEN))
3709		return (-1);
3710
3711	/*
3712	 * No match, so chain the new one into the list.
3713	 */
3714	LIST_INIT(&new_lfp->lf_open);
3715	LIST_INIT(&new_lfp->lf_lock);
3716	LIST_INIT(&new_lfp->lf_deleg);
3717	LIST_INIT(&new_lfp->lf_locallock);
3718	LIST_INIT(&new_lfp->lf_rollback);
3719	new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3720	new_lfp->lf_locallock_lck.nfslock_lock = 0;
3721	new_lfp->lf_usecount = 0;
3722	LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3723	*lfpp = new_lfp;
3724	*new_lfpp = NULL;
3725	return (0);
3726}
3727
3728/*
3729 * This function adds a nfslock lock structure to the list for the associated
3730 * nfsstate and nfslockfile structures. It will be inserted after the
3731 * entry pointed at by insert_lop.
3732 */
3733static void
3734nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3735    struct nfsstate *stp, struct nfslockfile *lfp)
3736{
3737	struct nfslock *lop, *nlop;
3738
3739	new_lop->lo_stp = stp;
3740	new_lop->lo_lfp = lfp;
3741
3742	if (stp != NULL) {
3743		/* Insert in increasing lo_first order */
3744		lop = LIST_FIRST(&lfp->lf_lock);
3745		if (lop == LIST_END(&lfp->lf_lock) ||
3746		    new_lop->lo_first <= lop->lo_first) {
3747			LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3748		} else {
3749			nlop = LIST_NEXT(lop, lo_lckfile);
3750			while (nlop != LIST_END(&lfp->lf_lock) &&
3751			       nlop->lo_first < new_lop->lo_first) {
3752				lop = nlop;
3753				nlop = LIST_NEXT(lop, lo_lckfile);
3754			}
3755			LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3756		}
3757	} else {
3758		new_lop->lo_lckfile.le_prev = NULL;	/* list not used */
3759	}
3760
3761	/*
3762	 * Insert after insert_lop, which is overloaded as stp or lfp for
3763	 * an empty list.
3764	 */
3765	if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3766		LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3767	else if ((struct nfsstate *)insert_lop == stp)
3768		LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3769	else
3770		LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3771	if (stp != NULL) {
3772		nfsstatsv1.srvlocks++;
3773		nfsrv_openpluslock++;
3774	}
3775}
3776
3777/*
3778 * This function updates the locking for a lock owner and given file. It
3779 * maintains a list of lock ranges ordered on increasing file offset that
3780 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
3781 * It always adds new_lop to the list and sometimes uses the one pointed
3782 * at by other_lopp.
3783 */
3784static void
3785nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
3786    struct nfslock **other_lopp, struct nfslockfile *lfp)
3787{
3788	struct nfslock *new_lop = *new_lopp;
3789	struct nfslock *lop, *tlop, *ilop;
3790	struct nfslock *other_lop = *other_lopp;
3791	int unlock = 0, myfile = 0;
3792	u_int64_t tmp;
3793
3794	/*
3795	 * Work down the list until the lock is merged.
3796	 */
3797	if (new_lop->lo_flags & NFSLCK_UNLOCK)
3798		unlock = 1;
3799	if (stp != NULL) {
3800		ilop = (struct nfslock *)stp;
3801		lop = LIST_FIRST(&stp->ls_lock);
3802	} else {
3803		ilop = (struct nfslock *)lfp;
3804		lop = LIST_FIRST(&lfp->lf_locallock);
3805	}
3806	while (lop != NULL) {
3807	    /*
3808	     * Only check locks for this file that aren't before the start of
3809	     * new lock's range.
3810	     */
3811	    if (lop->lo_lfp == lfp) {
3812	      myfile = 1;
3813	      if (lop->lo_end >= new_lop->lo_first) {
3814		if (new_lop->lo_end < lop->lo_first) {
3815			/*
3816			 * If the new lock ends before the start of the
3817			 * current lock's range, no merge, just insert
3818			 * the new lock.
3819			 */
3820			break;
3821		}
3822		if (new_lop->lo_flags == lop->lo_flags ||
3823		    (new_lop->lo_first <= lop->lo_first &&
3824		     new_lop->lo_end >= lop->lo_end)) {
3825			/*
3826			 * This lock can be absorbed by the new lock/unlock.
3827			 * This happens when it covers the entire range
3828			 * of the old lock or is contiguous
3829			 * with the old lock and is of the same type or an
3830			 * unlock.
3831			 */
3832			if (lop->lo_first < new_lop->lo_first)
3833				new_lop->lo_first = lop->lo_first;
3834			if (lop->lo_end > new_lop->lo_end)
3835				new_lop->lo_end = lop->lo_end;
3836			tlop = lop;
3837			lop = LIST_NEXT(lop, lo_lckowner);
3838			nfsrv_freenfslock(tlop);
3839			continue;
3840		}
3841
3842		/*
3843		 * All these cases are for contiguous locks that are not the
3844		 * same type, so they can't be merged.
3845		 */
3846		if (new_lop->lo_first <= lop->lo_first) {
3847			/*
3848			 * This case is where the new lock overlaps with the
3849			 * first part of the old lock. Move the start of the
3850			 * old lock to just past the end of the new lock. The
3851			 * new lock will be inserted in front of the old, since
3852			 * ilop hasn't been updated. (We are done now.)
3853			 */
3854			lop->lo_first = new_lop->lo_end;
3855			break;
3856		}
3857		if (new_lop->lo_end >= lop->lo_end) {
3858			/*
3859			 * This case is where the new lock overlaps with the
3860			 * end of the old lock's range. Move the old lock's
3861			 * end to just before the new lock's first and insert
3862			 * the new lock after the old lock.
3863			 * Might not be done yet, since the new lock could
3864			 * overlap further locks with higher ranges.
3865			 */
3866			lop->lo_end = new_lop->lo_first;
3867			ilop = lop;
3868			lop = LIST_NEXT(lop, lo_lckowner);
3869			continue;
3870		}
3871		/*
3872		 * The final case is where the new lock's range is in the
3873		 * middle of the current lock's and splits the current lock
3874		 * up. Use *other_lopp to handle the second part of the
3875		 * split old lock range. (We are done now.)
3876		 * For unlock, we use new_lop as other_lop and tmp, since
3877		 * other_lop and new_lop are the same for this case.
3878		 * We noted the unlock case above, so we don't need
3879		 * new_lop->lo_flags any longer.
3880		 */
3881		tmp = new_lop->lo_first;
3882		if (other_lop == NULL) {
3883			if (!unlock)
3884				panic("nfsd srv update unlock");
3885			other_lop = new_lop;
3886			*new_lopp = NULL;
3887		}
3888		other_lop->lo_first = new_lop->lo_end;
3889		other_lop->lo_end = lop->lo_end;
3890		other_lop->lo_flags = lop->lo_flags;
3891		other_lop->lo_stp = stp;
3892		other_lop->lo_lfp = lfp;
3893		lop->lo_end = tmp;
3894		nfsrv_insertlock(other_lop, lop, stp, lfp);
3895		*other_lopp = NULL;
3896		ilop = lop;
3897		break;
3898	      }
3899	    }
3900	    ilop = lop;
3901	    lop = LIST_NEXT(lop, lo_lckowner);
3902	    if (myfile && (lop == NULL || lop->lo_lfp != lfp))
3903		break;
3904	}
3905
3906	/*
3907	 * Insert the new lock in the list at the appropriate place.
3908	 */
3909	if (!unlock) {
3910		nfsrv_insertlock(new_lop, ilop, stp, lfp);
3911		*new_lopp = NULL;
3912	}
3913}
3914
3915/*
3916 * This function handles sequencing of locks, etc.
3917 * It returns an error that indicates what the caller should do.
3918 */
3919static int
3920nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
3921    struct nfsstate *stp, struct nfsrvcache *op)
3922{
3923	int error = 0;
3924
3925	if ((nd->nd_flag & ND_NFSV41) != 0)
3926		/* NFSv4.1 ignores the open_seqid and lock_seqid. */
3927		goto out;
3928	if (op != nd->nd_rp)
3929		panic("nfsrvstate checkseqid");
3930	if (!(op->rc_flag & RC_INPROG))
3931		panic("nfsrvstate not inprog");
3932	if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
3933		printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
3934		panic("nfsrvstate op refcnt");
3935	}
3936	if ((stp->ls_seq + 1) == seqid) {
3937		if (stp->ls_op)
3938			nfsrvd_derefcache(stp->ls_op);
3939		stp->ls_op = op;
3940		nfsrvd_refcache(op);
3941		stp->ls_seq = seqid;
3942		goto out;
3943	} else if (stp->ls_seq == seqid && stp->ls_op &&
3944		op->rc_xid == stp->ls_op->rc_xid &&
3945		op->rc_refcnt == 0 &&
3946		op->rc_reqlen == stp->ls_op->rc_reqlen &&
3947		op->rc_cksum == stp->ls_op->rc_cksum) {
3948		if (stp->ls_op->rc_flag & RC_INPROG) {
3949			error = NFSERR_DONTREPLY;
3950			goto out;
3951		}
3952		nd->nd_rp = stp->ls_op;
3953		nd->nd_rp->rc_flag |= RC_INPROG;
3954		nfsrvd_delcache(op);
3955		error = NFSERR_REPLYFROMCACHE;
3956		goto out;
3957	}
3958	error = NFSERR_BADSEQID;
3959
3960out:
3961	NFSEXITCODE2(error, nd);
3962	return (error);
3963}
3964
3965/*
3966 * Get the client ip address for callbacks. If the strings can't be parsed,
3967 * just set lc_program to 0 to indicate no callbacks are possible.
3968 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
3969 *  the address to the client's transport address. This won't be used
3970 *  for callbacks, but can be printed out by nfsstats for info.)
3971 * Return error if the xdr can't be parsed, 0 otherwise.
3972 */
3973int
3974nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
3975{
3976	u_int32_t *tl;
3977	u_char *cp, *cp2;
3978	int i, j, maxalen = 0, minalen = 0;
3979	sa_family_t af;
3980#ifdef INET
3981	struct sockaddr_in *rin, *sin;
3982#endif
3983#ifdef INET6
3984	struct sockaddr_in6 *rin6, *sin6;
3985#endif
3986	u_char *addr;
3987	int error = 0, cantparse = 0;
3988	union {
3989		in_addr_t ival;
3990		u_char cval[4];
3991	} ip;
3992	union {
3993		in_port_t sval;
3994		u_char cval[2];
3995	} port;
3996
3997	/* 8 is the maximum length of the port# string. */
3998	addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK);
3999	clp->lc_req.nr_client = NULL;
4000	clp->lc_req.nr_lock = 0;
4001	af = AF_UNSPEC;
4002	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4003	i = fxdr_unsigned(int, *tl);
4004	if (i >= 3 && i <= 4) {
4005		error = nfsrv_mtostr(nd, addr, i);
4006		if (error)
4007			goto nfsmout;
4008#ifdef INET
4009		if (!strcmp(addr, "tcp")) {
4010			clp->lc_flags |= LCL_TCPCALLBACK;
4011			clp->lc_req.nr_sotype = SOCK_STREAM;
4012			clp->lc_req.nr_soproto = IPPROTO_TCP;
4013			af = AF_INET;
4014		} else if (!strcmp(addr, "udp")) {
4015			clp->lc_req.nr_sotype = SOCK_DGRAM;
4016			clp->lc_req.nr_soproto = IPPROTO_UDP;
4017			af = AF_INET;
4018		}
4019#endif
4020#ifdef INET6
4021		if (af == AF_UNSPEC) {
4022			if (!strcmp(addr, "tcp6")) {
4023				clp->lc_flags |= LCL_TCPCALLBACK;
4024				clp->lc_req.nr_sotype = SOCK_STREAM;
4025				clp->lc_req.nr_soproto = IPPROTO_TCP;
4026				af = AF_INET6;
4027			} else if (!strcmp(addr, "udp6")) {
4028				clp->lc_req.nr_sotype = SOCK_DGRAM;
4029				clp->lc_req.nr_soproto = IPPROTO_UDP;
4030				af = AF_INET6;
4031			}
4032		}
4033#endif
4034		if (af == AF_UNSPEC) {
4035			cantparse = 1;
4036		}
4037	} else {
4038		cantparse = 1;
4039		if (i > 0) {
4040			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4041			if (error)
4042				goto nfsmout;
4043		}
4044	}
4045	/*
4046	 * The caller has allocated clp->lc_req.nr_nam to be large enough
4047	 * for either AF_INET or AF_INET6 and zeroed out the contents.
4048	 * maxalen is set to the maximum length of the host IP address string
4049	 * plus 8 for the maximum length of the port#.
4050	 * minalen is set to the minimum length of the host IP address string
4051	 * plus 4 for the minimum length of the port#.
4052	 * These lengths do not include NULL termination,
4053	 * so INET[6]_ADDRSTRLEN - 1 is used in the calculations.
4054	 */
4055	switch (af) {
4056#ifdef INET
4057	case AF_INET:
4058		rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4059		rin->sin_family = AF_INET;
4060		rin->sin_len = sizeof(struct sockaddr_in);
4061		maxalen = INET_ADDRSTRLEN - 1 + 8;
4062		minalen = 7 + 4;
4063		break;
4064#endif
4065#ifdef INET6
4066	case AF_INET6:
4067		rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4068		rin6->sin6_family = AF_INET6;
4069		rin6->sin6_len = sizeof(struct sockaddr_in6);
4070		maxalen = INET6_ADDRSTRLEN - 1 + 8;
4071		minalen = 3 + 4;
4072		break;
4073#endif
4074	}
4075	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4076	i = fxdr_unsigned(int, *tl);
4077	if (i < 0) {
4078		error = NFSERR_BADXDR;
4079		goto nfsmout;
4080	} else if (i == 0) {
4081		cantparse = 1;
4082	} else if (!cantparse && i <= maxalen && i >= minalen) {
4083		error = nfsrv_mtostr(nd, addr, i);
4084		if (error)
4085			goto nfsmout;
4086
4087		/*
4088		 * Parse out the address fields. We expect 6 decimal numbers
4089		 * separated by '.'s for AF_INET and two decimal numbers
4090		 * preceeded by '.'s for AF_INET6.
4091		 */
4092		cp = NULL;
4093		switch (af) {
4094#ifdef INET6
4095		/*
4096		 * For AF_INET6, first parse the host address.
4097		 */
4098		case AF_INET6:
4099			cp = strchr(addr, '.');
4100			if (cp != NULL) {
4101				*cp++ = '\0';
4102				if (inet_pton(af, addr, &rin6->sin6_addr) == 1)
4103					i = 4;
4104				else {
4105					cp = NULL;
4106					cantparse = 1;
4107				}
4108			}
4109			break;
4110#endif
4111#ifdef INET
4112		case AF_INET:
4113			cp = addr;
4114			i = 0;
4115			break;
4116#endif
4117		}
4118		while (cp != NULL && *cp && i < 6) {
4119			cp2 = cp;
4120			while (*cp2 && *cp2 != '.')
4121				cp2++;
4122			if (*cp2)
4123				*cp2++ = '\0';
4124			else if (i != 5) {
4125				cantparse = 1;
4126				break;
4127			}
4128			j = nfsrv_getipnumber(cp);
4129			if (j >= 0) {
4130				if (i < 4)
4131					ip.cval[3 - i] = j;
4132				else
4133					port.cval[5 - i] = j;
4134			} else {
4135				cantparse = 1;
4136				break;
4137			}
4138			cp = cp2;
4139			i++;
4140		}
4141		if (!cantparse) {
4142			/*
4143			 * The host address INADDR_ANY is (mis)used to indicate
4144			 * "there is no valid callback address".
4145			 */
4146			switch (af) {
4147#ifdef INET6
4148			case AF_INET6:
4149				if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr,
4150				    &in6addr_any))
4151					rin6->sin6_port = htons(port.sval);
4152				else
4153					cantparse = 1;
4154				break;
4155#endif
4156#ifdef INET
4157			case AF_INET:
4158				if (ip.ival != INADDR_ANY) {
4159					rin->sin_addr.s_addr = htonl(ip.ival);
4160					rin->sin_port = htons(port.sval);
4161				} else {
4162					cantparse = 1;
4163				}
4164				break;
4165#endif
4166			}
4167		}
4168	} else {
4169		cantparse = 1;
4170		if (i > 0) {
4171			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4172			if (error)
4173				goto nfsmout;
4174		}
4175	}
4176	if (cantparse) {
4177		switch (nd->nd_nam->sa_family) {
4178#ifdef INET
4179		case AF_INET:
4180			sin = (struct sockaddr_in *)nd->nd_nam;
4181			rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4182			rin->sin_family = AF_INET;
4183			rin->sin_len = sizeof(struct sockaddr_in);
4184			rin->sin_addr.s_addr = sin->sin_addr.s_addr;
4185			rin->sin_port = 0x0;
4186			break;
4187#endif
4188#ifdef INET6
4189		case AF_INET6:
4190			sin6 = (struct sockaddr_in6 *)nd->nd_nam;
4191			rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4192			rin6->sin6_family = AF_INET6;
4193			rin6->sin6_len = sizeof(struct sockaddr_in6);
4194			rin6->sin6_addr = sin6->sin6_addr;
4195			rin6->sin6_port = 0x0;
4196			break;
4197#endif
4198		}
4199		clp->lc_program = 0;
4200	}
4201nfsmout:
4202	free(addr, M_TEMP);
4203	NFSEXITCODE2(error, nd);
4204	return (error);
4205}
4206
/*
 * Turn a string of one to three decimal digits into a number in the
 * range 0-255.
 * Return -1 upon error: an empty string, more than three characters, a
 * character that is not a decimal digit or a value above 255.
 */
static int
nfsrv_getipnumber(u_char *cp)
{
	int val = 0, ndigits = 0;

	for (; *cp != '\0'; cp++) {
		if (ndigits >= 3 || *cp < '0' || *cp > '9')
			return (-1);
		val = val * 10 + (*cp - '0');
		ndigits++;
	}
	/* An empty field is malformed, it is not a zero. */
	if (ndigits == 0 || val > 255)
		return (-1);
	return (val);
}
4228
4229/*
4230 * This function checks for restart conditions.
4231 */
4232static int
4233nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
4234    nfsv4stateid_t *stateidp, int specialid)
4235{
4236	int ret = 0;
4237
4238	/*
4239	 * First check for a server restart. Open, LockT, ReleaseLockOwner
4240	 * and DelegPurge have a clientid, the rest a stateid.
4241	 */
4242	if (flags &
4243	    (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
4244		if (clientid.lval[0] != nfsrvboottime) {
4245			ret = NFSERR_STALECLIENTID;
4246			goto out;
4247		}
4248	} else if (stateidp->other[0] != nfsrvboottime &&
4249		specialid == 0) {
4250		ret = NFSERR_STALESTATEID;
4251		goto out;
4252	}
4253
4254	/*
4255	 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
4256	 * not use a lock/open owner seqid#, so the check can be done now.
4257	 * (The others will be checked, as required, later.)
4258	 */
4259	if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
4260		goto out;
4261
4262	NFSLOCKSTATE();
4263	ret = nfsrv_checkgrace(NULL, NULL, flags);
4264	NFSUNLOCKSTATE();
4265
4266out:
4267	NFSEXITCODE(ret);
4268	return (ret);
4269}
4270
4271/*
4272 * Check for grace.
4273 */
4274static int
4275nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
4276    u_int32_t flags)
4277{
4278	int error = 0, notreclaimed;
4279	struct nfsrv_stable *sp;
4280
4281	if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_UPDATEDONE |
4282	     NFSNSF_GRACEOVER)) == 0) {
4283		/*
4284		 * First, check to see if all of the clients have done a
4285		 * ReclaimComplete.  If so, grace can end now.
4286		 */
4287		notreclaimed = 0;
4288		LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4289			if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
4290				notreclaimed = 1;
4291				break;
4292			}
4293		}
4294		if (notreclaimed == 0)
4295			nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER |
4296			    NFSNSF_NEEDLOCK);
4297	}
4298
4299	if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
4300		if (flags & NFSLCK_RECLAIM) {
4301			error = NFSERR_NOGRACE;
4302			goto out;
4303		}
4304	} else {
4305		if (!(flags & NFSLCK_RECLAIM)) {
4306			error = NFSERR_GRACE;
4307			goto out;
4308		}
4309		if (nd != NULL && clp != NULL &&
4310		    (nd->nd_flag & ND_NFSV41) != 0 &&
4311		    (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4312			error = NFSERR_NOGRACE;
4313			goto out;
4314		}
4315
4316		/*
4317		 * If grace is almost over and we are still getting Reclaims,
4318		 * extend grace a bit.
4319		 */
4320		if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4321		    nfsrv_stablefirst.nsf_eograce)
4322			nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
4323				NFSRV_LEASEDELTA;
4324	}
4325
4326out:
4327	NFSEXITCODE(error);
4328	return (error);
4329}
4330
4331/*
4332 * Do a server callback.
4333 */
4334static int
4335nfsrv_docallback(struct nfsclient *clp, int procnum,
4336    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
4337    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
4338{
4339	mbuf_t m;
4340	u_int32_t *tl;
4341	struct nfsrv_descript nfsd, *nd = &nfsd;
4342	struct ucred *cred;
4343	int error = 0;
4344	u_int32_t callback;
4345	struct nfsdsession *sep = NULL;
4346
4347	cred = newnfs_getcred();
4348	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
4349	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
4350		NFSUNLOCKSTATE();
4351		panic("docallb");
4352	}
4353	clp->lc_cbref++;
4354
4355	/*
4356	 * Fill the callback program# and version into the request
4357	 * structure for newnfs_connect() to use.
4358	 */
4359	clp->lc_req.nr_prog = clp->lc_program;
4360#ifdef notnow
4361	if ((clp->lc_flags & LCL_NFSV41) != 0)
4362		clp->lc_req.nr_vers = NFSV41_CBVERS;
4363	else
4364#endif
4365		clp->lc_req.nr_vers = NFSV4_CBVERS;
4366
4367	/*
4368	 * First, fill in some of the fields of nd and cr.
4369	 */
4370	nd->nd_flag = ND_NFSV4;
4371	if (clp->lc_flags & LCL_GSS)
4372		nd->nd_flag |= ND_KERBV;
4373	if ((clp->lc_flags & LCL_NFSV41) != 0)
4374		nd->nd_flag |= ND_NFSV41;
4375	nd->nd_repstat = 0;
4376	cred->cr_uid = clp->lc_uid;
4377	cred->cr_gid = clp->lc_gid;
4378	callback = clp->lc_callback;
4379	NFSUNLOCKSTATE();
4380	cred->cr_ngroups = 1;
4381
4382	/*
4383	 * Get the first mbuf for the request.
4384	 */
4385	MGET(m, M_WAITOK, MT_DATA);
4386	mbuf_setlen(m, 0);
4387	nd->nd_mreq = nd->nd_mb = m;
4388	nd->nd_bpos = NFSMTOD(m, caddr_t);
4389
4390	/*
4391	 * and build the callback request.
4392	 */
4393	if (procnum == NFSV4OP_CBGETATTR) {
4394		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
4395		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
4396		    "CB Getattr", &sep);
4397		if (error != 0) {
4398			mbuf_freem(nd->nd_mreq);
4399			goto errout;
4400		}
4401		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
4402		(void)nfsrv_putattrbit(nd, attrbitp);
4403	} else if (procnum == NFSV4OP_CBRECALL) {
4404		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
4405		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
4406		    "CB Recall", &sep);
4407		if (error != 0) {
4408			mbuf_freem(nd->nd_mreq);
4409			goto errout;
4410		}
4411		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
4412		*tl++ = txdr_unsigned(stateidp->seqid);
4413		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
4414		    NFSX_STATEIDOTHER);
4415		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
4416		if (trunc)
4417			*tl = newnfs_true;
4418		else
4419			*tl = newnfs_false;
4420		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
4421	} else if (procnum == NFSV4PROC_CBNULL) {
4422		nd->nd_procnum = NFSV4PROC_CBNULL;
4423		if ((clp->lc_flags & LCL_NFSV41) != 0) {
4424			error = nfsv4_getcbsession(clp, &sep);
4425			if (error != 0) {
4426				mbuf_freem(nd->nd_mreq);
4427				goto errout;
4428			}
4429		}
4430	} else {
4431		error = NFSERR_SERVERFAULT;
4432		mbuf_freem(nd->nd_mreq);
4433		goto errout;
4434	}
4435
4436	/*
4437	 * Call newnfs_connect(), as required, and then newnfs_request().
4438	 */
4439	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
4440	if (clp->lc_req.nr_client == NULL) {
4441		if ((clp->lc_flags & LCL_NFSV41) != 0) {
4442			error = ECONNREFUSED;
4443			nfsrv_freesession(sep, NULL);
4444		} else if (nd->nd_procnum == NFSV4PROC_CBNULL)
4445			error = newnfs_connect(NULL, &clp->lc_req, cred,
4446			    NULL, 1);
4447		else
4448			error = newnfs_connect(NULL, &clp->lc_req, cred,
4449			    NULL, 3);
4450	}
4451	newnfs_sndunlock(&clp->lc_req.nr_lock);
4452	if (!error) {
4453		if ((nd->nd_flag & ND_NFSV41) != 0) {
4454			KASSERT(sep != NULL, ("sep NULL"));
4455			if (sep->sess_cbsess.nfsess_xprt != NULL)
4456				error = newnfs_request(nd, NULL, clp,
4457				    &clp->lc_req, NULL, NULL, cred,
4458				    clp->lc_program, clp->lc_req.nr_vers, NULL,
4459				    1, NULL, &sep->sess_cbsess);
4460			else {
4461				/*
4462				 * This should probably never occur, but if a
4463				 * client somehow does an RPC without a
4464				 * SequenceID Op that causes a callback just
4465				 * after the nfsd threads have been terminated
4466				 * and restared we could conceivably get here
4467				 * without a backchannel xprt.
4468				 */
4469				printf("nfsrv_docallback: no xprt\n");
4470				error = ECONNREFUSED;
4471			}
4472			nfsrv_freesession(sep, NULL);
4473		} else
4474			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
4475			    NULL, NULL, cred, clp->lc_program,
4476			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
4477	}
4478errout:
4479	NFSFREECRED(cred);
4480
4481	/*
4482	 * If error is set here, the Callback path isn't working
4483	 * properly, so twiddle the appropriate LCL_ flags.
4484	 * (nd_repstat != 0 indicates the Callback path is working,
4485	 *  but the callback failed on the client.)
4486	 */
4487	if (error) {
4488		/*
4489		 * Mark the callback pathway down, which disabled issuing
4490		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
4491		 */
4492		NFSLOCKSTATE();
4493		clp->lc_flags |= LCL_CBDOWN;
4494		NFSUNLOCKSTATE();
4495	} else {
4496		/*
4497		 * Callback worked. If the callback path was down, disable
4498		 * callbacks, so no more delegations will be issued. (This
4499		 * is done on the assumption that the callback pathway is
4500		 * flakey.)
4501		 */
4502		NFSLOCKSTATE();
4503		if (clp->lc_flags & LCL_CBDOWN)
4504			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
4505		NFSUNLOCKSTATE();
4506		if (nd->nd_repstat)
4507			error = nd->nd_repstat;
4508		else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
4509			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4510			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
4511			    p, NULL);
4512		mbuf_freem(nd->nd_mrep);
4513	}
4514	NFSLOCKSTATE();
4515	clp->lc_cbref--;
4516	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
4517		clp->lc_flags &= ~LCL_WAKEUPWANTED;
4518		wakeup(clp);
4519	}
4520	NFSUNLOCKSTATE();
4521
4522	NFSEXITCODE(error);
4523	return (error);
4524}
4525
4526/*
4527 * Set up the compound RPC for the callback.
4528 */
4529static int
4530nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4531    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp)
4532{
4533	uint32_t *tl;
4534	int error, len;
4535
4536	len = strlen(optag);
4537	(void)nfsm_strtom(nd, optag, len);
4538	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4539	if ((nd->nd_flag & ND_NFSV41) != 0) {
4540		*tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4541		*tl++ = txdr_unsigned(callback);
4542		*tl++ = txdr_unsigned(2);
4543		*tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4544		error = nfsv4_setcbsequence(nd, clp, 1, sepp);
4545		if (error != 0)
4546			return (error);
4547		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4548		*tl = txdr_unsigned(op);
4549	} else {
4550		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4551		*tl++ = txdr_unsigned(callback);
4552		*tl++ = txdr_unsigned(1);
4553		*tl = txdr_unsigned(op);
4554	}
4555	return (0);
4556}
4557
/*
 * Return the next index# for a clientid. This is simply a counter that
 * is bumped on every call. Should the 32bit unsigned ever wrap around to
 * zero, a complaint is printed and zero is returned; the server should
 * be rebooted at that point.
 * At an average rate of one new client per second, it will wrap around in
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
4578
4579/*
4580 * Return the next index# for a stateid. Mostly just increment and return
4581 * the next one, but... if the 32bit unsigned does actually wrap around
4582 * (will a BSD server stay up that long?), find
4583 * new start and end values.
4584 */
4585static u_int32_t
4586nfsrv_nextstateindex(struct nfsclient *clp)
4587{
4588	struct nfsstate *stp;
4589	int i;
4590	u_int32_t canuse, min_index, max_index;
4591
4592	if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4593		clp->lc_stateindex++;
4594		if (clp->lc_stateindex != clp->lc_statemaxindex)
4595			return (clp->lc_stateindex);
4596	}
4597
4598	/*
4599	 * Yuck, we've hit the end.
4600	 * Look for a new min and max.
4601	 */
4602	min_index = 0;
4603	max_index = 0xffffffff;
4604	for (i = 0; i < nfsrv_statehashsize; i++) {
4605	    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4606		if (stp->ls_stateid.other[2] > 0x80000000) {
4607		    if (stp->ls_stateid.other[2] < max_index)
4608			max_index = stp->ls_stateid.other[2];
4609		} else {
4610		    if (stp->ls_stateid.other[2] > min_index)
4611			min_index = stp->ls_stateid.other[2];
4612		}
4613	    }
4614	}
4615
4616	/*
4617	 * Yikes, highly unlikely, but I'll handle it anyhow.
4618	 */
4619	if (min_index == 0x80000000 && max_index == 0x80000001) {
4620	    canuse = 0;
4621	    /*
4622	     * Loop around until we find an unused entry. Return that
4623	     * and set LCL_INDEXNOTOK, so the search will continue next time.
4624	     * (This is one of those rare cases where a goto is the
4625	     *  cleanest way to code the loop.)
4626	     */
4627tryagain:
4628	    for (i = 0; i < nfsrv_statehashsize; i++) {
4629		LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4630		    if (stp->ls_stateid.other[2] == canuse) {
4631			canuse++;
4632			goto tryagain;
4633		    }
4634		}
4635	    }
4636	    clp->lc_flags |= LCL_INDEXNOTOK;
4637	    return (canuse);
4638	}
4639
4640	/*
4641	 * Ok to start again from min + 1.
4642	 */
4643	clp->lc_stateindex = min_index + 1;
4644	clp->lc_statemaxindex = max_index;
4645	clp->lc_flags &= ~LCL_INDEXNOTOK;
4646	return (clp->lc_stateindex);
4647}
4648
4649/*
4650 * The following functions handle the stable storage file that deals with
4651 * the edge conditions described in RFC3530 Sec. 8.6.3.
4652 * The file is as follows:
4653 * - a single record at the beginning that has the lease time of the
4654 *   previous server instance (before the last reboot) and the nfsrvboottime
4655 *   values for the previous server boots.
4656 *   These previous boot times are used to ensure that the current
4657 *   nfsrvboottime does not, somehow, get set to a previous one.
4658 *   (This is important so that Stale ClientIDs and StateIDs can
4659 *    be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
4661 * - followed by some number of appended records with:
4662 *   - client id string
4663 *   - flag that indicates it is a record revoking state via lease
4664 *     expiration or similar
4665 *     OR has successfully acquired state.
4666 * These structures vary in length, with the client string at the end, up
4667 * to NFSV4_OPAQUELIMIT in size.
4668 *
4669 * At the end of the grace period, the file is truncated, the first
4670 * record is rewritten with updated information and any acquired state
4671 * records for successful reclaims of state are written.
4672 *
4673 * Subsequent records are appended when the first state is issued to
4674 * a client and when state is revoked for a client.
4675 *
4676 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological order.
4678 * If, for some reason, the file can't be read, the grace period is
4679 * immediately terminated and all reclaims get NFSERR_NOGRACE.
4680 */
4681
4682/*
4683 * Read in the stable storage file. Called by nfssvc() before the nfsd
4684 * processes start servicing requests.
4685 */
4686void
4687nfsrv_setupstable(NFSPROC_T *p)
4688{
4689	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4690	struct nfsrv_stable *sp, *nsp;
4691	struct nfst_rec *tsp;
4692	int error, i, tryagain;
4693	off_t off = 0;
4694	ssize_t aresid, len;
4695
4696	/*
4697	 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
4698	 * a reboot, so state has not been lost.
4699	 */
4700	if (sf->nsf_flags & NFSNSF_UPDATEDONE)
4701		return;
4702	/*
4703	 * Set Grace over just until the file reads successfully.
4704	 */
4705	nfsrvboottime = time_second;
4706	LIST_INIT(&sf->nsf_head);
4707	sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4708	sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
4709	if (sf->nsf_fp == NULL)
4710		return;
4711	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4712	    (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
4713	    0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4714	if (error || aresid || sf->nsf_numboots == 0 ||
4715		sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
4716		return;
4717
4718	/*
4719	 * Now, read in the boottimes.
4720	 */
4721	sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
4722		sizeof (time_t), M_TEMP, M_WAITOK);
4723	off = sizeof (struct nfsf_rec);
4724	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4725	    (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
4726	    UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4727	if (error || aresid) {
4728		free((caddr_t)sf->nsf_bootvals, M_TEMP);
4729		sf->nsf_bootvals = NULL;
4730		return;
4731	}
4732
4733	/*
4734	 * Make sure this nfsrvboottime is different from all recorded
4735	 * previous ones.
4736	 */
4737	do {
4738		tryagain = 0;
4739		for (i = 0; i < sf->nsf_numboots; i++) {
4740			if (nfsrvboottime == sf->nsf_bootvals[i]) {
4741				nfsrvboottime++;
4742				tryagain = 1;
4743				break;
4744			}
4745		}
4746	} while (tryagain);
4747
4748	sf->nsf_flags |= NFSNSF_OK;
4749	off += (sf->nsf_numboots * sizeof (time_t));
4750
4751	/*
4752	 * Read through the file, building a list of records for grace
4753	 * checking.
4754	 * Each record is between sizeof (struct nfst_rec) and
4755	 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
4756	 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
4757	 */
4758	tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4759		NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
4760	do {
4761	    error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4762	        (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
4763	        off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4764	    len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
4765	    if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
4766		len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
4767		/*
4768		 * Yuck, the file has been corrupted, so just return
4769		 * after clearing out any restart state, so the grace period
4770		 * is over.
4771		 */
4772		LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4773			LIST_REMOVE(sp, nst_list);
4774			free((caddr_t)sp, M_TEMP);
4775		}
4776		free((caddr_t)tsp, M_TEMP);
4777		sf->nsf_flags &= ~NFSNSF_OK;
4778		free((caddr_t)sf->nsf_bootvals, M_TEMP);
4779		sf->nsf_bootvals = NULL;
4780		return;
4781	    }
4782	    if (len > 0) {
4783		off += sizeof (struct nfst_rec) + tsp->len - 1;
4784		/*
4785		 * Search the list for a matching client.
4786		 */
4787		LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
4788			if (tsp->len == sp->nst_len &&
4789			    !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
4790				break;
4791		}
4792		if (sp == LIST_END(&sf->nsf_head)) {
4793			sp = (struct nfsrv_stable *)malloc(tsp->len +
4794				sizeof (struct nfsrv_stable) - 1, M_TEMP,
4795				M_WAITOK);
4796			NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
4797				sizeof (struct nfst_rec) + tsp->len - 1);
4798			LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
4799		} else {
4800			if (tsp->flag == NFSNST_REVOKE)
4801				sp->nst_flag |= NFSNST_REVOKE;
4802			else
4803				/*
4804				 * A subsequent timestamp indicates the client
4805				 * did a setclientid/confirm and any previous
4806				 * revoke is no longer relevant.
4807				 */
4808				sp->nst_flag &= ~NFSNST_REVOKE;
4809		}
4810	    }
4811	} while (len > 0);
4812	free((caddr_t)tsp, M_TEMP);
4813	sf->nsf_flags = NFSNSF_OK;
4814	sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
4815		NFSRV_LEASEDELTA;
4816}
4817
4818/*
4819 * Update the stable storage file, now that the grace period is over.
4820 */
4821void
4822nfsrv_updatestable(NFSPROC_T *p)
4823{
4824	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4825	struct nfsrv_stable *sp, *nsp;
4826	int i;
4827	struct nfsvattr nva;
4828	vnode_t vp;
4829#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
4830	mount_t mp = NULL;
4831#endif
4832	int error;
4833
4834	if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
4835		return;
4836	sf->nsf_flags |= NFSNSF_UPDATEDONE;
4837	/*
4838	 * Ok, we need to rewrite the stable storage file.
4839	 * - truncate to 0 length
4840	 * - write the new first structure
4841	 * - loop through the data structures, writing out any that
4842	 *   have timestamps older than the old boot
4843	 */
4844	if (sf->nsf_bootvals) {
4845		sf->nsf_numboots++;
4846		for (i = sf->nsf_numboots - 2; i >= 0; i--)
4847			sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
4848	} else {
4849		sf->nsf_numboots = 1;
4850		sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
4851			M_TEMP, M_WAITOK);
4852	}
4853	sf->nsf_bootvals[0] = nfsrvboottime;
4854	sf->nsf_lease = nfsrv_lease;
4855	NFSVNO_ATTRINIT(&nva);
4856	NFSVNO_SETATTRVAL(&nva, size, 0);
4857	vp = NFSFPVNODE(sf->nsf_fp);
4858	vn_start_write(vp, &mp, V_WAIT);
4859	if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4860		error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
4861		    NULL);
4862		NFSVOPUNLOCK(vp, 0);
4863	} else
4864		error = EPERM;
4865	vn_finished_write(mp);
4866	if (!error)
4867	    error = NFSD_RDWR(UIO_WRITE, vp,
4868		(caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
4869		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4870	if (!error)
4871	    error = NFSD_RDWR(UIO_WRITE, vp,
4872		(caddr_t)sf->nsf_bootvals,
4873		sf->nsf_numboots * sizeof (time_t),
4874		(off_t)(sizeof (struct nfsf_rec)),
4875		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4876	free((caddr_t)sf->nsf_bootvals, M_TEMP);
4877	sf->nsf_bootvals = NULL;
4878	if (error) {
4879		sf->nsf_flags &= ~NFSNSF_OK;
4880		printf("EEK! Can't write NfsV4 stable storage file\n");
4881		return;
4882	}
4883	sf->nsf_flags |= NFSNSF_OK;
4884
4885	/*
4886	 * Loop through the list and write out timestamp records for
4887	 * any clients that successfully reclaimed state.
4888	 */
4889	LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4890		if (sp->nst_flag & NFSNST_GOTSTATE) {
4891			nfsrv_writestable(sp->nst_client, sp->nst_len,
4892				NFSNST_NEWSTATE, p);
4893			sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
4894		}
4895		LIST_REMOVE(sp, nst_list);
4896		free((caddr_t)sp, M_TEMP);
4897	}
4898	nfsrv_backupstable();
4899}
4900
4901/*
4902 * Append a record to the stable storage file.
4903 */
4904void
4905nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
4906{
4907	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4908	struct nfst_rec *sp;
4909	int error;
4910
4911	if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
4912		return;
4913	sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4914		len - 1, M_TEMP, M_WAITOK);
4915	sp->len = len;
4916	NFSBCOPY(client, sp->client, len);
4917	sp->flag = flag;
4918	error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
4919	    (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
4920	    UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
4921	free((caddr_t)sp, M_TEMP);
4922	if (error) {
4923		sf->nsf_flags &= ~NFSNSF_OK;
4924		printf("EEK! Can't write NfsV4 stable storage file\n");
4925	}
4926}
4927
4928/*
4929 * This function is called during the grace period to mark a client
4930 * that successfully reclaimed state.
4931 */
4932static void
4933nfsrv_markstable(struct nfsclient *clp)
4934{
4935	struct nfsrv_stable *sp;
4936
4937	/*
4938	 * First find the client structure.
4939	 */
4940	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4941		if (sp->nst_len == clp->lc_idlen &&
4942		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4943			break;
4944	}
4945	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
4946		return;
4947
4948	/*
4949	 * Now, just mark it and set the nfsclient back pointer.
4950	 */
4951	sp->nst_flag |= NFSNST_GOTSTATE;
4952	sp->nst_clp = clp;
4953}
4954
4955/*
4956 * This function is called when a NFSv4.1 client does a ReclaimComplete.
4957 * Very similar to nfsrv_markstable(), except for the flag being set.
4958 */
4959static void
4960nfsrv_markreclaim(struct nfsclient *clp)
4961{
4962	struct nfsrv_stable *sp;
4963
4964	/*
4965	 * First find the client structure.
4966	 */
4967	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4968		if (sp->nst_len == clp->lc_idlen &&
4969		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4970			break;
4971	}
4972	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
4973		return;
4974
4975	/*
4976	 * Now, just set the flag.
4977	 */
4978	sp->nst_flag |= NFSNST_RECLAIMED;
4979}
4980
4981/*
4982 * This function is called for a reclaim, to see if it gets grace.
4983 * It returns 0 if a reclaim is allowed, 1 otherwise.
4984 */
4985static int
4986nfsrv_checkstable(struct nfsclient *clp)
4987{
4988	struct nfsrv_stable *sp;
4989
4990	/*
4991	 * First, find the entry for the client.
4992	 */
4993	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4994		if (sp->nst_len == clp->lc_idlen &&
4995		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4996			break;
4997	}
4998
4999	/*
5000	 * If not in the list, state was revoked or no state was issued
5001	 * since the previous reboot, a reclaim is denied.
5002	 */
5003	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
5004	    (sp->nst_flag & NFSNST_REVOKE) ||
5005	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
5006		return (1);
5007	return (0);
5008}
5009
5010/*
5011 * Test for and try to clear out a conflicting client. This is called by
5012 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
5013 * a found.
5014 * The trick here is that it can't revoke a conflicting client with an
5015 * expired lease unless it holds the v4root lock, so...
5016 * If no v4root lock, get the lock and return 1 to indicate "try again".
5017 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
5018 * the revocation worked and the conflicting client is "bye, bye", so it
5019 * can be tried again.
5020 * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
5021 * Unlocks State before a non-zero value is returned.
5022 */
5023static int
5024nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
5025    NFSPROC_T *p)
5026{
5027	int gotlock, lktype = 0;
5028
5029	/*
5030	 * If lease hasn't expired, we can't fix it.
5031	 */
5032	if (clp->lc_expiry >= NFSD_MONOSEC ||
5033	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
5034		return (0);
5035	if (*haslockp == 0) {
5036		NFSUNLOCKSTATE();
5037		if (vp != NULL) {
5038			lktype = NFSVOPISLOCKED(vp);
5039			NFSVOPUNLOCK(vp, 0);
5040		}
5041		NFSLOCKV4ROOTMUTEX();
5042		nfsv4_relref(&nfsv4rootfs_lock);
5043		do {
5044			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
5045			    NFSV4ROOTLOCKMUTEXPTR, NULL);
5046		} while (!gotlock);
5047		NFSUNLOCKV4ROOTMUTEX();
5048		*haslockp = 1;
5049		if (vp != NULL) {
5050			NFSVOPLOCK(vp, lktype | LK_RETRY);
5051			if ((vp->v_iflag & VI_DOOMED) != 0)
5052				return (2);
5053		}
5054		return (1);
5055	}
5056	NFSUNLOCKSTATE();
5057
5058	/*
5059	 * Ok, we can expire the conflicting client.
5060	 */
5061	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
5062	nfsrv_backupstable();
5063	nfsrv_cleanclient(clp, p);
5064	nfsrv_freedeleglist(&clp->lc_deleg);
5065	nfsrv_freedeleglist(&clp->lc_olddeleg);
5066	LIST_REMOVE(clp, lc_hash);
5067	nfsrv_zapclient(clp, p);
5068	return (1);
5069}
5070
5071/*
5072 * Resolve a delegation conflict.
5073 * Returns 0 to indicate the conflict was resolved without sleeping.
5074 * Return -1 to indicate that the caller should check for conflicts again.
5075 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
5076 *
5077 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
5078 * for a return of 0, since there was no sleep and it could be required
5079 * later. It is released for a return of NFSERR_DELAY, since the caller
5080 * will return that error. It is released when a sleep was done waiting
5081 * for the delegation to be returned or expire (so that other nfsds can
5082 * handle ops). Then, it must be acquired for the write to stable storage.
5083 * (This function is somewhat similar to nfsrv_clientconflict(), but
5084 *  the semantics differ in a couple of subtle ways. The return of 0
5085 *  indicates the conflict was resolved without sleeping here, not
5086 *  that the conflict can't be resolved and the handling of nfsv4root_lock
5087 *  differs, as noted above.)
5088 * Unlocks State before returning a non-zero value.
5089 */
5090static int
5091nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
5092    vnode_t vp)
5093{
5094	struct nfsclient *clp = stp->ls_clp;
5095	int gotlock, error, lktype = 0, retrycnt, zapped_clp;
5096	nfsv4stateid_t tstateid;
5097	fhandle_t tfh;
5098
5099	/*
5100	 * If the conflict is with an old delegation...
5101	 */
5102	if (stp->ls_flags & NFSLCK_OLDDELEG) {
5103		/*
5104		 * You can delete it, if it has expired.
5105		 */
5106		if (clp->lc_delegtime < NFSD_MONOSEC) {
5107			nfsrv_freedeleg(stp);
5108			NFSUNLOCKSTATE();
5109			error = -1;
5110			goto out;
5111		}
5112		NFSUNLOCKSTATE();
5113		/*
5114		 * During this delay, the old delegation could expire or it
5115		 * could be recovered by the client via an Open with
5116		 * CLAIM_DELEGATE_PREV.
5117		 * Release the nfsv4root_lock, if held.
5118		 */
5119		if (*haslockp) {
5120			*haslockp = 0;
5121			NFSLOCKV4ROOTMUTEX();
5122			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5123			NFSUNLOCKV4ROOTMUTEX();
5124		}
5125		error = NFSERR_DELAY;
5126		goto out;
5127	}
5128
5129	/*
5130	 * It's a current delegation, so:
5131	 * - check to see if the delegation has expired
5132	 *   - if so, get the v4root lock and then expire it
5133	 */
5134	if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
5135		/*
5136		 * - do a recall callback, since not yet done
5137		 * For now, never allow truncate to be set. To use
5138		 * truncate safely, it must be guaranteed that the
5139		 * Remove, Rename or Setattr with size of 0 will
5140		 * succeed and that would require major changes to
5141		 * the VFS/Vnode OPs.
5142		 * Set the expiry time large enough so that it won't expire
5143		 * until after the callback, then set it correctly, once
5144		 * the callback is done. (The delegation will now time
5145		 * out whether or not the Recall worked ok. The timeout
5146		 * will be extended when ops are done on the delegation
5147		 * stateid, up to the timelimit.)
5148		 */
5149		stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
5150		    NFSRV_LEASEDELTA;
5151		stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
5152		    NFSRV_LEASEDELTA;
5153		stp->ls_flags |= NFSLCK_DELEGRECALL;
5154
5155		/*
5156		 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
5157		 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
5158		 * in order to try and avoid a race that could happen
5159		 * when a CBRecall request passed the Open reply with
5160		 * the delegation in it when transitting the network.
5161		 * Since nfsrv_docallback will sleep, don't use stp after
5162		 * the call.
5163		 */
5164		NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
5165		    sizeof (tstateid));
5166		NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
5167		    sizeof (tfh));
5168		NFSUNLOCKSTATE();
5169		if (*haslockp) {
5170			*haslockp = 0;
5171			NFSLOCKV4ROOTMUTEX();
5172			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5173			NFSUNLOCKV4ROOTMUTEX();
5174		}
5175		retrycnt = 0;
5176		do {
5177		    error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
5178			&tstateid, 0, &tfh, NULL, NULL, p);
5179		    retrycnt++;
5180		} while ((error == NFSERR_BADSTATEID ||
5181		    error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
5182		error = NFSERR_DELAY;
5183		goto out;
5184	}
5185
5186	if (clp->lc_expiry >= NFSD_MONOSEC &&
5187	    stp->ls_delegtime >= NFSD_MONOSEC) {
5188		NFSUNLOCKSTATE();
5189		/*
5190		 * A recall has been done, but it has not yet expired.
5191		 * So, RETURN_DELAY.
5192		 */
5193		if (*haslockp) {
5194			*haslockp = 0;
5195			NFSLOCKV4ROOTMUTEX();
5196			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5197			NFSUNLOCKV4ROOTMUTEX();
5198		}
5199		error = NFSERR_DELAY;
5200		goto out;
5201	}
5202
5203	/*
5204	 * If we don't yet have the lock, just get it and then return,
5205	 * since we need that before deleting expired state, such as
5206	 * this delegation.
5207	 * When getting the lock, unlock the vnode, so other nfsds that
5208	 * are in progress, won't get stuck waiting for the vnode lock.
5209	 */
5210	if (*haslockp == 0) {
5211		NFSUNLOCKSTATE();
5212		if (vp != NULL) {
5213			lktype = NFSVOPISLOCKED(vp);
5214			NFSVOPUNLOCK(vp, 0);
5215		}
5216		NFSLOCKV4ROOTMUTEX();
5217		nfsv4_relref(&nfsv4rootfs_lock);
5218		do {
5219			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
5220			    NFSV4ROOTLOCKMUTEXPTR, NULL);
5221		} while (!gotlock);
5222		NFSUNLOCKV4ROOTMUTEX();
5223		*haslockp = 1;
5224		if (vp != NULL) {
5225			NFSVOPLOCK(vp, lktype | LK_RETRY);
5226			if ((vp->v_iflag & VI_DOOMED) != 0) {
5227				*haslockp = 0;
5228				NFSLOCKV4ROOTMUTEX();
5229				nfsv4_unlock(&nfsv4rootfs_lock, 1);
5230				NFSUNLOCKV4ROOTMUTEX();
5231				error = NFSERR_PERM;
5232				goto out;
5233			}
5234		}
5235		error = -1;
5236		goto out;
5237	}
5238
5239	NFSUNLOCKSTATE();
5240	/*
5241	 * Ok, we can delete the expired delegation.
5242	 * First, write the Revoke record to stable storage and then
5243	 * clear out the conflict.
5244	 * Since all other nfsd threads are now blocked, we can safely
5245	 * sleep without the state changing.
5246	 */
5247	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
5248	nfsrv_backupstable();
5249	if (clp->lc_expiry < NFSD_MONOSEC) {
5250		nfsrv_cleanclient(clp, p);
5251		nfsrv_freedeleglist(&clp->lc_deleg);
5252		nfsrv_freedeleglist(&clp->lc_olddeleg);
5253		LIST_REMOVE(clp, lc_hash);
5254		zapped_clp = 1;
5255	} else {
5256		nfsrv_freedeleg(stp);
5257		zapped_clp = 0;
5258	}
5259	if (zapped_clp)
5260		nfsrv_zapclient(clp, p);
5261	error = -1;
5262
5263out:
5264	NFSEXITCODE(error);
5265	return (error);
5266}
5267
5268/*
5269 * Check for a remove allowed, if remove is set to 1 and get rid of
5270 * delegations.
5271 */
5272int
5273nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
5274{
5275	struct nfsstate *stp;
5276	struct nfslockfile *lfp;
5277	int error, haslock = 0;
5278	fhandle_t nfh;
5279
5280	/*
5281	 * First, get the lock file structure.
5282	 * (A return of -1 means no associated state, so remove ok.)
5283	 */
5284	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5285tryagain:
5286	NFSLOCKSTATE();
5287	if (!error)
5288		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5289	if (error) {
5290		NFSUNLOCKSTATE();
5291		if (haslock) {
5292			NFSLOCKV4ROOTMUTEX();
5293			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5294			NFSUNLOCKV4ROOTMUTEX();
5295		}
5296		if (error == -1)
5297			error = 0;
5298		goto out;
5299	}
5300
5301	/*
5302	 * Now, we must Recall any delegations.
5303	 */
5304	error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
5305	if (error) {
5306		/*
5307		 * nfsrv_cleandeleg() unlocks state for non-zero
5308		 * return.
5309		 */
5310		if (error == -1)
5311			goto tryagain;
5312		if (haslock) {
5313			NFSLOCKV4ROOTMUTEX();
5314			nfsv4_unlock(&nfsv4rootfs_lock, 1);
5315			NFSUNLOCKV4ROOTMUTEX();
5316		}
5317		goto out;
5318	}
5319
5320	/*
5321	 * Now, look for a conflicting open share.
5322	 */
5323	if (remove) {
5324		/*
5325		 * If the entry in the directory was the last reference to the
5326		 * corresponding filesystem object, the object can be destroyed
5327		 * */
5328		if(lfp->lf_usecount>1)
5329			LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
5330				if (stp->ls_flags & NFSLCK_WRITEDENY) {
5331					error = NFSERR_FILEOPEN;
5332					break;
5333				}
5334			}
5335	}
5336
5337	NFSUNLOCKSTATE();
5338	if (haslock) {
5339		NFSLOCKV4ROOTMUTEX();
5340		nfsv4_unlock(&nfsv4rootfs_lock, 1);
5341		NFSUNLOCKV4ROOTMUTEX();
5342	}
5343
5344out:
5345	NFSEXITCODE(error);
5346	return (error);
5347}
5348
5349/*
5350 * Clear out all delegations for the file referred to by lfp.
5351 * May return NFSERR_DELAY, if there will be a delay waiting for
5352 * delegations to expire.
5353 * Returns -1 to indicate it slept while recalling a delegation.
5354 * This function has the side effect of deleting the nfslockfile structure,
5355 * if it no longer has associated state and didn't have to sleep.
5356 * Unlocks State before a non-zero value is returned.
5357 */
5358static int
5359nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5360    struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5361{
5362	struct nfsstate *stp, *nstp;
5363	int ret = 0;
5364
5365	stp = LIST_FIRST(&lfp->lf_deleg);
5366	while (stp != LIST_END(&lfp->lf_deleg)) {
5367		nstp = LIST_NEXT(stp, ls_file);
5368		if (stp->ls_clp != clp) {
5369			ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5370			if (ret) {
5371				/*
5372				 * nfsrv_delegconflict() unlocks state
5373				 * when it returns non-zero.
5374				 */
5375				goto out;
5376			}
5377		}
5378		stp = nstp;
5379	}
5380out:
5381	NFSEXITCODE(ret);
5382	return (ret);
5383}
5384
5385/*
5386 * There are certain operations that, when being done outside of NFSv4,
5387 * require that any NFSv4 delegation for the file be recalled.
5388 * This function is to be called for those cases:
5389 * VOP_RENAME() - When a delegation is being recalled for any reason,
5390 *	the client may have to do Opens against the server, using the file's
5391 *	final component name. If the file has been renamed on the server,
5392 *	that component name will be incorrect and the Open will fail.
5393 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5394 *	been removed on the server, if there is a delegation issued to
5395 *	that client for the file. I say "theoretically" since clients
5396 *	normally do an Access Op before the Open and that Access Op will
5397 *	fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5398 *	they will detect the file's removal in the same manner. (There is
5399 *	one case where RFC3530 allows a client to do an Open without first
5400 *	doing an Access Op, which is passage of a check against the ACE
5401 *	returned with a Write delegation, but current practice is to ignore
5402 *	the ACE and always do an Access Op.)
5403 *	Since the functions can only be called with an unlocked vnode, this
5404 *	can't be done at this time.
5405 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5406 *	locks locally in the client, which are not visible to the server. To
5407 *	deal with this, issuing of delegations for a vnode must be disabled
5408 *	and all delegations for the vnode recalled. This is done via the
5409 *	second function, using the VV_DISABLEDELEG vflag on the vnode.
5410 */
5411void
5412nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5413{
5414	time_t starttime;
5415	int error;
5416
5417	/*
5418	 * First, check to see if the server is currently running and it has
5419	 * been called for a regular file when issuing delegations.
5420	 */
5421	if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
5422	    nfsrv_issuedelegs == 0)
5423		return;
5424
5425	KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5426	/*
5427	 * First, get a reference on the nfsv4rootfs_lock so that an
5428	 * exclusive lock cannot be acquired by another thread.
5429	 */
5430	NFSLOCKV4ROOTMUTEX();
5431	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5432	NFSUNLOCKV4ROOTMUTEX();
5433
5434	/*
5435	 * Now, call nfsrv_checkremove() in a loop while it returns
5436	 * NFSERR_DELAY. Return upon any other error or when timed out.
5437	 */
5438	starttime = NFSD_MONOSEC;
5439	do {
5440		if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5441			error = nfsrv_checkremove(vp, 0, p);
5442			NFSVOPUNLOCK(vp, 0);
5443		} else
5444			error = EPERM;
5445		if (error == NFSERR_DELAY) {
5446			if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5447				break;
5448			/* Sleep for a short period of time */
5449			(void) nfs_catnap(PZERO, 0, "nfsremove");
5450		}
5451	} while (error == NFSERR_DELAY);
5452	NFSLOCKV4ROOTMUTEX();
5453	nfsv4_relref(&nfsv4rootfs_lock);
5454	NFSUNLOCKV4ROOTMUTEX();
5455}
5456
5457void
5458nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
5459{
5460
5461#ifdef VV_DISABLEDELEG
5462	/*
5463	 * First, flag issuance of delegations disabled.
5464	 */
5465	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
5466#endif
5467
5468	/*
5469	 * Then call nfsd_recalldelegation() to get rid of all extant
5470	 * delegations.
5471	 */
5472	nfsd_recalldelegation(vp, p);
5473}
5474
5475/*
5476 * Check for conflicting locks, etc. and then get rid of delegations.
5477 * (At one point I thought that I should get rid of delegations for any
5478 *  Setattr, since it could potentially disallow the I/O op (read or write)
5479 *  allowed by the delegation. However, Setattr Ops that aren't changing
5480 *  the size get a stateid of all 0s, so you can't tell if it is a delegation
5481 *  for the same client or a different one, so I decided to only get rid
5482 *  of delegations for other clients when the size is being changed.)
5483 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5484 * as Write backs, even if there is no delegation, so it really isn't any
5485 * different?)
5486 */
5487int
5488nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5489    nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5490    struct nfsexstuff *exp, NFSPROC_T *p)
5491{
5492	struct nfsstate st, *stp = &st;
5493	struct nfslock lo, *lop = &lo;
5494	int error = 0;
5495	nfsquad_t clientid;
5496
5497	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5498		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5499		lop->lo_first = nvap->na_size;
5500	} else {
5501		stp->ls_flags = 0;
5502		lop->lo_first = 0;
5503	}
5504	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5505	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5506	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5507	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5508		stp->ls_flags |= NFSLCK_SETATTR;
5509	if (stp->ls_flags == 0)
5510		goto out;
5511	lop->lo_end = NFS64BITSSET;
5512	lop->lo_flags = NFSLCK_WRITE;
5513	stp->ls_ownerlen = 0;
5514	stp->ls_op = NULL;
5515	stp->ls_uid = nd->nd_cred->cr_uid;
5516	stp->ls_stateid.seqid = stateidp->seqid;
5517	clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5518	clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5519	stp->ls_stateid.other[2] = stateidp->other[2];
5520	error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5521	    stateidp, exp, nd, p);
5522
5523out:
5524	NFSEXITCODE2(error, nd);
5525	return (error);
5526}
5527
5528/*
5529 * Check for a write delegation and do a CBGETATTR if there is one, updating
5530 * the attributes, as required.
5531 * Should I return an error if I can't get the attributes? (For now, I'll
5532 * just return ok.
5533 */
5534int
5535nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
5536    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred,
5537    NFSPROC_T *p)
5538{
5539	struct nfsstate *stp;
5540	struct nfslockfile *lfp;
5541	struct nfsclient *clp;
5542	struct nfsvattr nva;
5543	fhandle_t nfh;
5544	int error = 0;
5545	nfsattrbit_t cbbits;
5546	u_quad_t delegfilerev;
5547
5548	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
5549	if (!NFSNONZERO_ATTRBIT(&cbbits))
5550		goto out;
5551	if (nfsrv_writedelegcnt == 0)
5552		goto out;
5553
5554	/*
5555	 * Get the lock file structure.
5556	 * (A return of -1 means no associated state, so return ok.)
5557	 */
5558	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5559	NFSLOCKSTATE();
5560	if (!error)
5561		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5562	if (error) {
5563		NFSUNLOCKSTATE();
5564		if (error == -1)
5565			error = 0;
5566		goto out;
5567	}
5568
5569	/*
5570	 * Now, look for a write delegation.
5571	 */
5572	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
5573		if (stp->ls_flags & NFSLCK_DELEGWRITE)
5574			break;
5575	}
5576	if (stp == LIST_END(&lfp->lf_deleg)) {
5577		NFSUNLOCKSTATE();
5578		goto out;
5579	}
5580	clp = stp->ls_clp;
5581
5582	/* If the clientid is not confirmed, ignore the delegation. */
5583	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
5584		NFSUNLOCKSTATE();
5585		goto out;
5586	}
5587
5588	delegfilerev = stp->ls_filerev;
5589	/*
5590	 * If the Write delegation was issued as a part of this Compound RPC
5591	 * or if we have an Implied Clientid (used in a previous Op in this
5592	 * compound) and it is the client the delegation was issued to,
5593	 * just return ok.
5594	 * I also assume that it is from the same client iff the network
5595	 * host IP address is the same as the callback address. (Not
5596	 * exactly correct by the RFC, but avoids a lot of Getattr
5597	 * callbacks.)
5598	 */
5599	if (nd->nd_compref == stp->ls_compref ||
5600	    ((nd->nd_flag & ND_IMPLIEDCLID) &&
5601	     clp->lc_clientid.qval == nd->nd_clientid.qval) ||
5602	     nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
5603		NFSUNLOCKSTATE();
5604		goto out;
5605	}
5606
5607	/*
5608	 * We are now done with the delegation state structure,
5609	 * so the statelock can be released and we can now tsleep().
5610	 */
5611
5612	/*
5613	 * Now, we must do the CB Getattr callback, to see if Change or Size
5614	 * has changed.
5615	 */
5616	if (clp->lc_expiry >= NFSD_MONOSEC) {
5617		NFSUNLOCKSTATE();
5618		NFSVNO_ATTRINIT(&nva);
5619		nva.na_filerev = NFS64BITSSET;
5620		error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
5621		    0, &nfh, &nva, &cbbits, p);
5622		if (!error) {
5623			if ((nva.na_filerev != NFS64BITSSET &&
5624			    nva.na_filerev > delegfilerev) ||
5625			    (NFSVNO_ISSETSIZE(&nva) &&
5626			     nva.na_size != nvap->na_size)) {
5627				error = nfsvno_updfilerev(vp, nvap, cred, p);
5628				if (NFSVNO_ISSETSIZE(&nva))
5629					nvap->na_size = nva.na_size;
5630			}
5631		} else
5632			error = 0;	/* Ignore callback errors for now. */
5633	} else {
5634		NFSUNLOCKSTATE();
5635	}
5636
5637out:
5638	NFSEXITCODE2(error, nd);
5639	return (error);
5640}
5641
5642/*
5643 * This function looks for openowners that haven't had any opens for
5644 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5645 * is set.
5646 */
5647void
5648nfsrv_throwawayopens(NFSPROC_T *p)
5649{
5650	struct nfsclient *clp, *nclp;
5651	struct nfsstate *stp, *nstp;
5652	int i;
5653
5654	NFSLOCKSTATE();
5655	nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
5656	/*
5657	 * For each client...
5658	 */
5659	for (i = 0; i < nfsrv_clienthashsize; i++) {
5660	    LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5661		LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5662			if (LIST_EMPTY(&stp->ls_open) &&
5663			    (stp->ls_noopens > NFSNOOPEN ||
5664			     (nfsrv_openpluslock * 2) >
5665			     nfsrv_v4statelimit))
5666				nfsrv_freeopenowner(stp, 0, p);
5667		}
5668	    }
5669	}
5670	NFSUNLOCKSTATE();
5671}
5672
5673/*
5674 * This function checks to see if the credentials are the same.
5675 * Returns 1 for not same, 0 otherwise.
5676 */
5677static int
5678nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
5679{
5680
5681	if (nd->nd_flag & ND_GSS) {
5682		if (!(clp->lc_flags & LCL_GSS))
5683			return (1);
5684		if (clp->lc_flags & LCL_NAME) {
5685			if (nd->nd_princlen != clp->lc_namelen ||
5686			    NFSBCMP(nd->nd_principal, clp->lc_name,
5687				clp->lc_namelen))
5688				return (1);
5689			else
5690				return (0);
5691		}
5692		if (nd->nd_cred->cr_uid == clp->lc_uid)
5693			return (0);
5694		else
5695			return (1);
5696	} else if (clp->lc_flags & LCL_GSS)
5697		return (1);
5698	/*
5699	 * For AUTH_SYS, allow the same uid or root. (This is underspecified
5700	 * in RFC3530, which talks about principals, but doesn't say anything
5701	 * about uids for AUTH_SYS.)
5702	 */
5703	if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5704		return (0);
5705	else
5706		return (1);
5707}
5708
5709/*
5710 * Calculate the lease expiry time.
5711 */
5712static time_t
5713nfsrv_leaseexpiry(void)
5714{
5715
5716	if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
5717		return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5718	return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5719}
5720
5721/*
5722 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5723 */
5724static void
5725nfsrv_delaydelegtimeout(struct nfsstate *stp)
5726{
5727
5728	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5729		return;
5730
5731	if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5732	    stp->ls_delegtime < stp->ls_delegtimelimit) {
5733		stp->ls_delegtime += nfsrv_lease;
5734		if (stp->ls_delegtime > stp->ls_delegtimelimit)
5735			stp->ls_delegtime = stp->ls_delegtimelimit;
5736	}
5737}
5738
5739/*
5740 * This function checks to see if there is any other state associated
5741 * with the openowner for this Open.
5742 * It returns 1 if there is no other state, 0 otherwise.
5743 */
5744static int
5745nfsrv_nootherstate(struct nfsstate *stp)
5746{
5747	struct nfsstate *tstp;
5748
5749	LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5750		if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5751			return (0);
5752	}
5753	return (1);
5754}
5755
5756/*
5757 * Create a list of lock deltas (changes to local byte range locking
5758 * that can be rolled back using the list) and apply the changes via
5759 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5760 * the rollback or update function will be called after this.
5761 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5762 * call fails. If it returns an error, it will unlock the list.
5763 */
5764static int
5765nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5766    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5767{
5768	struct nfslock *lop, *nlop;
5769	int error = 0;
5770
5771	/* Loop through the list of locks. */
5772	lop = LIST_FIRST(&lfp->lf_locallock);
5773	while (first < end && lop != NULL) {
5774		nlop = LIST_NEXT(lop, lo_lckowner);
5775		if (first >= lop->lo_end) {
5776			/* not there yet */
5777			lop = nlop;
5778		} else if (first < lop->lo_first) {
5779			/* new one starts before entry in list */
5780			if (end <= lop->lo_first) {
5781				/* no overlap between old and new */
5782				error = nfsrv_dolocal(vp, lfp, flags,
5783				    NFSLCK_UNLOCK, first, end, cfp, p);
5784				if (error != 0)
5785					break;
5786				first = end;
5787			} else {
5788				/* handle fragment overlapped with new one */
5789				error = nfsrv_dolocal(vp, lfp, flags,
5790				    NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5791				    p);
5792				if (error != 0)
5793					break;
5794				first = lop->lo_first;
5795			}
5796		} else {
5797			/* new one overlaps this entry in list */
5798			if (end <= lop->lo_end) {
5799				/* overlaps all of new one */
5800				error = nfsrv_dolocal(vp, lfp, flags,
5801				    lop->lo_flags, first, end, cfp, p);
5802				if (error != 0)
5803					break;
5804				first = end;
5805			} else {
5806				/* handle fragment overlapped with new one */
5807				error = nfsrv_dolocal(vp, lfp, flags,
5808				    lop->lo_flags, first, lop->lo_end, cfp, p);
5809				if (error != 0)
5810					break;
5811				first = lop->lo_end;
5812				lop = nlop;
5813			}
5814		}
5815	}
5816	if (first < end && error == 0)
5817		/* handle fragment past end of list */
5818		error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
5819		    end, cfp, p);
5820
5821	NFSEXITCODE(error);
5822	return (error);
5823}
5824
5825/*
5826 * Local lock unlock. Unlock all byte ranges that are no longer locked
5827 * by NFSv4. To do this, unlock any subranges of first-->end that
5828 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
5829 * list. This list has all locks for the file held by other
5830 * <clientid, lockowner> tuples. The list is ordered by increasing
5831 * lo_first value, but may have entries that overlap each other, for
5832 * the case of read locks.
5833 */
5834static void
5835nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
5836    uint64_t init_end, NFSPROC_T *p)
5837{
5838	struct nfslock *lop;
5839	uint64_t first, end, prevfirst;
5840
5841	first = init_first;
5842	end = init_end;
5843	while (first < init_end) {
5844		/* Loop through all nfs locks, adjusting first and end */
5845		prevfirst = 0;
5846		LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
5847			KASSERT(prevfirst <= lop->lo_first,
5848			    ("nfsv4 locks out of order"));
5849			KASSERT(lop->lo_first < lop->lo_end,
5850			    ("nfsv4 bogus lock"));
5851			prevfirst = lop->lo_first;
5852			if (first >= lop->lo_first &&
5853			    first < lop->lo_end)
5854				/*
5855				 * Overlaps with initial part, so trim
5856				 * off that initial part by moving first past
5857				 * it.
5858				 */
5859				first = lop->lo_end;
5860			else if (end > lop->lo_first &&
5861			    lop->lo_first > first) {
5862				/*
5863				 * This lock defines the end of the
5864				 * segment to unlock, so set end to the
5865				 * start of it and break out of the loop.
5866				 */
5867				end = lop->lo_first;
5868				break;
5869			}
5870			if (first >= end)
5871				/*
5872				 * There is no segment left to do, so
5873				 * break out of this loop and then exit
5874				 * the outer while() since first will be set
5875				 * to end, which must equal init_end here.
5876				 */
5877				break;
5878		}
5879		if (first < end) {
5880			/* Unlock this segment */
5881			(void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
5882			    NFSLCK_READ, first, end, NULL, p);
5883			nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
5884			    first, end);
5885		}
5886		/*
5887		 * Now move past this segment and look for any further
5888		 * segment in the range, if there is one.
5889		 */
5890		first = end;
5891		end = init_end;
5892	}
5893}
5894
5895/*
5896 * Do the local lock operation and update the rollback list, as required.
5897 * Perform the rollback and return the error if nfsvno_advlock() fails.
5898 */
5899static int
5900nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
5901    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5902{
5903	struct nfsrollback *rlp;
5904	int error = 0, ltype, oldltype;
5905
5906	if (flags & NFSLCK_WRITE)
5907		ltype = F_WRLCK;
5908	else if (flags & NFSLCK_READ)
5909		ltype = F_RDLCK;
5910	else
5911		ltype = F_UNLCK;
5912	if (oldflags & NFSLCK_WRITE)
5913		oldltype = F_WRLCK;
5914	else if (oldflags & NFSLCK_READ)
5915		oldltype = F_RDLCK;
5916	else
5917		oldltype = F_UNLCK;
5918	if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
5919		/* nothing to do */
5920		goto out;
5921	error = nfsvno_advlock(vp, ltype, first, end, p);
5922	if (error != 0) {
5923		if (cfp != NULL) {
5924			cfp->cl_clientid.lval[0] = 0;
5925			cfp->cl_clientid.lval[1] = 0;
5926			cfp->cl_first = 0;
5927			cfp->cl_end = NFS64BITSSET;
5928			cfp->cl_flags = NFSLCK_WRITE;
5929			cfp->cl_ownerlen = 5;
5930			NFSBCOPY("LOCAL", cfp->cl_owner, 5);
5931		}
5932		nfsrv_locallock_rollback(vp, lfp, p);
5933	} else if (ltype != F_UNLCK) {
5934		rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
5935		    M_WAITOK);
5936		rlp->rlck_first = first;
5937		rlp->rlck_end = end;
5938		rlp->rlck_type = oldltype;
5939		LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
5940	}
5941
5942out:
5943	NFSEXITCODE(error);
5944	return (error);
5945}
5946
5947/*
5948 * Roll back local lock changes and free up the rollback list.
5949 */
5950static void
5951nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
5952{
5953	struct nfsrollback *rlp, *nrlp;
5954
5955	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
5956		(void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
5957		    rlp->rlck_end, p);
5958		free(rlp, M_NFSDROLLBACK);
5959	}
5960	LIST_INIT(&lfp->lf_rollback);
5961}
5962
5963/*
5964 * Update local lock list and delete rollback list (ie now committed to the
5965 * local locks). Most of the work is done by the internal function.
5966 */
5967static void
5968nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
5969    uint64_t end)
5970{
5971	struct nfsrollback *rlp, *nrlp;
5972	struct nfslock *new_lop, *other_lop;
5973
5974	new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
5975	if (flags & (NFSLCK_READ | NFSLCK_WRITE))
5976		other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
5977		    M_WAITOK);
5978	else
5979		other_lop = NULL;
5980	new_lop->lo_flags = flags;
5981	new_lop->lo_first = first;
5982	new_lop->lo_end = end;
5983	nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
5984	if (new_lop != NULL)
5985		free(new_lop, M_NFSDLOCK);
5986	if (other_lop != NULL)
5987		free(other_lop, M_NFSDLOCK);
5988
5989	/* and get rid of the rollback list */
5990	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
5991		free(rlp, M_NFSDROLLBACK);
5992	LIST_INIT(&lfp->lf_rollback);
5993}
5994
5995/*
5996 * Lock the struct nfslockfile for local lock updating.
5997 */
5998static void
5999nfsrv_locklf(struct nfslockfile *lfp)
6000{
6001	int gotlock;
6002
6003	/* lf_usecount ensures *lfp won't be free'd */
6004	lfp->lf_usecount++;
6005	do {
6006		gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
6007		    NFSSTATEMUTEXPTR, NULL);
6008	} while (gotlock == 0);
6009	lfp->lf_usecount--;
6010}
6011
6012/*
6013 * Unlock the struct nfslockfile after local lock updating.
6014 */
6015static void
6016nfsrv_unlocklf(struct nfslockfile *lfp)
6017{
6018
6019	nfsv4_unlock(&lfp->lf_locallock_lck, 0);
6020}
6021
6022/*
6023 * Clear out all state for the NFSv4 server.
6024 * Must be called by a thread that can sleep when no nfsds are running.
6025 */
6026void
6027nfsrv_throwawayallstate(NFSPROC_T *p)
6028{
6029	struct nfsclient *clp, *nclp;
6030	struct nfslockfile *lfp, *nlfp;
6031	int i;
6032
6033	/*
6034	 * For each client, clean out the state and then free the structure.
6035	 */
6036	for (i = 0; i < nfsrv_clienthashsize; i++) {
6037		LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
6038			nfsrv_cleanclient(clp, p);
6039			nfsrv_freedeleglist(&clp->lc_deleg);
6040			nfsrv_freedeleglist(&clp->lc_olddeleg);
6041			free(clp->lc_stateid, M_NFSDCLIENT);
6042			free(clp, M_NFSDCLIENT);
6043		}
6044	}
6045
6046	/*
6047	 * Also, free up any remaining lock file structures.
6048	 */
6049	for (i = 0; i < nfsrv_lockhashsize; i++) {
6050		LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
6051			printf("nfsd unload: fnd a lock file struct\n");
6052			nfsrv_freenfslockfile(lfp);
6053		}
6054	}
6055}
6056
6057/*
6058 * Check the sequence# for the session and slot provided as an argument.
6059 * Also, renew the lease if the session will return NFS_OK.
6060 */
6061int
6062nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
6063    uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
6064    uint32_t *sflagsp, NFSPROC_T *p)
6065{
6066	struct nfsdsession *sep;
6067	struct nfssessionhash *shp;
6068	int error;
6069	SVCXPRT *savxprt;
6070
6071	shp = NFSSESSIONHASH(nd->nd_sessionid);
6072	NFSLOCKSESSION(shp);
6073	sep = nfsrv_findsession(nd->nd_sessionid);
6074	if (sep == NULL) {
6075		NFSUNLOCKSESSION(shp);
6076		return (NFSERR_BADSESSION);
6077	}
6078	error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
6079	    sep->sess_slots, NULL, NFSV4_SLOTS - 1);
6080	if (error != 0) {
6081		NFSUNLOCKSESSION(shp);
6082		return (error);
6083	}
6084	if (cache_this != 0)
6085		nd->nd_flag |= ND_SAVEREPLY;
6086	/* Renew the lease. */
6087	sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
6088	nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
6089	nd->nd_flag |= ND_IMPLIEDCLID;
6090
6091	/*
6092	 * If this session handles the backchannel, save the nd_xprt for this
6093	 * RPC, since this is the one being used.
6094	 * RFC-5661 specifies that the fore channel will be implicitly
6095	 * bound by a Sequence operation.  However, since some NFSv4.1 clients
6096	 * erroneously assumed that the back channel would be implicitly
6097	 * bound as well, do the implicit binding unless a
6098	 * BindConnectiontoSession has already been done on the session.
6099	 */
6100	savxprt = NULL;
6101	if (sep->sess_clp->lc_req.nr_client != NULL &&
6102	    sep->sess_cbsess.nfsess_xprt != nd->nd_xprt &&
6103	    (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0 &&
6104	    (sep->sess_clp->lc_flags & LCL_DONEBINDCONN) == 0) {
6105		NFSD_DEBUG(2,
6106		    "nfsrv_checksequence: implicit back channel bind\n");
6107		savxprt = sep->sess_cbsess.nfsess_xprt;
6108		SVC_ACQUIRE(nd->nd_xprt);
6109		nd->nd_xprt->xp_p2 =
6110		    sep->sess_clp->lc_req.nr_client->cl_private;
6111		nd->nd_xprt->xp_idletimeout = 0;	/* Disable timeout. */
6112		sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
6113	}
6114
6115	*sflagsp = 0;
6116	if (sep->sess_clp->lc_req.nr_client == NULL)
6117		*sflagsp |= NFSV4SEQ_CBPATHDOWN;
6118	NFSUNLOCKSESSION(shp);
6119	if (savxprt != NULL)
6120		SVC_RELEASE(savxprt);
6121	if (error == NFSERR_EXPIRED) {
6122		*sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
6123		error = 0;
6124	} else if (error == NFSERR_ADMINREVOKED) {
6125		*sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
6126		error = 0;
6127	}
6128	*highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
6129	return (0);
6130}
6131
6132/*
6133 * Check/set reclaim complete for this session/clientid.
6134 */
6135int
6136nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs)
6137{
6138	struct nfsdsession *sep;
6139	struct nfssessionhash *shp;
6140	int error = 0;
6141
6142	shp = NFSSESSIONHASH(nd->nd_sessionid);
6143	NFSLOCKSTATE();
6144	NFSLOCKSESSION(shp);
6145	sep = nfsrv_findsession(nd->nd_sessionid);
6146	if (sep == NULL) {
6147		NFSUNLOCKSESSION(shp);
6148		NFSUNLOCKSTATE();
6149		return (NFSERR_BADSESSION);
6150	}
6151
6152	if (onefs != 0)
6153		sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS;
6154		/* Check to see if reclaim complete has already happened. */
6155	else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
6156		error = NFSERR_COMPLETEALREADY;
6157	else {
6158		sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
6159		nfsrv_markreclaim(sep->sess_clp);
6160	}
6161	NFSUNLOCKSESSION(shp);
6162	NFSUNLOCKSTATE();
6163	return (error);
6164}
6165
6166/*
6167 * Cache the reply in a session slot.
6168 */
6169void
6170nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m)
6171{
6172	struct nfsdsession *sep;
6173	struct nfssessionhash *shp;
6174	char *buf, *cp;
6175#ifdef INET
6176	struct sockaddr_in *sin;
6177#endif
6178#ifdef INET6
6179	struct sockaddr_in6 *sin6;
6180#endif
6181
6182	shp = NFSSESSIONHASH(nd->nd_sessionid);
6183	NFSLOCKSESSION(shp);
6184	sep = nfsrv_findsession(nd->nd_sessionid);
6185	if (sep == NULL) {
6186		NFSUNLOCKSESSION(shp);
6187		if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
6188			buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK);
6189			switch (nd->nd_nam->sa_family) {
6190#ifdef INET
6191			case AF_INET:
6192				sin = (struct sockaddr_in *)nd->nd_nam;
6193				cp = inet_ntop(sin->sin_family,
6194				    &sin->sin_addr.s_addr, buf,
6195				    INET6_ADDRSTRLEN);
6196				break;
6197#endif
6198#ifdef INET6
6199			case AF_INET6:
6200				sin6 = (struct sockaddr_in6 *)nd->nd_nam;
6201				cp = inet_ntop(sin6->sin6_family,
6202				    &sin6->sin6_addr, buf, INET6_ADDRSTRLEN);
6203				break;
6204#endif
6205			default:
6206				cp = NULL;
6207			}
6208			if (cp != NULL)
6209				printf("nfsrv_cache_session: no session "
6210				    "IPaddr=%s\n", cp);
6211			else
6212				printf("nfsrv_cache_session: no session\n");
6213			free(buf, M_TEMP);
6214		}
6215		m_freem(*m);
6216		return;
6217	}
6218	nfsv4_seqsess_cacherep(nd->nd_slotid, sep->sess_slots, nd->nd_repstat,
6219	    m);
6220	NFSUNLOCKSESSION(shp);
6221}
6222
6223/*
6224 * Search for a session that matches the sessionid.
6225 */
6226static struct nfsdsession *
6227nfsrv_findsession(uint8_t *sessionid)
6228{
6229	struct nfsdsession *sep;
6230	struct nfssessionhash *shp;
6231
6232	shp = NFSSESSIONHASH(sessionid);
6233	LIST_FOREACH(sep, &shp->list, sess_hash) {
6234		if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
6235			break;
6236	}
6237	return (sep);
6238}
6239
6240/*
6241 * Destroy a session.
6242 */
6243int
6244nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
6245{
6246	int error, igotlock, samesess;
6247
6248	samesess = 0;
6249	if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) &&
6250	    (nd->nd_flag & ND_HASSEQUENCE) != 0) {
6251		samesess = 1;
6252		if ((nd->nd_flag & ND_LASTOP) == 0)
6253			return (NFSERR_BADSESSION);
6254	}
6255
6256	/* Lock out other nfsd threads */
6257	NFSLOCKV4ROOTMUTEX();
6258	nfsv4_relref(&nfsv4rootfs_lock);
6259	do {
6260		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
6261		    NFSV4ROOTLOCKMUTEXPTR, NULL);
6262	} while (igotlock == 0);
6263	NFSUNLOCKV4ROOTMUTEX();
6264
6265	error = nfsrv_freesession(NULL, sessionid);
6266	if (error == 0 && samesess != 0)
6267		nd->nd_flag &= ~ND_HASSEQUENCE;
6268
6269	NFSLOCKV4ROOTMUTEX();
6270	nfsv4_unlock(&nfsv4rootfs_lock, 1);
6271	NFSUNLOCKV4ROOTMUTEX();
6272	return (error);
6273}
6274
6275/*
6276 * Bind a connection to a session.
6277 * For now, only certain variants are supported, since the current session
6278 * structure can only handle a single backchannel entry, which will be
6279 * applied to all connections if it is set.
6280 */
6281int
6282nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp)
6283{
6284	struct nfssessionhash *shp;
6285	struct nfsdsession *sep;
6286	struct nfsclient *clp;
6287	SVCXPRT *savxprt;
6288	int error;
6289
6290	error = 0;
6291	savxprt = NULL;
6292	shp = NFSSESSIONHASH(sessionid);
6293	NFSLOCKSTATE();
6294	NFSLOCKSESSION(shp);
6295	sep = nfsrv_findsession(sessionid);
6296	if (sep != NULL) {
6297		clp = sep->sess_clp;
6298		if (*foreaftp == NFSCDFC4_BACK ||
6299		    *foreaftp == NFSCDFC4_BACK_OR_BOTH ||
6300		    *foreaftp == NFSCDFC4_FORE_OR_BOTH) {
6301			/* Try to set up a backchannel. */
6302			if (clp->lc_req.nr_client == NULL) {
6303				NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire "
6304				    "backchannel\n");
6305				clp->lc_req.nr_client = (struct __rpc_client *)
6306				    clnt_bck_create(nd->nd_xprt->xp_socket,
6307				    sep->sess_cbprogram, NFSV4_CBVERS);
6308			}
6309			if (clp->lc_req.nr_client != NULL) {
6310				NFSD_DEBUG(2, "nfsrv_bindconnsess: set up "
6311				    "backchannel\n");
6312				savxprt = sep->sess_cbsess.nfsess_xprt;
6313				SVC_ACQUIRE(nd->nd_xprt);
6314				nd->nd_xprt->xp_p2 =
6315				    clp->lc_req.nr_client->cl_private;
6316				/* Disable idle timeout. */
6317				nd->nd_xprt->xp_idletimeout = 0;
6318				sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
6319				sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN;
6320				clp->lc_flags |= LCL_DONEBINDCONN;
6321				if (*foreaftp == NFSCDFS4_BACK)
6322					*foreaftp = NFSCDFS4_BACK;
6323				else
6324					*foreaftp = NFSCDFS4_BOTH;
6325			} else if (*foreaftp != NFSCDFC4_BACK) {
6326				NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set "
6327				    "up backchannel\n");
6328				sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
6329				clp->lc_flags |= LCL_DONEBINDCONN;
6330				*foreaftp = NFSCDFS4_FORE;
6331			} else {
6332				error = NFSERR_NOTSUPP;
6333				printf("nfsrv_bindconnsess: Can't add "
6334				    "backchannel\n");
6335			}
6336		} else {
6337			NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n");
6338			clp->lc_flags |= LCL_DONEBINDCONN;
6339			*foreaftp = NFSCDFS4_FORE;
6340		}
6341	} else
6342		error = NFSERR_BADSESSION;
6343	NFSUNLOCKSESSION(shp);
6344	NFSUNLOCKSTATE();
6345	if (savxprt != NULL)
6346		SVC_RELEASE(savxprt);
6347	return (error);
6348}
6349
6350/*
6351 * Free up a session structure.
6352 */
6353static int
6354nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
6355{
6356	struct nfssessionhash *shp;
6357	int i;
6358
6359	NFSLOCKSTATE();
6360	if (sep == NULL) {
6361		shp = NFSSESSIONHASH(sessionid);
6362		NFSLOCKSESSION(shp);
6363		sep = nfsrv_findsession(sessionid);
6364	} else {
6365		shp = NFSSESSIONHASH(sep->sess_sessionid);
6366		NFSLOCKSESSION(shp);
6367	}
6368	if (sep != NULL) {
6369		sep->sess_refcnt--;
6370		if (sep->sess_refcnt > 0) {
6371			NFSUNLOCKSESSION(shp);
6372			NFSUNLOCKSTATE();
6373			return (NFSERR_BACKCHANBUSY);
6374		}
6375		LIST_REMOVE(sep, sess_hash);
6376		LIST_REMOVE(sep, sess_list);
6377	}
6378	NFSUNLOCKSESSION(shp);
6379	NFSUNLOCKSTATE();
6380	if (sep == NULL)
6381		return (NFSERR_BADSESSION);
6382	for (i = 0; i < NFSV4_SLOTS; i++)
6383		if (sep->sess_slots[i].nfssl_reply != NULL)
6384			m_freem(sep->sess_slots[i].nfssl_reply);
6385	if (sep->sess_cbsess.nfsess_xprt != NULL)
6386		SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
6387	free(sep, M_NFSDSESSION);
6388	return (0);
6389}
6390
6391/*
6392 * Free a stateid.
6393 * RFC5661 says that it should fail when there are associated opens, locks
6394 * or delegations. Since stateids represent opens, I don't see how you can
6395 * free an open stateid (it will be free'd when closed), so this function
6396 * only works for lock stateids (freeing the lock_owner) or delegations.
6397 */
6398int
6399nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6400    NFSPROC_T *p)
6401{
6402	struct nfsclient *clp;
6403	struct nfsstate *stp;
6404	int error;
6405
6406	NFSLOCKSTATE();
6407	/*
6408	 * Look up the stateid
6409	 */
6410	error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6411	    NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6412	if (error == 0) {
6413		/* First, check for a delegation. */
6414		LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
6415			if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
6416			    NFSX_STATEIDOTHER))
6417				break;
6418		}
6419		if (stp != NULL) {
6420			nfsrv_freedeleg(stp);
6421			NFSUNLOCKSTATE();
6422			return (error);
6423		}
6424	}
6425	/* Not a delegation, try for a lock_owner. */
6426	if (error == 0)
6427		error = nfsrv_getstate(clp, stateidp, 0, &stp);
6428	if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
6429	    NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
6430		/* Not a lock_owner stateid. */
6431		error = NFSERR_LOCKSHELD;
6432	if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
6433		error = NFSERR_LOCKSHELD;
6434	if (error == 0)
6435		nfsrv_freelockowner(stp, NULL, 0, p);
6436	NFSUNLOCKSTATE();
6437	return (error);
6438}
6439
6440/*
6441 * Test a stateid.
6442 */
6443int
6444nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6445    NFSPROC_T *p)
6446{
6447	struct nfsclient *clp;
6448	struct nfsstate *stp;
6449	int error;
6450
6451	NFSLOCKSTATE();
6452	/*
6453	 * Look up the stateid
6454	 */
6455	error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6456	    NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6457	if (error == 0)
6458		error = nfsrv_getstate(clp, stateidp, 0, &stp);
6459	if (error == 0 && stateidp->seqid != 0 &&
6460	    SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid))
6461		error = NFSERR_OLDSTATEID;
6462	NFSUNLOCKSTATE();
6463	return (error);
6464}
6465
6466/*
6467 * Generate the xdr for an NFSv4.1 CBSequence Operation.
6468 */
6469static int
6470nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
6471    int dont_replycache, struct nfsdsession **sepp)
6472{
6473	struct nfsdsession *sep;
6474	uint32_t *tl, slotseq = 0;
6475	int maxslot, slotpos;
6476	uint8_t sessionid[NFSX_V4SESSIONID];
6477	int error;
6478
6479	error = nfsv4_getcbsession(clp, sepp);
6480	if (error != 0)
6481		return (error);
6482	sep = *sepp;
6483	(void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot,
6484	    &slotseq, sessionid);
6485	KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
6486
6487	/* Build the Sequence arguments. */
6488	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
6489	bcopy(sessionid, tl, NFSX_V4SESSIONID);
6490	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
6491	nd->nd_slotseq = tl;
6492	*tl++ = txdr_unsigned(slotseq);
6493	*tl++ = txdr_unsigned(slotpos);
6494	*tl++ = txdr_unsigned(maxslot);
6495	if (dont_replycache == 0)
6496		*tl++ = newnfs_true;
6497	else
6498		*tl++ = newnfs_false;
6499	*tl = 0;			/* No referring call list, for now. */
6500	nd->nd_flag |= ND_HASSEQUENCE;
6501	return (0);
6502}
6503
6504/*
6505 * Get a session for the callback.
6506 */
6507static int
6508nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
6509{
6510	struct nfsdsession *sep;
6511
6512	NFSLOCKSTATE();
6513	LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6514		if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
6515			break;
6516	}
6517	if (sep == NULL) {
6518		NFSUNLOCKSTATE();
6519		return (NFSERR_BADSESSION);
6520	}
6521	sep->sess_refcnt++;
6522	*sepp = sep;
6523	NFSUNLOCKSTATE();
6524	return (0);
6525}
6526
6527/*
6528 * Free up all backchannel xprts.  This needs to be done when the nfsd threads
6529 * exit, since those transports will all be going away.
6530 * This is only called after all the nfsd threads are done performing RPCs,
6531 * so locking shouldn't be an issue.
6532 */
6533void
6534nfsrv_freeallbackchannel_xprts(void)
6535{
6536	struct nfsdsession *sep;
6537	struct nfsclient *clp;
6538	SVCXPRT *xprt;
6539	int i;
6540
6541	for (i = 0; i < nfsrv_clienthashsize; i++) {
6542		LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
6543			LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6544				xprt = sep->sess_cbsess.nfsess_xprt;
6545				sep->sess_cbsess.nfsess_xprt = NULL;
6546				if (xprt != NULL)
6547					SVC_RELEASE(xprt);
6548			}
6549		}
6550	}
6551}
6552
6553