1/*-
2 * Copyright (c) 2009 Rick Macklem, University of Guelph
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#ifndef APPLEKEXT
32#include <fs/nfs/nfsport.h>
33
34struct nfsrv_stablefirst nfsrv_stablefirst;
35int nfsrv_issuedelegs = 0;
36int nfsrv_dolocallocks = 0;
37struct nfsv4lock nfsv4rootfs_lock;
38
39extern int newnfs_numnfsd;
40extern struct nfsstats newnfsstats;
41extern int nfsrv_lease;
42extern struct timeval nfsboottime;
43extern u_int32_t newnfs_true, newnfs_false;
44NFSV4ROOTLOCKMUTEX;
45NFSSTATESPINLOCK;
46
47/*
48 * Hash lists for nfs V4.
49 * (Some would put them in the .h file, but I don't like declaring storage
50 *  in a .h)
51 */
52struct nfsclienthashhead nfsclienthash[NFSCLIENTHASHSIZE];
53struct nfslockhashhead nfslockhash[NFSLOCKHASHSIZE];
54#endif	/* !APPLEKEXT */
55
56static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
57static time_t nfsrvboottime;
58static int nfsrv_writedelegifpos = 1;
59static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
60static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
61static int nfsrv_nogsscallback = 0;
62
63/* local functions */
64static void nfsrv_dumpaclient(struct nfsclient *clp,
65    struct nfsd_dumpclients *dumpp);
66static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
67    NFSPROC_T *p);
68static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
69    NFSPROC_T *p);
70static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
71    NFSPROC_T *p);
72static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
73    int cansleep, NFSPROC_T *p);
74static void nfsrv_freenfslock(struct nfslock *lop);
75static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
76static void nfsrv_freedeleg(struct nfsstate *);
77static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
78    u_int32_t flags, struct nfsstate **stpp);
79static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
80    struct nfsstate **stpp);
81static int nfsrv_getlockfh(vnode_t vp, u_short flags,
82    struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
83static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
84    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
85static void nfsrv_insertlock(struct nfslock *new_lop,
86    struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
87static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
88    struct nfslock **other_lopp, struct nfslockfile *lfp);
89static int nfsrv_getipnumber(u_char *cp);
90static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
91    nfsv4stateid_t *stateidp, int specialid);
92static int nfsrv_checkgrace(u_int32_t flags);
93static int nfsrv_docallback(struct nfsclient *clp, int procnum,
94    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
95    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p);
96static u_int32_t nfsrv_nextclientindex(void);
97static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
98static void nfsrv_markstable(struct nfsclient *clp);
99static int nfsrv_checkstable(struct nfsclient *clp);
100static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
101    vnode *vp, NFSPROC_T *p);
102static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
103    NFSPROC_T *p, vnode_t vp);
104static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
105    struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
106static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
107    struct nfsclient *clp);
108static time_t nfsrv_leaseexpiry(void);
109static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
110static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
111    struct nfsstate *stp, struct nfsrvcache *op);
112static int nfsrv_nootherstate(struct nfsstate *stp);
113static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
114    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
115static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
116    uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
117static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
118    int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
119    NFSPROC_T *p);
120static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
121    NFSPROC_T *p);
122static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
123    uint64_t first, uint64_t end);
124static void nfsrv_locklf(struct nfslockfile *lfp);
125static void nfsrv_unlocklf(struct nfslockfile *lfp);
126
127/*
128 * Scan the client list for a match and either return the current one,
129 * create a new entry or return an error.
130 * If returning a non-error, the clp structure must either be linked into
131 * the client list or free'd.
132 */
133APPLESTATIC int
134nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
135    nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
136{
137	struct nfsclient *clp = NULL, *new_clp = *new_clpp;
138	int i, error = 0;
139	struct nfsstate *stp, *tstp;
140	struct sockaddr_in *sad, *rad;
141	int zapit = 0, gotit, hasstate = 0, igotlock;
142	static u_int64_t confirm_index = 0;
143
144	/*
145	 * Check for state resource limit exceeded.
146	 */
147	if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
148		error = NFSERR_RESOURCE;
149		goto out;
150	}
151
152	if (nfsrv_issuedelegs == 0 ||
153	    ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
154		/*
155		 * Don't do callbacks when delegations are disabled or
156		 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
157		 * If establishing a callback connection is attempted
158		 * when a firewall is blocking the callback path, the
159		 * server may wait too long for the connect attempt to
160		 * succeed during the Open. Some clients, such as Linux,
161		 * may timeout and give up on the Open before the server
162		 * replies. Also, since AUTH_GSS callbacks are not
163		 * yet interoperability tested, they might cause the
164		 * server to crap out, if they get past the Init call to
165		 * the client.
166		 */
167		new_clp->lc_program = 0;
168
169	/* Lock out other nfsd threads */
170	NFSLOCKV4ROOTMUTEX();
171	nfsv4_relref(&nfsv4rootfs_lock);
172	do {
173		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
174		    NFSV4ROOTLOCKMUTEXPTR, NULL);
175	} while (!igotlock);
176	NFSUNLOCKV4ROOTMUTEX();
177
178	/*
179	 * Search for a match in the client list.
180	 */
181	gotit = i = 0;
182	while (i < NFSCLIENTHASHSIZE && !gotit) {
183	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
184		if (new_clp->lc_idlen == clp->lc_idlen &&
185		    !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
186			gotit = 1;
187			break;
188		}
189	    }
190	    i++;
191	}
192	if (!gotit ||
193	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
194		/*
195		 * Get rid of the old one.
196		 */
197		if (i != NFSCLIENTHASHSIZE) {
198			LIST_REMOVE(clp, lc_hash);
199			nfsrv_cleanclient(clp, p);
200			nfsrv_freedeleglist(&clp->lc_deleg);
201			nfsrv_freedeleglist(&clp->lc_olddeleg);
202			zapit = 1;
203		}
204		/*
205		 * Add it after assigning a client id to it.
206		 */
207		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
208		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
209		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
210		    (u_int32_t)nfsrvboottime;
211		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
212		    nfsrv_nextclientindex();
213		new_clp->lc_stateindex = 0;
214		new_clp->lc_statemaxindex = 0;
215		new_clp->lc_cbref = 0;
216		new_clp->lc_expiry = nfsrv_leaseexpiry();
217		LIST_INIT(&new_clp->lc_open);
218		LIST_INIT(&new_clp->lc_deleg);
219		LIST_INIT(&new_clp->lc_olddeleg);
220		for (i = 0; i < NFSSTATEHASHSIZE; i++)
221			LIST_INIT(&new_clp->lc_stateid[i]);
222		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
223		    lc_hash);
224		newnfsstats.srvclients++;
225		nfsrv_openpluslock++;
226		nfsrv_clients++;
227		NFSLOCKV4ROOTMUTEX();
228		nfsv4_unlock(&nfsv4rootfs_lock, 1);
229		NFSUNLOCKV4ROOTMUTEX();
230		if (zapit)
231			nfsrv_zapclient(clp, p);
232		*new_clpp = NULL;
233		goto out;
234	}
235
236	/*
237	 * Now, handle the cases where the id is already issued.
238	 */
239	if (nfsrv_notsamecredname(nd, clp)) {
240	    /*
241	     * Check to see if there is expired state that should go away.
242	     */
243	    if (clp->lc_expiry < NFSD_MONOSEC &&
244	        (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
245		nfsrv_cleanclient(clp, p);
246		nfsrv_freedeleglist(&clp->lc_deleg);
247	    }
248
249	    /*
250	     * If there is outstanding state, then reply NFSERR_CLIDINUSE per
251	     * RFC3530 Sec. 8.1.2 last para.
252	     */
253	    if (!LIST_EMPTY(&clp->lc_deleg)) {
254		hasstate = 1;
255	    } else if (LIST_EMPTY(&clp->lc_open)) {
256		hasstate = 0;
257	    } else {
258		hasstate = 0;
259		/* Look for an Open on the OpenOwner */
260		LIST_FOREACH(stp, &clp->lc_open, ls_list) {
261		    if (!LIST_EMPTY(&stp->ls_open)) {
262			hasstate = 1;
263			break;
264		    }
265		}
266	    }
267	    if (hasstate) {
268		/*
269		 * If the uid doesn't match, return NFSERR_CLIDINUSE after
270		 * filling out the correct ipaddr and portnum.
271		 */
272		sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *);
273		rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
274		sad->sin_addr.s_addr = rad->sin_addr.s_addr;
275		sad->sin_port = rad->sin_port;
276		NFSLOCKV4ROOTMUTEX();
277		nfsv4_unlock(&nfsv4rootfs_lock, 1);
278		NFSUNLOCKV4ROOTMUTEX();
279		error = NFSERR_CLIDINUSE;
280		goto out;
281	    }
282	}
283
284	if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
285		/*
286		 * If the verifier has changed, the client has rebooted
287		 * and a new client id is issued. The old state info
288		 * can be thrown away once the SETCLIENTID_CONFIRM occurs.
289		 */
290		LIST_REMOVE(clp, lc_hash);
291		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
292		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
293		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
294		    nfsrvboottime;
295		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
296		    nfsrv_nextclientindex();
297		new_clp->lc_stateindex = 0;
298		new_clp->lc_statemaxindex = 0;
299		new_clp->lc_cbref = 0;
300		new_clp->lc_expiry = nfsrv_leaseexpiry();
301
302		/*
303		 * Save the state until confirmed.
304		 */
305		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
306		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
307			tstp->ls_clp = new_clp;
308		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
309		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
310			tstp->ls_clp = new_clp;
311		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
312		    ls_list);
313		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
314			tstp->ls_clp = new_clp;
315		for (i = 0; i < NFSSTATEHASHSIZE; i++) {
316			LIST_NEWHEAD(&new_clp->lc_stateid[i],
317			    &clp->lc_stateid[i], ls_hash);
318			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
319				tstp->ls_clp = new_clp;
320		}
321		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
322		    lc_hash);
323		newnfsstats.srvclients++;
324		nfsrv_openpluslock++;
325		nfsrv_clients++;
326		NFSLOCKV4ROOTMUTEX();
327		nfsv4_unlock(&nfsv4rootfs_lock, 1);
328		NFSUNLOCKV4ROOTMUTEX();
329
330		/*
331		 * Must wait until any outstanding callback on the old clp
332		 * completes.
333		 */
334		NFSLOCKSTATE();
335		while (clp->lc_cbref) {
336			clp->lc_flags |= LCL_WAKEUPWANTED;
337			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
338			    "nfsd clp", 10 * hz);
339		}
340		NFSUNLOCKSTATE();
341		nfsrv_zapclient(clp, p);
342		*new_clpp = NULL;
343		goto out;
344	}
345	/*
346	 * id and verifier match, so update the net address info
347	 * and get rid of any existing callback authentication
348	 * handle, so a new one will be acquired.
349	 */
350	LIST_REMOVE(clp, lc_hash);
351	new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
352	new_clp->lc_expiry = nfsrv_leaseexpiry();
353	confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
354	clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
355	    clp->lc_clientid.lval[0];
356	clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
357	    clp->lc_clientid.lval[1];
358	new_clp->lc_delegtime = clp->lc_delegtime;
359	new_clp->lc_stateindex = clp->lc_stateindex;
360	new_clp->lc_statemaxindex = clp->lc_statemaxindex;
361	new_clp->lc_cbref = 0;
362	LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
363	LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
364		tstp->ls_clp = new_clp;
365	LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
366	LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
367		tstp->ls_clp = new_clp;
368	LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
369	LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
370		tstp->ls_clp = new_clp;
371	for (i = 0; i < NFSSTATEHASHSIZE; i++) {
372		LIST_NEWHEAD(&new_clp->lc_stateid[i], &clp->lc_stateid[i],
373		    ls_hash);
374		LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
375			tstp->ls_clp = new_clp;
376	}
377	LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
378	    lc_hash);
379	newnfsstats.srvclients++;
380	nfsrv_openpluslock++;
381	nfsrv_clients++;
382	NFSLOCKV4ROOTMUTEX();
383	nfsv4_unlock(&nfsv4rootfs_lock, 1);
384	NFSUNLOCKV4ROOTMUTEX();
385
386	/*
387	 * Must wait until any outstanding callback on the old clp
388	 * completes.
389	 */
390	NFSLOCKSTATE();
391	while (clp->lc_cbref) {
392		clp->lc_flags |= LCL_WAKEUPWANTED;
393		(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1, "nfsd clp",
394		    10 * hz);
395	}
396	NFSUNLOCKSTATE();
397	nfsrv_zapclient(clp, p);
398	*new_clpp = NULL;
399
400out:
401	NFSEXITCODE2(error, nd);
402	return (error);
403}
404
405/*
406 * Check to see if the client id exists and optionally confirm it.
407 */
408APPLESTATIC int
409nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
410    nfsquad_t confirm, struct nfsrv_descript *nd, NFSPROC_T *p)
411{
412	struct nfsclient *clp;
413	struct nfsstate *stp;
414	int i;
415	struct nfsclienthashhead *hp;
416	int error = 0, igotlock, doneok;
417
418	if (clpp)
419		*clpp = NULL;
420	if (nfsrvboottime != clientid.lval[0]) {
421		error = NFSERR_STALECLIENTID;
422		goto out;
423	}
424
425	/*
426	 * If called with opflags == CLOPS_RENEW, the State Lock is
427	 * already held. Otherwise, we need to get either that or,
428	 * for the case of Confirm, lock out the nfsd threads.
429	 */
430	if (opflags & CLOPS_CONFIRM) {
431		NFSLOCKV4ROOTMUTEX();
432		nfsv4_relref(&nfsv4rootfs_lock);
433		do {
434			igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
435			    NFSV4ROOTLOCKMUTEXPTR, NULL);
436		} while (!igotlock);
437		NFSUNLOCKV4ROOTMUTEX();
438	} else if (opflags != CLOPS_RENEW) {
439		NFSLOCKSTATE();
440	}
441
442	hp = NFSCLIENTHASH(clientid);
443	LIST_FOREACH(clp, hp, lc_hash) {
444		if (clp->lc_clientid.lval[1] == clientid.lval[1])
445			break;
446	}
447	if (clp == LIST_END(hp)) {
448		if (opflags & CLOPS_CONFIRM)
449			error = NFSERR_STALECLIENTID;
450		else
451			error = NFSERR_EXPIRED;
452	} else if (clp->lc_flags & LCL_ADMINREVOKED) {
453		/*
454		 * If marked admin revoked, just return the error.
455		 */
456		error = NFSERR_ADMINREVOKED;
457	}
458	if (error) {
459		if (opflags & CLOPS_CONFIRM) {
460			NFSLOCKV4ROOTMUTEX();
461			nfsv4_unlock(&nfsv4rootfs_lock, 1);
462			NFSUNLOCKV4ROOTMUTEX();
463		} else if (opflags != CLOPS_RENEW) {
464			NFSUNLOCKSTATE();
465		}
466		goto out;
467	}
468
469	/*
470	 * Perform any operations specified by the opflags.
471	 */
472	if (opflags & CLOPS_CONFIRM) {
473		if (clp->lc_confirm.qval != confirm.qval)
474			error = NFSERR_STALECLIENTID;
475		else if (nfsrv_notsamecredname(nd, clp))
476			error = NFSERR_CLIDINUSE;
477
478		if (!error) {
479		    if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
480			LCL_NEEDSCONFIRM) {
481			/*
482			 * Hang onto the delegations (as old delegations)
483			 * for an Open with CLAIM_DELEGATE_PREV unless in
484			 * grace, but get rid of the rest of the state.
485			 */
486			nfsrv_cleanclient(clp, p);
487			nfsrv_freedeleglist(&clp->lc_olddeleg);
488			if (nfsrv_checkgrace(0)) {
489			    /* In grace, so just delete delegations */
490			    nfsrv_freedeleglist(&clp->lc_deleg);
491			} else {
492			    LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
493				stp->ls_flags |= NFSLCK_OLDDELEG;
494			    clp->lc_delegtime = NFSD_MONOSEC +
495				nfsrv_lease + NFSRV_LEASEDELTA;
496			    LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
497				ls_list);
498			}
499		    }
500		    clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
501		    if (clp->lc_program)
502			clp->lc_flags |= LCL_NEEDSCBNULL;
503		}
504	} else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
505		error = NFSERR_EXPIRED;
506	}
507
508	/*
509	 * If called by the Renew Op, we must check the principal.
510	 */
511	if (!error && (opflags & CLOPS_RENEWOP)) {
512	    if (nfsrv_notsamecredname(nd, clp)) {
513		doneok = 0;
514		for (i = 0; i < NFSSTATEHASHSIZE && doneok == 0; i++) {
515		    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
516			if ((stp->ls_flags & NFSLCK_OPEN) &&
517			    stp->ls_uid == nd->nd_cred->cr_uid) {
518				doneok = 1;
519				break;
520			}
521		    }
522		}
523		if (!doneok)
524			error = NFSERR_ACCES;
525	    }
526	    if (!error && (clp->lc_flags & LCL_CBDOWN))
527		error = NFSERR_CBPATHDOWN;
528	}
529	if ((!error || error == NFSERR_CBPATHDOWN) &&
530	     (opflags & CLOPS_RENEW)) {
531		clp->lc_expiry = nfsrv_leaseexpiry();
532	}
533	if (opflags & CLOPS_CONFIRM) {
534		NFSLOCKV4ROOTMUTEX();
535		nfsv4_unlock(&nfsv4rootfs_lock, 1);
536		NFSUNLOCKV4ROOTMUTEX();
537	} else if (opflags != CLOPS_RENEW) {
538		NFSUNLOCKSTATE();
539	}
540	if (clpp)
541		*clpp = clp;
542
543out:
544	NFSEXITCODE2(error, nd);
545	return (error);
546}
547
548/*
549 * Called from the new nfssvc syscall to admin revoke a clientid.
550 * Returns 0 for success, error otherwise.
551 */
552APPLESTATIC int
553nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
554{
555	struct nfsclient *clp = NULL;
556	int i, error = 0;
557	int gotit, igotlock;
558
559	/*
560	 * First, lock out the nfsd so that state won't change while the
561	 * revocation record is being written to the stable storage restart
562	 * file.
563	 */
564	NFSLOCKV4ROOTMUTEX();
565	do {
566		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
567		    NFSV4ROOTLOCKMUTEXPTR, NULL);
568	} while (!igotlock);
569	NFSUNLOCKV4ROOTMUTEX();
570
571	/*
572	 * Search for a match in the client list.
573	 */
574	gotit = i = 0;
575	while (i < NFSCLIENTHASHSIZE && !gotit) {
576	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
577		if (revokep->nclid_idlen == clp->lc_idlen &&
578		    !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
579			gotit = 1;
580			break;
581		}
582	    }
583	    i++;
584	}
585	if (!gotit) {
586		NFSLOCKV4ROOTMUTEX();
587		nfsv4_unlock(&nfsv4rootfs_lock, 0);
588		NFSUNLOCKV4ROOTMUTEX();
589		error = EPERM;
590		goto out;
591	}
592
593	/*
594	 * Now, write out the revocation record
595	 */
596	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
597	nfsrv_backupstable();
598
599	/*
600	 * and clear out the state, marking the clientid revoked.
601	 */
602	clp->lc_flags &= ~LCL_CALLBACKSON;
603	clp->lc_flags |= LCL_ADMINREVOKED;
604	nfsrv_cleanclient(clp, p);
605	nfsrv_freedeleglist(&clp->lc_deleg);
606	nfsrv_freedeleglist(&clp->lc_olddeleg);
607	NFSLOCKV4ROOTMUTEX();
608	nfsv4_unlock(&nfsv4rootfs_lock, 0);
609	NFSUNLOCKV4ROOTMUTEX();
610
611out:
612	NFSEXITCODE(error);
613	return (error);
614}
615
616/*
617 * Dump out stats for all clients. Called from nfssvc(2), that is used
618 * newnfsstats.
619 */
620APPLESTATIC void
621nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
622{
623	struct nfsclient *clp;
624	int i = 0, cnt = 0;
625
626	/*
627	 * First, get a reference on the nfsv4rootfs_lock so that an
628	 * exclusive lock cannot be acquired while dumping the clients.
629	 */
630	NFSLOCKV4ROOTMUTEX();
631	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
632	NFSUNLOCKV4ROOTMUTEX();
633	NFSLOCKSTATE();
634	/*
635	 * Rattle through the client lists until done.
636	 */
637	while (i < NFSCLIENTHASHSIZE && cnt < maxcnt) {
638	    clp = LIST_FIRST(&nfsclienthash[i]);
639	    while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
640		nfsrv_dumpaclient(clp, &dumpp[cnt]);
641		cnt++;
642		clp = LIST_NEXT(clp, lc_hash);
643	    }
644	    i++;
645	}
646	if (cnt < maxcnt)
647	    dumpp[cnt].ndcl_clid.nclid_idlen = 0;
648	NFSUNLOCKSTATE();
649	NFSLOCKV4ROOTMUTEX();
650	nfsv4_relref(&nfsv4rootfs_lock);
651	NFSUNLOCKV4ROOTMUTEX();
652}
653
654/*
655 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
656 */
657static void
658nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
659{
660	struct nfsstate *stp, *openstp, *lckownstp;
661	struct nfslock *lop;
662	struct sockaddr *sad;
663	struct sockaddr_in *rad;
664	struct sockaddr_in6 *rad6;
665
666	dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
667	dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
668	dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
669	dumpp->ndcl_flags = clp->lc_flags;
670	dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
671	NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
672	sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *);
673	dumpp->ndcl_addrfam = sad->sa_family;
674	if (sad->sa_family == AF_INET) {
675		rad = (struct sockaddr_in *)sad;
676		dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr;
677	} else {
678		rad6 = (struct sockaddr_in6 *)sad;
679		dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr;
680	}
681
682	/*
683	 * Now, scan the state lists and total up the opens and locks.
684	 */
685	LIST_FOREACH(stp, &clp->lc_open, ls_list) {
686	    dumpp->ndcl_nopenowners++;
687	    LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
688		dumpp->ndcl_nopens++;
689		LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
690		    dumpp->ndcl_nlockowners++;
691		    LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
692			dumpp->ndcl_nlocks++;
693		    }
694		}
695	    }
696	}
697
698	/*
699	 * and the delegation lists.
700	 */
701	LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
702	    dumpp->ndcl_ndelegs++;
703	}
704	LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
705	    dumpp->ndcl_nolddelegs++;
706	}
707}
708
709/*
710 * Dump out lock stats for a file.
711 */
712APPLESTATIC void
713nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
714    NFSPROC_T *p)
715{
716	struct nfsstate *stp;
717	struct nfslock *lop;
718	int cnt = 0;
719	struct nfslockfile *lfp;
720	struct sockaddr *sad;
721	struct sockaddr_in *rad;
722	struct sockaddr_in6 *rad6;
723	int ret;
724	fhandle_t nfh;
725
726	ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
727	/*
728	 * First, get a reference on the nfsv4rootfs_lock so that an
729	 * exclusive lock on it cannot be acquired while dumping the locks.
730	 */
731	NFSLOCKV4ROOTMUTEX();
732	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
733	NFSUNLOCKV4ROOTMUTEX();
734	NFSLOCKSTATE();
735	if (!ret)
736		ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
737	if (ret) {
738		ldumpp[0].ndlck_clid.nclid_idlen = 0;
739		NFSUNLOCKSTATE();
740		NFSLOCKV4ROOTMUTEX();
741		nfsv4_relref(&nfsv4rootfs_lock);
742		NFSUNLOCKV4ROOTMUTEX();
743		return;
744	}
745
746	/*
747	 * For each open share on file, dump it out.
748	 */
749	stp = LIST_FIRST(&lfp->lf_open);
750	while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
751		ldumpp[cnt].ndlck_flags = stp->ls_flags;
752		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
753		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
754		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
755		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
756		ldumpp[cnt].ndlck_owner.nclid_idlen =
757		    stp->ls_openowner->ls_ownerlen;
758		NFSBCOPY(stp->ls_openowner->ls_owner,
759		    ldumpp[cnt].ndlck_owner.nclid_id,
760		    stp->ls_openowner->ls_ownerlen);
761		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
762		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
763		    stp->ls_clp->lc_idlen);
764		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
765		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
766		if (sad->sa_family == AF_INET) {
767			rad = (struct sockaddr_in *)sad;
768			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
769		} else {
770			rad6 = (struct sockaddr_in6 *)sad;
771			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
772		}
773		stp = LIST_NEXT(stp, ls_file);
774		cnt++;
775	}
776
777	/*
778	 * and all locks.
779	 */
780	lop = LIST_FIRST(&lfp->lf_lock);
781	while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
782		stp = lop->lo_stp;
783		ldumpp[cnt].ndlck_flags = lop->lo_flags;
784		ldumpp[cnt].ndlck_first = lop->lo_first;
785		ldumpp[cnt].ndlck_end = lop->lo_end;
786		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
787		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
788		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
789		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
790		ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
791		NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
792		    stp->ls_ownerlen);
793		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
794		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
795		    stp->ls_clp->lc_idlen);
796		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
797		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
798		if (sad->sa_family == AF_INET) {
799			rad = (struct sockaddr_in *)sad;
800			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
801		} else {
802			rad6 = (struct sockaddr_in6 *)sad;
803			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
804		}
805		lop = LIST_NEXT(lop, lo_lckfile);
806		cnt++;
807	}
808
809	/*
810	 * and the delegations.
811	 */
812	stp = LIST_FIRST(&lfp->lf_deleg);
813	while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
814		ldumpp[cnt].ndlck_flags = stp->ls_flags;
815		ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
816		ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
817		ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
818		ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
819		ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
820		ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
821		NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
822		    stp->ls_clp->lc_idlen);
823		sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
824		ldumpp[cnt].ndlck_addrfam = sad->sa_family;
825		if (sad->sa_family == AF_INET) {
826			rad = (struct sockaddr_in *)sad;
827			ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
828		} else {
829			rad6 = (struct sockaddr_in6 *)sad;
830			ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
831		}
832		stp = LIST_NEXT(stp, ls_file);
833		cnt++;
834	}
835
836	/*
837	 * If list isn't full, mark end of list by setting the client name
838	 * to zero length.
839	 */
840	if (cnt < maxcnt)
841		ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
842	NFSUNLOCKSTATE();
843	NFSLOCKV4ROOTMUTEX();
844	nfsv4_relref(&nfsv4rootfs_lock);
845	NFSUNLOCKV4ROOTMUTEX();
846}
847
848/*
849 * Server timer routine. It can scan any linked list, so long
850 * as it holds the spin/mutex lock and there is no exclusive lock on
851 * nfsv4rootfs_lock.
852 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
853 *  to do this from a callout, since the spin locks work. For
854 *  Darwin, I'm not sure what will work correctly yet.)
855 * Should be called once per second.
856 */
857APPLESTATIC void
858nfsrv_servertimer(void)
859{
860	struct nfsclient *clp, *nclp;
861	struct nfsstate *stp, *nstp;
862	int got_ref, i;
863
864	/*
865	 * Make sure nfsboottime is set. This is used by V3 as well
866	 * as V4. Note that nfsboottime is not nfsrvboottime, which is
867	 * only used by the V4 server for leases.
868	 */
869	if (nfsboottime.tv_sec == 0)
870		NFSSETBOOTTIME(nfsboottime);
871
872	/*
873	 * If server hasn't started yet, just return.
874	 */
875	NFSLOCKSTATE();
876	if (nfsrv_stablefirst.nsf_eograce == 0) {
877		NFSUNLOCKSTATE();
878		return;
879	}
880	if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
881		if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
882		    NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
883			nfsrv_stablefirst.nsf_flags |=
884			    (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
885		NFSUNLOCKSTATE();
886		return;
887	}
888
889	/*
890	 * Try and get a reference count on the nfsv4rootfs_lock so that
891	 * no nfsd thread can acquire an exclusive lock on it before this
892	 * call is done. If it is already exclusively locked, just return.
893	 */
894	NFSLOCKV4ROOTMUTEX();
895	got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
896	NFSUNLOCKV4ROOTMUTEX();
897	if (got_ref == 0) {
898		NFSUNLOCKSTATE();
899		return;
900	}
901
902	/*
903	 * For each client...
904	 */
905	for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
906	    clp = LIST_FIRST(&nfsclienthash[i]);
907	    while (clp != LIST_END(&nfsclienthash[i])) {
908		nclp = LIST_NEXT(clp, lc_hash);
909		if (!(clp->lc_flags & LCL_EXPIREIT)) {
910		    if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
911			 && ((LIST_EMPTY(&clp->lc_deleg)
912			      && LIST_EMPTY(&clp->lc_open)) ||
913			     nfsrv_clients > nfsrv_clienthighwater)) ||
914			(clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
915			(clp->lc_expiry < NFSD_MONOSEC &&
916			 (nfsrv_openpluslock * 10 / 9) > NFSRV_V4STATELIMIT)) {
917			/*
918			 * Lease has expired several nfsrv_lease times ago:
919			 * PLUS
920			 *    - no state is associated with it
921			 *    OR
922			 *    - above high water mark for number of clients
923			 *      (nfsrv_clienthighwater should be large enough
924			 *       that this only occurs when clients fail to
925			 *       use the same nfs_client_id4.id. Maybe somewhat
926			 *       higher that the maximum number of clients that
927			 *       will mount this server?)
928			 * OR
929			 * Lease has expired a very long time ago
930			 * OR
931			 * Lease has expired PLUS the number of opens + locks
932			 * has exceeded 90% of capacity
933			 *
934			 * --> Mark for expiry. The actual expiry will be done
935			 *     by an nfsd sometime soon.
936			 */
937			clp->lc_flags |= LCL_EXPIREIT;
938			nfsrv_stablefirst.nsf_flags |=
939			    (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
940		    } else {
941			/*
942			 * If there are no opens, increment no open tick cnt
943			 * If time exceeds NFSNOOPEN, mark it to be thrown away
944			 * otherwise, if there is an open, reset no open time
945			 * Hopefully, this will avoid excessive re-creation
946			 * of open owners and subsequent open confirms.
947			 */
948			stp = LIST_FIRST(&clp->lc_open);
949			while (stp != LIST_END(&clp->lc_open)) {
950				nstp = LIST_NEXT(stp, ls_list);
951				if (LIST_EMPTY(&stp->ls_open)) {
952					stp->ls_noopens++;
953					if (stp->ls_noopens > NFSNOOPEN ||
954					    (nfsrv_openpluslock * 2) >
955					    NFSRV_V4STATELIMIT)
956						nfsrv_stablefirst.nsf_flags |=
957							NFSNSF_NOOPENS;
958				} else {
959					stp->ls_noopens = 0;
960				}
961				stp = nstp;
962			}
963		    }
964		}
965		clp = nclp;
966	    }
967	}
968	NFSUNLOCKSTATE();
969	NFSLOCKV4ROOTMUTEX();
970	nfsv4_relref(&nfsv4rootfs_lock);
971	NFSUNLOCKV4ROOTMUTEX();
972}
973
974/*
975 * The following set of functions free up the various data structures.
976 */
977/*
978 * Clear out all open/lock state related to this nfsclient.
979 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
980 * there are no other active nfsd threads.
981 */
982APPLESTATIC void
983nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
984{
985	struct nfsstate *stp, *nstp;
986
987	LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
988		nfsrv_freeopenowner(stp, 1, p);
989}
990
991/*
992 * Free a client that has been cleaned. It should also already have been
993 * removed from the lists.
994 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
995 *  softclock interrupts are enabled.)
996 */
997APPLESTATIC void
998nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
999{
1000
1001#ifdef notyet
1002	if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1003	     (LCL_GSS | LCL_CALLBACKSON) &&
1004	    (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1005	    clp->lc_handlelen > 0) {
1006		clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1007		clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1008		(void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1009			NULL, 0, NULL, NULL, NULL, p);
1010	}
1011#endif
1012	newnfs_disconnect(&clp->lc_req);
1013	NFSSOCKADDRFREE(clp->lc_req.nr_nam);
1014	NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1015	free((caddr_t)clp, M_NFSDCLIENT);
1016	NFSLOCKSTATE();
1017	newnfsstats.srvclients--;
1018	nfsrv_openpluslock--;
1019	nfsrv_clients--;
1020	NFSUNLOCKSTATE();
1021}
1022
1023/*
1024 * Free a list of delegation state structures.
1025 * (This function will also free all nfslockfile structures that no
1026 *  longer have associated state.)
1027 */
1028APPLESTATIC void
1029nfsrv_freedeleglist(struct nfsstatehead *sthp)
1030{
1031	struct nfsstate *stp, *nstp;
1032
1033	LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1034		nfsrv_freedeleg(stp);
1035	}
1036	LIST_INIT(sthp);
1037}
1038
1039/*
1040 * Free up a delegation.
1041 */
1042static void
1043nfsrv_freedeleg(struct nfsstate *stp)
1044{
1045	struct nfslockfile *lfp;
1046
1047	LIST_REMOVE(stp, ls_hash);
1048	LIST_REMOVE(stp, ls_list);
1049	LIST_REMOVE(stp, ls_file);
1050	lfp = stp->ls_lfp;
1051	if (LIST_EMPTY(&lfp->lf_open) &&
1052	    LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1053	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1054	    lfp->lf_usecount == 0 &&
1055	    nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1056		nfsrv_freenfslockfile(lfp);
1057	FREE((caddr_t)stp, M_NFSDSTATE);
1058	newnfsstats.srvdelegates--;
1059	nfsrv_openpluslock--;
1060	nfsrv_delegatecnt--;
1061}
1062
1063/*
1064 * This function frees an open owner and all associated opens.
1065 */
1066static void
1067nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1068{
1069	struct nfsstate *nstp, *tstp;
1070
1071	LIST_REMOVE(stp, ls_list);
1072	/*
1073	 * Now, free all associated opens.
1074	 */
1075	nstp = LIST_FIRST(&stp->ls_open);
1076	while (nstp != LIST_END(&stp->ls_open)) {
1077		tstp = nstp;
1078		nstp = LIST_NEXT(nstp, ls_list);
1079		(void) nfsrv_freeopen(tstp, NULL, cansleep, p);
1080	}
1081	if (stp->ls_op)
1082		nfsrvd_derefcache(stp->ls_op);
1083	FREE((caddr_t)stp, M_NFSDSTATE);
1084	newnfsstats.srvopenowners--;
1085	nfsrv_openpluslock--;
1086}
1087
1088/*
1089 * This function frees an open (nfsstate open structure) with all associated
1090 * lock_owners and locks. It also frees the nfslockfile structure iff there
1091 * are no other opens on the file.
1092 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1093 */
1094static int
1095nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1096{
1097	struct nfsstate *nstp, *tstp;
1098	struct nfslockfile *lfp;
1099	int ret;
1100
1101	LIST_REMOVE(stp, ls_hash);
1102	LIST_REMOVE(stp, ls_list);
1103	LIST_REMOVE(stp, ls_file);
1104
1105	lfp = stp->ls_lfp;
1106	/*
1107	 * Now, free all lockowners associated with this open.
1108	 */
1109	LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1110		nfsrv_freelockowner(tstp, vp, cansleep, p);
1111
1112	/*
1113	 * The nfslockfile is freed here if there are no locks
1114	 * associated with the open.
1115	 * If there are locks associated with the open, the
1116	 * nfslockfile structure can be freed via nfsrv_freelockowner().
1117	 * Acquire the state mutex to avoid races with calls to
1118	 * nfsrv_getlockfile().
1119	 */
1120	if (cansleep != 0)
1121		NFSLOCKSTATE();
1122	if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1123	    LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1124	    LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1125	    lfp->lf_usecount == 0 &&
1126	    (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
1127		nfsrv_freenfslockfile(lfp);
1128		ret = 1;
1129	} else
1130		ret = 0;
1131	if (cansleep != 0)
1132		NFSUNLOCKSTATE();
1133	FREE((caddr_t)stp, M_NFSDSTATE);
1134	newnfsstats.srvopens--;
1135	nfsrv_openpluslock--;
1136	return (ret);
1137}
1138
1139/*
1140 * Frees a lockowner and all associated locks.
1141 */
1142static void
1143nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1144    NFSPROC_T *p)
1145{
1146
1147	LIST_REMOVE(stp, ls_hash);
1148	LIST_REMOVE(stp, ls_list);
1149	nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1150	if (stp->ls_op)
1151		nfsrvd_derefcache(stp->ls_op);
1152	FREE((caddr_t)stp, M_NFSDSTATE);
1153	newnfsstats.srvlockowners--;
1154	nfsrv_openpluslock--;
1155}
1156
1157/*
1158 * Free all the nfs locks on a lockowner.
1159 */
1160static void
1161nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1162    NFSPROC_T *p)
1163{
1164	struct nfslock *lop, *nlop;
1165	struct nfsrollback *rlp, *nrlp;
1166	struct nfslockfile *lfp = NULL;
1167	int gottvp = 0;
1168	vnode_t tvp = NULL;
1169	uint64_t first, end;
1170
1171	lop = LIST_FIRST(&stp->ls_lock);
1172	while (lop != LIST_END(&stp->ls_lock)) {
1173		nlop = LIST_NEXT(lop, lo_lckowner);
1174		/*
1175		 * Since all locks should be for the same file, lfp should
1176		 * not change.
1177		 */
1178		if (lfp == NULL)
1179			lfp = lop->lo_lfp;
1180		else if (lfp != lop->lo_lfp)
1181			panic("allnfslocks");
1182		/*
1183		 * If vp is NULL and cansleep != 0, a vnode must be acquired
1184		 * from the file handle. This only occurs when called from
1185		 * nfsrv_cleanclient().
1186		 */
1187		if (gottvp == 0) {
1188			if (nfsrv_dolocallocks == 0)
1189				tvp = NULL;
1190			else if (vp == NULL && cansleep != 0)
1191				tvp = nfsvno_getvp(&lfp->lf_fh);
1192			else
1193				tvp = vp;
1194			gottvp = 1;
1195		}
1196
1197		if (tvp != NULL) {
1198			if (cansleep == 0)
1199				panic("allnfs2");
1200			first = lop->lo_first;
1201			end = lop->lo_end;
1202			nfsrv_freenfslock(lop);
1203			nfsrv_localunlock(tvp, lfp, first, end, p);
1204			LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1205			    nrlp)
1206				free(rlp, M_NFSDROLLBACK);
1207			LIST_INIT(&lfp->lf_rollback);
1208		} else
1209			nfsrv_freenfslock(lop);
1210		lop = nlop;
1211	}
1212	if (vp == NULL && tvp != NULL)
1213		vput(tvp);
1214}
1215
1216/*
1217 * Free an nfslock structure.
1218 */
1219static void
1220nfsrv_freenfslock(struct nfslock *lop)
1221{
1222
1223	if (lop->lo_lckfile.le_prev != NULL) {
1224		LIST_REMOVE(lop, lo_lckfile);
1225		newnfsstats.srvlocks--;
1226		nfsrv_openpluslock--;
1227	}
1228	LIST_REMOVE(lop, lo_lckowner);
1229	FREE((caddr_t)lop, M_NFSDLOCK);
1230}
1231
/*
 * This function frees an nfslockfile structure.
 * It does no checking of its own: the callers visible in this file
 * (nfsrv_freedeleg() and nfsrv_freeopen()) only call it after verifying
 * that the structure's lists are empty and its use count is zero.
 */
static void
nfsrv_freenfslockfile(struct nfslockfile *lfp)
{

	/* Take it off its hash chain, then release the memory. */
	LIST_REMOVE(lfp, lf_hash);
	FREE((caddr_t)lfp, M_NFSDLOCKFILE);
}
1242
1243/*
1244 * This function looks up an nfsstate structure via stateid.
1245 */
1246static int
1247nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1248    struct nfsstate **stpp)
1249{
1250	struct nfsstate *stp;
1251	struct nfsstatehead *hp;
1252	int error = 0;
1253
1254	*stpp = NULL;
1255	hp = NFSSTATEHASH(clp, *stateidp);
1256	LIST_FOREACH(stp, hp, ls_hash) {
1257		if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1258			NFSX_STATEIDOTHER))
1259			break;
1260	}
1261
1262	/*
1263	 * If no state id in list, return NFSERR_BADSTATEID.
1264	 */
1265	if (stp == LIST_END(hp)) {
1266		error = NFSERR_BADSTATEID;
1267		goto out;
1268	}
1269	*stpp = stp;
1270
1271out:
1272	NFSEXITCODE(error);
1273	return (error);
1274}
1275
1276/*
1277 * This function gets an nfsstate structure via owner string.
1278 */
1279static void
1280nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1281    struct nfsstate **stpp)
1282{
1283	struct nfsstate *stp;
1284
1285	*stpp = NULL;
1286	LIST_FOREACH(stp, hp, ls_list) {
1287		if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1288		  !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1289			*stpp = stp;
1290			return;
1291		}
1292	}
1293}
1294
/*
 * Lock control function called to update lock status.
 * The operation performed is selected by the NFSLCK_xxx bits set in
 * (*new_stpp)->ls_flags: Lock (NFSLCK_LOCK), LockU (NFSLCK_UNLOCK),
 * LockT (NFSLCK_TEST), the stateid/share check done for I/O Ops
 * (NFSLCK_CHECK) and Setattr (NFSLCK_SETATTR).
 * Returns 0 upon success, -1 if there is no lock and the flags indicate
 * that one isn't to be created and an NFSERR_xxx for other errors.
 * NOTE(review): no statement in this function assigns -1 to the returned
 * value directly (a "not found" from nfsrv_getlockfile() is mapped to 0,
 * NFSERR_EXPIRED or NFSERR_BADSTATEID below) - confirm whether the -1
 * case can still occur via one of the helpers.
 * The structures new_stp and new_lop are passed in as pointers that should
 * be set to NULL if the structure is used and shouldn't be free'd.
 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
 * never used and can safely be allocated on the stack. For all other
 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
 * in case they are used.
 *
 * Arguments:
 *  vp        - vnode for the file; passed to nfsrv_getlockfh() and to the
 *              local locking and conflict handling functions
 *  new_stpp  - in/out nfsstate describing the request, see above
 *  new_lopp  - in/out nfslock describing the byte range, see above
 *  cfp       - if not NULL, filled in with the conflicting lock's owner,
 *              range and flags when a conflicting lock is found
 *  clientid  - used for the restart check and to look up the nfsclient
 *  stateidp  - receives the updated stateid for a successful Lock/LockU
 *  exp       - unused
 *  nd        - nd_repstat is checked for an error detected by the caller
 *  p         - passed through to the functions called from here
 */
APPLESTATIC int
nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
    struct nfslock **new_lopp, struct nfslockconflict *cfp,
    nfsquad_t clientid, nfsv4stateid_t *stateidp,
    __unused struct nfsexstuff *exp,
    struct nfsrv_descript *nd, NFSPROC_T *p)
{
	struct nfslock *lop;
	struct nfsstate *new_stp = *new_stpp;
	struct nfslock *new_lop = *new_lopp;
	struct nfsstate *tstp, *mystp, *nstp;
	int specialid = 0;
	struct nfslockfile *lfp;
	struct nfslock *other_lop = NULL;
	struct nfsstate *stp, *lckstp = NULL;
	struct nfsclient *clp = NULL;
	u_int32_t bits;
	int error = 0, haslock = 0, ret, reterr;
	int getlckret, delegation = 0, filestruct_locked;
	fhandle_t nfh;
	uint64_t first, end;
	uint32_t lock_flags;

	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
		/*
		 * Note the special cases of "all 1s" or "all 0s" stateids and
		 * let reads with all 1s go ahead.
		 * (specialid is set to 1 for all 0s and 2 for all 1s.)
		 */
		if (new_stp->ls_stateid.seqid == 0x0 &&
		    new_stp->ls_stateid.other[0] == 0x0 &&
		    new_stp->ls_stateid.other[1] == 0x0 &&
		    new_stp->ls_stateid.other[2] == 0x0)
			specialid = 1;
		else if (new_stp->ls_stateid.seqid == 0xffffffff &&
		    new_stp->ls_stateid.other[0] == 0xffffffff &&
		    new_stp->ls_stateid.other[1] == 0xffffffff &&
		    new_stp->ls_stateid.other[2] == 0xffffffff)
			specialid = 2;
	}

	/*
	 * Check for restart conditions (client and server).
	 */
	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
	    &new_stp->ls_stateid, specialid);
	if (error)
		goto out;

	/*
	 * Check for state resource limit exceeded.
	 */
	if ((new_stp->ls_flags & NFSLCK_LOCK) &&
	    nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
		error = NFSERR_RESOURCE;
		goto out;
	}

	/*
	 * For the lock case, get another nfslock structure,
	 * just in case we need it.
	 * Malloc now, before we start sifting through the linked lists,
	 * in case we have to wait for memory.
	 *
	 * The conflict handling code below jumps back to tryagain after
	 * the state lock has been released, so that everything from here
	 * on is redone.
	 * NOTE(review): other_lop is freed before the "goto tryagain" in
	 * the delegation and byte range conflict paths, but not in the deny
	 * bit (NFSLCK_CHECK) and NFSLCK_SETATTR paths. That is only harmless
	 * if NFSLCK_LOCK is never set together with those flags - confirm
	 * against the callers.
	 */
tryagain:
	if (new_stp->ls_flags & NFSLCK_LOCK)
		MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock),
		    M_NFSDLOCK, M_WAITOK);
	filestruct_locked = 0;
	reterr = 0;
	lfp = NULL;

	/*
	 * Get the lockfile structure for CFH now, so we can do a sanity
	 * check against the stateid, before incrementing the seqid#, since
	 * we want to return NFSERR_BADSTATEID on failure and the seqid#
	 * shouldn't be incremented for this case.
	 * If nfsrv_getlockfile() returns -1, it means "not found", which
	 * will be handled later.
	 * If we are doing Lock/LockU and local locking is enabled, sleep
	 * lock the nfslockfile structure.
	 */
	getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
	NFSLOCKSTATE();
	if (getlckret == 0) {
		if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
		    nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
			    &lfp, &nfh, 1);
			if (getlckret == 0)
				filestruct_locked = 1;
		} else
			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
			    &lfp, &nfh, 0);
	}
	/* Any failure other than "not found" is remembered in reterr. */
	if (getlckret != 0 && getlckret != -1)
		reterr = getlckret;

	if (filestruct_locked != 0) {
		/* Start this attempt with an empty rollback list. */
		LIST_INIT(&lfp->lf_rollback);
		if ((new_stp->ls_flags & NFSLCK_LOCK)) {
			/*
			 * For local locking, do the advisory locking now, so
			 * that any conflict can be detected. A failure later
			 * can be rolled back locally. If an error is returned,
			 * struct nfslockfile has been unlocked and any local
			 * locking rolled back.
			 * The state lock is dropped around the call.
			 */
			NFSUNLOCKSTATE();
			reterr = nfsrv_locallock(vp, lfp,
			    (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
			    new_lop->lo_first, new_lop->lo_end, cfp, p);
			NFSLOCKSTATE();
		}
	}

	/*
	 * For a real (not special) stateid, get the nfsclient and, except
	 * for LockT, look up and sanity check the nfsstate for the stateid.
	 */
	if (specialid == 0) {
	    if (new_stp->ls_flags & NFSLCK_TEST) {
		/*
		 * RFC 3530 does not list LockT as an op that renews a
		 * lease, but the consensus seems to be that it is ok
		 * for a server to do so.
		 */
		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
		    (nfsquad_t)((u_quad_t)0), NULL, p);

		/*
		 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
		 * error returns for LockT, just go ahead and test for a lock,
		 * since there are no locks for this client, but other locks
		 * can conflict. (ie. same client will always be false)
		 */
		if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
		    error = 0;
		lckstp = new_stp;
	    } else {
	      error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
		(nfsquad_t)((u_quad_t)0), NULL, p);
	      if (error == 0)
		/*
		 * Look up the stateid
		 */
		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
		  new_stp->ls_flags, &stp);
	      /*
	       * do some sanity checks for an unconfirmed open or a
	       * stateid that refers to the wrong file, for an open stateid
	       */
	      if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
		  ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
		   (getlckret == 0 && stp->ls_lfp != lfp)))
			error = NFSERR_BADSTATEID;
	      /* A delegation stateid must refer to this file as well. */
	      if (error == 0 &&
		  (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
		  getlckret == 0 && stp->ls_lfp != lfp)
			error = NFSERR_BADSTATEID;

	      /*
	       * If the lockowner stateid doesn't refer to the same file,
	       * I believe that is considered ok, since some clients will
	       * only create a single lockowner and use that for all locks
	       * on all files.
	       * For now, log it as a diagnostic, instead of considering it
	       * a BadStateid.
	       */
	      if (error == 0 && (stp->ls_flags &
		  (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
		  getlckret == 0 && stp->ls_lfp != lfp) {
#ifdef DIAGNOSTIC
		  printf("Got a lock statid for different file open\n");
#endif
		  /*
		  error = NFSERR_BADSTATEID;
		  */
	      }

	      /*
	       * Check the seqid# and set lckstp to the lockowner state
	       * that the rest of the function works with.
	       */
	      if (error == 0) {
		    if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
			/*
			 * If haslock set, we've already checked the seqid.
			 */
			if (!haslock) {
			    if (stp->ls_flags & NFSLCK_OPEN)
				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
				    stp->ls_openowner, new_stp->ls_op);
			    else
				error = NFSERR_BADSTATEID;
			}
			if (!error)
			    nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
			if (lckstp)
			    /*
			     * I believe this should be an error, but it
			     * isn't obvious what NFSERR_xxx would be
			     * appropriate, so I'll use NFSERR_INVAL for now.
			     */
			    error = NFSERR_INVAL;
			else
			    lckstp = new_stp;
		    } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
			/*
			 * If haslock set, ditto above.
			 */
			if (!haslock) {
			    if (stp->ls_flags & NFSLCK_OPEN)
				error = NFSERR_BADSTATEID;
			    else
				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
				    stp, new_stp->ls_op);
			}
			lckstp = stp;
		    } else {
			lckstp = stp;
		    }
	      }
	      /*
	       * If the seqid part of the stateid isn't the same, return
	       * NFSERR_OLDSTATEID for cases other than I/O Ops.
	       * For I/O Ops, only return NFSERR_OLDSTATEID if
	       * nfsrv_returnoldstateid is set. (The consensus on the email
	       * list was that most clients would prefer to not receive
	       * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
	       * is what will happen, so I use the nfsrv_returnoldstateid to
	       * allow for either server configuration.)
	       */
	      if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
		  (!(new_stp->ls_flags & NFSLCK_CHECK) ||
		   nfsrv_returnoldstateid))
		    error = NFSERR_OLDSTATEID;
	    }
	}

	/*
	 * Now we can check for grace.
	 */
	if (!error)
		error = nfsrv_checkgrace(new_stp->ls_flags);
	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
		nfsrv_checkstable(clp))
		error = NFSERR_NOGRACE;
	/*
	 * If we successfully Reclaimed state, note that.
	 */
	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
		nfsrv_markstable(clp);

	/*
	 * At this point, either error == NFSERR_BADSTATEID or the
	 * seqid# has been updated, so we can return any error.
	 * If error == 0, there may be an error in:
	 *    nd_repstat - Set by the calling function.
	 *    reterr - Set above, if getting the nfslockfile structure
	 *       or acquiring the local lock failed.
	 *    (If both of these are set, nd_repstat should probably be
	 *     returned, since that error was detected before this
	 *     function call.)
	 */
	if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
		if (error == 0) {
			if (nd->nd_repstat != 0)
				error = nd->nd_repstat;
			else
				error = reterr;
		}
		if (filestruct_locked != 0) {
			/* Roll back local locks. */
			NFSUNLOCKSTATE();
			nfsrv_locallock_rollback(vp, lfp, p);
			NFSLOCKSTATE();
			nfsrv_unlocklf(lfp);
		}
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * Check the nfsrv_getlockfile return.
	 * Returned -1 if no structure found.
	 */
	if (getlckret == -1) {
		error = NFSERR_EXPIRED;
		/*
		 * Called from lockt, so no lock is OK.
		 */
		if (new_stp->ls_flags & NFSLCK_TEST) {
			error = 0;
		} else if (new_stp->ls_flags &
		    (NFSLCK_CHECK | NFSLCK_SETATTR)) {
			/*
			 * Called to check for a lock, OK if the stateid is all
			 * 1s or all 0s, but there should be an nfsstate
			 * otherwise.
			 * (ie. If there is no open, I'll assume no share
			 *  deny bits.)
			 */
			if (specialid)
				error = 0;
			else
				error = NFSERR_BADSTATEID;
		}
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
	 * For NFSLCK_CHECK, allow a read if write access is granted,
	 * but check for a deny. For NFSLCK_LOCK, require correct access,
	 * which implies a conflicting deny can't exist.
	 */
	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
	    /*
	     * Four kinds of state id:
	     * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
	     * - stateid for an open
	     * - stateid for a delegation
	     * - stateid for a lock owner
	     * mystp is set to the state whose access bits are checked
	     * (NULL for a specialid).
	     */
	    if (!specialid) {
		if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
		    delegation = 1;
		    mystp = stp;
		    nfsrv_delaydelegtimeout(stp);
	        } else if (stp->ls_flags & NFSLCK_OPEN) {
		    mystp = stp;
		} else {
		    mystp = stp->ls_openstp;
		}
		/*
		 * If locking or checking, require correct access
		 * bit set.
		 */
		if (((new_stp->ls_flags & NFSLCK_LOCK) &&
		     !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
		       mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
		      (NFSLCK_CHECK | NFSLCK_READACCESS) &&
		     !(mystp->ls_flags & NFSLCK_READACCESS)) ||
		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
		      (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
		     !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
			if (filestruct_locked != 0) {
				/* Roll back local locks. */
				NFSUNLOCKSTATE();
				nfsrv_locallock_rollback(vp, lfp, p);
				NFSLOCKSTATE();
				nfsrv_unlocklf(lfp);
			}
			NFSUNLOCKSTATE();
			error = NFSERR_OPENMODE;
			goto out;
		}
	    } else
		mystp = NULL;
	    if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
		/*
		 * Check for a conflicting deny bit.
		 */
		LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
		    if (tstp != mystp) {
			bits = tstp->ls_flags;
			bits >>= NFSLCK_SHIFT;
			if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
			    ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
				vp, p);
			    if (ret == 1) {
				/*
				 * nfsrv_clientconflict unlocks state
				 * when it returns non-zero.
				 */
				lckstp = NULL;
				goto tryagain;
			    }
			    if (ret == 0)
				NFSUNLOCKSTATE();
			    if (ret == 2)
				error = NFSERR_PERM;
			    else
				error = NFSERR_OPENMODE;
			    goto out;
			}
		    }
		}

		/* We're outta here */
		NFSUNLOCKSTATE();
		goto out;
	    }
	}

	/*
	 * For setattr, just get rid of all the Delegations for other clients.
	 */
	if (new_stp->ls_flags & NFSLCK_SETATTR) {
		ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
		if (ret) {
			/*
			 * nfsrv_cleandeleg() unlocks state when it
			 * returns non-zero.
			 */
			if (ret == -1) {
				lckstp = NULL;
				goto tryagain;
			}
			error = ret;
			goto out;
		}
		if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
		    (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
		     LIST_EMPTY(&lfp->lf_deleg))) {
			NFSUNLOCKSTATE();
			goto out;
		}
	}

	/*
	 * Check for a conflicting delegation. If one is found, call
	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
	 * been set yet, it will get the lock. Otherwise, it will recall
	 * the delegation. Then, we try again...
	 * I currently believe the conflict algorithm to be:
	 * For Lock Ops (Lock/LockT/LockU)
	 * - there is a conflict iff a different client has a write delegation
	 * For Reading (Read Op)
	 * - there is a conflict iff a different client has a write delegation
	 *   (the specialids are always a different client)
	 * For Writing (Write/Setattr of size)
	 * - there is a conflict if a different client has any delegation
	 * - there is a conflict if the same client has a read delegation
	 *   (I don't understand why this isn't allowed, but that seems to be
	 *    the current consensus?)
	 */
	tstp = LIST_FIRST(&lfp->lf_deleg);
	while (tstp != LIST_END(&lfp->lf_deleg)) {
	    nstp = LIST_NEXT(tstp, ls_file);
	    if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
		  (new_lop->lo_flags & NFSLCK_READ))) &&
		  clp != tstp->ls_clp &&
		 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
		   (new_lop->lo_flags & NFSLCK_WRITE) &&
		  (clp != tstp->ls_clp ||
		   (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
		if (filestruct_locked != 0) {
			/* Roll back local locks. */
			NFSUNLOCKSTATE();
			nfsrv_locallock_rollback(vp, lfp, p);
			NFSLOCKSTATE();
			nfsrv_unlocklf(lfp);
		}
		ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
		if (ret) {
		    /*
		     * nfsrv_delegconflict unlocks state when it
		     * returns non-zero, which it always does.
		     */
		    if (other_lop) {
			FREE((caddr_t)other_lop, M_NFSDLOCK);
			other_lop = NULL;
		    }
		    if (ret == -1) {
			lckstp = NULL;
			goto tryagain;
		    }
		    error = ret;
		    goto out;
		}
		/* Never gets here. */
	    }
	    tstp = nstp;
	}

	/*
	 * Handle the unlock case by calling nfsrv_updatelock().
	 * (Should I have done some access checking above for unlock? For now,
	 *  just let it happen.)
	 */
	if (new_stp->ls_flags & NFSLCK_UNLOCK) {
		/* Save the range, for the local unlock done below. */
		first = new_lop->lo_first;
		end = new_lop->lo_end;
		nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
		/* Bump the seqid and hand the stateid back to the caller. */
		stateidp->seqid = ++(stp->ls_stateid.seqid);
		stateidp->other[0] = stp->ls_stateid.other[0];
		stateidp->other[1] = stp->ls_stateid.other[1];
		stateidp->other[2] = stp->ls_stateid.other[2];
		if (filestruct_locked != 0) {
			NFSUNLOCKSTATE();
			/* Update the local locks. */
			nfsrv_localunlock(vp, lfp, first, end, p);
			NFSLOCKSTATE();
			nfsrv_unlocklf(lfp);
		}
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * Search for a conflicting lock. A lock conflicts if:
	 * - the lock range overlaps and
	 * - at least one lock is a write lock and
	 * - it is not owned by the same lock owner
	 */
	if (!delegation) {
	  LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
	    if (new_lop->lo_end > lop->lo_first &&
		new_lop->lo_first < lop->lo_end &&
		(new_lop->lo_flags == NFSLCK_WRITE ||
		 lop->lo_flags == NFSLCK_WRITE) &&
		lckstp != lop->lo_stp &&
		(clp != lop->lo_stp->ls_clp ||
		 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
		 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
		    lckstp->ls_ownerlen))) {
		if (other_lop) {
		    FREE((caddr_t)other_lop, M_NFSDLOCK);
		    other_lop = NULL;
		}
		ret = nfsrv_clientconflict(lop->lo_stp->ls_clp,&haslock,vp,p);
		if (ret == 1) {
		    if (filestruct_locked != 0) {
			/* Roll back local locks. */
			nfsrv_locallock_rollback(vp, lfp, p);
			NFSLOCKSTATE();
			nfsrv_unlocklf(lfp);
			NFSUNLOCKSTATE();
		    }
		    /*
		     * nfsrv_clientconflict() unlocks state when it
		     * returns non-zero.
		     */
		    lckstp = NULL;
		    goto tryagain;
		}
		/*
		 * Found a conflicting lock, so record the conflict and
		 * return the error.
		 */
		if (cfp != NULL && ret == 0) {
		    cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
		    cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
		    cfp->cl_first = lop->lo_first;
		    cfp->cl_end = lop->lo_end;
		    cfp->cl_flags = lop->lo_flags;
		    cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
		    NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
			cfp->cl_ownerlen);
		}
		if (ret == 2)
		    error = NFSERR_PERM;
		else if (new_stp->ls_flags & NFSLCK_RECLAIM)
		    error = NFSERR_RECLAIMCONFLICT;
		else if (new_stp->ls_flags & NFSLCK_CHECK)
		    error = NFSERR_LOCKED;
		else
		    error = NFSERR_DENIED;
		/*
		 * NOTE(review): for ret == 2 with filestruct_locked != 0,
		 * neither the rollback nor nfsrv_unlocklf() is done on
		 * this path - confirm that this is intended.
		 */
		if (filestruct_locked != 0 && ret == 0) {
			/* Roll back local locks. */
			NFSUNLOCKSTATE();
			nfsrv_locallock_rollback(vp, lfp, p);
			NFSLOCKSTATE();
			nfsrv_unlocklf(lfp);
		}
		if (ret == 0)
			NFSUNLOCKSTATE();
		goto out;
	    }
	  }
	}

	/*
	 * We only get here if there was no lock that conflicted.
	 */
	if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * We only get here when we are creating or modifying a lock.
	 * There are two variants:
	 * - exist_lock_owner where lock_owner exists
	 * - open_to_lock_owner with new lock_owner
	 * The range and flags are saved first, for the commit of the
	 * local lock done below.
	 */
	first = new_lop->lo_first;
	end = new_lop->lo_end;
	lock_flags = new_lop->lo_flags;
	if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
		nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
		stateidp->seqid = ++(lckstp->ls_stateid.seqid);
		stateidp->other[0] = lckstp->ls_stateid.other[0];
		stateidp->other[1] = lckstp->ls_stateid.other[1];
		stateidp->other[2] = lckstp->ls_stateid.other[2];
	} else {
		/*
		 * The new open_to_lock_owner case.
		 * Link the new nfsstate into the lists.
		 * The new stateid gets a seqid of 1, the clientid and the
		 * value returned by nfsrv_nextstateindex().
		 */
		new_stp->ls_seq = new_stp->ls_opentolockseq;
		nfsrvd_refcache(new_stp->ls_op);
		stateidp->seqid = new_stp->ls_stateid.seqid = 1;
		stateidp->other[0] = new_stp->ls_stateid.other[0] =
		    clp->lc_clientid.lval[0];
		stateidp->other[1] = new_stp->ls_stateid.other[1] =
		    clp->lc_clientid.lval[1];
		stateidp->other[2] = new_stp->ls_stateid.other[2] =
		    nfsrv_nextstateindex(clp);
		new_stp->ls_clp = clp;
		LIST_INIT(&new_stp->ls_lock);
		new_stp->ls_openstp = stp;
		new_stp->ls_lfp = lfp;
		nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
		    lfp);
		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
		    new_stp, ls_hash);
		LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
		/* Both structures are now in use, so the caller must not free them. */
		*new_lopp = NULL;
		*new_stpp = NULL;
		newnfsstats.srvlockowners++;
		nfsrv_openpluslock++;
	}
	if (filestruct_locked != 0) {
		NFSUNLOCKSTATE();
		nfsrv_locallock_commit(lfp, lock_flags, first, end);
		NFSLOCKSTATE();
		nfsrv_unlocklf(lfp);
	}
	NFSUNLOCKSTATE();

out:
	/*
	 * haslock is handed to the conflict handling functions by reference;
	 * when it is set on the way out, nfsv4rootfs_lock is unlocked here.
	 */
	if (haslock) {
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();
	}
	/* Free the spare nfslock structure if it is still held here. */
	if (other_lop)
		FREE((caddr_t)other_lop, M_NFSDLOCK);
	NFSEXITCODE2(error, nd);
	return (error);
}
1945
/*
 * Check for state errors for Open.
 * repstat is passed back out as an error if more critical errors
 * are not detected.
 *
 * The checks are done in this order:
 * - restart conditions, via nfsrv_checkrestart()
 * - the nfsrv_openpluslock high water mark
 * - client lookup, open owner seqid and grace period
 * - repstat, if none of the above failed
 * - (only when vp != NULL) a matching delegation for Delegate_Cur,
 *   access/deny conflicts with existing opens and conflicting
 *   delegations
 *
 * clientid - passed to nfsrv_checkrestart() and used to look up the
 *            nfsclient structure
 * stateidp - stateid compared against the file's delegations in the
 *            Delegate_Cur case
 * new_stp  - describes the requested open (ls_flags, ls_seq, ls_op and
 *            ls_stateid are read here)
 * vp       - vnode of the file, or NULL if the file doesn't exist yet
 * nd, p    - passed through to the helper functions
 * repstat  - status to return when no more critical error is found
 *
 * Returns 0 if no error was detected, otherwise the error.
 */
APPLESTATIC int
nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
    struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
    NFSPROC_T *p, int repstat)
{
	struct nfsstate *stp, *nstp;
	struct nfsclient *clp;
	struct nfsstate *ownerstp;
	struct nfslockfile *lfp, *new_lfp;
	/* haslock != 0 means this thread holds nfsv4rootfs_lock. */
	int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;

	/* "readonly" means Read Access with Deny None (no other share bits). */
	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
		readonly = 1;
	/*
	 * Check for restart conditions (client and server).
	 */
	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
		&new_stp->ls_stateid, 0);
	if (error)
		goto out;

	/*
	 * Check for state resource limit exceeded.
	 * Technically this should be SMP protected, but the worst
	 * case error is "out by one or two" on the count when it
	 * returns NFSERR_RESOURCE and the limit is just a rather
	 * arbitrary high water mark, so no harm is done.
	 */
	if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) {
		error = NFSERR_RESOURCE;
		goto out;
	}

tryagain:
	/*
	 * The allocation (M_WAITOK) and the file handle lookup are done
	 * before the state lock is acquired. Every "goto tryagain" below
	 * comes back here with the state lock released and new_lfp
	 * already freed, so a fresh structure is allocated on each pass.
	 */
	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
	    M_NFSDLOCKFILE, M_WAITOK);
	if (vp)
		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
		    NULL, p);
	NFSLOCKSTATE();
	/*
	 * Get the nfsclient structure.
	 */
	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
	    (nfsquad_t)((u_quad_t)0), NULL, p);

	/*
	 * Look up the open owner. See if it needs confirmation and
	 * check the seq#, as required.
	 * (ownerstp is only examined when nfsrv_getowner() was called.)
	 */
	if (!error)
		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);

	if (!error && ownerstp) {
		error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
		    new_stp->ls_op);
		/*
		 * If the OpenOwner hasn't been confirmed, assume the
		 * old one was a replay and this one is ok.
		 * See: RFC3530 Sec. 14.2.18.
		 */
		if (error == NFSERR_BADSEQID &&
		    (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
			error = 0;
	}

	/*
	 * Check for grace.
	 * (clp is only passed to nfsrv_checkstable() when no error has
	 *  occurred, i.e. when nfsrv_getclient() succeeded.)
	 */
	if (!error)
		error = nfsrv_checkgrace(new_stp->ls_flags);
	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
		nfsrv_checkstable(clp))
		error = NFSERR_NOGRACE;

	/*
	 * If none of the above errors occurred, let repstat be
	 * returned.
	 */
	if (repstat && !error)
		error = repstat;
	if (error) {
		NFSUNLOCKSTATE();
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
		goto out;
	}

	/*
	 * If vp == NULL, the file doesn't exist yet, so return ok.
	 * (This always happens on the first pass, so haslock must be 0.)
	 */
	if (vp == NULL) {
		NFSUNLOCKSTATE();
		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
		goto out;
	}

	/*
	 * Get the structure for the underlying file.
	 * A failure of the earlier nfsrv_getlockfh() call is reported here.
	 * new_lfp is freed if it is still non-NULL afterwards (presumably
	 * nfsrv_getlockfile() clears it when it uses the structure; confirm).
	 */
	if (getfhret)
		error = getfhret;
	else
		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
		    NULL, 0);
	if (new_lfp)
		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
	if (error) {
		NFSUNLOCKSTATE();
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		goto out;
	}

	/*
	 * Search for a conflicting open/share.
	 */
	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
	    /*
	     * For Delegate_Cur, search for the matching Delegation,
	     * which indicates no conflict.
	     * An old delegation should have been recovered by the
	     * client doing a Claim_DELEGATE_Prev, so I won't let
	     * it match and return NFSERR_EXPIRED. Should I let it
	     * match?
	     */
	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
		    stateidp->seqid == stp->ls_stateid.seqid &&
		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
			  NFSX_STATEIDOTHER))
			break;
	    }
	    /*
	     * No match, or a Write Access open against a read delegation,
	     * is reported as NFSERR_EXPIRED.
	     */
	    if (stp == LIST_END(&lfp->lf_deleg) ||
		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
		NFSUNLOCKSTATE();
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		error = NFSERR_EXPIRED;
		goto out;
	    }
	}

	/*
	 * Check for access/deny bit conflicts. I check for the same
	 * owner as well, in case the client didn't bother.
	 * (The new access bits are tested against the existing deny bits
	 *  and the existing access bits against the new deny bits. The
	 *  check is skipped entirely for Delegate_Cur.)
	 */
	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
		    (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
		      ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
		     ((stp->ls_flags & NFSLCK_ACCESSBITS) &
		      ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
			if (ret == 1) {
				/*
				 * nfsrv_clientconflict() unlocks
				 * state when it returns non-zero.
				 */
				goto tryagain;
			}
			if (ret == 2)
				error = NFSERR_PERM;
			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
				error = NFSERR_RECLAIMCONFLICT;
			else
				error = NFSERR_SHAREDENIED;
			/* Only unlock here if the callee did not. */
			if (ret == 0)
				NFSUNLOCKSTATE();
			if (haslock) {
				NFSLOCKV4ROOTMUTEX();
				nfsv4_unlock(&nfsv4rootfs_lock, 1);
				NFSUNLOCKV4ROOTMUTEX();
			}
			goto out;
		}
	}

	/*
	 * Check for a conflicting delegation. If one is found, call
	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
	 * been set yet, it will get the lock. Otherwise, it will recall
	 * the delegation. Then, we try again...
	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
	 *  isn't a conflict.)
	 * I currently believe the conflict algorithm to be:
	 * For Open with Read Access and Deny None
	 * - there is a conflict iff a different client has a write delegation
	 * For Open with other Write Access or any Deny except None
	 * - there is a conflict if a different client has any delegation
	 * - there is a conflict if the same client has a read delegation
	 *   (The current consensus is that this last case should be
	 *    considered a conflict since the client with a read delegation
	 *    could have done an Open with ReadAccess and WriteDeny
	 *    locally and then not have checked for the WriteDeny.)
	 * Don't check for a Reclaim, since that will be dealt with
	 * by nfsrv_openctrl().
	 */
	if (!(new_stp->ls_flags &
		(NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
	    stp = LIST_FIRST(&lfp->lf_deleg);
	    while (stp != LIST_END(&lfp->lf_deleg)) {
		/* Fetch the successor before nfsrv_delegconflict() is called. */
		nstp = LIST_NEXT(stp, ls_file);
		if ((readonly && stp->ls_clp != clp &&
		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
		    (!readonly && (stp->ls_clp != clp ||
		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
			if (ret) {
			    /*
			     * nfsrv_delegconflict() unlocks state
			     * when it returns non-zero.
			     * NOTE(review): the v4root lock is not
			     * released on this error path; presumably
			     * nfsrv_delegconflict() drops it before
			     * returning an error. Confirm.
			     */
			    if (ret == -1)
				goto tryagain;
			    error = ret;
			    goto out;
			}
		}
		stp = nstp;
	    }
	}
	/* No conflict was found, so release the locks and return 0. */
	NFSUNLOCKSTATE();
	if (haslock) {
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();
	}

out:
	NFSEXITCODE2(error, nd);
	return (error);
}
2196
/*
 * Open control function to create/update open state for an open.
 *
 * nd            - request descriptor; nd_compref is copied into any
 *                 delegation that is issued or recovered
 * vp            - vnode of the file being opened
 * new_stpp      - in/out: *new_stpp describes the requested open and
 *                 is set to NULL when the structure is linked into the
 *                 client's list as a new open owner
 * clientid      - used to look up the nfsclient structure
 * stateidp      - in: stateid matched against the file's delegations
 *                 for Delegate_Cur; out: stateid of the open, filled
 *                 in when no error is returned
 * delegstateidp - out: stateid of a delegation issued or recovered
 * rflagsp       - out: NFSV4OPEN_xxx result flags are or'd in
 * exp           - export information, tested with NFSVNO_EXRDONLY()
 * p             - passed through to the helper functions
 * filerev       - stored in ls_filerev of a newly issued delegation
 *
 * Returns 0 upon success, otherwise the error.
 */
APPLESTATIC int
nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
    struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
    nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
    NFSPROC_T *p, u_quad_t filerev)
{
	struct nfsstate *new_stp = *new_stpp;
	struct nfsstate *stp, *nstp;
	struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
	struct nfslockfile *lfp, *new_lfp;
	struct nfsclient *clp;
	/*
	 * haslock    - non-zero when this thread holds nfsv4rootfs_lock
	 * delegate   - 1: a delegation may be issued, 0: don't issue one,
	 *              2: set when a reclaim finds a conflicting
	 *              delegation, which makes the Claim_Previous case
	 *              below set NFSV4OPEN_RECALL
	 * writedeleg - cleared when only a read delegation may be issued
	 */
	int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
	int readonly = 0, cbret = 1, getfhret = 0;

	/* "readonly" means Read Access with Deny None (no other share bits). */
	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
		readonly = 1;
	/*
	 * Check for restart conditions (client and server).
	 * (Paranoia, should have been detected by nfsrv_opencheck().)
	 * If an error does show up, return NFSERR_EXPIRED, since the
	 * seqid# has already been incremented.
	 */
	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
	    &new_stp->ls_stateid, 0);
	if (error) {
		printf("Nfsd: openctrl unexpected restart err=%d\n",
		    error);
		error = NFSERR_EXPIRED;
		goto out;
	}

tryagain:
	/*
	 * All three structures are allocated (M_WAITOK) before the state
	 * lock is acquired. Every "goto tryagain" below comes back here
	 * with the state lock released and the structures freed.
	 */
	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
	    M_NFSDLOCKFILE, M_WAITOK);
	MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate),
	    M_NFSDSTATE, M_WAITOK);
	MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate),
	    M_NFSDSTATE, M_WAITOK);
	getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
	    NULL, p);
	NFSLOCKSTATE();
	/*
	 * Get the client structure. Since the linked lists could be changed
	 * by other nfsd processes if this process does a tsleep(), one of
	 * two things must be done.
	 * 1 - don't tsleep()
	 * or
	 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
	 *     before using the lists, since this lock stops the other
	 *     nfsd. This should only be used for rare cases, since it
	 *     essentially single threads the nfsd.
	 *     At this time, it is only done for cases where the stable
	 *     storage file must be written prior to completion of state
	 *     expiration.
	 */
	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
	    (nfsquad_t)((u_quad_t)0), NULL, p);
	if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
	    clp->lc_program) {
		/*
		 * This happens on the first open for a client
		 * that supports callbacks.
		 */
		NFSUNLOCKSTATE();
		/*
		 * Although nfsrv_docallback() will sleep, clp won't
		 * go away, since they are only removed when the
		 * nfsv4_lock() has blocked the nfsd threads. The
		 * fields in clp can change, but having multiple
		 * threads do this Null callback RPC should be
		 * harmless.
		 */
		cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
		    NULL, 0, NULL, NULL, NULL, p);
		NFSLOCKSTATE();
		/* Callbacks are only turned on if the Null callback returned 0. */
		clp->lc_flags &= ~LCL_NEEDSCBNULL;
		if (!cbret)
			clp->lc_flags |= LCL_CALLBACKSON;
	}

	/*
	 * Look up the open owner. See if it needs confirmation and
	 * check the seq#, as required.
	 */
	if (!error)
		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);

	if (error) {
		NFSUNLOCKSTATE();
		printf("Nfsd: openctrl unexpected state err=%d\n",
			error);
		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
		free((caddr_t)new_open, M_NFSDSTATE);
		free((caddr_t)new_deleg, M_NFSDSTATE);
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		error = NFSERR_EXPIRED;
		goto out;
	}

	if (new_stp->ls_flags & NFSLCK_RECLAIM)
		nfsrv_markstable(clp);

	/*
	 * Get the structure for the underlying file.
	 * A failure of the earlier nfsrv_getlockfh() call is reported here.
	 * new_lfp is freed if it is still non-NULL afterwards (presumably
	 * nfsrv_getlockfile() clears it when it uses the structure; confirm).
	 */
	if (getfhret)
		error = getfhret;
	else
		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
		    NULL, 0);
	if (new_lfp)
		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
	if (error) {
		NFSUNLOCKSTATE();
		printf("Nfsd openctrl unexpected getlockfile err=%d\n",
		    error);
		free((caddr_t)new_open, M_NFSDSTATE);
		free((caddr_t)new_deleg, M_NFSDSTATE);
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		goto out;
	}

	/*
	 * Search for a conflicting open/share.
	 */
	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
	    /*
	     * For Delegate_Cur, search for the matching Delegation,
	     * which indicates no conflict.
	     * An old delegation should have been recovered by the
	     * client doing a Claim_DELEGATE_Prev, so I won't let
	     * it match and return NFSERR_EXPIRED. Should I let it
	     * match?
	     */
	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
		if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
		    stateidp->seqid == stp->ls_stateid.seqid &&
		    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
			NFSX_STATEIDOTHER))
			break;
	    }
	    /*
	     * No match, or a Write Access open against a read delegation,
	     * is reported as NFSERR_EXPIRED.
	     */
	    if (stp == LIST_END(&lfp->lf_deleg) ||
		((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
		 (stp->ls_flags & NFSLCK_DELEGREAD))) {
		NFSUNLOCKSTATE();
		printf("Nfsd openctrl unexpected expiry\n");
		free((caddr_t)new_open, M_NFSDSTATE);
		free((caddr_t)new_deleg, M_NFSDSTATE);
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		error = NFSERR_EXPIRED;
		goto out;
	    }

	    /*
	     * Don't issue a Delegation, since one already exists and
	     * delay delegation timeout, as required.
	     */
	    delegate = 0;
	    nfsrv_delaydelegtimeout(stp);
	}

	/*
	 * Check for access/deny bit conflicts. I also check for the
	 * same owner, since the client might not have bothered to check.
	 * Also, note an open for the same file and owner, if found,
	 * which is all we do here for Delegate_Cur, since conflict
	 * checking is already done.
	 */
	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
		if (ownerstp && stp->ls_openowner == ownerstp)
			openstp = stp;
		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
		    /*
		     * If another client has the file open, the only
		     * delegation that can be issued is a Read delegation
		     * and only if it is a Read open with Deny none.
		     */
		    if (clp != stp->ls_clp) {
			if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
			    NFSLCK_READACCESS)
			    writedeleg = 0;
			else
			    delegate = 0;
		    }
		    /*
		     * The new access bits are tested against the existing
		     * deny bits and the existing access bits against the
		     * new deny bits.
		     */
		    if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
		        ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
		       ((stp->ls_flags & NFSLCK_ACCESSBITS) &
		        ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
			if (ret == 1) {
				/*
				 * nfsrv_clientconflict() unlocks state
				 * when it returns non-zero.
				 * openstp is cleared, since the lists
				 * are scanned again after the retry.
				 */
				free((caddr_t)new_open, M_NFSDSTATE);
				free((caddr_t)new_deleg, M_NFSDSTATE);
				openstp = NULL;
				goto tryagain;
			}
			if (ret == 2)
				error = NFSERR_PERM;
			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
				error = NFSERR_RECLAIMCONFLICT;
			else
				error = NFSERR_SHAREDENIED;
			/* Only unlock here if the callee did not. */
			if (ret == 0)
				NFSUNLOCKSTATE();
			if (haslock) {
				NFSLOCKV4ROOTMUTEX();
				nfsv4_unlock(&nfsv4rootfs_lock, 1);
				NFSUNLOCKV4ROOTMUTEX();
			}
			free((caddr_t)new_open, M_NFSDSTATE);
			free((caddr_t)new_deleg, M_NFSDSTATE);
			printf("nfsd openctrl unexpected client cnfl\n");
			goto out;
		    }
		}
	}

	/*
	 * Check for a conflicting delegation. If one is found, call
	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
	 * been set yet, it will get the lock. Otherwise, it will recall
	 * the delegation. Then, we try again...
	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
	 *  isn't a conflict.)
	 * I currently believe the conflict algorithm to be:
	 * For Open with Read Access and Deny None
	 * - there is a conflict iff a different client has a write delegation
	 * For Open with other Write Access or any Deny except None
	 * - there is a conflict if a different client has any delegation
	 * - there is a conflict if the same client has a read delegation
	 *   (The current consensus is that this last case should be
	 *    considered a conflict since the client with a read delegation
	 *    could have done an Open with ReadAccess and WriteDeny
	 *    locally and then not have checked for the WriteDeny.)
	 */
	if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
	    stp = LIST_FIRST(&lfp->lf_deleg);
	    while (stp != LIST_END(&lfp->lf_deleg)) {
		/* Fetch the successor before nfsrv_delegconflict() is called. */
		nstp = LIST_NEXT(stp, ls_file);
		/*
		 * A read delegation held by a different client only rules
		 * out a write delegation; any other existing delegation
		 * rules out issuing a new one.
		 */
		if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
			writedeleg = 0;
		else
			delegate = 0;
		if ((readonly && stp->ls_clp != clp &&
		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
		    (!readonly && (stp->ls_clp != clp ||
		         (stp->ls_flags & NFSLCK_DELEGREAD)))) {
		    if (new_stp->ls_flags & NFSLCK_RECLAIM) {
			/* For a reclaim, just note the conflict. */
			delegate = 2;
		    } else {
			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
			if (ret) {
			    /*
			     * nfsrv_delegconflict() unlocks state
			     * when it returns non-zero.
			     * NOTE(review): the v4root lock is not
			     * released on the error path below;
			     * presumably nfsrv_delegconflict() drops
			     * it before returning an error. Confirm.
			     */
			    printf("Nfsd openctrl unexpected deleg cnfl\n");
			    free((caddr_t)new_open, M_NFSDSTATE);
			    free((caddr_t)new_deleg, M_NFSDSTATE);
			    if (ret == -1) {
				openstp = NULL;
				goto tryagain;
			    }
			    error = ret;
			    goto out;
			}
		    }
		}
		stp = nstp;
	    }
	}

	/*
	 * We only get here if there was no open that conflicted.
	 * If an open for the owner exists, "or" the new access/deny bits
	 * into it. Otherwise it is a new open. If the open_owner hasn't
	 * been confirmed, replace the open with the new one needing
	 * confirmation, otherwise add the open.
	 */
	if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
	    /*
	     * Handle NFSLCK_DELEGPREV by searching the old delegations for
	     * a match. If found, just move the old delegation to the current
	     * delegation list and issue open. If not found, return
	     * NFSERR_EXPIRED.
	     */
	    LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
		if (stp->ls_lfp == lfp) {
		    /* Found it */
		    if (stp->ls_clp != clp)
			panic("olddeleg clp");
		    /*
		     * Unlink it from the old delegation and hash lists,
		     * give it a new stateid and then link it into the
		     * current delegation list and the hash list for the
		     * new stateid.
		     */
		    LIST_REMOVE(stp, ls_list);
		    LIST_REMOVE(stp, ls_hash);
		    stp->ls_flags &= ~NFSLCK_OLDDELEG;
		    stp->ls_stateid.seqid = delegstateidp->seqid = 0;
		    stp->ls_stateid.other[0] = delegstateidp->other[0] =
			clp->lc_clientid.lval[0];
		    stp->ls_stateid.other[1] = delegstateidp->other[1] =
			clp->lc_clientid.lval[1];
		    stp->ls_stateid.other[2] = delegstateidp->other[2] =
			nfsrv_nextstateindex(clp);
		    stp->ls_compref = nd->nd_compref;
		    LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
		    LIST_INSERT_HEAD(NFSSTATEHASH(clp,
			stp->ls_stateid), stp, ls_hash);
		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
		    else
			*rflagsp |= NFSV4OPEN_READDELEGATE;
		    clp->lc_delegtime = NFSD_MONOSEC +
			nfsrv_lease + NFSRV_LEASEDELTA;

		    /*
		     * Now, do the associated open.
		     * (The access bits come from the type of the
		     *  delegation, the deny bits from the request.)
		     */
		    new_open->ls_stateid.seqid = 0;
		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
		    new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
			NFSLCK_OPEN;
		    if (stp->ls_flags & NFSLCK_DELEGWRITE)
			new_open->ls_flags |= (NFSLCK_READACCESS |
			    NFSLCK_WRITEACCESS);
		    else
			new_open->ls_flags |= NFSLCK_READACCESS;
		    new_open->ls_uid = new_stp->ls_uid;
		    new_open->ls_lfp = lfp;
		    new_open->ls_clp = clp;
		    LIST_INIT(&new_open->ls_open);
		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
			new_open, ls_hash);
		    /*
		     * and handle the open owner
		     * (If there isn't one yet, new_stp becomes the open
		     *  owner and *new_stpp is set to NULL.)
		     */
		    if (ownerstp) {
			new_open->ls_openowner = ownerstp;
			LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
		    } else {
			new_open->ls_openowner = new_stp;
			new_stp->ls_flags = 0;
			nfsrvd_refcache(new_stp->ls_op);
			new_stp->ls_noopens = 0;
			LIST_INIT(&new_stp->ls_open);
			LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
			LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
			*new_stpp = NULL;
			newnfsstats.srvopenowners++;
			nfsrv_openpluslock++;
		    }
		    /* new_open is now linked in, so it must not be freed below. */
		    openstp = new_open;
		    new_open = NULL;
		    newnfsstats.srvopens++;
		    nfsrv_openpluslock++;
		    break;
		}
	    }
	    if (stp == LIST_END(&clp->lc_olddeleg))
		error = NFSERR_EXPIRED;
	} else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
	    /*
	     * Scan to check that a delegation for this client and file
	     * doesn't already exist.
	     * There also shouldn't yet be an Open for this file and
	     * openowner.
	     */
	    LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
		if (stp->ls_clp == clp)
		    break;
	    }
	    if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
		/*
		 * This is the Claim_Previous case with a delegation
		 * type != Delegate_None.
		 */
		/*
		 * First, add the delegation. (Although we must issue the
		 * delegation, we can also ask for an immediate return.)
		 */
		new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0;
		new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
		    clp->lc_clientid.lval[0];
		new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
		    clp->lc_clientid.lval[1];
		new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
		    nfsrv_nextstateindex(clp);
		if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
		    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
			NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
		    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
		} else {
		    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
			NFSLCK_READACCESS);
		    *rflagsp |= NFSV4OPEN_READDELEGATE;
		}
		new_deleg->ls_uid = new_stp->ls_uid;
		new_deleg->ls_lfp = lfp;
		new_deleg->ls_clp = clp;
		new_deleg->ls_filerev = filerev;
		new_deleg->ls_compref = nd->nd_compref;
		LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
		LIST_INSERT_HEAD(NFSSTATEHASH(clp,
		    new_deleg->ls_stateid), new_deleg, ls_hash);
		LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
		/* new_deleg is now linked in, so it must not be freed below. */
		new_deleg = NULL;
		/*
		 * Ask for an immediate return if a conflicting delegation
		 * was noted above (delegate == 2) or if any of the tests
		 * for issuing a delegation fails.
		 */
		if (delegate == 2 || nfsrv_issuedelegs == 0 ||
		    (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
		     LCL_CALLBACKSON ||
		    NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
		    !NFSVNO_DELEGOK(vp))
		    *rflagsp |= NFSV4OPEN_RECALL;
		newnfsstats.srvdelegates++;
		nfsrv_openpluslock++;
		nfsrv_delegatecnt++;

		/*
		 * Now, do the associated open.
		 */
		new_open->ls_stateid.seqid = 0;
		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
		    NFSLCK_OPEN;
		if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
			new_open->ls_flags |= (NFSLCK_READACCESS |
			    NFSLCK_WRITEACCESS);
		else
			new_open->ls_flags |= NFSLCK_READACCESS;
		new_open->ls_uid = new_stp->ls_uid;
		new_open->ls_lfp = lfp;
		new_open->ls_clp = clp;
		LIST_INIT(&new_open->ls_open);
		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
		   new_open, ls_hash);
		/*
		 * and handle the open owner
		 * (If there isn't one yet, new_stp becomes the open
		 *  owner and *new_stpp is set to NULL.)
		 */
		if (ownerstp) {
		    new_open->ls_openowner = ownerstp;
		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
		} else {
		    new_open->ls_openowner = new_stp;
		    new_stp->ls_flags = 0;
		    nfsrvd_refcache(new_stp->ls_op);
		    new_stp->ls_noopens = 0;
		    LIST_INIT(&new_stp->ls_open);
		    LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
		    LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
		    *new_stpp = NULL;
		    newnfsstats.srvopenowners++;
		    nfsrv_openpluslock++;
		}
		openstp = new_open;
		new_open = NULL;
		newnfsstats.srvopens++;
		nfsrv_openpluslock++;
	    } else {
		error = NFSERR_RECLAIMCONFLICT;
	    }
	} else if (ownerstp) {
		if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
		    /*
		     * Replace the open
		     * (The first open on the unconfirmed owner's list is
		     *  reused and moved to this file's open list, if it
		     *  was for a different file.)
		     */
		    if (ownerstp->ls_op)
			nfsrvd_derefcache(ownerstp->ls_op);
		    ownerstp->ls_op = new_stp->ls_op;
		    nfsrvd_refcache(ownerstp->ls_op);
		    ownerstp->ls_seq = new_stp->ls_seq;
		    *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
		    stp = LIST_FIRST(&ownerstp->ls_open);
		    stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
			NFSLCK_OPEN;
		    stp->ls_stateid.seqid = 0;
		    stp->ls_uid = new_stp->ls_uid;
		    if (lfp != stp->ls_lfp) {
			LIST_REMOVE(stp, ls_file);
			LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
			stp->ls_lfp = lfp;
		    }
		    openstp = stp;
		} else if (openstp) {
		    /*
		     * An open for this owner and file already exists, so
		     * "or" in the new share bits and bump the seqid.
		     */
		    openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
		    openstp->ls_stateid.seqid++;

		    /*
		     * This is where we can choose to issue a delegation.
		     * (Only a write delegation is issued in this case.)
		     */
		    if (delegate && nfsrv_issuedelegs &&
			writedeleg && !NFSVNO_EXRDONLY(exp) &&
			(nfsrv_writedelegifpos || !readonly) &&
			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
			 LCL_CALLBACKSON &&
			!NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
			NFSVNO_DELEGOK(vp)) {
			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0;
			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
			    = clp->lc_clientid.lval[0];
			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
			    = clp->lc_clientid.lval[1];
			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
			    = nfsrv_nextstateindex(clp);
			new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
			    NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
			*rflagsp |= NFSV4OPEN_WRITEDELEGATE;
			new_deleg->ls_uid = new_stp->ls_uid;
			new_deleg->ls_lfp = lfp;
			new_deleg->ls_clp = clp;
			new_deleg->ls_filerev = filerev;
			new_deleg->ls_compref = nd->nd_compref;
			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
			    new_deleg->ls_stateid), new_deleg, ls_hash);
			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
			new_deleg = NULL;
			newnfsstats.srvdelegates++;
			nfsrv_openpluslock++;
			nfsrv_delegatecnt++;
		    }
		} else {
		    /*
		     * A confirmed open owner without an open for this
		     * file, so add a new open to it.
		     */
		    new_open->ls_stateid.seqid = 0;
		    new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
		    new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
		    new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
		    new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
			NFSLCK_OPEN;
		    new_open->ls_uid = new_stp->ls_uid;
		    new_open->ls_openowner = ownerstp;
		    new_open->ls_lfp = lfp;
		    new_open->ls_clp = clp;
		    LIST_INIT(&new_open->ls_open);
		    LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
		    LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
		    LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
			new_open, ls_hash);
		    openstp = new_open;
		    new_open = NULL;
		    newnfsstats.srvopens++;
		    nfsrv_openpluslock++;

		    /*
		     * This is where we can choose to issue a delegation.
		     * (Either a write or a read delegation can be issued
		     *  in this case.)
		     */
		    if (delegate && nfsrv_issuedelegs &&
			(writedeleg || readonly) &&
			(clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
			 LCL_CALLBACKSON &&
			!NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
			NFSVNO_DELEGOK(vp)) {
			new_deleg->ls_stateid.seqid = delegstateidp->seqid = 0;
			new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
			    = clp->lc_clientid.lval[0];
			new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
			    = clp->lc_clientid.lval[1];
			new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
			    = nfsrv_nextstateindex(clp);
			if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
			    (nfsrv_writedelegifpos || !readonly)) {
			    new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
				NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
			    *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
			} else {
			    new_deleg->ls_flags = (NFSLCK_DELEGREAD |
				NFSLCK_READACCESS);
			    *rflagsp |= NFSV4OPEN_READDELEGATE;
			}
			new_deleg->ls_uid = new_stp->ls_uid;
			new_deleg->ls_lfp = lfp;
			new_deleg->ls_clp = clp;
			new_deleg->ls_filerev = filerev;
			new_deleg->ls_compref = nd->nd_compref;
			LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
			LIST_INSERT_HEAD(NFSSTATEHASH(clp,
			    new_deleg->ls_stateid), new_deleg, ls_hash);
			LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
			new_deleg = NULL;
			newnfsstats.srvdelegates++;
			nfsrv_openpluslock++;
			nfsrv_delegatecnt++;
		    }
		}
	} else {
		/*
		 * New owner case. Start the open_owner sequence with a
		 * Needs confirmation (unless a reclaim) and hang the
		 * new open off it.
		 */
		new_open->ls_stateid.seqid = 0;
		new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
		new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
		new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
		new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
		    NFSLCK_OPEN;
		new_open->ls_uid = new_stp->ls_uid;
		LIST_INIT(&new_open->ls_open);
		new_open->ls_openowner = new_stp;
		new_open->ls_lfp = lfp;
		new_open->ls_clp = clp;
		LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
		if (new_stp->ls_flags & NFSLCK_RECLAIM) {
			new_stp->ls_flags = 0;
		} else {
			*rflagsp |= NFSV4OPEN_RESULTCONFIRM;
			new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
		}
		nfsrvd_refcache(new_stp->ls_op);
		new_stp->ls_noopens = 0;
		LIST_INIT(&new_stp->ls_open);
		LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
		LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
		    new_open, ls_hash);
		openstp = new_open;
		new_open = NULL;
		/* new_stp is now the open owner, so tell the caller it is in use. */
		*new_stpp = NULL;
		newnfsstats.srvopens++;
		nfsrv_openpluslock++;
		newnfsstats.srvopenowners++;
		nfsrv_openpluslock++;
	}
	/* Pass the stateid of the open back out to the caller. */
	if (!error) {
		stateidp->seqid = openstp->ls_stateid.seqid;
		stateidp->other[0] = openstp->ls_stateid.other[0];
		stateidp->other[1] = openstp->ls_stateid.other[1];
		stateidp->other[2] = openstp->ls_stateid.other[2];
	}
	NFSUNLOCKSTATE();
	if (haslock) {
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();
	}
	/* Free whichever preallocated structures were not linked in. */
	if (new_open)
		FREE((caddr_t)new_open, M_NFSDSTATE);
	if (new_deleg)
		FREE((caddr_t)new_deleg, M_NFSDSTATE);

out:
	NFSEXITCODE2(error, nd);
	return (error);
}
2857
2858/*
2859 * Open update. Does the confirm, downgrade and close.
2860 */
2861APPLESTATIC int
2862nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
2863    nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p)
2864{
2865	struct nfsstate *stp, *ownerstp;
2866	struct nfsclient *clp;
2867	struct nfslockfile *lfp;
2868	u_int32_t bits;
2869	int error = 0, gotstate = 0, len = 0;
2870	u_char client[NFSV4_OPAQUELIMIT];
2871
2872	/*
2873	 * Check for restart conditions (client and server).
2874	 */
2875	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2876	    &new_stp->ls_stateid, 0);
2877	if (error)
2878		goto out;
2879
2880	NFSLOCKSTATE();
2881	/*
2882	 * Get the open structure via clientid and stateid.
2883	 */
2884	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
2885	    (nfsquad_t)((u_quad_t)0), NULL, p);
2886	if (!error)
2887		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
2888		    new_stp->ls_flags, &stp);
2889
2890	/*
2891	 * Sanity check the open.
2892	 */
2893	if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
2894		(!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
2895		 (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
2896		((new_stp->ls_flags & NFSLCK_CONFIRM) &&
2897		 (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
2898		error = NFSERR_BADSTATEID;
2899
2900	if (!error)
2901		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
2902		    stp->ls_openowner, new_stp->ls_op);
2903	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
2904	    !(new_stp->ls_flags & NFSLCK_CONFIRM))
2905		error = NFSERR_OLDSTATEID;
2906	if (!error && vnode_vtype(vp) != VREG) {
2907		if (vnode_vtype(vp) == VDIR)
2908			error = NFSERR_ISDIR;
2909		else
2910			error = NFSERR_INVAL;
2911	}
2912
2913	if (error) {
2914		/*
2915		 * If a client tries to confirm an Open with a bad
2916		 * seqid# and there are no byte range locks or other Opens
2917		 * on the openowner, just throw it away, so the next use of the
2918		 * openowner will start a fresh seq#.
2919		 */
2920		if (error == NFSERR_BADSEQID &&
2921		    (new_stp->ls_flags & NFSLCK_CONFIRM) &&
2922		    nfsrv_nootherstate(stp))
2923			nfsrv_freeopenowner(stp->ls_openowner, 0, p);
2924		NFSUNLOCKSTATE();
2925		goto out;
2926	}
2927
2928	/*
2929	 * Set the return stateid.
2930	 */
2931	stateidp->seqid = stp->ls_stateid.seqid + 1;
2932	stateidp->other[0] = stp->ls_stateid.other[0];
2933	stateidp->other[1] = stp->ls_stateid.other[1];
2934	stateidp->other[2] = stp->ls_stateid.other[2];
2935	/*
2936	 * Now, handle the three cases.
2937	 */
2938	if (new_stp->ls_flags & NFSLCK_CONFIRM) {
2939		/*
2940		 * If the open doesn't need confirmation, it seems to me that
2941		 * there is a client error, but I'll just log it and keep going?
2942		 */
2943		if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
2944			printf("Nfsv4d: stray open confirm\n");
2945		stp->ls_openowner->ls_flags = 0;
2946		stp->ls_stateid.seqid++;
2947		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
2948			clp->lc_flags |= LCL_STAMPEDSTABLE;
2949			len = clp->lc_idlen;
2950			NFSBCOPY(clp->lc_id, client, len);
2951			gotstate = 1;
2952		}
2953		NFSUNLOCKSTATE();
2954	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
2955		ownerstp = stp->ls_openowner;
2956		lfp = stp->ls_lfp;
2957		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
2958			/* Get the lf lock */
2959			nfsrv_locklf(lfp);
2960			NFSUNLOCKSTATE();
2961			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
2962				NFSLOCKSTATE();
2963				nfsrv_unlocklf(lfp);
2964				NFSUNLOCKSTATE();
2965			}
2966		} else {
2967			(void) nfsrv_freeopen(stp, NULL, 0, p);
2968			NFSUNLOCKSTATE();
2969		}
2970	} else {
2971		/*
2972		 * Update the share bits, making sure that the new set are a
2973		 * subset of the old ones.
2974		 */
2975		bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
2976		if (~(stp->ls_flags) & bits) {
2977			NFSUNLOCKSTATE();
2978			error = NFSERR_INVAL;
2979			goto out;
2980		}
2981		stp->ls_flags = (bits | NFSLCK_OPEN);
2982		stp->ls_stateid.seqid++;
2983		NFSUNLOCKSTATE();
2984	}
2985
2986	/*
2987	 * If the client just confirmed its first open, write a timestamp
2988	 * to the stable storage file.
2989	 */
2990	if (gotstate != 0) {
2991		nfsrv_writestable(client, len, NFSNST_NEWSTATE, p);
2992		nfsrv_backupstable();
2993	}
2994
2995out:
2996	NFSEXITCODE2(error, nd);
2997	return (error);
2998}
2999
3000/*
3001 * Delegation update. Does the purge and return.
3002 */
3003APPLESTATIC int
3004nfsrv_delegupdate(nfsquad_t clientid, nfsv4stateid_t *stateidp,
3005    vnode_t vp, int op, struct ucred *cred, NFSPROC_T *p)
3006{
3007	struct nfsstate *stp;
3008	struct nfsclient *clp;
3009	int error = 0;
3010	fhandle_t fh;
3011
3012	/*
3013	 * Do a sanity check against the file handle for DelegReturn.
3014	 */
3015	if (vp) {
3016		error = nfsvno_getfh(vp, &fh, p);
3017		if (error)
3018			goto out;
3019	}
3020	/*
3021	 * Check for restart conditions (client and server).
3022	 */
3023	if (op == NFSV4OP_DELEGRETURN)
3024		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3025			stateidp, 0);
3026	else
3027		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3028			stateidp, 0);
3029
3030	NFSLOCKSTATE();
3031	/*
3032	 * Get the open structure via clientid and stateid.
3033	 */
3034	if (!error)
3035	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
3036		(nfsquad_t)((u_quad_t)0), NULL, p);
3037	if (error) {
3038		if (error == NFSERR_CBPATHDOWN)
3039			error = 0;
3040		if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3041			error = NFSERR_STALESTATEID;
3042	}
3043	if (!error && op == NFSV4OP_DELEGRETURN) {
3044	    error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3045	    if (!error && stp->ls_stateid.seqid != stateidp->seqid)
3046		error = NFSERR_OLDSTATEID;
3047	}
3048	/*
3049	 * NFSERR_EXPIRED means that the state has gone away,
3050	 * so Delegations have been purged. Just return ok.
3051	 */
3052	if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3053		NFSUNLOCKSTATE();
3054		error = 0;
3055		goto out;
3056	}
3057	if (error) {
3058		NFSUNLOCKSTATE();
3059		goto out;
3060	}
3061
3062	if (op == NFSV4OP_DELEGRETURN) {
3063		if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3064		    sizeof (fhandle_t))) {
3065			NFSUNLOCKSTATE();
3066			error = NFSERR_BADSTATEID;
3067			goto out;
3068		}
3069		nfsrv_freedeleg(stp);
3070	} else {
3071		nfsrv_freedeleglist(&clp->lc_olddeleg);
3072	}
3073	NFSUNLOCKSTATE();
3074	error = 0;
3075
3076out:
3077	NFSEXITCODE(error);
3078	return (error);
3079}
3080
3081/*
3082 * Release lock owner.
3083 */
3084APPLESTATIC int
3085nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3086    NFSPROC_T *p)
3087{
3088	struct nfsstate *stp, *nstp, *openstp, *ownstp;
3089	struct nfsclient *clp;
3090	int error = 0;
3091
3092	/*
3093	 * Check for restart conditions (client and server).
3094	 */
3095	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3096	    &new_stp->ls_stateid, 0);
3097	if (error)
3098		goto out;
3099
3100	NFSLOCKSTATE();
3101	/*
3102	 * Get the lock owner by name.
3103	 */
3104	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp,
3105	    (nfsquad_t)((u_quad_t)0), NULL, p);
3106	if (error) {
3107		NFSUNLOCKSTATE();
3108		goto out;
3109	}
3110	LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3111	    LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3112		stp = LIST_FIRST(&openstp->ls_open);
3113		while (stp != LIST_END(&openstp->ls_open)) {
3114		    nstp = LIST_NEXT(stp, ls_list);
3115		    /*
3116		     * If the owner matches, check for locks and
3117		     * then free or return an error.
3118		     */
3119		    if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3120			!NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3121			 stp->ls_ownerlen)){
3122			if (LIST_EMPTY(&stp->ls_lock)) {
3123			    nfsrv_freelockowner(stp, NULL, 0, p);
3124			} else {
3125			    NFSUNLOCKSTATE();
3126			    error = NFSERR_LOCKSHELD;
3127			    goto out;
3128			}
3129		    }
3130		    stp = nstp;
3131		}
3132	    }
3133	}
3134	NFSUNLOCKSTATE();
3135
3136out:
3137	NFSEXITCODE(error);
3138	return (error);
3139}
3140
3141/*
3142 * Get the file handle for a lock structure.
3143 */
3144static int
3145nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3146    fhandle_t *nfhp, NFSPROC_T *p)
3147{
3148	fhandle_t *fhp = NULL;
3149	int error;
3150
3151	/*
3152	 * For lock, use the new nfslock structure, otherwise just
3153	 * a fhandle_t on the stack.
3154	 */
3155	if (flags & NFSLCK_OPEN) {
3156		KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3157		fhp = &new_lfp->lf_fh;
3158	} else if (nfhp) {
3159		fhp = nfhp;
3160	} else {
3161		panic("nfsrv_getlockfh");
3162	}
3163	error = nfsvno_getfh(vp, fhp, p);
3164	NFSEXITCODE(error);
3165	return (error);
3166}
3167
3168/*
3169 * Get an nfs lock structure. Allocate one, as required, and return a
3170 * pointer to it.
3171 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3172 */
3173static int
3174nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3175    struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3176{
3177	struct nfslockfile *lfp;
3178	fhandle_t *fhp = NULL, *tfhp;
3179	struct nfslockhashhead *hp;
3180	struct nfslockfile *new_lfp = NULL;
3181
3182	/*
3183	 * For lock, use the new nfslock structure, otherwise just
3184	 * a fhandle_t on the stack.
3185	 */
3186	if (flags & NFSLCK_OPEN) {
3187		new_lfp = *new_lfpp;
3188		fhp = &new_lfp->lf_fh;
3189	} else if (nfhp) {
3190		fhp = nfhp;
3191	} else {
3192		panic("nfsrv_getlockfile");
3193	}
3194
3195	hp = NFSLOCKHASH(fhp);
3196	LIST_FOREACH(lfp, hp, lf_hash) {
3197		tfhp = &lfp->lf_fh;
3198		if (NFSVNO_CMPFH(fhp, tfhp)) {
3199			if (lockit)
3200				nfsrv_locklf(lfp);
3201			*lfpp = lfp;
3202			return (0);
3203		}
3204	}
3205	if (!(flags & NFSLCK_OPEN))
3206		return (-1);
3207
3208	/*
3209	 * No match, so chain the new one into the list.
3210	 */
3211	LIST_INIT(&new_lfp->lf_open);
3212	LIST_INIT(&new_lfp->lf_lock);
3213	LIST_INIT(&new_lfp->lf_deleg);
3214	LIST_INIT(&new_lfp->lf_locallock);
3215	LIST_INIT(&new_lfp->lf_rollback);
3216	new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3217	new_lfp->lf_locallock_lck.nfslock_lock = 0;
3218	new_lfp->lf_usecount = 0;
3219	LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3220	*lfpp = new_lfp;
3221	*new_lfpp = NULL;
3222	return (0);
3223}
3224
3225/*
3226 * This function adds a nfslock lock structure to the list for the associated
3227 * nfsstate and nfslockfile structures. It will be inserted after the
3228 * entry pointed at by insert_lop.
3229 */
3230static void
3231nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3232    struct nfsstate *stp, struct nfslockfile *lfp)
3233{
3234	struct nfslock *lop, *nlop;
3235
3236	new_lop->lo_stp = stp;
3237	new_lop->lo_lfp = lfp;
3238
3239	if (stp != NULL) {
3240		/* Insert in increasing lo_first order */
3241		lop = LIST_FIRST(&lfp->lf_lock);
3242		if (lop == LIST_END(&lfp->lf_lock) ||
3243		    new_lop->lo_first <= lop->lo_first) {
3244			LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3245		} else {
3246			nlop = LIST_NEXT(lop, lo_lckfile);
3247			while (nlop != LIST_END(&lfp->lf_lock) &&
3248			       nlop->lo_first < new_lop->lo_first) {
3249				lop = nlop;
3250				nlop = LIST_NEXT(lop, lo_lckfile);
3251			}
3252			LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3253		}
3254	} else {
3255		new_lop->lo_lckfile.le_prev = NULL;	/* list not used */
3256	}
3257
3258	/*
3259	 * Insert after insert_lop, which is overloaded as stp or lfp for
3260	 * an empty list.
3261	 */
3262	if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3263		LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3264	else if ((struct nfsstate *)insert_lop == stp)
3265		LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3266	else
3267		LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3268	if (stp != NULL) {
3269		newnfsstats.srvlocks++;
3270		nfsrv_openpluslock++;
3271	}
3272}
3273
3274/*
3275 * This function updates the locking for a lock owner and given file. It
3276 * maintains a list of lock ranges ordered on increasing file offset that
3277 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
3278 * It always adds new_lop to the list and sometimes uses the one pointed
3279 * at by other_lopp.
3280 */
3281static void
3282nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
3283    struct nfslock **other_lopp, struct nfslockfile *lfp)
3284{
3285	struct nfslock *new_lop = *new_lopp;
3286	struct nfslock *lop, *tlop, *ilop;
3287	struct nfslock *other_lop = *other_lopp;
3288	int unlock = 0, myfile = 0;
3289	u_int64_t tmp;
3290
3291	/*
3292	 * Work down the list until the lock is merged.
3293	 */
3294	if (new_lop->lo_flags & NFSLCK_UNLOCK)
3295		unlock = 1;
3296	if (stp != NULL) {
3297		ilop = (struct nfslock *)stp;
3298		lop = LIST_FIRST(&stp->ls_lock);
3299	} else {
3300		ilop = (struct nfslock *)lfp;
3301		lop = LIST_FIRST(&lfp->lf_locallock);
3302	}
3303	while (lop != NULL) {
3304	    /*
3305	     * Only check locks for this file that aren't before the start of
3306	     * new lock's range.
3307	     */
3308	    if (lop->lo_lfp == lfp) {
3309	      myfile = 1;
3310	      if (lop->lo_end >= new_lop->lo_first) {
3311		if (new_lop->lo_end < lop->lo_first) {
3312			/*
3313			 * If the new lock ends before the start of the
3314			 * current lock's range, no merge, just insert
3315			 * the new lock.
3316			 */
3317			break;
3318		}
3319		if (new_lop->lo_flags == lop->lo_flags ||
3320		    (new_lop->lo_first <= lop->lo_first &&
3321		     new_lop->lo_end >= lop->lo_end)) {
3322			/*
3323			 * This lock can be absorbed by the new lock/unlock.
3324			 * This happens when it covers the entire range
3325			 * of the old lock or is contiguous
3326			 * with the old lock and is of the same type or an
3327			 * unlock.
3328			 */
3329			if (lop->lo_first < new_lop->lo_first)
3330				new_lop->lo_first = lop->lo_first;
3331			if (lop->lo_end > new_lop->lo_end)
3332				new_lop->lo_end = lop->lo_end;
3333			tlop = lop;
3334			lop = LIST_NEXT(lop, lo_lckowner);
3335			nfsrv_freenfslock(tlop);
3336			continue;
3337		}
3338
3339		/*
3340		 * All these cases are for contiguous locks that are not the
3341		 * same type, so they can't be merged.
3342		 */
3343		if (new_lop->lo_first <= lop->lo_first) {
3344			/*
3345			 * This case is where the new lock overlaps with the
3346			 * first part of the old lock. Move the start of the
3347			 * old lock to just past the end of the new lock. The
3348			 * new lock will be inserted in front of the old, since
3349			 * ilop hasn't been updated. (We are done now.)
3350			 */
3351			lop->lo_first = new_lop->lo_end;
3352			break;
3353		}
3354		if (new_lop->lo_end >= lop->lo_end) {
3355			/*
3356			 * This case is where the new lock overlaps with the
3357			 * end of the old lock's range. Move the old lock's
3358			 * end to just before the new lock's first and insert
3359			 * the new lock after the old lock.
3360			 * Might not be done yet, since the new lock could
3361			 * overlap further locks with higher ranges.
3362			 */
3363			lop->lo_end = new_lop->lo_first;
3364			ilop = lop;
3365			lop = LIST_NEXT(lop, lo_lckowner);
3366			continue;
3367		}
3368		/*
3369		 * The final case is where the new lock's range is in the
3370		 * middle of the current lock's and splits the current lock
3371		 * up. Use *other_lopp to handle the second part of the
3372		 * split old lock range. (We are done now.)
3373		 * For unlock, we use new_lop as other_lop and tmp, since
3374		 * other_lop and new_lop are the same for this case.
3375		 * We noted the unlock case above, so we don't need
3376		 * new_lop->lo_flags any longer.
3377		 */
3378		tmp = new_lop->lo_first;
3379		if (other_lop == NULL) {
3380			if (!unlock)
3381				panic("nfsd srv update unlock");
3382			other_lop = new_lop;
3383			*new_lopp = NULL;
3384		}
3385		other_lop->lo_first = new_lop->lo_end;
3386		other_lop->lo_end = lop->lo_end;
3387		other_lop->lo_flags = lop->lo_flags;
3388		other_lop->lo_stp = stp;
3389		other_lop->lo_lfp = lfp;
3390		lop->lo_end = tmp;
3391		nfsrv_insertlock(other_lop, lop, stp, lfp);
3392		*other_lopp = NULL;
3393		ilop = lop;
3394		break;
3395	      }
3396	    }
3397	    ilop = lop;
3398	    lop = LIST_NEXT(lop, lo_lckowner);
3399	    if (myfile && (lop == NULL || lop->lo_lfp != lfp))
3400		break;
3401	}
3402
3403	/*
3404	 * Insert the new lock in the list at the appropriate place.
3405	 */
3406	if (!unlock) {
3407		nfsrv_insertlock(new_lop, ilop, stp, lfp);
3408		*new_lopp = NULL;
3409	}
3410}
3411
3412/*
3413 * This function handles sequencing of locks, etc.
3414 * It returns an error that indicates what the caller should do.
3415 */
3416static int
3417nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
3418    struct nfsstate *stp, struct nfsrvcache *op)
3419{
3420	int error = 0;
3421
3422	if (op != nd->nd_rp)
3423		panic("nfsrvstate checkseqid");
3424	if (!(op->rc_flag & RC_INPROG))
3425		panic("nfsrvstate not inprog");
3426	if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
3427		printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
3428		panic("nfsrvstate op refcnt");
3429	}
3430	if ((stp->ls_seq + 1) == seqid) {
3431		if (stp->ls_op)
3432			nfsrvd_derefcache(stp->ls_op);
3433		stp->ls_op = op;
3434		nfsrvd_refcache(op);
3435		stp->ls_seq = seqid;
3436		goto out;
3437	} else if (stp->ls_seq == seqid && stp->ls_op &&
3438		op->rc_xid == stp->ls_op->rc_xid &&
3439		op->rc_refcnt == 0 &&
3440		op->rc_reqlen == stp->ls_op->rc_reqlen &&
3441		op->rc_cksum == stp->ls_op->rc_cksum) {
3442		if (stp->ls_op->rc_flag & RC_INPROG) {
3443			error = NFSERR_DONTREPLY;
3444			goto out;
3445		}
3446		nd->nd_rp = stp->ls_op;
3447		nd->nd_rp->rc_flag |= RC_INPROG;
3448		nfsrvd_delcache(op);
3449		error = NFSERR_REPLYFROMCACHE;
3450		goto out;
3451	}
3452	error = NFSERR_BADSEQID;
3453
3454out:
3455	NFSEXITCODE2(error, nd);
3456	return (error);
3457}
3458
3459/*
3460 * Get the client ip address for callbacks. If the strings can't be parsed,
3461 * just set lc_program to 0 to indicate no callbacks are possible.
3462 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
3463 *  the address to the client's transport address. This won't be used
3464 *  for callbacks, but can be printed out by newnfsstats for info.)
3465 * Return error if the xdr can't be parsed, 0 otherwise.
3466 */
3467APPLESTATIC int
3468nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
3469{
3470	u_int32_t *tl;
3471	u_char *cp, *cp2;
3472	int i, j;
3473	struct sockaddr_in *rad, *sad;
3474	u_char protocol[5], addr[24];
3475	int error = 0, cantparse = 0;
3476	union {
3477		u_long ival;
3478		u_char cval[4];
3479	} ip;
3480	union {
3481		u_short sval;
3482		u_char cval[2];
3483	} port;
3484
3485	rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
3486	rad->sin_family = AF_INET;
3487	rad->sin_len = sizeof (struct sockaddr_in);
3488	rad->sin_addr.s_addr = 0;
3489	rad->sin_port = 0;
3490	clp->lc_req.nr_client = NULL;
3491	clp->lc_req.nr_lock = 0;
3492	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3493	i = fxdr_unsigned(int, *tl);
3494	if (i >= 3 && i <= 4) {
3495		error = nfsrv_mtostr(nd, protocol, i);
3496		if (error)
3497			goto nfsmout;
3498		if (!strcmp(protocol, "tcp")) {
3499			clp->lc_flags |= LCL_TCPCALLBACK;
3500			clp->lc_req.nr_sotype = SOCK_STREAM;
3501			clp->lc_req.nr_soproto = IPPROTO_TCP;
3502		} else if (!strcmp(protocol, "udp")) {
3503			clp->lc_req.nr_sotype = SOCK_DGRAM;
3504			clp->lc_req.nr_soproto = IPPROTO_UDP;
3505		} else {
3506			cantparse = 1;
3507		}
3508	} else {
3509		cantparse = 1;
3510		if (i > 0) {
3511			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3512			if (error)
3513				goto nfsmout;
3514		}
3515	}
3516	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3517	i = fxdr_unsigned(int, *tl);
3518	if (i < 0) {
3519		error = NFSERR_BADXDR;
3520		goto nfsmout;
3521	} else if (i == 0) {
3522		cantparse = 1;
3523	} else if (!cantparse && i <= 23 && i >= 11) {
3524		error = nfsrv_mtostr(nd, addr, i);
3525		if (error)
3526			goto nfsmout;
3527
3528		/*
3529		 * Parse out the address fields. We expect 6 decimal numbers
3530		 * separated by '.'s.
3531		 */
3532		cp = addr;
3533		i = 0;
3534		while (*cp && i < 6) {
3535			cp2 = cp;
3536			while (*cp2 && *cp2 != '.')
3537				cp2++;
3538			if (*cp2)
3539				*cp2++ = '\0';
3540			else if (i != 5) {
3541				cantparse = 1;
3542				break;
3543			}
3544			j = nfsrv_getipnumber(cp);
3545			if (j >= 0) {
3546				if (i < 4)
3547					ip.cval[3 - i] = j;
3548				else
3549					port.cval[5 - i] = j;
3550			} else {
3551				cantparse = 1;
3552				break;
3553			}
3554			cp = cp2;
3555			i++;
3556		}
3557		if (!cantparse) {
3558			if (ip.ival != 0x0) {
3559				rad->sin_addr.s_addr = htonl(ip.ival);
3560				rad->sin_port = htons(port.sval);
3561			} else {
3562				cantparse = 1;
3563			}
3564		}
3565	} else {
3566		cantparse = 1;
3567		if (i > 0) {
3568			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
3569			if (error)
3570				goto nfsmout;
3571		}
3572	}
3573	if (cantparse) {
3574		sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
3575		rad->sin_addr.s_addr = sad->sin_addr.s_addr;
3576		rad->sin_port = 0x0;
3577		clp->lc_program = 0;
3578	}
3579nfsmout:
3580	NFSEXITCODE2(error, nd);
3581	return (error);
3582}
3583
3584/*
3585 * Turn a string of up to three decimal digits into a number. Return -1 upon
3586 * error.
3587 */
3588static int
3589nfsrv_getipnumber(u_char *cp)
3590{
3591	int i = 0, j = 0;
3592
3593	while (*cp) {
3594		if (j > 2 || *cp < '0' || *cp > '9')
3595			return (-1);
3596		i *= 10;
3597		i += (*cp - '0');
3598		cp++;
3599		j++;
3600	}
3601	if (i < 256)
3602		return (i);
3603	return (-1);
3604}
3605
3606/*
3607 * This function checks for restart conditions.
3608 */
3609static int
3610nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
3611    nfsv4stateid_t *stateidp, int specialid)
3612{
3613	int ret = 0;
3614
3615	/*
3616	 * First check for a server restart. Open, LockT, ReleaseLockOwner
3617	 * and DelegPurge have a clientid, the rest a stateid.
3618	 */
3619	if (flags &
3620	    (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
3621		if (clientid.lval[0] != nfsrvboottime) {
3622			ret = NFSERR_STALECLIENTID;
3623			goto out;
3624		}
3625	} else if (stateidp->other[0] != nfsrvboottime &&
3626		specialid == 0) {
3627		ret = NFSERR_STALESTATEID;
3628		goto out;
3629	}
3630
3631	/*
3632	 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
3633	 * not use a lock/open owner seqid#, so the check can be done now.
3634	 * (The others will be checked, as required, later.)
3635	 */
3636	if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
3637		goto out;
3638
3639	NFSLOCKSTATE();
3640	ret = nfsrv_checkgrace(flags);
3641	NFSUNLOCKSTATE();
3642
3643out:
3644	NFSEXITCODE(ret);
3645	return (ret);
3646}
3647
3648/*
3649 * Check for grace.
3650 */
3651static int
3652nfsrv_checkgrace(u_int32_t flags)
3653{
3654	int error = 0;
3655
3656	if (nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) {
3657		if (flags & NFSLCK_RECLAIM) {
3658			error = NFSERR_NOGRACE;
3659			goto out;
3660		}
3661	} else {
3662		if (!(flags & NFSLCK_RECLAIM)) {
3663			error = NFSERR_GRACE;
3664			goto out;
3665		}
3666
3667		/*
3668		 * If grace is almost over and we are still getting Reclaims,
3669		 * extend grace a bit.
3670		 */
3671		if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
3672		    nfsrv_stablefirst.nsf_eograce)
3673			nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
3674				NFSRV_LEASEDELTA;
3675	}
3676
3677out:
3678	NFSEXITCODE(error);
3679	return (error);
3680}
3681
3682/*
3683 * Do a server callback.
3684 */
3685static int
3686nfsrv_docallback(struct nfsclient *clp, int procnum,
3687    nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
3688    struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
3689{
3690	mbuf_t m;
3691	u_int32_t *tl;
3692	struct nfsrv_descript nfsd, *nd = &nfsd;
3693	struct ucred *cred;
3694	int error = 0;
3695	u_int32_t callback;
3696
3697	cred = newnfs_getcred();
3698	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
3699	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
3700		NFSUNLOCKSTATE();
3701		panic("docallb");
3702	}
3703	clp->lc_cbref++;
3704
3705	/*
3706	 * Fill the callback program# and version into the request
3707	 * structure for newnfs_connect() to use.
3708	 */
3709	clp->lc_req.nr_prog = clp->lc_program;
3710	clp->lc_req.nr_vers = NFSV4_CBVERS;
3711
3712	/*
3713	 * First, fill in some of the fields of nd and cr.
3714	 */
3715	nd->nd_flag = ND_NFSV4;
3716	if (clp->lc_flags & LCL_GSS)
3717		nd->nd_flag |= ND_KERBV;
3718	nd->nd_repstat = 0;
3719	cred->cr_uid = clp->lc_uid;
3720	cred->cr_gid = clp->lc_gid;
3721	callback = clp->lc_callback;
3722	NFSUNLOCKSTATE();
3723	cred->cr_ngroups = 1;
3724
3725	/*
3726	 * Get the first mbuf for the request.
3727	 */
3728	MGET(m, M_WAIT, MT_DATA);
3729	mbuf_setlen(m, 0);
3730	nd->nd_mreq = nd->nd_mb = m;
3731	nd->nd_bpos = NFSMTOD(m, caddr_t);
3732
3733	/*
3734	 * and build the callback request.
3735	 */
3736	if (procnum == NFSV4OP_CBGETATTR) {
3737		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
3738		(void) nfsm_strtom(nd, "CB Getattr", 10);
3739		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3740		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
3741		*tl++ = txdr_unsigned(callback);
3742		*tl++ = txdr_unsigned(1);
3743		*tl = txdr_unsigned(NFSV4OP_CBGETATTR);
3744		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
3745		(void) nfsrv_putattrbit(nd, attrbitp);
3746	} else if (procnum == NFSV4OP_CBRECALL) {
3747		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
3748		(void) nfsm_strtom(nd, "CB Recall", 9);
3749		NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID);
3750		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
3751		*tl++ = txdr_unsigned(callback);
3752		*tl++ = txdr_unsigned(1);
3753		*tl++ = txdr_unsigned(NFSV4OP_CBRECALL);
3754		*tl++ = txdr_unsigned(stateidp->seqid);
3755		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
3756		    NFSX_STATEIDOTHER);
3757		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
3758		if (trunc)
3759			*tl = newnfs_true;
3760		else
3761			*tl = newnfs_false;
3762		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
3763	} else {
3764		nd->nd_procnum = NFSV4PROC_CBNULL;
3765	}
3766
3767	/*
3768	 * Call newnfs_connect(), as required, and then newnfs_request().
3769	 */
3770	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
3771	if (clp->lc_req.nr_client == NULL) {
3772		if (nd->nd_procnum == NFSV4PROC_CBNULL)
3773			error = newnfs_connect(NULL, &clp->lc_req, cred,
3774			    NULL, 1);
3775		else
3776			error = newnfs_connect(NULL, &clp->lc_req, cred,
3777			    NULL, 3);
3778	}
3779	newnfs_sndunlock(&clp->lc_req.nr_lock);
3780	if (!error) {
3781		error = newnfs_request(nd, NULL, clp, &clp->lc_req, NULL,
3782		    NULL, cred, clp->lc_program, NFSV4_CBVERS, NULL, 1, NULL);
3783	}
3784	NFSFREECRED(cred);
3785
3786	/*
3787	 * If error is set here, the Callback path isn't working
3788	 * properly, so twiddle the appropriate LCL_ flags.
3789	 * (nd_repstat != 0 indicates the Callback path is working,
3790	 *  but the callback failed on the client.)
3791	 */
3792	if (error) {
3793		/*
3794		 * Mark the callback pathway down, which disabled issuing
3795		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
3796		 */
3797		NFSLOCKSTATE();
3798		clp->lc_flags |= LCL_CBDOWN;
3799		NFSUNLOCKSTATE();
3800	} else {
3801		/*
3802		 * Callback worked. If the callback path was down, disable
3803		 * callbacks, so no more delegations will be issued. (This
3804		 * is done on the assumption that the callback pathway is
3805		 * flakey.)
3806		 */
3807		NFSLOCKSTATE();
3808		if (clp->lc_flags & LCL_CBDOWN)
3809			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
3810		NFSUNLOCKSTATE();
3811		if (nd->nd_repstat)
3812			error = nd->nd_repstat;
3813		else if (procnum == NFSV4OP_CBGETATTR)
3814			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
3815			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
3816			    p, NULL);
3817		mbuf_freem(nd->nd_mrep);
3818	}
3819	NFSLOCKSTATE();
3820	clp->lc_cbref--;
3821	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
3822		clp->lc_flags &= ~LCL_WAKEUPWANTED;
3823		wakeup(clp);
3824	}
3825	NFSUNLOCKSTATE();
3826
3827	NFSEXITCODE(error);
3828	return (error);
3829}
3830
3831/*
3832 * Return the next index# for a clientid. Mostly just increment and return
3833 * the next one, but... if the 32bit unsigned does actually wrap around,
3834 * it should be rebooted.
3835 * At an average rate of one new client per second, it will wrap around in
3836 * approximately 136 years. (I think the server will have been shut
3837 * down or rebooted before then.)
3838 */
3839static u_int32_t
3840nfsrv_nextclientindex(void)
3841{
3842	static u_int32_t client_index = 0;
3843
3844	client_index++;
3845	if (client_index != 0)
3846		return (client_index);
3847
3848	printf("%s: out of clientids\n", __func__);
3849	return (client_index);
3850}
3851
3852/*
3853 * Return the next index# for a stateid. Mostly just increment and return
3854 * the next one, but... if the 32bit unsigned does actually wrap around
3855 * (will a BSD server stay up that long?), find
3856 * new start and end values.
3857 */
3858static u_int32_t
3859nfsrv_nextstateindex(struct nfsclient *clp)
3860{
3861	struct nfsstate *stp;
3862	int i;
3863	u_int32_t canuse, min_index, max_index;
3864
3865	if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
3866		clp->lc_stateindex++;
3867		if (clp->lc_stateindex != clp->lc_statemaxindex)
3868			return (clp->lc_stateindex);
3869	}
3870
3871	/*
3872	 * Yuck, we've hit the end.
3873	 * Look for a new min and max.
3874	 */
3875	min_index = 0;
3876	max_index = 0xffffffff;
3877	for (i = 0; i < NFSSTATEHASHSIZE; i++) {
3878	    LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
3879		if (stp->ls_stateid.other[2] > 0x80000000) {
3880		    if (stp->ls_stateid.other[2] < max_index)
3881			max_index = stp->ls_stateid.other[2];
3882		} else {
3883		    if (stp->ls_stateid.other[2] > min_index)
3884			min_index = stp->ls_stateid.other[2];
3885		}
3886	    }
3887	}
3888
3889	/*
3890	 * Yikes, highly unlikely, but I'll handle it anyhow.
3891	 */
3892	if (min_index == 0x80000000 && max_index == 0x80000001) {
3893	    canuse = 0;
3894	    /*
3895	     * Loop around until we find an unused entry. Return that
3896	     * and set LCL_INDEXNOTOK, so the search will continue next time.
3897	     * (This is one of those rare cases where a goto is the
3898	     *  cleanest way to code the loop.)
3899	     */
3900tryagain:
3901	    for (i = 0; i < NFSSTATEHASHSIZE; i++) {
3902		LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
3903		    if (stp->ls_stateid.other[2] == canuse) {
3904			canuse++;
3905			goto tryagain;
3906		    }
3907		}
3908	    }
3909	    clp->lc_flags |= LCL_INDEXNOTOK;
3910	    return (canuse);
3911	}
3912
3913	/*
3914	 * Ok to start again from min + 1.
3915	 */
3916	clp->lc_stateindex = min_index + 1;
3917	clp->lc_statemaxindex = max_index;
3918	clp->lc_flags &= ~LCL_INDEXNOTOK;
3919	return (clp->lc_stateindex);
3920}
3921
3922/*
3923 * The following functions handle the stable storage file that deals with
3924 * the edge conditions described in RFC3530 Sec. 8.6.3.
3925 * The file is as follows:
3926 * - a single record at the beginning that has the lease time of the
3927 *   previous server instance (before the last reboot) and the nfsrvboottime
3928 *   values for the previous server boots.
3929 *   These previous boot times are used to ensure that the current
3930 *   nfsrvboottime does not, somehow, get set to a previous one.
3931 *   (This is important so that Stale ClientIDs and StateIDs can
3932 *    be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
3934 * - followed by some number of appended records with:
3935 *   - client id string
3936 *   - flag that indicates it is a record revoking state via lease
3937 *     expiration or similar
3938 *     OR has successfully acquired state.
3939 * These structures vary in length, with the client string at the end, up
3940 * to NFSV4_OPAQUELIMIT in size.
3941 *
3942 * At the end of the grace period, the file is truncated, the first
3943 * record is rewritten with updated information and any acquired state
3944 * records for successful reclaims of state are written.
3945 *
3946 * Subsequent records are appended when the first state is issued to
3947 * a client and when state is revoked for a client.
3948 *
3949 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological order.
3951 * If, for some reason, the file can't be read, the grace period is
3952 * immediately terminated and all reclaims get NFSERR_NOGRACE.
3953 */
3954
3955/*
3956 * Read in the stable storage file. Called by nfssvc() before the nfsd
3957 * processes start servicing requests.
3958 */
3959APPLESTATIC void
3960nfsrv_setupstable(NFSPROC_T *p)
3961{
3962	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
3963	struct nfsrv_stable *sp, *nsp;
3964	struct nfst_rec *tsp;
3965	int error, i, tryagain;
3966	off_t off = 0;
3967	ssize_t aresid, len;
3968
3969	/*
3970	 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
3971	 * a reboot, so state has not been lost.
3972	 */
3973	if (sf->nsf_flags & NFSNSF_UPDATEDONE)
3974		return;
3975	/*
3976	 * Set Grace over just until the file reads successfully.
3977	 */
3978	nfsrvboottime = time_second;
3979	LIST_INIT(&sf->nsf_head);
3980	sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
3981	sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
3982	if (sf->nsf_fp == NULL)
3983		return;
3984	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
3985	    (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
3986	    0, NFSFPCRED(sf->nsf_fp), &aresid, p);
3987	if (error || aresid || sf->nsf_numboots == 0 ||
3988		sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
3989		return;
3990
3991	/*
3992	 * Now, read in the boottimes.
3993	 */
3994	sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
3995		sizeof (time_t), M_TEMP, M_WAITOK);
3996	off = sizeof (struct nfsf_rec);
3997	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
3998	    (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
3999	    UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4000	if (error || aresid) {
4001		free((caddr_t)sf->nsf_bootvals, M_TEMP);
4002		sf->nsf_bootvals = NULL;
4003		return;
4004	}
4005
4006	/*
4007	 * Make sure this nfsrvboottime is different from all recorded
4008	 * previous ones.
4009	 */
4010	do {
4011		tryagain = 0;
4012		for (i = 0; i < sf->nsf_numboots; i++) {
4013			if (nfsrvboottime == sf->nsf_bootvals[i]) {
4014				nfsrvboottime++;
4015				tryagain = 1;
4016				break;
4017			}
4018		}
4019	} while (tryagain);
4020
4021	sf->nsf_flags |= NFSNSF_OK;
4022	off += (sf->nsf_numboots * sizeof (time_t));
4023
4024	/*
4025	 * Read through the file, building a list of records for grace
4026	 * checking.
4027	 * Each record is between sizeof (struct nfst_rec) and
4028	 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
4029	 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
4030	 */
4031	tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4032		NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
4033	do {
4034	    error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4035	        (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
4036	        off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4037	    len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
4038	    if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
4039		len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
4040		/*
4041		 * Yuck, the file has been corrupted, so just return
4042		 * after clearing out any restart state, so the grace period
4043		 * is over.
4044		 */
4045		LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4046			LIST_REMOVE(sp, nst_list);
4047			free((caddr_t)sp, M_TEMP);
4048		}
4049		free((caddr_t)tsp, M_TEMP);
4050		sf->nsf_flags &= ~NFSNSF_OK;
4051		free((caddr_t)sf->nsf_bootvals, M_TEMP);
4052		sf->nsf_bootvals = NULL;
4053		return;
4054	    }
4055	    if (len > 0) {
4056		off += sizeof (struct nfst_rec) + tsp->len - 1;
4057		/*
4058		 * Search the list for a matching client.
4059		 */
4060		LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
4061			if (tsp->len == sp->nst_len &&
4062			    !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
4063				break;
4064		}
4065		if (sp == LIST_END(&sf->nsf_head)) {
4066			sp = (struct nfsrv_stable *)malloc(tsp->len +
4067				sizeof (struct nfsrv_stable) - 1, M_TEMP,
4068				M_WAITOK);
4069			NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
4070				sizeof (struct nfst_rec) + tsp->len - 1);
4071			LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
4072		} else {
4073			if (tsp->flag == NFSNST_REVOKE)
4074				sp->nst_flag |= NFSNST_REVOKE;
4075			else
4076				/*
4077				 * A subsequent timestamp indicates the client
4078				 * did a setclientid/confirm and any previous
4079				 * revoke is no longer relevant.
4080				 */
4081				sp->nst_flag &= ~NFSNST_REVOKE;
4082		}
4083	    }
4084	} while (len > 0);
4085	free((caddr_t)tsp, M_TEMP);
4086	sf->nsf_flags = NFSNSF_OK;
4087	sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
4088		NFSRV_LEASEDELTA;
4089}
4090
4091/*
4092 * Update the stable storage file, now that the grace period is over.
4093 */
4094APPLESTATIC void
4095nfsrv_updatestable(NFSPROC_T *p)
4096{
4097	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4098	struct nfsrv_stable *sp, *nsp;
4099	int i;
4100	struct nfsvattr nva;
4101	vnode_t vp;
4102#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
4103	mount_t mp = NULL;
4104#endif
4105	int error;
4106
4107	if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
4108		return;
4109	sf->nsf_flags |= NFSNSF_UPDATEDONE;
4110	/*
4111	 * Ok, we need to rewrite the stable storage file.
4112	 * - truncate to 0 length
4113	 * - write the new first structure
4114	 * - loop through the data structures, writing out any that
4115	 *   have timestamps older than the old boot
4116	 */
4117	if (sf->nsf_bootvals) {
4118		sf->nsf_numboots++;
4119		for (i = sf->nsf_numboots - 2; i >= 0; i--)
4120			sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
4121	} else {
4122		sf->nsf_numboots = 1;
4123		sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
4124			M_TEMP, M_WAITOK);
4125	}
4126	sf->nsf_bootvals[0] = nfsrvboottime;
4127	sf->nsf_lease = nfsrv_lease;
4128	NFSVNO_ATTRINIT(&nva);
4129	NFSVNO_SETATTRVAL(&nva, size, 0);
4130	vp = NFSFPVNODE(sf->nsf_fp);
4131	vn_start_write(vp, &mp, V_WAIT);
4132	if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4133		error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
4134		    NULL);
4135		NFSVOPUNLOCK(vp, 0);
4136	} else
4137		error = EPERM;
4138	vn_finished_write(mp);
4139	if (!error)
4140	    error = NFSD_RDWR(UIO_WRITE, vp,
4141		(caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
4142		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4143	if (!error)
4144	    error = NFSD_RDWR(UIO_WRITE, vp,
4145		(caddr_t)sf->nsf_bootvals,
4146		sf->nsf_numboots * sizeof (time_t),
4147		(off_t)(sizeof (struct nfsf_rec)),
4148		UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4149	free((caddr_t)sf->nsf_bootvals, M_TEMP);
4150	sf->nsf_bootvals = NULL;
4151	if (error) {
4152		sf->nsf_flags &= ~NFSNSF_OK;
4153		printf("EEK! Can't write NfsV4 stable storage file\n");
4154		return;
4155	}
4156	sf->nsf_flags |= NFSNSF_OK;
4157
4158	/*
4159	 * Loop through the list and write out timestamp records for
4160	 * any clients that successfully reclaimed state.
4161	 */
4162	LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4163		if (sp->nst_flag & NFSNST_GOTSTATE) {
4164			nfsrv_writestable(sp->nst_client, sp->nst_len,
4165				NFSNST_NEWSTATE, p);
4166			sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
4167		}
4168		LIST_REMOVE(sp, nst_list);
4169		free((caddr_t)sp, M_TEMP);
4170	}
4171	nfsrv_backupstable();
4172}
4173
4174/*
4175 * Append a record to the stable storage file.
4176 */
4177APPLESTATIC void
4178nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
4179{
4180	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
4181	struct nfst_rec *sp;
4182	int error;
4183
4184	if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
4185		return;
4186	sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4187		len - 1, M_TEMP, M_WAITOK);
4188	sp->len = len;
4189	NFSBCOPY(client, sp->client, len);
4190	sp->flag = flag;
4191	error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
4192	    (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
4193	    UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
4194	free((caddr_t)sp, M_TEMP);
4195	if (error) {
4196		sf->nsf_flags &= ~NFSNSF_OK;
4197		printf("EEK! Can't write NfsV4 stable storage file\n");
4198	}
4199}
4200
4201/*
4202 * This function is called during the grace period to mark a client
4203 * that successfully reclaimed state.
4204 */
4205static void
4206nfsrv_markstable(struct nfsclient *clp)
4207{
4208	struct nfsrv_stable *sp;
4209
4210	/*
4211	 * First find the client structure.
4212	 */
4213	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4214		if (sp->nst_len == clp->lc_idlen &&
4215		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4216			break;
4217	}
4218	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
4219		return;
4220
4221	/*
4222	 * Now, just mark it and set the nfsclient back pointer.
4223	 */
4224	sp->nst_flag |= NFSNST_GOTSTATE;
4225	sp->nst_clp = clp;
4226}
4227
4228/*
4229 * This function is called for a reclaim, to see if it gets grace.
4230 * It returns 0 if a reclaim is allowed, 1 otherwise.
4231 */
4232static int
4233nfsrv_checkstable(struct nfsclient *clp)
4234{
4235	struct nfsrv_stable *sp;
4236
4237	/*
4238	 * First, find the entry for the client.
4239	 */
4240	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
4241		if (sp->nst_len == clp->lc_idlen &&
4242		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
4243			break;
4244	}
4245
4246	/*
4247	 * If not in the list, state was revoked or no state was issued
4248	 * since the previous reboot, a reclaim is denied.
4249	 */
4250	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
4251	    (sp->nst_flag & NFSNST_REVOKE) ||
4252	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
4253		return (1);
4254	return (0);
4255}
4256
4257/*
4258 * Test for and try to clear out a conflicting client. This is called by
4259 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
4260 * a found.
4261 * The trick here is that it can't revoke a conflicting client with an
4262 * expired lease unless it holds the v4root lock, so...
4263 * If no v4root lock, get the lock and return 1 to indicate "try again".
4264 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
4265 * the revocation worked and the conflicting client is "bye, bye", so it
4266 * can be tried again.
4267 * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
4268 * Unlocks State before a non-zero value is returned.
4269 */
4270static int
4271nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
4272    NFSPROC_T *p)
4273{
4274	int gotlock, lktype;
4275
4276	/*
4277	 * If lease hasn't expired, we can't fix it.
4278	 */
4279	if (clp->lc_expiry >= NFSD_MONOSEC ||
4280	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
4281		return (0);
4282	if (*haslockp == 0) {
4283		NFSUNLOCKSTATE();
4284		lktype = NFSVOPISLOCKED(vp);
4285		NFSVOPUNLOCK(vp, 0);
4286		NFSLOCKV4ROOTMUTEX();
4287		nfsv4_relref(&nfsv4rootfs_lock);
4288		do {
4289			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4290			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4291		} while (!gotlock);
4292		NFSUNLOCKV4ROOTMUTEX();
4293		*haslockp = 1;
4294		NFSVOPLOCK(vp, lktype | LK_RETRY);
4295		if ((vp->v_iflag & VI_DOOMED) != 0)
4296			return (2);
4297		else
4298			return (1);
4299	}
4300	NFSUNLOCKSTATE();
4301
4302	/*
4303	 * Ok, we can expire the conflicting client.
4304	 */
4305	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4306	nfsrv_backupstable();
4307	nfsrv_cleanclient(clp, p);
4308	nfsrv_freedeleglist(&clp->lc_deleg);
4309	nfsrv_freedeleglist(&clp->lc_olddeleg);
4310	LIST_REMOVE(clp, lc_hash);
4311	nfsrv_zapclient(clp, p);
4312	return (1);
4313}
4314
4315/*
4316 * Resolve a delegation conflict.
4317 * Returns 0 to indicate the conflict was resolved without sleeping.
4318 * Return -1 to indicate that the caller should check for conflicts again.
4319 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
4320 *
4321 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
4322 * for a return of 0, since there was no sleep and it could be required
4323 * later. It is released for a return of NFSERR_DELAY, since the caller
4324 * will return that error. It is released when a sleep was done waiting
4325 * for the delegation to be returned or expire (so that other nfsds can
4326 * handle ops). Then, it must be acquired for the write to stable storage.
4327 * (This function is somewhat similar to nfsrv_clientconflict(), but
4328 *  the semantics differ in a couple of subtle ways. The return of 0
4329 *  indicates the conflict was resolved without sleeping here, not
4330 *  that the conflict can't be resolved and the handling of nfsv4root_lock
4331 *  differs, as noted above.)
4332 * Unlocks State before returning a non-zero value.
4333 */
4334static int
4335nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
4336    vnode_t vp)
4337{
4338	struct nfsclient *clp = stp->ls_clp;
4339	int gotlock, error, lktype, retrycnt, zapped_clp;
4340	nfsv4stateid_t tstateid;
4341	fhandle_t tfh;
4342
4343	/*
4344	 * If the conflict is with an old delegation...
4345	 */
4346	if (stp->ls_flags & NFSLCK_OLDDELEG) {
4347		/*
4348		 * You can delete it, if it has expired.
4349		 */
4350		if (clp->lc_delegtime < NFSD_MONOSEC) {
4351			nfsrv_freedeleg(stp);
4352			NFSUNLOCKSTATE();
4353			error = -1;
4354			goto out;
4355		}
4356		NFSUNLOCKSTATE();
4357		/*
4358		 * During this delay, the old delegation could expire or it
4359		 * could be recovered by the client via an Open with
4360		 * CLAIM_DELEGATE_PREV.
4361		 * Release the nfsv4root_lock, if held.
4362		 */
4363		if (*haslockp) {
4364			*haslockp = 0;
4365			NFSLOCKV4ROOTMUTEX();
4366			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4367			NFSUNLOCKV4ROOTMUTEX();
4368		}
4369		error = NFSERR_DELAY;
4370		goto out;
4371	}
4372
4373	/*
4374	 * It's a current delegation, so:
4375	 * - check to see if the delegation has expired
4376	 *   - if so, get the v4root lock and then expire it
4377	 */
4378	if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
4379		/*
4380		 * - do a recall callback, since not yet done
4381		 * For now, never allow truncate to be set. To use
4382		 * truncate safely, it must be guaranteed that the
4383		 * Remove, Rename or Setattr with size of 0 will
4384		 * succeed and that would require major changes to
4385		 * the VFS/Vnode OPs.
4386		 * Set the expiry time large enough so that it won't expire
4387		 * until after the callback, then set it correctly, once
4388		 * the callback is done. (The delegation will now time
4389		 * out whether or not the Recall worked ok. The timeout
4390		 * will be extended when ops are done on the delegation
4391		 * stateid, up to the timelimit.)
4392		 */
4393		stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
4394		    NFSRV_LEASEDELTA;
4395		stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
4396		    NFSRV_LEASEDELTA;
4397		stp->ls_flags |= NFSLCK_DELEGRECALL;
4398
4399		/*
4400		 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
4401		 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
4402		 * in order to try and avoid a race that could happen
4403		 * when a CBRecall request passed the Open reply with
4404		 * the delegation in it when transitting the network.
4405		 * Since nfsrv_docallback will sleep, don't use stp after
4406		 * the call.
4407		 */
4408		NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
4409		    sizeof (tstateid));
4410		NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
4411		    sizeof (tfh));
4412		NFSUNLOCKSTATE();
4413		if (*haslockp) {
4414			*haslockp = 0;
4415			NFSLOCKV4ROOTMUTEX();
4416			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4417			NFSUNLOCKV4ROOTMUTEX();
4418		}
4419		retrycnt = 0;
4420		do {
4421		    error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
4422			&tstateid, 0, &tfh, NULL, NULL, p);
4423		    retrycnt++;
4424		} while ((error == NFSERR_BADSTATEID ||
4425		    error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
4426		error = NFSERR_DELAY;
4427		goto out;
4428	}
4429
4430	if (clp->lc_expiry >= NFSD_MONOSEC &&
4431	    stp->ls_delegtime >= NFSD_MONOSEC) {
4432		NFSUNLOCKSTATE();
4433		/*
4434		 * A recall has been done, but it has not yet expired.
4435		 * So, RETURN_DELAY.
4436		 */
4437		if (*haslockp) {
4438			*haslockp = 0;
4439			NFSLOCKV4ROOTMUTEX();
4440			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4441			NFSUNLOCKV4ROOTMUTEX();
4442		}
4443		error = NFSERR_DELAY;
4444		goto out;
4445	}
4446
4447	/*
4448	 * If we don't yet have the lock, just get it and then return,
4449	 * since we need that before deleting expired state, such as
4450	 * this delegation.
4451	 * When getting the lock, unlock the vnode, so other nfsds that
4452	 * are in progress, won't get stuck waiting for the vnode lock.
4453	 */
4454	if (*haslockp == 0) {
4455		NFSUNLOCKSTATE();
4456		lktype = NFSVOPISLOCKED(vp);
4457		NFSVOPUNLOCK(vp, 0);
4458		NFSLOCKV4ROOTMUTEX();
4459		nfsv4_relref(&nfsv4rootfs_lock);
4460		do {
4461			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
4462			    NFSV4ROOTLOCKMUTEXPTR, NULL);
4463		} while (!gotlock);
4464		NFSUNLOCKV4ROOTMUTEX();
4465		*haslockp = 1;
4466		NFSVOPLOCK(vp, lktype | LK_RETRY);
4467		if ((vp->v_iflag & VI_DOOMED) != 0) {
4468			*haslockp = 0;
4469			NFSLOCKV4ROOTMUTEX();
4470			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4471			NFSUNLOCKV4ROOTMUTEX();
4472			error = NFSERR_PERM;
4473			goto out;
4474		}
4475		error = -1;
4476		goto out;
4477	}
4478
4479	NFSUNLOCKSTATE();
4480	/*
4481	 * Ok, we can delete the expired delegation.
4482	 * First, write the Revoke record to stable storage and then
4483	 * clear out the conflict.
4484	 * Since all other nfsd threads are now blocked, we can safely
4485	 * sleep without the state changing.
4486	 */
4487	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
4488	nfsrv_backupstable();
4489	if (clp->lc_expiry < NFSD_MONOSEC) {
4490		nfsrv_cleanclient(clp, p);
4491		nfsrv_freedeleglist(&clp->lc_deleg);
4492		nfsrv_freedeleglist(&clp->lc_olddeleg);
4493		LIST_REMOVE(clp, lc_hash);
4494		zapped_clp = 1;
4495	} else {
4496		nfsrv_freedeleg(stp);
4497		zapped_clp = 0;
4498	}
4499	if (zapped_clp)
4500		nfsrv_zapclient(clp, p);
4501	error = -1;
4502
4503out:
4504	NFSEXITCODE(error);
4505	return (error);
4506}
4507
4508/*
4509 * Check for a remove allowed, if remove is set to 1 and get rid of
4510 * delegations.
4511 */
4512APPLESTATIC int
4513nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
4514{
4515	struct nfsstate *stp;
4516	struct nfslockfile *lfp;
4517	int error, haslock = 0;
4518	fhandle_t nfh;
4519
4520	/*
4521	 * First, get the lock file structure.
4522	 * (A return of -1 means no associated state, so remove ok.)
4523	 */
4524	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
4525tryagain:
4526	NFSLOCKSTATE();
4527	if (!error)
4528		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
4529	if (error) {
4530		NFSUNLOCKSTATE();
4531		if (haslock) {
4532			NFSLOCKV4ROOTMUTEX();
4533			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4534			NFSUNLOCKV4ROOTMUTEX();
4535		}
4536		if (error == -1)
4537			error = 0;
4538		goto out;
4539	}
4540
4541	/*
4542	 * Now, we must Recall any delegations.
4543	 */
4544	error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
4545	if (error) {
4546		/*
4547		 * nfsrv_cleandeleg() unlocks state for non-zero
4548		 * return.
4549		 */
4550		if (error == -1)
4551			goto tryagain;
4552		if (haslock) {
4553			NFSLOCKV4ROOTMUTEX();
4554			nfsv4_unlock(&nfsv4rootfs_lock, 1);
4555			NFSUNLOCKV4ROOTMUTEX();
4556		}
4557		goto out;
4558	}
4559
4560	/*
4561	 * Now, look for a conflicting open share.
4562	 */
4563	if (remove) {
4564		LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
4565			if (stp->ls_flags & NFSLCK_WRITEDENY) {
4566				error = NFSERR_FILEOPEN;
4567				break;
4568			}
4569		}
4570	}
4571
4572	NFSUNLOCKSTATE();
4573	if (haslock) {
4574		NFSLOCKV4ROOTMUTEX();
4575		nfsv4_unlock(&nfsv4rootfs_lock, 1);
4576		NFSUNLOCKV4ROOTMUTEX();
4577	}
4578
4579out:
4580	NFSEXITCODE(error);
4581	return (error);
4582}
4583
4584/*
4585 * Clear out all delegations for the file referred to by lfp.
4586 * May return NFSERR_DELAY, if there will be a delay waiting for
4587 * delegations to expire.
4588 * Returns -1 to indicate it slept while recalling a delegation.
4589 * This function has the side effect of deleting the nfslockfile structure,
4590 * if it no longer has associated state and didn't have to sleep.
4591 * Unlocks State before a non-zero value is returned.
4592 */
4593static int
4594nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
4595    struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
4596{
4597	struct nfsstate *stp, *nstp;
4598	int ret = 0;
4599
4600	stp = LIST_FIRST(&lfp->lf_deleg);
4601	while (stp != LIST_END(&lfp->lf_deleg)) {
4602		nstp = LIST_NEXT(stp, ls_file);
4603		if (stp->ls_clp != clp) {
4604			ret = nfsrv_delegconflict(stp, haslockp, p, vp);
4605			if (ret) {
4606				/*
4607				 * nfsrv_delegconflict() unlocks state
4608				 * when it returns non-zero.
4609				 */
4610				goto out;
4611			}
4612		}
4613		stp = nstp;
4614	}
4615out:
4616	NFSEXITCODE(ret);
4617	return (ret);
4618}
4619
4620/*
4621 * There are certain operations that, when being done outside of NFSv4,
4622 * require that any NFSv4 delegation for the file be recalled.
4623 * This function is to be called for those cases:
4624 * VOP_RENAME() - When a delegation is being recalled for any reason,
4625 *	the client may have to do Opens against the server, using the file's
4626 *	final component name. If the file has been renamed on the server,
4627 *	that component name will be incorrect and the Open will fail.
4628 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
4629 *	been removed on the server, if there is a delegation issued to
4630 *	that client for the file. I say "theoretically" since clients
4631 *	normally do an Access Op before the Open and that Access Op will
4632 *	fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
4633 *	they will detect the file's removal in the same manner. (There is
4634 *	one case where RFC3530 allows a client to do an Open without first
4635 *	doing an Access Op, which is passage of a check against the ACE
4636 *	returned with a Write delegation, but current practice is to ignore
4637 *	the ACE and always do an Access Op.)
4638 *	Since the functions can only be called with an unlocked vnode, this
4639 *	can't be done at this time.
4640 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
4641 *	locks locally in the client, which are not visible to the server. To
4642 *	deal with this, issuing of delegations for a vnode must be disabled
4643 *	and all delegations for the vnode recalled. This is done via the
4644 *	second function, using the VV_DISABLEDELEG vflag on the vnode.
4645 */
4646APPLESTATIC void
4647nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
4648{
4649	time_t starttime;
4650	int error;
4651
4652	/*
4653	 * First, check to see if the server is currently running and it has
4654	 * been called for a regular file when issuing delegations.
4655	 */
4656	if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
4657	    nfsrv_issuedelegs == 0)
4658		return;
4659
4660	KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
4661	/*
4662	 * First, get a reference on the nfsv4rootfs_lock so that an
4663	 * exclusive lock cannot be acquired by another thread.
4664	 */
4665	NFSLOCKV4ROOTMUTEX();
4666	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
4667	NFSUNLOCKV4ROOTMUTEX();
4668
4669	/*
4670	 * Now, call nfsrv_checkremove() in a loop while it returns
4671	 * NFSERR_DELAY. Return upon any other error or when timed out.
4672	 */
4673	starttime = NFSD_MONOSEC;
4674	do {
4675		if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4676			error = nfsrv_checkremove(vp, 0, p);
4677			NFSVOPUNLOCK(vp, 0);
4678		} else
4679			error = EPERM;
4680		if (error == NFSERR_DELAY) {
4681			if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
4682				break;
4683			/* Sleep for a short period of time */
4684			(void) nfs_catnap(PZERO, 0, "nfsremove");
4685		}
4686	} while (error == NFSERR_DELAY);
4687	NFSLOCKV4ROOTMUTEX();
4688	nfsv4_relref(&nfsv4rootfs_lock);
4689	NFSUNLOCKV4ROOTMUTEX();
4690}
4691
4692APPLESTATIC void
4693nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
4694{
4695
4696#ifdef VV_DISABLEDELEG
4697	/*
4698	 * First, flag issuance of delegations disabled.
4699	 */
4700	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
4701#endif
4702
4703	/*
4704	 * Then call nfsd_recalldelegation() to get rid of all extant
4705	 * delegations.
4706	 */
4707	nfsd_recalldelegation(vp, p);
4708}
4709
4710/*
4711 * Check for conflicting locks, etc. and then get rid of delegations.
4712 * (At one point I thought that I should get rid of delegations for any
4713 *  Setattr, since it could potentially disallow the I/O op (read or write)
4714 *  allowed by the delegation. However, Setattr Ops that aren't changing
4715 *  the size get a stateid of all 0s, so you can't tell if it is a delegation
4716 *  for the same client or a different one, so I decided to only get rid
4717 *  of delegations for other clients when the size is being changed.)
4718 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
4719 * as Write backs, even if there is no delegation, so it really isn't any
4720 * different?)
4721 */
4722APPLESTATIC int
4723nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
4724    nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
4725    struct nfsexstuff *exp, NFSPROC_T *p)
4726{
4727	struct nfsstate st, *stp = &st;
4728	struct nfslock lo, *lop = &lo;
4729	int error = 0;
4730	nfsquad_t clientid;
4731
4732	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
4733		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
4734		lop->lo_first = nvap->na_size;
4735	} else {
4736		stp->ls_flags = 0;
4737		lop->lo_first = 0;
4738	}
4739	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
4740	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
4741	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
4742	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
4743		stp->ls_flags |= NFSLCK_SETATTR;
4744	if (stp->ls_flags == 0)
4745		goto out;
4746	lop->lo_end = NFS64BITSSET;
4747	lop->lo_flags = NFSLCK_WRITE;
4748	stp->ls_ownerlen = 0;
4749	stp->ls_op = NULL;
4750	stp->ls_uid = nd->nd_cred->cr_uid;
4751	stp->ls_stateid.seqid = stateidp->seqid;
4752	clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
4753	clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
4754	stp->ls_stateid.other[2] = stateidp->other[2];
4755	error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
4756	    stateidp, exp, nd, p);
4757
4758out:
4759	NFSEXITCODE2(error, nd);
4760	return (error);
4761}
4762
4763/*
4764 * Check for a write delegation and do a CBGETATTR if there is one, updating
4765 * the attributes, as required.
4766 * Should I return an error if I can't get the attributes? (For now, I'll
4767 * just return ok.
4768 */
4769APPLESTATIC int
4770nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
4771    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred,
4772    NFSPROC_T *p)
4773{
4774	struct nfsstate *stp;
4775	struct nfslockfile *lfp;
4776	struct nfsclient *clp;
4777	struct nfsvattr nva;
4778	fhandle_t nfh;
4779	int error = 0;
4780	nfsattrbit_t cbbits;
4781	u_quad_t delegfilerev;
4782
4783	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
4784	if (!NFSNONZERO_ATTRBIT(&cbbits))
4785		goto out;
4786
4787	/*
4788	 * Get the lock file structure.
4789	 * (A return of -1 means no associated state, so return ok.)
4790	 */
4791	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
4792	NFSLOCKSTATE();
4793	if (!error)
4794		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
4795	if (error) {
4796		NFSUNLOCKSTATE();
4797		if (error == -1)
4798			error = 0;
4799		goto out;
4800	}
4801
4802	/*
4803	 * Now, look for a write delegation.
4804	 */
4805	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
4806		if (stp->ls_flags & NFSLCK_DELEGWRITE)
4807			break;
4808	}
4809	if (stp == LIST_END(&lfp->lf_deleg)) {
4810		NFSUNLOCKSTATE();
4811		goto out;
4812	}
4813	clp = stp->ls_clp;
4814	delegfilerev = stp->ls_filerev;
4815
4816	/*
4817	 * If the Write delegation was issued as a part of this Compound RPC
4818	 * or if we have an Implied Clientid (used in a previous Op in this
4819	 * compound) and it is the client the delegation was issued to,
4820	 * just return ok.
4821	 * I also assume that it is from the same client iff the network
4822	 * host IP address is the same as the callback address. (Not
4823	 * exactly correct by the RFC, but avoids a lot of Getattr
4824	 * callbacks.)
4825	 */
4826	if (nd->nd_compref == stp->ls_compref ||
4827	    ((nd->nd_flag & ND_IMPLIEDCLID) &&
4828	     clp->lc_clientid.qval == nd->nd_clientid.qval) ||
4829	     nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
4830		NFSUNLOCKSTATE();
4831		goto out;
4832	}
4833
4834	/*
4835	 * We are now done with the delegation state structure,
4836	 * so the statelock can be released and we can now tsleep().
4837	 */
4838
4839	/*
4840	 * Now, we must do the CB Getattr callback, to see if Change or Size
4841	 * has changed.
4842	 */
4843	if (clp->lc_expiry >= NFSD_MONOSEC) {
4844		NFSUNLOCKSTATE();
4845		NFSVNO_ATTRINIT(&nva);
4846		nva.na_filerev = NFS64BITSSET;
4847		error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
4848		    0, &nfh, &nva, &cbbits, p);
4849		if (!error) {
4850			if ((nva.na_filerev != NFS64BITSSET &&
4851			    nva.na_filerev > delegfilerev) ||
4852			    (NFSVNO_ISSETSIZE(&nva) &&
4853			     nva.na_size != nvap->na_size)) {
4854				error = nfsvno_updfilerev(vp, nvap, cred, p);
4855				if (NFSVNO_ISSETSIZE(&nva))
4856					nvap->na_size = nva.na_size;
4857			}
4858		} else
4859			error = 0;	/* Ignore callback errors for now. */
4860	} else {
4861		NFSUNLOCKSTATE();
4862	}
4863
4864out:
4865	NFSEXITCODE2(error, nd);
4866	return (error);
4867}
4868
4869/*
4870 * This function looks for openowners that haven't had any opens for
4871 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
4872 * is set.
4873 */
4874APPLESTATIC void
4875nfsrv_throwawayopens(NFSPROC_T *p)
4876{
4877	struct nfsclient *clp, *nclp;
4878	struct nfsstate *stp, *nstp;
4879	int i;
4880
4881	NFSLOCKSTATE();
4882	nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
4883	/*
4884	 * For each client...
4885	 */
4886	for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
4887	    LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
4888		LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
4889			if (LIST_EMPTY(&stp->ls_open) &&
4890			    (stp->ls_noopens > NFSNOOPEN ||
4891			     (nfsrv_openpluslock * 2) >
4892			     NFSRV_V4STATELIMIT))
4893				nfsrv_freeopenowner(stp, 0, p);
4894		}
4895	    }
4896	}
4897	NFSUNLOCKSTATE();
4898}
4899
4900/*
4901 * This function checks to see if the credentials are the same.
4902 * Returns 1 for not same, 0 otherwise.
4903 */
4904static int
4905nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
4906{
4907
4908	if (nd->nd_flag & ND_GSS) {
4909		if (!(clp->lc_flags & LCL_GSS))
4910			return (1);
4911		if (clp->lc_flags & LCL_NAME) {
4912			if (nd->nd_princlen != clp->lc_namelen ||
4913			    NFSBCMP(nd->nd_principal, clp->lc_name,
4914				clp->lc_namelen))
4915				return (1);
4916			else
4917				return (0);
4918		}
4919		if (nd->nd_cred->cr_uid == clp->lc_uid)
4920			return (0);
4921		else
4922			return (1);
4923	} else if (clp->lc_flags & LCL_GSS)
4924		return (1);
4925	/*
4926	 * For AUTH_SYS, allow the same uid or root. (This is underspecified
4927	 * in RFC3530, which talks about principals, but doesn't say anything
4928	 * about uids for AUTH_SYS.)
4929	 */
4930	if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
4931		return (0);
4932	else
4933		return (1);
4934}
4935
4936/*
4937 * Calculate the lease expiry time.
4938 */
4939static time_t
4940nfsrv_leaseexpiry(void)
4941{
4942
4943	if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
4944		return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
4945	return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
4946}
4947
4948/*
4949 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
4950 */
4951static void
4952nfsrv_delaydelegtimeout(struct nfsstate *stp)
4953{
4954
4955	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
4956		return;
4957
4958	if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
4959	    stp->ls_delegtime < stp->ls_delegtimelimit) {
4960		stp->ls_delegtime += nfsrv_lease;
4961		if (stp->ls_delegtime > stp->ls_delegtimelimit)
4962			stp->ls_delegtime = stp->ls_delegtimelimit;
4963	}
4964}
4965
4966/*
4967 * This function checks to see if there is any other state associated
4968 * with the openowner for this Open.
4969 * It returns 1 if there is no other state, 0 otherwise.
4970 */
4971static int
4972nfsrv_nootherstate(struct nfsstate *stp)
4973{
4974	struct nfsstate *tstp;
4975
4976	LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
4977		if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
4978			return (0);
4979	}
4980	return (1);
4981}
4982
4983/*
4984 * Create a list of lock deltas (changes to local byte range locking
4985 * that can be rolled back using the list) and apply the changes via
4986 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
4987 * the rollback or update function will be called after this.
4988 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
4989 * call fails. If it returns an error, it will unlock the list.
4990 */
4991static int
4992nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
4993    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
4994{
4995	struct nfslock *lop, *nlop;
4996	int error = 0;
4997
4998	/* Loop through the list of locks. */
4999	lop = LIST_FIRST(&lfp->lf_locallock);
5000	while (first < end && lop != NULL) {
5001		nlop = LIST_NEXT(lop, lo_lckowner);
5002		if (first >= lop->lo_end) {
5003			/* not there yet */
5004			lop = nlop;
5005		} else if (first < lop->lo_first) {
5006			/* new one starts before entry in list */
5007			if (end <= lop->lo_first) {
5008				/* no overlap between old and new */
5009				error = nfsrv_dolocal(vp, lfp, flags,
5010				    NFSLCK_UNLOCK, first, end, cfp, p);
5011				if (error != 0)
5012					break;
5013				first = end;
5014			} else {
5015				/* handle fragment overlapped with new one */
5016				error = nfsrv_dolocal(vp, lfp, flags,
5017				    NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5018				    p);
5019				if (error != 0)
5020					break;
5021				first = lop->lo_first;
5022			}
5023		} else {
5024			/* new one overlaps this entry in list */
5025			if (end <= lop->lo_end) {
5026				/* overlaps all of new one */
5027				error = nfsrv_dolocal(vp, lfp, flags,
5028				    lop->lo_flags, first, end, cfp, p);
5029				if (error != 0)
5030					break;
5031				first = end;
5032			} else {
5033				/* handle fragment overlapped with new one */
5034				error = nfsrv_dolocal(vp, lfp, flags,
5035				    lop->lo_flags, first, lop->lo_end, cfp, p);
5036				if (error != 0)
5037					break;
5038				first = lop->lo_end;
5039				lop = nlop;
5040			}
5041		}
5042	}
5043	if (first < end && error == 0)
5044		/* handle fragment past end of list */
5045		error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
5046		    end, cfp, p);
5047
5048	NFSEXITCODE(error);
5049	return (error);
5050}
5051
5052/*
5053 * Local lock unlock. Unlock all byte ranges that are no longer locked
5054 * by NFSv4. To do this, unlock any subranges of first-->end that
5055 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
5056 * list. This list has all locks for the file held by other
5057 * <clientid, lockowner> tuples. The list is ordered by increasing
5058 * lo_first value, but may have entries that overlap each other, for
5059 * the case of read locks.
5060 */
5061static void
5062nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
5063    uint64_t init_end, NFSPROC_T *p)
5064{
5065	struct nfslock *lop;
5066	uint64_t first, end, prevfirst;
5067
5068	first = init_first;
5069	end = init_end;
5070	while (first < init_end) {
5071		/* Loop through all nfs locks, adjusting first and end */
5072		prevfirst = 0;
5073		LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
5074			KASSERT(prevfirst <= lop->lo_first,
5075			    ("nfsv4 locks out of order"));
5076			KASSERT(lop->lo_first < lop->lo_end,
5077			    ("nfsv4 bogus lock"));
5078			prevfirst = lop->lo_first;
5079			if (first >= lop->lo_first &&
5080			    first < lop->lo_end)
5081				/*
5082				 * Overlaps with initial part, so trim
5083				 * off that initial part by moving first past
5084				 * it.
5085				 */
5086				first = lop->lo_end;
5087			else if (end > lop->lo_first &&
5088			    lop->lo_first > first) {
5089				/*
5090				 * This lock defines the end of the
5091				 * segment to unlock, so set end to the
5092				 * start of it and break out of the loop.
5093				 */
5094				end = lop->lo_first;
5095				break;
5096			}
5097			if (first >= end)
5098				/*
5099				 * There is no segment left to do, so
5100				 * break out of this loop and then exit
5101				 * the outer while() since first will be set
5102				 * to end, which must equal init_end here.
5103				 */
5104				break;
5105		}
5106		if (first < end) {
5107			/* Unlock this segment */
5108			(void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
5109			    NFSLCK_READ, first, end, NULL, p);
5110			nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
5111			    first, end);
5112		}
5113		/*
5114		 * Now move past this segment and look for any further
5115		 * segment in the range, if there is one.
5116		 */
5117		first = end;
5118		end = init_end;
5119	}
5120}
5121
5122/*
5123 * Do the local lock operation and update the rollback list, as required.
5124 * Perform the rollback and return the error if nfsvno_advlock() fails.
5125 */
5126static int
5127nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
5128    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5129{
5130	struct nfsrollback *rlp;
5131	int error = 0, ltype, oldltype;
5132
5133	if (flags & NFSLCK_WRITE)
5134		ltype = F_WRLCK;
5135	else if (flags & NFSLCK_READ)
5136		ltype = F_RDLCK;
5137	else
5138		ltype = F_UNLCK;
5139	if (oldflags & NFSLCK_WRITE)
5140		oldltype = F_WRLCK;
5141	else if (oldflags & NFSLCK_READ)
5142		oldltype = F_RDLCK;
5143	else
5144		oldltype = F_UNLCK;
5145	if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
5146		/* nothing to do */
5147		goto out;
5148	error = nfsvno_advlock(vp, ltype, first, end, p);
5149	if (error != 0) {
5150		if (cfp != NULL) {
5151			cfp->cl_clientid.lval[0] = 0;
5152			cfp->cl_clientid.lval[1] = 0;
5153			cfp->cl_first = 0;
5154			cfp->cl_end = NFS64BITSSET;
5155			cfp->cl_flags = NFSLCK_WRITE;
5156			cfp->cl_ownerlen = 5;
5157			NFSBCOPY("LOCAL", cfp->cl_owner, 5);
5158		}
5159		nfsrv_locallock_rollback(vp, lfp, p);
5160	} else if (ltype != F_UNLCK) {
5161		rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
5162		    M_WAITOK);
5163		rlp->rlck_first = first;
5164		rlp->rlck_end = end;
5165		rlp->rlck_type = oldltype;
5166		LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
5167	}
5168
5169out:
5170	NFSEXITCODE(error);
5171	return (error);
5172}
5173
5174/*
5175 * Roll back local lock changes and free up the rollback list.
5176 */
5177static void
5178nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
5179{
5180	struct nfsrollback *rlp, *nrlp;
5181
5182	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
5183		(void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
5184		    rlp->rlck_end, p);
5185		free(rlp, M_NFSDROLLBACK);
5186	}
5187	LIST_INIT(&lfp->lf_rollback);
5188}
5189
5190/*
5191 * Update local lock list and delete rollback list (ie now committed to the
5192 * local locks). Most of the work is done by the internal function.
5193 */
5194static void
5195nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
5196    uint64_t end)
5197{
5198	struct nfsrollback *rlp, *nrlp;
5199	struct nfslock *new_lop, *other_lop;
5200
5201	new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
5202	if (flags & (NFSLCK_READ | NFSLCK_WRITE))
5203		other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
5204		    M_WAITOK);
5205	else
5206		other_lop = NULL;
5207	new_lop->lo_flags = flags;
5208	new_lop->lo_first = first;
5209	new_lop->lo_end = end;
5210	nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
5211	if (new_lop != NULL)
5212		free(new_lop, M_NFSDLOCK);
5213	if (other_lop != NULL)
5214		free(other_lop, M_NFSDLOCK);
5215
5216	/* and get rid of the rollback list */
5217	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
5218		free(rlp, M_NFSDROLLBACK);
5219	LIST_INIT(&lfp->lf_rollback);
5220}
5221
5222/*
5223 * Lock the struct nfslockfile for local lock updating.
5224 */
5225static void
5226nfsrv_locklf(struct nfslockfile *lfp)
5227{
5228	int gotlock;
5229
5230	/* lf_usecount ensures *lfp won't be free'd */
5231	lfp->lf_usecount++;
5232	do {
5233		gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
5234		    NFSSTATEMUTEXPTR, NULL);
5235	} while (gotlock == 0);
5236	lfp->lf_usecount--;
5237}
5238
5239/*
5240 * Unlock the struct nfslockfile after local lock updating.
5241 */
5242static void
5243nfsrv_unlocklf(struct nfslockfile *lfp)
5244{
5245
5246	nfsv4_unlock(&lfp->lf_locallock_lck, 0);
5247}
5248
5249/*
5250 * Clear out all state for the NFSv4 server.
5251 * Must be called by a thread that can sleep when no nfsds are running.
5252 */
5253void
5254nfsrv_throwawayallstate(NFSPROC_T *p)
5255{
5256	struct nfsclient *clp, *nclp;
5257	struct nfslockfile *lfp, *nlfp;
5258	int i;
5259
5260	/*
5261	 * For each client, clean out the state and then free the structure.
5262	 */
5263	for (i = 0; i < NFSCLIENTHASHSIZE; i++) {
5264		LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
5265			nfsrv_cleanclient(clp, p);
5266			nfsrv_freedeleglist(&clp->lc_deleg);
5267			nfsrv_freedeleglist(&clp->lc_olddeleg);
5268			free(clp, M_NFSDCLIENT);
5269		}
5270	}
5271
5272	/*
5273	 * Also, free up any remaining lock file structures.
5274	 */
5275	for (i = 0; i < NFSLOCKHASHSIZE; i++) {
5276		LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
5277			printf("nfsd unload: fnd a lock file struct\n");
5278			nfsrv_freenfslockfile(lfp);
5279		}
5280	}
5281}
5282
5283