1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/11/sys/fs/nfsclient/nfs_clrpcops.c 361236 2020-05-19 01:43:00Z freqlabs $");
36
37/*
38 * Rpc op calls, generally called from the vnode op calls or through the
39 * buffer cache, for NFS v2, 3 and 4.
40 * These do not normally make any changes to vnode arguments or use
41 * structures that might change between the VFS variants. The returned
42 * arguments are all at the end, after the NFSPROC_T *p one.
43 */
44
45#include "opt_inet6.h"
46
47#include <fs/nfs/nfsport.h>
48#include <sys/sysctl.h>
49
SYSCTL_DECL(_vfs_nfs);

/*
 * Read/write integer knob registered under the _vfs_nfs sysctl node with
 * the name "ignore_eexist"; it starts out as 0.  Going by its description
 * string it concerns ignoring EEXIST replies for mkdir/symlink; the code
 * that consults it is not in this part of the file.
 */
static int	nfsignore_eexist = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
    &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");
55
/*
 * Global variables
 *
 * The "extern" ones are only declared here; the initialized ones are
 * defined by this file.
 */
/* Together with nfscl_enablecallb, gates the layout Open and callback setup. */
extern int nfs_numnfscbd;
/* Set via NFSSETBOOTTIME() by nfsrpc_setclient() while tv_sec is still 0. */
extern struct timeval nfsboottime;
extern u_int32_t newnfs_false, newnfs_true;
extern nfstype nfsv34_type[9];
extern int nfsrv_useacl;
/* If non-empty, nfsrpc_setclient() uses this string as the callback address. */
extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
extern int nfscl_debuglevel;
NFSCLSTATEMUTEX;
int nfstest_outofseq = 0;
/*
 * When non-zero, nfsrpc_openrpc() marks every Open as using POSIX lock
 * semantics (nfso_posixlock = 1) even when the reply did not carry
 * NFSV4OPEN_LOCKTYPEPOSIX.
 */
int nfscl_assumeposixlocks = 1;
/*
 * Must be non-zero (and nfs_numnfscbd non-zero) before nfsrpc_open() tries
 * the Open variant that also fetches a layout, and before
 * nfsrpc_setclient() builds a callback address string.
 */
int nfscl_enablecallb = 0;
/* Port number encoded into the callback address by nfsrpc_setclient(). */
short nfsv4_cbport = NFSV4_CBPORT;
int nfstest_openallsetattr = 0;

/*
 * sizeof(struct dirent) less (MAXNAMLEN + 1) bytes; presumably the size of
 * the fixed fields that precede the name in a directory entry.
 */
#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
74
/*
 * Result codes of nfscl_getsameserver().  The numbering starts at 0 and
 * increases by one per enumerator.
 */
enum nfsclds_state {
	/* Use this session for the DS. */
	NFSDSP_USETHISSESSION = 0,
	/* Use the nfsclds_sequence field of this dsp for a new session. */
	NFSDSP_SEQTHISSESSION,
	/* No matching server was found. */
	NFSDSP_NOTFOUND
};
87
88static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
89    struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *);
90static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
91    nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *, void *);
92static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
93    struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
94    void *);
95static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
96    nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
97    struct nfsvattr *, struct nfsfh **, int *, int *, void *);
98static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
99    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
100    NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
101    int *, void *, int *);
102static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
103    struct nfscllockowner *, u_int64_t, u_int64_t,
104    u_int32_t, struct ucred *, NFSPROC_T *, int);
105static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
106    struct acl *, nfsv4stateid_t *, void *);
107static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
108    uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
109    struct ucred *, NFSPROC_T *);
110static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_storage *,
111    struct nfsclds **, NFSPROC_T *);
112static void nfscl_initsessionslots(struct nfsclsession *);
113static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
114    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
115    struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
116    NFSPROC_T *);
117static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
118    struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *,
119    NFSPROC_T *);
120static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
121    nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
122    struct nfsfh *, int, struct ucred *, NFSPROC_T *);
123static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
124    struct nfsclds *, struct nfsclds **);
125static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
126    struct nfsfh *, struct ucred *, NFSPROC_T *);
127static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
128    uint64_t, uint64_t, nfsv4stateid_t *, int, int);
129static int nfsrv_parselayoutget(struct nfsrv_descript *, nfsv4stateid_t *,
130    int *, struct nfsclflayouthead *);
131static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
132    int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
133    struct nfscldeleg **, struct ucred *, NFSPROC_T *);
134static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
135    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
136    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
137    struct nfsfh **, int *, int *, void *, int *);
138static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
139    int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
140    struct nfscldeleg **, nfsv4stateid_t *, int, int, int *,
141    struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
142static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
143    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
144    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
145    struct nfsfh **, int *, int *, void *, int *, nfsv4stateid_t *,
146    int, int, int *, struct nfsclflayouthead *, int *);
147static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
148    int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
149    struct nfsclflayouthead *, int, int *, struct ucred *, NFSPROC_T *);
150
151/*
152 * nfs null call from vfs.
153 */
154int
155nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
156{
157	int error;
158	struct nfsrv_descript nfsd, *nd = &nfsd;
159
160	NFSCL_REQSTART(nd, NFSPROC_NULL, vp);
161	error = nfscl_request(nd, vp, p, cred, NULL);
162	if (nd->nd_repstat && !error)
163		error = nd->nd_repstat;
164	mbuf_freem(nd->nd_mrep);
165	return (error);
166}
167
168/*
169 * nfs access rpc op.
170 * For nfs version 3 and 4, use the access rpc to check accessibility. If file
171 * modes are changed on the server, accesses might still fail later.
172 */
173int
174nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
175    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
176{
177	int error;
178	u_int32_t mode, rmode;
179
180	if (acmode & VREAD)
181		mode = NFSACCESS_READ;
182	else
183		mode = 0;
184	if (vnode_vtype(vp) == VDIR) {
185		if (acmode & VWRITE)
186			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
187				 NFSACCESS_DELETE);
188		if (acmode & VEXEC)
189			mode |= NFSACCESS_LOOKUP;
190	} else {
191		if (acmode & VWRITE)
192			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
193		if (acmode & VEXEC)
194			mode |= NFSACCESS_EXECUTE;
195	}
196
197	/*
198	 * Now, just call nfsrpc_accessrpc() to do the actual RPC.
199	 */
200	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode,
201	    NULL);
202
203	/*
204	 * The NFS V3 spec does not clarify whether or not
205	 * the returned access bits can be a superset of
206	 * the ones requested, so...
207	 */
208	if (!error && (rmode & mode) != mode)
209		error = EACCES;
210	return (error);
211}
212
213/*
214 * The actual rpc, separated out for Darwin.
215 */
216int
217nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
218    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep,
219    void *stuff)
220{
221	u_int32_t *tl;
222	u_int32_t supported, rmode;
223	int error;
224	struct nfsrv_descript nfsd, *nd = &nfsd;
225	nfsattrbit_t attrbits;
226
227	*attrflagp = 0;
228	supported = mode;
229	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp);
230	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
231	*tl = txdr_unsigned(mode);
232	if (nd->nd_flag & ND_NFSV4) {
233		/*
234		 * And do a Getattr op.
235		 */
236		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
237		*tl = txdr_unsigned(NFSV4OP_GETATTR);
238		NFSGETATTR_ATTRBIT(&attrbits);
239		(void) nfsrv_putattrbit(nd, &attrbits);
240	}
241	error = nfscl_request(nd, vp, p, cred, stuff);
242	if (error)
243		return (error);
244	if (nd->nd_flag & ND_NFSV3) {
245		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
246		if (error)
247			goto nfsmout;
248	}
249	if (!nd->nd_repstat) {
250		if (nd->nd_flag & ND_NFSV4) {
251			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
252			supported = fxdr_unsigned(u_int32_t, *tl++);
253		} else {
254			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
255		}
256		rmode = fxdr_unsigned(u_int32_t, *tl);
257		if (nd->nd_flag & ND_NFSV4)
258			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
259
260		/*
261		 * It's not obvious what should be done about
262		 * unsupported access modes. For now, be paranoid
263		 * and clear the unsupported ones.
264		 */
265		rmode &= supported;
266		*rmodep = rmode;
267	} else
268		error = nd->nd_repstat;
269nfsmout:
270	mbuf_freem(nd->nd_mrep);
271	return (error);
272}
273
/*
 * nfs open rpc
 *
 * Get an NFSv4 Open for vp with the access implied by amode (FREAD and/or
 * FWRITE).  Anything that is not a regular file returns 0 right away.
 * nfscl_open() reports via "ret" what is still needed: an Open against
 * the server (NFSCLOPEN_DOOPEN), only saving the credentials on the open
 * (NFSCLOPEN_SETCRED), or nothing more.  The whole sequence is repeated
 * for the errors listed in the loop condition at the bottom.
 * Returns 0 or an error number; once retrycnt has reached 4 any remaining
 * error is reported as EIO.
 */
int
nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
{
	struct nfsclopen *op;
	struct nfscldeleg *dp;
	struct nfsfh *nfhp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
	u_int32_t mode, clidrev;
	int ret, newone, error, expireret = 0, retrycnt;

	/*
	 * For NFSv4, Open Ops are only done on Regular Files.
	 */
	if (vnode_vtype(vp) != VREG)
		return (0);
	/* Map the FREAD/FWRITE open flags onto the NFSv4 share access bits. */
	mode = 0;
	if (amode & FREAD)
		mode |= NFSV4OPEN_ACCESSREAD;
	if (amode & FWRITE)
		mode |= NFSV4OPEN_ACCESSWRITE;
	nfhp = np->n_fhp;

	retrycnt = 0;
#ifdef notdef
{ char name[100]; int namel;
namel = (np->n_v4->n4_namelen < 100) ? np->n_v4->n4_namelen : 99;
bcopy(NFS4NODENAME(np->n_v4), name, namel);
name[namel] = '\0';
printf("rpcopen p=0x%x name=%s",p->p_pid,name);
if (nfhp->nfh_len > 0) printf(" fh=0x%x\n",nfhp->nfh_fh[12]);
else printf(" fhl=0\n");
}
#endif
	do {
	    dp = NULL;
	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
		cred, p, NULL, &op, &newone, &ret, 1);
	    if (error) {
		return (error);
	    }
	    /*
	     * Remember the clientid revision for nfscl_hasexpired() below.
	     * A value of 0 (no client structure on the mount) disables the
	     * expired/bad stateid retry.
	     */
	    if (nmp->nm_clp != NULL)
		clidrev = nmp->nm_clp->nfsc_clientidrev;
	    else
		clidrev = 0;
	    if (ret == NFSCLOPEN_DOOPEN) {
		if (np->n_v4 != NULL) {
			/*
			 * For the first attempt, try and get a layout, if
			 * pNFS is enabled for the mount.
			 */
			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
			    nfs_numnfscbd == 0 ||
			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
				error = nfsrpc_openrpc(nmp, vp,
				    np->n_v4->n4_data,
				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
				    np->n_fhp->nfh_len, mode, op,
				    NFS4NODENAME(np->n_v4),
				    np->n_v4->n4_namelen,
				    &dp, 0, 0x0, cred, p, 0, 0);
			else
				error = nfsrpc_getopenlayout(nmp, vp,
				    np->n_v4->n4_data,
				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
				    np->n_fhp->nfh_len, mode, op,
				    NFS4NODENAME(np->n_v4),
				    np->n_v4->n4_namelen, &dp, cred, p);
			/* A non-NULL dp means the Open returned a delegation. */
			if (dp != NULL) {
#ifdef APPLE
				OSBitAndAtomic((int32_t)~NDELEGMOD, (UInt32 *)&np->n_flag);
#else
				NFSLOCKNODE(np);
				np->n_flag &= ~NDELEGMOD;
				/*
				 * Invalidate the attribute cache, so that
				 * attributes that pre-date the issue of a
				 * delegation are not cached, since the
				 * cached attributes will remain valid while
				 * the delegation is held.
				 */
				NFSINVALATTRCACHE(np);
				NFSUNLOCKNODE(np);
#endif
				(void) nfscl_deleg(nmp->nm_mountp,
				    op->nfso_own->nfsow_clp,
				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
			}
		} else {
			/*
			 * The arguments for the Open rpc all come from
			 * n_v4, so without it the Open cannot be done.
			 */
			error = EIO;
		}
		newnfs_copyincred(cred, &op->nfso_cred);
	    } else if (ret == NFSCLOPEN_SETCRED)
		/*
		 * This is a new local open on a delegation. It needs
		 * to have credentials so that an open can be done
		 * against the server during recovery.
		 */
		newnfs_copyincred(cred, &op->nfso_cred);

	    /*
	     * nfso_opencnt is the count of how many VOP_OPEN()s have
	     * been done on this Open successfully and a VOP_CLOSE()
	     * is expected for each of these.
	     * If error is non-zero, don't increment it, since the Open
	     * hasn't succeeded yet.
	     */
	    if (!error)
		op->nfso_opencnt++;
	    nfscl_openrelease(nmp, op, error, newone);
	    /*
	     * These errors are retried without limit after a nap.  Expired
	     * and bad stateid errors are instead counted in retrycnt and
	     * retried only while nfscl_hasexpired() returns 0.
	     */
	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
		error == NFSERR_BADSESSION) {
		(void) nfs_catnap(PZERO, error, "nfs_open");
	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
		&& clidrev != 0) {
		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
		retrycnt++;
	    }
	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
	    error == NFSERR_BADSESSION ||
	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
	     expireret == 0 && clidrev != 0 && retrycnt < 4));
	/* The bounded retries were used up; report a generic I/O error. */
	if (error && retrycnt >= 4)
		error = EIO;
	return (error);
}
405
/*
 * the actual open rpc
 *
 * Builds and sends an NFSv4 Open (NFSV4OPEN_NOCREATE) followed by a
 * Getattr for the change and modify time attributes, then parses the
 * reply into "op".
 * - reclaim != 0: the claim is NFSV4OPEN_CLAIMPREVIOUS with delegtype.
 * - otherwise the claim is NFSV4OPEN_CLAIMDELEGATECUR using the delegation
 *   passed in through *dpp, or NFSV4OPEN_CLAIMNULL when *dpp is NULL; in
 *   both cases name/namelen are appended.
 * *dpp is always set to NULL on entry.  On success it is set to a newly
 * allocated delegation structure if the server issued one, else it is left
 * NULL.  On failure any delegation structure allocated here is freed.
 * A non-zero syscred sets ND_USEGSSNAME on the request.  A non-zero
 * recursed stops the "Open again to get a delegation" step below from
 * being repeated by the nested call.
 * Returns 0 or an error number (including the status from the reply).
 */
int
nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
    u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
    u_int8_t *name, int namelen, struct nfscldeleg **dpp,
    int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
    int syscred, int recursed)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscldeleg *dp, *ndp = NULL;
	struct nfsvattr nfsva;
	u_int32_t rflags, deleg;
	nfsattrbit_t attrbits;
	int error, ret, acesize, limitby;
	struct nfsclsession *tsep;

	/* dp is the caller's delegation (if any); ndp is the one built here. */
	dp = *dpp;
	*dpp = NULL;
	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL);
	/* Open arguments: seqid, share access, share deny, clientid. */
	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
	if (reclaim) {
		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(delegtype);
	} else {
		if (dp != NULL) {
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			/* The stateid seqid is sent as 0 when NFSHASNFSV4N(). */
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = dp->nfsdl_stateid.seqid;
			*tl++ = dp->nfsdl_stateid.other[0];
			*tl++ = dp->nfsdl_stateid.other[1];
			*tl = dp->nfsdl_stateid.other[2];
		} else {
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
		}
		(void) nfsm_strtom(nd, name, namelen);
	}
	/* Append a Getattr for just the change and modify time attributes. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	(void) nfsrv_putattrbit(nd, &attrbits);
	if (syscred)
		nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
	if (!nd->nd_repstat) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		op->nfso_stateid.seqid = *tl++;
		op->nfso_stateid.other[0] = *tl++;
		op->nfso_stateid.other[1] = *tl++;
		op->nfso_stateid.other[2] = *tl;
		/*
		 * tl still points at the last stateid word; the result
		 * flags are the last of the 6 words dissected after it.
		 */
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error)
			goto nfsmout;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(u_int32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/* Note on the client that delegations are handed out. */
			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				op->nfso_own->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/* The file handle is stored right after the structure. */
			MALLOC(ndp, struct nfscldeleg *,
			    sizeof (struct nfscldeleg) + newfhlen,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&ndp->nfsdl_owner);
			LIST_INIT(&ndp->nfsdl_lock);
			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
			ndp->nfsdl_fhlen = newfhlen;
			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			nfscl_lockinit(&ndp->nfsdl_rwlock);
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero here results in NFSCLDL_RECALL being set below. */
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				ndp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* The limit is the product of the two words. */
					ndp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					ndp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				}
			} else {
				ndp->nfsdl_flags = NFSCLDL_READ;
			}
			if (ret)
				ndp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
			    &acesize, p);
			if (error)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		/* Step over two words before the Getattr attributes are parsed. */
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
		    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
		    NULL, NULL, NULL, p, cred);
		if (error)
			goto nfsmout;
		/* Record the change and modify time attributes in the delegation. */
		if (ndp != NULL) {
			ndp->nfsdl_change = nfsva.na_filerev;
			ndp->nfsdl_modtime = nfsva.na_mtime;
			ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
		}
		/* Do the OpenConfirm if asked to, napping while told to delay. */
		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
		    do {
			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
			    cred, p);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_open");
		    } while (ret == NFSERR_DELAY);
		    error = ret;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
		    nfscl_assumeposixlocks)
		    op->nfso_posixlock = 1;
		else
		    op->nfso_posixlock = 0;

		/*
		 * If the server is handing out delegations, but we didn't
		 * get one because an OpenConfirm was required, try the
		 * Open again, to get a delegation. This is a harmless no-op,
		 * from a server's point of view.
		 */
		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
		    && !error && dp == NULL && ndp == NULL && !recursed) {
		    do {
			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
			    cred, p, syscred, 1);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
		    } while (ret == NFSERR_DELAY);
		    /*
		     * A failure of this extra Open only becomes this call's
		     * error for the three cases tested below; any other
		     * failure is dropped, since the first Open succeeded.
		     */
		    if (ret) {
			if (ndp != NULL) {
				FREE((caddr_t)ndp, M_NFSCLDELEG);
				ndp = NULL;
			}
			if (ret == NFSERR_STALECLIENTID ||
			    ret == NFSERR_STALEDONTRECOVER ||
			    ret == NFSERR_BADSESSION)
				error = ret;
		    }
		}
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	if (error == NFSERR_STALECLIENTID)
		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
nfsmout:
	/* Pass the new delegation to the caller only on success. */
	if (!error)
		*dpp = ndp;
	else if (ndp != NULL)
		FREE((caddr_t)ndp, M_NFSCLDELEG);
	mbuf_freem(nd->nd_mrep);
	return (error);
}
608
609/*
610 * open downgrade rpc
611 */
612int
613nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
614    struct ucred *cred, NFSPROC_T *p)
615{
616	u_int32_t *tl;
617	struct nfsrv_descript nfsd, *nd = &nfsd;
618	int error;
619
620	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp);
621	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
622	if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp))))
623		*tl++ = 0;
624	else
625		*tl++ = op->nfso_stateid.seqid;
626	*tl++ = op->nfso_stateid.other[0];
627	*tl++ = op->nfso_stateid.other[1];
628	*tl++ = op->nfso_stateid.other[2];
629	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
630	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
631	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
632	error = nfscl_request(nd, vp, p, cred, NULL);
633	if (error)
634		return (error);
635	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
636	if (!nd->nd_repstat) {
637		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
638		op->nfso_stateid.seqid = *tl++;
639		op->nfso_stateid.other[0] = *tl++;
640		op->nfso_stateid.other[1] = *tl++;
641		op->nfso_stateid.other[2] = *tl;
642	}
643	if (nd->nd_repstat && error == 0)
644		error = nd->nd_repstat;
645	if (error == NFSERR_STALESTATEID)
646		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
647nfsmout:
648	mbuf_freem(nd->nd_mrep);
649	return (error);
650}
651
652/*
653 * V4 Close operation.
654 */
655int
656nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
657{
658	struct nfsclclient *clp;
659	int error;
660
661	if (vnode_vtype(vp) != VREG)
662		return (0);
663	if (doclose)
664		error = nfscl_doclose(vp, &clp, p);
665	else
666		error = nfscl_getclose(vp, &clp);
667	if (error)
668		return (error);
669
670	nfscl_clientrelease(clp);
671	return (0);
672}
673
/*
 * Close the open.
 *
 * Works with a private copy of the credentials saved in the open.  In
 * order, it:
 * 1. unlocks and frees the byte range locks of every lock owner on the
 *    open, then does a ReleaseLockOwner for that owner;
 * 2. closes the open via nfscl_tryclose(), holding the open owner's
 *    rwlock exclusively, trying again for as long as the result is
 *    NFSERR_GRACE;
 * 3. frees the lock owners and the open itself.
 * Nothing is returned; errors from the individual steps are not reported
 * to the caller.
 */
void
nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p)
{
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscllockowner *lp, *nlp;
	struct nfscllock *lop, *nlop;
	struct ucred *tcred;
	u_int64_t off = 0, len = 0;
	u_int32_t type = NFSV4LOCKT_READ;
	int error, do_unlock, trycnt;

	tcred = newnfs_getcred();
	newnfs_copycred(&op->nfso_cred, tcred);
	/*
	 * (Theoretically this could be done in the same
	 *  compound as the close, but having multiple
	 *  sequenced Ops in the same compound might be
	 *  too scary for some servers.)
	 */
	/*
	 * With POSIX lock semantics one unlock of the range starting at 0
	 * with length NFS64BITSSET is done per lock owner, instead of one
	 * per lock.
	 */
	if (op->nfso_posixlock) {
		off = 0;
		len = NFS64BITSSET;
		type = NFSV4LOCKT_READ;
	}

	/*
	 * Since this function is only called from VOP_INACTIVE(), no
	 * other thread will be manipulating this Open. As such, the
	 * lock lists are not being changed by other threads, so it should
	 * be safe to do this without locking.
	 */
	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
		do_unlock = 1;
		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
			/* Otherwise unlock exactly the range of this lock. */
			if (op->nfso_posixlock == 0) {
				off = lop->nfslo_first;
				len = lop->nfslo_end - lop->nfslo_first;
				if (lop->nfslo_type == F_WRLCK)
					type = NFSV4LOCKT_WRITE;
				else
					type = NFSV4LOCKT_READ;
			}
			if (do_unlock) {
				/*
				 * Repeat the unlock while the reply status
				 * is NFSERR_GRACE or NFSERR_DELAY, with a
				 * nap in between, bounded by trycnt.
				 */
				trycnt = 0;
				do {
					error = nfsrpc_locku(nd, nmp, lp, off,
					    len, type, tcred, p, 0);
					if ((nd->nd_repstat == NFSERR_GRACE ||
					    nd->nd_repstat == NFSERR_DELAY) &&
					    error == 0)
						(void) nfs_catnap(PZERO,
						    (int)nd->nd_repstat,
						    "nfs_close");
				} while ((nd->nd_repstat == NFSERR_GRACE ||
				    nd->nd_repstat == NFSERR_DELAY) &&
				    error == 0 && trycnt++ < 5);
				/* The whole range was just unlocked; once is enough. */
				if (op->nfso_posixlock)
					do_unlock = 0;
			}
			nfscl_freelock(lop, 0);
		}
		/*
		 * Do a ReleaseLockOwner.
		 * The lock owner name nfsl_owner may be used by other opens for
		 * other files but the lock_owner4 name that nfsrpc_rellockown()
		 * puts on the wire has the file handle for this file appended
		 * to it, so it can be done now.
		 */
		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
		    lp->nfsl_open->nfso_fhlen, tcred, p);
	}

	/*
	 * There could be other Opens for different files on the same
	 * OpenOwner, so locking is required.
	 */
	NFSLOCKCLSTATE();
	nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
	NFSUNLOCKCLSTATE();
	do {
		error = nfscl_tryclose(op, tcred, nmp, p);
		if (error == NFSERR_GRACE)
			(void) nfs_catnap(PZERO, error, "nfs_close");
	} while (error == NFSERR_GRACE);
	/*
	 * The state mutex is taken again to drop the owner's rwlock and is
	 * kept held while the lock owners and the open are freed.
	 */
	NFSLOCKCLSTATE();
	nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);

	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
		nfscl_freelockowner(lp, 0);
	nfscl_freeopen(op, 0);
	NFSUNLOCKCLSTATE();
	NFSFREECRED(tcred);
}
770
771/*
772 * The actual Close RPC.
773 */
774int
775nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
776    struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
777    int syscred)
778{
779	u_int32_t *tl;
780	int error;
781
782	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
783	    op->nfso_fhlen, NULL, NULL);
784	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
785	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
786	if (NFSHASNFSV4N(nmp))
787		*tl++ = 0;
788	else
789		*tl++ = op->nfso_stateid.seqid;
790	*tl++ = op->nfso_stateid.other[0];
791	*tl++ = op->nfso_stateid.other[1];
792	*tl = op->nfso_stateid.other[2];
793	if (syscred)
794		nd->nd_flag |= ND_USEGSSNAME;
795	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
796	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
797	if (error)
798		return (error);
799	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
800	if (nd->nd_repstat == 0)
801		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
802	error = nd->nd_repstat;
803	if (error == NFSERR_STALESTATEID)
804		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
805nfsmout:
806	mbuf_freem(nd->nd_mrep);
807	return (error);
808}
809
810/*
811 * V4 Open Confirm RPC.
812 */
813int
814nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
815    struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
816{
817	u_int32_t *tl;
818	struct nfsrv_descript nfsd, *nd = &nfsd;
819	struct nfsmount *nmp;
820	int error;
821
822	nmp = VFSTONFS(vnode_mount(vp));
823	if (NFSHASNFSV4N(nmp))
824		return (0);		/* No confirmation for NFSv4.1. */
825	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL);
826	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
827	*tl++ = op->nfso_stateid.seqid;
828	*tl++ = op->nfso_stateid.other[0];
829	*tl++ = op->nfso_stateid.other[1];
830	*tl++ = op->nfso_stateid.other[2];
831	*tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
832	error = nfscl_request(nd, vp, p, cred, NULL);
833	if (error)
834		return (error);
835	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
836	if (!nd->nd_repstat) {
837		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
838		op->nfso_stateid.seqid = *tl++;
839		op->nfso_stateid.other[0] = *tl++;
840		op->nfso_stateid.other[1] = *tl++;
841		op->nfso_stateid.other[2] = *tl;
842	}
843	error = nd->nd_repstat;
844	if (error == NFSERR_STALESTATEID)
845		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
846nfsmout:
847	mbuf_freem(nd->nd_mrep);
848	return (error);
849}
850
851/*
852 * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
853 * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
854 */
855int
856nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
857    struct ucred *cred, NFSPROC_T *p)
858{
859	u_int32_t *tl;
860	struct nfsrv_descript nfsd;
861	struct nfsrv_descript *nd = &nfsd;
862	nfsattrbit_t attrbits;
863	u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
864	u_short port;
865	int error, isinet6 = 0, callblen;
866	nfsquad_t confirm;
867	u_int32_t lease;
868	static u_int32_t rev = 0;
869	struct nfsclds *dsp;
870	struct in6_addr a6;
871	struct nfsclsession *tsep;
872
873	if (nfsboottime.tv_sec == 0)
874		NFSSETBOOTTIME(nfsboottime);
875	clp->nfsc_rev = rev++;
876	if (NFSHASNFSV4N(nmp)) {
877		/*
878		 * Either there was no previous session or the
879		 * previous session has failed, so...
880		 * do an ExchangeID followed by the CreateSession.
881		 */
882		error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq,
883		    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p);
884		NFSCL_DEBUG(1, "aft exch=%d\n", error);
885		if (error == 0)
886			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
887			    &nmp->nm_sockreq,
888			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
889		if (error == 0) {
890			NFSLOCKMNT(nmp);
891			/*
892			 * The old sessions cannot be safely free'd
893			 * here, since they may still be used by
894			 * in-progress RPCs.
895			 */
896			tsep = NULL;
897			if (TAILQ_FIRST(&nmp->nm_sess) != NULL)
898				tsep = NFSMNT_MDSSESSION(nmp);
899			TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
900			    nfsclds_list);
901			/*
902			 * Wake up RPCs waiting for a slot on the
903			 * old session. These will then fail with
904			 * NFSERR_BADSESSION and be retried with the
905			 * new session by nfsv4_setsequence().
906			 * Also wakeup() processes waiting for the
907			 * new session.
908			 */
909			if (tsep != NULL)
910				wakeup(&tsep->nfsess_slots);
911			wakeup(&nmp->nm_sess);
912			NFSUNLOCKMNT(nmp);
913		} else
914			nfscl_freenfsclds(dsp);
915		NFSCL_DEBUG(1, "aft createsess=%d\n", error);
916		if (error == 0 && reclaim == 0) {
917			error = nfsrpc_reclaimcomplete(nmp, cred, p);
918			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
919			if (error == NFSERR_COMPLETEALREADY ||
920			    error == NFSERR_NOTSUPP)
921				/* Ignore this error. */
922				error = 0;
923		}
924		return (error);
925	}
926
927	/*
928	 * Allocate a single session structure for NFSv4.0, because some of
929	 * the fields are used by NFSv4.0 although it doesn't do a session.
930	 */
931	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
932	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
933	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
934	NFSLOCKMNT(nmp);
935	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
936	tsep = NFSMNT_MDSSESSION(nmp);
937	NFSUNLOCKMNT(nmp);
938
939	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL);
940	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
941	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
942	*tl = txdr_unsigned(clp->nfsc_rev);
943	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
944
945	/*
946	 * set up the callback address
947	 */
948	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
949	*tl = txdr_unsigned(NFS_CALLBCKPROG);
950	callblen = strlen(nfsv4_callbackaddr);
951	if (callblen == 0)
952		cp = nfscl_getmyip(nmp, &a6, &isinet6);
953	if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
954	    (callblen > 0 || cp != NULL)) {
955		port = htons(nfsv4_cbport);
956		cp2 = (u_int8_t *)&port;
957#ifdef INET6
958		if ((callblen > 0 &&
959		     strchr(nfsv4_callbackaddr, ':')) || isinet6) {
960			char ip6buf[INET6_ADDRSTRLEN], *ip6add;
961
962			(void) nfsm_strtom(nd, "tcp6", 4);
963			if (callblen == 0) {
964				ip6_sprintf(ip6buf, (struct in6_addr *)cp);
965				ip6add = ip6buf;
966			} else {
967				ip6add = nfsv4_callbackaddr;
968			}
969			snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
970			    ip6add, cp2[0], cp2[1]);
971		} else
972#endif
973		{
974			(void) nfsm_strtom(nd, "tcp", 3);
975			if (callblen == 0)
976				snprintf(addr, INET6_ADDRSTRLEN + 9,
977				    "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
978				    cp[2], cp[3], cp2[0], cp2[1]);
979			else
980				snprintf(addr, INET6_ADDRSTRLEN + 9,
981				    "%s.%d.%d", nfsv4_callbackaddr,
982				    cp2[0], cp2[1]);
983		}
984		(void) nfsm_strtom(nd, addr, strlen(addr));
985	} else {
986		(void) nfsm_strtom(nd, "tcp", 3);
987		(void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
988	}
989	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
990	*tl = txdr_unsigned(clp->nfsc_cbident);
991	nd->nd_flag |= ND_USEGSSNAME;
992	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
993		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
994	if (error)
995		return (error);
996	if (nd->nd_repstat == 0) {
997	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
998	    tsep->nfsess_clientid.lval[0] = *tl++;
999	    tsep->nfsess_clientid.lval[1] = *tl++;
1000	    confirm.lval[0] = *tl++;
1001	    confirm.lval[1] = *tl;
1002	    mbuf_freem(nd->nd_mrep);
1003	    nd->nd_mrep = NULL;
1004
1005	    /*
1006	     * and confirm it.
1007	     */
1008	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
1009		NULL);
1010	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1011	    *tl++ = tsep->nfsess_clientid.lval[0];
1012	    *tl++ = tsep->nfsess_clientid.lval[1];
1013	    *tl++ = confirm.lval[0];
1014	    *tl = confirm.lval[1];
1015	    nd->nd_flag |= ND_USEGSSNAME;
1016	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1017		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1018	    if (error)
1019		return (error);
1020	    mbuf_freem(nd->nd_mrep);
1021	    nd->nd_mrep = NULL;
1022	    if (nd->nd_repstat == 0) {
1023		nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh,
1024		    nmp->nm_fhsize, NULL, NULL);
1025		NFSZERO_ATTRBIT(&attrbits);
1026		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1027		(void) nfsrv_putattrbit(nd, &attrbits);
1028		nd->nd_flag |= ND_USEGSSNAME;
1029		error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
1030		    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
1031		if (error)
1032		    return (error);
1033		if (nd->nd_repstat == 0) {
1034		    error = nfsv4_loadattr(nd, NULL, NULL, NULL, NULL, 0, NULL,
1035			NULL, NULL, NULL, NULL, 0, NULL, &lease, NULL, p, cred);
1036		    if (error)
1037			goto nfsmout;
1038		    clp->nfsc_renew = NFSCL_RENEW(lease);
1039		    clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1040		    clp->nfsc_clientidrev++;
1041		    if (clp->nfsc_clientidrev == 0)
1042			clp->nfsc_clientidrev++;
1043		}
1044	    }
1045	}
1046	error = nd->nd_repstat;
1047nfsmout:
1048	mbuf_freem(nd->nd_mrep);
1049	return (error);
1050}
1051
1052/*
1053 * nfs getattr call.
1054 */
1055int
1056nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1057    struct nfsvattr *nap, void *stuff)
1058{
1059	struct nfsrv_descript nfsd, *nd = &nfsd;
1060	int error;
1061	nfsattrbit_t attrbits;
1062
1063	NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
1064	if (nd->nd_flag & ND_NFSV4) {
1065		NFSGETATTR_ATTRBIT(&attrbits);
1066		(void) nfsrv_putattrbit(nd, &attrbits);
1067	}
1068	error = nfscl_request(nd, vp, p, cred, stuff);
1069	if (error)
1070		return (error);
1071	if (!nd->nd_repstat)
1072		error = nfsm_loadattr(nd, nap);
1073	else
1074		error = nd->nd_repstat;
1075	mbuf_freem(nd->nd_mrep);
1076	return (error);
1077}
1078
1079/*
1080 * nfs getattr call with non-vnode arguemnts.
1081 */
1082int
1083nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1084    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1085    uint32_t *leasep)
1086{
1087	struct nfsrv_descript nfsd, *nd = &nfsd;
1088	int error, vers = NFS_VER2;
1089	nfsattrbit_t attrbits;
1090
1091	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL);
1092	if (nd->nd_flag & ND_NFSV4) {
1093		vers = NFS_VER4;
1094		NFSGETATTR_ATTRBIT(&attrbits);
1095		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1096		(void) nfsrv_putattrbit(nd, &attrbits);
1097	} else if (nd->nd_flag & ND_NFSV3) {
1098		vers = NFS_VER3;
1099	}
1100	if (syscred)
1101		nd->nd_flag |= ND_USEGSSNAME;
1102	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1103	    NFS_PROG, vers, NULL, 1, xidp, NULL);
1104	if (error)
1105		return (error);
1106	if (nd->nd_repstat == 0) {
1107		if ((nd->nd_flag & ND_NFSV4) != 0)
1108			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1109			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1110			    NULL, NULL);
1111		else
1112			error = nfsm_loadattr(nd, nap);
1113	} else
1114		error = nd->nd_repstat;
1115	mbuf_freem(nd->nd_mrep);
1116	return (error);
1117}
1118
1119/*
1120 * Do an nfs setattr operation.
1121 */
1122int
1123nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1124    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp,
1125    void *stuff)
1126{
1127	int error, expireret = 0, openerr, retrycnt;
1128	u_int32_t clidrev = 0, mode;
1129	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1130	struct nfsfh *nfhp;
1131	nfsv4stateid_t stateid;
1132	void *lckp;
1133
1134	if (nmp->nm_clp != NULL)
1135		clidrev = nmp->nm_clp->nfsc_clientidrev;
1136	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1137		mode = NFSV4OPEN_ACCESSWRITE;
1138	else
1139		mode = NFSV4OPEN_ACCESSREAD;
1140	retrycnt = 0;
1141	do {
1142		lckp = NULL;
1143		openerr = 1;
1144		if (NFSHASNFSV4(nmp)) {
1145			nfhp = VTONFS(vp)->n_fhp;
1146			error = nfscl_getstateid(vp, nfhp->nfh_fh,
1147			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1148			if (error && vnode_vtype(vp) == VREG &&
1149			    (mode == NFSV4OPEN_ACCESSWRITE ||
1150			     nfstest_openallsetattr)) {
1151				/*
1152				 * No Open stateid, so try and open the file
1153				 * now.
1154				 */
1155				if (mode == NFSV4OPEN_ACCESSWRITE)
1156					openerr = nfsrpc_open(vp, FWRITE, cred,
1157					    p);
1158				else
1159					openerr = nfsrpc_open(vp, FREAD, cred,
1160					    p);
1161				if (!openerr)
1162					(void) nfscl_getstateid(vp,
1163					    nfhp->nfh_fh, nfhp->nfh_len,
1164					    mode, 0, cred, p, &stateid, &lckp);
1165			}
1166		}
1167		if (vap != NULL)
1168			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1169			    rnap, attrflagp, stuff);
1170		else
1171			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid,
1172			    stuff);
1173		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1174			NFSLOCKMNT(nmp);
1175			nmp->nm_state |= NFSSTA_OPENMODE;
1176			NFSUNLOCKMNT(nmp);
1177		}
1178		if (error == NFSERR_STALESTATEID)
1179			nfscl_initiate_recovery(nmp->nm_clp);
1180		if (lckp != NULL)
1181			nfscl_lockderef(lckp);
1182		if (!openerr)
1183			(void) nfsrpc_close(vp, 0, p);
1184		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1185		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1186		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1187			(void) nfs_catnap(PZERO, error, "nfs_setattr");
1188		} else if ((error == NFSERR_EXPIRED ||
1189		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1190			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1191		}
1192		retrycnt++;
1193	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1194	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1195	    error == NFSERR_BADSESSION ||
1196	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1197	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1198	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1199	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1200	     retrycnt < 4));
1201	if (error && retrycnt >= 4)
1202		error = EIO;
1203	return (error);
1204}
1205
1206static int
1207nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1208    nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1209    struct nfsvattr *rnap, int *attrflagp, void *stuff)
1210{
1211	u_int32_t *tl;
1212	struct nfsrv_descript nfsd, *nd = &nfsd;
1213	int error;
1214	nfsattrbit_t attrbits;
1215
1216	*attrflagp = 0;
1217	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp);
1218	if (nd->nd_flag & ND_NFSV4)
1219		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1220	vap->va_type = vnode_vtype(vp);
1221	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1222	if (nd->nd_flag & ND_NFSV3) {
1223		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1224		*tl = newnfs_false;
1225	} else if (nd->nd_flag & ND_NFSV4) {
1226		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1227		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1228		NFSGETATTR_ATTRBIT(&attrbits);
1229		(void) nfsrv_putattrbit(nd, &attrbits);
1230	}
1231	error = nfscl_request(nd, vp, p, cred, stuff);
1232	if (error)
1233		return (error);
1234	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1235		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, stuff);
1236	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1237		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1238	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1239		error = nfscl_postop_attr(nd, rnap, attrflagp, stuff);
1240	mbuf_freem(nd->nd_mrep);
1241	if (nd->nd_repstat && !error)
1242		error = nd->nd_repstat;
1243	return (error);
1244}
1245
1246/*
1247 * nfs lookup rpc
1248 */
1249int
1250nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
1251    NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
1252    struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, void *stuff)
1253{
1254	u_int32_t *tl;
1255	struct nfsrv_descript nfsd, *nd = &nfsd;
1256	struct nfsmount *nmp;
1257	struct nfsnode *np;
1258	struct nfsfh *nfhp;
1259	nfsattrbit_t attrbits;
1260	int error = 0, lookupp = 0;
1261
1262	*attrflagp = 0;
1263	*dattrflagp = 0;
1264	if (vnode_vtype(dvp) != VDIR)
1265		return (ENOTDIR);
1266	nmp = VFSTONFS(vnode_mount(dvp));
1267	if (len > NFS_MAXNAMLEN)
1268		return (ENAMETOOLONG);
1269	if (NFSHASNFSV4(nmp) && len == 1 &&
1270		name[0] == '.') {
1271		/*
1272		 * Just return the current dir's fh.
1273		 */
1274		np = VTONFS(dvp);
1275		MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
1276			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1277		nfhp->nfh_len = np->n_fhp->nfh_len;
1278		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1279		*nfhpp = nfhp;
1280		return (0);
1281	}
1282	if (NFSHASNFSV4(nmp) && len == 2 &&
1283		name[0] == '.' && name[1] == '.') {
1284		lookupp = 1;
1285		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp);
1286	} else {
1287		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp);
1288		(void) nfsm_strtom(nd, name, len);
1289	}
1290	if (nd->nd_flag & ND_NFSV4) {
1291		NFSGETATTR_ATTRBIT(&attrbits);
1292		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1293		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
1294		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1295		(void) nfsrv_putattrbit(nd, &attrbits);
1296	}
1297	error = nfscl_request(nd, dvp, p, cred, stuff);
1298	if (error)
1299		return (error);
1300	if (nd->nd_repstat) {
1301		/*
1302		 * When an NFSv4 Lookupp returns ENOENT, it means that
1303		 * the lookup is at the root of an fs, so return this dir.
1304		 */
1305		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
1306		    np = VTONFS(dvp);
1307		    MALLOC(nfhp, struct nfsfh *, sizeof (struct nfsfh) +
1308			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
1309		    nfhp->nfh_len = np->n_fhp->nfh_len;
1310		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
1311		    *nfhpp = nfhp;
1312		    mbuf_freem(nd->nd_mrep);
1313		    return (0);
1314		}
1315		if (nd->nd_flag & ND_NFSV3)
1316		    error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1317		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
1318		    ND_NFSV4) {
1319			/* Load the directory attributes. */
1320			error = nfsm_loadattr(nd, dnap);
1321			if (error == 0)
1322				*dattrflagp = 1;
1323		}
1324		goto nfsmout;
1325	}
1326	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
1327		/* Load the directory attributes. */
1328		error = nfsm_loadattr(nd, dnap);
1329		if (error != 0)
1330			goto nfsmout;
1331		*dattrflagp = 1;
1332		/* Skip over the Lookup and GetFH operation status values. */
1333		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1334	}
1335	error = nfsm_getfh(nd, nfhpp);
1336	if (error)
1337		goto nfsmout;
1338
1339	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1340	if ((nd->nd_flag & ND_NFSV3) && !error)
1341		error = nfscl_postop_attr(nd, dnap, dattrflagp, stuff);
1342nfsmout:
1343	mbuf_freem(nd->nd_mrep);
1344	if (!error && nd->nd_repstat)
1345		error = nd->nd_repstat;
1346	return (error);
1347}
1348
1349/*
1350 * Do a readlink rpc.
1351 */
1352int
1353nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1354    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1355{
1356	u_int32_t *tl;
1357	struct nfsrv_descript nfsd, *nd = &nfsd;
1358	struct nfsnode *np = VTONFS(vp);
1359	nfsattrbit_t attrbits;
1360	int error, len, cangetattr = 1;
1361
1362	*attrflagp = 0;
1363	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp);
1364	if (nd->nd_flag & ND_NFSV4) {
1365		/*
1366		 * And do a Getattr op.
1367		 */
1368		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1369		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1370		NFSGETATTR_ATTRBIT(&attrbits);
1371		(void) nfsrv_putattrbit(nd, &attrbits);
1372	}
1373	error = nfscl_request(nd, vp, p, cred, stuff);
1374	if (error)
1375		return (error);
1376	if (nd->nd_flag & ND_NFSV3)
1377		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1378	if (!nd->nd_repstat && !error) {
1379		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1380		/*
1381		 * This seems weird to me, but must have been added to
1382		 * FreeBSD for some reason. The only thing I can think of
1383		 * is that there was/is some server that replies with
1384		 * more link data than it should?
1385		 */
1386		if (len == NFS_MAXPATHLEN) {
1387			NFSLOCKNODE(np);
1388			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1389				len = np->n_size;
1390				cangetattr = 0;
1391			}
1392			NFSUNLOCKNODE(np);
1393		}
1394		error = nfsm_mbufuio(nd, uiop, len);
1395		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1396			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1397	}
1398	if (nd->nd_repstat && !error)
1399		error = nd->nd_repstat;
1400nfsmout:
1401	mbuf_freem(nd->nd_mrep);
1402	return (error);
1403}
1404
1405/*
1406 * Read operation.
1407 */
1408int
1409nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1410    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1411{
1412	int error, expireret = 0, retrycnt;
1413	u_int32_t clidrev = 0;
1414	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1415	struct nfsnode *np = VTONFS(vp);
1416	struct ucred *newcred;
1417	struct nfsfh *nfhp = NULL;
1418	nfsv4stateid_t stateid;
1419	void *lckp;
1420
1421	if (nmp->nm_clp != NULL)
1422		clidrev = nmp->nm_clp->nfsc_clientidrev;
1423	newcred = cred;
1424	if (NFSHASNFSV4(nmp)) {
1425		nfhp = np->n_fhp;
1426		newcred = NFSNEWCRED(cred);
1427	}
1428	retrycnt = 0;
1429	do {
1430		lckp = NULL;
1431		if (NFSHASNFSV4(nmp))
1432			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1433			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1434			    &lckp);
1435		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1436		    attrflagp, stuff);
1437		if (error == NFSERR_OPENMODE) {
1438			NFSLOCKMNT(nmp);
1439			nmp->nm_state |= NFSSTA_OPENMODE;
1440			NFSUNLOCKMNT(nmp);
1441		}
1442		if (error == NFSERR_STALESTATEID)
1443			nfscl_initiate_recovery(nmp->nm_clp);
1444		if (lckp != NULL)
1445			nfscl_lockderef(lckp);
1446		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1447		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1448		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1449			(void) nfs_catnap(PZERO, error, "nfs_read");
1450		} else if ((error == NFSERR_EXPIRED ||
1451		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1452			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1453		}
1454		retrycnt++;
1455	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1456	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1457	    error == NFSERR_BADSESSION ||
1458	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1459	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1460	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1461	    (error == NFSERR_OPENMODE && retrycnt < 4));
1462	if (error && retrycnt >= 4)
1463		error = EIO;
1464	if (NFSHASNFSV4(nmp))
1465		NFSFREECRED(newcred);
1466	return (error);
1467}
1468
1469/*
1470 * The actual read RPC.
1471 */
1472static int
1473nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1474    nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1475    int *attrflagp, void *stuff)
1476{
1477	u_int32_t *tl;
1478	int error = 0, len, retlen, tsiz, eof = 0;
1479	struct nfsrv_descript nfsd;
1480	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1481	struct nfsrv_descript *nd = &nfsd;
1482	int rsize;
1483	off_t tmp_off;
1484
1485	*attrflagp = 0;
1486	tsiz = uio_uio_resid(uiop);
1487	tmp_off = uiop->uio_offset + tsiz;
1488	NFSLOCKMNT(nmp);
1489	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1490		NFSUNLOCKMNT(nmp);
1491		return (EFBIG);
1492	}
1493	rsize = nmp->nm_rsize;
1494	NFSUNLOCKMNT(nmp);
1495	nd->nd_mrep = NULL;
1496	while (tsiz > 0) {
1497		*attrflagp = 0;
1498		len = (tsiz > rsize) ? rsize : tsiz;
1499		NFSCL_REQSTART(nd, NFSPROC_READ, vp);
1500		if (nd->nd_flag & ND_NFSV4)
1501			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1502		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1503		if (nd->nd_flag & ND_NFSV2) {
1504			*tl++ = txdr_unsigned(uiop->uio_offset);
1505			*tl++ = txdr_unsigned(len);
1506			*tl = 0;
1507		} else {
1508			txdr_hyper(uiop->uio_offset, tl);
1509			*(tl + 2) = txdr_unsigned(len);
1510		}
1511		/*
1512		 * Since I can't do a Getattr for NFSv4 for Write, there
1513		 * doesn't seem any point in doing one here, either.
1514		 * (See the comment in nfsrpc_writerpc() for more info.)
1515		 */
1516		error = nfscl_request(nd, vp, p, cred, stuff);
1517		if (error)
1518			return (error);
1519		if (nd->nd_flag & ND_NFSV3) {
1520			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
1521		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1522			error = nfsm_loadattr(nd, nap);
1523			if (!error)
1524				*attrflagp = 1;
1525		}
1526		if (nd->nd_repstat || error) {
1527			if (!error)
1528				error = nd->nd_repstat;
1529			goto nfsmout;
1530		}
1531		if (nd->nd_flag & ND_NFSV3) {
1532			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1533			eof = fxdr_unsigned(int, *(tl + 1));
1534		} else if (nd->nd_flag & ND_NFSV4) {
1535			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1536			eof = fxdr_unsigned(int, *tl);
1537		}
1538		NFSM_STRSIZ(retlen, len);
1539		error = nfsm_mbufuio(nd, uiop, retlen);
1540		if (error)
1541			goto nfsmout;
1542		mbuf_freem(nd->nd_mrep);
1543		nd->nd_mrep = NULL;
1544		tsiz -= retlen;
1545		if (!(nd->nd_flag & ND_NFSV2)) {
1546			if (eof || retlen == 0)
1547				tsiz = 0;
1548		} else if (retlen < len)
1549			tsiz = 0;
1550	}
1551	return (0);
1552nfsmout:
1553	if (nd->nd_mrep != NULL)
1554		mbuf_freem(nd->nd_mrep);
1555	return (error);
1556}
1557
1558/*
1559 * nfs write operation
1560 * When called_from_strategy != 0, it should return EIO for an error that
1561 * indicates recovery is in progress, so that the buffer will be left
1562 * dirty and be written back to the server later. If it loops around,
1563 * the recovery thread could get stuck waiting for the buffer and recovery
1564 * will then deadlock.
1565 */
1566int
1567nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1568    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1569    void *stuff, int called_from_strategy)
1570{
1571	int error, expireret = 0, retrycnt, nostateid;
1572	u_int32_t clidrev = 0;
1573	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1574	struct nfsnode *np = VTONFS(vp);
1575	struct ucred *newcred;
1576	struct nfsfh *nfhp = NULL;
1577	nfsv4stateid_t stateid;
1578	void *lckp;
1579
1580	*must_commit = 0;
1581	if (nmp->nm_clp != NULL)
1582		clidrev = nmp->nm_clp->nfsc_clientidrev;
1583	newcred = cred;
1584	if (NFSHASNFSV4(nmp)) {
1585		newcred = NFSNEWCRED(cred);
1586		nfhp = np->n_fhp;
1587	}
1588	retrycnt = 0;
1589	do {
1590		lckp = NULL;
1591		nostateid = 0;
1592		if (NFSHASNFSV4(nmp)) {
1593			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1594			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1595			    &lckp);
1596			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1597			    stateid.other[2] == 0) {
1598				nostateid = 1;
1599				NFSCL_DEBUG(1, "stateid0 in write\n");
1600			}
1601		}
1602
1603		/*
1604		 * If there is no stateid for NFSv4, it means this is an
1605		 * extraneous write after close. Basically a poorly
1606		 * implemented buffer cache. Just don't do the write.
1607		 */
1608		if (nostateid)
1609			error = 0;
1610		else
1611			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1612			    newcred, &stateid, p, nap, attrflagp, stuff);
1613		if (error == NFSERR_STALESTATEID)
1614			nfscl_initiate_recovery(nmp->nm_clp);
1615		if (lckp != NULL)
1616			nfscl_lockderef(lckp);
1617		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1618		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1619		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1620			(void) nfs_catnap(PZERO, error, "nfs_write");
1621		} else if ((error == NFSERR_EXPIRED ||
1622		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1623			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1624		}
1625		retrycnt++;
1626	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
1627	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1628	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
1629	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1630	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1631	     expireret == 0 && clidrev != 0 && retrycnt < 4));
1632	if (error != 0 && (retrycnt >= 4 ||
1633	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
1634	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
1635		error = EIO;
1636	if (NFSHASNFSV4(nmp))
1637		NFSFREECRED(newcred);
1638	return (error);
1639}
1640
1641/*
1642 * The actual write RPC.
1643 */
1644static int
1645nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
1646    int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
1647    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
1648{
1649	u_int32_t *tl;
1650	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
1651	struct nfsnode *np = VTONFS(vp);
1652	int error = 0, len, tsiz, rlen, commit, committed = NFSWRITE_FILESYNC;
1653	int wccflag = 0, wsize;
1654	int32_t backup;
1655	struct nfsrv_descript nfsd;
1656	struct nfsrv_descript *nd = &nfsd;
1657	nfsattrbit_t attrbits;
1658	off_t tmp_off;
1659
1660	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
1661	*attrflagp = 0;
1662	tsiz = uio_uio_resid(uiop);
1663	tmp_off = uiop->uio_offset + tsiz;
1664	NFSLOCKMNT(nmp);
1665	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1666		NFSUNLOCKMNT(nmp);
1667		return (EFBIG);
1668	}
1669	wsize = nmp->nm_wsize;
1670	NFSUNLOCKMNT(nmp);
1671	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
1672	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
1673	while (tsiz > 0) {
1674		*attrflagp = 0;
1675		len = (tsiz > wsize) ? wsize : tsiz;
1676		NFSCL_REQSTART(nd, NFSPROC_WRITE, vp);
1677		if (nd->nd_flag & ND_NFSV4) {
1678			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1679			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
1680			txdr_hyper(uiop->uio_offset, tl);
1681			tl += 2;
1682			*tl++ = txdr_unsigned(*iomode);
1683			*tl = txdr_unsigned(len);
1684		} else if (nd->nd_flag & ND_NFSV3) {
1685			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
1686			txdr_hyper(uiop->uio_offset, tl);
1687			tl += 2;
1688			*tl++ = txdr_unsigned(len);
1689			*tl++ = txdr_unsigned(*iomode);
1690			*tl = txdr_unsigned(len);
1691		} else {
1692			u_int32_t x;
1693
1694			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1695			/*
1696			 * Not sure why someone changed this, since the
1697			 * RFC clearly states that "beginoffset" and
1698			 * "totalcount" are ignored, but it wouldn't
1699			 * surprise me if there's a busted server out there.
1700			 */
1701			/* Set both "begin" and "current" to non-garbage. */
1702			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1703			*tl++ = x;      /* "begin offset" */
1704			*tl++ = x;      /* "current offset" */
1705			x = txdr_unsigned(len);
1706			*tl++ = x;      /* total to this offset */
1707			*tl = x;        /* size of this write */
1708
1709		}
1710		nfsm_uiombuf(nd, uiop, len);
1711		/*
1712		 * Although it is tempting to do a normal Getattr Op in the
1713		 * NFSv4 compound, the result can be a nearly hung client
1714		 * system if the Getattr asks for Owner and/or OwnerGroup.
1715		 * It occurs when the client can't map either the Owner or
1716		 * Owner_group name in the Getattr reply to a uid/gid. When
1717		 * there is a cache miss, the kernel does an upcall to the
1718		 * nfsuserd. Then, it can try and read the local /etc/passwd
1719		 * or /etc/group file. It can then block in getnewbuf(),
1720		 * waiting for dirty writes to be pushed to the NFS server.
1721		 * The only reason this doesn't result in a complete
1722		 * deadlock, is that the upcall times out and allows
1723		 * the write to complete. However, progress is so slow
1724		 * that it might just as well be deadlocked.
1725		 * As such, we get the rest of the attributes, but not
1726		 * Owner or Owner_group.
1727		 * nb: nfscl_loadattrcache() needs to be told that these
1728		 *     partial attributes from a write rpc are being
1729		 *     passed in, via a argument flag.
1730		 */
1731		if (nd->nd_flag & ND_NFSV4) {
1732			NFSWRITEGETATTR_ATTRBIT(&attrbits);
1733			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1734			*tl = txdr_unsigned(NFSV4OP_GETATTR);
1735			(void) nfsrv_putattrbit(nd, &attrbits);
1736		}
1737		error = nfscl_request(nd, vp, p, cred, stuff);
1738		if (error)
1739			return (error);
1740		if (nd->nd_repstat) {
1741			/*
1742			 * In case the rpc gets retried, roll
1743			 * the uio fileds changed by nfsm_uiombuf()
1744			 * back.
1745			 */
1746			uiop->uio_offset -= len;
1747			uio_uio_resid_add(uiop, len);
1748			uio_iov_base_add(uiop, -len);
1749			uio_iov_len_add(uiop, len);
1750		}
1751		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1752			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
1753			    &wccflag, stuff);
1754			if (error)
1755				goto nfsmout;
1756		}
1757		if (!nd->nd_repstat) {
1758			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
1759				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1760					+ NFSX_VERF);
1761				rlen = fxdr_unsigned(int, *tl++);
1762				if (rlen == 0) {
1763					error = NFSERR_IO;
1764					goto nfsmout;
1765				} else if (rlen < len) {
1766					backup = len - rlen;
1767					uio_iov_base_add(uiop, -(backup));
1768					uio_iov_len_add(uiop, backup);
1769					uiop->uio_offset -= backup;
1770					uio_uio_resid_add(uiop, backup);
1771					len = rlen;
1772				}
1773				commit = fxdr_unsigned(int, *tl++);
1774
1775				/*
1776				 * Return the lowest commitment level
1777				 * obtained by any of the RPCs.
1778				 */
1779				if (committed == NFSWRITE_FILESYNC)
1780					committed = commit;
1781				else if (committed == NFSWRITE_DATASYNC &&
1782					commit == NFSWRITE_UNSTABLE)
1783					committed = commit;
1784				NFSLOCKMNT(nmp);
1785				if (!NFSHASWRITEVERF(nmp)) {
1786					NFSBCOPY((caddr_t)tl,
1787					    (caddr_t)&nmp->nm_verf[0],
1788					    NFSX_VERF);
1789					NFSSETWRITEVERF(nmp);
1790	    			} else if (NFSBCMP(tl, nmp->nm_verf,
1791				    NFSX_VERF)) {
1792					*must_commit = 1;
1793					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
1794				}
1795				NFSUNLOCKMNT(nmp);
1796			}
1797			if (nd->nd_flag & ND_NFSV4)
1798				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1799			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
1800				error = nfsm_loadattr(nd, nap);
1801				if (!error)
1802					*attrflagp = NFS_LATTR_NOSHRINK;
1803			}
1804		} else {
1805			error = nd->nd_repstat;
1806		}
1807		if (error)
1808			goto nfsmout;
1809		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
1810		mbuf_freem(nd->nd_mrep);
1811		nd->nd_mrep = NULL;
1812		tsiz -= len;
1813	}
1814nfsmout:
1815	if (nd->nd_mrep != NULL)
1816		mbuf_freem(nd->nd_mrep);
1817	*iomode = committed;
1818	if (nd->nd_repstat && !error)
1819		error = nd->nd_repstat;
1820	return (error);
1821}
1822
1823/*
1824 * nfs mknod rpc
1825 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1826 * mode set to specify the file type and the size field for rdev.
1827 */
1828int
1829nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1830    u_int32_t rdev, enum vtype vtyp, struct ucred *cred, NFSPROC_T *p,
1831    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1832    int *attrflagp, int *dattrflagp, void *dstuff)
1833{
1834	u_int32_t *tl;
1835	int error = 0;
1836	struct nfsrv_descript nfsd, *nd = &nfsd;
1837	nfsattrbit_t attrbits;
1838
1839	*nfhpp = NULL;
1840	*attrflagp = 0;
1841	*dattrflagp = 0;
1842	if (namelen > NFS_MAXNAMLEN)
1843		return (ENAMETOOLONG);
1844	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp);
1845	if (nd->nd_flag & ND_NFSV4) {
1846		if (vtyp == VBLK || vtyp == VCHR) {
1847			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1848			*tl++ = vtonfsv34_type(vtyp);
1849			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
1850			*tl = txdr_unsigned(NFSMINOR(rdev));
1851		} else {
1852			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1853			*tl = vtonfsv34_type(vtyp);
1854		}
1855	}
1856	(void) nfsm_strtom(nd, name, namelen);
1857	if (nd->nd_flag & ND_NFSV3) {
1858		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1859		*tl = vtonfsv34_type(vtyp);
1860	}
1861	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1862		nfscl_fillsattr(nd, vap, dvp, 0, 0);
1863	if ((nd->nd_flag & ND_NFSV3) &&
1864	    (vtyp == VCHR || vtyp == VBLK)) {
1865		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1866		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
1867		*tl = txdr_unsigned(NFSMINOR(rdev));
1868	}
1869	if (nd->nd_flag & ND_NFSV4) {
1870		NFSGETATTR_ATTRBIT(&attrbits);
1871		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1872		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
1873		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1874		(void) nfsrv_putattrbit(nd, &attrbits);
1875	}
1876	if (nd->nd_flag & ND_NFSV2)
1877		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
1878	error = nfscl_request(nd, dvp, p, cred, dstuff);
1879	if (error)
1880		return (error);
1881	if (nd->nd_flag & ND_NFSV4)
1882		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1883	if (!nd->nd_repstat) {
1884		if (nd->nd_flag & ND_NFSV4) {
1885			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1886			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1887			if (error)
1888				goto nfsmout;
1889		}
1890		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
1891		if (error)
1892			goto nfsmout;
1893	}
1894	if (nd->nd_flag & ND_NFSV3)
1895		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
1896	if (!error && nd->nd_repstat)
1897		error = nd->nd_repstat;
1898nfsmout:
1899	mbuf_freem(nd->nd_mrep);
1900	return (error);
1901}
1902
1903/*
1904 * nfs file create call
1905 * Mostly just call the approriate routine. (I separated out v4, so that
1906 * error recovery wouldn't be as difficult.)
1907 */
1908int
1909nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1910    nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
1911    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1912    int *attrflagp, int *dattrflagp, void *dstuff)
1913{
1914	int error = 0, newone, expireret = 0, retrycnt, unlocked;
1915	struct nfsclowner *owp;
1916	struct nfscldeleg *dp;
1917	struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp));
1918	u_int32_t clidrev;
1919
1920	if (NFSHASNFSV4(nmp)) {
1921	    retrycnt = 0;
1922	    do {
1923		dp = NULL;
1924		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
1925		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
1926		    NULL, 1);
1927		if (error)
1928			return (error);
1929		if (nmp->nm_clp != NULL)
1930			clidrev = nmp->nm_clp->nfsc_clientidrev;
1931		else
1932			clidrev = 0;
1933		if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
1934		    nfs_numnfscbd == 0 || retrycnt > 0)
1935			error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
1936			  fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
1937			  attrflagp, dattrflagp, dstuff, &unlocked);
1938		else
1939			error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
1940			  cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
1941			  attrflagp, dattrflagp, dstuff, &unlocked);
1942		/*
1943		 * There is no need to invalidate cached attributes here,
1944		 * since new post-delegation issue attributes are always
1945		 * returned by nfsrpc_createv4() and these will update the
1946		 * attribute cache.
1947		 */
1948		if (dp != NULL)
1949			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
1950			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
1951		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
1952		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
1953		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1954		    error == NFSERR_BADSESSION) {
1955			(void) nfs_catnap(PZERO, error, "nfs_open");
1956		} else if ((error == NFSERR_EXPIRED ||
1957		    error == NFSERR_BADSTATEID) && clidrev != 0) {
1958			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1959			retrycnt++;
1960		}
1961	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
1962		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1963		error == NFSERR_BADSESSION ||
1964		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1965		 expireret == 0 && clidrev != 0 && retrycnt < 4));
1966	    if (error && retrycnt >= 4)
1967		    error = EIO;
1968	} else {
1969		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
1970		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
1971		    dstuff);
1972	}
1973	return (error);
1974}
1975
1976/*
1977 * The create rpc for v2 and 3.
1978 */
1979static int
1980nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
1981    nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
1982    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
1983    int *attrflagp, int *dattrflagp, void *dstuff)
1984{
1985	u_int32_t *tl;
1986	int error = 0;
1987	struct nfsrv_descript nfsd, *nd = &nfsd;
1988
1989	*nfhpp = NULL;
1990	*attrflagp = 0;
1991	*dattrflagp = 0;
1992	if (namelen > NFS_MAXNAMLEN)
1993		return (ENAMETOOLONG);
1994	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
1995	(void) nfsm_strtom(nd, name, namelen);
1996	if (nd->nd_flag & ND_NFSV3) {
1997		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1998		if (fmode & O_EXCL) {
1999			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2000			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2001			*tl++ = cverf.lval[0];
2002			*tl = cverf.lval[1];
2003		} else {
2004			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2005			nfscl_fillsattr(nd, vap, dvp, 0, 0);
2006		}
2007	} else {
2008		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2009	}
2010	error = nfscl_request(nd, dvp, p, cred, dstuff);
2011	if (error)
2012		return (error);
2013	if (nd->nd_repstat == 0) {
2014		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2015		if (error)
2016			goto nfsmout;
2017	}
2018	if (nd->nd_flag & ND_NFSV3)
2019		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2020	if (nd->nd_repstat != 0 && error == 0)
2021		error = nd->nd_repstat;
2022nfsmout:
2023	mbuf_freem(nd->nd_mrep);
2024	return (error);
2025}
2026
/*
 * The create rpc for v4.
 * The request is built as an Open with NFSV4OPEN_CREATE using the open
 * owner owp, followed by Getfh and Getattr for the new file and Putfh and
 * Getattr for the directory dvp.
 * On entry *unlockedp, *attrflagp and *dattrflagp are set to 0 and *nfhpp
 * and *dpp to NULL.  On success:
 * - *nfhpp and nnap/attrflagp are filled in by nfscl_mtofh(),
 * - dnap is loaded by nfsm_loadattr() and *dattrflagp is set to 1,
 * - *dpp is set to the delegation structure allocated here, if the reply
 *   contained a read or write delegation (otherwise NULL),
 * - *unlockedp is set to 1 once nfscl_openrelease() has been called.
 * On failure any delegation structure allocated here is freed.
 * Returns ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN, otherwise the error
 * from the request, from parsing the reply, or nd_repstat (0 on success).
 */
static int
nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
    nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
    int *dattrflagp, void *dstuff, int *unlockedp)
{
	u_int32_t *tl;
	int error = 0, deleg, newone, ret, acesize, limitby;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsclopen *op;
	struct nfscldeleg *dp = NULL;
	struct nfsnode *np;
	struct nfsfh *nfhp;
	nfsattrbit_t attrbits;
	nfsv4stateid_t stateid;
	u_int32_t rflags;
	struct nfsmount *nmp;
	struct nfsclsession *tsep;

	nmp = VFSTONFS(dvp->v_mount);
	np = VTONFS(dvp);
	*unlockedp = 0;
	*nfhpp = NULL;
	*dpp = NULL;
	*attrflagp = 0;
	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp);
	/*
	 * For V4, this is actually an Open op.
	 */
	/* Open owner seqid, share access, share deny and the clientid. */
	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(owp->nfsow_seqid);
	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
	    NFSV4OPEN_ACCESSREAD);
	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	/* Open type (create) followed by the create mode. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
	if (fmode & O_EXCL) {
		if (NFSHASNFSV4N(nmp)) {
			if (NFSHASSESSPERSIST(nmp)) {
				/* Use GUARDED for persistent sessions. */
				*tl = txdr_unsigned(NFSCREATE_GUARDED);
				nfscl_fillsattr(nd, vap, dvp, 0, 0);
			} else {
				/* Otherwise, use EXCLUSIVE4_1. */
				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
				*tl++ = cverf.lval[0];
				*tl = cverf.lval[1];
				nfscl_fillsattr(nd, vap, dvp, 0, 0);
			}
		} else {
			/* NFSv4.0 */
			/* Only the verifier is sent here, no attributes. */
			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
			*tl++ = cverf.lval[0];
			*tl = cverf.lval[1];
		}
	} else {
		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
		nfscl_fillsattr(nd, vap, dvp, 0, 0);
	}
	/* The claim type followed by the name of the file to create. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
	(void) nfsm_strtom(nd, name, namelen);
	/* Get the new file's handle and attributes. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSGETATTR_ATTRBIT(&attrbits);
	(void) nfsrv_putattrbit(nd, &attrbits);
	/* Get the directory's post-op attributes. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_PUTFH);
	(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	(void) nfsrv_putattrbit(nd, &attrbits);
	error = nfscl_request(nd, dvp, p, cred, dstuff);
	if (error)
		return (error);
	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
	if (nd->nd_repstat == 0) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		stateid.seqid = *tl++;
		stateid.other[0] = *tl++;
		stateid.other[1] = *tl++;
		stateid.other[2] = *tl;
		/*
		 * tl still points at stateid.other[2]; the result flags are
		 * read 6 words further on and the words in between are not
		 * used here.
		 */
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		(void) nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(int, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/* Note on the client that a delegation was issued. */
			if (!(owp->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				owp->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/* Room for the file handle is allocated after it. */
			MALLOC(dp, struct nfscldeleg *,
			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&dp->nfsdl_owner);
			LIST_INIT(&dp->nfsdl_lock);
			dp->nfsdl_clp = owp->nfsow_clp;
			newnfs_copyincred(cred, &dp->nfsdl_cred);
			nfscl_lockinit(&dp->nfsdl_rwlock);
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			dp->nfsdl_stateid.seqid = *tl++;
			dp->nfsdl_stateid.other[0] = *tl++;
			dp->nfsdl_stateid.other[1] = *tl++;
			dp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero ret sets NFSCLDL_RECALL further down. */
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				dp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					dp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* The limit is the product of 2 words. */
					dp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					dp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				}
			} else {
				dp->nfsdl_flags = NFSCLDL_READ;
			}
			if (ret)
				dp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
			    &acesize, p);
			if (error)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
		if (error)
			goto nfsmout;
		/* Get rid of the PutFH and Getattr status values. */
		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		/* Load the directory attributes. */
		error = nfsm_loadattr(nd, dnap);
		if (error)
			goto nfsmout;
		*dattrflagp = 1;
		/* Record the new file's change and modify time in dp. */
		if (dp != NULL && *attrflagp) {
			dp->nfsdl_change = nnap->na_filerev;
			dp->nfsdl_modtime = nnap->na_mtime;
			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
		}
		/*
		 * We can now complete the Open state.
		 */
		nfhp = *nfhpp;
		if (dp != NULL) {
			dp->nfsdl_fhlen = nfhp->nfh_len;
			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
		}
		/*
		 * Get an Open structure that will be
		 * attached to the OpenOwner, acquired already.
		 */
		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
		    cred, p, NULL, &op, &newone, NULL, 0);
		if (error)
			goto nfsmout;
		op->nfso_stateid = stateid;
		newnfs_copyincred(cred, &op->nfso_cred);
		/* Do the OpenConfirm if asked for, retrying on NFSERR_DELAY. */
		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
		    do {
			ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
			    nfhp->nfh_len, op, cred, p);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_create");
		    } while (ret == NFSERR_DELAY);
		    error = ret;
		}

		/*
		 * If the server is handing out delegations, but we didn't
		 * get one because an OpenConfirm was required, try the
		 * Open again, to get a delegation. This is a harmless no-op,
		 * from a server's point of view.
		 */
		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
		    !error && dp == NULL) {
		    do {
			ret = nfsrpc_openrpc(VFSTONFS(vnode_mount(dvp)), dvp,
			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
			    nfhp->nfh_fh, nfhp->nfh_len,
			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
			    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
		    } while (ret == NFSERR_DELAY);
		    if (ret) {
			/*
			 * The second Open failed: drop any delegation it
			 * returned and only report the errors listed below.
			 */
			if (dp != NULL) {
				FREE((caddr_t)dp, M_NFSCLDELEG);
				dp = NULL;
			}
			if (ret == NFSERR_STALECLIENTID ||
			    ret == NFSERR_STALEDONTRECOVER ||
			    ret == NFSERR_BADSESSION)
				error = ret;
		    }
		}
		nfscl_openrelease(nmp, op, error, newone);
		*unlockedp = 1;
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	if (error == NFSERR_STALECLIENTID)
		nfscl_initiate_recovery(owp->nfsow_clp);
nfsmout:
	/* Hand the delegation to the caller only on success. */
	if (!error)
		*dpp = dp;
	else if (dp != NULL)
		FREE((caddr_t)dp, M_NFSCLDELEG);
	mbuf_freem(nd->nd_mrep);
	return (error);
}
2271
2272/*
2273 * Nfs remove rpc
2274 */
2275int
2276nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
2277    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp,
2278    void *dstuff)
2279{
2280	u_int32_t *tl;
2281	struct nfsrv_descript nfsd, *nd = &nfsd;
2282	struct nfsnode *np;
2283	struct nfsmount *nmp;
2284	nfsv4stateid_t dstateid;
2285	int error, ret = 0, i;
2286
2287	*dattrflagp = 0;
2288	if (namelen > NFS_MAXNAMLEN)
2289		return (ENAMETOOLONG);
2290	nmp = VFSTONFS(vnode_mount(dvp));
2291tryagain:
2292	if (NFSHASNFSV4(nmp) && ret == 0) {
2293		ret = nfscl_removedeleg(vp, p, &dstateid);
2294		if (ret == 1) {
2295			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp);
2296			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
2297			    NFSX_UNSIGNED);
2298			if (NFSHASNFSV4N(nmp))
2299				*tl++ = 0;
2300			else
2301				*tl++ = dstateid.seqid;
2302			*tl++ = dstateid.other[0];
2303			*tl++ = dstateid.other[1];
2304			*tl++ = dstateid.other[2];
2305			*tl = txdr_unsigned(NFSV4OP_PUTFH);
2306			np = VTONFS(dvp);
2307			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
2308			    np->n_fhp->nfh_len, 0);
2309			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2310			*tl = txdr_unsigned(NFSV4OP_REMOVE);
2311		}
2312	} else {
2313		ret = 0;
2314	}
2315	if (ret == 0)
2316		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp);
2317	(void) nfsm_strtom(nd, name, namelen);
2318	error = nfscl_request(nd, dvp, p, cred, dstuff);
2319	if (error)
2320		return (error);
2321	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2322		/* For NFSv4, parse out any Delereturn replies. */
2323		if (ret > 0 && nd->nd_repstat != 0 &&
2324		    (nd->nd_flag & ND_NOMOREDATA)) {
2325			/*
2326			 * If the Delegreturn failed, try again without
2327			 * it. The server will Recall, as required.
2328			 */
2329			mbuf_freem(nd->nd_mrep);
2330			goto tryagain;
2331		}
2332		for (i = 0; i < (ret * 2); i++) {
2333			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2334			    ND_NFSV4) {
2335			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2336			    if (*(tl + 1))
2337				nd->nd_flag |= ND_NOMOREDATA;
2338			}
2339		}
2340		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2341	}
2342	if (nd->nd_repstat && !error)
2343		error = nd->nd_repstat;
2344nfsmout:
2345	mbuf_freem(nd->nd_mrep);
2346	return (error);
2347}
2348
/*
 * Do an nfs rename rpc.
 * Renames fnameptr in directory fdvp to tnameptr in directory tdvp.
 * For NFSv4, nfscl_renamedeleg() is asked about delegations for fvp and
 * tvp; for each one it reports (gotfd, gottd) the stateid is sent at the
 * front of the same request, ahead of the Rename.  If that request fails in
 * the delegation return part, the Rename is redone on its own.
 * *fattrflagp and *tattrflagp are cleared on entry; fnap/fattrflagp and
 * tnap/tattrflagp are filled in by nfscl_wcc_data() for NFSv3 and NFSv4.
 * Returns ENAMETOOLONG if either name exceeds NFS_MAXNAMLEN, otherwise the
 * error from the request, from parsing the reply, or nd_repstat.
 */
int
nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
    vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
    int *fattrflagp, int *tattrflagp, void *fstuff, void *tstuff)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	struct nfsnode *np;
	nfsattrbit_t attrbits;
	nfsv4stateid_t fdstateid, tdstateid;
	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;

	*fattrflagp = 0;
	*tattrflagp = 0;
	nmp = VFSTONFS(vnode_mount(fdvp));
	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
tryagain:
	/*
	 * ret is non-zero here only when coming back via "goto tryagain";
	 * in that case the else branch below resets it so that a plain
	 * Rename is built.
	 */
	if (NFSHASNFSV4(nmp) && ret == 0) {
		/* NOTE(review): ret presumably counts the delegations. */
		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
		    &tdstateid, &gottd, p);
		if (gotfd && gottd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp);
		} else if (gotfd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp);
		} else if (gottd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp);
		}
		if (gotfd) {
			/* Stateid of the delegation for fvp. */
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = fdstateid.seqid;
			*tl++ = fdstateid.other[0];
			*tl++ = fdstateid.other[1];
			*tl = fdstateid.other[2];
			if (gottd) {
				/* Switch to tvp for the second Delegreturn. */
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(NFSV4OP_PUTFH);
				np = VTONFS(tvp);
				(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
				    np->n_fhp->nfh_len, 0);
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
			}
		}
		if (gottd) {
			/* Stateid of the delegation for tvp. */
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = tdstateid.seqid;
			*tl++ = tdstateid.other[0];
			*tl++ = tdstateid.other[1];
			*tl = tdstateid.other[2];
		}
		if (ret > 0) {
			/* Make the "from" directory the saved file handle. */
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_PUTFH);
			np = VTONFS(fdvp);
			(void) nfsm_fhtom(nd, np->n_fhp->nfh_fh,
			    np->n_fhp->nfh_len, 0);
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
		}
	} else {
		ret = 0;
	}
	if (ret == 0)
		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp);
	if (nd->nd_flag & ND_NFSV4) {
		/*
		 * Getattr for the "from" directory, then Putfh and Getattr
		 * for the "to" directory, then the Rename op itself.
		 */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		NFSWCCATTR_ATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_PUTFH);
		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
		nd->nd_flag |= ND_V4WCCATTR;
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_RENAME);
	}
	(void) nfsm_strtom(nd, fnameptr, fnamelen);
	/* NFSv2 and NFSv3 pass the "to" directory handle as an argument. */
	if (!(nd->nd_flag & ND_NFSV4))
		(void) nfsm_fhtom(nd, VTONFS(tdvp)->n_fhp->nfh_fh,
			VTONFS(tdvp)->n_fhp->nfh_len, 0);
	(void) nfsm_strtom(nd, tnameptr, tnamelen);
	error = nfscl_request(nd, fdvp, p, cred, fstuff);
	if (error)
		return (error);
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
		/* For NFSv4, parse out any Delegreturn replies. */
		if (ret > 0 && nd->nd_repstat != 0 &&
		    (nd->nd_flag & ND_NOMOREDATA)) {
			/*
			 * If the Delegreturn failed, try again without
			 * it. The server will Recall, as required.
			 */
			mbuf_freem(nd->nd_mrep);
			goto tryagain;
		}
		/* Two words are consumed per pass; the second is a status. */
		for (i = 0; i < (ret * 2); i++) {
			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
			    ND_NFSV4) {
			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			    if (*(tl + 1)) {
				if (i == 0 && ret > 1) {
				    /*
				     * If the Delegreturn failed, try again
				     * without it. The server will Recall, as
				     * required.
				     * If ret > 1, the first iteration of this
				     * loop is the second DelegReturn result.
				     */
				    mbuf_freem(nd->nd_mrep);
				    goto tryagain;
				} else {
				    nd->nd_flag |= ND_NOMOREDATA;
				}
			    }
			}
		}
		/* Now, the first wcc attribute reply. */
		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
		}
		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL,
		    fstuff);
		/* and the second wcc attribute reply. */
		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
		    !error) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
		}
		if (!error)
			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
			    NULL, tstuff);
	}
	/* A parse error takes precedence over the server's status. */
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
nfsmout:
	mbuf_freem(nd->nd_mrep);
	return (error);
}
2506
2507/*
2508 * nfs hard link create rpc
2509 */
2510int
2511nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
2512    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2513    struct nfsvattr *nap, int *attrflagp, int *dattrflagp, void *dstuff)
2514{
2515	u_int32_t *tl;
2516	struct nfsrv_descript nfsd, *nd = &nfsd;
2517	nfsattrbit_t attrbits;
2518	int error = 0;
2519
2520	*attrflagp = 0;
2521	*dattrflagp = 0;
2522	if (namelen > NFS_MAXNAMLEN)
2523		return (ENAMETOOLONG);
2524	NFSCL_REQSTART(nd, NFSPROC_LINK, vp);
2525	if (nd->nd_flag & ND_NFSV4) {
2526		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2527		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2528	}
2529	(void) nfsm_fhtom(nd, VTONFS(dvp)->n_fhp->nfh_fh,
2530		VTONFS(dvp)->n_fhp->nfh_len, 0);
2531	if (nd->nd_flag & ND_NFSV4) {
2532		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2533		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2534		NFSWCCATTR_ATTRBIT(&attrbits);
2535		(void) nfsrv_putattrbit(nd, &attrbits);
2536		nd->nd_flag |= ND_V4WCCATTR;
2537		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2538		*tl = txdr_unsigned(NFSV4OP_LINK);
2539	}
2540	(void) nfsm_strtom(nd, name, namelen);
2541	error = nfscl_request(nd, vp, p, cred, dstuff);
2542	if (error)
2543		return (error);
2544	if (nd->nd_flag & ND_NFSV3) {
2545		error = nfscl_postop_attr(nd, nap, attrflagp, dstuff);
2546		if (!error)
2547			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2548			    NULL, dstuff);
2549	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
2550		/*
2551		 * First, parse out the PutFH and Getattr result.
2552		 */
2553		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2554		if (!(*(tl + 1)))
2555			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2556		if (*(tl + 1))
2557			nd->nd_flag |= ND_NOMOREDATA;
2558		/*
2559		 * Get the pre-op attributes.
2560		 */
2561		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2562	}
2563	if (nd->nd_repstat && !error)
2564		error = nd->nd_repstat;
2565nfsmout:
2566	mbuf_freem(nd->nd_mrep);
2567	return (error);
2568}
2569
2570/*
2571 * nfs symbolic link create rpc
2572 */
2573int
2574nfsrpc_symlink(vnode_t dvp, char *name, int namelen, char *target,
2575    struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2576    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2577    int *dattrflagp, void *dstuff)
2578{
2579	u_int32_t *tl;
2580	struct nfsrv_descript nfsd, *nd = &nfsd;
2581	struct nfsmount *nmp;
2582	int slen, error = 0;
2583
2584	*nfhpp = NULL;
2585	*attrflagp = 0;
2586	*dattrflagp = 0;
2587	nmp = VFSTONFS(vnode_mount(dvp));
2588	slen = strlen(target);
2589	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
2590		return (ENAMETOOLONG);
2591	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp);
2592	if (nd->nd_flag & ND_NFSV4) {
2593		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2594		*tl = txdr_unsigned(NFLNK);
2595		(void) nfsm_strtom(nd, target, slen);
2596	}
2597	(void) nfsm_strtom(nd, name, namelen);
2598	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2599		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2600	if (!(nd->nd_flag & ND_NFSV4))
2601		(void) nfsm_strtom(nd, target, slen);
2602	if (nd->nd_flag & ND_NFSV2)
2603		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2604	error = nfscl_request(nd, dvp, p, cred, dstuff);
2605	if (error)
2606		return (error);
2607	if (nd->nd_flag & ND_NFSV4)
2608		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2609	if ((nd->nd_flag & ND_NFSV3) && !error) {
2610		if (!nd->nd_repstat)
2611			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2612		if (!error)
2613			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
2614			    NULL, dstuff);
2615	}
2616	if (nd->nd_repstat && !error)
2617		error = nd->nd_repstat;
2618	mbuf_freem(nd->nd_mrep);
2619	/*
2620	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2621	 * Only do this if vfs.nfs.ignore_eexist is set.
2622	 * Never do this for NFSv4.1 or later minor versions, since sessions
2623	 * should guarantee "exactly once" RPC semantics.
2624	 */
2625	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2626	    nmp->nm_minorvers == 0))
2627		error = 0;
2628	return (error);
2629}
2630
2631/*
2632 * nfs make dir rpc
2633 */
2634int
2635nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2636    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2637    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2638    int *dattrflagp, void *dstuff)
2639{
2640	u_int32_t *tl;
2641	struct nfsrv_descript nfsd, *nd = &nfsd;
2642	nfsattrbit_t attrbits;
2643	int error = 0;
2644	struct nfsfh *fhp;
2645	struct nfsmount *nmp;
2646
2647	*nfhpp = NULL;
2648	*attrflagp = 0;
2649	*dattrflagp = 0;
2650	nmp = VFSTONFS(vnode_mount(dvp));
2651	fhp = VTONFS(dvp)->n_fhp;
2652	if (namelen > NFS_MAXNAMLEN)
2653		return (ENAMETOOLONG);
2654	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp);
2655	if (nd->nd_flag & ND_NFSV4) {
2656		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2657		*tl = txdr_unsigned(NFDIR);
2658	}
2659	(void) nfsm_strtom(nd, name, namelen);
2660	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
2661	if (nd->nd_flag & ND_NFSV4) {
2662		NFSGETATTR_ATTRBIT(&attrbits);
2663		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2664		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2665		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2666		(void) nfsrv_putattrbit(nd, &attrbits);
2667		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2668		*tl = txdr_unsigned(NFSV4OP_PUTFH);
2669		(void) nfsm_fhtom(nd, fhp->nfh_fh, fhp->nfh_len, 0);
2670		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2671		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2672		(void) nfsrv_putattrbit(nd, &attrbits);
2673	}
2674	error = nfscl_request(nd, dvp, p, cred, dstuff);
2675	if (error)
2676		return (error);
2677	if (nd->nd_flag & ND_NFSV4)
2678		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2679	if (!nd->nd_repstat && !error) {
2680		if (nd->nd_flag & ND_NFSV4) {
2681			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2682			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2683		}
2684		if (!error)
2685			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2686		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
2687			/* Get rid of the PutFH and Getattr status values. */
2688			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2689			/* Load the directory attributes. */
2690			error = nfsm_loadattr(nd, dnap);
2691			if (error == 0)
2692				*dattrflagp = 1;
2693		}
2694	}
2695	if ((nd->nd_flag & ND_NFSV3) && !error)
2696		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2697	if (nd->nd_repstat && !error)
2698		error = nd->nd_repstat;
2699nfsmout:
2700	mbuf_freem(nd->nd_mrep);
2701	/*
2702	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
2703	 * Only do this if vfs.nfs.ignore_eexist is set.
2704	 * Never do this for NFSv4.1 or later minor versions, since sessions
2705	 * should guarantee "exactly once" RPC semantics.
2706	 */
2707	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
2708	    nmp->nm_minorvers == 0))
2709		error = 0;
2710	return (error);
2711}
2712
2713/*
2714 * nfs remove directory call
2715 */
2716int
2717nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
2718    NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp, void *dstuff)
2719{
2720	struct nfsrv_descript nfsd, *nd = &nfsd;
2721	int error = 0;
2722
2723	*dattrflagp = 0;
2724	if (namelen > NFS_MAXNAMLEN)
2725		return (ENAMETOOLONG);
2726	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp);
2727	(void) nfsm_strtom(nd, name, namelen);
2728	error = nfscl_request(nd, dvp, p, cred, dstuff);
2729	if (error)
2730		return (error);
2731	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2732		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, dstuff);
2733	if (nd->nd_repstat && !error)
2734		error = nd->nd_repstat;
2735	mbuf_freem(nd->nd_mrep);
2736	/*
2737	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2738	 */
2739	if (error == ENOENT)
2740		error = 0;
2741	return (error);
2742}
2743
2744/*
2745 * Readdir rpc.
2746 * Always returns with either uio_resid unchanged, if you are at the
2747 * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
2748 * filled in.
2749 * I felt this would allow caching of directory blocks more easily
 * than returning a partially filled block.
2751 * Directory offset cookies:
2752 * Oh my, what to do with them...
2753 * I can think of three ways to deal with them:
2754 * 1 - have the layer above these RPCs maintain a map between logical
2755 *     directory byte offsets and the NFS directory offset cookies
2756 * 2 - pass the opaque directory offset cookies up into userland
2757 *     and let the libc functions deal with them, via the system call
2758 * 3 - return them to userland in the "struct dirent", so future versions
2759 *     of libc can use them and do whatever is necessary to make things work
2760 *     above these rpc calls, in the meantime
2761 * For now, I do #3 by "hiding" the directory offset cookies after the
2762 * d_name field in struct dirent. This is space inside d_reclen that
2763 * will be ignored by anything that doesn't know about them.
2764 * The directory offset cookies are filled in as the last 8 bytes of
2765 * each directory entry, after d_name. Someday, the userland libc
2766 * functions may be able to use these. In the meantime, it satisfies
2767 * OpenBSD's requirements for cookies being returned.
 * It expects the directory offset cookie for the read to be in uio_offset
2769 * and returns the one for the next entry after this directory block in
2770 * there, as well.
2771 */
2772int
2773nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
2774    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
2775    int *eofp, void *stuff)
2776{
2777	int len, left;
2778	struct dirent *dp = NULL;
2779	u_int32_t *tl;
2780	nfsquad_t cookie, ncookie;
2781	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
2782	struct nfsnode *dnp = VTONFS(vp);
2783	struct nfsvattr nfsva;
2784	struct nfsrv_descript nfsd, *nd = &nfsd;
2785	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2786	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
2787	long dotfileid, dotdotfileid = 0;
2788	u_int32_t fakefileno = 0xffffffff, rderr;
2789	char *cp;
2790	nfsattrbit_t attrbits, dattrbits;
2791	u_int32_t *tl2 = NULL;
2792	size_t tresid;
2793
2794	KASSERT(uiop->uio_iovcnt == 1 &&
2795	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
2796	    ("nfs readdirrpc bad uio"));
2797
2798	/*
2799	 * There is no point in reading a lot more than uio_resid, however
2800	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
2801	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
2802	 * will never make readsize > nm_readdirsize.
2803	 */
2804	readsize = nmp->nm_readdirsize;
2805	if (readsize > uio_uio_resid(uiop))
2806		readsize = uio_uio_resid(uiop) + DIRBLKSIZ;
2807
2808	*attrflagp = 0;
2809	if (eofp)
2810		*eofp = 0;
2811	tresid = uio_uio_resid(uiop);
2812	cookie.lval[0] = cookiep->nfsuquad[0];
2813	cookie.lval[1] = cookiep->nfsuquad[1];
2814	nd->nd_mrep = NULL;
2815
2816	/*
2817	 * For NFSv4, first create the "." and ".." entries.
2818	 */
2819	if (NFSHASNFSV4(nmp)) {
2820		reqsize = 6 * NFSX_UNSIGNED;
2821		NFSGETATTR_ATTRBIT(&dattrbits);
2822		NFSZERO_ATTRBIT(&attrbits);
2823		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
2824		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
2825		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
2826		    NFSATTRBIT_MOUNTEDONFILEID)) {
2827			NFSSETBIT_ATTRBIT(&attrbits,
2828			    NFSATTRBIT_MOUNTEDONFILEID);
2829			gotmnton = 1;
2830		} else {
2831			/*
2832			 * Must fake it. Use the fileno, except when the
2833			 * fsid is != to that of the directory. For that
2834			 * case, generate a fake fileno that is not the same.
2835			 */
2836			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
2837			gotmnton = 0;
2838		}
2839
2840		/*
2841		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
2842		 */
2843		if (uiop->uio_offset == 0) {
2844			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
2845			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2846			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2847			*tl = txdr_unsigned(NFSV4OP_GETATTR);
2848			(void) nfsrv_putattrbit(nd, &attrbits);
2849			error = nfscl_request(nd, vp, p, cred, stuff);
2850			if (error)
2851			    return (error);
2852			dotfileid = 0;	/* Fake out the compiler. */
2853			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
2854			    error = nfsm_loadattr(nd, &nfsva);
2855			    if (error != 0)
2856				goto nfsmout;
2857			    dotfileid = nfsva.na_fileid;
2858			}
2859			if (nd->nd_repstat == 0) {
2860			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2861			    len = fxdr_unsigned(int, *(tl + 4));
2862			    if (len > 0 && len <= NFSX_V4FHMAX)
2863				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
2864			    else
2865				error = EPERM;
2866			    if (!error) {
2867				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
2868				nfsva.na_mntonfileno = 0xffffffff;
2869				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
2870				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
2871				    NULL, NULL, NULL, p, cred);
2872				if (error) {
2873				    dotdotfileid = dotfileid;
2874				} else if (gotmnton) {
2875				    if (nfsva.na_mntonfileno != 0xffffffff)
2876					dotdotfileid = nfsva.na_mntonfileno;
2877				    else
2878					dotdotfileid = nfsva.na_fileid;
2879				} else if (nfsva.na_filesid[0] ==
2880				    dnp->n_vattr.na_filesid[0] &&
2881				    nfsva.na_filesid[1] ==
2882				    dnp->n_vattr.na_filesid[1]) {
2883				    dotdotfileid = nfsva.na_fileid;
2884				} else {
2885				    do {
2886					fakefileno--;
2887				    } while (fakefileno ==
2888					nfsva.na_fileid);
2889				    dotdotfileid = fakefileno;
2890				}
2891			    }
2892			} else if (nd->nd_repstat == NFSERR_NOENT) {
2893			    /*
2894			     * Lookupp returns NFSERR_NOENT when we are
2895			     * at the root, so just use the current dir.
2896			     */
2897			    nd->nd_repstat = 0;
2898			    dotdotfileid = dotfileid;
2899			} else {
2900			    error = nd->nd_repstat;
2901			}
2902			mbuf_freem(nd->nd_mrep);
2903			if (error)
2904			    return (error);
2905			nd->nd_mrep = NULL;
2906			dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
2907			dp->d_type = DT_DIR;
2908			dp->d_fileno = dotfileid;
2909			dp->d_namlen = 1;
2910			dp->d_name[0] = '.';
2911			dp->d_name[1] = '\0';
2912			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
2913			/*
2914			 * Just make these offset cookie 0.
2915			 */
2916			tl = (u_int32_t *)&dp->d_name[4];
2917			*tl++ = 0;
2918			*tl = 0;
2919			blksiz += dp->d_reclen;
2920			uio_uio_resid_add(uiop, -(dp->d_reclen));
2921			uiop->uio_offset += dp->d_reclen;
2922			uio_iov_base_add(uiop, dp->d_reclen);
2923			uio_iov_len_add(uiop, -(dp->d_reclen));
2924			dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
2925			dp->d_type = DT_DIR;
2926			dp->d_fileno = dotdotfileid;
2927			dp->d_namlen = 2;
2928			dp->d_name[0] = '.';
2929			dp->d_name[1] = '.';
2930			dp->d_name[2] = '\0';
2931			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
2932			/*
2933			 * Just make these offset cookie 0.
2934			 */
2935			tl = (u_int32_t *)&dp->d_name[4];
2936			*tl++ = 0;
2937			*tl = 0;
2938			blksiz += dp->d_reclen;
2939			uio_uio_resid_add(uiop, -(dp->d_reclen));
2940			uiop->uio_offset += dp->d_reclen;
2941			uio_iov_base_add(uiop, dp->d_reclen);
2942			uio_iov_len_add(uiop, -(dp->d_reclen));
2943		}
2944		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
2945	} else {
2946		reqsize = 5 * NFSX_UNSIGNED;
2947	}
2948
2949
2950	/*
2951	 * Loop around doing readdir rpc's of size readsize.
2952	 * The stopping criteria is EOF or buffer full.
2953	 */
2954	while (more_dirs && bigenough) {
2955		*attrflagp = 0;
2956		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp);
2957		if (nd->nd_flag & ND_NFSV2) {
2958			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2959			*tl++ = cookie.lval[1];
2960			*tl = txdr_unsigned(readsize);
2961		} else {
2962			NFSM_BUILD(tl, u_int32_t *, reqsize);
2963			*tl++ = cookie.lval[0];
2964			*tl++ = cookie.lval[1];
2965			if (cookie.qval == 0) {
2966				*tl++ = 0;
2967				*tl++ = 0;
2968			} else {
2969				NFSLOCKNODE(dnp);
2970				*tl++ = dnp->n_cookieverf.nfsuquad[0];
2971				*tl++ = dnp->n_cookieverf.nfsuquad[1];
2972				NFSUNLOCKNODE(dnp);
2973			}
2974			if (nd->nd_flag & ND_NFSV4) {
2975				*tl++ = txdr_unsigned(readsize);
2976				*tl = txdr_unsigned(readsize);
2977				(void) nfsrv_putattrbit(nd, &attrbits);
2978				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2979				*tl = txdr_unsigned(NFSV4OP_GETATTR);
2980				(void) nfsrv_putattrbit(nd, &dattrbits);
2981			} else {
2982				*tl = txdr_unsigned(readsize);
2983			}
2984		}
2985		error = nfscl_request(nd, vp, p, cred, stuff);
2986		if (error)
2987			return (error);
2988		if (!(nd->nd_flag & ND_NFSV2)) {
2989			if (nd->nd_flag & ND_NFSV3)
2990				error = nfscl_postop_attr(nd, nap, attrflagp,
2991				    stuff);
2992			if (!nd->nd_repstat && !error) {
2993				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2994				NFSLOCKNODE(dnp);
2995				dnp->n_cookieverf.nfsuquad[0] = *tl++;
2996				dnp->n_cookieverf.nfsuquad[1] = *tl;
2997				NFSUNLOCKNODE(dnp);
2998			}
2999		}
3000		if (nd->nd_repstat || error) {
3001			if (!error)
3002				error = nd->nd_repstat;
3003			goto nfsmout;
3004		}
3005		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3006		more_dirs = fxdr_unsigned(int, *tl);
3007		if (!more_dirs)
3008			tryformoredirs = 0;
3009
3010		/* loop through the dir entries, doctoring them to 4bsd form */
3011		while (more_dirs && bigenough) {
3012			if (nd->nd_flag & ND_NFSV4) {
3013				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3014				ncookie.lval[0] = *tl++;
3015				ncookie.lval[1] = *tl++;
3016				len = fxdr_unsigned(int, *tl);
3017			} else if (nd->nd_flag & ND_NFSV3) {
3018				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3019				nfsva.na_fileid = fxdr_hyper(tl);
3020				tl += 2;
3021				len = fxdr_unsigned(int, *tl);
3022			} else {
3023				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3024				nfsva.na_fileid =
3025				    fxdr_unsigned(long, *tl++);
3026				len = fxdr_unsigned(int, *tl);
3027			}
3028			if (len <= 0 || len > NFS_MAXNAMLEN) {
3029				error = EBADRPC;
3030				goto nfsmout;
3031			}
3032			tlen = NFSM_RNDUP(len);
3033			if (tlen == len)
3034				tlen += 4;  /* To ensure null termination */
3035			left = DIRBLKSIZ - blksiz;
3036			if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > left) {
3037				NFSBZERO(uio_iov_base(uiop), left);
3038				dp->d_reclen += left;
3039				uio_iov_base_add(uiop, left);
3040				uio_iov_len_add(uiop, -(left));
3041				uio_uio_resid_add(uiop, -(left));
3042				uiop->uio_offset += left;
3043				blksiz = 0;
3044			}
3045			if ((int)(tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop))
3046				bigenough = 0;
3047			if (bigenough) {
3048				dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
3049				dp->d_namlen = len;
3050				dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER;
3051				dp->d_type = DT_UNKNOWN;
3052				blksiz += dp->d_reclen;
3053				if (blksiz == DIRBLKSIZ)
3054					blksiz = 0;
3055				uio_uio_resid_add(uiop, -(DIRHDSIZ));
3056				uiop->uio_offset += DIRHDSIZ;
3057				uio_iov_base_add(uiop, DIRHDSIZ);
3058				uio_iov_len_add(uiop, -(DIRHDSIZ));
3059				error = nfsm_mbufuio(nd, uiop, len);
3060				if (error)
3061					goto nfsmout;
3062				cp = CAST_DOWN(caddr_t, uio_iov_base(uiop));
3063				tlen -= len;
3064				NFSBZERO(cp, tlen);
3065				cp += tlen;	/* points to cookie storage */
3066				tl2 = (u_int32_t *)cp;
3067				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3068				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3069				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3070				uiop->uio_offset += (tlen + NFSX_HYPER);
3071			} else {
3072				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3073				if (error)
3074					goto nfsmout;
3075			}
3076			if (nd->nd_flag & ND_NFSV4) {
3077				rderr = 0;
3078				nfsva.na_mntonfileno = 0xffffffff;
3079				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3080				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3081				    NULL, NULL, &rderr, p, cred);
3082				if (error)
3083					goto nfsmout;
3084				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3085			} else if (nd->nd_flag & ND_NFSV3) {
3086				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3087				ncookie.lval[0] = *tl++;
3088				ncookie.lval[1] = *tl++;
3089			} else {
3090				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3091				ncookie.lval[0] = 0;
3092				ncookie.lval[1] = *tl++;
3093			}
3094			if (bigenough) {
3095			    if (nd->nd_flag & ND_NFSV4) {
3096				if (rderr) {
3097				    dp->d_fileno = 0;
3098				} else {
3099				    if (gotmnton) {
3100					if (nfsva.na_mntonfileno != 0xffffffff)
3101					    dp->d_fileno = nfsva.na_mntonfileno;
3102					else
3103					    dp->d_fileno = nfsva.na_fileid;
3104				    } else if (nfsva.na_filesid[0] ==
3105					dnp->n_vattr.na_filesid[0] &&
3106					nfsva.na_filesid[1] ==
3107					dnp->n_vattr.na_filesid[1]) {
3108					dp->d_fileno = nfsva.na_fileid;
3109				    } else {
3110					do {
3111					    fakefileno--;
3112					} while (fakefileno ==
3113					    nfsva.na_fileid);
3114					dp->d_fileno = fakefileno;
3115				    }
3116				    dp->d_type = vtonfs_dtype(nfsva.na_type);
3117				}
3118			    } else {
3119				dp->d_fileno = nfsva.na_fileid;
3120			    }
3121			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3122				ncookie.lval[0];
3123			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3124				ncookie.lval[1];
3125			}
3126			more_dirs = fxdr_unsigned(int, *tl);
3127		}
3128		/*
3129		 * If at end of rpc data, get the eof boolean
3130		 */
3131		if (!more_dirs) {
3132			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3133			eof = fxdr_unsigned(int, *tl);
3134			if (tryformoredirs)
3135				more_dirs = !eof;
3136			if (nd->nd_flag & ND_NFSV4) {
3137				error = nfscl_postop_attr(nd, nap, attrflagp,
3138				    stuff);
3139				if (error)
3140					goto nfsmout;
3141			}
3142		}
3143		mbuf_freem(nd->nd_mrep);
3144		nd->nd_mrep = NULL;
3145	}
3146	/*
3147	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3148	 * by increasing d_reclen for the last record.
3149	 */
3150	if (blksiz > 0) {
3151		left = DIRBLKSIZ - blksiz;
3152		NFSBZERO(uio_iov_base(uiop), left);
3153		dp->d_reclen += left;
3154		uio_iov_base_add(uiop, left);
3155		uio_iov_len_add(uiop, -(left));
3156		uio_uio_resid_add(uiop, -(left));
3157		uiop->uio_offset += left;
3158	}
3159
3160	/*
3161	 * If returning no data, assume end of file.
3162	 * If not bigenough, return not end of file, since you aren't
3163	 *    returning all the data
3164	 * Otherwise, return the eof flag from the server.
3165	 */
3166	if (eofp) {
3167		if (tresid == ((size_t)(uio_uio_resid(uiop))))
3168			*eofp = 1;
3169		else if (!bigenough)
3170			*eofp = 0;
3171		else
3172			*eofp = eof;
3173	}
3174
3175	/*
3176	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3177	 */
3178	while (uio_uio_resid(uiop) > 0 && ((size_t)(uio_uio_resid(uiop))) != tresid) {
3179		dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
3180		NFSBZERO(dp, DIRBLKSIZ);
3181		dp->d_type = DT_UNKNOWN;
3182		tl = (u_int32_t *)&dp->d_name[4];
3183		*tl++ = cookie.lval[0];
3184		*tl = cookie.lval[1];
3185		dp->d_reclen = DIRBLKSIZ;
3186		uio_iov_base_add(uiop, DIRBLKSIZ);
3187		uio_iov_len_add(uiop, -(DIRBLKSIZ));
3188		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3189		uiop->uio_offset += DIRBLKSIZ;
3190	}
3191
3192nfsmout:
3193	if (nd->nd_mrep != NULL)
3194		mbuf_freem(nd->nd_mrep);
3195	return (error);
3196}
3197
3198#ifndef APPLE
3199/*
3200 * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3201 * (Also used for NFS V4 when mount flag set.)
3202 * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3203 */
3204int
3205nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3206    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3207    int *eofp, void *stuff)
3208{
3209	int len, left;
3210	struct dirent *dp = NULL;
3211	u_int32_t *tl;
3212	vnode_t newvp = NULLVP;
3213	struct nfsrv_descript nfsd, *nd = &nfsd;
3214	struct nameidata nami, *ndp = &nami;
3215	struct componentname *cnp = &ndp->ni_cnd;
3216	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3217	struct nfsnode *dnp = VTONFS(vp), *np;
3218	struct nfsvattr nfsva;
3219	struct nfsfh *nfhp;
3220	nfsquad_t cookie, ncookie;
3221	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3222	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3223	int isdotdot = 0, unlocknewvp = 0;
3224	long dotfileid, dotdotfileid = 0, fileno = 0;
3225	char *cp;
3226	nfsattrbit_t attrbits, dattrbits;
3227	size_t tresid;
3228	u_int32_t *tl2 = NULL, fakefileno = 0xffffffff, rderr;
3229	struct timespec dctime;
3230
3231	KASSERT(uiop->uio_iovcnt == 1 &&
3232	    (uio_uio_resid(uiop) & (DIRBLKSIZ - 1)) == 0,
3233	    ("nfs readdirplusrpc bad uio"));
3234	timespecclear(&dctime);
3235	*attrflagp = 0;
3236	if (eofp != NULL)
3237		*eofp = 0;
3238	ndp->ni_dvp = vp;
3239	nd->nd_mrep = NULL;
3240	cookie.lval[0] = cookiep->nfsuquad[0];
3241	cookie.lval[1] = cookiep->nfsuquad[1];
3242	tresid = uio_uio_resid(uiop);
3243
3244	/*
3245	 * For NFSv4, first create the "." and ".." entries.
3246	 */
3247	if (NFSHASNFSV4(nmp)) {
3248		NFSGETATTR_ATTRBIT(&dattrbits);
3249		NFSZERO_ATTRBIT(&attrbits);
3250		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3251		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3252		    NFSATTRBIT_MOUNTEDONFILEID)) {
3253			NFSSETBIT_ATTRBIT(&attrbits,
3254			    NFSATTRBIT_MOUNTEDONFILEID);
3255			gotmnton = 1;
3256		} else {
3257			/*
3258			 * Must fake it. Use the fileno, except when the
3259			 * fsid is != to that of the directory. For that
3260			 * case, generate a fake fileno that is not the same.
3261			 */
3262			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3263			gotmnton = 0;
3264		}
3265
3266		/*
3267		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3268		 */
3269		if (uiop->uio_offset == 0) {
3270			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp);
3271			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3272			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3273			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3274			(void) nfsrv_putattrbit(nd, &attrbits);
3275			error = nfscl_request(nd, vp, p, cred, stuff);
3276			if (error)
3277			    return (error);
3278			dotfileid = 0;	/* Fake out the compiler. */
3279			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3280			    error = nfsm_loadattr(nd, &nfsva);
3281			    if (error != 0)
3282				goto nfsmout;
3283			    dctime = nfsva.na_ctime;
3284			    dotfileid = nfsva.na_fileid;
3285			}
3286			if (nd->nd_repstat == 0) {
3287			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3288			    len = fxdr_unsigned(int, *(tl + 4));
3289			    if (len > 0 && len <= NFSX_V4FHMAX)
3290				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3291			    else
3292				error = EPERM;
3293			    if (!error) {
3294				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3295				nfsva.na_mntonfileno = 0xffffffff;
3296				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3297				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3298				    NULL, NULL, NULL, p, cred);
3299				if (error) {
3300				    dotdotfileid = dotfileid;
3301				} else if (gotmnton) {
3302				    if (nfsva.na_mntonfileno != 0xffffffff)
3303					dotdotfileid = nfsva.na_mntonfileno;
3304				    else
3305					dotdotfileid = nfsva.na_fileid;
3306				} else if (nfsva.na_filesid[0] ==
3307				    dnp->n_vattr.na_filesid[0] &&
3308				    nfsva.na_filesid[1] ==
3309				    dnp->n_vattr.na_filesid[1]) {
3310				    dotdotfileid = nfsva.na_fileid;
3311				} else {
3312				    do {
3313					fakefileno--;
3314				    } while (fakefileno ==
3315					nfsva.na_fileid);
3316				    dotdotfileid = fakefileno;
3317				}
3318			    }
3319			} else if (nd->nd_repstat == NFSERR_NOENT) {
3320			    /*
3321			     * Lookupp returns NFSERR_NOENT when we are
3322			     * at the root, so just use the current dir.
3323			     */
3324			    nd->nd_repstat = 0;
3325			    dotdotfileid = dotfileid;
3326			} else {
3327			    error = nd->nd_repstat;
3328			}
3329			mbuf_freem(nd->nd_mrep);
3330			if (error)
3331			    return (error);
3332			nd->nd_mrep = NULL;
3333			dp = (struct dirent *)uio_iov_base(uiop);
3334			dp->d_type = DT_DIR;
3335			dp->d_fileno = dotfileid;
3336			dp->d_namlen = 1;
3337			dp->d_name[0] = '.';
3338			dp->d_name[1] = '\0';
3339			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
3340			/*
3341			 * Just make these offset cookie 0.
3342			 */
3343			tl = (u_int32_t *)&dp->d_name[4];
3344			*tl++ = 0;
3345			*tl = 0;
3346			blksiz += dp->d_reclen;
3347			uio_uio_resid_add(uiop, -(dp->d_reclen));
3348			uiop->uio_offset += dp->d_reclen;
3349			uio_iov_base_add(uiop, dp->d_reclen);
3350			uio_iov_len_add(uiop, -(dp->d_reclen));
3351			dp = (struct dirent *)uio_iov_base(uiop);
3352			dp->d_type = DT_DIR;
3353			dp->d_fileno = dotdotfileid;
3354			dp->d_namlen = 2;
3355			dp->d_name[0] = '.';
3356			dp->d_name[1] = '.';
3357			dp->d_name[2] = '\0';
3358			dp->d_reclen = DIRENT_SIZE(dp) + NFSX_HYPER;
3359			/*
3360			 * Just make these offset cookie 0.
3361			 */
3362			tl = (u_int32_t *)&dp->d_name[4];
3363			*tl++ = 0;
3364			*tl = 0;
3365			blksiz += dp->d_reclen;
3366			uio_uio_resid_add(uiop, -(dp->d_reclen));
3367			uiop->uio_offset += dp->d_reclen;
3368			uio_iov_base_add(uiop, dp->d_reclen);
3369			uio_iov_len_add(uiop, -(dp->d_reclen));
3370		}
3371		NFSREADDIRPLUS_ATTRBIT(&attrbits);
3372		if (gotmnton)
3373			NFSSETBIT_ATTRBIT(&attrbits,
3374			    NFSATTRBIT_MOUNTEDONFILEID);
3375	}
3376
3377	/*
3378	 * Loop around doing readdir rpc's of size nm_readdirsize.
3379	 * The stopping criteria is EOF or buffer full.
3380	 */
3381	while (more_dirs && bigenough) {
3382		*attrflagp = 0;
3383		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp);
3384 		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3385		*tl++ = cookie.lval[0];
3386		*tl++ = cookie.lval[1];
3387		if (cookie.qval == 0) {
3388			*tl++ = 0;
3389			*tl++ = 0;
3390		} else {
3391			NFSLOCKNODE(dnp);
3392			*tl++ = dnp->n_cookieverf.nfsuquad[0];
3393			*tl++ = dnp->n_cookieverf.nfsuquad[1];
3394			NFSUNLOCKNODE(dnp);
3395		}
3396		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
3397		*tl = txdr_unsigned(nmp->nm_readdirsize);
3398		if (nd->nd_flag & ND_NFSV4) {
3399			(void) nfsrv_putattrbit(nd, &attrbits);
3400			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3401			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3402			(void) nfsrv_putattrbit(nd, &dattrbits);
3403		}
3404		error = nfscl_request(nd, vp, p, cred, stuff);
3405		if (error)
3406			return (error);
3407		if (nd->nd_flag & ND_NFSV3)
3408			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3409		if (nd->nd_repstat || error) {
3410			if (!error)
3411				error = nd->nd_repstat;
3412			goto nfsmout;
3413		}
3414		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3415			dctime = nap->na_ctime;
3416		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3417		NFSLOCKNODE(dnp);
3418		dnp->n_cookieverf.nfsuquad[0] = *tl++;
3419		dnp->n_cookieverf.nfsuquad[1] = *tl++;
3420		NFSUNLOCKNODE(dnp);
3421		more_dirs = fxdr_unsigned(int, *tl);
3422		if (!more_dirs)
3423			tryformoredirs = 0;
3424
3425		/* loop through the dir entries, doctoring them to 4bsd form */
3426		while (more_dirs && bigenough) {
3427			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3428			if (nd->nd_flag & ND_NFSV4) {
3429				ncookie.lval[0] = *tl++;
3430				ncookie.lval[1] = *tl++;
3431			} else {
3432				fileno = fxdr_unsigned(long, *++tl);
3433				tl++;
3434			}
3435			len = fxdr_unsigned(int, *tl);
3436			if (len <= 0 || len > NFS_MAXNAMLEN) {
3437				error = EBADRPC;
3438				goto nfsmout;
3439			}
3440			tlen = NFSM_RNDUP(len);
3441			if (tlen == len)
3442				tlen += 4;  /* To ensure null termination */
3443			left = DIRBLKSIZ - blksiz;
3444			if ((tlen + DIRHDSIZ + NFSX_HYPER) > left) {
3445				NFSBZERO(uio_iov_base(uiop), left);
3446				dp->d_reclen += left;
3447				uio_iov_base_add(uiop, left);
3448				uio_iov_len_add(uiop, -(left));
3449				uio_uio_resid_add(uiop, -(left));
3450				uiop->uio_offset += left;
3451				blksiz = 0;
3452			}
3453			if ((tlen + DIRHDSIZ + NFSX_HYPER) > uio_uio_resid(uiop))
3454				bigenough = 0;
3455			if (bigenough) {
3456				dp = (struct dirent *)uio_iov_base(uiop);
3457				dp->d_namlen = len;
3458				dp->d_reclen = tlen + DIRHDSIZ + NFSX_HYPER;
3459				dp->d_type = DT_UNKNOWN;
3460				blksiz += dp->d_reclen;
3461				if (blksiz == DIRBLKSIZ)
3462					blksiz = 0;
3463				uio_uio_resid_add(uiop, -(DIRHDSIZ));
3464				uiop->uio_offset += DIRHDSIZ;
3465				uio_iov_base_add(uiop, DIRHDSIZ);
3466				uio_iov_len_add(uiop, -(DIRHDSIZ));
3467				cnp->cn_nameptr = uio_iov_base(uiop);
3468				cnp->cn_namelen = len;
3469				NFSCNHASHZERO(cnp);
3470				error = nfsm_mbufuio(nd, uiop, len);
3471				if (error)
3472					goto nfsmout;
3473				cp = uio_iov_base(uiop);
3474				tlen -= len;
3475				NFSBZERO(cp, tlen);
3476				cp += tlen;	/* points to cookie storage */
3477				tl2 = (u_int32_t *)cp;
3478				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
3479				    cnp->cn_nameptr[1] == '.')
3480					isdotdot = 1;
3481				else
3482					isdotdot = 0;
3483				uio_iov_base_add(uiop, (tlen + NFSX_HYPER));
3484				uio_iov_len_add(uiop, -(tlen + NFSX_HYPER));
3485				uio_uio_resid_add(uiop, -(tlen + NFSX_HYPER));
3486				uiop->uio_offset += (tlen + NFSX_HYPER);
3487			} else {
3488				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3489				if (error)
3490					goto nfsmout;
3491			}
3492			nfhp = NULL;
3493			if (nd->nd_flag & ND_NFSV3) {
3494				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
3495				ncookie.lval[0] = *tl++;
3496				ncookie.lval[1] = *tl++;
3497				attrflag = fxdr_unsigned(int, *tl);
3498				if (attrflag) {
3499				  error = nfsm_loadattr(nd, &nfsva);
3500				  if (error)
3501					goto nfsmout;
3502				}
3503				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
3504				if (*tl) {
3505					error = nfsm_getfh(nd, &nfhp);
3506					if (error)
3507					    goto nfsmout;
3508				}
3509				if (!attrflag && nfhp != NULL) {
3510					FREE((caddr_t)nfhp, M_NFSFH);
3511					nfhp = NULL;
3512				}
3513			} else {
3514				rderr = 0;
3515				nfsva.na_mntonfileno = 0xffffffff;
3516				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
3517				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3518				    NULL, NULL, &rderr, p, cred);
3519				if (error)
3520					goto nfsmout;
3521			}
3522
3523			if (bigenough) {
3524			    if (nd->nd_flag & ND_NFSV4) {
3525				if (rderr) {
3526				    dp->d_fileno = 0;
3527				} else if (gotmnton) {
3528				    if (nfsva.na_mntonfileno != 0xffffffff)
3529					dp->d_fileno = nfsva.na_mntonfileno;
3530				    else
3531					dp->d_fileno = nfsva.na_fileid;
3532				} else if (nfsva.na_filesid[0] ==
3533				    dnp->n_vattr.na_filesid[0] &&
3534				    nfsva.na_filesid[1] ==
3535				    dnp->n_vattr.na_filesid[1]) {
3536				    dp->d_fileno = nfsva.na_fileid;
3537				} else {
3538				    do {
3539					fakefileno--;
3540				    } while (fakefileno ==
3541					nfsva.na_fileid);
3542				    dp->d_fileno = fakefileno;
3543				}
3544			    } else {
3545				dp->d_fileno = fileno;
3546			    }
3547			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
3548				ncookie.lval[0];
3549			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
3550				ncookie.lval[1];
3551
3552			    if (nfhp != NULL) {
3553				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
3554				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
3555				    VREF(vp);
3556				    newvp = vp;
3557				    unlocknewvp = 0;
3558				    FREE((caddr_t)nfhp, M_NFSFH);
3559				    np = dnp;
3560				} else if (isdotdot != 0) {
3561				    /*
3562				     * Skip doing a nfscl_nget() call for "..".
3563				     * There's a race between acquiring the nfs
3564				     * node here and lookups that look for the
3565				     * directory being read (in the parent).
3566				     * It would try to get a lock on ".." here,
3567				     * owning the lock on the directory being
3568				     * read. Lookup will hold the lock on ".."
3569				     * and try to acquire the lock on the
3570				     * directory being read.
3571				     * If the directory is unlocked/relocked,
3572				     * then there is a LOR with the buflock
3573				     * vp is relocked.
3574				     */
3575				    free(nfhp, M_NFSFH);
3576				} else {
3577				    error = nfscl_nget(vnode_mount(vp), vp,
3578				      nfhp, cnp, p, &np, NULL, LK_EXCLUSIVE);
3579				    if (!error) {
3580					newvp = NFSTOV(np);
3581					unlocknewvp = 1;
3582				    }
3583				}
3584				nfhp = NULL;
3585				if (newvp != NULLVP) {
3586				    error = nfscl_loadattrcache(&newvp,
3587					&nfsva, NULL, NULL, 0, 0);
3588				    if (error) {
3589					if (unlocknewvp)
3590					    vput(newvp);
3591					else
3592					    vrele(newvp);
3593					goto nfsmout;
3594				    }
3595				    dp->d_type =
3596					vtonfs_dtype(np->n_vattr.na_type);
3597				    ndp->ni_vp = newvp;
3598				    NFSCNHASH(cnp, HASHINIT);
3599				    if (cnp->cn_namelen <= NCHNAMLEN &&
3600					(newvp->v_type != VDIR ||
3601					 dctime.tv_sec != 0)) {
3602					cache_enter_time(ndp->ni_dvp,
3603					    ndp->ni_vp, cnp,
3604					    &nfsva.na_ctime,
3605					    newvp->v_type != VDIR ? NULL :
3606					    &dctime);
3607				    }
3608				    if (unlocknewvp)
3609					vput(newvp);
3610				    else
3611					vrele(newvp);
3612				    newvp = NULLVP;
3613				}
3614			    }
3615			} else if (nfhp != NULL) {
3616			    FREE((caddr_t)nfhp, M_NFSFH);
3617			}
3618			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3619			more_dirs = fxdr_unsigned(int, *tl);
3620		}
3621		/*
3622		 * If at end of rpc data, get the eof boolean
3623		 */
3624		if (!more_dirs) {
3625			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
3626			eof = fxdr_unsigned(int, *tl);
3627			if (tryformoredirs)
3628				more_dirs = !eof;
3629			if (nd->nd_flag & ND_NFSV4) {
3630				error = nfscl_postop_attr(nd, nap, attrflagp,
3631				    stuff);
3632				if (error)
3633					goto nfsmout;
3634			}
3635		}
3636		mbuf_freem(nd->nd_mrep);
3637		nd->nd_mrep = NULL;
3638	}
3639	/*
3640	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
3641	 * by increasing d_reclen for the last record.
3642	 */
3643	if (blksiz > 0) {
3644		left = DIRBLKSIZ - blksiz;
3645		NFSBZERO(uio_iov_base(uiop), left);
3646		dp->d_reclen += left;
3647		uio_iov_base_add(uiop, left);
3648		uio_iov_len_add(uiop, -(left));
3649		uio_uio_resid_add(uiop, -(left));
3650		uiop->uio_offset += left;
3651	}
3652
3653	/*
3654	 * If returning no data, assume end of file.
3655	 * If not bigenough, return not end of file, since you aren't
3656	 *    returning all the data
3657	 * Otherwise, return the eof flag from the server.
3658	 */
3659	if (eofp != NULL) {
3660		if (tresid == uio_uio_resid(uiop))
3661			*eofp = 1;
3662		else if (!bigenough)
3663			*eofp = 0;
3664		else
3665			*eofp = eof;
3666	}
3667
3668	/*
3669	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
3670	 */
3671	while (uio_uio_resid(uiop) > 0 && uio_uio_resid(uiop) != tresid) {
3672		dp = (struct dirent *)uio_iov_base(uiop);
3673		NFSBZERO(dp, DIRBLKSIZ);
3674		dp->d_type = DT_UNKNOWN;
3675		tl = (u_int32_t *)&dp->d_name[4];
3676		*tl++ = cookie.lval[0];
3677		*tl = cookie.lval[1];
3678		dp->d_reclen = DIRBLKSIZ;
3679		uio_iov_base_add(uiop, DIRBLKSIZ);
3680		uio_iov_len_add(uiop, -(DIRBLKSIZ));
3681		uio_uio_resid_add(uiop, -(DIRBLKSIZ));
3682		uiop->uio_offset += DIRBLKSIZ;
3683	}
3684
3685nfsmout:
3686	if (nd->nd_mrep != NULL)
3687		mbuf_freem(nd->nd_mrep);
3688	return (error);
3689}
3690#endif	/* !APPLE */
3691
3692/*
3693 * Nfs commit rpc
3694 */
3695int
3696nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
3697    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
3698{
3699	u_int32_t *tl;
3700	struct nfsrv_descript nfsd, *nd = &nfsd;
3701	nfsattrbit_t attrbits;
3702	int error;
3703	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3704
3705	*attrflagp = 0;
3706	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp);
3707	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3708	txdr_hyper(offset, tl);
3709	tl += 2;
3710	*tl = txdr_unsigned(cnt);
3711	if (nd->nd_flag & ND_NFSV4) {
3712		/*
3713		 * And do a Getattr op.
3714		 */
3715		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3716		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3717		NFSGETATTR_ATTRBIT(&attrbits);
3718		(void) nfsrv_putattrbit(nd, &attrbits);
3719	}
3720	error = nfscl_request(nd, vp, p, cred, stuff);
3721	if (error)
3722		return (error);
3723	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff);
3724	if (!error && !nd->nd_repstat) {
3725		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
3726		NFSLOCKMNT(nmp);
3727		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
3728			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
3729			nd->nd_repstat = NFSERR_STALEWRITEVERF;
3730		}
3731		NFSUNLOCKMNT(nmp);
3732		if (nd->nd_flag & ND_NFSV4)
3733			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
3734	}
3735nfsmout:
3736	if (!error && nd->nd_repstat)
3737		error = nd->nd_repstat;
3738	mbuf_freem(nd->nd_mrep);
3739	return (error);
3740}
3741
3742/*
3743 * NFS byte range lock rpc.
3744 * (Mostly just calls one of the three lower level RPC routines.)
3745 */
3746int
3747nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
3748    int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3749{
3750	struct nfscllockowner *lp;
3751	struct nfsclclient *clp;
3752	struct nfsfh *nfhp;
3753	struct nfsrv_descript nfsd, *nd = &nfsd;
3754	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
3755	u_int64_t off, len;
3756	off_t start, end;
3757	u_int32_t clidrev = 0;
3758	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
3759	int callcnt, dorpc;
3760
3761	/*
3762	 * Convert the flock structure into a start and end and do POSIX
3763	 * bounds checking.
3764	 */
3765	switch (fl->l_whence) {
3766	case SEEK_SET:
3767	case SEEK_CUR:
3768		/*
3769		 * Caller is responsible for adding any necessary offset
3770		 * when SEEK_CUR is used.
3771		 */
3772		start = fl->l_start;
3773		off = fl->l_start;
3774		break;
3775	case SEEK_END:
3776		start = size + fl->l_start;
3777		off = size + fl->l_start;
3778		break;
3779	default:
3780		return (EINVAL);
3781	}
3782	if (start < 0)
3783		return (EINVAL);
3784	if (fl->l_len != 0) {
3785		end = start + fl->l_len - 1;
3786		if (end < start)
3787			return (EINVAL);
3788	}
3789
3790	len = fl->l_len;
3791	if (len == 0)
3792		len = NFS64BITSSET;
3793	retrycnt = 0;
3794	do {
3795	    nd->nd_repstat = 0;
3796	    if (op == F_GETLK) {
3797		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
3798		if (error)
3799			return (error);
3800		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
3801		if (!error) {
3802			clidrev = clp->nfsc_clientidrev;
3803			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
3804			    p, id, flags);
3805		} else if (error == -1) {
3806			error = 0;
3807		}
3808		nfscl_clientrelease(clp);
3809	    } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
3810		/*
3811		 * We must loop around for all lockowner cases.
3812		 */
3813		callcnt = 0;
3814		error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
3815		if (error)
3816			return (error);
3817		do {
3818		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
3819			clp, id, flags, &lp, &dorpc);
3820		    /*
3821		     * If it returns a NULL lp, we're done.
3822		     */
3823		    if (lp == NULL) {
3824			if (callcnt == 0)
3825			    nfscl_clientrelease(clp);
3826			else
3827			    nfscl_releasealllocks(clp, vp, p, id, flags);
3828			return (error);
3829		    }
3830		    if (nmp->nm_clp != NULL)
3831			clidrev = nmp->nm_clp->nfsc_clientidrev;
3832		    else
3833			clidrev = 0;
3834		    /*
3835		     * If the server doesn't support Posix lock semantics,
3836		     * only allow locks on the entire file, since it won't
3837		     * handle overlapping byte ranges.
3838		     * There might still be a problem when a lock
3839		     * upgrade/downgrade (read<->write) occurs, since the
3840		     * server "might" expect an unlock first?
3841		     */
3842		    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
3843			(off == 0 && len == NFS64BITSSET))) {
3844			/*
3845			 * Since the lock records will go away, we must
3846			 * wait for grace and delay here.
3847			 */
3848			do {
3849			    error = nfsrpc_locku(nd, nmp, lp, off, len,
3850				NFSV4LOCKT_READ, cred, p, 0);
3851			    if ((nd->nd_repstat == NFSERR_GRACE ||
3852				 nd->nd_repstat == NFSERR_DELAY) &&
3853				error == 0)
3854				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
3855				    "nfs_advlock");
3856			} while ((nd->nd_repstat == NFSERR_GRACE ||
3857			    nd->nd_repstat == NFSERR_DELAY) && error == 0);
3858		    }
3859		    callcnt++;
3860		} while (error == 0 && nd->nd_repstat == 0);
3861		nfscl_releasealllocks(clp, vp, p, id, flags);
3862	    } else if (op == F_SETLK) {
3863		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
3864		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
3865		if (error || donelocally) {
3866			return (error);
3867		}
3868		if (nmp->nm_clp != NULL)
3869			clidrev = nmp->nm_clp->nfsc_clientidrev;
3870		else
3871			clidrev = 0;
3872		nfhp = VTONFS(vp)->n_fhp;
3873		if (!lp->nfsl_open->nfso_posixlock &&
3874		    (off != 0 || len != NFS64BITSSET)) {
3875			error = EINVAL;
3876		} else {
3877			error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
3878			    nfhp->nfh_len, lp, newone, reclaim, off,
3879			    len, fl->l_type, cred, p, 0);
3880		}
3881		if (!error)
3882			error = nd->nd_repstat;
3883		nfscl_lockrelease(lp, error, newone);
3884	    } else {
3885		error = EINVAL;
3886	    }
3887	    if (!error)
3888	        error = nd->nd_repstat;
3889	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
3890		error == NFSERR_STALEDONTRECOVER ||
3891		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
3892		error == NFSERR_BADSESSION) {
3893		(void) nfs_catnap(PZERO, error, "nfs_advlock");
3894	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
3895		&& clidrev != 0) {
3896		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
3897		retrycnt++;
3898	    }
3899	} while (error == NFSERR_GRACE ||
3900	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
3901	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
3902	    error == NFSERR_BADSESSION ||
3903	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
3904	     expireret == 0 && clidrev != 0 && retrycnt < 4));
3905	if (error && retrycnt >= 4)
3906		error = EIO;
3907	return (error);
3908}
3909
3910/*
3911 * The lower level routine for the LockT case.
3912 */
3913int
3914nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
3915    struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
3916    struct ucred *cred, NFSPROC_T *p, void *id, int flags)
3917{
3918	u_int32_t *tl;
3919	int error, type, size;
3920	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
3921	struct nfsnode *np;
3922	struct nfsmount *nmp;
3923	struct nfsclsession *tsep;
3924
3925	nmp = VFSTONFS(vp->v_mount);
3926	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp);
3927	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
3928	if (fl->l_type == F_RDLCK)
3929		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
3930	else
3931		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
3932	txdr_hyper(off, tl);
3933	tl += 2;
3934	txdr_hyper(len, tl);
3935	tl += 2;
3936	tsep = nfsmnt_mdssession(nmp);
3937	*tl++ = tsep->nfsess_clientid.lval[0];
3938	*tl = tsep->nfsess_clientid.lval[1];
3939	nfscl_filllockowner(id, own, flags);
3940	np = VTONFS(vp);
3941	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
3942	    np->n_fhp->nfh_len);
3943	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
3944	error = nfscl_request(nd, vp, p, cred, NULL);
3945	if (error)
3946		return (error);
3947	if (nd->nd_repstat == 0) {
3948		fl->l_type = F_UNLCK;
3949	} else if (nd->nd_repstat == NFSERR_DENIED) {
3950		nd->nd_repstat = 0;
3951		fl->l_whence = SEEK_SET;
3952		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
3953		fl->l_start = fxdr_hyper(tl);
3954		tl += 2;
3955		len = fxdr_hyper(tl);
3956		tl += 2;
3957		if (len == NFS64BITSSET)
3958			fl->l_len = 0;
3959		else
3960			fl->l_len = len;
3961		type = fxdr_unsigned(int, *tl++);
3962		if (type == NFSV4LOCKT_WRITE)
3963			fl->l_type = F_WRLCK;
3964		else
3965			fl->l_type = F_RDLCK;
3966		/*
3967		 * XXX For now, I have no idea what to do with the
3968		 * conflicting lock_owner, so I'll just set the pid == 0
3969		 * and skip over the lock_owner.
3970		 */
3971		fl->l_pid = (pid_t)0;
3972		tl += 2;
3973		size = fxdr_unsigned(int, *tl);
3974		if (size < 0 || size > NFSV4_OPAQUELIMIT)
3975			error = EBADRPC;
3976		if (!error)
3977			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
3978	} else if (nd->nd_repstat == NFSERR_STALECLIENTID)
3979		nfscl_initiate_recovery(clp);
3980nfsmout:
3981	mbuf_freem(nd->nd_mrep);
3982	return (error);
3983}
3984
3985/*
3986 * Lower level function that performs the LockU RPC.
3987 */
3988static int
3989nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
3990    struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
3991    u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
3992{
3993	u_int32_t *tl;
3994	int error;
3995
3996	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
3997	    lp->nfsl_open->nfso_fhlen, NULL, NULL);
3998	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
3999	*tl++ = txdr_unsigned(type);
4000	*tl = txdr_unsigned(lp->nfsl_seqid);
4001	if (nfstest_outofseq &&
4002	    (arc4random() % nfstest_outofseq) == 0)
4003		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
4004	tl++;
4005	if (NFSHASNFSV4N(nmp))
4006		*tl++ = 0;
4007	else
4008		*tl++ = lp->nfsl_stateid.seqid;
4009	*tl++ = lp->nfsl_stateid.other[0];
4010	*tl++ = lp->nfsl_stateid.other[1];
4011	*tl++ = lp->nfsl_stateid.other[2];
4012	txdr_hyper(off, tl);
4013	tl += 2;
4014	txdr_hyper(len, tl);
4015	if (syscred)
4016		nd->nd_flag |= ND_USEGSSNAME;
4017	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4018	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4019	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4020	if (error)
4021		return (error);
4022	if (nd->nd_repstat == 0) {
4023		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4024		lp->nfsl_stateid.seqid = *tl++;
4025		lp->nfsl_stateid.other[0] = *tl++;
4026		lp->nfsl_stateid.other[1] = *tl++;
4027		lp->nfsl_stateid.other[2] = *tl;
4028	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4029		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4030nfsmout:
4031	mbuf_freem(nd->nd_mrep);
4032	return (error);
4033}
4034
4035/*
4036 * The actual Lock RPC.
4037 */
4038int
4039nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4040    u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4041    int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4042    NFSPROC_T *p, int syscred)
4043{
4044	u_int32_t *tl;
4045	int error, size;
4046	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4047	struct nfsclsession *tsep;
4048
4049	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL);
4050	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4051	if (type == F_RDLCK)
4052		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4053	else
4054		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4055	*tl++ = txdr_unsigned(reclaim);
4056	txdr_hyper(off, tl);
4057	tl += 2;
4058	txdr_hyper(len, tl);
4059	tl += 2;
4060	if (newone) {
4061	    *tl = newnfs_true;
4062	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4063		2 * NFSX_UNSIGNED + NFSX_HYPER);
4064	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4065	    if (NFSHASNFSV4N(nmp))
4066		*tl++ = 0;
4067	    else
4068		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
4069	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4070	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4071	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4072	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
4073	    tsep = nfsmnt_mdssession(nmp);
4074	    *tl++ = tsep->nfsess_clientid.lval[0];
4075	    *tl = tsep->nfsess_clientid.lval[1];
4076	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4077	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4078	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4079	} else {
4080	    *tl = newnfs_false;
4081	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4082	    if (NFSHASNFSV4N(nmp))
4083		*tl++ = 0;
4084	    else
4085		*tl++ = lp->nfsl_stateid.seqid;
4086	    *tl++ = lp->nfsl_stateid.other[0];
4087	    *tl++ = lp->nfsl_stateid.other[1];
4088	    *tl++ = lp->nfsl_stateid.other[2];
4089	    *tl = txdr_unsigned(lp->nfsl_seqid);
4090	    if (nfstest_outofseq &&
4091		(arc4random() % nfstest_outofseq) == 0)
4092		    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4093	}
4094	if (syscred)
4095		nd->nd_flag |= ND_USEGSSNAME;
4096	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4097	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4098	if (error)
4099		return (error);
4100	if (newone)
4101	    NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4102	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4103	if (nd->nd_repstat == 0) {
4104		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4105		lp->nfsl_stateid.seqid = *tl++;
4106		lp->nfsl_stateid.other[0] = *tl++;
4107		lp->nfsl_stateid.other[1] = *tl++;
4108		lp->nfsl_stateid.other[2] = *tl;
4109	} else if (nd->nd_repstat == NFSERR_DENIED) {
4110		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4111		size = fxdr_unsigned(int, *(tl + 7));
4112		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4113			error = EBADRPC;
4114		if (!error)
4115			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4116	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4117		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4118nfsmout:
4119	mbuf_freem(nd->nd_mrep);
4120	return (error);
4121}
4122
4123/*
4124 * nfs statfs rpc
4125 * (always called with the vp for the mount point)
4126 */
4127int
4128nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4129    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4130    void *stuff)
4131{
4132	u_int32_t *tl = NULL;
4133	struct nfsrv_descript nfsd, *nd = &nfsd;
4134	struct nfsmount *nmp;
4135	nfsattrbit_t attrbits;
4136	int error;
4137
4138	*attrflagp = 0;
4139	nmp = VFSTONFS(vnode_mount(vp));
4140	if (NFSHASNFSV4(nmp)) {
4141		/*
4142		 * For V4, you actually do a getattr.
4143		 */
4144		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4145		NFSSTATFS_GETATTRBIT(&attrbits);
4146		(void) nfsrv_putattrbit(nd, &attrbits);
4147		nd->nd_flag |= ND_USEGSSNAME;
4148		error = nfscl_request(nd, vp, p, cred, stuff);
4149		if (error)
4150			return (error);
4151		if (nd->nd_repstat == 0) {
4152			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4153			    NULL, NULL, sbp, fsp, NULL, 0, NULL, NULL, NULL, p,
4154			    cred);
4155			if (!error) {
4156				nmp->nm_fsid[0] = nap->na_filesid[0];
4157				nmp->nm_fsid[1] = nap->na_filesid[1];
4158				NFSSETHASSETFSID(nmp);
4159				*attrflagp = 1;
4160			}
4161		} else {
4162			error = nd->nd_repstat;
4163		}
4164		if (error)
4165			goto nfsmout;
4166	} else {
4167		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp);
4168		error = nfscl_request(nd, vp, p, cred, stuff);
4169		if (error)
4170			return (error);
4171		if (nd->nd_flag & ND_NFSV3) {
4172			error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4173			if (error)
4174				goto nfsmout;
4175		}
4176		if (nd->nd_repstat) {
4177			error = nd->nd_repstat;
4178			goto nfsmout;
4179		}
4180		NFSM_DISSECT(tl, u_int32_t *,
4181		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4182	}
4183	if (NFSHASNFSV3(nmp)) {
4184		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4185		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4186		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4187		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4188		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4189		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4190		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4191	} else if (NFSHASNFSV4(nmp) == 0) {
4192		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4193		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4194		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4195		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4196		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4197	}
4198nfsmout:
4199	mbuf_freem(nd->nd_mrep);
4200	return (error);
4201}
4202
4203/*
4204 * nfs pathconf rpc
4205 */
4206int
4207nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4208    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
4209    void *stuff)
4210{
4211	struct nfsrv_descript nfsd, *nd = &nfsd;
4212	struct nfsmount *nmp;
4213	u_int32_t *tl;
4214	nfsattrbit_t attrbits;
4215	int error;
4216
4217	*attrflagp = 0;
4218	nmp = VFSTONFS(vnode_mount(vp));
4219	if (NFSHASNFSV4(nmp)) {
4220		/*
4221		 * For V4, you actually do a getattr.
4222		 */
4223		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp);
4224		NFSPATHCONF_GETATTRBIT(&attrbits);
4225		(void) nfsrv_putattrbit(nd, &attrbits);
4226		nd->nd_flag |= ND_USEGSSNAME;
4227		error = nfscl_request(nd, vp, p, cred, stuff);
4228		if (error)
4229			return (error);
4230		if (nd->nd_repstat == 0) {
4231			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4232			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4233			    cred);
4234			if (!error)
4235				*attrflagp = 1;
4236		} else {
4237			error = nd->nd_repstat;
4238		}
4239	} else {
4240		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp);
4241		error = nfscl_request(nd, vp, p, cred, stuff);
4242		if (error)
4243			return (error);
4244		error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4245		if (nd->nd_repstat && !error)
4246			error = nd->nd_repstat;
4247		if (!error) {
4248			NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4249			pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4250			pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4251			pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4252			pc->pc_chownrestricted =
4253			    fxdr_unsigned(u_int32_t, *tl++);
4254			pc->pc_caseinsensitive =
4255			    fxdr_unsigned(u_int32_t, *tl++);
4256			pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4257		}
4258	}
4259nfsmout:
4260	mbuf_freem(nd->nd_mrep);
4261	return (error);
4262}
4263
4264/*
4265 * nfs version 3 fsinfo rpc call
4266 */
4267int
4268nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4269    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff)
4270{
4271	u_int32_t *tl;
4272	struct nfsrv_descript nfsd, *nd = &nfsd;
4273	int error;
4274
4275	*attrflagp = 0;
4276	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp);
4277	error = nfscl_request(nd, vp, p, cred, stuff);
4278	if (error)
4279		return (error);
4280	error = nfscl_postop_attr(nd, nap, attrflagp, stuff);
4281	if (nd->nd_repstat && !error)
4282		error = nd->nd_repstat;
4283	if (!error) {
4284		NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4285		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4286		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4287		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4288		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4289		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4290		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4291		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4292		fsp->fs_maxfilesize = fxdr_hyper(tl);
4293		tl += 2;
4294		fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4295		tl += 2;
4296		fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4297	}
4298nfsmout:
4299	mbuf_freem(nd->nd_mrep);
4300	return (error);
4301}
4302
4303/*
4304 * This function performs the Renew RPC.
4305 */
4306int
4307nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4308    NFSPROC_T *p)
4309{
4310	u_int32_t *tl;
4311	struct nfsrv_descript nfsd;
4312	struct nfsrv_descript *nd = &nfsd;
4313	struct nfsmount *nmp;
4314	int error;
4315	struct nfssockreq *nrp;
4316	struct nfsclsession *tsep;
4317
4318	nmp = clp->nfsc_nmp;
4319	if (nmp == NULL)
4320		return (0);
4321	if (dsp == NULL)
4322		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL);
4323	else
4324		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4325		    &dsp->nfsclds_sess);
4326	if (!NFSHASNFSV4N(nmp)) {
4327		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
4328		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4329		tsep = nfsmnt_mdssession(nmp);
4330		*tl++ = tsep->nfsess_clientid.lval[0];
4331		*tl = tsep->nfsess_clientid.lval[1];
4332	}
4333	nrp = NULL;
4334	if (dsp != NULL)
4335		nrp = dsp->nfsclds_sockp;
4336	if (nrp == NULL)
4337		/* If NULL, use the MDS socket. */
4338		nrp = &nmp->nm_sockreq;
4339	nd->nd_flag |= ND_USEGSSNAME;
4340	if (dsp == NULL)
4341		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4342		    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4343	else
4344		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4345		    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4346	if (error)
4347		return (error);
4348	error = nd->nd_repstat;
4349	mbuf_freem(nd->nd_mrep);
4350	return (error);
4351}
4352
4353/*
4354 * This function performs the Releaselockowner RPC.
4355 */
4356int
4357nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4358    uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4359{
4360	struct nfsrv_descript nfsd, *nd = &nfsd;
4361	u_int32_t *tl;
4362	int error;
4363	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4364	struct nfsclsession *tsep;
4365
4366	if (NFSHASNFSV4N(nmp)) {
4367		/* For NFSv4.1, do a FreeStateID. */
4368		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4369		    NULL);
4370		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4371	} else {
4372		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4373		    NULL);
4374		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4375		tsep = nfsmnt_mdssession(nmp);
4376		*tl++ = tsep->nfsess_clientid.lval[0];
4377		*tl = tsep->nfsess_clientid.lval[1];
4378		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4379		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4380		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4381	}
4382	nd->nd_flag |= ND_USEGSSNAME;
4383	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4384	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4385	if (error)
4386		return (error);
4387	error = nd->nd_repstat;
4388	mbuf_freem(nd->nd_mrep);
4389	return (error);
4390}
4391
4392/*
4393 * This function performs the Compound to get the mount pt FH.
4394 */
4395int
4396nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
4397    NFSPROC_T *p)
4398{
4399	u_int32_t *tl;
4400	struct nfsrv_descript nfsd;
4401	struct nfsrv_descript *nd = &nfsd;
4402	u_char *cp, *cp2;
4403	int error, cnt, len, setnil;
4404	u_int32_t *opcntp;
4405
4406	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL);
4407	cp = dirpath;
4408	cnt = 0;
4409	do {
4410		setnil = 0;
4411		while (*cp == '/')
4412			cp++;
4413		cp2 = cp;
4414		while (*cp2 != '\0' && *cp2 != '/')
4415			cp2++;
4416		if (*cp2 == '/') {
4417			setnil = 1;
4418			*cp2 = '\0';
4419		}
4420		if (cp2 != cp) {
4421			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4422			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
4423			nfsm_strtom(nd, cp, strlen(cp));
4424			cnt++;
4425		}
4426		if (setnil)
4427			*cp2++ = '/';
4428		cp = cp2;
4429	} while (*cp != '\0');
4430	if (NFSHASNFSV4N(nmp))
4431		/* Has a Sequence Op done by nfscl_reqstart(). */
4432		*opcntp = txdr_unsigned(3 + cnt);
4433	else
4434		*opcntp = txdr_unsigned(2 + cnt);
4435	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4436	*tl = txdr_unsigned(NFSV4OP_GETFH);
4437	nd->nd_flag |= ND_USEGSSNAME;
4438	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4439		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4440	if (error)
4441		return (error);
4442	if (nd->nd_repstat == 0) {
4443		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
4444		tl += (2 + 2 * cnt);
4445		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
4446			len > NFSX_FHMAX) {
4447			nd->nd_repstat = NFSERR_BADXDR;
4448		} else {
4449			nd->nd_repstat = nfsrv_mtostr(nd, nmp->nm_fh, len);
4450			if (nd->nd_repstat == 0)
4451				nmp->nm_fhsize = len;
4452		}
4453	}
4454	error = nd->nd_repstat;
4455nfsmout:
4456	mbuf_freem(nd->nd_mrep);
4457	return (error);
4458}
4459
4460/*
4461 * This function performs the Delegreturn RPC.
4462 */
4463int
4464nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
4465    struct nfsmount *nmp, NFSPROC_T *p, int syscred)
4466{
4467	u_int32_t *tl;
4468	struct nfsrv_descript nfsd;
4469	struct nfsrv_descript *nd = &nfsd;
4470	int error;
4471
4472	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
4473	    dp->nfsdl_fhlen, NULL, NULL);
4474	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
4475	if (NFSHASNFSV4N(nmp))
4476		*tl++ = 0;
4477	else
4478		*tl++ = dp->nfsdl_stateid.seqid;
4479	*tl++ = dp->nfsdl_stateid.other[0];
4480	*tl++ = dp->nfsdl_stateid.other[1];
4481	*tl = dp->nfsdl_stateid.other[2];
4482	if (syscred)
4483		nd->nd_flag |= ND_USEGSSNAME;
4484	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4485	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4486	if (error)
4487		return (error);
4488	error = nd->nd_repstat;
4489	mbuf_freem(nd->nd_mrep);
4490	return (error);
4491}
4492
4493/*
4494 * nfs getacl call.
4495 */
4496int
4497nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4498    struct acl *aclp, void *stuff)
4499{
4500	struct nfsrv_descript nfsd, *nd = &nfsd;
4501	int error;
4502	nfsattrbit_t attrbits;
4503	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4504
4505	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4506		return (EOPNOTSUPP);
4507	NFSCL_REQSTART(nd, NFSPROC_GETACL, vp);
4508	NFSZERO_ATTRBIT(&attrbits);
4509	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4510	(void) nfsrv_putattrbit(nd, &attrbits);
4511	error = nfscl_request(nd, vp, p, cred, stuff);
4512	if (error)
4513		return (error);
4514	if (!nd->nd_repstat)
4515		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
4516		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
4517	else
4518		error = nd->nd_repstat;
4519	mbuf_freem(nd->nd_mrep);
4520	return (error);
4521}
4522
4523/*
4524 * nfs setacl call.
4525 */
4526int
4527nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4528    struct acl *aclp, void *stuff)
4529{
4530	int error;
4531	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4532
4533	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
4534		return (EOPNOTSUPP);
4535	error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL, stuff);
4536	return (error);
4537}
4538
4539/*
4540 * nfs setacl call.
4541 */
4542static int
4543nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
4544    struct acl *aclp, nfsv4stateid_t *stateidp, void *stuff)
4545{
4546	struct nfsrv_descript nfsd, *nd = &nfsd;
4547	int error;
4548	nfsattrbit_t attrbits;
4549	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
4550
4551	if (!NFSHASNFSV4(nmp))
4552		return (EOPNOTSUPP);
4553	NFSCL_REQSTART(nd, NFSPROC_SETACL, vp);
4554	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
4555	NFSZERO_ATTRBIT(&attrbits);
4556	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
4557	(void) nfsv4_fillattr(nd, vnode_mount(vp), vp, aclp, NULL, NULL, 0,
4558	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0);
4559	error = nfscl_request(nd, vp, p, cred, stuff);
4560	if (error)
4561		return (error);
4562	/* Don't care about the pre/postop attributes */
4563	mbuf_freem(nd->nd_mrep);
4564	return (nd->nd_repstat);
4565}
4566
4567/*
4568 * Do the NFSv4.1 Exchange ID.
4569 */
4570int
4571nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
4572    struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp,
4573    struct ucred *cred, NFSPROC_T *p)
4574{
4575	uint32_t *tl, v41flags;
4576	struct nfsrv_descript nfsd;
4577	struct nfsrv_descript *nd = &nfsd;
4578	struct nfsclds *dsp;
4579	struct timespec verstime;
4580	int error, len;
4581
4582	*dspp = NULL;
4583	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL);
4584	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4585	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
4586	*tl = txdr_unsigned(clp->nfsc_rev);
4587	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
4588
4589	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4590	*tl++ = txdr_unsigned(exchflags);
4591	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
4592
4593	/* Set the implementation id4 */
4594	*tl = txdr_unsigned(1);
4595	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
4596	(void) nfsm_strtom(nd, version, strlen(version));
4597	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
4598	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
4599	verstime.tv_nsec = 0;
4600	txdr_nfsv4time(&verstime, tl);
4601	nd->nd_flag |= ND_USEGSSNAME;
4602	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4603	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4604	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
4605	    (int)nd->nd_repstat);
4606	if (error != 0)
4607		return (error);
4608	if (nd->nd_repstat == 0) {
4609		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
4610		len = fxdr_unsigned(int, *(tl + 7));
4611		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
4612			error = NFSERR_BADXDR;
4613			goto nfsmout;
4614		}
4615		dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
4616		    M_WAITOK | M_ZERO);
4617		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
4618		dsp->nfsclds_servownlen = len;
4619		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
4620		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
4621		dsp->nfsclds_sess.nfsess_sequenceid =
4622		    fxdr_unsigned(uint32_t, *tl++);
4623		v41flags = fxdr_unsigned(uint32_t, *tl);
4624		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
4625		    NFSHASPNFSOPT(nmp)) {
4626			NFSCL_DEBUG(1, "set PNFS\n");
4627			NFSLOCKMNT(nmp);
4628			nmp->nm_state |= NFSSTA_PNFS;
4629			NFSUNLOCKMNT(nmp);
4630			dsp->nfsclds_flags |= NFSCLDS_MDS;
4631		}
4632		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
4633			dsp->nfsclds_flags |= NFSCLDS_DS;
4634		if (len > 0)
4635			nd->nd_repstat = nfsrv_mtostr(nd,
4636			    dsp->nfsclds_serverown, len);
4637		if (nd->nd_repstat == 0) {
4638			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
4639			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
4640			    NULL, MTX_DEF);
4641			nfscl_initsessionslots(&dsp->nfsclds_sess);
4642			*dspp = dsp;
4643		} else
4644			free(dsp, M_NFSCLDS);
4645	}
4646	error = nd->nd_repstat;
4647nfsmout:
4648	mbuf_freem(nd->nd_mrep);
4649	return (error);
4650}
4651
4652/*
4653 * Do the NFSv4.1 Create Session.
4654 */
4655int
4656nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
4657    struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
4658    NFSPROC_T *p)
4659{
4660	uint32_t crflags, maxval, *tl;
4661	struct nfsrv_descript nfsd;
4662	struct nfsrv_descript *nd = &nfsd;
4663	int error, irdcnt;
4664
4665	/* Make sure nm_rsize, nm_wsize is set. */
4666	if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
4667		nmp->nm_rsize = NFS_MAXBSIZE;
4668	if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
4669		nmp->nm_wsize = NFS_MAXBSIZE;
4670	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL);
4671	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4672	*tl++ = sep->nfsess_clientid.lval[0];
4673	*tl++ = sep->nfsess_clientid.lval[1];
4674	*tl++ = txdr_unsigned(sequenceid);
4675	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
4676	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
4677		crflags |= NFSV4CRSESS_CONNBACKCHAN;
4678	*tl = txdr_unsigned(crflags);
4679
4680	/* Fill in fore channel attributes. */
4681	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4682	*tl++ = 0;				/* Header pad size */
4683	*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
4684	*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
4685	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
4686	*tl++ = txdr_unsigned(20);		/* Max operations */
4687	*tl++ = txdr_unsigned(64);		/* Max slots */
4688	*tl = 0;				/* No rdma ird */
4689
4690	/* Fill in back channel attributes. */
4691	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4692	*tl++ = 0;				/* Header pad size */
4693	*tl++ = txdr_unsigned(10000);		/* Max request size */
4694	*tl++ = txdr_unsigned(10000);		/* Max response size */
4695	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
4696	*tl++ = txdr_unsigned(4);		/* Max operations */
4697	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
4698	*tl = 0;				/* No rdma ird */
4699
4700	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
4701	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
4702
4703	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
4704	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
4705	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
4706	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
4707	*tl++ = 0;				/* Null machine name */
4708	*tl++ = 0;				/* Uid == 0 */
4709	*tl++ = 0;				/* Gid == 0 */
4710	*tl = 0;				/* No additional gids */
4711	nd->nd_flag |= ND_USEGSSNAME;
4712	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
4713	    NFS_VER4, NULL, 1, NULL, NULL);
4714	if (error != 0)
4715		return (error);
4716	if (nd->nd_repstat == 0) {
4717		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
4718		    2 * NFSX_UNSIGNED);
4719		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
4720		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
4721		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
4722		crflags = fxdr_unsigned(uint32_t, *tl);
4723		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
4724			NFSLOCKMNT(nmp);
4725			nmp->nm_state |= NFSSTA_SESSPERSIST;
4726			NFSUNLOCKMNT(nmp);
4727		}
4728
4729		/* Get the fore channel slot count. */
4730		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4731		tl++;			/* Skip the header pad size. */
4732
4733		/* Make sure nm_wsize is small enough. */
4734		maxval = fxdr_unsigned(uint32_t, *tl++);
4735		while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
4736			if (nmp->nm_wsize > 8096)
4737				nmp->nm_wsize /= 2;
4738			else
4739				break;
4740		}
4741
4742		/* Make sure nm_rsize is small enough. */
4743		maxval = fxdr_unsigned(uint32_t, *tl++);
4744		while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
4745			if (nmp->nm_rsize > 8096)
4746				nmp->nm_rsize /= 2;
4747			else
4748				break;
4749		}
4750
4751		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
4752		tl++;
4753		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
4754		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
4755		irdcnt = fxdr_unsigned(int, *tl);
4756		if (irdcnt > 0)
4757			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
4758
4759		/* and the back channel slot count. */
4760		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
4761		tl += 5;
4762		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
4763		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
4764	}
4765	error = nd->nd_repstat;
4766nfsmout:
4767	mbuf_freem(nd->nd_mrep);
4768	return (error);
4769}
4770
4771/*
4772 * Do the NFSv4.1 Destroy Session.
4773 */
4774int
4775nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp,
4776    struct ucred *cred, NFSPROC_T *p)
4777{
4778	uint32_t *tl;
4779	struct nfsrv_descript nfsd;
4780	struct nfsrv_descript *nd = &nfsd;
4781	int error;
4782	struct nfsclsession *tsep;
4783
4784	nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL);
4785	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
4786	tsep = nfsmnt_mdssession(nmp);
4787	bcopy(tsep->nfsess_sessionid, tl, NFSX_V4SESSIONID);
4788	nd->nd_flag |= ND_USEGSSNAME;
4789	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4790	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4791	if (error != 0)
4792		return (error);
4793	error = nd->nd_repstat;
4794	mbuf_freem(nd->nd_mrep);
4795	return (error);
4796}
4797
4798/*
4799 * Do the NFSv4.1 Destroy Client.
4800 */
4801int
4802nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
4803    struct ucred *cred, NFSPROC_T *p)
4804{
4805	uint32_t *tl;
4806	struct nfsrv_descript nfsd;
4807	struct nfsrv_descript *nd = &nfsd;
4808	int error;
4809	struct nfsclsession *tsep;
4810
4811	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL);
4812	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
4813	tsep = nfsmnt_mdssession(nmp);
4814	*tl++ = tsep->nfsess_clientid.lval[0];
4815	*tl = tsep->nfsess_clientid.lval[1];
4816	nd->nd_flag |= ND_USEGSSNAME;
4817	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4818	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4819	if (error != 0)
4820		return (error);
4821	error = nd->nd_repstat;
4822	mbuf_freem(nd->nd_mrep);
4823	return (error);
4824}
4825
4826/*
4827 * Do the NFSv4.1 LayoutGet.
4828 */
4829int
4830nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
4831    uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen,
4832    nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp,
4833    struct ucred *cred, NFSPROC_T *p, void *stuff)
4834{
4835	struct nfsrv_descript nfsd, *nd = &nfsd;
4836	int error;
4837
4838	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL);
4839	nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
4840	    layoutlen, 0);
4841	nd->nd_flag |= ND_USEGSSNAME;
4842	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4843	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4844	NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
4845	if (error != 0)
4846		return (error);
4847	if (nd->nd_repstat == 0)
4848		error = nfsrv_parselayoutget(nd, stateidp, retonclosep, flhp);
4849	if (error == 0 && nd->nd_repstat != 0)
4850		error = nd->nd_repstat;
4851	mbuf_freem(nd->nd_mrep);
4852	return (error);
4853}
4854
4855/*
4856 * Do the NFSv4.1 Get Device Info.
4857 */
4858int
4859nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
4860    uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
4861    NFSPROC_T *p)
4862{
4863	uint32_t cnt, *tl;
4864	struct nfsrv_descript nfsd;
4865	struct nfsrv_descript *nd = &nfsd;
4866	struct sockaddr_storage ss;
4867	struct nfsclds *dsp = NULL, **dspp;
4868	struct nfscldevinfo *ndi;
4869	int addrcnt, bitcnt, error, i, isudp, j, pos, safilled, stripecnt;
4870	uint8_t stripeindex;
4871
4872	*ndip = NULL;
4873	ndi = NULL;
4874	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL);
4875	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
4876	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
4877	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
4878	*tl++ = txdr_unsigned(layouttype);
4879	*tl++ = txdr_unsigned(100000);
4880	if (notifybitsp != NULL && *notifybitsp != 0) {
4881		*tl = txdr_unsigned(1);		/* One word of bits. */
4882		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
4883		*tl = txdr_unsigned(*notifybitsp);
4884	} else
4885		*tl = txdr_unsigned(0);
4886	nd->nd_flag |= ND_USEGSSNAME;
4887	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4888	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4889	if (error != 0)
4890		return (error);
4891	if (nd->nd_repstat == 0) {
4892		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
4893		if (layouttype != fxdr_unsigned(int, *tl++))
4894			printf("EEK! devinfo layout type not same!\n");
4895		stripecnt = fxdr_unsigned(int, *++tl);
4896		NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
4897		if (stripecnt < 1 || stripecnt > 4096) {
4898			printf("NFS devinfo stripecnt %d: out of range\n",
4899			    stripecnt);
4900			error = NFSERR_BADXDR;
4901			goto nfsmout;
4902		}
4903		NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED);
4904		addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
4905		NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
4906		if (addrcnt < 1 || addrcnt > 128) {
4907			printf("NFS devinfo addrcnt %d: out of range\n",
4908			    addrcnt);
4909			error = NFSERR_BADXDR;
4910			goto nfsmout;
4911		}
4912
4913		/*
4914		 * Now we know how many stripe indices and addresses, so
4915		 * we can allocate the structure the correct size.
4916		 */
4917		i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *)
4918		    + 1;
4919		NFSCL_DEBUG(4, "stripeindices=%d\n", i);
4920		ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
4921		    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO);
4922		NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID);
4923		ndi->nfsdi_refcnt = 0;
4924		ndi->nfsdi_stripecnt = stripecnt;
4925		ndi->nfsdi_addrcnt = addrcnt;
4926		/* Fill in the stripe indices. */
4927		for (i = 0; i < stripecnt; i++) {
4928			stripeindex = fxdr_unsigned(uint8_t, *tl++);
4929			NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
4930			if (stripeindex >= addrcnt) {
4931				printf("NFS devinfo stripeindex %d: too big\n",
4932				    (int)stripeindex);
4933				error = NFSERR_BADXDR;
4934				goto nfsmout;
4935			}
4936			nfsfldi_setstripeindex(ndi, i, stripeindex);
4937		}
4938
4939		/* Now, dissect the server address(es). */
4940		safilled = 0;
4941		for (i = 0; i < addrcnt; i++) {
4942			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4943			cnt = fxdr_unsigned(uint32_t, *tl);
4944			if (cnt == 0) {
4945				printf("NFS devinfo 0 len addrlist\n");
4946				error = NFSERR_BADXDR;
4947				goto nfsmout;
4948			}
4949			dspp = nfsfldi_addr(ndi, i);
4950			pos = arc4random() % cnt;	/* Choose one. */
4951			safilled = 0;
4952			for (j = 0; j < cnt; j++) {
4953				error = nfsv4_getipaddr(nd, &ss, &isudp);
4954				if (error != 0 && error != EPERM) {
4955					error = NFSERR_BADXDR;
4956					goto nfsmout;
4957				}
4958				if (error == 0 && isudp == 0) {
4959					/*
4960					 * The algorithm is:
4961					 * - use "pos" entry if it is of the
4962					 *   same af_family or none of them
4963					 *   is of the same af_family
4964					 * else
4965					 * - use the first one of the same
4966					 *   af_family.
4967					 */
4968					if ((safilled == 0 && ss.ss_family ==
4969					     nmp->nm_nam->sa_family) ||
4970					    (j == pos &&
4971					     (safilled == 0 || ss.ss_family ==
4972					      nmp->nm_nam->sa_family)) ||
4973					    (safilled == 1 && ss.ss_family ==
4974					     nmp->nm_nam->sa_family)) {
4975						error = nfsrpc_fillsa(nmp, &ss,
4976						    &dsp, p);
4977						if (error == 0) {
4978							*dspp = dsp;
4979							if (ss.ss_family ==
4980							 nmp->nm_nam->sa_family)
4981								safilled = 2;
4982							else
4983								safilled = 1;
4984						}
4985					}
4986				}
4987			}
4988			if (safilled == 0)
4989				break;
4990		}
4991
4992		/* And the notify bits. */
4993		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4994		if (safilled != 0) {
4995			bitcnt = fxdr_unsigned(int, *tl);
4996			if (bitcnt > 0) {
4997				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
4998				if (notifybitsp != NULL)
4999					*notifybitsp =
5000					    fxdr_unsigned(uint32_t, *tl);
5001			}
5002			*ndip = ndi;
5003		} else
5004			error = EPERM;
5005	}
5006	if (nd->nd_repstat != 0)
5007		error = nd->nd_repstat;
5008nfsmout:
5009	if (error != 0 && ndi != NULL)
5010		nfscl_freedevinfo(ndi);
5011	mbuf_freem(nd->nd_mrep);
5012	return (error);
5013}
5014
5015/*
5016 * Do the NFSv4.1 LayoutCommit.
5017 */
5018int
5019nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5020    uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5021    int layouttype, int layoutupdatecnt, uint8_t *layp, struct ucred *cred,
5022    NFSPROC_T *p, void *stuff)
5023{
5024	uint32_t *tl;
5025	struct nfsrv_descript nfsd, *nd = &nfsd;
5026	int error, outcnt, i;
5027	uint8_t *cp;
5028
5029	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL);
5030	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5031	    NFSX_STATEID);
5032	txdr_hyper(off, tl);
5033	tl += 2;
5034	txdr_hyper(len, tl);
5035	tl += 2;
5036	if (reclaim != 0)
5037		*tl++ = newnfs_true;
5038	else
5039		*tl++ = newnfs_false;
5040	*tl++ = txdr_unsigned(stateidp->seqid);
5041	*tl++ = stateidp->other[0];
5042	*tl++ = stateidp->other[1];
5043	*tl++ = stateidp->other[2];
5044	*tl++ = newnfs_true;
5045	if (lastbyte < off)
5046		lastbyte = off;
5047	else if (lastbyte >= (off + len))
5048		lastbyte = off + len - 1;
5049	txdr_hyper(lastbyte, tl);
5050	tl += 2;
5051	*tl++ = newnfs_false;
5052	*tl++ = txdr_unsigned(layouttype);
5053	*tl = txdr_unsigned(layoutupdatecnt);
5054	if (layoutupdatecnt > 0) {
5055		KASSERT(layouttype != NFSLAYOUT_NFSV4_1_FILES,
5056		    ("Must be nil for Files Layout"));
5057		outcnt = NFSM_RNDUP(layoutupdatecnt);
5058		NFSM_BUILD(cp, uint8_t *, outcnt);
5059		NFSBCOPY(layp, cp, layoutupdatecnt);
5060		cp += layoutupdatecnt;
5061		for (i = 0; i < (outcnt - layoutupdatecnt); i++)
5062			*cp++ = 0x0;
5063	}
5064	nd->nd_flag |= ND_USEGSSNAME;
5065	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5066	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5067	if (error != 0)
5068		return (error);
5069	error = nd->nd_repstat;
5070	mbuf_freem(nd->nd_mrep);
5071	return (error);
5072}
5073
5074/*
5075 * Do the NFSv4.1 LayoutReturn.
5076 */
5077int
5078nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5079    int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5080    uint64_t len, nfsv4stateid_t *stateidp, int layoutcnt, uint32_t *layp,
5081    struct ucred *cred, NFSPROC_T *p, void *stuff)
5082{
5083	uint32_t *tl;
5084	struct nfsrv_descript nfsd, *nd = &nfsd;
5085	int error, outcnt, i;
5086	uint8_t *cp;
5087
5088	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL);
5089	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5090	if (reclaim != 0)
5091		*tl++ = newnfs_true;
5092	else
5093		*tl++ = newnfs_false;
5094	*tl++ = txdr_unsigned(layouttype);
5095	*tl++ = txdr_unsigned(iomode);
5096	*tl = txdr_unsigned(layoutreturn);
5097	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5098		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5099		    NFSX_UNSIGNED);
5100		txdr_hyper(offset, tl);
5101		tl += 2;
5102		txdr_hyper(len, tl);
5103		tl += 2;
5104		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5105		*tl++ = txdr_unsigned(stateidp->seqid);
5106		*tl++ = stateidp->other[0];
5107		*tl++ = stateidp->other[1];
5108		*tl++ = stateidp->other[2];
5109		*tl = txdr_unsigned(layoutcnt);
5110		if (layoutcnt > 0) {
5111			outcnt = NFSM_RNDUP(layoutcnt);
5112			NFSM_BUILD(cp, uint8_t *, outcnt);
5113			NFSBCOPY(layp, cp, layoutcnt);
5114			cp += layoutcnt;
5115			for (i = 0; i < (outcnt - layoutcnt); i++)
5116				*cp++ = 0x0;
5117		}
5118	}
5119	nd->nd_flag |= ND_USEGSSNAME;
5120	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5121	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5122	if (error != 0)
5123		return (error);
5124	if (nd->nd_repstat == 0) {
5125		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5126		if (*tl != 0) {
5127			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5128			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5129			stateidp->other[0] = *tl++;
5130			stateidp->other[1] = *tl++;
5131			stateidp->other[2] = *tl;
5132		}
5133	} else
5134		error = nd->nd_repstat;
5135nfsmout:
5136	mbuf_freem(nd->nd_mrep);
5137	return (error);
5138}
5139
5140/*
5141 * Acquire a layout and devinfo, if possible. The caller must have acquired
5142 * a reference count on the nfsclclient structure before calling this.
5143 * Return the layout in lypp with a reference count on it, if successful.
5144 */
5145static int
5146nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5147    int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off,
5148    struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5149{
5150	struct nfscllayout *lyp;
5151	struct nfsclflayout *flp;
5152	struct nfsclflayouthead flh;
5153	int error = 0, islocked, layoutlen, recalled, retonclose;
5154	nfsv4stateid_t stateid;
5155	struct nfsclsession *tsep;
5156
5157	*lypp = NULL;
5158	/*
5159	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5160	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5161	 * flp == NULL.
5162	 */
5163	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5164	    off, &flp, &recalled);
5165	islocked = 0;
5166	if (lyp == NULL || flp == NULL) {
5167		if (recalled != 0)
5168			return (EIO);
5169		LIST_INIT(&flh);
5170		tsep = nfsmnt_mdssession(nmp);
5171		layoutlen = tsep->nfsess_maxcache -
5172		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5173		if (lyp == NULL) {
5174			stateid.seqid = 0;
5175			stateid.other[0] = stateidp->other[0];
5176			stateid.other[1] = stateidp->other[1];
5177			stateid.other[2] = stateidp->other[2];
5178			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5179			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5180			    (uint64_t)0, layoutlen, &stateid, &retonclose,
5181			    &flh, cred, p, NULL);
5182		} else {
5183			islocked = 1;
5184			stateid.seqid = lyp->nfsly_stateid.seqid;
5185			stateid.other[0] = lyp->nfsly_stateid.other[0];
5186			stateid.other[1] = lyp->nfsly_stateid.other[1];
5187			stateid.other[2] = lyp->nfsly_stateid.other[2];
5188			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5189			    nfhp->nfh_len, iomode, off, UINT64_MAX,
5190			    (uint64_t)0, layoutlen, &stateid, &retonclose,
5191			    &flh, cred, p, NULL);
5192		}
5193		error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5194		    nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5195		    &flh, error, NULL, cred, p);
5196		if (error == 0)
5197			*lypp = lyp;
5198		else if (islocked != 0)
5199			nfscl_rellayout(lyp, 1);
5200	} else
5201		*lypp = lyp;
5202	return (error);
5203}
5204
5205/*
5206 * Do a TCP connection plus exchange id and create session.
5207 * If successful, a "struct nfsclds" is linked into the list for the
5208 * mount point and a pointer to it is returned.
5209 */
5210static int
5211nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_storage *ssp,
5212    struct nfsclds **dspp, NFSPROC_T *p)
5213{
5214	struct sockaddr_in *msad, *sad, *ssd;
5215	struct sockaddr_in6 *msad6, *sad6, *ssd6;
5216	struct nfsclclient *clp;
5217	struct nfssockreq *nrp;
5218	struct nfsclds *dsp, *tdsp;
5219	int error;
5220	enum nfsclds_state retv;
5221	uint32_t sequenceid;
5222
5223	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5224	    ("nfsrpc_fillsa: NULL nr_cred"));
5225	NFSLOCKCLSTATE();
5226	clp = nmp->nm_clp;
5227	NFSUNLOCKCLSTATE();
5228	if (clp == NULL)
5229		return (EPERM);
5230	if (ssp->ss_family == AF_INET) {
5231		ssd = (struct sockaddr_in *)ssp;
5232		NFSLOCKMNT(nmp);
5233
5234		/*
5235		 * Check to see if we already have a session for this
5236		 * address that is usable for a DS.
5237		 * Note that the MDS's address is in a different place
5238		 * than the sessions already acquired for DS's.
5239		 */
5240		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5241		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5242		while (tdsp != NULL) {
5243			if (msad != NULL && msad->sin_family == AF_INET &&
5244			    ssd->sin_addr.s_addr == msad->sin_addr.s_addr &&
5245			    ssd->sin_port == msad->sin_port &&
5246			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5247			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5248				*dspp = tdsp;
5249				NFSUNLOCKMNT(nmp);
5250				NFSCL_DEBUG(4, "fnd same addr\n");
5251				return (0);
5252			}
5253			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5254			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5255				msad = (struct sockaddr_in *)
5256				    tdsp->nfsclds_sockp->nr_nam;
5257			else
5258				msad = NULL;
5259		}
5260		NFSUNLOCKMNT(nmp);
5261
5262		/* No IP address match, so look for new/trunked one. */
5263		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
5264		sad->sin_len = sizeof(*sad);
5265		sad->sin_family = AF_INET;
5266		sad->sin_port = ssd->sin_port;
5267		sad->sin_addr.s_addr = ssd->sin_addr.s_addr;
5268		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5269		nrp->nr_nam = (struct sockaddr *)sad;
5270	} else if (ssp->ss_family == AF_INET6) {
5271		ssd6 = (struct sockaddr_in6 *)ssp;
5272		NFSLOCKMNT(nmp);
5273
5274		/*
5275		 * Check to see if we already have a session for this
5276		 * address that is usable for a DS.
5277		 * Note that the MDS's address is in a different place
5278		 * than the sessions already acquired for DS's.
5279		 */
5280		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
5281		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5282		while (tdsp != NULL) {
5283			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
5284			    IN6_ARE_ADDR_EQUAL(&ssd6->sin6_addr,
5285			    &msad6->sin6_addr) &&
5286			    ssd6->sin6_port == msad6->sin6_port &&
5287			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5288			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5289				*dspp = tdsp;
5290				NFSUNLOCKMNT(nmp);
5291				return (0);
5292			}
5293			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
5294			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
5295				msad6 = (struct sockaddr_in6 *)
5296				    tdsp->nfsclds_sockp->nr_nam;
5297			else
5298				msad6 = NULL;
5299		}
5300		NFSUNLOCKMNT(nmp);
5301
5302		/* No IP address match, so look for new/trunked one. */
5303		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
5304		sad6->sin6_len = sizeof(*sad6);
5305		sad6->sin6_family = AF_INET6;
5306		sad6->sin6_port = ssd6->sin6_port;
5307		NFSBCOPY(&ssd6->sin6_addr, &sad6->sin6_addr,
5308		    sizeof(struct in6_addr));
5309		nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO);
5310		nrp->nr_nam = (struct sockaddr *)sad6;
5311	} else
5312		return (EPERM);
5313
5314	nrp->nr_sotype = SOCK_STREAM;
5315	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
5316	nrp->nr_prog = NFS_PROG;
5317	nrp->nr_vers = NFS_VER4;
5318
5319	/*
5320	 * Use the credentials that were used for the mount, which are
5321	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
5322	 * Ref. counting the credentials with crhold() is probably not
5323	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
5324	 * unmount, but I did it anyhow.
5325	 */
5326	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
5327	error = newnfs_connect(nmp, nrp, NULL, p, 0);
5328	NFSCL_DEBUG(3, "DS connect=%d\n", error);
5329
5330	/* Now, do the exchangeid and create session. */
5331	if (error == 0) {
5332		error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS,
5333		    &dsp, nrp->nr_cred, p);
5334		NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
5335		if (error != 0)
5336			newnfs_disconnect(nrp);
5337	}
5338	if (error == 0) {
5339		dsp->nfsclds_sockp = nrp;
5340		NFSLOCKMNT(nmp);
5341		retv = nfscl_getsameserver(nmp, dsp, &tdsp);
5342		NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
5343		if (retv == NFSDSP_USETHISSESSION) {
5344			NFSUNLOCKMNT(nmp);
5345			/*
5346			 * If there is already a session for this server,
5347			 * use it.
5348			 */
5349			(void)newnfs_disconnect(nrp);
5350			nfscl_freenfsclds(dsp);
5351			*dspp = tdsp;
5352			return (0);
5353		}
5354		if (retv == NFSDSP_SEQTHISSESSION)
5355			sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid;
5356		else
5357			sequenceid = dsp->nfsclds_sess.nfsess_sequenceid;
5358		NFSUNLOCKMNT(nmp);
5359		error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
5360		    nrp, sequenceid, 0, nrp->nr_cred, p);
5361		NFSCL_DEBUG(3, "DS createsess=%d\n", error);
5362	} else {
5363		NFSFREECRED(nrp->nr_cred);
5364		NFSFREEMUTEX(&nrp->nr_mtx);
5365		free(nrp->nr_nam, M_SONAME);
5366		free(nrp, M_NFSSOCKREQ);
5367	}
5368	if (error == 0) {
5369		NFSCL_DEBUG(3, "add DS session\n");
5370		/*
5371		 * Put it at the end of the list. That way the list
5372		 * is ordered by when the entry was added. This matters
5373		 * since the one done first is the one that should be
5374		 * used for sequencid'ing any subsequent create sessions.
5375		 */
5376		NFSLOCKMNT(nmp);
5377		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
5378		NFSUNLOCKMNT(nmp);
5379		*dspp = dsp;
5380	} else if (dsp != NULL) {
5381		newnfs_disconnect(nrp);
5382		nfscl_freenfsclds(dsp);
5383	}
5384	return (error);
5385}
5386
5387/*
5388 * Do the NFSv4.1 Reclaim Complete.
5389 */
5390int
5391nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
5392{
5393	uint32_t *tl;
5394	struct nfsrv_descript nfsd;
5395	struct nfsrv_descript *nd = &nfsd;
5396	int error;
5397
5398	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL);
5399	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5400	*tl = newnfs_false;
5401	nd->nd_flag |= ND_USEGSSNAME;
5402	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5403	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5404	if (error != 0)
5405		return (error);
5406	error = nd->nd_repstat;
5407	mbuf_freem(nd->nd_mrep);
5408	return (error);
5409}
5410
5411/*
5412 * Initialize the slot tables for a session.
5413 */
5414static void
5415nfscl_initsessionslots(struct nfsclsession *sep)
5416{
5417	int i;
5418
5419	for (i = 0; i < NFSV4_CBSLOTS; i++) {
5420		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
5421			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
5422		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
5423	}
5424	for (i = 0; i < 64; i++)
5425		sep->nfsess_slotseq[i] = 0;
5426	sep->nfsess_slots = 0;
5427}
5428
5429/*
5430 * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
5431 */
5432int
5433nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5434    uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
5435{
5436	struct nfsnode *np = VTONFS(vp);
5437	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5438	struct nfscllayout *layp;
5439	struct nfscldevinfo *dip;
5440	struct nfsclflayout *rflp;
5441	nfsv4stateid_t stateid;
5442	struct ucred *newcred;
5443	uint64_t lastbyte, len, off, oresid, xfer;
5444	int eof, error, iolaymode, recalled;
5445	void *lckp;
5446
5447	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
5448	    (np->n_flag & NNOLAYOUT) != 0)
5449		return (EIO);
5450	/* Now, get a reference cnt on the clientid for this mount. */
5451	if (nfscl_getref(nmp) == 0)
5452		return (EIO);
5453
5454	/* Find an appropriate stateid. */
5455	newcred = NFSNEWCRED(cred);
5456	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
5457	    rwaccess, 1, newcred, p, &stateid, &lckp);
5458	if (error != 0) {
5459		NFSFREECRED(newcred);
5460		nfscl_relref(nmp);
5461		return (error);
5462	}
5463	/* Search for a layout for this file. */
5464	off = uiop->uio_offset;
5465	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
5466	    np->n_fhp->nfh_len, off, &rflp, &recalled);
5467	if (layp == NULL || rflp == NULL) {
5468		if (recalled != 0) {
5469			NFSFREECRED(newcred);
5470			nfscl_relref(nmp);
5471			return (EIO);
5472		}
5473		if (layp != NULL) {
5474			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
5475			layp = NULL;
5476		}
5477		/* Try and get a Layout, if it is supported. */
5478		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
5479		    (np->n_flag & NWRITEOPENED) != 0)
5480			iolaymode = NFSLAYOUTIOMODE_RW;
5481		else
5482			iolaymode = NFSLAYOUTIOMODE_READ;
5483		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
5484		    NULL, &stateid, off, &layp, newcred, p);
5485		if (error != 0) {
5486			NFSLOCKNODE(np);
5487			np->n_flag |= NNOLAYOUT;
5488			NFSUNLOCKNODE(np);
5489			if (lckp != NULL)
5490				nfscl_lockderef(lckp);
5491			NFSFREECRED(newcred);
5492			if (layp != NULL)
5493				nfscl_rellayout(layp, 0);
5494			nfscl_relref(nmp);
5495			return (error);
5496		}
5497	}
5498
5499	/*
5500	 * Loop around finding a layout that works for the first part of
5501	 * this I/O operation, and then call the function that actually
5502	 * does the RPC.
5503	 */
5504	eof = 0;
5505	len = (uint64_t)uiop->uio_resid;
5506	while (len > 0 && error == 0 && eof == 0) {
5507		off = uiop->uio_offset;
5508		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
5509		if (error == 0) {
5510			oresid = xfer = (uint64_t)uiop->uio_resid;
5511			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
5512				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
5513			dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev,
5514			    rflp->nfsfl_devp);
5515			if (dip != NULL) {
5516				error = nfscl_doflayoutio(vp, uiop, iomode,
5517				    must_commit, &eof, &stateid, rwaccess, dip,
5518				    layp, rflp, off, xfer, docommit, newcred,
5519				    p);
5520				nfscl_reldevinfo(dip);
5521				lastbyte = off + xfer - 1;
5522				if (error == 0) {
5523					NFSLOCKCLSTATE();
5524					if (lastbyte > layp->nfsly_lastbyte)
5525						layp->nfsly_lastbyte = lastbyte;
5526					NFSUNLOCKCLSTATE();
5527				} else if (error == NFSERR_OPENMODE &&
5528				    rwaccess == NFSV4OPEN_ACCESSREAD) {
5529					NFSLOCKMNT(nmp);
5530					nmp->nm_state |= NFSSTA_OPENMODE;
5531					NFSUNLOCKMNT(nmp);
5532				}
5533			} else
5534				error = EIO;
5535			if (error == 0)
5536				len -= (oresid - (uint64_t)uiop->uio_resid);
5537		}
5538	}
5539	if (lckp != NULL)
5540		nfscl_lockderef(lckp);
5541	NFSFREECRED(newcred);
5542	nfscl_rellayout(layp, 0);
5543	nfscl_relref(nmp);
5544	return (error);
5545}
5546
5547/*
5548 * Find a file layout that will handle the first bytes of the requested
5549 * range and return the information from it needed to the I/O operation.
5550 */
5551int
5552nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
5553    struct nfsclflayout **retflpp)
5554{
5555	struct nfsclflayout *flp, *nflp, *rflp;
5556	uint32_t rw;
5557
5558	rflp = NULL;
5559	rw = rwaccess;
5560	/* For reading, do the Read list first and then the Write list. */
5561	do {
5562		if (rw == NFSV4OPEN_ACCESSREAD)
5563			flp = LIST_FIRST(&lyp->nfsly_flayread);
5564		else
5565			flp = LIST_FIRST(&lyp->nfsly_flayrw);
5566		while (flp != NULL) {
5567			nflp = LIST_NEXT(flp, nfsfl_list);
5568			if (flp->nfsfl_off > off)
5569				break;
5570			if (flp->nfsfl_end > off &&
5571			    (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
5572				rflp = flp;
5573			flp = nflp;
5574		}
5575		if (rw == NFSV4OPEN_ACCESSREAD)
5576			rw = NFSV4OPEN_ACCESSWRITE;
5577		else
5578			rw = 0;
5579	} while (rw != 0);
5580	if (rflp != NULL) {
5581		/* This one covers the most bytes starting at off. */
5582		*retflpp = rflp;
5583		return (0);
5584	}
5585	return (EIO);
5586}
5587
5588/*
5589 * Do I/O using an NFSv4.1 file layout.
5590 */
5591static int
5592nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5593    int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
5594    struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
5595    uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
5596{
5597	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
5598	int commit_thru_mds, error, stripe_index, stripe_pos;
5599	struct nfsnode *np;
5600	struct nfsfh *fhp;
5601	struct nfsclds **dspp;
5602
5603	np = VTONFS(vp);
5604	rel_off = off - flp->nfsfl_patoff;
5605	stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff;
5606	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
5607	    dp->nfsdi_stripecnt;
5608	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
5609	error = 0;
5610
5611	/* Loop around, doing I/O for each stripe unit. */
5612	while (len > 0 && error == 0) {
5613		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
5614		dspp = nfsfldi_addr(dp, stripe_index);
5615		if (len > transfer && docommit == 0)
5616			xfer = transfer;
5617		else
5618			xfer = len;
5619		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
5620			/* Dense layout. */
5621			if (stripe_pos >= flp->nfsfl_fhcnt)
5622				return (EIO);
5623			fhp = flp->nfsfl_fh[stripe_pos];
5624			io_off = (rel_off / (stripe_unit_size *
5625			    dp->nfsdi_stripecnt)) * stripe_unit_size +
5626			    rel_off % stripe_unit_size;
5627		} else {
5628			/* Sparse layout. */
5629			if (flp->nfsfl_fhcnt > 1) {
5630				if (stripe_index >= flp->nfsfl_fhcnt)
5631					return (EIO);
5632				fhp = flp->nfsfl_fh[stripe_index];
5633			} else if (flp->nfsfl_fhcnt == 1)
5634				fhp = flp->nfsfl_fh[0];
5635			else
5636				fhp = np->n_fhp;
5637			io_off = off;
5638		}
5639		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
5640			commit_thru_mds = 1;
5641			if (docommit != 0)
5642				error = EIO;
5643		} else {
5644			commit_thru_mds = 0;
5645			mtx_lock(&np->n_mtx);
5646			np->n_flag |= NDSCOMMIT;
5647			mtx_unlock(&np->n_mtx);
5648		}
5649		if (docommit != 0) {
5650			if (error == 0)
5651				error = nfsrpc_commitds(vp, io_off, xfer,
5652				    *dspp, fhp, cred, p);
5653			if (error == 0) {
5654				/*
5655				 * Set both eof and uio_resid = 0 to end any
5656				 * loops.
5657				 */
5658				*eofp = 1;
5659				uiop->uio_resid = 0;
5660			} else {
5661				mtx_lock(&np->n_mtx);
5662				np->n_flag &= ~NDSCOMMIT;
5663				mtx_unlock(&np->n_mtx);
5664			}
5665		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
5666			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
5667			    io_off, xfer, fhp, cred, p);
5668		else {
5669			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
5670			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
5671			    cred, p);
5672			if (error == 0) {
5673				NFSLOCKCLSTATE();
5674				lyp->nfsly_flags |= NFSLY_WRITTEN;
5675				NFSUNLOCKCLSTATE();
5676			}
5677		}
5678		if (error == 0) {
5679			transfer = stripe_unit_size;
5680			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
5681			len -= xfer;
5682			off += xfer;
5683		}
5684	}
5685	return (error);
5686}
5687
5688/*
5689 * The actual read RPC done to a DS.
5690 */
5691static int
5692nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
5693    struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp,
5694    struct ucred *cred, NFSPROC_T *p)
5695{
5696	uint32_t *tl;
5697	int error, retlen;
5698	struct nfsrv_descript nfsd;
5699	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5700	struct nfsrv_descript *nd = &nfsd;
5701	struct nfssockreq *nrp;
5702
5703	nd->nd_mrep = NULL;
5704	nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len,
5705	    NULL, &dsp->nfsclds_sess);
5706	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
5707	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
5708	txdr_hyper(io_off, tl);
5709	*(tl + 2) = txdr_unsigned(len);
5710	nrp = dsp->nfsclds_sockp;
5711	if (nrp == NULL)
5712		/* If NULL, use the MDS socket. */
5713		nrp = &nmp->nm_sockreq;
5714	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
5715	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
5716	if (error != 0)
5717		return (error);
5718	if (nd->nd_repstat != 0) {
5719		error = nd->nd_repstat;
5720		goto nfsmout;
5721	}
5722	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5723	*eofp = fxdr_unsigned(int, *tl);
5724	NFSM_STRSIZ(retlen, len);
5725	error = nfsm_mbufuio(nd, uiop, retlen);
5726nfsmout:
5727	if (nd->nd_mrep != NULL)
5728		mbuf_freem(nd->nd_mrep);
5729	return (error);
5730}
5731
5732/*
5733 * The actual write RPC done to a DS.
5734 */
5735static int
5736nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
5737    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
5738    struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p)
5739{
5740	uint32_t *tl;
5741	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5742	int error, rlen, commit, committed = NFSWRITE_FILESYNC;
5743	int32_t backup;
5744	struct nfsrv_descript nfsd;
5745	struct nfsrv_descript *nd = &nfsd;
5746	struct nfssockreq *nrp;
5747
5748	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
5749	nd->nd_mrep = NULL;
5750	nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len,
5751	    NULL, &dsp->nfsclds_sess);
5752	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
5753	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
5754	txdr_hyper(io_off, tl);
5755	tl += 2;
5756	*tl++ = txdr_unsigned(*iomode);
5757	*tl = txdr_unsigned(len);
5758	nfsm_uiombuf(nd, uiop, len);
5759	nrp = dsp->nfsclds_sockp;
5760	if (nrp == NULL)
5761		/* If NULL, use the MDS socket. */
5762		nrp = &nmp->nm_sockreq;
5763	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
5764	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
5765	if (error != 0)
5766		return (error);
5767	if (nd->nd_repstat != 0) {
5768		/*
5769		 * In case the rpc gets retried, roll
5770		 * the uio fileds changed by nfsm_uiombuf()
5771		 * back.
5772		 */
5773		uiop->uio_offset -= len;
5774		uio_uio_resid_add(uiop, len);
5775		uio_iov_base_add(uiop, -len);
5776		uio_iov_len_add(uiop, len);
5777		error = nd->nd_repstat;
5778	} else {
5779		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
5780		rlen = fxdr_unsigned(int, *tl++);
5781		if (rlen == 0) {
5782			error = NFSERR_IO;
5783			goto nfsmout;
5784		} else if (rlen < len) {
5785			backup = len - rlen;
5786			uio_iov_base_add(uiop, -(backup));
5787			uio_iov_len_add(uiop, backup);
5788			uiop->uio_offset -= backup;
5789			uio_uio_resid_add(uiop, backup);
5790			len = rlen;
5791		}
5792		commit = fxdr_unsigned(int, *tl++);
5793
5794		/*
5795		 * Return the lowest commitment level
5796		 * obtained by any of the RPCs.
5797		 */
5798		if (committed == NFSWRITE_FILESYNC)
5799			committed = commit;
5800		else if (committed == NFSWRITE_DATASYNC &&
5801		    commit == NFSWRITE_UNSTABLE)
5802			committed = commit;
5803		if (commit_thru_mds != 0) {
5804			NFSLOCKMNT(nmp);
5805			if (!NFSHASWRITEVERF(nmp)) {
5806				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
5807				NFSSETWRITEVERF(nmp);
5808	    		} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
5809				*must_commit = 1;
5810				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
5811			}
5812			NFSUNLOCKMNT(nmp);
5813		} else {
5814			NFSLOCKDS(dsp);
5815			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
5816				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
5817				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
5818			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
5819				*must_commit = 1;
5820				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
5821			}
5822			NFSUNLOCKDS(dsp);
5823		}
5824	}
5825nfsmout:
5826	if (nd->nd_mrep != NULL)
5827		mbuf_freem(nd->nd_mrep);
5828	*iomode = committed;
5829	if (nd->nd_repstat != 0 && error == 0)
5830		error = nd->nd_repstat;
5831	return (error);
5832}
5833
5834/*
5835 * Free up the nfsclds structure.
5836 */
5837void
5838nfscl_freenfsclds(struct nfsclds *dsp)
5839{
5840	int i;
5841
5842	if (dsp == NULL)
5843		return;
5844	if (dsp->nfsclds_sockp != NULL) {
5845		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
5846		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
5847		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
5848		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
5849	}
5850	NFSFREEMUTEX(&dsp->nfsclds_mtx);
5851	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
5852	for (i = 0; i < NFSV4_CBSLOTS; i++) {
5853		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
5854			m_freem(
5855			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
5856	}
5857	free(dsp, M_NFSCLDS);
5858}
5859
5860static enum nfsclds_state
5861nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
5862    struct nfsclds **retdspp)
5863{
5864	struct nfsclds *dsp, *cur_dsp;
5865
5866	/*
5867	 * Search the list of nfsclds structures for one with the same
5868	 * server.
5869	 */
5870	cur_dsp = NULL;
5871	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
5872		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
5873		    dsp->nfsclds_servownlen != 0 &&
5874		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
5875		    dsp->nfsclds_servownlen) &&
5876		    dsp->nfsclds_sess.nfsess_defunct == 0) {
5877			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
5878			    TAILQ_FIRST(&nmp->nm_sess), dsp,
5879			    dsp->nfsclds_flags);
5880			/* Server major id matches. */
5881			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
5882				*retdspp = dsp;
5883				return (NFSDSP_USETHISSESSION);
5884			}
5885
5886			/*
5887			 * Note the first match, so it can be used for
5888			 * sequence'ing new sessions.
5889			 */
5890			if (cur_dsp == NULL)
5891				cur_dsp = dsp;
5892		}
5893	}
5894	if (cur_dsp != NULL) {
5895		*retdspp = cur_dsp;
5896		return (NFSDSP_SEQTHISSESSION);
5897	}
5898	return (NFSDSP_NOTFOUND);
5899}
5900
5901/*
5902 * NFS commit rpc to a NFSv4.1 DS.
5903 */
5904static int
5905nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
5906    struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p)
5907{
5908	uint32_t *tl;
5909	struct nfsrv_descript nfsd, *nd = &nfsd;
5910	struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
5911	struct nfssockreq *nrp;
5912	int error;
5913
5914	nd->nd_mrep = NULL;
5915	nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len,
5916	    NULL, &dsp->nfsclds_sess);
5917	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
5918	txdr_hyper(offset, tl);
5919	tl += 2;
5920	*tl = txdr_unsigned(cnt);
5921	nrp = dsp->nfsclds_sockp;
5922	if (nrp == NULL)
5923		/* If NULL, use the MDS socket. */
5924		nrp = &nmp->nm_sockreq;
5925	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
5926	    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
5927	if (error != 0)
5928		return (error);
5929	if (nd->nd_repstat == 0) {
5930		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
5931		NFSLOCKDS(dsp);
5932		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
5933			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
5934			error = NFSERR_STALEWRITEVERF;
5935		}
5936		NFSUNLOCKDS(dsp);
5937	}
5938nfsmout:
5939	if (error == 0 && nd->nd_repstat != 0)
5940		error = nd->nd_repstat;
5941	mbuf_freem(nd->nd_mrep);
5942	return (error);
5943}
5944
5945/*
5946 * Set up the XDR arguments for the LayoutGet operation.
5947 */
5948static void
5949nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
5950    uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layoutlen,
5951    int usecurstateid)
5952{
5953	uint32_t *tl;
5954
5955	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5956	    NFSX_STATEID);
5957	*tl++ = newnfs_false;		/* Don't signal availability. */
5958	*tl++ = txdr_unsigned(NFSLAYOUT_NFSV4_1_FILES);
5959	*tl++ = txdr_unsigned(iomode);
5960	txdr_hyper(offset, tl);
5961	tl += 2;
5962	txdr_hyper(len, tl);
5963	tl += 2;
5964	txdr_hyper(minlen, tl);
5965	tl += 2;
5966	if (usecurstateid != 0) {
5967		/* Special stateid for Current stateid. */
5968		*tl++ = txdr_unsigned(1);
5969		*tl++ = 0;
5970		*tl++ = 0;
5971		*tl++ = 0;
5972	} else {
5973		*tl++ = txdr_unsigned(stateidp->seqid);
5974		NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
5975		*tl++ = stateidp->other[0];
5976		*tl++ = stateidp->other[1];
5977		*tl++ = stateidp->other[2];
5978	}
5979	*tl = txdr_unsigned(layoutlen);
5980}
5981
5982/*
5983 * Parse the reply for a successful LayoutGet operation.
5984 */
5985static int
5986nfsrv_parselayoutget(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
5987    int *retonclosep, struct nfsclflayouthead *flhp)
5988{
5989	uint32_t *tl;
5990	struct nfsclflayout *flp, *prevflp, *tflp;
5991	int cnt, error, gotiomode, fhcnt, nfhlen, i, j;
5992	uint64_t retlen;
5993	struct nfsfh *nfhp;
5994	uint8_t *cp;
5995
5996	error = 0;
5997	flp = NULL;
5998	gotiomode = -1;
5999	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
6000	if (*tl++ != 0)
6001		*retonclosep = 1;
6002	else
6003		*retonclosep = 0;
6004	stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
6005	NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
6006	    (int)stateidp->seqid);
6007	stateidp->other[0] = *tl++;
6008	stateidp->other[1] = *tl++;
6009	stateidp->other[2] = *tl++;
6010	cnt = fxdr_unsigned(int, *tl);
6011	NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
6012	if (cnt <= 0 || cnt > 10000) {
6013		/* Don't accept more than 10000 layouts in reply. */
6014		error = NFSERR_BADXDR;
6015		goto nfsmout;
6016	}
6017	for (i = 0; i < cnt; i++) {
6018		/* Dissect all the way to the file handle cnt. */
6019		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_HYPER +
6020		    6 * NFSX_UNSIGNED + NFSX_V4DEVICEID);
6021		fhcnt = fxdr_unsigned(int, *(tl + 11 +
6022		    NFSX_V4DEVICEID / NFSX_UNSIGNED));
6023		NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
6024		if (fhcnt < 0 || fhcnt > 100) {
6025			/* Don't accept more than 100 file handles. */
6026			error = NFSERR_BADXDR;
6027			goto nfsmout;
6028		}
6029		if (fhcnt > 1)
6030			flp = malloc(sizeof(*flp) + (fhcnt - 1) *
6031			    sizeof(struct nfsfh *), M_NFSFLAYOUT, M_WAITOK);
6032		else
6033			flp = malloc(sizeof(*flp), M_NFSFLAYOUT, M_WAITOK);
6034		flp->nfsfl_flags = 0;
6035		flp->nfsfl_fhcnt = 0;
6036		flp->nfsfl_devp = NULL;
6037		flp->nfsfl_off = fxdr_hyper(tl); tl += 2;
6038		retlen = fxdr_hyper(tl); tl += 2;
6039		if (flp->nfsfl_off + retlen < flp->nfsfl_off)
6040			flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
6041		else
6042			flp->nfsfl_end = flp->nfsfl_off + retlen;
6043		flp->nfsfl_iomode = fxdr_unsigned(int, *tl++);
6044		if (gotiomode == -1)
6045			gotiomode = flp->nfsfl_iomode;
6046		if (fxdr_unsigned(int, *tl++) != NFSLAYOUT_NFSV4_1_FILES) {
6047			printf("NFSv4.1: got non-files layout\n");
6048			error = NFSERR_BADXDR;
6049			goto nfsmout;
6050		}
6051		NFSBCOPY(++tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
6052		tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6053		flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
6054		NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
6055		flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
6056		flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
6057		if (fxdr_unsigned(int, *tl) != fhcnt) {
6058			printf("EEK! bad fhcnt\n");
6059			error = NFSERR_BADXDR;
6060			goto nfsmout;
6061		}
6062		for (j = 0; j < fhcnt; j++) {
6063			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6064			nfhlen = fxdr_unsigned(int, *tl);
6065			if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
6066				error = NFSERR_BADXDR;
6067				goto nfsmout;
6068			}
6069			nfhp = malloc(sizeof(*nfhp) + nfhlen - 1, M_NFSFH,
6070			    M_WAITOK);
6071			flp->nfsfl_fh[j] = nfhp;
6072			flp->nfsfl_fhcnt++;
6073			nfhp->nfh_len = nfhlen;
6074			NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
6075			NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
6076		}
6077		if (flp->nfsfl_iomode == gotiomode) {
6078			/* Keep the list in increasing offset order. */
6079			tflp = LIST_FIRST(flhp);
6080			prevflp = NULL;
6081			while (tflp != NULL &&
6082			    tflp->nfsfl_off < flp->nfsfl_off) {
6083				prevflp = tflp;
6084				tflp = LIST_NEXT(tflp, nfsfl_list);
6085			}
6086			if (prevflp == NULL)
6087				LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
6088			else
6089				LIST_INSERT_AFTER(prevflp, flp,
6090				    nfsfl_list);
6091		} else {
6092			printf("nfscl_layoutget(): got wrong iomode\n");
6093			nfscl_freeflayout(flp);
6094		}
6095		flp = NULL;
6096	}
6097nfsmout:
6098	if (error != 0 && flp != NULL)
6099		nfscl_freeflayout(flp);
6100	return (error);
6101}
6102
6103/*
6104 * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
6105 * so that it does both an Open and a Layoutget.
6106 */
6107static int
6108nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
6109    int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
6110    struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
6111    struct ucred *cred, NFSPROC_T *p)
6112{
6113	struct nfscllayout *lyp;
6114	struct nfsclflayout *flp;
6115	struct nfsclflayouthead flh;
6116	int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
6117	int laystat;
6118	nfsv4stateid_t stateid;
6119	struct nfsclsession *tsep;
6120
6121	error = 0;
6122	/*
6123	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
6124	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
6125	 * flp == NULL.
6126	 */
6127	lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, &flp,
6128	    &recalled);
6129	NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
6130	if (lyp == NULL)
6131		islocked = 0;
6132	else if (flp != NULL)
6133		islocked = 1;
6134	else
6135		islocked = 2;
6136	if ((lyp == NULL || flp == NULL) && recalled == 0) {
6137		LIST_INIT(&flh);
6138		tsep = nfsmnt_mdssession(nmp);
6139		layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
6140		    3 * NFSX_UNSIGNED);
6141		if (lyp == NULL)
6142			usecurstateid = 1;
6143		else {
6144			usecurstateid = 0;
6145			stateid.seqid = lyp->nfsly_stateid.seqid;
6146			stateid.other[0] = lyp->nfsly_stateid.other[0];
6147			stateid.other[1] = lyp->nfsly_stateid.other[1];
6148			stateid.other[2] = lyp->nfsly_stateid.other[2];
6149		}
6150		error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
6151		    newfhp, newfhlen, mode, op, name, namelen,
6152		    dpp, &stateid, usecurstateid, layoutlen,
6153		    &retonclose, &flh, &laystat, cred, p);
6154		NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
6155		    laystat, error);
6156		laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
6157		    &stateid, retonclose, NULL, &lyp, &flh, laystat, &islocked,
6158		    cred, p);
6159	} else
6160		error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
6161		    mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
6162	if (islocked == 2)
6163		nfscl_rellayout(lyp, 1);
6164	else if (islocked == 1)
6165		nfscl_rellayout(lyp, 0);
6166	return (error);
6167}
6168
6169/*
6170 * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
6171 * enabled, only for the CLAIM_NULL case.  All other NFSv4 Opens are
6172 * handled by nfsrpc_openrpc().
6173 * For the case where op == NULL, dvp is the directory.  When op != NULL, it
6174 * can be NULL.
6175 */
6176static int
6177nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
6178    int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
6179    struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
6180    nfsv4stateid_t *stateidp, int usecurstateid,
6181    int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
6182    int *laystatp, struct ucred *cred, NFSPROC_T *p)
6183{
6184	uint32_t *tl;
6185	struct nfsrv_descript nfsd, *nd = &nfsd;
6186	struct nfscldeleg *ndp = NULL;
6187	struct nfsvattr nfsva;
6188	struct nfsclsession *tsep;
6189	uint32_t rflags, deleg;
6190	nfsattrbit_t attrbits;
6191	int error, ret, acesize, limitby, iomode;
6192
6193	*dpp = NULL;
6194	*laystatp = ENXIO;
6195	nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL);
6196	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
6197	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
6198	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
6199	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
6200	tsep = nfsmnt_mdssession(nmp);
6201	*tl++ = tsep->nfsess_clientid.lval[0];
6202	*tl = tsep->nfsess_clientid.lval[1];
6203	nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
6204	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6205	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
6206	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
6207	nfsm_strtom(nd, name, namelen);
6208	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
6209	*tl = txdr_unsigned(NFSV4OP_GETATTR);
6210	NFSZERO_ATTRBIT(&attrbits);
6211	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
6212	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
6213	nfsrv_putattrbit(nd, &attrbits);
6214	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
6215	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
6216	if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
6217		iomode = NFSLAYOUTIOMODE_RW;
6218	else
6219		iomode = NFSLAYOUTIOMODE_READ;
6220	nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
6221	    layoutlen, usecurstateid);
6222	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
6223	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6224	if (error != 0)
6225		return (error);
6226	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
6227	if (nd->nd_repstat != 0)
6228		*laystatp = nd->nd_repstat;
6229	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
6230		/* ND_NOMOREDATA will be set if the Open operation failed. */
6231		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
6232		    6 * NFSX_UNSIGNED);
6233		op->nfso_stateid.seqid = *tl++;
6234		op->nfso_stateid.other[0] = *tl++;
6235		op->nfso_stateid.other[1] = *tl++;
6236		op->nfso_stateid.other[2] = *tl;
6237		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
6238		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
6239		if (error != 0)
6240			goto nfsmout;
6241		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
6242		deleg = fxdr_unsigned(u_int32_t, *tl);
6243		if (deleg == NFSV4OPEN_DELEGATEREAD ||
6244		    deleg == NFSV4OPEN_DELEGATEWRITE) {
6245			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
6246			      NFSCLFLAGS_FIRSTDELEG))
6247				op->nfso_own->nfsow_clp->nfsc_flags |=
6248				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
6249			ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
6250			    M_NFSCLDELEG, M_WAITOK);
6251			LIST_INIT(&ndp->nfsdl_owner);
6252			LIST_INIT(&ndp->nfsdl_lock);
6253			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
6254			ndp->nfsdl_fhlen = newfhlen;
6255			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
6256			newnfs_copyincred(cred, &ndp->nfsdl_cred);
6257			nfscl_lockinit(&ndp->nfsdl_rwlock);
6258			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
6259			    NFSX_UNSIGNED);
6260			ndp->nfsdl_stateid.seqid = *tl++;
6261			ndp->nfsdl_stateid.other[0] = *tl++;
6262			ndp->nfsdl_stateid.other[1] = *tl++;
6263			ndp->nfsdl_stateid.other[2] = *tl++;
6264			ret = fxdr_unsigned(int, *tl);
6265			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
6266				ndp->nfsdl_flags = NFSCLDL_WRITE;
6267				/*
6268				 * Indicates how much the file can grow.
6269				 */
6270				NFSM_DISSECT(tl, u_int32_t *,
6271				    3 * NFSX_UNSIGNED);
6272				limitby = fxdr_unsigned(int, *tl++);
6273				switch (limitby) {
6274				case NFSV4OPEN_LIMITSIZE:
6275					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
6276					break;
6277				case NFSV4OPEN_LIMITBLOCKS:
6278					ndp->nfsdl_sizelimit =
6279					    fxdr_unsigned(u_int64_t, *tl++);
6280					ndp->nfsdl_sizelimit *=
6281					    fxdr_unsigned(u_int64_t, *tl);
6282					break;
6283				default:
6284					error = NFSERR_BADXDR;
6285					goto nfsmout;
6286				};
6287			} else
6288				ndp->nfsdl_flags = NFSCLDL_READ;
6289			if (ret != 0)
6290				ndp->nfsdl_flags |= NFSCLDL_RECALL;
6291			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, &ret,
6292			    &acesize, p);
6293			if (error != 0)
6294				goto nfsmout;
6295		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
6296			error = NFSERR_BADXDR;
6297			goto nfsmout;
6298		}
6299		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
6300		    nfscl_assumeposixlocks)
6301			op->nfso_posixlock = 1;
6302		else
6303			op->nfso_posixlock = 0;
6304		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
6305		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
6306		if (*++tl == 0) {
6307			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
6308			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
6309			    NULL, NULL, NULL, p, cred);
6310			if (error != 0)
6311				goto nfsmout;
6312			if (ndp != NULL) {
6313				ndp->nfsdl_change = nfsva.na_filerev;
6314				ndp->nfsdl_modtime = nfsva.na_mtime;
6315				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
6316				*dpp = ndp;
6317				ndp = NULL;
6318			}
6319			/*
6320			 * At this point, the Open has succeeded, so set
6321			 * nd_repstat = NFS_OK.  If the Layoutget failed,
6322			 * this function just won't return a layout.
6323			 */
6324			if (nd->nd_repstat == 0) {
6325				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6326				*laystatp = fxdr_unsigned(int, *++tl);
6327				if (*laystatp == 0) {
6328					error = nfsrv_parselayoutget(nd,
6329					    stateidp, retonclosep, flhp);
6330					if (error != 0)
6331						*laystatp = error;
6332				}
6333			} else
6334				nd->nd_repstat = 0;	/* Return 0 for Open. */
6335		}
6336	}
6337	if (nd->nd_repstat != 0 && error == 0)
6338		error = nd->nd_repstat;
6339nfsmout:
6340	free(ndp, M_NFSCLDELEG);
6341	mbuf_freem(nd->nd_mrep);
6342	return (error);
6343}
6344
6345/*
6346 * Similar nfsrpc_createv4(), but also does the LayoutGet operation.
6347 * Used only for mounts with pNFS enabled.
6348 */
6349static int
6350nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
6351    nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
6352    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
6353    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
6354    int *dattrflagp, void *dstuff, int *unlockedp, nfsv4stateid_t *stateidp,
6355    int usecurstateid, int layoutlen, int *retonclosep,
6356    struct nfsclflayouthead *flhp, int *laystatp)
6357{
6358	uint32_t *tl;
6359	int error = 0, deleg, newone, ret, acesize, limitby;
6360	struct nfsrv_descript nfsd, *nd = &nfsd;
6361	struct nfsclopen *op;
6362	struct nfscldeleg *dp = NULL;
6363	struct nfsnode *np;
6364	struct nfsfh *nfhp;
6365	struct nfsclsession *tsep;
6366	nfsattrbit_t attrbits;
6367	nfsv4stateid_t stateid;
6368	uint32_t rflags;
6369	struct nfsmount *nmp;
6370
6371	nmp = VFSTONFS(dvp->v_mount);
6372	np = VTONFS(dvp);
6373	*laystatp = ENXIO;
6374	*unlockedp = 0;
6375	*nfhpp = NULL;
6376	*dpp = NULL;
6377	*attrflagp = 0;
6378	*dattrflagp = 0;
6379	if (namelen > NFS_MAXNAMLEN)
6380		return (ENAMETOOLONG);
6381	NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp);
6382	/*
6383	 * For V4, this is actually an Open op.
6384	 */
6385	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
6386	*tl++ = txdr_unsigned(owp->nfsow_seqid);
6387	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
6388	    NFSV4OPEN_ACCESSREAD);
6389	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
6390	tsep = nfsmnt_mdssession(nmp);
6391	*tl++ = tsep->nfsess_clientid.lval[0];
6392	*tl = tsep->nfsess_clientid.lval[1];
6393	nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
6394	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
6395	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
6396	if ((fmode & O_EXCL) != 0) {
6397		if (NFSHASSESSPERSIST(nmp)) {
6398			/* Use GUARDED for persistent sessions. */
6399			*tl = txdr_unsigned(NFSCREATE_GUARDED);
6400			nfscl_fillsattr(nd, vap, dvp, 0, 0);
6401		} else {
6402			/* Otherwise, use EXCLUSIVE4_1. */
6403			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
6404			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
6405			*tl++ = cverf.lval[0];
6406			*tl = cverf.lval[1];
6407			nfscl_fillsattr(nd, vap, dvp, 0, 0);
6408		}
6409	} else {
6410		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
6411		nfscl_fillsattr(nd, vap, dvp, 0, 0);
6412	}
6413	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
6414	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
6415	nfsm_strtom(nd, name, namelen);
6416	/* Get the new file's handle and attributes, plus save the FH. */
6417	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
6418	*tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
6419	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
6420	*tl = txdr_unsigned(NFSV4OP_GETATTR);
6421	NFSGETATTR_ATTRBIT(&attrbits);
6422	nfsrv_putattrbit(nd, &attrbits);
6423	/* Get the directory's post-op attributes. */
6424	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
6425	*tl = txdr_unsigned(NFSV4OP_PUTFH);
6426	nfsm_fhtom(nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
6427	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
6428	*tl = txdr_unsigned(NFSV4OP_GETATTR);
6429	nfsrv_putattrbit(nd, &attrbits);
6430	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
6431	*tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
6432	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
6433	nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
6434	    layoutlen, usecurstateid);
6435	error = nfscl_request(nd, dvp, p, cred, dstuff);
6436	if (error != 0)
6437		return (error);
6438	NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
6439	    error);
6440	if (nd->nd_repstat != 0)
6441		*laystatp = nd->nd_repstat;
6442	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
6443	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
6444		NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
6445		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
6446		    6 * NFSX_UNSIGNED);
6447		stateid.seqid = *tl++;
6448		stateid.other[0] = *tl++;
6449		stateid.other[1] = *tl++;
6450		stateid.other[2] = *tl;
6451		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
6452		nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
6453		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
6454		deleg = fxdr_unsigned(int, *tl);
6455		if (deleg == NFSV4OPEN_DELEGATEREAD ||
6456		    deleg == NFSV4OPEN_DELEGATEWRITE) {
6457			if (!(owp->nfsow_clp->nfsc_flags &
6458			      NFSCLFLAGS_FIRSTDELEG))
6459				owp->nfsow_clp->nfsc_flags |=
6460				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
6461			dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
6462			    M_NFSCLDELEG, M_WAITOK);
6463			LIST_INIT(&dp->nfsdl_owner);
6464			LIST_INIT(&dp->nfsdl_lock);
6465			dp->nfsdl_clp = owp->nfsow_clp;
6466			newnfs_copyincred(cred, &dp->nfsdl_cred);
6467			nfscl_lockinit(&dp->nfsdl_rwlock);
6468			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
6469			    NFSX_UNSIGNED);
6470			dp->nfsdl_stateid.seqid = *tl++;
6471			dp->nfsdl_stateid.other[0] = *tl++;
6472			dp->nfsdl_stateid.other[1] = *tl++;
6473			dp->nfsdl_stateid.other[2] = *tl++;
6474			ret = fxdr_unsigned(int, *tl);
6475			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
6476				dp->nfsdl_flags = NFSCLDL_WRITE;
6477				/*
6478				 * Indicates how much the file can grow.
6479				 */
6480				NFSM_DISSECT(tl, u_int32_t *,
6481				    3 * NFSX_UNSIGNED);
6482				limitby = fxdr_unsigned(int, *tl++);
6483				switch (limitby) {
6484				case NFSV4OPEN_LIMITSIZE:
6485					dp->nfsdl_sizelimit = fxdr_hyper(tl);
6486					break;
6487				case NFSV4OPEN_LIMITBLOCKS:
6488					dp->nfsdl_sizelimit =
6489					    fxdr_unsigned(u_int64_t, *tl++);
6490					dp->nfsdl_sizelimit *=
6491					    fxdr_unsigned(u_int64_t, *tl);
6492					break;
6493				default:
6494					error = NFSERR_BADXDR;
6495					goto nfsmout;
6496				};
6497			} else {
6498				dp->nfsdl_flags = NFSCLDL_READ;
6499			}
6500			if (ret != 0)
6501				dp->nfsdl_flags |= NFSCLDL_RECALL;
6502			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, &ret,
6503			    &acesize, p);
6504			if (error != 0)
6505				goto nfsmout;
6506		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
6507			error = NFSERR_BADXDR;
6508			goto nfsmout;
6509		}
6510
6511		/* Now, we should have the status for the SaveFH. */
6512		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6513		if (*++tl == 0) {
6514			NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
6515			/*
6516			 * Now, process the GetFH and Getattr for the newly
6517			 * created file. nfscl_mtofh() will set
6518			 * ND_NOMOREDATA if these weren't successful.
6519			 */
6520			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
6521			NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
6522			if (error != 0)
6523				goto nfsmout;
6524		} else
6525			nd->nd_flag |= ND_NOMOREDATA;
6526		/* Now we have the PutFH and Getattr for the directory. */
6527		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
6528			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6529			if (*++tl != 0)
6530				nd->nd_flag |= ND_NOMOREDATA;
6531			else {
6532				NFSM_DISSECT(tl, uint32_t *, 2 *
6533				    NFSX_UNSIGNED);
6534				if (*++tl != 0)
6535					nd->nd_flag |= ND_NOMOREDATA;
6536			}
6537		}
6538		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
6539			/* Load the directory attributes. */
6540			error = nfsm_loadattr(nd, dnap);
6541			NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
6542			if (error != 0)
6543				goto nfsmout;
6544			*dattrflagp = 1;
6545			if (dp != NULL && *attrflagp != 0) {
6546				dp->nfsdl_change = nnap->na_filerev;
6547				dp->nfsdl_modtime = nnap->na_mtime;
6548				dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
6549			}
6550			/*
6551			 * We can now complete the Open state.
6552			 */
6553			nfhp = *nfhpp;
6554			if (dp != NULL) {
6555				dp->nfsdl_fhlen = nfhp->nfh_len;
6556				NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
6557				    nfhp->nfh_len);
6558			}
6559			/*
6560			 * Get an Open structure that will be
6561			 * attached to the OpenOwner, acquired already.
6562			 */
6563			error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
6564			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
6565			    cred, p, NULL, &op, &newone, NULL, 0);
6566			if (error != 0)
6567				goto nfsmout;
6568			op->nfso_stateid = stateid;
6569			newnfs_copyincred(cred, &op->nfso_cred);
6570
6571			nfscl_openrelease(nmp, op, error, newone);
6572			*unlockedp = 1;
6573
6574			/* Now, handle the RestoreFH and LayoutGet. */
6575			if (nd->nd_repstat == 0) {
6576				NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
6577				*laystatp = fxdr_unsigned(int, *(tl + 3));
6578				if (*laystatp == 0) {
6579					error = nfsrv_parselayoutget(nd,
6580					    stateidp, retonclosep, flhp);
6581					if (error != 0)
6582						*laystatp = error;
6583				}
6584				NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
6585				    error);
6586			} else
6587				nd->nd_repstat = 0;
6588		}
6589	}
6590	if (nd->nd_repstat != 0 && error == 0)
6591		error = nd->nd_repstat;
6592	if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION)
6593		nfscl_initiate_recovery(owp->nfsow_clp);
6594nfsmout:
6595	NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
6596	if (error == 0)
6597		*dpp = dp;
6598	else
6599		free(dp, M_NFSCLDELEG);
6600	mbuf_freem(nd->nd_mrep);
6601	return (error);
6602}
6603
6604/*
6605 * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
6606 */
6607static int
6608nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
6609    nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
6610    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
6611    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
6612    int *dattrflagp, void *dstuff, int *unlockedp)
6613{
6614	struct nfscllayout *lyp;
6615	struct nfsclflayouthead flh;
6616	struct nfsfh *nfhp;
6617	struct nfsclsession *tsep;
6618	struct nfsmount *nmp;
6619	nfsv4stateid_t stateid;
6620	int error, layoutlen, retonclose, laystat;
6621
6622	error = 0;
6623	nmp = VFSTONFS(dvp->v_mount);
6624	LIST_INIT(&flh);
6625	tsep = nfsmnt_mdssession(nmp);
6626	layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
6627	error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
6628	    owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
6629	    dstuff, unlockedp, &stateid, 1, layoutlen, &retonclose, &flh,
6630	    &laystat);
6631	NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
6632	    laystat, error);
6633	lyp = NULL;
6634	if (laystat == 0) {
6635		nfhp = *nfhpp;
6636		laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
6637		    nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
6638		    laystat, NULL, cred, p);
6639	} else
6640		laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
6641		    retonclose, NULL, &lyp, &flh, laystat, NULL, cred, p);
6642	if (laystat == 0)
6643		nfscl_rellayout(lyp, 0);
6644	return (error);
6645}
6646
6647/*
6648 * Process the results of a layoutget() operation.
6649 */
6650static int
6651nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
6652    int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
6653    struct nfscllayout **lypp, struct nfsclflayouthead *flhp,
6654    int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
6655{
6656	struct nfsclflayout *tflp;
6657	struct nfscldevinfo *dip;
6658
6659	if (laystat == NFSERR_UNKNLAYOUTTYPE) {
6660		/* Disable PNFS. */
6661		NFSCL_DEBUG(1, "disable PNFS\n");
6662		NFSLOCKMNT(nmp);
6663		nmp->nm_state &= ~NFSSTA_PNFS;
6664		NFSUNLOCKMNT(nmp);
6665	}
6666	if (laystat == 0) {
6667		NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
6668		LIST_FOREACH(tflp, flhp, nfsfl_list) {
6669			laystat = nfscl_adddevinfo(nmp, NULL, tflp);
6670			NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
6671			if (laystat != 0) {
6672				laystat = nfsrpc_getdeviceinfo(nmp,
6673				    tflp->nfsfl_dev, NFSLAYOUT_NFSV4_1_FILES,
6674				    notifybit, &dip, cred, p);
6675				NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
6676				    laystat);
6677				if (laystat != 0)
6678					break;
6679				laystat = nfscl_adddevinfo(nmp, dip, tflp);
6680				if (laystat != 0)
6681					printf("getlayout: cannot add\n");
6682			}
6683		}
6684	}
6685	if (laystat == 0) {
6686		/*
6687		 * nfscl_layout() always returns with the nfsly_lock
6688		 * set to a refcnt (shared lock).
6689		 * Passing in dvp is sufficient, since it is only used to
6690		 * get the fsid for the file system.
6691		 */
6692		laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
6693		    retonclose, flhp, lypp, cred, p);
6694		NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
6695		    laystat);
6696		if (laystat == 0 && islockedp != NULL)
6697			*islockedp = 1;
6698	}
6699	return (laystat);
6700}
6701
6702