1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 */
35
36#include <sys/cdefs.h>
37/*
38 * Rpc op calls, generally called from the vnode op calls or through the
39 * buffer cache, for NFS v2, 3 and 4.
40 * These do not normally make any changes to vnode arguments or use
41 * structures that might change between the VFS variants. The returned
42 * arguments are all at the end, after the NFSPROC_T *p one.
43 */
44
45#include "opt_inet6.h"
46
47#include <fs/nfs/nfsport.h>
48#include <fs/nfsclient/nfs.h>
49#include <sys/extattr.h>
50#include <sys/sysctl.h>
51#include <sys/taskqueue.h>
52
/* Bring the vfs.nfs sysctl node into scope so the knobs below attach to it. */
SYSCTL_DECL(_vfs_nfs);

/*
 * vfs.nfs.ignore_eexist (read/write, initially 0).
 * Per its description: ignore EEXIST replies for mkdir/symlink.
 */
static int	nfsignore_eexist = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, ignore_eexist, CTLFLAG_RW,
    &nfsignore_eexist, 0, "NFS ignore EEXIST replies for mkdir/symlink");

/*
 * vfs.nfs.dssameconn (read/write, initially 0).
 * Per its description: use the same TCP connection to multiple DSs.
 */
static int	nfscl_dssameconn = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, dssameconn, CTLFLAG_RW,
    &nfscl_dssameconn, 0, "Use same TCP connection to multiple DSs");

/*
 * vfs.nfs.maxcopyrange (read/write, initially SSIZE_MAX).
 * Per its description: upper bound on the size of a Copy, so that the
 * RPC takes a reasonable amount of time.
 */
static uint64_t nfs_maxcopyrange = SSIZE_MAX;
SYSCTL_U64(_vfs_nfs, OID_AUTO, maxcopyrange, CTLFLAG_RW,
    &nfs_maxcopyrange, 0, "Max size of a Copy so RPC times reasonable");
66
/*
 * Global variables
 *
 * The "extern" ones are only declared here; their definitions live in
 * other files.  The remaining ones are defined here without "static",
 * so they have external linkage.
 */
extern struct nfsstatsv1 nfsstatsv1;
extern int nfs_numnfscbd;
extern struct timeval nfsboottime;
extern u_int32_t newnfs_false, newnfs_true;
extern nfstype nfsv34_type[9];
extern int nfsrv_useacl;
extern char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
extern int nfscl_debuglevel;
extern int nfs_pnfsiothreads;
extern u_long sb_max_adj;
/* NOTE(review): presumably the mutex behind NFSLOCKCLSTATE() - confirm. */
NFSCLSTATEMUTEX;
int nfstest_outofseq = 0;
/*
 * When non-zero, nfsrpc_openrpc() sets nfso_posixlock on every open, even
 * if the server's reply did not carry NFSV4OPEN_LOCKTYPEPOSIX.
 */
int nfscl_assumeposixlocks = 1;
/*
 * nfsrpc_open() only tries to get a layout along with the Open when this
 * (and nfs_numnfscbd) is non-zero.
 */
int nfscl_enablecallb = 0;
short nfsv4_cbport = NFSV4_CBPORT;
int nfstest_openallsetattr = 0;

/* Byte offset of d_name within struct dirent, i.e. the fixed header size. */
#define	DIRHDSIZ	offsetof(struct dirent, d_name)
88
/*
 * Result codes of nfscl_getsameserver(), which returns exactly one of:
 * NFSDSP_USETHISSESSION - Use this session for the DS.
 * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new
 *     session.
 * NFSDSP_NOTFOUND - No matching server was found.
 */
enum nfsclds_state {
	NFSDSP_USETHISSESSION = 0,	/* reuse the session that was found */
	NFSDSP_SEQTHISSESSION = 1,	/* new session, sequence from the dsp */
	NFSDSP_NOTFOUND = 2,		/* nothing matched */
};
101
/*
 * Do a write RPC on a DS data file, using this structure for the arguments,
 * so that this function can be executed by a separate kernel process.
 *
 * NOTE(review): the code that fills in and consumes this structure is not
 * in this part of the file.  Most members have the same types as the
 * parameters of nfsrpc_writedsmir()/nfsrpc_commitds() declared below, so
 * they presumably carry those arguments - confirm against the users.
 */
struct nfsclwritedsdorpc {
	int			done;		/* presumably: RPC finished */
	int			inprog;		/* presumably: RPC in progress */
	struct task		tsk;		/* taskqueue(9) task entry */
	struct vnode		*vp;
	int			iomode;
	int			must_commit;
	nfsv4stateid_t		*stateidp;
	struct nfsclds		*dsp;
	uint64_t		off;
	int			len;
#ifdef notyet
	int			advise;		/* only built when notyet is defined */
#endif
	struct nfsfh		*fhp;
	struct mbuf		*m;
	int			vers;
	int			minorvers;
	struct ucred		*cred;
	NFSPROC_T		*p;
	int			err;		/* presumably: result of the RPC */
};
128
/*
 * Forward declarations of the file-local (static) functions, so that they
 * can be called before their definitions appear.
 */
static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, int *);
static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *,
    nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *);
static int nfsrpc_writerpc(vnode_t , struct uio *, int *, int *,
    struct ucred *, nfsv4stateid_t *, NFSPROC_T *, struct nfsvattr *, int *,
    int);
static int nfsrpc_deallocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
    struct nfsvattr *, int *, struct ucred *, NFSPROC_T *);
static int nfsrpc_createv23(vnode_t , char *, int, struct vattr *,
    nfsquad_t, int, struct ucred *, NFSPROC_T *, struct nfsvattr *,
    struct nfsvattr *, struct nfsfh **, int *, int *);
static int nfsrpc_createv4(vnode_t , char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **, struct ucred *,
    NFSPROC_T *, struct nfsvattr *, struct nfsvattr *, struct nfsfh **, int *,
    int *, int *);
static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *,
    struct nfscllockowner *, u_int64_t, u_int64_t,
    u_int32_t, struct ucred *, NFSPROC_T *, int);
static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *,
    struct acl *, nfsv4stateid_t *);
static int nfsrpc_layouterror(struct nfsmount *, uint8_t *, int, uint64_t,
    uint64_t, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, uint32_t,
    uint32_t, char *);
static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int,
    uint32_t, uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_in *,
    struct sockaddr_in6 *, sa_family_t, int, int, struct nfsclds **,
    NFSPROC_T *);
static void nfscl_initsessionslots(struct nfsclsession *);
static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *,
    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
    struct nfsclflayout *, uint64_t, uint64_t, int, struct ucred *,
    NFSPROC_T *);
static int nfscl_dofflayoutio(vnode_t, struct uio *, int *, int *, int *,
    nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *,
    struct nfsclflayout *, uint64_t, uint64_t, int, int, struct mbuf *,
    struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, int, int, int,
    struct ucred *, NFSPROC_T *);
static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *,
    nfsv4stateid_t *, struct nfsclds *, uint64_t, int,
    struct nfsfh *, int, int, int, int, struct ucred *, NFSPROC_T *);
static int nfsio_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
    struct nfsclwritedsdorpc *, struct ucred *, NFSPROC_T *);
static int nfsrpc_writedsmir(vnode_t, int *, int *, nfsv4stateid_t *,
    struct nfsclds *, uint64_t, int, struct nfsfh *, struct mbuf *, int, int,
    struct ucred *, NFSPROC_T *);
static enum nfsclds_state nfscl_getsameserver(struct nfsmount *,
    struct nfsclds *, struct nfsclds **, uint32_t *);
static int nfsio_commitds(vnode_t, uint64_t, int, struct nfsclds *,
    struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
    NFSPROC_T *);
static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *,
    struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
/* The Advise helpers are only declared when "notyet" is defined. */
#ifdef notyet
static int nfsio_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
    struct nfsfh *, int, int, struct nfsclwritedsdorpc *, struct ucred *,
    NFSPROC_T *);
static int nfsrpc_adviseds(vnode_t, uint64_t, int, int, struct nfsclds *,
    struct nfsfh *, int, int, struct ucred *, NFSPROC_T *);
#endif
static int nfsrpc_allocaterpc(vnode_t, off_t, off_t, nfsv4stateid_t *,
    struct nfsvattr *, int *, struct ucred *, NFSPROC_T *);
static void nfsrv_setuplayoutget(struct nfsrv_descript *, int, uint64_t,
    uint64_t, uint64_t, nfsv4stateid_t *, int, int, int);
static int nfsrv_parseug(struct nfsrv_descript *, int, uid_t *, gid_t *,
    NFSPROC_T *);
static int nfsrv_parselayoutget(struct nfsmount *, struct nfsrv_descript *,
    nfsv4stateid_t *, int *, struct nfsclflayouthead *);
static int nfsrpc_getopenlayout(struct nfsmount *, vnode_t, u_int8_t *,
    int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
    struct nfscldeleg **, struct ucred *, NFSPROC_T *);
static int nfsrpc_getcreatelayout(vnode_t, char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
    struct nfsfh **, int *, int *, int *);
static int nfsrpc_openlayoutrpc(struct nfsmount *, vnode_t, u_int8_t *,
    int, uint8_t *, int, uint32_t, struct nfsclopen *, uint8_t *, int,
    struct nfscldeleg **, nfsv4stateid_t *, int, int, int, int *,
    struct nfsclflayouthead *, int *, struct ucred *, NFSPROC_T *);
static int nfsrpc_createlayout(vnode_t, char *, int, struct vattr *,
    nfsquad_t, int, struct nfsclowner *, struct nfscldeleg **,
    struct ucred *, NFSPROC_T *, struct nfsvattr *, struct nfsvattr *,
    struct nfsfh **, int *, int *, int *, nfsv4stateid_t *,
    int, int, int, int *, struct nfsclflayouthead *, int *);
static int nfsrpc_layoutget(struct nfsmount *, uint8_t *, int, int, uint64_t,
    uint64_t, uint64_t, int, int, nfsv4stateid_t *, int *,
    struct nfsclflayouthead *, struct ucred *, NFSPROC_T *);
static int nfsrpc_layoutgetres(struct nfsmount *, vnode_t, uint8_t *,
    int, nfsv4stateid_t *, int, uint32_t *, struct nfscllayout **,
    struct nfsclflayouthead *, int, int, int *, struct ucred *, NFSPROC_T *);
static int nfsrpc_copyrpc(vnode_t, off_t, vnode_t, off_t, size_t *,
    nfsv4stateid_t *, nfsv4stateid_t *, struct nfsvattr *, int *,
    struct nfsvattr *, int *, bool, int *, struct ucred *, NFSPROC_T *);
static int nfsrpc_seekrpc(vnode_t, off_t *, nfsv4stateid_t *, bool *,
    int, struct nfsvattr *, int *, struct ucred *);
static struct mbuf *nfsm_split(struct mbuf *, uint64_t);
static void nfscl_statfs(struct vnode *, struct ucred *, NFSPROC_T *);

/* Not static: takes a task function and the argument to hand to it. */
int nfs_pnfsio(task_fn_t *, void *);
233
234/*
235 * nfs null call from vfs.
236 */
237int
238nfsrpc_null(vnode_t vp, struct ucred *cred, NFSPROC_T *p)
239{
240	int error;
241	struct nfsrv_descript nfsd, *nd = &nfsd;
242
243	NFSCL_REQSTART(nd, NFSPROC_NULL, vp, NULL);
244	error = nfscl_request(nd, vp, p, cred);
245	if (nd->nd_repstat && !error)
246		error = nd->nd_repstat;
247	m_freem(nd->nd_mrep);
248	return (error);
249}
250
251/*
252 * nfs access rpc op.
253 * For nfs version 3 and 4, use the access rpc to check accessibility. If file
254 * modes are changed on the server, accesses might still fail later.
255 */
256int
257nfsrpc_access(vnode_t vp, int acmode, struct ucred *cred,
258    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
259{
260	int error;
261	u_int32_t mode, rmode;
262
263	if (acmode & VREAD)
264		mode = NFSACCESS_READ;
265	else
266		mode = 0;
267	if (vp->v_type == VDIR) {
268		if (acmode & VWRITE)
269			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND |
270				 NFSACCESS_DELETE);
271		if (acmode & VEXEC)
272			mode |= NFSACCESS_LOOKUP;
273	} else {
274		if (acmode & VWRITE)
275			mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
276		if (acmode & VEXEC)
277			mode |= NFSACCESS_EXECUTE;
278	}
279
280	/*
281	 * Now, just call nfsrpc_accessrpc() to do the actual RPC.
282	 */
283	error = nfsrpc_accessrpc(vp, mode, cred, p, nap, attrflagp, &rmode);
284
285	/*
286	 * The NFS V3 spec does not clarify whether or not
287	 * the returned access bits can be a superset of
288	 * the ones requested, so...
289	 */
290	if (!error && (rmode & mode) != mode)
291		error = EACCES;
292	return (error);
293}
294
295/*
296 * The actual rpc, separated out for Darwin.
297 */
298int
299nfsrpc_accessrpc(vnode_t vp, u_int32_t mode, struct ucred *cred,
300    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, u_int32_t *rmodep)
301{
302	u_int32_t *tl;
303	u_int32_t supported, rmode;
304	int error;
305	struct nfsrv_descript nfsd, *nd = &nfsd;
306	nfsattrbit_t attrbits;
307	struct nfsmount *nmp;
308	struct nfsnode *np;
309
310	*attrflagp = 0;
311	supported = mode;
312	nmp = VFSTONFS(vp->v_mount);
313	np = VTONFS(vp);
314	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
315	    nmp->nm_fhsize == 0) {
316		/* Attempt to get the actual root file handle. */
317		error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, p);
318		if (error != 0)
319			return (EACCES);
320		if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
321			nfscl_statfs(vp, cred, p);
322	}
323	NFSCL_REQSTART(nd, NFSPROC_ACCESS, vp, cred);
324	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
325	*tl = txdr_unsigned(mode);
326	if (nd->nd_flag & ND_NFSV4) {
327		/*
328		 * And do a Getattr op.
329		 */
330		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
331		*tl = txdr_unsigned(NFSV4OP_GETATTR);
332		NFSGETATTR_ATTRBIT(&attrbits);
333		(void) nfsrv_putattrbit(nd, &attrbits);
334	}
335	error = nfscl_request(nd, vp, p, cred);
336	if (error)
337		return (error);
338	if (nd->nd_flag & ND_NFSV3) {
339		error = nfscl_postop_attr(nd, nap, attrflagp);
340		if (error)
341			goto nfsmout;
342	}
343	if (!nd->nd_repstat) {
344		if (nd->nd_flag & ND_NFSV4) {
345			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
346			supported = fxdr_unsigned(u_int32_t, *tl++);
347		} else {
348			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
349		}
350		rmode = fxdr_unsigned(u_int32_t, *tl);
351		if (nd->nd_flag & ND_NFSV4)
352			error = nfscl_postop_attr(nd, nap, attrflagp);
353
354		/*
355		 * It's not obvious what should be done about
356		 * unsupported access modes. For now, be paranoid
357		 * and clear the unsupported ones.
358		 */
359		rmode &= supported;
360		*rmodep = rmode;
361	} else
362		error = nd->nd_repstat;
363nfsmout:
364	m_freem(nd->nd_mrep);
365	return (error);
366}
367
368/*
369 * nfs open rpc
370 */
371int
372nfsrpc_open(vnode_t vp, int amode, struct ucred *cred, NFSPROC_T *p)
373{
374	struct nfsclopen *op;
375	struct nfscldeleg *dp;
376	struct nfsfh *nfhp;
377	struct nfsnode *np = VTONFS(vp);
378	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
379	u_int32_t mode, clidrev;
380	int ret, newone, error, expireret = 0, retrycnt;
381
382	/*
383	 * For NFSv4, Open Ops are only done on Regular Files.
384	 */
385	if (vp->v_type != VREG)
386		return (0);
387	mode = 0;
388	if (amode & FREAD)
389		mode |= NFSV4OPEN_ACCESSREAD;
390	if (amode & FWRITE)
391		mode |= NFSV4OPEN_ACCESSWRITE;
392	nfhp = np->n_fhp;
393
394	retrycnt = 0;
395	do {
396	    dp = NULL;
397	    error = nfscl_open(vp, nfhp->nfh_fh, nfhp->nfh_len, mode, 1,
398		cred, p, NULL, &op, &newone, &ret, 1, true);
399	    if (error) {
400		return (error);
401	    }
402	    if (nmp->nm_clp != NULL)
403		clidrev = nmp->nm_clp->nfsc_clientidrev;
404	    else
405		clidrev = 0;
406	    if (ret == NFSCLOPEN_DOOPEN) {
407		if (np->n_v4 != NULL) {
408			/*
409			 * For the first attempt, try and get a layout, if
410			 * pNFS is enabled for the mount.
411			 */
412			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
413			    nfs_numnfscbd == 0 ||
414			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
415				error = nfsrpc_openrpc(nmp, vp,
416				    np->n_v4->n4_data,
417				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
418				    np->n_fhp->nfh_len, mode, op,
419				    NFS4NODENAME(np->n_v4),
420				    np->n_v4->n4_namelen,
421				    &dp, 0, 0x0, cred, p, 0, 0);
422			else
423				error = nfsrpc_getopenlayout(nmp, vp,
424				    np->n_v4->n4_data,
425				    np->n_v4->n4_fhlen, np->n_fhp->nfh_fh,
426				    np->n_fhp->nfh_len, mode, op,
427				    NFS4NODENAME(np->n_v4),
428				    np->n_v4->n4_namelen, &dp, cred, p);
429			if (dp != NULL) {
430				NFSLOCKNODE(np);
431				np->n_flag &= ~NDELEGMOD;
432				/*
433				 * Invalidate the attribute cache, so that
434				 * attributes that pre-date the issue of a
435				 * delegation are not cached, since the
436				 * cached attributes will remain valid while
437				 * the delegation is held.
438				 */
439				NFSINVALATTRCACHE(np);
440				NFSUNLOCKNODE(np);
441				(void) nfscl_deleg(nmp->nm_mountp,
442				    op->nfso_own->nfsow_clp,
443				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
444			}
445		} else if (NFSHASNFSV4N(nmp)) {
446			/*
447			 * For the first attempt, try and get a layout, if
448			 * pNFS is enabled for the mount.
449			 */
450			if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
451			    nfs_numnfscbd == 0 ||
452			    (np->n_flag & NNOLAYOUT) != 0 || retrycnt > 0)
453				error = nfsrpc_openrpc(nmp, vp, nfhp->nfh_fh,
454				    nfhp->nfh_len, nfhp->nfh_fh, nfhp->nfh_len,
455				    mode, op, NULL, 0, &dp, 0, 0x0, cred, p, 0,
456				    0);
457			else
458				error = nfsrpc_getopenlayout(nmp, vp,
459				    nfhp->nfh_fh, nfhp->nfh_len, nfhp->nfh_fh,
460				    nfhp->nfh_len, mode, op, NULL, 0, &dp,
461				    cred, p);
462			if (dp != NULL) {
463				NFSLOCKNODE(np);
464				np->n_flag &= ~NDELEGMOD;
465				/*
466				 * Invalidate the attribute cache, so that
467				 * attributes that pre-date the issue of a
468				 * delegation are not cached, since the
469				 * cached attributes will remain valid while
470				 * the delegation is held.
471				 */
472				NFSINVALATTRCACHE(np);
473				NFSUNLOCKNODE(np);
474				(void) nfscl_deleg(nmp->nm_mountp,
475				    op->nfso_own->nfsow_clp,
476				    nfhp->nfh_fh, nfhp->nfh_len, cred, p, &dp);
477			}
478		} else {
479			error = EIO;
480		}
481		newnfs_copyincred(cred, &op->nfso_cred);
482	    } else if (ret == NFSCLOPEN_SETCRED)
483		/*
484		 * This is a new local open on a delegation. It needs
485		 * to have credentials so that an open can be done
486		 * against the server during recovery.
487		 */
488		newnfs_copyincred(cred, &op->nfso_cred);
489
490	    /*
491	     * nfso_opencnt is the count of how many VOP_OPEN()s have
492	     * been done on this Open successfully and a VOP_CLOSE()
493	     * is expected for each of these.
494	     * If error is non-zero, don't increment it, since the Open
495	     * hasn't succeeded yet.
496	     */
497	    if (!error) {
498		op->nfso_opencnt++;
499		if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) {
500		    NFSLOCKNODE(np);
501		    np->n_openstateid = op;
502		    NFSUNLOCKNODE(np);
503		}
504	    }
505	    nfscl_openrelease(nmp, op, error, newone);
506	    if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
507		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
508		error == NFSERR_BADSESSION) {
509		(void) nfs_catnap(PZERO, error, "nfs_open");
510	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
511		&& clidrev != 0) {
512		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
513		retrycnt++;
514	    }
515	} while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
516	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
517	    error == NFSERR_BADSESSION ||
518	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
519	     expireret == 0 && clidrev != 0 && retrycnt < 4));
520	if (error && retrycnt >= 4)
521		error = EIO;
522	return (error);
523}
524
/*
 * the actual open rpc
 *
 * Builds an Open (NFSV4OPEN_NOCREATE) followed by a Getattr for the Change
 * and TimeModify attributes, sends it with newnfs_request() and parses the
 * reply.
 *
 * nmp		- the mount; NFSHASNFSV4N(nmp) selects the claim types used
 * vp		- passed through to newnfs_request() and nfsrpc_openconfirm()
 * nfhp, fhlen	- file handle the request is started with
 * newfhp, newfhlen - file handle copied into a new delegation and used for
 *		  the OpenConfirm
 * mode		- share access bits, with the deny bits NFSLCK_SHIFT higher
 * op		- the open; its owner's seqid is sent and its stateid and
 *		  nfso_posixlock are set from the reply
 * name, namelen - component name, only put on the wire for the
 *		  CLAIMNULL/CLAIMDELEGATECUR cases
 * dpp		- in: delegation to claim, or NULL; out: reset to NULL and,
 *		  on success, set to a newly allocated delegation if the
 *		  server issued one (allocated with M_NFSCLDELEG)
 * reclaim	- non-zero for a CLAIMPREVIOUS open of type delegtype
 * syscred	- non-zero sets ND_USEGSSNAME on the request
 * recursed	- non-zero suppresses the second Open attempt done to get a
 *		  delegation after an OpenConfirm
 *
 * Returns 0 or an error.  On error any delegation allocated here is freed.
 */
int
nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen,
    u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op,
    u_int8_t *name, int namelen, struct nfscldeleg **dpp,
    int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p,
    int syscred, int recursed)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscldeleg *dp, *ndp = NULL;
	struct nfsvattr nfsva;
	u_int32_t rflags, deleg;
	nfsattrbit_t attrbits;
	int error, ret, acesize, limitby;
	struct nfsclsession *tsep;

	/* Remember the delegation passed in; *dpp becomes an output. */
	dp = *dpp;
	*dpp = NULL;
	nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
	    cred);
	/* seqid, share access, share deny and the two clientid words. */
	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	(void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	/* Open type (never create) followed by the claim type. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
	if (reclaim) {
		*tl = txdr_unsigned(NFSV4OPEN_CLAIMPREVIOUS);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(delegtype);
	} else {
		if (dp != NULL) {
			/* Claim the open against the delegation held. */
			if (NFSHASNFSV4N(nmp))
				*tl = txdr_unsigned(
				    NFSV4OPEN_CLAIMDELEGATECURFH);
			else
				*tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR);
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			/* The stateid's seqid is sent as 0 for NFSHASNFSV4N. */
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = dp->nfsdl_stateid.seqid;
			*tl++ = dp->nfsdl_stateid.other[0];
			*tl++ = dp->nfsdl_stateid.other[1];
			*tl = dp->nfsdl_stateid.other[2];
			if (!NFSHASNFSV4N(nmp))
				(void)nfsm_strtom(nd, name, namelen);
		} else if (NFSHASNFSV4N(nmp)) {
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
		} else {
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
			(void)nfsm_strtom(nd, name, namelen);
		}
	}
	/* Append a Getattr for the Change and TimeModify attributes. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	(void) nfsrv_putattrbit(nd, &attrbits);
	if (syscred)
		nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
	/*
	 * Parse the Open result if the compound succeeded or, for a reclaim,
	 * if it stopped with NFSERR_DELAY while there is still reply data.
	 */
	if (nd->nd_repstat == 0 || (nd->nd_repstat == NFSERR_DELAY &&
	    reclaim != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0)) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		op->nfso_stateid.seqid = *tl++;
		op->nfso_stateid.other[0] = *tl++;
		op->nfso_stateid.other[1] = *tl++;
		op->nfso_stateid.other[2] = *tl;
		/*
		 * tl is at the stateid's last word; the result flags are the
		 * last of the 6 words dissected after the stateid.
		 */
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error)
			goto nfsmout;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(u_int32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/* Note the first delegation handed out to the client. */
			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				op->nfso_own->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/* The file handle is stored right after the struct. */
			ndp = malloc(
			    sizeof (struct nfscldeleg) + newfhlen,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&ndp->nfsdl_owner);
			LIST_INIT(&ndp->nfsdl_lock);
			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
			ndp->nfsdl_fhlen = newfhlen;
			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			nfscl_lockinit(&ndp->nfsdl_rwlock);
			/* Delegation stateid followed by the recall flag. */
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				ndp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* The two words are multiplied together. */
					ndp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					ndp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				}
			} else {
				ndp->nfsdl_flags = NFSCLDL_READ;
			}
			if (ret)
				ndp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
			    &ret, &acesize, p);
			if (error)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
		if (*++tl == 0) {
			KASSERT(nd->nd_repstat == 0,
			    ("nfsrpc_openrpc: Getattr repstat"));
			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
			    NULL, NULL, NULL, p, cred);
			if (error)
				goto nfsmout;
		}
		if (ndp != NULL) {
			/*
			 * Seed the new delegation's change/modify time from
			 * the reclaimed delegation or from the Getattr reply.
			 * Without either, mark it as needing a recall.
			 */
			if (reclaim != 0 && dp != NULL) {
				ndp->nfsdl_change = dp->nfsdl_change;
				ndp->nfsdl_modtime = dp->nfsdl_modtime;
				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
			} else if (nd->nd_repstat == 0) {
				ndp->nfsdl_change = nfsva.na_filerev;
				ndp->nfsdl_modtime = nfsva.na_mtime;
				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
			} else
				ndp->nfsdl_flags |= NFSCLDL_RECALL;
		}
		/* The Open itself worked, so drop any Getattr failure. */
		nd->nd_repstat = 0;
		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM)) {
		    /* The server wants an OpenConfirm; retry it on DELAY. */
		    do {
			ret = nfsrpc_openconfirm(vp, newfhp, newfhlen, op,
			    cred, p);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_open");
		    } while (ret == NFSERR_DELAY);
		    error = ret;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) ||
		    nfscl_assumeposixlocks)
		    op->nfso_posixlock = 1;
		else
		    op->nfso_posixlock = 0;

		/*
		 * If the server is handing out delegations, but we didn't
		 * get one because an OpenConfirm was required, try the
		 * Open again, to get a delegation. This is a harmless no-op,
		 * from a server's point of view.
		 */
		if (!reclaim && (rflags & NFSV4OPEN_RESULTCONFIRM) &&
		    (op->nfso_own->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG)
		    && !error && dp == NULL && ndp == NULL && !recursed) {
		    do {
			/* recursed == 1 keeps this from nesting further. */
			ret = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp,
			    newfhlen, mode, op, name, namelen, &ndp, 0, 0x0,
			    cred, p, syscred, 1);
			if (ret == NFSERR_DELAY)
			    (void) nfs_catnap(PZERO, ret, "nfs_open2");
		    } while (ret == NFSERR_DELAY);
		    if (ret) {
			if (ndp != NULL) {
				free(ndp, M_NFSCLDELEG);
				ndp = NULL;
			}
			/* Only these failures of the 2nd Open are reported. */
			if (ret == NFSERR_STALECLIENTID ||
			    ret == NFSERR_STALEDONTRECOVER ||
			    ret == NFSERR_BADSESSION)
				error = ret;
		    }
		}
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	if (error == NFSERR_STALECLIENTID)
		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
nfsmout:
	/* Hand the new delegation to the caller only on success. */
	if (!error)
		*dpp = ndp;
	else if (ndp != NULL)
		free(ndp, M_NFSCLDELEG);
	m_freem(nd->nd_mrep);
	return (error);
}
750
751/*
752 * open downgrade rpc
753 */
754int
755nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op,
756    struct ucred *cred, NFSPROC_T *p)
757{
758	u_int32_t *tl;
759	struct nfsrv_descript nfsd, *nd = &nfsd;
760	int error;
761
762	NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp, cred);
763	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
764	if (NFSHASNFSV4N(VFSTONFS(vp->v_mount)))
765		*tl++ = 0;
766	else
767		*tl++ = op->nfso_stateid.seqid;
768	*tl++ = op->nfso_stateid.other[0];
769	*tl++ = op->nfso_stateid.other[1];
770	*tl++ = op->nfso_stateid.other[2];
771	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
772	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
773	*tl = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
774	error = nfscl_request(nd, vp, p, cred);
775	if (error)
776		return (error);
777	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
778	if (!nd->nd_repstat) {
779		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
780		op->nfso_stateid.seqid = *tl++;
781		op->nfso_stateid.other[0] = *tl++;
782		op->nfso_stateid.other[1] = *tl++;
783		op->nfso_stateid.other[2] = *tl;
784	}
785	if (nd->nd_repstat && error == 0)
786		error = nd->nd_repstat;
787	if (error == NFSERR_STALESTATEID)
788		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
789nfsmout:
790	m_freem(nd->nd_mrep);
791	return (error);
792}
793
794/*
795 * V4 Close operation.
796 */
797int
798nfsrpc_close(vnode_t vp, int doclose, NFSPROC_T *p)
799{
800	struct nfsclclient *clp;
801	int error;
802
803	if (vp->v_type != VREG)
804		return (0);
805	if (doclose)
806		error = nfscl_doclose(vp, &clp, p);
807	else {
808		error = nfscl_getclose(vp, &clp);
809		if (error == 0)
810			nfscl_clientrelease(clp);
811	}
812	return (error);
813}
814
815/*
816 * Close the open.
817 */
818int
819nfsrpc_doclose(struct nfsmount *nmp, struct nfsclopen *op, NFSPROC_T *p,
820    bool loop_on_delayed, bool freeop)
821{
822	struct nfsrv_descript nfsd, *nd = &nfsd;
823	struct nfscllockowner *lp, *nlp;
824	struct nfscllock *lop, *nlop;
825	struct ucred *tcred;
826	u_int64_t off = 0, len = 0;
827	u_int32_t type = NFSV4LOCKT_READ;
828	int error, do_unlock, trycnt;
829	bool own_not_null;
830
831	tcred = newnfs_getcred();
832	newnfs_copycred(&op->nfso_cred, tcred);
833	/*
834	 * (Theoretically this could be done in the same
835	 *  compound as the close, but having multiple
836	 *  sequenced Ops in the same compound might be
837	 *  too scary for some servers.)
838	 */
839	if (op->nfso_posixlock) {
840		off = 0;
841		len = NFS64BITSSET;
842		type = NFSV4LOCKT_READ;
843	}
844
845	/*
846	 * Since this function is only called from VOP_INACTIVE(), no
847	 * other thread will be manipulating this Open. As such, the
848	 * lock lists are not being changed by other threads, so it should
849	 * be safe to do this without locking.
850	 */
851	LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
852		do_unlock = 1;
853		LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
854			if (op->nfso_posixlock == 0) {
855				off = lop->nfslo_first;
856				len = lop->nfslo_end - lop->nfslo_first;
857				if (lop->nfslo_type == F_WRLCK)
858					type = NFSV4LOCKT_WRITE;
859				else
860					type = NFSV4LOCKT_READ;
861			}
862			if (do_unlock) {
863				trycnt = 0;
864				do {
865					error = nfsrpc_locku(nd, nmp, lp, off,
866					    len, type, tcred, p, 0);
867					if ((nd->nd_repstat == NFSERR_GRACE ||
868					    nd->nd_repstat == NFSERR_DELAY) &&
869					    error == 0)
870						(void) nfs_catnap(PZERO,
871						    (int)nd->nd_repstat,
872						    "nfs_close");
873				} while ((nd->nd_repstat == NFSERR_GRACE ||
874				    nd->nd_repstat == NFSERR_DELAY) &&
875				    error == 0 && trycnt++ < 5);
876				if (op->nfso_posixlock)
877					do_unlock = 0;
878			}
879			nfscl_freelock(lop, 0);
880		}
881		/*
882		 * Do a ReleaseLockOwner.
883		 * The lock owner name nfsl_owner may be used by other opens for
884		 * other files but the lock_owner4 name that nfsrpc_rellockown()
885		 * puts on the wire has the file handle for this file appended
886		 * to it, so it can be done now.
887		 */
888		(void)nfsrpc_rellockown(nmp, lp, lp->nfsl_open->nfso_fh,
889		    lp->nfsl_open->nfso_fhlen, tcred, p);
890	}
891
892	/*
893	 * There could be other Opens for different files on the same
894	 * OpenOwner, so locking is required.
895	 */
896	own_not_null = false;
897	if (op->nfso_own != NULL) {
898		own_not_null = true;
899		NFSLOCKCLSTATE();
900		nfscl_lockexcl(&op->nfso_own->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
901		NFSUNLOCKCLSTATE();
902	}
903	do {
904		error = nfscl_tryclose(op, tcred, nmp, p, loop_on_delayed);
905		if (error == NFSERR_GRACE)
906			(void) nfs_catnap(PZERO, error, "nfs_close");
907	} while (error == NFSERR_GRACE);
908	if (own_not_null) {
909		NFSLOCKCLSTATE();
910		nfscl_lockunlock(&op->nfso_own->nfsow_rwlock);
911	}
912
913	LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp)
914		nfscl_freelockowner(lp, 0);
915	if (freeop && error != NFSERR_DELAY)
916		nfscl_freeopen(op, 0, true);
917	if (own_not_null)
918		NFSUNLOCKCLSTATE();
919	NFSFREECRED(tcred);
920	return (error);
921}
922
923/*
924 * The actual Close RPC.
925 */
926int
927nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp,
928    struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p,
929    int syscred)
930{
931	u_int32_t *tl;
932	int error;
933
934	nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh,
935	    op->nfso_fhlen, NULL, NULL, 0, 0, cred);
936	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
937	if (NFSHASNFSV4N(nmp)) {
938		*tl++ = 0;
939		*tl++ = 0;
940	} else {
941		*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
942		*tl++ = op->nfso_stateid.seqid;
943	}
944	*tl++ = op->nfso_stateid.other[0];
945	*tl++ = op->nfso_stateid.other[1];
946	*tl = op->nfso_stateid.other[2];
947	if (syscred)
948		nd->nd_flag |= ND_USEGSSNAME;
949	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
950	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
951	if (error)
952		return (error);
953	if (!NFSHASNFSV4N(nmp))
954		NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
955	if (nd->nd_repstat == 0)
956		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
957	error = nd->nd_repstat;
958	if (!NFSHASNFSV4N(nmp) && error == NFSERR_STALESTATEID)
959		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
960nfsmout:
961	m_freem(nd->nd_mrep);
962	return (error);
963}
964
965/*
966 * V4 Open Confirm RPC.
967 */
968int
969nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen,
970    struct nfsclopen *op, struct ucred *cred, NFSPROC_T *p)
971{
972	u_int32_t *tl;
973	struct nfsrv_descript nfsd, *nd = &nfsd;
974	struct nfsmount *nmp;
975	int error;
976
977	nmp = VFSTONFS(vp->v_mount);
978	if (NFSHASNFSV4N(nmp))
979		return (0);		/* No confirmation for NFSv4.1. */
980	nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL,
981	    0, 0, NULL);
982	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
983	*tl++ = op->nfso_stateid.seqid;
984	*tl++ = op->nfso_stateid.other[0];
985	*tl++ = op->nfso_stateid.other[1];
986	*tl++ = op->nfso_stateid.other[2];
987	*tl = txdr_unsigned(op->nfso_own->nfsow_seqid);
988	error = nfscl_request(nd, vp, p, cred);
989	if (error)
990		return (error);
991	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
992	if (!nd->nd_repstat) {
993		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
994		op->nfso_stateid.seqid = *tl++;
995		op->nfso_stateid.other[0] = *tl++;
996		op->nfso_stateid.other[1] = *tl++;
997		op->nfso_stateid.other[2] = *tl;
998	}
999	error = nd->nd_repstat;
1000	if (error == NFSERR_STALESTATEID)
1001		nfscl_initiate_recovery(op->nfso_own->nfsow_clp);
1002nfsmout:
1003	m_freem(nd->nd_mrep);
1004	return (error);
1005}
1006
/*
 * Do the setclientid and setclientid confirm RPCs. Called from nfs_statfs()
 * when a mount has just occurred and when the server replies NFSERR_EXPIRED.
 *
 * For a mount using NFSv4.1 or later, a session is set up instead:
 * - If retokp is non-NULL and there is already a session on nm_sess, a
 *   CreateSession using the ClientID of that session is tried first.
 *   *retokp is set true when that succeeds.  If it fails and *retokp was
 *   already true, NFSERR_IO is returned.
 * - Otherwise (or when the above failed with *retokp false) an ExchangeID
 *   followed by a CreateSession is done.
 * - On success the new session is put at the head of nm_sess, any
 *   previous head session is marked defunct and, when reclaim is zero,
 *   a ReclaimComplete is done.
 * For NFSv4.0, a non-NULL retokp with *retokp true results in NFSERR_IO.
 * Otherwise a zeroed nfsclds is put at the head of nm_sess and the
 * SetClientID and SetClientIDConfirm RPCs are done, storing the clientid
 * from the reply in that structure.
 *
 * Returns 0 upon success or an error (RPC layer error or reply status).
 */
int
nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim,
    bool *retokp, struct ucred *cred, NFSPROC_T *p)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd;
	struct nfsrv_descript *nd = &nfsd;
	u_int8_t *cp = NULL, *cp2, addr[INET6_ADDRSTRLEN + 9];
	u_short port;
	int error, isinet6 = 0, callblen;
	nfsquad_t confirm;
	/* Counter handed out as nfsc_rev; shared by all calls. */
	static u_int32_t rev = 0;
	struct nfsclds *dsp, *odsp;
	struct in6_addr a6;
	struct nfsclsession *tsep;
	struct rpc_reconupcall recon;
	struct nfscl_reconarg *rcp;

	/* nfsboottime is put on the wire in the SetClientID request below. */
	if (nfsboottime.tv_sec == 0)
		NFSSETBOOTTIME(nfsboottime);
	if (NFSHASNFSV4N(nmp)) {
		/*
		 * Start out with a non-zero error so that the ExchangeID
		 * code below is used unless the CreateSession for an extant
		 * ClientID succeeds.
		 */
		error = NFSERR_BADSESSION;
		odsp = dsp = NULL;
		/* Only look for an existing session when retokp was passed. */
		if (retokp != NULL) {
			NFSLOCKMNT(nmp);
			odsp = TAILQ_FIRST(&nmp->nm_sess);
			NFSUNLOCKMNT(nmp);
		}
		if (odsp != NULL) {
			/*
			 * When a session already exists, first try a
			 * CreateSession with the extant ClientID.
			 */
			/*
			 * The new structure copies the server owner,
			 * ClientID and flags from the old one and uses the
			 * old sequenceid plus one.  servownlen + 1 bytes of
			 * server owner are copied (presumably to include a
			 * terminating nul -- confirm).
			 */
			dsp = malloc(sizeof(struct nfsclds) +
			    odsp->nfsclds_servownlen + 1, M_NFSCLDS,
			    M_WAITOK | M_ZERO);
			dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
			dsp->nfsclds_servownlen = odsp->nfsclds_servownlen;
			dsp->nfsclds_sess.nfsess_clientid =
			    odsp->nfsclds_sess.nfsess_clientid;
			dsp->nfsclds_sess.nfsess_sequenceid =
			    odsp->nfsclds_sess.nfsess_sequenceid + 1;
			dsp->nfsclds_flags = odsp->nfsclds_flags;
			if (dsp->nfsclds_servownlen > 0)
				memcpy(dsp->nfsclds_serverown,
				    odsp->nfsclds_serverown,
				    dsp->nfsclds_servownlen + 1);
			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
			    NULL, MTX_DEF);
			nfscl_initsessionslots(&dsp->nfsclds_sess);
			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
			    &nmp->nm_sockreq, NULL,
			    dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p);
			NFSCL_DEBUG(1, "create session for extant "
			    "ClientID=%d\n", error);
			if (error != 0) {
				nfscl_freenfsclds(dsp);
				dsp = NULL;
				/*
				 * If *retokp is true, return any error other
				 * than NFSERR_STALECLIENTID,
				 * NFSERR_BADSESSION or NFSERR_STALEDONTRECOVER
				 * so that nfscl_recover() will not loop.
				 */
				if (*retokp)
					return (NFSERR_IO);
			} else
				*retokp = true;
		} else if (retokp != NULL && *retokp)
			/* No session to reuse, but *retokp is already set. */
			return (NFSERR_IO);
		if (error != 0) {
			/*
			 * Either there was no previous session or the
			 * CreateSession attempt failed, so...
			 * do an ExchangeID followed by the CreateSession.
			 */
			clp->nfsc_rev = rev++;
			error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, 0,
			    NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp,
			    cred, p);
			NFSCL_DEBUG(1, "aft exch=%d\n", error);
			if (error == 0)
				error = nfsrpc_createsession(nmp,
				    &dsp->nfsclds_sess, &nmp->nm_sockreq, NULL,
				    dsp->nfsclds_sess.nfsess_sequenceid, 1,
				    cred, p);
			NFSCL_DEBUG(1, "aft createsess=%d\n", error);
		}
		if (error == 0) {
			/*
			 * If the session supports a backchannel, set up
			 * the BindConnectionToSession call in the krpc
			 * so that it is done on a reconnection.
			 */
			if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0) {
				/*
				 * rcp is handed to the krpc via
				 * CLSET_RECONUPCALL and is not freed here.
				 */
				rcp = mem_alloc(sizeof(*rcp));
				rcp->minorvers = nmp->nm_minorvers;
				memcpy(rcp->sessionid,
				    dsp->nfsclds_sess.nfsess_sessionid,
				    NFSX_V4SESSIONID);
				recon.call = nfsrpc_bindconnsess;
				recon.arg = rcp;
				CLNT_CONTROL(nmp->nm_client, CLSET_RECONUPCALL,
				    &recon);
			}

			NFSLOCKMNT(nmp);
			/*
			 * The old sessions cannot be safely free'd
			 * here, since they may still be used by
			 * in-progress RPCs.
			 */
			tsep = NULL;
			if (TAILQ_FIRST(&nmp->nm_sess) != NULL) {
				/*
				 * Mark the old session defunct.  Needed
				 * when called from nfscl_hasexpired().
				 */
				tsep = NFSMNT_MDSSESSION(nmp);
				tsep->nfsess_defunct = 1;
			}
			TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp,
			    nfsclds_list);
			/*
			 * Wake up RPCs waiting for a slot on the
			 * old session. These will then fail with
			 * NFSERR_BADSESSION and be retried with the
			 * new session by nfsv4_setsequence().
			 * Also wakeup() processes waiting for the
			 * new session.
			 */
			if (tsep != NULL)
				wakeup(&tsep->nfsess_slots);
			wakeup(&nmp->nm_sess);
			NFSUNLOCKMNT(nmp);
		} else if (dsp != NULL)
			/*
			 * Failed.  dsp can only be non-NULL here if it was
			 * set via nfsrpc_exchangeid(), since the extant
			 * ClientID case above clears it upon failure.
			 */
			nfscl_freenfsclds(dsp);
		/* ReclaimComplete is only done when reclaim is zero. */
		if (error == 0 && reclaim == 0) {
			error = nfsrpc_reclaimcomplete(nmp, cred, p);
			NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error);
			if (error == NFSERR_COMPLETEALREADY ||
			    error == NFSERR_NOTSUPP)
				/* Ignore this error. */
				error = 0;
		}
		return (error);
	} else if (retokp != NULL && *retokp)
		/* NFSv4.0 with *retokp already set: just fail. */
		return (NFSERR_IO);
	clp->nfsc_rev = rev++;

	/*
	 * Allocate a single session structure for NFSv4.0, because some of
	 * the fields are used by NFSv4.0 although it doesn't do a session.
	 */
	dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO);
	mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
	mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF);
	NFSLOCKMNT(nmp);
	TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list);
	/*
	 * tsep is where the clientid from the reply gets stored.
	 * NOTE(review): presumably this refers to the structure just
	 * inserted at the head of nm_sess -- confirm NFSMNT_MDSSESSION().
	 */
	tsep = NFSMNT_MDSSESSION(nmp);
	NFSUNLOCKMNT(nmp);

	/* Build the SetClientID: boot time, nfsc_rev and the client id. */
	nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL, 0, 0,
	    NULL);
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(nfsboottime.tv_sec);
	*tl = txdr_unsigned(clp->nfsc_rev);
	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);

	/*
	 * set up the callback address
	 */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFS_CALLBCKPROG);
	/*
	 * A configured nfsv4_callbackaddr takes precedence; only when it
	 * is empty is nfscl_getmyip() used to get an address.
	 */
	callblen = strlen(nfsv4_callbackaddr);
	if (callblen == 0)
		cp = nfscl_getmyip(nmp, &a6, &isinet6);
	if (nfscl_enablecallb && nfs_numnfscbd > 0 &&
	    (callblen > 0 || cp != NULL)) {
		/*
		 * The two bytes of the port, in network byte order, are
		 * appended to the address as ".<byte0>.<byte1>".
		 */
		port = htons(nfsv4_cbport);
		cp2 = (u_int8_t *)&port;
#ifdef INET6
		/* A ':' in the configured address selects "tcp6". */
		if ((callblen > 0 &&
		     strchr(nfsv4_callbackaddr, ':')) || isinet6) {
			char ip6buf[INET6_ADDRSTRLEN], *ip6add;

			(void) nfsm_strtom(nd, "tcp6", 4);
			if (callblen == 0) {
				ip6_sprintf(ip6buf, (struct in6_addr *)cp);
				ip6add = ip6buf;
			} else {
				ip6add = nfsv4_callbackaddr;
			}
			snprintf(addr, INET6_ADDRSTRLEN + 9, "%s.%d.%d",
			    ip6add, cp2[0], cp2[1]);
		} else
#endif
		{
			(void) nfsm_strtom(nd, "tcp", 3);
			if (callblen == 0)
				/* cp is non-NULL here, since callblen is 0. */
				snprintf(addr, INET6_ADDRSTRLEN + 9,
				    "%d.%d.%d.%d.%d.%d", cp[0], cp[1],
				    cp[2], cp[3], cp2[0], cp2[1]);
			else
				snprintf(addr, INET6_ADDRSTRLEN + 9,
				    "%s.%d.%d", nfsv4_callbackaddr,
				    cp2[0], cp2[1]);
		}
		(void) nfsm_strtom(nd, addr, strlen(addr));
	} else {
		/* No usable callback address, so send an all zeros one. */
		(void) nfsm_strtom(nd, "tcp", 3);
		(void) nfsm_strtom(nd, "0.0.0.0.0.0", 11);
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(clp->nfsc_cbident);
	nd->nd_flag |= ND_USEGSSNAME;
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error)
		return (error);
	if (nd->nd_repstat == 0) {
	    /*
	     * The reply has the two words of the clientid followed by the
	     * two words that are echoed back in the confirm.
	     * (NFSM_DISSECT() presumably goes to nfsmout when the reply is
	     * too short -- confirm against the macro.)
	     */
	    NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
	    tsep->nfsess_clientid.lval[0] = *tl++;
	    tsep->nfsess_clientid.lval[1] = *tl++;
	    confirm.lval[0] = *tl++;
	    confirm.lval[1] = *tl;
	    m_freem(nd->nd_mrep);
	    nd->nd_mrep = NULL;

	    /*
	     * and confirm it.
	     */
	    nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL,
		NULL, 0, 0, NULL);
	    NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
	    *tl++ = tsep->nfsess_clientid.lval[0];
	    *tl++ = tsep->nfsess_clientid.lval[1];
	    *tl++ = confirm.lval[0];
	    *tl = confirm.lval[1];
	    nd->nd_flag |= ND_USEGSSNAME;
	    error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
		cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	    if (error)
		return (error);
	    /*
	     * Clear nd_mrep after freeing it, so that the m_freem() at
	     * nfsmout does not free the reply a second time.
	     */
	    m_freem(nd->nd_mrep);
	    nd->nd_mrep = NULL;
	}
	/* The status of whichever RPC was done last is returned. */
	error = nd->nd_repstat;
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
1264
1265/*
1266 * nfs getattr call.
1267 */
1268int
1269nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
1270    struct nfsvattr *nap)
1271{
1272	struct nfsrv_descript nfsd, *nd = &nfsd;
1273	int error;
1274	nfsattrbit_t attrbits;
1275	struct nfsnode *np;
1276	struct nfsmount *nmp;
1277
1278	nmp = VFSTONFS(vp->v_mount);
1279	np = VTONFS(vp);
1280	if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
1281	    nmp->nm_fhsize == 0) {
1282		/* Attempt to get the actual root file handle. */
1283		error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, p);
1284		if (error != 0)
1285			return (EACCES);
1286		if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
1287			nfscl_statfs(vp, cred, p);
1288	}
1289	NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
1290	if (nd->nd_flag & ND_NFSV4) {
1291		NFSGETATTR_ATTRBIT(&attrbits);
1292		(void) nfsrv_putattrbit(nd, &attrbits);
1293	}
1294	error = nfscl_request(nd, vp, p, cred);
1295	if (error)
1296		return (error);
1297	if (!nd->nd_repstat)
1298		error = nfsm_loadattr(nd, nap);
1299	else
1300		error = nd->nd_repstat;
1301	m_freem(nd->nd_mrep);
1302	return (error);
1303}
1304
1305/*
1306 * nfs getattr call with non-vnode arguments.
1307 */
1308int
1309nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred,
1310    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp,
1311    uint32_t *leasep)
1312{
1313	struct nfsrv_descript nfsd, *nd = &nfsd;
1314	int error, vers = NFS_VER2;
1315	nfsattrbit_t attrbits;
1316
1317	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL, 0, 0,
1318	    cred);
1319	if (nd->nd_flag & ND_NFSV4) {
1320		vers = NFS_VER4;
1321		NFSGETATTR_ATTRBIT(&attrbits);
1322		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME);
1323		(void) nfsrv_putattrbit(nd, &attrbits);
1324	} else if (nd->nd_flag & ND_NFSV3) {
1325		vers = NFS_VER3;
1326	}
1327	if (syscred)
1328		nd->nd_flag |= ND_USEGSSNAME;
1329	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
1330	    NFS_PROG, vers, NULL, 1, xidp, NULL);
1331	if (error)
1332		return (error);
1333	if (nd->nd_repstat == 0) {
1334		if ((nd->nd_flag & ND_NFSV4) != 0)
1335			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
1336			    NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL,
1337			    NULL, NULL);
1338		else
1339			error = nfsm_loadattr(nd, nap);
1340	} else
1341		error = nd->nd_repstat;
1342	m_freem(nd->nd_mrep);
1343	return (error);
1344}
1345
1346/*
1347 * Do an nfs setattr operation.
1348 */
1349int
1350nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp,
1351    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *rnap, int *attrflagp)
1352{
1353	int error, expireret = 0, openerr, retrycnt;
1354	u_int32_t clidrev = 0, mode;
1355	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1356	struct nfsfh *nfhp;
1357	nfsv4stateid_t stateid;
1358	void *lckp;
1359
1360	if (nmp->nm_clp != NULL)
1361		clidrev = nmp->nm_clp->nfsc_clientidrev;
1362	if (vap != NULL && NFSATTRISSET(u_quad_t, vap, va_size))
1363		mode = NFSV4OPEN_ACCESSWRITE;
1364	else
1365		mode = NFSV4OPEN_ACCESSREAD;
1366	retrycnt = 0;
1367	do {
1368		lckp = NULL;
1369		openerr = 1;
1370		if (NFSHASNFSV4(nmp)) {
1371			nfhp = VTONFS(vp)->n_fhp;
1372			error = nfscl_getstateid(vp, nfhp->nfh_fh,
1373			    nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp);
1374			if (error && vp->v_type == VREG &&
1375			    (mode == NFSV4OPEN_ACCESSWRITE ||
1376			     nfstest_openallsetattr)) {
1377				/*
1378				 * No Open stateid, so try and open the file
1379				 * now.
1380				 */
1381				if (mode == NFSV4OPEN_ACCESSWRITE)
1382					openerr = nfsrpc_open(vp, FWRITE, cred,
1383					    p);
1384				else
1385					openerr = nfsrpc_open(vp, FREAD, cred,
1386					    p);
1387				if (!openerr)
1388					(void) nfscl_getstateid(vp,
1389					    nfhp->nfh_fh, nfhp->nfh_len,
1390					    mode, 0, cred, p, &stateid, &lckp);
1391			}
1392		}
1393		if (vap != NULL)
1394			error = nfsrpc_setattrrpc(vp, vap, &stateid, cred, p,
1395			    rnap, attrflagp);
1396		else
1397			error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid);
1398		if (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD) {
1399			NFSLOCKMNT(nmp);
1400			nmp->nm_state |= NFSSTA_OPENMODE;
1401			NFSUNLOCKMNT(nmp);
1402		}
1403		if (error == NFSERR_STALESTATEID)
1404			nfscl_initiate_recovery(nmp->nm_clp);
1405		if (lckp != NULL)
1406			nfscl_lockderef(lckp);
1407		if (!openerr)
1408			(void) nfsrpc_close(vp, 0, p);
1409		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1410		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1411		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1412			(void) nfs_catnap(PZERO, error, "nfs_setattr");
1413		} else if ((error == NFSERR_EXPIRED ||
1414		    ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1415		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
1416			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1417		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1418		    NFSHASNFSV4N(nmp)) {
1419			error = EIO;
1420		}
1421		retrycnt++;
1422	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1423	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1424	    error == NFSERR_BADSESSION ||
1425	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1426	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1427	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1428	    (error == NFSERR_OPENMODE && mode == NFSV4OPEN_ACCESSREAD &&
1429	     retrycnt < 4));
1430	if (error && retrycnt >= 4)
1431		error = EIO;
1432	return (error);
1433}
1434
1435static int
1436nfsrpc_setattrrpc(vnode_t vp, struct vattr *vap,
1437    nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
1438    struct nfsvattr *rnap, int *attrflagp)
1439{
1440	u_int32_t *tl;
1441	struct nfsrv_descript nfsd, *nd = &nfsd;
1442	int error;
1443	nfsattrbit_t attrbits;
1444
1445	*attrflagp = 0;
1446	NFSCL_REQSTART(nd, NFSPROC_SETATTR, vp, cred);
1447	if (nd->nd_flag & ND_NFSV4)
1448		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1449	vap->va_type = vp->v_type;
1450	nfscl_fillsattr(nd, vap, vp, NFSSATTR_FULL, 0);
1451	if (nd->nd_flag & ND_NFSV3) {
1452		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1453		*tl = newnfs_false;
1454	} else if (nd->nd_flag & ND_NFSV4) {
1455		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1456		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1457		NFSGETATTR_ATTRBIT(&attrbits);
1458		(void) nfsrv_putattrbit(nd, &attrbits);
1459	}
1460	error = nfscl_request(nd, vp, p, cred);
1461	if (error)
1462		return (error);
1463	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
1464		error = nfscl_wcc_data(nd, vp, rnap, attrflagp, NULL, NULL);
1465	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 && !error)
1466		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1467	if (!(nd->nd_flag & ND_NFSV3) && !nd->nd_repstat && !error)
1468		error = nfscl_postop_attr(nd, rnap, attrflagp);
1469	m_freem(nd->nd_mrep);
1470	if (nd->nd_repstat && !error)
1471		error = nd->nd_repstat;
1472	return (error);
1473}
1474
/*
 * nfs lookup rpc
 *
 * Look up name (of length len) in the directory dvp.
 * - dnap/dattrflagp: receive the directory's attributes; *dattrflagp is
 *   set non-zero when they were loaded.
 * - nap/attrflagp: receive the attributes of the object found.
 * - nfhpp: receives a malloc'd (M_NFSFH) copy of the file handle.
 * - openmode: when non-zero (and the name is not ".." for NFSv4),
 *   NFSPROC_LOOKUPOPEN is used and a Verify of type VREG plus an Open
 *   (claim type NFSV4OPEN_CLAIMFH) with this access mode is added to the
 *   request.
 * For NFSv4, "." is satisfied locally with a copy of the directory's file
 * handle and ".." is done via NFSPROC_LOOKUPP.
 * Returns 0 upon success, ENOTDIR if dvp is not a directory, ENAMETOOLONG
 * if len exceeds NFS_MAXNAMLEN, else an RPC layer or parsing error or the
 * reply status.
 */
int
nfsrpc_lookup(vnode_t dvp, char *name, int len, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *dnap, struct nfsvattr *nap,
    struct nfsfh **nfhpp, int *attrflagp, int *dattrflagp, uint32_t openmode)
{
	uint32_t deleg, rflags, *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	struct nfsnode *np;
	struct nfsfh *nfhp;
	nfsattrbit_t attrbits;
	int error = 0, lookupp = 0, newone, ret, retop;
	uint8_t own[NFSV4CL_LOCKNAMELEN];
	struct nfsclopen *op;
	struct nfscldeleg *ndp;
	nfsv4stateid_t stateid;

	*attrflagp = 0;
	*dattrflagp = 0;
	if (dvp->v_type != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(dvp->v_mount);
	if (len > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	if (NFSHASNFSV4(nmp) && len == 1 &&
		name[0] == '.') {
		/*
		 * Just return the current dir's fh.
		 */
		np = VTONFS(dvp);
		nfhp = malloc(sizeof (struct nfsfh) +
			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
		nfhp->nfh_len = np->n_fhp->nfh_len;
		NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
		*nfhpp = nfhp;
		return (0);
	}
	if (NFSHASNFSV4(nmp) && len == 2 &&
		name[0] == '.' && name[1] == '.') {
		lookupp = 1;
		/* No Open is attempted for a lookup of the parent. */
		openmode = 0;
		NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, dvp, cred);
	} else if (openmode != 0) {
		NFSCL_REQSTART(nd, NFSPROC_LOOKUPOPEN, dvp, cred);
		nfsm_strtom(nd, name, len);
	} else {
		NFSCL_REQSTART(nd, NFSPROC_LOOKUP, dvp, cred);
		(void) nfsm_strtom(nd, name, len);
	}
	if (nd->nd_flag & ND_NFSV4) {
		/* Append GetFH and Getattr operations for the object found. */
		NFSGETATTR_ATTRBIT(&attrbits);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
		if (openmode != 0) {
			/* Test for a VREG file. */
			NFSZERO_ATTRBIT(&attrbits);
			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_VERIFY);
			nfsrv_putattrbit(nd, &attrbits);
			/* Attribute data: a length word, then the type. */
			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSX_UNSIGNED);
			*tl = vtonfsv34_type(VREG);

			/* Attempt the Open for VREG. */
			nfscl_filllockowner(NULL, own, F_POSIX);
			NFSM_BUILD(tl, uint32_t *, 6 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSV4OP_OPEN);
			*tl++ = 0;		/* seqid, ignored. */
			*tl++ = txdr_unsigned(openmode);
			*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
			*tl++ = 0;		/* ClientID, ignored. */
			*tl = 0;
			nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN);
			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
			*tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
		}
	}
	error = nfscl_request(nd, dvp, p, cred);
	if (error)
		return (error);
	/* ndp is only set non-NULL when a delegation is parsed below. */
	ndp = NULL;
	if (nd->nd_repstat) {
		/*
		 * When an NFSv4 Lookupp returns ENOENT, it means that
		 * the lookup is at the root of an fs, so return this dir.
		 */
		if (nd->nd_repstat == NFSERR_NOENT && lookupp) {
		    np = VTONFS(dvp);
		    nfhp = malloc(sizeof (struct nfsfh) +
			np->n_fhp->nfh_len, M_NFSFH, M_WAITOK);
		    nfhp->nfh_len = np->n_fhp->nfh_len;
		    NFSBCOPY(np->n_fhp->nfh_fh, nfhp->nfh_fh, nfhp->nfh_len);
		    *nfhpp = nfhp;
		    m_freem(nd->nd_mrep);
		    return (0);
		}
		if (nd->nd_flag & ND_NFSV3)
		    error = nfscl_postop_attr(nd, dnap, dattrflagp);
		else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
		    ND_NFSV4) {
			/* Load the directory attributes. */
			error = nfsm_loadattr(nd, dnap);
			if (error != 0)
				goto nfsmout;
			*dattrflagp = 1;
		}
		/*
		 * When an Open was requested, the failure may have been for
		 * an operation after the Lookup.  Step through the reply
		 * for each operation (two words, the second being its
		 * status), stopping at the first one that failed or when
		 * the reply has no more data.
		 */
		/* Check Lookup operation reply status. */
		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				goto nfsmout;
		}
		/* Look for GetFH reply. */
		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				goto nfsmout;
			error = nfsm_getfh(nd, nfhpp);
			if (error)
				goto nfsmout;
		}
		/* Look for Getattr reply. */
		if (openmode != 0 && (nd->nd_flag & ND_NOMOREDATA) == 0) {
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				goto nfsmout;
			error = nfsm_loadattr(nd, nap);
			if (error == 0) {
				/*
				 * We have now successfully completed the
				 * lookup, so set nd_repstat to 0.
				 */
				nd->nd_repstat = 0;
				*attrflagp = 1;
			}
		}
		goto nfsmout;
	}
	if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
		/* Load the directory attributes. */
		error = nfsm_loadattr(nd, dnap);
		if (error != 0)
			goto nfsmout;
		*dattrflagp = 1;
		/* Skip over the Lookup and GetFH operation status values. */
		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
	}
	error = nfsm_getfh(nd, nfhpp);
	if (error)
		goto nfsmout;

	error = nfscl_postop_attr(nd, nap, attrflagp);
	if (openmode != 0 && error == 0) {
		/*
		 * Parse the Open reply.  One dissect covers the Verify and
		 * Open status words, the open stateid and the words up to
		 * and including the result flags.
		 */
		NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID +
		    10 * NFSX_UNSIGNED);
		tl += 4;	/* Skip over Verify+Open status. */
		stateid.seqid = *tl++;
		stateid.other[0] = *tl++;
		stateid.other[1] = *tl++;
		stateid.other[2] = *tl;
		/*
		 * tl is left at other[2].  The result flags are six words
		 * further on; the words in between are not examined.
		 */
		rflags = fxdr_unsigned(uint32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error != 0)
			goto nfsmout;
		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(uint32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			/*
			 * Just need to fill in the fields used by
			 * nfscl_trydelegreturn().
			 * Mark the mount point as acquiring
			 * delegations, so NFSPROC_LOOKUPOPEN will
			 * no longer be done.
			 */
			NFSLOCKMNT(nmp);
			nmp->nm_privflag |= NFSMNTP_DELEGISSUED;
			NFSUNLOCKMNT(nmp);
			ndp = malloc(sizeof(struct nfscldeleg) +
			    (*nfhpp)->nfh_len, M_NFSCLDELEG, M_WAITOK);
			ndp->nfsdl_fhlen = (*nfhpp)->nfh_len;
			NFSBCOPY((*nfhpp)->nfh_fh, ndp->nfsdl_fh,
			    ndp->nfsdl_fhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		/*
		 * Record the Open in the client state.  A failure of
		 * nfscl_open() is not returned to the caller, since error
		 * is still 0 when nfsmout is reached from here.
		 */
		ret = nfscl_open(dvp, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len,
		    openmode, 0, cred, p, NULL, &op, &newone, &retop, 1, true);
		if (ret != 0)
			goto nfsmout;
		if (newone != 0) {
			op->nfso_stateid.seqid = stateid.seqid;
			op->nfso_stateid.other[0] = stateid.other[0];
			op->nfso_stateid.other[1] = stateid.other[1];
			op->nfso_stateid.other[2] = stateid.other[2];
			op->nfso_mode = openmode;
		} else {
			/* An existing open only gets the new seqid. */
			op->nfso_stateid.seqid = stateid.seqid;
			if (retop == NFSCLOPEN_DOOPEN)
				op->nfso_mode |= openmode;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
		    nfscl_assumeposixlocks)
			op->nfso_posixlock = 1;
		else
			op->nfso_posixlock = 0;
		nfscl_openrelease(nmp, op, 0, 0);
		if (ndp != NULL) {
			/*
			 * Since we do not have the vnode, we
			 * cannot invalidate cached attributes.
			 * Just return the delegation.
			 */
			nfscl_trydelegreturn(ndp, cred, nmp, p);
		}
	}
	if ((nd->nd_flag & ND_NFSV3) && !error)
		error = nfscl_postop_attr(nd, dnap, dattrflagp);
nfsmout:
	m_freem(nd->nd_mrep);
	/* A parsing error takes precedence over the reply status. */
	if (!error && nd->nd_repstat)
		error = nd->nd_repstat;
	/* ndp was only needed for the delegation return above. */
	free(ndp, M_NFSCLDELEG);
	return (error);
}
1714
1715/*
1716 * Do a readlink rpc.
1717 */
1718int
1719nfsrpc_readlink(vnode_t vp, struct uio *uiop, struct ucred *cred,
1720    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
1721{
1722	u_int32_t *tl;
1723	struct nfsrv_descript nfsd, *nd = &nfsd;
1724	struct nfsnode *np = VTONFS(vp);
1725	nfsattrbit_t attrbits;
1726	int error, len, cangetattr = 1;
1727
1728	*attrflagp = 0;
1729	NFSCL_REQSTART(nd, NFSPROC_READLINK, vp, cred);
1730	if (nd->nd_flag & ND_NFSV4) {
1731		/*
1732		 * And do a Getattr op.
1733		 */
1734		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1735		*tl = txdr_unsigned(NFSV4OP_GETATTR);
1736		NFSGETATTR_ATTRBIT(&attrbits);
1737		(void) nfsrv_putattrbit(nd, &attrbits);
1738	}
1739	error = nfscl_request(nd, vp, p, cred);
1740	if (error)
1741		return (error);
1742	if (nd->nd_flag & ND_NFSV3)
1743		error = nfscl_postop_attr(nd, nap, attrflagp);
1744	if (!nd->nd_repstat && !error) {
1745		NFSM_STRSIZ(len, NFS_MAXPATHLEN);
1746		/*
1747		 * This seems weird to me, but must have been added to
1748		 * FreeBSD for some reason. The only thing I can think of
1749		 * is that there was/is some server that replies with
1750		 * more link data than it should?
1751		 */
1752		if (len == NFS_MAXPATHLEN) {
1753			NFSLOCKNODE(np);
1754			if (np->n_size > 0 && np->n_size < NFS_MAXPATHLEN) {
1755				len = np->n_size;
1756				cangetattr = 0;
1757			}
1758			NFSUNLOCKNODE(np);
1759		}
1760		error = nfsm_mbufuio(nd, uiop, len);
1761		if ((nd->nd_flag & ND_NFSV4) && !error && cangetattr)
1762			error = nfscl_postop_attr(nd, nap, attrflagp);
1763	}
1764	if (nd->nd_repstat && !error)
1765		error = nd->nd_repstat;
1766nfsmout:
1767	m_freem(nd->nd_mrep);
1768	return (error);
1769}
1770
1771/*
1772 * Read operation.
1773 */
1774int
1775nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred,
1776    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
1777{
1778	int error, expireret = 0, retrycnt;
1779	u_int32_t clidrev = 0;
1780	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1781	struct nfsnode *np = VTONFS(vp);
1782	struct ucred *newcred;
1783	struct nfsfh *nfhp = NULL;
1784	nfsv4stateid_t stateid;
1785	void *lckp;
1786
1787	if (nmp->nm_clp != NULL)
1788		clidrev = nmp->nm_clp->nfsc_clientidrev;
1789	newcred = cred;
1790	if (NFSHASNFSV4(nmp)) {
1791		nfhp = np->n_fhp;
1792		newcred = NFSNEWCRED(cred);
1793	}
1794	retrycnt = 0;
1795	do {
1796		lckp = NULL;
1797		if (NFSHASNFSV4(nmp))
1798			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1799			    NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid,
1800			    &lckp);
1801		error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap,
1802		    attrflagp);
1803		if (error == NFSERR_OPENMODE) {
1804			NFSLOCKMNT(nmp);
1805			nmp->nm_state |= NFSSTA_OPENMODE;
1806			NFSUNLOCKMNT(nmp);
1807		}
1808		if (error == NFSERR_STALESTATEID)
1809			nfscl_initiate_recovery(nmp->nm_clp);
1810		if (lckp != NULL)
1811			nfscl_lockderef(lckp);
1812		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1813		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1814		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1815			(void) nfs_catnap(PZERO, error, "nfs_read");
1816		} else if ((error == NFSERR_EXPIRED ||
1817		    ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1818		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
1819			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1820		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1821		    NFSHASNFSV4N(nmp)) {
1822			error = EIO;
1823		}
1824		retrycnt++;
1825	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1826	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1827	    error == NFSERR_BADSESSION ||
1828	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
1829	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
1830	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
1831	    (error == NFSERR_OPENMODE && retrycnt < 4));
1832	if (error && retrycnt >= 4)
1833		error = EIO;
1834	if (NFSHASNFSV4(nmp))
1835		NFSFREECRED(newcred);
1836	return (error);
1837}
1838
1839/*
1840 * The actual read RPC.
1841 */
1842static int
1843nfsrpc_readrpc(vnode_t vp, struct uio *uiop, struct ucred *cred,
1844    nfsv4stateid_t *stateidp, NFSPROC_T *p, struct nfsvattr *nap,
1845    int *attrflagp)
1846{
1847	u_int32_t *tl;
1848	int error = 0, len, retlen, tsiz, eof = 0;
1849	struct nfsrv_descript nfsd;
1850	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1851	struct nfsrv_descript *nd = &nfsd;
1852	int rsize;
1853	off_t tmp_off;
1854
1855	*attrflagp = 0;
1856	tsiz = uiop->uio_resid;
1857	tmp_off = uiop->uio_offset + tsiz;
1858	NFSLOCKMNT(nmp);
1859	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
1860		NFSUNLOCKMNT(nmp);
1861		return (EFBIG);
1862	}
1863	rsize = nmp->nm_rsize;
1864	NFSUNLOCKMNT(nmp);
1865	nd->nd_mrep = NULL;
1866	while (tsiz > 0) {
1867		*attrflagp = 0;
1868		len = (tsiz > rsize) ? rsize : tsiz;
1869		NFSCL_REQSTART(nd, NFSPROC_READ, vp, cred);
1870		if (nd->nd_flag & ND_NFSV4)
1871			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
1872		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1873		if (nd->nd_flag & ND_NFSV2) {
1874			*tl++ = txdr_unsigned(uiop->uio_offset);
1875			*tl++ = txdr_unsigned(len);
1876			*tl = 0;
1877		} else {
1878			txdr_hyper(uiop->uio_offset, tl);
1879			*(tl + 2) = txdr_unsigned(len);
1880		}
1881		/*
1882		 * Since I can't do a Getattr for NFSv4 for Write, there
1883		 * doesn't seem any point in doing one here, either.
1884		 * (See the comment in nfsrpc_writerpc() for more info.)
1885		 */
1886		error = nfscl_request(nd, vp, p, cred);
1887		if (error)
1888			return (error);
1889		if (nd->nd_flag & ND_NFSV3) {
1890			error = nfscl_postop_attr(nd, nap, attrflagp);
1891		} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV2)) {
1892			error = nfsm_loadattr(nd, nap);
1893			if (!error)
1894				*attrflagp = 1;
1895		}
1896		if (nd->nd_repstat || error) {
1897			if (!error)
1898				error = nd->nd_repstat;
1899			goto nfsmout;
1900		}
1901		if (nd->nd_flag & ND_NFSV3) {
1902			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1903			eof = fxdr_unsigned(int, *(tl + 1));
1904		} else if (nd->nd_flag & ND_NFSV4) {
1905			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1906			eof = fxdr_unsigned(int, *tl);
1907		}
1908		NFSM_STRSIZ(retlen, len);
1909		error = nfsm_mbufuio(nd, uiop, retlen);
1910		if (error)
1911			goto nfsmout;
1912		m_freem(nd->nd_mrep);
1913		nd->nd_mrep = NULL;
1914		tsiz -= retlen;
1915		if (!(nd->nd_flag & ND_NFSV2)) {
1916			if (eof || retlen == 0)
1917				tsiz = 0;
1918		} else if (retlen < len)
1919			tsiz = 0;
1920	}
1921	return (0);
1922nfsmout:
1923	if (nd->nd_mrep != NULL)
1924		m_freem(nd->nd_mrep);
1925	return (error);
1926}
1927
1928/*
1929 * nfs write operation
1930 * When called_from_strategy != 0, it should return EIO for an error that
1931 * indicates recovery is in progress, so that the buffer will be left
1932 * dirty and be written back to the server later. If it loops around,
1933 * the recovery thread could get stuck waiting for the buffer and recovery
1934 * will then deadlock.
1935 */
1936int
1937nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
1938    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
1939    int called_from_strategy, int ioflag)
1940{
1941	int error, expireret = 0, retrycnt, nostateid;
1942	u_int32_t clidrev = 0;
1943	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1944	struct nfsnode *np = VTONFS(vp);
1945	struct ucred *newcred;
1946	struct nfsfh *nfhp = NULL;
1947	nfsv4stateid_t stateid;
1948	void *lckp;
1949
1950	KASSERT(*must_commit >= 0 && *must_commit <= 2,
1951	    ("nfsrpc_write: must_commit out of range=%d", *must_commit));
1952	if (nmp->nm_clp != NULL)
1953		clidrev = nmp->nm_clp->nfsc_clientidrev;
1954	newcred = cred;
1955	if (NFSHASNFSV4(nmp)) {
1956		newcred = NFSNEWCRED(cred);
1957		nfhp = np->n_fhp;
1958	}
1959	retrycnt = 0;
1960	do {
1961		lckp = NULL;
1962		nostateid = 0;
1963		if (NFSHASNFSV4(nmp)) {
1964			(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
1965			    NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid,
1966			    &lckp);
1967			if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
1968			    stateid.other[2] == 0) {
1969				nostateid = 1;
1970				NFSCL_DEBUG(1, "stateid0 in write\n");
1971			}
1972		}
1973
1974		/*
1975		 * If there is no stateid for NFSv4, it means this is an
1976		 * extraneous write after close. Basically a poorly
1977		 * implemented buffer cache. Just don't do the write.
1978		 */
1979		if (nostateid)
1980			error = 0;
1981		else
1982			error = nfsrpc_writerpc(vp, uiop, iomode, must_commit,
1983			    newcred, &stateid, p, nap, attrflagp, ioflag);
1984		if (error == NFSERR_STALESTATEID)
1985			nfscl_initiate_recovery(nmp->nm_clp);
1986		if (lckp != NULL)
1987			nfscl_lockderef(lckp);
1988		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
1989		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
1990		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
1991			(void) nfs_catnap(PZERO, error, "nfs_write");
1992		} else if ((error == NFSERR_EXPIRED ||
1993		    ((!NFSHASINT(nmp) || !NFSHASNFSV4N(nmp)) &&
1994		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
1995			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
1996		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp) &&
1997		    NFSHASNFSV4N(nmp)) {
1998			error = EIO;
1999		}
2000		retrycnt++;
2001	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
2002	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
2003	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) ||
2004	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
2005	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2006	     expireret == 0 && clidrev != 0 && retrycnt < 4));
2007	if (error != 0 && (retrycnt >= 4 ||
2008	    ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
2009	      error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0)))
2010		error = EIO;
2011	if (NFSHASNFSV4(nmp))
2012		NFSFREECRED(newcred);
2013	return (error);
2014}
2015
2016/*
2017 * The actual write RPC.
2018 */
2019static int
2020nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
2021    int *must_commit, struct ucred *cred, nfsv4stateid_t *stateidp,
2022    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, int ioflag)
2023{
2024	u_int32_t *tl;
2025	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2026	struct nfsnode *np = VTONFS(vp);
2027	int error = 0, len, rlen, commit, committed = NFSWRITE_FILESYNC;
2028	int wccflag = 0;
2029	int32_t backup;
2030	struct nfsrv_descript *nd;
2031	nfsattrbit_t attrbits;
2032	uint64_t tmp_off;
2033	ssize_t tsiz, wsize;
2034	bool do_append;
2035
2036	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
2037	*attrflagp = 0;
2038	tsiz = uiop->uio_resid;
2039	tmp_off = uiop->uio_offset + tsiz;
2040	NFSLOCKMNT(nmp);
2041	if (tmp_off > nmp->nm_maxfilesize || tmp_off < uiop->uio_offset) {
2042		NFSUNLOCKMNT(nmp);
2043		return (EFBIG);
2044	}
2045	wsize = nmp->nm_wsize;
2046	do_append = false;
2047	if ((ioflag & IO_APPEND) != 0 && NFSHASNFSV4(nmp) && !NFSHASPNFS(nmp))
2048		do_append = true;
2049	NFSUNLOCKMNT(nmp);
2050	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK);
2051	nd->nd_mrep = NULL;	/* NFSv2 sometimes does a write with */
2052	nd->nd_repstat = 0;	/* uio_resid == 0, so the while is not done */
2053	while (tsiz > 0) {
2054		*attrflagp = 0;
2055		len = (tsiz > wsize) ? wsize : tsiz;
2056		if (do_append)
2057			NFSCL_REQSTART(nd, NFSPROC_APPENDWRITE, vp, cred);
2058		else
2059			NFSCL_REQSTART(nd, NFSPROC_WRITE, vp, cred);
2060		if (nd->nd_flag & ND_NFSV4) {
2061			if (do_append) {
2062				NFSZERO_ATTRBIT(&attrbits);
2063				NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
2064				nfsrv_putattrbit(nd, &attrbits);
2065				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED +
2066				    NFSX_HYPER);
2067				*tl++ = txdr_unsigned(NFSX_HYPER);
2068				txdr_hyper(uiop->uio_offset, tl); tl += 2;
2069				*tl = txdr_unsigned(NFSV4OP_WRITE);
2070			}
2071			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
2072			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+2*NFSX_UNSIGNED);
2073			txdr_hyper(uiop->uio_offset, tl);
2074			tl += 2;
2075			*tl++ = txdr_unsigned(*iomode);
2076			*tl = txdr_unsigned(len);
2077		} else if (nd->nd_flag & ND_NFSV3) {
2078			NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER+3*NFSX_UNSIGNED);
2079			txdr_hyper(uiop->uio_offset, tl);
2080			tl += 2;
2081			*tl++ = txdr_unsigned(len);
2082			*tl++ = txdr_unsigned(*iomode);
2083			*tl = txdr_unsigned(len);
2084		} else {
2085			u_int32_t x;
2086
2087			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2088			/*
2089			 * Not sure why someone changed this, since the
2090			 * RFC clearly states that "beginoffset" and
2091			 * "totalcount" are ignored, but it wouldn't
2092			 * surprise me if there's a busted server out there.
2093			 */
2094			/* Set both "begin" and "current" to non-garbage. */
2095			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
2096			*tl++ = x;      /* "begin offset" */
2097			*tl++ = x;      /* "current offset" */
2098			x = txdr_unsigned(len);
2099			*tl++ = x;      /* total to this offset */
2100			*tl = x;        /* size of this write */
2101		}
2102		error = nfsm_uiombuf(nd, uiop, len);
2103		if (error != 0) {
2104			m_freem(nd->nd_mreq);
2105			free(nd, M_TEMP);
2106			return (error);
2107		}
2108		/*
2109		 * Although it is tempting to do a normal Getattr Op in the
2110		 * NFSv4 compound, the result can be a nearly hung client
2111		 * system if the Getattr asks for Owner and/or OwnerGroup.
2112		 * It occurs when the client can't map either the Owner or
2113		 * Owner_group name in the Getattr reply to a uid/gid. When
2114		 * there is a cache miss, the kernel does an upcall to the
2115		 * nfsuserd. Then, it can try and read the local /etc/passwd
2116		 * or /etc/group file. It can then block in getnewbuf(),
2117		 * waiting for dirty writes to be pushed to the NFS server.
2118		 * The only reason this doesn't result in a complete
2119		 * deadlock, is that the upcall times out and allows
2120		 * the write to complete. However, progress is so slow
2121		 * that it might just as well be deadlocked.
2122		 * As such, we get the rest of the attributes, but not
2123		 * Owner or Owner_group.
2124		 * nb: nfscl_loadattrcache() needs to be told that these
2125		 *     partial attributes from a write rpc are being
2126		 *     passed in, via a argument flag.
2127		 */
2128		if (nd->nd_flag & ND_NFSV4) {
2129			NFSWRITEGETATTR_ATTRBIT(&attrbits);
2130			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2131			*tl = txdr_unsigned(NFSV4OP_GETATTR);
2132			(void) nfsrv_putattrbit(nd, &attrbits);
2133		}
2134		error = nfscl_request(nd, vp, p, cred);
2135		if (error) {
2136			free(nd, M_TEMP);
2137			return (error);
2138		}
2139		if (nd->nd_repstat) {
2140			/*
2141			 * In case the rpc gets retried, roll
2142			 * the uio fields changed by nfsm_uiombuf()
2143			 * back.
2144			 */
2145			uiop->uio_offset -= len;
2146			uiop->uio_resid += len;
2147			uiop->uio_iov->iov_base =
2148			    (char *)uiop->uio_iov->iov_base - len;
2149			uiop->uio_iov->iov_len += len;
2150		}
2151		if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2152			error = nfscl_wcc_data(nd, vp, nap, attrflagp,
2153			    &wccflag, &tmp_off);
2154			if (error)
2155				goto nfsmout;
2156		}
2157		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
2158		    (ND_NFSV4 | ND_NOMOREDATA) &&
2159		    nd->nd_repstat == NFSERR_NOTSAME && do_append) {
2160			/*
2161			 * Verify of the file's size failed, so redo the
2162			 * write using the file's size as returned in
2163			 * the wcc attributes.
2164			 */
2165			if (tmp_off + tsiz <= nmp->nm_maxfilesize) {
2166				do_append = false;
2167				uiop->uio_offset = tmp_off;
2168				m_freem(nd->nd_mrep);
2169				nd->nd_mrep = NULL;
2170				continue;
2171			} else
2172				nd->nd_repstat = EFBIG;
2173		}
2174		if (!nd->nd_repstat) {
2175			if (do_append) {
2176				/* Strip off the Write reply status. */
2177				do_append = false;
2178				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2179			}
2180			if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
2181				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
2182					+ NFSX_VERF);
2183				rlen = fxdr_unsigned(int, *tl++);
2184				if (rlen == 0) {
2185					error = NFSERR_IO;
2186					goto nfsmout;
2187				} else if (rlen < len) {
2188					backup = len - rlen;
2189					uiop->uio_iov->iov_base =
2190					    (char *)uiop->uio_iov->iov_base -
2191					    backup;
2192					uiop->uio_iov->iov_len += backup;
2193					uiop->uio_offset -= backup;
2194					uiop->uio_resid += backup;
2195					len = rlen;
2196				}
2197				commit = fxdr_unsigned(int, *tl++);
2198
2199				/*
2200				 * Return the lowest commitment level
2201				 * obtained by any of the RPCs.
2202				 */
2203				if (committed == NFSWRITE_FILESYNC)
2204					committed = commit;
2205				else if (committed == NFSWRITE_DATASYNC &&
2206					commit == NFSWRITE_UNSTABLE)
2207					committed = commit;
2208				NFSLOCKMNT(nmp);
2209				if (!NFSHASWRITEVERF(nmp)) {
2210					NFSBCOPY((caddr_t)tl,
2211					    (caddr_t)&nmp->nm_verf[0],
2212					    NFSX_VERF);
2213					NFSSETWRITEVERF(nmp);
2214	    			} else if (NFSBCMP(tl, nmp->nm_verf,
2215				    NFSX_VERF) && *must_commit != 2) {
2216					*must_commit = 1;
2217					NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
2218				}
2219				NFSUNLOCKMNT(nmp);
2220			}
2221			if (nd->nd_flag & ND_NFSV4)
2222				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2223			if (nd->nd_flag & (ND_NFSV2 | ND_NFSV4)) {
2224				error = nfsm_loadattr(nd, nap);
2225				if (!error)
2226					*attrflagp = NFS_LATTR_NOSHRINK;
2227			}
2228		} else {
2229			error = nd->nd_repstat;
2230		}
2231		if (error)
2232			goto nfsmout;
2233		NFSWRITERPC_SETTIME(wccflag, np, nap, (nd->nd_flag & ND_NFSV4));
2234		m_freem(nd->nd_mrep);
2235		nd->nd_mrep = NULL;
2236		tsiz -= len;
2237	}
2238nfsmout:
2239	if (nd->nd_mrep != NULL)
2240		m_freem(nd->nd_mrep);
2241	*iomode = committed;
2242	if (nd->nd_repstat && !error)
2243		error = nd->nd_repstat;
2244	free(nd, M_TEMP);
2245	return (error);
2246}
2247
2248/*
2249 * Do an nfs deallocate operation.
2250 */
2251int
2252nfsrpc_deallocate(vnode_t vp, off_t offs, off_t len, struct nfsvattr *nap,
2253    int *attrflagp, struct ucred *cred, NFSPROC_T *p)
2254{
2255	int error, expireret = 0, openerr, retrycnt;
2256	uint32_t clidrev = 0;
2257	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2258	struct nfsfh *nfhp;
2259	nfsv4stateid_t stateid;
2260	void *lckp;
2261
2262	if (nmp->nm_clp != NULL)
2263		clidrev = nmp->nm_clp->nfsc_clientidrev;
2264	retrycnt = 0;
2265	do {
2266		lckp = NULL;
2267		openerr = 1;
2268		nfhp = VTONFS(vp)->n_fhp;
2269		error = nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
2270		    NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
2271		if (error != 0) {
2272			/*
2273			 * No Open stateid, so try and open the file
2274			 * now.
2275			 */
2276			openerr = nfsrpc_open(vp, FWRITE, cred, p);
2277			if (openerr == 0)
2278				nfscl_getstateid(vp, nfhp->nfh_fh,
2279				    nfhp->nfh_len, NFSV4OPEN_ACCESSWRITE, 0,
2280				    cred, p, &stateid, &lckp);
2281		}
2282		error = nfsrpc_deallocaterpc(vp, offs, len, &stateid, nap,
2283		    attrflagp, cred, p);
2284		if (error == NFSERR_STALESTATEID)
2285			nfscl_initiate_recovery(nmp->nm_clp);
2286		if (lckp != NULL)
2287			nfscl_lockderef(lckp);
2288		if (openerr == 0)
2289			nfsrpc_close(vp, 0, p);
2290		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2291		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2292		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
2293			(void) nfs_catnap(PZERO, error, "nfs_deallocate");
2294		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
2295		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
2296			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2297		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
2298			error = EIO;
2299		}
2300		retrycnt++;
2301	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
2302	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2303	    error == NFSERR_BADSESSION ||
2304	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
2305	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2306	     expireret == 0 && clidrev != 0 && retrycnt < 4));
2307	if (error && retrycnt >= 4)
2308		error = EIO;
2309	return (error);
2310}
2311
2312/*
2313 * The actual deallocate RPC.
2314 */
2315static int
2316nfsrpc_deallocaterpc(vnode_t vp, off_t offs, off_t len,
2317    nfsv4stateid_t *stateidp, struct nfsvattr *nap, int *attrflagp,
2318    struct ucred *cred, NFSPROC_T *p)
2319{
2320	uint32_t *tl;
2321	struct nfsnode *np = VTONFS(vp);
2322	int error, wccflag;
2323	struct nfsrv_descript nfsd;
2324	struct nfsrv_descript *nd = &nfsd;
2325	nfsattrbit_t attrbits;
2326
2327	*attrflagp = 0;
2328	NFSCL_REQSTART(nd, NFSPROC_DEALLOCATE, vp, cred);
2329	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
2330	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
2331	txdr_hyper(offs, tl);
2332	tl += 2;
2333	txdr_hyper(len, tl);
2334	NFSWRITEGETATTR_ATTRBIT(&attrbits);
2335	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
2336	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2337	nfsrv_putattrbit(nd, &attrbits);
2338	error = nfscl_request(nd, vp, p, cred);
2339	if (error != 0)
2340		return (error);
2341	wccflag = 0;
2342	error = nfscl_wcc_data(nd, vp, nap, attrflagp, &wccflag, NULL);
2343	if (error != 0)
2344		goto nfsmout;
2345	if (nd->nd_repstat == 0) {
2346		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
2347		error = nfsm_loadattr(nd, nap);
2348		if (error != 0)
2349			goto nfsmout;
2350		*attrflagp = NFS_LATTR_NOSHRINK;
2351	}
2352	NFSWRITERPC_SETTIME(wccflag, np, nap, 1);
2353nfsmout:
2354	m_freem(nd->nd_mrep);
2355	if (nd->nd_repstat != 0 && error == 0)
2356		error = nd->nd_repstat;
2357	return (error);
2358}
2359
2360/*
2361 * nfs mknod rpc
2362 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
2363 * mode set to specify the file type and the size field for rdev.
2364 */
2365int
2366nfsrpc_mknod(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2367    u_int32_t rdev, __enum_uint8(vtype) vtyp, struct ucred *cred, NFSPROC_T *p,
2368    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2369    int *attrflagp, int *dattrflagp)
2370{
2371	u_int32_t *tl;
2372	int error = 0;
2373	struct nfsrv_descript nfsd, *nd = &nfsd;
2374	nfsattrbit_t attrbits;
2375
2376	*nfhpp = NULL;
2377	*attrflagp = 0;
2378	*dattrflagp = 0;
2379	if (namelen > NFS_MAXNAMLEN)
2380		return (ENAMETOOLONG);
2381	NFSCL_REQSTART(nd, NFSPROC_MKNOD, dvp, cred);
2382	if (nd->nd_flag & ND_NFSV4) {
2383		if (vtyp == VBLK || vtyp == VCHR) {
2384			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2385			*tl++ = vtonfsv34_type(vtyp);
2386			*tl++ = txdr_unsigned(NFSMAJOR(rdev));
2387			*tl = txdr_unsigned(NFSMINOR(rdev));
2388		} else {
2389			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2390			*tl = vtonfsv34_type(vtyp);
2391		}
2392	}
2393	(void) nfsm_strtom(nd, name, namelen);
2394	if (nd->nd_flag & ND_NFSV3) {
2395		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2396		*tl = vtonfsv34_type(vtyp);
2397	}
2398	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
2399		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2400	if ((nd->nd_flag & ND_NFSV3) &&
2401	    (vtyp == VCHR || vtyp == VBLK)) {
2402		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2403		*tl++ = txdr_unsigned(NFSMAJOR(rdev));
2404		*tl = txdr_unsigned(NFSMINOR(rdev));
2405	}
2406	if (nd->nd_flag & ND_NFSV4) {
2407		NFSGETATTR_ATTRBIT(&attrbits);
2408		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2409		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2410		*tl = txdr_unsigned(NFSV4OP_GETATTR);
2411		(void) nfsrv_putattrbit(nd, &attrbits);
2412	}
2413	if (nd->nd_flag & ND_NFSV2)
2414		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZERDEV, rdev);
2415	error = nfscl_request(nd, dvp, p, cred);
2416	if (error)
2417		return (error);
2418	if (nd->nd_flag & ND_NFSV4)
2419		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2420	if (!nd->nd_repstat) {
2421		if (nd->nd_flag & ND_NFSV4) {
2422			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2423			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2424			if (error)
2425				goto nfsmout;
2426		}
2427		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2428		if (error)
2429			goto nfsmout;
2430	}
2431	if (nd->nd_flag & ND_NFSV3)
2432		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2433	if (!error && nd->nd_repstat)
2434		error = nd->nd_repstat;
2435nfsmout:
2436	m_freem(nd->nd_mrep);
2437	return (error);
2438}
2439
2440/*
2441 * nfs file create call
2442 * Mostly just call the approriate routine. (I separated out v4, so that
2443 * error recovery wouldn't be as difficult.)
2444 */
2445int
2446nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2447    nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2448    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2449    int *attrflagp, int *dattrflagp)
2450{
2451	int error = 0, newone, expireret = 0, retrycnt, unlocked;
2452	struct nfsclowner *owp;
2453	struct nfscldeleg *dp;
2454	struct nfsmount *nmp = VFSTONFS(dvp->v_mount);
2455	u_int32_t clidrev;
2456
2457	if (NFSHASNFSV4(nmp)) {
2458	    retrycnt = 0;
2459	    do {
2460		dp = NULL;
2461		error = nfscl_open(dvp, NULL, 0, (NFSV4OPEN_ACCESSWRITE |
2462		    NFSV4OPEN_ACCESSREAD), 0, cred, p, &owp, NULL, &newone,
2463		    NULL, 1, true);
2464		if (error)
2465			return (error);
2466		if (nmp->nm_clp != NULL)
2467			clidrev = nmp->nm_clp->nfsc_clientidrev;
2468		else
2469			clidrev = 0;
2470		if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 ||
2471		    nfs_numnfscbd == 0 || retrycnt > 0)
2472			error = nfsrpc_createv4(dvp, name, namelen, vap, cverf,
2473			  fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2474			  attrflagp, dattrflagp, &unlocked);
2475		else
2476			error = nfsrpc_getcreatelayout(dvp, name, namelen, vap,
2477			  cverf, fmode, owp, &dp, cred, p, dnap, nnap, nfhpp,
2478			  attrflagp, dattrflagp, &unlocked);
2479		/*
2480		 * There is no need to invalidate cached attributes here,
2481		 * since new post-delegation issue attributes are always
2482		 * returned by nfsrpc_createv4() and these will update the
2483		 * attribute cache.
2484		 */
2485		if (dp != NULL)
2486			(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
2487			    (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
2488		nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
2489		if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2490		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2491		    error == NFSERR_BADSESSION) {
2492			(void) nfs_catnap(PZERO, error, "nfs_open");
2493		} else if ((error == NFSERR_EXPIRED ||
2494		    error == NFSERR_BADSTATEID) && clidrev != 0) {
2495			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
2496			retrycnt++;
2497		}
2498	    } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
2499		error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
2500		error == NFSERR_BADSESSION ||
2501		((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
2502		 expireret == 0 && clidrev != 0 && retrycnt < 4));
2503	    if (error && retrycnt >= 4)
2504		    error = EIO;
2505	} else {
2506		error = nfsrpc_createv23(dvp, name, namelen, vap, cverf,
2507		    fmode, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp);
2508	}
2509	return (error);
2510}
2511
2512/*
2513 * The create rpc for v2 and 3.
2514 */
2515static int
2516nfsrpc_createv23(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2517    nfsquad_t cverf, int fmode, struct ucred *cred, NFSPROC_T *p,
2518    struct nfsvattr *dnap, struct nfsvattr *nnap, struct nfsfh **nfhpp,
2519    int *attrflagp, int *dattrflagp)
2520{
2521	u_int32_t *tl;
2522	int error = 0;
2523	struct nfsrv_descript nfsd, *nd = &nfsd;
2524
2525	*nfhpp = NULL;
2526	*attrflagp = 0;
2527	*dattrflagp = 0;
2528	if (namelen > NFS_MAXNAMLEN)
2529		return (ENAMETOOLONG);
2530	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2531	(void) nfsm_strtom(nd, name, namelen);
2532	if (nd->nd_flag & ND_NFSV3) {
2533		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2534		if (fmode & O_EXCL) {
2535			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2536			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2537			*tl++ = cverf.lval[0];
2538			*tl = cverf.lval[1];
2539		} else {
2540			*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2541			nfscl_fillsattr(nd, vap, dvp, 0, 0);
2542		}
2543	} else {
2544		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZE0, 0);
2545	}
2546	error = nfscl_request(nd, dvp, p, cred);
2547	if (error)
2548		return (error);
2549	if (nd->nd_repstat == 0) {
2550		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2551		if (error)
2552			goto nfsmout;
2553	}
2554	if (nd->nd_flag & ND_NFSV3)
2555		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
2556	if (nd->nd_repstat != 0 && error == 0)
2557		error = nd->nd_repstat;
2558nfsmout:
2559	m_freem(nd->nd_mrep);
2560	return (error);
2561}
2562
2563static int
2564nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
2565    nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
2566    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
2567    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
2568    int *dattrflagp, int *unlockedp)
2569{
2570	u_int32_t *tl;
2571	int error = 0, deleg, newone, ret, acesize, limitby;
2572	struct nfsrv_descript nfsd, *nd = &nfsd;
2573	struct nfsclopen *op;
2574	struct nfscldeleg *dp = NULL;
2575	struct nfsnode *np;
2576	struct nfsfh *nfhp;
2577	nfsattrbit_t attrbits;
2578	nfsv4stateid_t stateid;
2579	u_int32_t rflags;
2580	struct nfsmount *nmp;
2581	struct nfsclsession *tsep;
2582
2583	nmp = VFSTONFS(dvp->v_mount);
2584	np = VTONFS(dvp);
2585	*unlockedp = 0;
2586	*nfhpp = NULL;
2587	*dpp = NULL;
2588	*attrflagp = 0;
2589	*dattrflagp = 0;
2590	if (namelen > NFS_MAXNAMLEN)
2591		return (ENAMETOOLONG);
2592	NFSCL_REQSTART(nd, NFSPROC_CREATE, dvp, cred);
2593	/*
2594	 * For V4, this is actually an Open op.
2595	 */
2596	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2597	*tl++ = txdr_unsigned(owp->nfsow_seqid);
2598	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
2599	    NFSV4OPEN_ACCESSREAD);
2600	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
2601	tsep = nfsmnt_mdssession(nmp);
2602	*tl++ = tsep->nfsess_clientid.lval[0];
2603	*tl = tsep->nfsess_clientid.lval[1];
2604	(void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
2605	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2606	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
2607	if (fmode & O_EXCL) {
2608		if (NFSHASNFSV4N(nmp)) {
2609			if (NFSHASSESSPERSIST(nmp)) {
2610				/* Use GUARDED for persistent sessions. */
2611				*tl = txdr_unsigned(NFSCREATE_GUARDED);
2612				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2613			} else {
2614				/* Otherwise, use EXCLUSIVE4_1. */
2615				*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
2616				NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2617				*tl++ = cverf.lval[0];
2618				*tl = cverf.lval[1];
2619				nfscl_fillsattr(nd, vap, dvp, 0, 0);
2620			}
2621		} else {
2622			/* NFSv4.0 */
2623			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE);
2624			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2625			*tl++ = cverf.lval[0];
2626			*tl = cverf.lval[1];
2627		}
2628	} else {
2629		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
2630		nfscl_fillsattr(nd, vap, dvp, 0, 0);
2631	}
2632	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2633	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
2634	(void) nfsm_strtom(nd, name, namelen);
2635	/* Get the new file's handle and attributes. */
2636	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2637	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
2638	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2639	NFSGETATTR_ATTRBIT(&attrbits);
2640	(void) nfsrv_putattrbit(nd, &attrbits);
2641	/* Get the directory's post-op attributes. */
2642	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2643	*tl = txdr_unsigned(NFSV4OP_PUTFH);
2644	(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
2645	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
2646	*tl = txdr_unsigned(NFSV4OP_GETATTR);
2647	(void) nfsrv_putattrbit(nd, &attrbits);
2648	error = nfscl_request(nd, dvp, p, cred);
2649	if (error)
2650		return (error);
2651	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
2652	if (nd->nd_repstat == 0) {
2653		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2654		    6 * NFSX_UNSIGNED);
2655		stateid.seqid = *tl++;
2656		stateid.other[0] = *tl++;
2657		stateid.other[1] = *tl++;
2658		stateid.other[2] = *tl;
2659		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
2660		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
2661		if (error)
2662			goto nfsmout;
2663		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2664		deleg = fxdr_unsigned(int, *tl);
2665		if (deleg == NFSV4OPEN_DELEGATEREAD ||
2666		    deleg == NFSV4OPEN_DELEGATEWRITE) {
2667			if (!(owp->nfsow_clp->nfsc_flags &
2668			      NFSCLFLAGS_FIRSTDELEG))
2669				owp->nfsow_clp->nfsc_flags |=
2670				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
2671			dp = malloc(
2672			    sizeof (struct nfscldeleg) + NFSX_V4FHMAX,
2673			    M_NFSCLDELEG, M_WAITOK);
2674			LIST_INIT(&dp->nfsdl_owner);
2675			LIST_INIT(&dp->nfsdl_lock);
2676			dp->nfsdl_clp = owp->nfsow_clp;
2677			newnfs_copyincred(cred, &dp->nfsdl_cred);
2678			nfscl_lockinit(&dp->nfsdl_rwlock);
2679			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
2680			    NFSX_UNSIGNED);
2681			dp->nfsdl_stateid.seqid = *tl++;
2682			dp->nfsdl_stateid.other[0] = *tl++;
2683			dp->nfsdl_stateid.other[1] = *tl++;
2684			dp->nfsdl_stateid.other[2] = *tl++;
2685			ret = fxdr_unsigned(int, *tl);
2686			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
2687				dp->nfsdl_flags = NFSCLDL_WRITE;
2688				/*
2689				 * Indicates how much the file can grow.
2690				 */
2691				NFSM_DISSECT(tl, u_int32_t *,
2692				    3 * NFSX_UNSIGNED);
2693				limitby = fxdr_unsigned(int, *tl++);
2694				switch (limitby) {
2695				case NFSV4OPEN_LIMITSIZE:
2696					dp->nfsdl_sizelimit = fxdr_hyper(tl);
2697					break;
2698				case NFSV4OPEN_LIMITBLOCKS:
2699					dp->nfsdl_sizelimit =
2700					    fxdr_unsigned(u_int64_t, *tl++);
2701					dp->nfsdl_sizelimit *=
2702					    fxdr_unsigned(u_int64_t, *tl);
2703					break;
2704				default:
2705					error = NFSERR_BADXDR;
2706					goto nfsmout;
2707				}
2708			} else {
2709				dp->nfsdl_flags = NFSCLDL_READ;
2710			}
2711			if (ret)
2712				dp->nfsdl_flags |= NFSCLDL_RECALL;
2713			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
2714			    &ret, &acesize, p);
2715			if (error)
2716				goto nfsmout;
2717		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
2718			error = NFSERR_BADXDR;
2719			goto nfsmout;
2720		}
2721		error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
2722		if (error)
2723			goto nfsmout;
2724		/* Get rid of the PutFH and Getattr status values. */
2725		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2726		/* Load the directory attributes. */
2727		error = nfsm_loadattr(nd, dnap);
2728		if (error)
2729			goto nfsmout;
2730		*dattrflagp = 1;
2731		if (dp != NULL && *attrflagp) {
2732			dp->nfsdl_change = nnap->na_filerev;
2733			dp->nfsdl_modtime = nnap->na_mtime;
2734			dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
2735		}
2736		/*
2737		 * We can now complete the Open state.
2738		 */
2739		nfhp = *nfhpp;
2740		if (dp != NULL) {
2741			dp->nfsdl_fhlen = nfhp->nfh_len;
2742			NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh, nfhp->nfh_len);
2743		}
2744		/*
2745		 * Get an Open structure that will be
2746		 * attached to the OpenOwner, acquired already.
2747		 */
2748		error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
2749		    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
2750		    cred, p, NULL, &op, &newone, NULL, 0, false);
2751		if (error)
2752			goto nfsmout;
2753		op->nfso_stateid = stateid;
2754		newnfs_copyincred(cred, &op->nfso_cred);
2755		if ((rflags & NFSV4OPEN_RESULTCONFIRM)) {
2756		    do {
2757			ret = nfsrpc_openconfirm(dvp, nfhp->nfh_fh,
2758			    nfhp->nfh_len, op, cred, p);
2759			if (ret == NFSERR_DELAY)
2760			    (void) nfs_catnap(PZERO, ret, "nfs_create");
2761		    } while (ret == NFSERR_DELAY);
2762		    error = ret;
2763		}
2764
2765		/*
2766		 * If the server is handing out delegations, but we didn't
2767		 * get one because an OpenConfirm was required, try the
2768		 * Open again, to get a delegation. This is a harmless no-op,
2769		 * from a server's point of view.
2770		 */
2771		if ((rflags & NFSV4OPEN_RESULTCONFIRM) &&
2772		    (owp->nfsow_clp->nfsc_flags & NFSCLFLAGS_GOTDELEG) &&
2773		    !error && dp == NULL) {
2774		    KASSERT(!NFSHASNFSV4N(nmp),
2775			("nfsrpc_createv4: result confirm"));
2776		    do {
2777			ret = nfsrpc_openrpc(VFSTONFS(dvp->v_mount), dvp,
2778			    np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
2779			    nfhp->nfh_fh, nfhp->nfh_len,
2780			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), op,
2781			    name, namelen, &dp, 0, 0x0, cred, p, 0, 1);
2782			if (ret == NFSERR_DELAY)
2783			    (void) nfs_catnap(PZERO, ret, "nfs_crt2");
2784		    } while (ret == NFSERR_DELAY);
2785		    if (ret) {
2786			if (dp != NULL) {
2787				free(dp, M_NFSCLDELEG);
2788				dp = NULL;
2789			}
2790			if (ret == NFSERR_STALECLIENTID ||
2791			    ret == NFSERR_STALEDONTRECOVER ||
2792			    ret == NFSERR_BADSESSION)
2793				error = ret;
2794		    }
2795		}
2796		nfscl_openrelease(nmp, op, error, newone);
2797		*unlockedp = 1;
2798	}
2799	if (nd->nd_repstat != 0 && error == 0)
2800		error = nd->nd_repstat;
2801	if (error == NFSERR_STALECLIENTID)
2802		nfscl_initiate_recovery(owp->nfsow_clp);
2803nfsmout:
2804	if (!error)
2805		*dpp = dp;
2806	else if (dp != NULL)
2807		free(dp, M_NFSCLDELEG);
2808	m_freem(nd->nd_mrep);
2809	return (error);
2810}
2811
/*
 * Nfs remove rpc
 *
 * Removes the entry "name" (namelen bytes) from the directory dvp.
 * For an NFSv4 mount, nfscl_removedeleg() is first called on vp; when it
 * returns 1 the request is started as NFSPROC_RETDELEGREMOVE against vp,
 * carrying the delegation stateid it filled in, followed by a PutFH of
 * dvp and the Remove itself.  If the reply to that compound has a non-zero
 * status with ND_NOMOREDATA set, the RPC is redone as a plain
 * NFSPROC_REMOVE.
 *
 * *dattrflagp is cleared on entry; dnap and dattrflagp are handed to
 * nfscl_wcc_data() for NFSv3 and NFSv4 replies.
 *
 * Returns ENAMETOOLONG if namelen exceeds NFS_MAXNAMLEN, the error from
 * nfscl_request() if the RPC could not be done, and otherwise the reply
 * parsing error or, failing that, the reply status (nd_repstat).
 */
int
nfsrpc_remove(vnode_t dvp, char *name, int namelen, vnode_t vp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsnode *np;
	struct nfsmount *nmp;
	nfsv4stateid_t dstateid;
	int error, ret = 0, i;

	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	nmp = VFSTONFS(dvp->v_mount);
tryagain:
	/*
	 * ret == 0 means this is the first pass.  On a retry ret is still
	 * non-zero, so the else clause below resets it and the plain
	 * Remove request is built instead.
	 */
	if (NFSHASNFSV4(nmp) && ret == 0) {
		ret = nfscl_removedeleg(vp, p, &dstateid);
		if (ret == 1) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp, cred);
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			/* For NFSHASNFSV4N() mounts the seqid is sent as 0. */
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = dstateid.seqid;
			*tl++ = dstateid.other[0];
			*tl++ = dstateid.other[1];
			*tl++ = dstateid.other[2];
			/* Switch to the directory before the Remove. */
			*tl = txdr_unsigned(NFSV4OP_PUTFH);
			np = VTONFS(dvp);
			(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
			    np->n_fhp->nfh_len, 0);
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_REMOVE);
		}
	} else {
		ret = 0;
	}
	if (ret == 0)
		NFSCL_REQSTART(nd, NFSPROC_REMOVE, dvp, cred);
	(void) nfsm_strtom(nd, name, namelen);
	error = nfscl_request(nd, dvp, p, cred);
	if (error)
		return (error);
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
		/* For NFSv4, parse out any Delegreturn replies. */
		if (ret > 0 && nd->nd_repstat != 0 &&
		    (nd->nd_flag & ND_NOMOREDATA)) {
			/*
			 * If the Delegreturn failed, try again without
			 * it. The server will Recall, as required.
			 */
			m_freem(nd->nd_mrep);
			goto tryagain;
		}
		/*
		 * Two pairs of 32-bit words are consumed for each
		 * delegation returned; a non-zero second word of a pair
		 * ends further parsing of the reply.
		 * NOTE(review): NFSM_DISSECT() presumably jumps to nfsmout
		 * when the reply is too short -- confirm against the macro.
		 */
		for (i = 0; i < (ret * 2); i++) {
			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
			    ND_NFSV4) {
			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			    if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
			}
		}
		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
	}
	/* A parsing error takes precedence over the reply status. */
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
2887
/*
 * Do an nfs rename rpc.
 *
 * Renames fnameptr (fnamelen bytes) in directory fdvp to tnameptr
 * (tnamelen bytes) in directory tdvp.
 * For an NFSv4 mount, nfscl_renamedeleg() is called on fvp and tvp first;
 * it reports via gotfd/gottd which delegation stateids it filled in, and
 * those are put in front of the Rename in the same request.  If the reply
 * shows that part failed, the RPC is redone as a plain NFSPROC_RENAME.
 *
 * *fattrflagp and *tattrflagp are cleared on entry; fnap/fattrflagp and
 * tnap/tattrflagp are handed to nfscl_wcc_data() for fdvp and tdvp
 * respectively, for NFSv3 and NFSv4 replies.
 *
 * Returns ENAMETOOLONG if either name exceeds NFS_MAXNAMLEN, the error
 * from nfscl_request() if the RPC could not be done, and otherwise the
 * reply parsing error or, failing that, the reply status (nd_repstat).
 */
int
nfsrpc_rename(vnode_t fdvp, vnode_t fvp, char *fnameptr, int fnamelen,
    vnode_t tdvp, vnode_t tvp, char *tnameptr, int tnamelen, struct ucred *cred,
    NFSPROC_T *p, struct nfsvattr *fnap, struct nfsvattr *tnap,
    int *fattrflagp, int *tattrflagp)
{
	u_int32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsmount *nmp;
	struct nfsnode *np;
	nfsattrbit_t attrbits;
	nfsv4stateid_t fdstateid, tdstateid;
	int error = 0, ret = 0, gottd = 0, gotfd = 0, i;

	*fattrflagp = 0;
	*tattrflagp = 0;
	nmp = VFSTONFS(fdvp->v_mount);
	if (fnamelen > NFS_MAXNAMLEN || tnamelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
tryagain:
	/*
	 * ret == 0 means this is the first pass.  On a retry ret is still
	 * non-zero, so the else clause below resets it and the plain
	 * Rename request is built instead.
	 */
	if (NFSHASNFSV4(nmp) && ret == 0) {
		ret = nfscl_renamedeleg(fvp, &fdstateid, &gotfd, tvp,
		    &tdstateid, &gottd, p);
		/* Pick the request flavour by how many stateids came back. */
		if (gotfd && gottd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME2, fvp, cred);
		} else if (gotfd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, fvp, cred);
		} else if (gottd) {
			NFSCL_REQSTART(nd, NFSPROC_RETDELEGRENAME1, tvp, cred);
		}
		if (gotfd) {
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			/* For NFSHASNFSV4N() mounts the seqid is sent as 0. */
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = fdstateid.seqid;
			*tl++ = fdstateid.other[0];
			*tl++ = fdstateid.other[1];
			*tl = fdstateid.other[2];
			if (gottd) {
				/*
				 * Both stateids are present: switch to tvp's
				 * file handle and add a second DelegReturn,
				 * whose stateid is appended just below.
				 */
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(NFSV4OP_PUTFH);
				np = VTONFS(tvp);
				(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
				    np->n_fhp->nfh_len, 0);
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(NFSV4OP_DELEGRETURN);
			}
		}
		if (gottd) {
			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
			if (NFSHASNFSV4N(nmp))
				*tl++ = 0;
			else
				*tl++ = tdstateid.seqid;
			*tl++ = tdstateid.other[0];
			*tl++ = tdstateid.other[1];
			*tl = tdstateid.other[2];
		}
		if (ret > 0) {
			/* Make fdvp the current and then the saved fh. */
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_PUTFH);
			np = VTONFS(fdvp);
			(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh,
			    np->n_fhp->nfh_len, 0);
			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(NFSV4OP_SAVEFH);
		}
	} else {
		ret = 0;
	}
	if (ret == 0)
		NFSCL_REQSTART(nd, NFSPROC_RENAME, fdvp, cred);
	if (nd->nd_flag & ND_NFSV4) {
		/*
		 * Getattr on the current file handle, then PutFH of tdvp
		 * and a Getattr on it, before the Rename operation itself.
		 */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		NFSWCCATTR_ATTRBIT(&attrbits);
		(void) nfsrv_putattrbit(nd, &attrbits);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_PUTFH);
		(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
		    VTONFS(tdvp)->n_fhp->nfh_len, 0);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_GETATTR);
		(void) nfsrv_putattrbit(nd, &attrbits);
		nd->nd_flag |= ND_V4WCCATTR;
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(NFSV4OP_RENAME);
	}
	(void) nfsm_strtom(nd, fnameptr, fnamelen);
	/* When not NFSv4, tdvp's file handle goes between the two names. */
	if (!(nd->nd_flag & ND_NFSV4))
		(void)nfsm_fhtom(nmp, nd, VTONFS(tdvp)->n_fhp->nfh_fh,
			VTONFS(tdvp)->n_fhp->nfh_len, 0);
	(void) nfsm_strtom(nd, tnameptr, tnamelen);
	error = nfscl_request(nd, fdvp, p, cred);
	if (error)
		return (error);
	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
		/* For NFSv4, parse out any Delegreturn replies. */
		if (ret > 0 && nd->nd_repstat != 0 &&
		    (nd->nd_flag & ND_NOMOREDATA)) {
			/*
			 * If the Delegreturn failed, try again without
			 * it. The server will Recall, as required.
			 */
			m_freem(nd->nd_mrep);
			goto tryagain;
		}
		/*
		 * Two pairs of 32-bit words are consumed for each
		 * delegation returned.
		 * NOTE(review): NFSM_DISSECT() presumably jumps to nfsmout
		 * when the reply is too short -- confirm against the macro.
		 */
		for (i = 0; i < (ret * 2); i++) {
			if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) ==
			    ND_NFSV4) {
			    NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			    if (*(tl + 1)) {
				if (i == 1 && ret > 1) {
				    /*
				     * If the Delegreturn failed, try again
				     * without it. The server will Recall, as
				     * required.
				     * If ret > 1, the second iteration of this
				     * loop is the second DelegReturn result.
				     */
				    m_freem(nd->nd_mrep);
				    goto tryagain;
				} else {
				    nd->nd_flag |= ND_NOMOREDATA;
				}
			    }
			}
		}
		/* Now, the first wcc attribute reply. */
		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
		}
		error = nfscl_wcc_data(nd, fdvp, fnap, fattrflagp, NULL, NULL);
		/* and the second wcc attribute reply. */
		if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4 &&
		    !error) {
			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*(tl + 1))
				nd->nd_flag |= ND_NOMOREDATA;
		}
		if (!error)
			error = nfscl_wcc_data(nd, tdvp, tnap, tattrflagp,
			    NULL, NULL);
	}
	/* A parsing error takes precedence over the reply status. */
	if (nd->nd_repstat && !error)
		error = nd->nd_repstat;
nfsmout:
	m_freem(nd->nd_mrep);
	return (error);
}
3044
3045/*
3046 * nfs hard link create rpc
3047 */
3048int
3049nfsrpc_link(vnode_t dvp, vnode_t vp, char *name, int namelen,
3050    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3051    struct nfsvattr *nap, int *attrflagp, int *dattrflagp)
3052{
3053	u_int32_t *tl;
3054	struct nfsrv_descript nfsd, *nd = &nfsd;
3055	nfsattrbit_t attrbits;
3056	int error = 0;
3057
3058	*attrflagp = 0;
3059	*dattrflagp = 0;
3060	if (namelen > NFS_MAXNAMLEN)
3061		return (ENAMETOOLONG);
3062	NFSCL_REQSTART(nd, NFSPROC_LINK, vp, cred);
3063	if (nd->nd_flag & ND_NFSV4) {
3064		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3065		*tl = txdr_unsigned(NFSV4OP_PUTFH);
3066	}
3067	(void)nfsm_fhtom(VFSTONFS(dvp->v_mount), nd, VTONFS(dvp)->n_fhp->nfh_fh,
3068		VTONFS(dvp)->n_fhp->nfh_len, 0);
3069	if (nd->nd_flag & ND_NFSV4) {
3070		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3071		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3072		NFSWCCATTR_ATTRBIT(&attrbits);
3073		(void) nfsrv_putattrbit(nd, &attrbits);
3074		nd->nd_flag |= ND_V4WCCATTR;
3075		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3076		*tl = txdr_unsigned(NFSV4OP_LINK);
3077	}
3078	(void) nfsm_strtom(nd, name, namelen);
3079	error = nfscl_request(nd, vp, p, cred);
3080	if (error)
3081		return (error);
3082	if (nd->nd_flag & ND_NFSV3) {
3083		error = nfscl_postop_attr(nd, nap, attrflagp);
3084		if (!error)
3085			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
3086			    NULL, NULL);
3087	} else if ((nd->nd_flag & (ND_NFSV4 | ND_NOMOREDATA)) == ND_NFSV4) {
3088		/*
3089		 * First, parse out the PutFH and Getattr result.
3090		 */
3091		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3092		if (!(*(tl + 1)))
3093			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3094		if (*(tl + 1))
3095			nd->nd_flag |= ND_NOMOREDATA;
3096		/*
3097		 * Get the pre-op attributes.
3098		 */
3099		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3100	}
3101	if (nd->nd_repstat && !error)
3102		error = nd->nd_repstat;
3103nfsmout:
3104	m_freem(nd->nd_mrep);
3105	return (error);
3106}
3107
3108/*
3109 * nfs symbolic link create rpc
3110 */
3111int
3112nfsrpc_symlink(vnode_t dvp, char *name, int namelen, const char *target,
3113    struct vattr *vap, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3114    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3115    int *dattrflagp)
3116{
3117	u_int32_t *tl;
3118	struct nfsrv_descript nfsd, *nd = &nfsd;
3119	struct nfsmount *nmp;
3120	int slen, error = 0;
3121
3122	*nfhpp = NULL;
3123	*attrflagp = 0;
3124	*dattrflagp = 0;
3125	nmp = VFSTONFS(dvp->v_mount);
3126	slen = strlen(target);
3127	if (slen > NFS_MAXPATHLEN || namelen > NFS_MAXNAMLEN)
3128		return (ENAMETOOLONG);
3129	NFSCL_REQSTART(nd, NFSPROC_SYMLINK, dvp, cred);
3130	if (nd->nd_flag & ND_NFSV4) {
3131		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3132		*tl = txdr_unsigned(NFLNK);
3133		(void) nfsm_strtom(nd, target, slen);
3134	}
3135	(void) nfsm_strtom(nd, name, namelen);
3136	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3137		nfscl_fillsattr(nd, vap, dvp, 0, 0);
3138	if (!(nd->nd_flag & ND_NFSV4))
3139		(void) nfsm_strtom(nd, target, slen);
3140	if (nd->nd_flag & ND_NFSV2)
3141		nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3142	error = nfscl_request(nd, dvp, p, cred);
3143	if (error)
3144		return (error);
3145	if (nd->nd_flag & ND_NFSV4)
3146		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3147	if ((nd->nd_flag & ND_NFSV3) && !error) {
3148		if (!nd->nd_repstat)
3149			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3150		if (!error)
3151			error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp,
3152			    NULL, NULL);
3153	}
3154	if (nd->nd_repstat && !error)
3155		error = nd->nd_repstat;
3156	m_freem(nd->nd_mrep);
3157	/*
3158	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3159	 * Only do this if vfs.nfs.ignore_eexist is set.
3160	 * Never do this for NFSv4.1 or later minor versions, since sessions
3161	 * should guarantee "exactly once" RPC semantics.
3162	 */
3163	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3164	    nmp->nm_minorvers == 0))
3165		error = 0;
3166	return (error);
3167}
3168
3169/*
3170 * nfs make dir rpc
3171 */
3172int
3173nfsrpc_mkdir(vnode_t dvp, char *name, int namelen, struct vattr *vap,
3174    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
3175    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
3176    int *dattrflagp)
3177{
3178	u_int32_t *tl;
3179	struct nfsrv_descript nfsd, *nd = &nfsd;
3180	nfsattrbit_t attrbits;
3181	int error = 0;
3182	struct nfsfh *fhp;
3183	struct nfsmount *nmp;
3184
3185	*nfhpp = NULL;
3186	*attrflagp = 0;
3187	*dattrflagp = 0;
3188	nmp = VFSTONFS(dvp->v_mount);
3189	fhp = VTONFS(dvp)->n_fhp;
3190	if (namelen > NFS_MAXNAMLEN)
3191		return (ENAMETOOLONG);
3192	NFSCL_REQSTART(nd, NFSPROC_MKDIR, dvp, cred);
3193	if (nd->nd_flag & ND_NFSV4) {
3194		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3195		*tl = txdr_unsigned(NFDIR);
3196	}
3197	(void) nfsm_strtom(nd, name, namelen);
3198	nfscl_fillsattr(nd, vap, dvp, NFSSATTR_SIZENEG1, 0);
3199	if (nd->nd_flag & ND_NFSV4) {
3200		NFSGETATTR_ATTRBIT(&attrbits);
3201		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3202		*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3203		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3204		(void) nfsrv_putattrbit(nd, &attrbits);
3205		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3206		*tl = txdr_unsigned(NFSV4OP_PUTFH);
3207		(void)nfsm_fhtom(nmp, nd, fhp->nfh_fh, fhp->nfh_len, 0);
3208		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3209		*tl = txdr_unsigned(NFSV4OP_GETATTR);
3210		(void) nfsrv_putattrbit(nd, &attrbits);
3211	}
3212	error = nfscl_request(nd, dvp, p, cred);
3213	if (error)
3214		return (error);
3215	if (nd->nd_flag & ND_NFSV4)
3216		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3217	if (!nd->nd_repstat && !error) {
3218		if (nd->nd_flag & ND_NFSV4) {
3219			NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3220			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
3221		}
3222		if (!error)
3223			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
3224		if (error == 0 && (nd->nd_flag & ND_NFSV4) != 0) {
3225			/* Get rid of the PutFH and Getattr status values. */
3226			NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3227			/* Load the directory attributes. */
3228			error = nfsm_loadattr(nd, dnap);
3229			if (error == 0)
3230				*dattrflagp = 1;
3231		}
3232	}
3233	if ((nd->nd_flag & ND_NFSV3) && !error)
3234		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3235	if (nd->nd_repstat && !error)
3236		error = nd->nd_repstat;
3237nfsmout:
3238	m_freem(nd->nd_mrep);
3239	/*
3240	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3241	 * Only do this if vfs.nfs.ignore_eexist is set.
3242	 * Never do this for NFSv4.1 or later minor versions, since sessions
3243	 * should guarantee "exactly once" RPC semantics.
3244	 */
3245	if (error == EEXIST && nfsignore_eexist != 0 && (!NFSHASNFSV4(nmp) ||
3246	    nmp->nm_minorvers == 0))
3247		error = 0;
3248	return (error);
3249}
3250
3251/*
3252 * nfs remove directory call
3253 */
3254int
3255nfsrpc_rmdir(vnode_t dvp, char *name, int namelen, struct ucred *cred,
3256    NFSPROC_T *p, struct nfsvattr *dnap, int *dattrflagp)
3257{
3258	struct nfsrv_descript nfsd, *nd = &nfsd;
3259	int error = 0;
3260
3261	*dattrflagp = 0;
3262	if (namelen > NFS_MAXNAMLEN)
3263		return (ENAMETOOLONG);
3264	NFSCL_REQSTART(nd, NFSPROC_RMDIR, dvp, cred);
3265	(void) nfsm_strtom(nd, name, namelen);
3266	error = nfscl_request(nd, dvp, p, cred);
3267	if (error)
3268		return (error);
3269	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4))
3270		error = nfscl_wcc_data(nd, dvp, dnap, dattrflagp, NULL, NULL);
3271	if (nd->nd_repstat && !error)
3272		error = nd->nd_repstat;
3273	m_freem(nd->nd_mrep);
3274	/*
3275	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
3276	 */
3277	if (error == ENOENT)
3278		error = 0;
3279	return (error);
3280}
3281
/*
 * Readdir rpc.
 * Always returns with either uio_resid unchanged, if you are at the
 * end of the directory, or uio_resid == 0, with all DIRBLKSIZ chunks
 * filled in.
 * I felt this would allow caching of directory blocks more easily
 * than returning a partially filled block.
 * Directory offset cookies:
 * Oh my, what to do with them...
 * I can think of three ways to deal with them:
 * 1 - have the layer above these RPCs maintain a map between logical
 *     directory byte offsets and the NFS directory offset cookies
 * 2 - pass the opaque directory offset cookies up into userland
 *     and let the libc functions deal with them, via the system call
 * 3 - return them to userland in the "struct dirent", so future versions
 *     of libc can use them and do whatever is necessary to make things work
 *     above these rpc calls, in the meantime
 * For now, I do #3 by "hiding" the directory offset cookies after the
 * d_name field in struct dirent. This is space inside d_reclen that
 * will be ignored by anything that doesn't know about them.
 * The directory offset cookies are filled in as the last 8 bytes of
 * each directory entry, after d_name. Someday, the userland libc
 * functions may be able to use these. In the meantime, it satisfies
 * OpenBSD's requirements for cookies being returned.
 * It expects the directory offset cookie for the read to be in uio_offset
 * and returns the one for the next entry after this directory block in
 * there, as well.
 *
 * Arguments:
 * vp - the directory being read
 * uiop - must describe a single iovec whose uio_resid is a multiple of
 *	DIRBLKSIZ (asserted below)
 * cookiep - the cookie to start reading at; it is updated as each entry
 *	is copied out
 * nap, attrflagp - handed to nfscl_postop_attr() for NFSv3 and NFSv4
 *	replies
 * eofp - if not NULL, set as described near the end of the function
 *
 * Returns 0 or an error; the reply mbufs are always freed before returning.
 */
int
nfsrpc_readdir(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
    int *eofp)
{
	int len, left;
	struct dirent *dp = NULL;
	u_int32_t *tl;
	nfsquad_t cookie, ncookie;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *dnp = VTONFS(vp);
	struct nfsvattr nfsva;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
	int reqsize, tryformoredirs = 1, readsize, eof = 0, gotmnton = 0;
	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
	char *cp;
	nfsattrbit_t attrbits, dattrbits;
	u_int32_t rderr, *tl2 = NULL;
	size_t tresid;

	KASSERT(uiop->uio_iovcnt == 1 &&
	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
	    ("nfs readdirrpc bad uio"));
	ncookie.lval[0] = ncookie.lval[1] = 0;
	/*
	 * There is no point in reading a lot more than uio_resid, however
	 * adding one additional DIRBLKSIZ makes sense. Since uio_resid
	 * and nm_readdirsize are both exact multiples of DIRBLKSIZ, this
	 * will never make readsize > nm_readdirsize.
	 */
	readsize = nmp->nm_readdirsize;
	if (readsize > uiop->uio_resid)
		readsize = uiop->uio_resid + DIRBLKSIZ;

	*attrflagp = 0;
	if (eofp)
		*eofp = 0;
	/* Remember the starting resid so "no data returned" can be detected. */
	tresid = uiop->uio_resid;
	cookie.lval[0] = cookiep->nfsuquad[0];
	cookie.lval[1] = cookiep->nfsuquad[1];
	/* nd_mrep != NULL at nfsmout means there is a reply to be freed. */
	nd->nd_mrep = NULL;

	/*
	 * For NFSv4, first create the "." and ".." entries.
	 */
	if (NFSHASNFSV4(nmp)) {
		reqsize = 6 * NFSX_UNSIGNED;
		NFSGETATTR_ATTRBIT(&dattrbits);
		NFSZERO_ATTRBIT(&attrbits);
		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TYPE);
		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
		    NFSATTRBIT_MOUNTEDONFILEID)) {
			NFSSETBIT_ATTRBIT(&attrbits,
			    NFSATTRBIT_MOUNTEDONFILEID);
			gotmnton = 1;
		} else {
			/*
			 * Must fake it. Use the fileno, except when the
			 * fsid is != to that of the directory. For that
			 * case, generate a fake fileno that is not the same.
			 */
			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
			gotmnton = 0;
		}

		/*
		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
		 */
		if (uiop->uio_offset == 0) {
			/* A Lookupp is done to get the fileid for "..". */
			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
			*tl = txdr_unsigned(NFSV4OP_GETATTR);
			(void) nfsrv_putattrbit(nd, &attrbits);
			error = nfscl_request(nd, vp, p, cred);
			if (error)
			    return (error);
			dotfileid = 0;	/* Fake out the compiler. */
			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
			    error = nfsm_loadattr(nd, &nfsva);
			    if (error != 0)
				goto nfsmout;
			    dotfileid = nfsva.na_fileid;
			}
			if (nd->nd_repstat == 0) {
			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
			    /* The fifth word is used as a length to skip. */
			    len = fxdr_unsigned(int, *(tl + 4));
			    if (len > 0 && len <= NFSX_V4FHMAX)
				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
			    else
				error = EPERM;
			    if (!error) {
				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
				nfsva.na_mntonfileno = UINT64_MAX;
				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
				    NULL, NULL, NULL, p, cred);
				if (error) {
				    dotdotfileid = dotfileid;
				} else if (gotmnton) {
				    if (nfsva.na_mntonfileno != UINT64_MAX)
					dotdotfileid = nfsva.na_mntonfileno;
				    else
					dotdotfileid = nfsva.na_fileid;
				} else if (nfsva.na_filesid[0] ==
				    dnp->n_vattr.na_filesid[0] &&
				    nfsva.na_filesid[1] ==
				    dnp->n_vattr.na_filesid[1]) {
				    dotdotfileid = nfsva.na_fileid;
				} else {
				    do {
					fakefileno--;
				    } while (fakefileno ==
					nfsva.na_fileid);
				    dotdotfileid = fakefileno;
				}
			    }
			} else if (nd->nd_repstat == NFSERR_NOENT) {
			    /*
			     * Lookupp returns NFSERR_NOENT when we are
			     * at the root, so just use the current dir.
			     */
			    nd->nd_repstat = 0;
			    dotdotfileid = dotfileid;
			} else {
			    error = nd->nd_repstat;
			}
			m_freem(nd->nd_mrep);
			if (error)
			    return (error);
			nd->nd_mrep = NULL;
			/* Hand craft the "." entry at the start of the buffer. */
			dp = (struct dirent *)uiop->uio_iov->iov_base;
			dp->d_pad0 = dp->d_pad1 = 0;
			dp->d_off = 0;
			dp->d_type = DT_DIR;
			dp->d_fileno = dotfileid;
			dp->d_namlen = 1;
			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
			dp->d_name[0] = '.';
			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
			/*
			 * Just make these offset cookie 0.
			 */
			tl = (u_int32_t *)&dp->d_name[8];
			*tl++ = 0;
			*tl = 0;
			blksiz += dp->d_reclen;
			uiop->uio_resid -= dp->d_reclen;
			uiop->uio_offset += dp->d_reclen;
			uiop->uio_iov->iov_base =
			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
			uiop->uio_iov->iov_len -= dp->d_reclen;
			/* and the ".." entry right after it. */
			dp = (struct dirent *)uiop->uio_iov->iov_base;
			dp->d_pad0 = dp->d_pad1 = 0;
			dp->d_off = 0;
			dp->d_type = DT_DIR;
			dp->d_fileno = dotdotfileid;
			dp->d_namlen = 2;
			*((uint64_t *)dp->d_name) = 0;
			dp->d_name[0] = '.';
			dp->d_name[1] = '.';
			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
			/*
			 * Just make these offset cookie 0.
			 */
			tl = (u_int32_t *)&dp->d_name[8];
			*tl++ = 0;
			*tl = 0;
			blksiz += dp->d_reclen;
			uiop->uio_resid -= dp->d_reclen;
			uiop->uio_offset += dp->d_reclen;
			uiop->uio_iov->iov_base =
			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
			uiop->uio_iov->iov_len -= dp->d_reclen;
		}
		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_RDATTRERROR);
	} else {
		reqsize = 5 * NFSX_UNSIGNED;
	}

	/*
	 * Loop around doing readdir rpc's of size readsize.
	 * The stopping criteria is EOF or buffer full.
	 */
	while (more_dirs && bigenough) {
		*attrflagp = 0;
		NFSCL_REQSTART(nd, NFSPROC_READDIR, vp, cred);
		if (nd->nd_flag & ND_NFSV2) {
			/* NFSv2: only the low cookie word and the size. */
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = cookie.lval[1];
			*tl = txdr_unsigned(readsize);
		} else {
			/*
			 * Cookie, then the cookie verifier (zero for a zero
			 * cookie), then the size word(s).
			 */
			NFSM_BUILD(tl, u_int32_t *, reqsize);
			*tl++ = cookie.lval[0];
			*tl++ = cookie.lval[1];
			if (cookie.qval == 0) {
				*tl++ = 0;
				*tl++ = 0;
			} else {
				NFSLOCKNODE(dnp);
				*tl++ = dnp->n_cookieverf.nfsuquad[0];
				*tl++ = dnp->n_cookieverf.nfsuquad[1];
				NFSUNLOCKNODE(dnp);
			}
			if (nd->nd_flag & ND_NFSV4) {
				*tl++ = txdr_unsigned(readsize);
				*tl = txdr_unsigned(readsize);
				(void) nfsrv_putattrbit(nd, &attrbits);
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(NFSV4OP_GETATTR);
				(void) nfsrv_putattrbit(nd, &dattrbits);
			} else {
				*tl = txdr_unsigned(readsize);
			}
		}
		error = nfscl_request(nd, vp, p, cred);
		if (error)
			return (error);
		if (!(nd->nd_flag & ND_NFSV2)) {
			if (nd->nd_flag & ND_NFSV3)
				error = nfscl_postop_attr(nd, nap, attrflagp);
			if (!nd->nd_repstat && !error) {
				/* Save the cookie verifier from the reply. */
				NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
				NFSLOCKNODE(dnp);
				dnp->n_cookieverf.nfsuquad[0] = *tl++;
				dnp->n_cookieverf.nfsuquad[1] = *tl;
				NFSUNLOCKNODE(dnp);
			}
		}
		if (nd->nd_repstat || error) {
			if (!error)
				error = nd->nd_repstat;
			goto nfsmout;
		}
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		more_dirs = fxdr_unsigned(int, *tl);
		/* A reply with no entries at all ends the outer loop. */
		if (!more_dirs)
			tryformoredirs = 0;

		/* loop through the dir entries, doctoring them to 4bsd form */
		while (more_dirs && bigenough) {
			if (nd->nd_flag & ND_NFSV4) {
				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
				ncookie.lval[0] = *tl++;
				ncookie.lval[1] = *tl++;
				len = fxdr_unsigned(int, *tl);
			} else if (nd->nd_flag & ND_NFSV3) {
				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
				nfsva.na_fileid = fxdr_hyper(tl);
				tl += 2;
				len = fxdr_unsigned(int, *tl);
			} else {
				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
				nfsva.na_fileid = fxdr_unsigned(uint64_t,
				    *tl++);
				len = fxdr_unsigned(int, *tl);
			}
			if (len <= 0 || len > NFS_MAXNAMLEN) {
				error = EBADRPC;
				goto nfsmout;
			}
			tlen = roundup2(len, 8);
			if (tlen == len)
				tlen += 8;  /* To ensure null termination. */
			left = DIRBLKSIZ - blksiz;
			/*
			 * If the entry does not fit in what is left of this
			 * DIRBLKSIZ chunk, zero the remainder, add it to the
			 * previous record's d_reclen and start a new chunk.
			 */
			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
				NFSBZERO(uiop->uio_iov->iov_base, left);
				dp->d_reclen += left;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + left;
				uiop->uio_iov->iov_len -= left;
				uiop->uio_resid -= left;
				uiop->uio_offset += left;
				blksiz = 0;
			}
			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
			    uiop->uio_resid)
				bigenough = 0;
			if (bigenough) {
				dp = (struct dirent *)uiop->uio_iov->iov_base;
				dp->d_pad0 = dp->d_pad1 = 0;
				dp->d_off = 0;
				dp->d_namlen = len;
				dp->d_reclen = _GENERIC_DIRLEN(len) +
				    NFSX_HYPER;
				dp->d_type = DT_UNKNOWN;
				blksiz += dp->d_reclen;
				if (blksiz == DIRBLKSIZ)
					blksiz = 0;
				uiop->uio_resid -= DIRHDSIZ;
				uiop->uio_offset += DIRHDSIZ;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
				uiop->uio_iov->iov_len -= DIRHDSIZ;
				/* Copy the name out of the reply. */
				error = nfsm_mbufuio(nd, uiop, len);
				if (error)
					goto nfsmout;
				cp = uiop->uio_iov->iov_base;
				tlen -= len;
				NFSBZERO(cp, tlen);
				cp += tlen;	/* points to cookie storage */
				tl2 = (u_int32_t *)cp;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + tlen +
				    NFSX_HYPER;
				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
				uiop->uio_resid -= tlen + NFSX_HYPER;
				uiop->uio_offset += (tlen + NFSX_HYPER);
			} else {
				/* No room: step over the name in the reply. */
				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
				if (error)
					goto nfsmout;
			}
			if (nd->nd_flag & ND_NFSV4) {
				rderr = 0;
				nfsva.na_mntonfileno = UINT64_MAX;
				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
				    NULL, NULL, &rderr, p, cred);
				if (error)
					goto nfsmout;
				NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			} else if (nd->nd_flag & ND_NFSV3) {
				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
				ncookie.lval[0] = *tl++;
				ncookie.lval[1] = *tl++;
			} else {
				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
				ncookie.lval[0] = 0;
				ncookie.lval[1] = *tl++;
			}
			/* In all three cases tl now points at the next "more" flag. */
			if (bigenough) {
			    if (nd->nd_flag & ND_NFSV4) {
				if (rderr) {
				    dp->d_fileno = 0;
				} else {
				    if (gotmnton) {
					if (nfsva.na_mntonfileno != UINT64_MAX)
					    dp->d_fileno = nfsva.na_mntonfileno;
					else
					    dp->d_fileno = nfsva.na_fileid;
				    } else if (nfsva.na_filesid[0] ==
					dnp->n_vattr.na_filesid[0] &&
					nfsva.na_filesid[1] ==
					dnp->n_vattr.na_filesid[1]) {
					dp->d_fileno = nfsva.na_fileid;
				    } else {
					do {
					    fakefileno--;
					} while (fakefileno ==
					    nfsva.na_fileid);
					dp->d_fileno = fakefileno;
				    }
				    dp->d_type = vtonfs_dtype(nfsva.na_type);
				}
			    } else {
				dp->d_fileno = nfsva.na_fileid;
			    }
			    /*
			     * Store the entry's cookie after its name and
			     * make it the cookie to continue from.
			     */
			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
				ncookie.lval[0];
			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
				ncookie.lval[1];
			}
			more_dirs = fxdr_unsigned(int, *tl);
		}
		/*
		 * If at end of rpc data, get the eof boolean
		 */
		if (!more_dirs) {
			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
			eof = fxdr_unsigned(int, *tl);
			if (tryformoredirs)
				more_dirs = !eof;
			if (nd->nd_flag & ND_NFSV4) {
				error = nfscl_postop_attr(nd, nap, attrflagp);
				if (error)
					goto nfsmout;
			}
		}
		m_freem(nd->nd_mrep);
		nd->nd_mrep = NULL;
	}
	/*
	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
	 * by increasing d_reclen for the last record.
	 */
	if (blksiz > 0) {
		left = DIRBLKSIZ - blksiz;
		NFSBZERO(uiop->uio_iov->iov_base, left);
		dp->d_reclen += left;
		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
		    left;
		uiop->uio_iov->iov_len -= left;
		uiop->uio_resid -= left;
		uiop->uio_offset += left;
	}

	/*
	 * If returning no data, assume end of file.
	 * If not bigenough, return not end of file, since you aren't
	 *    returning all the data
	 * Otherwise, return the eof flag from the server.
	 */
	if (eofp) {
		if (tresid == ((size_t)(uiop->uio_resid)))
			*eofp = 1;
		else if (!bigenough)
			*eofp = 0;
		else
			*eofp = eof;
	}

	/*
	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
	 */
	while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
		dp = (struct dirent *)uiop->uio_iov->iov_base;
		NFSBZERO(dp, DIRBLKSIZ);
		dp->d_type = DT_UNKNOWN;
		/* The last cookie seen is stored in each empty record. */
		tl = (u_int32_t *)&dp->d_name[4];
		*tl++ = cookie.lval[0];
		*tl = cookie.lval[1];
		dp->d_reclen = DIRBLKSIZ;
		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
		    DIRBLKSIZ;
		uiop->uio_iov->iov_len -= DIRBLKSIZ;
		uiop->uio_resid -= DIRBLKSIZ;
		uiop->uio_offset += DIRBLKSIZ;
	}

nfsmout:
	if (nd->nd_mrep != NULL)
		m_freem(nd->nd_mrep);
	return (error);
}
3747
3748/*
3749 * NFS V3 readdir plus RPC. Used in place of nfsrpc_readdir().
3750 * (Also used for NFS V4 when mount flag set.)
3751 * (ditto above w.r.t. multiple of DIRBLKSIZ, etc.)
3752 */
3753int
3754nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
3755    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp,
3756    int *eofp)
3757{
3758	int len, left;
3759	struct dirent *dp = NULL;
3760	u_int32_t *tl;
3761	vnode_t newvp = NULLVP;
3762	struct nfsrv_descript nfsd, *nd = &nfsd;
3763	struct nameidata nami, *ndp = &nami;
3764	struct componentname *cnp = &ndp->ni_cnd;
3765	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3766	struct nfsnode *dnp = VTONFS(vp), *np;
3767	struct nfsvattr nfsva;
3768	struct nfsfh *nfhp;
3769	nfsquad_t cookie, ncookie;
3770	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
3771	int attrflag, tryformoredirs = 1, eof = 0, gotmnton = 0;
3772	int isdotdot = 0, unlocknewvp = 0;
3773	u_int64_t dotfileid, dotdotfileid = 0, fakefileno = UINT64_MAX;
3774	u_int64_t fileno = 0;
3775	char *cp;
3776	nfsattrbit_t attrbits, dattrbits;
3777	size_t tresid;
3778	u_int32_t *tl2 = NULL, rderr;
3779	struct timespec dctime, ts;
3780	bool attr_ok;
3781
3782	KASSERT(uiop->uio_iovcnt == 1 &&
3783	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
3784	    ("nfs readdirplusrpc bad uio"));
3785	ncookie.lval[0] = ncookie.lval[1] = 0;
3786	timespecclear(&dctime);
3787	*attrflagp = 0;
3788	if (eofp != NULL)
3789		*eofp = 0;
3790	ndp->ni_dvp = vp;
3791	nd->nd_mrep = NULL;
3792	cookie.lval[0] = cookiep->nfsuquad[0];
3793	cookie.lval[1] = cookiep->nfsuquad[1];
3794	tresid = uiop->uio_resid;
3795
3796	/*
3797	 * For NFSv4, first create the "." and ".." entries.
3798	 */
3799	if (NFSHASNFSV4(nmp)) {
3800		NFSGETATTR_ATTRBIT(&dattrbits);
3801		NFSZERO_ATTRBIT(&attrbits);
3802		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FILEID);
3803		if (NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3804		    NFSATTRBIT_MOUNTEDONFILEID)) {
3805			NFSSETBIT_ATTRBIT(&attrbits,
3806			    NFSATTRBIT_MOUNTEDONFILEID);
3807			gotmnton = 1;
3808		} else {
3809			/*
3810			 * Must fake it. Use the fileno, except when the
3811			 * fsid is != to that of the directory. For that
3812			 * case, generate a fake fileno that is not the same.
3813			 */
3814			NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_FSID);
3815			gotmnton = 0;
3816		}
3817
3818		/*
3819		 * Joy, oh joy. For V4 we get to hand craft '.' and '..'.
3820		 */
3821		if (uiop->uio_offset == 0) {
3822			NFSCL_REQSTART(nd, NFSPROC_LOOKUPP, vp, cred);
3823			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3824			*tl++ = txdr_unsigned(NFSV4OP_GETFH);
3825			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3826			(void) nfsrv_putattrbit(nd, &attrbits);
3827			error = nfscl_request(nd, vp, p, cred);
3828			if (error)
3829			    return (error);
3830			dotfileid = 0;	/* Fake out the compiler. */
3831			if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
3832			    error = nfsm_loadattr(nd, &nfsva);
3833			    if (error != 0)
3834				goto nfsmout;
3835			    dctime = nfsva.na_ctime;
3836			    dotfileid = nfsva.na_fileid;
3837			}
3838			if (nd->nd_repstat == 0) {
3839			    NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
3840			    len = fxdr_unsigned(int, *(tl + 4));
3841			    if (len > 0 && len <= NFSX_V4FHMAX)
3842				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
3843			    else
3844				error = EPERM;
3845			    if (!error) {
3846				NFSM_DISSECT(tl, u_int32_t *, 2*NFSX_UNSIGNED);
3847				nfsva.na_mntonfileno = UINT64_MAX;
3848				error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
3849				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
3850				    NULL, NULL, NULL, p, cred);
3851				if (error) {
3852				    dotdotfileid = dotfileid;
3853				} else if (gotmnton) {
3854				    if (nfsva.na_mntonfileno != UINT64_MAX)
3855					dotdotfileid = nfsva.na_mntonfileno;
3856				    else
3857					dotdotfileid = nfsva.na_fileid;
3858				} else if (nfsva.na_filesid[0] ==
3859				    dnp->n_vattr.na_filesid[0] &&
3860				    nfsva.na_filesid[1] ==
3861				    dnp->n_vattr.na_filesid[1]) {
3862				    dotdotfileid = nfsva.na_fileid;
3863				} else {
3864				    do {
3865					fakefileno--;
3866				    } while (fakefileno ==
3867					nfsva.na_fileid);
3868				    dotdotfileid = fakefileno;
3869				}
3870			    }
3871			} else if (nd->nd_repstat == NFSERR_NOENT) {
3872			    /*
3873			     * Lookupp returns NFSERR_NOENT when we are
3874			     * at the root, so just use the current dir.
3875			     */
3876			    nd->nd_repstat = 0;
3877			    dotdotfileid = dotfileid;
3878			} else {
3879			    error = nd->nd_repstat;
3880			}
3881			m_freem(nd->nd_mrep);
3882			if (error)
3883			    return (error);
3884			nd->nd_mrep = NULL;
3885			dp = (struct dirent *)uiop->uio_iov->iov_base;
3886			dp->d_pad0 = dp->d_pad1 = 0;
3887			dp->d_off = 0;
3888			dp->d_type = DT_DIR;
3889			dp->d_fileno = dotfileid;
3890			dp->d_namlen = 1;
3891			*((uint64_t *)dp->d_name) = 0;	/* Zero pad it. */
3892			dp->d_name[0] = '.';
3893			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3894			/*
3895			 * Just make these offset cookie 0.
3896			 */
3897			tl = (u_int32_t *)&dp->d_name[8];
3898			*tl++ = 0;
3899			*tl = 0;
3900			blksiz += dp->d_reclen;
3901			uiop->uio_resid -= dp->d_reclen;
3902			uiop->uio_offset += dp->d_reclen;
3903			uiop->uio_iov->iov_base =
3904			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3905			uiop->uio_iov->iov_len -= dp->d_reclen;
3906			dp = (struct dirent *)uiop->uio_iov->iov_base;
3907			dp->d_pad0 = dp->d_pad1 = 0;
3908			dp->d_off = 0;
3909			dp->d_type = DT_DIR;
3910			dp->d_fileno = dotdotfileid;
3911			dp->d_namlen = 2;
3912			*((uint64_t *)dp->d_name) = 0;
3913			dp->d_name[0] = '.';
3914			dp->d_name[1] = '.';
3915			dp->d_reclen = _GENERIC_DIRSIZ(dp) + NFSX_HYPER;
3916			/*
3917			 * Just make these offset cookie 0.
3918			 */
3919			tl = (u_int32_t *)&dp->d_name[8];
3920			*tl++ = 0;
3921			*tl = 0;
3922			blksiz += dp->d_reclen;
3923			uiop->uio_resid -= dp->d_reclen;
3924			uiop->uio_offset += dp->d_reclen;
3925			uiop->uio_iov->iov_base =
3926			    (char *)uiop->uio_iov->iov_base + dp->d_reclen;
3927			uiop->uio_iov->iov_len -= dp->d_reclen;
3928		}
3929		NFSREADDIRPLUS_ATTRBIT(&attrbits);
3930		if (gotmnton)
3931			NFSSETBIT_ATTRBIT(&attrbits,
3932			    NFSATTRBIT_MOUNTEDONFILEID);
3933		if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr,
3934		    NFSATTRBIT_TIMECREATE))
3935			NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE);
3936	}
3937
3938	/*
3939	 * Loop around doing readdir rpc's of size nm_readdirsize.
3940	 * The stopping criteria is EOF or buffer full.
3941	 */
3942	while (more_dirs && bigenough) {
3943		*attrflagp = 0;
3944		NFSCL_REQSTART(nd, NFSPROC_READDIRPLUS, vp, cred);
3945 		NFSM_BUILD(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
3946		*tl++ = cookie.lval[0];
3947		*tl++ = cookie.lval[1];
3948		if (cookie.qval == 0) {
3949			*tl++ = 0;
3950			*tl++ = 0;
3951		} else {
3952			NFSLOCKNODE(dnp);
3953			*tl++ = dnp->n_cookieverf.nfsuquad[0];
3954			*tl++ = dnp->n_cookieverf.nfsuquad[1];
3955			NFSUNLOCKNODE(dnp);
3956		}
3957		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
3958		*tl = txdr_unsigned(nmp->nm_readdirsize);
3959		if (nd->nd_flag & ND_NFSV4) {
3960			(void) nfsrv_putattrbit(nd, &attrbits);
3961			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
3962			*tl = txdr_unsigned(NFSV4OP_GETATTR);
3963			(void) nfsrv_putattrbit(nd, &dattrbits);
3964		}
3965		nanouptime(&ts);
3966		error = nfscl_request(nd, vp, p, cred);
3967		if (error)
3968			return (error);
3969		if (nd->nd_flag & ND_NFSV3)
3970			error = nfscl_postop_attr(nd, nap, attrflagp);
3971		if (nd->nd_repstat || error) {
3972			if (!error)
3973				error = nd->nd_repstat;
3974			goto nfsmout;
3975		}
3976		if ((nd->nd_flag & ND_NFSV3) != 0 && *attrflagp != 0)
3977			dctime = nap->na_ctime;
3978		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3979		NFSLOCKNODE(dnp);
3980		dnp->n_cookieverf.nfsuquad[0] = *tl++;
3981		dnp->n_cookieverf.nfsuquad[1] = *tl++;
3982		NFSUNLOCKNODE(dnp);
3983		more_dirs = fxdr_unsigned(int, *tl);
3984		if (!more_dirs)
3985			tryformoredirs = 0;
3986
3987		/* loop through the dir entries, doctoring them to 4bsd form */
3988		while (more_dirs && bigenough) {
3989			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3990			if (nd->nd_flag & ND_NFSV4) {
3991				ncookie.lval[0] = *tl++;
3992				ncookie.lval[1] = *tl++;
3993			} else {
3994				fileno = fxdr_hyper(tl);
3995				tl += 2;
3996			}
3997			len = fxdr_unsigned(int, *tl);
3998			if (len <= 0 || len > NFS_MAXNAMLEN) {
3999				error = EBADRPC;
4000				goto nfsmout;
4001			}
4002			tlen = roundup2(len, 8);
4003			if (tlen == len)
4004				tlen += 8;  /* To ensure null termination. */
4005			left = DIRBLKSIZ - blksiz;
4006			if (_GENERIC_DIRLEN(len) + NFSX_HYPER > left) {
4007				NFSBZERO(uiop->uio_iov->iov_base, left);
4008				dp->d_reclen += left;
4009				uiop->uio_iov->iov_base =
4010				    (char *)uiop->uio_iov->iov_base + left;
4011				uiop->uio_iov->iov_len -= left;
4012				uiop->uio_resid -= left;
4013				uiop->uio_offset += left;
4014				blksiz = 0;
4015			}
4016			if (_GENERIC_DIRLEN(len) + NFSX_HYPER >
4017			    uiop->uio_resid)
4018				bigenough = 0;
4019			if (bigenough) {
4020				dp = (struct dirent *)uiop->uio_iov->iov_base;
4021				dp->d_pad0 = dp->d_pad1 = 0;
4022				dp->d_off = 0;
4023				dp->d_namlen = len;
4024				dp->d_reclen = _GENERIC_DIRLEN(len) +
4025				    NFSX_HYPER;
4026				dp->d_type = DT_UNKNOWN;
4027				blksiz += dp->d_reclen;
4028				if (blksiz == DIRBLKSIZ)
4029					blksiz = 0;
4030				uiop->uio_resid -= DIRHDSIZ;
4031				uiop->uio_offset += DIRHDSIZ;
4032				uiop->uio_iov->iov_base =
4033				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
4034				uiop->uio_iov->iov_len -= DIRHDSIZ;
4035				cnp->cn_nameptr = uiop->uio_iov->iov_base;
4036				cnp->cn_namelen = len;
4037				NFSCNHASHZERO(cnp);
4038				error = nfsm_mbufuio(nd, uiop, len);
4039				if (error)
4040					goto nfsmout;
4041				cp = uiop->uio_iov->iov_base;
4042				tlen -= len;
4043				NFSBZERO(cp, tlen);
4044				cp += tlen;	/* points to cookie storage */
4045				tl2 = (u_int32_t *)cp;
4046				if (len == 2 && cnp->cn_nameptr[0] == '.' &&
4047				    cnp->cn_nameptr[1] == '.')
4048					isdotdot = 1;
4049				else
4050					isdotdot = 0;
4051				uiop->uio_iov->iov_base =
4052				    (char *)uiop->uio_iov->iov_base + tlen +
4053				    NFSX_HYPER;
4054				uiop->uio_iov->iov_len -= tlen + NFSX_HYPER;
4055				uiop->uio_resid -= tlen + NFSX_HYPER;
4056				uiop->uio_offset += (tlen + NFSX_HYPER);
4057			} else {
4058				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
4059				if (error)
4060					goto nfsmout;
4061			}
4062			nfhp = NULL;
4063			if (nd->nd_flag & ND_NFSV3) {
4064				NFSM_DISSECT(tl, u_int32_t *, 3*NFSX_UNSIGNED);
4065				ncookie.lval[0] = *tl++;
4066				ncookie.lval[1] = *tl++;
4067				attrflag = fxdr_unsigned(int, *tl);
4068				if (attrflag) {
4069				  error = nfsm_loadattr(nd, &nfsva);
4070				  if (error)
4071					goto nfsmout;
4072				}
4073				NFSM_DISSECT(tl,u_int32_t *,NFSX_UNSIGNED);
4074				if (*tl) {
4075					error = nfsm_getfh(nd, &nfhp);
4076					if (error)
4077					    goto nfsmout;
4078				}
4079				if (!attrflag && nfhp != NULL) {
4080					free(nfhp, M_NFSFH);
4081					nfhp = NULL;
4082				}
4083			} else {
4084				rderr = 0;
4085				nfsva.na_mntonfileno = 0xffffffff;
4086				error = nfsv4_loadattr(nd, NULL, &nfsva, &nfhp,
4087				    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
4088				    NULL, NULL, &rderr, p, cred);
4089				if (error)
4090					goto nfsmout;
4091			}
4092
4093			if (bigenough) {
4094			    if (nd->nd_flag & ND_NFSV4) {
4095				if (rderr) {
4096				    dp->d_fileno = 0;
4097				} else if (gotmnton) {
4098				    if (nfsva.na_mntonfileno != 0xffffffff)
4099					dp->d_fileno = nfsva.na_mntonfileno;
4100				    else
4101					dp->d_fileno = nfsva.na_fileid;
4102				} else if (nfsva.na_filesid[0] ==
4103				    dnp->n_vattr.na_filesid[0] &&
4104				    nfsva.na_filesid[1] ==
4105				    dnp->n_vattr.na_filesid[1]) {
4106				    dp->d_fileno = nfsva.na_fileid;
4107				} else {
4108				    do {
4109					fakefileno--;
4110				    } while (fakefileno ==
4111					nfsva.na_fileid);
4112				    dp->d_fileno = fakefileno;
4113				}
4114			    } else {
4115				dp->d_fileno = fileno;
4116			    }
4117			    *tl2++ = cookiep->nfsuquad[0] = cookie.lval[0] =
4118				ncookie.lval[0];
4119			    *tl2 = cookiep->nfsuquad[1] = cookie.lval[1] =
4120				ncookie.lval[1];
4121
4122			    if (nfhp != NULL) {
4123				attr_ok = true;
4124				if (NFSRV_CMPFH(nfhp->nfh_fh, nfhp->nfh_len,
4125				    dnp->n_fhp->nfh_fh, dnp->n_fhp->nfh_len)) {
4126				    VREF(vp);
4127				    newvp = vp;
4128				    unlocknewvp = 0;
4129				    free(nfhp, M_NFSFH);
4130				    np = dnp;
4131				} else if (isdotdot != 0) {
4132				    /*
4133				     * Skip doing a nfscl_nget() call for "..".
4134				     * There's a race between acquiring the nfs
4135				     * node here and lookups that look for the
4136				     * directory being read (in the parent).
4137				     * It would try to get a lock on ".." here,
4138				     * owning the lock on the directory being
4139				     * read. Lookup will hold the lock on ".."
4140				     * and try to acquire the lock on the
4141				     * directory being read.
4142				     * If the directory is unlocked/relocked,
4143				     * then there is a LOR with the buflock
4144				     * vp is relocked.
4145				     */
4146				    free(nfhp, M_NFSFH);
4147				} else {
4148				    error = nfscl_nget(vp->v_mount, vp,
4149				      nfhp, cnp, p, &np, LK_EXCLUSIVE);
4150				    if (!error) {
4151					newvp = NFSTOV(np);
4152					unlocknewvp = 1;
4153					/*
4154					 * If n_localmodtime >= time before RPC,
4155					 * then a file modification operation,
4156					 * such as VOP_SETATTR() of size, has
4157					 * occurred while the Lookup RPC and
4158					 * acquisition of the vnode happened. As
4159					 * such, the attributes might be stale,
4160					 * with possibly an incorrect size.
4161					 */
4162					NFSLOCKNODE(np);
4163					if (timespecisset(
4164					    &np->n_localmodtime) &&
4165					    timespeccmp(&np->n_localmodtime,
4166					    &ts, >=)) {
4167					    NFSCL_DEBUG(4, "nfsrpc_readdirplus:"
4168						" localmod stale attributes\n");
4169					    attr_ok = false;
4170					}
4171					NFSUNLOCKNODE(np);
4172				    }
4173				}
4174				nfhp = NULL;
4175				if (newvp != NULLVP) {
4176				    if (attr_ok)
4177					error = nfscl_loadattrcache(&newvp,
4178					    &nfsva, NULL, 0, 0);
4179				    if (error) {
4180					if (unlocknewvp)
4181					    vput(newvp);
4182					else
4183					    vrele(newvp);
4184					goto nfsmout;
4185				    }
4186				    dp->d_type =
4187					vtonfs_dtype(np->n_vattr.na_type);
4188				    ndp->ni_vp = newvp;
4189				    NFSCNHASH(cnp, HASHINIT);
4190				    if (cnp->cn_namelen <= NCHNAMLEN &&
4191					ndp->ni_dvp != ndp->ni_vp &&
4192					(newvp->v_type != VDIR ||
4193					 dctime.tv_sec != 0)) {
4194					cache_enter_time_flags(ndp->ni_dvp,
4195					    ndp->ni_vp, cnp,
4196					    &nfsva.na_ctime,
4197					    newvp->v_type != VDIR ? NULL :
4198					    &dctime, VFS_CACHE_DROPOLD);
4199				    }
4200				    if (unlocknewvp)
4201					vput(newvp);
4202				    else
4203					vrele(newvp);
4204				    newvp = NULLVP;
4205				}
4206			    }
4207			} else if (nfhp != NULL) {
4208			    free(nfhp, M_NFSFH);
4209			}
4210			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4211			more_dirs = fxdr_unsigned(int, *tl);
4212		}
4213		/*
4214		 * If at end of rpc data, get the eof boolean
4215		 */
4216		if (!more_dirs) {
4217			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4218			eof = fxdr_unsigned(int, *tl);
4219			if (tryformoredirs)
4220				more_dirs = !eof;
4221			if (nd->nd_flag & ND_NFSV4) {
4222				error = nfscl_postop_attr(nd, nap, attrflagp);
4223				if (error)
4224					goto nfsmout;
4225			}
4226		}
4227		m_freem(nd->nd_mrep);
4228		nd->nd_mrep = NULL;
4229	}
4230	/*
4231	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
4232	 * by increasing d_reclen for the last record.
4233	 */
4234	if (blksiz > 0) {
4235		left = DIRBLKSIZ - blksiz;
4236		NFSBZERO(uiop->uio_iov->iov_base, left);
4237		dp->d_reclen += left;
4238		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4239		    left;
4240		uiop->uio_iov->iov_len -= left;
4241		uiop->uio_resid -= left;
4242		uiop->uio_offset += left;
4243	}
4244
4245	/*
4246	 * If returning no data, assume end of file.
4247	 * If not bigenough, return not end of file, since you aren't
4248	 *    returning all the data
4249	 * Otherwise, return the eof flag from the server.
4250	 */
4251	if (eofp != NULL) {
4252		if (tresid == uiop->uio_resid)
4253			*eofp = 1;
4254		else if (!bigenough)
4255			*eofp = 0;
4256		else
4257			*eofp = eof;
4258	}
4259
4260	/*
4261	 * Add extra empty records to any remaining DIRBLKSIZ chunks.
4262	 */
4263	while (uiop->uio_resid > 0 && uiop->uio_resid != tresid) {
4264		dp = (struct dirent *)uiop->uio_iov->iov_base;
4265		NFSBZERO(dp, DIRBLKSIZ);
4266		dp->d_type = DT_UNKNOWN;
4267		tl = (u_int32_t *)&dp->d_name[4];
4268		*tl++ = cookie.lval[0];
4269		*tl = cookie.lval[1];
4270		dp->d_reclen = DIRBLKSIZ;
4271		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
4272		    DIRBLKSIZ;
4273		uiop->uio_iov->iov_len -= DIRBLKSIZ;
4274		uiop->uio_resid -= DIRBLKSIZ;
4275		uiop->uio_offset += DIRBLKSIZ;
4276	}
4277
4278nfsmout:
4279	if (nd->nd_mrep != NULL)
4280		m_freem(nd->nd_mrep);
4281	return (error);
4282}
4283
4284/*
4285 * Nfs commit rpc
4286 */
4287int
4288nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
4289    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4290{
4291	u_int32_t *tl;
4292	struct nfsrv_descript nfsd, *nd = &nfsd;
4293	nfsattrbit_t attrbits;
4294	int error;
4295	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4296
4297	*attrflagp = 0;
4298	NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp, cred);
4299	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
4300	txdr_hyper(offset, tl);
4301	tl += 2;
4302	*tl = txdr_unsigned(cnt);
4303	if (nd->nd_flag & ND_NFSV4) {
4304		/*
4305		 * And do a Getattr op.
4306		 */
4307		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4308		*tl = txdr_unsigned(NFSV4OP_GETATTR);
4309		NFSGETATTR_ATTRBIT(&attrbits);
4310		(void) nfsrv_putattrbit(nd, &attrbits);
4311	}
4312	error = nfscl_request(nd, vp, p, cred);
4313	if (error)
4314		return (error);
4315	error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, NULL);
4316	if (!error && !nd->nd_repstat) {
4317		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
4318		NFSLOCKMNT(nmp);
4319		if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) {
4320			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
4321			nd->nd_repstat = NFSERR_STALEWRITEVERF;
4322		}
4323		NFSUNLOCKMNT(nmp);
4324		if (nd->nd_flag & ND_NFSV4)
4325			error = nfscl_postop_attr(nd, nap, attrflagp);
4326	}
4327nfsmout:
4328	if (!error && nd->nd_repstat)
4329		error = nd->nd_repstat;
4330	m_freem(nd->nd_mrep);
4331	return (error);
4332}
4333
4334/*
4335 * NFS byte range lock rpc.
4336 * (Mostly just calls one of the three lower level RPC routines.)
4337 */
4338int
4339nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
4340    int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4341{
4342	struct nfscllockowner *lp;
4343	struct nfsclclient *clp;
4344	struct nfsfh *nfhp;
4345	struct nfsrv_descript nfsd, *nd = &nfsd;
4346	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
4347	u_int64_t off, len;
4348	off_t start, end;
4349	u_int32_t clidrev = 0;
4350	int error = 0, newone = 0, expireret = 0, retrycnt, donelocally;
4351	int callcnt, dorpc;
4352
4353	/*
4354	 * Convert the flock structure into a start and end and do POSIX
4355	 * bounds checking.
4356	 */
4357	switch (fl->l_whence) {
4358	case SEEK_SET:
4359	case SEEK_CUR:
4360		/*
4361		 * Caller is responsible for adding any necessary offset
4362		 * when SEEK_CUR is used.
4363		 */
4364		start = fl->l_start;
4365		off = fl->l_start;
4366		break;
4367	case SEEK_END:
4368		start = size + fl->l_start;
4369		off = size + fl->l_start;
4370		break;
4371	default:
4372		return (EINVAL);
4373	}
4374	if (start < 0)
4375		return (EINVAL);
4376	if (fl->l_len != 0) {
4377		end = start + fl->l_len - 1;
4378		if (end < start)
4379			return (EINVAL);
4380	}
4381
4382	len = fl->l_len;
4383	if (len == 0)
4384		len = NFS64BITSSET;
4385	retrycnt = 0;
4386	do {
4387	    nd->nd_repstat = 0;
4388	    if (op == F_GETLK) {
4389		error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4390		if (error)
4391			return (error);
4392		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
4393		if (!error) {
4394			clidrev = clp->nfsc_clientidrev;
4395			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
4396			    p, id, flags);
4397		} else if (error == -1) {
4398			error = 0;
4399		}
4400		nfscl_clientrelease(clp);
4401	    } else if (op == F_UNLCK && fl->l_type == F_UNLCK) {
4402		/*
4403		 * We must loop around for all lockowner cases.
4404		 */
4405		callcnt = 0;
4406		error = nfscl_getcl(vp->v_mount, cred, p, false, true, &clp);
4407		if (error)
4408			return (error);
4409		do {
4410		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
4411			clp, id, flags, &lp, &dorpc);
4412		    /*
4413		     * If it returns a NULL lp, we're done.
4414		     */
4415		    if (lp == NULL) {
4416			if (callcnt == 0)
4417			    nfscl_clientrelease(clp);
4418			else
4419			    nfscl_releasealllocks(clp, vp, p, id, flags);
4420			return (error);
4421		    }
4422		    if (nmp->nm_clp != NULL)
4423			clidrev = nmp->nm_clp->nfsc_clientidrev;
4424		    else
4425			clidrev = 0;
4426		    /*
4427		     * If the server doesn't support Posix lock semantics,
4428		     * only allow locks on the entire file, since it won't
4429		     * handle overlapping byte ranges.
4430		     * There might still be a problem when a lock
4431		     * upgrade/downgrade (read<->write) occurs, since the
4432		     * server "might" expect an unlock first?
4433		     */
4434		    if (dorpc && (lp->nfsl_open->nfso_posixlock ||
4435			(off == 0 && len == NFS64BITSSET))) {
4436			/*
4437			 * Since the lock records will go away, we must
4438			 * wait for grace and delay here.
4439			 */
4440			do {
4441			    error = nfsrpc_locku(nd, nmp, lp, off, len,
4442				NFSV4LOCKT_READ, cred, p, 0);
4443			    if ((nd->nd_repstat == NFSERR_GRACE ||
4444				 nd->nd_repstat == NFSERR_DELAY) &&
4445				error == 0)
4446				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
4447				    "nfs_advlock");
4448			} while ((nd->nd_repstat == NFSERR_GRACE ||
4449			    nd->nd_repstat == NFSERR_DELAY) && error == 0);
4450		    }
4451		    callcnt++;
4452		} while (error == 0 && nd->nd_repstat == 0);
4453		nfscl_releasealllocks(clp, vp, p, id, flags);
4454	    } else if (op == F_SETLK) {
4455		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
4456		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
4457		if (error || donelocally) {
4458			return (error);
4459		}
4460		if (nmp->nm_clp != NULL)
4461			clidrev = nmp->nm_clp->nfsc_clientidrev;
4462		else
4463			clidrev = 0;
4464		nfhp = VTONFS(vp)->n_fhp;
4465		if (!lp->nfsl_open->nfso_posixlock &&
4466		    (off != 0 || len != NFS64BITSSET)) {
4467			error = EINVAL;
4468		} else {
4469			error = nfsrpc_lock(nd, nmp, vp, nfhp->nfh_fh,
4470			    nfhp->nfh_len, lp, newone, reclaim, off,
4471			    len, fl->l_type, cred, p, 0);
4472		}
4473		if (!error)
4474			error = nd->nd_repstat;
4475		nfscl_lockrelease(lp, error, newone);
4476	    } else {
4477		error = EINVAL;
4478	    }
4479	    if (!error)
4480	        error = nd->nd_repstat;
4481	    if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
4482		error == NFSERR_STALEDONTRECOVER ||
4483		error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4484		error == NFSERR_BADSESSION) {
4485		(void) nfs_catnap(PZERO, error, "nfs_advlock");
4486	    } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID)
4487		&& clidrev != 0) {
4488		expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
4489		retrycnt++;
4490	    }
4491	} while (error == NFSERR_GRACE ||
4492	    error == NFSERR_STALECLIENTID || error == NFSERR_DELAY ||
4493	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID ||
4494	    error == NFSERR_BADSESSION ||
4495	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
4496	     expireret == 0 && clidrev != 0 && retrycnt < 4));
4497	if (error && retrycnt >= 4)
4498		error = EIO;
4499	return (error);
4500}
4501
4502/*
4503 * The lower level routine for the LockT case.
4504 */
4505int
4506nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
4507    struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
4508    struct ucred *cred, NFSPROC_T *p, void *id, int flags)
4509{
4510	u_int32_t *tl;
4511	int error, type, size;
4512	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4513	struct nfsnode *np;
4514	struct nfsmount *nmp;
4515	struct nfsclsession *tsep;
4516
4517	nmp = VFSTONFS(vp->v_mount);
4518	NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp, cred);
4519	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4520	if (fl->l_type == F_RDLCK)
4521		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4522	else
4523		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4524	txdr_hyper(off, tl);
4525	tl += 2;
4526	txdr_hyper(len, tl);
4527	tl += 2;
4528	tsep = nfsmnt_mdssession(nmp);
4529	*tl++ = tsep->nfsess_clientid.lval[0];
4530	*tl = tsep->nfsess_clientid.lval[1];
4531	nfscl_filllockowner(id, own, flags);
4532	np = VTONFS(vp);
4533	NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN],
4534	    np->n_fhp->nfh_len);
4535	(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + np->n_fhp->nfh_len);
4536	error = nfscl_request(nd, vp, p, cred);
4537	if (error)
4538		return (error);
4539	if (nd->nd_repstat == 0) {
4540		fl->l_type = F_UNLCK;
4541	} else if (nd->nd_repstat == NFSERR_DENIED) {
4542		nd->nd_repstat = 0;
4543		fl->l_whence = SEEK_SET;
4544		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4545		fl->l_start = fxdr_hyper(tl);
4546		tl += 2;
4547		len = fxdr_hyper(tl);
4548		tl += 2;
4549		if (len == NFS64BITSSET)
4550			fl->l_len = 0;
4551		else
4552			fl->l_len = len;
4553		type = fxdr_unsigned(int, *tl++);
4554		if (type == NFSV4LOCKT_WRITE)
4555			fl->l_type = F_WRLCK;
4556		else
4557			fl->l_type = F_RDLCK;
4558		/*
4559		 * XXX For now, I have no idea what to do with the
4560		 * conflicting lock_owner, so I'll just set the pid == 0
4561		 * and skip over the lock_owner.
4562		 */
4563		fl->l_pid = (pid_t)0;
4564		tl += 2;
4565		size = fxdr_unsigned(int, *tl);
4566		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4567			error = EBADRPC;
4568		if (!error)
4569			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4570	} else if (nd->nd_repstat == NFSERR_STALECLIENTID)
4571		nfscl_initiate_recovery(clp);
4572nfsmout:
4573	m_freem(nd->nd_mrep);
4574	return (error);
4575}
4576
4577/*
4578 * Lower level function that performs the LockU RPC.
4579 */
4580static int
4581nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp,
4582    struct nfscllockowner *lp, u_int64_t off, u_int64_t len,
4583    u_int32_t type, struct ucred *cred, NFSPROC_T *p, int syscred)
4584{
4585	u_int32_t *tl;
4586	int error;
4587
4588	nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh,
4589	    lp->nfsl_open->nfso_fhlen, NULL, NULL, 0, 0, cred);
4590	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
4591	*tl++ = txdr_unsigned(type);
4592	*tl = txdr_unsigned(lp->nfsl_seqid);
4593	if (nfstest_outofseq &&
4594	    (arc4random() % nfstest_outofseq) == 0)
4595		*tl = txdr_unsigned(lp->nfsl_seqid + 1);
4596	tl++;
4597	if (NFSHASNFSV4N(nmp))
4598		*tl++ = 0;
4599	else
4600		*tl++ = lp->nfsl_stateid.seqid;
4601	*tl++ = lp->nfsl_stateid.other[0];
4602	*tl++ = lp->nfsl_stateid.other[1];
4603	*tl++ = lp->nfsl_stateid.other[2];
4604	txdr_hyper(off, tl);
4605	tl += 2;
4606	txdr_hyper(len, tl);
4607	if (syscred)
4608		nd->nd_flag |= ND_USEGSSNAME;
4609	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4610	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4611	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4612	if (error)
4613		return (error);
4614	if (nd->nd_repstat == 0) {
4615		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4616		lp->nfsl_stateid.seqid = *tl++;
4617		lp->nfsl_stateid.other[0] = *tl++;
4618		lp->nfsl_stateid.other[1] = *tl++;
4619		lp->nfsl_stateid.other[2] = *tl;
4620	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4621		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4622nfsmout:
4623	m_freem(nd->nd_mrep);
4624	return (error);
4625}
4626
4627/*
4628 * The actual Lock RPC.
4629 */
4630int
4631nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp,
4632    u_int8_t *nfhp, int fhlen, struct nfscllockowner *lp, int newone,
4633    int reclaim, u_int64_t off, u_int64_t len, short type, struct ucred *cred,
4634    NFSPROC_T *p, int syscred)
4635{
4636	u_int32_t *tl;
4637	int error, size;
4638	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4639	struct nfsclsession *tsep;
4640
4641	nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL, 0, 0,
4642	    cred);
4643	NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
4644	if (type == F_RDLCK)
4645		*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
4646	else
4647		*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
4648	*tl++ = txdr_unsigned(reclaim);
4649	txdr_hyper(off, tl);
4650	tl += 2;
4651	txdr_hyper(len, tl);
4652	tl += 2;
4653	if (newone) {
4654	    *tl = newnfs_true;
4655	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID +
4656		2 * NFSX_UNSIGNED + NFSX_HYPER);
4657	    *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid);
4658	    if (NFSHASNFSV4N(nmp))
4659		*tl++ = 0;
4660	    else
4661		*tl++ = lp->nfsl_open->nfso_stateid.seqid;
4662	    *tl++ = lp->nfsl_open->nfso_stateid.other[0];
4663	    *tl++ = lp->nfsl_open->nfso_stateid.other[1];
4664	    *tl++ = lp->nfsl_open->nfso_stateid.other[2];
4665	    *tl++ = txdr_unsigned(lp->nfsl_seqid);
4666	    tsep = nfsmnt_mdssession(nmp);
4667	    *tl++ = tsep->nfsess_clientid.lval[0];
4668	    *tl = tsep->nfsess_clientid.lval[1];
4669	    NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4670	    NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4671	    (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4672	} else {
4673	    *tl = newnfs_false;
4674	    NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
4675	    if (NFSHASNFSV4N(nmp))
4676		*tl++ = 0;
4677	    else
4678		*tl++ = lp->nfsl_stateid.seqid;
4679	    *tl++ = lp->nfsl_stateid.other[0];
4680	    *tl++ = lp->nfsl_stateid.other[1];
4681	    *tl++ = lp->nfsl_stateid.other[2];
4682	    *tl = txdr_unsigned(lp->nfsl_seqid);
4683	    if (nfstest_outofseq &&
4684		(arc4random() % nfstest_outofseq) == 0)
4685		    *tl = txdr_unsigned(lp->nfsl_seqid + 1);
4686	}
4687	if (syscred)
4688		nd->nd_flag |= ND_USEGSSNAME;
4689	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
4690	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4691	if (error)
4692		return (error);
4693	if (newone)
4694	    NFSCL_INCRSEQID(lp->nfsl_open->nfso_own->nfsow_seqid, nd);
4695	NFSCL_INCRSEQID(lp->nfsl_seqid, nd);
4696	if (nd->nd_repstat == 0) {
4697		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
4698		lp->nfsl_stateid.seqid = *tl++;
4699		lp->nfsl_stateid.other[0] = *tl++;
4700		lp->nfsl_stateid.other[1] = *tl++;
4701		lp->nfsl_stateid.other[2] = *tl;
4702	} else if (nd->nd_repstat == NFSERR_DENIED) {
4703		NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
4704		size = fxdr_unsigned(int, *(tl + 7));
4705		if (size < 0 || size > NFSV4_OPAQUELIMIT)
4706			error = EBADRPC;
4707		if (!error)
4708			error = nfsm_advance(nd, NFSM_RNDUP(size), -1);
4709	} else if (nd->nd_repstat == NFSERR_STALESTATEID)
4710		nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp);
4711nfsmout:
4712	m_freem(nd->nd_mrep);
4713	return (error);
4714}
4715
4716/*
4717 * nfs statfs rpc
4718 * (always called with the vp for the mount point)
4719 */
4720int
4721nfsrpc_statfs(vnode_t vp, struct nfsstatfs *sbp, struct nfsfsinfo *fsp,
4722    uint32_t *leasep, struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap,
4723    int *attrflagp)
4724{
4725	u_int32_t *tl = NULL;
4726	struct nfsrv_descript nfsd, *nd = &nfsd;
4727	struct nfsmount *nmp;
4728	nfsattrbit_t attrbits;
4729	int error;
4730
4731	*attrflagp = 0;
4732	nmp = VFSTONFS(vp->v_mount);
4733	if (NFSHASNFSV4(nmp)) {
4734		/*
4735		 * For V4, you actually do a getattr.
4736		 */
4737		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4738		if (leasep != NULL)
4739			NFSROOTFS_GETATTRBIT(&attrbits);
4740		else
4741			NFSSTATFS_GETATTRBIT(&attrbits);
4742		(void) nfsrv_putattrbit(nd, &attrbits);
4743		nd->nd_flag |= ND_USEGSSNAME;
4744		error = nfscl_request(nd, vp, p, cred);
4745		if (error)
4746			return (error);
4747		if (nd->nd_repstat == 0) {
4748			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4749			    NULL, NULL, sbp, fsp, NULL, 0, NULL, leasep, NULL,
4750			    p, cred);
4751			if (!error) {
4752				nmp->nm_fsid[0] = nap->na_filesid[0];
4753				nmp->nm_fsid[1] = nap->na_filesid[1];
4754				NFSSETHASSETFSID(nmp);
4755				*attrflagp = 1;
4756			}
4757		} else {
4758			error = nd->nd_repstat;
4759		}
4760		if (error)
4761			goto nfsmout;
4762	} else {
4763		NFSCL_REQSTART(nd, NFSPROC_FSSTAT, vp, NULL);
4764		error = nfscl_request(nd, vp, p, cred);
4765		if (error)
4766			return (error);
4767		if (nd->nd_flag & ND_NFSV3) {
4768			error = nfscl_postop_attr(nd, nap, attrflagp);
4769			if (error)
4770				goto nfsmout;
4771		}
4772		if (nd->nd_repstat) {
4773			error = nd->nd_repstat;
4774			goto nfsmout;
4775		}
4776		NFSM_DISSECT(tl, u_int32_t *,
4777		    NFSX_STATFS(nd->nd_flag & ND_NFSV3));
4778	}
4779	if (NFSHASNFSV3(nmp)) {
4780		sbp->sf_tbytes = fxdr_hyper(tl); tl += 2;
4781		sbp->sf_fbytes = fxdr_hyper(tl); tl += 2;
4782		sbp->sf_abytes = fxdr_hyper(tl); tl += 2;
4783		sbp->sf_tfiles = fxdr_hyper(tl); tl += 2;
4784		sbp->sf_ffiles = fxdr_hyper(tl); tl += 2;
4785		sbp->sf_afiles = fxdr_hyper(tl); tl += 2;
4786		sbp->sf_invarsec = fxdr_unsigned(u_int32_t, *tl);
4787	} else if (NFSHASNFSV4(nmp) == 0) {
4788		sbp->sf_tsize = fxdr_unsigned(u_int32_t, *tl++);
4789		sbp->sf_bsize = fxdr_unsigned(u_int32_t, *tl++);
4790		sbp->sf_blocks = fxdr_unsigned(u_int32_t, *tl++);
4791		sbp->sf_bfree = fxdr_unsigned(u_int32_t, *tl++);
4792		sbp->sf_bavail = fxdr_unsigned(u_int32_t, *tl);
4793	}
4794nfsmout:
4795	m_freem(nd->nd_mrep);
4796	return (error);
4797}
4798
4799/*
4800 * nfs pathconf rpc
4801 */
4802int
4803nfsrpc_pathconf(vnode_t vp, struct nfsv3_pathconf *pc,
4804    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4805{
4806	struct nfsrv_descript nfsd, *nd = &nfsd;
4807	struct nfsmount *nmp;
4808	u_int32_t *tl;
4809	nfsattrbit_t attrbits;
4810	int error;
4811	struct nfsnode *np;
4812
4813	*attrflagp = 0;
4814	nmp = VFSTONFS(vp->v_mount);
4815	if (NFSHASNFSV4(nmp)) {
4816		np = VTONFS(vp);
4817		if ((nmp->nm_privflag & NFSMNTP_FAKEROOTFH) != 0 &&
4818		    nmp->nm_fhsize == 0) {
4819			/* Attempt to get the actual root file handle. */
4820			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
4821			    cred, p);
4822			if (error != 0)
4823				return (EACCES);
4824			if (np->n_fhp->nfh_len == NFSX_FHMAX + 1)
4825				nfscl_statfs(vp, cred, p);
4826		}
4827		/*
4828		 * For V4, you actually do a getattr.
4829		 */
4830		NFSCL_REQSTART(nd, NFSPROC_GETATTR, vp, cred);
4831		NFSPATHCONF_GETATTRBIT(&attrbits);
4832		(void) nfsrv_putattrbit(nd, &attrbits);
4833		nd->nd_flag |= ND_USEGSSNAME;
4834		error = nfscl_request(nd, vp, p, cred);
4835		if (error)
4836			return (error);
4837		if (nd->nd_repstat == 0) {
4838			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
4839			    pc, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, p,
4840			    cred);
4841			if (!error)
4842				*attrflagp = 1;
4843		} else {
4844			error = nd->nd_repstat;
4845		}
4846	} else {
4847		NFSCL_REQSTART(nd, NFSPROC_PATHCONF, vp, NULL);
4848		error = nfscl_request(nd, vp, p, cred);
4849		if (error)
4850			return (error);
4851		error = nfscl_postop_attr(nd, nap, attrflagp);
4852		if (nd->nd_repstat && !error)
4853			error = nd->nd_repstat;
4854		if (!error) {
4855			NFSM_DISSECT(tl, u_int32_t *, NFSX_V3PATHCONF);
4856			pc->pc_linkmax = fxdr_unsigned(u_int32_t, *tl++);
4857			pc->pc_namemax = fxdr_unsigned(u_int32_t, *tl++);
4858			pc->pc_notrunc = fxdr_unsigned(u_int32_t, *tl++);
4859			pc->pc_chownrestricted =
4860			    fxdr_unsigned(u_int32_t, *tl++);
4861			pc->pc_caseinsensitive =
4862			    fxdr_unsigned(u_int32_t, *tl++);
4863			pc->pc_casepreserving = fxdr_unsigned(u_int32_t, *tl);
4864		}
4865	}
4866nfsmout:
4867	m_freem(nd->nd_mrep);
4868	return (error);
4869}
4870
4871/*
4872 * nfs version 3 fsinfo rpc call
4873 */
4874int
4875nfsrpc_fsinfo(vnode_t vp, struct nfsfsinfo *fsp, struct ucred *cred,
4876    NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp)
4877{
4878	u_int32_t *tl;
4879	struct nfsrv_descript nfsd, *nd = &nfsd;
4880	int error;
4881
4882	*attrflagp = 0;
4883	NFSCL_REQSTART(nd, NFSPROC_FSINFO, vp, NULL);
4884	error = nfscl_request(nd, vp, p, cred);
4885	if (error)
4886		return (error);
4887	error = nfscl_postop_attr(nd, nap, attrflagp);
4888	if (nd->nd_repstat && !error)
4889		error = nd->nd_repstat;
4890	if (!error) {
4891		NFSM_DISSECT(tl, u_int32_t *, NFSX_V3FSINFO);
4892		fsp->fs_rtmax = fxdr_unsigned(u_int32_t, *tl++);
4893		fsp->fs_rtpref = fxdr_unsigned(u_int32_t, *tl++);
4894		fsp->fs_rtmult = fxdr_unsigned(u_int32_t, *tl++);
4895		fsp->fs_wtmax = fxdr_unsigned(u_int32_t, *tl++);
4896		fsp->fs_wtpref = fxdr_unsigned(u_int32_t, *tl++);
4897		fsp->fs_wtmult = fxdr_unsigned(u_int32_t, *tl++);
4898		fsp->fs_dtpref = fxdr_unsigned(u_int32_t, *tl++);
4899		fsp->fs_maxfilesize = fxdr_hyper(tl);
4900		tl += 2;
4901		fxdr_nfsv3time(tl, &fsp->fs_timedelta);
4902		tl += 2;
4903		fsp->fs_properties = fxdr_unsigned(u_int32_t, *tl);
4904	}
4905nfsmout:
4906	m_freem(nd->nd_mrep);
4907	return (error);
4908}
4909
4910/*
4911 * This function performs the Renew RPC.
4912 */
4913int
4914nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred,
4915    NFSPROC_T *p)
4916{
4917	u_int32_t *tl;
4918	struct nfsrv_descript nfsd;
4919	struct nfsrv_descript *nd = &nfsd;
4920	struct nfsmount *nmp;
4921	int error;
4922	struct nfssockreq *nrp;
4923	struct nfsclsession *tsep;
4924
4925	nmp = clp->nfsc_nmp;
4926	if (nmp == NULL)
4927		return (0);
4928	if (dsp == NULL)
4929		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, NULL, 0,
4930		    0, cred);
4931	else
4932		nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL,
4933		    &dsp->nfsclds_sess, 0, 0, NULL);
4934	if (!NFSHASNFSV4N(nmp)) {
4935		/* NFSv4.1 just uses a Sequence Op and not a Renew. */
4936		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4937		tsep = nfsmnt_mdssession(nmp);
4938		*tl++ = tsep->nfsess_clientid.lval[0];
4939		*tl = tsep->nfsess_clientid.lval[1];
4940	}
4941	nrp = NULL;
4942	if (dsp != NULL)
4943		nrp = dsp->nfsclds_sockp;
4944	if (nrp == NULL)
4945		/* If NULL, use the MDS socket. */
4946		nrp = &nmp->nm_sockreq;
4947	nd->nd_flag |= ND_USEGSSNAME;
4948	if (dsp == NULL)
4949		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4950		    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4951	else {
4952		error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
4953		    NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess);
4954		if (error == ENXIO)
4955			nfscl_cancelreqs(dsp);
4956	}
4957	if (error)
4958		return (error);
4959	error = nd->nd_repstat;
4960	m_freem(nd->nd_mrep);
4961	return (error);
4962}
4963
4964/*
4965 * This function performs the Releaselockowner RPC.
4966 */
4967int
4968nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp,
4969    uint8_t *fh, int fhlen, struct ucred *cred, NFSPROC_T *p)
4970{
4971	struct nfsrv_descript nfsd, *nd = &nfsd;
4972	u_int32_t *tl;
4973	int error;
4974	uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX];
4975	struct nfsclsession *tsep;
4976
4977	if (NFSHASNFSV4N(nmp)) {
4978		/* For NFSv4.1, do a FreeStateID. */
4979		nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL,
4980		    NULL, 0, 0, cred);
4981		nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID);
4982	} else {
4983		nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL,
4984		    NULL, 0, 0, NULL);
4985		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
4986		tsep = nfsmnt_mdssession(nmp);
4987		*tl++ = tsep->nfsess_clientid.lval[0];
4988		*tl = tsep->nfsess_clientid.lval[1];
4989		NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN);
4990		NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen);
4991		(void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen);
4992	}
4993	nd->nd_flag |= ND_USEGSSNAME;
4994	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
4995	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
4996	if (error)
4997		return (error);
4998	error = nd->nd_repstat;
4999	m_freem(nd->nd_mrep);
5000	return (error);
5001}
5002
5003/*
5004 * This function performs the Compound to get the mount pt FH.
5005 */
5006int
5007nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
5008    NFSPROC_T *p)
5009{
5010	u_int32_t *tl;
5011	struct nfsrv_descript nfsd;
5012	struct nfsrv_descript *nd = &nfsd;
5013	u_char *cp, *cp2, *fhp;
5014	int error, cnt, len, setnil;
5015	u_int32_t *opcntp;
5016
5017	nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
5018	    0, NULL);
5019	cp = dirpath;
5020	cnt = 0;
5021	do {
5022		setnil = 0;
5023		while (*cp == '/')
5024			cp++;
5025		cp2 = cp;
5026		while (*cp2 != '\0' && *cp2 != '/')
5027			cp2++;
5028		if (*cp2 == '/') {
5029			setnil = 1;
5030			*cp2 = '\0';
5031		}
5032		if (cp2 != cp) {
5033			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
5034			*tl = txdr_unsigned(NFSV4OP_LOOKUP);
5035			nfsm_strtom(nd, cp, strlen(cp));
5036			cnt++;
5037		}
5038		if (setnil)
5039			*cp2++ = '/';
5040		cp = cp2;
5041	} while (*cp != '\0');
5042	if (NFSHASNFSV4N(nmp))
5043		/* Has a Sequence Op done by nfscl_reqstart(). */
5044		*opcntp = txdr_unsigned(3 + cnt);
5045	else
5046		*opcntp = txdr_unsigned(2 + cnt);
5047	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
5048	*tl = txdr_unsigned(NFSV4OP_GETFH);
5049	nd->nd_flag |= ND_USEGSSNAME;
5050	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5051		NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5052	if (error)
5053		return (error);
5054	if (nd->nd_repstat == 0) {
5055		NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
5056		tl += (2 + 2 * cnt);
5057		if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
5058			len > NFSX_FHMAX) {
5059			nd->nd_repstat = NFSERR_BADXDR;
5060		} else {
5061			fhp = malloc(len + 1, M_TEMP, M_WAITOK);
5062			nd->nd_repstat = nfsrv_mtostr(nd, fhp, len);
5063			if (nd->nd_repstat == 0) {
5064				NFSLOCKMNT(nmp);
5065				if (nmp->nm_fhsize == 0) {
5066					NFSBCOPY(fhp, nmp->nm_fh, len);
5067					nmp->nm_fhsize = len;
5068				}
5069				NFSUNLOCKMNT(nmp);
5070			}
5071			free(fhp, M_TEMP);
5072		}
5073	}
5074	error = nd->nd_repstat;
5075nfsmout:
5076	m_freem(nd->nd_mrep);
5077	return (error);
5078}
5079
5080/*
5081 * This function performs the Delegreturn RPC.
5082 */
5083int
5084nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred,
5085    struct nfsmount *nmp, NFSPROC_T *p, int syscred)
5086{
5087	u_int32_t *tl;
5088	struct nfsrv_descript nfsd;
5089	struct nfsrv_descript *nd = &nfsd;
5090	int error;
5091
5092	nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh,
5093	    dp->nfsdl_fhlen, NULL, NULL, 0, 0, cred);
5094	NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
5095	if (NFSHASNFSV4N(nmp))
5096		*tl++ = 0;
5097	else
5098		*tl++ = dp->nfsdl_stateid.seqid;
5099	*tl++ = dp->nfsdl_stateid.other[0];
5100	*tl++ = dp->nfsdl_stateid.other[1];
5101	*tl = dp->nfsdl_stateid.other[2];
5102	if (syscred)
5103		nd->nd_flag |= ND_USEGSSNAME;
5104	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5105	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5106	if (error)
5107		return (error);
5108	error = nd->nd_repstat;
5109	m_freem(nd->nd_mrep);
5110	return (error);
5111}
5112
5113/*
5114 * nfs getacl call.
5115 */
5116int
5117nfsrpc_getacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
5118{
5119	struct nfsrv_descript nfsd, *nd = &nfsd;
5120	int error;
5121	nfsattrbit_t attrbits;
5122	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5123
5124	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
5125		return (EOPNOTSUPP);
5126	NFSCL_REQSTART(nd, NFSPROC_GETACL, vp, cred);
5127	NFSZERO_ATTRBIT(&attrbits);
5128	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5129	(void) nfsrv_putattrbit(nd, &attrbits);
5130	error = nfscl_request(nd, vp, p, cred);
5131	if (error)
5132		return (error);
5133	if (!nd->nd_repstat)
5134		error = nfsv4_loadattr(nd, vp, NULL, NULL, NULL, 0, NULL,
5135		    NULL, NULL, NULL, aclp, 0, NULL, NULL, NULL, p, cred);
5136	else
5137		error = nd->nd_repstat;
5138	m_freem(nd->nd_mrep);
5139	return (error);
5140}
5141
5142/*
5143 * nfs setacl call.
5144 */
5145int
5146nfsrpc_setacl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, struct acl *aclp)
5147{
5148	int error;
5149	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5150
5151	if (nfsrv_useacl == 0 || !NFSHASNFSV4(nmp))
5152		return (EOPNOTSUPP);
5153	error = nfsrpc_setattr(vp, NULL, aclp, cred, p, NULL, NULL);
5154	return (error);
5155}
5156
5157/*
5158 * nfs setacl call.
5159 */
5160static int
5161nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p,
5162    struct acl *aclp, nfsv4stateid_t *stateidp)
5163{
5164	struct nfsrv_descript nfsd, *nd = &nfsd;
5165	int error;
5166	nfsattrbit_t attrbits;
5167	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5168
5169	if (!NFSHASNFSV4(nmp))
5170		return (EOPNOTSUPP);
5171	NFSCL_REQSTART(nd, NFSPROC_SETACL, vp, cred);
5172	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
5173	NFSZERO_ATTRBIT(&attrbits);
5174	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
5175	(void) nfsv4_fillattr(nd, vp->v_mount, vp, aclp, NULL, NULL, 0,
5176	    &attrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0, NULL);
5177	error = nfscl_request(nd, vp, p, cred);
5178	if (error)
5179		return (error);
5180	/* Don't care about the pre/postop attributes */
5181	m_freem(nd->nd_mrep);
5182	return (nd->nd_repstat);
5183}
5184
5185/*
5186 * Do the NFSv4.1 Exchange ID.
5187 */
5188int
5189nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp,
5190    struct nfssockreq *nrp, int minorvers, uint32_t exchflags,
5191    struct nfsclds **dspp, struct ucred *cred, NFSPROC_T *p)
5192{
5193	uint32_t *tl, v41flags;
5194	struct nfsrv_descript nfsd;
5195	struct nfsrv_descript *nd = &nfsd;
5196	struct nfsclds *dsp;
5197	struct timespec verstime;
5198	int error, len;
5199
5200	*dspp = NULL;
5201	if (minorvers == 0)
5202		minorvers = nmp->nm_minorvers;
5203	nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL,
5204	    NFS_VER4, minorvers, NULL);
5205	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5206	*tl++ = txdr_unsigned(nfsboottime.tv_sec);	/* Client owner */
5207	*tl = txdr_unsigned(clp->nfsc_rev);
5208	(void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen);
5209
5210	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
5211	*tl++ = txdr_unsigned(exchflags);
5212	*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);
5213
5214	/* Set the implementation id4 */
5215	*tl = txdr_unsigned(1);
5216	(void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
5217	(void) nfsm_strtom(nd, version, strlen(version));
5218	NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
5219	verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
5220	verstime.tv_nsec = 0;
5221	txdr_nfsv4time(&verstime, tl);
5222	nd->nd_flag |= ND_USEGSSNAME;
5223	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred,
5224	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5225	NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error,
5226	    (int)nd->nd_repstat);
5227	if (error != 0)
5228		return (error);
5229	if (nd->nd_repstat == 0) {
5230		NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER);
5231		len = fxdr_unsigned(int, *(tl + 7));
5232		if (len < 0 || len > NFSV4_OPAQUELIMIT) {
5233			error = NFSERR_BADXDR;
5234			goto nfsmout;
5235		}
5236		dsp = malloc(sizeof(struct nfsclds) + len + 1, M_NFSCLDS,
5237		    M_WAITOK | M_ZERO);
5238		dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew;
5239		dsp->nfsclds_servownlen = len;
5240		dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++;
5241		dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++;
5242		dsp->nfsclds_sess.nfsess_sequenceid =
5243		    fxdr_unsigned(uint32_t, *tl++);
5244		v41flags = fxdr_unsigned(uint32_t, *tl);
5245		if ((v41flags & NFSV4EXCH_USEPNFSMDS) != 0 &&
5246		    NFSHASPNFSOPT(nmp)) {
5247			NFSCL_DEBUG(1, "set PNFS\n");
5248			NFSLOCKMNT(nmp);
5249			nmp->nm_state |= NFSSTA_PNFS;
5250			NFSUNLOCKMNT(nmp);
5251			dsp->nfsclds_flags |= NFSCLDS_MDS;
5252		}
5253		if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0)
5254			dsp->nfsclds_flags |= NFSCLDS_DS;
5255		if (minorvers == NFSV42_MINORVERSION)
5256			dsp->nfsclds_flags |= NFSCLDS_MINORV2;
5257		if (len > 0)
5258			nd->nd_repstat = nfsrv_mtostr(nd,
5259			    dsp->nfsclds_serverown, len);
5260		if (nd->nd_repstat == 0) {
5261			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
5262			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
5263			    NULL, MTX_DEF);
5264			nfscl_initsessionslots(&dsp->nfsclds_sess);
5265			*dspp = dsp;
5266		} else
5267			free(dsp, M_NFSCLDS);
5268	}
5269	error = nd->nd_repstat;
5270nfsmout:
5271	m_freem(nd->nd_mrep);
5272	return (error);
5273}
5274
5275/*
5276 * Do the NFSv4.1 Create Session.
5277 */
5278int
5279nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
5280    struct nfssockreq *nrp, struct nfsclds *dsp, uint32_t sequenceid, int mds,
5281    struct ucred *cred, NFSPROC_T *p)
5282{
5283	uint32_t crflags, maxval, *tl;
5284	struct nfsrv_descript nfsd;
5285	struct nfsrv_descript *nd = &nfsd;
5286	int error, irdcnt, minorvers;
5287
5288	/* Make sure nm_rsize, nm_wsize is set. */
5289	if (nmp->nm_rsize > NFS_MAXBSIZE || nmp->nm_rsize == 0)
5290		nmp->nm_rsize = NFS_MAXBSIZE;
5291	if (nmp->nm_wsize > NFS_MAXBSIZE || nmp->nm_wsize == 0)
5292		nmp->nm_wsize = NFS_MAXBSIZE;
5293	if (dsp == NULL)
5294		minorvers = nmp->nm_minorvers;
5295	else if ((dsp->nfsclds_flags & NFSCLDS_MINORV2) != 0)
5296		minorvers = NFSV42_MINORVERSION;
5297	else
5298		minorvers = NFSV41_MINORVERSION;
5299	nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL,
5300	    NFS_VER4, minorvers, NULL);
5301	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5302	*tl++ = sep->nfsess_clientid.lval[0];
5303	*tl++ = sep->nfsess_clientid.lval[1];
5304	*tl++ = txdr_unsigned(sequenceid);
5305	crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST);
5306	if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0 && mds != 0)
5307		crflags |= NFSV4CRSESS_CONNBACKCHAN;
5308	*tl = txdr_unsigned(crflags);
5309
5310	/* Fill in fore channel attributes. */
5311	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5312	*tl++ = 0;				/* Header pad size */
5313	if ((nd->nd_flag & ND_NFSV42) != 0 && mds != 0 && sb_max_adj >=
5314	    nmp->nm_wsize && sb_max_adj >= nmp->nm_rsize) {
5315		/*
5316		 * NFSv4.2 Extended Attribute operations may want to do
5317		 * requests/replies that are larger than nm_rsize/nm_wsize.
5318		 */
5319		*tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5320		*tl++ = txdr_unsigned(sb_max_adj - NFS_MAXXDR);
5321	} else {
5322		*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);
5323		*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);
5324	}
5325	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
5326	*tl++ = txdr_unsigned(20);		/* Max operations */
5327	*tl++ = txdr_unsigned(64);		/* Max slots */
5328	*tl = 0;				/* No rdma ird */
5329
5330	/* Fill in back channel attributes. */
5331	NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5332	*tl++ = 0;				/* Header pad size */
5333	*tl++ = txdr_unsigned(10000);		/* Max request size */
5334	*tl++ = txdr_unsigned(10000);		/* Max response size */
5335	*tl++ = txdr_unsigned(4096);		/* Max response size cached */
5336	*tl++ = txdr_unsigned(4);		/* Max operations */
5337	*tl++ = txdr_unsigned(NFSV4_CBSLOTS);	/* Max slots */
5338	*tl = 0;				/* No rdma ird */
5339
5340	NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED);
5341	*tl++ = txdr_unsigned(NFS_CALLBCKPROG);	/* Call back prog # */
5342
5343	/* Allow AUTH_SYS callbacks as uid, gid == 0. */
5344	*tl++ = txdr_unsigned(1);		/* Auth_sys only */
5345	*tl++ = txdr_unsigned(AUTH_SYS);	/* AUTH_SYS type */
5346	*tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */
5347	*tl++ = 0;				/* Null machine name */
5348	*tl++ = 0;				/* Uid == 0 */
5349	*tl++ = 0;				/* Gid == 0 */
5350	*tl = 0;				/* No additional gids */
5351	nd->nd_flag |= ND_USEGSSNAME;
5352	error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG,
5353	    NFS_VER4, NULL, 1, NULL, NULL);
5354	if (error != 0)
5355		return (error);
5356	if (nd->nd_repstat == 0) {
5357		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
5358		    2 * NFSX_UNSIGNED);
5359		bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID);
5360		tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
5361		sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++);
5362		crflags = fxdr_unsigned(uint32_t, *tl);
5363		if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) {
5364			NFSLOCKMNT(nmp);
5365			nmp->nm_state |= NFSSTA_SESSPERSIST;
5366			NFSUNLOCKMNT(nmp);
5367		}
5368
5369		/* Get the fore channel slot count. */
5370		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5371		tl++;			/* Skip the header pad size. */
5372
5373		/* Make sure nm_wsize is small enough. */
5374		maxval = fxdr_unsigned(uint32_t, *tl++);
5375		while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
5376			if (nmp->nm_wsize > 8096)
5377				nmp->nm_wsize /= 2;
5378			else
5379				break;
5380		}
5381		sep->nfsess_maxreq = maxval;
5382
5383		/* Make sure nm_rsize is small enough. */
5384		maxval = fxdr_unsigned(uint32_t, *tl++);
5385		while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
5386			if (nmp->nm_rsize > 8096)
5387				nmp->nm_rsize /= 2;
5388			else
5389				break;
5390		}
5391		sep->nfsess_maxresp = maxval;
5392
5393		sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
5394		tl++;
5395		sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
5396		NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
5397		irdcnt = fxdr_unsigned(int, *tl);
5398		if (irdcnt < 0 || irdcnt > 1) {
5399			error = NFSERR_BADXDR;
5400			goto nfsmout;
5401		}
5402		if (irdcnt > 0)
5403			NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED);
5404
5405		/* and the back channel slot count. */
5406		NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
5407		tl += 5;
5408		sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
5409		NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
5410	}
5411	error = nd->nd_repstat;
5412nfsmout:
5413	m_freem(nd->nd_mrep);
5414	return (error);
5415}
5416
5417/*
5418 * Do the NFSv4.1 Destroy Client.
5419 */
5420int
5421nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp,
5422    struct ucred *cred, NFSPROC_T *p)
5423{
5424	uint32_t *tl;
5425	struct nfsrv_descript nfsd;
5426	struct nfsrv_descript *nd = &nfsd;
5427	int error;
5428	struct nfsclsession *tsep;
5429
5430	nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL, 0,
5431	    0, NULL);
5432	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5433	tsep = nfsmnt_mdssession(nmp);
5434	*tl++ = tsep->nfsess_clientid.lval[0];
5435	*tl = tsep->nfsess_clientid.lval[1];
5436	nd->nd_flag |= ND_USEGSSNAME;
5437	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5438	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5439	if (error != 0)
5440		return (error);
5441	error = nd->nd_repstat;
5442	m_freem(nd->nd_mrep);
5443	return (error);
5444}
5445
5446/*
5447 * Do the NFSv4.1 LayoutGet.
5448 */
5449static int
5450nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode,
5451    uint64_t offset, uint64_t len, uint64_t minlen, int layouttype,
5452    int layoutlen, nfsv4stateid_t *stateidp, int *retonclosep,
5453    struct nfsclflayouthead *flhp, struct ucred *cred, NFSPROC_T *p)
5454{
5455	struct nfsrv_descript nfsd, *nd = &nfsd;
5456	int error;
5457
5458	nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL, 0,
5459	    0, cred);
5460	nfsrv_setuplayoutget(nd, iomode, offset, len, minlen, stateidp,
5461	    layouttype, layoutlen, 0);
5462	nd->nd_flag |= ND_USEGSSNAME;
5463	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5464	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5465	NFSCL_DEBUG(4, "layget err=%d st=%d\n", error, nd->nd_repstat);
5466	if (error != 0)
5467		return (error);
5468	if (nd->nd_repstat == 0)
5469		error = nfsrv_parselayoutget(nmp, nd, stateidp, retonclosep,
5470		    flhp);
5471	if (error == 0 && nd->nd_repstat != 0)
5472		error = nd->nd_repstat;
5473	m_freem(nd->nd_mrep);
5474	return (error);
5475}
5476
5477/*
5478 * Do the NFSv4.1 Get Device Info.
5479 */
5480int
5481nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
5482    uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred,
5483    NFSPROC_T *p)
5484{
5485	uint32_t cnt, *tl, vers, minorvers;
5486	struct nfsrv_descript nfsd;
5487	struct nfsrv_descript *nd = &nfsd;
5488	struct sockaddr_in sin, ssin;
5489	struct sockaddr_in6 sin6, ssin6;
5490	struct nfsclds *dsp = NULL, **dspp, **gotdspp;
5491	struct nfscldevinfo *ndi;
5492	int addrcnt = 0, bitcnt, error, gotminor, gotvers, i, isudp, j;
5493	int stripecnt;
5494	uint8_t stripeindex;
5495	sa_family_t af, safilled;
5496
5497	ssin.sin_port = 0;		/* To shut up compiler. */
5498	ssin.sin_addr.s_addr = 0;	/* ditto */
5499	*ndip = NULL;
5500	ndi = NULL;
5501	gotdspp = NULL;
5502	nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL, 0,
5503	    0, cred);
5504	NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5505	NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID);
5506	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5507	*tl++ = txdr_unsigned(layouttype);
5508	*tl++ = txdr_unsigned(100000);
5509	if (notifybitsp != NULL && *notifybitsp != 0) {
5510		*tl = txdr_unsigned(1);		/* One word of bits. */
5511		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
5512		*tl = txdr_unsigned(*notifybitsp);
5513	} else
5514		*tl = txdr_unsigned(0);
5515	nd->nd_flag |= ND_USEGSSNAME;
5516	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5517	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5518	if (error != 0)
5519		return (error);
5520	if (nd->nd_repstat == 0) {
5521		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5522		if (layouttype != fxdr_unsigned(int, *tl))
5523			printf("EEK! devinfo layout type not same!\n");
5524		if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
5525			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5526			stripecnt = fxdr_unsigned(int, *tl);
5527			NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
5528			if (stripecnt < 1 || stripecnt > 4096) {
5529				printf("pNFS File layout devinfo stripecnt %d:"
5530				    " out of range\n", stripecnt);
5531				error = NFSERR_BADXDR;
5532				goto nfsmout;
5533			}
5534			NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) *
5535			    NFSX_UNSIGNED);
5536			addrcnt = fxdr_unsigned(int, *(tl + stripecnt));
5537			NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt);
5538			if (addrcnt < 1 || addrcnt > 128) {
5539				printf("NFS devinfo addrcnt %d: out of range\n",
5540				    addrcnt);
5541				error = NFSERR_BADXDR;
5542				goto nfsmout;
5543			}
5544
5545			/*
5546			 * Now we know how many stripe indices and addresses, so
5547			 * we can allocate the structure the correct size.
5548			 */
5549			i = (stripecnt * sizeof(uint8_t)) /
5550			    sizeof(struct nfsclds *) + 1;
5551			NFSCL_DEBUG(4, "stripeindices=%d\n", i);
5552			ndi = malloc(sizeof(*ndi) + (addrcnt + i) *
5553			    sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK |
5554			    M_ZERO);
5555			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5556			    NFSX_V4DEVICEID);
5557			ndi->nfsdi_refcnt = 0;
5558			ndi->nfsdi_flags = NFSDI_FILELAYOUT;
5559			ndi->nfsdi_stripecnt = stripecnt;
5560			ndi->nfsdi_addrcnt = addrcnt;
5561			/* Fill in the stripe indices. */
5562			for (i = 0; i < stripecnt; i++) {
5563				stripeindex = fxdr_unsigned(uint8_t, *tl++);
5564				NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex);
5565				if (stripeindex >= addrcnt) {
5566					printf("pNFS File Layout devinfo"
5567					    " stripeindex %d: too big\n",
5568					    (int)stripeindex);
5569					error = NFSERR_BADXDR;
5570					goto nfsmout;
5571				}
5572				nfsfldi_setstripeindex(ndi, i, stripeindex);
5573			}
5574		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
5575			/* For Flex File, we only get one address list. */
5576			ndi = malloc(sizeof(*ndi) + sizeof(struct nfsclds *),
5577			    M_NFSDEVINFO, M_WAITOK | M_ZERO);
5578			NFSBCOPY(deviceid, ndi->nfsdi_deviceid,
5579			    NFSX_V4DEVICEID);
5580			ndi->nfsdi_refcnt = 0;
5581			ndi->nfsdi_flags = NFSDI_FLEXFILE;
5582			addrcnt = ndi->nfsdi_addrcnt = 1;
5583		}
5584
5585		/* Now, dissect the server address(es). */
5586		safilled = AF_UNSPEC;
5587		for (i = 0; i < addrcnt; i++) {
5588			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5589			cnt = fxdr_unsigned(uint32_t, *tl);
5590			if (cnt == 0) {
5591				printf("NFS devinfo 0 len addrlist\n");
5592				error = NFSERR_BADXDR;
5593				goto nfsmout;
5594			}
5595			dspp = nfsfldi_addr(ndi, i);
5596			safilled = AF_UNSPEC;
5597			for (j = 0; j < cnt; j++) {
5598				error = nfsv4_getipaddr(nd, &sin, &sin6, &af,
5599				    &isudp);
5600				if (error != 0 && error != EPERM) {
5601					error = NFSERR_BADXDR;
5602					goto nfsmout;
5603				}
5604				if (error == 0 && isudp == 0) {
5605					/*
5606					 * The priority is:
5607					 * - Same address family.
5608					 * Save the address and dspp, so that
5609					 * the connection can be done after
5610					 * parsing is complete.
5611					 */
5612					if (safilled == AF_UNSPEC ||
5613					    (af == nmp->nm_nam->sa_family &&
5614					     safilled != nmp->nm_nam->sa_family)
5615					   ) {
5616						if (af == AF_INET)
5617							ssin = sin;
5618						else
5619							ssin6 = sin6;
5620						safilled = af;
5621						gotdspp = dspp;
5622					}
5623				}
5624			}
5625		}
5626
5627		gotvers = NFS_VER4;	/* Default NFSv4.1 for File Layout. */
5628		gotminor = NFSV41_MINORVERSION;
5629		/* For Flex File, we will take one of the versions to use. */
5630		if (layouttype == NFSLAYOUT_FLEXFILE) {
5631			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5632			j = fxdr_unsigned(int, *tl);
5633			if (j < 1 || j > NFSDEV_MAXVERS) {
5634				printf("pNFS: too many versions\n");
5635				error = NFSERR_BADXDR;
5636				goto nfsmout;
5637			}
5638			gotvers = 0;
5639			gotminor = 0;
5640			for (i = 0; i < j; i++) {
5641				NFSM_DISSECT(tl, uint32_t *, 5 * NFSX_UNSIGNED);
5642				vers = fxdr_unsigned(uint32_t, *tl++);
5643				minorvers = fxdr_unsigned(uint32_t, *tl++);
5644				if (vers == NFS_VER3)
5645					minorvers = 0;
5646				if ((vers == NFS_VER4 && ((minorvers ==
5647				    NFSV41_MINORVERSION && gotminor == 0) ||
5648				    minorvers == NFSV42_MINORVERSION)) ||
5649				    (vers == NFS_VER3 && gotvers == 0)) {
5650					gotvers = vers;
5651					gotminor = minorvers;
5652					/* We'll take this one. */
5653					ndi->nfsdi_versindex = i;
5654					ndi->nfsdi_vers = vers;
5655					ndi->nfsdi_minorvers = minorvers;
5656					ndi->nfsdi_rsize = fxdr_unsigned(
5657					    uint32_t, *tl++);
5658					ndi->nfsdi_wsize = fxdr_unsigned(
5659					    uint32_t, *tl++);
5660					if (*tl == newnfs_true)
5661						ndi->nfsdi_flags |=
5662						    NFSDI_TIGHTCOUPLED;
5663					else
5664						ndi->nfsdi_flags &=
5665						    ~NFSDI_TIGHTCOUPLED;
5666				}
5667			}
5668			if (gotvers == 0) {
5669				printf("pNFS: no NFSv3, NFSv4.1 or NFSv4.2\n");
5670				error = NFSERR_BADXDR;
5671				goto nfsmout;
5672			}
5673		}
5674
5675		/* And the notify bits. */
5676		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5677		bitcnt = fxdr_unsigned(int, *tl);
5678		if (bitcnt > 0) {
5679			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5680			if (notifybitsp != NULL)
5681				*notifybitsp =
5682				    fxdr_unsigned(uint32_t, *tl);
5683		}
5684		if (safilled != AF_UNSPEC) {
5685			KASSERT(ndi != NULL, ("ndi is NULL"));
5686			*ndip = ndi;
5687		} else
5688			error = EPERM;
5689		if (error == 0) {
5690			/*
5691			 * Now we can do a TCP connection for the correct
5692			 * NFS version and IP address.
5693			 */
5694			error = nfsrpc_fillsa(nmp, &ssin, &ssin6, safilled,
5695			    gotvers, gotminor, &dsp, p);
5696		}
5697		if (error == 0) {
5698			KASSERT(gotdspp != NULL, ("gotdspp is NULL"));
5699			*gotdspp = dsp;
5700		}
5701	}
5702	if (nd->nd_repstat != 0 && error == 0)
5703		error = nd->nd_repstat;
5704nfsmout:
5705	if (error != 0 && ndi != NULL)
5706		nfscl_freedevinfo(ndi);
5707	m_freem(nd->nd_mrep);
5708	return (error);
5709}
5710
5711/*
5712 * Do the NFSv4.1 LayoutCommit.
5713 */
5714int
5715nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5716    uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
5717    int layouttype, struct ucred *cred, NFSPROC_T *p)
5718{
5719	uint32_t *tl;
5720	struct nfsrv_descript nfsd, *nd = &nfsd;
5721	int error;
5722
5723	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL,
5724	    0, 0, cred);
5725	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
5726	    NFSX_STATEID);
5727	txdr_hyper(off, tl);
5728	tl += 2;
5729	txdr_hyper(len, tl);
5730	tl += 2;
5731	if (reclaim != 0)
5732		*tl++ = newnfs_true;
5733	else
5734		*tl++ = newnfs_false;
5735	*tl++ = txdr_unsigned(stateidp->seqid);
5736	*tl++ = stateidp->other[0];
5737	*tl++ = stateidp->other[1];
5738	*tl++ = stateidp->other[2];
5739	*tl++ = newnfs_true;
5740	if (lastbyte < off)
5741		lastbyte = off;
5742	else if (lastbyte >= (off + len))
5743		lastbyte = off + len - 1;
5744	txdr_hyper(lastbyte, tl);
5745	tl += 2;
5746	*tl++ = newnfs_false;
5747	*tl++ = txdr_unsigned(layouttype);
5748	/* All supported layouts are 0 length. */
5749	*tl = txdr_unsigned(0);
5750	nd->nd_flag |= ND_USEGSSNAME;
5751	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5752	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5753	if (error != 0)
5754		return (error);
5755	error = nd->nd_repstat;
5756	m_freem(nd->nd_mrep);
5757	return (error);
5758}
5759
5760/*
5761 * Do the NFSv4.1 LayoutReturn.
5762 */
5763int
5764nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
5765    int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
5766    uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5767    uint32_t stat, uint32_t op, char *devid)
5768{
5769	uint32_t *tl;
5770	struct nfsrv_descript nfsd, *nd = &nfsd;
5771	uint64_t tu64;
5772	int error;
5773
5774	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL,
5775	    0, 0, cred);
5776	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
5777	if (reclaim != 0)
5778		*tl++ = newnfs_true;
5779	else
5780		*tl++ = newnfs_false;
5781	*tl++ = txdr_unsigned(layouttype);
5782	*tl++ = txdr_unsigned(iomode);
5783	*tl = txdr_unsigned(layoutreturn);
5784	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
5785		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5786		    NFSX_UNSIGNED);
5787		txdr_hyper(offset, tl);
5788		tl += 2;
5789		txdr_hyper(len, tl);
5790		tl += 2;
5791		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
5792		*tl++ = txdr_unsigned(stateidp->seqid);
5793		*tl++ = stateidp->other[0];
5794		*tl++ = stateidp->other[1];
5795		*tl++ = stateidp->other[2];
5796		if (layouttype == NFSLAYOUT_NFSV4_1_FILES)
5797			*tl = txdr_unsigned(0);
5798		else if (layouttype == NFSLAYOUT_FLEXFILE) {
5799			if (stat != 0) {
5800				*tl = txdr_unsigned(2 * NFSX_HYPER +
5801				    NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5802				    NFSX_UNSIGNED);
5803				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER +
5804				    NFSX_STATEID + NFSX_V4DEVICEID + 5 *
5805				    NFSX_UNSIGNED);
5806				*tl++ = txdr_unsigned(1);	/* One error. */
5807				tu64 = 0;			/* Offset. */
5808				txdr_hyper(tu64, tl); tl += 2;
5809				tu64 = UINT64_MAX;		/* Length. */
5810				txdr_hyper(tu64, tl); tl += 2;
5811				NFSBCOPY(stateidp, tl, NFSX_STATEID);
5812				tl += (NFSX_STATEID / NFSX_UNSIGNED);
5813				*tl++ = txdr_unsigned(1);	/* One error. */
5814				NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5815				tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5816				*tl++ = txdr_unsigned(stat);
5817				*tl++ = txdr_unsigned(op);
5818			} else {
5819				*tl = txdr_unsigned(2 * NFSX_UNSIGNED);
5820				NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5821				/* No ioerrs. */
5822				*tl++ = 0;
5823			}
5824			*tl = 0;	/* No stats yet. */
5825		}
5826	}
5827	nd->nd_flag |= ND_USEGSSNAME;
5828	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5829	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5830	if (error != 0)
5831		return (error);
5832	if (nd->nd_repstat == 0) {
5833		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
5834		if (*tl != 0) {
5835			NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
5836			stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
5837			stateidp->other[0] = *tl++;
5838			stateidp->other[1] = *tl++;
5839			stateidp->other[2] = *tl;
5840		}
5841	} else
5842		error = nd->nd_repstat;
5843nfsmout:
5844	m_freem(nd->nd_mrep);
5845	return (error);
5846}
5847
5848/*
5849 * Do the NFSv4.2 LayoutError.
5850 */
5851static int
5852nfsrpc_layouterror(struct nfsmount *nmp, uint8_t *fh, int fhlen, uint64_t offset,
5853    uint64_t len, nfsv4stateid_t *stateidp, struct ucred *cred, NFSPROC_T *p,
5854    uint32_t stat, uint32_t op, char *devid)
5855{
5856	uint32_t *tl;
5857	struct nfsrv_descript nfsd, *nd = &nfsd;
5858	int error;
5859
5860	nfscl_reqstart(nd, NFSPROC_LAYOUTERROR, nmp, fh, fhlen, NULL, NULL,
5861	    0, 0, cred);
5862	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
5863	    NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED);
5864	txdr_hyper(offset, tl); tl += 2;
5865	txdr_hyper(len, tl); tl += 2;
5866	*tl++ = txdr_unsigned(stateidp->seqid);
5867	*tl++ = stateidp->other[0];
5868	*tl++ = stateidp->other[1];
5869	*tl++ = stateidp->other[2];
5870	*tl++ = txdr_unsigned(1);
5871	NFSBCOPY(devid, tl, NFSX_V4DEVICEID);
5872	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
5873	*tl++ = txdr_unsigned(stat);
5874	*tl = txdr_unsigned(op);
5875	nd->nd_flag |= ND_USEGSSNAME;
5876	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
5877	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5878	if (error != 0)
5879		return (error);
5880	if (nd->nd_repstat != 0)
5881		error = nd->nd_repstat;
5882	m_freem(nd->nd_mrep);
5883	return (error);
5884}
5885
5886/*
5887 * Acquire a layout and devinfo, if possible. The caller must have acquired
5888 * a reference count on the nfsclclient structure before calling this.
5889 * Return the layout in lypp with a reference count on it, if successful.
5890 */
5891static int
5892nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp,
5893    int iomode, uint32_t rw, uint32_t *notifybitsp, nfsv4stateid_t *stateidp,
5894    uint64_t off, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p)
5895{
5896	struct nfscllayout *lyp;
5897	struct nfsclflayout *flp;
5898	struct nfsclflayouthead flh;
5899	int error = 0, islocked, layoutlen, layouttype, recalled, retonclose;
5900	nfsv4stateid_t stateid;
5901	struct nfsclsession *tsep;
5902
5903	*lypp = NULL;
5904	if (NFSHASFLEXFILE(nmp))
5905		layouttype = NFSLAYOUT_FLEXFILE;
5906	else
5907		layouttype = NFSLAYOUT_NFSV4_1_FILES;
5908	/*
5909	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
5910	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
5911	 * flp == NULL.
5912	 */
5913	lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len,
5914	    off, rw, &flp, &recalled);
5915	islocked = 0;
5916	if (lyp == NULL || flp == NULL) {
5917		if (recalled != 0)
5918			return (EIO);
5919		LIST_INIT(&flh);
5920		tsep = nfsmnt_mdssession(nmp);
5921		layoutlen = tsep->nfsess_maxcache -
5922		    (NFSX_STATEID + 3 * NFSX_UNSIGNED);
5923		if (lyp == NULL) {
5924			stateid.seqid = 0;
5925			stateid.other[0] = stateidp->other[0];
5926			stateid.other[1] = stateidp->other[1];
5927			stateid.other[2] = stateidp->other[2];
5928			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5929			    nfhp->nfh_len, iomode, (uint64_t)0, UINT64_MAX,
5930			    (uint64_t)0, layouttype, layoutlen, &stateid,
5931			    &retonclose, &flh, cred, p);
5932		} else {
5933			islocked = 1;
5934			stateid.seqid = lyp->nfsly_stateid.seqid;
5935			stateid.other[0] = lyp->nfsly_stateid.other[0];
5936			stateid.other[1] = lyp->nfsly_stateid.other[1];
5937			stateid.other[2] = lyp->nfsly_stateid.other[2];
5938			error = nfsrpc_layoutget(nmp, nfhp->nfh_fh,
5939			    nfhp->nfh_len, iomode, off, UINT64_MAX,
5940			    (uint64_t)0, layouttype, layoutlen, &stateid,
5941			    &retonclose, &flh, cred, p);
5942		}
5943		error = nfsrpc_layoutgetres(nmp, vp, nfhp->nfh_fh,
5944		    nfhp->nfh_len, &stateid, retonclose, notifybitsp, &lyp,
5945		    &flh, layouttype, error, NULL, cred, p);
5946		if (error == 0)
5947			*lypp = lyp;
5948		else if (islocked != 0)
5949			nfscl_rellayout(lyp, 1);
5950	} else
5951		*lypp = lyp;
5952	return (error);
5953}
5954
5955/*
5956 * Do a TCP connection plus exchange id and create session.
5957 * If successful, a "struct nfsclds" is linked into the list for the
5958 * mount point and a pointer to it is returned.
5959 */
5960static int
5961nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
5962    struct sockaddr_in6 *sin6, sa_family_t af, int vers, int minorvers,
5963    struct nfsclds **dspp, NFSPROC_T *p)
5964{
5965	struct sockaddr_in *msad, *sad;
5966	struct sockaddr_in6 *msad6, *sad6;
5967	struct nfsclclient *clp;
5968	struct nfssockreq *nrp;
5969	struct nfsclds *dsp, *tdsp;
5970	int error, firsttry;
5971	enum nfsclds_state retv;
5972	uint32_t sequenceid = 0;
5973
5974	KASSERT(nmp->nm_sockreq.nr_cred != NULL,
5975	    ("nfsrpc_fillsa: NULL nr_cred"));
5976	NFSLOCKCLSTATE();
5977	clp = nmp->nm_clp;
5978	NFSUNLOCKCLSTATE();
5979	if (clp == NULL)
5980		return (EPERM);
5981	if (af == AF_INET) {
5982		NFSLOCKMNT(nmp);
5983		/*
5984		 * Check to see if we already have a session for this
5985		 * address that is usable for a DS.
5986		 * Note that the MDS's address is in a different place
5987		 * than the sessions already acquired for DS's.
5988		 */
5989		msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam;
5990		tdsp = TAILQ_FIRST(&nmp->nm_sess);
5991		while (tdsp != NULL) {
5992			if (msad != NULL && msad->sin_family == AF_INET &&
5993			    sin->sin_addr.s_addr == msad->sin_addr.s_addr &&
5994			    sin->sin_port == msad->sin_port &&
5995			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
5996			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
5997				*dspp = tdsp;
5998				NFSUNLOCKMNT(nmp);
5999				NFSCL_DEBUG(4, "fnd same addr\n");
6000				return (0);
6001			}
6002			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
6003			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
6004				msad = (struct sockaddr_in *)
6005				    tdsp->nfsclds_sockp->nr_nam;
6006			else
6007				msad = NULL;
6008		}
6009		NFSUNLOCKMNT(nmp);
6010
6011		/* No IP address match, so look for new/trunked one. */
6012		sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO);
6013		sad->sin_len = sizeof(*sad);
6014		sad->sin_family = AF_INET;
6015		sad->sin_port = sin->sin_port;
6016		sad->sin_addr.s_addr = sin->sin_addr.s_addr;
6017		if (NFSHASPNFS(nmp) && NFSHASKERB(nmp)) {
6018			/* For pNFS, a separate server principal is needed. */
6019			nrp = malloc(sizeof(*nrp) + NI_MAXSERV + NI_MAXHOST,
6020			    M_NFSSOCKREQ, M_WAITOK | M_ZERO);
6021			/*
6022			 * Use the latter part of nr_srvprinc as a temporary
6023			 * buffer for the IP address.
6024			 */
6025			inet_ntoa_r(sad->sin_addr,
6026			    &nrp->nr_srvprinc[NI_MAXSERV]);
6027			NFSCL_DEBUG(1, "nfsrpc_fillsa: DS IP=%s\n",
6028			    &nrp->nr_srvprinc[NI_MAXSERV]);
6029			if (!rpc_gss_ip_to_srv_principal_call(
6030			    &nrp->nr_srvprinc[NI_MAXSERV], "nfs",
6031			    nrp->nr_srvprinc))
6032				nrp->nr_srvprinc[0] = '\0';
6033			NFSCL_DEBUG(1, "nfsrpc_fillsa: srv principal=%s\n",
6034			    nrp->nr_srvprinc);
6035		} else
6036			nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ,
6037			    M_WAITOK | M_ZERO);
6038		nrp->nr_nam = (struct sockaddr *)sad;
6039	} else if (af == AF_INET6) {
6040		NFSLOCKMNT(nmp);
6041		/*
6042		 * Check to see if we already have a session for this
6043		 * address that is usable for a DS.
6044		 * Note that the MDS's address is in a different place
6045		 * than the sessions already acquired for DS's.
6046		 */
6047		msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam;
6048		tdsp = TAILQ_FIRST(&nmp->nm_sess);
6049		while (tdsp != NULL) {
6050			if (msad6 != NULL && msad6->sin6_family == AF_INET6 &&
6051			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6052			    &msad6->sin6_addr) &&
6053			    sin6->sin6_port == msad6->sin6_port &&
6054			    (tdsp->nfsclds_flags & NFSCLDS_DS) != 0 &&
6055			    tdsp->nfsclds_sess.nfsess_defunct == 0) {
6056				*dspp = tdsp;
6057				NFSUNLOCKMNT(nmp);
6058				return (0);
6059			}
6060			tdsp = TAILQ_NEXT(tdsp, nfsclds_list);
6061			if (tdsp != NULL && tdsp->nfsclds_sockp != NULL)
6062				msad6 = (struct sockaddr_in6 *)
6063				    tdsp->nfsclds_sockp->nr_nam;
6064			else
6065				msad6 = NULL;
6066		}
6067		NFSUNLOCKMNT(nmp);
6068
6069		/* No IP address match, so look for new/trunked one. */
6070		sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO);
6071		sad6->sin6_len = sizeof(*sad6);
6072		sad6->sin6_family = AF_INET6;
6073		sad6->sin6_port = sin6->sin6_port;
6074		NFSBCOPY(&sin6->sin6_addr, &sad6->sin6_addr,
6075		    sizeof(struct in6_addr));
6076		if (NFSHASPNFS(nmp) && NFSHASKERB(nmp)) {
6077			/* For pNFS, a separate server principal is needed. */
6078			nrp = malloc(sizeof(*nrp) + NI_MAXSERV + NI_MAXHOST,
6079			    M_NFSSOCKREQ, M_WAITOK | M_ZERO);
6080			/*
6081			 * Use the latter part of nr_srvprinc as a temporary
6082			 * buffer for the IP address.
6083			 */
6084			inet_ntop(AF_INET6, &sad6->sin6_addr,
6085			    &nrp->nr_srvprinc[NI_MAXSERV], NI_MAXHOST);
6086			NFSCL_DEBUG(1, "nfsrpc_fillsa: DS IP=%s\n",
6087			    &nrp->nr_srvprinc[NI_MAXSERV]);
6088			if (!rpc_gss_ip_to_srv_principal_call(
6089			    &nrp->nr_srvprinc[NI_MAXSERV], "nfs",
6090			    nrp->nr_srvprinc))
6091				nrp->nr_srvprinc[0] = '\0';
6092			NFSCL_DEBUG(1, "nfsrpc_fillsa: srv principal=%s\n",
6093			    nrp->nr_srvprinc);
6094		} else
6095			nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ,
6096			    M_WAITOK | M_ZERO);
6097		nrp->nr_nam = (struct sockaddr *)sad6;
6098	} else
6099		return (EPERM);
6100
6101	nrp->nr_sotype = SOCK_STREAM;
6102	mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF);
6103	nrp->nr_prog = NFS_PROG;
6104	nrp->nr_vers = vers;
6105
6106	/*
6107	 * Use the credentials that were used for the mount, which are
6108	 * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc.
6109	 * Ref. counting the credentials with crhold() is probably not
6110	 * necessary, since nm_sockreq.nr_cred won't be crfree()'d until
6111	 * unmount, but I did it anyhow.
6112	 */
6113	nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
6114	error = newnfs_connect(nmp, nrp, NULL, p, 0, false, &nrp->nr_client);
6115	NFSCL_DEBUG(3, "DS connect=%d\n", error);
6116
6117	dsp = NULL;
6118	/* Now, do the exchangeid and create session. */
6119	if (error == 0) {
6120		if (vers == NFS_VER4) {
6121			firsttry = 0;
6122			do {
6123				error = nfsrpc_exchangeid(nmp, clp, nrp,
6124				    minorvers, NFSV4EXCH_USEPNFSDS, &dsp,
6125				    nrp->nr_cred, p);
6126				NFSCL_DEBUG(3, "DS exchangeid=%d\n", error);
6127				if (error == NFSERR_MINORVERMISMATCH)
6128					minorvers = NFSV42_MINORVERSION;
6129			} while (error == NFSERR_MINORVERMISMATCH &&
6130			    firsttry++ == 0);
6131			if (error != 0)
6132				newnfs_disconnect(NULL, nrp);
6133		} else {
6134			dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
6135			    M_WAITOK | M_ZERO);
6136			dsp->nfsclds_flags |= NFSCLDS_DS;
6137			dsp->nfsclds_expire = INT32_MAX; /* No renews needed. */
6138			mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF);
6139			mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession",
6140			    NULL, MTX_DEF);
6141		}
6142	}
6143	if (error == 0) {
6144		dsp->nfsclds_sockp = nrp;
6145		if (vers == NFS_VER4) {
6146			NFSLOCKMNT(nmp);
6147			retv = nfscl_getsameserver(nmp, dsp, &tdsp,
6148			    &sequenceid);
6149			NFSCL_DEBUG(3, "getsame ret=%d\n", retv);
6150			if (retv == NFSDSP_USETHISSESSION &&
6151			    nfscl_dssameconn != 0) {
6152				NFSLOCKDS(tdsp);
6153				tdsp->nfsclds_flags |= NFSCLDS_SAMECONN;
6154				NFSUNLOCKDS(tdsp);
6155				NFSUNLOCKMNT(nmp);
6156				/*
6157				 * If there is already a session for this
6158				 * server, use it.
6159				 */
6160				newnfs_disconnect(NULL, nrp);
6161				nfscl_freenfsclds(dsp);
6162				*dspp = tdsp;
6163				return (0);
6164			}
6165			if (retv == NFSDSP_NOTFOUND)
6166				sequenceid =
6167				    dsp->nfsclds_sess.nfsess_sequenceid;
6168			NFSUNLOCKMNT(nmp);
6169			error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess,
6170			    nrp, dsp, sequenceid, 0, nrp->nr_cred, p);
6171			NFSCL_DEBUG(3, "DS createsess=%d\n", error);
6172		}
6173	} else {
6174		NFSFREECRED(nrp->nr_cred);
6175		NFSFREEMUTEX(&nrp->nr_mtx);
6176		free(nrp->nr_nam, M_SONAME);
6177		free(nrp, M_NFSSOCKREQ);
6178	}
6179	if (error == 0) {
6180		NFSCL_DEBUG(3, "add DS session\n");
6181		/*
6182		 * Put it at the end of the list. That way the list
6183		 * is ordered by when the entry was added. This matters
6184		 * since the one done first is the one that should be
6185		 * used for sequencid'ing any subsequent create sessions.
6186		 */
6187		NFSLOCKMNT(nmp);
6188		TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list);
6189		NFSUNLOCKMNT(nmp);
6190		*dspp = dsp;
6191	} else if (dsp != NULL) {
6192		newnfs_disconnect(NULL, nrp);
6193		nfscl_freenfsclds(dsp);
6194	}
6195	return (error);
6196}
6197
6198/*
6199 * Do the NFSv4.1 Reclaim Complete.
6200 */
6201int
6202nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
6203{
6204	uint32_t *tl;
6205	struct nfsrv_descript nfsd;
6206	struct nfsrv_descript *nd = &nfsd;
6207	int error;
6208
6209	nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL, 0,
6210	    0, cred);
6211	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
6212	*tl = newnfs_false;
6213	nd->nd_flag |= ND_USEGSSNAME;
6214	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
6215	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6216	if (error != 0)
6217		return (error);
6218	error = nd->nd_repstat;
6219	m_freem(nd->nd_mrep);
6220	return (error);
6221}
6222
6223/*
6224 * Initialize the slot tables for a session.
6225 */
6226static void
6227nfscl_initsessionslots(struct nfsclsession *sep)
6228{
6229	int i;
6230
6231	for (i = 0; i < NFSV4_CBSLOTS; i++) {
6232		if (sep->nfsess_cbslots[i].nfssl_reply != NULL)
6233			m_freem(sep->nfsess_cbslots[i].nfssl_reply);
6234		NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot));
6235	}
6236	for (i = 0; i < 64; i++)
6237		sep->nfsess_slotseq[i] = 0;
6238	sep->nfsess_slots = 0;
6239	sep->nfsess_badslots = 0;
6240}
6241
6242/*
6243 * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS).
6244 */
6245int
6246nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6247    uint32_t rwaccess, int docommit, struct ucred *cred, NFSPROC_T *p)
6248{
6249	struct nfsnode *np = VTONFS(vp);
6250	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6251	struct nfscllayout *layp;
6252	struct nfscldevinfo *dip;
6253	struct nfsclflayout *rflp;
6254	struct mbuf *m, *m2;
6255	struct nfsclwritedsdorpc *drpc, *tdrpc;
6256	nfsv4stateid_t stateid;
6257	struct ucred *newcred;
6258	uint64_t lastbyte, len, off, oresid, xfer;
6259	int eof, error, firstmirror, i, iolaymode, mirrorcnt, recalled, timo;
6260	void *lckp;
6261	uint8_t *dev;
6262	void *iovbase = NULL;
6263	size_t iovlen = 0;
6264	off_t offs = 0;
6265	ssize_t resid = 0;
6266	uint32_t op;
6267
6268	if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 ||
6269	    (np->n_flag & NNOLAYOUT) != 0)
6270		return (EIO);
6271	/* Now, get a reference cnt on the clientid for this mount. */
6272	if (nfscl_getref(nmp) == 0)
6273		return (EIO);
6274
6275	/* Find an appropriate stateid. */
6276	newcred = NFSNEWCRED(cred);
6277	error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
6278	    rwaccess, 1, newcred, p, &stateid, &lckp);
6279	if (error != 0) {
6280		NFSFREECRED(newcred);
6281		nfscl_relref(nmp);
6282		return (error);
6283	}
6284	/* Search for a layout for this file. */
6285	off = uiop->uio_offset;
6286	layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh,
6287	    np->n_fhp->nfh_len, off, rwaccess, &rflp, &recalled);
6288	if (layp == NULL || rflp == NULL) {
6289		if (recalled != 0) {
6290			NFSFREECRED(newcred);
6291			if (lckp != NULL)
6292				nfscl_lockderef(lckp);
6293			nfscl_relref(nmp);
6294			return (EIO);
6295		}
6296		if (layp != NULL) {
6297			nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0);
6298			layp = NULL;
6299		}
6300		/* Try and get a Layout, if it is supported. */
6301		if (rwaccess == NFSV4OPEN_ACCESSWRITE ||
6302		    (np->n_flag & NWRITEOPENED) != 0)
6303			iolaymode = NFSLAYOUTIOMODE_RW;
6304		else
6305			iolaymode = NFSLAYOUTIOMODE_READ;
6306		error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode,
6307		    rwaccess, NULL, &stateid, off, &layp, newcred, p);
6308		if (error != 0) {
6309			NFSLOCKNODE(np);
6310			np->n_flag |= NNOLAYOUT;
6311			NFSUNLOCKNODE(np);
6312			if (lckp != NULL)
6313				nfscl_lockderef(lckp);
6314			NFSFREECRED(newcred);
6315			if (layp != NULL)
6316				nfscl_rellayout(layp, 0);
6317			nfscl_relref(nmp);
6318			return (error);
6319		}
6320	}
6321
6322	/*
6323	 * Loop around finding a layout that works for the first part of
6324	 * this I/O operation, and then call the function that actually
6325	 * does the RPC.
6326	 */
6327	eof = 0;
6328	len = (uint64_t)uiop->uio_resid;
6329	while (len > 0 && error == 0 && eof == 0) {
6330		off = uiop->uio_offset;
6331		error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp);
6332		if (error == 0) {
6333			oresid = xfer = (uint64_t)uiop->uio_resid;
6334			if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off))
6335				xfer = rflp->nfsfl_end - rflp->nfsfl_off;
6336			/*
6337			 * For Flex File layout with mirrored DSs, select one
6338			 * of them at random for reads. For writes and commits,
6339			 * do all mirrors.
6340			 */
6341			m = NULL;
6342			tdrpc = drpc = NULL;
6343			firstmirror = 0;
6344			mirrorcnt = 1;
6345			if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0 &&
6346			    (mirrorcnt = rflp->nfsfl_mirrorcnt) > 1) {
6347				if (rwaccess == NFSV4OPEN_ACCESSREAD) {
6348					firstmirror = arc4random() % mirrorcnt;
6349					mirrorcnt = firstmirror + 1;
6350				} else {
6351					if (docommit == 0) {
6352						/*
6353						 * Save values, so uiop can be
6354						 * rolled back upon a write
6355						 * error.
6356						 */
6357						offs = uiop->uio_offset;
6358						resid = uiop->uio_resid;
6359						iovbase =
6360						    uiop->uio_iov->iov_base;
6361						iovlen = uiop->uio_iov->iov_len;
6362						m = nfsm_uiombuflist(uiop, len,
6363						    0);
6364						if (m == NULL) {
6365							error = EFAULT;
6366							break;
6367						}
6368					}
6369					tdrpc = drpc = malloc(sizeof(*drpc) *
6370					    (mirrorcnt - 1), M_TEMP, M_WAITOK |
6371					    M_ZERO);
6372				}
6373			}
6374			for (i = firstmirror; i < mirrorcnt && error == 0; i++){
6375				m2 = NULL;
6376				if (m != NULL && i < mirrorcnt - 1)
6377					m2 = m_copym(m, 0, M_COPYALL, M_WAITOK);
6378				else {
6379					m2 = m;
6380					m = NULL;
6381				}
6382				if ((layp->nfsly_flags & NFSLY_FLEXFILE) != 0) {
6383					dev = rflp->nfsfl_ffm[i].dev;
6384					dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6385					    rflp->nfsfl_ffm[i].devp);
6386				} else {
6387					dev = rflp->nfsfl_dev;
6388					dip = nfscl_getdevinfo(nmp->nm_clp, dev,
6389					    rflp->nfsfl_devp);
6390				}
6391				if (dip != NULL) {
6392					if ((rflp->nfsfl_flags & NFSFL_FLEXFILE)
6393					    != 0)
6394						error = nfscl_dofflayoutio(vp,
6395						    uiop, iomode, must_commit,
6396						    &eof, &stateid, rwaccess,
6397						    dip, layp, rflp, off, xfer,
6398						    i, docommit, m2, tdrpc,
6399						    newcred, p);
6400					else
6401						error = nfscl_doflayoutio(vp,
6402						    uiop, iomode, must_commit,
6403						    &eof, &stateid, rwaccess,
6404						    dip, layp, rflp, off, xfer,
6405						    docommit, newcred, p);
6406					nfscl_reldevinfo(dip);
6407				} else {
6408					if (m2 != NULL)
6409						m_freem(m2);
6410					error = EIO;
6411				}
6412				tdrpc++;
6413			}
6414			if (m != NULL)
6415				m_freem(m);
6416			tdrpc = drpc;
6417			timo = hz / 50;		/* Wait for 20msec. */
6418			if (timo < 1)
6419				timo = 1;
6420			for (i = firstmirror; i < mirrorcnt - 1 &&
6421			    tdrpc != NULL; i++, tdrpc++) {
6422				/*
6423				 * For the unused drpc entries, both inprog and
6424				 * err == 0, so this loop won't break.
6425				 */
6426				while (tdrpc->inprog != 0 && tdrpc->done == 0)
6427					tsleep(&tdrpc->tsk, PVFS, "clrpcio",
6428					    timo);
6429				if (error == 0 && tdrpc->err != 0)
6430					error = tdrpc->err;
6431				if (rwaccess != NFSV4OPEN_ACCESSREAD &&
6432				    docommit == 0 && *must_commit == 0 &&
6433				    tdrpc->must_commit == 1)
6434					*must_commit = 1;
6435			}
6436			free(drpc, M_TEMP);
6437			if (error == 0) {
6438				if (mirrorcnt > 1 && rwaccess ==
6439				    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6440					NFSLOCKCLSTATE();
6441					layp->nfsly_flags |= NFSLY_WRITTEN;
6442					NFSUNLOCKCLSTATE();
6443				}
6444				lastbyte = off + xfer - 1;
6445				NFSLOCKCLSTATE();
6446				if (lastbyte > layp->nfsly_lastbyte)
6447					layp->nfsly_lastbyte = lastbyte;
6448				NFSUNLOCKCLSTATE();
6449			} else if (error == NFSERR_OPENMODE &&
6450			    rwaccess == NFSV4OPEN_ACCESSREAD) {
6451				NFSLOCKMNT(nmp);
6452				nmp->nm_state |= NFSSTA_OPENMODE;
6453				NFSUNLOCKMNT(nmp);
6454			} else if ((error == NFSERR_NOSPC ||
6455			    error == NFSERR_IO || error == NFSERR_NXIO) &&
6456			    nmp->nm_minorvers == NFSV42_MINORVERSION) {
6457				if (docommit != 0)
6458					op = NFSV4OP_COMMIT;
6459				else if (rwaccess == NFSV4OPEN_ACCESSREAD)
6460					op = NFSV4OP_READ;
6461				else
6462					op = NFSV4OP_WRITE;
6463				nfsrpc_layouterror(nmp, np->n_fhp->nfh_fh,
6464				    np->n_fhp->nfh_len, off, xfer,
6465				    &layp->nfsly_stateid, newcred, p, error, op,
6466				    dip->nfsdi_deviceid);
6467				error = EIO;
6468			} else
6469				error = EIO;
6470			if (error == 0)
6471				len -= (oresid - (uint64_t)uiop->uio_resid);
6472			else if (mirrorcnt > 1 && rwaccess ==
6473			    NFSV4OPEN_ACCESSWRITE && docommit == 0) {
6474				/*
6475				 * In case the rpc gets retried, roll the
6476				 * uio fields changed by nfsm_uiombuflist()
6477				 * back.
6478				 */
6479				uiop->uio_offset = offs;
6480				uiop->uio_resid = resid;
6481				uiop->uio_iov->iov_base = iovbase;
6482				uiop->uio_iov->iov_len = iovlen;
6483			}
6484		}
6485	}
6486	if (lckp != NULL)
6487		nfscl_lockderef(lckp);
6488	NFSFREECRED(newcred);
6489	nfscl_rellayout(layp, 0);
6490	nfscl_relref(nmp);
6491	return (error);
6492}
6493
6494/*
6495 * Find a file layout that will handle the first bytes of the requested
6496 * range and return the information from it needed to the I/O operation.
6497 */
6498int
6499nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess,
6500    struct nfsclflayout **retflpp)
6501{
6502	struct nfsclflayout *flp, *nflp, *rflp;
6503	uint32_t rw;
6504
6505	rflp = NULL;
6506	rw = rwaccess;
6507	/* For reading, do the Read list first and then the Write list. */
6508	do {
6509		if (rw == NFSV4OPEN_ACCESSREAD)
6510			flp = LIST_FIRST(&lyp->nfsly_flayread);
6511		else
6512			flp = LIST_FIRST(&lyp->nfsly_flayrw);
6513		while (flp != NULL) {
6514			nflp = LIST_NEXT(flp, nfsfl_list);
6515			if (flp->nfsfl_off > off)
6516				break;
6517			if (flp->nfsfl_end > off &&
6518			    (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end))
6519				rflp = flp;
6520			flp = nflp;
6521		}
6522		if (rw == NFSV4OPEN_ACCESSREAD)
6523			rw = NFSV4OPEN_ACCESSWRITE;
6524		else
6525			rw = 0;
6526	} while (rw != 0);
6527	if (rflp != NULL) {
6528		/* This one covers the most bytes starting at off. */
6529		*retflpp = rflp;
6530		return (0);
6531	}
6532	return (EIO);
6533}
6534
6535/*
6536 * Do I/O using an NFSv4.1 or NFSv4.2 file layout.
6537 */
6538static int
6539nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6540    int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6541    struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6542    uint64_t len, int docommit, struct ucred *cred, NFSPROC_T *p)
6543{
6544	uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer;
6545	int commit_thru_mds, error, stripe_index, stripe_pos, minorvers;
6546	struct nfsnode *np;
6547	struct nfsfh *fhp;
6548	struct nfsclds **dspp;
6549
6550	np = VTONFS(vp);
6551	rel_off = off - flp->nfsfl_patoff;
6552	stripe_unit_size = flp->nfsfl_util & NFSFLAYUTIL_STRIPE_MASK;
6553	stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) %
6554	    dp->nfsdi_stripecnt;
6555	transfer = stripe_unit_size - (rel_off % stripe_unit_size);
6556	error = 0;
6557
6558	/* Loop around, doing I/O for each stripe unit. */
6559	while (len > 0 && error == 0) {
6560		stripe_index = nfsfldi_stripeindex(dp, stripe_pos);
6561		dspp = nfsfldi_addr(dp, stripe_index);
6562		if (((*dspp)->nfsclds_flags & NFSCLDS_MINORV2) != 0)
6563			minorvers = NFSV42_MINORVERSION;
6564		else
6565			minorvers = NFSV41_MINORVERSION;
6566		if (len > transfer && docommit == 0)
6567			xfer = transfer;
6568		else
6569			xfer = len;
6570		if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) {
6571			/* Dense layout. */
6572			if (stripe_pos >= flp->nfsfl_fhcnt)
6573				return (EIO);
6574			fhp = flp->nfsfl_fh[stripe_pos];
6575			io_off = (rel_off / (stripe_unit_size *
6576			    dp->nfsdi_stripecnt)) * stripe_unit_size +
6577			    rel_off % stripe_unit_size;
6578		} else {
6579			/* Sparse layout. */
6580			if (flp->nfsfl_fhcnt > 1) {
6581				if (stripe_index >= flp->nfsfl_fhcnt)
6582					return (EIO);
6583				fhp = flp->nfsfl_fh[stripe_index];
6584			} else if (flp->nfsfl_fhcnt == 1)
6585				fhp = flp->nfsfl_fh[0];
6586			else
6587				fhp = np->n_fhp;
6588			io_off = off;
6589		}
6590		if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) {
6591			commit_thru_mds = 1;
6592			if (docommit != 0)
6593				error = EIO;
6594		} else {
6595			commit_thru_mds = 0;
6596			NFSLOCKNODE(np);
6597			np->n_flag |= NDSCOMMIT;
6598			NFSUNLOCKNODE(np);
6599		}
6600		if (docommit != 0) {
6601			if (error == 0)
6602				error = nfsrpc_commitds(vp, io_off, xfer,
6603				    *dspp, fhp, NFS_VER4, minorvers, cred, p);
6604			if (error == 0) {
6605				/*
6606				 * Set both eof and uio_resid = 0 to end any
6607				 * loops.
6608				 */
6609				*eofp = 1;
6610				uiop->uio_resid = 0;
6611			} else {
6612				NFSLOCKNODE(np);
6613				np->n_flag &= ~NDSCOMMIT;
6614				NFSUNLOCKNODE(np);
6615			}
6616		} else if (rwflag == NFSV4OPEN_ACCESSREAD)
6617			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6618			    io_off, xfer, fhp, 0, NFS_VER4, minorvers, cred, p);
6619		else {
6620			error = nfsrpc_writeds(vp, uiop, iomode, must_commit,
6621			    stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds,
6622			    0, NFS_VER4, minorvers, cred, p);
6623			if (error == 0) {
6624				NFSLOCKCLSTATE();
6625				lyp->nfsly_flags |= NFSLY_WRITTEN;
6626				NFSUNLOCKCLSTATE();
6627			}
6628		}
6629		if (error == 0) {
6630			transfer = stripe_unit_size;
6631			stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt;
6632			len -= xfer;
6633			off += xfer;
6634		}
6635	}
6636	return (error);
6637}
6638
6639/*
6640 * Do I/O using an NFSv4.1 flex file layout.
6641 */
6642static int
6643nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6644    int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp,
6645    struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off,
6646    uint64_t len, int mirror, int docommit, struct mbuf *mp,
6647    struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
6648{
6649	uint64_t xfer;
6650	int error;
6651	struct nfsnode *np;
6652	struct nfsfh *fhp;
6653	struct nfsclds **dspp;
6654	struct ucred *tcred;
6655	struct mbuf *m, *m2;
6656	uint32_t copylen;
6657
6658	np = VTONFS(vp);
6659	error = 0;
6660	NFSCL_DEBUG(4, "nfscl_dofflayoutio: off=%ju len=%ju\n", (uintmax_t)off,
6661	    (uintmax_t)len);
6662	/* Loop around, doing I/O for each stripe unit. */
6663	while (len > 0 && error == 0) {
6664		dspp = nfsfldi_addr(dp, 0);
6665		fhp = flp->nfsfl_ffm[mirror].fh[dp->nfsdi_versindex];
6666		stateidp = &flp->nfsfl_ffm[mirror].st;
6667		NFSCL_DEBUG(4, "mirror=%d vind=%d fhlen=%d st.seqid=0x%x\n",
6668		    mirror, dp->nfsdi_versindex, fhp->nfh_len, stateidp->seqid);
6669		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0) {
6670			tcred = NFSNEWCRED(cred);
6671			tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
6672			tcred->cr_groups[0] = flp->nfsfl_ffm[mirror].group;
6673			tcred->cr_ngroups = 1;
6674		} else
6675			tcred = cred;
6676		if (rwflag == NFSV4OPEN_ACCESSREAD)
6677			copylen = dp->nfsdi_rsize;
6678		else {
6679			copylen = dp->nfsdi_wsize;
6680			if (len > copylen && mp != NULL) {
6681				/*
6682				 * When a mirrored configuration needs to do
6683				 * multiple writes to each mirror, all writes
6684				 * except the last one must be a multiple of
6685				 * 4 bytes.  This is required so that the XDR
6686				 * does not need padding.
6687				 * If possible, clip the size to an exact
6688				 * multiple of the mbuf length, so that the
6689				 * split will be on an mbuf boundary.
6690				 */
6691				copylen &= 0xfffffffc;
6692				if (copylen > mp->m_len)
6693					copylen = copylen / mp->m_len *
6694					    mp->m_len;
6695			}
6696		}
6697		NFSLOCKNODE(np);
6698		np->n_flag |= NDSCOMMIT;
6699		NFSUNLOCKNODE(np);
6700		if (len > copylen && docommit == 0)
6701			xfer = copylen;
6702		else
6703			xfer = len;
6704		if (docommit != 0) {
6705			if (error == 0) {
6706				/*
6707				 * Do last mirrored DS commit with this thread.
6708				 */
6709				if (mirror < flp->nfsfl_mirrorcnt - 1)
6710					error = nfsio_commitds(vp, off, xfer,
6711					    *dspp, fhp, dp->nfsdi_vers,
6712					    dp->nfsdi_minorvers, drpc, tcred,
6713					    p);
6714				else
6715					error = nfsrpc_commitds(vp, off, xfer,
6716					    *dspp, fhp, dp->nfsdi_vers,
6717					    dp->nfsdi_minorvers, tcred, p);
6718				NFSCL_DEBUG(4, "commitds=%d\n", error);
6719				if (error != 0 && error != EACCES && error !=
6720				    ESTALE) {
6721					NFSCL_DEBUG(4,
6722					    "DS layreterr for commit\n");
6723					nfscl_dserr(NFSV4OP_COMMIT, error, dp,
6724					    lyp, *dspp);
6725				}
6726			}
6727			NFSCL_DEBUG(4, "aft nfsio_commitds=%d\n", error);
6728			if (error == 0) {
6729				/*
6730				 * Set both eof and uio_resid = 0 to end any
6731				 * loops.
6732				 */
6733				*eofp = 1;
6734				uiop->uio_resid = 0;
6735			} else {
6736				NFSLOCKNODE(np);
6737				np->n_flag &= ~NDSCOMMIT;
6738				NFSUNLOCKNODE(np);
6739			}
6740		} else if (rwflag == NFSV4OPEN_ACCESSREAD) {
6741			error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp,
6742			    off, xfer, fhp, 1, dp->nfsdi_vers,
6743			    dp->nfsdi_minorvers, tcred, p);
6744			NFSCL_DEBUG(4, "readds=%d\n", error);
6745			if (error != 0 && error != EACCES && error != ESTALE) {
6746				NFSCL_DEBUG(4, "DS layreterr for read\n");
6747				nfscl_dserr(NFSV4OP_READ, error, dp, lyp,
6748				    *dspp);
6749			}
6750		} else {
6751			if (flp->nfsfl_mirrorcnt == 1) {
6752				error = nfsrpc_writeds(vp, uiop, iomode,
6753				    must_commit, stateidp, *dspp, off, xfer,
6754				    fhp, 0, 1, dp->nfsdi_vers,
6755				    dp->nfsdi_minorvers, tcred, p);
6756				if (error == 0) {
6757					NFSLOCKCLSTATE();
6758					lyp->nfsly_flags |= NFSLY_WRITTEN;
6759					NFSUNLOCKCLSTATE();
6760				}
6761			} else {
6762				m = mp;
6763				if (xfer < len) {
6764					/* The mbuf list must be split. */
6765					m2 = nfsm_split(mp, xfer);
6766					if (m2 != NULL)
6767						mp = m2;
6768					else {
6769						m_freem(mp);
6770						error = EIO;
6771					}
6772				}
6773				NFSCL_DEBUG(4, "mcopy len=%jd xfer=%jd\n",
6774				    (uintmax_t)len, (uintmax_t)xfer);
6775				/*
6776				 * Do last write to a mirrored DS with this
6777				 * thread.
6778				 */
6779				if (error == 0) {
6780					if (mirror < flp->nfsfl_mirrorcnt - 1)
6781						error = nfsio_writedsmir(vp,
6782						    iomode, must_commit,
6783						    stateidp, *dspp, off,
6784						    xfer, fhp, m,
6785						    dp->nfsdi_vers,
6786						    dp->nfsdi_minorvers, drpc,
6787						    tcred, p);
6788					else
6789						error = nfsrpc_writedsmir(vp,
6790						    iomode, must_commit,
6791						    stateidp, *dspp, off,
6792						    xfer, fhp, m,
6793						    dp->nfsdi_vers,
6794						    dp->nfsdi_minorvers, tcred,
6795						    p);
6796				}
6797				NFSCL_DEBUG(4, "nfsio_writedsmir=%d\n", error);
6798				if (error != 0 && error != EACCES && error !=
6799				    ESTALE) {
6800					NFSCL_DEBUG(4,
6801					    "DS layreterr for write\n");
6802					nfscl_dserr(NFSV4OP_WRITE, error, dp,
6803					    lyp, *dspp);
6804				}
6805			}
6806		}
6807		NFSCL_DEBUG(4, "aft read/writeds=%d\n", error);
6808		if (error == 0) {
6809			len -= xfer;
6810			off += xfer;
6811		}
6812		if ((dp->nfsdi_flags & NFSDI_TIGHTCOUPLED) == 0)
6813			NFSFREECRED(tcred);
6814	}
6815	NFSCL_DEBUG(4, "eo nfscl_dofflayoutio=%d\n", error);
6816	return (error);
6817}
6818
6819/*
6820 * The actual read RPC done to a DS.
6821 */
6822static int
6823nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp,
6824    struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, int flex,
6825    int vers, int minorvers, struct ucred *cred, NFSPROC_T *p)
6826{
6827	uint32_t *tl;
6828	int attrflag, error, retlen;
6829	struct nfsrv_descript nfsd;
6830	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6831	struct nfsrv_descript *nd = &nfsd;
6832	struct nfssockreq *nrp;
6833	struct nfsvattr na;
6834
6835	nd->nd_mrep = NULL;
6836	if (vers == 0 || vers == NFS_VER4) {
6837		nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh,
6838		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6839		    NULL);
6840		vers = NFS_VER4;
6841		NFSCL_DEBUG(4, "nfsrpc_readds: vers4 minvers=%d\n", minorvers);
6842		if (flex != 0)
6843			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6844		else
6845			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6846	} else {
6847		nfscl_reqstart(nd, NFSPROC_READ, nmp, fhp->nfh_fh,
6848		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6849		    NULL);
6850		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READ]);
6851		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_READDS]);
6852		NFSCL_DEBUG(4, "nfsrpc_readds: vers3\n");
6853	}
6854	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
6855	txdr_hyper(io_off, tl);
6856	*(tl + 2) = txdr_unsigned(len);
6857	nrp = dsp->nfsclds_sockp;
6858	NFSCL_DEBUG(4, "nfsrpc_readds: nrp=%p\n", nrp);
6859	if (nrp == NULL)
6860		/* If NULL, use the MDS socket. */
6861		nrp = &nmp->nm_sockreq;
6862	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6863	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6864	NFSCL_DEBUG(4, "nfsrpc_readds: stat=%d err=%d\n", nd->nd_repstat,
6865	    error);
6866	if (error != 0)
6867		return (error);
6868	if (vers == NFS_VER3) {
6869		error = nfscl_postop_attr(nd, &na, &attrflag);
6870		NFSCL_DEBUG(4, "nfsrpc_readds: postop=%d\n", error);
6871		if (error != 0)
6872			goto nfsmout;
6873	}
6874	if (nd->nd_repstat != 0) {
6875		error = nd->nd_repstat;
6876		goto nfsmout;
6877	}
6878	if (vers == NFS_VER3) {
6879		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
6880		*eofp = fxdr_unsigned(int, *(tl + 1));
6881	} else {
6882		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
6883		*eofp = fxdr_unsigned(int, *tl);
6884	}
6885	NFSM_STRSIZ(retlen, len);
6886	NFSCL_DEBUG(4, "nfsrpc_readds: retlen=%d eof=%d\n", retlen, *eofp);
6887	error = nfsm_mbufuio(nd, uiop, retlen);
6888nfsmout:
6889	if (nd->nd_mrep != NULL)
6890		m_freem(nd->nd_mrep);
6891	return (error);
6892}
6893
6894/*
6895 * The actual write RPC done to a DS.
6896 */
6897static int
6898nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
6899    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
6900    struct nfsfh *fhp, int commit_thru_mds, int flex, int vers, int minorvers,
6901    struct ucred *cred, NFSPROC_T *p)
6902{
6903	uint32_t *tl;
6904	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6905	int attrflag, error, rlen, commit, committed = NFSWRITE_FILESYNC;
6906	int32_t backup;
6907	struct nfsrv_descript nfsd;
6908	struct nfsrv_descript *nd = &nfsd;
6909	struct nfssockreq *nrp;
6910	struct nfsvattr na;
6911
6912	KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1"));
6913	nd->nd_mrep = NULL;
6914	if (vers == 0 || vers == NFS_VER4) {
6915		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
6916		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6917		    NULL);
6918		NFSCL_DEBUG(4, "nfsrpc_writeds: vers4 minvers=%d\n", minorvers);
6919		vers = NFS_VER4;
6920		if (flex != 0)
6921			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
6922		else
6923			nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO);
6924		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
6925	} else {
6926		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
6927		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
6928		    NULL);
6929		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
6930		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
6931		NFSCL_DEBUG(4, "nfsrpc_writeds: vers3\n");
6932		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
6933	}
6934	txdr_hyper(io_off, tl);
6935	tl += 2;
6936	if (vers == NFS_VER3)
6937		*tl++ = txdr_unsigned(len);
6938	*tl++ = txdr_unsigned(*iomode);
6939	*tl = txdr_unsigned(len);
6940	error = nfsm_uiombuf(nd, uiop, len);
6941	if (error != 0) {
6942		m_freem(nd->nd_mreq);
6943		return (error);
6944	}
6945	nrp = dsp->nfsclds_sockp;
6946	if (nrp == NULL)
6947		/* If NULL, use the MDS socket. */
6948		nrp = &nmp->nm_sockreq;
6949	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
6950	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
6951	NFSCL_DEBUG(4, "nfsrpc_writeds: err=%d stat=%d\n", error,
6952	    nd->nd_repstat);
6953	if (error != 0)
6954		return (error);
6955	if (nd->nd_repstat != 0) {
6956		/*
6957		 * In case the rpc gets retried, roll
6958		 * the uio fields changed by nfsm_uiombuf()
6959		 * back.
6960		 */
6961		uiop->uio_offset -= len;
6962		uiop->uio_resid += len;
6963		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - len;
6964		uiop->uio_iov->iov_len += len;
6965		error = nd->nd_repstat;
6966	} else {
6967		if (vers == NFS_VER3) {
6968			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
6969			    NULL);
6970			NFSCL_DEBUG(4, "nfsrpc_writeds: wcc_data=%d\n", error);
6971			if (error != 0)
6972				goto nfsmout;
6973		}
6974		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
6975		rlen = fxdr_unsigned(int, *tl++);
6976		NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
6977		if (rlen == 0) {
6978			error = NFSERR_IO;
6979			goto nfsmout;
6980		} else if (rlen < len) {
6981			backup = len - rlen;
6982			uiop->uio_iov->iov_base =
6983			    (char *)uiop->uio_iov->iov_base - backup;
6984			uiop->uio_iov->iov_len += backup;
6985			uiop->uio_offset -= backup;
6986			uiop->uio_resid += backup;
6987			len = rlen;
6988		}
6989		commit = fxdr_unsigned(int, *tl++);
6990
6991		/*
6992		 * Return the lowest commitment level
6993		 * obtained by any of the RPCs.
6994		 */
6995		if (committed == NFSWRITE_FILESYNC)
6996			committed = commit;
6997		else if (committed == NFSWRITE_DATASYNC &&
6998		    commit == NFSWRITE_UNSTABLE)
6999			committed = commit;
7000		if (commit_thru_mds != 0) {
7001			NFSLOCKMNT(nmp);
7002			if (!NFSHASWRITEVERF(nmp)) {
7003				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
7004				NFSSETWRITEVERF(nmp);
7005			} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF) &&
7006			    *must_commit != 2) {
7007				*must_commit = 1;
7008				NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
7009			}
7010			NFSUNLOCKMNT(nmp);
7011		} else {
7012			NFSLOCKDS(dsp);
7013			if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
7014				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7015				dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
7016			} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
7017			    *must_commit != 2) {
7018				*must_commit = 1;
7019				NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7020			}
7021			NFSUNLOCKDS(dsp);
7022		}
7023	}
7024nfsmout:
7025	if (nd->nd_mrep != NULL)
7026		m_freem(nd->nd_mrep);
7027	*iomode = committed;
7028	if (nd->nd_repstat != 0 && error == 0)
7029		error = nd->nd_repstat;
7030	return (error);
7031}
7032
7033/*
7034 * The actual write RPC done to a DS.
7035 * This variant is called from a separate kernel process for mirrors.
7036 * Any short write is considered an IO error.
7037 */
7038static int
7039nfsrpc_writedsmir(vnode_t vp, int *iomode, int *must_commit,
7040    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len,
7041    struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
7042    struct ucred *cred, NFSPROC_T *p)
7043{
7044	uint32_t *tl;
7045	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7046	int attrflag, error, commit, committed = NFSWRITE_FILESYNC, rlen;
7047	struct nfsrv_descript nfsd;
7048	struct nfsrv_descript *nd = &nfsd;
7049	struct nfssockreq *nrp;
7050	struct nfsvattr na;
7051
7052	nd->nd_mrep = NULL;
7053	if (vers == 0 || vers == NFS_VER4) {
7054		nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh,
7055		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7056		    NULL);
7057		vers = NFS_VER4;
7058		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers4 minvers=%d\n",
7059		    minorvers);
7060		nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7061		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
7062	} else {
7063		nfscl_reqstart(nd, NFSPROC_WRITE, nmp, fhp->nfh_fh,
7064		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7065		    NULL);
7066		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITE]);
7067		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_WRITEDS]);
7068		NFSCL_DEBUG(4, "nfsrpc_writedsmir: vers3\n");
7069		NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED);
7070	}
7071	txdr_hyper(io_off, tl);
7072	tl += 2;
7073	if (vers == NFS_VER3)
7074		*tl++ = txdr_unsigned(len);
7075	*tl++ = txdr_unsigned(*iomode);
7076	*tl = txdr_unsigned(len);
7077	if (len > 0) {
7078		/* Put data in mbuf chain. */
7079		nd->nd_mb->m_next = m;
7080	}
7081	nrp = dsp->nfsclds_sockp;
7082	if (nrp == NULL)
7083		/* If NULL, use the MDS socket. */
7084		nrp = &nmp->nm_sockreq;
7085	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7086	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7087	NFSCL_DEBUG(4, "nfsrpc_writedsmir: err=%d stat=%d\n", error,
7088	    nd->nd_repstat);
7089	if (error != 0)
7090		return (error);
7091	if (nd->nd_repstat != 0)
7092		error = nd->nd_repstat;
7093	else {
7094		if (vers == NFS_VER3) {
7095			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
7096			    NULL);
7097			NFSCL_DEBUG(4, "nfsrpc_writedsmir: wcc_data=%d\n",
7098			    error);
7099			if (error != 0)
7100				goto nfsmout;
7101		}
7102		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
7103		rlen = fxdr_unsigned(int, *tl++);
7104		NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n", len,
7105		    rlen);
7106		if (rlen != len) {
7107			error = NFSERR_IO;
7108			NFSCL_DEBUG(4, "nfsrpc_writedsmir: len=%d rlen=%d\n",
7109			    len, rlen);
7110			goto nfsmout;
7111		}
7112		commit = fxdr_unsigned(int, *tl++);
7113
7114		/*
7115		 * Return the lowest commitment level
7116		 * obtained by any of the RPCs.
7117		 */
7118		if (committed == NFSWRITE_FILESYNC)
7119			committed = commit;
7120		else if (committed == NFSWRITE_DATASYNC &&
7121		    commit == NFSWRITE_UNSTABLE)
7122			committed = commit;
7123		NFSLOCKDS(dsp);
7124		if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) {
7125			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7126			dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF;
7127		} else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF) &&
7128		    *must_commit != 2) {
7129			*must_commit = 1;
7130			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7131		}
7132		NFSUNLOCKDS(dsp);
7133	}
7134nfsmout:
7135	if (nd->nd_mrep != NULL)
7136		m_freem(nd->nd_mrep);
7137	*iomode = committed;
7138	if (nd->nd_repstat != 0 && error == 0)
7139		error = nd->nd_repstat;
7140	return (error);
7141}
7142
7143/*
7144 * Start up the thread that will execute nfsrpc_writedsmir().
7145 */
7146static void
7147start_writedsmir(void *arg, int pending)
7148{
7149	struct nfsclwritedsdorpc *drpc;
7150
7151	drpc = (struct nfsclwritedsdorpc *)arg;
7152	drpc->err = nfsrpc_writedsmir(drpc->vp, &drpc->iomode,
7153	    &drpc->must_commit, drpc->stateidp, drpc->dsp, drpc->off, drpc->len,
7154	    drpc->fhp, drpc->m, drpc->vers, drpc->minorvers, drpc->cred,
7155	    drpc->p);
7156	drpc->done = 1;
7157	crfree(drpc->cred);
7158	NFSCL_DEBUG(4, "start_writedsmir: err=%d\n", drpc->err);
7159}
7160
7161/*
7162 * Set up the write DS mirror call for the pNFS I/O thread.
7163 */
7164static int
7165nfsio_writedsmir(vnode_t vp, int *iomode, int *must_commit,
7166    nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t off, int len,
7167    struct nfsfh *fhp, struct mbuf *m, int vers, int minorvers,
7168    struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7169{
7170	int error, ret;
7171
7172	error = 0;
7173	drpc->done = 0;
7174	drpc->vp = vp;
7175	drpc->iomode = *iomode;
7176	drpc->must_commit = *must_commit;
7177	drpc->stateidp = stateidp;
7178	drpc->dsp = dsp;
7179	drpc->off = off;
7180	drpc->len = len;
7181	drpc->fhp = fhp;
7182	drpc->m = m;
7183	drpc->vers = vers;
7184	drpc->minorvers = minorvers;
7185	drpc->cred = crhold(cred);
7186	drpc->p = p;
7187	drpc->inprog = 0;
7188	ret = EIO;
7189	if (nfs_pnfsiothreads != 0) {
7190		ret = nfs_pnfsio(start_writedsmir, drpc);
7191		NFSCL_DEBUG(4, "nfsio_writedsmir: nfs_pnfsio=%d\n", ret);
7192	}
7193	if (ret != 0) {
7194		error = nfsrpc_writedsmir(vp, iomode, &drpc->must_commit,
7195		    stateidp, dsp, off, len, fhp, m, vers, minorvers, cred, p);
7196		crfree(drpc->cred);
7197	}
7198	NFSCL_DEBUG(4, "nfsio_writedsmir: error=%d\n", error);
7199	return (error);
7200}
7201
7202/*
7203 * Free up the nfsclds structure.
7204 */
7205void
7206nfscl_freenfsclds(struct nfsclds *dsp)
7207{
7208	int i;
7209
7210	if (dsp == NULL)
7211		return;
7212	if (dsp->nfsclds_sockp != NULL) {
7213		NFSFREECRED(dsp->nfsclds_sockp->nr_cred);
7214		NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx);
7215		free(dsp->nfsclds_sockp->nr_nam, M_SONAME);
7216		free(dsp->nfsclds_sockp, M_NFSSOCKREQ);
7217	}
7218	NFSFREEMUTEX(&dsp->nfsclds_mtx);
7219	NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx);
7220	for (i = 0; i < NFSV4_CBSLOTS; i++) {
7221		if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL)
7222			m_freem(
7223			    dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply);
7224	}
7225	free(dsp, M_NFSCLDS);
7226}
7227
7228static enum nfsclds_state
7229nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp,
7230    struct nfsclds **retdspp, uint32_t *sequencep)
7231{
7232	struct nfsclds *dsp;
7233	int fndseq;
7234
7235	/*
7236	 * Search the list of nfsclds structures for one with the same
7237	 * server.
7238	 */
7239	fndseq = 0;
7240	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
7241		if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen &&
7242		    dsp->nfsclds_servownlen != 0 &&
7243		    !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown,
7244		    dsp->nfsclds_servownlen) &&
7245		    dsp->nfsclds_sess.nfsess_defunct == 0) {
7246			NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n",
7247			    TAILQ_FIRST(&nmp->nm_sess), dsp,
7248			    dsp->nfsclds_flags);
7249			if (fndseq == 0) {
7250				/* Get sequenceid# from first entry. */
7251				*sequencep =
7252				    dsp->nfsclds_sess.nfsess_sequenceid;
7253				fndseq = 1;
7254			}
7255			/* Server major id matches. */
7256			if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) {
7257				*retdspp = dsp;
7258				return (NFSDSP_USETHISSESSION);
7259			}
7260		}
7261	}
7262	if (fndseq != 0)
7263		return (NFSDSP_SEQTHISSESSION);
7264	return (NFSDSP_NOTFOUND);
7265}
7266
7267/*
7268 * NFS commit rpc to a NFSv4.1 DS.
7269 */
7270static int
7271nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7272    struct nfsfh *fhp, int vers, int minorvers, struct ucred *cred,
7273    NFSPROC_T *p)
7274{
7275	uint32_t *tl;
7276	struct nfsrv_descript nfsd, *nd = &nfsd;
7277	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7278	struct nfssockreq *nrp;
7279	struct nfsvattr na;
7280	int attrflag, error;
7281
7282	nd->nd_mrep = NULL;
7283	if (vers == 0 || vers == NFS_VER4) {
7284		nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh,
7285		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7286		    NULL);
7287		vers = NFS_VER4;
7288	} else {
7289		nfscl_reqstart(nd, NFSPROC_COMMIT, nmp, fhp->nfh_fh,
7290		    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers,
7291		    NULL);
7292		NFSDECRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMIT]);
7293		NFSINCRGLOBAL(nfsstatsv1.rpccnt[NFSPROC_COMMITDS]);
7294	}
7295	NFSCL_DEBUG(4, "nfsrpc_commitds: vers=%d minvers=%d\n", vers,
7296	    minorvers);
7297	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7298	txdr_hyper(offset, tl);
7299	tl += 2;
7300	*tl = txdr_unsigned(cnt);
7301	nrp = dsp->nfsclds_sockp;
7302	if (nrp == NULL)
7303		/* If NULL, use the MDS socket. */
7304		nrp = &nmp->nm_sockreq;
7305	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7306	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7307	NFSCL_DEBUG(4, "nfsrpc_commitds: err=%d stat=%d\n", error,
7308	    nd->nd_repstat);
7309	if (error != 0)
7310		return (error);
7311	if (nd->nd_repstat == 0) {
7312		if (vers == NFS_VER3) {
7313			error = nfscl_wcc_data(nd, vp, &na, &attrflag, NULL,
7314			    NULL);
7315			NFSCL_DEBUG(4, "nfsrpc_commitds: wccdata=%d\n", error);
7316			if (error != 0)
7317				goto nfsmout;
7318		}
7319		NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
7320		NFSLOCKDS(dsp);
7321		if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) {
7322			NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF);
7323			error = NFSERR_STALEWRITEVERF;
7324		}
7325		NFSUNLOCKDS(dsp);
7326	}
7327nfsmout:
7328	if (error == 0 && nd->nd_repstat != 0)
7329		error = nd->nd_repstat;
7330	m_freem(nd->nd_mrep);
7331	return (error);
7332}
7333
7334/*
7335 * Start up the thread that will execute nfsrpc_commitds().
7336 */
7337static void
7338start_commitds(void *arg, int pending)
7339{
7340	struct nfsclwritedsdorpc *drpc;
7341
7342	drpc = (struct nfsclwritedsdorpc *)arg;
7343	drpc->err = nfsrpc_commitds(drpc->vp, drpc->off, drpc->len,
7344	    drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers, drpc->cred,
7345	    drpc->p);
7346	drpc->done = 1;
7347	crfree(drpc->cred);
7348	NFSCL_DEBUG(4, "start_commitds: err=%d\n", drpc->err);
7349}
7350
7351/*
7352 * Set up the commit DS mirror call for the pNFS I/O thread.
7353 */
7354static int
7355nfsio_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp,
7356    struct nfsfh *fhp, int vers, int minorvers,
7357    struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7358{
7359	int error, ret;
7360
7361	error = 0;
7362	drpc->done = 0;
7363	drpc->vp = vp;
7364	drpc->off = offset;
7365	drpc->len = cnt;
7366	drpc->dsp = dsp;
7367	drpc->fhp = fhp;
7368	drpc->vers = vers;
7369	drpc->minorvers = minorvers;
7370	drpc->cred = crhold(cred);
7371	drpc->p = p;
7372	drpc->inprog = 0;
7373	ret = EIO;
7374	if (nfs_pnfsiothreads != 0) {
7375		ret = nfs_pnfsio(start_commitds, drpc);
7376		NFSCL_DEBUG(4, "nfsio_commitds: nfs_pnfsio=%d\n", ret);
7377	}
7378	if (ret != 0) {
7379		error = nfsrpc_commitds(vp, offset, cnt, dsp, fhp, vers,
7380		    minorvers, cred, p);
7381		crfree(drpc->cred);
7382	}
7383	NFSCL_DEBUG(4, "nfsio_commitds: error=%d\n", error);
7384	return (error);
7385}
7386
7387/*
7388 * NFS Advise rpc
7389 */
7390int
7391nfsrpc_advise(vnode_t vp, off_t offset, uint64_t cnt, int advise,
7392    struct ucred *cred, NFSPROC_T *p)
7393{
7394	u_int32_t *tl;
7395	struct nfsrv_descript nfsd, *nd = &nfsd;
7396	nfsattrbit_t hints;
7397	int error;
7398
7399	NFSZERO_ATTRBIT(&hints);
7400	if (advise == POSIX_FADV_WILLNEED)
7401		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7402	else if (advise == POSIX_FADV_DONTNEED)
7403		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7404	else
7405		return (0);
7406	NFSCL_REQSTART(nd, NFSPROC_IOADVISE, vp, cred);
7407	nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7408	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER);
7409	txdr_hyper(offset, tl);
7410	tl += 2;
7411	txdr_hyper(cnt, tl);
7412	nfsrv_putattrbit(nd, &hints);
7413	error = nfscl_request(nd, vp, p, cred);
7414	if (error != 0)
7415		return (error);
7416	if (nd->nd_repstat != 0)
7417		error = nd->nd_repstat;
7418	m_freem(nd->nd_mrep);
7419	return (error);
7420}
7421
7422#ifdef notyet
7423/*
7424 * NFS advise rpc to a NFSv4.2 DS.
7425 */
7426static int
7427nfsrpc_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7428    struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7429    struct ucred *cred, NFSPROC_T *p)
7430{
7431	uint32_t *tl;
7432	struct nfsrv_descript nfsd, *nd = &nfsd;
7433	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7434	struct nfssockreq *nrp;
7435	nfsattrbit_t hints;
7436	int error;
7437
7438	/* For NFS DSs prior to NFSv4.2, just return OK. */
7439	if (vers == NFS_VER3 || minorversion < NFSV42_MINORVERSION)
7440		return (0);
7441	NFSZERO_ATTRBIT(&hints);
7442	if (advise == POSIX_FADV_WILLNEED)
7443		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
7444	else if (advise == POSIX_FADV_DONTNEED)
7445		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
7446	else
7447		return (0);
7448	nd->nd_mrep = NULL;
7449	nfscl_reqstart(nd, NFSPROC_IOADVISEDS, nmp, fhp->nfh_fh,
7450	    fhp->nfh_len, NULL, &dsp->nfsclds_sess, vers, minorvers, NULL);
7451	vers = NFS_VER4;
7452	NFSCL_DEBUG(4, "nfsrpc_adviseds: vers=%d minvers=%d\n", vers,
7453	    minorvers);
7454	nfsm_stateidtom(nd, NULL, NFSSTATEID_PUTALLZERO);
7455	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
7456	txdr_hyper(offset, tl);
7457	tl += 2;
7458	*tl = txdr_unsigned(cnt);
7459	nfsrv_putattrbit(nd, &hints);
7460	nrp = dsp->nfsclds_sockp;
7461	if (nrp == NULL)
7462		/* If NULL, use the MDS socket. */
7463		nrp = &nmp->nm_sockreq;
7464	error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred,
7465	    NFS_PROG, vers, NULL, 1, NULL, &dsp->nfsclds_sess);
7466	NFSCL_DEBUG(4, "nfsrpc_adviseds: err=%d stat=%d\n", error,
7467	    nd->nd_repstat);
7468	if (error != 0)
7469		return (error);
7470	if (nd->nd_repstat != 0)
7471		error = nd->nd_repstat;
7472	m_freem(nd->nd_mrep);
7473	return (error);
7474}
7475
7476/*
7477 * Start up the thread that will execute nfsrpc_commitds().
7478 */
7479static void
7480start_adviseds(void *arg, int pending)
7481{
7482	struct nfsclwritedsdorpc *drpc;
7483
7484	drpc = (struct nfsclwritedsdorpc *)arg;
7485	drpc->err = nfsrpc_adviseds(drpc->vp, drpc->off, drpc->len,
7486	    drpc->advise, drpc->dsp, drpc->fhp, drpc->vers, drpc->minorvers,
7487	    drpc->cred, drpc->p);
7488	drpc->done = 1;
7489	crfree(drpc->cred);
7490	NFSCL_DEBUG(4, "start_adviseds: err=%d\n", drpc->err);
7491}
7492
7493/*
7494 * Set up the advise DS mirror call for the pNFS I/O thread.
7495 */
7496static int
7497nfsio_adviseds(vnode_t vp, uint64_t offset, int cnt, int advise,
7498    struct nfsclds *dsp, struct nfsfh *fhp, int vers, int minorvers,
7499    struct nfsclwritedsdorpc *drpc, struct ucred *cred, NFSPROC_T *p)
7500{
7501	int error, ret;
7502
7503	error = 0;
7504	drpc->done = 0;
7505	drpc->vp = vp;
7506	drpc->off = offset;
7507	drpc->len = cnt;
7508	drpc->advise = advise;
7509	drpc->dsp = dsp;
7510	drpc->fhp = fhp;
7511	drpc->vers = vers;
7512	drpc->minorvers = minorvers;
7513	drpc->cred = crhold(cred);
7514	drpc->p = p;
7515	drpc->inprog = 0;
7516	ret = EIO;
7517	if (nfs_pnfsiothreads != 0) {
7518		ret = nfs_pnfsio(start_adviseds, drpc);
7519		NFSCL_DEBUG(4, "nfsio_adviseds: nfs_pnfsio=%d\n", ret);
7520	}
7521	if (ret != 0) {
7522		error = nfsrpc_adviseds(vp, offset, cnt, advise, dsp, fhp, vers,
7523		    minorvers, cred, p);
7524		crfree(drpc->cred);
7525	}
7526	NFSCL_DEBUG(4, "nfsio_adviseds: error=%d\n", error);
7527	return (error);
7528}
7529#endif	/* notyet */
7530
7531/*
7532 * Do the Allocate operation, retrying for recovery.
7533 */
7534int
7535nfsrpc_allocate(vnode_t vp, off_t off, off_t len, struct nfsvattr *nap,
7536    int *attrflagp, struct ucred *cred, NFSPROC_T *p)
7537{
7538	int error, expireret = 0, retrycnt, nostateid;
7539	uint32_t clidrev = 0;
7540	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7541	struct nfsfh *nfhp = NULL;
7542	nfsv4stateid_t stateid;
7543	off_t tmp_off;
7544	void *lckp;
7545
7546	if (len < 0)
7547		return (EINVAL);
7548	if (len == 0)
7549		return (0);
7550	tmp_off = off + len;
7551	NFSLOCKMNT(nmp);
7552	if (tmp_off > nmp->nm_maxfilesize || tmp_off < off) {
7553		NFSUNLOCKMNT(nmp);
7554		return (EFBIG);
7555	}
7556	if (nmp->nm_clp != NULL)
7557		clidrev = nmp->nm_clp->nfsc_clientidrev;
7558	NFSUNLOCKMNT(nmp);
7559	nfhp = VTONFS(vp)->n_fhp;
7560	retrycnt = 0;
7561	do {
7562		lckp = NULL;
7563		nostateid = 0;
7564		nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
7565		    NFSV4OPEN_ACCESSWRITE, 0, cred, p, &stateid, &lckp);
7566		if (stateid.other[0] == 0 && stateid.other[1] == 0 &&
7567		    stateid.other[2] == 0) {
7568			nostateid = 1;
7569			NFSCL_DEBUG(1, "stateid0 in allocate\n");
7570		}
7571
7572		/*
7573		 * Not finding a stateid should probably never happen,
7574		 * but just return an error for this case.
7575		 */
7576		if (nostateid != 0)
7577			error = EIO;
7578		else
7579			error = nfsrpc_allocaterpc(vp, off, len, &stateid,
7580			    nap, attrflagp, cred, p);
7581		if (error == NFSERR_STALESTATEID)
7582			nfscl_initiate_recovery(nmp->nm_clp);
7583		if (lckp != NULL)
7584			nfscl_lockderef(lckp);
7585		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
7586		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
7587		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
7588			(void) nfs_catnap(PZERO, error, "nfs_allocate");
7589		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
7590		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
7591			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev, p);
7592		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
7593			error = EIO;
7594		}
7595		retrycnt++;
7596	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
7597	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
7598	    error == NFSERR_STALEDONTRECOVER ||
7599	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
7600	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
7601	     expireret == 0 && clidrev != 0 && retrycnt < 4));
7602	if (error != 0 && retrycnt >= 4)
7603		error = EIO;
7604	return (error);
7605}
7606
7607/*
7608 * The allocate RPC.
7609 */
7610static int
7611nfsrpc_allocaterpc(vnode_t vp, off_t off, off_t len, nfsv4stateid_t *stateidp,
7612    struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
7613{
7614	uint32_t *tl;
7615	int error;
7616	struct nfsrv_descript nfsd;
7617	struct nfsrv_descript *nd = &nfsd;
7618	nfsattrbit_t attrbits;
7619
7620	*attrflagp = 0;
7621	NFSCL_REQSTART(nd, NFSPROC_ALLOCATE, vp, cred);
7622	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
7623	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
7624	txdr_hyper(off, tl); tl += 2;
7625	txdr_hyper(len, tl); tl += 2;
7626	*tl = txdr_unsigned(NFSV4OP_GETATTR);
7627	NFSGETATTR_ATTRBIT(&attrbits);
7628	nfsrv_putattrbit(nd, &attrbits);
7629	error = nfscl_request(nd, vp, p, cred);
7630	if (error != 0)
7631		return (error);
7632	if (nd->nd_repstat == 0) {
7633		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
7634		error = nfsm_loadattr(nd, nap);
7635		if (error == 0)
7636			*attrflagp = NFS_LATTR_NOSHRINK;
7637	} else
7638		error = nd->nd_repstat;
7639nfsmout:
7640	m_freem(nd->nd_mrep);
7641	return (error);
7642}
7643
7644/*
7645 * Set up the XDR arguments for the LayoutGet operation.
7646 */
7647static void
7648nfsrv_setuplayoutget(struct nfsrv_descript *nd, int iomode, uint64_t offset,
7649    uint64_t len, uint64_t minlen, nfsv4stateid_t *stateidp, int layouttype,
7650    int layoutlen, int usecurstateid)
7651{
7652	uint32_t *tl;
7653
7654	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
7655	    NFSX_STATEID);
7656	*tl++ = newnfs_false;		/* Don't signal availability. */
7657	*tl++ = txdr_unsigned(layouttype);
7658	*tl++ = txdr_unsigned(iomode);
7659	txdr_hyper(offset, tl);
7660	tl += 2;
7661	txdr_hyper(len, tl);
7662	tl += 2;
7663	txdr_hyper(minlen, tl);
7664	tl += 2;
7665	if (usecurstateid != 0) {
7666		/* Special stateid for Current stateid. */
7667		*tl++ = txdr_unsigned(1);
7668		*tl++ = 0;
7669		*tl++ = 0;
7670		*tl++ = 0;
7671	} else {
7672		*tl++ = txdr_unsigned(stateidp->seqid);
7673		NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid);
7674		*tl++ = stateidp->other[0];
7675		*tl++ = stateidp->other[1];
7676		*tl++ = stateidp->other[2];
7677	}
7678	*tl = txdr_unsigned(layoutlen);
7679}
7680
7681/*
7682 * Parse the reply for a successful LayoutGet operation.
7683 */
7684static int
7685nfsrv_parselayoutget(struct nfsmount *nmp, struct nfsrv_descript *nd,
7686    nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp)
7687{
7688	uint32_t *tl;
7689	struct nfsclflayout *flp, *prevflp, *tflp;
7690	int cnt, error, fhcnt, gotiomode, i, iomode, j, k, l, laytype, nfhlen;
7691	int m, mirrorcnt;
7692	uint64_t retlen, off;
7693	struct nfsfh *nfhp;
7694	uint8_t *cp;
7695	uid_t user;
7696	gid_t grp;
7697
7698	NFSCL_DEBUG(4, "in nfsrv_parselayoutget\n");
7699	error = 0;
7700	flp = NULL;
7701	gotiomode = -1;
7702	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID);
7703	if (*tl++ != 0)
7704		*retonclosep = 1;
7705	else
7706		*retonclosep = 0;
7707	stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
7708	NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep,
7709	    (int)stateidp->seqid);
7710	stateidp->other[0] = *tl++;
7711	stateidp->other[1] = *tl++;
7712	stateidp->other[2] = *tl++;
7713	cnt = fxdr_unsigned(int, *tl);
7714	NFSCL_DEBUG(4, "layg cnt=%d\n", cnt);
7715	if (cnt <= 0 || cnt > 10000) {
7716		/* Don't accept more than 10000 layouts in reply. */
7717		error = NFSERR_BADXDR;
7718		goto nfsmout;
7719	}
7720	for (i = 0; i < cnt; i++) {
7721		/* Dissect to the layout type. */
7722		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER +
7723		    3 * NFSX_UNSIGNED);
7724		off = fxdr_hyper(tl); tl += 2;
7725		retlen = fxdr_hyper(tl); tl += 2;
7726		iomode = fxdr_unsigned(int, *tl++);
7727		laytype = fxdr_unsigned(int, *tl);
7728		NFSCL_DEBUG(4, "layt=%d off=%ju len=%ju iom=%d\n", laytype,
7729		    (uintmax_t)off, (uintmax_t)retlen, iomode);
7730		/* Ignore length of layout body for now. */
7731		if (laytype == NFSLAYOUT_NFSV4_1_FILES) {
7732			/* Parse the File layout up to fhcnt. */
7733			NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED +
7734			    NFSX_HYPER + NFSX_V4DEVICEID);
7735			fhcnt = fxdr_unsigned(int, *(tl + 4 +
7736			    NFSX_V4DEVICEID / NFSX_UNSIGNED));
7737			NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7738			if (fhcnt < 0 || fhcnt > 100) {
7739				/* Don't accept more than 100 file handles. */
7740				error = NFSERR_BADXDR;
7741				goto nfsmout;
7742			}
7743			if (fhcnt > 0)
7744				flp = malloc(sizeof(*flp) + fhcnt *
7745				    sizeof(struct nfsfh *), M_NFSFLAYOUT,
7746				    M_WAITOK);
7747			else
7748				flp = malloc(sizeof(*flp), M_NFSFLAYOUT,
7749				    M_WAITOK);
7750			flp->nfsfl_flags = NFSFL_FILE;
7751			flp->nfsfl_fhcnt = 0;
7752			flp->nfsfl_devp = NULL;
7753			flp->nfsfl_off = off;
7754			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7755				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7756			else
7757				flp->nfsfl_end = flp->nfsfl_off + retlen;
7758			flp->nfsfl_iomode = iomode;
7759			if (gotiomode == -1)
7760				gotiomode = flp->nfsfl_iomode;
7761			/* Ignore layout body length for now. */
7762			NFSBCOPY(tl, flp->nfsfl_dev, NFSX_V4DEVICEID);
7763			tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
7764			flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++);
7765			NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util);
7766			mtx_lock(&nmp->nm_mtx);
7767			if (nmp->nm_minorvers > 1 && (flp->nfsfl_util &
7768			    NFSFLAYUTIL_IOADVISE_THRU_MDS) != 0)
7769				nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7770			mtx_unlock(&nmp->nm_mtx);
7771			flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++);
7772			flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2;
7773			NFSCL_DEBUG(4, "stripe1=%u poff=%ju\n",
7774			    flp->nfsfl_stripe1, (uintmax_t)flp->nfsfl_patoff);
7775			for (j = 0; j < fhcnt; j++) {
7776				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7777				nfhlen = fxdr_unsigned(int, *tl);
7778				if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) {
7779					error = NFSERR_BADXDR;
7780					goto nfsmout;
7781				}
7782				nfhp = malloc(sizeof(*nfhp) + nfhlen - 1,
7783				    M_NFSFH, M_WAITOK);
7784				flp->nfsfl_fh[j] = nfhp;
7785				flp->nfsfl_fhcnt++;
7786				nfhp->nfh_len = nfhlen;
7787				NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen));
7788				NFSBCOPY(cp, nfhp->nfh_fh, nfhlen);
7789			}
7790		} else if (laytype == NFSLAYOUT_FLEXFILE) {
7791			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED +
7792			    NFSX_HYPER);
7793			mirrorcnt = fxdr_unsigned(int, *(tl + 2));
7794			NFSCL_DEBUG(4, "mirrorcnt=%d\n", mirrorcnt);
7795			if (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS) {
7796				error = NFSERR_BADXDR;
7797				goto nfsmout;
7798			}
7799			flp = malloc(sizeof(*flp) + mirrorcnt *
7800			    sizeof(struct nfsffm), M_NFSFLAYOUT, M_WAITOK);
7801			flp->nfsfl_flags = NFSFL_FLEXFILE;
7802			flp->nfsfl_mirrorcnt = mirrorcnt;
7803			for (j = 0; j < mirrorcnt; j++)
7804				flp->nfsfl_ffm[j].devp = NULL;
7805			flp->nfsfl_off = off;
7806			if (flp->nfsfl_off + retlen < flp->nfsfl_off)
7807				flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off;
7808			else
7809				flp->nfsfl_end = flp->nfsfl_off + retlen;
7810			flp->nfsfl_iomode = iomode;
7811			if (gotiomode == -1)
7812				gotiomode = flp->nfsfl_iomode;
7813			flp->nfsfl_stripeunit = fxdr_hyper(tl);
7814			NFSCL_DEBUG(4, "stripeunit=%ju\n",
7815			    (uintmax_t)flp->nfsfl_stripeunit);
7816			for (j = 0; j < mirrorcnt; j++) {
7817				NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7818				k = fxdr_unsigned(int, *tl);
7819				if (k < 1 || k > 128) {
7820					error = NFSERR_BADXDR;
7821					goto nfsmout;
7822				}
7823				NFSCL_DEBUG(4, "servercnt=%d\n", k);
7824				for (l = 0; l < k; l++) {
7825					NFSM_DISSECT(tl, uint32_t *,
7826					    NFSX_V4DEVICEID + NFSX_STATEID +
7827					    2 * NFSX_UNSIGNED);
7828					if (l == 0) {
7829						/* Just use the first server. */
7830						NFSBCOPY(tl,
7831						    flp->nfsfl_ffm[j].dev,
7832						    NFSX_V4DEVICEID);
7833						tl += (NFSX_V4DEVICEID /
7834						    NFSX_UNSIGNED);
7835						tl++;
7836						flp->nfsfl_ffm[j].st.seqid =
7837						    *tl++;
7838						flp->nfsfl_ffm[j].st.other[0] =
7839						    *tl++;
7840						flp->nfsfl_ffm[j].st.other[1] =
7841						    *tl++;
7842						flp->nfsfl_ffm[j].st.other[2] =
7843						    *tl++;
7844						NFSCL_DEBUG(4, "st.seqid=%u "
7845						 "st.o0=0x%x st.o1=0x%x "
7846						 "st.o2=0x%x\n",
7847						 flp->nfsfl_ffm[j].st.seqid,
7848						 flp->nfsfl_ffm[j].st.other[0],
7849						 flp->nfsfl_ffm[j].st.other[1],
7850						 flp->nfsfl_ffm[j].st.other[2]);
7851					} else
7852						tl += ((NFSX_V4DEVICEID +
7853						    NFSX_STATEID +
7854						    NFSX_UNSIGNED) /
7855						    NFSX_UNSIGNED);
7856					fhcnt = fxdr_unsigned(int, *tl);
7857					NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt);
7858					if (fhcnt < 1 ||
7859					    fhcnt > NFSDEV_MAXVERS) {
7860						error = NFSERR_BADXDR;
7861						goto nfsmout;
7862					}
7863					for (m = 0; m < fhcnt; m++) {
7864						NFSM_DISSECT(tl, uint32_t *,
7865						    NFSX_UNSIGNED);
7866						nfhlen = fxdr_unsigned(int,
7867						    *tl);
7868						NFSCL_DEBUG(4, "nfhlen=%d\n",
7869						    nfhlen);
7870						if (nfhlen <= 0 || nfhlen >
7871						    NFSX_V4FHMAX) {
7872							error = NFSERR_BADXDR;
7873							goto nfsmout;
7874						}
7875						NFSM_DISSECT(cp, uint8_t *,
7876						    NFSM_RNDUP(nfhlen));
7877						if (l == 0) {
7878							flp->nfsfl_ffm[j].fhcnt
7879							    = fhcnt;
7880							nfhp = malloc(
7881							    sizeof(*nfhp) +
7882							    nfhlen - 1, M_NFSFH,
7883							    M_WAITOK);
7884							flp->nfsfl_ffm[j].fh[m]
7885							    = nfhp;
7886							nfhp->nfh_len = nfhlen;
7887							NFSBCOPY(cp,
7888							    nfhp->nfh_fh,
7889							    nfhlen);
7890							NFSCL_DEBUG(4,
7891							    "got fh\n");
7892						}
7893					}
7894					/* Now, get the ffsd_user/ffds_group. */
7895					error = nfsrv_parseug(nd, 0, &user,
7896					    &grp, curthread);
7897					NFSCL_DEBUG(4, "after parseu=%d\n",
7898					    error);
7899					if (error == 0)
7900						error = nfsrv_parseug(nd, 1,
7901						    &user, &grp, curthread);
7902					NFSCL_DEBUG(4, "aft parseg=%d\n",
7903					    grp);
7904					if (error != 0)
7905						goto nfsmout;
7906					NFSCL_DEBUG(4, "user=%d group=%d\n",
7907					    user, grp);
7908					if (l == 0) {
7909						flp->nfsfl_ffm[j].user = user;
7910						flp->nfsfl_ffm[j].group = grp;
7911						NFSCL_DEBUG(4,
7912						    "usr=%d grp=%d\n", user,
7913						    grp);
7914					}
7915				}
7916			}
7917			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
7918			flp->nfsfl_fflags = fxdr_unsigned(uint32_t, *tl++);
7919#ifdef notnow
7920			/*
7921			 * At this time, there is no flag.
7922			 * NFSFLEXFLAG_IOADVISE_THRU_MDS might need to be
7923			 * added, or it may never exist?
7924			 */
7925			mtx_lock(&nmp->nm_mtx);
7926			if (nmp->nm_minorvers > 1 && (flp->nfsfl_fflags &
7927			    NFSFLEXFLAG_IOADVISE_THRU_MDS) != 0)
7928				nmp->nm_privflag |= NFSMNTP_IOADVISETHRUMDS;
7929			mtx_unlock(&nmp->nm_mtx);
7930#endif
7931			flp->nfsfl_statshint = fxdr_unsigned(uint32_t, *tl);
7932			NFSCL_DEBUG(4, "fflags=0x%x statshint=%d\n",
7933			    flp->nfsfl_fflags, flp->nfsfl_statshint);
7934		} else {
7935			error = NFSERR_BADXDR;
7936			goto nfsmout;
7937		}
7938		if (flp->nfsfl_iomode == gotiomode) {
7939			/* Keep the list in increasing offset order. */
7940			tflp = LIST_FIRST(flhp);
7941			prevflp = NULL;
7942			while (tflp != NULL &&
7943			    tflp->nfsfl_off < flp->nfsfl_off) {
7944				prevflp = tflp;
7945				tflp = LIST_NEXT(tflp, nfsfl_list);
7946			}
7947			if (prevflp == NULL)
7948				LIST_INSERT_HEAD(flhp, flp, nfsfl_list);
7949			else
7950				LIST_INSERT_AFTER(prevflp, flp,
7951				    nfsfl_list);
7952			NFSCL_DEBUG(4, "flp inserted\n");
7953		} else {
7954			printf("nfscl_layoutget(): got wrong iomode\n");
7955			nfscl_freeflayout(flp);
7956		}
7957		flp = NULL;
7958	}
7959nfsmout:
7960	NFSCL_DEBUG(4, "eo nfsrv_parselayoutget=%d\n", error);
7961	if (error != 0 && flp != NULL)
7962		nfscl_freeflayout(flp);
7963	return (error);
7964}
7965
7966/*
7967 * Parse a user/group digit string.
7968 */
7969static int
7970nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
7971    NFSPROC_T *p)
7972{
7973	uint32_t *tl;
7974	char *cp, *str, str0[NFSV4_SMALLSTR + 1];
7975	uint32_t len = 0;
7976	int error = 0;
7977
7978	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
7979	len = fxdr_unsigned(uint32_t, *tl);
7980	str = NULL;
7981	if (len > NFSV4_OPAQUELIMIT) {
7982		error = NFSERR_BADXDR;
7983		goto nfsmout;
7984	}
7985	NFSCL_DEBUG(4, "nfsrv_parseug: len=%d\n", len);
7986	if (len == 0) {
7987		if (dogrp != 0)
7988			*gidp = GID_NOGROUP;
7989		else
7990			*uidp = UID_NOBODY;
7991		return (0);
7992	}
7993	if (len > NFSV4_SMALLSTR)
7994		str = malloc(len + 1, M_TEMP, M_WAITOK);
7995	else
7996		str = str0;
7997	NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
7998	NFSBCOPY(cp, str, len);
7999	str[len] = '\0';
8000	NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
8001	if (dogrp != 0)
8002		error = nfsv4_strtogid(nd, str, len, gidp);
8003	else
8004		error = nfsv4_strtouid(nd, str, len, uidp);
8005nfsmout:
8006	if (len > NFSV4_SMALLSTR)
8007		free(str, M_TEMP);
8008	NFSCL_DEBUG(4, "eo nfsrv_parseug=%d\n", error);
8009	return (error);
8010}
8011
8012/*
8013 * Similar to nfsrpc_getlayout(), except that it uses nfsrpc_openlayget(),
8014 * so that it does both an Open and a Layoutget.
8015 */
8016static int
8017nfsrpc_getopenlayout(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
8018    int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
8019    struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
8020    struct ucred *cred, NFSPROC_T *p)
8021{
8022	struct nfscllayout *lyp;
8023	struct nfsclflayout *flp;
8024	struct nfsclflayouthead flh;
8025	int error, islocked, layoutlen, recalled, retonclose, usecurstateid;
8026	int layouttype, laystat;
8027	nfsv4stateid_t stateid;
8028	struct nfsclsession *tsep;
8029
8030	error = 0;
8031	if (NFSHASFLEXFILE(nmp))
8032		layouttype = NFSLAYOUT_FLEXFILE;
8033	else
8034		layouttype = NFSLAYOUT_NFSV4_1_FILES;
8035	/*
8036	 * If lyp is returned non-NULL, there will be a refcnt (shared lock)
8037	 * on it, iff flp != NULL or a lock (exclusive lock) on it iff
8038	 * flp == NULL.
8039	 */
8040	lyp = nfscl_getlayout(nmp->nm_clp, newfhp, newfhlen, 0, mode, &flp,
8041	    &recalled);
8042	NFSCL_DEBUG(4, "nfsrpc_getopenlayout nfscl_getlayout lyp=%p\n", lyp);
8043	if (lyp == NULL)
8044		islocked = 0;
8045	else if (flp != NULL)
8046		islocked = 1;
8047	else
8048		islocked = 2;
8049	if ((lyp == NULL || flp == NULL) && recalled == 0) {
8050		LIST_INIT(&flh);
8051		tsep = nfsmnt_mdssession(nmp);
8052		layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID +
8053		    3 * NFSX_UNSIGNED);
8054		if (lyp == NULL)
8055			usecurstateid = 1;
8056		else {
8057			usecurstateid = 0;
8058			stateid.seqid = lyp->nfsly_stateid.seqid;
8059			stateid.other[0] = lyp->nfsly_stateid.other[0];
8060			stateid.other[1] = lyp->nfsly_stateid.other[1];
8061			stateid.other[2] = lyp->nfsly_stateid.other[2];
8062		}
8063		error = nfsrpc_openlayoutrpc(nmp, vp, nfhp, fhlen,
8064		    newfhp, newfhlen, mode, op, name, namelen,
8065		    dpp, &stateid, usecurstateid, layouttype, layoutlen,
8066		    &retonclose, &flh, &laystat, cred, p);
8067		NFSCL_DEBUG(4, "aft nfsrpc_openlayoutrpc laystat=%d err=%d\n",
8068		    laystat, error);
8069		laystat = nfsrpc_layoutgetres(nmp, vp, newfhp, newfhlen,
8070		    &stateid, retonclose, NULL, &lyp, &flh, layouttype, laystat,
8071		    &islocked, cred, p);
8072	} else
8073		error = nfsrpc_openrpc(nmp, vp, nfhp, fhlen, newfhp, newfhlen,
8074		    mode, op, name, namelen, dpp, 0, 0, cred, p, 0, 0);
8075	if (islocked == 2)
8076		nfscl_rellayout(lyp, 1);
8077	else if (islocked == 1)
8078		nfscl_rellayout(lyp, 0);
8079	return (error);
8080}
8081
/*
 * This function does an Open+LayoutGet for an NFSv4.1 mount with pNFS
 * enabled.  The Open is done without create, using CLAIM_FH when
 * NFSHASNFSV4N(nmp) is true and CLAIM_NULL with the name otherwise.
 * All other NFSv4 Opens are handled by nfsrpc_openrpc().
 * The compound built here is Open, Getattr (Change and TimeModify only)
 * and LayoutGet.
 *
 * Results:
 * - *dpp is set to a new delegation structure only if the Open reply
 *   carried a read or write delegation and the Getattr succeeded; it is
 *   NULL otherwise.
 * - *laystatp starts out as ENXIO and is replaced by nd_repstat when that
 *   is non-zero, by the LayoutGet status when it is parsed, or by the error
 *   from parsing the layout.
 * - The return value is for the Open.  A failure of only the LayoutGet
 *   still returns 0.
 *
 * NOTE(review): this comment used to describe an "op == NULL" case with a
 * "dvp" argument.  There is no dvp argument and op is dereferenced
 * unconditionally here, so op must not be NULL.
 */
static int
nfsrpc_openlayoutrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp,
    int fhlen, uint8_t *newfhp, int newfhlen, uint32_t mode,
    struct nfsclopen *op, uint8_t *name, int namelen, struct nfscldeleg **dpp,
    nfsv4stateid_t *stateidp, int usecurstateid, int layouttype,
    int layoutlen, int *retonclosep, struct nfsclflayouthead *flhp,
    int *laystatp, struct ucred *cred, NFSPROC_T *p)
{
	uint32_t *tl;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfscldeleg *ndp = NULL;
	struct nfsvattr nfsva;
	struct nfsclsession *tsep;
	uint32_t rflags, deleg;
	nfsattrbit_t attrbits;
	int error, ret, acesize, limitby, iomode;

	*dpp = NULL;
	*laystatp = ENXIO;
	nfscl_reqstart(nd, NFSPROC_OPENLAYGET, nmp, nfhp, fhlen, NULL, NULL,
	    0, 0, cred);
	/* Open arguments: owner seqid, access, deny and the clientid. */
	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid);
	*tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH);
	*tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE);
	if (NFSHASNFSV4N(nmp)) {
		*tl = txdr_unsigned(NFSV4OPEN_CLAIMFH);
	} else {
		*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
		nfsm_strtom(nd, name, namelen);
	}
	/* Getattr for just the Change and TimeModify attributes. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSZERO_ATTRBIT(&attrbits);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
	nfsrv_putattrbit(nd, &attrbits);
	/* LayoutGet for the whole file; RW iomode if opening for writing. */
	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
	if ((mode & NFSV4OPEN_ACCESSWRITE) != 0)
		iomode = NFSLAYOUTIOMODE_RW;
	else
		iomode = NFSLAYOUTIOMODE_READ;
	nfsrv_setuplayoutget(nd, iomode, 0, UINT64_MAX, 0, stateidp,
	    layouttype, layoutlen, usecurstateid);
	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred,
	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
	if (error != 0)
		return (error);
	NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd);
	if (nd->nd_repstat != 0)
		*laystatp = nd->nd_repstat;
	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
		/* ND_NOMOREDATA will be set if the Open operation failed. */
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		/* The open stateid is stored just as it was received. */
		op->nfso_stateid.seqid = *tl++;
		op->nfso_stateid.other[0] = *tl++;
		op->nfso_stateid.other[1] = *tl++;
		op->nfso_stateid.other[2] = *tl;
		/* rflags is the last of the words dissected above. */
		rflags = fxdr_unsigned(u_int32_t, *(tl + 6));
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error != 0)
			goto nfsmout;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(u_int32_t, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			if (!(op->nfso_own->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				op->nfso_own->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/*
			 * Build the delegation structure.  It stays owned
			 * by ndp, and is freed at nfsmout, unless it gets
			 * handed to the caller via *dpp below.
			 */
			ndp = malloc(sizeof(struct nfscldeleg) + newfhlen,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&ndp->nfsdl_owner);
			LIST_INIT(&ndp->nfsdl_lock);
			ndp->nfsdl_clp = op->nfso_own->nfsow_clp;
			ndp->nfsdl_fhlen = newfhlen;
			NFSBCOPY(newfhp, ndp->nfsdl_fh, newfhlen);
			newnfs_copyincred(cred, &ndp->nfsdl_cred);
			nfscl_lockinit(&ndp->nfsdl_rwlock);
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			ndp->nfsdl_stateid.seqid = *tl++;
			ndp->nfsdl_stateid.other[0] = *tl++;
			ndp->nfsdl_stateid.other[1] = *tl++;
			ndp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero sets NFSCLDL_RECALL below. */
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				ndp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					ndp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* The limit is the product of the two words. */
					ndp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					ndp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				};
			} else
				ndp->nfsdl_flags = NFSCLDL_READ;
			if (ret != 0)
				ndp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &ndp->nfsdl_ace, false,
			    &ret, &acesize, p);
			if (error != 0)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}
		if ((rflags & NFSV4OPEN_LOCKTYPEPOSIX) != 0 ||
		    nfscl_assumeposixlocks)
			op->nfso_posixlock = 1;
		else
			op->nfso_posixlock = 0;
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		/* If the 2nd element == NFS_OK, the Getattr succeeded. */
		if (*++tl == 0) {
			error = nfsv4_loadattr(nd, NULL, &nfsva, NULL,
			    NULL, 0, NULL, NULL, NULL, NULL, NULL, 0,
			    NULL, NULL, NULL, p, cred);
			if (error != 0)
				goto nfsmout;
			if (ndp != NULL) {
				/* Finish the delegation and pass it to the caller. */
				ndp->nfsdl_change = nfsva.na_filerev;
				ndp->nfsdl_modtime = nfsva.na_mtime;
				ndp->nfsdl_flags |= NFSCLDL_MODTIMESET;
				*dpp = ndp;
				ndp = NULL;
			}
			/*
			 * At this point, the Open has succeeded, so set
			 * nd_repstat = NFS_OK.  If the Layoutget failed,
			 * this function just won't return a layout.
			 */
			if (nd->nd_repstat == 0) {
				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
				/* The 2nd element is the LayoutGet status. */
				*laystatp = fxdr_unsigned(int, *++tl);
				if (*laystatp == 0) {
					error = nfsrv_parselayoutget(nmp, nd,
					    stateidp, retonclosep, flhp);
					if (error != 0)
						*laystatp = error;
				}
			} else
				nd->nd_repstat = 0;	/* Return 0 for Open. */
		}
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
nfsmout:
	/* ndp is NULL here if there was no delegation or *dpp now has it. */
	free(ndp, M_NFSCLDELEG);
	m_freem(nd->nd_mrep);
	return (error);
}
8262
/*
 * Similar to nfsrpc_createv4(), but also does the LayoutGet operation.
 * Used only for mounts with pNFS enabled.
 * The operations added to the compound here are: Open (with create),
 * SaveFH, GetFH, Getattr, PutFH for the directory, Getattr, RestoreFH and
 * LayoutGet.
 *
 * Results:
 * - *nfhpp and *nnap/*attrflagp are filled in by nfscl_mtofh() for the
 *   new file; *dnap/*dattrflagp are for the directory.
 * - *dpp is set to the delegation structure (which may be NULL) only when
 *   0 is returned.
 * - *unlockedp is set to 1 once nfscl_openrelease() has been called here.
 * - *laystatp starts out as ENXIO and is replaced by nd_repstat when that
 *   is non-zero, by the LayoutGet status when it is parsed, or by the error
 *   from parsing the layout.
 */
static int
nfsrpc_createlayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
    nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
    int *dattrflagp, int *unlockedp, nfsv4stateid_t *stateidp,
    int usecurstateid, int layouttype, int layoutlen, int *retonclosep,
    struct nfsclflayouthead *flhp, int *laystatp)
{
	uint32_t *tl;
	int error = 0, deleg, newone, ret, acesize, limitby;
	struct nfsrv_descript nfsd, *nd = &nfsd;
	struct nfsclopen *op;
	struct nfscldeleg *dp = NULL;
	struct nfsnode *np;
	struct nfsfh *nfhp;
	struct nfsclsession *tsep;
	nfsattrbit_t attrbits;
	nfsv4stateid_t stateid;
	struct nfsmount *nmp;

	nmp = VFSTONFS(dvp->v_mount);
	np = VTONFS(dvp);
	*laystatp = ENXIO;
	*unlockedp = 0;
	*nfhpp = NULL;
	*dpp = NULL;
	*attrflagp = 0;
	*dattrflagp = 0;
	if (namelen > NFS_MAXNAMLEN)
		return (ENAMETOOLONG);
	NFSCL_REQSTART(nd, NFSPROC_CREATELAYGET, dvp, cred);
	/*
	 * For V4, this is actually an Open op.
	 */
	NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(owp->nfsow_seqid);
	*tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE |
	    NFSV4OPEN_ACCESSREAD);
	*tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE);
	tsep = nfsmnt_mdssession(nmp);
	*tl++ = tsep->nfsess_clientid.lval[0];
	*tl = tsep->nfsess_clientid.lval[1];
	nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN);
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OPEN_CREATE);
	/* Choose the create mode; only EXCLUSIVE4_1 sends the verifier. */
	if ((fmode & O_EXCL) != 0) {
		if (NFSHASSESSPERSIST(nmp)) {
			/* Use GUARDED for persistent sessions. */
			*tl = txdr_unsigned(NFSCREATE_GUARDED);
			nfscl_fillsattr(nd, vap, dvp, 0, 0);
		} else {
			/* Otherwise, use EXCLUSIVE4_1. */
			*tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41);
			NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
			*tl++ = cverf.lval[0];
			*tl = cverf.lval[1];
			nfscl_fillsattr(nd, vap, dvp, 0, 0);
		}
	} else {
		*tl = txdr_unsigned(NFSCREATE_UNCHECKED);
		nfscl_fillsattr(nd, vap, dvp, 0, 0);
	}
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OPEN_CLAIMNULL);
	nfsm_strtom(nd, name, namelen);
	/* Get the new file's handle and attributes, plus save the FH. */
	NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OP_SAVEFH);
	*tl++ = txdr_unsigned(NFSV4OP_GETFH);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	NFSGETATTR_ATTRBIT(&attrbits);
	nfsrv_putattrbit(nd, &attrbits);
	/* Get the directory's post-op attributes. */
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_PUTFH);
	(void)nfsm_fhtom(nmp, nd, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, 0);
	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(NFSV4OP_GETATTR);
	nfsrv_putattrbit(nd, &attrbits);
	/* Restore the FH saved above and do the LayoutGet (always RW). */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(NFSV4OP_RESTOREFH);
	*tl = txdr_unsigned(NFSV4OP_LAYOUTGET);
	nfsrv_setuplayoutget(nd, NFSLAYOUTIOMODE_RW, 0, UINT64_MAX, 0, stateidp,
	    layouttype, layoutlen, usecurstateid);
	error = nfscl_request(nd, dvp, p, cred);
	if (error != 0)
		return (error);
	NFSCL_DEBUG(4, "nfsrpc_createlayout stat=%d err=%d\n", nd->nd_repstat,
	    error);
	if (nd->nd_repstat != 0)
		*laystatp = nd->nd_repstat;
	NFSCL_INCRSEQID(owp->nfsow_seqid, nd);
	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
		NFSCL_DEBUG(4, "nfsrpc_createlayout open succeeded\n");
		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
		    6 * NFSX_UNSIGNED);
		/*
		 * Keep the open stateid in a local, since the Open
		 * structure is not acquired until further below.
		 */
		stateid.seqid = *tl++;
		stateid.other[0] = *tl++;
		stateid.other[1] = *tl++;
		stateid.other[2] = *tl;
		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
		if (error != 0)
			goto nfsmout;
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		deleg = fxdr_unsigned(int, *tl);
		if (deleg == NFSV4OPEN_DELEGATEREAD ||
		    deleg == NFSV4OPEN_DELEGATEWRITE) {
			if (!(owp->nfsow_clp->nfsc_flags &
			      NFSCLFLAGS_FIRSTDELEG))
				owp->nfsow_clp->nfsc_flags |=
				  (NFSCLFLAGS_FIRSTDELEG | NFSCLFLAGS_GOTDELEG);
			/*
			 * The new file's handle is not known yet, so
			 * allocate room for the largest one.  It is copied
			 * in further below.
			 */
			dp = malloc(sizeof(struct nfscldeleg) + NFSX_V4FHMAX,
			    M_NFSCLDELEG, M_WAITOK);
			LIST_INIT(&dp->nfsdl_owner);
			LIST_INIT(&dp->nfsdl_lock);
			dp->nfsdl_clp = owp->nfsow_clp;
			newnfs_copyincred(cred, &dp->nfsdl_cred);
			nfscl_lockinit(&dp->nfsdl_rwlock);
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			dp->nfsdl_stateid.seqid = *tl++;
			dp->nfsdl_stateid.other[0] = *tl++;
			dp->nfsdl_stateid.other[1] = *tl++;
			dp->nfsdl_stateid.other[2] = *tl++;
			/* Non-zero sets NFSCLDL_RECALL below. */
			ret = fxdr_unsigned(int, *tl);
			if (deleg == NFSV4OPEN_DELEGATEWRITE) {
				dp->nfsdl_flags = NFSCLDL_WRITE;
				/*
				 * Indicates how much the file can grow.
				 */
				NFSM_DISSECT(tl, u_int32_t *,
				    3 * NFSX_UNSIGNED);
				limitby = fxdr_unsigned(int, *tl++);
				switch (limitby) {
				case NFSV4OPEN_LIMITSIZE:
					dp->nfsdl_sizelimit = fxdr_hyper(tl);
					break;
				case NFSV4OPEN_LIMITBLOCKS:
					/* The limit is the product of the two words. */
					dp->nfsdl_sizelimit =
					    fxdr_unsigned(u_int64_t, *tl++);
					dp->nfsdl_sizelimit *=
					    fxdr_unsigned(u_int64_t, *tl);
					break;
				default:
					error = NFSERR_BADXDR;
					goto nfsmout;
				};
			} else {
				dp->nfsdl_flags = NFSCLDL_READ;
			}
			if (ret != 0)
				dp->nfsdl_flags |= NFSCLDL_RECALL;
			error = nfsrv_dissectace(nd, &dp->nfsdl_ace, false,
			    &ret, &acesize, p);
			if (error != 0)
				goto nfsmout;
		} else if (deleg != NFSV4OPEN_DELEGATENONE) {
			error = NFSERR_BADXDR;
			goto nfsmout;
		}

		/* Now, we should have the status for the SaveFH. */
		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		if (*++tl == 0) {
			NFSCL_DEBUG(4, "nfsrpc_createlayout SaveFH ok\n");
			/*
			 * Now, process the GetFH and Getattr for the newly
			 * created file. nfscl_mtofh() will set
			 * ND_NOMOREDATA if these weren't successful.
			 */
			error = nfscl_mtofh(nd, nfhpp, nnap, attrflagp);
			NFSCL_DEBUG(4, "aft nfscl_mtofh err=%d\n", error);
			if (error != 0)
				goto nfsmout;
		} else
			nd->nd_flag |= ND_NOMOREDATA;
		/* Now we have the PutFH and Getattr for the directory. */
		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
			/* In each pair dissected, the 2nd word is the status. */
			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
			if (*++tl != 0)
				nd->nd_flag |= ND_NOMOREDATA;
			else {
				NFSM_DISSECT(tl, uint32_t *, 2 *
				    NFSX_UNSIGNED);
				if (*++tl != 0)
					nd->nd_flag |= ND_NOMOREDATA;
			}
		}
		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
			/* Load the directory attributes. */
			error = nfsm_loadattr(nd, dnap);
			NFSCL_DEBUG(4, "aft nfsm_loadattr err=%d\n", error);
			if (error != 0)
				goto nfsmout;
			*dattrflagp = 1;
			if (dp != NULL && *attrflagp != 0) {
				dp->nfsdl_change = nnap->na_filerev;
				dp->nfsdl_modtime = nnap->na_mtime;
				dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
			}
			/*
			 * We can now complete the Open state.
			 */
			nfhp = *nfhpp;
			if (dp != NULL) {
				dp->nfsdl_fhlen = nfhp->nfh_len;
				NFSBCOPY(nfhp->nfh_fh, dp->nfsdl_fh,
				    nfhp->nfh_len);
			}
			/*
			 * Get an Open structure that will be
			 * attached to the OpenOwner, acquired already.
			 */
			error = nfscl_open(dvp, nfhp->nfh_fh, nfhp->nfh_len,
			    (NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD), 0,
			    cred, p, NULL, &op, &newone, NULL, 0, false);
			if (error != 0)
				goto nfsmout;
			op->nfso_stateid = stateid;
			newnfs_copyincred(cred, &op->nfso_cred);

			/* Tell the caller that the release has been done. */
			nfscl_openrelease(nmp, op, error, newone);
			*unlockedp = 1;

			/* Now, handle the RestoreFH and LayoutGet. */
			if (nd->nd_repstat == 0) {
				NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
				/* The 4th word is the LayoutGet status. */
				*laystatp = fxdr_unsigned(int, *(tl + 3));
				if (*laystatp == 0) {
					error = nfsrv_parselayoutget(nmp, nd,
					    stateidp, retonclosep, flhp);
					if (error != 0)
						*laystatp = error;
				}
				NFSCL_DEBUG(4, "aft nfsrv_parselayout err=%d\n",
				    error);
			} else
				/* The create worked, so don't return this. */
				nd->nd_repstat = 0;
		}
	}
	if (nd->nd_repstat != 0 && error == 0)
		error = nd->nd_repstat;
	if (error == NFSERR_STALECLIENTID)
		nfscl_initiate_recovery(owp->nfsow_clp);
nfsmout:
	NFSCL_DEBUG(4, "eo nfsrpc_createlayout err=%d\n", error);
	/* Only hand the delegation (if any) to the caller upon success. */
	if (error == 0)
		*dpp = dp;
	else
		free(dp, M_NFSCLDELEG);
	m_freem(nd->nd_mrep);
	return (error);
}
8521
8522/*
8523 * Similar to nfsrpc_getopenlayout(), except that it used for the Create case.
8524 */
8525static int
8526nfsrpc_getcreatelayout(vnode_t dvp, char *name, int namelen, struct vattr *vap,
8527    nfsquad_t cverf, int fmode, struct nfsclowner *owp, struct nfscldeleg **dpp,
8528    struct ucred *cred, NFSPROC_T *p, struct nfsvattr *dnap,
8529    struct nfsvattr *nnap, struct nfsfh **nfhpp, int *attrflagp,
8530    int *dattrflagp, int *unlockedp)
8531{
8532	struct nfscllayout *lyp;
8533	struct nfsclflayouthead flh;
8534	struct nfsfh *nfhp;
8535	struct nfsclsession *tsep;
8536	struct nfsmount *nmp;
8537	nfsv4stateid_t stateid;
8538	int error, layoutlen, layouttype, retonclose, laystat;
8539
8540	error = 0;
8541	nmp = VFSTONFS(dvp->v_mount);
8542	if (NFSHASFLEXFILE(nmp))
8543		layouttype = NFSLAYOUT_FLEXFILE;
8544	else
8545		layouttype = NFSLAYOUT_NFSV4_1_FILES;
8546	LIST_INIT(&flh);
8547	tsep = nfsmnt_mdssession(nmp);
8548	layoutlen = tsep->nfsess_maxcache - (NFSX_STATEID + 3 * NFSX_UNSIGNED);
8549	error = nfsrpc_createlayout(dvp, name, namelen, vap, cverf, fmode,
8550	    owp, dpp, cred, p, dnap, nnap, nfhpp, attrflagp, dattrflagp,
8551	    unlockedp, &stateid, 1, layouttype, layoutlen, &retonclose,
8552	    &flh, &laystat);
8553	NFSCL_DEBUG(4, "aft nfsrpc_createlayoutrpc laystat=%d err=%d\n",
8554	    laystat, error);
8555	lyp = NULL;
8556	if (laystat == 0) {
8557		nfhp = *nfhpp;
8558		laystat = nfsrpc_layoutgetres(nmp, dvp, nfhp->nfh_fh,
8559		    nfhp->nfh_len, &stateid, retonclose, NULL, &lyp, &flh,
8560		    layouttype, laystat, NULL, cred, p);
8561	} else
8562		laystat = nfsrpc_layoutgetres(nmp, dvp, NULL, 0, &stateid,
8563		    retonclose, NULL, &lyp, &flh, layouttype, laystat, NULL,
8564		    cred, p);
8565	if (laystat == 0)
8566		nfscl_rellayout(lyp, 0);
8567	return (error);
8568}
8569
8570/*
8571 * Process the results of a layoutget() operation.
8572 */
8573static int
8574nfsrpc_layoutgetres(struct nfsmount *nmp, vnode_t vp, uint8_t *newfhp,
8575    int newfhlen, nfsv4stateid_t *stateidp, int retonclose, uint32_t *notifybit,
8576    struct nfscllayout **lypp, struct nfsclflayouthead *flhp, int layouttype,
8577    int laystat, int *islockedp, struct ucred *cred, NFSPROC_T *p)
8578{
8579	struct nfsclflayout *tflp;
8580	struct nfscldevinfo *dip;
8581	uint8_t *dev;
8582	int i, mirrorcnt;
8583
8584	if (laystat == NFSERR_UNKNLAYOUTTYPE) {
8585		NFSLOCKMNT(nmp);
8586		if (!NFSHASFLEXFILE(nmp)) {
8587			/* Switch to using Flex File Layout. */
8588			nmp->nm_state |= NFSSTA_FLEXFILE;
8589		} else if (layouttype == NFSLAYOUT_FLEXFILE) {
8590			/* Disable pNFS. */
8591			NFSCL_DEBUG(1, "disable PNFS\n");
8592			nmp->nm_state &= ~(NFSSTA_PNFS | NFSSTA_FLEXFILE);
8593		}
8594		NFSUNLOCKMNT(nmp);
8595	}
8596	if (laystat == 0) {
8597		NFSCL_DEBUG(4, "nfsrpc_layoutgetres at FOREACH\n");
8598		LIST_FOREACH(tflp, flhp, nfsfl_list) {
8599			if (layouttype == NFSLAYOUT_FLEXFILE)
8600				mirrorcnt = tflp->nfsfl_mirrorcnt;
8601			else
8602				mirrorcnt = 1;
8603			for (i = 0; i < mirrorcnt; i++) {
8604				laystat = nfscl_adddevinfo(nmp, NULL, i, tflp);
8605				NFSCL_DEBUG(4, "aft adddev=%d\n", laystat);
8606				if (laystat != 0) {
8607					if (layouttype == NFSLAYOUT_FLEXFILE)
8608						dev = tflp->nfsfl_ffm[i].dev;
8609					else
8610						dev = tflp->nfsfl_dev;
8611					laystat = nfsrpc_getdeviceinfo(nmp, dev,
8612					    layouttype, notifybit, &dip, cred,
8613					    p);
8614					NFSCL_DEBUG(4, "aft nfsrpc_gdi=%d\n",
8615					    laystat);
8616					if (laystat != 0)
8617						goto out;
8618					laystat = nfscl_adddevinfo(nmp, dip, i,
8619					    tflp);
8620					if (laystat != 0)
8621						printf("nfsrpc_layoutgetresout"
8622						    ": cannot add\n");
8623				}
8624			}
8625		}
8626	}
8627out:
8628	if (laystat == 0) {
8629		/*
8630		 * nfscl_layout() always returns with the nfsly_lock
8631		 * set to a refcnt (shared lock).
8632		 * Passing in dvp is sufficient, since it is only used to
8633		 * get the fsid for the file system.
8634		 */
8635		laystat = nfscl_layout(nmp, vp, newfhp, newfhlen, stateidp,
8636		    layouttype, retonclose, flhp, lypp, cred, p);
8637		NFSCL_DEBUG(4, "nfsrpc_layoutgetres: aft nfscl_layout=%d\n",
8638		    laystat);
8639		if (laystat == 0 && islockedp != NULL)
8640			*islockedp = 1;
8641	}
8642	return (laystat);
8643}
8644
8645/*
8646 * nfs copy_file_range operation.
8647 */
8648int
8649nfsrpc_copy_file_range(vnode_t invp, off_t *inoffp, vnode_t outvp,
8650    off_t *outoffp, size_t *lenp, unsigned int flags, int *inattrflagp,
8651    struct nfsvattr *innap, int *outattrflagp, struct nfsvattr *outnap,
8652    struct ucred *cred, bool consecutive, bool *must_commitp)
8653{
8654	int commit, error, expireret = 0, retrycnt;
8655	u_int32_t clidrev = 0;
8656	struct nfsmount *nmp = VFSTONFS(invp->v_mount);
8657	struct nfsfh *innfhp = NULL, *outnfhp = NULL;
8658	nfsv4stateid_t instateid, outstateid;
8659	void *inlckp, *outlckp;
8660
8661	if (nmp->nm_clp != NULL)
8662		clidrev = nmp->nm_clp->nfsc_clientidrev;
8663	innfhp = VTONFS(invp)->n_fhp;
8664	outnfhp = VTONFS(outvp)->n_fhp;
8665	retrycnt = 0;
8666	do {
8667		/* Get both stateids. */
8668		inlckp = NULL;
8669		nfscl_getstateid(invp, innfhp->nfh_fh, innfhp->nfh_len,
8670		    NFSV4OPEN_ACCESSREAD, 0, NULL, curthread, &instateid,
8671		    &inlckp);
8672		outlckp = NULL;
8673		nfscl_getstateid(outvp, outnfhp->nfh_fh, outnfhp->nfh_len,
8674		    NFSV4OPEN_ACCESSWRITE, 0, NULL, curthread, &outstateid,
8675		    &outlckp);
8676
8677		error = nfsrpc_copyrpc(invp, *inoffp, outvp, *outoffp, lenp,
8678		    &instateid, &outstateid, innap, inattrflagp, outnap,
8679		    outattrflagp, consecutive, &commit, cred, curthread);
8680		if (error == 0) {
8681			if (commit != NFSWRITE_FILESYNC)
8682				*must_commitp = true;
8683			*inoffp += *lenp;
8684			*outoffp += *lenp;
8685		} else if (error == NFSERR_STALESTATEID)
8686			nfscl_initiate_recovery(nmp->nm_clp);
8687		if (inlckp != NULL)
8688			nfscl_lockderef(inlckp);
8689		if (outlckp != NULL)
8690			nfscl_lockderef(outlckp);
8691		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8692		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8693		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8694			(void) nfs_catnap(PZERO, error, "nfs_cfr");
8695		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
8696		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
8697			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8698			    curthread);
8699		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
8700			error = EIO;
8701		}
8702		retrycnt++;
8703	} while (error == NFSERR_GRACE || error == NFSERR_DELAY ||
8704	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8705	      error == NFSERR_STALEDONTRECOVER ||
8706	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8707	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8708	     expireret == 0 && clidrev != 0 && retrycnt < 4));
8709	if (error != 0 && (retrycnt >= 4 ||
8710	    error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION ||
8711	      error == NFSERR_STALEDONTRECOVER))
8712		error = EIO;
8713	return (error);
8714}
8715
8716/*
8717 * The copy RPC.
8718 */
8719static int
8720nfsrpc_copyrpc(vnode_t invp, off_t inoff, vnode_t outvp, off_t outoff,
8721    size_t *lenp, nfsv4stateid_t *instateidp, nfsv4stateid_t *outstateidp,
8722    struct nfsvattr *innap, int *inattrflagp, struct nfsvattr *outnap,
8723    int *outattrflagp, bool consecutive, int *commitp, struct ucred *cred,
8724    NFSPROC_T *p)
8725{
8726	uint32_t *tl, *opcntp;
8727	int error;
8728	struct nfsrv_descript nfsd;
8729	struct nfsrv_descript *nd = &nfsd;
8730	struct nfsmount *nmp;
8731	nfsattrbit_t attrbits;
8732	struct vattr va;
8733	uint64_t len;
8734
8735	nmp = VFSTONFS(invp->v_mount);
8736	*inattrflagp = *outattrflagp = 0;
8737	*commitp = NFSWRITE_UNSTABLE;
8738	len = *lenp;
8739	*lenp = 0;
8740	if (len > nfs_maxcopyrange)
8741		len = nfs_maxcopyrange;
8742	nfscl_reqstart(nd, NFSPROC_COPY, nmp, VTONFS(invp)->n_fhp->nfh_fh,
8743	    VTONFS(invp)->n_fhp->nfh_len, &opcntp, NULL, 0, 0, cred);
8744	/*
8745	 * First do a Setattr of atime to the server's clock
8746	 * time.  The FreeBSD "collective" was of the opinion
8747	 * that setting atime was necessary for this syscall.
8748	 * Do the Setattr before the Copy, so that it can be
8749	 * handled well if the server replies NFSERR_DELAY to
8750	 * the Setattr operation.
8751	 */
8752	if ((nmp->nm_mountp->mnt_flag & MNT_NOATIME) == 0) {
8753		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8754		*tl = txdr_unsigned(NFSV4OP_SETATTR);
8755		nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8756		VATTR_NULL(&va);
8757		va.va_atime.tv_sec = va.va_atime.tv_nsec = 0;
8758		va.va_vaflags = VA_UTIMES_NULL;
8759		nfscl_fillsattr(nd, &va, invp, 0, 0);
8760		/* Bump opcnt from 7 to 8. */
8761		*opcntp = txdr_unsigned(8);
8762	}
8763
8764	/* Now Getattr the invp attributes. */
8765	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8766	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8767	NFSGETATTR_ATTRBIT(&attrbits);
8768	nfsrv_putattrbit(nd, &attrbits);
8769
8770	/* Set outvp. */
8771	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8772	*tl = txdr_unsigned(NFSV4OP_PUTFH);
8773	(void)nfsm_fhtom(nmp, nd, VTONFS(outvp)->n_fhp->nfh_fh,
8774	    VTONFS(outvp)->n_fhp->nfh_len, 0);
8775
8776	/* Do the Copy. */
8777	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8778	*tl = txdr_unsigned(NFSV4OP_COPY);
8779	nfsm_stateidtom(nd, instateidp, NFSSTATEID_PUTSTATEID);
8780	nfsm_stateidtom(nd, outstateidp, NFSSTATEID_PUTSTATEID);
8781	NFSM_BUILD(tl, uint32_t *, 3 * NFSX_HYPER + 4 * NFSX_UNSIGNED);
8782	txdr_hyper(inoff, tl); tl += 2;
8783	txdr_hyper(outoff, tl); tl += 2;
8784	txdr_hyper(len, tl); tl += 2;
8785	if (consecutive)
8786		*tl++ = newnfs_true;
8787	else
8788		*tl++ = newnfs_false;
8789	*tl++ = newnfs_true;
8790	*tl++ = 0;
8791
8792	/* Get the outvp attributes. */
8793	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8794	NFSWRITEGETATTR_ATTRBIT(&attrbits);
8795	nfsrv_putattrbit(nd, &attrbits);
8796
8797	error = nfscl_request(nd, invp, p, cred);
8798	if (error != 0)
8799		return (error);
8800	/* Skip over the Setattr reply. */
8801	if ((nd->nd_flag & ND_NOMOREDATA) == 0 &&
8802	    (nmp->nm_mountp->mnt_flag & MNT_NOATIME) == 0) {
8803		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8804		if (*(tl + 1) == 0) {
8805			error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
8806			if (error != 0)
8807				goto nfsmout;
8808		} else
8809			nd->nd_flag |= ND_NOMOREDATA;
8810	}
8811	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8812		/* Get the input file's attributes. */
8813		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8814		if (*(tl + 1) == 0) {
8815			error = nfsm_loadattr(nd, innap);
8816			if (error != 0)
8817				goto nfsmout;
8818			*inattrflagp = 1;
8819		} else
8820			nd->nd_flag |= ND_NOMOREDATA;
8821	}
8822	/* Skip over return stat for PutFH. */
8823	if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8824		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8825		if (*++tl != 0)
8826			nd->nd_flag |= ND_NOMOREDATA;
8827	}
8828	/* Skip over return stat for Copy. */
8829	if ((nd->nd_flag & ND_NOMOREDATA) == 0)
8830		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8831	if (nd->nd_repstat == 0) {
8832		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
8833		if (*tl != 0) {
8834			/* There should be no callback ids. */
8835			error = NFSERR_BADXDR;
8836			goto nfsmout;
8837		}
8838		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 3 * NFSX_UNSIGNED +
8839		    NFSX_VERF);
8840		len = fxdr_hyper(tl); tl += 2;
8841		*commitp = fxdr_unsigned(int, *tl++);
8842		NFSLOCKMNT(nmp);
8843		if (!NFSHASWRITEVERF(nmp)) {
8844			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8845			NFSSETWRITEVERF(nmp);
8846	    	} else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) {
8847			NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF);
8848			nd->nd_repstat = NFSERR_STALEWRITEVERF;
8849		}
8850		NFSUNLOCKMNT(nmp);
8851		tl += (NFSX_VERF / NFSX_UNSIGNED);
8852		if (nd->nd_repstat == 0 && *++tl != newnfs_true)
8853			/* Must be a synchronous copy. */
8854			nd->nd_repstat = NFSERR_NOTSUPP;
8855		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8856		error = nfsm_loadattr(nd, outnap);
8857		if (error == 0)
8858			*outattrflagp = NFS_LATTR_NOSHRINK;
8859		if (nd->nd_repstat == 0)
8860			*lenp = len;
8861	} else if (nd->nd_repstat == NFSERR_OFFLOADNOREQS) {
8862		/*
8863		 * For the case where consecutive is not supported, but
8864		 * synchronous is supported, we can try consecutive == false
8865		 * by returning this error.  Otherwise, return NFSERR_NOTSUPP,
8866		 * since Copy cannot be done.
8867		 */
8868		if ((nd->nd_flag & ND_NOMOREDATA) == 0) {
8869			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
8870			if (!consecutive || *++tl == newnfs_false)
8871				nd->nd_repstat = NFSERR_NOTSUPP;
8872		} else
8873			nd->nd_repstat = NFSERR_BADXDR;
8874	}
8875	if (error == 0)
8876		error = nd->nd_repstat;
8877nfsmout:
8878	m_freem(nd->nd_mrep);
8879	return (error);
8880}
8881
8882/*
8883 * Seek operation.
8884 */
8885int
8886nfsrpc_seek(vnode_t vp, off_t *offp, bool *eofp, int content,
8887    struct ucred *cred, struct nfsvattr *nap, int *attrflagp)
8888{
8889	int error, expireret = 0, retrycnt;
8890	u_int32_t clidrev = 0;
8891	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
8892	struct nfsnode *np = VTONFS(vp);
8893	struct nfsfh *nfhp = NULL;
8894	nfsv4stateid_t stateid;
8895	void *lckp;
8896
8897	if (nmp->nm_clp != NULL)
8898		clidrev = nmp->nm_clp->nfsc_clientidrev;
8899	nfhp = np->n_fhp;
8900	retrycnt = 0;
8901	do {
8902		lckp = NULL;
8903		nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len,
8904		    NFSV4OPEN_ACCESSREAD, 0, cred, curthread, &stateid, &lckp);
8905		error = nfsrpc_seekrpc(vp, offp, &stateid, eofp, content,
8906		    nap, attrflagp, cred);
8907		if (error == NFSERR_STALESTATEID)
8908			nfscl_initiate_recovery(nmp->nm_clp);
8909		if (lckp != NULL)
8910			nfscl_lockderef(lckp);
8911		if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8912		    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8913		    error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) {
8914			(void) nfs_catnap(PZERO, error, "nfs_seek");
8915		} else if ((error == NFSERR_EXPIRED || (!NFSHASINT(nmp) &&
8916		    error == NFSERR_BADSTATEID)) && clidrev != 0) {
8917			expireret = nfscl_hasexpired(nmp->nm_clp, clidrev,
8918			    curthread);
8919		} else if (error == NFSERR_BADSTATEID && NFSHASINT(nmp)) {
8920			error = EIO;
8921		}
8922		retrycnt++;
8923	} while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID ||
8924	    error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
8925	    error == NFSERR_BADSESSION ||
8926	    (error == NFSERR_OLDSTATEID && retrycnt < 20) ||
8927	    ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) &&
8928	     expireret == 0 && clidrev != 0 && retrycnt < 4) ||
8929	    (error == NFSERR_OPENMODE && retrycnt < 4));
8930	if (error && retrycnt >= 4)
8931		error = EIO;
8932	return (error);
8933}
8934
8935/*
8936 * The seek RPC.
8937 */
8938static int
8939nfsrpc_seekrpc(vnode_t vp, off_t *offp, nfsv4stateid_t *stateidp, bool *eofp,
8940    int content, struct nfsvattr *nap, int *attrflagp, struct ucred *cred)
8941{
8942	uint32_t *tl;
8943	int error;
8944	struct nfsrv_descript nfsd;
8945	struct nfsrv_descript *nd = &nfsd;
8946	nfsattrbit_t attrbits;
8947
8948	*attrflagp = 0;
8949	NFSCL_REQSTART(nd, NFSPROC_SEEK, vp, cred);
8950	nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSTATEID);
8951	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
8952	txdr_hyper(*offp, tl); tl += 2;
8953	*tl++ = txdr_unsigned(content);
8954	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8955	NFSGETATTR_ATTRBIT(&attrbits);
8956	nfsrv_putattrbit(nd, &attrbits);
8957	error = nfscl_request(nd, vp, curthread, cred);
8958	if (error != 0)
8959		return (error);
8960	if (nd->nd_repstat == 0) {
8961		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_HYPER);
8962		if (*tl++ == newnfs_true)
8963			*eofp = true;
8964		else
8965			*eofp = false;
8966		*offp = fxdr_hyper(tl);
8967		/* Just skip over Getattr op status. */
8968		error = nfsm_loadattr(nd, nap);
8969		if (error == 0)
8970			*attrflagp = 1;
8971	}
8972	error = nd->nd_repstat;
8973nfsmout:
8974	m_freem(nd->nd_mrep);
8975	return (error);
8976}
8977
8978/*
8979 * The getextattr RPC.
8980 */
8981int
8982nfsrpc_getextattr(vnode_t vp, const char *name, struct uio *uiop, ssize_t *lenp,
8983    struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
8984{
8985	uint32_t *tl;
8986	int error;
8987	struct nfsrv_descript nfsd;
8988	struct nfsrv_descript *nd = &nfsd;
8989	nfsattrbit_t attrbits;
8990	uint32_t len, len2;
8991
8992	*attrflagp = 0;
8993	NFSCL_REQSTART(nd, NFSPROC_GETEXTATTR, vp, cred);
8994	nfsm_strtom(nd, name, strlen(name));
8995	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
8996	*tl = txdr_unsigned(NFSV4OP_GETATTR);
8997	NFSGETATTR_ATTRBIT(&attrbits);
8998	nfsrv_putattrbit(nd, &attrbits);
8999	error = nfscl_request(nd, vp, p, cred);
9000	if (error != 0)
9001		return (error);
9002	if (nd->nd_repstat == 0) {
9003		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
9004		len = fxdr_unsigned(uint32_t, *tl);
9005		/* Sanity check lengths. */
9006		if (uiop != NULL && len > 0 && len <= IOSIZE_MAX &&
9007		    uiop->uio_resid <= UINT32_MAX) {
9008			len2 = uiop->uio_resid;
9009			if (len2 >= len)
9010				error = nfsm_mbufuio(nd, uiop, len);
9011			else {
9012				error = nfsm_mbufuio(nd, uiop, len2);
9013				if (error == 0) {
9014					/*
9015					 * nfsm_mbufuio() advances to a multiple
9016					 * of 4, so round up len2 as well.  Then
9017					 * we need to advance over the rest of
9018					 * the data, rounding up the remaining
9019					 * length.
9020					 */
9021					len2 = NFSM_RNDUP(len2);
9022					len2 = NFSM_RNDUP(len - len2);
9023					if (len2 > 0)
9024						error = nfsm_advance(nd, len2,
9025						    -1);
9026				}
9027			}
9028		} else if (uiop == NULL && len > 0) {
9029			/* Just wants the length and not the data. */
9030			error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9031		} else if (len > 0)
9032			error = ENOATTR;
9033		if (error != 0)
9034			goto nfsmout;
9035		*lenp = len;
9036		/* Just skip over Getattr op status. */
9037		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
9038		error = nfsm_loadattr(nd, nap);
9039		if (error == 0)
9040			*attrflagp = 1;
9041	}
9042	if (error == 0)
9043		error = nd->nd_repstat;
9044nfsmout:
9045	m_freem(nd->nd_mrep);
9046	return (error);
9047}
9048
9049/*
9050 * The setextattr RPC.
9051 */
9052int
9053nfsrpc_setextattr(vnode_t vp, const char *name, struct uio *uiop,
9054    struct nfsvattr *nap, int *attrflagp, struct ucred *cred, NFSPROC_T *p)
9055{
9056	uint32_t *tl;
9057	int error;
9058	struct nfsrv_descript nfsd;
9059	struct nfsrv_descript *nd = &nfsd;
9060	nfsattrbit_t attrbits;
9061
9062	*attrflagp = 0;
9063	NFSCL_REQSTART(nd, NFSPROC_SETEXTATTR, vp, cred);
9064	if (uiop->uio_resid > nd->nd_maxreq) {
9065		/* nd_maxreq is set by NFSCL_REQSTART(). */
9066		m_freem(nd->nd_mreq);
9067		return (EINVAL);
9068	}
9069	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9070	*tl = txdr_unsigned(NFSV4SXATTR_EITHER);
9071	nfsm_strtom(nd, name, strlen(name));
9072	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9073	*tl = txdr_unsigned(uiop->uio_resid);
9074	error = nfsm_uiombuf(nd, uiop, uiop->uio_resid);
9075	if (error != 0) {
9076		m_freem(nd->nd_mreq);
9077		return (error);
9078	}
9079	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9080	*tl = txdr_unsigned(NFSV4OP_GETATTR);
9081	NFSGETATTR_ATTRBIT(&attrbits);
9082	nfsrv_putattrbit(nd, &attrbits);
9083	error = nfscl_request(nd, vp, p, cred);
9084	if (error != 0)
9085		return (error);
9086	if (nd->nd_repstat == 0) {
9087		/* Just skip over the reply and Getattr op status. */
9088		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
9089		    NFSX_UNSIGNED);
9090		error = nfsm_loadattr(nd, nap);
9091		if (error == 0)
9092			*attrflagp = 1;
9093	}
9094	if (error == 0)
9095		error = nd->nd_repstat;
9096nfsmout:
9097	m_freem(nd->nd_mrep);
9098	return (error);
9099}
9100
9101/*
9102 * The removeextattr RPC.
9103 */
9104int
9105nfsrpc_rmextattr(vnode_t vp, const char *name, struct nfsvattr *nap,
9106    int *attrflagp, struct ucred *cred, NFSPROC_T *p)
9107{
9108	uint32_t *tl;
9109	int error;
9110	struct nfsrv_descript nfsd;
9111	struct nfsrv_descript *nd = &nfsd;
9112	nfsattrbit_t attrbits;
9113
9114	*attrflagp = 0;
9115	NFSCL_REQSTART(nd, NFSPROC_RMEXTATTR, vp, cred);
9116	nfsm_strtom(nd, name, strlen(name));
9117	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
9118	*tl = txdr_unsigned(NFSV4OP_GETATTR);
9119	NFSGETATTR_ATTRBIT(&attrbits);
9120	nfsrv_putattrbit(nd, &attrbits);
9121	error = nfscl_request(nd, vp, p, cred);
9122	if (error != 0)
9123		return (error);
9124	if (nd->nd_repstat == 0) {
9125		/* Just skip over the reply and Getattr op status. */
9126		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + 3 *
9127		    NFSX_UNSIGNED);
9128		error = nfsm_loadattr(nd, nap);
9129		if (error == 0)
9130			*attrflagp = 1;
9131	}
9132	if (error == 0)
9133		error = nd->nd_repstat;
9134nfsmout:
9135	m_freem(nd->nd_mrep);
9136	return (error);
9137}
9138
9139/*
9140 * The listextattr RPC.
9141 */
9142int
9143nfsrpc_listextattr(vnode_t vp, uint64_t *cookiep, struct uio *uiop,
9144    size_t *lenp, bool *eofp, struct nfsvattr *nap, int *attrflagp,
9145    struct ucred *cred, NFSPROC_T *p)
9146{
9147	uint32_t *tl;
9148	int cnt, error, i, len;
9149	struct nfsrv_descript nfsd;
9150	struct nfsrv_descript *nd = &nfsd;
9151	nfsattrbit_t attrbits;
9152	u_char c;
9153
9154	*attrflagp = 0;
9155	NFSCL_REQSTART(nd, NFSPROC_LISTEXTATTR, vp, cred);
9156	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
9157	txdr_hyper(*cookiep, tl); tl += 2;
9158	*tl++ = txdr_unsigned(*lenp);
9159	*tl = txdr_unsigned(NFSV4OP_GETATTR);
9160	NFSGETATTR_ATTRBIT(&attrbits);
9161	nfsrv_putattrbit(nd, &attrbits);
9162	error = nfscl_request(nd, vp, p, cred);
9163	if (error != 0)
9164		return (error);
9165	*eofp = true;
9166	*lenp = 0;
9167	if (nd->nd_repstat == 0) {
9168		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
9169		*cookiep = fxdr_hyper(tl); tl += 2;
9170		cnt = fxdr_unsigned(int, *tl);
9171		if (cnt < 0) {
9172			error = EBADRPC;
9173			goto nfsmout;
9174		}
9175		for (i = 0; i < cnt; i++) {
9176			NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
9177			len = fxdr_unsigned(int, *tl);
9178			if (len <= 0 || len > EXTATTR_MAXNAMELEN) {
9179				error = EBADRPC;
9180				goto nfsmout;
9181			}
9182			if (uiop == NULL)
9183				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9184			else if (uiop->uio_resid >= len + 1) {
9185				c = len;
9186				error = uiomove(&c, sizeof(c), uiop);
9187				if (error == 0)
9188					error = nfsm_mbufuio(nd, uiop, len);
9189			} else {
9190				error = nfsm_advance(nd, NFSM_RNDUP(len), -1);
9191				*eofp = false;
9192			}
9193			if (error != 0)
9194				goto nfsmout;
9195			*lenp += (len + 1);
9196		}
9197		/* Get the eof and skip over the Getattr op status. */
9198		NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
9199		/*
9200		 * *eofp is set false above, because it wasn't able to copy
9201		 * all of the reply.
9202		 */
9203		if (*eofp && *tl == 0)
9204			*eofp = false;
9205		error = nfsm_loadattr(nd, nap);
9206		if (error == 0)
9207			*attrflagp = 1;
9208	}
9209	if (error == 0)
9210		error = nd->nd_repstat;
9211nfsmout:
9212	m_freem(nd->nd_mrep);
9213	return (error);
9214}
9215
9216/*
9217 * Split an mbuf list.  For non-M_EXTPG mbufs, just use m_split().
9218 */
9219static struct mbuf *
9220nfsm_split(struct mbuf *mp, uint64_t xfer)
9221{
9222	struct mbuf *m, *m2;
9223	vm_page_t pg;
9224	int i, j, left, pgno, plen, trim;
9225	char *cp, *cp2;
9226
9227	if ((mp->m_flags & M_EXTPG) == 0) {
9228		m = m_split(mp, xfer, M_WAITOK);
9229		return (m);
9230	}
9231
9232	/* Find the correct mbuf to split at. */
9233	for (m = mp; m != NULL && xfer > m->m_len; m = m->m_next)
9234		xfer -= m->m_len;
9235	if (m == NULL)
9236		return (NULL);
9237
9238	/* If xfer == m->m_len, we can just split the mbuf list. */
9239	if (xfer == m->m_len) {
9240		m2 = m->m_next;
9241		m->m_next = NULL;
9242		return (m2);
9243	}
9244
9245	/* Find the page to split at. */
9246	pgno = 0;
9247	left = xfer;
9248	do {
9249		if (pgno == 0)
9250			plen = m_epg_pagelen(m, 0, m->m_epg_1st_off);
9251		else
9252			plen = m_epg_pagelen(m, pgno, 0);
9253		if (left <= plen)
9254			break;
9255		left -= plen;
9256		pgno++;
9257	} while (pgno < m->m_epg_npgs);
9258	if (pgno == m->m_epg_npgs)
9259		panic("nfsm_split: eroneous ext_pgs mbuf");
9260
9261	m2 = mb_alloc_ext_pgs(M_WAITOK, mb_free_mext_pgs);
9262	m2->m_epg_flags |= EPG_FLAG_ANON;
9263
9264	/*
9265	 * If left < plen, allocate a new page for the new mbuf
9266	 * and copy the data after left in the page to this new
9267	 * page.
9268	 */
9269	if (left < plen) {
9270		pg = vm_page_alloc_noobj(VM_ALLOC_WAITOK | VM_ALLOC_NODUMP |
9271		    VM_ALLOC_WIRED);
9272		m2->m_epg_pa[0] = VM_PAGE_TO_PHYS(pg);
9273		m2->m_epg_npgs = 1;
9274
9275		/* Copy the data after left to the new page. */
9276		trim = plen - left;
9277		cp = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
9278		if (pgno == 0)
9279			cp += m->m_epg_1st_off;
9280		cp += left;
9281		cp2 = (char *)(void *)PHYS_TO_DMAP(m2->m_epg_pa[0]);
9282		if (pgno == m->m_epg_npgs - 1)
9283			m2->m_epg_last_len = trim;
9284		else {
9285			cp2 += PAGE_SIZE - trim;
9286			m2->m_epg_1st_off = PAGE_SIZE - trim;
9287			m2->m_epg_last_len = m->m_epg_last_len;
9288		}
9289		memcpy(cp2, cp, trim);
9290		m2->m_len = trim;
9291	} else {
9292		m2->m_len = 0;
9293		m2->m_epg_last_len = m->m_epg_last_len;
9294	}
9295
9296	/* Move the pages beyond pgno to the new mbuf. */
9297	for (i = pgno + 1, j = m2->m_epg_npgs; i < m->m_epg_npgs; i++, j++) {
9298		m2->m_epg_pa[j] = m->m_epg_pa[i];
9299		/* Never moves page 0. */
9300		m2->m_len += m_epg_pagelen(m, i, 0);
9301	}
9302	m2->m_epg_npgs = j;
9303	m->m_epg_npgs = pgno + 1;
9304	m->m_epg_last_len = left;
9305	m->m_len = xfer;
9306
9307	m2->m_next = m->m_next;
9308	m->m_next = NULL;
9309	return (m2);
9310}
9311
9312/*
9313 * Do the NFSv4.1 Bind Connection to Session.
9314 * Called from the reconnect layer of the krpc (sys/rpc/clnt_rc.c).
9315 */
9316void
9317nfsrpc_bindconnsess(CLIENT *cl, void *arg, struct ucred *cr)
9318{
9319	struct nfscl_reconarg *rcp = (struct nfscl_reconarg *)arg;
9320	uint32_t res, *tl;
9321	struct nfsrv_descript nfsd;
9322	struct nfsrv_descript *nd = &nfsd;
9323	struct rpc_callextra ext;
9324	struct timeval utimeout;
9325	enum clnt_stat stat;
9326	int error;
9327
9328	nfscl_reqstart(nd, NFSPROC_BINDCONNTOSESS, NULL, NULL, 0, NULL, NULL,
9329	    NFS_VER4, rcp->minorvers, NULL);
9330	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED);
9331	memcpy(tl, rcp->sessionid, NFSX_V4SESSIONID);
9332	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9333	*tl++ = txdr_unsigned(NFSCDFC4_FORE_OR_BOTH);
9334	*tl = newnfs_false;
9335
9336	memset(&ext, 0, sizeof(ext));
9337	utimeout.tv_sec = 30;
9338	utimeout.tv_usec = 0;
9339	ext.rc_auth = authunix_create(cr);
9340	nd->nd_mrep = NULL;
9341	stat = CLNT_CALL_MBUF(cl, &ext, NFSV4PROC_COMPOUND, nd->nd_mreq,
9342	    &nd->nd_mrep, utimeout);
9343	AUTH_DESTROY(ext.rc_auth);
9344	if (stat != RPC_SUCCESS) {
9345		printf("nfsrpc_bindconnsess: call failed stat=%d\n", stat);
9346		return;
9347	}
9348	if (nd->nd_mrep == NULL) {
9349		printf("nfsrpc_bindconnsess: no reply args\n");
9350		return;
9351	}
9352	error = 0;
9353	newnfs_realign(&nd->nd_mrep, M_WAITOK);
9354	nd->nd_md = nd->nd_mrep;
9355	nd->nd_dpos = mtod(nd->nd_md, char *);
9356	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
9357	nd->nd_repstat = fxdr_unsigned(uint32_t, *tl++);
9358	if (nd->nd_repstat == NFSERR_OK) {
9359		res = fxdr_unsigned(uint32_t, *tl);
9360		if (res > 0 && (error = nfsm_advance(nd, NFSM_RNDUP(res),
9361		    -1)) != 0)
9362			goto nfsmout;
9363		NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
9364		    4 * NFSX_UNSIGNED);
9365		tl += 3;
9366		if (!NFSBCMP(tl, rcp->sessionid, NFSX_V4SESSIONID)) {
9367			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
9368			res = fxdr_unsigned(uint32_t, *tl);
9369			if (res != NFSCDFS4_BOTH)
9370				printf("nfsrpc_bindconnsess: did not "
9371				    "return FS4_BOTH\n");
9372		} else
9373			printf("nfsrpc_bindconnsess: not same "
9374			    "sessionid\n");
9375	} else if (nd->nd_repstat != NFSERR_BADSESSION)
9376		printf("nfsrpc_bindconnsess: returned %d\n", nd->nd_repstat);
9377nfsmout:
9378	if (error != 0)
9379		printf("nfsrpc_bindconnsess: reply bad xdr\n");
9380	m_freem(nd->nd_mrep);
9381}
9382
9383/*
9384 * Do roughly what nfs_statfs() does for NFSv4, but when called with a shared
9385 * locked vnode.
9386 */
9387static void
9388nfscl_statfs(struct vnode *vp, struct ucred *cred, NFSPROC_T *td)
9389{
9390	struct nfsvattr nfsva;
9391	struct nfsfsinfo fs;
9392	struct nfsstatfs sb;
9393	struct mount *mp;
9394	struct nfsmount *nmp;
9395	uint32_t lease;
9396	int attrflag, error;
9397
9398	mp = vp->v_mount;
9399	nmp = VFSTONFS(mp);
9400	error = nfsrpc_statfs(vp, &sb, &fs, &lease, cred, td, &nfsva,
9401	    &attrflag);
9402	if (attrflag != 0)
9403		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
9404	if (error == 0) {
9405		NFSLOCKCLSTATE();
9406		if (nmp->nm_clp != NULL)
9407			nmp->nm_clp->nfsc_renew = NFSCL_RENEW(lease);
9408		NFSUNLOCKCLSTATE();
9409		mtx_lock(&nmp->nm_mtx);
9410		nfscl_loadfsinfo(nmp, &fs);
9411		nfscl_loadsbinfo(nmp, &sb, &mp->mnt_stat);
9412		mp->mnt_stat.f_iosize = newnfs_iosize(nmp);
9413		mtx_unlock(&nmp->nm_mtx);
9414	}
9415}
9416