nfs_nfsdport.c revision 241194
1177595Sweongyo/*-
2177595Sweongyo * Copyright (c) 1989, 1993
3177595Sweongyo *	The Regents of the University of California.  All rights reserved.
4177595Sweongyo *
5177595Sweongyo * This code is derived from software contributed to Berkeley by
6177595Sweongyo * Rick Macklem at The University of Guelph.
7177595Sweongyo *
8177595Sweongyo * Redistribution and use in source and binary forms, with or without
9177595Sweongyo * modification, are permitted provided that the following conditions
10177595Sweongyo * are met:
11177595Sweongyo * 1. Redistributions of source code must retain the above copyright
12177595Sweongyo *    notice, this list of conditions and the following disclaimer.
13177595Sweongyo * 2. Redistributions in binary form must reproduce the above copyright
14177595Sweongyo *    notice, this list of conditions and the following disclaimer in the
15177595Sweongyo *    documentation and/or other materials provided with the distribution.
16177595Sweongyo * 4. Neither the name of the University nor the names of its contributors
17177595Sweongyo *    may be used to endorse or promote products derived from this software
18177595Sweongyo *    without specific prior written permission.
19177595Sweongyo *
20177595Sweongyo * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21177595Sweongyo * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22177595Sweongyo * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23177595Sweongyo * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24177595Sweongyo * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25177595Sweongyo * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26177595Sweongyo * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27177595Sweongyo * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28177595Sweongyo * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29177595Sweongyo * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30177595Sweongyo * SUCH DAMAGE.
31177595Sweongyo *
32177595Sweongyo */
33177595Sweongyo
34177595Sweongyo#include <sys/cdefs.h>
35177595Sweongyo__FBSDID("$FreeBSD: stable/9/sys/fs/nfsserver/nfs_nfsdport.c 241194 2012-10-04 12:43:45Z rmacklem $");
36177595Sweongyo
37178354Ssam#include <sys/capability.h>
38178354Ssam
39177595Sweongyo/*
40177595Sweongyo * Functions that perform the vfs operations required by the routines in
41177595Sweongyo * nfsd_serv.c. It is hoped that this change will make the server more
42177595Sweongyo * portable.
43177595Sweongyo */
44177595Sweongyo
45177595Sweongyo#include <fs/nfs/nfsport.h>
46177595Sweongyo#include <sys/hash.h>
47177595Sweongyo#include <sys/sysctl.h>
48177595Sweongyo#include <nlm/nlm_prot.h>
49177595Sweongyo#include <nlm/nlm.h>
50177595Sweongyo
51177595SweongyoFEATURE(nfsd, "NFSv4 server");
52177595Sweongyo
53177595Sweongyoextern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
54177595Sweongyoextern int nfsrv_useacl;
55177595Sweongyoextern int newnfs_numnfsd;
56177595Sweongyoextern struct mount nfsv4root_mnt;
57177595Sweongyoextern struct nfsrv_stablefirst nfsrv_stablefirst;
58177595Sweongyoextern void (*nfsd_call_servertimer)(void);
59177595Sweongyoextern SVCPOOL	*nfsrvd_pool;
60177595Sweongyostruct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
61177595SweongyoNFSDLOCKMUTEX;
62177595Sweongyostruct mtx nfs_cache_mutex;
63177595Sweongyostruct mtx nfs_v4root_mutex;
64177595Sweongyostruct nfsrvfh nfs_rootfh, nfs_pubfh;
65177595Sweongyoint nfs_pubfhset = 0, nfs_rootfhset = 0;
66177595Sweongyostruct proc *nfsd_master_proc = NULL;
67177595Sweongyostatic pid_t nfsd_master_pid = (pid_t)-1;
68177595Sweongyostatic char nfsd_master_comm[MAXCOMLEN + 1];
69177595Sweongyostatic struct timeval nfsd_master_start;
70177595Sweongyostatic uint32_t nfsv4_sysid = 0;
71177595Sweongyo
72177595Sweongyostatic int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
73177595Sweongyo    struct ucred *);
74177595Sweongyo
75177595Sweongyoint nfsrv_enable_crossmntpt = 1;
76177595Sweongyostatic int nfs_commit_blks;
77177595Sweongyostatic int nfs_commit_miss;
78177595Sweongyoextern int nfsrv_issuedelegs;
79177595Sweongyoextern int nfsrv_dolocallocks;
80177595Sweongyo
81177595SweongyoSYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "New NFS server");
82177595SweongyoSYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
83177595Sweongyo    &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
84177595SweongyoSYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
85177595Sweongyo    0, "");
86177595SweongyoSYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
87177595Sweongyo    0, "");
88177595SweongyoSYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
89177595Sweongyo    &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
90177595SweongyoSYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
91177595Sweongyo    &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
92177595Sweongyo
93177595Sweongyo#define	MAX_REORDERED_RPC	16
94177595Sweongyo#define	NUM_HEURISTIC		1031
95177595Sweongyo#define	NHUSE_INIT		64
96177595Sweongyo#define	NHUSE_INC		16
97177595Sweongyo#define	NHUSE_MAX		2048
98177595Sweongyo
99177595Sweongyostatic struct nfsheur {
100177595Sweongyo	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
101177595Sweongyo	off_t nh_nextoff;	/* next offset for sequential detection */
102177595Sweongyo	int nh_use;		/* use count for selection */
103177595Sweongyo	int nh_seqcount;	/* heuristic */
104177595Sweongyo} nfsheur[NUM_HEURISTIC];
105177595Sweongyo
106177595Sweongyo
107177595Sweongyo/*
108177595Sweongyo * Heuristic to detect sequential operation.
109177595Sweongyo */
110177595Sweongyostatic struct nfsheur *
111177595Sweongyonfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
112177595Sweongyo{
113177595Sweongyo	struct nfsheur *nh;
114177595Sweongyo	int hi, try;
115177595Sweongyo
116177595Sweongyo	/* Locate best candidate. */
117177595Sweongyo	try = 32;
118177595Sweongyo	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
119177595Sweongyo	nh = &nfsheur[hi];
120177595Sweongyo	while (try--) {
121177595Sweongyo		if (nfsheur[hi].nh_vp == vp) {
122177595Sweongyo			nh = &nfsheur[hi];
123177595Sweongyo			break;
124177595Sweongyo		}
125177595Sweongyo		if (nfsheur[hi].nh_use > 0)
126227293Sed			--nfsheur[hi].nh_use;
127177595Sweongyo		hi = (hi + 1) % NUM_HEURISTIC;
128228621Sbschmidt		if (nfsheur[hi].nh_use < nh->nh_use)
129228621Sbschmidt			nh = &nfsheur[hi];
130228621Sbschmidt	}
131228621Sbschmidt
132178354Ssam	/* Initialize hint if this is a new file. */
133177595Sweongyo	if (nh->nh_vp != vp) {
134177595Sweongyo		nh->nh_vp = vp;
135177595Sweongyo		nh->nh_nextoff = uio->uio_offset;
136177595Sweongyo		nh->nh_use = NHUSE_INIT;
137177595Sweongyo		if (uio->uio_offset == 0)
138199559Sjhb			nh->nh_seqcount = 4;
139177595Sweongyo		else
140177595Sweongyo			nh->nh_seqcount = 1;
141178354Ssam	}
142177595Sweongyo
143177595Sweongyo	/* Calculate heuristic. */
144177595Sweongyo	if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
145177595Sweongyo	    uio->uio_offset == nh->nh_nextoff) {
146177595Sweongyo		/* See comments in vfs_vnops.c:sequential_heuristic(). */
147177595Sweongyo		nh->nh_seqcount += howmany(uio->uio_resid, 16384);
148177595Sweongyo		if (nh->nh_seqcount > IO_SEQMAX)
149177595Sweongyo			nh->nh_seqcount = IO_SEQMAX;
150177595Sweongyo	} else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
151177595Sweongyo	    imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
152177595Sweongyo		/* Probably a reordered RPC, leave seqcount alone. */
153177595Sweongyo	} else if (nh->nh_seqcount > 1) {
154177595Sweongyo		nh->nh_seqcount /= 2;
155177595Sweongyo	} else {
156177595Sweongyo		nh->nh_seqcount = 0;
157177595Sweongyo	}
158177595Sweongyo	nh->nh_use += NHUSE_INC;
159177595Sweongyo	if (nh->nh_use > NHUSE_MAX)
160177595Sweongyo		nh->nh_use = NHUSE_MAX;
161177595Sweongyo	return (nh);
162177595Sweongyo}
163177595Sweongyo
164177595Sweongyo/*
165177595Sweongyo * Get attributes into nfsvattr structure.
166177595Sweongyo */
167177595Sweongyoint
168177595Sweongyonfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
169177595Sweongyo    struct thread *p, int vpislocked)
170177595Sweongyo{
171205843Simp	int error, lockedit = 0;
172278808Smarius
173177595Sweongyo	if (vpislocked == 0) {
174177595Sweongyo		/*
175177595Sweongyo		 * When vpislocked == 0, the vnode is either exclusively
176177595Sweongyo		 * locked by this thread or not locked by this thread.
177177595Sweongyo		 * As such, shared lock it, if not exclusively locked.
178177595Sweongyo		 */
179177595Sweongyo		if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
180190526Ssam			lockedit = 1;
181178354Ssam			NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
182177595Sweongyo		}
183177595Sweongyo	}
184177595Sweongyo	error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
185177595Sweongyo	if (lockedit != 0)
186178354Ssam		NFSVOPUNLOCK(vp, 0);
187177595Sweongyo
188177595Sweongyo	NFSEXITCODE(error);
189177595Sweongyo	return (error);
190177595Sweongyo}
191178354Ssam
192177595Sweongyo/*
193177595Sweongyo * Get a file handle for a vnode.
194199559Sjhb */
195177595Sweongyoint
196177595Sweongyonfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
197177595Sweongyo{
198177595Sweongyo	int error;
199177595Sweongyo
200177595Sweongyo	NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
201177595Sweongyo	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
202177595Sweongyo	error = VOP_VPTOFH(vp, &fhp->fh_fid);
203177595Sweongyo
204177595Sweongyo	NFSEXITCODE(error);
205177595Sweongyo	return (error);
206177595Sweongyo}
207177595Sweongyo
208177595Sweongyo/*
209178354Ssam * Perform access checking for vnodes obtained from file handles that would
210178354Ssam * refer to files already opened by a Unix client. You cannot just use
211178354Ssam * vn_writechk() and VOP_ACCESSX() for two reasons.
212178354Ssam * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
213178354Ssam *     case.
214178354Ssam * 2 - The owner is to be given access irrespective of mode bits for some
215178354Ssam *     operations, so that processes that chmod after opening a file don't
216178354Ssam *     break.
217178354Ssam */
218178354Ssamint
219178354Ssamnfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
220178354Ssam    struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
221178354Ssam    u_int32_t *supportedtypep)
222178354Ssam{
223178354Ssam	struct vattr vattr;
224178354Ssam	int error = 0, getret = 0;
225178354Ssam
226178354Ssam	if (vpislocked == 0) {
227178354Ssam		if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
228178354Ssam			error = EPERM;
229178354Ssam			goto out;
230178354Ssam		}
231178354Ssam	}
232178354Ssam	if (accmode & VWRITE) {
233178354Ssam		/* Just vn_writechk() changed to check rdonly */
234178354Ssam		/*
235178354Ssam		 * Disallow write attempts on read-only file systems;
236178354Ssam		 * unless the file is a socket or a block or character
237178354Ssam		 * device resident on the file system.
238178354Ssam		 */
239178354Ssam		if (NFSVNO_EXRDONLY(exp) ||
240178354Ssam		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
241178354Ssam			switch (vp->v_type) {
242178354Ssam			case VREG:
243178354Ssam			case VDIR:
244178354Ssam			case VLNK:
245178354Ssam				error = EROFS;
246178354Ssam			default:
247178354Ssam				break;
248177595Sweongyo			}
249177595Sweongyo		}
250177595Sweongyo		/*
251177595Sweongyo		 * If there's shared text associated with
252177595Sweongyo		 * the inode, try to free it up once.  If
253177595Sweongyo		 * we fail, we can't allow writing.
254177595Sweongyo		 */
255177595Sweongyo		if ((vp->v_vflag & VV_TEXT) != 0 && error == 0)
256177595Sweongyo			error = ETXTBSY;
257177595Sweongyo	}
258177595Sweongyo	if (error != 0) {
259177595Sweongyo		if (vpislocked == 0)
260177595Sweongyo			NFSVOPUNLOCK(vp, 0);
261178354Ssam		goto out;
262178354Ssam	}
263190552Sweongyo
264177595Sweongyo	/*
265177595Sweongyo	 * Should the override still be applied when ACLs are enabled?
266177595Sweongyo	 */
267177595Sweongyo	error = VOP_ACCESSX(vp, accmode, cred, p);
268177595Sweongyo	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
269177595Sweongyo		/*
270177595Sweongyo		 * Try again with VEXPLICIT_DENY, to see if the test for
271177595Sweongyo		 * deletion is supported.
272177595Sweongyo		 */
273177595Sweongyo		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
274177595Sweongyo		if (error == 0) {
275177595Sweongyo			if (vp->v_type == VDIR) {
276177595Sweongyo				accmode &= ~(VDELETE | VDELETE_CHILD);
277177595Sweongyo				accmode |= VWRITE;
278207554Ssobomax				error = VOP_ACCESSX(vp, accmode, cred, p);
279207554Ssobomax			} else if (supportedtypep != NULL) {
280177595Sweongyo				*supportedtypep &= ~NFSACCESS_DELETE;
281177595Sweongyo			}
282177595Sweongyo		}
283177595Sweongyo	}
284177595Sweongyo
285177595Sweongyo	/*
286177595Sweongyo	 * Allow certain operations for the owner (reads and writes
287178957Ssam	 * on files that are already open).
288178957Ssam	 */
289177595Sweongyo	if (override != NFSACCCHK_NOOVERRIDE &&
290177595Sweongyo	    (error == EPERM || error == EACCES)) {
291177595Sweongyo		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
292177595Sweongyo			error = 0;
293177595Sweongyo		else if (override & NFSACCCHK_ALLOWOWNER) {
294177595Sweongyo			getret = VOP_GETATTR(vp, &vattr, cred);
295177595Sweongyo			if (getret == 0 && cred->cr_uid == vattr.va_uid)
296177595Sweongyo				error = 0;
297177595Sweongyo		}
298177595Sweongyo	}
299177595Sweongyo	if (vpislocked == 0)
300177595Sweongyo		NFSVOPUNLOCK(vp, 0);
301177595Sweongyo
302177595Sweongyoout:
303178354Ssam	NFSEXITCODE(error);
304177595Sweongyo	return (error);
305177595Sweongyo}
306190526Ssam
307177595Sweongyo/*
308178354Ssam * Set attribute(s) vnop.
309178354Ssam */
310178354Ssamint
311177595Sweongyonfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
312177595Sweongyo    struct thread *p, struct nfsexstuff *exp)
313177595Sweongyo{
314177595Sweongyo	int error;
315177595Sweongyo
316177595Sweongyo	error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
317177595Sweongyo	NFSEXITCODE(error);
318177595Sweongyo	return (error);
319192468Ssam}
320192468Ssam
321192468Ssam/*
322192468Ssam * Set up nameidata for a lookup() call and do it.
323192468Ssam */
324177595Sweongyoint
325177595Sweongyonfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
326177595Sweongyo    struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
327177595Sweongyo    struct vnode **retdirp)
328177595Sweongyo{
329177595Sweongyo	struct componentname *cnp = &ndp->ni_cnd;
330177595Sweongyo	int i;
331177595Sweongyo	struct iovec aiov;
332178354Ssam	struct uio auio;
333177595Sweongyo	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
334177595Sweongyo	int error = 0, crossmnt;
335190552Sweongyo	char *cp;
336190552Sweongyo
337177595Sweongyo	*retdirp = NULL;
338177595Sweongyo	cnp->cn_nameptr = cnp->cn_pnbuf;
339177595Sweongyo	ndp->ni_strictrelative = 0;
340177595Sweongyo	/*
341177595Sweongyo	 * Extract and set starting directory.
342177595Sweongyo	 */
343177595Sweongyo	if (dp->v_type != VDIR) {
344177595Sweongyo		if (islocked)
345177595Sweongyo			vput(dp);
346178354Ssam		else
347228621Sbschmidt			vrele(dp);
348228621Sbschmidt		nfsvno_relpathbuf(ndp);
349228621Sbschmidt		error = ENOTDIR;
350228621Sbschmidt		goto out1;
351178354Ssam	}
352178354Ssam	if (islocked)
353178354Ssam		NFSVOPUNLOCK(dp, 0);
354178354Ssam	VREF(dp);
355178354Ssam	*retdirp = dp;
356178354Ssam	if (NFSVNO_EXRDONLY(exp))
357178354Ssam		cnp->cn_flags |= RDONLY;
358178354Ssam	ndp->ni_segflg = UIO_SYSSPACE;
359178354Ssam	crossmnt = 1;
360178354Ssam
361178354Ssam	if (nd->nd_flag & ND_PUBLOOKUP) {
362178354Ssam		ndp->ni_loopcnt = 0;
363178354Ssam		if (cnp->cn_pnbuf[0] == '/') {
364178354Ssam			vrele(dp);
365178354Ssam			/*
366178354Ssam			 * Check for degenerate pathnames here, since lookup()
367178354Ssam			 * panics on them.
368178354Ssam			 */
369178354Ssam			for (i = 1; i < ndp->ni_pathlen; i++)
370178354Ssam				if (cnp->cn_pnbuf[i] != '/')
371178354Ssam					break;
372178354Ssam			if (i == ndp->ni_pathlen) {
373178354Ssam				error = NFSERR_ACCES;
374178354Ssam				goto out;
375178354Ssam			}
376178354Ssam			dp = rootvnode;
377178354Ssam			VREF(dp);
378178354Ssam		}
379178354Ssam	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
380178354Ssam	    (nd->nd_flag & ND_NFSV4) == 0) {
381178354Ssam		/*
382178354Ssam		 * Only cross mount points for NFSv4 when doing a
383178354Ssam		 * mount while traversing the file system above
384178354Ssam		 * the mount point, unless nfsrv_enable_crossmntpt is set.
385178354Ssam		 */
386178354Ssam		cnp->cn_flags |= NOCROSSMOUNT;
387178354Ssam		crossmnt = 0;
388178354Ssam	}
389178354Ssam
390178354Ssam	/*
391178354Ssam	 * Initialize for scan, set ni_startdir and bump ref on dp again
392178354Ssam	 * becuase lookup() will dereference ni_startdir.
393178354Ssam	 */
394178354Ssam
395178354Ssam	cnp->cn_thread = p;
396178354Ssam	ndp->ni_startdir = dp;
397178354Ssam	ndp->ni_rootdir = rootvnode;
398178354Ssam	ndp->ni_topdir = NULL;
399178354Ssam
400178354Ssam	if (!lockleaf)
401177595Sweongyo		cnp->cn_flags |= LOCKLEAF;
402177595Sweongyo	for (;;) {
403177595Sweongyo		cnp->cn_nameptr = cnp->cn_pnbuf;
404177595Sweongyo		/*
405177595Sweongyo		 * Call lookup() to do the real work.  If an error occurs,
406177595Sweongyo		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
407177595Sweongyo		 * we do not have to dereference anything before returning.
408177595Sweongyo		 * In either case ni_startdir will be dereferenced and NULLed
409177595Sweongyo		 * out.
410177595Sweongyo		 */
411177595Sweongyo		error = lookup(ndp);
412177595Sweongyo		if (error)
413177595Sweongyo			break;
414177595Sweongyo
415177595Sweongyo		/*
416177595Sweongyo		 * Check for encountering a symbolic link.  Trivial
417177595Sweongyo		 * termination occurs if no symlink encountered.
418177595Sweongyo		 */
419177595Sweongyo		if ((cnp->cn_flags & ISSYMLINK) == 0) {
420177595Sweongyo			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
421177595Sweongyo				nfsvno_relpathbuf(ndp);
422177595Sweongyo			if (ndp->ni_vp && !lockleaf)
423177595Sweongyo				NFSVOPUNLOCK(ndp->ni_vp, 0);
424177595Sweongyo			break;
425177595Sweongyo		}
426177595Sweongyo
427177595Sweongyo		/*
428177595Sweongyo		 * Validate symlink
429177595Sweongyo		 */
430177595Sweongyo		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
431177595Sweongyo			NFSVOPUNLOCK(ndp->ni_dvp, 0);
432177595Sweongyo		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
433177595Sweongyo			error = EINVAL;
434177595Sweongyo			goto badlink2;
435177595Sweongyo		}
436177595Sweongyo
437177595Sweongyo		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
438177595Sweongyo			error = ELOOP;
439177595Sweongyo			goto badlink2;
440177595Sweongyo		}
441177595Sweongyo		if (ndp->ni_pathlen > 1)
442177595Sweongyo			cp = uma_zalloc(namei_zone, M_WAITOK);
443177595Sweongyo		else
444177595Sweongyo			cp = cnp->cn_pnbuf;
445177595Sweongyo		aiov.iov_base = cp;
446177595Sweongyo		aiov.iov_len = MAXPATHLEN;
447177595Sweongyo		auio.uio_iov = &aiov;
448177595Sweongyo		auio.uio_iovcnt = 1;
449177595Sweongyo		auio.uio_offset = 0;
450177595Sweongyo		auio.uio_rw = UIO_READ;
451177595Sweongyo		auio.uio_segflg = UIO_SYSSPACE;
452177595Sweongyo		auio.uio_td = NULL;
453177595Sweongyo		auio.uio_resid = MAXPATHLEN;
454177595Sweongyo		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
455177595Sweongyo		if (error) {
456177595Sweongyo		badlink1:
457177595Sweongyo			if (ndp->ni_pathlen > 1)
458177595Sweongyo				uma_zfree(namei_zone, cp);
459177595Sweongyo		badlink2:
460177595Sweongyo			vrele(ndp->ni_dvp);
461177595Sweongyo			vput(ndp->ni_vp);
462177595Sweongyo			break;
463177595Sweongyo		}
464177595Sweongyo		linklen = MAXPATHLEN - auio.uio_resid;
465177595Sweongyo		if (linklen == 0) {
466177595Sweongyo			error = ENOENT;
467177595Sweongyo			goto badlink1;
468177595Sweongyo		}
469177595Sweongyo		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
470177595Sweongyo			error = ENAMETOOLONG;
471177595Sweongyo			goto badlink1;
472177595Sweongyo		}
473177595Sweongyo
474177595Sweongyo		/*
475177595Sweongyo		 * Adjust or replace path
476177595Sweongyo		 */
477177595Sweongyo		if (ndp->ni_pathlen > 1) {
478177595Sweongyo			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
479177595Sweongyo			uma_zfree(namei_zone, cnp->cn_pnbuf);
480177595Sweongyo			cnp->cn_pnbuf = cp;
481177595Sweongyo		} else
482177595Sweongyo			cnp->cn_pnbuf[linklen] = '\0';
483177595Sweongyo		ndp->ni_pathlen += linklen;
484177595Sweongyo
485177595Sweongyo		/*
486177595Sweongyo		 * Cleanup refs for next loop and check if root directory
487177595Sweongyo		 * should replace current directory.  Normally ni_dvp
488177595Sweongyo		 * becomes the new base directory and is cleaned up when
489177595Sweongyo		 * we loop.  Explicitly null pointers after invalidation
490177595Sweongyo		 * to clarify operation.
491177595Sweongyo		 */
492177595Sweongyo		vput(ndp->ni_vp);
493177595Sweongyo		ndp->ni_vp = NULL;
494177595Sweongyo
495177595Sweongyo		if (cnp->cn_pnbuf[0] == '/') {
496177595Sweongyo			vrele(ndp->ni_dvp);
497177595Sweongyo			ndp->ni_dvp = ndp->ni_rootdir;
498177595Sweongyo			VREF(ndp->ni_dvp);
499177595Sweongyo		}
500177595Sweongyo		ndp->ni_startdir = ndp->ni_dvp;
501177595Sweongyo		ndp->ni_dvp = NULL;
502177595Sweongyo	}
503177595Sweongyo	if (!lockleaf)
504177595Sweongyo		cnp->cn_flags &= ~LOCKLEAF;
505177595Sweongyo
506177595Sweongyoout:
507177595Sweongyo	if (error) {
508177595Sweongyo		uma_zfree(namei_zone, cnp->cn_pnbuf);
509177595Sweongyo		ndp->ni_vp = NULL;
510177595Sweongyo		ndp->ni_dvp = NULL;
511177595Sweongyo		ndp->ni_startdir = NULL;
512177595Sweongyo		cnp->cn_flags &= ~HASBUF;
513177595Sweongyo	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
514177595Sweongyo		ndp->ni_dvp = NULL;
515177595Sweongyo	}
516177595Sweongyo
517177595Sweongyoout1:
518177595Sweongyo	NFSEXITCODE2(error, nd);
519177595Sweongyo	return (error);
520177595Sweongyo}
521177595Sweongyo
522177595Sweongyo/*
523177595Sweongyo * Set up a pathname buffer and return a pointer to it and, optionally
524278808Smarius * set a hash pointer.
525278808Smarius */
526177595Sweongyovoid
527278808Smariusnfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
528177595Sweongyo{
529177595Sweongyo	struct componentname *cnp = &ndp->ni_cnd;
530177595Sweongyo
531177595Sweongyo	cnp->cn_flags |= (NOMACCHECK | HASBUF);
532177595Sweongyo	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
533177595Sweongyo	if (hashpp != NULL)
534177595Sweongyo		*hashpp = NULL;
535177595Sweongyo	*bufpp = cnp->cn_pnbuf;
536177595Sweongyo}
537177595Sweongyo
538177595Sweongyo/*
539177595Sweongyo * Release the above path buffer, if not released by nfsvno_namei().
540177595Sweongyo */
541177595Sweongyovoid
542177595Sweongyonfsvno_relpathbuf(struct nameidata *ndp)
543177595Sweongyo{
544177595Sweongyo
545177595Sweongyo	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
546177595Sweongyo		panic("nfsrelpath");
547177595Sweongyo	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
548177595Sweongyo	ndp->ni_cnd.cn_flags &= ~HASBUF;
549177595Sweongyo}
550177595Sweongyo
551177595Sweongyo/*
552177595Sweongyo * Readlink vnode op into an mbuf list.
553177595Sweongyo */
554177595Sweongyoint
555177595Sweongyonfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
556177595Sweongyo    struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
557177595Sweongyo{
558177595Sweongyo	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
559177595Sweongyo	struct iovec *ivp = iv;
560177595Sweongyo	struct uio io, *uiop = &io;
561177595Sweongyo	struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
562177595Sweongyo	int i, len, tlen, error = 0;
563177595Sweongyo
564177595Sweongyo	len = 0;
565177595Sweongyo	i = 0;
566177595Sweongyo	while (len < NFS_MAXPATHLEN) {
567177595Sweongyo		NFSMGET(mp);
568177595Sweongyo		MCLGET(mp, M_WAIT);
569177595Sweongyo		mp->m_len = NFSMSIZ(mp);
570177595Sweongyo		if (len == 0) {
571177595Sweongyo			mp3 = mp2 = mp;
572177595Sweongyo		} else {
573177595Sweongyo			mp2->m_next = mp;
574177595Sweongyo			mp2 = mp;
575177595Sweongyo		}
576177595Sweongyo		if ((len + mp->m_len) > NFS_MAXPATHLEN) {
577177595Sweongyo			mp->m_len = NFS_MAXPATHLEN - len;
578177595Sweongyo			len = NFS_MAXPATHLEN;
579177595Sweongyo		} else {
580177595Sweongyo			len += mp->m_len;
581177595Sweongyo		}
582177595Sweongyo		ivp->iov_base = mtod(mp, caddr_t);
583177595Sweongyo		ivp->iov_len = mp->m_len;
584177595Sweongyo		i++;
585177595Sweongyo		ivp++;
586177595Sweongyo	}
587177595Sweongyo	uiop->uio_iov = iv;
588177595Sweongyo	uiop->uio_iovcnt = i;
589177595Sweongyo	uiop->uio_offset = 0;
590177595Sweongyo	uiop->uio_resid = len;
591177595Sweongyo	uiop->uio_rw = UIO_READ;
592177595Sweongyo	uiop->uio_segflg = UIO_SYSSPACE;
593177595Sweongyo	uiop->uio_td = NULL;
594177595Sweongyo	error = VOP_READLINK(vp, uiop, cred);
595177595Sweongyo	if (error) {
596177595Sweongyo		m_freem(mp3);
597177595Sweongyo		*lenp = 0;
598177595Sweongyo		goto out;
599177595Sweongyo	}
600177595Sweongyo	if (uiop->uio_resid > 0) {
601177595Sweongyo		len -= uiop->uio_resid;
602177595Sweongyo		tlen = NFSM_RNDUP(len);
603177595Sweongyo		nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
604177595Sweongyo	}
605177595Sweongyo	*lenp = len;
606177595Sweongyo	*mpp = mp3;
607177595Sweongyo	*mpendp = mp;
608177595Sweongyo
609177595Sweongyoout:
610177595Sweongyo	NFSEXITCODE(error);
611177595Sweongyo	return (error);
612177595Sweongyo}
613177595Sweongyo
614177595Sweongyo/*
615177595Sweongyo * Read vnode op call into mbuf list.
616177595Sweongyo */
617177595Sweongyoint
618177595Sweongyonfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
619177595Sweongyo    struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
620177595Sweongyo{
621177595Sweongyo	struct mbuf *m;
622177595Sweongyo	int i;
623177595Sweongyo	struct iovec *iv;
624177595Sweongyo	struct iovec *iv2;
625177595Sweongyo	int error = 0, len, left, siz, tlen, ioflag = 0;
626177595Sweongyo	struct mbuf *m2 = NULL, *m3;
627177595Sweongyo	struct uio io, *uiop = &io;
628177595Sweongyo	struct nfsheur *nh;
629177595Sweongyo
630177595Sweongyo	len = left = NFSM_RNDUP(cnt);
631177595Sweongyo	m3 = NULL;
632177595Sweongyo	/*
633177595Sweongyo	 * Generate the mbuf list with the uio_iov ref. to it.
634177595Sweongyo	 */
635177595Sweongyo	i = 0;
636177595Sweongyo	while (left > 0) {
637177595Sweongyo		NFSMGET(m);
638177595Sweongyo		MCLGET(m, M_WAIT);
639177595Sweongyo		m->m_len = 0;
640177595Sweongyo		siz = min(M_TRAILINGSPACE(m), left);
641177595Sweongyo		left -= siz;
642177595Sweongyo		i++;
643177595Sweongyo		if (m3)
644177595Sweongyo			m2->m_next = m;
645177595Sweongyo		else
646177595Sweongyo			m3 = m;
647177595Sweongyo		m2 = m;
648177595Sweongyo	}
649177595Sweongyo	MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
650177595Sweongyo	    M_TEMP, M_WAITOK);
651177595Sweongyo	uiop->uio_iov = iv2 = iv;
652177595Sweongyo	m = m3;
653177595Sweongyo	left = len;
654177595Sweongyo	i = 0;
655177595Sweongyo	while (left > 0) {
656177595Sweongyo		if (m == NULL)
657177595Sweongyo			panic("nfsvno_read iov");
658177595Sweongyo		siz = min(M_TRAILINGSPACE(m), left);
659177595Sweongyo		if (siz > 0) {
660177595Sweongyo			iv->iov_base = mtod(m, caddr_t) + m->m_len;
661177595Sweongyo			iv->iov_len = siz;
662177595Sweongyo			m->m_len += siz;
663177595Sweongyo			left -= siz;
664177595Sweongyo			iv++;
665177595Sweongyo			i++;
666177595Sweongyo		}
667177595Sweongyo		m = m->m_next;
668177595Sweongyo	}
669177595Sweongyo	uiop->uio_iovcnt = i;
670177595Sweongyo	uiop->uio_offset = off;
671177595Sweongyo	uiop->uio_resid = len;
672177595Sweongyo	uiop->uio_rw = UIO_READ;
673177595Sweongyo	uiop->uio_segflg = UIO_SYSSPACE;
674177595Sweongyo	nh = nfsrv_sequential_heuristic(uiop, vp);
675177595Sweongyo	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
676177595Sweongyo	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
677177595Sweongyo	FREE((caddr_t)iv2, M_TEMP);
678177595Sweongyo	if (error) {
679177595Sweongyo		m_freem(m3);
680177595Sweongyo		*mpp = NULL;
681177595Sweongyo		goto out;
682177595Sweongyo	}
683177595Sweongyo	nh->nh_nextoff = uiop->uio_offset;
684177595Sweongyo	tlen = len - uiop->uio_resid;
685177595Sweongyo	cnt = cnt < tlen ? cnt : tlen;
686177595Sweongyo	tlen = NFSM_RNDUP(cnt);
687177595Sweongyo	if (tlen == 0) {
688177595Sweongyo		m_freem(m3);
689177595Sweongyo		m3 = NULL;
690177595Sweongyo	} else if (len != tlen || tlen != cnt)
691177595Sweongyo		nfsrv_adj(m3, len - tlen, tlen - cnt);
692177595Sweongyo	*mpp = m3;
693177595Sweongyo	*mpendp = m2;
694177595Sweongyo
695177595Sweongyoout:
696177595Sweongyo	NFSEXITCODE(error);
697177595Sweongyo	return (error);
698177595Sweongyo}
699177595Sweongyo
700177595Sweongyo/*
701177595Sweongyo * Write vnode op from an mbuf list.
702177595Sweongyo */
703177595Sweongyoint
704177595Sweongyonfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
705177595Sweongyo    struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
706177595Sweongyo{
707177595Sweongyo	struct iovec *ivp;
708177595Sweongyo	int i, len;
709177595Sweongyo	struct iovec *iv;
710177595Sweongyo	int ioflags, error;
711177595Sweongyo	struct uio io, *uiop = &io;
712177595Sweongyo	struct nfsheur *nh;
713177595Sweongyo
714177595Sweongyo	MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
715177595Sweongyo	    M_WAITOK);
716177595Sweongyo	uiop->uio_iov = iv = ivp;
717177595Sweongyo	uiop->uio_iovcnt = cnt;
718177595Sweongyo	i = mtod(mp, caddr_t) + mp->m_len - cp;
719177595Sweongyo	len = retlen;
720177595Sweongyo	while (len > 0) {
721177595Sweongyo		if (mp == NULL)
722177595Sweongyo			panic("nfsvno_write");
723177595Sweongyo		if (i > 0) {
724177595Sweongyo			i = min(i, len);
725177595Sweongyo			ivp->iov_base = cp;
726177595Sweongyo			ivp->iov_len = i;
727177595Sweongyo			ivp++;
728177595Sweongyo			len -= i;
729177595Sweongyo		}
730177595Sweongyo		mp = mp->m_next;
731177595Sweongyo		if (mp) {
732177595Sweongyo			i = mp->m_len;
733177595Sweongyo			cp = mtod(mp, caddr_t);
734177595Sweongyo		}
735177595Sweongyo	}
736177595Sweongyo
737177595Sweongyo	if (stable == NFSWRITE_UNSTABLE)
738177595Sweongyo		ioflags = IO_NODELOCKED;
739177595Sweongyo	else
740177595Sweongyo		ioflags = (IO_SYNC | IO_NODELOCKED);
741177595Sweongyo	uiop->uio_resid = retlen;
742177595Sweongyo	uiop->uio_rw = UIO_WRITE;
743177595Sweongyo	uiop->uio_segflg = UIO_SYSSPACE;
744177595Sweongyo	NFSUIOPROC(uiop, p);
745177595Sweongyo	uiop->uio_offset = off;
746177595Sweongyo	nh = nfsrv_sequential_heuristic(uiop, vp);
747177595Sweongyo	ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
748177595Sweongyo	error = VOP_WRITE(vp, uiop, ioflags, cred);
749177595Sweongyo	if (error == 0)
750177595Sweongyo		nh->nh_nextoff = uiop->uio_offset;
751177595Sweongyo	FREE((caddr_t)iv, M_TEMP);
752177595Sweongyo
753177595Sweongyo	NFSEXITCODE(error);
754177595Sweongyo	return (error);
755177595Sweongyo}
756177595Sweongyo
757177595Sweongyo/*
758177595Sweongyo * Common code for creating a regular file (plus special files for V2).
759177595Sweongyo */
760177595Sweongyoint
761177595Sweongyonfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
762177595Sweongyo    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
763177595Sweongyo    int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
764177595Sweongyo{
765177595Sweongyo	u_quad_t tempsize;
766177595Sweongyo	int error;
767177595Sweongyo
768177595Sweongyo	error = nd->nd_repstat;
769177595Sweongyo	if (!error && ndp->ni_vp == NULL) {
770177595Sweongyo		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
771177595Sweongyo			vrele(ndp->ni_startdir);
772177595Sweongyo			error = VOP_CREATE(ndp->ni_dvp,
773177595Sweongyo			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
774177595Sweongyo			vput(ndp->ni_dvp);
775177595Sweongyo			nfsvno_relpathbuf(ndp);
776177595Sweongyo			if (!error) {
777177595Sweongyo				if (*exclusive_flagp) {
778177595Sweongyo					*exclusive_flagp = 0;
779177595Sweongyo					NFSVNO_ATTRINIT(nvap);
780177595Sweongyo					nvap->na_atime.tv_sec = cverf[0];
781177595Sweongyo					nvap->na_atime.tv_nsec = cverf[1];
782177595Sweongyo					error = VOP_SETATTR(ndp->ni_vp,
783177595Sweongyo					    &nvap->na_vattr, nd->nd_cred);
784177595Sweongyo				}
785177595Sweongyo			}
786177595Sweongyo		/*
787177595Sweongyo		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
788177595Sweongyo		 * (This implies, just get out on an error.)
789177595Sweongyo		 */
790177595Sweongyo		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
791177595Sweongyo			nvap->na_type == VFIFO) {
792177595Sweongyo			if (nvap->na_type == VCHR && rdev == 0xffffffff)
793177595Sweongyo				nvap->na_type = VFIFO;
794177595Sweongyo                        if (nvap->na_type != VFIFO &&
795177595Sweongyo			    (error = priv_check_cred(nd->nd_cred,
796177595Sweongyo			     PRIV_VFS_MKNOD_DEV, 0))) {
797177595Sweongyo				vrele(ndp->ni_startdir);
798177595Sweongyo				nfsvno_relpathbuf(ndp);
799177595Sweongyo				vput(ndp->ni_dvp);
800177595Sweongyo				goto out;
801177595Sweongyo			}
802177595Sweongyo			nvap->na_rdev = rdev;
803177595Sweongyo			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
804177595Sweongyo			    &ndp->ni_cnd, &nvap->na_vattr);
805177595Sweongyo			vput(ndp->ni_dvp);
806177595Sweongyo			nfsvno_relpathbuf(ndp);
807177595Sweongyo			vrele(ndp->ni_startdir);
808177595Sweongyo			if (error)
809177595Sweongyo				goto out;
810177595Sweongyo		} else {
811177595Sweongyo			vrele(ndp->ni_startdir);
812177595Sweongyo			nfsvno_relpathbuf(ndp);
813177595Sweongyo			vput(ndp->ni_dvp);
814177595Sweongyo			error = ENXIO;
815177595Sweongyo			goto out;
816177595Sweongyo		}
817177595Sweongyo		*vpp = ndp->ni_vp;
818177595Sweongyo	} else {
819177595Sweongyo		/*
820177595Sweongyo		 * Handle cases where error is already set and/or
821177595Sweongyo		 * the file exists.
822177595Sweongyo		 * 1 - clean up the lookup
823177595Sweongyo		 * 2 - iff !error and na_size set, truncate it
824177595Sweongyo		 */
825177595Sweongyo		vrele(ndp->ni_startdir);
826177595Sweongyo		nfsvno_relpathbuf(ndp);
827177595Sweongyo		*vpp = ndp->ni_vp;
828177595Sweongyo		if (ndp->ni_dvp == *vpp)
829177595Sweongyo			vrele(ndp->ni_dvp);
830177595Sweongyo		else
831177595Sweongyo			vput(ndp->ni_dvp);
832177595Sweongyo		if (!error && nvap->na_size != VNOVAL) {
833177595Sweongyo			error = nfsvno_accchk(*vpp, VWRITE,
834177595Sweongyo			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
835177595Sweongyo			    NFSACCCHK_VPISLOCKED, NULL);
836177595Sweongyo			if (!error) {
837177595Sweongyo				tempsize = nvap->na_size;
838177595Sweongyo				NFSVNO_ATTRINIT(nvap);
839177595Sweongyo				nvap->na_size = tempsize;
840177595Sweongyo				error = VOP_SETATTR(*vpp,
841177595Sweongyo				    &nvap->na_vattr, nd->nd_cred);
842177595Sweongyo			}
843177595Sweongyo		}
844177595Sweongyo		if (error)
845177595Sweongyo			vput(*vpp);
846177595Sweongyo	}
847177595Sweongyo
848177595Sweongyoout:
849177595Sweongyo	NFSEXITCODE(error);
850177595Sweongyo	return (error);
851177595Sweongyo}
852177595Sweongyo
853177595Sweongyo/*
854177595Sweongyo * Do a mknod vnode op.
855177595Sweongyo */
856177595Sweongyoint
857177595Sweongyonfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
858243857Sglebius    struct thread *p)
859177595Sweongyo{
860177595Sweongyo	int error = 0;
861177595Sweongyo	enum vtype vtyp;
862177595Sweongyo
863177595Sweongyo	vtyp = nvap->na_type;
864177595Sweongyo	/*
865177595Sweongyo	 * Iff doesn't exist, create it.
866177595Sweongyo	 */
867177595Sweongyo	if (ndp->ni_vp) {
868177595Sweongyo		vrele(ndp->ni_startdir);
869177595Sweongyo		nfsvno_relpathbuf(ndp);
870177595Sweongyo		vput(ndp->ni_dvp);
871177595Sweongyo		vrele(ndp->ni_vp);
872177595Sweongyo		error = EEXIST;
873177595Sweongyo		goto out;
874177595Sweongyo	}
875177595Sweongyo	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
876177595Sweongyo		vrele(ndp->ni_startdir);
877177595Sweongyo		nfsvno_relpathbuf(ndp);
878177595Sweongyo		vput(ndp->ni_dvp);
879177595Sweongyo		error = NFSERR_BADTYPE;
880177595Sweongyo		goto out;
881177595Sweongyo	}
882177595Sweongyo	if (vtyp == VSOCK) {
883177595Sweongyo		vrele(ndp->ni_startdir);
884177595Sweongyo		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
885177595Sweongyo		    &ndp->ni_cnd, &nvap->na_vattr);
886177595Sweongyo		vput(ndp->ni_dvp);
887177595Sweongyo		nfsvno_relpathbuf(ndp);
888177595Sweongyo	} else {
889177595Sweongyo		if (nvap->na_type != VFIFO &&
890177595Sweongyo		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
891177595Sweongyo			vrele(ndp->ni_startdir);
892177595Sweongyo			nfsvno_relpathbuf(ndp);
893177595Sweongyo			vput(ndp->ni_dvp);
894177595Sweongyo			goto out;
895278808Smarius		}
896177595Sweongyo		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
897278808Smarius		    &ndp->ni_cnd, &nvap->na_vattr);
898177595Sweongyo		vput(ndp->ni_dvp);
899177595Sweongyo		nfsvno_relpathbuf(ndp);
900177595Sweongyo		vrele(ndp->ni_startdir);
901177595Sweongyo		/*
902177595Sweongyo		 * Since VOP_MKNOD returns the ni_vp, I can't
903177595Sweongyo		 * see any reason to do the lookup.
904177595Sweongyo		 */
905177595Sweongyo	}
906177595Sweongyo
907177595Sweongyoout:
908177595Sweongyo	NFSEXITCODE(error);
909177595Sweongyo	return (error);
910177595Sweongyo}
911177595Sweongyo
912177595Sweongyo/*
913278808Smarius * Mkdir vnode op.
914177595Sweongyo */
915177595Sweongyoint
916177595Sweongyonfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
917177595Sweongyo    struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
918177595Sweongyo{
919177595Sweongyo	int error = 0;
920177595Sweongyo
921177595Sweongyo	if (ndp->ni_vp != NULL) {
922177595Sweongyo		if (ndp->ni_dvp == ndp->ni_vp)
923177595Sweongyo			vrele(ndp->ni_dvp);
924177595Sweongyo		else
925177595Sweongyo			vput(ndp->ni_dvp);
926177595Sweongyo		vrele(ndp->ni_vp);
927177595Sweongyo		nfsvno_relpathbuf(ndp);
928177595Sweongyo		error = EEXIST;
929177595Sweongyo		goto out;
930177595Sweongyo	}
931177595Sweongyo	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
932177595Sweongyo	    &nvap->na_vattr);
933177595Sweongyo	vput(ndp->ni_dvp);
934177595Sweongyo	nfsvno_relpathbuf(ndp);
935177595Sweongyo
936177595Sweongyoout:
937177595Sweongyo	NFSEXITCODE(error);
938177595Sweongyo	return (error);
939177595Sweongyo}
940177595Sweongyo
941177595Sweongyo/*
942177595Sweongyo * symlink vnode op.
943177595Sweongyo */
944177595Sweongyoint
945177595Sweongyonfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
946177595Sweongyo    int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
947177595Sweongyo    struct nfsexstuff *exp)
948177595Sweongyo{
949177595Sweongyo	int error = 0;
950177595Sweongyo
951177595Sweongyo	if (ndp->ni_vp) {
952177595Sweongyo		vrele(ndp->ni_startdir);
953177595Sweongyo		nfsvno_relpathbuf(ndp);
954177595Sweongyo		if (ndp->ni_dvp == ndp->ni_vp)
955177595Sweongyo			vrele(ndp->ni_dvp);
956177595Sweongyo		else
957177595Sweongyo			vput(ndp->ni_dvp);
958177595Sweongyo		vrele(ndp->ni_vp);
959177595Sweongyo		error = EEXIST;
960177595Sweongyo		goto out;
961177595Sweongyo	}
962177595Sweongyo
963177595Sweongyo	error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
964177595Sweongyo	    &nvap->na_vattr, pathcp);
965177595Sweongyo	vput(ndp->ni_dvp);
966177595Sweongyo	vrele(ndp->ni_startdir);
967177595Sweongyo	nfsvno_relpathbuf(ndp);
968177595Sweongyo	/*
969177595Sweongyo	 * Although FreeBSD still had the lookup code in
970177595Sweongyo	 * it for 7/current, there doesn't seem to be any
971177595Sweongyo	 * point, since VOP_SYMLINK() returns the ni_vp.
972177595Sweongyo	 * Just vput it for v2.
973177595Sweongyo	 */
974177595Sweongyo	if (!not_v2 && !error)
975177595Sweongyo		vput(ndp->ni_vp);
976177595Sweongyo
977177595Sweongyoout:
978177595Sweongyo	NFSEXITCODE(error);
979177595Sweongyo	return (error);
980177595Sweongyo}
981177595Sweongyo
982177595Sweongyo/*
983177595Sweongyo * Parse symbolic link arguments.
984177595Sweongyo * This function has an ugly side effect. It will MALLOC() an area for
985177595Sweongyo * the symlink and set iov_base to point to it, only if it succeeds.
986177595Sweongyo * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
987177595Sweongyo * be FREE'd later.
988177595Sweongyo */
989177595Sweongyoint
990177595Sweongyonfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
991177595Sweongyo    struct thread *p, char **pathcpp, int *lenp)
992177595Sweongyo{
993177595Sweongyo	u_int32_t *tl;
994177595Sweongyo	char *pathcp = NULL;
995177595Sweongyo	int error = 0, len;
996177595Sweongyo	struct nfsv2_sattr *sp;
997177595Sweongyo
998177595Sweongyo	*pathcpp = NULL;
999177595Sweongyo	*lenp = 0;
1000177595Sweongyo	if ((nd->nd_flag & ND_NFSV3) &&
1001177595Sweongyo	    (error = nfsrv_sattr(nd, nvap, NULL, NULL, p)))
1002177595Sweongyo		goto nfsmout;
1003177595Sweongyo	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
1004177595Sweongyo	len = fxdr_unsigned(int, *tl);
1005177595Sweongyo	if (len > NFS_MAXPATHLEN || len <= 0) {
1006177595Sweongyo		error = EBADRPC;
1007177595Sweongyo		goto nfsmout;
1008177595Sweongyo	}
1009177595Sweongyo	MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
1010177595Sweongyo	error = nfsrv_mtostr(nd, pathcp, len);
1011177595Sweongyo	if (error)
1012177595Sweongyo		goto nfsmout;
1013177595Sweongyo	if (nd->nd_flag & ND_NFSV2) {
1014177595Sweongyo		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1015177595Sweongyo		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
1016177595Sweongyo	}
1017177595Sweongyo	*pathcpp = pathcp;
1018177595Sweongyo	*lenp = len;
1019177595Sweongyo	NFSEXITCODE2(0, nd);
1020177595Sweongyo	return (0);
1021177595Sweongyonfsmout:
1022177595Sweongyo	if (pathcp)
1023177595Sweongyo		free(pathcp, M_TEMP);
1024177595Sweongyo	NFSEXITCODE2(error, nd);
1025177595Sweongyo	return (error);
1026177595Sweongyo}
1027177595Sweongyo
1028177595Sweongyo/*
1029177595Sweongyo * Remove a non-directory object.
1030177595Sweongyo */
1031177595Sweongyoint
1032177595Sweongyonfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1033177595Sweongyo    struct thread *p, struct nfsexstuff *exp)
1034177595Sweongyo{
1035177595Sweongyo	struct vnode *vp;
1036177595Sweongyo	int error = 0;
1037177595Sweongyo
1038177595Sweongyo	vp = ndp->ni_vp;
1039177595Sweongyo	if (vp->v_type == VDIR)
1040177595Sweongyo		error = NFSERR_ISDIR;
1041177595Sweongyo	else if (is_v4)
1042177595Sweongyo		error = nfsrv_checkremove(vp, 1, p);
1043177595Sweongyo	if (!error)
1044177595Sweongyo		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
1045177595Sweongyo	if (ndp->ni_dvp == vp)
1046177595Sweongyo		vrele(ndp->ni_dvp);
1047177595Sweongyo	else
1048177595Sweongyo		vput(ndp->ni_dvp);
1049177595Sweongyo	vput(vp);
1050177595Sweongyo	if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
1051177595Sweongyo		nfsvno_relpathbuf(ndp);
1052177595Sweongyo	NFSEXITCODE(error);
1053177595Sweongyo	return (error);
1054177595Sweongyo}
1055177595Sweongyo
1056177595Sweongyo/*
1057177595Sweongyo * Remove a directory.
1058177595Sweongyo */
1059177595Sweongyoint
1060177595Sweongyonfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
1061177595Sweongyo    struct thread *p, struct nfsexstuff *exp)
1062177595Sweongyo{
1063177595Sweongyo	struct vnode *vp;
1064177595Sweongyo	int error = 0;
1065177595Sweongyo
1066177595Sweongyo	vp = ndp->ni_vp;
1067177595Sweongyo	if (vp->v_type != VDIR) {
1068177595Sweongyo		error = ENOTDIR;
1069177595Sweongyo		goto out;
1070177595Sweongyo	}
1071177595Sweongyo	/*
1072177595Sweongyo	 * No rmdir "." please.
1073177595Sweongyo	 */
1074177595Sweongyo	if (ndp->ni_dvp == vp) {
1075177595Sweongyo		error = EINVAL;
1076177595Sweongyo		goto out;
1077177595Sweongyo	}
1078199559Sjhb	/*
1079177595Sweongyo	 * The root of a mounted filesystem cannot be deleted.
1080177595Sweongyo	 */
1081177595Sweongyo	if (vp->v_vflag & VV_ROOT)
1082177595Sweongyo		error = EBUSY;
1083177595Sweongyoout:
1084177595Sweongyo	if (!error)
1085177595Sweongyo		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1086177595Sweongyo	if (ndp->ni_dvp == vp)
1087177595Sweongyo		vrele(ndp->ni_dvp);
1088177595Sweongyo	else
1089177595Sweongyo		vput(ndp->ni_dvp);
1090177595Sweongyo	vput(vp);
1091177595Sweongyo	if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
1092177595Sweongyo		nfsvno_relpathbuf(ndp);
1093177595Sweongyo	NFSEXITCODE(error);
1094177595Sweongyo	return (error);
1095178354Ssam}
1096192468Ssam
1097177595Sweongyo/*
1098177595Sweongyo * Rename vnode op.
1099177595Sweongyo */
1100177595Sweongyoint
1101177595Sweongyonfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
1102177595Sweongyo    u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
1103262007Skevlo{
1104177595Sweongyo	struct vnode *fvp, *tvp, *tdvp;
1105177595Sweongyo	int error = 0;
1106177595Sweongyo
1107177595Sweongyo	fvp = fromndp->ni_vp;
1108177595Sweongyo	if (ndstat) {
1109177595Sweongyo		vrele(fromndp->ni_dvp);
1110177595Sweongyo		vrele(fvp);
1111177595Sweongyo		error = ndstat;
1112177595Sweongyo		goto out1;
1113177595Sweongyo	}
1114177595Sweongyo	tdvp = tondp->ni_dvp;
1115177595Sweongyo	tvp = tondp->ni_vp;
1116177595Sweongyo	if (tvp != NULL) {
1117177595Sweongyo		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
1118177595Sweongyo			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
1119177595Sweongyo			goto out;
1120177595Sweongyo		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
1121177595Sweongyo			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
1122177595Sweongyo			goto out;
1123177595Sweongyo		}
1124177595Sweongyo		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
1125177595Sweongyo			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1126177595Sweongyo			goto out;
1127177595Sweongyo		}
1128177595Sweongyo
1129177595Sweongyo		/*
1130177595Sweongyo		 * A rename to '.' or '..' results in a prematurely
1131178354Ssam		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
1132177595Sweongyo		 * here.
1133177595Sweongyo		 */
1134177595Sweongyo		if ((tondp->ni_cnd.cn_namelen == 1 &&
1135177595Sweongyo		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
1136177595Sweongyo		    (tondp->ni_cnd.cn_namelen == 2 &&
1137177595Sweongyo		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
1138177595Sweongyo		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
1139177595Sweongyo			error = EINVAL;
1140177595Sweongyo			goto out;
1141177595Sweongyo		}
1142177595Sweongyo	}
1143177595Sweongyo	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
1144177595Sweongyo		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1145177595Sweongyo		goto out;
1146177595Sweongyo	}
1147177595Sweongyo	if (fvp->v_mount != tdvp->v_mount) {
1148177595Sweongyo		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1149177595Sweongyo		goto out;
1150177595Sweongyo	}
1151177595Sweongyo	if (fvp == tdvp) {
1152177595Sweongyo		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
1153177595Sweongyo		goto out;
1154192468Ssam	}
1155177595Sweongyo	if (fvp == tvp) {
1156177595Sweongyo		/*
1157177595Sweongyo		 * If source and destination are the same, there is nothing to
1158177595Sweongyo		 * do. Set error to -1 to indicate this.
1159177595Sweongyo		 */
1160177595Sweongyo		error = -1;
1161192468Ssam		goto out;
1162177595Sweongyo	}
1163177595Sweongyo	if (ndflag & ND_NFSV4) {
1164177595Sweongyo		if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
1165177595Sweongyo			error = nfsrv_checkremove(fvp, 0, p);
1166177595Sweongyo			NFSVOPUNLOCK(fvp, 0);
1167177595Sweongyo		} else
1168177595Sweongyo			error = EPERM;
1169177595Sweongyo		if (tvp && !error)
1170177595Sweongyo			error = nfsrv_checkremove(tvp, 1, p);
1171177595Sweongyo	} else {
1172177595Sweongyo		/*
1173177595Sweongyo		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
1174177595Sweongyo		 * that the NFSv4 client won't be confused by the rename.
1175177595Sweongyo		 * Since nfsd_recalldelegation() can only be called on an
1176177595Sweongyo		 * unlocked vnode at this point and fvp is the file that will
1177177595Sweongyo		 * still exist after the rename, just do fvp.
1178177595Sweongyo		 */
1179177595Sweongyo		nfsd_recalldelegation(fvp, p);
1180177595Sweongyo	}
1181177595Sweongyoout:
1182177595Sweongyo	if (!error) {
1183177595Sweongyo		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
1184177595Sweongyo		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
1185177595Sweongyo		    &tondp->ni_cnd);
1186177595Sweongyo	} else {
1187177595Sweongyo		if (tdvp == tvp)
1188177595Sweongyo			vrele(tdvp);
1189177595Sweongyo		else
1190177595Sweongyo			vput(tdvp);
1191177595Sweongyo		if (tvp)
1192177595Sweongyo			vput(tvp);
1193177595Sweongyo		vrele(fromndp->ni_dvp);
1194177595Sweongyo		vrele(fvp);
1195177595Sweongyo		if (error == -1)
1196177595Sweongyo			error = 0;
1197177595Sweongyo	}
1198177595Sweongyo	vrele(tondp->ni_startdir);
1199177595Sweongyo	nfsvno_relpathbuf(tondp);
1200177595Sweongyoout1:
1201177595Sweongyo	vrele(fromndp->ni_startdir);
1202177595Sweongyo	nfsvno_relpathbuf(fromndp);
1203177595Sweongyo	NFSEXITCODE(error);
1204177595Sweongyo	return (error);
1205177595Sweongyo}
1206177595Sweongyo
1207177595Sweongyo/*
1208177595Sweongyo * Link vnode op.
1209177595Sweongyo */
1210177595Sweongyoint
1211177595Sweongyonfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
1212177595Sweongyo    struct thread *p, struct nfsexstuff *exp)
1213177595Sweongyo{
1214177595Sweongyo	struct vnode *xp;
1215177595Sweongyo	int error = 0;
1216177595Sweongyo
1217177595Sweongyo	xp = ndp->ni_vp;
1218177595Sweongyo	if (xp != NULL) {
1219177595Sweongyo		error = EEXIST;
1220177595Sweongyo	} else {
1221177595Sweongyo		xp = ndp->ni_dvp;
1222177595Sweongyo		if (vp->v_mount != xp->v_mount)
1223177595Sweongyo			error = EXDEV;
1224177595Sweongyo	}
1225177595Sweongyo	if (!error) {
1226177595Sweongyo		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
1227177595Sweongyo		if ((vp->v_iflag & VI_DOOMED) == 0)
1228177595Sweongyo			error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
1229177595Sweongyo		else
1230177595Sweongyo			error = EPERM;
1231177595Sweongyo		if (ndp->ni_dvp == vp)
1232177595Sweongyo			vrele(ndp->ni_dvp);
1233177595Sweongyo		else
1234177595Sweongyo			vput(ndp->ni_dvp);
1235177595Sweongyo		NFSVOPUNLOCK(vp, 0);
1236177595Sweongyo	} else {
1237177595Sweongyo		if (ndp->ni_dvp == ndp->ni_vp)
1238177595Sweongyo			vrele(ndp->ni_dvp);
1239177595Sweongyo		else
1240177595Sweongyo			vput(ndp->ni_dvp);
1241177595Sweongyo		if (ndp->ni_vp)
1242177595Sweongyo			vrele(ndp->ni_vp);
1243177595Sweongyo	}
1244177595Sweongyo	nfsvno_relpathbuf(ndp);
1245177595Sweongyo	NFSEXITCODE(error);
1246177595Sweongyo	return (error);
1247177595Sweongyo}
1248177595Sweongyo
1249177595Sweongyo/*
1250177595Sweongyo * Do the fsync() appropriate for the commit.
1251177595Sweongyo */
1252177595Sweongyoint
1253177595Sweongyonfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
1254177595Sweongyo    struct thread *td)
1255177595Sweongyo{
1256177595Sweongyo	int error = 0;
1257177595Sweongyo
1258177595Sweongyo	/*
1259177595Sweongyo	 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
1260177595Sweongyo	 * file is done.  At this time VOP_FSYNC does not accept offset and
1261177595Sweongyo	 * byte count parameters so call VOP_FSYNC the whole file for now.
1262199559Sjhb	 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
1263177595Sweongyo	 */
1264177595Sweongyo	if (cnt == 0 || cnt > MAX_COMMIT_COUNT) {
1265177595Sweongyo		/*
1266177595Sweongyo		 * Give up and do the whole thing
1267177595Sweongyo		 */
1268177595Sweongyo		if (vp->v_object &&
1269177595Sweongyo		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1270177595Sweongyo			VM_OBJECT_LOCK(vp->v_object);
1271177595Sweongyo			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
1272177595Sweongyo			VM_OBJECT_UNLOCK(vp->v_object);
1273178354Ssam		}
1274177595Sweongyo		error = VOP_FSYNC(vp, MNT_WAIT, td);
1275177595Sweongyo	} else {
1276178354Ssam		/*
1277177595Sweongyo		 * Locate and synchronously write any buffers that fall
1278177595Sweongyo		 * into the requested range.  Note:  we are assuming that
1279177595Sweongyo		 * f_iosize is a power of 2.
1280177595Sweongyo		 */
1281177595Sweongyo		int iosize = vp->v_mount->mnt_stat.f_iosize;
1282178354Ssam		int iomask = iosize - 1;
1283178354Ssam		struct bufobj *bo;
1284178354Ssam		daddr_t lblkno;
1285178354Ssam
1286178354Ssam		/*
1287178354Ssam		 * Align to iosize boundry, super-align to page boundry.
1288178354Ssam		 */
1289178354Ssam		if (off & iomask) {
1290178354Ssam			cnt += off & iomask;
1291178354Ssam			off &= ~(u_quad_t)iomask;
1292178354Ssam		}
1293178354Ssam		if (off & PAGE_MASK) {
1294178354Ssam			cnt += off & PAGE_MASK;
1295177595Sweongyo			off &= ~(u_quad_t)PAGE_MASK;
1296177595Sweongyo		}
1297177595Sweongyo		lblkno = off / iosize;
1298177595Sweongyo
1299177595Sweongyo		if (vp->v_object &&
1300177595Sweongyo		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1301177595Sweongyo			VM_OBJECT_LOCK(vp->v_object);
1302177595Sweongyo			vm_object_page_clean(vp->v_object, off, off + cnt,
1303177595Sweongyo			    OBJPC_SYNC);
1304177595Sweongyo			VM_OBJECT_UNLOCK(vp->v_object);
1305177595Sweongyo		}
1306177595Sweongyo
1307177595Sweongyo		bo = &vp->v_bufobj;
1308177595Sweongyo		BO_LOCK(bo);
1309177595Sweongyo		while (cnt > 0) {
1310177595Sweongyo			struct buf *bp;
1311177595Sweongyo
1312177595Sweongyo			/*
1313177595Sweongyo			 * If we have a buffer and it is marked B_DELWRI we
1314177595Sweongyo			 * have to lock and write it.  Otherwise the prior
1315177595Sweongyo			 * write is assumed to have already been committed.
1316177595Sweongyo			 *
1317177595Sweongyo			 * gbincore() can return invalid buffers now so we
1318177595Sweongyo			 * have to check that bit as well (though B_DELWRI
1319177595Sweongyo			 * should not be set if B_INVAL is set there could be
1320177595Sweongyo			 * a race here since we haven't locked the buffer).
1321177595Sweongyo			 */
1322177595Sweongyo			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
1323177595Sweongyo				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
1324177595Sweongyo				    LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
1325177595Sweongyo					BO_LOCK(bo);
1326177595Sweongyo					continue; /* retry */
1327177595Sweongyo				}
1328177595Sweongyo			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
1329177595Sweongyo				    B_DELWRI) {
1330177595Sweongyo					bremfree(bp);
1331177595Sweongyo					bp->b_flags &= ~B_ASYNC;
1332177595Sweongyo					bwrite(bp);
1333177595Sweongyo					++nfs_commit_miss;
1334177595Sweongyo				} else
1335177595Sweongyo					BUF_UNLOCK(bp);
1336177595Sweongyo				BO_LOCK(bo);
1337177595Sweongyo			}
1338177595Sweongyo			++nfs_commit_blks;
1339177595Sweongyo			if (cnt < iosize)
1340177595Sweongyo				break;
1341199559Sjhb			cnt -= iosize;
1342177595Sweongyo			++lblkno;
1343199559Sjhb		}
1344199559Sjhb		BO_UNLOCK(bo);
1345177595Sweongyo	}
1346199559Sjhb	NFSEXITCODE(error);
1347199559Sjhb	return (error);
1348199559Sjhb}
1349199559Sjhb
1350199559Sjhb/*
1351199559Sjhb * Statfs vnode op.
1352177595Sweongyo */
1353177595Sweongyoint
1354177595Sweongyonfsvno_statfs(struct vnode *vp, struct statfs *sf)
1355177595Sweongyo{
1356177595Sweongyo	int error;
1357177595Sweongyo
1358177595Sweongyo	error = VFS_STATFS(vp->v_mount, sf);
1359177595Sweongyo	if (error == 0) {
1360177595Sweongyo		/*
1361177595Sweongyo		 * Since NFS handles these values as unsigned on the
1362177595Sweongyo		 * wire, there is no way to represent negative values,
1363177595Sweongyo		 * so set them to 0. Without this, they will appear
1364177595Sweongyo		 * to be very large positive values for clients like
1365177595Sweongyo		 * Solaris10.
1366178354Ssam		 */
1367178354Ssam		if (sf->f_bavail < 0)
1368177595Sweongyo			sf->f_bavail = 0;
1369177595Sweongyo		if (sf->f_ffree < 0)
1370177595Sweongyo			sf->f_ffree = 0;
1371177595Sweongyo	}
1372177595Sweongyo	NFSEXITCODE(error);
1373177595Sweongyo	return (error);
1374177595Sweongyo}
1375177595Sweongyo
1376177595Sweongyo/*
1377177595Sweongyo * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
1378177595Sweongyo * must handle nfsrv_opencheck() calls after any other access checks.
1379177595Sweongyo */
1380177595Sweongyovoid
1381177595Sweongyonfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
1382177595Sweongyo    nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
1383177595Sweongyo    int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
1384177595Sweongyo    NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
1385177595Sweongyo    struct nfsexstuff *exp, struct vnode **vpp)
1386177595Sweongyo{
1387177595Sweongyo	struct vnode *vp = NULL;
1388177595Sweongyo	u_quad_t tempsize;
1389177595Sweongyo	struct nfsexstuff nes;
1390177595Sweongyo
1391177595Sweongyo	if (ndp->ni_vp == NULL)
1392177595Sweongyo		nd->nd_repstat = nfsrv_opencheck(clientid,
1393177595Sweongyo		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
1394177595Sweongyo	if (!nd->nd_repstat) {
1395177595Sweongyo		if (ndp->ni_vp == NULL) {
1396177595Sweongyo			vrele(ndp->ni_startdir);
1397177595Sweongyo			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
1398243857Sglebius			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
1399177595Sweongyo			vput(ndp->ni_dvp);
1400177595Sweongyo			nfsvno_relpathbuf(ndp);
1401177595Sweongyo			if (!nd->nd_repstat) {
1402177595Sweongyo				if (*exclusive_flagp) {
1403177595Sweongyo					*exclusive_flagp = 0;
1404177595Sweongyo					NFSVNO_ATTRINIT(nvap);
1405177595Sweongyo					nvap->na_atime.tv_sec = cverf[0];
1406177595Sweongyo					nvap->na_atime.tv_nsec = cverf[1];
1407177595Sweongyo					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
1408177595Sweongyo					    &nvap->na_vattr, cred);
1409177595Sweongyo				} else {
1410177595Sweongyo					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
1411177595Sweongyo					    aclp, p, attrbitp, exp);
1412177595Sweongyo				}
1413177595Sweongyo			}
1414177595Sweongyo			vp = ndp->ni_vp;
1415177595Sweongyo		} else {
1416177595Sweongyo			if (ndp->ni_startdir)
1417177595Sweongyo				vrele(ndp->ni_startdir);
1418177595Sweongyo			nfsvno_relpathbuf(ndp);
1419177595Sweongyo			vp = ndp->ni_vp;
1420177595Sweongyo			if (create == NFSV4OPEN_CREATE) {
1421177595Sweongyo				if (ndp->ni_dvp == vp)
1422177595Sweongyo					vrele(ndp->ni_dvp);
1423177595Sweongyo				else
1424177595Sweongyo					vput(ndp->ni_dvp);
1425177595Sweongyo			}
1426177595Sweongyo			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
1427177595Sweongyo				if (ndp->ni_cnd.cn_flags & RDONLY)
1428177595Sweongyo					NFSVNO_SETEXRDONLY(&nes);
1429177595Sweongyo				else
1430177595Sweongyo					NFSVNO_EXINIT(&nes);
1431177595Sweongyo				nd->nd_repstat = nfsvno_accchk(vp,
1432177595Sweongyo				    VWRITE, cred, &nes, p,
1433177595Sweongyo				    NFSACCCHK_NOOVERRIDE,
1434177595Sweongyo				    NFSACCCHK_VPISLOCKED, NULL);
1435177595Sweongyo				nd->nd_repstat = nfsrv_opencheck(clientid,
1436177595Sweongyo				    stateidp, stp, vp, nd, p, nd->nd_repstat);
1437177595Sweongyo				if (!nd->nd_repstat) {
1438177595Sweongyo					tempsize = nvap->na_size;
1439177595Sweongyo					NFSVNO_ATTRINIT(nvap);
1440177595Sweongyo					nvap->na_size = tempsize;
1441177595Sweongyo					nd->nd_repstat = VOP_SETATTR(vp,
1442177595Sweongyo					    &nvap->na_vattr, cred);
1443177595Sweongyo				}
1444177595Sweongyo			} else if (vp->v_type == VREG) {
1445177595Sweongyo				nd->nd_repstat = nfsrv_opencheck(clientid,
1446177595Sweongyo				    stateidp, stp, vp, nd, p, nd->nd_repstat);
1447177595Sweongyo			}
1448177595Sweongyo		}
1449177595Sweongyo	} else {
1450177595Sweongyo		if (ndp->ni_cnd.cn_flags & HASBUF)
1451177595Sweongyo			nfsvno_relpathbuf(ndp);
1452177595Sweongyo		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
1453177595Sweongyo			vrele(ndp->ni_startdir);
1454177595Sweongyo			if (ndp->ni_dvp == ndp->ni_vp)
1455177595Sweongyo				vrele(ndp->ni_dvp);
1456177595Sweongyo			else
1457177595Sweongyo				vput(ndp->ni_dvp);
1458177595Sweongyo			if (ndp->ni_vp)
1459177595Sweongyo				vput(ndp->ni_vp);
1460177595Sweongyo		}
1461177595Sweongyo	}
1462177595Sweongyo	*vpp = vp;
1463177595Sweongyo
1464177595Sweongyo	NFSEXITCODE2(0, nd);
1465177595Sweongyo}
1466177595Sweongyo
1467177595Sweongyo/*
1468177595Sweongyo * Updates the file rev and sets the mtime and ctime
1469177595Sweongyo * to the current clock time, returning the va_filerev and va_Xtime
1470177595Sweongyo * values.
1471177595Sweongyo */
1472177595Sweongyovoid
1473177595Sweongyonfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
1474177595Sweongyo    struct ucred *cred, struct thread *p)
1475177595Sweongyo{
1476177595Sweongyo	struct vattr va;
1477177595Sweongyo
1478177595Sweongyo	VATTR_NULL(&va);
1479177595Sweongyo	getnanotime(&va.va_mtime);
1480177595Sweongyo	(void) VOP_SETATTR(vp, &va, cred);
1481177595Sweongyo	(void) nfsvno_getattr(vp, nvap, cred, p, 1);
1482177595Sweongyo}
1483177595Sweongyo
1484177595Sweongyo/*
1485177595Sweongyo * Glue routine to nfsv4_fillattr().
1486177595Sweongyo */
1487177595Sweongyoint
1488177595Sweongyonfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
1489177595Sweongyo    struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
1490177595Sweongyo    struct ucred *cred, struct thread *p, int isdgram, int reterr,
1491177595Sweongyo    int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
1492177595Sweongyo{
1493177595Sweongyo	int error;
1494177595Sweongyo
1495177595Sweongyo	error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
1496178354Ssam	    attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
1497177595Sweongyo	    mounted_on_fileno);
1498177595Sweongyo	NFSEXITCODE2(0, nd);
1499177595Sweongyo	return (error);
1500177595Sweongyo}
1501177595Sweongyo
1502177595Sweongyo/* Since the Readdir vnode ops vary, put the entire functions in here. */
1503177595Sweongyo/*
1504177595Sweongyo * nfs readdir service
1505178354Ssam * - mallocs what it thinks is enough to read
1506177595Sweongyo *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
1507177595Sweongyo * - calls VOP_READDIR()
1508177595Sweongyo * - loops around building the reply
1509177595Sweongyo *	if the output generated exceeds count break out of loop
1510177595Sweongyo *	The NFSM_CLGET macro is used here so that the reply will be packed
1511177595Sweongyo *	tightly in mbuf clusters.
1512177595Sweongyo * - it trims out records with d_fileno == 0
1513177595Sweongyo *	this doesn't matter for Unix clients, but they might confuse clients
1514177595Sweongyo *	for other os'.
1515177595Sweongyo * - it trims out records with d_type == DT_WHT
1516177595Sweongyo *	these cannot be seen through NFS (unless we extend the protocol)
1517177595Sweongyo *     The alternate call nfsrvd_readdirplus() does lookups as well.
1518178354Ssam * PS: The NFS protocol spec. does not clarify what the "count" byte
1519177595Sweongyo *	argument is a count of.. just name strings and file id's or the
1520177595Sweongyo *	entire reply rpc or ...
1521177595Sweongyo *	I tried just file name and id sizes and it confused the Sun client,
1522177595Sweongyo *	so I am using the full rpc size now. The "paranoia.." comment refers
1523177595Sweongyo *	to including the status longwords that are not a part of the dir.
1524177595Sweongyo *	"entry" structures, but are in the rpc.
1525177595Sweongyo */
1526177595Sweongyoint
1527177595Sweongyonfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
1528178354Ssam    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1529177595Sweongyo{
1530177595Sweongyo	struct dirent *dp;
1531177595Sweongyo	u_int32_t *tl;
1532177595Sweongyo	int dirlen;
1533177595Sweongyo	char *cpos, *cend, *rbuf;
1534177595Sweongyo	struct nfsvattr at;
1535177595Sweongyo	int nlen, error = 0, getret = 1;
1536177595Sweongyo	int siz, cnt, fullsiz, eofflag, ncookies;
1537177595Sweongyo	u_int64_t off, toff, verf;
1538177595Sweongyo	u_long *cookies = NULL, *cookiep;
1539177595Sweongyo	struct uio io;
1540177595Sweongyo	struct iovec iv;
1541177595Sweongyo	int not_zfs;
1542177595Sweongyo
1543177595Sweongyo	if (nd->nd_repstat) {
1544177595Sweongyo		nfsrv_postopattr(nd, getret, &at);
1545199559Sjhb		goto out;
1546178354Ssam	}
1547177595Sweongyo	if (nd->nd_flag & ND_NFSV2) {
1548178354Ssam		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1549178354Ssam		off = fxdr_unsigned(u_quad_t, *tl++);
1550178354Ssam	} else {
1551178354Ssam		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1552178354Ssam		off = fxdr_hyper(tl);
1553178354Ssam		tl += 2;
1554178354Ssam		verf = fxdr_hyper(tl);
1555178354Ssam		tl += 2;
1556178354Ssam	}
1557177595Sweongyo	toff = off;
1558178354Ssam	cnt = fxdr_unsigned(int, *tl);
1559178354Ssam	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
1560177595Sweongyo		cnt = NFS_SRVMAXDATA(nd);
1561177595Sweongyo	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1562177595Sweongyo	fullsiz = siz;
1563178354Ssam	if (nd->nd_flag & ND_NFSV3) {
1564178354Ssam		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
1565177595Sweongyo		    p, 1);
1566177595Sweongyo#if 0
1567177595Sweongyo		/*
1568177595Sweongyo		 * va_filerev is not sufficient as a cookie verifier,
1569177595Sweongyo		 * since it is not supposed to change when entries are
1570177595Sweongyo		 * removed/added unless that offset cookies returned to
1571177595Sweongyo		 * the client are no longer valid.
1572177595Sweongyo		 */
1573178354Ssam		if (!nd->nd_repstat && toff && verf != at.na_filerev)
1574178354Ssam			nd->nd_repstat = NFSERR_BAD_COOKIE;
1575177595Sweongyo#endif
1576177595Sweongyo	}
1577177595Sweongyo	if (nd->nd_repstat == 0 && cnt == 0) {
1578177595Sweongyo		if (nd->nd_flag & ND_NFSV2)
1579177595Sweongyo			/* NFSv2 does not have NFSERR_TOOSMALL */
1580177595Sweongyo			nd->nd_repstat = EPERM;
1581177595Sweongyo		else
1582177595Sweongyo			nd->nd_repstat = NFSERR_TOOSMALL;
1583177595Sweongyo	}
1584177595Sweongyo	if (!nd->nd_repstat)
1585177595Sweongyo		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1586177595Sweongyo		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1587195049Srwatson		    NFSACCCHK_VPISLOCKED, NULL);
1588177595Sweongyo	if (nd->nd_repstat) {
1589177595Sweongyo		vput(vp);
1590177595Sweongyo		if (nd->nd_flag & ND_NFSV3)
1591177595Sweongyo			nfsrv_postopattr(nd, getret, &at);
1592177595Sweongyo		goto out;
1593177595Sweongyo	}
1594195049Srwatson	not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
1595177595Sweongyo	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1596177595Sweongyoagain:
1597177595Sweongyo	eofflag = 0;
1598177595Sweongyo	if (cookies) {
1599177595Sweongyo		free((caddr_t)cookies, M_TEMP);
1600177595Sweongyo		cookies = NULL;
1601177595Sweongyo	}
1602195049Srwatson
1603177595Sweongyo	iv.iov_base = rbuf;
1604177595Sweongyo	iv.iov_len = siz;
1605177595Sweongyo	io.uio_iov = &iv;
1606177595Sweongyo	io.uio_iovcnt = 1;
1607177595Sweongyo	io.uio_offset = (off_t)off;
1608177595Sweongyo	io.uio_resid = siz;
1609177595Sweongyo	io.uio_segflg = UIO_SYSSPACE;
1610177595Sweongyo	io.uio_rw = UIO_READ;
1611177595Sweongyo	io.uio_td = NULL;
1612177595Sweongyo	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1613177595Sweongyo	    &cookies);
1614177595Sweongyo	off = (u_int64_t)io.uio_offset;
1615177595Sweongyo	if (io.uio_resid)
1616177595Sweongyo		siz -= io.uio_resid;
1617178354Ssam
1618178354Ssam	if (!cookies && !nd->nd_repstat)
1619177595Sweongyo		nd->nd_repstat = NFSERR_PERM;
1620177595Sweongyo	if (nd->nd_flag & ND_NFSV3) {
1621177595Sweongyo		getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
1622177595Sweongyo		if (!nd->nd_repstat)
1623177595Sweongyo			nd->nd_repstat = getret;
1624177595Sweongyo	}
1625177595Sweongyo
1626177595Sweongyo	/*
1627177595Sweongyo	 * Handles the failed cases. nd->nd_repstat == 0 past here.
1628177595Sweongyo	 */
1629177595Sweongyo	if (nd->nd_repstat) {
1630177595Sweongyo		vput(vp);
1631177595Sweongyo		free((caddr_t)rbuf, M_TEMP);
1632177595Sweongyo		if (cookies)
1633177595Sweongyo			free((caddr_t)cookies, M_TEMP);
1634177595Sweongyo		if (nd->nd_flag & ND_NFSV3)
1635177595Sweongyo			nfsrv_postopattr(nd, getret, &at);
1636177595Sweongyo		goto out;
1637177595Sweongyo	}
1638177595Sweongyo	/*
1639177595Sweongyo	 * If nothing read, return eof
1640177595Sweongyo	 * rpc reply
1641177595Sweongyo	 */
1642177595Sweongyo	if (siz == 0) {
1643177595Sweongyo		vput(vp);
1644177595Sweongyo		if (nd->nd_flag & ND_NFSV2) {
1645177595Sweongyo			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1646177595Sweongyo		} else {
1647177595Sweongyo			nfsrv_postopattr(nd, getret, &at);
1648177595Sweongyo			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1649177595Sweongyo			txdr_hyper(at.na_filerev, tl);
1650177595Sweongyo			tl += 2;
1651177595Sweongyo		}
1652177595Sweongyo		*tl++ = newnfs_false;
1653177595Sweongyo		*tl = newnfs_true;
1654177595Sweongyo		FREE((caddr_t)rbuf, M_TEMP);
1655177595Sweongyo		FREE((caddr_t)cookies, M_TEMP);
1656178354Ssam		goto out;
1657178354Ssam	}
1658177595Sweongyo
1659177595Sweongyo	/*
1660177595Sweongyo	 * Check for degenerate cases of nothing useful read.
1661178354Ssam	 * If so go try again
1662177595Sweongyo	 */
1663177595Sweongyo	cpos = rbuf;
1664177595Sweongyo	cend = rbuf + siz;
1665177595Sweongyo	dp = (struct dirent *)cpos;
1666177595Sweongyo	cookiep = cookies;
1667177595Sweongyo
1668177595Sweongyo	/*
1669177595Sweongyo	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1670177595Sweongyo	 * directory offset up to a block boundary, so it is necessary to
1671177595Sweongyo	 * skip over the records that precede the requested offset. This
1672177595Sweongyo	 * requires the assumption that file offset cookies monotonically
1673177595Sweongyo	 * increase.
1674177595Sweongyo	 * Since the offset cookies don't monotonically increase for ZFS,
1675177595Sweongyo	 * this is not done when ZFS is the file system.
1676177595Sweongyo	 */
1677177595Sweongyo	while (cpos < cend && ncookies > 0 &&
1678177595Sweongyo	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1679177595Sweongyo	     (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
1680177595Sweongyo		cpos += dp->d_reclen;
1681177595Sweongyo		dp = (struct dirent *)cpos;
1682177595Sweongyo		cookiep++;
1683177595Sweongyo		ncookies--;
1684177595Sweongyo	}
1685177595Sweongyo	if (cpos >= cend || ncookies == 0) {
1686177595Sweongyo		siz = fullsiz;
1687177595Sweongyo		toff = off;
1688177595Sweongyo		goto again;
1689178354Ssam	}
1690177595Sweongyo	vput(vp);
1691177595Sweongyo
1692177595Sweongyo	/*
1693177595Sweongyo	 * dirlen is the size of the reply, including all XDR and must
1694177595Sweongyo	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
1695177595Sweongyo	 * if the XDR should be included in "count", but to be safe, we do.
1696177595Sweongyo	 * (Include the two booleans at the end of the reply in dirlen now.)
1697177595Sweongyo	 */
1698177595Sweongyo	if (nd->nd_flag & ND_NFSV3) {
1699177595Sweongyo		nfsrv_postopattr(nd, getret, &at);
1700177595Sweongyo		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1701177595Sweongyo		txdr_hyper(at.na_filerev, tl);
1702177595Sweongyo		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1703177595Sweongyo	} else {
1704177595Sweongyo		dirlen = 2 * NFSX_UNSIGNED;
1705177595Sweongyo	}
1706177595Sweongyo
1707177595Sweongyo	/* Loop through the records and build reply */
1708177595Sweongyo	while (cpos < cend && ncookies > 0) {
1709199559Sjhb		nlen = dp->d_namlen;
1710199559Sjhb		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1711178354Ssam			nlen <= NFS_MAXNAMLEN) {
1712178354Ssam			if (nd->nd_flag & ND_NFSV3)
1713178354Ssam				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1714178354Ssam			else
1715177595Sweongyo				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1716177595Sweongyo			if (dirlen > cnt) {
1717177595Sweongyo				eofflag = 0;
1718177595Sweongyo				break;
1719177595Sweongyo			}
1720177595Sweongyo
1721177595Sweongyo			/*
1722177595Sweongyo			 * Build the directory record xdr from
1723177595Sweongyo			 * the dirent entry.
1724177595Sweongyo			 */
1725177595Sweongyo			if (nd->nd_flag & ND_NFSV3) {
1726177595Sweongyo				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1727178354Ssam				*tl++ = newnfs_true;
1728178354Ssam				*tl++ = 0;
1729178354Ssam			} else {
1730177595Sweongyo				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1731177595Sweongyo				*tl++ = newnfs_true;
1732177595Sweongyo			}
1733177595Sweongyo			*tl = txdr_unsigned(dp->d_fileno);
1734177595Sweongyo			(void) nfsm_strtom(nd, dp->d_name, nlen);
1735177595Sweongyo			if (nd->nd_flag & ND_NFSV3) {
1736177595Sweongyo				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1737177595Sweongyo				*tl++ = 0;
1738177595Sweongyo			} else
1739177595Sweongyo				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1740177595Sweongyo			*tl = txdr_unsigned(*cookiep);
1741177595Sweongyo		}
1742177595Sweongyo		cpos += dp->d_reclen;
1743177595Sweongyo		dp = (struct dirent *)cpos;
1744177595Sweongyo		cookiep++;
1745177595Sweongyo		ncookies--;
1746177595Sweongyo	}
1747177595Sweongyo	if (cpos < cend)
1748177595Sweongyo		eofflag = 0;
1749177595Sweongyo	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1750177595Sweongyo	*tl++ = newnfs_false;
1751178354Ssam	if (eofflag)
1752178354Ssam		*tl = newnfs_true;
1753178354Ssam	else
1754178354Ssam		*tl = newnfs_false;
1755177595Sweongyo	FREE((caddr_t)rbuf, M_TEMP);
1756177595Sweongyo	FREE((caddr_t)cookies, M_TEMP);
1757177595Sweongyo
1758178354Ssamout:
1759178354Ssam	NFSEXITCODE2(0, nd);
1760178354Ssam	return (0);
1761177595Sweongyonfsmout:
1762177595Sweongyo	vput(vp);
1763178354Ssam	NFSEXITCODE2(error, nd);
1764177595Sweongyo	return (error);
1765177595Sweongyo}
1766177595Sweongyo
1767177595Sweongyo/*
1768178354Ssam * Readdirplus for V3 and Readdir for V4.
1769178354Ssam */
1770177595Sweongyoint
1771177595Sweongyonfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
1772177595Sweongyo    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1773177595Sweongyo{
1774177595Sweongyo	struct dirent *dp;
1775177595Sweongyo	u_int32_t *tl;
1776177595Sweongyo	int dirlen;
1777177595Sweongyo	char *cpos, *cend, *rbuf;
1778177595Sweongyo	struct vnode *nvp;
1779177595Sweongyo	fhandle_t nfh;
1780177595Sweongyo	struct nfsvattr nva, at, *nvap = &nva;
1781177595Sweongyo	struct mbuf *mb0, *mb1;
1782177595Sweongyo	struct nfsreferral *refp;
1783177595Sweongyo	int nlen, r, error = 0, getret = 1, usevget = 1;
1784178354Ssam	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
1785177595Sweongyo	caddr_t bpos0, bpos1;
1786177595Sweongyo	u_int64_t off, toff, verf;
1787177595Sweongyo	u_long *cookies = NULL, *cookiep;
1788177595Sweongyo	nfsattrbit_t attrbits, rderrbits, savbits;
1789177595Sweongyo	struct uio io;
1790177595Sweongyo	struct iovec iv;
1791177595Sweongyo	struct componentname cn;
1792177595Sweongyo	int at_root, needs_unbusy, not_zfs, supports_nfsv4acls;
1793177595Sweongyo	struct mount *mp, *new_mp;
1794177595Sweongyo	uint64_t mounted_on_fileno;
1795177595Sweongyo
1796177595Sweongyo	if (nd->nd_repstat) {
1797177595Sweongyo		nfsrv_postopattr(nd, getret, &at);
1798177595Sweongyo		goto out;
1799177595Sweongyo	}
1800177595Sweongyo	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
1801177595Sweongyo	off = fxdr_hyper(tl);
1802177595Sweongyo	toff = off;
1803177595Sweongyo	tl += 2;
1804177595Sweongyo	verf = fxdr_hyper(tl);
1805177595Sweongyo	tl += 2;
1806177595Sweongyo	siz = fxdr_unsigned(int, *tl++);
1807177595Sweongyo	cnt = fxdr_unsigned(int, *tl);
1808178354Ssam
1809177595Sweongyo	/*
1810178354Ssam	 * Use the server's maximum data transfer size as the upper bound
1811178354Ssam	 * on reply datalen.
1812177595Sweongyo	 */
1813177595Sweongyo	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
1814177595Sweongyo		cnt = NFS_SRVMAXDATA(nd);
1815177595Sweongyo
1816178354Ssam	/*
1817177595Sweongyo	 * siz is a "hint" of how much directory information (name, fileid,
1818177595Sweongyo	 * cookie) should be in the reply. At least one client "hints" 0,
1819177595Sweongyo	 * so I set it to cnt for that case. I also round it up to the
1820178354Ssam	 * next multiple of DIRBLKSIZ.
1821177595Sweongyo	 */
1822178354Ssam	if (siz <= 0)
1823178354Ssam		siz = cnt;
1824178354Ssam	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1825178354Ssam
1826178354Ssam	if (nd->nd_flag & ND_NFSV4) {
1827178354Ssam		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1828178354Ssam		if (error)
1829178354Ssam			goto nfsmout;
1830178354Ssam		NFSSET_ATTRBIT(&savbits, &attrbits);
1831177595Sweongyo		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
1832178354Ssam		NFSZERO_ATTRBIT(&rderrbits);
1833178354Ssam		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
1834178354Ssam	} else {
1835177595Sweongyo		NFSZERO_ATTRBIT(&attrbits);
1836178354Ssam	}
1837178354Ssam	fullsiz = siz;
1838177595Sweongyo	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
1839178354Ssam	if (!nd->nd_repstat) {
1840178354Ssam	    if (off && verf != at.na_filerev) {
1841178354Ssam		/*
1842178354Ssam		 * va_filerev is not sufficient as a cookie verifier,
1843178354Ssam		 * since it is not supposed to change when entries are
1844178354Ssam		 * removed/added unless that offset cookies returned to
1845178354Ssam		 * the client are no longer valid.
1846177595Sweongyo		 */
1847178354Ssam#if 0
1848177595Sweongyo		if (nd->nd_flag & ND_NFSV4) {
1849177595Sweongyo			nd->nd_repstat = NFSERR_NOTSAME;
1850177595Sweongyo		} else {
1851177595Sweongyo			nd->nd_repstat = NFSERR_BAD_COOKIE;
1852177595Sweongyo		}
1853177595Sweongyo#endif
1854177595Sweongyo	    } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
1855177595Sweongyo		nd->nd_repstat = NFSERR_BAD_COOKIE;
1856177595Sweongyo	    }
1857177595Sweongyo	}
1858177595Sweongyo	if (!nd->nd_repstat && vp->v_type != VDIR)
1859177595Sweongyo		nd->nd_repstat = NFSERR_NOTDIR;
1860177595Sweongyo	if (!nd->nd_repstat && cnt == 0)
1861177595Sweongyo		nd->nd_repstat = NFSERR_TOOSMALL;
1862177595Sweongyo	if (!nd->nd_repstat)
1863177595Sweongyo		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1864177595Sweongyo		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1865177595Sweongyo		    NFSACCCHK_VPISLOCKED, NULL);
1866177595Sweongyo	if (nd->nd_repstat) {
1867177595Sweongyo		vput(vp);
1868177595Sweongyo		if (nd->nd_flag & ND_NFSV3)
1869177595Sweongyo			nfsrv_postopattr(nd, getret, &at);
1870177595Sweongyo		goto out;
1871177595Sweongyo	}
1872177595Sweongyo	not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
1873177595Sweongyo
1874177595Sweongyo	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1875177595Sweongyoagain:
1876177595Sweongyo	eofflag = 0;
1877177595Sweongyo	if (cookies) {
1878177595Sweongyo		free((caddr_t)cookies, M_TEMP);
1879177595Sweongyo		cookies = NULL;
1880177595Sweongyo	}
1881177595Sweongyo
1882177595Sweongyo	iv.iov_base = rbuf;
1883177595Sweongyo	iv.iov_len = siz;
1884177595Sweongyo	io.uio_iov = &iv;
1885177595Sweongyo	io.uio_iovcnt = 1;
1886177595Sweongyo	io.uio_offset = (off_t)off;
1887177595Sweongyo	io.uio_resid = siz;
1888177595Sweongyo	io.uio_segflg = UIO_SYSSPACE;
1889177595Sweongyo	io.uio_rw = UIO_READ;
1890177595Sweongyo	io.uio_td = NULL;
1891177595Sweongyo	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1892177595Sweongyo	    &cookies);
1893177595Sweongyo	off = (u_int64_t)io.uio_offset;
1894177595Sweongyo	if (io.uio_resid)
1895177595Sweongyo		siz -= io.uio_resid;
1896177595Sweongyo
1897177595Sweongyo	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
1898177595Sweongyo
1899177595Sweongyo	if (!cookies && !nd->nd_repstat)
1900177595Sweongyo		nd->nd_repstat = NFSERR_PERM;
1901177595Sweongyo	if (!nd->nd_repstat)
1902177595Sweongyo		nd->nd_repstat = getret;
1903177595Sweongyo	if (nd->nd_repstat) {
1904177595Sweongyo		vput(vp);
1905177595Sweongyo		if (cookies)
1906177595Sweongyo			free((caddr_t)cookies, M_TEMP);
1907177595Sweongyo		free((caddr_t)rbuf, M_TEMP);
1908177595Sweongyo		if (nd->nd_flag & ND_NFSV3)
1909177595Sweongyo			nfsrv_postopattr(nd, getret, &at);
1910177595Sweongyo		goto out;
1911177595Sweongyo	}
1912177595Sweongyo	/*
1913177595Sweongyo	 * If nothing read, return eof
1914177595Sweongyo	 * rpc reply
1915177595Sweongyo	 */
1916177595Sweongyo	if (siz == 0) {
1917177595Sweongyo		vput(vp);
1918177595Sweongyo		if (nd->nd_flag & ND_NFSV3)
1919177595Sweongyo			nfsrv_postopattr(nd, getret, &at);
1920177595Sweongyo		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1921177595Sweongyo		txdr_hyper(at.na_filerev, tl);
1922177595Sweongyo		tl += 2;
1923177595Sweongyo		*tl++ = newnfs_false;
1924177595Sweongyo		*tl = newnfs_true;
1925177595Sweongyo		free((caddr_t)cookies, M_TEMP);
1926177595Sweongyo		free((caddr_t)rbuf, M_TEMP);
1927177595Sweongyo		goto out;
1928177595Sweongyo	}
1929177595Sweongyo
1930177595Sweongyo	/*
1931177595Sweongyo	 * Check for degenerate cases of nothing useful read.
1932177595Sweongyo	 * If so go try again
1933177595Sweongyo	 */
1934177595Sweongyo	cpos = rbuf;
1935177595Sweongyo	cend = rbuf + siz;
1936177595Sweongyo	dp = (struct dirent *)cpos;
1937177595Sweongyo	cookiep = cookies;
1938177595Sweongyo
1939177595Sweongyo	/*
1940177595Sweongyo	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1941177595Sweongyo	 * directory offset up to a block boundary, so it is necessary to
1942177595Sweongyo	 * skip over the records that precede the requested offset. This
1943177595Sweongyo	 * requires the assumption that file offset cookies monotonically
1944177595Sweongyo	 * increase.
1945177595Sweongyo	 * Since the offset cookies don't monotonically increase for ZFS,
1946177595Sweongyo	 * this is not done when ZFS is the file system.
1947177595Sweongyo	 */
1948177595Sweongyo	while (cpos < cend && ncookies > 0 &&
1949177595Sweongyo	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1950177595Sweongyo	   (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff) ||
1951177595Sweongyo	   ((nd->nd_flag & ND_NFSV4) &&
1952177595Sweongyo	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1953177595Sweongyo	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
1954177595Sweongyo		cpos += dp->d_reclen;
1955177595Sweongyo		dp = (struct dirent *)cpos;
1956177595Sweongyo		cookiep++;
1957177595Sweongyo		ncookies--;
1958177595Sweongyo	}
1959177595Sweongyo	if (cpos >= cend || ncookies == 0) {
1960177595Sweongyo		siz = fullsiz;
1961177595Sweongyo		toff = off;
1962177595Sweongyo		goto again;
1963177595Sweongyo	}
1964177595Sweongyo
1965177595Sweongyo	/*
1966177595Sweongyo	 * Busy the file system so that the mount point won't go away
1967177595Sweongyo	 * and, as such, VFS_VGET() can be used safely.
1968177595Sweongyo	 */
1969177595Sweongyo	mp = vp->v_mount;
1970177595Sweongyo	vfs_ref(mp);
1971177595Sweongyo	NFSVOPUNLOCK(vp, 0);
1972177595Sweongyo	nd->nd_repstat = vfs_busy(mp, 0);
1973177595Sweongyo	vfs_rel(mp);
1974177595Sweongyo	if (nd->nd_repstat != 0) {
1975177595Sweongyo		vrele(vp);
1976177595Sweongyo		free(cookies, M_TEMP);
1977177595Sweongyo		free(rbuf, M_TEMP);
1978177595Sweongyo		if (nd->nd_flag & ND_NFSV3)
1979177595Sweongyo			nfsrv_postopattr(nd, getret, &at);
1980177595Sweongyo		goto out;
1981177595Sweongyo	}
1982177595Sweongyo
1983177595Sweongyo	/*
1984177595Sweongyo	 * Save this position, in case there is an error before one entry
1985177595Sweongyo	 * is created.
1986177595Sweongyo	 */
1987177595Sweongyo	mb0 = nd->nd_mb;
1988177595Sweongyo	bpos0 = nd->nd_bpos;
1989177595Sweongyo
1990177595Sweongyo	/*
1991177595Sweongyo	 * Fill in the first part of the reply.
1992177595Sweongyo	 * dirlen is the reply length in bytes and cannot exceed cnt.
1993177595Sweongyo	 * (Include the two booleans at the end of the reply in dirlen now,
1994177595Sweongyo	 *  so we recognize when we have exceeded cnt.)
1995177595Sweongyo	 */
1996177595Sweongyo	if (nd->nd_flag & ND_NFSV3) {
1997177595Sweongyo		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1998177595Sweongyo		nfsrv_postopattr(nd, getret, &at);
1999177595Sweongyo	} else {
2000177595Sweongyo		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
2001177595Sweongyo	}
2002177595Sweongyo	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
2003177595Sweongyo	txdr_hyper(at.na_filerev, tl);
2004177595Sweongyo
2005177595Sweongyo	/*
2006177595Sweongyo	 * Save this position, in case there is an empty reply needed.
2007177595Sweongyo	 */
2008177595Sweongyo	mb1 = nd->nd_mb;
2009177595Sweongyo	bpos1 = nd->nd_bpos;
2010177595Sweongyo
2011177595Sweongyo	/* Loop through the records and build reply */
2012177595Sweongyo	entrycnt = 0;
2013177595Sweongyo	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
2014177595Sweongyo		nlen = dp->d_namlen;
2015177595Sweongyo		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
2016177595Sweongyo		    nlen <= NFS_MAXNAMLEN &&
2017177595Sweongyo		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
2018177595Sweongyo		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
2019177595Sweongyo		      || (nlen == 1 && dp->d_name[0] != '.'))) {
2020177595Sweongyo			/*
2021177595Sweongyo			 * Save the current position in the reply, in case
2022177595Sweongyo			 * this entry exceeds cnt.
2023177595Sweongyo			 */
2024177595Sweongyo			mb1 = nd->nd_mb;
2025177595Sweongyo			bpos1 = nd->nd_bpos;
2026177595Sweongyo
2027177595Sweongyo			/*
2028177595Sweongyo			 * For readdir_and_lookup get the vnode using
2029177595Sweongyo			 * the file number.
2030177595Sweongyo			 */
2031177595Sweongyo			nvp = NULL;
2032177595Sweongyo			refp = NULL;
2033177595Sweongyo			r = 0;
2034177595Sweongyo			at_root = 0;
2035177595Sweongyo			needs_unbusy = 0;
2036178354Ssam			new_mp = mp;
2037178354Ssam			mounted_on_fileno = (uint64_t)dp->d_fileno;
2038177595Sweongyo			if ((nd->nd_flag & ND_NFSV3) ||
2039177595Sweongyo			    NFSNONZERO_ATTRBIT(&savbits)) {
2040177595Sweongyo				if (nd->nd_flag & ND_NFSV4)
2041177595Sweongyo					refp = nfsv4root_getreferral(NULL,
2042177595Sweongyo					    vp, dp->d_fileno);
2043177595Sweongyo				if (refp == NULL) {
2044177595Sweongyo					if (usevget)
2045177595Sweongyo						r = VFS_VGET(mp, dp->d_fileno,
2046177595Sweongyo						    LK_SHARED, &nvp);
2047177595Sweongyo					else
2048177595Sweongyo						r = EOPNOTSUPP;
2049177595Sweongyo					if (r == EOPNOTSUPP) {
2050177595Sweongyo						if (usevget) {
2051177595Sweongyo							usevget = 0;
2052177595Sweongyo							cn.cn_nameiop = LOOKUP;
2053177595Sweongyo							cn.cn_lkflags =
2054177595Sweongyo							    LK_SHARED |
2055177595Sweongyo							    LK_RETRY;
2056177595Sweongyo							cn.cn_cred =
2057177595Sweongyo							    nd->nd_cred;
2058177595Sweongyo							cn.cn_thread = p;
2059177595Sweongyo						}
2060177595Sweongyo						cn.cn_nameptr = dp->d_name;
2061177595Sweongyo						cn.cn_namelen = nlen;
2062178354Ssam						cn.cn_flags = ISLASTCN |
2063177595Sweongyo						    NOFOLLOW | LOCKLEAF |
2064177595Sweongyo						    MPSAFE;
2065177595Sweongyo						if (nlen == 2 &&
2066177595Sweongyo						    dp->d_name[0] == '.' &&
2067177595Sweongyo						    dp->d_name[1] == '.')
2068177595Sweongyo							cn.cn_flags |=
2069177595Sweongyo							    ISDOTDOT;
2070177595Sweongyo						if (NFSVOPLOCK(vp, LK_SHARED)
2071177595Sweongyo						    != 0) {
2072177595Sweongyo							nd->nd_repstat = EPERM;
2073177595Sweongyo							break;
2074177595Sweongyo						}
2075177595Sweongyo						if ((vp->v_vflag & VV_ROOT) != 0
2076177595Sweongyo						    && (cn.cn_flags & ISDOTDOT)
2077177595Sweongyo						    != 0) {
2078177595Sweongyo							vref(vp);
2079177595Sweongyo							nvp = vp;
2080177595Sweongyo							r = 0;
2081177595Sweongyo						} else {
2082177595Sweongyo							r = VOP_LOOKUP(vp, &nvp,
2083177595Sweongyo							    &cn);
2084177595Sweongyo							if (vp != nvp)
2085177595Sweongyo								NFSVOPUNLOCK(vp,
2086177595Sweongyo								    0);
2087177595Sweongyo						}
2088177595Sweongyo					}
2089177595Sweongyo
2090177595Sweongyo					/*
2091177595Sweongyo					 * For NFSv4, check to see if nvp is
2092177595Sweongyo					 * a mount point and get the mount
2093177595Sweongyo					 * point vnode, as required.
2094177595Sweongyo					 */
2095177595Sweongyo					if (r == 0 &&
2096177595Sweongyo					    nfsrv_enable_crossmntpt != 0 &&
2097177595Sweongyo					    (nd->nd_flag & ND_NFSV4) != 0 &&
2098177595Sweongyo					    nvp->v_type == VDIR &&
2099177595Sweongyo					    nvp->v_mountedhere != NULL) {
2100177595Sweongyo						new_mp = nvp->v_mountedhere;
2101177595Sweongyo						r = vfs_busy(new_mp, 0);
2102177595Sweongyo						vput(nvp);
2103177595Sweongyo						nvp = NULL;
2104177595Sweongyo						if (r == 0) {
2105177595Sweongyo							r = VFS_ROOT(new_mp,
2106177595Sweongyo							    LK_SHARED, &nvp);
2107177595Sweongyo							needs_unbusy = 1;
2108177595Sweongyo							if (r == 0)
2109177595Sweongyo								at_root = 1;
2110201758Smbr						}
2111177595Sweongyo					}
2112177595Sweongyo				}
2113177595Sweongyo				if (!r) {
2114177595Sweongyo				    if (refp == NULL &&
2115178354Ssam					((nd->nd_flag & ND_NFSV3) ||
2116177595Sweongyo					 NFSNONZERO_ATTRBIT(&attrbits))) {
2117177595Sweongyo					r = nfsvno_getfh(nvp, &nfh, p);
2118177595Sweongyo					if (!r)
2119177595Sweongyo					    r = nfsvno_getattr(nvp, nvap,
2120177595Sweongyo						nd->nd_cred, p, 1);
2121177595Sweongyo				    }
2122177595Sweongyo				} else {
2123177595Sweongyo				    nvp = NULL;
2124177595Sweongyo				}
2125177595Sweongyo				if (r) {
2126177595Sweongyo					if (!NFSISSET_ATTRBIT(&attrbits,
2127177595Sweongyo					    NFSATTRBIT_RDATTRERROR)) {
2128177595Sweongyo						if (nvp != NULL)
2129177595Sweongyo							vput(nvp);
2130177595Sweongyo						if (needs_unbusy != 0)
2131177595Sweongyo							vfs_unbusy(new_mp);
2132177595Sweongyo						nd->nd_repstat = r;
2133177595Sweongyo						break;
2134177595Sweongyo					}
2135177595Sweongyo				}
2136177595Sweongyo			}
2137177595Sweongyo
2138177595Sweongyo			/*
2139177595Sweongyo			 * Build the directory record xdr
2140177595Sweongyo			 */
2141177595Sweongyo			if (nd->nd_flag & ND_NFSV3) {
2142177595Sweongyo				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2143177595Sweongyo				*tl++ = newnfs_true;
2144177595Sweongyo				*tl++ = 0;
2145177595Sweongyo				*tl = txdr_unsigned(dp->d_fileno);
2146177595Sweongyo				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
2147177595Sweongyo				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2148177595Sweongyo				*tl++ = 0;
2149177595Sweongyo				*tl = txdr_unsigned(*cookiep);
2150177595Sweongyo				nfsrv_postopattr(nd, 0, nvap);
2151177595Sweongyo				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
2152177595Sweongyo				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
2153177595Sweongyo				if (nvp != NULL)
2154177595Sweongyo					vput(nvp);
2155177595Sweongyo			} else {
2156177595Sweongyo				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2157177595Sweongyo				*tl++ = newnfs_true;
2158177595Sweongyo				*tl++ = 0;
2159177595Sweongyo				*tl = txdr_unsigned(*cookiep);
2160177595Sweongyo				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
2161177595Sweongyo				if (nvp != NULL) {
2162192468Ssam					supports_nfsv4acls =
2163177595Sweongyo					    nfs_supportsnfsv4acls(nvp);
2164177595Sweongyo					NFSVOPUNLOCK(nvp, 0);
2165177595Sweongyo				} else
2166177595Sweongyo					supports_nfsv4acls = 0;
2167177595Sweongyo				if (refp != NULL) {
2168177595Sweongyo					dirlen += nfsrv_putreferralattr(nd,
2169177595Sweongyo					    &savbits, refp, 0,
2170177595Sweongyo					    &nd->nd_repstat);
2171177595Sweongyo					if (nd->nd_repstat) {
2172177595Sweongyo						if (nvp != NULL)
2173177595Sweongyo							vrele(nvp);
2174177595Sweongyo						if (needs_unbusy != 0)
2175177595Sweongyo							vfs_unbusy(new_mp);
2176177595Sweongyo						break;
2177177595Sweongyo					}
2178178354Ssam				} else if (r) {
2179178354Ssam					dirlen += nfsvno_fillattr(nd, new_mp,
2180192468Ssam					    nvp, nvap, &nfh, r, &rderrbits,
2181178354Ssam					    nd->nd_cred, p, isdgram, 0,
2182178354Ssam					    supports_nfsv4acls, at_root,
2183192468Ssam					    mounted_on_fileno);
2184177595Sweongyo				} else {
2185177595Sweongyo					dirlen += nfsvno_fillattr(nd, new_mp,
2186177595Sweongyo					    nvp, nvap, &nfh, r, &attrbits,
2187177595Sweongyo					    nd->nd_cred, p, isdgram, 0,
2188177595Sweongyo					    supports_nfsv4acls, at_root,
2189177595Sweongyo					    mounted_on_fileno);
2190177595Sweongyo				}
2191177595Sweongyo				if (nvp != NULL)
2192177595Sweongyo					vrele(nvp);
2193177595Sweongyo				dirlen += (3 * NFSX_UNSIGNED);
2194177595Sweongyo			}
2195177595Sweongyo			if (needs_unbusy != 0)
2196177595Sweongyo				vfs_unbusy(new_mp);
2197177595Sweongyo			if (dirlen <= cnt)
2198177595Sweongyo				entrycnt++;
2199177595Sweongyo		}
2200177595Sweongyo		cpos += dp->d_reclen;
2201177595Sweongyo		dp = (struct dirent *)cpos;
2202177595Sweongyo		cookiep++;
2203177595Sweongyo		ncookies--;
2204177595Sweongyo	}
2205177595Sweongyo	vrele(vp);
2206177595Sweongyo	vfs_unbusy(mp);
2207177595Sweongyo
2208177595Sweongyo	/*
2209177595Sweongyo	 * If dirlen > cnt, we must strip off the last entry. If that
2210177595Sweongyo	 * results in an empty reply, report NFSERR_TOOSMALL.
2211177595Sweongyo	 */
2212177595Sweongyo	if (dirlen > cnt || nd->nd_repstat) {
2213177595Sweongyo		if (!nd->nd_repstat && entrycnt == 0)
2214177595Sweongyo			nd->nd_repstat = NFSERR_TOOSMALL;
2215177595Sweongyo		if (nd->nd_repstat)
2216177595Sweongyo			newnfs_trimtrailing(nd, mb0, bpos0);
2217177595Sweongyo		else
2218177595Sweongyo			newnfs_trimtrailing(nd, mb1, bpos1);
2219177595Sweongyo		eofflag = 0;
2220177595Sweongyo	} else if (cpos < cend)
2221177595Sweongyo		eofflag = 0;
2222177595Sweongyo	if (!nd->nd_repstat) {
2223177595Sweongyo		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2224177595Sweongyo		*tl++ = newnfs_false;
2225178354Ssam		if (eofflag)
2226177595Sweongyo			*tl = newnfs_true;
2227177595Sweongyo		else
2228177595Sweongyo			*tl = newnfs_false;
2229177595Sweongyo	}
2230177595Sweongyo	FREE((caddr_t)cookies, M_TEMP);
2231177595Sweongyo	FREE((caddr_t)rbuf, M_TEMP);
2232177595Sweongyo
2233177595Sweongyoout:
2234177595Sweongyo	NFSEXITCODE2(0, nd);
2235177595Sweongyo	return (0);
2236177595Sweongyonfsmout:
2237177595Sweongyo	vput(vp);
2238177595Sweongyo	NFSEXITCODE2(error, nd);
2239177595Sweongyo	return (error);
2240177595Sweongyo}
2241177595Sweongyo
2242177595Sweongyo/*
2243177595Sweongyo * Get the settable attributes out of the mbuf list.
2244177595Sweongyo * (Return 0 or EBADRPC)
2245177595Sweongyo */
2246177595Sweongyoint
2247177595Sweongyonfsrv_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2248177595Sweongyo    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2249177595Sweongyo{
2250177595Sweongyo	u_int32_t *tl;
2251178354Ssam	struct nfsv2_sattr *sp;
2252199559Sjhb	struct timeval curtime;
2253177595Sweongyo	int error = 0, toclient = 0;
2254177595Sweongyo
2255177595Sweongyo	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
2256177595Sweongyo	case ND_NFSV2:
2257177595Sweongyo		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2258177595Sweongyo		/*
2259177595Sweongyo		 * Some old clients didn't fill in the high order 16bits.
2260177595Sweongyo		 * --> check the low order 2 bytes for 0xffff
2261177595Sweongyo		 */
2262177595Sweongyo		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
2263177595Sweongyo			nvap->na_mode = nfstov_mode(sp->sa_mode);
2264177595Sweongyo		if (sp->sa_uid != newnfs_xdrneg1)
2265177595Sweongyo			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
2266177595Sweongyo		if (sp->sa_gid != newnfs_xdrneg1)
2267177595Sweongyo			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
2268177595Sweongyo		if (sp->sa_size != newnfs_xdrneg1)
2269177595Sweongyo			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
2270177595Sweongyo		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
2271177595Sweongyo#ifdef notyet
2272177595Sweongyo			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
2273177595Sweongyo#else
2274177595Sweongyo			nvap->na_atime.tv_sec =
2275177595Sweongyo				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
2276177595Sweongyo			nvap->na_atime.tv_nsec = 0;
2277177595Sweongyo#endif
2278177595Sweongyo		}
2279177595Sweongyo		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
2280177595Sweongyo			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
2281177595Sweongyo		break;
2282177595Sweongyo	case ND_NFSV3:
2283177595Sweongyo		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2284177595Sweongyo		if (*tl == newnfs_true) {
2285177595Sweongyo			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2286177595Sweongyo			nvap->na_mode = nfstov_mode(*tl);
2287177595Sweongyo		}
2288178354Ssam		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2289177595Sweongyo		if (*tl == newnfs_true) {
2290177595Sweongyo			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2291			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
2292		}
2293		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2294		if (*tl == newnfs_true) {
2295			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2296			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
2297		}
2298		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2299		if (*tl == newnfs_true) {
2300			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2301			nvap->na_size = fxdr_hyper(tl);
2302		}
2303		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2304		switch (fxdr_unsigned(int, *tl)) {
2305		case NFSV3SATTRTIME_TOCLIENT:
2306			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2307			fxdr_nfsv3time(tl, &nvap->na_atime);
2308			toclient = 1;
2309			break;
2310		case NFSV3SATTRTIME_TOSERVER:
2311			NFSGETTIME(&curtime);
2312			nvap->na_atime.tv_sec = curtime.tv_sec;
2313			nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2314			nvap->na_vaflags |= VA_UTIMES_NULL;
2315			break;
2316		};
2317		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2318		switch (fxdr_unsigned(int, *tl)) {
2319		case NFSV3SATTRTIME_TOCLIENT:
2320			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2321			fxdr_nfsv3time(tl, &nvap->na_mtime);
2322			nvap->na_vaflags &= ~VA_UTIMES_NULL;
2323			break;
2324		case NFSV3SATTRTIME_TOSERVER:
2325			NFSGETTIME(&curtime);
2326			nvap->na_mtime.tv_sec = curtime.tv_sec;
2327			nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2328			if (!toclient)
2329				nvap->na_vaflags |= VA_UTIMES_NULL;
2330			break;
2331		};
2332		break;
2333	case ND_NFSV4:
2334		error = nfsv4_sattr(nd, nvap, attrbitp, aclp, p);
2335	};
2336nfsmout:
2337	NFSEXITCODE2(error, nd);
2338	return (error);
2339}
2340
2341/*
2342 * Handle the setable attributes for V4.
2343 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
2344 */
2345int
2346nfsv4_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2347    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2348{
2349	u_int32_t *tl;
2350	int attrsum = 0;
2351	int i, j;
2352	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
2353	int toclient = 0;
2354	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
2355	uid_t uid;
2356	gid_t gid;
2357	struct timeval curtime;
2358
2359	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
2360	if (error)
2361		goto nfsmout;
2362	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2363	attrsize = fxdr_unsigned(int, *tl);
2364
2365	/*
2366	 * Loop around getting the setable attributes. If an unsupported
2367	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
2368	 */
2369	if (retnotsup) {
2370		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2371		bitpos = NFSATTRBIT_MAX;
2372	} else {
2373		bitpos = 0;
2374	}
2375	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
2376	    if (attrsum > attrsize) {
2377		error = NFSERR_BADXDR;
2378		goto nfsmout;
2379	    }
2380	    if (NFSISSET_ATTRBIT(attrbitp, bitpos))
2381		switch (bitpos) {
2382		case NFSATTRBIT_SIZE:
2383			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2384			nvap->na_size = fxdr_hyper(tl);
2385			attrsum += NFSX_HYPER;
2386			break;
2387		case NFSATTRBIT_ACL:
2388			error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
2389			    p);
2390			if (error)
2391				goto nfsmout;
2392			if (aceerr && !nd->nd_repstat)
2393				nd->nd_repstat = aceerr;
2394			attrsum += aclsize;
2395			break;
2396		case NFSATTRBIT_ARCHIVE:
2397			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2398			if (!nd->nd_repstat)
2399				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2400			attrsum += NFSX_UNSIGNED;
2401			break;
2402		case NFSATTRBIT_HIDDEN:
2403			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2404			if (!nd->nd_repstat)
2405				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2406			attrsum += NFSX_UNSIGNED;
2407			break;
2408		case NFSATTRBIT_MIMETYPE:
2409			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2410			i = fxdr_unsigned(int, *tl);
2411			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
2412			if (error)
2413				goto nfsmout;
2414			if (!nd->nd_repstat)
2415				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2416			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
2417			break;
2418		case NFSATTRBIT_MODE:
2419			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2420			nvap->na_mode = nfstov_mode(*tl);
2421			attrsum += NFSX_UNSIGNED;
2422			break;
2423		case NFSATTRBIT_OWNER:
2424			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2425			j = fxdr_unsigned(int, *tl);
2426			if (j < 0) {
2427				error = NFSERR_BADXDR;
2428				goto nfsmout;
2429			}
2430			if (j > NFSV4_SMALLSTR)
2431				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2432			else
2433				cp = namestr;
2434			error = nfsrv_mtostr(nd, cp, j);
2435			if (error) {
2436				if (j > NFSV4_SMALLSTR)
2437					free(cp, M_NFSSTRING);
2438				goto nfsmout;
2439			}
2440			if (!nd->nd_repstat) {
2441				nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid,
2442				    p);
2443				if (!nd->nd_repstat)
2444					nvap->na_uid = uid;
2445			}
2446			if (j > NFSV4_SMALLSTR)
2447				free(cp, M_NFSSTRING);
2448			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2449			break;
2450		case NFSATTRBIT_OWNERGROUP:
2451			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2452			j = fxdr_unsigned(int, *tl);
2453			if (j < 0) {
2454				error = NFSERR_BADXDR;
2455				goto nfsmout;
2456			}
2457			if (j > NFSV4_SMALLSTR)
2458				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2459			else
2460				cp = namestr;
2461			error = nfsrv_mtostr(nd, cp, j);
2462			if (error) {
2463				if (j > NFSV4_SMALLSTR)
2464					free(cp, M_NFSSTRING);
2465				goto nfsmout;
2466			}
2467			if (!nd->nd_repstat) {
2468				nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid,
2469				    p);
2470				if (!nd->nd_repstat)
2471					nvap->na_gid = gid;
2472			}
2473			if (j > NFSV4_SMALLSTR)
2474				free(cp, M_NFSSTRING);
2475			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2476			break;
2477		case NFSATTRBIT_SYSTEM:
2478			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2479			if (!nd->nd_repstat)
2480				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2481			attrsum += NFSX_UNSIGNED;
2482			break;
2483		case NFSATTRBIT_TIMEACCESSSET:
2484			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2485			attrsum += NFSX_UNSIGNED;
2486			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2487			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2488			    fxdr_nfsv4time(tl, &nvap->na_atime);
2489			    toclient = 1;
2490			    attrsum += NFSX_V4TIME;
2491			} else {
2492			    NFSGETTIME(&curtime);
2493			    nvap->na_atime.tv_sec = curtime.tv_sec;
2494			    nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2495			    nvap->na_vaflags |= VA_UTIMES_NULL;
2496			}
2497			break;
2498		case NFSATTRBIT_TIMEBACKUP:
2499			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2500			if (!nd->nd_repstat)
2501				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2502			attrsum += NFSX_V4TIME;
2503			break;
2504		case NFSATTRBIT_TIMECREATE:
2505			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2506			if (!nd->nd_repstat)
2507				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2508			attrsum += NFSX_V4TIME;
2509			break;
2510		case NFSATTRBIT_TIMEMODIFYSET:
2511			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2512			attrsum += NFSX_UNSIGNED;
2513			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2514			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2515			    fxdr_nfsv4time(tl, &nvap->na_mtime);
2516			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
2517			    attrsum += NFSX_V4TIME;
2518			} else {
2519			    NFSGETTIME(&curtime);
2520			    nvap->na_mtime.tv_sec = curtime.tv_sec;
2521			    nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2522			    if (!toclient)
2523				nvap->na_vaflags |= VA_UTIMES_NULL;
2524			}
2525			break;
2526		default:
2527			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2528			/*
2529			 * set bitpos so we drop out of the loop.
2530			 */
2531			bitpos = NFSATTRBIT_MAX;
2532			break;
2533		};
2534	}
2535
2536	/*
2537	 * some clients pad the attrlist, so we need to skip over the
2538	 * padding.
2539	 */
2540	if (attrsum > attrsize) {
2541		error = NFSERR_BADXDR;
2542	} else {
2543		attrsize = NFSM_RNDUP(attrsize);
2544		if (attrsum < attrsize)
2545			error = nfsm_advance(nd, attrsize - attrsum, -1);
2546	}
2547nfsmout:
2548	NFSEXITCODE2(error, nd);
2549	return (error);
2550}
2551
2552/*
2553 * Check/setup export credentials.
2554 */
2555int
2556nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
2557    struct ucred *credanon)
2558{
2559	int error = 0;
2560
2561	/*
2562	 * Check/setup credentials.
2563	 */
2564	if (nd->nd_flag & ND_GSS)
2565		exp->nes_exflag &= ~MNT_EXPORTANON;
2566
2567	/*
2568	 * Check to see if the operation is allowed for this security flavor.
2569	 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
2570	 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
2571	 * Also, allow Secinfo, so that it can acquire the correct flavor(s).
2572	 */
2573	if (nfsvno_testexp(nd, exp) &&
2574	    nd->nd_procnum != NFSV4OP_SECINFO &&
2575	    nd->nd_procnum != NFSPROC_FSINFO) {
2576		if (nd->nd_flag & ND_NFSV4)
2577			error = NFSERR_WRONGSEC;
2578		else
2579			error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2580		goto out;
2581	}
2582
2583	/*
2584	 * Check to see if the file system is exported V4 only.
2585	 */
2586	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
2587		error = NFSERR_PROGNOTV4;
2588		goto out;
2589	}
2590
2591	/*
2592	 * Now, map the user credentials.
2593	 * (Note that ND_AUTHNONE will only be set for an NFSv3
2594	 *  Fsinfo RPC. If set for anything else, this code might need
2595	 *  to change.)
2596	 */
2597	if (NFSVNO_EXPORTED(exp) &&
2598	    ((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) ||
2599	     NFSVNO_EXPORTANON(exp) ||
2600	     (nd->nd_flag & ND_AUTHNONE))) {
2601		nd->nd_cred->cr_uid = credanon->cr_uid;
2602		nd->nd_cred->cr_gid = credanon->cr_gid;
2603		crsetgroups(nd->nd_cred, credanon->cr_ngroups,
2604		    credanon->cr_groups);
2605	}
2606
2607out:
2608	NFSEXITCODE2(error, nd);
2609	return (error);
2610}
2611
2612/*
2613 * Check exports.
2614 */
2615int
2616nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
2617    struct ucred **credp)
2618{
2619	int i, error, *secflavors;
2620
2621	error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2622	    &exp->nes_numsecflavor, &secflavors);
2623	if (error) {
2624		if (nfs_rootfhset) {
2625			exp->nes_exflag = 0;
2626			exp->nes_numsecflavor = 0;
2627			error = 0;
2628		}
2629	} else {
2630		/* Copy the security flavors. */
2631		for (i = 0; i < exp->nes_numsecflavor; i++)
2632			exp->nes_secflavors[i] = secflavors[i];
2633	}
2634	NFSEXITCODE(error);
2635	return (error);
2636}
2637
2638/*
2639 * Get a vnode for a file handle and export stuff.
2640 */
2641int
2642nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
2643    int lktype, struct vnode **vpp, struct nfsexstuff *exp,
2644    struct ucred **credp)
2645{
2646	int i, error, *secflavors;
2647
2648	*credp = NULL;
2649	exp->nes_numsecflavor = 0;
2650	if (VFS_NEEDSGIANT(mp))
2651		error = ESTALE;
2652	else
2653		error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, vpp);
2654	if (error != 0)
2655		/* Make sure the server replies ESTALE to the client. */
2656		error = ESTALE;
2657	if (nam && !error) {
2658		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2659		    &exp->nes_numsecflavor, &secflavors);
2660		if (error) {
2661			if (nfs_rootfhset) {
2662				exp->nes_exflag = 0;
2663				exp->nes_numsecflavor = 0;
2664				error = 0;
2665			} else {
2666				vput(*vpp);
2667			}
2668		} else {
2669			/* Copy the security flavors. */
2670			for (i = 0; i < exp->nes_numsecflavor; i++)
2671				exp->nes_secflavors[i] = secflavors[i];
2672		}
2673	}
2674	if (error == 0 && lktype == LK_SHARED)
2675		/*
2676		 * It would be much better to pass lktype to VFS_FHTOVP(),
2677		 * but this will have to do until VFS_FHTOVP() has a lock
2678		 * type argument like VFS_VGET().
2679		 */
2680		NFSVOPLOCK(*vpp, LK_DOWNGRADE | LK_RETRY);
2681
2682	NFSEXITCODE(error);
2683	return (error);
2684}
2685
2686/*
2687 * nfsd_fhtovp() - convert a fh to a vnode ptr
2688 * 	- look up fsid in mount list (if not found ret error)
2689 *	- get vp and export rights by calling nfsvno_fhtovp()
2690 *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
2691 *	  for AUTH_SYS
2692 *	- if mpp != NULL, return the mount point so that it can
2693 *	  be used for vn_finished_write() by the caller
2694 */
2695void
2696nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
2697    struct vnode **vpp, struct nfsexstuff *exp,
2698    struct mount **mpp, int startwrite, struct thread *p)
2699{
2700	struct mount *mp;
2701	struct ucred *credanon;
2702	fhandle_t *fhp;
2703
2704	fhp = (fhandle_t *)nfp->nfsrvfh_data;
2705	/*
2706	 * Check for the special case of the nfsv4root_fh.
2707	 */
2708	mp = vfs_busyfs(&fhp->fh_fsid);
2709	if (mpp != NULL)
2710		*mpp = mp;
2711	if (mp == NULL) {
2712		*vpp = NULL;
2713		nd->nd_repstat = ESTALE;
2714		goto out;
2715	}
2716
2717	if (startwrite)
2718		vn_start_write(NULL, mpp, V_WAIT);
2719
2720	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
2721	    &credanon);
2722	vfs_unbusy(mp);
2723
2724	/*
2725	 * For NFSv4 without a pseudo root fs, unexported file handles
2726	 * can be returned, so that Lookup works everywhere.
2727	 */
2728	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
2729	    !(nd->nd_flag & ND_NFSV4)) {
2730		vput(*vpp);
2731		nd->nd_repstat = EACCES;
2732	}
2733
2734	/*
2735	 * Personally, I've never seen any point in requiring a
2736	 * reserved port#, since only in the rare case where the
2737	 * clients are all boxes with secure system priviledges,
2738	 * does it provide any enhanced security, but... some people
2739	 * believe it to be useful and keep putting this code back in.
2740	 * (There is also some "security checker" out there that
2741	 *  complains if the nfs server doesn't enforce this.)
2742	 * However, note the following:
2743	 * RFC3530 (NFSv4) specifies that a reserved port# not be
2744	 *	required.
2745	 * RFC2623 recommends that, if a reserved port# is checked for,
2746	 *	that there be a way to turn that off--> ifdef'd.
2747	 */
2748#ifdef NFS_REQRSVPORT
2749	if (!nd->nd_repstat) {
2750		struct sockaddr_in *saddr;
2751		struct sockaddr_in6 *saddr6;
2752
2753		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
2754		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
2755		if (!(nd->nd_flag & ND_NFSV4) &&
2756		    ((saddr->sin_family == AF_INET &&
2757		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
2758		     (saddr6->sin6_family == AF_INET6 &&
2759		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
2760			vput(*vpp);
2761			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2762		}
2763	}
2764#endif	/* NFS_REQRSVPORT */
2765
2766	/*
2767	 * Check/setup credentials.
2768	 */
2769	if (!nd->nd_repstat) {
2770		nd->nd_saveduid = nd->nd_cred->cr_uid;
2771		nd->nd_repstat = nfsd_excred(nd, exp, credanon);
2772		if (nd->nd_repstat)
2773			vput(*vpp);
2774	}
2775	if (credanon != NULL)
2776		crfree(credanon);
2777	if (nd->nd_repstat) {
2778		if (startwrite)
2779			vn_finished_write(mp);
2780		*vpp = NULL;
2781		if (mpp != NULL)
2782			*mpp = NULL;
2783	}
2784
2785out:
2786	NFSEXITCODE2(0, nd);
2787}
2788
2789/*
2790 * glue for fp.
2791 */
2792int
2793fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
2794{
2795	struct filedesc *fdp;
2796	struct file *fp;
2797	int error = 0;
2798
2799	fdp = p->td_proc->p_fd;
2800	if (fd >= fdp->fd_nfiles ||
2801	    (fp = fdp->fd_ofiles[fd]) == NULL) {
2802		error = EBADF;
2803		goto out;
2804	}
2805	*fpp = fp;
2806
2807out:
2808	NFSEXITCODE(error);
2809	return (error);
2810}
2811
2812/*
2813 * Called from nfssvc() to update the exports list. Just call
2814 * vfs_export(). This has to be done, since the v4 root fake fs isn't
2815 * in the mount list.
2816 */
2817int
2818nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
2819{
2820	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
2821	int error = 0;
2822	struct nameidata nd;
2823	fhandle_t fh;
2824
2825	error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
2826	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
2827		nfs_rootfhset = 0;
2828	else if (error == 0) {
2829		if (nfsexargp->fspec == NULL) {
2830			error = EPERM;
2831			goto out;
2832		}
2833		/*
2834		 * If fspec != NULL, this is the v4root path.
2835		 */
2836		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE,
2837		    nfsexargp->fspec, p);
2838		if ((error = namei(&nd)) != 0)
2839			goto out;
2840		error = nfsvno_getfh(nd.ni_vp, &fh, p);
2841		vrele(nd.ni_vp);
2842		if (!error) {
2843			nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
2844			NFSBCOPY((caddr_t)&fh,
2845			    nfs_rootfh.nfsrvfh_data,
2846			    sizeof (fhandle_t));
2847			nfs_rootfhset = 1;
2848		}
2849	}
2850
2851out:
2852	NFSEXITCODE(error);
2853	return (error);
2854}
2855
2856/*
2857 * Get the tcp socket sequence numbers we need.
2858 * (Maybe this should be moved to the tcp sources?)
2859 */
2860int
2861nfsrv_getsocksndseq(struct socket *so, tcp_seq *maxp, tcp_seq *unap)
2862{
2863	struct inpcb *inp;
2864	struct tcpcb *tp;
2865	int error = 0;
2866
2867	inp = sotoinpcb(so);
2868	KASSERT(inp != NULL, ("nfsrv_getsocksndseq: inp == NULL"));
2869	INP_RLOCK(inp);
2870	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
2871		INP_RUNLOCK(inp);
2872		error = EPIPE;
2873		goto out;
2874	}
2875	tp = intotcpcb(inp);
2876	if (tp->t_state != TCPS_ESTABLISHED) {
2877		INP_RUNLOCK(inp);
2878		error = EPIPE;
2879		goto out;
2880	}
2881	*maxp = tp->snd_max;
2882	*unap = tp->snd_una;
2883	INP_RUNLOCK(inp);
2884
2885out:
2886	NFSEXITCODE(error);
2887	return (error);
2888}
2889
/*
 * This function needs to test to see if the system is near its limit
 * for memory allocation via malloc() or mget() and return True iff
 * either of these resources are near their limit.
 * XXX (For now, this is just a stub.)
 *
 * The stub only does anything when nfsrv_testmalloclimit is non-zero:
 * it then reports "near limit" (returns 1) on every 1000th call and
 * logs a message for the first, and then every 100th, such report.
 * Returns 0 otherwise.
 */
int nfsrv_testmalloclimit = 0;
int
nfsrv_mallocmget_limit(void)
{
	/*
	 * These count for as long as the system is up, so keep them
	 * unsigned: wrapping is well defined, signed overflow is not.
	 */
	static unsigned int printmesg = 0;
	static unsigned int testval = 1;

	if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) {
		if ((printmesg++ % 100) == 0)
			printf("nfsd: malloc/mget near limit\n");
		return (1);
	}
	return (0);
}
2910
2911/*
2912 * BSD specific initialization of a mount point.
2913 */
2914void
2915nfsd_mntinit(void)
2916{
2917	static int inited = 0;
2918
2919	if (inited)
2920		return;
2921	inited = 1;
2922	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
2923	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
2924	TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
2925	nfsv4root_mnt.mnt_export = NULL;
2926	TAILQ_INIT(&nfsv4root_opt);
2927	TAILQ_INIT(&nfsv4root_newopt);
2928	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
2929	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
2930	nfsv4root_mnt.mnt_nvnodelistsize = 0;
2931	nfsv4root_mnt.mnt_activevnodelistsize = 0;
2932}
2933
2934/*
2935 * Get a vnode for a file handle, without checking exports, etc.
2936 */
2937struct vnode *
2938nfsvno_getvp(fhandle_t *fhp)
2939{
2940	struct mount *mp;
2941	struct vnode *vp;
2942	int error;
2943
2944	mp = vfs_busyfs(&fhp->fh_fsid);
2945	if (mp == NULL)
2946		return (NULL);
2947	error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
2948	vfs_unbusy(mp);
2949	if (error)
2950		return (NULL);
2951	return (vp);
2952}
2953
2954/*
2955 * Do a local VOP_ADVLOCK().
2956 */
2957int
2958nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
2959    u_int64_t end, struct thread *td)
2960{
2961	int error = 0;
2962	struct flock fl;
2963	u_int64_t tlen;
2964
2965	if (nfsrv_dolocallocks == 0)
2966		goto out;
2967
2968	/* Check for VI_DOOMED here, so that VOP_ADVLOCK() isn't performed. */
2969	if ((vp->v_iflag & VI_DOOMED) != 0) {
2970		error = EPERM;
2971		goto out;
2972	}
2973
2974	fl.l_whence = SEEK_SET;
2975	fl.l_type = ftype;
2976	fl.l_start = (off_t)first;
2977	if (end == NFS64BITSSET) {
2978		fl.l_len = 0;
2979	} else {
2980		tlen = end - first;
2981		fl.l_len = (off_t)tlen;
2982	}
2983	/*
2984	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2985	 * values for all calls, so that all locks will be held by the
2986	 * nfsd server. (The nfsd server handles conflicts between the
2987	 * various clients.)
2988	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2989	 * bytes, so it can't be put in l_sysid.
2990	 */
2991	if (nfsv4_sysid == 0)
2992		nfsv4_sysid = nlm_acquire_next_sysid();
2993	fl.l_pid = (pid_t)0;
2994	fl.l_sysid = (int)nfsv4_sysid;
2995
2996	NFSVOPUNLOCK(vp, 0);
2997	if (ftype == F_UNLCK)
2998		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
2999		    (F_POSIX | F_REMOTE));
3000	else
3001		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
3002		    (F_POSIX | F_REMOTE));
3003	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
3004
3005out:
3006	NFSEXITCODE(error);
3007	return (error);
3008}
3009
3010/*
3011 * Check the nfsv4 root exports.
3012 */
3013int
3014nfsvno_v4rootexport(struct nfsrv_descript *nd)
3015{
3016	struct ucred *credanon;
3017	int exflags, error = 0, numsecflavor, *secflavors, i;
3018
3019	error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
3020	    &credanon, &numsecflavor, &secflavors);
3021	if (error) {
3022		error = NFSERR_PROGUNAVAIL;
3023		goto out;
3024	}
3025	if (credanon != NULL)
3026		crfree(credanon);
3027	for (i = 0; i < numsecflavor; i++) {
3028		if (secflavors[i] == AUTH_SYS)
3029			nd->nd_flag |= ND_EXAUTHSYS;
3030		else if (secflavors[i] == RPCSEC_GSS_KRB5)
3031			nd->nd_flag |= ND_EXGSS;
3032		else if (secflavors[i] == RPCSEC_GSS_KRB5I)
3033			nd->nd_flag |= ND_EXGSSINTEGRITY;
3034		else if (secflavors[i] == RPCSEC_GSS_KRB5P)
3035			nd->nd_flag |= ND_EXGSSPRIVACY;
3036	}
3037
3038out:
3039	NFSEXITCODE(error);
3040	return (error);
3041}
3042
3043/*
3044 * Nfs server psuedo system call for the nfsd's
3045 */
3046/*
3047 * MPSAFE
3048 */
3049static int
3050nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
3051{
3052	struct file *fp;
3053	struct nfsd_addsock_args sockarg;
3054	struct nfsd_nfsd_args nfsdarg;
3055	int error;
3056
3057	if (uap->flag & NFSSVC_NFSDADDSOCK) {
3058		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
3059		if (error)
3060			goto out;
3061		/*
3062		 * Since we don't know what rights might be required,
3063		 * pretend that we need them all. It is better to be too
3064		 * careful than too reckless.
3065		 */
3066		if ((error = fget(td, sockarg.sock, CAP_SOCK_ALL, &fp)) != 0)
3067			goto out;
3068		if (fp->f_type != DTYPE_SOCKET) {
3069			fdrop(fp, td);
3070			error = EPERM;
3071			goto out;
3072		}
3073		error = nfsrvd_addsock(fp);
3074		fdrop(fp, td);
3075	} else if (uap->flag & NFSSVC_NFSDNFSD) {
3076		if (uap->argp == NULL) {
3077			error = EINVAL;
3078			goto out;
3079		}
3080		error = copyin(uap->argp, (caddr_t)&nfsdarg,
3081		    sizeof (nfsdarg));
3082		if (error)
3083			goto out;
3084		error = nfsrvd_nfsd(td, &nfsdarg);
3085	} else {
3086		error = nfssvc_srvcall(td, uap, td->td_ucred);
3087	}
3088
3089out:
3090	NFSEXITCODE(error);
3091	return (error);
3092}
3093
3094static int
3095nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
3096{
3097	struct nfsex_args export;
3098	struct file *fp = NULL;
3099	int stablefd, len;
3100	struct nfsd_clid adminrevoke;
3101	struct nfsd_dumplist dumplist;
3102	struct nfsd_dumpclients *dumpclients;
3103	struct nfsd_dumplocklist dumplocklist;
3104	struct nfsd_dumplocks *dumplocks;
3105	struct nameidata nd;
3106	vnode_t vp;
3107	int error = EINVAL;
3108	struct proc *procp;
3109
3110	if (uap->flag & NFSSVC_PUBLICFH) {
3111		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
3112		    sizeof (fhandle_t));
3113		error = copyin(uap->argp,
3114		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
3115		if (!error)
3116			nfs_pubfhset = 1;
3117	} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
3118		error = copyin(uap->argp,(caddr_t)&export,
3119		    sizeof (struct nfsex_args));
3120		if (!error)
3121			error = nfsrv_v4rootexport(&export, cred, p);
3122	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
3123		nfs_pubfhset = 0;
3124		error = 0;
3125	} else if (uap->flag & NFSSVC_STABLERESTART) {
3126		error = copyin(uap->argp, (caddr_t)&stablefd,
3127		    sizeof (int));
3128		if (!error)
3129			error = fp_getfvp(p, stablefd, &fp, &vp);
3130		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
3131			error = EBADF;
3132		if (!error && newnfs_numnfsd != 0)
3133			error = EPERM;
3134		if (!error) {
3135			nfsrv_stablefirst.nsf_fp = fp;
3136			nfsrv_setupstable(p);
3137		}
3138	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
3139		error = copyin(uap->argp, (caddr_t)&adminrevoke,
3140		    sizeof (struct nfsd_clid));
3141		if (!error)
3142			error = nfsrv_adminrevoke(&adminrevoke, p);
3143	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
3144		error = copyin(uap->argp, (caddr_t)&dumplist,
3145		    sizeof (struct nfsd_dumplist));
3146		if (!error && (dumplist.ndl_size < 1 ||
3147			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
3148			error = EPERM;
3149		if (!error) {
3150		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
3151		    dumpclients = (struct nfsd_dumpclients *)malloc(len,
3152			M_TEMP, M_WAITOK);
3153		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
3154		    error = copyout(dumpclients,
3155			CAST_USER_ADDR_T(dumplist.ndl_list), len);
3156		    free((caddr_t)dumpclients, M_TEMP);
3157		}
3158	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
3159		error = copyin(uap->argp, (caddr_t)&dumplocklist,
3160		    sizeof (struct nfsd_dumplocklist));
3161		if (!error && (dumplocklist.ndllck_size < 1 ||
3162			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
3163			error = EPERM;
3164		if (!error)
3165			error = nfsrv_lookupfilename(&nd,
3166				dumplocklist.ndllck_fname, p);
3167		if (!error) {
3168			len = sizeof (struct nfsd_dumplocks) *
3169				dumplocklist.ndllck_size;
3170			dumplocks = (struct nfsd_dumplocks *)malloc(len,
3171				M_TEMP, M_WAITOK);
3172			nfsrv_dumplocks(nd.ni_vp, dumplocks,
3173			    dumplocklist.ndllck_size, p);
3174			vput(nd.ni_vp);
3175			error = copyout(dumplocks,
3176			    CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
3177			free((caddr_t)dumplocks, M_TEMP);
3178		}
3179	} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
3180		procp = p->td_proc;
3181		PROC_LOCK(procp);
3182		nfsd_master_pid = procp->p_pid;
3183		bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
3184		nfsd_master_start = procp->p_stats->p_start;
3185		nfsd_master_proc = procp;
3186		PROC_UNLOCK(procp);
3187	}
3188
3189	NFSEXITCODE(error);
3190	return (error);
3191}
3192
3193/*
3194 * Check exports.
3195 * Returns 0 if ok, 1 otherwise.
3196 */
3197int
3198nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
3199{
3200	int i;
3201
3202	/*
3203	 * This seems odd, but allow the case where the security flavor
3204	 * list is empty. This happens when NFSv4 is traversing non-exported
3205	 * file systems. Exported file systems should always have a non-empty
3206	 * security flavor list.
3207	 */
3208	if (exp->nes_numsecflavor == 0)
3209		return (0);
3210
3211	for (i = 0; i < exp->nes_numsecflavor; i++) {
3212		/*
3213		 * The tests for privacy and integrity must be first,
3214		 * since ND_GSS is set for everything but AUTH_SYS.
3215		 */
3216		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
3217		    (nd->nd_flag & ND_GSSPRIVACY))
3218			return (0);
3219		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
3220		    (nd->nd_flag & ND_GSSINTEGRITY))
3221			return (0);
3222		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
3223		    (nd->nd_flag & ND_GSS))
3224			return (0);
3225		if (exp->nes_secflavors[i] == AUTH_SYS &&
3226		    (nd->nd_flag & ND_GSS) == 0)
3227			return (0);
3228	}
3229	return (1);
3230}
3231
3232/*
3233 * Calculate a hash value for the fid in a file handle.
3234 */
3235uint32_t
3236nfsrv_hashfh(fhandle_t *fhp)
3237{
3238	uint32_t hashval;
3239
3240	hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
3241	return (hashval);
3242}
3243
3244/*
3245 * Signal the userland master nfsd to backup the stable restart file.
3246 */
3247void
3248nfsrv_backupstable(void)
3249{
3250	struct proc *procp;
3251
3252	if (nfsd_master_proc != NULL) {
3253		procp = pfind(nfsd_master_pid);
3254		/* Try to make sure it is the correct process. */
3255		if (procp == nfsd_master_proc &&
3256		    procp->p_stats->p_start.tv_sec ==
3257		    nfsd_master_start.tv_sec &&
3258		    procp->p_stats->p_start.tv_usec ==
3259		    nfsd_master_start.tv_usec &&
3260		    strcmp(procp->p_comm, nfsd_master_comm) == 0)
3261			kern_psignal(procp, SIGUSR2);
3262		else
3263			nfsd_master_proc = NULL;
3264
3265		if (procp != NULL)
3266			PROC_UNLOCK(procp);
3267	}
3268}
3269
3270extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
3271
3272/*
3273 * Called once to initialize data structures...
3274 */
3275static int
3276nfsd_modevent(module_t mod, int type, void *data)
3277{
3278	int error = 0;
3279	static int loaded = 0;
3280
3281	switch (type) {
3282	case MOD_LOAD:
3283		if (loaded)
3284			goto out;
3285		newnfs_portinit();
3286		mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF);
3287		mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
3288		mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
3289		    MTX_DEF);
3290		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
3291		nfsrvd_initcache();
3292		nfsd_init();
3293		NFSD_LOCK();
3294		nfsrvd_init(0);
3295		NFSD_UNLOCK();
3296		nfsd_mntinit();
3297#ifdef VV_DISABLEDELEG
3298		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
3299		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
3300#endif
3301		nfsd_call_servertimer = nfsrv_servertimer;
3302		nfsd_call_nfsd = nfssvc_nfsd;
3303		loaded = 1;
3304		break;
3305
3306	case MOD_UNLOAD:
3307		if (newnfs_numnfsd != 0) {
3308			error = EBUSY;
3309			break;
3310		}
3311
3312#ifdef VV_DISABLEDELEG
3313		vn_deleg_ops.vndeleg_recall = NULL;
3314		vn_deleg_ops.vndeleg_disable = NULL;
3315#endif
3316		nfsd_call_servertimer = NULL;
3317		nfsd_call_nfsd = NULL;
3318
3319		/* Clean out all NFSv4 state. */
3320		nfsrv_throwawayallstate(curthread);
3321
3322		/* Clean the NFS server reply cache */
3323		nfsrvd_cleancache();
3324
3325		/* Free up the krpc server pool. */
3326		if (nfsrvd_pool != NULL)
3327			svcpool_destroy(nfsrvd_pool);
3328
3329		/* and get rid of the locks */
3330		mtx_destroy(&nfs_cache_mutex);
3331		mtx_destroy(&nfs_v4root_mutex);
3332		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
3333		lockdestroy(&nfsv4root_mnt.mnt_explock);
3334		loaded = 0;
3335		break;
3336	default:
3337		error = EOPNOTSUPP;
3338		break;
3339	}
3340
3341out:
3342	NFSEXITCODE(error);
3343	return (error);
3344}
3345static moduledata_t nfsd_mod = {
3346	"nfsd",
3347	nfsd_modevent,
3348	NULL,
3349};
3350DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
3351
3352/* So that loader and kldload(2) can find us, wherever we are.. */
3353MODULE_VERSION(nfsd, 1);
3354MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
3355MODULE_DEPEND(nfsd, nfslock, 1, 1, 1);
3356MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
3357MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
3358MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
3359
3360