nfs_nfsdport.c revision 217176
1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/sys/fs/nfsserver/nfs_nfsdport.c 217176 2011-01-09 02:10:54Z rmacklem $");
36
37/*
38 * Functions that perform the vfs operations required by the routines in
39 * nfsd_serv.c. It is hoped that this change will make the server more
40 * portable.
41 */
42
43#include <fs/nfs/nfsport.h>
44#include <sys/hash.h>
45#include <sys/sysctl.h>
46#include <nlm/nlm_prot.h>
47#include <nlm/nlm.h>
48
49extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
50extern int nfsrv_useacl;
51extern int newnfs_numnfsd;
52extern struct mount nfsv4root_mnt;
53extern struct nfsrv_stablefirst nfsrv_stablefirst;
54extern void (*nfsd_call_servertimer)(void);
55struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
56NFSDLOCKMUTEX;
57struct mtx nfs_cache_mutex;
58struct mtx nfs_v4root_mutex;
59struct nfsrvfh nfs_rootfh, nfs_pubfh;
60int nfs_pubfhset = 0, nfs_rootfhset = 0;
61static uint32_t nfsv4_sysid = 0;
62
63static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
64    struct ucred *);
65
66static int enable_crossmntpt = 1;
67static int nfs_commit_blks;
68static int nfs_commit_miss;
69extern int nfsrv_issuedelegs;
70extern int nfsrv_dolocallocks;
71
72SYSCTL_DECL(_vfs_newnfs);
73SYSCTL_INT(_vfs_newnfs, OID_AUTO, mirrormnt, CTLFLAG_RW, &enable_crossmntpt,
74    0, "Enable nfsd to cross mount points");
75SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
76    0, "");
77SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
78    0, "");
79SYSCTL_INT(_vfs_newnfs, OID_AUTO, issue_delegations, CTLFLAG_RW,
80    &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
81SYSCTL_INT(_vfs_newnfs, OID_AUTO, enable_locallocks, CTLFLAG_RW,
82    &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
83
84#define	NUM_HEURISTIC		1017
85#define	NHUSE_INIT		64
86#define	NHUSE_INC		16
87#define	NHUSE_MAX		2048
88
89static struct nfsheur {
90	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
91	off_t nh_nextr;		/* next offset for sequential detection */
92	int nh_use;		/* use count for selection */
93	int nh_seqcount;	/* heuristic */
94} nfsheur[NUM_HEURISTIC];
95
96
97/*
98 * Get attributes into nfsvattr structure.
99 */
100int
101nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
102    struct thread *p, int vpislocked)
103{
104	int error, lockedit = 0;
105
106	if (vpislocked == 0) {
107		/*
108		 * When vpislocked == 0, the vnode is either exclusively
109		 * locked by this thread or not locked by this thread.
110		 * As such, shared lock it, if not exclusively locked.
111		 */
112		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
113			lockedit = 1;
114			vn_lock(vp, LK_SHARED | LK_RETRY);
115		}
116	}
117	error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
118	if (lockedit != 0)
119		VOP_UNLOCK(vp, 0);
120	return (error);
121}
122
123/*
124 * Get a file handle for a vnode.
125 */
126int
127nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
128{
129	int error;
130
131	NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
132	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
133	error = VOP_VPTOFH(vp, &fhp->fh_fid);
134	return (error);
135}
136
137/*
138 * Perform access checking for vnodes obtained from file handles that would
139 * refer to files already opened by a Unix client. You cannot just use
140 * vn_writechk() and VOP_ACCESSX() for two reasons.
141 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
142 *     case.
143 * 2 - The owner is to be given access irrespective of mode bits for some
144 *     operations, so that processes that chmod after opening a file don't
145 *     break.
146 */
147int
148nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
149    struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
150    u_int32_t *supportedtypep)
151{
152	struct vattr vattr;
153	int error = 0, getret = 0;
154
155	if (vpislocked == 0) {
156		if (vn_lock(vp, LK_SHARED) != 0)
157			return (EPERM);
158	}
159	if (accmode & VWRITE) {
160		/* Just vn_writechk() changed to check rdonly */
161		/*
162		 * Disallow write attempts on read-only file systems;
163		 * unless the file is a socket or a block or character
164		 * device resident on the file system.
165		 */
166		if (NFSVNO_EXRDONLY(exp) ||
167		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
168			switch (vp->v_type) {
169			case VREG:
170			case VDIR:
171			case VLNK:
172				error = EROFS;
173			default:
174				break;
175			}
176		}
177		/*
178		 * If there's shared text associated with
179		 * the inode, try to free it up once.  If
180		 * we fail, we can't allow writing.
181		 */
182		if ((vp->v_vflag & VV_TEXT) != 0 && error == 0)
183			error = ETXTBSY;
184	}
185	if (error != 0) {
186		if (vpislocked == 0)
187			VOP_UNLOCK(vp, 0);
188		return (error);
189	}
190
191	/*
192	 * Should the override still be applied when ACLs are enabled?
193	 */
194	error = VOP_ACCESSX(vp, accmode, cred, p);
195	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
196		/*
197		 * Try again with VEXPLICIT_DENY, to see if the test for
198		 * deletion is supported.
199		 */
200		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
201		if (error == 0) {
202			if (vp->v_type == VDIR) {
203				accmode &= ~(VDELETE | VDELETE_CHILD);
204				accmode |= VWRITE;
205				error = VOP_ACCESSX(vp, accmode, cred, p);
206			} else if (supportedtypep != NULL) {
207				*supportedtypep &= ~NFSACCESS_DELETE;
208			}
209		}
210	}
211
212	/*
213	 * Allow certain operations for the owner (reads and writes
214	 * on files that are already open).
215	 */
216	if (override != NFSACCCHK_NOOVERRIDE &&
217	    (error == EPERM || error == EACCES)) {
218		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
219			error = 0;
220		else if (override & NFSACCCHK_ALLOWOWNER) {
221			getret = VOP_GETATTR(vp, &vattr, cred);
222			if (getret == 0 && cred->cr_uid == vattr.va_uid)
223				error = 0;
224		}
225	}
226	if (vpislocked == 0)
227		VOP_UNLOCK(vp, 0);
228	return (error);
229}
230
231/*
232 * Set attribute(s) vnop.
233 */
234int
235nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
236    struct thread *p, struct nfsexstuff *exp)
237{
238	int error;
239
240	error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
241	return (error);
242}
243
244/*
245 * Set up nameidata for a lookup() call and do it
246 * For the cases where we are crossing mount points
247 * (looking up the public fh path or the v4 root path when
248 *  not using a pseudo-root fs), set/release the Giant lock,
249 * as required.
250 */
251int
252nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
253    struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
254    struct vnode **retdirp)
255{
256	struct componentname *cnp = &ndp->ni_cnd;
257	int i;
258	struct iovec aiov;
259	struct uio auio;
260	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
261	int error = 0, crossmnt;
262	char *cp;
263
264	*retdirp = NULL;
265	cnp->cn_nameptr = cnp->cn_pnbuf;
266	/*
267	 * Extract and set starting directory.
268	 */
269	if (dp->v_type != VDIR) {
270		if (islocked)
271			vput(dp);
272		else
273			vrele(dp);
274		nfsvno_relpathbuf(ndp);
275		return (ENOTDIR);
276	}
277	if (islocked)
278		NFSVOPUNLOCK(dp, 0, p);
279	VREF(dp);
280	*retdirp = dp;
281	if (NFSVNO_EXRDONLY(exp))
282		cnp->cn_flags |= RDONLY;
283	ndp->ni_segflg = UIO_SYSSPACE;
284	crossmnt = 1;
285
286	if (nd->nd_flag & ND_PUBLOOKUP) {
287		ndp->ni_loopcnt = 0;
288		if (cnp->cn_pnbuf[0] == '/') {
289			vrele(dp);
290			/*
291			 * Check for degenerate pathnames here, since lookup()
292			 * panics on them.
293			 */
294			for (i = 1; i < ndp->ni_pathlen; i++)
295				if (cnp->cn_pnbuf[i] != '/')
296					break;
297			if (i == ndp->ni_pathlen) {
298				error = NFSERR_ACCES;
299				goto out;
300			}
301			dp = rootvnode;
302			VREF(dp);
303		}
304	} else if ((enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
305	    (nd->nd_flag & ND_NFSV4) == 0) {
306		/*
307		 * Only cross mount points for NFSv4 when doing a
308		 * mount while traversing the file system above
309		 * the mount point, unless enable_crossmntpt is set.
310		 */
311		cnp->cn_flags |= NOCROSSMOUNT;
312		crossmnt = 0;
313	}
314
315	/*
316	 * Initialize for scan, set ni_startdir and bump ref on dp again
317	 * becuase lookup() will dereference ni_startdir.
318	 */
319
320	cnp->cn_thread = p;
321	ndp->ni_startdir = dp;
322	ndp->ni_rootdir = rootvnode;
323
324	if (!lockleaf)
325		cnp->cn_flags |= LOCKLEAF;
326	for (;;) {
327		cnp->cn_nameptr = cnp->cn_pnbuf;
328		/*
329		 * Call lookup() to do the real work.  If an error occurs,
330		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
331		 * we do not have to dereference anything before returning.
332		 * In either case ni_startdir will be dereferenced and NULLed
333		 * out.
334		 */
335		error = lookup(ndp);
336		if (error)
337			break;
338
339		/*
340		 * Check for encountering a symbolic link.  Trivial
341		 * termination occurs if no symlink encountered.
342		 */
343		if ((cnp->cn_flags & ISSYMLINK) == 0) {
344			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
345				nfsvno_relpathbuf(ndp);
346			if (ndp->ni_vp && !lockleaf)
347				NFSVOPUNLOCK(ndp->ni_vp, 0, p);
348			break;
349		}
350
351		/*
352		 * Validate symlink
353		 */
354		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
355			NFSVOPUNLOCK(ndp->ni_dvp, 0, p);
356		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
357			error = EINVAL;
358			goto badlink2;
359		}
360
361		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
362			error = ELOOP;
363			goto badlink2;
364		}
365		if (ndp->ni_pathlen > 1)
366			cp = uma_zalloc(namei_zone, M_WAITOK);
367		else
368			cp = cnp->cn_pnbuf;
369		aiov.iov_base = cp;
370		aiov.iov_len = MAXPATHLEN;
371		auio.uio_iov = &aiov;
372		auio.uio_iovcnt = 1;
373		auio.uio_offset = 0;
374		auio.uio_rw = UIO_READ;
375		auio.uio_segflg = UIO_SYSSPACE;
376		auio.uio_td = NULL;
377		auio.uio_resid = MAXPATHLEN;
378		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
379		if (error) {
380		badlink1:
381			if (ndp->ni_pathlen > 1)
382				uma_zfree(namei_zone, cp);
383		badlink2:
384			vrele(ndp->ni_dvp);
385			vput(ndp->ni_vp);
386			break;
387		}
388		linklen = MAXPATHLEN - auio.uio_resid;
389		if (linklen == 0) {
390			error = ENOENT;
391			goto badlink1;
392		}
393		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
394			error = ENAMETOOLONG;
395			goto badlink1;
396		}
397
398		/*
399		 * Adjust or replace path
400		 */
401		if (ndp->ni_pathlen > 1) {
402			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
403			uma_zfree(namei_zone, cnp->cn_pnbuf);
404			cnp->cn_pnbuf = cp;
405		} else
406			cnp->cn_pnbuf[linklen] = '\0';
407		ndp->ni_pathlen += linklen;
408
409		/*
410		 * Cleanup refs for next loop and check if root directory
411		 * should replace current directory.  Normally ni_dvp
412		 * becomes the new base directory and is cleaned up when
413		 * we loop.  Explicitly null pointers after invalidation
414		 * to clarify operation.
415		 */
416		vput(ndp->ni_vp);
417		ndp->ni_vp = NULL;
418
419		if (cnp->cn_pnbuf[0] == '/') {
420			vrele(ndp->ni_dvp);
421			ndp->ni_dvp = ndp->ni_rootdir;
422			VREF(ndp->ni_dvp);
423		}
424		ndp->ni_startdir = ndp->ni_dvp;
425		ndp->ni_dvp = NULL;
426	}
427	if (!lockleaf)
428		cnp->cn_flags &= ~LOCKLEAF;
429
430out:
431	if (error) {
432		uma_zfree(namei_zone, cnp->cn_pnbuf);
433		ndp->ni_vp = NULL;
434		ndp->ni_dvp = NULL;
435		ndp->ni_startdir = NULL;
436		cnp->cn_flags &= ~HASBUF;
437	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
438		ndp->ni_dvp = NULL;
439	}
440	return (error);
441}
442
443/*
444 * Set up a pathname buffer and return a pointer to it and, optionally
445 * set a hash pointer.
446 */
447void
448nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
449{
450	struct componentname *cnp = &ndp->ni_cnd;
451
452	cnp->cn_flags |= (NOMACCHECK | HASBUF);
453	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
454	if (hashpp != NULL)
455		*hashpp = NULL;
456	*bufpp = cnp->cn_pnbuf;
457}
458
459/*
460 * Release the above path buffer, if not released by nfsvno_namei().
461 */
462void
463nfsvno_relpathbuf(struct nameidata *ndp)
464{
465
466	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
467		panic("nfsrelpath");
468	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
469	ndp->ni_cnd.cn_flags &= ~HASBUF;
470}
471
472/*
473 * Readlink vnode op into an mbuf list.
474 */
475int
476nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
477    struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
478{
479	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
480	struct iovec *ivp = iv;
481	struct uio io, *uiop = &io;
482	struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
483	int i, len, tlen, error;
484
485	len = 0;
486	i = 0;
487	while (len < NFS_MAXPATHLEN) {
488		NFSMGET(mp);
489		MCLGET(mp, M_WAIT);
490		mp->m_len = NFSMSIZ(mp);
491		if (len == 0) {
492			mp3 = mp2 = mp;
493		} else {
494			mp2->m_next = mp;
495			mp2 = mp;
496		}
497		if ((len + mp->m_len) > NFS_MAXPATHLEN) {
498			mp->m_len = NFS_MAXPATHLEN - len;
499			len = NFS_MAXPATHLEN;
500		} else {
501			len += mp->m_len;
502		}
503		ivp->iov_base = mtod(mp, caddr_t);
504		ivp->iov_len = mp->m_len;
505		i++;
506		ivp++;
507	}
508	uiop->uio_iov = iv;
509	uiop->uio_iovcnt = i;
510	uiop->uio_offset = 0;
511	uiop->uio_resid = len;
512	uiop->uio_rw = UIO_READ;
513	uiop->uio_segflg = UIO_SYSSPACE;
514	uiop->uio_td = NULL;
515	error = VOP_READLINK(vp, uiop, cred);
516	if (error) {
517		m_freem(mp3);
518		*lenp = 0;
519		return (error);
520	}
521	if (uiop->uio_resid > 0) {
522		len -= uiop->uio_resid;
523		tlen = NFSM_RNDUP(len);
524		nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
525	}
526	*lenp = len;
527	*mpp = mp3;
528	*mpendp = mp;
529	return (0);
530}
531
532/*
533 * Read vnode op call into mbuf list.
534 */
535int
536nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
537    struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
538{
539	struct mbuf *m;
540	int i;
541	struct iovec *iv;
542	struct iovec *iv2;
543	int error = 0, len, left, siz, tlen, ioflag = 0, hi, try = 32;
544	struct mbuf *m2 = NULL, *m3;
545	struct uio io, *uiop = &io;
546	struct nfsheur *nh;
547
548	/*
549	 * Calculate seqcount for heuristic
550	 */
551	/*
552	 * Locate best candidate
553	 */
554
555	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
556	nh = &nfsheur[hi];
557
558	while (try--) {
559		if (nfsheur[hi].nh_vp == vp) {
560			nh = &nfsheur[hi];
561			break;
562		}
563		if (nfsheur[hi].nh_use > 0)
564			--nfsheur[hi].nh_use;
565		hi = (hi + 1) % NUM_HEURISTIC;
566		if (nfsheur[hi].nh_use < nh->nh_use)
567			nh = &nfsheur[hi];
568	}
569
570	if (nh->nh_vp != vp) {
571		nh->nh_vp = vp;
572		nh->nh_nextr = off;
573		nh->nh_use = NHUSE_INIT;
574		if (off == 0)
575			nh->nh_seqcount = 4;
576		else
577			nh->nh_seqcount = 1;
578	}
579
580	/*
581	 * Calculate heuristic
582	 */
583
584	if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
585		if (++nh->nh_seqcount > IO_SEQMAX)
586			nh->nh_seqcount = IO_SEQMAX;
587	} else if (nh->nh_seqcount > 1) {
588		nh->nh_seqcount = 1;
589	} else {
590		nh->nh_seqcount = 0;
591	}
592	nh->nh_use += NHUSE_INC;
593	if (nh->nh_use > NHUSE_MAX)
594		nh->nh_use = NHUSE_MAX;
595	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
596
597	len = left = NFSM_RNDUP(cnt);
598	m3 = NULL;
599	/*
600	 * Generate the mbuf list with the uio_iov ref. to it.
601	 */
602	i = 0;
603	while (left > 0) {
604		NFSMGET(m);
605		MCLGET(m, M_WAIT);
606		m->m_len = 0;
607		siz = min(M_TRAILINGSPACE(m), left);
608		left -= siz;
609		i++;
610		if (m3)
611			m2->m_next = m;
612		else
613			m3 = m;
614		m2 = m;
615	}
616	MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
617	    M_TEMP, M_WAITOK);
618	uiop->uio_iov = iv2 = iv;
619	m = m3;
620	left = len;
621	i = 0;
622	while (left > 0) {
623		if (m == NULL)
624			panic("nfsvno_read iov");
625		siz = min(M_TRAILINGSPACE(m), left);
626		if (siz > 0) {
627			iv->iov_base = mtod(m, caddr_t) + m->m_len;
628			iv->iov_len = siz;
629			m->m_len += siz;
630			left -= siz;
631			iv++;
632			i++;
633		}
634		m = m->m_next;
635	}
636	uiop->uio_iovcnt = i;
637	uiop->uio_offset = off;
638	uiop->uio_resid = len;
639	uiop->uio_rw = UIO_READ;
640	uiop->uio_segflg = UIO_SYSSPACE;
641	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
642	FREE((caddr_t)iv2, M_TEMP);
643	if (error) {
644		m_freem(m3);
645		*mpp = NULL;
646		return (error);
647	}
648	tlen = len - uiop->uio_resid;
649	cnt = cnt < tlen ? cnt : tlen;
650	tlen = NFSM_RNDUP(cnt);
651	if (tlen == 0) {
652		m_freem(m3);
653		m3 = NULL;
654	} else if (len != tlen || tlen != cnt)
655		nfsrv_adj(m3, len - tlen, tlen - cnt);
656	*mpp = m3;
657	*mpendp = m2;
658	return (0);
659}
660
661/*
662 * Write vnode op from an mbuf list.
663 */
664int
665nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
666    struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
667{
668	struct iovec *ivp;
669	int i, len;
670	struct iovec *iv;
671	int ioflags, error;
672	struct uio io, *uiop = &io;
673
674	MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
675	    M_WAITOK);
676	uiop->uio_iov = iv = ivp;
677	uiop->uio_iovcnt = cnt;
678	i = mtod(mp, caddr_t) + mp->m_len - cp;
679	len = retlen;
680	while (len > 0) {
681		if (mp == NULL)
682			panic("nfsvno_write");
683		if (i > 0) {
684			i = min(i, len);
685			ivp->iov_base = cp;
686			ivp->iov_len = i;
687			ivp++;
688			len -= i;
689		}
690		mp = mp->m_next;
691		if (mp) {
692			i = mp->m_len;
693			cp = mtod(mp, caddr_t);
694		}
695	}
696
697	if (stable == NFSWRITE_UNSTABLE)
698		ioflags = IO_NODELOCKED;
699	else
700		ioflags = (IO_SYNC | IO_NODELOCKED);
701	uiop->uio_resid = retlen;
702	uiop->uio_rw = UIO_WRITE;
703	uiop->uio_segflg = UIO_SYSSPACE;
704	NFSUIOPROC(uiop, p);
705	uiop->uio_offset = off;
706	error = VOP_WRITE(vp, uiop, ioflags, cred);
707	FREE((caddr_t)iv, M_TEMP);
708	return (error);
709}
710
711/*
712 * Common code for creating a regular file (plus special files for V2).
713 */
714int
715nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
716    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
717    int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
718{
719	u_quad_t tempsize;
720	int error;
721
722	error = nd->nd_repstat;
723	if (!error && ndp->ni_vp == NULL) {
724		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
725			vrele(ndp->ni_startdir);
726			error = VOP_CREATE(ndp->ni_dvp,
727			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
728			vput(ndp->ni_dvp);
729			nfsvno_relpathbuf(ndp);
730			if (!error) {
731				if (*exclusive_flagp) {
732					*exclusive_flagp = 0;
733					NFSVNO_ATTRINIT(nvap);
734					nvap->na_atime.tv_sec = cverf[0];
735					nvap->na_atime.tv_nsec = cverf[1];
736					error = VOP_SETATTR(ndp->ni_vp,
737					    &nvap->na_vattr, nd->nd_cred);
738				}
739			}
740		/*
741		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
742		 * (This implies, just get out on an error.)
743		 */
744		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
745			nvap->na_type == VFIFO) {
746			if (nvap->na_type == VCHR && rdev == 0xffffffff)
747				nvap->na_type = VFIFO;
748                        if (nvap->na_type != VFIFO &&
749			    (error = priv_check_cred(nd->nd_cred,
750			     PRIV_VFS_MKNOD_DEV, 0))) {
751				vrele(ndp->ni_startdir);
752				nfsvno_relpathbuf(ndp);
753				vput(ndp->ni_dvp);
754				return (error);
755			}
756			nvap->na_rdev = rdev;
757			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
758			    &ndp->ni_cnd, &nvap->na_vattr);
759			vput(ndp->ni_dvp);
760			nfsvno_relpathbuf(ndp);
761			if (error) {
762				vrele(ndp->ni_startdir);
763				return (error);
764			}
765		} else {
766			vrele(ndp->ni_startdir);
767			nfsvno_relpathbuf(ndp);
768			vput(ndp->ni_dvp);
769			return (ENXIO);
770		}
771		*vpp = ndp->ni_vp;
772	} else {
773		/*
774		 * Handle cases where error is already set and/or
775		 * the file exists.
776		 * 1 - clean up the lookup
777		 * 2 - iff !error and na_size set, truncate it
778		 */
779		vrele(ndp->ni_startdir);
780		nfsvno_relpathbuf(ndp);
781		*vpp = ndp->ni_vp;
782		if (ndp->ni_dvp == *vpp)
783			vrele(ndp->ni_dvp);
784		else
785			vput(ndp->ni_dvp);
786		if (!error && nvap->na_size != VNOVAL) {
787			error = nfsvno_accchk(*vpp, VWRITE,
788			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
789			    NFSACCCHK_VPISLOCKED, NULL);
790			if (!error) {
791				tempsize = nvap->na_size;
792				NFSVNO_ATTRINIT(nvap);
793				nvap->na_size = tempsize;
794				error = VOP_SETATTR(*vpp,
795				    &nvap->na_vattr, nd->nd_cred);
796			}
797		}
798		if (error)
799			vput(*vpp);
800	}
801	return (error);
802}
803
804/*
805 * Do a mknod vnode op.
806 */
807int
808nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
809    struct thread *p)
810{
811	int error = 0;
812	enum vtype vtyp;
813
814	vtyp = nvap->na_type;
815	/*
816	 * Iff doesn't exist, create it.
817	 */
818	if (ndp->ni_vp) {
819		vrele(ndp->ni_startdir);
820		nfsvno_relpathbuf(ndp);
821		vput(ndp->ni_dvp);
822		vrele(ndp->ni_vp);
823		return (EEXIST);
824	}
825	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
826		vrele(ndp->ni_startdir);
827		nfsvno_relpathbuf(ndp);
828		vput(ndp->ni_dvp);
829		return (NFSERR_BADTYPE);
830	}
831	if (vtyp == VSOCK) {
832		vrele(ndp->ni_startdir);
833		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
834		    &ndp->ni_cnd, &nvap->na_vattr);
835		vput(ndp->ni_dvp);
836		nfsvno_relpathbuf(ndp);
837	} else {
838		if (nvap->na_type != VFIFO &&
839		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
840			vrele(ndp->ni_startdir);
841			nfsvno_relpathbuf(ndp);
842			vput(ndp->ni_dvp);
843			return (error);
844		}
845		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
846		    &ndp->ni_cnd, &nvap->na_vattr);
847		vput(ndp->ni_dvp);
848		nfsvno_relpathbuf(ndp);
849		vrele(ndp->ni_startdir);
850		/*
851		 * Since VOP_MKNOD returns the ni_vp, I can't
852		 * see any reason to do the lookup.
853		 */
854	}
855	return (error);
856}
857
858/*
859 * Mkdir vnode op.
860 */
861int
862nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
863    struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
864{
865	int error = 0;
866
867	if (ndp->ni_vp != NULL) {
868		if (ndp->ni_dvp == ndp->ni_vp)
869			vrele(ndp->ni_dvp);
870		else
871			vput(ndp->ni_dvp);
872		vrele(ndp->ni_vp);
873		nfsvno_relpathbuf(ndp);
874		return (EEXIST);
875	}
876	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
877	    &nvap->na_vattr);
878	vput(ndp->ni_dvp);
879	nfsvno_relpathbuf(ndp);
880	return (error);
881}
882
883/*
884 * symlink vnode op.
885 */
886int
887nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
888    int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
889    struct nfsexstuff *exp)
890{
891	int error = 0;
892
893	if (ndp->ni_vp) {
894		vrele(ndp->ni_startdir);
895		nfsvno_relpathbuf(ndp);
896		if (ndp->ni_dvp == ndp->ni_vp)
897			vrele(ndp->ni_dvp);
898		else
899			vput(ndp->ni_dvp);
900		vrele(ndp->ni_vp);
901		return (EEXIST);
902	}
903
904	error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
905	    &nvap->na_vattr, pathcp);
906	vput(ndp->ni_dvp);
907	vrele(ndp->ni_startdir);
908	nfsvno_relpathbuf(ndp);
909	/*
910	 * Although FreeBSD still had the lookup code in
911	 * it for 7/current, there doesn't seem to be any
912	 * point, since VOP_SYMLINK() returns the ni_vp.
913	 * Just vput it for v2.
914	 */
915	if (!not_v2 && !error)
916		vput(ndp->ni_vp);
917	return (error);
918}
919
920/*
921 * Parse symbolic link arguments.
922 * This function has an ugly side effect. It will MALLOC() an area for
923 * the symlink and set iov_base to point to it, only if it succeeds.
924 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
925 * be FREE'd later.
926 */
927int
928nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
929    struct thread *p, char **pathcpp, int *lenp)
930{
931	u_int32_t *tl;
932	char *pathcp = NULL;
933	int error = 0, len;
934	struct nfsv2_sattr *sp;
935
936	*pathcpp = NULL;
937	*lenp = 0;
938	if ((nd->nd_flag & ND_NFSV3) &&
939	    (error = nfsrv_sattr(nd, nvap, NULL, NULL, p)))
940		goto nfsmout;
941	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
942	len = fxdr_unsigned(int, *tl);
943	if (len > NFS_MAXPATHLEN || len <= 0) {
944		error = EBADRPC;
945		goto nfsmout;
946	}
947	MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
948	error = nfsrv_mtostr(nd, pathcp, len);
949	if (error)
950		goto nfsmout;
951	if (nd->nd_flag & ND_NFSV2) {
952		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
953		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
954	}
955	*pathcpp = pathcp;
956	*lenp = len;
957	return (0);
958nfsmout:
959	if (pathcp)
960		free(pathcp, M_TEMP);
961	return (error);
962}
963
964/*
965 * Remove a non-directory object.
966 */
967int
968nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
969    struct thread *p, struct nfsexstuff *exp)
970{
971	struct vnode *vp;
972	int error = 0;
973
974	vp = ndp->ni_vp;
975	if (vp->v_type == VDIR)
976		error = NFSERR_ISDIR;
977	else if (is_v4)
978		error = nfsrv_checkremove(vp, 1, p);
979	if (!error)
980		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
981	if (ndp->ni_dvp == vp)
982		vrele(ndp->ni_dvp);
983	else
984		vput(ndp->ni_dvp);
985	vput(vp);
986	return (error);
987}
988
989/*
990 * Remove a directory.
991 */
992int
993nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
994    struct thread *p, struct nfsexstuff *exp)
995{
996	struct vnode *vp;
997	int error = 0;
998
999	vp = ndp->ni_vp;
1000	if (vp->v_type != VDIR) {
1001		error = ENOTDIR;
1002		goto out;
1003	}
1004	/*
1005	 * No rmdir "." please.
1006	 */
1007	if (ndp->ni_dvp == vp) {
1008		error = EINVAL;
1009		goto out;
1010	}
1011	/*
1012	 * The root of a mounted filesystem cannot be deleted.
1013	 */
1014	if (vp->v_vflag & VV_ROOT)
1015		error = EBUSY;
1016out:
1017	if (!error)
1018		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1019	if (ndp->ni_dvp == vp)
1020		vrele(ndp->ni_dvp);
1021	else
1022		vput(ndp->ni_dvp);
1023	vput(vp);
1024	return (error);
1025}
1026
1027/*
1028 * Rename vnode op.
1029 */
1030int
1031nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
1032    u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
1033{
1034	struct vnode *fvp, *tvp, *tdvp;
1035	int error = 0;
1036
1037	fvp = fromndp->ni_vp;
1038	if (ndstat) {
1039		vrele(fromndp->ni_dvp);
1040		vrele(fvp);
1041		error = ndstat;
1042		goto out1;
1043	}
1044	tdvp = tondp->ni_dvp;
1045	tvp = tondp->ni_vp;
1046	if (tvp != NULL) {
1047		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
1048			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
1049			goto out;
1050		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
1051			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
1052			goto out;
1053		}
1054		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
1055			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1056			goto out;
1057		}
1058
1059		/*
1060		 * A rename to '.' or '..' results in a prematurely
1061		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
1062		 * here.
1063		 */
1064		if ((tondp->ni_cnd.cn_namelen == 1 &&
1065		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
1066		    (tondp->ni_cnd.cn_namelen == 2 &&
1067		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
1068		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
1069			error = EINVAL;
1070			goto out;
1071		}
1072	}
1073	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
1074		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1075		goto out;
1076	}
1077	if (fvp->v_mount != tdvp->v_mount) {
1078		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1079		goto out;
1080	}
1081	if (fvp == tdvp) {
1082		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
1083		goto out;
1084	}
1085	if (fvp == tvp) {
1086		/*
1087		 * If source and destination are the same, there is nothing to
1088		 * do. Set error to -1 to indicate this.
1089		 */
1090		error = -1;
1091		goto out;
1092	}
1093	if (ndflag & ND_NFSV4) {
1094		if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
1095			error = nfsrv_checkremove(fvp, 0, p);
1096			VOP_UNLOCK(fvp, 0);
1097		} else
1098			error = EPERM;
1099		if (tvp && !error)
1100			error = nfsrv_checkremove(tvp, 1, p);
1101	} else {
1102		/*
1103		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
1104		 * that the NFSv4 client won't be confused by the rename.
1105		 * Since nfsd_recalldelegation() can only be called on an
1106		 * unlocked vnode at this point and fvp is the file that will
1107		 * still exist after the rename, just do fvp.
1108		 */
1109		nfsd_recalldelegation(fvp, p);
1110	}
1111out:
1112	if (!error) {
1113		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
1114		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
1115		    &tondp->ni_cnd);
1116	} else {
1117		if (tdvp == tvp)
1118			vrele(tdvp);
1119		else
1120			vput(tdvp);
1121		if (tvp)
1122			vput(tvp);
1123		vrele(fromndp->ni_dvp);
1124		vrele(fvp);
1125		if (error == -1)
1126			error = 0;
1127	}
1128	vrele(tondp->ni_startdir);
1129	nfsvno_relpathbuf(tondp);
1130out1:
1131	vrele(fromndp->ni_startdir);
1132	nfsvno_relpathbuf(fromndp);
1133	return (error);
1134}
1135
1136/*
1137 * Link vnode op.
1138 */
1139int
1140nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
1141    struct thread *p, struct nfsexstuff *exp)
1142{
1143	struct vnode *xp;
1144	int error = 0;
1145
1146	xp = ndp->ni_vp;
1147	if (xp != NULL) {
1148		error = EEXIST;
1149	} else {
1150		xp = ndp->ni_dvp;
1151		if (vp->v_mount != xp->v_mount)
1152			error = EXDEV;
1153	}
1154	if (!error) {
1155		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1156		if ((vp->v_iflag & VI_DOOMED) == 0)
1157			error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
1158		else
1159			error = EPERM;
1160		if (ndp->ni_dvp == vp)
1161			vrele(ndp->ni_dvp);
1162		else
1163			vput(ndp->ni_dvp);
1164		VOP_UNLOCK(vp, 0);
1165	} else {
1166		if (ndp->ni_dvp == ndp->ni_vp)
1167			vrele(ndp->ni_dvp);
1168		else
1169			vput(ndp->ni_dvp);
1170		if (ndp->ni_vp)
1171			vrele(ndp->ni_vp);
1172	}
1173	nfsvno_relpathbuf(ndp);
1174	return (error);
1175}
1176
1177/*
1178 * Do the fsync() appropriate for the commit.
1179 */
1180int
1181nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
1182    struct thread *td)
1183{
1184	int error = 0;
1185
1186	if (cnt > MAX_COMMIT_COUNT) {
1187		/*
1188		 * Give up and do the whole thing
1189		 */
1190		if (vp->v_object &&
1191		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1192			VM_OBJECT_LOCK(vp->v_object);
1193			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
1194			VM_OBJECT_UNLOCK(vp->v_object);
1195		}
1196		error = VOP_FSYNC(vp, MNT_WAIT, td);
1197	} else {
1198		/*
1199		 * Locate and synchronously write any buffers that fall
1200		 * into the requested range.  Note:  we are assuming that
1201		 * f_iosize is a power of 2.
1202		 */
1203		int iosize = vp->v_mount->mnt_stat.f_iosize;
1204		int iomask = iosize - 1;
1205		struct bufobj *bo;
1206		daddr_t lblkno;
1207
1208		/*
1209		 * Align to iosize boundry, super-align to page boundry.
1210		 */
1211		if (off & iomask) {
1212			cnt += off & iomask;
1213			off &= ~(u_quad_t)iomask;
1214		}
1215		if (off & PAGE_MASK) {
1216			cnt += off & PAGE_MASK;
1217			off &= ~(u_quad_t)PAGE_MASK;
1218		}
1219		lblkno = off / iosize;
1220
1221		if (vp->v_object &&
1222		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1223			VM_OBJECT_LOCK(vp->v_object);
1224			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
1225			VM_OBJECT_UNLOCK(vp->v_object);
1226		}
1227
1228		bo = &vp->v_bufobj;
1229		BO_LOCK(bo);
1230		while (cnt > 0) {
1231			struct buf *bp;
1232
1233			/*
1234			 * If we have a buffer and it is marked B_DELWRI we
1235			 * have to lock and write it.  Otherwise the prior
1236			 * write is assumed to have already been committed.
1237			 *
1238			 * gbincore() can return invalid buffers now so we
1239			 * have to check that bit as well (though B_DELWRI
1240			 * should not be set if B_INVAL is set there could be
1241			 * a race here since we haven't locked the buffer).
1242			 */
1243			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
1244				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
1245				    LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
1246					BO_LOCK(bo);
1247					continue; /* retry */
1248				}
1249			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
1250				    B_DELWRI) {
1251					bremfree(bp);
1252					bp->b_flags &= ~B_ASYNC;
1253					bwrite(bp);
1254					++nfs_commit_miss;
1255				} else
1256					BUF_UNLOCK(bp);
1257				BO_LOCK(bo);
1258			}
1259			++nfs_commit_blks;
1260			if (cnt < iosize)
1261				break;
1262			cnt -= iosize;
1263			++lblkno;
1264		}
1265		BO_UNLOCK(bo);
1266	}
1267	return (error);
1268}
1269
1270/*
1271 * Statfs vnode op.
1272 */
1273int
1274nfsvno_statfs(struct vnode *vp, struct statfs *sf)
1275{
1276
1277	return (VFS_STATFS(vp->v_mount, sf));
1278}
1279
1280/*
1281 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
1282 * must handle nfsrv_opencheck() calls after any other access checks.
1283 */
1284void
1285nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
1286    nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
1287    int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
1288    NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
1289    struct nfsexstuff *exp, struct vnode **vpp)
1290{
1291	struct vnode *vp = NULL;
1292	u_quad_t tempsize;
1293	struct nfsexstuff nes;
1294
1295	if (ndp->ni_vp == NULL)
1296		nd->nd_repstat = nfsrv_opencheck(clientid,
1297		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
1298	if (!nd->nd_repstat) {
1299		if (ndp->ni_vp == NULL) {
1300			vrele(ndp->ni_startdir);
1301			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
1302			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
1303			vput(ndp->ni_dvp);
1304			nfsvno_relpathbuf(ndp);
1305			if (!nd->nd_repstat) {
1306				if (*exclusive_flagp) {
1307					*exclusive_flagp = 0;
1308					NFSVNO_ATTRINIT(nvap);
1309					nvap->na_atime.tv_sec = cverf[0];
1310					nvap->na_atime.tv_nsec = cverf[1];
1311					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
1312					    &nvap->na_vattr, cred);
1313				} else {
1314					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
1315					    aclp, p, attrbitp, exp);
1316				}
1317			}
1318			vp = ndp->ni_vp;
1319		} else {
1320			if (ndp->ni_startdir)
1321				vrele(ndp->ni_startdir);
1322			nfsvno_relpathbuf(ndp);
1323			vp = ndp->ni_vp;
1324			if (create == NFSV4OPEN_CREATE) {
1325				if (ndp->ni_dvp == vp)
1326					vrele(ndp->ni_dvp);
1327				else
1328					vput(ndp->ni_dvp);
1329			}
1330			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
1331				if (ndp->ni_cnd.cn_flags & RDONLY)
1332					NFSVNO_SETEXRDONLY(&nes);
1333				else
1334					NFSVNO_EXINIT(&nes);
1335				nd->nd_repstat = nfsvno_accchk(vp,
1336				    VWRITE, cred, &nes, p,
1337				    NFSACCCHK_NOOVERRIDE,
1338				    NFSACCCHK_VPISLOCKED, NULL);
1339				nd->nd_repstat = nfsrv_opencheck(clientid,
1340				    stateidp, stp, vp, nd, p, nd->nd_repstat);
1341				if (!nd->nd_repstat) {
1342					tempsize = nvap->na_size;
1343					NFSVNO_ATTRINIT(nvap);
1344					nvap->na_size = tempsize;
1345					nd->nd_repstat = VOP_SETATTR(vp,
1346					    &nvap->na_vattr, cred);
1347				}
1348			} else if (vp->v_type == VREG) {
1349				nd->nd_repstat = nfsrv_opencheck(clientid,
1350				    stateidp, stp, vp, nd, p, nd->nd_repstat);
1351			}
1352		}
1353	} else {
1354		if (ndp->ni_cnd.cn_flags & HASBUF)
1355			nfsvno_relpathbuf(ndp);
1356		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
1357			vrele(ndp->ni_startdir);
1358			if (ndp->ni_dvp == ndp->ni_vp)
1359				vrele(ndp->ni_dvp);
1360			else
1361				vput(ndp->ni_dvp);
1362			if (ndp->ni_vp)
1363				vput(ndp->ni_vp);
1364		}
1365	}
1366	*vpp = vp;
1367}
1368
1369/*
1370 * Updates the file rev and sets the mtime and ctime
1371 * to the current clock time, returning the va_filerev and va_Xtime
1372 * values.
1373 */
1374void
1375nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
1376    struct ucred *cred, struct thread *p)
1377{
1378	struct vattr va;
1379
1380	VATTR_NULL(&va);
1381	getnanotime(&va.va_mtime);
1382	(void) VOP_SETATTR(vp, &va, cred);
1383	(void) nfsvno_getattr(vp, nvap, cred, p, 1);
1384}
1385
1386/*
1387 * Glue routine to nfsv4_fillattr().
1388 */
1389int
1390nfsvno_fillattr(struct nfsrv_descript *nd, struct vnode *vp,
1391    struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
1392    struct ucred *cred, struct thread *p, int isdgram, int reterr)
1393{
1394	int error;
1395
1396	error = nfsv4_fillattr(nd, vp, NULL, &nvap->na_vattr, fhp, rderror,
1397	    attrbitp, cred, p, isdgram, reterr);
1398	return (error);
1399}
1400
1401/* Since the Readdir vnode ops vary, put the entire functions in here. */
1402/*
1403 * nfs readdir service
1404 * - mallocs what it thinks is enough to read
1405 *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
1406 * - calls VOP_READDIR()
1407 * - loops around building the reply
1408 *	if the output generated exceeds count break out of loop
1409 *	The NFSM_CLGET macro is used here so that the reply will be packed
1410 *	tightly in mbuf clusters.
1411 * - it trims out records with d_fileno == 0
1412 *	this doesn't matter for Unix clients, but they might confuse clients
1413 *	for other os'.
1414 * - it trims out records with d_type == DT_WHT
1415 *	these cannot be seen through NFS (unless we extend the protocol)
1416 *     The alternate call nfsrvd_readdirplus() does lookups as well.
1417 * PS: The NFS protocol spec. does not clarify what the "count" byte
1418 *	argument is a count of.. just name strings and file id's or the
1419 *	entire reply rpc or ...
1420 *	I tried just file name and id sizes and it confused the Sun client,
1421 *	so I am using the full rpc size now. The "paranoia.." comment refers
1422 *	to including the status longwords that are not a part of the dir.
1423 *	"entry" structures, but are in the rpc.
1424 */
/*
 * Arguments, as used by the code below:
 *  nd      - request/reply descriptor; the request arguments are parsed
 *            from it, the reply is appended to it and the protocol
 *            status is left in nd->nd_repstat.
 *  isdgram - not referenced in this function.
 *  vp      - the directory vnode.  Every path taken after the request
 *            has been parsed ends with vput(vp), so it is presumably
 *            passed in locked and referenced - TODO confirm with caller.
 *  p       - thread, passed on to nfsvno_getattr() and nfsvno_accchk().
 *  exp     - export information for the access check.
 *
 * Returns 0 whenever a reply (success or failure) has been built; the
 * only other return is through the nfsmout label with the value held in
 * error.
 */
1425int
1426nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
1427    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1428{
1429	struct dirent *dp;
1430	u_int32_t *tl;
1431	int dirlen;
1432	char *cpos, *cend, *rbuf;
1433	struct nfsvattr at;
1434	int nlen, error = 0, getret = 1;
1435	int siz, cnt, fullsiz, eofflag, ncookies;
1436	u_int64_t off, toff, verf;
1437	u_long *cookies = NULL, *cookiep;
1438	struct uio io;
1439	struct iovec iv;
1440	int not_zfs;
1441
	/*
	 * A failure was already recorded for this request: emit the
	 * post-op attributes and return.  vp is not touched on this path.
	 */
1442	if (nd->nd_repstat) {
1443		nfsrv_postopattr(nd, getret, &at);
1444		return (0);
1445	}
	/*
	 * NFSv2 sends a 32 bit cookie followed by the count.  Otherwise
	 * a 64 bit cookie and a 64 bit cookie verifier precede the count.
	 * verf is only consulted by the "#if 0" code further down.
	 */
1446	if (nd->nd_flag & ND_NFSV2) {
1447		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1448		off = fxdr_unsigned(u_quad_t, *tl++);
1449	} else {
1450		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1451		off = fxdr_hyper(tl);
1452		tl += 2;
1453		verf = fxdr_hyper(tl);
1454		tl += 2;
1455	}
	/* toff keeps the cookie the client asked to resume after. */
1456	toff = off;
1457	cnt = fxdr_unsigned(int, *tl);
	/* A negative or oversized count is clamped to the server maximum. */
1458	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
1459		cnt = NFS_SRVMAXDATA(nd);
	/*
	 * Round the read size up to a multiple of DIRBLKSIZ; fullsiz
	 * remembers it so that a retry can restore siz.
	 */
1460	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1461	fullsiz = siz;
1462	if (nd->nd_flag & ND_NFSV3) {
1463		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
1464		    p, 1);
1465#if 0
1466		/*
1467		 * va_filerev is not sufficient as a cookie verifier,
1468		 * since it is not supposed to change when entries are
1469		 * removed/added unless that offset cookies returned to
1470		 * the client are no longer valid.
1471		 */
1472		if (!nd->nd_repstat && toff && verf != at.na_filerev)
1473			nd->nd_repstat = NFSERR_BAD_COOKIE;
1474#endif
1475	}
1476	if (nd->nd_repstat == 0 && cnt == 0) {
1477		if (nd->nd_flag & ND_NFSV2)
1478			/* NFSv2 does not have NFSERR_TOOSMALL */
1479			nd->nd_repstat = EPERM;
1480		else
1481			nd->nd_repstat = NFSERR_TOOSMALL;
1482	}
1483	if (!nd->nd_repstat)
1484		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1485		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1486		    NFSACCCHK_VPISLOCKED, NULL);
1487	if (nd->nd_repstat) {
1488		vput(vp);
1489		if (nd->nd_flag & ND_NFSV3)
1490			nfsrv_postopattr(nd, getret, &at);
1491		return (0);
1492	}
	/* strcmp() result: non-zero when the file system is not "zfs". */
1493	not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
1494	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
	/*
	 * Read (or re-read) a chunk of the directory starting at off.
	 * Cookies left over from a previous pass are freed first.
	 */
1495again:
1496	eofflag = 0;
1497	if (cookies) {
1498		free((caddr_t)cookies, M_TEMP);
1499		cookies = NULL;
1500	}
1501
1502	iv.iov_base = rbuf;
1503	iv.iov_len = siz;
1504	io.uio_iov = &iv;
1505	io.uio_iovcnt = 1;
1506	io.uio_offset = (off_t)off;
1507	io.uio_resid = siz;
1508	io.uio_segflg = UIO_SYSSPACE;
1509	io.uio_rw = UIO_READ;
1510	io.uio_td = NULL;
1511	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1512	    &cookies);
	/*
	 * off now holds the offset left in the uio by VOP_READDIR(), and
	 * siz is reduced to the number of bytes actually placed in rbuf.
	 */
1513	off = (u_int64_t)io.uio_offset;
1514	if (io.uio_resid)
1515		siz -= io.uio_resid;
1516
	/* No cookie array came back although the read succeeded. */
1517	if (!cookies && !nd->nd_repstat)
1518		nd->nd_repstat = NFSERR_PERM;
1519	if (nd->nd_flag & ND_NFSV3) {
1520		getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
1521		if (!nd->nd_repstat)
1522			nd->nd_repstat = getret;
1523	}
1524
1525	/*
1526	 * Handles the failed cases. nd->nd_repstat == 0 past here.
1527	 */
1528	if (nd->nd_repstat) {
1529		vput(vp);
1530		free((caddr_t)rbuf, M_TEMP);
1531		if (cookies)
1532			free((caddr_t)cookies, M_TEMP);
1533		if (nd->nd_flag & ND_NFSV3)
1534			nfsrv_postopattr(nd, getret, &at);
1535		return (0);
1536	}
1537	/*
1538	 * If nothing read, return eof
1539	 * rpc reply
1540	 */
1541	if (siz == 0) {
1542		vput(vp);
1543		if (nd->nd_flag & ND_NFSV2) {
1544			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1545		} else {
1546			nfsrv_postopattr(nd, getret, &at);
1547			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1548			txdr_hyper(at.na_filerev, tl);
1549			tl += 2;
1550		}
		/* "No entry follows", then "end of directory". */
1551		*tl++ = newnfs_false;
1552		*tl = newnfs_true;
1553		FREE((caddr_t)rbuf, M_TEMP);
1554		FREE((caddr_t)cookies, M_TEMP);
1555		return (0);
1556	}
1557
1558	/*
1559	 * Check for degenerate cases of nothing useful read.
1560	 * If so go try again
1561	 */
1562	cpos = rbuf;
1563	cend = rbuf + siz;
1564	dp = (struct dirent *)cpos;
1565	cookiep = cookies;
1566
1567	/*
1568	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1569	 * directory offset up to a block boundary, so it is necessary to
1570	 * skip over the records that precede the requested offset. This
1571	 * requires the assumption that file offset cookies monotonically
1572	 * increase.
1573	 * Since the offset cookies don't monotonically increase for ZFS,
1574	 * this is not done when ZFS is the file system.
1575	 */
1576	while (cpos < cend && ncookies > 0 &&
1577	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1578	     (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
1579		cpos += dp->d_reclen;
1580		dp = (struct dirent *)cpos;
1581		cookiep++;
1582		ncookies--;
1583	}
	/*
	 * Every record of this chunk was skipped: read the next chunk,
	 * using the offset at which VOP_READDIR() stopped as the new
	 * starting cookie.
	 */
1584	if (cpos >= cend || ncookies == 0) {
1585		siz = fullsiz;
1586		toff = off;
1587		goto again;
1588	}
	/* The vnode is not used again; the reply is built from rbuf. */
1589	vput(vp);
1590
1591	/*
1592	 * dirlen is the size of the reply, including all XDR and must
1593	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
1594	 * if the XDR should be included in "count", but to be safe, we do.
1595	 * (Include the two booleans at the end of the reply in dirlen now.)
1596	 */
1597	if (nd->nd_flag & ND_NFSV3) {
1598		nfsrv_postopattr(nd, getret, &at);
1599		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1600		txdr_hyper(at.na_filerev, tl);
1601		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1602	} else {
1603		dirlen = 2 * NFSX_UNSIGNED;
1604	}
1605
1606	/* Loop through the records and build reply */
1607	while (cpos < cend && ncookies > 0) {
1608		nlen = dp->d_namlen;
1609		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1610			nlen <= NFS_MAXNAMLEN) {
			/*
			 * Account for this entry before building it, so
			 * that an entry which does not fit is never added
			 * to the reply.
			 */
1611			if (nd->nd_flag & ND_NFSV3)
1612				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1613			else
1614				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1615			if (dirlen > cnt) {
1616				eofflag = 0;
1617				break;
1618			}
1619
1620			/*
1621			 * Build the directory record xdr from
1622			 * the dirent entry.
1623			 */
			/*
			 * For NFSv3 the file id and the cookie are 64 bit
			 * values whose high order word is written as 0.
			 */
1624			if (nd->nd_flag & ND_NFSV3) {
1625				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1626				*tl++ = newnfs_true;
1627				*tl++ = 0;
1628			} else {
1629				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1630				*tl++ = newnfs_true;
1631			}
1632			*tl = txdr_unsigned(dp->d_fileno);
1633			(void) nfsm_strtom(nd, dp->d_name, nlen);
1634			if (nd->nd_flag & ND_NFSV3) {
1635				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1636				*tl++ = 0;
1637			} else
1638				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1639			*tl = txdr_unsigned(*cookiep);
1640		}
1641		cpos += dp->d_reclen;
1642		dp = (struct dirent *)cpos;
1643		cookiep++;
1644		ncookies--;
1645	}
	/*
	 * Records remain in rbuf that were not put in the reply, so the
	 * end of the directory has not been reached.
	 */
1646	if (cpos < cend)
1647		eofflag = 0;
	/* Trailer: "no more entries in this reply" and the eof flag. */
1648	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1649	*tl++ = newnfs_false;
1650	if (eofflag)
1651		*tl = newnfs_true;
1652	else
1653		*tl = newnfs_false;
1654	FREE((caddr_t)rbuf, M_TEMP);
1655	FREE((caddr_t)cookies, M_TEMP);
1656	return (0);
	/*
	 * No goto in this function names this label explicitly; it is
	 * presumably the target NFSM_DISSECT() jumps to, with error set,
	 * when the request is too short - TODO confirm against the macro.
	 */
1657nfsmout:
1658	vput(vp);
1659	return (error);
1660}
1661
1662/*
1663 * Readdirplus for V3 and Readdir for V4.
1664 */
1665int
1666nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
1667    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1668{
1669	struct dirent *dp;
1670	u_int32_t *tl;
1671	int dirlen;
1672	char *cpos, *cend, *rbuf;
1673	struct vnode *nvp;
1674	fhandle_t nfh;
1675	struct nfsvattr nva, at, *nvap = &nva;
1676	struct mbuf *mb0, *mb1;
1677	struct nfsreferral *refp;
1678	int nlen, r, error = 0, getret = 1, usevget = 1;
1679	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
1680	caddr_t bpos0, bpos1;
1681	u_int64_t off, toff, verf;
1682	u_long *cookies = NULL, *cookiep;
1683	nfsattrbit_t attrbits, rderrbits, savbits;
1684	struct uio io;
1685	struct iovec iv;
1686	struct componentname cn;
1687	int not_zfs;
1688	struct mount *mp;
1689
1690	if (nd->nd_repstat) {
1691		nfsrv_postopattr(nd, getret, &at);
1692		return (0);
1693	}
1694	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
1695	off = fxdr_hyper(tl);
1696	toff = off;
1697	tl += 2;
1698	verf = fxdr_hyper(tl);
1699	tl += 2;
1700	siz = fxdr_unsigned(int, *tl++);
1701	cnt = fxdr_unsigned(int, *tl);
1702
1703	/*
1704	 * Use the server's maximum data transfer size as the upper bound
1705	 * on reply datalen.
1706	 */
1707	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
1708		cnt = NFS_SRVMAXDATA(nd);
1709
1710	/*
1711	 * siz is a "hint" of how much directory information (name, fileid,
1712	 * cookie) should be in the reply. At least one client "hints" 0,
1713	 * so I set it to cnt for that case. I also round it up to the
1714	 * next multiple of DIRBLKSIZ.
1715	 */
1716	if (siz <= 0)
1717		siz = cnt;
1718	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1719
1720	if (nd->nd_flag & ND_NFSV4) {
1721		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1722		if (error)
1723			goto nfsmout;
1724		NFSSET_ATTRBIT(&savbits, &attrbits);
1725		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
1726		NFSZERO_ATTRBIT(&rderrbits);
1727		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
1728	} else {
1729		NFSZERO_ATTRBIT(&attrbits);
1730	}
1731	fullsiz = siz;
1732	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
1733	if (!nd->nd_repstat) {
1734	    if (off && verf != at.na_filerev) {
1735		/*
1736		 * va_filerev is not sufficient as a cookie verifier,
1737		 * since it is not supposed to change when entries are
1738		 * removed/added unless that offset cookies returned to
1739		 * the client are no longer valid.
1740		 */
1741#if 0
1742		if (nd->nd_flag & ND_NFSV4) {
1743			nd->nd_repstat = NFSERR_NOTSAME;
1744		} else {
1745			nd->nd_repstat = NFSERR_BAD_COOKIE;
1746		}
1747#endif
1748	    } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
1749		nd->nd_repstat = NFSERR_BAD_COOKIE;
1750	    }
1751	}
1752	if (!nd->nd_repstat && vp->v_type != VDIR)
1753		nd->nd_repstat = NFSERR_NOTDIR;
1754	if (!nd->nd_repstat && cnt == 0)
1755		nd->nd_repstat = NFSERR_TOOSMALL;
1756	if (!nd->nd_repstat)
1757		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1758		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1759		    NFSACCCHK_VPISLOCKED, NULL);
1760	if (nd->nd_repstat) {
1761		vput(vp);
1762		if (nd->nd_flag & ND_NFSV3)
1763			nfsrv_postopattr(nd, getret, &at);
1764		return (0);
1765	}
1766	not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
1767
1768	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1769again:
1770	eofflag = 0;
1771	if (cookies) {
1772		free((caddr_t)cookies, M_TEMP);
1773		cookies = NULL;
1774	}
1775
1776	iv.iov_base = rbuf;
1777	iv.iov_len = siz;
1778	io.uio_iov = &iv;
1779	io.uio_iovcnt = 1;
1780	io.uio_offset = (off_t)off;
1781	io.uio_resid = siz;
1782	io.uio_segflg = UIO_SYSSPACE;
1783	io.uio_rw = UIO_READ;
1784	io.uio_td = NULL;
1785	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1786	    &cookies);
1787	off = (u_int64_t)io.uio_offset;
1788	if (io.uio_resid)
1789		siz -= io.uio_resid;
1790
1791	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
1792
1793	if (!cookies && !nd->nd_repstat)
1794		nd->nd_repstat = NFSERR_PERM;
1795	if (!nd->nd_repstat)
1796		nd->nd_repstat = getret;
1797	if (nd->nd_repstat) {
1798		vput(vp);
1799		if (cookies)
1800			free((caddr_t)cookies, M_TEMP);
1801		free((caddr_t)rbuf, M_TEMP);
1802		if (nd->nd_flag & ND_NFSV3)
1803			nfsrv_postopattr(nd, getret, &at);
1804		return (0);
1805	}
1806	/*
1807	 * If nothing read, return eof
1808	 * rpc reply
1809	 */
1810	if (siz == 0) {
1811		vput(vp);
1812		if (nd->nd_flag & ND_NFSV3)
1813			nfsrv_postopattr(nd, getret, &at);
1814		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1815		txdr_hyper(at.na_filerev, tl);
1816		tl += 2;
1817		*tl++ = newnfs_false;
1818		*tl = newnfs_true;
1819		free((caddr_t)cookies, M_TEMP);
1820		free((caddr_t)rbuf, M_TEMP);
1821		return (0);
1822	}
1823
1824	/*
1825	 * Check for degenerate cases of nothing useful read.
1826	 * If so go try again
1827	 */
1828	cpos = rbuf;
1829	cend = rbuf + siz;
1830	dp = (struct dirent *)cpos;
1831	cookiep = cookies;
1832
1833	/*
1834	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1835	 * directory offset up to a block boundary, so it is necessary to
1836	 * skip over the records that precede the requested offset. This
1837	 * requires the assumption that file offset cookies monotonically
1838	 * increase.
1839	 * Since the offset cookies don't monotonically increase for ZFS,
1840	 * this is not done when ZFS is the file system.
1841	 */
1842	while (cpos < cend && ncookies > 0 &&
1843	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1844	   (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff) ||
1845	   ((nd->nd_flag & ND_NFSV4) &&
1846	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1847	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
1848		cpos += dp->d_reclen;
1849		dp = (struct dirent *)cpos;
1850		cookiep++;
1851		ncookies--;
1852	}
1853	if (cpos >= cend || ncookies == 0) {
1854		siz = fullsiz;
1855		toff = off;
1856		goto again;
1857	}
1858
1859	/*
1860	 * Busy the file system so that the mount point won't go away
1861	 * and, as such, VFS_VGET() can be used safely.
1862	 */
1863	mp = vp->v_mount;
1864	vfs_ref(mp);
1865	VOP_UNLOCK(vp, 0);
1866	nd->nd_repstat = vfs_busy(mp, 0);
1867	vfs_rel(mp);
1868	if (nd->nd_repstat != 0) {
1869		vrele(vp);
1870		free(cookies, M_TEMP);
1871		free(rbuf, M_TEMP);
1872		if (nd->nd_flag & ND_NFSV3)
1873			nfsrv_postopattr(nd, getret, &at);
1874		return (0);
1875	}
1876
1877	/*
1878	 * Save this position, in case there is an error before one entry
1879	 * is created.
1880	 */
1881	mb0 = nd->nd_mb;
1882	bpos0 = nd->nd_bpos;
1883
1884	/*
1885	 * Fill in the first part of the reply.
1886	 * dirlen is the reply length in bytes and cannot exceed cnt.
1887	 * (Include the two booleans at the end of the reply in dirlen now,
1888	 *  so we recognize when we have exceeded cnt.)
1889	 */
1890	if (nd->nd_flag & ND_NFSV3) {
1891		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1892		nfsrv_postopattr(nd, getret, &at);
1893	} else {
1894		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
1895	}
1896	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
1897	txdr_hyper(at.na_filerev, tl);
1898
1899	/*
1900	 * Save this position, in case there is an empty reply needed.
1901	 */
1902	mb1 = nd->nd_mb;
1903	bpos1 = nd->nd_bpos;
1904
1905	/* Loop through the records and build reply */
1906	entrycnt = 0;
1907	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
1908		nlen = dp->d_namlen;
1909		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1910		    nlen <= NFS_MAXNAMLEN &&
1911		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
1912		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
1913		      || (nlen == 1 && dp->d_name[0] != '.'))) {
1914			/*
1915			 * Save the current position in the reply, in case
1916			 * this entry exceeds cnt.
1917			 */
1918			mb1 = nd->nd_mb;
1919			bpos1 = nd->nd_bpos;
1920
1921			/*
1922			 * For readdir_and_lookup get the vnode using
1923			 * the file number.
1924			 */
1925			nvp = NULL;
1926			refp = NULL;
1927			r = 0;
1928			if ((nd->nd_flag & ND_NFSV3) ||
1929			    NFSNONZERO_ATTRBIT(&savbits)) {
1930				if (nd->nd_flag & ND_NFSV4)
1931					refp = nfsv4root_getreferral(NULL,
1932					    vp, dp->d_fileno);
1933				if (refp == NULL) {
1934					if (usevget)
1935						r = VFS_VGET(mp, dp->d_fileno,
1936						    LK_SHARED, &nvp);
1937					else
1938						r = EOPNOTSUPP;
1939					if (r == EOPNOTSUPP) {
1940						if (usevget) {
1941							usevget = 0;
1942							cn.cn_nameiop = LOOKUP;
1943							cn.cn_lkflags =
1944							    LK_SHARED |
1945							    LK_RETRY;
1946							cn.cn_cred =
1947							    nd->nd_cred;
1948							cn.cn_thread = p;
1949						}
1950						cn.cn_nameptr = dp->d_name;
1951						cn.cn_namelen = nlen;
1952						cn.cn_flags = ISLASTCN |
1953						    NOFOLLOW | LOCKLEAF |
1954						    MPSAFE;
1955						if (nlen == 2 &&
1956						    dp->d_name[0] == '.' &&
1957						    dp->d_name[1] == '.')
1958							cn.cn_flags |=
1959							    ISDOTDOT;
1960						if (vn_lock(vp, LK_SHARED)
1961						    != 0) {
1962							nd->nd_repstat = EPERM;
1963							break;
1964						}
1965						if ((vp->v_vflag & VV_ROOT) != 0
1966						    && (cn.cn_flags & ISDOTDOT)
1967						    != 0) {
1968							vref(vp);
1969							nvp = vp;
1970							r = 0;
1971						} else
1972							r = VOP_LOOKUP(vp, &nvp,
1973							    &cn);
1974					}
1975				}
1976				if (!r) {
1977				    if (refp == NULL &&
1978					((nd->nd_flag & ND_NFSV3) ||
1979					 NFSNONZERO_ATTRBIT(&attrbits))) {
1980					r = nfsvno_getfh(nvp, &nfh, p);
1981					if (!r)
1982					    r = nfsvno_getattr(nvp, nvap,
1983						nd->nd_cred, p, 1);
1984				    }
1985				} else {
1986				    nvp = NULL;
1987				}
1988				if (r) {
1989					if (!NFSISSET_ATTRBIT(&attrbits,
1990					    NFSATTRBIT_RDATTRERROR)) {
1991						if (nvp != NULL)
1992							vput(nvp);
1993						nd->nd_repstat = r;
1994						break;
1995					}
1996				}
1997			}
1998
1999			/*
2000			 * Build the directory record xdr
2001			 */
2002			if (nd->nd_flag & ND_NFSV3) {
2003				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2004				*tl++ = newnfs_true;
2005				*tl++ = 0;
2006				*tl = txdr_unsigned(dp->d_fileno);
2007				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
2008				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2009				*tl++ = 0;
2010				*tl = txdr_unsigned(*cookiep);
2011				nfsrv_postopattr(nd, 0, nvap);
2012				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
2013				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
2014				if (nvp != NULL)
2015					vput(nvp);
2016			} else {
2017				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2018				*tl++ = newnfs_true;
2019				*tl++ = 0;
2020				*tl = txdr_unsigned(*cookiep);
2021				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
2022				if (nvp != NULL)
2023					VOP_UNLOCK(nvp, 0);
2024				if (refp != NULL) {
2025					dirlen += nfsrv_putreferralattr(nd,
2026					    &savbits, refp, 0,
2027					    &nd->nd_repstat);
2028					if (nd->nd_repstat) {
2029						if (nvp != NULL)
2030							vrele(nvp);
2031						break;
2032					}
2033				} else if (r) {
2034					dirlen += nfsvno_fillattr(nd, nvp, nvap,
2035					    &nfh, r, &rderrbits, nd->nd_cred,
2036					    p, isdgram, 0);
2037				} else {
2038					dirlen += nfsvno_fillattr(nd, nvp, nvap,
2039					    &nfh, r, &attrbits, nd->nd_cred,
2040					    p, isdgram, 0);
2041				}
2042				if (nvp != NULL)
2043					vrele(nvp);
2044				dirlen += (3 * NFSX_UNSIGNED);
2045			}
2046			if (dirlen <= cnt)
2047				entrycnt++;
2048		}
2049		cpos += dp->d_reclen;
2050		dp = (struct dirent *)cpos;
2051		cookiep++;
2052		ncookies--;
2053	}
2054	vrele(vp);
2055	vfs_unbusy(mp);
2056
2057	/*
2058	 * If dirlen > cnt, we must strip off the last entry. If that
2059	 * results in an empty reply, report NFSERR_TOOSMALL.
2060	 */
2061	if (dirlen > cnt || nd->nd_repstat) {
2062		if (!nd->nd_repstat && entrycnt == 0)
2063			nd->nd_repstat = NFSERR_TOOSMALL;
2064		if (nd->nd_repstat)
2065			newnfs_trimtrailing(nd, mb0, bpos0);
2066		else
2067			newnfs_trimtrailing(nd, mb1, bpos1);
2068		eofflag = 0;
2069	} else if (cpos < cend)
2070		eofflag = 0;
2071	if (!nd->nd_repstat) {
2072		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2073		*tl++ = newnfs_false;
2074		if (eofflag)
2075			*tl = newnfs_true;
2076		else
2077			*tl = newnfs_false;
2078	}
2079	FREE((caddr_t)cookies, M_TEMP);
2080	FREE((caddr_t)rbuf, M_TEMP);
2081	return (0);
2082nfsmout:
2083	vput(vp);
2084	return (error);
2085}
2086
2087/*
2088 * Get the settable attributes out of the mbuf list.
2089 * (Return 0 or EBADRPC)
2090 */
2091int
2092nfsrv_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2093    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2094{
2095	u_int32_t *tl;
2096	struct nfsv2_sattr *sp;
2097	struct timeval curtime;
2098	int error = 0, toclient = 0;
2099
2100	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
2101	case ND_NFSV2:
2102		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2103		/*
2104		 * Some old clients didn't fill in the high order 16bits.
2105		 * --> check the low order 2 bytes for 0xffff
2106		 */
2107		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
2108			nvap->na_mode = nfstov_mode(sp->sa_mode);
2109		if (sp->sa_uid != newnfs_xdrneg1)
2110			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
2111		if (sp->sa_gid != newnfs_xdrneg1)
2112			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
2113		if (sp->sa_size != newnfs_xdrneg1)
2114			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
2115		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
2116#ifdef notyet
2117			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
2118#else
2119			nvap->na_atime.tv_sec =
2120				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
2121			nvap->na_atime.tv_nsec = 0;
2122#endif
2123		}
2124		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
2125			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
2126		break;
2127	case ND_NFSV3:
2128		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2129		if (*tl == newnfs_true) {
2130			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2131			nvap->na_mode = nfstov_mode(*tl);
2132		}
2133		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2134		if (*tl == newnfs_true) {
2135			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2136			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
2137		}
2138		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2139		if (*tl == newnfs_true) {
2140			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2141			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
2142		}
2143		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2144		if (*tl == newnfs_true) {
2145			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2146			nvap->na_size = fxdr_hyper(tl);
2147		}
2148		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2149		switch (fxdr_unsigned(int, *tl)) {
2150		case NFSV3SATTRTIME_TOCLIENT:
2151			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2152			fxdr_nfsv3time(tl, &nvap->na_atime);
2153			toclient = 1;
2154			break;
2155		case NFSV3SATTRTIME_TOSERVER:
2156			NFSGETTIME(&curtime);
2157			nvap->na_atime.tv_sec = curtime.tv_sec;
2158			nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2159			nvap->na_vaflags |= VA_UTIMES_NULL;
2160			break;
2161		};
2162		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2163		switch (fxdr_unsigned(int, *tl)) {
2164		case NFSV3SATTRTIME_TOCLIENT:
2165			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2166			fxdr_nfsv3time(tl, &nvap->na_mtime);
2167			nvap->na_vaflags &= ~VA_UTIMES_NULL;
2168			break;
2169		case NFSV3SATTRTIME_TOSERVER:
2170			NFSGETTIME(&curtime);
2171			nvap->na_mtime.tv_sec = curtime.tv_sec;
2172			nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2173			if (!toclient)
2174				nvap->na_vaflags |= VA_UTIMES_NULL;
2175			break;
2176		};
2177		break;
2178	case ND_NFSV4:
2179		error = nfsv4_sattr(nd, nvap, attrbitp, aclp, p);
2180	};
2181nfsmout:
2182	return (error);
2183}
2184
2185/*
2186 * Handle the setable attributes for V4.
2187 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
2188 */
2189int
2190nfsv4_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2191    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2192{
2193	u_int32_t *tl;
2194	int attrsum = 0;
2195	int i, j;
2196	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
2197	int toclient = 0;
2198	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
2199	uid_t uid;
2200	gid_t gid;
2201	struct timeval curtime;
2202
2203	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
2204	if (error)
2205		return (error);
2206	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2207	attrsize = fxdr_unsigned(int, *tl);
2208
2209	/*
2210	 * Loop around getting the setable attributes. If an unsupported
2211	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
2212	 */
2213	if (retnotsup) {
2214		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2215		bitpos = NFSATTRBIT_MAX;
2216	} else {
2217		bitpos = 0;
2218	}
2219	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
2220	    if (attrsum > attrsize) {
2221		error = NFSERR_BADXDR;
2222		goto nfsmout;
2223	    }
2224	    if (NFSISSET_ATTRBIT(attrbitp, bitpos))
2225		switch (bitpos) {
2226		case NFSATTRBIT_SIZE:
2227			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2228			nvap->na_size = fxdr_hyper(tl);
2229			attrsum += NFSX_HYPER;
2230			break;
2231		case NFSATTRBIT_ACL:
2232			error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
2233			    p);
2234			if (error)
2235				goto nfsmout;
2236			if (aceerr && !nd->nd_repstat)
2237				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2238			attrsum += aclsize;
2239			break;
2240		case NFSATTRBIT_ARCHIVE:
2241			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2242			if (!nd->nd_repstat)
2243				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2244			attrsum += NFSX_UNSIGNED;
2245			break;
2246		case NFSATTRBIT_HIDDEN:
2247			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2248			if (!nd->nd_repstat)
2249				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2250			attrsum += NFSX_UNSIGNED;
2251			break;
2252		case NFSATTRBIT_MIMETYPE:
2253			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2254			i = fxdr_unsigned(int, *tl);
2255			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
2256			if (error)
2257				goto nfsmout;
2258			if (!nd->nd_repstat)
2259				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2260			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
2261			break;
2262		case NFSATTRBIT_MODE:
2263			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2264			nvap->na_mode = nfstov_mode(*tl);
2265			attrsum += NFSX_UNSIGNED;
2266			break;
2267		case NFSATTRBIT_OWNER:
2268			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2269			j = fxdr_unsigned(int, *tl);
2270			if (j < 0)
2271				return (NFSERR_BADXDR);
2272			if (j > NFSV4_SMALLSTR)
2273				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2274			else
2275				cp = namestr;
2276			error = nfsrv_mtostr(nd, cp, j);
2277			if (error) {
2278				if (j > NFSV4_SMALLSTR)
2279					free(cp, M_NFSSTRING);
2280				return (error);
2281			}
2282			if (!nd->nd_repstat) {
2283				nd->nd_repstat = nfsv4_strtouid(cp,j,&uid,p);
2284				if (!nd->nd_repstat)
2285					nvap->na_uid = uid;
2286			}
2287			if (j > NFSV4_SMALLSTR)
2288				free(cp, M_NFSSTRING);
2289			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2290			break;
2291		case NFSATTRBIT_OWNERGROUP:
2292			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2293			j = fxdr_unsigned(int, *tl);
2294			if (j < 0)
2295				return (NFSERR_BADXDR);
2296			if (j > NFSV4_SMALLSTR)
2297				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2298			else
2299				cp = namestr;
2300			error = nfsrv_mtostr(nd, cp, j);
2301			if (error) {
2302				if (j > NFSV4_SMALLSTR)
2303					free(cp, M_NFSSTRING);
2304				return (error);
2305			}
2306			if (!nd->nd_repstat) {
2307				nd->nd_repstat = nfsv4_strtogid(cp,j,&gid,p);
2308				if (!nd->nd_repstat)
2309					nvap->na_gid = gid;
2310			}
2311			if (j > NFSV4_SMALLSTR)
2312				free(cp, M_NFSSTRING);
2313			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2314			break;
2315		case NFSATTRBIT_SYSTEM:
2316			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2317			if (!nd->nd_repstat)
2318				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2319			attrsum += NFSX_UNSIGNED;
2320			break;
2321		case NFSATTRBIT_TIMEACCESSSET:
2322			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2323			attrsum += NFSX_UNSIGNED;
2324			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2325			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2326			    fxdr_nfsv4time(tl, &nvap->na_atime);
2327			    toclient = 1;
2328			    attrsum += NFSX_V4TIME;
2329			} else {
2330			    NFSGETTIME(&curtime);
2331			    nvap->na_atime.tv_sec = curtime.tv_sec;
2332			    nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2333			    nvap->na_vaflags |= VA_UTIMES_NULL;
2334			}
2335			break;
2336		case NFSATTRBIT_TIMEBACKUP:
2337			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2338			if (!nd->nd_repstat)
2339				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2340			attrsum += NFSX_V4TIME;
2341			break;
2342		case NFSATTRBIT_TIMECREATE:
2343			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2344			if (!nd->nd_repstat)
2345				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2346			attrsum += NFSX_V4TIME;
2347			break;
2348		case NFSATTRBIT_TIMEMODIFYSET:
2349			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2350			attrsum += NFSX_UNSIGNED;
2351			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2352			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2353			    fxdr_nfsv4time(tl, &nvap->na_mtime);
2354			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
2355			    attrsum += NFSX_V4TIME;
2356			} else {
2357			    NFSGETTIME(&curtime);
2358			    nvap->na_mtime.tv_sec = curtime.tv_sec;
2359			    nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2360			    if (!toclient)
2361				nvap->na_vaflags |= VA_UTIMES_NULL;
2362			}
2363			break;
2364		default:
2365			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2366			/*
2367			 * set bitpos so we drop out of the loop.
2368			 */
2369			bitpos = NFSATTRBIT_MAX;
2370			break;
2371		};
2372	}
2373
2374	/*
2375	 * some clients pad the attrlist, so we need to skip over the
2376	 * padding.
2377	 */
2378	if (attrsum > attrsize) {
2379		error = NFSERR_BADXDR;
2380	} else {
2381		attrsize = NFSM_RNDUP(attrsize);
2382		if (attrsum < attrsize)
2383			error = nfsm_advance(nd, attrsize - attrsum, -1);
2384	}
2385nfsmout:
2386	return (error);
2387}
2388
2389/*
2390 * Check/setup export credentials.
2391 */
2392int
2393nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
2394    struct ucred *credanon)
2395{
2396	int error = 0;
2397
2398	/*
2399	 * Check/setup credentials.
2400	 */
2401	if (nd->nd_flag & ND_GSS)
2402		exp->nes_exflag &= ~MNT_EXPORTANON;
2403
2404	/*
2405	 * Check to see if the operation is allowed for this security flavor.
2406	 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
2407	 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
2408	 * Also, allow Secinfo, so that it can acquire the correct flavor(s).
2409	 */
2410	if (nfsvno_testexp(nd, exp) &&
2411	    nd->nd_procnum != NFSV4OP_SECINFO &&
2412	    nd->nd_procnum != NFSPROC_FSINFO) {
2413		if (nd->nd_flag & ND_NFSV4)
2414			error = NFSERR_WRONGSEC;
2415		else
2416			error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2417		return (error);
2418	}
2419
2420	/*
2421	 * Check to see if the file system is exported V4 only.
2422	 */
2423	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4))
2424		return (NFSERR_PROGNOTV4);
2425
2426	/*
2427	 * Now, map the user credentials.
2428	 * (Note that ND_AUTHNONE will only be set for an NFSv3
2429	 *  Fsinfo RPC. If set for anything else, this code might need
2430	 *  to change.)
2431	 */
2432	if (NFSVNO_EXPORTED(exp) &&
2433	    ((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) ||
2434	     NFSVNO_EXPORTANON(exp) ||
2435	     (nd->nd_flag & ND_AUTHNONE))) {
2436		nd->nd_cred->cr_uid = credanon->cr_uid;
2437		nd->nd_cred->cr_gid = credanon->cr_gid;
2438		crsetgroups(nd->nd_cred, credanon->cr_ngroups,
2439		    credanon->cr_groups);
2440	}
2441	return (0);
2442}
2443
2444/*
2445 * Check exports.
2446 */
2447int
2448nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
2449    struct ucred **credp)
2450{
2451	int i, error, *secflavors;
2452
2453	error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2454	    &exp->nes_numsecflavor, &secflavors);
2455	if (error) {
2456		if (nfs_rootfhset) {
2457			exp->nes_exflag = 0;
2458			exp->nes_numsecflavor = 0;
2459			error = 0;
2460		}
2461	} else {
2462		/* Copy the security flavors. */
2463		for (i = 0; i < exp->nes_numsecflavor; i++)
2464			exp->nes_secflavors[i] = secflavors[i];
2465	}
2466	return (error);
2467}
2468
2469/*
2470 * Get a vnode for a file handle and export stuff.
2471 */
2472int
2473nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
2474    int lktype, struct vnode **vpp, struct nfsexstuff *exp,
2475    struct ucred **credp)
2476{
2477	int i, error, *secflavors;
2478
2479	*credp = NULL;
2480	exp->nes_numsecflavor = 0;
2481	if (VFS_NEEDSGIANT(mp))
2482		error = ESTALE;
2483	else
2484		error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp);
2485	if (error != 0)
2486		/* Make sure the server replies ESTALE to the client. */
2487		error = ESTALE;
2488	if (nam && !error) {
2489		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2490		    &exp->nes_numsecflavor, &secflavors);
2491		if (error) {
2492			if (nfs_rootfhset) {
2493				exp->nes_exflag = 0;
2494				exp->nes_numsecflavor = 0;
2495				error = 0;
2496			} else {
2497				vput(*vpp);
2498			}
2499		} else {
2500			/* Copy the security flavors. */
2501			for (i = 0; i < exp->nes_numsecflavor; i++)
2502				exp->nes_secflavors[i] = secflavors[i];
2503		}
2504	}
2505	if (error == 0 && lktype == LK_SHARED)
2506		/*
2507		 * It would be much better to pass lktype to VFS_FHTOVP(),
2508		 * but this will have to do until VFS_FHTOVP() has a lock
2509		 * type argument like VFS_VGET().
2510		 */
2511		vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
2512	return (error);
2513}
2514
2515/*
2516 * Do the pathconf vnode op.
2517 */
2518int
2519nfsvno_pathconf(struct vnode *vp, int flag, register_t *retf,
2520    struct ucred *cred, struct thread *p)
2521{
2522	int error;
2523
2524	error = VOP_PATHCONF(vp, flag, retf);
2525	return (error);
2526}
2527
2528/*
2529 * nfsd_fhtovp() - convert a fh to a vnode ptr
2530 * 	- look up fsid in mount list (if not found ret error)
2531 *	- get vp and export rights by calling nfsvno_fhtovp()
2532 *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
2533 *	  for AUTH_SYS
2534 *	- if mpp != NULL, return the mount point so that it can
2535 *	  be used for vn_finished_write() by the caller
2536 */
2537void
2538nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
2539    struct vnode **vpp, struct nfsexstuff *exp,
2540    struct mount **mpp, int startwrite, struct thread *p)
2541{
2542	struct mount *mp;
2543	struct ucred *credanon;
2544	fhandle_t *fhp;
2545
2546	fhp = (fhandle_t *)nfp->nfsrvfh_data;
2547	/*
2548	 * Check for the special case of the nfsv4root_fh.
2549	 */
2550	mp = vfs_busyfs(&fhp->fh_fsid);
2551	if (mpp != NULL)
2552		*mpp = mp;
2553	if (mp == NULL) {
2554		*vpp = NULL;
2555		nd->nd_repstat = ESTALE;
2556		return;
2557	}
2558
2559	if (startwrite)
2560		vn_start_write(NULL, mpp, V_WAIT);
2561
2562	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
2563	    &credanon);
2564	vfs_unbusy(mp);
2565
2566	/*
2567	 * For NFSv4 without a pseudo root fs, unexported file handles
2568	 * can be returned, so that Lookup works everywhere.
2569	 */
2570	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
2571	    !(nd->nd_flag & ND_NFSV4)) {
2572		vput(*vpp);
2573		nd->nd_repstat = EACCES;
2574	}
2575
2576	/*
2577	 * Personally, I've never seen any point in requiring a
2578	 * reserved port#, since only in the rare case where the
2579	 * clients are all boxes with secure system priviledges,
2580	 * does it provide any enhanced security, but... some people
2581	 * believe it to be useful and keep putting this code back in.
2582	 * (There is also some "security checker" out there that
2583	 *  complains if the nfs server doesn't enforce this.)
2584	 * However, note the following:
2585	 * RFC3530 (NFSv4) specifies that a reserved port# not be
2586	 *	required.
2587	 * RFC2623 recommends that, if a reserved port# is checked for,
2588	 *	that there be a way to turn that off--> ifdef'd.
2589	 */
2590#ifdef NFS_REQRSVPORT
2591	if (!nd->nd_repstat) {
2592		struct sockaddr_in *saddr;
2593		struct sockaddr_in6 *saddr6;
2594
2595		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
2596		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
2597		if (!(nd->nd_flag & ND_NFSV4) &&
2598		    ((saddr->sin_family == AF_INET &&
2599		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
2600		     (saddr6->sin6_family == AF_INET6 &&
2601		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
2602			vput(*vpp);
2603			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2604		}
2605	}
2606#endif	/* NFS_REQRSVPORT */
2607
2608	/*
2609	 * Check/setup credentials.
2610	 */
2611	if (!nd->nd_repstat) {
2612		nd->nd_saveduid = nd->nd_cred->cr_uid;
2613		nd->nd_repstat = nfsd_excred(nd, exp, credanon);
2614		if (nd->nd_repstat)
2615			vput(*vpp);
2616	}
2617	if (credanon != NULL)
2618		crfree(credanon);
2619	if (nd->nd_repstat) {
2620		if (startwrite)
2621			vn_finished_write(mp);
2622		*vpp = NULL;
2623		if (mpp != NULL)
2624			*mpp = NULL;
2625	}
2626}
2627
2628/*
2629 * glue for fp.
2630 */
2631int
2632fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
2633{
2634	struct filedesc *fdp;
2635	struct file *fp;
2636
2637	fdp = p->td_proc->p_fd;
2638	if (fd >= fdp->fd_nfiles ||
2639	    (fp = fdp->fd_ofiles[fd]) == NULL)
2640		return (EBADF);
2641	*fpp = fp;
2642	return (0);
2643}
2644
2645/*
2646 * Called from nfssvc() to update the exports list. Just call
2647 * vfs_export(). This has to be done, since the v4 root fake fs isn't
2648 * in the mount list.
2649 */
2650int
2651nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
2652{
2653	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
2654	int error;
2655	struct nameidata nd;
2656	fhandle_t fh;
2657
2658	error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
2659	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
2660		nfs_rootfhset = 0;
2661	else if (error == 0) {
2662		if (nfsexargp->fspec == NULL)
2663			return (EPERM);
2664		/*
2665		 * If fspec != NULL, this is the v4root path.
2666		 */
2667		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE,
2668		    nfsexargp->fspec, p);
2669		if ((error = namei(&nd)) != 0)
2670			return (error);
2671		error = nfsvno_getfh(nd.ni_vp, &fh, p);
2672		vrele(nd.ni_vp);
2673		if (!error) {
2674			nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
2675			NFSBCOPY((caddr_t)&fh,
2676			    nfs_rootfh.nfsrvfh_data,
2677			    sizeof (fhandle_t));
2678			nfs_rootfhset = 1;
2679		}
2680	}
2681	return (error);
2682}
2683
2684/*
2685 * Get the tcp socket sequence numbers we need.
2686 * (Maybe this should be moved to the tcp sources?)
2687 */
2688int
2689nfsrv_getsocksndseq(struct socket *so, tcp_seq *maxp, tcp_seq *unap)
2690{
2691	struct inpcb *inp;
2692	struct tcpcb *tp;
2693
2694	inp = sotoinpcb(so);
2695	KASSERT(inp != NULL, ("nfsrv_getsocksndseq: inp == NULL"));
2696	INP_RLOCK(inp);
2697	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
2698		INP_RUNLOCK(inp);
2699		return (EPIPE);
2700	}
2701	tp = intotcpcb(inp);
2702	if (tp->t_state != TCPS_ESTABLISHED) {
2703		INP_RUNLOCK(inp);
2704		return (EPIPE);
2705	}
2706	*maxp = tp->snd_max;
2707	*unap = tp->snd_una;
2708	INP_RUNLOCK(inp);
2709	return (0);
2710}
2711
/*
 * This function is meant to tell whether the system is near its limit
 * for memory allocation via malloc() or mget(), returning True iff
 * either of these resources are near their limit.
 * XXX (For now, this is just a stub.) While nfsrv_testmalloclimit is set,
 * every 1000th call reports "near limit", with a message printed for the
 * first and then every 100th such report.
 */
int nfsrv_testmalloclimit = 0;
int
nfsrv_mallocmget_limit(void)
{
	static int msgcnt = 0;
	static int callcnt = 1;

	if (!nfsrv_testmalloclimit)
		return (0);
	if ((callcnt++ % 1000) != 0)
		return (0);
	if ((msgcnt++ % 100) == 0)
		printf("nfsd: malloc/mget near limit\n");
	return (1);
}
2732
2733/*
2734 * BSD specific initialization of a mount point.
2735 */
2736void
2737nfsd_mntinit(void)
2738{
2739	static int inited = 0;
2740
2741	if (inited)
2742		return;
2743	inited = 1;
2744	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
2745	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
2746	nfsv4root_mnt.mnt_export = NULL;
2747	TAILQ_INIT(&nfsv4root_opt);
2748	TAILQ_INIT(&nfsv4root_newopt);
2749	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
2750	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
2751	nfsv4root_mnt.mnt_nvnodelistsize = 0;
2752}
2753
2754/*
2755 * Get a vnode for a file handle, without checking exports, etc.
2756 */
2757struct vnode *
2758nfsvno_getvp(fhandle_t *fhp)
2759{
2760	struct mount *mp;
2761	struct vnode *vp;
2762	int error;
2763
2764	mp = vfs_busyfs(&fhp->fh_fsid);
2765	if (mp == NULL)
2766		return (NULL);
2767	error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp);
2768	vfs_unbusy(mp);
2769	if (error)
2770		return (NULL);
2771	return (vp);
2772}
2773
2774/*
2775 * Do a local VOP_ADVLOCK().
2776 */
2777int
2778nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
2779    u_int64_t end, struct thread *td)
2780{
2781	int error;
2782	struct flock fl;
2783	u_int64_t tlen;
2784
2785	if (nfsrv_dolocallocks == 0)
2786		return (0);
2787
2788	/* Check for VI_DOOMED here, so that VOP_ADVLOCK() isn't performed. */
2789	if ((vp->v_iflag & VI_DOOMED) != 0)
2790		return (EPERM);
2791
2792	fl.l_whence = SEEK_SET;
2793	fl.l_type = ftype;
2794	fl.l_start = (off_t)first;
2795	if (end == NFS64BITSSET) {
2796		fl.l_len = 0;
2797	} else {
2798		tlen = end - first;
2799		fl.l_len = (off_t)tlen;
2800	}
2801	/*
2802	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2803	 * values for all calls, so that all locks will be held by the
2804	 * nfsd server. (The nfsd server handles conflicts between the
2805	 * various clients.)
2806	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2807	 * bytes, so it can't be put in l_sysid.
2808	 */
2809	if (nfsv4_sysid == 0)
2810		nfsv4_sysid = nlm_acquire_next_sysid();
2811	fl.l_pid = (pid_t)0;
2812	fl.l_sysid = (int)nfsv4_sysid;
2813
2814	NFSVOPUNLOCK(vp, 0, td);
2815	if (ftype == F_UNLCK)
2816		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
2817		    (F_POSIX | F_REMOTE));
2818	else
2819		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
2820		    (F_POSIX | F_REMOTE));
2821	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, td);
2822	return (error);
2823}
2824
2825/*
2826 * Check the nfsv4 root exports.
2827 */
2828int
2829nfsvno_v4rootexport(struct nfsrv_descript *nd)
2830{
2831	struct ucred *credanon;
2832	int exflags, error, numsecflavor, *secflavors, i;
2833
2834	error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
2835	    &credanon, &numsecflavor, &secflavors);
2836	if (error)
2837		return (NFSERR_PROGUNAVAIL);
2838	if (credanon != NULL)
2839		crfree(credanon);
2840	for (i = 0; i < numsecflavor; i++) {
2841		if (secflavors[i] == AUTH_SYS)
2842			nd->nd_flag |= ND_EXAUTHSYS;
2843		else if (secflavors[i] == RPCSEC_GSS_KRB5)
2844			nd->nd_flag |= ND_EXGSS;
2845		else if (secflavors[i] == RPCSEC_GSS_KRB5I)
2846			nd->nd_flag |= ND_EXGSSINTEGRITY;
2847		else if (secflavors[i] == RPCSEC_GSS_KRB5P)
2848			nd->nd_flag |= ND_EXGSSPRIVACY;
2849	}
2850	return (0);
2851}
2852
2853/*
2854 * Nfs server psuedo system call for the nfsd's
2855 */
2856/*
2857 * MPSAFE
2858 */
2859static int
2860nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
2861{
2862	struct file *fp;
2863	struct nfsd_addsock_args sockarg;
2864	struct nfsd_nfsd_args nfsdarg;
2865	int error;
2866
2867	if (uap->flag & NFSSVC_NFSDADDSOCK) {
2868		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
2869		if (error)
2870			return (error);
2871		if ((error = fget(td, sockarg.sock, &fp)) != 0) {
2872			return (error);
2873		}
2874		if (fp->f_type != DTYPE_SOCKET) {
2875			fdrop(fp, td);
2876			return (EPERM);
2877		}
2878		error = nfsrvd_addsock(fp);
2879		fdrop(fp, td);
2880	} else if (uap->flag & NFSSVC_NFSDNFSD) {
2881		if (uap->argp == NULL)
2882			return (EINVAL);
2883		error = copyin(uap->argp, (caddr_t)&nfsdarg,
2884		    sizeof (nfsdarg));
2885		if (error)
2886			return (error);
2887		error = nfsrvd_nfsd(td, &nfsdarg);
2888	} else {
2889		error = nfssvc_srvcall(td, uap, td->td_ucred);
2890	}
2891	return (error);
2892}
2893
2894static int
2895nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
2896{
2897	struct nfsex_args export;
2898	struct file *fp = NULL;
2899	int stablefd, len;
2900	struct nfsd_clid adminrevoke;
2901	struct nfsd_dumplist dumplist;
2902	struct nfsd_dumpclients *dumpclients;
2903	struct nfsd_dumplocklist dumplocklist;
2904	struct nfsd_dumplocks *dumplocks;
2905	struct nameidata nd;
2906	vnode_t vp;
2907	int error = EINVAL;
2908
2909	if (uap->flag & NFSSVC_PUBLICFH) {
2910		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
2911		    sizeof (fhandle_t));
2912		error = copyin(uap->argp,
2913		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
2914		if (!error)
2915			nfs_pubfhset = 1;
2916	} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
2917		error = copyin(uap->argp,(caddr_t)&export,
2918		    sizeof (struct nfsex_args));
2919		if (!error)
2920			error = nfsrv_v4rootexport(&export, cred, p);
2921	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
2922		nfs_pubfhset = 0;
2923		error = 0;
2924	} else if (uap->flag & NFSSVC_STABLERESTART) {
2925		error = copyin(uap->argp, (caddr_t)&stablefd,
2926		    sizeof (int));
2927		if (!error)
2928			error = fp_getfvp(p, stablefd, &fp, &vp);
2929		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
2930			error = EBADF;
2931		if (!error && newnfs_numnfsd != 0)
2932			error = EPERM;
2933		if (!error) {
2934			nfsrv_stablefirst.nsf_fp = fp;
2935			nfsrv_setupstable(p);
2936		}
2937	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
2938		error = copyin(uap->argp, (caddr_t)&adminrevoke,
2939		    sizeof (struct nfsd_clid));
2940		if (!error)
2941			error = nfsrv_adminrevoke(&adminrevoke, p);
2942	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
2943		error = copyin(uap->argp, (caddr_t)&dumplist,
2944		    sizeof (struct nfsd_dumplist));
2945		if (!error && (dumplist.ndl_size < 1 ||
2946			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
2947			error = EPERM;
2948		if (!error) {
2949		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
2950		    dumpclients = (struct nfsd_dumpclients *)malloc(len,
2951			M_TEMP, M_WAITOK);
2952		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
2953		    error = copyout(dumpclients,
2954			CAST_USER_ADDR_T(dumplist.ndl_list), len);
2955		    free((caddr_t)dumpclients, M_TEMP);
2956		}
2957	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
2958		error = copyin(uap->argp, (caddr_t)&dumplocklist,
2959		    sizeof (struct nfsd_dumplocklist));
2960		if (!error && (dumplocklist.ndllck_size < 1 ||
2961			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
2962			error = EPERM;
2963		if (!error)
2964			error = nfsrv_lookupfilename(&nd,
2965				dumplocklist.ndllck_fname, p);
2966		if (!error) {
2967			len = sizeof (struct nfsd_dumplocks) *
2968				dumplocklist.ndllck_size;
2969			dumplocks = (struct nfsd_dumplocks *)malloc(len,
2970				M_TEMP, M_WAITOK);
2971			nfsrv_dumplocks(nd.ni_vp, dumplocks,
2972			    dumplocklist.ndllck_size, p);
2973			vput(nd.ni_vp);
2974			error = copyout(dumplocks,
2975			    CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
2976			free((caddr_t)dumplocks, M_TEMP);
2977		}
2978	}
2979	return (error);
2980}
2981
2982/*
2983 * Check exports.
2984 * Returns 0 if ok, 1 otherwise.
2985 */
2986int
2987nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
2988{
2989	int i;
2990
2991	/*
2992	 * This seems odd, but allow the case where the security flavor
2993	 * list is empty. This happens when NFSv4 is traversing non-exported
2994	 * file systems. Exported file systems should always have a non-empty
2995	 * security flavor list.
2996	 */
2997	if (exp->nes_numsecflavor == 0)
2998		return (0);
2999
3000	for (i = 0; i < exp->nes_numsecflavor; i++) {
3001		/*
3002		 * The tests for privacy and integrity must be first,
3003		 * since ND_GSS is set for everything but AUTH_SYS.
3004		 */
3005		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
3006		    (nd->nd_flag & ND_GSSPRIVACY))
3007			return (0);
3008		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
3009		    (nd->nd_flag & ND_GSSINTEGRITY))
3010			return (0);
3011		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
3012		    (nd->nd_flag & ND_GSS))
3013			return (0);
3014		if (exp->nes_secflavors[i] == AUTH_SYS &&
3015		    (nd->nd_flag & ND_GSS) == 0)
3016			return (0);
3017	}
3018	return (1);
3019}
3020
3021/*
3022 * Calculate a hash value for the fid in a file handle.
3023 */
3024uint32_t
3025nfsrv_hashfh(fhandle_t *fhp)
3026{
3027	uint32_t hashval;
3028
3029	hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
3030	return (hashval);
3031}
3032
3033extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
3034
3035/*
3036 * Called once to initialize data structures...
3037 */
3038static int
3039nfsd_modevent(module_t mod, int type, void *data)
3040{
3041	int error = 0;
3042	static int loaded = 0;
3043
3044	switch (type) {
3045	case MOD_LOAD:
3046		if (loaded)
3047			return (0);
3048		newnfs_portinit();
3049		mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF);
3050		mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
3051		mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
3052		    MTX_DEF);
3053		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
3054		nfsrvd_initcache();
3055		nfsd_init();
3056		NFSD_LOCK();
3057		nfsrvd_init(0);
3058		NFSD_UNLOCK();
3059		nfsd_mntinit();
3060#ifdef VV_DISABLEDELEG
3061		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
3062		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
3063#endif
3064		nfsd_call_servertimer = nfsrv_servertimer;
3065		nfsd_call_nfsd = nfssvc_nfsd;
3066		loaded = 1;
3067		break;
3068
3069	case MOD_UNLOAD:
3070		if (newnfs_numnfsd != 0) {
3071			error = EBUSY;
3072			break;
3073		}
3074
3075#ifdef VV_DISABLEDELEG
3076		vn_deleg_ops.vndeleg_recall = NULL;
3077		vn_deleg_ops.vndeleg_disable = NULL;
3078#endif
3079		nfsd_call_servertimer = NULL;
3080		nfsd_call_nfsd = NULL;
3081		/* and get rid of the locks */
3082		mtx_destroy(&nfs_cache_mutex);
3083		mtx_destroy(&nfs_v4root_mutex);
3084		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
3085		lockdestroy(&nfsv4root_mnt.mnt_explock);
3086		loaded = 0;
3087		break;
3088	default:
3089		error = EOPNOTSUPP;
3090		break;
3091	}
3092	return error;
3093}
/*
 * Module descriptor for "nfsd": its events are handled by nfsd_modevent()
 * and the last member is left NULL.
 */
static moduledata_t nfsd_mod = {
	"nfsd",
	nfsd_modevent,
	NULL,
};
DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfsd, 1);
/* Modules that nfsd depends on, each at version 1. */
MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfsd, nfslock, 1, 1, 1);
MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
3108
3109