nfs_nfsdport.c revision 214255
11541Srgrimes/*-
21541Srgrimes * Copyright (c) 1989, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
551138Salfred * This code is derived from software contributed to Berkeley by
6112895Sjeff * Rick Macklem at The University of Guelph.
71541Srgrimes *
81541Srgrimes * Redistribution and use in source and binary forms, with or without
9106149Sdwmalone * modification, are permitted provided that the following conditions
101541Srgrimes * are met:
1164002Speter * 1. Redistributions of source code must retain the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer.
131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer in the
151541Srgrimes *    documentation and/or other materials provided with the distribution.
161541Srgrimes * 4. Neither the name of the University nor the names of its contributors
171541Srgrimes *    may be used to endorse or promote products derived from this software
181541Srgrimes *    without specific prior written permission.
191541Srgrimes *
201541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
211541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
221541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
231541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
241541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
251541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
261541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
271541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
281541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
291541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
301541Srgrimes * SUCH DAMAGE.
311541Srgrimes *
321541Srgrimes */
331541Srgrimes
341541Srgrimes#include <sys/cdefs.h>
351541Srgrimes__FBSDID("$FreeBSD: head/sys/fs/nfsserver/nfs_nfsdport.c 214255 2010-10-23 22:28:29Z rmacklem $");
361541Srgrimes
371541Srgrimes/*
381541Srgrimes * Functions that perform the vfs operations required by the routines in
391541Srgrimes * nfsd_serv.c. It is hoped that this change will make the server more
401541Srgrimes * portable.
411541Srgrimes */
421541Srgrimes
431541Srgrimes#include <fs/nfs/nfsport.h>
441541Srgrimes#include <sys/hash.h>
451541Srgrimes#include <sys/sysctl.h>
461541Srgrimes#include <nlm/nlm_prot.h>
471541Srgrimes#include <nlm/nlm.h>
481541Srgrimes
/* Symbols defined in other files of the NFS code and referenced here. */
extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
extern int nfsv4root_set;
extern int nfsrv_useacl;
extern int newnfs_numnfsd;
extern struct mount nfsv4root_mnt;
extern struct nfsrv_stablefirst nfsrv_stablefirst;
extern void (*nfsd_call_servertimer)(void);

/* Globals defined by this file. */
struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
NFSDLOCKMUTEX;
struct mtx nfs_cache_mutex;
struct mtx nfs_v4root_mutex;
struct nfsrvfh nfs_rootfh, nfs_pubfh;
/* Both "set" flags start out as 0, i.e. no public/root file handle yet. */
int nfs_pubfhset = 0, nfs_rootfhset = 0;
static uint32_t nfsv4_sysid = 0;

static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
    struct ucred *);

/*
 * Variables exported through the sysctl declarations below.
 * enable_crossmntpt (sysctl name "mirrormnt") is consulted by
 * nfsvno_namei() when deciding whether to set NOCROSSMOUNT.
 */
static int enable_crossmntpt = 1;
static int nfs_commit_blks;
static int nfs_commit_miss;
extern int nfsrv_issuedelegs;
extern int nfsrv_dolocallocks;

SYSCTL_DECL(_vfs_newnfs);
SYSCTL_INT(_vfs_newnfs, OID_AUTO, mirrormnt, CTLFLAG_RW, &enable_crossmntpt,
    0, "Enable nfsd to cross mount points");
/* NOTE(review): commit_blks and commit_miss have empty descriptions. */
SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
    0, "");
SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
    0, "");
SYSCTL_INT(_vfs_newnfs, OID_AUTO, issue_delegations, CTLFLAG_RW,
    &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
SYSCTL_INT(_vfs_newnfs, OID_AUTO, enable_locallocks, CTLFLAG_RW,
    &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
841541Srgrimes
/*
 * Read sequentiality heuristic table used by nfsvno_read().
 *
 * NUM_HEURISTIC is the number of slots in the table.
 * NHUSE_INIT is the use count given to a slot when it is (re)assigned to
 * a vnode, NHUSE_INC is added to the use count on every read and
 * NHUSE_MAX is the ceiling applied to the use count.
 */
#define	NUM_HEURISTIC		1017
#define	NHUSE_INIT		64
#define	NHUSE_INC		16
#define	NHUSE_MAX		2048

static struct nfsheur {
	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
	off_t nh_nextr;		/* next offset for sequential detection */
	int nh_use;		/* use count for selection */
	int nh_seqcount;	/* heuristic */
} nfsheur[NUM_HEURISTIC];
961541Srgrimes
971541Srgrimes
981541Srgrimes/*
991541Srgrimes * Get attributes into nfsvattr structure.
1001541Srgrimes */
1011541Srgrimesint
1021541Srgrimesnfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
1031541Srgrimes    struct thread *p)
1041541Srgrimes{
1051541Srgrimes	int error, lockedit = 0;
1061541Srgrimes
1071541Srgrimes	/* Since FreeBSD insists the vnode be locked... */
1081541Srgrimes	if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
1091541Srgrimes		lockedit = 1;
1101541Srgrimes		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
1111541Srgrimes	}
1121541Srgrimes	error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
113105950Speter	if (lockedit)
1141541Srgrimes		NFSVOPUNLOCK(vp, 0, p);
1151541Srgrimes	return (error);
1161541Srgrimes}
1171541Srgrimes
1181541Srgrimes/*
1191541Srgrimes * Get a file handle for a vnode.
1201541Srgrimes */
12152150Smarcelint
1221541Srgrimesnfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
1231541Srgrimes{
1241541Srgrimes	int error;
1251541Srgrimes
1261541Srgrimes	NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
1271541Srgrimes	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
1281541Srgrimes	error = VOP_VPTOFH(vp, &fhp->fh_fid);
12914220Speter	return (error);
1301541Srgrimes}
1311541Srgrimes
1321541Srgrimes/*
1331541Srgrimes * Perform access checking for vnodes obtained from file handles that would
1341541Srgrimes * refer to files already opened by a Unix client. You cannot just use
1351541Srgrimes * vn_writechk() and VOP_ACCESSX() for two reasons.
1368019Sache * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
1378019Sache *     case.
1381541Srgrimes * 2 - The owner is to be given access irrespective of mode bits for some
1391541Srgrimes *     operations, so that processes that chmod after opening a file don't
1401541Srgrimes *     break.
1411541Srgrimes */
1421541Srgrimesint
1431541Srgrimesnfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
1441541Srgrimes    struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
1451541Srgrimes    u_int32_t *supportedtypep)
1461541Srgrimes{
1471541Srgrimes	struct vattr vattr;
1481541Srgrimes	int error = 0, getret = 0;
1491541Srgrimes
1501541Srgrimes	if (accmode & VWRITE) {
1511541Srgrimes		/* Just vn_writechk() changed to check rdonly */
1521541Srgrimes		/*
1531541Srgrimes		 * Disallow write attempts on read-only file systems;
1541541Srgrimes		 * unless the file is a socket or a block or character
1551541Srgrimes		 * device resident on the file system.
1561541Srgrimes		 */
1571541Srgrimes		if (NFSVNO_EXRDONLY(exp) ||
1581541Srgrimes		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
1591541Srgrimes			switch (vp->v_type) {
1601541Srgrimes			case VREG:
16114220Speter			case VDIR:
16214220Speter			case VLNK:
16314220Speter				return (EROFS);
1641541Srgrimes			default:
1651541Srgrimes				break;
1661541Srgrimes			}
1671541Srgrimes		}
1681541Srgrimes		/*
1691541Srgrimes		 * If there's shared text associated with
1701541Srgrimes		 * the inode, try to free it up once.  If
1711541Srgrimes		 * we fail, we can't allow writing.
1721549Srgrimes		 */
1731549Srgrimes		if (vp->v_vflag & VV_TEXT)
1741549Srgrimes			return (ETXTBSY);
1751549Srgrimes	}
1762442Sdg	if (vpislocked == 0)
1771541Srgrimes		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
1781541Srgrimes
1792729Sdfr	/*
1802729Sdfr	 * Should the override still be applied when ACLs are enabled?
1811541Srgrimes	 */
1821541Srgrimes	error = VOP_ACCESSX(vp, accmode, cred, p);
18345065Salc	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
18445065Salc		/*
1852858Swollman		 * Try again with VEXPLICIT_DENY, to see if the test for
1862297Swollman		 * deletion is supported.
18714220Speter		 */
18814220Speter		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
18914220Speter		if (error == 0) {
1901541Srgrimes			if (vp->v_type == VDIR) {
1911541Srgrimes				accmode &= ~(VDELETE | VDELETE_CHILD);
1921541Srgrimes				accmode |= VWRITE;
1931541Srgrimes				error = VOP_ACCESSX(vp, accmode, cred, p);
19432889Sphk			} else if (supportedtypep != NULL) {
19532889Sphk				*supportedtypep &= ~NFSACCESS_DELETE;
19632889Sphk			}
19732889Sphk		}
1981541Srgrimes	}
1991541Srgrimes
2001541Srgrimes	/*
2011541Srgrimes	 * Allow certain operations for the owner (reads and writes
2021541Srgrimes	 * on files that are already open).
2031541Srgrimes	 */
2041541Srgrimes	if (override != NFSACCCHK_NOOVERRIDE &&
2051541Srgrimes	    (error == EPERM || error == EACCES)) {
2061541Srgrimes		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
2071541Srgrimes			error = 0;
2081541Srgrimes		else if (override & NFSACCCHK_ALLOWOWNER) {
2091541Srgrimes			getret = VOP_GETATTR(vp, &vattr, cred);
2101541Srgrimes			if (getret == 0 && cred->cr_uid == vattr.va_uid)
2111541Srgrimes				error = 0;
2121541Srgrimes		}
2131541Srgrimes	}
2141541Srgrimes	if (vpislocked == 0)
21535938Sdyson		NFSVOPUNLOCK(vp, 0, p);
21635938Sdyson	return (error);
21728400Speter}
21825582Speter
21929349Speter/*
2202124Sdg * Set attribute(s) vnop.
2212124Sdg */
2222124Sdgint
2232124Sdgnfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
2242124Sdg    struct thread *p, struct nfsexstuff *exp)
2252124Sdg{
2262124Sdg	int error;
2272124Sdg
2282124Sdg	error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
2292124Sdg	return (error);
23012865Speter}
23112865Speter
/*
 * Set up nameidata for a lookup() call and do it.
 * For the cases where we are crossing mount points
 * (looking up the public fh path or the v4 root path when
 *  not using a pseudo-root fs), set/release the Giant lock,
 * as required.
 *
 * nd       - request descriptor; nd_flag selects ND_PUBLOOKUP / ND_NFSV4
 *            handling.
 * ndp      - nameidata whose ni_cnd already holds the path buffer
 *            (cn_pnbuf) and the lookup flags.
 * dp       - directory the lookup starts in; must be a VDIR.
 * islocked - non-zero if dp is locked on entry; it is unlocked here (or
 *            vput() on the ENOTDIR path).
 * exp      - export information; nes_vfslocked tracks the Giant state.
 * retdirp  - set to NULL first, then to dp once dp has passed the VDIR
 *            check and been VREF()'d.
 *
 * Returns 0 on success or an error.  On the error paths that reach "out"
 * the path buffer is freed, HASBUF is cleared and ni_vp, ni_dvp and
 * ni_startdir are set to NULL.
 */
int
nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
    struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
    struct vnode **retdirp)
{
	struct componentname *cnp = &ndp->ni_cnd;
	int i;
	struct iovec aiov;
	struct uio auio;
	/* Remember whether the caller asked for the leaf to be locked. */
	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
	int error = 0, crossmnt;
	char *cp;

	*retdirp = NULL;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	/*
	 * Extract and set starting directory.
	 */
	if (dp->v_type != VDIR) {
		if (islocked)
			vput(dp);
		else
			vrele(dp);
		nfsvno_relpathbuf(ndp);
		return (ENOTDIR);
	}
	if (islocked)
		NFSVOPUNLOCK(dp, 0, p);
	VREF(dp);
	*retdirp = dp;
	if (NFSVNO_EXRDONLY(exp))
		cnp->cn_flags |= RDONLY;
	ndp->ni_segflg = UIO_SYSSPACE;
	crossmnt = 1;

	if (nd->nd_flag & ND_PUBLOOKUP) {
		ndp->ni_loopcnt = 0;
		/* An absolute path restarts the lookup at rootvnode. */
		if (cnp->cn_pnbuf[0] == '/') {
			vrele(dp);
			/*
			 * Check for degenerate pathnames here, since lookup()
			 * panics on them.
			 */
			for (i = 1; i < ndp->ni_pathlen; i++)
				if (cnp->cn_pnbuf[i] != '/')
					break;
			if (i == ndp->ni_pathlen) {
				/*
				 * NOTE(review): *retdirp still points at dp
				 * here although the reference taken by the
				 * VREF() above has just been dropped; confirm
				 * how callers treat *retdirp on error.
				 */
				error = NFSERR_ACCES;
				goto out;
			}
			dp = rootvnode;
			VREF(dp);
		}
	} else if ((enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
	    (nd->nd_flag & ND_NFSV4) == 0) {
		/*
		 * Only cross mount points for NFSv4 when doing a
		 * mount while traversing the file system above
		 * the mount point, unless enable_crossmntpt is set.
		 */
		cnp->cn_flags |= NOCROSSMOUNT;
		crossmnt = 0;
	}

	/*
	 * Initialize for scan, set ni_startdir and bump ref on dp again
	 * because lookup() will dereference ni_startdir.
	 */

	cnp->cn_thread = p;
	ndp->ni_startdir = dp;
	ndp->ni_rootdir = rootvnode;

	/*
	 * LOCKLEAF is forced on for the duration of the loop; if the caller
	 * did not ask for it the leaf is unlocked again and the flag is
	 * cleared below.
	 */
	if (!lockleaf)
		cnp->cn_flags |= LOCKLEAF;
	for (;;) {
		cnp->cn_nameptr = cnp->cn_pnbuf;
		/*
		 * Call lookup() to do the real work.  If an error occurs,
		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
		 * we do not have to dereference anything before returning.
		 * In either case ni_startdir will be dereferenced and NULLed
		 * out.
		 */
		if (exp->nes_vfslocked)
			ndp->ni_cnd.cn_flags |= GIANTHELD;
		error = lookup(ndp);
		/*
		 * The Giant lock should only change when
		 * crossing mount points.
		 */
		if (crossmnt) {
			exp->nes_vfslocked =
			    (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
			ndp->ni_cnd.cn_flags &= ~GIANTHELD;
		}
		if (error)
			break;

		/*
		 * Check for encountering a symbolic link.  Trivial
		 * termination occurs if no symlink encountered.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
				nfsvno_relpathbuf(ndp);
			if (ndp->ni_vp && !lockleaf)
				NFSVOPUNLOCK(ndp->ni_vp, 0, p);
			break;
		}

		/*
		 * Validate symlink
		 */
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			NFSVOPUNLOCK(ndp->ni_dvp, 0, p);
		/* Symbolic links are only followed for public fh lookups. */
		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
			error = EINVAL;
			goto badlink2;
		}

		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			goto badlink2;
		}
		/*
		 * If path components remain after the link, read the link
		 * into a fresh buffer so the remainder can be appended;
		 * otherwise the link text overwrites the current buffer.
		 */
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
			/* badlink1 also frees the temporary link buffer. */
		badlink1:
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
			/* badlink2 releases the parent and the link vnode. */
		badlink2:
			vrele(ndp->ni_dvp);
			vput(ndp->ni_vp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink1;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink1;
		}

		/*
		 * Adjust or replace path
		 */
		if (ndp->ni_pathlen > 1) {
			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;

		/*
		 * Cleanup refs for next loop and check if root directory
		 * should replace current directory.  Normally ni_dvp
		 * becomes the new base directory and is cleaned up when
		 * we loop.  Explicitly null pointers after invalidation
		 * to clarify operation.
		 */
		vput(ndp->ni_vp);
		ndp->ni_vp = NULL;

		if (cnp->cn_pnbuf[0] == '/') {
			vrele(ndp->ni_dvp);
			ndp->ni_dvp = ndp->ni_rootdir;
			VREF(ndp->ni_dvp);
		}
		ndp->ni_startdir = ndp->ni_dvp;
		ndp->ni_dvp = NULL;
	}
	/* Restore the caller's view of LOCKLEAF. */
	if (!lockleaf)
		cnp->cn_flags &= ~LOCKLEAF;

out:
	if (error) {
		/* Failure: release the path buffer and clear the results. */
		uma_zfree(namei_zone, cnp->cn_pnbuf);
		ndp->ni_vp = NULL;
		ndp->ni_dvp = NULL;
		ndp->ni_startdir = NULL;
		cnp->cn_flags &= ~HASBUF;
	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
		/* The caller did not ask for the parent directory. */
		ndp->ni_dvp = NULL;
	}
	return (error);
}
441
442/*
443 * Set up a pathname buffer and return a pointer to it and, optionally
444 * set a hash pointer.
445 */
446void
447nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
448{
449	struct componentname *cnp = &ndp->ni_cnd;
450
451	cnp->cn_flags |= (NOMACCHECK | HASBUF);
452	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
453	if (hashpp != NULL)
454		*hashpp = NULL;
455	*bufpp = cnp->cn_pnbuf;
456}
457
458/*
459 * Release the above path buffer, if not released by nfsvno_namei().
460 */
461void
462nfsvno_relpathbuf(struct nameidata *ndp)
463{
464
465	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
466		panic("nfsrelpath");
467	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
468	ndp->ni_cnd.cn_flags &= ~HASBUF;
469}
470
471/*
472 * Readlink vnode op into an mbuf list.
473 */
474int
475nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
476    struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
477{
478	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
479	struct iovec *ivp = iv;
480	struct uio io, *uiop = &io;
481	struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
482	int i, len, tlen, error;
483
484	len = 0;
485	i = 0;
486	while (len < NFS_MAXPATHLEN) {
487		NFSMGET(mp);
488		MCLGET(mp, M_WAIT);
489		mp->m_len = NFSMSIZ(mp);
490		if (len == 0) {
491			mp3 = mp2 = mp;
492		} else {
493			mp2->m_next = mp;
494			mp2 = mp;
495		}
496		if ((len + mp->m_len) > NFS_MAXPATHLEN) {
497			mp->m_len = NFS_MAXPATHLEN - len;
498			len = NFS_MAXPATHLEN;
499		} else {
500			len += mp->m_len;
501		}
502		ivp->iov_base = mtod(mp, caddr_t);
503		ivp->iov_len = mp->m_len;
504		i++;
505		ivp++;
506	}
507	uiop->uio_iov = iv;
508	uiop->uio_iovcnt = i;
509	uiop->uio_offset = 0;
510	uiop->uio_resid = len;
511	uiop->uio_rw = UIO_READ;
512	uiop->uio_segflg = UIO_SYSSPACE;
513	uiop->uio_td = NULL;
514	error = VOP_READLINK(vp, uiop, cred);
515	if (error) {
516		m_freem(mp3);
517		*lenp = 0;
518		return (error);
519	}
520	if (uiop->uio_resid > 0) {
521		len -= uiop->uio_resid;
522		tlen = NFSM_RNDUP(len);
523		nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
524	}
525	*lenp = len;
526	*mpp = mp3;
527	*mpendp = mp;
528	return (0);
529}
530
531/*
532 * Read vnode op call into mbuf list.
533 */
534int
535nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
536    struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
537{
538	struct mbuf *m;
539	int i;
540	struct iovec *iv;
541	struct iovec *iv2;
542	int error = 0, len, left, siz, tlen, ioflag = 0, hi, try = 32;
543	struct mbuf *m2 = NULL, *m3;
544	struct uio io, *uiop = &io;
545	struct nfsheur *nh;
546
547	/*
548	 * Calculate seqcount for heuristic
549	 */
550	/*
551	 * Locate best candidate
552	 */
553
554	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
555	nh = &nfsheur[hi];
556
557	while (try--) {
558		if (nfsheur[hi].nh_vp == vp) {
559			nh = &nfsheur[hi];
560			break;
561		}
562		if (nfsheur[hi].nh_use > 0)
563			--nfsheur[hi].nh_use;
564		hi = (hi + 1) % NUM_HEURISTIC;
565		if (nfsheur[hi].nh_use < nh->nh_use)
566			nh = &nfsheur[hi];
567	}
568
569	if (nh->nh_vp != vp) {
570		nh->nh_vp = vp;
571		nh->nh_nextr = off;
572		nh->nh_use = NHUSE_INIT;
573		if (off == 0)
574			nh->nh_seqcount = 4;
575		else
576			nh->nh_seqcount = 1;
577	}
578
579	/*
580	 * Calculate heuristic
581	 */
582
583	if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
584		if (++nh->nh_seqcount > IO_SEQMAX)
585			nh->nh_seqcount = IO_SEQMAX;
586	} else if (nh->nh_seqcount > 1) {
587		nh->nh_seqcount = 1;
588	} else {
589		nh->nh_seqcount = 0;
590	}
591	nh->nh_use += NHUSE_INC;
592	if (nh->nh_use > NHUSE_MAX)
593		nh->nh_use = NHUSE_MAX;
594	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
595
596	len = left = NFSM_RNDUP(cnt);
597	m3 = NULL;
598	/*
599	 * Generate the mbuf list with the uio_iov ref. to it.
600	 */
601	i = 0;
602	while (left > 0) {
603		NFSMGET(m);
604		MCLGET(m, M_WAIT);
605		m->m_len = 0;
606		siz = min(M_TRAILINGSPACE(m), left);
607		left -= siz;
608		i++;
609		if (m3)
610			m2->m_next = m;
611		else
612			m3 = m;
613		m2 = m;
614	}
615	MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
616	    M_TEMP, M_WAITOK);
617	uiop->uio_iov = iv2 = iv;
618	m = m3;
619	left = len;
620	i = 0;
621	while (left > 0) {
622		if (m == NULL)
623			panic("nfsvno_read iov");
624		siz = min(M_TRAILINGSPACE(m), left);
625		if (siz > 0) {
626			iv->iov_base = mtod(m, caddr_t) + m->m_len;
627			iv->iov_len = siz;
628			m->m_len += siz;
629			left -= siz;
630			iv++;
631			i++;
632		}
633		m = m->m_next;
634	}
635	uiop->uio_iovcnt = i;
636	uiop->uio_offset = off;
637	uiop->uio_resid = len;
638	uiop->uio_rw = UIO_READ;
639	uiop->uio_segflg = UIO_SYSSPACE;
640	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
641	FREE((caddr_t)iv2, M_TEMP);
642	if (error) {
643		m_freem(m3);
644		*mpp = NULL;
645		return (error);
646	}
647	tlen = len - uiop->uio_resid;
648	cnt = cnt < tlen ? cnt : tlen;
649	tlen = NFSM_RNDUP(cnt);
650	if (tlen == 0) {
651		m_freem(m3);
652		m3 = NULL;
653	} else if (len != tlen || tlen != cnt)
654		nfsrv_adj(m3, len - tlen, tlen - cnt);
655	*mpp = m3;
656	*mpendp = m2;
657	return (0);
658}
659
660/*
661 * Write vnode op from an mbuf list.
662 */
663int
664nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
665    struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
666{
667	struct iovec *ivp;
668	int i, len;
669	struct iovec *iv;
670	int ioflags, error;
671	struct uio io, *uiop = &io;
672
673	MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
674	    M_WAITOK);
675	uiop->uio_iov = iv = ivp;
676	uiop->uio_iovcnt = cnt;
677	i = mtod(mp, caddr_t) + mp->m_len - cp;
678	len = retlen;
679	while (len > 0) {
680		if (mp == NULL)
681			panic("nfsvno_write");
682		if (i > 0) {
683			i = min(i, len);
684			ivp->iov_base = cp;
685			ivp->iov_len = i;
686			ivp++;
687			len -= i;
688		}
689		mp = mp->m_next;
690		if (mp) {
691			i = mp->m_len;
692			cp = mtod(mp, caddr_t);
693		}
694	}
695
696	if (stable == NFSWRITE_UNSTABLE)
697		ioflags = IO_NODELOCKED;
698	else
699		ioflags = (IO_SYNC | IO_NODELOCKED);
700	uiop->uio_resid = retlen;
701	uiop->uio_rw = UIO_WRITE;
702	uiop->uio_segflg = UIO_SYSSPACE;
703	NFSUIOPROC(uiop, p);
704	uiop->uio_offset = off;
705	error = VOP_WRITE(vp, uiop, ioflags, cred);
706	FREE((caddr_t)iv, M_TEMP);
707	return (error);
708}
709
710/*
711 * Common code for creating a regular file (plus special files for V2).
712 */
713int
714nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
715    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
716    int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
717{
718	u_quad_t tempsize;
719	int error;
720
721	error = nd->nd_repstat;
722	if (!error && ndp->ni_vp == NULL) {
723		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
724			vrele(ndp->ni_startdir);
725			error = VOP_CREATE(ndp->ni_dvp,
726			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
727			vput(ndp->ni_dvp);
728			nfsvno_relpathbuf(ndp);
729			if (!error) {
730				if (*exclusive_flagp) {
731					*exclusive_flagp = 0;
732					NFSVNO_ATTRINIT(nvap);
733					nvap->na_atime.tv_sec = cverf[0];
734					nvap->na_atime.tv_nsec = cverf[1];
735					error = VOP_SETATTR(ndp->ni_vp,
736					    &nvap->na_vattr, nd->nd_cred);
737				}
738			}
739		/*
740		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
741		 * (This implies, just get out on an error.)
742		 */
743		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
744			nvap->na_type == VFIFO) {
745			if (nvap->na_type == VCHR && rdev == 0xffffffff)
746				nvap->na_type = VFIFO;
747                        if (nvap->na_type != VFIFO &&
748			    (error = priv_check_cred(nd->nd_cred,
749			     PRIV_VFS_MKNOD_DEV, 0))) {
750				vrele(ndp->ni_startdir);
751				nfsvno_relpathbuf(ndp);
752				vput(ndp->ni_dvp);
753				return (error);
754			}
755			nvap->na_rdev = rdev;
756			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
757			    &ndp->ni_cnd, &nvap->na_vattr);
758			vput(ndp->ni_dvp);
759			nfsvno_relpathbuf(ndp);
760			if (error) {
761				vrele(ndp->ni_startdir);
762				return (error);
763			}
764		} else {
765			vrele(ndp->ni_startdir);
766			nfsvno_relpathbuf(ndp);
767			vput(ndp->ni_dvp);
768			return (ENXIO);
769		}
770		*vpp = ndp->ni_vp;
771	} else {
772		/*
773		 * Handle cases where error is already set and/or
774		 * the file exists.
775		 * 1 - clean up the lookup
776		 * 2 - iff !error and na_size set, truncate it
777		 */
778		vrele(ndp->ni_startdir);
779		nfsvno_relpathbuf(ndp);
780		*vpp = ndp->ni_vp;
781		if (ndp->ni_dvp == *vpp)
782			vrele(ndp->ni_dvp);
783		else
784			vput(ndp->ni_dvp);
785		if (!error && nvap->na_size != VNOVAL) {
786			error = nfsvno_accchk(*vpp, VWRITE,
787			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
788			    NFSACCCHK_VPISLOCKED, NULL);
789			if (!error) {
790				tempsize = nvap->na_size;
791				NFSVNO_ATTRINIT(nvap);
792				nvap->na_size = tempsize;
793				error = VOP_SETATTR(*vpp,
794				    &nvap->na_vattr, nd->nd_cred);
795			}
796		}
797		if (error)
798			vput(*vpp);
799	}
800	return (error);
801}
802
803/*
804 * Do a mknod vnode op.
805 */
806int
807nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
808    struct thread *p)
809{
810	int error = 0;
811	enum vtype vtyp;
812
813	vtyp = nvap->na_type;
814	/*
815	 * Iff doesn't exist, create it.
816	 */
817	if (ndp->ni_vp) {
818		vrele(ndp->ni_startdir);
819		nfsvno_relpathbuf(ndp);
820		vput(ndp->ni_dvp);
821		vrele(ndp->ni_vp);
822		return (EEXIST);
823	}
824	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
825		vrele(ndp->ni_startdir);
826		nfsvno_relpathbuf(ndp);
827		vput(ndp->ni_dvp);
828		return (NFSERR_BADTYPE);
829	}
830	if (vtyp == VSOCK) {
831		vrele(ndp->ni_startdir);
832		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
833		    &ndp->ni_cnd, &nvap->na_vattr);
834		vput(ndp->ni_dvp);
835		nfsvno_relpathbuf(ndp);
836	} else {
837		if (nvap->na_type != VFIFO &&
838		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
839			vrele(ndp->ni_startdir);
840			nfsvno_relpathbuf(ndp);
841			vput(ndp->ni_dvp);
842			return (error);
843		}
844		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
845		    &ndp->ni_cnd, &nvap->na_vattr);
846		vput(ndp->ni_dvp);
847		nfsvno_relpathbuf(ndp);
848		if (error)
849			vrele(ndp->ni_startdir);
850		/*
851		 * Since VOP_MKNOD returns the ni_vp, I can't
852		 * see any reason to do the lookup.
853		 */
854	}
855	return (error);
856}
857
858/*
859 * Mkdir vnode op.
860 */
861int
862nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
863    struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
864{
865	int error = 0;
866
867	if (ndp->ni_vp != NULL) {
868		if (ndp->ni_dvp == ndp->ni_vp)
869			vrele(ndp->ni_dvp);
870		else
871			vput(ndp->ni_dvp);
872		vrele(ndp->ni_vp);
873		nfsvno_relpathbuf(ndp);
874		return (EEXIST);
875	}
876	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
877	    &nvap->na_vattr);
878	vput(ndp->ni_dvp);
879	nfsvno_relpathbuf(ndp);
880	return (error);
881}
882
883/*
884 * symlink vnode op.
885 */
886int
887nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
888    int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
889    struct nfsexstuff *exp)
890{
891	int error = 0;
892
893	if (ndp->ni_vp) {
894		vrele(ndp->ni_startdir);
895		nfsvno_relpathbuf(ndp);
896		if (ndp->ni_dvp == ndp->ni_vp)
897			vrele(ndp->ni_dvp);
898		else
899			vput(ndp->ni_dvp);
900		vrele(ndp->ni_vp);
901		return (EEXIST);
902	}
903
904	error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
905	    &nvap->na_vattr, pathcp);
906	vput(ndp->ni_dvp);
907	vrele(ndp->ni_startdir);
908	nfsvno_relpathbuf(ndp);
909	/*
910	 * Although FreeBSD still had the lookup code in
911	 * it for 7/current, there doesn't seem to be any
912	 * point, since VOP_SYMLINK() returns the ni_vp.
913	 * Just vput it for v2.
914	 */
915	if (!not_v2 && !error)
916		vput(ndp->ni_vp);
917	return (error);
918}
919
920/*
921 * Parse symbolic link arguments.
922 * This function has an ugly side effect. It will MALLOC() an area for
923 * the symlink and set iov_base to point to it, only if it succeeds.
924 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
925 * be FREE'd later.
926 */
927int
928nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
929    struct thread *p, char **pathcpp, int *lenp)
930{
931	u_int32_t *tl;
932	char *pathcp = NULL;
933	int error = 0, len;
934	struct nfsv2_sattr *sp;
935
936	*pathcpp = NULL;
937	*lenp = 0;
938	if ((nd->nd_flag & ND_NFSV3) &&
939	    (error = nfsrv_sattr(nd, nvap, NULL, NULL, p)))
940		goto nfsmout;
941	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
942	len = fxdr_unsigned(int, *tl);
943	if (len > NFS_MAXPATHLEN || len <= 0) {
944		error = EBADRPC;
945		goto nfsmout;
946	}
947	MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
948	error = nfsrv_mtostr(nd, pathcp, len);
949	if (error)
950		goto nfsmout;
951	if (nd->nd_flag & ND_NFSV2) {
952		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
953		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
954	}
955	*pathcpp = pathcp;
956	*lenp = len;
957	return (0);
958nfsmout:
959	if (pathcp)
960		free(pathcp, M_TEMP);
961	return (error);
962}
963
964/*
965 * Remove a non-directory object.
966 */
967int
968nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
969    struct thread *p, struct nfsexstuff *exp)
970{
971	struct vnode *vp;
972	int error = 0;
973
974	vp = ndp->ni_vp;
975	if (vp->v_type == VDIR)
976		error = NFSERR_ISDIR;
977	else if (is_v4)
978		error = nfsrv_checkremove(vp, 1, p);
979	if (!error)
980		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
981	if (ndp->ni_dvp == vp)
982		vrele(ndp->ni_dvp);
983	else
984		vput(ndp->ni_dvp);
985	vput(vp);
986	return (error);
987}
988
989/*
990 * Remove a directory.
991 */
992int
993nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
994    struct thread *p, struct nfsexstuff *exp)
995{
996	struct vnode *vp;
997	int error = 0;
998
999	vp = ndp->ni_vp;
1000	if (vp->v_type != VDIR) {
1001		error = ENOTDIR;
1002		goto out;
1003	}
1004	/*
1005	 * No rmdir "." please.
1006	 */
1007	if (ndp->ni_dvp == vp) {
1008		error = EINVAL;
1009		goto out;
1010	}
1011	/*
1012	 * The root of a mounted filesystem cannot be deleted.
1013	 */
1014	if (vp->v_vflag & VV_ROOT)
1015		error = EBUSY;
1016out:
1017	if (!error)
1018		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1019	if (ndp->ni_dvp == vp)
1020		vrele(ndp->ni_dvp);
1021	else
1022		vput(ndp->ni_dvp);
1023	vput(vp);
1024	return (error);
1025}
1026
1027/*
1028 * Rename vnode op.
1029 */
1030int
1031nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
1032    u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
1033{
1034	struct vnode *fvp, *tvp, *tdvp;
1035	int error = 0;
1036
1037	fvp = fromndp->ni_vp;
1038	if (ndstat) {
1039		vrele(fromndp->ni_dvp);
1040		vrele(fvp);
1041		error = ndstat;
1042		goto out1;
1043	}
1044	tdvp = tondp->ni_dvp;
1045	tvp = tondp->ni_vp;
1046	if (tvp != NULL) {
1047		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
1048			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
1049			goto out;
1050		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
1051			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
1052			goto out;
1053		}
1054		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
1055			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1056			goto out;
1057		}
1058
1059		/*
1060		 * A rename to '.' or '..' results in a prematurely
1061		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
1062		 * here.
1063		 */
1064		if ((tondp->ni_cnd.cn_namelen == 1 &&
1065		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
1066		    (tondp->ni_cnd.cn_namelen == 2 &&
1067		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
1068		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
1069			error = EINVAL;
1070			goto out;
1071		}
1072	}
1073	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
1074		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1075		goto out;
1076	}
1077	if (fvp->v_mount != tdvp->v_mount) {
1078		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1079		goto out;
1080	}
1081	if (fvp == tdvp) {
1082		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
1083		goto out;
1084	}
1085	if (fvp == tvp) {
1086		/*
1087		 * If source and destination are the same, there is nothing to
1088		 * do. Set error to -1 to indicate this.
1089		 */
1090		error = -1;
1091		goto out;
1092	}
1093	if (ndflag & ND_NFSV4) {
1094		NFSVOPLOCK(fvp, LK_EXCLUSIVE | LK_RETRY, p);
1095		error = nfsrv_checkremove(fvp, 0, p);
1096		NFSVOPUNLOCK(fvp, 0, p);
1097		if (tvp && !error)
1098			error = nfsrv_checkremove(tvp, 1, p);
1099	} else {
1100		/*
1101		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
1102		 * that the NFSv4 client won't be confused by the rename.
1103		 * Since nfsd_recalldelegation() can only be called on an
1104		 * unlocked vnode at this point and fvp is the file that will
1105		 * still exist after the rename, just do fvp.
1106		 */
1107		nfsd_recalldelegation(fvp, p);
1108	}
1109out:
1110	if (!error) {
1111		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
1112		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
1113		    &tondp->ni_cnd);
1114	} else {
1115		if (tdvp == tvp)
1116			vrele(tdvp);
1117		else
1118			vput(tdvp);
1119		if (tvp)
1120			vput(tvp);
1121		vrele(fromndp->ni_dvp);
1122		vrele(fvp);
1123		if (error == -1)
1124			error = 0;
1125	}
1126	vrele(tondp->ni_startdir);
1127	nfsvno_relpathbuf(tondp);
1128out1:
1129	vrele(fromndp->ni_startdir);
1130	nfsvno_relpathbuf(fromndp);
1131	return (error);
1132}
1133
1134/*
1135 * Link vnode op.
1136 */
1137int
1138nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
1139    struct thread *p, struct nfsexstuff *exp)
1140{
1141	struct vnode *xp;
1142	int error = 0;
1143
1144	xp = ndp->ni_vp;
1145	if (xp != NULL) {
1146		error = EEXIST;
1147	} else {
1148		xp = ndp->ni_dvp;
1149		if (vp->v_mount != xp->v_mount)
1150			error = EXDEV;
1151	}
1152	if (!error) {
1153		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
1154		error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
1155		if (ndp->ni_dvp == vp)
1156			vrele(ndp->ni_dvp);
1157		else
1158			vput(ndp->ni_dvp);
1159		NFSVOPUNLOCK(vp, 0, p);
1160	} else {
1161		if (ndp->ni_dvp == ndp->ni_vp)
1162			vrele(ndp->ni_dvp);
1163		else
1164			vput(ndp->ni_dvp);
1165		if (ndp->ni_vp)
1166			vrele(ndp->ni_vp);
1167	}
1168	nfsvno_relpathbuf(ndp);
1169	return (error);
1170}
1171
1172/*
1173 * Do the fsync() appropriate for the commit.
1174 */
1175int
1176nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
1177    struct thread *td)
1178{
1179	int error = 0;
1180
1181	if (cnt > MAX_COMMIT_COUNT) {
1182		/*
1183		 * Give up and do the whole thing
1184		 */
1185		if (vp->v_object &&
1186		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1187			VM_OBJECT_LOCK(vp->v_object);
1188			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
1189			VM_OBJECT_UNLOCK(vp->v_object);
1190		}
1191		error = VOP_FSYNC(vp, MNT_WAIT, td);
1192	} else {
1193		/*
1194		 * Locate and synchronously write any buffers that fall
1195		 * into the requested range.  Note:  we are assuming that
1196		 * f_iosize is a power of 2.
1197		 */
1198		int iosize = vp->v_mount->mnt_stat.f_iosize;
1199		int iomask = iosize - 1;
1200		struct bufobj *bo;
1201		daddr_t lblkno;
1202
1203		/*
1204		 * Align to iosize boundry, super-align to page boundry.
1205		 */
1206		if (off & iomask) {
1207			cnt += off & iomask;
1208			off &= ~(u_quad_t)iomask;
1209		}
1210		if (off & PAGE_MASK) {
1211			cnt += off & PAGE_MASK;
1212			off &= ~(u_quad_t)PAGE_MASK;
1213		}
1214		lblkno = off / iosize;
1215
1216		if (vp->v_object &&
1217		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1218			VM_OBJECT_LOCK(vp->v_object);
1219			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
1220			VM_OBJECT_UNLOCK(vp->v_object);
1221		}
1222
1223		bo = &vp->v_bufobj;
1224		BO_LOCK(bo);
1225		while (cnt > 0) {
1226			struct buf *bp;
1227
1228			/*
1229			 * If we have a buffer and it is marked B_DELWRI we
1230			 * have to lock and write it.  Otherwise the prior
1231			 * write is assumed to have already been committed.
1232			 *
1233			 * gbincore() can return invalid buffers now so we
1234			 * have to check that bit as well (though B_DELWRI
1235			 * should not be set if B_INVAL is set there could be
1236			 * a race here since we haven't locked the buffer).
1237			 */
1238			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
1239				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
1240				    LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
1241					BO_LOCK(bo);
1242					continue; /* retry */
1243				}
1244			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
1245				    B_DELWRI) {
1246					bremfree(bp);
1247					bp->b_flags &= ~B_ASYNC;
1248					bwrite(bp);
1249					++nfs_commit_miss;
1250				} else
1251					BUF_UNLOCK(bp);
1252				BO_LOCK(bo);
1253			}
1254			++nfs_commit_blks;
1255			if (cnt < iosize)
1256				break;
1257			cnt -= iosize;
1258			++lblkno;
1259		}
1260		BO_UNLOCK(bo);
1261	}
1262	return (error);
1263}
1264
1265/*
1266 * Statfs vnode op.
1267 */
1268int
1269nfsvno_statfs(struct vnode *vp, struct statfs *sf)
1270{
1271
1272	return (VFS_STATFS(vp->v_mount, sf));
1273}
1274
1275/*
1276 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
1277 * must handle nfsrv_opencheck() calls after any other access checks.
1278 */
1279void
1280nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
1281    nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
1282    int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
1283    NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
1284    struct nfsexstuff *exp, struct vnode **vpp)
1285{
1286	struct vnode *vp = NULL;
1287	u_quad_t tempsize;
1288	struct nfsexstuff nes;
1289
1290	if (ndp->ni_vp == NULL)
1291		nd->nd_repstat = nfsrv_opencheck(clientid,
1292		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
1293	if (!nd->nd_repstat) {
1294		if (ndp->ni_vp == NULL) {
1295			vrele(ndp->ni_startdir);
1296			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
1297			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
1298			vput(ndp->ni_dvp);
1299			nfsvno_relpathbuf(ndp);
1300			if (!nd->nd_repstat) {
1301				if (*exclusive_flagp) {
1302					*exclusive_flagp = 0;
1303					NFSVNO_ATTRINIT(nvap);
1304					nvap->na_atime.tv_sec = cverf[0];
1305					nvap->na_atime.tv_nsec = cverf[1];
1306					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
1307					    &nvap->na_vattr, cred);
1308				} else {
1309					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
1310					    aclp, p, attrbitp, exp);
1311				}
1312			}
1313			vp = ndp->ni_vp;
1314		} else {
1315			if (ndp->ni_startdir)
1316				vrele(ndp->ni_startdir);
1317			nfsvno_relpathbuf(ndp);
1318			vp = ndp->ni_vp;
1319			if (create == NFSV4OPEN_CREATE) {
1320				if (ndp->ni_dvp == vp)
1321					vrele(ndp->ni_dvp);
1322				else
1323					vput(ndp->ni_dvp);
1324			}
1325			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
1326				if (ndp->ni_cnd.cn_flags & RDONLY)
1327					NFSVNO_SETEXRDONLY(&nes);
1328				else
1329					NFSVNO_EXINIT(&nes);
1330				nd->nd_repstat = nfsvno_accchk(vp,
1331				    VWRITE, cred, &nes, p,
1332				    NFSACCCHK_NOOVERRIDE,
1333				    NFSACCCHK_VPISLOCKED, NULL);
1334				nd->nd_repstat = nfsrv_opencheck(clientid,
1335				    stateidp, stp, vp, nd, p, nd->nd_repstat);
1336				if (!nd->nd_repstat) {
1337					tempsize = nvap->na_size;
1338					NFSVNO_ATTRINIT(nvap);
1339					nvap->na_size = tempsize;
1340					nd->nd_repstat = VOP_SETATTR(vp,
1341					    &nvap->na_vattr, cred);
1342				}
1343			} else if (vp->v_type == VREG) {
1344				nd->nd_repstat = nfsrv_opencheck(clientid,
1345				    stateidp, stp, vp, nd, p, nd->nd_repstat);
1346			}
1347		}
1348	} else {
1349		if (ndp->ni_cnd.cn_flags & HASBUF)
1350			nfsvno_relpathbuf(ndp);
1351		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
1352			vrele(ndp->ni_startdir);
1353			if (ndp->ni_dvp == ndp->ni_vp)
1354				vrele(ndp->ni_dvp);
1355			else
1356				vput(ndp->ni_dvp);
1357			if (ndp->ni_vp)
1358				vput(ndp->ni_vp);
1359		}
1360	}
1361	*vpp = vp;
1362}
1363
1364/*
1365 * Updates the file rev and sets the mtime and ctime
1366 * to the current clock time, returning the va_filerev and va_Xtime
1367 * values.
1368 */
1369void
1370nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
1371    struct ucred *cred, struct thread *p)
1372{
1373	struct vattr va;
1374
1375	VATTR_NULL(&va);
1376	getnanotime(&va.va_mtime);
1377	(void) VOP_SETATTR(vp, &va, cred);
1378	(void) nfsvno_getattr(vp, nvap, cred, p);
1379}
1380
1381/*
1382 * Glue routine to nfsv4_fillattr().
1383 */
1384int
1385nfsvno_fillattr(struct nfsrv_descript *nd, struct vnode *vp,
1386    struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
1387    struct ucred *cred, struct thread *p, int isdgram, int reterr)
1388{
1389	int error;
1390
1391	error = nfsv4_fillattr(nd, vp, NULL, &nvap->na_vattr, fhp, rderror,
1392	    attrbitp, cred, p, isdgram, reterr);
1393	return (error);
1394}
1395
1396/* Since the Readdir vnode ops vary, put the entire functions in here. */
1397/*
1398 * nfs readdir service
1399 * - mallocs what it thinks is enough to read
1400 *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
1401 * - calls VOP_READDIR()
1402 * - loops around building the reply
1403 *	if the output generated exceeds count break out of loop
1404 *	The NFSM_CLGET macro is used here so that the reply will be packed
1405 *	tightly in mbuf clusters.
1406 * - it trims out records with d_fileno == 0
1407 *	this doesn't matter for Unix clients, but they might confuse clients
1408 *	for other os'.
1409 * - it trims out records with d_type == DT_WHT
1410 *	these cannot be seen through NFS (unless we extend the protocol)
1411 *     The alternate call nfsrvd_readdirplus() does lookups as well.
1412 * PS: The NFS protocol spec. does not clarify what the "count" byte
1413 *	argument is a count of.. just name strings and file id's or the
1414 *	entire reply rpc or ...
1415 *	I tried just file name and id sizes and it confused the Sun client,
1416 *	so I am using the full rpc size now. The "paranoia.." comment refers
1417 *	to including the status longwords that are not a part of the dir.
1418 *	"entry" structures, but are in the rpc.
1419 */
1420int
1421nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
1422    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1423{
1424	struct dirent *dp;
1425	u_int32_t *tl;
1426	int dirlen;
1427	char *cpos, *cend, *rbuf;
1428	struct nfsvattr at;
1429	int nlen, error = 0, getret = 1;
1430	int siz, cnt, fullsiz, eofflag, ncookies;
1431	u_int64_t off, toff, verf;
1432	u_long *cookies = NULL, *cookiep;
1433	struct uio io;
1434	struct iovec iv;
1435
1436	if (nd->nd_repstat) {
1437		nfsrv_postopattr(nd, getret, &at);
1438		return (0);
1439	}
1440	if (nd->nd_flag & ND_NFSV2) {
1441		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1442		off = fxdr_unsigned(u_quad_t, *tl++);
1443	} else {
1444		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1445		off = fxdr_hyper(tl);
1446		tl += 2;
1447		verf = fxdr_hyper(tl);
1448		tl += 2;
1449	}
1450	toff = off;
1451	cnt = fxdr_unsigned(int, *tl);
1452	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
1453		cnt = NFS_SRVMAXDATA(nd);
1454	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1455	fullsiz = siz;
1456	if (nd->nd_flag & ND_NFSV3) {
1457		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
1458		    p);
1459#if 0
1460		/*
1461		 * va_filerev is not sufficient as a cookie verifier,
1462		 * since it is not supposed to change when entries are
1463		 * removed/added unless that offset cookies returned to
1464		 * the client are no longer valid.
1465		 */
1466		if (!nd->nd_repstat && toff && verf != at.na_filerev)
1467			nd->nd_repstat = NFSERR_BAD_COOKIE;
1468#endif
1469	}
1470	if (nd->nd_repstat == 0 && cnt == 0) {
1471		if (nd->nd_flag & ND_NFSV2)
1472			/* NFSv2 does not have NFSERR_TOOSMALL */
1473			nd->nd_repstat = EPERM;
1474		else
1475			nd->nd_repstat = NFSERR_TOOSMALL;
1476	}
1477	if (!nd->nd_repstat)
1478		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1479		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1480		    NFSACCCHK_VPISLOCKED, NULL);
1481	if (nd->nd_repstat) {
1482		vput(vp);
1483		if (nd->nd_flag & ND_NFSV3)
1484			nfsrv_postopattr(nd, getret, &at);
1485		return (0);
1486	}
1487	NFSVOPUNLOCK(vp, 0, p);
1488	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1489again:
1490	eofflag = 0;
1491	if (cookies) {
1492		free((caddr_t)cookies, M_TEMP);
1493		cookies = NULL;
1494	}
1495
1496	iv.iov_base = rbuf;
1497	iv.iov_len = siz;
1498	io.uio_iov = &iv;
1499	io.uio_iovcnt = 1;
1500	io.uio_offset = (off_t)off;
1501	io.uio_resid = siz;
1502	io.uio_segflg = UIO_SYSSPACE;
1503	io.uio_rw = UIO_READ;
1504	io.uio_td = NULL;
1505	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
1506	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1507	    &cookies);
1508	NFSVOPUNLOCK(vp, 0, p);
1509	off = (u_int64_t)io.uio_offset;
1510	if (io.uio_resid)
1511		siz -= io.uio_resid;
1512
1513	if (!cookies && !nd->nd_repstat)
1514		nd->nd_repstat = NFSERR_PERM;
1515	if (nd->nd_flag & ND_NFSV3) {
1516		getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1517		if (!nd->nd_repstat)
1518			nd->nd_repstat = getret;
1519	}
1520
1521	/*
1522	 * Handles the failed cases. nd->nd_repstat == 0 past here.
1523	 */
1524	if (nd->nd_repstat) {
1525		vrele(vp);
1526		free((caddr_t)rbuf, M_TEMP);
1527		if (cookies)
1528			free((caddr_t)cookies, M_TEMP);
1529		if (nd->nd_flag & ND_NFSV3)
1530			nfsrv_postopattr(nd, getret, &at);
1531		return (0);
1532	}
1533	/*
1534	 * If nothing read, return eof
1535	 * rpc reply
1536	 */
1537	if (siz == 0) {
1538		vrele(vp);
1539		if (nd->nd_flag & ND_NFSV2) {
1540			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1541		} else {
1542			nfsrv_postopattr(nd, getret, &at);
1543			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1544			txdr_hyper(at.na_filerev, tl);
1545			tl += 2;
1546		}
1547		*tl++ = newnfs_false;
1548		*tl = newnfs_true;
1549		FREE((caddr_t)rbuf, M_TEMP);
1550		FREE((caddr_t)cookies, M_TEMP);
1551		return (0);
1552	}
1553
1554	/*
1555	 * Check for degenerate cases of nothing useful read.
1556	 * If so go try again
1557	 */
1558	cpos = rbuf;
1559	cend = rbuf + siz;
1560	dp = (struct dirent *)cpos;
1561	cookiep = cookies;
1562
1563	/*
1564	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1565	 * directory offset up to a block boundary, so it is necessary to
1566	 * skip over the records that precede the requested offset. This
1567	 * requires the assumption that file offset cookies monotonically
1568	 * increase.
1569	 */
1570	while (cpos < cend && ncookies > 0 &&
1571	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1572	     ((u_quad_t)(*cookiep)) <= toff)) {
1573		cpos += dp->d_reclen;
1574		dp = (struct dirent *)cpos;
1575		cookiep++;
1576		ncookies--;
1577	}
1578	if (cpos >= cend || ncookies == 0) {
1579		siz = fullsiz;
1580		toff = off;
1581		goto again;
1582	}
1583
1584	/*
1585	 * dirlen is the size of the reply, including all XDR and must
1586	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
1587	 * if the XDR should be included in "count", but to be safe, we do.
1588	 * (Include the two booleans at the end of the reply in dirlen now.)
1589	 */
1590	if (nd->nd_flag & ND_NFSV3) {
1591		nfsrv_postopattr(nd, getret, &at);
1592		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1593		txdr_hyper(at.na_filerev, tl);
1594		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1595	} else {
1596		dirlen = 2 * NFSX_UNSIGNED;
1597	}
1598
1599	/* Loop through the records and build reply */
1600	while (cpos < cend && ncookies > 0) {
1601		nlen = dp->d_namlen;
1602		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1603			nlen <= NFS_MAXNAMLEN) {
1604			if (nd->nd_flag & ND_NFSV3)
1605				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1606			else
1607				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1608			if (dirlen > cnt) {
1609				eofflag = 0;
1610				break;
1611			}
1612
1613			/*
1614			 * Build the directory record xdr from
1615			 * the dirent entry.
1616			 */
1617			if (nd->nd_flag & ND_NFSV3) {
1618				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1619				*tl++ = newnfs_true;
1620				*tl++ = 0;
1621			} else {
1622				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1623				*tl++ = newnfs_true;
1624			}
1625			*tl = txdr_unsigned(dp->d_fileno);
1626			(void) nfsm_strtom(nd, dp->d_name, nlen);
1627			if (nd->nd_flag & ND_NFSV3) {
1628				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1629				*tl++ = 0;
1630			} else
1631				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1632			*tl = txdr_unsigned(*cookiep);
1633		}
1634		cpos += dp->d_reclen;
1635		dp = (struct dirent *)cpos;
1636		cookiep++;
1637		ncookies--;
1638	}
1639	if (cpos < cend)
1640		eofflag = 0;
1641	vrele(vp);
1642	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1643	*tl++ = newnfs_false;
1644	if (eofflag)
1645		*tl = newnfs_true;
1646	else
1647		*tl = newnfs_false;
1648	FREE((caddr_t)rbuf, M_TEMP);
1649	FREE((caddr_t)cookies, M_TEMP);
1650	return (0);
1651nfsmout:
1652	vput(vp);
1653	return (error);
1654}
1655
1656/*
1657 * Readdirplus for V3 and Readdir for V4.
1658 */
1659int
1660nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
1661    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1662{
1663	struct dirent *dp;
1664	u_int32_t *tl;
1665	int dirlen;
1666	char *cpos, *cend, *rbuf;
1667	struct vnode *nvp;
1668	fhandle_t nfh;
1669	struct nfsvattr nva, at, *nvap = &nva;
1670	struct mbuf *mb0, *mb1;
1671	struct nfsreferral *refp;
1672	int nlen, r, error = 0, getret = 1, usevget = 1;
1673	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
1674	caddr_t bpos0, bpos1;
1675	u_int64_t off, toff, verf;
1676	u_long *cookies = NULL, *cookiep;
1677	nfsattrbit_t attrbits, rderrbits, savbits;
1678	struct uio io;
1679	struct iovec iv;
1680	struct componentname cn;
1681
1682	if (nd->nd_repstat) {
1683		nfsrv_postopattr(nd, getret, &at);
1684		return (0);
1685	}
1686	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
1687	off = fxdr_hyper(tl);
1688	toff = off;
1689	tl += 2;
1690	verf = fxdr_hyper(tl);
1691	tl += 2;
1692	siz = fxdr_unsigned(int, *tl++);
1693	cnt = fxdr_unsigned(int, *tl);
1694
1695	/*
1696	 * Use the server's maximum data transfer size as the upper bound
1697	 * on reply datalen.
1698	 */
1699	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
1700		cnt = NFS_SRVMAXDATA(nd);
1701
1702	/*
1703	 * siz is a "hint" of how much directory information (name, fileid,
1704	 * cookie) should be in the reply. At least one client "hints" 0,
1705	 * so I set it to cnt for that case. I also round it up to the
1706	 * next multiple of DIRBLKSIZ.
1707	 */
1708	if (siz <= 0)
1709		siz = cnt;
1710	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1711
1712	if (nd->nd_flag & ND_NFSV4) {
1713		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1714		if (error)
1715			goto nfsmout;
1716		NFSSET_ATTRBIT(&savbits, &attrbits);
1717		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
1718		NFSZERO_ATTRBIT(&rderrbits);
1719		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
1720	} else {
1721		NFSZERO_ATTRBIT(&attrbits);
1722	}
1723	fullsiz = siz;
1724	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1725	if (!nd->nd_repstat) {
1726	    if (off && verf != at.na_filerev) {
1727		/*
1728		 * va_filerev is not sufficient as a cookie verifier,
1729		 * since it is not supposed to change when entries are
1730		 * removed/added unless that offset cookies returned to
1731		 * the client are no longer valid.
1732		 */
1733#if 0
1734		if (nd->nd_flag & ND_NFSV4) {
1735			nd->nd_repstat = NFSERR_NOTSAME;
1736		} else {
1737			nd->nd_repstat = NFSERR_BAD_COOKIE;
1738		}
1739#endif
1740	    } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
1741		nd->nd_repstat = NFSERR_BAD_COOKIE;
1742	    }
1743	}
1744	if (!nd->nd_repstat && vp->v_type != VDIR)
1745		nd->nd_repstat = NFSERR_NOTDIR;
1746	if (!nd->nd_repstat && cnt == 0)
1747		nd->nd_repstat = NFSERR_TOOSMALL;
1748	if (!nd->nd_repstat)
1749		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1750		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1751		    NFSACCCHK_VPISLOCKED, NULL);
1752	if (nd->nd_repstat) {
1753		vput(vp);
1754		if (nd->nd_flag & ND_NFSV3)
1755			nfsrv_postopattr(nd, getret, &at);
1756		return (0);
1757	}
1758
1759	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1760again:
1761	eofflag = 0;
1762	if (cookies) {
1763		free((caddr_t)cookies, M_TEMP);
1764		cookies = NULL;
1765	}
1766
1767	iv.iov_base = rbuf;
1768	iv.iov_len = siz;
1769	io.uio_iov = &iv;
1770	io.uio_iovcnt = 1;
1771	io.uio_offset = (off_t)off;
1772	io.uio_resid = siz;
1773	io.uio_segflg = UIO_SYSSPACE;
1774	io.uio_rw = UIO_READ;
1775	io.uio_td = NULL;
1776	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1777	    &cookies);
1778	off = (u_int64_t)io.uio_offset;
1779	if (io.uio_resid)
1780		siz -= io.uio_resid;
1781
1782	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1783
1784	if (!cookies && !nd->nd_repstat)
1785		nd->nd_repstat = NFSERR_PERM;
1786	if (!nd->nd_repstat)
1787		nd->nd_repstat = getret;
1788	if (nd->nd_repstat) {
1789		vput(vp);
1790		if (cookies)
1791			free((caddr_t)cookies, M_TEMP);
1792		free((caddr_t)rbuf, M_TEMP);
1793		if (nd->nd_flag & ND_NFSV3)
1794			nfsrv_postopattr(nd, getret, &at);
1795		return (0);
1796	}
1797	/*
1798	 * If nothing read, return eof
1799	 * rpc reply
1800	 */
1801	if (siz == 0) {
1802		vput(vp);
1803		if (nd->nd_flag & ND_NFSV3)
1804			nfsrv_postopattr(nd, getret, &at);
1805		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1806		txdr_hyper(at.na_filerev, tl);
1807		tl += 2;
1808		*tl++ = newnfs_false;
1809		*tl = newnfs_true;
1810		free((caddr_t)cookies, M_TEMP);
1811		free((caddr_t)rbuf, M_TEMP);
1812		return (0);
1813	}
1814
1815	/*
1816	 * Check for degenerate cases of nothing useful read.
1817	 * If so go try again
1818	 */
1819	cpos = rbuf;
1820	cend = rbuf + siz;
1821	dp = (struct dirent *)cpos;
1822	cookiep = cookies;
1823
1824	/*
1825	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1826	 * directory offset up to a block boundary, so it is necessary to
1827	 * skip over the records that precede the requested offset. This
1828	 * requires the assumption that file offset cookies monotonically
1829	 * increase.
1830	 */
1831	while (cpos < cend && ncookies > 0 &&
1832	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1833	   ((u_quad_t)(*cookiep)) <= toff ||
1834	   ((nd->nd_flag & ND_NFSV4) &&
1835	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1836	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
1837		cpos += dp->d_reclen;
1838		dp = (struct dirent *)cpos;
1839		cookiep++;
1840		ncookies--;
1841	}
1842	if (cpos >= cend || ncookies == 0) {
1843		siz = fullsiz;
1844		toff = off;
1845		goto again;
1846	}
1847	NFSVOPUNLOCK(vp, 0, p);
1848
1849	/*
1850	 * Save this position, in case there is an error before one entry
1851	 * is created.
1852	 */
1853	mb0 = nd->nd_mb;
1854	bpos0 = nd->nd_bpos;
1855
1856	/*
1857	 * Fill in the first part of the reply.
1858	 * dirlen is the reply length in bytes and cannot exceed cnt.
1859	 * (Include the two booleans at the end of the reply in dirlen now,
1860	 *  so we recognize when we have exceeded cnt.)
1861	 */
1862	if (nd->nd_flag & ND_NFSV3) {
1863		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1864		nfsrv_postopattr(nd, getret, &at);
1865	} else {
1866		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
1867	}
1868	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
1869	txdr_hyper(at.na_filerev, tl);
1870
1871	/*
1872	 * Save this position, in case there is an empty reply needed.
1873	 */
1874	mb1 = nd->nd_mb;
1875	bpos1 = nd->nd_bpos;
1876
1877	/* Loop through the records and build reply */
1878	entrycnt = 0;
1879	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
1880		nlen = dp->d_namlen;
1881		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1882		    nlen <= NFS_MAXNAMLEN &&
1883		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
1884		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
1885		      || (nlen == 1 && dp->d_name[0] != '.'))) {
1886			/*
1887			 * Save the current position in the reply, in case
1888			 * this entry exceeds cnt.
1889			 */
1890			mb1 = nd->nd_mb;
1891			bpos1 = nd->nd_bpos;
1892
1893			/*
1894			 * For readdir_and_lookup get the vnode using
1895			 * the file number.
1896			 */
1897			nvp = NULL;
1898			refp = NULL;
1899			r = 0;
1900			if ((nd->nd_flag & ND_NFSV3) ||
1901			    NFSNONZERO_ATTRBIT(&savbits)) {
1902				if (nd->nd_flag & ND_NFSV4)
1903					refp = nfsv4root_getreferral(NULL,
1904					    vp, dp->d_fileno);
1905				if (refp == NULL) {
1906					if (usevget)
1907						r = VFS_VGET(vp->v_mount,
1908						    dp->d_fileno, LK_EXCLUSIVE,
1909						    &nvp);
1910					else
1911						r = EOPNOTSUPP;
1912					if (r == EOPNOTSUPP) {
1913						if (usevget) {
1914							usevget = 0;
1915							cn.cn_nameiop = LOOKUP;
1916							cn.cn_lkflags =
1917							    LK_EXCLUSIVE |
1918							    LK_RETRY;
1919							cn.cn_cred =
1920							    nd->nd_cred;
1921							cn.cn_thread = p;
1922						}
1923						cn.cn_nameptr = dp->d_name;
1924						cn.cn_namelen = nlen;
1925						cn.cn_flags = ISLASTCN |
1926						    NOFOLLOW | LOCKLEAF |
1927						    MPSAFE;
1928						if (nlen == 2 &&
1929						    dp->d_name[0] == '.' &&
1930						    dp->d_name[1] == '.')
1931							cn.cn_flags |=
1932							    ISDOTDOT;
1933						if (!VOP_ISLOCKED(vp))
1934							vn_lock(vp,
1935							    LK_EXCLUSIVE |
1936							    LK_RETRY);
1937						if ((vp->v_vflag & VV_ROOT) != 0
1938						    && (cn.cn_flags & ISDOTDOT)
1939						    != 0) {
1940							vref(vp);
1941							nvp = vp;
1942							r = 0;
1943						} else
1944							r = VOP_LOOKUP(vp, &nvp,
1945							    &cn);
1946					}
1947				}
1948				if (!r) {
1949				    if (refp == NULL &&
1950					((nd->nd_flag & ND_NFSV3) ||
1951					 NFSNONZERO_ATTRBIT(&attrbits))) {
1952					r = nfsvno_getfh(nvp, &nfh, p);
1953					if (!r)
1954					    r = nfsvno_getattr(nvp, nvap,
1955						nd->nd_cred, p);
1956				    }
1957				} else {
1958				    nvp = NULL;
1959				}
1960				if (r) {
1961					if (!NFSISSET_ATTRBIT(&attrbits,
1962					    NFSATTRBIT_RDATTRERROR)) {
1963						if (nvp != NULL)
1964							vput(nvp);
1965						nd->nd_repstat = r;
1966						break;
1967					}
1968				}
1969			}
1970
1971			/*
1972			 * Build the directory record xdr
1973			 */
1974			if (nd->nd_flag & ND_NFSV3) {
1975				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1976				*tl++ = newnfs_true;
1977				*tl++ = 0;
1978				*tl = txdr_unsigned(dp->d_fileno);
1979				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
1980				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1981				*tl++ = 0;
1982				*tl = txdr_unsigned(*cookiep);
1983				nfsrv_postopattr(nd, 0, nvap);
1984				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
1985				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
1986				if (nvp != NULL)
1987					vput(nvp);
1988			} else {
1989				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1990				*tl++ = newnfs_true;
1991				*tl++ = 0;
1992				*tl = txdr_unsigned(*cookiep);
1993				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
1994				if (nvp != NULL)
1995					NFSVOPUNLOCK(nvp, 0, p);
1996				if (refp != NULL) {
1997					dirlen += nfsrv_putreferralattr(nd,
1998					    &savbits, refp, 0,
1999					    &nd->nd_repstat);
2000					if (nd->nd_repstat) {
2001						if (nvp != NULL)
2002							vrele(nvp);
2003						break;
2004					}
2005				} else if (r) {
2006					dirlen += nfsvno_fillattr(nd, nvp, nvap,
2007					    &nfh, r, &rderrbits, nd->nd_cred,
2008					    p, isdgram, 0);
2009				} else {
2010					dirlen += nfsvno_fillattr(nd, nvp, nvap,
2011					    &nfh, r, &attrbits, nd->nd_cred,
2012					    p, isdgram, 0);
2013				}
2014				if (nvp != NULL)
2015					vrele(nvp);
2016				dirlen += (3 * NFSX_UNSIGNED);
2017			}
2018			if (dirlen <= cnt)
2019				entrycnt++;
2020		}
2021		cpos += dp->d_reclen;
2022		dp = (struct dirent *)cpos;
2023		cookiep++;
2024		ncookies--;
2025	}
2026	if (!usevget && VOP_ISLOCKED(vp))
2027		vput(vp);
2028	else
2029		vrele(vp);
2030
2031	/*
2032	 * If dirlen > cnt, we must strip off the last entry. If that
2033	 * results in an empty reply, report NFSERR_TOOSMALL.
2034	 */
2035	if (dirlen > cnt || nd->nd_repstat) {
2036		if (!nd->nd_repstat && entrycnt == 0)
2037			nd->nd_repstat = NFSERR_TOOSMALL;
2038		if (nd->nd_repstat)
2039			newnfs_trimtrailing(nd, mb0, bpos0);
2040		else
2041			newnfs_trimtrailing(nd, mb1, bpos1);
2042		eofflag = 0;
2043	} else if (cpos < cend)
2044		eofflag = 0;
2045	if (!nd->nd_repstat) {
2046		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2047		*tl++ = newnfs_false;
2048		if (eofflag)
2049			*tl = newnfs_true;
2050		else
2051			*tl = newnfs_false;
2052	}
2053	FREE((caddr_t)cookies, M_TEMP);
2054	FREE((caddr_t)rbuf, M_TEMP);
2055	return (0);
2056nfsmout:
2057	vput(vp);
2058	return (error);
2059}
2060
2061/*
2062 * Get the settable attributes out of the mbuf list.
2063 * (Return 0 or EBADRPC)
2064 */
2065int
2066nfsrv_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2067    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2068{
2069	u_int32_t *tl;
2070	struct nfsv2_sattr *sp;
2071	struct timeval curtime;
2072	int error = 0, toclient = 0;
2073
2074	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
2075	case ND_NFSV2:
2076		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2077		/*
2078		 * Some old clients didn't fill in the high order 16bits.
2079		 * --> check the low order 2 bytes for 0xffff
2080		 */
2081		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
2082			nvap->na_mode = nfstov_mode(sp->sa_mode);
2083		if (sp->sa_uid != newnfs_xdrneg1)
2084			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
2085		if (sp->sa_gid != newnfs_xdrneg1)
2086			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
2087		if (sp->sa_size != newnfs_xdrneg1)
2088			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
2089		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
2090#ifdef notyet
2091			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
2092#else
2093			nvap->na_atime.tv_sec =
2094				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
2095			nvap->na_atime.tv_nsec = 0;
2096#endif
2097		}
2098		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
2099			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
2100		break;
2101	case ND_NFSV3:
2102		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2103		if (*tl == newnfs_true) {
2104			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2105			nvap->na_mode = nfstov_mode(*tl);
2106		}
2107		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2108		if (*tl == newnfs_true) {
2109			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2110			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
2111		}
2112		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2113		if (*tl == newnfs_true) {
2114			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2115			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
2116		}
2117		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2118		if (*tl == newnfs_true) {
2119			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2120			nvap->na_size = fxdr_hyper(tl);
2121		}
2122		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2123		switch (fxdr_unsigned(int, *tl)) {
2124		case NFSV3SATTRTIME_TOCLIENT:
2125			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2126			fxdr_nfsv3time(tl, &nvap->na_atime);
2127			toclient = 1;
2128			break;
2129		case NFSV3SATTRTIME_TOSERVER:
2130			NFSGETTIME(&curtime);
2131			nvap->na_atime.tv_sec = curtime.tv_sec;
2132			nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2133			nvap->na_vaflags |= VA_UTIMES_NULL;
2134			break;
2135		};
2136		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2137		switch (fxdr_unsigned(int, *tl)) {
2138		case NFSV3SATTRTIME_TOCLIENT:
2139			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2140			fxdr_nfsv3time(tl, &nvap->na_mtime);
2141			nvap->na_vaflags &= ~VA_UTIMES_NULL;
2142			break;
2143		case NFSV3SATTRTIME_TOSERVER:
2144			NFSGETTIME(&curtime);
2145			nvap->na_mtime.tv_sec = curtime.tv_sec;
2146			nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2147			if (!toclient)
2148				nvap->na_vaflags |= VA_UTIMES_NULL;
2149			break;
2150		};
2151		break;
2152	case ND_NFSV4:
2153		error = nfsv4_sattr(nd, nvap, attrbitp, aclp, p);
2154	};
2155nfsmout:
2156	return (error);
2157}
2158
2159/*
2160 * Handle the setable attributes for V4.
2161 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
2162 */
2163int
2164nfsv4_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2165    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2166{
2167	u_int32_t *tl;
2168	int attrsum = 0;
2169	int i, j;
2170	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
2171	int toclient = 0;
2172	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
2173	uid_t uid;
2174	gid_t gid;
2175	struct timeval curtime;
2176
2177	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
2178	if (error)
2179		return (error);
2180	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2181	attrsize = fxdr_unsigned(int, *tl);
2182
2183	/*
2184	 * Loop around getting the setable attributes. If an unsupported
2185	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
2186	 */
2187	if (retnotsup) {
2188		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2189		bitpos = NFSATTRBIT_MAX;
2190	} else {
2191		bitpos = 0;
2192	}
2193	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
2194	    if (attrsum > attrsize) {
2195		error = NFSERR_BADXDR;
2196		goto nfsmout;
2197	    }
2198	    if (NFSISSET_ATTRBIT(attrbitp, bitpos))
2199		switch (bitpos) {
2200		case NFSATTRBIT_SIZE:
2201			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2202			nvap->na_size = fxdr_hyper(tl);
2203			attrsum += NFSX_HYPER;
2204			break;
2205		case NFSATTRBIT_ACL:
2206			error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
2207			    p);
2208			if (error)
2209				goto nfsmout;
2210			if (aceerr && !nd->nd_repstat)
2211				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2212			attrsum += aclsize;
2213			break;
2214		case NFSATTRBIT_ARCHIVE:
2215			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2216			if (!nd->nd_repstat)
2217				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2218			attrsum += NFSX_UNSIGNED;
2219			break;
2220		case NFSATTRBIT_HIDDEN:
2221			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2222			if (!nd->nd_repstat)
2223				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2224			attrsum += NFSX_UNSIGNED;
2225			break;
2226		case NFSATTRBIT_MIMETYPE:
2227			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2228			i = fxdr_unsigned(int, *tl);
2229			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
2230			if (error)
2231				goto nfsmout;
2232			if (!nd->nd_repstat)
2233				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2234			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
2235			break;
2236		case NFSATTRBIT_MODE:
2237			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2238			nvap->na_mode = nfstov_mode(*tl);
2239			attrsum += NFSX_UNSIGNED;
2240			break;
2241		case NFSATTRBIT_OWNER:
2242			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2243			j = fxdr_unsigned(int, *tl);
2244			if (j < 0)
2245				return (NFSERR_BADXDR);
2246			if (j > NFSV4_SMALLSTR)
2247				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2248			else
2249				cp = namestr;
2250			error = nfsrv_mtostr(nd, cp, j);
2251			if (error) {
2252				if (j > NFSV4_SMALLSTR)
2253					free(cp, M_NFSSTRING);
2254				return (error);
2255			}
2256			if (!nd->nd_repstat) {
2257				nd->nd_repstat = nfsv4_strtouid(cp,j,&uid,p);
2258				if (!nd->nd_repstat)
2259					nvap->na_uid = uid;
2260			}
2261			if (j > NFSV4_SMALLSTR)
2262				free(cp, M_NFSSTRING);
2263			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2264			break;
2265		case NFSATTRBIT_OWNERGROUP:
2266			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2267			j = fxdr_unsigned(int, *tl);
2268			if (j < 0)
2269				return (NFSERR_BADXDR);
2270			if (j > NFSV4_SMALLSTR)
2271				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2272			else
2273				cp = namestr;
2274			error = nfsrv_mtostr(nd, cp, j);
2275			if (error) {
2276				if (j > NFSV4_SMALLSTR)
2277					free(cp, M_NFSSTRING);
2278				return (error);
2279			}
2280			if (!nd->nd_repstat) {
2281				nd->nd_repstat = nfsv4_strtogid(cp,j,&gid,p);
2282				if (!nd->nd_repstat)
2283					nvap->na_gid = gid;
2284			}
2285			if (j > NFSV4_SMALLSTR)
2286				free(cp, M_NFSSTRING);
2287			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2288			break;
2289		case NFSATTRBIT_SYSTEM:
2290			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2291			if (!nd->nd_repstat)
2292				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2293			attrsum += NFSX_UNSIGNED;
2294			break;
2295		case NFSATTRBIT_TIMEACCESSSET:
2296			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2297			attrsum += NFSX_UNSIGNED;
2298			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2299			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2300			    fxdr_nfsv4time(tl, &nvap->na_atime);
2301			    toclient = 1;
2302			    attrsum += NFSX_V4TIME;
2303			} else {
2304			    NFSGETTIME(&curtime);
2305			    nvap->na_atime.tv_sec = curtime.tv_sec;
2306			    nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2307			    nvap->na_vaflags |= VA_UTIMES_NULL;
2308			}
2309			break;
2310		case NFSATTRBIT_TIMEBACKUP:
2311			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2312			if (!nd->nd_repstat)
2313				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2314			attrsum += NFSX_V4TIME;
2315			break;
2316		case NFSATTRBIT_TIMECREATE:
2317			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2318			if (!nd->nd_repstat)
2319				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2320			attrsum += NFSX_V4TIME;
2321			break;
2322		case NFSATTRBIT_TIMEMODIFYSET:
2323			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2324			attrsum += NFSX_UNSIGNED;
2325			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2326			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2327			    fxdr_nfsv4time(tl, &nvap->na_mtime);
2328			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
2329			    attrsum += NFSX_V4TIME;
2330			} else {
2331			    NFSGETTIME(&curtime);
2332			    nvap->na_mtime.tv_sec = curtime.tv_sec;
2333			    nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2334			    if (!toclient)
2335				nvap->na_vaflags |= VA_UTIMES_NULL;
2336			}
2337			break;
2338		default:
2339			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2340			/*
2341			 * set bitpos so we drop out of the loop.
2342			 */
2343			bitpos = NFSATTRBIT_MAX;
2344			break;
2345		};
2346	}
2347
2348	/*
2349	 * some clients pad the attrlist, so we need to skip over the
2350	 * padding.
2351	 */
2352	if (attrsum > attrsize) {
2353		error = NFSERR_BADXDR;
2354	} else {
2355		attrsize = NFSM_RNDUP(attrsize);
2356		if (attrsum < attrsize)
2357			error = nfsm_advance(nd, attrsize - attrsum, -1);
2358	}
2359nfsmout:
2360	return (error);
2361}
2362
2363/*
2364 * Check/setup export credentials.
2365 */
2366int
2367nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
2368    struct ucred *credanon)
2369{
2370	int error = 0;
2371
2372	/*
2373	 * Check/setup credentials.
2374	 */
2375	if (nd->nd_flag & ND_GSS)
2376		exp->nes_exflag &= ~MNT_EXPORTANON;
2377
2378	/*
2379	 * Check to see if the operation is allowed for this security flavor.
2380	 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
2381	 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
2382	 * Also, allow Secinfo, so that it can acquire the correct flavor(s).
2383	 */
2384	if (nfsvno_testexp(nd, exp) &&
2385	    nd->nd_procnum != NFSV4OP_SECINFO &&
2386	    nd->nd_procnum != NFSPROC_FSINFO) {
2387		if (nd->nd_flag & ND_NFSV4)
2388			error = NFSERR_WRONGSEC;
2389		else
2390			error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2391		return (error);
2392	}
2393
2394	/*
2395	 * Check to see if the file system is exported V4 only.
2396	 */
2397	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4))
2398		return (NFSERR_PROGNOTV4);
2399
2400	/*
2401	 * Now, map the user credentials.
2402	 * (Note that ND_AUTHNONE will only be set for an NFSv3
2403	 *  Fsinfo RPC. If set for anything else, this code might need
2404	 *  to change.)
2405	 */
2406	if (NFSVNO_EXPORTED(exp) &&
2407	    ((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) ||
2408	     NFSVNO_EXPORTANON(exp) ||
2409	     (nd->nd_flag & ND_AUTHNONE))) {
2410		nd->nd_cred->cr_uid = credanon->cr_uid;
2411		nd->nd_cred->cr_gid = credanon->cr_gid;
2412		crsetgroups(nd->nd_cred, credanon->cr_ngroups,
2413		    credanon->cr_groups);
2414	}
2415	return (0);
2416}
2417
2418/*
2419 * Check exports.
2420 */
2421int
2422nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
2423    struct ucred **credp)
2424{
2425	int i, error, *secflavors;
2426
2427	error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2428	    &exp->nes_numsecflavor, &secflavors);
2429	if (error) {
2430		if (nfs_rootfhset) {
2431			exp->nes_exflag = 0;
2432			exp->nes_numsecflavor = 0;
2433			error = 0;
2434		}
2435	} else {
2436		/* Copy the security flavors. */
2437		for (i = 0; i < exp->nes_numsecflavor; i++)
2438			exp->nes_secflavors[i] = secflavors[i];
2439	}
2440	return (error);
2441}
2442
2443/*
2444 * Get a vnode for a file handle and export stuff.
2445 */
2446int
2447nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
2448    struct vnode **vpp, struct nfsexstuff *exp, struct ucred **credp)
2449{
2450	int i, error, *secflavors;
2451
2452	*credp = NULL;
2453	exp->nes_numsecflavor = 0;
2454	error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp);
2455	if (error != 0)
2456		/* Make sure the server replies ESTALE to the client. */
2457		error = ESTALE;
2458	if (nam && !error) {
2459		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2460		    &exp->nes_numsecflavor, &secflavors);
2461		if (error) {
2462			if (nfs_rootfhset) {
2463				exp->nes_exflag = 0;
2464				exp->nes_numsecflavor = 0;
2465				error = 0;
2466			} else {
2467				vput(*vpp);
2468			}
2469		} else {
2470			/* Copy the security flavors. */
2471			for (i = 0; i < exp->nes_numsecflavor; i++)
2472				exp->nes_secflavors[i] = secflavors[i];
2473		}
2474	}
2475	return (error);
2476}
2477
2478/*
2479 * Do the pathconf vnode op.
2480 */
2481int
2482nfsvno_pathconf(struct vnode *vp, int flag, register_t *retf,
2483    struct ucred *cred, struct thread *p)
2484{
2485	int error;
2486
2487	error = VOP_PATHCONF(vp, flag, retf);
2488	return (error);
2489}
2490
2491/*
2492 * nfsd_fhtovp() - convert a fh to a vnode ptr
2493 * 	- look up fsid in mount list (if not found ret error)
2494 *	- get vp and export rights by calling nfsvno_fhtovp()
2495 *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
2496 *	  for AUTH_SYS
2497 * Also handle getting the Giant lock for the file system,
2498 * as required:
2499 * - if same mount point as *mpp
2500 *       do nothing
2501 *   else if *mpp == NULL
2502 *       if already locked
2503 *           leave it locked
2504 *       else
2505 *           call VFS_LOCK_GIANT()
2506 *   else
2507 *       if already locked
2508 *            unlock Giant
2509 *       call VFS_LOCK_GIANT()
2510 */
2511void
2512nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp,
2513    struct vnode **vpp, struct nfsexstuff *exp,
2514    struct mount **mpp, int startwrite, struct thread *p)
2515{
2516	struct mount *mp;
2517	struct ucred *credanon;
2518	fhandle_t *fhp;
2519
2520	fhp = (fhandle_t *)nfp->nfsrvfh_data;
2521	/*
2522	 * Check for the special case of the nfsv4root_fh.
2523	 */
2524	mp = vfs_getvfs(&fhp->fh_fsid);
2525	if (!mp) {
2526		*vpp = NULL;
2527		nd->nd_repstat = ESTALE;
2528		if (*mpp && exp->nes_vfslocked)
2529			VFS_UNLOCK_GIANT(*mpp);
2530		*mpp = NULL;
2531		exp->nes_vfslocked = 0;
2532		return;
2533	}
2534
2535	/*
2536	 * Now, handle Giant for the file system.
2537	 */
2538	if (*mpp != NULL && *mpp != mp && exp->nes_vfslocked) {
2539		VFS_UNLOCK_GIANT(*mpp);
2540		exp->nes_vfslocked = 0;
2541	}
2542	if (!exp->nes_vfslocked && *mpp != mp)
2543		exp->nes_vfslocked = VFS_LOCK_GIANT(mp);
2544
2545	*mpp = mp;
2546	if (startwrite)
2547		vn_start_write(NULL, mpp, V_WAIT);
2548
2549	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, vpp, exp,
2550	    &credanon);
2551
2552	/*
2553	 * For NFSv4 without a pseudo root fs, unexported file handles
2554	 * can be returned, so that Lookup works everywhere.
2555	 */
2556	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
2557	    !(nd->nd_flag & ND_NFSV4)) {
2558		vput(*vpp);
2559		nd->nd_repstat = EACCES;
2560	}
2561
2562	/*
2563	 * Personally, I've never seen any point in requiring a
2564	 * reserved port#, since only in the rare case where the
2565	 * clients are all boxes with secure system priviledges,
2566	 * does it provide any enhanced security, but... some people
2567	 * believe it to be useful and keep putting this code back in.
2568	 * (There is also some "security checker" out there that
2569	 *  complains if the nfs server doesn't enforce this.)
2570	 * However, note the following:
2571	 * RFC3530 (NFSv4) specifies that a reserved port# not be
2572	 *	required.
2573	 * RFC2623 recommends that, if a reserved port# is checked for,
2574	 *	that there be a way to turn that off--> ifdef'd.
2575	 */
2576#ifdef NFS_REQRSVPORT
2577	if (!nd->nd_repstat) {
2578		struct sockaddr_in *saddr;
2579		struct sockaddr_in6 *saddr6;
2580
2581		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
2582		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
2583		if (!(nd->nd_flag & ND_NFSV4) &&
2584		    ((saddr->sin_family == AF_INET &&
2585		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
2586		     (saddr6->sin6_family == AF_INET6 &&
2587		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
2588			vput(*vpp);
2589			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2590		}
2591	}
2592#endif	/* NFS_REQRSVPORT */
2593
2594	/*
2595	 * Check/setup credentials.
2596	 */
2597	if (!nd->nd_repstat) {
2598		nd->nd_saveduid = nd->nd_cred->cr_uid;
2599		nd->nd_repstat = nfsd_excred(nd, exp, credanon);
2600		if (nd->nd_repstat)
2601			vput(*vpp);
2602	}
2603	if (credanon != NULL)
2604		crfree(credanon);
2605	if (nd->nd_repstat) {
2606		if (startwrite)
2607			vn_finished_write(mp);
2608		if (exp->nes_vfslocked) {
2609			VFS_UNLOCK_GIANT(mp);
2610			exp->nes_vfslocked = 0;
2611		}
2612		vfs_rel(mp);
2613		*vpp = NULL;
2614		*mpp = NULL;
2615	} else {
2616		vfs_rel(mp);
2617	}
2618}
2619
2620/*
2621 * glue for fp.
2622 */
2623int
2624fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
2625{
2626	struct filedesc *fdp;
2627	struct file *fp;
2628
2629	fdp = p->td_proc->p_fd;
2630	if (fd >= fdp->fd_nfiles ||
2631	    (fp = fdp->fd_ofiles[fd]) == NULL)
2632		return (EBADF);
2633	*fpp = fp;
2634	return (0);
2635}
2636
2637/*
2638 * Called from nfssvc() to update the exports list. Just call
2639 * vfs_export(). This has to be done, since the v4 root fake fs isn't
2640 * in the mount list.
2641 */
2642int
2643nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
2644{
2645	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
2646	int error;
2647	struct nameidata nd;
2648	fhandle_t fh;
2649
2650	error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
2651	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT)) {
2652		nfs_rootfhset = 0;
2653		nfsv4root_set = 0;
2654	} else if (error == 0) {
2655		if (nfsexargp->fspec == NULL)
2656			return (EPERM);
2657		/*
2658		 * If fspec != NULL, this is the v4root path.
2659		 */
2660		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE,
2661		    nfsexargp->fspec, p);
2662		if ((error = namei(&nd)) != 0)
2663			return (error);
2664		error = nfsvno_getfh(nd.ni_vp, &fh, p);
2665		vrele(nd.ni_vp);
2666		if (!error) {
2667			nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
2668			NFSBCOPY((caddr_t)&fh,
2669			    nfs_rootfh.nfsrvfh_data,
2670			    sizeof (fhandle_t));
2671			nfs_rootfhset = 1;
2672		}
2673	}
2674	return (error);
2675}
2676
2677/*
2678 * Get the tcp socket sequence numbers we need.
2679 * (Maybe this should be moved to the tcp sources?)
2680 */
2681int
2682nfsrv_getsocksndseq(struct socket *so, tcp_seq *maxp, tcp_seq *unap)
2683{
2684	struct inpcb *inp;
2685	struct tcpcb *tp;
2686
2687	inp = sotoinpcb(so);
2688	KASSERT(inp != NULL, ("nfsrv_getsocksndseq: inp == NULL"));
2689	INP_RLOCK(inp);
2690	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
2691		INP_RUNLOCK(inp);
2692		return (EPIPE);
2693	}
2694	tp = intotcpcb(inp);
2695	if (tp->t_state != TCPS_ESTABLISHED) {
2696		INP_RUNLOCK(inp);
2697		return (EPIPE);
2698	}
2699	*maxp = tp->snd_max;
2700	*unap = tp->snd_una;
2701	INP_RUNLOCK(inp);
2702	return (0);
2703}
2704
/*
 * This function needs to test to see if the system is near its limit
 * for memory allocation via malloc() or mget() and return True iff
 * either of these resources are near their limit.
 * XXX (For now, this is just a stub.)
 * The stub only does anything when nfsrv_testmalloclimit is non-zero:
 * it then reports "near limit" on every 1000th call made while the flag
 * is set, and prints a message for the 1st, 101st, ... such report.
 */
int nfsrv_testmalloclimit = 0;
int
nfsrv_mallocmget_limit(void)
{
	static int reports = 0;
	static int calls = 1;
	int thiscall;

	/* The call counter only advances while testing is enabled. */
	if (!nfsrv_testmalloclimit)
		return (0);
	thiscall = calls++;
	if ((thiscall % 1000) != 0)
		return (0);
	if ((reports++ % 100) == 0)
		printf("nfsd: malloc/mget near limit\n");
	return (1);
}
2725
2726/*
2727 * BSD specific initialization of a mount point.
2728 */
2729void
2730nfsd_mntinit(void)
2731{
2732	static int inited = 0;
2733
2734	if (inited)
2735		return;
2736	inited = 1;
2737	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
2738	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
2739	nfsv4root_mnt.mnt_export = NULL;
2740	TAILQ_INIT(&nfsv4root_opt);
2741	TAILQ_INIT(&nfsv4root_newopt);
2742	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
2743	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
2744	nfsv4root_mnt.mnt_nvnodelistsize = 0;
2745}
2746
2747/*
2748 * Get a vnode for a file handle, without checking exports, etc.
2749 */
2750struct vnode *
2751nfsvno_getvp(fhandle_t *fhp)
2752{
2753	struct mount *mp;
2754	struct vnode *vp;
2755	int error;
2756
2757	mp = vfs_getvfs(&fhp->fh_fsid);
2758	if (mp == NULL)
2759		return (NULL);
2760	error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp);
2761	if (error)
2762		return (NULL);
2763	return (vp);
2764}
2765
2766/*
2767 * Check to see it a byte range lock held by a process running
2768 * locally on the server conflicts with the new lock.
2769 */
2770int
2771nfsvno_localconflict(struct vnode *vp, int ftype, u_int64_t first,
2772    u_int64_t end, struct nfslockconflict *cfp, struct thread *td)
2773{
2774	int error;
2775	struct flock fl;
2776
2777	if (!nfsrv_dolocallocks)
2778		return (0);
2779	fl.l_whence = SEEK_SET;
2780	fl.l_type = ftype;
2781	fl.l_start = (off_t)first;
2782	if (end == NFS64BITSSET)
2783		fl.l_len = 0;
2784	else
2785		fl.l_len = (off_t)(end - first);
2786	/*
2787	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2788	 * values for all calls, so that all locks will be held by the
2789	 * nfsd server. (The nfsd server handles conflicts between the
2790	 * various clients.)
2791	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2792	 * bytes, so it can't be put in l_sysid.
2793	 */
2794	if (nfsv4_sysid == 0)
2795		nfsv4_sysid = nlm_acquire_next_sysid();
2796	fl.l_pid = (pid_t)0;
2797	fl.l_sysid = (int)nfsv4_sysid;
2798
2799	NFSVOPUNLOCK(vp, 0, td);
2800	error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_GETLK, &fl,
2801	    (F_POSIX | F_REMOTE));
2802	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, td);
2803	if (error)
2804		return (error);
2805	if (fl.l_type == F_UNLCK)
2806		return (0);
2807	if (cfp != NULL) {
2808		cfp->cl_clientid.lval[0] = cfp->cl_clientid.lval[1] = 0;
2809		cfp->cl_first = (u_int64_t)fl.l_start;
2810		if (fl.l_len == 0)
2811			cfp->cl_end = NFS64BITSSET;
2812		else
2813			cfp->cl_end = (u_int64_t)
2814			    (fl.l_start + fl.l_len);
2815		if (fl.l_type == F_WRLCK)
2816			cfp->cl_flags = NFSLCK_WRITE;
2817		else
2818			cfp->cl_flags = NFSLCK_READ;
2819		sprintf(cfp->cl_owner, "LOCALID%d", fl.l_pid);
2820		cfp->cl_ownerlen = strlen(cfp->cl_owner);
2821		return (NFSERR_DENIED);
2822	}
2823	return (NFSERR_INVAL);
2824}
2825
2826/*
2827 * Do a local VOP_ADVLOCK().
2828 */
2829int
2830nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
2831    u_int64_t end, struct thread *td)
2832{
2833	int error;
2834	struct flock fl;
2835	u_int64_t tlen;
2836
2837	if (nfsrv_dolocallocks == 0)
2838		return (0);
2839	fl.l_whence = SEEK_SET;
2840	fl.l_type = ftype;
2841	fl.l_start = (off_t)first;
2842	if (end == NFS64BITSSET) {
2843		fl.l_len = 0;
2844	} else {
2845		tlen = end - first;
2846		fl.l_len = (off_t)tlen;
2847	}
2848	/*
2849	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2850	 * values for all calls, so that all locks will be held by the
2851	 * nfsd server. (The nfsd server handles conflicts between the
2852	 * various clients.)
2853	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2854	 * bytes, so it can't be put in l_sysid.
2855	 */
2856	if (nfsv4_sysid == 0)
2857		nfsv4_sysid = nlm_acquire_next_sysid();
2858	fl.l_pid = (pid_t)0;
2859	fl.l_sysid = (int)nfsv4_sysid;
2860
2861	NFSVOPUNLOCK(vp, 0, td);
2862	if (ftype == F_UNLCK)
2863		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
2864		    (F_POSIX | F_REMOTE));
2865	else
2866		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
2867		    (F_POSIX | F_REMOTE));
2868	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, td);
2869	return (error);
2870}
2871
/*
 * Release the lock on an underlying local file system; this is just
 * a function wrapper around VFS_UNLOCK_GIANT().
 */
void
nfsvno_unlockvfs(struct mount *mp)
{

	VFS_UNLOCK_GIANT(mp);
}
2881
/*
 * Lock an underlying file system, as required, and return
 * whether or not it is locked (the value of VFS_LOCK_GIANT()).
 */
int
nfsvno_lockvfs(struct mount *mp)
{

	return (VFS_LOCK_GIANT(mp));
}
2894
2895/*
2896 * Check the nfsv4 root exports.
2897 */
2898int
2899nfsvno_v4rootexport(struct nfsrv_descript *nd)
2900{
2901	struct ucred *credanon;
2902	int exflags, error, numsecflavor, *secflavors, i;
2903
2904	error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
2905	    &credanon, &numsecflavor, &secflavors);
2906	if (error)
2907		return (NFSERR_PROGUNAVAIL);
2908	if (credanon != NULL)
2909		crfree(credanon);
2910	for (i = 0; i < numsecflavor; i++) {
2911		if (secflavors[i] == AUTH_SYS)
2912			nd->nd_flag |= ND_EXAUTHSYS;
2913		else if (secflavors[i] == RPCSEC_GSS_KRB5)
2914			nd->nd_flag |= ND_EXGSS;
2915		else if (secflavors[i] == RPCSEC_GSS_KRB5I)
2916			nd->nd_flag |= ND_EXGSSINTEGRITY;
2917		else if (secflavors[i] == RPCSEC_GSS_KRB5P)
2918			nd->nd_flag |= ND_EXGSSPRIVACY;
2919	}
2920	return (0);
2921}
2922
2923/*
2924 * Nfs server psuedo system call for the nfsd's
2925 */
2926/*
2927 * MPSAFE
2928 */
2929static int
2930nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
2931{
2932	struct file *fp;
2933	struct nfsd_addsock_args sockarg;
2934	struct nfsd_nfsd_args nfsdarg;
2935	int error;
2936
2937	if (uap->flag & NFSSVC_NFSDADDSOCK) {
2938		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
2939		if (error)
2940			return (error);
2941		if ((error = fget(td, sockarg.sock, &fp)) != 0) {
2942			return (error);
2943		}
2944		if (fp->f_type != DTYPE_SOCKET) {
2945			fdrop(fp, td);
2946			return (EPERM);
2947		}
2948		error = nfsrvd_addsock(fp);
2949		fdrop(fp, td);
2950	} else if (uap->flag & NFSSVC_NFSDNFSD) {
2951		if (uap->argp == NULL)
2952			return (EINVAL);
2953		error = copyin(uap->argp, (caddr_t)&nfsdarg,
2954		    sizeof (nfsdarg));
2955		if (error)
2956			return (error);
2957		error = nfsrvd_nfsd(td, &nfsdarg);
2958	} else {
2959		error = nfssvc_srvcall(td, uap, td->td_ucred);
2960	}
2961	return (error);
2962}
2963
2964static int
2965nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
2966{
2967	struct nfsex_args export;
2968	struct file *fp = NULL;
2969	int stablefd, len;
2970	struct nfsd_clid adminrevoke;
2971	struct nfsd_dumplist dumplist;
2972	struct nfsd_dumpclients *dumpclients;
2973	struct nfsd_dumplocklist dumplocklist;
2974	struct nfsd_dumplocks *dumplocks;
2975	struct nameidata nd;
2976	vnode_t vp;
2977	int error = EINVAL;
2978
2979	if (uap->flag & NFSSVC_PUBLICFH) {
2980		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
2981		    sizeof (fhandle_t));
2982		error = copyin(uap->argp,
2983		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
2984		if (!error)
2985			nfs_pubfhset = 1;
2986	} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
2987		error = copyin(uap->argp,(caddr_t)&export,
2988		    sizeof (struct nfsex_args));
2989		if (!error)
2990			error = nfsrv_v4rootexport(&export, cred, p);
2991	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
2992		nfs_pubfhset = 0;
2993		error = 0;
2994	} else if (uap->flag & NFSSVC_STABLERESTART) {
2995		error = copyin(uap->argp, (caddr_t)&stablefd,
2996		    sizeof (int));
2997		if (!error)
2998			error = fp_getfvp(p, stablefd, &fp, &vp);
2999		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
3000			error = EBADF;
3001		if (!error && newnfs_numnfsd != 0)
3002			error = EPERM;
3003		if (!error) {
3004			nfsrv_stablefirst.nsf_fp = fp;
3005			nfsrv_setupstable(p);
3006		}
3007	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
3008		error = copyin(uap->argp, (caddr_t)&adminrevoke,
3009		    sizeof (struct nfsd_clid));
3010		if (!error)
3011			error = nfsrv_adminrevoke(&adminrevoke, p);
3012	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
3013		error = copyin(uap->argp, (caddr_t)&dumplist,
3014		    sizeof (struct nfsd_dumplist));
3015		if (!error && (dumplist.ndl_size < 1 ||
3016			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
3017			error = EPERM;
3018		if (!error) {
3019		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
3020		    dumpclients = (struct nfsd_dumpclients *)malloc(len,
3021			M_TEMP, M_WAITOK);
3022		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
3023		    error = copyout(dumpclients,
3024			CAST_USER_ADDR_T(dumplist.ndl_list), len);
3025		    free((caddr_t)dumpclients, M_TEMP);
3026		}
3027	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
3028		error = copyin(uap->argp, (caddr_t)&dumplocklist,
3029		    sizeof (struct nfsd_dumplocklist));
3030		if (!error && (dumplocklist.ndllck_size < 1 ||
3031			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
3032			error = EPERM;
3033		if (!error)
3034			error = nfsrv_lookupfilename(&nd,
3035				dumplocklist.ndllck_fname, p);
3036		if (!error) {
3037			len = sizeof (struct nfsd_dumplocks) *
3038				dumplocklist.ndllck_size;
3039			dumplocks = (struct nfsd_dumplocks *)malloc(len,
3040				M_TEMP, M_WAITOK);
3041			nfsrv_dumplocks(nd.ni_vp, dumplocks,
3042			    dumplocklist.ndllck_size, p);
3043			vput(nd.ni_vp);
3044			error = copyout(dumplocks,
3045			    CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
3046			free((caddr_t)dumplocks, M_TEMP);
3047		}
3048	}
3049	return (error);
3050}
3051
3052/*
3053 * Check exports.
3054 * Returns 0 if ok, 1 otherwise.
3055 */
3056int
3057nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
3058{
3059	int i;
3060
3061	/*
3062	 * This seems odd, but allow the case where the security flavor
3063	 * list is empty. This happens when NFSv4 is traversing non-exported
3064	 * file systems. Exported file systems should always have a non-empty
3065	 * security flavor list.
3066	 */
3067	if (exp->nes_numsecflavor == 0)
3068		return (0);
3069
3070	for (i = 0; i < exp->nes_numsecflavor; i++) {
3071		/*
3072		 * The tests for privacy and integrity must be first,
3073		 * since ND_GSS is set for everything but AUTH_SYS.
3074		 */
3075		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
3076		    (nd->nd_flag & ND_GSSPRIVACY))
3077			return (0);
3078		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
3079		    (nd->nd_flag & ND_GSSINTEGRITY))
3080			return (0);
3081		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
3082		    (nd->nd_flag & ND_GSS))
3083			return (0);
3084		if (exp->nes_secflavors[i] == AUTH_SYS &&
3085		    (nd->nd_flag & ND_GSS) == 0)
3086			return (0);
3087	}
3088	return (1);
3089}
3090
3091/*
3092 * Calculate a hash value for the fid in a file handle.
3093 */
3094uint32_t
3095nfsrv_hashfh(fhandle_t *fhp)
3096{
3097	uint32_t hashval;
3098
3099	hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
3100	return (hashval);
3101}
3102
3103extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
3104
3105/*
3106 * Called once to initialize data structures...
3107 */
3108static int
3109nfsd_modevent(module_t mod, int type, void *data)
3110{
3111	int error = 0;
3112	static int loaded = 0;
3113
3114	switch (type) {
3115	case MOD_LOAD:
3116		if (loaded)
3117			return (0);
3118		newnfs_portinit();
3119		mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF);
3120		mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
3121		mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
3122		    MTX_DEF);
3123		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
3124		nfsrvd_initcache();
3125		nfsd_init();
3126		NFSD_LOCK();
3127		nfsrvd_init(0);
3128		NFSD_UNLOCK();
3129		nfsd_mntinit();
3130#ifdef VV_DISABLEDELEG
3131		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
3132		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
3133#endif
3134		nfsd_call_servertimer = nfsrv_servertimer;
3135		nfsd_call_nfsd = nfssvc_nfsd;
3136		loaded = 1;
3137		break;
3138
3139	case MOD_UNLOAD:
3140		if (newnfs_numnfsd != 0) {
3141			error = EBUSY;
3142			break;
3143		}
3144
3145#ifdef VV_DISABLEDELEG
3146		vn_deleg_ops.vndeleg_recall = NULL;
3147		vn_deleg_ops.vndeleg_disable = NULL;
3148#endif
3149		nfsd_call_servertimer = NULL;
3150		nfsd_call_nfsd = NULL;
3151		/* and get rid of the locks */
3152		mtx_destroy(&nfs_cache_mutex);
3153		mtx_destroy(&nfs_v4root_mutex);
3154		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
3155		lockdestroy(&nfsv4root_mnt.mnt_explock);
3156		loaded = 0;
3157		break;
3158	default:
3159		error = EOPNOTSUPP;
3160		break;
3161	}
3162	return error;
3163}
3164static moduledata_t nfsd_mod = {
3165	"nfsd",
3166	nfsd_modevent,
3167	NULL,
3168};
3169DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
3170
3171/* So that loader and kldload(2) can find us, wherever we are.. */
3172MODULE_VERSION(nfsd, 1);
3173MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
3174MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
3175MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
3176MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
3177
3178