nfs_nfsdport.c revision 206063
133965Sjdp/*-
278828Sobrien * Copyright (c) 1989, 1993
3218822Sdim *	The Regents of the University of California.  All rights reserved.
459343Sobrien *
533965Sjdp * This code is derived from software contributed to Berkeley by
633965Sjdp * Rick Macklem at The University of Guelph.
733965Sjdp *
833965Sjdp * Redistribution and use in source and binary forms, with or without
933965Sjdp * modification, are permitted provided that the following conditions
10130561Sobrien * are met:
1133965Sjdp * 1. Redistributions of source code must retain the above copyright
12130561Sobrien *    notice, this list of conditions and the following disclaimer.
13130561Sobrien * 2. Redistributions in binary form must reproduce the above copyright
14130561Sobrien *    notice, this list of conditions and the following disclaimer in the
15130561Sobrien *    documentation and/or other materials provided with the distribution.
1633965Sjdp * 4. Neither the name of the University nor the names of its contributors
17130561Sobrien *    may be used to endorse or promote products derived from this software
18130561Sobrien *    without specific prior written permission.
19130561Sobrien *
20130561Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2133965Sjdp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22130561Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23130561Sobrien * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24218822Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2533965Sjdp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2633965Sjdp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2733965Sjdp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2833965Sjdp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2933965Sjdp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30130561Sobrien * SUCH DAMAGE.
3133965Sjdp *
3233965Sjdp */
3359343Sobrien
3433965Sjdp#include <sys/cdefs.h>
35130561Sobrien__FBSDID("$FreeBSD: head/sys/fs/nfsserver/nfs_nfsdport.c 206063 2010-04-02 02:19:28Z rmacklem $");
3633965Sjdp
3759343Sobrien/*
38130561Sobrien * Functions that perform the vfs operations required by the routines in
3933965Sjdp * nfsd_serv.c. It is hoped that this change will make the server more
4059343Sobrien * portable.
41130561Sobrien */
4233965Sjdp
4359343Sobrien#include <fs/nfs/nfsport.h>
44130561Sobrien#include <sys/sysctl.h>
4533965Sjdp#include <nlm/nlm_prot.h>
4659343Sobrien#include <nlm/nlm.h>
47130561Sobrien
/* State defined in other files and referenced here. */
4833965Sjdpextern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
4959343Sobrienextern int nfsv4root_set;
50130561Sobrienextern int nfsrv_useacl;
51130561Sobrienextern int newnfs_numnfsd;
52130561Sobrienextern struct mount nfsv4root_mnt;
5333965Sjdpextern struct nfsrv_stablefirst nfsrv_stablefirst;
5459343Sobrienextern void (*nfsd_call_servertimer)(void);
/* Globals defined by this file. */
55130561Sobrienstruct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
56130561SobrienNFSDLOCKMUTEX;
57130561Sobrienstruct mtx nfs_cache_mutex;
5833965Sjdpstruct mtx nfs_v4root_mutex;
5959343Sobrienstruct nfsrvfh nfs_rootfh, nfs_pubfh;
6033965Sjdpint nfs_pubfhset = 0, nfs_rootfhset = 0;
61130561Sobrienstatic uint32_t nfsv4_sysid = 0;
62130561Sobrien
63130561Sobrienstatic int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
64130561Sobrien    struct ucred *);
65130561Sobrien
/*
 * Tunables exported under the vfs.newnfs sysctl node (see the SYSCTL_INT
 * entries below).  enable_crossmntpt is the "mirrormnt" knob and is
 * consulted by nfsvno_namei(); when it is zero, lookups on an exported
 * file system are done with NOCROSSMOUNT set.
 * NOTE(review): nfs_commit_blks and nfs_commit_miss are exported with an
 * empty description string and are not otherwise referenced in the part
 * of the file reviewed here -- presumably commit statistics; confirm.
 */
66130561Sobrienstatic int enable_crossmntpt = 1;
67130561Sobrienstatic int nfs_commit_blks;
68130561Sobrienstatic int nfs_commit_miss;
69130561Sobrienextern int nfsrv_issuedelegs;
70130561Sobrienextern int nfsrv_dolocallocks;
71130561Sobrien
72130561SobrienSYSCTL_DECL(_vfs_newnfs);
73130561SobrienSYSCTL_INT(_vfs_newnfs, OID_AUTO, mirrormnt, CTLFLAG_RW, &enable_crossmntpt,
74130561Sobrien    0, "Enable nfsd to cross mount points");
75130561SobrienSYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
76130561Sobrien    0, "");
77130561SobrienSYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
78130561Sobrien    0, "");
7933965SjdpSYSCTL_INT(_vfs_newnfs, OID_AUTO, issue_delegations, CTLFLAG_RW,
80130561Sobrien    &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
8133965SjdpSYSCTL_INT(_vfs_newnfs, OID_AUTO, enable_locallocks, CTLFLAG_RW,
8259343Sobrien    &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
8359343Sobrien
/*
 * Sequential-read heuristic table used by nfsvno_read().
 *	NUM_HEURISTIC	number of slots in nfsheur[]
 *	NHUSE_INIT	nh_use value given to a slot when it is assigned to
 *			a vnode
 *	NHUSE_INC	amount added to nh_use each time the slot is used
 *			for a read
 *	NHUSE_MAX	upper bound that nh_use is clamped to
 */
8459343Sobrien#define	NUM_HEURISTIC		1017
8577298Sobrien#define	NHUSE_INIT		64
8633965Sjdp#define	NHUSE_INC		16
8759343Sobrien#define	NHUSE_MAX		2048
8859343Sobrien
/* One slot per tracked vnode; nfsvno_read() selects the slot to use. */
8959343Sobrienstatic struct nfsheur {
9059343Sobrien	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
9159343Sobrien	off_t nh_nextr;		/* next offset for sequential detection */
92130561Sobrien	int nh_use;		/* use count for selection */
93130561Sobrien	int nh_seqcount;	/* heuristic */
9459343Sobrien} nfsheur[NUM_HEURISTIC];
9559343Sobrien
9633965Sjdp
9777298Sobrien/*
98104834Sobrien * Get attributes into nfsvattr structure.
9977298Sobrien */
10033965Sjdpint
101130561Sobriennfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
102130561Sobrien    struct thread *p)
103130561Sobrien{
104130561Sobrien	int error, lockedit = 0;
105130561Sobrien
106130561Sobrien	/* Since FreeBSD insists the vnode be locked... */
107130561Sobrien	if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
108130561Sobrien		lockedit = 1;
109130561Sobrien		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
110130561Sobrien	}
111130561Sobrien	error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
11233965Sjdp	if (lockedit)
113130561Sobrien		NFSVOPUNLOCK(vp, 0, p);
11433965Sjdp	return (error);
115130561Sobrien}
116130561Sobrien
117130561Sobrien/*
118130561Sobrien * Get a file handle for a vnode.
119130561Sobrien */
120130561Sobrienint
121218822Sdimnfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
12233965Sjdp{
123130561Sobrien	int error;
124130561Sobrien
125130561Sobrien	NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
126130561Sobrien	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
127130561Sobrien	error = VOP_VPTOFH(vp, &fhp->fh_fid);
128130561Sobrien	return (error);
129130561Sobrien}
130130561Sobrien
131130561Sobrien/*
132130561Sobrien * Perform access checking for vnodes obtained from file handles that would
133130561Sobrien * refer to files already opened by a Unix client. You cannot just use
134130561Sobrien * vn_writechk() and VOP_ACCESSX() for two reasons.
135130561Sobrien * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
136130561Sobrien *     case.
137130561Sobrien * 2 - The owner is to be given access irrespective of mode bits for some
138130561Sobrien *     operations, so that processes that chmod after opening a file don't
139130561Sobrien *     break.
140130561Sobrien */
141130561Sobrienint
142130561Sobriennfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
143130561Sobrien    struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
144130561Sobrien    u_int32_t *supportedtypep)
145130561Sobrien{
146130561Sobrien	struct vattr vattr;
147130561Sobrien	int error = 0, getret = 0;
148130561Sobrien
149130561Sobrien	if (accmode & VWRITE) {
150130561Sobrien		/* Just vn_writechk() changed to check rdonly */
15133965Sjdp		/*
152130561Sobrien		 * Disallow write attempts on read-only file systems;
153130561Sobrien		 * unless the file is a socket or a block or character
154130561Sobrien		 * device resident on the file system.
155130561Sobrien		 */
156130561Sobrien		if (NFSVNO_EXRDONLY(exp) ||
157130561Sobrien		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
158130561Sobrien			switch (vp->v_type) {
159130561Sobrien			case VREG:
160130561Sobrien			case VDIR:
161130561Sobrien			case VLNK:
162130561Sobrien				return (EROFS);
163130561Sobrien			default:
164130561Sobrien				break;
165130561Sobrien			}
166130561Sobrien		}
167130561Sobrien		/*
168130561Sobrien		 * If there's shared text associated with
169130561Sobrien		 * the inode, try to free it up once.  If
170130561Sobrien		 * we fail, we can't allow writing.
171130561Sobrien		 */
172130561Sobrien		if (vp->v_vflag & VV_TEXT)
173130561Sobrien			return (ETXTBSY);
174130561Sobrien	}
175130561Sobrien	if (vpislocked == 0)
176130561Sobrien		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
177130561Sobrien
178130561Sobrien	/*
179130561Sobrien	 * Should the override still be applied when ACLs are enabled?
180130561Sobrien	 */
181130561Sobrien	error = VOP_ACCESSX(vp, accmode, cred, p);
182130561Sobrien	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
183130561Sobrien		/*
184218822Sdim		 * Try again with VEXPLICIT_DENY, to see if the test for
185130561Sobrien		 * deletion is supported.
186218822Sdim		 */
187218822Sdim		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
188218822Sdim		if (error == 0) {
189218822Sdim			if (vp->v_type == VDIR) {
190218822Sdim				accmode &= ~(VDELETE | VDELETE_CHILD);
19159343Sobrien				accmode |= VWRITE;
19233965Sjdp				error = VOP_ACCESSX(vp, accmode, cred, p);
19333965Sjdp			} else if (supportedtypep != NULL) {
19433965Sjdp				*supportedtypep &= ~NFSACCESS_DELETE;
19533965Sjdp			}
19633965Sjdp		}
19733965Sjdp	}
19877298Sobrien
19933965Sjdp	/*
20077298Sobrien	 * Allow certain operations for the owner (reads and writes
201104834Sobrien	 * on files that are already open).
202130561Sobrien	 */
20377298Sobrien	if (override != NFSACCCHK_NOOVERRIDE &&
204218822Sdim	    (error == EPERM || error == EACCES)) {
205218822Sdim		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
206218822Sdim			error = 0;
20777298Sobrien		else if (override & NFSACCCHK_ALLOWOWNER) {
208218822Sdim			getret = VOP_GETATTR(vp, &vattr, cred);
209218822Sdim			if (getret == 0 && cred->cr_uid == vattr.va_uid)
21033965Sjdp				error = 0;
211218822Sdim		}
212218822Sdim	}
21359343Sobrien	if (vpislocked == 0)
214218822Sdim		NFSVOPUNLOCK(vp, 0, p);
215218822Sdim	return (error);
21633965Sjdp}
217218822Sdim
218218822Sdim/*
21991041Sobrien * Set attribute(s) vnop.
220218822Sdim */
221218822Sdimint
22233965Sjdpnfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
223218822Sdim    struct thread *p, struct nfsexstuff *exp)
224218822Sdim{
22533965Sjdp	int error;
226218822Sdim
227218822Sdim	error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
22889857Sobrien	return (error);
229218822Sdim}
230218822Sdim
231218822Sdim/*
232218822Sdim * Set up nameidata for a lookup() call and do it
233218822Sdim * For the cases where we are crossing mount points
234218822Sdim * (looking up the public fh path or the v4 root path when
235218822Sdim *  not using a pseudo-root fs), set/release the Giant lock,
236218822Sdim * as required.
 *
 * dp is the directory the lookup starts from; islocked is non-zero when
 * dp is locked on entry (it is unlocked here before the lookup).
 * If dp is not a directory it is released (vput() when locked, vrele()
 * otherwise), the pathname buffer is freed and ENOTDIR is returned with
 * *retdirp left NULL.  Otherwise VREF() is applied to dp and dp is
 * stored in *retdirp.
 * Symbolic links are only followed for ND_PUBLOOKUP requests; any other
 * request that runs into one fails with EINVAL.
 * Every error that reaches "out" frees the pathname buffer, clears
 * HASBUF and sets ni_vp, ni_dvp and ni_startdir to NULL.  On success
 * ni_dvp is set to NULL unless WANTPARENT or LOCKPARENT was requested.
237218822Sdim */
23833965Sjdpint
239218822Sdimnfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
24033965Sjdp    struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
24159343Sobrien    struct vnode **retdirp)
242218822Sdim{
24333965Sjdp	struct componentname *cnp = &ndp->ni_cnd;
244218822Sdim	int i;
245218822Sdim	struct iovec aiov;
24633965Sjdp	struct uio auio;
247218822Sdim	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
248218822Sdim	int error = 0, crossmnt;
24933965Sjdp	char *cp;
250218822Sdim
251218822Sdim	*retdirp = NULL;
25259343Sobrien	cnp->cn_nameptr = cnp->cn_pnbuf;
253218822Sdim	/*
254218822Sdim	 * Extract and set starting directory.
25559343Sobrien	 */
256218822Sdim	if (dp->v_type != VDIR) {
257218822Sdim		if (islocked)
25889857Sobrien			vput(dp);
259218822Sdim		else
260218822Sdim			vrele(dp);
261104834Sobrien		nfsvno_relpathbuf(ndp);
262218822Sdim		return (ENOTDIR);
263218822Sdim	}
264218822Sdim	if (islocked)
265218822Sdim		NFSVOPUNLOCK(dp, 0, p);
266218822Sdim	VREF(dp);
267218822Sdim	*retdirp = dp;
268130561Sobrien	if (NFSVNO_EXRDONLY(exp))
26989857Sobrien		cnp->cn_flags |= RDONLY;
270218822Sdim	ndp->ni_segflg = UIO_SYSSPACE;
271218822Sdim	crossmnt = 1;
272218822Sdim
273218822Sdim	if (nd->nd_flag & ND_PUBLOOKUP) {
274130561Sobrien		ndp->ni_loopcnt = 0;
275218822Sdim		if (cnp->cn_pnbuf[0] == '/') {
			/* Absolute path: restart from rootvnode instead of dp. */
276218822Sdim			vrele(dp);
277130561Sobrien			/*
278130561Sobrien			 * Check for degenerate pathnames here, since lookup()
279130561Sobrien			 * panics on them.
280130561Sobrien			 */
281218822Sdim			for (i = 1; i < ndp->ni_pathlen; i++)
282218822Sdim				if (cnp->cn_pnbuf[i] != '/')
283130561Sobrien					break;
284218822Sdim			if (i == ndp->ni_pathlen) {
285218822Sdim				error = NFSERR_ACCES;
28633965Sjdp				goto out;
28733965Sjdp			}
28877298Sobrien			dp = rootvnode;
28933965Sjdp			VREF(dp);
29033965Sjdp		}
29133965Sjdp	} else if ((enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
29233965Sjdp	    (nd->nd_flag & ND_NFSV4) == 0) {
29377298Sobrien		/*
29433965Sjdp		 * Only cross mount points for NFSv4 when doing a
295130561Sobrien		 * mount while traversing the file system above
29633965Sjdp		 * the mount point, unless enable_crossmntpt is set.
29733965Sjdp		 */
29833965Sjdp		cnp->cn_flags |= NOCROSSMOUNT;
29933965Sjdp		crossmnt = 0;
30033965Sjdp	}
30133965Sjdp
302104834Sobrien	/*
303130561Sobrien	 * Initialize for scan, set ni_startdir and bump ref on dp again
304130561Sobrien	 * because lookup() will dereference ni_startdir.
30533965Sjdp	 */
30633965Sjdp
30733965Sjdp	cnp->cn_thread = p;
308218822Sdim	ndp->ni_startdir = dp;
309218822Sdim	ndp->ni_rootdir = rootvnode;
310218822Sdim
	/*
	 * The lookup is always done with LOCKLEAF set.  If the caller did
	 * not ask for it, the leaf is unlocked once found (below) and the
	 * flag is cleared again after the loop.
	 */
311218822Sdim	if (!lockleaf)
31289857Sobrien		cnp->cn_flags |= LOCKLEAF;
31377298Sobrien	for (;;) {
31433965Sjdp		cnp->cn_nameptr = cnp->cn_pnbuf;
31533965Sjdp		/*
31633965Sjdp		 * Call lookup() to do the real work.  If an error occurs,
31733965Sjdp		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
31859343Sobrien		 * we do not have to dereference anything before returning.
31959343Sobrien		 * In either case ni_startdir will be dereferenced and NULLed
32033965Sjdp		 * out.
32133965Sjdp		 */
32277298Sobrien		if (exp->nes_vfslocked)
32333965Sjdp			ndp->ni_cnd.cn_flags |= GIANTHELD;
32433965Sjdp		error = lookup(ndp);
32533965Sjdp		/*
32633965Sjdp		 * The Giant lock should only change when
32733965Sjdp		 * crossing mount points.
32833965Sjdp		 */
32933965Sjdp		if (crossmnt) {
33033965Sjdp			exp->nes_vfslocked =
33133965Sjdp			    (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
33233965Sjdp			ndp->ni_cnd.cn_flags &= ~GIANTHELD;
33333965Sjdp		}
33433965Sjdp		if (error)
33533965Sjdp			break;
33633965Sjdp
337130561Sobrien		/*
338130561Sobrien		 * Check for encountering a symbolic link.  Trivial
33977298Sobrien		 * termination occurs if no symlink encountered.
340130561Sobrien		 */
34177298Sobrien		if ((cnp->cn_flags & ISSYMLINK) == 0) {
34259343Sobrien			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
343130561Sobrien				nfsvno_relpathbuf(ndp);
344130561Sobrien			if (ndp->ni_vp && !lockleaf)
34559343Sobrien				NFSVOPUNLOCK(ndp->ni_vp, 0, p);
346218822Sdim			break;
347218822Sdim		}
348104834Sobrien
349104834Sobrien		/*
35033965Sjdp		 * Validate symlink
35133965Sjdp		 */
35233965Sjdp		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
35333965Sjdp			NFSVOPUNLOCK(ndp->ni_dvp, 0, p);
		/* Symlinks are only followed for public file handle lookups. */
35433965Sjdp		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
35533965Sjdp			error = EINVAL;
35633965Sjdp			goto badlink2;
35733965Sjdp		}
35833965Sjdp
35933965Sjdp		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
36033965Sjdp			error = ELOOP;
36133965Sjdp			goto badlink2;
36233965Sjdp		}
		/*
		 * When path components remain after the link, read the link
		 * into a fresh buffer so the remainder can be appended to
		 * it; otherwise read it straight into cn_pnbuf.
		 */
36333965Sjdp		if (ndp->ni_pathlen > 1)
36459343Sobrien			cp = uma_zalloc(namei_zone, M_WAITOK);
36559343Sobrien		else
36633965Sjdp			cp = cnp->cn_pnbuf;
36777298Sobrien		aiov.iov_base = cp;
36833965Sjdp		aiov.iov_len = MAXPATHLEN;
36933965Sjdp		auio.uio_iov = &aiov;
37033965Sjdp		auio.uio_iovcnt = 1;
37133965Sjdp		auio.uio_offset = 0;
37259343Sobrien		auio.uio_rw = UIO_READ;
37359343Sobrien		auio.uio_segflg = UIO_SYSSPACE;
37459343Sobrien		auio.uio_td = NULL;
375130561Sobrien		auio.uio_resid = MAXPATHLEN;
37677298Sobrien		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
37777298Sobrien		if (error) {
			/*
			 * badlink1 and badlink2 are also entered by goto from
			 * outside this block: badlink1 frees the separately
			 * allocated link buffer first, badlink2 only drops
			 * the vnodes.
			 */
378104834Sobrien		badlink1:
37959343Sobrien			if (ndp->ni_pathlen > 1)
38059343Sobrien				uma_zfree(namei_zone, cp);
38159343Sobrien		badlink2:
38233965Sjdp			vrele(ndp->ni_dvp);
38333965Sjdp			vput(ndp->ni_vp);
38477298Sobrien			break;
38533965Sjdp		}
38633965Sjdp		linklen = MAXPATHLEN - auio.uio_resid;
38733965Sjdp		if (linklen == 0) {
38833965Sjdp			error = ENOENT;
38959343Sobrien			goto badlink1;
390130561Sobrien		}
391130561Sobrien		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
39259343Sobrien			error = ENAMETOOLONG;
39333965Sjdp			goto badlink1;
39477298Sobrien		}
39559343Sobrien
39659343Sobrien		/*
39759343Sobrien		 * Adjust or replace path
39859343Sobrien		 */
39959343Sobrien		if (ndp->ni_pathlen > 1) {
40059343Sobrien			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
40159343Sobrien			uma_zfree(namei_zone, cnp->cn_pnbuf);
40259343Sobrien			cnp->cn_pnbuf = cp;
403253451Semaste		} else
404253451Semaste			cnp->cn_pnbuf[linklen] = '\0';
405253451Semaste		ndp->ni_pathlen += linklen;
406253451Semaste
407253451Semaste		/*
408253451Semaste		 * Cleanup refs for next loop and check if root directory
409253451Semaste		 * should replace current directory.  Normally ni_dvp
410253451Semaste		 * becomes the new base directory and is cleaned up when
411253451Semaste		 * we loop.  Explicitly null pointers after invalidation
412253451Semaste		 * to clarify operation.
413253451Semaste		 */
414253451Semaste		vput(ndp->ni_vp);
41589857Sobrien		ndp->ni_vp = NULL;
416253451Semaste
41789857Sobrien		if (cnp->cn_pnbuf[0] == '/') {
41889857Sobrien			vrele(ndp->ni_dvp);
41989857Sobrien			ndp->ni_dvp = ndp->ni_rootdir;
42089857Sobrien			VREF(ndp->ni_dvp);
42189857Sobrien		}
42289857Sobrien		ndp->ni_startdir = ndp->ni_dvp;
42389857Sobrien		ndp->ni_dvp = NULL;
42433965Sjdp	}
42533965Sjdp	if (!lockleaf)
42633965Sjdp		cnp->cn_flags &= ~LOCKLEAF;
42789857Sobrien
42833965Sjdpout:
429104834Sobrien	if (error) {
430104834Sobrien		uma_zfree(namei_zone, cnp->cn_pnbuf);
431104834Sobrien		ndp->ni_vp = NULL;
432104834Sobrien		ndp->ni_dvp = NULL;
433104834Sobrien		ndp->ni_startdir = NULL;
434104834Sobrien		cnp->cn_flags &= ~HASBUF;
435130561Sobrien	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
436130561Sobrien		ndp->ni_dvp = NULL;
437104834Sobrien	}
438104834Sobrien	return (error);
439104834Sobrien}
440104834Sobrien
441104834Sobrien/*
442130561Sobrien * Set up a pathname buffer and return a pointer to it and, optionally
443130561Sobrien * set a hash pointer.
444130561Sobrien */
445130561Sobrienvoid
446104834Sobriennfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
447104834Sobrien{
448104834Sobrien	struct componentname *cnp = &ndp->ni_cnd;
449104834Sobrien
450246312Sandrew	cnp->cn_flags |= (NOMACCHECK | HASBUF);
451246312Sandrew	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
452249603Sandrew	if (hashpp != NULL)
453249603Sandrew		*hashpp = NULL;
454246312Sandrew	*bufpp = cnp->cn_pnbuf;
45533965Sjdp}
45633965Sjdp
45777298Sobrien/*
45833965Sjdp * Release the above path buffer, if not released by nfsvno_namei().
45933965Sjdp */
46033965Sjdpvoid
46133965Sjdpnfsvno_relpathbuf(struct nameidata *ndp)
46233965Sjdp{
46359343Sobrien
46459343Sobrien	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
46559343Sobrien		panic("nfsrelpath");
46659343Sobrien	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
46759343Sobrien	ndp->ni_cnd.cn_flags &= ~HASBUF;
46859343Sobrien}
46959343Sobrien
47059343Sobrien/*
47159343Sobrien * Readlink vnode op into an mbuf list.
47259343Sobrien */
473218822Sdimint
47459343Sobriennfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
47559343Sobrien    struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
47659343Sobrien{
47759343Sobrien	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
47859343Sobrien	struct iovec *ivp = iv;
47959343Sobrien	struct uio io, *uiop = &io;
48059343Sobrien	struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
48159343Sobrien	int i, len, tlen, error;
48259343Sobrien
48377298Sobrien	len = 0;
48433965Sjdp	i = 0;
48533965Sjdp	while (len < NFS_MAXPATHLEN) {
48633965Sjdp		NFSMGET(mp);
48733965Sjdp		MCLGET(mp, M_WAIT);
488130561Sobrien		mp->m_len = NFSMSIZ(mp);
489130561Sobrien		if (len == 0) {
49033965Sjdp			mp3 = mp2 = mp;
49133965Sjdp		} else {
49233965Sjdp			mp2->m_next = mp;
49333965Sjdp			mp2 = mp;
49433965Sjdp		}
49533965Sjdp		if ((len + mp->m_len) > NFS_MAXPATHLEN) {
49633965Sjdp			mp->m_len = NFS_MAXPATHLEN - len;
49733965Sjdp			len = NFS_MAXPATHLEN;
49859343Sobrien		} else {
499104834Sobrien			len += mp->m_len;
500218822Sdim		}
501218822Sdim		ivp->iov_base = mtod(mp, caddr_t);
502130561Sobrien		ivp->iov_len = mp->m_len;
503130561Sobrien		i++;
50433965Sjdp		ivp++;
50533965Sjdp	}
50633965Sjdp	uiop->uio_iov = iv;
50733965Sjdp	uiop->uio_iovcnt = i;
50877298Sobrien	uiop->uio_offset = 0;
50933965Sjdp	uiop->uio_resid = len;
51033965Sjdp	uiop->uio_rw = UIO_READ;
51133965Sjdp	uiop->uio_segflg = UIO_SYSSPACE;
51233965Sjdp	uiop->uio_td = NULL;
51333965Sjdp	error = VOP_READLINK(vp, uiop, cred);
514130561Sobrien	if (error) {
515130561Sobrien		m_freem(mp3);
516130561Sobrien		*lenp = 0;
517130561Sobrien		return (error);
518130561Sobrien	}
519130561Sobrien	if (uiop->uio_resid > 0) {
52089857Sobrien		len -= uiop->uio_resid;
52133965Sjdp		tlen = NFSM_RNDUP(len);
52259343Sobrien		nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
52359343Sobrien	}
524130561Sobrien	*lenp = len;
52559343Sobrien	*mpp = mp3;
52659343Sobrien	*mpendp = mp;
52759343Sobrien	return (0);
52859343Sobrien}
52959343Sobrien
53077298Sobrien/*
53133965Sjdp * Read vnode op call into mbuf list.
53233965Sjdp */
53333965Sjdpint
53433965Sjdpnfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
53533965Sjdp    struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
53633965Sjdp{
53733965Sjdp	struct mbuf *m;
538218822Sdim	int i;
53933965Sjdp	struct iovec *iv;
54077298Sobrien	struct iovec *iv2;
54133965Sjdp	int error = 0, len, left, siz, tlen, ioflag = 0, hi, try = 32;
54233965Sjdp	struct mbuf *m2 = NULL, *m3;
54333965Sjdp	struct uio io, *uiop = &io;
54433965Sjdp	struct nfsheur *nh;
54533965Sjdp
54633965Sjdp	/*
54733965Sjdp	 * Calculate seqcount for heuristic
54833965Sjdp	 */
54933965Sjdp	/*
55033965Sjdp	 * Locate best candidate
55133965Sjdp	 */
55233965Sjdp
55333965Sjdp	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
55433965Sjdp	nh = &nfsheur[hi];
55533965Sjdp
55633965Sjdp	while (try--) {
55733965Sjdp		if (nfsheur[hi].nh_vp == vp) {
55833965Sjdp			nh = &nfsheur[hi];
55933965Sjdp			break;
56033965Sjdp		}
56133965Sjdp		if (nfsheur[hi].nh_use > 0)
56233965Sjdp			--nfsheur[hi].nh_use;
56333965Sjdp		hi = (hi + 1) % NUM_HEURISTIC;
56433965Sjdp		if (nfsheur[hi].nh_use < nh->nh_use)
56533965Sjdp			nh = &nfsheur[hi];
566130561Sobrien	}
567130561Sobrien
568130561Sobrien	if (nh->nh_vp != vp) {
56959343Sobrien		nh->nh_vp = vp;
57059343Sobrien		nh->nh_nextr = off;
571130561Sobrien		nh->nh_use = NHUSE_INIT;
572130561Sobrien		if (off == 0)
573218822Sdim			nh->nh_seqcount = 4;
57459343Sobrien		else
57559343Sobrien			nh->nh_seqcount = 1;
57659343Sobrien	}
57759343Sobrien
57859343Sobrien	/*
579130561Sobrien	 * Calculate heuristic
58059343Sobrien	 */
581130561Sobrien
582130561Sobrien	if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
583130561Sobrien		if (++nh->nh_seqcount > IO_SEQMAX)
58459343Sobrien			nh->nh_seqcount = IO_SEQMAX;
585130561Sobrien	} else if (nh->nh_seqcount > 1) {
586130561Sobrien		nh->nh_seqcount = 1;
58759343Sobrien	} else {
58833965Sjdp		nh->nh_seqcount = 0;
589130561Sobrien	}
590130561Sobrien	nh->nh_use += NHUSE_INC;
59159343Sobrien	if (nh->nh_use > NHUSE_MAX)
59259343Sobrien		nh->nh_use = NHUSE_MAX;
593104834Sobrien	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
594104834Sobrien
595104834Sobrien	len = left = NFSM_RNDUP(cnt);
59668765Sobrien	m3 = NULL;
59759343Sobrien	/*
59859343Sobrien	 * Generate the mbuf list with the uio_iov ref. to it.
59959343Sobrien	 */
60068765Sobrien	i = 0;
60159343Sobrien	while (left > 0) {
60259343Sobrien		NFSMGET(m);
60359343Sobrien		MCLGET(m, M_WAIT);
60459343Sobrien		m->m_len = 0;
60559343Sobrien		siz = min(M_TRAILINGSPACE(m), left);
60659343Sobrien		left -= siz;
607218822Sdim		i++;
608218822Sdim		if (m3)
609218822Sdim			m2->m_next = m;
610104834Sobrien		else
611104834Sobrien			m3 = m;
61268765Sobrien		m2 = m;
61368765Sobrien	}
61468765Sobrien	MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
61568765Sobrien	    M_TEMP, M_WAITOK);
61668765Sobrien	uiop->uio_iov = iv2 = iv;
61759343Sobrien	m = m3;
61859343Sobrien	left = len;
61959343Sobrien	i = 0;
62059343Sobrien	while (left > 0) {
62159343Sobrien		if (m == NULL)
62259343Sobrien			panic("nfsvno_read iov");
62333965Sjdp		siz = min(M_TRAILINGSPACE(m), left);
62433965Sjdp		if (siz > 0) {
62533965Sjdp			iv->iov_base = mtod(m, caddr_t) + m->m_len;
62633965Sjdp			iv->iov_len = siz;
62733965Sjdp			m->m_len += siz;
62833965Sjdp			left -= siz;
62933965Sjdp			iv++;
63033965Sjdp			i++;
63133965Sjdp		}
63233965Sjdp		m = m->m_next;
63333965Sjdp	}
63433965Sjdp	uiop->uio_iovcnt = i;
63533965Sjdp	uiop->uio_offset = off;
63633965Sjdp	uiop->uio_resid = len;
63733965Sjdp	uiop->uio_rw = UIO_READ;
63859343Sobrien	uiop->uio_segflg = UIO_SYSSPACE;
63933965Sjdp	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
64033965Sjdp	FREE((caddr_t)iv2, M_TEMP);
64168765Sobrien	if (error) {
64268765Sobrien		m_freem(m3);
64359343Sobrien		*mpp = NULL;
64468765Sobrien		return (error);
64559343Sobrien	}
64668765Sobrien	tlen = len - uiop->uio_resid;
64768765Sobrien	cnt = cnt < tlen ? cnt : tlen;
64868765Sobrien	tlen = NFSM_RNDUP(cnt);
64977298Sobrien	if (tlen == 0) {
65068765Sobrien		m_freem(m3);
65168765Sobrien		m3 = NULL;
652130561Sobrien	} else if (len != tlen || tlen != cnt)
65359343Sobrien		nfsrv_adj(m3, len - tlen, tlen - cnt);
65459343Sobrien	*mpp = m3;
65559343Sobrien	*mpendp = m2;
65668765Sobrien	return (0);
65759343Sobrien}
65859343Sobrien
65959343Sobrien/*
66059343Sobrien * Write vnode op from an mbuf list.
66159343Sobrien */
66259343Sobrienint
66359343Sobriennfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
66459343Sobrien    struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
66559343Sobrien{
66659343Sobrien	struct iovec *ivp;
66759343Sobrien	int i, len;
66868765Sobrien	struct iovec *iv;
66968765Sobrien	int ioflags, error;
67068765Sobrien	struct uio io, *uiop = &io;
67159343Sobrien
672130561Sobrien	MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
67359343Sobrien	    M_WAITOK);
67459343Sobrien	uiop->uio_iov = iv = ivp;
67559343Sobrien	uiop->uio_iovcnt = cnt;
67659343Sobrien	i = mtod(mp, caddr_t) + mp->m_len - cp;
677104834Sobrien	len = retlen;
67859343Sobrien	while (len > 0) {
67933965Sjdp		if (mp == NULL)
68033965Sjdp			panic("nfsvno_write");
68133965Sjdp		if (i > 0) {
68233965Sjdp			i = min(i, len);
68333965Sjdp			ivp->iov_base = cp;
68433965Sjdp			ivp->iov_len = i;
68533965Sjdp			ivp++;
68633965Sjdp			len -= i;
68733965Sjdp		}
68833965Sjdp		mp = mp->m_next;
68933965Sjdp		if (mp) {
69033965Sjdp			i = mp->m_len;
69133965Sjdp			cp = mtod(mp, caddr_t);
69233965Sjdp		}
69333965Sjdp	}
69433965Sjdp
69533965Sjdp	if (stable == NFSWRITE_UNSTABLE)
69633965Sjdp		ioflags = IO_NODELOCKED;
69733965Sjdp	else
69833965Sjdp		ioflags = (IO_SYNC | IO_NODELOCKED);
69933965Sjdp	uiop->uio_resid = retlen;
70033965Sjdp	uiop->uio_rw = UIO_WRITE;
70133965Sjdp	uiop->uio_segflg = UIO_SYSSPACE;
70233965Sjdp	NFSUIOPROC(uiop, p);
70333965Sjdp	uiop->uio_offset = off;
70433965Sjdp	error = VOP_WRITE(vp, uiop, ioflags, cred);
70533965Sjdp	FREE((caddr_t)iv, M_TEMP);
70633965Sjdp	return (error);
70733965Sjdp}
70833965Sjdp
70933965Sjdp/*
71033965Sjdp * Common code for creating a regular file (plus special files for V2).
71133965Sjdp */
71233965Sjdpint
71333965Sjdpnfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
71433965Sjdp    struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
71533965Sjdp    int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
71633965Sjdp{
71733965Sjdp	u_quad_t tempsize;
71833965Sjdp	int error;
71933965Sjdp
72059343Sobrien	error = nd->nd_repstat;
72177298Sobrien	if (!error && ndp->ni_vp == NULL) {
72259343Sobrien		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
72359343Sobrien			vrele(ndp->ni_startdir);
72459343Sobrien			error = VOP_CREATE(ndp->ni_dvp,
72559343Sobrien			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
72659343Sobrien			vput(ndp->ni_dvp);
72777298Sobrien			nfsvno_relpathbuf(ndp);
72859343Sobrien			if (!error) {
72959343Sobrien				if (*exclusive_flagp) {
73059343Sobrien					*exclusive_flagp = 0;
73177298Sobrien					NFSVNO_ATTRINIT(nvap);
73277298Sobrien					nvap->na_atime.tv_sec = cverf[0];
73359343Sobrien					nvap->na_atime.tv_nsec = cverf[1];
73477298Sobrien					error = VOP_SETATTR(ndp->ni_vp,
73559343Sobrien					    &nvap->na_vattr, nd->nd_cred);
73659343Sobrien				}
73759343Sobrien			}
73859343Sobrien		/*
739130561Sobrien		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
74077298Sobrien		 * (This implies, just get out on an error.)
74177298Sobrien		 */
74277298Sobrien		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
743130561Sobrien			nvap->na_type == VFIFO) {
744130561Sobrien			if (nvap->na_type == VCHR && rdev == 0xffffffff)
745130561Sobrien				nvap->na_type = VFIFO;
746130561Sobrien                        if (nvap->na_type != VFIFO &&
747130561Sobrien			    (error = priv_check_cred(nd->nd_cred,
748130561Sobrien			     PRIV_VFS_MKNOD_DEV, 0))) {
749130561Sobrien				vrele(ndp->ni_startdir);
750130561Sobrien				nfsvno_relpathbuf(ndp);
751130561Sobrien				vput(ndp->ni_dvp);
752130561Sobrien				return (error);
753130561Sobrien			}
754130561Sobrien			nvap->na_rdev = rdev;
755130561Sobrien			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
756130561Sobrien			    &ndp->ni_cnd, &nvap->na_vattr);
757130561Sobrien			vput(ndp->ni_dvp);
758130561Sobrien			nfsvno_relpathbuf(ndp);
759130561Sobrien			if (error) {
760130561Sobrien				vrele(ndp->ni_startdir);
761130561Sobrien				return (error);
762130561Sobrien			}
763130561Sobrien		} else {
764130561Sobrien			vrele(ndp->ni_startdir);
765130561Sobrien			nfsvno_relpathbuf(ndp);
766130561Sobrien			vput(ndp->ni_dvp);
767130561Sobrien			return (ENXIO);
768130561Sobrien		}
769130561Sobrien		*vpp = ndp->ni_vp;
770130561Sobrien	} else {
771130561Sobrien		/*
772130561Sobrien		 * Handle cases where error is already set and/or
773130561Sobrien		 * the file exists.
774130561Sobrien		 * 1 - clean up the lookup
775130561Sobrien		 * 2 - iff !error and na_size set, truncate it
776130561Sobrien		 */
777130561Sobrien		vrele(ndp->ni_startdir);
778130561Sobrien		nfsvno_relpathbuf(ndp);
779130561Sobrien		*vpp = ndp->ni_vp;
780130561Sobrien		if (ndp->ni_dvp == *vpp)
781130561Sobrien			vrele(ndp->ni_dvp);
782130561Sobrien		else
783130561Sobrien			vput(ndp->ni_dvp);
784130561Sobrien		if (!error && nvap->na_size != VNOVAL) {
785130561Sobrien			error = nfsvno_accchk(*vpp, VWRITE,
786130561Sobrien			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
787130561Sobrien			    NFSACCCHK_VPISLOCKED, NULL);
788130561Sobrien			if (!error) {
789130561Sobrien				tempsize = nvap->na_size;
790130561Sobrien				NFSVNO_ATTRINIT(nvap);
791130561Sobrien				nvap->na_size = tempsize;
792130561Sobrien				error = VOP_SETATTR(*vpp,
793130561Sobrien				    &nvap->na_vattr, nd->nd_cred);
794130561Sobrien			}
79533965Sjdp		}
796		if (error)
797			vput(*vpp);
798	}
799	return (error);
800}
801
802/*
803 * Do a mknod vnode op.
804 */
805int
806nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
807    struct thread *p)
808{
809	int error = 0;
810	enum vtype vtyp;
811
812	vtyp = nvap->na_type;
813	/*
814	 * Iff doesn't exist, create it.
815	 */
816	if (ndp->ni_vp) {
817		vrele(ndp->ni_startdir);
818		nfsvno_relpathbuf(ndp);
819		vput(ndp->ni_dvp);
820		vrele(ndp->ni_vp);
821		return (EEXIST);
822	}
823	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
824		vrele(ndp->ni_startdir);
825		nfsvno_relpathbuf(ndp);
826		vput(ndp->ni_dvp);
827		return (NFSERR_BADTYPE);
828	}
829	if (vtyp == VSOCK) {
830		vrele(ndp->ni_startdir);
831		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
832		    &ndp->ni_cnd, &nvap->na_vattr);
833		vput(ndp->ni_dvp);
834		nfsvno_relpathbuf(ndp);
835	} else {
836		if (nvap->na_type != VFIFO &&
837		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
838			vrele(ndp->ni_startdir);
839			nfsvno_relpathbuf(ndp);
840			vput(ndp->ni_dvp);
841			return (error);
842		}
843		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
844		    &ndp->ni_cnd, &nvap->na_vattr);
845		vput(ndp->ni_dvp);
846		nfsvno_relpathbuf(ndp);
847		if (error)
848			vrele(ndp->ni_startdir);
849		/*
850		 * Since VOP_MKNOD returns the ni_vp, I can't
851		 * see any reason to do the lookup.
852		 */
853	}
854	return (error);
855}
856
857/*
858 * Mkdir vnode op.
859 */
860int
861nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
862    struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
863{
864	int error = 0;
865
866	if (ndp->ni_vp != NULL) {
867		if (ndp->ni_dvp == ndp->ni_vp)
868			vrele(ndp->ni_dvp);
869		else
870			vput(ndp->ni_dvp);
871		vrele(ndp->ni_vp);
872		nfsvno_relpathbuf(ndp);
873		return (EEXIST);
874	}
875	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
876	    &nvap->na_vattr);
877	vput(ndp->ni_dvp);
878	nfsvno_relpathbuf(ndp);
879	return (error);
880}
881
882/*
883 * symlink vnode op.
884 */
885int
886nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
887    int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
888    struct nfsexstuff *exp)
889{
890	int error = 0;
891
892	if (ndp->ni_vp) {
893		vrele(ndp->ni_startdir);
894		nfsvno_relpathbuf(ndp);
895		if (ndp->ni_dvp == ndp->ni_vp)
896			vrele(ndp->ni_dvp);
897		else
898			vput(ndp->ni_dvp);
899		vrele(ndp->ni_vp);
900		return (EEXIST);
901	}
902
903	error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
904	    &nvap->na_vattr, pathcp);
905	vput(ndp->ni_dvp);
906	vrele(ndp->ni_startdir);
907	nfsvno_relpathbuf(ndp);
908	/*
909	 * Although FreeBSD still had the lookup code in
910	 * it for 7/current, there doesn't seem to be any
911	 * point, since VOP_SYMLINK() returns the ni_vp.
912	 * Just vput it for v2.
913	 */
914	if (!not_v2 && !error)
915		vput(ndp->ni_vp);
916	return (error);
917}
918
919/*
920 * Parse symbolic link arguments.
921 * This function has an ugly side effect. It will MALLOC() an area for
922 * the symlink and set iov_base to point to it, only if it succeeds.
923 * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
924 * be FREE'd later.
925 */
926int
927nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
928    struct thread *p, char **pathcpp, int *lenp)
929{
930	u_int32_t *tl;
931	char *pathcp = NULL;
932	int error = 0, len;
933	struct nfsv2_sattr *sp;
934
935	*pathcpp = NULL;
936	*lenp = 0;
937	if ((nd->nd_flag & ND_NFSV3) &&
938	    (error = nfsrv_sattr(nd, nvap, NULL, NULL, p)))
939		goto nfsmout;
940	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
941	len = fxdr_unsigned(int, *tl);
942	if (len > NFS_MAXPATHLEN || len <= 0) {
943		error = EBADRPC;
944		goto nfsmout;
945	}
946	MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
947	error = nfsrv_mtostr(nd, pathcp, len);
948	if (error)
949		goto nfsmout;
950	if (nd->nd_flag & ND_NFSV2) {
951		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
952		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
953	}
954	*pathcpp = pathcp;
955	*lenp = len;
956	return (0);
957nfsmout:
958	if (pathcp)
959		free(pathcp, M_TEMP);
960	return (error);
961}
962
963/*
964 * Remove a non-directory object.
965 */
966int
967nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
968    struct thread *p, struct nfsexstuff *exp)
969{
970	struct vnode *vp;
971	int error = 0;
972
973	vp = ndp->ni_vp;
974	if (vp->v_type == VDIR)
975		error = NFSERR_ISDIR;
976	else if (is_v4)
977		error = nfsrv_checkremove(vp, 1, p);
978	if (!error)
979		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
980	if (ndp->ni_dvp == vp)
981		vrele(ndp->ni_dvp);
982	else
983		vput(ndp->ni_dvp);
984	vput(vp);
985	return (error);
986}
987
988/*
989 * Remove a directory.
990 */
991int
992nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
993    struct thread *p, struct nfsexstuff *exp)
994{
995	struct vnode *vp;
996	int error = 0;
997
998	vp = ndp->ni_vp;
999	if (vp->v_type != VDIR) {
1000		error = ENOTDIR;
1001		goto out;
1002	}
1003	/*
1004	 * No rmdir "." please.
1005	 */
1006	if (ndp->ni_dvp == vp) {
1007		error = EINVAL;
1008		goto out;
1009	}
1010	/*
1011	 * The root of a mounted filesystem cannot be deleted.
1012	 */
1013	if (vp->v_vflag & VV_ROOT)
1014		error = EBUSY;
1015out:
1016	if (!error)
1017		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1018	if (ndp->ni_dvp == vp)
1019		vrele(ndp->ni_dvp);
1020	else
1021		vput(ndp->ni_dvp);
1022	vput(vp);
1023	return (error);
1024}
1025
/*
 * Rename vnode op.
 *
 * fromndp and tondp hold the completed lookups for the source and the
 * destination.  ndstat is an error already detected by the caller; when
 * it is non-zero only the "from" side is cleaned up and ndstat is
 * returned.  ndflag carries the ND_NFSVx flags, which select the error
 * numbers returned for several failure cases.
 *
 * When all checks pass, VOP_RENAME() is called and no vnode is released
 * here afterwards (NOTE(review): presumably VOP_RENAME() disposes of the
 * four vnodes itself - confirm).  When a check fails, the vnodes of both
 * lookups are released in this function.  In both cases the ni_startdir
 * references and pathname buffers of both lookups are released.
 *
 * Returns 0 on success or when source and destination are the same file,
 * otherwise an error number.  cred is not used by this implementation.
 */
int
nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
    u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
{
	struct vnode *fvp, *tvp, *tdvp;
	int error = 0;

	fvp = fromndp->ni_vp;
	if (ndstat) {
		/*
		 * Caller already failed: only the "from" lookup is undone
		 * (out1 skips the "to" side entirely).
		 */
		vrele(fromndp->ni_dvp);
		vrele(fvp);
		error = ndstat;
		goto out1;
	}
	tdvp = tondp->ni_dvp;
	tvp = tondp->ni_vp;
	if (tvp != NULL) {
		/* Source and existing target must both be dirs or both not. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
			goto out;
		}
		/* Target directory is a mount point. */
		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
			goto out;
		}

		/*
		 * A rename to '.' or '..' results in a prematurely
		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
		 * here.
		 */
		if ((tondp->ni_cnd.cn_namelen == 1 &&
		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
		    (tondp->ni_cnd.cn_namelen == 2 &&
		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
			error = EINVAL;
			goto out;
		}
	}
	/* Source directory is a mount point. */
	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
		goto out;
	}
	/* Source and destination directory are on different mounts. */
	if (fvp->v_mount != tdvp->v_mount) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
		goto out;
	}
	/* The source is the destination directory itself. */
	if (fvp == tdvp) {
		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
		goto out;
	}
	if (fvp == tvp) {
		/*
		 * If source and destination are the same, there is nothing to
		 * do. Set error to -1 to indicate this.
		 */
		error = -1;
		goto out;
	}
	if (ndflag & ND_NFSV4) {
		/* fvp is locked only for the duration of the check. */
		NFSVOPLOCK(fvp, LK_EXCLUSIVE | LK_RETRY, p);
		error = nfsrv_checkremove(fvp, 0, p);
		NFSVOPUNLOCK(fvp, 0, p);
		if (tvp && !error)
			error = nfsrv_checkremove(tvp, 1, p);
	} else {
		/*
		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
		 * that the NFSv4 client won't be confused by the rename.
		 * Since nfsd_recalldelegation() can only be called on an
		 * unlocked vnode at this point and fvp is the file that will
		 * still exist after the rename, just do fvp.
		 */
		nfsd_recalldelegation(fvp, p);
	}
out:
	if (!error) {
		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
		    &tondp->ni_cnd);
	} else {
		/*
		 * A check failed (or there is nothing to do): release the
		 * vnodes of both lookups here.
		 */
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fromndp->ni_dvp);
		vrele(fvp);
		/* -1 only meant "same file, nothing to do": report success. */
		if (error == -1)
			error = 0;
	}
	vrele(tondp->ni_startdir);
	nfsvno_relpathbuf(tondp);
out1:
	vrele(fromndp->ni_startdir);
	nfsvno_relpathbuf(fromndp);
	return (error);
}
1132
1133/*
1134 * Link vnode op.
1135 */
1136int
1137nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
1138    struct thread *p, struct nfsexstuff *exp)
1139{
1140	struct vnode *xp;
1141	int error = 0;
1142
1143	xp = ndp->ni_vp;
1144	if (xp != NULL) {
1145		error = EEXIST;
1146	} else {
1147		xp = ndp->ni_dvp;
1148		if (vp->v_mount != xp->v_mount)
1149			error = EXDEV;
1150	}
1151	if (!error) {
1152		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
1153		error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
1154		if (ndp->ni_dvp == vp)
1155			vrele(ndp->ni_dvp);
1156		else
1157			vput(ndp->ni_dvp);
1158		NFSVOPUNLOCK(vp, 0, p);
1159	} else {
1160		if (ndp->ni_dvp == ndp->ni_vp)
1161			vrele(ndp->ni_dvp);
1162		else
1163			vput(ndp->ni_dvp);
1164		if (ndp->ni_vp)
1165			vrele(ndp->ni_vp);
1166	}
1167	nfsvno_relpathbuf(ndp);
1168	return (error);
1169}
1170
1171/*
1172 * Do the fsync() appropriate for the commit.
1173 */
1174int
1175nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
1176    struct thread *td)
1177{
1178	int error = 0;
1179
1180	if (cnt > MAX_COMMIT_COUNT) {
1181		/*
1182		 * Give up and do the whole thing
1183		 */
1184		if (vp->v_object &&
1185		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1186			VM_OBJECT_LOCK(vp->v_object);
1187			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
1188			VM_OBJECT_UNLOCK(vp->v_object);
1189		}
1190		error = VOP_FSYNC(vp, MNT_WAIT, td);
1191	} else {
1192		/*
1193		 * Locate and synchronously write any buffers that fall
1194		 * into the requested range.  Note:  we are assuming that
1195		 * f_iosize is a power of 2.
1196		 */
1197		int iosize = vp->v_mount->mnt_stat.f_iosize;
1198		int iomask = iosize - 1;
1199		struct bufobj *bo;
1200		daddr_t lblkno;
1201
1202		/*
1203		 * Align to iosize boundry, super-align to page boundry.
1204		 */
1205		if (off & iomask) {
1206			cnt += off & iomask;
1207			off &= ~(u_quad_t)iomask;
1208		}
1209		if (off & PAGE_MASK) {
1210			cnt += off & PAGE_MASK;
1211			off &= ~(u_quad_t)PAGE_MASK;
1212		}
1213		lblkno = off / iosize;
1214
1215		if (vp->v_object &&
1216		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1217			VM_OBJECT_LOCK(vp->v_object);
1218			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
1219			VM_OBJECT_UNLOCK(vp->v_object);
1220		}
1221
1222		bo = &vp->v_bufobj;
1223		BO_LOCK(bo);
1224		while (cnt > 0) {
1225			struct buf *bp;
1226
1227			/*
1228			 * If we have a buffer and it is marked B_DELWRI we
1229			 * have to lock and write it.  Otherwise the prior
1230			 * write is assumed to have already been committed.
1231			 *
1232			 * gbincore() can return invalid buffers now so we
1233			 * have to check that bit as well (though B_DELWRI
1234			 * should not be set if B_INVAL is set there could be
1235			 * a race here since we haven't locked the buffer).
1236			 */
1237			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
1238				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
1239				    LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
1240					BO_LOCK(bo);
1241					continue; /* retry */
1242				}
1243			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
1244				    B_DELWRI) {
1245					bremfree(bp);
1246					bp->b_flags &= ~B_ASYNC;
1247					bwrite(bp);
1248					++nfs_commit_miss;
1249				} else
1250					BUF_UNLOCK(bp);
1251				BO_LOCK(bo);
1252			}
1253			++nfs_commit_blks;
1254			if (cnt < iosize)
1255				break;
1256			cnt -= iosize;
1257			++lblkno;
1258		}
1259		BO_UNLOCK(bo);
1260	}
1261	return (error);
1262}
1263
1264/*
1265 * Statfs vnode op.
1266 */
1267int
1268nfsvno_statfs(struct vnode *vp, struct statfs *sf)
1269{
1270
1271	return (VFS_STATFS(vp->v_mount, sf));
1272}
1273
/*
 * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
 * must handle nfsrv_opencheck() calls after any other access checks.
 *
 * The outcome is reported through nd->nd_repstat rather than a return
 * value.  *vpp is set to the opened/created vnode, or to NULL when
 * nd_repstat was already non-zero before any vnode work was done.
 *
 * Three cases are handled:
 * - the file does not exist: nfsrv_opencheck() is done first, then the
 *   file is created with VOP_CREATE() and its attributes are fixed up;
 * - the file exists: the lookup is cleaned up and, if a size was given
 *   for a regular file, write access is checked and the file truncated;
 * - nd_repstat is already set: only the lookup state is released.
 */
void
nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
    nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
    int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
    NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
    struct nfsexstuff *exp, struct vnode **vpp)
{
	struct vnode *vp = NULL;
	u_quad_t tempsize;
	struct nfsexstuff nes;

	/* No vnode yet, so the open check is done without one. */
	if (ndp->ni_vp == NULL)
		nd->nd_repstat = nfsrv_opencheck(clientid,
		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
	if (!nd->nd_repstat) {
		if (ndp->ni_vp == NULL) {
			/* File does not exist: create it. */
			vrele(ndp->ni_startdir);
			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
			vput(ndp->ni_dvp);
			nfsvno_relpathbuf(ndp);
			if (!nd->nd_repstat) {
				if (*exclusive_flagp) {
					/*
					 * Exclusive create: the flag is
					 * cleared and the two verifier
					 * words are stored in the atime.
					 */
					*exclusive_flagp = 0;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_atime.tv_sec = cverf[0];
					nvap->na_atime.tv_nsec = cverf[1];
					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
					    &nvap->na_vattr, cred);
				} else {
					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
					    aclp, p, attrbitp, exp);
				}
			}
			vp = ndp->ni_vp;
		} else {
			/* File exists: clean up the lookup first. */
			if (ndp->ni_startdir)
				vrele(ndp->ni_startdir);
			nfsvno_relpathbuf(ndp);
			vp = ndp->ni_vp;
			/* ni_dvp is only released here for the create case. */
			if (create == NFSV4OPEN_CREATE) {
				if (ndp->ni_dvp == vp)
					vrele(ndp->ni_dvp);
				else
					vput(ndp->ni_dvp);
			}
			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
				/*
				 * A size was supplied: build a local export
				 * descriptor from the lookup's RDONLY flag,
				 * check for write access, then do the open
				 * check (which is passed the access check
				 * result) before truncating.
				 */
				if (ndp->ni_cnd.cn_flags & RDONLY)
					NFSVNO_SETEXRDONLY(&nes);
				else
					NFSVNO_EXINIT(&nes);
				nd->nd_repstat = nfsvno_accchk(vp,
				    VWRITE, cred, &nes, p,
				    NFSACCCHK_NOOVERRIDE,
				    NFSACCCHK_VPISLOCKED, NULL);
				nd->nd_repstat = nfsrv_opencheck(clientid,
				    stateidp, stp, vp, nd, p, nd->nd_repstat);
				if (!nd->nd_repstat) {
					/* Set only the size attribute. */
					tempsize = nvap->na_size;
					NFSVNO_ATTRINIT(nvap);
					nvap->na_size = tempsize;
					nd->nd_repstat = VOP_SETATTR(vp,
					    &nvap->na_vattr, cred);
				}
			} else if (vp->v_type == VREG) {
				nd->nd_repstat = nfsrv_opencheck(clientid,
				    stateidp, stp, vp, nd, p, nd->nd_repstat);
			}
		}
	} else {
		/*
		 * An error was already set: release whatever the lookup
		 * left behind.  vp stays NULL.
		 */
		if (ndp->ni_cnd.cn_flags & HASBUF)
			nfsvno_relpathbuf(ndp);
		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
			vrele(ndp->ni_startdir);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			if (ndp->ni_vp)
				vput(ndp->ni_vp);
		}
	}
	*vpp = vp;
}
1362
1363/*
1364 * Updates the file rev and sets the mtime and ctime
1365 * to the current clock time, returning the va_filerev and va_Xtime
1366 * values.
1367 */
1368void
1369nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
1370    struct ucred *cred, struct thread *p)
1371{
1372	struct vattr va;
1373
1374	VATTR_NULL(&va);
1375	getnanotime(&va.va_mtime);
1376	(void) VOP_SETATTR(vp, &va, cred);
1377	(void) nfsvno_getattr(vp, nvap, cred, p);
1378}
1379
1380/*
1381 * Glue routine to nfsv4_fillattr().
1382 */
1383int
1384nfsvno_fillattr(struct nfsrv_descript *nd, struct vnode *vp,
1385    struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
1386    struct ucred *cred, struct thread *p, int isdgram, int reterr)
1387{
1388	int error;
1389
1390	error = nfsv4_fillattr(nd, vp, NULL, &nvap->na_vattr, fhp, rderror,
1391	    attrbitp, cred, p, isdgram, reterr);
1392	return (error);
1393}
1394
/* Since the Readdir vnode ops vary, put the entire functions in here. */
/*
 * nfs readdir service
 * - mallocs what it thinks is enough to read
 *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
 * - calls VOP_READDIR()
 * - loops around building the reply
 *	if the output generated exceeds count break out of loop
 *	The reply is built with NFSM_BUILD() and nfsm_strtom().
 * - it only knows that it has encountered eof when the VOP_READDIR()
 *	reads nothing
 * - as such one readdir rpc will return eof false although you are there
 *	and then the next will return eof
 * - it trims out records with d_fileno == 0
 *	this doesn't matter for Unix clients, but they might confuse clients
 *	for other os'.
 * - it trims out records with d_type == DT_WHT
 *	these cannot be seen through NFS (unless we extend the protocol)
 * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
 *	than requested, but this may not apply to all filesystems. For
 *	example, client NFS does not { although it is never remote mounted
 *	anyhow }
 *     The alternate call nfsrvd_readdirplus() does lookups as well.
 * PS: The NFS protocol spec. does not clarify what the "count" byte
 *	argument is a count of.. just name strings and file id's or the
 *	entire reply rpc or ...
 *	I tried just file name and id sizes and it confused the Sun client,
 *	so I am using the full rpc size now. The "paranoia.." comment refers
 *	to including the status longwords that are not a part of the dir.
 *	"entry" structures, but are in the rpc.
 *
 * The status of the operation is reported in nd->nd_repstat; the function
 * itself returns 0 on every path except the nfsmout one, which returns
 * error (NOTE(review): presumably NFSM_DISSECT() sets error and jumps to
 * nfsmout when the request cannot be parsed - confirm against the macro).
 * vp is released on every path that gets past the initial nd_repstat
 * check: with vput() while it is still locked and with vrele() after it
 * has been unlocked below.
 */
int
nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
{
	struct dirent *dp;
	u_int32_t *tl;
	int dirlen;
	char *cpos, *cend, *rbuf;
	struct nfsvattr at;
	int nlen, error = 0, getret = 1;
	int siz, cnt, fullsiz, eofflag, ncookies;
	u_int64_t off, toff, verf;
	u_long *cookies = NULL, *cookiep;
	struct uio io;
	struct iovec iv;

	if (nd->nd_repstat) {
		nfsrv_postopattr(nd, getret, &at);
		return (0);
	}
	/*
	 * Parse the arguments: NFSv2 has a 32bit offset and a count, the
	 * other versions have a 64bit offset, a 64bit verifier and a count.
	 */
	if (nd->nd_flag & ND_NFSV2) {
		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		off = fxdr_unsigned(u_quad_t, *tl++);
	} else {
		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
		off = fxdr_hyper(tl);
		tl += 2;
		verf = fxdr_hyper(tl);
		tl += 2;
	}
	toff = off;
	cnt = fxdr_unsigned(int, *tl);
	/* Clip the count to the server's maximum and round up the read size. */
	if (cnt > NFS_SRVMAXDATA(nd))
		cnt = NFS_SRVMAXDATA(nd);
	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
	fullsiz = siz;
	if (nd->nd_flag & ND_NFSV3) {
		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
		    p);
#if 0
		/*
		 * va_filerev is not sufficient as a cookie verifier,
		 * since it is not supposed to change when entries are
		 * removed/added unless that offset cookies returned to
		 * the client are no longer valid.
		 */
		if (!nd->nd_repstat && toff && verf != at.na_filerev)
			nd->nd_repstat = NFSERR_BAD_COOKIE;
#endif
	}
	if (!nd->nd_repstat)
		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
		    NFSACCCHK_VPISLOCKED, NULL);
	if (nd->nd_repstat) {
		vput(vp);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		return (0);
	}
	/*
	 * From here on vp is kept unlocked except around VOP_READDIR(),
	 * so the remaining exits use vrele().
	 */
	NFSVOPUNLOCK(vp, 0, p);
	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
again:
	eofflag = 0;
	/* Drop the cookies from a previous pass, if any. */
	if (cookies) {
		free((caddr_t)cookies, M_TEMP);
		cookies = NULL;
	}

	iv.iov_base = rbuf;
	iv.iov_len = siz;
	io.uio_iov = &iv;
	io.uio_iovcnt = 1;
	io.uio_offset = (off_t)off;
	io.uio_resid = siz;
	io.uio_segflg = UIO_SYSSPACE;
	io.uio_rw = UIO_READ;
	io.uio_td = NULL;
	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
	    &cookies);
	NFSVOPUNLOCK(vp, 0, p);
	off = (u_int64_t)io.uio_offset;
	/* siz becomes the number of bytes actually read. */
	if (io.uio_resid)
		siz -= io.uio_resid;

	/* A successful read that returned no cookies is reported as an error. */
	if (!cookies && !nd->nd_repstat)
		nd->nd_repstat = NFSERR_PERM;
	if (nd->nd_flag & ND_NFSV3) {
		getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
		if (!nd->nd_repstat)
			nd->nd_repstat = getret;
	}

	/*
	 * Handles the failed cases. nd->nd_repstat == 0 past here.
	 */
	if (nd->nd_repstat) {
		vrele(vp);
		free((caddr_t)rbuf, M_TEMP);
		if (cookies)
			free((caddr_t)cookies, M_TEMP);
		if (nd->nd_flag & ND_NFSV3)
			nfsrv_postopattr(nd, getret, &at);
		return (0);
	}
	/*
	 * If nothing read, return eof
	 * rpc reply
	 */
	if (siz == 0) {
		vrele(vp);
		if (nd->nd_flag & ND_NFSV2) {
			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		} else {
			nfsrv_postopattr(nd, getret, &at);
			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
			txdr_hyper(at.na_filerev, tl);
			tl += 2;
		}
		/* "no more entries" followed by "eof". */
		*tl++ = newnfs_false;
		*tl = newnfs_true;
		FREE((caddr_t)rbuf, M_TEMP);
		FREE((caddr_t)cookies, M_TEMP);
		return (0);
	}

	/*
	 * Check for degenerate cases of nothing useful read.
	 * If so go try again
	 */
	cpos = rbuf;
	cend = rbuf + siz;
	dp = (struct dirent *)cpos;
	cookiep = cookies;

	/*
	 * For some reason FreeBSD's ufs_readdir() chooses to back the
	 * directory offset up to a block boundary, so it is necessary to
	 * skip over the records that precede the requested offset. This
	 * requires the assumption that file offset cookies monotonically
	 * increase.
	 */
	while (cpos < cend && ncookies > 0 &&
	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
	     ((u_quad_t)(*cookiep)) <= toff)) {
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	/* Everything was skipped: read the next chunk, starting at off. */
	if (cpos >= cend || ncookies == 0) {
		siz = fullsiz;
		toff = off;
		goto again;
	}

	/*
	 * dirlen is the size of the reply, including all XDR and must
	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
	 * if the XDR should be included in "count", but to be safe, we do.
	 * (Include the two booleans at the end of the reply in dirlen now.)
	 */
	if (nd->nd_flag & ND_NFSV3) {
		nfsrv_postopattr(nd, getret, &at);
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		txdr_hyper(at.na_filerev, tl);
		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
	} else {
		dirlen = 2 * NFSX_UNSIGNED;
	}

	/* Loop through the records and build reply */
	while (cpos < cend && ncookies > 0) {
		nlen = dp->d_namlen;
		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
			nlen <= NFS_MAXNAMLEN) {
			if (nd->nd_flag & ND_NFSV3)
				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			else
				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			/* This entry would not fit, so there is more to come. */
			if (dirlen > cnt) {
				eofflag = 0;
				break;
			}

			/*
			 * Build the directory record xdr from
			 * the dirent entry.
			 */
			if (nd->nd_flag & ND_NFSV3) {
				/* The fileid goes out as 64 bits, high word 0. */
				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
				*tl++ = 0;
			} else {
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = newnfs_true;
			}
			*tl = txdr_unsigned(dp->d_fileno);
			(void) nfsm_strtom(nd, dp->d_name, nlen);
			if (nd->nd_flag & ND_NFSV3) {
				/* The cookie goes out as 64 bits, high word 0. */
				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
				*tl++ = 0;
			} else
				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = txdr_unsigned(*cookiep);
		}
		cpos += dp->d_reclen;
		dp = (struct dirent *)cpos;
		cookiep++;
		ncookies--;
	}
	/* Unprocessed records remain in the buffer, so this is not eof. */
	if (cpos < cend)
		eofflag = 0;
	vrele(vp);
	/* Trailer: "no more entries" followed by the eof flag. */
	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = newnfs_false;
	if (eofflag)
		*tl = newnfs_true;
	else
		*tl = newnfs_false;
	FREE((caddr_t)rbuf, M_TEMP);
	FREE((caddr_t)cookies, M_TEMP);
	return (0);
nfsmout:
	vput(vp);
	return (error);
}
1655
1656/*
1657 * Readdirplus for V3 and Readdir for V4.
1658 */
1659int
1660nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
1661    struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1662{
1663	struct dirent *dp;
1664	u_int32_t *tl;
1665	int dirlen;
1666	char *cpos, *cend, *rbuf;
1667	struct vnode *nvp;
1668	fhandle_t nfh;
1669	struct nfsvattr nva, at, *nvap = &nva;
1670	struct mbuf *mb0, *mb1;
1671	struct nfsreferral *refp;
1672	int nlen, r, error = 0, getret = 1, usevget = 1;
1673	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
1674	caddr_t bpos0, bpos1;
1675	u_int64_t off, toff, verf;
1676	u_long *cookies = NULL, *cookiep;
1677	nfsattrbit_t attrbits, rderrbits, savbits;
1678	struct uio io;
1679	struct iovec iv;
1680	struct componentname cn;
1681
1682	if (nd->nd_repstat) {
1683		nfsrv_postopattr(nd, getret, &at);
1684		return (0);
1685	}
1686	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
1687	off = fxdr_hyper(tl);
1688	toff = off;
1689	tl += 2;
1690	verf = fxdr_hyper(tl);
1691	tl += 2;
1692	siz = fxdr_unsigned(int, *tl++);
1693	cnt = fxdr_unsigned(int, *tl);
1694
1695	/*
1696	 * Use the server's maximum data transfer size as the upper bound
1697	 * on reply datalen.
1698	 */
1699	if (cnt > NFS_SRVMAXDATA(nd))
1700		cnt = NFS_SRVMAXDATA(nd);
1701
1702	/*
1703	 * siz is a "hint" of how much directory information (name, fileid,
1704	 * cookie) should be in the reply. At least one client "hints" 0,
1705	 * so I set it to cnt for that case. I also round it up to the
1706	 * next multiple of DIRBLKSIZ.
1707	 */
1708	if (siz == 0)
1709		siz = cnt;
1710	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1711
1712	if (nd->nd_flag & ND_NFSV4) {
1713		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1714		if (error)
1715			goto nfsmout;
1716		NFSSET_ATTRBIT(&savbits, &attrbits);
1717		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
1718		NFSZERO_ATTRBIT(&rderrbits);
1719		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
1720	} else {
1721		NFSZERO_ATTRBIT(&attrbits);
1722	}
1723	fullsiz = siz;
1724	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1725	if (!nd->nd_repstat) {
1726	    if (off && verf != at.na_filerev) {
1727		/*
1728		 * va_filerev is not sufficient as a cookie verifier,
1729		 * since it is not supposed to change when entries are
1730		 * removed/added unless that offset cookies returned to
1731		 * the client are no longer valid.
1732		 */
1733#if 0
1734		if (nd->nd_flag & ND_NFSV4) {
1735			nd->nd_repstat = NFSERR_NOTSAME;
1736		} else {
1737			nd->nd_repstat = NFSERR_BAD_COOKIE;
1738		}
1739#endif
1740	    } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
1741		nd->nd_repstat = NFSERR_BAD_COOKIE;
1742	    }
1743	}
1744	if (!nd->nd_repstat && vp->v_type != VDIR)
1745		nd->nd_repstat = NFSERR_NOTDIR;
1746	if (!nd->nd_repstat && cnt == 0)
1747		nd->nd_repstat = NFSERR_TOOSMALL;
1748	if (!nd->nd_repstat)
1749		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1750		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1751		    NFSACCCHK_VPISLOCKED, NULL);
1752	if (nd->nd_repstat) {
1753		vput(vp);
1754		if (nd->nd_flag & ND_NFSV3)
1755			nfsrv_postopattr(nd, getret, &at);
1756		return (0);
1757	}
1758
1759	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1760again:
1761	eofflag = 0;
1762	if (cookies) {
1763		free((caddr_t)cookies, M_TEMP);
1764		cookies = NULL;
1765	}
1766
1767	iv.iov_base = rbuf;
1768	iv.iov_len = siz;
1769	io.uio_iov = &iv;
1770	io.uio_iovcnt = 1;
1771	io.uio_offset = (off_t)off;
1772	io.uio_resid = siz;
1773	io.uio_segflg = UIO_SYSSPACE;
1774	io.uio_rw = UIO_READ;
1775	io.uio_td = NULL;
1776	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1777	    &cookies);
1778	off = (u_int64_t)io.uio_offset;
1779	if (io.uio_resid)
1780		siz -= io.uio_resid;
1781
1782	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1783
1784	if (!cookies && !nd->nd_repstat)
1785		nd->nd_repstat = NFSERR_PERM;
1786	if (!nd->nd_repstat)
1787		nd->nd_repstat = getret;
1788	if (nd->nd_repstat) {
1789		vput(vp);
1790		if (cookies)
1791			free((caddr_t)cookies, M_TEMP);
1792		free((caddr_t)rbuf, M_TEMP);
1793		if (nd->nd_flag & ND_NFSV3)
1794			nfsrv_postopattr(nd, getret, &at);
1795		return (0);
1796	}
1797	/*
1798	 * If nothing read, return eof
1799	 * rpc reply
1800	 */
1801	if (siz == 0) {
1802		vput(vp);
1803		if (nd->nd_flag & ND_NFSV3)
1804			nfsrv_postopattr(nd, getret, &at);
1805		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1806		txdr_hyper(at.na_filerev, tl);
1807		tl += 2;
1808		*tl++ = newnfs_false;
1809		*tl = newnfs_true;
1810		free((caddr_t)cookies, M_TEMP);
1811		free((caddr_t)rbuf, M_TEMP);
1812		return (0);
1813	}
1814
1815	/*
1816	 * Check for degenerate cases of nothing useful read.
1817	 * If so go try again
1818	 */
1819	cpos = rbuf;
1820	cend = rbuf + siz;
1821	dp = (struct dirent *)cpos;
1822	cookiep = cookies;
1823
1824	/*
1825	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1826	 * directory offset up to a block boundary, so it is necessary to
1827	 * skip over the records that precede the requested offset. This
1828	 * requires the assumption that file offset cookies monotonically
1829	 * increase.
1830	 */
1831	while (cpos < cend && ncookies > 0 &&
1832	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1833	   ((u_quad_t)(*cookiep)) <= toff ||
1834	   ((nd->nd_flag & ND_NFSV4) &&
1835	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1836	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
1837		cpos += dp->d_reclen;
1838		dp = (struct dirent *)cpos;
1839		cookiep++;
1840		ncookies--;
1841	}
1842	if (cpos >= cend || ncookies == 0) {
1843		siz = fullsiz;
1844		toff = off;
1845		goto again;
1846	}
1847	NFSVOPUNLOCK(vp, 0, p);
1848
1849	/*
1850	 * Save this position, in case there is an error before one entry
1851	 * is created.
1852	 */
1853	mb0 = nd->nd_mb;
1854	bpos0 = nd->nd_bpos;
1855
1856	/*
1857	 * Fill in the first part of the reply.
1858	 * dirlen is the reply length in bytes and cannot exceed cnt.
1859	 * (Include the two booleans at the end of the reply in dirlen now,
1860	 *  so we recognize when we have exceeded cnt.)
1861	 */
1862	if (nd->nd_flag & ND_NFSV3) {
1863		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1864		nfsrv_postopattr(nd, getret, &at);
1865	} else {
1866		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
1867	}
1868	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
1869	txdr_hyper(at.na_filerev, tl);
1870
1871	/*
1872	 * Save this position, in case there is an empty reply needed.
1873	 */
1874	mb1 = nd->nd_mb;
1875	bpos1 = nd->nd_bpos;
1876
1877	/* Loop through the records and build reply */
1878	entrycnt = 0;
1879	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
1880		nlen = dp->d_namlen;
1881		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1882		    nlen <= NFS_MAXNAMLEN &&
1883		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
1884		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
1885		      || (nlen == 1 && dp->d_name[0] != '.'))) {
1886			/*
1887			 * Save the current position in the reply, in case
1888			 * this entry exceeds cnt.
1889			 */
1890			mb1 = nd->nd_mb;
1891			bpos1 = nd->nd_bpos;
1892
1893			/*
1894			 * For readdir_and_lookup get the vnode using
1895			 * the file number.
1896			 */
1897			nvp = NULL;
1898			refp = NULL;
1899			r = 0;
1900			if ((nd->nd_flag & ND_NFSV3) ||
1901			    NFSNONZERO_ATTRBIT(&savbits)) {
1902				if (nd->nd_flag & ND_NFSV4)
1903					refp = nfsv4root_getreferral(NULL,
1904					    vp, dp->d_fileno);
1905				if (refp == NULL) {
1906					if (usevget)
1907						r = VFS_VGET(vp->v_mount,
1908						    dp->d_fileno, LK_EXCLUSIVE,
1909						    &nvp);
1910					else
1911						r = EOPNOTSUPP;
1912					if (r == EOPNOTSUPP) {
1913						if (usevget) {
1914							usevget = 0;
1915							cn.cn_nameiop = LOOKUP;
1916							cn.cn_lkflags =
1917							    LK_EXCLUSIVE |
1918							    LK_RETRY;
1919							cn.cn_cred =
1920							    nd->nd_cred;
1921							cn.cn_thread = p;
1922						}
1923						cn.cn_nameptr = dp->d_name;
1924						cn.cn_namelen = nlen;
1925						cn.cn_flags = ISLASTCN |
1926						    NOFOLLOW | LOCKLEAF |
1927						    MPSAFE;
1928						if (nlen == 2 &&
1929						    dp->d_name[0] == '.' &&
1930						    dp->d_name[1] == '.')
1931							cn.cn_flags |=
1932							    ISDOTDOT;
1933						if (!VOP_ISLOCKED(vp))
1934							vn_lock(vp,
1935							    LK_EXCLUSIVE |
1936							    LK_RETRY);
1937						r = VOP_LOOKUP(vp, &nvp, &cn);
1938					}
1939				}
1940				if (!r) {
1941				    if (refp == NULL &&
1942					((nd->nd_flag & ND_NFSV3) ||
1943					 NFSNONZERO_ATTRBIT(&attrbits))) {
1944					r = nfsvno_getfh(nvp, &nfh, p);
1945					if (!r)
1946					    r = nfsvno_getattr(nvp, nvap,
1947						nd->nd_cred, p);
1948				    }
1949				} else {
1950				    nvp = NULL;
1951				}
1952				if (r) {
1953					if (!NFSISSET_ATTRBIT(&attrbits,
1954					    NFSATTRBIT_RDATTRERROR)) {
1955						if (nvp != NULL)
1956							vput(nvp);
1957						nd->nd_repstat = r;
1958						break;
1959					}
1960				}
1961			}
1962
1963			/*
1964			 * Build the directory record xdr
1965			 */
1966			if (nd->nd_flag & ND_NFSV3) {
1967				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1968				*tl++ = newnfs_true;
1969				*tl++ = 0;
1970				*tl = txdr_unsigned(dp->d_fileno);
1971				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
1972				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1973				*tl++ = 0;
1974				*tl = txdr_unsigned(*cookiep);
1975				nfsrv_postopattr(nd, 0, nvap);
1976				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
1977				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
1978				if (nvp != NULL)
1979					vput(nvp);
1980			} else {
1981				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1982				*tl++ = newnfs_true;
1983				*tl++ = 0;
1984				*tl = txdr_unsigned(*cookiep);
1985				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
1986				if (nvp != NULL)
1987					NFSVOPUNLOCK(nvp, 0, p);
1988				if (refp != NULL) {
1989					dirlen += nfsrv_putreferralattr(nd,
1990					    &savbits, refp, 0,
1991					    &nd->nd_repstat);
1992					if (nd->nd_repstat) {
1993						if (nvp != NULL)
1994							vrele(nvp);
1995						break;
1996					}
1997				} else if (r) {
1998					dirlen += nfsvno_fillattr(nd, nvp, nvap,
1999					    &nfh, r, &rderrbits, nd->nd_cred,
2000					    p, isdgram, 0);
2001				} else {
2002					dirlen += nfsvno_fillattr(nd, nvp, nvap,
2003					    &nfh, r, &attrbits, nd->nd_cred,
2004					    p, isdgram, 0);
2005				}
2006				if (nvp != NULL)
2007					vrele(nvp);
2008				dirlen += (3 * NFSX_UNSIGNED);
2009			}
2010			if (dirlen <= cnt)
2011				entrycnt++;
2012		}
2013		cpos += dp->d_reclen;
2014		dp = (struct dirent *)cpos;
2015		cookiep++;
2016		ncookies--;
2017	}
2018	if (!usevget && VOP_ISLOCKED(vp))
2019		vput(vp);
2020	else
2021		vrele(vp);
2022
2023	/*
2024	 * If dirlen > cnt, we must strip off the last entry. If that
2025	 * results in an empty reply, report NFSERR_TOOSMALL.
2026	 */
2027	if (dirlen > cnt || nd->nd_repstat) {
2028		if (!nd->nd_repstat && entrycnt == 0)
2029			nd->nd_repstat = NFSERR_TOOSMALL;
2030		if (nd->nd_repstat)
2031			newnfs_trimtrailing(nd, mb0, bpos0);
2032		else
2033			newnfs_trimtrailing(nd, mb1, bpos1);
2034		eofflag = 0;
2035	} else if (cpos < cend)
2036		eofflag = 0;
2037	if (!nd->nd_repstat) {
2038		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2039		*tl++ = newnfs_false;
2040		if (eofflag)
2041			*tl = newnfs_true;
2042		else
2043			*tl = newnfs_false;
2044	}
2045	FREE((caddr_t)cookies, M_TEMP);
2046	FREE((caddr_t)rbuf, M_TEMP);
2047	return (0);
2048nfsmout:
2049	vput(vp);
2050	return (error);
2051}
2052
2053/*
2054 * Get the settable attributes out of the mbuf list.
2055 * (Return 0 or EBADRPC)
2056 */
2057int
2058nfsrv_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2059    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2060{
2061	u_int32_t *tl;
2062	struct nfsv2_sattr *sp;
2063	struct timeval curtime;
2064	int error = 0, toclient = 0;
2065
2066	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
2067	case ND_NFSV2:
2068		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2069		/*
2070		 * Some old clients didn't fill in the high order 16bits.
2071		 * --> check the low order 2 bytes for 0xffff
2072		 */
2073		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
2074			nvap->na_mode = nfstov_mode(sp->sa_mode);
2075		if (sp->sa_uid != newnfs_xdrneg1)
2076			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
2077		if (sp->sa_gid != newnfs_xdrneg1)
2078			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
2079		if (sp->sa_size != newnfs_xdrneg1)
2080			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
2081		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
2082#ifdef notyet
2083			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
2084#else
2085			nvap->na_atime.tv_sec =
2086				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
2087			nvap->na_atime.tv_nsec = 0;
2088#endif
2089		}
2090		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
2091			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
2092		break;
2093	case ND_NFSV3:
2094		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2095		if (*tl == newnfs_true) {
2096			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2097			nvap->na_mode = nfstov_mode(*tl);
2098		}
2099		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2100		if (*tl == newnfs_true) {
2101			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2102			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
2103		}
2104		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2105		if (*tl == newnfs_true) {
2106			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2107			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
2108		}
2109		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2110		if (*tl == newnfs_true) {
2111			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2112			nvap->na_size = fxdr_hyper(tl);
2113		}
2114		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2115		switch (fxdr_unsigned(int, *tl)) {
2116		case NFSV3SATTRTIME_TOCLIENT:
2117			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2118			fxdr_nfsv3time(tl, &nvap->na_atime);
2119			toclient = 1;
2120			break;
2121		case NFSV3SATTRTIME_TOSERVER:
2122			NFSGETTIME(&curtime);
2123			nvap->na_atime.tv_sec = curtime.tv_sec;
2124			nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2125			nvap->na_vaflags |= VA_UTIMES_NULL;
2126			break;
2127		};
2128		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2129		switch (fxdr_unsigned(int, *tl)) {
2130		case NFSV3SATTRTIME_TOCLIENT:
2131			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2132			fxdr_nfsv3time(tl, &nvap->na_mtime);
2133			nvap->na_vaflags &= ~VA_UTIMES_NULL;
2134			break;
2135		case NFSV3SATTRTIME_TOSERVER:
2136			NFSGETTIME(&curtime);
2137			nvap->na_mtime.tv_sec = curtime.tv_sec;
2138			nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2139			if (!toclient)
2140				nvap->na_vaflags |= VA_UTIMES_NULL;
2141			break;
2142		};
2143		break;
2144	case ND_NFSV4:
2145		error = nfsv4_sattr(nd, nvap, attrbitp, aclp, p);
2146	};
2147nfsmout:
2148	return (error);
2149}
2150
2151/*
2152 * Handle the setable attributes for V4.
2153 * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
2154 */
2155int
2156nfsv4_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2157    nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2158{
2159	u_int32_t *tl;
2160	int attrsum = 0;
2161	int i, j;
2162	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
2163	int toclient = 0;
2164	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
2165	uid_t uid;
2166	gid_t gid;
2167	struct timeval curtime;
2168
2169	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
2170	if (error)
2171		return (error);
2172	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2173	attrsize = fxdr_unsigned(int, *tl);
2174
2175	/*
2176	 * Loop around getting the setable attributes. If an unsupported
2177	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
2178	 */
2179	if (retnotsup) {
2180		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2181		bitpos = NFSATTRBIT_MAX;
2182	} else {
2183		bitpos = 0;
2184	}
2185	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
2186	    if (attrsum > attrsize) {
2187		error = NFSERR_BADXDR;
2188		goto nfsmout;
2189	    }
2190	    if (NFSISSET_ATTRBIT(attrbitp, bitpos))
2191		switch (bitpos) {
2192		case NFSATTRBIT_SIZE:
2193			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2194			nvap->na_size = fxdr_hyper(tl);
2195			attrsum += NFSX_HYPER;
2196			break;
2197		case NFSATTRBIT_ACL:
2198			error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
2199			    p);
2200			if (error)
2201				goto nfsmout;
2202			if (aceerr && !nd->nd_repstat)
2203				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2204			attrsum += aclsize;
2205			break;
2206		case NFSATTRBIT_ARCHIVE:
2207			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2208			if (!nd->nd_repstat)
2209				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2210			attrsum += NFSX_UNSIGNED;
2211			break;
2212		case NFSATTRBIT_HIDDEN:
2213			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2214			if (!nd->nd_repstat)
2215				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2216			attrsum += NFSX_UNSIGNED;
2217			break;
2218		case NFSATTRBIT_MIMETYPE:
2219			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2220			i = fxdr_unsigned(int, *tl);
2221			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
2222			if (error)
2223				goto nfsmout;
2224			if (!nd->nd_repstat)
2225				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2226			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
2227			break;
2228		case NFSATTRBIT_MODE:
2229			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2230			nvap->na_mode = nfstov_mode(*tl);
2231			attrsum += NFSX_UNSIGNED;
2232			break;
2233		case NFSATTRBIT_OWNER:
2234			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2235			j = fxdr_unsigned(int, *tl);
2236			if (j < 0)
2237				return (NFSERR_BADXDR);
2238			if (j > NFSV4_SMALLSTR)
2239				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2240			else
2241				cp = namestr;
2242			error = nfsrv_mtostr(nd, cp, j);
2243			if (error) {
2244				if (j > NFSV4_SMALLSTR)
2245					free(cp, M_NFSSTRING);
2246				return (error);
2247			}
2248			if (!nd->nd_repstat) {
2249				nd->nd_repstat = nfsv4_strtouid(cp,j,&uid,p);
2250				if (!nd->nd_repstat)
2251					nvap->na_uid = uid;
2252			}
2253			if (j > NFSV4_SMALLSTR)
2254				free(cp, M_NFSSTRING);
2255			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2256			break;
2257		case NFSATTRBIT_OWNERGROUP:
2258			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2259			j = fxdr_unsigned(int, *tl);
2260			if (j < 0)
2261				return (NFSERR_BADXDR);
2262			if (j > NFSV4_SMALLSTR)
2263				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2264			else
2265				cp = namestr;
2266			error = nfsrv_mtostr(nd, cp, j);
2267			if (error) {
2268				if (j > NFSV4_SMALLSTR)
2269					free(cp, M_NFSSTRING);
2270				return (error);
2271			}
2272			if (!nd->nd_repstat) {
2273				nd->nd_repstat = nfsv4_strtogid(cp,j,&gid,p);
2274				if (!nd->nd_repstat)
2275					nvap->na_gid = gid;
2276			}
2277			if (j > NFSV4_SMALLSTR)
2278				free(cp, M_NFSSTRING);
2279			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2280			break;
2281		case NFSATTRBIT_SYSTEM:
2282			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2283			if (!nd->nd_repstat)
2284				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2285			attrsum += NFSX_UNSIGNED;
2286			break;
2287		case NFSATTRBIT_TIMEACCESSSET:
2288			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2289			attrsum += NFSX_UNSIGNED;
2290			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2291			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2292			    fxdr_nfsv4time(tl, &nvap->na_atime);
2293			    toclient = 1;
2294			    attrsum += NFSX_V4TIME;
2295			} else {
2296			    NFSGETTIME(&curtime);
2297			    nvap->na_atime.tv_sec = curtime.tv_sec;
2298			    nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2299			    nvap->na_vaflags |= VA_UTIMES_NULL;
2300			}
2301			break;
2302		case NFSATTRBIT_TIMEBACKUP:
2303			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2304			if (!nd->nd_repstat)
2305				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2306			attrsum += NFSX_V4TIME;
2307			break;
2308		case NFSATTRBIT_TIMECREATE:
2309			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2310			if (!nd->nd_repstat)
2311				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2312			attrsum += NFSX_V4TIME;
2313			break;
2314		case NFSATTRBIT_TIMEMODIFYSET:
2315			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2316			attrsum += NFSX_UNSIGNED;
2317			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2318			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2319			    fxdr_nfsv4time(tl, &nvap->na_mtime);
2320			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
2321			    attrsum += NFSX_V4TIME;
2322			} else {
2323			    NFSGETTIME(&curtime);
2324			    nvap->na_mtime.tv_sec = curtime.tv_sec;
2325			    nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2326			    if (!toclient)
2327				nvap->na_vaflags |= VA_UTIMES_NULL;
2328			}
2329			break;
2330		default:
2331			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2332			/*
2333			 * set bitpos so we drop out of the loop.
2334			 */
2335			bitpos = NFSATTRBIT_MAX;
2336			break;
2337		};
2338	}
2339
2340	/*
2341	 * some clients pad the attrlist, so we need to skip over the
2342	 * padding.
2343	 */
2344	if (attrsum > attrsize) {
2345		error = NFSERR_BADXDR;
2346	} else {
2347		attrsize = NFSM_RNDUP(attrsize);
2348		if (attrsum < attrsize)
2349			error = nfsm_advance(nd, attrsize - attrsum, -1);
2350	}
2351nfsmout:
2352	return (error);
2353}
2354
2355/*
2356 * Check/setup export credentials.
2357 */
2358int
2359nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
2360    struct ucred *credanon)
2361{
2362	int error = 0;
2363
2364	/*
2365	 * Check/setup credentials.
2366	 */
2367	if (nd->nd_flag & ND_GSS)
2368		exp->nes_exflag &= ~MNT_EXPORTANON;
2369
2370	/*
2371	 * Check to see if the operation is allowed for this security flavor.
2372	 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
2373	 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
2374	 * Also, allow Secinfo, so that it can acquire the correct flavor(s).
2375	 */
2376	if (nfsvno_testexp(nd, exp) &&
2377	    nd->nd_procnum != NFSV4OP_SECINFO &&
2378	    nd->nd_procnum != NFSPROC_FSINFO) {
2379		if (nd->nd_flag & ND_NFSV4)
2380			error = NFSERR_WRONGSEC;
2381		else
2382			error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2383		return (error);
2384	}
2385
2386	/*
2387	 * Check to see if the file system is exported V4 only.
2388	 */
2389	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4))
2390		return (NFSERR_PROGNOTV4);
2391
2392	/*
2393	 * Now, map the user credentials.
2394	 * (Note that ND_AUTHNONE will only be set for an NFSv3
2395	 *  Fsinfo RPC. If set for anything else, this code might need
2396	 *  to change.)
2397	 */
2398	if (NFSVNO_EXPORTED(exp) &&
2399	    ((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) ||
2400	     NFSVNO_EXPORTANON(exp) ||
2401	     (nd->nd_flag & ND_AUTHNONE))) {
2402		nd->nd_cred->cr_uid = credanon->cr_uid;
2403		nd->nd_cred->cr_gid = credanon->cr_gid;
2404		crsetgroups(nd->nd_cred, credanon->cr_ngroups,
2405		    credanon->cr_groups);
2406	}
2407	return (0);
2408}
2409
2410/*
2411 * Check exports.
2412 */
2413int
2414nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
2415    struct ucred **credp)
2416{
2417	int i, error, *secflavors;
2418
2419	error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2420	    &exp->nes_numsecflavor, &secflavors);
2421	if (error) {
2422		if (nfs_rootfhset) {
2423			exp->nes_exflag = 0;
2424			exp->nes_numsecflavor = 0;
2425			error = 0;
2426		}
2427	} else {
2428		/* Copy the security flavors. */
2429		for (i = 0; i < exp->nes_numsecflavor; i++)
2430			exp->nes_secflavors[i] = secflavors[i];
2431	}
2432	return (error);
2433}
2434
2435/*
2436 * Get a vnode for a file handle and export stuff.
2437 */
2438int
2439nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
2440    struct vnode **vpp, struct nfsexstuff *exp, struct ucred **credp)
2441{
2442	int i, error, *secflavors;
2443
2444	*credp = NULL;
2445	exp->nes_numsecflavor = 0;
2446	error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp);
2447	if (error != 0)
2448		/* Make sure the server replies ESTALE to the client. */
2449		error = ESTALE;
2450	if (nam && !error) {
2451		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2452		    &exp->nes_numsecflavor, &secflavors);
2453		if (error) {
2454			if (nfs_rootfhset) {
2455				exp->nes_exflag = 0;
2456				exp->nes_numsecflavor = 0;
2457				error = 0;
2458			} else {
2459				vput(*vpp);
2460			}
2461		} else {
2462			/* Copy the security flavors. */
2463			for (i = 0; i < exp->nes_numsecflavor; i++)
2464				exp->nes_secflavors[i] = secflavors[i];
2465		}
2466	}
2467	return (error);
2468}
2469
2470/*
2471 * Do the pathconf vnode op.
2472 */
2473int
2474nfsvno_pathconf(struct vnode *vp, int flag, register_t *retf,
2475    struct ucred *cred, struct thread *p)
2476{
2477	int error;
2478
2479	error = VOP_PATHCONF(vp, flag, retf);
2480	return (error);
2481}
2482
2483/*
2484 * nfsd_fhtovp() - convert a fh to a vnode ptr
2485 * 	- look up fsid in mount list (if not found ret error)
2486 *	- get vp and export rights by calling nfsvno_fhtovp()
2487 *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
2488 *	  for AUTH_SYS
2489 * Also handle getting the Giant lock for the file system,
2490 * as required:
2491 * - if same mount point as *mpp
2492 *       do nothing
2493 *   else if *mpp == NULL
2494 *       if already locked
2495 *           leave it locked
2496 *       else
2497 *           call VFS_LOCK_GIANT()
2498 *   else
2499 *       if already locked
2500 *            unlock Giant
2501 *       call VFS_LOCK_GIANT()
2502 */
2503void
2504nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp,
2505    struct vnode **vpp, struct nfsexstuff *exp,
2506    struct mount **mpp, int startwrite, struct thread *p)
2507{
2508	struct mount *mp;
2509	struct ucred *credanon;
2510	fhandle_t *fhp;
2511
2512	fhp = (fhandle_t *)nfp->nfsrvfh_data;
2513	/*
2514	 * Check for the special case of the nfsv4root_fh.
2515	 */
2516	mp = vfs_getvfs(&fhp->fh_fsid);
2517	if (!mp) {
2518		*vpp = NULL;
2519		nd->nd_repstat = ESTALE;
2520		if (*mpp && exp->nes_vfslocked)
2521			VFS_UNLOCK_GIANT(*mpp);
2522		*mpp = NULL;
2523		exp->nes_vfslocked = 0;
2524		return;
2525	}
2526
2527	/*
2528	 * Now, handle Giant for the file system.
2529	 */
2530	if (*mpp != NULL && *mpp != mp && exp->nes_vfslocked) {
2531		VFS_UNLOCK_GIANT(*mpp);
2532		exp->nes_vfslocked = 0;
2533	}
2534	if (!exp->nes_vfslocked && *mpp != mp)
2535		exp->nes_vfslocked = VFS_LOCK_GIANT(mp);
2536
2537	*mpp = mp;
2538	if (startwrite)
2539		vn_start_write(NULL, mpp, V_WAIT);
2540
2541	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, vpp, exp,
2542	    &credanon);
2543
2544	/*
2545	 * For NFSv4 without a pseudo root fs, unexported file handles
2546	 * can be returned, so that Lookup works everywhere.
2547	 */
2548	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
2549	    !(nd->nd_flag & ND_NFSV4)) {
2550		vput(*vpp);
2551		nd->nd_repstat = EACCES;
2552	}
2553
2554	/*
2555	 * Personally, I've never seen any point in requiring a
2556	 * reserved port#, since only in the rare case where the
2557	 * clients are all boxes with secure system priviledges,
2558	 * does it provide any enhanced security, but... some people
2559	 * believe it to be useful and keep putting this code back in.
2560	 * (There is also some "security checker" out there that
2561	 *  complains if the nfs server doesn't enforce this.)
2562	 * However, note the following:
2563	 * RFC3530 (NFSv4) specifies that a reserved port# not be
2564	 *	required.
2565	 * RFC2623 recommends that, if a reserved port# is checked for,
2566	 *	that there be a way to turn that off--> ifdef'd.
2567	 */
2568#ifdef NFS_REQRSVPORT
2569	if (!nd->nd_repstat) {
2570		struct sockaddr_in *saddr;
2571		struct sockaddr_in6 *saddr6;
2572
2573		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
2574		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
2575		if (!(nd->nd_flag & ND_NFSV4) &&
2576		    ((saddr->sin_family == AF_INET &&
2577		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
2578		     (saddr6->sin6_family == AF_INET6 &&
2579		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
2580			vput(*vpp);
2581			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2582		}
2583	}
2584#endif	/* NFS_REQRSVPORT */
2585
2586	/*
2587	 * Check/setup credentials.
2588	 */
2589	if (!nd->nd_repstat) {
2590		nd->nd_saveduid = nd->nd_cred->cr_uid;
2591		nd->nd_repstat = nfsd_excred(nd, exp, credanon);
2592		if (nd->nd_repstat)
2593			vput(*vpp);
2594	}
2595	if (credanon != NULL)
2596		crfree(credanon);
2597	if (nd->nd_repstat) {
2598		if (startwrite)
2599			vn_finished_write(mp);
2600		if (exp->nes_vfslocked) {
2601			VFS_UNLOCK_GIANT(mp);
2602			exp->nes_vfslocked = 0;
2603		}
2604		vfs_rel(mp);
2605		*vpp = NULL;
2606		*mpp = NULL;
2607	} else {
2608		vfs_rel(mp);
2609	}
2610}
2611
2612/*
2613 * glue for fp.
2614 */
2615int
2616fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
2617{
2618	struct filedesc *fdp;
2619	struct file *fp;
2620
2621	fdp = p->td_proc->p_fd;
2622	if (fd >= fdp->fd_nfiles ||
2623	    (fp = fdp->fd_ofiles[fd]) == NULL)
2624		return (EBADF);
2625	*fpp = fp;
2626	return (0);
2627}
2628
2629/*
2630 * Called from nfssvc() to update the exports list. Just call
2631 * vfs_export(). This has to be done, since the v4 root fake fs isn't
2632 * in the mount list.
2633 */
2634int
2635nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
2636{
2637	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
2638	int error;
2639	struct nameidata nd;
2640	fhandle_t fh;
2641
2642	error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
2643	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT)) {
2644		nfs_rootfhset = 0;
2645		nfsv4root_set = 0;
2646	} else if (error == 0) {
2647		if (nfsexargp->fspec == NULL)
2648			return (EPERM);
2649		/*
2650		 * If fspec != NULL, this is the v4root path.
2651		 */
2652		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE,
2653		    nfsexargp->fspec, p);
2654		if ((error = namei(&nd)) != 0)
2655			return (error);
2656		error = nfsvno_getfh(nd.ni_vp, &fh, p);
2657		vrele(nd.ni_vp);
2658		if (!error) {
2659			nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
2660			NFSBCOPY((caddr_t)&fh,
2661			    nfs_rootfh.nfsrvfh_data,
2662			    sizeof (fhandle_t));
2663			nfs_rootfhset = 1;
2664		}
2665	}
2666	return (error);
2667}
2668
2669/*
2670 * Get the tcp socket sequence numbers we need.
2671 * (Maybe this should be moved to the tcp sources?)
2672 */
2673int
2674nfsrv_getsocksndseq(struct socket *so, tcp_seq *maxp, tcp_seq *unap)
2675{
2676	struct inpcb *inp;
2677	struct tcpcb *tp;
2678
2679	inp = sotoinpcb(so);
2680	KASSERT(inp != NULL, ("nfsrv_getsocksndseq: inp == NULL"));
2681	INP_RLOCK(inp);
2682	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
2683		INP_RUNLOCK(inp);
2684		return (EPIPE);
2685	}
2686	tp = intotcpcb(inp);
2687	if (tp->t_state != TCPS_ESTABLISHED) {
2688		INP_RUNLOCK(inp);
2689		return (EPIPE);
2690	}
2691	*maxp = tp->snd_max;
2692	*unap = tp->snd_una;
2693	INP_RUNLOCK(inp);
2694	return (0);
2695}
2696
/*
 * This function is meant to report (return True) when the system is near
 * its limit for memory allocation via malloc() or mget().
 * XXX For now it is only a stub: nothing is actually measured. When the
 * nfsrv_testmalloclimit knob is non-zero, every 1000th call made while it
 * is set returns 1 (logging a message on the 1st, 101st, ... such
 * occurrence); every other call returns 0.
 */
int nfsrv_testmalloclimit = 0;
int
nfsrv_mallocmget_limit(void)
{
	static int printmesg = 0;
	static int testval = 1;

	/* The call counter only advances while the knob is set. */
	if (!nfsrv_testmalloclimit)
		return (0);
	if ((testval++ % 1000) != 0)
		return (0);
	if ((printmesg++ % 100) == 0)
		printf("nfsd: malloc/mget near limit\n");
	return (1);
}
2717
2718/*
2719 * BSD specific initialization of a mount point.
2720 */
2721void
2722nfsd_mntinit(void)
2723{
2724	static int inited = 0;
2725
2726	if (inited)
2727		return;
2728	inited = 1;
2729	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
2730	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
2731	nfsv4root_mnt.mnt_export = NULL;
2732	TAILQ_INIT(&nfsv4root_opt);
2733	TAILQ_INIT(&nfsv4root_newopt);
2734	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
2735	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
2736	nfsv4root_mnt.mnt_nvnodelistsize = 0;
2737}
2738
2739/*
2740 * Get a vnode for a file handle, without checking exports, etc.
2741 */
2742struct vnode *
2743nfsvno_getvp(fhandle_t *fhp)
2744{
2745	struct mount *mp;
2746	struct vnode *vp;
2747	int error;
2748
2749	mp = vfs_getvfs(&fhp->fh_fsid);
2750	if (mp == NULL)
2751		return (NULL);
2752	error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp);
2753	if (error)
2754		return (NULL);
2755	return (vp);
2756}
2757
2758/*
2759 * Check to see it a byte range lock held by a process running
2760 * locally on the server conflicts with the new lock.
2761 */
2762int
2763nfsvno_localconflict(struct vnode *vp, int ftype, u_int64_t first,
2764    u_int64_t end, struct nfslockconflict *cfp, struct thread *td)
2765{
2766	int error;
2767	struct flock fl;
2768
2769	if (!nfsrv_dolocallocks)
2770		return (0);
2771	fl.l_whence = SEEK_SET;
2772	fl.l_type = ftype;
2773	fl.l_start = (off_t)first;
2774	if (end == NFS64BITSSET)
2775		fl.l_len = 0;
2776	else
2777		fl.l_len = (off_t)(end - first);
2778	/*
2779	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2780	 * values for all calls, so that all locks will be held by the
2781	 * nfsd server. (The nfsd server handles conflicts between the
2782	 * various clients.)
2783	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2784	 * bytes, so it can't be put in l_sysid.
2785	 */
2786	if (nfsv4_sysid == 0)
2787		nfsv4_sysid = nlm_acquire_next_sysid();
2788	fl.l_pid = (pid_t)0;
2789	fl.l_sysid = (int)nfsv4_sysid;
2790
2791	NFSVOPUNLOCK(vp, 0, td);
2792	error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_GETLK, &fl,
2793	    (F_POSIX | F_REMOTE));
2794	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, td);
2795	if (error)
2796		return (error);
2797	if (fl.l_type == F_UNLCK)
2798		return (0);
2799	if (cfp != NULL) {
2800		cfp->cl_clientid.lval[0] = cfp->cl_clientid.lval[1] = 0;
2801		cfp->cl_first = (u_int64_t)fl.l_start;
2802		if (fl.l_len == 0)
2803			cfp->cl_end = NFS64BITSSET;
2804		else
2805			cfp->cl_end = (u_int64_t)
2806			    (fl.l_start + fl.l_len);
2807		if (fl.l_type == F_WRLCK)
2808			cfp->cl_flags = NFSLCK_WRITE;
2809		else
2810			cfp->cl_flags = NFSLCK_READ;
2811		sprintf(cfp->cl_owner, "LOCALID%d", fl.l_pid);
2812		cfp->cl_ownerlen = strlen(cfp->cl_owner);
2813		return (NFSERR_DENIED);
2814	}
2815	return (NFSERR_INVAL);
2816}
2817
2818/*
2819 * Do a local VOP_ADVLOCK().
2820 */
2821int
2822nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
2823    u_int64_t end, struct thread *td)
2824{
2825	int error;
2826	struct flock fl;
2827	u_int64_t tlen;
2828
2829	if (!nfsrv_dolocallocks)
2830		return (0);
2831	fl.l_whence = SEEK_SET;
2832	fl.l_type = ftype;
2833	fl.l_start = (off_t)first;
2834	if (end == NFS64BITSSET) {
2835		fl.l_len = 0;
2836	} else {
2837		tlen = end - first;
2838		fl.l_len = (off_t)tlen;
2839	}
2840	/*
2841	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2842	 * values for all calls, so that all locks will be held by the
2843	 * nfsd server. (The nfsd server handles conflicts between the
2844	 * various clients.)
2845	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2846	 * bytes, so it can't be put in l_sysid.
2847	 */
2848	if (nfsv4_sysid == 0)
2849		nfsv4_sysid = nlm_acquire_next_sysid();
2850	fl.l_pid = (pid_t)0;
2851	fl.l_sysid = (int)nfsv4_sysid;
2852
2853	NFSVOPUNLOCK(vp, 0, td);
2854	error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
2855	    (F_POSIX | F_REMOTE));
2856	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, td);
2857	return (error);
2858}
2859
/*
 * Function wrapper around VFS_UNLOCK_GIANT() for an underlying local
 * file system; the counterpart of nfsvno_lockvfs().
 */
void
nfsvno_unlockvfs(struct mount *mp)
{

	VFS_UNLOCK_GIANT(mp);
}
2869
/*
 * Function wrapper around VFS_LOCK_GIANT(): lock an underlying file
 * system, as required, and return whether or not it is locked.
 */
int
nfsvno_lockvfs(struct mount *mp)
{

	return (VFS_LOCK_GIANT(mp));
}
2882
2883/*
2884 * Check the nfsv4 root exports.
2885 */
2886int
2887nfsvno_v4rootexport(struct nfsrv_descript *nd)
2888{
2889	struct ucred *credanon;
2890	int exflags, error, numsecflavor, *secflavors, i;
2891
2892	error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
2893	    &credanon, &numsecflavor, &secflavors);
2894	if (error)
2895		return (NFSERR_PROGUNAVAIL);
2896	if (credanon != NULL)
2897		crfree(credanon);
2898	for (i = 0; i < numsecflavor; i++) {
2899		if (secflavors[i] == AUTH_SYS)
2900			nd->nd_flag |= ND_EXAUTHSYS;
2901		else if (secflavors[i] == RPCSEC_GSS_KRB5)
2902			nd->nd_flag |= ND_EXGSS;
2903		else if (secflavors[i] == RPCSEC_GSS_KRB5I)
2904			nd->nd_flag |= ND_EXGSSINTEGRITY;
2905		else if (secflavors[i] == RPCSEC_GSS_KRB5P)
2906			nd->nd_flag |= ND_EXGSSPRIVACY;
2907	}
2908	return (0);
2909}
2910
2911/*
2912 * Nfs server psuedo system call for the nfsd's
2913 */
2914/*
2915 * MPSAFE
2916 */
2917static int
2918nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
2919{
2920	struct file *fp;
2921	struct nfsd_addsock_args sockarg;
2922	struct nfsd_nfsd_args nfsdarg;
2923	int error;
2924
2925	if (uap->flag & NFSSVC_NFSDADDSOCK) {
2926		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
2927		if (error)
2928			return (error);
2929		if ((error = fget(td, sockarg.sock, &fp)) != 0) {
2930			return (error);
2931		}
2932		if (fp->f_type != DTYPE_SOCKET) {
2933			fdrop(fp, td);
2934			return (EPERM);
2935		}
2936		error = nfsrvd_addsock(fp);
2937		fdrop(fp, td);
2938	} else if (uap->flag & NFSSVC_NFSDNFSD) {
2939		if (uap->argp == NULL)
2940			return (EINVAL);
2941		error = copyin(uap->argp, (caddr_t)&nfsdarg,
2942		    sizeof (nfsdarg));
2943		if (error)
2944			return (error);
2945		error = nfsrvd_nfsd(td, &nfsdarg);
2946	} else {
2947		error = nfssvc_srvcall(td, uap, td->td_ucred);
2948	}
2949	return (error);
2950}
2951
2952static int
2953nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
2954{
2955	struct nfsex_args export;
2956	struct file *fp = NULL;
2957	int stablefd, len;
2958	struct nfsd_clid adminrevoke;
2959	struct nfsd_dumplist dumplist;
2960	struct nfsd_dumpclients *dumpclients;
2961	struct nfsd_dumplocklist dumplocklist;
2962	struct nfsd_dumplocks *dumplocks;
2963	struct nameidata nd;
2964	vnode_t vp;
2965	int error = EINVAL;
2966
2967	if (uap->flag & NFSSVC_PUBLICFH) {
2968		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
2969		    sizeof (fhandle_t));
2970		error = copyin(uap->argp,
2971		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
2972		if (!error)
2973			nfs_pubfhset = 1;
2974	} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
2975		error = copyin(uap->argp,(caddr_t)&export,
2976		    sizeof (struct nfsex_args));
2977		if (!error)
2978			error = nfsrv_v4rootexport(&export, cred, p);
2979	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
2980		nfs_pubfhset = 0;
2981		error = 0;
2982	} else if (uap->flag & NFSSVC_STABLERESTART) {
2983		error = copyin(uap->argp, (caddr_t)&stablefd,
2984		    sizeof (int));
2985		if (!error)
2986			error = fp_getfvp(p, stablefd, &fp, &vp);
2987		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
2988			error = EBADF;
2989		if (!error && newnfs_numnfsd != 0)
2990			error = EPERM;
2991		if (!error) {
2992			nfsrv_stablefirst.nsf_fp = fp;
2993			nfsrv_setupstable(p);
2994		}
2995	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
2996		error = copyin(uap->argp, (caddr_t)&adminrevoke,
2997		    sizeof (struct nfsd_clid));
2998		if (!error)
2999			error = nfsrv_adminrevoke(&adminrevoke, p);
3000	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
3001		error = copyin(uap->argp, (caddr_t)&dumplist,
3002		    sizeof (struct nfsd_dumplist));
3003		if (!error && (dumplist.ndl_size < 1 ||
3004			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
3005			error = EPERM;
3006		if (!error) {
3007		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
3008		    dumpclients = (struct nfsd_dumpclients *)malloc(len,
3009			M_TEMP, M_WAITOK);
3010		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
3011		    error = copyout(dumpclients,
3012			CAST_USER_ADDR_T(dumplist.ndl_list), len);
3013		    free((caddr_t)dumpclients, M_TEMP);
3014		}
3015	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
3016		error = copyin(uap->argp, (caddr_t)&dumplocklist,
3017		    sizeof (struct nfsd_dumplocklist));
3018		if (!error && (dumplocklist.ndllck_size < 1 ||
3019			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
3020			error = EPERM;
3021		if (!error)
3022			error = nfsrv_lookupfilename(&nd,
3023				dumplocklist.ndllck_fname, p);
3024		if (!error) {
3025			len = sizeof (struct nfsd_dumplocks) *
3026				dumplocklist.ndllck_size;
3027			dumplocks = (struct nfsd_dumplocks *)malloc(len,
3028				M_TEMP, M_WAITOK);
3029			nfsrv_dumplocks(nd.ni_vp, dumplocks,
3030			    dumplocklist.ndllck_size, p);
3031			vput(nd.ni_vp);
3032			error = copyout(dumplocks,
3033			    CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
3034			free((caddr_t)dumplocks, M_TEMP);
3035		}
3036	}
3037	return (error);
3038}
3039
3040/*
3041 * Check exports.
3042 * Returns 0 if ok, 1 otherwise.
3043 */
3044int
3045nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
3046{
3047	int i;
3048
3049	/*
3050	 * This seems odd, but allow the case where the security flavor
3051	 * list is empty. This happens when NFSv4 is traversing non-exported
3052	 * file systems. Exported file systems should always have a non-empty
3053	 * security flavor list.
3054	 */
3055	if (exp->nes_numsecflavor == 0)
3056		return (0);
3057
3058	for (i = 0; i < exp->nes_numsecflavor; i++) {
3059		/*
3060		 * The tests for privacy and integrity must be first,
3061		 * since ND_GSS is set for everything but AUTH_SYS.
3062		 */
3063		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
3064		    (nd->nd_flag & ND_GSSPRIVACY))
3065			return (0);
3066		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
3067		    (nd->nd_flag & ND_GSSINTEGRITY))
3068			return (0);
3069		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
3070		    (nd->nd_flag & ND_GSS))
3071			return (0);
3072		if (exp->nes_secflavors[i] == AUTH_SYS &&
3073		    (nd->nd_flag & ND_GSS) == 0)
3074			return (0);
3075	}
3076	return (1);
3077}
3078
3079extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
3080
3081/*
3082 * Called once to initialize data structures...
3083 */
3084static int
3085nfsd_modevent(module_t mod, int type, void *data)
3086{
3087	int error = 0;
3088	static int loaded = 0;
3089
3090	switch (type) {
3091	case MOD_LOAD:
3092		if (loaded)
3093			return (0);
3094		newnfs_portinit();
3095		mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF);
3096		mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
3097		mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
3098		    MTX_DEF);
3099		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
3100		nfsrvd_initcache();
3101		nfsd_init();
3102		NFSD_LOCK();
3103		nfsrvd_init(0);
3104		NFSD_UNLOCK();
3105		nfsd_mntinit();
3106#ifdef VV_DISABLEDELEG
3107		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
3108		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
3109#endif
3110		nfsd_call_servertimer = nfsrv_servertimer;
3111		nfsd_call_nfsd = nfssvc_nfsd;
3112		loaded = 1;
3113		break;
3114
3115	case MOD_UNLOAD:
3116		if (newnfs_numnfsd != 0) {
3117			error = EBUSY;
3118			break;
3119		}
3120
3121#ifdef VV_DISABLEDELEG
3122		vn_deleg_ops.vndeleg_recall = NULL;
3123		vn_deleg_ops.vndeleg_disable = NULL;
3124#endif
3125		nfsd_call_servertimer = NULL;
3126		nfsd_call_nfsd = NULL;
3127		/* and get rid of the locks */
3128		mtx_destroy(&nfs_cache_mutex);
3129		mtx_destroy(&nfs_v4root_mutex);
3130		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
3131		lockdestroy(&nfsv4root_mnt.mnt_explock);
3132		loaded = 0;
3133		break;
3134	default:
3135		error = EOPNOTSUPP;
3136		break;
3137	}
3138	return error;
3139}
3140static moduledata_t nfsd_mod = {
3141	"nfsd",
3142	nfsd_modevent,
3143	NULL,
3144};
3145DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
3146
3147/* So that loader and kldload(2) can find us, wherever we are.. */
3148MODULE_VERSION(nfsd, 1);
3149MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
3150MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
3151
3152