/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from nfs_vnops.c	8.16 (Berkeley) 5/27/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/fs/nfsclient/nfs_clvnops.c 195821 2009-07-22 14:37:53Z rmacklem $");

/*
 * vnode op calls for Sun NFS version 2, 3 and 4
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/jail.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/namei.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/signalvar.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>

#include <fs/nfs/nfsport.h>
#include <fs/nfsclient/nfsnode.h>
#include <fs/nfsclient/nfsmount.h>
#include <fs/nfsclient/nfs.h>
#include <fs/nfsclient/nfs_lock.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>

/* Defs */
#define	TRUE	1
#define	FALSE	0

extern struct nfsstats newnfsstats;
MALLOC_DECLARE(M_NEWNFSREQ);
vop_advlock_t	*ncl_advlock_p = ncl_dolock;

/*
 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
 * calls are not in getblk() and brelse() so that they would not be necessary
 * here.
 */
#ifndef B_VMIO
#define	vfs_busy_pages(bp, f)
#endif

static vop_read_t	nfsfifo_read;
static vop_write_t	nfsfifo_write;
static vop_close_t	nfsfifo_close;
static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
		    struct thread *);
static vop_lookup_t	nfs_lookup;
static vop_create_t	nfs_create;
static vop_mknod_t	nfs_mknod;
static vop_open_t	nfs_open;
static vop_close_t	nfs_close;
static vop_access_t	nfs_access;
static vop_getattr_t	nfs_getattr;
static vop_setattr_t	nfs_setattr;
static vop_read_t	nfs_read;
static vop_fsync_t	nfs_fsync;
static vop_remove_t	nfs_remove;
static vop_link_t	nfs_link;
static vop_rename_t	nfs_rename;
static vop_mkdir_t	nfs_mkdir;
static vop_rmdir_t	nfs_rmdir;
static vop_symlink_t	nfs_symlink;
static vop_readdir_t	nfs_readdir;
static vop_strategy_t	nfs_strategy;
static vop_lock1_t	nfs_lock1;
static	int	nfs_lookitup(struct vnode *, char *, int,
		    struct ucred *, struct thread *, struct nfsnode **);
static	int	nfs_sillyrename(struct vnode *, struct vnode *,
		    struct componentname *);
static vop_access_t	nfsspec_access;
static vop_readlink_t	nfs_readlink;
static vop_print_t	nfs_print;
static vop_advlock_t	nfs_advlock;
static vop_advlockasync_t nfs_advlockasync;
#ifdef NFS4_ACL_EXTATTR_NAME
static vop_getacl_t nfs_getacl;
static vop_setacl_t nfs_setacl;
#endif

/*
 * Global vfs data structures for nfs
 */
struct vop_vector newnfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		nfs_access,
	.vop_advlock =		nfs_advlock,
	.vop_advlockasync =	nfs_advlockasync,
	.vop_close =		nfs_close,
	.vop_create =		nfs_create,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_getpages =		ncl_getpages,
	.vop_putpages =		ncl_putpages,
	.vop_inactive =		ncl_inactive,
	.vop_link =		nfs_link,
	.vop_lock1 =		nfs_lock1,
	.vop_lookup =		nfs_lookup,
	.vop_mkdir =		nfs_mkdir,
	.vop_mknod =		nfs_mknod,
	.vop_open =		nfs_open,
	.vop_print =		nfs_print,
	.vop_read =		nfs_read,
	.vop_readdir =		nfs_readdir,
	.vop_readlink =		nfs_readlink,
	.vop_reclaim =		ncl_reclaim,
	.vop_remove =		nfs_remove,
	.vop_rename =		nfs_rename,
	.vop_rmdir =		nfs_rmdir,
	.vop_setattr =		nfs_setattr,
	.vop_strategy =		nfs_strategy,
	.vop_symlink =		nfs_symlink,
	.vop_write =		ncl_write,
#ifdef NFS4_ACL_EXTATTR_NAME
	.vop_getacl =		nfs_getacl,
	.vop_setacl =		nfs_setacl,
#endif
};

struct vop_vector newnfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_access =		nfsspec_access,
	.vop_close =		nfsfifo_close,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_inactive =		ncl_inactive,
	.vop_print =		nfs_print,
	.vop_read =		nfsfifo_read,
	.vop_reclaim =		ncl_reclaim,
	.vop_setattr =		nfs_setattr,
	.vop_write =		nfsfifo_write,
};

static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp, struct vattr *vap);
static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td);
static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
    char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
    char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
    struct componentname *scnp, struct sillyrename *sp);

/*
 * Global variables
 */
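/* The size of the fixed part of a struct dirent, i.e. without d_name. */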
#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))

SYSCTL_DECL(_vfs_newnfs);

static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

static int	nfs_prime_access_cache = 0;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
	   &nfs_prime_access_cache, 0,
	   "Prime NFS ACCESS cache when fetching attributes");

static int	newnfs_commit_on_close = 0;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
    &newnfs_commit_on_close, 0, "write+commit on close, else only write");

static int	nfs_clean_pages_on_close = 1;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");

int newnfs_directio_enable = 0;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, directio_enable, CTLFLAG_RW,
	   &newnfs_directio_enable, 0, "Enable NFS directio");

static int newnfs_neglookup_enable = 1;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, neglookup_enable, CTLFLAG_RW,
    &newnfs_neglookup_enable, 0, "Enable NFS negative lookup caching");

/*
 * This sysctl allows other processes to mmap a file that has been opened
 * O_DIRECT by a process.  In general, having processes mmap the file while
 * Direct IO is in progress can lead to data inconsistencies.  However, we
 * allow it by default to prevent a denial of service: otherwise a malicious
 * user could open files O_DIRECT and thereby prevent other users from
 * mmap'ing them.  "Protected" environments where stricter consistency
 * guarantees are required can disable this knob.  The process that opened
 * the file O_DIRECT cannot mmap() the file, because mmap'ed IO on an
 * O_DIRECT open() is not meaningful.
 */
int newnfs_directio_allow_mmap = 1;
SYSCTL_INT(_vfs_newnfs, OID_AUTO, directio_allow_mmap, CTLFLAG_RW,
	   &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
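/*
 * Given SYSCTL_DECL(_vfs_newnfs) above, this knob surfaces as
 * vfs.newnfs.directio_allow_mmap; a stricter environment would, for
 * example, clear it at runtime via sysctl(8).
 */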

#if 0
SYSCTL_INT(_vfs_newnfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
	   &newnfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");

SYSCTL_INT(_vfs_newnfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
	   &newnfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
#endif

#define	NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY		\
			 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE	\
			 | NFSACCESS_DELETE | NFSACCESS_LOOKUP)

/*
 * SMP Locking Note:
 * The list of locks after the description of the lock is the ordering
 * of other locks acquired with the lock held.
 * np->n_mtx : Protects the fields in the nfsnode.
       VM Object Lock
       VI_MTX (acquired indirectly)
 * nmp->nm_mtx : Protects the fields in the nfsmount.
       rep->r_mtx
 * ncl_iod_mutex : Global lock, protects shared nfsiod state.
 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
       nmp->nm_mtx
       rep->r_mtx
 * rep->r_mtx : Protects the fields in an nfsreq.
 */
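/*
 * For example (illustrative sketch), a path that needs both the nfsnode
 * mutex and the VM object lock must follow the ordering listed above:
 *
 *	mtx_lock(&np->n_mtx);
 *	VM_OBJECT_LOCK(vp->v_object);
 *	...
 *	VM_OBJECT_UNLOCK(vp->v_object);
 *	mtx_unlock(&np->n_mtx);
 */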

static int
nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td,
    struct ucred *cred, u_int32_t *retmode)
{
	int error = 0, attrflag, i, lrupos;
	u_int32_t rmode;
	struct nfsnode *np = VTONFS(vp);
	struct nfsvattr nfsva;

	error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag,
	    &rmode, NULL);
	if (attrflag)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
	if (!error) {
		lrupos = 0;
		mtx_lock(&np->n_mtx);
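		/*
		 * Update this cred's cache entry if one exists; while
		 * scanning, remember the least recently stamped slot
		 * (lrupos) so it can be replaced if no entry matches.
		 */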
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
			if (np->n_accesscache[i].uid == cred->cr_uid) {
				np->n_accesscache[i].mode = rmode;
				np->n_accesscache[i].stamp = time_second;
				break;
			}
			if (i > 0 && np->n_accesscache[i].stamp <
			    np->n_accesscache[lrupos].stamp)
				lrupos = i;
		}
		if (i == NFS_ACCESSCACHESIZE) {
			np->n_accesscache[lrupos].uid = cred->cr_uid;
			np->n_accesscache[lrupos].mode = rmode;
			np->n_accesscache[lrupos].stamp = time_second;
		}
		mtx_unlock(&np->n_mtx);
		if (retmode != NULL)
			*retmode = rmode;
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	return (error);
}

/*
 * nfs access vnode op.
 * For nfs version 2, just return ok. File accesses may fail later.
 * For nfs version 3, use the access rpc to check accessibility. If file modes
 * are changed on the server, accesses might still fail later.
 */
static int
nfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error = 0, i, gotahit;
	u_int32_t mode, wmode, rmode;
	int v34 = NFS_ISV34(vp);
	struct nfsnode *np = VTONFS(vp);

	/*
	 * Disallow write attempts on filesystems mounted read-only,
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((ap->a_accmode & (VWRITE | VAPPEND
#ifdef NFS4_ACL_EXTATTR_NAME
	    | VWRITE_NAMED_ATTRS | VDELETE_CHILD | VWRITE_ATTRIBUTES |
	    VDELETE | VWRITE_ACL | VWRITE_OWNER
#endif
	    )) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}
	/*
	 * For nfs v3 or v4, check to see if we have done this recently, and
	 * if so return our cached result instead of making an ACCESS call.
	 * If not cached, do an access rpc.  For nfs v2, you are stuck
	 * emulating ufs_access() locally using the vattr.  That may not be
	 * correct, since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about.
	 */
	if (v34) {
		if (ap->a_accmode & VREAD)
			mode = NFSACCESS_READ;
		else
			mode = 0;
		if (vp->v_type != VDIR) {
			if (ap->a_accmode & VWRITE)
				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
			if (ap->a_accmode & VAPPEND)
				mode |= NFSACCESS_EXTEND;
			if (ap->a_accmode & VEXEC)
				mode |= NFSACCESS_EXECUTE;
#ifdef NFS4_ACL_EXTATTR_NAME
			if (ap->a_accmode & VDELETE)
				mode |= NFSACCESS_DELETE;
#endif
		} else {
			if (ap->a_accmode & VWRITE)
				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
			if (ap->a_accmode & VAPPEND)
				mode |= NFSACCESS_EXTEND;
			if (ap->a_accmode & VEXEC)
				mode |= NFSACCESS_LOOKUP;
#ifdef NFS4_ACL_EXTATTR_NAME
			if (ap->a_accmode & VDELETE)
				mode |= NFSACCESS_DELETE;
			if (ap->a_accmode & VDELETE_CHILD)
				mode |= NFSACCESS_MODIFY;
#endif
		}
		/* XXX safety belt, only make blanket request if caching */
		if (nfsaccess_cache_timeout > 0) {
			wmode = NFSACCESS_READ | NFSACCESS_MODIFY |
				NFSACCESS_EXTEND | NFSACCESS_EXECUTE |
				NFSACCESS_DELETE | NFSACCESS_LOOKUP;
		} else {
			wmode = mode;
		}

		/*
		 * Does our cached result allow us to give a definite yes to
		 * this request?
		 */
		gotahit = 0;
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
			if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
			    if (time_second < (np->n_accesscache[i].stamp
				+ nfsaccess_cache_timeout) &&
				(np->n_accesscache[i].mode & mode) == mode) {
				NFSINCRGLOBAL(newnfsstats.accesscache_hits);
				gotahit = 1;
			    }
			    break;
			}
		}
		mtx_unlock(&np->n_mtx);
		if (gotahit == 0) {
			/*
			 * Either a no, or a don't know.  Go to the wire.
			 */
			NFSINCRGLOBAL(newnfsstats.accesscache_misses);
			error = nfs34_access_otw(vp, wmode, ap->a_td,
			    ap->a_cred, &rmode);
			if (!error &&
			    (rmode & mode) != mode)
				error = EACCES;
		}
		return (error);
	} else {
		if ((error = nfsspec_access(ap)) != 0) {
			return (error);
		}
		/*
		 * Attempt to prevent a mapped root from accessing a file
		 * which it shouldn't.  We try to read a byte from the file
		 * if the user is root and the file is not zero length.
		 * After calling nfsspec_access, we should have the correct
		 * file size cached.
		 */
		mtx_lock(&np->n_mtx);
		if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
		    && VTONFS(vp)->n_size > 0) {
			struct iovec aiov;
			struct uio auio;
			char buf[1];

			mtx_unlock(&np->n_mtx);
			aiov.iov_base = buf;
			aiov.iov_len = 1;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = 0;
			auio.uio_resid = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = ap->a_td;

			if (vp->v_type == VREG)
				error = ncl_readrpc(vp, &auio, ap->a_cred);
			else if (vp->v_type == VDIR) {
				char *bp;
				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
				aiov.iov_base = bp;
				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
				error = ncl_readdirrpc(vp, &auio, ap->a_cred,
				    ap->a_td);
				free(bp, M_TEMP);
			} else if (vp->v_type == VLNK)
				error = ncl_readlinkrpc(vp, &auio, ap->a_cred);
			else
				error = EACCES;
		} else
			mtx_unlock(&np->n_mtx);
		return (error);
	}
}

/*
 * nfs open vnode op
 * Check to see if the type is ok
 * and that deletion is not in progress.
 * For paged in text files, you will need to flush the page cache
 * if consistency is lost.
 */
/* ARGSUSED */
static int
nfs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;
	int fmode = ap->a_mode;

	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
		return (EOPNOTSUPP);

	/*
	 * For NFSv4, we need to do the Open Op before cache validation,
	 * so that we conform to RFC3530 Sec. 9.3.1.
	 */
	if (NFS_ISV4(vp)) {
		error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td);
		if (error) {
			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
			    (gid_t)0);
			return (error);
		}
	}

	/*
	 * Now, if this Open will be doing reading, re-validate/flush the
	 * cache, so that Close/Open coherency is maintained.
	 */
	if ((fmode & FREAD) && (!NFS_ISV4(vp) || nfscl_mustflush(vp))) {
		mtx_lock(&np->n_mtx);
		if (np->n_flag & NMODIFIED) {
			mtx_unlock(&np->n_mtx);
			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error == EINTR || error == EIO) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			np->n_attrstamp = 0;
			if (vp->v_type == VDIR)
				np->n_direofoffset = 0;
			error = VOP_GETATTR(vp, &vattr, ap->a_cred);
			if (error) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			mtx_lock(&np->n_mtx);
			np->n_mtime = vattr.va_mtime;
			if (NFS_ISV4(vp))
				np->n_change = vattr.va_filerev;
			mtx_unlock(&np->n_mtx);
		} else {
			struct thread *td = curthread;

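			/*
			 * Presumably the attribute cache was primed earlier
			 * in this same system call by this thread (that is
			 * what the n_ac_ts fields record); if not, force a
			 * fresh attribute fetch below.
			 */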
			if (np->n_ac_ts_syscalls != td->td_syscalls ||
			    np->n_ac_ts_tid != td->td_tid ||
			    td->td_proc == NULL ||
			    np->n_ac_ts_pid != td->td_proc->p_pid) {
				np->n_attrstamp = 0;
			}
			mtx_unlock(&np->n_mtx);
			error = VOP_GETATTR(vp, &vattr, ap->a_cred);
			if (error) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			mtx_lock(&np->n_mtx);
			if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) ||
			    NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
				if (vp->v_type == VDIR)
					np->n_direofoffset = 0;
				mtx_unlock(&np->n_mtx);
				error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
				if (error == EINTR || error == EIO) {
					if (NFS_ISV4(vp))
						(void) nfsrpc_close(vp, 0,
						    ap->a_td);
					return (error);
				}
				mtx_lock(&np->n_mtx);
				np->n_mtime = vattr.va_mtime;
				if (NFS_ISV4(vp))
					np->n_change = vattr.va_filerev;
			}
			mtx_unlock(&np->n_mtx);
		}
	}

	/*
	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
	 */
	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		if (np->n_directio_opens == 0) {
			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error) {
				if (NFS_ISV4(vp))
					(void) nfsrpc_close(vp, 0, ap->a_td);
				return (error);
			}
			mtx_lock(&np->n_mtx);
			np->n_flag |= NNONCACHE;
		} else {
			mtx_lock(&np->n_mtx);
		}
		np->n_directio_opens++;
		mtx_unlock(&np->n_mtx);
	}
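	/*
	 * XXX: if no VOP_GETATTR() call was made above (e.g. a write-only
	 * open, or an NFSv4 open where nfscl_mustflush() said no), vattr
	 * may not have been filled in before this use.
	 */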
	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
	return (0);
}

/*
 * nfs close vnode op
 * What an NFS client should do upon close after writing is a debatable issue.
 * Most NFS clients push delayed writes to the server upon close, basically for
 * two reasons:
 * 1 - So that any write errors may be reported back to the client process
 *     doing the close system call. By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 * 2 - To put a worst case upper bound on cache inconsistency between
 *     multiple clients for the file.
 * There is also a consistency problem for Version 2 of the protocol w.r.t.
 * not being able to tell if other clients are writing a file concurrently,
 * since there is no way of knowing if the changed modify time in the reply
 * is only due to the write for this client.
 * (NFS Version 3 provides weak cache consistency data in the reply that
 *  should be sufficient to detect and handle this case.)
 *
 * The current code does the following:
 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
 *                     or commit them (this satisfies 1 and 2 except for the
 *                     case where the server crashes after this close but
 *                     before the commit RPC, which is felt to be "good
 *                     enough").  Changing the last argument to ncl_flush()
 *                     to a 1 would force a commit operation, if it is felt
 *                     a commit is necessary now.
 * for NFS Version 4 - flush the dirty buffers and commit them, if
 *                     nfscl_mustflush() says this is necessary.
 *                     It is necessary if there is no write delegation held,
 *                     in order to satisfy open/close coherency.
 *                     If the file isn't cached on local stable storage,
 *                     it may be necessary in order to detect "out of space"
 *                     errors from the server, if the write delegation
 *                     issued by the server doesn't allow the file to grow.
 */
/* ARGSUSED */
static int
nfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsvattr nfsva;
	struct ucred *cred;
	int error = 0, ret, localcred = 0;
	int fmode = ap->a_fflag;

	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF))
		return (0);
	/*
	 * During shutdown, a_cred isn't valid, so just use root.
	 */
	if (ap->a_cred == NOCRED) {
		cred = newnfs_getcred();
		localcred = 1;
	} else {
		cred = ap->a_cred;
	}
	if (vp->v_type == VREG) {
	    /*
	     * Examine and clean dirty pages, regardless of NMODIFIED.
	     * This closes a major hole in close-to-open consistency.
	     * We want to push out all dirty pages (and buffers) on
	     * close, regardless of whether they were dirtied by
	     * mmap'ed writes or via write().
	     */
	    if (nfs_clean_pages_on_close && vp->v_object) {
		VM_OBJECT_LOCK(vp->v_object);
		vm_object_page_clean(vp->v_object, 0, 0, 0);
		VM_OBJECT_UNLOCK(vp->v_object);
	    }
	    mtx_lock(&np->n_mtx);
	    if (np->n_flag & NMODIFIED) {
		mtx_unlock(&np->n_mtx);
		if (NFS_ISV3(vp)) {
		    /*
		     * Under NFSv3 we have dirty buffers to dispose of.  We
		     * must flush them to the NFS server.  We have the option
		     * of waiting all the way through the commit rpc or just
		     * waiting for the initial write.  The default is to only
		     * wait through the initial write so the data is in the
		     * server's cache, which is roughly similar to the state
		     * a standard disk subsystem leaves the file in on close().
		     *
		     * We cannot clear the NMODIFIED bit in np->n_flag due to
		     * potential races with other processes, and certainly
		     * cannot clear it if we don't commit.
		     * These races occur when there is no longer the old
		     * traditional vnode locking implemented for Vnode Ops.
		     */
		    int cm = newnfs_commit_on_close ? 1 : 0;
		    error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm);
		    /* np->n_flag &= ~NMODIFIED; */
		} else if (NFS_ISV4(vp) && nfscl_mustflush(vp)) {
			int cm = newnfs_commit_on_close ? 1 : 0;
			error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm);
			/* as above w.r.t. races when clearing NMODIFIED */
			/* np->n_flag &= ~NMODIFIED; */
		} else
		    error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
		mtx_lock(&np->n_mtx);
	    }
	    /*
	     * Invalidate the attribute cache in all cases.
	     * An open is going to fetch fresh attrs any way, other procs
	     * on this node that have file open will be forced to do an
	     * otw attr fetch, but this is safe.
	     * --> A user found that their RPC count dropped by 20% when
	     *     this was commented out and I can't see any requirement
	     *     for it, so I've disabled it when negative lookups are
	     *     enabled. (What does this have to do with negative lookup
	     *     caching? Well nothing, except it was reported by the
	     *     same user that needed negative lookup caching and I wanted
	     *     there to be a way to disable it via sysctl to see if it
	     *     is the cause of some caching/coherency issue that might
	     *     crop up.)
	     */
	    if (newnfs_neglookup_enable == 0)
		    np->n_attrstamp = 0;
	    if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		error = np->n_error;
	    }
	    mtx_unlock(&np->n_mtx);
	}

	if (NFS_ISV4(vp)) {
		/*
		 * Get attributes so "change" is up to date.
		 */
		if (!error) {
			ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva,
			    NULL);
			if (!ret) {
				np->n_change = nfsva.na_filerev;
				(void) nfscl_loadattrcache(&vp, &nfsva, NULL,
				    NULL, 0, 0);
			}
		}

		/*
		 * and do the close.
		 */
		ret = nfsrpc_close(vp, 0, ap->a_td);
		if (!error && ret)
			error = ret;
		if (error)
			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
			    (gid_t)0);
	}
	if (newnfs_directio_enable)
		KASSERT((np->n_directio_asyncwr == 0),
			("nfs_close: dirty unflushed (%d) directio buffers\n",
			 np->n_directio_asyncwr));
	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		mtx_lock(&np->n_mtx);
		KASSERT((np->n_directio_opens > 0),
			("nfs_close: unexpected value (0) of n_directio_opens\n"));
		np->n_directio_opens--;
		if (np->n_directio_opens == 0)
			np->n_flag &= ~NNONCACHE;
		mtx_unlock(&np->n_mtx);
	}
	if (localcred)
		NFSFREECRED(cred);
	return (error);
}

/*
 * nfs getattr call from vfs.
 */
static int
nfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;	/* XXX */
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct nfsvattr nfsva;
	struct vattr *vap = ap->a_vap;
	struct vattr vattr;

	/*
	 * Update local times for special files.
	 */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & (NACC | NUPD))
		np->n_flag |= NCHG;
	mtx_unlock(&np->n_mtx);
	/*
	 * First look in the cache.
	 */
	if (ncl_getattrcache(vp, &vattr) == 0) {
		vap->va_type = vattr.va_type;
		vap->va_mode = vattr.va_mode;
		vap->va_nlink = vattr.va_nlink;
		vap->va_uid = vattr.va_uid;
		vap->va_gid = vattr.va_gid;
		vap->va_fsid = vattr.va_fsid;
		vap->va_fileid = vattr.va_fileid;
		vap->va_size = vattr.va_size;
		vap->va_blocksize = vattr.va_blocksize;
		vap->va_atime = vattr.va_atime;
		vap->va_mtime = vattr.va_mtime;
		vap->va_ctime = vattr.va_ctime;
		vap->va_gen = vattr.va_gen;
		vap->va_flags = vattr.va_flags;
		vap->va_rdev = vattr.va_rdev;
		vap->va_bytes = vattr.va_bytes;
		vap->va_filerev = vattr.va_filerev;
		/*
		 * Get the local modify time for the case of a write
		 * delegation.
		 */
		nfscl_deleggetmodtime(vp, &vap->va_mtime);
		return (0);
	}

	if (NFS_ISV34(vp) && nfs_prime_access_cache &&
	    nfsaccess_cache_timeout > 0) {
		NFSINCRGLOBAL(newnfsstats.accesscache_misses);
		nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL);
		if (ncl_getattrcache(vp, ap->a_vap) == 0) {
			nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime);
			return (0);
		}
	}
	error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL);
	if (!error)
		error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0);
	if (!error) {
		/*
		 * Get the local modify time for the case of a write
		 * delegation.
		 */
		nfscl_deleggetmodtime(vp, &vap->va_mtime);
	} else if (NFS_ISV4(vp)) {
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	}
	return (error);
}

/*
 * nfs setattr call.
 */
static int
nfs_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct thread *td = curthread;	/* XXX */
	struct vattr *vap = ap->a_vap;
	int error = 0;
	u_quad_t tsize;

#ifndef nolint
	tsize = (u_quad_t)0;
#endif

	/*
	 * Setting of flags and marking of atimes are not supported.
	 */
	if (vap->va_flags != VNOVAL)
		return (EOPNOTSUPP);

	/*
	 * Disallow write attempts if the filesystem is mounted read-only.
	 */
	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			if (vap->va_mtime.tv_sec == VNOVAL &&
			    vap->va_atime.tv_sec == VNOVAL &&
			    vap->va_mode == (mode_t)VNOVAL &&
			    vap->va_uid == (uid_t)VNOVAL &&
			    vap->va_gid == (gid_t)VNOVAL)
				return (0);
			vap->va_size = VNOVAL;
			break;
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			/*
			 * Since we run vnode_pager_setsize() early (why?),
			 * we must set np->n_size now to avoid vinvalbuf
			 * V_SAVE races that might setsize to a lower
			 * value.
			 */
			mtx_lock(&np->n_mtx);
			tsize = np->n_size;
			mtx_unlock(&np->n_mtx);
			error = ncl_meta_setsize(vp, ap->a_cred, td,
			    vap->va_size);
			mtx_lock(&np->n_mtx);
			if (np->n_flag & NMODIFIED) {
			    tsize = np->n_size;
			    mtx_unlock(&np->n_mtx);
			    if (vap->va_size == 0)
				error = ncl_vinvalbuf(vp, 0, td, 1);
			    else
				error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
			    if (error) {
				vnode_pager_setsize(vp, tsize);
				return (error);
			    }
			    /*
			     * Call nfscl_delegmodtime() to set the modify time
			     * locally, as required.
			     */
			    nfscl_delegmodtime(vp);
			} else
			    mtx_unlock(&np->n_mtx);
			/*
			 * np->n_size has already been set to vap->va_size
			 * in ncl_meta_setsize(). We must set it again since
			 * nfs_loadattrcache() could be called through
			 * ncl_meta_setsize() and could modify np->n_size.
			 */
			mtx_lock(&np->n_mtx);
			np->n_vattr.na_size = np->n_size = vap->va_size;
			mtx_unlock(&np->n_mtx);
		}
	} else {
		mtx_lock(&np->n_mtx);
		if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
		    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
			mtx_unlock(&np->n_mtx);
			if ((error = ncl_vinvalbuf(vp, V_SAVE, td, 1)) != 0 &&
			    (error == EINTR || error == EIO))
				return (error);
		} else
			mtx_unlock(&np->n_mtx);
	}
	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
	if (error && vap->va_size != VNOVAL) {
		mtx_lock(&np->n_mtx);
		np->n_size = np->n_vattr.na_size = tsize;
		vnode_pager_setsize(vp, tsize);
		mtx_unlock(&np->n_mtx);
	}
	return (error);
}

/*
 * Do an nfs setattr rpc.
 */
static int
nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    struct thread *td)
{
	struct nfsnode *np = VTONFS(vp);
	int error, ret, attrflag, i;
	struct nfsvattr nfsva;

	if (NFS_ISV34(vp)) {
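		/*
		 * A setattr can change the file's permissions, so toss
		 * any cached ACCESS results for this node by zeroing the
		 * stamps, forcing a fresh ACCESS RPC next time.
		 */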
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
			np->n_accesscache[i].stamp = 0;
		np->n_flag |= NDELEGMOD;
		mtx_unlock(&np->n_mtx);
	}
	error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag,
	    NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid);
	return (error);
}

/*
 * nfs lookup call, one step at a time...
 * First look in cache
 * If not found, unlock the directory nfsnode and do the rpc
 */
static int
nfs_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct mount *mp = dvp->v_mount;
	int flags = cnp->cn_flags;
	struct vnode *newvp;
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error = 0, attrflag, dattrflag, ltype;
	struct thread *td = cnp->cn_thread;
	struct nfsfh *nfhp;
	struct nfsvattr dnfsva, nfsva;

	*vpp = NULLVP;
	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	if (dvp->v_type != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(mp);
	np = VTONFS(dvp);

	/* For NFSv4, wait until any remove is done. */
	mtx_lock(&np->n_mtx);
	while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
		np->n_flag |= NREMOVEWANT;
		(void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
	}
	mtx_unlock(&np->n_mtx);

	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
		return (error);
	if ((error = cache_lookup(dvp, vpp, cnp)) &&
	    (error != ENOENT || newnfs_neglookup_enable != 0)) {
		struct vattr vattr;

		if (error == ENOENT) {
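			/*
			 * A cached negative lookup is trusted only while
			 * the directory's modify time still matches the
			 * one sampled when the entry was made (n_dmtime).
			 */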
			if (!VOP_GETATTR(dvp, &vattr, cnp->cn_cred) &&
			    vattr.va_mtime.tv_sec == np->n_dmtime) {
				NFSINCRGLOBAL(newnfsstats.lookupcache_hits);
				return (ENOENT);
			}
			cache_purge_negative(dvp);
			np->n_dmtime = 0;
		} else {
			newvp = *vpp;
			if (nfscl_nodeleg(newvp, 0) == 0 ||
			    (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred) &&
			    vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime)) {
				NFSINCRGLOBAL(newnfsstats.lookupcache_hits);
				if (cnp->cn_nameiop != LOOKUP &&
				    (flags & ISLASTCN))
					cnp->cn_flags |= SAVENAME;
				return (0);
			}
			cache_purge(newvp);
			if (dvp != newvp)
				vput(newvp);
			else
				vrele(newvp);
			*vpp = NULLVP;
		}
	}
	error = 0;
	newvp = NULLVP;
	NFSINCRGLOBAL(newnfsstats.lookupcache_misses);
	error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
	    NULL);
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (error) {
		if (newnfs_neglookup_enable != 0 &&
		    error == ENOENT && (cnp->cn_flags & MAKEENTRY) &&
		    cnp->cn_nameiop != CREATE) {
			if (np->n_dmtime == 0)
				np->n_dmtime = np->n_vattr.na_mtime.tv_sec;
			cache_enter(dvp, NULL, cnp);
		}
		if (newvp != NULLVP) {
			vput(newvp);
			*vpp = NULLVP;
		}
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN) && error == ENOENT) {
			if (mp->mnt_flag & MNT_RDONLY)
				error = EROFS;
			else
				error = EJUSTRETURN;
		}
		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
			cnp->cn_flags |= SAVENAME;
		if (NFS_ISV4(dvp))
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
		return (error);
	}

	/*
	 * Handle RENAME case...
	 */
	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
		if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
			FREE((caddr_t)nfhp, M_NFSFH);
			return (EISDIR);
		}
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL);
		if (error)
			return (error);
		newvp = NFSTOV(np);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
		*vpp = newvp;
		cnp->cn_flags |= SAVENAME;
		return (0);
	}

	if (flags & ISDOTDOT) {
		ltype = VOP_ISLOCKED(dvp);
		error = vfs_busy(mp, MBF_NOWAIT);
		if (error != 0) {
			vfs_ref(mp);
			VOP_UNLOCK(dvp, 0);
			error = vfs_busy(mp, 0);
			vn_lock(dvp, ltype | LK_RETRY);
			vfs_rel(mp);
			if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
				vfs_unbusy(mp);
				error = ENOENT;
			}
			if (error != 0)
				return (error);
		}
		VOP_UNLOCK(dvp, 0);
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL);
		if (error == 0)
			newvp = NFSTOV(np);
		vfs_unbusy(mp);
		if (newvp != dvp)
			vn_lock(dvp, ltype | LK_RETRY);
		if (dvp->v_iflag & VI_DOOMED) {
			if (error == 0) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
			error = ENOENT;
		}
		if (error != 0)
			return (error);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
		FREE((caddr_t)nfhp, M_NFSFH);
		VREF(dvp);
		newvp = dvp;
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else {
		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL);
		if (error)
			return (error);
		newvp = NFSTOV(np);
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	}
	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
		cnp->cn_flags |= SAVENAME;
	if ((cnp->cn_flags & MAKEENTRY) &&
	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
		np->n_ctime = np->n_vattr.na_vattr.va_ctime.tv_sec;
		cache_enter(dvp, newvp, cnp);
	}
	*vpp = newvp;
	return (0);
}

/*
 * nfs read call.
 * Just call ncl_bioread() to do the work.
 */
static int
nfs_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VREG:
		return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
	case VDIR:
		return (EISDIR);
	default:
		return (EOPNOTSUPP);
	}
}

/*
 * nfs readlink call
 */
static int
nfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_type != VLNK)
		return (EINVAL);
	return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
}

/*
 * Do a readlink rpc.
 * Called by ncl_doio() from below the buffer cache.
 */
int
ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int error, ret, attrflag;
	struct nfsvattr nfsva;

	error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
	    &attrflag, NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs read rpc call
 * Ditto above
 */
int
ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int error, ret, attrflag;
	struct nfsvattr nfsva;

	error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag,
	    NULL);
	if (attrflag) {
		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
		if (ret && !error)
			error = ret;
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs write call
 */
int
ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
	     int *iomode, int *must_commit)
{
	struct nfsvattr nfsva;
	int error = 0, attrflag, ret;
	u_char verf[NFSX_VERF];
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	*must_commit = 0;
	error = nfsrpc_write(vp, uiop, iomode, verf, cred,
	    uiop->uio_td, &nfsva, &attrflag, NULL);
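	/*
	 * If the server's write verifier has changed, it has most likely
	 * rebooted and may have lost previously unstable (uncommitted)
	 * writes, so tell the caller that a commit/rewrite is needed.
	 */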
	NFSLOCKMNT(nmp);
	if (!error && NFSHASWRITEVERF(nmp) &&
	    NFSBCMP(verf, nmp->nm_verf, NFSX_VERF)) {
		*must_commit = 1;
		NFSBCOPY(verf, nmp->nm_verf, NFSX_VERF);
	}
	NFSUNLOCKMNT(nmp);
	if (attrflag) {
		if (VTONFS(vp)->n_flag & ND_NFSV4)
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
			    1);
		else
			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
			    1);
		if (ret && !error)
			error = ret;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
		*iomode = NFSWRITE_FILESYNC;
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs mknod rpc
 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 * mode set to specify the file type and the size field for rdev.
 */
static int
nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
    struct vattr *vap)
{
	struct nfsvattr nfsva, dnfsva;
	struct vnode *newvp = NULL;
	struct nfsnode *np = NULL, *dnp;
	struct nfsfh *nfhp;
	struct vattr vattr;
	int error = 0, attrflag, dattrflag;
	u_int32_t rdev;

	if (vap->va_type == VCHR || vap->va_type == VBLK)
		rdev = vap->va_rdev;
	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
		rdev = 0xffffffff;
	else
		return (EOPNOTSUPP);
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
		return (error);
	error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap,
	    rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva,
	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
	if (!error) {
		if (!nfhp)
			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
			    NULL);
		if (nfhp)
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
			    cnp->cn_thread, &np, NULL);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (!error) {
		newvp = NFSTOV(np);
		if (attrflag)
			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	}
	if (!error) {
		if ((cnp->cn_flags & MAKEENTRY))
			cache_enter(dvp, newvp, cnp);
		*vpp = newvp;
	} else if (NFS_ISV4(dvp)) {
		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
		    vap->va_gid);
	}
	dnp = VTONFS(dvp);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag)
		dnp->n_attrstamp = 0;
	mtx_unlock(&dnp->n_mtx);
	return (error);
}

/*
 * nfs mknod vop
 * just call nfs_mknodrpc() to do the work.
 */
/* ARGSUSED */
static int
nfs_mknod(struct vop_mknod_args *ap)
{
	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
}

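/*
 * Seed for the exclusive-create verifier; nfs_create() below combines
 * it with a local IP address, when one is available.
 */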
static u_long create_verf;
/*
 * nfs file create call
 */
static int
nfs_create(struct vop_create_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	struct nfsmount *nmp;
	struct nfsvattr dnfsva, nfsva;
	struct nfsfh *nfhp;
	nfsquad_t cverf;
	int error = 0, attrflag, dattrflag, fmode = 0;
	struct vattr vattr;

	/*
	 * Oops, not for me..
	 */
	if (vap->va_type == VSOCK)
		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
		return (error);
	if (vap->va_vaflags & VA_EXCLUSIVE)
		fmode |= O_EXCL;
	dnp = VTONFS(dvp);
	nmp = VFSTONFS(vnode_mount(dvp));
again:
	/* For NFSv4, wait until any remove is done. */
	mtx_lock(&dnp->n_mtx);
	while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) {
		dnp->n_flag |= NREMOVEWANT;
		(void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0);
	}
	mtx_unlock(&dnp->n_mtx);

	CURVNET_SET(P_TO_VNET(&proc0));
#ifdef INET
	IN_IFADDR_RLOCK();
	if (!TAILQ_EMPTY(&V_in_ifaddrhead))
		cverf.lval[0] = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr;
	else
#endif
		cverf.lval[0] = create_verf;
#ifdef INET
	IN_IFADDR_RUNLOCK();
#endif
	cverf.lval[1] = ++create_verf;
	CURVNET_RESTORE();
	error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva,
	    &nfhp, &attrflag, &dattrflag, NULL);
	if (!error) {
		if (nfhp == NULL)
			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
			    NULL);
		if (nfhp != NULL)
			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
			    cnp->cn_thread, &np, NULL);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	if (!error) {
		newvp = NFSTOV(np);
		if (attrflag)
			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	}
	if (error) {
		if (newvp != NULL) {
			vrele(newvp);
			newvp = NULL;
		}
		if (NFS_ISV34(dvp) && (fmode & O_EXCL) &&
		    error == NFSERR_NOTSUPP) {
			fmode &= ~O_EXCL;
			goto again;
		}
	} else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
		if (nfscl_checksattr(vap, &nfsva)) {
			error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
			    cnp->cn_thread, &nfsva, &attrflag, NULL);
			if (error && (vap->va_uid != (uid_t)VNOVAL ||
			    vap->va_gid != (gid_t)VNOVAL)) {
				/* try again without setting uid/gid */
				vap->va_uid = (uid_t)VNOVAL;
				vap->va_gid = (gid_t)VNOVAL;
				error = nfsrpc_setattr(newvp, vap, NULL,
				    cnp->cn_cred, cnp->cn_thread, &nfsva,
				    &attrflag, NULL);
			}
			if (attrflag)
				(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
				    NULL, 0, 1);
		}
	}
	if (!error) {
		if (cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, newvp, cnp);
		*ap->a_vpp = newvp;
	} else if (NFS_ISV4(dvp)) {
		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
		    vap->va_gid);
	}
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag)
		dnp->n_attrstamp = 0;
	mtx_unlock(&dnp->n_mtx);
	return (error);
}

/*
 * nfs file remove call
 * To try and make nfs semantics closer to ufs semantics, a file that has
 * other processes using the vnode is renamed instead of removed and then
 * removed later on the last close.
 * - If v_usecount > 1
 *	if a silly rename is not already in the works
 *	    call nfs_sillyrename() to set one up
 * - else
 *	do the remove rpc
 */
static int
nfs_remove(struct vop_remove_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

#ifndef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("nfs_remove: no name");
	if (vrefcnt(vp) < 1)
		panic("nfs_remove: bad v_usecount");
#endif
	if (vp->v_type == VDIR)
		error = EPERM;
	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
	    vattr.va_nlink > 1)) {
		/*
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized. Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but since you get this if
		 * another host removes the file..
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
		/* Do the rpc */
		if (error != EINTR && error != EIO)
			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost,
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed.
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp);
	np->n_attrstamp = 0;
	return (error);
}

/*
 * nfs file remove rpc called from nfs_inactive
 */
int
ncl_removeit(struct sillyrename *sp, struct vnode *vp)
{
	/*
	 * Make sure that the directory vnode is still valid.
	 * XXX we should lock sp->s_dvp here.
	 */
	if (sp->s_dvp->v_type == VBAD)
		return (0);
	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
	    sp->s_cred, NULL));
}

/*
 * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
 */
static int
nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td)
{
	struct nfsvattr dnfsva;
	struct nfsnode *dnp = VTONFS(dvp);
	int error = 0, dattrflag;

	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NREMOVEINPROG;
	mtx_unlock(&dnp->n_mtx);
	error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
	    &dattrflag, NULL);
	mtx_lock(&dnp->n_mtx);
	if ((dnp->n_flag & NREMOVEWANT)) {
		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
		mtx_unlock(&dnp->n_mtx);
		wakeup((caddr_t)dnp);
	} else {
		dnp->n_flag &= ~NREMOVEINPROG;
		mtx_unlock(&dnp->n_mtx);
	}
	if (dattrflag)
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (!dattrflag)
		dnp->n_attrstamp = 0;
	mtx_unlock(&dnp->n_mtx);
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs file rename call
 */
static int
nfs_rename(struct vop_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	struct nfsnode *fnp = VTONFS(ap->a_fvp);
	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
	struct nfsv4node *newv4 = NULL;
	int error;

#ifndef DIAGNOSTIC
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("nfs_rename: no name");
#endif
	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	if (fvp == tvp) {
		ncl_printf("nfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto out;
	}
	if ((error = vn_lock(fvp, LK_EXCLUSIVE)))
		goto out;

	/*
	 * We have to flush B_DELWRI data prior to renaming
	 * the file.  If we don't, the delayed-write buffers
	 * can be flushed out later after the file has gone stale
	 * under NFSV3.  NFSV2 does not have this problem because
	 * (as far as I can tell) it flushes dirty buffers more
	 * often.
	 *
	 * Skip the rename operation if the fsync fails, this can happen
	 * due to the server's volume being full, when we pushed out data
	 * that was written back to our cache earlier. Not checking for
	 * this condition can result in potential (silent) data loss.
	 */
	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
	VOP_UNLOCK(fvp, 0);
	if (!error && tvp)
		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
	if (error)
		goto out;

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * XXX Can't sillyrename a directory.
	 */
	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		vput(tvp);
		tvp = NULL;
	}

	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
	    tcnp->cn_thread);

	if (!error) {
		/*
		 * For NFSv4, check to see if it is the same name and
		 * replace the name, if it is different.
		 */
		MALLOC(newv4, struct nfsv4node *,
		    sizeof (struct nfsv4node) +
		    tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
		    M_NFSV4NODE, M_WAITOK);
		mtx_lock(&tdnp->n_mtx);
		mtx_lock(&fnp->n_mtx);
		if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
		    (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
		      NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
		      tcnp->cn_namelen) ||
		      tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
		      NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
			tdnp->n_fhp->nfh_len))) {
#ifdef notdef
{ char nnn[100]; int nnnl;
nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
bcopy(tcnp->cn_nameptr, nnn, nnnl);
nnn[nnnl] = '\0';
printf("ren replace=%s\n",nnn);
}
#endif
			FREE((caddr_t)fnp->n_v4, M_NFSV4NODE);
			fnp->n_v4 = newv4;
			newv4 = NULL;
			fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
			fnp->n_v4->n4_namelen = tcnp->cn_namelen;
			NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
			    tdnp->n_fhp->nfh_len);
			NFSBCOPY(tcnp->cn_nameptr,
			    NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
		}
		mtx_unlock(&tdnp->n_mtx);
		mtx_unlock(&fnp->n_mtx);
		if (newv4 != NULL)
			FREE((caddr_t)newv4, M_NFSV4NODE);
	}

	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}

out:
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs file rename rpc called from nfs_sillyrename()
 */
static int
nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
    struct sillyrename *sp)
{

	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
	    sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
	    scnp->cn_thread));
}

/*
 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
 */
static int
nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
    int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
    int tnamelen, struct ucred *cred, struct thread *td)
{
	struct nfsvattr fnfsva, tnfsva;
	struct nfsnode *fdnp = VTONFS(fdvp);
	struct nfsnode *tdnp = VTONFS(tdvp);
	int error = 0, fattrflag, tattrflag;

	error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
	    tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
	    &tattrflag, NULL, NULL);
	mtx_lock(&fdnp->n_mtx);
	fdnp->n_flag |= NMODIFIED;
	mtx_unlock(&fdnp->n_mtx);
	mtx_lock(&tdnp->n_mtx);
	tdnp->n_flag |= NMODIFIED;
	mtx_unlock(&tdnp->n_mtx);
	if (fattrflag)
		(void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1);
	else
		fdnp->n_attrstamp = 0;
	if (tattrflag)
		(void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1);
	else
		tdnp->n_attrstamp = 0;
	if (error && NFS_ISV4(fdvp))
		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
	return (error);
}

/*
 * nfs hard link create call
 */
static int
nfs_link(struct vop_link_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *tdnp;
	struct nfsvattr nfsva, dnfsva;
	int error = 0, attrflag, dattrflag;

	if (vp->v_mount != tdvp->v_mount) {
		return (EXDEV);
	}

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 */
	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);

	error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag,
	    &dattrflag, NULL);
	tdnp = VTONFS(tdvp);
	mtx_lock(&tdnp->n_mtx);
	tdnp->n_flag |= NMODIFIED;
	mtx_unlock(&tdnp->n_mtx);
	if (attrflag)
		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
	else
		VTONFS(vp)->n_attrstamp = 0;
	if (dattrflag)
		(void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1);
	else
		tdnp->n_attrstamp = 0;
	/*
	 * If negative lookup caching is enabled, I might as well
	 * add an entry for this node. Not necessary for correctness,
	 * but if negative caching is enabled, then the system
	 * must care about lookup caching hit rate, so...
	 */
	if (newnfs_neglookup_enable != 0 &&
	    (cnp->cn_flags & MAKEENTRY))
		cache_enter(tdvp, vp, cnp);
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
		    (gid_t)0);
	return (error);
}

/*
 * nfs symbolic link create call
 */
static int
nfs_symlink(struct vop_symlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsvattr nfsva, dnfsva;
	struct nfsfh *nfhp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	int error = 0, attrflag, dattrflag, ret;

	vap->va_type = VLNK;
	error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva,
	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
	if (nfhp) {
		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
		    &np, NULL);
		if (!ret)
			newvp = NFSTOV(np);
		else if (!error)
			error = ret;
	}
	if (newvp != NULL) {
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else if (!error) {
		/*
		 * If we do not have an error and we could not extract the
		 * newvp from the response due to the request being NFSv2, we
1870		 * have to do a lookup in order to obtain a newvp to return.
1871		 */
1872		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
1873		    cnp->cn_cred, cnp->cn_thread, &np);
1874		if (!error)
1875			newvp = NFSTOV(np);
1876	}
1877	if (error) {
1878		if (newvp)
1879			vput(newvp);
1880		if (NFS_ISV4(dvp))
1881			error = nfscl_maperr(cnp->cn_thread, error,
1882			    vap->va_uid, vap->va_gid);
1883	} else {
1884		/*
1885		 * If negative lookup caching is enabled, I might as well
1886		 * add an entry for this node. Not necessary for correctness,
1887		 * but if negative caching is enabled, then the system
1888		 * must care about lookup caching hit rate, so...
1889		 */
1890		if (newnfs_neglookup_enable != 0 &&
1891		    (cnp->cn_flags & MAKEENTRY))
1892			cache_enter(dvp, newvp, cnp);
1893		*ap->a_vpp = newvp;
1894	}
1895
1896	dnp = VTONFS(dvp);
1897	mtx_lock(&dnp->n_mtx);
1898	dnp->n_flag |= NMODIFIED;
1899	mtx_unlock(&dnp->n_mtx);
1900	if (dattrflag)
1901		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
1902	else
1903		dnp->n_attrstamp = 0;
1904	return (error);
1905}
1906
1907/*
1908 * nfs make dir call
1909 */
1910static int
1911nfs_mkdir(struct vop_mkdir_args *ap)
1912{
1913	struct vnode *dvp = ap->a_dvp;
1914	struct vattr *vap = ap->a_vap;
1915	struct componentname *cnp = ap->a_cnp;
1916	struct nfsnode *np = NULL, *dnp;
1917	struct vnode *newvp = NULL;
1918	struct vattr vattr;
1919	struct nfsfh *nfhp;
1920	struct nfsvattr nfsva, dnfsva;
1921	int error = 0, attrflag, dattrflag, ret;
1922
1923	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
1924		return (error);
1925	vap->va_type = VDIR;
1926	error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
1927	    vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp,
1928	    &attrflag, &dattrflag, NULL);
1929	dnp = VTONFS(dvp);
1930	mtx_lock(&dnp->n_mtx);
1931	dnp->n_flag |= NMODIFIED;
1932	mtx_unlock(&dnp->n_mtx);
1933	if (dattrflag)
1934		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
1935	else
1936		dnp->n_attrstamp = 0;
1937	if (nfhp) {
1938		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
1939		    &np, NULL);
1940		if (!ret) {
1941			newvp = NFSTOV(np);
1942			if (attrflag)
1943			   (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
1944				NULL, 0, 1);
1945		} else if (!error)
1946			error = ret;
1947	}
1948	if (!error && newvp == NULL) {
1949		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
1950		    cnp->cn_cred, cnp->cn_thread, &np);
1951		if (!error) {
1952			newvp = NFSTOV(np);
1953			if (newvp->v_type != VDIR)
1954				error = EEXIST;
1955		}
1956	}
1957	if (error) {
1958		if (newvp)
1959			vput(newvp);
1960		if (NFS_ISV4(dvp))
1961			error = nfscl_maperr(cnp->cn_thread, error,
1962			    vap->va_uid, vap->va_gid);
1963	} else {
1964		/*
1965		 * If negative lookup caching is enabled, I might as well
1966		 * add an entry for this node. Not necessary for correctness,
1967		 * but if negative caching is enabled, then the system
1968		 * must care about lookup caching hit rate, so...
1969		 */
1970		if (newnfs_neglookup_enable != 0 &&
1971		    (cnp->cn_flags & MAKEENTRY))
1972			cache_enter(dvp, newvp, cnp);
1973		*ap->a_vpp = newvp;
1974	}
1975	return (error);
1976}
1977
1978/*
1979 * nfs remove directory call
1980 */
1981static int
1982nfs_rmdir(struct vop_rmdir_args *ap)
1983{
1984	struct vnode *vp = ap->a_vp;
1985	struct vnode *dvp = ap->a_dvp;
1986	struct componentname *cnp = ap->a_cnp;
1987	struct nfsnode *dnp;
1988	struct nfsvattr dnfsva;
1989	int error, dattrflag;
1990
1991	if (dvp == vp)
1992		return (EINVAL);
1993	error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
1994	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL);
1995	dnp = VTONFS(dvp);
1996	mtx_lock(&dnp->n_mtx);
1997	dnp->n_flag |= NMODIFIED;
1998	mtx_unlock(&dnp->n_mtx);
1999	if (dattrflag)
2000		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
2001	else
2002		dnp->n_attrstamp = 0;
2003
2004	cache_purge(dvp);
2005	cache_purge(vp);
2006	if (error && NFS_ISV4(dvp))
2007		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
2008		    (gid_t)0);
2009	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
2011	 */
2012	if (error == ENOENT)
2013		error = 0;
2014	return (error);
2015}
2016
2017/*
2018 * nfs readdir call
2019 */
2020static int
2021nfs_readdir(struct vop_readdir_args *ap)
2022{
2023	struct vnode *vp = ap->a_vp;
2024	struct nfsnode *np = VTONFS(vp);
2025	struct uio *uio = ap->a_uio;
2026	int tresid, error = 0;
2027	struct vattr vattr;
2028
2029	if (vp->v_type != VDIR)
		return (EPERM);
2031
2032	/*
2033	 * First, check for hit on the EOF offset cache
2034	 */
2035	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
2036	    (np->n_flag & NMODIFIED) == 0) {
2037		if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
2038			mtx_lock(&np->n_mtx);
2039			if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) ||
2040			    !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
2041				mtx_unlock(&np->n_mtx);
2042				NFSINCRGLOBAL(newnfsstats.direofcache_hits);
2043				return (0);
2044			} else
2045				mtx_unlock(&np->n_mtx);
2046		}
2047	}
2048
2049	/*
2050	 * Call ncl_bioread() to do the real work.
2051	 */
2052	tresid = uio->uio_resid;
2053	error = ncl_bioread(vp, uio, 0, ap->a_cred);
2054
2055	if (!error && uio->uio_resid == tresid)
2056		NFSINCRGLOBAL(newnfsstats.direofcache_misses);
2057	return (error);
2058}
2059
2060/*
2061 * Readdir rpc call.
2062 * Called from below the buffer cache by ncl_doio().
2063 */
2064int
2065ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
2066    struct thread *td)
2067{
2068	struct nfsvattr nfsva;
2069	nfsuint64 *cookiep, cookie;
2070	struct nfsnode *dnp = VTONFS(vp);
2071	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2072	int error = 0, eof, attrflag;
2073
#ifdef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
	    (uiop->uio_resid & (DIRBLKSIZ - 1)))
		panic("nfs readdirrpc bad uio");
#endif
2079
2080	/*
2081	 * If there is no cookie, assume directory was stale.
2082	 */
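	/*
	 * Directory offsets here are DIRBLKSIZ aligned and ncl_getcookie()
	 * maps such an offset to the cookie the server handed back for
	 * that point in the directory; judging from the two call sites,
	 * its final argument says whether a missing entry may be created.
	 */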
2083	ncl_dircookie_lock(dnp);
2084	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
2085	if (cookiep) {
2086		cookie = *cookiep;
2087		ncl_dircookie_unlock(dnp);
2088	} else {
2089		ncl_dircookie_unlock(dnp);
2090		return (NFSERR_BAD_COOKIE);
2091	}
2092
2093	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
2094		(void)ncl_fsinfo(nmp, vp, cred, td);
2095
2096	error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva,
2097	    &attrflag, &eof, NULL);
2098	if (attrflag)
2099		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
2100
2101	if (!error) {
2102		/*
2103		 * We are now either at the end of the directory or have filled
2104		 * the block.
2105		 */
2106		if (eof)
2107			dnp->n_direofoffset = uiop->uio_offset;
2108		else {
2109			if (uiop->uio_resid > 0)
2110				ncl_printf("EEK! readdirrpc resid > 0\n");
2111			ncl_dircookie_lock(dnp);
2112			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
2113			*cookiep = cookie;
2114			ncl_dircookie_unlock(dnp);
2115		}
2116	} else if (NFS_ISV4(vp)) {
2117		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
2118	}
2119	return (error);
2120}
2121
2122/*
2123 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc().
2124 */
2125int
2126ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
2127    struct thread *td)
2128{
2129	struct nfsvattr nfsva;
2130	nfsuint64 *cookiep, cookie;
2131	struct nfsnode *dnp = VTONFS(vp);
2132	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2133	int error = 0, attrflag, eof;
2134
#ifdef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
	    (uiop->uio_resid & (DIRBLKSIZ - 1)))
		panic("nfs readdirplusrpc bad uio");
#endif
2140
2141	/*
2142	 * If there is no cookie, assume directory was stale.
2143	 */
2144	ncl_dircookie_lock(dnp);
2145	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
2146	if (cookiep) {
2147		cookie = *cookiep;
2148		ncl_dircookie_unlock(dnp);
2149	} else {
2150		ncl_dircookie_unlock(dnp);
2151		return (NFSERR_BAD_COOKIE);
2152	}
2153
2154	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
2155		(void)ncl_fsinfo(nmp, vp, cred, td);
2156	error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva,
2157	    &attrflag, &eof, NULL);
2158	if (attrflag)
2159		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
2160
2161	if (!error) {
2162		/*
		 * We are now either at the end of the directory or have
		 * filled the block.
2165		 */
2166		if (eof)
2167			dnp->n_direofoffset = uiop->uio_offset;
2168		else {
2169			if (uiop->uio_resid > 0)
2170				ncl_printf("EEK! readdirplusrpc resid > 0\n");
2171			ncl_dircookie_lock(dnp);
2172			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
2173			*cookiep = cookie;
2174			ncl_dircookie_unlock(dnp);
2175		}
2176	} else if (NFS_ISV4(vp)) {
2177		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
2178	}
2179	return (error);
2180}
2181
2182/*
 * Silly rename. To make the stateless NFS filesystem look a little more
 * like "ufs", a remove of an active vnode is translated into a rename to
 * a funny looking filename that is removed later by nfs_inactive() on the
 * nfsnode. There is the potential for another process on a different
 * client to create the same funny name between the time nfs_lookitup()
 * fails to find the name and the rename completes, but...
2189 */
2190static int
2191nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2192{
2193	struct sillyrename *sp;
2194	struct nfsnode *np;
2195	int error;
2196	short pid;
2197	unsigned int lticks;
2198
2199	cache_purge(dvp);
2200	np = VTONFS(vp);
#ifdef DIAGNOSTIC
	if (vp->v_type == VDIR)
		panic("nfs: sillyrename dir");
#endif
2205	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
2206	    M_NEWNFSREQ, M_WAITOK);
2207	sp->s_cred = crhold(cnp->cn_cred);
2208	sp->s_dvp = dvp;
2209	VREF(dvp);
2210
2211	/*
2212	 * Fudge together a funny name.
	 * The format of the funny name was changed to accommodate more
	 * sillynames per directory.
	 * The name is now .nfs.<ticks>.<pid>4.4, where ticks is the
	 * kernel's clock tick count at the time of the rename.
2217	 */
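	/*
	 * For example (hypothetical values), lticks == 0x0007a120 and
	 * pid == 0x01a4 would produce the name ".nfs.0007a120.01a44.4".
	 */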
2218	pid = cnp->cn_thread->td_proc->p_pid;
2219	lticks = (unsigned int)ticks;
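	/*
	 * Keep bumping the tick value until nfs_lookitup() fails to find
	 * the candidate name, i.e. until the name is not already in use.
	 */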
2220	for ( ; ; ) {
2221		sp->s_namlen = sprintf(sp->s_name,
2222				       ".nfs.%08x.%04x4.4", lticks,
2223				       pid);
2224		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2225				 cnp->cn_thread, NULL))
2226			break;
2227		lticks++;
2228	}
2229	error = nfs_renameit(dvp, vp, cnp, sp);
2230	if (error)
2231		goto bad;
2232	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2233		cnp->cn_thread, &np);
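	/*
	 * Any error from this nfs_lookitup() is ignored, apparently
	 * intentionally: the rename itself has already succeeded, so the
	 * silly name must be remembered for removal at inactive time
	 * regardless.
	 */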
2234	np->n_sillyrename = sp;
2235	return (0);
2236bad:
2237	vrele(sp->s_dvp);
2238	crfree(sp->s_cred);
2239	free((caddr_t)sp, M_NEWNFSREQ);
2240	return (error);
2241}
2242
2243/*
2244 * Look up a file name and optionally either update the file handle or
2245 * allocate an nfsnode, depending on the value of npp.
2246 * npp == NULL	--> just do the lookup
2247 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
2248 *			handled too
2249 * *npp != NULL --> update the file handle in the vnode
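 *
 * A rough usage sketch for the *npp == NULL case (a hypothetical caller,
 * not code from this file):
 *	np = NULL;
 *	error = nfs_lookitup(dvp, name, len, cred, td, &np);
 *	if (!error)
 *		newvp = NFSTOV(np);	(attributes are loaded as well)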
2250 */
2251static int
2252nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
2253    struct thread *td, struct nfsnode **npp)
2254{
2255	struct vnode *newvp = NULL, *vp;
2256	struct nfsnode *np, *dnp = VTONFS(dvp);
2257	struct nfsfh *nfhp, *onfhp;
2258	struct nfsvattr nfsva, dnfsva;
2259	struct componentname cn;
2260	int error = 0, attrflag, dattrflag;
2261	u_int hash;
2262
2263	error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
2264	    &nfhp, &attrflag, &dattrflag, NULL);
2265	if (dattrflag)
2266		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
2267	if (npp && !error) {
2268		if (*npp != NULL) {
2269		    np = *npp;
2270		    vp = NFSTOV(np);
2271		    /*
2272		     * For NFSv4, check to see if it is the same name and
2273		     * replace the name, if it is different.
2274		     */
2275		    if (np->n_v4 != NULL && nfsva.na_type == VREG &&
2276			(np->n_v4->n4_namelen != len ||
2277			 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) ||
2278			 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
2279			 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
2280			 dnp->n_fhp->nfh_len))) {
#ifdef notdef
			    {
				char nnn[100];
				int nnnl;

				nnnl = (len < 100) ? len : 99;
				bcopy(name, nnn, nnnl);
				nnn[nnnl] = '\0';
				printf("replace=%s\n", nnn);
			    }
#endif
2289			    FREE((caddr_t)np->n_v4, M_NFSV4NODE);
2290			    MALLOC(np->n_v4, struct nfsv4node *,
2291				sizeof (struct nfsv4node) +
2292				dnp->n_fhp->nfh_len + len - 1,
2293				M_NFSV4NODE, M_WAITOK);
2294			    np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
2295			    np->n_v4->n4_namelen = len;
2296			    NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
2297				dnp->n_fhp->nfh_len);
2298			    NFSBCOPY(name, NFS4NODENAME(np->n_v4), len);
2299		    }
2300		    hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len,
2301			FNV1_32_INIT);
2302		    onfhp = np->n_fhp;
2303		    /*
2304		     * Rehash node for new file handle.
2305		     */
2306		    vfs_hash_rehash(vp, hash);
2307		    np->n_fhp = nfhp;
2308		    if (onfhp != NULL)
2309			FREE((caddr_t)onfhp, M_NFSFH);
2310		    newvp = NFSTOV(np);
2311		} else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
2312		    FREE((caddr_t)nfhp, M_NFSFH);
2313		    VREF(dvp);
2314		    newvp = dvp;
2315		} else {
2316		    cn.cn_nameptr = name;
2317		    cn.cn_namelen = len;
2318		    error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
2319			&np, NULL);
2320		    if (error)
2321			return (error);
2322		    newvp = NFSTOV(np);
2323		}
2324		if (!attrflag && *npp == NULL) {
2325			vrele(newvp);
2326			return (ENOENT);
2327		}
2328		if (attrflag)
2329			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
2330			    0, 1);
2331	}
2332	if (npp && *npp == NULL) {
2333		if (error) {
2334			if (newvp) {
2335				if (newvp == dvp)
2336					vrele(newvp);
2337				else
2338					vput(newvp);
2339			}
2340		} else
2341			*npp = np;
2342	}
2343	if (error && NFS_ISV4(dvp))
2344		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
2345	return (error);
2346}
2347
2348/*
2349 * Nfs Version 3 and 4 commit rpc
2350 */
2351int
2352ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
2353   struct thread *td)
2354{
2355	struct nfsvattr nfsva;
2356	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2357	int error, attrflag;
2358	u_char verf[NFSX_VERF];
2359
2360	mtx_lock(&nmp->nm_mtx);
2361	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
2362		mtx_unlock(&nmp->nm_mtx);
2363		return (0);
2364	}
2365	mtx_unlock(&nmp->nm_mtx);
2366	error = nfsrpc_commit(vp, offset, cnt, cred, td, verf, &nfsva,
2367	    &attrflag, NULL);
2368	if (!error) {
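		/*
		 * Compare the write verifier returned by the commit with
		 * the one cached when the data was written.  A mismatch
		 * means the server rebooted and may have lost uncommitted
		 * data, so NFSERR_STALEWRITEVERF is returned to make the
		 * caller redo the writes (see ncl_flush(), which calls
		 * ncl_clearcommit() in this case).
		 */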
2369		if (NFSBCMP((caddr_t)nmp->nm_verf, verf, NFSX_VERF)) {
2370			NFSBCOPY(verf, (caddr_t)nmp->nm_verf, NFSX_VERF);
2371			error = NFSERR_STALEWRITEVERF;
2372		}
2373		if (!error && attrflag)
2374			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL,
2375			    0, 1);
2376	} else if (NFS_ISV4(vp)) {
2377		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
2378	}
2379	return (error);
2380}
2381
2382/*
2383 * Strategy routine.
2384 * For async requests when nfsiod(s) are running, queue the request by
 * calling ncl_asyncio(), otherwise just call ncl_doio() to do the
2386 * request.
2387 */
2388static int
2389nfs_strategy(struct vop_strategy_args *ap)
2390{
2391	struct buf *bp = ap->a_bp;
2392	struct ucred *cr;
2393
2394	KASSERT(!(bp->b_flags & B_DONE),
2395	    ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
2396	BUF_ASSERT_HELD(bp);
2397
2398	if (bp->b_iocmd == BIO_READ)
2399		cr = bp->b_rcred;
2400	else
2401		cr = bp->b_wcred;
2402
2403	/*
	 * If the op is asynchronous and an i/o daemon is waiting,
	 * queue the request and wake the daemon up; otherwise just
	 * do it ourselves (ncl_asyncio() returns nonzero when the
	 * request could not be queued).
2407	 */
2408	if ((bp->b_flags & B_ASYNC) == 0 ||
2409	    ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
2410		(void)ncl_doio(ap->a_vp, bp, cr, curthread);
2411	return (0);
2412}
2413
2414/*
2415 * fsync vnode op. Just call ncl_flush() with commit == 1.
2416 */
2417/* ARGSUSED */
2418static int
2419nfs_fsync(struct vop_fsync_args *ap)
2420{
2421	return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1));
2422}
2423
2424/*
2425 * Flush all the blocks associated with a vnode.
2426 * 	Walk through the buffer pool and push any dirty pages
2427 *	associated with the vnode.
2428 */
2429int
2430ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
2431    int commit)
2432{
2433	struct nfsnode *np = VTONFS(vp);
2434	struct buf *bp;
2435	int i;
2436	struct buf *nbp;
2437	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2438	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
2439	int passone = 1, trycnt = 0;
2440	u_quad_t off, endoff, toff;
2441	struct ucred* wcred = NULL;
2442	struct buf **bvec = NULL;
2443	struct bufobj *bo;
2444#ifndef NFS_COMMITBVECSIZ
2445#define	NFS_COMMITBVECSIZ	20
2446#endif
2447	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
2448	int bvecsize = 0, bveccount;
2449
2450	if (nmp->nm_flag & NFSMNT_INT)
2451		slpflag = NFS_PCATCH;
2452	if (!commit)
2453		passone = 0;
2454	bo = &vp->v_bufobj;
2455	/*
2456	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
2457	 * server, but has not been committed to stable storage on the server
2458	 * yet. On the first pass, the byte range is worked out and the commit
2459	 * rpc is done. On the second pass, ncl_writebp() is called to do the
2460	 * job.
2461	 */
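	/*
	 * Note that when commit == 0 the commit rpc step is skipped and
	 * buffers that are merely awaiting a commit (B_NEEDCOMMIT) are
	 * left alone by the write loop below; pass one behaves the same
	 * way so that such buffers can be committed rather than written.
	 */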
2462again:
2463	off = (u_quad_t)-1;
2464	endoff = 0;
2465	bvecpos = 0;
2466	if (NFS_ISV34(vp) && commit) {
2467		if (bvec != NULL && bvec != bvec_on_stack)
2468			free(bvec, M_TEMP);
2469		/*
2470		 * Count up how many buffers waiting for a commit.
2471		 */
2472		bveccount = 0;
2473		BO_LOCK(bo);
2474		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
2475			if (!BUF_ISLOCKED(bp) &&
2476			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
2477				== (B_DELWRI | B_NEEDCOMMIT))
2478				bveccount++;
2479		}
2480		/*
2481		 * Allocate space to remember the list of bufs to commit.  It is
2482		 * important to use M_NOWAIT here to avoid a race with nfs_write.
2483		 * If we can't get memory (for whatever reason), we will end up
2484		 * committing the buffers one-by-one in the loop below.
2485		 */
2486		if (bveccount > NFS_COMMITBVECSIZ) {
2487			/*
2488			 * Release the vnode interlock to avoid a lock
2489			 * order reversal.
2490			 */
2491			BO_UNLOCK(bo);
2492			bvec = (struct buf **)
2493				malloc(bveccount * sizeof(struct buf *),
2494				       M_TEMP, M_NOWAIT);
2495			BO_LOCK(bo);
2496			if (bvec == NULL) {
2497				bvec = bvec_on_stack;
2498				bvecsize = NFS_COMMITBVECSIZ;
2499			} else
2500				bvecsize = bveccount;
2501		} else {
2502			bvec = bvec_on_stack;
2503			bvecsize = NFS_COMMITBVECSIZ;
2504		}
2505		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
2506			if (bvecpos >= bvecsize)
2507				break;
2508			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
2509				nbp = TAILQ_NEXT(bp, b_bobufs);
2510				continue;
2511			}
2512			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
2513			    (B_DELWRI | B_NEEDCOMMIT)) {
2514				BUF_UNLOCK(bp);
2515				nbp = TAILQ_NEXT(bp, b_bobufs);
2516				continue;
2517			}
2518			BO_UNLOCK(bo);
2519			bremfree(bp);
2520			/*
2521			 * Work out if all buffers are using the same cred
2522			 * so we can deal with them all with one commit.
2523			 *
2524			 * NOTE: we are not clearing B_DONE here, so we have
2525			 * to do it later on in this routine if we intend to
2526			 * initiate I/O on the bp.
2527			 *
2528			 * Note: to avoid loopback deadlocks, we do not
2529			 * assign b_runningbufspace.
2530			 */
2531			if (wcred == NULL)
2532				wcred = bp->b_wcred;
2533			else if (wcred != bp->b_wcred)
2534				wcred = NOCRED;
2535			vfs_busy_pages(bp, 1);
2536
2537			BO_LOCK(bo);
2538			/*
2539			 * bp is protected by being locked, but nbp is not
2540			 * and vfs_busy_pages() may sleep.  We have to
2541			 * recalculate nbp.
2542			 */
2543			nbp = TAILQ_NEXT(bp, b_bobufs);
2544
2545			/*
2546			 * A list of these buffers is kept so that the
2547			 * second loop knows which buffers have actually
2548			 * been committed. This is necessary, since there
2549			 * may be a race between the commit rpc and new
2550			 * uncommitted writes on the file.
2551			 */
2552			bvec[bvecpos++] = bp;
2553			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2554				bp->b_dirtyoff;
2555			if (toff < off)
2556				off = toff;
2557			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
2558			if (toff > endoff)
2559				endoff = toff;
2560		}
2561		BO_UNLOCK(bo);
2562	}
2563	if (bvecpos > 0) {
2564		/*
2565		 * Commit data on the server, as required.
2566		 * If all bufs are using the same wcred, then use that with
2567		 * one call for all of them, otherwise commit each one
2568		 * separately.
2569		 */
2570		if (wcred != NOCRED)
2571			retv = ncl_commit(vp, off, (int)(endoff - off),
2572					  wcred, td);
2573		else {
2574			retv = 0;
2575			for (i = 0; i < bvecpos; i++) {
2576				off_t off, size;
2577				bp = bvec[i];
2578				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2579					bp->b_dirtyoff;
2580				size = (u_quad_t)(bp->b_dirtyend
2581						  - bp->b_dirtyoff);
2582				retv = ncl_commit(vp, off, (int)size,
2583						  bp->b_wcred, td);
2584				if (retv) break;
2585			}
2586		}
2587
2588		if (retv == NFSERR_STALEWRITEVERF)
2589			ncl_clearcommit(vp->v_mount);
2590
2591		/*
2592		 * Now, either mark the blocks I/O done or mark the
2593		 * blocks dirty, depending on whether the commit
2594		 * succeeded.
2595		 */
2596		for (i = 0; i < bvecpos; i++) {
2597			bp = bvec[i];
2598			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
2599			if (retv) {
2600				/*
2601				 * Error, leave B_DELWRI intact
2602				 */
2603				vfs_unbusy_pages(bp);
2604				brelse(bp);
2605			} else {
2606				/*
2607				 * Success, remove B_DELWRI ( bundirty() ).
2608				 *
2609				 * b_dirtyoff/b_dirtyend seem to be NFS
2610				 * specific.  We should probably move that
2611				 * into bundirty(). XXX
2612				 */
2613				bufobj_wref(bo);
2614				bp->b_flags |= B_ASYNC;
2615				bundirty(bp);
2616				bp->b_flags &= ~B_DONE;
2617				bp->b_ioflags &= ~BIO_ERROR;
2618				bp->b_dirtyoff = bp->b_dirtyend = 0;
2619				bufdone(bp);
2620			}
2621		}
2622	}
2623
2624	/*
2625	 * Start/do any write(s) that are required.
2626	 */
2627loop:
2628	BO_LOCK(bo);
2629	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
2630		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
2631			if (waitfor != MNT_WAIT || passone)
2632				continue;
2633
2634			error = BUF_TIMELOCK(bp,
2635			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
2636			    BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
2637			if (error == 0) {
2638				BUF_UNLOCK(bp);
2639				goto loop;
2640			}
2641			if (error == ENOLCK) {
2642				error = 0;
2643				goto loop;
2644			}
2645			if (newnfs_sigintr(nmp, td)) {
2646				error = EINTR;
2647				goto done;
2648			}
2649			if (slpflag & PCATCH) {
2650				slpflag = 0;
2651				slptimeo = 2 * hz;
2652			}
2653			goto loop;
2654		}
2655		if ((bp->b_flags & B_DELWRI) == 0)
2656			panic("nfs_fsync: not dirty");
2657		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
2658			BUF_UNLOCK(bp);
2659			continue;
2660		}
2661		BO_UNLOCK(bo);
2662		bremfree(bp);
		bp->b_flags |= B_ASYNC;
2667		bwrite(bp);
2668		if (newnfs_sigintr(nmp, td)) {
2669			error = EINTR;
2670			goto done;
2671		}
2672		goto loop;
2673	}
2674	if (passone) {
2675		passone = 0;
2676		BO_UNLOCK(bo);
2677		goto again;
2678	}
2679	if (waitfor == MNT_WAIT) {
2680		while (bo->bo_numoutput) {
2681			error = bufobj_wwait(bo, slpflag, slptimeo);
2682			if (error) {
2683			    BO_UNLOCK(bo);
2684			    error = newnfs_sigintr(nmp, td);
2685			    if (error)
2686				goto done;
2687			    if (slpflag & PCATCH) {
2688				slpflag = 0;
2689				slptimeo = 2 * hz;
2690			    }
2691			    BO_LOCK(bo);
2692			}
2693		}
2694		if (bo->bo_dirty.bv_cnt != 0 && commit) {
2695			BO_UNLOCK(bo);
2696			goto loop;
2697		}
2698		/*
2699		 * Wait for all the async IO requests to drain
2700		 */
2701		BO_UNLOCK(bo);
2702		mtx_lock(&np->n_mtx);
2703		while (np->n_directio_asyncwr > 0) {
2704			np->n_flag |= NFSYNCWAIT;
2705			error = ncl_msleep(td, (caddr_t)&np->n_directio_asyncwr,
2706					   &np->n_mtx, slpflag | (PRIBIO + 1),
2707					   "nfsfsync", 0);
2708			if (error) {
2709				if (newnfs_sigintr(nmp, td)) {
2710					mtx_unlock(&np->n_mtx);
2711					error = EINTR;
2712					goto done;
2713				}
2714			}
2715		}
2716		mtx_unlock(&np->n_mtx);
2717	} else
2718		BO_UNLOCK(bo);
2719	mtx_lock(&np->n_mtx);
2720	if (np->n_flag & NWRITEERR) {
2721		error = np->n_error;
2722		np->n_flag &= ~NWRITEERR;
2723	}
	if (commit && bo->bo_dirty.bv_cnt == 0 &&
	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
		np->n_flag &= ~NMODIFIED;
2727	mtx_unlock(&np->n_mtx);
2728done:
2729	if (bvec != NULL && bvec != bvec_on_stack)
2730		free(bvec, M_TEMP);
2731	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
2732	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
2733	     np->n_directio_asyncwr != 0) && trycnt++ < 5) {
2734		/* try, try again... */
2735		passone = 1;
2736		wcred = NULL;
2737		bvec = NULL;
2738		bvecsize = 0;
		printf("try%d\n", trycnt);
2740		goto again;
2741	}
2742	return (error);
2743}
2744
2745/*
2746 * NFS advisory byte-level locks.
2747 */
2748static int
2749nfs_advlock(struct vop_advlock_args *ap)
2750{
2751	struct vnode *vp = ap->a_vp;
2752	struct ucred *cred;
2753	struct nfsnode *np = VTONFS(ap->a_vp);
2754	struct proc *p = (struct proc *)ap->a_id;
2755	struct thread *td = curthread;	/* XXX */
2756	struct vattr va;
2757	int ret, error = EOPNOTSUPP;
2758	u_quad_t size;
2759
2760	if (NFS_ISV4(vp) && (ap->a_flags & F_POSIX)) {
2761		cred = p->p_ucred;
2762		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2763		if (vp->v_iflag & VI_DOOMED) {
2764			VOP_UNLOCK(vp, 0);
2765			return (EBADF);
2766		}
2767
2768		/*
2769		 * If this is unlocking a write locked region, flush and
2770		 * commit them before unlocking. This is required by
2771		 * RFC3530 Sec. 9.3.2.
2772		 */
2773		if (ap->a_op == F_UNLCK &&
2774		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td))
2775			(void) ncl_flush(vp, MNT_WAIT, cred, td, 1);
2776
2777		/*
		 * Loop around doing the lock op, since a blocking lock
		 * request must be retried until the lock is granted.
2780		 */
2781		do {
2782			ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
2783			    ap->a_fl, 0, cred, td);
2784			if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
2785			    ap->a_op == F_SETLK) {
2786				VOP_UNLOCK(vp, 0);
2787				error = nfs_catnap(PZERO | PCATCH, "ncladvl");
2788				if (error)
2789					return (EINTR);
2790				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2791				if (vp->v_iflag & VI_DOOMED) {
2792					VOP_UNLOCK(vp, 0);
2793					return (EBADF);
2794				}
2795			}
2796		} while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
2797		     ap->a_op == F_SETLK);
2798		if (ret == NFSERR_DENIED) {
2799			VOP_UNLOCK(vp, 0);
2800			return (EAGAIN);
2801		} else if (ret == EINVAL || ret == EBADF || ret == EINTR) {
2802			VOP_UNLOCK(vp, 0);
2803			return (ret);
2804		} else if (ret != 0) {
2805			VOP_UNLOCK(vp, 0);
2806			return (EACCES);
2807		}
2808
2809		/*
2810		 * Now, if we just got a lock, invalidate data in the buffer
2811		 * cache, as required, so that the coherency conforms with
2812		 * RFC3530 Sec. 9.3.2.
2813		 */
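		/*
		 * This is done by forcing a fresh VOP_GETATTR() and
		 * comparing the file's change attribute (va_filerev)
		 * against the cached value; on any mismatch, cached
		 * dirty data, or a failed GETATTR, the buffer cache
		 * for the vnode is invalidated.
		 */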
2814		if (ap->a_op == F_SETLK) {
2815			if ((np->n_flag & NMODIFIED) == 0) {
2816				np->n_attrstamp = 0;
2817				ret = VOP_GETATTR(vp, &va, cred);
2818			}
2819			if ((np->n_flag & NMODIFIED) || ret ||
2820			    np->n_change != va.va_filerev) {
2821				(void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
2822				np->n_attrstamp = 0;
2823				ret = VOP_GETATTR(vp, &va, cred);
2824				if (!ret) {
2825					np->n_mtime = va.va_mtime;
2826					np->n_change = va.va_filerev;
2827				}
2828			}
2829		}
2830		VOP_UNLOCK(vp, 0);
2831		return (0);
2832	} else if (!NFS_ISV4(vp)) {
2833		error = vn_lock(vp, LK_SHARED);
2834		if (error)
2835			return (error);
2836		if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
2837			size = VTONFS(vp)->n_size;
2838			VOP_UNLOCK(vp, 0);
2839			error = lf_advlock(ap, &(vp->v_lockf), size);
2840		} else {
2841			if (ncl_advlock_p)
2842				error = ncl_advlock_p(ap);
2843			else
2844				error = ENOLCK;
2845		}
2846	}
2847	return (error);
2848}
2849
2850/*
2851 * NFS advisory byte-level locks.
2852 */
2853static int
2854nfs_advlockasync(struct vop_advlockasync_args *ap)
2855{
2856	struct vnode *vp = ap->a_vp;
2857	u_quad_t size;
2858	int error;
2859
2860	if (NFS_ISV4(vp))
2861		return (EOPNOTSUPP);
2862	error = vn_lock(vp, LK_SHARED);
2863	if (error)
2864		return (error);
2865	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
2866		size = VTONFS(vp)->n_size;
2867		VOP_UNLOCK(vp, 0);
2868		error = lf_advlockasync(ap, &(vp->v_lockf), size);
2869	} else {
2870		VOP_UNLOCK(vp, 0);
2871		error = EOPNOTSUPP;
2872	}
2873	return (error);
2874}
2875
2876/*
2877 * Print out the contents of an nfsnode.
2878 */
2879static int
2880nfs_print(struct vop_print_args *ap)
2881{
2882	struct vnode *vp = ap->a_vp;
2883	struct nfsnode *np = VTONFS(vp);
2884
2885	ncl_printf("\tfileid %ld fsid 0x%x",
2886	   np->n_vattr.na_fileid, np->n_vattr.na_fsid);
2887	if (vp->v_type == VFIFO)
2888		fifo_printinfo(vp);
2889	printf("\n");
2890	return (0);
2891}
2892
2893/*
2894 * This is the "real" nfs::bwrite(struct buf*).
 * We always set B_CACHE here, since the buffer contains valid data.
2896 */
2897int
2898ncl_writebp(struct buf *bp, int force __unused, struct thread *td)
2899{
2900	int s;
2901	int oldflags = bp->b_flags;
2902#if 0
2903	int retv = 1;
2904	off_t off;
2905#endif
2906
2907	BUF_ASSERT_HELD(bp);
2908
2909	if (bp->b_flags & B_INVAL) {
2910		brelse(bp);
		return (0);
2912	}
2913
2914	bp->b_flags |= B_CACHE;
2915
2916	/*
2917	 * Undirty the bp.  We will redirty it later if the I/O fails.
2918	 */
2919
2920	s = splbio();
2921	bundirty(bp);
2922	bp->b_flags &= ~B_DONE;
2923	bp->b_ioflags &= ~BIO_ERROR;
2924	bp->b_iocmd = BIO_WRITE;
2925
2926	bufobj_wref(bp->b_bufobj);
2927	curthread->td_ru.ru_oublock++;
2928	splx(s);
2929
2930	/*
2931	 * Note: to avoid loopback deadlocks, we do not
2932	 * assign b_runningbufspace.
2933	 */
2934	vfs_busy_pages(bp, 1);
2935
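	/*
	 * BUF_KERNPROC() hands ownership of the buffer lock over to the
	 * kernel, since the write may be completed (and the lock dropped)
	 * by a different thread, such as an nfsiod.
	 */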
2936	BUF_KERNPROC(bp);
2937	bp->b_iooffset = dbtob(bp->b_blkno);
2938	bstrategy(bp);
2939
	if ((oldflags & B_ASYNC) == 0) {
2941		int rtval = bufwait(bp);
2942
2943		if (oldflags & B_DELWRI) {
2944			s = splbio();
2945			reassignbuf(bp);
2946			splx(s);
2947		}
2948		brelse(bp);
2949		return (rtval);
2950	}
2951
2952	return (0);
2953}
2954
2955/*
2956 * nfs special file access vnode op.
 * Essentially just get vattr and then call vaccess(), since the device is
2958 * local to the client.
2959 */
2960static int
2961nfsspec_access(struct vop_access_args *ap)
2962{
2963	struct vattr *vap;
2964	struct ucred *cred = ap->a_cred;
2965	struct vnode *vp = ap->a_vp;
2966	accmode_t accmode = ap->a_accmode;
2967	struct vattr vattr;
2968	int error;
2969
2970	/*
2971	 * Disallow write attempts on filesystems mounted read-only;
2972	 * unless the file is a socket, fifo, or a block or character
2973	 * device resident on the filesystem.
2974	 */
2975	if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
2976		switch (vp->v_type) {
2977		case VREG:
2978		case VDIR:
2979		case VLNK:
2980			return (EROFS);
2981		default:
2982			break;
2983		}
2984	}
2985	vap = &vattr;
2986	error = VOP_GETATTR(vp, vap, cred);
2987	if (error)
2988		goto out;
	error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
	    accmode, cred, NULL);
out:
	return (error);
2993}
2994
2995/*
2996 * Read wrapper for fifos.
2997 */
2998static int
2999nfsfifo_read(struct vop_read_args *ap)
3000{
3001	struct nfsnode *np = VTONFS(ap->a_vp);
3002	int error;
3003
3004	/*
3005	 * Set access flag.
3006	 */
3007	mtx_lock(&np->n_mtx);
3008	np->n_flag |= NACC;
3009	getnanotime(&np->n_atim);
3010	mtx_unlock(&np->n_mtx);
3011	error = fifo_specops.vop_read(ap);
	return (error);
3013}
3014
3015/*
3016 * Write wrapper for fifos.
3017 */
3018static int
3019nfsfifo_write(struct vop_write_args *ap)
3020{
3021	struct nfsnode *np = VTONFS(ap->a_vp);
3022
3023	/*
3024	 * Set update flag.
3025	 */
3026	mtx_lock(&np->n_mtx);
3027	np->n_flag |= NUPD;
3028	getnanotime(&np->n_mtim);
3029	mtx_unlock(&np->n_mtx);
	return (fifo_specops.vop_write(ap));
3031}
3032
3033/*
3034 * Close wrapper for fifos.
3035 *
3036 * Update the times on the nfsnode then do fifo close.
3037 */
3038static int
3039nfsfifo_close(struct vop_close_args *ap)
3040{
3041	struct vnode *vp = ap->a_vp;
3042	struct nfsnode *np = VTONFS(vp);
3043	struct vattr vattr;
3044	struct timespec ts;
3045
3046	mtx_lock(&np->n_mtx);
3047	if (np->n_flag & (NACC | NUPD)) {
3048		getnanotime(&ts);
3049		if (np->n_flag & NACC)
3050			np->n_atim = ts;
3051		if (np->n_flag & NUPD)
3052			np->n_mtim = ts;
3053		np->n_flag |= NCHG;
3054		if (vrefcnt(vp) == 1 &&
3055		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3056			VATTR_NULL(&vattr);
3057			if (np->n_flag & NACC)
3058				vattr.va_atime = np->n_atim;
3059			if (np->n_flag & NUPD)
3060				vattr.va_mtime = np->n_mtim;
3061			mtx_unlock(&np->n_mtx);
3062			(void)VOP_SETATTR(vp, &vattr, ap->a_cred);
3063			goto out;
3064		}
3065	}
3066	mtx_unlock(&np->n_mtx);
3067out:
3068	return (fifo_specops.vop_close(ap));
3069}
3070
3071/*
3072 * Just call ncl_writebp() with the force argument set to 1.
3073 *
3074 * NOTE: B_DONE may or may not be set in a_bp on call.
3075 */
3076static int
3077nfs_bwrite(struct buf *bp)
3078{
3079
3080	return (ncl_writebp(bp, 1, curthread));
3081}
3082
3083struct buf_ops buf_ops_newnfs = {
3084	.bop_name	=	"buf_ops_nfs",
3085	.bop_write	=	nfs_bwrite,
3086	.bop_strategy	=	bufstrategy,
3087	.bop_sync	=	bufsync,
3088	.bop_bdflush	=	bufbdflush,
3089};
3090
3091/*
3092 * Cloned from vop_stdlock(), and then the ugly hack added.
3093 */
3094static int
3095nfs_lock1(struct vop_lock1_args *ap)
3096{
3097	struct vnode *vp = ap->a_vp;
3098	int error = 0;
3099
3100	/*
3101	 * Since vfs_hash_get() calls vget() and it will no longer work
3102	 * for FreeBSD8 with flags == 0, I can only think of this horrible
3103	 * hack to work around it. I call vfs_hash_get() with LK_EXCLOTHER
3104	 * and then handle it here. All I want for this case is a v_usecount
3105	 * on the vnode to use for recovery, while another thread might
3106	 * hold a lock on the vnode. I have the other threads blocked, so
3107	 * there isn't any race problem.
3108	 */
3109	if ((ap->a_flags & LK_TYPE_MASK) == LK_EXCLOTHER) {
3110		if ((ap->a_flags & LK_INTERLOCK) == 0)
3111			panic("ncllock1");
3112		if ((vp->v_iflag & VI_DOOMED))
3113			error = ENOENT;
3114		VI_UNLOCK(vp);
3115		return (error);
3116	}
3117	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
3118	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
3119	    ap->a_line));
3120}
3121
3122#ifdef NFS4_ACL_EXTATTR_NAME
3123static int
3124nfs_getacl(struct vop_getacl_args *ap)
3125{
3126	int error;
3127
3128	if (ap->a_type != ACL_TYPE_NFS4)
3129		return (EOPNOTSUPP);
3130	error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
3131	    NULL);
3132	if (error > NFSERR_STALE) {
3133		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
3134		error = EPERM;
3135	}
3136	return (error);
3137}
3138
3139static int
3140nfs_setacl(struct vop_setacl_args *ap)
3141{
3142	int error;
3143
3144	if (ap->a_type != ACL_TYPE_NFS4)
3145		return (EOPNOTSUPP);
3146	error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
3147	    NULL);
3148	if (error > NFSERR_STALE) {
3149		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
3150		error = EPERM;
3151	}
3152	return (error);
3153}
3154
3155#endif	/* NFS4_ACL_EXTATTR_NAME */
3156