nfs_vnops.c revision 138529
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_vnops.c 138529 2004-12-07 21:16:32Z ps $");
37
38/*
39 * vnode op calls for Sun NFS version 2 and 3
40 */
41
42#include "opt_inet.h"
43
44#include <sys/param.h>
45#include <sys/kernel.h>
46#include <sys/systm.h>
47#include <sys/resourcevar.h>
48#include <sys/proc.h>
49#include <sys/mount.h>
50#include <sys/bio.h>
51#include <sys/buf.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/namei.h>
55#include <sys/socket.h>
56#include <sys/vnode.h>
57#include <sys/dirent.h>
58#include <sys/fcntl.h>
59#include <sys/lockf.h>
60#include <sys/stat.h>
61#include <sys/sysctl.h>
62#include <sys/signalvar.h>
63
64#include <vm/vm.h>
65#include <vm/vm_object.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_object.h>
68
69#include <fs/fifofs/fifo.h>
70
71#include <rpc/rpcclnt.h>
72
73#include <nfs/rpcv2.h>
74#include <nfs/nfsproto.h>
75#include <nfsclient/nfs.h>
76#include <nfsclient/nfsnode.h>
77#include <nfsclient/nfsmount.h>
78#include <nfsclient/nfs_lock.h>
79#include <nfs/xdr_subs.h>
80#include <nfsclient/nfsm_subs.h>
81
82#include <net/if.h>
83#include <netinet/in.h>
84#include <netinet/in_var.h>
85
86/* Defs */
/* Boolean constants; TRUE is passed to nfsm_v3attrbuild() below. */
87#define	TRUE	1
88#define	FALSE	0
89
90/*
91 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
92 * calls are not in getblk() and brelse() so that they would not be necessary
93 * here.
94 */
95#ifndef B_VMIO
/* Without the merged buffer cache, vfs_busy_pages() compiles to a no-op. */
96#define vfs_busy_pages(bp, f)
97#endif
98
/*
 * Forward declarations for the fifo pass-through ops, internal helpers,
 * and the vnode operations implemented later in this file.
 */
99static vop_read_t	nfsfifo_read;
100static vop_write_t	nfsfifo_write;
101static vop_close_t	nfsfifo_close;
102static int	nfs_flush(struct vnode *, struct ucred *, int, struct thread *,
103		    int);
104static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
105		    struct thread *);
106static vop_lookup_t	nfs_lookup;
107static vop_create_t	nfs_create;
108static vop_mknod_t	nfs_mknod;
109static vop_open_t	nfs_open;
110static vop_close_t	nfs_close;
111static vop_access_t	nfs_access;
112static vop_getattr_t	nfs_getattr;
113static vop_setattr_t	nfs_setattr;
114static vop_read_t	nfs_read;
115static vop_fsync_t	nfs_fsync;
116static vop_remove_t	nfs_remove;
117static vop_link_t	nfs_link;
118static vop_rename_t	nfs_rename;
119static vop_mkdir_t	nfs_mkdir;
120static vop_rmdir_t	nfs_rmdir;
121static vop_symlink_t	nfs_symlink;
122static vop_readdir_t	nfs_readdir;
123static vop_strategy_t	nfs_strategy;
124static	int	nfs_lookitup(struct vnode *, const char *, int,
125		    struct ucred *, struct thread *, struct nfsnode **);
126static	int	nfs_sillyrename(struct vnode *, struct vnode *,
127		    struct componentname *);
128static vop_access_t	nfsspec_access;
129static vop_readlink_t	nfs_readlink;
130static vop_print_t	nfs_print;
131static vop_advlock_t	nfs_advlock;
132
133/*
134 * Global vfs data structures for nfs
135 */
/*
 * Vnode operations vector for regular NFS vnodes; any operation not
 * listed here falls through to default_vnodeops.
 */
136struct vop_vector nfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		nfs_access,
	.vop_advlock =		nfs_advlock,
	.vop_close =		nfs_close,
	.vop_create =		nfs_create,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_getpages =		nfs_getpages,
	.vop_putpages =		nfs_putpages,
	.vop_inactive =		nfs_inactive,
	.vop_lease =		VOP_NULL,
	.vop_link =		nfs_link,
	.vop_lookup =		nfs_lookup,
	.vop_mkdir =		nfs_mkdir,
	.vop_mknod =		nfs_mknod,
	.vop_open =		nfs_open,
	.vop_print =		nfs_print,
	.vop_read =		nfs_read,
	.vop_readdir =		nfs_readdir,
	.vop_readlink =		nfs_readlink,
	.vop_reclaim =		nfs_reclaim,
	.vop_remove =		nfs_remove,
	.vop_rename =		nfs_rename,
	.vop_rmdir =		nfs_rmdir,
	.vop_setattr =		nfs_setattr,
	.vop_strategy =		nfs_strategy,
	.vop_symlink =		nfs_symlink,
	.vop_write =		nfs_write,
};
166
/*
 * Vnode operations vector for fifos on NFS: actual fifo I/O is handled by
 * fifo_specops; NFS only intercepts attribute handling and close/fsync.
 */
167struct vop_vector nfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_access =		nfsspec_access,
	.vop_close =		nfsfifo_close,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_inactive =		nfs_inactive,
	.vop_print =		nfs_print,
	.vop_read =		nfsfifo_read,
	.vop_reclaim =		nfs_reclaim,
	.vop_setattr =		nfs_setattr,
	.vop_write =		nfsfifo_write,
};
180
/* Prototypes for the over-the-wire RPC helpers defined later in this file. */
181static int	nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
182			     struct componentname *cnp, struct vattr *vap);
183static int	nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
184			      struct ucred *cred, struct thread *td);
185static int	nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
186			      int fnamelen, struct vnode *tdvp,
187			      const char *tnameptr, int tnamelen,
188			      struct ucred *cred, struct thread *td);
189static int	nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
190			     struct sillyrename *sp);
191
192/*
193 * Global variables
194 */
/* Per-slot state for the asynchronous I/O daemons (presumably the nfsiods
 * — TODO confirm against nfs_nfsiod.c); nfs_numasync counts active ones. */
195struct proc	*nfs_iodwant[NFS_MAXASYNCDAEMON];
196struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
197int		 nfs_numasync = 0;
/* Size of the fixed dirent header, i.e. sizeof(struct dirent) minus the
 * full-length name buffer. */
198#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
199
200SYSCTL_DECL(_vfs_nfs);
201
/* Seconds a cached ACCESS result stays valid; 0 disables caching. */
202static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
203SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
204	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
205
206static int	nfsv3_commit_on_close = 0;
207SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
208	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
209
210static int	nfs_clean_pages_on_close = 1;
211SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
212	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
213
214#if 0
215SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
216	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
217
218SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
219	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
220#endif
221
/* Every NFSv3 ACCESS right at once; used for blanket cache-filling RPCs. */
222#define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
223			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
224			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
/*
 * Issue an NFSv3 ACCESS RPC for vp requesting the rights in wmode and,
 * on success, record the server's answer in the nfsnode access cache
 * (n_mode, n_modeuid, n_modestamp) so nfs_access() can answer later
 * queries without going to the wire.  Returns 0 or an NFS error; the
 * nfsm_* macros set `error' and jump to nfsmout on failure.
 */
225static int
226nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
227    struct ucred *cred)
228{
229	const int v3 = 1;
230	u_int32_t *tl;
231	int error = 0, attrflag;
232
233	struct mbuf *mreq, *mrep, *md, *mb;
234	caddr_t bpos, dpos;
235	u_int32_t rmode;
236	struct nfsnode *np = VTONFS(vp);
237
238	nfsstats.rpccnt[NFSPROC_ACCESS]++;
239	mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
240	mb = mreq;
241	bpos = mtod(mb, caddr_t);
242	nfsm_fhtom(vp, v3);
243	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
244	*tl = txdr_unsigned(wmode);
245	nfsm_request(vp, NFSPROC_ACCESS, td, cred);
246	nfsm_postop_attr(vp, attrflag);
247	if (!error) {
		/* Cache the granted-rights mask together with who asked and when. */
248		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
249		rmode = fxdr_unsigned(u_int32_t, *tl);
250		np->n_mode = rmode;
251		np->n_modeuid = cred->cr_uid;
252		np->n_modestamp = time_second;
253	}
254	m_freem(mrep);
255nfsmout:
256	return (error);
257}
258
259/*
260 * nfs access vnode op.
261 * For nfs version 2, just return ok. File accesses may fail later.
262 * For nfs version 3, use the access rpc to check accessibility. If file modes
263 * are changed on the server, accesses might still fail later.
264 */
265static int
266nfs_access(struct vop_access_args *ap)
267{
268	struct vnode *vp = ap->a_vp;
269	int error = 0;
270	u_int32_t mode, wmode;
271	int v3 = NFS_ISV3(vp);
272	struct nfsnode *np = VTONFS(vp);
273
274	/*
275	 * Disallow write attempts on filesystems mounted read-only;
276	 * unless the file is a socket, fifo, or a block or character
277	 * device resident on the filesystem.
278	 */
279	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
280		switch (vp->v_type) {
281		case VREG:
282		case VDIR:
283		case VLNK:
284			return (EROFS);
285		default:
286			break;
287		}
288	}
289	/*
290	 * For nfs v3, check to see if we have done this recently, and if
291	 * so return our cached result instead of making an ACCESS call.
292	 * If not, do an access rpc, otherwise you are stuck emulating
293	 * ufs_access() locally using the vattr. This may not be correct,
294	 * since the server may apply other access criteria such as
295	 * client uid-->server uid mapping that we do not know about.
296	 */
297	if (v3) {
		/* Translate the VREAD/VWRITE/VEXEC request into NFSv3 ACCESS bits. */
298		if (ap->a_mode & VREAD)
299			mode = NFSV3ACCESS_READ;
300		else
301			mode = 0;
302		if (vp->v_type != VDIR) {
303			if (ap->a_mode & VWRITE)
304				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
305			if (ap->a_mode & VEXEC)
306				mode |= NFSV3ACCESS_EXECUTE;
307		} else {
308			if (ap->a_mode & VWRITE)
309				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
310					 NFSV3ACCESS_DELETE);
311			if (ap->a_mode & VEXEC)
312				mode |= NFSV3ACCESS_LOOKUP;
313		}
314		/* XXX safety belt, only make blanket request if caching */
315		if (nfsaccess_cache_timeout > 0) {
316			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
317				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
318				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
319		} else {
320			wmode = mode;
321		}
322
323		/*
324		 * Does our cached result allow us to give a definite yes to
325		 * this request?
326		 */
327		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
328		    (ap->a_cred->cr_uid == np->n_modeuid) &&
329		    ((np->n_mode & mode) == mode)) {
330			nfsstats.accesscache_hits++;
331		} else {
332			/*
333			 * Either a no, or a don't know.  Go to the wire.
334			 */
335			nfsstats.accesscache_misses++;
336		        error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred);
337			if (!error) {
				/*
				 * The RPC refreshed np->n_mode; re-check the
				 * specific rights that were actually asked for.
				 */
338				if ((np->n_mode & mode) != mode) {
339					error = EACCES;
340				}
341			}
342		}
343		return (error);
344	} else {
		/* NFSv2: emulate access checking locally via the cached vattr. */
345		if ((error = nfsspec_access(ap)) != 0)
346			return (error);
347
348		/*
349		 * Attempt to prevent a mapped root from accessing a file
350		 * which it shouldn't.  We try to read a byte from the file
351		 * if the user is root and the file is not zero length.
352		 * After calling nfsspec_access, we should have the correct
353		 * file size cached.
354		 */
355		if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
356		    && VTONFS(vp)->n_size > 0) {
357			struct iovec aiov;
358			struct uio auio;
359			char buf[1];
360
361			aiov.iov_base = buf;
362			aiov.iov_len = 1;
363			auio.uio_iov = &aiov;
364			auio.uio_iovcnt = 1;
365			auio.uio_offset = 0;
366			auio.uio_resid = 1;
367			auio.uio_segflg = UIO_SYSSPACE;
368			auio.uio_rw = UIO_READ;
369			auio.uio_td = ap->a_td;
370
371			if (vp->v_type == VREG)
372				error = nfs_readrpc(vp, &auio, ap->a_cred);
373			else if (vp->v_type == VDIR) {
				/* Directories need a full block-sized buffer. */
374				char* bp;
375				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
376				aiov.iov_base = bp;
377				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
378				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
379				free(bp, M_TEMP);
380			} else if (vp->v_type == VLNK)
381				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
382			else
383				error = EACCES;
384		}
385		return (error);
386	}
387}
388
389/*
390 * nfs open vnode op
391 * Check to see if the type is ok
392 * and that deletion is not in progress.
393 * For paged in text files, you will need to flush the page cache
394 * if consistency is lost.
395 */
396/* ARGSUSED */
397static int
398nfs_open(struct vop_open_args *ap)
399{
400	struct vnode *vp = ap->a_vp;
401	struct nfsnode *np = VTONFS(vp);
402	struct vattr vattr;
403	int error;
404
405	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
406		return (EOPNOTSUPP);
407
408	/*
409	 * Get a valid lease. If cached data is stale, flush it.
410	 */
411	if (np->n_flag & NMODIFIED) {
		/* Locally modified: push dirty data, then refetch attributes. */
412		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_td, 1);
413		if (error == EINTR || error == EIO)
414			return (error);
415		np->n_attrstamp = 0;
416		if (vp->v_type == VDIR)
417			np->n_direofoffset = 0;
418		error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
419		if (error)
420			return (error);
421		np->n_mtime = vattr.va_mtime;
422	} else {
		/* Clean: fetch fresh attrs and invalidate caches if mtime moved. */
423		np->n_attrstamp = 0;
424		error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
425		if (error)
426			return (error);
427		if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
428			if (vp->v_type == VDIR)
429				np->n_direofoffset = 0;
430			error = nfs_vinvalbuf(vp, V_SAVE,
431				ap->a_cred, ap->a_td, 1);
432			if (error == EINTR || error == EIO)
433				return (error);
434			np->n_mtime = vattr.va_mtime;
435		}
436	}
437	return (0);
438}
439
440/*
441 * nfs close vnode op
442 * What an NFS client should do upon close after writing is a debatable issue.
443 * Most NFS clients push delayed writes to the server upon close, basically for
444 * two reasons:
445 * 1 - So that any write errors may be reported back to the client process
446 *     doing the close system call. By far the two most likely errors are
447 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
448 * 2 - To put a worst case upper bound on cache inconsistency between
449 *     multiple clients for the file.
450 * There is also a consistency problem for Version 2 of the protocol w.r.t.
451 * not being able to tell if other clients are writing a file concurrently,
452 * since there is no way of knowing if the changed modify time in the reply
453 * is only due to the write for this client.
454 * (NFS Version 3 provides weak cache consistency data in the reply that
455 *  should be sufficient to detect and handle this case.)
456 *
457 * The current code does the following:
458 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
459 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
460 *                     or commit them (this satisfies 1 and 2 except for the
461 *                     case where the server crashes after this close but
462 *                     before the commit RPC, which is felt to be "good
463 *                     enough". Changing the last argument to nfs_flush() to
464 *                     a 1 would force a commit operation, if it is felt a
465 *                     commit is necessary now.
466 */
467/* ARGSUSED */
468static int
469nfs_close(struct vop_close_args *ap)
470{
471	struct vnode *vp = ap->a_vp;
472	struct nfsnode *np = VTONFS(vp);
473	int error = 0;
474
475	if (vp->v_type == VREG) {
476	    /*
477	     * Examine and clean dirty pages, regardless of NMODIFIED.
478	     * This closes a major hole in close-to-open consistency.
479	     * We want to push out all dirty pages (and buffers) on
480	     * close, regardless of whether they were dirtied by
481	     * mmap'ed writes or via write().
482	     */
483	    if (nfs_clean_pages_on_close && vp->v_object) {
484		VM_OBJECT_LOCK(vp->v_object);
485		vm_object_page_clean(vp->v_object, 0, 0, 0);
486		VM_OBJECT_UNLOCK(vp->v_object);
487	    }
488	    if (np->n_flag & NMODIFIED) {
489		if (NFS_ISV3(vp)) {
490		    /*
491		     * Under NFSv3 we have dirty buffers to dispose of.  We
492		     * must flush them to the NFS server.  We have the option
493		     * of waiting all the way through the commit rpc or just
494		     * waiting for the initial write.  The default is to only
495		     * wait through the initial write so the data is in the
496		     * server's cache, which is roughly similar to the state
497		     * a standard disk subsystem leaves the file in on close().
498		     *
499		     * We cannot clear the NMODIFIED bit in np->n_flag due to
500		     * potential races with other processes, and certainly
501		     * cannot clear it if we don't commit.
502		     */
503		    int cm = nfsv3_commit_on_close ? 1 : 0;
504		    error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_td, cm);
505		    /* np->n_flag &= ~NMODIFIED; */
506		} else {
		    /* NFSv2: flush and invalidate everything. */
507		    error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_td, 1);
508		}
509	    }
510 	    /*
511 	     * Invalidate the attribute cache in all cases.
512 	     * An open is going to fetch fresh attrs any way, other procs
513 	     * on this node that have file open will be forced to do an
514 	     * otw attr fetch, but this is safe.
515 	     */
516	    np->n_attrstamp = 0;
	    /* Report any deferred asynchronous write error to the closer. */
517	    if (np->n_flag & NWRITEERR) {
518		np->n_flag &= ~NWRITEERR;
519		error = np->n_error;
520	    }
521	}
522	return (error);
523}
524
525/*
526 * nfs getattr call from vfs.
527 */
528static int
529nfs_getattr(struct vop_getattr_args *ap)
530{
531	struct vnode *vp = ap->a_vp;
532	struct nfsnode *np = VTONFS(vp);
533	caddr_t bpos, dpos;
534	int error = 0;
535	struct mbuf *mreq, *mrep, *md, *mb;
536	int v3 = NFS_ISV3(vp);
537
538	/*
539	 * Update local times for special files.
540	 */
541	if (np->n_flag & (NACC | NUPD))
542		np->n_flag |= NCHG;
543	/*
544	 * First look in the cache.
545	 */
546	if (nfs_getattrcache(vp, ap->a_vap) == 0)
547		return (0);
548
	/*
	 * Piggyback a blanket ACCESS RPC: its post-op attributes usually
	 * refill the attribute cache, warming the access cache for free.
	 */
549	if (v3 && nfsaccess_cache_timeout > 0) {
550		nfsstats.accesscache_misses++;
551		nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_td, ap->a_cred);
552		if (nfs_getattrcache(vp, ap->a_vap) == 0)
553			return (0);
554	}
555
556	nfsstats.rpccnt[NFSPROC_GETATTR]++;
557	mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
558	mb = mreq;
559	bpos = mtod(mb, caddr_t);
560	nfsm_fhtom(vp, v3);
561	nfsm_request(vp, NFSPROC_GETATTR, ap->a_td, ap->a_cred);
562	if (!error) {
563		nfsm_loadattr(vp, ap->a_vap);
564	}
565	m_freem(mrep);
566nfsmout:
567	return (error);
568}
569
570/*
571 * nfs setattr call.
572 */
573static int
574nfs_setattr(struct vop_setattr_args *ap)
575{
576	struct vnode *vp = ap->a_vp;
577	struct nfsnode *np = VTONFS(vp);
578	struct vattr *vap = ap->a_vap;
579	int error = 0;
580	u_quad_t tsize;
581
582#ifndef nolint
583	tsize = (u_quad_t)0;
584#endif
585
586	/*
587	 * Setting of flags is not supported.
588	 */
589	if (vap->va_flags != VNOVAL)
590		return (EOPNOTSUPP);
591
592	/*
593	 * Disallow write attempts if the filesystem is mounted read-only.
594	 */
595  	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
596	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
597	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
598	    (vp->v_mount->mnt_flag & MNT_RDONLY))
599		return (EROFS);
600	if (vap->va_size != VNOVAL) {
601 		switch (vp->v_type) {
602 		case VDIR:
603 			return (EISDIR);
604 		case VCHR:
605 		case VBLK:
606 		case VSOCK:
607 		case VFIFO:
			/* Size is meaningless for these types: if nothing else
			 * is being set either, succeed as a no-op. */
608			if (vap->va_mtime.tv_sec == VNOVAL &&
609			    vap->va_atime.tv_sec == VNOVAL &&
610			    vap->va_mode == (mode_t)VNOVAL &&
611			    vap->va_uid == (uid_t)VNOVAL &&
612			    vap->va_gid == (gid_t)VNOVAL)
613				return (0);
614 			vap->va_size = VNOVAL;
615 			break;
616 		default:
617			/*
618			 * Disallow write attempts if the filesystem is
619			 * mounted read-only.
620			 */
621			if (vp->v_mount->mnt_flag & MNT_RDONLY)
622				return (EROFS);
623
624			/*
625			 *  We run vnode_pager_setsize() early (why?),
626			 * we must set np->n_size now to avoid vinvalbuf
627			 * V_SAVE races that might setsize a lower
628			 * value.
629			 */
630
			/* Remember the old size so a failed SETATTR can roll back. */
631			tsize = np->n_size;
632			error = nfs_meta_setsize(vp, ap->a_cred,
633						ap->a_td, vap->va_size);
634
635 			if (np->n_flag & NMODIFIED) {
636 			    if (vap->va_size == 0)
637 				error = nfs_vinvalbuf(vp, 0,
638 					ap->a_cred, ap->a_td, 1);
639 			    else
640 				error = nfs_vinvalbuf(vp, V_SAVE,
641 					ap->a_cred, ap->a_td, 1);
642 			    if (error) {
643				vnode_pager_setsize(vp, np->n_size);
644 				return (error);
645			    }
646 			}
647			/*
648			 * np->n_size has already been set to vap->va_size
649			 * in nfs_meta_setsize(). We must set it again since
650			 * nfs_loadattrcache() could be called through
651			 * nfs_meta_setsize() and could modify np->n_size.
652			 */
653 			np->n_vattr.va_size = np->n_size = vap->va_size;
654  		};
655  	} else if ((vap->va_mtime.tv_sec != VNOVAL ||
656		vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
657		vp->v_type == VREG &&
658  		(error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
659		 ap->a_td, 1)) != 0 && (error == EINTR || error == EIO))
660		return (error);
661	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_td);
	/* On RPC failure, restore the pre-truncate size in node and pager. */
662	if (error && vap->va_size != VNOVAL) {
663		np->n_size = np->n_vattr.va_size = tsize;
664		vnode_pager_setsize(vp, np->n_size);
665	}
666	return (error);
667}
668
669/*
670 * Do an nfs setattr rpc.
671 */
672static int
673nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
674    struct thread *td)
675{
676	struct nfsv2_sattr *sp;
677	struct nfsnode *np = VTONFS(vp);
678	caddr_t bpos, dpos;
679	u_int32_t *tl;
680	int error = 0, wccflag = NFSV3_WCCRATTR;
681	struct mbuf *mreq, *mrep, *md, *mb;
682	int v3 = NFS_ISV3(vp);
683
684	nfsstats.rpccnt[NFSPROC_SETATTR]++;
685	mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
686	mb = mreq;
687	bpos = mtod(mb, caddr_t);
688	nfsm_fhtom(vp, v3);
689	if (v3) {
690		nfsm_v3attrbuild(vap, TRUE);
		/* "guard" discriminant: no ctime check requested. */
691		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
692		*tl = nfs_false;
693	} else {
		/* NFSv2 sattr: unset fields are encoded as -1 (nfs_xdrneg1). */
694		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
695		if (vap->va_mode == (mode_t)VNOVAL)
696			sp->sa_mode = nfs_xdrneg1;
697		else
698			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
699		if (vap->va_uid == (uid_t)VNOVAL)
700			sp->sa_uid = nfs_xdrneg1;
701		else
702			sp->sa_uid = txdr_unsigned(vap->va_uid);
703		if (vap->va_gid == (gid_t)VNOVAL)
704			sp->sa_gid = nfs_xdrneg1;
705		else
706			sp->sa_gid = txdr_unsigned(vap->va_gid);
707		sp->sa_size = txdr_unsigned(vap->va_size);
708		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
709		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
710	}
711	nfsm_request(vp, NFSPROC_SETATTR, td, cred);
712	if (v3) {
		/* Mode may have changed: invalidate the ACCESS cache stamp. */
713		np->n_modestamp = 0;
714		nfsm_wcc_data(vp, wccflag);
715	} else
716		nfsm_loadattr(vp, NULL);
717	m_freem(mrep);
718nfsmout:
719	return (error);
720}
721
722/*
723 * nfs lookup call, one step at a time...
724 * First look in cache
725 * If not found, unlock the directory nfsnode and do the rpc
726 */
727static int
728nfs_lookup(struct vop_lookup_args *ap)
729{
730	struct componentname *cnp = ap->a_cnp;
731	struct vnode *dvp = ap->a_dvp;
732	struct vnode **vpp = ap->a_vpp;
733	int flags = cnp->cn_flags;
734	struct vnode *newvp;
735	struct nfsmount *nmp;
736	caddr_t bpos, dpos;
737	struct mbuf *mreq, *mrep, *md, *mb;
738	long len;
739	nfsfh_t *fhp;
740	struct nfsnode *np;
741	int lockparent, wantparent, error = 0, attrflag, fhsize;
742	int v3 = NFS_ISV3(dvp);
743	struct thread *td = cnp->cn_thread;
744
745	*vpp = NULLVP;
746	cnp->cn_flags &= ~PDIRUNLOCK;
747	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
748	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
749		return (EROFS);
750	if (dvp->v_type != VDIR)
751		return (ENOTDIR);
752	lockparent = flags & LOCKPARENT;
753	wantparent = flags & (LOCKPARENT|WANTPARENT);
754	nmp = VFSTONFS(dvp->v_mount);
755	np = VTONFS(dvp);
	/*
	 * Positive name-cache hit (error != ENOENT): revalidate the cached
	 * vnode against the server by comparing ctime; on mismatch purge it
	 * and fall through to an over-the-wire LOOKUP.
	 */
756	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
757		struct vattr vattr;
758		int vpid;
759
760		if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
761			*vpp = NULLVP;
762			return (error);
763		}
764
		/* vhold keeps the vnode identity alive across the lock dance. */
765		vhold(*vpp);
766		newvp = *vpp;
767		vpid = newvp->v_id;
768		/*
769		 * See the comment starting `Step through' in ufs/ufs_lookup.c
770		 * for an explanation of the locking protocol
771		 */
772		if (dvp == newvp) {
773			VREF(newvp);
774			error = 0;
775		} else if (flags & ISDOTDOT) {
776			VOP_UNLOCK(dvp, 0, td);
777			cnp->cn_flags |= PDIRUNLOCK;
778			error = vget(newvp, LK_EXCLUSIVE, td);
779			if (!error && lockparent && (flags & ISLASTCN)) {
780				error = vn_lock(dvp, LK_EXCLUSIVE, td);
781				if (error == 0)
782					cnp->cn_flags &= ~PDIRUNLOCK;
783			}
784		} else {
785			error = vget(newvp, LK_EXCLUSIVE, td);
786			if (!lockparent || error || !(flags & ISLASTCN)) {
787				VOP_UNLOCK(dvp, 0, td);
788				cnp->cn_flags |= PDIRUNLOCK;
789			}
790		}
791		if (!error) {
			/* v_id unchanged means the vnode was not recycled. */
792			if (vpid == newvp->v_id) {
793			   if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, td)
794			    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
795				nfsstats.lookupcache_hits++;
796				if (cnp->cn_nameiop != LOOKUP &&
797				    (flags & ISLASTCN))
798					cnp->cn_flags |= SAVENAME;
799				vdrop(newvp);
800				return (0);
801			   }
802			   cache_purge(newvp);
803			}
804			vput(newvp);
805			if (lockparent && dvp != newvp && (flags & ISLASTCN))
806				VOP_UNLOCK(dvp, 0, td);
807		}
808		vdrop(newvp);
		/* Re-lock the directory before the wire lookup below. */
809		error = vn_lock(dvp, LK_EXCLUSIVE, td);
810		*vpp = NULLVP;
811		if (error) {
812			cnp->cn_flags |= PDIRUNLOCK;
813			return (error);
814		}
815		cnp->cn_flags &= ~PDIRUNLOCK;
816	}
	/* Cache miss (or stale entry): go to the wire with a LOOKUP RPC. */
817	error = 0;
818	newvp = NULLVP;
819	nfsstats.lookupcache_misses++;
820	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
821	len = cnp->cn_namelen;
822	mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
823		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
824	mb = mreq;
825	bpos = mtod(mb, caddr_t);
826	nfsm_fhtom(dvp, v3);
827	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
828	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
829	if (error) {
830		nfsm_postop_attr(dvp, attrflag);
831		m_freem(mrep);
832		goto nfsmout;
833	}
834	nfsm_getfh(fhp, fhsize, v3);
835
836	/*
837	 * Handle RENAME case...
838	 */
839	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
		/* A rename target equal to its own directory is rejected. */
840		if (NFS_CMPFH(np, fhp, fhsize)) {
841			m_freem(mrep);
842			return (EISDIR);
843		}
844		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
845		if (error) {
846			m_freem(mrep);
847			return (error);
848		}
849		newvp = NFSTOV(np);
850		if (v3) {
851			nfsm_postop_attr(newvp, attrflag);
852			nfsm_postop_attr(dvp, attrflag);
853		} else
854			nfsm_loadattr(newvp, NULL);
855		*vpp = newvp;
856		m_freem(mrep);
857		cnp->cn_flags |= SAVENAME;
858		if (!lockparent) {
859			VOP_UNLOCK(dvp, 0, td);
860			cnp->cn_flags |= PDIRUNLOCK;
861		}
862		return (0);
863	}
864
865	if (flags & ISDOTDOT) {
		/* Unlock child before ".." nget to observe lock ordering. */
866		VOP_UNLOCK(dvp, 0, td);
867		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
868		if (error) {
869			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
870			return (error);
871		}
872		newvp = NFSTOV(np);
873		if (lockparent && (flags & ISLASTCN)) {
874			error = vn_lock(dvp, LK_EXCLUSIVE, td);
875			if (error) {
876				cnp->cn_flags |= PDIRUNLOCK;
877		    		vput(newvp);
878				return (error);
879			}
880		} else
881			cnp->cn_flags |= PDIRUNLOCK;
882	} else if (NFS_CMPFH(np, fhp, fhsize)) {
		/* Lookup of "." — same file handle as the directory itself. */
883		VREF(dvp);
884		newvp = dvp;
885	} else {
886		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
887		if (error) {
888			m_freem(mrep);
889			return (error);
890		}
891		if (!lockparent || !(flags & ISLASTCN)) {
892			cnp->cn_flags |= PDIRUNLOCK;
893			VOP_UNLOCK(dvp, 0, td);
894		}
895		newvp = NFSTOV(np);
896	}
897	if (v3) {
898		nfsm_postop_attr(newvp, attrflag);
899		nfsm_postop_attr(dvp, attrflag);
900	} else
901		nfsm_loadattr(newvp, NULL);
902	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
903		cnp->cn_flags |= SAVENAME;
	/* Record server ctime so later cache hits can be revalidated. */
904	if ((cnp->cn_flags & MAKEENTRY) &&
905	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
906		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
907		cache_enter(dvp, newvp, cnp);
908	}
909	*vpp = newvp;
910	m_freem(mrep);
911nfsmout:
	/*
	 * Error exit: drop any vnode we obtained, and translate a
	 * last-component ENOENT into EJUSTRETURN for CREATE/RENAME so
	 * the caller may create the missing name.
	 */
912	if (error) {
913		if (newvp != NULLVP) {
914			vrele(newvp);
915			*vpp = NULLVP;
916		}
917		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
918		    (flags & ISLASTCN) && error == ENOENT) {
919			if (!lockparent) {
920				VOP_UNLOCK(dvp, 0, td);
921				cnp->cn_flags |= PDIRUNLOCK;
922			}
923			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
924				error = EROFS;
925			else
926				error = EJUSTRETURN;
927		}
928		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
929			cnp->cn_flags |= SAVENAME;
930	}
931	return (error);
932}
933
934/*
935 * nfs read call.
936 * Just call nfs_bioread() to do the work.
937 */
938static int
939nfs_read(struct vop_read_args *ap)
940{
941	struct vnode *vp = ap->a_vp;
942
943	switch (vp->v_type) {
944	case VREG:
945		return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
946	case VDIR:
947		return (EISDIR);
948	default:
949		return (EOPNOTSUPP);
950	}
951}
952
953/*
954 * nfs readlink call
955 */
956static int
957nfs_readlink(struct vop_readlink_args *ap)
958{
959	struct vnode *vp = ap->a_vp;
960
961	if (vp->v_type != VLNK)
962		return (EINVAL);
963	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
964}
965
966/*
967 * Do a readlink rpc.
968 * Called by nfs_doio() from below the buffer cache.
969 */
970int
971nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
972{
973	caddr_t bpos, dpos;
974	int error = 0, len, attrflag;
975	struct mbuf *mreq, *mrep, *md, *mb;
976	int v3 = NFS_ISV3(vp);
977
978	nfsstats.rpccnt[NFSPROC_READLINK]++;
979	mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
980	mb = mreq;
981	bpos = mtod(mb, caddr_t);
982	nfsm_fhtom(vp, v3);
983	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
984	if (v3)
985		nfsm_postop_attr(vp, attrflag);
986	if (!error) {
987		nfsm_strsiz(len, NFS_MAXPATHLEN);
		/*
		 * A reply claiming exactly NFS_MAXPATHLEN may have been
		 * clamped by nfsm_strsiz; trust the cached node size if it
		 * is smaller and non-zero.
		 */
988		if (len == NFS_MAXPATHLEN) {
989			struct nfsnode *np = VTONFS(vp);
990			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
991				len = np->n_size;
992		}
993		nfsm_mtouio(uiop, len);
994	}
995	m_freem(mrep);
996nfsmout:
997	return (error);
998}
999
1000/*
1001 * nfs read rpc call
1002 * Ditto above
1003 */
1004int
1005nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
1006{
1007	u_int32_t *tl;
1008	caddr_t bpos, dpos;
1009	struct mbuf *mreq, *mrep, *md, *mb;
1010	struct nfsmount *nmp;
1011	int error = 0, len, retlen, tsiz, eof, attrflag;
1012	int v3 = NFS_ISV3(vp);
1013
1014#ifndef nolint
1015	eof = 0;
1016#endif
1017	nmp = VFSTONFS(vp->v_mount);
1018	tsiz = uiop->uio_resid;
1019	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
1020		return (EFBIG);
	/* Split the request into rsize-bounded READ RPCs until satisfied. */
1021	while (tsiz > 0) {
1022		nfsstats.rpccnt[NFSPROC_READ]++;
1023		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
1024		mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
1025		mb = mreq;
1026		bpos = mtod(mb, caddr_t);
1027		nfsm_fhtom(vp, v3);
1028		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3);
1029		if (v3) {
			/* NFSv3: 64-bit offset plus 32-bit count. */
1030			txdr_hyper(uiop->uio_offset, tl);
1031			*(tl + 2) = txdr_unsigned(len);
1032		} else {
			/* NFSv2: 32-bit offset, count, and unused totalcount. */
1033			*tl++ = txdr_unsigned(uiop->uio_offset);
1034			*tl++ = txdr_unsigned(len);
1035			*tl = 0;
1036		}
1037		nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred);
1038		if (v3) {
1039			nfsm_postop_attr(vp, attrflag);
1040			if (error) {
1041				m_freem(mrep);
1042				goto nfsmout;
1043			}
1044			tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
1045			eof = fxdr_unsigned(int, *(tl + 1));
1046		} else
1047			nfsm_loadattr(vp, NULL);
1048		nfsm_strsiz(retlen, nmp->nm_rsize);
1049		nfsm_mtouio(uiop, retlen);
1050		m_freem(mrep);
1051		tsiz -= retlen;
		/* Stop on server EOF (v3) or a short read (v2 heuristic). */
1052		if (v3) {
1053			if (eof || retlen == 0) {
1054				tsiz = 0;
1055			}
1056		} else if (retlen < len) {
1057			tsiz = 0;
1058		}
1059	}
1060nfsmout:
1061	return (error);
1062}
1063
1064/*
1065 * nfs write call
1066 */
1067int
1068nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
1069    int *iomode, int *must_commit)
1070{
1071	u_int32_t *tl;
1072	int32_t backup;
1073	caddr_t bpos, dpos;
1074	struct mbuf *mreq, *mrep, *md, *mb;
1075	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1076	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1077	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
1078
1079#ifndef DIAGNOSTIC
1080	if (uiop->uio_iovcnt != 1)
1081		panic("nfs: writerpc iovcnt > 1");
1082#endif
1083	*must_commit = 0;
1084	tsiz = uiop->uio_resid;
1085	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
1086		return (EFBIG);
1087	while (tsiz > 0) {
1088		nfsstats.rpccnt[NFSPROC_WRITE]++;
1089		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
1090		mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
1091			NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
1092		mb = mreq;
1093		bpos = mtod(mb, caddr_t);
1094		nfsm_fhtom(vp, v3);
1095		if (v3) {
1096			tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
1097			txdr_hyper(uiop->uio_offset, tl);
1098			tl += 2;
1099			*tl++ = txdr_unsigned(len);
1100			*tl++ = txdr_unsigned(*iomode);
1101			*tl = txdr_unsigned(len);
1102		} else {
1103			u_int32_t x;
1104
1105			tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
1106			/* Set both "begin" and "current" to non-garbage. */
1107			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1108			*tl++ = x;	/* "begin offset" */
1109			*tl++ = x;	/* "current offset" */
1110			x = txdr_unsigned(len);
1111			*tl++ = x;	/* total to this offset */
1112			*tl = x;	/* size of this write */
1113		}
1114		nfsm_uiotom(uiop, len);
1115		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred);
1116		if (v3) {
1117			wccflag = NFSV3_WCCCHK;
1118			nfsm_wcc_data(vp, wccflag);
1119			if (!error) {
1120				tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED
1121					+ NFSX_V3WRITEVERF);
1122				rlen = fxdr_unsigned(int, *tl++);
1123				if (rlen == 0) {
1124					error = NFSERR_IO;
1125					m_freem(mrep);
1126					break;
1127				} else if (rlen < len) {
1128					backup = len - rlen;
1129					uiop->uio_iov->iov_base =
1130					    (char *)uiop->uio_iov->iov_base -
1131					    backup;
1132					uiop->uio_iov->iov_len += backup;
1133					uiop->uio_offset -= backup;
1134					uiop->uio_resid += backup;
1135					len = rlen;
1136				}
1137				commit = fxdr_unsigned(int, *tl++);
1138
1139				/*
1140				 * Return the lowest committment level
1141				 * obtained by any of the RPCs.
1142				 */
1143				if (committed == NFSV3WRITE_FILESYNC)
1144					committed = commit;
1145				else if (committed == NFSV3WRITE_DATASYNC &&
1146					commit == NFSV3WRITE_UNSTABLE)
1147					committed = commit;
1148				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
1149				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1150					NFSX_V3WRITEVERF);
1151				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
1152				} else if (bcmp((caddr_t)tl,
1153				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
1154				    *must_commit = 1;
1155				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1156					NFSX_V3WRITEVERF);
1157				}
1158			}
1159		} else
1160		    nfsm_loadattr(vp, NULL);
1161		if (wccflag)
1162		    VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
1163		m_freem(mrep);
1164		if (error)
1165			break;
1166		tsiz -= len;
1167	}
1168nfsmout:
1169	if (vp->v_mount->mnt_flag & MNT_ASYNC)
1170		committed = NFSV3WRITE_FILESYNC;
1171	*iomode = committed;
1172	if (error)
1173		uiop->uio_resid = tsiz;
1174	return (error);
1175}
1176
1177/*
1178 * nfs mknod rpc
1179 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1180 * mode set to specify the file type and the size field for rdev.
1181 */
static int
nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
    struct vattr *vap)
{
	struct nfsv2_sattr *sp;
	u_int32_t *tl;
	struct vnode *newvp = NULL;
	struct nfsnode *np = NULL;
	struct vattr vattr;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	u_int32_t rdev;
	int v3 = NFS_ISV3(dvp);

	/* For v2, the device number travels in the size field. */
	if (vap->va_type == VCHR || vap->va_type == VBLK)
		rdev = txdr_unsigned(vap->va_rdev);
	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
		rdev = nfs_xdrneg1;
	else {
		return (EOPNOTSUPP);
	}
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
		return (error);
	}
	nfsstats.rpccnt[NFSPROC_MKNOD]++;
	/* NB: the stray '+' at the start of the next line is a harmless unary plus. */
	mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
		+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	if (v3) {
		/* v3 has a real MKNOD: type, attributes, then major/minor. */
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		*tl++ = vtonfsv3_type(vap->va_type);
		nfsm_v3attrbuild(vap, FALSE);
		if (vap->va_type == VCHR || vap->va_type == VBLK) {
			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(umajor(vap->va_rdev));
			*tl = txdr_unsigned(uminor(vap->va_rdev));
		}
	} else {
		/* v2 kludge: CREATE with IFMT in the mode, rdev in sa_size. */
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = rdev;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	/* nfsm_request() sets "error" and jumps to nfsmout on failure. */
	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred);
	if (!error) {
		nfsm_mtofh(dvp, newvp, v3, gotvp);
		if (!gotvp) {
			if (newvp) {
				vput(newvp);
				newvp = NULL;
			}
			/* No handle in the reply: look the new node up. */
			error = nfs_lookitup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
			if (!error)
				newvp = NFSTOV(np);
		}
	}
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		if (cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, newvp, cnp);
		*vpp = newvp;
	}
	/* The directory changed; invalidate its cached attributes if needed. */
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	return (error);
}
1263
1264/*
1265 * nfs mknod vop
1266 * just call nfs_mknodrpc() to do the work.
1267 */
1268/* ARGSUSED */
1269static int
1270nfs_mknod(struct vop_mknod_args *ap)
1271{
1272
1273	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
1274}
1275
/*
 * Seed for the NFSv3 exclusive-create verifier; bumped in nfs_create()
 * so each O_EXCL create sends a distinct verifier.
 */
static u_long create_verf;
1277/*
1278 * nfs file create call
1279 */
static int
nfs_create(struct vop_create_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	u_int32_t *tl;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vattr vattr;
	int v3 = NFS_ISV3(dvp);

	/*
	 * Oops, not for me..
	 */
	if (vap->va_type == VSOCK)
		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
		return (error);
	}
	if (vap->va_vaflags & VA_EXCLUSIVE)
		fmode |= O_EXCL;
again:
	nfsstats.rpccnt[NFSPROC_CREATE]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	if (v3) {
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		if (fmode & O_EXCL) {
			/*
			 * Exclusive create: send a unique verifier built
			 * from the primary IP address (when available) and
			 * the create_verf counter instead of attributes.
			 */
			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
			tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF);
#ifdef INET
			if (!TAILQ_EMPTY(&in_ifaddrhead))
				*tl++ = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr.s_addr;
			else
#endif
				*tl++ = create_verf;
			*tl = ++create_verf;
		} else {
			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
			nfsm_v3attrbuild(vap, FALSE);
		}
	} else {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = 0;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	/* nfsm_request() sets "error" and jumps to nfsmout on failure. */
	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred);
	if (!error) {
		nfsm_mtofh(dvp, newvp, v3, gotvp);
		if (!gotvp) {
			if (newvp) {
				vput(newvp);
				newvp = NULL;
			}
			/* No handle in the reply: look the new file up. */
			error = nfs_lookitup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
			if (!error)
				newvp = NFSTOV(np);
		}
	}
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	if (error) {
		/* Retry without O_EXCL if the server lacks exclusive create. */
		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
			fmode &= ~O_EXCL;
			goto again;
		}
		if (newvp)
			vput(newvp);
	} else if (v3 && (fmode & O_EXCL)) {
		/*
		 * We are normally called with only a partially initialized
		 * VAP.  Since the NFSv3 spec says that server may use the
		 * file attributes to store the verifier, the spec requires
		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
		 * in atime, but we can't really assume that all servers will
		 * so we ensure that our SETATTR sets both atime and mtime.
		 */
		if (vap->va_mtime.tv_sec == VNOVAL)
			vfs_timestamp(&vap->va_mtime);
		if (vap->va_atime.tv_sec == VNOVAL)
			vap->va_atime = vap->va_mtime;
		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_thread);
	}
	if (!error) {
		if (cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, newvp, cnp);
		*ap->a_vpp = newvp;
	}
	/* The directory changed; invalidate its cached attributes if needed. */
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	return (error);
}
1390
1391/*
1392 * nfs file remove call
1393 * To try and make nfs semantics closer to ufs semantics, a file that has
1394 * other processes using the vnode is renamed instead of removed and then
1395 * removed later on the last close.
1396 * - If v_usecount > 1
1397 *	  If a rename is not already in the works
1398 *	     call nfs_sillyrename() to set it up
1399 *     else
1400 *	  do the remove rpc
1401 */
1402static int
1403nfs_remove(struct vop_remove_args *ap)
1404{
1405	struct vnode *vp = ap->a_vp;
1406	struct vnode *dvp = ap->a_dvp;
1407	struct componentname *cnp = ap->a_cnp;
1408	struct nfsnode *np = VTONFS(vp);
1409	int error = 0;
1410	struct vattr vattr;
1411
1412#ifndef DIAGNOSTIC
1413	if ((cnp->cn_flags & HASBUF) == 0)
1414		panic("nfs_remove: no name");
1415	if (vrefcnt(vp) < 1)
1416		panic("nfs_remove: bad v_usecount");
1417#endif
1418	if (vp->v_type == VDIR)
1419		error = EPERM;
1420	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
1421	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_thread) == 0 &&
1422	    vattr.va_nlink > 1)) {
1423		/*
1424		 * Purge the name cache so that the chance of a lookup for
1425		 * the name succeeding while the remove is in progress is
1426		 * minimized. Without node locking it can still happen, such
1427		 * that an I/O op returns ESTALE, but since you get this if
1428		 * another host removes the file..
1429		 */
1430		cache_purge(vp);
1431		/*
1432		 * throw away biocache buffers, mainly to avoid
1433		 * unnecessary delayed writes later.
1434		 */
1435		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_thread, 1);
1436		/* Do the rpc */
1437		if (error != EINTR && error != EIO)
1438			error = nfs_removerpc(dvp, cnp->cn_nameptr,
1439				cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
1440		/*
1441		 * Kludge City: If the first reply to the remove rpc is lost..
1442		 *   the reply to the retransmitted request will be ENOENT
1443		 *   since the file was in fact removed
1444		 *   Therefore, we cheat and return success.
1445		 */
1446		if (error == ENOENT)
1447			error = 0;
1448	} else if (!np->n_sillyrename)
1449		error = nfs_sillyrename(dvp, vp, cnp);
1450	np->n_attrstamp = 0;
1451	return (error);
1452}
1453
1454/*
1455 * nfs file remove rpc called from nfs_inactive
1456 */
1457int
1458nfs_removeit(struct sillyrename *sp)
1459{
1460
1461	/*
1462	 * Make sure that the directory vnode is still valid.
1463	 * XXX we should lock sp->s_dvp here.
1464	 */
1465	if (sp->s_dvp->v_type == VBAD)
1466		return (0);
1467	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
1468		NULL));
1469}
1470
1471/*
1472 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1473 */
static int
nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
    struct ucred *cred, struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_REMOVE]++;
	/* The request is the directory handle plus the component name. */
	mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE,
		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
	/* nfsm_request() sets "error" and jumps to nfsmout on failure. */
	nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	/* The directory changed; invalidate its cached attributes if needed. */
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	return (error);
}
1500
1501/*
1502 * nfs file rename call
1503 */
1504static int
1505nfs_rename(struct vop_rename_args *ap)
1506{
1507	struct vnode *fvp = ap->a_fvp;
1508	struct vnode *tvp = ap->a_tvp;
1509	struct vnode *fdvp = ap->a_fdvp;
1510	struct vnode *tdvp = ap->a_tdvp;
1511	struct componentname *tcnp = ap->a_tcnp;
1512	struct componentname *fcnp = ap->a_fcnp;
1513	int error;
1514
1515#ifndef DIAGNOSTIC
1516	if ((tcnp->cn_flags & HASBUF) == 0 ||
1517	    (fcnp->cn_flags & HASBUF) == 0)
1518		panic("nfs_rename: no name");
1519#endif
1520	/* Check for cross-device rename */
1521	if ((fvp->v_mount != tdvp->v_mount) ||
1522	    (tvp && (fvp->v_mount != tvp->v_mount))) {
1523		error = EXDEV;
1524		goto out;
1525	}
1526
1527	if (fvp == tvp) {
1528		printf("nfs_rename: fvp == tvp (can't happen)\n");
1529		error = 0;
1530		goto out;
1531	}
1532	if ((error = vn_lock(fvp, LK_EXCLUSIVE, fcnp->cn_thread)) != 0)
1533		goto out;
1534
1535	/*
1536	 * We have to flush B_DELWRI data prior to renaming
1537	 * the file.  If we don't, the delayed-write buffers
1538	 * can be flushed out later after the file has gone stale
1539	 * under NFSV3.  NFSV2 does not have this problem because
1540	 * ( as far as I can tell ) it flushes dirty buffers more
1541	 * often.
1542	 */
1543	VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_thread);
1544	VOP_UNLOCK(fvp, 0, fcnp->cn_thread);
1545	if (tvp)
1546	    VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_thread);
1547
1548	/*
1549	 * If the tvp exists and is in use, sillyrename it before doing the
1550	 * rename of the new file over it.
1551	 * XXX Can't sillyrename a directory.
1552	 */
1553	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
1554		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
1555		vput(tvp);
1556		tvp = NULL;
1557	}
1558
1559	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
1560		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
1561		tcnp->cn_thread);
1562
1563	if (fvp->v_type == VDIR) {
1564		if (tvp != NULL && tvp->v_type == VDIR)
1565			cache_purge(tdvp);
1566		cache_purge(fdvp);
1567	}
1568
1569out:
1570	if (tdvp == tvp)
1571		vrele(tdvp);
1572	else
1573		vput(tdvp);
1574	if (tvp)
1575		vput(tvp);
1576	vrele(fdvp);
1577	vrele(fvp);
1578	/*
1579	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
1580	 */
1581	if (error == ENOENT)
1582		error = 0;
1583	return (error);
1584}
1585
1586/*
1587 * nfs file rename rpc called from nfs_remove() above
1588 */
1589static int
1590nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
1591    struct sillyrename *sp)
1592{
1593
1594	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp,
1595	    sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
1596}
1597
1598/*
1599 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
1600 */
static int
nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
    struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred,
    struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(fdvp);

	nfsstats.rpccnt[NFSPROC_RENAME]++;
	/* Request: (from dir handle, name) followed by (to dir handle, name). */
	mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME,
		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
		nfsm_rndup(tnamelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(fdvp, v3);
	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
	nfsm_fhtom(tdvp, v3);
	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
	/* nfsm_request() sets "error" and jumps to nfsmout on failure. */
	nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
	if (v3) {
		/* Wcc data for both directories touched by the rename. */
		nfsm_wcc_data(fdvp, fwccflag);
		nfsm_wcc_data(tdvp, twccflag);
	}
	m_freem(mrep);
nfsmout:
	/* Both directories changed; invalidate stale cached attributes. */
	VTONFS(fdvp)->n_flag |= NMODIFIED;
	VTONFS(tdvp)->n_flag |= NMODIFIED;
	if (!fwccflag)
		VTONFS(fdvp)->n_attrstamp = 0;
	if (!twccflag)
		VTONFS(tdvp)->n_attrstamp = 0;
	return (error);
}
1636
1637/*
1638 * nfs hard link create call
1639 */
static int
nfs_link(struct vop_link_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3;

	/* Hard links cannot cross mount points. */
	if (vp->v_mount != tdvp->v_mount) {
		return (EXDEV);
	}

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 */
	VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_thread);

	v3 = NFS_ISV3(vp);
	nfsstats.rpccnt[NFSPROC_LINK]++;
	/* Request: handle of the file, handle of the dir, new name. */
	mreq = nfsm_reqhead(vp, NFSPROC_LINK,
		NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_fhtom(tdvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	/* nfsm_request() sets "error" and jumps to nfsmout on failure. */
	nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred);
	if (v3) {
		nfsm_postop_attr(vp, attrflag);
		nfsm_wcc_data(tdvp, wccflag);
	}
	m_freem(mrep);
nfsmout:
	/* Directory changed; drop stale cached attributes as needed. */
	VTONFS(tdvp)->n_flag |= NMODIFIED;
	if (!attrflag)
		VTONFS(vp)->n_attrstamp = 0;
	if (!wccflag)
		VTONFS(tdvp)->n_attrstamp = 0;
	/*
	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
	 */
	if (error == EEXIST)
		error = 0;
	return (error);
}
1690
1691/*
1692 * nfs symbolic link create call
1693 */
static int
nfs_symlink(struct vop_symlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	caddr_t bpos, dpos;
	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vnode *newvp = NULL;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
	slen = strlen(ap->a_target);
	/* Request: dir handle, link name, attributes (v3), target path. */
	mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
	    nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	if (v3) {
		nfsm_v3attrbuild(vap, FALSE);
	}
	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
	if (!v3) {
		/* v2 sends its attributes after the target path. */
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}

	/*
	 * Issue the NFS request and get the rpc response.
	 *
	 * Only NFSv3 responses returning an error of 0 actually return
	 * a file handle that can be converted into newvp without having
	 * to do an extra lookup rpc.
	 */
	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred);
	if (v3) {
		if (error == 0)
			nfsm_mtofh(dvp, newvp, v3, gotvp);
		nfsm_wcc_data(dvp, wccflag);
	}

	/*
	 * out code jumps -> here, mrep is also freed.
	 */

	m_freem(mrep);
nfsmout:

	/*
	 * If we get an EEXIST error, silently convert it to no-error
	 * in case of an NFS retry.
	 */
	if (error == EEXIST)
		error = 0;

	/*
	 * If we do not have (or no longer have) an error, and we could
	 * not extract the newvp from the response due to the request being
	 * NFSv2 or the error being EEXIST.  We have to do a lookup in order
	 * to obtain a newvp to return.
	 */
	if (error == 0 && newvp == NULL) {
		struct nfsnode *np = NULL;

		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, cnp->cn_thread, &np);
		if (!error)
			newvp = NFSTOV(np);
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		*ap->a_vpp = newvp;
	}
	/* The directory changed; invalidate its cached attributes if needed. */
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	return (error);
}
1782
1783/*
1784 * nfs make dir call
1785 */
static int
nfs_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	int len;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	int gotvp = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vattr vattr;
	int v3 = NFS_ISV3(dvp);

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
		return (error);
	}
	len = cnp->cn_namelen;
	nfsstats.rpccnt[NFSPROC_MKDIR]++;
	/* Request: dir handle, new directory name, initial attributes. */
	mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR,
	  NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
	if (v3) {
		nfsm_v3attrbuild(vap, FALSE);
	} else {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	/* nfsm_request() sets "error" and jumps to nfsmout on failure. */
	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
	if (!error)
		nfsm_mtofh(dvp, newvp, v3, gotvp);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	/* The directory changed; invalidate its cached attributes if needed. */
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	/*
	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
	 * if we can succeed in looking up the directory.
	 */
	if (error == EEXIST || (!error && !gotvp)) {
		if (newvp) {
			vput(newvp);
			newvp = NULL;
		}
		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
			cnp->cn_thread, &np);
		if (!error) {
			newvp = NFSTOV(np);
			/* The looked-up name must really be a directory. */
			if (newvp->v_type != VDIR)
				error = EEXIST;
		}
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else
		*ap->a_vpp = newvp;
	return (error);
}
1859
1860/*
1861 * nfs remove directory call
1862 */
static int
nfs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(dvp);

	/* Cannot remove "." (directory from within itself). */
	if (dvp == vp)
		return (EINVAL);
	nfsstats.rpccnt[NFSPROC_RMDIR]++;
	/* Request: parent dir handle plus the directory's name. */
	mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR,
		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	/* nfsm_request() sets "error" and jumps to nfsmout on failure. */
	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	/* Parent changed; drop stale caches for both vnodes. */
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	cache_purge(dvp);
	cache_purge(vp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}
1900
1901/*
1902 * nfs readdir call
1903 */
1904static int
1905nfs_readdir(struct vop_readdir_args *ap)
1906{
1907	struct vnode *vp = ap->a_vp;
1908	struct nfsnode *np = VTONFS(vp);
1909	struct uio *uio = ap->a_uio;
1910	int tresid, error;
1911	struct vattr vattr;
1912
1913	if (vp->v_type != VDIR)
1914		return (EPERM);
1915	/*
1916	 * First, check for hit on the EOF offset cache
1917	 */
1918	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
1919	    (np->n_flag & NMODIFIED) == 0) {
1920		if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0 &&
1921		    !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
1922			nfsstats.direofcache_hits++;
1923			return (0);
1924		}
1925	}
1926
1927	/*
1928	 * Call nfs_bioread() to do the real work.
1929	 */
1930	tresid = uio->uio_resid;
1931	error = nfs_bioread(vp, uio, 0, ap->a_cred);
1932
1933	if (!error && uio->uio_resid == tresid)
1934		nfsstats.direofcache_misses++;
1935	return (error);
1936}
1937
1938/*
1939 * Readdir rpc call.
1940 * Called from below the buffer cache by nfs_doio().
1941 */
1942int
1943nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
1944{
1945	int len, left;
1946	struct dirent *dp = NULL;
1947	u_int32_t *tl;
1948	caddr_t cp;
1949	nfsuint64 *cookiep;
1950	caddr_t bpos, dpos;
1951	struct mbuf *mreq, *mrep, *md, *mb;
1952	nfsuint64 cookie;
1953	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1954	struct nfsnode *dnp = VTONFS(vp);
1955	u_quad_t fileno;
1956	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
1957	int attrflag;
1958	int v3 = NFS_ISV3(vp);
1959
1960#ifndef DIAGNOSTIC
1961	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
1962		(uiop->uio_resid & (DIRBLKSIZ - 1)))
1963		panic("nfs readdirrpc bad uio");
1964#endif
1965
1966	/*
1967	 * If there is no cookie, assume directory was stale.
1968	 */
1969	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
1970	if (cookiep)
1971		cookie = *cookiep;
1972	else
1973		return (NFSERR_BAD_COOKIE);
1974	/*
1975	 * Loop around doing readdir rpc's of size nm_readdirsize
1976	 * truncated to a multiple of DIRBLKSIZ.
1977	 * The stopping criteria is EOF or buffer full.
1978	 */
1979	while (more_dirs && bigenough) {
1980		nfsstats.rpccnt[NFSPROC_READDIR]++;
1981		mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
1982			NFSX_READDIR(v3));
1983		mb = mreq;
1984		bpos = mtod(mb, caddr_t);
1985		nfsm_fhtom(vp, v3);
1986		if (v3) {
1987			tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
1988			*tl++ = cookie.nfsuquad[0];
1989			*tl++ = cookie.nfsuquad[1];
1990			*tl++ = dnp->n_cookieverf.nfsuquad[0];
1991			*tl++ = dnp->n_cookieverf.nfsuquad[1];
1992		} else {
1993			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
1994			*tl++ = cookie.nfsuquad[0];
1995		}
1996		*tl = txdr_unsigned(nmp->nm_readdirsize);
1997		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
1998		if (v3) {
1999			nfsm_postop_attr(vp, attrflag);
2000			if (!error) {
2001				tl = nfsm_dissect(u_int32_t *,
2002				    2 * NFSX_UNSIGNED);
2003				dnp->n_cookieverf.nfsuquad[0] = *tl++;
2004				dnp->n_cookieverf.nfsuquad[1] = *tl;
2005			} else {
2006				m_freem(mrep);
2007				goto nfsmout;
2008			}
2009		}
2010		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2011		more_dirs = fxdr_unsigned(int, *tl);
2012
2013		/* loop thru the dir entries, doctoring them to 4bsd form */
2014		while (more_dirs && bigenough) {
2015			if (v3) {
2016				tl = nfsm_dissect(u_int32_t *,
2017				    3 * NFSX_UNSIGNED);
2018				fileno = fxdr_hyper(tl);
2019				len = fxdr_unsigned(int, *(tl + 2));
2020			} else {
2021				tl = nfsm_dissect(u_int32_t *,
2022				    2 * NFSX_UNSIGNED);
2023				fileno = fxdr_unsigned(u_quad_t, *tl++);
2024				len = fxdr_unsigned(int, *tl);
2025			}
2026			if (len <= 0 || len > NFS_MAXNAMLEN) {
2027				error = EBADRPC;
2028				m_freem(mrep);
2029				goto nfsmout;
2030			}
2031			tlen = nfsm_rndup(len);
2032			if (tlen == len)
2033				tlen += 4;	/* To ensure null termination */
2034			left = DIRBLKSIZ - blksiz;
2035			if ((tlen + DIRHDSIZ) > left) {
2036				dp->d_reclen += left;
2037				uiop->uio_iov->iov_base =
2038				    (char *)uiop->uio_iov->iov_base + left;
2039				uiop->uio_iov->iov_len -= left;
2040				uiop->uio_offset += left;
2041				uiop->uio_resid -= left;
2042				blksiz = 0;
2043			}
2044			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2045				bigenough = 0;
2046			if (bigenough) {
2047				dp = (struct dirent *)uiop->uio_iov->iov_base;
2048				dp->d_fileno = (int)fileno;
2049				dp->d_namlen = len;
2050				dp->d_reclen = tlen + DIRHDSIZ;
2051				dp->d_type = DT_UNKNOWN;
2052				blksiz += dp->d_reclen;
2053				if (blksiz == DIRBLKSIZ)
2054					blksiz = 0;
2055				uiop->uio_offset += DIRHDSIZ;
2056				uiop->uio_resid -= DIRHDSIZ;
2057				uiop->uio_iov->iov_base =
2058				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
2059				uiop->uio_iov->iov_len -= DIRHDSIZ;
2060				nfsm_mtouio(uiop, len);
2061				cp = uiop->uio_iov->iov_base;
2062				tlen -= len;
2063				*cp = '\0';	/* null terminate */
2064				uiop->uio_iov->iov_base =
2065				    (char *)uiop->uio_iov->iov_base + tlen;
2066				uiop->uio_iov->iov_len -= tlen;
2067				uiop->uio_offset += tlen;
2068				uiop->uio_resid -= tlen;
2069			} else
2070				nfsm_adv(nfsm_rndup(len));
2071			if (v3) {
2072				tl = nfsm_dissect(u_int32_t *,
2073				    3 * NFSX_UNSIGNED);
2074			} else {
2075				tl = nfsm_dissect(u_int32_t *,
2076				    2 * NFSX_UNSIGNED);
2077			}
2078			if (bigenough) {
2079				cookie.nfsuquad[0] = *tl++;
2080				if (v3)
2081					cookie.nfsuquad[1] = *tl++;
2082			} else if (v3)
2083				tl += 2;
2084			else
2085				tl++;
2086			more_dirs = fxdr_unsigned(int, *tl);
2087		}
2088		/*
2089		 * If at end of rpc data, get the eof boolean
2090		 */
2091		if (!more_dirs) {
2092			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
2093			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2094		}
2095		m_freem(mrep);
2096	}
2097	/*
2098	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2099	 * by increasing d_reclen for the last record.
2100	 */
2101	if (blksiz > 0) {
2102		left = DIRBLKSIZ - blksiz;
2103		dp->d_reclen += left;
2104		uiop->uio_iov->iov_base =
2105		    (char *)uiop->uio_iov->iov_base + left;
2106		uiop->uio_iov->iov_len -= left;
2107		uiop->uio_offset += left;
2108		uiop->uio_resid -= left;
2109	}
2110
2111	/*
2112	 * We are now either at the end of the directory or have filled the
2113	 * block.
2114	 */
2115	if (bigenough)
2116		dnp->n_direofoffset = uiop->uio_offset;
2117	else {
2118		if (uiop->uio_resid > 0)
2119			printf("EEK! readdirrpc resid > 0\n");
2120		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2121		*cookiep = cookie;
2122	}
2123nfsmout:
2124	return (error);
2125}
2126
2127/*
2128 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
2129 */
/*
 * NFSv3 READDIRPLUS client RPC.
 *
 * Fills uiop with 4.4BSD "struct dirent" records built from the server's
 * reply.  Unlike plain READDIR, each READDIRPLUS entry also carries
 * post-op attributes and a file handle, which are used here to prime the
 * attribute cache and the name cache for each entry.
 *
 * uiop must describe a single iovec whose offset and residual are
 * DIRBLKSIZ aligned.  Returns 0 or an NFS/errno error code.
 */
int
nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int len, left;
	struct dirent *dp;
	u_int32_t *tl;
	caddr_t cp;
	struct vnode *newvp;
	nfsuint64 *cookiep;
	caddr_t bpos, dpos, dpossav1, dpossav2;
	struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
	struct nameidata nami, *ndp = &nami;
	struct componentname *cnp = &ndp->ni_cnd;
	nfsuint64 cookie;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *dnp = VTONFS(vp), *np;
	nfsfh_t *fhp;
	u_quad_t fileno;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
	int attrflag, fhsize;

#ifndef nolint
	dp = NULL;
#endif
#ifndef DIAGNOSTIC
	/*
	 * NOTE(review): the sense of this guard looks inverted -- the sanity
	 * panic is compiled in only when DIAGNOSTIC is NOT defined.  This
	 * matches the historical BSD code; confirm before changing.
	 */
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
		(uiop->uio_resid & (DIRBLKSIZ - 1)))
		panic("nfs readdirplusrpc bad uio");
#endif
	ndp->ni_dvp = vp;
	newvp = NULLVP;

	/*
	 * If there is no cookie, assume directory was stale.
	 */
	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep)
		cookie = *cookiep;
	else
		return (NFSERR_BAD_COOKIE);
	/*
	 * Loop around doing readdir rpc's of size nm_readdirsize
	 * truncated to a multiple of DIRBLKSIZ.
	 * The stopping criteria is EOF or buffer full.
	 */
	while (more_dirs && bigenough) {
		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
		mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
			NFSX_FH(1) + 6 * NFSX_UNSIGNED);
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, 1);
		/* Request args: cookie, cookie verifier, dircount, maxcount. */
 		tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
		*tl++ = cookie.nfsuquad[0];
		*tl++ = cookie.nfsuquad[1];
		*tl++ = dnp->n_cookieverf.nfsuquad[0];
		*tl++ = dnp->n_cookieverf.nfsuquad[1];
		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
		*tl = txdr_unsigned(nmp->nm_rsize);
		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
		nfsm_postop_attr(vp, attrflag);
		if (error) {
			m_freem(mrep);
			goto nfsmout;
		}
		/* Save the new cookie verifier and the "more entries" flag. */
		tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
		dnp->n_cookieverf.nfsuquad[0] = *tl++;
		dnp->n_cookieverf.nfsuquad[1] = *tl++;
		more_dirs = fxdr_unsigned(int, *tl);

		/* loop thru the dir entries, doctoring them to 4bsd form */
		while (more_dirs && bigenough) {
			tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
			fileno = fxdr_hyper(tl);
			len = fxdr_unsigned(int, *(tl + 2));
			if (len <= 0 || len > NFS_MAXNAMLEN) {
				error = EBADRPC;
				m_freem(mrep);
				goto nfsmout;
			}
			tlen = nfsm_rndup(len);
			if (tlen == len)
				tlen += 4;	/* To ensure null termination*/
			left = DIRBLKSIZ - blksiz;
			if ((tlen + DIRHDSIZ) > left) {
				/*
				 * Entry won't fit in the current DIRBLKSIZ
				 * block: pad the previous record out to the
				 * block boundary and start a new block.
				 */
				dp->d_reclen += left;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + left;
				uiop->uio_iov->iov_len -= left;
				uiop->uio_offset += left;
				uiop->uio_resid -= left;
				blksiz = 0;
			}
			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
				bigenough = 0;
			if (bigenough) {
				/* Build the dirent header in the caller's buffer. */
				dp = (struct dirent *)uiop->uio_iov->iov_base;
				dp->d_fileno = (int)fileno;
				dp->d_namlen = len;
				dp->d_reclen = tlen + DIRHDSIZ;
				dp->d_type = DT_UNKNOWN;
				blksiz += dp->d_reclen;
				if (blksiz == DIRBLKSIZ)
					blksiz = 0;
				uiop->uio_offset += DIRHDSIZ;
				uiop->uio_resid -= DIRHDSIZ;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
				uiop->uio_iov->iov_len -= DIRHDSIZ;
				/* Remember the name for cache_enter() below. */
				cnp->cn_nameptr = uiop->uio_iov->iov_base;
				cnp->cn_namelen = len;
				nfsm_mtouio(uiop, len);
				cp = uiop->uio_iov->iov_base;
				tlen -= len;
				*cp = '\0';
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + tlen;
				uiop->uio_iov->iov_len -= tlen;
				uiop->uio_offset += tlen;
				uiop->uio_resid -= tlen;
			} else
				nfsm_adv(nfsm_rndup(len));
			tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
			if (bigenough) {
				cookie.nfsuquad[0] = *tl++;
				cookie.nfsuquad[1] = *tl++;
			} else
				tl += 2;

			/*
			 * Since the attributes are before the file handle
			 * (sigh), we must skip over the attributes and then
			 * come back and get them.
			 */
			attrflag = fxdr_unsigned(int, *tl);
			if (attrflag) {
			    dpossav1 = dpos;
			    mdsav1 = md;
			    nfsm_adv(NFSX_V3FATTR);
			    tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			    doit = fxdr_unsigned(int, *tl);
			    /*
 			     * Skip loading the attrs for "..". There's a
 			     * race between loading the attrs here and
 			     * lookups that look for the directory currently
 			     * being read (in the parent). We try to acquire
 			     * the exclusive lock on ".." here, owning the
 			     * lock on the directory being read. Lookup will
 			     * hold the lock on ".." and try to acquire the
 			     * lock on the directory being read.
 			     *
 			     * There are other ways of fixing this, one would
 			     * be to do a trylock on the ".." vnode and skip
 			     * loading the attrs on ".." if it happens to be
 			     * locked by another process. But skipping the
 			     * attrload on ".." seems the easiest option.
 			     */
 			    if (strcmp(dp->d_name, "..") == 0) {
 				    doit = 0;
 				    /*
 				     * We've already skipped over the attrs,
 				     * skip over the filehandle. And store d_type
 				     * as VDIR.
 				     */
 				    tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 				    i = fxdr_unsigned(int, *tl);
 				    nfsm_adv(nfsm_rndup(i));
 				    dp->d_type = IFTODT(VTTOIF(VDIR));
 			    }
			    if (doit) {
				nfsm_getfh(fhp, fhsize, 1);
				if (NFS_CMPFH(dnp, fhp, fhsize)) {
				    /* The entry is the directory itself. */
				    VREF(vp);
				    newvp = vp;
				    np = dnp;
				} else {
				    error = nfs_nget(vp->v_mount, fhp,
					fhsize, &np);
				    if (error)
					doit = 0;
				    else
					newvp = NFSTOV(np);
				}
			    }
			    if (doit && bigenough) {
				/*
				 * Rewind the mbuf parse position back to the
				 * saved attribute location, load the attrs
				 * into newvp, then restore the position.
				 */
				dpossav2 = dpos;
				dpos = dpossav1;
				mdsav2 = md;
				md = mdsav1;
				nfsm_loadattr(newvp, NULL);
				dpos = dpossav2;
				md = mdsav2;
				dp->d_type =
				    IFTODT(VTTOIF(np->n_vattr.va_type));
				ndp->ni_vp = newvp;
			        cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
			    }
			} else {
			    /* Just skip over the file handle */
			    tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			    i = fxdr_unsigned(int, *tl);
			    nfsm_adv(nfsm_rndup(i));
			}
			if (newvp != NULLVP) {
			    /* Drop the reference taken for this entry. */
			    if (newvp == vp)
				vrele(newvp);
			    else
				vput(newvp);
			    newvp = NULLVP;
			}
			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			more_dirs = fxdr_unsigned(int, *tl);
		}
		/*
		 * If at end of rpc data, get the eof boolean
		 */
		if (!more_dirs) {
			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			more_dirs = (fxdr_unsigned(int, *tl) == 0);
		}
		m_freem(mrep);
	}
	/*
	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
	 * by increasing d_reclen for the last record.
	 */
	if (blksiz > 0) {
		left = DIRBLKSIZ - blksiz;
		dp->d_reclen += left;
		uiop->uio_iov->iov_base =
		    (char *)uiop->uio_iov->iov_base + left;
		uiop->uio_iov->iov_len -= left;
		uiop->uio_offset += left;
		uiop->uio_resid -= left;
	}

	/*
	 * We are now either at the end of the directory or have filled the
	 * block.
	 */
	if (bigenough)
		dnp->n_direofoffset = uiop->uio_offset;
	else {
		if (uiop->uio_resid > 0)
			printf("EEK! readdirplusrpc resid > 0\n");
		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
		*cookiep = cookie;
	}
nfsmout:
	if (newvp != NULLVP) {
	        if (newvp == vp)
			vrele(newvp);
		else
			vput(newvp);
		newvp = NULLVP;
	}
	return (error);
}
2388
2389/*
2390 * Silly rename. To make the NFS filesystem that is stateless look a little
2391 * more like the "ufs" a remove of an active vnode is translated to a rename
2392 * to a funny looking filename that is removed by nfs_inactive on the
2393 * nfsnode. There is the potential for another process on a different client
2394 * to create the same funny name between the nfs_lookitup() fails and the
2395 * nfs_rename() completes, but...
2396 */
2397static int
2398nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2399{
2400	struct sillyrename *sp;
2401	struct nfsnode *np;
2402	int error;
2403	short pid;
2404
2405	cache_purge(dvp);
2406	np = VTONFS(vp);
2407#ifndef DIAGNOSTIC
2408	if (vp->v_type == VDIR)
2409		panic("nfs: sillyrename dir");
2410#endif
2411	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
2412		M_NFSREQ, M_WAITOK);
2413	sp->s_cred = crhold(cnp->cn_cred);
2414	sp->s_dvp = dvp;
2415	sp->s_removeit = nfs_removeit;
2416	VREF(dvp);
2417
2418	/* Fudge together a funny name */
2419	pid = cnp->cn_thread->td_proc->p_pid;
2420	sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid);
2421
2422	/* Try lookitups until we get one that isn't there */
2423	while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2424		cnp->cn_thread, NULL) == 0) {
2425		sp->s_name[4]++;
2426		if (sp->s_name[4] > 'z') {
2427			error = EINVAL;
2428			goto bad;
2429		}
2430	}
2431	error = nfs_renameit(dvp, cnp, sp);
2432	if (error)
2433		goto bad;
2434	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2435		cnp->cn_thread, &np);
2436	np->n_sillyrename = sp;
2437	return (0);
2438bad:
2439	vrele(sp->s_dvp);
2440	crfree(sp->s_cred);
2441	free((caddr_t)sp, M_NFSREQ);
2442	return (error);
2443}
2444
2445/*
2446 * Look up a file name and optionally either update the file handle or
2447 * allocate an nfsnode, depending on the value of npp.
2448 * npp == NULL	--> just do the lookup
2449 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
2450 *			handled too
2451 * *npp != NULL --> update the file handle in the vnode
2452 */
static int
nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
    struct thread *td, struct nfsnode **npp)
{
	struct vnode *newvp = NULL;
	struct nfsnode *np, *dnp = VTONFS(dvp);
	caddr_t bpos, dpos;
	int error = 0, fhlen, attrflag;
	struct mbuf *mreq, *mrep, *md, *mb;
	nfsfh_t *nfhp;
	int v3 = NFS_ISV3(dvp);

	/* Build and send the LOOKUP request for "name" in dvp. */
	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(name, len, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
	if (npp && !error) {
		nfsm_getfh(nfhp, fhlen, v3);
		if (*npp) {
		    /*
		     * Caller supplied an nfsnode: just replace its file
		     * handle, switching between the inline small handle
		     * and a malloc'd big one as the new length requires.
		     */
		    np = *npp;
		    if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
			free((caddr_t)np->n_fhp, M_NFSBIGFH);
			np->n_fhp = &np->n_fh;
		    } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
			np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK);
		    bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
		    np->n_fhsize = fhlen;
		    newvp = NFSTOV(np);
		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
		    /* The result is the directory itself ("."). */
		    VREF(dvp);
		    newvp = dvp;
		} else {
		    /* Allocate (or find) an nfsnode for the new handle. */
		    error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
		    if (error) {
			m_freem(mrep);
			return (error);
		    }
		    newvp = NFSTOV(np);
		}
		if (v3) {
			nfsm_postop_attr(newvp, attrflag);
			/*
			 * A freshly allocated nfsnode without attributes is
			 * useless; drop it and report ENOENT.
			 */
			if (!attrflag && *npp == NULL) {
				m_freem(mrep);
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
				return (ENOENT);
			}
		} else
			nfsm_loadattr(newvp, NULL);
	}
	m_freem(mrep);
nfsmout:
	/* Hand the new nfsnode back, or clean up on error. */
	if (npp && *npp == NULL) {
		if (error) {
			if (newvp) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
		} else
			*npp = np;
	}
	return (error);
}
2524
2525/*
2526 * Nfs Version 3 commit rpc
2527 */
/*
 * Issue an NFSv3 COMMIT rpc for the byte range [offset, offset + cnt)
 * of vp, asking the server to flush previously-written data to stable
 * storage.
 *
 * Returns 0 on success, an rpc error, or NFSERR_STALEWRITEVERF when the
 * server's write verifier changed (server reboot may have lost cached
 * writes); in that case the new verifier is recorded in the mount point
 * and the caller must redo the uncommitted writes.
 */
int
nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
    struct thread *td)
{
	u_int32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;

	/* Nothing to commit until a write verifier has been seen. */
	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
		return (0);
	nfsstats.rpccnt[NFSPROC_COMMIT]++;
	mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, 1);
	/* Arguments: 64-bit offset followed by the 32-bit count. */
	tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED);
	txdr_hyper(offset, tl);
	tl += 2;
	*tl = txdr_unsigned(cnt);
	nfsm_request(vp, NFSPROC_COMMIT, td, cred);
	nfsm_wcc_data(vp, wccflag);
	if (!error) {
		tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF);
		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
			NFSX_V3WRITEVERF)) {
			/*
			 * Verifier changed: save the new one and tell the
			 * caller the committed writes may need redoing.
			 */
			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
				NFSX_V3WRITEVERF);
			error = NFSERR_STALEWRITEVERF;
		}
	}
	m_freem(mrep);
nfsmout:
	return (error);
}
2564
2565/*
2566 * Strategy routine.
2567 * For async requests when nfsiod(s) are running, queue the request by
2568 * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
2569 * request.
2570 */
2571static int
2572nfs_strategy(struct vop_strategy_args *ap)
2573{
2574	struct buf *bp = ap->a_bp;
2575	struct ucred *cr;
2576	struct thread *td;
2577	int error = 0;
2578
2579	KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
2580	KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp));
2581
2582	if (bp->b_flags & B_ASYNC)
2583		td = NULL;
2584	else
2585		td = curthread;	/* XXX */
2586
2587	if (bp->b_iocmd == BIO_READ)
2588		cr = bp->b_rcred;
2589	else
2590		cr = bp->b_wcred;
2591
2592	/*
2593	 * If the op is asynchronous and an i/o daemon is waiting
2594	 * queue the request, wake it up and wait for completion
2595	 * otherwise just do it ourselves.
2596	 */
2597	if ((bp->b_flags & B_ASYNC) == 0 ||
2598		nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, td))
2599		error = nfs_doio(ap->a_vp, bp, cr, td);
2600	return (error);
2601}
2602
2603/*
2604 * fsync vnode op. Just call nfs_flush() with commit == 1.
2605 */
2606/* ARGSUSED */
static int
nfs_fsync(struct vop_fsync_args *ap)
{

	/* Flush all dirty buffers; the final 1 requests a commit pass. */
	return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_td, 1));
}
2613
2614/*
2615 * Flush all the blocks associated with a vnode.
2616 * 	Walk through the buffer pool and push any dirty pages
2617 *	associated with the vnode.
2618 */
2619static int
2620nfs_flush(struct vnode *vp, struct ucred *cred, int waitfor, struct thread *td,
2621    int commit)
2622{
2623	struct nfsnode *np = VTONFS(vp);
2624	struct buf *bp;
2625	int i;
2626	struct buf *nbp;
2627	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2628	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
2629	int passone = 1;
2630	u_quad_t off, endoff, toff;
2631	struct ucred* wcred = NULL;
2632	struct buf **bvec = NULL;
2633#ifndef NFS_COMMITBVECSIZ
2634#define NFS_COMMITBVECSIZ	20
2635#endif
2636	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
2637	int bvecsize = 0, bveccount;
2638
2639	if (nmp->nm_flag & NFSMNT_INT)
2640		slpflag = PCATCH;
2641	if (!commit)
2642		passone = 0;
2643	/*
2644	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
2645	 * server, but nas not been committed to stable storage on the server
2646	 * yet. On the first pass, the byte range is worked out and the commit
2647	 * rpc is done. On the second pass, nfs_writebp() is called to do the
2648	 * job.
2649	 */
2650again:
2651	off = (u_quad_t)-1;
2652	endoff = 0;
2653	bvecpos = 0;
2654	if (NFS_ISV3(vp) && commit) {
2655		s = splbio();
2656		if (bvec != NULL && bvec != bvec_on_stack)
2657			free(bvec, M_TEMP);
2658		/*
2659		 * Count up how many buffers waiting for a commit.
2660		 */
2661		bveccount = 0;
2662		VI_LOCK(vp);
2663		TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
2664			if (BUF_REFCNT(bp) == 0 &&
2665			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
2666				== (B_DELWRI | B_NEEDCOMMIT))
2667				bveccount++;
2668		}
2669		/*
2670		 * Allocate space to remember the list of bufs to commit.  It is
2671		 * important to use M_NOWAIT here to avoid a race with nfs_write.
2672		 * If we can't get memory (for whatever reason), we will end up
2673		 * committing the buffers one-by-one in the loop below.
2674		 */
2675		if (bveccount > NFS_COMMITBVECSIZ) {
2676			/*
2677			 * Release the vnode interlock to avoid a lock
2678			 * order reversal.
2679			 */
2680			VI_UNLOCK(vp);
2681			bvec = (struct buf **)
2682				malloc(bveccount * sizeof(struct buf *),
2683				       M_TEMP, M_NOWAIT);
2684			VI_LOCK(vp);
2685			if (bvec == NULL) {
2686				bvec = bvec_on_stack;
2687				bvecsize = NFS_COMMITBVECSIZ;
2688			} else
2689				bvecsize = bveccount;
2690		} else {
2691			bvec = bvec_on_stack;
2692			bvecsize = NFS_COMMITBVECSIZ;
2693		}
2694		TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
2695			if (bvecpos >= bvecsize)
2696				break;
2697			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
2698				nbp = TAILQ_NEXT(bp, b_bobufs);
2699				continue;
2700			}
2701			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
2702			    (B_DELWRI | B_NEEDCOMMIT)) {
2703				BUF_UNLOCK(bp);
2704				nbp = TAILQ_NEXT(bp, b_bobufs);
2705				continue;
2706			}
2707			VI_UNLOCK(vp);
2708			bremfree(bp);
2709			/*
2710			 * Work out if all buffers are using the same cred
2711			 * so we can deal with them all with one commit.
2712			 *
2713			 * NOTE: we are not clearing B_DONE here, so we have
2714			 * to do it later on in this routine if we intend to
2715			 * initiate I/O on the bp.
2716			 *
2717			 * Note: to avoid loopback deadlocks, we do not
2718			 * assign b_runningbufspace.
2719			 */
2720			if (wcred == NULL)
2721				wcred = bp->b_wcred;
2722			else if (wcred != bp->b_wcred)
2723				wcred = NOCRED;
2724			vfs_busy_pages(bp, 1);
2725
2726			VI_LOCK(vp);
2727			/*
2728			 * bp is protected by being locked, but nbp is not
2729			 * and vfs_busy_pages() may sleep.  We have to
2730			 * recalculate nbp.
2731			 */
2732			nbp = TAILQ_NEXT(bp, b_bobufs);
2733
2734			/*
2735			 * A list of these buffers is kept so that the
2736			 * second loop knows which buffers have actually
2737			 * been committed. This is necessary, since there
2738			 * may be a race between the commit rpc and new
2739			 * uncommitted writes on the file.
2740			 */
2741			bvec[bvecpos++] = bp;
2742			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2743				bp->b_dirtyoff;
2744			if (toff < off)
2745				off = toff;
2746			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
2747			if (toff > endoff)
2748				endoff = toff;
2749		}
2750		splx(s);
2751		VI_UNLOCK(vp);
2752	}
2753	if (bvecpos > 0) {
2754		/*
2755		 * Commit data on the server, as required.
2756		 * If all bufs are using the same wcred, then use that with
2757		 * one call for all of them, otherwise commit each one
2758		 * separately.
2759		 */
2760		if (wcred != NOCRED)
2761			retv = nfs_commit(vp, off, (int)(endoff - off),
2762					  wcred, td);
2763		else {
2764			retv = 0;
2765			for (i = 0; i < bvecpos; i++) {
2766				off_t off, size;
2767				bp = bvec[i];
2768				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2769					bp->b_dirtyoff;
2770				size = (u_quad_t)(bp->b_dirtyend
2771						  - bp->b_dirtyoff);
2772				retv = nfs_commit(vp, off, (int)size,
2773						  bp->b_wcred, td);
2774				if (retv) break;
2775			}
2776		}
2777
2778		if (retv == NFSERR_STALEWRITEVERF)
2779			nfs_clearcommit(vp->v_mount);
2780
2781		/*
2782		 * Now, either mark the blocks I/O done or mark the
2783		 * blocks dirty, depending on whether the commit
2784		 * succeeded.
2785		 */
2786		for (i = 0; i < bvecpos; i++) {
2787			bp = bvec[i];
2788			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
2789			if (retv) {
2790				/*
2791				 * Error, leave B_DELWRI intact
2792				 */
2793				vfs_unbusy_pages(bp);
2794				brelse(bp);
2795			} else {
2796				/*
2797				 * Success, remove B_DELWRI ( bundirty() ).
2798				 *
2799				 * b_dirtyoff/b_dirtyend seem to be NFS
2800				 * specific.  We should probably move that
2801				 * into bundirty(). XXX
2802				 */
2803				s = splbio();
2804				bufobj_wref(&vp->v_bufobj);
2805				bp->b_flags |= B_ASYNC;
2806				bundirty(bp);
2807				bp->b_flags &= ~B_DONE;
2808				bp->b_ioflags &= ~BIO_ERROR;
2809				bp->b_dirtyoff = bp->b_dirtyend = 0;
2810				splx(s);
2811				bufdone(bp);
2812			}
2813		}
2814	}
2815
2816	/*
2817	 * Start/do any write(s) that are required.
2818	 */
2819loop:
2820	s = splbio();
2821	VI_LOCK(vp);
2822	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
2823		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
2824			if (waitfor != MNT_WAIT || passone)
2825				continue;
2826
2827			error = BUF_TIMELOCK(bp,
2828			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
2829			    VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
2830			splx(s);
2831			if (error == 0)
2832				panic("nfs_fsync: inconsistent lock");
2833			if (error == ENOLCK)
2834				goto loop;
2835			if (nfs_sigintr(nmp, NULL, td)) {
2836				error = EINTR;
2837				goto done;
2838			}
2839			if (slpflag == PCATCH) {
2840				slpflag = 0;
2841				slptimeo = 2 * hz;
2842			}
2843			goto loop;
2844		}
2845		if ((bp->b_flags & B_DELWRI) == 0)
2846			panic("nfs_fsync: not dirty");
2847		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
2848			BUF_UNLOCK(bp);
2849			continue;
2850		}
2851		VI_UNLOCK(vp);
2852		bremfree(bp);
2853		if (passone || !commit)
2854		    bp->b_flags |= B_ASYNC;
2855		else
2856		    bp->b_flags |= B_ASYNC;
2857		splx(s);
2858		bwrite(bp);
2859		if (nfs_sigintr(nmp, NULL, td)) {
2860			error = EINTR;
2861			goto done;
2862		}
2863		goto loop;
2864	}
2865	splx(s);
2866	if (passone) {
2867		passone = 0;
2868		VI_UNLOCK(vp);
2869		goto again;
2870	}
2871	if (waitfor == MNT_WAIT) {
2872		while (vp->v_bufobj.bo_numoutput) {
2873			error = bufobj_wwait(&vp->v_bufobj, slpflag, slptimeo);
2874			if (error) {
2875			    VI_UNLOCK(vp);
2876			    error = nfs_sigintr(nmp, NULL, td);
2877			    if (error)
2878				goto done;
2879			    if (slpflag == PCATCH) {
2880				slpflag = 0;
2881				slptimeo = 2 * hz;
2882			    }
2883			    VI_LOCK(vp);
2884			}
2885		}
2886		if (vp->v_bufobj.bo_dirty.bv_cnt != 0 && commit) {
2887			VI_UNLOCK(vp);
2888			goto loop;
2889		}
2890	}
2891	VI_UNLOCK(vp);
2892	if (np->n_flag & NWRITEERR) {
2893		error = np->n_error;
2894		np->n_flag &= ~NWRITEERR;
2895	}
2896  	if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0)
2897  		np->n_flag &= ~NMODIFIED;
2898done:
2899	if (bvec != NULL && bvec != bvec_on_stack)
2900		free(bvec, M_TEMP);
2901	return (error);
2902}
2903
2904/*
2905 * NFS advisory byte-level locks.
2906 */
2907static int
2908nfs_advlock(struct vop_advlock_args *ap)
2909{
2910
2911	if ((VFSTONFS(ap->a_vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
2912		struct nfsnode *np = VTONFS(ap->a_vp);
2913
2914		return (lf_advlock(ap, &(np->n_lockf), np->n_size));
2915	}
2916	return (nfs_dolock(ap));
2917}
2918
2919/*
2920 * Print out the contents of an nfsnode.
2921 */
2922static int
2923nfs_print(struct vop_print_args *ap)
2924{
2925	struct vnode *vp = ap->a_vp;
2926	struct nfsnode *np = VTONFS(vp);
2927
2928	printf("\tfileid %ld fsid 0x%x",
2929	   np->n_vattr.va_fileid, np->n_vattr.va_fsid);
2930	if (vp->v_type == VFIFO)
2931		fifo_printinfo(vp);
2932	printf("\n");
2933	return (0);
2934}
2935
2936/*
2937 * This is the "real" nfs::bwrite(struct buf*).
2938 * We set B_CACHE if this is a VMIO buffer.
2939 */
int
nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
{
	int s;
	int oldflags = bp->b_flags;
#if 0
	int retv = 1;
	off_t off;
#endif

	if (BUF_REFCNT(bp) == 0)
		panic("bwrite: buffer is not locked???");

	/* Invalidated buffers are released, not written. */
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return(0);
	}

	/* Mark the buffer cache-valid so later reads can be satisfied. */
	bp->b_flags |= B_CACHE;

	/*
	 * Undirty the bp.  We will redirty it later if the I/O fails.
	 */

	s = splbio();
	bundirty(bp);
	bp->b_flags &= ~B_DONE;
	bp->b_ioflags &= ~BIO_ERROR;
	bp->b_iocmd = BIO_WRITE;

	/* Account the write in progress against the buffer's bufobj. */
	bufobj_wref(bp->b_bufobj);
	curthread->td_proc->p_stats->p_ru.ru_oublock++;
	splx(s);

	/*
	 * Note: to avoid loopback deadlocks, we do not
	 * assign b_runningbufspace.
	 */
	vfs_busy_pages(bp, 1);

	BUF_KERNPROC(bp);
	bp->b_iooffset = dbtob(bp->b_blkno);
	bstrategy(bp);

	if( (oldflags & B_ASYNC) == 0) {
		/* Synchronous write: wait for completion, then clean up. */
		int rtval = bufwait(bp);

		if (oldflags & B_DELWRI) {
			s = splbio();
			reassignbuf(bp);
			splx(s);
		}

		brelse(bp);
		return (rtval);
	}

	return (0);
}
2999
3000/*
3001 * nfs special file access vnode op.
3002 * Essentially just get vattr and then imitate iaccess() since the device is
3003 * local to the client.
3004 */
3005static int
3006nfsspec_access(struct vop_access_args *ap)
3007{
3008	struct vattr *vap;
3009	struct ucred *cred = ap->a_cred;
3010	struct vnode *vp = ap->a_vp;
3011	mode_t mode = ap->a_mode;
3012	struct vattr vattr;
3013	int error;
3014
3015	/*
3016	 * Disallow write attempts on filesystems mounted read-only;
3017	 * unless the file is a socket, fifo, or a block or character
3018	 * device resident on the filesystem.
3019	 */
3020	if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3021		switch (vp->v_type) {
3022		case VREG:
3023		case VDIR:
3024		case VLNK:
3025			return (EROFS);
3026		default:
3027			break;
3028		}
3029	}
3030	vap = &vattr;
3031	error = VOP_GETATTR(vp, vap, cred, ap->a_td);
3032	if (error)
3033		return (error);
3034	return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
3035	    mode, cred, NULL));
3036}
3037
3038/*
3039 * Read wrapper for fifos.
3040 */
static int
nfsfifo_read(struct vop_read_args *ap)
{
	struct nfsnode *np = VTONFS(ap->a_vp);

	/*
	 * Record the access locally (pushed to the server on close),
	 * then hand the read to the generic fifo code.
	 */
	np->n_flag |= NACC;
	getnanotime(&np->n_atim);
	return (vop_read(&fifo_specops, ap));
}
3053
3054/*
3055 * Write wrapper for fifos.
3056 */
static int
nfsfifo_write(struct vop_write_args *ap)
{
	struct nfsnode *np = VTONFS(ap->a_vp);

	/*
	 * Record the update locally (pushed to the server on close),
	 * then hand the write to the generic fifo code.
	 */
	np->n_flag |= NUPD;
	getnanotime(&np->n_mtim);
	return (vop_write(&fifo_specops, ap));
}
3069
3070/*
3071 * Close wrapper for fifos.
3072 *
3073 * Update the times on the nfsnode then do fifo close.
3074 */
static int
nfsfifo_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	struct timespec ts;

	/*
	 * If the fifo was read or written, record the current time as its
	 * access/update time; on the last close of a writable mount, push
	 * those times to the server via VOP_SETATTR().
	 */
	if (np->n_flag & (NACC | NUPD)) {
		getnanotime(&ts);
		if (np->n_flag & NACC)
			np->n_atim = ts;
		if (np->n_flag & NUPD)
			np->n_mtim = ts;
		np->n_flag |= NCHG;
		if (vrefcnt(vp) == 1 &&
		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			VATTR_NULL(&vattr);
			if (np->n_flag & NACC)
				vattr.va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vattr.va_mtime = np->n_mtim;
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_td);
			/* Best effort: a SETATTR failure is ignored here. */
			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td);
			VOP_UNLOCK(vp, 0, ap->a_td);
		}
	}
	return (vop_close(&fifo_specops, ap));
}
3104
3105/*
3106 * Just call nfs_writebp() with the force argument set to 1.
3107 *
3108 * NOTE: B_DONE may or may not be set in a_bp on call.
3109 */
static int
nfs_bwrite(struct buf *bp)
{

	/* Delegate to nfs_writebp(); its force argument is unused. */
	return (nfs_writebp(bp, 1, curthread));
}
3116
/*
 * Buffer-cache operations vector for NFS vnodes: writes go through
 * nfs_writebp() (via nfs_bwrite()); strategy uses the generic
 * bufstrategy().
 */
struct buf_ops buf_ops_nfs = {
	.bop_name	=	"buf_ops_nfs",
	.bop_write	=	nfs_bwrite,
	.bop_strategy	=	bufstrategy,
};
3122