nfs_vnops.c revision 59391
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
37 * $FreeBSD: head/sys/nfsclient/nfs_vnops.c 59391 2000-04-19 14:58:28Z phk $
38 */
39
40
41/*
42 * vnode op calls for Sun NFS version 2 and 3
43 */
44
45#include "opt_inet.h"
46
47#include <sys/param.h>
48#include <sys/kernel.h>
49#include <sys/systm.h>
50#include <sys/resourcevar.h>
51#include <sys/proc.h>
52#include <sys/mount.h>
53#include <sys/buf.h>
54#include <sys/malloc.h>
55#include <sys/mbuf.h>
56#include <sys/namei.h>
57#include <sys/socket.h>
58#include <sys/vnode.h>
59#include <sys/dirent.h>
60#include <sys/fcntl.h>
61#include <sys/lockf.h>
62#include <sys/stat.h>
63#include <sys/sysctl.h>
64
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_zone.h>
68
69#include <miscfs/fifofs/fifo.h>
70
71#include <nfs/rpcv2.h>
72#include <nfs/nfsproto.h>
73#include <nfs/nfs.h>
74#include <nfs/nfsnode.h>
75#include <nfs/nfsmount.h>
76#include <nfs/xdr_subs.h>
77#include <nfs/nfsm_subs.h>
78#include <nfs/nqnfs.h>
79
80#include <net/if.h>
81#include <netinet/in.h>
82#include <netinet/in_var.h>
83
84/* Boolean constants; TRUE is handed to nfsm_v3attrbuild() later in this file. */
85#define	TRUE	1
86#define	FALSE	0
87
88/*
 * Shim for the FreeBSD-current merged buffer cache: when B_VMIO is not
 * defined, vfs_busy_pages() expands to nothing so callers need no #ifdef
 * of their own.  It is unfortunate that this work is not done inside
 * getblk() and brelse(), which would make the call unnecessary here.
 */
93#ifndef B_VMIO
94#define vfs_busy_pages(bp, f)
95#endif
96
/*
 * Forward declarations of the file-local vnode operations and helpers.
 * They are needed because the operation tables that follow take the
 * functions' addresses before the definitions appear.
 */
/* Handlers installed in the special-device and fifo operation tables. */
97static int	nfsspec_read __P((struct vop_read_args *));
98static int	nfsspec_write __P((struct vop_write_args *));
99static int	nfsfifo_read __P((struct vop_read_args *));
100static int	nfsfifo_write __P((struct vop_write_args *));
101static int	nfsspec_close __P((struct vop_close_args *));
102static int	nfsfifo_close __P((struct vop_close_args *));
/* nfs_poll is only an alias: the main table's poll entry resolves to vop_nopoll. */
103#define nfs_poll vop_nopoll
/* Internal helpers (not vnode-op entry points). */
104static int	nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int));
105static int	nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *));
/* Handlers installed in the main NFS operation table. */
106static	int	nfs_lookup __P((struct vop_lookup_args *));
107static	int	nfs_create __P((struct vop_create_args *));
108static	int	nfs_mknod __P((struct vop_mknod_args *));
109static	int	nfs_open __P((struct vop_open_args *));
110static	int	nfs_close __P((struct vop_close_args *));
111static	int	nfs_access __P((struct vop_access_args *));
112static	int	nfs_getattr __P((struct vop_getattr_args *));
113static	int	nfs_setattr __P((struct vop_setattr_args *));
114static	int	nfs_read __P((struct vop_read_args *));
115static	int	nfs_mmap __P((struct vop_mmap_args *));
116static	int	nfs_fsync __P((struct vop_fsync_args *));
117static	int	nfs_remove __P((struct vop_remove_args *));
118static	int	nfs_link __P((struct vop_link_args *));
119static	int	nfs_rename __P((struct vop_rename_args *));
120static	int	nfs_mkdir __P((struct vop_mkdir_args *));
121static	int	nfs_rmdir __P((struct vop_rmdir_args *));
122static	int	nfs_symlink __P((struct vop_symlink_args *));
123static	int	nfs_readdir __P((struct vop_readdir_args *));
124static	int	nfs_bmap __P((struct vop_bmap_args *));
125static	int	nfs_strategy __P((struct vop_strategy_args *));
/* Name-based helpers; their definitions are outside this part of the file. */
126static	int	nfs_lookitup __P((struct vnode *, const char *, int,
127			struct ucred *, struct proc *, struct nfsnode **));
128static	int	nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *));
129static int	nfsspec_access __P((struct vop_access_args *));
130static int	nfs_readlink __P((struct vop_readlink_args *));
131static int	nfs_print __P((struct vop_print_args *));
132static int	nfs_advlock __P((struct vop_advlock_args *));
133static int	nfs_bwrite __P((struct vop_bwrite_args *));
134/*
 * Global vfs data structures for nfs
 *
 * nfsv2_vnodeop_entries pairs each vnode operation descriptor with the
 * function implementing it for ordinary NFS vnodes.  Despite the "v2" in
 * the name the handlers serve both protocol versions; they test
 * NFS_ISV3() internally where the protocols differ.
 *
 * NOTE(review): operations with no entry are presumably routed to the
 * handler given for vop_default_desc (vop_defaultop) -- confirm against
 * the vnode operation vector setup code.
 */
137vop_t **nfsv2_vnodeop_p;
138static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
139	{ &vop_default_desc,		(vop_t *) vop_defaultop },
140	{ &vop_access_desc,		(vop_t *) nfs_access },
141	{ &vop_advlock_desc,		(vop_t *) nfs_advlock },
142	{ &vop_bmap_desc,		(vop_t *) nfs_bmap },
143	{ &vop_bwrite_desc,		(vop_t *) nfs_bwrite },
144	{ &vop_close_desc,		(vop_t *) nfs_close },
145	{ &vop_create_desc,		(vop_t *) nfs_create },
146	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
147	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
148	{ &vop_getpages_desc,		(vop_t *) nfs_getpages },
149	{ &vop_putpages_desc,		(vop_t *) nfs_putpages },
150	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
151	{ &vop_lease_desc,		(vop_t *) vop_null },
152	{ &vop_link_desc,		(vop_t *) nfs_link },
153	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
154	{ &vop_lookup_desc,		(vop_t *) nfs_lookup },
155	{ &vop_mkdir_desc,		(vop_t *) nfs_mkdir },
156	{ &vop_mknod_desc,		(vop_t *) nfs_mknod },
157	{ &vop_mmap_desc,		(vop_t *) nfs_mmap },
158	{ &vop_open_desc,		(vop_t *) nfs_open },
	/* nfs_poll is #defined to vop_nopoll earlier in this file. */
159	{ &vop_poll_desc,		(vop_t *) nfs_poll },
160	{ &vop_print_desc,		(vop_t *) nfs_print },
161	{ &vop_read_desc,		(vop_t *) nfs_read },
162	{ &vop_readdir_desc,		(vop_t *) nfs_readdir },
163	{ &vop_readlink_desc,		(vop_t *) nfs_readlink },
164	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
165	{ &vop_remove_desc,		(vop_t *) nfs_remove },
166	{ &vop_rename_desc,		(vop_t *) nfs_rename },
167	{ &vop_rmdir_desc,		(vop_t *) nfs_rmdir },
168	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
169	{ &vop_strategy_desc,		(vop_t *) nfs_strategy },
170	{ &vop_symlink_desc,		(vop_t *) nfs_symlink },
171	{ &vop_write_desc,		(vop_t *) nfs_write },
172	{ NULL, NULL }
173};
/* Ties the table above to the nfsv2_vnodeop_p vector pointer. */
174static struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
175	{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
176VNODEOP_SET(nfsv2_vnodeop_opv_desc);
177
178/*
 * Special device vnode ops
 *
 * Only the operations listed here get NFS-specific handlers (attribute
 * handling, access checks, close, read/write wrappers, reclaim); the
 * vop_default_desc entry names spec_vnoperate for the rest.
 * NOTE(review): that the default entry catches every unlisted operation
 * is the usual vnodeopv convention -- confirm.
 */
181vop_t **spec_nfsv2nodeop_p;
182static struct vnodeopv_entry_desc nfsv2_specop_entries[] = {
183	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
184	{ &vop_access_desc,		(vop_t *) nfsspec_access },
185	{ &vop_close_desc,		(vop_t *) nfsspec_close },
186	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
187	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
188	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
189	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
190	{ &vop_print_desc,		(vop_t *) nfs_print },
191	{ &vop_read_desc,		(vop_t *) nfsspec_read },
192	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
193	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
194	{ &vop_write_desc,		(vop_t *) nfsspec_write },
195	{ NULL, NULL }
196};
/* Ties the table above to the spec_nfsv2nodeop_p vector pointer. */
197static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
198	{ &spec_nfsv2nodeop_p, nfsv2_specop_entries };
199VNODEOP_SET(spec_nfsv2nodeop_opv_desc);
200
/*
 * Fifo vnode ops
 *
 * Same layout as the special-device table: NFS handlers for attributes,
 * access, close, read/write and reclaim, with fifo_vnoperate named as the
 * vop_default_desc handler.  Access checks reuse nfsspec_access.
 */
201vop_t **fifo_nfsv2nodeop_p;
202static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = {
203	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
204	{ &vop_access_desc,		(vop_t *) nfsspec_access },
205	{ &vop_close_desc,		(vop_t *) nfsfifo_close },
206	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
207	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
208	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
209	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
210	{ &vop_print_desc,		(vop_t *) nfs_print },
211	{ &vop_read_desc,		(vop_t *) nfsfifo_read },
212	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
213	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
214	{ &vop_write_desc,		(vop_t *) nfsfifo_write },
215	{ NULL, NULL }
216};
/* Ties the table above to the fifo_nfsv2nodeop_p vector pointer. */
217static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
218	{ &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries };
219VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
220
/*
 * Forward declarations of file-local helpers whose definitions lie outside
 * this part of the file.  Judging by their names they wrap the MKNOD,
 * REMOVE and RENAME RPCs -- TODO confirm against the definitions.
 */
221static int	nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
222				  struct componentname *cnp,
223				  struct vattr *vap));
224static int	nfs_removerpc __P((struct vnode *dvp, const char *name,
225				   int namelen,
226				   struct ucred *cred, struct proc *proc));
227static int	nfs_renamerpc __P((struct vnode *fdvp, const char *fnameptr,
228				   int fnamelen, struct vnode *tdvp,
229				   const char *tnameptr, int tnamelen,
230				   struct ucred *cred, struct proc *proc));
231static int	nfs_renameit __P((struct vnode *sdvp,
232				  struct componentname *scnp,
233				  struct sillyrename *sp));
234
235/*
 * Global variables
 */
/* XDR-encoded constants and statistics defined elsewhere in the NFS code. */
238extern u_int32_t nfs_true, nfs_false;
239extern u_int32_t nfs_xdrneg1;
240extern struct nfsstats nfsstats;
241extern nfstype nfsv3_type[9];
/* Per-slot state for the asynchronous I/O daemons, sized by NFS_MAXASYNCDAEMON. */
242struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
243struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
244int nfs_numasync = 0;
/* Size of a struct dirent without its (MAXNAMLEN + 1)-byte name field. */
245#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
246
247SYSCTL_DECL(_vfs_nfs);
248
/*
 * Lifetime of a cached NFSv3 ACCESS result; nfs_access() compares
 * n_modestamp plus this value against time_second.  When it is not
 * positive, nfs_access() asks the server only for the bits it needs and
 * nfs_getattr() skips its ACCESS-based attribute refresh.
 */
249static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
250SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
251	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
252
/*
 * When non-zero, nfs_close() passes 1 as the last argument of nfs_flush()
 * on NFSv3 mounts (write and commit); otherwise it passes 0 (write only).
 */
253static int	nfsv3_commit_on_close = 0;
254SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
255	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
/* Read-only exports of the access cache counters; currently compiled out. */
256#if 0
257SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
258	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
259
260SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
261	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
262#endif
263
/* Union of every NFSv3 ACCESS bit; used for "blanket" ACCESS requests. */
264#define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
265			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
266			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
267static int
268nfs3_access_otw(struct vnode *vp,
269		int wmode,
270		struct proc *p,
271		struct ucred *cred)
272{
273	const int v3 = 1;
274	u_int32_t *tl;
275	int error = 0, attrflag;
276
277	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
278	caddr_t bpos, dpos, cp2;
279	register int32_t t1, t2;
280	register caddr_t cp;
281	u_int32_t rmode;
282	struct nfsnode *np = VTONFS(vp);
283
284	nfsstats.rpccnt[NFSPROC_ACCESS]++;
285	nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
286	nfsm_fhtom(vp, v3);
287	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
288	*tl = txdr_unsigned(wmode);
289	nfsm_request(vp, NFSPROC_ACCESS, p, cred);
290	nfsm_postop_attr(vp, attrflag);
291	if (!error) {
292		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
293		rmode = fxdr_unsigned(u_int32_t, *tl);
294		np->n_mode = rmode;
295		np->n_modeuid = cred->cr_uid;
296		np->n_modestamp = time_second;
297	}
298	nfsm_reqdone;
299	return error;
300}
301
302/*
303 * nfs access vnode op.
304 * For nfs version 2, just return ok. File accesses may fail later.
305 * For nfs version 3, use the access rpc to check accessibility. If file modes
306 * are changed on the server, accesses might still fail later.
307 */
308static int
309nfs_access(ap)
310	struct vop_access_args /* {
311		struct vnode *a_vp;
312		int  a_mode;
313		struct ucred *a_cred;
314		struct proc *a_p;
315	} */ *ap;
316{
317	register struct vnode *vp = ap->a_vp;
318	int error = 0;
319	u_int32_t mode, wmode;
320	int v3 = NFS_ISV3(vp);
321	struct nfsnode *np = VTONFS(vp);
322
323	/*
324	 * Disallow write attempts on filesystems mounted read-only;
325	 * unless the file is a socket, fifo, or a block or character
326	 * device resident on the filesystem.
327	 */
328	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
329		switch (vp->v_type) {
330		case VREG:
331		case VDIR:
332		case VLNK:
333			return (EROFS);
334		default:
335			break;
336		}
337	}
338	/*
339	 * For nfs v3, check to see if we have done this recently, and if
340	 * so return our cached result instead of making an ACCESS call.
341	 * If not, do an access rpc, otherwise you are stuck emulating
342	 * ufs_access() locally using the vattr. This may not be correct,
343	 * since the server may apply other access criteria such as
344	 * client uid-->server uid mapping that we do not know about.
345	 */
346	if (v3) {
347		if (ap->a_mode & VREAD)
348			mode = NFSV3ACCESS_READ;
349		else
350			mode = 0;
351		if (vp->v_type != VDIR) {
352			if (ap->a_mode & VWRITE)
353				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
354			if (ap->a_mode & VEXEC)
355				mode |= NFSV3ACCESS_EXECUTE;
356		} else {
357			if (ap->a_mode & VWRITE)
358				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
359					 NFSV3ACCESS_DELETE);
360			if (ap->a_mode & VEXEC)
361				mode |= NFSV3ACCESS_LOOKUP;
362		}
363		/* XXX safety belt, only make blanket request if caching */
364		if (nfsaccess_cache_timeout > 0) {
365			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
366				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
367				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
368		} else {
369			wmode = mode;
370		}
371
372		/*
373		 * Does our cached result allow us to give a definite yes to
374		 * this request?
375		 */
376		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
377		    (ap->a_cred->cr_uid == np->n_modeuid) &&
378		    ((np->n_mode & mode) == mode)) {
379			nfsstats.accesscache_hits++;
380		} else {
381			/*
382			 * Either a no, or a don't know.  Go to the wire.
383			 */
384			nfsstats.accesscache_misses++;
385		        error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred);
386			if (!error) {
387				if ((np->n_mode & mode) != mode) {
388					error = EACCES;
389				}
390			}
391		}
392		return (error);
393	} else {
394		if ((error = nfsspec_access(ap)) != 0)
395			return (error);
396
397		/*
398		 * Attempt to prevent a mapped root from accessing a file
399		 * which it shouldn't.  We try to read a byte from the file
400		 * if the user is root and the file is not zero length.
401		 * After calling nfsspec_access, we should have the correct
402		 * file size cached.
403		 */
404		if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
405		    && VTONFS(vp)->n_size > 0) {
406			struct iovec aiov;
407			struct uio auio;
408			char buf[1];
409
410			aiov.iov_base = buf;
411			aiov.iov_len = 1;
412			auio.uio_iov = &aiov;
413			auio.uio_iovcnt = 1;
414			auio.uio_offset = 0;
415			auio.uio_resid = 1;
416			auio.uio_segflg = UIO_SYSSPACE;
417			auio.uio_rw = UIO_READ;
418			auio.uio_procp = ap->a_p;
419
420			if (vp->v_type == VREG)
421				error = nfs_readrpc(vp, &auio, ap->a_cred);
422			else if (vp->v_type == VDIR) {
423				char* bp;
424				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
425				aiov.iov_base = bp;
426				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
427				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
428				free(bp, M_TEMP);
429			} else if (vp->v_type == VLNK)
430				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
431			else
432				error = EACCES;
433		}
434		return (error);
435	}
436}
437
438/*
439 * nfs open vnode op
440 * Check to see if the type is ok
441 * and that deletion is not in progress.
442 * For paged in text files, you will need to flush the page cache
443 * if consistency is lost.
444 */
445/* ARGSUSED */
446static int
447nfs_open(ap)
448	struct vop_open_args /* {
449		struct vnode *a_vp;
450		int  a_mode;
451		struct ucred *a_cred;
452		struct proc *a_p;
453	} */ *ap;
454{
455	register struct vnode *vp = ap->a_vp;
456	struct nfsnode *np = VTONFS(vp);
457	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
458	struct vattr vattr;
459	int error;
460
461	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
462#ifdef DIAGNOSTIC
463		printf("open eacces vtyp=%d\n",vp->v_type);
464#endif
465		return (EACCES);
466	}
467	/*
468	 * Get a valid lease. If cached data is stale, flush it.
469	 */
470	if (nmp->nm_flag & NFSMNT_NQNFS) {
471		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
472		    do {
473			error = nqnfs_getlease(vp, ND_READ, ap->a_cred,
474			    ap->a_p);
475		    } while (error == NQNFS_EXPIRED);
476		    if (error)
477			return (error);
478		    if (np->n_lrev != np->n_brev ||
479			(np->n_flag & NQNFSNONCACHE)) {
480			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
481				ap->a_p, 1)) == EINTR)
482				return (error);
483			np->n_brev = np->n_lrev;
484		    }
485		}
486	} else {
487		if (np->n_flag & NMODIFIED) {
488			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
489				ap->a_p, 1)) == EINTR)
490				return (error);
491			np->n_attrstamp = 0;
492			if (vp->v_type == VDIR)
493				np->n_direofoffset = 0;
494			error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
495			if (error)
496				return (error);
497			np->n_mtime = vattr.va_mtime.tv_sec;
498		} else {
499			error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
500			if (error)
501				return (error);
502			if (np->n_mtime != vattr.va_mtime.tv_sec) {
503				if (vp->v_type == VDIR)
504					np->n_direofoffset = 0;
505				if ((error = nfs_vinvalbuf(vp, V_SAVE,
506					ap->a_cred, ap->a_p, 1)) == EINTR)
507					return (error);
508				np->n_mtime = vattr.va_mtime.tv_sec;
509			}
510		}
511	}
512	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
513		np->n_attrstamp = 0; /* For Open/Close consistency */
514	return (0);
515}
516
517/*
518 * nfs close vnode op
519 * What an NFS client should do upon close after writing is a debatable issue.
520 * Most NFS clients push delayed writes to the server upon close, basically for
521 * two reasons:
522 * 1 - So that any write errors may be reported back to the client process
523 *     doing the close system call. By far the two most likely errors are
524 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
525 * 2 - To put a worst case upper bound on cache inconsistency between
526 *     multiple clients for the file.
527 * There is also a consistency problem for Version 2 of the protocol w.r.t.
528 * not being able to tell if other clients are writing a file concurrently,
529 * since there is no way of knowing if the changed modify time in the reply
530 * is only due to the write for this client.
531 * (NFS Version 3 provides weak cache consistency data in the reply that
532 *  should be sufficient to detect and handle this case.)
533 *
534 * The current code does the following:
535 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
536 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
537 *                     or commit them (this satisfies 1 and 2 except for the
538 *                     case where the server crashes after this close but
539 *                     before the commit RPC, which is felt to be "good
540 *                     enough". Changing the last argument to nfs_flush() to
541 *                     a 1 would force a commit operation, if it is felt a
542 *                     commit is necessary now.
543 * for NQNFS         - do nothing now, since 2 is dealt with via leases and
544 *                     1 should be dealt with via an fsync() system call for
545 *                     cases where write errors are important.
546 */
547/* ARGSUSED */
548static int
549nfs_close(ap)
550	struct vop_close_args /* {
551		struct vnodeop_desc *a_desc;
552		struct vnode *a_vp;
553		int  a_fflag;
554		struct ucred *a_cred;
555		struct proc *a_p;
556	} */ *ap;
557{
558	register struct vnode *vp = ap->a_vp;
559	register struct nfsnode *np = VTONFS(vp);
560	int error = 0;
561
562	if (vp->v_type == VREG) {
563	    if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 &&
564		(np->n_flag & NMODIFIED)) {
565		if (NFS_ISV3(vp)) {
566		    /*
567		     * Under NFSv3 we have dirty buffers to dispose of.  We
568		     * must flush them to the NFS server.  We have the option
569		     * of waiting all the way through the commit rpc or just
570		     * waiting for the initial write.  The default is to only
571		     * wait through the initial write so the data is in the
572		     * server's cache, which is roughly similar to the state
573		     * a standard disk subsystem leaves the file in on close().
574		     *
575		     * We cannot clear the NMODIFIED bit in np->n_flag due to
576		     * potential races with other processes, and certainly
577		     * cannot clear it if we don't commit.
578		     */
579		    int cm = nfsv3_commit_on_close ? 1 : 0;
580		    error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, cm);
581		    /* np->n_flag &= ~NMODIFIED; */
582		} else {
583		    error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
584		}
585		np->n_attrstamp = 0;
586	    }
587	    if (np->n_flag & NWRITEERR) {
588		np->n_flag &= ~NWRITEERR;
589		error = np->n_error;
590	    }
591	}
592	return (error);
593}
594
595/*
596 * nfs getattr call from vfs.
597 */
598static int
599nfs_getattr(ap)
600	struct vop_getattr_args /* {
601		struct vnode *a_vp;
602		struct vattr *a_vap;
603		struct ucred *a_cred;
604		struct proc *a_p;
605	} */ *ap;
606{
607	register struct vnode *vp = ap->a_vp;
608	register struct nfsnode *np = VTONFS(vp);
609	register caddr_t cp;
610	register u_int32_t *tl;
611	register int32_t t1, t2;
612	caddr_t bpos, dpos;
613	int error = 0;
614	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
615	int v3 = NFS_ISV3(vp);
616
617	/*
618	 * Update local times for special files.
619	 */
620	if (np->n_flag & (NACC | NUPD))
621		np->n_flag |= NCHG;
622	/*
623	 * First look in the cache.
624	 */
625	if (nfs_getattrcache(vp, ap->a_vap) == 0)
626		return (0);
627
628	if (v3 && nfsaccess_cache_timeout > 0) {
629		nfsstats.accesscache_misses++;
630		nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred);
631		if (nfs_getattrcache(vp, ap->a_vap) == 0)
632			return (0);
633	}
634
635	nfsstats.rpccnt[NFSPROC_GETATTR]++;
636	nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
637	nfsm_fhtom(vp, v3);
638	nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
639	if (!error) {
640		nfsm_loadattr(vp, ap->a_vap);
641	}
642	nfsm_reqdone;
643	return (error);
644}
645
646/*
647 * nfs setattr call.
648 */
649static int
650nfs_setattr(ap)
651	struct vop_setattr_args /* {
652		struct vnodeop_desc *a_desc;
653		struct vnode *a_vp;
654		struct vattr *a_vap;
655		struct ucred *a_cred;
656		struct proc *a_p;
657	} */ *ap;
658{
659	register struct vnode *vp = ap->a_vp;
660	register struct nfsnode *np = VTONFS(vp);
661	register struct vattr *vap = ap->a_vap;
662	int error = 0;
663	u_quad_t tsize;
664
665#ifndef nolint
666	tsize = (u_quad_t)0;
667#endif
668
669	/*
670	 * Setting of flags is not supported.
671	 */
672	if (vap->va_flags != VNOVAL)
673		return (EOPNOTSUPP);
674
675	/*
676	 * Disallow write attempts if the filesystem is mounted read-only.
677	 */
678  	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
679	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
680	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
681	    (vp->v_mount->mnt_flag & MNT_RDONLY))
682		return (EROFS);
683	if (vap->va_size != VNOVAL) {
684 		switch (vp->v_type) {
685 		case VDIR:
686 			return (EISDIR);
687 		case VCHR:
688 		case VBLK:
689 		case VSOCK:
690 		case VFIFO:
691			if (vap->va_mtime.tv_sec == VNOVAL &&
692			    vap->va_atime.tv_sec == VNOVAL &&
693			    vap->va_mode == (mode_t)VNOVAL &&
694			    vap->va_uid == (uid_t)VNOVAL &&
695			    vap->va_gid == (gid_t)VNOVAL)
696				return (0);
697 			vap->va_size = VNOVAL;
698 			break;
699 		default:
700			/*
701			 * Disallow write attempts if the filesystem is
702			 * mounted read-only.
703			 */
704			if (vp->v_mount->mnt_flag & MNT_RDONLY)
705				return (EROFS);
706			vnode_pager_setsize(vp, vap->va_size);
707 			if (np->n_flag & NMODIFIED) {
708 			    if (vap->va_size == 0)
709 				error = nfs_vinvalbuf(vp, 0,
710 					ap->a_cred, ap->a_p, 1);
711 			    else
712 				error = nfs_vinvalbuf(vp, V_SAVE,
713 					ap->a_cred, ap->a_p, 1);
714 			    if (error) {
715				vnode_pager_setsize(vp, np->n_size);
716 				return (error);
717			    }
718 			}
719 			tsize = np->n_size;
720 			np->n_size = np->n_vattr.va_size = vap->va_size;
721  		};
722  	} else if ((vap->va_mtime.tv_sec != VNOVAL ||
723		vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
724		vp->v_type == VREG &&
725  		(error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
726		 ap->a_p, 1)) == EINTR)
727		return (error);
728	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
729	if (error && vap->va_size != VNOVAL) {
730		np->n_size = np->n_vattr.va_size = tsize;
731		vnode_pager_setsize(vp, np->n_size);
732	}
733	return (error);
734}
735
736/*
737 * Do an nfs setattr rpc.
738 */
739static int
740nfs_setattrrpc(vp, vap, cred, procp)
741	register struct vnode *vp;
742	register struct vattr *vap;
743	struct ucred *cred;
744	struct proc *procp;
745{
746	register struct nfsv2_sattr *sp;
747	register caddr_t cp;
748	register int32_t t1, t2;
749	caddr_t bpos, dpos, cp2;
750	u_int32_t *tl;
751	int error = 0, wccflag = NFSV3_WCCRATTR;
752	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
753	int v3 = NFS_ISV3(vp);
754
755	nfsstats.rpccnt[NFSPROC_SETATTR]++;
756	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
757	nfsm_fhtom(vp, v3);
758	if (v3) {
759		nfsm_v3attrbuild(vap, TRUE);
760		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
761		*tl = nfs_false;
762	} else {
763		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
764		if (vap->va_mode == (mode_t)VNOVAL)
765			sp->sa_mode = nfs_xdrneg1;
766		else
767			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
768		if (vap->va_uid == (uid_t)VNOVAL)
769			sp->sa_uid = nfs_xdrneg1;
770		else
771			sp->sa_uid = txdr_unsigned(vap->va_uid);
772		if (vap->va_gid == (gid_t)VNOVAL)
773			sp->sa_gid = nfs_xdrneg1;
774		else
775			sp->sa_gid = txdr_unsigned(vap->va_gid);
776		sp->sa_size = txdr_unsigned(vap->va_size);
777		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
778		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
779	}
780	nfsm_request(vp, NFSPROC_SETATTR, procp, cred);
781	if (v3) {
782		nfsm_wcc_data(vp, wccflag);
783	} else
784		nfsm_loadattr(vp, (struct vattr *)0);
785	nfsm_reqdone;
786	return (error);
787}
788
789/*
790 * nfs lookup call, one step at a time...
791 * First look in cache
792 * If not found, unlock the directory nfsnode and do the rpc
793 */
794static int
795nfs_lookup(ap)
796	struct vop_lookup_args /* {
797		struct vnodeop_desc *a_desc;
798		struct vnode *a_dvp;
799		struct vnode **a_vpp;
800		struct componentname *a_cnp;
801	} */ *ap;
802{
803	struct componentname *cnp = ap->a_cnp;
804	struct vnode *dvp = ap->a_dvp;
805	struct vnode **vpp = ap->a_vpp;
806	int flags = cnp->cn_flags;
807	struct vnode *newvp;
808	u_int32_t *tl;
809	caddr_t cp;
810	int32_t t1, t2;
811	struct nfsmount *nmp;
812	caddr_t bpos, dpos, cp2;
813	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
814	long len;
815	nfsfh_t *fhp;
816	struct nfsnode *np;
817	int lockparent, wantparent, error = 0, attrflag, fhsize;
818	int v3 = NFS_ISV3(dvp);
819	struct proc *p = cnp->cn_proc;
820
821	*vpp = NULLVP;
822	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
823	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
824		return (EROFS);
825	if (dvp->v_type != VDIR)
826		return (ENOTDIR);
827	lockparent = flags & LOCKPARENT;
828	wantparent = flags & (LOCKPARENT|WANTPARENT);
829	nmp = VFSTONFS(dvp->v_mount);
830	np = VTONFS(dvp);
831	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
832		struct vattr vattr;
833		int vpid;
834
835		if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) != 0) {
836			*vpp = NULLVP;
837			return (error);
838		}
839
840		newvp = *vpp;
841		vpid = newvp->v_id;
842		/*
843		 * See the comment starting `Step through' in ufs/ufs_lookup.c
844		 * for an explanation of the locking protocol
845		 */
846		if (dvp == newvp) {
847			VREF(newvp);
848			error = 0;
849		} else if (flags & ISDOTDOT) {
850			VOP_UNLOCK(dvp, 0, p);
851			error = vget(newvp, LK_EXCLUSIVE, p);
852			if (!error && lockparent && (flags & ISLASTCN))
853				error = vn_lock(dvp, LK_EXCLUSIVE, p);
854		} else {
855			error = vget(newvp, LK_EXCLUSIVE, p);
856			if (!lockparent || error || !(flags & ISLASTCN))
857				VOP_UNLOCK(dvp, 0, p);
858		}
859		if (!error) {
860			if (vpid == newvp->v_id) {
861			   if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p)
862			    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
863				nfsstats.lookupcache_hits++;
864				if (cnp->cn_nameiop != LOOKUP &&
865				    (flags & ISLASTCN))
866					cnp->cn_flags |= SAVENAME;
867				return (0);
868			   }
869			   cache_purge(newvp);
870			}
871			vput(newvp);
872			if (lockparent && dvp != newvp && (flags & ISLASTCN))
873				VOP_UNLOCK(dvp, 0, p);
874		}
875		error = vn_lock(dvp, LK_EXCLUSIVE, p);
876		*vpp = NULLVP;
877		if (error)
878			return (error);
879	}
880	error = 0;
881	newvp = NULLVP;
882	nfsstats.lookupcache_misses++;
883	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
884	len = cnp->cn_namelen;
885	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
886		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
887	nfsm_fhtom(dvp, v3);
888	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
889	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
890	if (error) {
891		nfsm_postop_attr(dvp, attrflag);
892		m_freem(mrep);
893		goto nfsmout;
894	}
895	nfsm_getfh(fhp, fhsize, v3);
896
897	/*
898	 * Handle RENAME case...
899	 */
900	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
901		if (NFS_CMPFH(np, fhp, fhsize)) {
902			m_freem(mrep);
903			return (EISDIR);
904		}
905		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
906		if (error) {
907			m_freem(mrep);
908			return (error);
909		}
910		newvp = NFSTOV(np);
911		if (v3) {
912			nfsm_postop_attr(newvp, attrflag);
913			nfsm_postop_attr(dvp, attrflag);
914		} else
915			nfsm_loadattr(newvp, (struct vattr *)0);
916		*vpp = newvp;
917		m_freem(mrep);
918		cnp->cn_flags |= SAVENAME;
919		if (!lockparent)
920			VOP_UNLOCK(dvp, 0, p);
921		return (0);
922	}
923
924	if (flags & ISDOTDOT) {
925		VOP_UNLOCK(dvp, 0, p);
926		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
927		if (error) {
928			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
929			return (error);
930		}
931		newvp = NFSTOV(np);
932		if (lockparent && (flags & ISLASTCN) &&
933		    (error = vn_lock(dvp, LK_EXCLUSIVE, p))) {
934		    	vput(newvp);
935			return (error);
936		}
937	} else if (NFS_CMPFH(np, fhp, fhsize)) {
938		VREF(dvp);
939		newvp = dvp;
940	} else {
941		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
942		if (error) {
943			m_freem(mrep);
944			return (error);
945		}
946		if (!lockparent || !(flags & ISLASTCN))
947			VOP_UNLOCK(dvp, 0, p);
948		newvp = NFSTOV(np);
949	}
950	if (v3) {
951		nfsm_postop_attr(newvp, attrflag);
952		nfsm_postop_attr(dvp, attrflag);
953	} else
954		nfsm_loadattr(newvp, (struct vattr *)0);
955	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
956		cnp->cn_flags |= SAVENAME;
957	if ((cnp->cn_flags & MAKEENTRY) &&
958	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
959		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
960		cache_enter(dvp, newvp, cnp);
961	}
962	*vpp = newvp;
963	nfsm_reqdone;
964	if (error) {
965		if (newvp != NULLVP) {
966			vrele(newvp);
967			*vpp = NULLVP;
968		}
969		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
970		    (flags & ISLASTCN) && error == ENOENT) {
971			if (!lockparent)
972				VOP_UNLOCK(dvp, 0, p);
973			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
974				error = EROFS;
975			else
976				error = EJUSTRETURN;
977		}
978		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
979			cnp->cn_flags |= SAVENAME;
980	}
981	return (error);
982}
983
984/*
985 * nfs read call.
986 * Just call nfs_bioread() to do the work.
987 */
988static int
989nfs_read(ap)
990	struct vop_read_args /* {
991		struct vnode *a_vp;
992		struct uio *a_uio;
993		int  a_ioflag;
994		struct ucred *a_cred;
995	} */ *ap;
996{
997	register struct vnode *vp = ap->a_vp;
998
999	if (vp->v_type != VREG)
1000		return (EPERM);
1001	return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
1002}
1003
1004/*
1005 * nfs readlink call
1006 */
1007static int
1008nfs_readlink(ap)
1009	struct vop_readlink_args /* {
1010		struct vnode *a_vp;
1011		struct uio *a_uio;
1012		struct ucred *a_cred;
1013	} */ *ap;
1014{
1015	register struct vnode *vp = ap->a_vp;
1016
1017	if (vp->v_type != VLNK)
1018		return (EINVAL);
1019	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
1020}
1021
1022/*
1023 * Do a readlink rpc.
1024 * Called by nfs_doio() from below the buffer cache.
1025 */
1026int
1027nfs_readlinkrpc(vp, uiop, cred)
1028	register struct vnode *vp;
1029	struct uio *uiop;
1030	struct ucred *cred;
1031{
1032	register u_int32_t *tl;
1033	register caddr_t cp;
1034	register int32_t t1, t2;
1035	caddr_t bpos, dpos, cp2;
1036	int error = 0, len, attrflag;
1037	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1038	int v3 = NFS_ISV3(vp);
1039
1040	nfsstats.rpccnt[NFSPROC_READLINK]++;
1041	nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
1042	nfsm_fhtom(vp, v3);
1043	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
1044	if (v3)
1045		nfsm_postop_attr(vp, attrflag);
1046	if (!error) {
1047		nfsm_strsiz(len, NFS_MAXPATHLEN);
1048		if (len == NFS_MAXPATHLEN) {
1049			struct nfsnode *np = VTONFS(vp);
1050			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
1051				len = np->n_size;
1052		}
1053		nfsm_mtouio(uiop, len);
1054	}
1055	nfsm_reqdone;
1056	return (error);
1057}
1058
1059/*
1060 * nfs read rpc call
1061 * Ditto above
1062 */
1063int
1064nfs_readrpc(vp, uiop, cred)
1065	register struct vnode *vp;
1066	struct uio *uiop;
1067	struct ucred *cred;
1068{
1069	register u_int32_t *tl;
1070	register caddr_t cp;
1071	register int32_t t1, t2;
1072	caddr_t bpos, dpos, cp2;
1073	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1074	struct nfsmount *nmp;
1075	int error = 0, len, retlen, tsiz, eof, attrflag;
1076	int v3 = NFS_ISV3(vp);
1077
1078#ifndef nolint
1079	eof = 0;
1080#endif
1081	nmp = VFSTONFS(vp->v_mount);
1082	tsiz = uiop->uio_resid;
1083	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
1084		return (EFBIG);
1085	while (tsiz > 0) {
1086		nfsstats.rpccnt[NFSPROC_READ]++;
1087		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
1088		nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
1089		nfsm_fhtom(vp, v3);
1090		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1091		if (v3) {
1092			txdr_hyper(uiop->uio_offset, tl);
1093			*(tl + 2) = txdr_unsigned(len);
1094		} else {
1095			*tl++ = txdr_unsigned(uiop->uio_offset);
1096			*tl++ = txdr_unsigned(len);
1097			*tl = 0;
1098		}
1099		nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
1100		if (v3) {
1101			nfsm_postop_attr(vp, attrflag);
1102			if (error) {
1103				m_freem(mrep);
1104				goto nfsmout;
1105			}
1106			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1107			eof = fxdr_unsigned(int, *(tl + 1));
1108		} else
1109			nfsm_loadattr(vp, (struct vattr *)0);
1110		nfsm_strsiz(retlen, nmp->nm_rsize);
1111		nfsm_mtouio(uiop, retlen);
1112		m_freem(mrep);
1113		tsiz -= retlen;
1114		if (v3) {
1115			if (eof || retlen == 0)
1116				tsiz = 0;
1117		} else if (retlen < len)
1118			tsiz = 0;
1119	}
1120nfsmout:
1121	return (error);
1122}
1123
1124/*
1125 * nfs write call
1126 */
1127int
1128nfs_writerpc(vp, uiop, cred, iomode, must_commit)
1129	register struct vnode *vp;
1130	register struct uio *uiop;
1131	struct ucred *cred;
1132	int *iomode, *must_commit;
1133{
1134	register u_int32_t *tl;
1135	register caddr_t cp;
1136	register int32_t t1, t2, backup;
1137	caddr_t bpos, dpos, cp2;
1138	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1139	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1140	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1141	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
1142
1143#ifndef DIAGNOSTIC
1144	if (uiop->uio_iovcnt != 1)
1145		panic("nfs: writerpc iovcnt > 1");
1146#endif
1147	*must_commit = 0;
1148	tsiz = uiop->uio_resid;
1149	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
1150		return (EFBIG);
1151	while (tsiz > 0) {
1152		nfsstats.rpccnt[NFSPROC_WRITE]++;
1153		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
1154		nfsm_reqhead(vp, NFSPROC_WRITE,
1155			NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
1156		nfsm_fhtom(vp, v3);
1157		if (v3) {
1158			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1159			txdr_hyper(uiop->uio_offset, tl);
1160			tl += 2;
1161			*tl++ = txdr_unsigned(len);
1162			*tl++ = txdr_unsigned(*iomode);
1163			*tl = txdr_unsigned(len);
1164		} else {
1165			register u_int32_t x;
1166
1167			nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1168			/* Set both "begin" and "current" to non-garbage. */
1169			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1170			*tl++ = x;	/* "begin offset" */
1171			*tl++ = x;	/* "current offset" */
1172			x = txdr_unsigned(len);
1173			*tl++ = x;	/* total to this offset */
1174			*tl = x;	/* size of this write */
1175		}
1176		nfsm_uiotom(uiop, len);
1177		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
1178		if (v3) {
1179			wccflag = NFSV3_WCCCHK;
1180			nfsm_wcc_data(vp, wccflag);
1181			if (!error) {
1182				nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1183					+ NFSX_V3WRITEVERF);
1184				rlen = fxdr_unsigned(int, *tl++);
1185				if (rlen == 0) {
1186					error = NFSERR_IO;
1187					m_freem(mrep);
1188					break;
1189				} else if (rlen < len) {
1190					backup = len - rlen;
1191					uiop->uio_iov->iov_base -= backup;
1192					uiop->uio_iov->iov_len += backup;
1193					uiop->uio_offset -= backup;
1194					uiop->uio_resid += backup;
1195					len = rlen;
1196				}
1197				commit = fxdr_unsigned(int, *tl++);
1198
1199				/*
1200				 * Return the lowest committment level
1201				 * obtained by any of the RPCs.
1202				 */
1203				if (committed == NFSV3WRITE_FILESYNC)
1204					committed = commit;
1205				else if (committed == NFSV3WRITE_DATASYNC &&
1206					commit == NFSV3WRITE_UNSTABLE)
1207					committed = commit;
1208				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
1209				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1210					NFSX_V3WRITEVERF);
1211				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
1212				} else if (bcmp((caddr_t)tl,
1213				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
1214				    *must_commit = 1;
1215				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1216					NFSX_V3WRITEVERF);
1217				}
1218			}
1219		} else
1220		    nfsm_loadattr(vp, (struct vattr *)0);
1221		if (wccflag)
1222		    VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec;
1223		m_freem(mrep);
1224		if (error)
1225			break;
1226		tsiz -= len;
1227	}
1228nfsmout:
1229	if (vp->v_mount->mnt_flag & MNT_ASYNC)
1230		committed = NFSV3WRITE_FILESYNC;
1231	*iomode = committed;
1232	if (error)
1233		uiop->uio_resid = tsiz;
1234	return (error);
1235}
1236
1237/*
1238 * nfs mknod rpc
1239 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1240 * mode set to specify the file type and the size field for rdev.
1241 */
1242static int
1243nfs_mknodrpc(dvp, vpp, cnp, vap)
1244	register struct vnode *dvp;
1245	register struct vnode **vpp;
1246	register struct componentname *cnp;
1247	register struct vattr *vap;
1248{
1249	register struct nfsv2_sattr *sp;
1250	register u_int32_t *tl;
1251	register caddr_t cp;
1252	register int32_t t1, t2;
1253	struct vnode *newvp = (struct vnode *)0;
1254	struct nfsnode *np = (struct nfsnode *)0;
1255	struct vattr vattr;
1256	char *cp2;
1257	caddr_t bpos, dpos;
1258	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
1259	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1260	u_int32_t rdev;
1261	int v3 = NFS_ISV3(dvp);
1262
1263	if (vap->va_type == VCHR || vap->va_type == VBLK)
1264		rdev = txdr_unsigned(vap->va_rdev);
1265	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
1266		rdev = nfs_xdrneg1;
1267	else {
1268		return (EOPNOTSUPP);
1269	}
1270	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
1271		return (error);
1272	}
1273	nfsstats.rpccnt[NFSPROC_MKNOD]++;
1274	nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
1275		+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1276	nfsm_fhtom(dvp, v3);
1277	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1278	if (v3) {
1279		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1280		*tl++ = vtonfsv3_type(vap->va_type);
1281		nfsm_v3attrbuild(vap, FALSE);
1282		if (vap->va_type == VCHR || vap->va_type == VBLK) {
1283			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1284			*tl++ = txdr_unsigned(umajor(vap->va_rdev));
1285			*tl = txdr_unsigned(uminor(vap->va_rdev));
1286		}
1287	} else {
1288		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1289		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1290		sp->sa_uid = nfs_xdrneg1;
1291		sp->sa_gid = nfs_xdrneg1;
1292		sp->sa_size = rdev;
1293		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1294		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1295	}
1296	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred);
1297	if (!error) {
1298		nfsm_mtofh(dvp, newvp, v3, gotvp);
1299		if (!gotvp) {
1300			if (newvp) {
1301				vput(newvp);
1302				newvp = (struct vnode *)0;
1303			}
1304			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1305			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
1306			if (!error)
1307				newvp = NFSTOV(np);
1308		}
1309	}
1310	if (v3)
1311		nfsm_wcc_data(dvp, wccflag);
1312	nfsm_reqdone;
1313	if (error) {
1314		if (newvp)
1315			vput(newvp);
1316	} else {
1317		if (cnp->cn_flags & MAKEENTRY)
1318			cache_enter(dvp, newvp, cnp);
1319		*vpp = newvp;
1320	}
1321	VTONFS(dvp)->n_flag |= NMODIFIED;
1322	if (!wccflag)
1323		VTONFS(dvp)->n_attrstamp = 0;
1324	return (error);
1325}
1326
1327/*
1328 * nfs mknod vop
1329 * just call nfs_mknodrpc() to do the work.
1330 */
1331/* ARGSUSED */
1332static int
1333nfs_mknod(ap)
1334	struct vop_mknod_args /* {
1335		struct vnode *a_dvp;
1336		struct vnode **a_vpp;
1337		struct componentname *a_cnp;
1338		struct vattr *a_vap;
1339	} */ *ap;
1340{
1341	return nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap);
1342}
1343
1344static u_long create_verf;
1345/*
1346 * nfs file create call
1347 */
1348static int
1349nfs_create(ap)
1350	struct vop_create_args /* {
1351		struct vnode *a_dvp;
1352		struct vnode **a_vpp;
1353		struct componentname *a_cnp;
1354		struct vattr *a_vap;
1355	} */ *ap;
1356{
1357	register struct vnode *dvp = ap->a_dvp;
1358	register struct vattr *vap = ap->a_vap;
1359	register struct componentname *cnp = ap->a_cnp;
1360	register struct nfsv2_sattr *sp;
1361	register u_int32_t *tl;
1362	register caddr_t cp;
1363	register int32_t t1, t2;
1364	struct nfsnode *np = (struct nfsnode *)0;
1365	struct vnode *newvp = (struct vnode *)0;
1366	caddr_t bpos, dpos, cp2;
1367	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
1368	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1369	struct vattr vattr;
1370	int v3 = NFS_ISV3(dvp);
1371
1372	/*
1373	 * Oops, not for me..
1374	 */
1375	if (vap->va_type == VSOCK)
1376		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
1377
1378	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
1379		return (error);
1380	}
1381	if (vap->va_vaflags & VA_EXCLUSIVE)
1382		fmode |= O_EXCL;
1383again:
1384	nfsstats.rpccnt[NFSPROC_CREATE]++;
1385	nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
1386		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1387	nfsm_fhtom(dvp, v3);
1388	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1389	if (v3) {
1390		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1391		if (fmode & O_EXCL) {
1392			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
1393			nfsm_build(tl, u_int32_t *, NFSX_V3CREATEVERF);
1394#ifdef INET
1395			if (!TAILQ_EMPTY(&in_ifaddrhead))
1396				*tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
1397			else
1398#endif
1399				*tl++ = create_verf;
1400			*tl = ++create_verf;
1401		} else {
1402			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
1403			nfsm_v3attrbuild(vap, FALSE);
1404		}
1405	} else {
1406		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1407		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1408		sp->sa_uid = nfs_xdrneg1;
1409		sp->sa_gid = nfs_xdrneg1;
1410		sp->sa_size = 0;
1411		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1412		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1413	}
1414	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
1415	if (!error) {
1416		nfsm_mtofh(dvp, newvp, v3, gotvp);
1417		if (!gotvp) {
1418			if (newvp) {
1419				vput(newvp);
1420				newvp = (struct vnode *)0;
1421			}
1422			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1423			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
1424			if (!error)
1425				newvp = NFSTOV(np);
1426		}
1427	}
1428	if (v3)
1429		nfsm_wcc_data(dvp, wccflag);
1430	nfsm_reqdone;
1431	if (error) {
1432		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
1433			fmode &= ~O_EXCL;
1434			goto again;
1435		}
1436		if (newvp)
1437			vput(newvp);
1438	} else if (v3 && (fmode & O_EXCL))
1439		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
1440	if (!error) {
1441		if (cnp->cn_flags & MAKEENTRY)
1442			cache_enter(dvp, newvp, cnp);
1443		*ap->a_vpp = newvp;
1444	}
1445	VTONFS(dvp)->n_flag |= NMODIFIED;
1446	if (!wccflag)
1447		VTONFS(dvp)->n_attrstamp = 0;
1448	return (error);
1449}
1450
1451/*
1452 * nfs file remove call
1453 * To try and make nfs semantics closer to ufs semantics, a file that has
1454 * other processes using the vnode is renamed instead of removed and then
1455 * removed later on the last close.
1456 * - If v_usecount > 1
1457 *	  If a rename is not already in the works
1458 *	     call nfs_sillyrename() to set it up
1459 *     else
1460 *	  do the remove rpc
1461 */
1462static int
1463nfs_remove(ap)
1464	struct vop_remove_args /* {
1465		struct vnodeop_desc *a_desc;
1466		struct vnode * a_dvp;
1467		struct vnode * a_vp;
1468		struct componentname * a_cnp;
1469	} */ *ap;
1470{
1471	register struct vnode *vp = ap->a_vp;
1472	register struct vnode *dvp = ap->a_dvp;
1473	register struct componentname *cnp = ap->a_cnp;
1474	register struct nfsnode *np = VTONFS(vp);
1475	int error = 0;
1476	struct vattr vattr;
1477
1478#ifndef DIAGNOSTIC
1479	if ((cnp->cn_flags & HASBUF) == 0)
1480		panic("nfs_remove: no name");
1481	if (vp->v_usecount < 1)
1482		panic("nfs_remove: bad v_usecount");
1483#endif
1484	if (vp->v_type == VDIR)
1485		error = EPERM;
1486	else if (vp->v_usecount == 1 || (np->n_sillyrename &&
1487	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
1488	    vattr.va_nlink > 1)) {
1489		/*
1490		 * Purge the name cache so that the chance of a lookup for
1491		 * the name succeeding while the remove is in progress is
1492		 * minimized. Without node locking it can still happen, such
1493		 * that an I/O op returns ESTALE, but since you get this if
1494		 * another host removes the file..
1495		 */
1496		cache_purge(vp);
1497		/*
1498		 * throw away biocache buffers, mainly to avoid
1499		 * unnecessary delayed writes later.
1500		 */
1501		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
1502		/* Do the rpc */
1503		if (error != EINTR)
1504			error = nfs_removerpc(dvp, cnp->cn_nameptr,
1505				cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc);
1506		/*
1507		 * Kludge City: If the first reply to the remove rpc is lost..
1508		 *   the reply to the retransmitted request will be ENOENT
1509		 *   since the file was in fact removed
1510		 *   Therefore, we cheat and return success.
1511		 */
1512		if (error == ENOENT)
1513			error = 0;
1514	} else if (!np->n_sillyrename)
1515		error = nfs_sillyrename(dvp, vp, cnp);
1516	np->n_attrstamp = 0;
1517	return (error);
1518}
1519
1520/*
1521 * nfs file remove rpc called from nfs_inactive
1522 */
1523int
1524nfs_removeit(sp)
1525	register struct sillyrename *sp;
1526{
1527
1528	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
1529		(struct proc *)0));
1530}
1531
1532/*
1533 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1534 */
1535static int
1536nfs_removerpc(dvp, name, namelen, cred, proc)
1537	register struct vnode *dvp;
1538	const char *name;
1539	int namelen;
1540	struct ucred *cred;
1541	struct proc *proc;
1542{
1543	register u_int32_t *tl;
1544	register caddr_t cp;
1545	register int32_t t1, t2;
1546	caddr_t bpos, dpos, cp2;
1547	int error = 0, wccflag = NFSV3_WCCRATTR;
1548	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1549	int v3 = NFS_ISV3(dvp);
1550
1551	nfsstats.rpccnt[NFSPROC_REMOVE]++;
1552	nfsm_reqhead(dvp, NFSPROC_REMOVE,
1553		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
1554	nfsm_fhtom(dvp, v3);
1555	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
1556	nfsm_request(dvp, NFSPROC_REMOVE, proc, cred);
1557	if (v3)
1558		nfsm_wcc_data(dvp, wccflag);
1559	nfsm_reqdone;
1560	VTONFS(dvp)->n_flag |= NMODIFIED;
1561	if (!wccflag)
1562		VTONFS(dvp)->n_attrstamp = 0;
1563	return (error);
1564}
1565
1566/*
1567 * nfs file rename call
1568 */
1569static int
1570nfs_rename(ap)
1571	struct vop_rename_args  /* {
1572		struct vnode *a_fdvp;
1573		struct vnode *a_fvp;
1574		struct componentname *a_fcnp;
1575		struct vnode *a_tdvp;
1576		struct vnode *a_tvp;
1577		struct componentname *a_tcnp;
1578	} */ *ap;
1579{
1580	register struct vnode *fvp = ap->a_fvp;
1581	register struct vnode *tvp = ap->a_tvp;
1582	register struct vnode *fdvp = ap->a_fdvp;
1583	register struct vnode *tdvp = ap->a_tdvp;
1584	register struct componentname *tcnp = ap->a_tcnp;
1585	register struct componentname *fcnp = ap->a_fcnp;
1586	int error;
1587
1588#ifndef DIAGNOSTIC
1589	if ((tcnp->cn_flags & HASBUF) == 0 ||
1590	    (fcnp->cn_flags & HASBUF) == 0)
1591		panic("nfs_rename: no name");
1592#endif
1593	/* Check for cross-device rename */
1594	if ((fvp->v_mount != tdvp->v_mount) ||
1595	    (tvp && (fvp->v_mount != tvp->v_mount))) {
1596		error = EXDEV;
1597		goto out;
1598	}
1599
1600	/*
1601	 * We have to flush B_DELWRI data prior to renaming
1602	 * the file.  If we don't, the delayed-write buffers
1603	 * can be flushed out later after the file has gone stale
1604	 * under NFSV3.  NFSV2 does not have this problem because
1605	 * ( as far as I can tell ) it flushes dirty buffers more
1606	 * often.
1607	 */
1608
1609	VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_proc);
1610	if (tvp)
1611	    VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_proc);
1612
1613	/*
1614	 * If the tvp exists and is in use, sillyrename it before doing the
1615	 * rename of the new file over it.
1616	 * XXX Can't sillyrename a directory.
1617	 */
1618	if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename &&
1619		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
1620		vput(tvp);
1621		tvp = NULL;
1622	}
1623
1624	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
1625		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
1626		tcnp->cn_proc);
1627
1628	if (fvp->v_type == VDIR) {
1629		if (tvp != NULL && tvp->v_type == VDIR)
1630			cache_purge(tdvp);
1631		cache_purge(fdvp);
1632	}
1633
1634out:
1635	if (tdvp == tvp)
1636		vrele(tdvp);
1637	else
1638		vput(tdvp);
1639	if (tvp)
1640		vput(tvp);
1641	vrele(fdvp);
1642	vrele(fvp);
1643	/*
1644	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
1645	 */
1646	if (error == ENOENT)
1647		error = 0;
1648	return (error);
1649}
1650
1651/*
1652 * nfs file rename rpc called from nfs_remove() above
1653 */
1654static int
1655nfs_renameit(sdvp, scnp, sp)
1656	struct vnode *sdvp;
1657	struct componentname *scnp;
1658	register struct sillyrename *sp;
1659{
1660	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
1661		sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc));
1662}
1663
1664/*
1665 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
1666 */
1667static int
1668nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc)
1669	register struct vnode *fdvp;
1670	const char *fnameptr;
1671	int fnamelen;
1672	register struct vnode *tdvp;
1673	const char *tnameptr;
1674	int tnamelen;
1675	struct ucred *cred;
1676	struct proc *proc;
1677{
1678	register u_int32_t *tl;
1679	register caddr_t cp;
1680	register int32_t t1, t2;
1681	caddr_t bpos, dpos, cp2;
1682	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
1683	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1684	int v3 = NFS_ISV3(fdvp);
1685
1686	nfsstats.rpccnt[NFSPROC_RENAME]++;
1687	nfsm_reqhead(fdvp, NFSPROC_RENAME,
1688		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
1689		nfsm_rndup(tnamelen));
1690	nfsm_fhtom(fdvp, v3);
1691	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
1692	nfsm_fhtom(tdvp, v3);
1693	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
1694	nfsm_request(fdvp, NFSPROC_RENAME, proc, cred);
1695	if (v3) {
1696		nfsm_wcc_data(fdvp, fwccflag);
1697		nfsm_wcc_data(tdvp, twccflag);
1698	}
1699	nfsm_reqdone;
1700	VTONFS(fdvp)->n_flag |= NMODIFIED;
1701	VTONFS(tdvp)->n_flag |= NMODIFIED;
1702	if (!fwccflag)
1703		VTONFS(fdvp)->n_attrstamp = 0;
1704	if (!twccflag)
1705		VTONFS(tdvp)->n_attrstamp = 0;
1706	return (error);
1707}
1708
1709/*
1710 * nfs hard link create call
1711 */
1712static int
1713nfs_link(ap)
1714	struct vop_link_args /* {
1715		struct vnode *a_tdvp;
1716		struct vnode *a_vp;
1717		struct componentname *a_cnp;
1718	} */ *ap;
1719{
1720	register struct vnode *vp = ap->a_vp;
1721	register struct vnode *tdvp = ap->a_tdvp;
1722	register struct componentname *cnp = ap->a_cnp;
1723	register u_int32_t *tl;
1724	register caddr_t cp;
1725	register int32_t t1, t2;
1726	caddr_t bpos, dpos, cp2;
1727	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
1728	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1729	int v3;
1730
1731	if (vp->v_mount != tdvp->v_mount) {
1732		return (EXDEV);
1733	}
1734
1735	/*
1736	 * Push all writes to the server, so that the attribute cache
1737	 * doesn't get "out of sync" with the server.
1738	 * XXX There should be a better way!
1739	 */
1740	VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc);
1741
1742	v3 = NFS_ISV3(vp);
1743	nfsstats.rpccnt[NFSPROC_LINK]++;
1744	nfsm_reqhead(vp, NFSPROC_LINK,
1745		NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
1746	nfsm_fhtom(vp, v3);
1747	nfsm_fhtom(tdvp, v3);
1748	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1749	nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
1750	if (v3) {
1751		nfsm_postop_attr(vp, attrflag);
1752		nfsm_wcc_data(tdvp, wccflag);
1753	}
1754	nfsm_reqdone;
1755	VTONFS(tdvp)->n_flag |= NMODIFIED;
1756	if (!attrflag)
1757		VTONFS(vp)->n_attrstamp = 0;
1758	if (!wccflag)
1759		VTONFS(tdvp)->n_attrstamp = 0;
1760	/*
1761	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
1762	 */
1763	if (error == EEXIST)
1764		error = 0;
1765	return (error);
1766}
1767
1768/*
1769 * nfs symbolic link create call
1770 */
1771static int
1772nfs_symlink(ap)
1773	struct vop_symlink_args /* {
1774		struct vnode *a_dvp;
1775		struct vnode **a_vpp;
1776		struct componentname *a_cnp;
1777		struct vattr *a_vap;
1778		char *a_target;
1779	} */ *ap;
1780{
1781	register struct vnode *dvp = ap->a_dvp;
1782	register struct vattr *vap = ap->a_vap;
1783	register struct componentname *cnp = ap->a_cnp;
1784	register struct nfsv2_sattr *sp;
1785	register u_int32_t *tl;
1786	register caddr_t cp;
1787	register int32_t t1, t2;
1788	caddr_t bpos, dpos, cp2;
1789	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
1790	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1791	struct vnode *newvp = (struct vnode *)0;
1792	int v3 = NFS_ISV3(dvp);
1793
1794	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
1795	slen = strlen(ap->a_target);
1796	nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
1797	    nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
1798	nfsm_fhtom(dvp, v3);
1799	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1800	if (v3) {
1801		nfsm_v3attrbuild(vap, FALSE);
1802	}
1803	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
1804	if (!v3) {
1805		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1806		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
1807		sp->sa_uid = nfs_xdrneg1;
1808		sp->sa_gid = nfs_xdrneg1;
1809		sp->sa_size = nfs_xdrneg1;
1810		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1811		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1812	}
1813
1814	/*
1815	 * Issue the NFS request and get the rpc response.
1816	 *
1817	 * Only NFSv3 responses returning an error of 0 actually return
1818	 * a file handle that can be converted into newvp without having
1819	 * to do an extra lookup rpc.
1820	 */
1821	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
1822	if (v3) {
1823		if (error == 0)
1824			nfsm_mtofh(dvp, newvp, v3, gotvp);
1825		nfsm_wcc_data(dvp, wccflag);
1826	}
1827
1828	/*
1829	 * out code jumps -> here, mrep is also freed.
1830	 */
1831
1832	nfsm_reqdone;
1833
1834	/*
1835	 * If we get an EEXIST error, silently convert it to no-error
1836	 * in case of an NFS retry.
1837	 */
1838	if (error == EEXIST)
1839		error = 0;
1840
1841	/*
1842	 * If we do not have (or no longer have) an error, and we could
1843	 * not extract the newvp from the response due to the request being
1844	 * NFSv2 or the error being EEXIST.  We have to do a lookup in order
1845	 * to obtain a newvp to return.
1846	 */
1847	if (error == 0 && newvp == NULL) {
1848		struct nfsnode *np = NULL;
1849
1850		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
1851		    cnp->cn_cred, cnp->cn_proc, &np);
1852		if (!error)
1853			newvp = NFSTOV(np);
1854	}
1855	if (error) {
1856		if (newvp)
1857			vput(newvp);
1858	} else {
1859		*ap->a_vpp = newvp;
1860	}
1861	VTONFS(dvp)->n_flag |= NMODIFIED;
1862	if (!wccflag)
1863		VTONFS(dvp)->n_attrstamp = 0;
1864	return (error);
1865}
1866
1867/*
1868 * nfs make dir call
1869 */
1870static int
1871nfs_mkdir(ap)
1872	struct vop_mkdir_args /* {
1873		struct vnode *a_dvp;
1874		struct vnode **a_vpp;
1875		struct componentname *a_cnp;
1876		struct vattr *a_vap;
1877	} */ *ap;
1878{
1879	register struct vnode *dvp = ap->a_dvp;
1880	register struct vattr *vap = ap->a_vap;
1881	register struct componentname *cnp = ap->a_cnp;
1882	register struct nfsv2_sattr *sp;
1883	register u_int32_t *tl;
1884	register caddr_t cp;
1885	register int32_t t1, t2;
1886	register int len;
1887	struct nfsnode *np = (struct nfsnode *)0;
1888	struct vnode *newvp = (struct vnode *)0;
1889	caddr_t bpos, dpos, cp2;
1890	int error = 0, wccflag = NFSV3_WCCRATTR;
1891	int gotvp = 0;
1892	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1893	struct vattr vattr;
1894	int v3 = NFS_ISV3(dvp);
1895
1896	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
1897		return (error);
1898	}
1899	len = cnp->cn_namelen;
1900	nfsstats.rpccnt[NFSPROC_MKDIR]++;
1901	nfsm_reqhead(dvp, NFSPROC_MKDIR,
1902	  NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
1903	nfsm_fhtom(dvp, v3);
1904	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
1905	if (v3) {
1906		nfsm_v3attrbuild(vap, FALSE);
1907	} else {
1908		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1909		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
1910		sp->sa_uid = nfs_xdrneg1;
1911		sp->sa_gid = nfs_xdrneg1;
1912		sp->sa_size = nfs_xdrneg1;
1913		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1914		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1915	}
1916	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
1917	if (!error)
1918		nfsm_mtofh(dvp, newvp, v3, gotvp);
1919	if (v3)
1920		nfsm_wcc_data(dvp, wccflag);
1921	nfsm_reqdone;
1922	VTONFS(dvp)->n_flag |= NMODIFIED;
1923	if (!wccflag)
1924		VTONFS(dvp)->n_attrstamp = 0;
1925	/*
1926	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
1927	 * if we can succeed in looking up the directory.
1928	 */
1929	if (error == EEXIST || (!error && !gotvp)) {
1930		if (newvp) {
1931			vrele(newvp);
1932			newvp = (struct vnode *)0;
1933		}
1934		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
1935			cnp->cn_proc, &np);
1936		if (!error) {
1937			newvp = NFSTOV(np);
1938			if (newvp->v_type != VDIR)
1939				error = EEXIST;
1940		}
1941	}
1942	if (error) {
1943		if (newvp)
1944			vrele(newvp);
1945	} else
1946		*ap->a_vpp = newvp;
1947	return (error);
1948}
1949
1950/*
1951 * nfs remove directory call
1952 */
1953static int
1954nfs_rmdir(ap)
1955	struct vop_rmdir_args /* {
1956		struct vnode *a_dvp;
1957		struct vnode *a_vp;
1958		struct componentname *a_cnp;
1959	} */ *ap;
1960{
1961	register struct vnode *vp = ap->a_vp;
1962	register struct vnode *dvp = ap->a_dvp;
1963	register struct componentname *cnp = ap->a_cnp;
1964	register u_int32_t *tl;
1965	register caddr_t cp;
1966	register int32_t t1, t2;
1967	caddr_t bpos, dpos, cp2;
1968	int error = 0, wccflag = NFSV3_WCCRATTR;
1969	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1970	int v3 = NFS_ISV3(dvp);
1971
1972	if (dvp == vp)
1973		return (EINVAL);
1974	nfsstats.rpccnt[NFSPROC_RMDIR]++;
1975	nfsm_reqhead(dvp, NFSPROC_RMDIR,
1976		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
1977	nfsm_fhtom(dvp, v3);
1978	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1979	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
1980	if (v3)
1981		nfsm_wcc_data(dvp, wccflag);
1982	nfsm_reqdone;
1983	VTONFS(dvp)->n_flag |= NMODIFIED;
1984	if (!wccflag)
1985		VTONFS(dvp)->n_attrstamp = 0;
1986	cache_purge(dvp);
1987	cache_purge(vp);
1988	/*
1989	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
1990	 */
1991	if (error == ENOENT)
1992		error = 0;
1993	return (error);
1994}
1995
1996/*
1997 * nfs readdir call
1998 */
1999static int
2000nfs_readdir(ap)
2001	struct vop_readdir_args /* {
2002		struct vnode *a_vp;
2003		struct uio *a_uio;
2004		struct ucred *a_cred;
2005	} */ *ap;
2006{
2007	register struct vnode *vp = ap->a_vp;
2008	register struct nfsnode *np = VTONFS(vp);
2009	register struct uio *uio = ap->a_uio;
2010	int tresid, error;
2011	struct vattr vattr;
2012
2013	if (vp->v_type != VDIR)
2014		return (EPERM);
2015	/*
2016	 * First, check for hit on the EOF offset cache
2017	 */
2018	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
2019	    (np->n_flag & NMODIFIED) == 0) {
2020		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
2021			if (NQNFS_CKCACHABLE(vp, ND_READ)) {
2022				nfsstats.direofcache_hits++;
2023				return (0);
2024			}
2025		} else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 &&
2026			np->n_mtime == vattr.va_mtime.tv_sec) {
2027			nfsstats.direofcache_hits++;
2028			return (0);
2029		}
2030	}
2031
2032	/*
2033	 * Call nfs_bioread() to do the real work.
2034	 */
2035	tresid = uio->uio_resid;
2036	error = nfs_bioread(vp, uio, 0, ap->a_cred);
2037
2038	if (!error && uio->uio_resid == tresid)
2039		nfsstats.direofcache_misses++;
2040	return (error);
2041}
2042
2043/*
2044 * Readdir rpc call.
2045 * Called from below the buffer cache by nfs_doio().
2046 */
2047int
2048nfs_readdirrpc(vp, uiop, cred)
2049	struct vnode *vp;
2050	register struct uio *uiop;
2051	struct ucred *cred;
2052
2053{
2054	register int len, left;
2055	register struct dirent *dp = NULL;
2056	register u_int32_t *tl;
2057	register caddr_t cp;
2058	register int32_t t1, t2;
2059	register nfsuint64 *cookiep;
2060	caddr_t bpos, dpos, cp2;
2061	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2062	nfsuint64 cookie;
2063	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2064	struct nfsnode *dnp = VTONFS(vp);
2065	u_quad_t fileno;
2066	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2067	int attrflag;
2068	int v3 = NFS_ISV3(vp);
2069
2070#ifndef DIAGNOSTIC
2071	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2072		(uiop->uio_resid & (DIRBLKSIZ - 1)))
2073		panic("nfs readdirrpc bad uio");
2074#endif
2075
2076	/*
2077	 * If there is no cookie, assume directory was stale.
2078	 */
2079	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2080	if (cookiep)
2081		cookie = *cookiep;
2082	else
2083		return (NFSERR_BAD_COOKIE);
2084	/*
2085	 * Loop around doing readdir rpc's of size nm_readdirsize
2086	 * truncated to a multiple of DIRBLKSIZ.
2087	 * The stopping criteria is EOF or buffer full.
2088	 */
2089	while (more_dirs && bigenough) {
2090		nfsstats.rpccnt[NFSPROC_READDIR]++;
2091		nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
2092			NFSX_READDIR(v3));
2093		nfsm_fhtom(vp, v3);
2094		if (v3) {
2095			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2096			*tl++ = cookie.nfsuquad[0];
2097			*tl++ = cookie.nfsuquad[1];
2098			*tl++ = dnp->n_cookieverf.nfsuquad[0];
2099			*tl++ = dnp->n_cookieverf.nfsuquad[1];
2100		} else {
2101			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2102			*tl++ = cookie.nfsuquad[0];
2103		}
2104		*tl = txdr_unsigned(nmp->nm_readdirsize);
2105		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
2106		if (v3) {
2107			nfsm_postop_attr(vp, attrflag);
2108			if (!error) {
2109				nfsm_dissect(tl, u_int32_t *,
2110				    2 * NFSX_UNSIGNED);
2111				dnp->n_cookieverf.nfsuquad[0] = *tl++;
2112				dnp->n_cookieverf.nfsuquad[1] = *tl;
2113			} else {
2114				m_freem(mrep);
2115				goto nfsmout;
2116			}
2117		}
2118		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2119		more_dirs = fxdr_unsigned(int, *tl);
2120
2121		/* loop thru the dir entries, doctoring them to 4bsd form */
2122		while (more_dirs && bigenough) {
2123			if (v3) {
2124				nfsm_dissect(tl, u_int32_t *,
2125				    3 * NFSX_UNSIGNED);
2126				fileno = fxdr_hyper(tl);
2127				len = fxdr_unsigned(int, *(tl + 2));
2128			} else {
2129				nfsm_dissect(tl, u_int32_t *,
2130				    2 * NFSX_UNSIGNED);
2131				fileno = fxdr_unsigned(u_quad_t, *tl++);
2132				len = fxdr_unsigned(int, *tl);
2133			}
2134			if (len <= 0 || len > NFS_MAXNAMLEN) {
2135				error = EBADRPC;
2136				m_freem(mrep);
2137				goto nfsmout;
2138			}
2139			tlen = nfsm_rndup(len);
2140			if (tlen == len)
2141				tlen += 4;	/* To ensure null termination */
2142			left = DIRBLKSIZ - blksiz;
2143			if ((tlen + DIRHDSIZ) > left) {
2144				dp->d_reclen += left;
2145				uiop->uio_iov->iov_base += left;
2146				uiop->uio_iov->iov_len -= left;
2147				uiop->uio_offset += left;
2148				uiop->uio_resid -= left;
2149				blksiz = 0;
2150			}
2151			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2152				bigenough = 0;
2153			if (bigenough) {
2154				dp = (struct dirent *)uiop->uio_iov->iov_base;
2155				dp->d_fileno = (int)fileno;
2156				dp->d_namlen = len;
2157				dp->d_reclen = tlen + DIRHDSIZ;
2158				dp->d_type = DT_UNKNOWN;
2159				blksiz += dp->d_reclen;
2160				if (blksiz == DIRBLKSIZ)
2161					blksiz = 0;
2162				uiop->uio_offset += DIRHDSIZ;
2163				uiop->uio_resid -= DIRHDSIZ;
2164				uiop->uio_iov->iov_base += DIRHDSIZ;
2165				uiop->uio_iov->iov_len -= DIRHDSIZ;
2166				nfsm_mtouio(uiop, len);
2167				cp = uiop->uio_iov->iov_base;
2168				tlen -= len;
2169				*cp = '\0';	/* null terminate */
2170				uiop->uio_iov->iov_base += tlen;
2171				uiop->uio_iov->iov_len -= tlen;
2172				uiop->uio_offset += tlen;
2173				uiop->uio_resid -= tlen;
2174			} else
2175				nfsm_adv(nfsm_rndup(len));
2176			if (v3) {
2177				nfsm_dissect(tl, u_int32_t *,
2178				    3 * NFSX_UNSIGNED);
2179			} else {
2180				nfsm_dissect(tl, u_int32_t *,
2181				    2 * NFSX_UNSIGNED);
2182			}
2183			if (bigenough) {
2184				cookie.nfsuquad[0] = *tl++;
2185				if (v3)
2186					cookie.nfsuquad[1] = *tl++;
2187			} else if (v3)
2188				tl += 2;
2189			else
2190				tl++;
2191			more_dirs = fxdr_unsigned(int, *tl);
2192		}
2193		/*
2194		 * If at end of rpc data, get the eof boolean
2195		 */
2196		if (!more_dirs) {
2197			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2198			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2199		}
2200		m_freem(mrep);
2201	}
2202	/*
2203	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2204	 * by increasing d_reclen for the last record.
2205	 */
2206	if (blksiz > 0) {
2207		left = DIRBLKSIZ - blksiz;
2208		dp->d_reclen += left;
2209		uiop->uio_iov->iov_base += left;
2210		uiop->uio_iov->iov_len -= left;
2211		uiop->uio_offset += left;
2212		uiop->uio_resid -= left;
2213	}
2214
2215	/*
2216	 * We are now either at the end of the directory or have filled the
2217	 * block.
2218	 */
2219	if (bigenough)
2220		dnp->n_direofoffset = uiop->uio_offset;
2221	else {
2222		if (uiop->uio_resid > 0)
2223			printf("EEK! readdirrpc resid > 0\n");
2224		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2225		*cookiep = cookie;
2226	}
2227nfsmout:
2228	return (error);
2229}
2230
2231/*
2232 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
2233 */
2234int
2235nfs_readdirplusrpc(vp, uiop, cred)
2236	struct vnode *vp;
2237	register struct uio *uiop;
2238	struct ucred *cred;
2239{
2240	register int len, left;
2241	register struct dirent *dp;
2242	register u_int32_t *tl;
2243	register caddr_t cp;
2244	register int32_t t1, t2;
2245	register struct vnode *newvp;
2246	register nfsuint64 *cookiep;
2247	caddr_t bpos, dpos, cp2, dpossav1, dpossav2;
2248	struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2;
2249	struct nameidata nami, *ndp = &nami;
2250	struct componentname *cnp = &ndp->ni_cnd;
2251	nfsuint64 cookie;
2252	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2253	struct nfsnode *dnp = VTONFS(vp), *np;
2254	nfsfh_t *fhp;
2255	u_quad_t fileno;
2256	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2257	int attrflag, fhsize;
2258
2259#ifndef nolint
2260	dp = (struct dirent *)0;
2261#endif
2262#ifndef DIAGNOSTIC
2263	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2264		(uiop->uio_resid & (DIRBLKSIZ - 1)))
2265		panic("nfs readdirplusrpc bad uio");
2266#endif
2267	ndp->ni_dvp = vp;
2268	newvp = NULLVP;
2269
2270	/*
2271	 * If there is no cookie, assume directory was stale.
2272	 */
2273	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2274	if (cookiep)
2275		cookie = *cookiep;
2276	else
2277		return (NFSERR_BAD_COOKIE);
2278	/*
2279	 * Loop around doing readdir rpc's of size nm_readdirsize
2280	 * truncated to a multiple of DIRBLKSIZ.
2281	 * The stopping criteria is EOF or buffer full.
2282	 */
2283	while (more_dirs && bigenough) {
2284		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2285		nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
2286			NFSX_FH(1) + 6 * NFSX_UNSIGNED);
2287		nfsm_fhtom(vp, 1);
2288 		nfsm_build(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
2289		*tl++ = cookie.nfsuquad[0];
2290		*tl++ = cookie.nfsuquad[1];
2291		*tl++ = dnp->n_cookieverf.nfsuquad[0];
2292		*tl++ = dnp->n_cookieverf.nfsuquad[1];
2293		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
2294		*tl = txdr_unsigned(nmp->nm_rsize);
2295		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred);
2296		nfsm_postop_attr(vp, attrflag);
2297		if (error) {
2298			m_freem(mrep);
2299			goto nfsmout;
2300		}
2301		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2302		dnp->n_cookieverf.nfsuquad[0] = *tl++;
2303		dnp->n_cookieverf.nfsuquad[1] = *tl++;
2304		more_dirs = fxdr_unsigned(int, *tl);
2305
2306		/* loop thru the dir entries, doctoring them to 4bsd form */
2307		while (more_dirs && bigenough) {
2308			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2309			fileno = fxdr_hyper(tl);
2310			len = fxdr_unsigned(int, *(tl + 2));
2311			if (len <= 0 || len > NFS_MAXNAMLEN) {
2312				error = EBADRPC;
2313				m_freem(mrep);
2314				goto nfsmout;
2315			}
2316			tlen = nfsm_rndup(len);
2317			if (tlen == len)
2318				tlen += 4;	/* To ensure null termination*/
2319			left = DIRBLKSIZ - blksiz;
2320			if ((tlen + DIRHDSIZ) > left) {
2321				dp->d_reclen += left;
2322				uiop->uio_iov->iov_base += left;
2323				uiop->uio_iov->iov_len -= left;
2324				uiop->uio_offset += left;
2325				uiop->uio_resid -= left;
2326				blksiz = 0;
2327			}
2328			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2329				bigenough = 0;
2330			if (bigenough) {
2331				dp = (struct dirent *)uiop->uio_iov->iov_base;
2332				dp->d_fileno = (int)fileno;
2333				dp->d_namlen = len;
2334				dp->d_reclen = tlen + DIRHDSIZ;
2335				dp->d_type = DT_UNKNOWN;
2336				blksiz += dp->d_reclen;
2337				if (blksiz == DIRBLKSIZ)
2338					blksiz = 0;
2339				uiop->uio_offset += DIRHDSIZ;
2340				uiop->uio_resid -= DIRHDSIZ;
2341				uiop->uio_iov->iov_base += DIRHDSIZ;
2342				uiop->uio_iov->iov_len -= DIRHDSIZ;
2343				cnp->cn_nameptr = uiop->uio_iov->iov_base;
2344				cnp->cn_namelen = len;
2345				nfsm_mtouio(uiop, len);
2346				cp = uiop->uio_iov->iov_base;
2347				tlen -= len;
2348				*cp = '\0';
2349				uiop->uio_iov->iov_base += tlen;
2350				uiop->uio_iov->iov_len -= tlen;
2351				uiop->uio_offset += tlen;
2352				uiop->uio_resid -= tlen;
2353			} else
2354				nfsm_adv(nfsm_rndup(len));
2355			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2356			if (bigenough) {
2357				cookie.nfsuquad[0] = *tl++;
2358				cookie.nfsuquad[1] = *tl++;
2359			} else
2360				tl += 2;
2361
2362			/*
2363			 * Since the attributes are before the file handle
2364			 * (sigh), we must skip over the attributes and then
2365			 * come back and get them.
2366			 */
2367			attrflag = fxdr_unsigned(int, *tl);
2368			if (attrflag) {
2369			    dpossav1 = dpos;
2370			    mdsav1 = md;
2371			    nfsm_adv(NFSX_V3FATTR);
2372			    nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2373			    doit = fxdr_unsigned(int, *tl);
2374			    if (doit) {
2375				nfsm_getfh(fhp, fhsize, 1);
2376				if (NFS_CMPFH(dnp, fhp, fhsize)) {
2377				    VREF(vp);
2378				    newvp = vp;
2379				    np = dnp;
2380				} else {
2381				    error = nfs_nget(vp->v_mount, fhp,
2382					fhsize, &np);
2383				    if (error)
2384					doit = 0;
2385				    else
2386					newvp = NFSTOV(np);
2387				}
2388			    }
2389			    if (doit && bigenough) {
2390				dpossav2 = dpos;
2391				dpos = dpossav1;
2392				mdsav2 = md;
2393				md = mdsav1;
2394				nfsm_loadattr(newvp, (struct vattr *)0);
2395				dpos = dpossav2;
2396				md = mdsav2;
2397				dp->d_type =
2398				    IFTODT(VTTOIF(np->n_vattr.va_type));
2399				ndp->ni_vp = newvp;
2400			        cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
2401			    }
2402			} else {
2403			    /* Just skip over the file handle */
2404			    nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2405			    i = fxdr_unsigned(int, *tl);
2406			    nfsm_adv(nfsm_rndup(i));
2407			}
2408			if (newvp != NULLVP) {
2409			    if (newvp == vp)
2410				vrele(newvp);
2411			    else
2412				vput(newvp);
2413			    newvp = NULLVP;
2414			}
2415			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2416			more_dirs = fxdr_unsigned(int, *tl);
2417		}
2418		/*
2419		 * If at end of rpc data, get the eof boolean
2420		 */
2421		if (!more_dirs) {
2422			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2423			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2424		}
2425		m_freem(mrep);
2426	}
2427	/*
2428	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2429	 * by increasing d_reclen for the last record.
2430	 */
2431	if (blksiz > 0) {
2432		left = DIRBLKSIZ - blksiz;
2433		dp->d_reclen += left;
2434		uiop->uio_iov->iov_base += left;
2435		uiop->uio_iov->iov_len -= left;
2436		uiop->uio_offset += left;
2437		uiop->uio_resid -= left;
2438	}
2439
2440	/*
2441	 * We are now either at the end of the directory or have filled the
2442	 * block.
2443	 */
2444	if (bigenough)
2445		dnp->n_direofoffset = uiop->uio_offset;
2446	else {
2447		if (uiop->uio_resid > 0)
2448			printf("EEK! readdirplusrpc resid > 0\n");
2449		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2450		*cookiep = cookie;
2451	}
2452nfsmout:
2453	if (newvp != NULLVP) {
2454	        if (newvp == vp)
2455			vrele(newvp);
2456		else
2457			vput(newvp);
2458		newvp = NULLVP;
2459	}
2460	return (error);
2461}
2462
2463/*
2464 * Silly rename. To make the NFS filesystem that is stateless look a little
2465 * more like the "ufs" a remove of an active vnode is translated to a rename
2466 * to a funny looking filename that is removed by nfs_inactive on the
2467 * nfsnode. There is the potential for another process on a different client
2468 * to create the same funny name between the nfs_lookitup() fails and the
2469 * nfs_rename() completes, but...
2470 */
2471static int
2472nfs_sillyrename(dvp, vp, cnp)
2473	struct vnode *dvp, *vp;
2474	struct componentname *cnp;
2475{
2476	register struct sillyrename *sp;
2477	struct nfsnode *np;
2478	int error;
2479	short pid;
2480
2481	cache_purge(dvp);
2482	np = VTONFS(vp);
2483#ifndef DIAGNOSTIC
2484	if (vp->v_type == VDIR)
2485		panic("nfs: sillyrename dir");
2486#endif
2487	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
2488		M_NFSREQ, M_WAITOK);
2489	sp->s_cred = crdup(cnp->cn_cred);
2490	sp->s_dvp = dvp;
2491	VREF(dvp);
2492
2493	/* Fudge together a funny name */
2494	pid = cnp->cn_proc->p_pid;
2495	sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid);
2496
2497	/* Try lookitups until we get one that isn't there */
2498	while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2499		cnp->cn_proc, (struct nfsnode **)0) == 0) {
2500		sp->s_name[4]++;
2501		if (sp->s_name[4] > 'z') {
2502			error = EINVAL;
2503			goto bad;
2504		}
2505	}
2506	error = nfs_renameit(dvp, cnp, sp);
2507	if (error)
2508		goto bad;
2509	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2510		cnp->cn_proc, &np);
2511	np->n_sillyrename = sp;
2512	return (0);
2513bad:
2514	vrele(sp->s_dvp);
2515	crfree(sp->s_cred);
2516	free((caddr_t)sp, M_NFSREQ);
2517	return (error);
2518}
2519
2520/*
2521 * Look up a file name and optionally either update the file handle or
2522 * allocate an nfsnode, depending on the value of npp.
2523 * npp == NULL	--> just do the lookup
2524 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
2525 *			handled too
2526 * *npp != NULL --> update the file handle in the vnode
2527 */
2528static int
2529nfs_lookitup(dvp, name, len, cred, procp, npp)
2530	register struct vnode *dvp;
2531	const char *name;
2532	int len;
2533	struct ucred *cred;
2534	struct proc *procp;
2535	struct nfsnode **npp;
2536{
2537	register u_int32_t *tl;
2538	register caddr_t cp;
2539	register int32_t t1, t2;
2540	struct vnode *newvp = (struct vnode *)0;
2541	struct nfsnode *np, *dnp = VTONFS(dvp);
2542	caddr_t bpos, dpos, cp2;
2543	int error = 0, fhlen, attrflag;
2544	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2545	nfsfh_t *nfhp;
2546	int v3 = NFS_ISV3(dvp);
2547
2548	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
2549	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
2550		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
2551	nfsm_fhtom(dvp, v3);
2552	nfsm_strtom(name, len, NFS_MAXNAMLEN);
2553	nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred);
2554	if (npp && !error) {
2555		nfsm_getfh(nfhp, fhlen, v3);
2556		if (*npp) {
2557		    np = *npp;
2558		    if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
2559			free((caddr_t)np->n_fhp, M_NFSBIGFH);
2560			np->n_fhp = &np->n_fh;
2561		    } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
2562			np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK);
2563		    bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
2564		    np->n_fhsize = fhlen;
2565		    newvp = NFSTOV(np);
2566		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
2567		    VREF(dvp);
2568		    newvp = dvp;
2569		} else {
2570		    error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
2571		    if (error) {
2572			m_freem(mrep);
2573			return (error);
2574		    }
2575		    newvp = NFSTOV(np);
2576		}
2577		if (v3) {
2578			nfsm_postop_attr(newvp, attrflag);
2579			if (!attrflag && *npp == NULL) {
2580				m_freem(mrep);
2581				if (newvp == dvp)
2582					vrele(newvp);
2583				else
2584					vput(newvp);
2585				return (ENOENT);
2586			}
2587		} else
2588			nfsm_loadattr(newvp, (struct vattr *)0);
2589	}
2590	nfsm_reqdone;
2591	if (npp && *npp == NULL) {
2592		if (error) {
2593			if (newvp) {
2594				if (newvp == dvp)
2595					vrele(newvp);
2596				else
2597					vput(newvp);
2598			}
2599		} else
2600			*npp = np;
2601	}
2602	return (error);
2603}
2604
2605/*
2606 * Nfs Version 3 commit rpc
2607 */
2608int
2609nfs_commit(vp, offset, cnt, cred, procp)
2610	struct vnode *vp;
2611	u_quad_t offset;
2612	int cnt;
2613	struct ucred *cred;
2614	struct proc *procp;
2615{
2616	register caddr_t cp;
2617	register u_int32_t *tl;
2618	register int32_t t1, t2;
2619	register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2620	caddr_t bpos, dpos, cp2;
2621	int error = 0, wccflag = NFSV3_WCCRATTR;
2622	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2623
2624	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
2625		return (0);
2626	nfsstats.rpccnt[NFSPROC_COMMIT]++;
2627	nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
2628	nfsm_fhtom(vp, 1);
2629	nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2630	txdr_hyper(offset, tl);
2631	tl += 2;
2632	*tl = txdr_unsigned(cnt);
2633	nfsm_request(vp, NFSPROC_COMMIT, procp, cred);
2634	nfsm_wcc_data(vp, wccflag);
2635	if (!error) {
2636		nfsm_dissect(tl, u_int32_t *, NFSX_V3WRITEVERF);
2637		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
2638			NFSX_V3WRITEVERF)) {
2639			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
2640				NFSX_V3WRITEVERF);
2641			error = NFSERR_STALEWRITEVERF;
2642		}
2643	}
2644	nfsm_reqdone;
2645	return (error);
2646}
2647
2648/*
2649 * Kludge City..
2650 * - make nfs_bmap() essentially a no-op that does no translation
2651 * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
2652 *   (Maybe I could use the process's page mapping, but I was concerned that
2653 *    Kernel Write might not be enabled and also figured copyout() would do
2654 *    a lot more work than bcopy() and also it currently happens in the
2655 *    context of the swapper process (2).
2656 */
2657static int
2658nfs_bmap(ap)
2659	struct vop_bmap_args /* {
2660		struct vnode *a_vp;
2661		daddr_t  a_bn;
2662		struct vnode **a_vpp;
2663		daddr_t *a_bnp;
2664		int *a_runp;
2665		int *a_runb;
2666	} */ *ap;
2667{
2668	register struct vnode *vp = ap->a_vp;
2669
2670	if (ap->a_vpp != NULL)
2671		*ap->a_vpp = vp;
2672	if (ap->a_bnp != NULL)
2673		*ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize);
2674	if (ap->a_runp != NULL)
2675		*ap->a_runp = 0;
2676	if (ap->a_runb != NULL)
2677		*ap->a_runb = 0;
2678	return (0);
2679}
2680
2681/*
2682 * Strategy routine.
2683 * For async requests when nfsiod(s) are running, queue the request by
2684 * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
2685 * request.
2686 */
2687static int
2688nfs_strategy(ap)
2689	struct vop_strategy_args *ap;
2690{
2691	register struct buf *bp = ap->a_bp;
2692	struct ucred *cr;
2693	struct proc *p;
2694	int error = 0;
2695
2696	KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
2697	KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp));
2698
2699	if (bp->b_flags & B_PHYS)
2700		panic("nfs physio");
2701
2702	if (bp->b_flags & B_ASYNC)
2703		p = (struct proc *)0;
2704	else
2705		p = curproc;	/* XXX */
2706
2707	if (bp->b_iocmd == BIO_READ)
2708		cr = bp->b_rcred;
2709	else
2710		cr = bp->b_wcred;
2711
2712	/*
2713	 * If the op is asynchronous and an i/o daemon is waiting
2714	 * queue the request, wake it up and wait for completion
2715	 * otherwise just do it ourselves.
2716	 */
2717	if ((bp->b_flags & B_ASYNC) == 0 ||
2718		nfs_asyncio(bp, NOCRED, p))
2719		error = nfs_doio(bp, cr, p);
2720	return (error);
2721}
2722
/*
 * Mmap a file
 *
 * NB Currently unsupported: every request is refused with EINVAL and the
 * argument structure is never examined.
 */
/* ARGSUSED */
static int
nfs_mmap(ap)
	struct vop_mmap_args /* {
		struct vnode *a_vp;
		int  a_fflags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	int error = EINVAL;

	return (error);
}
2741
2742/*
2743 * fsync vnode op. Just call nfs_flush() with commit == 1.
2744 */
2745/* ARGSUSED */
2746static int
2747nfs_fsync(ap)
2748	struct vop_fsync_args /* {
2749		struct vnodeop_desc *a_desc;
2750		struct vnode * a_vp;
2751		struct ucred * a_cred;
2752		int  a_waitfor;
2753		struct proc * a_p;
2754	} */ *ap;
2755{
2756
2757	return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1));
2758}
2759
2760/*
2761 * Flush all the blocks associated with a vnode.
2762 * 	Walk through the buffer pool and push any dirty pages
2763 *	associated with the vnode.
2764 */
2765static int
2766nfs_flush(vp, cred, waitfor, p, commit)
2767	register struct vnode *vp;
2768	struct ucred *cred;
2769	int waitfor;
2770	struct proc *p;
2771	int commit;
2772{
2773	register struct nfsnode *np = VTONFS(vp);
2774	register struct buf *bp;
2775	register int i;
2776	struct buf *nbp;
2777	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2778	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
2779	int passone = 1;
2780	u_quad_t off, endoff, toff;
2781	struct ucred* wcred = NULL;
2782	struct buf **bvec = NULL;
2783#ifndef NFS_COMMITBVECSIZ
2784#define NFS_COMMITBVECSIZ	20
2785#endif
2786	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
2787	int bvecsize = 0, bveccount;
2788
2789	if (nmp->nm_flag & NFSMNT_INT)
2790		slpflag = PCATCH;
2791	if (!commit)
2792		passone = 0;
2793	/*
2794	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
2795	 * server, but nas not been committed to stable storage on the server
2796	 * yet. On the first pass, the byte range is worked out and the commit
2797	 * rpc is done. On the second pass, nfs_writebp() is called to do the
2798	 * job.
2799	 */
2800again:
2801	off = (u_quad_t)-1;
2802	endoff = 0;
2803	bvecpos = 0;
2804	if (NFS_ISV3(vp) && commit) {
2805		s = splbio();
2806		/*
2807		 * Count up how many buffers waiting for a commit.
2808		 */
2809		bveccount = 0;
2810		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
2811			nbp = TAILQ_NEXT(bp, b_vnbufs);
2812			if (BUF_REFCNT(bp) == 0 &&
2813			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
2814				== (B_DELWRI | B_NEEDCOMMIT))
2815				bveccount++;
2816		}
2817		/*
2818		 * Allocate space to remember the list of bufs to commit.  It is
2819		 * important to use M_NOWAIT here to avoid a race with nfs_write.
2820		 * If we can't get memory (for whatever reason), we will end up
2821		 * committing the buffers one-by-one in the loop below.
2822		 */
2823		if (bveccount > NFS_COMMITBVECSIZ) {
2824			if (bvec != NULL && bvec != bvec_on_stack)
2825				free(bvec, M_TEMP);
2826			bvec = (struct buf **)
2827				malloc(bveccount * sizeof(struct buf *),
2828				       M_TEMP, M_NOWAIT);
2829			if (bvec == NULL) {
2830				bvec = bvec_on_stack;
2831				bvecsize = NFS_COMMITBVECSIZ;
2832			} else
2833				bvecsize = bveccount;
2834		} else {
2835			bvec = bvec_on_stack;
2836			bvecsize = NFS_COMMITBVECSIZ;
2837		}
2838		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
2839			nbp = TAILQ_NEXT(bp, b_vnbufs);
2840			if (bvecpos >= bvecsize)
2841				break;
2842			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
2843			    (B_DELWRI | B_NEEDCOMMIT) ||
2844			    BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
2845				continue;
2846			bremfree(bp);
2847			/*
2848			 * Work out if all buffers are using the same cred
2849			 * so we can deal with them all with one commit.
2850			 *
2851			 * NOTE: we are not clearing B_DONE here, so we have
2852			 * to do it later on in this routine if we intend to
2853			 * initiate I/O on the bp.
2854			 */
2855			if (wcred == NULL)
2856				wcred = bp->b_wcred;
2857			else if (wcred != bp->b_wcred)
2858				wcred = NOCRED;
2859			bp->b_flags |= B_WRITEINPROG;
2860			vfs_busy_pages(bp, 1);
2861
2862			/*
2863			 * bp is protected by being locked, but nbp is not
2864			 * and vfs_busy_pages() may sleep.  We have to
2865			 * recalculate nbp.
2866			 */
2867			nbp = TAILQ_NEXT(bp, b_vnbufs);
2868
2869			/*
2870			 * A list of these buffers is kept so that the
2871			 * second loop knows which buffers have actually
2872			 * been committed. This is necessary, since there
2873			 * may be a race between the commit rpc and new
2874			 * uncommitted writes on the file.
2875			 */
2876			bvec[bvecpos++] = bp;
2877			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2878				bp->b_dirtyoff;
2879			if (toff < off)
2880				off = toff;
2881			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
2882			if (toff > endoff)
2883				endoff = toff;
2884		}
2885		splx(s);
2886	}
2887	if (bvecpos > 0) {
2888		/*
2889		 * Commit data on the server, as required.
2890		 * If all bufs are using the same wcred, then use that with
2891		 * one call for all of them, otherwise commit each one
2892		 * separately.
2893		 */
2894		if (wcred != NOCRED)
2895			retv = nfs_commit(vp, off, (int)(endoff - off),
2896					  wcred, p);
2897		else {
2898			retv = 0;
2899			for (i = 0; i < bvecpos; i++) {
2900				off_t off, size;
2901				bp = bvec[i];
2902				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2903					bp->b_dirtyoff;
2904				size = (u_quad_t)(bp->b_dirtyend
2905						  - bp->b_dirtyoff);
2906				retv = nfs_commit(vp, off, (int)size,
2907						  bp->b_wcred, p);
2908				if (retv) break;
2909			}
2910		}
2911
2912		if (retv == NFSERR_STALEWRITEVERF)
2913			nfs_clearcommit(vp->v_mount);
2914
2915		/*
2916		 * Now, either mark the blocks I/O done or mark the
2917		 * blocks dirty, depending on whether the commit
2918		 * succeeded.
2919		 */
2920		for (i = 0; i < bvecpos; i++) {
2921			bp = bvec[i];
2922			bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG | B_CLUSTEROK);
2923			if (retv) {
2924				/*
2925				 * Error, leave B_DELWRI intact
2926				 */
2927				vfs_unbusy_pages(bp);
2928				brelse(bp);
2929			} else {
2930				/*
2931				 * Success, remove B_DELWRI ( bundirty() ).
2932				 *
2933				 * b_dirtyoff/b_dirtyend seem to be NFS
2934				 * specific.  We should probably move that
2935				 * into bundirty(). XXX
2936				 */
2937				s = splbio();
2938				vp->v_numoutput++;
2939				bp->b_flags |= B_ASYNC;
2940				bundirty(bp);
2941				bp->b_flags &= ~B_DONE;
2942				bp->b_ioflags &= ~BIO_ERROR;
2943				bp->b_dirtyoff = bp->b_dirtyend = 0;
2944				splx(s);
2945				bufdone(bp);
2946			}
2947		}
2948	}
2949
2950	/*
2951	 * Start/do any write(s) that are required.
2952	 */
2953loop:
2954	s = splbio();
2955	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
2956		nbp = TAILQ_NEXT(bp, b_vnbufs);
2957		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
2958			if (waitfor != MNT_WAIT || passone)
2959				continue;
2960			error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL,
2961			    "nfsfsync", slpflag, slptimeo);
2962			splx(s);
2963			if (error == 0)
2964				panic("nfs_fsync: inconsistent lock");
2965			if (error == ENOLCK)
2966				goto loop;
2967			if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
2968				error = EINTR;
2969				goto done;
2970			}
2971			if (slpflag == PCATCH) {
2972				slpflag = 0;
2973				slptimeo = 2 * hz;
2974			}
2975			goto loop;
2976		}
2977		if ((bp->b_flags & B_DELWRI) == 0)
2978			panic("nfs_fsync: not dirty");
2979		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
2980			BUF_UNLOCK(bp);
2981			continue;
2982		}
2983		bremfree(bp);
2984		if (passone || !commit)
2985		    bp->b_flags |= B_ASYNC;
2986		else
2987		    bp->b_flags |= B_ASYNC | B_WRITEINPROG;
2988		splx(s);
2989		BUF_WRITE(bp);
2990		goto loop;
2991	}
2992	splx(s);
2993	if (passone) {
2994		passone = 0;
2995		goto again;
2996	}
2997	if (waitfor == MNT_WAIT) {
2998		while (vp->v_numoutput) {
2999			vp->v_flag |= VBWAIT;
3000			error = tsleep((caddr_t)&vp->v_numoutput,
3001				slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
3002			if (error) {
3003			    if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
3004				error = EINTR;
3005				goto done;
3006			    }
3007			    if (slpflag == PCATCH) {
3008				slpflag = 0;
3009				slptimeo = 2 * hz;
3010			    }
3011			}
3012		}
3013		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) {
3014			goto loop;
3015		}
3016	}
3017	if (np->n_flag & NWRITEERR) {
3018		error = np->n_error;
3019		np->n_flag &= ~NWRITEERR;
3020	}
3021done:
3022	if (bvec != NULL && bvec != bvec_on_stack)
3023		free(bvec, M_TEMP);
3024	return (error);
3025}
3026
3027/*
3028 * NFS advisory byte-level locks.
3029 * Currently unsupported.
3030 */
3031static int
3032nfs_advlock(ap)
3033	struct vop_advlock_args /* {
3034		struct vnode *a_vp;
3035		caddr_t  a_id;
3036		int  a_op;
3037		struct flock *a_fl;
3038		int  a_flags;
3039	} */ *ap;
3040{
3041	register struct nfsnode *np = VTONFS(ap->a_vp);
3042
3043	/*
3044	 * The following kludge is to allow diskless support to work
3045	 * until a real NFS lockd is implemented. Basically, just pretend
3046	 * that this is a local lock.
3047	 */
3048	return (lf_advlock(ap, &(np->n_lockf), np->n_size));
3049}
3050
3051/*
3052 * Print out the contents of an nfsnode.
3053 */
3054static int
3055nfs_print(ap)
3056	struct vop_print_args /* {
3057		struct vnode *a_vp;
3058	} */ *ap;
3059{
3060	register struct vnode *vp = ap->a_vp;
3061	register struct nfsnode *np = VTONFS(vp);
3062
3063	printf("tag VT_NFS, fileid %ld fsid 0x%x",
3064		np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3065	if (vp->v_type == VFIFO)
3066		fifo_printinfo(vp);
3067	printf("\n");
3068	return (0);
3069}
3070
3071/*
3072 * Just call nfs_writebp() with the force argument set to 1.
3073 *
3074 * NOTE: B_DONE may or may not be set in a_bp on call.
3075 */
3076static int
3077nfs_bwrite(ap)
3078	struct vop_bwrite_args /* {
3079		struct vnode *a_bp;
3080	} */ *ap;
3081{
3082	return (nfs_writebp(ap->a_bp, 1, curproc));
3083}
3084
3085/*
3086 * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless
3087 * the force flag is one and it also handles the B_NEEDCOMMIT flag.  We set
3088 * B_CACHE if this is a VMIO buffer.
3089 */
3090int
3091nfs_writebp(bp, force, procp)
3092	register struct buf *bp;
3093	int force;
3094	struct proc *procp;
3095{
3096	int s;
3097	int oldflags = bp->b_flags;
3098#if 0
3099	int retv = 1;
3100	off_t off;
3101#endif
3102
3103	if (BUF_REFCNT(bp) == 0)
3104		panic("bwrite: buffer is not locked???");
3105
3106	if (bp->b_flags & B_INVAL) {
3107		brelse(bp);
3108		return(0);
3109	}
3110
3111	bp->b_flags |= B_CACHE;
3112
3113	/*
3114	 * Undirty the bp.  We will redirty it later if the I/O fails.
3115	 */
3116
3117	s = splbio();
3118	bundirty(bp);
3119	bp->b_flags &= ~B_DONE;
3120	bp->b_ioflags &= ~BIO_ERROR;
3121	bp->b_iocmd = BIO_WRITE;
3122
3123	bp->b_vp->v_numoutput++;
3124	curproc->p_stats->p_ru.ru_oublock++;
3125	splx(s);
3126
3127	vfs_busy_pages(bp, 1);
3128	if (force)
3129		bp->b_flags |= B_WRITEINPROG;
3130	BUF_KERNPROC(bp);
3131	BUF_STRATEGY(bp);
3132
3133	if( (oldflags & B_ASYNC) == 0) {
3134		int rtval = biowait(bp);
3135
3136		if (oldflags & B_DELWRI) {
3137			s = splbio();
3138			reassignbuf(bp, bp->b_vp);
3139			splx(s);
3140		}
3141
3142		brelse(bp);
3143		return (rtval);
3144	}
3145
3146	return (0);
3147}
3148
3149/*
3150 * nfs special file access vnode op.
3151 * Essentially just get vattr and then imitate iaccess() since the device is
3152 * local to the client.
3153 */
3154static int
3155nfsspec_access(ap)
3156	struct vop_access_args /* {
3157		struct vnode *a_vp;
3158		int  a_mode;
3159		struct ucred *a_cred;
3160		struct proc *a_p;
3161	} */ *ap;
3162{
3163	register struct vattr *vap;
3164	register gid_t *gp;
3165	register struct ucred *cred = ap->a_cred;
3166	struct vnode *vp = ap->a_vp;
3167	mode_t mode = ap->a_mode;
3168	struct vattr vattr;
3169	register int i;
3170	int error;
3171
3172	/*
3173	 * Disallow write attempts on filesystems mounted read-only;
3174	 * unless the file is a socket, fifo, or a block or character
3175	 * device resident on the filesystem.
3176	 */
3177	if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3178		switch (vp->v_type) {
3179		case VREG:
3180		case VDIR:
3181		case VLNK:
3182			return (EROFS);
3183		default:
3184			break;
3185		}
3186	}
3187	/*
3188	 * If you're the super-user,
3189	 * you always get access.
3190	 */
3191	if (cred->cr_uid == 0)
3192		return (0);
3193	vap = &vattr;
3194	error = VOP_GETATTR(vp, vap, cred, ap->a_p);
3195	if (error)
3196		return (error);
3197	/*
3198	 * Access check is based on only one of owner, group, public.
3199	 * If not owner, then check group. If not a member of the
3200	 * group, then check public access.
3201	 */
3202	if (cred->cr_uid != vap->va_uid) {
3203		mode >>= 3;
3204		gp = cred->cr_groups;
3205		for (i = 0; i < cred->cr_ngroups; i++, gp++)
3206			if (vap->va_gid == *gp)
3207				goto found;
3208		mode >>= 3;
3209found:
3210		;
3211	}
3212	error = (vap->va_mode & mode) == mode ? 0 : EACCES;
3213	return (error);
3214}
3215
3216/*
3217 * Read wrapper for special devices.
3218 */
3219static int
3220nfsspec_read(ap)
3221	struct vop_read_args /* {
3222		struct vnode *a_vp;
3223		struct uio *a_uio;
3224		int  a_ioflag;
3225		struct ucred *a_cred;
3226	} */ *ap;
3227{
3228	register struct nfsnode *np = VTONFS(ap->a_vp);
3229
3230	/*
3231	 * Set access flag.
3232	 */
3233	np->n_flag |= NACC;
3234	getnanotime(&np->n_atim);
3235	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
3236}
3237
3238/*
3239 * Write wrapper for special devices.
3240 */
3241static int
3242nfsspec_write(ap)
3243	struct vop_write_args /* {
3244		struct vnode *a_vp;
3245		struct uio *a_uio;
3246		int  a_ioflag;
3247		struct ucred *a_cred;
3248	} */ *ap;
3249{
3250	register struct nfsnode *np = VTONFS(ap->a_vp);
3251
3252	/*
3253	 * Set update flag.
3254	 */
3255	np->n_flag |= NUPD;
3256	getnanotime(&np->n_mtim);
3257	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
3258}
3259
3260/*
3261 * Close wrapper for special devices.
3262 *
3263 * Update the times on the nfsnode then do device close.
3264 */
3265static int
3266nfsspec_close(ap)
3267	struct vop_close_args /* {
3268		struct vnode *a_vp;
3269		int  a_fflag;
3270		struct ucred *a_cred;
3271		struct proc *a_p;
3272	} */ *ap;
3273{
3274	register struct vnode *vp = ap->a_vp;
3275	register struct nfsnode *np = VTONFS(vp);
3276	struct vattr vattr;
3277
3278	if (np->n_flag & (NACC | NUPD)) {
3279		np->n_flag |= NCHG;
3280		if (vp->v_usecount == 1 &&
3281		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3282			VATTR_NULL(&vattr);
3283			if (np->n_flag & NACC)
3284				vattr.va_atime = np->n_atim;
3285			if (np->n_flag & NUPD)
3286				vattr.va_mtime = np->n_mtim;
3287			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
3288		}
3289	}
3290	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
3291}
3292
3293/*
3294 * Read wrapper for fifos.
3295 */
3296static int
3297nfsfifo_read(ap)
3298	struct vop_read_args /* {
3299		struct vnode *a_vp;
3300		struct uio *a_uio;
3301		int  a_ioflag;
3302		struct ucred *a_cred;
3303	} */ *ap;
3304{
3305	register struct nfsnode *np = VTONFS(ap->a_vp);
3306
3307	/*
3308	 * Set access flag.
3309	 */
3310	np->n_flag |= NACC;
3311	getnanotime(&np->n_atim);
3312	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
3313}
3314
3315/*
3316 * Write wrapper for fifos.
3317 */
3318static int
3319nfsfifo_write(ap)
3320	struct vop_write_args /* {
3321		struct vnode *a_vp;
3322		struct uio *a_uio;
3323		int  a_ioflag;
3324		struct ucred *a_cred;
3325	} */ *ap;
3326{
3327	register struct nfsnode *np = VTONFS(ap->a_vp);
3328
3329	/*
3330	 * Set update flag.
3331	 */
3332	np->n_flag |= NUPD;
3333	getnanotime(&np->n_mtim);
3334	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
3335}
3336
3337/*
3338 * Close wrapper for fifos.
3339 *
3340 * Update the times on the nfsnode then do fifo close.
3341 */
3342static int
3343nfsfifo_close(ap)
3344	struct vop_close_args /* {
3345		struct vnode *a_vp;
3346		int  a_fflag;
3347		struct ucred *a_cred;
3348		struct proc *a_p;
3349	} */ *ap;
3350{
3351	register struct vnode *vp = ap->a_vp;
3352	register struct nfsnode *np = VTONFS(vp);
3353	struct vattr vattr;
3354	struct timespec ts;
3355
3356	if (np->n_flag & (NACC | NUPD)) {
3357		getnanotime(&ts);
3358		if (np->n_flag & NACC)
3359			np->n_atim = ts;
3360		if (np->n_flag & NUPD)
3361			np->n_mtim = ts;
3362		np->n_flag |= NCHG;
3363		if (vp->v_usecount == 1 &&
3364		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3365			VATTR_NULL(&vattr);
3366			if (np->n_flag & NACC)
3367				vattr.va_atime = np->n_atim;
3368			if (np->n_flag & NUPD)
3369				vattr.va_mtime = np->n_mtim;
3370			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
3371		}
3372	}
3373	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
3374}
3375