1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
37 * $FreeBSD: head/sys/nfsclient/nfs_vnops.c 67152 2000-10-15 08:06:32Z bp $
38 */
39
40
41/*
42 * vnode op calls for Sun NFS version 2 and 3
43 */
44
45#include "opt_inet.h"
46
47#include <sys/param.h>
48#include <sys/kernel.h>
49#include <sys/systm.h>
50#include <sys/resourcevar.h>
51#include <sys/proc.h>
52#include <sys/mount.h>
53#include <sys/bio.h>
54#include <sys/buf.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/namei.h>
58#include <sys/socket.h>
59#include <sys/vnode.h>
60#include <sys/dirent.h>
61#include <sys/fcntl.h>
62#include <sys/lockf.h>
63#include <sys/stat.h>
64#include <sys/sysctl.h>
65
66#include <vm/vm.h>
67#include <vm/vm_extern.h>
68
69#include <miscfs/fifofs/fifo.h>
70
71#include <nfs/rpcv2.h>
72#include <nfs/nfsproto.h>
73#include <nfs/nfs.h>
74#include <nfs/nfsnode.h>
75#include <nfs/nfsmount.h>
76#include <nfs/xdr_subs.h>
77#include <nfs/nfsm_subs.h>
78#include <nfs/nqnfs.h>
79
80#include <net/if.h>
81#include <netinet/in.h>
82#include <netinet/in_var.h>
83
84/* Defs */
85#define	TRUE	1
86#define	FALSE	0
87
88/*
89 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
90 * calls are not in getblk() and brelse() so that they would not be necessary
91 * here.
92 */
93#ifndef B_VMIO
94#define vfs_busy_pages(bp, f)
95#endif
96
97static int	nfsspec_read __P((struct vop_read_args *));
98static int	nfsspec_write __P((struct vop_write_args *));
99static int	nfsfifo_read __P((struct vop_read_args *));
100static int	nfsfifo_write __P((struct vop_write_args *));
101static int	nfsspec_close __P((struct vop_close_args *));
102static int	nfsfifo_close __P((struct vop_close_args *));
103#define nfs_poll vop_nopoll
104static int	nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int));
105static int	nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *));
106static	int	nfs_lookup __P((struct vop_lookup_args *));
107static	int	nfs_create __P((struct vop_create_args *));
108static	int	nfs_mknod __P((struct vop_mknod_args *));
109static	int	nfs_open __P((struct vop_open_args *));
110static	int	nfs_close __P((struct vop_close_args *));
111static	int	nfs_access __P((struct vop_access_args *));
112static	int	nfs_getattr __P((struct vop_getattr_args *));
113static	int	nfs_setattr __P((struct vop_setattr_args *));
114static	int	nfs_read __P((struct vop_read_args *));
115static	int	nfs_mmap __P((struct vop_mmap_args *));
116static	int	nfs_fsync __P((struct vop_fsync_args *));
117static	int	nfs_remove __P((struct vop_remove_args *));
118static	int	nfs_link __P((struct vop_link_args *));
119static	int	nfs_rename __P((struct vop_rename_args *));
120static	int	nfs_mkdir __P((struct vop_mkdir_args *));
121static	int	nfs_rmdir __P((struct vop_rmdir_args *));
122static	int	nfs_symlink __P((struct vop_symlink_args *));
123static	int	nfs_readdir __P((struct vop_readdir_args *));
124static	int	nfs_bmap __P((struct vop_bmap_args *));
125static	int	nfs_strategy __P((struct vop_strategy_args *));
126static	int	nfs_lookitup __P((struct vnode *, const char *, int,
127			struct ucred *, struct proc *, struct nfsnode **));
128static	int	nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *));
129static int	nfsspec_access __P((struct vop_access_args *));
130static int	nfs_readlink __P((struct vop_readlink_args *));
131static int	nfs_print __P((struct vop_print_args *));
132static int	nfs_advlock __P((struct vop_advlock_args *));
133static int	nfs_bwrite __P((struct vop_bwrite_args *));
134/*
135 * Global vfs data structures for nfs
136 */
137vop_t **nfsv2_vnodeop_p;
138static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
139	{ &vop_default_desc,		(vop_t *) vop_defaultop },
140	{ &vop_access_desc,		(vop_t *) nfs_access },
141	{ &vop_advlock_desc,		(vop_t *) nfs_advlock },
142	{ &vop_bmap_desc,		(vop_t *) nfs_bmap },
143	{ &vop_bwrite_desc,		(vop_t *) nfs_bwrite },
144	{ &vop_close_desc,		(vop_t *) nfs_close },
145	{ &vop_create_desc,		(vop_t *) nfs_create },
146	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
147	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
148	{ &vop_getpages_desc,		(vop_t *) nfs_getpages },
149	{ &vop_putpages_desc,		(vop_t *) nfs_putpages },
150	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
151	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
152	{ &vop_lease_desc,		(vop_t *) vop_null },
153	{ &vop_link_desc,		(vop_t *) nfs_link },
154	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
155	{ &vop_lookup_desc,		(vop_t *) nfs_lookup },
156	{ &vop_mkdir_desc,		(vop_t *) nfs_mkdir },
157	{ &vop_mknod_desc,		(vop_t *) nfs_mknod },
158	{ &vop_mmap_desc,		(vop_t *) nfs_mmap },
159	{ &vop_open_desc,		(vop_t *) nfs_open },
160	{ &vop_poll_desc,		(vop_t *) nfs_poll },
161	{ &vop_print_desc,		(vop_t *) nfs_print },
162	{ &vop_read_desc,		(vop_t *) nfs_read },
163	{ &vop_readdir_desc,		(vop_t *) nfs_readdir },
164	{ &vop_readlink_desc,		(vop_t *) nfs_readlink },
165	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
166	{ &vop_remove_desc,		(vop_t *) nfs_remove },
167	{ &vop_rename_desc,		(vop_t *) nfs_rename },
168	{ &vop_rmdir_desc,		(vop_t *) nfs_rmdir },
169	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
170	{ &vop_strategy_desc,		(vop_t *) nfs_strategy },
171	{ &vop_symlink_desc,		(vop_t *) nfs_symlink },
172	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
173	{ &vop_write_desc,		(vop_t *) nfs_write },
174	{ NULL, NULL }
175};
176static struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
177	{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
178VNODEOP_SET(nfsv2_vnodeop_opv_desc);
179
180/*
181 * Special device vnode ops
182 */
183vop_t **spec_nfsv2nodeop_p;
184static struct vnodeopv_entry_desc nfsv2_specop_entries[] = {
185	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
186	{ &vop_access_desc,		(vop_t *) nfsspec_access },
187	{ &vop_close_desc,		(vop_t *) nfsspec_close },
188	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
189	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
190	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
191	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
192	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
193	{ &vop_print_desc,		(vop_t *) nfs_print },
194	{ &vop_read_desc,		(vop_t *) nfsspec_read },
195	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
196	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
197	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
198	{ &vop_write_desc,		(vop_t *) nfsspec_write },
199	{ NULL, NULL }
200};
201static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
202	{ &spec_nfsv2nodeop_p, nfsv2_specop_entries };
203VNODEOP_SET(spec_nfsv2nodeop_opv_desc);
204
205vop_t **fifo_nfsv2nodeop_p;
206static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = {
207	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
208	{ &vop_access_desc,		(vop_t *) nfsspec_access },
209	{ &vop_close_desc,		(vop_t *) nfsfifo_close },
210	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
211	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
212	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
213	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
214	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
215	{ &vop_print_desc,		(vop_t *) nfs_print },
216	{ &vop_read_desc,		(vop_t *) nfsfifo_read },
217	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
218	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
219	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
220	{ &vop_write_desc,		(vop_t *) nfsfifo_write },
221	{ NULL, NULL }
222};
223static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
224	{ &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries };
225VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
226
227static int	nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
228				  struct componentname *cnp,
229				  struct vattr *vap));
230static int	nfs_removerpc __P((struct vnode *dvp, const char *name,
231				   int namelen,
232				   struct ucred *cred, struct proc *proc));
233static int	nfs_renamerpc __P((struct vnode *fdvp, const char *fnameptr,
234				   int fnamelen, struct vnode *tdvp,
235				   const char *tnameptr, int tnamelen,
236				   struct ucred *cred, struct proc *proc));
237static int	nfs_renameit __P((struct vnode *sdvp,
238				  struct componentname *scnp,
239				  struct sillyrename *sp));
240
241/*
242 * Global variables
243 */
244extern u_int32_t nfs_true, nfs_false;
245extern u_int32_t nfs_xdrneg1;
246extern struct nfsstats nfsstats;
247extern nfstype nfsv3_type[9];
248struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
249struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
250int nfs_numasync = 0;
251#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
252
253SYSCTL_DECL(_vfs_nfs);
254
255static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
256SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
257	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
258
259static int	nfsv3_commit_on_close = 0;
260SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
261	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
262#if 0
263SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
264	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
265
266SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
267	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
268#endif
269
270#define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
271			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
272			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
273static int
274nfs3_access_otw(struct vnode *vp,
275		int wmode,
276		struct proc *p,
277		struct ucred *cred)
278{
279	const int v3 = 1;
280	u_int32_t *tl;
281	int error = 0, attrflag;
282
283	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
284	caddr_t bpos, dpos, cp2;
285	register int32_t t1, t2;
286	register caddr_t cp;
287	u_int32_t rmode;
288	struct nfsnode *np = VTONFS(vp);
289
290	nfsstats.rpccnt[NFSPROC_ACCESS]++;
291	nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
292	nfsm_fhtom(vp, v3);
293	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
294	*tl = txdr_unsigned(wmode);
295	nfsm_request(vp, NFSPROC_ACCESS, p, cred);
296	nfsm_postop_attr(vp, attrflag);
297	if (!error) {
298		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
299		rmode = fxdr_unsigned(u_int32_t, *tl);
300		np->n_mode = rmode;
301		np->n_modeuid = cred->cr_uid;
302		np->n_modestamp = time_second;
303	}
304	nfsm_reqdone;
305	return error;
306}
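/*
 * Illustrative sketch, not part of the original file: txdr_unsigned() and
 * fxdr_unsigned(), used above to build and parse the ACCESS arguments, are
 * assumed here to be thin wrappers around 32-bit host/network byte-order
 * conversion (their usual definition in nfs/xdr_subs.h).  The example_*
 * names are hypothetical; the block is excluded from compilation.
 */
#if 0
static u_int32_t
example_txdr_unsigned(u_int32_t hostval)
{
	/* host order -> XDR (big-endian) wire order */
	return (htonl(hostval));
}

static u_int32_t
example_fxdr_unsigned(u_int32_t wireval)
{
	/* XDR (big-endian) wire order -> host order */
	return (ntohl(wireval));
}
#endif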
307
308/*
309 * nfs access vnode op.
310 * For nfs version 2, just return ok. File accesses may fail later.
311 * For nfs version 3, use the access rpc to check accessibility. If file modes
312 * are changed on the server, accesses might still fail later.
313 */
314static int
315nfs_access(ap)
316	struct vop_access_args /* {
317		struct vnode *a_vp;
318		int  a_mode;
319		struct ucred *a_cred;
320		struct proc *a_p;
321	} */ *ap;
322{
323	register struct vnode *vp = ap->a_vp;
324	int error = 0;
325	u_int32_t mode, wmode;
326	int v3 = NFS_ISV3(vp);
327	struct nfsnode *np = VTONFS(vp);
328
329	/*
330	 * Disallow write attempts on filesystems mounted read-only;
331	 * unless the file is a socket, fifo, or a block or character
332	 * device resident on the filesystem.
333	 */
334	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
335		switch (vp->v_type) {
336		case VREG:
337		case VDIR:
338		case VLNK:
339			return (EROFS);
340		default:
341			break;
342		}
343	}
344	/*
345	 * For nfs v3, check to see if we have done this recently, and if
346	 * so return our cached result instead of making an ACCESS call.
347	 * If not, do an access rpc.  For nfs v2 we are stuck emulating
348	 * ufs_access() locally using the vattr, which may not be correct,
349	 * since the server may apply other access criteria such as
350	 * client uid-->server uid mapping that we do not know about.
351	 */
352	if (v3) {
353		if (ap->a_mode & VREAD)
354			mode = NFSV3ACCESS_READ;
355		else
356			mode = 0;
357		if (vp->v_type != VDIR) {
358			if (ap->a_mode & VWRITE)
359				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
360			if (ap->a_mode & VEXEC)
361				mode |= NFSV3ACCESS_EXECUTE;
362		} else {
363			if (ap->a_mode & VWRITE)
364				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
365					 NFSV3ACCESS_DELETE);
366			if (ap->a_mode & VEXEC)
367				mode |= NFSV3ACCESS_LOOKUP;
368		}
369		/* XXX safety belt, only make blanket request if caching */
370		if (nfsaccess_cache_timeout > 0) {
371			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
372				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
373				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
374		} else {
375			wmode = mode;
376		}
377
378		/*
379		 * Does our cached result allow us to give a definite yes to
380		 * this request?
381		 */
382		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
383		    (ap->a_cred->cr_uid == np->n_modeuid) &&
384		    ((np->n_mode & mode) == mode)) {
385			nfsstats.accesscache_hits++;
386		} else {
387			/*
388			 * Either a no, or a don't know.  Go to the wire.
389			 */
390			nfsstats.accesscache_misses++;
391		        error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred);
392			if (!error) {
393				if ((np->n_mode & mode) != mode) {
394					error = EACCES;
395				}
396			}
397		}
398		return (error);
399	} else {
400		if ((error = nfsspec_access(ap)) != 0)
401			return (error);
402
403		/*
404		 * Attempt to prevent a mapped root from accessing a file
405		 * which it shouldn't.  We try to read a byte from the file
406		 * if the user is root and the file is not zero length.
407		 * After calling nfsspec_access, we should have the correct
408		 * file size cached.
409		 */
410		if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
411		    && VTONFS(vp)->n_size > 0) {
412			struct iovec aiov;
413			struct uio auio;
414			char buf[1];
415
416			aiov.iov_base = buf;
417			aiov.iov_len = 1;
418			auio.uio_iov = &aiov;
419			auio.uio_iovcnt = 1;
420			auio.uio_offset = 0;
421			auio.uio_resid = 1;
422			auio.uio_segflg = UIO_SYSSPACE;
423			auio.uio_rw = UIO_READ;
424			auio.uio_procp = ap->a_p;
425
426			if (vp->v_type == VREG)
427				error = nfs_readrpc(vp, &auio, ap->a_cred);
428			else if (vp->v_type == VDIR) {
429				char* bp;
430				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
431				aiov.iov_base = bp;
432				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
433				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
434				free(bp, M_TEMP);
435			} else if (vp->v_type == VLNK)
436				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
437			else
438				error = EACCES;
439		}
440		return (error);
441	}
442}
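/*
 * Illustrative sketch, not part of the original file: the mode mapping that
 * nfs_access() performs inline above, pulled out into a hypothetical helper
 * for clarity.  Directories substitute LOOKUP for EXECUTE and additionally
 * request DELETE when write access is asked for.  Excluded from compilation.
 */
#if 0
static u_int32_t
example_accmode_to_nfsv3(struct vnode *vp, int accmode)
{
	u_int32_t mode = 0;

	if (accmode & VREAD)
		mode |= NFSV3ACCESS_READ;
	if (vp->v_type != VDIR) {
		if (accmode & VWRITE)
			mode |= NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND;
		if (accmode & VEXEC)
			mode |= NFSV3ACCESS_EXECUTE;
	} else {
		if (accmode & VWRITE)
			mode |= NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
			    NFSV3ACCESS_DELETE;
		if (accmode & VEXEC)
			mode |= NFSV3ACCESS_LOOKUP;
	}
	return (mode);
}
#endif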
443
444/*
445 * nfs open vnode op
446 * Check to see if the type is ok
447 * and that deletion is not in progress.
448 * For paged in text files, you will need to flush the page cache
449 * if consistency is lost.
450 */
451/* ARGSUSED */
452static int
453nfs_open(ap)
454	struct vop_open_args /* {
455		struct vnode *a_vp;
456		int  a_mode;
457		struct ucred *a_cred;
458		struct proc *a_p;
459	} */ *ap;
460{
461	register struct vnode *vp = ap->a_vp;
462	struct nfsnode *np = VTONFS(vp);
463	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
464	struct vattr vattr;
465	int error;
466
467	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
468#ifdef DIAGNOSTIC
469		printf("open eacces vtyp=%d\n",vp->v_type);
470#endif
471		return (EACCES);
472	}
473	/*
474	 * Get a valid lease. If cached data is stale, flush it.
475	 */
476	if (nmp->nm_flag & NFSMNT_NQNFS) {
477		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
478		    do {
479			error = nqnfs_getlease(vp, ND_READ, ap->a_cred,
480			    ap->a_p);
481		    } while (error == NQNFS_EXPIRED);
482		    if (error)
483			return (error);
484		    if (np->n_lrev != np->n_brev ||
485			(np->n_flag & NQNFSNONCACHE)) {
486			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
487				ap->a_p, 1)) == EINTR)
488				return (error);
489			np->n_brev = np->n_lrev;
490		    }
491		}
492	} else {
493		if (np->n_flag & NMODIFIED) {
494			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
495				ap->a_p, 1)) == EINTR)
496				return (error);
497			np->n_attrstamp = 0;
498			if (vp->v_type == VDIR)
499				np->n_direofoffset = 0;
500			error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
501			if (error)
502				return (error);
503			np->n_mtime = vattr.va_mtime.tv_sec;
504		} else {
505			error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
506			if (error)
507				return (error);
508			if (np->n_mtime != vattr.va_mtime.tv_sec) {
509				if (vp->v_type == VDIR)
510					np->n_direofoffset = 0;
511				if ((error = nfs_vinvalbuf(vp, V_SAVE,
512					ap->a_cred, ap->a_p, 1)) == EINTR)
513					return (error);
514				np->n_mtime = vattr.va_mtime.tv_sec;
515			}
516		}
517	}
518	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
519		np->n_attrstamp = 0; /* For Open/Close consistency */
520	return (0);
521}
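/*
 * Illustrative sketch, not part of the original file: the close-to-open
 * consistency step of nfs_open() for the plain (non-NQNFS, unmodified)
 * case -- fetch fresh attributes and dump cached buffers if the server's
 * modification time has moved.  Hypothetical helper, excluded from
 * compilation; error handling is simplified.
 */
#if 0
static int
example_open_ctoc(struct vnode *vp, struct ucred *cred, struct proc *p)
{
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;

	error = VOP_GETATTR(vp, &vattr, cred, p);
	if (error)
		return (error);
	if (np->n_mtime != vattr.va_mtime.tv_sec) {
		/* The file changed behind our back; toss cached data. */
		if ((error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) == EINTR)
			return (error);
		np->n_mtime = vattr.va_mtime.tv_sec;
	}
	return (0);
}
#endif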
522
523/*
524 * nfs close vnode op
525 * What an NFS client should do upon close after writing is a debatable issue.
526 * Most NFS clients push delayed writes to the server upon close, basically for
527 * two reasons:
528 * 1 - So that any write errors may be reported back to the client process
529 *     doing the close system call. By far the two most likely errors are
530 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
531 * 2 - To put a worst case upper bound on cache inconsistency between
532 *     multiple clients for the file.
533 * There is also a consistency problem for Version 2 of the protocol w.r.t.
534 * not being able to tell if other clients are writing a file concurrently,
535 * since there is no way of knowing if the changed modify time in the reply
536 * is only due to the write for this client.
537 * (NFS Version 3 provides weak cache consistency data in the reply that
538 *  should be sufficient to detect and handle this case.)
539 *
540 * The current code does the following:
541 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
542 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
543 *                     or commit them (this satisfies 1 and 2 except for the
544 *                     case where the server crashes after this close but
545 *                     before the commit RPC, which is felt to be "good
546 *                     enough"). Changing the last argument to nfs_flush() to
547 *                     a 1 would force a commit operation, if it is felt a
548 *                     commit is necessary now.
549 * for NQNFS         - do nothing now, since 2 is dealt with via leases and
550 *                     1 should be dealt with via an fsync() system call for
551 *                     cases where write errors are important.
552 */
553/* ARGSUSED */
554static int
555nfs_close(ap)
556	struct vop_close_args /* {
557		struct vnodeop_desc *a_desc;
558		struct vnode *a_vp;
559		int  a_fflag;
560		struct ucred *a_cred;
561		struct proc *a_p;
562	} */ *ap;
563{
564	register struct vnode *vp = ap->a_vp;
565	register struct nfsnode *np = VTONFS(vp);
566	int error = 0;
567
568	if (vp->v_type == VREG) {
569	    if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 &&
570		(np->n_flag & NMODIFIED)) {
571		if (NFS_ISV3(vp)) {
572		    /*
573		     * Under NFSv3 we have dirty buffers to dispose of.  We
574		     * must flush them to the NFS server.  We have the option
575		     * of waiting all the way through the commit rpc or just
576		     * waiting for the initial write.  The default is to only
577		     * wait through the initial write so the data is in the
578		     * server's cache, which is roughly similar to the state
579		     * a standard disk subsystem leaves the file in on close().
580		     *
581		     * We cannot clear the NMODIFIED bit in np->n_flag due to
582		     * potential races with other processes, and certainly
583		     * cannot clear it if we don't commit.
584		     */
585		    int cm = nfsv3_commit_on_close ? 1 : 0;
586		    error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, cm);
587		    /* np->n_flag &= ~NMODIFIED; */
588		} else {
589		    error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
590		}
591		np->n_attrstamp = 0;
592	    }
593	    if (np->n_flag & NWRITEERR) {
594		np->n_flag &= ~NWRITEERR;
595		error = np->n_error;
596	    }
597	}
598	return (error);
599}
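/*
 * Illustrative sketch, not part of the original file: the per-protocol
 * close policy described in the comment above nfs_close(), reduced to its
 * essentials in a hypothetical helper.  Excluded from compilation.
 */
#if 0
static int
example_close_policy(struct vnode *vp, struct ucred *cred, struct proc *p)
{
	int cm;

	if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
		return (0);		/* NQNFS: leases cover consistency */
	if (NFS_ISV3(vp)) {
		/* V3: flush dirty buffers, optionally commit, keep them */
		cm = nfsv3_commit_on_close ? 1 : 0;
		return (nfs_flush(vp, cred, MNT_WAIT, p, cm));
	}
	/* V2: flush and invalidate everything */
	return (nfs_vinvalbuf(vp, V_SAVE, cred, p, 1));
}
#endif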
600
601/*
602 * nfs getattr call from vfs.
603 */
604static int
605nfs_getattr(ap)
606	struct vop_getattr_args /* {
607		struct vnode *a_vp;
608		struct vattr *a_vap;
609		struct ucred *a_cred;
610		struct proc *a_p;
611	} */ *ap;
612{
613	register struct vnode *vp = ap->a_vp;
614	register struct nfsnode *np = VTONFS(vp);
615	register caddr_t cp;
616	register u_int32_t *tl;
617	register int32_t t1, t2;
618	caddr_t bpos, dpos;
619	int error = 0;
620	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
621	int v3 = NFS_ISV3(vp);
622
623	/*
624	 * Update local times for special files.
625	 */
626	if (np->n_flag & (NACC | NUPD))
627		np->n_flag |= NCHG;
628	/*
629	 * First look in the cache.
630	 */
631	if (nfs_getattrcache(vp, ap->a_vap) == 0)
632		return (0);
633
634	if (v3 && nfsaccess_cache_timeout > 0) {
635		nfsstats.accesscache_misses++;
636		nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred);
637		if (nfs_getattrcache(vp, ap->a_vap) == 0)
638			return (0);
639	}
640
641	nfsstats.rpccnt[NFSPROC_GETATTR]++;
642	nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
643	nfsm_fhtom(vp, v3);
644	nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
645	if (!error) {
646		nfsm_loadattr(vp, ap->a_vap);
647	}
648	nfsm_reqdone;
649	return (error);
650}
651
652/*
653 * nfs setattr call.
654 */
655static int
656nfs_setattr(ap)
657	struct vop_setattr_args /* {
658		struct vnodeop_desc *a_desc;
659		struct vnode *a_vp;
660		struct vattr *a_vap;
661		struct ucred *a_cred;
662		struct proc *a_p;
663	} */ *ap;
664{
665	register struct vnode *vp = ap->a_vp;
666	register struct nfsnode *np = VTONFS(vp);
667	register struct vattr *vap = ap->a_vap;
668	int error = 0;
669	u_quad_t tsize;
670
671#ifndef nolint
672	tsize = (u_quad_t)0;
673#endif
674
675	/*
676	 * Setting of flags is not supported.
677	 */
678	if (vap->va_flags != VNOVAL)
679		return (EOPNOTSUPP);
680
681	/*
682	 * Disallow write attempts if the filesystem is mounted read-only.
683	 */
684  	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
685	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
686	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
687	    (vp->v_mount->mnt_flag & MNT_RDONLY))
688		return (EROFS);
689	if (vap->va_size != VNOVAL) {
690 		switch (vp->v_type) {
691 		case VDIR:
692 			return (EISDIR);
693 		case VCHR:
694 		case VBLK:
695 		case VSOCK:
696 		case VFIFO:
697			if (vap->va_mtime.tv_sec == VNOVAL &&
698			    vap->va_atime.tv_sec == VNOVAL &&
699			    vap->va_mode == (mode_t)VNOVAL &&
700			    vap->va_uid == (uid_t)VNOVAL &&
701			    vap->va_gid == (gid_t)VNOVAL)
702				return (0);
703 			vap->va_size = VNOVAL;
704 			break;
705 		default:
706			/*
707			 * Disallow write attempts if the filesystem is
708			 * mounted read-only.
709			 */
710			if (vp->v_mount->mnt_flag & MNT_RDONLY)
711				return (EROFS);
712			vnode_pager_setsize(vp, vap->va_size);
713 			if (np->n_flag & NMODIFIED) {
714 			    if (vap->va_size == 0)
715 				error = nfs_vinvalbuf(vp, 0,
716 					ap->a_cred, ap->a_p, 1);
717 			    else
718 				error = nfs_vinvalbuf(vp, V_SAVE,
719 					ap->a_cred, ap->a_p, 1);
720 			    if (error) {
721				vnode_pager_setsize(vp, np->n_size);
722 				return (error);
723			    }
724 			}
725 			tsize = np->n_size;
726 			np->n_size = np->n_vattr.va_size = vap->va_size;
727  		}
728  	} else if ((vap->va_mtime.tv_sec != VNOVAL ||
729		vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
730		vp->v_type == VREG &&
731  		(error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
732		 ap->a_p, 1)) == EINTR)
733		return (error);
734	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
735	if (error && vap->va_size != VNOVAL) {
736		np->n_size = np->n_vattr.va_size = tsize;
737		vnode_pager_setsize(vp, np->n_size);
738	}
739	return (error);
740}
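/*
 * Illustrative sketch, not part of the original file: the optimistic size
 * update that nfs_setattr() performs around a truncate -- remember the old
 * size, update the local copies, and roll back if the SETATTR RPC fails.
 * Hypothetical helper, excluded from compilation.
 */
#if 0
static int
example_truncate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
    struct proc *p)
{
	struct nfsnode *np = VTONFS(vp);
	u_quad_t tsize = np->n_size;
	int error;

	vnode_pager_setsize(vp, vap->va_size);
	np->n_size = np->n_vattr.va_size = vap->va_size;
	error = nfs_setattrrpc(vp, vap, cred, p);
	if (error) {
		/* The server rejected the change; undo the local update. */
		np->n_size = np->n_vattr.va_size = tsize;
		vnode_pager_setsize(vp, np->n_size);
	}
	return (error);
}
#endif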
741
742/*
743 * Do an nfs setattr rpc.
744 */
745static int
746nfs_setattrrpc(vp, vap, cred, procp)
747	register struct vnode *vp;
748	register struct vattr *vap;
749	struct ucred *cred;
750	struct proc *procp;
751{
752	register struct nfsv2_sattr *sp;
753	register caddr_t cp;
754	register int32_t t1, t2;
755	caddr_t bpos, dpos, cp2;
756	u_int32_t *tl;
757	int error = 0, wccflag = NFSV3_WCCRATTR;
758	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
759	int v3 = NFS_ISV3(vp);
760
761	nfsstats.rpccnt[NFSPROC_SETATTR]++;
762	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
763	nfsm_fhtom(vp, v3);
764	if (v3) {
765		nfsm_v3attrbuild(vap, TRUE);
766		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
767		*tl = nfs_false;
768	} else {
769		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
770		if (vap->va_mode == (mode_t)VNOVAL)
771			sp->sa_mode = nfs_xdrneg1;
772		else
773			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
774		if (vap->va_uid == (uid_t)VNOVAL)
775			sp->sa_uid = nfs_xdrneg1;
776		else
777			sp->sa_uid = txdr_unsigned(vap->va_uid);
778		if (vap->va_gid == (gid_t)VNOVAL)
779			sp->sa_gid = nfs_xdrneg1;
780		else
781			sp->sa_gid = txdr_unsigned(vap->va_gid);
782		sp->sa_size = txdr_unsigned(vap->va_size);
783		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
784		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
785	}
786	nfsm_request(vp, NFSPROC_SETATTR, procp, cred);
787	if (v3) {
788		nfsm_wcc_data(vp, wccflag);
789	} else
790		nfsm_loadattr(vp, (struct vattr *)0);
791	nfsm_reqdone;
792	return (error);
793}
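/*
 * Illustrative note, not part of the original file: in the V2 sattr built
 * above, a field the client does not want to change is sent as all one
 * bits (nfs_xdrneg1, the XDR encoding of -1).  A minimal sketch for the
 * mode field only; hypothetical helper, excluded from compilation.
 */
#if 0
static void
example_v2sattr_mode(struct nfsv2_sattr *sp, struct vattr *vap,
    enum vtype vtyp)
{
	if (vap->va_mode == (mode_t)VNOVAL)
		sp->sa_mode = nfs_xdrneg1;	/* "do not change" */
	else
		sp->sa_mode = vtonfsv2_mode(vtyp, vap->va_mode);
}
#endif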
794
795/*
796 * nfs lookup call, one step at a time...
797 * First look in cache
798 * If not found, unlock the directory nfsnode and do the rpc
799 */
800static int
801nfs_lookup(ap)
802	struct vop_lookup_args /* {
803		struct vnodeop_desc *a_desc;
804		struct vnode *a_dvp;
805		struct vnode **a_vpp;
806		struct componentname *a_cnp;
807	} */ *ap;
808{
809	struct componentname *cnp = ap->a_cnp;
810	struct vnode *dvp = ap->a_dvp;
811	struct vnode **vpp = ap->a_vpp;
812	int flags = cnp->cn_flags;
813	struct vnode *newvp;
814	u_int32_t *tl;
815	caddr_t cp;
816	int32_t t1, t2;
817	struct nfsmount *nmp;
818	caddr_t bpos, dpos, cp2;
819	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
820	long len;
821	nfsfh_t *fhp;
822	struct nfsnode *np;
823	int lockparent, wantparent, error = 0, attrflag, fhsize;
824	int v3 = NFS_ISV3(dvp);
825	struct proc *p = cnp->cn_proc;
826
827	*vpp = NULLVP;
828	cnp->cn_flags &= ~PDIRUNLOCK;
829	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
830	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
831		return (EROFS);
832	if (dvp->v_type != VDIR)
833		return (ENOTDIR);
834	lockparent = flags & LOCKPARENT;
835	wantparent = flags & (LOCKPARENT|WANTPARENT);
836	nmp = VFSTONFS(dvp->v_mount);
837	np = VTONFS(dvp);
838	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
839		struct vattr vattr;
840		int vpid;
841
842		if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) != 0) {
843			*vpp = NULLVP;
844			return (error);
845		}
846
847		newvp = *vpp;
848		vpid = newvp->v_id;
849		/*
850		 * See the comment starting `Step through' in ufs/ufs_lookup.c
851		 * for an explanation of the locking protocol
852		 */
853		if (dvp == newvp) {
854			VREF(newvp);
855			error = 0;
856		} else if (flags & ISDOTDOT) {
857			VOP_UNLOCK(dvp, 0, p);
858			cnp->cn_flags |= PDIRUNLOCK;
859			error = vget(newvp, LK_EXCLUSIVE, p);
860			if (!error && lockparent && (flags & ISLASTCN)) {
861				error = vn_lock(dvp, LK_EXCLUSIVE, p);
862				if (error == 0)
863					cnp->cn_flags &= ~PDIRUNLOCK;
864			}
865		} else {
866			error = vget(newvp, LK_EXCLUSIVE, p);
867			if (!lockparent || error || !(flags & ISLASTCN)) {
868				VOP_UNLOCK(dvp, 0, p);
869				cnp->cn_flags |= PDIRUNLOCK;
870			}
871		}
872		if (!error) {
873			if (vpid == newvp->v_id) {
874			   if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p)
875			    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
876				nfsstats.lookupcache_hits++;
877				if (cnp->cn_nameiop != LOOKUP &&
878				    (flags & ISLASTCN))
879					cnp->cn_flags |= SAVENAME;
880				return (0);
881			   }
882			   cache_purge(newvp);
883			}
884			vput(newvp);
885			if (lockparent && dvp != newvp && (flags & ISLASTCN))
886				VOP_UNLOCK(dvp, 0, p);
887		}
888		error = vn_lock(dvp, LK_EXCLUSIVE, p);
889		*vpp = NULLVP;
890		if (error) {
891			cnp->cn_flags |= PDIRUNLOCK;
892			return (error);
893		}
894		cnp->cn_flags &= ~PDIRUNLOCK;
895	}
896	error = 0;
897	newvp = NULLVP;
898	nfsstats.lookupcache_misses++;
899	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
900	len = cnp->cn_namelen;
901	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
902		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
903	nfsm_fhtom(dvp, v3);
904	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
905	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
906	if (error) {
907		nfsm_postop_attr(dvp, attrflag);
908		m_freem(mrep);
909		goto nfsmout;
910	}
911	nfsm_getfh(fhp, fhsize, v3);
912
913	/*
914	 * Handle RENAME case...
915	 */
916	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
917		if (NFS_CMPFH(np, fhp, fhsize)) {
918			m_freem(mrep);
919			return (EISDIR);
920		}
921		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
922		if (error) {
923			m_freem(mrep);
924			return (error);
925		}
926		newvp = NFSTOV(np);
927		if (v3) {
928			nfsm_postop_attr(newvp, attrflag);
929			nfsm_postop_attr(dvp, attrflag);
930		} else
931			nfsm_loadattr(newvp, (struct vattr *)0);
932		*vpp = newvp;
933		m_freem(mrep);
934		cnp->cn_flags |= SAVENAME;
935		if (!lockparent) {
936			VOP_UNLOCK(dvp, 0, p);
937			cnp->cn_flags |= PDIRUNLOCK;
938		}
939		return (0);
940	}
941
942	if (flags & ISDOTDOT) {
943		VOP_UNLOCK(dvp, 0, p);
944		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
945		if (error) {
946			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
947			return (error);
948		}
949		newvp = NFSTOV(np);
950		if (lockparent && (flags & ISLASTCN)) {
951			error = vn_lock(dvp, LK_EXCLUSIVE, p);
952			if (error) {
953				cnp->cn_flags |= PDIRUNLOCK;
954		    		vput(newvp);
955				return (error);
956			}
957		} else
958			cnp->cn_flags |= PDIRUNLOCK;
959	} else if (NFS_CMPFH(np, fhp, fhsize)) {
960		VREF(dvp);
961		newvp = dvp;
962	} else {
963		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
964		if (error) {
965			m_freem(mrep);
966			return (error);
967		}
968		if (!lockparent || !(flags & ISLASTCN)) {
969			cnp->cn_flags |= PDIRUNLOCK;
970			VOP_UNLOCK(dvp, 0, p);
971		}
972		newvp = NFSTOV(np);
973	}
974	if (v3) {
975		nfsm_postop_attr(newvp, attrflag);
976		nfsm_postop_attr(dvp, attrflag);
977	} else
978		nfsm_loadattr(newvp, (struct vattr *)0);
979	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
980		cnp->cn_flags |= SAVENAME;
981	if ((cnp->cn_flags & MAKEENTRY) &&
982	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
983		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
984		cache_enter(dvp, newvp, cnp);
985	}
986	*vpp = newvp;
987	nfsm_reqdone;
988	if (error) {
989		if (newvp != NULLVP) {
990			vrele(newvp);
991			*vpp = NULLVP;
992		}
993		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
994		    (flags & ISLASTCN) && error == ENOENT) {
995			if (!lockparent) {
996				VOP_UNLOCK(dvp, 0, p);
997				cnp->cn_flags |= PDIRUNLOCK;
998			}
999			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
1000				error = EROFS;
1001			else
1002				error = EJUSTRETURN;
1003		}
1004		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
1005			cnp->cn_flags |= SAVENAME;
1006	}
1007	return (error);
1008}
1009
1010/*
1011 * nfs read call.
1012 * Just call nfs_bioread() to do the work.
1013 */
1014static int
1015nfs_read(ap)
1016	struct vop_read_args /* {
1017		struct vnode *a_vp;
1018		struct uio *a_uio;
1019		int  a_ioflag;
1020		struct ucred *a_cred;
1021	} */ *ap;
1022{
1023	register struct vnode *vp = ap->a_vp;
1024
1025	if (vp->v_type != VREG)
1026		return (EPERM);
1027	return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
1028}
1029
1030/*
1031 * nfs readlink call
1032 */
1033static int
1034nfs_readlink(ap)
1035	struct vop_readlink_args /* {
1036		struct vnode *a_vp;
1037		struct uio *a_uio;
1038		struct ucred *a_cred;
1039	} */ *ap;
1040{
1041	register struct vnode *vp = ap->a_vp;
1042
1043	if (vp->v_type != VLNK)
1044		return (EINVAL);
1045	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
1046}
1047
1048/*
1049 * Do a readlink rpc.
1050 * Called by nfs_doio() from below the buffer cache.
1051 */
1052int
1053nfs_readlinkrpc(vp, uiop, cred)
1054	register struct vnode *vp;
1055	struct uio *uiop;
1056	struct ucred *cred;
1057{
1058	register u_int32_t *tl;
1059	register caddr_t cp;
1060	register int32_t t1, t2;
1061	caddr_t bpos, dpos, cp2;
1062	int error = 0, len, attrflag;
1063	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1064	int v3 = NFS_ISV3(vp);
1065
1066	nfsstats.rpccnt[NFSPROC_READLINK]++;
1067	nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
1068	nfsm_fhtom(vp, v3);
1069	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
1070	if (v3)
1071		nfsm_postop_attr(vp, attrflag);
1072	if (!error) {
1073		nfsm_strsiz(len, NFS_MAXPATHLEN);
1074		if (len == NFS_MAXPATHLEN) {
1075			struct nfsnode *np = VTONFS(vp);
1076			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
1077				len = np->n_size;
1078		}
1079		nfsm_mtouio(uiop, len);
1080	}
1081	nfsm_reqdone;
1082	return (error);
1083}
1084
1085/*
1086 * nfs read rpc call
1087 * Ditto above
1088 */
1089int
1090nfs_readrpc(vp, uiop, cred)
1091	register struct vnode *vp;
1092	struct uio *uiop;
1093	struct ucred *cred;
1094{
1095	register u_int32_t *tl;
1096	register caddr_t cp;
1097	register int32_t t1, t2;
1098	caddr_t bpos, dpos, cp2;
1099	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1100	struct nfsmount *nmp;
1101	int error = 0, len, retlen, tsiz, eof, attrflag;
1102	int v3 = NFS_ISV3(vp);
1103
1104#ifndef nolint
1105	eof = 0;
1106#endif
1107	nmp = VFSTONFS(vp->v_mount);
1108	tsiz = uiop->uio_resid;
1109	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
1110		return (EFBIG);
1111	while (tsiz > 0) {
1112		nfsstats.rpccnt[NFSPROC_READ]++;
1113		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
1114		nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
1115		nfsm_fhtom(vp, v3);
1116		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED * 3);
1117		if (v3) {
1118			txdr_hyper(uiop->uio_offset, tl);
1119			*(tl + 2) = txdr_unsigned(len);
1120		} else {
1121			*tl++ = txdr_unsigned(uiop->uio_offset);
1122			*tl++ = txdr_unsigned(len);
1123			*tl = 0;
1124		}
1125		nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
1126		if (v3) {
1127			nfsm_postop_attr(vp, attrflag);
1128			if (error) {
1129				m_freem(mrep);
1130				goto nfsmout;
1131			}
1132			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1133			eof = fxdr_unsigned(int, *(tl + 1));
1134		} else
1135			nfsm_loadattr(vp, (struct vattr *)0);
1136		nfsm_strsiz(retlen, nmp->nm_rsize);
1137		nfsm_mtouio(uiop, retlen);
1138		m_freem(mrep);
1139		tsiz -= retlen;
1140		if (v3) {
1141			if (eof || retlen == 0)
1142				tsiz = 0;
1143		} else if (retlen < len)
1144			tsiz = 0;
1145	}
1146nfsmout:
1147	return (error);
1148}
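/*
 * Illustrative sketch, not part of the original file: the chunking loop of
 * nfs_readrpc() above in isolation -- a request larger than the mount's
 * rsize is split into rsize-sized READ RPCs, stopping early at EOF or on a
 * short reply.  Stand-alone, hypothetical names, excluded from compilation.
 */
#if 0
static int
example_read_chunks(int resid, int rsize)
{
	int len, nrpcs = 0;

	while (resid > 0) {
		len = (resid > rsize) ? rsize : resid;
		/* ... one READ RPC of `len' bytes would be issued here ... */
		nrpcs++;
		resid -= len;
	}
	return (nrpcs);
}
#endif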
1149
1150/*
1151 * nfs write call
1152 */
1153int
1154nfs_writerpc(vp, uiop, cred, iomode, must_commit)
1155	register struct vnode *vp;
1156	register struct uio *uiop;
1157	struct ucred *cred;
1158	int *iomode, *must_commit;
1159{
1160	register u_int32_t *tl;
1161	register caddr_t cp;
1162	register int32_t t1, t2, backup;
1163	caddr_t bpos, dpos, cp2;
1164	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1165	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1166	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1167	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
1168
1169#ifdef DIAGNOSTIC
1170	if (uiop->uio_iovcnt != 1)
1171		panic("nfs: writerpc iovcnt > 1");
1172#endif
1173	*must_commit = 0;
1174	tsiz = uiop->uio_resid;
1175	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
1176		return (EFBIG);
1177	while (tsiz > 0) {
1178		nfsstats.rpccnt[NFSPROC_WRITE]++;
1179		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
1180		nfsm_reqhead(vp, NFSPROC_WRITE,
1181			NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
1182		nfsm_fhtom(vp, v3);
1183		if (v3) {
1184			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1185			txdr_hyper(uiop->uio_offset, tl);
1186			tl += 2;
1187			*tl++ = txdr_unsigned(len);
1188			*tl++ = txdr_unsigned(*iomode);
1189			*tl = txdr_unsigned(len);
1190		} else {
1191			register u_int32_t x;
1192
1193			nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1194			/* Set both "begin" and "current" to non-garbage. */
1195			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1196			*tl++ = x;	/* "begin offset" */
1197			*tl++ = x;	/* "current offset" */
1198			x = txdr_unsigned(len);
1199			*tl++ = x;	/* total to this offset */
1200			*tl = x;	/* size of this write */
1201		}
1202		nfsm_uiotom(uiop, len);
1203		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
1204		if (v3) {
1205			wccflag = NFSV3_WCCCHK;
1206			nfsm_wcc_data(vp, wccflag);
1207			if (!error) {
1208				nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED
1209					+ NFSX_V3WRITEVERF);
1210				rlen = fxdr_unsigned(int, *tl++);
1211				if (rlen == 0) {
1212					error = NFSERR_IO;
1213					m_freem(mrep);
1214					break;
1215				} else if (rlen < len) {
1216					backup = len - rlen;
1217					uiop->uio_iov->iov_base -= backup;
1218					uiop->uio_iov->iov_len += backup;
1219					uiop->uio_offset -= backup;
1220					uiop->uio_resid += backup;
1221					len = rlen;
1222				}
1223				commit = fxdr_unsigned(int, *tl++);
1224
1225				/*
1226				 * Return the lowest commitment level
1227				 * obtained by any of the RPCs.
1228				 */
1229				if (committed == NFSV3WRITE_FILESYNC)
1230					committed = commit;
1231				else if (committed == NFSV3WRITE_DATASYNC &&
1232					commit == NFSV3WRITE_UNSTABLE)
1233					committed = commit;
1234				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
1235				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1236					NFSX_V3WRITEVERF);
1237				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
1238				} else if (bcmp((caddr_t)tl,
1239				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
1240				    *must_commit = 1;
1241				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
1242					NFSX_V3WRITEVERF);
1243				}
1244			}
1245		} else
1246		    nfsm_loadattr(vp, (struct vattr *)0);
1247		if (wccflag)
1248		    VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec;
1249		m_freem(mrep);
1250		if (error)
1251			break;
1252		tsiz -= len;
1253	}
1254nfsmout:
1255	if (vp->v_mount->mnt_flag & MNT_ASYNC)
1256		committed = NFSV3WRITE_FILESYNC;
1257	*iomode = committed;
1258	if (error)
1259		uiop->uio_resid = tsiz;
1260	return (error);
1261}
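/*
 * Illustrative sketch, not part of the original file: the "lowest
 * commitment level" bookkeeping in nfs_writerpc() above.  FILESYNC is the
 * strongest guarantee and UNSTABLE the weakest, so the level reported back
 * to the caller can only be downgraded over the series of WRITE RPCs.
 * Hypothetical helper, excluded from compilation.
 */
#if 0
static int
example_downgrade_commit(int committed, int commit)
{
	if (committed == NFSV3WRITE_FILESYNC)
		committed = commit;
	else if (committed == NFSV3WRITE_DATASYNC &&
	    commit == NFSV3WRITE_UNSTABLE)
		committed = commit;
	return (committed);
}
#endif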
1262
1263/*
1264 * nfs mknod rpc
1265 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
1266 * mode set to specify the file type and the size field for rdev.
1267 */
1268static int
1269nfs_mknodrpc(dvp, vpp, cnp, vap)
1270	register struct vnode *dvp;
1271	register struct vnode **vpp;
1272	register struct componentname *cnp;
1273	register struct vattr *vap;
1274{
1275	register struct nfsv2_sattr *sp;
1276	register u_int32_t *tl;
1277	register caddr_t cp;
1278	register int32_t t1, t2;
1279	struct vnode *newvp = (struct vnode *)0;
1280	struct nfsnode *np = (struct nfsnode *)0;
1281	struct vattr vattr;
1282	char *cp2;
1283	caddr_t bpos, dpos;
1284	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
1285	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1286	u_int32_t rdev;
1287	int v3 = NFS_ISV3(dvp);
1288
1289	if (vap->va_type == VCHR || vap->va_type == VBLK)
1290		rdev = txdr_unsigned(vap->va_rdev);
1291	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
1292		rdev = nfs_xdrneg1;
1293	else {
1294		return (EOPNOTSUPP);
1295	}
1296	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
1297		return (error);
1298	}
1299	nfsstats.rpccnt[NFSPROC_MKNOD]++;
1300	nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
1301		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1302	nfsm_fhtom(dvp, v3);
1303	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1304	if (v3) {
1305		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1306		*tl++ = vtonfsv3_type(vap->va_type);
1307		nfsm_v3attrbuild(vap, FALSE);
1308		if (vap->va_type == VCHR || vap->va_type == VBLK) {
1309			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1310			*tl++ = txdr_unsigned(umajor(vap->va_rdev));
1311			*tl = txdr_unsigned(uminor(vap->va_rdev));
1312		}
1313	} else {
1314		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1315		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1316		sp->sa_uid = nfs_xdrneg1;
1317		sp->sa_gid = nfs_xdrneg1;
1318		sp->sa_size = rdev;
1319		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1320		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1321	}
1322	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred);
1323	if (!error) {
1324		nfsm_mtofh(dvp, newvp, v3, gotvp);
1325		if (!gotvp) {
1326			if (newvp) {
1327				vput(newvp);
1328				newvp = (struct vnode *)0;
1329			}
1330			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1331			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
1332			if (!error)
1333				newvp = NFSTOV(np);
1334		}
1335	}
1336	if (v3)
1337		nfsm_wcc_data(dvp, wccflag);
1338	nfsm_reqdone;
1339	if (error) {
1340		if (newvp)
1341			vput(newvp);
1342	} else {
1343		if (cnp->cn_flags & MAKEENTRY)
1344			cache_enter(dvp, newvp, cnp);
1345		*vpp = newvp;
1346	}
1347	VTONFS(dvp)->n_flag |= NMODIFIED;
1348	if (!wccflag)
1349		VTONFS(dvp)->n_attrstamp = 0;
1350	return (error);
1351}
1352
1353/*
1354 * nfs mknod vop
1355 * just call nfs_mknodrpc() to do the work.
1356 */
1357/* ARGSUSED */
1358static int
1359nfs_mknod(ap)
1360	struct vop_mknod_args /* {
1361		struct vnode *a_dvp;
1362		struct vnode **a_vpp;
1363		struct componentname *a_cnp;
1364		struct vattr *a_vap;
1365	} */ *ap;
1366{
1367	return nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap);
1368}
1369
1370static u_long create_verf;
1371/*
1372 * nfs file create call
1373 */
1374static int
1375nfs_create(ap)
1376	struct vop_create_args /* {
1377		struct vnode *a_dvp;
1378		struct vnode **a_vpp;
1379		struct componentname *a_cnp;
1380		struct vattr *a_vap;
1381	} */ *ap;
1382{
1383	register struct vnode *dvp = ap->a_dvp;
1384	register struct vattr *vap = ap->a_vap;
1385	register struct componentname *cnp = ap->a_cnp;
1386	register struct nfsv2_sattr *sp;
1387	register u_int32_t *tl;
1388	register caddr_t cp;
1389	register int32_t t1, t2;
1390	struct nfsnode *np = (struct nfsnode *)0;
1391	struct vnode *newvp = (struct vnode *)0;
1392	caddr_t bpos, dpos, cp2;
1393	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
1394	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1395	struct vattr vattr;
1396	int v3 = NFS_ISV3(dvp);
1397
1398	/*
1399	 * Oops, not for me..
1400	 */
1401	if (vap->va_type == VSOCK)
1402		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
1403
1404	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
1405		return (error);
1406	}
1407	if (vap->va_vaflags & VA_EXCLUSIVE)
1408		fmode |= O_EXCL;
1409again:
1410	nfsstats.rpccnt[NFSPROC_CREATE]++;
1411	nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
1412		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
1413	nfsm_fhtom(dvp, v3);
1414	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1415	if (v3) {
1416		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1417		if (fmode & O_EXCL) {
1418			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
1419			nfsm_build(tl, u_int32_t *, NFSX_V3CREATEVERF);
1420#ifdef INET
1421			if (!TAILQ_EMPTY(&in_ifaddrhead))
1422				*tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
1423			else
1424#endif
1425				*tl++ = create_verf;
1426			*tl = ++create_verf;
1427		} else {
1428			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
1429			nfsm_v3attrbuild(vap, FALSE);
1430		}
1431	} else {
1432		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1433		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1434		sp->sa_uid = nfs_xdrneg1;
1435		sp->sa_gid = nfs_xdrneg1;
1436		sp->sa_size = 0;
1437		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1438		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1439	}
1440	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
1441	if (!error) {
1442		nfsm_mtofh(dvp, newvp, v3, gotvp);
1443		if (!gotvp) {
1444			if (newvp) {
1445				vput(newvp);
1446				newvp = (struct vnode *)0;
1447			}
1448			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1449			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
1450			if (!error)
1451				newvp = NFSTOV(np);
1452		}
1453	}
1454	if (v3)
1455		nfsm_wcc_data(dvp, wccflag);
1456	nfsm_reqdone;
1457	if (error) {
1458		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
1459			fmode &= ~O_EXCL;
1460			goto again;
1461		}
1462		if (newvp)
1463			vput(newvp);
1464	} else if (v3 && (fmode & O_EXCL))
1465		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
1466	if (!error) {
1467		if (cnp->cn_flags & MAKEENTRY)
1468			cache_enter(dvp, newvp, cnp);
1469		*ap->a_vpp = newvp;
1470	}
1471	VTONFS(dvp)->n_flag |= NMODIFIED;
1472	if (!wccflag)
1473		VTONFS(dvp)->n_attrstamp = 0;
1474	return (error);
1475}
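/*
 * Illustrative sketch, not part of the original file: how the 8-byte
 * exclusive CREATE verifier above is put together -- the primary IP
 * address (when one is configured) in the first word and an incrementing
 * counter in the second, so that distinct creates carry distinct verifiers
 * while retransmissions of one create repeat the same verifier.
 * Hypothetical helper, excluded from compilation.
 */
#if 0
static void
example_build_createverf(u_int32_t ipaddr, u_int32_t verf[2])
{
	verf[0] = ipaddr ? ipaddr : (u_int32_t)create_verf;
	verf[1] = (u_int32_t)++create_verf;
}
#endif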
1476
1477/*
1478 * nfs file remove call
1479 * To try and make nfs semantics closer to ufs semantics, a file that has
1480 * other processes using the vnode is renamed instead of removed and then
1481 * removed later on the last close.
1482 * - If v_usecount > 1
1483 *	  If a rename is not already in the works
1484 *	     call nfs_sillyrename() to set it up
1485 *     else
1486 *	  do the remove rpc
1487 */
1488static int
1489nfs_remove(ap)
1490	struct vop_remove_args /* {
1491		struct vnodeop_desc *a_desc;
1492		struct vnode * a_dvp;
1493		struct vnode * a_vp;
1494		struct componentname * a_cnp;
1495	} */ *ap;
1496{
1497	register struct vnode *vp = ap->a_vp;
1498	register struct vnode *dvp = ap->a_dvp;
1499	register struct componentname *cnp = ap->a_cnp;
1500	register struct nfsnode *np = VTONFS(vp);
1501	int error = 0;
1502	struct vattr vattr;
1503
1504#ifdef DIAGNOSTIC
1505	if ((cnp->cn_flags & HASBUF) == 0)
1506		panic("nfs_remove: no name");
1507	if (vp->v_usecount < 1)
1508		panic("nfs_remove: bad v_usecount");
1509#endif
1510	if (vp->v_type == VDIR)
1511		error = EPERM;
1512	else if (vp->v_usecount == 1 || (np->n_sillyrename &&
1513	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
1514	    vattr.va_nlink > 1)) {
1515		/*
1516		 * Purge the name cache so that the chance of a lookup for
1517		 * the name succeeding while the remove is in progress is
1518		 * minimized. Without node locking it can still happen, such
1519		 * that an I/O op returns ESTALE, but that can happen anyway
1520		 * when another host removes the file, so it is tolerated.
1521		 */
1522		cache_purge(vp);
1523		/*
1524		 * throw away biocache buffers, mainly to avoid
1525		 * unnecessary delayed writes later.
1526		 */
1527		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
1528		/* Do the rpc */
1529		if (error != EINTR)
1530			error = nfs_removerpc(dvp, cnp->cn_nameptr,
1531				cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc);
1532		/*
1533		 * Kludge City: If the first reply to the remove rpc is lost,
1534		 *   the reply to the retransmitted request will be ENOENT
1535		 *   since the file was in fact removed.
1536		 *   Therefore, we cheat and return success.
1537		 */
1538		if (error == ENOENT)
1539			error = 0;
1540	} else if (!np->n_sillyrename)
1541		error = nfs_sillyrename(dvp, vp, cnp);
1542	np->n_attrstamp = 0;
1543	return (error);
1544}
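/*
 * Illustrative sketch, not part of the original file: the simplified
 * remove-versus-sillyrename decision documented above nfs_remove().  A file
 * that is still open elsewhere is silly-renamed instead of removed, and the
 * real remove happens on the last close.  Hypothetical helper, excluded
 * from compilation; the extra nlink and in-progress checks are omitted.
 */
#if 0
static int
example_remove_policy(struct vnode *dvp, struct vnode *vp,
    struct componentname *cnp)
{
	struct nfsnode *np = VTONFS(vp);

	if (vp->v_usecount == 1 || np->n_sillyrename)
		return (nfs_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, cnp->cn_proc));
	/* Still in use elsewhere: defer by silly-renaming. */
	return (nfs_sillyrename(dvp, vp, cnp));
}
#endif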
1545
1546/*
1547 * nfs file remove rpc called from nfs_inactive
1548 */
1549int
1550nfs_removeit(sp)
1551	register struct sillyrename *sp;
1552{
1553
1554	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
1555		(struct proc *)0));
1556}
1557
1558/*
1559 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1560 */
1561static int
1562nfs_removerpc(dvp, name, namelen, cred, proc)
1563	register struct vnode *dvp;
1564	const char *name;
1565	int namelen;
1566	struct ucred *cred;
1567	struct proc *proc;
1568{
1569	register u_int32_t *tl;
1570	register caddr_t cp;
1571	register int32_t t1, t2;
1572	caddr_t bpos, dpos, cp2;
1573	int error = 0, wccflag = NFSV3_WCCRATTR;
1574	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1575	int v3 = NFS_ISV3(dvp);
1576
1577	nfsstats.rpccnt[NFSPROC_REMOVE]++;
1578	nfsm_reqhead(dvp, NFSPROC_REMOVE,
1579		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
1580	nfsm_fhtom(dvp, v3);
1581	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
1582	nfsm_request(dvp, NFSPROC_REMOVE, proc, cred);
1583	if (v3)
1584		nfsm_wcc_data(dvp, wccflag);
1585	nfsm_reqdone;
1586	VTONFS(dvp)->n_flag |= NMODIFIED;
1587	if (!wccflag)
1588		VTONFS(dvp)->n_attrstamp = 0;
1589	return (error);
1590}
1591
1592/*
1593 * nfs file rename call
1594 */
1595static int
1596nfs_rename(ap)
1597	struct vop_rename_args  /* {
1598		struct vnode *a_fdvp;
1599		struct vnode *a_fvp;
1600		struct componentname *a_fcnp;
1601		struct vnode *a_tdvp;
1602		struct vnode *a_tvp;
1603		struct componentname *a_tcnp;
1604	} */ *ap;
1605{
1606	register struct vnode *fvp = ap->a_fvp;
1607	register struct vnode *tvp = ap->a_tvp;
1608	register struct vnode *fdvp = ap->a_fdvp;
1609	register struct vnode *tdvp = ap->a_tdvp;
1610	register struct componentname *tcnp = ap->a_tcnp;
1611	register struct componentname *fcnp = ap->a_fcnp;
1612	int error;
1613
1614#ifdef DIAGNOSTIC
1615	if ((tcnp->cn_flags & HASBUF) == 0 ||
1616	    (fcnp->cn_flags & HASBUF) == 0)
1617		panic("nfs_rename: no name");
1618#endif
1619	/* Check for cross-device rename */
1620	if ((fvp->v_mount != tdvp->v_mount) ||
1621	    (tvp && (fvp->v_mount != tvp->v_mount))) {
1622		error = EXDEV;
1623		goto out;
1624	}
1625
1626	/*
1627	 * We have to flush B_DELWRI data prior to renaming
1628	 * the file.  If we don't, the delayed-write buffers
1629	 * can be flushed out later after the file has gone stale
1630	 * under NFSV3.  NFSV2 does not have this problem because
1631	 * (as far as I can tell) it flushes dirty buffers more
1632	 * often.
1633	 */
1634
1635	VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_proc);
1636	if (tvp)
1637	    VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_proc);
1638
1639	/*
1640	 * If the tvp exists and is in use, sillyrename it before doing the
1641	 * rename of the new file over it.
1642	 * XXX Can't sillyrename a directory.
1643	 */
1644	if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename &&
1645		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
1646		vput(tvp);
1647		tvp = NULL;
1648	}
1649
1650	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
1651		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
1652		tcnp->cn_proc);
1653
1654	if (fvp->v_type == VDIR) {
1655		if (tvp != NULL && tvp->v_type == VDIR)
1656			cache_purge(tdvp);
1657		cache_purge(fdvp);
1658	}
1659
1660out:
1661	if (tdvp == tvp)
1662		vrele(tdvp);
1663	else
1664		vput(tdvp);
1665	if (tvp)
1666		vput(tvp);
1667	vrele(fdvp);
1668	vrele(fvp);
1669	/*
1670	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
1671	 */
1672	if (error == ENOENT)
1673		error = 0;
1674	return (error);
1675}
1676
1677/*
1678 * nfs file rename rpc called from nfs_sillyrename() below
1679 */
1680static int
1681nfs_renameit(sdvp, scnp, sp)
1682	struct vnode *sdvp;
1683	struct componentname *scnp;
1684	register struct sillyrename *sp;
1685{
1686	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
1687		sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc));
1688}
1689
1690/*
1691 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
1692 */
1693static int
1694nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc)
1695	register struct vnode *fdvp;
1696	const char *fnameptr;
1697	int fnamelen;
1698	register struct vnode *tdvp;
1699	const char *tnameptr;
1700	int tnamelen;
1701	struct ucred *cred;
1702	struct proc *proc;
1703{
1704	register u_int32_t *tl;
1705	register caddr_t cp;
1706	register int32_t t1, t2;
1707	caddr_t bpos, dpos, cp2;
1708	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
1709	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1710	int v3 = NFS_ISV3(fdvp);
1711
1712	nfsstats.rpccnt[NFSPROC_RENAME]++;
1713	nfsm_reqhead(fdvp, NFSPROC_RENAME,
1714		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
1715		nfsm_rndup(tnamelen));
1716	nfsm_fhtom(fdvp, v3);
1717	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
1718	nfsm_fhtom(tdvp, v3);
1719	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
1720	nfsm_request(fdvp, NFSPROC_RENAME, proc, cred);
1721	if (v3) {
1722		nfsm_wcc_data(fdvp, fwccflag);
1723		nfsm_wcc_data(tdvp, twccflag);
1724	}
1725	nfsm_reqdone;
1726	VTONFS(fdvp)->n_flag |= NMODIFIED;
1727	VTONFS(tdvp)->n_flag |= NMODIFIED;
1728	if (!fwccflag)
1729		VTONFS(fdvp)->n_attrstamp = 0;
1730	if (!twccflag)
1731		VTONFS(tdvp)->n_attrstamp = 0;
1732	return (error);
1733}
1734
1735/*
1736 * nfs hard link create call
1737 */
1738static int
1739nfs_link(ap)
1740	struct vop_link_args /* {
1741		struct vnode *a_tdvp;
1742		struct vnode *a_vp;
1743		struct componentname *a_cnp;
1744	} */ *ap;
1745{
1746	register struct vnode *vp = ap->a_vp;
1747	register struct vnode *tdvp = ap->a_tdvp;
1748	register struct componentname *cnp = ap->a_cnp;
1749	register u_int32_t *tl;
1750	register caddr_t cp;
1751	register int32_t t1, t2;
1752	caddr_t bpos, dpos, cp2;
1753	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
1754	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1755	int v3;
1756
1757	if (vp->v_mount != tdvp->v_mount) {
1758		return (EXDEV);
1759	}
1760
1761	/*
1762	 * Push all writes to the server, so that the attribute cache
1763	 * doesn't get "out of sync" with the server.
1764	 * XXX There should be a better way!
1765	 */
1766	VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc);
1767
1768	v3 = NFS_ISV3(vp);
1769	nfsstats.rpccnt[NFSPROC_LINK]++;
1770	nfsm_reqhead(vp, NFSPROC_LINK,
1771		NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
1772	nfsm_fhtom(vp, v3);
1773	nfsm_fhtom(tdvp, v3);
1774	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1775	nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
1776	if (v3) {
1777		nfsm_postop_attr(vp, attrflag);
1778		nfsm_wcc_data(tdvp, wccflag);
1779	}
1780	nfsm_reqdone;
1781	VTONFS(tdvp)->n_flag |= NMODIFIED;
1782	if (!attrflag)
1783		VTONFS(vp)->n_attrstamp = 0;
1784	if (!wccflag)
1785		VTONFS(tdvp)->n_attrstamp = 0;
1786	/*
1787	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
1788	 */
1789	if (error == EEXIST)
1790		error = 0;
1791	return (error);
1792}
1793
1794/*
1795 * nfs symbolic link create call
1796 */
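/*
 * Note on the attribute encoding used below (descriptive only): for v3
 * the nfsm_v3attrbuild() macro emits the optional sattr3 fields, while
 * for v2 a fixed nfsv2_sattr is built by hand.  In the v2 case the
 * fields that should be left alone (uid, gid, size) are set to
 * nfs_xdrneg1, the XDR encoding of -1, which the server treats as
 * "not specified"; only the mode and the times from *vap are sent.
 */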
1797static int
1798nfs_symlink(ap)
1799	struct vop_symlink_args /* {
1800		struct vnode *a_dvp;
1801		struct vnode **a_vpp;
1802		struct componentname *a_cnp;
1803		struct vattr *a_vap;
1804		char *a_target;
1805	} */ *ap;
1806{
1807	register struct vnode *dvp = ap->a_dvp;
1808	register struct vattr *vap = ap->a_vap;
1809	register struct componentname *cnp = ap->a_cnp;
1810	register struct nfsv2_sattr *sp;
1811	register u_int32_t *tl;
1812	register caddr_t cp;
1813	register int32_t t1, t2;
1814	caddr_t bpos, dpos, cp2;
1815	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
1816	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1817	struct vnode *newvp = (struct vnode *)0;
1818	int v3 = NFS_ISV3(dvp);
1819
1820	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
1821	slen = strlen(ap->a_target);
1822	nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
1823	    nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
1824	nfsm_fhtom(dvp, v3);
1825	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
1826	if (v3) {
1827		nfsm_v3attrbuild(vap, FALSE);
1828	}
1829	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
1830	if (!v3) {
1831		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1832		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
1833		sp->sa_uid = nfs_xdrneg1;
1834		sp->sa_gid = nfs_xdrneg1;
1835		sp->sa_size = nfs_xdrneg1;
1836		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1837		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1838	}
1839
1840	/*
1841	 * Issue the NFS request and get the rpc response.
1842	 *
1843	 * Only NFSv3 responses returning an error of 0 actually return
1844	 * a file handle that can be converted into newvp without having
1845	 * to do an extra lookup rpc.
1846	 */
1847	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
1848	if (v3) {
1849		if (error == 0)
1850			nfsm_mtofh(dvp, newvp, v3, gotvp);
1851		nfsm_wcc_data(dvp, wccflag);
1852	}
1853
1854	/*
1855	 * The error exits in the nfsm_* macros jump to here; mrep is also freed.
1856	 */
1857
1858	nfsm_reqdone;
1859
1860	/*
1861	 * If we get an EEXIST error, silently convert it to no-error
1862	 * in case of an NFS retry.
1863	 */
1864	if (error == EEXIST)
1865		error = 0;
1866
1867	/*
1868	 * If we do not have (or no longer have) an error, and we could
1869	 * not extract the newvp from the response (because the request was
1870	 * NFSv2 or the error was EEXIST), we have to do a lookup in order
1871	 * to obtain a newvp to return.
1872	 */
1873	if (error == 0 && newvp == NULL) {
1874		struct nfsnode *np = NULL;
1875
1876		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
1877		    cnp->cn_cred, cnp->cn_proc, &np);
1878		if (!error)
1879			newvp = NFSTOV(np);
1880	}
1881	if (error) {
1882		if (newvp)
1883			vput(newvp);
1884	} else {
1885		*ap->a_vpp = newvp;
1886	}
1887	VTONFS(dvp)->n_flag |= NMODIFIED;
1888	if (!wccflag)
1889		VTONFS(dvp)->n_attrstamp = 0;
1890	return (error);
1891}
1892
1893/*
1894 * nfs make dir call
1895 */
1896static int
1897nfs_mkdir(ap)
1898	struct vop_mkdir_args /* {
1899		struct vnode *a_dvp;
1900		struct vnode **a_vpp;
1901		struct componentname *a_cnp;
1902		struct vattr *a_vap;
1903	} */ *ap;
1904{
1905	register struct vnode *dvp = ap->a_dvp;
1906	register struct vattr *vap = ap->a_vap;
1907	register struct componentname *cnp = ap->a_cnp;
1908	register struct nfsv2_sattr *sp;
1909	register u_int32_t *tl;
1910	register caddr_t cp;
1911	register int32_t t1, t2;
1912	register int len;
1913	struct nfsnode *np = (struct nfsnode *)0;
1914	struct vnode *newvp = (struct vnode *)0;
1915	caddr_t bpos, dpos, cp2;
1916	int error = 0, wccflag = NFSV3_WCCRATTR;
1917	int gotvp = 0;
1918	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1919	struct vattr vattr;
1920	int v3 = NFS_ISV3(dvp);
1921
1922	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
1923		return (error);
1924	}
1925	len = cnp->cn_namelen;
1926	nfsstats.rpccnt[NFSPROC_MKDIR]++;
1927	nfsm_reqhead(dvp, NFSPROC_MKDIR,
1928	  NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
1929	nfsm_fhtom(dvp, v3);
1930	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
1931	if (v3) {
1932		nfsm_v3attrbuild(vap, FALSE);
1933	} else {
1934		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1935		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
1936		sp->sa_uid = nfs_xdrneg1;
1937		sp->sa_gid = nfs_xdrneg1;
1938		sp->sa_size = nfs_xdrneg1;
1939		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1940		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1941	}
1942	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
1943	if (!error)
1944		nfsm_mtofh(dvp, newvp, v3, gotvp);
1945	if (v3)
1946		nfsm_wcc_data(dvp, wccflag);
1947	nfsm_reqdone;
1948	VTONFS(dvp)->n_flag |= NMODIFIED;
1949	if (!wccflag)
1950		VTONFS(dvp)->n_attrstamp = 0;
1951	/*
1952	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
1953	 * if we can succeed in looking up the directory.
1954	 */
1955	if (error == EEXIST || (!error && !gotvp)) {
1956		if (newvp) {
1957			vrele(newvp);
1958			newvp = (struct vnode *)0;
1959		}
1960		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
1961			cnp->cn_proc, &np);
1962		if (!error) {
1963			newvp = NFSTOV(np);
1964			if (newvp->v_type != VDIR)
1965				error = EEXIST;
1966		}
1967	}
1968	if (error) {
1969		if (newvp)
1970			vrele(newvp);
1971	} else
1972		*ap->a_vpp = newvp;
1973	return (error);
1974}
1975
1976/*
1977 * nfs remove directory call
1978 */
1979static int
1980nfs_rmdir(ap)
1981	struct vop_rmdir_args /* {
1982		struct vnode *a_dvp;
1983		struct vnode *a_vp;
1984		struct componentname *a_cnp;
1985	} */ *ap;
1986{
1987	register struct vnode *vp = ap->a_vp;
1988	register struct vnode *dvp = ap->a_dvp;
1989	register struct componentname *cnp = ap->a_cnp;
1990	register u_int32_t *tl;
1991	register caddr_t cp;
1992	register int32_t t1, t2;
1993	caddr_t bpos, dpos, cp2;
1994	int error = 0, wccflag = NFSV3_WCCRATTR;
1995	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
1996	int v3 = NFS_ISV3(dvp);
1997
1998	if (dvp == vp)
1999		return (EINVAL);
2000	nfsstats.rpccnt[NFSPROC_RMDIR]++;
2001	nfsm_reqhead(dvp, NFSPROC_RMDIR,
2002		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
2003	nfsm_fhtom(dvp, v3);
2004	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
2005	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
2006	if (v3)
2007		nfsm_wcc_data(dvp, wccflag);
2008	nfsm_reqdone;
2009	VTONFS(dvp)->n_flag |= NMODIFIED;
2010	if (!wccflag)
2011		VTONFS(dvp)->n_attrstamp = 0;
2012	cache_purge(dvp);
2013	cache_purge(vp);
2014	/*
2015	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2016	 */
2017	if (error == ENOENT)
2018		error = 0;
2019	return (error);
2020}
2021
2022/*
2023 * nfs readdir call
2024 */
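/*
 * Note (descriptive): n_direofoffset is the offset at which a previous
 * readdir RPC hit end-of-directory.  If the caller is at or beyond that
 * offset and the directory has not been locally modified (NMODIFIED is
 * clear), the EOF answer can be given from the cache, either while an
 * NQNFS lease says the cache is valid or while the server's mtime still
 * matches n_mtime; otherwise nfs_bioread() goes through the buffer
 * cache, which ends up in nfs_readdirrpc()/nfs_readdirplusrpc() below.
 */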
2025static int
2026nfs_readdir(ap)
2027	struct vop_readdir_args /* {
2028		struct vnode *a_vp;
2029		struct uio *a_uio;
2030		struct ucred *a_cred;
2031	} */ *ap;
2032{
2033	register struct vnode *vp = ap->a_vp;
2034	register struct nfsnode *np = VTONFS(vp);
2035	register struct uio *uio = ap->a_uio;
2036	int tresid, error;
2037	struct vattr vattr;
2038
2039	if (vp->v_type != VDIR)
2040		return (EPERM);
2041	/*
2042	 * First, check for hit on the EOF offset cache
2043	 */
2044	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
2045	    (np->n_flag & NMODIFIED) == 0) {
2046		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
2047			if (NQNFS_CKCACHABLE(vp, ND_READ)) {
2048				nfsstats.direofcache_hits++;
2049				return (0);
2050			}
2051		} else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 &&
2052			np->n_mtime == vattr.va_mtime.tv_sec) {
2053			nfsstats.direofcache_hits++;
2054			return (0);
2055		}
2056	}
2057
2058	/*
2059	 * Call nfs_bioread() to do the real work.
2060	 */
2061	tresid = uio->uio_resid;
2062	error = nfs_bioread(vp, uio, 0, ap->a_cred);
2063
2064	if (!error && uio->uio_resid == tresid)
2065		nfsstats.direofcache_misses++;
2066	return (error);
2067}
2068
2069/*
2070 * Readdir rpc call.
2071 * Called from below the buffer cache by nfs_doio().
2072 */
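/*
 * Note on how entries are repacked (a worked example, not new behaviour):
 * each wire entry becomes a struct dirent of size
 * DIRHDSIZ + nfsm_rndup(len), with 4 extra bytes when the name length is
 * already a multiple of 4 so that a terminating NUL always fits.  For a
 * 9 character name that is DIRHDSIZ + 12 bytes; for an 8 character name
 * it is DIRHDSIZ + 12 as well (8 rounds to 8, plus the extra 4).
 * Entries are packed into DIRBLKSIZ units: when the next record would
 * not fit, the previous record's d_reclen is grown to pad out the block,
 * just as the final record is padded after the loop.  The v3 cookie is
 * 64 bits (two words) and comes with a cookie verifier kept in
 * n_cookieverf; v2 uses a single 32 bit cookie.
 */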
2073int
2074nfs_readdirrpc(vp, uiop, cred)
2075	struct vnode *vp;
2076	register struct uio *uiop;
2077	struct ucred *cred;
2078
2079{
2080	register int len, left;
2081	register struct dirent *dp = NULL;
2082	register u_int32_t *tl;
2083	register caddr_t cp;
2084	register int32_t t1, t2;
2085	register nfsuint64 *cookiep;
2086	caddr_t bpos, dpos, cp2;
2087	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2088	nfsuint64 cookie;
2089	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2090	struct nfsnode *dnp = VTONFS(vp);
2091	u_quad_t fileno;
2092	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2093	int attrflag;
2094	int v3 = NFS_ISV3(vp);
2095
2096#ifndef DIAGNOSTIC
2097	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2098		(uiop->uio_resid & (DIRBLKSIZ - 1)))
2099		panic("nfs readdirrpc bad uio");
2100#endif
2101
2102	/*
2103	 * If there is no cookie, assume directory was stale.
2104	 */
2105	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2106	if (cookiep)
2107		cookie = *cookiep;
2108	else
2109		return (NFSERR_BAD_COOKIE);
2110	/*
2111	 * Loop around doing readdir rpc's of size nm_readdirsize
2112	 * truncated to a multiple of DIRBLKSIZ.
2113	 * The stopping criterion is EOF or a full buffer.
2114	 */
2115	while (more_dirs && bigenough) {
2116		nfsstats.rpccnt[NFSPROC_READDIR]++;
2117		nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
2118			NFSX_READDIR(v3));
2119		nfsm_fhtom(vp, v3);
2120		if (v3) {
2121			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2122			*tl++ = cookie.nfsuquad[0];
2123			*tl++ = cookie.nfsuquad[1];
2124			*tl++ = dnp->n_cookieverf.nfsuquad[0];
2125			*tl++ = dnp->n_cookieverf.nfsuquad[1];
2126		} else {
2127			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2128			*tl++ = cookie.nfsuquad[0];
2129		}
2130		*tl = txdr_unsigned(nmp->nm_readdirsize);
2131		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
2132		if (v3) {
2133			nfsm_postop_attr(vp, attrflag);
2134			if (!error) {
2135				nfsm_dissect(tl, u_int32_t *,
2136				    2 * NFSX_UNSIGNED);
2137				dnp->n_cookieverf.nfsuquad[0] = *tl++;
2138				dnp->n_cookieverf.nfsuquad[1] = *tl;
2139			} else {
2140				m_freem(mrep);
2141				goto nfsmout;
2142			}
2143		}
2144		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2145		more_dirs = fxdr_unsigned(int, *tl);
2146
2147		/* loop thru the dir entries, doctoring them to 4bsd form */
2148		while (more_dirs && bigenough) {
2149			if (v3) {
2150				nfsm_dissect(tl, u_int32_t *,
2151				    3 * NFSX_UNSIGNED);
2152				fileno = fxdr_hyper(tl);
2153				len = fxdr_unsigned(int, *(tl + 2));
2154			} else {
2155				nfsm_dissect(tl, u_int32_t *,
2156				    2 * NFSX_UNSIGNED);
2157				fileno = fxdr_unsigned(u_quad_t, *tl++);
2158				len = fxdr_unsigned(int, *tl);
2159			}
2160			if (len <= 0 || len > NFS_MAXNAMLEN) {
2161				error = EBADRPC;
2162				m_freem(mrep);
2163				goto nfsmout;
2164			}
2165			tlen = nfsm_rndup(len);
2166			if (tlen == len)
2167				tlen += 4;	/* To ensure null termination */
2168			left = DIRBLKSIZ - blksiz;
2169			if ((tlen + DIRHDSIZ) > left) {
2170				dp->d_reclen += left;
2171				uiop->uio_iov->iov_base += left;
2172				uiop->uio_iov->iov_len -= left;
2173				uiop->uio_offset += left;
2174				uiop->uio_resid -= left;
2175				blksiz = 0;
2176			}
2177			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2178				bigenough = 0;
2179			if (bigenough) {
2180				dp = (struct dirent *)uiop->uio_iov->iov_base;
2181				dp->d_fileno = (int)fileno;
2182				dp->d_namlen = len;
2183				dp->d_reclen = tlen + DIRHDSIZ;
2184				dp->d_type = DT_UNKNOWN;
2185				blksiz += dp->d_reclen;
2186				if (blksiz == DIRBLKSIZ)
2187					blksiz = 0;
2188				uiop->uio_offset += DIRHDSIZ;
2189				uiop->uio_resid -= DIRHDSIZ;
2190				uiop->uio_iov->iov_base += DIRHDSIZ;
2191				uiop->uio_iov->iov_len -= DIRHDSIZ;
2192				nfsm_mtouio(uiop, len);
2193				cp = uiop->uio_iov->iov_base;
2194				tlen -= len;
2195				*cp = '\0';	/* null terminate */
2196				uiop->uio_iov->iov_base += tlen;
2197				uiop->uio_iov->iov_len -= tlen;
2198				uiop->uio_offset += tlen;
2199				uiop->uio_resid -= tlen;
2200			} else
2201				nfsm_adv(nfsm_rndup(len));
2202			if (v3) {
2203				nfsm_dissect(tl, u_int32_t *,
2204				    3 * NFSX_UNSIGNED);
2205			} else {
2206				nfsm_dissect(tl, u_int32_t *,
2207				    2 * NFSX_UNSIGNED);
2208			}
2209			if (bigenough) {
2210				cookie.nfsuquad[0] = *tl++;
2211				if (v3)
2212					cookie.nfsuquad[1] = *tl++;
2213			} else if (v3)
2214				tl += 2;
2215			else
2216				tl++;
2217			more_dirs = fxdr_unsigned(int, *tl);
2218		}
2219		/*
2220		 * If at end of rpc data, get the eof boolean
2221		 */
2222		if (!more_dirs) {
2223			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2224			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2225		}
2226		m_freem(mrep);
2227	}
2228	/*
2229	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2230	 * by increasing d_reclen for the last record.
2231	 */
2232	if (blksiz > 0) {
2233		left = DIRBLKSIZ - blksiz;
2234		dp->d_reclen += left;
2235		uiop->uio_iov->iov_base += left;
2236		uiop->uio_iov->iov_len -= left;
2237		uiop->uio_offset += left;
2238		uiop->uio_resid -= left;
2239	}
2240
2241	/*
2242	 * We are now either at the end of the directory or have filled the
2243	 * block.
2244	 */
2245	if (bigenough)
2246		dnp->n_direofoffset = uiop->uio_offset;
2247	else {
2248		if (uiop->uio_resid > 0)
2249			printf("EEK! readdirrpc resid > 0\n");
2250		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2251		*cookiep = cookie;
2252	}
2253nfsmout:
2254	return (error);
2255}
2256
2257/*
2258 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
2259 */
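/*
 * Note (descriptive): in addition to the plain readdir conversion above,
 * each READDIRPLUS entry may carry post-op attributes and a file handle.
 * The attributes precede the handle on the wire, so the code remembers
 * the mbuf/offset position (mdsav1/dpossav1), skips ahead to fetch the
 * handle, builds or references the nfsnode, and then rewinds to load the
 * attributes into it.  The result is also used to set d_type and to
 * prime the name cache via cache_enter(), which can save later lookup
 * RPCs.
 */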
2260int
2261nfs_readdirplusrpc(vp, uiop, cred)
2262	struct vnode *vp;
2263	register struct uio *uiop;
2264	struct ucred *cred;
2265{
2266	register int len, left;
2267	register struct dirent *dp;
2268	register u_int32_t *tl;
2269	register caddr_t cp;
2270	register int32_t t1, t2;
2271	register struct vnode *newvp;
2272	register nfsuint64 *cookiep;
2273	caddr_t bpos, dpos, cp2, dpossav1, dpossav2;
2274	struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2;
2275	struct nameidata nami, *ndp = &nami;
2276	struct componentname *cnp = &ndp->ni_cnd;
2277	nfsuint64 cookie;
2278	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2279	struct nfsnode *dnp = VTONFS(vp), *np;
2280	nfsfh_t *fhp;
2281	u_quad_t fileno;
2282	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2283	int attrflag, fhsize;
2284
2285#ifndef nolint
2286	dp = (struct dirent *)0;
2287#endif
2288#ifndef DIAGNOSTIC
2289	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
2290		(uiop->uio_resid & (DIRBLKSIZ - 1)))
2291		panic("nfs readdirplusrpc bad uio");
2292#endif
2293	ndp->ni_dvp = vp;
2294	newvp = NULLVP;
2295
2296	/*
2297	 * If there is no cookie, assume directory was stale.
2298	 */
2299	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
2300	if (cookiep)
2301		cookie = *cookiep;
2302	else
2303		return (NFSERR_BAD_COOKIE);
2304	/*
2305	 * Loop around doing readdir rpc's of size nm_readdirsize
2306	 * truncated to a multiple of DIRBLKSIZ.
2307	 * The stopping criterion is EOF or a full buffer.
2308	 */
2309	while (more_dirs && bigenough) {
2310		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2311		nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
2312			NFSX_FH(1) + 6 * NFSX_UNSIGNED);
2313		nfsm_fhtom(vp, 1);
2314 		nfsm_build(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
2315		*tl++ = cookie.nfsuquad[0];
2316		*tl++ = cookie.nfsuquad[1];
2317		*tl++ = dnp->n_cookieverf.nfsuquad[0];
2318		*tl++ = dnp->n_cookieverf.nfsuquad[1];
2319		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
2320		*tl = txdr_unsigned(nmp->nm_rsize);
2321		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred);
2322		nfsm_postop_attr(vp, attrflag);
2323		if (error) {
2324			m_freem(mrep);
2325			goto nfsmout;
2326		}
2327		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2328		dnp->n_cookieverf.nfsuquad[0] = *tl++;
2329		dnp->n_cookieverf.nfsuquad[1] = *tl++;
2330		more_dirs = fxdr_unsigned(int, *tl);
2331
2332		/* loop thru the dir entries, doctoring them to 4bsd form */
2333		while (more_dirs && bigenough) {
2334			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2335			fileno = fxdr_hyper(tl);
2336			len = fxdr_unsigned(int, *(tl + 2));
2337			if (len <= 0 || len > NFS_MAXNAMLEN) {
2338				error = EBADRPC;
2339				m_freem(mrep);
2340				goto nfsmout;
2341			}
2342			tlen = nfsm_rndup(len);
2343			if (tlen == len)
2344				tlen += 4;	/* To ensure null termination*/
2345			left = DIRBLKSIZ - blksiz;
2346			if ((tlen + DIRHDSIZ) > left) {
2347				dp->d_reclen += left;
2348				uiop->uio_iov->iov_base += left;
2349				uiop->uio_iov->iov_len -= left;
2350				uiop->uio_offset += left;
2351				uiop->uio_resid -= left;
2352				blksiz = 0;
2353			}
2354			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
2355				bigenough = 0;
2356			if (bigenough) {
2357				dp = (struct dirent *)uiop->uio_iov->iov_base;
2358				dp->d_fileno = (int)fileno;
2359				dp->d_namlen = len;
2360				dp->d_reclen = tlen + DIRHDSIZ;
2361				dp->d_type = DT_UNKNOWN;
2362				blksiz += dp->d_reclen;
2363				if (blksiz == DIRBLKSIZ)
2364					blksiz = 0;
2365				uiop->uio_offset += DIRHDSIZ;
2366				uiop->uio_resid -= DIRHDSIZ;
2367				uiop->uio_iov->iov_base += DIRHDSIZ;
2368				uiop->uio_iov->iov_len -= DIRHDSIZ;
2369				cnp->cn_nameptr = uiop->uio_iov->iov_base;
2370				cnp->cn_namelen = len;
2371				nfsm_mtouio(uiop, len);
2372				cp = uiop->uio_iov->iov_base;
2373				tlen -= len;
2374				*cp = '\0';
2375				uiop->uio_iov->iov_base += tlen;
2376				uiop->uio_iov->iov_len -= tlen;
2377				uiop->uio_offset += tlen;
2378				uiop->uio_resid -= tlen;
2379			} else
2380				nfsm_adv(nfsm_rndup(len));
2381			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2382			if (bigenough) {
2383				cookie.nfsuquad[0] = *tl++;
2384				cookie.nfsuquad[1] = *tl++;
2385			} else
2386				tl += 2;
2387
2388			/*
2389			 * Since the attributes are before the file handle
2390			 * (sigh), we must skip over the attributes and then
2391			 * come back and get them.
2392			 */
2393			attrflag = fxdr_unsigned(int, *tl);
2394			if (attrflag) {
2395			    dpossav1 = dpos;
2396			    mdsav1 = md;
2397			    nfsm_adv(NFSX_V3FATTR);
2398			    nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2399			    doit = fxdr_unsigned(int, *tl);
2400			    if (doit) {
2401				nfsm_getfh(fhp, fhsize, 1);
2402				if (NFS_CMPFH(dnp, fhp, fhsize)) {
2403				    VREF(vp);
2404				    newvp = vp;
2405				    np = dnp;
2406				} else {
2407				    error = nfs_nget(vp->v_mount, fhp,
2408					fhsize, &np);
2409				    if (error)
2410					doit = 0;
2411				    else
2412					newvp = NFSTOV(np);
2413				}
2414			    }
2415			    if (doit && bigenough) {
2416				dpossav2 = dpos;
2417				dpos = dpossav1;
2418				mdsav2 = md;
2419				md = mdsav1;
2420				nfsm_loadattr(newvp, (struct vattr *)0);
2421				dpos = dpossav2;
2422				md = mdsav2;
2423				dp->d_type =
2424				    IFTODT(VTTOIF(np->n_vattr.va_type));
2425				ndp->ni_vp = newvp;
2426			        cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
2427			    }
2428			} else {
2429			    /* Just skip over the file handle */
2430			    nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2431			    i = fxdr_unsigned(int, *tl);
2432			    nfsm_adv(nfsm_rndup(i));
2433			}
2434			if (newvp != NULLVP) {
2435			    if (newvp == vp)
2436				vrele(newvp);
2437			    else
2438				vput(newvp);
2439			    newvp = NULLVP;
2440			}
2441			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2442			more_dirs = fxdr_unsigned(int, *tl);
2443		}
2444		/*
2445		 * If at end of rpc data, get the eof boolean
2446		 */
2447		if (!more_dirs) {
2448			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
2449			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2450		}
2451		m_freem(mrep);
2452	}
2453	/*
2454	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
2455	 * by increasing d_reclen for the last record.
2456	 */
2457	if (blksiz > 0) {
2458		left = DIRBLKSIZ - blksiz;
2459		dp->d_reclen += left;
2460		uiop->uio_iov->iov_base += left;
2461		uiop->uio_iov->iov_len -= left;
2462		uiop->uio_offset += left;
2463		uiop->uio_resid -= left;
2464	}
2465
2466	/*
2467	 * We are now either at the end of the directory or have filled the
2468	 * block.
2469	 */
2470	if (bigenough)
2471		dnp->n_direofoffset = uiop->uio_offset;
2472	else {
2473		if (uiop->uio_resid > 0)
2474			printf("EEK! readdirplusrpc resid > 0\n");
2475		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
2476		*cookiep = cookie;
2477	}
2478nfsmout:
2479	if (newvp != NULLVP) {
2480	        if (newvp == vp)
2481			vrele(newvp);
2482		else
2483			vput(newvp);
2484		newvp = NULLVP;
2485	}
2486	return (error);
2487}
2488
2489/*
2490 * Silly rename. To make the stateless NFS filesystem look a little more
2491 * like "ufs", a remove of an active vnode is translated into a rename to
2492 * a funny looking filename that is removed later by nfs_inactive() on the
2493 * nfsnode. There is the potential for another process on a different client
2494 * to create the same funny name between the time nfs_lookitup() fails and
2495 * nfs_rename() completes, but...
2496 */
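/*
 * Note on the generated name (a worked example of the code below): the
 * name is built as sprintf(".nfsA%04x4.4", pid), so for a pid of 0x1a2b
 * the first candidate is ".nfsA1a2b4.4".  s_name[4] is the 'A', and it
 * is incremented through the character set until nfs_lookitup() reports
 * that no such entry exists (giving up with EINVAL past 'z').
 */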
2497static int
2498nfs_sillyrename(dvp, vp, cnp)
2499	struct vnode *dvp, *vp;
2500	struct componentname *cnp;
2501{
2502	register struct sillyrename *sp;
2503	struct nfsnode *np;
2504	int error;
2505	short pid;
2506
2507	cache_purge(dvp);
2508	np = VTONFS(vp);
2509#ifndef DIAGNOSTIC
2510	if (vp->v_type == VDIR)
2511		panic("nfs: sillyrename dir");
2512#endif
2513	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
2514		M_NFSREQ, M_WAITOK);
2515	sp->s_cred = crdup(cnp->cn_cred);
2516	sp->s_dvp = dvp;
2517	VREF(dvp);
2518
2519	/* Fudge together a funny name */
2520	pid = cnp->cn_proc->p_pid;
2521	sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid);
2522
2523	/* Try lookitups until we get one that isn't there */
2524	while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2525		cnp->cn_proc, (struct nfsnode **)0) == 0) {
2526		sp->s_name[4]++;
2527		if (sp->s_name[4] > 'z') {
2528			error = EINVAL;
2529			goto bad;
2530		}
2531	}
2532	error = nfs_renameit(dvp, cnp, sp);
2533	if (error)
2534		goto bad;
2535	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2536		cnp->cn_proc, &np);
2537	np->n_sillyrename = sp;
2538	return (0);
2539bad:
2540	vrele(sp->s_dvp);
2541	crfree(sp->s_cred);
2542	free((caddr_t)sp, M_NFSREQ);
2543	return (error);
2544}
2545
2546/*
2547 * Look up a file name and optionally either update the file handle or
2548 * allocate an nfsnode, depending on the value of npp.
2549 * npp == NULL	--> just do the lookup
2550 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
2551 *			handled too
2552 * *npp != NULL --> update the file handle in the vnode
2553 */
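/*
 * Note (descriptive): when an existing nfsnode is being updated
 * (*npp != NULL), the new handle is copied into either the inline n_fh
 * or a separately malloc'd buffer, switching between the two when the
 * handle size crosses NFS_SMALLFH.  For v3, the post-op attributes are
 * required in the "allocate a new nfsnode" case; without them the new
 * vnode is released and ENOENT is returned.
 */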
2554static int
2555nfs_lookitup(dvp, name, len, cred, procp, npp)
2556	register struct vnode *dvp;
2557	const char *name;
2558	int len;
2559	struct ucred *cred;
2560	struct proc *procp;
2561	struct nfsnode **npp;
2562{
2563	register u_int32_t *tl;
2564	register caddr_t cp;
2565	register int32_t t1, t2;
2566	struct vnode *newvp = (struct vnode *)0;
2567	struct nfsnode *np, *dnp = VTONFS(dvp);
2568	caddr_t bpos, dpos, cp2;
2569	int error = 0, fhlen, attrflag;
2570	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2571	nfsfh_t *nfhp;
2572	int v3 = NFS_ISV3(dvp);
2573
2574	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
2575	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
2576		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
2577	nfsm_fhtom(dvp, v3);
2578	nfsm_strtom(name, len, NFS_MAXNAMLEN);
2579	nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred);
2580	if (npp && !error) {
2581		nfsm_getfh(nfhp, fhlen, v3);
2582		if (*npp) {
2583		    np = *npp;
2584		    if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
2585			free((caddr_t)np->n_fhp, M_NFSBIGFH);
2586			np->n_fhp = &np->n_fh;
2587		    } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
2588			np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK);
2589		    bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
2590		    np->n_fhsize = fhlen;
2591		    newvp = NFSTOV(np);
2592		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
2593		    VREF(dvp);
2594		    newvp = dvp;
2595		} else {
2596		    error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
2597		    if (error) {
2598			m_freem(mrep);
2599			return (error);
2600		    }
2601		    newvp = NFSTOV(np);
2602		}
2603		if (v3) {
2604			nfsm_postop_attr(newvp, attrflag);
2605			if (!attrflag && *npp == NULL) {
2606				m_freem(mrep);
2607				if (newvp == dvp)
2608					vrele(newvp);
2609				else
2610					vput(newvp);
2611				return (ENOENT);
2612			}
2613		} else
2614			nfsm_loadattr(newvp, (struct vattr *)0);
2615	}
2616	nfsm_reqdone;
2617	if (npp && *npp == NULL) {
2618		if (error) {
2619			if (newvp) {
2620				if (newvp == dvp)
2621					vrele(newvp);
2622				else
2623					vput(newvp);
2624			}
2625		} else
2626			*npp = np;
2627	}
2628	return (error);
2629}
2630
2631/*
2632 * Nfs Version 3 commit rpc
2633 */
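/*
 * Note (descriptive): the COMMIT reply carries an NFSX_V3WRITEVERF byte
 * write verifier.  If it differs from the verifier cached in nm_verf,
 * the server has most likely rebooted since the data was written, so the
 * cached copy is replaced and NFSERR_STALEWRITEVERF is returned; the
 * caller (see nfs_flush() below) then calls nfs_clearcommit() so that
 * the affected buffers are written out again rather than merely
 * committed.
 */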
2634int
2635nfs_commit(vp, offset, cnt, cred, procp)
2636	struct vnode *vp;
2637	u_quad_t offset;
2638	int cnt;
2639	struct ucred *cred;
2640	struct proc *procp;
2641{
2642	register caddr_t cp;
2643	register u_int32_t *tl;
2644	register int32_t t1, t2;
2645	register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2646	caddr_t bpos, dpos, cp2;
2647	int error = 0, wccflag = NFSV3_WCCRATTR;
2648	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
2649
2650	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
2651		return (0);
2652	nfsstats.rpccnt[NFSPROC_COMMIT]++;
2653	nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
2654	nfsm_fhtom(vp, 1);
2655	nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
2656	txdr_hyper(offset, tl);
2657	tl += 2;
2658	*tl = txdr_unsigned(cnt);
2659	nfsm_request(vp, NFSPROC_COMMIT, procp, cred);
2660	nfsm_wcc_data(vp, wccflag);
2661	if (!error) {
2662		nfsm_dissect(tl, u_int32_t *, NFSX_V3WRITEVERF);
2663		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
2664			NFSX_V3WRITEVERF)) {
2665			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
2666				NFSX_V3WRITEVERF);
2667			error = NFSERR_STALEWRITEVERF;
2668		}
2669	}
2670	nfsm_reqdone;
2671	return (error);
2672}
2673
2674/*
2675 * Kludge City..
2676 * - make nfs_bmap() essentially a no-op that does no translation
2677 * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
2678 *   (Maybe I could use the process's page mapping, but I was concerned that
2679 *    Kernel Write might not be enabled; I also figured copyout() would do
2680 *    a lot more work than bcopy(), and it currently happens in the
2681 *    context of the swapper process (2).)
2682 */
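/*
 * A worked example of the translation done in nfs_bmap() (illustrative
 * only): *a_bnp = a_bn * btodb(f_iosize), where btodb() converts bytes
 * to DEV_BSIZE (normally 512 byte) units.  With an 8192 byte f_iosize,
 * btodb(8192) == 16, so logical block 3 maps to "disk" block 48.  No
 * real device translation happens; nfs_strategy() below does the actual
 * I/O with nfs_readrpc()/nfs_writerpc() via nfs_doio().
 */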
2683static int
2684nfs_bmap(ap)
2685	struct vop_bmap_args /* {
2686		struct vnode *a_vp;
2687		daddr_t  a_bn;
2688		struct vnode **a_vpp;
2689		daddr_t *a_bnp;
2690		int *a_runp;
2691		int *a_runb;
2692	} */ *ap;
2693{
2694	register struct vnode *vp = ap->a_vp;
2695
2696	if (ap->a_vpp != NULL)
2697		*ap->a_vpp = vp;
2698	if (ap->a_bnp != NULL)
2699		*ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize);
2700	if (ap->a_runp != NULL)
2701		*ap->a_runp = 0;
2702	if (ap->a_runb != NULL)
2703		*ap->a_runb = 0;
2704	return (0);
2705}
2706
2707/*
2708 * Strategy routine.
2709 * For async requests when nfsiod(s) are running, queue the request by
2710 * calling nfs_asyncio(); otherwise just call nfs_doio() to do the
2711 * request.
2712 */
2713static int
2714nfs_strategy(ap)
2715	struct vop_strategy_args *ap;
2716{
2717	register struct buf *bp = ap->a_bp;
2718	struct ucred *cr;
2719	struct proc *p;
2720	int error = 0;
2721
2722	KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
2723	KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp));
2724
2725	if (bp->b_flags & B_PHYS)
2726		panic("nfs physio");
2727
2728	if (bp->b_flags & B_ASYNC)
2729		p = (struct proc *)0;
2730	else
2731		p = curproc;	/* XXX */
2732
2733	if (bp->b_iocmd == BIO_READ)
2734		cr = bp->b_rcred;
2735	else
2736		cr = bp->b_wcred;
2737
2738	/*
2739	 * If the op is asynchronous and an i/o daemon is waiting,
2740	 * queue the request, wake it up and wait for completion;
2741	 * otherwise just do it ourselves.
2742	 */
2743	if ((bp->b_flags & B_ASYNC) == 0 ||
2744		nfs_asyncio(bp, NOCRED, p))
2745		error = nfs_doio(bp, cr, p);
2746	return (error);
2747}
2748
2749/*
2750 * Mmap a file
2751 *
2752 * NB Currently unsupported.
2753 */
2754/* ARGSUSED */
2755static int
2756nfs_mmap(ap)
2757	struct vop_mmap_args /* {
2758		struct vnode *a_vp;
2759		int  a_fflags;
2760		struct ucred *a_cred;
2761		struct proc *a_p;
2762	} */ *ap;
2763{
2764
2765	return (EINVAL);
2766}
2767
2768/*
2769 * fsync vnode op. Just call nfs_flush() with commit == 1.
2770 */
2771/* ARGSUSED */
2772static int
2773nfs_fsync(ap)
2774	struct vop_fsync_args /* {
2775		struct vnodeop_desc *a_desc;
2776		struct vnode * a_vp;
2777		struct ucred * a_cred;
2778		int  a_waitfor;
2779		struct proc * a_p;
2780	} */ *ap;
2781{
2782
2783	return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1));
2784}
2785
2786/*
2787 * Flush all the blocks associated with a vnode.
2788 * 	Walk through the buffer pool and push any dirty pages
2789 *	associated with the vnode.
2790 */
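/*
 * Note (descriptive outline of the code below): when committing on a v3
 * mount the routine makes two passes.  The first pass gathers the dirty
 * buffers that are marked B_NEEDCOMMIT, remembers them in bvec[] (on the
 * stack, or malloc'd with M_NOWAIT when there are more than
 * NFS_COMMITBVECSIZ), and issues COMMIT for the covering byte range
 * (one RPC if all buffers share a credential, otherwise one per buffer).
 * Buffers whose commit succeeded are completed; on failure, or a stale
 * write verifier, they stay dirty and are written on the second pass,
 * which also handles ordinary dirty buffers.  With MNT_WAIT the routine
 * also waits for v_numoutput to drain, and any write error recorded in
 * the nfsnode (NWRITEERR) is returned.
 */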
2791static int
2792nfs_flush(vp, cred, waitfor, p, commit)
2793	register struct vnode *vp;
2794	struct ucred *cred;
2795	int waitfor;
2796	struct proc *p;
2797	int commit;
2798{
2799	register struct nfsnode *np = VTONFS(vp);
2800	register struct buf *bp;
2801	register int i;
2802	struct buf *nbp;
2803	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2804	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
2805	int passone = 1;
2806	u_quad_t off, endoff, toff;
2807	struct ucred* wcred = NULL;
2808	struct buf **bvec = NULL;
2809#ifndef NFS_COMMITBVECSIZ
2810#define NFS_COMMITBVECSIZ	20
2811#endif
2812	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
2813	int bvecsize = 0, bveccount;
2814
2815	if (nmp->nm_flag & NFSMNT_INT)
2816		slpflag = PCATCH;
2817	if (!commit)
2818		passone = 0;
2819	/*
2820	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
2821	 * server, but has not been committed to stable storage on the server
2822	 * yet. On the first pass, the byte range is worked out and the commit
2823	 * rpc is done. On the second pass, nfs_writebp() is called to do the
2824	 * job.
2825	 */
2826again:
2827	off = (u_quad_t)-1;
2828	endoff = 0;
2829	bvecpos = 0;
2830	if (NFS_ISV3(vp) && commit) {
2831		s = splbio();
2832		/*
2833		 * Count up how many buffers are waiting for a commit.
2834		 */
2835		bveccount = 0;
2836		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
2837			nbp = TAILQ_NEXT(bp, b_vnbufs);
2838			if (BUF_REFCNT(bp) == 0 &&
2839			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
2840				== (B_DELWRI | B_NEEDCOMMIT))
2841				bveccount++;
2842		}
2843		/*
2844		 * Allocate space to remember the list of bufs to commit.  It is
2845		 * important to use M_NOWAIT here to avoid a race with nfs_write.
2846		 * If we can't get memory (for whatever reason), we will end up
2847		 * committing the buffers one-by-one in the loop below.
2848		 */
2849		if (bveccount > NFS_COMMITBVECSIZ) {
2850			if (bvec != NULL && bvec != bvec_on_stack)
2851				free(bvec, M_TEMP);
2852			bvec = (struct buf **)
2853				malloc(bveccount * sizeof(struct buf *),
2854				       M_TEMP, M_NOWAIT);
2855			if (bvec == NULL) {
2856				bvec = bvec_on_stack;
2857				bvecsize = NFS_COMMITBVECSIZ;
2858			} else
2859				bvecsize = bveccount;
2860		} else {
2861			bvec = bvec_on_stack;
2862			bvecsize = NFS_COMMITBVECSIZ;
2863		}
2864		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
2865			nbp = TAILQ_NEXT(bp, b_vnbufs);
2866			if (bvecpos >= bvecsize)
2867				break;
2868			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
2869			    (B_DELWRI | B_NEEDCOMMIT) ||
2870			    BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
2871				continue;
2872			bremfree(bp);
2873			/*
2874			 * Work out if all buffers are using the same cred
2875			 * so we can deal with them all with one commit.
2876			 *
2877			 * NOTE: we are not clearing B_DONE here, so we have
2878			 * to do it later on in this routine if we intend to
2879			 * initiate I/O on the bp.
2880			 */
2881			if (wcred == NULL)
2882				wcred = bp->b_wcred;
2883			else if (wcred != bp->b_wcred)
2884				wcred = NOCRED;
2885			bp->b_flags |= B_WRITEINPROG;
2886			vfs_busy_pages(bp, 1);
2887
2888			/*
2889			 * bp is protected by being locked, but nbp is not
2890			 * and vfs_busy_pages() may sleep.  We have to
2891			 * recalculate nbp.
2892			 */
2893			nbp = TAILQ_NEXT(bp, b_vnbufs);
2894
2895			/*
2896			 * A list of these buffers is kept so that the
2897			 * second loop knows which buffers have actually
2898			 * been committed. This is necessary, since there
2899			 * may be a race between the commit rpc and new
2900			 * uncommitted writes on the file.
2901			 */
2902			bvec[bvecpos++] = bp;
2903			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2904				bp->b_dirtyoff;
2905			if (toff < off)
2906				off = toff;
2907			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
2908			if (toff > endoff)
2909				endoff = toff;
2910		}
2911		splx(s);
2912	}
2913	if (bvecpos > 0) {
2914		/*
2915		 * Commit data on the server, as required.
2916		 * If all bufs are using the same wcred, then use that with
2917		 * one call for all of them, otherwise commit each one
2918		 * separately.
2919		 */
2920		if (wcred != NOCRED)
2921			retv = nfs_commit(vp, off, (int)(endoff - off),
2922					  wcred, p);
2923		else {
2924			retv = 0;
2925			for (i = 0; i < bvecpos; i++) {
2926				off_t off, size;
2927				bp = bvec[i];
2928				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
2929					bp->b_dirtyoff;
2930				size = (u_quad_t)(bp->b_dirtyend
2931						  - bp->b_dirtyoff);
2932				retv = nfs_commit(vp, off, (int)size,
2933						  bp->b_wcred, p);
2934				if (retv) break;
2935			}
2936		}
2937
2938		if (retv == NFSERR_STALEWRITEVERF)
2939			nfs_clearcommit(vp->v_mount);
2940
2941		/*
2942		 * Now, either mark the blocks I/O done or mark the
2943		 * blocks dirty, depending on whether the commit
2944		 * succeeded.
2945		 */
2946		for (i = 0; i < bvecpos; i++) {
2947			bp = bvec[i];
2948			bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG | B_CLUSTEROK);
2949			if (retv) {
2950				/*
2951				 * Error, leave B_DELWRI intact
2952				 */
2953				vfs_unbusy_pages(bp);
2954				brelse(bp);
2955			} else {
2956				/*
2957				 * Success, remove B_DELWRI ( bundirty() ).
2958				 *
2959				 * b_dirtyoff/b_dirtyend seem to be NFS
2960				 * specific.  We should probably move that
2961				 * into bundirty(). XXX
2962				 */
2963				s = splbio();
2964				vp->v_numoutput++;
2965				bp->b_flags |= B_ASYNC;
2966				bundirty(bp);
2967				bp->b_flags &= ~B_DONE;
2968				bp->b_ioflags &= ~BIO_ERROR;
2969				bp->b_dirtyoff = bp->b_dirtyend = 0;
2970				splx(s);
2971				bufdone(bp);
2972			}
2973		}
2974	}
2975
2976	/*
2977	 * Start/do any write(s) that are required.
2978	 */
2979loop:
2980	s = splbio();
2981	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
2982		nbp = TAILQ_NEXT(bp, b_vnbufs);
2983		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
2984			if (waitfor != MNT_WAIT || passone)
2985				continue;
2986			error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL,
2987			    "nfsfsync", slpflag, slptimeo);
2988			splx(s);
2989			if (error == 0)
2990				panic("nfs_fsync: inconsistent lock");
2991			if (error == ENOLCK)
2992				goto loop;
2993			if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
2994				error = EINTR;
2995				goto done;
2996			}
2997			if (slpflag == PCATCH) {
2998				slpflag = 0;
2999				slptimeo = 2 * hz;
3000			}
3001			goto loop;
3002		}
3003		if ((bp->b_flags & B_DELWRI) == 0)
3004			panic("nfs_fsync: not dirty");
3005		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
3006			BUF_UNLOCK(bp);
3007			continue;
3008		}
3009		bremfree(bp);
3010		if (passone || !commit)
3011		    bp->b_flags |= B_ASYNC;
3012		else
3013		    bp->b_flags |= B_ASYNC | B_WRITEINPROG;
3014		splx(s);
3015		BUF_WRITE(bp);
3016		goto loop;
3017	}
3018	splx(s);
3019	if (passone) {
3020		passone = 0;
3021		goto again;
3022	}
3023	if (waitfor == MNT_WAIT) {
3024		while (vp->v_numoutput) {
3025			vp->v_flag |= VBWAIT;
3026			error = tsleep((caddr_t)&vp->v_numoutput,
3027				slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
3028			if (error) {
3029			    if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
3030				error = EINTR;
3031				goto done;
3032			    }
3033			    if (slpflag == PCATCH) {
3034				slpflag = 0;
3035				slptimeo = 2 * hz;
3036			    }
3037			}
3038		}
3039		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) {
3040			goto loop;
3041		}
3042	}
3043	if (np->n_flag & NWRITEERR) {
3044		error = np->n_error;
3045		np->n_flag &= ~NWRITEERR;
3046	}
3047done:
3048	if (bvec != NULL && bvec != bvec_on_stack)
3049		free(bvec, M_TEMP);
3050	return (error);
3051}
3052
3053/*
3054 * NFS advisory byte-level locks.
3055 * Currently unsupported.
3056 */
3057static int
3058nfs_advlock(ap)
3059	struct vop_advlock_args /* {
3060		struct vnode *a_vp;
3061		caddr_t  a_id;
3062		int  a_op;
3063		struct flock *a_fl;
3064		int  a_flags;
3065	} */ *ap;
3066{
3067	register struct nfsnode *np = VTONFS(ap->a_vp);
3068
3069	/*
3070	 * The following kludge is to allow diskless support to work
3071	 * until a real NFS lockd is implemented. Basically, just pretend
3072	 * that this is a local lock.
3073	 */
3074	return (lf_advlock(ap, &(np->n_lockf), np->n_size));
3075}
3076
3077/*
3078 * Print out the contents of an nfsnode.
3079 */
3080static int
3081nfs_print(ap)
3082	struct vop_print_args /* {
3083		struct vnode *a_vp;
3084	} */ *ap;
3085{
3086	register struct vnode *vp = ap->a_vp;
3087	register struct nfsnode *np = VTONFS(vp);
3088
3089	printf("tag VT_NFS, fileid %ld fsid 0x%x",
3090		np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3091	if (vp->v_type == VFIFO)
3092		fifo_printinfo(vp);
3093	printf("\n");
3094	return (0);
3095}
3096
3097/*
3098 * Just call nfs_writebp() with the force argument set to 1.
3099 *
3100 * NOTE: B_DONE may or may not be set in a_bp on call.
3101 */
3102static int
3103nfs_bwrite(ap)
3104	struct vop_bwrite_args /* {
3105		struct vnode *a_bp;
3106	} */ *ap;
3107{
3108	return (nfs_writebp(ap->a_bp, 1, curproc));
3109}
3110
3111/*
3112 * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless
3113 * the force flag is one; it also handles the B_NEEDCOMMIT flag.  We set
3114 * B_CACHE if this is a VMIO buffer.
3115 */
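/*
 * Note (descriptive): the buffer is undirtied up front and only gets
 * redirtied later if the I/O fails; B_CACHE is set, v_numoutput and
 * curproc's ru_oublock are bumped, and the pages are busied before
 * BUF_STRATEGY() is called.  Synchronous callers (no B_ASYNC in the old
 * flags) wait in bufwait() and release the buffer here; async writes
 * return immediately.
 */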
3116int
3117nfs_writebp(bp, force, procp)
3118	register struct buf *bp;
3119	int force;
3120	struct proc *procp;
3121{
3122	int s;
3123	int oldflags = bp->b_flags;
3124#if 0
3125	int retv = 1;
3126	off_t off;
3127#endif
3128
3129	if (BUF_REFCNT(bp) == 0)
3130		panic("bwrite: buffer is not locked???");
3131
3132	if (bp->b_flags & B_INVAL) {
3133		brelse(bp);
3134		return(0);
3135	}
3136
3137	bp->b_flags |= B_CACHE;
3138
3139	/*
3140	 * Undirty the bp.  We will redirty it later if the I/O fails.
3141	 */
3142
3143	s = splbio();
3144	bundirty(bp);
3145	bp->b_flags &= ~B_DONE;
3146	bp->b_ioflags &= ~BIO_ERROR;
3147	bp->b_iocmd = BIO_WRITE;
3148
3149	bp->b_vp->v_numoutput++;
3150	curproc->p_stats->p_ru.ru_oublock++;
3151	splx(s);
3152
3153	vfs_busy_pages(bp, 1);
3154	if (force)
3155		bp->b_flags |= B_WRITEINPROG;
3156	BUF_KERNPROC(bp);
3157	BUF_STRATEGY(bp);
3158
3159	if( (oldflags & B_ASYNC) == 0) {
3160		int rtval = bufwait(bp);
3161
3162		if (oldflags & B_DELWRI) {
3163			s = splbio();
3164			reassignbuf(bp, bp->b_vp);
3165			splx(s);
3166		}
3167
3168		brelse(bp);
3169		return (rtval);
3170	}
3171
3172	return (0);
3173}
3174
3175/*
3176 * nfs special file access vnode op.
3177 * Essentially just get vattr and then imitate iaccess() since the device is
3178 * local to the client.
3179 */
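/*
 * A worked example of the permission check below (illustrative only):
 * the requested bits arrive in the owner position (VREAD 0400, VWRITE
 * 0200, VEXEC 0100).  If the caller is not the owner, mode is shifted
 * right by 3 to line up with the group bits; if the caller is in none of
 * the credential's groups it is shifted again to the "other" bits.  So a
 * VREAD request by an unrelated user on a node with va_mode 0640 tests
 * 0004 against 0640 and fails with EACCES, while a group member tests
 * 0040 and succeeds.
 */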
3180static int
3181nfsspec_access(ap)
3182	struct vop_access_args /* {
3183		struct vnode *a_vp;
3184		int  a_mode;
3185		struct ucred *a_cred;
3186		struct proc *a_p;
3187	} */ *ap;
3188{
3189	register struct vattr *vap;
3190	register gid_t *gp;
3191	register struct ucred *cred = ap->a_cred;
3192	struct vnode *vp = ap->a_vp;
3193	mode_t mode = ap->a_mode;
3194	struct vattr vattr;
3195	register int i;
3196	int error;
3197
3198	/*
3199	 * Disallow write attempts on filesystems mounted read-only,
3200	 * unless the file is a socket, fifo, or a block or character
3201	 * device resident on the filesystem.
3202	 */
3203	if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3204		switch (vp->v_type) {
3205		case VREG:
3206		case VDIR:
3207		case VLNK:
3208			return (EROFS);
3209		default:
3210			break;
3211		}
3212	}
3213	/*
3214	 * If you're the super-user,
3215	 * you always get access.
3216	 */
3217	if (cred->cr_uid == 0)
3218		return (0);
3219	vap = &vattr;
3220	error = VOP_GETATTR(vp, vap, cred, ap->a_p);
3221	if (error)
3222		return (error);
3223	/*
3224	 * Access check is based on only one of owner, group, public.
3225	 * If not owner, then check group. If not a member of the
3226	 * group, then check public access.
3227	 */
3228	if (cred->cr_uid != vap->va_uid) {
3229		mode >>= 3;
3230		gp = cred->cr_groups;
3231		for (i = 0; i < cred->cr_ngroups; i++, gp++)
3232			if (vap->va_gid == *gp)
3233				goto found;
3234		mode >>= 3;
3235found:
3236		;
3237	}
3238	error = (vap->va_mode & mode) == mode ? 0 : EACCES;
3239	return (error);
3240}
3241
3242/*
3243 * Read wrapper for special devices.
3244 */
3245static int
3246nfsspec_read(ap)
3247	struct vop_read_args /* {
3248		struct vnode *a_vp;
3249		struct uio *a_uio;
3250		int  a_ioflag;
3251		struct ucred *a_cred;
3252	} */ *ap;
3253{
3254	register struct nfsnode *np = VTONFS(ap->a_vp);
3255
3256	/*
3257	 * Set access flag.
3258	 */
3259	np->n_flag |= NACC;
3260	getnanotime(&np->n_atim);
3261	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
3262}
3263
3264/*
3265 * Write wrapper for special devices.
3266 */
3267static int
3268nfsspec_write(ap)
3269	struct vop_write_args /* {
3270		struct vnode *a_vp;
3271		struct uio *a_uio;
3272		int  a_ioflag;
3273		struct ucred *a_cred;
3274	} */ *ap;
3275{
3276	register struct nfsnode *np = VTONFS(ap->a_vp);
3277
3278	/*
3279	 * Set update flag.
3280	 */
3281	np->n_flag |= NUPD;
3282	getnanotime(&np->n_mtim);
3283	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
3284}
3285
3286/*
3287 * Close wrapper for special devices.
3288 *
3289 * Update the times on the nfsnode then do device close.
3290 */
3291static int
3292nfsspec_close(ap)
3293	struct vop_close_args /* {
3294		struct vnode *a_vp;
3295		int  a_fflag;
3296		struct ucred *a_cred;
3297		struct proc *a_p;
3298	} */ *ap;
3299{
3300	register struct vnode *vp = ap->a_vp;
3301	register struct nfsnode *np = VTONFS(vp);
3302	struct vattr vattr;
3303
3304	if (np->n_flag & (NACC | NUPD)) {
3305		np->n_flag |= NCHG;
3306		if (vp->v_usecount == 1 &&
3307		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3308			VATTR_NULL(&vattr);
3309			if (np->n_flag & NACC)
3310				vattr.va_atime = np->n_atim;
3311			if (np->n_flag & NUPD)
3312				vattr.va_mtime = np->n_mtim;
3313			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
3314		}
3315	}
3316	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
3317}
3318
3319/*
3320 * Read wrapper for fifos.
3321 */
3322static int
3323nfsfifo_read(ap)
3324	struct vop_read_args /* {
3325		struct vnode *a_vp;
3326		struct uio *a_uio;
3327		int  a_ioflag;
3328		struct ucred *a_cred;
3329	} */ *ap;
3330{
3331	register struct nfsnode *np = VTONFS(ap->a_vp);
3332
3333	/*
3334	 * Set access flag.
3335	 */
3336	np->n_flag |= NACC;
3337	getnanotime(&np->n_atim);
3338	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
3339}
3340
3341/*
3342 * Write wrapper for fifos.
3343 */
3344static int
3345nfsfifo_write(ap)
3346	struct vop_write_args /* {
3347		struct vnode *a_vp;
3348		struct uio *a_uio;
3349		int  a_ioflag;
3350		struct ucred *a_cred;
3351	} */ *ap;
3352{
3353	register struct nfsnode *np = VTONFS(ap->a_vp);
3354
3355	/*
3356	 * Set update flag.
3357	 */
3358	np->n_flag |= NUPD;
3359	getnanotime(&np->n_mtim);
3360	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
3361}
3362
3363/*
3364 * Close wrapper for fifos.
3365 *
3366 * Update the times on the nfsnode then do fifo close.
3367 */
3368static int
3369nfsfifo_close(ap)
3370	struct vop_close_args /* {
3371		struct vnode *a_vp;
3372		int  a_fflag;
3373		struct ucred *a_cred;
3374		struct proc *a_p;
3375	} */ *ap;
3376{
3377	register struct vnode *vp = ap->a_vp;
3378	register struct nfsnode *np = VTONFS(vp);
3379	struct vattr vattr;
3380	struct timespec ts;
3381
3382	if (np->n_flag & (NACC | NUPD)) {
3383		getnanotime(&ts);
3384		if (np->n_flag & NACC)
3385			np->n_atim = ts;
3386		if (np->n_flag & NUPD)
3387			np->n_mtim = ts;
3388		np->n_flag |= NCHG;
3389		if (vp->v_usecount == 1 &&
3390		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3391			VATTR_NULL(&vattr);
3392			if (np->n_flag & NACC)
3393				vattr.va_atime = np->n_atim;
3394			if (np->n_flag & NUPD)
3395				vattr.va_mtime = np->n_mtim;
3396			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
3397		}
3398	}
3399	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
3400}
3401