1/*	$OpenBSD: nfs_vnops.c,v 1.200 2024/05/14 06:26:05 jsg Exp $	*/
2/*	$NetBSD: nfs_vnops.c,v 1.62.4.1 1996/07/08 20:26:52 jtc Exp $	*/
3
4/*
5 * Copyright (c) 1989, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Rick Macklem at The University of Guelph.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
36 */
37
38
39/*
40 * vnode op calls for Sun NFS version 2 and 3
41 */
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/proc.h>
46#include <sys/mount.h>
47#include <sys/buf.h>
48#include <sys/malloc.h>
49#include <sys/pool.h>
50#include <sys/mbuf.h>
51#include <sys/namei.h>
52#include <sys/vnode.h>
53#include <sys/lock.h>
54#include <sys/dirent.h>
55#include <sys/fcntl.h>
56#include <sys/lockf.h>
57#include <sys/queue.h>
58#include <sys/specdev.h>
59#include <sys/unistd.h>
60
61#include <miscfs/fifofs/fifo.h>
62
63#include <nfs/nfsproto.h>
64#include <nfs/nfs.h>
65#include <nfs/nfsnode.h>
66#include <nfs/nfsmount.h>
67#include <nfs/xdr_subs.h>
68#include <nfs/nfs_var.h>
69#include <nfs/nfsm_subs.h>
70
/*
 * Prototypes for the NFS vnode operations (the ones taking a single
 * void * argument block are installed in the vops tables below) and for
 * the supporting routines they call.
 */
int nfs_access(void *);
int nfs_advlock(void *);
int nfs_bmap(void *);
int nfs_bwrite(void *);
int nfs_close(void *);
int nfs_commit(struct vnode *, u_quad_t, int, struct proc *);
int nfs_create(void *);
int nfs_flush(struct vnode *, struct ucred *, int, struct proc *, int);
int nfs_fsync(void *);
int nfs_getattr(void *);
int nfs_islocked(void *);
int nfs_link(void *);
int nfs_lock(void *);
int nfs_lookitup(struct vnode *, char *, int, struct ucred *, struct proc *,
	struct nfsnode **);
int nfs_lookup(void *);
int nfs_mkdir(void *);
int nfs_mknod(void *);
int nfs_mknodrpc(struct vnode *, struct vnode **, struct componentname *,
	struct vattr *);
int nfs_null(struct vnode *, struct ucred *, struct proc *);
int nfs_open(void *);
int nfs_pathconf(void *);
int nfs_print(void *);
int nfs_read(void *);
int nfs_readdir(void *);
int nfs_readdirplusrpc(struct vnode *, struct uio *, struct ucred *, int *,
	struct proc *);
int nfs_readdirrpc(struct vnode *, struct uio *, struct ucred *, int *);
int nfs_remove(void *);
int nfs_removerpc(struct vnode *, char *, int, struct ucred *, struct proc *);
int nfs_rename(void *);
int nfs_renameit(struct vnode *, struct componentname *, struct sillyrename *);
int nfs_renamerpc(struct vnode *, char *, int, struct vnode *, char *, int,
	struct ucred *, struct proc *);
int nfs_rmdir(void *);
int nfs_setattr(void *);
int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
	struct proc *);
int nfs_sillyrename(struct vnode *, struct vnode *,
			 struct componentname *);
int nfs_strategy(void *);
int nfs_symlink(void *);
int nfs_unlock(void *);

/* Name cache insertion wrapper; also records the nfsnode time stamp. */
void nfs_cache_enter(struct vnode *, struct vnode *, struct componentname *);

/* Fifo variants, installed in nfs_fifovops. */
int nfsfifo_close(void *);
int nfsfifo_read(void *);
int nfsfifo_reclaim(void *);
int nfsfifo_write(void *);

/*
 * Special device variants, installed in nfs_specvops (nfsspec_access is
 * also used by nfs_fifovops and as the NFSv2 path of nfs_access()).
 */
int nfsspec_access(void *);
int nfsspec_close(void *);
int nfsspec_read(void *);
int nfsspec_write(void *);
127
/*
 * Global vfs data structures for nfs.
 *
 * nfs_vops is the vnode operations table for NFS vnodes.  Every slot is
 * filled with an nfs_* routine except vop_revoke and vop_abortop, which
 * use the generic implementations.
 */
const struct vops nfs_vops = {
	.vop_lookup	= nfs_lookup,
	.vop_create	= nfs_create,
	.vop_mknod	= nfs_mknod,
	.vop_open	= nfs_open,
	.vop_close	= nfs_close,
	.vop_access	= nfs_access,
	.vop_getattr	= nfs_getattr,
	.vop_setattr	= nfs_setattr,
	.vop_read	= nfs_read,
	.vop_write	= nfs_write,
	.vop_ioctl	= nfs_ioctl,
	.vop_kqfilter	= nfs_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_fsync	= nfs_fsync,
	.vop_remove	= nfs_remove,
	.vop_link	= nfs_link,
	.vop_rename	= nfs_rename,
	.vop_mkdir	= nfs_mkdir,
	.vop_rmdir	= nfs_rmdir,
	.vop_symlink	= nfs_symlink,
	.vop_readdir	= nfs_readdir,
	.vop_readlink	= nfs_readlink,
	.vop_abortop	= vop_generic_abortop,
	.vop_inactive	= nfs_inactive,
	.vop_reclaim	= nfs_reclaim,
	.vop_lock	= nfs_lock,
	.vop_unlock	= nfs_unlock,
	.vop_bmap	= nfs_bmap,
	.vop_strategy	= nfs_strategy,
	.vop_print	= nfs_print,
	.vop_islocked	= nfs_islocked,
	.vop_pathconf	= nfs_pathconf,
	.vop_advlock	= nfs_advlock,
	.vop_bwrite	= nfs_bwrite
};
165
/*
 * Special device vnode ops.
 *
 * The first group holds the operations that get NFS-aware handling
 * (attributes, access, locking, close/read/write wrappers).  The second
 * group uses the spec_* and generic routines and, per the note below,
 * has to be kept in step with spec_vops by hand.
 */
const struct vops nfs_specvops = {
	.vop_close	= nfsspec_close,
	.vop_access	= nfsspec_access,
	.vop_getattr	= nfs_getattr,
	.vop_setattr	= nfs_setattr,
	.vop_read	= nfsspec_read,
	.vop_write	= nfsspec_write,
	.vop_fsync	= nfs_fsync,
	.vop_inactive	= nfs_inactive,
	.vop_reclaim	= nfs_reclaim,
	.vop_lock	= nfs_lock,
	.vop_unlock	= nfs_unlock,
	.vop_print	= nfs_print,
	.vop_islocked	= nfs_islocked,

	/* XXX: Keep in sync with spec_vops. */
	.vop_lookup	= vop_generic_lookup,
	.vop_create	= vop_generic_badop,
	.vop_mknod	= vop_generic_badop,
	.vop_open	= spec_open,
	.vop_ioctl	= spec_ioctl,
	.vop_kqfilter	= spec_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_remove	= vop_generic_badop,
	.vop_link	= vop_generic_badop,
	.vop_rename	= vop_generic_badop,
	.vop_mkdir	= vop_generic_badop,
	.vop_rmdir	= vop_generic_badop,
	.vop_symlink	= vop_generic_badop,
	.vop_readdir	= vop_generic_badop,
	.vop_readlink	= vop_generic_badop,
	.vop_abortop	= vop_generic_badop,
	.vop_bmap	= vop_generic_bmap,
	.vop_strategy	= spec_strategy,
	.vop_pathconf	= spec_pathconf,
	.vop_advlock	= spec_advlock,
	.vop_bwrite	= vop_generic_bwrite,
};
205
#ifdef FIFO
/*
 * Fifo vnode ops, only built when FIFO is defined.
 *
 * The first group holds the operations that get NFS-aware handling; note
 * that access checking reuses nfsspec_access.  The second group uses the
 * fifo_* and generic routines and, per the note below, has to be kept in
 * step with fifo_vops by hand.
 */
const struct vops nfs_fifovops = {
	.vop_close	= nfsfifo_close,
	.vop_access	= nfsspec_access,
	.vop_getattr	= nfs_getattr,
	.vop_setattr	= nfs_setattr,
	.vop_read	= nfsfifo_read,
	.vop_write	= nfsfifo_write,
	.vop_fsync	= nfs_fsync,
	.vop_inactive	= nfs_inactive,
	.vop_reclaim	= nfsfifo_reclaim,
	.vop_lock	= nfs_lock,
	.vop_unlock	= nfs_unlock,
	.vop_print	= nfs_print,
	.vop_islocked	= nfs_islocked,
	.vop_bwrite	= vop_generic_bwrite,

	/* XXX: Keep in sync with fifo_vops. */
	.vop_lookup	= vop_generic_lookup,
	.vop_create	= vop_generic_badop,
	.vop_mknod	= vop_generic_badop,
	.vop_open	= fifo_open,
	.vop_ioctl	= fifo_ioctl,
	.vop_kqfilter	= fifo_kqfilter,
	.vop_revoke	= vop_generic_revoke,
	.vop_remove	= vop_generic_badop,
	.vop_link	= vop_generic_badop,
	.vop_rename	= vop_generic_badop,
	.vop_mkdir	= vop_generic_badop,
	.vop_rmdir	= vop_generic_badop,
	.vop_symlink	= vop_generic_badop,
	.vop_readdir	= vop_generic_badop,
	.vop_readlink	= vop_generic_badop,
	.vop_abortop	= vop_generic_badop,
	.vop_bmap	= vop_generic_bmap,
	.vop_strategy	= vop_generic_badop,
	.vop_pathconf	= fifo_pathconf,
	.vop_advlock	= fifo_advlock,
};
#endif /* FIFO */
246
/*
 * Global variables
 */
/* XDR-encoded boolean values, compared against / stored into RPC words. */
extern u_int32_t nfs_true, nfs_false;
/* XDR word used by the NFSv2 setattr path to mark a field as "not set". */
extern u_int32_t nfs_xdrneg1;
/* NFS statistics; the rpccnt[] and lookupcache_* counters are bumped here. */
extern struct nfsstats nfsstats;
extern const nfstype nfsv3_type[9];
/*
 * NOTE(review): presumably the number of asynchronous I/O helpers; it is
 * not referenced in the visible part of this file -- confirm at its users.
 */
int nfs_numasync = 0;
255
256void
257nfs_cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
258{
259	struct nfsnode *np;
260
261	if (vp != NULL) {
262		np = VTONFS(vp);
263		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
264	} else {
265		np = VTONFS(dvp);
266		if (!np->n_ctime)
267			np->n_ctime = np->n_vattr.va_mtime.tv_sec;
268	}
269
270	cache_enter(dvp, vp, cnp);
271}
272
273/*
274 * nfs null call from vfs.
275 */
276int
277nfs_null(struct vnode *vp, struct ucred *cred, struct proc *procp)
278{
279	struct nfsm_info	 info;
280	int			 error = 0;
281
282	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(0);
283	info.nmi_errorp = &error;
284	error = nfs_request(vp, NFSPROC_NULL, &info);
285	m_freem(info.nmi_mrep);
286	return (error);
287}
288
/*
 * nfs access vnode op.
 * For nfs version 2, the check is handed to nfsspec_access(). File accesses
 * may still fail later.
 * For nfs version 3, use the access rpc to check accessibility. If file modes
 * are changed on the server, accesses might still fail later.
 *
 * v points to a struct vop_access_args (a_vp, a_mode, a_cred, a_p).
 * Returns 0 when access is granted, EROFS for write access to a regular
 * file, directory or symlink on a read-only mount, EACCES when the server
 * does not grant all requested bits, or the error from the RPC / reply
 * parsing.  Results of 0 and EACCES computed from the reply are remembered
 * in the nfsnode (n_accstamp, n_accuid, n_accmode, n_accerror).
 */
int
nfs_access(void *v)
{
	struct vop_access_args *ap = v;
	struct vnode *vp = ap->a_vp;
	u_int32_t *tl;
	int error = 0, attrflag;
	u_int32_t mode, rmode;
	int v3 = NFS_ISV3(vp);
	int cachevalid;
	struct nfsm_info	info;

	struct nfsnode *np = VTONFS(vp);

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}

	/*
	 * Check access cache first. If a request has been made for this uid
	 * shortly before, use the cached result.
	 * The entry is usable when a stamp exists, it is younger than the
	 * attribute timeout, and it was recorded for the same uid.
	 */
	cachevalid = (np->n_accstamp != -1 &&
	    (gettime() - np->n_accstamp) < nfs_attrtimeo(np) &&
	    np->n_accuid == ap->a_cred->cr_uid);

	if (cachevalid) {
		if (!np->n_accerror) {
			/* Cached success covers any subset of the cached bits. */
			if ((np->n_accmode & ap->a_mode) == ap->a_mode)
				return (np->n_accerror);
		} else if ((np->n_accmode & ap->a_mode) == np->n_accmode)
			/* Cached failure covers any superset of the cached bits. */
			return (np->n_accerror);
	}

	/*
	 * For nfs v3, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about, but
	 * this is better than just returning anything that is lying about
	 * in the cache.
	 */
	if (v3) {
		nfsstats.rpccnt[NFSPROC_ACCESS]++;
		info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(v3) + NFSX_UNSIGNED);
		nfsm_fhtom(&info, vp, v3);
		tl = nfsm_build(&info.nmi_mb, NFSX_UNSIGNED);
		/* Translate the V* request bits into NFSV3ACCESS_* bits. */
		if (ap->a_mode & VREAD)
			mode = NFSV3ACCESS_READ;
		else
			mode = 0;
		if (vp->v_type == VDIR) {
			/* Directories: write implies delete, exec means lookup. */
			if (ap->a_mode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
					 NFSV3ACCESS_DELETE);
			if (ap->a_mode & VEXEC)
				mode |= NFSV3ACCESS_LOOKUP;
		} else {
			if (ap->a_mode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
			if (ap->a_mode & VEXEC)
				mode |= NFSV3ACCESS_EXECUTE;
		}
		*tl = txdr_unsigned(mode);

		info.nmi_procp = ap->a_p;
		info.nmi_cred = ap->a_cred;
		info.nmi_errorp = &error;
		error = nfs_request(vp, NFSPROC_ACCESS, &info);

		/* Post-op attributes are parsed before the status is acted on. */
		if (nfsm_postop_attr(&info, &vp, &attrflag) != 0)
			goto nfsmout;
		if (error) {
			/* RPC failure: return it without touching the access cache. */
			m_freem(info.nmi_mrep);
			goto nfsmout;
		}

		tl = (uint32_t *)nfsm_dissect(&info, NFSX_UNSIGNED);
		if (tl == NULL)
			goto nfsmout;
		rmode = fxdr_unsigned(u_int32_t, *tl);
		/*
		 * The NFS V3 spec does not clarify whether or not
		 * the returned access bits can be a superset of
		 * the ones requested, so...
		 */
		if ((rmode & mode) != mode)
			error = EACCES;

		m_freem(info.nmi_mrep);
	} else
		return (nfsspec_access(ap));


	/*
	 * If we got the same result as for a previous, different request, OR
	 * it in. Don't update the timestamp in that case.
	 */
	if (!error || error == EACCES) {
		if (cachevalid && np->n_accstamp != -1 &&
		    error == np->n_accerror) {
			if (!error)
				/* Another success: widen the set of granted bits. */
				np->n_accmode |= ap->a_mode;
			else {
				/*
				 * Another failure: if the request was a subset
				 * of the cached failing bits, narrow the cached
				 * set to it.
				 */
				if ((np->n_accmode & ap->a_mode) == ap->a_mode)
					np->n_accmode = ap->a_mode;
			}
		} else {
			/* Different result, uid or stale entry: start a new one. */
			np->n_accstamp = gettime();
			np->n_accuid = ap->a_cred->cr_uid;
			np->n_accmode = ap->a_mode;
			np->n_accerror = error;
		}
	}
nfsmout:
	return (error);
}
424
425/*
426 * nfs open vnode op
427 * Check to see if the type is ok
428 * and that deletion is not in progress.
429 * For paged in text files, you will need to flush the page cache
430 * if consistency is lost.
431 */
432int
433nfs_open(void *v)
434{
435	struct vop_open_args *ap = v;
436	struct vnode *vp = ap->a_vp;
437	struct nfsnode *np = VTONFS(vp);
438	struct vattr vattr;
439	int error;
440
441	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
442#ifdef DIAGNOSTIC
443		printf("open eacces vtyp=%d\n",vp->v_type);
444#endif
445		return (EACCES);
446	}
447
448	/*
449	 * Initialize read and write creds here, for swapfiles
450	 * and other paths that don't set the creds themselves.
451	 */
452
453	if (ap->a_mode & FREAD) {
454		if (np->n_rcred) {
455			crfree(np->n_rcred);
456		}
457		np->n_rcred = ap->a_cred;
458		crhold(np->n_rcred);
459	}
460	if (ap->a_mode & FWRITE) {
461		if (np->n_wcred) {
462			crfree(np->n_wcred);
463		}
464		np->n_wcred = ap->a_cred;
465		crhold(np->n_wcred);
466	}
467
468	if (np->n_flag & NMODIFIED) {
469		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p);
470		if (error == EINTR)
471			return (error);
472		uvm_vnp_uncache(vp);
473		NFS_INVALIDATE_ATTRCACHE(np);
474		if (vp->v_type == VDIR)
475			np->n_direofoffset = 0;
476		error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
477		if (error)
478			return (error);
479		np->n_mtime = vattr.va_mtime;
480	} else {
481		error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
482		if (error)
483			return (error);
484		if (timespeccmp(&np->n_mtime, &vattr.va_mtime, !=)) {
485			if (vp->v_type == VDIR)
486				np->n_direofoffset = 0;
487			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p);
488			if (error == EINTR)
489				return (error);
490			uvm_vnp_uncache(vp);
491			np->n_mtime = vattr.va_mtime;
492		}
493	}
494	/* For open/close consistency. */
495	NFS_INVALIDATE_ATTRCACHE(np);
496	return (0);
497}
498
499/*
500 * nfs close vnode op
501 * What an NFS client should do upon close after writing is a debatable issue.
502 * Most NFS clients push delayed writes to the server upon close, basically for
503 * two reasons:
504 * 1 - So that any write errors may be reported back to the client process
505 *     doing the close system call. By far the two most likely errors are
506 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
507 * 2 - To put a worst case upper bound on cache inconsistency between
508 *     multiple clients for the file.
509 * There is also a consistency problem for Version 2 of the protocol w.r.t.
510 * not being able to tell if other clients are writing a file concurrently,
511 * since there is no way of knowing if the changed modify time in the reply
512 * is only due to the write for this client.
513 * (NFS Version 3 provides weak cache consistency data in the reply that
514 *  should be sufficient to detect and handle this case.)
515 *
516 * The current code does the following:
517 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
518 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
519 *		       or commit them (this satisfies 1 and 2 except for the
520 *		       case where the server crashes after this close but
521 *		       before the commit RPC, which is felt to be "good
522 *		       enough". Changing the last argument to nfs_flush() to
523 *		       a 1 would force a commit operation, if it is felt a
524 *		       commit is necessary now.
525 */
526int
527nfs_close(void *v)
528{
529	struct vop_close_args *ap = v;
530	struct vnode *vp = ap->a_vp;
531	struct nfsnode *np = VTONFS(vp);
532	int error = 0;
533
534	if (vp->v_type == VREG) {
535	    if (np->n_flag & NMODIFIED) {
536		if (NFS_ISV3(vp)) {
537		    error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0);
538		    np->n_flag &= ~NMODIFIED;
539		} else
540		    error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p);
541		NFS_INVALIDATE_ATTRCACHE(np);
542	    }
543	    if (np->n_flag & NWRITEERR) {
544		np->n_flag &= ~NWRITEERR;
545		error = np->n_error;
546	    }
547	}
548	return (error);
549}
550
551static inline int
552nfsm_loadattr(struct nfsm_info *infop, struct vnode **vpp, struct vattr *vap)
553{
554	struct vnode *ttvp = *vpp;
555	int error;
556
557	error = nfs_loadattrcache(&ttvp, &infop->nmi_md, &infop->nmi_dpos, vap);
558	if (error != 0) {
559		m_freem(infop->nmi_mrep);
560		*infop->nmi_errorp = error;
561		return error;
562	}
563	*vpp = ttvp;
564	return 0;
565}
566
567/*
568 * nfs getattr call from vfs.
569 */
570int
571nfs_getattr(void *v)
572{
573	struct vop_getattr_args *ap = v;
574	struct vnode *vp = ap->a_vp;
575	struct nfsnode *np = VTONFS(vp);
576	struct nfsm_info	info;
577	int error = 0;
578
579	info.nmi_v3 = NFS_ISV3(vp);
580
581	/*
582	 * Update local times for special files.
583	 */
584	if (np->n_flag & (NACC | NUPD))
585		np->n_flag |= NCHG;
586	/*
587	 * First look in the cache.
588	 */
589	if (nfs_getattrcache(vp, ap->a_vap) == 0)
590		return (0);
591
592	nfsstats.rpccnt[NFSPROC_GETATTR]++;
593	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3));
594	nfsm_fhtom(&info, vp, info.nmi_v3);
595	info.nmi_procp = ap->a_p;
596	info.nmi_cred = ap->a_cred;
597	info.nmi_errorp = &error;
598	error = nfs_request(vp, NFSPROC_GETATTR, &info);
599	if (!error) {
600		if (nfsm_loadattr(&info, &vp, ap->a_vap) != 0)
601			goto nfsmout;
602	}
603	m_freem(info.nmi_mrep);
604nfsmout:
605	return (error);
606}
607
/*
 * nfs setattr call.
 *
 * v points to a struct vop_setattr_args (a_vp, a_vap, a_cred, a_p).
 * Validates the request, prepares the local caches for a size or time
 * change, performs the change with nfs_setattrrpc() and posts a kevent
 * note on the vnode.
 *
 * Returns EOPNOTSUPP if file flags are to be set, EROFS for changes on a
 * read-only mount, EISDIR for a size change on a directory, the error of
 * the buffer invalidation, or the result of the setattr RPC.
 */
int
nfs_setattr(void *v)
{
	struct vop_setattr_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr *vap = ap->a_vap;
	int hint = NOTE_ATTRIB;
	int error = 0;
	/* Size before the change; stays 0 unless the size path below runs. */
	u_quad_t tsize = 0;

	/*
	 * Setting of flags is not supported.
	 */
	if (vap->va_flags != VNOVAL)
		return (EOPNOTSUPP);

	/*
	 * Disallow write attempts if the filesystem is mounted read-only.
	 */
	if ((vap->va_uid != (uid_t)VNOVAL ||
	    vap->va_gid != (gid_t)VNOVAL ||
	    vap->va_atime.tv_nsec != VNOVAL ||
	    vap->va_mtime.tv_nsec != VNOVAL ||
	    vap->va_mode != (mode_t)VNOVAL) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			/*
			 * A size makes no sense for these types: succeed
			 * right away if nothing else was requested, otherwise
			 * drop the size and carry on with the rest.
			 */
			if (vap->va_mtime.tv_nsec == VNOVAL &&
			    vap->va_atime.tv_nsec == VNOVAL &&
			    vap->va_mode == (mode_t)VNOVAL &&
			    vap->va_uid == (uid_t)VNOVAL &&
			    vap->va_gid == (gid_t)VNOVAL)
				return (0);
			vap->va_size = VNOVAL;
			break;
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			/*
			 * Invalidate the cached buffers; V_SAVE is only
			 * passed when the new size is not zero.
			 */
			if (vap->va_size == 0)
				error = nfs_vinvalbuf(vp, 0,
				     ap->a_cred, ap->a_p);
			else
				error = nfs_vinvalbuf(vp, V_SAVE,
				     ap->a_cred, ap->a_p);
			if (error)
				return (error);
			/*
			 * Apply the new size locally before the RPC, keeping
			 * the old one so it can be restored on failure.
			 */
			tsize = np->n_size;
			np->n_size = np->n_vattr.va_size = vap->va_size;
			uvm_vnp_setsize(vp, np->n_size);
		};
	} else if ((vap->va_mtime.tv_nsec != VNOVAL ||
		vap->va_atime.tv_nsec != VNOVAL) &&
		vp->v_type == VREG &&
		(error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
		    ap->a_p)) == EINTR)
		/*
		 * Time change on a regular file: buffers are invalidated
		 * with V_SAVE first; only an interrupted invalidation
		 * aborts the call.
		 */
		return (error);
	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
	if (error && vap->va_size != VNOVAL) {
		/* The server refused: undo the local size change. */
		np->n_size = np->n_vattr.va_size = tsize;
		uvm_vnp_setsize(vp, np->n_size);
	}

	/* Flag a truncation when the requested size is below the old one. */
	if (vap->va_size != VNOVAL && vap->va_size < tsize)
		hint |= NOTE_TRUNCATE;

	/* The note is posted whether or not the RPC succeeded. */
	VN_KNOTE(vp, hint); /* XXX setattrrpc? */

	return (error);
}
692
693/* Used as *flagp for nfsm_wcc_data() below */
694#define NFSV3_WCCRATTR	0
695#define NFSV3_WCCCHK	1
696
697static inline int
698nfsm_wcc_data(struct nfsm_info *infop, struct vnode **vpp, int *flagp)
699{
700	struct timespec mtime;
701	int ttattrf, ttretf = 0;
702	uint32_t *tl;
703
704	if (infop->nmi_mrep == NULL)
705		return 0;
706
707	tl = (uint32_t *)nfsm_dissect(infop, NFSX_UNSIGNED);
708	if (tl == NULL)
709		return 1;
710	if (*tl == nfs_true) {
711		tl = (uint32_t *)nfsm_dissect(infop, 6 * NFSX_UNSIGNED);
712		if (tl == NULL)
713			return 1;
714		fxdr_nfsv3time(tl + 2, &mtime);
715		if (*flagp != NFSV3_WCCRATTR) {
716			ttretf =
717			    timespeccmp(&VTONFS(*vpp)->n_mtime, &mtime, !=);
718		}
719	}
720	if (nfsm_postop_attr(infop, vpp, &ttattrf) != 0)
721		return 1;
722	if (*flagp != NFSV3_WCCRATTR)
723		*flagp = ttretf;
724	else
725		*flagp = ttattrf;
726	return 0;
727}
728
729/*
730 * Do an nfs setattr rpc.
731 */
732int
733nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
734    struct proc *procp)
735{
736	struct nfsv2_sattr *sp;
737	struct nfsm_info	info;
738	u_int32_t *tl;
739	int error = 0, wccflag = NFSV3_WCCRATTR;
740	int v3 = NFS_ISV3(vp);
741
742	info.nmi_v3 = NFS_ISV3(vp);
743
744	nfsstats.rpccnt[NFSPROC_SETATTR]++;
745	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(v3) + NFSX_SATTR(v3));
746	nfsm_fhtom(&info, vp, v3);
747	info.nmi_errorp = &error;
748
749	if (info.nmi_v3) {
750		nfsm_v3attrbuild(&info.nmi_mb, vap, 1);
751		tl = nfsm_build(&info.nmi_mb, NFSX_UNSIGNED);
752		*tl = nfs_false;
753	} else {
754		sp = nfsm_build(&info.nmi_mb, NFSX_V2SATTR);
755		if (vap->va_mode == (mode_t)VNOVAL)
756			sp->sa_mode = nfs_xdrneg1;
757		else
758			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
759		if (vap->va_uid == (uid_t)VNOVAL)
760			sp->sa_uid = nfs_xdrneg1;
761		else
762			sp->sa_uid = txdr_unsigned(vap->va_uid);
763		if (vap->va_gid == (gid_t)VNOVAL)
764			sp->sa_gid = nfs_xdrneg1;
765		else
766			sp->sa_gid = txdr_unsigned(vap->va_gid);
767		sp->sa_size = txdr_unsigned(vap->va_size);
768		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
769		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
770	}
771
772	info.nmi_procp = procp;
773	info.nmi_cred = cred;
774	error = nfs_request(vp, NFSPROC_SETATTR, &info);
775
776	if (info.nmi_v3) {
777		if (nfsm_wcc_data(&info, &vp, &wccflag) != 0)
778			goto nfsmout;
779	}
780	else if (error == 0) {
781		if (nfsm_loadattr(&info, &vp, NULL) != 0)
782			goto nfsmout;
783	}
784
785	m_freem(info.nmi_mrep);
786nfsmout:
787	return (error);
788}
789
790static inline nfsfh_t *
791nfsm_getfh(struct nfsm_info *infop, int *sizep, int v3)
792{
793	int size;
794	if (v3) {
795		uint32_t *tl = (uint32_t *)nfsm_dissect(infop, NFSX_UNSIGNED);
796		if (tl == NULL)
797			return NULL;
798		size = fxdr_unsigned(int, *tl);
799		if (size <= 0 || size > NFSX_V3FHMAX) {
800			m_freem(infop->nmi_mrep);
801			*infop->nmi_errorp = EBADRPC;
802			return NULL;
803		}
804	} else
805		size = NFSX_V2FH;
806	*sizep = size;
807	return (nfsfh_t *)nfsm_dissect(infop, nfsm_rndup(size));
808}
809
/*
 * nfs lookup call, one step at a time...
 * First look in cache
 * If not found, unlock the directory nfsnode and do the rpc
 *
 * v points to a struct vop_lookup_args (a_dvp, a_vpp, a_cnp).  On success
 * 0 is returned and *a_vpp holds the vnode found; on failure *a_vpp is
 * NULL.  PDIRUNLOCK is cleared from cnp->cn_flags on entry and set again
 * whenever this routine leaves dvp unlocked.
 *
 * Errors produced here: EROFS for DELETE/RENAME of the last component on
 * a read-only mount, ENOTDIR if dvp is not a directory, EISDIR for a
 * RENAME whose target is dvp itself, and EJUSTRETURN (or EROFS on a
 * read-only mount) when the last component of a CREATE/RENAME does not
 * exist.  Other errors come from the cache, the access check, locking,
 * nfs_nget() or the RPC.
 */
int
nfs_lookup(void *v)
{
	struct vop_lookup_args *ap = v;
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct nfsm_info	info;
	int flags;
	struct vnode *newvp;
	struct nfsmount *nmp;
	long len;
	nfsfh_t *fhp;
	struct nfsnode *np;
	int lockparent, wantparent, error = 0, attrflag, fhsize;

	info.nmi_v3 = NFS_ISV3(dvp);
	info.nmi_errorp = &error;

	cnp->cn_flags &= ~PDIRUNLOCK;
	/* Snapshot of the flags; later changes to cn_flags are not seen here. */
	flags = cnp->cn_flags;

	*vpp = NULLVP;
	newvp = NULLVP;
	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	if (dvp->v_type != VDIR)
		return (ENOTDIR);
	lockparent = flags & LOCKPARENT;
	wantparent = flags & (LOCKPARENT|WANTPARENT);
	nmp = VFSTONFS(dvp->v_mount);
	np = VTONFS(dvp);

	/*
	 * Before tediously performing a linear scan of the directory,
	 * check the name cache to see if the directory/name pair
	 * we are looking for is known already.
	 * If the directory/name pair is found in the name cache,
	 * we have to ensure the directory has not changed from
	 * the time the cache entry has been created. If it has,
	 * the cache entry has to be ignored.
	 *
	 * A negative return from cache_lookup() skips this block and goes
	 * straight to the RPC; 0 is treated as a positive hit, ENOENT as a
	 * negative hit, and any other value is returned as an error.
	 */
	if ((error = cache_lookup(dvp, vpp, cnp)) >= 0) {
		struct vattr vattr;
		int err2;

		if (error && error != ENOENT) {
			*vpp = NULLVP;
			return (error);
		}

		/* Re-lock dvp if the cache lookup left it unlocked. */
		if (cnp->cn_flags & PDIRUNLOCK) {
			err2 = vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
			if (err2 != 0) {
				*vpp = NULLVP;
				return (err2);
			}
			cnp->cn_flags &= ~PDIRUNLOCK;
		}

		/* The caller needs search permission on the directory. */
		err2 = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_proc);
		if (err2 != 0) {
			if (error == 0) {
				/* Drop the vnode a positive hit handed us. */
				if (*vpp != dvp)
					vput(*vpp);
				else
					vrele(*vpp);
			}
			*vpp = NULLVP;
			return (err2);
		}

		if (error == ENOENT) {
			/*
			 * Negative hit: still valid if the directory's mtime
			 * matches the stamp nfs_cache_enter() stored in
			 * n_ctime; otherwise purge and ask the server.
			 */
			if (!VOP_GETATTR(dvp, &vattr, cnp->cn_cred,
			    cnp->cn_proc) && vattr.va_mtime.tv_sec ==
			    VTONFS(dvp)->n_ctime)
				return (ENOENT);
			cache_purge(dvp);
			np->n_ctime = 0;
			goto dorpc;
		}

		/*
		 * Positive hit: valid if the target's ctime matches the
		 * stamp recorded when the entry was made.
		 */
		newvp = *vpp;
		if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, cnp->cn_proc)
			&& vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime)
		{
			nfsstats.lookupcache_hits++;
			if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
				cnp->cn_flags |= SAVENAME;
			if ((!lockparent || !(flags & ISLASTCN)) &&
			     newvp != dvp) {
				VOP_UNLOCK(dvp);
				cnp->cn_flags |= PDIRUNLOCK;
			}
			return (0);
		}
		/* Stale entry: purge it, drop the vnode and do the RPC. */
		cache_purge(newvp);
		if (newvp != dvp)
			vput(newvp);
		else
			vrele(newvp);
		*vpp = NULLVP;
	}
dorpc:
	error = 0;
	newvp = NULLVP;
	nfsstats.lookupcache_misses++;
	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
	len = cnp->cn_namelen;
	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3) +
	    NFSX_UNSIGNED + nfsm_rndup(len));
	nfsm_fhtom(&info, dvp, info.nmi_v3);
	if (nfsm_strtom(&info, cnp->cn_nameptr, len, NFS_MAXNAMLEN) != 0)
		goto nfsmout;

	info.nmi_procp = cnp->cn_proc;
	info.nmi_cred = cnp->cn_cred;
	error = nfs_request(dvp, NFSPROC_LOOKUP, &info);

	if (error) {
		/* A failed v3 lookup still carries directory attributes. */
		if (info.nmi_v3) {
			if (nfsm_postop_attr(&info, &dvp, &attrflag) != 0)
				goto nfsmout;
		}
		m_freem(info.nmi_mrep);
		goto nfsmout;
	}

	if ((fhp = nfsm_getfh(&info, &fhsize, info.nmi_v3)) == NULL)
		goto nfsmout;

	/*
	 * Handle RENAME case...
	 * No name cache entry is made on this path.
	 */
	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
		/* The handle is the directory's own: refuse. */
		if (NFS_CMPFH(np, fhp, fhsize)) {
			m_freem(info.nmi_mrep);
			return (EISDIR);
		}
		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
		if (error) {
			m_freem(info.nmi_mrep);
			return (error);
		}
		newvp = NFSTOV(np);
		if (info.nmi_v3) {
			if (nfsm_postop_attr(&info, &newvp, &attrflag) != 0)
				goto nfsmout;
			if (nfsm_postop_attr(&info, &dvp, &attrflag) != 0)
				goto nfsmout;
		} else {
			if (nfsm_loadattr(&info, &newvp, NULL) != 0)
				goto nfsmout;
		}
		*vpp = newvp;
		m_freem(info.nmi_mrep);
		cnp->cn_flags |= SAVENAME;
		if (!lockparent) {
			VOP_UNLOCK(dvp);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (0);
	}

	/*
	 * The postop attr handling is duplicated for each if case,
	 * because it should be done while dvp is locked (unlocking
	 * dvp is different for each case).
	 */

	if (NFS_CMPFH(np, fhp, fhsize)) {
		/* Looked up the directory itself: take another reference. */
		vref(dvp);
		newvp = dvp;
		if (info.nmi_v3) {
			if (nfsm_postop_attr(&info, &newvp, &attrflag) != 0)
				goto nfsmout;
			if (nfsm_postop_attr(&info, &dvp, &attrflag) != 0)
				goto nfsmout;
		} else {
			if (nfsm_loadattr(&info, &newvp, NULL) != 0)
				goto nfsmout;
		}
	} else if (flags & ISDOTDOT) {
		/* "..": dvp is unlocked before the parent node is obtained. */
		VOP_UNLOCK(dvp);
		cnp->cn_flags |= PDIRUNLOCK;

		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
		if (error) {
			/* Try to hand dvp back locked, as it was on entry. */
			if (vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
			m_freem(info.nmi_mrep);
			return (error);
		}
		newvp = NFSTOV(np);

		if (info.nmi_v3) {
			if (nfsm_postop_attr(&info, &newvp, &attrflag) != 0)
				goto nfsmout;
			if (nfsm_postop_attr(&info, &dvp, &attrflag) != 0)
				goto nfsmout;
		} else {
			if (nfsm_loadattr(&info, &newvp, NULL) != 0)
				goto nfsmout;
		}

		/* Re-lock dvp only if the caller wants it locked on return. */
		if (lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE))) {
				m_freem(info.nmi_mrep);
				vput(newvp);
				return error;
			}
			cnp->cn_flags &= ~PDIRUNLOCK;
		}

	} else {
		/* Ordinary component: get its node while dvp stays locked. */
		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
		if (error) {
			m_freem(info.nmi_mrep);
			return error;
		}
		newvp = NFSTOV(np);
		if (info.nmi_v3) {
			if (nfsm_postop_attr(&info, &newvp, &attrflag) != 0)
				goto nfsmout;
			if (nfsm_postop_attr(&info, &dvp, &attrflag) != 0)
				goto nfsmout;
		} else {
			if (nfsm_loadattr(&info, &newvp, NULL) != 0)
				goto nfsmout;
		}
		if (!lockparent || !(flags & ISLASTCN)) {
			VOP_UNLOCK(dvp);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}

	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
		cnp->cn_flags |= SAVENAME;
	/* Cache the result unless it is the last component of a DELETE. */
	if ((cnp->cn_flags & MAKEENTRY) &&
	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
		nfs_cache_enter(dvp, newvp, cnp);
	}

	*vpp = newvp;
	m_freem(info.nmi_mrep);

nfsmout:
	if (error) {
		/*
		 * We get here only because of errors returned by the RPC.
		 * Otherwise we'd already have returned.
		 */
		/* Remember the miss as a negative entry, except for CREATE. */
		if (error == ENOENT && (cnp->cn_flags & MAKEENTRY) &&
		    cnp->cn_nameiop != CREATE) {
			nfs_cache_enter(dvp, NULL, cnp);
		}
		if (newvp != NULLVP) {
			if (newvp != dvp)
				vput(newvp);
			else
				vrele(newvp);
		}
		/*
		 * A missing last component is what CREATE and RENAME hope
		 * for: report EJUSTRETURN unless the mount is read-only.
		 */
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN) && error == ENOENT) {
			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
				error = EROFS;
			else
				error = EJUSTRETURN;
		}
		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
			cnp->cn_flags |= SAVENAME;
		*vpp = NULL;
	}
	return (error);
}
1091
1092/*
1093 * nfs read call.
1094 * Just call nfs_bioread() to do the work.
1095 */
1096int
1097nfs_read(void *v)
1098{
1099	struct vop_read_args *ap = v;
1100	struct vnode *vp = ap->a_vp;
1101
1102	if (vp->v_type != VREG)
1103		return (EPERM);
1104	return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
1105}
1106
1107/*
1108 * nfs readlink call
1109 */
1110int
1111nfs_readlink(void *v)
1112{
1113	struct vop_readlink_args *ap = v;
1114	struct vnode *vp = ap->a_vp;
1115
1116	if (vp->v_type != VLNK)
1117		return (EPERM);
1118	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
1119}
1120
1121/*
1122 * Lock an inode.
1123 */
1124int
1125nfs_lock(void *v)
1126{
1127	struct vop_lock_args *ap = v;
1128	struct vnode *vp = ap->a_vp;
1129
1130	return rrw_enter(&VTONFS(vp)->n_lock, ap->a_flags & LK_RWFLAGS);
1131}
1132
1133/*
1134 * Unlock an inode.
1135 */
1136int
1137nfs_unlock(void *v)
1138{
1139	struct vop_unlock_args *ap = v;
1140	struct vnode *vp = ap->a_vp;
1141
1142	rrw_exit(&VTONFS(vp)->n_lock);
1143	return 0;
1144}
1145
1146/*
1147 * Check for a locked inode.
1148 */
1149int
1150nfs_islocked(void *v)
1151{
1152	struct vop_islocked_args *ap = v;
1153
1154	return rrw_status(&VTONFS(ap->a_vp)->n_lock);
1155}
1156
1157/*
1158 * Do a readlink rpc.
1159 * Called by nfs_doio() from below the buffer cache.
1160 */
1161int
1162nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
1163{
1164	struct nfsm_info	info;
1165	int error = 0, len, attrflag;
1166
1167	info.nmi_v3 = NFS_ISV3(vp);
1168	info.nmi_errorp = &error;
1169
1170	nfsstats.rpccnt[NFSPROC_READLINK]++;
1171	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3));
1172	nfsm_fhtom(&info, vp, info.nmi_v3);
1173
1174	info.nmi_procp = curproc;
1175	info.nmi_cred = cred;
1176	error = nfs_request(vp, NFSPROC_READLINK, &info);
1177
1178	if (info.nmi_v3) {
1179		if (nfsm_postop_attr(&info, &vp, &attrflag) != 0)
1180			goto nfsmout;
1181	}
1182	if (!error) {
1183		if (nfsm_strsiz(&info, &len, NFS_MAXPATHLEN) != 0)
1184			goto nfsmout;
1185		if (nfsm_mtouio(&info, uiop, len) != 0)
1186			goto nfsmout;
1187	}
1188
1189	m_freem(info.nmi_mrep);
1190
1191nfsmout:
1192	return (error);
1193}
1194
/*
 * nfs read rpc call
 * Ditto above
 *
 * Issues READ rpcs of at most nm_rsize bytes each until the request in
 * uiop is satisfied, the server reports end of file (v3), or a read comes
 * back short (v2).  Returns 0 or an error number.
 */
int
nfs_readrpc(struct vnode *vp, struct uio *uiop)
{
	struct nfsm_info	info;
	u_int32_t *tl;
	struct nfsmount *nmp;
	int error = 0, len, retlen, tsiz, eof, attrflag;

	info.nmi_v3 = NFS_ISV3(vp);
	info.nmi_errorp = &error;

	eof = 0;

	nmp = VFSTONFS(vp->v_mount);
	tsiz = uiop->uio_resid;
	/*
	 * The v2 request built below carries the offset in a single 32-bit
	 * word, so refuse v2 reads that would reach past 0xffffffff.
	 */
	if (uiop->uio_offset + tsiz > 0xffffffff && !info.nmi_v3)
		return (EFBIG);
	/* One READ rpc per iteration; tsiz is the byte count still wanted. */
	while (tsiz > 0) {
		nfsstats.rpccnt[NFSPROC_READ]++;
		/* Never ask for more than the mount's read size. */
		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
		info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3) +
		    NFSX_UNSIGNED * 3);
		nfsm_fhtom(&info, vp, info.nmi_v3);
		tl = nfsm_build(&info.nmi_mb, NFSX_UNSIGNED * 3);
		if (info.nmi_v3) {
			/* v3: 64-bit offset followed by the count. */
			txdr_hyper(uiop->uio_offset, tl);
			*(tl + 2) = txdr_unsigned(len);
		} else {
			/* v2: 32-bit offset, count, and a third word sent as 0. */
			*tl++ = txdr_unsigned(uiop->uio_offset);
			*tl++ = txdr_unsigned(len);
			*tl = 0;
		}

		info.nmi_procp = curproc;
		info.nmi_cred = VTONFS(vp)->n_rcred;
		error = nfs_request(vp, NFSPROC_READ, &info);
		if (info.nmi_v3) {
			/*
			 * NOTE(review): on helper failure we leave without
			 * freeing the reply here; presumably the nfsm_*
			 * helper has already disposed of it -- confirm.
			 */
			if (nfsm_postop_attr(&info, &vp, &attrflag) != 0)
				goto nfsmout;
		}
		if (error) {
			/* The rpc itself failed: drop the reply and give up. */
			m_freem(info.nmi_mrep);
			goto nfsmout;
		}

		if (info.nmi_v3) {
			/* Two words follow; only the second (eof) is used here. */
			tl = (uint32_t *)nfsm_dissect(&info, 2 * NFSX_UNSIGNED);
			if (tl == NULL)
				goto nfsmout;
			eof = fxdr_unsigned(int, *(tl + 1));
		} else {
			if (nfsm_loadattr(&info, &vp, NULL) != 0)
				goto nfsmout;
		}

		/* Length of the returned data, bounded by the read size. */
		if (nfsm_strsiz(&info, &retlen, nmp->nm_rsize) != 0)
			goto nfsmout;
		if (nfsm_mtouio(&info, uiop, retlen) != 0)
			goto nfsmout;
		m_freem(info.nmi_mrep);
		tsiz -= retlen;
		/*
		 * Stop early: v3 on eof or an empty reply, v2 on a reply
		 * shorter than what was asked for.
		 */
		if (info.nmi_v3) {
			if (eof || retlen == 0)
				tsiz = 0;
		} else if (retlen < len)
			tsiz = 0;
	}

nfsmout:
	return (error);
}
1270
1271/*
1272 * nfs write call
1273 */
1274int
1275nfs_writerpc(struct vnode *vp, struct uio *uiop, int *iomode, int *must_commit)
1276{
1277	struct nfsm_info	info;
1278	u_int32_t *tl;
1279	int32_t backup;
1280	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1281	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
1282	int committed = NFSV3WRITE_FILESYNC;
1283
1284	info.nmi_v3 = NFS_ISV3(vp);
1285	info.nmi_errorp = &error;
1286
1287#ifdef DIAGNOSTIC
1288	if (uiop->uio_iovcnt != 1)
1289		panic("nfs: writerpc iovcnt > 1");
1290#endif
1291	*must_commit = 0;
1292	tsiz = uiop->uio_resid;
1293	if (uiop->uio_offset + tsiz > 0xffffffff && !info.nmi_v3)
1294		return (EFBIG);
1295	while (tsiz > 0) {
1296		nfsstats.rpccnt[NFSPROC_WRITE]++;
1297		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
1298		info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3)
1299		    + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
1300		nfsm_fhtom(&info, vp, info.nmi_v3);
1301		if (info.nmi_v3) {
1302			tl = nfsm_build(&info.nmi_mb, 5 * NFSX_UNSIGNED);
1303			txdr_hyper(uiop->uio_offset, tl);
1304			tl += 2;
1305			*tl++ = txdr_unsigned(len);
1306			*tl++ = txdr_unsigned(*iomode);
1307			*tl = txdr_unsigned(len);
1308		} else {
1309			u_int32_t x;
1310
1311			tl = nfsm_build(&info.nmi_mb, 4 * NFSX_UNSIGNED);
1312			/* Set both "begin" and "current" to non-garbage. */
1313			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
1314			*tl++ = x;	/* "begin offset" */
1315			*tl++ = x;	/* "current offset" */
1316			x = txdr_unsigned(len);
1317			*tl++ = x;	/* total to this offset */
1318			*tl = x;	/* size of this write */
1319
1320		}
1321		nfsm_uiotombuf(&info.nmi_mb, uiop, len);
1322
1323		info.nmi_procp = curproc;
1324		info.nmi_cred = VTONFS(vp)->n_wcred;
1325		error = nfs_request(vp, NFSPROC_WRITE, &info);
1326		if (info.nmi_v3) {
1327			wccflag = NFSV3_WCCCHK;
1328			if (nfsm_wcc_data(&info, &vp, &wccflag) != 0)
1329				goto nfsmout;
1330		}
1331
1332		if (error) {
1333			m_freem(info.nmi_mrep);
1334			goto nfsmout;
1335		}
1336
1337		if (info.nmi_v3) {
1338			wccflag = NFSV3_WCCCHK;
1339			tl = (uint32_t *)nfsm_dissect(&info,
1340			    2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF);
1341			if (tl == NULL)
1342				goto nfsmout;
1343			rlen = fxdr_unsigned(int, *tl++);
1344			if (rlen <= 0) {
1345				error = NFSERR_IO;
1346				break;
1347			} else if (rlen < len) {
1348				backup = len - rlen;
1349				uiop->uio_iov->iov_base =
1350				    (char *)uiop->uio_iov->iov_base -
1351				    backup;
1352				uiop->uio_iov->iov_len += backup;
1353				uiop->uio_offset -= backup;
1354				uiop->uio_resid += backup;
1355				len = rlen;
1356			}
1357			commit = fxdr_unsigned(int, *tl++);
1358
1359			/*
1360			 * Return the lowest commitment level
1361			 * obtained by any of the RPCs.
1362			 */
1363			if (committed == NFSV3WRITE_FILESYNC)
1364				committed = commit;
1365			else if (committed == NFSV3WRITE_DATASYNC &&
1366				commit == NFSV3WRITE_UNSTABLE)
1367				committed = commit;
1368			if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) {
1369				bcopy(tl, nmp->nm_verf,
1370				    NFSX_V3WRITEVERF);
1371				nmp->nm_flag |= NFSMNT_HASWRITEVERF;
1372			} else if (bcmp(tl,
1373			    nmp->nm_verf, NFSX_V3WRITEVERF)) {
1374				*must_commit = 1;
1375				bcopy(tl, nmp->nm_verf,
1376				    NFSX_V3WRITEVERF);
1377			}
1378		} else {
1379			if (nfsm_loadattr(&info, &vp, NULL) != 0)
1380				goto nfsmout;
1381		}
1382		if (wccflag)
1383		    VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
1384		m_freem(info.nmi_mrep);
1385		tsiz -= len;
1386	}
1387nfsmout:
1388	*iomode = committed;
1389	if (error)
1390		uiop->uio_resid = tsiz;
1391	return (error);
1392}
1393
/*
 * Decode the file handle and attributes of a newly created object from
 * the reply held in infop.
 *
 * For v3 the handle and the attributes are each preceded by a presence
 * word; for v2 the handle is always decoded.  When a handle is decoded,
 * nfs_nget() is used to obtain the matching nfsnode and its vnode is
 * stored in *vpp.
 *
 * Returns 0 on success with *flagp set; *flagp is non-zero only when both
 * a handle and attributes were decoded, so *vpp may have been set even
 * though *flagp ends up zero.  Returns 1 when one of the decode helpers
 * fails, or the nfs_nget() error after freeing the reply and storing that
 * error through nmi_errorp.  *flagp is left untouched on failure.
 */
static inline int
nfsm_mtofh(struct nfsm_info *infop, struct vnode *dvp, struct vnode **vpp,
    int *flagp)
{
	struct nfsnode *ttnp;
	nfsfh_t *ttfhp;
	int ttfhsize;
	uint32_t *tl;
	int error;
	int flag;

	if (infop->nmi_v3) {
		/* v3: a word saying whether a file handle follows. */
		tl = (uint32_t *)nfsm_dissect(infop, NFSX_UNSIGNED);
		if (tl == NULL)
			return 1;
		flag = fxdr_unsigned(int, *tl);
	} else
		flag = 1;
	if (flag) {
		if ((ttfhp = nfsm_getfh(infop, &ttfhsize, infop->nmi_v3)) ==
		    NULL) {
			return 1;
		}
		error = nfs_nget(dvp->v_mount, ttfhp, ttfhsize, &ttnp);
		if (error != 0) {
			/* Hand the real error back through nmi_errorp too. */
			m_freem(infop->nmi_mrep);
			*infop->nmi_errorp = error;
			return error;
		}
		*vpp = NFSTOV(ttnp);
	}
	if (infop->nmi_v3) {
		/* v3: a word saying whether attributes follow. */
		tl = (uint32_t *)nfsm_dissect(infop, NFSX_UNSIGNED);
		if (tl == NULL)
			return 1;
		if (flag)
			flag = fxdr_unsigned(int, *tl);
		else if (fxdr_unsigned(int, *tl)) {
			/* Attributes without a handle: skip over them. */
			if (nfsm_adv(infop, NFSX_V3FATTR) != 0)
				return 1;
		}
	}
	if (flag) {
		if (nfsm_loadattr(infop, vpp, NULL) != 0)
			return 1;
	}
	*flagp = flag;
	return 0;
}
1443
/*
 * nfs mknod rpc
 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 * mode set to specify the file type and the size field for rdev.
 *
 * Only VCHR, VBLK, VFIFO and VSOCK are accepted; any other type aborts the
 * operation and returns EOPNOTSUPP.  On success the new vnode is stored in
 * *vpp.  Past the type check, the pathname buffer in cnp is returned to
 * namei_pool on every path.
 */
int
nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
    struct vattr *vap)
{
	struct nfsv2_sattr *sp;
	struct nfsm_info	info;
	u_int32_t *tl;
	struct vnode *newvp = NULL;
	struct nfsnode *np = NULL;
	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
	u_int32_t rdev;

	info.nmi_v3 = NFS_ISV3(dvp);
	info.nmi_errorp = &error;

	/* rdev is only consumed by the v2 branch below, as sa_size. */
	if (vap->va_type == VCHR || vap->va_type == VBLK)
		rdev = txdr_unsigned(vap->va_rdev);
	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
		rdev = nfs_xdrneg1;
	else {
		VOP_ABORTOP(dvp, cnp);
		return (EOPNOTSUPP);
	}
	nfsstats.rpccnt[NFSPROC_MKNOD]++;
	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3) +
	    4 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) +
	    NFSX_SATTR(info.nmi_v3));
	nfsm_fhtom(&info, dvp, info.nmi_v3);
	if (nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
	    NFS_MAXNAMLEN) != 0)
		goto nfsmout;

	if (info.nmi_v3) {
		/* v3: file type, attributes, then major/minor for devices. */
		tl = nfsm_build(&info.nmi_mb, NFSX_UNSIGNED);
		*tl++ = vtonfsv3_type(vap->va_type);
		nfsm_v3attrbuild(&info.nmi_mb, vap, 0);
		if (vap->va_type == VCHR || vap->va_type == VBLK) {
			tl = nfsm_build(&info.nmi_mb, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(major(vap->va_rdev));
			*tl = txdr_unsigned(minor(vap->va_rdev));
		}
	} else {
		/* v2: type goes in the mode bits, rdev in the size field. */
		sp = nfsm_build(&info.nmi_mb, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = rdev;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}

	KASSERT(cnp->cn_proc == curproc);
	info.nmi_procp = cnp->cn_proc;
	info.nmi_cred = cnp->cn_cred;
	error = nfs_request(dvp, NFSPROC_MKNOD, &info);
	if (!error) {
		if (nfsm_mtofh(&info, dvp, &newvp, &gotvp) != 0)
			goto nfsmout;
		if (!gotvp) {
			/* The reply did not yield a vnode: look the name up. */
			error = nfs_lookitup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
			if (!error)
				newvp = NFSTOV(np);
		}
	}
	if (info.nmi_v3) {
		if (nfsm_wcc_data(&info, &dvp, &wccflag) != 0)
			goto nfsmout;
	}
	m_freem(info.nmi_mrep);

nfsmout:
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		if (cnp->cn_flags & MAKEENTRY)
			nfs_cache_enter(dvp, newvp, cnp);
		*vpp = newvp;
	}
	pool_put(&namei_pool, cnp->cn_pnbuf);
	/* The directory was (or may have been) changed by this rpc. */
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
	return (error);
}
1535
1536/*
1537 * nfs mknod vop
1538 * just call nfs_mknodrpc() to do the work.
1539 */
1540int
1541nfs_mknod(void *v)
1542{
1543	struct vop_mknod_args *ap = v;
1544	struct vnode *newvp;
1545	int error;
1546
1547	error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap);
1548	if (!error)
1549		vput(newvp);
1550
1551	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
1552
1553	return (error);
1554}
1555
1556int
1557nfs_create(void *v)
1558{
1559	struct vop_create_args *ap = v;
1560	struct vnode *dvp = ap->a_dvp;
1561	struct vattr *vap = ap->a_vap;
1562	struct componentname *cnp = ap->a_cnp;
1563	struct nfsv2_sattr *sp;
1564	struct nfsm_info	info;
1565	struct timespec ts;
1566	u_int32_t *tl;
1567	struct nfsnode *np = NULL;
1568	struct vnode *newvp = NULL;
1569	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
1570
1571	info.nmi_v3 = NFS_ISV3(dvp);
1572	info.nmi_errorp = &error;
1573
1574	/*
1575	 * Oops, not for me..
1576	 */
1577	if (vap->va_type == VSOCK)
1578		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
1579
1580	if (vap->va_vaflags & VA_EXCLUSIVE)
1581		fmode |= O_EXCL;
1582
1583again:
1584	nfsstats.rpccnt[NFSPROC_CREATE]++;
1585	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3) +
1586	    2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) +
1587	    NFSX_SATTR(info.nmi_v3));
1588	nfsm_fhtom(&info, dvp, info.nmi_v3);
1589	if (nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
1590	    NFS_MAXNAMLEN) != 0)
1591		goto nfsmout;
1592	if (info.nmi_v3) {
1593		tl = nfsm_build(&info.nmi_mb, NFSX_UNSIGNED);
1594		if (fmode & O_EXCL) {
1595			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
1596			tl = nfsm_build(&info.nmi_mb, NFSX_V3CREATEVERF);
1597			arc4random_buf(tl, sizeof(*tl) * 2);
1598		} else {
1599			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
1600			nfsm_v3attrbuild(&info.nmi_mb, vap, 0);
1601		}
1602	} else {
1603		sp = nfsm_build(&info.nmi_mb, NFSX_V2SATTR);
1604		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
1605		sp->sa_uid = nfs_xdrneg1;
1606		sp->sa_gid = nfs_xdrneg1;
1607		sp->sa_size = 0;
1608		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
1609		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
1610	}
1611
1612	KASSERT(cnp->cn_proc == curproc);
1613	info.nmi_procp = cnp->cn_proc;
1614	info.nmi_cred = cnp->cn_cred;
1615	error = nfs_request(dvp, NFSPROC_CREATE, &info);
1616	if (!error) {
1617		if (nfsm_mtofh(&info, dvp, &newvp, &gotvp) != 0)
1618			goto nfsmout;
1619		if (!gotvp) {
1620			error = nfs_lookitup(dvp, cnp->cn_nameptr,
1621			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
1622			if (!error)
1623				newvp = NFSTOV(np);
1624		}
1625	}
1626	if (info.nmi_v3) {
1627		if (nfsm_wcc_data(&info, &dvp, &wccflag) != 0)
1628			goto nfsmout;
1629	}
1630	m_freem(info.nmi_mrep);
1631
1632nfsmout:
1633	if (error) {
1634		if (newvp) {
1635			vput(newvp);
1636			newvp = NULL;
1637		}
1638		if (info.nmi_v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
1639			fmode &= ~O_EXCL;
1640			goto again;
1641		}
1642	} else if (info.nmi_v3 && (fmode & O_EXCL)) {
1643		getnanotime(&ts);
1644		if (vap->va_atime.tv_nsec == VNOVAL)
1645			vap->va_atime = ts;
1646		if (vap->va_mtime.tv_nsec == VNOVAL)
1647			vap->va_mtime = ts;
1648		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
1649	}
1650	if (!error) {
1651		if (cnp->cn_flags & MAKEENTRY)
1652			nfs_cache_enter(dvp, newvp, cnp);
1653		*ap->a_vpp = newvp;
1654	}
1655	pool_put(&namei_pool, cnp->cn_pnbuf);
1656	VTONFS(dvp)->n_flag |= NMODIFIED;
1657	if (!wccflag)
1658		NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
1659	VN_KNOTE(ap->a_dvp, NOTE_WRITE);
1660	return (error);
1661}
1662
1663/*
1664 * nfs file remove call
1665 * To try and make nfs semantics closer to ufs semantics, a file that has
1666 * other processes using the vnode is renamed instead of removed and then
1667 * removed later on the last close.
1668 * - If v_usecount > 1
1669 *	  If a rename is not already in the works
1670 *	     call nfs_sillyrename() to set it up
1671 *     else
1672 *	  do the remove rpc
1673 */
1674int
1675nfs_remove(void *v)
1676{
1677	struct vop_remove_args *ap = v;
1678	struct vnode *vp = ap->a_vp;
1679	struct vnode *dvp = ap->a_dvp;
1680	struct componentname *cnp = ap->a_cnp;
1681	struct nfsnode *np = VTONFS(vp);
1682	int error = 0;
1683	struct vattr vattr;
1684
1685#ifdef DIAGNOSTIC
1686	if ((cnp->cn_flags & HASBUF) == 0)
1687		panic("nfs_remove: no name");
1688	if (vp->v_usecount < 1)
1689		panic("nfs_remove: bad v_usecount");
1690#endif
1691	if (vp->v_type == VDIR)
1692		error = EPERM;
1693	else if (vp->v_usecount == 1 || (np->n_sillyrename &&
1694	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
1695	    vattr.va_nlink > 1)) {
1696		/*
1697		 * Purge the name cache so that the chance of a lookup for
1698		 * the name succeeding while the remove is in progress is
1699		 * minimized. Without node locking it can still happen, such
1700		 * that an I/O op returns ESTALE, but since you get this if
1701		 * another host removes the file..
1702		 */
1703		cache_purge(vp);
1704		/*
1705		 * throw away biocache buffers, mainly to avoid
1706		 * unnecessary delayed writes later.
1707		 */
1708		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc);
1709		/* Do the rpc */
1710		if (error != EINTR)
1711			error = nfs_removerpc(dvp, cnp->cn_nameptr,
1712				cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc);
1713		/*
1714		 * Kludge City: If the first reply to the remove rpc is lost..
1715		 *   the reply to the retransmitted request will be ENOENT
1716		 *   since the file was in fact removed
1717		 *   Therefore, we cheat and return success.
1718		 */
1719		if (error == ENOENT)
1720			error = 0;
1721	} else if (!np->n_sillyrename)
1722		error = nfs_sillyrename(dvp, vp, cnp);
1723	pool_put(&namei_pool, cnp->cn_pnbuf);
1724	NFS_INVALIDATE_ATTRCACHE(np);
1725	VN_KNOTE(vp, NOTE_DELETE);
1726	VN_KNOTE(dvp, NOTE_WRITE);
1727	return (error);
1728}
1729
1730/*
1731 * nfs file remove rpc called from nfs_inactive
1732 */
1733int
1734nfs_removeit(struct sillyrename *sp)
1735{
1736	KASSERT(VOP_ISLOCKED(sp->s_dvp));
1737	/*
1738	 * Make sure that the directory vnode is still valid.
1739	 *
1740	 * NFS can potentially try to nuke a silly *after* the directory
1741	 * has already been pushed out on a forced unmount. Since the silly
1742	 * is going to go away anyway, this is fine.
1743	 */
1744	if (sp->s_dvp->v_type == VBAD)
1745		return (0);
1746	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
1747		NULL));
1748}
1749
1750/*
1751 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
1752 */
1753int
1754nfs_removerpc(struct vnode *dvp, char *name, int namelen, struct ucred *cred,
1755    struct proc *proc)
1756{
1757	struct nfsm_info	info;
1758	int error = 0, wccflag = NFSV3_WCCRATTR;
1759
1760	info.nmi_v3 = NFS_ISV3(dvp);
1761	info.nmi_errorp = &error;
1762
1763	nfsstats.rpccnt[NFSPROC_REMOVE]++;
1764	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3) +
1765	     NFSX_UNSIGNED + nfsm_rndup(namelen));
1766	nfsm_fhtom(&info, dvp, info.nmi_v3);
1767	if (nfsm_strtom(&info, name, namelen, NFS_MAXNAMLEN) != 0)
1768		goto nfsmout;
1769
1770	info.nmi_procp = proc;
1771	info.nmi_cred = cred;
1772	error = nfs_request(dvp, NFSPROC_REMOVE, &info);
1773	if (info.nmi_v3) {
1774		if (nfsm_wcc_data(&info, &dvp, &wccflag) != 0)
1775			goto nfsmout;
1776	}
1777	m_freem(info.nmi_mrep);
1778
1779nfsmout:
1780	VTONFS(dvp)->n_flag |= NMODIFIED;
1781	if (!wccflag)
1782		NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
1783	return (error);
1784}
1785
/*
 * nfs file rename call
 *
 * Renames fvp (in fdvp) to the target name in tdvp, replacing tvp if it
 * exists.  All four vnodes passed in are released before returning, on
 * both the success and the error path.
 */
int
nfs_rename(void *v)
{
	struct vop_rename_args	*ap = v;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	int error;

#ifdef DIAGNOSTIC
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("nfs_rename: no name");
#endif
	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * When the sillyrename succeeds, tvp is released here and cleared
	 * so that the exit path below does not release it a second time.
	 */
	if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename &&
	    tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		VN_KNOTE(tvp, NOTE_DELETE);
		vput(tvp);
		tvp = NULL;
	}

	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
		tcnp->cn_proc);

	/* Both directories are reported as written, even on failure. */
	VN_KNOTE(fdvp, NOTE_WRITE);
	VN_KNOTE(tdvp, NOTE_WRITE);

	/* A directory moved: purge name cache entries of the parents. */
	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}
out:
	/* tdvp is only vrele'd when it is the same vnode as tvp. */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}
1852
1853/*
1854 * nfs file rename rpc called from nfs_remove() above
1855 */
1856int
1857nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
1858    struct sillyrename *sp)
1859{
1860	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
1861		sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, curproc));
1862}
1863
1864/*
1865 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
1866 */
1867int
1868nfs_renamerpc(struct vnode *fdvp, char *fnameptr, int fnamelen,
1869    struct vnode *tdvp, char *tnameptr, int tnamelen, struct ucred *cred,
1870    struct proc *proc)
1871{
1872	struct nfsm_info	info;
1873	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
1874
1875	info.nmi_v3 = NFS_ISV3(fdvp);
1876	info.nmi_errorp = &error;
1877
1878	nfsstats.rpccnt[NFSPROC_RENAME]++;
1879	info.nmi_mb = info.nmi_mreq = nfsm_reqhead((NFSX_FH(info.nmi_v3) +
1880	    NFSX_UNSIGNED) * 2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen));
1881	nfsm_fhtom(&info, fdvp, info.nmi_v3);
1882	if (nfsm_strtom(&info, fnameptr, fnamelen, NFS_MAXNAMLEN) != 0)
1883		goto nfsmout;
1884	nfsm_fhtom(&info, tdvp, info.nmi_v3);
1885	if (nfsm_strtom(&info, tnameptr, tnamelen, NFS_MAXNAMLEN) != 0)
1886		goto nfsmout;
1887
1888	info.nmi_procp = proc;
1889	info.nmi_cred = cred;
1890	error = nfs_request(fdvp, NFSPROC_RENAME, &info);
1891	if (info.nmi_v3) {
1892		if (nfsm_wcc_data(&info, &fdvp, &fwccflag) != 0)
1893			goto nfsmout;
1894		if (nfsm_wcc_data(&info, &tdvp, &twccflag) != 0)
1895			goto nfsmout;
1896	}
1897	m_freem(info.nmi_mrep);
1898
1899nfsmout:
1900	VTONFS(fdvp)->n_flag |= NMODIFIED;
1901	VTONFS(tdvp)->n_flag |= NMODIFIED;
1902	if (!fwccflag)
1903		NFS_INVALIDATE_ATTRCACHE(VTONFS(fdvp));
1904	if (!twccflag)
1905		NFS_INVALIDATE_ATTRCACHE(VTONFS(tdvp));
1906	return (error);
1907}
1908
1909/*
1910 * nfs hard link create call
1911 */
1912int
1913nfs_link(void *v)
1914{
1915	struct vop_link_args *ap = v;
1916	struct vnode *vp = ap->a_vp;
1917	struct vnode *dvp = ap->a_dvp;
1918	struct componentname *cnp = ap->a_cnp;
1919	struct nfsm_info	info;
1920	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
1921
1922	info.nmi_v3 = NFS_ISV3(vp);
1923	info.nmi_errorp = &error;
1924
1925	error = vn_lock(vp, LK_EXCLUSIVE);
1926	if (error != 0) {
1927		VOP_ABORTOP(dvp, cnp);
1928		vput(dvp);
1929		return (error);
1930	}
1931
1932	/*
1933	 * Push all writes to the server, so that the attribute cache
1934	 * doesn't get "out of sync" with the server.
1935	 * XXX There should be a better way!
1936	 */
1937	VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc);
1938
1939	nfsstats.rpccnt[NFSPROC_LINK]++;
1940	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(2 * NFSX_FH(info.nmi_v3) +
1941	    NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
1942	nfsm_fhtom(&info, vp, info.nmi_v3);
1943	nfsm_fhtom(&info, dvp, info.nmi_v3);
1944	if (nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
1945	    NFS_MAXNAMLEN) != 0)
1946		goto nfsmout;
1947
1948	info.nmi_procp = cnp->cn_proc;
1949	info.nmi_cred = cnp->cn_cred;
1950	error = nfs_request(vp, NFSPROC_LINK, &info);
1951	if (info.nmi_v3) {
1952		if (nfsm_postop_attr(&info, &vp, &attrflag) != 0)
1953			goto nfsmout;
1954		if (nfsm_wcc_data(&info, &dvp, &wccflag) != 0)
1955			goto nfsmout;
1956	}
1957	m_freem(info.nmi_mrep);
1958nfsmout:
1959	pool_put(&namei_pool, cnp->cn_pnbuf);
1960	VTONFS(dvp)->n_flag |= NMODIFIED;
1961	if (!attrflag)
1962		NFS_INVALIDATE_ATTRCACHE(VTONFS(vp));
1963	if (!wccflag)
1964		NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
1965
1966	VN_KNOTE(vp, NOTE_LINK);
1967	VN_KNOTE(dvp, NOTE_WRITE);
1968	VOP_UNLOCK(vp);
1969	vput(dvp);
1970	return (error);
1971}
1972
1973/*
1974 * nfs symbolic link create call
1975 */
1976int
1977nfs_symlink(void *v)
1978{
1979	struct vop_symlink_args *ap = v;
1980	struct vnode *dvp = ap->a_dvp;
1981	struct vattr *vap = ap->a_vap;
1982	struct componentname *cnp = ap->a_cnp;
1983	struct nfsv2_sattr *sp;
1984	struct nfsm_info	info;
1985	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
1986	struct vnode *newvp = NULL;
1987
1988	info.nmi_v3 = NFS_ISV3(dvp);
1989	info.nmi_errorp = &error;
1990
1991	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
1992	slen = strlen(ap->a_target);
1993	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3) +
1994	    2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) +
1995	    NFSX_SATTR(info.nmi_v3));
1996	nfsm_fhtom(&info, dvp, info.nmi_v3);
1997	if (nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
1998	    NFS_MAXNAMLEN) != 0)
1999		goto nfsmout;
2000	if (info.nmi_v3)
2001		nfsm_v3attrbuild(&info.nmi_mb, vap, 0);
2002	if (nfsm_strtom(&info, ap->a_target, slen, NFS_MAXPATHLEN) != 0)
2003		goto nfsmout;
2004	if (!info.nmi_v3) {
2005		sp = nfsm_build(&info.nmi_mb, NFSX_V2SATTR);
2006		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
2007		sp->sa_uid = nfs_xdrneg1;
2008		sp->sa_gid = nfs_xdrneg1;
2009		sp->sa_size = nfs_xdrneg1;
2010		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
2011		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
2012	}
2013
2014	info.nmi_procp = cnp->cn_proc;
2015	info.nmi_cred = cnp->cn_cred;
2016	error = nfs_request(dvp, NFSPROC_SYMLINK, &info);
2017	if (info.nmi_v3) {
2018		if (!error) {
2019			if (nfsm_mtofh(&info, dvp, &newvp, &gotvp) != 0)
2020				goto nfsmout;
2021		}
2022		if (nfsm_wcc_data(&info, &dvp, &wccflag) != 0)
2023			goto nfsmout;
2024	}
2025	m_freem(info.nmi_mrep);
2026
2027nfsmout:
2028	if (newvp)
2029		vput(newvp);
2030	pool_put(&namei_pool, cnp->cn_pnbuf);
2031	VTONFS(dvp)->n_flag |= NMODIFIED;
2032	if (!wccflag)
2033		NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
2034	VN_KNOTE(dvp, NOTE_WRITE);
2035	vput(dvp);
2036	return (error);
2037}
2038
/*
 * nfs make dir call
 *
 * Creates the directory named by cnp in dvp.  On success the new vnode is
 * returned in *ap->a_vpp.  The pathname buffer is returned to namei_pool
 * and dvp is released with vput() on every path.
 */
int
nfs_mkdir(void *v)
{
	struct vop_mkdir_args *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	struct nfsm_info	info;
	int len;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	int gotvp = 0;

	info.nmi_v3 = NFS_ISV3(dvp);
	info.nmi_errorp = &error;

	len = cnp->cn_namelen;
	nfsstats.rpccnt[NFSPROC_MKDIR]++;
	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3) +
	    NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(info.nmi_v3));
	nfsm_fhtom(&info, dvp, info.nmi_v3);
	if (nfsm_strtom(&info, cnp->cn_nameptr, len, NFS_MAXNAMLEN) != 0)
		goto nfsmout;

	if (info.nmi_v3) {
		nfsm_v3attrbuild(&info.nmi_mb, vap, 0);
	} else {
		/* v2: only mode and times are supplied; the rest is -1. */
		sp = nfsm_build(&info.nmi_mb, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}

	info.nmi_procp = cnp->cn_proc;
	info.nmi_cred = cnp->cn_cred;
	error = nfs_request(dvp, NFSPROC_MKDIR, &info);
	if (!error) {
		if (nfsm_mtofh(&info, dvp, &newvp, &gotvp) != 0)
			goto nfsmout;
	}
	if (info.nmi_v3) {
		if (nfsm_wcc_data(&info, &dvp, &wccflag) != 0)
			goto nfsmout;
	}
	m_freem(info.nmi_mrep);

nfsmout:
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));

	/*
	 * The rpc succeeded but the reply gave us no vnode: look the new
	 * name up, and treat anything that is not a directory as EEXIST.
	 */
	if (error == 0 && newvp == NULL) {
		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
			cnp->cn_proc, &np);
		if (!error) {
			newvp = NFSTOV(np);
			if (newvp->v_type != VDIR)
				error = EEXIST;
		}
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		VN_KNOTE(dvp, NOTE_WRITE|NOTE_LINK);
		if (cnp->cn_flags & MAKEENTRY)
			nfs_cache_enter(dvp, newvp, cnp);
		*ap->a_vpp = newvp;
	}
	pool_put(&namei_pool, cnp->cn_pnbuf);
	vput(dvp);
	return (error);
}
2120
2121/*
2122 * nfs remove directory call
2123 */
2124int
2125nfs_rmdir(void *v)
2126{
2127	struct vop_rmdir_args *ap = v;
2128	struct vnode *vp = ap->a_vp;
2129	struct vnode *dvp = ap->a_dvp;
2130	struct componentname *cnp = ap->a_cnp;
2131	struct nfsm_info	info;
2132	int error = 0, wccflag = NFSV3_WCCRATTR;
2133
2134	info.nmi_v3 = NFS_ISV3(dvp);
2135	info.nmi_errorp = &error;
2136
2137	nfsstats.rpccnt[NFSPROC_RMDIR]++;
2138	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3) +
2139	    NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
2140	nfsm_fhtom(&info, dvp, info.nmi_v3);
2141	if (nfsm_strtom(&info, cnp->cn_nameptr, cnp->cn_namelen,
2142	    NFS_MAXNAMLEN) != 0)
2143		goto nfsmout;
2144
2145	info.nmi_procp = cnp->cn_proc;
2146	info.nmi_cred = cnp->cn_cred;
2147	error = nfs_request(dvp,  NFSPROC_RMDIR, &info);
2148	if (info.nmi_v3) {
2149		if (nfsm_wcc_data(&info, &dvp, &wccflag) != 0)
2150			goto nfsmout;
2151	}
2152	m_freem(info.nmi_mrep);
2153
2154nfsmout:
2155	pool_put(&namei_pool, cnp->cn_pnbuf);
2156	VTONFS(dvp)->n_flag |= NMODIFIED;
2157	if (!wccflag)
2158		NFS_INVALIDATE_ATTRCACHE(VTONFS(dvp));
2159
2160	VN_KNOTE(dvp, NOTE_WRITE|NOTE_LINK);
2161	VN_KNOTE(vp, NOTE_DELETE);
2162
2163	cache_purge(vp);
2164	vput(vp);
2165	vput(dvp);
2166	/*
2167	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
2168	 */
2169	if (error == ENOENT)
2170		error = 0;
2171	return (error);
2172}
2173
2174
/*
 * The readdir logic below has a big design bug. It stores the NFS cookie in
 * the returned uio->uio_offset but does not store the verifier (it cannot).
 * Instead, the code stores the verifier in the nfsnode and applies that
 * verifier to all cookies, no matter what verifier was originally with
 * the cookie.
 *
 * From a practical standpoint, this is not a problem since almost all
 * NFS servers do not change the validity of cookies across deletes
 * and inserts.
 */
2186
/*
 * One entry of the directory buffer that nfs_readdir() walks after a
 * readdir rpc: the entry's cookie followed by the dirent itself.
 */
struct nfs_dirent {
	u_int32_t cookie[2];	/* decoded with fxdr_hyper() into d_off */
	struct dirent dirent;
};

/*
 * Size of a struct nfs_dirent less (MAXNAMLEN + 1) bytes -- presumably the
 * d_name storage at the end of struct dirent; confirm against its layout.
 */
#define	NFS_DIRHDSIZ	(sizeof (struct nfs_dirent) - (MAXNAMLEN + 1))
/*
 * Bytes that precede the dirent inside a struct nfs_dirent; nfs_readdir()
 * subtracts this from d_reclen.
 */
#define NFS_DIRENT_OVERHEAD  offsetof(struct nfs_dirent, dirent)
2194
2195/*
2196 * nfs readdir call
2197 */
2198int
2199nfs_readdir(void *v)
2200{
2201	struct vop_readdir_args *ap = v;
2202	struct vnode *vp = ap->a_vp;
2203	struct nfsnode *np = VTONFS(vp);
2204	struct uio *uio = ap->a_uio;
2205	int tresid, error = 0;
2206	struct vattr vattr;
2207	int cnt;
2208	u_int64_t  newoff = uio->uio_offset;
2209	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2210	struct uio readdir_uio;
2211	struct iovec readdir_iovec;
2212	struct proc * p = uio->uio_procp;
2213	int done = 0, eof = 0;
2214	struct ucred *cred = ap->a_cred;
2215	void *data;
2216
2217	if (vp->v_type != VDIR)
2218		return (EPERM);
2219	/*
2220	 * First, check for hit on the EOF offset cache
2221	 */
2222	if (np->n_direofoffset != 0 &&
2223	    uio->uio_offset == np->n_direofoffset) {
2224		if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 &&
2225		    timespeccmp(&np->n_mtime, &vattr.va_mtime, ==)) {
2226			nfsstats.direofcache_hits++;
2227			*ap->a_eofflag = 1;
2228			return (0);
2229		}
2230	}
2231
2232	if (uio->uio_resid < NFS_FABLKSIZE)
2233		return (EINVAL);
2234
2235	tresid = uio->uio_resid;
2236
2237	if (uio->uio_rw != UIO_READ)
2238		return (EINVAL);
2239
2240	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
2241		(void)nfs_fsinfo(nmp, vp, cred, p);
2242
2243	cnt = 5;
2244
2245	/* M_ZERO to avoid leaking kernel data in dirent padding */
2246	data = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK|M_ZERO);
2247	do {
2248		struct nfs_dirent *ndp = data;
2249
2250		readdir_iovec.iov_len = NFS_DIRBLKSIZ;
2251		readdir_iovec.iov_base = data;
2252		readdir_uio.uio_offset = newoff;
2253		readdir_uio.uio_iov = &readdir_iovec;
2254		readdir_uio.uio_iovcnt = 1;
2255		readdir_uio.uio_segflg = UIO_SYSSPACE;
2256		readdir_uio.uio_rw = UIO_READ;
2257		readdir_uio.uio_resid = NFS_DIRBLKSIZ;
2258		readdir_uio.uio_procp = curproc;
2259
2260		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
2261			error = nfs_readdirplusrpc(vp, &readdir_uio, cred,
2262			    &eof, p);
2263			if (error == NFSERR_NOTSUPP)
2264				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
2265		}
2266		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
2267			error = nfs_readdirrpc(vp, &readdir_uio, cred, &eof);
2268
2269		if (error == NFSERR_BAD_COOKIE)
2270			error = EINVAL;
2271
2272		while (error == 0 &&
2273		    ndp < (struct nfs_dirent *)readdir_iovec.iov_base) {
2274			struct dirent *dp = &ndp->dirent;
2275			int reclen = dp->d_reclen;
2276
2277			dp->d_reclen -= NFS_DIRENT_OVERHEAD;
2278			dp->d_off = fxdr_hyper(&ndp->cookie[0]);
2279
2280			if (uio->uio_resid < dp->d_reclen) {
2281				eof = 0;
2282				done = 1;
2283				break;
2284			}
2285
2286			if ((error = uiomove(dp, dp->d_reclen, uio)))
2287				break;
2288
2289			newoff = fxdr_hyper(&ndp->cookie[0]);
2290
2291			ndp = (struct nfs_dirent *)((u_int8_t *)ndp + reclen);
2292		}
2293	} while (!error && !done && !eof && cnt--);
2294
2295	free(data, M_TEMP, NFS_DIRBLKSIZ);
2296	data = NULL;
2297
2298	uio->uio_offset = newoff;
2299
2300	if (!error && (eof || uio->uio_resid == tresid)) {
2301		nfsstats.direofcache_misses++;
2302		*ap->a_eofflag = 1;
2303		return (0);
2304	}
2305
2306	*ap->a_eofflag = 0;
2307	return (error);
2308}
2309
2310
2311/*
2312 * The function below stuff the cookies in after the name
2313 */
2314
2315/*
2316 * Readdir rpc call.
2317 */
2318int
2319nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
2320    int *end_of_directory)
2321{
2322	int len, left;
2323	struct nfs_dirent *ndp = NULL;
2324	struct dirent *dp = NULL;
2325	struct nfsm_info	info;
2326	u_int32_t *tl;
2327	caddr_t cp;
2328	nfsuint64 cookie;
2329	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2330	struct nfsnode *dnp = VTONFS(vp);
2331	u_quad_t fileno;
2332	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
2333	int attrflag;
2334
2335	info.nmi_v3 = NFS_ISV3(vp);
2336	info.nmi_errorp = &error;
2337
2338#ifdef DIAGNOSTIC
2339	if (uiop->uio_iovcnt != 1 ||
2340		(uiop->uio_resid & (NFS_DIRBLKSIZ - 1)))
2341		panic("nfs readdirrpc bad uio");
2342#endif
2343
2344	txdr_hyper(uiop->uio_offset, &cookie.nfsuquad[0]);
2345
2346	/*
2347	 * Loop around doing readdir rpc's of size nm_readdirsize
2348	 * truncated to a multiple of NFS_READDIRBLKSIZ.
2349	 * The stopping criteria is EOF or buffer full.
2350	 */
2351	while (more_dirs && bigenough) {
2352		nfsstats.rpccnt[NFSPROC_READDIR]++;
2353		info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3)
2354		    + NFSX_READDIR(info.nmi_v3));
2355		nfsm_fhtom(&info, vp, info.nmi_v3);
2356		if (info.nmi_v3) {
2357			tl = nfsm_build(&info.nmi_mb, 5 * NFSX_UNSIGNED);
2358			*tl++ = cookie.nfsuquad[0];
2359			*tl++ = cookie.nfsuquad[1];
2360			if (cookie.nfsuquad[0] == 0 &&
2361			    cookie.nfsuquad[1] == 0) {
2362				*tl++ = 0;
2363				*tl++ = 0;
2364			} else {
2365				*tl++ = dnp->n_cookieverf.nfsuquad[0];
2366				*tl++ = dnp->n_cookieverf.nfsuquad[1];
2367			}
2368		} else {
2369			tl = nfsm_build(&info.nmi_mb, 2 * NFSX_UNSIGNED);
2370			*tl++ = cookie.nfsuquad[1];
2371		}
2372		*tl = txdr_unsigned(nmp->nm_readdirsize);
2373
2374		info.nmi_procp = uiop->uio_procp;
2375		info.nmi_cred = cred;
2376		error = nfs_request(vp, NFSPROC_READDIR, &info);
2377		if (info.nmi_v3) {
2378			if (nfsm_postop_attr(&info, &vp, &attrflag) != 0)
2379				goto nfsmout;
2380		}
2381
2382		if (error) {
2383			m_freem(info.nmi_mrep);
2384			goto nfsmout;
2385		}
2386
2387		if (info.nmi_v3) {
2388			tl = (uint32_t *)nfsm_dissect(&info, 2 * NFSX_UNSIGNED);
2389			if (tl == NULL)
2390				goto nfsmout;
2391			dnp->n_cookieverf.nfsuquad[0] = *tl++;
2392			dnp->n_cookieverf.nfsuquad[1] = *tl;
2393		}
2394
2395		tl = (uint32_t *)nfsm_dissect(&info, NFSX_UNSIGNED);
2396		if (tl == NULL)
2397			goto nfsmout;
2398		more_dirs = fxdr_unsigned(int, *tl);
2399
2400		/* loop thru the dir entries, doctoring them to dirent form */
2401		while (more_dirs && bigenough) {
2402			if (info.nmi_v3) {
2403				tl = (uint32_t *)nfsm_dissect(&info,
2404				    3 * NFSX_UNSIGNED);
2405				if (tl == NULL)
2406					goto nfsmout;
2407				fileno = fxdr_hyper(tl);
2408				len = fxdr_unsigned(int, *(tl + 2));
2409			} else {
2410				tl = (uint32_t *)nfsm_dissect(&info,
2411				    2 * NFSX_UNSIGNED);
2412				if (tl == NULL)
2413					goto nfsmout;
2414				fileno = fxdr_unsigned(u_quad_t, *tl++);
2415				len = fxdr_unsigned(int, *tl);
2416			}
2417			if (len <= 0 || len > NFS_MAXNAMLEN) {
2418				error = EBADRPC;
2419				m_freem(info.nmi_mrep);
2420				goto nfsmout;
2421			}
2422			tlen = DIRENT_RECSIZE(len) + NFS_DIRENT_OVERHEAD;
2423			left = NFS_READDIRBLKSIZ - blksiz;
2424			if (tlen > left) {
2425				dp->d_reclen += left;
2426				uiop->uio_iov->iov_base += left;
2427				uiop->uio_iov->iov_len -= left;
2428				uiop->uio_resid -= left;
2429				blksiz = 0;
2430			}
2431			if (tlen > uiop->uio_resid)
2432				bigenough = 0;
2433			if (bigenough) {
2434				ndp = (struct nfs_dirent *)
2435				    uiop->uio_iov->iov_base;
2436				dp = &ndp->dirent;
2437				dp->d_fileno = fileno;
2438				dp->d_namlen = len;
2439				dp->d_reclen = tlen;
2440				dp->d_type = DT_UNKNOWN;
2441				blksiz += tlen;
2442				if (blksiz == NFS_READDIRBLKSIZ)
2443					blksiz = 0;
2444				uiop->uio_resid -= NFS_DIRHDSIZ;
2445				uiop->uio_iov->iov_base =
2446				    (char *)uiop->uio_iov->iov_base +
2447				    NFS_DIRHDSIZ;
2448				uiop->uio_iov->iov_len -= NFS_DIRHDSIZ;
2449				if (nfsm_mtouio(&info, uiop, len) != 0)
2450					goto nfsmout;
2451				cp = uiop->uio_iov->iov_base;
2452				tlen -= NFS_DIRHDSIZ + len;
2453				*cp = '\0';	/* null terminate */
2454				uiop->uio_iov->iov_base += tlen;
2455				uiop->uio_iov->iov_len -= tlen;
2456				uiop->uio_resid -= tlen;
2457			} else {
2458				if (nfsm_adv(&info, nfsm_rndup(len)) != 0)
2459					goto nfsmout;
2460			}
2461			if (info.nmi_v3) {
2462				tl = (uint32_t *)nfsm_dissect(&info,
2463				    3 * NFSX_UNSIGNED);
2464			} else {
2465				tl = (uint32_t *)nfsm_dissect(&info,
2466				    2 * NFSX_UNSIGNED);
2467			}
2468			if (tl == NULL)
2469				goto nfsmout;
2470			if (bigenough) {
2471				if (info.nmi_v3) {
2472					ndp->cookie[0] = cookie.nfsuquad[0] =
2473					    *tl++;
2474				} else
2475					ndp->cookie[0] = 0;
2476
2477				ndp->cookie[1] = cookie.nfsuquad[1] = *tl++;
2478			} else if (info.nmi_v3)
2479				tl += 2;
2480			else
2481				tl++;
2482			more_dirs = fxdr_unsigned(int, *tl);
2483		}
2484		/*
2485		 * If at end of rpc data, get the eof boolean
2486		 */
2487		if (!more_dirs) {
2488			tl = (uint32_t *)nfsm_dissect(&info, NFSX_UNSIGNED);
2489			if (tl == NULL)
2490				goto nfsmout;
2491			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2492		}
2493		m_freem(info.nmi_mrep);
2494	}
2495	/*
2496	 * Fill last record, iff any, out to a multiple of NFS_READDIRBLKSIZ
2497	 * by increasing d_reclen for the last record.
2498	 */
2499	if (blksiz > 0) {
2500		left = NFS_READDIRBLKSIZ - blksiz;
2501		dp->d_reclen += left;
2502		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
2503		    left;
2504		uiop->uio_iov->iov_len -= left;
2505		uiop->uio_resid -= left;
2506	}
2507
2508	/*
2509	 * We are now either at the end of the directory or have filled the
2510	 * block.
2511	 */
2512	if (bigenough) {
2513		dnp->n_direofoffset = fxdr_hyper(&cookie.nfsuquad[0]);
2514		if (end_of_directory) *end_of_directory = 1;
2515	} else {
2516		if (uiop->uio_resid > 0)
2517			printf("EEK! readdirrpc resid > 0\n");
2518	}
2519
2520nfsmout:
2521	return (error);
2522}
2523
2524/*
2525 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
2526 */
2527int
2528nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
2529    int *end_of_directory, struct proc *p)
2530{
2531	int len, left;
2532	struct nfs_dirent *ndirp = NULL;
2533	struct dirent *dp = NULL;
2534	struct nfsm_info	info;
2535	u_int32_t *tl;
2536	caddr_t cp;
2537	struct vnode *newvp;
2538	caddr_t dpossav1, dpossav2;
2539	struct mbuf *mdsav1, *mdsav2;
2540	struct nameidata nami, *ndp = &nami;
2541	struct componentname *cnp = &ndp->ni_cnd;
2542	nfsuint64 cookie;
2543	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2544	struct nfsnode *dnp = VTONFS(vp), *np;
2545	nfsfh_t *fhp;
2546	u_quad_t fileno;
2547	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
2548	int attrflag, fhsize;
2549
2550#ifdef DIAGNOSTIC
2551	if (uiop->uio_iovcnt != 1 ||
2552		(uiop->uio_resid & (NFS_DIRBLKSIZ - 1)))
2553		panic("nfs readdirplusrpc bad uio");
2554#endif
2555	NDINIT(ndp, 0, 0, UIO_SYSSPACE, NULL, p);
2556	ndp->ni_dvp = vp;
2557	newvp = NULLVP;
2558
2559	txdr_hyper(uiop->uio_offset, &cookie.nfsuquad[0]);
2560
2561	/*
2562	 * Loop around doing readdir rpc's of size nm_readdirsize
2563	 * truncated to a multiple of NFS_READDIRBLKSIZ.
2564	 * The stopping criteria is EOF or buffer full.
2565	 */
2566	while (more_dirs && bigenough) {
2567		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
2568		info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(1) + 6 * NFSX_UNSIGNED);
2569		nfsm_fhtom(&info, vp, 1);
2570		tl = nfsm_build(&info.nmi_mb, 6 * NFSX_UNSIGNED);
2571		*tl++ = cookie.nfsuquad[0];
2572		*tl++ = cookie.nfsuquad[1];
2573		if (cookie.nfsuquad[0] == 0 &&
2574		    cookie.nfsuquad[1] == 0) {
2575			*tl++ = 0;
2576			*tl++ = 0;
2577		} else {
2578			*tl++ = dnp->n_cookieverf.nfsuquad[0];
2579			*tl++ = dnp->n_cookieverf.nfsuquad[1];
2580		}
2581		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
2582		*tl = txdr_unsigned(nmp->nm_rsize);
2583
2584		info.nmi_procp = uiop->uio_procp;
2585		info.nmi_cred = cred;
2586		info.nmi_errorp = &error;
2587		error = nfs_request(vp, NFSPROC_READDIRPLUS, &info);
2588		if (nfsm_postop_attr(&info, &vp, &attrflag) != 0)
2589			goto nfsmout;
2590		if (error) {
2591			m_freem(info.nmi_mrep);
2592			goto nfsmout;
2593		}
2594
2595		tl = (uint32_t *)nfsm_dissect(&info, 3 * NFSX_UNSIGNED);
2596		if (tl == NULL)
2597			goto nfsmout;
2598		dnp->n_cookieverf.nfsuquad[0] = *tl++;
2599		dnp->n_cookieverf.nfsuquad[1] = *tl++;
2600		more_dirs = fxdr_unsigned(int, *tl);
2601
2602		/* loop thru the dir entries, doctoring them to 4bsd form */
2603		while (more_dirs && bigenough) {
2604			tl = (uint32_t *)nfsm_dissect(&info, 3 * NFSX_UNSIGNED);
2605			if (tl == NULL)
2606				goto nfsmout;
2607			fileno = fxdr_hyper(tl);
2608			len = fxdr_unsigned(int, *(tl + 2));
2609			if (len <= 0 || len > NFS_MAXNAMLEN) {
2610				error = EBADRPC;
2611				m_freem(info.nmi_mrep);
2612				goto nfsmout;
2613			}
2614			tlen = DIRENT_RECSIZE(len) + NFS_DIRENT_OVERHEAD;
2615			left = NFS_READDIRBLKSIZ - blksiz;
2616			if (tlen > left) {
2617				dp->d_reclen += left;
2618				uiop->uio_iov->iov_base =
2619				    (char *)uiop->uio_iov->iov_base + left;
2620				uiop->uio_iov->iov_len -= left;
2621				uiop->uio_resid -= left;
2622				blksiz = 0;
2623			}
2624			if (tlen > uiop->uio_resid)
2625				bigenough = 0;
2626			if (bigenough) {
2627				ndirp = (struct nfs_dirent *)
2628				    uiop->uio_iov->iov_base;
2629				dp = &ndirp->dirent;
2630				dp->d_fileno = fileno;
2631				dp->d_namlen = len;
2632				dp->d_reclen = tlen;
2633				dp->d_type = DT_UNKNOWN;
2634				blksiz += tlen;
2635				if (blksiz == NFS_READDIRBLKSIZ)
2636					blksiz = 0;
2637				uiop->uio_resid -= NFS_DIRHDSIZ;
2638				uiop->uio_iov->iov_base =
2639				    (char *)uiop->uio_iov->iov_base +
2640				    NFS_DIRHDSIZ;
2641				uiop->uio_iov->iov_len -= NFS_DIRHDSIZ;
2642				cnp->cn_nameptr = uiop->uio_iov->iov_base;
2643				cnp->cn_namelen = len;
2644				if (nfsm_mtouio(&info, uiop, len) != 0)
2645					goto nfsmout;
2646				cp = uiop->uio_iov->iov_base;
2647				tlen -= NFS_DIRHDSIZ + len;
2648				*cp = '\0';
2649				uiop->uio_iov->iov_base += tlen;
2650				uiop->uio_iov->iov_len -= tlen;
2651				uiop->uio_resid -= tlen;
2652			} else {
2653				if (nfsm_adv(&info, nfsm_rndup(len)) != 0)
2654					goto nfsmout;
2655			}
2656			tl = (uint32_t *)nfsm_dissect(&info, 3 * NFSX_UNSIGNED);
2657			if (tl == NULL)
2658				goto nfsmout;
2659			if (bigenough) {
2660				ndirp->cookie[0] = cookie.nfsuquad[0] = *tl++;
2661				ndirp->cookie[1] = cookie.nfsuquad[1] = *tl++;
2662			} else
2663				tl += 2;
2664
2665			/*
2666			 * Since the attributes are before the file handle
2667			 * (sigh), we must skip over the attributes and then
2668			 * come back and get them.
2669			 */
2670			attrflag = fxdr_unsigned(int, *tl);
2671			if (attrflag) {
2672				dpossav1 = info.nmi_dpos;
2673				mdsav1 = info.nmi_md;
2674				if (nfsm_adv(&info, NFSX_V3FATTR) != 0)
2675					goto nfsmout;
2676				tl = (uint32_t *)
2677				    nfsm_dissect(&info, NFSX_UNSIGNED);
2678				if (tl == NULL)
2679					goto nfsmout;
2680				doit = fxdr_unsigned(int, *tl);
2681				if (doit) {
2682					if ((fhp =
2683					    nfsm_getfh(&info, &fhsize, 1)) ==
2684					    NULL)
2685						goto nfsmout;
2686					if (NFS_CMPFH(dnp, fhp, fhsize)) {
2687						vref(vp);
2688						newvp = vp;
2689						np = dnp;
2690					} else {
2691						error = nfs_nget(vp->v_mount,
2692						    fhp, fhsize, &np);
2693						if (error)
2694							doit = 0;
2695						else
2696							newvp = NFSTOV(np);
2697					}
2698				}
2699				if (doit && bigenough) {
2700					dpossav2 = info.nmi_dpos;
2701					info.nmi_dpos = dpossav1;
2702					mdsav2 = info.nmi_md;
2703					info.nmi_md = mdsav1;
2704					if (nfsm_loadattr(&info, &newvp,
2705					    NULL) != 0)
2706						goto nfsmout;
2707					info.nmi_dpos = dpossav2;
2708					info.nmi_md = mdsav2;
2709					dp->d_type = IFTODT(
2710						VTTOIF(np->n_vattr.va_type));
2711					if (cnp->cn_namelen <=
2712					    NAMECACHE_MAXLEN) {
2713						ndp->ni_vp = newvp;
2714						cache_purge(ndp->ni_dvp);
2715						nfs_cache_enter(ndp->ni_dvp,
2716						    ndp->ni_vp, cnp);
2717					}
2718				}
2719			} else {
2720				/* Just skip over the file handle */
2721				tl = (uint32_t *)
2722				    nfsm_dissect(&info, NFSX_UNSIGNED);
2723				if (tl == NULL)
2724					goto nfsmout;
2725				i = fxdr_unsigned(int, *tl);
2726				if (i > 0) {
2727					if (nfsm_adv(&info, nfsm_rndup(i)) != 0)
2728						goto nfsmout;
2729				}
2730			}
2731			if (newvp != NULLVP) {
2732				if (newvp == vp)
2733					vrele(newvp);
2734				else
2735					vput(newvp);
2736				newvp = NULLVP;
2737			}
2738			tl = (uint32_t *)nfsm_dissect(&info, NFSX_UNSIGNED);
2739			if (tl == NULL)
2740				goto nfsmout;
2741			more_dirs = fxdr_unsigned(int, *tl);
2742		}
2743		/*
2744		 * If at end of rpc data, get the eof boolean
2745		 */
2746		if (!more_dirs) {
2747			tl = (uint32_t *)nfsm_dissect(&info, NFSX_UNSIGNED);
2748			if (tl == NULL)
2749				goto nfsmout;
2750			more_dirs = (fxdr_unsigned(int, *tl) == 0);
2751		}
2752		m_freem(info.nmi_mrep);
2753	}
2754	/*
2755	 * Fill last record, iff any, out to a multiple of NFS_READDIRBLKSIZ
2756	 * by increasing d_reclen for the last record.
2757	 */
2758	if (blksiz > 0) {
2759		left = NFS_READDIRBLKSIZ - blksiz;
2760		dp->d_reclen += left;
2761		uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base +
2762		    left;
2763		uiop->uio_iov->iov_len -= left;
2764		uiop->uio_resid -= left;
2765	}
2766
2767	/*
2768	 * We are now either at the end of the directory or have filled the
2769	 * block.
2770	 */
2771	if (bigenough) {
2772		dnp->n_direofoffset = fxdr_hyper(&cookie.nfsuquad[0]);
2773		if (end_of_directory) *end_of_directory = 1;
2774	} else {
2775		if (uiop->uio_resid > 0)
2776			printf("EEK! readdirplusrpc resid > 0\n");
2777	}
2778
2779nfsmout:
2780	if (newvp != NULLVP) {
2781		if (newvp == vp)
2782			vrele(newvp);
2783		else
2784			vput(newvp);
2785	}
2786	return (error);
2787}
2788
2789/*
2790 * Silly rename. To make the NFS filesystem that is stateless look a little
2791 * more like the "ufs" a remove of an active vnode is translated to a rename
2792 * to a funny looking filename that is removed by nfs_inactive on the
2793 * nfsnode. There is the potential for another process on a different client
2794 * to create the same funny name between the nfs_lookitup() fails and the
2795 * nfs_rename() completes, but...
2796 */
2797int
2798nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
2799{
2800	struct sillyrename *sp;
2801	struct nfsnode *np;
2802	int error;
2803
2804	cache_purge(dvp);
2805	np = VTONFS(vp);
2806	sp = malloc(sizeof(*sp), M_NFSREQ, M_WAITOK);
2807	sp->s_cred = crdup(cnp->cn_cred);
2808	sp->s_dvp = dvp;
2809	vref(dvp);
2810
2811	if (vp->v_type == VDIR) {
2812#ifdef DIAGNOSTIC
2813		printf("nfs: sillyrename dir\n");
2814#endif
2815		error = EINVAL;
2816		goto bad;
2817	}
2818
2819	/* Try lookitups until we get one that isn't there */
2820	while (1) {
2821		/* Fudge together a funny name */
2822		u_int32_t rnd[2];
2823
2824		arc4random_buf(&rnd, sizeof rnd);
2825		sp->s_namlen = snprintf(sp->s_name, sizeof sp->s_name,
2826		    ".nfs%08X%08X", rnd[0], rnd[1]);
2827		if (sp->s_namlen > sizeof sp->s_name)
2828			sp->s_namlen = strlen(sp->s_name);
2829
2830		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2831		    cnp->cn_proc, NULL))
2832			break;
2833	}
2834
2835	error = nfs_renameit(dvp, cnp, sp);
2836	if (error)
2837		goto bad;
2838	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
2839		cnp->cn_proc, &np);
2840	np->n_sillyrename = sp;
2841	return (0);
2842bad:
2843	vrele(sp->s_dvp);
2844	crfree(sp->s_cred);
2845	free(sp, M_NFSREQ, sizeof(*sp));
2846	return (error);
2847}
2848
2849/*
2850 * Look up a file name and optionally either update the file handle or
2851 * allocate an nfsnode, depending on the value of npp.
2852 * npp == NULL	--> just do the lookup
2853 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
2854 *			handled too
2855 * *npp != NULL --> update the file handle in the vnode
2856 */
2857int
2858nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
2859    struct proc *procp, struct nfsnode **npp)
2860{
2861	struct nfsm_info	info;
2862	struct vnode *newvp = NULL;
2863	struct nfsnode *np, *dnp = VTONFS(dvp);
2864	int error = 0, fhlen, attrflag = 0;
2865	nfsfh_t *nfhp;
2866
2867	info.nmi_v3 = NFS_ISV3(dvp);
2868	info.nmi_errorp = &error;
2869
2870	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
2871	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(info.nmi_v3) + NFSX_UNSIGNED +
2872	    nfsm_rndup(len));
2873	nfsm_fhtom(&info, dvp, info.nmi_v3);
2874	if (nfsm_strtom(&info, name, len, NFS_MAXNAMLEN) != 0)
2875		goto nfsmout;
2876
2877	info.nmi_procp = procp;
2878	info.nmi_cred = cred;
2879	error = nfs_request(dvp, NFSPROC_LOOKUP, &info);
2880	if (error && !info.nmi_v3) {
2881		m_freem(info.nmi_mrep);
2882		goto nfsmout;
2883	}
2884
2885	if (npp && !error) {
2886		if ((nfhp = nfsm_getfh(&info, &fhlen, info.nmi_v3)) == NULL)
2887			goto nfsmout;
2888		if (*npp) {
2889			np = *npp;
2890			np->n_fhp = &np->n_fh;
2891			bcopy(nfhp, np->n_fhp, fhlen);
2892			np->n_fhsize = fhlen;
2893			newvp = NFSTOV(np);
2894		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
2895			vref(dvp);
2896			newvp = dvp;
2897			np = dnp;
2898		} else {
2899			error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
2900			if (error) {
2901				m_freem(info.nmi_mrep);
2902				return (error);
2903			}
2904			newvp = NFSTOV(np);
2905		}
2906		if (info.nmi_v3) {
2907			if (nfsm_postop_attr(&info, &newvp, &attrflag) != 0)
2908				goto nfsmout;
2909			if (!attrflag && *npp == NULL) {
2910				m_freem(info.nmi_mrep);
2911				if (newvp == dvp)
2912					vrele(newvp);
2913				else
2914					vput(newvp);
2915				return (ENOENT);
2916			}
2917		} else {
2918			if (nfsm_loadattr(&info, &newvp, NULL) != 0)
2919				goto nfsmout;
2920		}
2921	}
2922	m_freem(info.nmi_mrep);
2923nfsmout:
2924	if (npp && *npp == NULL) {
2925		if (error) {
2926			if (newvp == dvp)
2927				vrele(newvp);
2928			else
2929				vput(newvp);
2930		} else
2931			*npp = np;
2932	}
2933	return (error);
2934}
2935
2936/*
2937 * Nfs Version 3 commit rpc
2938 */
2939int
2940nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct proc *procp)
2941{
2942	struct nfsm_info	info;
2943	u_int32_t *tl;
2944	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
2945	int error = 0, wccflag = NFSV3_WCCRATTR;
2946
2947	if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0)
2948		return (0);
2949	nfsstats.rpccnt[NFSPROC_COMMIT]++;
2950	info.nmi_mb = info.nmi_mreq = nfsm_reqhead(NFSX_FH(1));
2951	nfsm_fhtom(&info, vp, 1);
2952	info.nmi_errorp = &error;
2953
2954	tl = nfsm_build(&info.nmi_mb, 3 * NFSX_UNSIGNED);
2955	txdr_hyper(offset, tl);
2956	tl += 2;
2957	*tl = txdr_unsigned(cnt);
2958
2959	info.nmi_procp = procp;
2960	info.nmi_cred = VTONFS(vp)->n_wcred;
2961	error = nfs_request(vp, NFSPROC_COMMIT, &info);
2962	if (nfsm_wcc_data(&info, &vp, &wccflag) != 0)
2963		goto nfsmout;
2964
2965	if (!error) {
2966		tl = (uint32_t *)nfsm_dissect(&info, NFSX_V3WRITEVERF);
2967		if (tl == NULL)
2968			goto nfsmout;
2969		if (bcmp(nmp->nm_verf, tl,
2970			NFSX_V3WRITEVERF)) {
2971			bcopy(tl, nmp->nm_verf,
2972				NFSX_V3WRITEVERF);
2973			error = NFSERR_STALEWRITEVERF;
2974		}
2975	}
2976	m_freem(info.nmi_mrep);
2977
2978nfsmout:
2979	return (error);
2980}
2981
2982/*
2983 * Kludge City..
2984 * - make nfs_bmap() essentially a no-op that does no translation
2985 * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc
2986 *   (Maybe I could use the process's page mapping, but I was concerned that
2987 *    Kernel Write might not be enabled and also figured copyout() would do
2988 *    a lot more work than bcopy() and also it currently happens in the
2989 *    context of the swapper process (2).
2990 */
2991int
2992nfs_bmap(void *v)
2993{
2994	struct vop_bmap_args *ap = v;
2995	struct vnode *vp = ap->a_vp;
2996
2997	if (ap->a_vpp != NULL)
2998		*ap->a_vpp = vp;
2999	if (ap->a_bnp != NULL)
3000		*ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize);
3001	return (0);
3002}
3003
3004/*
3005 * Strategy routine.
3006 * For async requests when nfsiod(s) are running, queue the request by
3007 * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
3008 * request.
3009 */
3010int
3011nfs_strategy(void *v)
3012{
3013	struct vop_strategy_args *ap = v;
3014	struct buf *bp = ap->a_bp;
3015	struct proc *p;
3016	int error = 0;
3017
3018	if ((bp->b_flags & (B_PHYS|B_ASYNC)) == (B_PHYS|B_ASYNC))
3019		panic("nfs physio/async");
3020	if (bp->b_flags & B_ASYNC)
3021		p = NULL;
3022	else
3023		p = curproc;	/* XXX */
3024	/*
3025	 * If the op is asynchronous and an i/o daemon is waiting
3026	 * queue the request, wake it up and wait for completion
3027	 * otherwise just do it ourselves.
3028	 */
3029	if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(bp, 0))
3030		error = nfs_doio(bp, p);
3031	return (error);
3032}
3033
3034/*
3035 * fsync vnode op. Just call nfs_flush() with commit == 1.
3036 */
3037int
3038nfs_fsync(void *v)
3039{
3040	struct vop_fsync_args *ap = v;
3041
3042	return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1));
3043}
3044
3045/*
3046 * Flush all the blocks associated with a vnode.
3047 *	Walk through the buffer pool and push any dirty pages
3048 *	associated with the vnode.
3049 */
3050int
3051nfs_flush(struct vnode *vp, struct ucred *cred, int waitfor, struct proc *p,
3052    int commit)
3053{
3054	struct nfsnode *np = VTONFS(vp);
3055	struct buf *bp;
3056	int i;
3057	struct buf *nbp;
3058	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
3059	uint64_t slptimeo = INFSLP;
3060	int s, error = 0, slpflag = 0, retv, bvecpos;
3061	int dirty, passone = 1;
3062	u_quad_t off = (u_quad_t)-1, endoff = 0, toff;
3063#ifndef NFS_COMMITBVECSIZ
3064#define NFS_COMMITBVECSIZ	20
3065#endif
3066	struct buf *bvec[NFS_COMMITBVECSIZ];
3067
3068	if (nmp->nm_flag & NFSMNT_INT)
3069		slpflag = PCATCH;
3070	if (!commit)
3071		passone = 0;
3072	/*
3073	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
3074	 * server, but nas not been committed to stable storage on the server
3075	 * yet. On the first pass, the byte range is worked out and the commit
3076	 * rpc is done. On the second pass, nfs_writebp() is called to do the
3077	 * job.
3078	 */
3079again:
3080	bvecpos = 0;
3081	if (NFS_ISV3(vp) && commit) {
3082		s = splbio();
3083		LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
3084			if (bvecpos >= NFS_COMMITBVECSIZ)
3085				break;
3086			if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
3087			    != (B_DELWRI | B_NEEDCOMMIT))
3088				continue;
3089			bremfree(bp);
3090			bp->b_flags |= B_WRITEINPROG;
3091			buf_acquire(bp);
3092
3093			/*
3094			 * A list of these buffers is kept so that the
3095			 * second loop knows which buffers have actually
3096			 * been committed. This is necessary, since there
3097			 * may be a race between the commit rpc and new
3098			 * uncommitted writes on the file.
3099			 */
3100			bvec[bvecpos++] = bp;
3101			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
3102				bp->b_dirtyoff;
3103			if (toff < off)
3104				off = toff;
3105			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
3106			if (toff > endoff)
3107				endoff = toff;
3108		}
3109		splx(s);
3110	}
3111	if (bvecpos > 0) {
3112		/*
3113		 * Commit data on the server, as required.
3114		 */
3115		bcstats.pendingwrites++;
3116		bcstats.numwrites++;
3117		retv = nfs_commit(vp, off, (int)(endoff - off), p);
3118		if (retv == NFSERR_STALEWRITEVERF)
3119			nfs_clearcommit(vp->v_mount);
3120		/*
3121		 * Now, either mark the blocks I/O done or mark the
3122		 * blocks dirty, depending on whether the commit
3123		 * succeeded.
3124		 */
3125		for (i = 0; i < bvecpos; i++) {
3126			bp = bvec[i];
3127			bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG);
3128			if (retv) {
3129				if (i == 0)
3130					bcstats.pendingwrites--;
3131				brelse(bp);
3132			} else {
3133				if (i > 0)
3134					bcstats.pendingwrites++;
3135				s = splbio();
3136				buf_undirty(bp);
3137				vp->v_numoutput++;
3138				bp->b_flags |= B_ASYNC;
3139				bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
3140				bp->b_dirtyoff = bp->b_dirtyend = 0;
3141				biodone(bp);
3142				splx(s);
3143			}
3144		}
3145	}
3146
3147	/*
3148	 * Start/do any write(s) that are required.
3149	 */
3150loop:
3151	s = splbio();
3152	LIST_FOREACH_SAFE(bp, &vp->v_dirtyblkhd, b_vnbufs, nbp) {
3153		if (bp->b_flags & B_BUSY) {
3154			if (waitfor != MNT_WAIT || passone)
3155				continue;
3156			bp->b_flags |= B_WANTED;
3157			error = tsleep_nsec(bp, slpflag | (PRIBIO + 1),
3158			    "nfsfsync", slptimeo);
3159			splx(s);
3160			if (error) {
3161				if (nfs_sigintr(nmp, NULL, p))
3162					return (EINTR);
3163				if (slpflag == PCATCH) {
3164					slpflag = 0;
3165					slptimeo = SEC_TO_NSEC(2);
3166				}
3167			}
3168			goto loop;
3169		}
3170		if ((bp->b_flags & B_DELWRI) == 0)
3171			panic("nfs_fsync: not dirty");
3172		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT))
3173			continue;
3174		bremfree(bp);
3175		if (passone || !commit) {
3176			bp->b_flags |= B_ASYNC;
3177		} else {
3178			bp->b_flags |= (B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT);
3179		}
3180		buf_acquire(bp);
3181		splx(s);
3182		VOP_BWRITE(bp);
3183		goto loop;
3184	}
3185	splx(s);
3186	if (passone) {
3187		passone = 0;
3188		goto again;
3189	}
3190	if (waitfor == MNT_WAIT) {
3191 loop2:
3192		s = splbio();
3193		error = vwaitforio(vp, slpflag, "nfs_fsync", slptimeo);
3194		if (error) {
3195			splx(s);
3196			if (nfs_sigintr(nmp, NULL, p))
3197				return (EINTR);
3198			if (slpflag == PCATCH) {
3199				slpflag = 0;
3200				slptimeo = SEC_TO_NSEC(2);
3201			}
3202			goto loop2;
3203		}
3204		dirty = (!LIST_EMPTY(&vp->v_dirtyblkhd) && commit);
3205		splx(s);
3206		if (dirty) {
3207#if 0
3208			vprint("nfs_fsync: dirty", vp);
3209#endif
3210			goto loop;
3211		}
3212	}
3213	if (np->n_flag & NWRITEERR) {
3214		error = np->n_error;
3215		np->n_flag &= ~NWRITEERR;
3216	}
3217	return (error);
3218}
3219
3220/*
3221 * Return POSIX pathconf information applicable to nfs.
3222 * Fake it. For v3 we could ask the server, but such code
3223 * hasn't been written yet.
3224 */
3225int
3226nfs_pathconf(void *v)
3227{
3228	struct vop_pathconf_args *ap = v;
3229	struct nfsmount *nmp = VFSTONFS(ap->a_vp->v_mount);
3230	int error = 0;
3231
3232	switch (ap->a_name) {
3233	case _PC_LINK_MAX:
3234		*ap->a_retval = LINK_MAX;
3235		break;
3236	case _PC_NAME_MAX:
3237		*ap->a_retval = NAME_MAX;
3238		break;
3239	case _PC_CHOWN_RESTRICTED:
3240		*ap->a_retval = 1;
3241		break;
3242	case _PC_NO_TRUNC:
3243		*ap->a_retval = 1;
3244		break;
3245	case _PC_ALLOC_SIZE_MIN:
3246		*ap->a_retval = NFS_FABLKSIZE;
3247		break;
3248	case _PC_FILESIZEBITS:
3249		*ap->a_retval = 64;
3250		break;
3251	case _PC_REC_INCR_XFER_SIZE:
3252		*ap->a_retval = min(nmp->nm_rsize, nmp->nm_wsize);
3253		break;
3254	case _PC_REC_MAX_XFER_SIZE:
3255		*ap->a_retval = -1; /* means ``unlimited'' */
3256		break;
3257	case _PC_REC_MIN_XFER_SIZE:
3258		*ap->a_retval = min(nmp->nm_rsize, nmp->nm_wsize);
3259		break;
3260	case _PC_REC_XFER_ALIGN:
3261		*ap->a_retval = PAGE_SIZE;
3262		break;
3263	case _PC_SYMLINK_MAX:
3264		*ap->a_retval = MAXPATHLEN;
3265		break;
3266	case _PC_2_SYMLINKS:
3267		*ap->a_retval = 1;
3268		break;
3269	case _PC_TIMESTAMP_RESOLUTION:
3270		*ap->a_retval = NFS_ISV3(ap->a_vp) ? 1 : 1000;
3271		break;
3272	default:
3273		error = EINVAL;
3274		break;
3275	}
3276
3277	return (error);
3278}
3279
3280/*
3281 * NFS advisory byte-level locks.
3282 */
3283int
3284nfs_advlock(void *v)
3285{
3286	struct vop_advlock_args *ap = v;
3287	struct nfsnode *np = VTONFS(ap->a_vp);
3288
3289	return (lf_advlock(&np->n_lockf, np->n_size, ap->a_id, ap->a_op,
3290	    ap->a_fl, ap->a_flags));
3291}
3292
3293/*
3294 * Print out the contents of an nfsnode.
3295 */
3296int
3297nfs_print(void *v)
3298{
3299	struct vop_print_args *ap = v;
3300	struct vnode *vp = ap->a_vp;
3301	struct nfsnode *np = VTONFS(vp);
3302
3303	printf("tag VT_NFS, fileid %lld fsid 0x%lx",
3304		np->n_vattr.va_fileid, np->n_vattr.va_fsid);
3305#ifdef FIFO
3306	if (vp->v_type == VFIFO)
3307		fifo_printinfo(vp);
3308#endif
3309	printf("\n");
3310	return (0);
3311}
3312
3313/*
3314 * Just call nfs_writebp() with the force argument set to 1.
3315 */
3316int
3317nfs_bwrite(void *v)
3318{
3319	struct vop_bwrite_args *ap = v;
3320
3321	return (nfs_writebp(ap->a_bp, 1));
3322}
3323
3324/*
3325 * This is a clone of vop_generic_bwrite(), except that B_WRITEINPROG isn't set unless
3326 * the force flag is one and it also handles the B_NEEDCOMMIT flag.
3327 */
3328int
3329nfs_writebp(struct buf *bp, int force)
3330{
3331	int oldflags = bp->b_flags, retv = 1;
3332	struct proc *p = curproc;	/* XXX */
3333	off_t off;
3334	size_t cnt;
3335	int   s;
3336	struct vnode *vp;
3337	struct nfsnode *np;
3338
3339	if(!(bp->b_flags & B_BUSY))
3340		panic("bwrite: buffer is not busy???");
3341
3342	vp = bp->b_vp;
3343	np = VTONFS(vp);
3344
3345	bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
3346
3347	s = splbio();
3348	buf_undirty(bp);
3349
3350	if ((oldflags & B_ASYNC) && !(oldflags & B_DELWRI) && p)
3351		++p->p_ru.ru_oublock;
3352
3353	bp->b_vp->v_numoutput++;
3354	splx(s);
3355
3356	/*
3357	 * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not
3358	 * an actual write will have to be scheduled via. VOP_STRATEGY().
3359	 * If B_WRITEINPROG is already set, then push it with a write anyhow.
3360	 */
3361	if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) {
3362		off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
3363		cnt = bp->b_dirtyend - bp->b_dirtyoff;
3364
3365		rw_enter_write(&np->n_commitlock);
3366		if (!(bp->b_flags & B_NEEDCOMMIT)) {
3367			rw_exit_write(&np->n_commitlock);
3368			return (0);
3369		}
3370
3371		/*
3372		 * If it's already been committed by somebody else,
3373		 * bail.
3374		 */
3375		if (!nfs_in_committed_range(vp, bp)) {
3376			int pushedrange = 0;
3377			/*
3378			 * Since we're going to do this, push as much
3379			 * as we can.
3380			 */
3381
3382			if (nfs_in_tobecommitted_range(vp, bp)) {
3383				pushedrange = 1;
3384				off = np->n_pushlo;
3385				cnt = np->n_pushhi - np->n_pushlo;
3386			}
3387
3388			bp->b_flags |= B_WRITEINPROG;
3389			bcstats.pendingwrites++;
3390			bcstats.numwrites++;
3391			retv = nfs_commit(bp->b_vp, off, cnt, curproc);
3392			bp->b_flags &= ~B_WRITEINPROG;
3393
3394			if (retv == 0) {
3395				if (pushedrange)
3396					nfs_merge_commit_ranges(vp);
3397				else
3398					nfs_add_committed_range(vp, bp);
3399			} else
3400				bcstats.pendingwrites--;
3401		} else
3402			retv = 0; /* It has already been committed. */
3403
3404		rw_exit_write(&np->n_commitlock);
3405		if (!retv) {
3406			bp->b_dirtyoff = bp->b_dirtyend = 0;
3407			bp->b_flags &= ~B_NEEDCOMMIT;
3408			s = splbio();
3409			biodone(bp);
3410			splx(s);
3411		} else if (retv == NFSERR_STALEWRITEVERF)
3412			nfs_clearcommit(bp->b_vp->v_mount);
3413	}
3414	if (retv) {
3415		s = splbio();
3416		buf_flip_dma(bp);
3417		if (force)
3418			bp->b_flags |= B_WRITEINPROG;
3419		splx(s);
3420		VOP_STRATEGY(bp->b_vp, bp);
3421	}
3422
3423	if( (oldflags & B_ASYNC) == 0) {
3424		int rtval;
3425
3426		bp->b_flags |= B_RAW;
3427		rtval = biowait(bp);
3428		if (!(oldflags & B_DELWRI) && p) {
3429			++p->p_ru.ru_oublock;
3430		}
3431		brelse(bp);
3432		return (rtval);
3433	}
3434
3435	return (0);
3436}
3437
3438/*
3439 * nfs special file access vnode op.
3440 * Essentially just get vattr and then imitate iaccess() since the device is
3441 * local to the client.
3442 */
3443int
3444nfsspec_access(void *v)
3445{
3446	struct vop_access_args *ap = v;
3447	struct vattr va;
3448	struct vnode *vp = ap->a_vp;
3449	int error;
3450
3451	/*
3452	 * Disallow write attempts on filesystems mounted read-only;
3453	 * unless the file is a socket, fifo, or a block or character
3454	 * device resident on the filesystem.
3455	 */
3456	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3457		switch (vp->v_type) {
3458		case VREG:
3459		case VDIR:
3460		case VLNK:
3461			return (EROFS);
3462		default:
3463			break;
3464		}
3465	}
3466
3467	error = VOP_GETATTR(vp, &va, ap->a_cred, ap->a_p);
3468	if (error)
3469		return (error);
3470
3471	return (vaccess(vp->v_type, va.va_mode, va.va_uid, va.va_gid,
3472	    ap->a_mode, ap->a_cred));
3473}
3474
3475/*
3476 * Read wrapper for special devices.
3477 */
3478int
3479nfsspec_read(void *v)
3480{
3481	struct vop_read_args *ap = v;
3482	struct nfsnode *np = VTONFS(ap->a_vp);
3483
3484	/*
3485	 * Set access flag.
3486	 */
3487	np->n_flag |= NACC;
3488	getnanotime(&np->n_atim);
3489	return (spec_read(ap));
3490}
3491
3492/*
3493 * Write wrapper for special devices.
3494 */
3495int
3496nfsspec_write(void *v)
3497{
3498	struct vop_write_args *ap = v;
3499	struct nfsnode *np = VTONFS(ap->a_vp);
3500
3501	/*
3502	 * Set update flag.
3503	 */
3504	np->n_flag |= NUPD;
3505	getnanotime(&np->n_mtim);
3506	return (spec_write(ap));
3507}
3508
3509/*
3510 * Close wrapper for special devices.
3511 *
3512 * Update the times on the nfsnode then do device close.
3513 */
3514int
3515nfsspec_close(void *v)
3516{
3517	struct vop_close_args *ap = v;
3518	struct vnode *vp = ap->a_vp;
3519	struct nfsnode *np = VTONFS(vp);
3520	struct vattr vattr;
3521
3522	if (np->n_flag & (NACC | NUPD)) {
3523		np->n_flag |= NCHG;
3524		if (vp->v_usecount == 1 &&
3525		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3526			VATTR_NULL(&vattr);
3527			if (np->n_flag & NACC)
3528				vattr.va_atime = np->n_atim;
3529			if (np->n_flag & NUPD)
3530				vattr.va_mtime = np->n_mtim;
3531			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
3532		}
3533	}
3534	return (spec_close(ap));
3535}
3536
3537#ifdef FIFO
3538/*
3539 * Read wrapper for fifos.
3540 */
3541int
3542nfsfifo_read(void *v)
3543{
3544	struct vop_read_args *ap = v;
3545	struct nfsnode *np = VTONFS(ap->a_vp);
3546
3547	/*
3548	 * Set access flag.
3549	 */
3550	np->n_flag |= NACC;
3551	getnanotime(&np->n_atim);
3552	return (fifo_read(ap));
3553}
3554
3555/*
3556 * Write wrapper for fifos.
3557 */
3558int
3559nfsfifo_write(void *v)
3560{
3561	struct vop_write_args *ap = v;
3562	struct nfsnode *np = VTONFS(ap->a_vp);
3563
3564	/*
3565	 * Set update flag.
3566	 */
3567	np->n_flag |= NUPD;
3568	getnanotime(&np->n_mtim);
3569	return (fifo_write(ap));
3570}
3571
3572/*
3573 * Close wrapper for fifos.
3574 *
3575 * Update the times on the nfsnode then do fifo close.
3576 */
3577int
3578nfsfifo_close(void *v)
3579{
3580	struct vop_close_args *ap = v;
3581	struct vnode *vp = ap->a_vp;
3582	struct nfsnode *np = VTONFS(vp);
3583	struct vattr vattr;
3584
3585	if (np->n_flag & (NACC | NUPD)) {
3586		if (np->n_flag & NACC) {
3587			getnanotime(&np->n_atim);
3588		}
3589		if (np->n_flag & NUPD) {
3590			getnanotime(&np->n_mtim);
3591		}
3592		np->n_flag |= NCHG;
3593		if (vp->v_usecount == 1 &&
3594		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
3595			VATTR_NULL(&vattr);
3596			if (np->n_flag & NACC)
3597				vattr.va_atime = np->n_atim;
3598			if (np->n_flag & NUPD)
3599				vattr.va_mtime = np->n_mtim;
3600			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
3601		}
3602	}
3603	return (fifo_close(ap));
3604}
3605
/*
 * Reclaim vnode op for fifos on NFS.
 *
 * Calls fifo_reclaim() first (its return value is discarded) and then
 * returns the result of nfs_reclaim() on the same arguments.
 */
int
nfsfifo_reclaim(void *v)
{
	int error;

	(void)fifo_reclaim(v);
	error = nfs_reclaim(v);

	return (error);
}
#endif /* FIFO */
3613