nfs_subs.c revision 177599
1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)nfs_subs.c  8.8 (Berkeley) 5/22/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_subs.c 177599 2008-03-25 09:39:02Z ru $");
37
38/*
39 * These functions support the macros and help fiddle mbuf chains for
40 * the nfs op functions. They do things like create the rpc header and
41 * copy data between mbuf chains and uio lists.
42 */
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/kernel.h>
47#include <sys/bio.h>
48#include <sys/buf.h>
49#include <sys/proc.h>
50#include <sys/mount.h>
51#include <sys/vnode.h>
52#include <sys/namei.h>
53#include <sys/mbuf.h>
54#include <sys/socket.h>
55#include <sys/stat.h>
56#include <sys/malloc.h>
57#include <sys/sysent.h>
58#include <sys/syscall.h>
59#include <sys/sysproto.h>
60
61#include <vm/vm.h>
62#include <vm/vm_object.h>
63#include <vm/vm_extern.h>
64#include <vm/uma.h>
65
66#include <rpc/rpcclnt.h>
67
68#include <nfs/rpcv2.h>
69#include <nfs/nfsproto.h>
70#include <nfsclient/nfs.h>
71#include <nfsclient/nfsnode.h>
72#include <nfs/xdr_subs.h>
73#include <nfsclient/nfsm_subs.h>
74#include <nfsclient/nfsmount.h>
75
76#include <netinet/in.h>
77
78/*
79 * Note that stdarg.h and the ANSI style va_start macro is used for both
80 * ANSI and traditional C compilers.
81 */
82#include <machine/stdarg.h>
83
84/*
85 * Data items converted to xdr at startup, since they are constant
86 * This is kinda hokey, but may save a little time doing byte swaps
87 */
88u_int32_t	nfs_xdrneg1;
89u_int32_t	rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
90		    rpc_mismatch, rpc_auth_unix, rpc_msgaccepted;
91u_int32_t	nfs_true, nfs_false;
92
93/* And other global data */
94static u_int32_t nfs_xid = 0;
95static enum vtype nv2tov_type[8]= {
96	VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON,  VNON
97};
98
99int		nfs_ticks;
100int		nfs_pbuf_freecnt = -1;	/* start out unlimited */
101
102struct nfs_reqq	nfs_reqq;
103struct mtx nfs_reqq_mtx;
104struct nfs_bufq	nfs_bufq;
105static struct mtx nfs_xid_mtx;
106
107/*
108 * and the reverse mapping from generic to Version 2 procedure numbers
109 */
110int nfsv2_procid[NFS_NPROCS] = {
111	NFSV2PROC_NULL,
112	NFSV2PROC_GETATTR,
113	NFSV2PROC_SETATTR,
114	NFSV2PROC_LOOKUP,
115	NFSV2PROC_NOOP,
116	NFSV2PROC_READLINK,
117	NFSV2PROC_READ,
118	NFSV2PROC_WRITE,
119	NFSV2PROC_CREATE,
120	NFSV2PROC_MKDIR,
121	NFSV2PROC_SYMLINK,
122	NFSV2PROC_CREATE,
123	NFSV2PROC_REMOVE,
124	NFSV2PROC_RMDIR,
125	NFSV2PROC_RENAME,
126	NFSV2PROC_LINK,
127	NFSV2PROC_READDIR,
128	NFSV2PROC_NOOP,
129	NFSV2PROC_STATFS,
130	NFSV2PROC_NOOP,
131	NFSV2PROC_NOOP,
132	NFSV2PROC_NOOP,
133	NFSV2PROC_NOOP,
134};
135
136LIST_HEAD(nfsnodehashhead, nfsnode);
137
138u_int32_t
139nfs_xid_gen(void)
140{
141	uint32_t xid;
142
143	mtx_lock(&nfs_xid_mtx);
144
145	/* Get a pretty random xid to start with */
146	if (!nfs_xid)
147		nfs_xid = random();
148	/*
149	 * Skip zero xid if it should ever happen.
150	 */
151	if (++nfs_xid == 0)
152		nfs_xid++;
153	xid = nfs_xid;
154	mtx_unlock(&nfs_xid_mtx);
155	return xid;
156}
157
158/*
159 * Create the header for an rpc request packet
160 * The hsiz is the size of the rest of the nfs request header.
161 * (just used to decide if a cluster is a good idea)
162 */
163struct mbuf *
164nfsm_reqhead(struct vnode *vp, u_long procid, int hsiz)
165{
166	struct mbuf *mb;
167
168	MGET(mb, M_WAIT, MT_DATA);
169	if (hsiz >= MINCLSIZE)
170		MCLGET(mb, M_WAIT);
171	mb->m_len = 0;
172	return (mb);
173}
174
175/*
176 * Build the RPC header and fill in the authorization info.
177 * The authorization string argument is only used when the credentials
178 * come from outside of the kernel.
179 * Returns the head of the mbuf list.
180 */
/*
 * Build the RPC header and fill in the authorization info.
 * The authorization string argument is only used when the credentials
 * come from outside of the kernel.
 * Returns the head of the mbuf list; *mbp is set to the last mbuf so the
 * caller can keep appending, and *xidpp points at the xid word in case it
 * must be patched on retransmit.
 */
struct mbuf *
nfsm_rpchead(struct ucred *cr, int nmflag, int procid, int auth_type,
    int auth_len, struct mbuf *mrest, int mrest_len, struct mbuf **mbp,
    u_int32_t **xidpp)
{
	struct mbuf *mb;
	u_int32_t *tl;
	caddr_t bpos;		/* current fill position; used by nfsm_build */
	int i;
	struct mbuf *mreq;
	int grpsiz, authsiz;

	/* Credentials are padded to a 4-byte XDR boundary. */
	authsiz = nfsm_rndup(auth_len);
	MGETHDR(mb, M_WAIT, MT_DATA);
	/*
	 * Size the first mbuf: a cluster if the header + creds won't fit
	 * in an ordinary mbuf, otherwise align the data region so the
	 * whole header lands at the end of the mbuf.
	 */
	if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) {
		MCLGET(mb, M_WAIT);
	} else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) {
		MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED);
	} else {
		MH_ALIGN(mb, 8 * NFSX_UNSIGNED);
	}
	mb->m_len = 0;
	mreq = mb;
	bpos = mtod(mb, caddr_t);

	/*
	 * First the RPC header.
	 * NOTE: nfsm_build() is a macro that expands in terms of the local
	 * variables mb and bpos, advancing both.
	 */
	tl = nfsm_build(u_int32_t *, 8 * NFSX_UNSIGNED);

	*xidpp = tl;
	*tl++ = txdr_unsigned(nfs_xid_gen());
	*tl++ = rpc_call;
	*tl++ = rpc_vers;
	*tl++ = txdr_unsigned(NFS_PROG);
	if (nmflag & NFSMNT_NFSV3) {
		*tl++ = txdr_unsigned(NFS_VER3);
		*tl++ = txdr_unsigned(procid);
	} else {
		/* NFSv2 uses its own procedure numbering. */
		*tl++ = txdr_unsigned(NFS_VER2);
		*tl++ = txdr_unsigned(nfsv2_procid[procid]);
	}

	/*
	 * And then the authorization cred.
	 */
	*tl++ = txdr_unsigned(auth_type);
	*tl = txdr_unsigned(authsiz);
	switch (auth_type) {
	case RPCAUTH_UNIX:
		tl = nfsm_build(u_int32_t *, auth_len);
		*tl++ = 0;		/* stamp ?? */
		*tl++ = 0;		/* NULL hostname */
		*tl++ = txdr_unsigned(cr->cr_uid);
		*tl++ = txdr_unsigned(cr->cr_groups[0]);
		/* auth_len covers stamp/host/uid/gid/gid-count plus groups. */
		grpsiz = (auth_len >> 2) - 5;
		*tl++ = txdr_unsigned(grpsiz);
		for (i = 1; i <= grpsiz; i++)
			*tl++ = txdr_unsigned(cr->cr_groups[i]);
		break;
	}

	/*
	 * And the verifier...  AUTH_NULL with zero length.
	 */
	tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
	*tl++ = txdr_unsigned(RPCAUTH_NULL);
	*tl = 0;
	/* Chain the caller-supplied request body onto the header. */
	mb->m_next = mrest;
	mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len;
	mreq->m_pkthdr.rcvif = NULL;
	*mbp = mb;
	return (mreq);
}
255
256/*
257 * copies a uio scatter/gather list to an mbuf chain.
258 * NOTE: can ony handle iovcnt == 1
259 */
/*
 * Copies a uio scatter/gather list to an mbuf chain.
 * NOTE: can only handle iovcnt == 1.
 * On return *mq points at the last mbuf of the chain and *bpos at the
 * next free byte within it.  Always returns 0.
 */
int
nfsm_uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, caddr_t *bpos)
{
	char *uiocp;
	struct mbuf *mp, *mp2;
	int xfer, left, mlen;
	int uiosiz, clflg, rem;
	char *cp;

#ifdef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1)
		panic("nfsm_uiotombuf: iovcnt != 1");
#endif

	/* Use clusters for the new mbufs if the data won't fit in MLEN. */
	if (siz > MLEN)		/* or should it >= MCLBYTES ?? */
		clflg = 1;
	else
		clflg = 0;
	/* Bytes of zero padding needed to reach an XDR 4-byte boundary. */
	rem = nfsm_rndup(siz)-siz;
	mp = mp2 = *mq;
	while (siz > 0) {
		left = uiop->uio_iov->iov_len;
		uiocp = uiop->uio_iov->iov_base;
		if (left > siz)
			left = siz;
		uiosiz = left;
		while (left > 0) {
			mlen = M_TRAILINGSPACE(mp);
			if (mlen == 0) {
				/* Current mbuf is full; append a new one. */
				MGET(mp, M_WAIT, MT_DATA);
				if (clflg)
					MCLGET(mp, M_WAIT);
				mp->m_len = 0;
				mp2->m_next = mp;
				mp2 = mp;
				mlen = M_TRAILINGSPACE(mp);
			}
			xfer = (left > mlen) ? mlen : left;
#ifdef notdef
			/* Not Yet.. */
			if (uiop->uio_iov->iov_op != NULL)
				(*(uiop->uio_iov->iov_op))
				(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
			else
#endif
			/* Kernel data can be bcopy'd; user data needs copyin. */
			if (uiop->uio_segflg == UIO_SYSSPACE)
				bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
			else
				copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
			mp->m_len += xfer;
			left -= xfer;
			uiocp += xfer;
			uiop->uio_offset += xfer;
			uiop->uio_resid -= xfer;
		}
		/* Consume the transferred bytes from the (single) iovec. */
		uiop->uio_iov->iov_base =
		    (char *)uiop->uio_iov->iov_base + uiosiz;
		uiop->uio_iov->iov_len -= uiosiz;
		siz -= uiosiz;
	}
	if (rem > 0) {
		/* Emit the XDR padding, growing the chain if needed. */
		if (rem > M_TRAILINGSPACE(mp)) {
			MGET(mp, M_WAIT, MT_DATA);
			mp->m_len = 0;
			mp2->m_next = mp;
		}
		cp = mtod(mp, caddr_t)+mp->m_len;
		for (left = 0; left < rem; left++)
			*cp++ = '\0';
		mp->m_len += rem;
		*bpos = cp;
	} else
		*bpos = mtod(mp, caddr_t)+mp->m_len;
	*mq = mp;
	return (0);
}
336
337/*
338 * Copy a string into mbufs for the hard cases...
339 */
/*
 * Copy a string into mbufs for the hard cases...
 * Emits the XDR length word followed by the string data (padded to a
 * 4-byte boundary), appending mbufs/clusters to *mb as needed.  On
 * return *mb and *bpos track the new end of the chain.  Always returns 0.
 */
int
nfsm_strtmbuf(struct mbuf **mb, char **bpos, const char *cp, long siz)
{
	struct mbuf *m1 = NULL, *m2;
	long left, xfer, len, tlen;
	u_int32_t *tl;
	int putsize;		/* nonzero until the length word is written */

	putsize = 1;
	m2 = *mb;
	left = M_TRAILINGSPACE(m2);
	if (left > 0) {
		/* Room in the current mbuf: write the length word here. */
		tl = ((u_int32_t *)(*bpos));
		*tl++ = txdr_unsigned(siz);
		putsize = 0;
		left -= NFSX_UNSIGNED;
		m2->m_len += NFSX_UNSIGNED;
		if (left > 0) {
			/* And as much of the string as fits. */
			bcopy(cp, (caddr_t) tl, left);
			siz -= left;
			cp += left;
			m2->m_len += left;
			left = 0;
		}
	}
	/* Loop around adding mbufs */
	while (siz > 0) {
		MGET(m1, M_WAIT, MT_DATA);
		if (siz > MLEN)
			MCLGET(m1, M_WAIT);
		/* Tentatively claim the whole mbuf; trimmed below. */
		m1->m_len = NFSMSIZ(m1);
		m2->m_next = m1;
		m2 = m1;
		tl = mtod(m1, u_int32_t *);
		tlen = 0;
		if (putsize) {
			/* Length word still pending: it goes first. */
			*tl++ = txdr_unsigned(siz);
			m1->m_len -= NFSX_UNSIGNED;
			tlen = NFSX_UNSIGNED;
			putsize = 0;
		}
		if (siz < m1->m_len) {
			/* Final chunk: round up and zero the pad word. */
			len = nfsm_rndup(siz);
			xfer = siz;
			if (xfer < len)
				*(tl+(xfer>>2)) = 0;
		} else {
			xfer = len = m1->m_len;
		}
		bcopy(cp, (caddr_t) tl, xfer);
		m1->m_len = len+tlen;
		siz -= xfer;
		cp += xfer;
	}
	*mb = m1;
	*bpos = mtod(m1, caddr_t)+m1->m_len;
	return (0);
}
398
399/*
400 * Called once to initialize data structures...
401 */
/*
 * Called once to initialize data structures: the nfsmount UMA zone, the
 * pre-XDR'd constants, the nfsiod arrays, the nfsnode hash table, the
 * request queue, callout and mutexes.  Always returns 0.
 */
int
nfs_init(struct vfsconf *vfsp)
{
	int i;

	nfsmount_zone = uma_zcreate("NFSMOUNT", sizeof(struct nfsmount),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	/* Convert constant RPC values to XDR once, at startup. */
	rpc_vers = txdr_unsigned(RPC_VER2);
	rpc_call = txdr_unsigned(RPC_CALL);
	rpc_reply = txdr_unsigned(RPC_REPLY);
	rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
	rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
	rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
	rpc_autherr = txdr_unsigned(RPC_AUTHERR);
	rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
	nfs_true = txdr_unsigned(TRUE);
	nfs_false = txdr_unsigned(FALSE);
	nfs_xdrneg1 = txdr_unsigned(-1);
	/* NFS_TICKINTVL is in milliseconds; convert to clock ticks. */
	nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
	if (nfs_ticks < 1)
		nfs_ticks = 1;
	/* Ensure async daemons disabled */
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
		nfs_iodwant[i] = NULL;
		nfs_iodmount[i] = NULL;
	}
	nfs_nhinit();			/* Init the nfsnode table */

	/*
	 * Initialize reply list and start timer
	 */
	TAILQ_INIT(&nfs_reqq);
	callout_init(&nfs_callout, CALLOUT_MPSAFE);
	mtx_init(&nfs_reqq_mtx, "NFS reqq lock", NULL, MTX_DEF);
	mtx_init(&nfs_iod_mtx, "NFS iod lock", NULL, MTX_DEF);
	mtx_init(&nfs_xid_mtx, "NFS xid lock", NULL, MTX_DEF);

	nfs_pbuf_freecnt = nswbuf / 2 + 1;

	return (0);
}
443
/*
 * Undo nfs_init(): stop the timer, shut down all nfsiod kernel
 * processes, and tear down the nfsnode table and the mount zone.
 * Always returns 0.
 */
int
nfs_uninit(struct vfsconf *vfsp)
{
	int i;

	callout_stop(&nfs_callout);

	KASSERT(TAILQ_EMPTY(&nfs_reqq),
	    ("nfs_uninit: request queue not empty"));

	/*
	 * Tell all nfsiod processes to exit. Clear nfs_iodmax, and wakeup
	 * any sleeping nfsiods so they check nfs_iodmax and exit.
	 */
	mtx_lock(&nfs_iod_mtx);
	nfs_iodmax = 0;
	for (i = 0; i < nfs_numasync; i++)
		if (nfs_iodwant[i])
			wakeup(&nfs_iodwant[i]);
	/* The last nfsiod to exit will wake us up when nfs_numasync hits 0 */
	while (nfs_numasync)
		msleep(&nfs_numasync, &nfs_iod_mtx, PWAIT, "ioddie", 0);
	mtx_unlock(&nfs_iod_mtx);
	nfs_nhuninit();
	uma_zdestroy(nfsmount_zone);
	return (0);
}
471
472void
473nfs_dircookie_lock(struct nfsnode *np)
474{
475	mtx_lock(&np->n_mtx);
476	while (np->n_flag & NDIRCOOKIELK)
477		(void) msleep(&np->n_flag, &np->n_mtx, PZERO, "nfsdirlk", 0);
478	np->n_flag |= NDIRCOOKIELK;
479	mtx_unlock(&np->n_mtx);
480}
481
/*
 * Release the directory-cookie sleep lock taken by nfs_dircookie_lock()
 * and wake any threads sleeping on it.
 */
void
nfs_dircookie_unlock(struct nfsnode *np)
{
	mtx_lock(&np->n_mtx);
	np->n_flag &= ~NDIRCOOKIELK;
	wakeup(&np->n_flag);
	mtx_unlock(&np->n_mtx);
}
490
491int
492nfs_upgrade_vnlock(struct vnode *vp)
493{
494	int old_lock;
495
496 	if ((old_lock = VOP_ISLOCKED(vp)) != LK_EXCLUSIVE) {
497 		if (old_lock == LK_SHARED) {
498 			/* Upgrade to exclusive lock, this might block */
499 			vn_lock(vp, LK_UPGRADE | LK_RETRY);
500 		} else {
501 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
502 		}
503  	}
504	return old_lock;
505}
506
507void
508nfs_downgrade_vnlock(struct vnode *vp, int old_lock)
509{
510	if (old_lock != LK_EXCLUSIVE) {
511 		if (old_lock == LK_SHARED) {
512 			/* Downgrade from exclusive lock, this might block */
513 			vn_lock(vp, LK_DOWNGRADE);
514 		} else {
515 			VOP_UNLOCK(vp, 0);
516 		}
517  	}
518}
519
520void
521nfs_printf(const char *fmt, ...)
522{
523	va_list ap;
524
525	mtx_lock(&Giant);
526	va_start(ap, fmt);
527	printf(fmt, ap);
528	va_end(ap);
529	mtx_unlock(&Giant);
530}
531
532/*
533 * Attribute cache routines.
534 * nfs_loadattrcache() - loads or updates the cache contents from attributes
535 *	that are on the mbuf list
536 * nfs_getattrcache() - returns valid attributes if found in cache, returns
537 *	error otherwise
538 */
539
540/*
541 * Load the attribute cache (that lives in the nfsnode entry) with
542 * the values on the mbuf list and
543 * Iff vap not NULL
544 *    copy the attributes to *vaper
545 */
/*
 * Load the attribute cache (that lives in the nfsnode entry) with
 * the values on the mbuf list and
 * Iff vap not NULL
 *    copy the attributes to *vaper
 *
 * On success *mdp/*dposp are advanced past the fattr on the wire and the
 * node's cached attributes (np->n_vattr) are updated under n_mtx.
 * Returns 0 or EBADRPC if the fattr cannot be pulled from the mbufs.
 */
int
nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
		  struct vattr *vaper, int dontshrink)
{
	struct vnode *vp = *vpp;
	struct vattr *vap;
	struct nfs_fattr *fp;
	struct nfsnode *np;
	int32_t t1;
	caddr_t cp2;
	int rdev;
	struct mbuf *md;
	enum vtype vtyp;
	u_short vmode;
	struct timespec mtime, mtime_save;
	int v3 = NFS_ISV3(vp);
	struct thread *td = curthread;

	md = *mdp;
	/* Bytes remaining in the current mbuf from *dposp onward. */
	t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
	/* Get a contiguous view of the fattr, pulling up if necessary. */
	cp2 = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, M_WAIT);
	if (cp2 == NULL)
		return EBADRPC;
	fp = (struct nfs_fattr *)cp2;
	if (v3) {
		vtyp = nfsv3tov_type(fp->fa_type);
		vmode = fxdr_unsigned(u_short, fp->fa_mode);
		rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1),
			fxdr_unsigned(int, fp->fa3_rdev.specdata2));
		fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
	} else {
		vtyp = nfsv2tov_type(fp->fa_type);
		vmode = fxdr_unsigned(u_short, fp->fa_mode);
		/*
		 * XXX
		 *
		 * The duplicate information returned in fa_type and fa_mode
		 * is an ambiguity in the NFS version 2 protocol.
		 *
		 * VREG should be taken literally as a regular file.  If a
		 * server intents to return some type information differently
		 * in the upper bits of the mode field (e.g. for sockets, or
		 * FIFOs), NFSv2 mandates fa_type to be VNON.  Anyway, we
		 * leave the examination of the mode bits even in the VREG
		 * case to avoid breakage for bogus servers, but we make sure
		 * that there are actually type bits set in the upper part of
		 * fa_mode (and failing that, trust the va_type field).
		 *
		 * NFSv3 cleared the issue, and requires fa_mode to not
		 * contain any type information (while also introduing sockets
		 * and FIFOs for fa_type).
		 */
		if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0))
			vtyp = IFTOVT(vmode);
		rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
		fxdr_nfsv2time(&fp->fa2_mtime, &mtime);

		/*
		 * Really ugly NFSv2 kludge.
		 */
		if (vtyp == VCHR && rdev == 0xffffffff)
			vtyp = VFIFO;
	}

	/*
	 * If v_type == VNON it is a new node, so fill in the v_type,
	 * n_mtime fields. Check to see if it represents a special
	 * device, and if so, check for a possible alias. Once the
	 * correct vnode has been obtained, fill in the rest of the
	 * information.
	 */
	np = VTONFS(vp);
	mtx_lock(&np->n_mtx);
	if (vp->v_type != vtyp) {
		vp->v_type = vtyp;
		if (vp->v_type == VFIFO)
			vp->v_op = &nfs_fifoops;
		np->n_mtime = mtime;
	}
	vap = &np->n_vattr;
	vap->va_type = vtyp;
	/* Keep only the permission bits; type bits were handled above. */
	vap->va_mode = (vmode & 07777);
	vap->va_rdev = rdev;
	/* Remember the old mtime for the staleness check below. */
	mtime_save = vap->va_mtime;
	vap->va_mtime = mtime;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	if (v3) {
		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
		vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
		vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
		vap->va_size = fxdr_hyper(&fp->fa3_size);
		vap->va_blocksize = NFS_FABLKSIZE;
		vap->va_bytes = fxdr_hyper(&fp->fa3_used);
		/* Only the low 32 bits of the v3 fileid are kept. */
		vap->va_fileid = fxdr_unsigned(int32_t,
		    fp->fa3_fileid.nfsuquad[1]);
		fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
		fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime);
		vap->va_flags = 0;
		vap->va_filerev = 0;
	} else {
		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
		vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
		vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
		vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
		vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize);
		vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks)
		    * NFS_FABLKSIZE;
		vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
		fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
		vap->va_flags = 0;
		vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t,
		    fp->fa2_ctime.nfsv2_sec);
		vap->va_ctime.tv_nsec = 0;
		/* v2 overloads the ctime microseconds field as a gen count. */
		vap->va_gen = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_usec);
		vap->va_filerev = 0;
	}
	np->n_attrstamp = time_second;
	/* Timestamp the NFS otw getattr fetch */
	if (td->td_proc) {
		np->n_ac_ts_tid = td->td_tid;
		np->n_ac_ts_pid = td->td_proc->p_pid;
		np->n_ac_ts_syscalls = td->td_syscalls;
	} else
		bzero(&np->n_ac_ts, sizeof(struct nfs_attrcache_timestamp));

	if (vap->va_size != np->n_size) {
		if (vap->va_type == VREG) {
			if (dontshrink && vap->va_size < np->n_size) {
				/*
				 * We've been told not to shrink the file;
				 * zero np->n_attrstamp to indicate that
				 * the attributes are stale.
				 */
				vap->va_size = np->n_size;
				np->n_attrstamp = 0;
			} else if (np->n_flag & NMODIFIED) {
				/*
				 * We've modified the file: Use the larger
				 * of our size, and the server's size.
				 */
				if (vap->va_size < np->n_size) {
					vap->va_size = np->n_size;
				} else {
					np->n_size = vap->va_size;
					np->n_flag |= NSIZECHANGED;
				}
			} else {
				np->n_size = vap->va_size;
				np->n_flag |= NSIZECHANGED;
			}
			vnode_pager_setsize(vp, np->n_size);
		} else {
			np->n_size = vap->va_size;
		}
	}
	/*
	 * The following checks are added to prevent a race between (say)
	 * a READDIR+ and a WRITE.
	 * READDIR+, WRITE requests sent out.
	 * READDIR+ resp, WRITE resp received on client.
	 * However, the WRITE resp was handled before the READDIR+ resp
	 * causing the post op attrs from the write to be loaded first
	 * and the attrs from the READDIR+ to be loaded later. If this
	 * happens, we have stale attrs loaded into the attrcache.
	 * We detect this by checking for the mtime moving backwards, and
	 * invalidate the attrcache when it happens.
	 */
	if (timespeccmp(&mtime_save, &vap->va_mtime, >))
		/* mtime went backwards: mark the cached attrs stale */
		np->n_attrstamp = 0;
	if (vaper != NULL) {
		bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
		/* Locally-recorded access/update times take precedence. */
		if (np->n_flag & NCHG) {
			if (np->n_flag & NACC)
				vaper->va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vaper->va_mtime = np->n_mtim;
		}
	}
	mtx_unlock(&np->n_mtx);
	return (0);
}
728
729#ifdef NFS_ACDEBUG
730#include <sys/sysctl.h>
731SYSCTL_DECL(_vfs_nfs);
732static int nfs_acdebug;
733SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, "");
734#endif
735
736/*
737 * Check the time stamp
738 * If the cache is valid, copy contents to *vap and return 0
739 * otherwise return an error
740 */
/*
 * Check the time stamp
 * If the cache is valid, copy contents to *vap and return 0
 * otherwise return an error (ENOENT on a cache miss).
 *
 * The validity window scales with the file's apparent stability (time
 * since last modification), clamped between the mount's ac*min/ac*max.
 */
int
nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
{
	struct nfsnode *np;
	struct vattr *vap;
	struct nfsmount *nmp;
	int timeo;

	np = VTONFS(vp);
	vap = &np->n_vattr;
	nmp = VFSTONFS(vp->v_mount);
#ifdef NFS_ACDEBUG
	mtx_lock(&Giant);	/* nfs_printf() */
#endif
	mtx_lock(&np->n_mtx);
	/* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
	timeo = (time_second - np->n_mtime.tv_sec) / 10;

#ifdef NFS_ACDEBUG
	if (nfs_acdebug>1)
		nfs_printf("nfs_getattrcache: initial timeo = %d\n", timeo);
#endif

	/* Clamp the timeout; modified nodes always get the minimum. */
	if (vap->va_type == VDIR) {
		if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin)
			timeo = nmp->nm_acdirmin;
		else if (timeo > nmp->nm_acdirmax)
			timeo = nmp->nm_acdirmax;
	} else {
		if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin)
			timeo = nmp->nm_acregmin;
		else if (timeo > nmp->nm_acregmax)
			timeo = nmp->nm_acregmax;
	}

#ifdef NFS_ACDEBUG
	if (nfs_acdebug > 2)
		nfs_printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
			   nmp->nm_acregmin, nmp->nm_acregmax,
			   nmp->nm_acdirmin, nmp->nm_acdirmax);

	if (nfs_acdebug)
		nfs_printf("nfs_getattrcache: age = %d; final timeo = %d\n",
			   (time_second - np->n_attrstamp), timeo);
#endif

	/* Cache entry too old (or attrstamp zeroed): report a miss. */
	if ((time_second - np->n_attrstamp) >= timeo) {
		nfsstats.attrcache_misses++;
		mtx_unlock(&np->n_mtx);
		return( ENOENT);
	}
	nfsstats.attrcache_hits++;
	/* Reconcile the cached size with our notion of the file size. */
	if (vap->va_size != np->n_size) {
		if (vap->va_type == VREG) {
			if (np->n_flag & NMODIFIED) {
				/* Modified locally: keep the larger size. */
				if (vap->va_size < np->n_size)
					vap->va_size = np->n_size;
				else
					np->n_size = vap->va_size;
			} else {
				np->n_size = vap->va_size;
			}
			vnode_pager_setsize(vp, np->n_size);
		} else {
			np->n_size = vap->va_size;
		}
	}
	bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
	/* Locally-recorded access/update times take precedence. */
	if (np->n_flag & NCHG) {
		if (np->n_flag & NACC)
			vaper->va_atime = np->n_atim;
		if (np->n_flag & NUPD)
			vaper->va_mtime = np->n_mtim;
	}
	mtx_unlock(&np->n_mtx);
#ifdef NFS_ACDEBUG
	mtx_unlock(&Giant);	/* nfs_printf() */
#endif
	return (0);
}
821
/* All-zero cookie returned for offsets in the first directory block. */
static nfsuint64 nfs_nullcookie = { { 0, 0 } };
/*
 * This function finds the directory cookie that corresponds to the
 * logical byte offset given.  If add is nonzero, nfsdmap entries are
 * allocated as needed to extend the cookie list that far; otherwise
 * NULL is returned when the offset is beyond the known cookies.
 * The caller is expected to hold the dircookie lock (see
 * nfs_dircookie_lock()).
 */
nfsuint64 *
nfs_getcookie(struct nfsnode *np, off_t off, int add)
{
	struct nfsdmap *dp, *dp2;
	int pos;
	nfsuint64 *retval = NULL;

	/* One cookie per NFS_DIRBLKSIZ-sized directory block. */
	pos = (uoff_t)off / NFS_DIRBLKSIZ;
	if (pos == 0 || off < 0) {
#ifdef DIAGNOSTIC
		if (add)
			panic("nfs getcookie add at <= 0");
#endif
		/* Block 0 always starts at the null cookie. */
		return (&nfs_nullcookie);
	}
	pos--;
	dp = LIST_FIRST(&np->n_cookies);
	if (!dp) {
		if (add) {
			MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap),
				M_NFSDIROFF, M_WAITOK);
			dp->ndm_eocookie = 0;
			LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list);
		} else
			goto out;
	}
	/* Walk (and optionally grow) the chain of cookie blocks. */
	while (pos >= NFSNUMCOOKIES) {
		pos -= NFSNUMCOOKIES;
		if (LIST_NEXT(dp, ndm_list)) {
			if (!add && dp->ndm_eocookie < NFSNUMCOOKIES &&
			    pos >= dp->ndm_eocookie)
				goto out;
			dp = LIST_NEXT(dp, ndm_list);
		} else if (add) {
			MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap),
				M_NFSDIROFF, M_WAITOK);
			dp2->ndm_eocookie = 0;
			LIST_INSERT_AFTER(dp, dp2, ndm_list);
			dp = dp2;
		} else
			goto out;
	}
	if (pos >= dp->ndm_eocookie) {
		if (add)
			/* Extend the in-use count to cover this slot. */
			dp->ndm_eocookie = pos + 1;
		else
			goto out;
	}
	retval = &dp->ndm_cookies[pos];
out:
	return (retval);
}
879
880/*
881 * Invalidate cached directory information, except for the actual directory
882 * blocks (which are invalidated separately).
883 * Done mainly to avoid the use of stale offset cookies.
884 */
885void
886nfs_invaldir(struct vnode *vp)
887{
888	struct nfsnode *np = VTONFS(vp);
889
890#ifdef DIAGNOSTIC
891	if (vp->v_type != VDIR)
892		panic("nfs: invaldir not dir");
893#endif
894	nfs_dircookie_lock(np);
895	np->n_direofoffset = 0;
896	np->n_cookieverf.nfsuquad[0] = 0;
897	np->n_cookieverf.nfsuquad[1] = 0;
898	if (LIST_FIRST(&np->n_cookies))
899		LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0;
900	nfs_dircookie_unlock(np);
901}
902
903/*
904 * The write verifier has changed (probably due to a server reboot), so all
905 * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
906 * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
907 * and B_CLUSTEROK flags.  Once done the new write verifier can be set for the
908 * mount point.
909 *
910 * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
911 * writes are not clusterable.
912 */
/*
 * The write verifier has changed (probably due to a server reboot), so all
 * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
 * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
 * and B_CLUSTEROK flags.  Once done the new write verifier can be set for the
 * mount point.
 *
 * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
 * writes are not clusterable.
 */
void
nfs_clearcommit(struct mount *mp)
{
	struct vnode *vp, *nvp;
	struct buf *bp, *nbp;
	struct bufobj *bo;

	MNT_ILOCK(mp);
	MNT_VNODE_FOREACH(vp, mp, nvp) {
		bo = &vp->v_bufobj;
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			/* Vnode is being recycled; skip it. */
			VI_UNLOCK(vp);
			continue;
		}
		/*
		 * Hold the vnode so it survives while we drop the mount
		 * interlock to take the buf-object lock.
		 */
		vholdl(vp);
		VI_UNLOCK(vp);
		MNT_IUNLOCK(mp);
		BO_LOCK(bo);
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			/* Skip buffers locked by an in-progress write. */
			if (!BUF_ISLOCKED(bp) &&
			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
				== (B_DELWRI | B_NEEDCOMMIT))
				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
		}
		BO_UNLOCK(bo);
		vdrop(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
}
944
945/*
946 * Helper functions for former macros.  Some of these should be
947 * moved to their callers.
948 */
949
/*
 * Helper for the old nfsm_mtofh macro: parse an optional file handle
 * (plus, for v3, optional post-op attributes) from the reply in *md at
 * *dpos, and produce the corresponding vnode in *v.
 * d is the directory vnode (supplies the mount point for nfs_nget()).
 * *f is set to whether attributes were present/loaded.
 * Returns 0 or an nfs error code (e.g. EBADRPC on a short reply).
 */
int
nfsm_mtofh_xx(struct vnode *d, struct vnode **v, int v3, int *f,
    struct mbuf **md, caddr_t *dpos)
{
	struct nfsnode *ttnp;
	struct vnode *ttvp;
	nfsfh_t *ttfhp;
	u_int32_t *tl;
	int ttfhsize;
	int t1;

	if (v3) {
		/* v3 replies carry an explicit "handle follows" boolean. */
		tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
		if (tl == NULL)
			return EBADRPC;
		*f = fxdr_unsigned(int, *tl);
	} else
		*f = 1;		/* v2 always includes the handle */
	if (*f) {
		t1 = nfsm_getfh_xx(&ttfhp, &ttfhsize, (v3), md, dpos);
		if (t1 != 0)
			return t1;
		t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp, LK_EXCLUSIVE);
		if (t1 != 0)
			return t1;
		*v = NFSTOV(ttnp);
	}
	if (v3) {
		/* "attributes follow" boolean. */
		tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
		if (tl == NULL)
			return EBADRPC;
		if (*f)
			*f = fxdr_unsigned(int, *tl);
		else if (fxdr_unsigned(int, *tl))
			/* Attrs present but no handle: skip over them. */
			nfsm_adv_xx(NFSX_V3FATTR, md, dpos);
	}
	if (*f) {
		ttvp = *v;
		t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 0);
		if (t1)
			return t1;
		*v = ttvp;
	}
	return 0;
}
995
996int
997nfsm_getfh_xx(nfsfh_t **f, int *s, int v3, struct mbuf **md, caddr_t *dpos)
998{
999	u_int32_t *tl;
1000
1001	if (v3) {
1002		tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
1003		if (tl == NULL)
1004			return EBADRPC;
1005		*s = fxdr_unsigned(int, *tl);
1006		if (*s <= 0 || *s > NFSX_V3FHMAX)
1007			return EBADRPC;
1008	} else
1009		*s = NFSX_V2FH;
1010	*f = nfsm_dissect_xx(nfsm_rndup(*s), md, dpos);
1011	if (*f == NULL)
1012		return EBADRPC;
1013	else
1014		return 0;
1015}
1016
1017
1018int
1019nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md,
1020		 caddr_t *dpos)
1021{
1022	int t1;
1023
1024	struct vnode *ttvp = *v;
1025	t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 0);
1026	if (t1 != 0)
1027		return t1;
1028	*v = ttvp;
1029	return 0;
1030}
1031
1032int
1033nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md,
1034		    caddr_t *dpos)
1035{
1036	u_int32_t *tl;
1037	int t1;
1038
1039	struct vnode *ttvp = *v;
1040	tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
1041	if (tl == NULL)
1042		return EBADRPC;
1043	*f = fxdr_unsigned(int, *tl);
1044	if (*f != 0) {
1045		t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 1);
1046		if (t1 != 0) {
1047			*f = 0;
1048			return t1;
1049		}
1050		*v = ttvp;
1051	}
1052	return 0;
1053}
1054
/*
 * Helper for the old nfsm_wcc_data macro: parse v3 weak cache
 * consistency data (optional pre-op attrs, then post-op attrs).
 * On entry *f says whether the caller wants the pre-op mtime compared
 * against the cached one; on exit *f reports either that comparison
 * result (mtime unchanged) or whether post-op attrs were loaded.
 */
int
nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, caddr_t *dpos)
{
	u_int32_t *tl;
	int ttattrf, ttretf = 0;
	int t1;

	/* "pre-op attributes follow" boolean. */
	tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos);
	if (tl == NULL)
		return EBADRPC;
	if (*tl == nfs_true) {
		/* size (hyper), mtime (2 words), ctime (2 words). */
		tl = nfsm_dissect_xx(6 * NFSX_UNSIGNED, md, dpos);
		if (tl == NULL)
			return EBADRPC;
		mtx_lock(&(VTONFS(*v))->n_mtx);
		if (*f)
			/* Compare pre-op mtime (words 2,3) with our cache. */
 			ttretf = (VTONFS(*v)->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) &&
				  VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3)));
		mtx_unlock(&(VTONFS(*v))->n_mtx);
	}
	t1 = nfsm_postop_attr_xx(v, &ttattrf, md, dpos);
	if (t1)
		return t1;
	if (*f)
		*f = ttretf;
	else
		*f = ttattrf;
	return 0;
}
1084
1085int
1086nfsm_strtom_xx(const char *a, int s, int m, struct mbuf **mb, caddr_t *bpos)
1087{
1088	u_int32_t *tl;
1089	int t1;
1090
1091	if (s > m)
1092		return ENAMETOOLONG;
1093	t1 = nfsm_rndup(s) + NFSX_UNSIGNED;
1094	if (t1 <= M_TRAILINGSPACE(*mb)) {
1095		tl = nfsm_build_xx(t1, mb, bpos);
1096		*tl++ = txdr_unsigned(s);
1097		*(tl + ((t1 >> 2) - 2)) = 0;
1098		bcopy(a, tl, s);
1099	} else {
1100		t1 = nfsm_strtmbuf(mb, bpos, a, s);
1101		if (t1 != 0)
1102			return t1;
1103	}
1104	return 0;
1105}
1106
1107int
1108nfsm_fhtom_xx(struct vnode *v, int v3, struct mbuf **mb, caddr_t *bpos)
1109{
1110	u_int32_t *tl;
1111	int t1;
1112	caddr_t cp;
1113
1114	if (v3) {
1115		t1 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED;
1116		if (t1 < M_TRAILINGSPACE(*mb)) {
1117			tl = nfsm_build_xx(t1, mb, bpos);
1118			*tl++ = txdr_unsigned(VTONFS(v)->n_fhsize);
1119			*(tl + ((t1 >> 2) - 2)) = 0;
1120			bcopy(VTONFS(v)->n_fhp, tl, VTONFS(v)->n_fhsize);
1121		} else {
1122			t1 = nfsm_strtmbuf(mb, bpos,
1123			    (const char *)VTONFS(v)->n_fhp,
1124			    VTONFS(v)->n_fhsize);
1125			if (t1 != 0)
1126				return t1;
1127		}
1128	} else {
1129		cp = nfsm_build_xx(NFSX_V2FH, mb, bpos);
1130		bcopy(VTONFS(v)->n_fhp, cp, NFSX_V2FH);
1131	}
1132	return 0;
1133}
1134
/*
 * Helper for the old nfsm_v3attrbuild macro: emit an NFSv3 sattr3
 * structure describing the fields of *va that are not VNOVAL.  Each
 * field is preceded by an XDR boolean saying whether it follows.
 * When full is zero only mode and the times are considered (used for
 * operations like SETATTR-on-create that must not set owner/size).
 */
void
nfsm_v3attrbuild_xx(struct vattr *va, int full, struct mbuf **mb,
    caddr_t *bpos)
{
	u_int32_t *tl;

	/* mode: bool + value. */
	if (va->va_mode != (mode_t)VNOVAL) {
		tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos);
		*tl++ = nfs_true;
		*tl = txdr_unsigned(va->va_mode);
	} else {
		tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		*tl = nfs_false;
	}
	/* uid: bool + value (only when full). */
	if (full && va->va_uid != (uid_t)VNOVAL) {
		tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos);
		*tl++ = nfs_true;
		*tl = txdr_unsigned(va->va_uid);
	} else {
		tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		*tl = nfs_false;
	}
	/* gid: bool + value (only when full). */
	if (full && va->va_gid != (gid_t)VNOVAL) {
		tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos);
		*tl++ = nfs_true;
		*tl = txdr_unsigned(va->va_gid);
	} else {
		tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		*tl = nfs_false;
	}
	/* size: bool + 64-bit hyper (only when full). */
	if (full && va->va_size != VNOVAL) {
		tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos);
		*tl++ = nfs_true;
		txdr_hyper(va->va_size, tl);
	} else {
		tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		*tl = nfs_false;
	}
	/*
	 * atime: if it matches the current second, let the server stamp
	 * it (TOSERVER); otherwise send the explicit client time.
	 */
	if (va->va_atime.tv_sec != VNOVAL) {
		if (va->va_atime.tv_sec != time_second) {
			tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos);
			*tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
			txdr_nfsv3time(&va->va_atime, tl);
		} else {
			tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
			*tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
		}
	} else {
		tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		*tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
	}
	/* mtime: same policy as atime. */
	if (va->va_mtime.tv_sec != VNOVAL) {
		if (va->va_mtime.tv_sec != time_second) {
			tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos);
			*tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT);
			txdr_nfsv3time(&va->va_mtime, tl);
		} else {
			tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
			*tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER);
		}
	} else {
		tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos);
		*tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE);
	}
}
1200