1/*	$NetBSD$	*/
2
3/*
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)nfs_subs.c	8.8 (Berkeley) 5/22/95
35 */
36
37/*
38 * Copyright 2000 Wasabi Systems, Inc.
39 * All rights reserved.
40 *
41 * Written by Frank van der Linden for Wasabi Systems, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *      This product includes software developed for the NetBSD Project by
54 *      Wasabi Systems, Inc.
55 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
56 *    or promote products derived from this software without specific prior
57 *    written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69 * POSSIBILITY OF SUCH DAMAGE.
70 */
71
72#include <sys/cdefs.h>
73__KERNEL_RCSID(0, "$NetBSD$");
74
75#ifdef _KERNEL_OPT
76#include "opt_nfs.h"
77#endif
78
79/*
80 * These functions support the macros and help fiddle mbuf chains for
81 * the nfs op functions. They do things like create the rpc header and
82 * copy data between mbuf chains and uio lists.
83 */
84#include <sys/param.h>
85#include <sys/proc.h>
86#include <sys/systm.h>
87#include <sys/kernel.h>
88#include <sys/kmem.h>
89#include <sys/mount.h>
90#include <sys/vnode.h>
91#include <sys/namei.h>
92#include <sys/mbuf.h>
93#include <sys/socket.h>
94#include <sys/stat.h>
95#include <sys/filedesc.h>
96#include <sys/time.h>
97#include <sys/dirent.h>
98#include <sys/once.h>
99#include <sys/kauth.h>
100#include <sys/atomic.h>
101
102#include <uvm/uvm_extern.h>
103
104#include <nfs/rpcv2.h>
105#include <nfs/nfsproto.h>
106#include <nfs/nfsnode.h>
107#include <nfs/nfs.h>
108#include <nfs/xdr_subs.h>
109#include <nfs/nfsm_subs.h>
110#include <nfs/nfsmount.h>
111#include <nfs/nfsrtt.h>
112#include <nfs/nfs_var.h>
113
114#include <miscfs/specfs/specdev.h>
115
116#include <netinet/in.h>
117
118/*
119 * Attribute cache routines.
120 * nfs_loadattrcache() - loads or updates the cache contents from attributes
121 *	that are on the mbuf list
122 * nfs_getattrcache() - returns valid attributes if found in cache, returns
123 *	error otherwise
124 */
125
126/*
127 * Load the attribute cache (that lives in the nfsnode entry) with
128 * the values on the mbuf list and
129 * Iff vap not NULL
130 *    copy the attributes to *vaper
131 */
132int
133nfsm_loadattrcache(struct vnode **vpp, struct mbuf **mdp, char **dposp, struct vattr *vaper, int flags)
134{
135	int32_t t1;
136	char *cp2;
137	int error = 0;
138	struct mbuf *md;
139	int v3 = NFS_ISV3(*vpp);
140
141	md = *mdp;
142	t1 = (mtod(md, char *) + md->m_len) - *dposp;
143	error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2);
144	if (error)
145		return (error);
146	return nfs_loadattrcache(vpp, (struct nfs_fattr *)cp2, vaper, flags);
147}
148
149int
150nfs_loadattrcache(struct vnode **vpp, struct nfs_fattr *fp, struct vattr *vaper, int flags)
151{
152	struct vnode *vp = *vpp;
153	struct vattr *vap;
154	int v3 = NFS_ISV3(vp);
155	enum vtype vtyp;
156	u_short vmode;
157	struct timespec mtime;
158	struct timespec ctime;
159	int32_t rdev;
160	struct nfsnode *np;
161	extern int (**spec_nfsv2nodeop_p)(void *);
162	uid_t uid;
163	gid_t gid;
164
165	if (v3) {
166		vtyp = nfsv3tov_type(fp->fa_type);
167		vmode = fxdr_unsigned(u_short, fp->fa_mode);
168		rdev = makedev(fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata1),
169			fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata2));
170		fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
171		fxdr_nfsv3time(&fp->fa3_ctime, &ctime);
172	} else {
173		vtyp = nfsv2tov_type(fp->fa_type);
174		vmode = fxdr_unsigned(u_short, fp->fa_mode);
175		if (vtyp == VNON || vtyp == VREG)
176			vtyp = IFTOVT(vmode);
177		rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
178		fxdr_nfsv2time(&fp->fa2_mtime, &mtime);
179		ctime.tv_sec = fxdr_unsigned(u_int32_t,
180		    fp->fa2_ctime.nfsv2_sec);
181		ctime.tv_nsec = 0;
182
183		/*
184		 * Really ugly NFSv2 kludge.
185		 */
186		if (vtyp == VCHR && rdev == 0xffffffff)
187			vtyp = VFIFO;
188	}
189
190	vmode &= ALLPERMS;
191
192	/*
193	 * If v_type == VNON it is a new node, so fill in the v_type,
194	 * n_mtime fields. Check to see if it represents a special
195	 * device, and if so, check for a possible alias. Once the
196	 * correct vnode has been obtained, fill in the rest of the
197	 * information.
198	 */
199	np = VTONFS(vp);
200	if (vp->v_type == VNON) {
201		vp->v_type = vtyp;
202		if (vp->v_type == VFIFO) {
203			extern int (**fifo_nfsv2nodeop_p)(void *);
204			vp->v_op = fifo_nfsv2nodeop_p;
205		} else if (vp->v_type == VREG) {
206			mutex_init(&np->n_commitlock, MUTEX_DEFAULT, IPL_NONE);
207		} else if (vp->v_type == VCHR || vp->v_type == VBLK) {
208			vp->v_op = spec_nfsv2nodeop_p;
209			spec_node_init(vp, (dev_t)rdev);
210		}
211		np->n_mtime = mtime;
212	}
213	uid = fxdr_unsigned(uid_t, fp->fa_uid);
214	gid = fxdr_unsigned(gid_t, fp->fa_gid);
215	vap = np->n_vattr;
216
217	/*
218	 * Invalidate access cache if uid, gid, mode or ctime changed.
219	 */
220	if (np->n_accstamp != -1 &&
221	    (gid != vap->va_gid || uid != vap->va_uid || vmode != vap->va_mode
222	    || timespeccmp(&ctime, &vap->va_ctime, !=)))
223		np->n_accstamp = -1;
224
225	vap->va_type = vtyp;
226	vap->va_mode = vmode;
227	vap->va_rdev = (dev_t)rdev;
228	vap->va_mtime = mtime;
229	vap->va_ctime = ctime;
230	vap->va_birthtime.tv_sec = VNOVAL;
231	vap->va_birthtime.tv_nsec = VNOVAL;
232	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
233	switch (vtyp) {
234	case VDIR:
235		vap->va_blocksize = NFS_DIRFRAGSIZ;
236		break;
237	case VBLK:
238		vap->va_blocksize = BLKDEV_IOSIZE;
239		break;
240	case VCHR:
241		vap->va_blocksize = MAXBSIZE;
242		break;
243	default:
244		vap->va_blocksize = v3 ? vp->v_mount->mnt_stat.f_iosize :
245		    fxdr_unsigned(int32_t, fp->fa2_blocksize);
246		break;
247	}
248	if (v3) {
249		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
250		vap->va_uid = uid;
251		vap->va_gid = gid;
252		vap->va_size = fxdr_hyper(&fp->fa3_size);
253		vap->va_bytes = fxdr_hyper(&fp->fa3_used);
254		vap->va_fileid = fxdr_hyper(&fp->fa3_fileid);
255		fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
256		vap->va_flags = 0;
257		vap->va_filerev = 0;
258	} else {
259		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
260		vap->va_uid = uid;
261		vap->va_gid = gid;
262		vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
263		vap->va_bytes = fxdr_unsigned(int32_t, fp->fa2_blocks)
264		    * NFS_FABLKSIZE;
265		vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
266		fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
267		vap->va_flags = 0;
268		vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec);
269		vap->va_filerev = 0;
270	}
271	if (vap->va_size > VFSTONFS(vp->v_mount)->nm_maxfilesize) {
272		return EFBIG;
273	}
274	if (vap->va_size != np->n_size) {
275		if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) {
276			vap->va_size = np->n_size;
277		} else {
278			np->n_size = vap->va_size;
279			if (vap->va_type == VREG) {
280				/*
281				 * we can't free pages if NAC_NOTRUNC because
282				 * the pages can be owned by ourselves.
283				 */
284				if (flags & NAC_NOTRUNC) {
285					np->n_flag |= NTRUNCDELAYED;
286				} else {
287					genfs_node_wrlock(vp);
288					mutex_enter(vp->v_interlock);
289					(void)VOP_PUTPAGES(vp, 0,
290					    0, PGO_SYNCIO | PGO_CLEANIT |
291					    PGO_FREE | PGO_ALLPAGES);
292					uvm_vnp_setsize(vp, np->n_size);
293					genfs_node_unlock(vp);
294				}
295			}
296		}
297	}
298	np->n_attrstamp = time_second;
299	if (vaper != NULL) {
300		memcpy((void *)vaper, (void *)vap, sizeof(*vap));
301		if (np->n_flag & NCHG) {
302			if (np->n_flag & NACC)
303				vaper->va_atime = np->n_atim;
304			if (np->n_flag & NUPD)
305				vaper->va_mtime = np->n_mtim;
306		}
307	}
308	return (0);
309}
310
311/*
312 * Check the time stamp
313 * If the cache is valid, copy contents to *vap and return 0
314 * otherwise return an error
315 */
316int
317nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
318{
319	struct nfsnode *np = VTONFS(vp);
320	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
321	struct vattr *vap;
322
323	if (np->n_attrstamp == 0 ||
324	    (time_second - np->n_attrstamp) >= nfs_attrtimeo(nmp, np)) {
325		nfsstats.attrcache_misses++;
326		return (ENOENT);
327	}
328	nfsstats.attrcache_hits++;
329	vap = np->n_vattr;
330	if (vap->va_size != np->n_size) {
331		if (vap->va_type == VREG) {
332			if ((np->n_flag & NMODIFIED) != 0 &&
333			    vap->va_size < np->n_size) {
334				vap->va_size = np->n_size;
335			} else {
336				np->n_size = vap->va_size;
337			}
338			genfs_node_wrlock(vp);
339			uvm_vnp_setsize(vp, np->n_size);
340			genfs_node_unlock(vp);
341		} else
342			np->n_size = vap->va_size;
343	}
344	memcpy((void *)vaper, (void *)vap, sizeof(struct vattr));
345	if (np->n_flag & NCHG) {
346		if (np->n_flag & NACC)
347			vaper->va_atime = np->n_atim;
348		if (np->n_flag & NUPD)
349			vaper->va_mtime = np->n_mtim;
350	}
351	return (0);
352}
353
354void
355nfs_delayedtruncate(struct vnode *vp)
356{
357	struct nfsnode *np = VTONFS(vp);
358
359	if (np->n_flag & NTRUNCDELAYED) {
360		np->n_flag &= ~NTRUNCDELAYED;
361		genfs_node_wrlock(vp);
362		mutex_enter(vp->v_interlock);
363		(void)VOP_PUTPAGES(vp, 0,
364		    0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES);
365		uvm_vnp_setsize(vp, np->n_size);
366		genfs_node_unlock(vp);
367	}
368}
369
370#define	NFS_WCCKLUDGE_TIMEOUT	(24 * 60 * 60)	/* 1 day */
371#define	NFS_WCCKLUDGE(nmp, now) \
372	(((nmp)->nm_iflag & NFSMNT_WCCKLUDGE) && \
373	((now) - (nmp)->nm_wcckludgetime - NFS_WCCKLUDGE_TIMEOUT) < 0)
374
375/*
376 * nfs_check_wccdata: check inaccurate wcc_data
377 *
378 * => return non-zero if we shouldn't trust the wcc_data.
379 * => NFS_WCCKLUDGE_TIMEOUT is for the case that the server is "fixed".
380 */
381
382int
383nfs_check_wccdata(struct nfsnode *np, const struct timespec *ctime,
384    struct timespec *mtime, bool docheck)
385{
386	int error = 0;
387
388#if !defined(NFS_V2_ONLY)
389
390	if (docheck) {
391		struct vnode *vp = NFSTOV(np);
392		struct nfsmount *nmp;
393		long now = time_second;
394		const struct timespec *omtime = &np->n_vattr->va_mtime;
395		const struct timespec *octime = &np->n_vattr->va_ctime;
396		const char *reason = NULL; /* XXX: gcc */
397
398		if (timespeccmp(omtime, mtime, <=)) {
399			reason = "mtime";
400			error = EINVAL;
401		}
402
403		if (vp->v_type == VDIR && timespeccmp(octime, ctime, <=)) {
404			reason = "ctime";
405			error = EINVAL;
406		}
407
408		nmp = VFSTONFS(vp->v_mount);
409		if (error) {
410
411			/*
412			 * despite of the fact that we've updated the file,
413			 * timestamps of the file were not updated as we
414			 * expected.
415			 * it means that the server has incompatible
416			 * semantics of timestamps or (more likely)
417			 * the server time is not precise enough to
418			 * track each modifications.
419			 * in that case, we disable wcc processing.
420			 *
421			 * yes, strictly speaking, we should disable all
422			 * caching.  it's a compromise.
423			 */
424
425			mutex_enter(&nmp->nm_lock);
426			if (!NFS_WCCKLUDGE(nmp, now)) {
427				printf("%s: inaccurate wcc data (%s) detected,"
428				    " disabling wcc"
429				    " (ctime %u.%09u %u.%09u,"
430				    " mtime %u.%09u %u.%09u)\n",
431				    vp->v_mount->mnt_stat.f_mntfromname,
432				    reason,
433				    (unsigned int)octime->tv_sec,
434				    (unsigned int)octime->tv_nsec,
435				    (unsigned int)ctime->tv_sec,
436				    (unsigned int)ctime->tv_nsec,
437				    (unsigned int)omtime->tv_sec,
438				    (unsigned int)omtime->tv_nsec,
439				    (unsigned int)mtime->tv_sec,
440				    (unsigned int)mtime->tv_nsec);
441			}
442			nmp->nm_iflag |= NFSMNT_WCCKLUDGE;
443			nmp->nm_wcckludgetime = now;
444			mutex_exit(&nmp->nm_lock);
445		} else if (NFS_WCCKLUDGE(nmp, now)) {
446			error = EPERM; /* XXX */
447		} else if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) {
448			mutex_enter(&nmp->nm_lock);
449			if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) {
450				printf("%s: re-enabling wcc\n",
451				    vp->v_mount->mnt_stat.f_mntfromname);
452				nmp->nm_iflag &= ~NFSMNT_WCCKLUDGE;
453			}
454			mutex_exit(&nmp->nm_lock);
455		}
456	}
457
458#endif /* !defined(NFS_V2_ONLY) */
459
460	return error;
461}
462
463/*
464 * Heuristic to see if the server XDR encodes directory cookies or not.
465 * it is not supposed to, but a lot of servers may do this. Also, since
466 * most/all servers will implement V2 as well, it is expected that they
467 * may return just 32 bits worth of cookie information, so we need to
468 * find out in which 32 bits this information is available. We do this
469 * to avoid trouble with emulated binaries that can't handle 64 bit
470 * directory offsets.
471 */
472
473void
474nfs_cookieheuristic(struct vnode *vp, int *flagp, struct lwp *l, kauth_cred_t cred)
475{
476	struct uio auio;
477	struct iovec aiov;
478	char *tbuf, *cp;
479	struct dirent *dp;
480	off_t *cookies = NULL, *cop;
481	int error, eof, nc, len;
482
483	tbuf = malloc(NFS_DIRFRAGSIZ, M_TEMP, M_WAITOK);
484
485	aiov.iov_base = tbuf;
486	aiov.iov_len = NFS_DIRFRAGSIZ;
487	auio.uio_iov = &aiov;
488	auio.uio_iovcnt = 1;
489	auio.uio_rw = UIO_READ;
490	auio.uio_resid = NFS_DIRFRAGSIZ;
491	auio.uio_offset = 0;
492	UIO_SETUP_SYSSPACE(&auio);
493
494	error = VOP_READDIR(vp, &auio, cred, &eof, &cookies, &nc);
495
496	len = NFS_DIRFRAGSIZ - auio.uio_resid;
497	if (error || len == 0) {
498		free(tbuf, M_TEMP);
499		if (cookies)
500			free(cookies, M_TEMP);
501		return;
502	}
503
504	/*
505	 * Find the first valid entry and look at its offset cookie.
506	 */
507
508	cp = tbuf;
509	for (cop = cookies; len > 0; len -= dp->d_reclen) {
510		dp = (struct dirent *)cp;
511		if (dp->d_fileno != 0 && len >= dp->d_reclen) {
512			if ((*cop >> 32) != 0 && (*cop & 0xffffffffLL) == 0) {
513				*flagp |= NFSMNT_SWAPCOOKIE;
514				nfs_invaldircache(vp, 0);
515				nfs_vinvalbuf(vp, 0, cred, l, 1);
516			}
517			break;
518		}
519		cop++;
520		cp += dp->d_reclen;
521	}
522
523	free(tbuf, M_TEMP);
524	free(cookies, M_TEMP);
525}
526
527/*
528 * Set the attribute timeout based on how recently the file has been modified.
529 */
530
531time_t
532nfs_attrtimeo(struct nfsmount *nmp, struct nfsnode *np)
533{
534	time_t timeo;
535
536	if ((nmp->nm_flag & NFSMNT_NOAC) != 0)
537		return 0;
538
539	if (((np)->n_flag & NMODIFIED) != 0)
540		return NFS_MINATTRTIMO;
541
542	timeo = (time_second - np->n_mtime.tv_sec) / 10;
543	timeo = max(timeo, NFS_MINATTRTIMO);
544	timeo = min(timeo, NFS_MAXATTRTIMO);
545	return timeo;
546}
547