1/* $NetBSD$ */ 2 3/* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 35 */ 36 37/* 38 * Copyright 2000 Wasabi Systems, Inc. 39 * All rights reserved. 40 * 41 * Written by Frank van der Linden for Wasabi Systems, Inc. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 1. Redistributions of source code must retain the above copyright 47 * notice, this list of conditions and the following disclaimer. 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 3. All advertising materials mentioning features or use of this software 52 * must display the following acknowledgement: 53 * This product includes software developed for the NetBSD Project by 54 * Wasabi Systems, Inc. 55 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 56 * or promote products derived from this software without specific prior 57 * written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 69 * POSSIBILITY OF SUCH DAMAGE. 70 */ 71 72#include <sys/cdefs.h> 73__KERNEL_RCSID(0, "$NetBSD$"); 74 75#ifdef _KERNEL_OPT 76#include "opt_nfs.h" 77#endif 78 79/* 80 * These functions support the macros and help fiddle mbuf chains for 81 * the nfs op functions. They do things like create the rpc header and 82 * copy data between mbuf chains and uio lists. 83 */ 84#include <sys/param.h> 85#include <sys/proc.h> 86#include <sys/systm.h> 87#include <sys/kernel.h> 88#include <sys/kmem.h> 89#include <sys/mount.h> 90#include <sys/vnode.h> 91#include <sys/namei.h> 92#include <sys/mbuf.h> 93#include <sys/socket.h> 94#include <sys/stat.h> 95#include <sys/filedesc.h> 96#include <sys/time.h> 97#include <sys/dirent.h> 98#include <sys/once.h> 99#include <sys/kauth.h> 100#include <sys/atomic.h> 101 102#include <uvm/uvm_extern.h> 103 104#include <nfs/rpcv2.h> 105#include <nfs/nfsproto.h> 106#include <nfs/nfsnode.h> 107#include <nfs/nfs.h> 108#include <nfs/xdr_subs.h> 109#include <nfs/nfsm_subs.h> 110#include <nfs/nfsmount.h> 111#include <nfs/nfsrtt.h> 112#include <nfs/nfs_var.h> 113 114#include <miscfs/specfs/specdev.h> 115 116#include <netinet/in.h> 117 118/* 119 * Attribute cache routines. 120 * nfs_loadattrcache() - loads or updates the cache contents from attributes 121 * that are on the mbuf list 122 * nfs_getattrcache() - returns valid attributes if found in cache, returns 123 * error otherwise 124 */ 125 126/* 127 * Load the attribute cache (that lives in the nfsnode entry) with 128 * the values on the mbuf list and 129 * Iff vap not NULL 130 * copy the attributes to *vaper 131 */ 132int 133nfsm_loadattrcache(struct vnode **vpp, struct mbuf **mdp, char **dposp, struct vattr *vaper, int flags) 134{ 135 int32_t t1; 136 char *cp2; 137 int error = 0; 138 struct mbuf *md; 139 int v3 = NFS_ISV3(*vpp); 140 141 md = *mdp; 142 t1 = (mtod(md, char *) + md->m_len) - *dposp; 143 error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2); 144 if (error) 145 return (error); 146 return nfs_loadattrcache(vpp, (struct nfs_fattr *)cp2, vaper, flags); 147} 148 149int 150nfs_loadattrcache(struct vnode **vpp, struct nfs_fattr *fp, struct vattr *vaper, int flags) 151{ 152 struct vnode *vp = *vpp; 153 struct vattr *vap; 154 int v3 = NFS_ISV3(vp); 155 enum vtype vtyp; 156 u_short vmode; 157 struct timespec mtime; 158 struct timespec ctime; 159 int32_t rdev; 160 struct nfsnode *np; 161 extern int (**spec_nfsv2nodeop_p)(void *); 162 uid_t uid; 163 gid_t gid; 164 165 if (v3) { 166 vtyp = nfsv3tov_type(fp->fa_type); 167 vmode = fxdr_unsigned(u_short, fp->fa_mode); 168 rdev = makedev(fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata1), 169 fxdr_unsigned(u_int32_t, fp->fa3_rdev.specdata2)); 170 fxdr_nfsv3time(&fp->fa3_mtime, &mtime); 171 fxdr_nfsv3time(&fp->fa3_ctime, &ctime); 172 } else { 173 vtyp = nfsv2tov_type(fp->fa_type); 174 vmode = fxdr_unsigned(u_short, fp->fa_mode); 175 if (vtyp == VNON || vtyp == VREG) 176 vtyp = IFTOVT(vmode); 177 rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); 178 fxdr_nfsv2time(&fp->fa2_mtime, &mtime); 179 ctime.tv_sec = fxdr_unsigned(u_int32_t, 180 fp->fa2_ctime.nfsv2_sec); 181 ctime.tv_nsec = 0; 182 183 /* 184 * Really ugly NFSv2 kludge. 185 */ 186 if (vtyp == VCHR && rdev == 0xffffffff) 187 vtyp = VFIFO; 188 } 189 190 vmode &= ALLPERMS; 191 192 /* 193 * If v_type == VNON it is a new node, so fill in the v_type, 194 * n_mtime fields. Check to see if it represents a special 195 * device, and if so, check for a possible alias. Once the 196 * correct vnode has been obtained, fill in the rest of the 197 * information. 198 */ 199 np = VTONFS(vp); 200 if (vp->v_type == VNON) { 201 vp->v_type = vtyp; 202 if (vp->v_type == VFIFO) { 203 extern int (**fifo_nfsv2nodeop_p)(void *); 204 vp->v_op = fifo_nfsv2nodeop_p; 205 } else if (vp->v_type == VREG) { 206 mutex_init(&np->n_commitlock, MUTEX_DEFAULT, IPL_NONE); 207 } else if (vp->v_type == VCHR || vp->v_type == VBLK) { 208 vp->v_op = spec_nfsv2nodeop_p; 209 spec_node_init(vp, (dev_t)rdev); 210 } 211 np->n_mtime = mtime; 212 } 213 uid = fxdr_unsigned(uid_t, fp->fa_uid); 214 gid = fxdr_unsigned(gid_t, fp->fa_gid); 215 vap = np->n_vattr; 216 217 /* 218 * Invalidate access cache if uid, gid, mode or ctime changed. 219 */ 220 if (np->n_accstamp != -1 && 221 (gid != vap->va_gid || uid != vap->va_uid || vmode != vap->va_mode 222 || timespeccmp(&ctime, &vap->va_ctime, !=))) 223 np->n_accstamp = -1; 224 225 vap->va_type = vtyp; 226 vap->va_mode = vmode; 227 vap->va_rdev = (dev_t)rdev; 228 vap->va_mtime = mtime; 229 vap->va_ctime = ctime; 230 vap->va_birthtime.tv_sec = VNOVAL; 231 vap->va_birthtime.tv_nsec = VNOVAL; 232 vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0]; 233 switch (vtyp) { 234 case VDIR: 235 vap->va_blocksize = NFS_DIRFRAGSIZ; 236 break; 237 case VBLK: 238 vap->va_blocksize = BLKDEV_IOSIZE; 239 break; 240 case VCHR: 241 vap->va_blocksize = MAXBSIZE; 242 break; 243 default: 244 vap->va_blocksize = v3 ? vp->v_mount->mnt_stat.f_iosize : 245 fxdr_unsigned(int32_t, fp->fa2_blocksize); 246 break; 247 } 248 if (v3) { 249 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); 250 vap->va_uid = uid; 251 vap->va_gid = gid; 252 vap->va_size = fxdr_hyper(&fp->fa3_size); 253 vap->va_bytes = fxdr_hyper(&fp->fa3_used); 254 vap->va_fileid = fxdr_hyper(&fp->fa3_fileid); 255 fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); 256 vap->va_flags = 0; 257 vap->va_filerev = 0; 258 } else { 259 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); 260 vap->va_uid = uid; 261 vap->va_gid = gid; 262 vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); 263 vap->va_bytes = fxdr_unsigned(int32_t, fp->fa2_blocks) 264 * NFS_FABLKSIZE; 265 vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); 266 fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); 267 vap->va_flags = 0; 268 vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); 269 vap->va_filerev = 0; 270 } 271 if (vap->va_size > VFSTONFS(vp->v_mount)->nm_maxfilesize) { 272 return EFBIG; 273 } 274 if (vap->va_size != np->n_size) { 275 if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) { 276 vap->va_size = np->n_size; 277 } else { 278 np->n_size = vap->va_size; 279 if (vap->va_type == VREG) { 280 /* 281 * we can't free pages if NAC_NOTRUNC because 282 * the pages can be owned by ourselves. 283 */ 284 if (flags & NAC_NOTRUNC) { 285 np->n_flag |= NTRUNCDELAYED; 286 } else { 287 genfs_node_wrlock(vp); 288 mutex_enter(vp->v_interlock); 289 (void)VOP_PUTPAGES(vp, 0, 290 0, PGO_SYNCIO | PGO_CLEANIT | 291 PGO_FREE | PGO_ALLPAGES); 292 uvm_vnp_setsize(vp, np->n_size); 293 genfs_node_unlock(vp); 294 } 295 } 296 } 297 } 298 np->n_attrstamp = time_second; 299 if (vaper != NULL) { 300 memcpy((void *)vaper, (void *)vap, sizeof(*vap)); 301 if (np->n_flag & NCHG) { 302 if (np->n_flag & NACC) 303 vaper->va_atime = np->n_atim; 304 if (np->n_flag & NUPD) 305 vaper->va_mtime = np->n_mtim; 306 } 307 } 308 return (0); 309} 310 311/* 312 * Check the time stamp 313 * If the cache is valid, copy contents to *vap and return 0 314 * otherwise return an error 315 */ 316int 317nfs_getattrcache(struct vnode *vp, struct vattr *vaper) 318{ 319 struct nfsnode *np = VTONFS(vp); 320 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 321 struct vattr *vap; 322 323 if (np->n_attrstamp == 0 || 324 (time_second - np->n_attrstamp) >= nfs_attrtimeo(nmp, np)) { 325 nfsstats.attrcache_misses++; 326 return (ENOENT); 327 } 328 nfsstats.attrcache_hits++; 329 vap = np->n_vattr; 330 if (vap->va_size != np->n_size) { 331 if (vap->va_type == VREG) { 332 if ((np->n_flag & NMODIFIED) != 0 && 333 vap->va_size < np->n_size) { 334 vap->va_size = np->n_size; 335 } else { 336 np->n_size = vap->va_size; 337 } 338 genfs_node_wrlock(vp); 339 uvm_vnp_setsize(vp, np->n_size); 340 genfs_node_unlock(vp); 341 } else 342 np->n_size = vap->va_size; 343 } 344 memcpy((void *)vaper, (void *)vap, sizeof(struct vattr)); 345 if (np->n_flag & NCHG) { 346 if (np->n_flag & NACC) 347 vaper->va_atime = np->n_atim; 348 if (np->n_flag & NUPD) 349 vaper->va_mtime = np->n_mtim; 350 } 351 return (0); 352} 353 354void 355nfs_delayedtruncate(struct vnode *vp) 356{ 357 struct nfsnode *np = VTONFS(vp); 358 359 if (np->n_flag & NTRUNCDELAYED) { 360 np->n_flag &= ~NTRUNCDELAYED; 361 genfs_node_wrlock(vp); 362 mutex_enter(vp->v_interlock); 363 (void)VOP_PUTPAGES(vp, 0, 364 0, PGO_SYNCIO | PGO_CLEANIT | PGO_FREE | PGO_ALLPAGES); 365 uvm_vnp_setsize(vp, np->n_size); 366 genfs_node_unlock(vp); 367 } 368} 369 370#define NFS_WCCKLUDGE_TIMEOUT (24 * 60 * 60) /* 1 day */ 371#define NFS_WCCKLUDGE(nmp, now) \ 372 (((nmp)->nm_iflag & NFSMNT_WCCKLUDGE) && \ 373 ((now) - (nmp)->nm_wcckludgetime - NFS_WCCKLUDGE_TIMEOUT) < 0) 374 375/* 376 * nfs_check_wccdata: check inaccurate wcc_data 377 * 378 * => return non-zero if we shouldn't trust the wcc_data. 379 * => NFS_WCCKLUDGE_TIMEOUT is for the case that the server is "fixed". 380 */ 381 382int 383nfs_check_wccdata(struct nfsnode *np, const struct timespec *ctime, 384 struct timespec *mtime, bool docheck) 385{ 386 int error = 0; 387 388#if !defined(NFS_V2_ONLY) 389 390 if (docheck) { 391 struct vnode *vp = NFSTOV(np); 392 struct nfsmount *nmp; 393 long now = time_second; 394 const struct timespec *omtime = &np->n_vattr->va_mtime; 395 const struct timespec *octime = &np->n_vattr->va_ctime; 396 const char *reason = NULL; /* XXX: gcc */ 397 398 if (timespeccmp(omtime, mtime, <=)) { 399 reason = "mtime"; 400 error = EINVAL; 401 } 402 403 if (vp->v_type == VDIR && timespeccmp(octime, ctime, <=)) { 404 reason = "ctime"; 405 error = EINVAL; 406 } 407 408 nmp = VFSTONFS(vp->v_mount); 409 if (error) { 410 411 /* 412 * despite of the fact that we've updated the file, 413 * timestamps of the file were not updated as we 414 * expected. 415 * it means that the server has incompatible 416 * semantics of timestamps or (more likely) 417 * the server time is not precise enough to 418 * track each modifications. 419 * in that case, we disable wcc processing. 420 * 421 * yes, strictly speaking, we should disable all 422 * caching. it's a compromise. 423 */ 424 425 mutex_enter(&nmp->nm_lock); 426 if (!NFS_WCCKLUDGE(nmp, now)) { 427 printf("%s: inaccurate wcc data (%s) detected," 428 " disabling wcc" 429 " (ctime %u.%09u %u.%09u," 430 " mtime %u.%09u %u.%09u)\n", 431 vp->v_mount->mnt_stat.f_mntfromname, 432 reason, 433 (unsigned int)octime->tv_sec, 434 (unsigned int)octime->tv_nsec, 435 (unsigned int)ctime->tv_sec, 436 (unsigned int)ctime->tv_nsec, 437 (unsigned int)omtime->tv_sec, 438 (unsigned int)omtime->tv_nsec, 439 (unsigned int)mtime->tv_sec, 440 (unsigned int)mtime->tv_nsec); 441 } 442 nmp->nm_iflag |= NFSMNT_WCCKLUDGE; 443 nmp->nm_wcckludgetime = now; 444 mutex_exit(&nmp->nm_lock); 445 } else if (NFS_WCCKLUDGE(nmp, now)) { 446 error = EPERM; /* XXX */ 447 } else if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) { 448 mutex_enter(&nmp->nm_lock); 449 if (nmp->nm_iflag & NFSMNT_WCCKLUDGE) { 450 printf("%s: re-enabling wcc\n", 451 vp->v_mount->mnt_stat.f_mntfromname); 452 nmp->nm_iflag &= ~NFSMNT_WCCKLUDGE; 453 } 454 mutex_exit(&nmp->nm_lock); 455 } 456 } 457 458#endif /* !defined(NFS_V2_ONLY) */ 459 460 return error; 461} 462 463/* 464 * Heuristic to see if the server XDR encodes directory cookies or not. 465 * it is not supposed to, but a lot of servers may do this. Also, since 466 * most/all servers will implement V2 as well, it is expected that they 467 * may return just 32 bits worth of cookie information, so we need to 468 * find out in which 32 bits this information is available. We do this 469 * to avoid trouble with emulated binaries that can't handle 64 bit 470 * directory offsets. 471 */ 472 473void 474nfs_cookieheuristic(struct vnode *vp, int *flagp, struct lwp *l, kauth_cred_t cred) 475{ 476 struct uio auio; 477 struct iovec aiov; 478 char *tbuf, *cp; 479 struct dirent *dp; 480 off_t *cookies = NULL, *cop; 481 int error, eof, nc, len; 482 483 tbuf = malloc(NFS_DIRFRAGSIZ, M_TEMP, M_WAITOK); 484 485 aiov.iov_base = tbuf; 486 aiov.iov_len = NFS_DIRFRAGSIZ; 487 auio.uio_iov = &aiov; 488 auio.uio_iovcnt = 1; 489 auio.uio_rw = UIO_READ; 490 auio.uio_resid = NFS_DIRFRAGSIZ; 491 auio.uio_offset = 0; 492 UIO_SETUP_SYSSPACE(&auio); 493 494 error = VOP_READDIR(vp, &auio, cred, &eof, &cookies, &nc); 495 496 len = NFS_DIRFRAGSIZ - auio.uio_resid; 497 if (error || len == 0) { 498 free(tbuf, M_TEMP); 499 if (cookies) 500 free(cookies, M_TEMP); 501 return; 502 } 503 504 /* 505 * Find the first valid entry and look at its offset cookie. 506 */ 507 508 cp = tbuf; 509 for (cop = cookies; len > 0; len -= dp->d_reclen) { 510 dp = (struct dirent *)cp; 511 if (dp->d_fileno != 0 && len >= dp->d_reclen) { 512 if ((*cop >> 32) != 0 && (*cop & 0xffffffffLL) == 0) { 513 *flagp |= NFSMNT_SWAPCOOKIE; 514 nfs_invaldircache(vp, 0); 515 nfs_vinvalbuf(vp, 0, cred, l, 1); 516 } 517 break; 518 } 519 cop++; 520 cp += dp->d_reclen; 521 } 522 523 free(tbuf, M_TEMP); 524 free(cookies, M_TEMP); 525} 526 527/* 528 * Set the attribute timeout based on how recently the file has been modified. 529 */ 530 531time_t 532nfs_attrtimeo(struct nfsmount *nmp, struct nfsnode *np) 533{ 534 time_t timeo; 535 536 if ((nmp->nm_flag & NFSMNT_NOAC) != 0) 537 return 0; 538 539 if (((np)->n_flag & NMODIFIED) != 0) 540 return NFS_MINATTRTIMO; 541 542 timeo = (time_second - np->n_mtime.tv_sec) / 10; 543 timeo = max(timeo, NFS_MINATTRTIMO); 544 timeo = min(timeo, NFS_MAXATTRTIMO); 545 return timeo; 546} 547