1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD$"); 37 38/* 39 * These functions support the macros and help fiddle mbuf chains for 40 * the nfs op functions. 
They do things like create the rpc header and 41 * copy data between mbuf chains and uio lists. 42 */ 43 44#include "opt_kdtrace.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/bio.h> 50#include <sys/buf.h> 51#include <sys/proc.h> 52#include <sys/mount.h> 53#include <sys/vnode.h> 54#include <sys/namei.h> 55#include <sys/mbuf.h> 56#include <sys/socket.h> 57#include <sys/stat.h> 58#include <sys/malloc.h> 59#include <sys/sysent.h> 60#include <sys/syscall.h> 61#include <sys/sysproto.h> 62#include <sys/taskqueue.h> 63 64#include <vm/vm.h> 65#include <vm/vm_object.h> 66#include <vm/vm_extern.h> 67#include <vm/uma.h> 68 69#include <nfs/nfsproto.h> 70#include <nfsclient/nfs.h> 71#include <nfsclient/nfsnode.h> 72#include <nfs/nfs_kdtrace.h> 73#include <nfs/xdr_subs.h> 74#include <nfsclient/nfsm_subs.h> 75#include <nfsclient/nfsmount.h> 76 77#include <netinet/in.h> 78 79/* 80 * Note that stdarg.h and the ANSI style va_start macro is used for both 81 * ANSI and traditional C compilers. 
82 */ 83#include <machine/stdarg.h> 84 85#ifdef KDTRACE_HOOKS 86dtrace_nfsclient_attrcache_flush_probe_func_t 87 dtrace_nfsclient_attrcache_flush_done_probe; 88uint32_t nfsclient_attrcache_flush_done_id; 89 90dtrace_nfsclient_attrcache_get_hit_probe_func_t 91 dtrace_nfsclient_attrcache_get_hit_probe; 92uint32_t nfsclient_attrcache_get_hit_id; 93 94dtrace_nfsclient_attrcache_get_miss_probe_func_t 95 dtrace_nfsclient_attrcache_get_miss_probe; 96uint32_t nfsclient_attrcache_get_miss_id; 97 98dtrace_nfsclient_attrcache_load_probe_func_t 99 dtrace_nfsclient_attrcache_load_done_probe; 100uint32_t nfsclient_attrcache_load_done_id; 101#endif /* !KDTRACE_HOOKS */ 102 103/* 104 * Data items converted to xdr at startup, since they are constant 105 * This is kinda hokey, but may save a little time doing byte swaps 106 */ 107u_int32_t nfs_xdrneg1; 108u_int32_t nfs_true, nfs_false; 109 110/* And other global data */ 111static u_int32_t nfs_xid = 0; 112static enum vtype nv2tov_type[8]= { 113 VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON 114}; 115 116int nfs_ticks; 117int nfs_pbuf_freecnt = -1; /* start out unlimited */ 118 119struct nfs_bufq nfs_bufq; 120static struct mtx nfs_xid_mtx; 121struct task nfs_nfsiodnew_task; 122 123/* 124 * and the reverse mapping from generic to Version 2 procedure numbers 125 */ 126int nfsv2_procid[NFS_NPROCS] = { 127 NFSV2PROC_NULL, 128 NFSV2PROC_GETATTR, 129 NFSV2PROC_SETATTR, 130 NFSV2PROC_LOOKUP, 131 NFSV2PROC_NOOP, 132 NFSV2PROC_READLINK, 133 NFSV2PROC_READ, 134 NFSV2PROC_WRITE, 135 NFSV2PROC_CREATE, 136 NFSV2PROC_MKDIR, 137 NFSV2PROC_SYMLINK, 138 NFSV2PROC_CREATE, 139 NFSV2PROC_REMOVE, 140 NFSV2PROC_RMDIR, 141 NFSV2PROC_RENAME, 142 NFSV2PROC_LINK, 143 NFSV2PROC_READDIR, 144 NFSV2PROC_NOOP, 145 NFSV2PROC_STATFS, 146 NFSV2PROC_NOOP, 147 NFSV2PROC_NOOP, 148 NFSV2PROC_NOOP, 149 NFSV2PROC_NOOP, 150}; 151 152LIST_HEAD(nfsnodehashhead, nfsnode); 153 154u_int32_t 155nfs_xid_gen(void) 156{ 157 uint32_t xid; 158 159 mtx_lock(&nfs_xid_mtx); 160 161 /* 
Get a pretty random xid to start with */ 162 if (!nfs_xid) 163 nfs_xid = random(); 164 /* 165 * Skip zero xid if it should ever happen. 166 */ 167 if (++nfs_xid == 0) 168 nfs_xid++; 169 xid = nfs_xid; 170 mtx_unlock(&nfs_xid_mtx); 171 return xid; 172} 173 174/* 175 * Create the header for an rpc request packet 176 * The hsiz is the size of the rest of the nfs request header. 177 * (just used to decide if a cluster is a good idea) 178 */ 179struct mbuf * 180nfsm_reqhead(struct vnode *vp, u_long procid, int hsiz) 181{ 182 struct mbuf *mb; 183 184 MGET(mb, M_WAIT, MT_DATA); 185 if (hsiz >= MINCLSIZE) 186 MCLGET(mb, M_WAIT); 187 mb->m_len = 0; 188 return (mb); 189} 190 191/* 192 * copies a uio scatter/gather list to an mbuf chain. 193 * NOTE: can ony handle iovcnt == 1 194 */ 195int 196nfsm_uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, caddr_t *bpos) 197{ 198 char *uiocp; 199 struct mbuf *mp, *mp2; 200 int xfer, left, mlen; 201 int uiosiz, clflg, rem; 202 char *cp; 203 204 KASSERT(uiop->uio_iovcnt == 1, ("nfsm_uiotombuf: iovcnt != 1")); 205 206 if (siz > MLEN) /* or should it >= MCLBYTES ?? */ 207 clflg = 1; 208 else 209 clflg = 0; 210 rem = nfsm_rndup(siz)-siz; 211 mp = mp2 = *mq; 212 while (siz > 0) { 213 left = uiop->uio_iov->iov_len; 214 uiocp = uiop->uio_iov->iov_base; 215 if (left > siz) 216 left = siz; 217 uiosiz = left; 218 while (left > 0) { 219 mlen = M_TRAILINGSPACE(mp); 220 if (mlen == 0) { 221 MGET(mp, M_WAIT, MT_DATA); 222 if (clflg) 223 MCLGET(mp, M_WAIT); 224 mp->m_len = 0; 225 mp2->m_next = mp; 226 mp2 = mp; 227 mlen = M_TRAILINGSPACE(mp); 228 } 229 xfer = (left > mlen) ? mlen : left; 230#ifdef notdef 231 /* Not Yet.. 
*/ 232 if (uiop->uio_iov->iov_op != NULL) 233 (*(uiop->uio_iov->iov_op)) 234 (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); 235 else 236#endif 237 if (uiop->uio_segflg == UIO_SYSSPACE) 238 bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); 239 else 240 copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); 241 mp->m_len += xfer; 242 left -= xfer; 243 uiocp += xfer; 244 uiop->uio_offset += xfer; 245 uiop->uio_resid -= xfer; 246 } 247 uiop->uio_iov->iov_base = 248 (char *)uiop->uio_iov->iov_base + uiosiz; 249 uiop->uio_iov->iov_len -= uiosiz; 250 siz -= uiosiz; 251 } 252 if (rem > 0) { 253 if (rem > M_TRAILINGSPACE(mp)) { 254 MGET(mp, M_WAIT, MT_DATA); 255 mp->m_len = 0; 256 mp2->m_next = mp; 257 } 258 cp = mtod(mp, caddr_t)+mp->m_len; 259 for (left = 0; left < rem; left++) 260 *cp++ = '\0'; 261 mp->m_len += rem; 262 *bpos = cp; 263 } else 264 *bpos = mtod(mp, caddr_t)+mp->m_len; 265 *mq = mp; 266 return (0); 267} 268 269/* 270 * Copy a string into mbufs for the hard cases... 271 */ 272int 273nfsm_strtmbuf(struct mbuf **mb, char **bpos, const char *cp, long siz) 274{ 275 struct mbuf *m1 = NULL, *m2; 276 long left, xfer, len, tlen; 277 u_int32_t *tl; 278 int putsize; 279 280 putsize = 1; 281 m2 = *mb; 282 left = M_TRAILINGSPACE(m2); 283 if (left > 0) { 284 tl = ((u_int32_t *)(*bpos)); 285 *tl++ = txdr_unsigned(siz); 286 putsize = 0; 287 left -= NFSX_UNSIGNED; 288 m2->m_len += NFSX_UNSIGNED; 289 if (left > 0) { 290 bcopy(cp, (caddr_t) tl, left); 291 siz -= left; 292 cp += left; 293 m2->m_len += left; 294 left = 0; 295 } 296 } 297 /* Loop around adding mbufs */ 298 while (siz > 0) { 299 MGET(m1, M_WAIT, MT_DATA); 300 if (siz > MLEN) 301 MCLGET(m1, M_WAIT); 302 m1->m_len = NFSMSIZ(m1); 303 m2->m_next = m1; 304 m2 = m1; 305 tl = mtod(m1, u_int32_t *); 306 tlen = 0; 307 if (putsize) { 308 *tl++ = txdr_unsigned(siz); 309 m1->m_len -= NFSX_UNSIGNED; 310 tlen = NFSX_UNSIGNED; 311 putsize = 0; 312 } 313 if (siz < m1->m_len) { 314 len = nfsm_rndup(siz); 315 xfer = siz; 316 if (xfer < 
len) 317 *(tl+(xfer>>2)) = 0; 318 } else { 319 xfer = len = m1->m_len; 320 } 321 bcopy(cp, (caddr_t) tl, xfer); 322 m1->m_len = len+tlen; 323 siz -= xfer; 324 cp += xfer; 325 } 326 *mb = m1; 327 *bpos = mtod(m1, caddr_t)+m1->m_len; 328 return (0); 329} 330 331/* 332 * Called once to initialize data structures... 333 */ 334int 335nfs_init(struct vfsconf *vfsp) 336{ 337 int i; 338 339 nfsmount_zone = uma_zcreate("NFSMOUNT", sizeof(struct nfsmount), 340 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 341 nfs_true = txdr_unsigned(TRUE); 342 nfs_false = txdr_unsigned(FALSE); 343 nfs_xdrneg1 = txdr_unsigned(-1); 344 nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; 345 if (nfs_ticks < 1) 346 nfs_ticks = 1; 347 /* Ensure async daemons disabled */ 348 for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { 349 nfs_iodwant[i] = NFSIOD_NOT_AVAILABLE; 350 nfs_iodmount[i] = NULL; 351 } 352 nfs_nhinit(); /* Init the nfsnode table */ 353 354 /* 355 * Initialize reply list and start timer 356 */ 357 mtx_init(&nfs_iod_mtx, "NFS iod lock", NULL, MTX_DEF); 358 mtx_init(&nfs_xid_mtx, "NFS xid lock", NULL, MTX_DEF); 359 TASK_INIT(&nfs_nfsiodnew_task, 0, nfs_nfsiodnew_tq, NULL); 360 361 nfs_pbuf_freecnt = nswbuf / 2 + 1; 362 363 return (0); 364} 365 366int 367nfs_uninit(struct vfsconf *vfsp) 368{ 369 int i; 370 371 /* 372 * Tell all nfsiod processes to exit. Clear nfs_iodmax, and wakeup 373 * any sleeping nfsiods so they check nfs_iodmax and exit. 374 * Drain nfsiodnew task before we wait for them to finish. 
375 */ 376 mtx_lock(&nfs_iod_mtx); 377 nfs_iodmax = 0; 378 mtx_unlock(&nfs_iod_mtx); 379 taskqueue_drain(taskqueue_thread, &nfs_nfsiodnew_task); 380 mtx_lock(&nfs_iod_mtx); 381 for (i = 0; i < nfs_numasync; i++) 382 if (nfs_iodwant[i] == NFSIOD_AVAILABLE) 383 wakeup(&nfs_iodwant[i]); 384 /* The last nfsiod to exit will wake us up when nfs_numasync hits 0 */ 385 while (nfs_numasync) 386 msleep(&nfs_numasync, &nfs_iod_mtx, PWAIT, "ioddie", 0); 387 mtx_unlock(&nfs_iod_mtx); 388 nfs_nhuninit(); 389 uma_zdestroy(nfsmount_zone); 390 return (0); 391} 392 393void 394nfs_dircookie_lock(struct nfsnode *np) 395{ 396 mtx_lock(&np->n_mtx); 397 while (np->n_flag & NDIRCOOKIELK) 398 (void) msleep(&np->n_flag, &np->n_mtx, PZERO, "nfsdirlk", 0); 399 np->n_flag |= NDIRCOOKIELK; 400 mtx_unlock(&np->n_mtx); 401} 402 403void 404nfs_dircookie_unlock(struct nfsnode *np) 405{ 406 mtx_lock(&np->n_mtx); 407 np->n_flag &= ~NDIRCOOKIELK; 408 wakeup(&np->n_flag); 409 mtx_unlock(&np->n_mtx); 410} 411 412int 413nfs_upgrade_vnlock(struct vnode *vp) 414{ 415 int old_lock; 416 417 ASSERT_VOP_LOCKED(vp, "nfs_upgrade_vnlock"); 418 old_lock = VOP_ISLOCKED(vp); 419 if (old_lock != LK_EXCLUSIVE) { 420 KASSERT(old_lock == LK_SHARED, 421 ("nfs_upgrade_vnlock: wrong old_lock %d", old_lock)); 422 /* Upgrade to exclusive lock, this might block */ 423 vn_lock(vp, LK_UPGRADE | LK_RETRY); 424 } 425 return (old_lock); 426} 427 428void 429nfs_downgrade_vnlock(struct vnode *vp, int old_lock) 430{ 431 if (old_lock != LK_EXCLUSIVE) { 432 KASSERT(old_lock == LK_SHARED, ("wrong old_lock %d", old_lock)); 433 /* Downgrade from exclusive lock. */ 434 vn_lock(vp, LK_DOWNGRADE | LK_RETRY); 435 } 436} 437 438void 439nfs_printf(const char *fmt, ...) 440{ 441 va_list ap; 442 443 mtx_lock(&Giant); 444 va_start(ap, fmt); 445 vprintf(fmt, ap); 446 va_end(ap); 447 mtx_unlock(&Giant); 448} 449 450/* 451 * Attribute cache routines. 
 * nfs_loadattrcache() - loads or updates the cache contents from attributes
 *	that are on the mbuf list
 * nfs_getattrcache() - returns valid attributes if found in cache, returns
 *	error otherwise
 */

/*
 * Load the attribute cache (that lives in the nfsnode entry) with
 * the values on the mbuf list and
 * Iff vaper not NULL
 *    copy the attributes to *vaper
 *
 * vpp	      - points at the vnode whose cache is loaded (*vpp is only read).
 * mdp, dposp - current mbuf / position in the reply; advanced past the
 *		fattr structure by nfsm_disct().
 * vaper      - optional destination for a copy of the loaded attributes.
 * dontshrink - when set, a regular file's size is never reduced; instead
 *		the cached attributes are marked stale (n_attrstamp = 0).
 *
 * Returns 0, or EBADRPC if the fattr structure cannot be dissected.
 * n_mtx is taken and released internally.
 */
int
nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp,
		  struct vattr *vaper, int dontshrink)
{
	struct vnode *vp = *vpp;
	struct vattr *vap;
	struct nfs_fattr *fp;
	struct nfsnode *np = NULL;
	int32_t t1;
	caddr_t cp2;
	int rdev;
	struct mbuf *md;
	enum vtype vtyp;
	u_short vmode;
	struct timespec mtime, mtime_save;
	int v3 = NFS_ISV3(vp);
	int error = 0;
	u_quad_t nsize;
	int setnsize;

	md = *mdp;
	/* t1: bytes left in the current mbuf past the dissect position. */
	t1 = (mtod(md, caddr_t) + md->m_len) - *dposp;
	cp2 = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, M_WAIT);
	if (cp2 == NULL) {
		error = EBADRPC;
		goto out;
	}
	fp = (struct nfs_fattr *)cp2;
	if (v3) {
		vtyp = nfsv3tov_type(fp->fa_type);
		vmode = fxdr_unsigned(u_short, fp->fa_mode);
		rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1),
			fxdr_unsigned(int, fp->fa3_rdev.specdata2));
		fxdr_nfsv3time(&fp->fa3_mtime, &mtime);
	} else {
		vtyp = nfsv2tov_type(fp->fa_type);
		vmode = fxdr_unsigned(u_short, fp->fa_mode);
		/*
		 * XXX
		 *
		 * The duplicate information returned in fa_type and fa_mode
		 * is an ambiguity in the NFS version 2 protocol.
		 *
		 * VREG should be taken literally as a regular file.  If a
		 * server intends to return some type information differently
		 * in the upper bits of the mode field (e.g. for sockets, or
		 * FIFOs), NFSv2 mandates fa_type to be VNON.  Anyway, we
		 * leave the examination of the mode bits even in the VREG
		 * case to avoid breakage for bogus servers, but we make sure
		 * that there are actually type bits set in the upper part of
		 * fa_mode (and failing that, trust the va_type field).
		 *
		 * NFSv3 cleared the issue, and requires fa_mode to not
		 * contain any type information (while also introducing
		 * sockets and FIFOs for fa_type).
		 */
		if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0))
			vtyp = IFTOVT(vmode);
		rdev = fxdr_unsigned(int32_t, fp->fa2_rdev);
		fxdr_nfsv2time(&fp->fa2_mtime, &mtime);

		/*
		 * Really ugly NFSv2 kludge: a character device with an
		 * all-ones rdev is treated as a FIFO.
		 */
		if (vtyp == VCHR && rdev == 0xffffffff)
			vtyp = VFIFO;
	}

	/*
	 * If v_type == VNON it is a new node, so fill in the v_type,
	 * n_mtime fields. Check to see if it represents a special
	 * device, and if so, check for a possible alias. Once the
	 * correct vnode has been obtained, fill in the rest of the
	 * information.
	 */
	np = VTONFS(vp);
	mtx_lock(&np->n_mtx);
	if (vp->v_type != vtyp) {
		vp->v_type = vtyp;
		if (vp->v_type == VFIFO)
			vp->v_op = &nfs_fifoops;
		np->n_mtime = mtime;
	}
	vap = &np->n_vattr;
	vap->va_type = vtyp;
	vap->va_mode = (vmode & 07777);
	vap->va_rdev = rdev;
	/* Remember the previous mtime for the went-backwards check below. */
	mtime_save = vap->va_mtime;
	vap->va_mtime = mtime;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	if (v3) {
		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
		vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
		vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
		vap->va_size = fxdr_hyper(&fp->fa3_size);
		vap->va_blocksize = NFS_FABLKSIZE;
		vap->va_bytes = fxdr_hyper(&fp->fa3_used);
		/* Only the second 32-bit word of the 64-bit fileid is kept. */
		vap->va_fileid = fxdr_unsigned(int32_t,
		    fp->fa3_fileid.nfsuquad[1]);
		fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime);
		fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime);
		vap->va_flags = 0;
		vap->va_filerev = 0;
	} else {
		vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
		vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
		vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
		vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size);
		vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize);
		vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks)
		    * NFS_FABLKSIZE;
		vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid);
		fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime);
		vap->va_flags = 0;
		/* The v2 ctime usec field is stored in va_gen, not tv_nsec. */
		vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t,
		    fp->fa2_ctime.nfsv2_sec);
		vap->va_ctime.tv_nsec = 0;
		vap->va_gen = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_usec);
		vap->va_filerev = 0;
	}
	np->n_attrstamp = time_second;
	setnsize = 0;
	nsize = 0;
	if (vap->va_size != np->n_size) {
		if (vap->va_type == VREG) {
			if (dontshrink && vap->va_size < np->n_size) {
				/*
				 * We've been told not to shrink the file;
				 * zero np->n_attrstamp to indicate that
				 * the attributes are stale.
				 */
				vap->va_size = np->n_size;
				np->n_attrstamp = 0;
				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
				vnode_pager_setsize(vp, np->n_size);
			} else if (np->n_flag & NMODIFIED) {
				/*
				 * We've modified the file: Use the larger
				 * of our size, and the server's size.
				 */
				if (vap->va_size < np->n_size) {
					vap->va_size = np->n_size;
				} else {
					np->n_size = vap->va_size;
					np->n_flag |= NSIZECHANGED;
				}
				vnode_pager_setsize(vp, np->n_size);
			} else if (vap->va_size < np->n_size) {
				/*
				 * When shrinking the size, the call to
				 * vnode_pager_setsize() cannot be done
				 * with the mutex held, so delay it until
				 * after the mtx_unlock call.
				 */
				nsize = np->n_size = vap->va_size;
				np->n_flag |= NSIZECHANGED;
				setnsize = 1;
			} else {
				np->n_size = vap->va_size;
				np->n_flag |= NSIZECHANGED;
				vnode_pager_setsize(vp, np->n_size);
			}
		} else {
			np->n_size = vap->va_size;
		}
	}
	/*
	 * The following checks are added to prevent a race between (say)
	 * a READDIR+ and a WRITE.
	 * READDIR+, WRITE requests sent out.
	 * READDIR+ resp, WRITE resp received on client.
	 * However, the WRITE resp was handled before the READDIR+ resp
	 * causing the post op attrs from the write to be loaded first
	 * and the attrs from the READDIR+ to be loaded later. If this
	 * happens, we have stale attrs loaded into the attrcache.
	 * We detect this by checking for the mtime moving backwards. We
	 * invalidate the attrcache when this happens.
	 */
	if (timespeccmp(&mtime_save, &vap->va_mtime, >)) {
		/* mtime went backwards */
		np->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	}
	if (vaper != NULL) {
		bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
		/* Locally recorded access/update times override the copy. */
		if (np->n_flag & NCHG) {
			if (np->n_flag & NACC)
				vaper->va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vaper->va_mtime = np->n_mtim;
		}
	}

#ifdef KDTRACE_HOOKS
	if (np->n_attrstamp != 0)
		KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, &np->n_vattr, 0);
#endif
	mtx_unlock(&np->n_mtx);
	/* Deferred shrink, now that n_mtx has been dropped. */
	if (setnsize)
		vnode_pager_setsize(vp, nsize);
out:
#ifdef KDTRACE_HOOKS
	if (error)
		KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, NULL, error);
#endif
	return (error);
}

#ifdef NFS_ACDEBUG
#include <sys/sysctl.h>
SYSCTL_DECL(_vfs_oldnfs);
static int nfs_acdebug;
SYSCTL_INT(_vfs_oldnfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0,
    "Toggle acdebug (attribute cache debug) flag");
#endif

/*
 * Check the time stamp
 * If the cache is valid, copy contents to *vaper and return 0
 * otherwise return an error
 *
 * The timeout starts as one tenth of the time since n_mtime and is
 * clamped to the mount's acdirmin/acdirmax (directories) or
 * acregmin/acregmax (everything else); NMODIFIED forces the minimum.
 * The cache is a miss (ENOENT) when the age of n_attrstamp has reached
 * that timeout.
 */
int
nfs_getattrcache(struct vnode *vp, struct vattr *vaper)
{
	struct nfsnode *np;
	struct vattr *vap;
	struct nfsmount *nmp;
	int timeo;

	np = VTONFS(vp);
	vap = &np->n_vattr;
	nmp = VFSTONFS(vp->v_mount);
#ifdef NFS_ACDEBUG
	mtx_lock(&Giant);	/* nfs_printf() */
#endif
	mtx_lock(&np->n_mtx);
	/* XXX n_mtime doesn't seem to be updated on a miss-and-reload */
	timeo = (time_second - np->n_mtime.tv_sec) / 10;

#ifdef NFS_ACDEBUG
	if (nfs_acdebug>1)
		nfs_printf("nfs_getattrcache: initial timeo = %d\n", timeo);
#endif

	if (vap->va_type == VDIR) {
		if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin)
			timeo = nmp->nm_acdirmin;
		else if (timeo > nmp->nm_acdirmax)
			timeo = nmp->nm_acdirmax;
	} else {
		if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin)
			timeo = nmp->nm_acregmin;
		else if (timeo > nmp->nm_acregmax)
			timeo = nmp->nm_acregmax;
	}

#ifdef NFS_ACDEBUG
	if (nfs_acdebug > 2)
		nfs_printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
			   nmp->nm_acregmin, nmp->nm_acregmax,
			   nmp->nm_acdirmin, nmp->nm_acdirmax);

	if (nfs_acdebug)
		nfs_printf("nfs_getattrcache: age = %d; final timeo = %d\n",
			   (time_second - np->n_attrstamp), timeo);
#endif

	if ((time_second - np->n_attrstamp) >= timeo) {
		nfsstats.attrcache_misses++;
		mtx_unlock(&np->n_mtx);
#ifdef NFS_ACDEBUG
		mtx_unlock(&Giant);	/* nfs_printf() */
#endif
		KDTRACE_NFS_ATTRCACHE_GET_MISS(vp);
		return (ENOENT);
	}
	nfsstats.attrcache_hits++;
	/* Reconcile the cached size with the node's idea of the size. */
	if (vap->va_size != np->n_size) {
		if (vap->va_type == VREG) {
			if (np->n_flag & NMODIFIED) {
				/* Locally modified: the larger size wins. */
				if (vap->va_size < np->n_size)
					vap->va_size = np->n_size;
				else
					np->n_size = vap->va_size;
			} else {
				np->n_size = vap->va_size;
			}
			vnode_pager_setsize(vp, np->n_size);
		} else {
			np->n_size = vap->va_size;
		}
	}
	bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr));
	/* Locally recorded access/update times override the copy. */
	if (np->n_flag & NCHG) {
		if (np->n_flag & NACC)
			vaper->va_atime = np->n_atim;
		if (np->n_flag & NUPD)
			vaper->va_mtime = np->n_mtim;
	}
	mtx_unlock(&np->n_mtx);
#ifdef NFS_ACDEBUG
	mtx_unlock(&Giant);	/* nfs_printf() */
#endif
	KDTRACE_NFS_ATTRCACHE_GET_HIT(vp, vap);
	return (0);
}

/*
 * Purge all cached information about an NFS vnode including name
 * cache entries, the attribute cache, and the access cache.  This is
 * called when an NFS request for a node fails with a stale
 * filehandle.
776 */ 777void 778nfs_purgecache(struct vnode *vp) 779{ 780 struct nfsnode *np; 781 int i; 782 783 np = VTONFS(vp); 784 cache_purge(vp); 785 mtx_lock(&np->n_mtx); 786 np->n_attrstamp = 0; 787 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 788 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) 789 np->n_accesscache[i].stamp = 0; 790 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); 791 mtx_unlock(&np->n_mtx); 792} 793 794static nfsuint64 nfs_nullcookie = { { 0, 0 } }; 795/* 796 * This function finds the directory cookie that corresponds to the 797 * logical byte offset given. 798 */ 799nfsuint64 * 800nfs_getcookie(struct nfsnode *np, off_t off, int add) 801{ 802 struct nfsdmap *dp, *dp2; 803 int pos; 804 nfsuint64 *retval = NULL; 805 806 pos = (uoff_t)off / NFS_DIRBLKSIZ; 807 if (pos == 0 || off < 0) { 808 KASSERT(!add, ("nfs getcookie add at <= 0")); 809 return (&nfs_nullcookie); 810 } 811 pos--; 812 dp = LIST_FIRST(&np->n_cookies); 813 if (!dp) { 814 if (add) { 815 dp = malloc(sizeof (struct nfsdmap), 816 M_NFSDIROFF, M_WAITOK); 817 dp->ndm_eocookie = 0; 818 LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); 819 } else 820 goto out; 821 } 822 while (pos >= NFSNUMCOOKIES) { 823 pos -= NFSNUMCOOKIES; 824 if (LIST_NEXT(dp, ndm_list)) { 825 if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && 826 pos >= dp->ndm_eocookie) 827 goto out; 828 dp = LIST_NEXT(dp, ndm_list); 829 } else if (add) { 830 dp2 = malloc(sizeof (struct nfsdmap), 831 M_NFSDIROFF, M_WAITOK); 832 dp2->ndm_eocookie = 0; 833 LIST_INSERT_AFTER(dp, dp2, ndm_list); 834 dp = dp2; 835 } else 836 goto out; 837 } 838 if (pos >= dp->ndm_eocookie) { 839 if (add) 840 dp->ndm_eocookie = pos + 1; 841 else 842 goto out; 843 } 844 retval = &dp->ndm_cookies[pos]; 845out: 846 return (retval); 847} 848 849/* 850 * Invalidate cached directory information, except for the actual directory 851 * blocks (which are invalidated separately). 852 * Done mainly to avoid the use of stale offset cookies. 
853 */ 854void 855nfs_invaldir(struct vnode *vp) 856{ 857 struct nfsnode *np = VTONFS(vp); 858 859 KASSERT(vp->v_type == VDIR, ("nfs: invaldir not dir")); 860 nfs_dircookie_lock(np); 861 np->n_direofoffset = 0; 862 np->n_cookieverf.nfsuquad[0] = 0; 863 np->n_cookieverf.nfsuquad[1] = 0; 864 if (LIST_FIRST(&np->n_cookies)) 865 LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0; 866 nfs_dircookie_unlock(np); 867} 868 869/* 870 * The write verifier has changed (probably due to a server reboot), so all 871 * B_NEEDCOMMIT blocks will have to be written again. Since they are on the 872 * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT 873 * and B_CLUSTEROK flags. Once done the new write verifier can be set for the 874 * mount point. 875 * 876 * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data 877 * writes are not clusterable. 878 */ 879void 880nfs_clearcommit(struct mount *mp) 881{ 882 struct vnode *vp, *nvp; 883 struct buf *bp, *nbp; 884 struct bufobj *bo; 885 886 MNT_VNODE_FOREACH_ALL(vp, mp, nvp) { 887 bo = &vp->v_bufobj; 888 vholdl(vp); 889 VI_UNLOCK(vp); 890 BO_LOCK(bo); 891 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 892 if (!BUF_ISLOCKED(bp) && 893 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 894 == (B_DELWRI | B_NEEDCOMMIT)) 895 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 896 } 897 BO_UNLOCK(bo); 898 vdrop(vp); 899 } 900} 901 902/* 903 * Helper functions for former macros. Some of these should be 904 * moved to their callers. 
905 */ 906 907int 908nfsm_mtofh_xx(struct vnode *d, struct vnode **v, int v3, int *f, 909 struct mbuf **md, caddr_t *dpos) 910{ 911 struct nfsnode *ttnp; 912 struct vnode *ttvp; 913 nfsfh_t *ttfhp; 914 u_int32_t *tl; 915 int ttfhsize; 916 int t1; 917 918 if (v3) { 919 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 920 if (tl == NULL) 921 return EBADRPC; 922 *f = fxdr_unsigned(int, *tl); 923 } else 924 *f = 1; 925 if (*f) { 926 t1 = nfsm_getfh_xx(&ttfhp, &ttfhsize, (v3), md, dpos); 927 if (t1 != 0) 928 return t1; 929 t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp, LK_EXCLUSIVE); 930 if (t1 != 0) 931 return t1; 932 *v = NFSTOV(ttnp); 933 } 934 if (v3) { 935 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 936 if (tl == NULL) 937 return EBADRPC; 938 if (*f) 939 *f = fxdr_unsigned(int, *tl); 940 else if (fxdr_unsigned(int, *tl)) 941 nfsm_adv_xx(NFSX_V3FATTR, md, dpos); 942 } 943 if (*f) { 944 ttvp = *v; 945 t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 0); 946 if (t1) 947 return t1; 948 *v = ttvp; 949 } 950 return 0; 951} 952 953int 954nfsm_getfh_xx(nfsfh_t **f, int *s, int v3, struct mbuf **md, caddr_t *dpos) 955{ 956 u_int32_t *tl; 957 958 if (v3) { 959 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 960 if (tl == NULL) 961 return EBADRPC; 962 *s = fxdr_unsigned(int, *tl); 963 if (*s <= 0 || *s > NFSX_V3FHMAX) 964 return EBADRPC; 965 } else 966 *s = NFSX_V2FH; 967 *f = nfsm_dissect_xx(nfsm_rndup(*s), md, dpos); 968 if (*f == NULL) 969 return EBADRPC; 970 else 971 return 0; 972} 973 974 975int 976nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md, 977 caddr_t *dpos) 978{ 979 int t1; 980 981 struct vnode *ttvp = *v; 982 t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 0); 983 if (t1 != 0) 984 return t1; 985 *v = ttvp; 986 return 0; 987} 988 989int 990nfsm_postop_attr_xx(struct vnode **v, int *f, struct vattr *va, 991 struct mbuf **md, caddr_t *dpos) 992{ 993 u_int32_t *tl; 994 int t1; 995 996 struct vnode *ttvp = *v; 997 tl = nfsm_dissect_xx(NFSX_UNSIGNED, 
md, dpos); 998 if (tl == NULL) 999 return EBADRPC; 1000 *f = fxdr_unsigned(int, *tl); 1001 if (*f != 0) { 1002 t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 1); 1003 if (t1 != 0) { 1004 *f = 0; 1005 return t1; 1006 } 1007 *v = ttvp; 1008 } 1009 return 0; 1010} 1011 1012int 1013nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, caddr_t *dpos) 1014{ 1015 u_int32_t *tl; 1016 int ttattrf, ttretf = 0; 1017 int t1; 1018 1019 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 1020 if (tl == NULL) 1021 return EBADRPC; 1022 if (*tl == nfs_true) { 1023 tl = nfsm_dissect_xx(6 * NFSX_UNSIGNED, md, dpos); 1024 if (tl == NULL) 1025 return EBADRPC; 1026 mtx_lock(&(VTONFS(*v))->n_mtx); 1027 if (*f) 1028 ttretf = (VTONFS(*v)->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && 1029 VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); 1030 mtx_unlock(&(VTONFS(*v))->n_mtx); 1031 } 1032 t1 = nfsm_postop_attr_xx(v, &ttattrf, NULL, md, dpos); 1033 if (t1) 1034 return t1; 1035 if (*f) 1036 *f = ttretf; 1037 else 1038 *f = ttattrf; 1039 return 0; 1040} 1041 1042int 1043nfsm_strtom_xx(const char *a, int s, int m, struct mbuf **mb, caddr_t *bpos) 1044{ 1045 u_int32_t *tl; 1046 int t1; 1047 1048 if (s > m) 1049 return ENAMETOOLONG; 1050 t1 = nfsm_rndup(s) + NFSX_UNSIGNED; 1051 if (t1 <= M_TRAILINGSPACE(*mb)) { 1052 tl = nfsm_build_xx(t1, mb, bpos); 1053 *tl++ = txdr_unsigned(s); 1054 *(tl + ((t1 >> 2) - 2)) = 0; 1055 bcopy(a, tl, s); 1056 } else { 1057 t1 = nfsm_strtmbuf(mb, bpos, a, s); 1058 if (t1 != 0) 1059 return t1; 1060 } 1061 return 0; 1062} 1063 1064int 1065nfsm_fhtom_xx(struct vnode *v, int v3, struct mbuf **mb, caddr_t *bpos) 1066{ 1067 u_int32_t *tl; 1068 int t1; 1069 caddr_t cp; 1070 1071 if (v3) { 1072 t1 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED; 1073 if (t1 < M_TRAILINGSPACE(*mb)) { 1074 tl = nfsm_build_xx(t1, mb, bpos); 1075 *tl++ = txdr_unsigned(VTONFS(v)->n_fhsize); 1076 *(tl + ((t1 >> 2) - 2)) = 0; 1077 bcopy(VTONFS(v)->n_fhp, tl, 
VTONFS(v)->n_fhsize); 1078 } else { 1079 t1 = nfsm_strtmbuf(mb, bpos, 1080 (const char *)VTONFS(v)->n_fhp, 1081 VTONFS(v)->n_fhsize); 1082 if (t1 != 0) 1083 return t1; 1084 } 1085 } else { 1086 cp = nfsm_build_xx(NFSX_V2FH, mb, bpos); 1087 bcopy(VTONFS(v)->n_fhp, cp, NFSX_V2FH); 1088 } 1089 return 0; 1090} 1091 1092void 1093nfsm_v3attrbuild_xx(struct vattr *va, int full, struct mbuf **mb, 1094 caddr_t *bpos) 1095{ 1096 u_int32_t *tl; 1097 1098 if (va->va_mode != (mode_t)VNOVAL) { 1099 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1100 *tl++ = nfs_true; 1101 *tl = txdr_unsigned(va->va_mode); 1102 } else { 1103 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1104 *tl = nfs_false; 1105 } 1106 if (full && va->va_uid != (uid_t)VNOVAL) { 1107 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1108 *tl++ = nfs_true; 1109 *tl = txdr_unsigned(va->va_uid); 1110 } else { 1111 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1112 *tl = nfs_false; 1113 } 1114 if (full && va->va_gid != (gid_t)VNOVAL) { 1115 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1116 *tl++ = nfs_true; 1117 *tl = txdr_unsigned(va->va_gid); 1118 } else { 1119 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1120 *tl = nfs_false; 1121 } 1122 if (full && va->va_size != VNOVAL) { 1123 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1124 *tl++ = nfs_true; 1125 txdr_hyper(va->va_size, tl); 1126 } else { 1127 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1128 *tl = nfs_false; 1129 } 1130 if (va->va_atime.tv_sec != VNOVAL) { 1131 if ((va->va_vaflags & VA_UTIMES_NULL) == 0) { 1132 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1133 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); 1134 txdr_nfsv3time(&va->va_atime, tl); 1135 } else { 1136 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1137 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); 1138 } 1139 } else { 1140 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1141 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); 1142 } 1143 if (va->va_mtime.tv_sec != VNOVAL) { 1144 if 
((va->va_vaflags & VA_UTIMES_NULL) == 0) { 1145 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1146 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); 1147 txdr_nfsv3time(&va->va_mtime, tl); 1148 } else { 1149 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1150 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); 1151 } 1152 } else { 1153 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1154 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); 1155 } 1156} 1157