/* nfs_subs.c revision 175486 */
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_subs.c 175486 2008-01-19 17:36:23Z attilio $"); 37 38/* 39 * These functions support the macros and help fiddle mbuf chains for 40 * the nfs op functions. They do things like create the rpc header and 41 * copy data between mbuf chains and uio lists. 42 */ 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/kernel.h> 47#include <sys/bio.h> 48#include <sys/buf.h> 49#include <sys/proc.h> 50#include <sys/mount.h> 51#include <sys/vnode.h> 52#include <sys/namei.h> 53#include <sys/mbuf.h> 54#include <sys/socket.h> 55#include <sys/stat.h> 56#include <sys/malloc.h> 57#include <sys/sysent.h> 58#include <sys/syscall.h> 59#include <sys/sysproto.h> 60 61#include <vm/vm.h> 62#include <vm/vm_object.h> 63#include <vm/vm_extern.h> 64#include <vm/uma.h> 65 66#include <rpc/rpcclnt.h> 67 68#include <nfs/rpcv2.h> 69#include <nfs/nfsproto.h> 70#include <nfsclient/nfs.h> 71#include <nfsclient/nfsnode.h> 72#include <nfs/xdr_subs.h> 73#include <nfsclient/nfsm_subs.h> 74#include <nfsclient/nfsmount.h> 75 76#include <netinet/in.h> 77 78/* 79 * Note that stdarg.h and the ANSI style va_start macro is used for both 80 * ANSI and traditional C compilers. 
81 */ 82#include <machine/stdarg.h> 83 84/* 85 * Data items converted to xdr at startup, since they are constant 86 * This is kinda hokey, but may save a little time doing byte swaps 87 */ 88u_int32_t nfs_xdrneg1; 89u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, 90 rpc_mismatch, rpc_auth_unix, rpc_msgaccepted; 91u_int32_t nfs_true, nfs_false; 92 93/* And other global data */ 94u_int32_t nfs_xid = 0; 95static enum vtype nv2tov_type[8]= { 96 VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON 97}; 98 99int nfs_ticks; 100int nfs_pbuf_freecnt = -1; /* start out unlimited */ 101 102struct nfs_reqq nfs_reqq; 103struct mtx nfs_reqq_mtx; 104struct nfs_bufq nfs_bufq; 105struct mtx nfs_xid_mtx; 106 107/* 108 * and the reverse mapping from generic to Version 2 procedure numbers 109 */ 110int nfsv2_procid[NFS_NPROCS] = { 111 NFSV2PROC_NULL, 112 NFSV2PROC_GETATTR, 113 NFSV2PROC_SETATTR, 114 NFSV2PROC_LOOKUP, 115 NFSV2PROC_NOOP, 116 NFSV2PROC_READLINK, 117 NFSV2PROC_READ, 118 NFSV2PROC_WRITE, 119 NFSV2PROC_CREATE, 120 NFSV2PROC_MKDIR, 121 NFSV2PROC_SYMLINK, 122 NFSV2PROC_CREATE, 123 NFSV2PROC_REMOVE, 124 NFSV2PROC_RMDIR, 125 NFSV2PROC_RENAME, 126 NFSV2PROC_LINK, 127 NFSV2PROC_READDIR, 128 NFSV2PROC_NOOP, 129 NFSV2PROC_STATFS, 130 NFSV2PROC_NOOP, 131 NFSV2PROC_NOOP, 132 NFSV2PROC_NOOP, 133 NFSV2PROC_NOOP, 134}; 135 136LIST_HEAD(nfsnodehashhead, nfsnode); 137 138/* 139 * Create the header for an rpc request packet 140 * The hsiz is the size of the rest of the nfs request header. 141 * (just used to decide if a cluster is a good idea) 142 */ 143struct mbuf * 144nfsm_reqhead(struct vnode *vp, u_long procid, int hsiz) 145{ 146 struct mbuf *mb; 147 148 MGET(mb, M_TRYWAIT, MT_DATA); 149 if (hsiz >= MINCLSIZE) 150 MCLGET(mb, M_TRYWAIT); 151 mb->m_len = 0; 152 return (mb); 153} 154 155/* 156 * Build the RPC header and fill in the authorization info. 157 * The authorization string argument is only used when the credentials 158 * come from outside of the kernel. 
159 * Returns the head of the mbuf list. 160 */ 161struct mbuf * 162nfsm_rpchead(struct ucred *cr, int nmflag, int procid, int auth_type, 163 int auth_len, struct mbuf *mrest, int mrest_len, struct mbuf **mbp, 164 u_int32_t **xidpp) 165{ 166 struct mbuf *mb; 167 u_int32_t *tl; 168 caddr_t bpos; 169 int i; 170 struct mbuf *mreq; 171 int grpsiz, authsiz; 172 173 authsiz = nfsm_rndup(auth_len); 174 MGETHDR(mb, M_TRYWAIT, MT_DATA); 175 if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { 176 MCLGET(mb, M_TRYWAIT); 177 } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { 178 MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); 179 } else { 180 MH_ALIGN(mb, 8 * NFSX_UNSIGNED); 181 } 182 mb->m_len = 0; 183 mreq = mb; 184 bpos = mtod(mb, caddr_t); 185 186 /* 187 * First the RPC header. 188 */ 189 tl = nfsm_build(u_int32_t *, 8 * NFSX_UNSIGNED); 190 191 mtx_lock(&nfs_xid_mtx); 192 /* Get a pretty random xid to start with */ 193 if (!nfs_xid) 194 nfs_xid = random(); 195 /* 196 * Skip zero xid if it should ever happen. 197 */ 198 if (++nfs_xid == 0) 199 nfs_xid++; 200 201 *xidpp = tl; 202 *tl++ = txdr_unsigned(nfs_xid); 203 mtx_unlock(&nfs_xid_mtx); 204 *tl++ = rpc_call; 205 *tl++ = rpc_vers; 206 *tl++ = txdr_unsigned(NFS_PROG); 207 if (nmflag & NFSMNT_NFSV3) { 208 *tl++ = txdr_unsigned(NFS_VER3); 209 *tl++ = txdr_unsigned(procid); 210 } else { 211 *tl++ = txdr_unsigned(NFS_VER2); 212 *tl++ = txdr_unsigned(nfsv2_procid[procid]); 213 } 214 215 /* 216 * And then the authorization cred. 217 */ 218 *tl++ = txdr_unsigned(auth_type); 219 *tl = txdr_unsigned(authsiz); 220 switch (auth_type) { 221 case RPCAUTH_UNIX: 222 tl = nfsm_build(u_int32_t *, auth_len); 223 *tl++ = 0; /* stamp ?? 
*/ 224 *tl++ = 0; /* NULL hostname */ 225 *tl++ = txdr_unsigned(cr->cr_uid); 226 *tl++ = txdr_unsigned(cr->cr_groups[0]); 227 grpsiz = (auth_len >> 2) - 5; 228 *tl++ = txdr_unsigned(grpsiz); 229 for (i = 1; i <= grpsiz; i++) 230 *tl++ = txdr_unsigned(cr->cr_groups[i]); 231 break; 232 } 233 234 /* 235 * And the verifier... 236 */ 237 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); 238 *tl++ = txdr_unsigned(RPCAUTH_NULL); 239 *tl = 0; 240 mb->m_next = mrest; 241 mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; 242 mreq->m_pkthdr.rcvif = NULL; 243 *mbp = mb; 244 return (mreq); 245} 246 247/* 248 * copies a uio scatter/gather list to an mbuf chain. 249 * NOTE: can ony handle iovcnt == 1 250 */ 251int 252nfsm_uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, caddr_t *bpos) 253{ 254 char *uiocp; 255 struct mbuf *mp, *mp2; 256 int xfer, left, mlen; 257 int uiosiz, clflg, rem; 258 char *cp; 259 260#ifdef DIAGNOSTIC 261 if (uiop->uio_iovcnt != 1) 262 panic("nfsm_uiotombuf: iovcnt != 1"); 263#endif 264 265 if (siz > MLEN) /* or should it >= MCLBYTES ?? */ 266 clflg = 1; 267 else 268 clflg = 0; 269 rem = nfsm_rndup(siz)-siz; 270 mp = mp2 = *mq; 271 while (siz > 0) { 272 left = uiop->uio_iov->iov_len; 273 uiocp = uiop->uio_iov->iov_base; 274 if (left > siz) 275 left = siz; 276 uiosiz = left; 277 while (left > 0) { 278 mlen = M_TRAILINGSPACE(mp); 279 if (mlen == 0) { 280 MGET(mp, M_TRYWAIT, MT_DATA); 281 if (clflg) 282 MCLGET(mp, M_TRYWAIT); 283 mp->m_len = 0; 284 mp2->m_next = mp; 285 mp2 = mp; 286 mlen = M_TRAILINGSPACE(mp); 287 } 288 xfer = (left > mlen) ? mlen : left; 289#ifdef notdef 290 /* Not Yet.. 
*/ 291 if (uiop->uio_iov->iov_op != NULL) 292 (*(uiop->uio_iov->iov_op)) 293 (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); 294 else 295#endif 296 if (uiop->uio_segflg == UIO_SYSSPACE) 297 bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); 298 else 299 copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); 300 mp->m_len += xfer; 301 left -= xfer; 302 uiocp += xfer; 303 uiop->uio_offset += xfer; 304 uiop->uio_resid -= xfer; 305 } 306 uiop->uio_iov->iov_base = 307 (char *)uiop->uio_iov->iov_base + uiosiz; 308 uiop->uio_iov->iov_len -= uiosiz; 309 siz -= uiosiz; 310 } 311 if (rem > 0) { 312 if (rem > M_TRAILINGSPACE(mp)) { 313 MGET(mp, M_TRYWAIT, MT_DATA); 314 mp->m_len = 0; 315 mp2->m_next = mp; 316 } 317 cp = mtod(mp, caddr_t)+mp->m_len; 318 for (left = 0; left < rem; left++) 319 *cp++ = '\0'; 320 mp->m_len += rem; 321 *bpos = cp; 322 } else 323 *bpos = mtod(mp, caddr_t)+mp->m_len; 324 *mq = mp; 325 return (0); 326} 327 328/* 329 * Copy a string into mbufs for the hard cases... 330 */ 331int 332nfsm_strtmbuf(struct mbuf **mb, char **bpos, const char *cp, long siz) 333{ 334 struct mbuf *m1 = NULL, *m2; 335 long left, xfer, len, tlen; 336 u_int32_t *tl; 337 int putsize; 338 339 putsize = 1; 340 m2 = *mb; 341 left = M_TRAILINGSPACE(m2); 342 if (left > 0) { 343 tl = ((u_int32_t *)(*bpos)); 344 *tl++ = txdr_unsigned(siz); 345 putsize = 0; 346 left -= NFSX_UNSIGNED; 347 m2->m_len += NFSX_UNSIGNED; 348 if (left > 0) { 349 bcopy(cp, (caddr_t) tl, left); 350 siz -= left; 351 cp += left; 352 m2->m_len += left; 353 left = 0; 354 } 355 } 356 /* Loop around adding mbufs */ 357 while (siz > 0) { 358 MGET(m1, M_TRYWAIT, MT_DATA); 359 if (siz > MLEN) 360 MCLGET(m1, M_TRYWAIT); 361 m1->m_len = NFSMSIZ(m1); 362 m2->m_next = m1; 363 m2 = m1; 364 tl = mtod(m1, u_int32_t *); 365 tlen = 0; 366 if (putsize) { 367 *tl++ = txdr_unsigned(siz); 368 m1->m_len -= NFSX_UNSIGNED; 369 tlen = NFSX_UNSIGNED; 370 putsize = 0; 371 } 372 if (siz < m1->m_len) { 373 len = nfsm_rndup(siz); 374 xfer = siz; 375 if 
(xfer < len) 376 *(tl+(xfer>>2)) = 0; 377 } else { 378 xfer = len = m1->m_len; 379 } 380 bcopy(cp, (caddr_t) tl, xfer); 381 m1->m_len = len+tlen; 382 siz -= xfer; 383 cp += xfer; 384 } 385 *mb = m1; 386 *bpos = mtod(m1, caddr_t)+m1->m_len; 387 return (0); 388} 389 390/* 391 * Called once to initialize data structures... 392 */ 393int 394nfs_init(struct vfsconf *vfsp) 395{ 396 int i; 397 398 nfsmount_zone = uma_zcreate("NFSMOUNT", sizeof(struct nfsmount), 399 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 400 rpc_vers = txdr_unsigned(RPC_VER2); 401 rpc_call = txdr_unsigned(RPC_CALL); 402 rpc_reply = txdr_unsigned(RPC_REPLY); 403 rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); 404 rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); 405 rpc_mismatch = txdr_unsigned(RPC_MISMATCH); 406 rpc_autherr = txdr_unsigned(RPC_AUTHERR); 407 rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); 408 nfs_true = txdr_unsigned(TRUE); 409 nfs_false = txdr_unsigned(FALSE); 410 nfs_xdrneg1 = txdr_unsigned(-1); 411 nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; 412 if (nfs_ticks < 1) 413 nfs_ticks = 1; 414 /* Ensure async daemons disabled */ 415 for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { 416 nfs_iodwant[i] = NULL; 417 nfs_iodmount[i] = NULL; 418 } 419 nfs_nhinit(); /* Init the nfsnode table */ 420 421 /* 422 * Initialize reply list and start timer 423 */ 424 TAILQ_INIT(&nfs_reqq); 425 callout_init(&nfs_callout, CALLOUT_MPSAFE); 426 mtx_init(&nfs_reqq_mtx, "NFS reqq lock", NULL, MTX_DEF); 427 mtx_init(&nfs_iod_mtx, "NFS iod lock", NULL, MTX_DEF); 428 mtx_init(&nfs_xid_mtx, "NFS xid lock", NULL, MTX_DEF); 429 430 nfs_pbuf_freecnt = nswbuf / 2 + 1; 431 432 return (0); 433} 434 435int 436nfs_uninit(struct vfsconf *vfsp) 437{ 438 int i; 439 440 callout_stop(&nfs_callout); 441 442 KASSERT(TAILQ_EMPTY(&nfs_reqq), 443 ("nfs_uninit: request queue not empty")); 444 445 /* 446 * Tell all nfsiod processes to exit. Clear nfs_iodmax, and wakeup 447 * any sleeping nfsiods so they check nfs_iodmax and exit. 
448 */ 449 mtx_lock(&nfs_iod_mtx); 450 nfs_iodmax = 0; 451 for (i = 0; i < nfs_numasync; i++) 452 if (nfs_iodwant[i]) 453 wakeup(&nfs_iodwant[i]); 454 /* The last nfsiod to exit will wake us up when nfs_numasync hits 0 */ 455 while (nfs_numasync) 456 msleep(&nfs_numasync, &nfs_iod_mtx, PWAIT, "ioddie", 0); 457 mtx_unlock(&nfs_iod_mtx); 458 nfs_nhuninit(); 459 uma_zdestroy(nfsmount_zone); 460 return (0); 461} 462 463void 464nfs_dircookie_lock(struct nfsnode *np) 465{ 466 mtx_lock(&np->n_mtx); 467 while (np->n_flag & NDIRCOOKIELK) 468 (void) msleep(&np->n_flag, &np->n_mtx, PZERO, "nfsdirlk", 0); 469 np->n_flag |= NDIRCOOKIELK; 470 mtx_unlock(&np->n_mtx); 471} 472 473void 474nfs_dircookie_unlock(struct nfsnode *np) 475{ 476 mtx_lock(&np->n_mtx); 477 np->n_flag &= ~NDIRCOOKIELK; 478 wakeup(&np->n_flag); 479 mtx_unlock(&np->n_mtx); 480} 481 482int 483nfs_upgrade_vnlock(struct vnode *vp, struct thread *td) 484{ 485 int old_lock; 486 487 if ((old_lock = VOP_ISLOCKED(vp, td)) != LK_EXCLUSIVE) { 488 if (old_lock == LK_SHARED) { 489 /* Upgrade to exclusive lock, this might block */ 490 vn_lock(vp, LK_UPGRADE | LK_RETRY); 491 } else { 492 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 493 } 494 } 495 return old_lock; 496} 497 498void 499nfs_downgrade_vnlock(struct vnode *vp, struct thread *td, int old_lock) 500{ 501 if (old_lock != LK_EXCLUSIVE) { 502 if (old_lock == LK_SHARED) { 503 /* Downgrade from exclusive lock, this might block */ 504 vn_lock(vp, LK_DOWNGRADE); 505 } else { 506 VOP_UNLOCK(vp, 0); 507 } 508 } 509} 510 511void 512nfs_printf(const char *fmt, ...) 513{ 514 va_list ap; 515 516 mtx_lock(&Giant); 517 va_start(ap, fmt); 518 printf(fmt, ap); 519 va_end(ap); 520 mtx_unlock(&Giant); 521} 522 523/* 524 * Attribute cache routines. 
525 * nfs_loadattrcache() - loads or updates the cache contents from attributes 526 * that are on the mbuf list 527 * nfs_getattrcache() - returns valid attributes if found in cache, returns 528 * error otherwise 529 */ 530 531/* 532 * Load the attribute cache (that lives in the nfsnode entry) with 533 * the values on the mbuf list and 534 * Iff vap not NULL 535 * copy the attributes to *vaper 536 */ 537int 538nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp, 539 struct vattr *vaper, int dontshrink) 540{ 541 struct vnode *vp = *vpp; 542 struct vattr *vap; 543 struct nfs_fattr *fp; 544 struct nfsnode *np; 545 int32_t t1; 546 caddr_t cp2; 547 int rdev; 548 struct mbuf *md; 549 enum vtype vtyp; 550 u_short vmode; 551 struct timespec mtime, mtime_save; 552 int v3 = NFS_ISV3(vp); 553 struct thread *td = curthread; 554 555 md = *mdp; 556 t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; 557 cp2 = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, M_TRYWAIT); 558 if (cp2 == NULL) 559 return EBADRPC; 560 fp = (struct nfs_fattr *)cp2; 561 if (v3) { 562 vtyp = nfsv3tov_type(fp->fa_type); 563 vmode = fxdr_unsigned(u_short, fp->fa_mode); 564 rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), 565 fxdr_unsigned(int, fp->fa3_rdev.specdata2)); 566 fxdr_nfsv3time(&fp->fa3_mtime, &mtime); 567 } else { 568 vtyp = nfsv2tov_type(fp->fa_type); 569 vmode = fxdr_unsigned(u_short, fp->fa_mode); 570 /* 571 * XXX 572 * 573 * The duplicate information returned in fa_type and fa_mode 574 * is an ambiguity in the NFS version 2 protocol. 575 * 576 * VREG should be taken literally as a regular file. If a 577 * server intents to return some type information differently 578 * in the upper bits of the mode field (e.g. for sockets, or 579 * FIFOs), NFSv2 mandates fa_type to be VNON. 
Anyway, we 580 * leave the examination of the mode bits even in the VREG 581 * case to avoid breakage for bogus servers, but we make sure 582 * that there are actually type bits set in the upper part of 583 * fa_mode (and failing that, trust the va_type field). 584 * 585 * NFSv3 cleared the issue, and requires fa_mode to not 586 * contain any type information (while also introduing sockets 587 * and FIFOs for fa_type). 588 */ 589 if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) 590 vtyp = IFTOVT(vmode); 591 rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); 592 fxdr_nfsv2time(&fp->fa2_mtime, &mtime); 593 594 /* 595 * Really ugly NFSv2 kludge. 596 */ 597 if (vtyp == VCHR && rdev == 0xffffffff) 598 vtyp = VFIFO; 599 } 600 601 /* 602 * If v_type == VNON it is a new node, so fill in the v_type, 603 * n_mtime fields. Check to see if it represents a special 604 * device, and if so, check for a possible alias. Once the 605 * correct vnode has been obtained, fill in the rest of the 606 * information. 
607 */ 608 np = VTONFS(vp); 609 mtx_lock(&np->n_mtx); 610 if (vp->v_type != vtyp) { 611 vp->v_type = vtyp; 612 if (vp->v_type == VFIFO) 613 vp->v_op = &nfs_fifoops; 614 np->n_mtime = mtime; 615 } 616 vap = &np->n_vattr; 617 vap->va_type = vtyp; 618 vap->va_mode = (vmode & 07777); 619 vap->va_rdev = rdev; 620 mtime_save = vap->va_mtime; 621 vap->va_mtime = mtime; 622 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 623 if (v3) { 624 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); 625 vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); 626 vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); 627 vap->va_size = fxdr_hyper(&fp->fa3_size); 628 vap->va_blocksize = NFS_FABLKSIZE; 629 vap->va_bytes = fxdr_hyper(&fp->fa3_used); 630 vap->va_fileid = fxdr_unsigned(int32_t, 631 fp->fa3_fileid.nfsuquad[1]); 632 fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); 633 fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); 634 vap->va_flags = 0; 635 vap->va_filerev = 0; 636 } else { 637 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); 638 vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); 639 vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); 640 vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); 641 vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); 642 vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) 643 * NFS_FABLKSIZE; 644 vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); 645 fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); 646 vap->va_flags = 0; 647 vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t, 648 fp->fa2_ctime.nfsv2_sec); 649 vap->va_ctime.tv_nsec = 0; 650 vap->va_gen = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_usec); 651 vap->va_filerev = 0; 652 } 653 np->n_attrstamp = time_second; 654 /* Timestamp the NFS otw getattr fetch */ 655 if (td->td_proc) { 656 np->n_ac_ts_tid = td->td_tid; 657 np->n_ac_ts_pid = td->td_proc->p_pid; 658 np->n_ac_ts_syscalls = td->td_syscalls; 659 } else 660 bzero(&np->n_ac_ts, sizeof(struct nfs_attrcache_timestamp)); 
661 662 if (vap->va_size != np->n_size) { 663 if (vap->va_type == VREG) { 664 if (dontshrink && vap->va_size < np->n_size) { 665 /* 666 * We've been told not to shrink the file; 667 * zero np->n_attrstamp to indicate that 668 * the attributes are stale. 669 */ 670 vap->va_size = np->n_size; 671 np->n_attrstamp = 0; 672 } else if (np->n_flag & NMODIFIED) { 673 /* 674 * We've modified the file: Use the larger 675 * of our size, and the server's size. 676 */ 677 if (vap->va_size < np->n_size) { 678 vap->va_size = np->n_size; 679 } else { 680 np->n_size = vap->va_size; 681 np->n_flag |= NSIZECHANGED; 682 } 683 } else { 684 np->n_size = vap->va_size; 685 np->n_flag |= NSIZECHANGED; 686 } 687 vnode_pager_setsize(vp, np->n_size); 688 } else { 689 np->n_size = vap->va_size; 690 } 691 } 692 /* 693 * The following checks are added to prevent a race between (say) 694 * a READDIR+ and a WRITE. 695 * READDIR+, WRITE requests sent out. 696 * READDIR+ resp, WRITE resp received on client. 697 * However, the WRITE resp was handled before the READDIR+ resp 698 * causing the post op attrs from the write to be loaded first 699 * and the attrs from the READDIR+ to be loaded later. If this 700 * happens, we have stale attrs loaded into the attrcache. 701 * We detect this by for the mtime moving back. We invalidate the 702 * attrcache when this happens. 
703 */ 704 if (timespeccmp(&mtime_save, &vap->va_mtime, >)) 705 /* Size changed or mtime went backwards */ 706 np->n_attrstamp = 0; 707 if (vaper != NULL) { 708 bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); 709 if (np->n_flag & NCHG) { 710 if (np->n_flag & NACC) 711 vaper->va_atime = np->n_atim; 712 if (np->n_flag & NUPD) 713 vaper->va_mtime = np->n_mtim; 714 } 715 } 716 mtx_unlock(&np->n_mtx); 717 return (0); 718} 719 720#ifdef NFS_ACDEBUG 721#include <sys/sysctl.h> 722SYSCTL_DECL(_vfs_nfs); 723static int nfs_acdebug; 724SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, ""); 725#endif 726 727/* 728 * Check the time stamp 729 * If the cache is valid, copy contents to *vap and return 0 730 * otherwise return an error 731 */ 732int 733nfs_getattrcache(struct vnode *vp, struct vattr *vaper) 734{ 735 struct nfsnode *np; 736 struct vattr *vap; 737 struct nfsmount *nmp; 738 int timeo; 739 740 np = VTONFS(vp); 741 vap = &np->n_vattr; 742 nmp = VFSTONFS(vp->v_mount); 743#ifdef NFS_ACDEBUG 744 mtx_lock(&Giant); /* nfs_printf() */ 745#endif 746 mtx_lock(&np->n_mtx); 747 /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ 748 timeo = (time_second - np->n_mtime.tv_sec) / 10; 749 750#ifdef NFS_ACDEBUG 751 if (nfs_acdebug>1) 752 nfs_printf("nfs_getattrcache: initial timeo = %d\n", timeo); 753#endif 754 755 if (vap->va_type == VDIR) { 756 if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) 757 timeo = nmp->nm_acdirmin; 758 else if (timeo > nmp->nm_acdirmax) 759 timeo = nmp->nm_acdirmax; 760 } else { 761 if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) 762 timeo = nmp->nm_acregmin; 763 else if (timeo > nmp->nm_acregmax) 764 timeo = nmp->nm_acregmax; 765 } 766 767#ifdef NFS_ACDEBUG 768 if (nfs_acdebug > 2) 769 nfs_printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", 770 nmp->nm_acregmin, nmp->nm_acregmax, 771 nmp->nm_acdirmin, nmp->nm_acdirmax); 772 773 if (nfs_acdebug) 774 nfs_printf("nfs_getattrcache: age = %d; final 
timeo = %d\n", 775 (time_second - np->n_attrstamp), timeo); 776#endif 777 778 if ((time_second - np->n_attrstamp) >= timeo) { 779 nfsstats.attrcache_misses++; 780 mtx_unlock(&np->n_mtx); 781 return( ENOENT); 782 } 783 nfsstats.attrcache_hits++; 784 if (vap->va_size != np->n_size) { 785 if (vap->va_type == VREG) { 786 if (np->n_flag & NMODIFIED) { 787 if (vap->va_size < np->n_size) 788 vap->va_size = np->n_size; 789 else 790 np->n_size = vap->va_size; 791 } else { 792 np->n_size = vap->va_size; 793 } 794 vnode_pager_setsize(vp, np->n_size); 795 } else { 796 np->n_size = vap->va_size; 797 } 798 } 799 bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); 800 if (np->n_flag & NCHG) { 801 if (np->n_flag & NACC) 802 vaper->va_atime = np->n_atim; 803 if (np->n_flag & NUPD) 804 vaper->va_mtime = np->n_mtim; 805 } 806 mtx_unlock(&np->n_mtx); 807#ifdef NFS_ACDEBUG 808 mtx_unlock(&Giant); /* nfs_printf() */ 809#endif 810 return (0); 811} 812 813static nfsuint64 nfs_nullcookie = { { 0, 0 } }; 814/* 815 * This function finds the directory cookie that corresponds to the 816 * logical byte offset given. 
817 */ 818nfsuint64 * 819nfs_getcookie(struct nfsnode *np, off_t off, int add) 820{ 821 struct nfsdmap *dp, *dp2; 822 int pos; 823 nfsuint64 *retval = NULL; 824 825 pos = (uoff_t)off / NFS_DIRBLKSIZ; 826 if (pos == 0 || off < 0) { 827#ifdef DIAGNOSTIC 828 if (add) 829 panic("nfs getcookie add at <= 0"); 830#endif 831 return (&nfs_nullcookie); 832 } 833 pos--; 834 dp = LIST_FIRST(&np->n_cookies); 835 if (!dp) { 836 if (add) { 837 MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), 838 M_NFSDIROFF, M_WAITOK); 839 dp->ndm_eocookie = 0; 840 LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); 841 } else 842 goto out; 843 } 844 while (pos >= NFSNUMCOOKIES) { 845 pos -= NFSNUMCOOKIES; 846 if (LIST_NEXT(dp, ndm_list)) { 847 if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && 848 pos >= dp->ndm_eocookie) 849 goto out; 850 dp = LIST_NEXT(dp, ndm_list); 851 } else if (add) { 852 MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), 853 M_NFSDIROFF, M_WAITOK); 854 dp2->ndm_eocookie = 0; 855 LIST_INSERT_AFTER(dp, dp2, ndm_list); 856 dp = dp2; 857 } else 858 goto out; 859 } 860 if (pos >= dp->ndm_eocookie) { 861 if (add) 862 dp->ndm_eocookie = pos + 1; 863 else 864 goto out; 865 } 866 retval = &dp->ndm_cookies[pos]; 867out: 868 return (retval); 869} 870 871/* 872 * Invalidate cached directory information, except for the actual directory 873 * blocks (which are invalidated separately). 874 * Done mainly to avoid the use of stale offset cookies. 
875 */ 876void 877nfs_invaldir(struct vnode *vp) 878{ 879 struct nfsnode *np = VTONFS(vp); 880 881#ifdef DIAGNOSTIC 882 if (vp->v_type != VDIR) 883 panic("nfs: invaldir not dir"); 884#endif 885 nfs_dircookie_lock(np); 886 np->n_direofoffset = 0; 887 np->n_cookieverf.nfsuquad[0] = 0; 888 np->n_cookieverf.nfsuquad[1] = 0; 889 if (LIST_FIRST(&np->n_cookies)) 890 LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0; 891 nfs_dircookie_unlock(np); 892} 893 894/* 895 * The write verifier has changed (probably due to a server reboot), so all 896 * B_NEEDCOMMIT blocks will have to be written again. Since they are on the 897 * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT 898 * and B_CLUSTEROK flags. Once done the new write verifier can be set for the 899 * mount point. 900 * 901 * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data 902 * writes are not clusterable. 903 */ 904void 905nfs_clearcommit(struct mount *mp) 906{ 907 struct vnode *vp, *nvp; 908 struct buf *bp, *nbp; 909 int s; 910 911 s = splbio(); 912 MNT_ILOCK(mp); 913 MNT_VNODE_FOREACH(vp, mp, nvp) { 914 VI_LOCK(vp); 915 if (vp->v_iflag & VI_DOOMED) { 916 VI_UNLOCK(vp); 917 continue; 918 } 919 MNT_IUNLOCK(mp); 920 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { 921 if (!BUF_ISLOCKED(bp) && 922 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 923 == (B_DELWRI | B_NEEDCOMMIT)) 924 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 925 } 926 VI_UNLOCK(vp); 927 MNT_ILOCK(mp); 928 } 929 MNT_IUNLOCK(mp); 930 splx(s); 931} 932 933/* 934 * Helper functions for former macros. Some of these should be 935 * moved to their callers. 
936 */ 937 938int 939nfsm_mtofh_xx(struct vnode *d, struct vnode **v, int v3, int *f, 940 struct mbuf **md, caddr_t *dpos) 941{ 942 struct nfsnode *ttnp; 943 struct vnode *ttvp; 944 nfsfh_t *ttfhp; 945 u_int32_t *tl; 946 int ttfhsize; 947 int t1; 948 949 if (v3) { 950 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 951 if (tl == NULL) 952 return EBADRPC; 953 *f = fxdr_unsigned(int, *tl); 954 } else 955 *f = 1; 956 if (*f) { 957 t1 = nfsm_getfh_xx(&ttfhp, &ttfhsize, (v3), md, dpos); 958 if (t1 != 0) 959 return t1; 960 t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp, LK_EXCLUSIVE); 961 if (t1 != 0) 962 return t1; 963 *v = NFSTOV(ttnp); 964 } 965 if (v3) { 966 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 967 if (tl == NULL) 968 return EBADRPC; 969 if (*f) 970 *f = fxdr_unsigned(int, *tl); 971 else if (fxdr_unsigned(int, *tl)) 972 nfsm_adv_xx(NFSX_V3FATTR, md, dpos); 973 } 974 if (*f) { 975 ttvp = *v; 976 t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 0); 977 if (t1) 978 return t1; 979 *v = ttvp; 980 } 981 return 0; 982} 983 984int 985nfsm_getfh_xx(nfsfh_t **f, int *s, int v3, struct mbuf **md, caddr_t *dpos) 986{ 987 u_int32_t *tl; 988 989 if (v3) { 990 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 991 if (tl == NULL) 992 return EBADRPC; 993 *s = fxdr_unsigned(int, *tl); 994 if (*s <= 0 || *s > NFSX_V3FHMAX) 995 return EBADRPC; 996 } else 997 *s = NFSX_V2FH; 998 *f = nfsm_dissect_xx(nfsm_rndup(*s), md, dpos); 999 if (*f == NULL) 1000 return EBADRPC; 1001 else 1002 return 0; 1003} 1004 1005 1006int 1007nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md, 1008 caddr_t *dpos) 1009{ 1010 int t1; 1011 1012 struct vnode *ttvp = *v; 1013 t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 0); 1014 if (t1 != 0) 1015 return t1; 1016 *v = ttvp; 1017 return 0; 1018} 1019 1020int 1021nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md, 1022 caddr_t *dpos) 1023{ 1024 u_int32_t *tl; 1025 int t1; 1026 1027 struct vnode *ttvp = *v; 1028 tl = 
nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 1029 if (tl == NULL) 1030 return EBADRPC; 1031 *f = fxdr_unsigned(int, *tl); 1032 if (*f != 0) { 1033 t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 1); 1034 if (t1 != 0) { 1035 *f = 0; 1036 return t1; 1037 } 1038 *v = ttvp; 1039 } 1040 return 0; 1041} 1042 1043int 1044nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, caddr_t *dpos) 1045{ 1046 u_int32_t *tl; 1047 int ttattrf, ttretf = 0; 1048 int t1; 1049 1050 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 1051 if (tl == NULL) 1052 return EBADRPC; 1053 if (*tl == nfs_true) { 1054 tl = nfsm_dissect_xx(6 * NFSX_UNSIGNED, md, dpos); 1055 if (tl == NULL) 1056 return EBADRPC; 1057 mtx_lock(&(VTONFS(*v))->n_mtx); 1058 if (*f) 1059 ttretf = (VTONFS(*v)->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && 1060 VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); 1061 mtx_unlock(&(VTONFS(*v))->n_mtx); 1062 } 1063 t1 = nfsm_postop_attr_xx(v, &ttattrf, md, dpos); 1064 if (t1) 1065 return t1; 1066 if (*f) 1067 *f = ttretf; 1068 else 1069 *f = ttattrf; 1070 return 0; 1071} 1072 1073int 1074nfsm_strtom_xx(const char *a, int s, int m, struct mbuf **mb, caddr_t *bpos) 1075{ 1076 u_int32_t *tl; 1077 int t1; 1078 1079 if (s > m) 1080 return ENAMETOOLONG; 1081 t1 = nfsm_rndup(s) + NFSX_UNSIGNED; 1082 if (t1 <= M_TRAILINGSPACE(*mb)) { 1083 tl = nfsm_build_xx(t1, mb, bpos); 1084 *tl++ = txdr_unsigned(s); 1085 *(tl + ((t1 >> 2) - 2)) = 0; 1086 bcopy(a, tl, s); 1087 } else { 1088 t1 = nfsm_strtmbuf(mb, bpos, a, s); 1089 if (t1 != 0) 1090 return t1; 1091 } 1092 return 0; 1093} 1094 1095int 1096nfsm_fhtom_xx(struct vnode *v, int v3, struct mbuf **mb, caddr_t *bpos) 1097{ 1098 u_int32_t *tl; 1099 int t1; 1100 caddr_t cp; 1101 1102 if (v3) { 1103 t1 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED; 1104 if (t1 < M_TRAILINGSPACE(*mb)) { 1105 tl = nfsm_build_xx(t1, mb, bpos); 1106 *tl++ = txdr_unsigned(VTONFS(v)->n_fhsize); 1107 *(tl + ((t1 >> 2) - 2)) = 0; 1108 
bcopy(VTONFS(v)->n_fhp, tl, VTONFS(v)->n_fhsize); 1109 } else { 1110 t1 = nfsm_strtmbuf(mb, bpos, 1111 (const char *)VTONFS(v)->n_fhp, 1112 VTONFS(v)->n_fhsize); 1113 if (t1 != 0) 1114 return t1; 1115 } 1116 } else { 1117 cp = nfsm_build_xx(NFSX_V2FH, mb, bpos); 1118 bcopy(VTONFS(v)->n_fhp, cp, NFSX_V2FH); 1119 } 1120 return 0; 1121} 1122 1123void 1124nfsm_v3attrbuild_xx(struct vattr *va, int full, struct mbuf **mb, 1125 caddr_t *bpos) 1126{ 1127 u_int32_t *tl; 1128 1129 if (va->va_mode != (mode_t)VNOVAL) { 1130 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1131 *tl++ = nfs_true; 1132 *tl = txdr_unsigned(va->va_mode); 1133 } else { 1134 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1135 *tl = nfs_false; 1136 } 1137 if (full && va->va_uid != (uid_t)VNOVAL) { 1138 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1139 *tl++ = nfs_true; 1140 *tl = txdr_unsigned(va->va_uid); 1141 } else { 1142 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1143 *tl = nfs_false; 1144 } 1145 if (full && va->va_gid != (gid_t)VNOVAL) { 1146 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1147 *tl++ = nfs_true; 1148 *tl = txdr_unsigned(va->va_gid); 1149 } else { 1150 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1151 *tl = nfs_false; 1152 } 1153 if (full && va->va_size != VNOVAL) { 1154 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1155 *tl++ = nfs_true; 1156 txdr_hyper(va->va_size, tl); 1157 } else { 1158 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1159 *tl = nfs_false; 1160 } 1161 if (va->va_atime.tv_sec != VNOVAL) { 1162 if (va->va_atime.tv_sec != time_second) { 1163 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1164 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); 1165 txdr_nfsv3time(&va->va_atime, tl); 1166 } else { 1167 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1168 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); 1169 } 1170 } else { 1171 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1172 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); 1173 } 1174 if (va->va_mtime.tv_sec != 
VNOVAL) { 1175 if (va->va_mtime.tv_sec != time_second) { 1176 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1177 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); 1178 txdr_nfsv3time(&va->va_mtime, tl); 1179 } else { 1180 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1181 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); 1182 } 1183 } else { 1184 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1185 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); 1186 } 1187} 1188