nfs_subs.c revision 177599
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_subs.c 177599 2008-03-25 09:39:02Z ru $"); 37 38/* 39 * These functions support the macros and help fiddle mbuf chains for 40 * the nfs op functions. They do things like create the rpc header and 41 * copy data between mbuf chains and uio lists. 42 */ 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/kernel.h> 47#include <sys/bio.h> 48#include <sys/buf.h> 49#include <sys/proc.h> 50#include <sys/mount.h> 51#include <sys/vnode.h> 52#include <sys/namei.h> 53#include <sys/mbuf.h> 54#include <sys/socket.h> 55#include <sys/stat.h> 56#include <sys/malloc.h> 57#include <sys/sysent.h> 58#include <sys/syscall.h> 59#include <sys/sysproto.h> 60 61#include <vm/vm.h> 62#include <vm/vm_object.h> 63#include <vm/vm_extern.h> 64#include <vm/uma.h> 65 66#include <rpc/rpcclnt.h> 67 68#include <nfs/rpcv2.h> 69#include <nfs/nfsproto.h> 70#include <nfsclient/nfs.h> 71#include <nfsclient/nfsnode.h> 72#include <nfs/xdr_subs.h> 73#include <nfsclient/nfsm_subs.h> 74#include <nfsclient/nfsmount.h> 75 76#include <netinet/in.h> 77 78/* 79 * Note that stdarg.h and the ANSI style va_start macro is used for both 80 * ANSI and traditional C compilers. 81 */ 82#include <machine/stdarg.h> 83 84/* 85 * Data items converted to xdr at startup, since they are constant 86 * This is kinda hokey, but may save a little time doing byte swaps 87 */ 88u_int32_t nfs_xdrneg1; 89u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, 90 rpc_mismatch, rpc_auth_unix, rpc_msgaccepted; 91u_int32_t nfs_true, nfs_false; 92 93/* And other global data */ 94static u_int32_t nfs_xid = 0; 95static enum vtype nv2tov_type[8]= { 96 VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON 97}; 98 99int nfs_ticks; 100int nfs_pbuf_freecnt = -1; /* start out unlimited */ 101 102struct nfs_reqq nfs_reqq; 103struct mtx nfs_reqq_mtx; 104struct nfs_bufq nfs_bufq; 105static struct mtx nfs_xid_mtx; 106 107/* 108 * and the reverse mapping from generic to Version 2 procedure numbers 109 */ 110int nfsv2_procid[NFS_NPROCS] = { 111 NFSV2PROC_NULL, 112 NFSV2PROC_GETATTR, 113 NFSV2PROC_SETATTR, 114 NFSV2PROC_LOOKUP, 115 NFSV2PROC_NOOP, 116 NFSV2PROC_READLINK, 117 NFSV2PROC_READ, 118 NFSV2PROC_WRITE, 119 NFSV2PROC_CREATE, 120 NFSV2PROC_MKDIR, 121 NFSV2PROC_SYMLINK, 122 NFSV2PROC_CREATE, 123 NFSV2PROC_REMOVE, 124 NFSV2PROC_RMDIR, 125 NFSV2PROC_RENAME, 126 NFSV2PROC_LINK, 127 NFSV2PROC_READDIR, 128 NFSV2PROC_NOOP, 129 NFSV2PROC_STATFS, 130 NFSV2PROC_NOOP, 131 NFSV2PROC_NOOP, 132 NFSV2PROC_NOOP, 133 NFSV2PROC_NOOP, 134}; 135 136LIST_HEAD(nfsnodehashhead, nfsnode); 137 138u_int32_t 139nfs_xid_gen(void) 140{ 141 uint32_t xid; 142 143 mtx_lock(&nfs_xid_mtx); 144 145 /* Get a pretty random xid to start with */ 146 if (!nfs_xid) 147 nfs_xid = random(); 148 /* 149 * Skip zero xid if it should ever happen. 150 */ 151 if (++nfs_xid == 0) 152 nfs_xid++; 153 xid = nfs_xid; 154 mtx_unlock(&nfs_xid_mtx); 155 return xid; 156} 157 158/* 159 * Create the header for an rpc request packet 160 * The hsiz is the size of the rest of the nfs request header. 161 * (just used to decide if a cluster is a good idea) 162 */ 163struct mbuf * 164nfsm_reqhead(struct vnode *vp, u_long procid, int hsiz) 165{ 166 struct mbuf *mb; 167 168 MGET(mb, M_WAIT, MT_DATA); 169 if (hsiz >= MINCLSIZE) 170 MCLGET(mb, M_WAIT); 171 mb->m_len = 0; 172 return (mb); 173} 174 175/* 176 * Build the RPC header and fill in the authorization info. 177 * The authorization string argument is only used when the credentials 178 * come from outside of the kernel. 179 * Returns the head of the mbuf list. 180 */ 181struct mbuf * 182nfsm_rpchead(struct ucred *cr, int nmflag, int procid, int auth_type, 183 int auth_len, struct mbuf *mrest, int mrest_len, struct mbuf **mbp, 184 u_int32_t **xidpp) 185{ 186 struct mbuf *mb; 187 u_int32_t *tl; 188 caddr_t bpos; 189 int i; 190 struct mbuf *mreq; 191 int grpsiz, authsiz; 192 193 authsiz = nfsm_rndup(auth_len); 194 MGETHDR(mb, M_WAIT, MT_DATA); 195 if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { 196 MCLGET(mb, M_WAIT); 197 } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { 198 MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); 199 } else { 200 MH_ALIGN(mb, 8 * NFSX_UNSIGNED); 201 } 202 mb->m_len = 0; 203 mreq = mb; 204 bpos = mtod(mb, caddr_t); 205 206 /* 207 * First the RPC header. 208 */ 209 tl = nfsm_build(u_int32_t *, 8 * NFSX_UNSIGNED); 210 211 *xidpp = tl; 212 *tl++ = txdr_unsigned(nfs_xid_gen()); 213 *tl++ = rpc_call; 214 *tl++ = rpc_vers; 215 *tl++ = txdr_unsigned(NFS_PROG); 216 if (nmflag & NFSMNT_NFSV3) { 217 *tl++ = txdr_unsigned(NFS_VER3); 218 *tl++ = txdr_unsigned(procid); 219 } else { 220 *tl++ = txdr_unsigned(NFS_VER2); 221 *tl++ = txdr_unsigned(nfsv2_procid[procid]); 222 } 223 224 /* 225 * And then the authorization cred. 226 */ 227 *tl++ = txdr_unsigned(auth_type); 228 *tl = txdr_unsigned(authsiz); 229 switch (auth_type) { 230 case RPCAUTH_UNIX: 231 tl = nfsm_build(u_int32_t *, auth_len); 232 *tl++ = 0; /* stamp ?? */ 233 *tl++ = 0; /* NULL hostname */ 234 *tl++ = txdr_unsigned(cr->cr_uid); 235 *tl++ = txdr_unsigned(cr->cr_groups[0]); 236 grpsiz = (auth_len >> 2) - 5; 237 *tl++ = txdr_unsigned(grpsiz); 238 for (i = 1; i <= grpsiz; i++) 239 *tl++ = txdr_unsigned(cr->cr_groups[i]); 240 break; 241 } 242 243 /* 244 * And the verifier... 245 */ 246 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); 247 *tl++ = txdr_unsigned(RPCAUTH_NULL); 248 *tl = 0; 249 mb->m_next = mrest; 250 mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; 251 mreq->m_pkthdr.rcvif = NULL; 252 *mbp = mb; 253 return (mreq); 254} 255 256/* 257 * copies a uio scatter/gather list to an mbuf chain. 258 * NOTE: can ony handle iovcnt == 1 259 */ 260int 261nfsm_uiotombuf(struct uio *uiop, struct mbuf **mq, int siz, caddr_t *bpos) 262{ 263 char *uiocp; 264 struct mbuf *mp, *mp2; 265 int xfer, left, mlen; 266 int uiosiz, clflg, rem; 267 char *cp; 268 269#ifdef DIAGNOSTIC 270 if (uiop->uio_iovcnt != 1) 271 panic("nfsm_uiotombuf: iovcnt != 1"); 272#endif 273 274 if (siz > MLEN) /* or should it >= MCLBYTES ?? */ 275 clflg = 1; 276 else 277 clflg = 0; 278 rem = nfsm_rndup(siz)-siz; 279 mp = mp2 = *mq; 280 while (siz > 0) { 281 left = uiop->uio_iov->iov_len; 282 uiocp = uiop->uio_iov->iov_base; 283 if (left > siz) 284 left = siz; 285 uiosiz = left; 286 while (left > 0) { 287 mlen = M_TRAILINGSPACE(mp); 288 if (mlen == 0) { 289 MGET(mp, M_WAIT, MT_DATA); 290 if (clflg) 291 MCLGET(mp, M_WAIT); 292 mp->m_len = 0; 293 mp2->m_next = mp; 294 mp2 = mp; 295 mlen = M_TRAILINGSPACE(mp); 296 } 297 xfer = (left > mlen) ? mlen : left; 298#ifdef notdef 299 /* Not Yet.. */ 300 if (uiop->uio_iov->iov_op != NULL) 301 (*(uiop->uio_iov->iov_op)) 302 (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); 303 else 304#endif 305 if (uiop->uio_segflg == UIO_SYSSPACE) 306 bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); 307 else 308 copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); 309 mp->m_len += xfer; 310 left -= xfer; 311 uiocp += xfer; 312 uiop->uio_offset += xfer; 313 uiop->uio_resid -= xfer; 314 } 315 uiop->uio_iov->iov_base = 316 (char *)uiop->uio_iov->iov_base + uiosiz; 317 uiop->uio_iov->iov_len -= uiosiz; 318 siz -= uiosiz; 319 } 320 if (rem > 0) { 321 if (rem > M_TRAILINGSPACE(mp)) { 322 MGET(mp, M_WAIT, MT_DATA); 323 mp->m_len = 0; 324 mp2->m_next = mp; 325 } 326 cp = mtod(mp, caddr_t)+mp->m_len; 327 for (left = 0; left < rem; left++) 328 *cp++ = '\0'; 329 mp->m_len += rem; 330 *bpos = cp; 331 } else 332 *bpos = mtod(mp, caddr_t)+mp->m_len; 333 *mq = mp; 334 return (0); 335} 336 337/* 338 * Copy a string into mbufs for the hard cases... 339 */ 340int 341nfsm_strtmbuf(struct mbuf **mb, char **bpos, const char *cp, long siz) 342{ 343 struct mbuf *m1 = NULL, *m2; 344 long left, xfer, len, tlen; 345 u_int32_t *tl; 346 int putsize; 347 348 putsize = 1; 349 m2 = *mb; 350 left = M_TRAILINGSPACE(m2); 351 if (left > 0) { 352 tl = ((u_int32_t *)(*bpos)); 353 *tl++ = txdr_unsigned(siz); 354 putsize = 0; 355 left -= NFSX_UNSIGNED; 356 m2->m_len += NFSX_UNSIGNED; 357 if (left > 0) { 358 bcopy(cp, (caddr_t) tl, left); 359 siz -= left; 360 cp += left; 361 m2->m_len += left; 362 left = 0; 363 } 364 } 365 /* Loop around adding mbufs */ 366 while (siz > 0) { 367 MGET(m1, M_WAIT, MT_DATA); 368 if (siz > MLEN) 369 MCLGET(m1, M_WAIT); 370 m1->m_len = NFSMSIZ(m1); 371 m2->m_next = m1; 372 m2 = m1; 373 tl = mtod(m1, u_int32_t *); 374 tlen = 0; 375 if (putsize) { 376 *tl++ = txdr_unsigned(siz); 377 m1->m_len -= NFSX_UNSIGNED; 378 tlen = NFSX_UNSIGNED; 379 putsize = 0; 380 } 381 if (siz < m1->m_len) { 382 len = nfsm_rndup(siz); 383 xfer = siz; 384 if (xfer < len) 385 *(tl+(xfer>>2)) = 0; 386 } else { 387 xfer = len = m1->m_len; 388 } 389 bcopy(cp, (caddr_t) tl, xfer); 390 m1->m_len = len+tlen; 391 siz -= xfer; 392 cp += xfer; 393 } 394 *mb = m1; 395 *bpos = mtod(m1, caddr_t)+m1->m_len; 396 return (0); 397} 398 399/* 400 * Called once to initialize data structures... 401 */ 402int 403nfs_init(struct vfsconf *vfsp) 404{ 405 int i; 406 407 nfsmount_zone = uma_zcreate("NFSMOUNT", sizeof(struct nfsmount), 408 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 409 rpc_vers = txdr_unsigned(RPC_VER2); 410 rpc_call = txdr_unsigned(RPC_CALL); 411 rpc_reply = txdr_unsigned(RPC_REPLY); 412 rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); 413 rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); 414 rpc_mismatch = txdr_unsigned(RPC_MISMATCH); 415 rpc_autherr = txdr_unsigned(RPC_AUTHERR); 416 rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); 417 nfs_true = txdr_unsigned(TRUE); 418 nfs_false = txdr_unsigned(FALSE); 419 nfs_xdrneg1 = txdr_unsigned(-1); 420 nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; 421 if (nfs_ticks < 1) 422 nfs_ticks = 1; 423 /* Ensure async daemons disabled */ 424 for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { 425 nfs_iodwant[i] = NULL; 426 nfs_iodmount[i] = NULL; 427 } 428 nfs_nhinit(); /* Init the nfsnode table */ 429 430 /* 431 * Initialize reply list and start timer 432 */ 433 TAILQ_INIT(&nfs_reqq); 434 callout_init(&nfs_callout, CALLOUT_MPSAFE); 435 mtx_init(&nfs_reqq_mtx, "NFS reqq lock", NULL, MTX_DEF); 436 mtx_init(&nfs_iod_mtx, "NFS iod lock", NULL, MTX_DEF); 437 mtx_init(&nfs_xid_mtx, "NFS xid lock", NULL, MTX_DEF); 438 439 nfs_pbuf_freecnt = nswbuf / 2 + 1; 440 441 return (0); 442} 443 444int 445nfs_uninit(struct vfsconf *vfsp) 446{ 447 int i; 448 449 callout_stop(&nfs_callout); 450 451 KASSERT(TAILQ_EMPTY(&nfs_reqq), 452 ("nfs_uninit: request queue not empty")); 453 454 /* 455 * Tell all nfsiod processes to exit. Clear nfs_iodmax, and wakeup 456 * any sleeping nfsiods so they check nfs_iodmax and exit. 457 */ 458 mtx_lock(&nfs_iod_mtx); 459 nfs_iodmax = 0; 460 for (i = 0; i < nfs_numasync; i++) 461 if (nfs_iodwant[i]) 462 wakeup(&nfs_iodwant[i]); 463 /* The last nfsiod to exit will wake us up when nfs_numasync hits 0 */ 464 while (nfs_numasync) 465 msleep(&nfs_numasync, &nfs_iod_mtx, PWAIT, "ioddie", 0); 466 mtx_unlock(&nfs_iod_mtx); 467 nfs_nhuninit(); 468 uma_zdestroy(nfsmount_zone); 469 return (0); 470} 471 472void 473nfs_dircookie_lock(struct nfsnode *np) 474{ 475 mtx_lock(&np->n_mtx); 476 while (np->n_flag & NDIRCOOKIELK) 477 (void) msleep(&np->n_flag, &np->n_mtx, PZERO, "nfsdirlk", 0); 478 np->n_flag |= NDIRCOOKIELK; 479 mtx_unlock(&np->n_mtx); 480} 481 482void 483nfs_dircookie_unlock(struct nfsnode *np) 484{ 485 mtx_lock(&np->n_mtx); 486 np->n_flag &= ~NDIRCOOKIELK; 487 wakeup(&np->n_flag); 488 mtx_unlock(&np->n_mtx); 489} 490 491int 492nfs_upgrade_vnlock(struct vnode *vp) 493{ 494 int old_lock; 495 496 if ((old_lock = VOP_ISLOCKED(vp)) != LK_EXCLUSIVE) { 497 if (old_lock == LK_SHARED) { 498 /* Upgrade to exclusive lock, this might block */ 499 vn_lock(vp, LK_UPGRADE | LK_RETRY); 500 } else { 501 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 502 } 503 } 504 return old_lock; 505} 506 507void 508nfs_downgrade_vnlock(struct vnode *vp, int old_lock) 509{ 510 if (old_lock != LK_EXCLUSIVE) { 511 if (old_lock == LK_SHARED) { 512 /* Downgrade from exclusive lock, this might block */ 513 vn_lock(vp, LK_DOWNGRADE); 514 } else { 515 VOP_UNLOCK(vp, 0); 516 } 517 } 518} 519 520void 521nfs_printf(const char *fmt, ...) 522{ 523 va_list ap; 524 525 mtx_lock(&Giant); 526 va_start(ap, fmt); 527 printf(fmt, ap); 528 va_end(ap); 529 mtx_unlock(&Giant); 530} 531 532/* 533 * Attribute cache routines. 534 * nfs_loadattrcache() - loads or updates the cache contents from attributes 535 * that are on the mbuf list 536 * nfs_getattrcache() - returns valid attributes if found in cache, returns 537 * error otherwise 538 */ 539 540/* 541 * Load the attribute cache (that lives in the nfsnode entry) with 542 * the values on the mbuf list and 543 * Iff vap not NULL 544 * copy the attributes to *vaper 545 */ 546int 547nfs_loadattrcache(struct vnode **vpp, struct mbuf **mdp, caddr_t *dposp, 548 struct vattr *vaper, int dontshrink) 549{ 550 struct vnode *vp = *vpp; 551 struct vattr *vap; 552 struct nfs_fattr *fp; 553 struct nfsnode *np; 554 int32_t t1; 555 caddr_t cp2; 556 int rdev; 557 struct mbuf *md; 558 enum vtype vtyp; 559 u_short vmode; 560 struct timespec mtime, mtime_save; 561 int v3 = NFS_ISV3(vp); 562 struct thread *td = curthread; 563 564 md = *mdp; 565 t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; 566 cp2 = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, M_WAIT); 567 if (cp2 == NULL) 568 return EBADRPC; 569 fp = (struct nfs_fattr *)cp2; 570 if (v3) { 571 vtyp = nfsv3tov_type(fp->fa_type); 572 vmode = fxdr_unsigned(u_short, fp->fa_mode); 573 rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), 574 fxdr_unsigned(int, fp->fa3_rdev.specdata2)); 575 fxdr_nfsv3time(&fp->fa3_mtime, &mtime); 576 } else { 577 vtyp = nfsv2tov_type(fp->fa_type); 578 vmode = fxdr_unsigned(u_short, fp->fa_mode); 579 /* 580 * XXX 581 * 582 * The duplicate information returned in fa_type and fa_mode 583 * is an ambiguity in the NFS version 2 protocol. 584 * 585 * VREG should be taken literally as a regular file. If a 586 * server intents to return some type information differently 587 * in the upper bits of the mode field (e.g. for sockets, or 588 * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we 589 * leave the examination of the mode bits even in the VREG 590 * case to avoid breakage for bogus servers, but we make sure 591 * that there are actually type bits set in the upper part of 592 * fa_mode (and failing that, trust the va_type field). 593 * 594 * NFSv3 cleared the issue, and requires fa_mode to not 595 * contain any type information (while also introduing sockets 596 * and FIFOs for fa_type). 597 */ 598 if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) 599 vtyp = IFTOVT(vmode); 600 rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); 601 fxdr_nfsv2time(&fp->fa2_mtime, &mtime); 602 603 /* 604 * Really ugly NFSv2 kludge. 605 */ 606 if (vtyp == VCHR && rdev == 0xffffffff) 607 vtyp = VFIFO; 608 } 609 610 /* 611 * If v_type == VNON it is a new node, so fill in the v_type, 612 * n_mtime fields. Check to see if it represents a special 613 * device, and if so, check for a possible alias. Once the 614 * correct vnode has been obtained, fill in the rest of the 615 * information. 616 */ 617 np = VTONFS(vp); 618 mtx_lock(&np->n_mtx); 619 if (vp->v_type != vtyp) { 620 vp->v_type = vtyp; 621 if (vp->v_type == VFIFO) 622 vp->v_op = &nfs_fifoops; 623 np->n_mtime = mtime; 624 } 625 vap = &np->n_vattr; 626 vap->va_type = vtyp; 627 vap->va_mode = (vmode & 07777); 628 vap->va_rdev = rdev; 629 mtime_save = vap->va_mtime; 630 vap->va_mtime = mtime; 631 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 632 if (v3) { 633 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); 634 vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); 635 vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); 636 vap->va_size = fxdr_hyper(&fp->fa3_size); 637 vap->va_blocksize = NFS_FABLKSIZE; 638 vap->va_bytes = fxdr_hyper(&fp->fa3_used); 639 vap->va_fileid = fxdr_unsigned(int32_t, 640 fp->fa3_fileid.nfsuquad[1]); 641 fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); 642 fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); 643 vap->va_flags = 0; 644 vap->va_filerev = 0; 645 } else { 646 vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); 647 vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); 648 vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); 649 vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); 650 vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); 651 vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) 652 * NFS_FABLKSIZE; 653 vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); 654 fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); 655 vap->va_flags = 0; 656 vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t, 657 fp->fa2_ctime.nfsv2_sec); 658 vap->va_ctime.tv_nsec = 0; 659 vap->va_gen = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_usec); 660 vap->va_filerev = 0; 661 } 662 np->n_attrstamp = time_second; 663 /* Timestamp the NFS otw getattr fetch */ 664 if (td->td_proc) { 665 np->n_ac_ts_tid = td->td_tid; 666 np->n_ac_ts_pid = td->td_proc->p_pid; 667 np->n_ac_ts_syscalls = td->td_syscalls; 668 } else 669 bzero(&np->n_ac_ts, sizeof(struct nfs_attrcache_timestamp)); 670 671 if (vap->va_size != np->n_size) { 672 if (vap->va_type == VREG) { 673 if (dontshrink && vap->va_size < np->n_size) { 674 /* 675 * We've been told not to shrink the file; 676 * zero np->n_attrstamp to indicate that 677 * the attributes are stale. 678 */ 679 vap->va_size = np->n_size; 680 np->n_attrstamp = 0; 681 } else if (np->n_flag & NMODIFIED) { 682 /* 683 * We've modified the file: Use the larger 684 * of our size, and the server's size. 685 */ 686 if (vap->va_size < np->n_size) { 687 vap->va_size = np->n_size; 688 } else { 689 np->n_size = vap->va_size; 690 np->n_flag |= NSIZECHANGED; 691 } 692 } else { 693 np->n_size = vap->va_size; 694 np->n_flag |= NSIZECHANGED; 695 } 696 vnode_pager_setsize(vp, np->n_size); 697 } else { 698 np->n_size = vap->va_size; 699 } 700 } 701 /* 702 * The following checks are added to prevent a race between (say) 703 * a READDIR+ and a WRITE. 704 * READDIR+, WRITE requests sent out. 705 * READDIR+ resp, WRITE resp received on client. 706 * However, the WRITE resp was handled before the READDIR+ resp 707 * causing the post op attrs from the write to be loaded first 708 * and the attrs from the READDIR+ to be loaded later. If this 709 * happens, we have stale attrs loaded into the attrcache. 710 * We detect this by for the mtime moving back. We invalidate the 711 * attrcache when this happens. 712 */ 713 if (timespeccmp(&mtime_save, &vap->va_mtime, >)) 714 /* Size changed or mtime went backwards */ 715 np->n_attrstamp = 0; 716 if (vaper != NULL) { 717 bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); 718 if (np->n_flag & NCHG) { 719 if (np->n_flag & NACC) 720 vaper->va_atime = np->n_atim; 721 if (np->n_flag & NUPD) 722 vaper->va_mtime = np->n_mtim; 723 } 724 } 725 mtx_unlock(&np->n_mtx); 726 return (0); 727} 728 729#ifdef NFS_ACDEBUG 730#include <sys/sysctl.h> 731SYSCTL_DECL(_vfs_nfs); 732static int nfs_acdebug; 733SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, ""); 734#endif 735 736/* 737 * Check the time stamp 738 * If the cache is valid, copy contents to *vap and return 0 739 * otherwise return an error 740 */ 741int 742nfs_getattrcache(struct vnode *vp, struct vattr *vaper) 743{ 744 struct nfsnode *np; 745 struct vattr *vap; 746 struct nfsmount *nmp; 747 int timeo; 748 749 np = VTONFS(vp); 750 vap = &np->n_vattr; 751 nmp = VFSTONFS(vp->v_mount); 752#ifdef NFS_ACDEBUG 753 mtx_lock(&Giant); /* nfs_printf() */ 754#endif 755 mtx_lock(&np->n_mtx); 756 /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ 757 timeo = (time_second - np->n_mtime.tv_sec) / 10; 758 759#ifdef NFS_ACDEBUG 760 if (nfs_acdebug>1) 761 nfs_printf("nfs_getattrcache: initial timeo = %d\n", timeo); 762#endif 763 764 if (vap->va_type == VDIR) { 765 if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) 766 timeo = nmp->nm_acdirmin; 767 else if (timeo > nmp->nm_acdirmax) 768 timeo = nmp->nm_acdirmax; 769 } else { 770 if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) 771 timeo = nmp->nm_acregmin; 772 else if (timeo > nmp->nm_acregmax) 773 timeo = nmp->nm_acregmax; 774 } 775 776#ifdef NFS_ACDEBUG 777 if (nfs_acdebug > 2) 778 nfs_printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", 779 nmp->nm_acregmin, nmp->nm_acregmax, 780 nmp->nm_acdirmin, nmp->nm_acdirmax); 781 782 if (nfs_acdebug) 783 nfs_printf("nfs_getattrcache: age = %d; final timeo = %d\n", 784 (time_second - np->n_attrstamp), timeo); 785#endif 786 787 if ((time_second - np->n_attrstamp) >= timeo) { 788 nfsstats.attrcache_misses++; 789 mtx_unlock(&np->n_mtx); 790 return( ENOENT); 791 } 792 nfsstats.attrcache_hits++; 793 if (vap->va_size != np->n_size) { 794 if (vap->va_type == VREG) { 795 if (np->n_flag & NMODIFIED) { 796 if (vap->va_size < np->n_size) 797 vap->va_size = np->n_size; 798 else 799 np->n_size = vap->va_size; 800 } else { 801 np->n_size = vap->va_size; 802 } 803 vnode_pager_setsize(vp, np->n_size); 804 } else { 805 np->n_size = vap->va_size; 806 } 807 } 808 bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); 809 if (np->n_flag & NCHG) { 810 if (np->n_flag & NACC) 811 vaper->va_atime = np->n_atim; 812 if (np->n_flag & NUPD) 813 vaper->va_mtime = np->n_mtim; 814 } 815 mtx_unlock(&np->n_mtx); 816#ifdef NFS_ACDEBUG 817 mtx_unlock(&Giant); /* nfs_printf() */ 818#endif 819 return (0); 820} 821 822static nfsuint64 nfs_nullcookie = { { 0, 0 } }; 823/* 824 * This function finds the directory cookie that corresponds to the 825 * logical byte offset given. 826 */ 827nfsuint64 * 828nfs_getcookie(struct nfsnode *np, off_t off, int add) 829{ 830 struct nfsdmap *dp, *dp2; 831 int pos; 832 nfsuint64 *retval = NULL; 833 834 pos = (uoff_t)off / NFS_DIRBLKSIZ; 835 if (pos == 0 || off < 0) { 836#ifdef DIAGNOSTIC 837 if (add) 838 panic("nfs getcookie add at <= 0"); 839#endif 840 return (&nfs_nullcookie); 841 } 842 pos--; 843 dp = LIST_FIRST(&np->n_cookies); 844 if (!dp) { 845 if (add) { 846 MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), 847 M_NFSDIROFF, M_WAITOK); 848 dp->ndm_eocookie = 0; 849 LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); 850 } else 851 goto out; 852 } 853 while (pos >= NFSNUMCOOKIES) { 854 pos -= NFSNUMCOOKIES; 855 if (LIST_NEXT(dp, ndm_list)) { 856 if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && 857 pos >= dp->ndm_eocookie) 858 goto out; 859 dp = LIST_NEXT(dp, ndm_list); 860 } else if (add) { 861 MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), 862 M_NFSDIROFF, M_WAITOK); 863 dp2->ndm_eocookie = 0; 864 LIST_INSERT_AFTER(dp, dp2, ndm_list); 865 dp = dp2; 866 } else 867 goto out; 868 } 869 if (pos >= dp->ndm_eocookie) { 870 if (add) 871 dp->ndm_eocookie = pos + 1; 872 else 873 goto out; 874 } 875 retval = &dp->ndm_cookies[pos]; 876out: 877 return (retval); 878} 879 880/* 881 * Invalidate cached directory information, except for the actual directory 882 * blocks (which are invalidated separately). 883 * Done mainly to avoid the use of stale offset cookies. 884 */ 885void 886nfs_invaldir(struct vnode *vp) 887{ 888 struct nfsnode *np = VTONFS(vp); 889 890#ifdef DIAGNOSTIC 891 if (vp->v_type != VDIR) 892 panic("nfs: invaldir not dir"); 893#endif 894 nfs_dircookie_lock(np); 895 np->n_direofoffset = 0; 896 np->n_cookieverf.nfsuquad[0] = 0; 897 np->n_cookieverf.nfsuquad[1] = 0; 898 if (LIST_FIRST(&np->n_cookies)) 899 LIST_FIRST(&np->n_cookies)->ndm_eocookie = 0; 900 nfs_dircookie_unlock(np); 901} 902 903/* 904 * The write verifier has changed (probably due to a server reboot), so all 905 * B_NEEDCOMMIT blocks will have to be written again. Since they are on the 906 * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT 907 * and B_CLUSTEROK flags. Once done the new write verifier can be set for the 908 * mount point. 909 * 910 * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data 911 * writes are not clusterable. 912 */ 913void 914nfs_clearcommit(struct mount *mp) 915{ 916 struct vnode *vp, *nvp; 917 struct buf *bp, *nbp; 918 struct bufobj *bo; 919 920 MNT_ILOCK(mp); 921 MNT_VNODE_FOREACH(vp, mp, nvp) { 922 bo = &vp->v_bufobj; 923 VI_LOCK(vp); 924 if (vp->v_iflag & VI_DOOMED) { 925 VI_UNLOCK(vp); 926 continue; 927 } 928 vholdl(vp); 929 VI_UNLOCK(vp); 930 MNT_IUNLOCK(mp); 931 BO_LOCK(bo); 932 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 933 if (!BUF_ISLOCKED(bp) && 934 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 935 == (B_DELWRI | B_NEEDCOMMIT)) 936 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 937 } 938 BO_UNLOCK(bo); 939 vdrop(vp); 940 MNT_ILOCK(mp); 941 } 942 MNT_IUNLOCK(mp); 943} 944 945/* 946 * Helper functions for former macros. Some of these should be 947 * moved to their callers. 948 */ 949 950int 951nfsm_mtofh_xx(struct vnode *d, struct vnode **v, int v3, int *f, 952 struct mbuf **md, caddr_t *dpos) 953{ 954 struct nfsnode *ttnp; 955 struct vnode *ttvp; 956 nfsfh_t *ttfhp; 957 u_int32_t *tl; 958 int ttfhsize; 959 int t1; 960 961 if (v3) { 962 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 963 if (tl == NULL) 964 return EBADRPC; 965 *f = fxdr_unsigned(int, *tl); 966 } else 967 *f = 1; 968 if (*f) { 969 t1 = nfsm_getfh_xx(&ttfhp, &ttfhsize, (v3), md, dpos); 970 if (t1 != 0) 971 return t1; 972 t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp, LK_EXCLUSIVE); 973 if (t1 != 0) 974 return t1; 975 *v = NFSTOV(ttnp); 976 } 977 if (v3) { 978 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 979 if (tl == NULL) 980 return EBADRPC; 981 if (*f) 982 *f = fxdr_unsigned(int, *tl); 983 else if (fxdr_unsigned(int, *tl)) 984 nfsm_adv_xx(NFSX_V3FATTR, md, dpos); 985 } 986 if (*f) { 987 ttvp = *v; 988 t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 0); 989 if (t1) 990 return t1; 991 *v = ttvp; 992 } 993 return 0; 994} 995 996int 997nfsm_getfh_xx(nfsfh_t **f, int *s, int v3, struct mbuf **md, caddr_t *dpos) 998{ 999 u_int32_t *tl; 1000 1001 if (v3) { 1002 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 1003 if (tl == NULL) 1004 return EBADRPC; 1005 *s = fxdr_unsigned(int, *tl); 1006 if (*s <= 0 || *s > NFSX_V3FHMAX) 1007 return EBADRPC; 1008 } else 1009 *s = NFSX_V2FH; 1010 *f = nfsm_dissect_xx(nfsm_rndup(*s), md, dpos); 1011 if (*f == NULL) 1012 return EBADRPC; 1013 else 1014 return 0; 1015} 1016 1017 1018int 1019nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md, 1020 caddr_t *dpos) 1021{ 1022 int t1; 1023 1024 struct vnode *ttvp = *v; 1025 t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 0); 1026 if (t1 != 0) 1027 return t1; 1028 *v = ttvp; 1029 return 0; 1030} 1031 1032int 1033nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md, 1034 caddr_t *dpos) 1035{ 1036 u_int32_t *tl; 1037 int t1; 1038 1039 struct vnode *ttvp = *v; 1040 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 1041 if (tl == NULL) 1042 return EBADRPC; 1043 *f = fxdr_unsigned(int, *tl); 1044 if (*f != 0) { 1045 t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 1); 1046 if (t1 != 0) { 1047 *f = 0; 1048 return t1; 1049 } 1050 *v = ttvp; 1051 } 1052 return 0; 1053} 1054 1055int 1056nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, caddr_t *dpos) 1057{ 1058 u_int32_t *tl; 1059 int ttattrf, ttretf = 0; 1060 int t1; 1061 1062 tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); 1063 if (tl == NULL) 1064 return EBADRPC; 1065 if (*tl == nfs_true) { 1066 tl = nfsm_dissect_xx(6 * NFSX_UNSIGNED, md, dpos); 1067 if (tl == NULL) 1068 return EBADRPC; 1069 mtx_lock(&(VTONFS(*v))->n_mtx); 1070 if (*f) 1071 ttretf = (VTONFS(*v)->n_mtime.tv_sec == fxdr_unsigned(u_int32_t, *(tl + 2)) && 1072 VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); 1073 mtx_unlock(&(VTONFS(*v))->n_mtx); 1074 } 1075 t1 = nfsm_postop_attr_xx(v, &ttattrf, md, dpos); 1076 if (t1) 1077 return t1; 1078 if (*f) 1079 *f = ttretf; 1080 else 1081 *f = ttattrf; 1082 return 0; 1083} 1084 1085int 1086nfsm_strtom_xx(const char *a, int s, int m, struct mbuf **mb, caddr_t *bpos) 1087{ 1088 u_int32_t *tl; 1089 int t1; 1090 1091 if (s > m) 1092 return ENAMETOOLONG; 1093 t1 = nfsm_rndup(s) + NFSX_UNSIGNED; 1094 if (t1 <= M_TRAILINGSPACE(*mb)) { 1095 tl = nfsm_build_xx(t1, mb, bpos); 1096 *tl++ = txdr_unsigned(s); 1097 *(tl + ((t1 >> 2) - 2)) = 0; 1098 bcopy(a, tl, s); 1099 } else { 1100 t1 = nfsm_strtmbuf(mb, bpos, a, s); 1101 if (t1 != 0) 1102 return t1; 1103 } 1104 return 0; 1105} 1106 1107int 1108nfsm_fhtom_xx(struct vnode *v, int v3, struct mbuf **mb, caddr_t *bpos) 1109{ 1110 u_int32_t *tl; 1111 int t1; 1112 caddr_t cp; 1113 1114 if (v3) { 1115 t1 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED; 1116 if (t1 < M_TRAILINGSPACE(*mb)) { 1117 tl = nfsm_build_xx(t1, mb, bpos); 1118 *tl++ = txdr_unsigned(VTONFS(v)->n_fhsize); 1119 *(tl + ((t1 >> 2) - 2)) = 0; 1120 bcopy(VTONFS(v)->n_fhp, tl, VTONFS(v)->n_fhsize); 1121 } else { 1122 t1 = nfsm_strtmbuf(mb, bpos, 1123 (const char *)VTONFS(v)->n_fhp, 1124 VTONFS(v)->n_fhsize); 1125 if (t1 != 0) 1126 return t1; 1127 } 1128 } else { 1129 cp = nfsm_build_xx(NFSX_V2FH, mb, bpos); 1130 bcopy(VTONFS(v)->n_fhp, cp, NFSX_V2FH); 1131 } 1132 return 0; 1133} 1134 1135void 1136nfsm_v3attrbuild_xx(struct vattr *va, int full, struct mbuf **mb, 1137 caddr_t *bpos) 1138{ 1139 u_int32_t *tl; 1140 1141 if (va->va_mode != (mode_t)VNOVAL) { 1142 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1143 *tl++ = nfs_true; 1144 *tl = txdr_unsigned(va->va_mode); 1145 } else { 1146 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1147 *tl = nfs_false; 1148 } 1149 if (full && va->va_uid != (uid_t)VNOVAL) { 1150 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1151 *tl++ = nfs_true; 1152 *tl = txdr_unsigned(va->va_uid); 1153 } else { 1154 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1155 *tl = nfs_false; 1156 } 1157 if (full && va->va_gid != (gid_t)VNOVAL) { 1158 tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); 1159 *tl++ = nfs_true; 1160 *tl = txdr_unsigned(va->va_gid); 1161 } else { 1162 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1163 *tl = nfs_false; 1164 } 1165 if (full && va->va_size != VNOVAL) { 1166 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1167 *tl++ = nfs_true; 1168 txdr_hyper(va->va_size, tl); 1169 } else { 1170 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1171 *tl = nfs_false; 1172 } 1173 if (va->va_atime.tv_sec != VNOVAL) { 1174 if (va->va_atime.tv_sec != time_second) { 1175 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1176 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); 1177 txdr_nfsv3time(&va->va_atime, tl); 1178 } else { 1179 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1180 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); 1181 } 1182 } else { 1183 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1184 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); 1185 } 1186 if (va->va_mtime.tv_sec != VNOVAL) { 1187 if (va->va_mtime.tv_sec != time_second) { 1188 tl = nfsm_build_xx(3 * NFSX_UNSIGNED, mb, bpos); 1189 *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); 1190 txdr_nfsv3time(&va->va_mtime, tl); 1191 } else { 1192 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1193 *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); 1194 } 1195 } else { 1196 tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); 1197 *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); 1198 } 1199} 1200