nfs_vnops.c revision 222187
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_vnops.c 222187 2011-05-22 18:11:41Z alc $"); 37 38/* 39 * vnode op calls for Sun NFS version 2 and 3 40 */ 41 42#include "opt_inet.h" 43#include "opt_kdtrace.h" 44 45#include <sys/param.h> 46#include <sys/kernel.h> 47#include <sys/systm.h> 48#include <sys/resourcevar.h> 49#include <sys/proc.h> 50#include <sys/mount.h> 51#include <sys/bio.h> 52#include <sys/buf.h> 53#include <sys/jail.h> 54#include <sys/malloc.h> 55#include <sys/mbuf.h> 56#include <sys/namei.h> 57#include <sys/socket.h> 58#include <sys/vnode.h> 59#include <sys/dirent.h> 60#include <sys/fcntl.h> 61#include <sys/lockf.h> 62#include <sys/stat.h> 63#include <sys/sysctl.h> 64#include <sys/signalvar.h> 65 66#include <vm/vm.h> 67#include <vm/vm_extern.h> 68#include <vm/vm_object.h> 69 70#include <fs/fifofs/fifo.h> 71 72#include <nfs/nfsproto.h> 73#include <nfsclient/nfs.h> 74#include <nfsclient/nfsnode.h> 75#include <nfsclient/nfsmount.h> 76#include <nfs/nfs_kdtrace.h> 77#include <nfs/nfs_lock.h> 78#include <nfs/xdr_subs.h> 79#include <nfsclient/nfsm_subs.h> 80 81#include <net/if.h> 82#include <netinet/in.h> 83#include <netinet/in_var.h> 84 85#include <machine/stdarg.h> 86 87#ifdef KDTRACE_HOOKS 88#include <sys/dtrace_bsd.h> 89 90dtrace_nfsclient_accesscache_flush_probe_func_t 91 dtrace_nfsclient_accesscache_flush_done_probe; 92uint32_t nfsclient_accesscache_flush_done_id; 93 94dtrace_nfsclient_accesscache_get_probe_func_t 95 dtrace_nfsclient_accesscache_get_hit_probe, 96 dtrace_nfsclient_accesscache_get_miss_probe; 97uint32_t nfsclient_accesscache_get_hit_id; 98uint32_t nfsclient_accesscache_get_miss_id; 99 100dtrace_nfsclient_accesscache_load_probe_func_t 101 dtrace_nfsclient_accesscache_load_done_probe; 102uint32_t nfsclient_accesscache_load_done_id; 103#endif /* !KDTRACE_HOOKS */ 104 105/* Defs */ 106#define TRUE 1 107#define FALSE 0 108 109/* 110 * Ifdef for FreeBSD-current 
merged buffer cache. It is unfortunate that these 111 * calls are not in getblk() and brelse() so that they would not be necessary 112 * here. 113 */ 114#ifndef B_VMIO 115#define vfs_busy_pages(bp, f) 116#endif 117 118static vop_read_t nfsfifo_read; 119static vop_write_t nfsfifo_write; 120static vop_close_t nfsfifo_close; 121static int nfs_flush(struct vnode *, int, int); 122static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *); 123static vop_lookup_t nfs_lookup; 124static vop_create_t nfs_create; 125static vop_mknod_t nfs_mknod; 126static vop_open_t nfs_open; 127static vop_close_t nfs_close; 128static vop_access_t nfs_access; 129static vop_getattr_t nfs_getattr; 130static vop_setattr_t nfs_setattr; 131static vop_read_t nfs_read; 132static vop_fsync_t nfs_fsync; 133static vop_remove_t nfs_remove; 134static vop_link_t nfs_link; 135static vop_rename_t nfs_rename; 136static vop_mkdir_t nfs_mkdir; 137static vop_rmdir_t nfs_rmdir; 138static vop_symlink_t nfs_symlink; 139static vop_readdir_t nfs_readdir; 140static vop_strategy_t nfs_strategy; 141static int nfs_lookitup(struct vnode *, const char *, int, 142 struct ucred *, struct thread *, struct nfsnode **); 143static int nfs_sillyrename(struct vnode *, struct vnode *, 144 struct componentname *); 145static vop_access_t nfsspec_access; 146static vop_readlink_t nfs_readlink; 147static vop_print_t nfs_print; 148static vop_advlock_t nfs_advlock; 149static vop_advlockasync_t nfs_advlockasync; 150 151/* 152 * Global vfs data structures for nfs 153 */ 154struct vop_vector nfs_vnodeops = { 155 .vop_default = &default_vnodeops, 156 .vop_access = nfs_access, 157 .vop_advlock = nfs_advlock, 158 .vop_advlockasync = nfs_advlockasync, 159 .vop_close = nfs_close, 160 .vop_create = nfs_create, 161 .vop_fsync = nfs_fsync, 162 .vop_getattr = nfs_getattr, 163 .vop_getpages = nfs_getpages, 164 .vop_putpages = nfs_putpages, 165 .vop_inactive = nfs_inactive, 166 .vop_link = nfs_link, 167 .vop_lookup = nfs_lookup, 168 
.vop_mkdir = nfs_mkdir, 169 .vop_mknod = nfs_mknod, 170 .vop_open = nfs_open, 171 .vop_print = nfs_print, 172 .vop_read = nfs_read, 173 .vop_readdir = nfs_readdir, 174 .vop_readlink = nfs_readlink, 175 .vop_reclaim = nfs_reclaim, 176 .vop_remove = nfs_remove, 177 .vop_rename = nfs_rename, 178 .vop_rmdir = nfs_rmdir, 179 .vop_setattr = nfs_setattr, 180 .vop_strategy = nfs_strategy, 181 .vop_symlink = nfs_symlink, 182 .vop_write = nfs_write, 183}; 184 185struct vop_vector nfs_fifoops = { 186 .vop_default = &fifo_specops, 187 .vop_access = nfsspec_access, 188 .vop_close = nfsfifo_close, 189 .vop_fsync = nfs_fsync, 190 .vop_getattr = nfs_getattr, 191 .vop_inactive = nfs_inactive, 192 .vop_print = nfs_print, 193 .vop_read = nfsfifo_read, 194 .vop_reclaim = nfs_reclaim, 195 .vop_setattr = nfs_setattr, 196 .vop_write = nfsfifo_write, 197}; 198 199static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, 200 struct componentname *cnp, struct vattr *vap); 201static int nfs_removerpc(struct vnode *dvp, const char *name, int namelen, 202 struct ucred *cred, struct thread *td); 203static int nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, 204 int fnamelen, struct vnode *tdvp, 205 const char *tnameptr, int tnamelen, 206 struct ucred *cred, struct thread *td); 207static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp, 208 struct sillyrename *sp); 209 210/* 211 * Global variables 212 */ 213struct mtx nfs_iod_mtx; 214enum nfsiod_state nfs_iodwant[NFS_MAXASYNCDAEMON]; 215struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; 216int nfs_numasync = 0; 217#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) 218 219SYSCTL_DECL(_vfs_oldnfs); 220 221static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; 222SYSCTL_INT(_vfs_oldnfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 223 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); 224 225static int nfs_prime_access_cache = 0; 226SYSCTL_INT(_vfs_oldnfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, 227 
&nfs_prime_access_cache, 0, 228 "Prime NFS ACCESS cache when fetching attributes"); 229 230static int nfsv3_commit_on_close = 0; 231SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW, 232 &nfsv3_commit_on_close, 0, "write+commit on close, else only write"); 233 234static int nfs_clean_pages_on_close = 1; 235SYSCTL_INT(_vfs_oldnfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, 236 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); 237 238int nfs_directio_enable = 0; 239SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, 240 &nfs_directio_enable, 0, "Enable NFS directio"); 241 242/* 243 * This sysctl allows other processes to mmap a file that has been opened 244 * O_DIRECT by a process. In general, having processes mmap the file while 245 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow 246 * this by default to prevent DoS attacks - to prevent a malicious user from 247 * opening up files O_DIRECT preventing other users from mmap'ing these 248 * files. "Protected" environments where stricter consistency guarantees are 249 * required can disable this knob. The process that opened the file O_DIRECT 250 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not 251 * meaningful. 
252 */ 253int nfs_directio_allow_mmap = 1; 254SYSCTL_INT(_vfs_oldnfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, 255 &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); 256 257#if 0 258SYSCTL_INT(_vfs_oldnfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, 259 &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); 260 261SYSCTL_INT(_vfs_oldnfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, 262 &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count"); 263#endif 264 265#define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \ 266 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \ 267 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP) 268 269/* 270 * SMP Locking Note : 271 * The list of locks after the description of the lock is the ordering 272 * of other locks acquired with the lock held. 273 * np->n_mtx : Protects the fields in the nfsnode. 274 VM Object Lock 275 VI_MTX (acquired indirectly) 276 * nmp->nm_mtx : Protects the fields in the nfsmount. 277 rep->r_mtx 278 * nfs_iod_mtx : Global lock, protects shared nfsiod state. 279 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. 280 nmp->nm_mtx 281 rep->r_mtx 282 * rep->r_mtx : Protects the fields in an nfsreq. 
283 */ 284 285static int 286nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td, 287 struct ucred *cred, uint32_t *retmode) 288{ 289 const int v3 = 1; 290 u_int32_t *tl; 291 int error = 0, attrflag, i, lrupos; 292 293 struct mbuf *mreq, *mrep, *md, *mb; 294 caddr_t bpos, dpos; 295 u_int32_t rmode; 296 struct nfsnode *np = VTONFS(vp); 297 298 nfsstats.rpccnt[NFSPROC_ACCESS]++; 299 mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); 300 mb = mreq; 301 bpos = mtod(mb, caddr_t); 302 nfsm_fhtom(vp, v3); 303 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 304 *tl = txdr_unsigned(wmode); 305 nfsm_request(vp, NFSPROC_ACCESS, td, cred); 306 nfsm_postop_attr(vp, attrflag); 307 if (!error) { 308 lrupos = 0; 309 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 310 rmode = fxdr_unsigned(u_int32_t, *tl); 311 mtx_lock(&np->n_mtx); 312 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 313 if (np->n_accesscache[i].uid == cred->cr_uid) { 314 np->n_accesscache[i].mode = rmode; 315 np->n_accesscache[i].stamp = time_second; 316 break; 317 } 318 if (i > 0 && np->n_accesscache[i].stamp < 319 np->n_accesscache[lrupos].stamp) 320 lrupos = i; 321 } 322 if (i == NFS_ACCESSCACHESIZE) { 323 np->n_accesscache[lrupos].uid = cred->cr_uid; 324 np->n_accesscache[lrupos].mode = rmode; 325 np->n_accesscache[lrupos].stamp = time_second; 326 } 327 mtx_unlock(&np->n_mtx); 328 if (retmode != NULL) 329 *retmode = rmode; 330 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); 331 } 332 m_freem(mrep); 333nfsmout: 334#ifdef KDTRACE_HOOKS 335 if (error) { 336 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, 337 error); 338 } 339#endif 340 return (error); 341} 342 343/* 344 * nfs access vnode op. 345 * For nfs version 2, just return ok. File accesses may fail later. 346 * For nfs version 3, use the access rpc to check accessibility. If file modes 347 * are changed on the server, accesses might still fail later. 
348 */ 349static int 350nfs_access(struct vop_access_args *ap) 351{ 352 struct vnode *vp = ap->a_vp; 353 int error = 0, i, gotahit; 354 u_int32_t mode, rmode, wmode; 355 int v3 = NFS_ISV3(vp); 356 struct nfsnode *np = VTONFS(vp); 357 358 /* 359 * Disallow write attempts on filesystems mounted read-only; 360 * unless the file is a socket, fifo, or a block or character 361 * device resident on the filesystem. 362 */ 363 if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 364 switch (vp->v_type) { 365 case VREG: 366 case VDIR: 367 case VLNK: 368 return (EROFS); 369 default: 370 break; 371 } 372 } 373 /* 374 * For nfs v3, check to see if we have done this recently, and if 375 * so return our cached result instead of making an ACCESS call. 376 * If not, do an access rpc, otherwise you are stuck emulating 377 * ufs_access() locally using the vattr. This may not be correct, 378 * since the server may apply other access criteria such as 379 * client uid-->server uid mapping that we do not know about. 380 */ 381 if (v3) { 382 if (ap->a_accmode & VREAD) 383 mode = NFSV3ACCESS_READ; 384 else 385 mode = 0; 386 if (vp->v_type != VDIR) { 387 if (ap->a_accmode & VWRITE) 388 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); 389 if (ap->a_accmode & VEXEC) 390 mode |= NFSV3ACCESS_EXECUTE; 391 } else { 392 if (ap->a_accmode & VWRITE) 393 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | 394 NFSV3ACCESS_DELETE); 395 if (ap->a_accmode & VEXEC) 396 mode |= NFSV3ACCESS_LOOKUP; 397 } 398 /* XXX safety belt, only make blanket request if caching */ 399 if (nfsaccess_cache_timeout > 0) { 400 wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | 401 NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | 402 NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP; 403 } else { 404 wmode = mode; 405 } 406 407 /* 408 * Does our cached result allow us to give a definite yes to 409 * this request? 
410 */ 411 gotahit = 0; 412 mtx_lock(&np->n_mtx); 413 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 414 if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { 415 if (time_second < (np->n_accesscache[i].stamp + 416 nfsaccess_cache_timeout) && 417 (np->n_accesscache[i].mode & mode) == mode) { 418 nfsstats.accesscache_hits++; 419 gotahit = 1; 420 } 421 break; 422 } 423 } 424 mtx_unlock(&np->n_mtx); 425#ifdef KDTRACE_HOOKS 426 if (gotahit) 427 KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, 428 ap->a_cred->cr_uid, mode); 429 else 430 KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, 431 ap->a_cred->cr_uid, mode); 432#endif 433 if (gotahit == 0) { 434 /* 435 * Either a no, or a don't know. Go to the wire. 436 */ 437 nfsstats.accesscache_misses++; 438 error = nfs3_access_otw(vp, wmode, ap->a_td, ap->a_cred, 439 &rmode); 440 if (!error) { 441 if ((rmode & mode) != mode) 442 error = EACCES; 443 } 444 } 445 return (error); 446 } else { 447 if ((error = nfsspec_access(ap)) != 0) { 448 return (error); 449 } 450 /* 451 * Attempt to prevent a mapped root from accessing a file 452 * which it shouldn't. We try to read a byte from the file 453 * if the user is root and the file is not zero length. 454 * After calling nfsspec_access, we should have the correct 455 * file size cached. 
456 */ 457 mtx_lock(&np->n_mtx); 458 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) 459 && VTONFS(vp)->n_size > 0) { 460 struct iovec aiov; 461 struct uio auio; 462 char buf[1]; 463 464 mtx_unlock(&np->n_mtx); 465 aiov.iov_base = buf; 466 aiov.iov_len = 1; 467 auio.uio_iov = &aiov; 468 auio.uio_iovcnt = 1; 469 auio.uio_offset = 0; 470 auio.uio_resid = 1; 471 auio.uio_segflg = UIO_SYSSPACE; 472 auio.uio_rw = UIO_READ; 473 auio.uio_td = ap->a_td; 474 475 if (vp->v_type == VREG) 476 error = nfs_readrpc(vp, &auio, ap->a_cred); 477 else if (vp->v_type == VDIR) { 478 char* bp; 479 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 480 aiov.iov_base = bp; 481 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 482 error = nfs_readdirrpc(vp, &auio, ap->a_cred); 483 free(bp, M_TEMP); 484 } else if (vp->v_type == VLNK) 485 error = nfs_readlinkrpc(vp, &auio, ap->a_cred); 486 else 487 error = EACCES; 488 } else 489 mtx_unlock(&np->n_mtx); 490 return (error); 491 } 492} 493 494int nfs_otw_getattr_avoid = 0; 495 496/* 497 * nfs open vnode op 498 * Check to see if the type is ok 499 * and that deletion is not in progress. 500 * For paged in text files, you will need to flush the page cache 501 * if consistency is lost. 502 */ 503/* ARGSUSED */ 504static int 505nfs_open(struct vop_open_args *ap) 506{ 507 struct vnode *vp = ap->a_vp; 508 struct nfsnode *np = VTONFS(vp); 509 struct vattr vattr; 510 int error; 511 int fmode = ap->a_mode; 512 513 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) 514 return (EOPNOTSUPP); 515 516 /* 517 * Get a valid lease. If cached data is stale, flush it. 
518 */ 519 mtx_lock(&np->n_mtx); 520 if (np->n_flag & NMODIFIED) { 521 mtx_unlock(&np->n_mtx); 522 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 523 if (error == EINTR || error == EIO) 524 return (error); 525 mtx_lock(&np->n_mtx); 526 np->n_attrstamp = 0; 527 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 528 if (vp->v_type == VDIR) 529 np->n_direofoffset = 0; 530 mtx_unlock(&np->n_mtx); 531 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 532 if (error) 533 return (error); 534 mtx_lock(&np->n_mtx); 535 np->n_mtime = vattr.va_mtime; 536 } else { 537 mtx_unlock(&np->n_mtx); 538 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 539 if (error) 540 return (error); 541 mtx_lock(&np->n_mtx); 542 if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 543 if (vp->v_type == VDIR) 544 np->n_direofoffset = 0; 545 mtx_unlock(&np->n_mtx); 546 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 547 if (error == EINTR || error == EIO) { 548 return (error); 549 } 550 mtx_lock(&np->n_mtx); 551 np->n_mtime = vattr.va_mtime; 552 } 553 } 554 /* 555 * If the object has >= 1 O_DIRECT active opens, we disable caching. 556 */ 557 if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 558 if (np->n_directio_opens == 0) { 559 mtx_unlock(&np->n_mtx); 560 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 561 if (error) 562 return (error); 563 mtx_lock(&np->n_mtx); 564 np->n_flag |= NNONCACHE; 565 } 566 np->n_directio_opens++; 567 } 568 mtx_unlock(&np->n_mtx); 569 vnode_create_vobject(vp, vattr.va_size, ap->a_td); 570 return (0); 571} 572 573/* 574 * nfs close vnode op 575 * What an NFS client should do upon close after writing is a debatable issue. 576 * Most NFS clients push delayed writes to the server upon close, basically for 577 * two reasons: 578 * 1 - So that any write errors may be reported back to the client process 579 * doing the close system call. By far the two most likely errors are 580 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. 
581 * 2 - To put a worst case upper bound on cache inconsistency between 582 * multiple clients for the file. 583 * There is also a consistency problem for Version 2 of the protocol w.r.t. 584 * not being able to tell if other clients are writing a file concurrently, 585 * since there is no way of knowing if the changed modify time in the reply 586 * is only due to the write for this client. 587 * (NFS Version 3 provides weak cache consistency data in the reply that 588 * should be sufficient to detect and handle this case.) 589 * 590 * The current code does the following: 591 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers 592 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate 593 * or commit them (this satisfies 1 and 2 except for the 594 * case where the server crashes after this close but 595 * before the commit RPC, which is felt to be "good 596 * enough". Changing the last argument to nfs_flush() to 597 * a 1 would force a commit operation, if it is felt a 598 * commit is necessary now. 599 */ 600/* ARGSUSED */ 601static int 602nfs_close(struct vop_close_args *ap) 603{ 604 struct vnode *vp = ap->a_vp; 605 struct nfsnode *np = VTONFS(vp); 606 int error = 0; 607 int fmode = ap->a_fflag; 608 609 if (vp->v_type == VREG) { 610 /* 611 * Examine and clean dirty pages, regardless of NMODIFIED. 612 * This closes a major hole in close-to-open consistency. 613 * We want to push out all dirty pages (and buffers) on 614 * close, regardless of whether they were dirtied by 615 * mmap'ed writes or via write(). 616 */ 617 if (nfs_clean_pages_on_close && vp->v_object) { 618 VM_OBJECT_LOCK(vp->v_object); 619 vm_object_page_clean(vp->v_object, 0, 0, 0); 620 VM_OBJECT_UNLOCK(vp->v_object); 621 } 622 mtx_lock(&np->n_mtx); 623 if (np->n_flag & NMODIFIED) { 624 mtx_unlock(&np->n_mtx); 625 if (NFS_ISV3(vp)) { 626 /* 627 * Under NFSv3 we have dirty buffers to dispose of. We 628 * must flush them to the NFS server. 
We have the option 629 * of waiting all the way through the commit rpc or just 630 * waiting for the initial write. The default is to only 631 * wait through the initial write so the data is in the 632 * server's cache, which is roughly similar to the state 633 * a standard disk subsystem leaves the file in on close(). 634 * 635 * We cannot clear the NMODIFIED bit in np->n_flag due to 636 * potential races with other processes, and certainly 637 * cannot clear it if we don't commit. 638 */ 639 int cm = nfsv3_commit_on_close ? 1 : 0; 640 error = nfs_flush(vp, MNT_WAIT, cm); 641 /* np->n_flag &= ~NMODIFIED; */ 642 } else 643 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 644 mtx_lock(&np->n_mtx); 645 } 646 if (np->n_flag & NWRITEERR) { 647 np->n_flag &= ~NWRITEERR; 648 error = np->n_error; 649 } 650 mtx_unlock(&np->n_mtx); 651 } 652 if (nfs_directio_enable) 653 KASSERT((np->n_directio_asyncwr == 0), 654 ("nfs_close: dirty unflushed (%d) directio buffers\n", 655 np->n_directio_asyncwr)); 656 if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 657 mtx_lock(&np->n_mtx); 658 KASSERT((np->n_directio_opens > 0), 659 ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); 660 np->n_directio_opens--; 661 if (np->n_directio_opens == 0) 662 np->n_flag &= ~NNONCACHE; 663 mtx_unlock(&np->n_mtx); 664 } 665 return (error); 666} 667 668/* 669 * nfs getattr call from vfs. 670 */ 671static int 672nfs_getattr(struct vop_getattr_args *ap) 673{ 674 struct vnode *vp = ap->a_vp; 675 struct nfsnode *np = VTONFS(vp); 676 struct thread *td = curthread; 677 struct vattr *vap = ap->a_vap; 678 struct vattr vattr; 679 caddr_t bpos, dpos; 680 int error = 0; 681 struct mbuf *mreq, *mrep, *md, *mb; 682 int v3 = NFS_ISV3(vp); 683 684 /* 685 * Update local times for special files. 686 */ 687 mtx_lock(&np->n_mtx); 688 if (np->n_flag & (NACC | NUPD)) 689 np->n_flag |= NCHG; 690 mtx_unlock(&np->n_mtx); 691 /* 692 * First look in the cache. 
693 */ 694 if (nfs_getattrcache(vp, &vattr) == 0) 695 goto nfsmout; 696 if (v3 && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) { 697 nfsstats.accesscache_misses++; 698 nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred, NULL); 699 if (nfs_getattrcache(vp, &vattr) == 0) 700 goto nfsmout; 701 } 702 nfsstats.rpccnt[NFSPROC_GETATTR]++; 703 mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); 704 mb = mreq; 705 bpos = mtod(mb, caddr_t); 706 nfsm_fhtom(vp, v3); 707 nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred); 708 if (!error) { 709 nfsm_loadattr(vp, &vattr); 710 } 711 m_freem(mrep); 712nfsmout: 713 vap->va_type = vattr.va_type; 714 vap->va_mode = vattr.va_mode; 715 vap->va_nlink = vattr.va_nlink; 716 vap->va_uid = vattr.va_uid; 717 vap->va_gid = vattr.va_gid; 718 vap->va_fsid = vattr.va_fsid; 719 vap->va_fileid = vattr.va_fileid; 720 vap->va_size = vattr.va_size; 721 vap->va_blocksize = vattr.va_blocksize; 722 vap->va_atime = vattr.va_atime; 723 vap->va_mtime = vattr.va_mtime; 724 vap->va_ctime = vattr.va_ctime; 725 vap->va_gen = vattr.va_gen; 726 vap->va_flags = vattr.va_flags; 727 vap->va_rdev = vattr.va_rdev; 728 vap->va_bytes = vattr.va_bytes; 729 vap->va_filerev = vattr.va_filerev; 730 731 return (error); 732} 733 734/* 735 * nfs setattr call. 736 */ 737static int 738nfs_setattr(struct vop_setattr_args *ap) 739{ 740 struct vnode *vp = ap->a_vp; 741 struct nfsnode *np = VTONFS(vp); 742 struct vattr *vap = ap->a_vap; 743 struct thread *td = curthread; 744 int error = 0; 745 u_quad_t tsize; 746 747#ifndef nolint 748 tsize = (u_quad_t)0; 749#endif 750 751 /* 752 * Setting of flags is not supported. 753 */ 754 if (vap->va_flags != VNOVAL) 755 return (EOPNOTSUPP); 756 757 /* 758 * Disallow write attempts if the filesystem is mounted read-only. 
759 */ 760 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 761 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 762 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && 763 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 764 error = EROFS; 765 goto out; 766 } 767 if (vap->va_size != VNOVAL) { 768 switch (vp->v_type) { 769 case VDIR: 770 return (EISDIR); 771 case VCHR: 772 case VBLK: 773 case VSOCK: 774 case VFIFO: 775 if (vap->va_mtime.tv_sec == VNOVAL && 776 vap->va_atime.tv_sec == VNOVAL && 777 vap->va_mode == (mode_t)VNOVAL && 778 vap->va_uid == (uid_t)VNOVAL && 779 vap->va_gid == (gid_t)VNOVAL) 780 return (0); 781 vap->va_size = VNOVAL; 782 break; 783 default: 784 /* 785 * Disallow write attempts if the filesystem is 786 * mounted read-only. 787 */ 788 if (vp->v_mount->mnt_flag & MNT_RDONLY) 789 return (EROFS); 790 /* 791 * We run vnode_pager_setsize() early (why?), 792 * we must set np->n_size now to avoid vinvalbuf 793 * V_SAVE races that might setsize a lower 794 * value. 795 */ 796 mtx_lock(&np->n_mtx); 797 tsize = np->n_size; 798 mtx_unlock(&np->n_mtx); 799 error = nfs_meta_setsize(vp, ap->a_cred, td, 800 vap->va_size); 801 mtx_lock(&np->n_mtx); 802 if (np->n_flag & NMODIFIED) { 803 tsize = np->n_size; 804 mtx_unlock(&np->n_mtx); 805 if (vap->va_size == 0) 806 error = nfs_vinvalbuf(vp, 0, td, 1); 807 else 808 error = nfs_vinvalbuf(vp, V_SAVE, td, 1); 809 if (error) { 810 vnode_pager_setsize(vp, tsize); 811 goto out; 812 } 813 } else 814 mtx_unlock(&np->n_mtx); 815 /* 816 * np->n_size has already been set to vap->va_size 817 * in nfs_meta_setsize(). We must set it again since 818 * nfs_loadattrcache() could be called through 819 * nfs_meta_setsize() and could modify np->n_size. 
820 */ 821 mtx_lock(&np->n_mtx); 822 np->n_vattr.va_size = np->n_size = vap->va_size; 823 mtx_unlock(&np->n_mtx); 824 }; 825 } else { 826 mtx_lock(&np->n_mtx); 827 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 828 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 829 mtx_unlock(&np->n_mtx); 830 if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && 831 (error == EINTR || error == EIO)) 832 return error; 833 } else 834 mtx_unlock(&np->n_mtx); 835 } 836 error = nfs_setattrrpc(vp, vap, ap->a_cred); 837 if (error && vap->va_size != VNOVAL) { 838 mtx_lock(&np->n_mtx); 839 np->n_size = np->n_vattr.va_size = tsize; 840 vnode_pager_setsize(vp, tsize); 841 mtx_unlock(&np->n_mtx); 842 } 843out: 844 return (error); 845} 846 847/* 848 * Do an nfs setattr rpc. 849 */ 850static int 851nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred) 852{ 853 struct nfsv2_sattr *sp; 854 struct nfsnode *np = VTONFS(vp); 855 caddr_t bpos, dpos; 856 u_int32_t *tl; 857 int error = 0, i, wccflag = NFSV3_WCCRATTR; 858 struct mbuf *mreq, *mrep, *md, *mb; 859 int v3 = NFS_ISV3(vp); 860 861 nfsstats.rpccnt[NFSPROC_SETATTR]++; 862 mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); 863 mb = mreq; 864 bpos = mtod(mb, caddr_t); 865 nfsm_fhtom(vp, v3); 866 if (v3) { 867 nfsm_v3attrbuild(vap, TRUE); 868 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 869 *tl = nfs_false; 870 } else { 871 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 872 if (vap->va_mode == (mode_t)VNOVAL) 873 sp->sa_mode = nfs_xdrneg1; 874 else 875 sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); 876 if (vap->va_uid == (uid_t)VNOVAL) 877 sp->sa_uid = nfs_xdrneg1; 878 else 879 sp->sa_uid = txdr_unsigned(vap->va_uid); 880 if (vap->va_gid == (gid_t)VNOVAL) 881 sp->sa_gid = nfs_xdrneg1; 882 else 883 sp->sa_gid = txdr_unsigned(vap->va_gid); 884 sp->sa_size = txdr_unsigned(vap->va_size); 885 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 886 
txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 887 } 888 nfsm_request(vp, NFSPROC_SETATTR, curthread, cred); 889 if (v3) { 890 mtx_lock(&np->n_mtx); 891 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) 892 np->n_accesscache[i].stamp = 0; 893 mtx_unlock(&np->n_mtx); 894 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); 895 nfsm_wcc_data(vp, wccflag); 896 } else 897 nfsm_loadattr(vp, NULL); 898 m_freem(mrep); 899nfsmout: 900 return (error); 901} 902 903/* 904 * nfs lookup call, one step at a time... 905 * First look in cache 906 * If not found, unlock the directory nfsnode and do the rpc 907 */ 908static int 909nfs_lookup(struct vop_lookup_args *ap) 910{ 911 struct componentname *cnp = ap->a_cnp; 912 struct vnode *dvp = ap->a_dvp; 913 struct vnode **vpp = ap->a_vpp; 914 struct mount *mp = dvp->v_mount; 915 struct vattr vattr; 916 struct timespec dmtime; 917 int flags = cnp->cn_flags; 918 struct vnode *newvp; 919 struct nfsmount *nmp; 920 caddr_t bpos, dpos; 921 struct mbuf *mreq, *mrep, *md, *mb; 922 long len; 923 nfsfh_t *fhp; 924 struct nfsnode *np, *newnp; 925 int error = 0, attrflag, fhsize, ltype; 926 int v3 = NFS_ISV3(dvp); 927 struct thread *td = cnp->cn_thread; 928 929 *vpp = NULLVP; 930 if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && 931 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 932 return (EROFS); 933 if (dvp->v_type != VDIR) 934 return (ENOTDIR); 935 nmp = VFSTONFS(mp); 936 np = VTONFS(dvp); 937 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) { 938 *vpp = NULLVP; 939 return (error); 940 } 941 error = cache_lookup(dvp, vpp, cnp); 942 if (error > 0 && error != ENOENT) 943 return (error); 944 if (error == -1) { 945 /* 946 * We only accept a positive hit in the cache if the 947 * change time of the file matches our cached copy. 948 * Otherwise, we discard the cache entry and fallback 949 * to doing a lookup RPC. 
950 * 951 * To better handle stale file handles and attributes, 952 * clear the attribute cache of this node if it is a 953 * leaf component, part of an open() call, and not 954 * locally modified before fetching the attributes. 955 * This should allow stale file handles to be detected 956 * here where we can fall back to a LOOKUP RPC to 957 * recover rather than having nfs_open() detect the 958 * stale file handle and failing open(2) with ESTALE. 959 */ 960 newvp = *vpp; 961 newnp = VTONFS(newvp); 962 if (!(nmp->nm_flag & NFSMNT_NOCTO) && 963 (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 964 !(newnp->n_flag & NMODIFIED)) { 965 mtx_lock(&newnp->n_mtx); 966 newnp->n_attrstamp = 0; 967 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 968 mtx_unlock(&newnp->n_mtx); 969 } 970 if (VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && 971 timespeccmp(&vattr.va_ctime, &newnp->n_ctime, ==)) { 972 nfsstats.lookupcache_hits++; 973 if (cnp->cn_nameiop != LOOKUP && 974 (flags & ISLASTCN)) 975 cnp->cn_flags |= SAVENAME; 976 return (0); 977 } 978 cache_purge(newvp); 979 if (dvp != newvp) 980 vput(newvp); 981 else 982 vrele(newvp); 983 *vpp = NULLVP; 984 } else if (error == ENOENT) { 985 if (dvp->v_iflag & VI_DOOMED) 986 return (ENOENT); 987 /* 988 * We only accept a negative hit in the cache if the 989 * modification time of the parent directory matches 990 * our cached copy. Otherwise, we discard all of the 991 * negative cache entries for this directory. We also 992 * only trust -ve cache entries for less than 993 * nm_negative_namecache_timeout seconds. 
994 */ 995 if ((u_int)(ticks - np->n_dmtime_ticks) < 996 (nmp->nm_negnametimeo * hz) && 997 VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && 998 timespeccmp(&vattr.va_mtime, &np->n_dmtime, ==)) { 999 nfsstats.lookupcache_hits++; 1000 return (ENOENT); 1001 } 1002 cache_purge_negative(dvp); 1003 mtx_lock(&np->n_mtx); 1004 timespecclear(&np->n_dmtime); 1005 mtx_unlock(&np->n_mtx); 1006 } 1007 1008 /* 1009 * Cache the modification time of the parent directory in case 1010 * the lookup fails and results in adding the first negative 1011 * name cache entry for the directory. Since this is reading 1012 * a single time_t, don't bother with locking. The 1013 * modification time may be a bit stale, but it must be read 1014 * before performing the lookup RPC to prevent a race where 1015 * another lookup updates the timestamp on the directory after 1016 * the lookup RPC has been performed on the server but before 1017 * n_dmtime is set at the end of this function. 1018 */ 1019 dmtime = np->n_vattr.va_mtime; 1020 error = 0; 1021 newvp = NULLVP; 1022 nfsstats.lookupcache_misses++; 1023 nfsstats.rpccnt[NFSPROC_LOOKUP]++; 1024 len = cnp->cn_namelen; 1025 mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, 1026 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); 1027 mb = mreq; 1028 bpos = mtod(mb, caddr_t); 1029 nfsm_fhtom(dvp, v3); 1030 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); 1031 nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred); 1032 if (error) { 1033 if (v3) { 1034 nfsm_postop_attr(dvp, attrflag); 1035 m_freem(mrep); 1036 } 1037 goto nfsmout; 1038 } 1039 nfsm_getfh(fhp, fhsize, v3); 1040 1041 /* 1042 * Handle RENAME case... 
1043 */ 1044 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { 1045 if (NFS_CMPFH(np, fhp, fhsize)) { 1046 m_freem(mrep); 1047 return (EISDIR); 1048 } 1049 error = nfs_nget(mp, fhp, fhsize, &np, LK_EXCLUSIVE); 1050 if (error) { 1051 m_freem(mrep); 1052 return (error); 1053 } 1054 newvp = NFSTOV(np); 1055 if (v3) { 1056 nfsm_postop_attr(newvp, attrflag); 1057 nfsm_postop_attr(dvp, attrflag); 1058 } else 1059 nfsm_loadattr(newvp, NULL); 1060 *vpp = newvp; 1061 m_freem(mrep); 1062 cnp->cn_flags |= SAVENAME; 1063 return (0); 1064 } 1065 1066 if (flags & ISDOTDOT) { 1067 ltype = VOP_ISLOCKED(dvp); 1068 error = vfs_busy(mp, MBF_NOWAIT); 1069 if (error != 0) { 1070 vfs_ref(mp); 1071 VOP_UNLOCK(dvp, 0); 1072 error = vfs_busy(mp, 0); 1073 vn_lock(dvp, ltype | LK_RETRY); 1074 vfs_rel(mp); 1075 if (error == 0 && (dvp->v_iflag & VI_DOOMED)) { 1076 vfs_unbusy(mp); 1077 error = ENOENT; 1078 } 1079 if (error != 0) { 1080 m_freem(mrep); 1081 return (error); 1082 } 1083 } 1084 VOP_UNLOCK(dvp, 0); 1085 error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags); 1086 if (error == 0) 1087 newvp = NFSTOV(np); 1088 vfs_unbusy(mp); 1089 if (newvp != dvp) 1090 vn_lock(dvp, ltype | LK_RETRY); 1091 if (dvp->v_iflag & VI_DOOMED) { 1092 if (error == 0) { 1093 if (newvp == dvp) 1094 vrele(newvp); 1095 else 1096 vput(newvp); 1097 } 1098 error = ENOENT; 1099 } 1100 if (error) { 1101 m_freem(mrep); 1102 return (error); 1103 } 1104 } else if (NFS_CMPFH(np, fhp, fhsize)) { 1105 VREF(dvp); 1106 newvp = dvp; 1107 } else { 1108 error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags); 1109 if (error) { 1110 m_freem(mrep); 1111 return (error); 1112 } 1113 newvp = NFSTOV(np); 1114 1115 /* 1116 * Flush the attribute cache when opening a leaf node 1117 * to ensure that fresh attributes are fetched in 1118 * nfs_open() if we are unable to fetch attributes 1119 * from the LOOKUP reply. 
1120 */ 1121 if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1122 !(np->n_flag & NMODIFIED)) { 1123 mtx_lock(&np->n_mtx); 1124 np->n_attrstamp = 0; 1125 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1126 mtx_unlock(&np->n_mtx); 1127 } 1128 } 1129 if (v3) { 1130 nfsm_postop_attr(newvp, attrflag); 1131 nfsm_postop_attr(dvp, attrflag); 1132 } else 1133 nfsm_loadattr(newvp, NULL); 1134 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) 1135 cnp->cn_flags |= SAVENAME; 1136 if ((cnp->cn_flags & MAKEENTRY) && 1137 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { 1138 np->n_ctime = np->n_vattr.va_ctime; 1139 cache_enter(dvp, newvp, cnp); 1140 } 1141 *vpp = newvp; 1142 m_freem(mrep); 1143nfsmout: 1144 if (error) { 1145 if (newvp != NULLVP) { 1146 vput(newvp); 1147 *vpp = NULLVP; 1148 } 1149 1150 if (error != ENOENT) 1151 goto done; 1152 1153 /* The requested file was not found. */ 1154 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && 1155 (flags & ISLASTCN)) { 1156 /* 1157 * XXX: UFS does a full VOP_ACCESS(dvp, 1158 * VWRITE) here instead of just checking 1159 * MNT_RDONLY. 1160 */ 1161 if (mp->mnt_flag & MNT_RDONLY) 1162 return (EROFS); 1163 cnp->cn_flags |= SAVENAME; 1164 return (EJUSTRETURN); 1165 } 1166 1167 if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) { 1168 /* 1169 * Maintain n_dmtime as the modification time 1170 * of the parent directory when the oldest -ve 1171 * name cache entry for this directory was 1172 * added. If a -ve cache entry has already 1173 * been added with a newer modification time 1174 * by a concurrent lookup, then don't bother 1175 * adding a cache entry. The modification 1176 * time of the directory might have changed 1177 * due to the file this lookup failed to find 1178 * being created. In that case a subsequent 1179 * lookup would incorrectly use the entry 1180 * added here instead of doing an extra 1181 * lookup. 
1182 */ 1183 mtx_lock(&np->n_mtx); 1184 if (timespeccmp(&np->n_dmtime, &dmtime, <=)) { 1185 if (!timespecisset(&np->n_dmtime)) { 1186 np->n_dmtime = dmtime; 1187 np->n_dmtime_ticks = ticks; 1188 } 1189 mtx_unlock(&np->n_mtx); 1190 cache_enter(dvp, NULL, cnp); 1191 } else 1192 mtx_unlock(&np->n_mtx); 1193 } 1194 return (ENOENT); 1195 } 1196done: 1197 return (error); 1198} 1199 1200/* 1201 * nfs read call. 1202 * Just call nfs_bioread() to do the work. 1203 */ 1204static int 1205nfs_read(struct vop_read_args *ap) 1206{ 1207 struct vnode *vp = ap->a_vp; 1208 1209 switch (vp->v_type) { 1210 case VREG: 1211 return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); 1212 case VDIR: 1213 return (EISDIR); 1214 default: 1215 return (EOPNOTSUPP); 1216 } 1217} 1218 1219/* 1220 * nfs readlink call 1221 */ 1222static int 1223nfs_readlink(struct vop_readlink_args *ap) 1224{ 1225 struct vnode *vp = ap->a_vp; 1226 1227 if (vp->v_type != VLNK) 1228 return (EINVAL); 1229 return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred)); 1230} 1231 1232/* 1233 * Do a readlink rpc. 1234 * Called by nfs_doio() from below the buffer cache. 
1235 */ 1236int 1237nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1238{ 1239 caddr_t bpos, dpos; 1240 int error = 0, len, attrflag; 1241 struct mbuf *mreq, *mrep, *md, *mb; 1242 int v3 = NFS_ISV3(vp); 1243 1244 nfsstats.rpccnt[NFSPROC_READLINK]++; 1245 mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); 1246 mb = mreq; 1247 bpos = mtod(mb, caddr_t); 1248 nfsm_fhtom(vp, v3); 1249 nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred); 1250 if (v3) 1251 nfsm_postop_attr(vp, attrflag); 1252 if (!error) { 1253 nfsm_strsiz(len, NFS_MAXPATHLEN); 1254 if (len == NFS_MAXPATHLEN) { 1255 struct nfsnode *np = VTONFS(vp); 1256 mtx_lock(&np->n_mtx); 1257 if (np->n_size && np->n_size < NFS_MAXPATHLEN) 1258 len = np->n_size; 1259 mtx_unlock(&np->n_mtx); 1260 } 1261 nfsm_mtouio(uiop, len); 1262 } 1263 m_freem(mrep); 1264nfsmout: 1265 return (error); 1266} 1267 1268/* 1269 * nfs read rpc call 1270 * Ditto above 1271 */ 1272int 1273nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1274{ 1275 u_int32_t *tl; 1276 caddr_t bpos, dpos; 1277 struct mbuf *mreq, *mrep, *md, *mb; 1278 struct nfsmount *nmp; 1279 int error = 0, len, retlen, tsiz, eof, attrflag; 1280 int v3 = NFS_ISV3(vp); 1281 int rsize; 1282 1283#ifndef nolint 1284 eof = 0; 1285#endif 1286 nmp = VFSTONFS(vp->v_mount); 1287 tsiz = uiop->uio_resid; 1288 mtx_lock(&nmp->nm_mtx); 1289 if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { 1290 mtx_unlock(&nmp->nm_mtx); 1291 return (EFBIG); 1292 } 1293 rsize = nmp->nm_rsize; 1294 mtx_unlock(&nmp->nm_mtx); 1295 while (tsiz > 0) { 1296 nfsstats.rpccnt[NFSPROC_READ]++; 1297 len = (tsiz > rsize) ? 
rsize : tsiz; 1298 mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); 1299 mb = mreq; 1300 bpos = mtod(mb, caddr_t); 1301 nfsm_fhtom(vp, v3); 1302 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3); 1303 if (v3) { 1304 txdr_hyper(uiop->uio_offset, tl); 1305 *(tl + 2) = txdr_unsigned(len); 1306 } else { 1307 *tl++ = txdr_unsigned(uiop->uio_offset); 1308 *tl++ = txdr_unsigned(len); 1309 *tl = 0; 1310 } 1311 nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred); 1312 if (v3) { 1313 nfsm_postop_attr(vp, attrflag); 1314 if (error) { 1315 m_freem(mrep); 1316 goto nfsmout; 1317 } 1318 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); 1319 eof = fxdr_unsigned(int, *(tl + 1)); 1320 } else { 1321 nfsm_loadattr(vp, NULL); 1322 } 1323 nfsm_strsiz(retlen, rsize); 1324 nfsm_mtouio(uiop, retlen); 1325 m_freem(mrep); 1326 tsiz -= retlen; 1327 if (v3) { 1328 if (eof || retlen == 0) { 1329 tsiz = 0; 1330 } 1331 } else if (retlen < len) { 1332 tsiz = 0; 1333 } 1334 } 1335nfsmout: 1336 return (error); 1337} 1338 1339/* 1340 * nfs write call 1341 */ 1342int 1343nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 1344 int *iomode, int *must_commit) 1345{ 1346 u_int32_t *tl; 1347 int32_t backup; 1348 caddr_t bpos, dpos; 1349 struct mbuf *mreq, *mrep, *md, *mb; 1350 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 1351 int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; 1352 int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; 1353 int wsize; 1354 1355 KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); 1356 *must_commit = 0; 1357 tsiz = uiop->uio_resid; 1358 mtx_lock(&nmp->nm_mtx); 1359 if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { 1360 mtx_unlock(&nmp->nm_mtx); 1361 return (EFBIG); 1362 } 1363 wsize = nmp->nm_wsize; 1364 mtx_unlock(&nmp->nm_mtx); 1365 while (tsiz > 0) { 1366 nfsstats.rpccnt[NFSPROC_WRITE]++; 1367 len = (tsiz > wsize) ? 
wsize : tsiz; 1368 mreq = nfsm_reqhead(vp, NFSPROC_WRITE, 1369 NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); 1370 mb = mreq; 1371 bpos = mtod(mb, caddr_t); 1372 nfsm_fhtom(vp, v3); 1373 if (v3) { 1374 tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED); 1375 txdr_hyper(uiop->uio_offset, tl); 1376 tl += 2; 1377 *tl++ = txdr_unsigned(len); 1378 *tl++ = txdr_unsigned(*iomode); 1379 *tl = txdr_unsigned(len); 1380 } else { 1381 u_int32_t x; 1382 1383 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED); 1384 /* Set both "begin" and "current" to non-garbage. */ 1385 x = txdr_unsigned((u_int32_t)uiop->uio_offset); 1386 *tl++ = x; /* "begin offset" */ 1387 *tl++ = x; /* "current offset" */ 1388 x = txdr_unsigned(len); 1389 *tl++ = x; /* total to this offset */ 1390 *tl = x; /* size of this write */ 1391 } 1392 nfsm_uiotom(uiop, len); 1393 nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred); 1394 if (v3) { 1395 wccflag = NFSV3_WCCCHK; 1396 nfsm_wcc_data(vp, wccflag); 1397 if (!error) { 1398 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED 1399 + NFSX_V3WRITEVERF); 1400 rlen = fxdr_unsigned(int, *tl++); 1401 if (rlen == 0) { 1402 error = NFSERR_IO; 1403 m_freem(mrep); 1404 break; 1405 } else if (rlen < len) { 1406 backup = len - rlen; 1407 uiop->uio_iov->iov_base = 1408 (char *)uiop->uio_iov->iov_base - 1409 backup; 1410 uiop->uio_iov->iov_len += backup; 1411 uiop->uio_offset -= backup; 1412 uiop->uio_resid += backup; 1413 len = rlen; 1414 } 1415 commit = fxdr_unsigned(int, *tl++); 1416 1417 /* 1418 * Return the lowest committment level 1419 * obtained by any of the RPCs. 
1420 */ 1421 if (committed == NFSV3WRITE_FILESYNC) 1422 committed = commit; 1423 else if (committed == NFSV3WRITE_DATASYNC && 1424 commit == NFSV3WRITE_UNSTABLE) 1425 committed = commit; 1426 mtx_lock(&nmp->nm_mtx); 1427 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){ 1428 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, 1429 NFSX_V3WRITEVERF); 1430 nmp->nm_state |= NFSSTA_HASWRITEVERF; 1431 } else if (bcmp((caddr_t)tl, 1432 (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { 1433 *must_commit = 1; 1434 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, 1435 NFSX_V3WRITEVERF); 1436 } 1437 mtx_unlock(&nmp->nm_mtx); 1438 } 1439 } else { 1440 nfsm_loadattr(vp, NULL); 1441 } 1442 if (wccflag) { 1443 mtx_lock(&(VTONFS(vp))->n_mtx); 1444 VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime; 1445 mtx_unlock(&(VTONFS(vp))->n_mtx); 1446 } 1447 m_freem(mrep); 1448 if (error) 1449 break; 1450 tsiz -= len; 1451 } 1452nfsmout: 1453 if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC) 1454 committed = NFSV3WRITE_FILESYNC; 1455 *iomode = committed; 1456 if (error) 1457 uiop->uio_resid = tsiz; 1458 return (error); 1459} 1460 1461/* 1462 * nfs mknod rpc 1463 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the 1464 * mode set to specify the file type and the size field for rdev. 
1465 */ 1466static int 1467nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, 1468 struct vattr *vap) 1469{ 1470 struct nfsv2_sattr *sp; 1471 u_int32_t *tl; 1472 struct vnode *newvp = NULL; 1473 struct nfsnode *np = NULL; 1474 struct vattr vattr; 1475 caddr_t bpos, dpos; 1476 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; 1477 struct mbuf *mreq, *mrep, *md, *mb; 1478 u_int32_t rdev; 1479 int v3 = NFS_ISV3(dvp); 1480 1481 if (vap->va_type == VCHR || vap->va_type == VBLK) 1482 rdev = txdr_unsigned(vap->va_rdev); 1483 else if (vap->va_type == VFIFO || vap->va_type == VSOCK) 1484 rdev = nfs_xdrneg1; 1485 else { 1486 return (EOPNOTSUPP); 1487 } 1488 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 1489 return (error); 1490 nfsstats.rpccnt[NFSPROC_MKNOD]++; 1491 mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + 1492 + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); 1493 mb = mreq; 1494 bpos = mtod(mb, caddr_t); 1495 nfsm_fhtom(dvp, v3); 1496 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 1497 if (v3) { 1498 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 1499 *tl++ = vtonfsv3_type(vap->va_type); 1500 nfsm_v3attrbuild(vap, FALSE); 1501 if (vap->va_type == VCHR || vap->va_type == VBLK) { 1502 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); 1503 *tl++ = txdr_unsigned(major(vap->va_rdev)); 1504 *tl = txdr_unsigned(minor(vap->va_rdev)); 1505 } 1506 } else { 1507 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 1508 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); 1509 sp->sa_uid = nfs_xdrneg1; 1510 sp->sa_gid = nfs_xdrneg1; 1511 sp->sa_size = rdev; 1512 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 1513 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 1514 } 1515 nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred); 1516 if (!error) { 1517 nfsm_mtofh(dvp, newvp, v3, gotvp); 1518 if (!gotvp) { 1519 if (newvp) { 1520 vput(newvp); 1521 newvp = NULL; 1522 } 1523 error = 
nfs_lookitup(dvp, cnp->cn_nameptr, 1524 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); 1525 if (!error) 1526 newvp = NFSTOV(np); 1527 } 1528 } 1529 if (v3) 1530 nfsm_wcc_data(dvp, wccflag); 1531 m_freem(mrep); 1532nfsmout: 1533 if (error) { 1534 if (newvp) 1535 vput(newvp); 1536 } else { 1537 if (cnp->cn_flags & MAKEENTRY) 1538 cache_enter(dvp, newvp, cnp); 1539 *vpp = newvp; 1540 } 1541 mtx_lock(&(VTONFS(dvp))->n_mtx); 1542 VTONFS(dvp)->n_flag |= NMODIFIED; 1543 if (!wccflag) { 1544 VTONFS(dvp)->n_attrstamp = 0; 1545 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1546 } 1547 mtx_unlock(&(VTONFS(dvp))->n_mtx); 1548 return (error); 1549} 1550 1551/* 1552 * nfs mknod vop 1553 * just call nfs_mknodrpc() to do the work. 1554 */ 1555/* ARGSUSED */ 1556static int 1557nfs_mknod(struct vop_mknod_args *ap) 1558{ 1559 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1560} 1561 1562static u_long create_verf; 1563/* 1564 * nfs file create call 1565 */ 1566static int 1567nfs_create(struct vop_create_args *ap) 1568{ 1569 struct vnode *dvp = ap->a_dvp; 1570 struct vattr *vap = ap->a_vap; 1571 struct componentname *cnp = ap->a_cnp; 1572 struct nfsv2_sattr *sp; 1573 u_int32_t *tl; 1574 struct nfsnode *np = NULL; 1575 struct vnode *newvp = NULL; 1576 caddr_t bpos, dpos; 1577 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; 1578 struct mbuf *mreq, *mrep, *md, *mb; 1579 struct vattr vattr; 1580 int v3 = NFS_ISV3(dvp); 1581 1582 /* 1583 * Oops, not for me.. 
1584 */ 1585 if (vap->va_type == VSOCK) { 1586 error = nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap); 1587 return (error); 1588 } 1589 1590 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) { 1591 return (error); 1592 } 1593 if (vap->va_vaflags & VA_EXCLUSIVE) 1594 fmode |= O_EXCL; 1595again: 1596 nfsstats.rpccnt[NFSPROC_CREATE]++; 1597 mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + 1598 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); 1599 mb = mreq; 1600 bpos = mtod(mb, caddr_t); 1601 nfsm_fhtom(dvp, v3); 1602 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 1603 if (v3) { 1604 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 1605 if (fmode & O_EXCL) { 1606 *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); 1607 tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF); 1608#ifdef INET 1609 CURVNET_SET(CRED_TO_VNET(cnp->cn_cred)); 1610 IN_IFADDR_RLOCK(); 1611 if (!TAILQ_EMPTY(&V_in_ifaddrhead)) 1612 *tl++ = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr; 1613 else 1614#endif 1615 *tl++ = create_verf; 1616#ifdef INET 1617 IN_IFADDR_RUNLOCK(); 1618 CURVNET_RESTORE(); 1619#endif 1620 *tl = ++create_verf; 1621 } else { 1622 *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); 1623 nfsm_v3attrbuild(vap, FALSE); 1624 } 1625 } else { 1626 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 1627 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); 1628 sp->sa_uid = nfs_xdrneg1; 1629 sp->sa_gid = nfs_xdrneg1; 1630 sp->sa_size = 0; 1631 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 1632 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 1633 } 1634 nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred); 1635 if (!error) { 1636 nfsm_mtofh(dvp, newvp, v3, gotvp); 1637 if (!gotvp) { 1638 if (newvp) { 1639 vput(newvp); 1640 newvp = NULL; 1641 } 1642 error = nfs_lookitup(dvp, cnp->cn_nameptr, 1643 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); 1644 if (!error) 1645 newvp = NFSTOV(np); 1646 } 1647 } 1648 if (v3) 1649 nfsm_wcc_data(dvp, 
wccflag); 1650 m_freem(mrep); 1651nfsmout: 1652 if (error) { 1653 if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { 1654 fmode &= ~O_EXCL; 1655 goto again; 1656 } 1657 if (newvp) 1658 vput(newvp); 1659 } else if (v3 && (fmode & O_EXCL)) { 1660 /* 1661 * We are normally called with only a partially initialized 1662 * VAP. Since the NFSv3 spec says that server may use the 1663 * file attributes to store the verifier, the spec requires 1664 * us to do a SETATTR RPC. FreeBSD servers store the verifier 1665 * in atime, but we can't really assume that all servers will 1666 * so we ensure that our SETATTR sets both atime and mtime. 1667 */ 1668 if (vap->va_mtime.tv_sec == VNOVAL) 1669 vfs_timestamp(&vap->va_mtime); 1670 if (vap->va_atime.tv_sec == VNOVAL) 1671 vap->va_atime = vap->va_mtime; 1672 error = nfs_setattrrpc(newvp, vap, cnp->cn_cred); 1673 if (error) 1674 vput(newvp); 1675 } 1676 if (!error) { 1677 if (cnp->cn_flags & MAKEENTRY) 1678 cache_enter(dvp, newvp, cnp); 1679 *ap->a_vpp = newvp; 1680 } 1681 mtx_lock(&(VTONFS(dvp))->n_mtx); 1682 VTONFS(dvp)->n_flag |= NMODIFIED; 1683 if (!wccflag) { 1684 VTONFS(dvp)->n_attrstamp = 0; 1685 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1686 } 1687 mtx_unlock(&(VTONFS(dvp))->n_mtx); 1688 return (error); 1689} 1690 1691/* 1692 * nfs file remove call 1693 * To try and make nfs semantics closer to ufs semantics, a file that has 1694 * other processes using the vnode is renamed instead of removed and then 1695 * removed later on the last close. 
1696 * - If v_usecount > 1 1697 * If a rename is not already in the works 1698 * call nfs_sillyrename() to set it up 1699 * else 1700 * do the remove rpc 1701 */ 1702static int 1703nfs_remove(struct vop_remove_args *ap) 1704{ 1705 struct vnode *vp = ap->a_vp; 1706 struct vnode *dvp = ap->a_dvp; 1707 struct componentname *cnp = ap->a_cnp; 1708 struct nfsnode *np = VTONFS(vp); 1709 int error = 0; 1710 struct vattr vattr; 1711 1712 KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name")); 1713 KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); 1714 if (vp->v_type == VDIR) 1715 error = EPERM; 1716 else if (vrefcnt(vp) == 1 || (np->n_sillyrename && 1717 !VOP_GETATTR(vp, &vattr, cnp->cn_cred) && vattr.va_nlink > 1)) { 1718 /* 1719 * Purge the name cache so that the chance of a lookup for 1720 * the name succeeding while the remove is in progress is 1721 * minimized. Without node locking it can still happen, such 1722 * that an I/O op returns ESTALE, but since you get this if 1723 * another host removes the file.. 1724 */ 1725 cache_purge(vp); 1726 /* 1727 * throw away biocache buffers, mainly to avoid 1728 * unnecessary delayed writes later. 1729 */ 1730 error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1); 1731 /* Do the rpc */ 1732 if (error != EINTR && error != EIO) 1733 error = nfs_removerpc(dvp, cnp->cn_nameptr, 1734 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); 1735 /* 1736 * Kludge City: If the first reply to the remove rpc is lost.. 1737 * the reply to the retransmitted request will be ENOENT 1738 * since the file was in fact removed 1739 * Therefore, we cheat and return success. 
1740 */ 1741 if (error == ENOENT) 1742 error = 0; 1743 } else if (!np->n_sillyrename) 1744 error = nfs_sillyrename(dvp, vp, cnp); 1745 mtx_lock(&np->n_mtx); 1746 np->n_attrstamp = 0; 1747 mtx_unlock(&np->n_mtx); 1748 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1749 return (error); 1750} 1751 1752/* 1753 * nfs file remove rpc called from nfs_inactive 1754 */ 1755int 1756nfs_removeit(struct sillyrename *sp) 1757{ 1758 /* 1759 * Make sure that the directory vnode is still valid. 1760 * XXX we should lock sp->s_dvp here. 1761 */ 1762 if (sp->s_dvp->v_type == VBAD) 1763 return (0); 1764 return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, 1765 NULL)); 1766} 1767 1768/* 1769 * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). 1770 */ 1771static int 1772nfs_removerpc(struct vnode *dvp, const char *name, int namelen, 1773 struct ucred *cred, struct thread *td) 1774{ 1775 caddr_t bpos, dpos; 1776 int error = 0, wccflag = NFSV3_WCCRATTR; 1777 struct mbuf *mreq, *mrep, *md, *mb; 1778 int v3 = NFS_ISV3(dvp); 1779 1780 nfsstats.rpccnt[NFSPROC_REMOVE]++; 1781 mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE, 1782 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); 1783 mb = mreq; 1784 bpos = mtod(mb, caddr_t); 1785 nfsm_fhtom(dvp, v3); 1786 nfsm_strtom(name, namelen, NFS_MAXNAMLEN); 1787 nfsm_request(dvp, NFSPROC_REMOVE, td, cred); 1788 if (v3) 1789 nfsm_wcc_data(dvp, wccflag); 1790 m_freem(mrep); 1791nfsmout: 1792 mtx_lock(&(VTONFS(dvp))->n_mtx); 1793 VTONFS(dvp)->n_flag |= NMODIFIED; 1794 if (!wccflag) { 1795 VTONFS(dvp)->n_attrstamp = 0; 1796 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1797 } 1798 mtx_unlock(&(VTONFS(dvp))->n_mtx); 1799 return (error); 1800} 1801 1802/* 1803 * nfs file rename call 1804 */ 1805static int 1806nfs_rename(struct vop_rename_args *ap) 1807{ 1808 struct vnode *fvp = ap->a_fvp; 1809 struct vnode *tvp = ap->a_tvp; 1810 struct vnode *fdvp = ap->a_fdvp; 1811 struct vnode *tdvp = ap->a_tdvp; 1812 struct componentname *tcnp = ap->a_tcnp; 1813 
struct componentname *fcnp = ap->a_fcnp; 1814 int error; 1815 1816 KASSERT((tcnp->cn_flags & HASBUF) != 0 && 1817 (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name")); 1818 /* Check for cross-device rename */ 1819 if ((fvp->v_mount != tdvp->v_mount) || 1820 (tvp && (fvp->v_mount != tvp->v_mount))) { 1821 error = EXDEV; 1822 goto out; 1823 } 1824 1825 if (fvp == tvp) { 1826 nfs_printf("nfs_rename: fvp == tvp (can't happen)\n"); 1827 error = 0; 1828 goto out; 1829 } 1830 if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) 1831 goto out; 1832 1833 /* 1834 * We have to flush B_DELWRI data prior to renaming 1835 * the file. If we don't, the delayed-write buffers 1836 * can be flushed out later after the file has gone stale 1837 * under NFSV3. NFSV2 does not have this problem because 1838 * ( as far as I can tell ) it flushes dirty buffers more 1839 * often. 1840 * 1841 * Skip the rename operation if the fsync fails, this can happen 1842 * due to the server's volume being full, when we pushed out data 1843 * that was written back to our cache earlier. Not checking for 1844 * this condition can result in potential (silent) data loss. 1845 */ 1846 error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); 1847 VOP_UNLOCK(fvp, 0); 1848 if (!error && tvp) 1849 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); 1850 if (error) 1851 goto out; 1852 1853 /* 1854 * If the tvp exists and is in use, sillyrename it before doing the 1855 * rename of the new file over it. 1856 * XXX Can't sillyrename a directory. 
1857 */ 1858 if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && 1859 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { 1860 vput(tvp); 1861 tvp = NULL; 1862 } 1863 1864 error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, 1865 tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, 1866 tcnp->cn_thread); 1867 1868 if (fvp->v_type == VDIR) { 1869 if (tvp != NULL && tvp->v_type == VDIR) 1870 cache_purge(tdvp); 1871 cache_purge(fdvp); 1872 } 1873 1874out: 1875 if (tdvp == tvp) 1876 vrele(tdvp); 1877 else 1878 vput(tdvp); 1879 if (tvp) 1880 vput(tvp); 1881 vrele(fdvp); 1882 vrele(fvp); 1883 /* 1884 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 1885 */ 1886 if (error == ENOENT) 1887 error = 0; 1888 return (error); 1889} 1890 1891/* 1892 * nfs file rename rpc called from nfs_remove() above 1893 */ 1894static int 1895nfs_renameit(struct vnode *sdvp, struct componentname *scnp, 1896 struct sillyrename *sp) 1897{ 1898 1899 return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, 1900 sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread)); 1901} 1902 1903/* 1904 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
1905 */ 1906static int 1907nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen, 1908 struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred, 1909 struct thread *td) 1910{ 1911 caddr_t bpos, dpos; 1912 int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; 1913 struct mbuf *mreq, *mrep, *md, *mb; 1914 int v3 = NFS_ISV3(fdvp); 1915 1916 nfsstats.rpccnt[NFSPROC_RENAME]++; 1917 mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME, 1918 (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + 1919 nfsm_rndup(tnamelen)); 1920 mb = mreq; 1921 bpos = mtod(mb, caddr_t); 1922 nfsm_fhtom(fdvp, v3); 1923 nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); 1924 nfsm_fhtom(tdvp, v3); 1925 nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); 1926 nfsm_request(fdvp, NFSPROC_RENAME, td, cred); 1927 if (v3) { 1928 nfsm_wcc_data(fdvp, fwccflag); 1929 nfsm_wcc_data(tdvp, twccflag); 1930 } 1931 m_freem(mrep); 1932nfsmout: 1933 mtx_lock(&(VTONFS(fdvp))->n_mtx); 1934 VTONFS(fdvp)->n_flag |= NMODIFIED; 1935 mtx_unlock(&(VTONFS(fdvp))->n_mtx); 1936 mtx_lock(&(VTONFS(tdvp))->n_mtx); 1937 VTONFS(tdvp)->n_flag |= NMODIFIED; 1938 mtx_unlock(&(VTONFS(tdvp))->n_mtx); 1939 if (!fwccflag) { 1940 VTONFS(fdvp)->n_attrstamp = 0; 1941 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); 1942 } 1943 if (!twccflag) { 1944 VTONFS(tdvp)->n_attrstamp = 0; 1945 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 1946 } 1947 return (error); 1948} 1949 1950/* 1951 * nfs hard link create call 1952 */ 1953static int 1954nfs_link(struct vop_link_args *ap) 1955{ 1956 struct vnode *vp = ap->a_vp; 1957 struct vnode *tdvp = ap->a_tdvp; 1958 struct componentname *cnp = ap->a_cnp; 1959 caddr_t bpos, dpos; 1960 int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; 1961 struct mbuf *mreq, *mrep, *md, *mb; 1962 int v3; 1963 1964 if (vp->v_mount != tdvp->v_mount) { 1965 return (EXDEV); 1966 } 1967 1968 /* 1969 * Push all writes to the server, so that the attribute cache 1970 * doesn't get "out of sync" with the 
server. 1971 * XXX There should be a better way! 1972 */ 1973 VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); 1974 1975 v3 = NFS_ISV3(vp); 1976 nfsstats.rpccnt[NFSPROC_LINK]++; 1977 mreq = nfsm_reqhead(vp, NFSPROC_LINK, 1978 NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); 1979 mb = mreq; 1980 bpos = mtod(mb, caddr_t); 1981 nfsm_fhtom(vp, v3); 1982 nfsm_fhtom(tdvp, v3); 1983 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 1984 nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred); 1985 if (v3) { 1986 nfsm_postop_attr(vp, attrflag); 1987 nfsm_wcc_data(tdvp, wccflag); 1988 } 1989 m_freem(mrep); 1990nfsmout: 1991 mtx_lock(&(VTONFS(tdvp))->n_mtx); 1992 VTONFS(tdvp)->n_flag |= NMODIFIED; 1993 mtx_unlock(&(VTONFS(tdvp))->n_mtx); 1994 if (!attrflag) { 1995 VTONFS(vp)->n_attrstamp = 0; 1996 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1997 } 1998 if (!wccflag) { 1999 VTONFS(tdvp)->n_attrstamp = 0; 2000 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 2001 } 2002 return (error); 2003} 2004 2005/* 2006 * nfs symbolic link create call 2007 */ 2008static int 2009nfs_symlink(struct vop_symlink_args *ap) 2010{ 2011 struct vnode *dvp = ap->a_dvp; 2012 struct vattr *vap = ap->a_vap; 2013 struct componentname *cnp = ap->a_cnp; 2014 struct nfsv2_sattr *sp; 2015 caddr_t bpos, dpos; 2016 int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; 2017 struct mbuf *mreq, *mrep, *md, *mb; 2018 struct vnode *newvp = NULL; 2019 int v3 = NFS_ISV3(dvp); 2020 2021 nfsstats.rpccnt[NFSPROC_SYMLINK]++; 2022 slen = strlen(ap->a_target); 2023 mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + 2024 nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); 2025 mb = mreq; 2026 bpos = mtod(mb, caddr_t); 2027 nfsm_fhtom(dvp, v3); 2028 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 2029 if (v3) { 2030 nfsm_v3attrbuild(vap, FALSE); 2031 } 2032 nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); 2033 if (!v3) { 2034 sp = nfsm_build(struct nfsv2_sattr *, 
NFSX_V2SATTR); 2035 sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); 2036 sp->sa_uid = nfs_xdrneg1; 2037 sp->sa_gid = nfs_xdrneg1; 2038 sp->sa_size = nfs_xdrneg1; 2039 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 2040 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 2041 } 2042 2043 /* 2044 * Issue the NFS request and get the rpc response. 2045 * 2046 * Only NFSv3 responses returning an error of 0 actually return 2047 * a file handle that can be converted into newvp without having 2048 * to do an extra lookup rpc. 2049 */ 2050 nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred); 2051 if (v3) { 2052 if (error == 0) 2053 nfsm_mtofh(dvp, newvp, v3, gotvp); 2054 nfsm_wcc_data(dvp, wccflag); 2055 } 2056 2057 /* 2058 * out code jumps -> here, mrep is also freed. 2059 */ 2060 2061 m_freem(mrep); 2062nfsmout: 2063 2064 /* 2065 * If we do not have an error and we could not extract the newvp from 2066 * the response due to the request being NFSv2, we have to do a 2067 * lookup in order to obtain a newvp to return. 
	 */
	if (error == 0 && newvp == NULL) {
		struct nfsnode *np = NULL;

		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, cnp->cn_thread, &np);
		if (!error)
			newvp = NFSTOV(np);
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		*ap->a_vpp = newvp;
	}
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag) {
		VTONFS(dvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	return (error);
}

/*
 * nfs make dir call
 *
 * Sends an NFSPROC_MKDIR request for the name in ap->a_cnp under the
 * directory ap->a_dvp.  On success the vnode of the new directory is stored
 * in *ap->a_vpp; on failure any vnode obtained along the way is released
 * with vput().  Returns 0 or an error number.
 *
 * NOTE(review): the nfsm_* macros are defined outside this file.  The
 * nfsmout label is never named explicitly in this function, so the macros
 * presumably jump to it on failure -- confirm in nfsm_subs.h.
 */
static int
nfs_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	int len;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	int gotvp = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vattr vattr;
	int v3 = NFS_ISV3(dvp);

	/* Bail out early if the parent's attributes cannot be fetched. */
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
		return (error);
	len = cnp->cn_namelen;
	nfsstats.rpccnt[NFSPROC_MKDIR]++;
	/* Request: parent file handle, name, then the settable attributes. */
	mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR,
	    NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
	if (v3) {
		nfsm_v3attrbuild(vap, FALSE);
	} else {
		/* V2: only mode and times are taken from vap; rest is -1. */
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
	if (!error)
		nfsm_mtofh(dvp, newvp, v3, gotvp);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	/* The parent directory is flagged as modified on every path. */
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag) {
		/* wccflag was cleared: invalidate dvp's cached attributes. */
		VTONFS(dvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	/*
	 * The RPC succeeded but produced no vnode: look the name up
	 * explicitly and insist that what we find is a directory.
	 */
	if (error == 0 && newvp == NULL) {
		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
		    cnp->cn_thread, &np);
		if (!error) {
			newvp = NFSTOV(np);
			if (newvp->v_type != VDIR)
				error = EEXIST;
		}
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else
		*ap->a_vpp = newvp;
	return (error);
}

/*
 * nfs remove directory call
 *
 * Sends an NFSPROC_RMDIR request for the name in ap->a_cnp under ap->a_dvp.
 * Rejects dvp == vp with EINVAL before any RPC is built.  After the RPC the
 * parent is flagged NMODIFIED and the name cache is purged for both the
 * parent and the removed directory.  ENOENT is reported to the caller as
 * success (see the kludge note at the end).
 */
static int
nfs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(dvp);

	if (dvp == vp)
		return (EINVAL);
	nfsstats.rpccnt[NFSPROC_RMDIR]++;
	/* Request: parent file handle followed by the name to remove. */
	mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR,
	    NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag) {
		/* wccflag was cleared: invalidate dvp's cached attributes. */
		VTONFS(dvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	cache_purge(dvp);
	cache_purge(vp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs readdir call
 *
 * Returns EPERM if the vnode is not a directory.  If the requested offset
 * is at or beyond the cached end-of-directory offset, the node is not
 * flagged NMODIFIED and the cached mtime still equals the one reported by
 * VOP_GETATTR(), the call returns 0 without reading anything.  Otherwise
 * the work is delegated to nfs_bioread().
 */
static int
nfs_readdir(struct vop_readdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct uio *uio = ap->a_uio;
	int tresid, error = 0;
	struct vattr vattr;

	if (vp->v_type != VDIR)
		return(EPERM);

	/*
	 * First, check for hit on the EOF offset cache
	 */
	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
	    (np->n_flag & NMODIFIED) == 0) {
		if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
			mtx_lock(&np->n_mtx);
			/* A zero result counts as "mtime unchanged". */
			if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
				mtx_unlock(&np->n_mtx);
				nfsstats.direofcache_hits++;
				goto out;
			} else
				mtx_unlock(&np->n_mtx);
		}
	}

	/*
	 * Call nfs_bioread() to do the real work.
	 */
	tresid = uio->uio_resid;
	error = nfs_bioread(vp, uio, 0, ap->a_cred);

	/* Nothing was transferred: account it as an EOF-cache miss. */
	if (!error && uio->uio_resid == tresid) {
		nfsstats.direofcache_misses++;
	}
out:
	return (error);
}

/*
 * Readdir rpc call.
 * Called from below the buffer cache by nfs_doio().
2257 */ 2258int 2259nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 2260{ 2261 int len, left; 2262 struct dirent *dp = NULL; 2263 u_int32_t *tl; 2264 caddr_t cp; 2265 nfsuint64 *cookiep; 2266 caddr_t bpos, dpos; 2267 struct mbuf *mreq, *mrep, *md, *mb; 2268 nfsuint64 cookie; 2269 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2270 struct nfsnode *dnp = VTONFS(vp); 2271 u_quad_t fileno; 2272 int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; 2273 int attrflag; 2274 int v3 = NFS_ISV3(vp); 2275 2276 KASSERT(uiop->uio_iovcnt == 1 && 2277 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2278 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2279 ("nfs readdirrpc bad uio")); 2280 2281 /* 2282 * If there is no cookie, assume directory was stale. 2283 */ 2284 nfs_dircookie_lock(dnp); 2285 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); 2286 if (cookiep) { 2287 cookie = *cookiep; 2288 nfs_dircookie_unlock(dnp); 2289 } else { 2290 nfs_dircookie_unlock(dnp); 2291 return (NFSERR_BAD_COOKIE); 2292 } 2293 2294 /* 2295 * Loop around doing readdir rpc's of size nm_readdirsize 2296 * truncated to a multiple of DIRBLKSIZ. 2297 * The stopping criteria is EOF or buffer full. 
2298 */ 2299 while (more_dirs && bigenough) { 2300 nfsstats.rpccnt[NFSPROC_READDIR]++; 2301 mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + 2302 NFSX_READDIR(v3)); 2303 mb = mreq; 2304 bpos = mtod(mb, caddr_t); 2305 nfsm_fhtom(vp, v3); 2306 if (v3) { 2307 tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED); 2308 *tl++ = cookie.nfsuquad[0]; 2309 *tl++ = cookie.nfsuquad[1]; 2310 mtx_lock(&dnp->n_mtx); 2311 *tl++ = dnp->n_cookieverf.nfsuquad[0]; 2312 *tl++ = dnp->n_cookieverf.nfsuquad[1]; 2313 mtx_unlock(&dnp->n_mtx); 2314 } else { 2315 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); 2316 *tl++ = cookie.nfsuquad[0]; 2317 } 2318 *tl = txdr_unsigned(nmp->nm_readdirsize); 2319 nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred); 2320 if (v3) { 2321 nfsm_postop_attr(vp, attrflag); 2322 if (!error) { 2323 tl = nfsm_dissect(u_int32_t *, 2324 2 * NFSX_UNSIGNED); 2325 mtx_lock(&dnp->n_mtx); 2326 dnp->n_cookieverf.nfsuquad[0] = *tl++; 2327 dnp->n_cookieverf.nfsuquad[1] = *tl; 2328 mtx_unlock(&dnp->n_mtx); 2329 } else { 2330 m_freem(mrep); 2331 goto nfsmout; 2332 } 2333 } 2334 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2335 more_dirs = fxdr_unsigned(int, *tl); 2336 2337 /* loop thru the dir entries, doctoring them to 4bsd form */ 2338 while (more_dirs && bigenough) { 2339 if (v3) { 2340 tl = nfsm_dissect(u_int32_t *, 2341 3 * NFSX_UNSIGNED); 2342 fileno = fxdr_hyper(tl); 2343 len = fxdr_unsigned(int, *(tl + 2)); 2344 } else { 2345 tl = nfsm_dissect(u_int32_t *, 2346 2 * NFSX_UNSIGNED); 2347 fileno = fxdr_unsigned(u_quad_t, *tl++); 2348 len = fxdr_unsigned(int, *tl); 2349 } 2350 if (len <= 0 || len > NFS_MAXNAMLEN) { 2351 error = EBADRPC; 2352 m_freem(mrep); 2353 goto nfsmout; 2354 } 2355 tlen = nfsm_rndup(len); 2356 if (tlen == len) 2357 tlen += 4; /* To ensure null termination */ 2358 left = DIRBLKSIZ - blksiz; 2359 if ((tlen + DIRHDSIZ) > left) { 2360 dp->d_reclen += left; 2361 uiop->uio_iov->iov_base = 2362 (char *)uiop->uio_iov->iov_base + left; 2363 
uiop->uio_iov->iov_len -= left; 2364 uiop->uio_offset += left; 2365 uiop->uio_resid -= left; 2366 blksiz = 0; 2367 } 2368 if ((tlen + DIRHDSIZ) > uiop->uio_resid) 2369 bigenough = 0; 2370 if (bigenough) { 2371 dp = (struct dirent *)uiop->uio_iov->iov_base; 2372 dp->d_fileno = (int)fileno; 2373 dp->d_namlen = len; 2374 dp->d_reclen = tlen + DIRHDSIZ; 2375 dp->d_type = DT_UNKNOWN; 2376 blksiz += dp->d_reclen; 2377 if (blksiz == DIRBLKSIZ) 2378 blksiz = 0; 2379 uiop->uio_offset += DIRHDSIZ; 2380 uiop->uio_resid -= DIRHDSIZ; 2381 uiop->uio_iov->iov_base = 2382 (char *)uiop->uio_iov->iov_base + DIRHDSIZ; 2383 uiop->uio_iov->iov_len -= DIRHDSIZ; 2384 nfsm_mtouio(uiop, len); 2385 cp = uiop->uio_iov->iov_base; 2386 tlen -= len; 2387 *cp = '\0'; /* null terminate */ 2388 uiop->uio_iov->iov_base = 2389 (char *)uiop->uio_iov->iov_base + tlen; 2390 uiop->uio_iov->iov_len -= tlen; 2391 uiop->uio_offset += tlen; 2392 uiop->uio_resid -= tlen; 2393 } else 2394 nfsm_adv(nfsm_rndup(len)); 2395 if (v3) { 2396 tl = nfsm_dissect(u_int32_t *, 2397 3 * NFSX_UNSIGNED); 2398 } else { 2399 tl = nfsm_dissect(u_int32_t *, 2400 2 * NFSX_UNSIGNED); 2401 } 2402 if (bigenough) { 2403 cookie.nfsuquad[0] = *tl++; 2404 if (v3) 2405 cookie.nfsuquad[1] = *tl++; 2406 } else if (v3) 2407 tl += 2; 2408 else 2409 tl++; 2410 more_dirs = fxdr_unsigned(int, *tl); 2411 } 2412 /* 2413 * If at end of rpc data, get the eof boolean 2414 */ 2415 if (!more_dirs) { 2416 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2417 more_dirs = (fxdr_unsigned(int, *tl) == 0); 2418 } 2419 m_freem(mrep); 2420 } 2421 /* 2422 * Fill last record, iff any, out to a multiple of DIRBLKSIZ 2423 * by increasing d_reclen for the last record. 
2424 */ 2425 if (blksiz > 0) { 2426 left = DIRBLKSIZ - blksiz; 2427 dp->d_reclen += left; 2428 uiop->uio_iov->iov_base = 2429 (char *)uiop->uio_iov->iov_base + left; 2430 uiop->uio_iov->iov_len -= left; 2431 uiop->uio_offset += left; 2432 uiop->uio_resid -= left; 2433 } 2434 2435 /* 2436 * We are now either at the end of the directory or have filled the 2437 * block. 2438 */ 2439 if (bigenough) 2440 dnp->n_direofoffset = uiop->uio_offset; 2441 else { 2442 if (uiop->uio_resid > 0) 2443 nfs_printf("EEK! readdirrpc resid > 0\n"); 2444 nfs_dircookie_lock(dnp); 2445 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); 2446 *cookiep = cookie; 2447 nfs_dircookie_unlock(dnp); 2448 } 2449nfsmout: 2450 return (error); 2451} 2452 2453/* 2454 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 2455 */ 2456int 2457nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 2458{ 2459 int len, left; 2460 struct dirent *dp; 2461 u_int32_t *tl; 2462 caddr_t cp; 2463 struct vnode *newvp; 2464 nfsuint64 *cookiep; 2465 caddr_t bpos, dpos, dpossav1, dpossav2; 2466 struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2; 2467 struct nameidata nami, *ndp = &nami; 2468 struct componentname *cnp = &ndp->ni_cnd; 2469 nfsuint64 cookie; 2470 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2471 struct nfsnode *dnp = VTONFS(vp), *np; 2472 nfsfh_t *fhp; 2473 u_quad_t fileno; 2474 int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; 2475 int attrflag, fhsize; 2476 2477#ifndef nolint 2478 dp = NULL; 2479#endif 2480 KASSERT(uiop->uio_iovcnt == 1 && 2481 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2482 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2483 ("nfs readdirplusrpc bad uio")); 2484 ndp->ni_dvp = vp; 2485 newvp = NULLVP; 2486 2487 /* 2488 * If there is no cookie, assume directory was stale. 
2489 */ 2490 nfs_dircookie_lock(dnp); 2491 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); 2492 if (cookiep) { 2493 cookie = *cookiep; 2494 nfs_dircookie_unlock(dnp); 2495 } else { 2496 nfs_dircookie_unlock(dnp); 2497 return (NFSERR_BAD_COOKIE); 2498 } 2499 /* 2500 * Loop around doing readdir rpc's of size nm_readdirsize 2501 * truncated to a multiple of DIRBLKSIZ. 2502 * The stopping criteria is EOF or buffer full. 2503 */ 2504 while (more_dirs && bigenough) { 2505 nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; 2506 mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS, 2507 NFSX_FH(1) + 6 * NFSX_UNSIGNED); 2508 mb = mreq; 2509 bpos = mtod(mb, caddr_t); 2510 nfsm_fhtom(vp, 1); 2511 tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED); 2512 *tl++ = cookie.nfsuquad[0]; 2513 *tl++ = cookie.nfsuquad[1]; 2514 mtx_lock(&dnp->n_mtx); 2515 *tl++ = dnp->n_cookieverf.nfsuquad[0]; 2516 *tl++ = dnp->n_cookieverf.nfsuquad[1]; 2517 mtx_unlock(&dnp->n_mtx); 2518 *tl++ = txdr_unsigned(nmp->nm_readdirsize); 2519 *tl = txdr_unsigned(nmp->nm_rsize); 2520 nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred); 2521 nfsm_postop_attr(vp, attrflag); 2522 if (error) { 2523 m_freem(mrep); 2524 goto nfsmout; 2525 } 2526 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2527 mtx_lock(&dnp->n_mtx); 2528 dnp->n_cookieverf.nfsuquad[0] = *tl++; 2529 dnp->n_cookieverf.nfsuquad[1] = *tl++; 2530 mtx_unlock(&dnp->n_mtx); 2531 more_dirs = fxdr_unsigned(int, *tl); 2532 2533 /* loop thru the dir entries, doctoring them to 4bsd form */ 2534 while (more_dirs && bigenough) { 2535 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2536 fileno = fxdr_hyper(tl); 2537 len = fxdr_unsigned(int, *(tl + 2)); 2538 if (len <= 0 || len > NFS_MAXNAMLEN) { 2539 error = EBADRPC; 2540 m_freem(mrep); 2541 goto nfsmout; 2542 } 2543 tlen = nfsm_rndup(len); 2544 if (tlen == len) 2545 tlen += 4; /* To ensure null termination*/ 2546 left = DIRBLKSIZ - blksiz; 2547 if ((tlen + DIRHDSIZ) > left) { 2548 dp->d_reclen += left; 2549 
uiop->uio_iov->iov_base = 2550 (char *)uiop->uio_iov->iov_base + left; 2551 uiop->uio_iov->iov_len -= left; 2552 uiop->uio_offset += left; 2553 uiop->uio_resid -= left; 2554 blksiz = 0; 2555 } 2556 if ((tlen + DIRHDSIZ) > uiop->uio_resid) 2557 bigenough = 0; 2558 if (bigenough) { 2559 dp = (struct dirent *)uiop->uio_iov->iov_base; 2560 dp->d_fileno = (int)fileno; 2561 dp->d_namlen = len; 2562 dp->d_reclen = tlen + DIRHDSIZ; 2563 dp->d_type = DT_UNKNOWN; 2564 blksiz += dp->d_reclen; 2565 if (blksiz == DIRBLKSIZ) 2566 blksiz = 0; 2567 uiop->uio_offset += DIRHDSIZ; 2568 uiop->uio_resid -= DIRHDSIZ; 2569 uiop->uio_iov->iov_base = 2570 (char *)uiop->uio_iov->iov_base + DIRHDSIZ; 2571 uiop->uio_iov->iov_len -= DIRHDSIZ; 2572 cnp->cn_nameptr = uiop->uio_iov->iov_base; 2573 cnp->cn_namelen = len; 2574 nfsm_mtouio(uiop, len); 2575 cp = uiop->uio_iov->iov_base; 2576 tlen -= len; 2577 *cp = '\0'; 2578 uiop->uio_iov->iov_base = 2579 (char *)uiop->uio_iov->iov_base + tlen; 2580 uiop->uio_iov->iov_len -= tlen; 2581 uiop->uio_offset += tlen; 2582 uiop->uio_resid -= tlen; 2583 } else 2584 nfsm_adv(nfsm_rndup(len)); 2585 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2586 if (bigenough) { 2587 cookie.nfsuquad[0] = *tl++; 2588 cookie.nfsuquad[1] = *tl++; 2589 } else 2590 tl += 2; 2591 2592 /* 2593 * Since the attributes are before the file handle 2594 * (sigh), we must skip over the attributes and then 2595 * come back and get them. 2596 */ 2597 attrflag = fxdr_unsigned(int, *tl); 2598 if (attrflag) { 2599 dpossav1 = dpos; 2600 mdsav1 = md; 2601 nfsm_adv(NFSX_V3FATTR); 2602 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2603 doit = fxdr_unsigned(int, *tl); 2604 /* 2605 * Skip loading the attrs for "..". There's a 2606 * race between loading the attrs here and 2607 * lookups that look for the directory currently 2608 * being read (in the parent). We try to acquire 2609 * the exclusive lock on ".." here, owning the 2610 * lock on the directory being read. 
Lookup will 2611 * hold the lock on ".." and try to acquire the 2612 * lock on the directory being read. 2613 * 2614 * There are other ways of fixing this, one would 2615 * be to do a trylock on the ".." vnode and skip 2616 * loading the attrs on ".." if it happens to be 2617 * locked by another process. But skipping the 2618 * attrload on ".." seems the easiest option. 2619 */ 2620 if (strcmp(dp->d_name, "..") == 0) { 2621 doit = 0; 2622 /* 2623 * We've already skipped over the attrs, 2624 * skip over the filehandle. And store d_type 2625 * as VDIR. 2626 */ 2627 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2628 i = fxdr_unsigned(int, *tl); 2629 nfsm_adv(nfsm_rndup(i)); 2630 dp->d_type = IFTODT(VTTOIF(VDIR)); 2631 } 2632 if (doit) { 2633 nfsm_getfh(fhp, fhsize, 1); 2634 if (NFS_CMPFH(dnp, fhp, fhsize)) { 2635 VREF(vp); 2636 newvp = vp; 2637 np = dnp; 2638 } else { 2639 error = nfs_nget(vp->v_mount, fhp, 2640 fhsize, &np, LK_EXCLUSIVE); 2641 if (error) 2642 doit = 0; 2643 else 2644 newvp = NFSTOV(np); 2645 } 2646 } 2647 if (doit && bigenough) { 2648 dpossav2 = dpos; 2649 dpos = dpossav1; 2650 mdsav2 = md; 2651 md = mdsav1; 2652 nfsm_loadattr(newvp, NULL); 2653 dpos = dpossav2; 2654 md = mdsav2; 2655 dp->d_type = 2656 IFTODT(VTTOIF(np->n_vattr.va_type)); 2657 ndp->ni_vp = newvp; 2658 /* 2659 * Update n_ctime so subsequent lookup 2660 * doesn't purge entry. 
2661 */ 2662 np->n_ctime = np->n_vattr.va_ctime; 2663 cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); 2664 } 2665 } else { 2666 /* Just skip over the file handle */ 2667 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2668 i = fxdr_unsigned(int, *tl); 2669 if (i) { 2670 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2671 fhsize = fxdr_unsigned(int, *tl); 2672 nfsm_adv(nfsm_rndup(fhsize)); 2673 } 2674 } 2675 if (newvp != NULLVP) { 2676 if (newvp == vp) 2677 vrele(newvp); 2678 else 2679 vput(newvp); 2680 newvp = NULLVP; 2681 } 2682 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2683 more_dirs = fxdr_unsigned(int, *tl); 2684 } 2685 /* 2686 * If at end of rpc data, get the eof boolean 2687 */ 2688 if (!more_dirs) { 2689 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2690 more_dirs = (fxdr_unsigned(int, *tl) == 0); 2691 } 2692 m_freem(mrep); 2693 } 2694 /* 2695 * Fill last record, iff any, out to a multiple of DIRBLKSIZ 2696 * by increasing d_reclen for the last record. 2697 */ 2698 if (blksiz > 0) { 2699 left = DIRBLKSIZ - blksiz; 2700 dp->d_reclen += left; 2701 uiop->uio_iov->iov_base = 2702 (char *)uiop->uio_iov->iov_base + left; 2703 uiop->uio_iov->iov_len -= left; 2704 uiop->uio_offset += left; 2705 uiop->uio_resid -= left; 2706 } 2707 2708 /* 2709 * We are now either at the end of the directory or have filled the 2710 * block. 2711 */ 2712 if (bigenough) 2713 dnp->n_direofoffset = uiop->uio_offset; 2714 else { 2715 if (uiop->uio_resid > 0) 2716 nfs_printf("EEK! readdirplusrpc resid > 0\n"); 2717 nfs_dircookie_lock(dnp); 2718 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); 2719 *cookiep = cookie; 2720 nfs_dircookie_unlock(dnp); 2721 } 2722nfsmout: 2723 if (newvp != NULLVP) { 2724 if (newvp == vp) 2725 vrele(newvp); 2726 else 2727 vput(newvp); 2728 newvp = NULLVP; 2729 } 2730 return (error); 2731} 2732 2733/* 2734 * Silly rename. 
To make the NFS filesystem that is stateless look a little 2735 * more like the "ufs" a remove of an active vnode is translated to a rename 2736 * to a funny looking filename that is removed by nfs_inactive on the 2737 * nfsnode. There is the potential for another process on a different client 2738 * to create the same funny name between the nfs_lookitup() fails and the 2739 * nfs_rename() completes, but... 2740 */ 2741static int 2742nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 2743{ 2744 struct sillyrename *sp; 2745 struct nfsnode *np; 2746 int error; 2747 short pid; 2748 unsigned int lticks; 2749 2750 cache_purge(dvp); 2751 np = VTONFS(vp); 2752 KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); 2753 sp = malloc(sizeof (struct sillyrename), 2754 M_NFSREQ, M_WAITOK); 2755 sp->s_cred = crhold(cnp->cn_cred); 2756 sp->s_dvp = dvp; 2757 sp->s_removeit = nfs_removeit; 2758 VREF(dvp); 2759 2760 /* 2761 * Fudge together a funny name. 2762 * Changing the format of the funny name to accomodate more 2763 * sillynames per directory. 2764 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 2765 * CPU ticks since boot. 2766 */ 2767 pid = cnp->cn_thread->td_proc->p_pid; 2768 lticks = (unsigned int)ticks; 2769 for ( ; ; ) { 2770 sp->s_namlen = sprintf(sp->s_name, 2771 ".nfs.%08x.%04x4.4", lticks, 2772 pid); 2773 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2774 cnp->cn_thread, NULL)) 2775 break; 2776 lticks++; 2777 } 2778 error = nfs_renameit(dvp, cnp, sp); 2779 if (error) 2780 goto bad; 2781 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2782 cnp->cn_thread, &np); 2783 np->n_sillyrename = sp; 2784 return (0); 2785bad: 2786 vrele(sp->s_dvp); 2787 crfree(sp->s_cred); 2788 free((caddr_t)sp, M_NFSREQ); 2789 return (error); 2790} 2791 2792/* 2793 * Look up a file name and optionally either update the file handle or 2794 * allocate an nfsnode, depending on the value of npp. 
2795 * npp == NULL --> just do the lookup 2796 * *npp == NULL --> allocate a new nfsnode and make sure attributes are 2797 * handled too 2798 * *npp != NULL --> update the file handle in the vnode 2799 */ 2800static int 2801nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred, 2802 struct thread *td, struct nfsnode **npp) 2803{ 2804 struct vnode *newvp = NULL; 2805 struct nfsnode *np, *dnp = VTONFS(dvp); 2806 caddr_t bpos, dpos; 2807 int error = 0, fhlen, attrflag; 2808 struct mbuf *mreq, *mrep, *md, *mb; 2809 nfsfh_t *nfhp; 2810 int v3 = NFS_ISV3(dvp); 2811 2812 nfsstats.rpccnt[NFSPROC_LOOKUP]++; 2813 mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, 2814 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); 2815 mb = mreq; 2816 bpos = mtod(mb, caddr_t); 2817 nfsm_fhtom(dvp, v3); 2818 nfsm_strtom(name, len, NFS_MAXNAMLEN); 2819 nfsm_request(dvp, NFSPROC_LOOKUP, td, cred); 2820 if (npp && !error) { 2821 nfsm_getfh(nfhp, fhlen, v3); 2822 if (*npp) { 2823 np = *npp; 2824 if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { 2825 free((caddr_t)np->n_fhp, M_NFSBIGFH); 2826 np->n_fhp = &np->n_fh; 2827 } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) 2828 np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK); 2829 bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); 2830 np->n_fhsize = fhlen; 2831 newvp = NFSTOV(np); 2832 } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { 2833 VREF(dvp); 2834 newvp = dvp; 2835 } else { 2836 error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE); 2837 if (error) { 2838 m_freem(mrep); 2839 return (error); 2840 } 2841 newvp = NFSTOV(np); 2842 } 2843 if (v3) { 2844 nfsm_postop_attr(newvp, attrflag); 2845 if (!attrflag && *npp == NULL) { 2846 m_freem(mrep); 2847 if (newvp == dvp) 2848 vrele(newvp); 2849 else 2850 vput(newvp); 2851 return (ENOENT); 2852 } 2853 } else 2854 nfsm_loadattr(newvp, NULL); 2855 } 2856 m_freem(mrep); 2857nfsmout: 2858 if (npp && *npp == NULL) { 2859 if (error) { 2860 if (newvp) { 2861 if 
(newvp == dvp) 2862 vrele(newvp); 2863 else 2864 vput(newvp); 2865 } 2866 } else 2867 *npp = np; 2868 } 2869 return (error); 2870} 2871 2872/* 2873 * Nfs Version 3 commit rpc 2874 */ 2875int 2876nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, 2877 struct thread *td) 2878{ 2879 u_int32_t *tl; 2880 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2881 caddr_t bpos, dpos; 2882 int error = 0, wccflag = NFSV3_WCCRATTR; 2883 struct mbuf *mreq, *mrep, *md, *mb; 2884 2885 mtx_lock(&nmp->nm_mtx); 2886 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { 2887 mtx_unlock(&nmp->nm_mtx); 2888 return (0); 2889 } 2890 mtx_unlock(&nmp->nm_mtx); 2891 nfsstats.rpccnt[NFSPROC_COMMIT]++; 2892 mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); 2893 mb = mreq; 2894 bpos = mtod(mb, caddr_t); 2895 nfsm_fhtom(vp, 1); 2896 tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED); 2897 txdr_hyper(offset, tl); 2898 tl += 2; 2899 *tl = txdr_unsigned(cnt); 2900 nfsm_request(vp, NFSPROC_COMMIT, td, cred); 2901 nfsm_wcc_data(vp, wccflag); 2902 if (!error) { 2903 tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF); 2904 if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, 2905 NFSX_V3WRITEVERF)) { 2906 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, 2907 NFSX_V3WRITEVERF); 2908 error = NFSERR_STALEWRITEVERF; 2909 } 2910 } 2911 m_freem(mrep); 2912nfsmout: 2913 return (error); 2914} 2915 2916/* 2917 * Strategy routine. 2918 * For async requests when nfsiod(s) are running, queue the request by 2919 * calling nfs_asyncio(), otherwise just all nfs_doio() to do the 2920 * request. 
2921 */ 2922static int 2923nfs_strategy(struct vop_strategy_args *ap) 2924{ 2925 struct buf *bp = ap->a_bp; 2926 struct ucred *cr; 2927 2928 KASSERT(!(bp->b_flags & B_DONE), 2929 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 2930 BUF_ASSERT_HELD(bp); 2931 2932 if (bp->b_iocmd == BIO_READ) 2933 cr = bp->b_rcred; 2934 else 2935 cr = bp->b_wcred; 2936 2937 /* 2938 * If the op is asynchronous and an i/o daemon is waiting 2939 * queue the request, wake it up and wait for completion 2940 * otherwise just do it ourselves. 2941 */ 2942 if ((bp->b_flags & B_ASYNC) == 0 || 2943 nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread)) 2944 (void)nfs_doio(ap->a_vp, bp, cr, curthread); 2945 return (0); 2946} 2947 2948/* 2949 * fsync vnode op. Just call nfs_flush() with commit == 1. 2950 */ 2951/* ARGSUSED */ 2952static int 2953nfs_fsync(struct vop_fsync_args *ap) 2954{ 2955 2956 return (nfs_flush(ap->a_vp, ap->a_waitfor, 1)); 2957} 2958 2959/* 2960 * Flush all the blocks associated with a vnode. 2961 * Walk through the buffer pool and push any dirty pages 2962 * associated with the vnode. 
2963 */ 2964static int 2965nfs_flush(struct vnode *vp, int waitfor, int commit) 2966{ 2967 struct nfsnode *np = VTONFS(vp); 2968 struct buf *bp; 2969 int i; 2970 struct buf *nbp; 2971 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2972 int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; 2973 int passone = 1; 2974 u_quad_t off, endoff, toff; 2975 struct ucred* wcred = NULL; 2976 struct buf **bvec = NULL; 2977 struct bufobj *bo; 2978 struct thread *td = curthread; 2979#ifndef NFS_COMMITBVECSIZ 2980#define NFS_COMMITBVECSIZ 20 2981#endif 2982 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; 2983 int bvecsize = 0, bveccount; 2984 2985 if (nmp->nm_flag & NFSMNT_INT) 2986 slpflag = NFS_PCATCH; 2987 if (!commit) 2988 passone = 0; 2989 bo = &vp->v_bufobj; 2990 /* 2991 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the 2992 * server, but has not been committed to stable storage on the server 2993 * yet. On the first pass, the byte range is worked out and the commit 2994 * rpc is done. On the second pass, nfs_writebp() is called to do the 2995 * job. 2996 */ 2997again: 2998 off = (u_quad_t)-1; 2999 endoff = 0; 3000 bvecpos = 0; 3001 if (NFS_ISV3(vp) && commit) { 3002 if (bvec != NULL && bvec != bvec_on_stack) 3003 free(bvec, M_TEMP); 3004 /* 3005 * Count up how many buffers waiting for a commit. 3006 */ 3007 bveccount = 0; 3008 BO_LOCK(bo); 3009 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3010 if (!BUF_ISLOCKED(bp) && 3011 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 3012 == (B_DELWRI | B_NEEDCOMMIT)) 3013 bveccount++; 3014 } 3015 /* 3016 * Allocate space to remember the list of bufs to commit. It is 3017 * important to use M_NOWAIT here to avoid a race with nfs_write. 3018 * If we can't get memory (for whatever reason), we will end up 3019 * committing the buffers one-by-one in the loop below. 3020 */ 3021 if (bveccount > NFS_COMMITBVECSIZ) { 3022 /* 3023 * Release the vnode interlock to avoid a lock 3024 * order reversal. 
3025 */ 3026 BO_UNLOCK(bo); 3027 bvec = (struct buf **) 3028 malloc(bveccount * sizeof(struct buf *), 3029 M_TEMP, M_NOWAIT); 3030 BO_LOCK(bo); 3031 if (bvec == NULL) { 3032 bvec = bvec_on_stack; 3033 bvecsize = NFS_COMMITBVECSIZ; 3034 } else 3035 bvecsize = bveccount; 3036 } else { 3037 bvec = bvec_on_stack; 3038 bvecsize = NFS_COMMITBVECSIZ; 3039 } 3040 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3041 if (bvecpos >= bvecsize) 3042 break; 3043 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 3044 nbp = TAILQ_NEXT(bp, b_bobufs); 3045 continue; 3046 } 3047 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != 3048 (B_DELWRI | B_NEEDCOMMIT)) { 3049 BUF_UNLOCK(bp); 3050 nbp = TAILQ_NEXT(bp, b_bobufs); 3051 continue; 3052 } 3053 BO_UNLOCK(bo); 3054 bremfree(bp); 3055 /* 3056 * Work out if all buffers are using the same cred 3057 * so we can deal with them all with one commit. 3058 * 3059 * NOTE: we are not clearing B_DONE here, so we have 3060 * to do it later on in this routine if we intend to 3061 * initiate I/O on the bp. 3062 * 3063 * Note: to avoid loopback deadlocks, we do not 3064 * assign b_runningbufspace. 3065 */ 3066 if (wcred == NULL) 3067 wcred = bp->b_wcred; 3068 else if (wcred != bp->b_wcred) 3069 wcred = NOCRED; 3070 vfs_busy_pages(bp, 1); 3071 3072 BO_LOCK(bo); 3073 /* 3074 * bp is protected by being locked, but nbp is not 3075 * and vfs_busy_pages() may sleep. We have to 3076 * recalculate nbp. 3077 */ 3078 nbp = TAILQ_NEXT(bp, b_bobufs); 3079 3080 /* 3081 * A list of these buffers is kept so that the 3082 * second loop knows which buffers have actually 3083 * been committed. This is necessary, since there 3084 * may be a race between the commit rpc and new 3085 * uncommitted writes on the file. 
3086 */ 3087 bvec[bvecpos++] = bp; 3088 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 3089 bp->b_dirtyoff; 3090 if (toff < off) 3091 off = toff; 3092 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); 3093 if (toff > endoff) 3094 endoff = toff; 3095 } 3096 BO_UNLOCK(bo); 3097 } 3098 if (bvecpos > 0) { 3099 /* 3100 * Commit data on the server, as required. 3101 * If all bufs are using the same wcred, then use that with 3102 * one call for all of them, otherwise commit each one 3103 * separately. 3104 */ 3105 if (wcred != NOCRED) 3106 retv = nfs_commit(vp, off, (int)(endoff - off), 3107 wcred, td); 3108 else { 3109 retv = 0; 3110 for (i = 0; i < bvecpos; i++) { 3111 off_t off, size; 3112 bp = bvec[i]; 3113 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 3114 bp->b_dirtyoff; 3115 size = (u_quad_t)(bp->b_dirtyend 3116 - bp->b_dirtyoff); 3117 retv = nfs_commit(vp, off, (int)size, 3118 bp->b_wcred, td); 3119 if (retv) break; 3120 } 3121 } 3122 3123 if (retv == NFSERR_STALEWRITEVERF) 3124 nfs_clearcommit(vp->v_mount); 3125 3126 /* 3127 * Now, either mark the blocks I/O done or mark the 3128 * blocks dirty, depending on whether the commit 3129 * succeeded. 3130 */ 3131 for (i = 0; i < bvecpos; i++) { 3132 bp = bvec[i]; 3133 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 3134 if (retv) { 3135 /* 3136 * Error, leave B_DELWRI intact 3137 */ 3138 vfs_unbusy_pages(bp); 3139 brelse(bp); 3140 } else { 3141 /* 3142 * Success, remove B_DELWRI ( bundirty() ). 3143 * 3144 * b_dirtyoff/b_dirtyend seem to be NFS 3145 * specific. We should probably move that 3146 * into bundirty(). XXX 3147 */ 3148 bufobj_wref(bo); 3149 bp->b_flags |= B_ASYNC; 3150 bundirty(bp); 3151 bp->b_flags &= ~B_DONE; 3152 bp->b_ioflags &= ~BIO_ERROR; 3153 bp->b_dirtyoff = bp->b_dirtyend = 0; 3154 bufdone(bp); 3155 } 3156 } 3157 } 3158 3159 /* 3160 * Start/do any write(s) that are required. 
3161 */ 3162loop: 3163 BO_LOCK(bo); 3164 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3165 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 3166 if (waitfor != MNT_WAIT || passone) 3167 continue; 3168 3169 error = BUF_TIMELOCK(bp, 3170 LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, 3171 BO_MTX(bo), "nfsfsync", slpflag, slptimeo); 3172 if (error == 0) { 3173 BUF_UNLOCK(bp); 3174 goto loop; 3175 } 3176 if (error == ENOLCK) { 3177 error = 0; 3178 goto loop; 3179 } 3180 if (nfs_sigintr(nmp, td)) { 3181 error = EINTR; 3182 goto done; 3183 } 3184 if (slpflag & PCATCH) { 3185 slpflag = 0; 3186 slptimeo = 2 * hz; 3187 } 3188 goto loop; 3189 } 3190 if ((bp->b_flags & B_DELWRI) == 0) 3191 panic("nfs_fsync: not dirty"); 3192 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { 3193 BUF_UNLOCK(bp); 3194 continue; 3195 } 3196 BO_UNLOCK(bo); 3197 bremfree(bp); 3198 if (passone || !commit) 3199 bp->b_flags |= B_ASYNC; 3200 else 3201 bp->b_flags |= B_ASYNC; 3202 bwrite(bp); 3203 if (nfs_sigintr(nmp, td)) { 3204 error = EINTR; 3205 goto done; 3206 } 3207 goto loop; 3208 } 3209 if (passone) { 3210 passone = 0; 3211 BO_UNLOCK(bo); 3212 goto again; 3213 } 3214 if (waitfor == MNT_WAIT) { 3215 while (bo->bo_numoutput) { 3216 error = bufobj_wwait(bo, slpflag, slptimeo); 3217 if (error) { 3218 BO_UNLOCK(bo); 3219 error = nfs_sigintr(nmp, td); 3220 if (error) 3221 goto done; 3222 if (slpflag & PCATCH) { 3223 slpflag = 0; 3224 slptimeo = 2 * hz; 3225 } 3226 BO_LOCK(bo); 3227 } 3228 } 3229 if (bo->bo_dirty.bv_cnt != 0 && commit) { 3230 BO_UNLOCK(bo); 3231 goto loop; 3232 } 3233 /* 3234 * Wait for all the async IO requests to drain 3235 */ 3236 BO_UNLOCK(bo); 3237 mtx_lock(&np->n_mtx); 3238 while (np->n_directio_asyncwr > 0) { 3239 np->n_flag |= NFSYNCWAIT; 3240 error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr, 3241 &np->n_mtx, slpflag | (PRIBIO + 1), 3242 "nfsfsync", 0); 3243 if (error) { 3244 if (nfs_sigintr(nmp, td)) { 3245 mtx_unlock(&np->n_mtx); 3246 error = 
EINTR; 3247 goto done; 3248 } 3249 } 3250 } 3251 mtx_unlock(&np->n_mtx); 3252 } else 3253 BO_UNLOCK(bo); 3254 mtx_lock(&np->n_mtx); 3255 if (np->n_flag & NWRITEERR) { 3256 error = np->n_error; 3257 np->n_flag &= ~NWRITEERR; 3258 } 3259 if (commit && bo->bo_dirty.bv_cnt == 0 && 3260 bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) 3261 np->n_flag &= ~NMODIFIED; 3262 mtx_unlock(&np->n_mtx); 3263done: 3264 if (bvec != NULL && bvec != bvec_on_stack) 3265 free(bvec, M_TEMP); 3266 return (error); 3267} 3268 3269/* 3270 * NFS advisory byte-level locks. 3271 */ 3272static int 3273nfs_advlock(struct vop_advlock_args *ap) 3274{ 3275 struct vnode *vp = ap->a_vp; 3276 u_quad_t size; 3277 int error; 3278 3279 error = vn_lock(vp, LK_SHARED); 3280 if (error) 3281 return (error); 3282 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3283 size = VTONFS(vp)->n_size; 3284 VOP_UNLOCK(vp, 0); 3285 error = lf_advlock(ap, &(vp->v_lockf), size); 3286 } else { 3287 if (nfs_advlock_p) 3288 error = nfs_advlock_p(ap); 3289 else 3290 error = ENOLCK; 3291 } 3292 3293 return (error); 3294} 3295 3296/* 3297 * NFS advisory byte-level locks. 3298 */ 3299static int 3300nfs_advlockasync(struct vop_advlockasync_args *ap) 3301{ 3302 struct vnode *vp = ap->a_vp; 3303 u_quad_t size; 3304 int error; 3305 3306 error = vn_lock(vp, LK_SHARED); 3307 if (error) 3308 return (error); 3309 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3310 size = VTONFS(vp)->n_size; 3311 VOP_UNLOCK(vp, 0); 3312 error = lf_advlockasync(ap, &(vp->v_lockf), size); 3313 } else { 3314 VOP_UNLOCK(vp, 0); 3315 error = EOPNOTSUPP; 3316 } 3317 return (error); 3318} 3319 3320/* 3321 * Print out the contents of an nfsnode. 
3322 */ 3323static int 3324nfs_print(struct vop_print_args *ap) 3325{ 3326 struct vnode *vp = ap->a_vp; 3327 struct nfsnode *np = VTONFS(vp); 3328 3329 nfs_printf("\tfileid %ld fsid 0x%x", 3330 np->n_vattr.va_fileid, np->n_vattr.va_fsid); 3331 if (vp->v_type == VFIFO) 3332 fifo_printinfo(vp); 3333 printf("\n"); 3334 return (0); 3335} 3336 3337/* 3338 * This is the "real" nfs::bwrite(struct buf*). 3339 * We set B_CACHE if this is a VMIO buffer. 3340 */ 3341int 3342nfs_writebp(struct buf *bp, int force __unused, struct thread *td) 3343{ 3344 int s; 3345 int oldflags = bp->b_flags; 3346#if 0 3347 int retv = 1; 3348 off_t off; 3349#endif 3350 3351 BUF_ASSERT_HELD(bp); 3352 3353 if (bp->b_flags & B_INVAL) { 3354 brelse(bp); 3355 return(0); 3356 } 3357 3358 bp->b_flags |= B_CACHE; 3359 3360 /* 3361 * Undirty the bp. We will redirty it later if the I/O fails. 3362 */ 3363 3364 s = splbio(); 3365 bundirty(bp); 3366 bp->b_flags &= ~B_DONE; 3367 bp->b_ioflags &= ~BIO_ERROR; 3368 bp->b_iocmd = BIO_WRITE; 3369 3370 bufobj_wref(bp->b_bufobj); 3371 curthread->td_ru.ru_oublock++; 3372 splx(s); 3373 3374 /* 3375 * Note: to avoid loopback deadlocks, we do not 3376 * assign b_runningbufspace. 3377 */ 3378 vfs_busy_pages(bp, 1); 3379 3380 BUF_KERNPROC(bp); 3381 bp->b_iooffset = dbtob(bp->b_blkno); 3382 bstrategy(bp); 3383 3384 if( (oldflags & B_ASYNC) == 0) { 3385 int rtval = bufwait(bp); 3386 3387 if (oldflags & B_DELWRI) { 3388 s = splbio(); 3389 reassignbuf(bp); 3390 splx(s); 3391 } 3392 brelse(bp); 3393 return (rtval); 3394 } 3395 3396 return (0); 3397} 3398 3399/* 3400 * nfs special file access vnode op. 3401 * Essentially just get vattr and then imitate iaccess() since the device is 3402 * local to the client. 
3403 */ 3404static int 3405nfsspec_access(struct vop_access_args *ap) 3406{ 3407 struct vattr *vap; 3408 struct ucred *cred = ap->a_cred; 3409 struct vnode *vp = ap->a_vp; 3410 accmode_t accmode = ap->a_accmode; 3411 struct vattr vattr; 3412 int error; 3413 3414 /* 3415 * Disallow write attempts on filesystems mounted read-only; 3416 * unless the file is a socket, fifo, or a block or character 3417 * device resident on the filesystem. 3418 */ 3419 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3420 switch (vp->v_type) { 3421 case VREG: 3422 case VDIR: 3423 case VLNK: 3424 return (EROFS); 3425 default: 3426 break; 3427 } 3428 } 3429 vap = &vattr; 3430 error = VOP_GETATTR(vp, vap, cred); 3431 if (error) 3432 goto out; 3433 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3434 accmode, cred, NULL); 3435out: 3436 return error; 3437} 3438 3439/* 3440 * Read wrapper for fifos. 3441 */ 3442static int 3443nfsfifo_read(struct vop_read_args *ap) 3444{ 3445 struct nfsnode *np = VTONFS(ap->a_vp); 3446 int error; 3447 3448 /* 3449 * Set access flag. 3450 */ 3451 mtx_lock(&np->n_mtx); 3452 np->n_flag |= NACC; 3453 getnanotime(&np->n_atim); 3454 mtx_unlock(&np->n_mtx); 3455 error = fifo_specops.vop_read(ap); 3456 return error; 3457} 3458 3459/* 3460 * Write wrapper for fifos. 3461 */ 3462static int 3463nfsfifo_write(struct vop_write_args *ap) 3464{ 3465 struct nfsnode *np = VTONFS(ap->a_vp); 3466 3467 /* 3468 * Set update flag. 3469 */ 3470 mtx_lock(&np->n_mtx); 3471 np->n_flag |= NUPD; 3472 getnanotime(&np->n_mtim); 3473 mtx_unlock(&np->n_mtx); 3474 return(fifo_specops.vop_write(ap)); 3475} 3476 3477/* 3478 * Close wrapper for fifos. 3479 * 3480 * Update the times on the nfsnode then do fifo close. 
3481 */ 3482static int 3483nfsfifo_close(struct vop_close_args *ap) 3484{ 3485 struct vnode *vp = ap->a_vp; 3486 struct nfsnode *np = VTONFS(vp); 3487 struct vattr vattr; 3488 struct timespec ts; 3489 3490 mtx_lock(&np->n_mtx); 3491 if (np->n_flag & (NACC | NUPD)) { 3492 getnanotime(&ts); 3493 if (np->n_flag & NACC) 3494 np->n_atim = ts; 3495 if (np->n_flag & NUPD) 3496 np->n_mtim = ts; 3497 np->n_flag |= NCHG; 3498 if (vrefcnt(vp) == 1 && 3499 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3500 VATTR_NULL(&vattr); 3501 if (np->n_flag & NACC) 3502 vattr.va_atime = np->n_atim; 3503 if (np->n_flag & NUPD) 3504 vattr.va_mtime = np->n_mtim; 3505 mtx_unlock(&np->n_mtx); 3506 (void)VOP_SETATTR(vp, &vattr, ap->a_cred); 3507 goto out; 3508 } 3509 } 3510 mtx_unlock(&np->n_mtx); 3511out: 3512 return (fifo_specops.vop_close(ap)); 3513} 3514 3515/* 3516 * Just call nfs_writebp() with the force argument set to 1. 3517 * 3518 * NOTE: B_DONE may or may not be set in a_bp on call. 3519 */ 3520static int 3521nfs_bwrite(struct buf *bp) 3522{ 3523 3524 return (nfs_writebp(bp, 1, curthread)); 3525} 3526 3527struct buf_ops buf_ops_nfs = { 3528 .bop_name = "buf_ops_nfs", 3529 .bop_write = nfs_bwrite, 3530 .bop_strategy = bufstrategy, 3531 .bop_sync = bufsync, 3532 .bop_bdflush = bufbdflush, 3533}; 3534