nfs_vnops.c revision 195203
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_vnops.c 195203 2009-06-30 19:10:17Z dfr $"); 37 38/* 39 * vnode op calls for Sun NFS version 2 and 3 40 */ 41 42#include "opt_inet.h" 43#include "opt_kdtrace.h" 44 45#include <sys/param.h> 46#include <sys/kernel.h> 47#include <sys/systm.h> 48#include <sys/resourcevar.h> 49#include <sys/proc.h> 50#include <sys/mount.h> 51#include <sys/bio.h> 52#include <sys/buf.h> 53#include <sys/malloc.h> 54#include <sys/mbuf.h> 55#include <sys/namei.h> 56#include <sys/socket.h> 57#include <sys/vnode.h> 58#include <sys/dirent.h> 59#include <sys/fcntl.h> 60#include <sys/lockf.h> 61#include <sys/stat.h> 62#include <sys/sysctl.h> 63#include <sys/signalvar.h> 64#include <sys/vimage.h> 65 66#include <vm/vm.h> 67#include <vm/vm_object.h> 68#include <vm/vm_extern.h> 69#include <vm/vm_object.h> 70 71#include <fs/fifofs/fifo.h> 72 73#include <nfs/nfsproto.h> 74#include <nfsclient/nfs.h> 75#include <nfsclient/nfsnode.h> 76#include <nfsclient/nfsmount.h> 77#include <nfsclient/nfs_kdtrace.h> 78#include <nfsclient/nfs_lock.h> 79#include <nfs/xdr_subs.h> 80#include <nfsclient/nfsm_subs.h> 81 82#include <net/if.h> 83#include <netinet/in.h> 84#include <netinet/in_var.h> 85#include <netinet/vinet.h> 86 87#include <machine/stdarg.h> 88 89#ifdef KDTRACE_HOOKS 90#include <sys/dtrace_bsd.h> 91 92dtrace_nfsclient_accesscache_flush_probe_func_t 93 dtrace_nfsclient_accesscache_flush_done_probe; 94uint32_t nfsclient_accesscache_flush_done_id; 95 96dtrace_nfsclient_accesscache_get_probe_func_t 97 dtrace_nfsclient_accesscache_get_hit_probe, 98 dtrace_nfsclient_accesscache_get_miss_probe; 99uint32_t nfsclient_accesscache_get_hit_id; 100uint32_t nfsclient_accesscache_get_miss_id; 101 102dtrace_nfsclient_accesscache_load_probe_func_t 103 dtrace_nfsclient_accesscache_load_done_probe; 104uint32_t nfsclient_accesscache_load_done_id; 105#endif /* !KDTRACE_HOOKS */ 106 107/* Defs */ 
108#define TRUE 1 109#define FALSE 0 110 111/* 112 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these 113 * calls are not in getblk() and brelse() so that they would not be necessary 114 * here. 115 */ 116#ifndef B_VMIO 117#define vfs_busy_pages(bp, f) 118#endif 119 120static vop_read_t nfsfifo_read; 121static vop_write_t nfsfifo_write; 122static vop_close_t nfsfifo_close; 123static int nfs_flush(struct vnode *, int, int); 124static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *); 125static vop_lookup_t nfs_lookup; 126static vop_create_t nfs_create; 127static vop_mknod_t nfs_mknod; 128static vop_open_t nfs_open; 129static vop_close_t nfs_close; 130static vop_access_t nfs_access; 131static vop_getattr_t nfs_getattr; 132static vop_setattr_t nfs_setattr; 133static vop_read_t nfs_read; 134static vop_fsync_t nfs_fsync; 135static vop_remove_t nfs_remove; 136static vop_link_t nfs_link; 137static vop_rename_t nfs_rename; 138static vop_mkdir_t nfs_mkdir; 139static vop_rmdir_t nfs_rmdir; 140static vop_symlink_t nfs_symlink; 141static vop_readdir_t nfs_readdir; 142static vop_strategy_t nfs_strategy; 143static int nfs_lookitup(struct vnode *, const char *, int, 144 struct ucred *, struct thread *, struct nfsnode **); 145static int nfs_sillyrename(struct vnode *, struct vnode *, 146 struct componentname *); 147static vop_access_t nfsspec_access; 148static vop_readlink_t nfs_readlink; 149static vop_print_t nfs_print; 150static vop_advlock_t nfs_advlock; 151static vop_advlockasync_t nfs_advlockasync; 152 153/* 154 * Global vfs data structures for nfs 155 */ 156struct vop_vector nfs_vnodeops = { 157 .vop_default = &default_vnodeops, 158 .vop_access = nfs_access, 159 .vop_advlock = nfs_advlock, 160 .vop_advlockasync = nfs_advlockasync, 161 .vop_close = nfs_close, 162 .vop_create = nfs_create, 163 .vop_fsync = nfs_fsync, 164 .vop_getattr = nfs_getattr, 165 .vop_getpages = nfs_getpages, 166 .vop_putpages = nfs_putpages, 167 .vop_inactive 
= nfs_inactive, 168 .vop_link = nfs_link, 169 .vop_lookup = nfs_lookup, 170 .vop_mkdir = nfs_mkdir, 171 .vop_mknod = nfs_mknod, 172 .vop_open = nfs_open, 173 .vop_print = nfs_print, 174 .vop_read = nfs_read, 175 .vop_readdir = nfs_readdir, 176 .vop_readlink = nfs_readlink, 177 .vop_reclaim = nfs_reclaim, 178 .vop_remove = nfs_remove, 179 .vop_rename = nfs_rename, 180 .vop_rmdir = nfs_rmdir, 181 .vop_setattr = nfs_setattr, 182 .vop_strategy = nfs_strategy, 183 .vop_symlink = nfs_symlink, 184 .vop_write = nfs_write, 185}; 186 187struct vop_vector nfs_fifoops = { 188 .vop_default = &fifo_specops, 189 .vop_access = nfsspec_access, 190 .vop_close = nfsfifo_close, 191 .vop_fsync = nfs_fsync, 192 .vop_getattr = nfs_getattr, 193 .vop_inactive = nfs_inactive, 194 .vop_print = nfs_print, 195 .vop_read = nfsfifo_read, 196 .vop_reclaim = nfs_reclaim, 197 .vop_setattr = nfs_setattr, 198 .vop_write = nfsfifo_write, 199}; 200 201static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, 202 struct componentname *cnp, struct vattr *vap); 203static int nfs_removerpc(struct vnode *dvp, const char *name, int namelen, 204 struct ucred *cred, struct thread *td); 205static int nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, 206 int fnamelen, struct vnode *tdvp, 207 const char *tnameptr, int tnamelen, 208 struct ucred *cred, struct thread *td); 209static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp, 210 struct sillyrename *sp); 211 212/* 213 * Global variables 214 */ 215struct mtx nfs_iod_mtx; 216struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; 217struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; 218int nfs_numasync = 0; 219vop_advlock_t *nfs_advlock_p = nfs_dolock; 220vop_reclaim_t *nfs_reclaim_p = NULL; 221#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) 222 223SYSCTL_DECL(_vfs_nfs); 224 225static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; 226SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 227 &nfsaccess_cache_timeout, 0, 
"NFS ACCESS cache timeout"); 228 229static int nfs_prime_access_cache = 0; 230SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, 231 &nfs_prime_access_cache, 0, 232 "Prime NFS ACCESS cache when fetching attributes"); 233 234static int nfsv3_commit_on_close = 0; 235SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW, 236 &nfsv3_commit_on_close, 0, "write+commit on close, else only write"); 237 238static int nfs_clean_pages_on_close = 1; 239SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, 240 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); 241 242int nfs_directio_enable = 0; 243SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, 244 &nfs_directio_enable, 0, "Enable NFS directio"); 245 246/* 247 * This sysctl allows other processes to mmap a file that has been opened 248 * O_DIRECT by a process. In general, having processes mmap the file while 249 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow 250 * this by default to prevent DoS attacks - to prevent a malicious user from 251 * opening up files O_DIRECT preventing other users from mmap'ing these 252 * files. "Protected" environments where stricter consistency guarantees are 253 * required can disable this knob. The process that opened the file O_DIRECT 254 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not 255 * meaningful. 
256 */ 257int nfs_directio_allow_mmap = 1; 258SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, 259 &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); 260 261#if 0 262SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, 263 &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); 264 265SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, 266 &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count"); 267#endif 268 269#define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \ 270 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \ 271 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP) 272 273/* 274 * SMP Locking Note : 275 * The list of locks after the description of the lock is the ordering 276 * of other locks acquired with the lock held. 277 * np->n_mtx : Protects the fields in the nfsnode. 278 VM Object Lock 279 VI_MTX (acquired indirectly) 280 * nmp->nm_mtx : Protects the fields in the nfsmount. 281 rep->r_mtx 282 * nfs_iod_mtx : Global lock, protects shared nfsiod state. 283 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. 284 nmp->nm_mtx 285 rep->r_mtx 286 * rep->r_mtx : Protects the fields in an nfsreq. 
 */

/*
 * Issue an NFSv3 ACCESS RPC asking the server about the rights in 'wmode'
 * and record the reply in the per-nfsnode access cache.
 *
 * The cache (np->n_accesscache[]) is a small per-uid array protected by
 * np->n_mtx: an existing entry for cred->cr_uid is refreshed in place;
 * otherwise the least-recently-stamped slot (lrupos) is recycled.
 *
 * On success the server-granted mode is stored through 'retmode' when it
 * is non-NULL.  Returns 0 or an NFS error number.
 *
 * NOTE: the nfsm_* macros expand to code that sets 'error' and jumps to
 * the nfsmout label on failure, which is why 'error' is tested without a
 * visible assignment in between.
 */
static int
nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
    struct ucred *cred, uint32_t *retmode)
{
	const int v3 = 1;	/* ACCESS exists only in NFSv3 */
	u_int32_t *tl;
	int error = 0, attrflag, i, lrupos;

	struct mbuf *mreq, *mrep, *md, *mb;
	caddr_t bpos, dpos;
	u_int32_t rmode;
	struct nfsnode *np = VTONFS(vp);

	nfsstats.rpccnt[NFSPROC_ACCESS]++;
	/* Build the request: file handle + requested access mask. */
	mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(wmode);
	nfsm_request(vp, NFSPROC_ACCESS, td, cred);
	nfsm_postop_attr(vp, attrflag);
	if (!error) {
		lrupos = 0;
		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
		rmode = fxdr_unsigned(u_int32_t, *tl);
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
			/* Refresh an existing entry for this uid. */
			if (np->n_accesscache[i].uid == cred->cr_uid) {
				np->n_accesscache[i].mode = rmode;
				np->n_accesscache[i].stamp = time_second;
				break;
			}
			/* Track the oldest slot in case we need to evict. */
			if (i > 0 && np->n_accesscache[i].stamp <
			    np->n_accesscache[lrupos].stamp)
				lrupos = i;
		}
		if (i == NFS_ACCESSCACHESIZE) {
			/* No entry for this uid: recycle the LRU slot. */
			np->n_accesscache[lrupos].uid = cred->cr_uid;
			np->n_accesscache[lrupos].mode = rmode;
			np->n_accesscache[lrupos].stamp = time_second;
		}
		mtx_unlock(&np->n_mtx);
		if (retmode != NULL)
			*retmode = rmode;
		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
	}
	m_freem(mrep);
nfsmout:
#ifdef KDTRACE_HOOKS
	if (error) {
		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
		    error);
	}
#endif
	return (error);
}

/*
 * nfs access vnode op.
 * For nfs version 2, just return ok.  File accesses may fail later.
 * For nfs version 3, use the access rpc to check accessibility.  If file modes
 * are changed on the server, accesses might still fail later.
 */
static int
nfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error = 0, i, gotahit;
	u_int32_t mode, rmode, wmode;
	int v3 = NFS_ISV3(vp);
	struct nfsnode *np = VTONFS(vp);

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}
	/*
	 * For nfs v3, check to see if we have done this recently, and if
	 * so return our cached result instead of making an ACCESS call.
	 * If not, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about.
	 */
	if (v3) {
		/* Translate VREAD/VWRITE/VEXEC into the NFSv3 ACCESS bits. */
		if (ap->a_accmode & VREAD)
			mode = NFSV3ACCESS_READ;
		else
			mode = 0;
		if (vp->v_type != VDIR) {
			if (ap->a_accmode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
			if (ap->a_accmode & VEXEC)
				mode |= NFSV3ACCESS_EXECUTE;
		} else {
			/* Directories use DELETE/LOOKUP instead. */
			if (ap->a_accmode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
				    NFSV3ACCESS_DELETE);
			if (ap->a_accmode & VEXEC)
				mode |= NFSV3ACCESS_LOOKUP;
		}
		/* XXX safety belt, only make blanket request if caching */
		if (nfsaccess_cache_timeout > 0) {
			/* Ask for everything so later checks can hit cache. */
			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
			    NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
			    NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
		} else {
			wmode = mode;
		}

		/*
		 * Does our cached result allow us to give a definite yes to
		 * this request?
		 */
		gotahit = 0;
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
			if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
				/* Hit only if fresh and all bits granted. */
				if (time_second < (np->n_accesscache[i].stamp +
				    nfsaccess_cache_timeout) &&
				    (np->n_accesscache[i].mode & mode) == mode) {
					nfsstats.accesscache_hits++;
					gotahit = 1;
				}
				break;
			}
		}
		mtx_unlock(&np->n_mtx);
#ifdef KDTRACE_HOOKS
		if (gotahit)
			KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
			    ap->a_cred->cr_uid, mode);
		else
			KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
			    ap->a_cred->cr_uid, mode);
#endif
		if (gotahit == 0) {
			/*
			 * Either a no, or a don't know.  Go to the wire.
			 */
			nfsstats.accesscache_misses++;
			error = nfs3_access_otw(vp, wmode, ap->a_td, ap->a_cred,
			    &rmode);
			if (!error) {
				if ((rmode & mode) != mode)
					error = EACCES;
			}
		}
		return (error);
	} else {
		/* NFSv2: emulate locally via the cached attributes. */
		if ((error = nfsspec_access(ap)) != 0) {
			return (error);
		}
		/*
		 * Attempt to prevent a mapped root from accessing a file
		 * which it shouldn't.  We try to read a byte from the file
		 * if the user is root and the file is not zero length.
		 * After calling nfsspec_access, we should have the correct
		 * file size cached.
		 */
		mtx_lock(&np->n_mtx);
		if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
		    && VTONFS(vp)->n_size > 0) {
			struct iovec aiov;
			struct uio auio;
			char buf[1];

			mtx_unlock(&np->n_mtx);
			aiov.iov_base = buf;
			aiov.iov_len = 1;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = 0;
			auio.uio_resid = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = ap->a_td;

			/* Probe with the RPC appropriate for the type. */
			if (vp->v_type == VREG)
				error = nfs_readrpc(vp, &auio, ap->a_cred);
			else if (vp->v_type == VDIR) {
				char* bp;
				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
				aiov.iov_base = bp;
				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
				free(bp, M_TEMP);
			} else if (vp->v_type == VLNK)
				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
			else
				error = EACCES;
		} else
			mtx_unlock(&np->n_mtx);
		return (error);
	}
}

int nfs_otw_getattr_avoid = 0;

/*
 * nfs open vnode op
 * Check to see if the type is ok
 * and that deletion is not in progress.
 * For paged in text files, you will need to flush the page cache
 * if consistency is lost.
 */
/* ARGSUSED */
static int
nfs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;
	int fmode = ap->a_mode;

	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
		return (EOPNOTSUPP);

	/*
	 * Get a valid lease. If cached data is stale, flush it.
	 */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & NMODIFIED) {
		/* Locally modified: push dirty data, then revalidate. */
		mtx_unlock(&np->n_mtx);
		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
		if (error == EINTR || error == EIO)
			return (error);
		np->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
		if (vp->v_type == VDIR)
			np->n_direofoffset = 0;
		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
		if (error)
			return (error);
		mtx_lock(&np->n_mtx);
		np->n_mtime = vattr.va_mtime;
		mtx_unlock(&np->n_mtx);
	} else {
		struct thread *td = curthread;

		/*
		 * Force a fresh GETATTR unless the cached attributes were
		 * loaded by this very thread/syscall (n_ac_ts_* record who
		 * filled the cache last).
		 */
		if (np->n_ac_ts_syscalls != td->td_syscalls ||
		    np->n_ac_ts_tid != td->td_tid ||
		    td->td_proc == NULL ||
		    np->n_ac_ts_pid != td->td_proc->p_pid) {
			np->n_attrstamp = 0;
			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
		}
		mtx_unlock(&np->n_mtx);
		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
		if (error)
			return (error);
		mtx_lock(&np->n_mtx);
		if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
			/* Server mtime changed: discard stale cached data. */
			if (vp->v_type == VDIR)
				np->n_direofoffset = 0;
			mtx_unlock(&np->n_mtx);
			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error == EINTR || error == EIO) {
				return (error);
			}
			mtx_lock(&np->n_mtx);
			np->n_mtime = vattr.va_mtime;
		}
		mtx_unlock(&np->n_mtx);
	}
	/*
	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
	 */
	if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		if (np->n_directio_opens == 0) {
			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error)
				return (error);
			mtx_lock(&np->n_mtx);
			np->n_flag |= NNONCACHE;
			mtx_unlock(&np->n_mtx);
		}
		np->n_directio_opens++;
	}
	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
	return (0);
}

/*
 * nfs close vnode op
 * What an NFS client should do upon close after writing is a debatable issue.
 * Most NFS clients push delayed writes to the server upon close, basically for
 * two reasons:
 * 1 - So that any write errors may be reported back to the client process
 *     doing the close system call. By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 * 2 - To put a worst case upper bound on cache inconsistency between
 *     multiple clients for the file.
 * There is also a consistency problem for Version 2 of the protocol w.r.t.
 * not being able to tell if other clients are writing a file concurrently,
 * since there is no way of knowing if the changed modify time in the reply
 * is only due to the write for this client.
 * (NFS Version 3 provides weak cache consistency data in the reply that
 *  should be sufficient to detect and handle this case.)
 *
 * The current code does the following:
 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
 *                     or commit them (this satisfies 1 and 2 except for the
 *                     case where the server crashes after this close but
 *                     before the commit RPC, which is felt to be "good
 *                     enough". Changing the last argument to nfs_flush() to
 *                     a 1 would force a commit operation, if it is felt a
 *                     commit is necessary now.
 */
/* ARGSUSED */
static int
nfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	int fmode = ap->a_fflag;

	if (vp->v_type == VREG) {
		/*
		 * Examine and clean dirty pages, regardless of NMODIFIED.
		 * This closes a major hole in close-to-open consistency.
		 * We want to push out all dirty pages (and buffers) on
		 * close, regardless of whether they were dirtied by
		 * mmap'ed writes or via write().
		 */
		if (nfs_clean_pages_on_close && vp->v_object) {
			VM_OBJECT_LOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, 0, 0, 0);
			VM_OBJECT_UNLOCK(vp->v_object);
		}
		mtx_lock(&np->n_mtx);
		if (np->n_flag & NMODIFIED) {
			/* Drop n_mtx before sleeping in the flush paths. */
			mtx_unlock(&np->n_mtx);
			if (NFS_ISV3(vp)) {
				/*
				 * Under NFSv3 we have dirty buffers to dispose of.  We
				 * must flush them to the NFS server.  We have the option
				 * of waiting all the way through the commit rpc or just
				 * waiting for the initial write.  The default is to only
				 * wait through the initial write so the data is in the
				 * server's cache, which is roughly similar to the state
				 * a standard disk subsystem leaves the file in on close().
				 *
				 * We cannot clear the NMODIFIED bit in np->n_flag due to
				 * potential races with other processes, and certainly
				 * cannot clear it if we don't commit.
				 */
				int cm = nfsv3_commit_on_close ? 1 : 0;
				error = nfs_flush(vp, MNT_WAIT, cm);
				/* np->n_flag &= ~NMODIFIED; */
			} else
				error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			mtx_lock(&np->n_mtx);
		}
		/* Report any deferred async write error to this closer. */
		if (np->n_flag & NWRITEERR) {
			np->n_flag &= ~NWRITEERR;
			error = np->n_error;
		}
		mtx_unlock(&np->n_mtx);
	}
	if (nfs_directio_enable)
		KASSERT((np->n_directio_asyncwr == 0),
		    ("nfs_close: dirty unflushed (%d) directio buffers\n",
		    np->n_directio_asyncwr));
	if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		/* Last O_DIRECT close re-enables caching for the vnode. */
		mtx_lock(&np->n_mtx);
		KASSERT((np->n_directio_opens > 0),
		    ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
		np->n_directio_opens--;
		if (np->n_directio_opens == 0)
			np->n_flag &= ~NNONCACHE;
		mtx_unlock(&np->n_mtx);
	}
	return (error);
}

/*
 * nfs getattr call from vfs.
 */
static int
nfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct thread *td = curthread;
	struct vattr *vap = ap->a_vap;
	struct vattr vattr;
	caddr_t bpos, dpos;
	int error = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(vp);

	/*
	 * Update local times for special files.
	 */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & (NACC | NUPD))
		np->n_flag |= NCHG;
	mtx_unlock(&np->n_mtx);
	/*
	 * First look in the cache.
	 */
	if (nfs_getattrcache(vp, &vattr) == 0)
		goto nfsmout;
	if (v3 && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) {
		/*
		 * Use an ACCESS RPC (whose reply piggybacks attributes)
		 * to prime both caches, then retry the attribute cache.
		 */
		nfsstats.accesscache_misses++;
		nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred, NULL);
		if (nfs_getattrcache(vp, &vattr) == 0)
			goto nfsmout;
	}
	/* Cache miss: do a real GETATTR over the wire. */
	nfsstats.rpccnt[NFSPROC_GETATTR]++;
	mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred);
	if (!error) {
		nfsm_loadattr(vp, &vattr);
	}
	m_freem(mrep);
nfsmout:
	/*
	 * Copy the attributes out field by field.
	 * NOTE(review): on an RPC failure 'vattr' may be only partially
	 * filled here; 'error' is returned so callers should ignore *vap
	 * in that case — confirm callers do.
	 */
	vap->va_type = vattr.va_type;
	vap->va_mode = vattr.va_mode;
	vap->va_nlink = vattr.va_nlink;
	vap->va_uid = vattr.va_uid;
	vap->va_gid = vattr.va_gid;
	vap->va_fsid = vattr.va_fsid;
	vap->va_fileid = vattr.va_fileid;
	vap->va_size = vattr.va_size;
	vap->va_blocksize = vattr.va_blocksize;
	vap->va_atime = vattr.va_atime;
	vap->va_mtime = vattr.va_mtime;
	vap->va_ctime = vattr.va_ctime;
	vap->va_gen = vattr.va_gen;
	vap->va_flags = vattr.va_flags;
	vap->va_rdev = vattr.va_rdev;
	vap->va_bytes = vattr.va_bytes;
	vap->va_filerev = vattr.va_filerev;

	return (error);
}

/*
 * nfs setattr call.
748 */ 749static int 750nfs_setattr(struct vop_setattr_args *ap) 751{ 752 struct vnode *vp = ap->a_vp; 753 struct nfsnode *np = VTONFS(vp); 754 struct vattr *vap = ap->a_vap; 755 struct thread *td = curthread; 756 int error = 0; 757 u_quad_t tsize; 758 759#ifndef nolint 760 tsize = (u_quad_t)0; 761#endif 762 763 /* 764 * Setting of flags is not supported. 765 */ 766 if (vap->va_flags != VNOVAL) 767 return (EOPNOTSUPP); 768 769 /* 770 * Disallow write attempts if the filesystem is mounted read-only. 771 */ 772 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 773 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 774 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && 775 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 776 error = EROFS; 777 goto out; 778 } 779 if (vap->va_size != VNOVAL) { 780 switch (vp->v_type) { 781 case VDIR: 782 return (EISDIR); 783 case VCHR: 784 case VBLK: 785 case VSOCK: 786 case VFIFO: 787 if (vap->va_mtime.tv_sec == VNOVAL && 788 vap->va_atime.tv_sec == VNOVAL && 789 vap->va_mode == (mode_t)VNOVAL && 790 vap->va_uid == (uid_t)VNOVAL && 791 vap->va_gid == (gid_t)VNOVAL) 792 return (0); 793 vap->va_size = VNOVAL; 794 break; 795 default: 796 /* 797 * Disallow write attempts if the filesystem is 798 * mounted read-only. 799 */ 800 if (vp->v_mount->mnt_flag & MNT_RDONLY) 801 return (EROFS); 802 /* 803 * We run vnode_pager_setsize() early (why?), 804 * we must set np->n_size now to avoid vinvalbuf 805 * V_SAVE races that might setsize a lower 806 * value. 
807 */ 808 mtx_lock(&np->n_mtx); 809 tsize = np->n_size; 810 mtx_unlock(&np->n_mtx); 811 error = nfs_meta_setsize(vp, ap->a_cred, td, 812 vap->va_size); 813 mtx_lock(&np->n_mtx); 814 if (np->n_flag & NMODIFIED) { 815 tsize = np->n_size; 816 mtx_unlock(&np->n_mtx); 817 if (vap->va_size == 0) 818 error = nfs_vinvalbuf(vp, 0, td, 1); 819 else 820 error = nfs_vinvalbuf(vp, V_SAVE, td, 1); 821 if (error) { 822 vnode_pager_setsize(vp, tsize); 823 goto out; 824 } 825 } else 826 mtx_unlock(&np->n_mtx); 827 /* 828 * np->n_size has already been set to vap->va_size 829 * in nfs_meta_setsize(). We must set it again since 830 * nfs_loadattrcache() could be called through 831 * nfs_meta_setsize() and could modify np->n_size. 832 */ 833 mtx_lock(&np->n_mtx); 834 np->n_vattr.va_size = np->n_size = vap->va_size; 835 mtx_unlock(&np->n_mtx); 836 }; 837 } else { 838 mtx_lock(&np->n_mtx); 839 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 840 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 841 mtx_unlock(&np->n_mtx); 842 if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && 843 (error == EINTR || error == EIO)) 844 return error; 845 } else 846 mtx_unlock(&np->n_mtx); 847 } 848 error = nfs_setattrrpc(vp, vap, ap->a_cred); 849 if (error && vap->va_size != VNOVAL) { 850 mtx_lock(&np->n_mtx); 851 np->n_size = np->n_vattr.va_size = tsize; 852 vnode_pager_setsize(vp, tsize); 853 mtx_unlock(&np->n_mtx); 854 } 855out: 856 return (error); 857} 858 859/* 860 * Do an nfs setattr rpc. 
 */
static int
nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred)
{
	struct nfsv2_sattr *sp;
	struct nfsnode *np = VTONFS(vp);
	caddr_t bpos, dpos;
	u_int32_t *tl;
	int error = 0, i, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(vp);

	nfsstats.rpccnt[NFSPROC_SETATTR]++;
	mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	if (v3) {
		/* v3: XDR sattr3 + a FALSE "guard" (no ctime check). */
		nfsm_v3attrbuild(vap, TRUE);
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		*tl = nfs_false;
	} else {
		/* v2: fixed sattr; unset fields are encoded as -1. */
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		if (vap->va_mode == (mode_t)VNOVAL)
			sp->sa_mode = nfs_xdrneg1;
		else
			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
		if (vap->va_uid == (uid_t)VNOVAL)
			sp->sa_uid = nfs_xdrneg1;
		else
			sp->sa_uid = txdr_unsigned(vap->va_uid);
		if (vap->va_gid == (gid_t)VNOVAL)
			sp->sa_gid = nfs_xdrneg1;
		else
			sp->sa_gid = txdr_unsigned(vap->va_gid);
		sp->sa_size = txdr_unsigned(vap->va_size);
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(vp, NFSPROC_SETATTR, curthread, cred);
	if (v3) {
		/*
		 * Attributes changed on the server, so any cached ACCESS
		 * results may be stale: invalidate the whole access cache.
		 */
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
			np->n_accesscache[i].stamp = 0;
		mtx_unlock(&np->n_mtx);
		KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
		nfsm_wcc_data(vp, wccflag);
	} else
		nfsm_loadattr(vp, NULL);
	m_freem(mrep);
nfsmout:
	return (error);
}

/*
 * nfs lookup call, one step at a time...
917 * First look in cache 918 * If not found, unlock the directory nfsnode and do the rpc 919 */ 920static int 921nfs_lookup(struct vop_lookup_args *ap) 922{ 923 struct componentname *cnp = ap->a_cnp; 924 struct vnode *dvp = ap->a_dvp; 925 struct vnode **vpp = ap->a_vpp; 926 struct mount *mp = dvp->v_mount; 927 struct vattr vattr; 928 int flags = cnp->cn_flags; 929 struct vnode *newvp; 930 struct nfsmount *nmp; 931 caddr_t bpos, dpos; 932 struct mbuf *mreq, *mrep, *md, *mb; 933 long len; 934 nfsfh_t *fhp; 935 struct nfsnode *np; 936 int error = 0, attrflag, fhsize, ltype; 937 int v3 = NFS_ISV3(dvp); 938 struct thread *td = cnp->cn_thread; 939 940 *vpp = NULLVP; 941 if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && 942 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 943 return (EROFS); 944 if (dvp->v_type != VDIR) 945 return (ENOTDIR); 946 nmp = VFSTONFS(mp); 947 np = VTONFS(dvp); 948 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) { 949 *vpp = NULLVP; 950 return (error); 951 } 952 error = cache_lookup(dvp, vpp, cnp); 953 if (error > 0 && error != ENOENT) 954 return (error); 955 if (error == -1) { 956 /* 957 * We only accept a positive hit in the cache if the 958 * change time of the file matches our cached copy. 959 * Otherwise, we discard the cache entry and fallback 960 * to doing a lookup RPC. 961 */ 962 newvp = *vpp; 963 if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred) 964 && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { 965 nfsstats.lookupcache_hits++; 966 if (cnp->cn_nameiop != LOOKUP && 967 (flags & ISLASTCN)) 968 cnp->cn_flags |= SAVENAME; 969 return (0); 970 } 971 cache_purge(newvp); 972 if (dvp != newvp) 973 vput(newvp); 974 else 975 vrele(newvp); 976 *vpp = NULLVP; 977 } else if (error == ENOENT) { 978 if (dvp->v_iflag & VI_DOOMED) 979 return (ENOENT); 980 /* 981 * We only accept a negative hit in the cache if the 982 * modification time of the parent directory matches 983 * our cached copy. 
Otherwise, we discard all of the 984 * negative cache entries for this directory. 985 */ 986 if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && 987 vattr.va_mtime.tv_sec == np->n_dmtime) { 988 nfsstats.lookupcache_hits++; 989 return (ENOENT); 990 } 991 cache_purge_negative(dvp); 992 mtx_lock(&np->n_mtx); 993 np->n_dmtime = 0; 994 mtx_unlock(&np->n_mtx); 995 } 996 error = 0; 997 newvp = NULLVP; 998 nfsstats.lookupcache_misses++; 999 nfsstats.rpccnt[NFSPROC_LOOKUP]++; 1000 len = cnp->cn_namelen; 1001 mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, 1002 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); 1003 mb = mreq; 1004 bpos = mtod(mb, caddr_t); 1005 nfsm_fhtom(dvp, v3); 1006 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); 1007 nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred); 1008 if (error) { 1009 if (v3) { 1010 nfsm_postop_attr(dvp, attrflag); 1011 m_freem(mrep); 1012 } 1013 goto nfsmout; 1014 } 1015 nfsm_getfh(fhp, fhsize, v3); 1016 1017 /* 1018 * Handle RENAME case... 1019 */ 1020 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { 1021 if (NFS_CMPFH(np, fhp, fhsize)) { 1022 m_freem(mrep); 1023 return (EISDIR); 1024 } 1025 error = nfs_nget(mp, fhp, fhsize, &np, LK_EXCLUSIVE); 1026 if (error) { 1027 m_freem(mrep); 1028 return (error); 1029 } 1030 newvp = NFSTOV(np); 1031 if (v3) { 1032 nfsm_postop_attr(newvp, attrflag); 1033 nfsm_postop_attr(dvp, attrflag); 1034 } else 1035 nfsm_loadattr(newvp, NULL); 1036 *vpp = newvp; 1037 m_freem(mrep); 1038 cnp->cn_flags |= SAVENAME; 1039 return (0); 1040 } 1041 1042 if (flags & ISDOTDOT) { 1043 ltype = VOP_ISLOCKED(dvp); 1044 error = vfs_busy(mp, MBF_NOWAIT); 1045 if (error != 0) { 1046 VOP_UNLOCK(dvp, 0); 1047 error = vfs_busy(mp, 0); 1048 vn_lock(dvp, ltype | LK_RETRY); 1049 if (error == 0 && (dvp->v_iflag & VI_DOOMED)) { 1050 vfs_unbusy(mp); 1051 error = ENOENT; 1052 } 1053 if (error != 0) { 1054 m_freem(mrep); 1055 return (error); 1056 } 1057 } 1058 VOP_UNLOCK(dvp, 0); 1059 error = nfs_nget(mp, fhp, fhsize, 
&np, cnp->cn_lkflags); 1060 if (error == 0) 1061 newvp = NFSTOV(np); 1062 vfs_unbusy(mp); 1063 vn_lock(dvp, ltype | LK_RETRY); 1064 if (dvp->v_iflag & VI_DOOMED) { 1065 if (error == 0) { 1066 if (newvp == dvp) 1067 vrele(newvp); 1068 else 1069 vput(newvp); 1070 } 1071 error = ENOENT; 1072 } 1073 if (error) { 1074 m_freem(mrep); 1075 return (error); 1076 } 1077 } else if (NFS_CMPFH(np, fhp, fhsize)) { 1078 VREF(dvp); 1079 newvp = dvp; 1080 } else { 1081 error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags); 1082 if (error) { 1083 m_freem(mrep); 1084 return (error); 1085 } 1086 newvp = NFSTOV(np); 1087 } 1088 if (v3) { 1089 nfsm_postop_attr(newvp, attrflag); 1090 nfsm_postop_attr(dvp, attrflag); 1091 } else 1092 nfsm_loadattr(newvp, NULL); 1093 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) 1094 cnp->cn_flags |= SAVENAME; 1095 if ((cnp->cn_flags & MAKEENTRY) && 1096 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { 1097 np->n_ctime = np->n_vattr.va_ctime.tv_sec; 1098 cache_enter(dvp, newvp, cnp); 1099 } 1100 *vpp = newvp; 1101 m_freem(mrep); 1102nfsmout: 1103 if (error) { 1104 if (newvp != NULLVP) { 1105 vput(newvp); 1106 *vpp = NULLVP; 1107 } 1108 1109 if (error != ENOENT) 1110 goto done; 1111 1112 /* The requested file was not found. */ 1113 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && 1114 (flags & ISLASTCN)) { 1115 /* 1116 * XXX: UFS does a full VOP_ACCESS(dvp, 1117 * VWRITE) here instead of just checking 1118 * MNT_RDONLY. 1119 */ 1120 if (mp->mnt_flag & MNT_RDONLY) 1121 return (EROFS); 1122 cnp->cn_flags |= SAVENAME; 1123 return (EJUSTRETURN); 1124 } 1125 1126 if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) { 1127 /* 1128 * Maintain n_dmtime as the modification time 1129 * of the parent directory when the oldest -ve 1130 * name cache entry for this directory was 1131 * added. 
			 */
			mtx_lock(&np->n_mtx);
			if (np->n_dmtime == 0)
				np->n_dmtime = np->n_vattr.va_mtime.tv_sec;
			mtx_unlock(&np->n_mtx);
			/* Negative cache entry: remember "name" is absent. */
			cache_enter(dvp, NULL, cnp);
		}
		return (ENOENT);
	}
done:
	return (error);
}

/*
 * nfs read call.
 * Just call nfs_bioread() to do the work.
 *
 * Reads are only meaningful on regular files; directories must go
 * through VOP_READDIR (hence EISDIR) and other vnode types are rejected.
 */
static int
nfs_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VREG:
		return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
	case VDIR:
		return (EISDIR);
	default:
		return (EOPNOTSUPP);
	}
}

/*
 * nfs readlink call
 * Delegates to nfs_bioread() so link targets go through the buffer cache.
 */
static int
nfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_type != VLNK)
		return (EINVAL);
	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
}

/*
 * Do a readlink rpc.
 * Called by nfs_doio() from below the buffer cache.
 *
 * NOTE(review): the nfsm_* macros carry hidden control flow -- they set
 * "error" and can jump to the "nfsmout" label on decode failure (no
 * explicit goto to that label appears in this body).
 */
int
nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	caddr_t bpos, dpos;
	int error = 0, len, attrflag;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(vp);

	nfsstats.rpccnt[NFSPROC_READLINK]++;
	mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
	if (v3)
		nfsm_postop_attr(vp, attrflag);
	if (!error) {
		nfsm_strsiz(len, NFS_MAXPATHLEN);
		if (len == NFS_MAXPATHLEN) {
			struct nfsnode *np = VTONFS(vp);
			mtx_lock(&np->n_mtx);
			/*
			 * Clip the returned length to the cached file size;
			 * a reply of exactly NFS_MAXPATHLEN may be padded.
			 */
			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
				len = np->n_size;
			mtx_unlock(&np->n_mtx);
		}
		nfsm_mtouio(uiop, len);
	}
	m_freem(mrep);
nfsmout:
	return (error);
}

/*
 * nfs read rpc call
 * Ditto above
 *
 * Loops issuing READ RPCs of at most nm_rsize bytes until uiop is
 * satisfied, EOF is reported (v3), or a short reply implies EOF (v2).
 */
int
nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	u_int32_t *tl;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsmount *nmp;
	int error = 0, len, retlen, tsiz, eof, attrflag;
	int v3 = NFS_ISV3(vp);
	int rsize;

#ifndef nolint
	eof = 0;
#endif
	nmp = VFSTONFS(vp->v_mount);
	tsiz = uiop->uio_resid;
	mtx_lock(&nmp->nm_mtx);
	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
		mtx_unlock(&nmp->nm_mtx);
		return (EFBIG);
	}
	/* Snapshot rsize under the mount lock; it is stable for the loop. */
	rsize = nmp->nm_rsize;
	mtx_unlock(&nmp->nm_mtx);
	while (tsiz > 0) {
		nfsstats.rpccnt[NFSPROC_READ]++;
		len = (tsiz > rsize) ? rsize : tsiz;
		mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, v3);
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3);
		if (v3) {
			/* v3: 64-bit offset + 32-bit count. */
			txdr_hyper(uiop->uio_offset, tl);
			*(tl + 2) = txdr_unsigned(len);
		} else {
			/* v2: 32-bit offset, count, and unused "totalcount". */
			*tl++ = txdr_unsigned(uiop->uio_offset);
			*tl++ = txdr_unsigned(len);
			*tl = 0;
		}
		nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred);
		if (v3) {
			nfsm_postop_attr(vp, attrflag);
			if (error) {
				m_freem(mrep);
				goto nfsmout;
			}
			tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
			eof = fxdr_unsigned(int, *(tl + 1));
		} else {
			nfsm_loadattr(vp, NULL);
		}
		nfsm_strsiz(retlen, rsize);
		nfsm_mtouio(uiop, retlen);
		m_freem(mrep);
		tsiz -= retlen;
		if (v3) {
			/* v3 reports EOF explicitly. */
			if (eof || retlen == 0) {
				tsiz = 0;
			}
		} else if (retlen < len) {
			/* v2: a short read implies EOF. */
			tsiz = 0;
		}
	}
nfsmout:
	return (error);
}

/*
 * nfs write call
 */
int
nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
    int *iomode, int *must_commit)
{
	u_int32_t *tl;
	int32_t backup;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
	int wsize;

#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1)
		panic("nfs: writerpc iovcnt > 1");
#endif
	*must_commit = 0;
	tsiz = uiop->uio_resid;
	mtx_lock(&nmp->nm_mtx);
	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
		mtx_unlock(&nmp->nm_mtx);
		return (EFBIG);
	}
	/* Snapshot wsize under the mount lock for the RPC loop below. */
	wsize = nmp->nm_wsize;
	mtx_unlock(&nmp->nm_mtx);
	while (tsiz > 0) {
		nfsstats.rpccnt[NFSPROC_WRITE]++;
		len = (tsiz > wsize) ? wsize : tsiz;
		mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
			NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, v3);
		if (v3) {
			/* v3: 64-bit offset, count, stable-how, count again. */
			tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
			txdr_hyper(uiop->uio_offset, tl);
			tl += 2;
			*tl++ = txdr_unsigned(len);
			*tl++ = txdr_unsigned(*iomode);
			*tl = txdr_unsigned(len);
		} else {
			u_int32_t x;

			tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
			/* Set both "begin" and "current" to non-garbage. */
			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
			*tl++ = x;	/* "begin offset" */
			*tl++ = x;	/* "current offset" */
			x = txdr_unsigned(len);
			*tl++ = x;	/* total to this offset */
			*tl = x;	/* size of this write */
		}
		nfsm_uiotom(uiop, len);
		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred);
		if (v3) {
			wccflag = NFSV3_WCCCHK;
			nfsm_wcc_data(vp, wccflag);
			if (!error) {
				tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED
					+ NFSX_V3WRITEVERF);
				rlen = fxdr_unsigned(int, *tl++);
				if (rlen == 0) {
					/* Server claims it wrote nothing. */
					error = NFSERR_IO;
					m_freem(mrep);
					break;
				} else if (rlen < len) {
					/*
					 * Short write: back the uio up to the
					 * first unwritten byte so the next
					 * iteration (or caller) retries it.
					 */
					backup = len - rlen;
					uiop->uio_iov->iov_base =
					    (char *)uiop->uio_iov->iov_base -
					    backup;
					uiop->uio_iov->iov_len += backup;
					uiop->uio_offset -= backup;
					uiop->uio_resid += backup;
					len = rlen;
				}
				commit = fxdr_unsigned(int, *tl++);

				/*
				 * Return the lowest commitment level
				 * obtained by any of the RPCs.
				 */
				if (committed == NFSV3WRITE_FILESYNC)
					committed = commit;
				else if (committed == NFSV3WRITE_DATASYNC &&
					commit == NFSV3WRITE_UNSTABLE)
					committed = commit;
				mtx_lock(&nmp->nm_mtx);
				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
					NFSX_V3WRITEVERF);
				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
				} else if (bcmp((caddr_t)tl,
				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
					/*
					 * Write verifier changed: the server
					 * rebooted, so unstable data must be
					 * re-committed by the caller.
					 */
					*must_commit = 1;
					bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
					    NFSX_V3WRITEVERF);
				}
				mtx_unlock(&nmp->nm_mtx);
			}
		} else {
			nfsm_loadattr(vp, NULL);
		}
		if (wccflag) {
			mtx_lock(&(VTONFS(vp))->n_mtx);
			VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
			mtx_unlock(&(VTONFS(vp))->n_mtx);
		}
		m_freem(mrep);
		if (error)
			break;
		tsiz -= len;
	}
nfsmout:
	if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
		committed = NFSV3WRITE_FILESYNC;
	*iomode = committed;
	if (error)
		uiop->uio_resid = tsiz;
	return (error);
}

/*
 * nfs mknod rpc
 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 * mode set to specify the file type and the size field for rdev.
 */
static int
nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
    struct vattr *vap)
{
	struct nfsv2_sattr *sp;
	u_int32_t *tl;
	struct vnode *newvp = NULL;
	struct nfsnode *np = NULL;
	struct vattr vattr;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	u_int32_t rdev;
	int v3 = NFS_ISV3(dvp);

	/* Only device, fifo, and socket nodes can be represented. */
	if (vap->va_type == VCHR || vap->va_type == VBLK)
		rdev = txdr_unsigned(vap->va_rdev);
	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
		rdev = nfs_xdrneg1;
	else {
		return (EOPNOTSUPP);
	}
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
		return (error);
	nfsstats.rpccnt[NFSPROC_MKNOD]++;
	/* Note: the second '+' below is a harmless unary plus. */
	mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
		+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	if (v3) {
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		*tl++ = vtonfsv3_type(vap->va_type);
		nfsm_v3attrbuild(vap, FALSE);
		if (vap->va_type == VCHR || vap->va_type == VBLK) {
			/* v3 carries major/minor explicitly. */
			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(major(vap->va_rdev));
			*tl = txdr_unsigned(minor(vap->va_rdev));
		}
	} else {
		/* v2 kludge: encode type in sa_mode, rdev in sa_size. */
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = rdev;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred);
	if (!error) {
		nfsm_mtofh(dvp, newvp, v3, gotvp);
		if (!gotvp) {
			/* No handle in the reply; look the name up instead. */
			if (newvp) {
				vput(newvp);
				newvp = NULL;
			}
			error = nfs_lookitup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
			if (!error)
				newvp = NFSTOV(np);
		}
	}
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		if (cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, newvp, cnp);
		*vpp = newvp;
	}
	/* The directory changed; invalidate its cached attributes if no wcc. */
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag) {
		VTONFS(dvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	return (error);
}

/*
 * nfs mknod vop
 * just call nfs_mknodrpc() to do the work.
 */
/* ARGSUSED */
static int
nfs_mknod(struct vop_mknod_args *ap)
{
	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
}

/* Monotonic counter folded into the v3 exclusive-create verifier. */
static u_long create_verf;
/*
 * nfs file create call
 */
static int
nfs_create(struct vop_create_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	u_int32_t *tl;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vattr vattr;
	int v3 = NFS_ISV3(dvp);

	/*
	 * Oops, not for me..
	 */
	if (vap->va_type == VSOCK)
		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
		return (error);
	if (vap->va_vaflags & VA_EXCLUSIVE)
		fmode |= O_EXCL;
again:
	nfsstats.rpccnt[NFSPROC_CREATE]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	if (v3) {
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		if (fmode & O_EXCL) {
			/*
			 * Exclusive create: send a verifier built from the
			 * host's first IP address (when available) plus a
			 * local counter, so retries can be distinguished.
			 */
			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
			tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF);
#ifdef INET
			INIT_VNET_INET(curvnet);
			IN_IFADDR_RLOCK();
			if (!TAILQ_EMPTY(&V_in_ifaddrhead))
				*tl++ = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr;
			else
#endif
				*tl++ = create_verf;
#ifdef INET
			IN_IFADDR_RUNLOCK();
#endif
			*tl = ++create_verf;
		} else {
			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
			nfsm_v3attrbuild(vap, FALSE);
		}
	} else {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = 0;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred);
	if (!error) {
		nfsm_mtofh(dvp, newvp, v3, gotvp);
		if (!gotvp) {
			/* Reply had no file handle; look the name up. */
			if (newvp) {
				vput(newvp);
				newvp = NULL;
			}
			error = nfs_lookitup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
			if (!error)
				newvp = NFSTOV(np);
		}
	}
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	if (error) {
		/*
		 * Server doesn't support exclusive create; retry once in
		 * unchecked mode.
		 */
		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
			fmode &= ~O_EXCL;
			goto again;
		}
		if (newvp)
			vput(newvp);
	} else if (v3 && (fmode & O_EXCL)) {
		/*
		 * We are normally called with only a partially initialized
		 * VAP. Since the NFSv3 spec says that server may use the
		 * file attributes to store the verifier, the spec requires
		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
		 * in atime, but we can't really assume that all servers will
		 * so we ensure that our SETATTR sets both atime and mtime.
		 */
		if (vap->va_mtime.tv_sec == VNOVAL)
			vfs_timestamp(&vap->va_mtime);
		if (vap->va_atime.tv_sec == VNOVAL)
			vap->va_atime = vap->va_mtime;
		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred);
		if (error)
			vput(newvp);
	}
	if (!error) {
		if (cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, newvp, cnp);
		*ap->a_vpp = newvp;
	}
	/* Directory changed; drop cached attrs unless wcc data refreshed them. */
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag) {
		VTONFS(dvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	return (error);
}

/*
 * nfs file remove call
 * To try and make nfs semantics closer to ufs semantics, a file that has
 * other processes using the vnode is renamed instead of removed and then
 * removed later on the last close.
 * - If v_usecount > 1
 *	  If a rename is not already in the works
 *	     call nfs_sillyrename() to set it up
 *	  else
 *	     do the remove rpc
 */
static int
nfs_remove(struct vop_remove_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

#ifndef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("nfs_remove: no name");
	if (vrefcnt(vp) < 1)
		panic("nfs_remove: bad v_usecount");
#endif
	if (vp->v_type == VDIR)
		error = EPERM;
	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
	    !VOP_GETATTR(vp, &vattr, cnp->cn_cred) && vattr.va_nlink > 1)) {
		/*
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized. Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but since you get this if
		 * another host removes the file..
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1);
		/* Do the rpc */
		if (error != EINTR && error != EIO)
			error = nfs_removerpc(dvp, cnp->cn_nameptr,
				cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost..
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp);
	/* Attributes (e.g. nlink) are now stale; force a refetch. */
	np->n_attrstamp = 0;
	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	return (error);
}

/*
 * nfs file remove rpc called from nfs_inactive
 */
int
nfs_removeit(struct sillyrename *sp)
{
	/*
	 * Make sure that the directory vnode is still valid.
	 * XXX we should lock sp->s_dvp here.
	 */
	if (sp->s_dvp->v_type == VBAD)
		return (0);
	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
		NULL));
}

/*
 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
 *
 * NOTE(review): the nfsm_* macros can set "error" and jump to the
 * "nfsmout" label below; no explicit goto appears in this body.
 */
static int
nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
    struct ucred *cred, struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_REMOVE]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE,
		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag) {
		VTONFS(dvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	return (error);
}

/*
 * nfs file rename call
 */
static int
nfs_rename(struct vop_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	int error;

#ifndef DIAGNOSTIC
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("nfs_rename: no name");
#endif
	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	if (fvp == tvp) {
		nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto out;
	}
	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
		goto out;

	/*
	 * We have to flush B_DELWRI data prior to renaming
	 * the file. If we don't, the delayed-write buffers
	 * can be flushed out later after the file has gone stale
	 * under NFSV3.  NFSV2 does not have this problem because
	 * ( as far as I can tell ) it flushes dirty buffers more
	 * often.
	 *
	 * Skip the rename operation if the fsync fails, this can happen
	 * due to the server's volume being full, when we pushed out data
	 * that was written back to our cache earlier. Not checking for
	 * this condition can result in potential (silent) data loss.
	 */
	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
	VOP_UNLOCK(fvp, 0);
	if (!error && tvp)
		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
	if (error)
		goto out;

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * XXX Can't sillyrename a directory.
	 */
	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
	    tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		vput(tvp);
		tvp = NULL;
	}

	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
	    tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
	    tcnp->cn_thread);

	/* Renaming a directory invalidates cached names under both parents. */
	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}

out:
	/* Release references per the VOP_RENAME contract. */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs file rename rpc called from nfs_remove() above
 * (sillyrename: moves the in-use file to sp->s_name in the same directory).
 */
static int
nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
    struct sillyrename *sp)
{

	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp,
	    sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
}

/*
 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
 */
static int
nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
    struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred,
    struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(fdvp);

	nfsstats.rpccnt[NFSPROC_RENAME]++;
	mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME,
		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
		nfsm_rndup(tnamelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(fdvp, v3);
	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
	nfsm_fhtom(tdvp, v3);
	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
	nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
	if (v3) {
		nfsm_wcc_data(fdvp, fwccflag);
		nfsm_wcc_data(tdvp, twccflag);
	}
	m_freem(mrep);
nfsmout:
	/* Both directories were modified; flush attrs lacking wcc data. */
	mtx_lock(&(VTONFS(fdvp))->n_mtx);
	VTONFS(fdvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(fdvp))->n_mtx);
	mtx_lock(&(VTONFS(tdvp))->n_mtx);
	VTONFS(tdvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(tdvp))->n_mtx);
	if (!fwccflag) {
		VTONFS(fdvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
	}
	if (!twccflag) {
		VTONFS(tdvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
	}
	return (error);
}

/*
 * nfs hard link create call
 */
static int
nfs_link(struct vop_link_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3;

	if (vp->v_mount != tdvp->v_mount) {
		return (EXDEV);
	}

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 */
	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);

	v3 = NFS_ISV3(vp);
	nfsstats.rpccnt[NFSPROC_LINK]++;
	mreq = nfsm_reqhead(vp, NFSPROC_LINK,
		NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_fhtom(tdvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred);
	if (v3) {
		nfsm_postop_attr(vp, attrflag);
		nfsm_wcc_data(tdvp, wccflag);
	}
	m_freem(mrep);
nfsmout:
	mtx_lock(&(VTONFS(tdvp))->n_mtx);
	VTONFS(tdvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(tdvp))->n_mtx);
	/* nlink on vp and contents of tdvp changed; flush stale attrs. */
	if (!attrflag) {
		VTONFS(vp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
	}
	if (!wccflag) {
		VTONFS(tdvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
	}
	return (error);
}

/*
 * nfs symbolic link create call
 */
static int
nfs_symlink(struct vop_symlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	caddr_t bpos, dpos;
	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vnode *newvp = NULL;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
	slen = strlen(ap->a_target);
	mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
	    nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	if (v3) {
		nfsm_v3attrbuild(vap, FALSE);
	}
	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
	if (!v3) {
		/* v2 sends the attributes after the target string. */
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}

	/*
	 * Issue the NFS request and get the rpc response.
	 *
	 * Only NFSv3 responses returning an error of 0 actually return
	 * a file handle that can be converted into newvp without having
	 * to do an extra lookup rpc.
	 */
	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred);
	if (v3) {
		if (error == 0)
			nfsm_mtofh(dvp, newvp, v3, gotvp);
		nfsm_wcc_data(dvp, wccflag);
	}

	/*
	 * out code jumps -> here, mrep is also freed.
	 */

	m_freem(mrep);
nfsmout:

	/*
	 * If we do not have an error and we could not extract the newvp from
	 * the response due to the request being NFSv2, we have to do a
	 * lookup in order to obtain a newvp to return.
	 */
	if (error == 0 && newvp == NULL) {
		struct nfsnode *np = NULL;

		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, cnp->cn_thread, &np);
		if (!error)
			newvp = NFSTOV(np);
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		*ap->a_vpp = newvp;
	}
	/* The parent directory changed; flush its attrs if no wcc data. */
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag) {
		VTONFS(dvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	return (error);
}

/*
 * nfs make dir call
 */
static int
nfs_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	int len;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	int gotvp = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vattr vattr;
	int v3 = NFS_ISV3(dvp);

	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
		return (error);
	len = cnp->cn_namelen;
	nfsstats.rpccnt[NFSPROC_MKDIR]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR,
	    NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
	if (v3) {
		nfsm_v3attrbuild(vap, FALSE);
	} else {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
	if (!error)
		nfsm_mtofh(dvp, newvp, v3, gotvp);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag) {
		VTONFS(dvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	if (error == 0 && newvp == NULL) {
		/*
		 * No handle in the reply (e.g. v2): look the new name up.
		 * A retransmitted request may find a non-directory of the
		 * same name, which is reported as EEXIST.
		 */
		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
		    cnp->cn_thread, &np);
		if (!error) {
			newvp = NFSTOV(np);
			if (newvp->v_type != VDIR)
				error = EEXIST;
		}
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else
		*ap->a_vpp = newvp;
	return (error);
}

/*
 * nfs remove directory call
 */
static int
nfs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(dvp);

	if (dvp == vp)
		return (EINVAL);
	nfsstats.rpccnt[NFSPROC_RMDIR]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR,
		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag) {
		VTONFS(dvp)->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	cache_purge(dvp);
	cache_purge(vp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs readdir call
 * Serves the read from the directory EOF-offset cache when the cached
 * mtime still matches; otherwise delegates to nfs_bioread().
 */
static int
nfs_readdir(struct vop_readdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct uio *uio = ap->a_uio;
	int tresid, error = 0;
	struct vattr vattr;

	if (vp->v_type != VDIR)
		return(EPERM);

	/*
	 * First, check for hit on the EOF offset cache
	 */
	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
	    (np->n_flag & NMODIFIED) == 0) {
		if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
			mtx_lock(&np->n_mtx);
			if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
				/* Directory unchanged: already at EOF. */
				mtx_unlock(&np->n_mtx);
				nfsstats.direofcache_hits++;
				goto out;
			} else
				mtx_unlock(&np->n_mtx);
		}
	}

	/*
	 * Call nfs_bioread() to do the real work.
	 */
	tresid = uio->uio_resid;
	error = nfs_bioread(vp, uio, 0, ap->a_cred);

	/* Nothing transferred and no error: the read started at/past EOF. */
	if (!error && uio->uio_resid == tresid) {
		nfsstats.direofcache_misses++;
	}
out:
	return (error);
}

/*
 * Readdir rpc call.
 * Called from below the buffer cache by nfs_doio().
 */
int
nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int len, left;
	struct dirent *dp = NULL;
	u_int32_t *tl;
	caddr_t cp;
	nfsuint64 *cookiep;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	nfsuint64 cookie;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *dnp = VTONFS(vp);
	u_quad_t fileno;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
	int attrflag;
	int v3 = NFS_ISV3(vp);

	/*
	 * NOTE(review): the sense of this guard looks inverted — the sanity
	 * panic is compiled only when DIAGNOSTIC is NOT defined.  Presumably
	 * this should be #ifdef DIAGNOSTIC (or a KASSERT); confirm before
	 * changing, since it has shipped this way.
	 */
#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
		(uiop->uio_resid & (DIRBLKSIZ - 1)))
		panic("nfs readdirrpc bad uio");
#endif

	/*
	 * If there is no cookie, assume directory was stale.
	 */
	nfs_dircookie_lock(dnp);
	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep) {
		cookie = *cookiep;
		nfs_dircookie_unlock(dnp);
	} else {
		nfs_dircookie_unlock(dnp);
		return (NFSERR_BAD_COOKIE);
	}

	/*
	 * Loop around doing readdir rpc's of size nm_readdirsize
	 * truncated to a multiple of DIRBLKSIZ.
	 * The stopping criteria is EOF or buffer full.
	 */
	while (more_dirs && bigenough) {
		nfsstats.rpccnt[NFSPROC_READDIR]++;
		mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
			NFSX_READDIR(v3));
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, v3);
		if (v3) {
			/* V3: 64-bit cookie + 64-bit cookie verifier. */
			tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
			*tl++ = cookie.nfsuquad[0];
			*tl++ = cookie.nfsuquad[1];
			mtx_lock(&dnp->n_mtx);
			*tl++ = dnp->n_cookieverf.nfsuquad[0];
			*tl++ = dnp->n_cookieverf.nfsuquad[1];
			mtx_unlock(&dnp->n_mtx);
		} else {
			/* V2: 32-bit cookie only. */
			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = cookie.nfsuquad[0];
		}
		*tl = txdr_unsigned(nmp->nm_readdirsize);
		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
		if (v3) {
			nfsm_postop_attr(vp, attrflag);
			if (!error) {
				/* Remember the server's cookie verifier. */
				tl = nfsm_dissect(u_int32_t *,
				    2 * NFSX_UNSIGNED);
				mtx_lock(&dnp->n_mtx);
				dnp->n_cookieverf.nfsuquad[0] = *tl++;
				dnp->n_cookieverf.nfsuquad[1] = *tl;
				mtx_unlock(&dnp->n_mtx);
			} else {
				m_freem(mrep);
				goto nfsmout;
			}
		}
		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
		more_dirs = fxdr_unsigned(int, *tl);

		/* loop thru the dir entries, doctoring them to 4bsd form */
		while (more_dirs && bigenough) {
			if (v3) {
				tl = nfsm_dissect(u_int32_t *,
				    3 * NFSX_UNSIGNED);
				fileno = fxdr_hyper(tl);
				len = fxdr_unsigned(int, *(tl + 2));
			} else {
				tl = nfsm_dissect(u_int32_t *,
				    2 * NFSX_UNSIGNED);
				fileno = fxdr_unsigned(u_quad_t, *tl++);
				len = fxdr_unsigned(int, *tl);
			}
			if (len <= 0 || len > NFS_MAXNAMLEN) {
				error = EBADRPC;
				m_freem(mrep);
				goto nfsmout;
			}
			tlen = nfsm_rndup(len);
			if (tlen == len)
				tlen += 4;	/* To ensure null termination */
			left = DIRBLKSIZ - blksiz;
			/*
			 * Entry will not fit in the current DIRBLKSIZ block:
			 * pad the previous entry's d_reclen out to the block
			 * boundary and advance the uio past the padding.
			 */
			if ((tlen + DIRHDSIZ) > left) {
				dp->d_reclen += left;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + left;
				uiop->uio_iov->iov_len -= left;
				uiop->uio_offset += left;
				uiop->uio_resid -= left;
				blksiz = 0;
			}
			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
				bigenough = 0;
			if (bigenough) {
				/* Emit a struct dirent into the caller's buffer. */
				dp = (struct dirent *)uiop->uio_iov->iov_base;
				dp->d_fileno = (int)fileno;
				dp->d_namlen = len;
				dp->d_reclen = tlen + DIRHDSIZ;
				dp->d_type = DT_UNKNOWN;
				blksiz += dp->d_reclen;
				if (blksiz == DIRBLKSIZ)
					blksiz = 0;
				uiop->uio_offset += DIRHDSIZ;
				uiop->uio_resid -= DIRHDSIZ;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
				uiop->uio_iov->iov_len -= DIRHDSIZ;
				/* Copy the name out of the mbuf chain. */
				nfsm_mtouio(uiop, len);
				cp = uiop->uio_iov->iov_base;
				tlen -= len;
				*cp = '\0';	/* null terminate */
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + tlen;
				uiop->uio_iov->iov_len -= tlen;
				uiop->uio_offset += tlen;
				uiop->uio_resid -= tlen;
			} else
				nfsm_adv(nfsm_rndup(len));
			/* Pick up the cookie for this entry (wire order). */
			if (v3) {
				tl = nfsm_dissect(u_int32_t *,
				    3 * NFSX_UNSIGNED);
			} else {
				tl = nfsm_dissect(u_int32_t *,
				    2 * NFSX_UNSIGNED);
			}
			if (bigenough) {
				cookie.nfsuquad[0] = *tl++;
				if (v3)
					cookie.nfsuquad[1] = *tl++;
			} else if (v3)
				tl += 2;
			else
				tl++;
			more_dirs = fxdr_unsigned(int, *tl);
		}
		/*
		 * If at end of rpc data, get the eof boolean
		 */
		if (!more_dirs) {
			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			more_dirs = (fxdr_unsigned(int, *tl) == 0);
		}
		m_freem(mrep);
	}
	/*
	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
	 * by increasing d_reclen for the last record.
	 */
	if (blksiz > 0) {
		left = DIRBLKSIZ - blksiz;
		dp->d_reclen += left;
		uiop->uio_iov->iov_base =
		    (char *)uiop->uio_iov->iov_base + left;
		uiop->uio_iov->iov_len -= left;
		uiop->uio_offset += left;
		uiop->uio_resid -= left;
	}

	/*
	 * We are now either at the end of the directory or have filled the
	 * block.
	 */
	if (bigenough)
		dnp->n_direofoffset = uiop->uio_offset;
	else {
		if (uiop->uio_resid > 0)
			nfs_printf("EEK! readdirrpc resid > 0\n");
		/* Stash the continuation cookie for the next block. */
		nfs_dircookie_lock(dnp);
		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
		*cookiep = cookie;
		nfs_dircookie_unlock(dnp);
	}
nfsmout:
	return (error);
}

/*
 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
 *
 * Like nfs_readdirrpc(), but the READDIRPLUS reply also carries per-entry
 * attributes and file handles, which are used to pre-load nfsnodes and
 * prime the name cache.
 */
int
nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int len, left;
	struct dirent *dp;
	u_int32_t *tl;
	caddr_t cp;
	struct vnode *newvp;
	nfsuint64 *cookiep;
	caddr_t bpos, dpos, dpossav1, dpossav2;
	struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
	struct nameidata nami, *ndp = &nami;
	struct componentname *cnp = &ndp->ni_cnd;
	nfsuint64 cookie;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *dnp = VTONFS(vp), *np;
	nfsfh_t *fhp;
	u_quad_t fileno;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
	int attrflag, fhsize;

#ifndef nolint
	dp = NULL;
#endif
	/*
	 * NOTE(review): same apparently-inverted #ifndef DIAGNOSTIC guard
	 * as in nfs_readdirrpc() — the check compiles only in
	 * non-DIAGNOSTIC kernels.  Confirm intent before changing.
	 */
#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
		(uiop->uio_resid & (DIRBLKSIZ - 1)))
		panic("nfs readdirplusrpc bad uio");
#endif
	ndp->ni_dvp = vp;
	newvp = NULLVP;

	/*
	 * If there is no cookie, assume directory was stale.
2440 */ 2441 nfs_dircookie_lock(dnp); 2442 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); 2443 if (cookiep) { 2444 cookie = *cookiep; 2445 nfs_dircookie_unlock(dnp); 2446 } else { 2447 nfs_dircookie_unlock(dnp); 2448 return (NFSERR_BAD_COOKIE); 2449 } 2450 /* 2451 * Loop around doing readdir rpc's of size nm_readdirsize 2452 * truncated to a multiple of DIRBLKSIZ. 2453 * The stopping criteria is EOF or buffer full. 2454 */ 2455 while (more_dirs && bigenough) { 2456 nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; 2457 mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS, 2458 NFSX_FH(1) + 6 * NFSX_UNSIGNED); 2459 mb = mreq; 2460 bpos = mtod(mb, caddr_t); 2461 nfsm_fhtom(vp, 1); 2462 tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED); 2463 *tl++ = cookie.nfsuquad[0]; 2464 *tl++ = cookie.nfsuquad[1]; 2465 mtx_lock(&dnp->n_mtx); 2466 *tl++ = dnp->n_cookieverf.nfsuquad[0]; 2467 *tl++ = dnp->n_cookieverf.nfsuquad[1]; 2468 mtx_unlock(&dnp->n_mtx); 2469 *tl++ = txdr_unsigned(nmp->nm_readdirsize); 2470 *tl = txdr_unsigned(nmp->nm_rsize); 2471 nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred); 2472 nfsm_postop_attr(vp, attrflag); 2473 if (error) { 2474 m_freem(mrep); 2475 goto nfsmout; 2476 } 2477 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2478 mtx_lock(&dnp->n_mtx); 2479 dnp->n_cookieverf.nfsuquad[0] = *tl++; 2480 dnp->n_cookieverf.nfsuquad[1] = *tl++; 2481 mtx_unlock(&dnp->n_mtx); 2482 more_dirs = fxdr_unsigned(int, *tl); 2483 2484 /* loop thru the dir entries, doctoring them to 4bsd form */ 2485 while (more_dirs && bigenough) { 2486 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2487 fileno = fxdr_hyper(tl); 2488 len = fxdr_unsigned(int, *(tl + 2)); 2489 if (len <= 0 || len > NFS_MAXNAMLEN) { 2490 error = EBADRPC; 2491 m_freem(mrep); 2492 goto nfsmout; 2493 } 2494 tlen = nfsm_rndup(len); 2495 if (tlen == len) 2496 tlen += 4; /* To ensure null termination*/ 2497 left = DIRBLKSIZ - blksiz; 2498 if ((tlen + DIRHDSIZ) > left) { 2499 dp->d_reclen += left; 2500 
uiop->uio_iov->iov_base = 2501 (char *)uiop->uio_iov->iov_base + left; 2502 uiop->uio_iov->iov_len -= left; 2503 uiop->uio_offset += left; 2504 uiop->uio_resid -= left; 2505 blksiz = 0; 2506 } 2507 if ((tlen + DIRHDSIZ) > uiop->uio_resid) 2508 bigenough = 0; 2509 if (bigenough) { 2510 dp = (struct dirent *)uiop->uio_iov->iov_base; 2511 dp->d_fileno = (int)fileno; 2512 dp->d_namlen = len; 2513 dp->d_reclen = tlen + DIRHDSIZ; 2514 dp->d_type = DT_UNKNOWN; 2515 blksiz += dp->d_reclen; 2516 if (blksiz == DIRBLKSIZ) 2517 blksiz = 0; 2518 uiop->uio_offset += DIRHDSIZ; 2519 uiop->uio_resid -= DIRHDSIZ; 2520 uiop->uio_iov->iov_base = 2521 (char *)uiop->uio_iov->iov_base + DIRHDSIZ; 2522 uiop->uio_iov->iov_len -= DIRHDSIZ; 2523 cnp->cn_nameptr = uiop->uio_iov->iov_base; 2524 cnp->cn_namelen = len; 2525 nfsm_mtouio(uiop, len); 2526 cp = uiop->uio_iov->iov_base; 2527 tlen -= len; 2528 *cp = '\0'; 2529 uiop->uio_iov->iov_base = 2530 (char *)uiop->uio_iov->iov_base + tlen; 2531 uiop->uio_iov->iov_len -= tlen; 2532 uiop->uio_offset += tlen; 2533 uiop->uio_resid -= tlen; 2534 } else 2535 nfsm_adv(nfsm_rndup(len)); 2536 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2537 if (bigenough) { 2538 cookie.nfsuquad[0] = *tl++; 2539 cookie.nfsuquad[1] = *tl++; 2540 } else 2541 tl += 2; 2542 2543 /* 2544 * Since the attributes are before the file handle 2545 * (sigh), we must skip over the attributes and then 2546 * come back and get them. 2547 */ 2548 attrflag = fxdr_unsigned(int, *tl); 2549 if (attrflag) { 2550 dpossav1 = dpos; 2551 mdsav1 = md; 2552 nfsm_adv(NFSX_V3FATTR); 2553 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2554 doit = fxdr_unsigned(int, *tl); 2555 /* 2556 * Skip loading the attrs for "..". There's a 2557 * race between loading the attrs here and 2558 * lookups that look for the directory currently 2559 * being read (in the parent). We try to acquire 2560 * the exclusive lock on ".." here, owning the 2561 * lock on the directory being read. 
Lookup will 2562 * hold the lock on ".." and try to acquire the 2563 * lock on the directory being read. 2564 * 2565 * There are other ways of fixing this, one would 2566 * be to do a trylock on the ".." vnode and skip 2567 * loading the attrs on ".." if it happens to be 2568 * locked by another process. But skipping the 2569 * attrload on ".." seems the easiest option. 2570 */ 2571 if (strcmp(dp->d_name, "..") == 0) { 2572 doit = 0; 2573 /* 2574 * We've already skipped over the attrs, 2575 * skip over the filehandle. And store d_type 2576 * as VDIR. 2577 */ 2578 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2579 i = fxdr_unsigned(int, *tl); 2580 nfsm_adv(nfsm_rndup(i)); 2581 dp->d_type = IFTODT(VTTOIF(VDIR)); 2582 } 2583 if (doit) { 2584 nfsm_getfh(fhp, fhsize, 1); 2585 if (NFS_CMPFH(dnp, fhp, fhsize)) { 2586 VREF(vp); 2587 newvp = vp; 2588 np = dnp; 2589 } else { 2590 error = nfs_nget(vp->v_mount, fhp, 2591 fhsize, &np, LK_EXCLUSIVE); 2592 if (error) 2593 doit = 0; 2594 else 2595 newvp = NFSTOV(np); 2596 } 2597 } 2598 if (doit && bigenough) { 2599 dpossav2 = dpos; 2600 dpos = dpossav1; 2601 mdsav2 = md; 2602 md = mdsav1; 2603 nfsm_loadattr(newvp, NULL); 2604 dpos = dpossav2; 2605 md = mdsav2; 2606 dp->d_type = 2607 IFTODT(VTTOIF(np->n_vattr.va_type)); 2608 ndp->ni_vp = newvp; 2609 /* Update n_ctime, so subsequent lookup doesn't purge entry */ 2610 np->n_ctime = np->n_vattr.va_ctime.tv_sec; 2611 cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); 2612 } 2613 } else { 2614 /* Just skip over the file handle */ 2615 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2616 i = fxdr_unsigned(int, *tl); 2617 if (i) { 2618 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2619 fhsize = fxdr_unsigned(int, *tl); 2620 nfsm_adv(nfsm_rndup(fhsize)); 2621 } 2622 } 2623 if (newvp != NULLVP) { 2624 if (newvp == vp) 2625 vrele(newvp); 2626 else 2627 vput(newvp); 2628 newvp = NULLVP; 2629 } 2630 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2631 more_dirs = fxdr_unsigned(int, *tl); 2632 } 2633 
/* 2634 * If at end of rpc data, get the eof boolean 2635 */ 2636 if (!more_dirs) { 2637 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2638 more_dirs = (fxdr_unsigned(int, *tl) == 0); 2639 } 2640 m_freem(mrep); 2641 } 2642 /* 2643 * Fill last record, iff any, out to a multiple of DIRBLKSIZ 2644 * by increasing d_reclen for the last record. 2645 */ 2646 if (blksiz > 0) { 2647 left = DIRBLKSIZ - blksiz; 2648 dp->d_reclen += left; 2649 uiop->uio_iov->iov_base = 2650 (char *)uiop->uio_iov->iov_base + left; 2651 uiop->uio_iov->iov_len -= left; 2652 uiop->uio_offset += left; 2653 uiop->uio_resid -= left; 2654 } 2655 2656 /* 2657 * We are now either at the end of the directory or have filled the 2658 * block. 2659 */ 2660 if (bigenough) 2661 dnp->n_direofoffset = uiop->uio_offset; 2662 else { 2663 if (uiop->uio_resid > 0) 2664 nfs_printf("EEK! readdirplusrpc resid > 0\n"); 2665 nfs_dircookie_lock(dnp); 2666 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); 2667 *cookiep = cookie; 2668 nfs_dircookie_unlock(dnp); 2669 } 2670nfsmout: 2671 if (newvp != NULLVP) { 2672 if (newvp == vp) 2673 vrele(newvp); 2674 else 2675 vput(newvp); 2676 newvp = NULLVP; 2677 } 2678 return (error); 2679} 2680 2681/* 2682 * Silly rename. To make the NFS filesystem that is stateless look a little 2683 * more like the "ufs" a remove of an active vnode is translated to a rename 2684 * to a funny looking filename that is removed by nfs_inactive on the 2685 * nfsnode. There is the potential for another process on a different client 2686 * to create the same funny name between the nfs_lookitup() fails and the 2687 * nfs_rename() completes, but... 
 */
static int
nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct sillyrename *sp;
	struct nfsnode *np;
	int error;
	short pid;
	unsigned int lticks;

	cache_purge(dvp);
	np = VTONFS(vp);
	/*
	 * NOTE(review): this guard looks inverted — the sanity panic is
	 * compiled only when DIAGNOSTIC is NOT defined.  Presumably it
	 * should be #ifdef DIAGNOSTIC; confirm before changing.
	 */
#ifndef DIAGNOSTIC
	if (vp->v_type == VDIR)
		panic("nfs: sillyrename dir");
#endif
	sp = malloc(sizeof (struct sillyrename),
	    M_NFSREQ, M_WAITOK);
	sp->s_cred = crhold(cnp->cn_cred);
	sp->s_dvp = dvp;
	sp->s_removeit = nfs_removeit;
	VREF(dvp);	/* held until nfs_inactive() removes the silly name */

	/*
	 * Fudge together a funny name.
	 * Changing the format of the funny name to accomodate more 
	 * sillynames per directory.
	 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 
	 * CPU ticks since boot.
	 */
	pid = cnp->cn_thread->td_proc->p_pid;
	lticks = (unsigned int)ticks;
	/* Bump the tick value until the name does not already exist. */
	for ( ; ; ) {
		sp->s_namlen = sprintf(sp->s_name, 
		    ".nfs.%08x.%04x4.4", lticks, 
		    pid);
		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
		    cnp->cn_thread, NULL))
			break;
		lticks++;
	}
	error = nfs_renameit(dvp, cnp, sp);
	if (error)
		goto bad;
	/* Re-lookup to attach the sillyrename record to the nfsnode. */
	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
	    cnp->cn_thread, &np);
	np->n_sillyrename = sp;
	return (0);
bad:
	vrele(sp->s_dvp);
	crfree(sp->s_cred);
	free((caddr_t)sp, M_NFSREQ);
	return (error);
}

/*
 * Look up a file name and optionally either update the file handle or
 * allocate an nfsnode, depending on the value of npp.
 * npp == NULL --> just do the lookup
 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
 *	handled too
 * *npp != NULL --> update the file handle in the vnode
 */
static int
nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
    struct thread *td, struct nfsnode **npp)
{
	struct vnode *newvp = NULL;
	struct nfsnode *np, *dnp = VTONFS(dvp);
	caddr_t bpos, dpos;
	int error = 0, fhlen, attrflag;
	struct mbuf *mreq, *mrep, *md, *mb;
	nfsfh_t *nfhp;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
	    NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(name, len, NFS_MAXNAMLEN);
	/* nfsm_request() may jump to nfsmout on error, setting 'error'. */
	nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
	if (npp && !error) {
		nfsm_getfh(nfhp, fhlen, v3);
		if (*npp) {
			/*
			 * Caller supplied an nfsnode: replace its file
			 * handle, switching between the embedded small-fh
			 * storage and a malloc'ed big-fh buffer as needed.
			 */
			np = *npp;
			if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
				free((caddr_t)np->n_fhp, M_NFSBIGFH);
				np->n_fhp = &np->n_fh;
			} else if (np->n_fhsize <= NFS_SMALLFH && fhlen > NFS_SMALLFH)
				np->n_fhp = (nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK);
			bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
			np->n_fhsize = fhlen;
			newvp = NFSTOV(np);
		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
			/* Name resolves to the directory itself. */
			VREF(dvp);
			newvp = dvp;
		} else {
			error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
			if (error) {
				m_freem(mrep);
				return (error);
			}
			newvp = NFSTOV(np);
		}
		if (v3) {
			nfsm_postop_attr(newvp, attrflag);
			/* A fresh nfsnode with no attributes is useless. */
			if (!attrflag && *npp == NULL) {
				m_freem(mrep);
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
				return (ENOENT);
			}
		} else
			nfsm_loadattr(newvp, NULL);
	}
	m_freem(mrep);
nfsmout:
	if (npp && *npp == NULL) {
		if (error) {
			if (newvp) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
		} else
			*npp = np;
	}
	return (error);
}

/*
 * Nfs Version 3 commit rpc
 *
 * Asks the server to commit 'cnt' bytes at 'offset' to stable storage.
 * Returns NFSERR_STALEWRITEVERF (after recording the new verifier) when
 * the server's write verifier has changed, meaning a server reboot may
 * have lost uncommitted writes.
 */
int
nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
    struct thread *td)
{
	u_int32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;

	/* Nothing to do until a write has established a verifier. */
	mtx_lock(&nmp->nm_mtx);
	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		return (0);
	}
	mtx_unlock(&nmp->nm_mtx);
	nfsstats.rpccnt[NFSPROC_COMMIT]++;
	mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, 1);
	tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED);
	txdr_hyper(offset, tl);
	tl += 2;
	*tl = txdr_unsigned(cnt);
	nfsm_request(vp, NFSPROC_COMMIT, td, cred);
	nfsm_wcc_data(vp, wccflag);
	if (!error) {
		tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF);
		/* Verifier changed: remember it and report stale writes. */
		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
		    NFSX_V3WRITEVERF)) {
			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
			    NFSX_V3WRITEVERF);
			error = NFSERR_STALEWRITEVERF;
		}
	}
	m_freem(mrep);
nfsmout:
	return (error);
}

/*
 * Strategy routine.
 * For async requests when nfsiod(s) are running, queue the request by
 * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
 * request.
 */
static int
nfs_strategy(struct vop_strategy_args *ap)
{
	struct buf *bp = ap->a_bp;
	struct ucred *cr;

	KASSERT(!(bp->b_flags & B_DONE),
	    ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
	BUF_ASSERT_HELD(bp);

	/* Pick the credential matching the I/O direction. */
	if (bp->b_iocmd == BIO_READ)
		cr = bp->b_rcred;
	else
		cr = bp->b_wcred;

	/*
	 * If the op is asynchronous and an i/o daemon is waiting
	 * queue the request, wake it up and wait for completion
	 * otherwise just do it ourselves.
	 */
	if ((bp->b_flags & B_ASYNC) == 0 ||
	    nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
		(void)nfs_doio(ap->a_vp, bp, cr, curthread);
	return (0);
}

/*
 * fsync vnode op. Just call nfs_flush() with commit == 1.
 */
/* ARGSUSED */
static int
nfs_fsync(struct vop_fsync_args *ap)
{

	return (nfs_flush(ap->a_vp, ap->a_waitfor, 1));
}

/*
 * Flush all the blocks associated with a vnode.
 * Walk through the buffer pool and push any dirty pages
 * associated with the vnode.
2914 */ 2915static int 2916nfs_flush(struct vnode *vp, int waitfor, int commit) 2917{ 2918 struct nfsnode *np = VTONFS(vp); 2919 struct buf *bp; 2920 int i; 2921 struct buf *nbp; 2922 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2923 int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; 2924 int passone = 1; 2925 u_quad_t off, endoff, toff; 2926 struct ucred* wcred = NULL; 2927 struct buf **bvec = NULL; 2928 struct bufobj *bo; 2929 struct thread *td = curthread; 2930#ifndef NFS_COMMITBVECSIZ 2931#define NFS_COMMITBVECSIZ 20 2932#endif 2933 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; 2934 int bvecsize = 0, bveccount; 2935 2936 if (nmp->nm_flag & NFSMNT_INT) 2937 slpflag = PCATCH; 2938 if (!commit) 2939 passone = 0; 2940 bo = &vp->v_bufobj; 2941 /* 2942 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the 2943 * server, but has not been committed to stable storage on the server 2944 * yet. On the first pass, the byte range is worked out and the commit 2945 * rpc is done. On the second pass, nfs_writebp() is called to do the 2946 * job. 2947 */ 2948again: 2949 off = (u_quad_t)-1; 2950 endoff = 0; 2951 bvecpos = 0; 2952 if (NFS_ISV3(vp) && commit) { 2953 if (bvec != NULL && bvec != bvec_on_stack) 2954 free(bvec, M_TEMP); 2955 /* 2956 * Count up how many buffers waiting for a commit. 2957 */ 2958 bveccount = 0; 2959 BO_LOCK(bo); 2960 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2961 if (!BUF_ISLOCKED(bp) && 2962 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 2963 == (B_DELWRI | B_NEEDCOMMIT)) 2964 bveccount++; 2965 } 2966 /* 2967 * Allocate space to remember the list of bufs to commit. It is 2968 * important to use M_NOWAIT here to avoid a race with nfs_write. 2969 * If we can't get memory (for whatever reason), we will end up 2970 * committing the buffers one-by-one in the loop below. 2971 */ 2972 if (bveccount > NFS_COMMITBVECSIZ) { 2973 /* 2974 * Release the vnode interlock to avoid a lock 2975 * order reversal. 
2976 */ 2977 BO_UNLOCK(bo); 2978 bvec = (struct buf **) 2979 malloc(bveccount * sizeof(struct buf *), 2980 M_TEMP, M_NOWAIT); 2981 BO_LOCK(bo); 2982 if (bvec == NULL) { 2983 bvec = bvec_on_stack; 2984 bvecsize = NFS_COMMITBVECSIZ; 2985 } else 2986 bvecsize = bveccount; 2987 } else { 2988 bvec = bvec_on_stack; 2989 bvecsize = NFS_COMMITBVECSIZ; 2990 } 2991 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2992 if (bvecpos >= bvecsize) 2993 break; 2994 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 2995 nbp = TAILQ_NEXT(bp, b_bobufs); 2996 continue; 2997 } 2998 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != 2999 (B_DELWRI | B_NEEDCOMMIT)) { 3000 BUF_UNLOCK(bp); 3001 nbp = TAILQ_NEXT(bp, b_bobufs); 3002 continue; 3003 } 3004 BO_UNLOCK(bo); 3005 bremfree(bp); 3006 /* 3007 * Work out if all buffers are using the same cred 3008 * so we can deal with them all with one commit. 3009 * 3010 * NOTE: we are not clearing B_DONE here, so we have 3011 * to do it later on in this routine if we intend to 3012 * initiate I/O on the bp. 3013 * 3014 * Note: to avoid loopback deadlocks, we do not 3015 * assign b_runningbufspace. 3016 */ 3017 if (wcred == NULL) 3018 wcred = bp->b_wcred; 3019 else if (wcred != bp->b_wcred) 3020 wcred = NOCRED; 3021 vfs_busy_pages(bp, 1); 3022 3023 BO_LOCK(bo); 3024 /* 3025 * bp is protected by being locked, but nbp is not 3026 * and vfs_busy_pages() may sleep. We have to 3027 * recalculate nbp. 3028 */ 3029 nbp = TAILQ_NEXT(bp, b_bobufs); 3030 3031 /* 3032 * A list of these buffers is kept so that the 3033 * second loop knows which buffers have actually 3034 * been committed. This is necessary, since there 3035 * may be a race between the commit rpc and new 3036 * uncommitted writes on the file. 
3037 */ 3038 bvec[bvecpos++] = bp; 3039 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 3040 bp->b_dirtyoff; 3041 if (toff < off) 3042 off = toff; 3043 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); 3044 if (toff > endoff) 3045 endoff = toff; 3046 } 3047 BO_UNLOCK(bo); 3048 } 3049 if (bvecpos > 0) { 3050 /* 3051 * Commit data on the server, as required. 3052 * If all bufs are using the same wcred, then use that with 3053 * one call for all of them, otherwise commit each one 3054 * separately. 3055 */ 3056 if (wcred != NOCRED) 3057 retv = nfs_commit(vp, off, (int)(endoff - off), 3058 wcred, td); 3059 else { 3060 retv = 0; 3061 for (i = 0; i < bvecpos; i++) { 3062 off_t off, size; 3063 bp = bvec[i]; 3064 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 3065 bp->b_dirtyoff; 3066 size = (u_quad_t)(bp->b_dirtyend 3067 - bp->b_dirtyoff); 3068 retv = nfs_commit(vp, off, (int)size, 3069 bp->b_wcred, td); 3070 if (retv) break; 3071 } 3072 } 3073 3074 if (retv == NFSERR_STALEWRITEVERF) 3075 nfs_clearcommit(vp->v_mount); 3076 3077 /* 3078 * Now, either mark the blocks I/O done or mark the 3079 * blocks dirty, depending on whether the commit 3080 * succeeded. 3081 */ 3082 for (i = 0; i < bvecpos; i++) { 3083 bp = bvec[i]; 3084 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 3085 if (retv) { 3086 /* 3087 * Error, leave B_DELWRI intact 3088 */ 3089 vfs_unbusy_pages(bp); 3090 brelse(bp); 3091 } else { 3092 /* 3093 * Success, remove B_DELWRI ( bundirty() ). 3094 * 3095 * b_dirtyoff/b_dirtyend seem to be NFS 3096 * specific. We should probably move that 3097 * into bundirty(). XXX 3098 */ 3099 bufobj_wref(bo); 3100 bp->b_flags |= B_ASYNC; 3101 bundirty(bp); 3102 bp->b_flags &= ~B_DONE; 3103 bp->b_ioflags &= ~BIO_ERROR; 3104 bp->b_dirtyoff = bp->b_dirtyend = 0; 3105 bufdone(bp); 3106 } 3107 } 3108 } 3109 3110 /* 3111 * Start/do any write(s) that are required. 
3112 */ 3113loop: 3114 BO_LOCK(bo); 3115 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3116 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 3117 if (waitfor != MNT_WAIT || passone) 3118 continue; 3119 3120 error = BUF_TIMELOCK(bp, 3121 LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, 3122 BO_MTX(bo), "nfsfsync", slpflag, slptimeo); 3123 if (error == 0) { 3124 BUF_UNLOCK(bp); 3125 goto loop; 3126 } 3127 if (error == ENOLCK) { 3128 error = 0; 3129 goto loop; 3130 } 3131 if (nfs_sigintr(nmp, td)) { 3132 error = EINTR; 3133 goto done; 3134 } 3135 if (slpflag == PCATCH) { 3136 slpflag = 0; 3137 slptimeo = 2 * hz; 3138 } 3139 goto loop; 3140 } 3141 if ((bp->b_flags & B_DELWRI) == 0) 3142 panic("nfs_fsync: not dirty"); 3143 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { 3144 BUF_UNLOCK(bp); 3145 continue; 3146 } 3147 BO_UNLOCK(bo); 3148 bremfree(bp); 3149 if (passone || !commit) 3150 bp->b_flags |= B_ASYNC; 3151 else 3152 bp->b_flags |= B_ASYNC; 3153 bwrite(bp); 3154 if (nfs_sigintr(nmp, td)) { 3155 error = EINTR; 3156 goto done; 3157 } 3158 goto loop; 3159 } 3160 if (passone) { 3161 passone = 0; 3162 BO_UNLOCK(bo); 3163 goto again; 3164 } 3165 if (waitfor == MNT_WAIT) { 3166 while (bo->bo_numoutput) { 3167 error = bufobj_wwait(bo, slpflag, slptimeo); 3168 if (error) { 3169 BO_UNLOCK(bo); 3170 error = nfs_sigintr(nmp, td); 3171 if (error) 3172 goto done; 3173 if (slpflag == PCATCH) { 3174 slpflag = 0; 3175 slptimeo = 2 * hz; 3176 } 3177 BO_LOCK(bo); 3178 } 3179 } 3180 if (bo->bo_dirty.bv_cnt != 0 && commit) { 3181 BO_UNLOCK(bo); 3182 goto loop; 3183 } 3184 /* 3185 * Wait for all the async IO requests to drain 3186 */ 3187 BO_UNLOCK(bo); 3188 mtx_lock(&np->n_mtx); 3189 while (np->n_directio_asyncwr > 0) { 3190 np->n_flag |= NFSYNCWAIT; 3191 error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr, 3192 &np->n_mtx, slpflag | (PRIBIO + 1), 3193 "nfsfsync", 0); 3194 if (error) { 3195 if (nfs_sigintr(nmp, td)) { 3196 mtx_unlock(&np->n_mtx); 3197 error 
= EINTR; 3198 goto done; 3199 } 3200 } 3201 } 3202 mtx_unlock(&np->n_mtx); 3203 } else 3204 BO_UNLOCK(bo); 3205 mtx_lock(&np->n_mtx); 3206 if (np->n_flag & NWRITEERR) { 3207 error = np->n_error; 3208 np->n_flag &= ~NWRITEERR; 3209 } 3210 if (commit && bo->bo_dirty.bv_cnt == 0 && 3211 bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) 3212 np->n_flag &= ~NMODIFIED; 3213 mtx_unlock(&np->n_mtx); 3214done: 3215 if (bvec != NULL && bvec != bvec_on_stack) 3216 free(bvec, M_TEMP); 3217 return (error); 3218} 3219 3220/* 3221 * NFS advisory byte-level locks. 3222 */ 3223static int 3224nfs_advlock(struct vop_advlock_args *ap) 3225{ 3226 struct vnode *vp = ap->a_vp; 3227 u_quad_t size; 3228 int error; 3229 3230 error = vn_lock(vp, LK_SHARED); 3231 if (error) 3232 return (error); 3233 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3234 size = VTONFS(vp)->n_size; 3235 VOP_UNLOCK(vp, 0); 3236 error = lf_advlock(ap, &(vp->v_lockf), size); 3237 } else { 3238 if (nfs_advlock_p) 3239 error = nfs_advlock_p(ap); 3240 else 3241 error = ENOLCK; 3242 } 3243 3244 return (error); 3245} 3246 3247/* 3248 * NFS advisory byte-level locks. 3249 */ 3250static int 3251nfs_advlockasync(struct vop_advlockasync_args *ap) 3252{ 3253 struct vnode *vp = ap->a_vp; 3254 u_quad_t size; 3255 int error; 3256 3257 error = vn_lock(vp, LK_SHARED); 3258 if (error) 3259 return (error); 3260 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3261 size = VTONFS(vp)->n_size; 3262 VOP_UNLOCK(vp, 0); 3263 error = lf_advlockasync(ap, &(vp->v_lockf), size); 3264 } else { 3265 VOP_UNLOCK(vp, 0); 3266 error = EOPNOTSUPP; 3267 } 3268 return (error); 3269} 3270 3271/* 3272 * Print out the contents of an nfsnode. 
 */
static int
nfs_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);

	nfs_printf("\tfileid %ld fsid 0x%x",
	    np->n_vattr.va_fileid, np->n_vattr.va_fsid);
	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);
	printf("\n");
	return (0);
}

/*
 * This is the "real" nfs::bwrite(struct buf*).
 * We set B_CACHE if this is a VMIO buffer.
 */
int
nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
{
	int s;
	int oldflags = bp->b_flags;
#if 0
	int retv = 1;
	off_t off;
#endif

	BUF_ASSERT_HELD(bp);

	/* Invalid buffer: nothing to write, just release it. */
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return(0);
	}

	bp->b_flags |= B_CACHE;

	/*
	 * Undirty the bp.  We will redirty it later if the I/O fails.
	 */
	s = splbio();
	bundirty(bp);
	bp->b_flags &= ~B_DONE;
	bp->b_ioflags &= ~BIO_ERROR;
	bp->b_iocmd = BIO_WRITE;

	bufobj_wref(bp->b_bufobj);
	curthread->td_ru.ru_oublock++;
	splx(s);

	/*
	 * Note: to avoid loopback deadlocks, we do not
	 * assign b_runningbufspace.
	 */
	vfs_busy_pages(bp, 1);

	BUF_KERNPROC(bp);
	bp->b_iooffset = dbtob(bp->b_blkno);
	bstrategy(bp);

	/* Synchronous write: wait for completion and release the buffer. */
	if ((oldflags & B_ASYNC) == 0) {
		int rtval = bufwait(bp);

		if (oldflags & B_DELWRI) {
			s = splbio();
			reassignbuf(bp);
			splx(s);
		}
		brelse(bp);
		return (rtval);
	}

	return (0);
}

/*
 * nfs special file access vnode op.
 * Essentially just get vattr and then imitate iaccess() since the device is
 * local to the client.
 */
static int
nfsspec_access(struct vop_access_args *ap)
{
	struct vattr *vap;
	struct ucred *cred = ap->a_cred;
	struct vnode *vp = ap->a_vp;
	accmode_t accmode = ap->a_accmode;
	struct vattr vattr;
	int error;

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}
	/* Fetch fresh attributes and do a standard UNIX permission check. */
	vap = &vattr;
	error = VOP_GETATTR(vp, vap, cred);
	if (error)
		goto out;
	error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
	    accmode, cred, NULL);
out:
	return error;
}

/*
 * Read wrapper for fifos.
 *
 * Records the access (for later attribute update in nfsfifo_close())
 * and passes the read through to the fifofs implementation.
 */
static int
nfsfifo_read(struct vop_read_args *ap)
{
	struct nfsnode *np = VTONFS(ap->a_vp);
	int error;

	/*
	 * Set access flag.
	 */
	mtx_lock(&np->n_mtx);
	np->n_flag |= NACC;
	getnanotime(&np->n_atim);
	mtx_unlock(&np->n_mtx);
	error = fifo_specops.vop_read(ap);
	return error;
}

/*
 * Write wrapper for fifos.
 *
 * Records the update (for later attribute update in nfsfifo_close())
 * and passes the write through to the fifofs implementation.
 */
static int
nfsfifo_write(struct vop_write_args *ap)
{
	struct nfsnode *np = VTONFS(ap->a_vp);

	/*
	 * Set update flag.
	 */
	mtx_lock(&np->n_mtx);
	np->n_flag |= NUPD;
	getnanotime(&np->n_mtim);
	mtx_unlock(&np->n_mtx);
	return(fifo_specops.vop_write(ap));
}

/*
 * Close wrapper for fifos.
 *
 * Update the times on the nfsnode then do fifo close.
 */
static int
nfsfifo_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	struct timespec ts;

	mtx_lock(&np->n_mtx);
	if (np->n_flag & (NACC | NUPD)) {
		/* Fold accumulated access/update stamps into the nfsnode. */
		getnanotime(&ts);
		if (np->n_flag & NACC)
			np->n_atim = ts;
		if (np->n_flag & NUPD)
			np->n_mtim = ts;
		np->n_flag |= NCHG;
		/*
		 * Last reference on a writable mount: push the times to
		 * the server now via SETATTR.
		 */
		if (vrefcnt(vp) == 1 &&
		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			VATTR_NULL(&vattr);
			if (np->n_flag & NACC)
				vattr.va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vattr.va_mtime = np->n_mtim;
			mtx_unlock(&np->n_mtx);
			(void)VOP_SETATTR(vp, &vattr, ap->a_cred);
			goto out;
		}
	}
	mtx_unlock(&np->n_mtx);
out:
	return (fifo_specops.vop_close(ap));
}

/*
 * Just call nfs_writebp() with the force argument set to 1.
 *
 * NOTE: B_DONE may or may not be set in a_bp on call.
 */
static int
nfs_bwrite(struct buf *bp)
{

	return (nfs_writebp(bp, 1, curthread));
}

/* buf_ops vector used for NFS vnode buffers; only bop_write is NFS-specific. */
struct buf_ops buf_ops_nfs = {
	.bop_name	=	"buf_ops_nfs",
	.bop_write	=	nfs_bwrite,
	.bop_strategy	=	bufstrategy,
	.bop_sync	=	bufsync,
	.bop_bdflush	=	bufbdflush,
};