/* nfs_vnops.c — FreeBSD SVN revision 195699 */
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_vnops.c 195699 2009-07-14 22:48:30Z rwatson $"); 37 38/* 39 * vnode op calls for Sun NFS version 2 and 3 40 */ 41 42#include "opt_inet.h" 43#include "opt_kdtrace.h" 44 45#include <sys/param.h> 46#include <sys/kernel.h> 47#include <sys/systm.h> 48#include <sys/resourcevar.h> 49#include <sys/proc.h> 50#include <sys/mount.h> 51#include <sys/bio.h> 52#include <sys/buf.h> 53#include <sys/malloc.h> 54#include <sys/mbuf.h> 55#include <sys/namei.h> 56#include <sys/socket.h> 57#include <sys/vnode.h> 58#include <sys/dirent.h> 59#include <sys/fcntl.h> 60#include <sys/lockf.h> 61#include <sys/stat.h> 62#include <sys/sysctl.h> 63#include <sys/signalvar.h> 64#include <sys/vimage.h> 65 66#include <vm/vm.h> 67#include <vm/vm_object.h> 68#include <vm/vm_extern.h> 69#include <vm/vm_object.h> 70 71#include <fs/fifofs/fifo.h> 72 73#include <nfs/nfsproto.h> 74#include <nfsclient/nfs.h> 75#include <nfsclient/nfsnode.h> 76#include <nfsclient/nfsmount.h> 77#include <nfsclient/nfs_kdtrace.h> 78#include <nfsclient/nfs_lock.h> 79#include <nfs/xdr_subs.h> 80#include <nfsclient/nfsm_subs.h> 81 82#include <net/if.h> 83#include <netinet/in.h> 84#include <netinet/in_var.h> 85 86#include <machine/stdarg.h> 87 88#ifdef KDTRACE_HOOKS 89#include <sys/dtrace_bsd.h> 90 91dtrace_nfsclient_accesscache_flush_probe_func_t 92 dtrace_nfsclient_accesscache_flush_done_probe; 93uint32_t nfsclient_accesscache_flush_done_id; 94 95dtrace_nfsclient_accesscache_get_probe_func_t 96 dtrace_nfsclient_accesscache_get_hit_probe, 97 dtrace_nfsclient_accesscache_get_miss_probe; 98uint32_t nfsclient_accesscache_get_hit_id; 99uint32_t nfsclient_accesscache_get_miss_id; 100 101dtrace_nfsclient_accesscache_load_probe_func_t 102 dtrace_nfsclient_accesscache_load_done_probe; 103uint32_t nfsclient_accesscache_load_done_id; 104#endif /* !KDTRACE_HOOKS */ 105 106/* Defs */ 107#define TRUE 1 108#define 
FALSE		0

/*
 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
 * calls are not in getblk() and brelse() so that they would not be necessary
 * here.
 */
#ifndef B_VMIO
#define	vfs_busy_pages(bp, f)
#endif

/* Forward declarations for the vnode operations implemented in this file. */
static vop_read_t	nfsfifo_read;
static vop_write_t	nfsfifo_write;
static vop_close_t	nfsfifo_close;
static int	nfs_flush(struct vnode *, int, int);
static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *);
static vop_lookup_t	nfs_lookup;
static vop_create_t	nfs_create;
static vop_mknod_t	nfs_mknod;
static vop_open_t	nfs_open;
static vop_close_t	nfs_close;
static vop_access_t	nfs_access;
static vop_getattr_t	nfs_getattr;
static vop_setattr_t	nfs_setattr;
static vop_read_t	nfs_read;
static vop_fsync_t	nfs_fsync;
static vop_remove_t	nfs_remove;
static vop_link_t	nfs_link;
static vop_rename_t	nfs_rename;
static vop_mkdir_t	nfs_mkdir;
static vop_rmdir_t	nfs_rmdir;
static vop_symlink_t	nfs_symlink;
static vop_readdir_t	nfs_readdir;
static vop_strategy_t	nfs_strategy;
static	int	nfs_lookitup(struct vnode *, const char *, int,
		    struct ucred *, struct thread *, struct nfsnode **);
static	int	nfs_sillyrename(struct vnode *, struct vnode *,
		    struct componentname *);
static vop_access_t	nfsspec_access;
static vop_readlink_t	nfs_readlink;
static vop_print_t	nfs_print;
static vop_advlock_t	nfs_advlock;
static vop_advlockasync_t nfs_advlockasync;

/*
 * Global vfs data structures for nfs
 */
/* Operations vector for regular NFS vnodes (files, dirs, symlinks). */
struct vop_vector nfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		nfs_access,
	.vop_advlock =		nfs_advlock,
	.vop_advlockasync =	nfs_advlockasync,
	.vop_close =		nfs_close,
	.vop_create =		nfs_create,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_getpages =		nfs_getpages,
	.vop_putpages =		nfs_putpages,
	.vop_inactive =		nfs_inactive,
	.vop_link =		nfs_link,
	.vop_lookup =		nfs_lookup,
	.vop_mkdir =		nfs_mkdir,
	.vop_mknod =		nfs_mknod,
	.vop_open =		nfs_open,
	.vop_print =		nfs_print,
	.vop_read =		nfs_read,
	.vop_readdir =		nfs_readdir,
	.vop_readlink =		nfs_readlink,
	.vop_reclaim =		nfs_reclaim,
	.vop_remove =		nfs_remove,
	.vop_rename =		nfs_rename,
	.vop_rmdir =		nfs_rmdir,
	.vop_setattr =		nfs_setattr,
	.vop_strategy =		nfs_strategy,
	.vop_symlink =		nfs_symlink,
	.vop_write =		nfs_write,
};

/* Operations vector for NFS fifo vnodes; unhandled ops fall through to fifofs. */
struct vop_vector nfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_access =		nfsspec_access,
	.vop_close =		nfsfifo_close,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_inactive =		nfs_inactive,
	.vop_print =		nfs_print,
	.vop_read =		nfsfifo_read,
	.vop_reclaim =		nfs_reclaim,
	.vop_setattr =		nfs_setattr,
	.vop_write =		nfsfifo_write,
};

/* RPC helpers implemented later in this file. */
static int	nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
			     struct componentname *cnp, struct vattr *vap);
static int	nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
			      struct ucred *cred, struct thread *td);
static int	nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
			      int fnamelen, struct vnode *tdvp,
			      const char *tnameptr, int tnamelen,
			      struct ucred *cred, struct thread *td);
static int	nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
			     struct sillyrename *sp);

/*
 * Global variables
 */
struct mtx 	nfs_iod_mtx;
struct proc	*nfs_iodwant[NFS_MAXASYNCDAEMON];
struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
int		 nfs_numasync = 0;
vop_advlock_t	*nfs_advlock_p = nfs_dolock;
vop_reclaim_t	*nfs_reclaim_p = NULL;
#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))

SYSCTL_DECL(_vfs_nfs);

/* Lifetime (seconds) of cached ACCESS results; 0 disables the cache. */
static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

/* When set, a GETATTR also issues a blanket ACCESS RPC to warm the cache. */
static int	nfs_prime_access_cache = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
	   &nfs_prime_access_cache, 0,
	   "Prime NFS ACCESS cache when fetching attributes");

/* When set, nfs_close() commits (not just writes) dirty data for NFSv3. */
static int	nfsv3_commit_on_close = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");

/* When set, nfs_close() pushes dirty mmap'ed pages before flushing buffers. */
static int	nfs_clean_pages_on_close = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");

/* Master switch for O_DIRECT (uncached) I/O support on NFS files. */
int nfs_directio_enable = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
	   &nfs_directio_enable, 0, "Enable NFS directio");

/*
 * This sysctl allows other processes to mmap a file that has been opened
 * O_DIRECT by a process.  In general, having processes mmap the file while
 * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
 * this by default to prevent DoS attacks - to prevent a malicious user from
 * opening up files O_DIRECT preventing other users from mmap'ing these
 * files.  "Protected" environments where stricter consistency guarantees are
 * required can disable this knob.  The process that opened the file O_DIRECT
 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
 * meaningful.
 */
int nfs_directio_allow_mmap = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
	   &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");

#if 0
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");

SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
#endif

/* Union of every NFSv3 ACCESS right; used for blanket cache-priming requests. */
#define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)

/*
 * SMP Locking Note :
 * The list of locks after the description of the lock is the ordering
 * of other locks acquired with the lock held.
 * np->n_mtx : Protects the fields in the nfsnode.
       VM Object Lock
       VI_MTX (acquired indirectly)
 * nmp->nm_mtx : Protects the fields in the nfsmount.
       rep->r_mtx
 * nfs_iod_mtx : Global lock, protects shared nfsiod state.
 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
       nmp->nm_mtx
       rep->r_mtx
 * rep->r_mtx : Protects the fields in an nfsreq.
 */

/*
 * Perform an NFSv3 ACCESS RPC for 'wmode' over the wire and record the
 * rights the server granted in the per-nfsnode access cache (LRU of
 * NFS_ACCESSCACHESIZE entries, keyed by uid).  On success the granted
 * mode is returned through *retmode when retmode != NULL.
 *
 * NOTE: the nfsm_* macros below hide control flow — on a decode/RPC
 * failure they set 'error' and goto the nfsmout: label.
 */
static int
nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
    struct ucred *cred, uint32_t *retmode)
{
	const int v3 = 1;
	u_int32_t *tl;
	int error = 0, attrflag, i, lrupos;

	struct mbuf *mreq, *mrep, *md, *mb;
	caddr_t bpos, dpos;
	u_int32_t rmode;
	struct nfsnode *np = VTONFS(vp);

	nfsstats.rpccnt[NFSPROC_ACCESS]++;
	mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(wmode);
	nfsm_request(vp, NFSPROC_ACCESS, td, cred);
	nfsm_postop_attr(vp, attrflag);
	if (!error) {
		lrupos = 0;
		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
		rmode = fxdr_unsigned(u_int32_t, *tl);
		mtx_lock(&np->n_mtx);
		/*
		 * Update the entry for this uid if present; otherwise track
		 * the least-recently-stamped slot for replacement.
		 */
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
			if (np->n_accesscache[i].uid == cred->cr_uid) {
				np->n_accesscache[i].mode = rmode;
				np->n_accesscache[i].stamp = time_second;
				break;
			}
			if (i > 0 && np->n_accesscache[i].stamp <
			    np->n_accesscache[lrupos].stamp)
				lrupos = i;
		}
		/* No hit: evict the LRU slot. */
		if (i == NFS_ACCESSCACHESIZE) {
			np->n_accesscache[lrupos].uid = cred->cr_uid;
			np->n_accesscache[lrupos].mode = rmode;
			np->n_accesscache[lrupos].stamp = time_second;
		}
		mtx_unlock(&np->n_mtx);
		if (retmode != NULL)
			*retmode = rmode;
		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
	}
	m_freem(mrep);
nfsmout:
#ifdef KDTRACE_HOOKS
	if (error) {
		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
		    error);
	}
#endif
	return (error);
}

/*
 * nfs access vnode op.
 * For nfs version 2, just return ok. File accesses may fail later.
 * For nfs version 3, use the access rpc to check accessibility. If file modes
 * are changed on the server, accesses might still fail later.
 */
static int
nfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error = 0, i, gotahit;
	u_int32_t mode, rmode, wmode;
	int v3 = NFS_ISV3(vp);
	struct nfsnode *np = VTONFS(vp);

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}
	/*
	 * For nfs v3, check to see if we have done this recently, and if
	 * so return our cached result instead of making an ACCESS call.
	 * If not, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about.
	 */
	if (v3) {
		/* Translate the requested accmode into NFSv3 ACCESS bits. */
		if (ap->a_accmode & VREAD)
			mode = NFSV3ACCESS_READ;
		else
			mode = 0;
		if (vp->v_type != VDIR) {
			if (ap->a_accmode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
			if (ap->a_accmode & VEXEC)
				mode |= NFSV3ACCESS_EXECUTE;
		} else {
			if (ap->a_accmode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
				    NFSV3ACCESS_DELETE);
			if (ap->a_accmode & VEXEC)
				mode |= NFSV3ACCESS_LOOKUP;
		}
		/* XXX safety belt, only make blanket request if caching */
		if (nfsaccess_cache_timeout > 0) {
			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
			    NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
			    NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
		} else {
			wmode = mode;
		}

		/*
		 * Does our cached result allow us to give a definite yes to
		 * this request?
		 */
		gotahit = 0;
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
			if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
				/* Hit only if fresh and covering all bits. */
				if (time_second < (np->n_accesscache[i].stamp +
				    nfsaccess_cache_timeout) &&
				    (np->n_accesscache[i].mode & mode) == mode) {
					nfsstats.accesscache_hits++;
					gotahit = 1;
				}
				break;
			}
		}
		mtx_unlock(&np->n_mtx);
#ifdef KDTRACE_HOOKS
		if (gotahit)
			KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
			    ap->a_cred->cr_uid, mode);
		else
			KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
			    ap->a_cred->cr_uid, mode);
#endif
		if (gotahit == 0) {
			/*
			 * Either a no, or a don't know.  Go to the wire.
			 */
			nfsstats.accesscache_misses++;
			error = nfs3_access_otw(vp, wmode, ap->a_td, ap->a_cred,
			    &rmode);
			if (!error) {
				if ((rmode & mode) != mode)
					error = EACCES;
			}
		}
		return (error);
	} else {
		/* NFSv2: no ACCESS RPC exists; emulate locally. */
		if ((error = nfsspec_access(ap)) != 0) {
			return (error);
		}
		/*
		 * Attempt to prevent a mapped root from accessing a file
		 * which it shouldn't.  We try to read a byte from the file
		 * if the user is root and the file is not zero length.
		 * After calling nfsspec_access, we should have the correct
		 * file size cached.
		 */
		mtx_lock(&np->n_mtx);
		if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
		    && VTONFS(vp)->n_size > 0) {
			struct iovec aiov;
			struct uio auio;
			char buf[1];

			mtx_unlock(&np->n_mtx);
			aiov.iov_base = buf;
			aiov.iov_len = 1;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = 0;
			auio.uio_resid = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = ap->a_td;

			if (vp->v_type == VREG)
				error = nfs_readrpc(vp, &auio, ap->a_cred);
			else if (vp->v_type == VDIR) {
				char* bp;
				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
				aiov.iov_base = bp;
				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
				free(bp, M_TEMP);
			} else if (vp->v_type == VLNK)
				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
			else
				error = EACCES;
		} else
			mtx_unlock(&np->n_mtx);
		return (error);
	}
}

int nfs_otw_getattr_avoid = 0;

/*
 * nfs open vnode op
 * Check to see if the type is ok
 * and that deletion is not in progress.
 * For paged in text files, you will need to flush the page cache
 * if consistency is lost.
 */
/* ARGSUSED */
static int
nfs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;
	int fmode = ap->a_mode;

	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
		return (EOPNOTSUPP);

	/*
	 * Get a valid lease. If cached data is stale, flush it.
	 */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & NMODIFIED) {
		/* Locally-modified data: flush it and refetch attributes. */
		mtx_unlock(&np->n_mtx);
		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
		if (error == EINTR || error == EIO)
			return (error);
		np->n_attrstamp = 0;
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
		if (vp->v_type == VDIR)
			np->n_direofoffset = 0;
		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
		if (error)
			return (error);
		mtx_lock(&np->n_mtx);
		np->n_mtime = vattr.va_mtime;
		mtx_unlock(&np->n_mtx);
	} else {
		struct thread *td = curthread;

		/*
		 * Force a fresh GETATTR unless the cached attributes were
		 * loaded by this very thread/syscall (close-to-open
		 * consistency check).
		 */
		if (np->n_ac_ts_syscalls != td->td_syscalls ||
		    np->n_ac_ts_tid != td->td_tid ||
		    td->td_proc == NULL ||
		    np->n_ac_ts_pid != td->td_proc->p_pid) {
			np->n_attrstamp = 0;
			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
		}
		mtx_unlock(&np->n_mtx);
		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
		if (error)
			return (error);
		mtx_lock(&np->n_mtx);
		if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
			/* Server-side change detected: invalidate caches. */
			if (vp->v_type == VDIR)
				np->n_direofoffset = 0;
			mtx_unlock(&np->n_mtx);
			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error == EINTR || error == EIO) {
				return (error);
			}
			mtx_lock(&np->n_mtx);
			np->n_mtime = vattr.va_mtime;
		}
		mtx_unlock(&np->n_mtx);
	}
	/*
	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
	 */
	if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		if (np->n_directio_opens == 0) {
			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			if (error)
				return (error);
			mtx_lock(&np->n_mtx);
			np->n_flag |= NNONCACHE;
			mtx_unlock(&np->n_mtx);
		}
		np->n_directio_opens++;
	}
	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
	return (0);
}

/*
 * nfs close vnode op
 * What an NFS client should do upon close after writing is a debatable issue.
 * Most NFS clients push delayed writes to the server upon close, basically for
 * two reasons:
 * 1 - So that any write errors may be reported back to the client process
 *     doing the close system call. By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 * 2 - To put a worst case upper bound on cache inconsistency between
 *     multiple clients for the file.
 * There is also a consistency problem for Version 2 of the protocol w.r.t.
 * not being able to tell if other clients are writing a file concurrently,
 * since there is no way of knowing if the changed modify time in the reply
 * is only due to the write for this client.
 * (NFS Version 3 provides weak cache consistency data in the reply that
 * should be sufficient to detect and handle this case.)
 *
 * The current code does the following:
 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
 *                     or commit them (this satisfies 1 and 2 except for the
 *                     case where the server crashes after this close but
 *                     before the commit RPC, which is felt to be "good
 *                     enough". Changing the last argument to nfs_flush() to
 *                     a 1 would force a commit operation, if it is felt a
 *                     commit is necessary now.
 */
/* ARGSUSED */
static int
nfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	int fmode = ap->a_fflag;

	if (vp->v_type == VREG) {
		/*
		 * Examine and clean dirty pages, regardless of NMODIFIED.
		 * This closes a major hole in close-to-open consistency.
		 * We want to push out all dirty pages (and buffers) on
		 * close, regardless of whether they were dirtied by
		 * mmap'ed writes or via write().
		 */
		if (nfs_clean_pages_on_close && vp->v_object) {
			VM_OBJECT_LOCK(vp->v_object);
			vm_object_page_clean(vp->v_object, 0, 0, 0);
			VM_OBJECT_UNLOCK(vp->v_object);
		}
		mtx_lock(&np->n_mtx);
		if (np->n_flag & NMODIFIED) {
			mtx_unlock(&np->n_mtx);
			if (NFS_ISV3(vp)) {
				/*
				 * Under NFSv3 we have dirty buffers to dispose of.  We
				 * must flush them to the NFS server.  We have the option
				 * of waiting all the way through the commit rpc or just
				 * waiting for the initial write.  The default is to only
				 * wait through the initial write so the data is in the
				 * server's cache, which is roughly similar to the state
				 * a standard disk subsystem leaves the file in on close().
				 *
				 * We cannot clear the NMODIFIED bit in np->n_flag due to
				 * potential races with other processes, and certainly
				 * cannot clear it if we don't commit.
				 */
				int cm = nfsv3_commit_on_close ? 1 : 0;
				error = nfs_flush(vp, MNT_WAIT, cm);
				/* np->n_flag &= ~NMODIFIED; */
			} else
				error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
			mtx_lock(&np->n_mtx);
		}
		/* Report any deferred async write error exactly once. */
		if (np->n_flag & NWRITEERR) {
			np->n_flag &= ~NWRITEERR;
			error = np->n_error;
		}
		mtx_unlock(&np->n_mtx);
	}
	if (nfs_directio_enable)
		KASSERT((np->n_directio_asyncwr == 0),
			("nfs_close: dirty unflushed (%d) directio buffers\n",
			 np->n_directio_asyncwr));
	/* Drop this O_DIRECT open; re-enable caching on the last one. */
	if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
		mtx_lock(&np->n_mtx);
		KASSERT((np->n_directio_opens > 0),
			("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
		np->n_directio_opens--;
		if (np->n_directio_opens == 0)
			np->n_flag &= ~NNONCACHE;
		mtx_unlock(&np->n_mtx);
	}
	return (error);
}

/*
 * nfs getattr call from vfs.
 */
static int
nfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct thread *td = curthread;
	struct vattr *vap = ap->a_vap;
	struct vattr vattr;
	caddr_t bpos, dpos;
	int error = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(vp);

	/*
	 * Update local times for special files.
	 */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & (NACC | NUPD))
		np->n_flag |= NCHG;
	mtx_unlock(&np->n_mtx);
	/*
	 * First look in the cache.
	 */
	if (nfs_getattrcache(vp, &vattr) == 0)
		goto nfsmout;
	/*
	 * Optionally piggyback a blanket ACCESS RPC, which also refreshes
	 * the attribute cache as a side effect; retry the cache afterwards.
	 */
	if (v3 && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) {
		nfsstats.accesscache_misses++;
		nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred, NULL);
		if (nfs_getattrcache(vp, &vattr) == 0)
			goto nfsmout;
	}
	/* Cache miss: go over the wire with a GETATTR RPC. */
	nfsstats.rpccnt[NFSPROC_GETATTR]++;
	mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred);
	if (!error) {
		nfsm_loadattr(vp, &vattr);
	}
	m_freem(mrep);
nfsmout:
	/*
	 * Copy the attributes out field-by-field.
	 * NOTE(review): on an RPC error path 'vattr' may be uninitialized
	 * here; callers are expected to ignore *vap when error != 0 —
	 * confirm before relying on *vap after a failure.
	 */
	vap->va_type = vattr.va_type;
	vap->va_mode = vattr.va_mode;
	vap->va_nlink = vattr.va_nlink;
	vap->va_uid = vattr.va_uid;
	vap->va_gid = vattr.va_gid;
	vap->va_fsid = vattr.va_fsid;
	vap->va_fileid = vattr.va_fileid;
	vap->va_size = vattr.va_size;
	vap->va_blocksize = vattr.va_blocksize;
	vap->va_atime = vattr.va_atime;
	vap->va_mtime = vattr.va_mtime;
	vap->va_ctime = vattr.va_ctime;
	vap->va_gen = vattr.va_gen;
	vap->va_flags = vattr.va_flags;
	vap->va_rdev = vattr.va_rdev;
	vap->va_bytes = vattr.va_bytes;
	vap->va_filerev = vattr.va_filerev;

	return (error);
}

/*
 * nfs setattr call.
747 */ 748static int 749nfs_setattr(struct vop_setattr_args *ap) 750{ 751 struct vnode *vp = ap->a_vp; 752 struct nfsnode *np = VTONFS(vp); 753 struct vattr *vap = ap->a_vap; 754 struct thread *td = curthread; 755 int error = 0; 756 u_quad_t tsize; 757 758#ifndef nolint 759 tsize = (u_quad_t)0; 760#endif 761 762 /* 763 * Setting of flags is not supported. 764 */ 765 if (vap->va_flags != VNOVAL) 766 return (EOPNOTSUPP); 767 768 /* 769 * Disallow write attempts if the filesystem is mounted read-only. 770 */ 771 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 772 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 773 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && 774 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 775 error = EROFS; 776 goto out; 777 } 778 if (vap->va_size != VNOVAL) { 779 switch (vp->v_type) { 780 case VDIR: 781 return (EISDIR); 782 case VCHR: 783 case VBLK: 784 case VSOCK: 785 case VFIFO: 786 if (vap->va_mtime.tv_sec == VNOVAL && 787 vap->va_atime.tv_sec == VNOVAL && 788 vap->va_mode == (mode_t)VNOVAL && 789 vap->va_uid == (uid_t)VNOVAL && 790 vap->va_gid == (gid_t)VNOVAL) 791 return (0); 792 vap->va_size = VNOVAL; 793 break; 794 default: 795 /* 796 * Disallow write attempts if the filesystem is 797 * mounted read-only. 798 */ 799 if (vp->v_mount->mnt_flag & MNT_RDONLY) 800 return (EROFS); 801 /* 802 * We run vnode_pager_setsize() early (why?), 803 * we must set np->n_size now to avoid vinvalbuf 804 * V_SAVE races that might setsize a lower 805 * value. 
806 */ 807 mtx_lock(&np->n_mtx); 808 tsize = np->n_size; 809 mtx_unlock(&np->n_mtx); 810 error = nfs_meta_setsize(vp, ap->a_cred, td, 811 vap->va_size); 812 mtx_lock(&np->n_mtx); 813 if (np->n_flag & NMODIFIED) { 814 tsize = np->n_size; 815 mtx_unlock(&np->n_mtx); 816 if (vap->va_size == 0) 817 error = nfs_vinvalbuf(vp, 0, td, 1); 818 else 819 error = nfs_vinvalbuf(vp, V_SAVE, td, 1); 820 if (error) { 821 vnode_pager_setsize(vp, tsize); 822 goto out; 823 } 824 } else 825 mtx_unlock(&np->n_mtx); 826 /* 827 * np->n_size has already been set to vap->va_size 828 * in nfs_meta_setsize(). We must set it again since 829 * nfs_loadattrcache() could be called through 830 * nfs_meta_setsize() and could modify np->n_size. 831 */ 832 mtx_lock(&np->n_mtx); 833 np->n_vattr.va_size = np->n_size = vap->va_size; 834 mtx_unlock(&np->n_mtx); 835 }; 836 } else { 837 mtx_lock(&np->n_mtx); 838 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 839 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 840 mtx_unlock(&np->n_mtx); 841 if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && 842 (error == EINTR || error == EIO)) 843 return error; 844 } else 845 mtx_unlock(&np->n_mtx); 846 } 847 error = nfs_setattrrpc(vp, vap, ap->a_cred); 848 if (error && vap->va_size != VNOVAL) { 849 mtx_lock(&np->n_mtx); 850 np->n_size = np->n_vattr.va_size = tsize; 851 vnode_pager_setsize(vp, tsize); 852 mtx_unlock(&np->n_mtx); 853 } 854out: 855 return (error); 856} 857 858/* 859 * Do an nfs setattr rpc. 
 */

/*
 * Marshal and send a SETATTR RPC for 'vap' on 'vp' using 'cred'.
 * For v3 the request uses nfsm_v3attrbuild plus a "don't check guard
 * ctime" boolean; for v2 a fixed nfsv2_sattr is filled in, with
 * nfs_xdrneg1 (~0) marking fields that are not being changed.  A v3
 * reply carries wcc data, so the per-uid ACCESS cache is flushed since
 * mode/owner may have changed.  The nfsm_* macros goto nfsmout on error.
 */
static int
nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred)
{
	struct nfsv2_sattr *sp;
	struct nfsnode *np = VTONFS(vp);
	caddr_t bpos, dpos;
	u_int32_t *tl;
	int error = 0, i, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(vp);

	nfsstats.rpccnt[NFSPROC_SETATTR]++;
	mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	if (v3) {
		nfsm_v3attrbuild(vap, TRUE);
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		*tl = nfs_false;
	} else {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		if (vap->va_mode == (mode_t)VNOVAL)
			sp->sa_mode = nfs_xdrneg1;
		else
			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
		if (vap->va_uid == (uid_t)VNOVAL)
			sp->sa_uid = nfs_xdrneg1;
		else
			sp->sa_uid = txdr_unsigned(vap->va_uid);
		if (vap->va_gid == (gid_t)VNOVAL)
			sp->sa_gid = nfs_xdrneg1;
		else
			sp->sa_gid = txdr_unsigned(vap->va_gid);
		sp->sa_size = txdr_unsigned(vap->va_size);
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(vp, NFSPROC_SETATTR, curthread, cred);
	if (v3) {
		/* Attributes changed: cached ACCESS results may be stale. */
		mtx_lock(&np->n_mtx);
		for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
			np->n_accesscache[i].stamp = 0;
		mtx_unlock(&np->n_mtx);
		KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
		nfsm_wcc_data(vp, wccflag);
	} else
		nfsm_loadattr(vp, NULL);
	m_freem(mrep);
nfsmout:
	return (error);
}

/*
 * nfs lookup call, one step at a time...
 * First look in cache
 * If not found, unlock the directory nfsnode and do the rpc
 */
static int
nfs_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct mount *mp = dvp->v_mount;
	struct vattr vattr;
	int flags = cnp->cn_flags;
	struct vnode *newvp;
	struct nfsmount *nmp;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	long len;
	nfsfh_t *fhp;
	struct nfsnode *np;
	int error = 0, attrflag, fhsize, ltype;
	int v3 = NFS_ISV3(dvp);
	struct thread *td = cnp->cn_thread;

	*vpp = NULLVP;
	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	if (dvp->v_type != VDIR)
		return (ENOTDIR);
	nmp = VFSTONFS(mp);
	np = VTONFS(dvp);
	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
		*vpp = NULLVP;
		return (error);
	}
	error = cache_lookup(dvp, vpp, cnp);
	if (error > 0 && error != ENOENT)
		return (error);
	if (error == -1) {
		/*
		 * We only accept a positive hit in the cache if the
		 * change time of the file matches our cached copy.
		 * Otherwise, we discard the cache entry and fallback
		 * to doing a lookup RPC.
		 */
		newvp = *vpp;
		if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred)
		    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
			nfsstats.lookupcache_hits++;
			if (cnp->cn_nameiop != LOOKUP &&
			    (flags & ISLASTCN))
				cnp->cn_flags |= SAVENAME;
			return (0);
		}
		cache_purge(newvp);
		if (dvp != newvp)
			vput(newvp);
		else
			vrele(newvp);
		*vpp = NULLVP;
	} else if (error == ENOENT) {
		if (dvp->v_iflag & VI_DOOMED)
			return (ENOENT);
		/*
		 * We only accept a negative hit in the cache if the
		 * modification time of the parent directory matches
		 * our cached copy.  Otherwise, we discard all of the
		 * negative cache entries for this directory.
		 */
		if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
		    vattr.va_mtime.tv_sec == np->n_dmtime) {
			nfsstats.lookupcache_hits++;
			return (ENOENT);
		}
		cache_purge_negative(dvp);
		mtx_lock(&np->n_mtx);
		np->n_dmtime = 0;
		mtx_unlock(&np->n_mtx);
	}
	/* Cache miss: issue the LOOKUP RPC. */
	error = 0;
	newvp = NULLVP;
	nfsstats.lookupcache_misses++;
	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
	len = cnp->cn_namelen;
	mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
	    NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
	if (error) {
		if (v3) {
			nfsm_postop_attr(dvp, attrflag);
			m_freem(mrep);
		}
		goto nfsmout;
	}
	nfsm_getfh(fhp, fhsize, v3);

	/*
	 * Handle RENAME case...
	 */
	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
		/* Renaming a directory onto itself is EISDIR. */
		if (NFS_CMPFH(np, fhp, fhsize)) {
			m_freem(mrep);
			return (EISDIR);
		}
		error = nfs_nget(mp, fhp, fhsize, &np, LK_EXCLUSIVE);
		if (error) {
			m_freem(mrep);
			return (error);
		}
		newvp = NFSTOV(np);
		if (v3) {
			nfsm_postop_attr(newvp, attrflag);
			nfsm_postop_attr(dvp, attrflag);
		} else
			nfsm_loadattr(newvp, NULL);
		*vpp = newvp;
		m_freem(mrep);
		cnp->cn_flags |= SAVENAME;
		return (0);
	}

	if (flags & ISDOTDOT) {
		/*
		 * ".." crosses up the tree, so the parent must be unlocked
		 * while the child is fetched; vfs_busy() keeps the mount
		 * from going away and VI_DOOMED is rechecked afterwards.
		 */
		ltype = VOP_ISLOCKED(dvp);
		error = vfs_busy(mp, MBF_NOWAIT);
		if (error != 0) {
			vfs_ref(mp);
			VOP_UNLOCK(dvp, 0);
			error = vfs_busy(mp, 0);
			vn_lock(dvp, ltype | LK_RETRY);
			vfs_rel(mp);
			if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
				vfs_unbusy(mp);
				error = ENOENT;
			}
			if (error != 0) {
				m_freem(mrep);
				return (error);
			}
		}
		VOP_UNLOCK(dvp, 0);
		error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
		if (error == 0)
			newvp = NFSTOV(np);
		vfs_unbusy(mp);
		vn_lock(dvp, ltype | LK_RETRY);
		if (dvp->v_iflag & VI_DOOMED) {
			if (error == 0) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
			error = ENOENT;
		}
		if (error) {
			m_freem(mrep);
			return (error);
		}
	} else if (NFS_CMPFH(np, fhp, fhsize)) {
		/* Lookup of "." — just reference the directory itself. */
		VREF(dvp);
		newvp = dvp;
	} else {
		error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
		if (error) {
			m_freem(mrep);
			return (error);
		}
		newvp = NFSTOV(np);
	}
	if (v3) {
		nfsm_postop_attr(newvp, attrflag);
		nfsm_postop_attr(dvp, attrflag);
	} else
		nfsm_loadattr(newvp, NULL);
	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
		cnp->cn_flags |= SAVENAME;
	if ((cnp->cn_flags & MAKEENTRY) &&
	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
		cache_enter(dvp, newvp, cnp);
	}
	*vpp = newvp;
	m_freem(mrep);
nfsmout:
	if (error) {
		if (newvp != NULLVP) {
			vput(newvp);
			*vpp = NULLVP;
		}

		if (error != ENOENT)
			goto done;

		/* The requested file was not found. */
		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
		    (flags & ISLASTCN)) {
			/*
			 * XXX: UFS does a full VOP_ACCESS(dvp,
			 * VWRITE) here instead of just checking
			 * MNT_RDONLY.
			 */
			if (mp->mnt_flag & MNT_RDONLY)
				return (EROFS);
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}

		if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
			/*
			 * Maintain n_dmtime as the modification time
			 * of the parent directory when the oldest -ve
			 * name cache entry for this directory was
			 * added.
			 */
			mtx_lock(&np->n_mtx);
			if (np->n_dmtime == 0)
				np->n_dmtime = np->n_vattr.va_mtime.tv_sec;
			mtx_unlock(&np->n_mtx);
			cache_enter(dvp, NULL, cnp);
		}
		return (ENOENT);
	}
done:
	return (error);
}

/*
 * nfs read call.
 * Just call nfs_bioread() to do the work.
 */
static int
nfs_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VREG:
		return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
	case VDIR:
		return (EISDIR);
	default:
		return (EOPNOTSUPP);
	}
}

/*
 * nfs readlink call
 */
static int
nfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_type != VLNK)
		return (EINVAL);
	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
}

/*
 * Do a readlink rpc.
 * Called by nfs_doio() from below the buffer cache.
 */
int
nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	caddr_t bpos, dpos;
	int error = 0, len, attrflag;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(vp);

	nfsstats.rpccnt[NFSPROC_READLINK]++;
	mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
	if (v3)
		nfsm_postop_attr(vp, attrflag);
	if (!error) {
		nfsm_strsiz(len, NFS_MAXPATHLEN);
		/*
		 * A maximum-length reply may be padded; trust the cached
		 * file size when it is smaller.
		 */
		if (len == NFS_MAXPATHLEN) {
			struct nfsnode *np = VTONFS(vp);
			mtx_lock(&np->n_mtx);
			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
				len = np->n_size;
			mtx_unlock(&np->n_mtx);
		}
		nfsm_mtouio(uiop, len);
	}
	m_freem(mrep);
nfsmout:
	return (error);
}

/*
 * nfs read rpc call
 * Ditto above
 */
int
nfs_readrpc(struct vnode *vp, struct uio
*uiop, struct ucred *cred) 1220{ 1221 u_int32_t *tl; 1222 caddr_t bpos, dpos; 1223 struct mbuf *mreq, *mrep, *md, *mb; 1224 struct nfsmount *nmp; 1225 int error = 0, len, retlen, tsiz, eof, attrflag; 1226 int v3 = NFS_ISV3(vp); 1227 int rsize; 1228 1229#ifndef nolint 1230 eof = 0; 1231#endif 1232 nmp = VFSTONFS(vp->v_mount); 1233 tsiz = uiop->uio_resid; 1234 mtx_lock(&nmp->nm_mtx); 1235 if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { 1236 mtx_unlock(&nmp->nm_mtx); 1237 return (EFBIG); 1238 } 1239 rsize = nmp->nm_rsize; 1240 mtx_unlock(&nmp->nm_mtx); 1241 while (tsiz > 0) { 1242 nfsstats.rpccnt[NFSPROC_READ]++; 1243 len = (tsiz > rsize) ? rsize : tsiz; 1244 mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); 1245 mb = mreq; 1246 bpos = mtod(mb, caddr_t); 1247 nfsm_fhtom(vp, v3); 1248 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3); 1249 if (v3) { 1250 txdr_hyper(uiop->uio_offset, tl); 1251 *(tl + 2) = txdr_unsigned(len); 1252 } else { 1253 *tl++ = txdr_unsigned(uiop->uio_offset); 1254 *tl++ = txdr_unsigned(len); 1255 *tl = 0; 1256 } 1257 nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred); 1258 if (v3) { 1259 nfsm_postop_attr(vp, attrflag); 1260 if (error) { 1261 m_freem(mrep); 1262 goto nfsmout; 1263 } 1264 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); 1265 eof = fxdr_unsigned(int, *(tl + 1)); 1266 } else { 1267 nfsm_loadattr(vp, NULL); 1268 } 1269 nfsm_strsiz(retlen, rsize); 1270 nfsm_mtouio(uiop, retlen); 1271 m_freem(mrep); 1272 tsiz -= retlen; 1273 if (v3) { 1274 if (eof || retlen == 0) { 1275 tsiz = 0; 1276 } 1277 } else if (retlen < len) { 1278 tsiz = 0; 1279 } 1280 } 1281nfsmout: 1282 return (error); 1283} 1284 1285/* 1286 * nfs write call 1287 */ 1288int 1289nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 1290 int *iomode, int *must_commit) 1291{ 1292 u_int32_t *tl; 1293 int32_t backup; 1294 caddr_t bpos, dpos; 1295 struct mbuf *mreq, *mrep, *md, *mb; 1296 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 1297 
int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; 1298 int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; 1299 int wsize; 1300 1301#ifndef DIAGNOSTIC 1302 if (uiop->uio_iovcnt != 1) 1303 panic("nfs: writerpc iovcnt > 1"); 1304#endif 1305 *must_commit = 0; 1306 tsiz = uiop->uio_resid; 1307 mtx_lock(&nmp->nm_mtx); 1308 if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { 1309 mtx_unlock(&nmp->nm_mtx); 1310 return (EFBIG); 1311 } 1312 wsize = nmp->nm_wsize; 1313 mtx_unlock(&nmp->nm_mtx); 1314 while (tsiz > 0) { 1315 nfsstats.rpccnt[NFSPROC_WRITE]++; 1316 len = (tsiz > wsize) ? wsize : tsiz; 1317 mreq = nfsm_reqhead(vp, NFSPROC_WRITE, 1318 NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); 1319 mb = mreq; 1320 bpos = mtod(mb, caddr_t); 1321 nfsm_fhtom(vp, v3); 1322 if (v3) { 1323 tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED); 1324 txdr_hyper(uiop->uio_offset, tl); 1325 tl += 2; 1326 *tl++ = txdr_unsigned(len); 1327 *tl++ = txdr_unsigned(*iomode); 1328 *tl = txdr_unsigned(len); 1329 } else { 1330 u_int32_t x; 1331 1332 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED); 1333 /* Set both "begin" and "current" to non-garbage. 
*/ 1334 x = txdr_unsigned((u_int32_t)uiop->uio_offset); 1335 *tl++ = x; /* "begin offset" */ 1336 *tl++ = x; /* "current offset" */ 1337 x = txdr_unsigned(len); 1338 *tl++ = x; /* total to this offset */ 1339 *tl = x; /* size of this write */ 1340 } 1341 nfsm_uiotom(uiop, len); 1342 nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred); 1343 if (v3) { 1344 wccflag = NFSV3_WCCCHK; 1345 nfsm_wcc_data(vp, wccflag); 1346 if (!error) { 1347 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED 1348 + NFSX_V3WRITEVERF); 1349 rlen = fxdr_unsigned(int, *tl++); 1350 if (rlen == 0) { 1351 error = NFSERR_IO; 1352 m_freem(mrep); 1353 break; 1354 } else if (rlen < len) { 1355 backup = len - rlen; 1356 uiop->uio_iov->iov_base = 1357 (char *)uiop->uio_iov->iov_base - 1358 backup; 1359 uiop->uio_iov->iov_len += backup; 1360 uiop->uio_offset -= backup; 1361 uiop->uio_resid += backup; 1362 len = rlen; 1363 } 1364 commit = fxdr_unsigned(int, *tl++); 1365 1366 /* 1367 * Return the lowest committment level 1368 * obtained by any of the RPCs. 
1369 */ 1370 if (committed == NFSV3WRITE_FILESYNC) 1371 committed = commit; 1372 else if (committed == NFSV3WRITE_DATASYNC && 1373 commit == NFSV3WRITE_UNSTABLE) 1374 committed = commit; 1375 mtx_lock(&nmp->nm_mtx); 1376 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){ 1377 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, 1378 NFSX_V3WRITEVERF); 1379 nmp->nm_state |= NFSSTA_HASWRITEVERF; 1380 } else if (bcmp((caddr_t)tl, 1381 (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { 1382 *must_commit = 1; 1383 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, 1384 NFSX_V3WRITEVERF); 1385 } 1386 mtx_unlock(&nmp->nm_mtx); 1387 } 1388 } else { 1389 nfsm_loadattr(vp, NULL); 1390 } 1391 if (wccflag) { 1392 mtx_lock(&(VTONFS(vp))->n_mtx); 1393 VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime; 1394 mtx_unlock(&(VTONFS(vp))->n_mtx); 1395 } 1396 m_freem(mrep); 1397 if (error) 1398 break; 1399 tsiz -= len; 1400 } 1401nfsmout: 1402 if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC) 1403 committed = NFSV3WRITE_FILESYNC; 1404 *iomode = committed; 1405 if (error) 1406 uiop->uio_resid = tsiz; 1407 return (error); 1408} 1409 1410/* 1411 * nfs mknod rpc 1412 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the 1413 * mode set to specify the file type and the size field for rdev. 
1414 */ 1415static int 1416nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, 1417 struct vattr *vap) 1418{ 1419 struct nfsv2_sattr *sp; 1420 u_int32_t *tl; 1421 struct vnode *newvp = NULL; 1422 struct nfsnode *np = NULL; 1423 struct vattr vattr; 1424 caddr_t bpos, dpos; 1425 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; 1426 struct mbuf *mreq, *mrep, *md, *mb; 1427 u_int32_t rdev; 1428 int v3 = NFS_ISV3(dvp); 1429 1430 if (vap->va_type == VCHR || vap->va_type == VBLK) 1431 rdev = txdr_unsigned(vap->va_rdev); 1432 else if (vap->va_type == VFIFO || vap->va_type == VSOCK) 1433 rdev = nfs_xdrneg1; 1434 else { 1435 return (EOPNOTSUPP); 1436 } 1437 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 1438 return (error); 1439 nfsstats.rpccnt[NFSPROC_MKNOD]++; 1440 mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + 1441 + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); 1442 mb = mreq; 1443 bpos = mtod(mb, caddr_t); 1444 nfsm_fhtom(dvp, v3); 1445 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 1446 if (v3) { 1447 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 1448 *tl++ = vtonfsv3_type(vap->va_type); 1449 nfsm_v3attrbuild(vap, FALSE); 1450 if (vap->va_type == VCHR || vap->va_type == VBLK) { 1451 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); 1452 *tl++ = txdr_unsigned(major(vap->va_rdev)); 1453 *tl = txdr_unsigned(minor(vap->va_rdev)); 1454 } 1455 } else { 1456 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 1457 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); 1458 sp->sa_uid = nfs_xdrneg1; 1459 sp->sa_gid = nfs_xdrneg1; 1460 sp->sa_size = rdev; 1461 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 1462 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 1463 } 1464 nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred); 1465 if (!error) { 1466 nfsm_mtofh(dvp, newvp, v3, gotvp); 1467 if (!gotvp) { 1468 if (newvp) { 1469 vput(newvp); 1470 newvp = NULL; 1471 } 1472 error = 
nfs_lookitup(dvp, cnp->cn_nameptr, 1473 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); 1474 if (!error) 1475 newvp = NFSTOV(np); 1476 } 1477 } 1478 if (v3) 1479 nfsm_wcc_data(dvp, wccflag); 1480 m_freem(mrep); 1481nfsmout: 1482 if (error) { 1483 if (newvp) 1484 vput(newvp); 1485 } else { 1486 if (cnp->cn_flags & MAKEENTRY) 1487 cache_enter(dvp, newvp, cnp); 1488 *vpp = newvp; 1489 } 1490 mtx_lock(&(VTONFS(dvp))->n_mtx); 1491 VTONFS(dvp)->n_flag |= NMODIFIED; 1492 if (!wccflag) { 1493 VTONFS(dvp)->n_attrstamp = 0; 1494 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1495 } 1496 mtx_unlock(&(VTONFS(dvp))->n_mtx); 1497 return (error); 1498} 1499 1500/* 1501 * nfs mknod vop 1502 * just call nfs_mknodrpc() to do the work. 1503 */ 1504/* ARGSUSED */ 1505static int 1506nfs_mknod(struct vop_mknod_args *ap) 1507{ 1508 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1509} 1510 1511static u_long create_verf; 1512/* 1513 * nfs file create call 1514 */ 1515static int 1516nfs_create(struct vop_create_args *ap) 1517{ 1518 struct vnode *dvp = ap->a_dvp; 1519 struct vattr *vap = ap->a_vap; 1520 struct componentname *cnp = ap->a_cnp; 1521 struct nfsv2_sattr *sp; 1522 u_int32_t *tl; 1523 struct nfsnode *np = NULL; 1524 struct vnode *newvp = NULL; 1525 caddr_t bpos, dpos; 1526 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; 1527 struct mbuf *mreq, *mrep, *md, *mb; 1528 struct vattr vattr; 1529 int v3 = NFS_ISV3(dvp); 1530 1531 /* 1532 * Oops, not for me.. 
1533 */ 1534 if (vap->va_type == VSOCK) 1535 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); 1536 1537 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 1538 return (error); 1539 if (vap->va_vaflags & VA_EXCLUSIVE) 1540 fmode |= O_EXCL; 1541again: 1542 nfsstats.rpccnt[NFSPROC_CREATE]++; 1543 mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + 1544 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); 1545 mb = mreq; 1546 bpos = mtod(mb, caddr_t); 1547 nfsm_fhtom(dvp, v3); 1548 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 1549 if (v3) { 1550 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 1551 if (fmode & O_EXCL) { 1552 *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); 1553 tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF); 1554#ifdef INET 1555 IN_IFADDR_RLOCK(); 1556 if (!TAILQ_EMPTY(&V_in_ifaddrhead)) 1557 *tl++ = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr; 1558 else 1559#endif 1560 *tl++ = create_verf; 1561#ifdef INET 1562 IN_IFADDR_RUNLOCK(); 1563#endif 1564 *tl = ++create_verf; 1565 } else { 1566 *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); 1567 nfsm_v3attrbuild(vap, FALSE); 1568 } 1569 } else { 1570 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 1571 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); 1572 sp->sa_uid = nfs_xdrneg1; 1573 sp->sa_gid = nfs_xdrneg1; 1574 sp->sa_size = 0; 1575 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 1576 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 1577 } 1578 nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred); 1579 if (!error) { 1580 nfsm_mtofh(dvp, newvp, v3, gotvp); 1581 if (!gotvp) { 1582 if (newvp) { 1583 vput(newvp); 1584 newvp = NULL; 1585 } 1586 error = nfs_lookitup(dvp, cnp->cn_nameptr, 1587 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); 1588 if (!error) 1589 newvp = NFSTOV(np); 1590 } 1591 } 1592 if (v3) 1593 nfsm_wcc_data(dvp, wccflag); 1594 m_freem(mrep); 1595nfsmout: 1596 if (error) { 1597 if (v3 && (fmode & O_EXCL) && error == 
NFSERR_NOTSUPP) { 1598 fmode &= ~O_EXCL; 1599 goto again; 1600 } 1601 if (newvp) 1602 vput(newvp); 1603 } else if (v3 && (fmode & O_EXCL)) { 1604 /* 1605 * We are normally called with only a partially initialized 1606 * VAP. Since the NFSv3 spec says that server may use the 1607 * file attributes to store the verifier, the spec requires 1608 * us to do a SETATTR RPC. FreeBSD servers store the verifier 1609 * in atime, but we can't really assume that all servers will 1610 * so we ensure that our SETATTR sets both atime and mtime. 1611 */ 1612 if (vap->va_mtime.tv_sec == VNOVAL) 1613 vfs_timestamp(&vap->va_mtime); 1614 if (vap->va_atime.tv_sec == VNOVAL) 1615 vap->va_atime = vap->va_mtime; 1616 error = nfs_setattrrpc(newvp, vap, cnp->cn_cred); 1617 if (error) 1618 vput(newvp); 1619 } 1620 if (!error) { 1621 if (cnp->cn_flags & MAKEENTRY) 1622 cache_enter(dvp, newvp, cnp); 1623 *ap->a_vpp = newvp; 1624 } 1625 mtx_lock(&(VTONFS(dvp))->n_mtx); 1626 VTONFS(dvp)->n_flag |= NMODIFIED; 1627 if (!wccflag) { 1628 VTONFS(dvp)->n_attrstamp = 0; 1629 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1630 } 1631 mtx_unlock(&(VTONFS(dvp))->n_mtx); 1632 return (error); 1633} 1634 1635/* 1636 * nfs file remove call 1637 * To try and make nfs semantics closer to ufs semantics, a file that has 1638 * other processes using the vnode is renamed instead of removed and then 1639 * removed later on the last close. 
1640 * - If v_usecount > 1 1641 * If a rename is not already in the works 1642 * call nfs_sillyrename() to set it up 1643 * else 1644 * do the remove rpc 1645 */ 1646static int 1647nfs_remove(struct vop_remove_args *ap) 1648{ 1649 struct vnode *vp = ap->a_vp; 1650 struct vnode *dvp = ap->a_dvp; 1651 struct componentname *cnp = ap->a_cnp; 1652 struct nfsnode *np = VTONFS(vp); 1653 int error = 0; 1654 struct vattr vattr; 1655 1656#ifndef DIAGNOSTIC 1657 if ((cnp->cn_flags & HASBUF) == 0) 1658 panic("nfs_remove: no name"); 1659 if (vrefcnt(vp) < 1) 1660 panic("nfs_remove: bad v_usecount"); 1661#endif 1662 if (vp->v_type == VDIR) 1663 error = EPERM; 1664 else if (vrefcnt(vp) == 1 || (np->n_sillyrename && 1665 !VOP_GETATTR(vp, &vattr, cnp->cn_cred) && vattr.va_nlink > 1)) { 1666 /* 1667 * Purge the name cache so that the chance of a lookup for 1668 * the name succeeding while the remove is in progress is 1669 * minimized. Without node locking it can still happen, such 1670 * that an I/O op returns ESTALE, but since you get this if 1671 * another host removes the file.. 1672 */ 1673 cache_purge(vp); 1674 /* 1675 * throw away biocache buffers, mainly to avoid 1676 * unnecessary delayed writes later. 1677 */ 1678 error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1); 1679 /* Do the rpc */ 1680 if (error != EINTR && error != EIO) 1681 error = nfs_removerpc(dvp, cnp->cn_nameptr, 1682 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); 1683 /* 1684 * Kludge City: If the first reply to the remove rpc is lost.. 1685 * the reply to the retransmitted request will be ENOENT 1686 * since the file was in fact removed 1687 * Therefore, we cheat and return success. 
1688 */ 1689 if (error == ENOENT) 1690 error = 0; 1691 } else if (!np->n_sillyrename) 1692 error = nfs_sillyrename(dvp, vp, cnp); 1693 np->n_attrstamp = 0; 1694 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1695 return (error); 1696} 1697 1698/* 1699 * nfs file remove rpc called from nfs_inactive 1700 */ 1701int 1702nfs_removeit(struct sillyrename *sp) 1703{ 1704 /* 1705 * Make sure that the directory vnode is still valid. 1706 * XXX we should lock sp->s_dvp here. 1707 */ 1708 if (sp->s_dvp->v_type == VBAD) 1709 return (0); 1710 return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, 1711 NULL)); 1712} 1713 1714/* 1715 * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). 1716 */ 1717static int 1718nfs_removerpc(struct vnode *dvp, const char *name, int namelen, 1719 struct ucred *cred, struct thread *td) 1720{ 1721 caddr_t bpos, dpos; 1722 int error = 0, wccflag = NFSV3_WCCRATTR; 1723 struct mbuf *mreq, *mrep, *md, *mb; 1724 int v3 = NFS_ISV3(dvp); 1725 1726 nfsstats.rpccnt[NFSPROC_REMOVE]++; 1727 mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE, 1728 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); 1729 mb = mreq; 1730 bpos = mtod(mb, caddr_t); 1731 nfsm_fhtom(dvp, v3); 1732 nfsm_strtom(name, namelen, NFS_MAXNAMLEN); 1733 nfsm_request(dvp, NFSPROC_REMOVE, td, cred); 1734 if (v3) 1735 nfsm_wcc_data(dvp, wccflag); 1736 m_freem(mrep); 1737nfsmout: 1738 mtx_lock(&(VTONFS(dvp))->n_mtx); 1739 VTONFS(dvp)->n_flag |= NMODIFIED; 1740 if (!wccflag) { 1741 VTONFS(dvp)->n_attrstamp = 0; 1742 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1743 } 1744 mtx_unlock(&(VTONFS(dvp))->n_mtx); 1745 return (error); 1746} 1747 1748/* 1749 * nfs file rename call 1750 */ 1751static int 1752nfs_rename(struct vop_rename_args *ap) 1753{ 1754 struct vnode *fvp = ap->a_fvp; 1755 struct vnode *tvp = ap->a_tvp; 1756 struct vnode *fdvp = ap->a_fdvp; 1757 struct vnode *tdvp = ap->a_tdvp; 1758 struct componentname *tcnp = ap->a_tcnp; 1759 struct componentname *fcnp = ap->a_fcnp; 1760 int error; 
1761 1762#ifndef DIAGNOSTIC 1763 if ((tcnp->cn_flags & HASBUF) == 0 || 1764 (fcnp->cn_flags & HASBUF) == 0) 1765 panic("nfs_rename: no name"); 1766#endif 1767 /* Check for cross-device rename */ 1768 if ((fvp->v_mount != tdvp->v_mount) || 1769 (tvp && (fvp->v_mount != tvp->v_mount))) { 1770 error = EXDEV; 1771 goto out; 1772 } 1773 1774 if (fvp == tvp) { 1775 nfs_printf("nfs_rename: fvp == tvp (can't happen)\n"); 1776 error = 0; 1777 goto out; 1778 } 1779 if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) 1780 goto out; 1781 1782 /* 1783 * We have to flush B_DELWRI data prior to renaming 1784 * the file. If we don't, the delayed-write buffers 1785 * can be flushed out later after the file has gone stale 1786 * under NFSV3. NFSV2 does not have this problem because 1787 * ( as far as I can tell ) it flushes dirty buffers more 1788 * often. 1789 * 1790 * Skip the rename operation if the fsync fails, this can happen 1791 * due to the server's volume being full, when we pushed out data 1792 * that was written back to our cache earlier. Not checking for 1793 * this condition can result in potential (silent) data loss. 1794 */ 1795 error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); 1796 VOP_UNLOCK(fvp, 0); 1797 if (!error && tvp) 1798 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); 1799 if (error) 1800 goto out; 1801 1802 /* 1803 * If the tvp exists and is in use, sillyrename it before doing the 1804 * rename of the new file over it. 1805 * XXX Can't sillyrename a directory. 
1806 */ 1807 if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && 1808 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { 1809 vput(tvp); 1810 tvp = NULL; 1811 } 1812 1813 error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, 1814 tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, 1815 tcnp->cn_thread); 1816 1817 if (fvp->v_type == VDIR) { 1818 if (tvp != NULL && tvp->v_type == VDIR) 1819 cache_purge(tdvp); 1820 cache_purge(fdvp); 1821 } 1822 1823out: 1824 if (tdvp == tvp) 1825 vrele(tdvp); 1826 else 1827 vput(tdvp); 1828 if (tvp) 1829 vput(tvp); 1830 vrele(fdvp); 1831 vrele(fvp); 1832 /* 1833 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 1834 */ 1835 if (error == ENOENT) 1836 error = 0; 1837 return (error); 1838} 1839 1840/* 1841 * nfs file rename rpc called from nfs_remove() above 1842 */ 1843static int 1844nfs_renameit(struct vnode *sdvp, struct componentname *scnp, 1845 struct sillyrename *sp) 1846{ 1847 1848 return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, 1849 sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread)); 1850} 1851 1852/* 1853 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
1854 */ 1855static int 1856nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen, 1857 struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred, 1858 struct thread *td) 1859{ 1860 caddr_t bpos, dpos; 1861 int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; 1862 struct mbuf *mreq, *mrep, *md, *mb; 1863 int v3 = NFS_ISV3(fdvp); 1864 1865 nfsstats.rpccnt[NFSPROC_RENAME]++; 1866 mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME, 1867 (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + 1868 nfsm_rndup(tnamelen)); 1869 mb = mreq; 1870 bpos = mtod(mb, caddr_t); 1871 nfsm_fhtom(fdvp, v3); 1872 nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); 1873 nfsm_fhtom(tdvp, v3); 1874 nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); 1875 nfsm_request(fdvp, NFSPROC_RENAME, td, cred); 1876 if (v3) { 1877 nfsm_wcc_data(fdvp, fwccflag); 1878 nfsm_wcc_data(tdvp, twccflag); 1879 } 1880 m_freem(mrep); 1881nfsmout: 1882 mtx_lock(&(VTONFS(fdvp))->n_mtx); 1883 VTONFS(fdvp)->n_flag |= NMODIFIED; 1884 mtx_unlock(&(VTONFS(fdvp))->n_mtx); 1885 mtx_lock(&(VTONFS(tdvp))->n_mtx); 1886 VTONFS(tdvp)->n_flag |= NMODIFIED; 1887 mtx_unlock(&(VTONFS(tdvp))->n_mtx); 1888 if (!fwccflag) { 1889 VTONFS(fdvp)->n_attrstamp = 0; 1890 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); 1891 } 1892 if (!twccflag) { 1893 VTONFS(tdvp)->n_attrstamp = 0; 1894 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 1895 } 1896 return (error); 1897} 1898 1899/* 1900 * nfs hard link create call 1901 */ 1902static int 1903nfs_link(struct vop_link_args *ap) 1904{ 1905 struct vnode *vp = ap->a_vp; 1906 struct vnode *tdvp = ap->a_tdvp; 1907 struct componentname *cnp = ap->a_cnp; 1908 caddr_t bpos, dpos; 1909 int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; 1910 struct mbuf *mreq, *mrep, *md, *mb; 1911 int v3; 1912 1913 if (vp->v_mount != tdvp->v_mount) { 1914 return (EXDEV); 1915 } 1916 1917 /* 1918 * Push all writes to the server, so that the attribute cache 1919 * doesn't get "out of sync" with the 
server. 1920 * XXX There should be a better way! 1921 */ 1922 VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); 1923 1924 v3 = NFS_ISV3(vp); 1925 nfsstats.rpccnt[NFSPROC_LINK]++; 1926 mreq = nfsm_reqhead(vp, NFSPROC_LINK, 1927 NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); 1928 mb = mreq; 1929 bpos = mtod(mb, caddr_t); 1930 nfsm_fhtom(vp, v3); 1931 nfsm_fhtom(tdvp, v3); 1932 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 1933 nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred); 1934 if (v3) { 1935 nfsm_postop_attr(vp, attrflag); 1936 nfsm_wcc_data(tdvp, wccflag); 1937 } 1938 m_freem(mrep); 1939nfsmout: 1940 mtx_lock(&(VTONFS(tdvp))->n_mtx); 1941 VTONFS(tdvp)->n_flag |= NMODIFIED; 1942 mtx_unlock(&(VTONFS(tdvp))->n_mtx); 1943 if (!attrflag) { 1944 VTONFS(vp)->n_attrstamp = 0; 1945 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1946 } 1947 if (!wccflag) { 1948 VTONFS(tdvp)->n_attrstamp = 0; 1949 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 1950 } 1951 return (error); 1952} 1953 1954/* 1955 * nfs symbolic link create call 1956 */ 1957static int 1958nfs_symlink(struct vop_symlink_args *ap) 1959{ 1960 struct vnode *dvp = ap->a_dvp; 1961 struct vattr *vap = ap->a_vap; 1962 struct componentname *cnp = ap->a_cnp; 1963 struct nfsv2_sattr *sp; 1964 caddr_t bpos, dpos; 1965 int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; 1966 struct mbuf *mreq, *mrep, *md, *mb; 1967 struct vnode *newvp = NULL; 1968 int v3 = NFS_ISV3(dvp); 1969 1970 nfsstats.rpccnt[NFSPROC_SYMLINK]++; 1971 slen = strlen(ap->a_target); 1972 mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + 1973 nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); 1974 mb = mreq; 1975 bpos = mtod(mb, caddr_t); 1976 nfsm_fhtom(dvp, v3); 1977 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 1978 if (v3) { 1979 nfsm_v3attrbuild(vap, FALSE); 1980 } 1981 nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); 1982 if (!v3) { 1983 sp = nfsm_build(struct nfsv2_sattr *, 
NFSX_V2SATTR); 1984 sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); 1985 sp->sa_uid = nfs_xdrneg1; 1986 sp->sa_gid = nfs_xdrneg1; 1987 sp->sa_size = nfs_xdrneg1; 1988 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 1989 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 1990 } 1991 1992 /* 1993 * Issue the NFS request and get the rpc response. 1994 * 1995 * Only NFSv3 responses returning an error of 0 actually return 1996 * a file handle that can be converted into newvp without having 1997 * to do an extra lookup rpc. 1998 */ 1999 nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred); 2000 if (v3) { 2001 if (error == 0) 2002 nfsm_mtofh(dvp, newvp, v3, gotvp); 2003 nfsm_wcc_data(dvp, wccflag); 2004 } 2005 2006 /* 2007 * out code jumps -> here, mrep is also freed. 2008 */ 2009 2010 m_freem(mrep); 2011nfsmout: 2012 2013 /* 2014 * If we do not have an error and we could not extract the newvp from 2015 * the response due to the request being NFSv2, we have to do a 2016 * lookup in order to obtain a newvp to return. 
2017 */ 2018 if (error == 0 && newvp == NULL) { 2019 struct nfsnode *np = NULL; 2020 2021 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2022 cnp->cn_cred, cnp->cn_thread, &np); 2023 if (!error) 2024 newvp = NFSTOV(np); 2025 } 2026 if (error) { 2027 if (newvp) 2028 vput(newvp); 2029 } else { 2030 *ap->a_vpp = newvp; 2031 } 2032 mtx_lock(&(VTONFS(dvp))->n_mtx); 2033 VTONFS(dvp)->n_flag |= NMODIFIED; 2034 mtx_unlock(&(VTONFS(dvp))->n_mtx); 2035 if (!wccflag) { 2036 VTONFS(dvp)->n_attrstamp = 0; 2037 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2038 } 2039 return (error); 2040} 2041 2042/* 2043 * nfs make dir call 2044 */ 2045static int 2046nfs_mkdir(struct vop_mkdir_args *ap) 2047{ 2048 struct vnode *dvp = ap->a_dvp; 2049 struct vattr *vap = ap->a_vap; 2050 struct componentname *cnp = ap->a_cnp; 2051 struct nfsv2_sattr *sp; 2052 int len; 2053 struct nfsnode *np = NULL; 2054 struct vnode *newvp = NULL; 2055 caddr_t bpos, dpos; 2056 int error = 0, wccflag = NFSV3_WCCRATTR; 2057 int gotvp = 0; 2058 struct mbuf *mreq, *mrep, *md, *mb; 2059 struct vattr vattr; 2060 int v3 = NFS_ISV3(dvp); 2061 2062 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 2063 return (error); 2064 len = cnp->cn_namelen; 2065 nfsstats.rpccnt[NFSPROC_MKDIR]++; 2066 mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR, 2067 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); 2068 mb = mreq; 2069 bpos = mtod(mb, caddr_t); 2070 nfsm_fhtom(dvp, v3); 2071 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); 2072 if (v3) { 2073 nfsm_v3attrbuild(vap, FALSE); 2074 } else { 2075 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 2076 sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); 2077 sp->sa_uid = nfs_xdrneg1; 2078 sp->sa_gid = nfs_xdrneg1; 2079 sp->sa_size = nfs_xdrneg1; 2080 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 2081 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 2082 } 2083 nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred); 2084 if (!error) 2085 nfsm_mtofh(dvp, 
newvp, v3, gotvp); 2086 if (v3) 2087 nfsm_wcc_data(dvp, wccflag); 2088 m_freem(mrep); 2089nfsmout: 2090 mtx_lock(&(VTONFS(dvp))->n_mtx); 2091 VTONFS(dvp)->n_flag |= NMODIFIED; 2092 mtx_unlock(&(VTONFS(dvp))->n_mtx); 2093 if (!wccflag) { 2094 VTONFS(dvp)->n_attrstamp = 0; 2095 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2096 } 2097 if (error == 0 && newvp == NULL) { 2098 error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, 2099 cnp->cn_thread, &np); 2100 if (!error) { 2101 newvp = NFSTOV(np); 2102 if (newvp->v_type != VDIR) 2103 error = EEXIST; 2104 } 2105 } 2106 if (error) { 2107 if (newvp) 2108 vput(newvp); 2109 } else 2110 *ap->a_vpp = newvp; 2111 return (error); 2112} 2113 2114/* 2115 * nfs remove directory call 2116 */ 2117static int 2118nfs_rmdir(struct vop_rmdir_args *ap) 2119{ 2120 struct vnode *vp = ap->a_vp; 2121 struct vnode *dvp = ap->a_dvp; 2122 struct componentname *cnp = ap->a_cnp; 2123 caddr_t bpos, dpos; 2124 int error = 0, wccflag = NFSV3_WCCRATTR; 2125 struct mbuf *mreq, *mrep, *md, *mb; 2126 int v3 = NFS_ISV3(dvp); 2127 2128 if (dvp == vp) 2129 return (EINVAL); 2130 nfsstats.rpccnt[NFSPROC_RMDIR]++; 2131 mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR, 2132 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); 2133 mb = mreq; 2134 bpos = mtod(mb, caddr_t); 2135 nfsm_fhtom(dvp, v3); 2136 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 2137 nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred); 2138 if (v3) 2139 nfsm_wcc_data(dvp, wccflag); 2140 m_freem(mrep); 2141nfsmout: 2142 mtx_lock(&(VTONFS(dvp))->n_mtx); 2143 VTONFS(dvp)->n_flag |= NMODIFIED; 2144 mtx_unlock(&(VTONFS(dvp))->n_mtx); 2145 if (!wccflag) { 2146 VTONFS(dvp)->n_attrstamp = 0; 2147 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2148 } 2149 cache_purge(dvp); 2150 cache_purge(vp); 2151 /* 2152 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
2153 */ 2154 if (error == ENOENT) 2155 error = 0; 2156 return (error); 2157} 2158 2159/* 2160 * nfs readdir call 2161 */ 2162static int 2163nfs_readdir(struct vop_readdir_args *ap) 2164{ 2165 struct vnode *vp = ap->a_vp; 2166 struct nfsnode *np = VTONFS(vp); 2167 struct uio *uio = ap->a_uio; 2168 int tresid, error = 0; 2169 struct vattr vattr; 2170 2171 if (vp->v_type != VDIR) 2172 return(EPERM); 2173 2174 /* 2175 * First, check for hit on the EOF offset cache 2176 */ 2177 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && 2178 (np->n_flag & NMODIFIED) == 0) { 2179 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { 2180 mtx_lock(&np->n_mtx); 2181 if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 2182 mtx_unlock(&np->n_mtx); 2183 nfsstats.direofcache_hits++; 2184 goto out; 2185 } else 2186 mtx_unlock(&np->n_mtx); 2187 } 2188 } 2189 2190 /* 2191 * Call nfs_bioread() to do the real work. 2192 */ 2193 tresid = uio->uio_resid; 2194 error = nfs_bioread(vp, uio, 0, ap->a_cred); 2195 2196 if (!error && uio->uio_resid == tresid) { 2197 nfsstats.direofcache_misses++; 2198 } 2199out: 2200 return (error); 2201} 2202 2203/* 2204 * Readdir rpc call. 2205 * Called from below the buffer cache by nfs_doio(). 
 */
/*
 * Plain READDIR RPC loop (NFSv2 and NFSv3).
 *
 * Issues READDIR requests of nm_readdirsize until either the server
 * reports EOF or the caller's uio buffer fills, converting each wire
 * entry into a 4.4BSD struct dirent in place.  Directory cookies are
 * cached via nfs_getcookie() so the next buffer-cache block can resume
 * where this one stopped.  Returns 0 or an NFS/errno error; the uio is
 * advanced past every record written.
 */
int
nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int len, left;
	struct dirent *dp = NULL;
	u_int32_t *tl;
	caddr_t cp;
	nfsuint64 *cookiep;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	nfsuint64 cookie;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *dnp = VTONFS(vp);
	u_quad_t fileno;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
	int attrflag;
	int v3 = NFS_ISV3(vp);

	/*
	 * NOTE(review): the sense of this guard looks inverted -- the
	 * sanity panic is compiled OUT when DIAGNOSTIC is defined.  The
	 * same pattern appears in nfs_readdirplusrpc(); confirm intent.
	 */
#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
		(uiop->uio_resid & (DIRBLKSIZ - 1)))
		panic("nfs readdirrpc bad uio");
#endif

	/*
	 * If there is no cookie, assume directory was stale.
	 */
	nfs_dircookie_lock(dnp);
	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep) {
		cookie = *cookiep;
		nfs_dircookie_unlock(dnp);
	} else {
		nfs_dircookie_unlock(dnp);
		return (NFSERR_BAD_COOKIE);
	}

	/*
	 * Loop around doing readdir rpc's of size nm_readdirsize
	 * truncated to a multiple of DIRBLKSIZ.
	 * The stopping criteria is EOF or buffer full.
	 */
	while (more_dirs && bigenough) {
		nfsstats.rpccnt[NFSPROC_READDIR]++;
		mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
			NFSX_READDIR(v3));
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, v3);
		if (v3) {
			/* v3 carries a 64-bit cookie plus a cookie verifier. */
			tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
			*tl++ = cookie.nfsuquad[0];
			*tl++ = cookie.nfsuquad[1];
			/* n_cookieverf is shared state; guarded by n_mtx. */
			mtx_lock(&dnp->n_mtx);
			*tl++ = dnp->n_cookieverf.nfsuquad[0];
			*tl++ = dnp->n_cookieverf.nfsuquad[1];
			mtx_unlock(&dnp->n_mtx);
		} else {
			/* v2: 32-bit cookie, no verifier. */
			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = cookie.nfsuquad[0];
		}
		*tl = txdr_unsigned(nmp->nm_readdirsize);
		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
		if (v3) {
			nfsm_postop_attr(vp, attrflag);
			if (!error) {
				/* Remember the verifier for the next call. */
				tl = nfsm_dissect(u_int32_t *,
				    2 * NFSX_UNSIGNED);
				mtx_lock(&dnp->n_mtx);
				dnp->n_cookieverf.nfsuquad[0] = *tl++;
				dnp->n_cookieverf.nfsuquad[1] = *tl;
				mtx_unlock(&dnp->n_mtx);
			} else {
				m_freem(mrep);
				goto nfsmout;
			}
		}
		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
		more_dirs = fxdr_unsigned(int, *tl);

		/* loop thru the dir entries, doctoring them to 4bsd form */
		while (more_dirs && bigenough) {
			if (v3) {
				tl = nfsm_dissect(u_int32_t *,
				    3 * NFSX_UNSIGNED);
				fileno = fxdr_hyper(tl);
				len = fxdr_unsigned(int, *(tl + 2));
			} else {
				tl = nfsm_dissect(u_int32_t *,
				    2 * NFSX_UNSIGNED);
				fileno = fxdr_unsigned(u_quad_t, *tl++);
				len = fxdr_unsigned(int, *tl);
			}
			if (len <= 0 || len > NFS_MAXNAMLEN) {
				error = EBADRPC;
				m_freem(mrep);
				goto nfsmout;
			}
			tlen = nfsm_rndup(len);
			if (tlen == len)
				tlen += 4;	/* To ensure null termination */
			left = DIRBLKSIZ - blksiz;
			if ((tlen + DIRHDSIZ) > left) {
				/*
				 * Entry won't fit in the current DIRBLKSIZ
				 * block: pad the previous record out to the
				 * block boundary and start a fresh block.
				 */
				dp->d_reclen += left;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + left;
				uiop->uio_iov->iov_len -= left;
				uiop->uio_offset += left;
				uiop->uio_resid -= left;
				blksiz = 0;
			}
			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
				bigenough = 0;
			if (bigenough) {
				dp = (struct dirent *)uiop->uio_iov->iov_base;
				dp->d_fileno = (int)fileno;
				dp->d_namlen = len;
				dp->d_reclen = tlen + DIRHDSIZ;
				dp->d_type = DT_UNKNOWN;
				blksiz += dp->d_reclen;
				if (blksiz == DIRBLKSIZ)
					blksiz = 0;
				uiop->uio_offset += DIRHDSIZ;
				uiop->uio_resid -= DIRHDSIZ;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
				uiop->uio_iov->iov_len -= DIRHDSIZ;
				/* Copy the name straight from the mbuf chain. */
				nfsm_mtouio(uiop, len);
				cp = uiop->uio_iov->iov_base;
				tlen -= len;
				*cp = '\0';	/* null terminate */
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + tlen;
				uiop->uio_iov->iov_len -= tlen;
				uiop->uio_offset += tlen;
				uiop->uio_resid -= tlen;
			} else
				nfsm_adv(nfsm_rndup(len));
			if (v3) {
				tl = nfsm_dissect(u_int32_t *,
				    3 * NFSX_UNSIGNED);
			} else {
				tl = nfsm_dissect(u_int32_t *,
				    2 * NFSX_UNSIGNED);
			}
			/*
			 * Only track the cookie of entries actually copied
			 * out; it marks where the next RPC must resume.
			 */
			if (bigenough) {
				cookie.nfsuquad[0] = *tl++;
				if (v3)
					cookie.nfsuquad[1] = *tl++;
			} else if (v3)
				tl += 2;
			else
				tl++;
			more_dirs = fxdr_unsigned(int, *tl);
		}
		/*
		 * If at end of rpc data, get the eof boolean
		 */
		if (!more_dirs) {
			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			more_dirs = (fxdr_unsigned(int, *tl) == 0);
		}
		m_freem(mrep);
	}
	/*
	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
	 * by increasing d_reclen for the last record.
	 */
	if (blksiz > 0) {
		left = DIRBLKSIZ - blksiz;
		dp->d_reclen += left;
		uiop->uio_iov->iov_base =
		    (char *)uiop->uio_iov->iov_base + left;
		uiop->uio_iov->iov_len -= left;
		uiop->uio_offset += left;
		uiop->uio_resid -= left;
	}

	/*
	 * We are now either at the end of the directory or have filled the
	 * block.
	 */
	if (bigenough)
		dnp->n_direofoffset = uiop->uio_offset;
	else {
		if (uiop->uio_resid > 0)
			nfs_printf("EEK! readdirrpc resid > 0\n");
		/* Stash the resume cookie for the offset we stopped at. */
		nfs_dircookie_lock(dnp);
		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
		*cookiep = cookie;
		nfs_dircookie_unlock(dnp);
	}
nfsmout:
	return (error);
}

/*
 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
 *
 * Same contract as nfs_readdirrpc(), but each entry also carries
 * post-op attributes and a file handle, which are used to prime the
 * attribute cache and the name cache (cache_enter()) as a side effect.
 */
int
nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int len, left;
	struct dirent *dp;
	u_int32_t *tl;
	caddr_t cp;
	struct vnode *newvp;
	nfsuint64 *cookiep;
	caddr_t bpos, dpos, dpossav1, dpossav2;
	struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
	struct nameidata nami, *ndp = &nami;
	struct componentname *cnp = &ndp->ni_cnd;
	nfsuint64 cookie;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *dnp = VTONFS(vp), *np;
	nfsfh_t *fhp;
	u_quad_t fileno;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
	int attrflag, fhsize;

#ifndef nolint
	dp = NULL;
#endif
	/* NOTE(review): same inverted-looking DIAGNOSTIC sense as above. */
#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
		(uiop->uio_resid & (DIRBLKSIZ - 1)))
		panic("nfs readdirplusrpc bad uio");
#endif
	ndp->ni_dvp = vp;
	newvp = NULLVP;

	/*
	 * If there is no cookie, assume directory was stale.
	 */
	nfs_dircookie_lock(dnp);
	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep) {
		cookie = *cookiep;
		nfs_dircookie_unlock(dnp);
	} else {
		nfs_dircookie_unlock(dnp);
		return (NFSERR_BAD_COOKIE);
	}
	/*
	 * Loop around doing readdir rpc's of size nm_readdirsize
	 * truncated to a multiple of DIRBLKSIZ.
	 * The stopping criteria is EOF or buffer full.
	 */
	while (more_dirs && bigenough) {
		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
		mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
			NFSX_FH(1) + 6 * NFSX_UNSIGNED);
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, 1);
		tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
		*tl++ = cookie.nfsuquad[0];
		*tl++ = cookie.nfsuquad[1];
		mtx_lock(&dnp->n_mtx);
		*tl++ = dnp->n_cookieverf.nfsuquad[0];
		*tl++ = dnp->n_cookieverf.nfsuquad[1];
		mtx_unlock(&dnp->n_mtx);
		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
		*tl = txdr_unsigned(nmp->nm_rsize);
		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
		nfsm_postop_attr(vp, attrflag);
		if (error) {
			m_freem(mrep);
			goto nfsmout;
		}
		tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
		mtx_lock(&dnp->n_mtx);
		dnp->n_cookieverf.nfsuquad[0] = *tl++;
		dnp->n_cookieverf.nfsuquad[1] = *tl++;
		mtx_unlock(&dnp->n_mtx);
		more_dirs = fxdr_unsigned(int, *tl);

		/* loop thru the dir entries, doctoring them to 4bsd form */
		while (more_dirs && bigenough) {
			tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
			fileno = fxdr_hyper(tl);
			len = fxdr_unsigned(int, *(tl + 2));
			if (len <= 0 || len > NFS_MAXNAMLEN) {
				error = EBADRPC;
				m_freem(mrep);
				goto nfsmout;
			}
			tlen = nfsm_rndup(len);
			if (tlen == len)
				tlen += 4;	/* To ensure null termination*/
			left = DIRBLKSIZ - blksiz;
			if ((tlen + DIRHDSIZ) > left) {
				/* Pad previous record out to DIRBLKSIZ. */
				dp->d_reclen += left;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + left;
				uiop->uio_iov->iov_len -= left;
				uiop->uio_offset += left;
				uiop->uio_resid -= left;
				blksiz = 0;
			}
			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
				bigenough = 0;
			if (bigenough) {
				dp = (struct dirent *)uiop->uio_iov->iov_base;
				dp->d_fileno = (int)fileno;
				dp->d_namlen = len;
				dp->d_reclen = tlen + DIRHDSIZ;
				dp->d_type = DT_UNKNOWN;
				blksiz += dp->d_reclen;
				if (blksiz == DIRBLKSIZ)
					blksiz = 0;
				uiop->uio_offset += DIRHDSIZ;
				uiop->uio_resid -= DIRHDSIZ;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
				uiop->uio_iov->iov_len -= DIRHDSIZ;
				/* Record the name for the cache_enter() below. */
				cnp->cn_nameptr = uiop->uio_iov->iov_base;
				cnp->cn_namelen = len;
				nfsm_mtouio(uiop, len);
				cp = uiop->uio_iov->iov_base;
				tlen -= len;
				*cp = '\0';
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + tlen;
				uiop->uio_iov->iov_len -= tlen;
				uiop->uio_offset += tlen;
				uiop->uio_resid -= tlen;
			} else
				nfsm_adv(nfsm_rndup(len));
			tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
			if (bigenough) {
				cookie.nfsuquad[0] = *tl++;
				cookie.nfsuquad[1] = *tl++;
			} else
				tl += 2;

			/*
			 * Since the attributes are before the file handle
			 * (sigh), we must skip over the attributes and then
			 * come back and get them.
			 */
			attrflag = fxdr_unsigned(int, *tl);
			if (attrflag) {
				dpossav1 = dpos;
				mdsav1 = md;
				nfsm_adv(NFSX_V3FATTR);
				tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
				doit = fxdr_unsigned(int, *tl);
				/*
				 * Skip loading the attrs for "..". There's a
				 * race between loading the attrs here and
				 * lookups that look for the directory currently
				 * being read (in the parent). We try to acquire
				 * the exclusive lock on ".." here, owning the
				 * lock on the directory being read. Lookup will
				 * hold the lock on ".." and try to acquire the
				 * lock on the directory being read.
				 *
				 * There are other ways of fixing this, one would
				 * be to do a trylock on the ".." vnode and skip
				 * loading the attrs on ".." if it happens to be
				 * locked by another process. But skipping the
				 * attrload on ".." seems the easiest option.
				 */
				if (strcmp(dp->d_name, "..") == 0) {
					doit = 0;
					/*
					 * We've already skipped over the attrs,
					 * skip over the filehandle. And store d_type
					 * as VDIR.
					 */
					tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
					i = fxdr_unsigned(int, *tl);
					nfsm_adv(nfsm_rndup(i));
					dp->d_type = IFTODT(VTTOIF(VDIR));
				}
				if (doit) {
					nfsm_getfh(fhp, fhsize, 1);
					if (NFS_CMPFH(dnp, fhp, fhsize)) {
						/* Entry is "." -- reuse dvp. */
						VREF(vp);
						newvp = vp;
						np = dnp;
					} else {
						error = nfs_nget(vp->v_mount, fhp,
						    fhsize, &np, LK_EXCLUSIVE);
						if (error)
							doit = 0;
						else
							newvp = NFSTOV(np);
					}
				}
				if (doit && bigenough) {
					/*
					 * Rewind to the saved attribute
					 * position, load them into newvp,
					 * then restore the parse position.
					 */
					dpossav2 = dpos;
					dpos = dpossav1;
					mdsav2 = md;
					md = mdsav1;
					nfsm_loadattr(newvp, NULL);
					dpos = dpossav2;
					md = mdsav2;
					dp->d_type =
					    IFTODT(VTTOIF(np->n_vattr.va_type));
					ndp->ni_vp = newvp;
					/* Update n_ctime, so subsequent lookup doesn't purge entry */
					np->n_ctime = np->n_vattr.va_ctime.tv_sec;
					cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
				}
			} else {
				/* Just skip over the file handle */
				tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
				i = fxdr_unsigned(int, *tl);
				if (i) {
					tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
					fhsize = fxdr_unsigned(int, *tl);
					nfsm_adv(nfsm_rndup(fhsize));
				}
			}
			/* Drop the per-entry vnode reference before looping. */
			if (newvp != NULLVP) {
				if (newvp == vp)
					vrele(newvp);
				else
					vput(newvp);
				newvp = NULLVP;
			}
			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			more_dirs = fxdr_unsigned(int, *tl);
		}

		/*
		 * If at end of rpc data, get the eof boolean
		 */
		if (!more_dirs) {
			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			more_dirs = (fxdr_unsigned(int, *tl) == 0);
		}
		m_freem(mrep);
	}
	/*
	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
	 * by increasing d_reclen for the last record.
	 */
	if (blksiz > 0) {
		left = DIRBLKSIZ - blksiz;
		dp->d_reclen += left;
		uiop->uio_iov->iov_base =
		    (char *)uiop->uio_iov->iov_base + left;
		uiop->uio_iov->iov_len -= left;
		uiop->uio_offset += left;
		uiop->uio_resid -= left;
	}

	/*
	 * We are now either at the end of the directory or have filled the
	 * block.
	 */
	if (bigenough)
		dnp->n_direofoffset = uiop->uio_offset;
	else {
		if (uiop->uio_resid > 0)
			nfs_printf("EEK! readdirplusrpc resid > 0\n");
		nfs_dircookie_lock(dnp);
		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
		*cookiep = cookie;
		nfs_dircookie_unlock(dnp);
	}
nfsmout:
	if (newvp != NULLVP) {
		if (newvp == vp)
			vrele(newvp);
		else
			vput(newvp);
		newvp = NULLVP;
	}
	return (error);
}

/*
 * Silly rename. To make the NFS filesystem that is stateless look a little
 * more like the "ufs" a remove of an active vnode is translated to a rename
 * to a funny looking filename that is removed by nfs_inactive on the
 * nfsnode. There is the potential for another process on a different client
 * to create the same funny name between the nfs_lookitup() fails and the
 * nfs_rename() completes, but...
 */
static int
nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct sillyrename *sp;
	struct nfsnode *np;
	int error;
	short pid;
	unsigned int lticks;

	cache_purge(dvp);
	np = VTONFS(vp);
	/* NOTE(review): inverted-looking DIAGNOSTIC sense; confirm intent. */
#ifndef DIAGNOSTIC
	if (vp->v_type == VDIR)
		panic("nfs: sillyrename dir");
#endif
	sp = malloc(sizeof (struct sillyrename),
		M_NFSREQ, M_WAITOK);
	sp->s_cred = crhold(cnp->cn_cred);
	sp->s_dvp = dvp;
	sp->s_removeit = nfs_removeit;
	VREF(dvp);

	/*
	 * Fudge together a funny name.
	 * Changing the format of the funny name to accomodate more
	 * sillynames per directory.
	 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is
	 * CPU ticks since boot.
	 *
	 * Bump lticks until nfs_lookitup() fails, i.e. until the
	 * candidate name does not already exist in the directory.
	 */
	pid = cnp->cn_thread->td_proc->p_pid;
	lticks = (unsigned int)ticks;
	for ( ; ; ) {
		sp->s_namlen = sprintf(sp->s_name,
		    ".nfs.%08x.%04x4.4", lticks,
		    pid);
		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
		    cnp->cn_thread, NULL))
			break;
		lticks++;
	}
	error = nfs_renameit(dvp, cnp, sp);
	if (error)
		goto bad;
	/*
	 * NOTE(review): the return value of this lookitup (which refreshes
	 * np's file handle) is assigned but not checked before using np;
	 * np itself is still the valid nfsnode from VTONFS(vp) above.
	 */
	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
		cnp->cn_thread, &np);
	np->n_sillyrename = sp;
	return (0);
bad:
	/* Undo the reference and allocation taken above. */
	vrele(sp->s_dvp);
	crfree(sp->s_cred);
	free((caddr_t)sp, M_NFSREQ);
	return (error);
}

/*
 * Look up a file name and optionally either update the file handle or
 * allocate an nfsnode, depending on the value of npp.
 * npp == NULL --> just do the lookup
 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
 *	handled too
 * *npp != NULL --> update the file handle in the vnode
 *
 * On success with npp != NULL the returned vnode is referenced (and
 * locked unless it is dvp itself); on failure any vnode obtained here
 * is released before returning.
 */
static int
nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
    struct thread *td, struct nfsnode **npp)
{
	struct vnode *newvp = NULL;
	struct nfsnode *np, *dnp = VTONFS(dvp);
	caddr_t bpos, dpos;
	int error = 0, fhlen, attrflag;
	struct mbuf *mreq, *mrep, *md, *mb;
	nfsfh_t *nfhp;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(name, len, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
	if (npp && !error) {
		nfsm_getfh(nfhp, fhlen, v3);
		if (*npp) {
			/*
			 * Caller supplied an nfsnode: just refresh its
			 * file handle, growing/shrinking n_fhp storage
			 * across the NFS_SMALLFH threshold as needed.
			 */
			np = *npp;
			if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
				free((caddr_t)np->n_fhp, M_NFSBIGFH);
				np->n_fhp = &np->n_fh;
			} else if (np->n_fhsize <= NFS_SMALLFH && fhlen > NFS_SMALLFH)
				np->n_fhp = (nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK);
			bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
			np->n_fhsize = fhlen;
			newvp = NFSTOV(np);
		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
			/* Name resolved to the directory itself. */
			VREF(dvp);
			newvp = dvp;
		} else {
			error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
			if (error) {
				m_freem(mrep);
				return (error);
			}
			newvp = NFSTOV(np);
		}
		if (v3) {
			nfsm_postop_attr(newvp, attrflag);
			if (!attrflag && *npp == NULL) {
				/*
				 * A freshly allocated nfsnode without
				 * attributes is useless; drop it.
				 */
				m_freem(mrep);
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
				return (ENOENT);
			}
		} else
			nfsm_loadattr(newvp, NULL);
	}
	m_freem(mrep);
nfsmout:
	if (npp && *npp == NULL) {
		if (error) {
			if (newvp) {
				if (newvp == dvp)
					vrele(newvp);
				else
					vput(newvp);
			}
		} else
			*npp = np;
	}
	return (error);
}

/*
 * Nfs Version 3 commit rpc
 *
 * Asks the server to flush [offset, offset+cnt) to stable storage and
 * compares the returned write verifier against the cached one; a
 * mismatch means the server rebooted, so NFSERR_STALEWRITEVERF is
 * returned and the new verifier is remembered.
 */
int
nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
    struct thread *td)
{
	u_int32_t *tl;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;

	/* Nothing to compare against until a write verifier is cached. */
	mtx_lock(&nmp->nm_mtx);
	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
		mtx_unlock(&nmp->nm_mtx);
		return (0);
	}
	mtx_unlock(&nmp->nm_mtx);
	nfsstats.rpccnt[NFSPROC_COMMIT]++;
	mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, 1);
	tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED);
	txdr_hyper(offset, tl);
	tl += 2;
	*tl = txdr_unsigned(cnt);
	nfsm_request(vp, NFSPROC_COMMIT, td, cred);
	nfsm_wcc_data(vp, wccflag);
	if (!error) {
		tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF);
		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
			 NFSX_V3WRITEVERF)) {
			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
				NFSX_V3WRITEVERF);
			error = NFSERR_STALEWRITEVERF;
		}
	}
	m_freem(mrep);
nfsmout:
	return (error);
}

/*
 * Strategy routine.
 * For async requests when nfsiod(s) are running, queue the request by
 * calling nfs_asyncio(), otherwise just call nfs_doio() to do the
 * request.
2872 */ 2873static int 2874nfs_strategy(struct vop_strategy_args *ap) 2875{ 2876 struct buf *bp = ap->a_bp; 2877 struct ucred *cr; 2878 2879 KASSERT(!(bp->b_flags & B_DONE), 2880 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 2881 BUF_ASSERT_HELD(bp); 2882 2883 if (bp->b_iocmd == BIO_READ) 2884 cr = bp->b_rcred; 2885 else 2886 cr = bp->b_wcred; 2887 2888 /* 2889 * If the op is asynchronous and an i/o daemon is waiting 2890 * queue the request, wake it up and wait for completion 2891 * otherwise just do it ourselves. 2892 */ 2893 if ((bp->b_flags & B_ASYNC) == 0 || 2894 nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread)) 2895 (void)nfs_doio(ap->a_vp, bp, cr, curthread); 2896 return (0); 2897} 2898 2899/* 2900 * fsync vnode op. Just call nfs_flush() with commit == 1. 2901 */ 2902/* ARGSUSED */ 2903static int 2904nfs_fsync(struct vop_fsync_args *ap) 2905{ 2906 2907 return (nfs_flush(ap->a_vp, ap->a_waitfor, 1)); 2908} 2909 2910/* 2911 * Flush all the blocks associated with a vnode. 2912 * Walk through the buffer pool and push any dirty pages 2913 * associated with the vnode. 
 */
static int
nfs_flush(struct vnode *vp, int waitfor, int commit)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	int i;
	struct buf *nbp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
	int passone = 1;
	u_quad_t off, endoff, toff;
	struct ucred* wcred = NULL;
	struct buf **bvec = NULL;
	struct bufobj *bo;
	struct thread *td = curthread;
#ifndef NFS_COMMITBVECSIZ
#define	NFS_COMMITBVECSIZ	20
#endif
	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
	int bvecsize = 0, bveccount;

	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	if (!commit)
		passone = 0;
	bo = &vp->v_bufobj;
	/*
	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
	 * server, but has not been committed to stable storage on the server
	 * yet. On the first pass, the byte range is worked out and the commit
	 * rpc is done. On the second pass, nfs_writebp() is called to do the
	 * job.
	 */
again:
	off = (u_quad_t)-1;
	endoff = 0;
	bvecpos = 0;
	if (NFS_ISV3(vp) && commit) {
		/* A previous "again" pass may have left a heap bvec. */
		if (bvec != NULL && bvec != bvec_on_stack)
			free(bvec, M_TEMP);
		/*
		 * Count up how many buffers waiting for a commit.
		 */
		bveccount = 0;
		BO_LOCK(bo);
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (!BUF_ISLOCKED(bp) &&
			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
				== (B_DELWRI | B_NEEDCOMMIT))
				bveccount++;
		}
		/*
		 * Allocate space to remember the list of bufs to commit. It is
		 * important to use M_NOWAIT here to avoid a race with nfs_write.
		 * If we can't get memory (for whatever reason), we will end up
		 * committing the buffers one-by-one in the loop below.
		 */
		if (bveccount > NFS_COMMITBVECSIZ) {
			/*
			 * Release the vnode interlock to avoid a lock
			 * order reversal.
			 */
			BO_UNLOCK(bo);
			bvec = (struct buf **)
				malloc(bveccount * sizeof(struct buf *),
				       M_TEMP, M_NOWAIT);
			BO_LOCK(bo);
			if (bvec == NULL) {
				bvec = bvec_on_stack;
				bvecsize = NFS_COMMITBVECSIZ;
			} else
				bvecsize = bveccount;
		} else {
			bvec = bvec_on_stack;
			bvecsize = NFS_COMMITBVECSIZ;
		}
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (bvecpos >= bvecsize)
				break;
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
			    (B_DELWRI | B_NEEDCOMMIT)) {
				BUF_UNLOCK(bp);
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			BO_UNLOCK(bo);
			bremfree(bp);
			/*
			 * Work out if all buffers are using the same cred
			 * so we can deal with them all with one commit.
			 *
			 * NOTE: we are not clearing B_DONE here, so we have
			 * to do it later on in this routine if we intend to
			 * initiate I/O on the bp.
			 *
			 * Note: to avoid loopback deadlocks, we do not
			 * assign b_runningbufspace.
			 */
			if (wcred == NULL)
				wcred = bp->b_wcred;
			else if (wcred != bp->b_wcred)
				wcred = NOCRED;
			vfs_busy_pages(bp, 1);

			BO_LOCK(bo);
			/*
			 * bp is protected by being locked, but nbp is not
			 * and vfs_busy_pages() may sleep.  We have to
			 * recalculate nbp.
			 */
			nbp = TAILQ_NEXT(bp, b_bobufs);

			/*
			 * A list of these buffers is kept so that the
			 * second loop knows which buffers have actually
			 * been committed. This is necessary, since there
			 * may be a race between the commit rpc and new
			 * uncommitted writes on the file.
			 */
			bvec[bvecpos++] = bp;
			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
				bp->b_dirtyoff;
			if (toff < off)
				off = toff;
			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
			if (toff > endoff)
				endoff = toff;
		}
		BO_UNLOCK(bo);
	}
	if (bvecpos > 0) {
		/*
		 * Commit data on the server, as required.
		 * If all bufs are using the same wcred, then use that with
		 * one call for all of them, otherwise commit each one
		 * separately.
		 */
		if (wcred != NOCRED)
			retv = nfs_commit(vp, off, (int)(endoff - off),
				wcred, td);
		else {
			retv = 0;
			for (i = 0; i < bvecpos; i++) {
				off_t off, size;
				bp = bvec[i];
				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
					bp->b_dirtyoff;
				size = (u_quad_t)(bp->b_dirtyend
						  - bp->b_dirtyoff);
				retv = nfs_commit(vp, off, (int)size,
						  bp->b_wcred, td);
				if (retv) break;
			}
		}

		if (retv == NFSERR_STALEWRITEVERF)
			nfs_clearcommit(vp->v_mount);

		/*
		 * Now, either mark the blocks I/O done or mark the
		 * blocks dirty, depending on whether the commit
		 * succeeded.
		 */
		for (i = 0; i < bvecpos; i++) {
			bp = bvec[i];
			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
			if (retv) {
				/*
				 * Error, leave B_DELWRI intact
				 */
				vfs_unbusy_pages(bp);
				brelse(bp);
			} else {
				/*
				 * Success, remove B_DELWRI ( bundirty() ).
				 *
				 * b_dirtyoff/b_dirtyend seem to be NFS
				 * specific.  We should probably move that
				 * into bundirty(). XXX
				 */
				bufobj_wref(bo);
				bp->b_flags |= B_ASYNC;
				bundirty(bp);
				bp->b_flags &= ~B_DONE;
				bp->b_ioflags &= ~BIO_ERROR;
				bp->b_dirtyoff = bp->b_dirtyend = 0;
				bufdone(bp);
			}
		}
	}

	/*
	 * Start/do any write(s) that are required.
	 */
loop:
	BO_LOCK(bo);
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			if (waitfor != MNT_WAIT || passone)
				continue;

			error = BUF_TIMELOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
			if (error == 0) {
				BUF_UNLOCK(bp);
				goto loop;
			}
			if (error == ENOLCK) {
				error = 0;
				goto loop;
			}
			if (nfs_sigintr(nmp, td)) {
				error = EINTR;
				goto done;
			}
			if (slpflag == PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			}
			goto loop;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("nfs_fsync: not dirty");
		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
			BUF_UNLOCK(bp);
			continue;
		}
		BO_UNLOCK(bo);
		bremfree(bp);
		/*
		 * NOTE(review): both branches below are identical; this
		 * looks like a leftover from an earlier flag scheme and
		 * could collapse to an unconditional assignment.
		 */
		if (passone || !commit)
			bp->b_flags |= B_ASYNC;
		else
			bp->b_flags |= B_ASYNC;
		bwrite(bp);
		if (nfs_sigintr(nmp, td)) {
			error = EINTR;
			goto done;
		}
		goto loop;
	}
	if (passone) {
		passone = 0;
		BO_UNLOCK(bo);
		goto again;
	}
	if (waitfor == MNT_WAIT) {
		/* Wait for the writes issued above to complete. */
		while (bo->bo_numoutput) {
			error = bufobj_wwait(bo, slpflag, slptimeo);
			if (error) {
				BO_UNLOCK(bo);
				error = nfs_sigintr(nmp, td);
				if (error)
					goto done;
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
				BO_LOCK(bo);
			}
		}
		if (bo->bo_dirty.bv_cnt != 0 && commit) {
			BO_UNLOCK(bo);
			goto loop;
		}
		/*
		 * Wait for all the async IO requests to drain
		 */
		BO_UNLOCK(bo);
		mtx_lock(&np->n_mtx);
		while (np->n_directio_asyncwr > 0) {
			np->n_flag |= NFSYNCWAIT;
			error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr,
					   &np->n_mtx, slpflag | (PRIBIO + 1),
					   "nfsfsync", 0);
			if (error) {
				if (nfs_sigintr(nmp, td)) {
					mtx_unlock(&np->n_mtx);
					error = EINTR;
					goto done;
				}
			}
		}
		mtx_unlock(&np->n_mtx);
	} else
		BO_UNLOCK(bo);
	/* Report any asynchronous write error recorded on the nfsnode. */
	mtx_lock(&np->n_mtx);
	if (np->n_flag & NWRITEERR) {
		error = np->n_error;
		np->n_flag &= ~NWRITEERR;
	}
	if (commit && bo->bo_dirty.bv_cnt == 0 &&
	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
		np->n_flag &= ~NMODIFIED;
	mtx_unlock(&np->n_mtx);
done:
	if (bvec != NULL && bvec != bvec_on_stack)
		free(bvec, M_TEMP);
	return (error);
}

/*
 * NFS advisory byte-level locks.
 *
 * With NFSMNT_NOLOCKD the lock is handled purely locally via
 * lf_advlock(); otherwise it is forwarded to the NFS lock daemon
 * hook (nfs_advlock_p) when one is registered.
 */
static int
nfs_advlock(struct vop_advlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	u_quad_t size;
	int error;

	error = vn_lock(vp, LK_SHARED);
	if (error)
		return (error);
	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
		size = VTONFS(vp)->n_size;
		VOP_UNLOCK(vp, 0);
		error = lf_advlock(ap, &(vp->v_lockf), size);
	} else {
		if (nfs_advlock_p)
			error = nfs_advlock_p(ap);
		else
			error = ENOLCK;
	}

	return (error);
}

/*
 * NFS advisory byte-level locks.
 *
 * Async variant: only supported for the purely local
 * (NFSMNT_NOLOCKD) case; otherwise EOPNOTSUPP.
 */
static int
nfs_advlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp = ap->a_vp;
	u_quad_t size;
	int error;

	error = vn_lock(vp, LK_SHARED);
	if (error)
		return (error);
	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
		size = VTONFS(vp)->n_size;
		VOP_UNLOCK(vp, 0);
		error = lf_advlockasync(ap, &(vp->v_lockf), size);
	} else {
		VOP_UNLOCK(vp, 0);
		error = EOPNOTSUPP;
	}
	return (error);
}

/*
 * Print out the contents of an nfsnode.
 */
static int
nfs_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);

	nfs_printf("\tfileid %ld fsid 0x%x",
	   np->n_vattr.va_fileid, np->n_vattr.va_fsid);
	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);
	printf("\n");
	return (0);
}

/*
 * This is the "real" nfs::bwrite(struct buf*).
 * We set B_CACHE if this is a VMIO buffer.
 *
 * Undirties the buffer, hands it to bstrategy(), and -- for a
 * synchronous (non-B_ASYNC) caller -- waits for completion and
 * returns the I/O status; async callers always get 0 immediately.
 */
int
nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
{
	int s;
	int oldflags = bp->b_flags;
#if 0
	int retv = 1;
	off_t off;
#endif

	BUF_ASSERT_HELD(bp);

	if (bp->b_flags & B_INVAL) {
		/* Buffer invalidated under us; just release it. */
		brelse(bp);
		return(0);
	}

	bp->b_flags |= B_CACHE;

	/*
	 * Undirty the bp.  We will redirty it later if the I/O fails.
	 */
	s = splbio();
	bundirty(bp);
	bp->b_flags &= ~B_DONE;
	bp->b_ioflags &= ~BIO_ERROR;
	bp->b_iocmd = BIO_WRITE;

	bufobj_wref(bp->b_bufobj);
	curthread->td_ru.ru_oublock++;
	splx(s);

	/*
	 * Note: to avoid loopback deadlocks, we do not
	 * assign b_runningbufspace.
	 */
	vfs_busy_pages(bp, 1);

	BUF_KERNPROC(bp);
	bp->b_iooffset = dbtob(bp->b_blkno);
	bstrategy(bp);

	if( (oldflags & B_ASYNC) == 0) {
		int rtval = bufwait(bp);

		if (oldflags & B_DELWRI) {
			/* Requeue on the clean/dirty lists as appropriate. */
			s = splbio();
			reassignbuf(bp);
			splx(s);
		}
		brelse(bp);
		return (rtval);
	}

	return (0);
}

/*
 * nfs special file access vnode op.
 * Essentially just get vattr and then imitate iaccess() since the device is
 * local to the client.
3354 */ 3355static int 3356nfsspec_access(struct vop_access_args *ap) 3357{ 3358 struct vattr *vap; 3359 struct ucred *cred = ap->a_cred; 3360 struct vnode *vp = ap->a_vp; 3361 accmode_t accmode = ap->a_accmode; 3362 struct vattr vattr; 3363 int error; 3364 3365 /* 3366 * Disallow write attempts on filesystems mounted read-only; 3367 * unless the file is a socket, fifo, or a block or character 3368 * device resident on the filesystem. 3369 */ 3370 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3371 switch (vp->v_type) { 3372 case VREG: 3373 case VDIR: 3374 case VLNK: 3375 return (EROFS); 3376 default: 3377 break; 3378 } 3379 } 3380 vap = &vattr; 3381 error = VOP_GETATTR(vp, vap, cred); 3382 if (error) 3383 goto out; 3384 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3385 accmode, cred, NULL); 3386out: 3387 return error; 3388} 3389 3390/* 3391 * Read wrapper for fifos. 3392 */ 3393static int 3394nfsfifo_read(struct vop_read_args *ap) 3395{ 3396 struct nfsnode *np = VTONFS(ap->a_vp); 3397 int error; 3398 3399 /* 3400 * Set access flag. 3401 */ 3402 mtx_lock(&np->n_mtx); 3403 np->n_flag |= NACC; 3404 getnanotime(&np->n_atim); 3405 mtx_unlock(&np->n_mtx); 3406 error = fifo_specops.vop_read(ap); 3407 return error; 3408} 3409 3410/* 3411 * Write wrapper for fifos. 3412 */ 3413static int 3414nfsfifo_write(struct vop_write_args *ap) 3415{ 3416 struct nfsnode *np = VTONFS(ap->a_vp); 3417 3418 /* 3419 * Set update flag. 3420 */ 3421 mtx_lock(&np->n_mtx); 3422 np->n_flag |= NUPD; 3423 getnanotime(&np->n_mtim); 3424 mtx_unlock(&np->n_mtx); 3425 return(fifo_specops.vop_write(ap)); 3426} 3427 3428/* 3429 * Close wrapper for fifos. 3430 * 3431 * Update the times on the nfsnode then do fifo close. 
 */
static int
nfsfifo_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	struct timespec ts;

	mtx_lock(&np->n_mtx);
	if (np->n_flag & (NACC | NUPD)) {
		/* Fold pending access/update marks into the cached times. */
		getnanotime(&ts);
		if (np->n_flag & NACC)
			np->n_atim = ts;
		if (np->n_flag & NUPD)
			np->n_mtim = ts;
		np->n_flag |= NCHG;
		/*
		 * Last reference on a writable mount: push the times to
		 * the server now.  n_mtx must be dropped before issuing
		 * the VOP_SETATTR() RPC.
		 */
		if (vrefcnt(vp) == 1 &&
		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			VATTR_NULL(&vattr);
			if (np->n_flag & NACC)
				vattr.va_atime = np->n_atim;
			if (np->n_flag & NUPD)
				vattr.va_mtime = np->n_mtim;
			mtx_unlock(&np->n_mtx);
			(void)VOP_SETATTR(vp, &vattr, ap->a_cred);
			goto out;
		}
	}
	mtx_unlock(&np->n_mtx);
out:
	return (fifo_specops.vop_close(ap));
}

/*
 * Just call nfs_writebp() with the force argument set to 1.
 *
 * NOTE: B_DONE may or may not be set in a_bp on call.
 */
static int
nfs_bwrite(struct buf *bp)
{

	return (nfs_writebp(bp, 1, curthread));
}

/* Buffer operations vector installed on NFS client vnodes. */
struct buf_ops buf_ops_nfs = {
	.bop_name	=	"buf_ops_nfs",
	.bop_write	=	nfs_bwrite,
	.bop_strategy	=	bufstrategy,
	.bop_sync	=	bufsync,
	.bop_bdflush	=	bufbdflush,
};