nfs_vnops.c revision 200471
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_vnops.c 200471 2009-12-13 11:06:39Z bz $"); 37 38/* 39 * vnode op calls for Sun NFS version 2 and 3 40 */ 41 42#include "opt_inet.h" 43#include "opt_kdtrace.h" 44 45#include <sys/param.h> 46#include <sys/kernel.h> 47#include <sys/systm.h> 48#include <sys/resourcevar.h> 49#include <sys/proc.h> 50#include <sys/mount.h> 51#include <sys/bio.h> 52#include <sys/buf.h> 53#include <sys/jail.h> 54#include <sys/malloc.h> 55#include <sys/mbuf.h> 56#include <sys/namei.h> 57#include <sys/socket.h> 58#include <sys/vnode.h> 59#include <sys/dirent.h> 60#include <sys/fcntl.h> 61#include <sys/lockf.h> 62#include <sys/stat.h> 63#include <sys/sysctl.h> 64#include <sys/signalvar.h> 65 66#include <vm/vm.h> 67#include <vm/vm_object.h> 68#include <vm/vm_extern.h> 69#include <vm/vm_object.h> 70 71#include <fs/fifofs/fifo.h> 72 73#include <nfs/nfsproto.h> 74#include <nfsclient/nfs.h> 75#include <nfsclient/nfsnode.h> 76#include <nfsclient/nfsmount.h> 77#include <nfsclient/nfs_kdtrace.h> 78#include <nfsclient/nfs_lock.h> 79#include <nfs/xdr_subs.h> 80#include <nfsclient/nfsm_subs.h> 81 82#include <net/if.h> 83#include <netinet/in.h> 84#include <netinet/in_var.h> 85 86#include <machine/stdarg.h> 87 88#ifdef KDTRACE_HOOKS 89#include <sys/dtrace_bsd.h> 90 91dtrace_nfsclient_accesscache_flush_probe_func_t 92 dtrace_nfsclient_accesscache_flush_done_probe; 93uint32_t nfsclient_accesscache_flush_done_id; 94 95dtrace_nfsclient_accesscache_get_probe_func_t 96 dtrace_nfsclient_accesscache_get_hit_probe, 97 dtrace_nfsclient_accesscache_get_miss_probe; 98uint32_t nfsclient_accesscache_get_hit_id; 99uint32_t nfsclient_accesscache_get_miss_id; 100 101dtrace_nfsclient_accesscache_load_probe_func_t 102 dtrace_nfsclient_accesscache_load_done_probe; 103uint32_t nfsclient_accesscache_load_done_id; 104#endif /* !KDTRACE_HOOKS */ 105 106/* Defs */ 107#define TRUE 1 108#define FALSE 0 

/*
 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
 * calls are not in getblk() and brelse() so that they would not be necessary
 * here.
 */
#ifndef B_VMIO
/* Without B_VMIO, vfs_busy_pages() expands to nothing. */
#define vfs_busy_pages(bp, f)
#endif

/*
 * Forward declarations for the vnode operations and helpers that are
 * private to this file.
 */
static vop_read_t	nfsfifo_read;
static vop_write_t	nfsfifo_write;
static vop_close_t	nfsfifo_close;
static int	nfs_flush(struct vnode *, int, int);
static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *);
static vop_lookup_t	nfs_lookup;
static vop_create_t	nfs_create;
static vop_mknod_t	nfs_mknod;
static vop_open_t	nfs_open;
static vop_close_t	nfs_close;
static vop_access_t	nfs_access;
static vop_getattr_t	nfs_getattr;
static vop_setattr_t	nfs_setattr;
static vop_read_t	nfs_read;
static vop_fsync_t	nfs_fsync;
static vop_remove_t	nfs_remove;
static vop_link_t	nfs_link;
static vop_rename_t	nfs_rename;
static vop_mkdir_t	nfs_mkdir;
static vop_rmdir_t	nfs_rmdir;
static vop_symlink_t	nfs_symlink;
static vop_readdir_t	nfs_readdir;
static vop_strategy_t	nfs_strategy;
static	int	nfs_lookitup(struct vnode *, const char *, int,
		    struct ucred *, struct thread *, struct nfsnode **);
static	int	nfs_sillyrename(struct vnode *, struct vnode *,
		    struct componentname *);
static vop_access_t	nfsspec_access;
static vop_readlink_t	nfs_readlink;
static vop_print_t	nfs_print;
static vop_advlock_t	nfs_advlock;
static vop_advlockasync_t nfs_advlockasync;

/*
 * Global vfs data structures for nfs
 */

/*
 * Vnode operations vector for NFS vnodes; .vop_default points at
 * default_vnodeops.
 */
struct vop_vector nfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		nfs_access,
	.vop_advlock =		nfs_advlock,
	.vop_advlockasync =	nfs_advlockasync,
	.vop_close =		nfs_close,
	.vop_create =		nfs_create,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_getpages =		nfs_getpages,
	.vop_putpages =		nfs_putpages,
	.vop_inactive =		nfs_inactive,
	.vop_link =		nfs_link,
	.vop_lookup =		nfs_lookup,
	.vop_mkdir =		nfs_mkdir,
	.vop_mknod =		nfs_mknod,
	.vop_open =		nfs_open,
	.vop_print =		nfs_print,
	.vop_read =		nfs_read,
	.vop_readdir =		nfs_readdir,
	.vop_readlink =		nfs_readlink,
	.vop_reclaim =		nfs_reclaim,
	.vop_remove =		nfs_remove,
	.vop_rename =		nfs_rename,
	.vop_rmdir =		nfs_rmdir,
	.vop_setattr =		nfs_setattr,
	.vop_strategy =		nfs_strategy,
	.vop_symlink =		nfs_symlink,
	.vop_write =		nfs_write,
};

/*
 * Vnode operations vector for fifos that live on an NFS mount;
 * .vop_default points at fifo_specops.
 */
struct vop_vector nfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_access =		nfsspec_access,
	.vop_close =		nfsfifo_close,
	.vop_fsync =		nfs_fsync,
	.vop_getattr =		nfs_getattr,
	.vop_inactive =		nfs_inactive,
	.vop_print =		nfs_print,
	.vop_read =		nfsfifo_read,
	.vop_reclaim =		nfs_reclaim,
	.vop_setattr =		nfs_setattr,
	.vop_write =		nfsfifo_write,
};

/* Forward declarations for the RPC helpers defined later in this file. */
static int	nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
			     struct componentname *cnp, struct vattr *vap);
static int	nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
			      struct ucred *cred, struct thread *td);
static int	nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
			      int fnamelen, struct vnode *tdvp,
			      const char *tnameptr, int tnamelen,
			      struct ucred *cred, struct thread *td);
static int	nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
			     struct sillyrename *sp);

/*
 * Global variables
 */
struct mtx 	nfs_iod_mtx;
struct proc	*nfs_iodwant[NFS_MAXASYNCDAEMON];
struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
int		 nfs_numasync = 0;
vop_advlock_t	*nfs_advlock_p = nfs_dolock;
vop_reclaim_t	*nfs_reclaim_p = NULL;
/* Size of a struct dirent minus its (MAXNAMLEN + 1) byte name buffer. */
#define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))

SYSCTL_DECL(_vfs_nfs);

/*
 * Timeout added to an access cache entry's stamp when nfs_access() decides
 * whether the entry is still usable; a value <= 0 also stops nfs_access()
 * from asking the server for the full set of rights.
 */
static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

/*
 * When set (together with a positive access cache timeout on an NFSv3
 * mount), nfs_getattr() issues an ACCESS RPC on an attribute cache miss
 * before falling back to a GETATTR RPC.
 */
static int	nfs_prime_access_cache = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
	   &nfs_prime_access_cache, 0,
	   "Prime NFS ACCESS cache when fetching attributes");

/* Selects the commit argument nfs_close() passes to nfs_flush() for NFSv3. */
static int	nfsv3_commit_on_close = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");

/* When set, nfs_close() cleans the vnode's VM object pages on close. */
static int	nfs_clean_pages_on_close = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");

/* Gates the O_DIRECT handling in nfs_open() and nfs_close(). */
int nfs_directio_enable = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
	   &nfs_directio_enable, 0, "Enable NFS directio");

/*
 * This sysctl allows other processes to mmap a file that has been opened
 * O_DIRECT by a process.  In general, having processes mmap the file while
 * direct I/O is in progress can lead to data inconsistencies.  But, we allow
 * this by default to prevent DoS attacks - to prevent a malicious user from
 * opening up files O_DIRECT preventing other users from mmap'ing these
 * files.  "Protected" environments where stricter consistency guarantees are
 * required can disable this knob.  The process that opened the file O_DIRECT
 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
 * meaningful.
 */
int nfs_directio_allow_mmap = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
	   &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");

/* Disabled: read-only sysctls exporting the access cache hit/miss counters. */
#if 0
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");

SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
#endif

/* Every NFSv3 ACCESS right this client asks the server about. */
#define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)

/*
 * SMP Locking Note :
 * The list of locks after the description of the lock is the ordering
 * of other locks acquired with the lock held.
 * np->n_mtx : Protects the fields in the nfsnode.
       VM Object Lock
       VI_MTX (acquired indirectly)
 * nmp->nm_mtx : Protects the fields in the nfsmount.
       rep->r_mtx
 * nfs_iod_mtx : Global lock, protects shared nfsiod state.
 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
       nmp->nm_mtx
       rep->r_mtx
 * rep->r_mtx : Protects the fields in an nfsreq.
286 */ 287 288static int 289nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td, 290 struct ucred *cred, uint32_t *retmode) 291{ 292 const int v3 = 1; 293 u_int32_t *tl; 294 int error = 0, attrflag, i, lrupos; 295 296 struct mbuf *mreq, *mrep, *md, *mb; 297 caddr_t bpos, dpos; 298 u_int32_t rmode; 299 struct nfsnode *np = VTONFS(vp); 300 301 nfsstats.rpccnt[NFSPROC_ACCESS]++; 302 mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); 303 mb = mreq; 304 bpos = mtod(mb, caddr_t); 305 nfsm_fhtom(vp, v3); 306 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 307 *tl = txdr_unsigned(wmode); 308 nfsm_request(vp, NFSPROC_ACCESS, td, cred); 309 nfsm_postop_attr(vp, attrflag); 310 if (!error) { 311 lrupos = 0; 312 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 313 rmode = fxdr_unsigned(u_int32_t, *tl); 314 mtx_lock(&np->n_mtx); 315 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 316 if (np->n_accesscache[i].uid == cred->cr_uid) { 317 np->n_accesscache[i].mode = rmode; 318 np->n_accesscache[i].stamp = time_second; 319 break; 320 } 321 if (i > 0 && np->n_accesscache[i].stamp < 322 np->n_accesscache[lrupos].stamp) 323 lrupos = i; 324 } 325 if (i == NFS_ACCESSCACHESIZE) { 326 np->n_accesscache[lrupos].uid = cred->cr_uid; 327 np->n_accesscache[lrupos].mode = rmode; 328 np->n_accesscache[lrupos].stamp = time_second; 329 } 330 mtx_unlock(&np->n_mtx); 331 if (retmode != NULL) 332 *retmode = rmode; 333 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); 334 } 335 m_freem(mrep); 336nfsmout: 337#ifdef KDTRACE_HOOKS 338 if (error) { 339 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, 340 error); 341 } 342#endif 343 return (error); 344} 345 346/* 347 * nfs access vnode op. 348 * For nfs version 2, just return ok. File accesses may fail later. 349 * For nfs version 3, use the access rpc to check accessibility. If file modes 350 * are changed on the server, accesses might still fail later. 
351 */ 352static int 353nfs_access(struct vop_access_args *ap) 354{ 355 struct vnode *vp = ap->a_vp; 356 int error = 0, i, gotahit; 357 u_int32_t mode, rmode, wmode; 358 int v3 = NFS_ISV3(vp); 359 struct nfsnode *np = VTONFS(vp); 360 361 /* 362 * Disallow write attempts on filesystems mounted read-only; 363 * unless the file is a socket, fifo, or a block or character 364 * device resident on the filesystem. 365 */ 366 if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 367 switch (vp->v_type) { 368 case VREG: 369 case VDIR: 370 case VLNK: 371 return (EROFS); 372 default: 373 break; 374 } 375 } 376 /* 377 * For nfs v3, check to see if we have done this recently, and if 378 * so return our cached result instead of making an ACCESS call. 379 * If not, do an access rpc, otherwise you are stuck emulating 380 * ufs_access() locally using the vattr. This may not be correct, 381 * since the server may apply other access criteria such as 382 * client uid-->server uid mapping that we do not know about. 383 */ 384 if (v3) { 385 if (ap->a_accmode & VREAD) 386 mode = NFSV3ACCESS_READ; 387 else 388 mode = 0; 389 if (vp->v_type != VDIR) { 390 if (ap->a_accmode & VWRITE) 391 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); 392 if (ap->a_accmode & VEXEC) 393 mode |= NFSV3ACCESS_EXECUTE; 394 } else { 395 if (ap->a_accmode & VWRITE) 396 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | 397 NFSV3ACCESS_DELETE); 398 if (ap->a_accmode & VEXEC) 399 mode |= NFSV3ACCESS_LOOKUP; 400 } 401 /* XXX safety belt, only make blanket request if caching */ 402 if (nfsaccess_cache_timeout > 0) { 403 wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | 404 NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | 405 NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP; 406 } else { 407 wmode = mode; 408 } 409 410 /* 411 * Does our cached result allow us to give a definite yes to 412 * this request? 
413 */ 414 gotahit = 0; 415 mtx_lock(&np->n_mtx); 416 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 417 if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { 418 if (time_second < (np->n_accesscache[i].stamp + 419 nfsaccess_cache_timeout) && 420 (np->n_accesscache[i].mode & mode) == mode) { 421 nfsstats.accesscache_hits++; 422 gotahit = 1; 423 } 424 break; 425 } 426 } 427 mtx_unlock(&np->n_mtx); 428#ifdef KDTRACE_HOOKS 429 if (gotahit) 430 KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, 431 ap->a_cred->cr_uid, mode); 432 else 433 KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, 434 ap->a_cred->cr_uid, mode); 435#endif 436 if (gotahit == 0) { 437 /* 438 * Either a no, or a don't know. Go to the wire. 439 */ 440 nfsstats.accesscache_misses++; 441 error = nfs3_access_otw(vp, wmode, ap->a_td, ap->a_cred, 442 &rmode); 443 if (!error) { 444 if ((rmode & mode) != mode) 445 error = EACCES; 446 } 447 } 448 return (error); 449 } else { 450 if ((error = nfsspec_access(ap)) != 0) { 451 return (error); 452 } 453 /* 454 * Attempt to prevent a mapped root from accessing a file 455 * which it shouldn't. We try to read a byte from the file 456 * if the user is root and the file is not zero length. 457 * After calling nfsspec_access, we should have the correct 458 * file size cached. 
459 */ 460 mtx_lock(&np->n_mtx); 461 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) 462 && VTONFS(vp)->n_size > 0) { 463 struct iovec aiov; 464 struct uio auio; 465 char buf[1]; 466 467 mtx_unlock(&np->n_mtx); 468 aiov.iov_base = buf; 469 aiov.iov_len = 1; 470 auio.uio_iov = &aiov; 471 auio.uio_iovcnt = 1; 472 auio.uio_offset = 0; 473 auio.uio_resid = 1; 474 auio.uio_segflg = UIO_SYSSPACE; 475 auio.uio_rw = UIO_READ; 476 auio.uio_td = ap->a_td; 477 478 if (vp->v_type == VREG) 479 error = nfs_readrpc(vp, &auio, ap->a_cred); 480 else if (vp->v_type == VDIR) { 481 char* bp; 482 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 483 aiov.iov_base = bp; 484 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 485 error = nfs_readdirrpc(vp, &auio, ap->a_cred); 486 free(bp, M_TEMP); 487 } else if (vp->v_type == VLNK) 488 error = nfs_readlinkrpc(vp, &auio, ap->a_cred); 489 else 490 error = EACCES; 491 } else 492 mtx_unlock(&np->n_mtx); 493 return (error); 494 } 495} 496 497int nfs_otw_getattr_avoid = 0; 498 499/* 500 * nfs open vnode op 501 * Check to see if the type is ok 502 * and that deletion is not in progress. 503 * For paged in text files, you will need to flush the page cache 504 * if consistency is lost. 505 */ 506/* ARGSUSED */ 507static int 508nfs_open(struct vop_open_args *ap) 509{ 510 struct vnode *vp = ap->a_vp; 511 struct nfsnode *np = VTONFS(vp); 512 struct vattr vattr; 513 int error; 514 int fmode = ap->a_mode; 515 516 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) 517 return (EOPNOTSUPP); 518 519 /* 520 * Get a valid lease. If cached data is stale, flush it. 
521 */ 522 mtx_lock(&np->n_mtx); 523 if (np->n_flag & NMODIFIED) { 524 mtx_unlock(&np->n_mtx); 525 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 526 if (error == EINTR || error == EIO) 527 return (error); 528 np->n_attrstamp = 0; 529 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 530 if (vp->v_type == VDIR) 531 np->n_direofoffset = 0; 532 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 533 if (error) 534 return (error); 535 mtx_lock(&np->n_mtx); 536 np->n_mtime = vattr.va_mtime; 537 mtx_unlock(&np->n_mtx); 538 } else { 539 struct thread *td = curthread; 540 541 if (np->n_ac_ts_syscalls != td->td_syscalls || 542 np->n_ac_ts_tid != td->td_tid || 543 td->td_proc == NULL || 544 np->n_ac_ts_pid != td->td_proc->p_pid) { 545 np->n_attrstamp = 0; 546 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 547 } 548 mtx_unlock(&np->n_mtx); 549 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 550 if (error) 551 return (error); 552 mtx_lock(&np->n_mtx); 553 if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 554 if (vp->v_type == VDIR) 555 np->n_direofoffset = 0; 556 mtx_unlock(&np->n_mtx); 557 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 558 if (error == EINTR || error == EIO) { 559 return (error); 560 } 561 mtx_lock(&np->n_mtx); 562 np->n_mtime = vattr.va_mtime; 563 } 564 mtx_unlock(&np->n_mtx); 565 } 566 /* 567 * If the object has >= 1 O_DIRECT active opens, we disable caching. 568 */ 569 if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 570 if (np->n_directio_opens == 0) { 571 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 572 if (error) 573 return (error); 574 mtx_lock(&np->n_mtx); 575 np->n_flag |= NNONCACHE; 576 mtx_unlock(&np->n_mtx); 577 } 578 np->n_directio_opens++; 579 } 580 vnode_create_vobject(vp, vattr.va_size, ap->a_td); 581 return (0); 582} 583 584/* 585 * nfs close vnode op 586 * What an NFS client should do upon close after writing is a debatable issue. 
587 * Most NFS clients push delayed writes to the server upon close, basically for 588 * two reasons: 589 * 1 - So that any write errors may be reported back to the client process 590 * doing the close system call. By far the two most likely errors are 591 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. 592 * 2 - To put a worst case upper bound on cache inconsistency between 593 * multiple clients for the file. 594 * There is also a consistency problem for Version 2 of the protocol w.r.t. 595 * not being able to tell if other clients are writing a file concurrently, 596 * since there is no way of knowing if the changed modify time in the reply 597 * is only due to the write for this client. 598 * (NFS Version 3 provides weak cache consistency data in the reply that 599 * should be sufficient to detect and handle this case.) 600 * 601 * The current code does the following: 602 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers 603 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate 604 * or commit them (this satisfies 1 and 2 except for the 605 * case where the server crashes after this close but 606 * before the commit RPC, which is felt to be "good 607 * enough". Changing the last argument to nfs_flush() to 608 * a 1 would force a commit operation, if it is felt a 609 * commit is necessary now. 610 */ 611/* ARGSUSED */ 612static int 613nfs_close(struct vop_close_args *ap) 614{ 615 struct vnode *vp = ap->a_vp; 616 struct nfsnode *np = VTONFS(vp); 617 int error = 0; 618 int fmode = ap->a_fflag; 619 620 if (vp->v_type == VREG) { 621 /* 622 * Examine and clean dirty pages, regardless of NMODIFIED. 623 * This closes a major hole in close-to-open consistency. 624 * We want to push out all dirty pages (and buffers) on 625 * close, regardless of whether they were dirtied by 626 * mmap'ed writes or via write(). 
627 */ 628 if (nfs_clean_pages_on_close && vp->v_object) { 629 VM_OBJECT_LOCK(vp->v_object); 630 vm_object_page_clean(vp->v_object, 0, 0, 0); 631 VM_OBJECT_UNLOCK(vp->v_object); 632 } 633 mtx_lock(&np->n_mtx); 634 if (np->n_flag & NMODIFIED) { 635 mtx_unlock(&np->n_mtx); 636 if (NFS_ISV3(vp)) { 637 /* 638 * Under NFSv3 we have dirty buffers to dispose of. We 639 * must flush them to the NFS server. We have the option 640 * of waiting all the way through the commit rpc or just 641 * waiting for the initial write. The default is to only 642 * wait through the initial write so the data is in the 643 * server's cache, which is roughly similar to the state 644 * a standard disk subsystem leaves the file in on close(). 645 * 646 * We cannot clear the NMODIFIED bit in np->n_flag due to 647 * potential races with other processes, and certainly 648 * cannot clear it if we don't commit. 649 */ 650 int cm = nfsv3_commit_on_close ? 1 : 0; 651 error = nfs_flush(vp, MNT_WAIT, cm); 652 /* np->n_flag &= ~NMODIFIED; */ 653 } else 654 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 655 mtx_lock(&np->n_mtx); 656 } 657 if (np->n_flag & NWRITEERR) { 658 np->n_flag &= ~NWRITEERR; 659 error = np->n_error; 660 } 661 mtx_unlock(&np->n_mtx); 662 } 663 if (nfs_directio_enable) 664 KASSERT((np->n_directio_asyncwr == 0), 665 ("nfs_close: dirty unflushed (%d) directio buffers\n", 666 np->n_directio_asyncwr)); 667 if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 668 mtx_lock(&np->n_mtx); 669 KASSERT((np->n_directio_opens > 0), 670 ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); 671 np->n_directio_opens--; 672 if (np->n_directio_opens == 0) 673 np->n_flag &= ~NNONCACHE; 674 mtx_unlock(&np->n_mtx); 675 } 676 return (error); 677} 678 679/* 680 * nfs getattr call from vfs. 
681 */ 682static int 683nfs_getattr(struct vop_getattr_args *ap) 684{ 685 struct vnode *vp = ap->a_vp; 686 struct nfsnode *np = VTONFS(vp); 687 struct thread *td = curthread; 688 struct vattr *vap = ap->a_vap; 689 struct vattr vattr; 690 caddr_t bpos, dpos; 691 int error = 0; 692 struct mbuf *mreq, *mrep, *md, *mb; 693 int v3 = NFS_ISV3(vp); 694 695 /* 696 * Update local times for special files. 697 */ 698 mtx_lock(&np->n_mtx); 699 if (np->n_flag & (NACC | NUPD)) 700 np->n_flag |= NCHG; 701 mtx_unlock(&np->n_mtx); 702 /* 703 * First look in the cache. 704 */ 705 if (nfs_getattrcache(vp, &vattr) == 0) 706 goto nfsmout; 707 if (v3 && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) { 708 nfsstats.accesscache_misses++; 709 nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred, NULL); 710 if (nfs_getattrcache(vp, &vattr) == 0) 711 goto nfsmout; 712 } 713 nfsstats.rpccnt[NFSPROC_GETATTR]++; 714 mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); 715 mb = mreq; 716 bpos = mtod(mb, caddr_t); 717 nfsm_fhtom(vp, v3); 718 nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred); 719 if (!error) { 720 nfsm_loadattr(vp, &vattr); 721 } 722 m_freem(mrep); 723nfsmout: 724 vap->va_type = vattr.va_type; 725 vap->va_mode = vattr.va_mode; 726 vap->va_nlink = vattr.va_nlink; 727 vap->va_uid = vattr.va_uid; 728 vap->va_gid = vattr.va_gid; 729 vap->va_fsid = vattr.va_fsid; 730 vap->va_fileid = vattr.va_fileid; 731 vap->va_size = vattr.va_size; 732 vap->va_blocksize = vattr.va_blocksize; 733 vap->va_atime = vattr.va_atime; 734 vap->va_mtime = vattr.va_mtime; 735 vap->va_ctime = vattr.va_ctime; 736 vap->va_gen = vattr.va_gen; 737 vap->va_flags = vattr.va_flags; 738 vap->va_rdev = vattr.va_rdev; 739 vap->va_bytes = vattr.va_bytes; 740 vap->va_filerev = vattr.va_filerev; 741 742 return (error); 743} 744 745/* 746 * nfs setattr call. 
747 */ 748static int 749nfs_setattr(struct vop_setattr_args *ap) 750{ 751 struct vnode *vp = ap->a_vp; 752 struct nfsnode *np = VTONFS(vp); 753 struct vattr *vap = ap->a_vap; 754 struct thread *td = curthread; 755 int error = 0; 756 u_quad_t tsize; 757 758#ifndef nolint 759 tsize = (u_quad_t)0; 760#endif 761 762 /* 763 * Setting of flags is not supported. 764 */ 765 if (vap->va_flags != VNOVAL) 766 return (EOPNOTSUPP); 767 768 /* 769 * Disallow write attempts if the filesystem is mounted read-only. 770 */ 771 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 772 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 773 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && 774 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 775 error = EROFS; 776 goto out; 777 } 778 if (vap->va_size != VNOVAL) { 779 switch (vp->v_type) { 780 case VDIR: 781 return (EISDIR); 782 case VCHR: 783 case VBLK: 784 case VSOCK: 785 case VFIFO: 786 if (vap->va_mtime.tv_sec == VNOVAL && 787 vap->va_atime.tv_sec == VNOVAL && 788 vap->va_mode == (mode_t)VNOVAL && 789 vap->va_uid == (uid_t)VNOVAL && 790 vap->va_gid == (gid_t)VNOVAL) 791 return (0); 792 vap->va_size = VNOVAL; 793 break; 794 default: 795 /* 796 * Disallow write attempts if the filesystem is 797 * mounted read-only. 798 */ 799 if (vp->v_mount->mnt_flag & MNT_RDONLY) 800 return (EROFS); 801 /* 802 * We run vnode_pager_setsize() early (why?), 803 * we must set np->n_size now to avoid vinvalbuf 804 * V_SAVE races that might setsize a lower 805 * value. 
806 */ 807 mtx_lock(&np->n_mtx); 808 tsize = np->n_size; 809 mtx_unlock(&np->n_mtx); 810 error = nfs_meta_setsize(vp, ap->a_cred, td, 811 vap->va_size); 812 mtx_lock(&np->n_mtx); 813 if (np->n_flag & NMODIFIED) { 814 tsize = np->n_size; 815 mtx_unlock(&np->n_mtx); 816 if (vap->va_size == 0) 817 error = nfs_vinvalbuf(vp, 0, td, 1); 818 else 819 error = nfs_vinvalbuf(vp, V_SAVE, td, 1); 820 if (error) { 821 vnode_pager_setsize(vp, tsize); 822 goto out; 823 } 824 } else 825 mtx_unlock(&np->n_mtx); 826 /* 827 * np->n_size has already been set to vap->va_size 828 * in nfs_meta_setsize(). We must set it again since 829 * nfs_loadattrcache() could be called through 830 * nfs_meta_setsize() and could modify np->n_size. 831 */ 832 mtx_lock(&np->n_mtx); 833 np->n_vattr.va_size = np->n_size = vap->va_size; 834 mtx_unlock(&np->n_mtx); 835 }; 836 } else { 837 mtx_lock(&np->n_mtx); 838 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 839 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 840 mtx_unlock(&np->n_mtx); 841 if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && 842 (error == EINTR || error == EIO)) 843 return error; 844 } else 845 mtx_unlock(&np->n_mtx); 846 } 847 error = nfs_setattrrpc(vp, vap, ap->a_cred); 848 if (error && vap->va_size != VNOVAL) { 849 mtx_lock(&np->n_mtx); 850 np->n_size = np->n_vattr.va_size = tsize; 851 vnode_pager_setsize(vp, tsize); 852 mtx_unlock(&np->n_mtx); 853 } 854out: 855 return (error); 856} 857 858/* 859 * Do an nfs setattr rpc. 
860 */ 861static int 862nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred) 863{ 864 struct nfsv2_sattr *sp; 865 struct nfsnode *np = VTONFS(vp); 866 caddr_t bpos, dpos; 867 u_int32_t *tl; 868 int error = 0, i, wccflag = NFSV3_WCCRATTR; 869 struct mbuf *mreq, *mrep, *md, *mb; 870 int v3 = NFS_ISV3(vp); 871 872 nfsstats.rpccnt[NFSPROC_SETATTR]++; 873 mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); 874 mb = mreq; 875 bpos = mtod(mb, caddr_t); 876 nfsm_fhtom(vp, v3); 877 if (v3) { 878 nfsm_v3attrbuild(vap, TRUE); 879 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 880 *tl = nfs_false; 881 } else { 882 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 883 if (vap->va_mode == (mode_t)VNOVAL) 884 sp->sa_mode = nfs_xdrneg1; 885 else 886 sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); 887 if (vap->va_uid == (uid_t)VNOVAL) 888 sp->sa_uid = nfs_xdrneg1; 889 else 890 sp->sa_uid = txdr_unsigned(vap->va_uid); 891 if (vap->va_gid == (gid_t)VNOVAL) 892 sp->sa_gid = nfs_xdrneg1; 893 else 894 sp->sa_gid = txdr_unsigned(vap->va_gid); 895 sp->sa_size = txdr_unsigned(vap->va_size); 896 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 897 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 898 } 899 nfsm_request(vp, NFSPROC_SETATTR, curthread, cred); 900 if (v3) { 901 mtx_lock(&np->n_mtx); 902 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) 903 np->n_accesscache[i].stamp = 0; 904 mtx_unlock(&np->n_mtx); 905 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); 906 nfsm_wcc_data(vp, wccflag); 907 } else 908 nfsm_loadattr(vp, NULL); 909 m_freem(mrep); 910nfsmout: 911 return (error); 912} 913 914/* 915 * nfs lookup call, one step at a time... 
916 * First look in cache 917 * If not found, unlock the directory nfsnode and do the rpc 918 */ 919static int 920nfs_lookup(struct vop_lookup_args *ap) 921{ 922 struct componentname *cnp = ap->a_cnp; 923 struct vnode *dvp = ap->a_dvp; 924 struct vnode **vpp = ap->a_vpp; 925 struct mount *mp = dvp->v_mount; 926 struct vattr vattr; 927 time_t dmtime; 928 int flags = cnp->cn_flags; 929 struct vnode *newvp; 930 struct nfsmount *nmp; 931 caddr_t bpos, dpos; 932 struct mbuf *mreq, *mrep, *md, *mb; 933 long len; 934 nfsfh_t *fhp; 935 struct nfsnode *np; 936 int error = 0, attrflag, fhsize, ltype; 937 int v3 = NFS_ISV3(dvp); 938 struct thread *td = cnp->cn_thread; 939 940 *vpp = NULLVP; 941 if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && 942 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 943 return (EROFS); 944 if (dvp->v_type != VDIR) 945 return (ENOTDIR); 946 nmp = VFSTONFS(mp); 947 np = VTONFS(dvp); 948 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) { 949 *vpp = NULLVP; 950 return (error); 951 } 952 error = cache_lookup(dvp, vpp, cnp); 953 if (error > 0 && error != ENOENT) 954 return (error); 955 if (error == -1) { 956 /* 957 * We only accept a positive hit in the cache if the 958 * change time of the file matches our cached copy. 959 * Otherwise, we discard the cache entry and fallback 960 * to doing a lookup RPC. 961 */ 962 newvp = *vpp; 963 if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred) 964 && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { 965 nfsstats.lookupcache_hits++; 966 if (cnp->cn_nameiop != LOOKUP && 967 (flags & ISLASTCN)) 968 cnp->cn_flags |= SAVENAME; 969 return (0); 970 } 971 cache_purge(newvp); 972 if (dvp != newvp) 973 vput(newvp); 974 else 975 vrele(newvp); 976 *vpp = NULLVP; 977 } else if (error == ENOENT) { 978 if (dvp->v_iflag & VI_DOOMED) 979 return (ENOENT); 980 /* 981 * We only accept a negative hit in the cache if the 982 * modification time of the parent directory matches 983 * our cached copy. 
Otherwise, we discard all of the 984 * negative cache entries for this directory. 985 */ 986 if (VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && 987 vattr.va_mtime.tv_sec == np->n_dmtime) { 988 nfsstats.lookupcache_hits++; 989 return (ENOENT); 990 } 991 cache_purge_negative(dvp); 992 mtx_lock(&np->n_mtx); 993 np->n_dmtime = 0; 994 mtx_unlock(&np->n_mtx); 995 } 996 997 /* 998 * Cache the modification time of the parent directory in case 999 * the lookup fails and results in adding the first negative 1000 * name cache entry for the directory. Since this is reading 1001 * a single time_t, don't bother with locking. The 1002 * modification time may be a bit stale, but it must be read 1003 * before performing the lookup RPC to prevent a race where 1004 * another lookup updates the timestamp on the directory after 1005 * the lookup RPC has been performed on the server but before 1006 * n_dmtime is set at the end of this function. 1007 */ 1008 dmtime = np->n_vattr.va_mtime.tv_sec; 1009 error = 0; 1010 newvp = NULLVP; 1011 nfsstats.lookupcache_misses++; 1012 nfsstats.rpccnt[NFSPROC_LOOKUP]++; 1013 len = cnp->cn_namelen; 1014 mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, 1015 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); 1016 mb = mreq; 1017 bpos = mtod(mb, caddr_t); 1018 nfsm_fhtom(dvp, v3); 1019 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); 1020 nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred); 1021 if (error) { 1022 if (v3) { 1023 nfsm_postop_attr(dvp, attrflag); 1024 m_freem(mrep); 1025 } 1026 goto nfsmout; 1027 } 1028 nfsm_getfh(fhp, fhsize, v3); 1029 1030 /* 1031 * Handle RENAME case... 
1032 */ 1033 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { 1034 if (NFS_CMPFH(np, fhp, fhsize)) { 1035 m_freem(mrep); 1036 return (EISDIR); 1037 } 1038 error = nfs_nget(mp, fhp, fhsize, &np, LK_EXCLUSIVE); 1039 if (error) { 1040 m_freem(mrep); 1041 return (error); 1042 } 1043 newvp = NFSTOV(np); 1044 if (v3) { 1045 nfsm_postop_attr(newvp, attrflag); 1046 nfsm_postop_attr(dvp, attrflag); 1047 } else 1048 nfsm_loadattr(newvp, NULL); 1049 *vpp = newvp; 1050 m_freem(mrep); 1051 cnp->cn_flags |= SAVENAME; 1052 return (0); 1053 } 1054 1055 if (flags & ISDOTDOT) { 1056 ltype = VOP_ISLOCKED(dvp); 1057 error = vfs_busy(mp, MBF_NOWAIT); 1058 if (error != 0) { 1059 vfs_ref(mp); 1060 VOP_UNLOCK(dvp, 0); 1061 error = vfs_busy(mp, 0); 1062 vn_lock(dvp, ltype | LK_RETRY); 1063 vfs_rel(mp); 1064 if (error == 0 && (dvp->v_iflag & VI_DOOMED)) { 1065 vfs_unbusy(mp); 1066 error = ENOENT; 1067 } 1068 if (error != 0) { 1069 m_freem(mrep); 1070 return (error); 1071 } 1072 } 1073 VOP_UNLOCK(dvp, 0); 1074 error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags); 1075 if (error == 0) 1076 newvp = NFSTOV(np); 1077 vfs_unbusy(mp); 1078 if (newvp != dvp) 1079 vn_lock(dvp, ltype | LK_RETRY); 1080 if (dvp->v_iflag & VI_DOOMED) { 1081 if (error == 0) { 1082 if (newvp == dvp) 1083 vrele(newvp); 1084 else 1085 vput(newvp); 1086 } 1087 error = ENOENT; 1088 } 1089 if (error) { 1090 m_freem(mrep); 1091 return (error); 1092 } 1093 } else if (NFS_CMPFH(np, fhp, fhsize)) { 1094 VREF(dvp); 1095 newvp = dvp; 1096 } else { 1097 error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags); 1098 if (error) { 1099 m_freem(mrep); 1100 return (error); 1101 } 1102 newvp = NFSTOV(np); 1103 } 1104 if (v3) { 1105 nfsm_postop_attr(newvp, attrflag); 1106 nfsm_postop_attr(dvp, attrflag); 1107 } else 1108 nfsm_loadattr(newvp, NULL); 1109 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) 1110 cnp->cn_flags |= SAVENAME; 1111 if ((cnp->cn_flags & MAKEENTRY) && 1112 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) 
{ 1113 np->n_ctime = np->n_vattr.va_ctime.tv_sec; 1114 cache_enter(dvp, newvp, cnp); 1115 } 1116 *vpp = newvp; 1117 m_freem(mrep); 1118nfsmout: 1119 if (error) { 1120 if (newvp != NULLVP) { 1121 vput(newvp); 1122 *vpp = NULLVP; 1123 } 1124 1125 if (error != ENOENT) 1126 goto done; 1127 1128 /* The requested file was not found. */ 1129 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && 1130 (flags & ISLASTCN)) { 1131 /* 1132 * XXX: UFS does a full VOP_ACCESS(dvp, 1133 * VWRITE) here instead of just checking 1134 * MNT_RDONLY. 1135 */ 1136 if (mp->mnt_flag & MNT_RDONLY) 1137 return (EROFS); 1138 cnp->cn_flags |= SAVENAME; 1139 return (EJUSTRETURN); 1140 } 1141 1142 if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) { 1143 /* 1144 * Maintain n_dmtime as the modification time 1145 * of the parent directory when the oldest -ve 1146 * name cache entry for this directory was 1147 * added. If a -ve cache entry has already 1148 * been added with a newer modification time 1149 * by a concurrent lookup, then don't bother 1150 * adding a cache entry. The modification 1151 * time of the directory might have changed 1152 * due to the file this lookup failed to find 1153 * being created. In that case a subsequent 1154 * lookup would incorrectly use the entry 1155 * added here instead of doing an extra 1156 * lookup. 1157 */ 1158 mtx_lock(&np->n_mtx); 1159 if (np->n_dmtime <= dmtime) { 1160 if (np->n_dmtime == 0) 1161 np->n_dmtime = dmtime; 1162 mtx_unlock(&np->n_mtx); 1163 cache_enter(dvp, NULL, cnp); 1164 } else 1165 mtx_unlock(&np->n_mtx); 1166 } 1167 return (ENOENT); 1168 } 1169done: 1170 return (error); 1171} 1172 1173/* 1174 * nfs read call. 1175 * Just call nfs_bioread() to do the work. 
1176 */ 1177static int 1178nfs_read(struct vop_read_args *ap) 1179{ 1180 struct vnode *vp = ap->a_vp; 1181 1182 switch (vp->v_type) { 1183 case VREG: 1184 return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); 1185 case VDIR: 1186 return (EISDIR); 1187 default: 1188 return (EOPNOTSUPP); 1189 } 1190} 1191 1192/* 1193 * nfs readlink call 1194 */ 1195static int 1196nfs_readlink(struct vop_readlink_args *ap) 1197{ 1198 struct vnode *vp = ap->a_vp; 1199 1200 if (vp->v_type != VLNK) 1201 return (EINVAL); 1202 return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred)); 1203} 1204 1205/* 1206 * Do a readlink rpc. 1207 * Called by nfs_doio() from below the buffer cache. 1208 */ 1209int 1210nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1211{ 1212 caddr_t bpos, dpos; 1213 int error = 0, len, attrflag; 1214 struct mbuf *mreq, *mrep, *md, *mb; 1215 int v3 = NFS_ISV3(vp); 1216 1217 nfsstats.rpccnt[NFSPROC_READLINK]++; 1218 mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); 1219 mb = mreq; 1220 bpos = mtod(mb, caddr_t); 1221 nfsm_fhtom(vp, v3); 1222 nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred); 1223 if (v3) 1224 nfsm_postop_attr(vp, attrflag); 1225 if (!error) { 1226 nfsm_strsiz(len, NFS_MAXPATHLEN); 1227 if (len == NFS_MAXPATHLEN) { 1228 struct nfsnode *np = VTONFS(vp); 1229 mtx_lock(&np->n_mtx); 1230 if (np->n_size && np->n_size < NFS_MAXPATHLEN) 1231 len = np->n_size; 1232 mtx_unlock(&np->n_mtx); 1233 } 1234 nfsm_mtouio(uiop, len); 1235 } 1236 m_freem(mrep); 1237nfsmout: 1238 return (error); 1239} 1240 1241/* 1242 * nfs read rpc call 1243 * Ditto above 1244 */ 1245int 1246nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1247{ 1248 u_int32_t *tl; 1249 caddr_t bpos, dpos; 1250 struct mbuf *mreq, *mrep, *md, *mb; 1251 struct nfsmount *nmp; 1252 int error = 0, len, retlen, tsiz, eof, attrflag; 1253 int v3 = NFS_ISV3(vp); 1254 int rsize; 1255 1256#ifndef nolint 1257 eof = 0; 1258#endif 1259 nmp = VFSTONFS(vp->v_mount); 
1260 tsiz = uiop->uio_resid; 1261 mtx_lock(&nmp->nm_mtx); 1262 if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { 1263 mtx_unlock(&nmp->nm_mtx); 1264 return (EFBIG); 1265 } 1266 rsize = nmp->nm_rsize; 1267 mtx_unlock(&nmp->nm_mtx); 1268 while (tsiz > 0) { 1269 nfsstats.rpccnt[NFSPROC_READ]++; 1270 len = (tsiz > rsize) ? rsize : tsiz; 1271 mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); 1272 mb = mreq; 1273 bpos = mtod(mb, caddr_t); 1274 nfsm_fhtom(vp, v3); 1275 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3); 1276 if (v3) { 1277 txdr_hyper(uiop->uio_offset, tl); 1278 *(tl + 2) = txdr_unsigned(len); 1279 } else { 1280 *tl++ = txdr_unsigned(uiop->uio_offset); 1281 *tl++ = txdr_unsigned(len); 1282 *tl = 0; 1283 } 1284 nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred); 1285 if (v3) { 1286 nfsm_postop_attr(vp, attrflag); 1287 if (error) { 1288 m_freem(mrep); 1289 goto nfsmout; 1290 } 1291 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); 1292 eof = fxdr_unsigned(int, *(tl + 1)); 1293 } else { 1294 nfsm_loadattr(vp, NULL); 1295 } 1296 nfsm_strsiz(retlen, rsize); 1297 nfsm_mtouio(uiop, retlen); 1298 m_freem(mrep); 1299 tsiz -= retlen; 1300 if (v3) { 1301 if (eof || retlen == 0) { 1302 tsiz = 0; 1303 } 1304 } else if (retlen < len) { 1305 tsiz = 0; 1306 } 1307 } 1308nfsmout: 1309 return (error); 1310} 1311 1312/* 1313 * nfs write call 1314 */ 1315int 1316nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 1317 int *iomode, int *must_commit) 1318{ 1319 u_int32_t *tl; 1320 int32_t backup; 1321 caddr_t bpos, dpos; 1322 struct mbuf *mreq, *mrep, *md, *mb; 1323 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 1324 int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; 1325 int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; 1326 int wsize; 1327 1328#ifndef DIAGNOSTIC 1329 if (uiop->uio_iovcnt != 1) 1330 panic("nfs: writerpc iovcnt > 1"); 1331#endif 1332 *must_commit = 0; 1333 tsiz = uiop->uio_resid; 1334 
mtx_lock(&nmp->nm_mtx); 1335 if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { 1336 mtx_unlock(&nmp->nm_mtx); 1337 return (EFBIG); 1338 } 1339 wsize = nmp->nm_wsize; 1340 mtx_unlock(&nmp->nm_mtx); 1341 while (tsiz > 0) { 1342 nfsstats.rpccnt[NFSPROC_WRITE]++; 1343 len = (tsiz > wsize) ? wsize : tsiz; 1344 mreq = nfsm_reqhead(vp, NFSPROC_WRITE, 1345 NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); 1346 mb = mreq; 1347 bpos = mtod(mb, caddr_t); 1348 nfsm_fhtom(vp, v3); 1349 if (v3) { 1350 tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED); 1351 txdr_hyper(uiop->uio_offset, tl); 1352 tl += 2; 1353 *tl++ = txdr_unsigned(len); 1354 *tl++ = txdr_unsigned(*iomode); 1355 *tl = txdr_unsigned(len); 1356 } else { 1357 u_int32_t x; 1358 1359 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED); 1360 /* Set both "begin" and "current" to non-garbage. */ 1361 x = txdr_unsigned((u_int32_t)uiop->uio_offset); 1362 *tl++ = x; /* "begin offset" */ 1363 *tl++ = x; /* "current offset" */ 1364 x = txdr_unsigned(len); 1365 *tl++ = x; /* total to this offset */ 1366 *tl = x; /* size of this write */ 1367 } 1368 nfsm_uiotom(uiop, len); 1369 nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred); 1370 if (v3) { 1371 wccflag = NFSV3_WCCCHK; 1372 nfsm_wcc_data(vp, wccflag); 1373 if (!error) { 1374 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED 1375 + NFSX_V3WRITEVERF); 1376 rlen = fxdr_unsigned(int, *tl++); 1377 if (rlen == 0) { 1378 error = NFSERR_IO; 1379 m_freem(mrep); 1380 break; 1381 } else if (rlen < len) { 1382 backup = len - rlen; 1383 uiop->uio_iov->iov_base = 1384 (char *)uiop->uio_iov->iov_base - 1385 backup; 1386 uiop->uio_iov->iov_len += backup; 1387 uiop->uio_offset -= backup; 1388 uiop->uio_resid += backup; 1389 len = rlen; 1390 } 1391 commit = fxdr_unsigned(int, *tl++); 1392 1393 /* 1394 * Return the lowest committment level 1395 * obtained by any of the RPCs. 
1396 */ 1397 if (committed == NFSV3WRITE_FILESYNC) 1398 committed = commit; 1399 else if (committed == NFSV3WRITE_DATASYNC && 1400 commit == NFSV3WRITE_UNSTABLE) 1401 committed = commit; 1402 mtx_lock(&nmp->nm_mtx); 1403 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){ 1404 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, 1405 NFSX_V3WRITEVERF); 1406 nmp->nm_state |= NFSSTA_HASWRITEVERF; 1407 } else if (bcmp((caddr_t)tl, 1408 (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { 1409 *must_commit = 1; 1410 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, 1411 NFSX_V3WRITEVERF); 1412 } 1413 mtx_unlock(&nmp->nm_mtx); 1414 } 1415 } else { 1416 nfsm_loadattr(vp, NULL); 1417 } 1418 if (wccflag) { 1419 mtx_lock(&(VTONFS(vp))->n_mtx); 1420 VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime; 1421 mtx_unlock(&(VTONFS(vp))->n_mtx); 1422 } 1423 m_freem(mrep); 1424 if (error) 1425 break; 1426 tsiz -= len; 1427 } 1428nfsmout: 1429 if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC) 1430 committed = NFSV3WRITE_FILESYNC; 1431 *iomode = committed; 1432 if (error) 1433 uiop->uio_resid = tsiz; 1434 return (error); 1435} 1436 1437/* 1438 * nfs mknod rpc 1439 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the 1440 * mode set to specify the file type and the size field for rdev. 
1441 */ 1442static int 1443nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, 1444 struct vattr *vap) 1445{ 1446 struct nfsv2_sattr *sp; 1447 u_int32_t *tl; 1448 struct vnode *newvp = NULL; 1449 struct nfsnode *np = NULL; 1450 struct vattr vattr; 1451 caddr_t bpos, dpos; 1452 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; 1453 struct mbuf *mreq, *mrep, *md, *mb; 1454 u_int32_t rdev; 1455 int v3 = NFS_ISV3(dvp); 1456 1457 if (vap->va_type == VCHR || vap->va_type == VBLK) 1458 rdev = txdr_unsigned(vap->va_rdev); 1459 else if (vap->va_type == VFIFO || vap->va_type == VSOCK) 1460 rdev = nfs_xdrneg1; 1461 else { 1462 return (EOPNOTSUPP); 1463 } 1464 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 1465 return (error); 1466 nfsstats.rpccnt[NFSPROC_MKNOD]++; 1467 mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + 1468 + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); 1469 mb = mreq; 1470 bpos = mtod(mb, caddr_t); 1471 nfsm_fhtom(dvp, v3); 1472 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 1473 if (v3) { 1474 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 1475 *tl++ = vtonfsv3_type(vap->va_type); 1476 nfsm_v3attrbuild(vap, FALSE); 1477 if (vap->va_type == VCHR || vap->va_type == VBLK) { 1478 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); 1479 *tl++ = txdr_unsigned(major(vap->va_rdev)); 1480 *tl = txdr_unsigned(minor(vap->va_rdev)); 1481 } 1482 } else { 1483 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 1484 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); 1485 sp->sa_uid = nfs_xdrneg1; 1486 sp->sa_gid = nfs_xdrneg1; 1487 sp->sa_size = rdev; 1488 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 1489 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 1490 } 1491 nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred); 1492 if (!error) { 1493 nfsm_mtofh(dvp, newvp, v3, gotvp); 1494 if (!gotvp) { 1495 if (newvp) { 1496 vput(newvp); 1497 newvp = NULL; 1498 } 1499 error = 
nfs_lookitup(dvp, cnp->cn_nameptr, 1500 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); 1501 if (!error) 1502 newvp = NFSTOV(np); 1503 } 1504 } 1505 if (v3) 1506 nfsm_wcc_data(dvp, wccflag); 1507 m_freem(mrep); 1508nfsmout: 1509 if (error) { 1510 if (newvp) 1511 vput(newvp); 1512 } else { 1513 if (cnp->cn_flags & MAKEENTRY) 1514 cache_enter(dvp, newvp, cnp); 1515 *vpp = newvp; 1516 } 1517 mtx_lock(&(VTONFS(dvp))->n_mtx); 1518 VTONFS(dvp)->n_flag |= NMODIFIED; 1519 if (!wccflag) { 1520 VTONFS(dvp)->n_attrstamp = 0; 1521 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1522 } 1523 mtx_unlock(&(VTONFS(dvp))->n_mtx); 1524 return (error); 1525} 1526 1527/* 1528 * nfs mknod vop 1529 * just call nfs_mknodrpc() to do the work. 1530 */ 1531/* ARGSUSED */ 1532static int 1533nfs_mknod(struct vop_mknod_args *ap) 1534{ 1535 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1536} 1537 1538static u_long create_verf; 1539/* 1540 * nfs file create call 1541 */ 1542static int 1543nfs_create(struct vop_create_args *ap) 1544{ 1545 struct vnode *dvp = ap->a_dvp; 1546 struct vattr *vap = ap->a_vap; 1547 struct componentname *cnp = ap->a_cnp; 1548 struct nfsv2_sattr *sp; 1549 u_int32_t *tl; 1550 struct nfsnode *np = NULL; 1551 struct vnode *newvp = NULL; 1552 caddr_t bpos, dpos; 1553 int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; 1554 struct mbuf *mreq, *mrep, *md, *mb; 1555 struct vattr vattr; 1556 int v3 = NFS_ISV3(dvp); 1557 1558 CURVNET_SET(CRED_TO_VNET(curthread->td_ucred)); 1559 1560 /* 1561 * Oops, not for me.. 
1562 */ 1563 if (vap->va_type == VSOCK) { 1564 error = nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap); 1565 CURVNET_RESTORE(); 1566 return (error); 1567 } 1568 1569 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) { 1570 CURVNET_RESTORE(); 1571 return (error); 1572 } 1573 if (vap->va_vaflags & VA_EXCLUSIVE) 1574 fmode |= O_EXCL; 1575again: 1576 nfsstats.rpccnt[NFSPROC_CREATE]++; 1577 mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + 1578 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); 1579 mb = mreq; 1580 bpos = mtod(mb, caddr_t); 1581 nfsm_fhtom(dvp, v3); 1582 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 1583 if (v3) { 1584 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 1585 if (fmode & O_EXCL) { 1586 *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); 1587 tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF); 1588#ifdef INET 1589 CURVNET_SET(CRED_TO_VNET(cnp->cn_cred)); 1590 IN_IFADDR_RLOCK(); 1591 if (!TAILQ_EMPTY(&V_in_ifaddrhead)) 1592 *tl++ = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr; 1593 else 1594#endif 1595 *tl++ = create_verf; 1596#ifdef INET 1597 IN_IFADDR_RUNLOCK(); 1598 CURVNET_RESTORE(); 1599#endif 1600 *tl = ++create_verf; 1601 } else { 1602 *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); 1603 nfsm_v3attrbuild(vap, FALSE); 1604 } 1605 } else { 1606 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 1607 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); 1608 sp->sa_uid = nfs_xdrneg1; 1609 sp->sa_gid = nfs_xdrneg1; 1610 sp->sa_size = 0; 1611 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 1612 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 1613 } 1614 nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred); 1615 if (!error) { 1616 nfsm_mtofh(dvp, newvp, v3, gotvp); 1617 if (!gotvp) { 1618 if (newvp) { 1619 vput(newvp); 1620 newvp = NULL; 1621 } 1622 error = nfs_lookitup(dvp, cnp->cn_nameptr, 1623 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); 1624 if (!error) 1625 newvp = NFSTOV(np); 1626 } 
1627 } 1628 if (v3) 1629 nfsm_wcc_data(dvp, wccflag); 1630 m_freem(mrep); 1631nfsmout: 1632 if (error) { 1633 if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { 1634 fmode &= ~O_EXCL; 1635 goto again; 1636 } 1637 if (newvp) 1638 vput(newvp); 1639 } else if (v3 && (fmode & O_EXCL)) { 1640 /* 1641 * We are normally called with only a partially initialized 1642 * VAP. Since the NFSv3 spec says that server may use the 1643 * file attributes to store the verifier, the spec requires 1644 * us to do a SETATTR RPC. FreeBSD servers store the verifier 1645 * in atime, but we can't really assume that all servers will 1646 * so we ensure that our SETATTR sets both atime and mtime. 1647 */ 1648 if (vap->va_mtime.tv_sec == VNOVAL) 1649 vfs_timestamp(&vap->va_mtime); 1650 if (vap->va_atime.tv_sec == VNOVAL) 1651 vap->va_atime = vap->va_mtime; 1652 error = nfs_setattrrpc(newvp, vap, cnp->cn_cred); 1653 if (error) 1654 vput(newvp); 1655 } 1656 if (!error) { 1657 if (cnp->cn_flags & MAKEENTRY) 1658 cache_enter(dvp, newvp, cnp); 1659 *ap->a_vpp = newvp; 1660 } 1661 mtx_lock(&(VTONFS(dvp))->n_mtx); 1662 VTONFS(dvp)->n_flag |= NMODIFIED; 1663 if (!wccflag) { 1664 VTONFS(dvp)->n_attrstamp = 0; 1665 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1666 } 1667 mtx_unlock(&(VTONFS(dvp))->n_mtx); 1668 CURVNET_RESTORE(); 1669 return (error); 1670} 1671 1672/* 1673 * nfs file remove call 1674 * To try and make nfs semantics closer to ufs semantics, a file that has 1675 * other processes using the vnode is renamed instead of removed and then 1676 * removed later on the last close. 
1677 * - If v_usecount > 1 1678 * If a rename is not already in the works 1679 * call nfs_sillyrename() to set it up 1680 * else 1681 * do the remove rpc 1682 */ 1683static int 1684nfs_remove(struct vop_remove_args *ap) 1685{ 1686 struct vnode *vp = ap->a_vp; 1687 struct vnode *dvp = ap->a_dvp; 1688 struct componentname *cnp = ap->a_cnp; 1689 struct nfsnode *np = VTONFS(vp); 1690 int error = 0; 1691 struct vattr vattr; 1692 1693#ifndef DIAGNOSTIC 1694 if ((cnp->cn_flags & HASBUF) == 0) 1695 panic("nfs_remove: no name"); 1696 if (vrefcnt(vp) < 1) 1697 panic("nfs_remove: bad v_usecount"); 1698#endif 1699 if (vp->v_type == VDIR) 1700 error = EPERM; 1701 else if (vrefcnt(vp) == 1 || (np->n_sillyrename && 1702 !VOP_GETATTR(vp, &vattr, cnp->cn_cred) && vattr.va_nlink > 1)) { 1703 /* 1704 * Purge the name cache so that the chance of a lookup for 1705 * the name succeeding while the remove is in progress is 1706 * minimized. Without node locking it can still happen, such 1707 * that an I/O op returns ESTALE, but since you get this if 1708 * another host removes the file.. 1709 */ 1710 cache_purge(vp); 1711 /* 1712 * throw away biocache buffers, mainly to avoid 1713 * unnecessary delayed writes later. 1714 */ 1715 error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1); 1716 /* Do the rpc */ 1717 if (error != EINTR && error != EIO) 1718 error = nfs_removerpc(dvp, cnp->cn_nameptr, 1719 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); 1720 /* 1721 * Kludge City: If the first reply to the remove rpc is lost.. 1722 * the reply to the retransmitted request will be ENOENT 1723 * since the file was in fact removed 1724 * Therefore, we cheat and return success. 
1725 */ 1726 if (error == ENOENT) 1727 error = 0; 1728 } else if (!np->n_sillyrename) 1729 error = nfs_sillyrename(dvp, vp, cnp); 1730 np->n_attrstamp = 0; 1731 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1732 return (error); 1733} 1734 1735/* 1736 * nfs file remove rpc called from nfs_inactive 1737 */ 1738int 1739nfs_removeit(struct sillyrename *sp) 1740{ 1741 /* 1742 * Make sure that the directory vnode is still valid. 1743 * XXX we should lock sp->s_dvp here. 1744 */ 1745 if (sp->s_dvp->v_type == VBAD) 1746 return (0); 1747 return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, 1748 NULL)); 1749} 1750 1751/* 1752 * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). 1753 */ 1754static int 1755nfs_removerpc(struct vnode *dvp, const char *name, int namelen, 1756 struct ucred *cred, struct thread *td) 1757{ 1758 caddr_t bpos, dpos; 1759 int error = 0, wccflag = NFSV3_WCCRATTR; 1760 struct mbuf *mreq, *mrep, *md, *mb; 1761 int v3 = NFS_ISV3(dvp); 1762 1763 nfsstats.rpccnt[NFSPROC_REMOVE]++; 1764 mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE, 1765 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); 1766 mb = mreq; 1767 bpos = mtod(mb, caddr_t); 1768 nfsm_fhtom(dvp, v3); 1769 nfsm_strtom(name, namelen, NFS_MAXNAMLEN); 1770 nfsm_request(dvp, NFSPROC_REMOVE, td, cred); 1771 if (v3) 1772 nfsm_wcc_data(dvp, wccflag); 1773 m_freem(mrep); 1774nfsmout: 1775 mtx_lock(&(VTONFS(dvp))->n_mtx); 1776 VTONFS(dvp)->n_flag |= NMODIFIED; 1777 if (!wccflag) { 1778 VTONFS(dvp)->n_attrstamp = 0; 1779 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1780 } 1781 mtx_unlock(&(VTONFS(dvp))->n_mtx); 1782 return (error); 1783} 1784 1785/* 1786 * nfs file rename call 1787 */ 1788static int 1789nfs_rename(struct vop_rename_args *ap) 1790{ 1791 struct vnode *fvp = ap->a_fvp; 1792 struct vnode *tvp = ap->a_tvp; 1793 struct vnode *fdvp = ap->a_fdvp; 1794 struct vnode *tdvp = ap->a_tdvp; 1795 struct componentname *tcnp = ap->a_tcnp; 1796 struct componentname *fcnp = ap->a_fcnp; 1797 int error; 
1798 1799#ifndef DIAGNOSTIC 1800 if ((tcnp->cn_flags & HASBUF) == 0 || 1801 (fcnp->cn_flags & HASBUF) == 0) 1802 panic("nfs_rename: no name"); 1803#endif 1804 /* Check for cross-device rename */ 1805 if ((fvp->v_mount != tdvp->v_mount) || 1806 (tvp && (fvp->v_mount != tvp->v_mount))) { 1807 error = EXDEV; 1808 goto out; 1809 } 1810 1811 if (fvp == tvp) { 1812 nfs_printf("nfs_rename: fvp == tvp (can't happen)\n"); 1813 error = 0; 1814 goto out; 1815 } 1816 if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) 1817 goto out; 1818 1819 /* 1820 * We have to flush B_DELWRI data prior to renaming 1821 * the file. If we don't, the delayed-write buffers 1822 * can be flushed out later after the file has gone stale 1823 * under NFSV3. NFSV2 does not have this problem because 1824 * ( as far as I can tell ) it flushes dirty buffers more 1825 * often. 1826 * 1827 * Skip the rename operation if the fsync fails, this can happen 1828 * due to the server's volume being full, when we pushed out data 1829 * that was written back to our cache earlier. Not checking for 1830 * this condition can result in potential (silent) data loss. 1831 */ 1832 error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); 1833 VOP_UNLOCK(fvp, 0); 1834 if (!error && tvp) 1835 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); 1836 if (error) 1837 goto out; 1838 1839 /* 1840 * If the tvp exists and is in use, sillyrename it before doing the 1841 * rename of the new file over it. 1842 * XXX Can't sillyrename a directory. 
1843 */ 1844 if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && 1845 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { 1846 vput(tvp); 1847 tvp = NULL; 1848 } 1849 1850 error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, 1851 tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, 1852 tcnp->cn_thread); 1853 1854 if (fvp->v_type == VDIR) { 1855 if (tvp != NULL && tvp->v_type == VDIR) 1856 cache_purge(tdvp); 1857 cache_purge(fdvp); 1858 } 1859 1860out: 1861 if (tdvp == tvp) 1862 vrele(tdvp); 1863 else 1864 vput(tdvp); 1865 if (tvp) 1866 vput(tvp); 1867 vrele(fdvp); 1868 vrele(fvp); 1869 /* 1870 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 1871 */ 1872 if (error == ENOENT) 1873 error = 0; 1874 return (error); 1875} 1876 1877/* 1878 * nfs file rename rpc called from nfs_remove() above 1879 */ 1880static int 1881nfs_renameit(struct vnode *sdvp, struct componentname *scnp, 1882 struct sillyrename *sp) 1883{ 1884 1885 return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, 1886 sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread)); 1887} 1888 1889/* 1890 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
1891 */ 1892static int 1893nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen, 1894 struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred, 1895 struct thread *td) 1896{ 1897 caddr_t bpos, dpos; 1898 int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; 1899 struct mbuf *mreq, *mrep, *md, *mb; 1900 int v3 = NFS_ISV3(fdvp); 1901 1902 nfsstats.rpccnt[NFSPROC_RENAME]++; 1903 mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME, 1904 (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + 1905 nfsm_rndup(tnamelen)); 1906 mb = mreq; 1907 bpos = mtod(mb, caddr_t); 1908 nfsm_fhtom(fdvp, v3); 1909 nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); 1910 nfsm_fhtom(tdvp, v3); 1911 nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); 1912 nfsm_request(fdvp, NFSPROC_RENAME, td, cred); 1913 if (v3) { 1914 nfsm_wcc_data(fdvp, fwccflag); 1915 nfsm_wcc_data(tdvp, twccflag); 1916 } 1917 m_freem(mrep); 1918nfsmout: 1919 mtx_lock(&(VTONFS(fdvp))->n_mtx); 1920 VTONFS(fdvp)->n_flag |= NMODIFIED; 1921 mtx_unlock(&(VTONFS(fdvp))->n_mtx); 1922 mtx_lock(&(VTONFS(tdvp))->n_mtx); 1923 VTONFS(tdvp)->n_flag |= NMODIFIED; 1924 mtx_unlock(&(VTONFS(tdvp))->n_mtx); 1925 if (!fwccflag) { 1926 VTONFS(fdvp)->n_attrstamp = 0; 1927 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); 1928 } 1929 if (!twccflag) { 1930 VTONFS(tdvp)->n_attrstamp = 0; 1931 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 1932 } 1933 return (error); 1934} 1935 1936/* 1937 * nfs hard link create call 1938 */ 1939static int 1940nfs_link(struct vop_link_args *ap) 1941{ 1942 struct vnode *vp = ap->a_vp; 1943 struct vnode *tdvp = ap->a_tdvp; 1944 struct componentname *cnp = ap->a_cnp; 1945 caddr_t bpos, dpos; 1946 int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; 1947 struct mbuf *mreq, *mrep, *md, *mb; 1948 int v3; 1949 1950 if (vp->v_mount != tdvp->v_mount) { 1951 return (EXDEV); 1952 } 1953 1954 /* 1955 * Push all writes to the server, so that the attribute cache 1956 * doesn't get "out of sync" with the 
server. 1957 * XXX There should be a better way! 1958 */ 1959 VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); 1960 1961 v3 = NFS_ISV3(vp); 1962 nfsstats.rpccnt[NFSPROC_LINK]++; 1963 mreq = nfsm_reqhead(vp, NFSPROC_LINK, 1964 NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); 1965 mb = mreq; 1966 bpos = mtod(mb, caddr_t); 1967 nfsm_fhtom(vp, v3); 1968 nfsm_fhtom(tdvp, v3); 1969 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 1970 nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred); 1971 if (v3) { 1972 nfsm_postop_attr(vp, attrflag); 1973 nfsm_wcc_data(tdvp, wccflag); 1974 } 1975 m_freem(mrep); 1976nfsmout: 1977 mtx_lock(&(VTONFS(tdvp))->n_mtx); 1978 VTONFS(tdvp)->n_flag |= NMODIFIED; 1979 mtx_unlock(&(VTONFS(tdvp))->n_mtx); 1980 if (!attrflag) { 1981 VTONFS(vp)->n_attrstamp = 0; 1982 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1983 } 1984 if (!wccflag) { 1985 VTONFS(tdvp)->n_attrstamp = 0; 1986 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 1987 } 1988 return (error); 1989} 1990 1991/* 1992 * nfs symbolic link create call 1993 */ 1994static int 1995nfs_symlink(struct vop_symlink_args *ap) 1996{ 1997 struct vnode *dvp = ap->a_dvp; 1998 struct vattr *vap = ap->a_vap; 1999 struct componentname *cnp = ap->a_cnp; 2000 struct nfsv2_sattr *sp; 2001 caddr_t bpos, dpos; 2002 int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; 2003 struct mbuf *mreq, *mrep, *md, *mb; 2004 struct vnode *newvp = NULL; 2005 int v3 = NFS_ISV3(dvp); 2006 2007 nfsstats.rpccnt[NFSPROC_SYMLINK]++; 2008 slen = strlen(ap->a_target); 2009 mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + 2010 nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); 2011 mb = mreq; 2012 bpos = mtod(mb, caddr_t); 2013 nfsm_fhtom(dvp, v3); 2014 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 2015 if (v3) { 2016 nfsm_v3attrbuild(vap, FALSE); 2017 } 2018 nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); 2019 if (!v3) { 2020 sp = nfsm_build(struct nfsv2_sattr *, 
NFSX_V2SATTR); 2021 sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); 2022 sp->sa_uid = nfs_xdrneg1; 2023 sp->sa_gid = nfs_xdrneg1; 2024 sp->sa_size = nfs_xdrneg1; 2025 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 2026 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 2027 } 2028 2029 /* 2030 * Issue the NFS request and get the rpc response. 2031 * 2032 * Only NFSv3 responses returning an error of 0 actually return 2033 * a file handle that can be converted into newvp without having 2034 * to do an extra lookup rpc. 2035 */ 2036 nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred); 2037 if (v3) { 2038 if (error == 0) 2039 nfsm_mtofh(dvp, newvp, v3, gotvp); 2040 nfsm_wcc_data(dvp, wccflag); 2041 } 2042 2043 /* 2044 * out code jumps -> here, mrep is also freed. 2045 */ 2046 2047 m_freem(mrep); 2048nfsmout: 2049 2050 /* 2051 * If we do not have an error and we could not extract the newvp from 2052 * the response due to the request being NFSv2, we have to do a 2053 * lookup in order to obtain a newvp to return. 
2054 */ 2055 if (error == 0 && newvp == NULL) { 2056 struct nfsnode *np = NULL; 2057 2058 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 2059 cnp->cn_cred, cnp->cn_thread, &np); 2060 if (!error) 2061 newvp = NFSTOV(np); 2062 } 2063 if (error) { 2064 if (newvp) 2065 vput(newvp); 2066 } else { 2067 *ap->a_vpp = newvp; 2068 } 2069 mtx_lock(&(VTONFS(dvp))->n_mtx); 2070 VTONFS(dvp)->n_flag |= NMODIFIED; 2071 mtx_unlock(&(VTONFS(dvp))->n_mtx); 2072 if (!wccflag) { 2073 VTONFS(dvp)->n_attrstamp = 0; 2074 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2075 } 2076 return (error); 2077} 2078 2079/* 2080 * nfs make dir call 2081 */ 2082static int 2083nfs_mkdir(struct vop_mkdir_args *ap) 2084{ 2085 struct vnode *dvp = ap->a_dvp; 2086 struct vattr *vap = ap->a_vap; 2087 struct componentname *cnp = ap->a_cnp; 2088 struct nfsv2_sattr *sp; 2089 int len; 2090 struct nfsnode *np = NULL; 2091 struct vnode *newvp = NULL; 2092 caddr_t bpos, dpos; 2093 int error = 0, wccflag = NFSV3_WCCRATTR; 2094 int gotvp = 0; 2095 struct mbuf *mreq, *mrep, *md, *mb; 2096 struct vattr vattr; 2097 int v3 = NFS_ISV3(dvp); 2098 2099 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) 2100 return (error); 2101 len = cnp->cn_namelen; 2102 nfsstats.rpccnt[NFSPROC_MKDIR]++; 2103 mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR, 2104 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); 2105 mb = mreq; 2106 bpos = mtod(mb, caddr_t); 2107 nfsm_fhtom(dvp, v3); 2108 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); 2109 if (v3) { 2110 nfsm_v3attrbuild(vap, FALSE); 2111 } else { 2112 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 2113 sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); 2114 sp->sa_uid = nfs_xdrneg1; 2115 sp->sa_gid = nfs_xdrneg1; 2116 sp->sa_size = nfs_xdrneg1; 2117 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 2118 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 2119 } 2120 nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred); 2121 if (!error) 2122 nfsm_mtofh(dvp, 
newvp, v3, gotvp); 2123 if (v3) 2124 nfsm_wcc_data(dvp, wccflag); 2125 m_freem(mrep); 2126nfsmout: 2127 mtx_lock(&(VTONFS(dvp))->n_mtx); 2128 VTONFS(dvp)->n_flag |= NMODIFIED; 2129 mtx_unlock(&(VTONFS(dvp))->n_mtx); 2130 if (!wccflag) { 2131 VTONFS(dvp)->n_attrstamp = 0; 2132 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2133 } 2134 if (error == 0 && newvp == NULL) { 2135 error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, 2136 cnp->cn_thread, &np); 2137 if (!error) { 2138 newvp = NFSTOV(np); 2139 if (newvp->v_type != VDIR) 2140 error = EEXIST; 2141 } 2142 } 2143 if (error) { 2144 if (newvp) 2145 vput(newvp); 2146 } else 2147 *ap->a_vpp = newvp; 2148 return (error); 2149} 2150 2151/* 2152 * nfs remove directory call 2153 */ 2154static int 2155nfs_rmdir(struct vop_rmdir_args *ap) 2156{ 2157 struct vnode *vp = ap->a_vp; 2158 struct vnode *dvp = ap->a_dvp; 2159 struct componentname *cnp = ap->a_cnp; 2160 caddr_t bpos, dpos; 2161 int error = 0, wccflag = NFSV3_WCCRATTR; 2162 struct mbuf *mreq, *mrep, *md, *mb; 2163 int v3 = NFS_ISV3(dvp); 2164 2165 if (dvp == vp) 2166 return (EINVAL); 2167 nfsstats.rpccnt[NFSPROC_RMDIR]++; 2168 mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR, 2169 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); 2170 mb = mreq; 2171 bpos = mtod(mb, caddr_t); 2172 nfsm_fhtom(dvp, v3); 2173 nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); 2174 nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred); 2175 if (v3) 2176 nfsm_wcc_data(dvp, wccflag); 2177 m_freem(mrep); 2178nfsmout: 2179 mtx_lock(&(VTONFS(dvp))->n_mtx); 2180 VTONFS(dvp)->n_flag |= NMODIFIED; 2181 mtx_unlock(&(VTONFS(dvp))->n_mtx); 2182 if (!wccflag) { 2183 VTONFS(dvp)->n_attrstamp = 0; 2184 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 2185 } 2186 cache_purge(dvp); 2187 cache_purge(vp); 2188 /* 2189 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
2190 */ 2191 if (error == ENOENT) 2192 error = 0; 2193 return (error); 2194} 2195 2196/* 2197 * nfs readdir call 2198 */ 2199static int 2200nfs_readdir(struct vop_readdir_args *ap) 2201{ 2202 struct vnode *vp = ap->a_vp; 2203 struct nfsnode *np = VTONFS(vp); 2204 struct uio *uio = ap->a_uio; 2205 int tresid, error = 0; 2206 struct vattr vattr; 2207 2208 if (vp->v_type != VDIR) 2209 return(EPERM); 2210 2211 /* 2212 * First, check for hit on the EOF offset cache 2213 */ 2214 if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && 2215 (np->n_flag & NMODIFIED) == 0) { 2216 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { 2217 mtx_lock(&np->n_mtx); 2218 if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 2219 mtx_unlock(&np->n_mtx); 2220 nfsstats.direofcache_hits++; 2221 goto out; 2222 } else 2223 mtx_unlock(&np->n_mtx); 2224 } 2225 } 2226 2227 /* 2228 * Call nfs_bioread() to do the real work. 2229 */ 2230 tresid = uio->uio_resid; 2231 error = nfs_bioread(vp, uio, 0, ap->a_cred); 2232 2233 if (!error && uio->uio_resid == tresid) { 2234 nfsstats.direofcache_misses++; 2235 } 2236out: 2237 return (error); 2238} 2239 2240/* 2241 * Readdir rpc call. 2242 * Called from below the buffer cache by nfs_doio(). 
2243 */ 2244int 2245nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 2246{ 2247 int len, left; 2248 struct dirent *dp = NULL; 2249 u_int32_t *tl; 2250 caddr_t cp; 2251 nfsuint64 *cookiep; 2252 caddr_t bpos, dpos; 2253 struct mbuf *mreq, *mrep, *md, *mb; 2254 nfsuint64 cookie; 2255 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2256 struct nfsnode *dnp = VTONFS(vp); 2257 u_quad_t fileno; 2258 int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; 2259 int attrflag; 2260 int v3 = NFS_ISV3(vp); 2261 2262#ifndef DIAGNOSTIC 2263 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || 2264 (uiop->uio_resid & (DIRBLKSIZ - 1))) 2265 panic("nfs readdirrpc bad uio"); 2266#endif 2267 2268 /* 2269 * If there is no cookie, assume directory was stale. 2270 */ 2271 nfs_dircookie_lock(dnp); 2272 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); 2273 if (cookiep) { 2274 cookie = *cookiep; 2275 nfs_dircookie_unlock(dnp); 2276 } else { 2277 nfs_dircookie_unlock(dnp); 2278 return (NFSERR_BAD_COOKIE); 2279 } 2280 2281 /* 2282 * Loop around doing readdir rpc's of size nm_readdirsize 2283 * truncated to a multiple of DIRBLKSIZ. 2284 * The stopping criteria is EOF or buffer full. 
2285 */ 2286 while (more_dirs && bigenough) { 2287 nfsstats.rpccnt[NFSPROC_READDIR]++; 2288 mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + 2289 NFSX_READDIR(v3)); 2290 mb = mreq; 2291 bpos = mtod(mb, caddr_t); 2292 nfsm_fhtom(vp, v3); 2293 if (v3) { 2294 tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED); 2295 *tl++ = cookie.nfsuquad[0]; 2296 *tl++ = cookie.nfsuquad[1]; 2297 mtx_lock(&dnp->n_mtx); 2298 *tl++ = dnp->n_cookieverf.nfsuquad[0]; 2299 *tl++ = dnp->n_cookieverf.nfsuquad[1]; 2300 mtx_unlock(&dnp->n_mtx); 2301 } else { 2302 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); 2303 *tl++ = cookie.nfsuquad[0]; 2304 } 2305 *tl = txdr_unsigned(nmp->nm_readdirsize); 2306 nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred); 2307 if (v3) { 2308 nfsm_postop_attr(vp, attrflag); 2309 if (!error) { 2310 tl = nfsm_dissect(u_int32_t *, 2311 2 * NFSX_UNSIGNED); 2312 mtx_lock(&dnp->n_mtx); 2313 dnp->n_cookieverf.nfsuquad[0] = *tl++; 2314 dnp->n_cookieverf.nfsuquad[1] = *tl; 2315 mtx_unlock(&dnp->n_mtx); 2316 } else { 2317 m_freem(mrep); 2318 goto nfsmout; 2319 } 2320 } 2321 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2322 more_dirs = fxdr_unsigned(int, *tl); 2323 2324 /* loop thru the dir entries, doctoring them to 4bsd form */ 2325 while (more_dirs && bigenough) { 2326 if (v3) { 2327 tl = nfsm_dissect(u_int32_t *, 2328 3 * NFSX_UNSIGNED); 2329 fileno = fxdr_hyper(tl); 2330 len = fxdr_unsigned(int, *(tl + 2)); 2331 } else { 2332 tl = nfsm_dissect(u_int32_t *, 2333 2 * NFSX_UNSIGNED); 2334 fileno = fxdr_unsigned(u_quad_t, *tl++); 2335 len = fxdr_unsigned(int, *tl); 2336 } 2337 if (len <= 0 || len > NFS_MAXNAMLEN) { 2338 error = EBADRPC; 2339 m_freem(mrep); 2340 goto nfsmout; 2341 } 2342 tlen = nfsm_rndup(len); 2343 if (tlen == len) 2344 tlen += 4; /* To ensure null termination */ 2345 left = DIRBLKSIZ - blksiz; 2346 if ((tlen + DIRHDSIZ) > left) { 2347 dp->d_reclen += left; 2348 uiop->uio_iov->iov_base = 2349 (char *)uiop->uio_iov->iov_base + left; 2350 
uiop->uio_iov->iov_len -= left; 2351 uiop->uio_offset += left; 2352 uiop->uio_resid -= left; 2353 blksiz = 0; 2354 } 2355 if ((tlen + DIRHDSIZ) > uiop->uio_resid) 2356 bigenough = 0; 2357 if (bigenough) { 2358 dp = (struct dirent *)uiop->uio_iov->iov_base; 2359 dp->d_fileno = (int)fileno; 2360 dp->d_namlen = len; 2361 dp->d_reclen = tlen + DIRHDSIZ; 2362 dp->d_type = DT_UNKNOWN; 2363 blksiz += dp->d_reclen; 2364 if (blksiz == DIRBLKSIZ) 2365 blksiz = 0; 2366 uiop->uio_offset += DIRHDSIZ; 2367 uiop->uio_resid -= DIRHDSIZ; 2368 uiop->uio_iov->iov_base = 2369 (char *)uiop->uio_iov->iov_base + DIRHDSIZ; 2370 uiop->uio_iov->iov_len -= DIRHDSIZ; 2371 nfsm_mtouio(uiop, len); 2372 cp = uiop->uio_iov->iov_base; 2373 tlen -= len; 2374 *cp = '\0'; /* null terminate */ 2375 uiop->uio_iov->iov_base = 2376 (char *)uiop->uio_iov->iov_base + tlen; 2377 uiop->uio_iov->iov_len -= tlen; 2378 uiop->uio_offset += tlen; 2379 uiop->uio_resid -= tlen; 2380 } else 2381 nfsm_adv(nfsm_rndup(len)); 2382 if (v3) { 2383 tl = nfsm_dissect(u_int32_t *, 2384 3 * NFSX_UNSIGNED); 2385 } else { 2386 tl = nfsm_dissect(u_int32_t *, 2387 2 * NFSX_UNSIGNED); 2388 } 2389 if (bigenough) { 2390 cookie.nfsuquad[0] = *tl++; 2391 if (v3) 2392 cookie.nfsuquad[1] = *tl++; 2393 } else if (v3) 2394 tl += 2; 2395 else 2396 tl++; 2397 more_dirs = fxdr_unsigned(int, *tl); 2398 } 2399 /* 2400 * If at end of rpc data, get the eof boolean 2401 */ 2402 if (!more_dirs) { 2403 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2404 more_dirs = (fxdr_unsigned(int, *tl) == 0); 2405 } 2406 m_freem(mrep); 2407 } 2408 /* 2409 * Fill last record, iff any, out to a multiple of DIRBLKSIZ 2410 * by increasing d_reclen for the last record. 
2411 */ 2412 if (blksiz > 0) { 2413 left = DIRBLKSIZ - blksiz; 2414 dp->d_reclen += left; 2415 uiop->uio_iov->iov_base = 2416 (char *)uiop->uio_iov->iov_base + left; 2417 uiop->uio_iov->iov_len -= left; 2418 uiop->uio_offset += left; 2419 uiop->uio_resid -= left; 2420 } 2421 2422 /* 2423 * We are now either at the end of the directory or have filled the 2424 * block. 2425 */ 2426 if (bigenough) 2427 dnp->n_direofoffset = uiop->uio_offset; 2428 else { 2429 if (uiop->uio_resid > 0) 2430 nfs_printf("EEK! readdirrpc resid > 0\n"); 2431 nfs_dircookie_lock(dnp); 2432 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); 2433 *cookiep = cookie; 2434 nfs_dircookie_unlock(dnp); 2435 } 2436nfsmout: 2437 return (error); 2438} 2439 2440/* 2441 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 2442 */ 2443int 2444nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 2445{ 2446 int len, left; 2447 struct dirent *dp; 2448 u_int32_t *tl; 2449 caddr_t cp; 2450 struct vnode *newvp; 2451 nfsuint64 *cookiep; 2452 caddr_t bpos, dpos, dpossav1, dpossav2; 2453 struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2; 2454 struct nameidata nami, *ndp = &nami; 2455 struct componentname *cnp = &ndp->ni_cnd; 2456 nfsuint64 cookie; 2457 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2458 struct nfsnode *dnp = VTONFS(vp), *np; 2459 nfsfh_t *fhp; 2460 u_quad_t fileno; 2461 int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; 2462 int attrflag, fhsize; 2463 2464#ifndef nolint 2465 dp = NULL; 2466#endif 2467#ifndef DIAGNOSTIC 2468 if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || 2469 (uiop->uio_resid & (DIRBLKSIZ - 1))) 2470 panic("nfs readdirplusrpc bad uio"); 2471#endif 2472 ndp->ni_dvp = vp; 2473 newvp = NULLVP; 2474 2475 /* 2476 * If there is no cookie, assume directory was stale. 
2477 */ 2478 nfs_dircookie_lock(dnp); 2479 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); 2480 if (cookiep) { 2481 cookie = *cookiep; 2482 nfs_dircookie_unlock(dnp); 2483 } else { 2484 nfs_dircookie_unlock(dnp); 2485 return (NFSERR_BAD_COOKIE); 2486 } 2487 /* 2488 * Loop around doing readdir rpc's of size nm_readdirsize 2489 * truncated to a multiple of DIRBLKSIZ. 2490 * The stopping criteria is EOF or buffer full. 2491 */ 2492 while (more_dirs && bigenough) { 2493 nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; 2494 mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS, 2495 NFSX_FH(1) + 6 * NFSX_UNSIGNED); 2496 mb = mreq; 2497 bpos = mtod(mb, caddr_t); 2498 nfsm_fhtom(vp, 1); 2499 tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED); 2500 *tl++ = cookie.nfsuquad[0]; 2501 *tl++ = cookie.nfsuquad[1]; 2502 mtx_lock(&dnp->n_mtx); 2503 *tl++ = dnp->n_cookieverf.nfsuquad[0]; 2504 *tl++ = dnp->n_cookieverf.nfsuquad[1]; 2505 mtx_unlock(&dnp->n_mtx); 2506 *tl++ = txdr_unsigned(nmp->nm_readdirsize); 2507 *tl = txdr_unsigned(nmp->nm_rsize); 2508 nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred); 2509 nfsm_postop_attr(vp, attrflag); 2510 if (error) { 2511 m_freem(mrep); 2512 goto nfsmout; 2513 } 2514 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2515 mtx_lock(&dnp->n_mtx); 2516 dnp->n_cookieverf.nfsuquad[0] = *tl++; 2517 dnp->n_cookieverf.nfsuquad[1] = *tl++; 2518 mtx_unlock(&dnp->n_mtx); 2519 more_dirs = fxdr_unsigned(int, *tl); 2520 2521 /* loop thru the dir entries, doctoring them to 4bsd form */ 2522 while (more_dirs && bigenough) { 2523 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2524 fileno = fxdr_hyper(tl); 2525 len = fxdr_unsigned(int, *(tl + 2)); 2526 if (len <= 0 || len > NFS_MAXNAMLEN) { 2527 error = EBADRPC; 2528 m_freem(mrep); 2529 goto nfsmout; 2530 } 2531 tlen = nfsm_rndup(len); 2532 if (tlen == len) 2533 tlen += 4; /* To ensure null termination*/ 2534 left = DIRBLKSIZ - blksiz; 2535 if ((tlen + DIRHDSIZ) > left) { 2536 dp->d_reclen += left; 2537 
uiop->uio_iov->iov_base = 2538 (char *)uiop->uio_iov->iov_base + left; 2539 uiop->uio_iov->iov_len -= left; 2540 uiop->uio_offset += left; 2541 uiop->uio_resid -= left; 2542 blksiz = 0; 2543 } 2544 if ((tlen + DIRHDSIZ) > uiop->uio_resid) 2545 bigenough = 0; 2546 if (bigenough) { 2547 dp = (struct dirent *)uiop->uio_iov->iov_base; 2548 dp->d_fileno = (int)fileno; 2549 dp->d_namlen = len; 2550 dp->d_reclen = tlen + DIRHDSIZ; 2551 dp->d_type = DT_UNKNOWN; 2552 blksiz += dp->d_reclen; 2553 if (blksiz == DIRBLKSIZ) 2554 blksiz = 0; 2555 uiop->uio_offset += DIRHDSIZ; 2556 uiop->uio_resid -= DIRHDSIZ; 2557 uiop->uio_iov->iov_base = 2558 (char *)uiop->uio_iov->iov_base + DIRHDSIZ; 2559 uiop->uio_iov->iov_len -= DIRHDSIZ; 2560 cnp->cn_nameptr = uiop->uio_iov->iov_base; 2561 cnp->cn_namelen = len; 2562 nfsm_mtouio(uiop, len); 2563 cp = uiop->uio_iov->iov_base; 2564 tlen -= len; 2565 *cp = '\0'; 2566 uiop->uio_iov->iov_base = 2567 (char *)uiop->uio_iov->iov_base + tlen; 2568 uiop->uio_iov->iov_len -= tlen; 2569 uiop->uio_offset += tlen; 2570 uiop->uio_resid -= tlen; 2571 } else 2572 nfsm_adv(nfsm_rndup(len)); 2573 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2574 if (bigenough) { 2575 cookie.nfsuquad[0] = *tl++; 2576 cookie.nfsuquad[1] = *tl++; 2577 } else 2578 tl += 2; 2579 2580 /* 2581 * Since the attributes are before the file handle 2582 * (sigh), we must skip over the attributes and then 2583 * come back and get them. 2584 */ 2585 attrflag = fxdr_unsigned(int, *tl); 2586 if (attrflag) { 2587 dpossav1 = dpos; 2588 mdsav1 = md; 2589 nfsm_adv(NFSX_V3FATTR); 2590 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2591 doit = fxdr_unsigned(int, *tl); 2592 /* 2593 * Skip loading the attrs for "..". There's a 2594 * race between loading the attrs here and 2595 * lookups that look for the directory currently 2596 * being read (in the parent). We try to acquire 2597 * the exclusive lock on ".." here, owning the 2598 * lock on the directory being read. 
Lookup will 2599 * hold the lock on ".." and try to acquire the 2600 * lock on the directory being read. 2601 * 2602 * There are other ways of fixing this, one would 2603 * be to do a trylock on the ".." vnode and skip 2604 * loading the attrs on ".." if it happens to be 2605 * locked by another process. But skipping the 2606 * attrload on ".." seems the easiest option. 2607 */ 2608 if (strcmp(dp->d_name, "..") == 0) { 2609 doit = 0; 2610 /* 2611 * We've already skipped over the attrs, 2612 * skip over the filehandle. And store d_type 2613 * as VDIR. 2614 */ 2615 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2616 i = fxdr_unsigned(int, *tl); 2617 nfsm_adv(nfsm_rndup(i)); 2618 dp->d_type = IFTODT(VTTOIF(VDIR)); 2619 } 2620 if (doit) { 2621 nfsm_getfh(fhp, fhsize, 1); 2622 if (NFS_CMPFH(dnp, fhp, fhsize)) { 2623 VREF(vp); 2624 newvp = vp; 2625 np = dnp; 2626 } else { 2627 error = nfs_nget(vp->v_mount, fhp, 2628 fhsize, &np, LK_EXCLUSIVE); 2629 if (error) 2630 doit = 0; 2631 else 2632 newvp = NFSTOV(np); 2633 } 2634 } 2635 if (doit && bigenough) { 2636 dpossav2 = dpos; 2637 dpos = dpossav1; 2638 mdsav2 = md; 2639 md = mdsav1; 2640 nfsm_loadattr(newvp, NULL); 2641 dpos = dpossav2; 2642 md = mdsav2; 2643 dp->d_type = 2644 IFTODT(VTTOIF(np->n_vattr.va_type)); 2645 ndp->ni_vp = newvp; 2646 /* Update n_ctime, so subsequent lookup doesn't purge entry */ 2647 np->n_ctime = np->n_vattr.va_ctime.tv_sec; 2648 cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); 2649 } 2650 } else { 2651 /* Just skip over the file handle */ 2652 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2653 i = fxdr_unsigned(int, *tl); 2654 if (i) { 2655 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2656 fhsize = fxdr_unsigned(int, *tl); 2657 nfsm_adv(nfsm_rndup(fhsize)); 2658 } 2659 } 2660 if (newvp != NULLVP) { 2661 if (newvp == vp) 2662 vrele(newvp); 2663 else 2664 vput(newvp); 2665 newvp = NULLVP; 2666 } 2667 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2668 more_dirs = fxdr_unsigned(int, *tl); 2669 } 2670 
/* 2671 * If at end of rpc data, get the eof boolean 2672 */ 2673 if (!more_dirs) { 2674 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2675 more_dirs = (fxdr_unsigned(int, *tl) == 0); 2676 } 2677 m_freem(mrep); 2678 } 2679 /* 2680 * Fill last record, iff any, out to a multiple of DIRBLKSIZ 2681 * by increasing d_reclen for the last record. 2682 */ 2683 if (blksiz > 0) { 2684 left = DIRBLKSIZ - blksiz; 2685 dp->d_reclen += left; 2686 uiop->uio_iov->iov_base = 2687 (char *)uiop->uio_iov->iov_base + left; 2688 uiop->uio_iov->iov_len -= left; 2689 uiop->uio_offset += left; 2690 uiop->uio_resid -= left; 2691 } 2692 2693 /* 2694 * We are now either at the end of the directory or have filled the 2695 * block. 2696 */ 2697 if (bigenough) 2698 dnp->n_direofoffset = uiop->uio_offset; 2699 else { 2700 if (uiop->uio_resid > 0) 2701 nfs_printf("EEK! readdirplusrpc resid > 0\n"); 2702 nfs_dircookie_lock(dnp); 2703 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); 2704 *cookiep = cookie; 2705 nfs_dircookie_unlock(dnp); 2706 } 2707nfsmout: 2708 if (newvp != NULLVP) { 2709 if (newvp == vp) 2710 vrele(newvp); 2711 else 2712 vput(newvp); 2713 newvp = NULLVP; 2714 } 2715 return (error); 2716} 2717 2718/* 2719 * Silly rename. To make the NFS filesystem that is stateless look a little 2720 * more like the "ufs" a remove of an active vnode is translated to a rename 2721 * to a funny looking filename that is removed by nfs_inactive on the 2722 * nfsnode. There is the potential for another process on a different client 2723 * to create the same funny name between the nfs_lookitup() fails and the 2724 * nfs_rename() completes, but... 
2725 */ 2726static int 2727nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 2728{ 2729 struct sillyrename *sp; 2730 struct nfsnode *np; 2731 int error; 2732 short pid; 2733 unsigned int lticks; 2734 2735 cache_purge(dvp); 2736 np = VTONFS(vp); 2737#ifndef DIAGNOSTIC 2738 if (vp->v_type == VDIR) 2739 panic("nfs: sillyrename dir"); 2740#endif 2741 sp = malloc(sizeof (struct sillyrename), 2742 M_NFSREQ, M_WAITOK); 2743 sp->s_cred = crhold(cnp->cn_cred); 2744 sp->s_dvp = dvp; 2745 sp->s_removeit = nfs_removeit; 2746 VREF(dvp); 2747 2748 /* 2749 * Fudge together a funny name. 2750 * Changing the format of the funny name to accomodate more 2751 * sillynames per directory. 2752 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 2753 * CPU ticks since boot. 2754 */ 2755 pid = cnp->cn_thread->td_proc->p_pid; 2756 lticks = (unsigned int)ticks; 2757 for ( ; ; ) { 2758 sp->s_namlen = sprintf(sp->s_name, 2759 ".nfs.%08x.%04x4.4", lticks, 2760 pid); 2761 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2762 cnp->cn_thread, NULL)) 2763 break; 2764 lticks++; 2765 } 2766 error = nfs_renameit(dvp, cnp, sp); 2767 if (error) 2768 goto bad; 2769 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2770 cnp->cn_thread, &np); 2771 np->n_sillyrename = sp; 2772 return (0); 2773bad: 2774 vrele(sp->s_dvp); 2775 crfree(sp->s_cred); 2776 free((caddr_t)sp, M_NFSREQ); 2777 return (error); 2778} 2779 2780/* 2781 * Look up a file name and optionally either update the file handle or 2782 * allocate an nfsnode, depending on the value of npp. 
2783 * npp == NULL --> just do the lookup 2784 * *npp == NULL --> allocate a new nfsnode and make sure attributes are 2785 * handled too 2786 * *npp != NULL --> update the file handle in the vnode 2787 */ 2788static int 2789nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred, 2790 struct thread *td, struct nfsnode **npp) 2791{ 2792 struct vnode *newvp = NULL; 2793 struct nfsnode *np, *dnp = VTONFS(dvp); 2794 caddr_t bpos, dpos; 2795 int error = 0, fhlen, attrflag; 2796 struct mbuf *mreq, *mrep, *md, *mb; 2797 nfsfh_t *nfhp; 2798 int v3 = NFS_ISV3(dvp); 2799 2800 nfsstats.rpccnt[NFSPROC_LOOKUP]++; 2801 mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, 2802 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); 2803 mb = mreq; 2804 bpos = mtod(mb, caddr_t); 2805 nfsm_fhtom(dvp, v3); 2806 nfsm_strtom(name, len, NFS_MAXNAMLEN); 2807 nfsm_request(dvp, NFSPROC_LOOKUP, td, cred); 2808 if (npp && !error) { 2809 nfsm_getfh(nfhp, fhlen, v3); 2810 if (*npp) { 2811 np = *npp; 2812 if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { 2813 free((caddr_t)np->n_fhp, M_NFSBIGFH); 2814 np->n_fhp = &np->n_fh; 2815 } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) 2816 np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK); 2817 bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); 2818 np->n_fhsize = fhlen; 2819 newvp = NFSTOV(np); 2820 } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { 2821 VREF(dvp); 2822 newvp = dvp; 2823 } else { 2824 error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE); 2825 if (error) { 2826 m_freem(mrep); 2827 return (error); 2828 } 2829 newvp = NFSTOV(np); 2830 } 2831 if (v3) { 2832 nfsm_postop_attr(newvp, attrflag); 2833 if (!attrflag && *npp == NULL) { 2834 m_freem(mrep); 2835 if (newvp == dvp) 2836 vrele(newvp); 2837 else 2838 vput(newvp); 2839 return (ENOENT); 2840 } 2841 } else 2842 nfsm_loadattr(newvp, NULL); 2843 } 2844 m_freem(mrep); 2845nfsmout: 2846 if (npp && *npp == NULL) { 2847 if (error) { 2848 if (newvp) { 2849 if 
(newvp == dvp) 2850 vrele(newvp); 2851 else 2852 vput(newvp); 2853 } 2854 } else 2855 *npp = np; 2856 } 2857 return (error); 2858} 2859 2860/* 2861 * Nfs Version 3 commit rpc 2862 */ 2863int 2864nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, 2865 struct thread *td) 2866{ 2867 u_int32_t *tl; 2868 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2869 caddr_t bpos, dpos; 2870 int error = 0, wccflag = NFSV3_WCCRATTR; 2871 struct mbuf *mreq, *mrep, *md, *mb; 2872 2873 mtx_lock(&nmp->nm_mtx); 2874 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { 2875 mtx_unlock(&nmp->nm_mtx); 2876 return (0); 2877 } 2878 mtx_unlock(&nmp->nm_mtx); 2879 nfsstats.rpccnt[NFSPROC_COMMIT]++; 2880 mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); 2881 mb = mreq; 2882 bpos = mtod(mb, caddr_t); 2883 nfsm_fhtom(vp, 1); 2884 tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED); 2885 txdr_hyper(offset, tl); 2886 tl += 2; 2887 *tl = txdr_unsigned(cnt); 2888 nfsm_request(vp, NFSPROC_COMMIT, td, cred); 2889 nfsm_wcc_data(vp, wccflag); 2890 if (!error) { 2891 tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF); 2892 if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, 2893 NFSX_V3WRITEVERF)) { 2894 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, 2895 NFSX_V3WRITEVERF); 2896 error = NFSERR_STALEWRITEVERF; 2897 } 2898 } 2899 m_freem(mrep); 2900nfsmout: 2901 return (error); 2902} 2903 2904/* 2905 * Strategy routine. 2906 * For async requests when nfsiod(s) are running, queue the request by 2907 * calling nfs_asyncio(), otherwise just all nfs_doio() to do the 2908 * request. 
2909 */ 2910static int 2911nfs_strategy(struct vop_strategy_args *ap) 2912{ 2913 struct buf *bp = ap->a_bp; 2914 struct ucred *cr; 2915 2916 KASSERT(!(bp->b_flags & B_DONE), 2917 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 2918 BUF_ASSERT_HELD(bp); 2919 2920 if (bp->b_iocmd == BIO_READ) 2921 cr = bp->b_rcred; 2922 else 2923 cr = bp->b_wcred; 2924 2925 /* 2926 * If the op is asynchronous and an i/o daemon is waiting 2927 * queue the request, wake it up and wait for completion 2928 * otherwise just do it ourselves. 2929 */ 2930 if ((bp->b_flags & B_ASYNC) == 0 || 2931 nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread)) 2932 (void)nfs_doio(ap->a_vp, bp, cr, curthread); 2933 return (0); 2934} 2935 2936/* 2937 * fsync vnode op. Just call nfs_flush() with commit == 1. 2938 */ 2939/* ARGSUSED */ 2940static int 2941nfs_fsync(struct vop_fsync_args *ap) 2942{ 2943 2944 return (nfs_flush(ap->a_vp, ap->a_waitfor, 1)); 2945} 2946 2947/* 2948 * Flush all the blocks associated with a vnode. 2949 * Walk through the buffer pool and push any dirty pages 2950 * associated with the vnode. 
2951 */ 2952static int 2953nfs_flush(struct vnode *vp, int waitfor, int commit) 2954{ 2955 struct nfsnode *np = VTONFS(vp); 2956 struct buf *bp; 2957 int i; 2958 struct buf *nbp; 2959 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2960 int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; 2961 int passone = 1; 2962 u_quad_t off, endoff, toff; 2963 struct ucred* wcred = NULL; 2964 struct buf **bvec = NULL; 2965 struct bufobj *bo; 2966 struct thread *td = curthread; 2967#ifndef NFS_COMMITBVECSIZ 2968#define NFS_COMMITBVECSIZ 20 2969#endif 2970 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; 2971 int bvecsize = 0, bveccount; 2972 2973 if (nmp->nm_flag & NFSMNT_INT) 2974 slpflag = NFS_PCATCH; 2975 if (!commit) 2976 passone = 0; 2977 bo = &vp->v_bufobj; 2978 /* 2979 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the 2980 * server, but has not been committed to stable storage on the server 2981 * yet. On the first pass, the byte range is worked out and the commit 2982 * rpc is done. On the second pass, nfs_writebp() is called to do the 2983 * job. 2984 */ 2985again: 2986 off = (u_quad_t)-1; 2987 endoff = 0; 2988 bvecpos = 0; 2989 if (NFS_ISV3(vp) && commit) { 2990 if (bvec != NULL && bvec != bvec_on_stack) 2991 free(bvec, M_TEMP); 2992 /* 2993 * Count up how many buffers waiting for a commit. 2994 */ 2995 bveccount = 0; 2996 BO_LOCK(bo); 2997 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 2998 if (!BUF_ISLOCKED(bp) && 2999 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 3000 == (B_DELWRI | B_NEEDCOMMIT)) 3001 bveccount++; 3002 } 3003 /* 3004 * Allocate space to remember the list of bufs to commit. It is 3005 * important to use M_NOWAIT here to avoid a race with nfs_write. 3006 * If we can't get memory (for whatever reason), we will end up 3007 * committing the buffers one-by-one in the loop below. 3008 */ 3009 if (bveccount > NFS_COMMITBVECSIZ) { 3010 /* 3011 * Release the vnode interlock to avoid a lock 3012 * order reversal. 
3013 */ 3014 BO_UNLOCK(bo); 3015 bvec = (struct buf **) 3016 malloc(bveccount * sizeof(struct buf *), 3017 M_TEMP, M_NOWAIT); 3018 BO_LOCK(bo); 3019 if (bvec == NULL) { 3020 bvec = bvec_on_stack; 3021 bvecsize = NFS_COMMITBVECSIZ; 3022 } else 3023 bvecsize = bveccount; 3024 } else { 3025 bvec = bvec_on_stack; 3026 bvecsize = NFS_COMMITBVECSIZ; 3027 } 3028 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3029 if (bvecpos >= bvecsize) 3030 break; 3031 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 3032 nbp = TAILQ_NEXT(bp, b_bobufs); 3033 continue; 3034 } 3035 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != 3036 (B_DELWRI | B_NEEDCOMMIT)) { 3037 BUF_UNLOCK(bp); 3038 nbp = TAILQ_NEXT(bp, b_bobufs); 3039 continue; 3040 } 3041 BO_UNLOCK(bo); 3042 bremfree(bp); 3043 /* 3044 * Work out if all buffers are using the same cred 3045 * so we can deal with them all with one commit. 3046 * 3047 * NOTE: we are not clearing B_DONE here, so we have 3048 * to do it later on in this routine if we intend to 3049 * initiate I/O on the bp. 3050 * 3051 * Note: to avoid loopback deadlocks, we do not 3052 * assign b_runningbufspace. 3053 */ 3054 if (wcred == NULL) 3055 wcred = bp->b_wcred; 3056 else if (wcred != bp->b_wcred) 3057 wcred = NOCRED; 3058 vfs_busy_pages(bp, 1); 3059 3060 BO_LOCK(bo); 3061 /* 3062 * bp is protected by being locked, but nbp is not 3063 * and vfs_busy_pages() may sleep. We have to 3064 * recalculate nbp. 3065 */ 3066 nbp = TAILQ_NEXT(bp, b_bobufs); 3067 3068 /* 3069 * A list of these buffers is kept so that the 3070 * second loop knows which buffers have actually 3071 * been committed. This is necessary, since there 3072 * may be a race between the commit rpc and new 3073 * uncommitted writes on the file. 
3074 */ 3075 bvec[bvecpos++] = bp; 3076 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 3077 bp->b_dirtyoff; 3078 if (toff < off) 3079 off = toff; 3080 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); 3081 if (toff > endoff) 3082 endoff = toff; 3083 } 3084 BO_UNLOCK(bo); 3085 } 3086 if (bvecpos > 0) { 3087 /* 3088 * Commit data on the server, as required. 3089 * If all bufs are using the same wcred, then use that with 3090 * one call for all of them, otherwise commit each one 3091 * separately. 3092 */ 3093 if (wcred != NOCRED) 3094 retv = nfs_commit(vp, off, (int)(endoff - off), 3095 wcred, td); 3096 else { 3097 retv = 0; 3098 for (i = 0; i < bvecpos; i++) { 3099 off_t off, size; 3100 bp = bvec[i]; 3101 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 3102 bp->b_dirtyoff; 3103 size = (u_quad_t)(bp->b_dirtyend 3104 - bp->b_dirtyoff); 3105 retv = nfs_commit(vp, off, (int)size, 3106 bp->b_wcred, td); 3107 if (retv) break; 3108 } 3109 } 3110 3111 if (retv == NFSERR_STALEWRITEVERF) 3112 nfs_clearcommit(vp->v_mount); 3113 3114 /* 3115 * Now, either mark the blocks I/O done or mark the 3116 * blocks dirty, depending on whether the commit 3117 * succeeded. 3118 */ 3119 for (i = 0; i < bvecpos; i++) { 3120 bp = bvec[i]; 3121 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 3122 if (retv) { 3123 /* 3124 * Error, leave B_DELWRI intact 3125 */ 3126 vfs_unbusy_pages(bp); 3127 brelse(bp); 3128 } else { 3129 /* 3130 * Success, remove B_DELWRI ( bundirty() ). 3131 * 3132 * b_dirtyoff/b_dirtyend seem to be NFS 3133 * specific. We should probably move that 3134 * into bundirty(). XXX 3135 */ 3136 bufobj_wref(bo); 3137 bp->b_flags |= B_ASYNC; 3138 bundirty(bp); 3139 bp->b_flags &= ~B_DONE; 3140 bp->b_ioflags &= ~BIO_ERROR; 3141 bp->b_dirtyoff = bp->b_dirtyend = 0; 3142 bufdone(bp); 3143 } 3144 } 3145 } 3146 3147 /* 3148 * Start/do any write(s) that are required. 
3149 */ 3150loop: 3151 BO_LOCK(bo); 3152 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { 3153 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 3154 if (waitfor != MNT_WAIT || passone) 3155 continue; 3156 3157 error = BUF_TIMELOCK(bp, 3158 LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, 3159 BO_MTX(bo), "nfsfsync", slpflag, slptimeo); 3160 if (error == 0) { 3161 BUF_UNLOCK(bp); 3162 goto loop; 3163 } 3164 if (error == ENOLCK) { 3165 error = 0; 3166 goto loop; 3167 } 3168 if (nfs_sigintr(nmp, td)) { 3169 error = EINTR; 3170 goto done; 3171 } 3172 if (slpflag & PCATCH) { 3173 slpflag = 0; 3174 slptimeo = 2 * hz; 3175 } 3176 goto loop; 3177 } 3178 if ((bp->b_flags & B_DELWRI) == 0) 3179 panic("nfs_fsync: not dirty"); 3180 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { 3181 BUF_UNLOCK(bp); 3182 continue; 3183 } 3184 BO_UNLOCK(bo); 3185 bremfree(bp); 3186 if (passone || !commit) 3187 bp->b_flags |= B_ASYNC; 3188 else 3189 bp->b_flags |= B_ASYNC; 3190 bwrite(bp); 3191 if (nfs_sigintr(nmp, td)) { 3192 error = EINTR; 3193 goto done; 3194 } 3195 goto loop; 3196 } 3197 if (passone) { 3198 passone = 0; 3199 BO_UNLOCK(bo); 3200 goto again; 3201 } 3202 if (waitfor == MNT_WAIT) { 3203 while (bo->bo_numoutput) { 3204 error = bufobj_wwait(bo, slpflag, slptimeo); 3205 if (error) { 3206 BO_UNLOCK(bo); 3207 error = nfs_sigintr(nmp, td); 3208 if (error) 3209 goto done; 3210 if (slpflag & PCATCH) { 3211 slpflag = 0; 3212 slptimeo = 2 * hz; 3213 } 3214 BO_LOCK(bo); 3215 } 3216 } 3217 if (bo->bo_dirty.bv_cnt != 0 && commit) { 3218 BO_UNLOCK(bo); 3219 goto loop; 3220 } 3221 /* 3222 * Wait for all the async IO requests to drain 3223 */ 3224 BO_UNLOCK(bo); 3225 mtx_lock(&np->n_mtx); 3226 while (np->n_directio_asyncwr > 0) { 3227 np->n_flag |= NFSYNCWAIT; 3228 error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr, 3229 &np->n_mtx, slpflag | (PRIBIO + 1), 3230 "nfsfsync", 0); 3231 if (error) { 3232 if (nfs_sigintr(nmp, td)) { 3233 mtx_unlock(&np->n_mtx); 3234 error = 
EINTR; 3235 goto done; 3236 } 3237 } 3238 } 3239 mtx_unlock(&np->n_mtx); 3240 } else 3241 BO_UNLOCK(bo); 3242 mtx_lock(&np->n_mtx); 3243 if (np->n_flag & NWRITEERR) { 3244 error = np->n_error; 3245 np->n_flag &= ~NWRITEERR; 3246 } 3247 if (commit && bo->bo_dirty.bv_cnt == 0 && 3248 bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) 3249 np->n_flag &= ~NMODIFIED; 3250 mtx_unlock(&np->n_mtx); 3251done: 3252 if (bvec != NULL && bvec != bvec_on_stack) 3253 free(bvec, M_TEMP); 3254 return (error); 3255} 3256 3257/* 3258 * NFS advisory byte-level locks. 3259 */ 3260static int 3261nfs_advlock(struct vop_advlock_args *ap) 3262{ 3263 struct vnode *vp = ap->a_vp; 3264 u_quad_t size; 3265 int error; 3266 3267 error = vn_lock(vp, LK_SHARED); 3268 if (error) 3269 return (error); 3270 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3271 size = VTONFS(vp)->n_size; 3272 VOP_UNLOCK(vp, 0); 3273 error = lf_advlock(ap, &(vp->v_lockf), size); 3274 } else { 3275 if (nfs_advlock_p) 3276 error = nfs_advlock_p(ap); 3277 else 3278 error = ENOLCK; 3279 } 3280 3281 return (error); 3282} 3283 3284/* 3285 * NFS advisory byte-level locks. 3286 */ 3287static int 3288nfs_advlockasync(struct vop_advlockasync_args *ap) 3289{ 3290 struct vnode *vp = ap->a_vp; 3291 u_quad_t size; 3292 int error; 3293 3294 error = vn_lock(vp, LK_SHARED); 3295 if (error) 3296 return (error); 3297 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3298 size = VTONFS(vp)->n_size; 3299 VOP_UNLOCK(vp, 0); 3300 error = lf_advlockasync(ap, &(vp->v_lockf), size); 3301 } else { 3302 VOP_UNLOCK(vp, 0); 3303 error = EOPNOTSUPP; 3304 } 3305 return (error); 3306} 3307 3308/* 3309 * Print out the contents of an nfsnode. 
3310 */ 3311static int 3312nfs_print(struct vop_print_args *ap) 3313{ 3314 struct vnode *vp = ap->a_vp; 3315 struct nfsnode *np = VTONFS(vp); 3316 3317 nfs_printf("\tfileid %ld fsid 0x%x", 3318 np->n_vattr.va_fileid, np->n_vattr.va_fsid); 3319 if (vp->v_type == VFIFO) 3320 fifo_printinfo(vp); 3321 printf("\n"); 3322 return (0); 3323} 3324 3325/* 3326 * This is the "real" nfs::bwrite(struct buf*). 3327 * We set B_CACHE if this is a VMIO buffer. 3328 */ 3329int 3330nfs_writebp(struct buf *bp, int force __unused, struct thread *td) 3331{ 3332 int s; 3333 int oldflags = bp->b_flags; 3334#if 0 3335 int retv = 1; 3336 off_t off; 3337#endif 3338 3339 BUF_ASSERT_HELD(bp); 3340 3341 if (bp->b_flags & B_INVAL) { 3342 brelse(bp); 3343 return(0); 3344 } 3345 3346 bp->b_flags |= B_CACHE; 3347 3348 /* 3349 * Undirty the bp. We will redirty it later if the I/O fails. 3350 */ 3351 3352 s = splbio(); 3353 bundirty(bp); 3354 bp->b_flags &= ~B_DONE; 3355 bp->b_ioflags &= ~BIO_ERROR; 3356 bp->b_iocmd = BIO_WRITE; 3357 3358 bufobj_wref(bp->b_bufobj); 3359 curthread->td_ru.ru_oublock++; 3360 splx(s); 3361 3362 /* 3363 * Note: to avoid loopback deadlocks, we do not 3364 * assign b_runningbufspace. 3365 */ 3366 vfs_busy_pages(bp, 1); 3367 3368 BUF_KERNPROC(bp); 3369 bp->b_iooffset = dbtob(bp->b_blkno); 3370 bstrategy(bp); 3371 3372 if( (oldflags & B_ASYNC) == 0) { 3373 int rtval = bufwait(bp); 3374 3375 if (oldflags & B_DELWRI) { 3376 s = splbio(); 3377 reassignbuf(bp); 3378 splx(s); 3379 } 3380 brelse(bp); 3381 return (rtval); 3382 } 3383 3384 return (0); 3385} 3386 3387/* 3388 * nfs special file access vnode op. 3389 * Essentially just get vattr and then imitate iaccess() since the device is 3390 * local to the client. 
3391 */ 3392static int 3393nfsspec_access(struct vop_access_args *ap) 3394{ 3395 struct vattr *vap; 3396 struct ucred *cred = ap->a_cred; 3397 struct vnode *vp = ap->a_vp; 3398 accmode_t accmode = ap->a_accmode; 3399 struct vattr vattr; 3400 int error; 3401 3402 /* 3403 * Disallow write attempts on filesystems mounted read-only; 3404 * unless the file is a socket, fifo, or a block or character 3405 * device resident on the filesystem. 3406 */ 3407 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3408 switch (vp->v_type) { 3409 case VREG: 3410 case VDIR: 3411 case VLNK: 3412 return (EROFS); 3413 default: 3414 break; 3415 } 3416 } 3417 vap = &vattr; 3418 error = VOP_GETATTR(vp, vap, cred); 3419 if (error) 3420 goto out; 3421 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3422 accmode, cred, NULL); 3423out: 3424 return error; 3425} 3426 3427/* 3428 * Read wrapper for fifos. 3429 */ 3430static int 3431nfsfifo_read(struct vop_read_args *ap) 3432{ 3433 struct nfsnode *np = VTONFS(ap->a_vp); 3434 int error; 3435 3436 /* 3437 * Set access flag. 3438 */ 3439 mtx_lock(&np->n_mtx); 3440 np->n_flag |= NACC; 3441 getnanotime(&np->n_atim); 3442 mtx_unlock(&np->n_mtx); 3443 error = fifo_specops.vop_read(ap); 3444 return error; 3445} 3446 3447/* 3448 * Write wrapper for fifos. 3449 */ 3450static int 3451nfsfifo_write(struct vop_write_args *ap) 3452{ 3453 struct nfsnode *np = VTONFS(ap->a_vp); 3454 3455 /* 3456 * Set update flag. 3457 */ 3458 mtx_lock(&np->n_mtx); 3459 np->n_flag |= NUPD; 3460 getnanotime(&np->n_mtim); 3461 mtx_unlock(&np->n_mtx); 3462 return(fifo_specops.vop_write(ap)); 3463} 3464 3465/* 3466 * Close wrapper for fifos. 3467 * 3468 * Update the times on the nfsnode then do fifo close. 
3469 */ 3470static int 3471nfsfifo_close(struct vop_close_args *ap) 3472{ 3473 struct vnode *vp = ap->a_vp; 3474 struct nfsnode *np = VTONFS(vp); 3475 struct vattr vattr; 3476 struct timespec ts; 3477 3478 mtx_lock(&np->n_mtx); 3479 if (np->n_flag & (NACC | NUPD)) { 3480 getnanotime(&ts); 3481 if (np->n_flag & NACC) 3482 np->n_atim = ts; 3483 if (np->n_flag & NUPD) 3484 np->n_mtim = ts; 3485 np->n_flag |= NCHG; 3486 if (vrefcnt(vp) == 1 && 3487 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3488 VATTR_NULL(&vattr); 3489 if (np->n_flag & NACC) 3490 vattr.va_atime = np->n_atim; 3491 if (np->n_flag & NUPD) 3492 vattr.va_mtime = np->n_mtim; 3493 mtx_unlock(&np->n_mtx); 3494 (void)VOP_SETATTR(vp, &vattr, ap->a_cred); 3495 goto out; 3496 } 3497 } 3498 mtx_unlock(&np->n_mtx); 3499out: 3500 return (fifo_specops.vop_close(ap)); 3501} 3502 3503/* 3504 * Just call nfs_writebp() with the force argument set to 1. 3505 * 3506 * NOTE: B_DONE may or may not be set in a_bp on call. 3507 */ 3508static int 3509nfs_bwrite(struct buf *bp) 3510{ 3511 3512 return (nfs_writebp(bp, 1, curthread)); 3513} 3514 3515struct buf_ops buf_ops_nfs = { 3516 .bop_name = "buf_ops_nfs", 3517 .bop_write = nfs_bwrite, 3518 .bop_strategy = bufstrategy, 3519 .bop_sync = bufsync, 3520 .bop_bdflush = bufbdflush, 3521}; 3522