nfs_vnops.c revision 166378
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_vnops.c 166378 2007-01-31 23:10:27Z mohans $"); 37 38/* 39 * vnode op calls for Sun NFS version 2 and 3 40 */ 41 42#include "opt_inet.h" 43 44#include <sys/param.h> 45#include <sys/kernel.h> 46#include <sys/systm.h> 47#include <sys/resourcevar.h> 48#include <sys/proc.h> 49#include <sys/mount.h> 50#include <sys/bio.h> 51#include <sys/buf.h> 52#include <sys/malloc.h> 53#include <sys/mbuf.h> 54#include <sys/namei.h> 55#include <sys/socket.h> 56#include <sys/vnode.h> 57#include <sys/dirent.h> 58#include <sys/fcntl.h> 59#include <sys/lockf.h> 60#include <sys/stat.h> 61#include <sys/sysctl.h> 62#include <sys/signalvar.h> 63 64#include <vm/vm.h> 65#include <vm/vm_object.h> 66#include <vm/vm_extern.h> 67#include <vm/vm_object.h> 68 69#include <fs/fifofs/fifo.h> 70 71#include <rpc/rpcclnt.h> 72 73#include <nfs/rpcv2.h> 74#include <nfs/nfsproto.h> 75#include <nfsclient/nfs.h> 76#include <nfsclient/nfsnode.h> 77#include <nfsclient/nfsmount.h> 78#include <nfsclient/nfs_lock.h> 79#include <nfs/xdr_subs.h> 80#include <nfsclient/nfsm_subs.h> 81 82#include <net/if.h> 83#include <netinet/in.h> 84#include <netinet/in_var.h> 85 86/* Defs */ 87#define TRUE 1 88#define FALSE 0 89 90/* 91 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these 92 * calls are not in getblk() and brelse() so that they would not be necessary 93 * here. 
94 */ 95#ifndef B_VMIO 96#define vfs_busy_pages(bp, f) 97#endif 98 99static vop_read_t nfsfifo_read; 100static vop_write_t nfsfifo_write; 101static vop_close_t nfsfifo_close; 102static int nfs_flush(struct vnode *, int, struct thread *, 103 int); 104static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, 105 struct thread *); 106static vop_lookup_t nfs_lookup; 107static vop_create_t nfs_create; 108static vop_mknod_t nfs_mknod; 109static vop_open_t nfs_open; 110static vop_close_t nfs_close; 111static vop_access_t nfs_access; 112static vop_getattr_t nfs_getattr; 113static vop_setattr_t nfs_setattr; 114static vop_read_t nfs_read; 115static vop_fsync_t nfs_fsync; 116static vop_remove_t nfs_remove; 117static vop_link_t nfs_link; 118static vop_rename_t nfs_rename; 119static vop_mkdir_t nfs_mkdir; 120static vop_rmdir_t nfs_rmdir; 121static vop_symlink_t nfs_symlink; 122static vop_readdir_t nfs_readdir; 123static vop_strategy_t nfs_strategy; 124static int nfs_lookitup(struct vnode *, const char *, int, 125 struct ucred *, struct thread *, struct nfsnode **); 126static int nfs_sillyrename(struct vnode *, struct vnode *, 127 struct componentname *); 128static vop_access_t nfsspec_access; 129static vop_readlink_t nfs_readlink; 130static vop_print_t nfs_print; 131static vop_advlock_t nfs_advlock; 132 133/* 134 * Global vfs data structures for nfs 135 */ 136struct vop_vector nfs_vnodeops = { 137 .vop_default = &default_vnodeops, 138 .vop_access = nfs_access, 139 .vop_advlock = nfs_advlock, 140 .vop_close = nfs_close, 141 .vop_create = nfs_create, 142 .vop_fsync = nfs_fsync, 143 .vop_getattr = nfs_getattr, 144 .vop_getpages = nfs_getpages, 145 .vop_putpages = nfs_putpages, 146 .vop_inactive = nfs_inactive, 147 .vop_lease = VOP_NULL, 148 .vop_link = nfs_link, 149 .vop_lookup = nfs_lookup, 150 .vop_mkdir = nfs_mkdir, 151 .vop_mknod = nfs_mknod, 152 .vop_open = nfs_open, 153 .vop_print = nfs_print, 154 .vop_read = nfs_read, 155 .vop_readdir = nfs_readdir, 156 
.vop_readlink = nfs_readlink, 157 .vop_reclaim = nfs_reclaim, 158 .vop_remove = nfs_remove, 159 .vop_rename = nfs_rename, 160 .vop_rmdir = nfs_rmdir, 161 .vop_setattr = nfs_setattr, 162 .vop_strategy = nfs_strategy, 163 .vop_symlink = nfs_symlink, 164 .vop_write = nfs_write, 165}; 166 167struct vop_vector nfs_fifoops = { 168 .vop_default = &fifo_specops, 169 .vop_access = nfsspec_access, 170 .vop_close = nfsfifo_close, 171 .vop_fsync = nfs_fsync, 172 .vop_getattr = nfs_getattr, 173 .vop_inactive = nfs_inactive, 174 .vop_print = nfs_print, 175 .vop_read = nfsfifo_read, 176 .vop_reclaim = nfs_reclaim, 177 .vop_setattr = nfs_setattr, 178 .vop_write = nfsfifo_write, 179}; 180 181static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, 182 struct componentname *cnp, struct vattr *vap); 183static int nfs_removerpc(struct vnode *dvp, const char *name, int namelen, 184 struct ucred *cred, struct thread *td); 185static int nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, 186 int fnamelen, struct vnode *tdvp, 187 const char *tnameptr, int tnamelen, 188 struct ucred *cred, struct thread *td); 189static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp, 190 struct sillyrename *sp); 191 192/* 193 * Global variables 194 */ 195struct mtx nfs_iod_mtx; 196struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; 197struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; 198int nfs_numasync = 0; 199#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) 200 201SYSCTL_DECL(_vfs_nfs); 202 203static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; 204SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 205 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); 206 207static int nfsv3_commit_on_close = 0; 208SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW, 209 &nfsv3_commit_on_close, 0, "write+commit on close, else only write"); 210 211static int nfs_clean_pages_on_close = 1; 212SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, 
213 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); 214 215int nfs_directio_enable = 0; 216SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, 217 &nfs_directio_enable, 0, "Enable NFS directio"); 218 219/* 220 * This sysctl allows other processes to mmap a file that has been opened 221 * O_DIRECT by a process. In general, having processes mmap the file while 222 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow 223 * this by default to prevent DoS attacks - to prevent a malicious user from 224 * opening up files O_DIRECT preventing other users from mmap'ing these 225 * files. "Protected" environments where stricter consistency guarantees are 226 * required can disable this knob. The process that opened the file O_DIRECT 227 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not 228 * meaningful. 229 */ 230int nfs_directio_allow_mmap = 1; 231SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, 232 &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); 233 234#if 0 235SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, 236 &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); 237 238SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, 239 &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count"); 240#endif 241 242#define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \ 243 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \ 244 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP) 245 246/* 247 * SMP Locking Note : 248 * The list of locks after the description of the lock is the ordering 249 * of other locks acquired with the lock held. 250 * np->n_mtx : Protects the fields in the nfsnode. 251 VM Object Lock 252 VI_MTX (acquired indirectly) 253 * nmp->nm_mtx : Protects the fields in the nfsmount. 254 rep->r_mtx 255 * nfs_iod_mtx : Global lock, protects shared nfsiod state. 256 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. 
257 nmp->nm_mtx 258 rep->r_mtx 259 * rep->r_mtx : Protects the fields in an nfsreq. 260 */ 261 262static int 263nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td, 264 struct ucred *cred) 265{ 266 const int v3 = 1; 267 u_int32_t *tl; 268 int error = 0, attrflag; 269 270 struct mbuf *mreq, *mrep, *md, *mb; 271 caddr_t bpos, dpos; 272 u_int32_t rmode; 273 struct nfsnode *np = VTONFS(vp); 274 275 nfsstats.rpccnt[NFSPROC_ACCESS]++; 276 mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); 277 mb = mreq; 278 bpos = mtod(mb, caddr_t); 279 nfsm_fhtom(vp, v3); 280 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 281 *tl = txdr_unsigned(wmode); 282 nfsm_request(vp, NFSPROC_ACCESS, td, cred); 283 nfsm_postop_attr(vp, attrflag); 284 if (!error) { 285 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 286 rmode = fxdr_unsigned(u_int32_t, *tl); 287 mtx_lock(&np->n_mtx); 288 np->n_mode = rmode; 289 np->n_modeuid = cred->cr_uid; 290 np->n_modestamp = time_second; 291 mtx_unlock(&np->n_mtx); 292 } 293 m_freem(mrep); 294nfsmout: 295 return (error); 296} 297 298/* 299 * nfs access vnode op. 300 * For nfs version 2, just return ok. File accesses may fail later. 301 * For nfs version 3, use the access rpc to check accessibility. If file modes 302 * are changed on the server, accesses might still fail later. 303 */ 304static int 305nfs_access(struct vop_access_args *ap) 306{ 307 struct vnode *vp = ap->a_vp; 308 int error = 0; 309 u_int32_t mode, wmode; 310 int v3 = NFS_ISV3(vp); 311 struct nfsnode *np = VTONFS(vp); 312 313 /* 314 * Disallow write attempts on filesystems mounted read-only; 315 * unless the file is a socket, fifo, or a block or character 316 * device resident on the filesystem. 
317 */ 318 if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 319 switch (vp->v_type) { 320 case VREG: 321 case VDIR: 322 case VLNK: 323 return (EROFS); 324 default: 325 break; 326 } 327 } 328 /* 329 * For nfs v3, check to see if we have done this recently, and if 330 * so return our cached result instead of making an ACCESS call. 331 * If not, do an access rpc, otherwise you are stuck emulating 332 * ufs_access() locally using the vattr. This may not be correct, 333 * since the server may apply other access criteria such as 334 * client uid-->server uid mapping that we do not know about. 335 */ 336 if (v3) { 337 if (ap->a_mode & VREAD) 338 mode = NFSV3ACCESS_READ; 339 else 340 mode = 0; 341 if (vp->v_type != VDIR) { 342 if (ap->a_mode & VWRITE) 343 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); 344 if (ap->a_mode & VEXEC) 345 mode |= NFSV3ACCESS_EXECUTE; 346 } else { 347 if (ap->a_mode & VWRITE) 348 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | 349 NFSV3ACCESS_DELETE); 350 if (ap->a_mode & VEXEC) 351 mode |= NFSV3ACCESS_LOOKUP; 352 } 353 /* XXX safety belt, only make blanket request if caching */ 354 if (nfsaccess_cache_timeout > 0) { 355 wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | 356 NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | 357 NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP; 358 } else { 359 wmode = mode; 360 } 361 362 /* 363 * Does our cached result allow us to give a definite yes to 364 * this request? 365 */ 366 mtx_lock(&np->n_mtx); 367 if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) && 368 (ap->a_cred->cr_uid == np->n_modeuid) && 369 ((np->n_mode & mode) == mode)) { 370 nfsstats.accesscache_hits++; 371 } else { 372 /* 373 * Either a no, or a don't know. Go to the wire. 
374 */ 375 nfsstats.accesscache_misses++; 376 mtx_unlock(&np->n_mtx); 377 error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred); 378 mtx_lock(&np->n_mtx); 379 if (!error) { 380 if ((np->n_mode & mode) != mode) { 381 error = EACCES; 382 } 383 } 384 } 385 mtx_unlock(&np->n_mtx); 386 return (error); 387 } else { 388 if ((error = nfsspec_access(ap)) != 0) { 389 return (error); 390 } 391 /* 392 * Attempt to prevent a mapped root from accessing a file 393 * which it shouldn't. We try to read a byte from the file 394 * if the user is root and the file is not zero length. 395 * After calling nfsspec_access, we should have the correct 396 * file size cached. 397 */ 398 mtx_lock(&np->n_mtx); 399 if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD) 400 && VTONFS(vp)->n_size > 0) { 401 struct iovec aiov; 402 struct uio auio; 403 char buf[1]; 404 405 mtx_unlock(&np->n_mtx); 406 aiov.iov_base = buf; 407 aiov.iov_len = 1; 408 auio.uio_iov = &aiov; 409 auio.uio_iovcnt = 1; 410 auio.uio_offset = 0; 411 auio.uio_resid = 1; 412 auio.uio_segflg = UIO_SYSSPACE; 413 auio.uio_rw = UIO_READ; 414 auio.uio_td = ap->a_td; 415 416 if (vp->v_type == VREG) 417 error = nfs_readrpc(vp, &auio, ap->a_cred); 418 else if (vp->v_type == VDIR) { 419 char* bp; 420 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 421 aiov.iov_base = bp; 422 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 423 error = nfs_readdirrpc(vp, &auio, ap->a_cred); 424 free(bp, M_TEMP); 425 } else if (vp->v_type == VLNK) 426 error = nfs_readlinkrpc(vp, &auio, ap->a_cred); 427 else 428 error = EACCES; 429 } else 430 mtx_unlock(&np->n_mtx); 431 return (error); 432 } 433} 434 435/* 436 * nfs open vnode op 437 * Check to see if the type is ok 438 * and that deletion is not in progress. 439 * For paged in text files, you will need to flush the page cache 440 * if consistency is lost. 
441 */ 442/* ARGSUSED */ 443static int 444nfs_open(struct vop_open_args *ap) 445{ 446 struct vnode *vp = ap->a_vp; 447 struct nfsnode *np = VTONFS(vp); 448 struct vattr vattr; 449 int error; 450 int fmode = ap->a_mode; 451 452 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) 453 return (EOPNOTSUPP); 454 455 /* 456 * Get a valid lease. If cached data is stale, flush it. 457 */ 458 mtx_lock(&np->n_mtx); 459 if (np->n_flag & NMODIFIED) { 460 mtx_unlock(&np->n_mtx); 461 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 462 if (error == EINTR || error == EIO) 463 return (error); 464 np->n_attrstamp = 0; 465 if (vp->v_type == VDIR) 466 np->n_direofoffset = 0; 467 error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td); 468 if (error) 469 return (error); 470 mtx_lock(&np->n_mtx); 471 np->n_mtime = vattr.va_mtime; 472 mtx_unlock(&np->n_mtx); 473 } else { 474 np->n_attrstamp = 0; 475 mtx_unlock(&np->n_mtx); 476 error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td); 477 if (error) 478 return (error); 479 mtx_lock(&np->n_mtx); 480 if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 481 if (vp->v_type == VDIR) 482 np->n_direofoffset = 0; 483 mtx_unlock(&np->n_mtx); 484 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 485 if (error == EINTR || error == EIO) { 486 return (error); 487 } 488 mtx_lock(&np->n_mtx); 489 np->n_mtime = vattr.va_mtime; 490 } 491 mtx_unlock(&np->n_mtx); 492 } 493 /* 494 * If the object has >= 1 O_DIRECT active opens, we disable caching. 
495 */ 496 if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 497 if (np->n_directio_opens == 0) { 498 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 499 if (error) 500 return (error); 501 mtx_lock(&np->n_mtx); 502 np->n_flag |= NNONCACHE; 503 mtx_unlock(&np->n_mtx); 504 } 505 np->n_directio_opens++; 506 } 507 vnode_create_vobject(vp, vattr.va_size, ap->a_td); 508 return (0); 509} 510 511/* 512 * nfs close vnode op 513 * What an NFS client should do upon close after writing is a debatable issue. 514 * Most NFS clients push delayed writes to the server upon close, basically for 515 * two reasons: 516 * 1 - So that any write errors may be reported back to the client process 517 * doing the close system call. By far the two most likely errors are 518 * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. 519 * 2 - To put a worst case upper bound on cache inconsistency between 520 * multiple clients for the file. 521 * There is also a consistency problem for Version 2 of the protocol w.r.t. 522 * not being able to tell if other clients are writing a file concurrently, 523 * since there is no way of knowing if the changed modify time in the reply 524 * is only due to the write for this client. 525 * (NFS Version 3 provides weak cache consistency data in the reply that 526 * should be sufficient to detect and handle this case.) 527 * 528 * The current code does the following: 529 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers 530 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate 531 * or commit them (this satisfies 1 and 2 except for the 532 * case where the server crashes after this close but 533 * before the commit RPC, which is felt to be "good 534 * enough". Changing the last argument to nfs_flush() to 535 * a 1 would force a commit operation, if it is felt a 536 * commit is necessary now. 
537 */ 538/* ARGSUSED */ 539static int 540nfs_close(struct vop_close_args *ap) 541{ 542 struct vnode *vp = ap->a_vp; 543 struct nfsnode *np = VTONFS(vp); 544 int error = 0; 545 int fmode = ap->a_fflag; 546 547 if (vp->v_type == VREG) { 548 /* 549 * Examine and clean dirty pages, regardless of NMODIFIED. 550 * This closes a major hole in close-to-open consistency. 551 * We want to push out all dirty pages (and buffers) on 552 * close, regardless of whether they were dirtied by 553 * mmap'ed writes or via write(). 554 */ 555 if (nfs_clean_pages_on_close && vp->v_object) { 556 VM_OBJECT_LOCK(vp->v_object); 557 vm_object_page_clean(vp->v_object, 0, 0, 0); 558 VM_OBJECT_UNLOCK(vp->v_object); 559 } 560 mtx_lock(&np->n_mtx); 561 if (np->n_flag & NMODIFIED) { 562 mtx_unlock(&np->n_mtx); 563 if (NFS_ISV3(vp)) { 564 /* 565 * Under NFSv3 we have dirty buffers to dispose of. We 566 * must flush them to the NFS server. We have the option 567 * of waiting all the way through the commit rpc or just 568 * waiting for the initial write. The default is to only 569 * wait through the initial write so the data is in the 570 * server's cache, which is roughly similar to the state 571 * a standard disk subsystem leaves the file in on close(). 572 * 573 * We cannot clear the NMODIFIED bit in np->n_flag due to 574 * potential races with other processes, and certainly 575 * cannot clear it if we don't commit. 576 */ 577 int cm = nfsv3_commit_on_close ? 1 : 0; 578 error = nfs_flush(vp, MNT_WAIT, ap->a_td, cm); 579 /* np->n_flag &= ~NMODIFIED; */ 580 } else 581 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 582 mtx_lock(&np->n_mtx); 583 } 584 /* 585 * Invalidate the attribute cache in all cases. 586 * An open is going to fetch fresh attrs any way, other procs 587 * on this node that have file open will be forced to do an 588 * otw attr fetch, but this is safe. 
589 */ 590 np->n_attrstamp = 0; 591 if (np->n_flag & NWRITEERR) { 592 np->n_flag &= ~NWRITEERR; 593 error = np->n_error; 594 } 595 mtx_unlock(&np->n_mtx); 596 } 597 if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 598 mtx_lock(&np->n_mtx); 599 KASSERT((np->n_directio_opens > 0), 600 ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); 601 np->n_directio_opens--; 602 if (np->n_directio_opens == 0) 603 np->n_flag &= ~NNONCACHE; 604 mtx_unlock(&np->n_mtx); 605 } 606 return (error); 607} 608 609/* 610 * nfs getattr call from vfs. 611 */ 612static int 613nfs_getattr(struct vop_getattr_args *ap) 614{ 615 struct vnode *vp = ap->a_vp; 616 struct nfsnode *np = VTONFS(vp); 617 caddr_t bpos, dpos; 618 int error = 0; 619 struct mbuf *mreq, *mrep, *md, *mb; 620 int v3 = NFS_ISV3(vp); 621 622 /* 623 * Update local times for special files. 624 */ 625 mtx_lock(&np->n_mtx); 626 if (np->n_flag & (NACC | NUPD)) 627 np->n_flag |= NCHG; 628 mtx_unlock(&np->n_mtx); 629 /* 630 * First look in the cache. 631 */ 632 if (nfs_getattrcache(vp, ap->a_vap) == 0) 633 goto nfsmout; 634 if (v3 && nfsaccess_cache_timeout > 0) { 635 nfsstats.accesscache_misses++; 636 nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_td, ap->a_cred); 637 if (nfs_getattrcache(vp, ap->a_vap) == 0) 638 goto nfsmout; 639 } 640 nfsstats.rpccnt[NFSPROC_GETATTR]++; 641 mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); 642 mb = mreq; 643 bpos = mtod(mb, caddr_t); 644 nfsm_fhtom(vp, v3); 645 nfsm_request(vp, NFSPROC_GETATTR, ap->a_td, ap->a_cred); 646 if (!error) { 647 nfsm_loadattr(vp, ap->a_vap); 648 } 649 m_freem(mrep); 650nfsmout: 651 return (error); 652} 653 654/* 655 * nfs setattr call. 
656 */ 657static int 658nfs_setattr(struct vop_setattr_args *ap) 659{ 660 struct vnode *vp = ap->a_vp; 661 struct nfsnode *np = VTONFS(vp); 662 struct vattr *vap = ap->a_vap; 663 int error = 0; 664 u_quad_t tsize; 665 666#ifndef nolint 667 tsize = (u_quad_t)0; 668#endif 669 670 /* 671 * Setting of flags and marking of atimes are not supported. 672 */ 673 if (vap->va_flags != VNOVAL || (vap->va_vaflags & VA_MARK_ATIME)) 674 return (EOPNOTSUPP); 675 676 /* 677 * Disallow write attempts if the filesystem is mounted read-only. 678 */ 679 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 680 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 681 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && 682 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 683 error = EROFS; 684 goto out; 685 } 686 if (vap->va_size != VNOVAL) { 687 switch (vp->v_type) { 688 case VDIR: 689 return (EISDIR); 690 case VCHR: 691 case VBLK: 692 case VSOCK: 693 case VFIFO: 694 if (vap->va_mtime.tv_sec == VNOVAL && 695 vap->va_atime.tv_sec == VNOVAL && 696 vap->va_mode == (mode_t)VNOVAL && 697 vap->va_uid == (uid_t)VNOVAL && 698 vap->va_gid == (gid_t)VNOVAL) 699 return (0); 700 vap->va_size = VNOVAL; 701 break; 702 default: 703 /* 704 * Disallow write attempts if the filesystem is 705 * mounted read-only. 706 */ 707 if (vp->v_mount->mnt_flag & MNT_RDONLY) 708 return (EROFS); 709 /* 710 * We run vnode_pager_setsize() early (why?), 711 * we must set np->n_size now to avoid vinvalbuf 712 * V_SAVE races that might setsize a lower 713 * value. 
714 */ 715 mtx_lock(&np->n_mtx); 716 tsize = np->n_size; 717 mtx_unlock(&np->n_mtx); 718 error = nfs_meta_setsize(vp, ap->a_cred, 719 ap->a_td, vap->va_size); 720 mtx_lock(&np->n_mtx); 721 if (np->n_flag & NMODIFIED) { 722 tsize = np->n_size; 723 mtx_unlock(&np->n_mtx); 724 if (vap->va_size == 0) 725 error = nfs_vinvalbuf(vp, 0, ap->a_td, 1); 726 else 727 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 728 if (error) { 729 vnode_pager_setsize(vp, tsize); 730 goto out; 731 } 732 } else 733 mtx_unlock(&np->n_mtx); 734 /* 735 * np->n_size has already been set to vap->va_size 736 * in nfs_meta_setsize(). We must set it again since 737 * nfs_loadattrcache() could be called through 738 * nfs_meta_setsize() and could modify np->n_size. 739 */ 740 mtx_lock(&np->n_mtx); 741 np->n_vattr.va_size = np->n_size = vap->va_size; 742 mtx_unlock(&np->n_mtx); 743 }; 744 } else { 745 mtx_lock(&np->n_mtx); 746 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 747 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 748 mtx_unlock(&np->n_mtx); 749 if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) != 0 && 750 (error == EINTR || error == EIO)) 751 return error; 752 } else 753 mtx_unlock(&np->n_mtx); 754 } 755 error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_td); 756 if (error && vap->va_size != VNOVAL) { 757 mtx_lock(&np->n_mtx); 758 np->n_size = np->n_vattr.va_size = tsize; 759 vnode_pager_setsize(vp, tsize); 760 mtx_unlock(&np->n_mtx); 761 } 762out: 763 return (error); 764} 765 766/* 767 * Do an nfs setattr rpc. 
768 */ 769static int 770nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, 771 struct thread *td) 772{ 773 struct nfsv2_sattr *sp; 774 struct nfsnode *np = VTONFS(vp); 775 caddr_t bpos, dpos; 776 u_int32_t *tl; 777 int error = 0, wccflag = NFSV3_WCCRATTR; 778 struct mbuf *mreq, *mrep, *md, *mb; 779 int v3 = NFS_ISV3(vp); 780 781 nfsstats.rpccnt[NFSPROC_SETATTR]++; 782 mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); 783 mb = mreq; 784 bpos = mtod(mb, caddr_t); 785 nfsm_fhtom(vp, v3); 786 if (v3) { 787 nfsm_v3attrbuild(vap, TRUE); 788 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); 789 *tl = nfs_false; 790 } else { 791 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); 792 if (vap->va_mode == (mode_t)VNOVAL) 793 sp->sa_mode = nfs_xdrneg1; 794 else 795 sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); 796 if (vap->va_uid == (uid_t)VNOVAL) 797 sp->sa_uid = nfs_xdrneg1; 798 else 799 sp->sa_uid = txdr_unsigned(vap->va_uid); 800 if (vap->va_gid == (gid_t)VNOVAL) 801 sp->sa_gid = nfs_xdrneg1; 802 else 803 sp->sa_gid = txdr_unsigned(vap->va_gid); 804 sp->sa_size = txdr_unsigned(vap->va_size); 805 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); 806 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); 807 } 808 nfsm_request(vp, NFSPROC_SETATTR, td, cred); 809 if (v3) { 810 np->n_modestamp = 0; 811 nfsm_wcc_data(vp, wccflag); 812 } else 813 nfsm_loadattr(vp, NULL); 814 m_freem(mrep); 815nfsmout: 816 return (error); 817} 818 819/* 820 * nfs lookup call, one step at a time... 
821 * First look in cache 822 * If not found, unlock the directory nfsnode and do the rpc 823 */ 824static int 825nfs_lookup(struct vop_lookup_args *ap) 826{ 827 struct componentname *cnp = ap->a_cnp; 828 struct vnode *dvp = ap->a_dvp; 829 struct vnode **vpp = ap->a_vpp; 830 int flags = cnp->cn_flags; 831 struct vnode *newvp; 832 struct nfsmount *nmp; 833 caddr_t bpos, dpos; 834 struct mbuf *mreq, *mrep, *md, *mb; 835 long len; 836 nfsfh_t *fhp; 837 struct nfsnode *np; 838 int error = 0, attrflag, fhsize; 839 int v3 = NFS_ISV3(dvp); 840 struct thread *td = cnp->cn_thread; 841 842 *vpp = NULLVP; 843 if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 844 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 845 return (EROFS); 846 if (dvp->v_type != VDIR) 847 return (ENOTDIR); 848 nmp = VFSTONFS(dvp->v_mount); 849 np = VTONFS(dvp); 850 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) { 851 *vpp = NULLVP; 852 return (error); 853 } 854 if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { 855 struct vattr vattr; 856 857 newvp = *vpp; 858 if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, td) 859 && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { 860 nfsstats.lookupcache_hits++; 861 if (cnp->cn_nameiop != LOOKUP && 862 (flags & ISLASTCN)) 863 cnp->cn_flags |= SAVENAME; 864 return (0); 865 } 866 cache_purge(newvp); 867 if (dvp != newvp) 868 vput(newvp); 869 else 870 vrele(newvp); 871 *vpp = NULLVP; 872 } 873 error = 0; 874 newvp = NULLVP; 875 nfsstats.lookupcache_misses++; 876 nfsstats.rpccnt[NFSPROC_LOOKUP]++; 877 len = cnp->cn_namelen; 878 mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, 879 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); 880 mb = mreq; 881 bpos = mtod(mb, caddr_t); 882 nfsm_fhtom(dvp, v3); 883 nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); 884 nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred); 885 if (error) { 886 if (v3) { 887 nfsm_postop_attr(dvp, attrflag); 888 m_freem(mrep); 889 } 890 goto nfsmout; 
891 } 892 nfsm_getfh(fhp, fhsize, v3); 893 894 /* 895 * Handle RENAME case... 896 */ 897 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { 898 if (NFS_CMPFH(np, fhp, fhsize)) { 899 m_freem(mrep); 900 return (EISDIR); 901 } 902 error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE); 903 if (error) { 904 m_freem(mrep); 905 return (error); 906 } 907 newvp = NFSTOV(np); 908 if (v3) { 909 nfsm_postop_attr(newvp, attrflag); 910 nfsm_postop_attr(dvp, attrflag); 911 } else 912 nfsm_loadattr(newvp, NULL); 913 *vpp = newvp; 914 m_freem(mrep); 915 cnp->cn_flags |= SAVENAME; 916 return (0); 917 } 918 919 if (flags & ISDOTDOT) { 920 VOP_UNLOCK(dvp, 0, td); 921 error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags); 922 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); 923 if (error) 924 return (error); 925 newvp = NFSTOV(np); 926 } else if (NFS_CMPFH(np, fhp, fhsize)) { 927 VREF(dvp); 928 newvp = dvp; 929 } else { 930 error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags); 931 if (error) { 932 m_freem(mrep); 933 return (error); 934 } 935 newvp = NFSTOV(np); 936 } 937 if (v3) { 938 nfsm_postop_attr(newvp, attrflag); 939 nfsm_postop_attr(dvp, attrflag); 940 } else 941 nfsm_loadattr(newvp, NULL); 942 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) 943 cnp->cn_flags |= SAVENAME; 944 if ((cnp->cn_flags & MAKEENTRY) && 945 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { 946 np->n_ctime = np->n_vattr.va_ctime.tv_sec; 947 cache_enter(dvp, newvp, cnp); 948 } 949 *vpp = newvp; 950 m_freem(mrep); 951nfsmout: 952 if (error) { 953 if (newvp != NULLVP) { 954 vput(newvp); 955 *vpp = NULLVP; 956 } 957 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && 958 (flags & ISLASTCN) && error == ENOENT) { 959 if (dvp->v_mount->mnt_flag & MNT_RDONLY) 960 error = EROFS; 961 else 962 error = EJUSTRETURN; 963 } 964 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) 965 cnp->cn_flags |= SAVENAME; 966 } 967 return (error); 968} 969 970/* 971 * nfs read 
call.
 * Just call nfs_bioread() to do the work.
 *
 * Only regular files are readable here: VREG is handed to nfs_bioread(),
 * VDIR returns EISDIR and every other vnode type returns EOPNOTSUPP.
 */
static int
nfs_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VREG:
		return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
	case VDIR:
		return (EISDIR);
	default:
		return (EOPNOTSUPP);
	}
}

/*
 * nfs readlink call
 *
 * Returns EINVAL unless the vnode is a symbolic link; otherwise the link
 * text is fetched through nfs_bioread() with an ioflag of 0.
 */
static int
nfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_type != VLNK)
		return (EINVAL);
	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
}

/*
 * Do a readlink rpc.
 * Called by nfs_doio() from below the buffer cache.
 *
 * Sends a READLINK request for vp and copies the returned path into uiop.
 * Returns the value left in "error".
 *
 * NOTE(review): the nfsm_* macros are defined outside this file section;
 * they presumably set "error", fill in mrep/md/dpos and jump to nfsmout on
 * failure (the label has no explicit goto in this function) -- confirm
 * against nfsm_subs.h.
 */
int
nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	caddr_t bpos, dpos;
	int error = 0, len, attrflag;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(vp);

	nfsstats.rpccnt[NFSPROC_READLINK]++;
	mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
	if (v3)
		nfsm_postop_attr(vp, attrflag);
	if (!error) {
		nfsm_strsiz(len, NFS_MAXPATHLEN);
		/*
		 * A reported length of exactly NFS_MAXPATHLEN is replaced by
		 * the cached node size when that size is non-zero and smaller.
		 */
		if (len == NFS_MAXPATHLEN) {
			struct nfsnode *np = VTONFS(vp);
			mtx_lock(&np->n_mtx);
			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
				len = np->n_size;
			mtx_unlock(&np->n_mtx);
		}
		nfsm_mtouio(uiop, len);
	}
	m_freem(mrep);
nfsmout:
	return (error);
}

/*
 * nfs read rpc call
 * Ditto above
 *
 * Reads uiop->uio_resid bytes starting at uiop->uio_offset by issuing READ
 * rpcs of at most nm_rsize bytes each.  Returns EFBIG before any rpc is sent
 * when the request would extend past nm_maxfilesize; otherwise returns the
 * value left in "error".  The loop stops early on NFSv3 when the reply
 * carries eof or no data, and on NFSv2 when a reply is shorter than the
 * amount asked for.
 */
int
nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	u_int32_t *tl;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsmount *nmp;
	int error = 0, len, retlen, tsiz, eof, attrflag;
	int v3 = NFS_ISV3(vp);
	int rsize;

#ifndef nolint
	eof = 0;
#endif
	nmp = VFSTONFS(vp->v_mount);
	tsiz = uiop->uio_resid;
	/* nm_maxfilesize and nm_rsize are sampled under the mount mutex. */
	mtx_lock(&nmp->nm_mtx);
	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
		mtx_unlock(&nmp->nm_mtx);
		return (EFBIG);
	}
	rsize = nmp->nm_rsize;
	mtx_unlock(&nmp->nm_mtx);
	while (tsiz > 0) {
		nfsstats.rpccnt[NFSPROC_READ]++;
		len = (tsiz > rsize) ? rsize : tsiz;
		mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, v3);
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3);
		if (v3) {
			/* 64-bit offset in the first two words, then count. */
			txdr_hyper(uiop->uio_offset, tl);
			*(tl + 2) = txdr_unsigned(len);
		} else {
			/* 32-bit offset, count, and a third word sent as 0. */
			*tl++ = txdr_unsigned(uiop->uio_offset);
			*tl++ = txdr_unsigned(len);
			*tl = 0;
		}
		nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred);
		if (v3) {
			nfsm_postop_attr(vp, attrflag);
			if (error) {
				m_freem(mrep);
				goto nfsmout;
			}
			/* Of the two words dissected, only the second (eof) is used. */
			tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
			eof = fxdr_unsigned(int, *(tl + 1));
		} else {
			nfsm_loadattr(vp, NULL);
		}
		nfsm_strsiz(retlen, rsize);
		nfsm_mtouio(uiop, retlen);
		m_freem(mrep);
		tsiz -= retlen;
		if (v3) {
			if (eof || retlen == 0) {
				tsiz = 0;
			}
		} else if (retlen < len) {
			tsiz = 0;
		}
	}
nfsmout:
	return (error);
}

/*
 * nfs write call
 *
 * Writes the uio to the server in WRITE rpcs of at most nm_wsize bytes.
 *	iomode		in: stability level placed in each NFSv3 request;
 *			out: the lowest level reported by the replies, or
 *			NFSV3WRITE_FILESYNC when the mount has MNTK_ASYNC set
 *			or no NFSv3 reply changed it.
 *	must_commit	out: 1 when a reply's write verifier differs from the
 *			one already stored in the mount, otherwise 0.
 * Returns EFBIG before any rpc is sent when the write would extend past
 * nm_maxfilesize, NFSERR_IO when an NFSv3 reply reports zero bytes written,
 * or the value left in "error".  On error uio_resid is set to the byte count
 * still outstanding (tsiz).
 */
int
nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
	     int *iomode, int *must_commit)
{
	u_int32_t *tl;
	int32_t backup;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
	int wsize;

	/*
	 * NOTE(review): this check is compiled only when DIAGNOSTIC is NOT
	 * defined (#ifndef) -- confirm that is intended.
	 */
#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1)
		panic("nfs: writerpc iovcnt > 1");
#endif
	*must_commit = 0;
	tsiz = uiop->uio_resid;
	/* nm_maxfilesize and nm_wsize are sampled under the mount mutex. */
	mtx_lock(&nmp->nm_mtx);
	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
		mtx_unlock(&nmp->nm_mtx);
		return (EFBIG);
	}
	wsize = nmp->nm_wsize;
	mtx_unlock(&nmp->nm_mtx);
	while (tsiz > 0) {
		nfsstats.rpccnt[NFSPROC_WRITE]++;
		len = (tsiz > wsize) ? wsize : tsiz;
		mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
			NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, v3);
		if (v3) {
			/* 64-bit offset, count, stability mode, data length. */
			tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
			txdr_hyper(uiop->uio_offset, tl);
			tl += 2;
			*tl++ = txdr_unsigned(len);
			*tl++ = txdr_unsigned(*iomode);
			*tl = txdr_unsigned(len);
		} else {
			u_int32_t x;

			tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
			/* Set both "begin" and "current" to non-garbage. */
			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
			*tl++ = x;	/* "begin offset" */
			*tl++ = x;	/* "current offset" */
			x = txdr_unsigned(len);
			*tl++ = x;	/* total to this offset */
			*tl = x;	/* size of this write */
		}
		nfsm_uiotom(uiop, len);
		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred);
		if (v3) {
			wccflag = NFSV3_WCCCHK;
			nfsm_wcc_data(vp, wccflag);
			if (!error) {
				tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED
					+ NFSX_V3WRITEVERF);
				rlen = fxdr_unsigned(int, *tl++);
				if (rlen == 0) {
					error = NFSERR_IO;
					m_freem(mrep);
					break;
				} else if (rlen < len) {
					/*
					 * Short write: rewind the uio by the
					 * unwritten amount so the next pass
					 * sends those bytes again.
					 */
					backup = len - rlen;
					uiop->uio_iov->iov_base =
					    (char *)uiop->uio_iov->iov_base -
					    backup;
					uiop->uio_iov->iov_len += backup;
					uiop->uio_offset -= backup;
					uiop->uio_resid += backup;
					len = rlen;
				}
				commit = fxdr_unsigned(int, *tl++);

				/*
				 * Return the lowest commitment level
				 * obtained by any of the RPCs.
				 */
				if (committed == NFSV3WRITE_FILESYNC)
					committed = commit;
				else if (committed == NFSV3WRITE_DATASYNC &&
					commit == NFSV3WRITE_UNSTABLE)
					committed = commit;
				/*
				 * tl now points at the write verifier.  The
				 * first one seen is stored in the mount; a
				 * later mismatch replaces the stored copy and
				 * raises *must_commit.
				 */
				mtx_lock(&nmp->nm_mtx);
				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
					NFSX_V3WRITEVERF);
				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
				} else if (bcmp((caddr_t)tl,
				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
				    *must_commit = 1;
				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
					NFSX_V3WRITEVERF);
				}
				mtx_unlock(&nmp->nm_mtx);
			}
		} else {
			nfsm_loadattr(vp, NULL);
		}
		if (wccflag) {
			/* Copy the node's cached va_mtime into n_mtime. */
			mtx_lock(&(VTONFS(vp))->n_mtx);
			VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
			mtx_unlock(&(VTONFS(vp))->n_mtx);
		}
		m_freem(mrep);
		if (error)
			break;
		tsiz -= len;
	}
nfsmout:
	if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
		committed = NFSV3WRITE_FILESYNC;
	*iomode = committed;
	if (error)
		uiop->uio_resid = tsiz;
	return (error);
}

/*
 * nfs mknod rpc
 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 * mode set to specify the file type and the size field for rdev.
*/
/*
 * Creates the node named by cnp in directory dvp.  va_type must be VCHR,
 * VBLK, VFIFO or VSOCK; anything else returns EOPNOTSUPP before an rpc is
 * built.  On success the new vnode is stored in *vpp and, when MAKEENTRY is
 * set, entered in the name cache.  On failure any vnode obtained is released
 * with vput().  Once the rpc has been attempted dvp is flagged NMODIFIED and
 * its attribute timestamp is cleared when wccflag ends up 0.
 */
static int
nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
    struct vattr *vap)
{
	struct nfsv2_sattr *sp;
	u_int32_t *tl;
	struct vnode *newvp = NULL;
	struct nfsnode *np = NULL;
	struct vattr vattr;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	u_int32_t rdev;
	int v3 = NFS_ISV3(dvp);

	/* rdev is only consumed by the NFSv2 branch below (sa_size). */
	if (vap->va_type == VCHR || vap->va_type == VBLK)
		rdev = txdr_unsigned(vap->va_rdev);
	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
		rdev = nfs_xdrneg1;
	else {
		return (EOPNOTSUPP);
	}
	/* Only the error from this call is used; vattr is not read afterwards. */
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
		return (error);
	}
	nfsstats.rpccnt[NFSPROC_MKNOD]++;
	/* The leading '+' on the continuation line is a unary plus (no effect). */
	mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
		+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	if (v3) {
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		*tl++ = vtonfsv3_type(vap->va_type);
		nfsm_v3attrbuild(vap, FALSE);
		/* Device nodes additionally carry major and minor numbers. */
		if (vap->va_type == VCHR || vap->va_type == VBLK) {
			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(umajor(vap->va_rdev));
			*tl = txdr_unsigned(uminor(vap->va_rdev));
		}
	} else {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = rdev;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred);
	if (!error) {
		nfsm_mtofh(dvp, newvp, v3, gotvp);
		/* No vnode came back with the reply: look the name up instead. */
		if (!gotvp) {
			if (newvp) {
				vput(newvp);
				newvp = NULL;
			}
			error = nfs_lookitup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
			if (!error)
				newvp = NFSTOV(np);
		}
	}
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		if (cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, newvp, cnp);
		*vpp = newvp;
	}
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	return (error);
}

/*
 * nfs mknod vop
 * just call nfs_mknodrpc() to do the work.
 */
/* ARGSUSED */
static int
nfs_mknod(struct vop_mknod_args *ap)
{
	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
}

/*
 * Counter used by nfs_create() to build the NFSv3 exclusive-create verifier;
 * it is incremented on every exclusive create attempt.
 */
static u_long create_verf;
/*
 * nfs file create call
 *
 * Creates the file named by ap->a_cnp in ap->a_dvp.  A VSOCK request is
 * handed to nfs_mknodrpc().  With VA_EXCLUSIVE set an NFSv3 exclusive create
 * is tried first; if that fails with NFSERR_NOTSUPP the request is rebuilt
 * as an unchecked create and sent again.  After a successful exclusive
 * create the attributes are set with nfs_setattrrpc().  On success the new
 * vnode is stored in *ap->a_vpp and, when MAKEENTRY is set, entered in the
 * name cache.
 */
static int
nfs_create(struct vop_create_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	u_int32_t *tl;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vattr vattr;
	int v3 = NFS_ISV3(dvp);

	/*
	 * Oops, not for me..
	 */
	if (vap->va_type == VSOCK)
		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));

	/* Only the error from this call is used; vattr is not read afterwards. */
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
		return (error);
	}
	if (vap->va_vaflags & VA_EXCLUSIVE)
		fmode |= O_EXCL;
again:
	nfsstats.rpccnt[NFSPROC_CREATE]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	if (v3) {
		tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
		if (fmode & O_EXCL) {
			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
			tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF);
			/*
			 * Verifier: first word is the address of the first
			 * configured interface when INET is compiled in and
			 * one exists, otherwise the current counter value;
			 * second word is the incremented counter.
			 */
#ifdef INET
			if (!TAILQ_EMPTY(&in_ifaddrhead))
				*tl++ = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr.s_addr;
			else
#endif
				*tl++ = create_verf;
			*tl = ++create_verf;
		} else {
			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
			nfsm_v3attrbuild(vap, FALSE);
		}
	} else {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = 0;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred);
	if (!error) {
		nfsm_mtofh(dvp, newvp, v3, gotvp);
		/* No vnode came back with the reply: look the name up instead. */
		if (!gotvp) {
			if (newvp) {
				vput(newvp);
				newvp = NULL;
			}
			error = nfs_lookitup(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
			if (!error)
				newvp = NFSTOV(np);
		}
	}
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	if (error) {
		/* Exclusive create not supported: retry as an unchecked create. */
		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
			fmode &= ~O_EXCL;
			goto again;
		}
		if (newvp)
			vput(newvp);
	} else if (v3 && (fmode & O_EXCL)) {
		/*
		 * We are normally called with only a partially initialized
		 * VAP.  Since the NFSv3 spec says that server may use the
		 * file attributes to store the verifier, the spec requires
		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
		 * in atime, but we can't really assume that all servers will
		 * so we ensure that our SETATTR sets both atime and mtime.
		 */
		if (vap->va_mtime.tv_sec == VNOVAL)
			vfs_timestamp(&vap->va_mtime);
		if (vap->va_atime.tv_sec == VNOVAL)
			vap->va_atime = vap->va_mtime;
		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_thread);
		if (error)
			vput(newvp);
	}
	if (!error) {
		if (cnp->cn_flags & MAKEENTRY)
			cache_enter(dvp, newvp, cnp);
		*ap->a_vpp = newvp;
	}
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	return (error);
}

/*
 * nfs file remove call
 * To try and make nfs semantics closer to ufs semantics, a file that has
 * other processes using the vnode is renamed instead of removed and then
 * removed later on the last close.
* - If v_usecount > 1
 *	  If a rename is not already in the works
 *	     call nfs_sillyrename() to set it up
 *     else
 *	  do the remove rpc
 *
 * As coded: a directory returns EPERM.  The remove rpc is issued when the
 * reference count is 1, or when the node is already sillyrenamed and a
 * successful VOP_GETATTR() reports more than one link.  Otherwise, if no
 * sillyrename is pending, nfs_sillyrename() is called.  The node's attribute
 * timestamp is cleared on every path.
 */
static int
nfs_remove(struct vop_remove_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

	/*
	 * NOTE(review): these checks are compiled only when DIAGNOSTIC is NOT
	 * defined (#ifndef) -- confirm that is intended.
	 */
#ifndef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("nfs_remove: no name");
	if (vrefcnt(vp) < 1)
		panic("nfs_remove: bad v_usecount");
#endif
	if (vp->v_type == VDIR)
		error = EPERM;
	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_thread) == 0 &&
	    vattr.va_nlink > 1)) {
		/*
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized. Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but since you get this if
		 * another host removes the file..
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1);
		/* Do the rpc */
		if (error != EINTR && error != EIO)
			error = nfs_removerpc(dvp, cnp->cn_nameptr,
				cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost..
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp);
	np->n_attrstamp = 0;
	return (error);
}

/*
 * nfs file remove rpc called from nfs_inactive
 *
 * Issues the remove rpc for the name recorded in sp, using a NULL thread.
 * Returns 0 without sending anything when the saved directory vnode has
 * type VBAD.
 */
int
nfs_removeit(struct sillyrename *sp)
{
	/*
	 * Make sure that the directory vnode is still valid.
	 * XXX we should lock sp->s_dvp here.
	 */
	if (sp->s_dvp->v_type == VBAD)
		return (0);
	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
		NULL));
}

/*
 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
 *
 * Sends REMOVE for "name" (namelen bytes) in directory dvp and returns the
 * value left in "error".  dvp is flagged NMODIFIED afterwards and its
 * attribute timestamp is cleared when wccflag ends up 0.
 *
 * NOTE(review): the nfsm_* macros are defined outside this file section;
 * they presumably set "error" and jump to nfsmout on failure -- confirm
 * against nfsm_subs.h.
 */
static int
nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
    struct ucred *cred, struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_REMOVE]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE,
		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	return (error);
}

/*
 * nfs file rename call
 *
 * Renames fcnp in fdvp to tcnp in tdvp.  Returns EXDEV when the source and
 * destination are on different mounts.  Dirty data for the source (and the
 * target, when one exists) is flushed first, and the rename is skipped if a
 * flush fails.  On exit, whatever the outcome, tdvp and tvp are released
 * with vput() (vrele() for tdvp when it is the same vnode as tvp) and
 * fdvp/fvp with vrele().  ENOENT is reported as success.
 */
static int
nfs_rename(struct vop_rename_args *ap)
{
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	int error;

	/*
	 * NOTE(review): this check is compiled only when DIAGNOSTIC is NOT
	 * defined (#ifndef) -- confirm that is intended.
	 */
#ifndef DIAGNOSTIC
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("nfs_rename: no name");
#endif
	/* Check for cross-device rename */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out;
	}

	if (fvp == tvp) {
		nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto out;
	}
	if ((error = vn_lock(fvp, LK_EXCLUSIVE, fcnp->cn_thread)) != 0)
		goto out;

	/*
	 * We have to flush B_DELWRI data prior to renaming
	 * the file.  If we don't, the delayed-write buffers
	 * can be flushed out later after the file has gone stale
	 * under NFSV3.  NFSV2 does not have this problem because
	 * ( as far as I can tell ) it flushes dirty buffers more
	 * often.
	 *
	 * Skip the rename operation if the fsync fails, this can happen
	 * due to the server's volume being full, when we pushed out data
	 * that was written back to our cache earlier. Not checking for
	 * this condition can result in potential (silent) data loss.
	 */
	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
	VOP_UNLOCK(fvp, 0, fcnp->cn_thread);
	if (!error && tvp)
		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
	if (error)
		goto out;

	/*
	 * If the tvp exists and is in use, sillyrename it before doing the
	 * rename of the new file over it.
	 * XXX Can't sillyrename a directory.
	 */
	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
		/* Sillyrename succeeded: drop tvp so "out" does not vput() it again. */
		vput(tvp);
		tvp = NULL;
	}

	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
		tcnp->cn_thread);

	if (fvp->v_type == VDIR) {
		if (tvp != NULL && tvp->v_type == VDIR)
			cache_purge(tdvp);
		cache_purge(fdvp);
	}

out:
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs file rename rpc: renames scnp's name to sp->s_name inside the single
 * directory sdvp.
 *
 * NOTE(review): the previous comment said "called from nfs_remove() above",
 * but nfs_remove() as written calls nfs_sillyrename(); presumably that is
 * the real caller -- confirm.
 */
static int
nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
    struct sillyrename *sp)
{

	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp,
	    sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
}

/*
 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
*/
/*
 * Sends RENAME of fnameptr in fdvp to tnameptr in tdvp and returns the
 * value left in "error".  Both directories are flagged NMODIFIED afterwards;
 * each one's attribute timestamp is cleared when its wcc flag ends up 0.
 *
 * NOTE(review): the nfsm_* macros are defined outside this file section;
 * they presumably set "error" and jump to nfsmout on failure -- confirm
 * against nfsm_subs.h.
 * NOTE(review): n_attrstamp is cleared outside n_mtx here, whereas
 * nfs_removerpc() clears it with the mutex held -- confirm which is intended.
 */
static int
nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
    struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred,
    struct thread *td)
{
	caddr_t bpos, dpos;
	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(fdvp);

	nfsstats.rpccnt[NFSPROC_RENAME]++;
	mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME,
		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
		nfsm_rndup(tnamelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(fdvp, v3);
	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
	nfsm_fhtom(tdvp, v3);
	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
	nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
	if (v3) {
		nfsm_wcc_data(fdvp, fwccflag);
		nfsm_wcc_data(tdvp, twccflag);
	}
	m_freem(mrep);
nfsmout:
	mtx_lock(&(VTONFS(fdvp))->n_mtx);
	VTONFS(fdvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(fdvp))->n_mtx);
	mtx_lock(&(VTONFS(tdvp))->n_mtx);
	VTONFS(tdvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(tdvp))->n_mtx);
	if (!fwccflag)
		VTONFS(fdvp)->n_attrstamp = 0;
	if (!twccflag)
		VTONFS(tdvp)->n_attrstamp = 0;
	return (error);
}

/*
 * nfs hard link create call
 *
 * Links ap->a_vp under the name in ap->a_cnp inside ap->a_tdvp.  Returns
 * EXDEV when the two vnodes are on different mounts.  EEXIST from the rpc is
 * reported as success.
 */
static int
nfs_link(struct vop_link_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3;

	if (vp->v_mount != tdvp->v_mount) {
		return (EXDEV);
	}

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 *
	 * NOTE(review): the VOP_FSYNC() result is discarded here, unlike in
	 * nfs_rename() -- confirm that best-effort is intended.
	 */
	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);

	v3 = NFS_ISV3(vp);
	nfsstats.rpccnt[NFSPROC_LINK]++;
	mreq = nfsm_reqhead(vp, NFSPROC_LINK,
		NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(vp, v3);
	nfsm_fhtom(tdvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred);
	if (v3) {
		nfsm_postop_attr(vp, attrflag);
		nfsm_wcc_data(tdvp, wccflag);
	}
	m_freem(mrep);
nfsmout:
	mtx_lock(&(VTONFS(tdvp))->n_mtx);
	VTONFS(tdvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(tdvp))->n_mtx);
	/* attrflag starts at 0, so vp's timestamp is cleared unless it gets set. */
	if (!attrflag)
		VTONFS(vp)->n_attrstamp = 0;
	if (!wccflag)
		VTONFS(tdvp)->n_attrstamp = 0;
	/*
	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
	 */
	if (error == EEXIST)
		error = 0;
	return (error);
}

/*
 * nfs symbolic link create call
 *
 * Creates a symlink named by ap->a_cnp in ap->a_dvp whose contents are
 * ap->a_target.  EEXIST from the rpc is treated as success.  When no vnode
 * was obtained from the reply, the name is looked up with nfs_lookitup().
 * On success the vnode is stored in *ap->a_vpp.
 */
static int
nfs_symlink(struct vop_symlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	caddr_t bpos, dpos;
	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vnode *newvp = NULL;
	int v3 = NFS_ISV3(dvp);

	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
	slen = strlen(ap->a_target);
	mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
	    nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	/* NFSv3 puts the attributes before the target, NFSv2 after it. */
	if (v3) {
		nfsm_v3attrbuild(vap, FALSE);
	}
	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
	if (!v3) {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}

	/*
	 * Issue the NFS request and get the rpc response.
	 *
	 * Only NFSv3 responses returning an error of 0 actually return
	 * a file handle that can be converted into newvp without having
	 * to do an extra lookup rpc.
	 */
	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred);
	if (v3) {
		if (error == 0)
			nfsm_mtofh(dvp, newvp, v3, gotvp);
		nfsm_wcc_data(dvp, wccflag);
	}

	/*
	 * out code jumps -> here, mrep is also freed.
	 */

	m_freem(mrep);
nfsmout:

	/*
	 * If we get an EEXIST error, silently convert it to no-error
	 * in case of an NFS retry.
	 */
	if (error == EEXIST)
		error = 0;

	/*
	 * If we do not have (or no longer have) an error, and we could
	 * not extract the newvp from the response due to the request being
	 * NFSv2 or the error being EEXIST.  We have to do a lookup in order
	 * to obtain a newvp to return.
	 */
	if (error == 0 && newvp == NULL) {
		struct nfsnode *np = NULL;

		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, cnp->cn_thread, &np);
		if (!error)
			newvp = NFSTOV(np);
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else {
		*ap->a_vpp = newvp;
	}
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	return (error);
}

/*
 * nfs make dir call
 *
 * Creates the directory named by ap->a_cnp in ap->a_dvp.  When the rpc
 * reports EEXIST, or succeeds without yielding a vnode, the name is looked
 * up; the result is accepted only if it is a directory, otherwise EEXIST is
 * returned.  On success the vnode is stored in *ap->a_vpp.
 */
static int
nfs_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsv2_sattr *sp;
	int len;
	struct nfsnode *np = NULL;
	struct vnode *newvp = NULL;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	int gotvp = 0;
	struct mbuf *mreq, *mrep, *md, *mb;
	struct vattr vattr;
	int v3 = NFS_ISV3(dvp);

	/* Only the error from this call is used; vattr is not read afterwards. */
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
		return (error);
	}
	len = cnp->cn_namelen;
	nfsstats.rpccnt[NFSPROC_MKDIR]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR,
	  NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
	if (v3) {
		nfsm_v3attrbuild(vap, FALSE);
	} else {
		sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
		sp->sa_uid = nfs_xdrneg1;
		sp->sa_gid = nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1;
		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
	}
	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
	if (!error)
		nfsm_mtofh(dvp, newvp, v3, gotvp);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	/*
	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
	 * if we can succeed in looking up the directory.
	 */
	if (error == EEXIST || (!error && !gotvp)) {
		if (newvp) {
			vput(newvp);
			newvp = NULL;
		}
		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
			cnp->cn_thread, &np);
		if (!error) {
			newvp = NFSTOV(np);
			if (newvp->v_type != VDIR)
				error = EEXIST;
		}
	}
	if (error) {
		if (newvp)
			vput(newvp);
	} else
		*ap->a_vpp = newvp;
	return (error);
}

/*
 * nfs remove directory call
 *
 * Removes the directory named by ap->a_cnp from ap->a_dvp.  Returns EINVAL
 * when the directory and its parent are the same vnode.  Name cache entries
 * for both vnodes are purged whatever the rpc outcome, and ENOENT is
 * reported as success.
 */
static int
nfs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	caddr_t bpos, dpos;
	int error = 0, wccflag = NFSV3_WCCRATTR;
	struct mbuf *mreq, *mrep, *md, *mb;
	int v3 = NFS_ISV3(dvp);

	if (dvp == vp)
		return (EINVAL);
	nfsstats.rpccnt[NFSPROC_RMDIR]++;
	mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR,
		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
	mb = mreq;
	bpos = mtod(mb, caddr_t);
	nfsm_fhtom(dvp, v3);
	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
	if (v3)
		nfsm_wcc_data(dvp, wccflag);
	m_freem(mrep);
nfsmout:
	mtx_lock(&(VTONFS(dvp))->n_mtx);
	VTONFS(dvp)->n_flag |= NMODIFIED;
	mtx_unlock(&(VTONFS(dvp))->n_mtx);
	if (!wccflag)
		VTONFS(dvp)->n_attrstamp = 0;
	cache_purge(dvp);
	cache_purge(vp);
	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs readdir call
 *
 * Returns EPERM for a non-directory vnode.  Returns 0 without reading
 * anything when the request starts at or beyond the cached EOF offset, the
 * node is not flagged NMODIFIED, and a successful VOP_GETATTR() shows
 * NFS_TIMESPEC_COMPARE() evaluating to 0 for n_mtime against the fetched
 * mtime.  Otherwise the work is done by nfs_bioread().
 */
static int
nfs_readdir(struct vop_readdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct uio *uio = ap->a_uio;
	int tresid, error = 0;
	struct vattr vattr;

	if (vp->v_type != VDIR)
		return(EPERM);

	/*
	 * First, check for hit on the EOF offset cache
	 */
	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
	    (np->n_flag & NMODIFIED) == 0) {
		if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0) {
			mtx_lock(&np->n_mtx);
			if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
				mtx_unlock(&np->n_mtx);
				nfsstats.direofcache_hits++;
				goto out;
			} else
				mtx_unlock(&np->n_mtx);
		}
	}

	/*
	 * Call nfs_bioread() to do the real work.
	 */
	tresid = uio->uio_resid;
	error = nfs_bioread(vp, uio, 0, ap->a_cred);

	/* A successful read that moved no data is counted as an EOF-cache miss. */
	if (!error && uio->uio_resid == tresid) {
		nfsstats.direofcache_misses++;
	}
out:
	return (error);
}

/*
 * Readdir rpc call.
 * Called from below the buffer cache by nfs_doio().
*/
/*
 * Fills uiop with struct dirent records built from READDIR replies, starting
 * from the cookie stored for uiop->uio_offset.  Returns NFSERR_BAD_COOKIE
 * when no cookie is stored for that offset, EBADRPC when an entry's name
 * length is <= 0 or greater than NFS_MAXNAMLEN, or the value left in
 * "error".  Records are padded so that none crosses a DIRBLKSIZ boundary.
 * When the loop ends with room to spare the resulting offset is recorded as
 * the directory EOF offset; otherwise the last cookie is stored for the
 * resulting offset.
 *
 * NOTE(review): the nfsm_* macros are defined outside this file section;
 * they presumably set "error" and jump to nfsmout on failure -- confirm
 * against nfsm_subs.h.
 */
int
nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int len, left;
	struct dirent *dp = NULL;
	u_int32_t *tl;
	caddr_t cp;
	nfsuint64 *cookiep;
	caddr_t bpos, dpos;
	struct mbuf *mreq, *mrep, *md, *mb;
	nfsuint64 cookie;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *dnp = VTONFS(vp);
	u_quad_t fileno;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
	int attrflag;
	int v3 = NFS_ISV3(vp);

	/*
	 * NOTE(review): this check is compiled only when DIAGNOSTIC is NOT
	 * defined (#ifndef) -- confirm that is intended.
	 */
#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
		(uiop->uio_resid & (DIRBLKSIZ - 1)))
		panic("nfs readdirrpc bad uio");
#endif

	/*
	 * If there is no cookie, assume directory was stale.
	 */
	nfs_dircookie_lock(dnp);
	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep) {
		cookie = *cookiep;
		nfs_dircookie_unlock(dnp);
	} else {
		nfs_dircookie_unlock(dnp);
		return (NFSERR_BAD_COOKIE);
	}

	/*
	 * Loop around doing readdir rpc's of size nm_readdirsize
	 * truncated to a multiple of DIRBLKSIZ.
	 * The stopping criteria is EOF or buffer full.
	 */
	while (more_dirs && bigenough) {
		nfsstats.rpccnt[NFSPROC_READDIR]++;
		mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
			NFSX_READDIR(v3));
		mb = mreq;
		bpos = mtod(mb, caddr_t);
		nfsm_fhtom(vp, v3);
		if (v3) {
			/* 64-bit cookie, 64-bit cookie verifier, then count. */
			tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
			*tl++ = cookie.nfsuquad[0];
			*tl++ = cookie.nfsuquad[1];
			mtx_lock(&dnp->n_mtx);
			*tl++ = dnp->n_cookieverf.nfsuquad[0];
			*tl++ = dnp->n_cookieverf.nfsuquad[1];
			mtx_unlock(&dnp->n_mtx);
		} else {
			/* 32-bit cookie, then count. */
			tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = cookie.nfsuquad[0];
		}
		*tl = txdr_unsigned(nmp->nm_readdirsize);
		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
		if (v3) {
			nfsm_postop_attr(vp, attrflag);
			if (!error) {
				/* Save the cookie verifier from the reply. */
				tl = nfsm_dissect(u_int32_t *,
				    2 * NFSX_UNSIGNED);
				mtx_lock(&dnp->n_mtx);
				dnp->n_cookieverf.nfsuquad[0] = *tl++;
				dnp->n_cookieverf.nfsuquad[1] = *tl;
				mtx_unlock(&dnp->n_mtx);
			} else {
				m_freem(mrep);
				goto nfsmout;
			}
		}
		tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
		more_dirs = fxdr_unsigned(int, *tl);

		/* loop thru the dir entries, doctoring them to 4bsd form */
		while (more_dirs && bigenough) {
			if (v3) {
				tl = nfsm_dissect(u_int32_t *,
				    3 * NFSX_UNSIGNED);
				fileno = fxdr_hyper(tl);
				len = fxdr_unsigned(int, *(tl + 2));
			} else {
				tl = nfsm_dissect(u_int32_t *,
				    2 * NFSX_UNSIGNED);
				fileno = fxdr_unsigned(u_quad_t, *tl++);
				len = fxdr_unsigned(int, *tl);
			}
			if (len <= 0 || len > NFS_MAXNAMLEN) {
				error = EBADRPC;
				m_freem(mrep);
				goto nfsmout;
			}
			tlen = nfsm_rndup(len);
			if (tlen == len)
				tlen += 4;	/* To ensure null termination */
			left = DIRBLKSIZ - blksiz;
			/*
			 * The entry does not fit in what is left of this
			 * DIRBLKSIZ block: stretch the previous record over
			 * the remainder and start a new block.
			 */
			if ((tlen + DIRHDSIZ) > left) {
				dp->d_reclen += left;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + left;
				uiop->uio_iov->iov_len -= left;
				uiop->uio_offset += left;
				uiop->uio_resid -= left;
				blksiz = 0;
			}
			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
				bigenough = 0;
			if (bigenough) {
				/* Build the dirent header in place in the caller's buffer. */
				dp = (struct dirent *)uiop->uio_iov->iov_base;
				dp->d_fileno = (int)fileno;
				dp->d_namlen = len;
				dp->d_reclen = tlen + DIRHDSIZ;
				dp->d_type = DT_UNKNOWN;
				blksiz += dp->d_reclen;
				if (blksiz == DIRBLKSIZ)
					blksiz = 0;
				uiop->uio_offset += DIRHDSIZ;
				uiop->uio_resid -= DIRHDSIZ;
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
				uiop->uio_iov->iov_len -= DIRHDSIZ;
				nfsm_mtouio(uiop, len);
				cp = uiop->uio_iov->iov_base;
				tlen -= len;
				*cp = '\0';	/* null terminate */
				/* Skip the padding that follows the name. */
				uiop->uio_iov->iov_base =
				    (char *)uiop->uio_iov->iov_base + tlen;
				uiop->uio_iov->iov_len -= tlen;
				uiop->uio_offset += tlen;
				uiop->uio_resid -= tlen;
			} else
				nfsm_adv(nfsm_rndup(len));
			if (v3) {
				tl = nfsm_dissect(u_int32_t *,
				    3 * NFSX_UNSIGNED);
			} else {
				tl = nfsm_dissect(u_int32_t *,
				    2 * NFSX_UNSIGNED);
			}
			/*
			 * Remember the entry's cookie only when the entry was
			 * copied out; either way tl ends up at the "more
			 * entries" word.
			 */
			if (bigenough) {
				cookie.nfsuquad[0] = *tl++;
				if (v3)
					cookie.nfsuquad[1] = *tl++;
			} else if (v3)
				tl += 2;
			else
				tl++;
			more_dirs = fxdr_unsigned(int, *tl);
		}
		/*
		 * If at end of rpc data, get the eof boolean
		 */
		if (!more_dirs) {
			tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
			more_dirs = (fxdr_unsigned(int, *tl) == 0);
		}
		m_freem(mrep);
	}
	/*
	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
	 * by increasing d_reclen for the last record.
	 */
	if (blksiz > 0) {
		left = DIRBLKSIZ - blksiz;
		dp->d_reclen += left;
		uiop->uio_iov->iov_base =
		    (char *)uiop->uio_iov->iov_base + left;
		uiop->uio_iov->iov_len -= left;
		uiop->uio_offset += left;
		uiop->uio_resid -= left;
	}

	/*
	 * We are now either at the end of the directory or have filled the
	 * block.
	 */
	if (bigenough)
		dnp->n_direofoffset = uiop->uio_offset;
	else {
		if (uiop->uio_resid > 0)
			nfs_printf("EEK! readdirrpc resid > 0\n");
		nfs_dircookie_lock(dnp);
		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
		*cookiep = cookie;
		nfs_dircookie_unlock(dnp);
	}
nfsmout:
	return (error);
}

/*
 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
 */
int
nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
	int len, left;
	struct dirent *dp;
	u_int32_t *tl;
	caddr_t cp;
	struct vnode *newvp;
	nfsuint64 *cookiep;
	caddr_t bpos, dpos, dpossav1, dpossav2;
	struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
	struct nameidata nami, *ndp = &nami;
	struct componentname *cnp = &ndp->ni_cnd;
	nfsuint64 cookie;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *dnp = VTONFS(vp), *np;
	nfsfh_t *fhp;
	u_quad_t fileno;
	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
	int attrflag, fhsize;

#ifndef nolint
	dp = NULL;
#endif
#ifndef DIAGNOSTIC
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
		(uiop->uio_resid & (DIRBLKSIZ - 1)))
		panic("nfs readdirplusrpc bad uio");
#endif
	ndp->ni_dvp = vp;
	newvp = NULLVP;

	/*
	 * If there is no cookie, assume directory was stale.
2264 */ 2265 nfs_dircookie_lock(dnp); 2266 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); 2267 if (cookiep) { 2268 cookie = *cookiep; 2269 nfs_dircookie_unlock(dnp); 2270 } else { 2271 nfs_dircookie_unlock(dnp); 2272 return (NFSERR_BAD_COOKIE); 2273 } 2274 /* 2275 * Loop around doing readdir rpc's of size nm_readdirsize 2276 * truncated to a multiple of DIRBLKSIZ. 2277 * The stopping criteria is EOF or buffer full. 2278 */ 2279 while (more_dirs && bigenough) { 2280 nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; 2281 mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS, 2282 NFSX_FH(1) + 6 * NFSX_UNSIGNED); 2283 mb = mreq; 2284 bpos = mtod(mb, caddr_t); 2285 nfsm_fhtom(vp, 1); 2286 tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED); 2287 *tl++ = cookie.nfsuquad[0]; 2288 *tl++ = cookie.nfsuquad[1]; 2289 mtx_lock(&dnp->n_mtx); 2290 *tl++ = dnp->n_cookieverf.nfsuquad[0]; 2291 *tl++ = dnp->n_cookieverf.nfsuquad[1]; 2292 mtx_unlock(&dnp->n_mtx); 2293 *tl++ = txdr_unsigned(nmp->nm_readdirsize); 2294 *tl = txdr_unsigned(nmp->nm_rsize); 2295 nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred); 2296 nfsm_postop_attr(vp, attrflag); 2297 if (error) { 2298 m_freem(mrep); 2299 goto nfsmout; 2300 } 2301 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2302 mtx_lock(&dnp->n_mtx); 2303 dnp->n_cookieverf.nfsuquad[0] = *tl++; 2304 dnp->n_cookieverf.nfsuquad[1] = *tl++; 2305 mtx_unlock(&dnp->n_mtx); 2306 more_dirs = fxdr_unsigned(int, *tl); 2307 2308 /* loop thru the dir entries, doctoring them to 4bsd form */ 2309 while (more_dirs && bigenough) { 2310 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2311 fileno = fxdr_hyper(tl); 2312 len = fxdr_unsigned(int, *(tl + 2)); 2313 if (len <= 0 || len > NFS_MAXNAMLEN) { 2314 error = EBADRPC; 2315 m_freem(mrep); 2316 goto nfsmout; 2317 } 2318 tlen = nfsm_rndup(len); 2319 if (tlen == len) 2320 tlen += 4; /* To ensure null termination*/ 2321 left = DIRBLKSIZ - blksiz; 2322 if ((tlen + DIRHDSIZ) > left) { 2323 dp->d_reclen += left; 2324 
uiop->uio_iov->iov_base = 2325 (char *)uiop->uio_iov->iov_base + left; 2326 uiop->uio_iov->iov_len -= left; 2327 uiop->uio_offset += left; 2328 uiop->uio_resid -= left; 2329 blksiz = 0; 2330 } 2331 if ((tlen + DIRHDSIZ) > uiop->uio_resid) 2332 bigenough = 0; 2333 if (bigenough) { 2334 dp = (struct dirent *)uiop->uio_iov->iov_base; 2335 dp->d_fileno = (int)fileno; 2336 dp->d_namlen = len; 2337 dp->d_reclen = tlen + DIRHDSIZ; 2338 dp->d_type = DT_UNKNOWN; 2339 blksiz += dp->d_reclen; 2340 if (blksiz == DIRBLKSIZ) 2341 blksiz = 0; 2342 uiop->uio_offset += DIRHDSIZ; 2343 uiop->uio_resid -= DIRHDSIZ; 2344 uiop->uio_iov->iov_base = 2345 (char *)uiop->uio_iov->iov_base + DIRHDSIZ; 2346 uiop->uio_iov->iov_len -= DIRHDSIZ; 2347 cnp->cn_nameptr = uiop->uio_iov->iov_base; 2348 cnp->cn_namelen = len; 2349 nfsm_mtouio(uiop, len); 2350 cp = uiop->uio_iov->iov_base; 2351 tlen -= len; 2352 *cp = '\0'; 2353 uiop->uio_iov->iov_base = 2354 (char *)uiop->uio_iov->iov_base + tlen; 2355 uiop->uio_iov->iov_len -= tlen; 2356 uiop->uio_offset += tlen; 2357 uiop->uio_resid -= tlen; 2358 } else 2359 nfsm_adv(nfsm_rndup(len)); 2360 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); 2361 if (bigenough) { 2362 cookie.nfsuquad[0] = *tl++; 2363 cookie.nfsuquad[1] = *tl++; 2364 } else 2365 tl += 2; 2366 2367 /* 2368 * Since the attributes are before the file handle 2369 * (sigh), we must skip over the attributes and then 2370 * come back and get them. 2371 */ 2372 attrflag = fxdr_unsigned(int, *tl); 2373 if (attrflag) { 2374 dpossav1 = dpos; 2375 mdsav1 = md; 2376 nfsm_adv(NFSX_V3FATTR); 2377 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2378 doit = fxdr_unsigned(int, *tl); 2379 /* 2380 * Skip loading the attrs for "..". There's a 2381 * race between loading the attrs here and 2382 * lookups that look for the directory currently 2383 * being read (in the parent). We try to acquire 2384 * the exclusive lock on ".." here, owning the 2385 * lock on the directory being read. 
Lookup will 2386 * hold the lock on ".." and try to acquire the 2387 * lock on the directory being read. 2388 * 2389 * There are other ways of fixing this, one would 2390 * be to do a trylock on the ".." vnode and skip 2391 * loading the attrs on ".." if it happens to be 2392 * locked by another process. But skipping the 2393 * attrload on ".." seems the easiest option. 2394 */ 2395 if (strcmp(dp->d_name, "..") == 0) { 2396 doit = 0; 2397 /* 2398 * We've already skipped over the attrs, 2399 * skip over the filehandle. And store d_type 2400 * as VDIR. 2401 */ 2402 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2403 i = fxdr_unsigned(int, *tl); 2404 nfsm_adv(nfsm_rndup(i)); 2405 dp->d_type = IFTODT(VTTOIF(VDIR)); 2406 } 2407 if (doit) { 2408 nfsm_getfh(fhp, fhsize, 1); 2409 if (NFS_CMPFH(dnp, fhp, fhsize)) { 2410 VREF(vp); 2411 newvp = vp; 2412 np = dnp; 2413 } else { 2414 error = nfs_nget(vp->v_mount, fhp, 2415 fhsize, &np, LK_EXCLUSIVE); 2416 if (error) 2417 doit = 0; 2418 else 2419 newvp = NFSTOV(np); 2420 } 2421 } 2422 if (doit && bigenough) { 2423 dpossav2 = dpos; 2424 dpos = dpossav1; 2425 mdsav2 = md; 2426 md = mdsav1; 2427 nfsm_loadattr(newvp, NULL); 2428 dpos = dpossav2; 2429 md = mdsav2; 2430 dp->d_type = 2431 IFTODT(VTTOIF(np->n_vattr.va_type)); 2432 ndp->ni_vp = newvp; 2433 /* Update n_ctime, so subsequent lookup doesn't purge entry */ 2434 np->n_ctime = np->n_vattr.va_ctime.tv_sec; 2435 cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); 2436 } 2437 } else { 2438 /* Just skip over the file handle */ 2439 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2440 i = fxdr_unsigned(int, *tl); 2441 if (i) { 2442 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2443 fhsize = fxdr_unsigned(int, *tl); 2444 nfsm_adv(nfsm_rndup(fhsize)); 2445 } 2446 } 2447 if (newvp != NULLVP) { 2448 if (newvp == vp) 2449 vrele(newvp); 2450 else 2451 vput(newvp); 2452 newvp = NULLVP; 2453 } 2454 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2455 more_dirs = fxdr_unsigned(int, *tl); 2456 } 2457 
/* 2458 * If at end of rpc data, get the eof boolean 2459 */ 2460 if (!more_dirs) { 2461 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); 2462 more_dirs = (fxdr_unsigned(int, *tl) == 0); 2463 } 2464 m_freem(mrep); 2465 } 2466 /* 2467 * Fill last record, iff any, out to a multiple of DIRBLKSIZ 2468 * by increasing d_reclen for the last record. 2469 */ 2470 if (blksiz > 0) { 2471 left = DIRBLKSIZ - blksiz; 2472 dp->d_reclen += left; 2473 uiop->uio_iov->iov_base = 2474 (char *)uiop->uio_iov->iov_base + left; 2475 uiop->uio_iov->iov_len -= left; 2476 uiop->uio_offset += left; 2477 uiop->uio_resid -= left; 2478 } 2479 2480 /* 2481 * We are now either at the end of the directory or have filled the 2482 * block. 2483 */ 2484 if (bigenough) 2485 dnp->n_direofoffset = uiop->uio_offset; 2486 else { 2487 if (uiop->uio_resid > 0) 2488 nfs_printf("EEK! readdirplusrpc resid > 0\n"); 2489 nfs_dircookie_lock(dnp); 2490 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); 2491 *cookiep = cookie; 2492 nfs_dircookie_unlock(dnp); 2493 } 2494nfsmout: 2495 if (newvp != NULLVP) { 2496 if (newvp == vp) 2497 vrele(newvp); 2498 else 2499 vput(newvp); 2500 newvp = NULLVP; 2501 } 2502 return (error); 2503} 2504 2505/* 2506 * Silly rename. To make the NFS filesystem that is stateless look a little 2507 * more like the "ufs" a remove of an active vnode is translated to a rename 2508 * to a funny looking filename that is removed by nfs_inactive on the 2509 * nfsnode. There is the potential for another process on a different client 2510 * to create the same funny name between the nfs_lookitup() fails and the 2511 * nfs_rename() completes, but... 
2512 */ 2513static int 2514nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 2515{ 2516 struct sillyrename *sp; 2517 struct nfsnode *np; 2518 int error; 2519 short pid; 2520 unsigned int lticks; 2521 2522 cache_purge(dvp); 2523 np = VTONFS(vp); 2524#ifndef DIAGNOSTIC 2525 if (vp->v_type == VDIR) 2526 panic("nfs: sillyrename dir"); 2527#endif 2528 MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), 2529 M_NFSREQ, M_WAITOK); 2530 sp->s_cred = crhold(cnp->cn_cred); 2531 sp->s_dvp = dvp; 2532 sp->s_removeit = nfs_removeit; 2533 VREF(dvp); 2534 2535 /* 2536 * Fudge together a funny name. 2537 * Changing the format of the funny name to accomodate more 2538 * sillynames per directory. 2539 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 2540 * CPU ticks since boot. 2541 */ 2542 pid = cnp->cn_thread->td_proc->p_pid; 2543 lticks = (unsigned int)ticks; 2544 for ( ; ; ) { 2545 sp->s_namlen = sprintf(sp->s_name, 2546 ".nfs.%08x.%04x4.4", lticks, 2547 pid); 2548 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2549 cnp->cn_thread, NULL)) 2550 break; 2551 lticks++; 2552 } 2553 error = nfs_renameit(dvp, cnp, sp); 2554 if (error) 2555 goto bad; 2556 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2557 cnp->cn_thread, &np); 2558 np->n_sillyrename = sp; 2559 return (0); 2560bad: 2561 vrele(sp->s_dvp); 2562 crfree(sp->s_cred); 2563 free((caddr_t)sp, M_NFSREQ); 2564 return (error); 2565} 2566 2567/* 2568 * Look up a file name and optionally either update the file handle or 2569 * allocate an nfsnode, depending on the value of npp. 
2570 * npp == NULL --> just do the lookup 2571 * *npp == NULL --> allocate a new nfsnode and make sure attributes are 2572 * handled too 2573 * *npp != NULL --> update the file handle in the vnode 2574 */ 2575static int 2576nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred, 2577 struct thread *td, struct nfsnode **npp) 2578{ 2579 struct vnode *newvp = NULL; 2580 struct nfsnode *np, *dnp = VTONFS(dvp); 2581 caddr_t bpos, dpos; 2582 int error = 0, fhlen, attrflag; 2583 struct mbuf *mreq, *mrep, *md, *mb; 2584 nfsfh_t *nfhp; 2585 int v3 = NFS_ISV3(dvp); 2586 2587 nfsstats.rpccnt[NFSPROC_LOOKUP]++; 2588 mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, 2589 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); 2590 mb = mreq; 2591 bpos = mtod(mb, caddr_t); 2592 nfsm_fhtom(dvp, v3); 2593 nfsm_strtom(name, len, NFS_MAXNAMLEN); 2594 nfsm_request(dvp, NFSPROC_LOOKUP, td, cred); 2595 if (npp && !error) { 2596 nfsm_getfh(nfhp, fhlen, v3); 2597 if (*npp) { 2598 np = *npp; 2599 if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { 2600 free((caddr_t)np->n_fhp, M_NFSBIGFH); 2601 np->n_fhp = &np->n_fh; 2602 } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) 2603 np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK); 2604 bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); 2605 np->n_fhsize = fhlen; 2606 newvp = NFSTOV(np); 2607 } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { 2608 VREF(dvp); 2609 newvp = dvp; 2610 } else { 2611 error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE); 2612 if (error) { 2613 m_freem(mrep); 2614 return (error); 2615 } 2616 newvp = NFSTOV(np); 2617 } 2618 if (v3) { 2619 nfsm_postop_attr(newvp, attrflag); 2620 if (!attrflag && *npp == NULL) { 2621 m_freem(mrep); 2622 if (newvp == dvp) 2623 vrele(newvp); 2624 else 2625 vput(newvp); 2626 return (ENOENT); 2627 } 2628 } else 2629 nfsm_loadattr(newvp, NULL); 2630 } 2631 m_freem(mrep); 2632nfsmout: 2633 if (npp && *npp == NULL) { 2634 if (error) { 2635 if (newvp) { 2636 if 
(newvp == dvp) 2637 vrele(newvp); 2638 else 2639 vput(newvp); 2640 } 2641 } else 2642 *npp = np; 2643 } 2644 return (error); 2645} 2646 2647/* 2648 * Nfs Version 3 commit rpc 2649 */ 2650int 2651nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, 2652 struct thread *td) 2653{ 2654 u_int32_t *tl; 2655 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2656 caddr_t bpos, dpos; 2657 int error = 0, wccflag = NFSV3_WCCRATTR; 2658 struct mbuf *mreq, *mrep, *md, *mb; 2659 2660 mtx_lock(&nmp->nm_mtx); 2661 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { 2662 mtx_unlock(&nmp->nm_mtx); 2663 return (0); 2664 } 2665 mtx_unlock(&nmp->nm_mtx); 2666 nfsstats.rpccnt[NFSPROC_COMMIT]++; 2667 mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); 2668 mb = mreq; 2669 bpos = mtod(mb, caddr_t); 2670 nfsm_fhtom(vp, 1); 2671 tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED); 2672 txdr_hyper(offset, tl); 2673 tl += 2; 2674 *tl = txdr_unsigned(cnt); 2675 nfsm_request(vp, NFSPROC_COMMIT, td, cred); 2676 nfsm_wcc_data(vp, wccflag); 2677 if (!error) { 2678 tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF); 2679 if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, 2680 NFSX_V3WRITEVERF)) { 2681 bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, 2682 NFSX_V3WRITEVERF); 2683 error = NFSERR_STALEWRITEVERF; 2684 } 2685 } 2686 m_freem(mrep); 2687nfsmout: 2688 return (error); 2689} 2690 2691/* 2692 * Strategy routine. 2693 * For async requests when nfsiod(s) are running, queue the request by 2694 * calling nfs_asyncio(), otherwise just all nfs_doio() to do the 2695 * request. 
2696 */ 2697static int 2698nfs_strategy(struct vop_strategy_args *ap) 2699{ 2700 struct buf *bp = ap->a_bp; 2701 struct ucred *cr; 2702 2703 KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 2704 KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp)); 2705 2706 if (bp->b_iocmd == BIO_READ) 2707 cr = bp->b_rcred; 2708 else 2709 cr = bp->b_wcred; 2710 2711 /* 2712 * If the op is asynchronous and an i/o daemon is waiting 2713 * queue the request, wake it up and wait for completion 2714 * otherwise just do it ourselves. 2715 */ 2716 if ((bp->b_flags & B_ASYNC) == 0 || 2717 nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread)) 2718 (void)nfs_doio(ap->a_vp, bp, cr, curthread); 2719 return (0); 2720} 2721 2722/* 2723 * fsync vnode op. Just call nfs_flush() with commit == 1. 2724 */ 2725/* ARGSUSED */ 2726static int 2727nfs_fsync(struct vop_fsync_args *ap) 2728{ 2729 return (nfs_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1)); 2730} 2731 2732/* 2733 * Flush all the blocks associated with a vnode. 2734 * Walk through the buffer pool and push any dirty pages 2735 * associated with the vnode. 
2736 */ 2737static int 2738nfs_flush(struct vnode *vp, int waitfor, struct thread *td, 2739 int commit) 2740{ 2741 struct nfsnode *np = VTONFS(vp); 2742 struct buf *bp; 2743 int i; 2744 struct buf *nbp; 2745 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2746 int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; 2747 int passone = 1; 2748 u_quad_t off, endoff, toff; 2749 struct ucred* wcred = NULL; 2750 struct buf **bvec = NULL; 2751#ifndef NFS_COMMITBVECSIZ 2752#define NFS_COMMITBVECSIZ 20 2753#endif 2754 struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; 2755 int bvecsize = 0, bveccount; 2756 2757 if (nmp->nm_flag & NFSMNT_INT) 2758 slpflag = PCATCH; 2759 if (!commit) 2760 passone = 0; 2761 /* 2762 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the 2763 * server, but has not been committed to stable storage on the server 2764 * yet. On the first pass, the byte range is worked out and the commit 2765 * rpc is done. On the second pass, nfs_writebp() is called to do the 2766 * job. 2767 */ 2768again: 2769 off = (u_quad_t)-1; 2770 endoff = 0; 2771 bvecpos = 0; 2772 if (NFS_ISV3(vp) && commit) { 2773 s = splbio(); 2774 if (bvec != NULL && bvec != bvec_on_stack) 2775 free(bvec, M_TEMP); 2776 /* 2777 * Count up how many buffers waiting for a commit. 2778 */ 2779 bveccount = 0; 2780 VI_LOCK(vp); 2781 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { 2782 if (BUF_REFCNT(bp) == 0 && 2783 (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) 2784 == (B_DELWRI | B_NEEDCOMMIT)) 2785 bveccount++; 2786 } 2787 /* 2788 * Allocate space to remember the list of bufs to commit. It is 2789 * important to use M_NOWAIT here to avoid a race with nfs_write. 2790 * If we can't get memory (for whatever reason), we will end up 2791 * committing the buffers one-by-one in the loop below. 2792 */ 2793 if (bveccount > NFS_COMMITBVECSIZ) { 2794 /* 2795 * Release the vnode interlock to avoid a lock 2796 * order reversal. 
2797 */ 2798 VI_UNLOCK(vp); 2799 bvec = (struct buf **) 2800 malloc(bveccount * sizeof(struct buf *), 2801 M_TEMP, M_NOWAIT); 2802 VI_LOCK(vp); 2803 if (bvec == NULL) { 2804 bvec = bvec_on_stack; 2805 bvecsize = NFS_COMMITBVECSIZ; 2806 } else 2807 bvecsize = bveccount; 2808 } else { 2809 bvec = bvec_on_stack; 2810 bvecsize = NFS_COMMITBVECSIZ; 2811 } 2812 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { 2813 if (bvecpos >= bvecsize) 2814 break; 2815 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 2816 nbp = TAILQ_NEXT(bp, b_bobufs); 2817 continue; 2818 } 2819 if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != 2820 (B_DELWRI | B_NEEDCOMMIT)) { 2821 BUF_UNLOCK(bp); 2822 nbp = TAILQ_NEXT(bp, b_bobufs); 2823 continue; 2824 } 2825 VI_UNLOCK(vp); 2826 bremfree(bp); 2827 /* 2828 * Work out if all buffers are using the same cred 2829 * so we can deal with them all with one commit. 2830 * 2831 * NOTE: we are not clearing B_DONE here, so we have 2832 * to do it later on in this routine if we intend to 2833 * initiate I/O on the bp. 2834 * 2835 * Note: to avoid loopback deadlocks, we do not 2836 * assign b_runningbufspace. 2837 */ 2838 if (wcred == NULL) 2839 wcred = bp->b_wcred; 2840 else if (wcred != bp->b_wcred) 2841 wcred = NOCRED; 2842 vfs_busy_pages(bp, 1); 2843 2844 VI_LOCK(vp); 2845 /* 2846 * bp is protected by being locked, but nbp is not 2847 * and vfs_busy_pages() may sleep. We have to 2848 * recalculate nbp. 2849 */ 2850 nbp = TAILQ_NEXT(bp, b_bobufs); 2851 2852 /* 2853 * A list of these buffers is kept so that the 2854 * second loop knows which buffers have actually 2855 * been committed. This is necessary, since there 2856 * may be a race between the commit rpc and new 2857 * uncommitted writes on the file. 
2858 */ 2859 bvec[bvecpos++] = bp; 2860 toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 2861 bp->b_dirtyoff; 2862 if (toff < off) 2863 off = toff; 2864 toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); 2865 if (toff > endoff) 2866 endoff = toff; 2867 } 2868 splx(s); 2869 VI_UNLOCK(vp); 2870 } 2871 if (bvecpos > 0) { 2872 /* 2873 * Commit data on the server, as required. 2874 * If all bufs are using the same wcred, then use that with 2875 * one call for all of them, otherwise commit each one 2876 * separately. 2877 */ 2878 if (wcred != NOCRED) 2879 retv = nfs_commit(vp, off, (int)(endoff - off), 2880 wcred, td); 2881 else { 2882 retv = 0; 2883 for (i = 0; i < bvecpos; i++) { 2884 off_t off, size; 2885 bp = bvec[i]; 2886 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + 2887 bp->b_dirtyoff; 2888 size = (u_quad_t)(bp->b_dirtyend 2889 - bp->b_dirtyoff); 2890 retv = nfs_commit(vp, off, (int)size, 2891 bp->b_wcred, td); 2892 if (retv) break; 2893 } 2894 } 2895 2896 if (retv == NFSERR_STALEWRITEVERF) 2897 nfs_clearcommit(vp->v_mount); 2898 2899 /* 2900 * Now, either mark the blocks I/O done or mark the 2901 * blocks dirty, depending on whether the commit 2902 * succeeded. 2903 */ 2904 for (i = 0; i < bvecpos; i++) { 2905 bp = bvec[i]; 2906 bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 2907 if (retv) { 2908 /* 2909 * Error, leave B_DELWRI intact 2910 */ 2911 vfs_unbusy_pages(bp); 2912 brelse(bp); 2913 } else { 2914 /* 2915 * Success, remove B_DELWRI ( bundirty() ). 2916 * 2917 * b_dirtyoff/b_dirtyend seem to be NFS 2918 * specific. We should probably move that 2919 * into bundirty(). XXX 2920 */ 2921 s = splbio(); 2922 bufobj_wref(&vp->v_bufobj); 2923 bp->b_flags |= B_ASYNC; 2924 bundirty(bp); 2925 bp->b_flags &= ~B_DONE; 2926 bp->b_ioflags &= ~BIO_ERROR; 2927 bp->b_dirtyoff = bp->b_dirtyend = 0; 2928 splx(s); 2929 bufdone(bp); 2930 } 2931 } 2932 } 2933 2934 /* 2935 * Start/do any write(s) that are required. 
2936 */ 2937loop: 2938 s = splbio(); 2939 VI_LOCK(vp); 2940 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { 2941 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { 2942 if (waitfor != MNT_WAIT || passone) 2943 continue; 2944 2945 error = BUF_TIMELOCK(bp, 2946 LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, 2947 VI_MTX(vp), "nfsfsync", slpflag, slptimeo); 2948 splx(s); 2949 if (error == 0) { 2950 BUF_UNLOCK(bp); 2951 goto loop; 2952 } 2953 if (error == ENOLCK) 2954 goto loop; 2955 if (nfs_sigintr(nmp, NULL, td)) { 2956 error = EINTR; 2957 goto done; 2958 } 2959 if (slpflag == PCATCH) { 2960 slpflag = 0; 2961 slptimeo = 2 * hz; 2962 } 2963 goto loop; 2964 } 2965 if ((bp->b_flags & B_DELWRI) == 0) 2966 panic("nfs_fsync: not dirty"); 2967 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { 2968 BUF_UNLOCK(bp); 2969 continue; 2970 } 2971 VI_UNLOCK(vp); 2972 bremfree(bp); 2973 if (passone || !commit) 2974 bp->b_flags |= B_ASYNC; 2975 else 2976 bp->b_flags |= B_ASYNC; 2977 splx(s); 2978 bwrite(bp); 2979 if (nfs_sigintr(nmp, NULL, td)) { 2980 error = EINTR; 2981 goto done; 2982 } 2983 goto loop; 2984 } 2985 splx(s); 2986 if (passone) { 2987 passone = 0; 2988 VI_UNLOCK(vp); 2989 goto again; 2990 } 2991 if (waitfor == MNT_WAIT) { 2992 while (vp->v_bufobj.bo_numoutput) { 2993 error = bufobj_wwait(&vp->v_bufobj, slpflag, slptimeo); 2994 if (error) { 2995 VI_UNLOCK(vp); 2996 error = nfs_sigintr(nmp, NULL, td); 2997 if (error) 2998 goto done; 2999 if (slpflag == PCATCH) { 3000 slpflag = 0; 3001 slptimeo = 2 * hz; 3002 } 3003 VI_LOCK(vp); 3004 } 3005 } 3006 if (vp->v_bufobj.bo_dirty.bv_cnt != 0 && commit) { 3007 VI_UNLOCK(vp); 3008 goto loop; 3009 } 3010 /* 3011 * Wait for all the async IO requests to drain 3012 */ 3013 VI_UNLOCK(vp); 3014 mtx_lock(&np->n_mtx); 3015 while (np->n_directio_asyncwr > 0) { 3016 np->n_flag |= NFSYNCWAIT; 3017 error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr, 3018 &np->n_mtx, slpflag | (PRIBIO + 1), 3019 "nfsfsync", 
0); 3020 if (error) { 3021 if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) { 3022 mtx_unlock(&np->n_mtx); 3023 error = EINTR; 3024 goto done; 3025 } 3026 } 3027 } 3028 mtx_unlock(&np->n_mtx); 3029 } else 3030 VI_UNLOCK(vp); 3031 mtx_lock(&np->n_mtx); 3032 if (np->n_flag & NWRITEERR) { 3033 error = np->n_error; 3034 np->n_flag &= ~NWRITEERR; 3035 } 3036 if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0 && 3037 vp->v_bufobj.bo_numoutput == 0 && np->n_directio_asyncwr == 0) 3038 np->n_flag &= ~NMODIFIED; 3039 mtx_unlock(&np->n_mtx); 3040done: 3041 if (bvec != NULL && bvec != bvec_on_stack) 3042 free(bvec, M_TEMP); 3043 return (error); 3044} 3045 3046/* 3047 * NFS advisory byte-level locks. 3048 */ 3049static int 3050nfs_advlock(struct vop_advlock_args *ap) 3051{ 3052 int error; 3053 3054 mtx_lock(&Giant); 3055 if ((VFSTONFS(ap->a_vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { 3056 struct nfsnode *np = VTONFS(ap->a_vp); 3057 3058 error = lf_advlock(ap, &(np->n_lockf), np->n_size); 3059 goto out; 3060 } 3061 error = nfs_dolock(ap); 3062out: 3063 mtx_unlock(&Giant); 3064 return (error); 3065} 3066 3067/* 3068 * Print out the contents of an nfsnode. 3069 */ 3070static int 3071nfs_print(struct vop_print_args *ap) 3072{ 3073 struct vnode *vp = ap->a_vp; 3074 struct nfsnode *np = VTONFS(vp); 3075 3076 nfs_printf("\tfileid %ld fsid 0x%x", 3077 np->n_vattr.va_fileid, np->n_vattr.va_fsid); 3078 if (vp->v_type == VFIFO) 3079 fifo_printinfo(vp); 3080 printf("\n"); 3081 return (0); 3082} 3083 3084/* 3085 * This is the "real" nfs::bwrite(struct buf*). 3086 * We set B_CACHE if this is a VMIO buffer. 
3087 */ 3088int 3089nfs_writebp(struct buf *bp, int force __unused, struct thread *td) 3090{ 3091 int s; 3092 int oldflags = bp->b_flags; 3093#if 0 3094 int retv = 1; 3095 off_t off; 3096#endif 3097 3098 if (BUF_REFCNT(bp) == 0) 3099 panic("bwrite: buffer is not locked???"); 3100 3101 if (bp->b_flags & B_INVAL) { 3102 brelse(bp); 3103 return(0); 3104 } 3105 3106 bp->b_flags |= B_CACHE; 3107 3108 /* 3109 * Undirty the bp. We will redirty it later if the I/O fails. 3110 */ 3111 3112 s = splbio(); 3113 bundirty(bp); 3114 bp->b_flags &= ~B_DONE; 3115 bp->b_ioflags &= ~BIO_ERROR; 3116 bp->b_iocmd = BIO_WRITE; 3117 3118 bufobj_wref(bp->b_bufobj); 3119 curthread->td_proc->p_stats->p_ru.ru_oublock++; 3120 splx(s); 3121 3122 /* 3123 * Note: to avoid loopback deadlocks, we do not 3124 * assign b_runningbufspace. 3125 */ 3126 vfs_busy_pages(bp, 1); 3127 3128 BUF_KERNPROC(bp); 3129 bp->b_iooffset = dbtob(bp->b_blkno); 3130 bstrategy(bp); 3131 3132 if( (oldflags & B_ASYNC) == 0) { 3133 int rtval = bufwait(bp); 3134 3135 if (oldflags & B_DELWRI) { 3136 s = splbio(); 3137 reassignbuf(bp); 3138 splx(s); 3139 } 3140 brelse(bp); 3141 return (rtval); 3142 } 3143 3144 return (0); 3145} 3146 3147/* 3148 * nfs special file access vnode op. 3149 * Essentially just get vattr and then imitate iaccess() since the device is 3150 * local to the client. 3151 */ 3152static int 3153nfsspec_access(struct vop_access_args *ap) 3154{ 3155 struct vattr *vap; 3156 struct ucred *cred = ap->a_cred; 3157 struct vnode *vp = ap->a_vp; 3158 mode_t mode = ap->a_mode; 3159 struct vattr vattr; 3160 int error; 3161 3162 /* 3163 * Disallow write attempts on filesystems mounted read-only; 3164 * unless the file is a socket, fifo, or a block or character 3165 * device resident on the filesystem. 
3166 */ 3167 if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3168 switch (vp->v_type) { 3169 case VREG: 3170 case VDIR: 3171 case VLNK: 3172 return (EROFS); 3173 default: 3174 break; 3175 } 3176 } 3177 vap = &vattr; 3178 error = VOP_GETATTR(vp, vap, cred, ap->a_td); 3179 if (error) 3180 goto out; 3181 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3182 mode, cred, NULL); 3183out: 3184 return error; 3185} 3186 3187/* 3188 * Read wrapper for fifos. 3189 */ 3190static int 3191nfsfifo_read(struct vop_read_args *ap) 3192{ 3193 struct nfsnode *np = VTONFS(ap->a_vp); 3194 int error; 3195 3196 /* 3197 * Set access flag. 3198 */ 3199 mtx_lock(&np->n_mtx); 3200 np->n_flag |= NACC; 3201 getnanotime(&np->n_atim); 3202 mtx_unlock(&np->n_mtx); 3203 error = fifo_specops.vop_read(ap); 3204 return error; 3205} 3206 3207/* 3208 * Write wrapper for fifos. 3209 */ 3210static int 3211nfsfifo_write(struct vop_write_args *ap) 3212{ 3213 struct nfsnode *np = VTONFS(ap->a_vp); 3214 3215 /* 3216 * Set update flag. 3217 */ 3218 mtx_lock(&np->n_mtx); 3219 np->n_flag |= NUPD; 3220 getnanotime(&np->n_mtim); 3221 mtx_unlock(&np->n_mtx); 3222 return(fifo_specops.vop_write(ap)); 3223} 3224 3225/* 3226 * Close wrapper for fifos. 3227 * 3228 * Update the times on the nfsnode then do fifo close. 
3229 */ 3230static int 3231nfsfifo_close(struct vop_close_args *ap) 3232{ 3233 struct vnode *vp = ap->a_vp; 3234 struct nfsnode *np = VTONFS(vp); 3235 struct vattr vattr; 3236 struct timespec ts; 3237 3238 mtx_lock(&np->n_mtx); 3239 if (np->n_flag & (NACC | NUPD)) { 3240 getnanotime(&ts); 3241 if (np->n_flag & NACC) 3242 np->n_atim = ts; 3243 if (np->n_flag & NUPD) 3244 np->n_mtim = ts; 3245 np->n_flag |= NCHG; 3246 if (vrefcnt(vp) == 1 && 3247 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3248 VATTR_NULL(&vattr); 3249 if (np->n_flag & NACC) 3250 vattr.va_atime = np->n_atim; 3251 if (np->n_flag & NUPD) 3252 vattr.va_mtime = np->n_mtim; 3253 mtx_unlock(&np->n_mtx); 3254 (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td); 3255 goto out; 3256 } 3257 } 3258 mtx_unlock(&np->n_mtx); 3259out: 3260 return (fifo_specops.vop_close(ap)); 3261} 3262 3263/* 3264 * Just call nfs_writebp() with the force argument set to 1. 3265 * 3266 * NOTE: B_DONE may or may not be set in a_bp on call. 3267 */ 3268static int 3269nfs_bwrite(struct buf *bp) 3270{ 3271 3272 return (nfs_writebp(bp, 1, curthread)); 3273} 3274 3275struct buf_ops buf_ops_nfs = { 3276 .bop_name = "buf_ops_nfs", 3277 .bop_write = nfs_bwrite, 3278 .bop_strategy = bufstrategy, 3279 .bop_sync = bufsync, 3280 .bop_bdflush = bufbdflush, 3281}; 3282