/*	$NetBSD: nfs_clvnops.c,v 1.4 2016/12/13 22:17:33 pgoyette Exp $	*/
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
32 * 33 * from nfs_vnops.c 8.16 (Berkeley) 5/27/95 34 */ 35 36#include <sys/cdefs.h> 37/* __FBSDID("FreeBSD: head/sys/fs/nfsclient/nfs_clvnops.c 304026 2016-08-12 22:44:59Z rmacklem "); */ 38__RCSID("$NetBSD: nfs_clvnops.c,v 1.4 2016/12/13 22:17:33 pgoyette Exp $"); 39 40/* 41 * vnode op calls for Sun NFS version 2, 3 and 4 42 */ 43 44#ifdef _KERNEL_OPT 45#include "opt_dtrace.h" 46#include "opt_inet.h" 47#endif 48 49#include <sys/param.h> 50#include <sys/kernel.h> 51#include <sys/systm.h> 52#include <sys/resourcevar.h> 53#include <sys/proc.h> 54#include <sys/mount.h> 55#include <sys/bio.h> 56#include <sys/buf.h> 57#include <sys/jail.h> 58#include <sys/malloc.h> 59#include <sys/mbuf.h> 60#include <sys/namei.h> 61#include <sys/socket.h> 62#include <sys/vnode.h> 63#include <sys/dirent.h> 64#include <sys/fcntl.h> 65#include <sys/lockf.h> 66#include <sys/stat.h> 67#include <sys/sysctl.h> 68#include <sys/signalvar.h> 69 70#include <vm/vm.h> 71#include <vm/vm_extern.h> 72#include <vm/vm_object.h> 73 74#include <fs/nfs/common/nfsport.h> 75#include <fs/nfs/client/nfsnode.h> 76#include <fs/nfs/client/nfsmount.h> 77#include <fs/nfs/client/nfs.h> 78#include <fs/nfs/client/nfs_kdtrace.h> 79 80#include <net/if.h> 81#include <netinet/in.h> 82#include <netinet/in_var.h> 83 84#include <fs/nfs/common/nfs_lock.h> 85 86#ifdef KDTRACE_HOOKS 87#include <sys/dtrace_bsd.h> 88 89dtrace_nfsclient_accesscache_flush_probe_func_t 90 dtrace_nfscl_accesscache_flush_done_probe; 91uint32_t nfscl_accesscache_flush_done_id; 92 93dtrace_nfsclient_accesscache_get_probe_func_t 94 dtrace_nfscl_accesscache_get_hit_probe, 95 dtrace_nfscl_accesscache_get_miss_probe; 96uint32_t nfscl_accesscache_get_hit_id; 97uint32_t nfscl_accesscache_get_miss_id; 98 99dtrace_nfsclient_accesscache_load_probe_func_t 100 dtrace_nfscl_accesscache_load_done_probe; 101uint32_t nfscl_accesscache_load_done_id; 102#endif /* !KDTRACE_HOOKS */ 103 104/* Defs */ 105#define TRUE 1 106#define FALSE 0 107 108extern struct nfsstatsv1 
nfsstatsv1; 109extern int nfsrv_useacl; 110extern int nfscl_debuglevel; 111MALLOC_DECLARE(M_NEWNFSREQ); 112 113/* 114 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these 115 * calls are not in getblk() and brelse() so that they would not be necessary 116 * here. 117 */ 118#ifndef B_VMIO 119#define vfs_busy_pages(bp, f) 120#endif 121 122static vop_read_t nfsfifo_read; 123static vop_write_t nfsfifo_write; 124static vop_close_t nfsfifo_close; 125static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *, 126 struct thread *); 127static vop_lookup_t nfs_lookup; 128static vop_create_t nfs_create; 129static vop_mknod_t nfs_mknod; 130static vop_open_t nfs_open; 131static vop_pathconf_t nfs_pathconf; 132static vop_close_t nfs_close; 133static vop_access_t nfs_access; 134static vop_getattr_t nfs_getattr; 135static vop_setattr_t nfs_setattr; 136static vop_read_t nfs_read; 137static vop_fsync_t nfs_fsync; 138static vop_remove_t nfs_remove; 139static vop_link_t nfs_link; 140static vop_rename_t nfs_rename; 141static vop_mkdir_t nfs_mkdir; 142static vop_rmdir_t nfs_rmdir; 143static vop_symlink_t nfs_symlink; 144static vop_readdir_t nfs_readdir; 145static vop_strategy_t nfs_strategy; 146static int nfs_lookitup(struct vnode *, char *, int, 147 struct ucred *, struct thread *, struct nfsnode **); 148static int nfs_sillyrename(struct vnode *, struct vnode *, 149 struct componentname *); 150static vop_access_t nfsspec_access; 151static vop_readlink_t nfs_readlink; 152static vop_print_t nfs_print; 153static vop_advlock_t nfs_advlock; 154static vop_advlockasync_t nfs_advlockasync; 155static vop_getacl_t nfs_getacl; 156static vop_setacl_t nfs_setacl; 157 158/* 159 * Global vfs data structures for nfs 160 */ 161struct vop_vector newnfs_vnodeops = { 162 .vop_default = &default_vnodeops, 163 .vop_access = nfs_access, 164 .vop_advlock = nfs_advlock, 165 .vop_advlockasync = nfs_advlockasync, 166 .vop_close = nfs_close, 167 .vop_create = nfs_create, 168 
.vop_fsync = nfs_fsync, 169 .vop_getattr = nfs_getattr, 170 .vop_getpages = ncl_getpages, 171 .vop_putpages = ncl_putpages, 172 .vop_inactive = ncl_inactive, 173 .vop_link = nfs_link, 174 .vop_lookup = nfs_lookup, 175 .vop_mkdir = nfs_mkdir, 176 .vop_mknod = nfs_mknod, 177 .vop_open = nfs_open, 178 .vop_pathconf = nfs_pathconf, 179 .vop_print = nfs_print, 180 .vop_read = nfs_read, 181 .vop_readdir = nfs_readdir, 182 .vop_readlink = nfs_readlink, 183 .vop_reclaim = ncl_reclaim, 184 .vop_remove = nfs_remove, 185 .vop_rename = nfs_rename, 186 .vop_rmdir = nfs_rmdir, 187 .vop_setattr = nfs_setattr, 188 .vop_strategy = nfs_strategy, 189 .vop_symlink = nfs_symlink, 190 .vop_write = ncl_write, 191 .vop_getacl = nfs_getacl, 192 .vop_setacl = nfs_setacl, 193}; 194 195struct vop_vector newnfs_fifoops = { 196 .vop_default = &fifo_specops, 197 .vop_access = nfsspec_access, 198 .vop_close = nfsfifo_close, 199 .vop_fsync = nfs_fsync, 200 .vop_getattr = nfs_getattr, 201 .vop_inactive = ncl_inactive, 202 .vop_print = nfs_print, 203 .vop_read = nfsfifo_read, 204 .vop_reclaim = ncl_reclaim, 205 .vop_setattr = nfs_setattr, 206 .vop_write = nfsfifo_write, 207}; 208 209static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, 210 struct componentname *cnp, struct vattr *vap); 211static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 212 int namelen, struct ucred *cred, struct thread *td); 213static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, 214 char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp, 215 char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); 216static int nfs_renameit(struct vnode *sdvp, struct vnode *svp, 217 struct componentname *scnp, struct sillyrename *sp); 218 219/* 220 * Global variables 221 */ 222#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) 223 224SYSCTL_DECL(_vfs_nfs); 225 226static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; 227SYSCTL_INT(_vfs_nfs, OID_AUTO, 
access_cache_timeout, CTLFLAG_RW, 228 &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); 229 230static int nfs_prime_access_cache = 0; 231SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW, 232 &nfs_prime_access_cache, 0, 233 "Prime NFS ACCESS cache when fetching attributes"); 234 235static int newnfs_commit_on_close = 0; 236SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW, 237 &newnfs_commit_on_close, 0, "write+commit on close, else only write"); 238 239static int nfs_clean_pages_on_close = 1; 240SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, 241 &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); 242 243int newnfs_directio_enable = 0; 244SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, 245 &newnfs_directio_enable, 0, "Enable NFS directio"); 246 247int nfs_keep_dirty_on_error; 248SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW, 249 &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned"); 250 251/* 252 * This sysctl allows other processes to mmap a file that has been opened 253 * O_DIRECT by a process. In general, having processes mmap the file while 254 * Direct IO is in progress can lead to Data Inconsistencies. But, we allow 255 * this by default to prevent DoS attacks - to prevent a malicious user from 256 * opening up files O_DIRECT preventing other users from mmap'ing these 257 * files. "Protected" environments where stricter consistency guarantees are 258 * required can disable this knob. The process that opened the file O_DIRECT 259 * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not 260 * meaningful. 
261 */ 262int newnfs_directio_allow_mmap = 1; 263SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, 264 &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); 265 266#define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY \ 267 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \ 268 | NFSACCESS_DELETE | NFSACCESS_LOOKUP) 269 270/* 271 * SMP Locking Note : 272 * The list of locks after the description of the lock is the ordering 273 * of other locks acquired with the lock held. 274 * np->n_mtx : Protects the fields in the nfsnode. 275 VM Object Lock 276 VI_MTX (acquired indirectly) 277 * nmp->nm_mtx : Protects the fields in the nfsmount. 278 rep->r_mtx 279 * ncl_iod_mutex : Global lock, protects shared nfsiod state. 280 * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. 281 nmp->nm_mtx 282 rep->r_mtx 283 * rep->r_mtx : Protects the fields in an nfsreq. 284 */ 285 286static int 287nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td, 288 struct ucred *cred, u_int32_t *retmode) 289{ 290 int error = 0, attrflag, i, lrupos; 291 u_int32_t rmode; 292 struct nfsnode *np = VTONFS(vp); 293 struct nfsvattr nfsva; 294 295 error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag, 296 &rmode, NULL); 297 if (attrflag) 298 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 299 if (!error) { 300 lrupos = 0; 301 mtx_lock(&np->n_mtx); 302 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 303 if (np->n_accesscache[i].uid == cred->cr_uid) { 304 np->n_accesscache[i].mode = rmode; 305 np->n_accesscache[i].stamp = time_second; 306 break; 307 } 308 if (i > 0 && np->n_accesscache[i].stamp < 309 np->n_accesscache[lrupos].stamp) 310 lrupos = i; 311 } 312 if (i == NFS_ACCESSCACHESIZE) { 313 np->n_accesscache[lrupos].uid = cred->cr_uid; 314 np->n_accesscache[lrupos].mode = rmode; 315 np->n_accesscache[lrupos].stamp = time_second; 316 } 317 mtx_unlock(&np->n_mtx); 318 if (retmode != NULL) 319 *retmode = rmode; 320 
KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0); 321 } else if (NFS_ISV4(vp)) { 322 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 323 } 324#ifdef KDTRACE_HOOKS 325 if (error != 0) 326 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0, 327 error); 328#endif 329 return (error); 330} 331 332/* 333 * nfs access vnode op. 334 * For nfs version 2, just return ok. File accesses may fail later. 335 * For nfs version 3, use the access rpc to check accessibility. If file modes 336 * are changed on the server, accesses might still fail later. 337 */ 338static int 339nfs_access(struct vop_access_args *ap) 340{ 341 struct vnode *vp = ap->a_vp; 342 int error = 0, i, gotahit; 343 u_int32_t mode, wmode, rmode; 344 int v34 = NFS_ISV34(vp); 345 struct nfsnode *np = VTONFS(vp); 346 347 /* 348 * Disallow write attempts on filesystems mounted read-only; 349 * unless the file is a socket, fifo, or a block or character 350 * device resident on the filesystem. 351 */ 352 if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS | 353 VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL | 354 VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { 355 switch (vp->v_type) { 356 case VREG: 357 case VDIR: 358 case VLNK: 359 return (EROFS); 360 default: 361 break; 362 } 363 } 364 /* 365 * For nfs v3 or v4, check to see if we have done this recently, and if 366 * so return our cached result instead of making an ACCESS call. 367 * If not, do an access rpc, otherwise you are stuck emulating 368 * ufs_access() locally using the vattr. This may not be correct, 369 * since the server may apply other access criteria such as 370 * client uid-->server uid mapping that we do not know about. 
371 */ 372 if (v34) { 373 if (ap->a_accmode & VREAD) 374 mode = NFSACCESS_READ; 375 else 376 mode = 0; 377 if (vp->v_type != VDIR) { 378 if (ap->a_accmode & VWRITE) 379 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 380 if (ap->a_accmode & VAPPEND) 381 mode |= NFSACCESS_EXTEND; 382 if (ap->a_accmode & VEXEC) 383 mode |= NFSACCESS_EXECUTE; 384 if (ap->a_accmode & VDELETE) 385 mode |= NFSACCESS_DELETE; 386 } else { 387 if (ap->a_accmode & VWRITE) 388 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND); 389 if (ap->a_accmode & VAPPEND) 390 mode |= NFSACCESS_EXTEND; 391 if (ap->a_accmode & VEXEC) 392 mode |= NFSACCESS_LOOKUP; 393 if (ap->a_accmode & VDELETE) 394 mode |= NFSACCESS_DELETE; 395 if (ap->a_accmode & VDELETE_CHILD) 396 mode |= NFSACCESS_MODIFY; 397 } 398 /* XXX safety belt, only make blanket request if caching */ 399 if (nfsaccess_cache_timeout > 0) { 400 wmode = NFSACCESS_READ | NFSACCESS_MODIFY | 401 NFSACCESS_EXTEND | NFSACCESS_EXECUTE | 402 NFSACCESS_DELETE | NFSACCESS_LOOKUP; 403 } else { 404 wmode = mode; 405 } 406 407 /* 408 * Does our cached result allow us to give a definite yes to 409 * this request? 410 */ 411 gotahit = 0; 412 mtx_lock(&np->n_mtx); 413 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) { 414 if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) { 415 if (time_second < (np->n_accesscache[i].stamp 416 + nfsaccess_cache_timeout) && 417 (np->n_accesscache[i].mode & mode) == mode) { 418 NFSINCRGLOBAL(nfsstatsv1.accesscache_hits); 419 gotahit = 1; 420 } 421 break; 422 } 423 } 424 mtx_unlock(&np->n_mtx); 425#ifdef KDTRACE_HOOKS 426 if (gotahit != 0) 427 KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, 428 ap->a_cred->cr_uid, mode); 429 else 430 KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, 431 ap->a_cred->cr_uid, mode); 432#endif 433 if (gotahit == 0) { 434 /* 435 * Either a no, or a don't know. Go to the wire. 
436 */ 437 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 438 error = nfs34_access_otw(vp, wmode, ap->a_td, 439 ap->a_cred, &rmode); 440 if (!error && 441 (rmode & mode) != mode) 442 error = EACCES; 443 } 444 return (error); 445 } else { 446 if ((error = nfsspec_access(ap)) != 0) { 447 return (error); 448 } 449 /* 450 * Attempt to prevent a mapped root from accessing a file 451 * which it shouldn't. We try to read a byte from the file 452 * if the user is root and the file is not zero length. 453 * After calling nfsspec_access, we should have the correct 454 * file size cached. 455 */ 456 mtx_lock(&np->n_mtx); 457 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) 458 && VTONFS(vp)->n_size > 0) { 459 struct iovec aiov; 460 struct uio auio; 461 char buf[1]; 462 463 mtx_unlock(&np->n_mtx); 464 aiov.iov_base = buf; 465 aiov.iov_len = 1; 466 auio.uio_iov = &aiov; 467 auio.uio_iovcnt = 1; 468 auio.uio_offset = 0; 469 auio.uio_resid = 1; 470 auio.uio_segflg = UIO_SYSSPACE; 471 auio.uio_rw = UIO_READ; 472 auio.uio_td = ap->a_td; 473 474 if (vp->v_type == VREG) 475 error = ncl_readrpc(vp, &auio, ap->a_cred); 476 else if (vp->v_type == VDIR) { 477 char* bp; 478 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); 479 aiov.iov_base = bp; 480 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; 481 error = ncl_readdirrpc(vp, &auio, ap->a_cred, 482 ap->a_td); 483 free(bp, M_TEMP); 484 } else if (vp->v_type == VLNK) 485 error = ncl_readlinkrpc(vp, &auio, ap->a_cred); 486 else 487 error = EACCES; 488 } else 489 mtx_unlock(&np->n_mtx); 490 return (error); 491 } 492} 493 494 495/* 496 * nfs open vnode op 497 * Check to see if the type is ok 498 * and that deletion is not in progress. 499 * For paged in text files, you will need to flush the page cache 500 * if consistency is lost. 
501 */ 502/* ARGSUSED */ 503static int 504nfs_open(struct vop_open_args *ap) 505{ 506 struct vnode *vp = ap->a_vp; 507 struct nfsnode *np = VTONFS(vp); 508 struct vattr vattr; 509 int error; 510 int fmode = ap->a_mode; 511 struct ucred *cred; 512 513 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) 514 return (EOPNOTSUPP); 515 516 /* 517 * For NFSv4, we need to do the Open Op before cache validation, 518 * so that we conform to RFC3530 Sec. 9.3.1. 519 */ 520 if (NFS_ISV4(vp)) { 521 error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td); 522 if (error) { 523 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 524 (gid_t)0); 525 return (error); 526 } 527 } 528 529 /* 530 * Now, if this Open will be doing reading, re-validate/flush the 531 * cache, so that Close/Open coherency is maintained. 532 */ 533 mtx_lock(&np->n_mtx); 534 if (np->n_flag & NMODIFIED) { 535 mtx_unlock(&np->n_mtx); 536 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 537 if (error == EINTR || error == EIO) { 538 if (NFS_ISV4(vp)) 539 (void) nfsrpc_close(vp, 0, ap->a_td); 540 return (error); 541 } 542 mtx_lock(&np->n_mtx); 543 np->n_attrstamp = 0; 544 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 545 if (vp->v_type == VDIR) 546 np->n_direofoffset = 0; 547 mtx_unlock(&np->n_mtx); 548 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 549 if (error) { 550 if (NFS_ISV4(vp)) 551 (void) nfsrpc_close(vp, 0, ap->a_td); 552 return (error); 553 } 554 mtx_lock(&np->n_mtx); 555 np->n_mtime = vattr.va_mtime; 556 if (NFS_ISV4(vp)) 557 np->n_change = vattr.va_filerev; 558 } else { 559 mtx_unlock(&np->n_mtx); 560 error = VOP_GETATTR(vp, &vattr, ap->a_cred); 561 if (error) { 562 if (NFS_ISV4(vp)) 563 (void) nfsrpc_close(vp, 0, ap->a_td); 564 return (error); 565 } 566 mtx_lock(&np->n_mtx); 567 if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) || 568 NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { 569 if (vp->v_type == VDIR) 570 np->n_direofoffset = 0; 571 mtx_unlock(&np->n_mtx); 572 error = 
ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 573 if (error == EINTR || error == EIO) { 574 if (NFS_ISV4(vp)) 575 (void) nfsrpc_close(vp, 0, ap->a_td); 576 return (error); 577 } 578 mtx_lock(&np->n_mtx); 579 np->n_mtime = vattr.va_mtime; 580 if (NFS_ISV4(vp)) 581 np->n_change = vattr.va_filerev; 582 } 583 } 584 585 /* 586 * If the object has >= 1 O_DIRECT active opens, we disable caching. 587 */ 588 if (newnfs_directio_enable && (fmode & O_DIRECT) && 589 (vp->v_type == VREG)) { 590 if (np->n_directio_opens == 0) { 591 mtx_unlock(&np->n_mtx); 592 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 593 if (error) { 594 if (NFS_ISV4(vp)) 595 (void) nfsrpc_close(vp, 0, ap->a_td); 596 return (error); 597 } 598 mtx_lock(&np->n_mtx); 599 np->n_flag |= NNONCACHE; 600 } 601 np->n_directio_opens++; 602 } 603 604 /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ 605 if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) 606 np->n_flag |= NWRITEOPENED; 607 608 /* 609 * If this is an open for writing, capture a reference to the 610 * credentials, so they can be used by ncl_putpages(). Using 611 * these write credentials is preferable to the credentials of 612 * whatever thread happens to be doing the VOP_PUTPAGES() since 613 * the write RPCs are less likely to fail with EACCES. 614 */ 615 if ((fmode & FWRITE) != 0) { 616 cred = np->n_writecred; 617 np->n_writecred = crhold(ap->a_cred); 618 } else 619 cred = NULL; 620 mtx_unlock(&np->n_mtx); 621 622 if (cred != NULL) 623 crfree(cred); 624 vnode_create_vobject(vp, vattr.va_size, ap->a_td); 625 return (0); 626} 627 628/* 629 * nfs close vnode op 630 * What an NFS client should do upon close after writing is a debatable issue. 631 * Most NFS clients push delayed writes to the server upon close, basically for 632 * two reasons: 633 * 1 - So that any write errors may be reported back to the client process 634 * doing the close system call. 
 *     By far the two most likely errors are
 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
 * 2 - To put a worst case upper bound on cache inconsistency between
 *     multiple clients for the file.
 * There is also a consistency problem for Version 2 of the protocol w.r.t.
 * not being able to tell if other clients are writing a file concurrently,
 * since there is no way of knowing if the changed modify time in the reply
 * is only due to the write for this client.
 * (NFS Version 3 provides weak cache consistency data in the reply that
 *  should be sufficient to detect and handle this case.)
 *
 * The current code does the following:
 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
 *                     or commit them (this satisfies 1 and 2 except for the
 *                     case where the server crashes after this close but
 *                     before the commit RPC, which is felt to be "good
 *                     enough"). Changing the last argument to ncl_flush() to
 *                     a 1 would force a commit operation, if it is felt a
 *                     commit is necessary now.
 * for NFS Version 4 - flush the dirty buffers and commit them, if
 *                     nfscl_mustflush() says this is necessary.
 *                     It is necessary if there is no write delegation held,
 *                     in order to satisfy open/close coherency.
 *                     If the file isn't cached on local stable storage,
 *                     it may be necessary in order to detect "out of space"
 *                     errors from the server, if the write delegation
 *                     issued by the server doesn't allow the file to grow.
662 */ 663/* ARGSUSED */ 664static int 665nfs_close(struct vop_close_args *ap) 666{ 667 struct vnode *vp = ap->a_vp; 668 struct nfsnode *np = VTONFS(vp); 669 struct nfsvattr nfsva; 670 struct ucred *cred; 671 int error = 0, ret, localcred = 0; 672 int fmode = ap->a_fflag; 673 674 if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)) 675 return (0); 676 /* 677 * During shutdown, a_cred isn't valid, so just use root. 678 */ 679 if (ap->a_cred == NOCRED) { 680 cred = newnfs_getcred(); 681 localcred = 1; 682 } else { 683 cred = ap->a_cred; 684 } 685 if (vp->v_type == VREG) { 686 /* 687 * Examine and clean dirty pages, regardless of NMODIFIED. 688 * This closes a major hole in close-to-open consistency. 689 * We want to push out all dirty pages (and buffers) on 690 * close, regardless of whether they were dirtied by 691 * mmap'ed writes or via write(). 692 */ 693 if (nfs_clean_pages_on_close && vp->v_object) { 694 VM_OBJECT_WLOCK(vp->v_object); 695 vm_object_page_clean(vp->v_object, 0, 0, 0); 696 VM_OBJECT_WUNLOCK(vp->v_object); 697 } 698 mtx_lock(&np->n_mtx); 699 if (np->n_flag & NMODIFIED) { 700 mtx_unlock(&np->n_mtx); 701 if (NFS_ISV3(vp)) { 702 /* 703 * Under NFSv3 we have dirty buffers to dispose of. We 704 * must flush them to the NFS server. We have the option 705 * of waiting all the way through the commit rpc or just 706 * waiting for the initial write. The default is to only 707 * wait through the initial write so the data is in the 708 * server's cache, which is roughly similar to the state 709 * a standard disk subsystem leaves the file in on close(). 710 * 711 * We cannot clear the NMODIFIED bit in np->n_flag due to 712 * potential races with other processes, and certainly 713 * cannot clear it if we don't commit. 714 * These races occur when there is no longer the old 715 * traditional vnode locking implemented for Vnode Ops. 716 */ 717 int cm = newnfs_commit_on_close ? 
1 : 0; 718 error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0); 719 /* np->n_flag &= ~NMODIFIED; */ 720 } else if (NFS_ISV4(vp)) { 721 if (nfscl_mustflush(vp) != 0) { 722 int cm = newnfs_commit_on_close ? 1 : 0; 723 error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, 724 cm, 0); 725 /* 726 * as above w.r.t races when clearing 727 * NMODIFIED. 728 * np->n_flag &= ~NMODIFIED; 729 */ 730 } 731 } else 732 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); 733 mtx_lock(&np->n_mtx); 734 } 735 /* 736 * Invalidate the attribute cache in all cases. 737 * An open is going to fetch fresh attrs any way, other procs 738 * on this node that have file open will be forced to do an 739 * otw attr fetch, but this is safe. 740 * --> A user found that their RPC count dropped by 20% when 741 * this was commented out and I can't see any requirement 742 * for it, so I've disabled it when negative lookups are 743 * enabled. (What does this have to do with negative lookup 744 * caching? Well nothing, except it was reported by the 745 * same user that needed negative lookup caching and I wanted 746 * there to be a way to disable it to see if it 747 * is the cause of some caching/coherency issue that might 748 * crop up.) 749 */ 750 if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) { 751 np->n_attrstamp = 0; 752 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 753 } 754 if (np->n_flag & NWRITEERR) { 755 np->n_flag &= ~NWRITEERR; 756 error = np->n_error; 757 } 758 mtx_unlock(&np->n_mtx); 759 } 760 761 if (NFS_ISV4(vp)) { 762 /* 763 * Get attributes so "change" is up to date. 764 */ 765 if (error == 0 && nfscl_mustflush(vp) != 0 && 766 vp->v_type == VREG && 767 (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) { 768 ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva, 769 NULL); 770 if (!ret) { 771 np->n_change = nfsva.na_filerev; 772 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, 773 NULL, 0, 0); 774 } 775 } 776 777 /* 778 * and do the close. 
779 */ 780 ret = nfsrpc_close(vp, 0, ap->a_td); 781 if (!error && ret) 782 error = ret; 783 if (error) 784 error = nfscl_maperr(ap->a_td, error, (uid_t)0, 785 (gid_t)0); 786 } 787 if (newnfs_directio_enable) 788 KASSERT((np->n_directio_asyncwr == 0), 789 ("nfs_close: dirty unflushed (%d) directio buffers\n", 790 np->n_directio_asyncwr)); 791 if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { 792 mtx_lock(&np->n_mtx); 793 KASSERT((np->n_directio_opens > 0), 794 ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); 795 np->n_directio_opens--; 796 if (np->n_directio_opens == 0) 797 np->n_flag &= ~NNONCACHE; 798 mtx_unlock(&np->n_mtx); 799 } 800 if (localcred) 801 NFSFREECRED(cred); 802 return (error); 803} 804 805/* 806 * nfs getattr call from vfs. 807 */ 808static int 809nfs_getattr(struct vop_getattr_args *ap) 810{ 811 struct vnode *vp = ap->a_vp; 812 struct thread *td = curthread; /* XXX */ 813 struct nfsnode *np = VTONFS(vp); 814 int error = 0; 815 struct nfsvattr nfsva; 816 struct vattr *vap = ap->a_vap; 817 struct vattr vattr; 818 819 /* 820 * Update local times for special files. 821 */ 822 mtx_lock(&np->n_mtx); 823 if (np->n_flag & (NACC | NUPD)) 824 np->n_flag |= NCHG; 825 mtx_unlock(&np->n_mtx); 826 /* 827 * First look in the cache. 
828 */ 829 if (ncl_getattrcache(vp, &vattr) == 0) { 830 vap->va_type = vattr.va_type; 831 vap->va_mode = vattr.va_mode; 832 vap->va_nlink = vattr.va_nlink; 833 vap->va_uid = vattr.va_uid; 834 vap->va_gid = vattr.va_gid; 835 vap->va_fsid = vattr.va_fsid; 836 vap->va_fileid = vattr.va_fileid; 837 vap->va_size = vattr.va_size; 838 vap->va_blocksize = vattr.va_blocksize; 839 vap->va_atime = vattr.va_atime; 840 vap->va_mtime = vattr.va_mtime; 841 vap->va_ctime = vattr.va_ctime; 842 vap->va_gen = vattr.va_gen; 843 vap->va_flags = vattr.va_flags; 844 vap->va_rdev = vattr.va_rdev; 845 vap->va_bytes = vattr.va_bytes; 846 vap->va_filerev = vattr.va_filerev; 847 /* 848 * Get the local modify time for the case of a write 849 * delegation. 850 */ 851 nfscl_deleggetmodtime(vp, &vap->va_mtime); 852 return (0); 853 } 854 855 if (NFS_ISV34(vp) && nfs_prime_access_cache && 856 nfsaccess_cache_timeout > 0) { 857 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses); 858 nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL); 859 if (ncl_getattrcache(vp, ap->a_vap) == 0) { 860 nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime); 861 return (0); 862 } 863 } 864 error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL); 865 if (!error) 866 error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0); 867 if (!error) { 868 /* 869 * Get the local modify time for the case of a write 870 * delegation. 871 */ 872 nfscl_deleggetmodtime(vp, &vap->va_mtime); 873 } else if (NFS_ISV4(vp)) { 874 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 875 } 876 return (error); 877} 878 879/* 880 * nfs setattr call. 881 */ 882static int 883nfs_setattr(struct vop_setattr_args *ap) 884{ 885 struct vnode *vp = ap->a_vp; 886 struct nfsnode *np = VTONFS(vp); 887 struct thread *td = curthread; /* XXX */ 888 struct vattr *vap = ap->a_vap; 889 int error = 0; 890 u_quad_t tsize; 891 892#ifndef nolint 893 tsize = (u_quad_t)0; 894#endif 895 896 /* 897 * Setting of flags and marking of atimes are not supported. 
898 */ 899 if (vap->va_flags != VNOVAL) 900 return (EOPNOTSUPP); 901 902 /* 903 * Disallow write attempts if the filesystem is mounted read-only. 904 */ 905 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 906 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 907 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && 908 (vp->v_mount->mnt_flag & MNT_RDONLY)) 909 return (EROFS); 910 if (vap->va_size != VNOVAL) { 911 switch (vp->v_type) { 912 case VDIR: 913 return (EISDIR); 914 case VCHR: 915 case VBLK: 916 case VSOCK: 917 case VFIFO: 918 if (vap->va_mtime.tv_sec == VNOVAL && 919 vap->va_atime.tv_sec == VNOVAL && 920 vap->va_mode == (mode_t)VNOVAL && 921 vap->va_uid == (uid_t)VNOVAL && 922 vap->va_gid == (gid_t)VNOVAL) 923 return (0); 924 vap->va_size = VNOVAL; 925 break; 926 default: 927 /* 928 * Disallow write attempts if the filesystem is 929 * mounted read-only. 930 */ 931 if (vp->v_mount->mnt_flag & MNT_RDONLY) 932 return (EROFS); 933 /* 934 * We run vnode_pager_setsize() early (why?), 935 * we must set np->n_size now to avoid vinvalbuf 936 * V_SAVE races that might setsize a lower 937 * value. 938 */ 939 mtx_lock(&np->n_mtx); 940 tsize = np->n_size; 941 mtx_unlock(&np->n_mtx); 942 error = ncl_meta_setsize(vp, ap->a_cred, td, 943 vap->va_size); 944 mtx_lock(&np->n_mtx); 945 if (np->n_flag & NMODIFIED) { 946 tsize = np->n_size; 947 mtx_unlock(&np->n_mtx); 948 if (vap->va_size == 0) 949 error = ncl_vinvalbuf(vp, 0, td, 1); 950 else 951 error = ncl_vinvalbuf(vp, V_SAVE, td, 1); 952 if (error) { 953 vnode_pager_setsize(vp, tsize); 954 return (error); 955 } 956 /* 957 * Call nfscl_delegmodtime() to set the modify time 958 * locally, as required. 959 */ 960 nfscl_delegmodtime(vp); 961 } else 962 mtx_unlock(&np->n_mtx); 963 /* 964 * np->n_size has already been set to vap->va_size 965 * in ncl_meta_setsize(). 
We must set it again since 966 * nfs_loadattrcache() could be called through 967 * ncl_meta_setsize() and could modify np->n_size. 968 */ 969 mtx_lock(&np->n_mtx); 970 np->n_vattr.na_size = np->n_size = vap->va_size; 971 mtx_unlock(&np->n_mtx); 972 } 973 } else { 974 mtx_lock(&np->n_mtx); 975 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 976 (np->n_flag & NMODIFIED) && vp->v_type == VREG) { 977 mtx_unlock(&np->n_mtx); 978 if ((error = ncl_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && 979 (error == EINTR || error == EIO)) 980 return (error); 981 } else 982 mtx_unlock(&np->n_mtx); 983 } 984 error = nfs_setattrrpc(vp, vap, ap->a_cred, td); 985 if (error && vap->va_size != VNOVAL) { 986 mtx_lock(&np->n_mtx); 987 np->n_size = np->n_vattr.na_size = tsize; 988 vnode_pager_setsize(vp, tsize); 989 mtx_unlock(&np->n_mtx); 990 } 991 return (error); 992} 993 994/* 995 * Do an nfs setattr rpc. 996 */ 997static int 998nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred, 999 struct thread *td) 1000{ 1001 struct nfsnode *np = VTONFS(vp); 1002 int error, ret, attrflag, i; 1003 struct nfsvattr nfsva; 1004 1005 if (NFS_ISV34(vp)) { 1006 mtx_lock(&np->n_mtx); 1007 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) 1008 np->n_accesscache[i].stamp = 0; 1009 np->n_flag |= NDELEGMOD; 1010 mtx_unlock(&np->n_mtx); 1011 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp); 1012 } 1013 error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag, 1014 NULL); 1015 if (attrflag) { 1016 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1017 if (ret && !error) 1018 error = ret; 1019 } 1020 if (error && NFS_ISV4(vp)) 1021 error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid); 1022 return (error); 1023} 1024 1025/* 1026 * nfs lookup call, one step at a time... 
1027 * First look in cache 1028 * If not found, unlock the directory nfsnode and do the rpc 1029 */ 1030static int 1031nfs_lookup(struct vop_lookup_args *ap) 1032{ 1033 struct componentname *cnp = ap->a_cnp; 1034 struct vnode *dvp = ap->a_dvp; 1035 struct vnode **vpp = ap->a_vpp; 1036 struct mount *mp = dvp->v_mount; 1037 int flags = cnp->cn_flags; 1038 struct vnode *newvp; 1039 struct nfsmount *nmp; 1040 struct nfsnode *np, *newnp; 1041 int error = 0, attrflag, dattrflag, ltype, ncticks; 1042 struct thread *td = cnp->cn_thread; 1043 struct nfsfh *nfhp; 1044 struct nfsvattr dnfsva, nfsva; 1045 struct vattr vattr; 1046 struct timespec nctime; 1047 1048 *vpp = NULLVP; 1049 if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && 1050 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 1051 return (EROFS); 1052 if (dvp->v_type != VDIR) 1053 return (ENOTDIR); 1054 nmp = VFSTONFS(mp); 1055 np = VTONFS(dvp); 1056 1057 /* For NFSv4, wait until any remove is done. */ 1058 mtx_lock(&np->n_mtx); 1059 while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) { 1060 np->n_flag |= NREMOVEWANT; 1061 (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0); 1062 } 1063 mtx_unlock(&np->n_mtx); 1064 1065 if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) 1066 return (error); 1067 error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks); 1068 if (error > 0 && error != ENOENT) 1069 return (error); 1070 if (error == -1) { 1071 /* 1072 * Lookups of "." are special and always return the 1073 * current directory. cache_lookup() already handles 1074 * associated locking bookkeeping, etc. 1075 */ 1076 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { 1077 /* XXX: Is this really correct? */ 1078 if (cnp->cn_nameiop != LOOKUP && 1079 (flags & ISLASTCN)) 1080 cnp->cn_flags |= SAVENAME; 1081 return (0); 1082 } 1083 1084 /* 1085 * We only accept a positive hit in the cache if the 1086 * change time of the file matches our cached copy. 
1087 * Otherwise, we discard the cache entry and fallback 1088 * to doing a lookup RPC. We also only trust cache 1089 * entries for less than nm_nametimeo seconds. 1090 * 1091 * To better handle stale file handles and attributes, 1092 * clear the attribute cache of this node if it is a 1093 * leaf component, part of an open() call, and not 1094 * locally modified before fetching the attributes. 1095 * This should allow stale file handles to be detected 1096 * here where we can fall back to a LOOKUP RPC to 1097 * recover rather than having nfs_open() detect the 1098 * stale file handle and failing open(2) with ESTALE. 1099 */ 1100 newvp = *vpp; 1101 newnp = VTONFS(newvp); 1102 if (!(nmp->nm_flag & NFSMNT_NOCTO) && 1103 (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1104 !(newnp->n_flag & NMODIFIED)) { 1105 mtx_lock(&newnp->n_mtx); 1106 newnp->n_attrstamp = 0; 1107 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1108 mtx_unlock(&newnp->n_mtx); 1109 } 1110 if (nfscl_nodeleg(newvp, 0) == 0 || 1111 ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) && 1112 VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && 1113 timespeccmp(&vattr.va_ctime, &nctime, ==))) { 1114 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1115 if (cnp->cn_nameiop != LOOKUP && 1116 (flags & ISLASTCN)) 1117 cnp->cn_flags |= SAVENAME; 1118 return (0); 1119 } 1120 cache_purge(newvp); 1121 if (dvp != newvp) 1122 vput(newvp); 1123 else 1124 vrele(newvp); 1125 *vpp = NULLVP; 1126 } else if (error == ENOENT) { 1127 if (dvp->v_iflag & VI_DOOMED) 1128 return (ENOENT); 1129 /* 1130 * We only accept a negative hit in the cache if the 1131 * modification time of the parent directory matches 1132 * the cached copy in the name cache entry. 1133 * Otherwise, we discard all of the negative cache 1134 * entries for this directory. We also only trust 1135 * negative cache entries for up to nm_negnametimeo 1136 * seconds. 
1137 */ 1138 if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) && 1139 VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && 1140 timespeccmp(&vattr.va_mtime, &nctime, ==)) { 1141 NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits); 1142 return (ENOENT); 1143 } 1144 cache_purge_negative(dvp); 1145 } 1146 1147 error = 0; 1148 newvp = NULLVP; 1149 NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses); 1150 error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1151 cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1152 NULL); 1153 if (dattrflag) 1154 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1155 if (error) { 1156 if (newvp != NULLVP) { 1157 vput(newvp); 1158 *vpp = NULLVP; 1159 } 1160 1161 if (error != ENOENT) { 1162 if (NFS_ISV4(dvp)) 1163 error = nfscl_maperr(td, error, (uid_t)0, 1164 (gid_t)0); 1165 return (error); 1166 } 1167 1168 /* The requested file was not found. */ 1169 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && 1170 (flags & ISLASTCN)) { 1171 /* 1172 * XXX: UFS does a full VOP_ACCESS(dvp, 1173 * VWRITE) here instead of just checking 1174 * MNT_RDONLY. 1175 */ 1176 if (mp->mnt_flag & MNT_RDONLY) 1177 return (EROFS); 1178 cnp->cn_flags |= SAVENAME; 1179 return (EJUSTRETURN); 1180 } 1181 1182 if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) { 1183 /* 1184 * Cache the modification time of the parent 1185 * directory from the post-op attributes in 1186 * the name cache entry. The negative cache 1187 * entry will be ignored once the directory 1188 * has changed. Don't bother adding the entry 1189 * if the directory has already changed. 1190 */ 1191 mtx_lock(&np->n_mtx); 1192 if (timespeccmp(&np->n_vattr.na_mtime, 1193 &dnfsva.na_mtime, ==)) { 1194 mtx_unlock(&np->n_mtx); 1195 cache_enter_time(dvp, NULL, cnp, 1196 &dnfsva.na_mtime, NULL); 1197 } else 1198 mtx_unlock(&np->n_mtx); 1199 } 1200 return (ENOENT); 1201 } 1202 1203 /* 1204 * Handle RENAME case... 
1205 */ 1206 if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { 1207 if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1208 FREE((caddr_t)nfhp, M_NFSFH); 1209 return (EISDIR); 1210 } 1211 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, 1212 LK_EXCLUSIVE); 1213 if (error) 1214 return (error); 1215 newvp = NFSTOV(np); 1216 if (attrflag) 1217 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1218 0, 1); 1219 *vpp = newvp; 1220 cnp->cn_flags |= SAVENAME; 1221 return (0); 1222 } 1223 1224 if (flags & ISDOTDOT) { 1225 ltype = NFSVOPISLOCKED(dvp); 1226 error = vfs_busy(mp, MBF_NOWAIT); 1227 if (error != 0) { 1228 vfs_ref(mp); 1229 NFSVOPUNLOCK(dvp, 0); 1230 error = vfs_busy(mp, 0); 1231 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1232 vfs_rel(mp); 1233 if (error == 0 && (dvp->v_iflag & VI_DOOMED)) { 1234 vfs_unbusy(mp); 1235 error = ENOENT; 1236 } 1237 if (error != 0) 1238 return (error); 1239 } 1240 NFSVOPUNLOCK(dvp, 0); 1241 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, 1242 cnp->cn_lkflags); 1243 if (error == 0) 1244 newvp = NFSTOV(np); 1245 vfs_unbusy(mp); 1246 if (newvp != dvp) 1247 NFSVOPLOCK(dvp, ltype | LK_RETRY); 1248 if (dvp->v_iflag & VI_DOOMED) { 1249 if (error == 0) { 1250 if (newvp == dvp) 1251 vrele(newvp); 1252 else 1253 vput(newvp); 1254 } 1255 error = ENOENT; 1256 } 1257 if (error != 0) 1258 return (error); 1259 if (attrflag) 1260 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1261 0, 1); 1262 } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) { 1263 FREE((caddr_t)nfhp, M_NFSFH); 1264 VREF(dvp); 1265 newvp = dvp; 1266 if (attrflag) 1267 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1268 0, 1); 1269 } else { 1270 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL, 1271 cnp->cn_lkflags); 1272 if (error) 1273 return (error); 1274 newvp = NFSTOV(np); 1275 if (attrflag) 1276 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1277 0, 1); 1278 else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) && 1279 
!(np->n_flag & NMODIFIED)) { 1280 /* 1281 * Flush the attribute cache when opening a 1282 * leaf node to ensure that fresh attributes 1283 * are fetched in nfs_open() since we did not 1284 * fetch attributes from the LOOKUP reply. 1285 */ 1286 mtx_lock(&np->n_mtx); 1287 np->n_attrstamp = 0; 1288 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp); 1289 mtx_unlock(&np->n_mtx); 1290 } 1291 } 1292 if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) 1293 cnp->cn_flags |= SAVENAME; 1294 if ((cnp->cn_flags & MAKEENTRY) && 1295 (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && 1296 attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0)) 1297 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 1298 newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime); 1299 *vpp = newvp; 1300 return (0); 1301} 1302 1303/* 1304 * nfs read call. 1305 * Just call ncl_bioread() to do the work. 1306 */ 1307static int 1308nfs_read(struct vop_read_args *ap) 1309{ 1310 struct vnode *vp = ap->a_vp; 1311 1312 switch (vp->v_type) { 1313 case VREG: 1314 return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); 1315 case VDIR: 1316 return (EISDIR); 1317 default: 1318 return (EOPNOTSUPP); 1319 } 1320} 1321 1322/* 1323 * nfs readlink call 1324 */ 1325static int 1326nfs_readlink(struct vop_readlink_args *ap) 1327{ 1328 struct vnode *vp = ap->a_vp; 1329 1330 if (vp->v_type != VLNK) 1331 return (EINVAL); 1332 return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred)); 1333} 1334 1335/* 1336 * Do a readlink rpc. 1337 * Called by ncl_doio() from below the buffer cache. 
1338 */ 1339int 1340ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1341{ 1342 int error, ret, attrflag; 1343 struct nfsvattr nfsva; 1344 1345 error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva, 1346 &attrflag, NULL); 1347 if (attrflag) { 1348 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1349 if (ret && !error) 1350 error = ret; 1351 } 1352 if (error && NFS_ISV4(vp)) 1353 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1354 return (error); 1355} 1356 1357/* 1358 * nfs read rpc call 1359 * Ditto above 1360 */ 1361int 1362ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) 1363{ 1364 int error, ret, attrflag; 1365 struct nfsvattr nfsva; 1366 struct nfsmount *nmp; 1367 1368 nmp = VFSTONFS(vnode_mount(vp)); 1369 error = EIO; 1370 attrflag = 0; 1371 if (NFSHASPNFS(nmp)) 1372 error = nfscl_doiods(vp, uiop, NULL, NULL, 1373 NFSV4OPEN_ACCESSREAD, cred, uiop->uio_td); 1374 NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); 1375 if (error != 0) 1376 error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, 1377 &attrflag, NULL); 1378 if (attrflag) { 1379 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1380 if (ret && !error) 1381 error = ret; 1382 } 1383 if (error && NFS_ISV4(vp)) 1384 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1385 return (error); 1386} 1387 1388/* 1389 * nfs write call 1390 */ 1391int 1392ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 1393 int *iomode, int *must_commit, int called_from_strategy) 1394{ 1395 struct nfsvattr nfsva; 1396 int error, attrflag, ret; 1397 struct nfsmount *nmp; 1398 1399 nmp = VFSTONFS(vnode_mount(vp)); 1400 error = EIO; 1401 attrflag = 0; 1402 if (NFSHASPNFS(nmp)) 1403 error = nfscl_doiods(vp, uiop, iomode, must_commit, 1404 NFSV4OPEN_ACCESSWRITE, cred, uiop->uio_td); 1405 NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); 1406 if (error != 0) 1407 error = nfsrpc_write(vp, uiop, iomode, must_commit, 
cred, 1408 uiop->uio_td, &nfsva, &attrflag, NULL, 1409 called_from_strategy); 1410 if (attrflag) { 1411 if (VTONFS(vp)->n_flag & ND_NFSV4) 1412 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1, 1413 1); 1414 else 1415 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1416 1); 1417 if (ret && !error) 1418 error = ret; 1419 } 1420 if (DOINGASYNC(vp)) 1421 *iomode = NFSWRITE_FILESYNC; 1422 if (error && NFS_ISV4(vp)) 1423 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0); 1424 return (error); 1425} 1426 1427/* 1428 * nfs mknod rpc 1429 * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the 1430 * mode set to specify the file type and the size field for rdev. 1431 */ 1432static int 1433nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, 1434 struct vattr *vap) 1435{ 1436 struct nfsvattr nfsva, dnfsva; 1437 struct vnode *newvp = NULL; 1438 struct nfsnode *np = NULL, *dnp; 1439 struct nfsfh *nfhp; 1440 struct vattr vattr; 1441 int error = 0, attrflag, dattrflag; 1442 u_int32_t rdev; 1443 1444 if (vap->va_type == VCHR || vap->va_type == VBLK) 1445 rdev = vap->va_rdev; 1446 else if (vap->va_type == VFIFO || vap->va_type == VSOCK) 1447 rdev = 0xffffffff; 1448 else 1449 return (EOPNOTSUPP); 1450 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1451 return (error); 1452 error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap, 1453 rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva, 1454 &nfsva, &nfhp, &attrflag, &dattrflag, NULL); 1455 if (!error) { 1456 if (!nfhp) 1457 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1458 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, 1459 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1460 NULL); 1461 if (nfhp) 1462 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1463 cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); 1464 } 1465 if (dattrflag) 1466 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1467 if (!error) { 1468 newvp = NFSTOV(np); 1469 if 
(attrflag != 0) { 1470 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1471 0, 1); 1472 if (error != 0) 1473 vput(newvp); 1474 } 1475 } 1476 if (!error) { 1477 *vpp = newvp; 1478 } else if (NFS_ISV4(dvp)) { 1479 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, 1480 vap->va_gid); 1481 } 1482 dnp = VTONFS(dvp); 1483 mtx_lock(&dnp->n_mtx); 1484 dnp->n_flag |= NMODIFIED; 1485 if (!dattrflag) { 1486 dnp->n_attrstamp = 0; 1487 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1488 } 1489 mtx_unlock(&dnp->n_mtx); 1490 return (error); 1491} 1492 1493/* 1494 * nfs mknod vop 1495 * just call nfs_mknodrpc() to do the work. 1496 */ 1497/* ARGSUSED */ 1498static int 1499nfs_mknod(struct vop_mknod_args *ap) 1500{ 1501 return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); 1502} 1503 1504static struct mtx nfs_cverf_mtx; 1505MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex", 1506 MTX_DEF); 1507 1508static nfsquad_t 1509nfs_get_cverf(void) 1510{ 1511 static nfsquad_t cverf; 1512 nfsquad_t ret; 1513 static int cverf_initialized = 0; 1514 1515 mtx_lock(&nfs_cverf_mtx); 1516 if (cverf_initialized == 0) { 1517 cverf.lval[0] = arc4random(); 1518 cverf.lval[1] = arc4random(); 1519 cverf_initialized = 1; 1520 } else 1521 cverf.qval++; 1522 ret = cverf; 1523 mtx_unlock(&nfs_cverf_mtx); 1524 1525 return (ret); 1526} 1527 1528/* 1529 * nfs file create call 1530 */ 1531static int 1532nfs_create(struct vop_create_args *ap) 1533{ 1534 struct vnode *dvp = ap->a_dvp; 1535 struct vattr *vap = ap->a_vap; 1536 struct componentname *cnp = ap->a_cnp; 1537 struct nfsnode *np = NULL, *dnp; 1538 struct vnode *newvp = NULL; 1539 struct nfsmount *nmp; 1540 struct nfsvattr dnfsva, nfsva; 1541 struct nfsfh *nfhp; 1542 nfsquad_t cverf; 1543 int error = 0, attrflag, dattrflag, fmode = 0; 1544 struct vattr vattr; 1545 1546 /* 1547 * Oops, not for me.. 
1548 */ 1549 if (vap->va_type == VSOCK) 1550 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); 1551 1552 if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) 1553 return (error); 1554 if (vap->va_vaflags & VA_EXCLUSIVE) 1555 fmode |= O_EXCL; 1556 dnp = VTONFS(dvp); 1557 nmp = VFSTONFS(vnode_mount(dvp)); 1558again: 1559 /* For NFSv4, wait until any remove is done. */ 1560 mtx_lock(&dnp->n_mtx); 1561 while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) { 1562 dnp->n_flag |= NREMOVEWANT; 1563 (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0); 1564 } 1565 mtx_unlock(&dnp->n_mtx); 1566 1567 cverf = nfs_get_cverf(); 1568 error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen, 1569 vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, 1570 &nfhp, &attrflag, &dattrflag, NULL); 1571 if (!error) { 1572 if (nfhp == NULL) 1573 (void) nfsrpc_lookup(dvp, cnp->cn_nameptr, 1574 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, 1575 &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 1576 NULL); 1577 if (nfhp != NULL) 1578 error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, 1579 cnp->cn_thread, &np, NULL, LK_EXCLUSIVE); 1580 } 1581 if (dattrflag) 1582 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1583 if (!error) { 1584 newvp = NFSTOV(np); 1585 if (attrflag == 0) 1586 error = nfsrpc_getattr(newvp, cnp->cn_cred, 1587 cnp->cn_thread, &nfsva, NULL); 1588 if (error == 0) 1589 error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 1590 0, 1); 1591 } 1592 if (error) { 1593 if (newvp != NULL) { 1594 vput(newvp); 1595 newvp = NULL; 1596 } 1597 if (NFS_ISV34(dvp) && (fmode & O_EXCL) && 1598 error == NFSERR_NOTSUPP) { 1599 fmode &= ~O_EXCL; 1600 goto again; 1601 } 1602 } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) { 1603 if (nfscl_checksattr(vap, &nfsva)) { 1604 error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred, 1605 cnp->cn_thread, &nfsva, &attrflag, NULL); 1606 if (error && (vap->va_uid != (uid_t)VNOVAL || 1607 vap->va_gid != (gid_t)VNOVAL)) { 
1608 /* try again without setting uid/gid */ 1609 vap->va_uid = (uid_t)VNOVAL; 1610 vap->va_gid = (uid_t)VNOVAL; 1611 error = nfsrpc_setattr(newvp, vap, NULL, 1612 cnp->cn_cred, cnp->cn_thread, &nfsva, 1613 &attrflag, NULL); 1614 } 1615 if (attrflag) 1616 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 1617 NULL, 0, 1); 1618 if (error != 0) 1619 vput(newvp); 1620 } 1621 } 1622 if (!error) { 1623 if ((cnp->cn_flags & MAKEENTRY) && attrflag) 1624 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, 1625 NULL); 1626 *ap->a_vpp = newvp; 1627 } else if (NFS_ISV4(dvp)) { 1628 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid, 1629 vap->va_gid); 1630 } 1631 mtx_lock(&dnp->n_mtx); 1632 dnp->n_flag |= NMODIFIED; 1633 if (!dattrflag) { 1634 dnp->n_attrstamp = 0; 1635 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1636 } 1637 mtx_unlock(&dnp->n_mtx); 1638 return (error); 1639} 1640 1641/* 1642 * nfs file remove call 1643 * To try and make nfs semantics closer to ufs semantics, a file that has 1644 * other processes using the vnode is renamed instead of removed and then 1645 * removed later on the last close. 
1646 * - If v_usecount > 1 1647 * If a rename is not already in the works 1648 * call nfs_sillyrename() to set it up 1649 * else 1650 * do the remove rpc 1651 */ 1652static int 1653nfs_remove(struct vop_remove_args *ap) 1654{ 1655 struct vnode *vp = ap->a_vp; 1656 struct vnode *dvp = ap->a_dvp; 1657 struct componentname *cnp = ap->a_cnp; 1658 struct nfsnode *np = VTONFS(vp); 1659 int error = 0; 1660 struct vattr vattr; 1661 1662 KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name")); 1663 KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount")); 1664 if (vp->v_type == VDIR) 1665 error = EPERM; 1666 else if (vrefcnt(vp) == 1 || (np->n_sillyrename && 1667 VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 && 1668 vattr.va_nlink > 1)) { 1669 /* 1670 * Purge the name cache so that the chance of a lookup for 1671 * the name succeeding while the remove is in progress is 1672 * minimized. Without node locking it can still happen, such 1673 * that an I/O op returns ESTALE, but since you get this if 1674 * another host removes the file.. 1675 */ 1676 cache_purge(vp); 1677 /* 1678 * throw away biocache buffers, mainly to avoid 1679 * unnecessary delayed writes later. 1680 */ 1681 error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1); 1682 /* Do the rpc */ 1683 if (error != EINTR && error != EIO) 1684 error = nfs_removerpc(dvp, vp, cnp->cn_nameptr, 1685 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); 1686 /* 1687 * Kludge City: If the first reply to the remove rpc is lost.. 1688 * the reply to the retransmitted request will be ENOENT 1689 * since the file was in fact removed 1690 * Therefore, we cheat and return success. 
1691 */ 1692 if (error == ENOENT) 1693 error = 0; 1694 } else if (!np->n_sillyrename) 1695 error = nfs_sillyrename(dvp, vp, cnp); 1696 mtx_lock(&np->n_mtx); 1697 np->n_attrstamp = 0; 1698 mtx_unlock(&np->n_mtx); 1699 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1700 return (error); 1701} 1702 1703/* 1704 * nfs file remove rpc called from nfs_inactive 1705 */ 1706int 1707ncl_removeit(struct sillyrename *sp, struct vnode *vp) 1708{ 1709 /* 1710 * Make sure that the directory vnode is still valid. 1711 * XXX we should lock sp->s_dvp here. 1712 */ 1713 if (sp->s_dvp->v_type == VBAD) 1714 return (0); 1715 return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen, 1716 sp->s_cred, NULL)); 1717} 1718 1719/* 1720 * Nfs remove rpc, called from nfs_remove() and ncl_removeit(). 1721 */ 1722static int 1723nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name, 1724 int namelen, struct ucred *cred, struct thread *td) 1725{ 1726 struct nfsvattr dnfsva; 1727 struct nfsnode *dnp = VTONFS(dvp); 1728 int error = 0, dattrflag; 1729 1730 mtx_lock(&dnp->n_mtx); 1731 dnp->n_flag |= NREMOVEINPROG; 1732 mtx_unlock(&dnp->n_mtx); 1733 error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva, 1734 &dattrflag, NULL); 1735 mtx_lock(&dnp->n_mtx); 1736 if ((dnp->n_flag & NREMOVEWANT)) { 1737 dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG); 1738 mtx_unlock(&dnp->n_mtx); 1739 wakeup((caddr_t)dnp); 1740 } else { 1741 dnp->n_flag &= ~NREMOVEINPROG; 1742 mtx_unlock(&dnp->n_mtx); 1743 } 1744 if (dattrflag) 1745 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 1746 mtx_lock(&dnp->n_mtx); 1747 dnp->n_flag |= NMODIFIED; 1748 if (!dattrflag) { 1749 dnp->n_attrstamp = 0; 1750 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp); 1751 } 1752 mtx_unlock(&dnp->n_mtx); 1753 if (error && NFS_ISV4(dvp)) 1754 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 1755 return (error); 1756} 1757 1758/* 1759 * nfs file rename call 1760 */ 1761static int 1762nfs_rename(struct vop_rename_args *ap) 1763{ 1764 
struct vnode *fvp = ap->a_fvp; 1765 struct vnode *tvp = ap->a_tvp; 1766 struct vnode *fdvp = ap->a_fdvp; 1767 struct vnode *tdvp = ap->a_tdvp; 1768 struct componentname *tcnp = ap->a_tcnp; 1769 struct componentname *fcnp = ap->a_fcnp; 1770 struct nfsnode *fnp = VTONFS(ap->a_fvp); 1771 struct nfsnode *tdnp = VTONFS(ap->a_tdvp); 1772 struct nfsv4node *newv4 = NULL; 1773 int error; 1774 1775 KASSERT((tcnp->cn_flags & HASBUF) != 0 && 1776 (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name")); 1777 /* Check for cross-device rename */ 1778 if ((fvp->v_mount != tdvp->v_mount) || 1779 (tvp && (fvp->v_mount != tvp->v_mount))) { 1780 error = EXDEV; 1781 goto out; 1782 } 1783 1784 if (fvp == tvp) { 1785 printf("nfs_rename: fvp == tvp (can't happen)\n"); 1786 error = 0; 1787 goto out; 1788 } 1789 if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0) 1790 goto out; 1791 1792 /* 1793 * We have to flush B_DELWRI data prior to renaming 1794 * the file. If we don't, the delayed-write buffers 1795 * can be flushed out later after the file has gone stale 1796 * under NFSV3. NFSV2 does not have this problem because 1797 * ( as far as I can tell ) it flushes dirty buffers more 1798 * often. 1799 * 1800 * Skip the rename operation if the fsync fails, this can happen 1801 * due to the server's volume being full, when we pushed out data 1802 * that was written back to our cache earlier. Not checking for 1803 * this condition can result in potential (silent) data loss. 1804 */ 1805 error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); 1806 NFSVOPUNLOCK(fvp, 0); 1807 if (!error && tvp) 1808 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); 1809 if (error) 1810 goto out; 1811 1812 /* 1813 * If the tvp exists and is in use, sillyrename it before doing the 1814 * rename of the new file over it. 1815 * XXX Can't sillyrename a directory. 
1816 */ 1817 if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && 1818 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { 1819 vput(tvp); 1820 tvp = NULL; 1821 } 1822 1823 error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen, 1824 tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, 1825 tcnp->cn_thread); 1826 1827 if (error == 0 && NFS_ISV4(tdvp)) { 1828 /* 1829 * For NFSv4, check to see if it is the same name and 1830 * replace the name, if it is different. 1831 */ 1832 MALLOC(newv4, struct nfsv4node *, 1833 sizeof (struct nfsv4node) + 1834 tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1, 1835 M_NFSV4NODE, M_WAITOK); 1836 mtx_lock(&tdnp->n_mtx); 1837 mtx_lock(&fnp->n_mtx); 1838 if (fnp->n_v4 != NULL && fvp->v_type == VREG && 1839 (fnp->n_v4->n4_namelen != tcnp->cn_namelen || 1840 NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4), 1841 tcnp->cn_namelen) || 1842 tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen || 1843 NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, 1844 tdnp->n_fhp->nfh_len))) { 1845#ifdef notdef 1846{ char nnn[100]; int nnnl; 1847nnnl = (tcnp->cn_namelen < 100) ? 
tcnp->cn_namelen : 99; 1848bcopy(tcnp->cn_nameptr, nnn, nnnl); 1849nnn[nnnl] = '\0'; 1850printf("ren replace=%s\n",nnn); 1851} 1852#endif 1853 FREE((caddr_t)fnp->n_v4, M_NFSV4NODE); 1854 fnp->n_v4 = newv4; 1855 newv4 = NULL; 1856 fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len; 1857 fnp->n_v4->n4_namelen = tcnp->cn_namelen; 1858 NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data, 1859 tdnp->n_fhp->nfh_len); 1860 NFSBCOPY(tcnp->cn_nameptr, 1861 NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen); 1862 } 1863 mtx_unlock(&tdnp->n_mtx); 1864 mtx_unlock(&fnp->n_mtx); 1865 if (newv4 != NULL) 1866 FREE((caddr_t)newv4, M_NFSV4NODE); 1867 } 1868 1869 if (fvp->v_type == VDIR) { 1870 if (tvp != NULL && tvp->v_type == VDIR) 1871 cache_purge(tdvp); 1872 cache_purge(fdvp); 1873 } 1874 1875out: 1876 if (tdvp == tvp) 1877 vrele(tdvp); 1878 else 1879 vput(tdvp); 1880 if (tvp) 1881 vput(tvp); 1882 vrele(fdvp); 1883 vrele(fvp); 1884 /* 1885 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 1886 */ 1887 if (error == ENOENT) 1888 error = 0; 1889 return (error); 1890} 1891 1892/* 1893 * nfs file rename rpc called from nfs_remove() above 1894 */ 1895static int 1896nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp, 1897 struct sillyrename *sp) 1898{ 1899 1900 return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen, 1901 sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred, 1902 scnp->cn_thread)); 1903} 1904 1905/* 1906 * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
1907 */ 1908static int 1909nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr, 1910 int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr, 1911 int tnamelen, struct ucred *cred, struct thread *td) 1912{ 1913 struct nfsvattr fnfsva, tnfsva; 1914 struct nfsnode *fdnp = VTONFS(fdvp); 1915 struct nfsnode *tdnp = VTONFS(tdvp); 1916 int error = 0, fattrflag, tattrflag; 1917 1918 error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp, 1919 tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag, 1920 &tattrflag, NULL, NULL); 1921 mtx_lock(&fdnp->n_mtx); 1922 fdnp->n_flag |= NMODIFIED; 1923 if (fattrflag != 0) { 1924 mtx_unlock(&fdnp->n_mtx); 1925 (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1); 1926 } else { 1927 fdnp->n_attrstamp = 0; 1928 mtx_unlock(&fdnp->n_mtx); 1929 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp); 1930 } 1931 mtx_lock(&tdnp->n_mtx); 1932 tdnp->n_flag |= NMODIFIED; 1933 if (tattrflag != 0) { 1934 mtx_unlock(&tdnp->n_mtx); 1935 (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1); 1936 } else { 1937 tdnp->n_attrstamp = 0; 1938 mtx_unlock(&tdnp->n_mtx); 1939 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 1940 } 1941 if (error && NFS_ISV4(fdvp)) 1942 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 1943 return (error); 1944} 1945 1946/* 1947 * nfs hard link create call 1948 */ 1949static int 1950nfs_link(struct vop_link_args *ap) 1951{ 1952 struct vnode *vp = ap->a_vp; 1953 struct vnode *tdvp = ap->a_tdvp; 1954 struct componentname *cnp = ap->a_cnp; 1955 struct nfsnode *np, *tdnp; 1956 struct nfsvattr nfsva, dnfsva; 1957 int error = 0, attrflag, dattrflag; 1958 1959 /* 1960 * Push all writes to the server, so that the attribute cache 1961 * doesn't get "out of sync" with the server. 1962 * XXX There should be a better way! 
1963 */ 1964 VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); 1965 1966 error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen, 1967 cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag, 1968 &dattrflag, NULL); 1969 tdnp = VTONFS(tdvp); 1970 mtx_lock(&tdnp->n_mtx); 1971 tdnp->n_flag |= NMODIFIED; 1972 if (dattrflag != 0) { 1973 mtx_unlock(&tdnp->n_mtx); 1974 (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1); 1975 } else { 1976 tdnp->n_attrstamp = 0; 1977 mtx_unlock(&tdnp->n_mtx); 1978 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp); 1979 } 1980 if (attrflag) 1981 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 1982 else { 1983 np = VTONFS(vp); 1984 mtx_lock(&np->n_mtx); 1985 np->n_attrstamp = 0; 1986 mtx_unlock(&np->n_mtx); 1987 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); 1988 } 1989 /* 1990 * If negative lookup caching is enabled, I might as well 1991 * add an entry for this node. Not necessary for correctness, 1992 * but if negative caching is enabled, then the system 1993 * must care about lookup caching hit rate, so... 
*/
	if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 &&
	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
		cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL);
	}
	if (error && NFS_ISV4(vp))
		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
		    (gid_t)0);
	return (error);
}

/*
 * nfs symbolic link create call
 *
 * Issues the symlink RPC for the name in ap->a_cnp, in directory ap->a_dvp,
 * pointing at ap->a_target.  On success the new vnode is handed back through
 * ap->a_vpp.  If the reply carried no file handle, the vnode is obtained with
 * a follow-up nfs_lookitup().  The directory is always flagged NMODIFIED and
 * its cached attributes are either reloaded from the reply or invalidated.
 *
 * Returns 0 or an error number; for NFSv4 mounts the error is passed through
 * nfscl_maperr().
 */
static int
nfs_symlink(struct vop_symlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsvattr nfsva, dnfsva;
	struct nfsfh *nfhp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	int error = 0, attrflag, dattrflag, ret;

	vap->va_type = VLNK;
	error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva,
	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
	/*
	 * NOTE(review): nfhp, attrflag and dattrflag are read below without a
	 * local initializer; this relies on nfsrpc_symlink() always setting
	 * them -- confirm against its definition.
	 */
	if (nfhp) {
		/* A file handle came back: turn it into a locked vnode. */
		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
		    &np, NULL, LK_EXCLUSIVE);
		if (!ret)
			newvp = NFSTOV(np);
		else if (!error)
			error = ret;	/* keep the RPC error if there was one */
	}
	if (newvp != NULL) {
		if (attrflag)
			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
			    0, 1);
	} else if (!error) {
		/*
		 * If we do not have an error and we could not extract the
		 * newvp from the response due to the request being NFSv2, we
		 * have to do a lookup in order to obtain a newvp to return.
		 */
		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, cnp->cn_thread, &np);
		if (!error)
			newvp = NFSTOV(np);
	}
	if (error) {
		/* Drop any vnode we acquired before reporting the failure. */
		if (newvp)
			vput(newvp);
		if (NFS_ISV4(dvp))
			error = nfscl_maperr(cnp->cn_thread, error,
			    vap->va_uid, vap->va_gid);
	} else {
		*ap->a_vpp = newvp;
	}

	/*
	 * The directory changed (or may have): mark it modified and either
	 * load the post-op attributes or invalidate the cached ones.  The
	 * node mutex is released before nfscl_loadattrcache() is called.
	 */
	dnp = VTONFS(dvp);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (dattrflag != 0) {
		mtx_unlock(&dnp->n_mtx);
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	} else {
		dnp->n_attrstamp = 0;
		mtx_unlock(&dnp->n_mtx);
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	/*
	 * If negative lookup caching is enabled, I might as well
	 * add an entry for this node. Not necessary for correctness,
	 * but if negative caching is enabled, then the system
	 * must care about lookup caching hit rate, so...
	 */
	if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
		cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL);
	}
	return (error);
}

/*
 * nfs make dir call
 *
 * Fetches the parent directory's attributes first (any failure is returned
 * immediately), then issues the mkdir RPC.  The parent is flagged NMODIFIED
 * and its attribute cache reloaded or invalidated.  The new directory's
 * vnode comes from the returned file handle or, failing that, from
 * nfs_lookitup(); a looked-up object that is not a directory yields EEXIST.
 * On success the vnode is returned through ap->a_vpp.
 */
static int
nfs_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *np = NULL, *dnp;
	struct vnode *newvp = NULL;
	struct vattr vattr;
	struct nfsfh *nfhp;
	struct nfsvattr nfsva, dnfsva;
	int error = 0, attrflag, dattrflag, ret;

	/* Only the return value is used; vattr itself is not read below. */
	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
		return (error);
	vap->va_type = VDIR;
	error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp,
	    &attrflag, &dattrflag, NULL);
	dnp = VTONFS(dvp);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (dattrflag != 0) {
		mtx_unlock(&dnp->n_mtx);
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	} else {
		dnp->n_attrstamp = 0;
		mtx_unlock(&dnp->n_mtx);
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}
	if (nfhp) {
		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
		    &np, NULL, LK_EXCLUSIVE);
		if (!ret) {
			newvp = NFSTOV(np);
			if (attrflag)
				(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
				    NULL, 0, 1);
		} else if (!error)
			error = ret;
	}
	if (!error && newvp == NULL) {
		/* No usable file handle in the reply: look the name up. */
		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
		    cnp->cn_cred, cnp->cn_thread, &np);
		if (!error) {
			newvp = NFSTOV(np);
			if (newvp->v_type != VDIR)
				error = EEXIST;
		}
	}
	if (error) {
		if (newvp)
			vput(newvp);
		if (NFS_ISV4(dvp))
			error = nfscl_maperr(cnp->cn_thread, error,
			    vap->va_uid, vap->va_gid);
	} else {
		/*
		 * If negative lookup caching is enabled, I might as well
		 * add an entry for this node. Not necessary for correctness,
		 * but if negative caching is enabled, then the system
		 * must care about lookup caching hit rate, so...
		 */
		if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
		    (cnp->cn_flags & MAKEENTRY) &&
		    attrflag != 0 && dattrflag != 0)
			cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
			    &dnfsva.na_ctime);
		*ap->a_vpp = newvp;
	}
	return (error);
}

/*
 * nfs remove directory call
 *
 * Rejects removal of a directory from itself (dvp == vp) with EINVAL.
 * After the RPC the parent is flagged NMODIFIED, its attribute cache is
 * reloaded or invalidated, and the name cache is purged for both vnodes
 * regardless of the RPC result.  ENOENT is reported as success.
 */
static int
nfs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsnode *dnp;
	struct nfsvattr dnfsva;
	int error, dattrflag;

	if (dvp == vp)
		return (EINVAL);
	error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL);
	dnp = VTONFS(dvp);
	mtx_lock(&dnp->n_mtx);
	dnp->n_flag |= NMODIFIED;
	if (dattrflag != 0) {
		mtx_unlock(&dnp->n_mtx);
		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
	} else {
		dnp->n_attrstamp = 0;
		mtx_unlock(&dnp->n_mtx);
		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
	}

	cache_purge(dvp);
	cache_purge(vp);
	if (error && NFS_ISV4(dvp))
		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
		    (gid_t)0);
	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	return (error);
}

/*
 * nfs readdir call
 *
 * Returns EPERM if the vnode is not a directory and EINVAL if the caller's
 * buffer is smaller than one DIRBLKSIZ block.  Otherwise the request is
 * trimmed to a whole number of DIRBLKSIZ blocks and passed to ncl_bioread().
 * *ap->a_eofflag (when supplied) is set to 1 either on a hit in the cached
 * end-of-directory offset or when the read transferred nothing.
 */
static int
nfs_readdir(struct vop_readdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct uio *uio = ap->a_uio;
	ssize_t tresid, left;
	int error = 0;
	struct vattr vattr;

	if (ap->a_eofflag != NULL)
		*ap->a_eofflag = 0;
	if (vp->v_type != VDIR)
		return(EPERM);

	/*
	 * First, check for hit on the EOF offset cache
	 */
	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
	    (np->n_flag & NMODIFIED) == 0) {
		if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
			mtx_lock(&np->n_mtx);
			/*
			 * The cached EOF offset is trusted only if the
			 * directory is unchanged: same change attribute for
			 * NFSv4, or NFS_TIMESPEC_COMPARE() reporting no
			 * difference in modification time.
			 */
			if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) ||
			    !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
				mtx_unlock(&np->n_mtx);
				NFSINCRGLOBAL(nfsstatsv1.direofcache_hits);
				if (ap->a_eofflag != NULL)
					*ap->a_eofflag = 1;
				return (0);
			} else
				mtx_unlock(&np->n_mtx);
		}
	}

	/*
	 * NFS always guarantees that directory entries don't straddle
	 * DIRBLKSIZ boundaries.  As such, we need to limit the size
	 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial
	 * directory entry.
	 */
	left = uio->uio_resid % DIRBLKSIZ;
	if (left == uio->uio_resid)
		return (EINVAL);	/* less than one full block requested */
	uio->uio_resid -= left;

	/*
	 * Call ncl_bioread() to do the real work.
	 */
	tresid = uio->uio_resid;
	error = ncl_bioread(vp, uio, 0, ap->a_cred);

	/* Nothing transferred and no error: report end of directory. */
	if (!error && uio->uio_resid == tresid) {
		NFSINCRGLOBAL(nfsstatsv1.direofcache_misses);
		if (ap->a_eofflag != NULL)
			*ap->a_eofflag = 1;
	}

	/* Add the partial DIRBLKSIZ (left) back in. */
	uio->uio_resid += left;
	return (error);
}

/*
 * Readdir rpc call.
 * Called from below the buffer cache by ncl_doio().
2267 */ 2268int 2269ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2270 struct thread *td) 2271{ 2272 struct nfsvattr nfsva; 2273 nfsuint64 *cookiep, cookie; 2274 struct nfsnode *dnp = VTONFS(vp); 2275 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2276 int error = 0, eof, attrflag; 2277 2278 KASSERT(uiop->uio_iovcnt == 1 && 2279 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2280 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2281 ("nfs readdirrpc bad uio")); 2282 2283 /* 2284 * If there is no cookie, assume directory was stale. 2285 */ 2286 ncl_dircookie_lock(dnp); 2287 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2288 if (cookiep) { 2289 cookie = *cookiep; 2290 ncl_dircookie_unlock(dnp); 2291 } else { 2292 ncl_dircookie_unlock(dnp); 2293 return (NFSERR_BAD_COOKIE); 2294 } 2295 2296 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2297 (void)ncl_fsinfo(nmp, vp, cred, td); 2298 2299 error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva, 2300 &attrflag, &eof, NULL); 2301 if (attrflag) 2302 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2303 2304 if (!error) { 2305 /* 2306 * We are now either at the end of the directory or have filled 2307 * the block. 2308 */ 2309 if (eof) 2310 dnp->n_direofoffset = uiop->uio_offset; 2311 else { 2312 if (uiop->uio_resid > 0) 2313 printf("EEK! readdirrpc resid > 0\n"); 2314 ncl_dircookie_lock(dnp); 2315 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2316 *cookiep = cookie; 2317 ncl_dircookie_unlock(dnp); 2318 } 2319 } else if (NFS_ISV4(vp)) { 2320 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2321 } 2322 return (error); 2323} 2324 2325/* 2326 * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc(). 
2327 */ 2328int 2329ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, 2330 struct thread *td) 2331{ 2332 struct nfsvattr nfsva; 2333 nfsuint64 *cookiep, cookie; 2334 struct nfsnode *dnp = VTONFS(vp); 2335 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2336 int error = 0, attrflag, eof; 2337 2338 KASSERT(uiop->uio_iovcnt == 1 && 2339 (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 && 2340 (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0, 2341 ("nfs readdirplusrpc bad uio")); 2342 2343 /* 2344 * If there is no cookie, assume directory was stale. 2345 */ 2346 ncl_dircookie_lock(dnp); 2347 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0); 2348 if (cookiep) { 2349 cookie = *cookiep; 2350 ncl_dircookie_unlock(dnp); 2351 } else { 2352 ncl_dircookie_unlock(dnp); 2353 return (NFSERR_BAD_COOKIE); 2354 } 2355 2356 if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) 2357 (void)ncl_fsinfo(nmp, vp, cred, td); 2358 error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva, 2359 &attrflag, &eof, NULL); 2360 if (attrflag) 2361 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); 2362 2363 if (!error) { 2364 /* 2365 * We are now either at end of the directory or have filled the 2366 * the block. 2367 */ 2368 if (eof) 2369 dnp->n_direofoffset = uiop->uio_offset; 2370 else { 2371 if (uiop->uio_resid > 0) 2372 printf("EEK! readdirplusrpc resid > 0\n"); 2373 ncl_dircookie_lock(dnp); 2374 cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1); 2375 *cookiep = cookie; 2376 ncl_dircookie_unlock(dnp); 2377 } 2378 } else if (NFS_ISV4(vp)) { 2379 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2380 } 2381 return (error); 2382} 2383 2384/* 2385 * Silly rename. To make the NFS filesystem that is stateless look a little 2386 * more like the "ufs" a remove of an active vnode is translated to a rename 2387 * to a funny looking filename that is removed by nfs_inactive on the 2388 * nfsnode. 
There is the potential for another process on a different client 2389 * to create the same funny name between the nfs_lookitup() fails and the 2390 * nfs_rename() completes, but... 2391 */ 2392static int 2393nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) 2394{ 2395 struct sillyrename *sp; 2396 struct nfsnode *np; 2397 int error; 2398 short pid; 2399 unsigned int lticks; 2400 2401 cache_purge(dvp); 2402 np = VTONFS(vp); 2403 KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir")); 2404 MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), 2405 M_NEWNFSREQ, M_WAITOK); 2406 sp->s_cred = crhold(cnp->cn_cred); 2407 sp->s_dvp = dvp; 2408 VREF(dvp); 2409 2410 /* 2411 * Fudge together a funny name. 2412 * Changing the format of the funny name to accommodate more 2413 * sillynames per directory. 2414 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 2415 * CPU ticks since boot. 2416 */ 2417 pid = cnp->cn_thread->td_proc->p_pid; 2418 lticks = (unsigned int)ticks; 2419 for ( ; ; ) { 2420 sp->s_namlen = snprintf(sp->s_name, sizeof(sp->s_name), 2421 ".nfs.%08x.%04x4.4", lticks, 2422 pid); 2423 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2424 cnp->cn_thread, NULL)) 2425 break; 2426 lticks++; 2427 } 2428 error = nfs_renameit(dvp, vp, cnp, sp); 2429 if (error) 2430 goto bad; 2431 error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, 2432 cnp->cn_thread, &np); 2433 np->n_sillyrename = sp; 2434 return (0); 2435bad: 2436 vrele(sp->s_dvp); 2437 crfree(sp->s_cred); 2438 free((caddr_t)sp, M_NEWNFSREQ); 2439 return (error); 2440} 2441 2442/* 2443 * Look up a file name and optionally either update the file handle or 2444 * allocate an nfsnode, depending on the value of npp. 
2445 * npp == NULL --> just do the lookup 2446 * *npp == NULL --> allocate a new nfsnode and make sure attributes are 2447 * handled too 2448 * *npp != NULL --> update the file handle in the vnode 2449 */ 2450static int 2451nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred, 2452 struct thread *td, struct nfsnode **npp) 2453{ 2454 struct vnode *newvp = NULL, *vp; 2455 struct nfsnode *np, *dnp = VTONFS(dvp); 2456 struct nfsfh *nfhp, *onfhp; 2457 struct nfsvattr nfsva, dnfsva; 2458 struct componentname cn; 2459 int error = 0, attrflag, dattrflag; 2460 u_int hash; 2461 2462 error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva, 2463 &nfhp, &attrflag, &dattrflag, NULL); 2464 if (dattrflag) 2465 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); 2466 if (npp && !error) { 2467 if (*npp != NULL) { 2468 np = *npp; 2469 vp = NFSTOV(np); 2470 /* 2471 * For NFSv4, check to see if it is the same name and 2472 * replace the name, if it is different. 2473 */ 2474 if (np->n_v4 != NULL && nfsva.na_type == VREG && 2475 (np->n_v4->n4_namelen != len || 2476 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) || 2477 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen || 2478 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2479 dnp->n_fhp->nfh_len))) { 2480#ifdef notdef 2481{ char nnn[100]; int nnnl; 2482nnnl = (len < 100) ? 
len : 99; 2483bcopy(name, nnn, nnnl); 2484nnn[nnnl] = '\0'; 2485printf("replace=%s\n",nnn); 2486} 2487#endif 2488 FREE((caddr_t)np->n_v4, M_NFSV4NODE); 2489 MALLOC(np->n_v4, struct nfsv4node *, 2490 sizeof (struct nfsv4node) + 2491 dnp->n_fhp->nfh_len + len - 1, 2492 M_NFSV4NODE, M_WAITOK); 2493 np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len; 2494 np->n_v4->n4_namelen = len; 2495 NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data, 2496 dnp->n_fhp->nfh_len); 2497 NFSBCOPY(name, NFS4NODENAME(np->n_v4), len); 2498 } 2499 hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len, 2500 FNV1_32_INIT); 2501 onfhp = np->n_fhp; 2502 /* 2503 * Rehash node for new file handle. 2504 */ 2505 vfs_hash_rehash(vp, hash); 2506 np->n_fhp = nfhp; 2507 if (onfhp != NULL) 2508 FREE((caddr_t)onfhp, M_NFSFH); 2509 newvp = NFSTOV(np); 2510 } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) { 2511 FREE((caddr_t)nfhp, M_NFSFH); 2512 VREF(dvp); 2513 newvp = dvp; 2514 } else { 2515 cn.cn_nameptr = name; 2516 cn.cn_namelen = len; 2517 error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td, 2518 &np, NULL, LK_EXCLUSIVE); 2519 if (error) 2520 return (error); 2521 newvp = NFSTOV(np); 2522 } 2523 if (!attrflag && *npp == NULL) { 2524 if (newvp == dvp) 2525 vrele(newvp); 2526 else 2527 vput(newvp); 2528 return (ENOENT); 2529 } 2530 if (attrflag) 2531 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL, 2532 0, 1); 2533 } 2534 if (npp && *npp == NULL) { 2535 if (error) { 2536 if (newvp) { 2537 if (newvp == dvp) 2538 vrele(newvp); 2539 else 2540 vput(newvp); 2541 } 2542 } else 2543 *npp = np; 2544 } 2545 if (error && NFS_ISV4(dvp)) 2546 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2547 return (error); 2548} 2549 2550/* 2551 * Nfs Version 3 and 4 commit rpc 2552 */ 2553int 2554ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, 2555 struct thread *td) 2556{ 2557 struct nfsvattr nfsva; 2558 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 2559 int error, attrflag; 2560 2561 
mtx_lock(&nmp->nm_mtx); 2562 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { 2563 mtx_unlock(&nmp->nm_mtx); 2564 return (0); 2565 } 2566 mtx_unlock(&nmp->nm_mtx); 2567 error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, 2568 &attrflag, NULL); 2569 if (attrflag != 0) 2570 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 2571 0, 1); 2572 if (error != 0 && NFS_ISV4(vp)) 2573 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); 2574 return (error); 2575} 2576 2577/* 2578 * Strategy routine. 2579 * For async requests when nfsiod(s) are running, queue the request by 2580 * calling ncl_asyncio(), otherwise just all ncl_doio() to do the 2581 * request. 2582 */ 2583static int 2584nfs_strategy(struct vop_strategy_args *ap) 2585{ 2586 struct buf *bp = ap->a_bp; 2587 struct ucred *cr; 2588 2589 KASSERT(!(bp->b_flags & B_DONE), 2590 ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); 2591 BUF_ASSERT_HELD(bp); 2592 2593 if (bp->b_iocmd == BIO_READ) 2594 cr = bp->b_rcred; 2595 else 2596 cr = bp->b_wcred; 2597 2598 /* 2599 * If the op is asynchronous and an i/o daemon is waiting 2600 * queue the request, wake it up and wait for completion 2601 * otherwise just do it ourselves. 2602 */ 2603 if ((bp->b_flags & B_ASYNC) == 0 || 2604 ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread)) 2605 (void) ncl_doio(ap->a_vp, bp, cr, curthread, 1); 2606 return (0); 2607} 2608 2609/* 2610 * fsync vnode op. Just call ncl_flush() with commit == 1. 2611 */ 2612/* ARGSUSED */ 2613static int 2614nfs_fsync(struct vop_fsync_args *ap) 2615{ 2616 2617 if (ap->a_vp->v_type != VREG) { 2618 /* 2619 * For NFS, metadata is changed synchronously on the server, 2620 * so there is nothing to flush. Also, ncl_flush() clears 2621 * the NMODIFIED flag and that shouldn't be done here for 2622 * directories. 2623 */ 2624 return (0); 2625 } 2626 return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0)); 2627} 2628 2629/* 2630 * Flush all the blocks associated with a vnode. 
* Walk through the buffer pool and push any dirty pages
 * associated with the vnode.
 * If the called_from_renewthread argument is TRUE, it has been called
 * from the NFSv4 renew thread and, as such, cannot block indefinitely
 * waiting for a buffer write to complete.
 *
 * Arguments as used below:
 *   waitfor  - MNT_WAIT makes the routine wait for busy buffers, for
 *              outstanding output and for direct-I/O async writes.
 *   cred     - not referenced anywhere in this function.
 *   commit   - non-zero enables the commit pass (NFSv3/v4 only) and allows
 *              NMODIFIED to be cleared once everything has drained.
 *
 * Returns 0, EINTR when newnfs_sigintr() reports an interruption, EIO when
 * called from the renew thread and a wait failed, or the write error
 * recorded in the nfsnode (NWRITEERR).
 */
int
ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
    int commit, int called_from_renewthread)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp;
	int i;
	struct buf *nbp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
	int passone = 1, trycnt = 0;
	u_quad_t off, endoff, toff;
	struct ucred* wcred = NULL;
	struct buf **bvec = NULL;
	struct bufobj *bo;
#ifndef NFS_COMMITBVECSIZ
#define	NFS_COMMITBVECSIZ	20
#endif
	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
	int bvecsize = 0, bveccount;

	/* The renew thread gets a bounded sleep (one second's worth of hz). */
	if (called_from_renewthread != 0)
		slptimeo = hz;
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	if (!commit)
		passone = 0;
	bo = &vp->v_bufobj;
	/*
	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
	 * server, but has not been committed to stable storage on the server
	 * yet. On the first pass, the byte range is worked out and the commit
	 * rpc is done. On the second pass, ncl_writebp() is called to do the
	 * job.
	 */
again:
	off = (u_quad_t)-1;
	endoff = 0;
	bvecpos = 0;
	if (NFS_ISV34(vp) && commit) {
		/* Release a heap vector left over from a previous pass. */
		if (bvec != NULL && bvec != bvec_on_stack)
			free(bvec, M_TEMP);
		/*
		 * Count up how many buffers waiting for a commit.
		 */
		bveccount = 0;
		BO_LOCK(bo);
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (!BUF_ISLOCKED(bp) &&
			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
				== (B_DELWRI | B_NEEDCOMMIT))
				bveccount++;
		}
		/*
		 * Allocate space to remember the list of bufs to commit.  It is
		 * important to use M_NOWAIT here to avoid a race with nfs_write.
		 * If we can't get memory (for whatever reason), we will end up
		 * committing the buffers one-by-one in the loop below.
		 */
		if (bveccount > NFS_COMMITBVECSIZ) {
			/*
			 * Release the vnode interlock to avoid a lock
			 * order reversal.
			 */
			BO_UNLOCK(bo);
			bvec = (struct buf **)
				malloc(bveccount * sizeof(struct buf *),
				       M_TEMP, M_NOWAIT);
			BO_LOCK(bo);
			if (bvec == NULL) {
				/* Allocation failed: use the stack vector. */
				bvec = bvec_on_stack;
				bvecsize = NFS_COMMITBVECSIZ;
			} else
				bvecsize = bveccount;
		} else {
			bvec = bvec_on_stack;
			bvecsize = NFS_COMMITBVECSIZ;
		}
		/* Collect up to bvecsize lockable buffers needing a commit. */
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (bvecpos >= bvecsize)
				break;
			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
			    (B_DELWRI | B_NEEDCOMMIT)) {
				BUF_UNLOCK(bp);
				nbp = TAILQ_NEXT(bp, b_bobufs);
				continue;
			}
			BO_UNLOCK(bo);
			bremfree(bp);
			/*
			 * Work out if all buffers are using the same cred
			 * so we can deal with them all with one commit.
			 *
			 * NOTE: we are not clearing B_DONE here, so we have
			 * to do it later on in this routine if we intend to
			 * initiate I/O on the bp.
			 *
			 * Note: to avoid loopback deadlocks, we do not
			 * assign b_runningbufspace.
			 */
			if (wcred == NULL)
				wcred = bp->b_wcred;
			else if (wcred != bp->b_wcred)
				wcred = NOCRED;
			vfs_busy_pages(bp, 1);

			BO_LOCK(bo);
			/*
			 * bp is protected by being locked, but nbp is not
			 * and vfs_busy_pages() may sleep.  We have to
			 * recalculate nbp.
			 */
			nbp = TAILQ_NEXT(bp, b_bobufs);

			/*
			 * A list of these buffers is kept so that the
			 * second loop knows which buffers have actually
			 * been committed. This is necessary, since there
			 * may be a race between the commit rpc and new
			 * uncommitted writes on the file.
			 */
			bvec[bvecpos++] = bp;
			/* Widen [off, endoff) to cover this buffer's dirty range. */
			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
				bp->b_dirtyoff;
			if (toff < off)
				off = toff;
			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
			if (toff > endoff)
				endoff = toff;
		}
		BO_UNLOCK(bo);
	}
	if (bvecpos > 0) {
		/*
		 * Commit data on the server, as required.
		 * If all bufs are using the same wcred, then use that with
		 * one call for all of them, otherwise commit each one
		 * separately.
		 */
		if (wcred != NOCRED)
			retv = ncl_commit(vp, off, (int)(endoff - off),
					  wcred, td);
		else {
			retv = 0;
			for (i = 0; i < bvecpos; i++) {
				/*
				 * NOTE(review): these locals shadow the outer
				 * "off" declared at function scope.
				 */
				off_t off, size;
				bp = bvec[i];
				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
					bp->b_dirtyoff;
				size = (u_quad_t)(bp->b_dirtyend
						  - bp->b_dirtyoff);
				retv = ncl_commit(vp, off, (int)size,
						  bp->b_wcred, td);
				if (retv) break;
			}
		}

		if (retv == NFSERR_STALEWRITEVERF)
			ncl_clearcommit(vp->v_mount);

		/*
		 * Now, either mark the blocks I/O done or mark the
		 * blocks dirty, depending on whether the commit
		 * succeeded.
		 */
		for (i = 0; i < bvecpos; i++) {
			bp = bvec[i];
			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
			if (retv) {
				/*
				 * Error, leave B_DELWRI intact
				 */
				vfs_unbusy_pages(bp);
				brelse(bp);
			} else {
				/*
				 * Success, remove B_DELWRI ( bundirty() ).
				 *
				 * b_dirtyoff/b_dirtyend seem to be NFS
				 * specific.  We should probably move that
				 * into bundirty(). XXX
				 */
				bufobj_wref(bo);
				bp->b_flags |= B_ASYNC;
				bundirty(bp);
				bp->b_flags &= ~B_DONE;
				bp->b_ioflags &= ~BIO_ERROR;
				bp->b_dirtyoff = bp->b_dirtyend = 0;
				bufdone(bp);
			}
		}
	}

	/*
	 * Start/do any write(s) that are required.
	 */
loop:
	BO_LOCK(bo);
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
			/* Busy buffer: skip unless this is a waiting final pass. */
			if (waitfor != MNT_WAIT || passone)
				continue;

			/*
			 * LK_INTERLOCK hands the bufobj lock to the lock
			 * call; every exit from this branch restarts at
			 * "loop" (which retakes it) or leaves via "done".
			 */
			error = BUF_TIMELOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
			if (error == 0) {
				BUF_UNLOCK(bp);
				goto loop;
			}
			if (error == ENOLCK) {
				error = 0;
				goto loop;
			}
			if (called_from_renewthread != 0) {
				/*
				 * Return EIO so the flush will be retried
				 * later.
				 */
				error = EIO;
				goto done;
			}
			if (newnfs_sigintr(nmp, td)) {
				error = EINTR;
				goto done;
			}
			/* After one interruptible wait, switch to timed waits. */
			if (slpflag == PCATCH) {
				slpflag = 0;
				slptimeo = 2 * hz;
			}
			goto loop;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("nfs_fsync: not dirty");
		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
			BUF_UNLOCK(bp);
			continue;
		}
		BO_UNLOCK(bo);
		bremfree(bp);
		/*
		 * NOTE(review): both branches set B_ASYNC, so the test has no
		 * effect as written.
		 */
		if (passone || !commit)
			bp->b_flags |= B_ASYNC;
		else
			bp->b_flags |= B_ASYNC;
		bwrite(bp);
		if (newnfs_sigintr(nmp, td)) {
			error = EINTR;
			goto done;
		}
		goto loop;
	}
	if (passone) {
		/* First pass finished: rerun from the commit stage. */
		passone = 0;
		BO_UNLOCK(bo);
		goto again;
	}
	if (waitfor == MNT_WAIT) {
		while (bo->bo_numoutput) {
			error = bufobj_wwait(bo, slpflag, slptimeo);
			if (error) {
				BO_UNLOCK(bo);
				if (called_from_renewthread != 0) {
					/*
					 * Return EIO so that the flush will be
					 * retried later.
					 */
					error = EIO;
					goto done;
				}
				error = newnfs_sigintr(nmp, td);
				if (error)
					goto done;
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
				BO_LOCK(bo);
			}
		}
		if (bo->bo_dirty.bv_cnt != 0 && commit) {
			BO_UNLOCK(bo);
			goto loop;
		}
		/*
		 * Wait for all the async IO requests to drain
		 */
		BO_UNLOCK(bo);
		mtx_lock(&np->n_mtx);
		while (np->n_directio_asyncwr > 0) {
			np->n_flag |= NFSYNCWAIT;
			error = newnfs_msleep(td, &np->n_directio_asyncwr,
			    &np->n_mtx, slpflag | (PRIBIO + 1),
			    "nfsfsync", 0);
			if (error) {
				if (newnfs_sigintr(nmp, td)) {
					mtx_unlock(&np->n_mtx);
					error = EINTR;
					goto done;
				}
			}
		}
		mtx_unlock(&np->n_mtx);
	} else
		BO_UNLOCK(bo);
	if (NFSHASPNFS(nmp)) {
		nfscl_layoutcommit(vp, td);
		/*
		 * Invalidate the attribute cache, since writes to a DS
		 * won't update the size attribute.
		 */
		mtx_lock(&np->n_mtx);
		np->n_attrstamp = 0;
	} else
		mtx_lock(&np->n_mtx);
	/* Report (and clear) a write error recorded in the nfsnode. */
	if (np->n_flag & NWRITEERR) {
		error = np->n_error;
		np->n_flag &= ~NWRITEERR;
	}
	/* Nothing dirty, in flight or queued: the node is clean again. */
	if (commit && bo->bo_dirty.bv_cnt == 0 &&
	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
		np->n_flag &= ~NMODIFIED;
	mtx_unlock(&np->n_mtx);
done:
	if (bvec != NULL && bvec != bvec_on_stack)
		free(bvec, M_TEMP);
	/*
	 * A fully synchronous commit that still sees pending work is retried
	 * from the top, at most five times.
	 */
	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
	     np->n_directio_asyncwr != 0) && trycnt++ < 5) {
		/* try, try again... */
		passone = 1;
		wcred = NULL;
		bvec = NULL;
		bvecsize = 0;
		/* NOTE(review): looks like a leftover debugging message. */
printf("try%d\n", trycnt);
		goto again;
	}
	return (error);
}

/*
 * NFS advisory byte-level locks.
*/
/*
 * Two paths are implemented:
 *  - NFSv4 with F_POSIX or F_FLOCK: the lock operation is sent with
 *    nfsrpc_advlock() while the vnode is held exclusively.  A blocking
 *    F_SETLK that is denied sleeps in nfs_catnap() with the vnode unlocked
 *    and retries.  Results are reduced to 0, EAGAIN (denied), EINVAL, EBADF,
 *    EINTR or EACCES (anything else).
 *  - Non-NFSv4: handled locally with lf_advlock() when the mount has
 *    NFSMNT_NOLOCKD set, otherwise handed to nfs_advlock_p (ENOLCK when that
 *    hook is NULL).
 * NFSv4 requests carrying neither flag return the initial EOPNOTSUPP.
 */
static int
nfs_advlock(struct vop_advlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct ucred *cred;
	struct nfsnode *np = VTONFS(ap->a_vp);
	struct proc *p = (struct proc *)ap->a_id;
	struct thread *td = curthread;	/* XXX */
	struct vattr va;
	int ret, error = EOPNOTSUPP;
	u_quad_t size;

	if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
		if (vp->v_type != VREG)
			return (EINVAL);
		/* POSIX locks use the credential of the process in a_id. */
		if ((ap->a_flags & F_POSIX) != 0)
			cred = p->p_ucred;
		else
			cred = td->td_ucred;
		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			NFSVOPUNLOCK(vp, 0);
			return (EBADF);
		}

		/*
		 * If this is unlocking a write locked region, flush and
		 * commit them before unlocking. This is required by
		 * RFC3530 Sec. 9.3.2.
		 */
		if (ap->a_op == F_UNLCK &&
		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
		    ap->a_flags))
			(void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);

		/*
		 * Loop around doing the lock op, while a blocking lock
		 * must wait for the lock op to succeed.
		 */
		do {
			ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
			     ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
			if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
			    ap->a_op == F_SETLK) {
				/* Sleep without the vnode lock, then recheck. */
				NFSVOPUNLOCK(vp, 0);
				error = nfs_catnap(PZERO | PCATCH, ret,
				    "ncladvl");
				if (error)
					return (EINTR);
				NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
				if (vp->v_iflag & VI_DOOMED) {
					NFSVOPUNLOCK(vp, 0);
					return (EBADF);
				}
			}
		} while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
		     ap->a_op == F_SETLK);
		if (ret == NFSERR_DENIED) {
			NFSVOPUNLOCK(vp, 0);
			return (EAGAIN);
		} else if (ret == EINVAL || ret == EBADF || ret == EINTR) {
			NFSVOPUNLOCK(vp, 0);
			return (ret);
		} else if (ret != 0) {
			NFSVOPUNLOCK(vp, 0);
			return (EACCES);
		}

		/*
		 * Now, if we just got a lock, invalidate data in the buffer
		 * cache, as required, so that the coherency conforms with
		 * RFC3530 Sec. 9.3.2.
		 */
		if (ap->a_op == F_SETLK) {
			if ((np->n_flag & NMODIFIED) == 0) {
				np->n_attrstamp = 0;
				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
				ret = VOP_GETATTR(vp, &va, cred);
			}
			/*
			 * va is only read when the GETATTR above ran and
			 * succeeded; the earlier operands short-circuit
			 * otherwise.
			 */
			if ((np->n_flag & NMODIFIED) || ret ||
			    np->n_change != va.va_filerev) {
				(void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
				np->n_attrstamp = 0;
				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
				ret = VOP_GETATTR(vp, &va, cred);
				if (!ret) {
					np->n_mtime = va.va_mtime;
					np->n_change = va.va_filerev;
				}
			}
			/* Mark that a file lock has been acquired. */
			mtx_lock(&np->n_mtx);
			np->n_flag |= NHASBEENLOCKED;
			mtx_unlock(&np->n_mtx);
		}
		NFSVOPUNLOCK(vp, 0);
		return (0);
	} else if (!NFS_ISV4(vp)) {
		error = NFSVOPLOCK(vp, LK_SHARED);
		if (error)
			return (error);
		if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
			/* Local-only locking: sample the size, then unlock. */
			size = VTONFS(vp)->n_size;
			NFSVOPUNLOCK(vp, 0);
			error = lf_advlock(ap, &(vp->v_lockf), size);
		} else {
			/*
			 * NOTE(review): the vnode is not unlocked here when
			 * nfs_advlock_p is set; presumably the hook drops the
			 * lock itself -- confirm against its implementation.
			 */
			if (nfs_advlock_p != NULL)
				error = nfs_advlock_p(ap);
			else {
				NFSVOPUNLOCK(vp, 0);
				error = ENOLCK;
			}
		}
		if (error == 0 && ap->a_op == F_SETLK) {
			error = NFSVOPLOCK(vp, LK_SHARED);
			if (error == 0) {
				/* Mark that a file lock has been acquired. */
				mtx_lock(&np->n_mtx);
				np->n_flag |= NHASBEENLOCKED;
				mtx_unlock(&np->n_mtx);
				NFSVOPUNLOCK(vp, 0);
			}
		}
	}
	return (error);
}

/*
 * NFS advisory byte-level locks.
 *
 * Asynchronous variant: returns EOPNOTSUPP for NFSv4 mounts and for mounts
 * without NFSMNT_NOLOCKD; otherwise defers to lf_advlockasync() using the
 * file size sampled while the vnode was share-locked.
 */
static int
nfs_advlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp = ap->a_vp;
	u_quad_t size;
	int error;

	if (NFS_ISV4(vp))
		return (EOPNOTSUPP);
	error = NFSVOPLOCK(vp, LK_SHARED);
	if (error)
		return (error);
	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
		size = VTONFS(vp)->n_size;
		NFSVOPUNLOCK(vp, 0);
		error = lf_advlockasync(ap, &(vp->v_lockf), size);
	} else {
		NFSVOPUNLOCK(vp, 0);
		error = EOPNOTSUPP;
	}
	return (error);
}

/*
 * Print out the contents of an nfsnode.
 *
 * Prints the cached file id and fsid, plus fifo details for VFIFO vnodes.
 * Always returns 0.
 */
static int
nfs_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);

	/*
	 * NOTE(review): "%ld" and "%x" assume na_fileid is a long and na_fsid
	 * an unsigned int -- confirm against struct nfsvattr.
	 */
	printf("\tfileid %ld fsid 0x%x", np->n_vattr.na_fileid,
	    np->n_vattr.na_fsid);
	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);
	printf("\n");
	return (0);
}

/*
 * This is the "real" nfs::bwrite(struct buf*).
3155 * We set B_CACHE if this is a VMIO buffer. 3156 */ 3157int 3158ncl_writebp(struct buf *bp, int force __unused, struct thread *td) 3159{ 3160 int s; 3161 int oldflags = bp->b_flags; 3162#if 0 3163 int retv = 1; 3164 off_t off; 3165#endif 3166 3167 BUF_ASSERT_HELD(bp); 3168 3169 if (bp->b_flags & B_INVAL) { 3170 brelse(bp); 3171 return(0); 3172 } 3173 3174 bp->b_flags |= B_CACHE; 3175 3176 /* 3177 * Undirty the bp. We will redirty it later if the I/O fails. 3178 */ 3179 3180 s = splbio(); 3181 bundirty(bp); 3182 bp->b_flags &= ~B_DONE; 3183 bp->b_ioflags &= ~BIO_ERROR; 3184 bp->b_iocmd = BIO_WRITE; 3185 3186 bufobj_wref(bp->b_bufobj); 3187 curthread->td_ru.ru_oublock++; 3188 splx(s); 3189 3190 /* 3191 * Note: to avoid loopback deadlocks, we do not 3192 * assign b_runningbufspace. 3193 */ 3194 vfs_busy_pages(bp, 1); 3195 3196 BUF_KERNPROC(bp); 3197 bp->b_iooffset = dbtob(bp->b_blkno); 3198 bstrategy(bp); 3199 3200 if( (oldflags & B_ASYNC) == 0) { 3201 int rtval = bufwait(bp); 3202 3203 if (oldflags & B_DELWRI) { 3204 s = splbio(); 3205 reassignbuf(bp); 3206 splx(s); 3207 } 3208 brelse(bp); 3209 return (rtval); 3210 } 3211 3212 return (0); 3213} 3214 3215/* 3216 * nfs special file access vnode op. 3217 * Essentially just get vattr and then imitate iaccess() since the device is 3218 * local to the client. 3219 */ 3220static int 3221nfsspec_access(struct vop_access_args *ap) 3222{ 3223 struct vattr *vap; 3224 struct ucred *cred = ap->a_cred; 3225 struct vnode *vp = ap->a_vp; 3226 accmode_t accmode = ap->a_accmode; 3227 struct vattr vattr; 3228 int error; 3229 3230 /* 3231 * Disallow write attempts on filesystems mounted read-only; 3232 * unless the file is a socket, fifo, or a block or character 3233 * device resident on the filesystem. 
3234 */ 3235 if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { 3236 switch (vp->v_type) { 3237 case VREG: 3238 case VDIR: 3239 case VLNK: 3240 return (EROFS); 3241 default: 3242 break; 3243 } 3244 } 3245 vap = &vattr; 3246 error = VOP_GETATTR(vp, vap, cred); 3247 if (error) 3248 goto out; 3249 error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, 3250 accmode, cred, NULL); 3251out: 3252 return error; 3253} 3254 3255/* 3256 * Read wrapper for fifos. 3257 */ 3258static int 3259nfsfifo_read(struct vop_read_args *ap) 3260{ 3261 struct nfsnode *np = VTONFS(ap->a_vp); 3262 int error; 3263 3264 /* 3265 * Set access flag. 3266 */ 3267 mtx_lock(&np->n_mtx); 3268 np->n_flag |= NACC; 3269 vfs_timestamp(&np->n_atim); 3270 mtx_unlock(&np->n_mtx); 3271 error = fifo_specops.vop_read(ap); 3272 return error; 3273} 3274 3275/* 3276 * Write wrapper for fifos. 3277 */ 3278static int 3279nfsfifo_write(struct vop_write_args *ap) 3280{ 3281 struct nfsnode *np = VTONFS(ap->a_vp); 3282 3283 /* 3284 * Set update flag. 3285 */ 3286 mtx_lock(&np->n_mtx); 3287 np->n_flag |= NUPD; 3288 vfs_timestamp(&np->n_mtim); 3289 mtx_unlock(&np->n_mtx); 3290 return(fifo_specops.vop_write(ap)); 3291} 3292 3293/* 3294 * Close wrapper for fifos. 3295 * 3296 * Update the times on the nfsnode then do fifo close. 
3297 */ 3298static int 3299nfsfifo_close(struct vop_close_args *ap) 3300{ 3301 struct vnode *vp = ap->a_vp; 3302 struct nfsnode *np = VTONFS(vp); 3303 struct vattr vattr; 3304 struct timespec ts; 3305 3306 mtx_lock(&np->n_mtx); 3307 if (np->n_flag & (NACC | NUPD)) { 3308 vfs_timestamp(&ts); 3309 if (np->n_flag & NACC) 3310 np->n_atim = ts; 3311 if (np->n_flag & NUPD) 3312 np->n_mtim = ts; 3313 np->n_flag |= NCHG; 3314 if (vrefcnt(vp) == 1 && 3315 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 3316 VATTR_NULL(&vattr); 3317 if (np->n_flag & NACC) 3318 vattr.va_atime = np->n_atim; 3319 if (np->n_flag & NUPD) 3320 vattr.va_mtime = np->n_mtim; 3321 mtx_unlock(&np->n_mtx); 3322 (void)VOP_SETATTR(vp, &vattr, ap->a_cred); 3323 goto out; 3324 } 3325 } 3326 mtx_unlock(&np->n_mtx); 3327out: 3328 return (fifo_specops.vop_close(ap)); 3329} 3330 3331/* 3332 * Just call ncl_writebp() with the force argument set to 1. 3333 * 3334 * NOTE: B_DONE may or may not be set in a_bp on call. 3335 */ 3336static int 3337nfs_bwrite(struct buf *bp) 3338{ 3339 3340 return (ncl_writebp(bp, 1, curthread)); 3341} 3342 3343struct buf_ops buf_ops_newnfs = { 3344 .bop_name = "buf_ops_nfs", 3345 .bop_write = nfs_bwrite, 3346 .bop_strategy = bufstrategy, 3347 .bop_sync = bufsync, 3348 .bop_bdflush = bufbdflush, 3349}; 3350 3351static int 3352nfs_getacl(struct vop_getacl_args *ap) 3353{ 3354 int error; 3355 3356 if (ap->a_type != ACL_TYPE_NFS4) 3357 return (EOPNOTSUPP); 3358 error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3359 NULL); 3360 if (error > NFSERR_STALE) { 3361 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0); 3362 error = EPERM; 3363 } 3364 return (error); 3365} 3366 3367static int 3368nfs_setacl(struct vop_setacl_args *ap) 3369{ 3370 int error; 3371 3372 if (ap->a_type != ACL_TYPE_NFS4) 3373 return (EOPNOTSUPP); 3374 error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp, 3375 NULL); 3376 if (error > NFSERR_STALE) { 3377 (void) nfscl_maperr(ap->a_td, error, 
(uid_t)0, (gid_t)0); 3378 error = EPERM; 3379 } 3380 return (error); 3381} 3382 3383/* 3384 * Return POSIX pathconf information applicable to nfs filesystems. 3385 */ 3386static int 3387nfs_pathconf(struct vop_pathconf_args *ap) 3388{ 3389 struct nfsv3_pathconf pc; 3390 struct nfsvattr nfsva; 3391 struct vnode *vp = ap->a_vp; 3392 struct thread *td = curthread; 3393 int attrflag, error; 3394 3395 if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX || 3396 ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED || 3397 ap->a_name == _PC_NO_TRUNC)) || 3398 (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) { 3399 /* 3400 * Since only the above 4 a_names are returned by the NFSv3 3401 * Pathconf RPC, there is no point in doing it for others. 3402 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can 3403 * be used for _PC_NFS4_ACL as well. 3404 */ 3405 error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva, 3406 &attrflag, NULL); 3407 if (attrflag != 0) 3408 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 3409 1); 3410 if (error != 0) 3411 return (error); 3412 } else { 3413 /* 3414 * For NFSv2 (or NFSv3 when not one of the above 4 a_names), 3415 * just fake them. 
3416 */ 3417 pc.pc_linkmax = LINK_MAX; 3418 pc.pc_namemax = NFS_MAXNAMLEN; 3419 pc.pc_notrunc = 1; 3420 pc.pc_chownrestricted = 1; 3421 pc.pc_caseinsensitive = 0; 3422 pc.pc_casepreserving = 1; 3423 error = 0; 3424 } 3425 switch (ap->a_name) { 3426 case _PC_LINK_MAX: 3427 *ap->a_retval = pc.pc_linkmax; 3428 break; 3429 case _PC_NAME_MAX: 3430 *ap->a_retval = pc.pc_namemax; 3431 break; 3432 case _PC_PATH_MAX: 3433 *ap->a_retval = PATH_MAX; 3434 break; 3435 case _PC_PIPE_BUF: 3436 *ap->a_retval = PIPE_BUF; 3437 break; 3438 case _PC_CHOWN_RESTRICTED: 3439 *ap->a_retval = pc.pc_chownrestricted; 3440 break; 3441 case _PC_NO_TRUNC: 3442 *ap->a_retval = pc.pc_notrunc; 3443 break; 3444 case _PC_ACL_EXTENDED: 3445 *ap->a_retval = 0; 3446 break; 3447 case _PC_ACL_NFS4: 3448 if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 && 3449 NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) 3450 *ap->a_retval = 1; 3451 else 3452 *ap->a_retval = 0; 3453 break; 3454 case _PC_ACL_PATH_MAX: 3455 if (NFS_ISV4(vp)) 3456 *ap->a_retval = ACL_MAX_ENTRIES; 3457 else 3458 *ap->a_retval = 3; 3459 break; 3460 case _PC_MAC_PRESENT: 3461 *ap->a_retval = 0; 3462 break; 3463 case _PC_ASYNC_IO: 3464 /* _PC_ASYNC_IO should have been handled by upper layers. 
*/ 3465 KASSERT(0, ("_PC_ASYNC_IO should not get here")); 3466 error = EINVAL; 3467 break; 3468 case _PC_PRIO_IO: 3469 *ap->a_retval = 0; 3470 break; 3471 case _PC_SYNC_IO: 3472 *ap->a_retval = 0; 3473 break; 3474 case _PC_ALLOC_SIZE_MIN: 3475 *ap->a_retval = vp->v_mount->mnt_stat.f_bsize; 3476 break; 3477 case _PC_FILESIZEBITS: 3478 if (NFS_ISV34(vp)) 3479 *ap->a_retval = 64; 3480 else 3481 *ap->a_retval = 32; 3482 break; 3483 case _PC_REC_INCR_XFER_SIZE: 3484 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 3485 break; 3486 case _PC_REC_MAX_XFER_SIZE: 3487 *ap->a_retval = -1; /* means ``unlimited'' */ 3488 break; 3489 case _PC_REC_MIN_XFER_SIZE: 3490 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize; 3491 break; 3492 case _PC_REC_XFER_ALIGN: 3493 *ap->a_retval = PAGE_SIZE; 3494 break; 3495 case _PC_SYMLINK_MAX: 3496 *ap->a_retval = NFS_MAXPATHLEN; 3497 break; 3498 3499 default: 3500 error = EINVAL; 3501 break; 3502 } 3503 return (error); 3504} 3505 3506