1#define MSNFS /* HACK HACK */ 2/* 3 * linux/fs/nfsd/vfs.c 4 * 5 * File operations used by nfsd. Some of these have been ripped from 6 * other parts of the kernel because they weren't exported, others 7 * are partial duplicates with added or changed functionality. 8 * 9 * Note that several functions dget() the dentry upon which they want 10 * to act, most notably those that create directory entries. Response 11 * dentry's are dput()'d if necessary in the release callback. 12 * So if you notice code paths that apparently fail to dput() the 13 * dentry, don't worry--they have been taken care of. 14 * 15 * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de> 16 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> 17 */ 18 19#include <linux/string.h> 20#include <linux/time.h> 21#include <linux/errno.h> 22#include <linux/fs.h> 23#include <linux/file.h> 24#include <linux/mount.h> 25#include <linux/major.h> 26#include <linux/ext2_fs.h> 27#include <linux/proc_fs.h> 28#include <linux/stat.h> 29#include <linux/fcntl.h> 30#include <linux/net.h> 31#include <linux/unistd.h> 32#include <linux/slab.h> 33#include <linux/pagemap.h> 34#include <linux/in.h> 35#include <linux/module.h> 36#include <linux/namei.h> 37#include <linux/vfs.h> 38#include <linux/delay.h> 39#include <linux/sunrpc/svc.h> 40#include <linux/nfsd/nfsd.h> 41#ifdef CONFIG_NFSD_V3 42#include <linux/nfs3.h> 43#include <linux/nfsd/xdr3.h> 44#endif /* CONFIG_NFSD_V3 */ 45#include <linux/nfsd/nfsfh.h> 46#include <linux/quotaops.h> 47#include <linux/fsnotify.h> 48#include <linux/posix_acl.h> 49#include <linux/posix_acl_xattr.h> 50#include <linux/xattr.h> 51#ifdef CONFIG_NFSD_V4 52#include <linux/nfs4.h> 53#include <linux/nfs4_acl.h> 54#include <linux/nfsd_idmap.h> 55#include <linux/security.h> 56#endif /* CONFIG_NFSD_V4 */ 57#include <linux/jhash.h> 58 59#include <asm/uaccess.h> 60 61#define NFSDDBG_FACILITY NFSDDBG_FILEOP 62 63 64/* We must ignore files (but only files) which might have mandatory 65 * locks on them because there is no way to know if the accesser has 66 * the lock. 67 */ 68#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i)) 69 70/* 71 * This is a cache of readahead params that help us choose the proper 72 * readahead strategy. Initially, we set all readahead parameters to 0 73 * and let the VFS handle things. 74 * If you increase the number of cached files very much, you'll need to 75 * add a hash table here. 76 */ 77struct raparms { 78 struct raparms *p_next; 79 unsigned int p_count; 80 ino_t p_ino; 81 dev_t p_dev; 82 int p_set; 83 struct file_ra_state p_ra; 84 unsigned int p_hindex; 85}; 86 87struct raparm_hbucket { 88 struct raparms *pb_head; 89 spinlock_t pb_lock; 90} ____cacheline_aligned_in_smp; 91 92static struct raparms * raparml; 93#define RAPARM_HASH_BITS 4 94#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) 95#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 96static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; 97 98/* 99 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 100 * a mount point. 101 * Returns -EAGAIN or -ETIMEDOUT leaving *dpp and *expp unchanged, 102 * or nfs_ok having possibly changed *dpp and *expp 103 */ 104int 105nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, 106 struct svc_export **expp) 107{ 108 struct svc_export *exp = *expp, *exp2 = NULL; 109 struct dentry *dentry = *dpp; 110 struct vfsmount *mnt = mntget(exp->ex_mnt); 111 struct dentry *mounts = dget(dentry); 112 int err = 0; 113 114 while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); 115 116 exp2 = exp_get_by_name(exp->ex_client, mnt, mounts, &rqstp->rq_chandle); 117 if (IS_ERR(exp2)) { 118 err = PTR_ERR(exp2); 119 dput(mounts); 120 mntput(mnt); 121 goto out; 122 } 123 if (exp2 && ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2))) { 124 /* successfully crossed mount point */ 125 exp_put(exp); 126 *expp = exp2; 127 dput(dentry); 128 *dpp = mounts; 129 } else { 130 if (exp2) exp_put(exp2); 131 dput(mounts); 132 } 133 mntput(mnt); 134out: 135 return err; 136} 137 138/* 139 * Look up one component of a pathname. 140 * N.B. After this call _both_ fhp and resfh need an fh_put 141 * 142 * If the lookup would cross a mountpoint, and the mounted filesystem 143 * is exported to the client with NFSEXP_NOHIDE, then the lookup is 144 * accepted as it stands and the mounted directory is 145 * returned. Otherwise the covered directory is returned. 146 * NOTE: this mountpoint crossing is not supported properly by all 147 * clients and is explicitly disallowed for NFSv3 148 * NeilBrown <neilb@cse.unsw.edu.au> 149 */ 150__be32 151nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, 152 int len, struct svc_fh *resfh) 153{ 154 struct svc_export *exp; 155 struct dentry *dparent; 156 struct dentry *dentry; 157 __be32 err; 158 int host_err; 159 160 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); 161 162 /* Obtain dentry and export. */ 163 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC); 164 if (err) 165 return err; 166 167 dparent = fhp->fh_dentry; 168 exp = fhp->fh_export; 169 exp_get(exp); 170 171 err = nfserr_acces; 172 173 /* Lookup the name, but don't follow links */ 174 if (isdotent(name, len)) { 175 if (len==1) 176 dentry = dget(dparent); 177 else if (dparent != exp->ex_dentry) { 178 dentry = dget_parent(dparent); 179 } else if (!EX_NOHIDE(exp)) 180 dentry = dget(dparent); /* .. == . just like at / */ 181 else { 182 /* checking mountpoint crossing is very different when stepping up */ 183 struct svc_export *exp2 = NULL; 184 struct dentry *dp; 185 struct vfsmount *mnt = mntget(exp->ex_mnt); 186 dentry = dget(dparent); 187 while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry)) 188 ; 189 dp = dget_parent(dentry); 190 dput(dentry); 191 dentry = dp; 192 193 exp2 = exp_parent(exp->ex_client, mnt, dentry, 194 &rqstp->rq_chandle); 195 if (IS_ERR(exp2)) { 196 host_err = PTR_ERR(exp2); 197 dput(dentry); 198 mntput(mnt); 199 goto out_nfserr; 200 } 201 if (!exp2) { 202 dput(dentry); 203 dentry = dget(dparent); 204 } else { 205 exp_put(exp); 206 exp = exp2; 207 } 208 mntput(mnt); 209 } 210 } else { 211 fh_lock(fhp); 212 dentry = lookup_one_len(name, dparent, len); 213 host_err = PTR_ERR(dentry); 214 if (IS_ERR(dentry)) 215 goto out_nfserr; 216 /* 217 * check if we have crossed a mount point ... 218 */ 219 if (d_mountpoint(dentry)) { 220 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { 221 dput(dentry); 222 goto out_nfserr; 223 } 224 } 225 } 226 /* 227 * Note: we compose the file handle now, but as the 228 * dentry may be negative, it may need to be updated. 229 */ 230 err = fh_compose(resfh, exp, dentry, fhp); 231 if (!err && !dentry->d_inode) 232 err = nfserr_noent; 233 dput(dentry); 234out: 235 exp_put(exp); 236 return err; 237 238out_nfserr: 239 err = nfserrno(host_err); 240 goto out; 241} 242 243/* 244 * Set various file attributes. 245 * N.B. After this call fhp needs an fh_put 246 */ 247__be32 248nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, 249 int check_guard, time_t guardtime) 250{ 251 struct dentry *dentry; 252 struct inode *inode; 253 int accmode = MAY_SATTR; 254 int ftype = 0; 255 int imode; 256 __be32 err; 257 int host_err; 258 int size_change = 0; 259 260 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) 261 accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE; 262 if (iap->ia_valid & ATTR_SIZE) 263 ftype = S_IFREG; 264 265 /* Get inode */ 266 err = fh_verify(rqstp, fhp, ftype, accmode); 267 if (err) 268 goto out; 269 270 dentry = fhp->fh_dentry; 271 inode = dentry->d_inode; 272 273 /* Ignore any mode updates on symlinks */ 274 if (S_ISLNK(inode->i_mode)) 275 iap->ia_valid &= ~ATTR_MODE; 276 277 if (!iap->ia_valid) 278 goto out; 279 280 /* NFSv2 does not differentiate between "set-[ac]time-to-now" 281 * which only requires access, and "set-[ac]time-to-X" which 282 * requires ownership. 283 * So if it looks like it might be "set both to the same time which 284 * is close to now", and if inode_change_ok fails, then we 285 * convert to "set to now" instead of "set to explicit time" 286 * 287 * We only call inode_change_ok as the last test as technically 288 * it is not an interface that we should be using. It is only 289 * valid if the filesystem does not define it's own i_op->setattr. 290 */ 291#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) 292#define MAX_TOUCH_TIME_ERROR (30*60) 293 if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET 294 && iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec 295 ) { 296 /* Looks probable. Now just make sure time is in the right ballpark. 297 * Solaris, at least, doesn't seem to care what the time request is. 298 * We require it be within 30 minutes of now. 299 */ 300 time_t delta = iap->ia_atime.tv_sec - get_seconds(); 301 if (delta<0) delta = -delta; 302 if (delta < MAX_TOUCH_TIME_ERROR && 303 inode_change_ok(inode, iap) != 0) { 304 /* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME 305 * this will cause notify_change to set these times to "now" 306 */ 307 iap->ia_valid &= ~BOTH_TIME_SET; 308 } 309 } 310 311 /* The size case is special. It changes the file as well as the attributes. */ 312 if (iap->ia_valid & ATTR_SIZE) { 313 if (iap->ia_size < inode->i_size) { 314 err = nfsd_permission(fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); 315 if (err) 316 goto out; 317 } 318 319 /* 320 * If we are changing the size of the file, then 321 * we need to break all leases. 322 */ 323 host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK); 324 if (host_err == -EWOULDBLOCK) 325 host_err = -ETIMEDOUT; 326 if (host_err) /* ENOMEM or EWOULDBLOCK */ 327 goto out_nfserr; 328 329 host_err = get_write_access(inode); 330 if (host_err) 331 goto out_nfserr; 332 333 size_change = 1; 334 host_err = locks_verify_truncate(inode, NULL, iap->ia_size); 335 if (host_err) { 336 put_write_access(inode); 337 goto out_nfserr; 338 } 339 DQUOT_INIT(inode); 340 } 341 342 imode = inode->i_mode; 343 if (iap->ia_valid & ATTR_MODE) { 344 iap->ia_mode &= S_IALLUGO; 345 imode = iap->ia_mode |= (imode & ~S_IALLUGO); 346 } 347 348 /* Revoke setuid/setgid bit on chown/chgrp */ 349 if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) 350 iap->ia_valid |= ATTR_KILL_SUID; 351 if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) 352 iap->ia_valid |= ATTR_KILL_SGID; 353 354 /* Change the attributes. */ 355 356 iap->ia_valid |= ATTR_CTIME; 357 358 err = nfserr_notsync; 359 if (!check_guard || guardtime == inode->i_ctime.tv_sec) { 360 fh_lock(fhp); 361 host_err = notify_change(dentry, iap); 362 err = nfserrno(host_err); 363 fh_unlock(fhp); 364 } 365 if (size_change) 366 put_write_access(inode); 367 if (!err) 368 if (EX_ISSYNC(fhp->fh_export)) 369 write_inode_now(inode, 1); 370out: 371 return err; 372 373out_nfserr: 374 err = nfserrno(host_err); 375 goto out; 376} 377 378#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) || \ 379 defined(CONFIG_NFSD_V4) 380static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) 381{ 382 ssize_t buflen; 383 384 buflen = vfs_getxattr(dentry, key, NULL, 0); 385 if (buflen <= 0) 386 return buflen; 387 388 *buf = kmalloc(buflen, GFP_KERNEL); 389 if (!*buf) 390 return -ENOMEM; 391 392 return vfs_getxattr(dentry, key, *buf, buflen); 393} 394#endif 395 396#if defined(CONFIG_NFSD_V4) 397static int 398set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) 399{ 400 int len; 401 size_t buflen; 402 char *buf = NULL; 403 int error = 0; 404 405 buflen = posix_acl_xattr_size(pacl->a_count); 406 buf = kmalloc(buflen, GFP_KERNEL); 407 error = -ENOMEM; 408 if (buf == NULL) 409 goto out; 410 411 len = posix_acl_to_xattr(pacl, buf, buflen); 412 if (len < 0) { 413 error = len; 414 goto out; 415 } 416 417 error = vfs_setxattr(dentry, key, buf, len, 0); 418out: 419 kfree(buf); 420 return error; 421} 422 423__be32 424nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, 425 struct nfs4_acl *acl) 426{ 427 __be32 error; 428 int host_error; 429 struct dentry *dentry; 430 struct inode *inode; 431 struct posix_acl *pacl = NULL, *dpacl = NULL; 432 unsigned int flags = 0; 433 434 /* Get inode */ 435 error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); 436 if (error) 437 goto out; 438 439 dentry = fhp->fh_dentry; 440 inode = dentry->d_inode; 441 if (S_ISDIR(inode->i_mode)) 442 flags = NFS4_ACL_DIR; 443 444 host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); 445 if (host_error == -EINVAL) { 446 error = nfserr_attrnotsupp; 447 goto out; 448 } else if (host_error < 0) 449 goto out_nfserr; 450 451 host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); 452 if (host_error < 0) 453 goto out_nfserr; 454 455 if (S_ISDIR(inode->i_mode)) { 456 host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); 457 if (host_error < 0) 458 goto out_nfserr; 459 } 460 461 error = nfs_ok; 462 463out: 464 posix_acl_release(pacl); 465 posix_acl_release(dpacl); 466 return (error); 467out_nfserr: 468 if (host_error == -EOPNOTSUPP) 469 error = nfserr_attrnotsupp; 470 else 471 error = nfserrno(host_error); 472 goto out; 473} 474 475static struct posix_acl * 476_get_posix_acl(struct dentry *dentry, char *key) 477{ 478 void *buf = NULL; 479 struct posix_acl *pacl = NULL; 480 int buflen; 481 482 buflen = nfsd_getxattr(dentry, key, &buf); 483 if (!buflen) 484 buflen = -ENODATA; 485 if (buflen <= 0) 486 return ERR_PTR(buflen); 487 488 pacl = posix_acl_from_xattr(buf, buflen); 489 kfree(buf); 490 return pacl; 491} 492 493int 494nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) 495{ 496 struct inode *inode = dentry->d_inode; 497 int error = 0; 498 struct posix_acl *pacl = NULL, *dpacl = NULL; 499 unsigned int flags = 0; 500 501 pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); 502 if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) 503 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 504 if (IS_ERR(pacl)) { 505 error = PTR_ERR(pacl); 506 pacl = NULL; 507 goto out; 508 } 509 510 if (S_ISDIR(inode->i_mode)) { 511 dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); 512 if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) 513 dpacl = NULL; 514 else if (IS_ERR(dpacl)) { 515 error = PTR_ERR(dpacl); 516 dpacl = NULL; 517 goto out; 518 } 519 flags = NFS4_ACL_DIR; 520 } 521 522 *acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags); 523 if (IS_ERR(*acl)) { 524 error = PTR_ERR(*acl); 525 *acl = NULL; 526 } 527 out: 528 posix_acl_release(pacl); 529 posix_acl_release(dpacl); 530 return error; 531} 532 533#endif /* defined(CONFIG_NFS_V4) */ 534 535#ifdef CONFIG_NFSD_V3 536/* 537 * Check server access rights to a file system object 538 */ 539struct accessmap { 540 u32 access; 541 int how; 542}; 543static struct accessmap nfs3_regaccess[] = { 544 { NFS3_ACCESS_READ, MAY_READ }, 545 { NFS3_ACCESS_EXECUTE, MAY_EXEC }, 546 { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC }, 547 { NFS3_ACCESS_EXTEND, MAY_WRITE }, 548 549 { 0, 0 } 550}; 551 552static struct accessmap nfs3_diraccess[] = { 553 { NFS3_ACCESS_READ, MAY_READ }, 554 { NFS3_ACCESS_LOOKUP, MAY_EXEC }, 555 { NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC }, 556 { NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE }, 557 { NFS3_ACCESS_DELETE, MAY_REMOVE }, 558 559 { 0, 0 } 560}; 561 562static struct accessmap nfs3_anyaccess[] = { 563 /* Some clients - Solaris 2.6 at least, make an access call 564 * to the server to check for access for things like /dev/null 565 * (which really, the server doesn't care about). So 566 * We provide simple access checking for them, looking 567 * mainly at mode bits, and we make sure to ignore read-only 568 * filesystem checks 569 */ 570 { NFS3_ACCESS_READ, MAY_READ }, 571 { NFS3_ACCESS_EXECUTE, MAY_EXEC }, 572 { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_LOCAL_ACCESS }, 573 { NFS3_ACCESS_EXTEND, MAY_WRITE|MAY_LOCAL_ACCESS }, 574 575 { 0, 0 } 576}; 577 578__be32 579nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported) 580{ 581 struct accessmap *map; 582 struct svc_export *export; 583 struct dentry *dentry; 584 u32 query, result = 0, sresult = 0; 585 __be32 error; 586 587 error = fh_verify(rqstp, fhp, 0, MAY_NOP); 588 if (error) 589 goto out; 590 591 export = fhp->fh_export; 592 dentry = fhp->fh_dentry; 593 594 if (S_ISREG(dentry->d_inode->i_mode)) 595 map = nfs3_regaccess; 596 else if (S_ISDIR(dentry->d_inode->i_mode)) 597 map = nfs3_diraccess; 598 else 599 map = nfs3_anyaccess; 600 601 602 query = *access; 603 for (; map->access; map++) { 604 if (map->access & query) { 605 __be32 err2; 606 607 sresult |= map->access; 608 609 err2 = nfsd_permission(export, dentry, map->how); 610 switch (err2) { 611 case nfs_ok: 612 result |= map->access; 613 break; 614 615 /* the following error codes just mean the access was not allowed, 616 * rather than an error occurred */ 617 case nfserr_rofs: 618 case nfserr_acces: 619 case nfserr_perm: 620 /* simply don't "or" in the access bit. */ 621 break; 622 default: 623 error = err2; 624 goto out; 625 } 626 } 627 } 628 *access = result; 629 if (supported) 630 *supported = sresult; 631 632 out: 633 return error; 634} 635#endif /* CONFIG_NFSD_V3 */ 636 637 638 639/* 640 * Open an existing file or directory. 641 * The access argument indicates the type of open (read/write/lock) 642 * N.B. After this call fhp needs an fh_put 643 */ 644__be32 645nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 646 int access, struct file **filp) 647{ 648 struct dentry *dentry; 649 struct inode *inode; 650 int flags = O_RDONLY|O_LARGEFILE; 651 __be32 err; 652 int host_err; 653 654 /* 655 * If we get here, then the client has already done an "open", 656 * and (hopefully) checked permission - so allow OWNER_OVERRIDE 657 * in case a chmod has now revoked permission. 658 */ 659 err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); 660 if (err) 661 goto out; 662 663 dentry = fhp->fh_dentry; 664 inode = dentry->d_inode; 665 666 /* Disallow write access to files with the append-only bit set 667 * or any access when mandatory locking enabled 668 */ 669 err = nfserr_perm; 670 if (IS_APPEND(inode) && (access & MAY_WRITE)) 671 goto out; 672 if (IS_ISMNDLK(inode)) 673 goto out; 674 675 if (!inode->i_fop) 676 goto out; 677 678 /* 679 * Check to see if there are any leases on this file. 680 * This may block while leases are broken. 681 */ 682 host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0)); 683 if (host_err == -EWOULDBLOCK) 684 host_err = -ETIMEDOUT; 685 if (host_err) /* NOMEM or WOULDBLOCK */ 686 goto out_nfserr; 687 688 if (access & MAY_WRITE) { 689 if (access & MAY_READ) 690 flags = O_RDWR|O_LARGEFILE; 691 else 692 flags = O_WRONLY|O_LARGEFILE; 693 694 DQUOT_INIT(inode); 695 } 696 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags); 697 if (IS_ERR(*filp)) 698 host_err = PTR_ERR(*filp); 699out_nfserr: 700 err = nfserrno(host_err); 701out: 702 return err; 703} 704 705/* 706 * Close a file. 707 */ 708void 709nfsd_close(struct file *filp) 710{ 711 fput(filp); 712} 713 714/* 715 * Sync a file 716 * As this calls fsync (not fdatasync) there is no need for a write_inode 717 * after it. 718 */ 719static inline int nfsd_dosync(struct file *filp, struct dentry *dp, 720 const struct file_operations *fop) 721{ 722 struct inode *inode = dp->d_inode; 723 int (*fsync) (struct file *, struct dentry *, int); 724 int err; 725 726 err = filemap_fdatawrite(inode->i_mapping); 727 if (err == 0 && fop && (fsync = fop->fsync)) 728 err = fsync(filp, dp, 0); 729 if (err == 0) 730 err = filemap_fdatawait(inode->i_mapping); 731 732 return err; 733} 734 735 736static int 737nfsd_sync(struct file *filp) 738{ 739 int err; 740 struct inode *inode = filp->f_path.dentry->d_inode; 741 dprintk("nfsd: sync file %s\n", filp->f_path.dentry->d_name.name); 742 mutex_lock(&inode->i_mutex); 743 err=nfsd_dosync(filp, filp->f_path.dentry, filp->f_op); 744 mutex_unlock(&inode->i_mutex); 745 746 return err; 747} 748 749int 750nfsd_sync_dir(struct dentry *dp) 751{ 752 return nfsd_dosync(NULL, dp, dp->d_inode->i_fop); 753} 754 755/* 756 * Obtain the readahead parameters for the file 757 * specified by (dev, ino). 758 */ 759 760static inline struct raparms * 761nfsd_get_raparms(dev_t dev, ino_t ino) 762{ 763 struct raparms *ra, **rap, **frap = NULL; 764 int depth = 0; 765 unsigned int hash; 766 struct raparm_hbucket *rab; 767 768 hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK; 769 rab = &raparm_hash[hash]; 770 771 spin_lock(&rab->pb_lock); 772 for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) { 773 if (ra->p_ino == ino && ra->p_dev == dev) 774 goto found; 775 depth++; 776 if (ra->p_count == 0) 777 frap = rap; 778 } 779 depth = nfsdstats.ra_size*11/10; 780 if (!frap) { 781 spin_unlock(&rab->pb_lock); 782 return NULL; 783 } 784 rap = frap; 785 ra = *frap; 786 ra->p_dev = dev; 787 ra->p_ino = ino; 788 ra->p_set = 0; 789 ra->p_hindex = hash; 790found: 791 if (rap != &rab->pb_head) { 792 *rap = ra->p_next; 793 ra->p_next = rab->pb_head; 794 rab->pb_head = ra; 795 } 796 ra->p_count++; 797 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; 798 spin_unlock(&rab->pb_lock); 799 return ra; 800} 801 802/* 803 * Grab and keep cached pages assosiated with a file in the svc_rqst 804 * so that they can be passed to the netowork sendmsg/sendpage routines 805 * directrly. They will be released after the sending has completed. 806 */ 807static int 808nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size) 809{ 810 unsigned long count = desc->count; 811 struct svc_rqst *rqstp = desc->arg.data; 812 struct page **pp = rqstp->rq_respages + rqstp->rq_resused; 813 814 if (size > count) 815 size = count; 816 817 if (rqstp->rq_res.page_len == 0) { 818 get_page(page); 819 put_page(*pp); 820 *pp = page; 821 rqstp->rq_resused++; 822 rqstp->rq_res.page_base = offset; 823 rqstp->rq_res.page_len = size; 824 } else if (page != pp[-1]) { 825 get_page(page); 826 if (*pp) 827 put_page(*pp); 828 *pp = page; 829 rqstp->rq_resused++; 830 rqstp->rq_res.page_len += size; 831 } else 832 rqstp->rq_res.page_len += size; 833 834 desc->count = count - size; 835 desc->written += size; 836 return size; 837} 838 839static __be32 840nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 841 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 842{ 843 struct inode *inode; 844 struct raparms *ra; 845 mm_segment_t oldfs; 846 __be32 err; 847 int host_err; 848 849 err = nfserr_perm; 850 inode = file->f_path.dentry->d_inode; 851#ifdef MSNFS 852 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 853 (!lock_may_read(inode, offset, *count))) 854 goto out; 855#endif 856 857 /* Get readahead parameters */ 858 ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); 859 860 if (ra && ra->p_set) 861 file->f_ra = ra->p_ra; 862 863 if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { 864 rqstp->rq_resused = 1; 865 host_err = file->f_op->sendfile(file, &offset, *count, 866 nfsd_read_actor, rqstp); 867 } else { 868 oldfs = get_fs(); 869 set_fs(KERNEL_DS); 870 host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); 871 set_fs(oldfs); 872 } 873 874 /* Write back readahead params */ 875 if (ra) { 876 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; 877 spin_lock(&rab->pb_lock); 878 ra->p_ra = file->f_ra; 879 ra->p_set = 1; 880 ra->p_count--; 881 spin_unlock(&rab->pb_lock); 882 } 883 884 if (host_err >= 0) { 885 nfsdstats.io_read += host_err; 886 *count = host_err; 887 err = 0; 888 fsnotify_access(file->f_path.dentry); 889 } else 890 err = nfserrno(host_err); 891out: 892 return err; 893} 894 895static void kill_suid(struct dentry *dentry) 896{ 897 struct iattr ia; 898 ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID; 899 900 mutex_lock(&dentry->d_inode->i_mutex); 901 notify_change(dentry, &ia); 902 mutex_unlock(&dentry->d_inode->i_mutex); 903} 904 905static __be32 906nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 907 loff_t offset, struct kvec *vec, int vlen, 908 unsigned long cnt, int *stablep) 909{ 910 struct svc_export *exp; 911 struct dentry *dentry; 912 struct inode *inode; 913 mm_segment_t oldfs; 914 __be32 err = 0; 915 int host_err; 916 int stable = *stablep; 917 918#ifdef MSNFS 919 err = nfserr_perm; 920 921 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 922 (!lock_may_write(file->f_path.dentry->d_inode, offset, cnt))) 923 goto out; 924#endif 925 926 dentry = file->f_path.dentry; 927 inode = dentry->d_inode; 928 exp = fhp->fh_export; 929 930 /* 931 * Request sync writes if 932 * - the sync export option has been set, or 933 * - the client requested O_SYNC behavior (NFSv3 feature). 934 * - The file system doesn't support fsync(). 935 * When gathered writes have been configured for this volume, 936 * flushing the data to disk is handled separately below. 937 */ 938 939 if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */ 940 stable = 2; 941 *stablep = 2; /* FILE_SYNC */ 942 } 943 944 if (!EX_ISSYNC(exp)) 945 stable = 0; 946 if (stable && !EX_WGATHER(exp)) 947 file->f_flags |= O_SYNC; 948 949 /* Write the data. */ 950 oldfs = get_fs(); set_fs(KERNEL_DS); 951 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); 952 set_fs(oldfs); 953 if (host_err >= 0) { 954 nfsdstats.io_write += cnt; 955 fsnotify_modify(file->f_path.dentry); 956 } 957 958 /* clear setuid/setgid flag after write */ 959 if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) 960 kill_suid(dentry); 961 962 if (host_err >= 0 && stable) { 963 static ino_t last_ino; 964 static dev_t last_dev; 965 966 /* 967 * Gathered writes: If another process is currently 968 * writing to the file, there's a high chance 969 * this is another nfsd (triggered by a bulk write 970 * from a client's biod). Rather than syncing the 971 * file with each write request, we sleep for 10 msec. 972 * 973 * I don't know if this roughly approximates 974 * C. Juszak's idea of gathered writes, but it's a 975 * nice and simple solution (IMHO), and it seems to 976 * work:-) 977 */ 978 if (EX_WGATHER(exp)) { 979 if (atomic_read(&inode->i_writecount) > 1 980 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { 981 dprintk("nfsd: write defer %d\n", current->pid); 982 msleep(10); 983 dprintk("nfsd: write resume %d\n", current->pid); 984 } 985 986 if (inode->i_state & I_DIRTY) { 987 dprintk("nfsd: write sync %d\n", current->pid); 988 host_err=nfsd_sync(file); 989 } 990 } 991 last_ino = inode->i_ino; 992 last_dev = inode->i_sb->s_dev; 993 } 994 995 dprintk("nfsd: write complete host_err=%d\n", host_err); 996 if (host_err >= 0) 997 err = 0; 998 else 999 err = nfserrno(host_err); 1000out: 1001 return err; 1002} 1003 1004/* 1005 * Read data from a file. count must contain the requested read count 1006 * on entry. On return, *count contains the number of bytes actually read. 1007 * N.B. After this call fhp needs an fh_put 1008 */ 1009__be32 1010nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 1011 loff_t offset, struct kvec *vec, int vlen, 1012 unsigned long *count) 1013{ 1014 __be32 err; 1015 1016 if (file) { 1017 err = nfsd_permission(fhp->fh_export, fhp->fh_dentry, 1018 MAY_READ|MAY_OWNER_OVERRIDE); 1019 if (err) 1020 goto out; 1021 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1022 } else { 1023 err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file); 1024 if (err) 1025 goto out; 1026 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1027 nfsd_close(file); 1028 } 1029out: 1030 return err; 1031} 1032 1033/* 1034 * Write data to a file. 1035 * The stable flag requests synchronous writes. 1036 * N.B. After this call fhp needs an fh_put 1037 */ 1038__be32 1039nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 1040 loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, 1041 int *stablep) 1042{ 1043 __be32 err = 0; 1044 1045 if (file) { 1046 err = nfsd_permission(fhp->fh_export, fhp->fh_dentry, 1047 MAY_WRITE|MAY_OWNER_OVERRIDE); 1048 if (err) 1049 goto out; 1050 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, 1051 stablep); 1052 } else { 1053 err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file); 1054 if (err) 1055 goto out; 1056 1057 if (cnt) 1058 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, 1059 cnt, stablep); 1060 nfsd_close(file); 1061 } 1062out: 1063 return err; 1064} 1065 1066#ifdef CONFIG_NFSD_V3 1067/* 1068 * Commit all pending writes to stable storage. 1069 * Strictly speaking, we could sync just the indicated file region here, 1070 * but there's currently no way we can ask the VFS to do so. 1071 * 1072 * Unfortunately we cannot lock the file to make sure we return full WCC 1073 * data to the client, as locking happens lower down in the filesystem. 1074 */ 1075__be32 1076nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, 1077 loff_t offset, unsigned long count) 1078{ 1079 struct file *file; 1080 __be32 err; 1081 1082 if ((u64)count > ~(u64)offset) 1083 return nfserr_inval; 1084 1085 if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0) 1086 return err; 1087 if (EX_ISSYNC(fhp->fh_export)) { 1088 if (file->f_op && file->f_op->fsync) { 1089 err = nfserrno(nfsd_sync(file)); 1090 } else { 1091 err = nfserr_notsupp; 1092 } 1093 } 1094 1095 nfsd_close(file); 1096 return err; 1097} 1098#endif /* CONFIG_NFSD_V3 */ 1099 1100/* 1101 * Create a file (regular, directory, device, fifo); UNIX sockets 1102 * not yet implemented. 1103 * If the response fh has been verified, the parent directory should 1104 * already be locked. Note that the parent directory is left locked. 1105 * 1106 * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp 1107 */ 1108__be32 1109nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, 1110 char *fname, int flen, struct iattr *iap, 1111 int type, dev_t rdev, struct svc_fh *resfhp) 1112{ 1113 struct dentry *dentry, *dchild = NULL; 1114 struct inode *dirp; 1115 __be32 err; 1116 int host_err; 1117 1118 err = nfserr_perm; 1119 if (!flen) 1120 goto out; 1121 err = nfserr_exist; 1122 if (isdotent(fname, flen)) 1123 goto out; 1124 1125 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); 1126 if (err) 1127 goto out; 1128 1129 dentry = fhp->fh_dentry; 1130 dirp = dentry->d_inode; 1131 1132 err = nfserr_notdir; 1133 if(!dirp->i_op || !dirp->i_op->lookup) 1134 goto out; 1135 /* 1136 * Check whether the response file handle has been verified yet. 1137 * If it has, the parent directory should already be locked. 1138 */ 1139 if (!resfhp->fh_dentry) { 1140 /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ 1141 fh_lock_nested(fhp, I_MUTEX_PARENT); 1142 dchild = lookup_one_len(fname, dentry, flen); 1143 host_err = PTR_ERR(dchild); 1144 if (IS_ERR(dchild)) 1145 goto out_nfserr; 1146 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); 1147 if (err) 1148 goto out; 1149 } else { 1150 /* called from nfsd_proc_create */ 1151 dchild = dget(resfhp->fh_dentry); 1152 if (!fhp->fh_locked) { 1153 /* not actually possible */ 1154 printk(KERN_ERR 1155 "nfsd_create: parent %s/%s not locked!\n", 1156 dentry->d_parent->d_name.name, 1157 dentry->d_name.name); 1158 err = nfserr_io; 1159 goto out; 1160 } 1161 } 1162 /* 1163 * Make sure the child dentry is still negative ... 1164 */ 1165 err = nfserr_exist; 1166 if (dchild->d_inode) { 1167 dprintk("nfsd_create: dentry %s/%s not negative!\n", 1168 dentry->d_name.name, dchild->d_name.name); 1169 goto out; 1170 } 1171 1172 if (!(iap->ia_valid & ATTR_MODE)) 1173 iap->ia_mode = 0; 1174 iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; 1175 1176 /* 1177 * Get the dir op function pointer. 1178 */ 1179 err = 0; 1180 switch (type) { 1181 case S_IFREG: 1182 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1183 break; 1184 case S_IFDIR: 1185 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); 1186 break; 1187 case S_IFCHR: 1188 case S_IFBLK: 1189 case S_IFIFO: 1190 case S_IFSOCK: 1191 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); 1192 break; 1193 default: 1194 printk("nfsd: bad file type %o in nfsd_create\n", type); 1195 host_err = -EINVAL; 1196 } 1197 if (host_err < 0) 1198 goto out_nfserr; 1199 1200 if (EX_ISSYNC(fhp->fh_export)) { 1201 err = nfserrno(nfsd_sync_dir(dentry)); 1202 write_inode_now(dchild->d_inode, 1); 1203 } 1204 1205 1206 /* Set file attributes. Mode has already been set and 1207 * setting uid/gid works only for root. Irix appears to 1208 * send along the gid when it tries to implement setgid 1209 * directories via NFS. 1210 */ 1211 if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) { 1212 __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1213 if (err2) 1214 err = err2; 1215 } 1216 /* 1217 * Update the file handle to get the new inode info. 1218 */ 1219 if (!err) 1220 err = fh_update(resfhp); 1221out: 1222 if (dchild && !IS_ERR(dchild)) 1223 dput(dchild); 1224 return err; 1225 1226out_nfserr: 1227 err = nfserrno(host_err); 1228 goto out; 1229} 1230 1231#ifdef CONFIG_NFSD_V3 1232/* 1233 * NFSv3 version of nfsd_create 1234 */ 1235__be32 1236nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, 1237 char *fname, int flen, struct iattr *iap, 1238 struct svc_fh *resfhp, int createmode, u32 *verifier, 1239 int *truncp, int *created) 1240{ 1241 struct dentry *dentry, *dchild = NULL; 1242 struct inode *dirp; 1243 __be32 err; 1244 int host_err; 1245 __u32 v_mtime=0, v_atime=0; 1246 1247 err = nfserr_perm; 1248 if (!flen) 1249 goto out; 1250 err = nfserr_exist; 1251 if (isdotent(fname, flen)) 1252 goto out; 1253 if (!(iap->ia_valid & ATTR_MODE)) 1254 iap->ia_mode = 0; 1255 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); 1256 if (err) 1257 goto out; 1258 1259 dentry = fhp->fh_dentry; 1260 dirp = dentry->d_inode; 1261 1262 /* Get all the sanity checks out of the way before 1263 * we lock the parent. */ 1264 err = nfserr_notdir; 1265 if(!dirp->i_op || !dirp->i_op->lookup) 1266 goto out; 1267 fh_lock_nested(fhp, I_MUTEX_PARENT); 1268 1269 /* 1270 * Compose the response file handle. 1271 */ 1272 dchild = lookup_one_len(fname, dentry, flen); 1273 host_err = PTR_ERR(dchild); 1274 if (IS_ERR(dchild)) 1275 goto out_nfserr; 1276 1277 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); 1278 if (err) 1279 goto out; 1280 1281 if (createmode == NFS3_CREATE_EXCLUSIVE) { 1282 /* solaris7 gets confused (bugid 4218508) if these have 1283 * the high bit set, so just clear the high bits. 1284 */ 1285 v_mtime = verifier[0]&0x7fffffff; 1286 v_atime = verifier[1]&0x7fffffff; 1287 } 1288 1289 if (dchild->d_inode) { 1290 err = 0; 1291 1292 switch (createmode) { 1293 case NFS3_CREATE_UNCHECKED: 1294 if (! S_ISREG(dchild->d_inode->i_mode)) 1295 err = nfserr_exist; 1296 else if (truncp) { 1297 /* in nfsv4, we need to treat this case a little 1298 * differently. we don't want to truncate the 1299 * file now; this would be wrong if the OPEN 1300 * fails for some other reason. furthermore, 1301 * if the size is nonzero, we should ignore it 1302 * according to spec! 1303 */ 1304 *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size; 1305 } 1306 else { 1307 iap->ia_valid &= ATTR_SIZE; 1308 goto set_attr; 1309 } 1310 break; 1311 case NFS3_CREATE_EXCLUSIVE: 1312 if ( dchild->d_inode->i_mtime.tv_sec == v_mtime 1313 && dchild->d_inode->i_atime.tv_sec == v_atime 1314 && dchild->d_inode->i_size == 0 ) 1315 break; 1316 /* fallthru */ 1317 case NFS3_CREATE_GUARDED: 1318 err = nfserr_exist; 1319 } 1320 goto out; 1321 } 1322 1323 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1324 if (host_err < 0) 1325 goto out_nfserr; 1326 if (created) 1327 *created = 1; 1328 1329 if (EX_ISSYNC(fhp->fh_export)) { 1330 err = nfserrno(nfsd_sync_dir(dentry)); 1331 /* setattr will sync the child (or not) */ 1332 } 1333 1334 if (createmode == NFS3_CREATE_EXCLUSIVE) { 1335 /* Cram the verifier into atime/mtime */ 1336 iap->ia_valid = ATTR_MTIME|ATTR_ATIME 1337 | ATTR_MTIME_SET|ATTR_ATIME_SET; 1338 iap->ia_mtime.tv_sec = v_mtime; 1339 iap->ia_atime.tv_sec = v_atime; 1340 iap->ia_mtime.tv_nsec = 0; 1341 iap->ia_atime.tv_nsec = 0; 1342 } 1343 1344 /* Set file attributes. 1345 * Irix appears to send along the gid when it tries to 1346 * implement setgid directories via NFS. Clear out all that cruft. 1347 */ 1348 set_attr: 1349 if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) { 1350 __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1351 if (err2) 1352 err = err2; 1353 } 1354 1355 /* 1356 * Update the filehandle to get the new inode info. 1357 */ 1358 if (!err) 1359 err = fh_update(resfhp); 1360 1361 out: 1362 fh_unlock(fhp); 1363 if (dchild && !IS_ERR(dchild)) 1364 dput(dchild); 1365 return err; 1366 1367 out_nfserr: 1368 err = nfserrno(host_err); 1369 goto out; 1370} 1371#endif /* CONFIG_NFSD_V3 */ 1372 1373/* 1374 * Read a symlink. On entry, *lenp must contain the maximum path length that 1375 * fits into the buffer. On return, it contains the true length. 1376 * N.B. After this call fhp needs an fh_put 1377 */ 1378__be32 1379nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) 1380{ 1381 struct dentry *dentry; 1382 struct inode *inode; 1383 mm_segment_t oldfs; 1384 __be32 err; 1385 int host_err; 1386 1387 err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP); 1388 if (err) 1389 goto out; 1390 1391 dentry = fhp->fh_dentry; 1392 inode = dentry->d_inode; 1393 1394 err = nfserr_inval; 1395 if (!inode->i_op || !inode->i_op->readlink) 1396 goto out; 1397 1398 touch_atime(fhp->fh_export->ex_mnt, dentry); 1399 /* N.B. Why does this call need a get_fs()?? 1400 * Remove the set_fs and watch the fireworks:-) --okir 1401 */ 1402 1403 oldfs = get_fs(); set_fs(KERNEL_DS); 1404 host_err = inode->i_op->readlink(dentry, buf, *lenp); 1405 set_fs(oldfs); 1406 1407 if (host_err < 0) 1408 goto out_nfserr; 1409 *lenp = host_err; 1410 err = 0; 1411out: 1412 return err; 1413 1414out_nfserr: 1415 err = nfserrno(host_err); 1416 goto out; 1417} 1418 1419/* 1420 * Create a symlink and look up its inode 1421 * N.B. After this call _both_ fhp and resfhp need an fh_put 1422 */ 1423__be32 1424nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, 1425 char *fname, int flen, 1426 char *path, int plen, 1427 struct svc_fh *resfhp, 1428 struct iattr *iap) 1429{ 1430 struct dentry *dentry, *dnew; 1431 __be32 err, cerr; 1432 int host_err; 1433 umode_t mode; 1434 1435 err = nfserr_noent; 1436 if (!flen || !plen) 1437 goto out; 1438 err = nfserr_exist; 1439 if (isdotent(fname, flen)) 1440 goto out; 1441 1442 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); 1443 if (err) 1444 goto out; 1445 fh_lock(fhp); 1446 dentry = fhp->fh_dentry; 1447 dnew = lookup_one_len(fname, dentry, flen); 1448 host_err = PTR_ERR(dnew); 1449 if (IS_ERR(dnew)) 1450 goto out_nfserr; 1451 1452 mode = S_IALLUGO; 1453 /* Only the MODE ATTRibute is even vaguely meaningful */ 1454 if (iap && (iap->ia_valid & ATTR_MODE)) 1455 mode = iap->ia_mode & S_IALLUGO; 1456 1457 if (unlikely(path[plen] != 0)) { 1458 char *path_alloced = kmalloc(plen+1, GFP_KERNEL); 1459 if (path_alloced == NULL) 1460 host_err = -ENOMEM; 1461 else { 1462 strncpy(path_alloced, path, plen); 1463 path_alloced[plen] = 0; 1464 host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); 1465 kfree(path_alloced); 1466 } 1467 } else 1468 host_err = vfs_symlink(dentry->d_inode, dnew, path, mode); 1469 1470 if (!host_err) { 1471 if (EX_ISSYNC(fhp->fh_export)) 1472 host_err = nfsd_sync_dir(dentry); 1473 } 1474 err = nfserrno(host_err); 1475 fh_unlock(fhp); 1476 1477 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); 1478 dput(dnew); 1479 if (err==0) err = cerr; 1480out: 1481 return err; 1482 1483out_nfserr: 1484 err = nfserrno(host_err); 1485 goto out; 1486} 1487 1488/* 1489 * Create a hardlink 1490 * N.B. After this call _both_ ffhp and tfhp need an fh_put 1491 */ 1492__be32 1493nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, 1494 char *name, int len, struct svc_fh *tfhp) 1495{ 1496 struct dentry *ddir, *dnew, *dold; 1497 struct inode *dirp, *dest; 1498 __be32 err; 1499 int host_err; 1500 1501 err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE); 1502 if (err) 1503 goto out; 1504 err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP); 1505 if (err) 1506 goto out; 1507 1508 err = nfserr_perm; 1509 if (!len) 1510 goto out; 1511 err = nfserr_exist; 1512 if (isdotent(name, len)) 1513 goto out; 1514 1515 fh_lock_nested(ffhp, I_MUTEX_PARENT); 1516 ddir = ffhp->fh_dentry; 1517 dirp = ddir->d_inode; 1518 1519 dnew = lookup_one_len(name, ddir, len); 1520 host_err = PTR_ERR(dnew); 1521 if (IS_ERR(dnew)) 1522 goto out_nfserr; 1523 1524 dold = tfhp->fh_dentry; 1525 dest = dold->d_inode; 1526 1527 host_err = vfs_link(dold, dirp, dnew); 1528 if (!host_err) { 1529 if (EX_ISSYNC(ffhp->fh_export)) { 1530 err = nfserrno(nfsd_sync_dir(ddir)); 1531 write_inode_now(dest, 1); 1532 } 1533 err = 0; 1534 } else { 1535 if (host_err == -EXDEV && rqstp->rq_vers == 2) 1536 err = nfserr_acces; 1537 else 1538 err = nfserrno(host_err); 1539 } 1540 1541 dput(dnew); 1542out_unlock: 1543 fh_unlock(ffhp); 1544out: 1545 return err; 1546 1547out_nfserr: 1548 err = nfserrno(host_err); 1549 goto out_unlock; 1550} 1551 1552/* 1553 * Rename a file 1554 * N.B. After this call _both_ ffhp and tfhp need an fh_put 1555 */ 1556__be32 1557nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, 1558 struct svc_fh *tfhp, char *tname, int tlen) 1559{ 1560 struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap; 1561 struct inode *fdir, *tdir; 1562 __be32 err; 1563 int host_err; 1564 1565 err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE); 1566 if (err) 1567 goto out; 1568 err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE); 1569 if (err) 1570 goto out; 1571 1572 fdentry = ffhp->fh_dentry; 1573 fdir = fdentry->d_inode; 1574 1575 tdentry = tfhp->fh_dentry; 1576 tdir = tdentry->d_inode; 1577 1578 err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; 1579 if (ffhp->fh_export != tfhp->fh_export) 1580 goto out; 1581 1582 err = nfserr_perm; 1583 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) 1584 goto out; 1585 1586 /* cannot use fh_lock as we need deadlock protective ordering 1587 * so do it by hand */ 1588 trap = lock_rename(tdentry, fdentry); 1589 ffhp->fh_locked = tfhp->fh_locked = 1; 1590 fill_pre_wcc(ffhp); 1591 fill_pre_wcc(tfhp); 1592 1593 odentry = lookup_one_len(fname, fdentry, flen); 1594 host_err = PTR_ERR(odentry); 1595 if (IS_ERR(odentry)) 1596 goto out_nfserr; 1597 1598 host_err = -ENOENT; 1599 if (!odentry->d_inode) 1600 goto out_dput_old; 1601 host_err = -EINVAL; 1602 if (odentry == trap) 1603 goto out_dput_old; 1604 1605 ndentry = lookup_one_len(tname, tdentry, tlen); 1606 host_err = PTR_ERR(ndentry); 1607 if (IS_ERR(ndentry)) 1608 goto out_dput_old; 1609 host_err = -ENOTEMPTY; 1610 if (ndentry == trap) 1611 goto out_dput_new; 1612 1613#ifdef MSNFS 1614 if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1615 ((atomic_read(&odentry->d_count) > 1) 1616 || (atomic_read(&ndentry->d_count) > 1))) { 1617 host_err = -EPERM; 1618 } else 1619#endif 1620 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1621 if (!host_err && EX_ISSYNC(tfhp->fh_export)) { 1622 host_err = nfsd_sync_dir(tdentry); 1623 if (!host_err) 1624 host_err = nfsd_sync_dir(fdentry); 1625 } 1626 1627 out_dput_new: 1628 dput(ndentry); 1629 out_dput_old: 1630 dput(odentry); 1631 out_nfserr: 1632 err = nfserrno(host_err); 1633 1634 /* we cannot reply on fh_unlock on the two filehandles, 1635 * as that would do the wrong thing if the two directories 1636 * were the same, so again we do it by hand 1637 */ 1638 fill_post_wcc(ffhp); 1639 fill_post_wcc(tfhp); 1640 unlock_rename(tdentry, fdentry); 1641 ffhp->fh_locked = tfhp->fh_locked = 0; 1642 1643out: 1644 return err; 1645} 1646 1647/* 1648 * Unlink a file or directory 1649 * N.B. After this call fhp needs an fh_put 1650 */ 1651__be32 1652nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 1653 char *fname, int flen) 1654{ 1655 struct dentry *dentry, *rdentry; 1656 struct inode *dirp; 1657 __be32 err; 1658 int host_err; 1659 1660 err = nfserr_acces; 1661 if (!flen || isdotent(fname, flen)) 1662 goto out; 1663 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE); 1664 if (err) 1665 goto out; 1666 1667 fh_lock_nested(fhp, I_MUTEX_PARENT); 1668 dentry = fhp->fh_dentry; 1669 dirp = dentry->d_inode; 1670 1671 rdentry = lookup_one_len(fname, dentry, flen); 1672 host_err = PTR_ERR(rdentry); 1673 if (IS_ERR(rdentry)) 1674 goto out_nfserr; 1675 1676 if (!rdentry->d_inode) { 1677 dput(rdentry); 1678 err = nfserr_noent; 1679 goto out; 1680 } 1681 1682 if (!type) 1683 type = rdentry->d_inode->i_mode & S_IFMT; 1684 1685 if (type != S_IFDIR) { /* It's UNLINK */ 1686#ifdef MSNFS 1687 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1688 (atomic_read(&rdentry->d_count) > 1)) { 1689 host_err = -EPERM; 1690 } else 1691#endif 1692 host_err = vfs_unlink(dirp, rdentry); 1693 } else { /* It's RMDIR */ 1694 host_err = vfs_rmdir(dirp, rdentry); 1695 } 1696 1697 dput(rdentry); 1698 1699 if (host_err) 1700 goto out_nfserr; 1701 if (EX_ISSYNC(fhp->fh_export)) 1702 host_err = nfsd_sync_dir(dentry); 1703 1704out_nfserr: 1705 err = nfserrno(host_err); 1706out: 1707 return err; 1708} 1709 1710/* 1711 * Read entries from a directory. 1712 * The NFSv3/4 verifier we ignore for now. 1713 */ 1714__be32 1715nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, 1716 struct readdir_cd *cdp, filldir_t func) 1717{ 1718 __be32 err; 1719 int host_err; 1720 struct file *file; 1721 loff_t offset = *offsetp; 1722 1723 err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file); 1724 if (err) 1725 goto out; 1726 1727 offset = vfs_llseek(file, offset, 0); 1728 if (offset < 0) { 1729 err = nfserrno((int)offset); 1730 goto out_close; 1731 } 1732 1733 /* 1734 * Read the directory entries. This silly loop is necessary because 1735 * readdir() is not guaranteed to fill up the entire buffer, but 1736 * may choose to do less. 1737 */ 1738 1739 do { 1740 cdp->err = nfserr_eof; /* will be cleared on successful read */ 1741 host_err = vfs_readdir(file, func, cdp); 1742 } while (host_err >=0 && cdp->err == nfs_ok); 1743 if (host_err) 1744 err = nfserrno(host_err); 1745 else 1746 err = cdp->err; 1747 *offsetp = vfs_llseek(file, 0, 1); 1748 1749 if (err == nfserr_eof || err == nfserr_toosmall) 1750 err = nfs_ok; /* can still be found in ->err */ 1751out_close: 1752 nfsd_close(file); 1753out: 1754 return err; 1755} 1756 1757/* 1758 * Get file system stats 1759 * N.B. After this call fhp needs an fh_put 1760 */ 1761__be32 1762nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) 1763{ 1764 __be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP); 1765 if (!err && vfs_statfs(fhp->fh_dentry,stat)) 1766 err = nfserr_io; 1767 return err; 1768} 1769 1770/* 1771 * Check for a user's access permissions to this inode. 1772 */ 1773__be32 1774nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) 1775{ 1776 struct inode *inode = dentry->d_inode; 1777 int err; 1778 1779 if (acc == MAY_NOP) 1780 return 0; 1781 1782 /* Normally we reject any write/sattr etc access on a read-only file 1783 * system. But if it is IRIX doing check on write-access for a 1784 * device special file, we ignore rofs. 1785 */ 1786 if (!(acc & MAY_LOCAL_ACCESS)) 1787 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { 1788 if (EX_RDONLY(exp) || IS_RDONLY(inode)) 1789 return nfserr_rofs; 1790 if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) 1791 return nfserr_perm; 1792 } 1793 if ((acc & MAY_TRUNC) && IS_APPEND(inode)) 1794 return nfserr_perm; 1795 1796 if (acc & MAY_LOCK) { 1797 /* If we cannot rely on authentication in NLM requests, 1798 * just allow locks, otherwise require read permission, or 1799 * ownership 1800 */ 1801 if (exp->ex_flags & NFSEXP_NOAUTHNLM) 1802 return 0; 1803 else 1804 acc = MAY_READ | MAY_OWNER_OVERRIDE; 1805 } 1806 /* 1807 * The file owner always gets access permission for accesses that 1808 * would normally be checked at open time. This is to make 1809 * file access work even when the client has done a fchmod(fd, 0). 1810 * 1811 * However, `cp foo bar' should fail nevertheless when bar is 1812 * readonly. A sensible way to do this might be to reject all 1813 * attempts to truncate a read-only file, because a creat() call 1814 * always implies file truncation. 1815 * ... but this isn't really fair. A process may reasonably call 1816 * ftruncate on an open file descriptor on a file with perm 000. 1817 * We must trust the client to do permission checking - using "ACCESS" 1818 * with NFSv3. 1819 */ 1820 if ((acc & MAY_OWNER_OVERRIDE) && 1821 inode->i_uid == current->fsuid) 1822 return 0; 1823 1824 err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); 1825 1826 /* Allow read access to binaries even when mode 111 */ 1827 if (err == -EACCES && S_ISREG(inode->i_mode) && 1828 acc == (MAY_READ | MAY_OWNER_OVERRIDE)) 1829 err = permission(inode, MAY_EXEC, NULL); 1830 1831 return err? nfserrno(err) : 0; 1832} 1833 1834void 1835nfsd_racache_shutdown(void) 1836{ 1837 if (!raparml) 1838 return; 1839 dprintk("nfsd: freeing readahead buffers.\n"); 1840 kfree(raparml); 1841 raparml = NULL; 1842} 1843/* 1844 * Initialize readahead param cache 1845 */ 1846int 1847nfsd_racache_init(int cache_size) 1848{ 1849 int i; 1850 int j = 0; 1851 int nperbucket; 1852 1853 1854 if (raparml) 1855 return 0; 1856 if (cache_size < 2*RAPARM_HASH_SIZE) 1857 cache_size = 2*RAPARM_HASH_SIZE; 1858 raparml = kcalloc(cache_size, sizeof(struct raparms), GFP_KERNEL); 1859 1860 if (!raparml) { 1861 printk(KERN_WARNING 1862 "nfsd: Could not allocate memory read-ahead cache.\n"); 1863 return -ENOMEM; 1864 } 1865 1866 dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); 1867 for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) { 1868 raparm_hash[i].pb_head = NULL; 1869 spin_lock_init(&raparm_hash[i].pb_lock); 1870 } 1871 nperbucket = cache_size >> RAPARM_HASH_BITS; 1872 for (i = 0; i < cache_size - 1; i++) { 1873 if (i % nperbucket == 0) 1874 raparm_hash[j++].pb_head = raparml + i; 1875 if (i % nperbucket < nperbucket-1) 1876 raparml[i].p_next = raparml + i + 1; 1877 } 1878 1879 nfsdstats.ra_size = cache_size; 1880 return 0; 1881} 1882 1883#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 1884struct posix_acl * 1885nfsd_get_posix_acl(struct svc_fh *fhp, int type) 1886{ 1887 struct inode *inode = fhp->fh_dentry->d_inode; 1888 char *name; 1889 void *value = NULL; 1890 ssize_t size; 1891 struct posix_acl *acl; 1892 1893 if (!IS_POSIXACL(inode)) 1894 return ERR_PTR(-EOPNOTSUPP); 1895 1896 switch (type) { 1897 case ACL_TYPE_ACCESS: 1898 name = POSIX_ACL_XATTR_ACCESS; 1899 break; 1900 case ACL_TYPE_DEFAULT: 1901 name = POSIX_ACL_XATTR_DEFAULT; 1902 break; 1903 default: 1904 return ERR_PTR(-EOPNOTSUPP); 1905 } 1906 1907 size = nfsd_getxattr(fhp->fh_dentry, name, &value); 1908 if (size < 0) 1909 return ERR_PTR(size); 1910 1911 acl = posix_acl_from_xattr(value, size); 1912 kfree(value); 1913 return acl; 1914} 1915 1916int 1917nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) 1918{ 1919 struct inode *inode = fhp->fh_dentry->d_inode; 1920 char *name; 1921 void *value = NULL; 1922 size_t size; 1923 int error; 1924 1925 if (!IS_POSIXACL(inode) || !inode->i_op || 1926 !inode->i_op->setxattr || !inode->i_op->removexattr) 1927 return -EOPNOTSUPP; 1928 switch(type) { 1929 case ACL_TYPE_ACCESS: 1930 name = POSIX_ACL_XATTR_ACCESS; 1931 break; 1932 case ACL_TYPE_DEFAULT: 1933 name = POSIX_ACL_XATTR_DEFAULT; 1934 break; 1935 default: 1936 return -EOPNOTSUPP; 1937 } 1938 1939 if (acl && acl->a_count) { 1940 size = posix_acl_xattr_size(acl->a_count); 1941 value = kmalloc(size, GFP_KERNEL); 1942 if (!value) 1943 return -ENOMEM; 1944 error = posix_acl_to_xattr(acl, value, size); 1945 if (error < 0) 1946 goto getout; 1947 size = error; 1948 } else 1949 size = 0; 1950 1951 if (size) 1952 error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); 1953 else { 1954 if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) 1955 error = 0; 1956 else { 1957 error = vfs_removexattr(fhp->fh_dentry, name); 1958 if (error == -ENODATA) 1959 error = 0; 1960 } 1961 } 1962 1963getout: 1964 kfree(value); 1965 return error; 1966} 1967#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ 1968