1#define MSNFS /* HACK HACK */ 2/* 3 * File operations used by nfsd. Some of these have been ripped from 4 * other parts of the kernel because they weren't exported, others 5 * are partial duplicates with added or changed functionality. 6 * 7 * Note that several functions dget() the dentry upon which they want 8 * to act, most notably those that create directory entries. Response 9 * dentry's are dput()'d if necessary in the release callback. 10 * So if you notice code paths that apparently fail to dput() the 11 * dentry, don't worry--they have been taken care of. 12 * 13 * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de> 14 * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> 15 */ 16 17#include <linux/fs.h> 18#include <linux/file.h> 19#include <linux/splice.h> 20#include <linux/fcntl.h> 21#include <linux/namei.h> 22#include <linux/delay.h> 23#include <linux/fsnotify.h> 24#include <linux/posix_acl_xattr.h> 25#include <linux/xattr.h> 26#include <linux/jhash.h> 27#include <linux/ima.h> 28#include <linux/slab.h> 29#include <asm/uaccess.h> 30#include <linux/exportfs.h> 31#include <linux/writeback.h> 32 33#ifdef CONFIG_NFSD_V3 34#include "xdr3.h" 35#endif /* CONFIG_NFSD_V3 */ 36 37#ifdef CONFIG_NFSD_V4 38#include <linux/nfs4_acl.h> 39#include <linux/nfsd_idmap.h> 40#endif /* CONFIG_NFSD_V4 */ 41 42#include "nfsd.h" 43#include "vfs.h" 44 45#define NFSDDBG_FACILITY NFSDDBG_FILEOP 46 47 48/* 49 * This is a cache of readahead params that help us choose the proper 50 * readahead strategy. Initially, we set all readahead parameters to 0 51 * and let the VFS handle things. 52 * If you increase the number of cached files very much, you'll need to 53 * add a hash table here. 54 */ 55struct raparms { 56 struct raparms *p_next; 57 unsigned int p_count; 58 ino_t p_ino; 59 dev_t p_dev; 60 int p_set; 61 struct file_ra_state p_ra; 62 unsigned int p_hindex; 63}; 64 65struct raparm_hbucket { 66 struct raparms *pb_head; 67 spinlock_t pb_lock; 68} ____cacheline_aligned_in_smp; 69 70#define RAPARM_HASH_BITS 4 71#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) 72#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 73static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE]; 74 75/* 76 * Called from nfsd_lookup and encode_dirent. Check if we have crossed 77 * a mount point. 78 * Returns -EAGAIN or -ETIMEDOUT leaving *dpp and *expp unchanged, 79 * or nfs_ok having possibly changed *dpp and *expp 80 */ 81int 82nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, 83 struct svc_export **expp) 84{ 85 struct svc_export *exp = *expp, *exp2 = NULL; 86 struct dentry *dentry = *dpp; 87 struct path path = {.mnt = mntget(exp->ex_path.mnt), 88 .dentry = dget(dentry)}; 89 int err = 0; 90 91 while (d_mountpoint(path.dentry) && follow_down(&path)) 92 ; 93 94 exp2 = rqst_exp_get_by_name(rqstp, &path); 95 if (IS_ERR(exp2)) { 96 err = PTR_ERR(exp2); 97 /* 98 * We normally allow NFS clients to continue 99 * "underneath" a mountpoint that is not exported. 100 * The exception is V4ROOT, where no traversal is ever 101 * allowed without an explicit export of the new 102 * directory. 103 */ 104 if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT)) 105 err = 0; 106 path_put(&path); 107 goto out; 108 } 109 if (nfsd_v4client(rqstp) || 110 (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { 111 /* successfully crossed mount point */ 112 /* 113 * This is subtle: path.dentry is *not* on path.mnt 114 * at this point. The only reason we are safe is that 115 * original mnt is pinned down by exp, so we should 116 * put path *before* putting exp 117 */ 118 *dpp = path.dentry; 119 path.dentry = dentry; 120 *expp = exp2; 121 exp2 = exp; 122 } 123 path_put(&path); 124 exp_put(exp2); 125out: 126 return err; 127} 128 129static void follow_to_parent(struct path *path) 130{ 131 struct dentry *dp; 132 133 while (path->dentry == path->mnt->mnt_root && follow_up(path)) 134 ; 135 dp = dget_parent(path->dentry); 136 dput(path->dentry); 137 path->dentry = dp; 138} 139 140static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp) 141{ 142 struct svc_export *exp2; 143 struct path path = {.mnt = mntget((*exp)->ex_path.mnt), 144 .dentry = dget(dparent)}; 145 146 follow_to_parent(&path); 147 148 exp2 = rqst_exp_parent(rqstp, &path); 149 if (PTR_ERR(exp2) == -ENOENT) { 150 *dentryp = dget(dparent); 151 } else if (IS_ERR(exp2)) { 152 path_put(&path); 153 return PTR_ERR(exp2); 154 } else { 155 *dentryp = dget(path.dentry); 156 exp_put(*exp); 157 *exp = exp2; 158 } 159 path_put(&path); 160 return 0; 161} 162 163/* 164 * For nfsd purposes, we treat V4ROOT exports as though there was an 165 * export at *every* directory. 166 */ 167int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) 168{ 169 if (d_mountpoint(dentry)) 170 return 1; 171 if (!(exp->ex_flags & NFSEXP_V4ROOT)) 172 return 0; 173 return dentry->d_inode != NULL; 174} 175 176__be32 177nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, 178 const char *name, unsigned int len, 179 struct svc_export **exp_ret, struct dentry **dentry_ret) 180{ 181 struct svc_export *exp; 182 struct dentry *dparent; 183 struct dentry *dentry; 184 __be32 err; 185 int host_err; 186 187 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); 188 189 /* Obtain dentry and export. */ 190 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); 191 if (err) 192 return err; 193 194 dparent = fhp->fh_dentry; 195 exp = fhp->fh_export; 196 exp_get(exp); 197 198 /* Lookup the name, but don't follow links */ 199 if (isdotent(name, len)) { 200 if (len==1) 201 dentry = dget(dparent); 202 else if (dparent != exp->ex_path.dentry) 203 dentry = dget_parent(dparent); 204 else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp)) 205 dentry = dget(dparent); /* .. == . just like at / */ 206 else { 207 /* checking mountpoint crossing is very different when stepping up */ 208 host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry); 209 if (host_err) 210 goto out_nfserr; 211 } 212 } else { 213 fh_lock(fhp); 214 dentry = lookup_one_len(name, dparent, len); 215 host_err = PTR_ERR(dentry); 216 if (IS_ERR(dentry)) 217 goto out_nfserr; 218 /* 219 * check if we have crossed a mount point ... 220 */ 221 if (nfsd_mountpoint(dentry, exp)) { 222 if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { 223 dput(dentry); 224 goto out_nfserr; 225 } 226 } 227 } 228 *dentry_ret = dentry; 229 *exp_ret = exp; 230 return 0; 231 232out_nfserr: 233 exp_put(exp); 234 return nfserrno(host_err); 235} 236 237/* 238 * Look up one component of a pathname. 239 * N.B. After this call _both_ fhp and resfh need an fh_put 240 * 241 * If the lookup would cross a mountpoint, and the mounted filesystem 242 * is exported to the client with NFSEXP_NOHIDE, then the lookup is 243 * accepted as it stands and the mounted directory is 244 * returned. Otherwise the covered directory is returned. 245 * NOTE: this mountpoint crossing is not supported properly by all 246 * clients and is explicitly disallowed for NFSv3 247 * NeilBrown <neilb@cse.unsw.edu.au> 248 */ 249__be32 250nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, 251 unsigned int len, struct svc_fh *resfh) 252{ 253 struct svc_export *exp; 254 struct dentry *dentry; 255 __be32 err; 256 257 err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); 258 if (err) 259 return err; 260 err = check_nfsd_access(exp, rqstp); 261 if (err) 262 goto out; 263 /* 264 * Note: we compose the file handle now, but as the 265 * dentry may be negative, it may need to be updated. 266 */ 267 err = fh_compose(resfh, exp, dentry, fhp); 268 if (!err && !dentry->d_inode) 269 err = nfserr_noent; 270out: 271 dput(dentry); 272 exp_put(exp); 273 return err; 274} 275 276/* 277 * Commit metadata changes to stable storage. 278 */ 279static int 280commit_metadata(struct svc_fh *fhp) 281{ 282 struct inode *inode = fhp->fh_dentry->d_inode; 283 const struct export_operations *export_ops = inode->i_sb->s_export_op; 284 int error = 0; 285 286 if (!EX_ISSYNC(fhp->fh_export)) 287 return 0; 288 289 if (export_ops->commit_metadata) { 290 error = export_ops->commit_metadata(inode); 291 } else { 292 struct writeback_control wbc = { 293 .sync_mode = WB_SYNC_ALL, 294 .nr_to_write = 0, /* metadata only */ 295 }; 296 297 error = sync_inode(inode, &wbc); 298 } 299 300 return error; 301} 302 303/* 304 * Set various file attributes. 305 * N.B. After this call fhp needs an fh_put 306 */ 307__be32 308nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, 309 int check_guard, time_t guardtime) 310{ 311 struct dentry *dentry; 312 struct inode *inode; 313 int accmode = NFSD_MAY_SATTR; 314 int ftype = 0; 315 __be32 err; 316 int host_err; 317 int size_change = 0; 318 319 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) 320 accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; 321 if (iap->ia_valid & ATTR_SIZE) 322 ftype = S_IFREG; 323 324 /* Get inode */ 325 err = fh_verify(rqstp, fhp, ftype, accmode); 326 if (err) 327 goto out; 328 329 dentry = fhp->fh_dentry; 330 inode = dentry->d_inode; 331 332 /* Ignore any mode updates on symlinks */ 333 if (S_ISLNK(inode->i_mode)) 334 iap->ia_valid &= ~ATTR_MODE; 335 336 if (!iap->ia_valid) 337 goto out; 338 339 /* 340 * NFSv2 does not differentiate between "set-[ac]time-to-now" 341 * which only requires access, and "set-[ac]time-to-X" which 342 * requires ownership. 343 * So if it looks like it might be "set both to the same time which 344 * is close to now", and if inode_change_ok fails, then we 345 * convert to "set to now" instead of "set to explicit time" 346 * 347 * We only call inode_change_ok as the last test as technically 348 * it is not an interface that we should be using. It is only 349 * valid if the filesystem does not define it's own i_op->setattr. 350 */ 351#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) 352#define MAX_TOUCH_TIME_ERROR (30*60) 353 if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET && 354 iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) { 355 /* 356 * Looks probable. 357 * 358 * Now just make sure time is in the right ballpark. 359 * Solaris, at least, doesn't seem to care what the time 360 * request is. We require it be within 30 minutes of now. 361 */ 362 time_t delta = iap->ia_atime.tv_sec - get_seconds(); 363 if (delta < 0) 364 delta = -delta; 365 if (delta < MAX_TOUCH_TIME_ERROR && 366 inode_change_ok(inode, iap) != 0) { 367 /* 368 * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME. 369 * This will cause notify_change to set these times 370 * to "now" 371 */ 372 iap->ia_valid &= ~BOTH_TIME_SET; 373 } 374 } 375 376 /* 377 * The size case is special. 378 * It changes the file as well as the attributes. 379 */ 380 if (iap->ia_valid & ATTR_SIZE) { 381 if (iap->ia_size < inode->i_size) { 382 err = nfsd_permission(rqstp, fhp->fh_export, dentry, 383 NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); 384 if (err) 385 goto out; 386 } 387 388 /* 389 * If we are changing the size of the file, then 390 * we need to break all leases. 391 */ 392 host_err = break_lease(inode, O_WRONLY | O_NONBLOCK); 393 if (host_err == -EWOULDBLOCK) 394 host_err = -ETIMEDOUT; 395 if (host_err) /* ENOMEM or EWOULDBLOCK */ 396 goto out_nfserr; 397 398 host_err = get_write_access(inode); 399 if (host_err) 400 goto out_nfserr; 401 402 size_change = 1; 403 host_err = locks_verify_truncate(inode, NULL, iap->ia_size); 404 if (host_err) { 405 put_write_access(inode); 406 goto out_nfserr; 407 } 408 } 409 410 /* sanitize the mode change */ 411 if (iap->ia_valid & ATTR_MODE) { 412 iap->ia_mode &= S_IALLUGO; 413 iap->ia_mode |= (inode->i_mode & ~S_IALLUGO); 414 } 415 416 /* Revoke setuid/setgid on chown */ 417 if (!S_ISDIR(inode->i_mode) && 418 (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || 419 ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) { 420 iap->ia_valid |= ATTR_KILL_PRIV; 421 if (iap->ia_valid & ATTR_MODE) { 422 /* we're setting mode too, just clear the s*id bits */ 423 iap->ia_mode &= ~S_ISUID; 424 if (iap->ia_mode & S_IXGRP) 425 iap->ia_mode &= ~S_ISGID; 426 } else { 427 /* set ATTR_KILL_* bits and let VFS handle it */ 428 iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); 429 } 430 } 431 432 /* Change the attributes. */ 433 434 iap->ia_valid |= ATTR_CTIME; 435 436 err = nfserr_notsync; 437 if (!check_guard || guardtime == inode->i_ctime.tv_sec) { 438 fh_lock(fhp); 439 host_err = notify_change(dentry, iap); 440 err = nfserrno(host_err); 441 fh_unlock(fhp); 442 } 443 if (size_change) 444 put_write_access(inode); 445 if (!err) 446 commit_metadata(fhp); 447out: 448 return err; 449 450out_nfserr: 451 err = nfserrno(host_err); 452 goto out; 453} 454 455#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) || \ 456 defined(CONFIG_NFSD_V4) 457static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) 458{ 459 ssize_t buflen; 460 ssize_t ret; 461 462 buflen = vfs_getxattr(dentry, key, NULL, 0); 463 if (buflen <= 0) 464 return buflen; 465 466 *buf = kmalloc(buflen, GFP_KERNEL); 467 if (!*buf) 468 return -ENOMEM; 469 470 ret = vfs_getxattr(dentry, key, *buf, buflen); 471 if (ret < 0) 472 kfree(*buf); 473 return ret; 474} 475#endif 476 477#if defined(CONFIG_NFSD_V4) 478static int 479set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) 480{ 481 int len; 482 size_t buflen; 483 char *buf = NULL; 484 int error = 0; 485 486 buflen = posix_acl_xattr_size(pacl->a_count); 487 buf = kmalloc(buflen, GFP_KERNEL); 488 error = -ENOMEM; 489 if (buf == NULL) 490 goto out; 491 492 len = posix_acl_to_xattr(pacl, buf, buflen); 493 if (len < 0) { 494 error = len; 495 goto out; 496 } 497 498 error = vfs_setxattr(dentry, key, buf, len, 0); 499out: 500 kfree(buf); 501 return error; 502} 503 504__be32 505nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, 506 struct nfs4_acl *acl) 507{ 508 __be32 error; 509 int host_error; 510 struct dentry *dentry; 511 struct inode *inode; 512 struct posix_acl *pacl = NULL, *dpacl = NULL; 513 unsigned int flags = 0; 514 515 /* Get inode */ 516 error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); 517 if (error) 518 return error; 519 520 dentry = fhp->fh_dentry; 521 inode = dentry->d_inode; 522 if (S_ISDIR(inode->i_mode)) 523 flags = NFS4_ACL_DIR; 524 525 host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); 526 if (host_error == -EINVAL) { 527 return nfserr_attrnotsupp; 528 } else if (host_error < 0) 529 goto out_nfserr; 530 531 host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); 532 if (host_error < 0) 533 goto out_release; 534 535 if (S_ISDIR(inode->i_mode)) 536 host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); 537 538out_release: 539 posix_acl_release(pacl); 540 posix_acl_release(dpacl); 541out_nfserr: 542 if (host_error == -EOPNOTSUPP) 543 return nfserr_attrnotsupp; 544 else 545 return nfserrno(host_error); 546} 547 548static struct posix_acl * 549_get_posix_acl(struct dentry *dentry, char *key) 550{ 551 void *buf = NULL; 552 struct posix_acl *pacl = NULL; 553 int buflen; 554 555 buflen = nfsd_getxattr(dentry, key, &buf); 556 if (!buflen) 557 buflen = -ENODATA; 558 if (buflen <= 0) 559 return ERR_PTR(buflen); 560 561 pacl = posix_acl_from_xattr(buf, buflen); 562 kfree(buf); 563 return pacl; 564} 565 566int 567nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) 568{ 569 struct inode *inode = dentry->d_inode; 570 int error = 0; 571 struct posix_acl *pacl = NULL, *dpacl = NULL; 572 unsigned int flags = 0; 573 574 pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); 575 if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) 576 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 577 if (IS_ERR(pacl)) { 578 error = PTR_ERR(pacl); 579 pacl = NULL; 580 goto out; 581 } 582 583 if (S_ISDIR(inode->i_mode)) { 584 dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); 585 if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) 586 dpacl = NULL; 587 else if (IS_ERR(dpacl)) { 588 error = PTR_ERR(dpacl); 589 dpacl = NULL; 590 goto out; 591 } 592 flags = NFS4_ACL_DIR; 593 } 594 595 *acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags); 596 if (IS_ERR(*acl)) { 597 error = PTR_ERR(*acl); 598 *acl = NULL; 599 } 600 out: 601 posix_acl_release(pacl); 602 posix_acl_release(dpacl); 603 return error; 604} 605 606#endif /* defined(CONFIG_NFSD_V4) */ 607 608#ifdef CONFIG_NFSD_V3 609/* 610 * Check server access rights to a file system object 611 */ 612struct accessmap { 613 u32 access; 614 int how; 615}; 616static struct accessmap nfs3_regaccess[] = { 617 { NFS3_ACCESS_READ, NFSD_MAY_READ }, 618 { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC }, 619 { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC }, 620 { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE }, 621 622 { 0, 0 } 623}; 624 625static struct accessmap nfs3_diraccess[] = { 626 { NFS3_ACCESS_READ, NFSD_MAY_READ }, 627 { NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC }, 628 { NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC}, 629 { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE }, 630 { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE }, 631 632 { 0, 0 } 633}; 634 635static struct accessmap nfs3_anyaccess[] = { 636 /* Some clients - Solaris 2.6 at least, make an access call 637 * to the server to check for access for things like /dev/null 638 * (which really, the server doesn't care about). So 639 * We provide simple access checking for them, looking 640 * mainly at mode bits, and we make sure to ignore read-only 641 * filesystem checks 642 */ 643 { NFS3_ACCESS_READ, NFSD_MAY_READ }, 644 { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC }, 645 { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS }, 646 { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS }, 647 648 { 0, 0 } 649}; 650 651__be32 652nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported) 653{ 654 struct accessmap *map; 655 struct svc_export *export; 656 struct dentry *dentry; 657 u32 query, result = 0, sresult = 0; 658 __be32 error; 659 660 error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); 661 if (error) 662 goto out; 663 664 export = fhp->fh_export; 665 dentry = fhp->fh_dentry; 666 667 if (S_ISREG(dentry->d_inode->i_mode)) 668 map = nfs3_regaccess; 669 else if (S_ISDIR(dentry->d_inode->i_mode)) 670 map = nfs3_diraccess; 671 else 672 map = nfs3_anyaccess; 673 674 675 query = *access; 676 for (; map->access; map++) { 677 if (map->access & query) { 678 __be32 err2; 679 680 sresult |= map->access; 681 682 err2 = nfsd_permission(rqstp, export, dentry, map->how); 683 switch (err2) { 684 case nfs_ok: 685 result |= map->access; 686 break; 687 688 /* the following error codes just mean the access was not allowed, 689 * rather than an error occurred */ 690 case nfserr_rofs: 691 case nfserr_acces: 692 case nfserr_perm: 693 /* simply don't "or" in the access bit. */ 694 break; 695 default: 696 error = err2; 697 goto out; 698 } 699 } 700 } 701 *access = result; 702 if (supported) 703 *supported = sresult; 704 705 out: 706 return error; 707} 708#endif /* CONFIG_NFSD_V3 */ 709 710 711 712/* 713 * Open an existing file or directory. 714 * The access argument indicates the type of open (read/write/lock) 715 * N.B. After this call fhp needs an fh_put 716 */ 717__be32 718nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 719 int access, struct file **filp) 720{ 721 struct dentry *dentry; 722 struct inode *inode; 723 int flags = O_RDONLY|O_LARGEFILE; 724 __be32 err; 725 int host_err = 0; 726 727 validate_process_creds(); 728 729 /* 730 * If we get here, then the client has already done an "open", 731 * and (hopefully) checked permission - so allow OWNER_OVERRIDE 732 * in case a chmod has now revoked permission. 733 */ 734 err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE); 735 if (err) 736 goto out; 737 738 dentry = fhp->fh_dentry; 739 inode = dentry->d_inode; 740 741 /* Disallow write access to files with the append-only bit set 742 * or any access when mandatory locking enabled 743 */ 744 err = nfserr_perm; 745 if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE)) 746 goto out; 747 /* 748 * We must ignore files (but only files) which might have mandatory 749 * locks on them because there is no way to know if the accesser has 750 * the lock. 751 */ 752 if (S_ISREG((inode)->i_mode) && mandatory_lock(inode)) 753 goto out; 754 755 if (!inode->i_fop) 756 goto out; 757 758 /* 759 * Check to see if there are any leases on this file. 760 * This may block while leases are broken. 761 */ 762 if (!(access & NFSD_MAY_NOT_BREAK_LEASE)) 763 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0)); 764 if (host_err == -EWOULDBLOCK) 765 host_err = -ETIMEDOUT; 766 if (host_err) /* NOMEM or WOULDBLOCK */ 767 goto out_nfserr; 768 769 if (access & NFSD_MAY_WRITE) { 770 if (access & NFSD_MAY_READ) 771 flags = O_RDWR|O_LARGEFILE; 772 else 773 flags = O_WRONLY|O_LARGEFILE; 774 } 775 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), 776 flags, current_cred()); 777 if (IS_ERR(*filp)) 778 host_err = PTR_ERR(*filp); 779 else 780 host_err = ima_file_check(*filp, access); 781out_nfserr: 782 err = nfserrno(host_err); 783out: 784 validate_process_creds(); 785 return err; 786} 787 788/* 789 * Close a file. 790 */ 791void 792nfsd_close(struct file *filp) 793{ 794 fput(filp); 795} 796 797/* 798 * Obtain the readahead parameters for the file 799 * specified by (dev, ino). 800 */ 801 802static inline struct raparms * 803nfsd_get_raparms(dev_t dev, ino_t ino) 804{ 805 struct raparms *ra, **rap, **frap = NULL; 806 int depth = 0; 807 unsigned int hash; 808 struct raparm_hbucket *rab; 809 810 hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK; 811 rab = &raparm_hash[hash]; 812 813 spin_lock(&rab->pb_lock); 814 for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) { 815 if (ra->p_ino == ino && ra->p_dev == dev) 816 goto found; 817 depth++; 818 if (ra->p_count == 0) 819 frap = rap; 820 } 821 depth = nfsdstats.ra_size*11/10; 822 if (!frap) { 823 spin_unlock(&rab->pb_lock); 824 return NULL; 825 } 826 rap = frap; 827 ra = *frap; 828 ra->p_dev = dev; 829 ra->p_ino = ino; 830 ra->p_set = 0; 831 ra->p_hindex = hash; 832found: 833 if (rap != &rab->pb_head) { 834 *rap = ra->p_next; 835 ra->p_next = rab->pb_head; 836 rab->pb_head = ra; 837 } 838 ra->p_count++; 839 nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; 840 spin_unlock(&rab->pb_lock); 841 return ra; 842} 843 844/* 845 * Grab and keep cached pages associated with a file in the svc_rqst 846 * so that they can be passed to the network sendmsg/sendpage routines 847 * directly. They will be released after the sending has completed. 848 */ 849static int 850nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 851 struct splice_desc *sd) 852{ 853 struct svc_rqst *rqstp = sd->u.data; 854 struct page **pp = rqstp->rq_respages + rqstp->rq_resused; 855 struct page *page = buf->page; 856 size_t size; 857 int ret; 858 859 ret = buf->ops->confirm(pipe, buf); 860 if (unlikely(ret)) 861 return ret; 862 863 size = sd->len; 864 865 if (rqstp->rq_res.page_len == 0) { 866 get_page(page); 867 put_page(*pp); 868 *pp = page; 869 rqstp->rq_resused++; 870 rqstp->rq_res.page_base = buf->offset; 871 rqstp->rq_res.page_len = size; 872 } else if (page != pp[-1]) { 873 get_page(page); 874 if (*pp) 875 put_page(*pp); 876 *pp = page; 877 rqstp->rq_resused++; 878 rqstp->rq_res.page_len += size; 879 } else 880 rqstp->rq_res.page_len += size; 881 882 return size; 883} 884 885static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, 886 struct splice_desc *sd) 887{ 888 return __splice_from_pipe(pipe, sd, nfsd_splice_actor); 889} 890 891static inline int svc_msnfs(struct svc_fh *ffhp) 892{ 893#ifdef MSNFS 894 return (ffhp->fh_export->ex_flags & NFSEXP_MSNFS); 895#else 896 return 0; 897#endif 898} 899 900static __be32 901nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 902 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 903{ 904 struct inode *inode; 905 mm_segment_t oldfs; 906 __be32 err; 907 int host_err; 908 909 err = nfserr_perm; 910 inode = file->f_path.dentry->d_inode; 911 912 if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count)) 913 goto out; 914 915 if (file->f_op->splice_read && rqstp->rq_splice_ok) { 916 struct splice_desc sd = { 917 .len = 0, 918 .total_len = *count, 919 .pos = offset, 920 .u.data = rqstp, 921 }; 922 923 rqstp->rq_resused = 1; 924 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); 925 } else { 926 oldfs = get_fs(); 927 set_fs(KERNEL_DS); 928 host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); 929 set_fs(oldfs); 930 } 931 932 if (host_err >= 0) { 933 nfsdstats.io_read += host_err; 934 *count = host_err; 935 err = 0; 936 fsnotify_access(file); 937 } else 938 err = nfserrno(host_err); 939out: 940 return err; 941} 942 943static void kill_suid(struct dentry *dentry) 944{ 945 struct iattr ia; 946 ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 947 948 mutex_lock(&dentry->d_inode->i_mutex); 949 notify_change(dentry, &ia); 950 mutex_unlock(&dentry->d_inode->i_mutex); 951} 952 953/* 954 * Gathered writes: If another process is currently writing to the file, 955 * there's a high chance this is another nfsd (triggered by a bulk write 956 * from a client's biod). Rather than syncing the file with each write 957 * request, we sleep for 10 msec. 958 * 959 * I don't know if this roughly approximates C. Juszak's idea of 960 * gathered writes, but it's a nice and simple solution (IMHO), and it 961 * seems to work:-) 962 * 963 * Note: we do this only in the NFSv2 case, since v3 and higher have a 964 * better tool (separate unstable writes and commits) for solving this 965 * problem. 966 */ 967static int wait_for_concurrent_writes(struct file *file) 968{ 969 struct inode *inode = file->f_path.dentry->d_inode; 970 static ino_t last_ino; 971 static dev_t last_dev; 972 int err = 0; 973 974 if (atomic_read(&inode->i_writecount) > 1 975 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { 976 dprintk("nfsd: write defer %d\n", task_pid_nr(current)); 977 msleep(10); 978 dprintk("nfsd: write resume %d\n", task_pid_nr(current)); 979 } 980 981 if (inode->i_state & I_DIRTY) { 982 dprintk("nfsd: write sync %d\n", task_pid_nr(current)); 983 err = vfs_fsync(file, 0); 984 } 985 last_ino = inode->i_ino; 986 last_dev = inode->i_sb->s_dev; 987 return err; 988} 989 990static __be32 991nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 992 loff_t offset, struct kvec *vec, int vlen, 993 unsigned long *cnt, int *stablep) 994{ 995 struct svc_export *exp; 996 struct dentry *dentry; 997 struct inode *inode; 998 mm_segment_t oldfs; 999 __be32 err = 0; 1000 int host_err; 1001 int stable = *stablep; 1002 int use_wgather; 1003 1004#ifdef MSNFS 1005 err = nfserr_perm; 1006 1007 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1008 (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt))) 1009 goto out; 1010#endif 1011 1012 dentry = file->f_path.dentry; 1013 inode = dentry->d_inode; 1014 exp = fhp->fh_export; 1015 1016 /* 1017 * Request sync writes if 1018 * - the sync export option has been set, or 1019 * - the client requested O_SYNC behavior (NFSv3 feature). 1020 * - The file system doesn't support fsync(). 1021 * When NFSv2 gathered writes have been configured for this volume, 1022 * flushing the data to disk is handled separately below. 1023 */ 1024 use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); 1025 1026 if (!file->f_op->fsync) {/* COMMIT3 cannot work */ 1027 stable = 2; 1028 *stablep = 2; /* FILE_SYNC */ 1029 } 1030 1031 if (!EX_ISSYNC(exp)) 1032 stable = 0; 1033 if (stable && !use_wgather) { 1034 spin_lock(&file->f_lock); 1035 file->f_flags |= O_SYNC; 1036 spin_unlock(&file->f_lock); 1037 } 1038 1039 /* Write the data. */ 1040 oldfs = get_fs(); set_fs(KERNEL_DS); 1041 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); 1042 set_fs(oldfs); 1043 if (host_err < 0) 1044 goto out_nfserr; 1045 *cnt = host_err; 1046 nfsdstats.io_write += host_err; 1047 fsnotify_modify(file); 1048 1049 /* clear setuid/setgid flag after write */ 1050 if (inode->i_mode & (S_ISUID | S_ISGID)) 1051 kill_suid(dentry); 1052 1053 if (stable && use_wgather) 1054 host_err = wait_for_concurrent_writes(file); 1055 1056out_nfserr: 1057 dprintk("nfsd: write complete host_err=%d\n", host_err); 1058 if (host_err >= 0) 1059 err = 0; 1060 else 1061 err = nfserrno(host_err); 1062out: 1063 return err; 1064} 1065 1066/* 1067 * Read data from a file. count must contain the requested read count 1068 * on entry. On return, *count contains the number of bytes actually read. 1069 * N.B. After this call fhp needs an fh_put 1070 */ 1071__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, 1072 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 1073{ 1074 struct file *file; 1075 struct inode *inode; 1076 struct raparms *ra; 1077 __be32 err; 1078 1079 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); 1080 if (err) 1081 return err; 1082 1083 inode = file->f_path.dentry->d_inode; 1084 1085 /* Get readahead parameters */ 1086 ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); 1087 1088 if (ra && ra->p_set) 1089 file->f_ra = ra->p_ra; 1090 1091 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1092 1093 /* Write back readahead params */ 1094 if (ra) { 1095 struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; 1096 spin_lock(&rab->pb_lock); 1097 ra->p_ra = file->f_ra; 1098 ra->p_set = 1; 1099 ra->p_count--; 1100 spin_unlock(&rab->pb_lock); 1101 } 1102 1103 nfsd_close(file); 1104 return err; 1105} 1106 1107/* As above, but use the provided file descriptor. */ 1108__be32 1109nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 1110 loff_t offset, struct kvec *vec, int vlen, 1111 unsigned long *count) 1112{ 1113 __be32 err; 1114 1115 if (file) { 1116 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, 1117 NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); 1118 if (err) 1119 goto out; 1120 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1121 } else /* Note file may still be NULL in NFSv4 special stateid case: */ 1122 err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); 1123out: 1124 return err; 1125} 1126 1127/* 1128 * Write data to a file. 1129 * The stable flag requests synchronous writes. 1130 * N.B. After this call fhp needs an fh_put 1131 */ 1132__be32 1133nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 1134 loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, 1135 int *stablep) 1136{ 1137 __be32 err = 0; 1138 1139 if (file) { 1140 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, 1141 NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE); 1142 if (err) 1143 goto out; 1144 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, 1145 stablep); 1146 } else { 1147 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); 1148 if (err) 1149 goto out; 1150 1151 if (cnt) 1152 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, 1153 cnt, stablep); 1154 nfsd_close(file); 1155 } 1156out: 1157 return err; 1158} 1159 1160#ifdef CONFIG_NFSD_V3 1161/* 1162 * Commit all pending writes to stable storage. 1163 * 1164 * Note: we only guarantee that data that lies within the range specified 1165 * by the 'offset' and 'count' parameters will be synced. 1166 * 1167 * Unfortunately we cannot lock the file to make sure we return full WCC 1168 * data to the client, as locking happens lower down in the filesystem. 1169 */ 1170__be32 1171nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, 1172 loff_t offset, unsigned long count) 1173{ 1174 struct file *file; 1175 loff_t end = LLONG_MAX; 1176 __be32 err = nfserr_inval; 1177 1178 if (offset < 0) 1179 goto out; 1180 if (count != 0) { 1181 end = offset + (loff_t)count - 1; 1182 if (end < offset) 1183 goto out; 1184 } 1185 1186 err = nfsd_open(rqstp, fhp, S_IFREG, 1187 NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file); 1188 if (err) 1189 goto out; 1190 if (EX_ISSYNC(fhp->fh_export)) { 1191 int err2 = vfs_fsync_range(file, offset, end, 0); 1192 1193 if (err2 != -EINVAL) 1194 err = nfserrno(err2); 1195 else 1196 err = nfserr_notsupp; 1197 } 1198 1199 nfsd_close(file); 1200out: 1201 return err; 1202} 1203#endif /* CONFIG_NFSD_V3 */ 1204 1205static __be32 1206nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, 1207 struct iattr *iap) 1208{ 1209 /* 1210 * Mode has already been set earlier in create: 1211 */ 1212 iap->ia_valid &= ~ATTR_MODE; 1213 /* 1214 * Setting uid/gid works only for root. Irix appears to 1215 * send along the gid on create when it tries to implement 1216 * setgid directories via NFS: 1217 */ 1218 if (current_fsuid() != 0) 1219 iap->ia_valid &= ~(ATTR_UID|ATTR_GID); 1220 if (iap->ia_valid) 1221 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1222 return 0; 1223} 1224 1225/* HPUX client sometimes creates a file in mode 000, and sets size to 0. 1226 * setting size to 0 may fail for some specific file systems by the permission 1227 * checking which requires WRITE permission but the mode is 000. 1228 * we ignore the resizing(to 0) on the just new created file, since the size is 1229 * 0 after file created. 1230 * 1231 * call this only after vfs_create() is called. 1232 * */ 1233static void 1234nfsd_check_ignore_resizing(struct iattr *iap) 1235{ 1236 if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) 1237 iap->ia_valid &= ~ATTR_SIZE; 1238} 1239 1240/* 1241 * Create a file (regular, directory, device, fifo); UNIX sockets 1242 * not yet implemented. 1243 * If the response fh has been verified, the parent directory should 1244 * already be locked. Note that the parent directory is left locked. 1245 * 1246 * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp 1247 */ 1248__be32 1249nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, 1250 char *fname, int flen, struct iattr *iap, 1251 int type, dev_t rdev, struct svc_fh *resfhp) 1252{ 1253 struct dentry *dentry, *dchild = NULL; 1254 struct inode *dirp; 1255 __be32 err; 1256 __be32 err2; 1257 int host_err; 1258 1259 err = nfserr_perm; 1260 if (!flen) 1261 goto out; 1262 err = nfserr_exist; 1263 if (isdotent(fname, flen)) 1264 goto out; 1265 1266 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1267 if (err) 1268 goto out; 1269 1270 dentry = fhp->fh_dentry; 1271 dirp = dentry->d_inode; 1272 1273 err = nfserr_notdir; 1274 if (!dirp->i_op->lookup) 1275 goto out; 1276 /* 1277 * Check whether the response file handle has been verified yet. 1278 * If it has, the parent directory should already be locked. 1279 */ 1280 if (!resfhp->fh_dentry) { 1281 /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ 1282 fh_lock_nested(fhp, I_MUTEX_PARENT); 1283 dchild = lookup_one_len(fname, dentry, flen); 1284 host_err = PTR_ERR(dchild); 1285 if (IS_ERR(dchild)) 1286 goto out_nfserr; 1287 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); 1288 if (err) 1289 goto out; 1290 } else { 1291 /* called from nfsd_proc_create */ 1292 dchild = dget(resfhp->fh_dentry); 1293 if (!fhp->fh_locked) { 1294 /* not actually possible */ 1295 printk(KERN_ERR 1296 "nfsd_create: parent %s/%s not locked!\n", 1297 dentry->d_parent->d_name.name, 1298 dentry->d_name.name); 1299 err = nfserr_io; 1300 goto out; 1301 } 1302 } 1303 /* 1304 * Make sure the child dentry is still negative ... 1305 */ 1306 err = nfserr_exist; 1307 if (dchild->d_inode) { 1308 dprintk("nfsd_create: dentry %s/%s not negative!\n", 1309 dentry->d_name.name, dchild->d_name.name); 1310 goto out; 1311 } 1312 1313 if (!(iap->ia_valid & ATTR_MODE)) 1314 iap->ia_mode = 0; 1315 iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; 1316 1317 err = nfserr_inval; 1318 if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) { 1319 printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n", 1320 type); 1321 goto out; 1322 } 1323 1324 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1325 if (host_err) 1326 goto out_nfserr; 1327 1328 /* 1329 * Get the dir op function pointer. 1330 */ 1331 err = 0; 1332 switch (type) { 1333 case S_IFREG: 1334 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1335 if (!host_err) 1336 nfsd_check_ignore_resizing(iap); 1337 break; 1338 case S_IFDIR: 1339 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); 1340 break; 1341 case S_IFCHR: 1342 case S_IFBLK: 1343 case S_IFIFO: 1344 case S_IFSOCK: 1345 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); 1346 break; 1347 } 1348 if (host_err < 0) { 1349 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1350 goto out_nfserr; 1351 } 1352 1353 err = nfsd_create_setattr(rqstp, resfhp, iap); 1354 1355 /* 1356 * nfsd_setattr already committed the child. Transactional filesystems 1357 * had a chance to commit changes for both parent and child 1358 * simultaneously making the following commit_metadata a noop. 1359 */ 1360 err2 = nfserrno(commit_metadata(fhp)); 1361 if (err2) 1362 err = err2; 1363 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1364 /* 1365 * Update the file handle to get the new inode info. 1366 */ 1367 if (!err) 1368 err = fh_update(resfhp); 1369out: 1370 if (dchild && !IS_ERR(dchild)) 1371 dput(dchild); 1372 return err; 1373 1374out_nfserr: 1375 err = nfserrno(host_err); 1376 goto out; 1377} 1378 1379#ifdef CONFIG_NFSD_V3 1380/* 1381 * NFSv3 version of nfsd_create 1382 */ 1383__be32 1384nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, 1385 char *fname, int flen, struct iattr *iap, 1386 struct svc_fh *resfhp, int createmode, u32 *verifier, 1387 int *truncp, int *created) 1388{ 1389 struct dentry *dentry, *dchild = NULL; 1390 struct inode *dirp; 1391 __be32 err; 1392 int host_err; 1393 __u32 v_mtime=0, v_atime=0; 1394 1395 err = nfserr_perm; 1396 if (!flen) 1397 goto out; 1398 err = nfserr_exist; 1399 if (isdotent(fname, flen)) 1400 goto out; 1401 if (!(iap->ia_valid & ATTR_MODE)) 1402 iap->ia_mode = 0; 1403 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1404 if (err) 1405 goto out; 1406 1407 dentry = fhp->fh_dentry; 1408 dirp = dentry->d_inode; 1409 1410 /* Get all the sanity checks out of the way before 1411 * we lock the parent. */ 1412 err = nfserr_notdir; 1413 if (!dirp->i_op->lookup) 1414 goto out; 1415 fh_lock_nested(fhp, I_MUTEX_PARENT); 1416 1417 /* 1418 * Compose the response file handle. 1419 */ 1420 dchild = lookup_one_len(fname, dentry, flen); 1421 host_err = PTR_ERR(dchild); 1422 if (IS_ERR(dchild)) 1423 goto out_nfserr; 1424 1425 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); 1426 if (err) 1427 goto out; 1428 1429 if (createmode == NFS3_CREATE_EXCLUSIVE) { 1430 /* solaris7 gets confused (bugid 4218508) if these have 1431 * the high bit set, so just clear the high bits. If this is 1432 * ever changed to use different attrs for storing the 1433 * verifier, then do_open_lookup() will also need to be fixed 1434 * accordingly. 1435 */ 1436 v_mtime = verifier[0]&0x7fffffff; 1437 v_atime = verifier[1]&0x7fffffff; 1438 } 1439 1440 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1441 if (host_err) 1442 goto out_nfserr; 1443 if (dchild->d_inode) { 1444 err = 0; 1445 1446 switch (createmode) { 1447 case NFS3_CREATE_UNCHECKED: 1448 if (! S_ISREG(dchild->d_inode->i_mode)) 1449 err = nfserr_exist; 1450 else if (truncp) { 1451 /* in nfsv4, we need to treat this case a little 1452 * differently. we don't want to truncate the 1453 * file now; this would be wrong if the OPEN 1454 * fails for some other reason. furthermore, 1455 * if the size is nonzero, we should ignore it 1456 * according to spec! 1457 */ 1458 *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size; 1459 } 1460 else { 1461 iap->ia_valid &= ATTR_SIZE; 1462 goto set_attr; 1463 } 1464 break; 1465 case NFS3_CREATE_EXCLUSIVE: 1466 if ( dchild->d_inode->i_mtime.tv_sec == v_mtime 1467 && dchild->d_inode->i_atime.tv_sec == v_atime 1468 && dchild->d_inode->i_size == 0 ) 1469 break; 1470 /* fallthru */ 1471 case NFS3_CREATE_GUARDED: 1472 err = nfserr_exist; 1473 } 1474 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1475 goto out; 1476 } 1477 1478 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1479 if (host_err < 0) { 1480 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1481 goto out_nfserr; 1482 } 1483 if (created) 1484 *created = 1; 1485 1486 nfsd_check_ignore_resizing(iap); 1487 1488 if (createmode == NFS3_CREATE_EXCLUSIVE) { 1489 /* Cram the verifier into atime/mtime */ 1490 iap->ia_valid = ATTR_MTIME|ATTR_ATIME 1491 | ATTR_MTIME_SET|ATTR_ATIME_SET; 1492 iap->ia_mtime.tv_sec = v_mtime; 1493 iap->ia_atime.tv_sec = v_atime; 1494 iap->ia_mtime.tv_nsec = 0; 1495 iap->ia_atime.tv_nsec = 0; 1496 } 1497 1498 set_attr: 1499 err = nfsd_create_setattr(rqstp, resfhp, iap); 1500 1501 /* 1502 * nfsd_setattr already committed the child (and possibly also the parent). 1503 */ 1504 if (!err) 1505 err = nfserrno(commit_metadata(fhp)); 1506 1507 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1508 /* 1509 * Update the filehandle to get the new inode info. 1510 */ 1511 if (!err) 1512 err = fh_update(resfhp); 1513 1514 out: 1515 fh_unlock(fhp); 1516 if (dchild && !IS_ERR(dchild)) 1517 dput(dchild); 1518 return err; 1519 1520 out_nfserr: 1521 err = nfserrno(host_err); 1522 goto out; 1523} 1524#endif /* CONFIG_NFSD_V3 */ 1525 1526/* 1527 * Read a symlink. On entry, *lenp must contain the maximum path length that 1528 * fits into the buffer. On return, it contains the true length. 1529 * N.B. After this call fhp needs an fh_put 1530 */ 1531__be32 1532nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) 1533{ 1534 struct dentry *dentry; 1535 struct inode *inode; 1536 mm_segment_t oldfs; 1537 __be32 err; 1538 int host_err; 1539 1540 err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP); 1541 if (err) 1542 goto out; 1543 1544 dentry = fhp->fh_dentry; 1545 inode = dentry->d_inode; 1546 1547 err = nfserr_inval; 1548 if (!inode->i_op->readlink) 1549 goto out; 1550 1551 touch_atime(fhp->fh_export->ex_path.mnt, dentry); 1552 /* N.B. Why does this call need a get_fs()?? 1553 * Remove the set_fs and watch the fireworks:-) --okir 1554 */ 1555 1556 oldfs = get_fs(); set_fs(KERNEL_DS); 1557 host_err = inode->i_op->readlink(dentry, buf, *lenp); 1558 set_fs(oldfs); 1559 1560 if (host_err < 0) 1561 goto out_nfserr; 1562 *lenp = host_err; 1563 err = 0; 1564out: 1565 return err; 1566 1567out_nfserr: 1568 err = nfserrno(host_err); 1569 goto out; 1570} 1571 1572/* 1573 * Create a symlink and look up its inode 1574 * N.B. After this call _both_ fhp and resfhp need an fh_put 1575 */ 1576__be32 1577nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, 1578 char *fname, int flen, 1579 char *path, int plen, 1580 struct svc_fh *resfhp, 1581 struct iattr *iap) 1582{ 1583 struct dentry *dentry, *dnew; 1584 __be32 err, cerr; 1585 int host_err; 1586 1587 err = nfserr_noent; 1588 if (!flen || !plen) 1589 goto out; 1590 err = nfserr_exist; 1591 if (isdotent(fname, flen)) 1592 goto out; 1593 1594 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1595 if (err) 1596 goto out; 1597 fh_lock(fhp); 1598 dentry = fhp->fh_dentry; 1599 dnew = lookup_one_len(fname, dentry, flen); 1600 host_err = PTR_ERR(dnew); 1601 if (IS_ERR(dnew)) 1602 goto out_nfserr; 1603 1604 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1605 if (host_err) 1606 goto out_nfserr; 1607 1608 if (unlikely(path[plen] != 0)) { 1609 char *path_alloced = kmalloc(plen+1, GFP_KERNEL); 1610 if (path_alloced == NULL) 1611 host_err = -ENOMEM; 1612 else { 1613 strncpy(path_alloced, path, plen); 1614 path_alloced[plen] = 0; 1615 host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced); 1616 kfree(path_alloced); 1617 } 1618 } else 1619 host_err = vfs_symlink(dentry->d_inode, dnew, path); 1620 err = nfserrno(host_err); 1621 if (!err) 1622 err = nfserrno(commit_metadata(fhp)); 1623 fh_unlock(fhp); 1624 1625 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1626 1627 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); 1628 dput(dnew); 1629 if (err==0) err = cerr; 1630out: 1631 return err; 1632 1633out_nfserr: 1634 err = nfserrno(host_err); 1635 goto out; 1636} 1637 1638/* 1639 * Create a hardlink 1640 * N.B. After this call _both_ ffhp and tfhp need an fh_put 1641 */ 1642__be32 1643nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, 1644 char *name, int len, struct svc_fh *tfhp) 1645{ 1646 struct dentry *ddir, *dnew, *dold; 1647 struct inode *dirp; 1648 __be32 err; 1649 int host_err; 1650 1651 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); 1652 if (err) 1653 goto out; 1654 err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP); 1655 if (err) 1656 goto out; 1657 1658 err = nfserr_perm; 1659 if (!len) 1660 goto out; 1661 err = nfserr_exist; 1662 if (isdotent(name, len)) 1663 goto out; 1664 1665 fh_lock_nested(ffhp, I_MUTEX_PARENT); 1666 ddir = ffhp->fh_dentry; 1667 dirp = ddir->d_inode; 1668 1669 dnew = lookup_one_len(name, ddir, len); 1670 host_err = PTR_ERR(dnew); 1671 if (IS_ERR(dnew)) 1672 goto out_nfserr; 1673 1674 dold = tfhp->fh_dentry; 1675 1676 host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt); 1677 if (host_err) { 1678 err = nfserrno(host_err); 1679 goto out_dput; 1680 } 1681 host_err = vfs_link(dold, dirp, dnew); 1682 if (!host_err) { 1683 err = nfserrno(commit_metadata(ffhp)); 1684 if (!err) 1685 err = nfserrno(commit_metadata(tfhp)); 1686 } else { 1687 if (host_err == -EXDEV && rqstp->rq_vers == 2) 1688 err = nfserr_acces; 1689 else 1690 err = nfserrno(host_err); 1691 } 1692 mnt_drop_write(tfhp->fh_export->ex_path.mnt); 1693out_dput: 1694 dput(dnew); 1695out_unlock: 1696 fh_unlock(ffhp); 1697out: 1698 return err; 1699 1700out_nfserr: 1701 err = nfserrno(host_err); 1702 goto out_unlock; 1703} 1704 1705/* 1706 * Rename a file 1707 * N.B. After this call _both_ ffhp and tfhp need an fh_put 1708 */ 1709__be32 1710nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, 1711 struct svc_fh *tfhp, char *tname, int tlen) 1712{ 1713 struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap; 1714 struct inode *fdir, *tdir; 1715 __be32 err; 1716 int host_err; 1717 1718 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); 1719 if (err) 1720 goto out; 1721 err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE); 1722 if (err) 1723 goto out; 1724 1725 fdentry = ffhp->fh_dentry; 1726 fdir = fdentry->d_inode; 1727 1728 tdentry = tfhp->fh_dentry; 1729 tdir = tdentry->d_inode; 1730 1731 err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; 1732 if (ffhp->fh_export != tfhp->fh_export) 1733 goto out; 1734 1735 err = nfserr_perm; 1736 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) 1737 goto out; 1738 1739 /* cannot use fh_lock as we need deadlock protective ordering 1740 * so do it by hand */ 1741 trap = lock_rename(tdentry, fdentry); 1742 ffhp->fh_locked = tfhp->fh_locked = 1; 1743 fill_pre_wcc(ffhp); 1744 fill_pre_wcc(tfhp); 1745 1746 odentry = lookup_one_len(fname, fdentry, flen); 1747 host_err = PTR_ERR(odentry); 1748 if (IS_ERR(odentry)) 1749 goto out_nfserr; 1750 1751 host_err = -ENOENT; 1752 if (!odentry->d_inode) 1753 goto out_dput_old; 1754 host_err = -EINVAL; 1755 if (odentry == trap) 1756 goto out_dput_old; 1757 1758 ndentry = lookup_one_len(tname, tdentry, tlen); 1759 host_err = PTR_ERR(ndentry); 1760 if (IS_ERR(ndentry)) 1761 goto out_dput_old; 1762 host_err = -ENOTEMPTY; 1763 if (ndentry == trap) 1764 goto out_dput_new; 1765 1766 if (svc_msnfs(ffhp) && 1767 ((atomic_read(&odentry->d_count) > 1) 1768 || (atomic_read(&ndentry->d_count) > 1))) { 1769 host_err = -EPERM; 1770 goto out_dput_new; 1771 } 1772 1773 host_err = -EXDEV; 1774 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) 1775 goto out_dput_new; 1776 host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt); 1777 if (host_err) 1778 goto out_dput_new; 1779 1780 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1781 if (!host_err) { 1782 host_err = commit_metadata(tfhp); 1783 if (!host_err) 1784 host_err = commit_metadata(ffhp); 1785 } 1786 1787 mnt_drop_write(ffhp->fh_export->ex_path.mnt); 1788 1789 out_dput_new: 1790 dput(ndentry); 1791 out_dput_old: 1792 dput(odentry); 1793 out_nfserr: 1794 err = nfserrno(host_err); 1795 1796 /* we cannot reply on fh_unlock on the two filehandles, 1797 * as that would do the wrong thing if the two directories 1798 * were the same, so again we do it by hand 1799 */ 1800 fill_post_wcc(ffhp); 1801 fill_post_wcc(tfhp); 1802 unlock_rename(tdentry, fdentry); 1803 ffhp->fh_locked = tfhp->fh_locked = 0; 1804 1805out: 1806 return err; 1807} 1808 1809/* 1810 * Unlink a file or directory 1811 * N.B. After this call fhp needs an fh_put 1812 */ 1813__be32 1814nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 1815 char *fname, int flen) 1816{ 1817 struct dentry *dentry, *rdentry; 1818 struct inode *dirp; 1819 __be32 err; 1820 int host_err; 1821 1822 err = nfserr_acces; 1823 if (!flen || isdotent(fname, flen)) 1824 goto out; 1825 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE); 1826 if (err) 1827 goto out; 1828 1829 fh_lock_nested(fhp, I_MUTEX_PARENT); 1830 dentry = fhp->fh_dentry; 1831 dirp = dentry->d_inode; 1832 1833 rdentry = lookup_one_len(fname, dentry, flen); 1834 host_err = PTR_ERR(rdentry); 1835 if (IS_ERR(rdentry)) 1836 goto out_nfserr; 1837 1838 if (!rdentry->d_inode) { 1839 dput(rdentry); 1840 err = nfserr_noent; 1841 goto out; 1842 } 1843 1844 if (!type) 1845 type = rdentry->d_inode->i_mode & S_IFMT; 1846 1847 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1848 if (host_err) 1849 goto out_nfserr; 1850 1851 if (type != S_IFDIR) { /* It's UNLINK */ 1852#ifdef MSNFS 1853 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1854 (atomic_read(&rdentry->d_count) > 1)) { 1855 host_err = -EPERM; 1856 } else 1857#endif 1858 host_err = vfs_unlink(dirp, rdentry); 1859 } else { /* It's RMDIR */ 1860 host_err = vfs_rmdir(dirp, rdentry); 1861 } 1862 1863 dput(rdentry); 1864 1865 if (!host_err) 1866 host_err = commit_metadata(fhp); 1867 1868 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1869out_nfserr: 1870 err = nfserrno(host_err); 1871out: 1872 return err; 1873} 1874 1875/* 1876 * We do this buffering because we must not call back into the file 1877 * system's ->lookup() method from the filldir callback. That may well 1878 * deadlock a number of file systems. 1879 * 1880 * This is based heavily on the implementation of same in XFS. 1881 */ 1882struct buffered_dirent { 1883 u64 ino; 1884 loff_t offset; 1885 int namlen; 1886 unsigned int d_type; 1887 char name[]; 1888}; 1889 1890struct readdir_data { 1891 char *dirent; 1892 size_t used; 1893 int full; 1894}; 1895 1896static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, 1897 loff_t offset, u64 ino, unsigned int d_type) 1898{ 1899 struct readdir_data *buf = __buf; 1900 struct buffered_dirent *de = (void *)(buf->dirent + buf->used); 1901 unsigned int reclen; 1902 1903 reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64)); 1904 if (buf->used + reclen > PAGE_SIZE) { 1905 buf->full = 1; 1906 return -EINVAL; 1907 } 1908 1909 de->namlen = namlen; 1910 de->offset = offset; 1911 de->ino = ino; 1912 de->d_type = d_type; 1913 memcpy(de->name, name, namlen); 1914 buf->used += reclen; 1915 1916 return 0; 1917} 1918 1919static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, 1920 struct readdir_cd *cdp, loff_t *offsetp) 1921{ 1922 struct readdir_data buf; 1923 struct buffered_dirent *de; 1924 int host_err; 1925 int size; 1926 loff_t offset; 1927 1928 buf.dirent = (void *)__get_free_page(GFP_KERNEL); 1929 if (!buf.dirent) 1930 return nfserrno(-ENOMEM); 1931 1932 offset = *offsetp; 1933 1934 while (1) { 1935 struct inode *dir_inode = file->f_path.dentry->d_inode; 1936 unsigned int reclen; 1937 1938 cdp->err = nfserr_eof; /* will be cleared on successful read */ 1939 buf.used = 0; 1940 buf.full = 0; 1941 1942 host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); 1943 if (buf.full) 1944 host_err = 0; 1945 1946 if (host_err < 0) 1947 break; 1948 1949 size = buf.used; 1950 1951 if (!size) 1952 break; 1953 1954 /* 1955 * Various filldir functions may end up calling back into 1956 * lookup_one_len() and the file system's ->lookup() method. 1957 * These expect i_mutex to be held, as it would within readdir. 1958 */ 1959 host_err = mutex_lock_killable(&dir_inode->i_mutex); 1960 if (host_err) 1961 break; 1962 1963 de = (struct buffered_dirent *)buf.dirent; 1964 while (size > 0) { 1965 offset = de->offset; 1966 1967 if (func(cdp, de->name, de->namlen, de->offset, 1968 de->ino, de->d_type)) 1969 break; 1970 1971 if (cdp->err != nfs_ok) 1972 break; 1973 1974 reclen = ALIGN(sizeof(*de) + de->namlen, 1975 sizeof(u64)); 1976 size -= reclen; 1977 de = (struct buffered_dirent *)((char *)de + reclen); 1978 } 1979 mutex_unlock(&dir_inode->i_mutex); 1980 if (size > 0) /* We bailed out early */ 1981 break; 1982 1983 offset = vfs_llseek(file, 0, SEEK_CUR); 1984 } 1985 1986 free_page((unsigned long)(buf.dirent)); 1987 1988 if (host_err) 1989 return nfserrno(host_err); 1990 1991 *offsetp = offset; 1992 return cdp->err; 1993} 1994 1995/* 1996 * Read entries from a directory. 1997 * The NFSv3/4 verifier we ignore for now. 1998 */ 1999__be32 2000nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, 2001 struct readdir_cd *cdp, filldir_t func) 2002{ 2003 __be32 err; 2004 struct file *file; 2005 loff_t offset = *offsetp; 2006 2007 err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file); 2008 if (err) 2009 goto out; 2010 2011 offset = vfs_llseek(file, offset, 0); 2012 if (offset < 0) { 2013 err = nfserrno((int)offset); 2014 goto out_close; 2015 } 2016 2017 err = nfsd_buffered_readdir(file, func, cdp, offsetp); 2018 2019 if (err == nfserr_eof || err == nfserr_toosmall) 2020 err = nfs_ok; /* can still be found in ->err */ 2021out_close: 2022 nfsd_close(file); 2023out: 2024 return err; 2025} 2026 2027/* 2028 * Get file system stats 2029 * N.B. After this call fhp needs an fh_put 2030 */ 2031__be32 2032nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) 2033{ 2034 __be32 err; 2035 2036 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); 2037 if (!err) { 2038 struct path path = { 2039 .mnt = fhp->fh_export->ex_path.mnt, 2040 .dentry = fhp->fh_dentry, 2041 }; 2042 if (vfs_statfs(&path, stat)) 2043 err = nfserr_io; 2044 } 2045 return err; 2046} 2047 2048static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp) 2049{ 2050 return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY; 2051} 2052 2053/* 2054 * Check for a user's access permissions to this inode. 2055 */ 2056__be32 2057nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, 2058 struct dentry *dentry, int acc) 2059{ 2060 struct inode *inode = dentry->d_inode; 2061 int err; 2062 2063 if (acc == NFSD_MAY_NOP) 2064 return 0; 2065 2066 /* Normally we reject any write/sattr etc access on a read-only file 2067 * system. But if it is IRIX doing check on write-access for a 2068 * device special file, we ignore rofs. 2069 */ 2070 if (!(acc & NFSD_MAY_LOCAL_ACCESS)) 2071 if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) { 2072 if (exp_rdonly(rqstp, exp) || 2073 __mnt_is_readonly(exp->ex_path.mnt)) 2074 return nfserr_rofs; 2075 if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode)) 2076 return nfserr_perm; 2077 } 2078 if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode)) 2079 return nfserr_perm; 2080 2081 if (acc & NFSD_MAY_LOCK) { 2082 /* If we cannot rely on authentication in NLM requests, 2083 * just allow locks, otherwise require read permission, or 2084 * ownership 2085 */ 2086 if (exp->ex_flags & NFSEXP_NOAUTHNLM) 2087 return 0; 2088 else 2089 acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE; 2090 } 2091 /* 2092 * The file owner always gets access permission for accesses that 2093 * would normally be checked at open time. This is to make 2094 * file access work even when the client has done a fchmod(fd, 0). 2095 * 2096 * However, `cp foo bar' should fail nevertheless when bar is 2097 * readonly. A sensible way to do this might be to reject all 2098 * attempts to truncate a read-only file, because a creat() call 2099 * always implies file truncation. 2100 * ... but this isn't really fair. A process may reasonably call 2101 * ftruncate on an open file descriptor on a file with perm 000. 2102 * We must trust the client to do permission checking - using "ACCESS" 2103 * with NFSv3. 2104 */ 2105 if ((acc & NFSD_MAY_OWNER_OVERRIDE) && 2106 inode->i_uid == current_fsuid()) 2107 return 0; 2108 2109 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ 2110 err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); 2111 2112 /* Allow read access to binaries even when mode 111 */ 2113 if (err == -EACCES && S_ISREG(inode->i_mode) && 2114 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) 2115 err = inode_permission(inode, MAY_EXEC); 2116 2117 return err? nfserrno(err) : 0; 2118} 2119 2120void 2121nfsd_racache_shutdown(void) 2122{ 2123 struct raparms *raparm, *last_raparm; 2124 unsigned int i; 2125 2126 dprintk("nfsd: freeing readahead buffers.\n"); 2127 2128 for (i = 0; i < RAPARM_HASH_SIZE; i++) { 2129 raparm = raparm_hash[i].pb_head; 2130 while(raparm) { 2131 last_raparm = raparm; 2132 raparm = raparm->p_next; 2133 kfree(last_raparm); 2134 } 2135 raparm_hash[i].pb_head = NULL; 2136 } 2137} 2138/* 2139 * Initialize readahead param cache 2140 */ 2141int 2142nfsd_racache_init(int cache_size) 2143{ 2144 int i; 2145 int j = 0; 2146 int nperbucket; 2147 struct raparms **raparm = NULL; 2148 2149 2150 if (raparm_hash[0].pb_head) 2151 return 0; 2152 nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); 2153 if (nperbucket < 2) 2154 nperbucket = 2; 2155 cache_size = nperbucket * RAPARM_HASH_SIZE; 2156 2157 dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); 2158 2159 for (i = 0; i < RAPARM_HASH_SIZE; i++) { 2160 spin_lock_init(&raparm_hash[i].pb_lock); 2161 2162 raparm = &raparm_hash[i].pb_head; 2163 for (j = 0; j < nperbucket; j++) { 2164 *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL); 2165 if (!*raparm) 2166 goto out_nomem; 2167 raparm = &(*raparm)->p_next; 2168 } 2169 *raparm = NULL; 2170 } 2171 2172 nfsdstats.ra_size = cache_size; 2173 return 0; 2174 2175out_nomem: 2176 dprintk("nfsd: kmalloc failed, freeing readahead buffers\n"); 2177 nfsd_racache_shutdown(); 2178 return -ENOMEM; 2179} 2180 2181#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 2182struct posix_acl * 2183nfsd_get_posix_acl(struct svc_fh *fhp, int type) 2184{ 2185 struct inode *inode = fhp->fh_dentry->d_inode; 2186 char *name; 2187 void *value = NULL; 2188 ssize_t size; 2189 struct posix_acl *acl; 2190 2191 if (!IS_POSIXACL(inode)) 2192 return ERR_PTR(-EOPNOTSUPP); 2193 2194 switch (type) { 2195 case ACL_TYPE_ACCESS: 2196 name = POSIX_ACL_XATTR_ACCESS; 2197 break; 2198 case ACL_TYPE_DEFAULT: 2199 name = POSIX_ACL_XATTR_DEFAULT; 2200 break; 2201 default: 2202 return ERR_PTR(-EOPNOTSUPP); 2203 } 2204 2205 size = nfsd_getxattr(fhp->fh_dentry, name, &value); 2206 if (size < 0) 2207 return ERR_PTR(size); 2208 2209 acl = posix_acl_from_xattr(value, size); 2210 kfree(value); 2211 return acl; 2212} 2213 2214int 2215nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) 2216{ 2217 struct inode *inode = fhp->fh_dentry->d_inode; 2218 char *name; 2219 void *value = NULL; 2220 size_t size; 2221 int error; 2222 2223 if (!IS_POSIXACL(inode) || 2224 !inode->i_op->setxattr || !inode->i_op->removexattr) 2225 return -EOPNOTSUPP; 2226 switch(type) { 2227 case ACL_TYPE_ACCESS: 2228 name = POSIX_ACL_XATTR_ACCESS; 2229 break; 2230 case ACL_TYPE_DEFAULT: 2231 name = POSIX_ACL_XATTR_DEFAULT; 2232 break; 2233 default: 2234 return -EOPNOTSUPP; 2235 } 2236 2237 if (acl && acl->a_count) { 2238 size = posix_acl_xattr_size(acl->a_count); 2239 value = kmalloc(size, GFP_KERNEL); 2240 if (!value) 2241 return -ENOMEM; 2242 error = posix_acl_to_xattr(acl, value, size); 2243 if (error < 0) 2244 goto getout; 2245 size = error; 2246 } else 2247 size = 0; 2248 2249 error = mnt_want_write(fhp->fh_export->ex_path.mnt); 2250 if (error) 2251 goto getout; 2252 if (size) 2253 error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); 2254 else { 2255 if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) 2256 error = 0; 2257 else { 2258 error = vfs_removexattr(fhp->fh_dentry, name); 2259 if (error == -ENODATA) 2260 error = 0; 2261 } 2262 } 2263 mnt_drop_write(fhp->fh_export->ex_path.mnt); 2264 2265getout: 2266 kfree(value); 2267 return error; 2268} 2269#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ 2270