1/* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10#include <linux/slab.h> 11#include <linux/spinlock.h> 12#include <linux/completion.h> 13#include <linux/buffer_head.h> 14#include <linux/pagemap.h> 15#include <linux/uio.h> 16#include <linux/blkdev.h> 17#include <linux/mm.h> 18#include <linux/fs.h> 19#include <linux/gfs2_ondisk.h> 20#include <linux/ext2_fs.h> 21#include <linux/crc32.h> 22#include <linux/lm_interface.h> 23#include <linux/writeback.h> 24#include <asm/uaccess.h> 25 26#include "gfs2.h" 27#include "incore.h" 28#include "bmap.h" 29#include "dir.h" 30#include "glock.h" 31#include "glops.h" 32#include "inode.h" 33#include "lm.h" 34#include "log.h" 35#include "meta_io.h" 36#include "ops_file.h" 37#include "ops_vm.h" 38#include "quota.h" 39#include "rgrp.h" 40#include "trans.h" 41#include "util.h" 42#include "eaops.h" 43 44/* 45 * Most fields left uninitialised to catch anybody who tries to 46 * use them. f_flags set to prevent file_accessed() from touching 47 * any other part of this. Its use is purely as a flag so that we 48 * know (in readpage()) whether or not do to locking. 49 */ 50struct file gfs2_internal_file_sentinel = { 51 .f_flags = O_NOATIME|O_RDONLY, 52}; 53 54static int gfs2_read_actor(read_descriptor_t *desc, struct page *page, 55 unsigned long offset, unsigned long size) 56{ 57 char *kaddr; 58 unsigned long count = desc->count; 59 60 if (size > count) 61 size = count; 62 63 kaddr = kmap(page); 64 memcpy(desc->arg.data, kaddr + offset, size); 65 kunmap(page); 66 67 desc->count = count - size; 68 desc->written += size; 69 desc->arg.buf += size; 70 return size; 71} 72 73int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, 74 char *buf, loff_t *pos, unsigned size) 75{ 76 struct inode *inode = &ip->i_inode; 77 read_descriptor_t desc; 78 desc.written = 0; 79 desc.arg.data = buf; 80 desc.count = size; 81 desc.error = 0; 82 do_generic_mapping_read(inode->i_mapping, ra_state, 83 &gfs2_internal_file_sentinel, pos, &desc, 84 gfs2_read_actor); 85 return desc.written ? desc.written : desc.error; 86} 87 88/** 89 * gfs2_llseek - seek to a location in a file 90 * @file: the file 91 * @offset: the offset 92 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) 93 * 94 * SEEK_END requires the glock for the file because it references the 95 * file's size. 96 * 97 * Returns: The new offset, or errno 98 */ 99 100static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) 101{ 102 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 103 struct gfs2_holder i_gh; 104 loff_t error; 105 106 if (origin == 2) { 107 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, 108 &i_gh); 109 if (!error) { 110 error = remote_llseek(file, offset, origin); 111 gfs2_glock_dq_uninit(&i_gh); 112 } 113 } else 114 error = remote_llseek(file, offset, origin); 115 116 return error; 117} 118 119/** 120 * gfs2_readdir - Read directory entries from a directory 121 * @file: The directory to read from 122 * @dirent: Buffer for dirents 123 * @filldir: Function used to do the copying 124 * 125 * Returns: errno 126 */ 127 128static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) 129{ 130 struct inode *dir = file->f_mapping->host; 131 struct gfs2_inode *dip = GFS2_I(dir); 132 struct gfs2_holder d_gh; 133 u64 offset = file->f_pos; 134 int error; 135 136 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); 137 error = gfs2_glock_nq_atime(&d_gh); 138 if (error) { 139 gfs2_holder_uninit(&d_gh); 140 return error; 141 } 142 143 error = gfs2_dir_read(dir, &offset, dirent, filldir); 144 145 gfs2_glock_dq_uninit(&d_gh); 146 147 file->f_pos = offset; 148 149 return error; 150} 151 152/** 153 * fsflags_cvt 154 * @table: A table of 32 u32 flags 155 * @val: a 32 bit value to convert 156 * 157 * This function can be used to convert between fsflags values and 158 * GFS2's own flags values. 159 * 160 * Returns: the converted flags 161 */ 162static u32 fsflags_cvt(const u32 *table, u32 val) 163{ 164 u32 res = 0; 165 while(val) { 166 if (val & 1) 167 res |= *table; 168 table++; 169 val >>= 1; 170 } 171 return res; 172} 173 174static const u32 fsflags_to_gfs2[32] = { 175 [3] = GFS2_DIF_SYNC, 176 [4] = GFS2_DIF_IMMUTABLE, 177 [5] = GFS2_DIF_APPENDONLY, 178 [7] = GFS2_DIF_NOATIME, 179 [12] = GFS2_DIF_EXHASH, 180 [14] = GFS2_DIF_JDATA, 181 [20] = GFS2_DIF_DIRECTIO, 182}; 183 184static const u32 gfs2_to_fsflags[32] = { 185 [gfs2fl_Sync] = FS_SYNC_FL, 186 [gfs2fl_Immutable] = FS_IMMUTABLE_FL, 187 [gfs2fl_AppendOnly] = FS_APPEND_FL, 188 [gfs2fl_NoAtime] = FS_NOATIME_FL, 189 [gfs2fl_ExHash] = FS_INDEX_FL, 190 [gfs2fl_Jdata] = FS_JOURNAL_DATA_FL, 191 [gfs2fl_Directio] = FS_DIRECTIO_FL, 192 [gfs2fl_InheritDirectio] = FS_DIRECTIO_FL, 193 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, 194}; 195 196static int gfs2_get_flags(struct file *filp, u32 __user *ptr) 197{ 198 struct inode *inode = filp->f_path.dentry->d_inode; 199 struct gfs2_inode *ip = GFS2_I(inode); 200 struct gfs2_holder gh; 201 int error; 202 u32 fsflags; 203 204 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 205 error = gfs2_glock_nq_atime(&gh); 206 if (error) 207 return error; 208 209 fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags); 210 if (put_user(fsflags, ptr)) 211 error = -EFAULT; 212 213 gfs2_glock_dq_m(1, &gh); 214 gfs2_holder_uninit(&gh); 215 return error; 216} 217 218void gfs2_set_inode_flags(struct inode *inode) 219{ 220 struct gfs2_inode *ip = GFS2_I(inode); 221 struct gfs2_dinode_host *di = &ip->i_di; 222 unsigned int flags = inode->i_flags; 223 224 flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 225 if (di->di_flags & GFS2_DIF_IMMUTABLE) 226 flags |= S_IMMUTABLE; 227 if (di->di_flags & GFS2_DIF_APPENDONLY) 228 flags |= S_APPEND; 229 if (di->di_flags & GFS2_DIF_NOATIME) 230 flags |= S_NOATIME; 231 if (di->di_flags & GFS2_DIF_SYNC) 232 flags |= S_SYNC; 233 inode->i_flags = flags; 234} 235 236/* Flags that can be set by user space */ 237#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \ 238 GFS2_DIF_DIRECTIO| \ 239 GFS2_DIF_IMMUTABLE| \ 240 GFS2_DIF_APPENDONLY| \ 241 GFS2_DIF_NOATIME| \ 242 GFS2_DIF_SYNC| \ 243 GFS2_DIF_SYSTEM| \ 244 GFS2_DIF_INHERIT_DIRECTIO| \ 245 GFS2_DIF_INHERIT_JDATA) 246 247/** 248 * gfs2_set_flags - set flags on an inode 249 * @inode: The inode 250 * @flags: The flags to set 251 * @mask: Indicates which flags are valid 252 * 253 */ 254static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) 255{ 256 struct inode *inode = filp->f_path.dentry->d_inode; 257 struct gfs2_inode *ip = GFS2_I(inode); 258 struct gfs2_sbd *sdp = GFS2_SB(inode); 259 struct buffer_head *bh; 260 struct gfs2_holder gh; 261 int error; 262 u32 new_flags, flags; 263 264 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 265 if (error) 266 return error; 267 268 flags = ip->i_di.di_flags; 269 new_flags = (flags & ~mask) | (reqflags & mask); 270 if ((new_flags ^ flags) == 0) 271 goto out; 272 273 if (S_ISDIR(inode->i_mode)) { 274 if ((new_flags ^ flags) & GFS2_DIF_JDATA) 275 new_flags ^= (GFS2_DIF_JDATA|GFS2_DIF_INHERIT_JDATA); 276 if ((new_flags ^ flags) & GFS2_DIF_DIRECTIO) 277 new_flags ^= (GFS2_DIF_DIRECTIO|GFS2_DIF_INHERIT_DIRECTIO); 278 } 279 280 error = -EINVAL; 281 if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET) 282 goto out; 283 284 error = -EPERM; 285 if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE)) 286 goto out; 287 if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY)) 288 goto out; 289 if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) && 290 !capable(CAP_LINUX_IMMUTABLE)) 291 goto out; 292 if (!IS_IMMUTABLE(inode)) { 293 error = permission(inode, MAY_WRITE, NULL); 294 if (error) 295 goto out; 296 } 297 298 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 299 if (error) 300 goto out; 301 error = gfs2_meta_inode_buffer(ip, &bh); 302 if (error) 303 goto out_trans_end; 304 gfs2_trans_add_bh(ip->i_gl, bh, 1); 305 ip->i_di.di_flags = new_flags; 306 gfs2_dinode_out(ip, bh->b_data); 307 brelse(bh); 308 gfs2_set_inode_flags(inode); 309out_trans_end: 310 gfs2_trans_end(sdp); 311out: 312 gfs2_glock_dq_uninit(&gh); 313 return error; 314} 315 316static int gfs2_set_flags(struct file *filp, u32 __user *ptr) 317{ 318 u32 fsflags, gfsflags; 319 if (get_user(fsflags, ptr)) 320 return -EFAULT; 321 gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); 322 return do_gfs2_set_flags(filp, gfsflags, ~0); 323} 324 325static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 326{ 327 switch(cmd) { 328 case FS_IOC_GETFLAGS: 329 return gfs2_get_flags(filp, (u32 __user *)arg); 330 case FS_IOC_SETFLAGS: 331 return gfs2_set_flags(filp, (u32 __user *)arg); 332 } 333 return -ENOTTY; 334} 335 336 337/** 338 * gfs2_mmap - 339 * @file: The file to map 340 * @vma: The VMA which described the mapping 341 * 342 * Returns: 0 or error code 343 */ 344 345static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) 346{ 347 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 348 struct gfs2_holder i_gh; 349 int error; 350 351 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); 352 error = gfs2_glock_nq_atime(&i_gh); 353 if (error) { 354 gfs2_holder_uninit(&i_gh); 355 return error; 356 } 357 358 /* This is VM_MAYWRITE instead of VM_WRITE because a call 359 to mprotect() can turn on VM_WRITE later. */ 360 361 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == 362 (VM_MAYSHARE | VM_MAYWRITE)) 363 vma->vm_ops = &gfs2_vm_ops_sharewrite; 364 else 365 vma->vm_ops = &gfs2_vm_ops_private; 366 367 gfs2_glock_dq_uninit(&i_gh); 368 369 return error; 370} 371 372/** 373 * gfs2_open - open a file 374 * @inode: the inode to open 375 * @file: the struct file for this opening 376 * 377 * Returns: errno 378 */ 379 380static int gfs2_open(struct inode *inode, struct file *file) 381{ 382 struct gfs2_inode *ip = GFS2_I(inode); 383 struct gfs2_holder i_gh; 384 struct gfs2_file *fp; 385 int error; 386 387 fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); 388 if (!fp) 389 return -ENOMEM; 390 391 mutex_init(&fp->f_fl_mutex); 392 393 gfs2_assert_warn(GFS2_SB(inode), !file->private_data); 394 file->private_data = fp; 395 396 if (S_ISREG(ip->i_inode.i_mode)) { 397 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, 398 &i_gh); 399 if (error) 400 goto fail; 401 402 if (!(file->f_flags & O_LARGEFILE) && 403 ip->i_di.di_size > MAX_NON_LFS) { 404 error = -EFBIG; 405 goto fail_gunlock; 406 } 407 408 /* Listen to the Direct I/O flag */ 409 410 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) 411 file->f_flags |= O_DIRECT; 412 413 gfs2_glock_dq_uninit(&i_gh); 414 } 415 416 return 0; 417 418fail_gunlock: 419 gfs2_glock_dq_uninit(&i_gh); 420fail: 421 file->private_data = NULL; 422 kfree(fp); 423 return error; 424} 425 426/** 427 * gfs2_close - called to close a struct file 428 * @inode: the inode the struct file belongs to 429 * @file: the struct file being closed 430 * 431 * Returns: errno 432 */ 433 434static int gfs2_close(struct inode *inode, struct file *file) 435{ 436 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 437 struct gfs2_file *fp; 438 439 fp = file->private_data; 440 file->private_data = NULL; 441 442 if (gfs2_assert_warn(sdp, fp)) 443 return -EIO; 444 445 kfree(fp); 446 447 return 0; 448} 449 450/** 451 * gfs2_fsync - sync the dirty data for a file (across the cluster) 452 * @file: the file that points to the dentry (we ignore this) 453 * @dentry: the dentry that points to the inode to sync 454 * 455 * The VFS will flush "normal" data for us. We only need to worry 456 * about metadata here. For journaled data, we just do a log flush 457 * as we can't avoid it. Otherwise we can just bale out if datasync 458 * is set. For stuffed inodes we must flush the log in order to 459 * ensure that all data is on disk. 460 * 461 * The call to write_inode_now() is there to write back metadata and 462 * the inode itself. It does also try and write the data, but thats 463 * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite() 464 * for us. 465 * 466 * Returns: errno 467 */ 468 469static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) 470{ 471 struct inode *inode = dentry->d_inode; 472 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); 473 int ret = 0; 474 475 if (gfs2_is_jdata(GFS2_I(inode))) { 476 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); 477 return 0; 478 } 479 480 if (sync_state != 0) { 481 if (!datasync) 482 ret = write_inode_now(inode, 0); 483 484 if (gfs2_is_stuffed(GFS2_I(inode))) 485 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); 486 } 487 488 return ret; 489} 490 491/** 492 * gfs2_lock - acquire/release a posix lock on a file 493 * @file: the file pointer 494 * @cmd: either modify or retrieve lock state, possibly wait 495 * @fl: type and range of lock 496 * 497 * Returns: errno 498 */ 499 500static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) 501{ 502 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 503 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); 504 struct lm_lockname name = 505 { .ln_number = ip->i_num.no_addr, 506 .ln_type = LM_TYPE_PLOCK }; 507 508 if (!(fl->fl_flags & FL_POSIX)) 509 return -ENOLCK; 510 if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) 511 return -ENOLCK; 512 513 if (sdp->sd_args.ar_localflocks) { 514 if (IS_GETLK(cmd)) { 515 posix_test_lock(file, fl); 516 return 0; 517 } else { 518 return posix_lock_file_wait(file, fl); 519 } 520 } 521 522 if (cmd == F_CANCELLK) { 523 /* Hack: */ 524 cmd = F_SETLK; 525 fl->fl_type = F_UNLCK; 526 } 527 if (IS_GETLK(cmd)) 528 return gfs2_lm_plock_get(sdp, &name, file, fl); 529 else if (fl->fl_type == F_UNLCK) 530 return gfs2_lm_punlock(sdp, &name, file, fl); 531 else 532 return gfs2_lm_plock(sdp, &name, file, cmd, fl); 533} 534 535static int do_flock(struct file *file, int cmd, struct file_lock *fl) 536{ 537 struct gfs2_file *fp = file->private_data; 538 struct gfs2_holder *fl_gh = &fp->f_fl_gh; 539 struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode); 540 struct gfs2_glock *gl; 541 unsigned int state; 542 int flags; 543 int error = 0; 544 545 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; 546 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; 547 548 mutex_lock(&fp->f_fl_mutex); 549 550 gl = fl_gh->gh_gl; 551 if (gl) { 552 if (fl_gh->gh_state == state) 553 goto out; 554 gfs2_glock_hold(gl); 555 flock_lock_file_wait(file, 556 &(struct file_lock){.fl_type = F_UNLCK}); 557 gfs2_glock_dq_uninit(fl_gh); 558 } else { 559 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), 560 ip->i_num.no_addr, &gfs2_flock_glops, 561 CREATE, &gl); 562 if (error) 563 goto out; 564 } 565 566 gfs2_holder_init(gl, state, flags, fl_gh); 567 gfs2_glock_put(gl); 568 569 error = gfs2_glock_nq(fl_gh); 570 if (error) { 571 gfs2_holder_uninit(fl_gh); 572 if (error == GLR_TRYFAILED) 573 error = -EAGAIN; 574 } else { 575 error = flock_lock_file_wait(file, fl); 576 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); 577 } 578 579out: 580 mutex_unlock(&fp->f_fl_mutex); 581 return error; 582} 583 584static void do_unflock(struct file *file, struct file_lock *fl) 585{ 586 struct gfs2_file *fp = file->private_data; 587 struct gfs2_holder *fl_gh = &fp->f_fl_gh; 588 589 mutex_lock(&fp->f_fl_mutex); 590 flock_lock_file_wait(file, fl); 591 if (fl_gh->gh_gl) 592 gfs2_glock_dq_uninit(fl_gh); 593 mutex_unlock(&fp->f_fl_mutex); 594} 595 596/** 597 * gfs2_flock - acquire/release a flock lock on a file 598 * @file: the file pointer 599 * @cmd: either modify or retrieve lock state, possibly wait 600 * @fl: type and range of lock 601 * 602 * Returns: errno 603 */ 604 605static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) 606{ 607 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 608 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); 609 610 if (!(fl->fl_flags & FL_FLOCK)) 611 return -ENOLCK; 612 if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) 613 return -ENOLCK; 614 615 if (sdp->sd_args.ar_localflocks) 616 return flock_lock_file_wait(file, fl); 617 618 if (fl->fl_type == F_UNLCK) { 619 do_unflock(file, fl); 620 return 0; 621 } else { 622 return do_flock(file, cmd, fl); 623 } 624} 625 626const struct file_operations gfs2_file_fops = { 627 .llseek = gfs2_llseek, 628 .read = do_sync_read, 629 .aio_read = generic_file_aio_read, 630 .write = do_sync_write, 631 .aio_write = generic_file_aio_write, 632 .unlocked_ioctl = gfs2_ioctl, 633 .mmap = gfs2_mmap, 634 .open = gfs2_open, 635 .release = gfs2_close, 636 .fsync = gfs2_fsync, 637 .lock = gfs2_lock, 638 .sendfile = generic_file_sendfile, 639 .flock = gfs2_flock, 640 .splice_read = generic_file_splice_read, 641 .splice_write = generic_file_splice_write, 642}; 643 644const struct file_operations gfs2_dir_fops = { 645 .readdir = gfs2_readdir, 646 .unlocked_ioctl = gfs2_ioctl, 647 .open = gfs2_open, 648 .release = gfs2_close, 649 .fsync = gfs2_fsync, 650 .lock = gfs2_lock, 651 .flock = gfs2_flock, 652}; 653