1/* 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 3 */ 4 5#include <linux/time.h> 6#include <linux/fs.h> 7#include <linux/reiserfs_fs.h> 8#include <linux/reiserfs_acl.h> 9#include <linux/reiserfs_xattr.h> 10#include <linux/smp_lock.h> 11#include <linux/pagemap.h> 12#include <linux/highmem.h> 13#include <asm/uaccess.h> 14#include <asm/unaligned.h> 15#include <linux/buffer_head.h> 16#include <linux/mpage.h> 17#include <linux/writeback.h> 18#include <linux/quotaops.h> 19 20static int reiserfs_commit_write(struct file *f, struct page *page, 21 unsigned from, unsigned to); 22static int reiserfs_prepare_write(struct file *f, struct page *page, 23 unsigned from, unsigned to); 24 25void reiserfs_delete_inode(struct inode *inode) 26{ 27 /* We need blocks for transaction + (user+group) quota update (possibly delete) */ 28 int jbegin_count = 29 JOURNAL_PER_BALANCE_CNT * 2 + 30 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 31 struct reiserfs_transaction_handle th; 32 int err; 33 34 truncate_inode_pages(&inode->i_data, 0); 35 36 reiserfs_write_lock(inode->i_sb); 37 38 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 39 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ 40 reiserfs_delete_xattrs(inode); 41 42 if (journal_begin(&th, inode->i_sb, jbegin_count)) 43 goto out; 44 reiserfs_update_inode_transaction(inode); 45 46 err = reiserfs_delete_object(&th, inode); 47 48 /* Do quota update inside a transaction for journaled quotas. 
We must do that 49 * after delete_object so that quota updates go into the same transaction as 50 * stat data deletion */ 51 if (!err) 52 DQUOT_FREE_INODE(inode); 53 54 if (journal_end(&th, inode->i_sb, jbegin_count)) 55 goto out; 56 57 /* check return value from reiserfs_delete_object after 58 * ending the transaction 59 */ 60 if (err) 61 goto out; 62 63 /* all items of file are deleted, so we can remove "save" link */ 64 remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything 65 * about an error here */ 66 } else { 67 /* no object items are in the tree */ 68 ; 69 } 70 out: 71 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 72 inode->i_blocks = 0; 73 reiserfs_write_unlock(inode->i_sb); 74} 75 76static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, 77 __u32 objectid, loff_t offset, int type, int length) 78{ 79 key->version = version; 80 81 key->on_disk_key.k_dir_id = dirid; 82 key->on_disk_key.k_objectid = objectid; 83 set_cpu_key_k_offset(key, offset); 84 set_cpu_key_k_type(key, type); 85 key->key_length = length; 86} 87 88/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set 89 offset and type of key */ 90void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, 91 int type, int length) 92{ 93 _make_cpu_key(key, get_inode_item_key_version(inode), 94 le32_to_cpu(INODE_PKEY(inode)->k_dir_id), 95 le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type, 96 length); 97} 98 99// 100// when key is 0, do not set version and short key 101// 102inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, 103 int version, 104 loff_t offset, int type, int length, 105 int entry_count /*or ih_free_space */ ) 106{ 107 if (key) { 108 ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id); 109 ih->ih_key.k_objectid = 110 cpu_to_le32(key->on_disk_key.k_objectid); 111 } 112 put_ih_version(ih, version); 113 
set_le_ih_k_offset(ih, offset); 114 set_le_ih_k_type(ih, type); 115 put_ih_item_len(ih, length); 116 /* set_ih_free_space (ih, 0); */ 117 // for directory items it is entry count, for directs and stat 118 // datas - 0xffff, for indirects - 0 119 put_ih_entry_count(ih, entry_count); 120} 121 122// 123 124// Ugh. Not too eager for that.... 125// I cut the code until such time as I see a convincing argument (benchmark). 126// I don't want a bloated inode struct..., and I don't like code complexity.... 127 128/* cutting the code is fine, since it really isn't in use yet and is easy 129** to add back in. But, Vladimir has a really good idea here. Think 130** about what happens for reading a file. For each page, 131** The VFS layer calls reiserfs_readpage, who searches the tree to find 132** an indirect item. This indirect item has X number of pointers, where 133** X is a big number if we've done the block allocation right. But, 134** we only use one or two of these pointers during each call to readpage, 135** needlessly researching again later on. 136** 137** The size of the cache could be dynamic based on the size of the file. 138** 139** I'd also like to see us cache the location the stat data item, since 140** we are needlessly researching for that frequently. 141** 142** --chris 143*/ 144 145/* If this page has a file tail in it, and 146** it was read in by get_block_create_0, the page data is valid, 147** but tail is still sitting in a direct item, and we can't write to 148** it. So, look through this page, and check all the mapped buffers 149** to make sure they have valid block numbers. 
Any that don't need 150** to be unmapped, so that block_prepare_write will correctly call 151** reiserfs_get_block to convert the tail into an unformatted node 152*/ 153static inline void fix_tail_page_for_writing(struct page *page) 154{ 155 struct buffer_head *head, *next, *bh; 156 157 if (page && page_has_buffers(page)) { 158 head = page_buffers(page); 159 bh = head; 160 do { 161 next = bh->b_this_page; 162 if (buffer_mapped(bh) && bh->b_blocknr == 0) { 163 reiserfs_unmap_buffer(bh); 164 } 165 bh = next; 166 } while (bh != head); 167 } 168} 169 170/* reiserfs_get_block does not need to allocate a block only if it has been 171 done already or non-hole position has been found in the indirect item */ 172static inline int allocation_needed(int retval, b_blocknr_t allocated, 173 struct item_head *ih, 174 __le32 * item, int pos_in_item) 175{ 176 if (allocated) 177 return 0; 178 if (retval == POSITION_FOUND && is_indirect_le_ih(ih) && 179 get_block_num(item, pos_in_item)) 180 return 0; 181 return 1; 182} 183 184static inline int indirect_item_found(int retval, struct item_head *ih) 185{ 186 return (retval == POSITION_FOUND) && is_indirect_le_ih(ih); 187} 188 189static inline void set_block_dev_mapped(struct buffer_head *bh, 190 b_blocknr_t block, struct inode *inode) 191{ 192 map_bh(bh, inode->i_sb, block); 193} 194 195// 196// files which were created in the earlier version can not be longer, 197// than 2 gb 198// 199static int file_capable(struct inode *inode, long block) 200{ 201 if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. 
202 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb 203 return 1; 204 205 return 0; 206} 207 208/*static*/ int restart_transaction(struct reiserfs_transaction_handle *th, 209 struct inode *inode, struct treepath *path) 210{ 211 struct super_block *s = th->t_super; 212 int len = th->t_blocks_allocated; 213 int err; 214 215 BUG_ON(!th->t_trans_id); 216 BUG_ON(!th->t_refcount); 217 218 pathrelse(path); 219 220 /* we cannot restart while nested */ 221 if (th->t_refcount > 1) { 222 return 0; 223 } 224 reiserfs_update_sd(th, inode); 225 err = journal_end(th, s, len); 226 if (!err) { 227 err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); 228 if (!err) 229 reiserfs_update_inode_transaction(inode); 230 } 231 return err; 232} 233 234// it is called by get_block when create == 0. Returns block number 235// for 'block'-th logical block of file. When it hits direct item it 236// returns 0 (being called from bmap) or read direct item into piece 237// of page (bh_result) 238 239// Please improve the english/clarity in the comment above, as it is 240// hard to understand. 
/*
 * Look up (never allocate) the 'block'-th logical block of 'inode'.
 * See the comment above for the full contract.  The function retries
 * the tree search from scratch ("research") whenever the filesystem
 * generation changed and the item moved while we slept in kmap.
 */
static int _get_block_create_0(struct inode *inode, long block,
			       struct buffer_head *bh_result, int args)
{
	INITIALIZE_PATH(path);
	struct cpu_key key;
	struct buffer_head *bh;
	struct item_head *ih, tmp_ih;
	int fs_gen;
	int blocknr;
	char *p = NULL;		/* kmap()ed page address once we start copying tail bytes */
	int chars;
	int ret;
	int result;
	int done = 0;
	unsigned long offset;

	// prepare the key to look for the 'block'-th block of file
	make_cpu_key(&key, inode,
		     (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
		     3);

      research:
	result = search_for_position_by_key(inode->i_sb, &key, &path);
	if (result != POSITION_FOUND) {
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		if (result == IO_ERROR)
			return -EIO;
		// We do not return -ENOENT if there is a hole but page is uptodate, because it means
		// That there is some MMAPED data associated with it that is yet to be written to disk.
		if ((args & GET_BLOCK_NO_HOLE)
		    && !PageUptodate(bh_result->b_page)) {
			return -ENOENT;
		}
		return 0;
	}
	//
	bh = get_last_bh(&path);
	ih = get_ih(&path);
	if (is_indirect_le_ih(ih)) {
		__le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);

		/* FIXME: here we could cache indirect item or part of it in
		   the inode to avoid search_by_key in case of subsequent
		   access to file */
		blocknr = get_block_num(ind_item, path.pos_in_item);
		ret = 0;
		if (blocknr) {
			map_bh(bh_result, inode->i_sb, blocknr);
			/* last pointer of the indirect item: tell the caller
			 * the next block needs a fresh lookup */
			if (path.pos_in_item ==
			    ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
				set_buffer_boundary(bh_result);
			}
		} else
			// We do not return -ENOENT if there is a hole but page is uptodate, because it means
			// That there is some MMAPED data associated with it that is yet to be written to disk.
		if ((args & GET_BLOCK_NO_HOLE)
			    && !PageUptodate(bh_result->b_page)) {
			ret = -ENOENT;
		}

		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return ret;
	}
	// requested data are in direct item(s)
	if (!(args & GET_BLOCK_READ_DIRECT)) {
		// we are called by bmap. FIXME: we can not map block of file
		// when it is stored in direct item(s)
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return -ENOENT;
	}

	/* if we've got a direct item, and the buffer or page was uptodate,
	 ** we don't want to pull data off disk again.  skip to the
	 ** end, where we map the buffer and return
	 */
	if (buffer_uptodate(bh_result)) {
		goto finished;
	} else
		/*
		 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
		 ** pages without any buffers.  If the page is up to date, we don't want
		 ** read old data off disk.  Set the up to date bit on the buffer instead
		 ** and jump to the end
		 */
	if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
		set_buffer_uptodate(bh_result);
		goto finished;
	}
	// read file tail into part of page
	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
	fs_gen = get_generation(inode->i_sb);
	copy_item_head(&tmp_ih, ih);

	/* we only want to kmap if we are reading the tail into the page.
	 ** this is not the common case, so we don't kmap until we are
	 ** sure we need to.  But, this means the item might move if
	 ** kmap schedules
	 */
	if (!p) {
		p = (char *)kmap(bh_result->b_page);
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			goto research;
		}
	}
	p += offset;
	memset(p, 0, inode->i_sb->s_blocksize);
	do {
		if (!is_direct_le_ih(ih)) {
			BUG();
		}
		/* make sure we don't read more bytes than actually exist in
		 ** the file.  This can happen in odd cases where i_size isn't
		 ** correct, and when direct item padding results in a few
		 ** extra bytes at the end of the direct item
		 */
		if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
			break;
		if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
			chars =
			    inode->i_size - (le_ih_k_offset(ih) - 1) -
			    path.pos_in_item;
			done = 1;
		} else {
			chars = ih_item_len(ih) - path.pos_in_item;
		}
		memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);

		if (done)
			break;

		p += chars;

		/* we are done if the direct item we just read is not the
		 * last item of its node; otherwise the tail may continue
		 * in the right neighbor, so search again below */
		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
			break;

		// update key to look for the next piece
		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
		result = search_for_position_by_key(inode->i_sb, &key, &path);
		if (result != POSITION_FOUND)
			// i/o error most likely
			break;
		bh = get_last_bh(&path);
		ih = get_ih(&path);
	} while (1);

	flush_dcache_page(bh_result->b_page);
	kunmap(bh_result->b_page);

      finished:
	pathrelse(&path);

	if (result == IO_ERROR)
		return -EIO;

	/* this buffer has valid data, but isn't valid for io.  mapping it to
	 * block #0 tells the rest of reiserfs it just has a tail in it
	 */
	map_bh(bh_result, inode->i_sb, 0);
	set_buffer_uptodate(bh_result);
	return 0;
}

// this is called to create file map.  So, _get_block_create_0 will not
// read direct item
static int reiserfs_bmap(struct inode *inode, sector_t block,
			 struct buffer_head *bh_result, int create)
{
	if (!file_capable(inode, block))
		return -EFBIG;

	reiserfs_write_lock(inode->i_sb);
	/* do not read the direct item */
	_get_block_create_0(inode, block, bh_result, 0);
	reiserfs_write_unlock(inode->i_sb);
	/* NOTE(review): the _get_block_create_0 result is deliberately
	 * discarded here; a tail/hole just leaves bh_result unmapped */
	return 0;
}

/* special version of get_block that is only used by grab_tail_page right
** now.  It is sent to block_prepare_write, and when you try to get a
** block past the end of the file (or a block from a hole) it returns
** -ENOENT instead of a valid buffer.  block_prepare_write expects to
** be able to do i/o on the buffers returned, unless an error value
** is also returned.
**
** So, this allows block_prepare_write to be used for reading a single block
** in a page.  Where it does not produce a valid page for holes, or past the
** end of the file.  This turns out to be exactly what we need for reading
** tails for conversion.
**
** The point of the wrapper is forcing a certain value for create, even
** though the VFS layer is calling this function with create==1.  If you
** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
** don't use this function.
*/
static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
				       struct buffer_head *bh_result,
				       int create)
{
	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
}

/* This is special helper for reiserfs_get_block in case we are executing
   direct_IO request. */
static int reiserfs_get_blocks_direct_io(struct inode *inode,
					 sector_t iblock,
					 struct buffer_head *bh_result,
					 int create)
{
	int ret;

	bh_result->b_page = NULL;

	/* We set the b_size before reiserfs_get_block call since it is
	   referenced in convert_tail_for_hole() that may be called from
	   reiserfs_get_block() */
	bh_result->b_size = (1 << inode->i_blkbits);

	ret = reiserfs_get_block(inode, iblock, bh_result,
				 create | GET_BLOCK_NO_DANGLE);
	if (ret)
		goto out;

	/* don't allow direct io onto tail pages */
	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
		/* make sure future calls to the direct io funcs for this offset
		 ** in the file fail by unmapping the buffer
		 */
		clear_buffer_mapped(bh_result);
		ret = -EINVAL;
	}
	/* Possible unpacked tail. Flush the data before pages have
	   disappeared */
	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
		int err;
		lock_kernel();
		err = reiserfs_commit_for_inode(inode);
		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
		unlock_kernel();
		if (err < 0)
			ret = err;
	}
      out:
	return ret;
}

/*
** helper function for when reiserfs_get_block is called for a hole
** but the file tail is still in a direct item
** bh_result is the buffer head for the hole
** tail_offset is the offset of the start of the tail in the file
**
** This calls prepare_write, which will start a new transaction
** you should not be in a transaction, or have any paths held when you
** call this.
*/
static int convert_tail_for_hole(struct inode *inode,
				 struct buffer_head *bh_result,
				 loff_t tail_offset)
{
	unsigned long index;
	unsigned long tail_end;
	unsigned long tail_start;
	struct page *tail_page;
	struct page *hole_page = bh_result->b_page;
	int retval = 0;

	/* keys/offsets are 1-based; a tail must start at a block boundary */
	if ((tail_offset & (bh_result->b_size - 1)) != 1)
		return -EIO;

	/* always try to read until the end of the block */
	tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;

	index = tail_offset >> PAGE_CACHE_SHIFT;
	/* hole_page can be zero in case of direct_io, we are sure
	   that we cannot get here if we write with O_DIRECT into
	   tail page */
	if (!hole_page || index != hole_page->index) {
		tail_page = grab_cache_page(inode->i_mapping, index);
		retval = -ENOMEM;
		if (!tail_page) {
			goto out;
		}
	} else {
		tail_page = hole_page;
	}

	/* we don't have to make sure the conversion did not happen while
	 ** we were locking the page because anyone that could convert
	 ** must first take i_mutex.
	 **
	 ** We must fix the tail page for writing because it might have buffers
	 ** that are mapped, but have a block number of 0.  This indicates tail
	 ** data that has been read directly into the page, and block_prepare_write
	 ** won't trigger a get_block in this case.
	 */
	fix_tail_page_for_writing(tail_page);
	retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
	if (retval)
		goto unlock;

	/* tail conversion might change the data in the page */
	flush_dcache_page(tail_page);

	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);

      unlock:
	if (tail_page != hole_page) {
		unlock_page(tail_page);
		page_cache_release(tail_page);
	}
      out:
	return retval;
}

/* Allocate one unformatted-node block for 'inode'; with preallocation
 * enabled the preallocating allocator is used unless GET_BLOCK_NO_IMUX
 * is set.  Returns the block allocator's status code. */
static inline int _allocate_block(struct reiserfs_transaction_handle *th,
				  long block,
				  struct inode *inode,
				  b_blocknr_t * allocated_block_nr,
				  struct treepath *path, int flags)
{
	BUG_ON(!th->t_trans_id);

#ifdef REISERFS_PREALLOCATE
	if (!(flags & GET_BLOCK_NO_IMUX)) {
		return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
						  path, block);
	}
#endif
	return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
					 block);
}

/*
 * Map or allocate the 'block'-th logical block of 'inode' into bh_result.
 * 'create' is a GET_BLOCK_* flag mask, not a plain boolean:
 *   !GET_BLOCK_CREATE      -> pure lookup via _get_block_create_0
 *   GET_BLOCK_NO_DANGLE    -> any transaction started here is ended here
 *   GET_BLOCK_NO_HOLE      -> holes report -ENOENT (see wrapper above)
 * When the position is missing or sits in a direct item, the file is
 * extended with holes up to 'block' and direct items are converted to
 * indirect ones as needed, all inside a (possibly restarted) persistent
 * transaction.  Returns 0 or a negative errno.
 */
int reiserfs_get_block(struct inode *inode, sector_t block,
		       struct buffer_head *bh_result, int create)
{
	int repeat, retval = 0;
	b_blocknr_t allocated_block_nr = 0;	// b_blocknr_t is (unsigned) 32 bit int
	INITIALIZE_PATH(path);
	int pos_in_item;
	struct cpu_key key;
	struct buffer_head *bh, *unbh = NULL;
	struct item_head *ih, tmp_ih;
	__le32 *item;
	int done;
	int fs_gen;
	struct reiserfs_transaction_handle *th = NULL;
	/* space for 3 balancings plus the stat-data update, and the
	 * (user+group) quota updates */
	int jbegin_count =
	    JOURNAL_PER_BALANCE_CNT * 3 + 1 +
	    2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
	int version;
	int dangle = 1;		/* leave our transaction open on return? */
	loff_t new_offset =
	    (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;

	/* bad.... */
	reiserfs_write_lock(inode->i_sb);
	version = get_inode_item_key_version(inode);

	if (!file_capable(inode, block)) {
		reiserfs_write_unlock(inode->i_sb);
		return -EFBIG;
	}

	/* if !create, we aren't changing the FS, so we don't need to
	 ** log anything, so we don't need to start a transaction
	 */
	if (!(create & GET_BLOCK_CREATE)) {
		int ret;
		/* find number of block-th logical block of the file */
		ret = _get_block_create_0(inode, block, bh_result,
					  create | GET_BLOCK_READ_DIRECT);
		reiserfs_write_unlock(inode->i_sb);
		return ret;
	}
	/*
	 * if we're already in a transaction, make sure to close
	 * any new transactions we start in this func
	 */
	if ((create & GET_BLOCK_NO_DANGLE) ||
	    reiserfs_transaction_running(inode->i_sb))
		dangle = 0;

	/* If file is of such a size, that it might have a tail and tails are enabled
	 ** we should mark it as possibly needing tail packing on close
	 */
	if ((have_large_tails(inode->i_sb)
	     && inode->i_size < i_block_size(inode) * 4)
	    || (have_small_tails(inode->i_sb)
		&& inode->i_size < i_block_size(inode)))
		REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;

	/* set the key of the first byte in the 'block'-th block of file */
	make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
	if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
	      start_trans:
		th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
		if (!th) {
			retval = -ENOMEM;
			goto failure;
		}
		reiserfs_update_inode_transaction(inode);
	}
      research:

	retval = search_for_position_by_key(inode->i_sb, &key, &path);
	if (retval == IO_ERROR) {
		retval = -EIO;
		goto failure;
	}

	bh = get_last_bh(&path);
	ih = get_ih(&path);
	item = get_item(&path);
	pos_in_item = path.pos_in_item;

	/* remember the fs generation so we can detect if the item moved
	 * underneath us after a blocking operation */
	fs_gen = get_generation(inode->i_sb);
	copy_item_head(&tmp_ih, ih);

	if (allocation_needed
	    (retval, allocated_block_nr, ih, item, pos_in_item)) {
		/* we have to allocate block for the unformatted node */
		if (!th) {
			pathrelse(&path);
			goto start_trans;
		}

		repeat =
		    _allocate_block(th, block, inode, &allocated_block_nr,
				    &path, create);

		if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
			/* restart the transaction to give the journal a chance to free
			 ** some blocks.  releases the path, so we have to go back to
			 ** research if we succeed on the second try
			 */
			SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
			retval = restart_transaction(th, inode, &path);
			if (retval)
				goto failure;
			repeat =
			    _allocate_block(th, block, inode,
					    &allocated_block_nr, NULL, create);

			if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
				goto research;
			}
			if (repeat == QUOTA_EXCEEDED)
				retval = -EDQUOT;
			else
				retval = -ENOSPC;
			goto failure;
		}

		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			goto research;
		}
	}

	if (indirect_item_found(retval, ih)) {
		b_blocknr_t unfm_ptr;
		/* 'block'-th block is in the file already (there is
		   corresponding cell in some indirect item).  But it may be
		   zero unformatted node pointer (hole) */
		unfm_ptr = get_block_num(item, pos_in_item);
		if (unfm_ptr == 0) {
			/* use allocated block to plug the hole */
			reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
			if (fs_changed(fs_gen, inode->i_sb)
			    && item_moved(&tmp_ih, &path)) {
				reiserfs_restore_prepared_buffer(inode->i_sb,
								 bh);
				goto research;
			}
			set_buffer_new(bh_result);
			if (buffer_dirty(bh_result)
			    && reiserfs_data_ordered(inode->i_sb))
				reiserfs_add_ordered_list(inode, bh_result);
			put_block_num(item, pos_in_item, allocated_block_nr);
			unfm_ptr = allocated_block_nr;
			journal_mark_dirty(th, inode->i_sb, bh);
			reiserfs_update_sd(th, inode);
		}
		set_block_dev_mapped(bh_result, unfm_ptr, inode);
		pathrelse(&path);
		retval = 0;
		if (!dangle && th)
			retval = reiserfs_end_persistent_transaction(th);

		reiserfs_write_unlock(inode->i_sb);

		/* the item was found, so new blocks were not added to the file
		 ** there is no need to make sure the inode is updated with this
		 ** transaction
		 */
		return retval;
	}

	if (!th) {
		pathrelse(&path);
		goto start_trans;
	}

	/* desired position is not found or is in the direct item. We have
	   to append file with holes up to 'block'-th block converting
	   direct items to indirect one if necessary */
	done = 0;
	do {
		if (is_statdata_le_ih(ih)) {
			__le32 unp = 0;
			struct cpu_key tmp_key;

			/* indirect item has to be inserted */
			make_le_item_head(&tmp_ih, &key, version, 1,
					  TYPE_INDIRECT, UNFM_P_SIZE,
					  0 /* free_space */ );

			if (cpu_key_k_offset(&key) == 1) {
				/* we are going to add 'block'-th block to the file.  Use
				   allocated block for that */
				unp = cpu_to_le32(allocated_block_nr);
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				set_buffer_new(bh_result);
				done = 1;
			}
			tmp_key = key;	// ;)
			set_cpu_key_k_offset(&tmp_key, 1);
			PATH_LAST_POSITION(&path)++;

			retval =
			    reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
						 inode, (char *)&unp);
			if (retval) {
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;	// retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
			}
			//mark_tail_converted (inode);
		} else if (is_direct_le_ih(ih)) {
			/* direct item has to be converted */
			loff_t tail_offset;

			tail_offset =
			    ((le_ih_k_offset(ih) -
			      1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
			if (tail_offset == cpu_key_k_offset(&key)) {
				/* direct item we just found fits into block we have
				   to map. Convert it into unformatted node: use
				   bh_result for the conversion */
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				unbh = bh_result;
				done = 1;
			} else {
				/* we have to padd file tail stored in direct
				 * item(s) up to block size and convert it
				 * to unformatted node. FIXME: this should
				 * also get into page cache */

				pathrelse(&path);
				/*
				 * ugly, but we can only end the transaction if
				 * we aren't nested
				 */
				BUG_ON(!th->t_refcount);
				if (th->t_refcount == 1) {
					retval =
					    reiserfs_end_persistent_transaction
					    (th);
					th = NULL;
					if (retval)
						goto failure;
				}

				retval =
				    convert_tail_for_hole(inode, bh_result,
							  tail_offset);
				if (retval) {
					if (retval != -ENOSPC)
						reiserfs_warning(inode->i_sb,
								 "clm-6004: convert tail failed inode %lu, error %d",
								 inode->i_ino,
								 retval);
					if (allocated_block_nr) {
						/* the bitmap, the super, and the stat data == 3 */
						if (!th)
							th = reiserfs_persistent_transaction(inode->i_sb, 3);
						if (th)
							reiserfs_free_block(th,
									    inode,
									    allocated_block_nr,
									    1);
					}
					goto failure;
				}
				goto research;
			}
			retval =
			    direct2indirect(th, inode, &path, unbh,
					    tail_offset);
			if (retval) {
				reiserfs_unmap_buffer(unbh);
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;
			}
			/* it is important the set_buffer_uptodate is done after
			 ** the direct2indirect.  The buffer might contain valid
			 ** data newer than the data on disk (read by readpage, changed,
			 ** and then sent here by writepage).  direct2indirect needs
			 ** to know if unbh was already up to date, so it can decide
			 ** if the data in unbh needs to be replaced with data from
			 ** the disk
			 */
			set_buffer_uptodate(unbh);

			/* unbh->b_page == NULL in case of DIRECT_IO request, this means
			   buffer will disappear shortly, so it should not be added to
			 */
			if (unbh->b_page) {
				/* we've converted the tail, so we must
				 ** flush unbh before the transaction commits
				 */
				reiserfs_add_tail_list(inode, unbh);

				/* mark it dirty now to prevent commit_write from adding
				 ** this buffer to the inode's dirty buffer list
				 */
				/*
				 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
				 * It's still atomic, but it sets the page dirty too,
				 * which makes it eligible for writeback at any time by the
				 * VM (which was also the case with __mark_buffer_dirty())
				 */
				mark_buffer_dirty(unbh);
			}
		} else {
			/* append indirect item with holes if needed, when appending
			   pointer to 'block'-th block use block, which is already
			   allocated */
			struct cpu_key tmp_key;
			unp_t unf_single = 0;	// We use this in case we need to allocate only
			// one block which is a fastpath
			unp_t *un;
			__u64 max_to_insert =
			    MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
			    UNFM_P_SIZE;
			__u64 blocks_needed;

			RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
			       "vs-804: invalid position for append");
			/* indirect item has to be appended, set up key of that position */
			make_cpu_key(&tmp_key, inode,
				     le_key_k_offset(version,
						     &(ih->ih_key)) +
				     op_bytes_number(ih,
						     inode->i_sb->s_blocksize),
				     //pos_in_item * inode->i_sb->s_blocksize,
				     TYPE_INDIRECT, 3);	// key type is unimportant

			RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
			       "green-805: invalid offset");
			blocks_needed =
			    1 +
			    ((cpu_key_k_offset(&key) -
			      cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
			     s_blocksize_bits);

			if (blocks_needed == 1) {
				un = &unf_single;
			} else {
				un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC);	// We need to avoid scheduling.
				if (!un) {
					/* fall back to the single-block slow path */
					un = &unf_single;
					blocks_needed = 1;
					max_to_insert = 0;
				}
			}
			if (blocks_needed <= max_to_insert) {
				/* we are going to add target block to the file.  Use allocated
				   block for that */
				un[blocks_needed - 1] =
				    cpu_to_le32(allocated_block_nr);
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				set_buffer_new(bh_result);
				done = 1;
			} else {
				/* paste hole to the indirect item */
				/* If kmalloc failed, max_to_insert becomes zero and it means we
				   only have space for one block */
				blocks_needed =
				    max_to_insert ? max_to_insert : 1;
			}
			retval =
			    reiserfs_paste_into_item(th, &path, &tmp_key, inode,
						     (char *)un,
						     UNFM_P_SIZE *
						     blocks_needed);

			if (blocks_needed != 1)
				kfree(un);

			if (retval) {
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;
			}
			if (!done) {
				/* We need to mark new file size in case this function will be
				   interrupted/aborted later on. And we may do this only for
				   holes. */
				inode->i_size +=
				    inode->i_sb->s_blocksize * blocks_needed;
			}
		}

		if (done == 1)
			break;

		/* this loop could log more blocks than we had originally asked
		 ** for.  So, we have to allow the transaction to end if it is
		 ** too big or too full.  Update the inode so things are
		 ** consistent if we crash before the function returns
		 **
		 ** release the path so that anybody waiting on the path before
		 ** ending their transaction will be able to continue.
		 */
		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
			retval = restart_transaction(th, inode, &path);
			if (retval)
				goto failure;
		}
		/* inserting indirect pointers for a hole can take a
		 ** long time.  reschedule if needed
		 */
		cond_resched();

		retval = search_for_position_by_key(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			retval = -EIO;
			goto failure;
		}
		if (retval == POSITION_FOUND) {
			reiserfs_warning(inode->i_sb,
					 "vs-825: reiserfs_get_block: "
					 "%K should not be found", &key);
			retval = -EEXIST;
			if (allocated_block_nr)
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
			pathrelse(&path);
			goto failure;
		}
		bh = get_last_bh(&path);
		ih = get_ih(&path);
		item = get_item(&path);
		pos_in_item = path.pos_in_item;
	} while (1);

	retval = 0;

      failure:
	/* end our transaction unless the caller wants it left dangling,
	 * or it never actually started (no trans_id) after an error */
	if (th && (!dangle || (retval && !th->t_trans_id))) {
		int err;
		if (th->t_trans_id)
			reiserfs_update_sd(th, inode);
		err = reiserfs_end_persistent_transaction(th);
		if (err)
			retval = err;
	}

	reiserfs_write_unlock(inode->i_sb);
	reiserfs_check_path(&path);
	return retval;
}

/* address_space_operations readpages: plain mpage readahead driven by
 * reiserfs_get_block */
static int
reiserfs_readpages(struct file *file, struct address_space *mapping,
		   struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
}

/* Compute real number of used bytes by file
 * Following three functions can go away when we'll have enough space in stat item
 */
static int real_space_diff(struct inode *inode, int sd_size)
{
	int bytes;
	loff_t blocksize = inode->i_sb->s_blocksize;

	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
		return sd_size;

	/* End of file is also in full block with indirect reference, so round
	 ** up to the next block.
	 **
	 ** there is just no way to know if the tail is actually packed
	 ** on the file, so we have to assume it isn't.  When we pack the
	 ** tail, we add 4 bytes to pretend there really is an unformatted
	 ** node pointer
	 */
	bytes =
	    ((inode->i_size +
	      (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
	    sd_size;
	return bytes;
}

/* bytes actually consumed on disk: 512-byte sectors plus the per-file
 * metadata overhead computed by real_space_diff() */
static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
					int sd_size)
{
	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		return inode->i_size +
		    (loff_t) (real_space_diff(inode, sd_size));
	}
	return ((loff_t) real_space_diff(inode, sd_size)) +
	    (((loff_t) blocks) << 9);
}

/* Compute number of blocks used by file in ReiserFS counting */
static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
{
	loff_t bytes = inode_get_bytes(inode);
	loff_t real_space = real_space_diff(inode, sd_size);

	/* keeps fsck and non-quota versions of reiserfs happy */
	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		bytes += (loff_t) 511;
	}

	/* files from before the quota patch might i_blocks such that
	 ** bytes < real_space.  Deal with that here to prevent it from
	 ** going negative.
	 */
	if (bytes < real_space)
		return 0;
	return (bytes - real_space) >> 9;
}

//
// BAD: new directories have stat data of new type and all other items
// of old type.
Version stored in the inode says about body items, so
// in update_stat_data we can not rely on inode, but have to check
// item version directly
//

// called by read_locked_inode: fill the in-core inode from the stat
// data item (v1 or v2 format) that *path points at, then wire up the
// inode/file operations according to the file type
static void init_inode(struct inode *inode, struct treepath *path)
{
	struct buffer_head *bh;
	struct item_head *ih;
	__u32 rdev;		/* raw device number; only meaningful for
				   special files (see init_special_inode) */
	//int version = ITEM_VERSION_1;

	bh = PATH_PLAST_BUFFER(path);
	ih = PATH_PITEM_HEAD(path);

	copy_key(INODE_PKEY(inode), &(ih->ih_key));

	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
	REISERFS_I(inode)->i_flags = 0;
	REISERFS_I(inode)->i_prealloc_block = 0;
	REISERFS_I(inode)->i_prealloc_count = 0;
	REISERFS_I(inode)->i_trans_id = 0;
	REISERFS_I(inode)->i_jl = NULL;
	mutex_init(&(REISERFS_I(inode)->i_mmap));
	reiserfs_init_acl_access(inode);
	reiserfs_init_acl_default(inode);
	reiserfs_init_xattr_rwsem(inode);

	if (stat_data_v1(ih)) {
		struct stat_data_v1 *sd =
		    (struct stat_data_v1 *)B_I_PITEM(bh, ih);
		unsigned long blocks;

		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		set_inode_sd_version(inode, STAT_DATA_V1);
		inode->i_mode = sd_v1_mode(sd);
		inode->i_nlink = sd_v1_nlink(sd);
		inode->i_uid = sd_v1_uid(sd);
		inode->i_gid = sd_v1_gid(sd);
		inode->i_size = sd_v1_size(sd);
		inode->i_atime.tv_sec = sd_v1_atime(sd);
		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
		inode->i_ctime.tv_sec = sd_v1_ctime(sd);
		/* v1 stat data only stores whole-second timestamps */
		inode->i_atime.tv_nsec = 0;
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;

		inode->i_blocks = sd_v1_blocks(sd);
		inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		blocks = (inode->i_size + 511) >> 9;
		blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
		if (inode->i_blocks > blocks) {
			// there was a bug in <=3.5.23 when i_blocks could take negative
			// values.  Starting from 3.5.17 this value could even be stored in
			// stat data. For such files we set i_blocks based on file
			// size. Just 2 notes: this can be wrong for sparse files. On-disk value will be
			// only updated if file's inode will ever change
			inode->i_blocks = blocks;
		}

		rdev = sd_v1_rdev(sd);
		REISERFS_I(inode)->i_first_direct_byte =
		    sd_v1_first_direct_byte(sd);
		/* an early bug in the quota code can give us an odd number for the
		 ** block count.  This is incorrect, fix it here.
		 */
		if (inode->i_blocks & 1) {
			inode->i_blocks++;
		}
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V1_SIZE));
		/* nopack is initially zero for v1 objects. For v2 objects,
		   nopack is initialised from sd_attrs */
		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
	} else {
		// new stat data found, but object may have old items
		// (directories and symlinks)
		struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);

		inode->i_mode = sd_v2_mode(sd);
		inode->i_nlink = sd_v2_nlink(sd);
		inode->i_uid = sd_v2_uid(sd);
		inode->i_size = sd_v2_size(sd);
		inode->i_gid = sd_v2_gid(sd);
		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
		inode->i_atime.tv_sec = sd_v2_atime(sd);
		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;
		inode->i_atime.tv_nsec = 0;
		inode->i_blocks = sd_v2_blocks(sd);
		rdev = sd_v2_rdev(sd);
		/* device nodes reuse the generation slot for rdev, so fall
		   back to the dir id for their generation */
		if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
			inode->i_generation =
			    le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		else
			inode->i_generation = sd_v2_generation(sd);

		/* directories and symlinks keep old-format item keys even
		   with new stat data */
		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
			set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		else
			set_inode_item_key_version(inode, KEY_FORMAT_3_6);
		REISERFS_I(inode)->i_first_direct_byte = 0;
		set_inode_sd_version(inode, STAT_DATA_V2);
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V2_SIZE));
		/* read persistent inode attributes from sd and initialise
		   generic inode flags from them */
		REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
		sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
	}

	pathrelse(path);
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &reiserfs_file_inode_operations;
		inode->i_fop = &reiserfs_file_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &reiserfs_dir_inode_operations;
		inode->i_fop = &reiserfs_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &reiserfs_symlink_inode_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else {
		inode->i_blocks = 0;
		inode->i_op = &reiserfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
	}
}

// update new stat data with inode fields
static void inode2sd(void *sd, struct inode *inode, loff_t size)
{
	struct stat_data *sd_v2 = (struct stat_data *)sd;
	__u16 flags;

	set_sd_v2_mode(sd_v2, inode->i_mode);
	set_sd_v2_nlink(sd_v2, inode->i_nlink);
	set_sd_v2_uid(sd_v2, inode->i_uid);
	set_sd_v2_size(sd_v2, size);
	set_sd_v2_gid(sd_v2, inode->i_gid);
	set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
	set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
	set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
	set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
	/* generation and rdev share the same on-disk slot; device nodes
	   store rdev there */
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
	else
		set_sd_v2_generation(sd_v2, inode->i_generation);
	flags = REISERFS_I(inode)->i_attrs;
	i_attrs_to_sd_attrs(inode, &flags);
	set_sd_v2_attrs(sd_v2, flags);
}

// used to copy
inode's fields to old stat data
static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
{
	struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;

	set_sd_v1_mode(sd_v1, inode->i_mode);
	set_sd_v1_uid(sd_v1, inode->i_uid);
	set_sd_v1_gid(sd_v1, inode->i_gid);
	set_sd_v1_nlink(sd_v1, inode->i_nlink);
	set_sd_v1_size(sd_v1, size);
	set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
	set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
	set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);

	/* rdev and the block count share one on-disk slot in v1 */
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
	else
		set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));

	// Sigh. i_first_direct_byte is back
	set_sd_v1_first_direct_byte(sd_v1,
				    REISERFS_I(inode)->i_first_direct_byte);
}

/* NOTE, you must prepare the buffer head before sending it here,
** and then log it after the call
*/
static void update_stat_data(struct treepath *path, struct inode *inode,
			     loff_t size)
{
	struct buffer_head *bh;
	struct item_head *ih;

	bh = PATH_PLAST_BUFFER(path);
	ih = PATH_PITEM_HEAD(path);

	if (!is_statdata_le_ih(ih))
		reiserfs_panic(inode->i_sb,
			       "vs-13065: update_stat_data: key %k, found item %h",
			       INODE_PKEY(inode), ih);

	/* the on-disk item version decides the layout, not the in-core
	   inode version (see the BAD: note above init_inode) */
	if (stat_data_v1(ih)) {
		// path points to old stat data
		inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
	} else {
		inode2sd(B_I_PITEM(bh, ih), inode, size);
	}

	return;
}

/* look up the inode's stat data item and copy the in-core fields into
** it, retrying the search if the tree moved under us; the caller's
** transaction logs the buffer */
void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
			     struct inode *inode, loff_t size)
{
	struct cpu_key key;
	INITIALIZE_PATH(path);
	struct buffer_head *bh;
	int fs_gen;
	struct item_head *ih, tmp_ih;
	int retval;

	BUG_ON(!th->t_trans_id);

	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);	//key type is unimportant

	for (;;) {
		int pos;
		/* look for the object's stat data */
		retval = search_item(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			reiserfs_warning(inode->i_sb,
					 "vs-13050: reiserfs_update_sd: "
					 "i/o failure occurred trying to update %K stat data",
					 &key);
			return;
		}
		if (retval == ITEM_NOT_FOUND) {
			pos = PATH_LAST_POSITION(&path);
			pathrelse(&path);
			if (inode->i_nlink == 0) {
				/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
				return;
			}
			reiserfs_warning(inode->i_sb,
					 "vs-13060: reiserfs_update_sd: "
					 "stat data of object %k (nlink == %d) not found (pos %d)",
					 INODE_PKEY(inode), inode->i_nlink,
					 pos);
			reiserfs_check_path(&path);
			return;
		}

		/* sigh, prepare_for_journal might schedule.  When it schedules the
		 ** FS might change.  We have to detect that, and loop back to the
		 ** search if the stat data item has moved
		 */
		bh = get_last_bh(&path);
		ih = get_ih(&path);
		copy_item_head(&tmp_ih, ih);
		fs_gen = get_generation(inode->i_sb);
		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
			continue;	/* Stat_data item has been moved after scheduling. */
		}
		break;
	}
	update_stat_data(&path, inode, size);
	journal_mark_dirty(th, th->t_super, bh);
	pathrelse(&path);
	return;
}

/* reiserfs_read_locked_inode is called to read the inode off disk, and it
** does a make_bad_inode when things go wrong.  But, we need to make sure
** and clear the key in the private portion of the inode, otherwise a
** corresponding iput might try to delete whatever object the inode last
** represented.
1365*/ 1366static void reiserfs_make_bad_inode(struct inode *inode) 1367{ 1368 memset(INODE_PKEY(inode), 0, KEY_SIZE); 1369 make_bad_inode(inode); 1370} 1371 1372// 1373// initially this function was derived from minix or ext2's analog and 1374// evolved as the prototype did 1375// 1376 1377int reiserfs_init_locked_inode(struct inode *inode, void *p) 1378{ 1379 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; 1380 inode->i_ino = args->objectid; 1381 INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); 1382 return 0; 1383} 1384 1385/* looks for stat data in the tree, and fills up the fields of in-core 1386 inode stat data fields */ 1387void reiserfs_read_locked_inode(struct inode *inode, 1388 struct reiserfs_iget_args *args) 1389{ 1390 INITIALIZE_PATH(path_to_sd); 1391 struct cpu_key key; 1392 unsigned long dirino; 1393 int retval; 1394 1395 dirino = args->dirid; 1396 1397 /* set version 1, version 2 could be used too, because stat data 1398 key is the same in both versions */ 1399 key.version = KEY_FORMAT_3_5; 1400 key.on_disk_key.k_dir_id = dirino; 1401 key.on_disk_key.k_objectid = inode->i_ino; 1402 key.on_disk_key.k_offset = 0; 1403 key.on_disk_key.k_type = 0; 1404 1405 /* look for the object's stat data */ 1406 retval = search_item(inode->i_sb, &key, &path_to_sd); 1407 if (retval == IO_ERROR) { 1408 reiserfs_warning(inode->i_sb, 1409 "vs-13070: reiserfs_read_locked_inode: " 1410 "i/o failure occurred trying to find stat data of %K", 1411 &key); 1412 reiserfs_make_bad_inode(inode); 1413 return; 1414 } 1415 if (retval != ITEM_FOUND) { 1416 /* a stale NFS handle can trigger this without it being an error */ 1417 pathrelse(&path_to_sd); 1418 reiserfs_make_bad_inode(inode); 1419 inode->i_nlink = 0; 1420 return; 1421 } 1422 1423 init_inode(inode, &path_to_sd); 1424 1425 /* It is possible that knfsd is trying to access inode of a file 1426 that is being removed from the disk by some other thread. 
As we 1427 update sd on unlink all that is required is to check for nlink 1428 here. This bug was first found by Sizif when debugging 1429 SquidNG/Butterfly, forgotten, and found again after Philippe 1430 Gramoulle <philippe.gramoulle@mmania.com> reproduced it. 1431 1432 More logical fix would require changes in fs/inode.c:iput() to 1433 remove inode from hash-table _after_ fs cleaned disk stuff up and 1434 in iget() to return NULL if I_FREEING inode is found in 1435 hash-table. */ 1436 /* Currently there is one place where it's ok to meet inode with 1437 nlink==0: processing of open-unlinked and half-truncated files 1438 during mount (fs/reiserfs/super.c:finish_unfinished()). */ 1439 if ((inode->i_nlink == 0) && 1440 !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { 1441 reiserfs_warning(inode->i_sb, 1442 "vs-13075: reiserfs_read_locked_inode: " 1443 "dead inode read from disk %K. " 1444 "This is likely to be race with knfsd. Ignore", 1445 &key); 1446 reiserfs_make_bad_inode(inode); 1447 } 1448 1449 reiserfs_check_path(&path_to_sd); /* init inode should be relsing */ 1450 1451} 1452 1453/** 1454 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). 1455 * 1456 * @inode: inode from hash table to check 1457 * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args. 1458 * 1459 * This function is called by iget5_locked() to distinguish reiserfs inodes 1460 * having the same inode numbers. Such inodes can only exist due to some 1461 * error condition. One of them should be bad. Inodes with identical 1462 * inode numbers (objectids) are distinguished by parent directory ids. 
1463 * 1464 */ 1465int reiserfs_find_actor(struct inode *inode, void *opaque) 1466{ 1467 struct reiserfs_iget_args *args; 1468 1469 args = opaque; 1470 /* args is already in CPU order */ 1471 return (inode->i_ino == args->objectid) && 1472 (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); 1473} 1474 1475struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) 1476{ 1477 struct inode *inode; 1478 struct reiserfs_iget_args args; 1479 1480 args.objectid = key->on_disk_key.k_objectid; 1481 args.dirid = key->on_disk_key.k_dir_id; 1482 inode = iget5_locked(s, key->on_disk_key.k_objectid, 1483 reiserfs_find_actor, reiserfs_init_locked_inode, 1484 (void *)(&args)); 1485 if (!inode) 1486 return ERR_PTR(-ENOMEM); 1487 1488 if (inode->i_state & I_NEW) { 1489 reiserfs_read_locked_inode(inode, &args); 1490 unlock_new_inode(inode); 1491 } 1492 1493 if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) { 1494 /* either due to i/o error or a stale NFS handle */ 1495 iput(inode); 1496 inode = NULL; 1497 } 1498 return inode; 1499} 1500 1501struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) 1502{ 1503 __u32 *data = vobjp; 1504 struct cpu_key key; 1505 struct dentry *result; 1506 struct inode *inode; 1507 1508 key.on_disk_key.k_objectid = data[0]; 1509 key.on_disk_key.k_dir_id = data[1]; 1510 reiserfs_write_lock(sb); 1511 inode = reiserfs_iget(sb, &key); 1512 if (inode && !IS_ERR(inode) && data[2] != 0 && 1513 data[2] != inode->i_generation) { 1514 iput(inode); 1515 inode = NULL; 1516 } 1517 reiserfs_write_unlock(sb); 1518 if (!inode) 1519 inode = ERR_PTR(-ESTALE); 1520 if (IS_ERR(inode)) 1521 return ERR_PTR(PTR_ERR(inode)); 1522 result = d_alloc_anon(inode); 1523 if (!result) { 1524 iput(inode); 1525 return ERR_PTR(-ENOMEM); 1526 } 1527 return result; 1528} 1529 1530struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data, 1531 int len, int fhtype, 1532 int (*acceptable) (void *contect, 1533 struct dentry 
* de), 1534 void *context) 1535{ 1536 __u32 obj[3], parent[3]; 1537 1538 /* fhtype happens to reflect the number of u32s encoded. 1539 * due to a bug in earlier code, fhtype might indicate there 1540 * are more u32s then actually fitted. 1541 * so if fhtype seems to be more than len, reduce fhtype. 1542 * Valid types are: 1543 * 2 - objectid + dir_id - legacy support 1544 * 3 - objectid + dir_id + generation 1545 * 4 - objectid + dir_id + objectid and dirid of parent - legacy 1546 * 5 - objectid + dir_id + generation + objectid and dirid of parent 1547 * 6 - as above plus generation of directory 1548 * 6 does not fit in NFSv2 handles 1549 */ 1550 if (fhtype > len) { 1551 if (fhtype != 6 || len != 5) 1552 reiserfs_warning(sb, 1553 "nfsd/reiserfs, fhtype=%d, len=%d - odd", 1554 fhtype, len); 1555 fhtype = 5; 1556 } 1557 1558 obj[0] = data[0]; 1559 obj[1] = data[1]; 1560 if (fhtype == 3 || fhtype >= 5) 1561 obj[2] = data[2]; 1562 else 1563 obj[2] = 0; /* generation number */ 1564 1565 if (fhtype >= 4) { 1566 parent[0] = data[fhtype >= 5 ? 3 : 2]; 1567 parent[1] = data[fhtype >= 5 ? 4 : 3]; 1568 if (fhtype == 6) 1569 parent[2] = data[5]; 1570 else 1571 parent[2] = 0; 1572 } 1573 return sb->s_export_op->find_exported_dentry(sb, obj, 1574 fhtype < 4 ? NULL : parent, 1575 acceptable, context); 1576} 1577 1578int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, 1579 int need_parent) 1580{ 1581 struct inode *inode = dentry->d_inode; 1582 int maxlen = *lenp; 1583 1584 if (maxlen < 3) 1585 return 255; 1586 1587 data[0] = inode->i_ino; 1588 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1589 data[2] = inode->i_generation; 1590 *lenp = 3; 1591 /* no room for directory info? 
return what we've stored so far */ 1592 if (maxlen < 5 || !need_parent) 1593 return 3; 1594 1595 spin_lock(&dentry->d_lock); 1596 inode = dentry->d_parent->d_inode; 1597 data[3] = inode->i_ino; 1598 data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1599 *lenp = 5; 1600 if (maxlen >= 6) { 1601 data[5] = inode->i_generation; 1602 *lenp = 6; 1603 } 1604 spin_unlock(&dentry->d_lock); 1605 return *lenp; 1606} 1607 1608/* looks for stat data, then copies fields to it, marks the buffer 1609 containing stat data as dirty */ 1610/* reiserfs inodes are never really dirty, since the dirty inode call 1611** always logs them. This call allows the VFS inode marking routines 1612** to properly mark inodes for datasync and such, but only actually 1613** does something when called for a synchronous update. 1614*/ 1615int reiserfs_write_inode(struct inode *inode, int do_sync) 1616{ 1617 struct reiserfs_transaction_handle th; 1618 int jbegin_count = 1; 1619 1620 if (inode->i_sb->s_flags & MS_RDONLY) 1621 return -EROFS; 1622 /* memory pressure can sometimes initiate write_inode calls with sync == 1, 1623 ** these cases are just when the system needs ram, not when the 1624 ** inode needs to reach disk for safety, and they can safely be 1625 ** ignored because the altered inode has already been logged. 1626 */ 1627 if (do_sync && !(current->flags & PF_MEMALLOC)) { 1628 reiserfs_write_lock(inode->i_sb); 1629 if (!journal_begin(&th, inode->i_sb, jbegin_count)) { 1630 reiserfs_update_sd(&th, inode); 1631 journal_end_sync(&th, inode->i_sb, jbegin_count); 1632 } 1633 reiserfs_write_unlock(inode->i_sb); 1634 } 1635 return 0; 1636} 1637 1638/* stat data of new object is inserted already, this inserts the item 1639 containing "." and ".." 
entries */ 1640static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, 1641 struct inode *inode, 1642 struct item_head *ih, struct treepath *path, 1643 struct inode *dir) 1644{ 1645 struct super_block *sb = th->t_super; 1646 char empty_dir[EMPTY_DIR_SIZE]; 1647 char *body = empty_dir; 1648 struct cpu_key key; 1649 int retval; 1650 1651 BUG_ON(!th->t_trans_id); 1652 1653 _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id), 1654 le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, 1655 TYPE_DIRENTRY, 3 /*key length */ ); 1656 1657 /* compose item head for new item. Directories consist of items of 1658 old type (ITEM_VERSION_1). Do not set key (second arg is 0), it 1659 is done by reiserfs_new_inode */ 1660 if (old_format_only(sb)) { 1661 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, 1662 TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); 1663 1664 make_empty_dir_item_v1(body, ih->ih_key.k_dir_id, 1665 ih->ih_key.k_objectid, 1666 INODE_PKEY(dir)->k_dir_id, 1667 INODE_PKEY(dir)->k_objectid); 1668 } else { 1669 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, 1670 TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); 1671 1672 make_empty_dir_item(body, ih->ih_key.k_dir_id, 1673 ih->ih_key.k_objectid, 1674 INODE_PKEY(dir)->k_dir_id, 1675 INODE_PKEY(dir)->k_objectid); 1676 } 1677 1678 /* look for place in the tree for new item */ 1679 retval = search_item(sb, &key, path); 1680 if (retval == IO_ERROR) { 1681 reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: " 1682 "i/o failure occurred creating new directory"); 1683 return -EIO; 1684 } 1685 if (retval == ITEM_FOUND) { 1686 pathrelse(path); 1687 reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: " 1688 "object with this key exists (%k)", 1689 &(ih->ih_key)); 1690 return -EEXIST; 1691 } 1692 1693 /* insert item, that is empty directory item */ 1694 return reiserfs_insert_item(th, path, &key, ih, inode, body); 1695} 1696 1697/* stat data of object has been inserted, this inserts the item 1698 
containing the body of symlink */ 1699static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ 1700 struct item_head *ih, 1701 struct treepath *path, const char *symname, 1702 int item_len) 1703{ 1704 struct super_block *sb = th->t_super; 1705 struct cpu_key key; 1706 int retval; 1707 1708 BUG_ON(!th->t_trans_id); 1709 1710 _make_cpu_key(&key, KEY_FORMAT_3_5, 1711 le32_to_cpu(ih->ih_key.k_dir_id), 1712 le32_to_cpu(ih->ih_key.k_objectid), 1713 1, TYPE_DIRECT, 3 /*key length */ ); 1714 1715 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 1716 0 /*free_space */ ); 1717 1718 /* look for place in the tree for new item */ 1719 retval = search_item(sb, &key, path); 1720 if (retval == IO_ERROR) { 1721 reiserfs_warning(sb, "vs-13080: reiserfs_new_symlinik: " 1722 "i/o failure occurred creating new symlink"); 1723 return -EIO; 1724 } 1725 if (retval == ITEM_FOUND) { 1726 pathrelse(path); 1727 reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: " 1728 "object with this key exists (%k)", 1729 &(ih->ih_key)); 1730 return -EEXIST; 1731 } 1732 1733 /* insert item, that is body of symlink */ 1734 return reiserfs_insert_item(th, path, &key, ih, inode, symname); 1735} 1736 1737/* inserts the stat data into the tree, and then calls 1738 reiserfs_new_directory (to insert ".", ".." item if new object is 1739 directory) or reiserfs_new_symlink (to insert symlink body if new 1740 object is symlink) or nothing (if new object is regular file) 1741 1742 NOTE! uid and gid must already be set in the inode. If we return 1743 non-zero due to an error, we have to drop the quota previously allocated 1744 for the fresh inode. This can only be done outside a transaction, so 1745 if we return non-zero, we also end the transaction. 
*/
int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
		       struct inode *dir, int mode, const char *symname,
		       /* 0 for regular, EMTRY_DIR_SIZE for dirs,
		          strlen (symname) for symlinks) */
		       loff_t i_size, struct dentry *dentry,
		       struct inode *inode)
{
	struct super_block *sb;
	INITIALIZE_PATH(path_to_key);
	struct cpu_key key;
	struct item_head ih;
	struct stat_data sd;
	int retval;
	int err;

	BUG_ON(!th->t_trans_id);

	/* charge the new inode to quota first; undone via
	   DQUOT_FREE_INODE in out_bad_inode on any later failure */
	if (DQUOT_ALLOC_INODE(inode)) {
		err = -EDQUOT;
		goto out_end_trans;
	}
	if (!dir->i_nlink) {
		/* parent directory was removed under us */
		err = -EPERM;
		goto out_bad_inode;
	}

	sb = dir->i_sb;

	/* item head of new item */
	ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
	ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
	if (!ih.ih_key.k_objectid) {
		err = -ENOMEM;
		goto out_bad_inode;
	}
	if (old_format_only(sb))
		/* not a perfect generation count, as object ids can be reused, but
		 ** this is as good as reiserfs can do right now.
		 ** note that the private part of inode isn't filled in yet, we have
		 ** to use the directory.
		 */
		inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
	else
#if defined(USE_INODE_GENERATION_COUNTER)
		inode->i_generation =
		    le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
#else
		inode->i_generation = ++event;
#endif

	/* fill stat data */
	inode->i_nlink = (S_ISDIR(mode) ? 2 : 1);

	/* uid and gid must already be set by the caller for quota init */

	/* symlink cannot be immutable or append only, right? */
	if (S_ISLNK(inode->i_mode))
		inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);

	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
	inode->i_size = i_size;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
	    U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;

	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
	REISERFS_I(inode)->i_flags = 0;
	REISERFS_I(inode)->i_prealloc_block = 0;
	REISERFS_I(inode)->i_prealloc_count = 0;
	REISERFS_I(inode)->i_trans_id = 0;
	REISERFS_I(inode)->i_jl = NULL;
	/* inherit persistent attributes from the parent directory */
	REISERFS_I(inode)->i_attrs =
	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
	mutex_init(&(REISERFS_I(inode)->i_mmap));
	reiserfs_init_acl_access(inode);
	reiserfs_init_acl_default(inode);
	reiserfs_init_xattr_rwsem(inode);

	if (old_format_only(sb))
		make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
				  TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
	else
		make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
				  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);

	/* key to search for correct place for new stat data */
	_make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
		      le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
		      TYPE_STAT_DATA, 3 /*key length */ );

	/* find proper place for inserting of stat data */
	retval = search_item(sb, &key, &path_to_key);
	if (retval == IO_ERROR) {
		err = -EIO;
		goto out_bad_inode;
	}
	if (retval == ITEM_FOUND) {
		pathrelse(&path_to_key);
		err = -EEXIST;
		goto out_bad_inode;
	}
	if (old_format_only(sb)) {
		if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
			pathrelse(&path_to_key);
			/* i_uid or i_gid is too big to be stored in stat data v3.5 */
			err = -EINVAL;
			goto out_bad_inode;
		}
		inode2sd_v1(&sd, inode, inode->i_size);
	} else {
		inode2sd(&sd, inode, inode->i_size);
	}
	// these do not go to on-disk stat data
	inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);

	// store in in-core inode the key of stat data and version all
	// object items will have (directory items will have old offset
	// format, other new objects will consist of new items)
	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
	if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
	else
		set_inode_item_key_version(inode, KEY_FORMAT_3_6);
	if (old_format_only(sb))
		set_inode_sd_version(inode, STAT_DATA_V1);
	else
		set_inode_sd_version(inode, STAT_DATA_V2);

	/* insert the stat data into the tree */
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
	if (REISERFS_I(dir)->new_packing_locality)
		th->displace_new_blocks = 1;
#endif
	retval =
	    reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
				 (char *)(&sd));
	if (retval) {
		err = retval;
		reiserfs_check_path(&path_to_key);
		goto out_bad_inode;
	}
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
	if (!th->displace_new_blocks)
		REISERFS_I(dir)->new_packing_locality = 0;
#endif
	if (S_ISDIR(mode)) {
		/* insert item with "." and ".." */
		retval =
		    reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
	}

	if (S_ISLNK(mode)) {
		/* insert body of symlink */
		if (!old_format_only(sb))
			i_size = ROUND_UP(i_size);
		retval =
		    reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
					 i_size);
	}
	if (retval) {
		/* stat data is already in the tree: tear down via
		   out_inserted_sd, not out_bad_inode */
		err = retval;
		reiserfs_check_path(&path_to_key);
		journal_end(th, th->t_super, th->t_blocks_allocated);
		goto out_inserted_sd;
	}

	if (reiserfs_posixacl(inode->i_sb)) {
		retval = reiserfs_inherit_default_acl(dir, dentry, inode);
		if (retval) {
			err = retval;
			reiserfs_check_path(&path_to_key);
			journal_end(th, th->t_super, th->t_blocks_allocated);
			goto out_inserted_sd;
		}
	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
		reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, "
				 "but vfs thinks they are!");
	} else if (is_reiserfs_priv_object(dir)) {
		reiserfs_mark_inode_private(inode);
	}

	insert_inode_hash(inode);
	reiserfs_update_sd(th, inode);
	reiserfs_check_path(&path_to_key);

	return 0;

/* it looks like you can easily compress these two goto targets into
 * one.  Keeping it like this doesn't actually hurt anything, and they
 * are place holders for what the quota code actually needs.
 */
      out_bad_inode:
	/* Invalidate the object, nothing was inserted yet */
	INODE_PKEY(inode)->k_objectid = 0;

	/* Quota change must be inside a transaction for journaling */
	DQUOT_FREE_INODE(inode);

      out_end_trans:
	journal_end(th, th->t_super, th->t_blocks_allocated);
	/* Drop can be outside and it needs more credits so it's better to have it outside */
	DQUOT_DROP(inode);
	inode->i_flags |= S_NOQUOTA;
	make_bad_inode(inode);

      out_inserted_sd:
	inode->i_nlink = 0;
	th->t_trans_id = 0;	/* so the caller can't use this handle later */

	/* If we were inheriting an ACL, we need to release the lock so that
	 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
	 * code really needs to be reworked, but this will take care of it
	 * for now. -jeffm */
#ifdef CONFIG_REISERFS_FS_POSIX_ACL
	if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
		reiserfs_write_unlock_xattrs(dir->i_sb);
		iput(inode);
		reiserfs_write_lock_xattrs(dir->i_sb);
	} else
#endif
		iput(inode);
	return err;
}

/*
** finds the tail page in the page cache,
** reads the last block in.
**
** On success, page_result is set to a locked, pinned page, and bh_result
** is set to an up to date buffer for the last block in the file.  returns 0.
**
** tail conversion is not done, so bh_result might not be valid for writing
** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
** trying to write the block.
**
** on failure, nonzero is returned, page_result and bh_result are untouched.
*/
static int grab_tail_page(struct inode *p_s_inode,
			  struct page **page_result,
			  struct buffer_head **bh_result)
{

	/* we want the page with the last byte in the file,
	 ** not the page that will hold the next byte for appending
	 */
	unsigned long index = (p_s_inode->i_size - 1) >> PAGE_CACHE_SHIFT;
	unsigned long pos = 0;
	unsigned long start = 0;
	unsigned long blocksize = p_s_inode->i_sb->s_blocksize;
	unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1);
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int error;

	/* we know that we are only called with inode->i_size > 0.
	 ** we also know that a file tail can never be as big as a block
	 ** If i_size % blocksize == 0, our file is currently block aligned
	 ** and it won't need converting or zeroing after a truncate.
	 */
	if ((offset & (blocksize - 1)) == 0) {
		return -ENOENT;
	}
	page = grab_cache_page(p_s_inode->i_mapping, index);
	error = -ENOMEM;
	if (!page) {
		goto out;
	}
	/* start within the page of the last block in the file */
	start = (offset / blocksize) * blocksize;

	/* map (and read in, if necessary) the last block; create_0 means
	 * no new blocks are allocated here — tail conversion is left to
	 * the caller, per the header comment above */
	error = block_prepare_write(page, start, offset,
				    reiserfs_get_block_create_0);
	if (error)
		goto unlock;

	/* walk the buffer ring on the page to the buffer covering 'start' */
	head = page_buffers(page);
	bh = head;
	do {
		if (pos >= start) {
			break;
		}
		bh = bh->b_this_page;
		pos += blocksize;
	} while (bh != head);

	if (!buffer_uptodate(bh)) {
		/* note, this should never happen, prepare_write should
		 ** be taking care of this for us.  If the buffer isn't up to
		 ** date, I've screwed up the code to find the buffer, or the
		 ** code to call prepare_write
		 */
		reiserfs_warning(p_s_inode->i_sb,
				 "clm-6000: error reading block %lu on dev %s",
				 bh->b_blocknr,
				 reiserfs_bdevname(p_s_inode->i_sb));
		error = -EIO;
		goto unlock;
	}
	/* success: hand back the locked, pinned page and its tail buffer */
	*bh_result = bh;
	*page_result = page;

      out:
	return error;

      unlock:
	unlock_page(page);
	page_cache_release(page);
	return error;
}

/*
** vfs version of truncate file.  Must NOT be called with
** a transaction already started.
**
** some code taken from block_truncate_page
*/
int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
{
	struct reiserfs_transaction_handle th;
	/* we want the offset for the first byte after the end of the file */
	unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1);
	unsigned blocksize = p_s_inode->i_sb->s_blocksize;
	unsigned length;
	struct page *page = NULL;
	int error;
	struct buffer_head *bh = NULL;
	int err2;

	reiserfs_write_lock(p_s_inode->i_sb);

	if (p_s_inode->i_size > 0) {
		/* grab_tail_page failure is tolerated: we simply skip the
		 * post-truncate zeroing below (page/bh stay NULL) */
		if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
			// -ENOENT means we truncated past the end of the file,
			// and get_block_create_0 could not find a block to read in,
			// which is ok.
			if (error != -ENOENT)
				reiserfs_warning(p_s_inode->i_sb,
						 "clm-6001: grab_tail_page failed %d",
						 error);
			page = NULL;
			bh = NULL;
		}
	}

	/* so, if page != NULL, we have a buffer head for the offset at
	 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
	 ** then we have an unformatted node.  Otherwise, we have a direct item,
	 ** and no zeroing is required on disk.  We zero after the truncate,
	 ** because the truncate might pack the item anyway
	 ** (it will unmap bh if it packs).
 */
	/* it is enough to reserve space in transaction for 2 balancings:
	   one for "save" link adding and another for the first
	   cut_from_item. 1 is for update_sd */
	error = journal_begin(&th, p_s_inode->i_sb,
			      JOURNAL_PER_BALANCE_CNT * 2 + 1);
	if (error)
		goto out;
	reiserfs_update_inode_transaction(p_s_inode);
	if (update_timestamps)
		/* we are doing real truncate: if the system crashes before the last
		   transaction of truncating gets committed - on reboot the file
		   either appears truncated properly or not truncated at all */
		add_save_link(&th, p_s_inode, 1);
	err2 = reiserfs_do_truncate(&th, p_s_inode, page, update_timestamps);
	error =
	    journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
	if (error)
		goto out;

	/* check reiserfs_do_truncate after ending the transaction */
	if (err2) {
		error = err2;
		goto out;
	}

	if (update_timestamps) {
		/* the save link is only removed once the truncate fully
		 * committed; failure here is reported to the caller */
		error = remove_save_link(p_s_inode, 1 /* truncate */ );
		if (error)
			goto out;
	}

	if (page) {
		length = offset & (blocksize - 1);
		/* if we are not on a block boundary */
		if (length) {
			/* zero from the new EOF to the end of its block;
			 * only dirty the buffer if it still maps a real
			 * (unformatted) block — truncate may have unmapped it */
			length = blocksize - length;
			zero_user_page(page, offset, length, KM_USER0);
			if (buffer_mapped(bh) && bh->b_blocknr != 0) {
				mark_buffer_dirty(bh);
			}
		}
		unlock_page(page);
		page_cache_release(page);
	}

	reiserfs_write_unlock(p_s_inode->i_sb);
	return 0;
      out:
	if (page) {
		unlock_page(page);
		page_cache_release(page);
	}
	reiserfs_write_unlock(p_s_inode->i_sb);
	return error;
}

/* map the buffer covering 'block' for writepage.  If the bytes live in an
** indirect item, fill in the real on-disk block number.  If they live in a
** direct item (a file tail), copy the page contents into the item under a
** transaction and leave bh_result mapped with b_blocknr == 0 so the caller
** knows no io is needed.  Holes fall back to reiserfs_get_block.
*/
static int map_block_for_writepage(struct inode *inode,
				   struct buffer_head *bh_result,
				   unsigned long block)
{
	struct reiserfs_transaction_handle th;
	int fs_gen;
	struct item_head tmp_ih;
	struct item_head *ih;
	struct buffer_head *bh;
	__le32 *item;
	struct cpu_key key;
	INITIALIZE_PATH(path);
	int pos_in_item;
	int jbegin_count = JOURNAL_PER_BALANCE_CNT;
	/* +1: reiserfs key offsets count file bytes starting from 1 */
	loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1;
	int retval;
	int use_get_block = 0;
	int bytes_copied = 0;
	int copy_size;
	int trans_running = 0;

	/* catch places below that try to log something without starting a trans */
	th.t_trans_id = 0;

	if (!buffer_uptodate(bh_result)) {
		return -EIO;
	}

	kmap(bh_result->b_page);
      start_over:
	reiserfs_write_lock(inode->i_sb);
	make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);

      research:
	retval = search_for_position_by_key(inode->i_sb, &key, &path);
	if (retval != POSITION_FOUND) {
		use_get_block = 1;
		goto out;
	}

	bh = get_last_bh(&path);
	ih = get_ih(&path);
	item = get_item(&path);
	pos_in_item = path.pos_in_item;

	/* we've found an unformatted node */
	if (indirect_item_found(retval, ih)) {
		if (bytes_copied > 0) {
			reiserfs_warning(inode->i_sb,
					 "clm-6002: bytes_copied %d",
					 bytes_copied);
		}
		if (!get_block_num(item, pos_in_item)) {
			/* crap, we are writing to a hole */
			use_get_block = 1;
			goto out;
		}
		set_block_dev_mapped(bh_result,
				     get_block_num(item, pos_in_item), inode);
	} else if (is_direct_le_ih(ih)) {
		/* tail (direct item): copy the page data into the item */
		char *p;
		p = page_address(bh_result->b_page);
		p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
		copy_size = ih_item_len(ih) - pos_in_item;

		fs_gen = get_generation(inode->i_sb);
		copy_item_head(&tmp_ih, ih);

		if (!trans_running) {
			/* vs-3050 is gone, no need to drop the path */
			retval = journal_begin(&th, inode->i_sb, jbegin_count);
			if (retval)
				goto out;
			reiserfs_update_inode_transaction(inode);
			trans_running = 1;
			/* journal_begin may have slept; if the tree changed
			 * under us, redo the search before touching the item */
			if (fs_changed(fs_gen, inode->i_sb)
			    && item_moved(&tmp_ih, &path)) {
				reiserfs_restore_prepared_buffer(inode->i_sb,
								 bh);
				goto research;
			}
		}

		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);

		/* prepare_for_journal may also sleep — recheck generation */
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
			goto research;
		}

		memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
		       copy_size);

		journal_mark_dirty(&th, inode->i_sb, bh);
		bytes_copied += copy_size;
		/* mapped with b_blocknr == 0 marks "direct item, no io" */
		set_block_dev_mapped(bh_result, 0, inode);

		/* are there still bytes left? */
		if (bytes_copied < bh_result->b_size &&
		    (byte_offset + bytes_copied) < inode->i_size) {
			set_cpu_key_k_offset(&key,
					     cpu_key_k_offset(&key) +
					     copy_size);
			goto research;
		}
	} else {
		reiserfs_warning(inode->i_sb,
				 "clm-6003: bad item inode %lu, device %s",
				 inode->i_ino, reiserfs_bdevname(inode->i_sb));
		retval = -EIO;
		goto out;
	}
	retval = 0;

      out:
	pathrelse(&path);
	if (trans_running) {
		int err = journal_end(&th, inode->i_sb, jbegin_count);
		if (err)
			retval = err;
		trans_running = 0;
	}
	reiserfs_write_unlock(inode->i_sb);

	/* this is where we fill in holes in the file. */
	if (use_get_block) {
		retval = reiserfs_get_block(inode, block, bh_result,
					    GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
					    | GET_BLOCK_NO_DANGLE);
		if (!retval) {
			if (!buffer_mapped(bh_result)
			    || bh_result->b_blocknr == 0) {
				/* get_block failed to find a mapped unformatted node. */
				use_get_block = 0;
				goto start_over;
			}
		}
	}
	kunmap(bh_result->b_page);

	if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
		/* we've copied data from the page into the direct item, so the
		 * buffer in the page is now clean, mark it to reflect that.
		 */
		lock_buffer(bh_result);
		clear_buffer_dirty(bh_result);
		unlock_buffer(bh_result);
	}
	return retval;
}

/*
 * mason@suse.com: updated in 2.5.54 to follow the same general io
 * start/recovery path as __block_write_full_page, along with special
 * code to handle reiserfs tails.
 */
static int reiserfs_write_full_page(struct page *page,
				    struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
	int error = 0;
	unsigned long block;
	sector_t last_block;
	struct buffer_head *head, *bh;
	int partial = 0;
	int nr = 0;
	/* Checked pages belong to data=journal mode and must be logged */
	int checked = PageChecked(page);
	struct reiserfs_transaction_handle th;
	struct super_block *s = inode->i_sb;
	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
	th.t_trans_id = 0;

	/* no logging allowed when nonblocking or from PF_MEMALLOC */
	if (checked && (current->flags & PF_MEMALLOC)) {
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}

	/* The page dirty bit is cleared before writepage is called, which
	 * means we have to tell create_empty_buffers to make dirty buffers
	 * The page really should be up to date at this point, so tossing
	 * in the BH_Uptodate is just a sanity check.
	 */
	if (!page_has_buffers(page)) {
		create_empty_buffers(page, s->s_blocksize,
				     (1 << BH_Dirty) | (1 << BH_Uptodate));
	}
	head = page_buffers(page);

	/* last page in the file, zero out any contents past the
	 ** last byte in the file
	 */
	if (page->index >= end_index) {
		unsigned last_offset;

		last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
		/* no file contents in this page */
		if (page->index >= end_index + 1 || !last_offset) {
			unlock_page(page);
			return 0;
		}
		zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0);
	}
	bh = head;
	block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
	/* first map all the buffers, logging any direct items we find */
	do {
		if (block > last_block) {
			/*
			 * This can happen when the block size is less than
			 * the page size.  The corresponding bytes in the page
			 * were zero filled above
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
		} else if ((checked || buffer_dirty(bh)) &&
			   (!buffer_mapped(bh) || (buffer_mapped(bh)
						   && bh->b_blocknr ==
						   0))) {
			/* not mapped yet, or it points to a direct item, search
			 * the btree for the mapping info, and log any direct
			 * items found
			 */
			if ((error = map_block_for_writepage(inode, bh, block))) {
				goto fail;
			}
		}
		bh = bh->b_this_page;
		block++;
	} while (bh != head);

	/*
	 * we start the transaction after map_block_for_writepage,
	 * because it can create holes in the file (an unbounded operation).
	 * starting it here, we can make a reliable estimate for how many
	 * blocks we're going to log
	 */
	if (checked) {
		ClearPageChecked(page);
		reiserfs_write_lock(s);
		error = journal_begin(&th, s, bh_per_page + 1);
		if (error) {
			reiserfs_write_unlock(s);
			goto fail;
		}
		reiserfs_update_inode_transaction(inode);
	}
	/* now go through and lock any dirty buffers on the page */
	do {
		get_bh(bh);
		if (!buffer_mapped(bh))
			continue;
		/* b_blocknr == 0 here means a direct item already logged
		 * by map_block_for_writepage — nothing to submit */
		if (buffer_mapped(bh) && bh->b_blocknr == 0)
			continue;

		if (checked) {
			reiserfs_prepare_for_journal(s, bh, 1);
			journal_mark_dirty(&th, s, bh);
			continue;
		}
		/* from this point on, we know the buffer is mapped to a
		 * real block and not a direct item
		 */
		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
			lock_buffer(bh);
		} else {
			/* nonblocking writeback: never wait on a busy buffer */
			if (test_set_buffer_locked(bh)) {
				redirty_page_for_writepage(wbc, page);
				continue;
			}
		}
		if (test_clear_buffer_dirty(bh)) {
			mark_buffer_async_write(bh);
		} else {
			unlock_buffer(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	if (checked) {
		error = journal_end(&th, s, bh_per_page + 1);
		reiserfs_write_unlock(s);
		if (error)
			goto fail;
	}
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	unlock_page(page);

	/*
	 * since any buffer might be the only dirty buffer on the page,
	 * the first submit_bh can bring the page out of writeback.
	 * be careful with the buffers.
	 */
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(WRITE, bh);
			nr++;
		}
		put_bh(bh);
		bh = next;
	} while (bh != head);

	error = 0;
      done:
	if (nr == 0) {
		/*
		 * if this page only had a direct item, it is very possible for
		 * no io to be required without there being an error.
		 * Or, someone else could have locked them and sent them down
		 * the pipe without locking the page
		 */
		bh = head;
		do {
			if (!buffer_uptodate(bh)) {
				partial = 1;
				break;
			}
			bh = bh->b_this_page;
		} while (bh != head);
		if (!partial)
			SetPageUptodate(page);
		end_page_writeback(page);
	}
	return error;

      fail:
	/* catches various errors, we need to make sure any valid dirty blocks
	 * get to the media.  The page is currently locked and not marked for
	 * writeback
	 */
	ClearPageUptodate(page);
	bh = head;
	do {
		get_bh(bh);
		if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
			lock_buffer(bh);
			mark_buffer_async_write(bh);
		} else {
			/*
			 * clear any dirty bits that might have come from getting
			 * attached to a dirty page
			 */
			clear_buffer_dirty(bh);
		}
		bh = bh->b_this_page;
	} while (bh != head);
	SetPageError(page);
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	unlock_page(page);
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			clear_buffer_dirty(bh);
			submit_bh(WRITE, bh);
			nr++;
		}
		put_bh(bh);
		bh = next;
	} while (bh != head);
	goto done;
}

/* aop: read one page via the generic block helper */
static int reiserfs_readpage(struct file *f, struct page *page)
{
	return block_read_full_page(page, reiserfs_get_block);
}

/* aop: write one page, waiting out any journal write block first */
static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	reiserfs_wait_on_write_block(inode->i_sb);
	return reiserfs_write_full_page(page, wbc);
}

/* aop: prepare a write.  If a transaction is already running in this
 * task, bump its refcount so reiserfs_get_block nests into it instead
 * of starting (and possibly ending) its own. */
static int reiserfs_prepare_write(struct file *f, struct page *page,
				  unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	int ret;
	int old_ref = 0;

	reiserfs_wait_on_write_block(inode->i_sb);
	fix_tail_page_for_writing(page);
	if (reiserfs_transaction_running(inode->i_sb)) {
		struct reiserfs_transaction_handle *th;
		th = (struct reiserfs_transaction_handle *)current->
		    journal_info;
		BUG_ON(!th->t_refcount);
		BUG_ON(!th->t_trans_id);
		old_ref = th->t_refcount;
		th->t_refcount++;
	}

	ret = block_prepare_write(page, from, to, reiserfs_get_block);
	if (ret && reiserfs_transaction_running(inode->i_sb)) {
		struct reiserfs_transaction_handle *th = current->journal_info;
		/* this gets a little ugly.  If reiserfs_get_block returned an
		 * error and left a transacstion running, we've got to close it,
		 * and we've got to free handle if it was a persistent transaction.
		 *
		 * But, if we had nested into an existing transaction, we need
		 * to just drop the ref count on the handle.
		 *
		 * If old_ref == 0, the transaction is from reiserfs_get_block,
		 * and it was a persistent trans.  Otherwise, it was nested above.
		 */
		if (th->t_refcount > old_ref) {
			if (old_ref)
				th->t_refcount--;
			else {
				int err;
				reiserfs_write_lock(inode->i_sb);
				err = reiserfs_end_persistent_transaction(th);
				reiserfs_write_unlock(inode->i_sb);
				if (err)
					ret = err;
			}
		}
	}
	return ret;

}

/* aop: map a logical file block to a device sector for FIBMAP et al. */
static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
{
	return generic_block_bmap(as, block, reiserfs_bmap);
}

/* aop: commit a write started by reiserfs_prepare_write; also closes the
 * persistent transaction that reiserfs_get_block may have left running. */
static int reiserfs_commit_write(struct file *f, struct page *page,
				 unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
	int ret = 0;
	int update_sd = 0;
	struct reiserfs_transaction_handle *th = NULL;

	reiserfs_wait_on_write_block(inode->i_sb);
	if (reiserfs_transaction_running(inode->i_sb)) {
		th = current->journal_info;
	}
	reiserfs_commit_page(inode, page, from, to);

	/* generic_commit_write does this for us, but does not update the
	 ** transaction tracking stuff when the size changes.  So, we have
	 ** to do the i_size updates here.
	 */
	if (pos > inode->i_size) {
		struct reiserfs_transaction_handle myth;
		reiserfs_write_lock(inode->i_sb);
		/* If the file have grown beyond the border where it
		   can have a tail, unmark it as needing a tail
		   packing */
		if ((have_large_tails(inode->i_sb)
		     && inode->i_size > i_block_size(inode) * 4)
		    || (have_small_tails(inode->i_sb)
			&& inode->i_size > i_block_size(inode)))
			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;

		ret = journal_begin(&myth, inode->i_sb, 1);
		if (ret) {
			reiserfs_write_unlock(inode->i_sb);
			goto journal_error;
		}
		reiserfs_update_inode_transaction(inode);
		inode->i_size = pos;
		/*
		 * this will just nest into our transaction.
		 * It's important to use mark_inode_dirty so the inode gets
		 * pushed around on the dirty lists, and so that O_SYNC works
		 * as expected
		 */
		mark_inode_dirty(inode);
		reiserfs_update_sd(&myth, inode);
		update_sd = 1;
		ret = journal_end(&myth, inode->i_sb, 1);
		reiserfs_write_unlock(inode->i_sb);
		if (ret)
			goto journal_error;
	}
	if (th) {
		reiserfs_write_lock(inode->i_sb);
		if (!update_sd)
			mark_inode_dirty(inode);
		ret = reiserfs_end_persistent_transaction(th);
		reiserfs_write_unlock(inode->i_sb);
		if (ret)
			goto out;
	}

      out:
	return ret;

      journal_error:
	if (th) {
		reiserfs_write_lock(inode->i_sb);
		if (!update_sd)
			reiserfs_update_sd(th, inode);
		ret = reiserfs_end_persistent_transaction(th);
		reiserfs_write_unlock(inode->i_sb);
	}

	return ret;
}

/* copy on-disk stat-data attribute bits (REISERFS_*_FL) into the in-core
 * inode->i_flags / reiserfs i_nopack flag.  Only active with the 'attrs'
 * mount option. */
void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
{
	if (reiserfs_attrs(inode->i_sb)) {
		if (sd_attrs & REISERFS_SYNC_FL)
			inode->i_flags |= S_SYNC;
		else
			inode->i_flags &= ~S_SYNC;
		if (sd_attrs & REISERFS_IMMUTABLE_FL)
			inode->i_flags |= S_IMMUTABLE;
		else
			inode->i_flags &= ~S_IMMUTABLE;
		if (sd_attrs & REISERFS_APPEND_FL)
			inode->i_flags |= S_APPEND;
		else
			inode->i_flags &= ~S_APPEND;
		if (sd_attrs & REISERFS_NOATIME_FL)
			inode->i_flags |= S_NOATIME;
		else
			inode->i_flags &= ~S_NOATIME;
		if (sd_attrs & REISERFS_NOTAIL_FL)
			REISERFS_I(inode)->i_flags |= i_nopack_mask;
		else
			REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
	}
}

/* inverse of sd_attrs_to_i_attrs: export in-core flags back to the
 * stat-data attribute bits.
 * NOTE(review): S_APPEND is imported above but never exported here —
 * looks asymmetric; confirm this is intentional before changing. */
void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
{
	if (reiserfs_attrs(inode->i_sb)) {
		if (inode->i_flags & S_IMMUTABLE)
			*sd_attrs |= REISERFS_IMMUTABLE_FL;
		else
			*sd_attrs &= ~REISERFS_IMMUTABLE_FL;
		if (inode->i_flags & S_SYNC)
			*sd_attrs |= REISERFS_SYNC_FL;
		else
			*sd_attrs &= ~REISERFS_SYNC_FL;
		if (inode->i_flags & S_NOATIME)
			*sd_attrs |= REISERFS_NOATIME_FL;
		else
			*sd_attrs &= ~REISERFS_NOATIME_FL;
		if (REISERFS_I(inode)->i_flags & i_nopack_mask)
			*sd_attrs |= REISERFS_NOTAIL_FL;
		else
			*sd_attrs &= ~REISERFS_NOTAIL_FL;
	}
}

/* decide if this buffer needs to stay around for data logging or ordered
** write purposes.  Returns 1 when the buffer may be dropped.
*/
static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
{
	int ret = 1;
	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);

	lock_buffer(bh);
	spin_lock(&j->j_dirty_buffers_lock);
	if (!buffer_mapped(bh)) {
		goto free_jh;
	}
	/* the page is locked, and the only places that log a data buffer
	 * also lock the page.
	 */
	if (reiserfs_file_data_log(inode)) {
		/*
		 * very conservative, leave the buffer pinned if
		 * anyone might need it.
		 */
		if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
			ret = 0;
		}
	} else if (buffer_dirty(bh)) {
		struct reiserfs_journal_list *jl;
		struct reiserfs_jh *jh = bh->b_private;

		/* why is this safe?
		 * reiserfs_setattr updates i_size in the on disk
		 * stat data before allowing vmtruncate to be called.
		 *
		 * If buffer was put onto the ordered list for this
		 * transaction, we know for sure either this transaction
		 * or an older one already has updated i_size on disk,
		 * and this ordered data won't be referenced in the file
		 * if we crash.
		 *
		 * if the buffer was put onto the ordered list for an older
		 * transaction, we need to leave it around
		 */
		if (jh && (jl = jh->jl)
		    && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
			ret = 0;
	}
      free_jh:
	if (ret && bh->b_private) {
		reiserfs_free_jh(bh);
	}
	spin_unlock(&j->j_dirty_buffers_lock);
	unlock_buffer(bh);
	return ret;
}

/* clm -- taken from fs/buffer.c:block_invalidate_page */
static void reiserfs_invalidatepage(struct page *page, unsigned long offset)
{
	struct buffer_head *head, *bh, *next;
	struct inode *inode = page->mapping->host;
	unsigned int curr_off = 0;
	int ret = 1;

	BUG_ON(!PageLocked(page));

	if (offset == 0)
		ClearPageChecked(page);

	if (!page_has_buffers(page))
		goto out;

	head = page_buffers(page);
	bh = head;
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		/*
		 * is this block fully invalidated?
		 */
		if (offset <= curr_off) {
			if (invalidatepage_can_drop(inode, bh))
				reiserfs_unmap_buffer(bh);
			else
				ret = 0;
		}
		curr_off = next_off;
		bh = next;
	} while (bh != head);

	/*
	 * We release buffers only if the entire page is being invalidated.
	 * The get_block cached value has been unconditionally invalidated,
	 * so real IO is not possible anymore.
	 */
	if (!offset && ret) {
		ret = try_to_release_page(page, 0);
		/* maybe should BUG_ON(!ret); - neilb */
	}
      out:
	return;
}

/* aop: data=journal pages are flagged Checked so writepage knows to log
 * their buffers rather than submit plain io. */
static int reiserfs_set_page_dirty(struct page *page)
{
	struct inode *inode = page->mapping->host;
	if (reiserfs_file_data_log(inode)) {
		SetPageChecked(page);
		return __set_page_dirty_nobuffers(page);
	}
	return __set_page_dirty_buffers(page);
}

/*
 * Returns 1 if the page's buffers were dropped.  The page is locked.
 *
 * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
 * in the buffers at page_buffers(page).
 *
 * even in -o notail mode, we can't be sure an old mount without -o notail
 * didn't create files with tails.
 */
static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
	struct inode *inode = page->mapping->host;
	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
	struct buffer_head *head;
	struct buffer_head *bh;
	int ret = 1;

	WARN_ON(PageChecked(page));
	spin_lock(&j->j_dirty_buffers_lock);
	head = page_buffers(page);
	bh = head;
	do {
		/* b_private holds the journal head; a clean, unlocked
		 * buffer's jh can be freed, otherwise the page must stay */
		if (bh->b_private) {
			if (!buffer_dirty(bh) && !buffer_locked(bh)) {
				reiserfs_free_jh(bh);
			} else {
				ret = 0;
				break;
			}
		}
		bh = bh->b_this_page;
	} while (bh != head);
	if (ret)
		ret = try_to_free_buffers(page);
	spin_unlock(&j->j_dirty_buffers_lock);
	return ret;
}

/* We thank Mingming Cao for helping us understand in great detail what
 * to do in this section of the code.
 *
 * aop: O_DIRECT io via the generic blockdev helper.
 */
static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
				  const struct iovec *iov, loff_t offset,
				  unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				  offset, nr_segs,
				  reiserfs_get_blocks_direct_io, NULL);
}

/* inode_operations setattr: size, owner and mode changes, with journaled
 * quota transfer and xattr ownership kept in step. */
int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;
	unsigned int ia_valid = attr->ia_valid;
	reiserfs_write_lock(inode->i_sb);
	if (attr->ia_valid & ATTR_SIZE) {
		/* version 2 items will be caught by the s_maxbytes check
		 ** done for us in vmtruncate
		 */
		if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
		    attr->ia_size > MAX_NON_LFS) {
			error = -EFBIG;
			goto out;
		}
		/* fill in hole pointers in the expanding truncate case. */
		if (attr->ia_size > inode->i_size) {
			error = generic_cont_expand(inode, attr->ia_size);
			/* drop preallocated blocks left over from the expand;
			 * a failure here overrides the expand result */
			if (REISERFS_I(inode)->i_prealloc_count > 0) {
				int err;
				struct reiserfs_transaction_handle th;
				/* we're changing at most 2 bitmaps, inode + super */
				err = journal_begin(&th, inode->i_sb, 4);
				if (!err) {
					reiserfs_discard_prealloc(&th, inode);
					err = journal_end(&th, inode->i_sb, 4);
				}
				if (err)
					error = err;
			}
			if (error)
				goto out;
			/*
			 * file size is changed, ctime and mtime are
			 * to be updated
			 */
			attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME);
		}
	}

	if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) ||
	     ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) &&
	    (get_inode_sd_version(inode) == STAT_DATA_V1)) {
		/* stat data of format v3.5 has 16 bit uid and gid */
		error = -EINVAL;
		goto out;
	}

	error = inode_change_ok(inode, attr);
	if (!error) {
		if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
		    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
			/* xattr files must change owner too, before quota moves */
			error = reiserfs_chown_xattrs(inode, attr);

			if (!error) {
				struct reiserfs_transaction_handle th;
				int jbegin_count =
				    2 *
				    (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
				     REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
				    2;

				/* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
				error =
				    journal_begin(&th, inode->i_sb,
						  jbegin_count);
				if (error)
					goto out;
				error =
				    DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
				if (error) {
					journal_end(&th, inode->i_sb,
						    jbegin_count);
					goto out;
				}
				/* Update corresponding info in inode so that everything is in
				 * one transaction */
				if (attr->ia_valid & ATTR_UID)
					inode->i_uid = attr->ia_uid;
				if (attr->ia_valid & ATTR_GID)
					inode->i_gid = attr->ia_gid;
				mark_inode_dirty(inode);
				error =
				    journal_end(&th, inode->i_sb, jbegin_count);
			}
		}
		if (!error)
			error = inode_setattr(inode, attr);
	}

	if (!error && reiserfs_posixacl(inode->i_sb)) {
		if (attr->ia_valid & ATTR_MODE)
			error = reiserfs_acl_chmod(inode);
	}

      out:
	reiserfs_write_unlock(inode->i_sb);
	return error;
}

const struct address_space_operations reiserfs_address_space_operations = {
	.writepage = reiserfs_writepage,
	.readpage = reiserfs_readpage,
	.readpages = reiserfs_readpages,
	.releasepage = reiserfs_releasepage,
	.invalidatepage = reiserfs_invalidatepage,
	.sync_page = block_sync_page,
	.prepare_write = reiserfs_prepare_write,
	.commit_write = reiserfs_commit_write,
	.bmap = reiserfs_aop_bmap,
	.direct_IO = reiserfs_direct_IO,
	.set_page_dirty = reiserfs_set_page_dirty,
};