/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_trans.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_iomap.h"
#include <linux/mpage.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>

STATIC void
xfs_count_page_state(
	struct page		*page,
	int			*delalloc,
	int			*unmapped,
	int			*unwritten)
{
	struct buffer_head	*bh, *head;

	*delalloc = *unmapped = *unwritten = 0;

	bh = head = page_buffers(page);
	do {
		if (buffer_uptodate(bh) && !buffer_mapped(bh))
			(*unmapped) = 1;
		else if (buffer_unwritten(bh))
			(*unwritten) = 1;
		else if (buffer_delay(bh))
			(*delalloc) = 1;
	} while ((bh = bh->b_this_page) != head);
}

#if defined(XFS_RW_TRACE)
void
xfs_page_trace(
	int		tag,
	struct inode	*inode,
	struct page	*page,
	unsigned long	pgoff)
{
	xfs_inode_t	*ip;
	bhv_vnode_t	*vp = vn_from_inode(inode);
	loff_t		isize = i_size_read(inode);
	loff_t		offset = page_offset(page);
	int		delalloc = -1, unmapped = -1, unwritten = -1;

	if (page_has_buffers(page))
		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);

	ip = xfs_vtoi(vp);
	if (!ip->i_rwtrace)
		return;

	ktrace_enter(ip->i_rwtrace,
		(void *)((unsigned long)tag),
		(void *)ip,
		(void *)inode,
		(void *)page,
		(void *)pgoff,
		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
		(void *)((unsigned long)((isize >> 32) & 0xffffffff)),
		(void *)((unsigned long)(isize & 0xffffffff)),
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)delalloc),
		(void *)((unsigned long)unmapped),
		(void *)((unsigned long)unwritten),
		(void *)((unsigned long)current_pid()),
		(void *)NULL);
}
#else
#define xfs_page_trace(tag, inode, page, pgoff)
#endif

/*
 * Schedule IO completion handling on an xfsdatad if this was
 * the final hold on this ioend.
 */
STATIC void
xfs_finish_ioend(
	xfs_ioend_t		*ioend)
{
	if (atomic_dec_and_test(&ioend->io_remaining))
		queue_work(xfsdatad_workqueue, &ioend->io_work);
}
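
/*
 * Illustrative sketch (editorial, not part of the original code): the
 * ioend hold count works like a reference count.  The allocator takes
 * an initial hold, every bio submitted against the ioend takes another,
 * and the final xfs_finish_ioend() call queues the completion work:
 *
 *	ioend = xfs_alloc_ioend(inode, type);	io_remaining == 1
 *	xfs_submit_ioend_bio(ioend, bio);	io_remaining == 2
 *	(bio completes, calls xfs_finish_ioend)	io_remaining == 1
 *	xfs_finish_ioend(ioend);		io_remaining == 0, queue work
 *
 * Only when io_remaining reaches zero is io_work queued to an xfsdatad.
 */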
/*
 * We're now finished for good with this ioend structure.
 * Update the page state via the associated buffer_heads,
 * release holds on the inode and bio, and finally free
 * up memory.  Do not use the ioend after this.
 */
STATIC void
xfs_destroy_ioend(
	xfs_ioend_t		*ioend)
{
	struct buffer_head	*bh, *next;

	for (bh = ioend->io_buffer_head; bh; bh = next) {
		next = bh->b_private;
		bh->b_end_io(bh, !ioend->io_error);
	}
	if (unlikely(ioend->io_error))
		vn_ioerror(ioend->io_vnode, ioend->io_error, __FILE__,__LINE__);
	vn_iowake(ioend->io_vnode);
	mempool_free(ioend, xfs_ioend_pool);
}

/*
 * Update the on-disk file size now that data has been written to disk.
 * The current in-memory file size is i_size.  If a write is beyond EOF,
 * io_new_size will be the intended file size until i_size is updated.
 * If this write does not extend all the way to the valid file size then
 * restrict this update to the end of the write.
 */
STATIC void
xfs_setfilesize(
	xfs_ioend_t		*ioend)
{
	xfs_inode_t		*ip;
	xfs_fsize_t		isize;
	xfs_fsize_t		bsize;

	ip = xfs_vtoi(ioend->io_vnode);

	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
	ASSERT(ioend->io_type != IOMAP_READ);

	if (unlikely(ioend->io_error))
		return;

	bsize = ioend->io_offset + ioend->io_size;

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	isize = MAX(ip->i_size, ip->i_iocore.io_new_size);
	isize = MIN(isize, bsize);

	if (ip->i_d.di_size < isize) {
		ip->i_d.di_size = isize;
		ip->i_update_core = 1;
		ip->i_update_size = 1;
	}

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
}

/*
 * Buffered IO write completion for delayed allocate extents.
 */
STATIC void
xfs_end_bio_delalloc(
	struct work_struct	*work)
{
	xfs_ioend_t		*ioend =
		container_of(work, xfs_ioend_t, io_work);

	xfs_setfilesize(ioend);
	xfs_destroy_ioend(ioend);
}

/*
 * Buffered IO write completion for regular, written extents.
 */
STATIC void
xfs_end_bio_written(
	struct work_struct	*work)
{
	xfs_ioend_t		*ioend =
		container_of(work, xfs_ioend_t, io_work);

	xfs_setfilesize(ioend);
	xfs_destroy_ioend(ioend);
}

/*
 * IO write completion for unwritten extents.
 *
 * Issue transactions to convert a buffer range from unwritten
 * to written extents.
 */
STATIC void
xfs_end_bio_unwritten(
	struct work_struct	*work)
{
	xfs_ioend_t		*ioend =
		container_of(work, xfs_ioend_t, io_work);
	bhv_vnode_t		*vp = ioend->io_vnode;
	xfs_off_t		offset = ioend->io_offset;
	size_t			size = ioend->io_size;

	if (likely(!ioend->io_error)) {
		bhv_vop_bmap(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL);
		xfs_setfilesize(ioend);
	}
	xfs_destroy_ioend(ioend);
}

/*
 * IO read completion for regular, written extents.
 */
STATIC void
xfs_end_bio_read(
	struct work_struct	*work)
{
	xfs_ioend_t		*ioend =
		container_of(work, xfs_ioend_t, io_work);

	xfs_destroy_ioend(ioend);
}
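
/*
 * Worked example for xfs_setfilesize() above (editorial sketch, with
 * made-up numbers): with an in-core size ip->i_size = 100k, an extending
 * write in flight with io_new_size = 200k, and this ioend covering
 * offset 96k with size 8k:
 *
 *	isize = MAX(100k, 200k)      = 200k
 *	isize = MIN(200k, 96k + 8k)  = 104k
 *
 * so the on-disk size only ever advances to the end of the data that
 * has actually hit the disk, never past it.
 */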
/*
 * Allocate and initialise an IO completion structure.
 * We need to track unwritten extent write completion here initially.
 * We'll need to extend this for updating the ondisk inode size later
 * (vs. incore size).
 */
STATIC xfs_ioend_t *
xfs_alloc_ioend(
	struct inode		*inode,
	unsigned int		type)
{
	xfs_ioend_t		*ioend;

	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);

	/*
	 * Set the count to 1 initially, which will prevent an I/O
	 * completion callback from happening before we have started
	 * all the I/O, and hence prevent the completion routine from
	 * being called too early.
	 */
	atomic_set(&ioend->io_remaining, 1);
	ioend->io_error = 0;
	ioend->io_list = NULL;
	ioend->io_type = type;
	ioend->io_vnode = vn_from_inode(inode);
	ioend->io_buffer_head = NULL;
	ioend->io_buffer_tail = NULL;
	atomic_inc(&ioend->io_vnode->v_iocount);
	ioend->io_offset = 0;
	ioend->io_size = 0;

	if (type == IOMAP_UNWRITTEN)
		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
	else if (type == IOMAP_DELAY)
		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
	else if (type == IOMAP_READ)
		INIT_WORK(&ioend->io_work, xfs_end_bio_read);
	else
		INIT_WORK(&ioend->io_work, xfs_end_bio_written);

	return ioend;
}

STATIC int
xfs_map_blocks(
	struct inode		*inode,
	loff_t			offset,
	ssize_t			count,
	xfs_iomap_t		*mapp,
	int			flags)
{
	bhv_vnode_t		*vp = vn_from_inode(inode);
	int			error, nmaps = 1;

	error = bhv_vop_bmap(vp, offset, count, flags, mapp, &nmaps);
	if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
		VMODIFY(vp);
	return -error;
}

STATIC_INLINE int
xfs_iomap_valid(
	xfs_iomap_t		*iomapp,
	loff_t			offset)
{
	return offset >= iomapp->iomap_offset &&
		offset < iomapp->iomap_offset + iomapp->iomap_bsize;
}
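
/*
 * Example (editorial sketch): for a mapping with iomap_offset = 64k and
 * iomap_bsize = 16k, xfs_iomap_valid() accepts offsets in the half-open
 * range [64k, 80k); offset 80k itself requires a fresh xfs_map_blocks()
 * call to get a new mapping.
 */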
/*
 * BIO completion handler for buffered IO.
 */
STATIC int
xfs_end_bio(
	struct bio		*bio,
	unsigned int		bytes_done,
	int			error)
{
	xfs_ioend_t		*ioend = bio->bi_private;

	if (bio->bi_size)
		return 1;

	ASSERT(atomic_read(&bio->bi_cnt) >= 1);
	ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;

	/* Toss bio and pass work off to an xfsdatad thread */
	bio->bi_private = NULL;
	bio->bi_end_io = NULL;
	bio_put(bio);

	xfs_finish_ioend(ioend);
	return 0;
}

STATIC void
xfs_submit_ioend_bio(
	xfs_ioend_t	*ioend,
	struct bio	*bio)
{
	atomic_inc(&ioend->io_remaining);

	bio->bi_private = ioend;
	bio->bi_end_io = xfs_end_bio;

	submit_bio(WRITE, bio);
	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
	bio_put(bio);
}

STATIC struct bio *
xfs_alloc_ioend_bio(
	struct buffer_head	*bh)
{
	struct bio		*bio;
	int			nvecs = bio_get_nr_vecs(bh->b_bdev);

	do {
		bio = bio_alloc(GFP_NOIO, nvecs);
		nvecs >>= 1;
	} while (!bio);

	ASSERT(bio->bi_private == NULL);
	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio_get(bio);
	return bio;
}

STATIC void
xfs_start_buffer_writeback(
	struct buffer_head	*bh)
{
	ASSERT(buffer_mapped(bh));
	ASSERT(buffer_locked(bh));
	ASSERT(!buffer_delay(bh));
	ASSERT(!buffer_unwritten(bh));

	mark_buffer_async_write(bh);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
}

STATIC void
xfs_start_page_writeback(
	struct page		*page,
	struct writeback_control *wbc,
	int			clear_dirty,
	int			buffers)
{
	ASSERT(PageLocked(page));
	ASSERT(!PageWriteback(page));
	if (clear_dirty)
		clear_page_dirty_for_io(page);
	set_page_writeback(page);
	unlock_page(page);
	if (!buffers) {
		end_page_writeback(page);
		wbc->pages_skipped++;	/* We didn't write this page */
	}
}

static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
{
	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
}
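
/*
 * Note on xfs_alloc_ioend_bio() above (editorial): bio_alloc(GFP_NOIO, n)
 * can fail for a large vector count under memory pressure, so the loop
 * retries with nvecs halved each time; a request for, say, 256 vectors
 * degrades to 128, 64, ... until the allocation succeeds.  Small bio
 * allocations come from a mempool-backed bio set, so the loop
 * effectively cannot spin forever.
 */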
/*
 * Submit all of the bios for all of the ioends we have saved up, covering the
 * initial writepage page and also any probed pages.
 *
 * Because we may have multiple ioends spanning a page, we need to start
 * writeback on all the buffers before we submit them for I/O.  If we mark
 * the buffers as we add them, we can end up with a page that has only some
 * of its buffers marked async write, and I/O completion on those can occur
 * before we mark the remaining buffers async write.
 *
 * The end result of this is that we trip a bug in end_page_writeback()
 * because we call it twice for the one page, as the code in
 * end_buffer_async_write() assumes that all buffers on the page are
 * started at the same time.
 *
 * The fix is two passes across the ioend list - one to start writeback on
 * the buffer_heads, and then submit them for I/O on the second pass.
 */
STATIC void
xfs_submit_ioend(
	xfs_ioend_t		*ioend)
{
	xfs_ioend_t		*head = ioend;
	xfs_ioend_t		*next;
	struct buffer_head	*bh;
	struct bio		*bio;
	sector_t		lastblock = 0;

	/* Pass 1 - start writeback */
	do {
		next = ioend->io_list;
		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
			xfs_start_buffer_writeback(bh);
		}
	} while ((ioend = next) != NULL);

	/* Pass 2 - submit I/O */
	ioend = head;
	do {
		next = ioend->io_list;
		bio = NULL;

		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {

			if (!bio) {
 retry:
				bio = xfs_alloc_ioend_bio(bh);
			} else if (bh->b_blocknr != lastblock + 1) {
				xfs_submit_ioend_bio(ioend, bio);
				goto retry;
			}

			if (bio_add_buffer(bio, bh) != bh->b_size) {
				xfs_submit_ioend_bio(ioend, bio);
				goto retry;
			}

			lastblock = bh->b_blocknr;
		}
		if (bio)
			xfs_submit_ioend_bio(ioend, bio);
		xfs_finish_ioend(ioend);
	} while ((ioend = next) != NULL);
}

/*
 * Cancel submission of all buffer_heads so far in this endio.
 * Toss the endio too.  Only ever called for the initial page
 * in a writepage request, so only ever one page.
 */
STATIC void
xfs_cancel_ioend(
	xfs_ioend_t		*ioend)
{
	xfs_ioend_t		*next;
	struct buffer_head	*bh, *next_bh;

	do {
		next = ioend->io_list;
		bh = ioend->io_buffer_head;
		do {
			next_bh = bh->b_private;
			clear_buffer_async_write(bh);
			unlock_buffer(bh);
		} while ((bh = next_bh) != NULL);

		vn_iowake(ioend->io_vnode);
		mempool_free(ioend, xfs_ioend_pool);
	} while ((ioend = next) != NULL);
}
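
/*
 * Structure sketch (editorial): the chain of ioends built up during a
 * single writepage pass, as walked by xfs_submit_ioend() and
 * xfs_cancel_ioend() above:
 *
 *	iohead -> ioend(DELAY) -> ioend(UNWRITTEN) -> NULL   (via io_list)
 *	              |
 *	              +-> bh -> bh -> bh -> NULL             (via b_private)
 *
 * Each ioend covers one contiguous range of one extent type, and its
 * buffer_heads are singly linked through b_private, which is why both
 * walkers above use bh->b_private as the "next" pointer.
 */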
/*
 * Test to see if we've been building up a completion structure for
 * earlier buffers -- if so, we try to append to this ioend if we
 * can, otherwise we finish off any current ioend and start another.
 * The current (possibly new) ioend is returned in *result.
 */
STATIC void
xfs_add_to_ioend(
	struct inode		*inode,
	struct buffer_head	*bh,
	xfs_off_t		offset,
	unsigned int		type,
	xfs_ioend_t		**result,
	int			need_ioend)
{
	xfs_ioend_t		*ioend = *result;

	if (!ioend || need_ioend || type != ioend->io_type) {
		xfs_ioend_t	*previous = *result;

		ioend = xfs_alloc_ioend(inode, type);
		ioend->io_offset = offset;
		ioend->io_buffer_head = bh;
		ioend->io_buffer_tail = bh;
		if (previous)
			previous->io_list = ioend;
		*result = ioend;
	} else {
		ioend->io_buffer_tail->b_private = bh;
		ioend->io_buffer_tail = bh;
	}

	bh->b_private = NULL;
	ioend->io_size += bh->b_size;
}

STATIC void
xfs_map_buffer(
	struct buffer_head	*bh,
	xfs_iomap_t		*mp,
	xfs_off_t		offset,
	uint			block_bits)
{
	sector_t		bn;

	ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);

	bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) +
	      ((offset - mp->iomap_offset) >> block_bits);

	ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));

	bh->b_blocknr = bn;
	set_buffer_mapped(bh);
}

STATIC void
xfs_map_at_offset(
	struct buffer_head	*bh,
	loff_t			offset,
	int			block_bits,
	xfs_iomap_t		*iomapp)
{
	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));

	lock_buffer(bh);
	xfs_map_buffer(bh, iomapp, offset, block_bits);
	bh->b_bdev = iomapp->iomap_target->bt_bdev;
	set_buffer_mapped(bh);
	clear_buffer_delay(bh);
	clear_buffer_unwritten(bh);
}
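
/*
 * Worked example for xfs_map_buffer() above (editorial sketch, made-up
 * numbers): iomap_bn is in 512-byte basic blocks (BBSHIFT == 9).  With
 * 4k filesystem blocks (block_bits == 12), iomap_bn = 800,
 * iomap_offset = 1M and offset = 1M + 8k:
 *
 *	bn = (800 >> (12 - 9)) + (8k >> 12)
 *	   = 100 + 2 = 102
 *
 * i.e. b_blocknr ends up in filesystem-block units, offset by the
 * distance of this buffer from the start of the mapping.
 */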
/*
 * Look for a page at index that is suitable for clustering.
 */
STATIC unsigned int
xfs_probe_page(
	struct page		*page,
	unsigned int		pg_offset,
	int			mapped)
{
	int			ret = 0;

	if (PageWriteback(page))
		return 0;

	if (page->mapping && PageDirty(page)) {
		if (page_has_buffers(page)) {
			struct buffer_head	*bh, *head;

			bh = head = page_buffers(page);
			do {
				if (!buffer_uptodate(bh))
					break;
				if (mapped != buffer_mapped(bh))
					break;
				ret += bh->b_size;
				if (ret >= pg_offset)
					break;
			} while ((bh = bh->b_this_page) != head);
		} else
			ret = mapped ? 0 : PAGE_CACHE_SIZE;
	}

	return ret;
}

STATIC size_t
xfs_probe_cluster(
	struct inode		*inode,
	struct page		*startpage,
	struct buffer_head	*bh,
	struct buffer_head	*head,
	int			mapped)
{
	struct pagevec		pvec;
	pgoff_t			tindex, tlast, tloff;
	size_t			total = 0;
	int			done = 0, i;

	/* First sum forwards in this page */
	do {
		if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh)))
			return total;
		total += bh->b_size;
	} while ((bh = bh->b_this_page) != head);

	/* if we reached the end of the page, sum forwards in following pages */
	tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
	tindex = startpage->index + 1;

	/* Prune this back to avoid pathological behavior */
	tloff = min(tlast, startpage->index + 64);

	pagevec_init(&pvec, 0);
	while (!done && tindex <= tloff) {
		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);

		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
			break;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			size_t pg_offset, len = 0;

			if (tindex == tlast) {
				pg_offset =
				    i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
				if (!pg_offset) {
					done = 1;
					break;
				}
			} else
				pg_offset = PAGE_CACHE_SIZE;

			if (page->index == tindex && !TestSetPageLocked(page)) {
				len = xfs_probe_page(page, pg_offset, mapped);
				unlock_page(page);
			}

			if (!len) {
				done = 1;
				break;
			}

			total += len;
			tindex++;
		}

		pagevec_release(&pvec);
		cond_resched();
	}

	return total;
}

/*
 * Test if a given page is suitable for writing as part of an unwritten
 * or delayed allocate extent.
 */
STATIC int
xfs_is_delayed_page(
	struct page		*page,
	unsigned int		type)
{
	if (PageWriteback(page))
		return 0;

	if (page->mapping && page_has_buffers(page)) {
		struct buffer_head	*bh, *head;
		int			acceptable = 0;

		bh = head = page_buffers(page);
		do {
			if (buffer_unwritten(bh))
				acceptable = (type == IOMAP_UNWRITTEN);
			else if (buffer_delay(bh))
				acceptable = (type == IOMAP_DELAY);
			else if (buffer_dirty(bh) && buffer_mapped(bh))
				acceptable = (type == IOMAP_NEW);
			else
				break;
		} while ((bh = bh->b_this_page) != head);

		if (acceptable)
			return 1;
	}

	return 0;
}
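
/*
 * Example (editorial): when clustering for an IOMAP_UNWRITTEN ioend, a
 * neighbouring page whose buffers are all unwritten is acceptable, a
 * page with delalloc buffers is not (it still needs an allocation),
 * and a clean or writeback page stops the cluster.  The same test with
 * type == IOMAP_DELAY or IOMAP_NEW gates the other two cases.
 */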
/*
 * Allocate & map buffers for page given the extent map.  Write it out.
 * Except for the original page of a writepage, this is called on
 * delalloc/unwritten pages only; for the original page it is possible
 * that the page has no mapping at all.
 */
STATIC int
xfs_convert_page(
	struct inode		*inode,
	struct page		*page,
	loff_t			tindex,
	xfs_iomap_t		*mp,
	xfs_ioend_t		**ioendp,
	struct writeback_control *wbc,
	int			startio,
	int			all_bh)
{
	struct buffer_head	*bh, *head;
	xfs_off_t		end_offset;
	unsigned long		p_offset;
	unsigned int		type;
	int			bbits = inode->i_blkbits;
	int			len, page_dirty;
	int			count = 0, done = 0, uptodate = 1;
	xfs_off_t		offset = page_offset(page);

	if (page->index != tindex)
		goto fail;
	if (TestSetPageLocked(page))
		goto fail;
	if (PageWriteback(page))
		goto fail_unlock_page;
	if (page->mapping != inode->i_mapping)
		goto fail_unlock_page;
	if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
		goto fail_unlock_page;

	/*
	 * page_dirty is initially a count of buffers on the page before
	 * EOF and is decremented as we move each into a cleanable state.
	 *
	 * Derivation:
	 *
	 * End offset is the highest offset that this page should represent.
	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
	 * hence give us the correct page_dirty count. On any other page,
	 * it will be zero and in that case we need page_dirty to be the
	 * count of buffers on the page.
	 */
	end_offset = min_t(unsigned long long,
			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
			i_size_read(inode));

	len = 1 << inode->i_blkbits;
	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
					PAGE_CACHE_SIZE);
	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
	page_dirty = p_offset / len;

	bh = head = page_buffers(page);
	do {
		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;
		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
			done = 1;
			continue;
		}

		if (buffer_unwritten(bh) || buffer_delay(bh)) {
			if (buffer_unwritten(bh))
				type = IOMAP_UNWRITTEN;
			else
				type = IOMAP_DELAY;

			if (!xfs_iomap_valid(mp, offset)) {
				done = 1;
				continue;
			}

			ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
			ASSERT(!(mp->iomap_flags & IOMAP_DELAY));

			xfs_map_at_offset(bh, offset, bbits, mp);
			if (startio) {
				xfs_add_to_ioend(inode, bh, offset,
						type, ioendp, done);
			} else {
				set_buffer_dirty(bh);
				unlock_buffer(bh);
				mark_buffer_dirty(bh);
			}
			page_dirty--;
			count++;
		} else {
			type = IOMAP_NEW;
			if (buffer_mapped(bh) && all_bh && startio) {
				lock_buffer(bh);
				xfs_add_to_ioend(inode, bh, offset,
						type, ioendp, done);
				count++;
				page_dirty--;
			} else {
				done = 1;
			}
		}
	} while (offset += len, (bh = bh->b_this_page) != head);

	if (uptodate && bh == head)
		SetPageUptodate(page);

	if (startio) {
		if (count) {
			struct backing_dev_info *bdi;

			bdi = inode->i_mapping->backing_dev_info;
			wbc->nr_to_write--;
			if (bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				done = 1;
			} else if (wbc->nr_to_write <= 0) {
				done = 1;
			}
		}
		xfs_start_page_writeback(page, wbc, !page_dirty, count);
	}

	return done;
 fail_unlock_page:
	unlock_page(page);
 fail:
	return 1;
}
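
/*
 * Worked example for the page_dirty derivation above (editorial sketch):
 * with PAGE_CACHE_SIZE = 4k, 512-byte blocks (len = 512) and EOF falling
 * 1025 bytes into the last page:
 *
 *	p_offset = end_offset & (PAGE_CACHE_SIZE - 1) = 1025
 *	p_offset = roundup(1025, 512)                 = 1536
 *	page_dirty = 1536 / 512                       = 3
 *
 * so only the three buffers lying (partly) before EOF are counted.  On
 * any page fully below EOF, p_offset is 0 and so becomes PAGE_CACHE_SIZE,
 * making page_dirty the full buffer count (4096 / 512 = 8 here).
 */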
/*
 * Convert & write out a cluster of pages in the same extent as defined
 * by mp and following the start page.
 */
STATIC void
xfs_cluster_write(
	struct inode		*inode,
	pgoff_t			tindex,
	xfs_iomap_t		*iomapp,
	xfs_ioend_t		**ioendp,
	struct writeback_control *wbc,
	int			startio,
	int			all_bh,
	pgoff_t			tlast)
{
	struct pagevec		pvec;
	int			done = 0, i;

	pagevec_init(&pvec, 0);
	while (!done && tindex <= tlast) {
		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);

		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
			break;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
					iomapp, ioendp, wbc, startio, all_bh);
			if (done)
				break;
		}

		pagevec_release(&pvec);
		cond_resched();
	}
}
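
/*
 * Usage sketch (editorial): xfs_page_state_convert() below drives this
 * after mapping the initial page, clamping the cluster to both the end
 * of the current mapping and the last page of the file:
 *
 *	offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
 *				PAGE_CACHE_SHIFT;
 *	tlast = min_t(pgoff_t, offset, last_index);
 *	xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
 *				wbc, startio, all_bh, tlast);
 */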
/*
 * Calling this without startio set means we are being asked to make a dirty
 * page ready for freeing its buffers.  When called with startio set then
 * we are coming from writepage.
 *
 * When called with startio set it is important that we write the WHOLE
 * page if possible.
 * The bh->b_state's cannot know if any of the blocks or which block for
 * that matter are dirty due to mmap writes, and therefore bh uptodate is
 * only valid if the page itself isn't completely uptodate.  Some layers
 * may clear the page dirty flag prior to calling writepage, under the
 * assumption the entire page will be written out; by not writing out the
 * whole page the page can be reused before all valid dirty data is
 * written out.  Note: in the case of a page that has been dirtied by
 * mmap write, but only partially set up by block_prepare_write, the
 * bh states will not agree and only the ones set up by BPW/BCW will
 * have valid state; thus the whole page must be written out.
 */
STATIC int
xfs_page_state_convert(
	struct inode	*inode,
	struct page	*page,
	struct writeback_control *wbc,
	int		startio,
	int		unmapped) /* also implies page uptodate */
{
	struct buffer_head	*bh, *head;
	xfs_iomap_t		iomap;
	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
	loff_t			offset;
	unsigned long		p_offset = 0;
	unsigned int		type;
	__uint64_t		end_offset;
	pgoff_t			end_index, last_index, tlast;
	ssize_t			size, len;
	int			flags, err, iomap_valid = 0, uptodate = 1;
	int			page_dirty, count = 0;
	int			trylock = 0;
	int			all_bh = unmapped;

	if (startio) {
		if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
			trylock |= BMAPI_TRYLOCK;
	}

	/* Is this page beyond the end of the file? */
	offset = i_size_read(inode);
	end_index = offset >> PAGE_CACHE_SHIFT;
	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
	if (page->index >= end_index) {
		if ((page->index >= end_index + 1) ||
		    !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
			if (startio)
				unlock_page(page);
			return 0;
		}
	}

	/*
	 * page_dirty is initially a count of buffers on the page before
	 * EOF and is decremented as we move each into a cleanable state.
	 *
	 * Derivation:
	 *
	 * End offset is the highest offset that this page should represent.
	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
	 * hence give us the correct page_dirty count. On any other page,
	 * it will be zero and in that case we need page_dirty to be the
	 * count of buffers on the page.
	 */
	end_offset = min_t(unsigned long long,
			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
	len = 1 << inode->i_blkbits;
	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
					PAGE_CACHE_SIZE);
	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
	page_dirty = p_offset / len;

	bh = head = page_buffers(page);
	offset = page_offset(page);
	flags = BMAPI_READ;
	type = IOMAP_NEW;

	/* TODO: cleanup count and page_dirty */

	do {
		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;
		if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
			/*
			 * the iomap is actually still valid, but the ioend
			 * isn't.  shouldn't happen too often.
			 */
			iomap_valid = 0;
			continue;
		}

		if (iomap_valid)
			iomap_valid = xfs_iomap_valid(&iomap, offset);

		/*
		 * First case, map an unwritten extent and prepare for
		 * extent state conversion transaction on completion.
		 *
		 * Second case, allocate space for a delalloc buffer.
		 * We can return EAGAIN here in the release page case.
		 *
		 * Third case, an unmapped buffer was found, and we are
		 * in a path where we need to write the whole page out.
		 */
		if (buffer_unwritten(bh) || buffer_delay(bh) ||
		    ((buffer_uptodate(bh) || PageUptodate(page)) &&
		     !buffer_mapped(bh) && (unmapped || startio))) {
			/*
			 * Make sure we don't use a read-only iomap
			 */
			if (flags == BMAPI_READ)
				iomap_valid = 0;

			if (buffer_unwritten(bh)) {
				type = IOMAP_UNWRITTEN;
				flags = BMAPI_WRITE | BMAPI_IGNSTATE;
			} else if (buffer_delay(bh)) {
				type = IOMAP_DELAY;
				flags = BMAPI_ALLOCATE | trylock;
			} else {
				type = IOMAP_NEW;
				flags = BMAPI_WRITE | BMAPI_MMAP;
			}

			if (!iomap_valid) {
				if (type == IOMAP_NEW) {
					size = xfs_probe_cluster(inode,
							page, bh, head, 0);
				} else {
					size = len;
				}

				err = xfs_map_blocks(inode, offset, size,
						&iomap, flags);
				if (err)
					goto error;
				iomap_valid = xfs_iomap_valid(&iomap, offset);
			}
			if (iomap_valid) {
				xfs_map_at_offset(bh, offset,
						inode->i_blkbits, &iomap);
				if (startio) {
					xfs_add_to_ioend(inode, bh, offset,
							type, &ioend,
							!iomap_valid);
				} else {
					set_buffer_dirty(bh);
					unlock_buffer(bh);
					mark_buffer_dirty(bh);
				}
				page_dirty--;
				count++;
			}
		} else if (buffer_uptodate(bh) && startio) {
			/*
			 * we got here because the buffer is already mapped.
			 * That means it must already have extents allocated
			 * underneath it. Map the extent by reading it.
			 */
			if (!iomap_valid || flags != BMAPI_READ) {
				flags = BMAPI_READ;
				size = xfs_probe_cluster(inode, page, bh,
								head, 1);
				err = xfs_map_blocks(inode, offset, size,
						&iomap, flags);
				if (err)
					goto error;
				iomap_valid = xfs_iomap_valid(&iomap, offset);
			}

			/*
			 * We set the type to IOMAP_NEW in case we are doing a
			 * small write at EOF that is extending the file but
			 * without needing an allocation. We need to update the
			 * file size on I/O completion in this case so it is
			 * the same case as having just allocated a new extent
			 * that we are writing into for the first time.
			 */
			type = IOMAP_NEW;
			if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
				ASSERT(buffer_mapped(bh));
				if (iomap_valid)
					all_bh = 1;
				xfs_add_to_ioend(inode, bh, offset, type,
						&ioend, !iomap_valid);
				page_dirty--;
				count++;
			} else {
				iomap_valid = 0;
			}
		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
			   (unmapped || startio)) {
			iomap_valid = 0;
		}

		if (!iohead)
			iohead = ioend;

	} while (offset += len, ((bh = bh->b_this_page) != head));

	if (uptodate && bh == head)
		SetPageUptodate(page);

	if (startio)
		xfs_start_page_writeback(page, wbc, 1, count);

	if (ioend && iomap_valid) {
		offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
					PAGE_CACHE_SHIFT;
		tlast = min_t(pgoff_t, offset, last_index);
		xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
					wbc, startio, all_bh, tlast);
	}

	if (iohead)
		xfs_submit_ioend(iohead);

	return page_dirty;

error:
	if (iohead)
		xfs_cancel_ioend(iohead);

	/*
	 * If it's delalloc and we have nowhere to put it,
	 * throw it away, unless the lower layers told
	 * us to try again.
	 */
	if (err != -EAGAIN) {
		if (!unmapped)
			block_invalidatepage(page, 0);
		ClearPageUptodate(page);
	}
	return err;
}
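
/*
 * Note on the return value above (editorial): on success,
 * xfs_page_state_convert() returns the number of buffers that were
 * dirty before EOF but could not be moved to a cleanable state, so 0
 * means the page was fully handled; on failure it returns a negative
 * errno from the mapping calls.  Both callers below rely on this:
 * xfs_vm_writepage() treats a negative value as an error, and
 * xfs_vm_releasepage() treats a nonzero count as "cannot free the
 * buffers yet".
 */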
/*
 * writepage: Called from one of two places:
 *
 * 1. we are flushing a delalloc buffer head.
 *
 * 2. we are writing out a dirty page. Typically the page dirty
 *    state is cleared before we get here. In this case it is
 *    conceivable we have no buffer heads.
 *
 * For delalloc space on the page we need to allocate space and
 * flush it. For unmapped buffer heads on the page we should
 * allocate space if the page is uptodate. For any other dirty
 * buffer heads on the page we should flush them.
 *
 * If we detect that a transaction would be required to flush
 * the page, we have to check the process flags first: if we
 * are already in a transaction or disk I/O during allocations
 * is off, we need to fail the writepage and redirty the page.
 */
STATIC int
xfs_vm_writepage(
	struct page		*page,
	struct writeback_control *wbc)
{
	int			error;
	int			need_trans;
	int			delalloc, unmapped, unwritten;
	struct inode		*inode = page->mapping->host;

	xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);

	/*
	 * We need a transaction if:
	 *  1. There are delalloc buffers on the page
	 *  2. The page is uptodate and we have unmapped buffers
	 *  3. The page is uptodate and we have no buffers
	 *  4. There are unwritten buffers on the page
	 */

	if (!page_has_buffers(page)) {
		unmapped = 1;
		need_trans = 1;
	} else {
		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
		if (!PageUptodate(page))
			unmapped = 0;
		need_trans = delalloc + unmapped + unwritten;
	}

	/*
	 * If we need a transaction and the process flags say
	 * we are already in a transaction, or no IO is allowed
	 * then mark the page dirty again and leave the page
	 * as is.
	 */
	if (current_test_flags(PF_FSTRANS) && need_trans)
		goto out_fail;

	/*
	 * Delay hooking up buffer heads until we have
	 * made our go/no-go decision.
	 */
	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << inode->i_blkbits, 0);

	/*
	 * Convert delayed allocate, unwritten or unmapped space
	 * to real space and flush out to disk.
	 */
	error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
	if (error == -EAGAIN)
		goto out_fail;
	if (unlikely(error < 0))
		goto out_unlock;

	return 0;

out_fail:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
out_unlock:
	unlock_page(page);
	return error;
}

STATIC int
xfs_vm_writepages(
	struct address_space	*mapping,
	struct writeback_control *wbc)
{
	struct bhv_vnode *vp = vn_from_inode(mapping->host);

	if (VN_TRUNC(vp))
		VUNTRUNCATE(vp);
	return generic_writepages(mapping, wbc);
}

/*
 * Called to move a page into cleanable state - and from there
 * to be released. Possibly the page is already clean. We always
 * have buffer heads in this call.
 *
 * Returns nonzero if the page is ok to release, 0 otherwise.
 *
 * Possible scenarios are:
 *
 * 1. We are being called to release a page which has been written
 *    to via regular I/O. buffer heads will be dirty and possibly
 *    delalloc. If no delalloc buffer heads in this case then we
 *    can just return zero.
 *
 * 2. We are called to release a page which has been written via
 *    mmap, all we need to do is ensure there is no delalloc
 *    state in the buffer heads, if not we can let the caller
 *    free them and we should come back later via writepage.
 */
STATIC int
xfs_vm_releasepage(
	struct page		*page,
	gfp_t			gfp_mask)
{
	struct inode		*inode = page->mapping->host;
	int			dirty, delalloc, unmapped, unwritten;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, 0);

	if (!page_has_buffers(page))
		return 0;

	xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
	if (!delalloc && !unwritten)
		goto free_buffers;

	if (!(gfp_mask & __GFP_FS))
		return 0;

	/* If we are already inside a transaction or the thread cannot
	 * do I/O, we cannot release this page.
	 */
	if (current_test_flags(PF_FSTRANS))
		return 0;

	/*
	 * Convert delalloc space to real space, do not flush the
	 * data out to disk, that will be done by the caller.
	 * Never need to allocate space here - we will always
	 * come back to writepage in that case.
	 */
	dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
	if (dirty == 0 && !unwritten)
		goto free_buffers;
	return 0;

free_buffers:
	return try_to_free_buffers(page);
}
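
/*
 * Context sketch (editorial): ->releasepage is typically reached via
 * try_to_release_page() from memory reclaim or truncation.  A reclaim
 * caller without __GFP_FS set gets an immediate 0 ("cannot release")
 * above, because converting delalloc space may require filesystem
 * transactions and hence recursion back into the filesystem.
 */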
STATIC int
__xfs_get_blocks(
	struct inode		*inode,
	sector_t		iblock,
	struct buffer_head	*bh_result,
	int			create,
	int			direct,
	bmapi_flags_t		flags)
{
	bhv_vnode_t		*vp = vn_from_inode(inode);
	xfs_iomap_t		iomap;
	xfs_off_t		offset;
	ssize_t			size;
	int			niomap = 1;
	int			error;

	offset = (xfs_off_t)iblock << inode->i_blkbits;
	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
	size = bh_result->b_size;
	error = bhv_vop_bmap(vp, offset, size,
			     create ? flags : BMAPI_READ, &iomap, &niomap);
	if (error)
		return -error;
	if (niomap == 0)
		return 0;

	if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
		/*
		 * For unwritten extents do not report a disk address on
		 * the read case (treat as if we're reading into a hole).
		 */
		if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
			xfs_map_buffer(bh_result, &iomap, offset,
				       inode->i_blkbits);
		}
		if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
			if (direct)
				bh_result->b_private = inode;
			set_buffer_unwritten(bh_result);
		}
	}

	/*
	 * If this is a realtime file, data may be on a different device
	 * to the one currently pointed to by the buffer_head's b_bdev.
	 */
	bh_result->b_bdev = iomap.iomap_target->bt_bdev;

	/*
	 * If we previously allocated a block out beyond eof and we are now
	 * coming back to use it then we will need to flag it as new even if it
	 * has a disk address.
	 *
	 * With sub-block writes into unwritten extents we also need to mark
	 * the buffer as new so that the unwritten parts of the buffer get
	 * correctly zeroed.
	 */
	if (create &&
	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
	     (offset >= i_size_read(inode)) ||
	     (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN))))
		set_buffer_new(bh_result);

	if (iomap.iomap_flags & IOMAP_DELAY) {
		BUG_ON(direct);
		if (create) {
			set_buffer_uptodate(bh_result);
			set_buffer_mapped(bh_result);
			set_buffer_delay(bh_result);
		}
	}

	if (direct || size > (1 << inode->i_blkbits)) {
		ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
		offset = min_t(xfs_off_t,
				iomap.iomap_bsize - iomap.iomap_delta, size);
		bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
	}

	return 0;
}

int
xfs_get_blocks(
	struct inode		*inode,
	sector_t		iblock,
	struct buffer_head	*bh_result,
	int			create)
{
	return __xfs_get_blocks(inode, iblock,
				bh_result, create, 0, BMAPI_WRITE);
}

STATIC int
xfs_get_blocks_direct(
	struct inode		*inode,
	sector_t		iblock,
	struct buffer_head	*bh_result,
	int			create)
{
	return __xfs_get_blocks(inode, iblock,
				bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
}
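
/*
 * Note on the b_size handling in __xfs_get_blocks() above (editorial):
 * for direct I/O and multi-block requests the function trims b_size to
 * the part of the request that the returned mapping actually covers.
 * For example (made-up numbers), a 1M request against a mapping with
 * only 64k remaining past iomap_delta comes back with b_size = 64k, and
 * the caller then asks again for the rest.  This is what lets one bmap
 * call service a large direct I/O instead of one call per block.
 */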
STATIC void
xfs_end_io_direct(
	struct kiocb	*iocb,
	loff_t		offset,
	ssize_t		size,
	void		*private)
{
	xfs_ioend_t	*ioend = iocb->private;

	/*
	 * Non-NULL private data means we need to issue a transaction to
	 * convert a range from unwritten to written extents.  This needs
	 * to happen from process context but aio+dio I/O completion
	 * happens from irq context so we need to defer it to a workqueue.
	 * This is not necessary for synchronous direct I/O, but we do
	 * it anyway to keep the code uniform and simpler.
	 *
	 * The core direct I/O code might be changed to always call the
	 * completion handler in the future, in which case all this can
	 * go away.
	 */
	ioend->io_offset = offset;
	ioend->io_size = size;
	if (ioend->io_type == IOMAP_READ) {
		xfs_finish_ioend(ioend);
	} else if (private && size > 0) {
		xfs_finish_ioend(ioend);
	} else {
		/*
		 * A direct I/O write ioend starts its life in unwritten
		 * state in case it maps an unwritten extent.  This write
		 * didn't map an unwritten extent, so switch its completion
		 * handler.
		 */
		INIT_WORK(&ioend->io_work, xfs_end_bio_written);
		xfs_finish_ioend(ioend);
	}

	/*
	 * blockdev_direct_IO can return an error even after the I/O
	 * completion handler was called.  Thus we need to protect
	 * against double-freeing.
	 */
	iocb->private = NULL;
}

STATIC ssize_t
xfs_vm_direct_IO(
	int			rw,
	struct kiocb		*iocb,
	const struct iovec	*iov,
	loff_t			offset,
	unsigned long		nr_segs)
{
	struct file	*file = iocb->ki_filp;
	struct inode	*inode = file->f_mapping->host;
	bhv_vnode_t	*vp = vn_from_inode(inode);
	xfs_iomap_t	iomap;
	int		maps = 1;
	int		error;
	ssize_t		ret;

	error = bhv_vop_bmap(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps);
	if (error)
		return -error;

	if (rw == WRITE) {
		iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
			iomap.iomap_target->bt_bdev,
			iov, offset, nr_segs,
			xfs_get_blocks_direct,
			xfs_end_io_direct);
	} else {
		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
			iomap.iomap_target->bt_bdev,
			iov, offset, nr_segs,
			xfs_get_blocks_direct,
			xfs_end_io_direct);
	}

	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
		xfs_destroy_ioend(iocb->private);
	return ret;
}

STATIC int
xfs_vm_prepare_write(
	struct file		*file,
	struct page		*page,
	unsigned int		from,
	unsigned int		to)
{
	return block_prepare_write(page, from, to, xfs_get_blocks);
}

STATIC sector_t
xfs_vm_bmap(
	struct address_space	*mapping,
	sector_t		block)
{
	struct inode		*inode = (struct inode *)mapping->host;
	bhv_vnode_t		*vp = vn_from_inode(inode);

	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
	bhv_vop_rwlock(vp, VRWLOCK_READ);
	bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF);
	bhv_vop_rwunlock(vp, VRWLOCK_READ);
	return generic_block_bmap(mapping, block, xfs_get_blocks);
}

STATIC int
xfs_vm_readpage(
	struct file		*unused,
	struct page		*page)
{
	return mpage_readpage(page, xfs_get_blocks);
}

STATIC int
xfs_vm_readpages(
	struct file		*unused,
	struct address_space	*mapping,
	struct list_head	*pages,
	unsigned		nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}

STATIC void
xfs_vm_invalidatepage(
	struct page		*page,
	unsigned long		offset)
{
	xfs_page_trace(XFS_INVALIDPAGE_ENTER,
			page->mapping->host, page, offset);
	block_invalidatepage(page, offset);
}

const struct address_space_operations xfs_address_space_operations = {
	.readpage		= xfs_vm_readpage,
	.readpages		= xfs_vm_readpages,
	.writepage		= xfs_vm_writepage,
	.writepages		= xfs_vm_writepages,
	.sync_page		= block_sync_page,
	.releasepage		= xfs_vm_releasepage,
	.invalidatepage		= xfs_vm_invalidatepage,
	.prepare_write		= xfs_vm_prepare_write,
	.commit_write		= generic_commit_write,
	.bmap			= xfs_vm_bmap,
	.direct_IO		= xfs_vm_direct_IO,
	.migratepage		= buffer_migrate_page,
};
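
/*
 * Hookup sketch (editorial, wiring lives outside this file): the inode
 * setup code points each XFS inode's mapping at the table above, after
 * which the VM and generic file paths call back through it:
 *
 *	inode->i_mapping->a_ops = &xfs_address_space_operations;
 *
 * e.g. the generic read path reaches xfs_vm_readpage()/readpages() and
 * VM writeback reaches xfs_vm_writepage()/xfs_vm_writepages().
 */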