1/* 2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net> 3 * All rights reserved. 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 */ 15 16#include <linux/module.h> 17#include <linux/backing-dev.h> 18#include <linux/crypto.h> 19#include <linux/fs.h> 20#include <linux/jhash.h> 21#include <linux/hash.h> 22#include <linux/ktime.h> 23#include <linux/mm.h> 24#include <linux/mount.h> 25#include <linux/pagemap.h> 26#include <linux/pagevec.h> 27#include <linux/parser.h> 28#include <linux/swap.h> 29#include <linux/slab.h> 30#include <linux/statfs.h> 31#include <linux/writeback.h> 32 33#include "netfs.h" 34 35#define POHMELFS_MAGIC_NUM 0x504f482e 36 37static struct kmem_cache *pohmelfs_inode_cache; 38static atomic_t psb_bdi_num = ATOMIC_INIT(0); 39 40/* 41 * Removes inode from all trees, drops local name cache and removes all queued 42 * requests for object removal. 43 */ 44void pohmelfs_inode_del_inode(struct pohmelfs_sb *psb, struct pohmelfs_inode *pi) 45{ 46 mutex_lock(&pi->offset_lock); 47 pohmelfs_free_names(pi); 48 mutex_unlock(&pi->offset_lock); 49 50 dprintk("%s: deleted stuff in ino: %llu.\n", __func__, pi->ino); 51} 52 53/* 54 * Sync inode to server. 55 * Returns zero in success and negative error value otherwise. 56 * It will gather path to root directory into structures containing 57 * creation mode, permissions and names, so that the whole path 58 * to given inode could be created using only single network command. 59 */ 60int pohmelfs_write_inode_create(struct inode *inode, struct netfs_trans *trans) 61{ 62 struct pohmelfs_inode *pi = POHMELFS_I(inode); 63 int err = -ENOMEM, size; 64 struct netfs_cmd *cmd; 65 void *data; 66 int cur_len = netfs_trans_cur_len(trans); 67 68 if (unlikely(cur_len < 0)) 69 return -ETOOSMALL; 70 71 cmd = netfs_trans_current(trans); 72 cur_len -= sizeof(struct netfs_cmd); 73 74 data = (void *)(cmd + 1); 75 76 err = pohmelfs_construct_path_string(pi, data, cur_len); 77 if (err < 0) 78 goto err_out_exit; 79 80 size = err; 81 82 cmd->start = i_size_read(inode); 83 cmd->cmd = NETFS_CREATE; 84 cmd->size = size; 85 cmd->id = pi->ino; 86 cmd->ext = inode->i_mode; 87 88 netfs_convert_cmd(cmd); 89 90 netfs_trans_update(cmd, trans, size); 91 92 return 0; 93 94err_out_exit: 95 printk("%s: completed ino: %llu, err: %d.\n", __func__, pi->ino, err); 96 return err; 97} 98 99static int pohmelfs_write_trans_complete(struct page **pages, unsigned int page_num, 100 void *private, int err) 101{ 102 unsigned i; 103 104 dprintk("%s: pages: %lu-%lu, page_num: %u, err: %d.\n", 105 __func__, pages[0]->index, pages[page_num-1]->index, 106 page_num, err); 107 108 for (i = 0; i < page_num; i++) { 109 struct page *page = pages[i]; 110 111 if (!page) 112 continue; 113 114 end_page_writeback(page); 115 116 if (err < 0) { 117 SetPageError(page); 118 set_page_dirty(page); 119 } 120 121 unlock_page(page); 122 page_cache_release(page); 123 124 /* dprintk("%s: %3u/%u: page: %p.\n", __func__, i, page_num, page); */ 125 } 126 return err; 127} 128 129static int pohmelfs_inode_has_dirty_pages(struct address_space *mapping, pgoff_t index) 130{ 131 int ret; 132 struct page *page; 133 134 rcu_read_lock(); 135 ret = radix_tree_gang_lookup_tag(&mapping->page_tree, 136 (void **)&page, index, 1, PAGECACHE_TAG_DIRTY); 137 rcu_read_unlock(); 138 return ret; 139} 140 141static int pohmelfs_writepages(struct address_space *mapping, struct writeback_control *wbc) 142{ 143 struct inode *inode = mapping->host; 144 struct pohmelfs_inode *pi = POHMELFS_I(inode); 145 struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb); 146 int err = 0; 147 int done = 0; 148 int nr_pages; 149 pgoff_t index; 150 pgoff_t end; /* Inclusive */ 151 int scanned = 0; 152 int range_whole = 0; 153 154 if (wbc->range_cyclic) { 155 index = mapping->writeback_index; /* Start from prev offset */ 156 end = -1; 157 } else { 158 index = wbc->range_start >> PAGE_CACHE_SHIFT; 159 end = wbc->range_end >> PAGE_CACHE_SHIFT; 160 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 161 range_whole = 1; 162 scanned = 1; 163 } 164retry: 165 while (!done && (index <= end)) { 166 unsigned int i = min(end - index, (pgoff_t)psb->trans_max_pages); 167 int path_len; 168 struct netfs_trans *trans; 169 170 err = pohmelfs_inode_has_dirty_pages(mapping, index); 171 if (!err) 172 break; 173 174 err = pohmelfs_path_length(pi); 175 if (err < 0) 176 break; 177 178 path_len = err; 179 180 if (path_len <= 2) { 181 err = -ENOENT; 182 break; 183 } 184 185 trans = netfs_trans_alloc(psb, path_len, 0, i); 186 if (!trans) { 187 err = -ENOMEM; 188 break; 189 } 190 trans->complete = &pohmelfs_write_trans_complete; 191 192 trans->page_num = nr_pages = find_get_pages_tag(mapping, &index, 193 PAGECACHE_TAG_DIRTY, trans->page_num, 194 trans->pages); 195 196 dprintk("%s: t: %p, nr_pages: %u, end: %lu, index: %lu, max: %u.\n", 197 __func__, trans, nr_pages, end, index, trans->page_num); 198 199 if (!nr_pages) 200 goto err_out_reset; 201 202 err = pohmelfs_write_inode_create(inode, trans); 203 if (err) 204 goto err_out_reset; 205 206 err = 0; 207 scanned = 1; 208 209 for (i = 0; i < trans->page_num; i++) { 210 struct page *page = trans->pages[i]; 211 212 lock_page(page); 213 214 if (unlikely(page->mapping != mapping)) 215 goto out_continue; 216 217 if (!wbc->range_cyclic && page->index > end) { 218 done = 1; 219 goto out_continue; 220 } 221 222 if (wbc->sync_mode != WB_SYNC_NONE) 223 wait_on_page_writeback(page); 224 225 if (PageWriteback(page) || 226 !clear_page_dirty_for_io(page)) { 227 dprintk("%s: not clear for io page: %p, writeback: %d.\n", 228 __func__, page, PageWriteback(page)); 229 goto out_continue; 230 } 231 232 set_page_writeback(page); 233 234 trans->attached_size += page_private(page); 235 trans->attached_pages++; 236 wbc->nr_to_write--; 237 238 if (wbc->nr_to_write <= 0) 239 done = 1; 240 241 continue; 242out_continue: 243 unlock_page(page); 244 trans->pages[i] = NULL; 245 } 246 247 err = netfs_trans_finish(trans, psb); 248 if (err) 249 break; 250 251 continue; 252 253err_out_reset: 254 trans->result = err; 255 netfs_trans_reset(trans); 256 netfs_trans_put(trans); 257 break; 258 } 259 260 if (!scanned && !done) { 261 /* 262 * We hit the last page and there is more work to be done: wrap 263 * back to the start of the file 264 */ 265 scanned = 1; 266 index = 0; 267 goto retry; 268 } 269 270 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 271 mapping->writeback_index = index; 272 273 return err; 274} 275 276/* 277 * Inode writeback creation completion callback. 278 * Only invoked for just created inodes, which do not have pages attached, 279 * like dirs and empty files. 280 */ 281static int pohmelfs_write_inode_complete(struct page **pages, unsigned int page_num, 282 void *private, int err) 283{ 284 struct inode *inode = private; 285 struct pohmelfs_inode *pi = POHMELFS_I(inode); 286 287 if (inode) { 288 if (err) { 289 mark_inode_dirty(inode); 290 clear_bit(NETFS_INODE_REMOTE_SYNCED, &pi->state); 291 } else { 292 set_bit(NETFS_INODE_REMOTE_SYNCED, &pi->state); 293 } 294 295 pohmelfs_put_inode(pi); 296 } 297 298 return err; 299} 300 301int pohmelfs_write_create_inode(struct pohmelfs_inode *pi) 302{ 303 struct netfs_trans *t; 304 struct inode *inode = &pi->vfs_inode; 305 struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb); 306 int err; 307 308 if (test_bit(NETFS_INODE_REMOTE_SYNCED, &pi->state)) 309 return 0; 310 311 dprintk("%s: started ino: %llu.\n", __func__, pi->ino); 312 313 err = pohmelfs_path_length(pi); 314 if (err < 0) 315 goto err_out_exit; 316 317 t = netfs_trans_alloc(psb, err + 1, 0, 0); 318 if (!t) { 319 err = -ENOMEM; 320 goto err_out_exit; 321 } 322 t->complete = pohmelfs_write_inode_complete; 323 t->private = igrab(inode); 324 if (!t->private) { 325 err = -ENOENT; 326 goto err_out_put; 327 } 328 329 err = pohmelfs_write_inode_create(inode, t); 330 if (err) 331 goto err_out_put; 332 333 netfs_trans_finish(t, POHMELFS_SB(inode->i_sb)); 334 335 return 0; 336 337err_out_put: 338 t->result = err; 339 netfs_trans_put(t); 340err_out_exit: 341 return err; 342} 343 344/* 345 * Sync all not-yet-created children in given directory to the server. 346 */ 347static int pohmelfs_write_inode_create_children(struct inode *inode) 348{ 349 struct pohmelfs_inode *parent = POHMELFS_I(inode); 350 struct super_block *sb = inode->i_sb; 351 struct pohmelfs_name *n; 352 353 while (!list_empty(&parent->sync_create_list)) { 354 n = NULL; 355 mutex_lock(&parent->offset_lock); 356 if (!list_empty(&parent->sync_create_list)) { 357 n = list_first_entry(&parent->sync_create_list, 358 struct pohmelfs_name, sync_create_entry); 359 list_del_init(&n->sync_create_entry); 360 } 361 mutex_unlock(&parent->offset_lock); 362 363 if (!n) 364 break; 365 366 inode = ilookup(sb, n->ino); 367 368 dprintk("%s: parent: %llu, ino: %llu, inode: %p.\n", 369 __func__, parent->ino, n->ino, inode); 370 371 if (inode && (inode->i_state & I_DIRTY)) { 372 struct pohmelfs_inode *pi = POHMELFS_I(inode); 373 pohmelfs_write_create_inode(pi); 374 /* pohmelfs_meta_command(pi, NETFS_INODE_INFO, 0, NULL, NULL, 0); */ 375 iput(inode); 376 } 377 } 378 379 return 0; 380} 381 382/* 383 * Removes given child from given inode on server. 384 */ 385int pohmelfs_remove_child(struct pohmelfs_inode *pi, struct pohmelfs_name *n) 386{ 387 return pohmelfs_meta_command_data(pi, pi->ino, NETFS_REMOVE, NULL, 0, NULL, NULL, 0); 388} 389 390/* 391 * Writeback for given inode. 392 */ 393static int pohmelfs_write_inode(struct inode *inode, 394 struct writeback_control *wbc) 395{ 396 struct pohmelfs_inode *pi = POHMELFS_I(inode); 397 398 pohmelfs_write_create_inode(pi); 399 pohmelfs_write_inode_create_children(inode); 400 401 return 0; 402} 403 404/* 405 * It is not exported, sorry... 406 */ 407static inline wait_queue_head_t *page_waitqueue(struct page *page) 408{ 409 const struct zone *zone = page_zone(page); 410 411 return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)]; 412} 413 414static int pohmelfs_wait_on_page_locked(struct page *page) 415{ 416 struct pohmelfs_sb *psb = POHMELFS_SB(page->mapping->host->i_sb); 417 long ret = psb->wait_on_page_timeout; 418 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); 419 int err = 0; 420 421 if (!PageLocked(page)) 422 return 0; 423 424 for (;;) { 425 prepare_to_wait(page_waitqueue(page), 426 &wait.wait, TASK_INTERRUPTIBLE); 427 428 dprintk("%s: page: %p, locked: %d, uptodate: %d, error: %d, flags: %lx.\n", 429 __func__, page, PageLocked(page), PageUptodate(page), 430 PageError(page), page->flags); 431 432 if (!PageLocked(page)) 433 break; 434 435 if (!signal_pending(current)) { 436 ret = schedule_timeout(ret); 437 if (!ret) 438 break; 439 continue; 440 } 441 ret = -ERESTARTSYS; 442 break; 443 } 444 finish_wait(page_waitqueue(page), &wait.wait); 445 446 if (!ret) 447 err = -ETIMEDOUT; 448 449 450 if (!err) 451 SetPageUptodate(page); 452 453 if (err) 454 printk("%s: page: %p, uptodate: %d, locked: %d, err: %d.\n", 455 __func__, page, PageUptodate(page), PageLocked(page), err); 456 457 return err; 458} 459 460static int pohmelfs_read_page_complete(struct page **pages, unsigned int page_num, 461 void *private, int err) 462{ 463 struct page *page = private; 464 465 if (PageChecked(page)) 466 return err; 467 468 if (err < 0) { 469 dprintk("%s: page: %p, err: %d.\n", __func__, page, err); 470 SetPageError(page); 471 } 472 473 unlock_page(page); 474 475 return err; 476} 477 478/* 479 * Read a page from remote server. 480 * Function will wait until page is unlocked. 481 */ 482static int pohmelfs_readpage(struct file *file, struct page *page) 483{ 484 struct inode *inode = page->mapping->host; 485 struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb); 486 struct pohmelfs_inode *pi = POHMELFS_I(inode); 487 struct netfs_trans *t; 488 struct netfs_cmd *cmd; 489 int err, path_len; 490 void *data; 491 u64 isize; 492 493 err = pohmelfs_data_lock(pi, page->index << PAGE_CACHE_SHIFT, 494 PAGE_SIZE, POHMELFS_READ_LOCK); 495 if (err) 496 goto err_out_exit; 497 498 isize = i_size_read(inode); 499 if (isize <= page->index << PAGE_CACHE_SHIFT) { 500 SetPageUptodate(page); 501 unlock_page(page); 502 return 0; 503 } 504 505 path_len = pohmelfs_path_length(pi); 506 if (path_len < 0) { 507 err = path_len; 508 goto err_out_exit; 509 } 510 511 t = netfs_trans_alloc(psb, path_len, NETFS_TRANS_SINGLE_DST, 0); 512 if (!t) { 513 err = -ENOMEM; 514 goto err_out_exit; 515 } 516 517 t->complete = pohmelfs_read_page_complete; 518 t->private = page; 519 520 cmd = netfs_trans_current(t); 521 data = (void *)(cmd + 1); 522 523 err = pohmelfs_construct_path_string(pi, data, path_len); 524 if (err < 0) 525 goto err_out_free; 526 527 path_len = err; 528 529 cmd->id = pi->ino; 530 cmd->start = page->index; 531 cmd->start <<= PAGE_CACHE_SHIFT; 532 cmd->size = PAGE_CACHE_SIZE + path_len; 533 cmd->cmd = NETFS_READ_PAGE; 534 cmd->ext = path_len; 535 536 dprintk("%s: path: '%s', page: %p, ino: %llu, start: %llu, size: %lu.\n", 537 __func__, (char *)data, page, pi->ino, cmd->start, PAGE_CACHE_SIZE); 538 539 netfs_convert_cmd(cmd); 540 netfs_trans_update(cmd, t, path_len); 541 542 err = netfs_trans_finish(t, psb); 543 if (err) 544 goto err_out_return; 545 546 return pohmelfs_wait_on_page_locked(page); 547 548err_out_free: 549 t->result = err; 550 netfs_trans_put(t); 551err_out_exit: 552 SetPageError(page); 553 if (PageLocked(page)) 554 unlock_page(page); 555err_out_return: 556 printk("%s: page: %p, start: %lu, size: %lu, err: %d.\n", 557 __func__, page, page->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE, err); 558 559 return err; 560} 561 562/* 563 * Write begin/end magic. 564 * Allocates a page and writes inode if it was not synced to server before. 565 */ 566static int pohmelfs_write_begin(struct file *file, struct address_space *mapping, 567 loff_t pos, unsigned len, unsigned flags, 568 struct page **pagep, void **fsdata) 569{ 570 struct inode *inode = mapping->host; 571 struct page *page; 572 pgoff_t index; 573 unsigned start, end; 574 int err; 575 576 *pagep = NULL; 577 578 index = pos >> PAGE_CACHE_SHIFT; 579 start = pos & (PAGE_CACHE_SIZE - 1); 580 end = start + len; 581 582 page = grab_cache_page(mapping, index); 583 if (!page) { 584 err = -ENOMEM; 585 goto err_out_exit; 586 } 587 588 while (!PageUptodate(page)) { 589 if (start && test_bit(NETFS_INODE_REMOTE_SYNCED, &POHMELFS_I(inode)->state)) { 590 err = pohmelfs_readpage(file, page); 591 if (err) 592 goto err_out_exit; 593 594 lock_page(page); 595 continue; 596 } 597 598 if (len != PAGE_CACHE_SIZE) { 599 void *kaddr = kmap_atomic(page, KM_USER0); 600 601 memset(kaddr + start, 0, PAGE_CACHE_SIZE - start); 602 flush_dcache_page(page); 603 kunmap_atomic(kaddr, KM_USER0); 604 } 605 SetPageUptodate(page); 606 } 607 608 set_page_private(page, end); 609 610 *pagep = page; 611 612 return 0; 613 614err_out_exit: 615 page_cache_release(page); 616 *pagep = NULL; 617 618 return err; 619} 620 621static int pohmelfs_write_end(struct file *file, struct address_space *mapping, 622 loff_t pos, unsigned len, unsigned copied, 623 struct page *page, void *fsdata) 624{ 625 struct inode *inode = mapping->host; 626 627 if (copied != len) { 628 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 629 void *kaddr = kmap_atomic(page, KM_USER0); 630 631 memset(kaddr + from + copied, 0, len - copied); 632 flush_dcache_page(page); 633 kunmap_atomic(kaddr, KM_USER0); 634 } 635 636 SetPageUptodate(page); 637 set_page_dirty(page); 638 flush_dcache_page(page); 639 640 unlock_page(page); 641 page_cache_release(page); 642 643 if (pos + copied > inode->i_size) { 644 struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb); 645 646 psb->avail_size -= pos + copied - inode->i_size; 647 648 i_size_write(inode, pos + copied); 649 } 650 651 return copied; 652} 653 654static int pohmelfs_readpages_trans_complete(struct page **__pages, unsigned int page_num, 655 void *private, int err) 656{ 657 struct pohmelfs_inode *pi = private; 658 unsigned int i, num; 659 struct page **pages, *page = (struct page *)__pages; 660 loff_t index = page->index; 661 662 pages = kzalloc(sizeof(void *) * page_num, GFP_NOIO); 663 if (!pages) 664 return -ENOMEM; 665 666 num = find_get_pages_contig(pi->vfs_inode.i_mapping, index, page_num, pages); 667 if (num <= 0) { 668 err = num; 669 goto err_out_free; 670 } 671 672 for (i = 0; i < num; ++i) { 673 page = pages[i]; 674 675 if (err) 676 printk("%s: %u/%u: page: %p, index: %lu, uptodate: %d, locked: %d, err: %d.\n", 677 __func__, i, num, page, page->index, 678 PageUptodate(page), PageLocked(page), err); 679 680 if (!PageChecked(page)) { 681 if (err < 0) 682 SetPageError(page); 683 unlock_page(page); 684 } 685 page_cache_release(page); 686 page_cache_release(page); 687 } 688 689err_out_free: 690 kfree(pages); 691 return err; 692} 693 694static int pohmelfs_send_readpages(struct pohmelfs_inode *pi, struct page *first, unsigned int num) 695{ 696 struct netfs_trans *t; 697 struct netfs_cmd *cmd; 698 struct pohmelfs_sb *psb = POHMELFS_SB(pi->vfs_inode.i_sb); 699 int err, path_len; 700 void *data; 701 702 err = pohmelfs_data_lock(pi, first->index << PAGE_CACHE_SHIFT, 703 num * PAGE_SIZE, POHMELFS_READ_LOCK); 704 if (err) 705 goto err_out_exit; 706 707 path_len = pohmelfs_path_length(pi); 708 if (path_len < 0) { 709 err = path_len; 710 goto err_out_exit; 711 } 712 713 t = netfs_trans_alloc(psb, path_len, NETFS_TRANS_SINGLE_DST, 0); 714 if (!t) { 715 err = -ENOMEM; 716 goto err_out_exit; 717 } 718 719 cmd = netfs_trans_current(t); 720 data = (void *)(cmd + 1); 721 722 t->complete = pohmelfs_readpages_trans_complete; 723 t->private = pi; 724 t->page_num = num; 725 t->pages = (struct page **)first; 726 727 err = pohmelfs_construct_path_string(pi, data, path_len); 728 if (err < 0) 729 goto err_out_put; 730 731 path_len = err; 732 733 cmd->cmd = NETFS_READ_PAGES; 734 cmd->start = first->index; 735 cmd->start <<= PAGE_CACHE_SHIFT; 736 cmd->size = (num << 8 | PAGE_CACHE_SHIFT); 737 cmd->id = pi->ino; 738 cmd->ext = path_len; 739 740 dprintk("%s: t: %p, gen: %u, path: '%s', path_len: %u, " 741 "start: %lu, num: %u.\n", 742 __func__, t, t->gen, (char *)data, path_len, 743 first->index, num); 744 745 netfs_convert_cmd(cmd); 746 netfs_trans_update(cmd, t, path_len); 747 748 return netfs_trans_finish(t, psb); 749 750err_out_put: 751 netfs_trans_free(t); 752err_out_exit: 753 pohmelfs_readpages_trans_complete((struct page **)first, num, pi, err); 754 return err; 755} 756 757#define list_to_page(head) (list_entry((head)->prev, struct page, lru)) 758 759static int pohmelfs_readpages(struct file *file, struct address_space *mapping, 760 struct list_head *pages, unsigned nr_pages) 761{ 762 unsigned int page_idx, num = 0; 763 struct page *page = NULL, *first = NULL; 764 765 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 766 page = list_to_page(pages); 767 768 prefetchw(&page->flags); 769 list_del(&page->lru); 770 771 if (!add_to_page_cache_lru(page, mapping, 772 page->index, GFP_KERNEL)) { 773 774 if (!num) { 775 num = 1; 776 first = page; 777 continue; 778 } 779 780 dprintk("%s: added to lru page: %p, page_index: %lu, first_index: %lu.\n", 781 __func__, page, page->index, first->index); 782 783 if (unlikely(first->index + num != page->index) || (num > 500)) { 784 pohmelfs_send_readpages(POHMELFS_I(mapping->host), 785 first, num); 786 first = page; 787 num = 0; 788 } 789 790 num++; 791 } 792 } 793 pohmelfs_send_readpages(POHMELFS_I(mapping->host), first, num); 794 795 /* 796 * This will be sync read, so when last page is processed, 797 * all previous are alerady unlocked and ready to be used. 798 */ 799 return 0; 800} 801 802/* 803 * Small address space operations for POHMELFS. 804 */ 805const struct address_space_operations pohmelfs_aops = { 806 .readpage = pohmelfs_readpage, 807 .readpages = pohmelfs_readpages, 808 .writepages = pohmelfs_writepages, 809 .write_begin = pohmelfs_write_begin, 810 .write_end = pohmelfs_write_end, 811 .set_page_dirty = __set_page_dirty_nobuffers, 812}; 813 814/* 815 * ->detroy_inode() callback. Deletes inode from the caches 816 * and frees private data. 817 */ 818static void pohmelfs_destroy_inode(struct inode *inode) 819{ 820 struct super_block *sb = inode->i_sb; 821 struct pohmelfs_sb *psb = POHMELFS_SB(sb); 822 struct pohmelfs_inode *pi = POHMELFS_I(inode); 823 824 /* pohmelfs_data_unlock(pi, 0, inode->i_size, POHMELFS_READ_LOCK); */ 825 826 pohmelfs_inode_del_inode(psb, pi); 827 828 dprintk("%s: pi: %p, inode: %p, ino: %llu.\n", 829 __func__, pi, &pi->vfs_inode, pi->ino); 830 kmem_cache_free(pohmelfs_inode_cache, pi); 831 atomic_long_dec(&psb->total_inodes); 832} 833 834/* 835 * ->alloc_inode() callback. Allocates inode and initializes private data. 836 */ 837static struct inode *pohmelfs_alloc_inode(struct super_block *sb) 838{ 839 struct pohmelfs_inode *pi; 840 841 pi = kmem_cache_alloc(pohmelfs_inode_cache, GFP_NOIO); 842 if (!pi) 843 return NULL; 844 845 pi->hash_root = RB_ROOT; 846 mutex_init(&pi->offset_lock); 847 848 INIT_LIST_HEAD(&pi->sync_create_list); 849 850 INIT_LIST_HEAD(&pi->inode_entry); 851 852 pi->lock_type = 0; 853 pi->state = 0; 854 pi->total_len = 0; 855 pi->drop_count = 0; 856 857 dprintk("%s: pi: %p, inode: %p.\n", __func__, pi, &pi->vfs_inode); 858 859 atomic_long_inc(&POHMELFS_SB(sb)->total_inodes); 860 861 return &pi->vfs_inode; 862} 863 864/* 865 * We want fsync() to work on POHMELFS. 866 */ 867static int pohmelfs_fsync(struct file *file, int datasync) 868{ 869 struct inode *inode = file->f_mapping->host; 870 struct writeback_control wbc = { 871 .sync_mode = WB_SYNC_ALL, 872 .nr_to_write = 0, /* sys_fsync did this */ 873 }; 874 875 return sync_inode(inode, &wbc); 876} 877 878ssize_t pohmelfs_write(struct file *file, const char __user *buf, 879 size_t len, loff_t *ppos) 880{ 881 struct address_space *mapping = file->f_mapping; 882 struct inode *inode = mapping->host; 883 struct pohmelfs_inode *pi = POHMELFS_I(inode); 884 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 885 struct kiocb kiocb; 886 ssize_t ret; 887 loff_t pos = *ppos; 888 889 init_sync_kiocb(&kiocb, file); 890 kiocb.ki_pos = pos; 891 kiocb.ki_left = len; 892 893 dprintk("%s: len: %zu, pos: %llu.\n", __func__, len, pos); 894 895 mutex_lock(&inode->i_mutex); 896 ret = pohmelfs_data_lock(pi, pos, len, POHMELFS_WRITE_LOCK); 897 if (ret) 898 goto err_out_unlock; 899 900 ret = __generic_file_aio_write(&kiocb, &iov, 1, &kiocb.ki_pos); 901 *ppos = kiocb.ki_pos; 902 903 mutex_unlock(&inode->i_mutex); 904 WARN_ON(ret < 0); 905 906 if (ret > 0) { 907 ssize_t err; 908 909 err = generic_write_sync(file, pos, ret); 910 if (err < 0) 911 ret = err; 912 WARN_ON(ret < 0); 913 } 914 915 return ret; 916 917err_out_unlock: 918 mutex_unlock(&inode->i_mutex); 919 return ret; 920} 921 922static const struct file_operations pohmelfs_file_ops = { 923 .open = generic_file_open, 924 .fsync = pohmelfs_fsync, 925 926 .llseek = generic_file_llseek, 927 928 .read = do_sync_read, 929 .aio_read = generic_file_aio_read, 930 931 .mmap = generic_file_mmap, 932 933 .splice_read = generic_file_splice_read, 934 .splice_write = generic_file_splice_write, 935 936 .write = pohmelfs_write, 937 .aio_write = generic_file_aio_write, 938}; 939 940const struct inode_operations pohmelfs_symlink_inode_operations = { 941 .readlink = generic_readlink, 942 .follow_link = page_follow_link_light, 943 .put_link = page_put_link, 944}; 945 946int pohmelfs_setattr_raw(struct inode *inode, struct iattr *attr) 947{ 948 int err; 949 950 err = inode_change_ok(inode, attr); 951 if (err) { 952 dprintk("%s: ino: %llu, inode changes are not allowed.\n", __func__, POHMELFS_I(inode)->ino); 953 goto err_out_exit; 954 } 955 956 if ((attr->ia_valid & ATTR_SIZE) && 957 attr->ia_size != i_size_read(inode)) { 958 err = vmtruncate(inode, attr->ia_size); 959 if (err) { 960 dprintk("%s: ino: %llu, failed to set the attributes.\n", __func__, POHMELFS_I(inode)->ino); 961 goto err_out_exit; 962 } 963 } 964 965 setattr_copy(inode, attr); 966 mark_inode_dirty(inode); 967 968 dprintk("%s: ino: %llu, mode: %o -> %o, uid: %u -> %u, gid: %u -> %u, size: %llu -> %llu.\n", 969 __func__, POHMELFS_I(inode)->ino, inode->i_mode, attr->ia_mode, 970 inode->i_uid, attr->ia_uid, inode->i_gid, attr->ia_gid, inode->i_size, attr->ia_size); 971 972 return 0; 973 974err_out_exit: 975 return err; 976} 977 978int pohmelfs_setattr(struct dentry *dentry, struct iattr *attr) 979{ 980 struct inode *inode = dentry->d_inode; 981 struct pohmelfs_inode *pi = POHMELFS_I(inode); 982 int err; 983 984 err = pohmelfs_data_lock(pi, 0, ~0, POHMELFS_WRITE_LOCK); 985 if (err) 986 goto err_out_exit; 987 988 err = security_inode_setattr(dentry, attr); 989 if (err) 990 goto err_out_exit; 991 992 err = pohmelfs_setattr_raw(inode, attr); 993 if (err) 994 goto err_out_exit; 995 996 return 0; 997 998err_out_exit: 999 return err; 1000} 1001 1002static int pohmelfs_send_xattr_req(struct pohmelfs_inode *pi, u64 id, u64 start, 1003 const char *name, const void *value, size_t attrsize, int command) 1004{ 1005 struct pohmelfs_sb *psb = POHMELFS_SB(pi->vfs_inode.i_sb); 1006 int err, path_len, namelen = strlen(name) + 1; /* 0-byte */ 1007 struct netfs_trans *t; 1008 struct netfs_cmd *cmd; 1009 void *data; 1010 1011 dprintk("%s: id: %llu, start: %llu, name: '%s', attrsize: %zu, cmd: %d.\n", 1012 __func__, id, start, name, attrsize, command); 1013 1014 path_len = pohmelfs_path_length(pi); 1015 if (path_len < 0) { 1016 err = path_len; 1017 goto err_out_exit; 1018 } 1019 1020 t = netfs_trans_alloc(psb, namelen + path_len + attrsize, 0, 0); 1021 if (!t) { 1022 err = -ENOMEM; 1023 goto err_out_exit; 1024 } 1025 1026 cmd = netfs_trans_current(t); 1027 data = cmd + 1; 1028 1029 path_len = pohmelfs_construct_path_string(pi, data, path_len); 1030 if (path_len < 0) { 1031 err = path_len; 1032 goto err_out_put; 1033 } 1034 data += path_len; 1035 1036 /* 1037 * 'name' is a NUL-terminated string already and 1038 * 'namelen' includes 0-byte. 1039 */ 1040 memcpy(data, name, namelen); 1041 data += namelen; 1042 1043 memcpy(data, value, attrsize); 1044 1045 cmd->cmd = command; 1046 cmd->id = id; 1047 cmd->start = start; 1048 cmd->size = attrsize + namelen + path_len; 1049 cmd->ext = path_len; 1050 cmd->csize = 0; 1051 cmd->cpad = 0; 1052 1053 netfs_convert_cmd(cmd); 1054 netfs_trans_update(cmd, t, namelen + path_len + attrsize); 1055 1056 return netfs_trans_finish(t, psb); 1057 1058err_out_put: 1059 t->result = err; 1060 netfs_trans_put(t); 1061err_out_exit: 1062 return err; 1063} 1064 1065static int pohmelfs_setxattr(struct dentry *dentry, const char *name, 1066 const void *value, size_t attrsize, int flags) 1067{ 1068 struct inode *inode = dentry->d_inode; 1069 struct pohmelfs_inode *pi = POHMELFS_I(inode); 1070 struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb); 1071 1072 if (!(psb->state_flags & POHMELFS_FLAGS_XATTR)) 1073 return -EOPNOTSUPP; 1074 1075 return pohmelfs_send_xattr_req(pi, flags, attrsize, name, 1076 value, attrsize, NETFS_XATTR_SET); 1077} 1078 1079static ssize_t pohmelfs_getxattr(struct dentry *dentry, const char *name, 1080 void *value, size_t attrsize) 1081{ 1082 struct inode *inode = dentry->d_inode; 1083 struct pohmelfs_inode *pi = POHMELFS_I(inode); 1084 struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb); 1085 struct pohmelfs_mcache *m; 1086 int err; 1087 long timeout = psb->mcache_timeout; 1088 1089 if (!(psb->state_flags & POHMELFS_FLAGS_XATTR)) 1090 return -EOPNOTSUPP; 1091 1092 m = pohmelfs_mcache_alloc(psb, 0, attrsize, value); 1093 if (IS_ERR(m)) 1094 return PTR_ERR(m); 1095 1096 dprintk("%s: ino: %llu, name: '%s', size: %zu.\n", 1097 __func__, pi->ino, name, attrsize); 1098 1099 err = pohmelfs_send_xattr_req(pi, m->gen, attrsize, name, value, 0, NETFS_XATTR_GET); 1100 if (err) 1101 goto err_out_put; 1102 1103 do { 1104 err = wait_for_completion_timeout(&m->complete, timeout); 1105 if (err) { 1106 err = m->err; 1107 break; 1108 } 1109 1110 /* 1111 * This loop is a bit ugly, since it waits until reference counter 1112 * hits 1 and then put object here. Main goal is to prevent race with 1113 * network thread, when it can start processing given request, i.e. 1114 * increase its reference counter but yet not complete it, while 1115 * we will exit from ->getxattr() with timeout, and although request 1116 * will not be freed (its reference counter was increased by network 1117 * thread), data pointer provided by user may be released, so we will 1118 * overwrite already freed area in network thread. 1119 * 1120 * Now after timeout we remove request from the cache, so it can not be 1121 * found by network thread, and wait for its reference counter to hit 1, 1122 * i.e. if network thread already started to process this request, we wait 1123 * it to finish, and then free object locally. If reference counter is 1124 * already 1, i.e. request is not used by anyone else, we can free it without 1125 * problem. 1126 */ 1127 err = -ETIMEDOUT; 1128 timeout = HZ; 1129 1130 pohmelfs_mcache_remove_locked(psb, m); 1131 } while (atomic_read(&m->refcnt) != 1); 1132 1133 pohmelfs_mcache_put(psb, m); 1134 1135 dprintk("%s: ino: %llu, err: %d.\n", __func__, pi->ino, err); 1136 1137 return err; 1138 1139err_out_put: 1140 pohmelfs_mcache_put(psb, m); 1141 return err; 1142} 1143 1144static int pohmelfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1145{ 1146 struct inode *inode = dentry->d_inode; 1147 1148 generic_fillattr(inode, stat); 1149 return 0; 1150} 1151 1152const struct inode_operations pohmelfs_file_inode_operations = { 1153 .setattr = pohmelfs_setattr, 1154 .getattr = pohmelfs_getattr, 1155 .setxattr = pohmelfs_setxattr, 1156 .getxattr = pohmelfs_getxattr, 1157}; 1158 1159/* 1160 * Fill inode data: mode, size, operation callbacks and so on... 1161 */ 1162void pohmelfs_fill_inode(struct inode *inode, struct netfs_inode_info *info) 1163{ 1164 inode->i_mode = info->mode; 1165 inode->i_nlink = info->nlink; 1166 inode->i_uid = info->uid; 1167 inode->i_gid = info->gid; 1168 inode->i_blocks = info->blocks; 1169 inode->i_rdev = info->rdev; 1170 inode->i_size = info->size; 1171 inode->i_version = info->version; 1172 inode->i_blkbits = ffs(info->blocksize); 1173 1174 dprintk("%s: inode: %p, num: %lu/%llu inode is regular: %d, dir: %d, link: %d, mode: %o, size: %llu.\n", 1175 __func__, inode, inode->i_ino, info->ino, 1176 S_ISREG(inode->i_mode), S_ISDIR(inode->i_mode), 1177 S_ISLNK(inode->i_mode), inode->i_mode, inode->i_size); 1178 1179 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 1180 1181 /* 1182 * i_mapping is a pointer to i_data during inode initialization. 1183 */ 1184 inode->i_data.a_ops = &pohmelfs_aops; 1185 1186 if (S_ISREG(inode->i_mode)) { 1187 inode->i_fop = &pohmelfs_file_ops; 1188 inode->i_op = &pohmelfs_file_inode_operations; 1189 } else if (S_ISDIR(inode->i_mode)) { 1190 inode->i_fop = &pohmelfs_dir_fops; 1191 inode->i_op = &pohmelfs_dir_inode_ops; 1192 } else if (S_ISLNK(inode->i_mode)) { 1193 inode->i_op = &pohmelfs_symlink_inode_operations; 1194 inode->i_fop = &pohmelfs_file_ops; 1195 } else { 1196 inode->i_fop = &generic_ro_fops; 1197 } 1198} 1199 1200static int pohmelfs_drop_inode(struct inode *inode) 1201{ 1202 struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb); 1203 struct pohmelfs_inode *pi = POHMELFS_I(inode); 1204 1205 spin_lock(&psb->ino_lock); 1206 list_del_init(&pi->inode_entry); 1207 spin_unlock(&psb->ino_lock); 1208 1209 return generic_drop_inode(inode); 1210} 1211 1212static struct pohmelfs_inode *pohmelfs_get_inode_from_list(struct pohmelfs_sb *psb, 1213 struct list_head *head, unsigned int *count) 1214{ 1215 struct pohmelfs_inode *pi = NULL; 1216 1217 spin_lock(&psb->ino_lock); 1218 if (!list_empty(head)) { 1219 pi = list_entry(head->next, struct pohmelfs_inode, 1220 inode_entry); 1221 list_del_init(&pi->inode_entry); 1222 *count = pi->drop_count; 1223 pi->drop_count = 0; 1224 } 1225 spin_unlock(&psb->ino_lock); 1226 1227 return pi; 1228} 1229 1230static void pohmelfs_flush_transactions(struct pohmelfs_sb *psb) 1231{ 1232 struct pohmelfs_config *c; 1233 1234 mutex_lock(&psb->state_lock); 1235 list_for_each_entry(c, &psb->state_list, config_entry) { 1236 pohmelfs_state_flush_transactions(&c->state); 1237 } 1238 mutex_unlock(&psb->state_lock); 1239} 1240 1241/* 1242 * ->put_super() callback. Invoked before superblock is destroyed, 1243 * so it has to clean all private data. 1244 */ 1245static void pohmelfs_put_super(struct super_block *sb) 1246{ 1247 struct pohmelfs_sb *psb = POHMELFS_SB(sb); 1248 struct pohmelfs_inode *pi; 1249 unsigned int count = 0; 1250 unsigned int in_drop_list = 0; 1251 struct inode *inode, *tmp; 1252 1253 dprintk("%s.\n", __func__); 1254 1255 /* 1256 * Kill pending transactions, which could affect inodes in-flight. 1257 */ 1258 pohmelfs_flush_transactions(psb); 1259 1260 while ((pi = pohmelfs_get_inode_from_list(psb, &psb->drop_list, &count))) { 1261 inode = &pi->vfs_inode; 1262 1263 dprintk("%s: ino: %llu, pi: %p, inode: %p, count: %u.\n", 1264 __func__, pi->ino, pi, inode, count); 1265 1266 if (atomic_read(&inode->i_count) != count) { 1267 printk("%s: ino: %llu, pi: %p, inode: %p, count: %u, i_count: %d.\n", 1268 __func__, pi->ino, pi, inode, count, 1269 atomic_read(&inode->i_count)); 1270 count = atomic_read(&inode->i_count); 1271 in_drop_list++; 1272 } 1273 1274 while (count--) 1275 iput(&pi->vfs_inode); 1276 } 1277 1278 list_for_each_entry_safe(inode, tmp, &sb->s_inodes, i_sb_list) { 1279 pi = POHMELFS_I(inode); 1280 1281 dprintk("%s: ino: %llu, pi: %p, inode: %p, i_count: %u.\n", 1282 __func__, pi->ino, pi, inode, atomic_read(&inode->i_count)); 1283 1284 /* 1285 * These are special inodes, they were created during 1286 * directory reading or lookup, and were not bound to dentry, 1287 * so they live here with reference counter being 1 and prevent 1288 * umount from succeed since it believes that they are busy. 1289 */ 1290 count = atomic_read(&inode->i_count); 1291 if (count) { 1292 list_del_init(&inode->i_sb_list); 1293 while (count--) 1294 iput(&pi->vfs_inode); 1295 } 1296 } 1297 1298 psb->trans_scan_timeout = psb->drop_scan_timeout = 0; 1299 cancel_rearming_delayed_work(&psb->dwork); 1300 cancel_rearming_delayed_work(&psb->drop_dwork); 1301 flush_scheduled_work(); 1302 1303 dprintk("%s: stopped workqueues.\n", __func__); 1304 1305 pohmelfs_crypto_exit(psb); 1306 pohmelfs_state_exit(psb); 1307 1308 bdi_destroy(&psb->bdi); 1309 1310 kfree(psb); 1311 sb->s_fs_info = NULL; 1312} 1313 1314static int pohmelfs_statfs(struct dentry *dentry, struct kstatfs *buf) 1315{ 1316 struct super_block *sb = dentry->d_sb; 1317 struct pohmelfs_sb *psb = POHMELFS_SB(sb); 1318 1319 /* 1320 * There are no filesystem size limits yet. 1321 */ 1322 memset(buf, 0, sizeof(struct kstatfs)); 1323 1324 buf->f_type = POHMELFS_MAGIC_NUM; /* 'POH.' */ 1325 buf->f_bsize = sb->s_blocksize; 1326 buf->f_files = psb->ino; 1327 buf->f_namelen = 255; 1328 buf->f_files = atomic_long_read(&psb->total_inodes); 1329 buf->f_bfree = buf->f_bavail = psb->avail_size >> PAGE_SHIFT; 1330 buf->f_blocks = psb->total_size >> PAGE_SHIFT; 1331 1332 dprintk("%s: total: %llu, avail: %llu, inodes: %llu, bsize: %lu.\n", 1333 __func__, psb->total_size, psb->avail_size, buf->f_files, sb->s_blocksize); 1334 1335 return 0; 1336} 1337 1338static int pohmelfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 1339{ 1340 struct pohmelfs_sb *psb = POHMELFS_SB(vfs->mnt_sb); 1341 1342 seq_printf(seq, ",idx=%u", psb->idx); 1343 seq_printf(seq, ",trans_scan_timeout=%u", jiffies_to_msecs(psb->trans_scan_timeout)); 1344 seq_printf(seq, ",drop_scan_timeout=%u", jiffies_to_msecs(psb->drop_scan_timeout)); 1345 seq_printf(seq, ",wait_on_page_timeout=%u", jiffies_to_msecs(psb->wait_on_page_timeout)); 1346 seq_printf(seq, ",trans_retries=%u", psb->trans_retries); 1347 seq_printf(seq, ",crypto_thread_num=%u", psb->crypto_thread_num); 1348 seq_printf(seq, ",trans_max_pages=%u", psb->trans_max_pages); 1349 seq_printf(seq, ",mcache_timeout=%u", jiffies_to_msecs(psb->mcache_timeout)); 1350 if (psb->crypto_fail_unsupported) 1351 seq_printf(seq, ",crypto_fail_unsupported"); 1352 1353 return 0; 1354} 1355 1356enum { 1357 pohmelfs_opt_idx, 1358 pohmelfs_opt_crypto_thread_num, 1359 pohmelfs_opt_trans_max_pages, 1360 pohmelfs_opt_crypto_fail_unsupported, 1361 1362 /* Remountable options */ 1363 pohmelfs_opt_trans_scan_timeout, 1364 pohmelfs_opt_drop_scan_timeout, 1365 pohmelfs_opt_wait_on_page_timeout, 1366 pohmelfs_opt_trans_retries, 1367 pohmelfs_opt_mcache_timeout, 1368}; 1369 1370static struct match_token pohmelfs_tokens[] = { 1371 {pohmelfs_opt_idx, "idx=%u"}, 1372 {pohmelfs_opt_crypto_thread_num, "crypto_thread_num=%u"}, 1373 {pohmelfs_opt_trans_max_pages, "trans_max_pages=%u"}, 1374 {pohmelfs_opt_crypto_fail_unsupported, "crypto_fail_unsupported"}, 1375 {pohmelfs_opt_trans_scan_timeout, "trans_scan_timeout=%u"}, 1376 {pohmelfs_opt_drop_scan_timeout, "drop_scan_timeout=%u"}, 1377 {pohmelfs_opt_wait_on_page_timeout, "wait_on_page_timeout=%u"}, 1378 {pohmelfs_opt_trans_retries, "trans_retries=%u"}, 1379 {pohmelfs_opt_mcache_timeout, "mcache_timeout=%u"}, 1380}; 1381 1382static int pohmelfs_parse_options(char *options, struct pohmelfs_sb *psb, int remount) 1383{ 1384 char *p; 1385 substring_t args[MAX_OPT_ARGS]; 1386 int option, err; 1387 1388 if (!options) 1389 return 0; 1390 1391 while ((p = strsep(&options, ",")) != NULL) { 1392 int token; 1393 if (!*p) 1394 continue; 1395 1396 token = match_token(p, pohmelfs_tokens, args); 1397 1398 err = match_int(&args[0], &option); 1399 if (err) 1400 return err; 1401 1402 if (remount && token <= pohmelfs_opt_crypto_fail_unsupported) 1403 continue; 1404 1405 switch (token) { 1406 case pohmelfs_opt_idx: 1407 psb->idx = option; 1408 break; 1409 case pohmelfs_opt_trans_scan_timeout: 1410 psb->trans_scan_timeout = msecs_to_jiffies(option); 1411 break; 1412 case pohmelfs_opt_drop_scan_timeout: 1413 psb->drop_scan_timeout = msecs_to_jiffies(option); 1414 break; 1415 case pohmelfs_opt_wait_on_page_timeout: 1416 psb->wait_on_page_timeout = msecs_to_jiffies(option); 1417 break; 1418 case pohmelfs_opt_mcache_timeout: 1419 psb->mcache_timeout = msecs_to_jiffies(option); 1420 break; 1421 case pohmelfs_opt_trans_retries: 1422 psb->trans_retries = option; 1423 break; 1424 case pohmelfs_opt_crypto_thread_num: 1425 psb->crypto_thread_num = option; 1426 break; 1427 case pohmelfs_opt_trans_max_pages: 1428 psb->trans_max_pages = option; 1429 break; 1430 case pohmelfs_opt_crypto_fail_unsupported: 1431 psb->crypto_fail_unsupported = 1; 1432 break; 1433 default: 1434 return -EINVAL; 1435 } 1436 } 1437 1438 return 0; 1439} 1440 1441static int pohmelfs_remount(struct super_block *sb, int *flags, char *data) 1442{ 1443 int err; 1444 struct pohmelfs_sb *psb = POHMELFS_SB(sb); 1445 unsigned long old_sb_flags = sb->s_flags; 1446 1447 err = pohmelfs_parse_options(data, psb, 1); 1448 if (err) 1449 goto err_out_restore; 1450 1451 if (!(*flags & MS_RDONLY)) 1452 sb->s_flags &= ~MS_RDONLY; 1453 return 0; 1454 1455err_out_restore: 1456 sb->s_flags = old_sb_flags; 1457 return err; 1458} 1459 1460static void pohmelfs_flush_inode(struct pohmelfs_inode *pi, unsigned int count) 1461{ 1462 struct inode *inode = &pi->vfs_inode; 1463 1464 dprintk("%s: %p: ino: %llu, owned: %d.\n", 1465 __func__, inode, pi->ino, test_bit(NETFS_INODE_OWNED, &pi->state)); 1466 1467 mutex_lock(&inode->i_mutex); 1468 if (test_and_clear_bit(NETFS_INODE_OWNED, &pi->state)) { 1469 filemap_fdatawrite(inode->i_mapping); 1470 inode->i_sb->s_op->write_inode(inode, 0); 1471 } 1472 1473#ifdef POHMELFS_TRUNCATE_ON_INODE_FLUSH 1474 truncate_inode_pages(inode->i_mapping, 0); 1475#endif 1476 1477 pohmelfs_data_unlock(pi, 0, ~0, POHMELFS_WRITE_LOCK); 1478 mutex_unlock(&inode->i_mutex); 1479} 1480 1481static void pohmelfs_put_inode_count(struct pohmelfs_inode *pi, unsigned int count) 1482{ 1483 dprintk("%s: ino: %llu, pi: %p, inode: %p, count: %u.\n", 1484 __func__, pi->ino, pi, &pi->vfs_inode, count); 1485 1486 if (test_and_clear_bit(NETFS_INODE_NEED_FLUSH, &pi->state)) 1487 pohmelfs_flush_inode(pi, count); 1488 1489 while (count--) 1490 iput(&pi->vfs_inode); 1491} 1492 1493static void pohmelfs_drop_scan(struct work_struct *work) 1494{ 1495 struct pohmelfs_sb *psb = 1496 container_of(work, struct pohmelfs_sb, drop_dwork.work); 1497 struct pohmelfs_inode *pi; 1498 unsigned int count = 0; 1499 1500 while ((pi = pohmelfs_get_inode_from_list(psb, &psb->drop_list, &count))) 1501 pohmelfs_put_inode_count(pi, count); 1502 1503 pohmelfs_check_states(psb); 1504 1505 if (psb->drop_scan_timeout) 1506 schedule_delayed_work(&psb->drop_dwork, psb->drop_scan_timeout); 1507} 1508 1509/* 1510 * Run through all transactions starting from the oldest, 1511 * drop transaction from current state and try to send it 1512 * to all remote nodes, which are currently installed. 1513 */ 1514static void pohmelfs_trans_scan_state(struct netfs_state *st) 1515{ 1516 struct rb_node *rb_node; 1517 struct netfs_trans_dst *dst; 1518 struct pohmelfs_sb *psb = st->psb; 1519 unsigned int timeout = psb->trans_scan_timeout; 1520 struct netfs_trans *t; 1521 int err; 1522 1523 mutex_lock(&st->trans_lock); 1524 for (rb_node = rb_first(&st->trans_root); rb_node; ) { 1525 dst = rb_entry(rb_node, struct netfs_trans_dst, state_entry); 1526 t = dst->trans; 1527 1528 if (timeout && time_after(dst->send_time + timeout, jiffies) 1529 && dst->retries == 0) 1530 break; 1531 1532 dprintk("%s: t: %p, gen: %u, st: %p, retries: %u, max: %u.\n", 1533 __func__, t, t->gen, st, dst->retries, psb->trans_retries); 1534 netfs_trans_get(t); 1535 1536 rb_node = rb_next(rb_node); 1537 1538 err = -ETIMEDOUT; 1539 if (timeout && (++dst->retries < psb->trans_retries)) 1540 err = netfs_trans_resend(t, psb); 1541 1542 if (err || (t->flags & NETFS_TRANS_SINGLE_DST)) { 1543 if (netfs_trans_remove_nolock(dst, st)) 1544 netfs_trans_drop_dst_nostate(dst); 1545 } 1546 1547 t->result = err; 1548 netfs_trans_put(t); 1549 } 1550 mutex_unlock(&st->trans_lock); 1551} 1552 1553/* 1554 * Walk through all installed network states and resend all 1555 * transactions, which are old enough. 1556 */ 1557static void pohmelfs_trans_scan(struct work_struct *work) 1558{ 1559 struct pohmelfs_sb *psb = 1560 container_of(work, struct pohmelfs_sb, dwork.work); 1561 struct netfs_state *st; 1562 struct pohmelfs_config *c; 1563 1564 mutex_lock(&psb->state_lock); 1565 list_for_each_entry(c, &psb->state_list, config_entry) { 1566 st = &c->state; 1567 1568 pohmelfs_trans_scan_state(st); 1569 } 1570 mutex_unlock(&psb->state_lock); 1571 1572 /* 1573 * If no timeout specified then system is in the middle of umount process, 1574 * so no need to reschedule scanning process again. 1575 */ 1576 if (psb->trans_scan_timeout) 1577 schedule_delayed_work(&psb->dwork, psb->trans_scan_timeout); 1578} 1579 1580int pohmelfs_meta_command_data(struct pohmelfs_inode *pi, u64 id, unsigned int cmd_op, char *addon, 1581 unsigned int flags, netfs_trans_complete_t complete, void *priv, u64 start) 1582{ 1583 struct inode *inode = &pi->vfs_inode; 1584 struct pohmelfs_sb *psb = POHMELFS_SB(inode->i_sb); 1585 int err = 0, sz; 1586 struct netfs_trans *t; 1587 int path_len, addon_len = 0; 1588 void *data; 1589 struct netfs_inode_info *info; 1590 struct netfs_cmd *cmd; 1591 1592 dprintk("%s: ino: %llu, cmd: %u, addon: %p.\n", __func__, pi->ino, cmd_op, addon); 1593 1594 path_len = pohmelfs_path_length(pi); 1595 if (path_len < 0) { 1596 err = path_len; 1597 goto err_out_exit; 1598 } 1599 1600 if (addon) 1601 addon_len = strlen(addon) + 1; /* 0-byte */ 1602 sz = addon_len; 1603 1604 if (cmd_op == NETFS_INODE_INFO) 1605 sz += sizeof(struct netfs_inode_info); 1606 1607 t = netfs_trans_alloc(psb, sz + path_len, flags, 0); 1608 if (!t) { 1609 err = -ENOMEM; 1610 goto err_out_exit; 1611 } 1612 t->complete = complete; 1613 t->private = priv; 1614 1615 cmd = netfs_trans_current(t); 1616 data = (void *)(cmd + 1); 1617 1618 if (cmd_op == NETFS_INODE_INFO) { 1619 info = (struct netfs_inode_info *)(cmd + 1); 1620 data = (void *)(info + 1); 1621 1622 /* 1623 * We are under i_mutex, can read and change whatever we want... 1624 */ 1625 info->mode = inode->i_mode; 1626 info->nlink = inode->i_nlink; 1627 info->uid = inode->i_uid; 1628 info->gid = inode->i_gid; 1629 info->blocks = inode->i_blocks; 1630 info->rdev = inode->i_rdev; 1631 info->size = inode->i_size; 1632 info->version = inode->i_version; 1633 1634 netfs_convert_inode_info(info); 1635 } 1636 1637 path_len = pohmelfs_construct_path_string(pi, data, path_len); 1638 if (path_len < 0) 1639 goto err_out_free; 1640 1641 dprintk("%s: path_len: %d.\n", __func__, path_len); 1642 1643 if (addon) { 1644 path_len--; /* Do not place null-byte before the addon */ 1645 path_len += sprintf(data + path_len, "/%s", addon) + 1; /* 0 - byte */ 1646 } 1647 1648 sz += path_len; 1649 1650 cmd->cmd = cmd_op; 1651 cmd->ext = path_len; 1652 cmd->size = sz; 1653 cmd->id = id; 1654 cmd->start = start; 1655 1656 netfs_convert_cmd(cmd); 1657 netfs_trans_update(cmd, t, sz); 1658 1659 /* 1660 * Note, that it is possible to leak error here: transaction callback will not 1661 * be invoked for allocation path failure. 1662 */ 1663 return netfs_trans_finish(t, psb); 1664 1665err_out_free: 1666 netfs_trans_free(t); 1667err_out_exit: 1668 if (complete) 1669 complete(NULL, 0, priv, err); 1670 return err; 1671} 1672 1673int pohmelfs_meta_command(struct pohmelfs_inode *pi, unsigned int cmd_op, unsigned int flags, 1674 netfs_trans_complete_t complete, void *priv, u64 start) 1675{ 1676 return pohmelfs_meta_command_data(pi, pi->ino, cmd_op, NULL, flags, complete, priv, start); 1677} 1678 1679/* 1680 * Send request and wait for POHMELFS root capabilities response, 1681 * which will update server's informaion about size of the export, 1682 * permissions, number of objects, available size and so on. 1683 */ 1684static int pohmelfs_root_handshake(struct pohmelfs_sb *psb) 1685{ 1686 struct netfs_trans *t; 1687 struct netfs_cmd *cmd; 1688 int err = -ENOMEM; 1689 1690 t = netfs_trans_alloc(psb, 0, 0, 0); 1691 if (!t) 1692 goto err_out_exit; 1693 1694 cmd = netfs_trans_current(t); 1695 1696 cmd->cmd = NETFS_CAPABILITIES; 1697 cmd->id = POHMELFS_ROOT_CAPABILITIES; 1698 cmd->size = 0; 1699 cmd->start = 0; 1700 cmd->ext = 0; 1701 cmd->csize = 0; 1702 1703 netfs_convert_cmd(cmd); 1704 netfs_trans_update(cmd, t, 0); 1705 1706 err = netfs_trans_finish(t, psb); 1707 if (err) 1708 goto err_out_exit; 1709 1710 psb->flags = ~0; 1711 err = wait_event_interruptible_timeout(psb->wait, 1712 (psb->flags != ~0), 1713 psb->wait_on_page_timeout); 1714 if (!err) 1715 err = -ETIMEDOUT; 1716 else if (err > 0) 1717 err = -psb->flags; 1718 1719 if (err) 1720 goto err_out_exit; 1721 1722 return 0; 1723 1724err_out_exit: 1725 return err; 1726} 1727 1728static int pohmelfs_show_stats(struct seq_file *m, struct vfsmount *mnt) 1729{ 1730 struct netfs_state *st; 1731 struct pohmelfs_ctl *ctl; 1732 struct pohmelfs_sb *psb = POHMELFS_SB(mnt->mnt_sb); 1733 struct pohmelfs_config *c; 1734 1735 mutex_lock(&psb->state_lock); 1736 1737 seq_printf(m, "\nidx addr(:port) socket_type protocol active priority permissions\n"); 1738 1739 list_for_each_entry(c, &psb->state_list, config_entry) { 1740 st = &c->state; 1741 ctl = &st->ctl; 1742 1743 seq_printf(m, "%u ", ctl->idx); 1744 if (ctl->addr.sa_family == AF_INET) { 1745 struct sockaddr_in *sin = (struct sockaddr_in *)&st->ctl.addr; 1746 seq_printf(m, "%pI4:%u", &sin->sin_addr.s_addr, ntohs(sin->sin_port)); 1747 } else if (ctl->addr.sa_family == AF_INET6) { 1748 struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&st->ctl.addr; 1749 seq_printf(m, "%pi6:%u", &sin->sin6_addr, ntohs(sin->sin6_port)); 1750 } else { 1751 unsigned int i; 1752 for (i = 0; i < ctl->addrlen; ++i) 1753 seq_printf(m, "%02x.", ctl->addr.addr[i]); 1754 } 1755 1756 seq_printf(m, " %u %u %d %u %x\n", 1757 ctl->type, ctl->proto, 1758 st->socket != NULL, 1759 ctl->prio, ctl->perm); 1760 } 1761 mutex_unlock(&psb->state_lock); 1762 1763 return 0; 1764} 1765 1766static const struct super_operations pohmelfs_sb_ops = { 1767 .alloc_inode = pohmelfs_alloc_inode, 1768 .destroy_inode = pohmelfs_destroy_inode, 1769 .drop_inode = pohmelfs_drop_inode, 1770 .write_inode = pohmelfs_write_inode, 1771 .put_super = pohmelfs_put_super, 1772 .remount_fs = pohmelfs_remount, 1773 .statfs = pohmelfs_statfs, 1774 .show_options = pohmelfs_show_options, 1775 .show_stats = pohmelfs_show_stats, 1776}; 1777 1778/* 1779 * Allocate private superblock and create root dir. 1780 */ 1781static int pohmelfs_fill_super(struct super_block *sb, void *data, int silent) 1782{ 1783 struct pohmelfs_sb *psb; 1784 int err = -ENOMEM; 1785 struct inode *root; 1786 struct pohmelfs_inode *npi; 1787 struct qstr str; 1788 1789 psb = kzalloc(sizeof(struct pohmelfs_sb), GFP_KERNEL); 1790 if (!psb) 1791 goto err_out_exit; 1792 1793 err = bdi_init(&psb->bdi); 1794 if (err) 1795 goto err_out_free_sb; 1796 1797 err = bdi_register(&psb->bdi, NULL, "pfs-%d", atomic_inc_return(&psb_bdi_num)); 1798 if (err) { 1799 bdi_destroy(&psb->bdi); 1800 goto err_out_free_sb; 1801 } 1802 1803 sb->s_fs_info = psb; 1804 sb->s_op = &pohmelfs_sb_ops; 1805 sb->s_magic = POHMELFS_MAGIC_NUM; 1806 sb->s_maxbytes = MAX_LFS_FILESIZE; 1807 sb->s_blocksize = PAGE_SIZE; 1808 sb->s_bdi = &psb->bdi; 1809 1810 psb->sb = sb; 1811 1812 psb->ino = 2; 1813 psb->idx = 0; 1814 psb->active_state = NULL; 1815 psb->trans_retries = 5; 1816 psb->trans_data_size = PAGE_SIZE; 1817 psb->drop_scan_timeout = msecs_to_jiffies(1000); 1818 psb->trans_scan_timeout = msecs_to_jiffies(5000); 1819 psb->wait_on_page_timeout = msecs_to_jiffies(5000); 1820 init_waitqueue_head(&psb->wait); 1821 1822 spin_lock_init(&psb->ino_lock); 1823 1824 INIT_LIST_HEAD(&psb->drop_list); 1825 1826 mutex_init(&psb->mcache_lock); 1827 psb->mcache_root = RB_ROOT; 1828 psb->mcache_timeout = msecs_to_jiffies(5000); 1829 atomic_long_set(&psb->mcache_gen, 0); 1830 1831 psb->trans_max_pages = 100; 1832 1833 psb->crypto_align_size = 16; 1834 psb->crypto_attached_size = 0; 1835 psb->hash_strlen = 0; 1836 psb->cipher_strlen = 0; 1837 psb->perform_crypto = 0; 1838 psb->crypto_thread_num = 2; 1839 psb->crypto_fail_unsupported = 0; 1840 mutex_init(&psb->crypto_thread_lock); 1841 INIT_LIST_HEAD(&psb->crypto_ready_list); 1842 INIT_LIST_HEAD(&psb->crypto_active_list); 1843 1844 atomic_set(&psb->trans_gen, 1); 1845 atomic_long_set(&psb->total_inodes, 0); 1846 1847 mutex_init(&psb->state_lock); 1848 INIT_LIST_HEAD(&psb->state_list); 1849 1850 err = pohmelfs_parse_options((char *) data, psb, 0); 1851 if (err) 1852 goto err_out_free_bdi; 1853 1854 err = pohmelfs_copy_crypto(psb); 1855 if (err) 1856 goto err_out_free_bdi; 1857 1858 err = pohmelfs_state_init(psb); 1859 if (err) 1860 goto err_out_free_strings; 1861 1862 err = pohmelfs_crypto_init(psb); 1863 if (err) 1864 goto err_out_state_exit; 1865 1866 err = pohmelfs_root_handshake(psb); 1867 if (err) 1868 goto err_out_crypto_exit; 1869 1870 str.name = "/"; 1871 str.hash = jhash("/", 1, 0); 1872 str.len = 1; 1873 1874 npi = pohmelfs_create_entry_local(psb, NULL, &str, 0, 0755|S_IFDIR); 1875 if (IS_ERR(npi)) { 1876 err = PTR_ERR(npi); 1877 goto err_out_crypto_exit; 1878 } 1879 set_bit(NETFS_INODE_REMOTE_SYNCED, &npi->state); 1880 clear_bit(NETFS_INODE_OWNED, &npi->state); 1881 1882 root = &npi->vfs_inode; 1883 1884 sb->s_root = d_alloc_root(root); 1885 if (!sb->s_root) 1886 goto err_out_put_root; 1887 1888 INIT_DELAYED_WORK(&psb->drop_dwork, pohmelfs_drop_scan); 1889 schedule_delayed_work(&psb->drop_dwork, psb->drop_scan_timeout); 1890 1891 INIT_DELAYED_WORK(&psb->dwork, pohmelfs_trans_scan); 1892 schedule_delayed_work(&psb->dwork, psb->trans_scan_timeout); 1893 1894 return 0; 1895 1896err_out_put_root: 1897 iput(root); 1898err_out_crypto_exit: 1899 pohmelfs_crypto_exit(psb); 1900err_out_state_exit: 1901 pohmelfs_state_exit(psb); 1902err_out_free_strings: 1903 kfree(psb->cipher_string); 1904 kfree(psb->hash_string); 1905err_out_free_bdi: 1906 bdi_destroy(&psb->bdi); 1907err_out_free_sb: 1908 kfree(psb); 1909err_out_exit: 1910 1911 dprintk("%s: err: %d.\n", __func__, err); 1912 return err; 1913} 1914 1915/* 1916 * Some VFS magic here... 1917 */ 1918static int pohmelfs_get_sb(struct file_system_type *fs_type, 1919 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1920{ 1921 return get_sb_nodev(fs_type, flags, data, pohmelfs_fill_super, 1922 mnt); 1923} 1924 1925/* 1926 * We need this to sync all inodes earlier, since when writeback 1927 * is invoked from the umount/mntput path dcache is already shrunk, 1928 * see generic_shutdown_super(), and no inodes can access the path. 1929 */ 1930static void pohmelfs_kill_super(struct super_block *sb) 1931{ 1932 sync_inodes_sb(sb); 1933 kill_anon_super(sb); 1934} 1935 1936static struct file_system_type pohmel_fs_type = { 1937 .owner = THIS_MODULE, 1938 .name = "pohmel", 1939 .get_sb = pohmelfs_get_sb, 1940 .kill_sb = pohmelfs_kill_super, 1941}; 1942 1943/* 1944 * Cache and module initializations and freeing routings. 1945 */ 1946static void pohmelfs_init_once(void *data) 1947{ 1948 struct pohmelfs_inode *pi = data; 1949 1950 inode_init_once(&pi->vfs_inode); 1951} 1952 1953static int __init pohmelfs_init_inodecache(void) 1954{ 1955 pohmelfs_inode_cache = kmem_cache_create("pohmelfs_inode_cache", 1956 sizeof(struct pohmelfs_inode), 1957 0, (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), 1958 pohmelfs_init_once); 1959 if (!pohmelfs_inode_cache) 1960 return -ENOMEM; 1961 1962 return 0; 1963} 1964 1965static void pohmelfs_destroy_inodecache(void) 1966{ 1967 kmem_cache_destroy(pohmelfs_inode_cache); 1968} 1969 1970static int __init init_pohmel_fs(void) 1971{ 1972 int err; 1973 1974 err = pohmelfs_config_init(); 1975 if (err) 1976 goto err_out_exit; 1977 1978 err = pohmelfs_init_inodecache(); 1979 if (err) 1980 goto err_out_config_exit; 1981 1982 err = pohmelfs_mcache_init(); 1983 if (err) 1984 goto err_out_destroy; 1985 1986 err = netfs_trans_init(); 1987 if (err) 1988 goto err_out_mcache_exit; 1989 1990 err = register_filesystem(&pohmel_fs_type); 1991 if (err) 1992 goto err_out_trans; 1993 1994 return 0; 1995 1996err_out_trans: 1997 netfs_trans_exit(); 1998err_out_mcache_exit: 1999 pohmelfs_mcache_exit(); 2000err_out_destroy: 2001 pohmelfs_destroy_inodecache(); 2002err_out_config_exit: 2003 pohmelfs_config_exit(); 2004err_out_exit: 2005 return err; 2006} 2007 2008static void __exit exit_pohmel_fs(void) 2009{ 2010 unregister_filesystem(&pohmel_fs_type); 2011 pohmelfs_destroy_inodecache(); 2012 pohmelfs_mcache_exit(); 2013 pohmelfs_config_exit(); 2014 netfs_trans_exit(); 2015} 2016 2017module_init(init_pohmel_fs); 2018module_exit(exit_pohmel_fs); 2019 2020MODULE_LICENSE("GPL"); 2021MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); 2022MODULE_DESCRIPTION("Pohmel filesystem"); 2023